handouts/src/Advanced/Fast Inverse Root/parts/5 float.tex

\section{Floats}

\definition{}
\textit{Binary decimals}\footnotemark{} are very similar to base-10 decimals. \par
In base 10, we interpret place value as follows:
\begin{itemize}
	\item $0.1 = 10^{-1}$
	\item $0.03 = 3 \times 10^{-2}$
	\item $0.0008 = 8 \times 10^{-4}$
\end{itemize}

\footnotetext{this is a misnomer, but that's ok.}

\vspace{5mm}

We can do the same in base 2:
\begin{itemize}
	\item $\texttt{0.1} = 2^{-1} = 0.5$
	\item $\texttt{0.011} = 2^{-2} + 2^{-3} = 0.375$
	\item $\texttt{101.01} = 2^2 + 2^0 + 2^{-2} = 5.25$
\end{itemize}

\vspace{5mm}

\problem{}
Rewrite the following binary decimals in base 10: \par
\note{You may leave your answer as a fraction}
\begin{itemize}
	\item $\texttt{1011.101}$
	\item $\texttt{110.1101}$
\end{itemize}

\vfill
\pagebreak

\definition{}
Another way we can interpret a bit string is as a \textit{signed floating-point decimal}, or a \texttt{float} for short. \par
Floats represent a subset of the real numbers, and are interpreted as follows: \par
\note{The following only applies to floats that consist of 32 bits. We won't encounter any others today.}
\begin{center}
	\begin{tikzpicture}
		\node[anchor=south west] at (0, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (0.25, 0) {\texttt{\texttt{b}}};
		\node[anchor=south west] at (0.50, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (0.75, 0) {\texttt{\texttt{\_}}};

		\node[anchor=south west] at (1.00, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (1.25, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (1.50, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (1.75, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (2.00, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (2.25, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (2.50, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (2.75, 0) {\texttt{\texttt{0}}};

		\node[anchor=south west] at (3.00, 0) {\texttt{\texttt{\_}}};
		\node[anchor=south west] at (3.25, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (3.50, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (3.75, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (4.00, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (4.25, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (4.50, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (4.75, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (5.00, 0) {\texttt{\texttt{\_}}};

		\node[anchor=south west] at (5.25, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (5.50, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (5.75, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (6.00, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (6.25, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (6.50, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (6.75, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (7.00, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (7.25, 0) {\texttt{\texttt{\_}}};

		\node[anchor=south west] at (7.50, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (7.75, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (8.00, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (8.25, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (8.50, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (8.75, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (9.00, 0) {\texttt{\texttt{0}}};
		\node[anchor=south west] at (9.25, 0) {\texttt{\texttt{0}}};


		\draw (0.50, 0) -- (0.95, 0) node [midway, below=1mm] {sign};
		\draw (1.05, 0) -- (3.15, 0) node [midway, below=1mm] {exponent};
		\draw (3.30, 0) -- (9.70, 0) node [midway, below=1mm] {fraction};
	\end{tikzpicture}
\end{center}

\begin{itemize}[itemsep = 2mm]
	\item The first bit denotes the sign of the float's value. We'll label it $s$. \par
	If $s = \texttt{1}$, this float is negative; if $s = \texttt{0}$, it is positive.

	\item The next eight bits represent the \textit{exponent} of this float. \note{(we'll see what that means soon)}\par
	We'll call the value of this eight-bit binary integer $E$. \par
	Naturally, $0 \leq E \leq 255$ \note{(since $E$ consists of eight bits.)}

	\item The remaining 23 bits represent the \textit{fraction} of this float, which we'll call $F$. \par
	These 23 bits are interpreted as the fractional part of a binary decimal. \par
	For example, the bits \texttt{0b1010000\_00000000\_00000000} represent $0.5 + 0.125 = 0.625$.
\end{itemize}

\problem{}<floata>
Consider \texttt{0b01000001\_10101000\_00000000\_00000000}. \par
Find the $s$, $E$, and $F$ we get if we interpret this bit string as a \texttt{float}. \par
\note[Note]{Leave $F$ as a sum of powers of two.}

\begin{solution}
	$s = 0$ \par
	$E = 131$ \par
	$F = 2^{21}+2^{19} = 2,621,440$
\end{solution}

\vfill


\definition{}
The final value of a float with sign $s$, exponent $E$, and fraction $F$ is
\begin{equation*}
	(-1)^s ~\times~ 2^{E - 127} ~\times~ \left(1 + \frac{F}{2^{23}}\right)
\end{equation*}

Notice that this is very similar to decimal scientific notation, which is written as
\begin{equation*}
	(-1)^s ~\times~ 10^{e} ~\times~ (f)
\end{equation*}

\problem{}
Consider \texttt{0b01000001\_10101000\_00000000\_00000000}. \par
This is the same bit string we used in \ref{floata}. \par

\vspace{2mm}

What value do we get if we interpret this bit string as a float? \par
\hint{$21 \div 16 = 1.3125$}

\begin{solution}
	This is 21:
	\begin{equation*}
		2^{131 - 127} \times \biggl(1 + \frac{2^{21} + 2^{19}}{2^{23}}\biggr)
		~=~ 2^{4} \times (1 + 0.25 + 0.0625)
		~=~ 16 \times (1.3125)
		~=~ 21
	\end{equation*}
\end{solution}

\vfill
\pagebreak

\problem{}
Encode $12.5$ as a float. \par
\hint{$12.5 \div 8 = 1.5625$}

\begin{solution}
	\begin{equation*}
		12.5
		~=~ 8 \times 1.5625
		~=~ 2^{3} \times \biggl(1 + (0.5 + 0.0625)\biggr)
		~=~ 2^{130 - 127} \times \biggl(1 + \frac{2^{22} + 2^{19}}{2^{23}}\biggr)
	\end{equation*}

	which is \texttt{0b01000001\_01001000\_00000000\_00000000}. \par
\end{solution}


\vfill

\definition{}
Say we have a bit string $x$. \par
We'll let $x_f$ denote the value we get if we interpret $x$ as a float, \par
and we'll let $x_i$ denote the value we get if we interpret $x$ as an integer.

\problem{}
Let $x = \texttt{0b01000001\_01001000\_00000000\_00000000}$. \par
What are $x_f$ and $x_i$? \note{As always, you may leave big numbers as powers of two.}
\begin{solution}
	$x_f = 12.5$ \par
	\vspace{2mm}
	$x_i = 2^{30} + 2^{24} + 2^{22} + 2^{19} = 1,095,237,632$
\end{solution}

\vfill

\pagebreak