diff --git a/Advanced/Compression/parts/3 huffman.tex b/Advanced/Compression/parts/3 huffman.tex index d027734..84a3601 100644 --- a/Advanced/Compression/parts/3 huffman.tex +++ b/Advanced/Compression/parts/3 huffman.tex @@ -57,7 +57,7 @@ How many bits does this code need per symbol, on average? \vfill \problem{} -Consider the code below. How is it different from the one above? \par +Consider the code below. How is it different from the one on the previous page? \par Is this a good way to encode five-letter strings? \begin{itemize} \item $\texttt{A}$ to $\texttt{00}$ @@ -78,52 +78,167 @@ Is this a good way to encode five-letter strings? \pagebreak + + + + + + + \remark{} -Huffman codes can be visualized as a tree which we traverse while decoding our sequence. \par -We start at the topmost node, taking the left edge if we see a \texttt{0} and the right edge if we see a \texttt{1}. \par -As an example, consider the code for $\{\texttt{A}, \texttt{B}, \texttt{C}, \texttt{D}, \texttt{E}\}$ -on the previous page: +The code from the previous page can be visualized as a tree which we traverse while decoding our sequence. +Starting from the topmost node, we take the left edge if we see a \texttt{0} and the right edge if we see a \texttt{1}. +Once we reach a letter, we return to the top node and repeat the process. 
+ +\vspace{-5mm} +\null\hfill +\begin{minipage}[t]{0.48\textwidth} + \vspace{0pt} + + \begin{itemize} + \item $\texttt{A}$ encodes as $\texttt{00}$ + \item $\texttt{B}$ encodes as $\texttt{01}$ + \item $\texttt{C}$ encodes as $\texttt{10}$ + \item $\texttt{D}$ encodes as $\texttt{110}$ + \item $\texttt{E}$ encodes as $\texttt{111}$ + \end{itemize} +\end{minipage} +\hfill +\begin{minipage}[t]{0.48\textwidth} + \vspace{0pt} + + \begin{center} + \begin{tikzpicture}[scale=1.0] + \begin{scope}[layer = nodes] + \node[int] (x) at (0, 0) {}; + \node[int] (0) at (-0.75, -1) {}; + \node[int] (1) at (0.75, -1) {}; + \node[end] (00) at (-1.25, -2) {\texttt{A}}; + \node[end] (01) at (-0.25, -2) {\texttt{B}}; + \node[end] (10) at (0.25, -2) {\texttt{C}}; + \node[int] (11) at (1.25, -2) {}; + \node[end] (110) at (0.75, -3) {\texttt{D}}; + \node[end] (111) at (1.75, -3) {\texttt{E}}; + \end{scope} + + \draw[-] + (x) to node[edg] {\texttt{0}} (0) + (x) to node[edg] {\texttt{1}} (1) + (0) to node[edg] {\texttt{0}} (00) + (0) to node[edg] {\texttt{1}} (01) + (1) to node[edg] {\texttt{0}} (10) + (1) to node[edg] {\texttt{1}} (11) + (11) to node[edg] {\texttt{0}} (110) + (11) to node[edg] {\texttt{1}} (111) + ; + \end{tikzpicture} + \end{center} +\end{minipage} +\hfill\null -\begin{itemize} - \item $\texttt{A}$ encodes as $\texttt{00}$ - \item $\texttt{B}$ encodes as $\texttt{01}$ - \item $\texttt{C}$ encodes as $\texttt{10}$ - \item $\texttt{D}$ encodes as $\texttt{110}$ - \item $\texttt{E}$ encodes as $\texttt{111}$ -\end{itemize} - -Drawing this scheme as a tree, we get the following: -\begin{center} -\begin{tikzpicture}[scale=1.0] - \begin{scope}[layer = nodes] - \node[int] (x) at (0, 0) {}; - \node[int] (0) at (-0.75, -1) {}; - \node[int] (1) at (0.75, -1) {}; - \node[end] (00) at (-1.25, -2) {\texttt{A}}; - \node[end] (01) at (-0.25, -2) {\texttt{B}}; - \node[end] (10) at (0.25, -2) {\texttt{C}}; - \node[int] (11) at (1.25, -2) {}; - \node[end] (110) at (0.75, -3) {\texttt{D}}; - 
\node[end] (111) at (1.75, -3) {\texttt{E}}; - \end{scope} - - \draw[-] - (x) to node[midway, fill=white, text=gray] {\texttt{0}} (0) - (x) to node[midway, fill=white, text=gray] {\texttt{1}} (1) - (0) to node[midway, fill=white, text=gray] {\texttt{0}} (00) - (0) to node[midway, fill=white, text=gray] {\texttt{1}} (01) - (1) to node[midway, fill=white, text=gray] {\texttt{0}} (10) - (1) to node[midway, fill=white, text=gray] {\texttt{1}} (11) - (11) to node[midway, fill=white, text=gray] {\texttt{0}} (110) - (11) to node[midway, fill=white, text=gray] {\texttt{1}} (111) - ; -\end{tikzpicture} -\end{center} +\problem{}\label{treedecode} +Decode \texttt{[110111001001110110]} using the tree above. +\begin{solution} + This is \texttt{[110$\cdot$111$\cdot$00$\cdot$10$\cdot$01$\cdot$110$\cdot$110]}, which is \texttt{DEACBDD}. +\end{solution} \vfill + +\problem{} +In \ref{treedecode}, we needed 18 bits to encode \texttt{DEACBDD}. \par +\note{Note that we'd need $3 \times 7 = 21$ bits to encode this string na\"ively.} + +\vspace{2mm} +Draw a tree that encodes this string more efficiently. \par + +\begin{solution} + Two possible solutions are below. \par + \begin{itemize} + \item The left tree encodes \texttt{DEACBDD} as \texttt{[00$\cdot$111$\cdot$110$\cdot$10$\cdot$01$\cdot$00$\cdot$00]}, using 16 bits. + \item The right tree encodes \texttt{DEACBDD} as \texttt{[0$\cdot$111$\cdot$101$\cdot$110$\cdot$100$\cdot$0$\cdot$0]}, using 15 bits. 
+ \end{itemize} + + \null\hfill + \begin{minipage}{0.48\textwidth} + \begin{center} + \begin{tikzpicture}[scale=1.0] + \begin{scope}[layer = nodes] + \node[int] (x) at (0, 0) {}; + \node[int] (0) at (-0.75, -1) {}; + \node[int] (1) at (0.75, -1) {}; + \node[end] (00) at (-1.25, -2) {\texttt{D}}; + \node[end] (01) at (-0.25, -2) {\texttt{B}}; + \node[end] (10) at (0.25, -2) {\texttt{C}}; + \node[int] (11) at (1.25, -2) {}; + \node[end] (110) at (0.75, -3) {\texttt{A}}; + \node[end] (111) at (1.75, -3) {\texttt{E}}; + \end{scope} + + \draw[-] + (x) to node[edg] {\texttt{0}} (0) + (x) to node[edg] {\texttt{1}} (1) + (0) to node[edg] {\texttt{0}} (00) + (0) to node[edg] {\texttt{1}} (01) + (1) to node[edg] {\texttt{0}} (10) + (1) to node[edg] {\texttt{1}} (11) + (11) to node[edg] {\texttt{0}} (110) + (11) to node[edg] {\texttt{1}} (111) + ; + \end{tikzpicture} + \end{center} + \end{minipage} + \hfill + \begin{minipage}{0.48\textwidth} + \begin{center} + \begin{tikzpicture}[scale=1.0] + \begin{scope}[layer = nodes] + \node[int] (x) at (0, 0) {}; + \node[end] (0) at (-0.75, -1) {\texttt{D}}; + \node[int] (1) at (0.75, -1) {}; + \node[int] (10) at (0.25, -2) {}; + \node[int] (11) at (1.25, -2) {}; + \node[end] (100) at (-0.15, -3) {\texttt{B}}; + \node[end] (101) at (0.6, -3) {\texttt{A}}; + \node[end] (110) at (0.9, -3) {\texttt{C}}; + \node[end] (111) at (1.6, -3) {\texttt{E}}; + \end{scope} + + \draw[-] + (x) to node[edg] {\texttt{0}} (0) + (x) to node[edg] {\texttt{1}} (1) + (1) to node[edg] {\texttt{0}} (10) + (1) to node[edg] {\texttt{1}} (11) + (10) to node[edg] {\texttt{0}} (100) + (10) to node[edg] {\texttt{1}} (101) + (11) to node[edg] {\texttt{0}} (110) + (11) to node[edg] {\texttt{1}} (111) + ; + \end{tikzpicture} + \end{center} + \end{minipage} + \hfill\null +\end{solution} + +\vfill + +\problem{} +Now, do the opposite: draw a tree that encodes \texttt{DEACBDD} \textit{less} efficiently than before. 
+ +\begin{solution} + Bury \texttt{D} as deep as possible in the tree, so that we need four bits to encode it. +\end{solution} + +\vfill + +\remark{} +We say a coding scheme is \textit{prefix-free} if no whole code word is a prefix of another code word. \par +As we've seen, it is fairly easy to construct a prefix-free variable-length code using a binary tree. \par +Constructing the \textit{most efficient} prefix-free code for a given message is a bit more difficult. \par +We'll spend the rest of this section solving this problem. + \pagebreak \ No newline at end of file diff --git a/Advanced/Compression/tikzset.tex b/Advanced/Compression/tikzset.tex index 8bfdf1c..b2aa528 100644 --- a/Advanced/Compression/tikzset.tex +++ b/Advanced/Compression/tikzset.tex @@ -30,6 +30,11 @@ }, % % Nodes + edg/.style = { + midway, + fill = \ORMCbgcolor, + text = gray + }, int/.style = {}, end/.style = { anchor=north