From 173705112ff307c38f2f7b4688bea2397095dab9 Mon Sep 17 00:00:00 2001
From: Mark <mark@betalupi.com>
Date: Fri, 12 Apr 2024 13:11:24 -0700
Subject: [PATCH] Added compression parts

---
 Advanced/Compression/main.tex              |  28 ++++
 Advanced/Compression/media/box.png         | Bin 0 -> 4543 bytes
 Advanced/Compression/media/noise.png       | Bin 0 -> 5630 bytes
 Advanced/Compression/parts/0 intro.tex     |  37 +++++
 Advanced/Compression/parts/1 runlength.tex | 145 +++++++++++++++++++
 Advanced/Compression/parts/2 lzss.tex      | 155 +++++++++++++++++++++
 Advanced/Compression/parts/3 huffman.tex   |  27 ++++
 Advanced/Compression/tikzset.tex           |  65 +++++++++
 8 files changed, 457 insertions(+)
 create mode 100755 Advanced/Compression/main.tex
 create mode 100644 Advanced/Compression/media/box.png
 create mode 100644 Advanced/Compression/media/noise.png
 create mode 100644 Advanced/Compression/parts/0 intro.tex
 create mode 100644 Advanced/Compression/parts/1 runlength.tex
 create mode 100644 Advanced/Compression/parts/2 lzss.tex
 create mode 100644 Advanced/Compression/parts/3 huffman.tex
 create mode 100644 Advanced/Compression/tikzset.tex
diff --git a/Advanced/Compression/main.tex b/Advanced/Compression/main.tex
new file mode 100755
index 0000000..33012d2
--- /dev/null
+++ b/Advanced/Compression/main.tex
@@ -0,0 +1,28 @@
+% use [nosolutions] flag to hide solutions.
+% use [solutions] flag to show solutions.
+\documentclass[
+	solutions,
+	singlenumbering,
+	unfinished
+]{../../resources/ormc_handout}
+\usepackage{../../resources/macros}
+
+\input{tikzset.tex}
+
+
+\uptitlel{Advanced 2}
+\uptitler{\smallurl{}}
+\title{Compression}
+\subtitle{Prepared by Mark on \today{}}
+
+
+\begin{document}
+
+	\maketitle
+
+	\input{parts/0 intro.tex}
+	\input{parts/1 runlength.tex}
+	\input{parts/2 lzss.tex}
+	\input{parts/3 huffman.tex}
+
+\end{document}
\ No newline at end of file
diff --git a/Advanced/Compression/media/box.png b/Advanced/Compression/media/box.png
new file mode 100644
index 0000000000000000000000000000000000000000..42cb086cb521d5aad496cf878bc53b4b142bc8db
GIT binary patch
literal 4543
zcmeHLdu$X%7~iASVzm`(D+DpFOArzF_IbB=Ymc_}N_)aVEd`2TWFNEb(%$WLZ`)pp
z4@AWWK6nTLtOg^3AweZTB!VDOM8pR%;EM#CA`xR0jmDx7@teJCdyOWhiTu~So158h
ze)F4ee)G+DxA#QtjOv2?vH1i+6hvymbx@b%GV*fx+-kO-gz6}2h!~N`t;9&U0i-9F
z@L&uSPgZ+`Cx^%Z+cGFc(8d^C#(FMu>!+Z7eWv{!v~S5^@EFcBf+&Ex3knbIVyL%5
zF&|6MwGZ0(8tJ~I5rm8AtE(f?T9Ws2tXJ^Ec$ODs7Qac3lQ~i51eo8malj+Y^!>4F
z3H*D+tgNh!R92D++cL}+L=dU2`&Wi)-Yp-q?3>Lcuhi%MNcOCFN&c#;YFu;4leIS;
z%ikuiTkvOIbnL3)(bWfbb$5TN9zSq!-rT&o6{9YATH6zK&dQ?w)ADI`@2TC7_3rCE
z{nPE`O>@>8j}|T2e8RYSO>y#%+1q<dZrbqh<K%6^k#(y#iT54)A~fQ@{mXy7?c!Ty
z)%)*$W6Y>gPh9&E8|x|PSsy*oy?5p~gyv0{GVw+B`=#$ayI^kV1E&_)^SamHB|N`=
z;xk3dU-8a)Yu%dY)2R)C&5PvIR|cP*{mJ~UU5j4Wb@uSRV<w%Md}P5E-;>4HZO^@e
zDn52sMTx&>=MS42J~-O>*`czYKPT9ezTLWHrRMQ9t+=P`+>IB0+BUmu!*8po$j0vu
zwoZ9}$;o>*9#3?g)ef025C<B&?>u|1IlA!o)xS-xUVeio=fpb)+p1^%zU0Ua3(p;1
z5&de-(w&c}*QCl`wm!_Me6u|D@elLQkK1x}`P#*6_IU~mANr*Ty?%c0p4azS7eC*5
zzBi|2hy9SZXZJ*cD4cKBH#iN^>9S_UyozqA$eW5K@P-kkWvPUsH6w>qQKK0TQD^t;
zr$|!|QFDD!HkznJO=itv8_ilgqh4FwtOa$dY)XD<N(O=$auhNZYl$c2RETnUWoTnF
zLy<1TX%0~h5J#1kjY!cedRe+EWw!Iwlzg(()(yEXJT*fBcOk0DaS}4abaZrhI|Q#~
zH!@r>7-U$U;dvSmbh0z<C@DIg9FHl|9AT8yY%}4QR-D9~ifXkvA&P>2vd_O*A{y<d
zk0&!KfF4XrNidw3WnwWVdqmQyY6p^xL;pG=S>KsJOdU#EZMKH0+ELsYpG~1_{o@mD
zc8j|lU1LxSiUBkUvvPx`#3!zQ1eT!Dj3wMrAod`nV;Tcu4e|~5xXa0Q1cvwX4np^>
z-30?G8kNJA)`s1SghLdbU)C+n)MdA+2m!?Lz5p%yg&-|*euWNdz91bGRU}EGz>2z(
zMHPuB9VM<IOa<g#6L<{d<0T}pw4X<qg5znG=Ttf%Nq$~cRTc$}EQ*=72~nxEWP63F
zbf8jMQIdSDP6LS_yCl&8pBkWf5lNclR|P@vxm3C)Pqplr0^4cElt#oP;*D+x7F@2V
zjf5!P%MMs-TNKBD0U>I-8E;Dsbkv(MG|N%2nw-S(yiXK;VnFaq0bu|%2iZx8M9k^t
zPPYediwuhaVHF&wK;YV8Epnxe6vwjbEvqF&^=IAw?kF55U2&AK;vgXH&%^b7d6;G;
znN7Qo>7tfy8lC@z#)pS21w3qdjhTe`J6%)yM9o62>AUo8i|HOEl5`J+tZ3;aB$alg
zyM6*!x=U+P;*AKhM<!hR?B>6Of}jQ^!_a)RARwL=C4-|Cl@HKtP!A%(;91284rWhU
zhSQ<gsG<>c1g#)E-ES!J7I&j24Muk~A-oG97|r507L4`Fyp$1)x%5la4c)K$Rf*PA
zq|jmj&WeHrp7sX<oEk(*KotJ>CF1X9SgsornI)DoI4$;VYAN%NZBGNT@+e4?wn2V|
zbkFqX`;2BdvtGh06K<Dq2B03EWJvlB%QY<5kQ5jacsRO-<r<O#Ljn&+*Z(G0{=gF>
zio*}R4tO5)`bX}7XUWUeY1LukB7SXrd&_FDT#=}mmn4WQuf@efZ0H&T#ylqyt;#!-
zn_E;^c<b)AnPBRSge&S(U;lVwd%-*GRXG*>&wouma{Z>r$~$^<UJcYd?U4@atuxo=
z0W|CKx9=-l_eXE=rEU0Mj{mq$HeVQ>fWJ7huZ8nR$G^Pn!J=h#A7XQ)YDW0AI~Fee
E8@#~gxBvhE

literal 0
HcmV?d00001

diff --git a/Advanced/Compression/media/noise.png b/Advanced/Compression/media/noise.png
new file mode 100644
index 0000000000000000000000000000000000000000..8b6dee648b56ad23ba29f74fbe5de02165ee1534
GIT binary patch
literal 5630
zcmeHKdpMNa8lOoviM^ddu~TzKD4P3yGNp{o#7IVkRKA({hA-yAj9aCobkU9IMpQ!R
zauW7d#8#4&<l03kq)noWPP?L<HAB*K_Vb*l=Q;bozHip7_gm}t{?=N*^{#j3+u`o!
zWT0oJheDwYc+Ol8P}{4E?hoKQ7YS|!)os{|C*kpIP`cm&AkpJdXf*~D^cZ^_S_`EG
z+O~o+3D|0kx|pH0HT5oF|2$$R0ejX6W*o3d=y50mP#*z>3~VD%=YoQ48y>3^*y>AH
zb2|e|&HTA`;_=-vWITz8r_g~vkxXS1)gKIr#3nM>6c#|EkA0bunyJvZ{S#o>f;c$1
z^Bf#7iV(R334&3mh$B%69Oo<UMzM<<;;^`wFvH*~_tfX<*gb`Mj@gE5Vwah_uKJ+E
zH``}4#i_jT@#9)yQ~6cz<vPn37!IO?!xSE>gh?-4^l-wGwuHU?eXNr385rs**ZhSO
z>>H$%#6@uz26OKY#2H3y*2Q2cHx4CcP}leLq<zTlYqz^m_M$c}TYD!Ooxm*DZ?I~`
zq}?kpxHV9*WZ8kG?aKBEi9b3IR)t>eVBE4isGVj?%Re01J}b}bnJ{yw_r1pLCwGW)
zvjnB_{a&3VSG`X)dk-8L+}ajXdj4jp&k1MfA1?8jg$vA$Js}IXjj2WxP46%M@XpD%
zjEj%;H7174Irf9W`5v8(n;+#D#t9Nvc`zruDPi8aHuZM#!~(ND{ZY@EXC`Cb>HBt@
zZ#*;M=7xQ(T>*I(ettg}74-#FPgh*f(w}d7es-}(`~mkl<6ii`agFrrGU-~~|FpUF
zTDMkwrgx@1db5LH3HM&lyg&PT(pt(mz3)Gw(1jt9tXe}`5Q7njr<cl$@5&a*rFcjz
z7sB`msY1PBD7*O)3P=<Pt1v>?ACYmeFN({s7(~p$E~oK{e1!uXfH<!Wfdy;bJVk2*
zMJzFP{yaUq2sR*)!YT+8Aq|o#*%2J9hL;U&bu$5r(Lhvz9IO|JBL{g1jG^MGcp}a*
z0tq8y=jmbWLc|ib2ltl|3h>0i2B=gDHh~Zx9*z&E;N>Cy1QLtIA`r<0G8qRDIAx?v
z1x4UwN((i`Fb5Y_ib4>D3X#h&YEDQf4^?rnSkRC8JU^*|&mX0iDMwTQdJrNY1%ZSo
z5~NbXSP!MjF$|E5Oz1B?l%A0anBV~`<)I-W*f9*2sVv4)h()9RiqMcC&2q#d0vrTO
z0aOV_C4Dud`oxWTs3q`6qza7}5c?~n3Xyyf>#Mn`do;@#I}zYM%KH`i^V&6Ffa3Gn
zT)8MzJv|<mgH?~u7RyD5n5{915*CF{l`wEZNJ7F<g$yx{DS;pyg(4P<#1Ml-rO?Ju
z@nlLBBoo1EDnO1$01wO%l2~F1j3YuMA&yF8FmMo)M#3>!BA8AUQb=?LeGJ9o5ClXe
z6f|~LYAP|HA`vAtCQD4j(HSfjjw%sRa4cBJz`+y=%oGx7Bv>NRP>DtCU*sWD2y7=J
zh5TWHLgugOPz%mp;LhV<$#~+I7WW`XB>@f`tScf5jrh{xiAZ6A3R0^{VvxvW8kI;V
z(phvmh4clq3=UC(NK|uba;K(8y)A687$7X9j#EIOX$NazJA}ZHN*>}Vmj`jMqgi*f
zn-7kY7*au8NCgAZ(LC(=IS=DNHYN^FUrooCixElW|3a$|55^AQ!InECN-%z;rfK*@
z3E<%2r{TvSM01od7|o$zL!#j&D4{S|teGd^8txJWKr(+AWRH<>{oIcHMJSL!z(7JW
z11F+0L^vv0NC%-nrQ%?SD3XAUq|=z9ui2Gyi7Fflffx7#9f4LLJT<>im|2>Qn*B98
zJOEbj0uT&GRDVVUqY_61BaAGVpw1beS8PZ4FHP(;fH6r1_zky#<OOmeVKf<zXr@j(
zf8#Z>7k}djfcpC&-=yz%xxUNwO$vMy_<MAHm+PAp_$Ki8==#6OrT67^3YLL?LE+$K
zY1<B?C*Vcv2ce4-7d525k6k*M2wEm6oV}GO)Wpf^f<`4DnF<<pR6M?;PS^PHlXPay
z4tE(qp|n$Y+y$Ny_s^`jC+lJBCut825qjqN7R6iVUuUA@=sD@JH{z#N-<c}fO{(+>
z^u0rC_7UwOo$WEJ{iAl>;DBj<SsJM}aK@FV9xqZZ+~@Rm1h?k=m~B$Ye{6N{GE=r%
zhjFX9vi9Y%=R-wlX7l%(yz;AF*m(RUYT&1VA8pVZ=MO1$TnL_(p({HiS=p}VNVC@;
z9e=PivGP>mpO=m=T!lY&(>3a>Yv{pq1Jh-?h0nGmzrFl+Yk!+vqB$G6wrRTTka-Du
zQKzH}-$l=cB$5A$G+}MpiE{J1Zr#*&_d$fcF>9Jh4lQRQ#-{jXOfsx<7rhOccE|pA
zPpcl^9lf~Nrk?9sle$>>s^q|qqI~-)S*Ctj!2|0jTlPj*v~8lWdNyNS$s5CYNlJkU
zDXmPNxg;r|eo-Pf^EAJ6627N`lSNYuK8RZKeyNsZPQj9+TE~0m+t^u^EI}nFy{sM|
zth`mCaNygy9j7C9mLB-#;!wHqZ!3SP6qYPvCoMZt*;qa)fPUEK;_rpcKKz*W__=7m
zxuv{KKbL1D!>c>?>#`mp6kFp19uppHd1z&-^t$@?ZMD{?{S^W)kEeGlBK<9|*JpQf
zOcd!KE1wJAtr=WyPWE1XqNI#F>#Eea-ca8=LK(`Oyw>KTQ4#hf*=>PiY)dR_`P9tD
zhF25MRw23kUmrE&6i36c7QS_Ls02!x$<4;;=A`sV*)wdp`T;Yu-R;oco9ktLgm(!F
zF@1}+%R28@NvAg>uae$W{M#tJDt+RWtQ|Vbv!Cg@_?zk(uEg!_juf@8slD!zMOiUr
z-_*LUj9xwKYg!j|Gu~BM7<zV|h|`y$b;1lTWl^ghWOW(~wx`-Wu|#ZUggFXwZ!4lI
zr!saQvAXn0Yk|{z#?$Bb{q+wEc0by>(7Is!-2vbJ8w+!l4UO8lI&Ft6Etyt|1N-mG
z?hKVJfAVnO{-X7EHBI8Ze2ZTn>S&i6)q5h{9vuSug|@+!lqQ;Uqi^C>SL-Ctz4`B_
zMczlPcBrxJTbv4Cl->9#YG(A7*s0@7jV5>$7w5UUL?z7ZIZ1h<XS}Uh)^8&#ets_V
zNn$MpYJ1dBm~ePD<IHQLgG)EWo1M8?{a|fw(1zWS>Gpz9+V%(Jg5B|@1ugeKmZsY~
z%%!Cs+u9PYU$AE5-0A1xeQ@>sD$5)$VvSB+Gxx%h6B$-Xlr+DKeCBOIX_MzcenQMY
zNw7+<x5ILmxt5uPS$8*Qp0iO(ed)yD?sfRQi-Lyq>vw%uwMw?1ZIs$HdG514{z$s#
zp=3wR%5iXTpB1l#EK;&NT`i>tvBkS?!5gX>=Kja#?CBh5Lp~n*G1_kpwa742*YH~P
zxngI0p)}>UJ?obqt(iRCeWlR7=5|Hh(Tu$Ar_NU(a%RmBu_?*FODl}$?U2Z2F|?e5
z%R7rnIlsDvXZT<APx5(Yo%(ouGGR(Ux8#qLy(>2v-AHNl4%W^|Pdc0T$Pkf57WwP>
z*s>z6%A3#zyAM{@7`3cCWCtlpT$?@72^M{O&UIjYt!$8B%S(wmmdTA?=-E5_yIU0=
zX@OY}qz?6=5pL`<gDVYhrZr9X2zyT<Sa99&%v9e~fiuzquB=#5aPLnoJTEioR8C!r
zZJu@bF0;~?85h?>rat!Tye}UXvlSm?g5C=Ok6-KBL`(AB>d&mBqEb$u?i}*5O~JXG
zNZyNovA8K+7~S{*=iTzgbm-Hzw?m!fTeeby&OW5?XqvLMDyH8pE^E%S8@8N{s@K`8
z^kUtRHP6M8nv6=bna|MI1#Pp+!hZ50+nnijF8eT!{~HbM(P2~bvHhvg;XGa^I%q~i
z(`&sb^Q#XHtyZtzr6&-(v{)QEn^zZaa%M06PIt<@m{UVbU0Ul@`At@~V$bbCcqdlw
kt~{sS{SS?rU#%ZPuU5(mnptUAK;}mA9NoBQ7y8Bg2VU1%(*OVf

literal 0
HcmV?d00001

diff --git a/Advanced/Compression/parts/0 intro.tex b/Advanced/Compression/parts/0 intro.tex
new file mode 100644
index 0000000..eda06ca
--- /dev/null
+++ b/Advanced/Compression/parts/0 intro.tex	
@@ -0,0 +1,37 @@
+\section{Introduction}
+
+\definition{}
+An \textit{alphabet} is a set of symbols. Two examples are
+$\{\texttt{A}, \texttt{B}, \texttt{C}, \texttt{D}\}$ and $\{\texttt{0}, \texttt{1}\}$.
+
+\definition{}
+A \textit{string} is a sequence of symbols from an alphabet. \par
+For example, \texttt{CBCAADDD} is a string over the alphabet $\{\texttt{A}, \texttt{B}, \texttt{C}, \texttt{D}\}$.
+
+\problem{}
+Say we want to store a length-$n$ string over the alphabet $\{\texttt{A}, \texttt{B}, \texttt{C}, \texttt{D}\}$ as a binary blob. \par
+How many bits will we need? \par
+\hint{
+	Our alphabet has four symbols, so we can encode each symbol using two bits, \par
+	mapping $\texttt{A} \rightarrow \texttt{00}$,
+	$\texttt{B} \rightarrow \texttt{01}$,
+	$\texttt{C} \rightarrow \texttt{10}$, and
+	$\texttt{D} \rightarrow \texttt{11}$.
+}
+
+\begin{solution}
+	$2n$ bits.
+\end{solution}
+
+\vfill
+
+
+\problem{}<naivelen>
+Similarly, we can use a na\"ive coding scheme to encode an $n$-symbol string over an alphabet of size $k$ \par
+using $n \times \lceil \log_2k \rceil$ bits. Convince yourself that this is true.
+
+
+\vfill
+Of course, this isn't ideal---we can do much better than $n \times \lceil \log_2k \rceil$.
+We will spend the rest of this handout exploring more efficient ways of encoding such sequences of symbols.
+\pagebreak
diff --git a/Advanced/Compression/parts/1 runlength.tex b/Advanced/Compression/parts/1 runlength.tex
new file mode 100644
index 0000000..6b4d5e6
--- /dev/null
+++ b/Advanced/Compression/parts/1 runlength.tex	
@@ -0,0 +1,145 @@
+% TODO:
+% Basic run-length
+% LZ77
+
+\section{Run-length Coding}
+
+
+\definition{}
+\textit{Entropy} is a measure of information in a certain sequence. \par
+A sequence with high entropy contains a lot of information, and a sequence with low entropy contains relatively little.
+For example, consider the following two ten-symbol ASCII\footnotemark{} strings:
+\begin{itemize}
+	\item \texttt{AAAAAAAAAA}
+	\item \texttt{pDa3:7?j;F}
+\end{itemize}
+The first string clearly contains less information than the second.
+It's much harder to describe \texttt{pDa3:7?j;F} than it is \texttt{AAAAAAAAAA}.
+Thus, we say that the first has low entropy, and the second has fairly high entropy.
+
+\vspace{2mm}
+
+The definition above is intentionally hand-wavy. \par
+Formal definitions of entropy exist, but we won't need them today---we just need
+an intuitive understanding of the \say{density} of information in a given string.
+
+
+\footnotetext{
+	American Standard Code for Information Interchange, an early character encoding for computers. \par
+	It contains 128 symbols, including numbers, letters, and
+	\texttt{!"\#\$\%\&`()*+,-./:;<=>?@[\textbackslash]\textasciicircum\_\{|\}\textasciitilde}
+}
+
+
+\vspace{5mm}
+
+
+\problem{}<runlenone>
+Using a na\"ive coding scheme, encode \texttt{AAAA$\cdot$AAAA$\cdot$BCD$\cdot$AAAA$\cdot$AAAA} as a binary blob. \par
+\note[Note]{
+	We're still using the four-symbol alphabet $\{\texttt{A}, \texttt{B}, \texttt{C}, \texttt{D}\}$. \par
+	Dots ($\cdot$) in the string are drawn for readability. Ignore them.
+}
+
+\begin{solution}
+	There are eight \texttt{A}s on each end of that string. Mapping symbols as before, \par
+	we get \texttt{[00 00 00 00 00 00 00 00 01 10 11 00 00 00 00 00 00 00 00]}
+\end{solution}
+
+
+\vfill
+In \ref{runlenone}---and often, in the real world---the strings we want to encode have fairly low entropy.
+We can leverage this fact to develop efficient encoding schemes.
+
+\example{}
+The simplest such coding scheme is \textit{run-length encoding}. Instead of simply listing letters of a string
+in their binary form, we'll add a \textit{count} to each letter, compressing repeated sequences of the same symbol.
+
+\vspace{2mm}
+
+We'll encode our string into a sequence of 6-bit blocks, interpreted as follows:
+
+\begin{center}
+	\begin{tikzpicture}
+		\node[anchor=west,color=gray] at (-2.3, 0) {Bits};
+		\node[anchor=west,color=gray] at (-2.3, -0.5) {Meaning};
+		\draw[color=gray] (-2.3, -0.25) -- (5.5, -0.25);
+		\draw[color=gray] (-2.3, 0.15) -- (-2.3, -0.65);
+
+		\node at (0, 0) {\texttt{0}};
+		\node at (1, 0) {\texttt{0}};
+		\node at (2, 0) {\texttt{1}};
+		\node at (3, 0) {\texttt{1}};
+		\node at (4, 0) {\texttt{0}};
+		\node at (5, 0) {\texttt{1}};
+
+		\draw (-0.5, 0.25) -- (5.5, 0.25);
+		\draw (-0.5, -0.25) -- (5.5, -0.25);
+		\draw (-0.5, -0.75) -- (5.5, -0.75);
+
+		\draw (-0.5, 0.25) -- (-0.5, -0.75);
+		\draw (3.5, 0.25) -- (3.5, -0.75);
+		\draw (5.5, 0.25) -- (5.5, -0.75);
+
+		\node at (1.5, -0.5) {number of copies};
+		\node at (4.5, -0.5) {symbol};
+	\end{tikzpicture}
+\end{center}
+So, the sequence \texttt{BBB} will be encoded as \texttt{[0011-01]}. \par
+\note[Notation]{Just like spaces, dashes in a binary blob are added for readability.}
+
+\problem{}
+Encode \texttt{AAAA$\cdot$AAAA$\cdot$BCD$\cdot$AAAA$\cdot$AAAA} using this scheme. \par
+Is this more or less efficient than \ref{runlenone}?
+
+\begin{solution}
+	\texttt{[1000-00 0001-01 0001-10 0001-11 1000-00]} \par
+	This requires 30 bits, as compared to 38 in \ref{runlenone}.
+\end{solution}
+
+\vfill
+\pagebreak
+
+\problem{}
+Is run-length coding always efficient? When does it work well, and when does it fail?
+
+\vfill
+
+
+\problem{}
+Our coding scheme wastes a lot of space when our string has few runs of the same symbol. \par
+Fix this problem: modify the scheme so that single occurrences of symbols do not waste space. \par
+\hint{We don't need a run length for every symbol. We only need one for \textit{repeated} symbols.}
+
+\begin{solution}
+	One idea is as follows: \par
+	\begin{itemize}
+		\item Encode single symbols na\"ively: \texttt{ABCD} becomes \texttt{[00 01 10 11]}
+		\item Signal runs using two copies of the same symbol: \texttt{AAAAAA} becomes \texttt{[00 00 0110]}. \par
+		When our decoder sees two copies of the same symbol, it will interpret the next four bits as
+		a run length.
+	\end{itemize}
+	\texttt{BDC$\cdot$DDDDD$\cdot$AADBDC} will be encoded as \texttt{[01 11 10 11-11-0101 00-00-0010 11 01 11 10]}.
+\end{solution}
+
+\vfill
+
+\problem{}<firstlz>
+Consider the following string: \texttt{ABCD$\cdot$ABCD$\cdot$BABABA$\cdot$ABCD$\cdot$ABCD}. \par
+\begin{itemize}
+	\item How many bits do we need to encode this na\"ively? \par
+	\item How about with the (unmodified) run-length scheme described above?
+\end{itemize}
+\hint{You don't need to encode this string---just find the length of its encoded form.}
+
+\begin{solution}
+	Na\"ively: \tab $2 \times 22 = 44$ bits \par
+	Run-length: \tab $6 \times 21 = 126$ bits. Watch out for the two repeated \texttt{A}s!
+\end{solution}
+
+
+\vfill
+
+Neither solution to \ref{firstlz} is ideal. Run-length is very wasteful due to the lack of runs, and na\"ive coding
+does not take advantage of repetition in the string. We'll need a better coding scheme.
+\pagebreak
diff --git a/Advanced/Compression/parts/2 lzss.tex b/Advanced/Compression/parts/2 lzss.tex
new file mode 100644
index 0000000..c710993
--- /dev/null
+++ b/Advanced/Compression/parts/2 lzss.tex	
@@ -0,0 +1,155 @@
+\section{LZ Codes}
+
+The LZ-family\footnotemark{} of codes (LZ77, LZ78, LZSS, LZMA, and others) take advantage of repeated sequences of symbols
+in a string. They are the basis of most modern compression algorithms, including DEFLATE, which is used in the ZIP, PNG,
+and GZIP formats.
+
+\footnotetext{
+	Named after Abraham Lempel and Jacob Ziv, the original inventors. \par
+	LZ77 is the algorithm described in their first paper on the topic, which was published in 1977. \par
+	LZ78, LZSS, and LZMA are minor variations on the same general idea.
+}
+
+\vspace{2mm}
+
+The idea behind LZ is to represent repeated substrings as \textit{pointers} to previous parts of the string. \par
+Pointers take the form \texttt{<pos, len>}, where \texttt{pos} is how far back the substring to repeat begins
+(counted in symbols from the current position) and \texttt{len} is the number of symbols to copy.
+
+\vspace{2mm}
+
+For example, we can encode the string \texttt{ABRACADABRA} as \texttt{[ABRACAD<7, 4>]}. \par
+The pointer \texttt{<7, 4>} tells us to look back 7 positions (to the first \texttt{A}), and copy the next 4 symbols. \par
+Note that pointers refer to the partially decoded output---\textit{not} to the encoded string. \par
+This allows pointers to reference other pointers, and ensures codes like \texttt{A<1,9>} are valid.
+
+\problem{}
+Encode \texttt{ABCD$\cdot$ABCD$\cdot$BABABA$\cdot$ABCD$\cdot$ABCD} using LZ.
+Then, decode the following:
+\begin{itemize}
+	\item \texttt{[ABCD<4,4>]}
+	\item \texttt{[A<1,9>]}
+	\item \texttt{[DAC<3,5>]}
+\end{itemize}
+
+\begin{solution}
+
+	\texttt{ABCD$\cdot$ABCD$\cdot$BABABA$\cdot$ABCD$\cdot$ABCD} becomes \texttt{[ABCD<4, 4> BA<2,4> ABCD<4,4>]}.
+
+	\linehack{}
+
+	In parts two and three, remember that we're reading the \textit{output string.} \par
+	The nine \texttt{A}s in part two are produced one by one, \par
+	with the decoder's \say{read head} following its \say{write head.}
+
+	\begin{itemize}
+		\item \texttt{ABCD$\cdot$ABCD}
+		\item \texttt{AAAAA$\cdot$AAAAA}
+		\item \texttt{DACDACDA}
+	\end{itemize}
+\end{solution}
+
+\vfill
+
+\problem{}
+Convince yourself that LZ is a generalization of the run-length code we discussed in the previous section.
+\hint{\texttt{[A<1,9>]} and \texttt{[1010-00]} are the same thing!}
+
+\remark{}
+Note that we left a few things out of this section: we didn't discuss the algorithm that converts a string to an LZ-encoded blob,
+nor did we discuss how we should represent strings encoded with LZ in binary. We skipped these details because they are
+problems of implementation---they're the engineer's headache, not the mathematician's. If you're interested, a brief explanation is below.
+Ask an instructor to explain.
+
+\begin{center}
+	\begin{tikzpicture}
+		\node[anchor=west,color=gray] at (-2.3, 0) {Bits};
+		\node[anchor=west,color=gray] at (-2.3, -0.5) {Meaning};
+		\draw[color=gray] (-2.3, -0.25) -- (5.5, -0.25);
+		\draw[color=gray] (-2.3, 0.15) -- (-2.3, -0.65);
+
+		\node at (0, 0) {\texttt{0}};
+		\node at (1, 0) {\texttt{0}};
+		\node at (2, 0) {\texttt{1}};
+		\node at (3, 0) {\texttt{0}};
+		\node at (4, 0) {\texttt{1}};
+		\node at (5, 0) {\texttt{1}};
+		\node at (6, 0) {\texttt{0}};
+		\node at (7, 0) {\texttt{0}};
+		\node at (8, 0) {\texttt{1}};
+
+		\draw (-0.5, 0.25) -- (8.5, 0.25);
+		\draw (-0.5, -0.25) -- (8.5, -0.25);
+		\draw (-0.5, -0.75) -- (8.5, -0.75);
+
+		\draw (-0.5, 0.25) -- (-0.5, -0.75);
+		\draw (0.5, 0.25) -- (0.5, -0.75);
+		\draw (8.5, 0.25) -- (8.5, -0.75);
+
+		\node at (0, -0.5) {flag};
+		\node at (4.5, -0.5) {if flag \texttt{<pos, len>}, else eight-bit symbol};
+	\end{tikzpicture}
+\end{center}
+
+
+\begin{center}
+	\begin{tikzpicture}
+		% Text tape
+		\node[color=gray] at (-0.75, 0) {\texttt{...}};
+		\node[color=gray] at (0.0, 0) {\texttt{D}};
+		\node at (0.5, 0) {\texttt{A}};
+		\node at (1.0, 0) {\texttt{B}};
+		\node at (1.5, 0) {\texttt{C}};
+		\node at (2.0, 0) {\texttt{D}};
+		\node at (2.5, 0) {\texttt{A}};
+		\node at (3.0, 0) {\texttt{B}};
+		\node at (3.5, 0) {\texttt{C}};
+		\node at (4.0, 0) {\texttt{D}};
+		\node[color=gray] at (4.5, 0) {\texttt{B}};
+		\node[color=gray] at (5.0, 0) {\texttt{D}};
+		\node[color=gray] at (5.5, 0) {\texttt{A}};
+		\node[color=gray] at (6.0, 0) {\texttt{C}};
+		\node[color=gray] at (6.75, 0) {\texttt{...}};
+
+		\draw (-1.75, 0.25) -- (7.25, 0.25);
+		\draw (-1.75, -0.25) -- (7.25, -0.25);
+
+
+		\draw[line width = 0.7mm, color=oblue, dotted] (2.25, 0.5) -- (2.25, -0.5);
+		\draw[line width = 0.7mm, color=oblue]
+			(-1.25, 0.5)
+			-- (4.25, 0.5)
+			-- (4.25, -0.5)
+			-- (-1.25, -0.5)
+			-- cycle
+		;
+
+		\draw
+			(4.2, -0.625)
+			-- (4.2, -0.75)
+			to node[anchor=north, midway] {lookahead} (2.3, -0.75)
+			-- (2.3, -0.625)
+		;
+
+		\draw
+			(2.2, -0.625)
+			-- (2.2, -0.75)
+			to node[anchor=north, midway] {search buffer} (-1.1, -0.75)
+			-- (-1.1, -0.625)
+		;
+
+		\draw[color=gray]
+			(2.2, 0.625)
+			-- (2.2, 0.75)
+			to node[anchor=south, midway] {match!} (0.3, 0.75)
+			-- (0.3, 0.625)
+		;
+
+		%\draw[->, color=gray] (2.5, 0.3) -- (2.5, 0.8) to[out=90,in=90] (0.5, 0.8);
+		\node at (7.0, -0.75) {Result: \texttt{[$\cdot\cdot\cdot$DABCD<4,4>$\cdot\cdot\cdot$]}};
+	\end{tikzpicture}
+\end{center}
+
+
+\vfill
+\pagebreak
\ No newline at end of file
diff --git a/Advanced/Compression/parts/3 huffman.tex b/Advanced/Compression/parts/3 huffman.tex
new file mode 100644
index 0000000..5824fc6
--- /dev/null
+++ b/Advanced/Compression/parts/3 huffman.tex	
@@ -0,0 +1,27 @@
+\section{Huffman Codes}
+
+
+\remark{}
+As a first example, consider the alphabet $\{\texttt{A}, \texttt{B}, \texttt{C}, \texttt{D}, \texttt{E}\}$. \par
+With a na\"ive coding scheme, we can encode a length-$n$ string with $3n$ bits, by mapping...
+\begin{itemize}
+	\item $\texttt{A}$ to $\texttt{000}$
+	\item $\texttt{B}$ to $\texttt{001}$
+	\item $\texttt{C}$ to $\texttt{010}$
+	\item $\texttt{D}$ to $\texttt{011}$
+	\item $\texttt{E}$ to $\texttt{100}$
+\end{itemize}
+With this scheme, the string \texttt{ADEBCE} becomes \texttt{[000 011 100 001 010 100]}. \par
+This matches what we computed in \ref{naivelen}: ~ $6 \times \lceil \log_2(5) \rceil = 6 \times 3 = 18$ bits. \par
+\note[Notation]{
+	The spaces in \texttt{[000 011 100 001 010 100]} are provided for convenience. \par
+	This is equivalent to \texttt{[000011100001010100]}, but is easier to read. \par
+	In this handout, encoded binary blobs will always be written in square brackets.
+}
+
+\vspace{2mm}
+
+You could argue that this coding scheme is wasteful: we're not using three of the eight possible three-bit sequences!
+
+\vfill
+\pagebreak
\ No newline at end of file
diff --git a/Advanced/Compression/tikzset.tex b/Advanced/Compression/tikzset.tex
new file mode 100644
index 0000000..d83fa32
--- /dev/null
+++ b/Advanced/Compression/tikzset.tex
@@ -0,0 +1,65 @@
+\usetikzlibrary{arrows.meta}
+\usetikzlibrary{shapes.geometric}
+\usetikzlibrary{patterns}
+
+% We put nodes in a separate layer, so we can
+% slightly overlap with paths for a perfect fit
+\pgfdeclarelayer{nodes}
+\pgfdeclarelayer{path}
+\pgfsetlayers{main,nodes}
+
+% Layer settings
+\tikzset{
+	% Layer hack, lets us write
+	% layer = * in scopes.
+	layer/.style = {
+		execute at begin scope={\pgfonlayer{#1}},
+		execute at end scope={\endpgfonlayer}
+	},
+	%
+	% Arrowhead tweak
+	>={Latex[ width=2mm, length=2mm ]},
+	%
+	% Labels inside edges
+	label/.style = {
+		rectangle,
+		% For automatic red background in solutions
+		fill = \ORMCbgcolor,
+		draw = none,
+		rounded corners = 0mm
+	},
+	%
+	% Nodes
+	main/.style = {
+		draw,
+		circle,
+		fill = white,
+		line width = 0.35mm
+	},
+	%
+	% Loop tweaks
+	loop above/.style = {
+		min distance = 2mm,
+		looseness = 8,
+		out = 45,
+		in = 135
+	},
+	loop below/.style = {
+		min distance = 5mm,
+		looseness = 10,
+		out = 315,
+		in = 225
+	},
+	loop right/.style = {
+		min distance = 5mm,
+		looseness = 10,
+		out = 45,
+		in = 315
+	},
+	loop left/.style = { % self-loop drawn on the left side of a node
+		min distance = 5mm,
+		looseness = 10,
+		out = 135,
+		in = 225 % mirror of out = 135, symmetric like the other loop styles
+	}
+}
\ No newline at end of file