handouts/Advanced/De Bruijn/parts/2 bruijn.tex

\section{De Bruijn Words}

Before we continue, we'll need to review some basic
graph theory.

\definition{}
A \textit{directed graph} consists of nodes and directed edges. \par
An example is shown below. It consists of three vertices (labeled $a, b, c$), \par
and five edges (labeled $0, \dots, 4$).

\begin{center}
	\begin{tikzpicture}
		\begin{scope}[layer = nodes]
			\node[main] (a) at (0, 0) {$a$};
			\node[main] (b) at (2, 0) {$b$};
			\node[main] (c) at (4, 0) {$c$};
		\end{scope}

		\draw[->]
			(a) edge node[label] {$0$} (b)
			(a) edge[loop above] node[label] {$1$} (a)
			(b) edge[bend left] node[label] {$2$} (c)
			(b) edge[loop above] node[label] {$3$} (b)
			(c) edge[bend left] node[label] {$4$} (b)
		;
	\end{tikzpicture}
\end{center}

\definition{}
A \textit{path} in a graph is a sequence of adjacent edges. \par
In a directed graph, two edges are adjacent if the first ends at the node where the second starts. \par
\vspace{2mm}
For example, consider the graph above. \par
The edges $0$ and $4$ are not adjacent: both \textit{end} at $b$, and neither starts there. \par
$0$ and $2$, however, are: $0$ ends at $b$, and $2$ starts at $b$. \par
$[0, 3, 2]$ is a path in the graph above: it leaves $a$ along $0$, loops at $b$ along $3$, then follows $2$ to $c$. \par


\definition{}
An \textit{Eulerian path} is a path that visits each edge of a graph exactly once. \par
An \textit{Eulerian cycle} is an Eulerian path that starts and ends on the same node.

\problem{}
Find the unique Eulerian cycle in the graph below.
\begin{center}
	\begin{tikzpicture}
		\begin{scope}[layer = nodes]
			\node[main] (a) at (0, 0) {$a$};
			\node[main] (b) at (2, 0) {$b$};
			\node[main] (c) at (4, 0) {$c$};
		\end{scope}

		\draw[->]
			(a) edge[bend left] node[label] {$0$} (b)
			(b) edge[bend left] node[label] {$1$} (a)
			(b) edge[bend left] node[label] {$2$} (c)
			(c) edge[bend left] node[label] {$3$} (b)
			(c) edge[loop right] node[label] {$4$} (c)
		;
	\end{tikzpicture}
\end{center}

\begin{solution}
	$24310$ is one way to write this cycle. \par
	Other ways to write it (for example, $02431$) describe the same cycle, started at a different edge.
\end{solution}

\vfill
\pagebreak


\definition{}
Now, consider the $n$-subword problem over $\{\texttt{0}, \texttt{1}\}$. \par
We'll call the optimal solution to this problem a \textit{De Bruijn\footnotemark{} word} of order $n$. \par

\footnotetext{Dutch. Rhymes with \say{De Grown.}}


\problem{}
Let $\mathcal{B}_n$ be the length of an order-$n$ De Bruijn word. \par
Show that the following bounds always hold:
\begin{itemize}
	\item $\mathcal{B}_n \leq n2^n$
	\item $\mathcal{B}_n \geq 2^n + n - 1$
\end{itemize}

\begin{solution}
	\begin{itemize}
		\item There are $2^n$ binary words with length $n$. \par
		Concatenate these to get a word with length $n2^n$.
		\item A word must have at least $2^n + n - 1$ letters to have $2^n$ subwords with length $n$.
	\end{itemize}
\end{solution}


\remark{}
Now, we'd like to show that $\mathcal{B}_n = 2^n + n - 1$... \par
That is, that the optimal solution to the subword problem always has $2^n + n - 1$ letters.

\definition{}
Consider an $n$-length word $w$. \par
The \textit{prefix} of $w$ is the word formed by the first $n-1$ letters of $w$. \par
The \textit{suffix} of $w$ is the word formed by the last $n-1$ letters of $w$. \par
For example, the prefix of the word \texttt{1101} is \texttt{110}, and its suffix is \texttt{101}.
The prefix and suffix of any one-letter word are both $\varnothing$.

\definition{}
A \textit{De Bruijn graph} of order $n$, denoted $G_n$, is constructed as follows:
\begin{itemize}
	\item Nodes are created for each word of length $n - 1$.
	\item A directed edge is drawn from $a$ to $b$ if the suffix of
	$a$ matches the prefix of $b$. \par
	Note that a node may have an edge to itself.
	\item We label each edge with the last letter of $b$.
\end{itemize}
$G_2$ and $G_3$ are shown below.

\null\hfill
\begin{minipage}{0.48\textwidth}
	\begin{center}
		$G_2$

		\begin{tikzpicture}
			\begin{scope}[layer = nodes]
				\node[main] (0) at (0, 0) {\texttt{0}};
				\node[main] (1) at (2, 0) {\texttt{1}};
			\end{scope}

			\draw[->]
				(0) edge[loop left] node[label] {$0$} (0)
				(1) edge[loop right] node[label] {$1$} (1)
				(1) edge[bend left] node[label] {$0$} (0)
				(0) edge[bend left] node[label] {$1$} (1)
			;
		\end{tikzpicture}
	\end{center}
\end{minipage}
\hfill
\begin{minipage}{0.48\textwidth}
	\begin{center}
		$G_3$

		\begin{tikzpicture}[scale = 0.9]
			\begin{scope}[layer = nodes]
				\node[main] (00) at (0, 0) {\texttt{00}};
				\node[main] (01) at (2, 1) {\texttt{01}};
				\node[main] (10) at (2, -1) {\texttt{10}};
				\node[main] (11) at (4, 0) {\texttt{11}};
			\end{scope}

			\draw[->]
				(00) edge[loop left] node[label] {$0$} (00)
				(11) edge[loop right] node[label] {$1$} (11)
				(00) edge[bend left] node[label] {$1$} (01)
				(01) edge[bend left] node[label] {$0$} (10)
				(10) edge[bend left] node[label] {$1$} (01)
				(10) edge[bend left] node[label] {$0$} (00)
				(01) edge[bend left] node[label] {$1$} (11)
				(11) edge[bend left] node[label] {$0$} (10)
			;
		\end{tikzpicture}
	\end{center}
\end{minipage}
\hfill\null

\vfill
\pagebreak

\problem{}
Draw $G_4$.

\begin{solution}
	\begin{center}
		\begin{tikzpicture}
			\begin{scope}[layer = nodes]
				\node[main] (7) at (0, 0) {\texttt{111}};
				\node[main] (3) at (0, -2) {\texttt{011}};
				\node[main] (6) at (2, -2) {\texttt{110}};
				\node[main] (4) at (4, -2) {\texttt{100}};
				\node[main] (1) at (-4, -4) {\texttt{001}};
				\node[main] (5) at (0, -4) {\texttt{101}};
				\node[main] (2) at (-2, -4) {\texttt{010}};
				\node[main] (0) at (-2, -6) {\texttt{000}};
			\end{scope}

			\draw[->]
				(0) edge[loop left, looseness = 7] node[label] {\texttt{0}} (0)
				(7) edge[loop above, looseness = 7] node[label] {\texttt{1}} (7)

				(0) edge[out=90,in=-90] node[label] {\texttt{1}} (1)
				(1) edge node[label] {\texttt{0}} (2)
				(1) edge[out=45,in=-135] node[label] {\texttt{1}} (3)
				(2) edge[bend left] node[label] {\texttt{1}} (5)
				(3) edge node[label] {\texttt{0}} (6)
				(3) edge node[label] {\texttt{1}} (7)
				(5) edge[bend left] node[label] {\texttt{0}} (2)
				(5) edge node[label] {\texttt{1}} (3)
				(6) edge[bend left] node[label] {\texttt{0}} (4)
				(6) edge[out=-90,in=0] node[label] {\texttt{1}} (5)
				(7) edge[out=0,in=90] node[label] {\texttt{0}} (6)
			;

			\draw[->, rounded corners = 10mm]
				(4) to (4, 2) to node[label] {\texttt{1}} (-4, 2) to (1)
			;

			\draw[->, rounded corners = 10mm]
				(4) to (4, -6) to node[label] {\texttt{0}} (0)
			;

			\draw[->, rounded corners = 5mm]
				(2) to (-2, -5) to node[label] {\texttt{0}} (3, -5) to (3, -2) to (4)
			;
		\end{tikzpicture}
	\end{center}

	\begin{instructornote}
		This graph also appears as a solution to a different
		problem in the DFA handout.
	\end{instructornote}
\end{solution}

\vfill


\problem{}
\begin{itemize}
	\item Show that $G_n$ has $2^{n-1}$ nodes and $2^n$ edges;
	\item that each node has two outgoing edges;
	\item and that there are as many edges labeled $0$ as are labeled $1$.
\end{itemize}

\begin{solution}
	\begin{itemize}
		\item There are $2^{n-1}$ binary words of length $n-1$.
		\item The suffix of a given word is the prefix of exactly two words (possibly including the word itself), \par
		so there are two edges leaving each node.
		\item One of those words will end with one, and the other will end with zero.
		\item Our $2^{n-1}$ nodes each have $2$ outgoing edges---we thus have $2^n$ edges in total.
	\end{itemize}
\end{solution}

\vfill
\pagebreak


\theorem{}
We can now easily construct De Bruijn words for a given $n$: \par
\begin{itemize}
	\item Construct $G_n$,
	\item then an Eulerian cycle in $G_n$.
	\item Finally, construct a De Bruijn word by writing the label of our starting vertex,
	then appending the label of every edge we travel.
\end{itemize}

\problem{}
Find De Bruijn words of orders $2$, $3$, and $4$.

\begin{solution}
	\begin{itemize}
		\item
		One Eulerian cycle in $G_2$ starts at node \texttt{0}, and takes the edges labeled $[1, 1, 0, 0]$. \par
		We thus have the word \texttt{01100}.

		\item
		In $G_3$, we have an Eulerian cycle that visits nodes in the following order: \par
		$
		\texttt{00}
		\rightarrow \texttt{01}
		\rightarrow \texttt{11}
		\rightarrow \texttt{11}
		\rightarrow \texttt{10}
		\rightarrow \texttt{01}
		\rightarrow \texttt{10}
		\rightarrow \texttt{00}
		\rightarrow \texttt{00}
		$\par
		This gives us the word \texttt{0011101000}.

		\item Similarly, $G_4$ gives us the word \texttt{0001 0011 0101 1110 000}. \par
		\note{Spaces have been added for convenience.}
	\end{itemize}
\end{solution}

\vfill
\pagebreak
Added initial De Bruijn sections 2024-03-20 19:38:35 -07:00			`\section{De Bruijn Words}`

			`Before we continue, we'll need to review some basic`
			`graph theory.`

			`\definition{}`
			`A \textit{directed graph} consists of nodes and directed edges. \par`
			`An example is shown below. It consists of three vertices (labeled $a, b, c$), \par`
			`and five edges (labeled $0, ... , 4$).`

			`\begin{center}`
			`\begin{tikzpicture}`
			`\begin{scope}[layer = nodes]`
			`\node[main] (a) at (0, 0) {$a$};`
			`\node[main] (b) at (2, 0) {$b$};`
			`\node[main] (c) at (4, 0) {$c$};`
			`\end{scope}`

			`\draw[->]`
			`(a) edge node[label] {$0$} (b)`
			`(a) edge[loop above] node[label] {$1$} (a)`
			`(b) edge[bend left] node[label] {$2$} (c)`
			`(b) edge[loop above] node[label] {$3$} (b)`
			`(c) edge[bend left] node[label] {$4$} (b)`
			`;`
			`\end{tikzpicture}`
			`\end{center}`

			`\definition{}`
			`A \textit{path} in a graph is a sequence of adjacent edges. \par`
			`In a directed graph, adjacent edges are those that start and end at the same node. \par`
			`\vspace{2mm}`
			`For example, consider the graph above. \par`
			`The edges $0$ and $1$ are not adjacent, because $0$ and $1$ both \textit{end} at $b$. \par`
			`$0$ and $2$, however, are: $0$ ends at $b$, and $2$ starts at $b$.`
			`$[0, 3, 2]$ is a path in the graph above, drawn below. \par`


			`\definition{}`
			`An \textit{Eulerian path} is a path that visits each edge of a graph exactly once. \par`
			`An \textit{Eulerian cycle} is an Eulerian path that starts and ends on the same node.`

			`\problem{}`
			`Find the single unique Eulerian cycle in the graph below.`
			`\begin{center}`
			`\begin{tikzpicture}`
			`\begin{scope}[layer = nodes]`
			`\node[main] (a) at (0, 0) {$a$};`
			`\node[main] (b) at (2, 0) {$b$};`
			`\node[main] (c) at (4, 0) {$c$};`
			`\end{scope}`

			`\draw[->]`
			`(a) edge[bend left] node[label] {$0$} (b)`
			`(b) edge[bend left] node[label] {$1$} (a)`
			`(b) edge[bend left] node[label] {$2$} (c)`
			`(c) edge[bend left] node[label] {$3$} (b)`
			`(c) edge[loop right] node[label] {$4$} (c)`
			`;`
			`\end{tikzpicture}`
			`\end{center}`

			`\begin{solution}`
			`$24310$ is one way to write this cycle. \par`
			`There are other options, but they're all the same.`
			`\end{solution}`

			`\vfill`
			`\pagebreak`



			`\definition{}`
			`Now, consider the $n$-subword problem over $\{\texttt{0}, \texttt{1}\}$. \par`
			`We'll call the optimal solution to this problem a \textit{De Bruijn\footnotemark{} word} of order $n$. \par`

			`\footnotetext{Dutch. Rhymes with \say{De Grown.}}`


			`\problem{}`
			`Let $\mathcal{B}_n$ be the length of an order-$n$ De Bruijn word. \par`
			`Show that the following bounds always hold:`
			`\begin{itemize}`
			`\item $\mathcal{B}_n \leq n2^n$`
			`\item $\mathcal{B}_n \geq 2^n + n - 1$`
			`\end{itemize}`

			`\begin{solution}`
			`\begin{itemize}`
			`\item There are $2^n$ binary words with length $n$. \par`
			`Concatenate these to get a word with length $n2^n$.`
			`\item A word must have at least $2^n + n - 1$ letters to have $2^n$ subwords with length $n$.`
			`\end{itemize}`
			`\end{solution}`


			`\remark{}`
			`Now, we'd like to show that $\mathcal{B}_n = 2^n + n - 1$... \par`
			`That is, that the optimal solution to the subword problem always has $2^n + n - 1$ letters.`

			`\definition{}`
			`Consider a $n$-length word $w$. \par`
			`The \textit{prefix} of $w$ is the word formed by the first $n-1$ letters of $w$. \par`
			`The \textit{suffix} of $w$ is the word formed by the last $n-1$ letters of $w$. \par`
			`For example, the prefix of the word \texttt{1101} is \texttt{110}, and its suffix is \texttt{101}.`
			`The prefix and suffix of any one-letter word are both $\varnothing$.`

			`\definition{}`
			`A \textit{De Bruijn graph} of order $n$, denoted $G_n$, is constructed as follows:`
			`\begin{itemize}`
			`\item Nodes are created for each word of length $n - 1$.`
			`\item A directed edge is drawn from $a$ to $b$ if the suffix of`
			`$a$ matches the prefix of $b$. \par`
			`Note that a node may have an edge to itself.`
			`\item We label each edge with the last letter of $b$.`
			`\end{itemize}`
			`$G_2$ and $G_3$ are shown below.`

			`\null\hfill`
			`\begin{minipage}{0.48\textwidth}`
			`\begin{center}`
			$G_2$

			`\begin{tikzpicture}`
			`\begin{scope}[layer = nodes]`
			`\node[main] (0) at (0, 0) {\texttt{0}};`
			`\node[main] (1) at (2, 0) {\texttt{1}};`
			`\end{scope}`

			`\draw[->]`
			`(0) edge[loop left] node[label] {$0$} (0)`
			`(1) edge[loop right] node[label] {$1$} (1)`
			`(1) edge[bend left] node[label] {$0$} (0)`
			`(0) edge[bend left] node[label] {$1$} (1)`
			`;`
			`\end{tikzpicture}`
			`\end{center}`
			`\end{minipage}`
			`\hfill`
			`\begin{minipage}{0.48\textwidth}`
			`\begin{center}`
			$G_3$

			`\begin{tikzpicture}[scale = 0.9]`
			`\begin{scope}[layer = nodes]`
			`\node[main] (00) at (0, 0) {\texttt{00}};`
			`\node[main] (01) at (2, 1) {\texttt{01}};`
			`\node[main] (10) at (2, -1) {\texttt{10}};`
			`\node[main] (11) at (4, 0) {\texttt{11}};`
			`\end{scope}`

			`\draw[->]`
			`(00) edge[loop left] node[label] {$0$} (00)`
			`(11) edge[loop right] node[label] {$1$} (11)`
			`(00) edge[bend left] node[label] {$1$} (01)`
			`(01) edge[bend left] node[label] {$0$} (10)`
			`(10) edge[bend left] node[label] {$1$} (01)`
			`(10) edge[bend left] node[label] {$0$} (00)`
			`(01) edge[bend left] node[label] {$1$} (11)`
			`(11) edge[bend left] node[label] {$0$} (10)`
			`;`
			`\end{tikzpicture}`
			`\end{center}`
			`\end{minipage}`
			`\hfill\null`

			`\vfill`
			`\pagebreak`

			`\problem{}`
			`Draw $G_4$.`

			`\begin{solution}`
			`\begin{center}`
			`\begin{tikzpicture}`
			`\begin{scope}[layer = nodes]`
			`\node[main] (7) at (0, 0) {\texttt{111}};`
			`\node[main] (3) at (0, -2) {\texttt{011}};`
			`\node[main] (6) at (2, -2) {\texttt{110}};`
			`\node[main] (4) at (4, -2) {\texttt{100}};`
			`\node[main] (1) at (-4, -4) {\texttt{001}};`
			`\node[main] (5) at (0, -4) {\texttt{101}};`
			`\node[main] (2) at (-2, -4) {\texttt{010}};`
			`\node[main] (0) at (-2, -6) {\texttt{000}};`
			`\end{scope}`

			`\draw[->]`
			`(0) edge[loop left, looseness = 7] node[label] {\texttt{0}} (0)`
			`(7) edge[loop above, looseness = 7] node[label] {\texttt{1}} (7)`

			`(0) edge[out=90,in=-90] node[label] {\texttt{1}} (1)`
			`(1) edge node[label] {\texttt{0}} (2)`
			`(1) edge[out=45,in=-135] node[label] {\texttt{1}} (3)`
			`(2) edge[bend left] node[label] {\texttt{1}} (5)`
			`(3) edge node[label] {\texttt{0}} (6)`
			`(3) edge node[label] {\texttt{1}} (7)`
			`(5) edge[bend left] node[label] {\texttt{0}} (2)`
			`(5) edge node[label] {\texttt{1}} (3)`
			`(6) edge[bend left] node[label] {\texttt{0}} (4)`
			`(6) edge[out=-90,in=0] node[label] {\texttt{1}} (5)`
			`(7) edge[out=0,in=90] node[label] {\texttt{0}} (6)`
			`;`

			`\draw[->, rounded corners = 10mm]`
			`(4) to (4, 2) to node[label] {\texttt{1}} (-4, 2) to (1)`
			`;`

			`\draw[->, rounded corners = 10mm]`
			`(4) to (4, -6) to node[label] {\texttt{0}} (0)`
			`;`

			`\draw[->, rounded corners = 5mm]`
			`(2) to (-2, -5) to node[label] {\texttt{0}} (3, -5) to (3, -2) to (4)`
			`;`
			`\end{tikzpicture}`
			`\end{center}`

			`\begin{instructornote}`
			`This graph also appears as a solution to a different`
			`problem in the DFA handout.`
			`\end{instructornote}`
			`\end{solution}`

			`\vfill`


			`\problem{}`
			`\begin{itemize}`
			`\item Show that $G_n$ has $2^{n-1}$ nodes and $2^n$ edges;`
			`\item that each node has two outgoing edges;`
			`\item and that there are as many edges labeled $0$ as are labeled $1$.`
			`\end{itemize}`

			`\begin{solution}`
			`\begin{itemize}`
			`\item There $2^{n-1}$ binary words of length $n-1$.`
			`\item The suffix of a given word is the prefix of two other words, \par`
			`so there are two edges leaving each node.`
			`\item One of those words will end with one, and the other will end with zero.`
			`\item Our $2^{n-1}$ nodes each have $2$ outgoing edges---we thus have $2^n$ edges in total.`
			`\end{itemize}`
			`\end{solution}`

			`\vfill`
			`\pagebreak`


			`\theorem{}`
			`We can now easily construct De Bruijn words for a given $n$: \par`
			`\begin{itemize}`
			`\item Construct $G_n$,`
			`\item then an Eulerian cycle in $G_n$.`
			`\item Finally, construct a De Bruijn by writing the label of our starting vertex,`
			`then appending the label of every edge we travel.`
			`\end{itemize}`

			`\problem{}`
			`Find De Bruijn words of orders $2$, $3$, and $4$.`

			`\begin{solution}`
			`\begin{itemize}`
			`\item`
			`One Eulerian cycle in $G_2$ starts at node \texttt{0}, and takes the edges labeled $[1, 1, 0, 0]$. \par`
			`We thus have the word \texttt{01100}.`

			`\item`
			`In $G_3$, we have an Eulerian cycle that visits nodes in the following order: \par`
			`$`
			`\texttt{00}`
			`\rightarrow \texttt{01}`
			`\rightarrow \texttt{11}`
			`\rightarrow \texttt{11}`
			`\rightarrow \texttt{10}`
			`\rightarrow \texttt{01}`
			`\rightarrow \texttt{10}`
			`\rightarrow \texttt{00}`
			`\rightarrow \texttt{00}`
			`$\par`
			`This gives us the word \texttt{0011101000}`

			`\item Similarly, we $G_4$ gives us the word \texttt{0001 0011 0101 1110 000}. \par`
			`\note{Spaces have been added for convenience.}`
			`\end{itemize}`
			`\end{solution}`

			`\vfill`
			`\pagebreak`