New build system
This commit is contained in:
434
src/Advanced/De Bruijn/parts/4 sturmian.tex
Normal file
434
src/Advanced/De Bruijn/parts/4 sturmian.tex
Normal file
@ -0,0 +1,434 @@
|
||||
\section{Sturmian Words}
|
||||
|
||||
A De Bruijn word is the shortest word that contains all subwords
|
||||
of a given length. \par
|
||||
Let's now solve a similar problem: given an alphabet, we want to
|
||||
construct a word that contains exactly $m$ distinct subwords of
|
||||
length $n$.
|
||||
|
||||
\vspace{2mm}
|
||||
|
||||
% TODO: better, intuitive description
|
||||
|
||||
In general, this is a difficult problem. We'll restrict ourselves
|
||||
to a special case: \par
|
||||
We'd like to find a word that contains exactly $m + 1$ distinct subwords
|
||||
of length $m$ for all $m \leq n$.
|
||||
|
||||
|
||||
\definition{}
|
||||
We say a word $w$ is a \textit{Sturmian word} of order $n$
|
||||
if $\mathcal{S}_m(w) = m + 1$ for all $m \leq n$. \par
|
||||
We say $w$ is a \textit{minimal} Sturmian word if there is no shorter
|
||||
Sturmian word of that order.
|
||||
|
||||
\problem{}
|
||||
Show that the length of a Sturmian word of order $n$ is at least $2n$.
|
||||
|
||||
\begin{solution}
|
||||
In order to have $n + 1$ subwords of length $n$, a word must have at
|
||||
least $(n+1) + (n-1) = 2n$ letters.
|
||||
\end{solution}
|
||||
|
||||
\vfill
|
||||
\pagebreak
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
\problem{}
|
||||
Construct $R_3$ by removing four edges from $G_3$. \par
|
||||
Show that each of the following is possible:
|
||||
\begin{itemize}[itemsep=2mm ]
|
||||
\item $R_3$ does not contain an Eulerian path.
|
||||
\item $R_3$ contains an Eulerian path, and this path \par
|
||||
constructs a word $w$ with $\mathcal{S}_3(w) = 4$
|
||||
and $\mathcal{S}_2(w) = 4$.
|
||||
\item $R_3$ contains an Eulerian path, and this path \par
|
||||
constructs a word $w$ that is a minimal Sturmian word
|
||||
of order 3.
|
||||
\end{itemize}
|
||||
|
||||
\begin{solution}
|
||||
Remove the edges $\texttt{00} \rightarrow \texttt{01}$,
|
||||
$\texttt{01} \rightarrow \texttt{10}$,
|
||||
$\texttt{10} \rightarrow \texttt{00}$, and
|
||||
$\texttt{11} \rightarrow \texttt{11}$:
|
||||
|
||||
\begin{center}
|
||||
\begin{tikzpicture}
|
||||
\begin{scope}[layer = nodes]
|
||||
\node[main] (00) at (0, 0) {\texttt{00}};
|
||||
\node[main] (01) at (2, 1) {\texttt{01}};
|
||||
\node[main] (10) at (2, -1) {\texttt{10}};
|
||||
\node[main] (11) at (4, 0) {\texttt{11}};
|
||||
\end{scope}
|
||||
|
||||
\draw[->]
|
||||
(00) edge[loop left] node[label] {$0$} (00)
|
||||
(10) edge[bend left] node[label] {$1$} (01)
|
||||
(01) edge[bend left] node[label] {$1$} (11)
|
||||
(11) edge[bend left] node[label] {$0$} (10)
|
||||
;
|
||||
\end{tikzpicture}
|
||||
\end{center}
|
||||
|
||||
\linehack{}
|
||||
|
||||
Remove the edges $\texttt{00} \rightarrow \texttt{00}$,
|
||||
$\texttt{01} \rightarrow \texttt{10}$,
|
||||
$\texttt{10} \rightarrow \texttt{01}$, and
|
||||
$\texttt{11} \rightarrow \texttt{11}$. \par
|
||||
The Eulerian path starting at \texttt{00} produces \texttt{001100},
|
||||
where $\mathcal{S}_2 = \mathcal{S}_3 = 4$.
|
||||
|
||||
\begin{center}
|
||||
\begin{tikzpicture}
|
||||
\begin{scope}[layer = nodes]
|
||||
\node[main] (00) at (0, 0) {\texttt{00}};
|
||||
\node[main] (01) at (2, 1) {\texttt{01}};
|
||||
\node[main] (10) at (2, -1) {\texttt{10}};
|
||||
\node[main] (11) at (4, 0) {\texttt{11}};
|
||||
\end{scope}
|
||||
|
||||
\draw[->]
|
||||
(00) edge[bend left] node[label] {$1$} (01)
|
||||
(10) edge[bend left] node[label] {$0$} (00)
|
||||
(01) edge[bend left] node[label] {$1$} (11)
|
||||
(11) edge[bend left] node[label] {$0$} (10)
|
||||
;
|
||||
\end{tikzpicture}
|
||||
\end{center}
|
||||
|
||||
\linehack{}
|
||||
|
||||
Remove the edges $\texttt{01} \rightarrow \texttt{11}$,
|
||||
$\texttt{10} \rightarrow \texttt{00}$,
|
||||
$\texttt{11} \rightarrow \texttt{10}$, and
|
||||
$\texttt{11} \rightarrow \texttt{11}$. \par
|
||||
The Eulerian path starting at \texttt{00} produces \texttt{000101},
|
||||
where $\mathcal{S}_0 = 1$, $\mathcal{S}_1 = 2$, $\mathcal{S}_2 = 3$,
|
||||
and $\mathcal{S}_3 = 4$. \par
|
||||
|
||||
\texttt{000101} has length $2 \times 3 = 6$, and is thus minimal.
|
||||
|
||||
\begin{center}
|
||||
\begin{tikzpicture}
|
||||
\begin{scope}[layer = nodes]
|
||||
\node[main] (00) at (0, 0) {\texttt{00}};
|
||||
\node[main] (01) at (2, 1) {\texttt{01}};
|
||||
\node[main] (10) at (2, -1) {\texttt{10}};
|
||||
\node[main] (11) at (4, 0) {\texttt{11}};
|
||||
\end{scope}
|
||||
|
||||
\draw[->]
|
||||
(00) edge[loop left] node[label] {$0$} (00)
|
||||
(00) edge[bend left] node[label] {$1$} (01)
|
||||
(01) edge[bend left] node[label] {$0$} (10)
|
||||
(10) edge[bend left] node[label] {$1$} (01)
|
||||
;
|
||||
\end{tikzpicture}
|
||||
\end{center}
|
||||
|
||||
Note that this graph contains an Eulerian path even though
|
||||
\texttt{11} is disconnected. \par
|
||||
An Eulerian path needs to visit all \textit{edges}, not all \textit{nodes}!
|
||||
\end{solution}
|
||||
|
||||
|
||||
\vfill
|
||||
\pagebreak
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
\problem{}<trysturmian>
|
||||
Construct $R_2$ by removing one edge from $G_2$, then construct $\mathcal{L}(R_2)$. \par
|
||||
\begin{itemize}
|
||||
\item If this line graph has four edges, set $R_3 = \mathcal{L}(R_2)$. \par
|
||||
\item If not, remove one edge from $\mathcal{L}(R_2)$ so that an Eulerian path still exists
|
||||
and set $R_3$ to the resulting graph.
|
||||
\end{itemize}
|
||||
Label each edge in $R_3$ with the last letter of its target node. \par
|
||||
Let $w$ be the word generated by an Eulerian path in this graph, as before.
|
||||
|
||||
\vspace{2mm}
|
||||
|
||||
Attempt the above construction a few times. Is $w$ a minimal Sturmian word?
|
||||
|
||||
\begin{solution}
|
||||
If $R_2$ is constructed by removing the edge $\texttt{0} \rightarrow \texttt{1}$,
|
||||
$\mathcal{L}(R_2)$ is the graph shown below.
|
||||
|
||||
\begin{center}
|
||||
\begin{tikzpicture}
|
||||
\begin{scope}[layer = nodes]
|
||||
\node[main] (00) at (0, 0) {\texttt{00}};
|
||||
\node[main] (01) at (2, 1) {\texttt{01}};
|
||||
\node[main] (10) at (2, -1) {\texttt{10}};
|
||||
\node[main] (11) at (4, 0) {\texttt{11}};
|
||||
\end{scope}
|
||||
|
||||
\draw[->]
|
||||
(00) edge[loop left] node[label] {$0$} (00)
|
||||
(10) edge[bend left] node[label] {$0$} (00)
|
||||
(11) edge[bend left] node[label] {$0$} (10)
|
||||
(11) edge[loop right] node[label] {$1$} (11)
|
||||
;
|
||||
\end{tikzpicture}
|
||||
\end{center}
|
||||
|
||||
We obtain the Sturmian word \texttt{111000} via the Eulerian path through the nodes
|
||||
$\texttt{11} \rightarrow \texttt{11} \rightarrow \texttt{10}
|
||||
\rightarrow \texttt{00} \rightarrow \texttt{00}$.
|
||||
|
||||
\linehack{}
|
||||
|
||||
If $R_2$ is constructed by removing the edge $\texttt{0} \rightarrow \texttt{0}$,
|
||||
$\mathcal{L}(R_2)$ is the graph pictured below.
|
||||
|
||||
\begin{center}
|
||||
\begin{tikzpicture}
|
||||
\begin{scope}[layer = nodes]
|
||||
\node[main] (00) at (0, 0) {\texttt{00}};
|
||||
\node[main] (01) at (2, 1) {\texttt{01}};
|
||||
\node[main] (10) at (2, -1) {\texttt{10}};
|
||||
\node[main] (11) at (4, 0) {\texttt{11}};
|
||||
\end{scope}
|
||||
|
||||
\draw[->]
|
||||
(01) edge[bend left] node[label] {$0$} (10)
|
||||
(10) edge[bend left] node[label] {$1$} (01)
|
||||
(11) edge[bend left] node[label] {$0$} (10)
|
||||
(01) edge[bend left] node[label] {$1$} (11)
|
||||
(11) edge[loop right] node[label] {$1$} (11)
|
||||
;
|
||||
\end{tikzpicture}
|
||||
\end{center}
|
||||
|
||||
This graph contains five edges, so we need to remove one. \par
|
||||
To keep an Eulerian path, we can remove any of the following:
|
||||
\begin{itemize}
|
||||
\item $\texttt{01} \rightarrow \texttt{10}$ to produce \texttt{011101}
|
||||
\item $\texttt{01} \rightarrow \texttt{11}$ to produce \texttt{111010}
|
||||
\item $\texttt{11} \rightarrow \texttt{10}$ to produce \texttt{010111}
|
||||
\item $\texttt{11} \rightarrow \texttt{11}$ to produce \texttt{011010}
|
||||
\end{itemize}
|
||||
Each of these is a minimal Sturmian word.
|
||||
|
||||
\linehack{}
|
||||
|
||||
The case in which we remove $\texttt{1} \rightarrow \texttt{0}$ in $G_2$ should
|
||||
produce a minimal Sturmian word where \texttt{0} and \texttt{1} are interchanged
|
||||
in the word produced by removing $\texttt{0} \rightarrow \texttt{1}$.
|
||||
|
||||
\vspace{2mm}
|
||||
|
||||
Removing $\texttt{1} \rightarrow \texttt{1}$ will produce minimal
|
||||
Sturmian words where \texttt{0} and \texttt{1} are interchanged from the words
|
||||
produced by removing $\texttt{0} \rightarrow \texttt{0}$.
|
||||
|
||||
\end{solution}
|
||||
|
||||
\vfill
|
||||
\pagebreak
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
\theorem{}<sturmanthm>
|
||||
We can construct a minimal Sturmian word of order $n \geq 3$ as follows:
|
||||
\begin{itemize}
|
||||
\item Start with $G_2$, create $R_2$ by removing one edge.
|
||||
\item Construct $\mathcal{L}(R_2)$, remove an edge if necessary. \par
|
||||
The resulting graph must have 4 edges and an Eulerian path. Call this $R_3$.
|
||||
\item Repeat the previous step to construct a sequence of graphs $R_n$. \par
|
||||
$R_{n-1}$ is used to create $R_n$, which has $n + 1$ edges and an Eulerian path. \par
|
||||
Label edges with the last letter of their target vertex.
|
||||
\item Construct a word $w$ using the Eulerian path, as before. \par
|
||||
This is a minimal Sturmian word.
|
||||
\end{itemize}
|
||||
For now, assume this theorem holds. We'll prove it in the next few problems.
|
||||
|
||||
\problem{}<sturmianfour>
|
||||
Construct a minimal Sturmian word of order 4.
|
||||
|
||||
\begin{solution}
|
||||
Let $R_3$ be the graph below (see \ref{trysturmian}).
|
||||
|
||||
\begin{center}
|
||||
\begin{tikzpicture}
|
||||
\begin{scope}[layer = nodes]
|
||||
\node[main] (00) at (0, 0) {\texttt{00}};
|
||||
\node[main] (01) at (2, 1) {\texttt{01}};
|
||||
\node[main] (10) at (2, -1) {\texttt{10}};
|
||||
\node[main] (11) at (4, 0) {\texttt{11}};
|
||||
\end{scope}
|
||||
|
||||
\draw[->]
|
||||
(00) edge[loop left] node[label] {$0$} (00)
|
||||
(10) edge[bend left] node[label] {$0$} (00)
|
||||
(11) edge[bend left] node[label] {$0$} (10)
|
||||
(11) edge[loop right] node[label] {$1$} (11)
|
||||
;
|
||||
\end{tikzpicture}
|
||||
\end{center}
|
||||
|
||||
$R_4 = \mathcal{L}(R_3)$ is then as shown below, producing the
|
||||
order $4$ minimal Sturmian word \texttt{11110000}. Disconnected
|
||||
nodes are omitted.
|
||||
|
||||
\begin{center}
|
||||
\begin{tikzpicture}
|
||||
\begin{scope}[layer = nodes]
|
||||
\node[main] (000) at (0, 0) {\texttt{000}};
|
||||
\node[main] (100) at (2, 1) {\texttt{100}};
|
||||
\node[main] (110) at (2, -1) {\texttt{110}};
|
||||
\node[main] (111) at (4, 0) {\texttt{111}};
|
||||
\end{scope}
|
||||
|
||||
\draw[->]
|
||||
(000) edge[loop left] node[label] {$0$} (000)
|
||||
(100) edge[bend right] node[label] {$0$} (000)
|
||||
(110) edge[bend left] node[label] {$0$} (100)
|
||||
(111) edge[bend left] node[label] {$0$} (110)
|
||||
(111) edge[loop right] node[label] {$1$} (111)
|
||||
;
|
||||
\end{tikzpicture}
|
||||
\end{center}
|
||||
\end{solution}
|
||||
|
||||
\vfill
|
||||
\pagebreak
|
||||
|
||||
\problem{}
|
||||
Construct a minimal Sturmian word of order 5.
|
||||
|
||||
\begin{solution}
|
||||
Use $R_4$ from \ref{sturmianfour} to construct $R_5$, shown below. \par
|
||||
Disconnected nodes are omitted.
|
||||
|
||||
\begin{center}
|
||||
\begin{tikzpicture}
|
||||
\begin{scope}[layer = nodes]
|
||||
\node[main] (0000) at (0, 0) {\texttt{0000}};
|
||||
\node[main] (1000) at (2, 0) {\texttt{1000}};
|
||||
\node[main] (1100) at (4, 0) {\texttt{1100}};
|
||||
\node[main] (1110) at (6, 0) {\texttt{1110}};
|
||||
\node[main] (1111) at (8, 0) {\texttt{1111}};
|
||||
\end{scope}
|
||||
|
||||
\draw[->]
|
||||
(1111) edge[loop right] node[label] {$1$} (1111)
|
||||
(1111) edge[bend right] node[label] {$0$} (1110)
|
||||
(1110) edge[bend left] node[label] {$0$} (1100)
|
||||
(1100) edge[bend right] node[label] {$0$} (1000)
|
||||
(1000) edge[bend left] node[label] {$0$} (0000)
|
||||
(0000) edge[loop left] node[label] {$0$} (0000)
|
||||
;
|
||||
\end{tikzpicture}
|
||||
\end{center}
|
||||
This graph generates the minimal Sturmian word \texttt{1111100000}.
|
||||
\end{solution}
|
||||
|
||||
\vfill
|
||||
\pagebreak
|
||||
|
||||
|
||||
\problem{}
|
||||
Argue that the words we get by \ref{sturmanthm} are minimal Sturmian words. \par
|
||||
That is, the word $w$ has length $2n$ and $\mathcal{S}_m(w) = m + 1$ for all $m \leq n$.
|
||||
|
||||
\begin{solution}
|
||||
We proceed by induction. \par
|
||||
First, show that we can produce a minimal order 3 Sturmian word: \par
|
||||
|
||||
\vspace{2mm}
|
||||
|
||||
|
||||
Since $R_3$ is guaranteed to have four edges with length-$2$ node labels,
|
||||
the length of $w$ is $2 \times 3 = 6$. \par
|
||||
Trivially, we also have $\mathcal{S}_0 = 1$ and $\mathcal{S}_1 = 2$. \par
|
||||
|
||||
\vspace{2mm}
|
||||
|
||||
There are three vertices of $R_3$ given by the three remaining edges of $R_2$.
|
||||
Each length-2 subword of $w$ will be represented by the label of one of these
|
||||
three nodes. Thus, $\mathcal{S}_2(w) \leq 3$. The line graph of a connected graph
|
||||
is connected, so an Eulerian path on $R_3$ reaches every node. We thus have that
|
||||
$\mathcal{S}_2(w) = 3$.
|
||||
|
||||
\vspace{2mm}
|
||||
|
||||
By construction, the length 3 subwords of $w$ are all distinct, so $\mathcal{S}_3(w) = 4$.
|
||||
We thus conclude that $w$ is a minimal order 3 Sturmian word.
|
||||
|
||||
\linehack{}
|
||||
|
||||
Now, we prove our inductive step: \par
|
||||
Assume that the process above produces an order $n-1$ minimal Sturmian word $w_{n-1}$. \par
|
||||
We want to show that $w_n$ is also a minimal Sturmian word. \par
|
||||
|
||||
\vspace{2mm}
|
||||
|
||||
By construction, $R_n$ has node labels of length $n-1$ and $n+1$ edges. \par
|
||||
Thus, $w_n$ has length $2n$.
|
||||
|
||||
\vspace{2mm}
|
||||
|
||||
The only possible length-$m$ subwords of $w_n$ are those of $w_{n-1}$ for $m < n$. \par
|
||||
The line graph of a connected graph is connected, so an Eulerian path on $R_n$ reaches each node.
|
||||
Thus, all length-$m$ subwords of $w_{n-1}$ appear in $w_n$.
|
||||
|
||||
\vspace{2mm}
|
||||
|
||||
By our inductive hypothesis, $\mathcal{S}_m(w_n) = m + 1$ for $m < n$. \par
|
||||
The length-$n$ subwords of $w_n$ are distinct by construction, and there are
|
||||
$n+1$ such subwords.
|
||||
|
||||
\vspace{2mm}
|
||||
|
||||
Thus, $\mathcal{S}_n(w_n) = n + 1$.
|
||||
\end{solution}
|
||||
|
||||
\vfill
|
||||
\pagebreak
|
Reference in New Issue
Block a user