\section{Sturmian Words}

A De Bruijn word is the shortest word that contains all subwords
of a given length. \par
Let's now solve a similar problem: given an alphabet, we want to
construct a word that contains exactly $m$ distinct subwords of
length $n$.

\vspace{2mm}

% TODO: better, intuitive description

In general, this is a difficult problem. We'll restrict ourselves
to a special case: \par
We'd like to find a word that contains exactly $m + 1$ distinct subwords
of length $m$ for all $m \leq n$.


\definition{}
We say a word $w$ is a \textit{Sturmian word} of order $n$
if $\mathcal{S}_m(w) = m + 1$ for all $m \leq n$. \par
We say $w$ is a \textit{minimal} Sturmian word if there is no shorter
Sturmian word of that order.

\problem{}
Show that the length of a Sturmian word of order $n$ is at least $2n$.

\begin{solution}
	In order to have $n + 1$ subwords of length $n$, a word must have at
	least $(n+1) + (n-1) = 2n$ letters.
\end{solution}

\vfill
\pagebreak



















\problem{}
Construct $R_3$ by removing four edges from $G_3$. \par
Show that each of the following is possible:
\begin{itemize}[itemsep=2mm ]
	\item $R_3$ does not contain an Eulerian path.
	\item $R_3$ contains an Eulerian path, and this path \par
	constructs a word $w$ with $\mathcal{S}_3(w) = 4$
	and $\mathcal{S}_2(w) = 4$.
	\item $R_3$ contains an Eulerian path, and this path \par
	constructs a word $w$ that is a minimal Sturmian word
	of order 3.
\end{itemize}

\begin{solution}
	Remove the edges $\texttt{00} \rightarrow \texttt{01}$,
	$\texttt{01} \rightarrow \texttt{10}$,
	$\texttt{10} \rightarrow \texttt{00}$, and
	$\texttt{11} \rightarrow \texttt{11}$:

	\begin{center}
		\begin{tikzpicture}
			\begin{scope}[layer = nodes]
				\node[main] (00) at (0, 0) {\texttt{00}};
				\node[main] (01) at (2, 1) {\texttt{01}};
				\node[main] (10) at (2, -1) {\texttt{10}};
				\node[main] (11) at (4, 0) {\texttt{11}};
			\end{scope}

			\draw[->]
				(00) edge[loop left] node[label] {$0$} (00)
				(10) edge[bend left] node[label] {$1$} (01)
				(01) edge[bend left] node[label] {$1$} (11)
				(11) edge[bend left] node[label] {$0$} (10)
			;
		\end{tikzpicture}
	\end{center}

	\linehack{}

	Remove the edges $\texttt{00} \rightarrow \texttt{00}$,
	$\texttt{01} \rightarrow \texttt{10}$,
	$\texttt{10} \rightarrow \texttt{01}$, and
	$\texttt{11} \rightarrow \texttt{11}$. \par
	The Eulerian path starting at \texttt{00} produces \texttt{001100},
	where $\mathcal{S}_2 = \mathcal{S}_3 = 4$.

	\begin{center}
		\begin{tikzpicture}
			\begin{scope}[layer = nodes]
				\node[main] (00) at (0, 0) {\texttt{00}};
				\node[main] (01) at (2, 1) {\texttt{01}};
				\node[main] (10) at (2, -1) {\texttt{10}};
				\node[main] (11) at (4, 0) {\texttt{11}};
			\end{scope}

			\draw[->]
				(00) edge[bend left] node[label] {$1$} (01)
				(10) edge[bend left] node[label] {$0$} (00)
				(01) edge[bend left] node[label] {$1$} (11)
				(11) edge[bend left] node[label] {$0$} (10)
			;
		\end{tikzpicture}
	\end{center}

	\linehack{}

	Remove the edges $\texttt{01} \rightarrow \texttt{11}$,
	$\texttt{10} \rightarrow \texttt{00}$,
	$\texttt{11} \rightarrow \texttt{10}$, and
	$\texttt{11} \rightarrow \texttt{11}$. \par
	The Eulerian path starting at \texttt{00} produces \texttt{000101},
	where $\mathcal{S}_0 = 1$, $\mathcal{S}_1 = 2$, $\mathcal{S}_2 = 3$,
	and $\mathcal{S}_3 = 4$. \par

	\texttt{000101} has length $2 \times 3 = 6$, and is thus minimal.

	\begin{center}
		\begin{tikzpicture}
			\begin{scope}[layer = nodes]
				\node[main] (00) at (0, 0) {\texttt{00}};
				\node[main] (01) at (2, 1) {\texttt{01}};
				\node[main] (10) at (2, -1) {\texttt{10}};
				\node[main] (11) at (4, 0) {\texttt{11}};
			\end{scope}

			\draw[->]
				(00) edge[loop left] node[label] {$0$} (00)
				(00) edge[bend left] node[label] {$1$} (01)
				(01) edge[bend left] node[label] {$0$} (10)
				(10) edge[bend left] node[label] {$1$} (01)
			;
		\end{tikzpicture}
	\end{center}

	Note that this graph contains an Eulerian path even though
	\texttt{11} is disconnected. \par
	An Eulerian path needs to visit all \textit{edges}, not all \textit{nodes}!
\end{solution}


\vfill
\pagebreak



















\problem{}<trysturmian>
Construct $R_2$ by removing one edge from $G_2$, then construct $\mathcal{L}(R_2)$. \par
\begin{itemize}
	\item If this line graph has four edges, set $R_3 = \mathcal{L}(R_2)$. \par
	\item If not, remove one edge from $\mathcal{L}(R_2)$ so that an Eulerian path still exists
	and set $R_3$ to the resulting graph.
\end{itemize}
Label each edge in $R_3$ with the last letter of its target node. \par
Let $w$ be the word generated by an Eulerian path in this graph, as before.

\vspace{2mm}

Attempt the above construction a few times. Is $w$ a minimal Sturmian word?

\begin{solution}
	If $R_2$ is constructed by removing the edge $\texttt{0} \rightarrow \texttt{1}$,
	$\mathcal{L}(R_2)$ is the graph shown below.

	\begin{center}
		\begin{tikzpicture}
			\begin{scope}[layer = nodes]
				\node[main] (00) at (0, 0) {\texttt{00}};
				\node[main] (01) at (2, 1) {\texttt{01}};
				\node[main] (10) at (2, -1) {\texttt{10}};
				\node[main] (11) at (4, 0) {\texttt{11}};
			\end{scope}

			\draw[->]
				(00) edge[loop left] node[label] {$0$} (00)
				(10) edge[bend left] node[label] {$0$} (00)
				(11) edge[bend left] node[label] {$0$} (10)
				(11) edge[loop right] node[label] {$1$} (11)
			;
		\end{tikzpicture}
	\end{center}

	We obtain the Sturmian word \texttt{111000} via the Eulerian path through the nodes
	$\texttt{11} \rightarrow \texttt{11} \rightarrow \texttt{10}
	\rightarrow \texttt{00} \rightarrow \texttt{00}$.

	\linehack{}

	If $R_2$ is constructed by removing the edge $\texttt{0} \rightarrow \texttt{0}$,
	$\mathcal{L}(R_2)$ is the graph pictured below.

	\begin{center}
		\begin{tikzpicture}
			\begin{scope}[layer = nodes]
				\node[main] (00) at (0, 0) {\texttt{00}};
				\node[main] (01) at (2, 1) {\texttt{01}};
				\node[main] (10) at (2, -1) {\texttt{10}};
				\node[main] (11) at (4, 0) {\texttt{11}};
			\end{scope}

			\draw[->]
				(01) edge[bend left] node[label] {$0$} (10)
				(10) edge[bend left] node[label] {$1$} (01)
				(11) edge[bend left] node[label] {$0$} (10)
				(01) edge[bend left] node[label] {$1$} (11)
				(11) edge[loop right] node[label] {$1$} (11)
			;
		\end{tikzpicture}
	\end{center}

	This graph contains five edges, so we need to remove one. \par
	To keep an Eulerian path, we can remove any of the following:
	\begin{itemize}
		\item $\texttt{01} \rightarrow \texttt{10}$ to produce \texttt{011101}
		\item $\texttt{01} \rightarrow \texttt{11}$ to produce \texttt{111010}
		\item $\texttt{11} \rightarrow \texttt{10}$ to produce \texttt{010111}
		\item $\texttt{11} \rightarrow \texttt{11}$ to produce \texttt{011010}
	\end{itemize}
	Each of these is a minimal Sturmian word.

	\linehack{}

	The case in which we remove $\texttt{1} \rightarrow \texttt{0}$ in $G_2$ should
	produce a minimal Sturmian word where \texttt{0} and \texttt{1} are interchanged
	in the word produced by removing $\texttt{0} \rightarrow \texttt{1}$.

	\vspace{2mm}

	Removing $\texttt{1} \rightarrow \texttt{1}$ will produce minimal
	Sturmian words where \texttt{0} and \texttt{1} are interchanged from the words
	produced by removing $\texttt{0} \rightarrow \texttt{0}$.

\end{solution}

\vfill
\pagebreak













\theorem{}<sturmanthm>
We can construct a minimal Sturmian word of order $n \geq 3$ as follows:
\begin{itemize}
	\item Start with $G_2$, create $R_2$ by removing one edge.
	\item Construct $\mathcal{L}(R_2)$, remove an edge if necessary. \par
	The resulting graph must have 4 edges and an Eulerian path. Call this $R_3$.
	\item Repeat the previous step to construct a sequence of graphs $R_n$. \par
	$R_{n-1}$ is used to create $R_n$, which has $n + 1$ edges and an Eulerian path. \par
	Label edges with the last letter of their target vertex.
	\item Construct a word $w$ using the Eulerian path, as before. \par
	This is a minimal Sturmian word.
\end{itemize}
For now, assume this theorem holds. We'll prove it in the next few problems.

\problem{}<sturmianfour>
Construct a minimal Sturmian word of order 4.

\begin{solution}
	Let $R_3$ be the graph below (see \ref{trysturmian}).

	\begin{center}
		\begin{tikzpicture}
			\begin{scope}[layer = nodes]
				\node[main] (00) at (0, 0) {\texttt{00}};
				\node[main] (01) at (2, 1) {\texttt{01}};
				\node[main] (10) at (2, -1) {\texttt{10}};
				\node[main] (11) at (4, 0) {\texttt{11}};
			\end{scope}

			\draw[->]
				(00) edge[loop left] node[label] {$0$} (00)
				(10) edge[bend left] node[label] {$0$} (00)
				(11) edge[bend left] node[label] {$0$} (10)
				(11) edge[loop right] node[label] {$1$} (11)
			;
		\end{tikzpicture}
	\end{center}

	$R_4 = \mathcal{L}(R_3)$ is then as shown below, producing the
	order $4$ minimal Sturmian word \texttt{11110000}. Disconnected
	nodes are omitted.

	\begin{center}
		\begin{tikzpicture}
			\begin{scope}[layer = nodes]
				\node[main] (000) at (0, 0) {\texttt{000}};
				\node[main] (100) at (2, 1) {\texttt{100}};
				\node[main] (110) at (2, -1) {\texttt{110}};
				\node[main] (111) at (4, 0) {\texttt{111}};
			\end{scope}

			\draw[->]
				(000) edge[loop left] node[label] {$0$} (000)
				(100) edge[bend right] node[label] {$0$} (000)
				(110) edge[bend left] node[label] {$0$} (100)
				(111) edge[bend left] node[label] {$0$} (110)
				(111) edge[loop right] node[label] {$1$} (111)
			;
		\end{tikzpicture}
	\end{center}
\end{solution}

\vfill
\pagebreak

\problem{}
Construct a minimal Sturmian word of order 5.

\begin{solution}
	Use $R_4$ from \ref{sturmianfour} to construct $R_5$, shown below. \par
	Disconnected nodes are omitted.

	\begin{center}
		\begin{tikzpicture}
			\begin{scope}[layer = nodes]
				\node[main] (0000) at (0, 0) {\texttt{0000}};
				\node[main] (1000) at (2, 0) {\texttt{1000}};
				\node[main] (1100) at (4, 0) {\texttt{1100}};
				\node[main] (1110) at (6, 0) {\texttt{1110}};
				\node[main] (1111) at (8, 0) {\texttt{1111}};
			\end{scope}

			\draw[->]
				(1111) edge[loop right] node[label] {$1$} (1111)
				(1111) edge[bend right] node[label] {$0$} (1110)
				(1110) edge[bend left] node[label] {$0$} (1100)
				(1100) edge[bend right] node[label] {$0$} (1000)
				(1000) edge[bend left] node[label] {$0$} (0000)
				(0000) edge[loop left] node[label] {$0$} (0000)
			;
		\end{tikzpicture}
	\end{center}
	This graph generates the minimal Sturmian word \texttt{1111100000}.
\end{solution}

\vfill
\pagebreak


\problem{}
Argue that the words we get by \ref{sturmanthm} are minimal Sturmian words. \par
That is, the word $w$ has length $2n$ and $\mathcal{S}_m(w) = m + 1$ for all $m \leq n$.

\begin{solution}
	We proceed by induction. \par
	First, show that we can produce a minimal order 3 Sturmian word: \par

	\vspace{2mm}


	Since $R_3$ is guaranteed to have four edges with length-$2$ node labels,
	the length of $w$ is $2 + 4 = 2 \times 3 = 6$. \par
	Trivially, we also have $\mathcal{S}_0 = 1$ and $\mathcal{S}_1 = 2$. \par

	\vspace{2mm}

	There are three vertices of $R_3$ given by the three remaining edges of $R_2$.
	Each length-2 subword of $w$ will be represented by the label of one of these
	three nodes. Thus, $\mathcal{S}_2(w) \leq 3$. The line graph of a connected graph
	is connected, so an Eulerian path on $R_3$ reaches every node. We thus have that
	$\mathcal{S}_2(w) = 3$.

	\vspace{2mm}

	By construction, the length 3 subwords of $w$ are all distinct, so $\mathcal{S}_3(w) = 4$.
	We thus conclude that $w$ is a minimal order 3 Sturmian word.

	\linehack{}

	Now, we prove our inductive step: \par
	Assume that the process above produces an order $n-1$ minimal Sturmian word $w_{n-1}$. \par
	We want to show that $w_n$ is also a minimal Sturmian word. \par

	\vspace{2mm}

	By construction, $R_n$ has node labels of length $n-1$ and $n+1$ edges. \par
	Thus, $w_n$ has length $2n$.

	\vspace{2mm}

	The only possible length-$m$ subwords of $w_n$ are those of $w_{n-1}$ for $m < n$. \par
	The line graph of a connected graph is connected, so an Eulerian path on $R_n$ reaches each node.
	Thus, all length-$m$ subwords of $w_{n-1}$ appear in $w_n$.

	\vspace{2mm}

	By our inductive hypothesis, $\mathcal{S}_m(w_n) = m + 1$ for $m < n$. \par
	The length-$n$ subwords of $w_n$ are distinct by construction, and there are
	$n+1$ such subwords.

	\vspace{2mm}

	Thus, $\mathcal{S}_n(w_n) = n + 1$.
\end{solution}

\vfill
\pagebreak