diff --git a/Advanced/DFAs/main.tex b/Advanced/DFAs/main.tex new file mode 100755 index 0000000..4ef0529 --- /dev/null +++ b/Advanced/DFAs/main.tex @@ -0,0 +1,25 @@ +% use [nosolutions] flag to hide solutions. +% use [solutions] flag to show solutions. +\documentclass[ + solutions, + %shortwarning +]{../../resources/ormc_handout} + +\input{tikxset.tex} + +\tikzset{loop above/.style={min distance=5mm,looseness=10}} + +\begin{document} + + \maketitle + + + {Finite Automata} + {Prepared by Mark and Nikita on \today} + + + \input{parts/0 DFA.tex} + + + +\end{document} \ No newline at end of file diff --git a/Advanced/DFAs/parts/0 DFA.tex b/Advanced/DFAs/parts/0 DFA.tex new file mode 100644 index 0000000..d1b2512 --- /dev/null +++ b/Advanced/DFAs/parts/0 DFA.tex @@ -0,0 +1,270 @@ +\section{DFAs} + +This week, we will study computational devices called \textit{deterministic finite automata}. \par +A DFA has a simple job: it will either \say{accept} or \say{reject} a string of letters. + +\vspace{2mm} + +Consider the automaton $A$ shown below: + +\begin{center} +\begin{tikzpicture} + \begin{scope}[layer = nodes] + \node[main] (a) at (0, 0) {$q_1$}; + \node[accept] (b) at (2, 0) {$q_2$}; + \node[main] (c) at (5, 0) {$q_3$}; + \end{scope} + + \draw[->] + (a) edge node[label] {$1$} (b) + (a) edge[loop above] node[label] {$0$} (a) + (b) edge[bend left] node[label] {$0$} (c) + (b) edge[loop above] node[label] {$1$} (b) + (c) edge[bend left] node[label] {$0,1$} (b) + ; +\end{tikzpicture} +\end{center} + +$A$ always starts in the state $q_1$. This is called the \textit{start state}. \par +It takes strings using letters in the alphabet $\{0, 1\}$ and reads them left to right, moving between states along the edges marked by each letter. + +For example, consider the string \texttt{1011}. Processing this string, $A$ will go through the states $q_1 \to q_2 \to q_3 \to q_2 \to q_2$. \par +Note that $q_2$ is drawn with a double circle in the diagram above. 
This means that the state $q_2$ is \textit{accepting}, and that all the strings which end up in it are \textit{accepted}. Similarly, states $q_1$ and $q_3$ are \textit{rejecting} and the strings which end up there are \textit{rejected}. + + + +\problem{} +Which of the following strings are accepted by $A$? +\begin{itemize} + \item \texttt{1} + \item \texttt{1010} + \item \texttt{1110010} + \item \texttt{1000100} +\end{itemize} + +\vfill + + + +\problem{} +Describe the general form of a string accepted by $A$. +\hint{Work backwards from the accepting state, and decide what all the strings must look like at the end in order to be accepted.} + +\begin{solution} + $A$ will accept strings that contain at least one $1$ and end with an even (possibly 0) number of zeroes. +\end{solution} + +\vfill +\pagebreak + + + +Now consider the automaton $B$, which uses the alphabet $\{\texttt{a}, \texttt{b}\}$. \par +It starts in the state $s$ and has two accepting states $a_1$ and $b_1$. + +\begin{center} +\begin{tikzpicture} + \begin{scope}[layer = nodes] + \node[main] (s) at (0, 0) {$s$}; + \node[accept] (a1) at (-2, -0.5) {$a_1$}; + \node[main] (a2) at (-2, -2.5) {$a_2$}; + \node[accept] (b1) at (2, -0.5) {$b_1$}; + \node[main] (b2) at (2, -2.5) {$b_2$}; + \end{scope} + + \draw[->] + (s) edge node[label] {\texttt{a}} (a1) + (a1) edge[loop left] node[label] {\texttt{a}} (a1) + (a1) edge[bend left] node[label] {\texttt{b}} (a2) + (a2) edge[bend left] node[label] {\texttt{a}} (a1) + (a2) edge[loop left] node[label] {\texttt{b}} (a2) + (s) edge node[label] {\texttt{b}} (b1) + (b1) edge[loop right] node[label] {\texttt{b}} (b1) + (b1) edge[bend left] node[label] {\texttt{a}} (b2) + (b2) edge[bend left] node[label] {\texttt{b}} (b1) + (b2) edge[loop right] node[label] {\texttt{a}} (b2) + ; +\end{tikzpicture} +\end{center} + + + + +\problem{} +Which of the following strings are accepted by $B$? +\begin{itemize} + \item \texttt{aa} + \item \texttt{abba} + \item \texttt{abbba} + \item \texttt{baabab} 
+\end{itemize} + +\vfill + + + +\problem{} +Describe the strings accepted by $B$. + +\begin{solution} + They are strings that start and end with the same letter. +\end{solution} + +\vfill +\pagebreak + + + + +\definition{} +An \textit{alphabet} is a finite set of symbols. \par + +\definition{} +A \textit{string} over an alphabet $Q$ is a finite sequence of symbols from $Q$. \par +We denote the empty string $\varepsilon$. \par + + +\vspace{2mm} + +$Q^*$ is the set of all possible strings over $Q$. \par +For example, $\{\texttt{0}, \texttt{1}\}^*$ is the set $\{\varepsilon, \texttt{0}, \texttt{1}, \texttt{00}, \texttt{01}, \texttt{10}, \texttt{11}, \texttt{000},... \}$ \par +Note that this set contains the empty string. + +\definition{} +A \textit{language} over an alphabet $Q$ is a subset of $Q^*$. \\ +For example, the language \say{strings of length 2} over $\{\texttt{0}, \texttt{1}\}$ is $\{\texttt{00}, \texttt{01}, \texttt{10}, \texttt{11}\}$ + +\definition{} +We say a language $L$ is \textit{recognized} by a DFA $A$ if that DFA accepts a string $w$ iff $w \in L$. + + +%\begin{remark} +%A machine, such as DFA or Turing machine, may accept several strings, but it always recognizes only one language. If the machine %accepts no strings, it still recognizes one language — namely, the empty language $\emptyset$. +%\end{remark} + + +\vspace{8mm} + +\problem{} +How many strings of length $n$ are accepted by the automaton $C$? 
+ +\begin{center} +\begin{tikzpicture} + \begin{scope}[layer = nodes] + \node[main] (a) at (0, 0) {$0$}; + \node[accept] (b) at (3, 0) {$1$}; + \node[main] (c) at (5, 0) {$2$}; + \end{scope} + + \draw[->] + (a) edge[loop above] node[label] {\texttt{b}} (a) + (a) edge[bend left] node[label] {\texttt{a}} (b) + (b) edge[bend left] node[label] {\texttt{b}} (a) + (b) edge node[label] {\texttt{a}} (c) + (c) edge[loop above] node[label] {\texttt{a, b}} (c) + ; +\end{tikzpicture} +\end{center} + +\begin{solution} + If $A_n$ is the number of accepted strings of length $n$, then $A_n = A_{n-1}+A_{n-2}$. Taking $0$ as the start state, the initial conditions are $A_1 = A_2 = 1$ (only \texttt{a} and \texttt{ba} are accepted), so $A_n$ is the $n$-th Fibonacci number. +\end{solution} + +%\begin{remark} +%Note that all the states in our DFAs $A$, $B$ and $C$ from figures 1, 2, 3 have outgoing symbols for each letter of the alphabet. %Do the same for your DFAs. +%\end{remark} + +\vfill +\pagebreak + +\problem{} +Draw DFAs that recognize the following languages. In all parts, the alphabet is $\{0,1\}$. +\begin{itemize} + \item $\{w~ | ~w~ \text{begins with a \texttt{1} and ends with a \texttt{0}}\}$ + \item $\{w~ | ~w~ \text{contains at least three \texttt{1}s}\}$ + \item $\{w~ | ~w~ \text{contains the substring \texttt{0101} (i.e., $w = x\texttt{0101}y$ for some $x$ and $y$)}\}$ + \item $\{w~ | ~w~ \text{has length at least three and its third symbol is a \texttt{0}}\}$ + \item $\{w~ | ~w~ \text{starts with \texttt{0} and has odd length, or starts with \texttt{1} and has even length}\}$ + \item $\{w~ | ~w~ \text{doesn't contain the substring \texttt{110}}\}$ +\end{itemize} + + +\begin{solution} + %\part{a} \includegraphics[width=0.3\linewidth]{6a.png} + %\part{b} \includegraphics[width=0.4\linewidth]{6b.png} + %\part{c} \includegraphics[width=0.3\linewidth]{6c.png} + + \medskip + Notice that after getting two 0's in a row we don't reset to the initial state. 
+ %\part{d} \includegraphics[width=0.4\linewidth]{6d.png} + %\part{e} \includegraphics[width=0.3\linewidth]{6e.png} + %\part{f} \includegraphics[width=0.4\linewidth]{6f.png} + + \medskip + + Notice that after getting three 1's in a row we don't reset to the initial state. +\end{solution} + +\vfill + +\problem{} +Draw a DFA over an alphabet $\{\texttt{a}, \texttt{b}, \texttt{@}, \texttt{.}\}$ recognizing the language of strings of the form \texttt{user@website.domain}, where \texttt{user}, \texttt{website} and \texttt{domain} are nonempty strings over $\{\texttt{a}, \texttt{b}\}$ and \texttt{domain} has length 2 or 3. + +\begin{solution} +%\includegraphics[width=0.9\linewidth]{Email.png} +\end{solution} + +\vfill +\pagebreak + +\problem{} +Draw a state diagram for a DFA over an alphabet of your choice that accepts exactly $f(n)$ strings of length $n$ if +\begin{itemize} + \item $f(n) = n$ + \item $f(n) = n+1$ + \item $f(n) = 3^n$ + \item $f(n) = n^2$ + \item $f(n)$ is a Tribonacci number. \par + \textit{Tribonacci numbers} are defined by the sequence $f(0) = 0$, $f(1) = 1$, $f(2) = 1$, + and $f(n) = f(n-1)+f(n-2)+f(n-3)$ for $n \ge 3$. \par + \hint{Fibonacci numbers are given by the automaton prohibiting two \texttt{a}s in a row.} +\end{itemize} + + +\begin{solution} + \begin{itemize} + \item You would need to have an alphabet with three letters. + \item Consider the language of words over $\{0, 1, 2\}$ having the sum of digits equal to $2$, so they contain two 1's or one 2. %\includegraphics[width=0.5\linewidth]{NSqrd.png} + \item Following the hint gives the automaton %\includegraphics[width=0.5\linewidth]{Trib1.png} + \item For this automaton $f(n)$ gives Tribonacci numbers with a shift: $f(0)=1$, $f(1)=2$, $f(2)=4$, $f(3)=7$. 
To account for the shift one can move the starting state in, e.g., this fashion: + %\includegraphics[width=0.5\linewidth]{Trib2.png} + \end{itemize} +\end{solution} + +\vfill + +% \problem{} +% Draw a DFA over an alphabet $\{a, b, c\}$, accepting all the suffixes of the string $abbc$ (including $\varepsilon$) and only them. +% +% \com{TD}{Something suffix automaton} + + +\problem{} + Draw a DFA recognizing the language of strings over $\{\texttt{0}, \texttt{1}\}$ in which \texttt{0} is the third digit from the end. \par + Prove that any such DFA must have at least 8 states. + + \begin{solution} + + \textbf{Part 1:} \par + Index the states by triples of digits \texttt{000}, \texttt{001}, \dots, \texttt{111}. All strings which end with the three digits $d_1d_2d_3$ will end up in the state $d_1d_2d_3$. The starting state will be \texttt{111}. The transitions from $d_1d_2d_3$ by \texttt{0} and \texttt{1} will lead to $d_2d_3\texttt{0}$ and $d_2d_3\texttt{1}$, respectively. Accepting states are states with indices starting with \texttt{0}. + + %\includegraphics[width=0.7\linewidth]{9.png} + + \linehack{} + + \textbf{Part 2:} \par + Strings \texttt{000}, \texttt{001}, \dots, \texttt{111} must lead to pairwise different states. Indeed, if two of them differ in the $i$-th position, then after appending the same $i-1$ digits to both, that position becomes the third from the end, so exactly one of the two resulting strings is accepted. Had the two strings led to the same state, both extensions would end in the same state as well, which is impossible. +\end{solution} + +\vfill +\pagebreak \ No newline at end of file diff --git a/Advanced/DFAs/parts/1 regular.tex b/Advanced/DFAs/parts/1 regular.tex new file mode 100644 index 0000000..980a1c1 --- /dev/null +++ b/Advanced/DFAs/parts/1 regular.tex @@ -0,0 +1,177 @@ +% \section{Regular languages} + +% \definition{} +% A language is called \textit{regular} if it is recognized by some $DFA$. +% \end{definition} + +% \problem{} +% \part{a} Draw a DFA over an alphabet $\{A, B\}$ accepting strings which do not start and end with the same letter. \textit{Hint: the DFA from Fig. 
2 does almost this.} +% \part{b} Prove that for any regular language $L$ over an alphabet $Q$ its complement $\overline{L} = Q^*\setminus L$ is also regular. +% +% \begin{solution} +% Invert accepting and rejecting states. +% \end{solution} + + +% \problem{} +% \part{a} Draw a DFA over an alphabet $\{A, B\}$ accepting strings which do not start and end with the same letter AND have an even length. +% \part{b} Prove that for any regular languages $L_1$, $L_2$ over an alphabet $Q$ their union and intersection are also regular. +% +% \begin{solution} +% Consider a product of automatons where each state is a pair of states in the first and second automaton and every transition works if it was applied to both elements in pair. + +% For union, we call the state $(s_1, s_2)$ accepting if $s_1$ OR $s_2$ is accepting in their respective automaton. + +% For intersection, we call it accepting if $s_1$ AND $s_2$ are accepting in their respective automaton. +% \end{solution} + + +% However, not all languages are regular. You will later see that the language consisting of palindromes is not regular. + +% Our technique for proving nonregularity stems from a theorem about regular +% languages, traditionally called the pumping lemma. This theorem states that all regular languages have a special property. If we can show that a language does not have this property, we are guaranteed that it is not regular. The property states that all strings in the language can be “pumped” if they are at least as long as a certain special value, called \textit{the pumping length}. That means each such string contains a section that can be repeated any number of times with the resulting string remaining in the language. 
+% \medskip + +% \noindent \fbox{\begin{minipage}{\textwidth} +% \begin{theorem}[Pumping lemma] +% If $A$ is a regular language, then there is a number $p$ (the pumping length) where if $s$ is any string in $A$ of length at least $p$, then $s$ may be divided into three pieces, $s = xyz$, satisfying the following conditions: +% \begin{enumerate} +% \item for each $i \ge 0$, $xy^iz \in A$, +% \item $|y| > 0$, and +% \item $|xy| \le p$. +% \end{enumerate} +% \end{theorem} +% \end{minipage} +% } +% \medskip + +% Here $|s|$ represents the length of string $s$ (assuming $\varepsilon$ has length 0), $y^i$ means that $i$ copies of $y$ are concatenated together, and $y^0$ equals $\varepsilon$. +% When $s$ is divided into $xyz$, either $x$ or $z$ may be $\varepsilon$, but condition 2 says that $y \ne \varepsilon$. Observe that without condition 2 the theorem would be trivially true. + + +% \problem{} +% \part{a} Check that the pumping lemma holds for the language recognized by the automaton C from Figure 3 and pumping length $p=2$. + +% \part{b} Suppose that there is a regular language $L$ in the alphabet $\{a\}$. $L$ contains all strings of $a$'s whose length is some set $S$. Derive from the pumping lemma that if $S$ is infinite then it contains some arithmetic progression. + +% \part{c} Prove directly that if $S$ is infinite, than it contains some arithmetic progression. \textit{Hint: look at the first cycle in the DFA you get while reading $aaa\dots$.} + +% \part{d} Prove the pumping lemma. \textit{Hint: look at the first cycle in the DFA you get while reading $s$.} +% +% \begin{solution} +% Look at the first place where we come to an already visited state while reading the word. Say the first time we came to this state after reading $x$ and the second time after reading $xy$. Then $y$ doesn't move us from this state and we can omit it or repeat any number of times we want. 
+% \end{solution} + +% \problem{} +% Show that the following languages are not regular:\\ +% \part{a} $\{a^{n^2}\}$ -- the language of all strings over alphabet $\{a\}$ whose length is a perfect square; +% \part{b} $\{0^n1^n| n \in \mathbb{N}_0\}$ over $\{0, 1\}$ (which is the shorthand for the set $\{\varepsilon, 01, 0011, \dots\}$); +% \part{c} Language $ADD$ over the alphabet $\Sigma = \{0, 1, +, =\}$ where +% $$ADD = \{"x=y+z"|~x, y, z\text{ are binary integers, and $x$ is the sum of $y$ and $z$}\};$$ +% \vspace{-2em} +% \part{d} Language of all palindromes over the Latin alphabet. +% +% \begin{solution} +% All of them are done by the pumping lemma. + +% \noindent\part{a} Follows from parts b-c of the previous problem; +% \part{b} Assume the contrary and take the $p$ from the pumping lemma. Then the string $0^p1^p$ is accepted and so the string $0^{p-|y|}1^p$ (or $0^{p+|y|}1^p$) is also accepting; +% \part{c} Pumping $10^{p+1}=10^p+10^p$; +% \part{d} Pumping $a^pba^p$. +% \end{solution} + +% \problem{} +% For a word $w$ over an alphabet $\{a, b\}$ denote by $|w|_a$ and $|w|_b$ the amount of letters $a$ and $b$, respectively, inside $w$.\\ +% \part{a} Prove that the language $L_p = \{w,\text{ s.t. $p$ divides } |w|_a - |w|_b\}$ is regular for any prime $p$. +% \part{b} Prove that $L = \{w,\text{ s.t. } |w|_a - |w|_b = \pm1\}$ is not regular. +% \part{c} Prove that there are infinitely many primes. +% +% \begin{solution} +% \href{https://www.jstor.org/stable/48661886#metadata_info_tab_contents}{Link} +% \end{solution} + +% \section{Tilings} +% Tilings are all around us. Given a bunch of little pieces, it is human nature to wonder how they fit together. In the most general sense of the word, tiling is just a way of decomposing some space into lots of little pieces (tiles) that fit together without gaps or overlaps. 
Mathematicians study the structures emerging from the tilings of a plane or high-dimensional space wondering about their properties such as periodicity. + +% Let us consider the one-dimensional tilings. {\em Tiles} can be seen as {\em letters} over a finite alphabet, and {\em tilings} as words infinite in both directions. + +% \begin{figure}[h] +% \begin{center} +% \includegraphics[width = 0.5\linewidth]{Alphabet.png} +% \caption{Tiles forming a finite word} +% \end{center} +% \end{figure} + +% We call {\em pattern} of a word any of its finite subwords. +% Letter are allowed to stay next to each other or not, and a common way to specify such constraints is to give a finite set of forbidden patterns. +% For example, if $aa$ and $bb$ are forbidden, then the only infinite word that can be formed is "$\dots babab \dots$" -- the infinite periodic word with period $(ab)$. + +% \problem{} +% Let us consider the set $S$ of infinite words in the alphabet $\{a,b\}$ which contain runs of $b$'s of length at most three. +% Does there exist a finite set of forbidden words determining the $S$? +% +% \begin{solution} +% Yes, we just prohibit the pattern $bbbb$. +% \end{solution} + +% \problem{} Count the minimal number of forbidden words to determine the following infinite words (given by its periods): \part{a} $(ab)$, \part{b} $(aab)$, \part{c*} $(aabaabab)$, \part{d*} $(aabaababaabaababaabab)$. +% +% \begin{solution} +% \part{a} We can determine it by prohibiting 2 patterns -- $aa$ and $bb$. Moreover, we need to prohibit at least two patterns, because otherwise one of the words $(a)$ or $(b)$ would not be prohibited. + +% \part{b} We can prohibit $aaa$, $bb$ and $bab$. After that $(aab)$ is determined uniquely. Moreover, we need to prohibit at least 3 patterns, since otherwise $(a)$, $(b)$ or $(ab)$ will not be prohibited. + +% \part{c, d} We act analogously. You can see the generalized solution after the last problem. 
+ +% \end{solution} + +% \problem{} +% Consider the set of infinite words over the alphabet $\{a,b\}$ where the patterns (subword) $ba^nb$ are forbidden, for any $n>1$. +% Can you find a finite set of forbidden patterns that defines the same set of words? +% +% \begin{solution} +% No, since any pattern is finite, and if the maximal prohibited pattern has length $p$, there is no way of prohibiting two $b$'s on distance $>p$. +% \end{solution} + + +% \problem{} +% What about the previous question if, in addition, you are now allowed to color letters (using finitely many different colors), that is, for example, to make a difference between a blue $a$ and a green $a$? +% +% \begin{solution} +% Yes. Take two types of $a$ and prohibit patterns $bb$, $a_{blue}b$, $ba_{green}$, $a_{blue}a_{green}$ and $a_{green}a_{blue}$. Then words $(a)$ and $\dots aaabaaa \dots$ will be still possible (color everything left from $b$ green and right -- blue). But $ba^nb$ is impossible since all $a$'s between $b$ should have simultaneously blue and green color. +% \end{solution} + +% \problem{} Let $S$ be the set of infinite words whose finite runs of $a$'s are all of the even length only. +% Is it possible to determine the $S$ with some finite number of forbidden words? +% And if we can color letters using finitely many different colors? +% + +% \problem{} Same questions if the length of finite runs of $a$'s is asked to be odd. + +% \problem{} Let $u_0=a$, $u_1=ab$, $u_{n+2}=u_n u_{n+1}$. +% Count the minimal number of necessary forbidden words to enforce infinite periodical words with period $u_n$. 
+% +% \begin{solution}~\\ + +% \includegraphics[width = 0.9\linewidth]{Patterns1.png} + +% See the continuation on the next page + +% \includegraphics[width = 0.9\linewidth]{Patterns2.png} +% \end{solution} + +% % \section{DFAs from NFAs} + +% % \com{TD}{Show NFA==DFA} +% % \com{TD}{Increasing number of states exponentially} + +% % \problem{}[IOM 2016, P6] +% % In a country with $n$ cities, some pairs of cities are connected by one-way flights operated by one of two companies A and B. Two cities can be connected by more than one flight in either direction. An AB-word $w$ is called implementable if there is a sequence of connected flights whose companies’ names form the word $w$. Given that every AB-word of length $2^n$ is implementable, prove that every finite AB-word is implementable. (An AB-word of length $k$ is an arbitrary sequence of $k$ letters A or B; e.g. AABA is a word of length 4.) +% % +% % \begin{solution} +% % \href{https://imomath.com/srb/zadaci/2016_metropolis_resenja_e.pdf}{Solution} +% % \end{solution} + +% % \section{Regular expressions} + +% % \com{TD}{Prove that regular expressions give regular languages and vice versa} diff --git a/Advanced/DFAs/tikxset.tex b/Advanced/DFAs/tikxset.tex new file mode 100644 index 0000000..aed6432 --- /dev/null +++ b/Advanced/DFAs/tikxset.tex @@ -0,0 +1,45 @@ +\usetikzlibrary{arrows.meta} +\usetikzlibrary{shapes.geometric} +\usetikzlibrary{patterns} + +% We put nodes in a separate layer, so we can +% slightly overlap with paths for a perfect fit +\pgfdeclarelayer{nodes} +\pgfdeclarelayer{path} +\pgfsetlayers{main,nodes} + +% Layer settings +\tikzset{ + % Layer hack, lets us write + % layer = * in scopes. 
+ layer/.style = { + execute at begin scope={\pgfonlayer{#1}}, + execute at end scope={\endpgfonlayer} + }, + % + % Arrowhead tweaks + >={Latex[ width=2mm, length=2mm ]}, + label/.style = { + circle, + % For automatic red background in solutions + fill = \ORMCbgcolor, + draw = none + }, + % + % Nodes + main/.style = { + draw, + circle, + fill = white + }, + accept/.style = { + draw, + circle, + fill = white, + double, + }, + hatch/.style = { + pattern=north west lines, + pattern color=gray + } +} \ No newline at end of file