Merge branch 'stopping'

This commit is contained in:
Mark 2024-09-26 09:25:38 -07:00
commit 9a531509ca
Signed by: Mark
GPG Key ID: C6D63995FE72FD80
7 changed files with 788 additions and 5 deletions

View File

@ -0,0 +1,26 @@
% use [nosolutions] flag to hide solutions.
% use [solutions] flag to show solutions.
\documentclass[
solutions,
singlenumbering
]{../../resources/ormc_handout}
\usepackage{../../resources/macros}
\usepackage{units}
\usepackage{mathtools} % for \coloneqq
\uptitlel{Advanced 2}
\uptitler{\smallurl{}}
\title{Stopping problems}
\subtitle{Prepared by Mark on \today{}}
\begin{document}
\maketitle
\input{parts/0 probability.tex}
\input{parts/1 intro.tex}
\input{parts/2 secretary.tex}
\input{parts/3 orderstat.tex}
\end{document}

View File

@ -0,0 +1,130 @@
\section{Probability}
\definition{}
A \textit{sample space} is a finite set $\Omega$. \par
The elements of this set are called \textit{outcomes}. \par
An \textit{event} is a set of outcomes (i.e., a subset of $\Omega$).
\definition{}
A \textit{probability function} over a sample space $\Omega$ is a function $\mathcal{P}: P(\Omega) \to [0, 1]$ \par
that maps events to real numbers between 0 and 1. \par
Any probability function has the following properties:
\begin{itemize}
\item $\mathcal{P}(\varnothing) = 0$
\item $\mathcal{P}(\Omega) = 1$
\item For events $A$ and $B$ where $A \cap B = \varnothing$, $\mathcal{P}(A \cup B) = \mathcal{P}(A) + \mathcal{P}(B)$
\end{itemize}
\problem{}<threecoins>
Say we flip a fair coin three times. \par
List all elements of the sample space $\Omega$ this experiment generates.
\vfill
\problem{}
Using the same setup as \ref{threecoins}, find the following:
\begin{itemize}
\item $\mathcal{P}(~ \{\omega \in \Omega ~|~ \omega \text{ has at least two \say{heads}}\} ~)$
\item $\mathcal{P}(~ \{\omega \in \Omega ~|~ \omega \text{ has an odd number of \say{heads}}\} ~)$
\item $\mathcal{P}(~ \{\omega \in \Omega ~|~ \omega \text{ has at least one \say{tails}}\} ~)$
\end{itemize}
\vfill
\pagebreak
%
% MARK: Page
%
\definition{}
Given a sample space $\Omega$ and a probability function $\mathcal{P}$, \par
a \textit{random variable} is a function from $\Omega$ to a specified output set.
\vspace{2mm}
For example, given the three-coin-toss sample space
$\Omega = \{
\texttt{TTT},~ \texttt{TTH},~ \texttt{THT},~
\texttt{THH},~ \texttt{HTT},~ \texttt{HTH},~
\texttt{HHT},~ \texttt{HHH}
\}$,
We can define a random variable $\mathcal{H}$ as \say{the number of heads in a throw of three coins}. \par
As a function, $\mathcal{H}$ maps values in $\Omega$ to values in $\mathbb{Z}^+_0$ and is defined as:
\begin{itemize}
\item $\mathcal{H}(\texttt{TTT}) = 0$
\item $\mathcal{H}(\texttt{TTH}) = 1$
\item $\mathcal{H}(\texttt{THT}) = 1$
\item $\mathcal{H}(\texttt{THH}) = 2$
\item ...and so on.
\end{itemize}
\definition{}
We can compute the probability that a random variable takes a certain value by computing the probability of
the set of outcomes that produce that value. \par
\vspace{2mm}
For example, if we wanted to compute $\mathcal{P}(\mathcal{H} = 2)$, we would find
$\mathcal{P}\bigl(\{\texttt{THH}, \texttt{HTH}, \texttt{HHT}\}\bigr)$.
\problem{}
Say we flip a coin with $\mathcal{P}(\texttt{H}) = \nicefrac{1}{3}$ three times. \par
What is $\mathcal{P}(\mathcal{H} = 1)$, with $\mathcal{H}$ defined as above? \par
What is $\mathcal{P}(\mathcal{H} = 5)$?
\vfill
\problem{}
Say we roll a fair six-sided die twice. \par
Let $\mathcal{X}$ be a random variable measuring the sum of the two results. \par
Find $\mathcal{P}(\mathcal{X} = x)$ for all $x$ in $\mathbb{Z}$.
\vfill
\pagebreak
%
% MARK: Page
%
\definition{}
Say we have a random variable $\mathcal{X}$ that produces outputs in $\mathbb{R}$. \par
The \textit{expected value} of $\mathcal{X}$ is then defined as
\begin{equation*}
\mathcal{E}(\mathcal{X})
~\coloneqq~ \sum_{x \in A}\Bigl(x \times \mathcal{P}\bigl(\mathcal{X} = x\bigr)\Bigr)
~=~ \sum_{\omega \in \Omega}\Bigl(\mathcal{X}(\omega) \times \mathcal{P}(\omega)\Bigr)
\end{equation*}
That is, $\mathcal{E}(\mathcal{X})$ is the average of all possible outputs of $\mathcal{X}$ weighted by their probability.
\problem{}
Say we flip a coin with $\mathcal{P}(\texttt{H}) = \nicefrac{1}{3}$ three times. \par
Define $\mathcal{H}$ as the number of heads we see. \par
Find $\mathcal{E}(\mathcal{H})$.
\vfill
\problem{}
Let $\mathcal{A}$ and $\mathcal{B}$ be two random variables. \par
Show that $\mathcal{E}(\mathcal{A} + \mathcal{B}) = \mathcal{E}(\mathcal{A}) + \mathcal{E}(\mathcal{B})$.
\vfill
\definition{}
Let $A$ and $B$ be events on a sample space $\Omega$. \par
We say that $A$ and $B$ are \textit{independent} if $\mathcal{P}(A \cap B) = \mathcal{P}(A) \times \mathcal{P}(B)$. \par
Intuitively, events $A$ and $B$ are independent if the outcome of one does not affect the other.
\definition{}
Let $\mathcal{A}$ and $\mathcal{B}$ be two random variables over $\Omega$. \par
We say that $\mathcal{A}$ and $\mathcal{B}$ are independent if the events $\{\omega \in \Omega ~|~ \mathcal{A}(\omega) = a\}$
and $\{\omega \in \Omega ~|~ \mathcal{B}(\omega) = b\}$ are independent for all $(a, b)$ that $\mathcal{A}$ and $\mathcal{B}$ can produce.
\pagebreak

View File

@ -0,0 +1,67 @@
\section{Introduction}
\generic{Setup:}
Suppose we toss a 6-sided die $n$ times. \par
It is easy to detect the first time we roll a 6. \par
What should we do if we want to detect the \textit{last}?
\problem{}<lastl>
Given $l \leq n$, what is the probability that the last $l$
tosses of this die contain exactly one six? \par
\hint{Start with small $l$.}
\begin{solution}
$\mathcal{P}(\text{last } l \text{ tosses have exactly one 6}) = l \times (\nicefrac{1}{6})(\nicefrac{5}{6})^{l-1}$
\end{solution}
\vfill
\problem{}
For what value of $l$ is the probability in \ref{lastl} maximal? \par
The following table may help.
\begin{center}
\begin{tabular}{|| c | c | c ||}
\hline
\rule{0pt}{3.5mm} % Bonus height for exponent
$l$ & $(\nicefrac{5}{6})^l$ & $(\nicefrac{1}{6})(\nicefrac{5}{6})^l$ \\
\hline\hline
1 & 0.83 & 0.133 \\
\hline
2 & 0.69 & 0.115 \\
\hline
3 & 0.57 & 0.095 \\
\hline
4 & 0.48 & 0.089 \\
\hline
5 & 0.40 & 0.067 \\
\hline
6 & 0.33 & 0.055 \\
\hline
7 & 0.27 & 0.045 \\
\hline
8 & 0.23 & 0.038 \\
\hline
\end{tabular}
\end{center}
\begin{solution}
$(\nicefrac{1}{6})(\nicefrac{5}{6})^l \times l$ is maximal at $l \approx 5.48$, so $l = 5$. \par
$l = 6$ is close enough.
\end{solution}
\vfill
\problem{}
Finish your solution: \par
In $n$ rolls of a six-sided die, what strategy maximizes
our chance of detecting the last $6$ that is rolled? \par
What is the probability of our guess being right?
\begin{solution}
Whether $l = 5$, $5.4$, or $6$, the probability of success rounds to $0.40$.
\end{solution}
\vfill
\pagebreak

View File

@ -0,0 +1,276 @@
\section{The Secretary Problem}
\definition{The secretary problem}
Say we need to hire a secretary. We have exactly one position to fill,
and we must fill it with one of $n$ applicants. These $n$ applicants,
if put together, can be ranked unambiguously from \say{best} to \say{worst}.
\vspace{2mm}
We interview applicants in a random order, one at a time. \par
At the end of each interview, we either reject the applicant (and move on to the next one), \par
or select the applicant (which fills the position and ends the process).
\vspace{2mm}
Each applicant is interviewed at most once---we cannot return to an applicant we've rejected. \par
In addition, we cannot reject the final applicant, as doing so will leave us without a secretary.
\vspace{2mm}
For a given $n$, we would like to maximize our probability of selecting the best applicant. \par
This is the only metric we care about---we do not try to maximize the rank of our applicant. \par
Hiring the second-best applicant is no better than hiring the worst.
\problem{}
If $n = 1$, what is the best hiring strategy, and what is the probability that we hire the best applicant?
\begin{solution}
This is trivial. Hire the first applicant, she's always the best.
\end{solution}
\vfill
\problem{}
If $n = 2$, what is the best hiring strategy, and what is the probability that we hire the best applicant? \par
Is this different from the probability of hiring the best applicant at random?
\begin{solution}
There are two strategies:
\begin{itemize}
\item hire the first
\item hire the second
\end{itemize}
Both are equivalent to the random strategy.
\vspace{2mm}
Intuitively, the fact that a strategy can't help us makes sense: \par
When we're looking at the first applicant, we have no information; \par
when we're looking at the second, we have no agency (i.e., we \textit{must} hire).
\end{solution}
\vfill
\problem{}
If $n = 3$, what is the probability of hiring the best applicant at random? \par
Come up with a strategy that produces better odds.
\begin{solution}
Once we have three applicants, we can make progress.
\vspace{2mm}
The remark from the previous solution still holds: \par
When we're looking at the first applicant, we have no information; \par
when we're looking at the second, we have no choices.
\vspace{2mm}
So, let's make our decision at the second candidate. \par
If we hire only when the second candidate is better than the first, \par
we end up hiring the best candidate exactly half the time.
\vspace{2mm}
This can be verified by checking all six cases.
\end{solution}
\vfill
\pagebreak
%
% MARK: Page
%
\problem{}<bestyet>
Should we ever consider hiring a candidate that \textit{isn't} the best we've seen so far? \par
Why or why not? \hint{Read the problem again.}
\begin{solution}
No! A candidate that isn't the best yet cannot be the best overall! \par
Remember---this problem is only interested in hiring the \textit{absolute best} candidate. \par
Our reward is zero in all other cases.
\end{solution}
\vfill
\remark{}
\ref{bestyet} implies that we should automatically reject any applicant that isn't
the best we've seen. We can take advantage of this fact to restrict the types of
strategies we consider.
\remark{}
Let $B_x$ be the event \say{the $x^\text{th}$ applicant is better than all previous applicants,} \par
and recall that we only know the \textit{relative} ranks of our applicants: \par
given two candidates, we know \textit{which} is better, but not \textit{by how much}.
\vspace{2mm}
Therefore, the results of past events cannot provide information about future $B_x$. \par
All events $B_x$ are independent.
\vspace{2mm}
We can therefore ignore any strategy that depends on the outcomes of individual $B_x$.
Given this realization, we are left with only one kind of strategy: \par
We blindly reject the first $(k - 1)$ applicants, then select the next \say{best-yet} applicant. \par
All we need to do now is pick the optimal $k$.
\problem{}
Consider the secretary problem with a given $n$. \par
What are the probabilities of each $B_x$?
\vfill
\problem{}<seca>
What is the probability that the $n^\text{th}$ applicant is the overall best applicant?
\begin{solution}
All positions are equally likely. $\nicefrac{1}{n}$.
\end{solution}
\vfill
\pagebreak
%
% MARK: Page
%
\problem{}<secb>
Given that the $x^\text{th}$ applicant is the overall best, what is the probability of hiring this applicant \par
if we use the \say{look-then-leap} strategy detailed above? \par
\hint{
Under what conditions would we \textit{not} hire this applicant? \par
This probability depends on $k$ and $x$.
}
\begin{solution}
Say that the $x^\text{th}$ applicant is the best overall. If we do not hire this applicant,
we must have hired a candidate that came before them. \par
\vspace{2mm}
What is the probability of this? We saw $x-1$ applicants before the $x^\text{th}$. \par
If we hired one of them, the best of those initial $x-1$ candidates did \textit{not} fall
into the initial $k-1$ applicants we rejected.
\note{(This is again verified by contradiction: if the best of the first $x-1$ applicants
\textit{was} within the first $k-1$, we would hire the $x^\text{th}$)}
\vspace{2mm}
There are $x-1$ positions to place the best of the first $x-1$ candidates, \par
and $k-1$ of these positions are initially rejected. \par
Thus, the probability of the best of the first $x-1$ applicants being rejected is $\frac{k-1}{x-1}$.
\vspace{2mm}
Unraveling our previous logic, we find that the probability we are interested in is also $\frac{k-1}{x-1}$.
\end{solution}
\vfill
\problem{}<phisubn>
Consider the secretary problem with $n$ applicants. \par
If we reject the first $k$ applicants and hire the first \say{best-yet} applicant we encounter, \par
what is the probability that we select the best candidate? \par
Call this probability $\phi_n(k)$.
\begin{solution}
Using \ref{seca} and \ref{secb}, this is straightforward:
\[
\phi_n(k)
= \sum_{x = k}^{n}\left( \frac{1}{n} \times \frac{k-1}{x-1} \right)
\]
\end{solution}
\vfill
\problem{}
Find the $k$ that maximizes $\phi_n(k)$ for $n$ in $\{1, 2, 3, 4, 5\}$.
\begin{solution}
Brute force. We already know that $\phi_1(1) = 1.0$ and $\phi_2(1) = \phi_3(2) = 0.5$. \par
The maximal value of $\phi_4$ is $\phi_4(2) = 0.46$, and of $\phi_5$ is $\phi_5(3) = 0.43$.
\end{solution}
\vfill
\pagebreak
%
% MARK: Page
%
\problem{}
Let $r = \frac{k-1}{n}$, the fraction of applicants we reject. Show that
\begin{equation*}
\phi_n(k)
= r \sum_{x = k}^{n}\left( \frac{1}{x-1} \right)
\end{equation*}
\begin{solution}
This is easy.
\end{solution}
\vfill
\problem{}
With a bit of fairly unpleasant calculus, we can show that the following is true for large $n$:
\begin{equation*}
\sum_{x=k}^{n}\frac{1}{x-1}
~\approx~ \ln\Bigl(\frac{n}{k}\Bigr)
\end{equation*}
Use this fact to find an approximation of $\phi_n(k)$ at large $n$ in terms of $r$. \par
\hint{If $n$ is big, $\frac{k-1}{n} \approx \frac{k}{n}$.}
\begin{solution}
\begin{equation*}
\phi_n(k)
~=~ r \sum_{x = k}^{n}\left( \frac{1}{x-1} \right)
~\approx~ r \times \ln\left(\frac{n}{k}\right)
~=~ -r \times \ln\left(\frac{k}{n}\right)
~\approx~ -r \times \ln(r)
\end{equation*}
\end{solution}
\vfill
\problem{}
Find the $r$ that maximizes $\lim_{n \rightarrow \infty} \phi_n$. \par
Also, find the value of $\phi_n$ at this point. \par
\note{If you aren't familiar with calculus, ask an instructor for help.}
\begin{solution}
Use the usual calculus tricks:
\begin{equation*}
\frac{d}{dr} \bigl( -r \times \ln(r) \bigr)
= -1 - \ln(r)
\end{equation*}
Which is zero at $r = e^{-1}$. The value of $-r \times \ln(r)$ at this point is also $\frac{1}{e}$.
\end{solution}
\vfill
Thus, the \say{look-then-leap} strategy with $r = e^{-1}$ should select the best candidate about $e^{-1} \approx 37\%$ of the time,
\textit{regardless of $n$.} Our probability of success does not change as $n$ gets larger! \par
\note{Recall that the random strategy succeeds with probability $\nicefrac{1}{n}$. \par
That is, it quickly becomes small as $n$ gets large.}
\pagebreak

View File

@ -0,0 +1,204 @@
\section{Another Secretary Problem}
As you may have already noticed, the secretary problem we discussed in the previous section
is somewhat disconnected from reality. Under what circumstances would one only be satisfied
with the \textit{absolute best} candidate? It may make more sense to maximize the average rank
of the candidate we hire, rather than the probability of selecting the best. This is the problem
we'll attempt to solve next.
\definition{}
The problem we're solving is summarized below.
Note that this is nearly identical to the classical secretary problem in the previous
section---the only thing that has changed is the goal.
\begin{itemize}
\item We have exactly one position to fill, and we must fill it with one of $n$ applicants.
\item These $n$ applicants, if put together, can be ranked unambiguously from \say{best} to \say{worst}.
\item We interview applicants in a random order, one at a time.
\item After each interview, we either reject or select the applicant.
\item We cannot return to an applicant we've rejected.
\item The process ends once we select an applicant.
\vspace{2mm}
\item Our goal is to maximize the rank of the applicant we hire.
\end{itemize}
\definition{}<mod>
Just like before, we need to restate this problem in the language of probability. \par
To do this, we'll say that each candidate has a \textit{quality} rating in $[0, 1]$. \par
\vspace{2mm}
Our series of applicants then becomes a series of random variables $\mathcal{X}_1, \mathcal{X}_2, ..., \mathcal{X}_n$, \par
where each $\mathcal{X}_i$ is drawn uniformly from $[0, 1]$.
\problem{}<notsatisfy>
The modification in \ref{mod} doesn't fully satisfy the constraints of the secretary problem. \par
Why not?
\begin{solution}
If we observe $\mathcal{X}_i$ directly, we obtain \textit{absolute} scores. \par
This is more information than the secretary problem allows us to have---we can know which of
two candidates is better, but \textit{not by how much}.
\end{solution}
\vfill
Ignore this issue for now. We'll return to it later.
\problem{}
Let $\mathcal{X}$ be a random variable uniformly distributed over $[0, 1]$. \par
Given a real number $x$, what is $\mathcal{P}(\mathcal{X} \leq x)$?
\begin{solution}
\begin{equation*}
\mathcal{P}(\mathcal{X} \leq x) =
\begin{cases}
0 & x \leq 0 \\
x & 0 < x < 1 \\
1 & \text{otherwise}
\end{cases}
\end{equation*}
\end{solution}
\vfill
\problem{}
Say we have five random variables $\mathcal{X}_1, \mathcal{X}_2, ..., \mathcal{X}_5$. \par
Given some $y$, what is the probability that all five $\mathcal{X}_i$ are smaller than $y$?
\begin{solution}
Naturally, this is $\mathcal{P}(\mathcal{X} \leq y)^5$, which is $y^5$.
\end{solution}
\vfill
\pagebreak
%
% MARK: Page
%
\definition{}
Say we have a random variable $\mathcal{X}$ which we observe $n$ times. \note{(for example, we repeatedly roll a die)}
We'll arrange these observations in increasing order, labeled $x_1 < x_2 < ... < x_n$. \par
Under this definition, $x_i$ is called the \textit{$i^\text{th}$ order statistic}---the $i^\text{th}$ smallest sample of $\mathcal{X}$.
\problem{}<ostatone>
Say we have a random variable $\mathcal{X}$ uniformly distributed on $[0, 1]$, of which we take $5$ observations. \par
Given some $y$, what is the probability that $x_5 < y$? How about $x_4 <y $?
\begin{solution}
$x_5 < y$: ~This is a restatement of the previous problem.
\vspace{2mm}
$x_4 < y$: ~We need 4 measurements to be smaller,
and one to be larger. Accounting for permutations, we get
$
5\mathcal{P}(\mathcal{X} \leq y)^4
\mathcal{P}(\mathcal{X} > y)
+
\mathcal{P}(\mathcal{X} \leq y)^5
$, which is $5y^4(1-y) + y^5$.
\end{solution}
\vfill
\problem{}
Consider the same setup as \ref{ostatone}, but with $n$ measurements. \par
What is the probability that $x_i < y$ for a given $y$?
\begin{solution}
\begin{equation*}
\mathcal{P}(x_i < y)
~=~
\sum_{j=i}^{n}
\binom{n}{j} \times
y^j
(1-y)^{n-j}
\end{equation*}
\end{solution}
\vfill
\remark{}
The expected value of the $i^\text{th}$ order statistic on $n$ samples of the uniform distribution is below.
\begin{equation*}
\mathcal{E}(x_i) = \frac{i}{n+1}
\end{equation*}
We do not have the tools to derive this yet.
\pagebreak
%
% MARK: Page
%
\definition{}
Recall \ref{notsatisfy}. We need one more modification. \par
In order to preserve the constraints of the problem, we will not be allowed to observe $\mathcal{X}_i$ directly. \par
Instead, we'll be given an \say{indicator} $\mathcal{I}_i$ for each $\mathcal{X}_i$, which produces values in $\{0, 1\}$. \par
If the value we observe when interviewing $\mathcal{X}_i$ is the best we've seen so far, $\mathcal{I}_i$ will produce $1$. \par
If it isn't, $\mathcal{I}_i$ produces $0$.
\problem{}
Given a secretary problem with $n$ applicants, what is $\mathcal{E}(\mathcal{I}_i)$?
\begin{solution}
\begin{equation*}
\mathcal{E}(\mathcal{I}_i) = \frac{1}{i}
\end{equation*}
\end{solution}
\vfill
\problem{}
What is $\mathcal{E}(\mathcal{X}_i ~|~ \mathcal{I}_i = 1)$? \par
In other words, what is the expected value of $\mathcal{X}_i$ given that \par
we know this candidate is the best we've seen so far?
\begin{solution}
This is simply the expected value of the $i^\text{th}$ order statistic on $i$ samples:
\begin{equation*}
\mathcal{E}(\mathcal{X}_i ~|~ \mathcal{I}_i = 1) = \frac{i}{i+1}
\end{equation*}
\end{solution}
\vfill
\pagebreak
\problem{}
In the previous section, we found that the optimal strategy for the classical secretary problem is to
reject the first $e^{-1} \times n$ candidates, and select the next \say{best-yet} candidate we see. \par
\vspace{2mm}
How effective is this strategy for the ranked secretary problem? \par
Find the expected rank of the applicant we select using this strategy.
\vfill
\problem{}
Assuming we use the same kind of strategy as before (reject $k$, select the next \say{best-yet} candidate), \par
show that $k = \sqrt{n}$ optimizes the expected rank of the candidate we select.
\begin{solution}
This is a difficult bonus problem. See
\texttt{Neil Bearden, J. (2006). A new secretary problem with rank-based selection and cardinal payoffs.}
\end{solution}
\vfill
\pagebreak

View File

@ -0,0 +1,81 @@
\section{The Secretary, Again}
Now, let's solve the secretary problem as a stopping rule problem. \par
The first thing we need to do is re-write it into the form we discussed in the previous section. \par
Namely, we need...
\begin{itemize}
\item A sequence of random variables $\mathcal{X}_1, \mathcal{X}_2, ..., \mathcal{X}_t$
\item A sequence of reward functions $y_0, y_1(\sigma_1), ..., y_t(\sigma_t)$.
\end{itemize}
\vspace{2mm}
For convenience, I've summarized the secretary problem below:
\begin{itemize}
\item We have exactly one position to fill, and we must fill it with one of $n$ applicants.
\item These $n$ applicants, if put together, can be ranked unambiguously from \say{best} to \say{worst}.
\item We interview applicants in a random order, one at a time.
\item After each interview, we reject the applicant and move on, \par
or select the applicant and end the process.
\item We cannot return to an applicant we've rejected.
\item Our goal is to select the \textit{overall best} applicant.
\end{itemize}
\definition{}
First, we'll define a sequence of $\mathcal{X}_i$ that fits this problem. \par
Each $\mathcal{X}_i$ will give us the \textit{relative rank} of each applicant. \par
For example, if $\mathcal{X}_i = 1$, the $i^\text{th}$ applicant is the best of the first $i$. \par
If $\mathcal{X}_i = 3$, two applicants better than $i$ came before $i$.
\problem{}
What values can $\mathcal{X}_1$ take, and what are their probabilities? \par
How about $\mathcal{X}_2$, $\mathcal{X}_3$, and $\mathcal{X}_4$?
\vfill
\remark{}
Now we need to define $y_n(\sigma_n)$. Intuitively, it may make sense to set $y_n = 1$ if the $n^\text{th}$
applicant is the best, and $y_n = 0$ otherwise---but this doesn't work.
\vspace{2mm}
As defined in the previous section, $y_n$ can only depend on $\sigma_n = [x_1, x_2, ..., x_n]$, the previous $n$ observations.
We cannot define $y_n$ as specified above because, having seen $\sigma_n$, we \textit{cannot} know whether or not the $n^\text{th}$
applicant is the best.
\vspace{2mm}
To work around this, we'll define our reward for selecting the $n^\text{th}$ applicant as the \textit{probability}
that this applicant is the best.
\problem{}
Define $y_n$.
\begin{solution}
\begin{itemize}
\item An applicant should only be selected if $\mathcal{X}_i = 1$
\item If we accept the $j^\text{th}$ applicant, the probability we select the absolute best is equal to \par
the probability that the best of the first $j$ candidates is the best overall. \par
\vspace{1mm}
This is just the probability that the best candidate overall appears among the first $j$, \par
and is thus $\nicefrac{j}{n}$.
\end{itemize}
So,
\begin{equation*}
y_j(\sigma_j) =
\begin{cases}
\nicefrac{j}{n} & x_j = 1 \\
0 & \text{otherwise}
\end{cases}
\end{equation*}
\vspace{2mm}
Note that $y_0 = 0$, and that $y_n$ depends only on $x_n$.
\end{solution}
\vfill
\pagebreak

View File

@ -722,11 +722,10 @@
% Misc helper commands % % Misc helper commands %
% -------------------- % % -------------------- %
% Inline note
\NewDocumentCommand{\ilnote}{ +m }{\begingroup\color{gray}#1\endgroup}
\NewDocumentCommand{\note}{ d[] +m }{ \NewDocumentCommand{\note}{ d[] +m }{
\IfNoValueTF{#1}{% \IfNoValueTF{#1}{\ilnote{#2}}{\ilnote{\textit{#1:} #2}}\par
\begingroup\color{gray}#2\endgroup%
}{%
\begingroup\color{gray}\textit{#1:} #2\endgroup%
}\par
} }
\long\def\hint#1{\note[Hint]{#1}} \long\def\hint#1{\note[Hint]{#1}}