Merge branch 'stopping'
This commit is contained in:
commit 9a531509ca

26  Advanced/Stopping Problems/main.tex  (Executable file)
@ -0,0 +1,26 @@
% use [nosolutions] flag to hide solutions.
% use [solutions] flag to show solutions.
\documentclass[
	solutions,
	singlenumbering
]{../../resources/ormc_handout}
\usepackage{../../resources/macros}

\usepackage{units}
\usepackage{mathtools} % for \coloneqq

\uptitlel{Advanced 2}
\uptitler{\smallurl{}}
\title{Stopping problems}
\subtitle{Prepared by Mark on \today{}}

\begin{document}

\maketitle

\input{parts/0 probability.tex}
\input{parts/1 intro.tex}
\input{parts/2 secretary.tex}
\input{parts/3 orderstat.tex}

\end{document}
130  Advanced/Stopping Problems/parts/0 probability.tex  (Normal file)
@ -0,0 +1,130 @@
\section{Probability}

\definition{}
A \textit{sample space} is a finite set $\Omega$. \par
The elements of this set are called \textit{outcomes}. \par
An \textit{event} is a set of outcomes (i.e., a subset of $\Omega$).

\definition{}
A \textit{probability function} over a sample space $\Omega$ is a function $\mathcal{P}: P(\Omega) \to [0, 1]$ \par
that maps events to real numbers between 0 and 1. \par
Any probability function has the following properties:
\begin{itemize}
	\item $\mathcal{P}(\varnothing) = 0$
	\item $\mathcal{P}(\Omega) = 1$
	\item For events $A$ and $B$ where $A \cap B = \varnothing$, $\mathcal{P}(A \cup B) = \mathcal{P}(A) + \mathcal{P}(B)$
\end{itemize}
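
\vspace{2mm}

For example, take a single flip of a fair coin: $\Omega = \{\texttt{H}, \texttt{T}\}$, with $\mathcal{P}(E) = \nicefrac{|E|}{2}$ for every event $E \subseteq \Omega$. \par
Then $\mathcal{P}(\{\texttt{H}\}) = \mathcal{P}(\{\texttt{T}\}) = \nicefrac{1}{2}$, and all three properties above are easy to verify.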

\problem{}<threecoins>
Say we flip a fair coin three times. \par
List all elements of the sample space $\Omega$ this experiment generates.

\vfill

\problem{}
Using the same setup as \ref{threecoins}, find the following:
\begin{itemize}
	\item $\mathcal{P}(~ \{\omega \in \Omega ~|~ \omega \text{ has at least two \say{heads}}\} ~)$
	\item $\mathcal{P}(~ \{\omega \in \Omega ~|~ \omega \text{ has an odd number of \say{heads}}\} ~)$
	\item $\mathcal{P}(~ \{\omega \in \Omega ~|~ \omega \text{ has at least one \say{tails}}\} ~)$
\end{itemize}

\vfill
\pagebreak

%
% MARK: Page
%

\definition{}
Given a sample space $\Omega$ and a probability function $\mathcal{P}$, \par
a \textit{random variable} is a function from $\Omega$ to a specified output set.

\vspace{2mm}

For example, given the three-coin-toss sample space
$\Omega = \{
	\texttt{TTT},~ \texttt{TTH},~ \texttt{THT},~
	\texttt{THH},~ \texttt{HTT},~ \texttt{HTH},~
	\texttt{HHT},~ \texttt{HHH}
\}$,
we can define a random variable $\mathcal{H}$ as \say{the number of heads in a throw of three coins}. \par
As a function, $\mathcal{H}$ maps values in $\Omega$ to values in $\mathbb{Z}^+_0$ and is defined as:
\begin{itemize}
	\item $\mathcal{H}(\texttt{TTT}) = 0$
	\item $\mathcal{H}(\texttt{TTH}) = 1$
	\item $\mathcal{H}(\texttt{THT}) = 1$
	\item $\mathcal{H}(\texttt{THH}) = 2$
	\item ...and so on.
\end{itemize}

\definition{}
We can compute the probability that a random variable takes a certain value by computing the probability of
the set of outcomes that produce that value.

\vspace{2mm}

For example, if we wanted to compute $\mathcal{P}(\mathcal{H} = 2)$, we would find
$\mathcal{P}\bigl(\{\texttt{THH}, \texttt{HTH}, \texttt{HHT}\}\bigr)$.
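
\vspace{2mm}

Since this coin is fair, each of the eight outcomes above is equally likely, so this evaluates to
\begin{equation*}
	\mathcal{P}(\mathcal{H} = 2)
	= \mathcal{P}\bigl(\{\texttt{THH}, \texttt{HTH}, \texttt{HHT}\}\bigr)
	= \nicefrac{3}{8}
\end{equation*}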

\problem{}
Say we flip a coin with $\mathcal{P}(\texttt{H}) = \nicefrac{1}{3}$ three times. \par
What is $\mathcal{P}(\mathcal{H} = 1)$, with $\mathcal{H}$ defined as above? \par
What is $\mathcal{P}(\mathcal{H} = 5)$?

\vfill

\problem{}
Say we roll a fair six-sided die twice. \par
Let $\mathcal{X}$ be a random variable measuring the sum of the two results. \par
Find $\mathcal{P}(\mathcal{X} = x)$ for all $x$ in $\mathbb{Z}$.

\vfill
\pagebreak

%
% MARK: Page
%

\definition{}
Say we have a random variable $\mathcal{X}$ that produces outputs in $\mathbb{R}$. \par
The \textit{expected value} of $\mathcal{X}$ is then defined as
\begin{equation*}
	\mathcal{E}(\mathcal{X})
	~\coloneqq~ \sum_{x \in A}\Bigl(x \times \mathcal{P}\bigl(\mathcal{X} = x\bigr)\Bigr)
	~=~ \sum_{\omega \in \Omega}\Bigl(\mathcal{X}(\omega) \times \mathcal{P}(\omega)\Bigr)
\end{equation*}
where $A$ is the set of values $\mathcal{X}$ can produce. \par
That is, $\mathcal{E}(\mathcal{X})$ is the average of all possible outputs of $\mathcal{X}$, weighted by their probability.
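
\vspace{2mm}

For example, let $\mathcal{X}$ be the result of a single roll of a fair six-sided die. Then
\begin{equation*}
	\mathcal{E}(\mathcal{X})
	~=~ \sum_{x = 1}^{6} \Bigl(x \times \nicefrac{1}{6}\Bigr)
	~=~ \nicefrac{21}{6}
	~=~ 3.5
\end{equation*}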

\problem{}
Say we flip a coin with $\mathcal{P}(\texttt{H}) = \nicefrac{1}{3}$ three times. \par
Define $\mathcal{H}$ as the number of heads we see. \par
Find $\mathcal{E}(\mathcal{H})$.

\vfill

\problem{}
Let $\mathcal{A}$ and $\mathcal{B}$ be two random variables. \par
Show that $\mathcal{E}(\mathcal{A} + \mathcal{B}) = \mathcal{E}(\mathcal{A}) + \mathcal{E}(\mathcal{B})$.

\vfill

\definition{}
Let $A$ and $B$ be events on a sample space $\Omega$. \par
We say that $A$ and $B$ are \textit{independent} if $\mathcal{P}(A \cap B) = \mathcal{P}(A) \times \mathcal{P}(B)$. \par
Intuitively, events $A$ and $B$ are independent if the outcome of one does not affect the other.

\definition{}
Let $\mathcal{A}$ and $\mathcal{B}$ be two random variables over $\Omega$. \par
We say that $\mathcal{A}$ and $\mathcal{B}$ are independent if the events $\{\omega \in \Omega ~|~ \mathcal{A}(\omega) = a\}$
and $\{\omega \in \Omega ~|~ \mathcal{B}(\omega) = b\}$ are independent for all $(a, b)$ that $\mathcal{A}$ and $\mathcal{B}$ can produce.
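
\vspace{2mm}

For example, flip a fair coin twice, and let $A$ be the event \say{the first flip is heads} and $B$ the event \say{the second flip is heads}. Then
\begin{equation*}
	\mathcal{P}(A \cap B) = \nicefrac{1}{4} = \nicefrac{1}{2} \times \nicefrac{1}{2} = \mathcal{P}(A) \times \mathcal{P}(B)
\end{equation*}
so $A$ and $B$ are independent, as we'd expect.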

\pagebreak
67  Advanced/Stopping Problems/parts/1 intro.tex  (Normal file)
@ -0,0 +1,67 @@
\section{Introduction}

\generic{Setup:}
Suppose we toss a 6-sided die $n$ times. \par
It is easy to detect the first time we roll a 6. \par
What should we do if we want to detect the \textit{last}?

\problem{}<lastl>
Given $l \leq n$, what is the probability that the last $l$
tosses of this die contain exactly one six? \par
\hint{Start with small $l$.}

\begin{solution}
	There are $l$ ways to choose which of the last $l$ tosses is the six, so \par
	$\mathcal{P}(\text{last } l \text{ tosses have exactly one 6}) = l \times (\nicefrac{1}{6})(\nicefrac{5}{6})^{l-1}$
\end{solution}

\vfill

\problem{}
For what value of $l$ is the probability in \ref{lastl} maximal? \par
The following table may help.

\begin{center}
	\begin{tabular}{|| c | c | c ||}
		\hline
		\rule{0pt}{3.5mm} % Bonus height for exponent
		$l$ & $(\nicefrac{5}{6})^l$ & $(\nicefrac{1}{6})(\nicefrac{5}{6})^l$ \\
		\hline\hline
		1 & 0.83 & 0.139 \\
		\hline
		2 & 0.69 & 0.115 \\
		\hline
		3 & 0.57 & 0.095 \\
		\hline
		4 & 0.48 & 0.080 \\
		\hline
		5 & 0.40 & 0.067 \\
		\hline
		6 & 0.33 & 0.055 \\
		\hline
		7 & 0.27 & 0.045 \\
		\hline
		8 & 0.23 & 0.038 \\
		\hline
	\end{tabular}
\end{center}

\begin{solution}
	$f(l) = l \times (\nicefrac{1}{6})(\nicefrac{5}{6})^{l-1}$ satisfies $\nicefrac{f'(l)}{f(l)} = \nicefrac{1}{l} + \text{ln}(\nicefrac{5}{6})$, \par
	so $f$ is maximal at $l = \nicefrac{-1}{\text{ln}(\nicefrac{5}{6})} \approx 5.48$. The best integer choice is $l = 5$. \par
	$l = 6$ is just as good: both give probability $\nicefrac{3125}{7776} \approx 0.40$.
\end{solution}

\vfill

\problem{}
Finish your solution: \par
In $n$ rolls of a six-sided die, what strategy maximizes
our chance of detecting the last $6$ that is rolled? \par
What is the probability of our guess being right?

\begin{solution}
	Ignore everything before the last five rolls, then guess that the first six we see is the last. \par
	This succeeds exactly when the last five rolls contain exactly one six, \par
	so by \ref{lastl} the probability of success is $\approx 0.40$. \note{(Using the last six rolls does exactly as well.)}
\end{solution}

\vfill
\pagebreak
276  Advanced/Stopping Problems/parts/2 secretary.tex  (Normal file)
@ -0,0 +1,276 @@
\section{The Secretary Problem}

\definition{The secretary problem}
Say we need to hire a secretary. We have exactly one position to fill,
and we must fill it with one of $n$ applicants. These $n$ applicants,
if put together, can be ranked unambiguously from \say{best} to \say{worst}.

\vspace{2mm}

We interview applicants in a random order, one at a time. \par
At the end of each interview, we either reject the applicant (and move on to the next one), \par
or select the applicant (which fills the position and ends the process).

\vspace{2mm}

Each applicant is interviewed at most once---we cannot return to an applicant we've rejected. \par
In addition, we cannot reject the final applicant, as doing so will leave us without a secretary.

\vspace{2mm}

For a given $n$, we would like to maximize our probability of selecting the best applicant. \par
This is the only metric we care about---we do not try to maximize the rank of our applicant. \par
Hiring the second-best applicant is no better than hiring the worst.

\problem{}
If $n = 1$, what is the best hiring strategy, and what is the probability that we hire the best applicant?

\begin{solution}
	This is trivial: hire the first applicant. She is always the best.
\end{solution}

\vfill
\problem{}
If $n = 2$, what is the best hiring strategy, and what is the probability that we hire the best applicant? \par
Is this different than the probability of hiring the best applicant at random?

\begin{solution}
	There are two strategies:
	\begin{itemize}
		\item hire the first applicant
		\item hire the second applicant
	\end{itemize}

	Both are equivalent to the random strategy.

	\vspace{2mm}

	Intuitively, the fact that a strategy can't help us makes sense: \par
	when we're looking at the first applicant, we have no information; \par
	when we're looking at the second, we have no agency (i.e., we \textit{must} hire).
\end{solution}

\vfill

\problem{}
If $n = 3$, what is the probability of hiring the best applicant at random? \par
Come up with a strategy that produces better odds.

\begin{solution}
	Once we have three applicants, we can make progress.

	\vspace{2mm}

	The remark from the previous solution still holds: \par
	when we're looking at the first applicant, we have no information; \par
	when we're looking at the second, we have no choices.

	\vspace{2mm}

	So, let's make our decision at the second candidate. \par
	If we hire the second candidate only when they are better than the first \par
	(and hire the third otherwise), we end up hiring the best candidate exactly half the time---\par
	better than the $\nicefrac{1}{3}$ odds of a random choice.

	\vspace{2mm}

	This can be verified by checking all six cases.
\end{solution}

\vfill
\pagebreak

%
% MARK: Page
%

\problem{}<bestyet>
Should we ever consider hiring a candidate that \textit{isn't} the best we've seen so far? \par
Why or why not? \hint{Read the problem again.}

\begin{solution}
	No! A candidate that isn't the best yet cannot be the best overall! \par
	Remember---this problem is only interested in hiring the \textit{absolute best} candidate. \par
	Our reward is zero in all other cases.
\end{solution}

\vfill

\remark{}
\ref{bestyet} implies that we should automatically reject any applicant that isn't
the best we've seen. We can take advantage of this fact to restrict the types of
strategies we consider.

\remark{}
Let $B_x$ be the event \say{the $x^\text{th}$ applicant is better than all previous applicants,} \par
and recall that we only know the \textit{relative} ranks of our applicants: \par
given two candidates, we know \textit{which} is better, but not \textit{by how much}.

\vspace{2mm}

Therefore, the results of past events cannot provide information about future $B_x$. \par
All events $B_x$ are independent.

\vspace{2mm}

We can therefore ignore any strategy that depends on the outcomes of individual $B_x$.
Given this realization, we are left with only one kind of strategy: \par
We blindly reject the first $(k - 1)$ applicants, then select the next \say{best-yet} applicant. \par
All we need to do now is pick the optimal $k$.
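
\vspace{2mm}

For example, take $n = 5$ and $k = 3$: we reject the first $k - 1 = 2$ applicants no matter what, then select the next \say{best-yet} applicant. \par
If the applicants arrive with overall ranks $3, 4, 1, 2, 5$ (where $1$ is best), we reject ranks $3$ and $4$, then hire the third applicant---rank $1$, a success. \par
Had the order been $3, 1, 4, 2, 5$, no \say{best-yet} applicant appears after position two, and we would be forced to take the final applicant.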

\problem{}
Consider the secretary problem with a given $n$. \par
What are the probabilities of each $B_x$?

\vfill

\problem{}<seca>
What is the probability that the $n^\text{th}$ applicant is the overall best applicant?

\begin{solution}
	All positions are equally likely, so this is $\nicefrac{1}{n}$.
\end{solution}

\vfill
\pagebreak

%
% MARK: Page
%

\problem{}<secb>
Given that the $x^\text{th}$ applicant is the overall best, what is the probability of hiring this applicant \par
if we use the \say{look-then-leap} strategy detailed above? \par
\hint{
	Under what conditions would we \textit{not} hire this applicant? \par
	This probability depends on $k$ and $x$.
}

\begin{solution}
	Say that the $x^\text{th}$ applicant is the best overall. We hire this applicant
	unless we hired a candidate that came before them. \par

	\vspace{2mm}

	What is the probability of that? We saw $x-1$ applicants before the $x^\text{th}$. \par
	We hired one of them exactly when the best of those initial $x-1$ candidates did \textit{not} fall
	into the initial $k-1$ applicants we rejected.
	\note{(If the best of the first $x-1$ applicants \textit{was} within the first $k-1$,
	no \say{best-yet} applicant appears at positions $k$ through $x-1$, and we hire the $x^\text{th}$.)}

	\vspace{2mm}

	There are $x-1$ positions to place the best of the first $x-1$ candidates, \par
	and $k-1$ of these positions are automatically rejected. \par
	Thus, the probability that the best of the first $x-1$ applicants lands among the rejected is $\frac{k-1}{x-1}$---\par
	and by the logic above, this is exactly the probability that we hire the $x^\text{th}$ applicant.
\end{solution}

\vfill

\problem{}<phisubn>
Consider the secretary problem with $n$ applicants. \par
If we reject the first $k-1$ applicants and hire the next \say{best-yet} applicant we encounter, \par
what is the probability that we select the best candidate? \par
Call this probability $\phi_n(k)$.

\begin{solution}
	Using \ref{seca} and \ref{secb}, this is straightforward:
	\[
		\phi_n(k)
		= \sum_{x = k}^{n}\left( \frac{1}{n} \times \frac{k-1}{x-1} \right)
	\]
\end{solution}

\vfill

\problem{}
Find the $k$ that maximizes $\phi_n(k)$ for $n$ in $\{1, 2, 3, 4, 5\}$.

\begin{solution}
	Brute force. We already know that $\phi_1(1) = 1.0$ and $\phi_2(1) = \phi_3(2) = 0.5$. \par
	For example, $\phi_4(2) = \frac{1}{4}\left(1 + \frac{1}{2} + \frac{1}{3}\right) \approx 0.46$. \par
	The maximal value of $\phi_4$ is $\phi_4(2) = 0.46$, and of $\phi_5$ is $\phi_5(3) = 0.43$.
\end{solution}

\vfill
\pagebreak

%
% MARK: Page
%

\problem{}
Let $r = \frac{k-1}{n}$, the fraction of applicants we reject. Show that
\begin{equation*}
	\phi_n(k)
	= r \sum_{x = k}^{n}\left( \frac{1}{x-1} \right)
\end{equation*}

\begin{solution}
	Pull the constant factor out of the sum:
	\begin{equation*}
		\phi_n(k)
		= \sum_{x = k}^{n}\left( \frac{1}{n} \times \frac{k-1}{x-1} \right)
		= \frac{k-1}{n} \sum_{x = k}^{n}\left( \frac{1}{x-1} \right)
		= r \sum_{x = k}^{n}\left( \frac{1}{x-1} \right)
	\end{equation*}
\end{solution}

\vfill

\problem{}
With a bit of fairly unpleasant calculus, we can show that the following is true for large $n$:
\begin{equation*}
	\sum_{x=k}^{n}\frac{1}{x-1}
	~\approx~ \text{ln}\Bigl(\frac{n}{k}\Bigr)
\end{equation*}
Use this fact to find an approximation of $\phi_n(k)$ at large $n$ in terms of $r$. \par
\hint{If $n$ is big, $\frac{k-1}{n} \approx \frac{k}{n}$.}

\begin{solution}
	\begin{equation*}
		\phi_n(k)
		~=~ r \sum_{x = k}^{n}\left( \frac{1}{x-1} \right)
		~\approx~ r \times \text{ln}\left(\frac{n}{k}\right)
		~=~ -r \times \text{ln}\left(\frac{k}{n}\right)
		~\approx~ -r \times \text{ln}(r)
	\end{equation*}
\end{solution}

\vfill

\problem{}
Find the $r$ that maximizes $\underset{n \rightarrow \infty}{\text{lim}} \phi_n$. \par
Also, find the value of $\phi_n$ at this point. \par
\note{If you aren't familiar with calculus, ask an instructor for help.}

\begin{solution}
	Use the usual calculus tricks:
	\begin{equation*}
		\frac{d}{dr} \bigl( -r \times \text{ln}(r) \bigr)
		= -1 - \text{ln}(r)
	\end{equation*}

	This is zero at $r = e^{-1}$, and the value of $-r \times \text{ln}(r)$ at this point is also $e^{-1}$.
\end{solution}

\vfill

Thus, the \say{look-then-leap} strategy with $r = e^{-1}$ should select the best candidate about $e^{-1} \approx 37\%$ of the time,
\textit{regardless of $n$.} Our probability of success does not change as $n$ gets larger! \par
\note{Recall that the random strategy succeeds with probability $\nicefrac{1}{n}$. \par
That is, it quickly becomes small as $n$ gets large.}
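
\vspace{2mm}

\note{For concreteness: with $n = 100$ applicants, picking at random succeeds with probability $0.01$, \par
while rejecting the first $37$ applicants and taking the next \say{best-yet} applicant succeeds roughly $37\%$ of the time.}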

\pagebreak
204  Advanced/Stopping Problems/parts/3 orderstat.tex  (Normal file)
@ -0,0 +1,204 @@
\section{Another Secretary Problem}

As you may have already noticed, the secretary problem we discussed in the previous section
is somewhat disconnected from reality. Under what circumstances would one only be satisfied
with the \textit{absolute best} candidate? It may make more sense to maximize the average rank
of the candidate we hire, rather than the probability of selecting the best. This is the problem
we'll attempt to solve next.

\definition{}
The problem we're solving is summarized below.
Note that this is nearly identical to the classical secretary problem in the previous
section---the only thing that has changed is the goal.
\begin{itemize}
	\item We have exactly one position to fill, and we must fill it with one of $n$ applicants.
	\item These $n$ applicants, if put together, can be ranked unambiguously from \say{best} to \say{worst}.
	\item We interview applicants in a random order, one at a time.
	\item After each interview, we either reject or select the applicant.
	\item We cannot return to an applicant we've rejected.
	\item The process ends once we select an applicant.

	\vspace{2mm}

	\item Our goal is to maximize the rank of the applicant we hire.
\end{itemize}

\definition{}<mod>
Just like before, we need to restate this problem in the language of probability. \par
To do this, we'll say that each candidate has a \textit{quality} rating in $[0, 1]$---the higher, the better.

\vspace{2mm}

Our series of applicants then becomes a series of random variables $\mathcal{X}_1, \mathcal{X}_2, ..., \mathcal{X}_n$, \par
where each $\mathcal{X}_i$ is drawn uniformly from $[0, 1]$.
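
\vspace{2mm}

For example, with $n = 3$, one possible run of interviews is $\mathcal{X}_1 = 0.62$, ~ $\mathcal{X}_2 = 0.17$, ~ $\mathcal{X}_3 = 0.80$: \par
the third applicant is the best of the three, and the second is the worst.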

\problem{}<notsatisfy>
The modification in \ref{mod} doesn't fully satisfy the constraints of the secretary problem. \par
Why not?

\begin{solution}
	If we observe $\mathcal{X}_i$ directly, we obtain \textit{absolute} scores. \par
	This is more information than the secretary problem allows us to have---we can know which of
	two candidates is better, but \textit{not by how much}.
\end{solution}

\vfill

Ignore this issue for now. We'll return to it later.

\problem{}
Let $\mathcal{X}$ be a random variable uniformly distributed over $[0, 1]$. \par
Given a real number $x$, what is $\mathcal{P}(\mathcal{X} \leq x)$?

\begin{solution}
	\begin{equation*}
		\mathcal{P}(\mathcal{X} \leq x) =
		\begin{cases}
			0 & x \leq 0 \\
			x & 0 < x < 1 \\
			1 & \text{otherwise}
		\end{cases}
	\end{equation*}
\end{solution}

\vfill

\problem{}
Say we have five independent random variables $\mathcal{X}_1, \mathcal{X}_2, ..., \mathcal{X}_5$, each uniform on $[0, 1]$. \par
Given some $y \in [0, 1]$, what is the probability that all five $\mathcal{X}_i$ are smaller than $y$?

\begin{solution}
	By independence, this is $\mathcal{P}(\mathcal{X} \leq y)^5$, which is $y^5$.
\end{solution}

\vfill
\pagebreak

%
% MARK: Page
%

\definition{}
Say we have a random variable $\mathcal{X}$ which we observe $n$ times. \note{(for example, we repeatedly roll a die)} \par
We'll arrange these observations in increasing order, labeled $x_1 < x_2 < ... < x_n$. \par
Under this definition, $x_i$ is called the \textit{$i^\text{th}$ order statistic}---the $i^\text{th}$ smallest sample of $\mathcal{X}$.
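
\vspace{2mm}

For example, if we roll a six-sided die three times and see $4$, $1$, and $5$, \par
then $x_1 = 1$, ~ $x_2 = 4$, ~ $x_3 = 5$: the third order statistic is the largest of the three rolls.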

\problem{}<ostatone>
Say we have a random variable $\mathcal{X}$ uniformly distributed on $[0, 1]$, of which we take $5$ observations. \par
Given some $y$, what is the probability that $x_5 < y$? How about $x_4 < y$?

\begin{solution}
	$x_5 < y$: ~This is a restatement of the previous problem.

	\vspace{2mm}

	$x_4 < y$: ~We need at least four observations to be smaller than $y$:
	either exactly four are smaller and one is larger, or all five are smaller.
	Accounting for permutations, we get
	$
		5\mathcal{P}(\mathcal{X} \leq y)^4
		\mathcal{P}(\mathcal{X} > y)
		+
		\mathcal{P}(\mathcal{X} \leq y)^5
	$, which is $5y^4(1-y) + y^5$.
\end{solution}

\vfill

\problem{}
Consider the same setup as \ref{ostatone}, but with $n$ measurements. \par
What is the probability that $x_i < y$ for a given $y$?

\begin{solution}
	\begin{equation*}
		\mathcal{P}(x_i < y)
		~=~
		\sum_{j=i}^{n}
		\binom{n}{j} \times
		y^j
		(1-y)^{n-j}
	\end{equation*}
\end{solution}

\vfill

\remark{}
The expected value of the $i^\text{th}$ order statistic on $n$ samples of the uniform distribution is below.
\begin{equation*}
	\mathcal{E}(x_i) = \frac{i}{n+1}
\end{equation*}
We do not have the tools to derive this yet.
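
\vspace{2mm}

\note{As a sanity check: with $n = 5$ samples, the expected order statistics are $\nicefrac{1}{6}, \nicefrac{2}{6}, ..., \nicefrac{5}{6}$. \par
On average, the five samples split $[0, 1]$ into six equal pieces.}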

\pagebreak

%
% MARK: Page
%

\definition{}
Recall \ref{notsatisfy}. We need one more modification. \par
In order to preserve the constraints of the problem, we will not be allowed to observe $\mathcal{X}_i$ directly. \par
Instead, we'll be given an \say{indicator} $\mathcal{I}_i$ for each $\mathcal{X}_i$, which produces values in $\{0, 1\}$. \par
If the value we observe when interviewing $\mathcal{X}_i$ is the best we've seen so far, $\mathcal{I}_i$ will produce $1$. \par
If it isn't, $\mathcal{I}_i$ produces $0$.
\problem{}
Given a secretary problem with $n$ applicants, what is $\mathcal{E}(\mathcal{I}_i)$?

\begin{solution}
	The $i^\text{th}$ applicant is the best of the first $i$ with probability $\nicefrac{1}{i}$, so
	\begin{equation*}
		\mathcal{E}(\mathcal{I}_i) = \mathcal{P}(\mathcal{I}_i = 1) = \frac{1}{i}
	\end{equation*}
\end{solution}

\vfill

\problem{}
What is $\mathcal{E}(\mathcal{X}_i ~|~ \mathcal{I}_i = 1)$? \par
In other words, what is the expected value of $\mathcal{X}_i$ given that \par
we know this candidate is the best we've seen so far?

\begin{solution}
	This is simply the expected value of the $i^\text{th}$ order statistic on $i$ samples:
	\begin{equation*}
		\mathcal{E}(\mathcal{X}_i ~|~ \mathcal{I}_i = 1) = \frac{i}{i+1}
	\end{equation*}
\end{solution}

\vfill
\pagebreak

\problem{}
In the previous section, we found that the optimal strategy for the classical secretary problem is to
reject the first $e^{-1} \times n$ candidates, and select the next \say{best-yet} candidate we see.

\vspace{2mm}

How effective is this strategy for the ranked secretary problem? \par
Find the expected rank of the applicant we select using this strategy.

\vfill

\problem{}
Assuming we use the same kind of strategy as before (reject $k$, select the next \say{best-yet} candidate), \par
show that $k = \sqrt{n}$ optimizes the expected rank of the candidate we select.

\begin{solution}
	This is a difficult bonus problem. See
	\texttt{Bearden, J. N. (2006). A new secretary problem with rank-based selection and cardinal payoffs.}
\end{solution}

\vfill
\pagebreak
81  Advanced/Stopping Problems/parts/4 again.tex  (Normal file)
@ -0,0 +1,81 @@
\section{The Secretary, Again}

Now, let's solve the secretary problem as a stopping rule problem. \par
The first thing we need to do is re-write it into the form we discussed in the previous section. \par
Namely, we need...
\begin{itemize}
	\item A sequence of random variables $\mathcal{X}_1, \mathcal{X}_2, ..., \mathcal{X}_t$
	\item A sequence of reward functions $y_0, y_1(\sigma_1), ..., y_t(\sigma_t)$.
\end{itemize}

\vspace{2mm}

For convenience, I've summarized the secretary problem below:
\begin{itemize}
	\item We have exactly one position to fill, and we must fill it with one of $n$ applicants.
	\item These $n$ applicants, if put together, can be ranked unambiguously from \say{best} to \say{worst}.
	\item We interview applicants in a random order, one at a time.
	\item After each interview, we reject the applicant and move on, \par
		or select the applicant and end the process.
	\item We cannot return to an applicant we've rejected.
	\item Our goal is to select the \textit{overall best} applicant.
\end{itemize}

\definition{}
First, we'll define a sequence of $\mathcal{X}_i$ that fits this problem. \par
Each $\mathcal{X}_i$ will give us the \textit{relative rank} of each applicant. \par
For example, if $\mathcal{X}_i = 1$, the $i^\text{th}$ applicant is the best of the first $i$. \par
If $\mathcal{X}_i = 3$, exactly two of the previous applicants are better than the $i^\text{th}$.
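
\vspace{2mm}

Concretely, suppose four applicants arrive with overall ranks $2, 4, 1, 3$ (where $1$ is best). \par
Then $\mathcal{X}_1 = 1$, ~ $\mathcal{X}_2 = 2$, ~ $\mathcal{X}_3 = 1$, ~ $\mathcal{X}_4 = 3$.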

\problem{}
What values can $\mathcal{X}_1$ take, and what are their probabilities? \par
How about $\mathcal{X}_2$, $\mathcal{X}_3$, and $\mathcal{X}_4$?

\vfill

\remark{}
Now we need to define $y_n(\sigma_n)$. Intuitively, it may make sense to set $y_n = 1$ if the $n^\text{th}$
applicant is the best, and $y_n = 0$ otherwise---but this doesn't work.

\vspace{2mm}

As defined in the previous section, $y_n$ can only depend on $\sigma_n = [x_1, x_2, ..., x_n]$, the previous $n$ observations.
We cannot define $y_n$ as specified above because, having seen $\sigma_n$, we \textit{cannot} know whether or not the $n^\text{th}$
applicant is the best.

\vspace{2mm}

To work around this, we'll define our reward for selecting the $n^\text{th}$ applicant as the \textit{probability}
that this applicant is the best.

\problem{}
Define $y_n$.

\begin{solution}
	\begin{itemize}
		\item An applicant should only be selected if $\mathcal{X}_i = 1$.
		\item If we accept the $j^\text{th}$ applicant, the probability that we select the absolute best is equal to \par
			the probability that the best of the first $j$ candidates is the best overall. \par

			\vspace{1mm}

			This is just the probability that the best candidate overall appears among the first $j$, \par
			and is thus $\nicefrac{j}{n}$.
	\end{itemize}

	So,
	\begin{equation*}
		y_j(\sigma_j) =
		\begin{cases}
			\nicefrac{j}{n} & x_j = 1 \\
			0 & \text{otherwise}
		\end{cases}
	\end{equation*}

	\vspace{2mm}

	Note that $y_0 = 0$, and that $y_n$ depends only on $x_n$.
\end{solution}

\vfill
\pagebreak

@ -722,11 +722,10 @@
 % Misc helper commands %
 % -------------------- %

+% Inline note
+\NewDocumentCommand{\ilnote}{ +m }{\begingroup\color{gray}#1\endgroup}
+
 \NewDocumentCommand{\note}{ d[] +m }{
-	\IfNoValueTF{#1}{%
-		\begingroup\color{gray}#2\endgroup%
-	}{%
-		\begingroup\color{gray}\textit{#1:} #2\endgroup%
-	}\par
+	\IfNoValueTF{#1}{\ilnote{#2}}{\ilnote{\textit{#1:} #2}}\par
 }
 \long\def\hint#1{\note[Hint]{#1}}