Merge branch 'stopping'

This commit is contained in:
Mark 2024-09-26 09:25:38 -07:00
commit 9a531509ca
Signed by: Mark
GPG Key ID: C6D63995FE72FD80
7 changed files with 788 additions and 5 deletions

View File

@ -0,0 +1,26 @@
% use [nosolutions] flag to hide solutions.
% use [solutions] flag to show solutions.
\documentclass[
solutions,
singlenumbering
]{../../resources/ormc_handout}
\usepackage{../../resources/macros}
\usepackage{units}
\usepackage{mathtools} % for \coloneqq
\uptitlel{Advanced 2}
\uptitler{\smallurl{}}
\title{Stopping problems}
\subtitle{Prepared by Mark on \today{}}
\begin{document}
\maketitle
\input{parts/0 probability.tex}
\input{parts/1 intro.tex}
\input{parts/2 secretary.tex}
\input{parts/3 orderstat.tex}
\end{document}

View File

@ -0,0 +1,130 @@
\section{Probability}
\definition{}
A \textit{sample space} is a finite set $\Omega$. \par
The elements of this set are called \textit{outcomes}. \par
An \textit{event} is a set of outcomes (i.e., a subset of $\Omega$).
\definition{}
A \textit{probability function} over a sample space $\Omega$ is a function $\mathcal{P}: P(\Omega) \to [0, 1]$ \par
that maps events to real numbers between 0 and 1. \par
Any probability function has the following properties:
\begin{itemize}
\item $\mathcal{P}(\varnothing) = 0$
\item $\mathcal{P}(\Omega) = 1$
\item For events $A$ and $B$ where $A \cap B = \varnothing$, $\mathcal{P}(A \cup B) = \mathcal{P}(A) + \mathcal{P}(B)$
\end{itemize}
\problem{}<threecoins>
Say we flip a fair coin three times. \par
List all elements of the sample space $\Omega$ this experiment generates.
\vfill
\problem{}
Using the same setup as \ref{threecoins}, find the following:
\begin{itemize}
\item $\mathcal{P}(~ \{\omega \in \Omega ~|~ \omega \text{ has at least two \say{heads}}\} ~)$
\item $\mathcal{P}(~ \{\omega \in \Omega ~|~ \omega \text{ has an odd number of \say{heads}}\} ~)$
\item $\mathcal{P}(~ \{\omega \in \Omega ~|~ \omega \text{ has at least one \say{tails}}\} ~)$
\end{itemize}
\vfill
\pagebreak
%
% MARK: Page
%
\definition{}
Given a sample space $\Omega$ and a probability function $\mathcal{P}$, \par
a \textit{random variable} is a function from $\Omega$ to a specified output set.
\vspace{2mm}
For example, given the three-coin-toss sample space
$\Omega = \{
\texttt{TTT},~ \texttt{TTH},~ \texttt{THT},~
\texttt{THH},~ \texttt{HTT},~ \texttt{HTH},~
\texttt{HHT},~ \texttt{HHH}
\}$,
We can define a random variable $\mathcal{H}$ as \say{the number of heads in a throw of three coins}. \par
As a function, $\mathcal{H}$ maps values in $\Omega$ to values in $\mathbb{Z}^+_0$ and is defined as:
\begin{itemize}
\item $\mathcal{H}(\texttt{TTT}) = 0$
\item $\mathcal{H}(\texttt{TTH}) = 1$
\item $\mathcal{H}(\texttt{THT}) = 1$
\item $\mathcal{H}(\texttt{THH}) = 2$
\item ...and so on.
\end{itemize}
\definition{}
We can compute the probability that a random variable takes a certain value by computing the probability of
the set of outcomes that produce that value. \par
\vspace{2mm}
For example, if we wanted to compute $\mathcal{P}(\mathcal{H} = 2)$, we would find
$\mathcal{P}\bigl(\{\texttt{THH}, \texttt{HTH}, \texttt{HHT}\}\bigr)$.
\problem{}
Say we flip a coin with $\mathcal{P}(\texttt{H}) = \nicefrac{1}{3}$ three times. \par
What is $\mathcal{P}(\mathcal{H} = 1)$, with $\mathcal{H}$ defined as above? \par
What is $\mathcal{P}(\mathcal{H} = 5)$?
\vfill
\problem{}
Say we roll a fair six-sided die twice. \par
Let $\mathcal{X}$ be a random variable measuring the sum of the two results. \par
Find $\mathcal{P}(\mathcal{X} = x)$ for all $x$ in $\mathbb{Z}$.
\vfill
\pagebreak
%
% MARK: Page
%
\definition{}
Say we have a random variable $\mathcal{X}$ that produces outputs in $\mathbb{R}$. \par
The \textit{expected value} of $\mathcal{X}$ is then defined as
\begin{equation*}
\mathcal{E}(\mathcal{X})
~\coloneqq~ \sum_{x \in A}\Bigl(x \times \mathcal{P}\bigl(\mathcal{X} = x\bigr)\Bigr)
~=~ \sum_{\omega \in \Omega}\Bigl(\mathcal{X}(\omega) \times \mathcal{P}(\omega)\Bigr)
\end{equation*}
That is, $\mathcal{E}(\mathcal{X})$ is the average of all possible outputs of $\mathcal{X}$ weighted by their probability.
\problem{}
Say we flip a coin with $\mathcal{P}(\texttt{H}) = \nicefrac{1}{3}$ three times. \par
Define $\mathcal{H}$ as the number of heads we see. \par
Find $\mathcal{E}(\mathcal{H})$.
\vfill
\problem{}
Let $\mathcal{A}$ and $\mathcal{B}$ be two random variables. \par
Show that $\mathcal{E}(\mathcal{A} + \mathcal{B}) = \mathcal{E}(\mathcal{A}) + \mathcal{E}(\mathcal{B})$.
\vfill
\definition{}
Let $A$ and $B$ be events on a sample space $\Omega$. \par
We say that $A$ and $B$ are \textit{independent} if $\mathcal{P}(A \cap B) = \mathcal{P}(A) \times \mathcal{P}(B)$. \par
Intuitively, events $A$ and $B$ are independent if the outcome of one does not affect the other.
\definition{}
Let $\mathcal{A}$ and $\mathcal{B}$ be two random variables over $\Omega$. \par
We say that $\mathcal{A}$ and $\mathcal{B}$ are independent if the events $\{\omega \in \Omega ~|~ \mathcal{A}(\omega) = a\}$
and $\{\omega \in \Omega ~|~ \mathcal{B}(\omega) = b\}$ are independent for all $(a, b)$ that $\mathcal{A}$ and $\mathcal{B}$ can produce.
\pagebreak

View File

@ -0,0 +1,67 @@
\section{Introduction}
\generic{Setup:}
Suppose we toss a 6-sided die $n$ times. \par
It is easy to detect the first time we roll a 6. \par
What should we do if we want to detect the \textit{last}?
\problem{}<lastl>
Given $l \leq n$, what is the probability that the last $l$
tosses of this die contain exactly one six? \par
\hint{Start with small $l$.}
\begin{solution}
$\mathcal{P}(\text{last } l \text{ tosses have exactly one 6}) = l \times (\nicefrac{1}{6})(\nicefrac{5}{6})^{l-1}$
\end{solution}
\vfill
\problem{}
For what value of $l$ is the probability in \ref{lastl} maximal? \par
The following table may help.
\begin{center}
\begin{tabular}{|| c | c | c ||}
\hline
\rule{0pt}{3.5mm} % Bonus height for exponent
$l$ & $(\nicefrac{5}{6})^l$ & $(\nicefrac{1}{6})(\nicefrac{5}{6})^l$ \\
\hline\hline
1 & 0.83 & 0.133 \\
\hline
2 & 0.69 & 0.115 \\
\hline
3 & 0.57 & 0.095 \\
\hline
4 & 0.48 & 0.089 \\
\hline
5 & 0.40 & 0.067 \\
\hline
6 & 0.33 & 0.055 \\
\hline
7 & 0.27 & 0.045 \\
\hline
8 & 0.23 & 0.038 \\
\hline
\end{tabular}
\end{center}
\begin{solution}
$(\nicefrac{1}{6})(\nicefrac{5}{6})^l \times l$ is maximal at $l \approx 5.48$, so $l = 5$. \par
$l = 6$ is close enough.
\end{solution}
\vfill
\problem{}
Finish your solution: \par
In $n$ rolls of a six-sided die, what strategy maximizes
our chance of detecting the last $6$ that is rolled? \par
What is the probability of our guess being right?
\begin{solution}
Whether $l = 5$, $5.4$, or $6$, the probability of success rounds to $0.40$.
\end{solution}
\vfill
\pagebreak

View File

@ -0,0 +1,276 @@
\section{The Secretary Problem}
\definition{The secretary problem}
Say we need to hire a secretary. We have exactly one position to fill,
and we must fill it with one of $n$ applicants. These $n$ applicants,
if put together, can be ranked unambiguously from \say{best} to \say{worst}.
\vspace{2mm}
We interview applicants in a random order, one at a time. \par
At the end of each interview, we either reject the applicant (and move on to the next one), \par
or select the applicant (which fills the position and ends the process).
\vspace{2mm}
Each applicant is interviewed at most once---we cannot return to an applicant we've rejected. \par
In addition, we cannot reject the final applicant, as doing so will leave us without a secretary.
\vspace{2mm}
For a given $n$, we would like to maximize our probability of selecting the best applicant. \par
This is the only metric we care about---we do not try to maximize the rank of our applicant. \par
Hiring the second-best applicant is no better than hiring the worst.
\problem{}
If $n = 1$, what is the best hiring strategy, and what is the probability that we hire the best applicant?
\begin{solution}
This is trivial. Hire the first applicant, she's always the best.
\end{solution}
\vfill
\problem{}
If $n = 2$, what is the best hiring strategy, and what is the probability that we hire the best applicant? \par
Is this different from the probability of hiring the best applicant at random?
\begin{solution}
There are two strategies:
\begin{itemize}
\item hire the first
\item hire the second
\end{itemize}
Both are equivalent to the random strategy.
\vspace{2mm}
Intuitively, the fact that a strategy can't help us makes sense: \par
When we're looking at the first applicant, we have no information; \par
when we're looking at the second, we have no agency (i.e., we \textit{must} hire).
\end{solution}
\vfill
\problem{}
If $n = 3$, what is the probability of hiring the best applicant at random? \par
Come up with a strategy that produces better odds.
\begin{solution}
Once we have three applicants, we can make progress.
\vspace{2mm}
The remark from the previous solution still holds: \par
When we're looking at the first applicant, we have no information; \par
when we're looking at the second, we have no choices.
\vspace{2mm}
So, let's make our decision at the second candidate. \par
If we hire only when the second candidate is better than the first, \par
we end up hiring the best candidate exactly half the time.
\vspace{2mm}
This can be verified by checking all six cases.
\end{solution}
\vfill
\pagebreak
%
% MARK: Page
%
\problem{}<bestyet>
Should we ever consider hiring a candidate that \textit{isn't} the best we've seen so far? \par
Why or why not? \hint{Read the problem again.}
\begin{solution}
No! A candidate that isn't the best yet cannot be the best overall! \par
Remember---this problem is only interested in hiring the \textit{absolute best} candidate. \par
Our reward is zero in all other cases.
\end{solution}
\vfill
\remark{}
\ref{bestyet} implies that we should automatically reject any applicant that isn't
the best we've seen. We can take advantage of this fact to restrict the types of
strategies we consider.
\remark{}
Let $B_x$ be the event \say{the $x^\text{th}$ applicant is better than all previous applicants,} \par
and recall that we only know the \textit{relative} ranks of our applicants: \par
given two candidates, we know \textit{which} is better, but not \textit{by how much}.
\vspace{2mm}
Therefore, the results of past events cannot provide information about future $B_x$. \par
All events $B_x$ are independent.
\vspace{2mm}
We can therefore ignore any strategy that depends on the outcomes of individual $B_x$.
Given this realization, we are left with only one kind of strategy: \par
We blindly reject the first $(k - 1)$ applicants, then select the next \say{best-yet} applicant. \par
All we need to do now is pick the optimal $k$.
\problem{}
Consider the secretary problem with a given $n$. \par
What are the probabilities of each $B_x$?
\vfill
\problem{}<seca>
What is the probability that the $n^\text{th}$ applicant is the overall best applicant?
\begin{solution}
All positions are equally likely. $\nicefrac{1}{n}$.
\end{solution}
\vfill
\pagebreak
%
% MARK: Page
%
\problem{}<secb>
Given that the $x^\text{th}$ applicant is the overall best, what is the probability of hiring this applicant \par
if we use the \say{look-then-leap} strategy detailed above? \par
\hint{
Under what conditions would we \textit{not} hire this applicant? \par
This probability depends on $k$ and $x$.
}
\begin{solution}
Say that the $x^\text{th}$ applicant is the best overall. If we do not hire this applicant,
we must have hired a candidate that came before them. \par
\vspace{2mm}
What is the probability of this? We saw $x-1$ applicants before the $x^\text{th}$. \par
If we hired one of them, the best of those initial $x-1$ candidates did \textit{not} fall
into the initial $k-1$ applicants we rejected.
\note{(This is again verified by contradiction: if the best of the first $x-1$ applicants
\textit{was} within the first $k-1$, we would hire the $x^\text{th}$)}
\vspace{2mm}
There are $x-1$ positions to place the best of the first $x-1$ candidates, \par
and $k-1$ of these positions are initially rejected. \par
Thus, the probability of the best of the first $x-1$ applicants being rejected is $\frac{k-1}{x-1}$.
\vspace{2mm}
Unraveling our previous logic, we find that the probability we are interested in is also $\frac{k-1}{x-1}$.
\end{solution}
\vfill
\problem{}<phisubn>
Consider the secretary problem with $n$ applicants. \par
If we reject the first $k$ applicants and hire the first \say{best-yet} applicant we encounter, \par
what is the probability that we select the best candidate? \par
Call this probability $\phi_n(k)$.
\begin{solution}
Using \ref{seca} and \ref{secb}, this is straightforward:
\[
\phi_n(k)
= \sum_{x = k}^{n}\left( \frac{1}{n} \times \frac{k-1}{x-1} \right)
\]
\end{solution}
\vfill
\problem{}
Find the $k$ that maximizes $\phi_n(k)$ for $n$ in $\{1, 2, 3, 4, 5\}$.
\begin{solution}
Brute force. We already know that $\phi_1(1) = 1.0$ and $\phi_2(1) = \phi_3(2) = 0.5$. \par
The maximal value of $\phi_4$ is $\phi_4(2) = 0.46$, and of $\phi_5$ is $\phi_5(3) = 0.43$.
\end{solution}
\vfill
\pagebreak
%
% MARK: Page
%
\problem{}
Let $r = \frac{k-1}{n}$, the fraction of applicants we reject. Show that
\begin{equation*}
\phi_n(k)
= r \sum_{x = k}^{n}\left( \frac{1}{x-1} \right)
\end{equation*}
\begin{solution}
This is easy.
\end{solution}
\vfill
\problem{}
With a bit of fairly unpleasant calculus, we can show that the following is true for large $n$:
\begin{equation*}
\sum_{x=k}^{n}\frac{1}{x-1}
~\approx~ \ln\Bigl(\frac{n}{k}\Bigr)
\end{equation*}
Use this fact to find an approximation of $\phi_n(k)$ at large $n$ in terms of $r$. \par
\hint{If $n$ is big, $\frac{k-1}{n} \approx \frac{k}{n}$.}
\begin{solution}
\begin{equation*}
\phi_n(k)
~=~ r \sum_{x = k}^{n}\left( \frac{1}{x-1} \right)
~\approx~ r \times \ln\left(\frac{n}{k}\right)
~=~ -r \times \ln\left(\frac{k}{n}\right)
~\approx~ -r \times \ln(r)
\end{equation*}
\end{solution}
\vfill
\problem{}
Find the $r$ that maximizes $\lim_{n \rightarrow \infty} \phi_n$. \par
Also, find the value of $\phi_n$ at this point. \par
\note{If you aren't familiar with calculus, ask an instructor for help.}
\begin{solution}
Use the usual calculus tricks:
\begin{equation*}
\frac{d}{dr} \bigl( -r \times \ln(r) \bigr)
= -1 - \ln(r)
\end{equation*}
Which is zero at $r = e^{-1}$. The value of $-r \times \ln(r)$ at this point is also $\frac{1}{e}$.
\end{solution}
\vfill
Thus, the \say{look-then-leap} strategy with $r = e^{-1}$ should select the best candidate about $e^{-1} \approx 37\%$ of the time,
\textit{regardless of $n$.} Our probability of success does not change as $n$ gets larger! \par
\note{Recall that the random strategy succeeds with probability $\nicefrac{1}{n}$. \par
That is, it quickly becomes small as $n$ gets large.}
\pagebreak

View File

@ -0,0 +1,204 @@
\section{Another Secretary Problem}
As you may have already noticed, the secretary problem we discussed in the previous section
is somewhat disconnected from reality. Under what circumstances would one only be satisfied
with the \textit{absolute best} candidate? It may make more sense to maximize the average rank
of the candidate we hire, rather than the probability of selecting the best. This is the problem
we'll attempt to solve next.
\definition{}
The problem we're solving is summarized below.
Note that this is nearly identical to the classical secretary problem in the previous
section---the only thing that has changed is the goal.
\begin{itemize}
\item We have exactly one position to fill, and we must fill it with one of $n$ applicants.
\item These $n$ applicants, if put together, can be ranked unambiguously from \say{best} to \say{worst}.
\item We interview applicants in a random order, one at a time.
\item After each interview, we either reject or select the applicant.
\item We cannot return to an applicant we've rejected.
\item The process ends once we select an applicant.
\vspace{2mm}
\item Our goal is to maximize the rank of the applicant we hire.
\end{itemize}
\definition{}<mod>
Just like before, we need to restate this problem in the language of probability. \par
To do this, we'll say that each candidate has a \textit{quality} rating in $[0, 1]$. \par
\vspace{2mm}
Our series of applicants then becomes a series of random variables $\mathcal{X}_1, \mathcal{X}_2, ..., \mathcal{X}_n$, \par
where each $\mathcal{X}_i$ is drawn uniformly from $[0, 1]$.
\problem{}<notsatisfy>
The modification in \ref{mod} doesn't fully satisfy the constraints of the secretary problem. \par
Why not?
\begin{solution}
If we observe $\mathcal{X}_i$ directly, we obtain \textit{absolute} scores. \par
This is more information than the secretary problem allows us to have---we can know which of
two candidates is better, but \textit{not by how much}.
\end{solution}
\vfill
Ignore this issue for now. We'll return to it later.
\problem{}
Let $\mathcal{X}$ be a random variable uniformly distributed over $[0, 1]$. \par
Given a real number $x$, what is $\mathcal{P}(\mathcal{X} \leq x)$?
\begin{solution}
\begin{equation*}
\mathcal{P}(\mathcal{X} \leq x) =
\begin{cases}
0 & x \leq 0 \\
x & 0 < x < 1 \\
1 & \text{otherwise}
\end{cases}
\end{equation*}
\end{solution}
\vfill
\problem{}
Say we have five random variables $\mathcal{X}_1, \mathcal{X}_2, ..., \mathcal{X}_5$. \par
Given some $y$, what is the probability that all five $\mathcal{X}_i$ are smaller than $y$?
\begin{solution}
Naturally, this is $\mathcal{P}(\mathcal{X} \leq y)^5$, which is $y^5$.
\end{solution}
\vfill
\pagebreak
%
% MARK: Page
%
\definition{}
Say we have a random variable $\mathcal{X}$ which we observe $n$ times. \note{(for example, we repeatedly roll a die)}
We'll arrange these observations in increasing order, labeled $x_1 < x_2 < ... < x_n$. \par
Under this definition, $x_i$ is called the \textit{$i^\text{th}$ order statistic}---the $i^\text{th}$ smallest sample of $\mathcal{X}$.
\problem{}<ostatone>
Say we have a random variable $\mathcal{X}$ uniformly distributed on $[0, 1]$, of which we take $5$ observations. \par
Given some $y$, what is the probability that $x_5 < y$? How about $x_4 <y $?
\begin{solution}
$x_5 < y$: ~This is a restatement of the previous problem.
\vspace{2mm}
$x_4 < y$: ~We need 4 measurements to be smaller,
and one to be larger. Accounting for permutations, we get
$
5\mathcal{P}(\mathcal{X} \leq y)^4
\mathcal{P}(\mathcal{X} > y)
+
\mathcal{P}(\mathcal{X} \leq y)^5
$, which is $5y^4(1-y) + y^5$.
\end{solution}
\vfill
\problem{}
Consider the same setup as \ref{ostatone}, but with $n$ measurements. \par
What is the probability that $x_i < y$ for a given $y$?
\begin{solution}
\begin{equation*}
\mathcal{P}(x_i < y)
~=~
\sum_{j=i}^{n}
\binom{n}{j} \times
y^j
(1-y)^{n-j}
\end{equation*}
\end{solution}
\vfill
\remark{}
The expected value of the $i^\text{th}$ order statistic on $n$ samples of the uniform distribution is below.
\begin{equation*}
\mathcal{E}(x_i) = \frac{i}{n+1}
\end{equation*}
We do not have the tools to derive this yet.
\pagebreak
%
% MARK: Page
%
\definition{}
Recall \ref{notsatisfy}. We need one more modification. \par
In order to preserve the constraints of the problem, we will not be allowed to observe $\mathcal{X}_i$ directly. \par
Instead, we'll be given an \say{indicator} $\mathcal{I}_i$ for each $\mathcal{X}_i$, which produces values in $\{0, 1\}$. \par
If the value we observe when interviewing $\mathcal{X}_i$ is the best we've seen so far, $\mathcal{I}_i$ will produce $1$. \par
If it isn't, $\mathcal{I}_i$ produces $0$.
\problem{}
Given a secretary problem with $n$ applicants, what is $\mathcal{E}(\mathcal{I}_i)$?
\begin{solution}
\begin{equation*}
\mathcal{E}(\mathcal{I}_i) = \frac{1}{i}
\end{equation*}
\end{solution}
\vfill
\problem{}
What is $\mathcal{E}(\mathcal{X}_i ~|~ \mathcal{I}_i = 1)$? \par
In other words, what is the expected value of $\mathcal{X}_i$ given that \par
we know this candidate is the best we've seen so far?
\begin{solution}
This is simply the expected value of the $i^\text{th}$ order statistic on $i$ samples:
\begin{equation*}
\mathcal{E}(\mathcal{X}_i ~|~ \mathcal{I}_i = 1) = \frac{i}{i+1}
\end{equation*}
\end{solution}
\vfill
\pagebreak
\problem{}
In the previous section, we found that the optimal strategy for the classical secretary problem is to
reject the first $e^{-1} \times n$ candidates, and select the next \say{best-yet} candidate we see. \par
\vspace{2mm}
How effective is this strategy for the ranked secretary problem? \par
Find the expected rank of the applicant we select using this strategy.
\vfill
\problem{}
Assuming we use the same kind of strategy as before (reject $k$, select the next \say{best-yet} candidate), \par
show that $k = \sqrt{n}$ optimizes the expected rank of the candidate we select.
\begin{solution}
This is a difficult bonus problem. See
\texttt{Neil Bearden, J. (2006). A new secretary problem with rank-based selection and cardinal payoffs.}
\end{solution}
\vfill
\pagebreak

View File

@ -0,0 +1,81 @@
\section{The Secretary, Again}
Now, let's solve the secretary problem as a stopping rule problem. \par
The first thing we need to do is re-write it into the form we discussed in the previous section. \par
Namely, we need...
\begin{itemize}
\item A sequence of random variables $\mathcal{X}_1, \mathcal{X}_2, ..., \mathcal{X}_t$
\item A sequence of reward functions $y_0, y_1(\sigma_1), ..., y_t(\sigma_t)$.
\end{itemize}
\vspace{2mm}
For convenience, I've summarized the secretary problem below:
\begin{itemize}
\item We have exactly one position to fill, and we must fill it with one of $n$ applicants.
\item These $n$ applicants, if put together, can be ranked unambiguously from \say{best} to \say{worst}.
\item We interview applicants in a random order, one at a time.
\item After each interview, we reject the applicant and move on, \par
or select the applicant and end the process.
\item We cannot return to an applicant we've rejected.
\item Our goal is to select the \textit{overall best} applicant.
\end{itemize}
\definition{}
First, we'll define a sequence of $\mathcal{X}_i$ that fits this problem. \par
Each $\mathcal{X}_i$ will give us the \textit{relative rank} of each applicant. \par
For example, if $\mathcal{X}_i = 1$, the $i^\text{th}$ applicant is the best of the first $i$. \par
If $\mathcal{X}_i = 3$, two applicants better than $i$ came before $i$.
\problem{}
What values can $\mathcal{X}_1$ take, and what are their probabilities? \par
How about $\mathcal{X}_2$, $\mathcal{X}_3$, and $\mathcal{X}_4$?
\vfill
\remark{}
Now we need to define $y_n(\sigma_n)$. Intuitively, it may make sense to set $y_n = 1$ if the $n^\text{th}$
applicant is the best, and $y_n = 0$ otherwise---but this doesn't work.
\vspace{2mm}
As defined in the previous section, $y_n$ can only depend on $\sigma_n = [x_1, x_2, ..., x_n]$, the previous $n$ observations.
We cannot define $y_n$ as specified above because, having seen $\sigma_n$, we \textit{cannot} know whether or not the $n^\text{th}$
applicant is the best.
\vspace{2mm}
To work around this, we'll define our reward for selecting the $n^\text{th}$ applicant as the \textit{probability}
that this applicant is the best.
\problem{}
Define $y_n$.
\begin{solution}
\begin{itemize}
\item An applicant should only be selected if $\mathcal{X}_i = 1$
\item If we accept the $j^\text{th}$ applicant, the probability we select the absolute best is equal to \par
the probability that the best of the first $j$ candidates is the best overall. \par
\vspace{1mm}
This is just the probability that the best candidate overall appears among the first $j$, \par
and is thus $\nicefrac{j}{n}$.
\end{itemize}
So,
\begin{equation*}
y_j(\sigma_j) =
\begin{cases}
\nicefrac{j}{n} & x_j = 1 \\
0 & \text{otherwise}
\end{cases}
\end{equation*}
\vspace{2mm}
Note that $y_0 = 0$, and that $y_n$ depends only on $x_n$.
\end{solution}
\vfill
\pagebreak

View File

@ -722,11 +722,10 @@
% Misc helper commands % % Misc helper commands %
% -------------------- % % -------------------- %
% Inline note
\NewDocumentCommand{\ilnote}{ +m }{\begingroup\color{gray}#1\endgroup}
\NewDocumentCommand{\note}{ d[] +m }{ \NewDocumentCommand{\note}{ d[] +m }{
\IfNoValueTF{#1}{% \IfNoValueTF{#1}{\ilnote{#2}}{\ilnote{\textit{#1:} #2}}\par
\begingroup\color{gray}#2\endgroup%
}{%
\begingroup\color{gray}\textit{#1:} #2\endgroup%
}\par
} }
\long\def\hint#1{\note[Hint]{#1}} \long\def\hint#1{\note[Hint]{#1}}