% =================================================================
%  Lecture 13
%  Source: handwritten notes (Mathpix-converted) + canonical typeset
%  notes from /static/doc/math/stochastic-analysis/sections/section1.tex
%  and section2.tex (same author).
% =================================================================
\section[Lecture 13 -- Conditional Expectation and Martingales]{Lecture 13 \textemdash{} Conditional Expectation and Martingales}
\label{sec:lec13}

The final lecture upgrades the elementary conditional expectation
\(\E(X\mid B)=\E(X\indic_B)/\P(B)\) to a fully measure-theoretic object:
\(\E[X\mid\Gcal]\) for an arbitrary sub-\(\sigma\)-field \(\Gcal\). Once
this object is in hand, filtrations, martingales and stopping times
follow almost immediately, and the Doob optional sampling theorem closes
the course.

\subsection{Conditional expectation with respect to a partition}

Let \((\Omega,\Acal,\P)\) be a probability space. For a simple random
variable \(X=\sum_{j=1}^{m}x_j\indic_{D_j^X}\) with
\(D_j^X=\{X=x_j\}\), the collection
\(\Dcal(X)=\{D_1^X,\dots,D_m^X\}\) is the \emph{partition of \(\Omega\)
associated with \(X\)}.

\begin{definition}{Conditional expectation given a partition}{cond-exp-partition}
Let \(\Dcal=\{D_1,\dots,D_n\}\) be a (finite) partition of \(\Omega\)
into events \(D_i\in\Acal\) with \(\P(D_i)>0\) for every \(i\). The \emph{conditional expectation of a simple random
variable \(X\) given \(\Dcal\)} is the simple, \(\sigma(\Dcal)\)-measurable
random variable
\[
   \E(X\mid\Dcal)
   \;=\;\sum_{i=1}^{n}\E(X\mid D_i)\,\indic_{D_i}
   \;=\;\sum_{i=1}^{n}\sum_{j=1}^{m} x_j\,\P(D_j^X\mid D_i)\,\indic_{D_i},
\]
where \(\E(X\mid D_i)=\E(X\indic_{D_i})/\P(D_i)\) is the elementary
conditional expectation. For two simple random variables \(X,Y\) we
write \(\E(X\mid Y):=\E(X\mid\Dcal(Y))\); this is a function of \(Y\)
and is \(\sigma(Y)\)-measurable.
\end{definition}

\begin{remark}
\(\E(\,\cdot\mid\Dcal)=\E(\,\cdot\mid\sigma(\Dcal))\) in the sense of
the general definition below, and for a simple random variable \(X\)
one has \(\sigma(X)=\sigma(\Dcal(X))\). Thus the partition picture is just the
\(\sigma\)-algebra picture restricted to the simple case.
\end{remark}

\subsection{General conditional expectation}

The right level of generality replaces the partition by an arbitrary
sub-\(\sigma\)-field \(\Gcal\subseteq\Acal\). The two characterising
properties---measurability and partial averaging---uniquely determine
the conditional expectation almost surely.

\begin{definition}{Conditional expectation given a $\sigma$-field}{cond-exp-general}
Let \(X\in L^{1}(\Omega,\Acal,\P)\) and let \(\Gcal\subseteq\Acal\) be a
sub-\(\sigma\)-field. A \emph{conditional expectation of \(X\) given
\(\Gcal\)}, denoted \(\E[X\mid\Gcal]\), is any integrable random variable
satisfying
\begin{enumerate}[label=(\roman*)]
  \item \textbf{(measurability)} \(\E[X\mid\Gcal]\) is
        \(\Gcal\)-measurable;
  \item \textbf{(partial averaging)} for every \(A\in\Gcal\),
        \[
           \int_A \E[X\mid\Gcal]\,d\P
           \;=\; \int_A X\,d\P,
           \qquad\text{equivalently}\qquad
           \E\bigl[\indic_A\,\E[X\mid\Gcal]\bigr]=\E[\indic_A X].
        \]
\end{enumerate}
\end{definition}

\begin{theorem}{Existence and uniqueness}{cond-exp-existence}
For every \(X\in L^{1}(\Omega,\Acal,\P)\) and every sub-\(\sigma\)-field
\(\Gcal\subseteq\Acal\), a conditional expectation \(\E[X\mid\Gcal]\)
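exists, and any two versions agree \(\P\)-a.s. Existence follows from
the Radon--Nikodym theorem applied, after splitting \(X=X^{+}-X^{-}\),
to the finite measures \(\nu^{\pm}(A):=\E[\indic_A X^{\pm}]\),
\(A\in\Gcal\), which are absolutely continuous with respect to the
restriction of \(\P\) to \(\Gcal\) (no normalisation is needed, and
\(\E[X^{\pm}]\) may vanish);
uniqueness follows from the partial-averaging property applied to the
events \(\{Y>Y'\}\) and \(\{Y<Y'\}\) in \(\Gcal\), where \(Y,Y'\) are
any two versions.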
\end{theorem}

\begin{remark}
For the trivial \(\sigma\)-field \(\Acal_0=\{\emptyset,\Omega\}\) one has
\(\E[X\mid\Acal_0]=\E[X]\), so ordinary expectation is recovered as the
extreme case.
\end{remark}

\subsection{Properties of conditional expectation}

The following properties pin down conditional expectation in practice;
each follows from \cref{def:cond-exp-general} together with standard
measure-theoretic limit theorems.

\begin{theorem}{Basic properties of conditional expectation}{cond-exp-props}
Let \(X,Y\in L^{1}(\Omega,\Acal,\P)\) and let
\(\mathcal{H}\subseteq\Gcal\subseteq\Acal\) be sub-\(\sigma\)-fields.
\begin{enumerate}[label=(\roman*)]
  \item \textbf{(taking out what is known)} If \(X\) is
        \(\Gcal\)-measurable, then \(\E[X\mid\Gcal]=X\) a.s.; more
        generally, if \(X\) is \(\Gcal\)-measurable and \(XY\in L^{1}\),
        then \(\E[XY\mid\Gcal]=X\,\E[Y\mid\Gcal]\) a.s.
  \item \textbf{(linearity)} For \(a,b\in\R\),
        \[
            \E[aX+bY\mid\Gcal]=a\,\E[X\mid\Gcal]+b\,\E[Y\mid\Gcal]\quad\text{a.s.}
        \]
  \item \textbf{(tower property)}
        \(\E\bigl[\E[X\mid\Gcal]\bigm|\mathcal{H}\bigr]=\E[X\mid\mathcal{H}]\)
        a.s.; in particular
        \(\E\bigl[\E[X\mid\Gcal]\bigr]=\E[X]\).
  \item \textbf{(independence)} If \(\sigma(X)\) is independent of
        \(\Gcal\), then \(\E[X\mid\Gcal]=\E[X]\) a.s.
  \item \textbf{(independence + measurability)} If \(X\) is
        \(\Gcal\)-measurable and \(Y\) is independent of \(\Gcal\),
        then for any Borel \(h:\R^{2}\to\R\) with
        \(h(X,Y)\in L^{1}\),
        \[
            \E[h(X,Y)\mid\Gcal]\;=\;H(X), \qquad H(x):=\E[h(x,Y)].
        \]
  \item \textbf{(conditional Jensen)} For convex
        \(g:\R\to\R\) with \(g(X)\in L^{1}\),
        \[
            g\bigl(\E[X\mid\Gcal]\bigr)\;\le\;\E[g(X)\mid\Gcal]\quad\text{a.s.}
        \]
\end{enumerate}
\end{theorem}

\begin{lemma}{Doob's measurability lemma}{doob-measurability}
Let \((\Psi,\Gcal)\) be a measurable space and let \(X:\Omega\to\Psi\)
be \(\Acal/\Gcal\)-measurable. A
function \(Y:\Omega\to\R\) is \(\sigma(X)\)-measurable if and only if
there exists a \(\Gcal\)-measurable \(h:\Psi\to\R\) with \(Y=h(X)\). In
particular, for integrable \(Y\), \(\E[Y\mid\sigma(X)]\) is necessarily
a measurable function of \(X\).
\end{lemma}

\subsection{Geometric interpretation}

When \(X\) is square-integrable, conditional expectation is the
orthogonal projection of \(X\) onto the closed subspace of
\(\Gcal\)-measurable square-integrable random variables.

\begin{theorem}{Conditional expectation as $L^{2}$-projection}{cond-exp-l2}
Let \(X\in L^{2}(\Omega,\Acal,\P)\) and \(\Gcal\subseteq\Acal\). Then
\(\E[X\mid\Gcal]\) is the unique element (up to a.s.\ equality) of
\(L^{2}(\Omega,\Gcal,\P)\) minimising the mean-squared error: for every
\(Z\in L^{2}(\Omega,\Gcal,\P)\),
\[
   \E\bigl[(X-Z)^{2}\bigr]\;\ge\;\E\Bigl[\bigl(X-\E[X\mid\Gcal]\bigr)^{2}\Bigr],
\]
with equality iff \(Z=\E[X\mid\Gcal]\) a.s.
\end{theorem}

\begin{remark}
The residual \(X-\E[X\mid\Gcal]\) is orthogonal (in \(L^{2}\)) to every
\(\Gcal\)-measurable square-integrable random variable; in particular
\(\E\bigl[(X-\E[X\mid\Gcal])\,\E[X\mid\Gcal]\bigr]=0\). This is the
Pythagorean identity
\(\E[X^{2}]=\E\bigl[(X-\E[X\mid\Gcal])^{2}\bigr]+\E\bigl[\E[X\mid\Gcal]^{2}\bigr]\)
behind variance decomposition.
\end{remark}

\subsection{Filtrations, adapted processes, and martingales}

Conditional expectation acquires its real power when the conditioning
\(\sigma\)-field grows with time.

\begin{definition}{Filtration and adapted process}{filtration}
A \emph{filtration} on \((\Omega,\Acal,\P)\) is an increasing family
\(\Fcal=(\Fcal_n)_{n\in\N}\) of sub-\(\sigma\)-fields of \(\Acal\), i.e.\
\(\Fcal_m\subseteq\Fcal_n\) whenever \(m\le n\). A stochastic process
\(X=(X_n)_{n\in\N}\) is \emph{\(\Fcal\)-adapted} if \(X_n\) is
\(\Fcal_n\)-measurable for each \(n\). The \emph{natural filtration} of
\(X\) is \(\Fcal_n^{X}:=\sigma(X_0,X_1,\dots,X_n)\); every process is
adapted to its natural filtration.
\end{definition}

\begin{definition}{Martingale, sub- and supermartingale}{martingale}
Let \((\Omega,\Acal,\P)\) be a probability space with filtration
\(\Fcal=(\Fcal_n)_{n\in\N}\). An \(\Fcal\)-adapted process
\(M=(M_n)_{n\in\N}\) is called
\begin{itemize}
  \item an \emph{\(\Fcal\)-martingale} if \(\E|M_n|<\infty\) and
        \(\E[M_{n+1}\mid\Fcal_n]=M_n\) a.s.\ for all \(n\);
  \item an \emph{\(\Fcal\)-submartingale} if instead
        \(\E[M_{n+1}\mid\Fcal_n]\ge M_n\) a.s.;
  \item an \emph{\(\Fcal\)-supermartingale} if
        \(\E[M_{n+1}\mid\Fcal_n]\le M_n\) a.s.
\end{itemize}
By the tower property, \(\E[M_n]\) is constant in \(n\) for a martingale,
non-decreasing for a submartingale, and non-increasing for a
supermartingale.
\end{definition}

\begin{example}[Conditional expectation process]
Let \(X\in L^{1}(\Omega,\Acal,\P)\) and set \(M_n:=\E[X\mid\Fcal_n]\).
The tower property gives
\[
   \E[M_{n+1}\mid\Fcal_n]
   \;=\;\E\bigl[\E[X\mid\Fcal_{n+1}]\bigm|\Fcal_n\bigr]
   \;=\;\E[X\mid\Fcal_n]\;=\;M_n,
\]
so \((M_n)\) is an \(\Fcal\)-martingale. This is the prototypical
example.
\end{example}

\begin{example}[Random walk]
Let \((\xi_k)_{k\ge 1}\) be i.i.d.\ with \(\E|\xi_1|<\infty\) and
\(\E\xi_1=\mu\); set \(S_0=0\), \(S_n=\xi_1+\dots+\xi_n\), with
\(\Fcal_0=\{\emptyset,\Omega\}\) and
\(\Fcal_n=\sigma(\xi_1,\dots,\xi_n)\). Then \(S_n-n\mu\) is an
\(\Fcal\)-martingale, and \(S_n\) itself is a martingale iff
\(\mu=0\). If \(\mu=0\) and additionally
\(\E\xi_1^{2}=\sigma^{2}<\infty\), then
\(S_n^{2}-n\sigma^{2}\) is also a martingale.
\end{example}

\begin{lemma}{Convex transform of a martingale}{convex-martingale}
Let \(M\) be an \(\Fcal\)-martingale and let \(\varphi:\R\to\R\) be
convex with \(\varphi(M_n)\in L^{1}\) for every \(n\). Then
\((\varphi(M_n))_{n\in\N}\) is an \(\Fcal\)-submartingale. In
particular \(|M_n|\) and \(M_n^{2}\) (when integrable) are
submartingales.
\end{lemma}

\subsection{Stopping times and the optional sampling theorem}

\begin{definition}{Stopping time}{stopping-time}
A random variable \(\tau:\Omega\to\N\cup\{\infty\}\) is an
\emph{\(\Fcal\)-stopping time} if \(\{\tau\le n\}\in\Fcal_n\) for every
\(n\in\N\); equivalently \(\{\tau=n\}\in\Fcal_n\) for every \(n\). The
\emph{\(\sigma\)-field of events prior to \(\tau\)} is
\[
   \Fcal_\tau\;:=\;\bigl\{A\in\Acal:\;A\cap\{\tau\le n\}\in\Fcal_n\text{ for all }n\bigr\}.
\]
\end{definition}

\begin{lemma}{Stopped processes}{stopped}
If \(X\) is \(\Fcal\)-adapted and \(\tau\) is an \(\Fcal\)-stopping
time, the \emph{stopped process}
\(X^{\tau}:=(X_{n\wedge\tau})_{n\in\N}\) is again \(\Fcal\)-adapted.
If \(M\) is an \(\Fcal\)-martingale (resp.\ sub-, supermartingale), then
so is \(M^{\tau}\).
\end{lemma}

\begin{theorem}{Doob's optional sampling theorem}{optional-sampling}
Let \(M=(M_n)_{n=0,1,\dots,N}\) be a martingale (resp.\ sub-,
supermartingale) on a finite horizon, and let \(\sigma\le\tau\) be two
\(\Fcal\)-stopping times taking values in \(\{0,1,\dots,N\}\). Then
\(M_\sigma,M_\tau\in L^{1}\) and
\[
   \E[M_\tau\mid\Fcal_\sigma]\;=\;M_\sigma\quad\text{a.s.}
\]
(with \(\ge\) for submartingales and \(\le\) for supermartingales). In
particular, taking \(\sigma\equiv 0\) and then expectations gives, in
the martingale case, the optional-stopping identity
\[
   \E[M_\tau]\;=\;\E[M_0]
\]
for every stopping time \(\tau\) with values in \(\{0,1,\dots,N\}\).
\end{theorem}

\begin{corollary}{Stopping-time characterisation of martingales}{ot-char}
An \(\Fcal\)-adapted integrable process
\(M=(M_n)_{n=0,\dots,N}\) is a martingale if and only if
\(\E[M_\tau]=\E[M_0]\) for every \(\Fcal\)-stopping time \(\tau\) with
values in \(\{0,1,\dots,N\}\).
\end{corollary}

\begin{remark}
For unbounded \(\tau\) the identity \(\E[M_\tau]=\E[M_0]\) can fail
(e.g.\ a simple symmetric random walk stopped at the first hit of
\(\{1\}\) has \(\tau<\infty\) a.s.\ and \(M_0=0\), but \(M_\tau=1\)
a.s.). For an a.s.\ finite stopping time \(\tau\), sufficient
conditions guaranteeing it include: \(\tau\) is bounded; \(M\) is
uniformly integrable; or \(\E\tau<\infty\) and the increments
\(M_n-M_{n-1}\) are uniformly bounded.
\end{remark}

\begin{theorem}{Doob decomposition}{doob-decomp}
Every discrete-time \(\Fcal\)-submartingale \(X=(X_n)_{n=0,\dots,N}\)
admits a unique (a.s.) decomposition
\[
   X_n\;=\;M_n+A_n,
\]
where \(M\) is an \(\Fcal\)-martingale with \(M_0=X_0\) and \(A\) is a
\emph{predictable} (i.e.\ \(A_n\) is \(\Fcal_{n-1}\)-measurable for
every \(n\ge 1\)) non-decreasing process with \(A_0=0\); explicitly,
\(A_n=\sum_{k=1}^{n}\E[X_k-X_{k-1}\mid\Fcal_{k-1}]\). Symmetrically, a
supermartingale
decomposes as a martingale plus a predictable non-increasing process.
\end{theorem}