% =================================================================
% Lecture 13
% Source: handwritten notes (Mathpix-converted) + canonical typeset
% notes from /static/doc/math/stochastic-analysis/sections/section1.tex
% and section2.tex (same author).
% =================================================================
\section[Lecture 13 -- Conditional Expectation and Martingales]{Lecture 13 \textemdash{} Conditional Expectation and Martingales}
\label{sec:lec13}
The final lecture upgrades the elementary conditional expectation \(\E(X\mid B)=\E(X\indic_B)/\P(B)\) to a fully measure-theoretic object: \(\E[X\mid\Gcal]\) for an arbitrary sub-\(\sigma\)-field \(\Gcal\). Once this object is in hand, filtrations, martingales and stopping times follow almost immediately, and the Doob optional sampling theorem closes the course.
\subsection{Conditional expectation with respect to a partition}
Let \((\Omega,\Acal,\P)\) be a probability space. For a simple random variable \(X=\sum_{j=1}^{m}x_j\indic_{D_j^X}\) with \(D_j^X=\{X=x_j\}\), the collection \(\Dcal(X)=\{D_1^X,\dots,D_m^X\}\) is the \emph{partition of \(\Omega\) associated with \(X\)}.
\begin{definition}{Conditional expectation given a partition}{cond-exp-partition}
Let \(\Dcal=\{D_1,\dots,D_n\}\) be a (finite) partition of \(\Omega\) with \(\P(D_i)>0\) for every \(i\). The \emph{conditional expectation of a simple random variable \(X\) given \(\Dcal\)} is the simple, \(\sigma(\Dcal)\)-measurable random variable
\[ \E(X\mid\Dcal) \;=\;\sum_{i=1}^{n}\E(X\mid D_i)\,\indic_{D_i} \;=\;\sum_{i=1}^{n}\sum_{j=1}^{m} x_j\,\P(D_j^X\mid D_i)\,\indic_{D_i}, \]
where \(\E(X\mid D_i)=\E(X\indic_{D_i})/\P(D_i)\) is the elementary conditional expectation. For two simple random variables \(X,Y\) we write \(\E(X\mid Y):=\E(X\mid\Dcal(Y))\); this is a function of \(Y\) and is \(\sigma(Y)\)-measurable.
\end{definition} \begin{remark} \(\E(\,\cdot\mid\Dcal)=\E(\,\cdot\mid\sigma(\Dcal))\) in the sense of the general definition below, and for a simple random variable \(X\), \(\sigma(X)=\sigma(\Dcal(X))\). Thus the partition picture is just the \(\sigma\)-algebra picture restricted to the simple case. \end{remark} \subsection{General conditional expectation} The right level of generality replaces the partition by an arbitrary sub-\(\sigma\)-field \(\Gcal\subseteq\Acal\). The two characterising properties---measurability and partial averaging---uniquely determine the conditional expectation almost surely. \begin{definition}{Conditional expectation given a $\sigma$-field}{cond-exp-general} Let \(X\in L^{1}(\Omega,\Acal,\P)\) and let \(\Gcal\subseteq\Acal\) be a sub-\(\sigma\)-field. A \emph{conditional expectation of \(X\) given \(\Gcal\)}, denoted \(\E[X\mid\Gcal]\), is any integrable random variable satisfying \begin{enumerate}[label=(\roman*)] \item \textbf{(measurability)} \(\E[X\mid\Gcal]\) is \(\Gcal\)-measurable; \item \textbf{(partial averaging)} for every \(A\in\Gcal\), \[ \int_A \E[X\mid\Gcal]\,d\P \;=\; \int_A X\,d\P, \qquad\text{equivalently}\qquad \E\bigl[\indic_A\,\E[X\mid\Gcal]\bigr]=\E[\indic_A X]. \] \end{enumerate} \end{definition} \begin{theorem}{Existence and uniqueness}{cond-exp-existence} For every \(X\in L^{1}(\Omega,\Acal,\P)\) and every sub-\(\sigma\)-field \(\Gcal\subseteq\Acal\), a conditional expectation \(\E[X\mid\Gcal]\) exists, and any two versions agree \(\P\)-a.s. Existence follows from the Radon--Nikodym theorem: for \(X\ge 0\) with \(\E[X]>0\) (the case \(\E[X]=0\) being trivial), apply it on \((\Omega,\Gcal)\) to the probability measure \(\Q(A):=\E[\indic_A X]/\E[X]\), \(A\in\Gcal\), which is absolutely continuous with respect to the restriction of \(\P\) to \(\Gcal\); its \(\Gcal\)-measurable density \(Z\) gives \(\E[X\mid\Gcal]=\E[X]\,Z\). The general case follows by splitting \(X=X^{+}-X^{-}\). Uniqueness: if \(Y\) and \(Y'\) are two versions, the partial-averaging property applied to the event \(\{Y>Y'\}\in\Gcal\) gives \(\E\bigl[(Y-Y')\indic_{\{Y>Y'\}}\bigr]=0\), hence \(Y\le Y'\) a.s., and by symmetry \(Y=Y'\) a.s. \end{theorem} \begin{remark} For the trivial \(\sigma\)-field \(\Acal_0=\{\emptyset,\Omega\}\) one has \(\E[X\mid\Acal_0]=\E[X]\), so ordinary expectation is recovered as the extreme case. 
\end{remark} \subsection{Properties of conditional expectation} The following properties pin down conditional expectation in practice; each follows from \cref{def:cond-exp-general} together with standard measure-theoretic limit theorems. \begin{theorem}{Basic properties of conditional expectation}{cond-exp-props} Let \(X,Y\in L^{1}(\Omega,\Acal,\P)\) and let \(\mathcal{H}\subseteq\Gcal\subseteq\Acal\) be sub-\(\sigma\)-fields. \begin{enumerate}[label=(\roman*)] \item \textbf{(taking out what is known)} If \(X\) is \(\Gcal\)-measurable, then \(\E[X\mid\Gcal]=X\) a.s.; more generally, if \(X\) is \(\Gcal\)-measurable and \(XY\in L^{1}\), then \(\E[XY\mid\Gcal]=X\,\E[Y\mid\Gcal]\) a.s. \item \textbf{(linearity)} For \(a,b\in\R\), \[ \E[aX+bY\mid\Gcal]=a\,\E[X\mid\Gcal]+b\,\E[Y\mid\Gcal]\quad\text{a.s.} \] \item \textbf{(tower property)} \(\E\bigl[\E[X\mid\Gcal]\bigm|\mathcal{H}\bigr]=\E[X\mid\mathcal{H}]\) a.s.; in particular \(\E\bigl[\E[X\mid\Gcal]\bigr]=\E[X]\). \item \textbf{(independence)} If \(\sigma(X)\) is independent of \(\Gcal\), then \(\E[X\mid\Gcal]=\E[X]\) a.s. \item \textbf{(independence + measurability)} If \(X\) is \(\Gcal\)-measurable and \(Y\) is independent of \(\Gcal\), then for any Borel \(h:\R^{2}\to\R\) with \(h(X,Y)\in L^{1}\), \[ \E[h(X,Y)\mid\Gcal]\;=\;H(X), \qquad H(x):=\E[h(x,Y)]. \] \item \textbf{(conditional Jensen)} For convex \(g:\R\to\R\) with \(g(X)\in L^{1}\), \[ g\bigl(\E[X\mid\Gcal]\bigr)\;\le\;\E[g(X)\mid\Gcal]\quad\text{a.s.} \] \end{enumerate} \end{theorem} \begin{lemma}{Doob's measurability lemma}{doob-measurability} Let \(X:\Omega\to\Psi\), where \((\Psi,\Gcal)\) is a measurable space (in this lemma \(\Gcal\) denotes a \(\sigma\)-field on \(\Psi\), not on \(\Omega\)), and let \(\sigma(X)=\{X^{-1}(B):B\in\Gcal\}\). A function \(Y:\Omega\to\R\) is \(\sigma(X)\)-measurable if and only if there exists a \(\Gcal\)-measurable \(h:\Psi\to\R\) with \(Y=h(X)\). In particular, \(\E[Y\mid\sigma(X)]\) is necessarily a measurable function of \(X\). 
\end{lemma} \subsection{Geometric interpretation} When \(X\) is square-integrable, conditional expectation is the orthogonal projection of \(X\) onto the closed subspace of \(\Gcal\)-measurable square-integrable random variables. \begin{theorem}{Conditional expectation as $L^{2}$-projection}{cond-exp-l2} Let \(X\in L^{2}(\Omega,\Acal,\P)\) and \(\Gcal\subseteq\Acal\). Then \(\E[X\mid\Gcal]\) is the unique element (up to a.s.\ equality) of \(L^{2}(\Omega,\Gcal,\P)\) minimising the mean-squared error: for every \(Z\in L^{2}(\Omega,\Gcal,\P)\), \[ \E\bigl[(X-Z)^{2}\bigr]\;\ge\;\E\Bigl[\bigl(X-\E[X\mid\Gcal]\bigr)^{2}\Bigr], \] with equality iff \(Z=\E[X\mid\Gcal]\) a.s. \end{theorem} \begin{remark} The residual \(X-\E[X\mid\Gcal]\) is orthogonal (in \(L^{2}\)) to every \(\Gcal\)-measurable square-integrable random variable \(Z\), i.e.\ \(\E\bigl[(X-\E[X\mid\Gcal])\,Z\bigr]=0\); in particular \(\E\bigl[(X-\E[X\mid\Gcal])\,\E[X\mid\Gcal]\bigr]=0\). This orthogonality yields the Pythagorean identity \(\E[X^{2}]=\E\bigl[(X-\E[X\mid\Gcal])^{2}\bigr]+\E\bigl[\E[X\mid\Gcal]^{2}\bigr]\) behind variance decomposition. \end{remark} \subsection{Filtrations, adapted processes, and martingales} Conditional expectation acquires its real power when the conditioning \(\sigma\)-field grows with time. \begin{definition}{Filtration and adapted process}{filtration} A \emph{filtration} on \((\Omega,\Acal,\P)\) is an increasing family \(\Fcal=(\Fcal_n)_{n\in\N}\) of sub-\(\sigma\)-fields of \(\Acal\), i.e.\ \(\Fcal_m\subseteq\Fcal_n\) whenever \(m\le n\). A stochastic process \(X=(X_n)_{n\in\N}\) is \emph{\(\Fcal\)-adapted} if \(X_n\) is \(\Fcal_n\)-measurable for each \(n\). The \emph{natural filtration} of \(X\) is \(\Fcal_n^{X}:=\sigma(X_0,X_1,\dots,X_n)\); every process is adapted to its natural filtration. \end{definition} \begin{definition}{Martingale, sub- and supermartingale}{martingale} Let \((\Omega,\Acal,\P)\) be a probability space with filtration \(\Fcal=(\Fcal_n)_{n\in\N}\). 
An \(\Fcal\)-adapted process \(M=(M_n)_{n\in\N}\) is called \begin{itemize} \item an \emph{\(\Fcal\)-martingale} if \(\E|M_n|<\infty\) and \(\E[M_{n+1}\mid\Fcal_n]=M_n\) a.s.\ for all \(n\); \item an \emph{\(\Fcal\)-submartingale} if instead \(\E[M_{n+1}\mid\Fcal_n]\ge M_n\) a.s.; \item an \emph{\(\Fcal\)-supermartingale} if instead \(\E[M_{n+1}\mid\Fcal_n]\le M_n\) a.s. \end{itemize} Integrability \(\E|M_n|<\infty\) is required in all three cases. By the tower property, \(\E[M_n]\) is constant in \(n\) for a martingale, non-decreasing for a submartingale, and non-increasing for a supermartingale. \end{definition} \begin{example}[Conditional expectation process] Let \(X\in L^{1}(\Omega,\Acal,\P)\) and set \(M_n:=\E[X\mid\Fcal_n]\). The tower property gives \[ \E[M_{n+1}\mid\Fcal_n] \;=\;\E\bigl[\E[X\mid\Fcal_{n+1}]\bigm|\Fcal_n\bigr] \;=\;\E[X\mid\Fcal_n]\;=\;M_n, \] so \((M_n)\) is an \(\Fcal\)-martingale. This is the prototypical example. \end{example} \begin{example}[Random walk] Let \((\xi_k)_{k\ge 1}\) be i.i.d.\ with \(\E\xi_1=\mu\) and \(\E|\xi_1|<\infty\); set \(S_n=\xi_1+\dots+\xi_n\) with \(\Fcal_n=\sigma(\xi_1,\dots,\xi_n)\). Then \(S_n-n\mu\) is an \(\Fcal\)-martingale, and \(S_n\) itself is a martingale iff \(\mu=0\). If additionally \(\mu=0\) and \(\E\xi_1^{2}=\sigma^{2}<\infty\), then \(S_n^{2}-n\sigma^{2}\) is also a martingale, since \(\E[S_{n+1}^{2}\mid\Fcal_n]=S_n^{2}+2\mu S_n+\sigma^{2}\); for general \(\mu\) the same holds for \((S_n-n\mu)^{2}-n\,\mathrm{Var}(\xi_1)\). \end{example} \begin{lemma}{Convex transform of a martingale}{convex-martingale} Let \(M\) be an \(\Fcal\)-martingale and let \(\varphi:\R\to\R\) be convex with \(\varphi(M_n)\in L^{1}\) for every \(n\). Then \((\varphi(M_n))_{n\in\N}\) is an \(\Fcal\)-submartingale. In particular \(|M_n|\) and \(M_n^{2}\) (when integrable) are submartingales. \end{lemma} \subsection{Stopping times and the optional sampling theorem} \begin{definition}{Stopping time}{stopping-time} A random variable \(\tau:\Omega\to\N\cup\{\infty\}\) is an \emph{\(\Fcal\)-stopping time} if \(\{\tau\le n\}\in\Fcal_n\) for every \(n\in\N\); equivalently \(\{\tau=n\}\in\Fcal_n\) for every \(n\). 
The \emph{\(\sigma\)-field of events prior to \(\tau\)} is \[ \Fcal_\tau\;:=\;\bigl\{A\in\Acal:\;A\cap\{\tau\le n\}\in\Fcal_n\text{ for all }n\bigr\}. \] \end{definition} \begin{lemma}{Stopped processes}{stopped} If \(X\) is \(\Fcal\)-adapted and \(\tau\) is an \(\Fcal\)-stopping time, the \emph{stopped process} \(X^{\tau}:=(X_{n\wedge\tau})_{n\in\N}\) is again \(\Fcal\)-adapted. If \(M\) is an \(\Fcal\)-martingale (resp.\ sub-, supermartingale), then so is \(M^{\tau}\). \end{lemma} \begin{theorem}{Doob's optional sampling theorem}{optional-sampling} Let \(M=(M_n)_{n=0,1,\dots,N}\) be a martingale (resp.\ sub-, supermartingale) on a finite horizon, and let \(\sigma\le\tau\) be two \(\Fcal\)-stopping times taking values in \(\{0,1,\dots,N\}\). Then \(M_\sigma,M_\tau\in L^{1}\) and \[ \E[M_\tau\mid\Fcal_\sigma]\;=\;M_\sigma\quad\text{a.s.} \] (with \(\ge\) for submartingales and \(\le\) for supermartingales). In particular, taking \(\sigma\equiv 0\) gives the optional-stopping identity \[ \E[M_\tau]\;=\;\E[M_0] \] for every bounded stopping time \(\tau\). \end{theorem} \begin{corollary}{Stopping-time characterisation of martingales}{ot-char} An \(\Fcal\)-adapted integrable process \(M=(M_n)_{n=0,\dots,N}\) is a martingale if and only if \(\E[M_\tau]=\E[M_0]\) for every \(\Fcal\)-stopping time \(\tau\) with values in \(\{0,1,\dots,N\}\). \end{corollary} \begin{remark} For unbounded \(\tau\) the identity \(\E[M_\tau]=\E[M_0]\) can fail (e.g.\ a simple symmetric random walk stopped at the first hit of \(\{1\}\) has \(M_0=0\) but \(M_\tau\equiv 1\)). Sufficient extra conditions guaranteeing it include: \(\tau\) is bounded; \(M\) is uniformly integrable; or \(\E\tau<\infty\) and the increments \(M_n-M_{n-1}\) are uniformly bounded. \end{remark} \begin{theorem}{Doob decomposition}{doob-decomp} Every discrete-time \(\Fcal\)-submartingale \(X=(X_n)_{n=0,\dots,N}\) admits a unique (a.s.) 
decomposition \[ X_n\;=\;M_n+A_n, \] where \(M\) is an \(\Fcal\)-martingale with \(M_0=X_0\) and \(A\) is a \emph{predictable} (i.e.\ \(A_n\) is \(\Fcal_{n-1}\)-measurable for every \(n\ge 1\)) non-decreasing process with \(A_0=0\); explicitly, \(A_n=\sum_{k=1}^{n}\E[X_k-X_{k-1}\mid\Fcal_{k-1}]\). Symmetrically, a supermartingale decomposes as a martingale plus a predictable non-increasing process. \end{theorem}