% =================================================================
% Lecture 13
% Primary source: handwritten notes (Mathpix mmd)
% Fallback: kashlak.pdf (only for OCR/notation/curriculum clarity)
% =================================================================
\section[Lecture 13 -- The Ergodic Theorem]{Lecture 13 \textemdash{} The Ergodic Theorem}
\label{sec:lec13}

The strong law of large numbers proved in Lecture~12 says that, for i.i.d.\ summands, the time averages \(n^{-1}S_n\) converge almost surely to the expected value.
Ergodic theory generalises this picture to any measure-preserving dynamical system: replace ``i.i.d.'' by ``measure-preserving'' and ``\(\E X_1\)'' by a conditional expectation given the \(\sigma\)-field of invariant sets.
The two foundational results are Birkhoff's pointwise theorem (almost-sure convergence) and von Neumann's mean ergodic theorem (\(L^p\) convergence).
Specialising to the shift on a product space recovers the SLLN.

\subsection{Measure-preserving maps, invariance, ergodicity}

Throughout this section \((\Omega,\Fcal,\mu)\) is a measure space and \(T\colon\Omega\to\Omega\) a measurable map.
We are interested in time averages along the orbit \(\omega,\,T\omega,\,T^2\omega,\dots\).

\begin{definition}{Measure-preserving map}{measure-preserving}
  The map \(T\colon\Omega\to\Omega\) is \emph{measure preserving} if
  \[
    \mu\bigl(T^{-1}(A)\bigr) \;=\; \mu(A),\qquad \text{for all } A\in\Fcal.
  \]
  Equivalently, the pushforward measure \(\mu\circ T^{-1}\) coincides with \(\mu\): the dynamics does not distort the size of any measurable set.
\end{definition}

\begin{definition}{Invariant set, invariant function}{invariant}
  A set \(A\in\Fcal\) is \emph{\(T\)-invariant} if \(T^{-1}(A)=A\).
  The collection
  \[
    \Fcal_T \;=\; \{A\in\Fcal : T^{-1}(A)=A\}
  \]
  of all \(T\)-invariant sets is a \(\sigma\)-field.
A measurable function \(f\colon\Omega\to\R\) is \emph{invariant} if \(f=f\circ T\); equivalently, if it is \(\Fcal_T\)-measurable.
\end{definition}

\begin{definition}{Ergodic map}{ergodic}
  A measure-preserving map \(T\) is \emph{ergodic} if every invariant set is trivial: for all \(A\in\Fcal_T\),
  \[
    \mu(A)\;=\;0 \quad\text{or}\quad \mu(A^c)\;=\;0.
  \]
  Equivalently, every \(T\)-invariant measurable function is constant \(\mu\)-almost everywhere.
\end{definition}

\begin{example}[Shift mod $1$ on the circle]
  On \(\bigl((0,1],\Bcal,\lambda\bigr)\), for a fixed \(a\in(0,1]\), define the rotation
  \[
    T(x) \;=\; x + a \bmod 1 \;=\;
    \begin{cases}
      x+a   & \text{if } x+a\le 1,\\
      x+a-1 & \text{if } x+a>1.
    \end{cases}
  \]
  The map \(T\) preserves Lebesgue measure: every half-open arc and its preimage have the same length.
  It is ergodic precisely when \(a\) is irrational.
\end{example}

\begin{example}[Baker's map]
  On \((0,1]\) define \(T(x)=2x-\lfloor 2x\rfloor\).
  This \(T\) is the doubling map: the preimage of an interval is a union of two intervals, each of half the length, so Lebesgue measure is preserved.
  The map \(T\) is ergodic.
\end{example}

\begin{figure}[htbp]
  \centering
  \begin{tikzpicture}[>=Stealth, scale=0.95]
    % circle representation of (0,1]
    \draw[thick, deepnavy] (0,0) circle (1.5);
    \node[below] at (0,-1.5) {\small \(0\equiv 1\)};
    \node[above] at (0,1.5) {\small \(\tfrac12\)};
    % four orbit dots under rotation by a
    \foreach \k/\ang in {0/30, 1/95, 2/160, 3/225}{
      \fill[exampleblue] (\ang:1.5) circle (1.6pt);
      \node[exampleblue] at (\ang:1.85) {\scriptsize \(T^{\k}x\)};
    }
    % arrows between consecutive orbit points
    \foreach \a/\b in {30/95, 95/160, 160/225}{
      \draw[->, thick, exampleblue] (\a:1.5) arc[start angle=\a, end angle=\b, radius=1.5];
    }
  \end{tikzpicture}
  \caption{Orbit of a point under the rotation \(T(x)=x+a\bmod 1\): for irrational \(a\) the orbit is dense, the dynamics is ergodic, and Birkhoff's theorem says time averages equal space averages.}
  \label{fig:orbit-rotation}
\end{figure}

The next two facts are the everyday tools used below; both follow directly from \cref{def:measure-preserving,def:ergodic}.

\begin{proposition}{Two basic facts}{ergodic-facts}
  Let \(T\) be measure preserving on \((\Omega,\Fcal,\mu)\).
  \begin{enumerate}
    \item If \(f\in L^1(\Omega,\Fcal,\mu)\) then \(f\circ T\in L^1\) and
      \[
        \int f\,d\mu \;=\; \int f\circ T\,d\mu.
      \]
    \item If, in addition, \(T\) is ergodic and \(f\) is invariant, then \(f=c\) \(\mu\)-a.e.\ for some constant \(c\).
  \end{enumerate}
\end{proposition}

\subsection{Ergodic theorems}

For the rest of the lecture, fix \((\Omega,\Fcal,\mu)\) and a measure-preserving \(T\).
For a measurable \(f\colon\Omega\to\R\), define the \emph{Birkhoff sums}
\[
  S_n \;=\; S_n(f) \;=\; f + f\circ T + f\circ T^2 + \cdots + f\circ T^{n-1},
  \qquad S_0\equiv 0.
\]
Birkhoff's theorem controls the time averages \(n^{-1}S_n(f)\) almost everywhere; von Neumann's controls them in \(L^p\).
Both rest on a single combinatorial estimate, the maximal ergodic lemma.
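\begin{lemma}{Maximal ergodic lemma}{maximal-ergodic}
  Let \(f\in L^1(\Omega,\Fcal,\mu)\) and set \(S^* = \sup_{n\ge 0}S_n(f)\).
  Then
  \[
    \int_{\{S^*>0\}} f\,d\mu \;\ge\; 0.
  \]
\end{lemma}

\begin{theorem}{Birkhoff's pointwise ergodic theorem}{birkhoff}
  Let \((\Omega,\Fcal,\mu)\) be \(\sigma\)-finite, \(T\) measure preserving, and \(f\in L^1(\Omega,\Fcal,\mu)\).
  There exists an invariant function \(\bar f\in L^1(\Omega,\Fcal,\mu)\) with
  \[
    \int|\bar f|\,d\mu \;\le\; \int|f|\,d\mu
    \qquad\text{and}\qquad
    \frac{S_n(f)}{n} \;\longrightarrow\; \bar f \quad \mu\text{-a.e.\ as }n\to\infty.
  \]
  If \(T\) is ergodic and \(\mu\) is a probability, then \(\bar f = \int f\,d\mu\) almost everywhere.
\end{theorem}

% NOTE(review): in the extracted source, the text from the definition of D_{a,b}
% up to the words "choose C>0" in the von Neumann proof sketch was lost (everything
% between a "<" and a later ">" was stripped). The end of the remark below, the
% statement of the von Neumann theorem, and the opening of its proof sketch are a
% reconstruction; confirm against the handwritten notes.
\begin{remark}
  The strategy is to show that \(\liminf_n n^{-1}S_n(f)\) and \(\limsup_n n^{-1}S_n(f)\) are both \(T\)-invariant and equal a.e.
  Invariance follows from
  \[
    n^{-1}S_n(f)\circ T
    \;=\; n^{-1}\!\left[S_{n+1}(f)-f\right]
    \;=\; \frac{n+1}{n}\cdot\frac{S_{n+1}(f)}{n+1} \;-\; \frac{f}{n},
  \]
  and one isolates the bad set
  \[
    D_{a,b} \;=\; \Bigl\{\omega\in\Omega :
      \liminf_n n^{-1}S_n(f)(\omega) \;<\; a \;<\; b \;<\; \limsup_n n^{-1}S_n(f)(\omega)\Bigr\},
    \qquad a<b \text{ rational}.
  \]
  Each \(D_{a,b}\) is invariant, and the maximal ergodic lemma, applied on \(D_{a,b}\), forces \(\mu(D_{a,b})=0\).
  Taking the countable union over all rational \(a<b\) shows that \(\liminf_n n^{-1}S_n(f)=\limsup_n n^{-1}S_n(f)\) almost everywhere.
\end{remark}

\begin{theorem}{von Neumann's mean ergodic theorem}{von-neumann}
  Let \(\mu(\Omega)<\infty\), let \(T\) be measure preserving, and let \(f\in L^p(\Omega,\Fcal,\mu)\) for some \(p\in[1,\infty)\).
  Then, with \(\bar f\) the invariant limit from \cref{thm:birkhoff}, \(\bar f\in L^p\) and
  \[
    \frac{S_n(f)}{n} \;\longrightarrow\; \bar f \quad\text{in } L^p \text{ as } n\to\infty.
  \]
\end{theorem}

\begin{remark}
  Since \(T\) preserves \(\mu\), \(\|h\circ T^k\|_p=\|h\|_p\) for every \(h\in L^p\) and every \(k\), so Minkowski's inequality gives \(\|n^{-1}S_n(h)\|_p\le\|h\|_p\).
  Given \(\varepsilon>0\), choose \(C>0\) large enough that \(g=\min\{\max\{-C,f\},C\}\) satisfies \(\|f-g\|_p<\varepsilon/3\).
  The function \(g\) is bounded by \(C\), so dominated convergence upgrades the a.e.\ convergence \(n^{-1}S_n(g)\to\bar g\) of \cref{thm:birkhoff} to \(L^p\) convergence.
  Fatou applied to \(|n^{-1}S_n(f-g)|^p\) gives \(\|\bar f-\bar g\|_p\le\|f-g\|_p\), and, for all sufficiently large \(n\), the triangle inequality
  \[
    \Bigl\|\tfrac{S_n(f)}{n}-\bar f\Bigr\|_p
    \;\le\; \Bigl\|\tfrac{S_n(f-g)}{n}\Bigr\|_p
    +\Bigl\|\tfrac{S_n(g)}{n}-\bar g\Bigr\|_p
    +\|\bar g - \bar f\|_p
    \;<\;\varepsilon
  \]
  finishes the proof.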
\end{remark}

\begin{figure}[htbp]
  \centering
  \begin{tikzpicture}[>=Stealth, scale=1.0]
    % axes
    \draw[->] (-2.6,0) -- (2.6,0) node[right]{\scriptsize \(\omega\)};
    \draw[->] (0,-1.6) -- (0,1.8);
    % bounds at +-C
    \draw[dashed, gray] (-2.4,1.0) -- (2.4,1.0) node[right]{\scriptsize \(+C\)};
    \draw[dashed, gray] (-2.4,-1.0) -- (2.4,-1.0) node[right]{\scriptsize \(-C\)};
    % unbounded f (red): a sin-like curve that overshoots +-C
    \draw[thick, highlightred, smooth, samples=80, domain=-2.4:2.4]
      plot (\x, {1.45*sin(deg(1.4*\x))});
    % truncated g (blue): same curve clipped to [-C,C]
    \draw[thick, exampleblue, smooth, samples=80, domain=-2.4:2.4]
      plot (\x, {max(-1, min(1, 1.45*sin(deg(1.4*\x))))});
    \node[highlightred] at (-2.05,1.55) {\scriptsize \(f\)};
    \node[exampleblue] at (-1.55,0.75) {\scriptsize \(g\)};
  \end{tikzpicture}
  \caption{Truncation step in von~Neumann's proof: the unbounded \(f\) (red) is clipped to a bounded \(g=\min\{\max\{-C,f\},C\}\) (blue); the tails are absorbed in \(\|f-g\|_p<\varepsilon/3\), and dominated convergence handles \(g\).}
  \label{fig:truncation}
\end{figure}

\subsection{Application: the strong law of large numbers, again}

The two ergodic theorems give an almost free derivation of the SLLN by running the canonical i.i.d.\ construction through the shift map.

Let \((\Omega,\Fcal,P)\) be a probability space carrying i.i.d.\ real-valued random variables \(\{X_i\}_{i=1}^\infty\) with common distribution \(F\).
Set \(S=\R^{\N}\) and let \(\Scal\) be the \(\sigma\)-field on \(S\) generated by the \(\pi\)-system of cylinder sets
\[
  \Acal \;=\; \Bigl\{\textstyle\prod_{n\in\N}A_n \,:\, A_n\in\Bcal(\R)\;\forall n,\ A_n=\R\text{ eventually}\Bigr\}.
\]
The map \(X\colon\Omega\to\R^{\N}\), \(X(\omega)=(X_1(\omega),X_2(\omega),\dots)\), induces the product measure
\[
  \mu(A) \;=\; P\circ X^{-1}(A) \;=\; \prod_{n\in\N}dF(A_n),\qquad A=\textstyle\prod_{n\in\N} A_n\in\Acal.
\]

\begin{definition}{Shift map on $\R^{\N}$}{shift}
  The \emph{shift map} \(T\colon\R^{\N}\to\R^{\N}\) drops the first coordinate:
  \[
    T(x_1,x_2,x_3,\dots) \;=\; (x_2,x_3,x_4,\dots).
  \]
\end{definition}

\begin{proposition}{The shift is measure-preserving and ergodic}{shift-ergodic}
  Under the i.i.d.\ product measure \(\mu\) above, the shift map \(T\) is measure preserving and ergodic.
  Ergodicity follows from Kolmogorov's zero-one law: every shift-invariant event \(A\) satisfies \(A=T^{-n}(A)\) for all \(n\), so it depends only on the coordinates with index larger than \(n\); hence \(A\) lies in the tail \(\sigma\)-field \(\bigcap_n\sigma(X_n,X_{n+1},\dots)\) and so has probability \(0\) or \(1\).
\end{proposition}

\begin{theorem}{Strong law of large numbers, again}{slln-ergodic}
  Let \(\{X_i\}_{i=1}^\infty\) be i.i.d.\ real-valued random variables with \(\E|X_1|<\infty\).
  Then
  \[
    \frac{S_n}{n} \;=\; \frac{X_1+\cdots+X_n}{n} \;\xrightarrow{\text{a.s.}}\; \E X_1.
  \]
\end{theorem}

\begin{remark}
  Take \(f\colon\R^{\N}\to\R\) to be the first-coordinate projection \(f(x_1,x_2,\dots)=x_1\); it lies in \(L^1(\mu)\) because \(\E|X_1|<\infty\).
  With \(T\) the shift, \(f\circ T^{k}(x)=x_{k+1}\), so the Birkhoff sums recover the partial sums:
  \[
    S_n(f)(x) \;=\; f(x) + f(Tx) + \cdots + f(T^{n-1}x) \;=\; x_1+\cdots+x_n,
    \qquad\text{so}\qquad
    S_n(f)\circ X \;=\; X_1+\cdots+X_n.
  \]
  \Cref{thm:birkhoff} gives an invariant \(\bar f\in L^1\) with \(n^{-1}S_n(f)\to\bar f\) \(\mu\)-a.e., and hence \(n^{-1}(X_1+\cdots+X_n)\to\bar f\circ X\) \(P\)-a.s.\ because \(\mu=P\circ X^{-1}\).
  Since the shift is ergodic (\cref{prop:shift-ergodic}), \cref{def:ergodic} forces \(\bar f\) to be constant a.e.
  That constant is identified via \cref{thm:von-neumann} at \(p=1\) together with \(\int f\circ T^k\,d\mu=\int f\,d\mu\) (\cref{prop:ergodic-facts}):
  \[
    \bar f \;=\; \int \bar f\,d\mu
    \;=\; \lim_{n\to\infty}\int n^{-1}S_n(f)\,d\mu
    \;=\; \int f\,d\mu
    \;=\; \E X_1,
  \]
  which is the SLLN.
\end{remark}