% =================================================================
%  Lecture 13
%  Primary source: handwritten notes (Mathpix mmd)
%  Fallback: kashlak.pdf (only for OCR/notation/curriculum clarity)
% =================================================================
\section[Lecture 13 -- The Ergodic Theorem]{Lecture 13 \textemdash{} The Ergodic Theorem}
\label{sec:lec13}

The strong law of large numbers proved in Lecture~12 says that, for
i.i.d.\ summands, time averages \(n^{-1}S_n\) converge almost surely to
the expected value. Ergodic theory generalises this picture to any
measure-preserving dynamical system: replace ``i.i.d.'' by
``measure-preserving'' and ``\(\E X_1\)'' by a conditional expectation
given the \(\sigma\)-field of invariant sets. The two foundational results
are Birkhoff's pointwise theorem (almost-sure convergence) and von
Neumann's mean ergodic theorem (\(L^p\) convergence). Specialising to
the shift on a product space recovers the SLLN.

\subsection{Measure-preserving maps, invariance, ergodicity}

Throughout this section \((\Omega,\Fcal,\mu)\) is a measure space and
\(T\colon\Omega\to\Omega\) a measurable map. We are interested in time
averages along the orbit \(\omega,\,T\omega,\,T^2\omega,\dots\).

\begin{definition}{Measure-preserving map}{measure-preserving}
The map \(T\colon\Omega\to\Omega\) is \emph{measure preserving} if
\[
\mu\bigl(T^{-1}(A)\bigr) \;=\; \mu(A),\qquad \text{for all } A\in\Fcal.
\]
Equivalently, the pushforward measure \(\mu\circ T^{-1}\) coincides with
\(\mu\): the dynamics does not distort the size of any measurable set.
\end{definition}

\begin{definition}{Invariant set, invariant function}{invariant}
A set \(A\in\Fcal\) is \emph{\(T\)-invariant} if
\(T^{-1}(A)=A\). The collection
\[
\Fcal_T \;=\; \{A\in\Fcal : T^{-1}(A)=A\}
\]
of all \(T\)-invariant sets is a \(\sigma\)-field. A measurable function
\(f\colon\Omega\to\R\) is \emph{invariant} if \(f=f\circ T\); equivalently,
\(f\) is invariant if and only if it is \(\Fcal_T\)-measurable.
\end{definition}

\begin{definition}{Ergodic map}{ergodic}
A measure-preserving map \(T\) is \emph{ergodic} if every invariant set
is trivial: for all \(A\in\Fcal_T\),
\[
\mu(A)\;=\;0 \quad\text{or}\quad \mu(A^c)\;=\;0.
\]
Equivalently, every \(T\)-invariant measurable function is constant
\(\mu\)-almost everywhere.
\end{definition}

\begin{example}[Shift mod $1$ on the circle]
On \(\bigl((0,1],\Bcal,\lambda\bigr)\), for a fixed \(a\in(0,1]\), define
the rotation
\[
T(x) \;=\; x + a \bmod 1
\;=\;
\begin{cases}
x+a   & x+a\le 1,\\
x+a-1 & x+a>1.
\end{cases}
\]
\(T\) preserves Lebesgue measure: every half-open arc and its preimage
have the same length. It is ergodic precisely when \(a\) is irrational.
\end{example}

\begin{example}[Baker's map]
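On \([0,1)\) with Lebesgue measure define \(T(x)=2x-\lfloor 2x\rfloor\)
(the half-open interval is taken closed at \(0\) so that \(T\) maps it
into itself: \(T(\tfrac12)=0\)). \(T\) is the doubling
map; preimages of intervals split into two intervals of half the length,
so Lebesgue measure is preserved. \(T\) is ergodic.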
\end{example}

\begin{figure}[htbp]
\centering
\begin{tikzpicture}[>=Stealth, scale=0.95]
  % circle representation of (0,1]
  \draw[thick, deepnavy] (0,0) circle (1.5);
  \node[below] at (0,-1.5) {\small \(0\equiv 1\)};
  \node[above] at (0,1.5)  {\small \(\tfrac12\)};
  % four orbit dots under rotation by a
  \foreach \k/\ang in {0/30, 1/95, 2/160, 3/225}{
    \fill[exampleblue] (\ang:1.5) circle (1.6pt);
    \node[exampleblue] at (\ang:1.85) {\scriptsize \(T^{\k}x\)};
  }
  % arrows between consecutive orbit points
  \foreach \a/\b in {30/95, 95/160, 160/225}{
    \draw[->, thick, exampleblue]
      (\a:1.5) arc[start angle=\a, end angle=\b, radius=1.5];
  }
\end{tikzpicture}
\caption{Orbit of a point under the rotation \(T(x)=x+a\bmod 1\): for
irrational \(a\) the orbit is dense, the dynamics is ergodic, and
Birkhoff's theorem says time averages equal space averages.}
\label{fig:orbit-rotation}
\end{figure}

The next two facts are the everyday tools used below; both follow
directly from \cref{def:measure-preserving,def:ergodic}.

\begin{proposition}{Two basic facts}{ergodic-facts}
Let \(T\) be measure preserving on \((\Omega,\Fcal,\mu)\).
\begin{enumerate}
\item If \(f\in L^1(\Omega,\Fcal,\mu)\) then \(f\circ T\in L^1\) and
\[
\int f\,d\mu \;=\; \int f\circ T\,d\mu.
\]
\item If, in addition, \(T\) is ergodic and \(f\) is invariant, then
\(f=c\) \(\mu\)-a.e.\ for some constant \(c\).
\end{enumerate}
\end{proposition}

\subsection{Ergodic theorems}

For the rest of the lecture, fix \((\Omega,\Fcal,\mu)\) and a
measure-preserving \(T\). For \(f\colon\Omega\to\R\) measurable, define the
\emph{Birkhoff sums}
\[
S_n \;=\; S_n(f) \;=\; f + f\circ T + f\circ T^2 + \cdots + f\circ T^{n-1},
\qquad S_0\equiv 0.
\]
Birkhoff's theorem controls the time averages \(n^{-1}S_n(f)\) almost
everywhere; von Neumann's controls them in \(L^p\). Both rest on a
single combinatorial estimate, the maximal ergodic lemma.

\begin{lemma}{Maximal ergodic lemma}{maximal-ergodic}
Let \(f\in L^1(\Omega,\Fcal,\mu)\) and set
\(S^* = \sup_{n\ge 0}S_n(f)\). Then
\[
\int_{\{S^*>0\}} f\,d\mu \;\ge\; 0.
\]
\end{lemma}

\begin{theorem}{Birkhoff's pointwise ergodic theorem}{birkhoff}
Let \((\Omega,\Fcal,\mu)\) be \(\sigma\)-finite, \(T\) measure
preserving, and \(f\in L^1(\Omega,\Fcal,\mu)\). There exists an
invariant function \(\bar f\in L^1(\Omega,\Fcal,\mu)\) with
\[
\int|\bar f|\,d\mu \;\le\; \int|f|\,d\mu
\qquad\text{and}\qquad
\frac{S_n(f)}{n} \;\longrightarrow\; \bar f \quad \mu\text{-a.e.\ as }n\to\infty.
\]
If \(T\) is ergodic and \(\mu\) is a probability measure, then
\(\bar f = \int f\,d\mu\) almost everywhere.
\end{theorem}

\begin{remark}
The strategy is to show that \(\liminf_n n^{-1}S_n(f)\) and
\(\limsup_n n^{-1}S_n(f)\) are both \(T\)-invariant and equal a.e.
Invariance follows from
\[
n^{-1}S_n(f)\circ T \;=\; n^{-1}\!\left[S_{n+1}(f)-f\right]
   \;=\; \frac{n+1}{n}\cdot\frac{S_{n+1}(f)}{n+1} \;-\; \frac{f}{n},
\]
and one isolates the bad set
\[
D_{a,b} \;=\; \Bigl\{\omega\in\Omega : \liminf_n n^{-1}S_n(f)<a<b<\limsup_n n^{-1}S_n(f)\Bigr\}
\]
for rationals \(a<b\). Each \(D_{a,b}\) is \(T\)-invariant; an
application of \cref{lem:maximal-ergodic} to \(g=f-b\,\indic_B\) on a
finite-measure subset \(B\subseteq D_{a,b}\) yields
\[
b\,\mu(D_{a,b}) \;\le\; \int_{D_{a,b}}\!f\,d\mu \;\le\; a\,\mu(D_{a,b}),
\]
and \(a<b\) forces \(\mu(D_{a,b})=0\). Taking the countable union over
rationals gives convergence in \([-\infty,\infty]\) on a full-measure
set; the integrability bound \(\int|\bar f|\,d\mu\le\int|f|\,d\mu\)
falls out of Fatou's lemma applied to \(n^{-1}|S_n(f)|\).
\end{remark}

\begin{theorem}{von Neumann's mean ergodic theorem}{von-neumann}
Suppose \(\mu(\Omega)<\infty\) and \(p\in[1,\infty)\). For every
\(f\in L^p(\Omega,\Fcal,\mu)\) there exists \(\bar f\in L^p\) such that
\[
\frac{S_n(f)}{n} \;\longrightarrow\; \bar f \qquad\text{in } L^p.
\]
\end{theorem}

\begin{remark}
The argument is a three-epsilon truncation. Because \(T\) is
measure-preserving, \(\|f\circ T^n\|_p=\|f\|_p\), so by Minkowski
\(\|n^{-1}S_n(f)\|_p\le\|f\|_p\). Given \(\varepsilon>0\), choose
\(C>0\) so large that the truncation \(g=\min\{\max\{-C,f\},C\}\)
satisfies \(\|f-g\|_p<\varepsilon/3\) (possible by dominated convergence,
since \(|f-g|\le|f|\) and \(g\to f\) pointwise as \(C\to\infty\)). As
\(|g|\le C\) and \(\mu(\Omega)<\infty\), dominated
convergence upgrades the a.e.\ convergence \(n^{-1}S_n(g)\to\bar g\) of
\cref{thm:birkhoff} to \(L^p\) convergence. Fatou applied to
\(|n^{-1}S_n(f-g)|^p\) gives \(\|\bar f-\bar g\|_p\le\|f-g\|_p\), and
the triangle inequality
\[
\Bigl\|\tfrac{S_n(f)}{n}-\bar f\Bigr\|_p
\;\le\;
\Bigl\|\tfrac{S_n(f-g)}{n}\Bigr\|_p
+\Bigl\|\tfrac{S_n(g)}{n}-\bar g\Bigr\|_p
+\|\bar g - \bar f\|_p
\;<\;\varepsilon
\]
(valid for all sufficiently large \(n\)) finishes the proof.
\end{remark}

\begin{figure}[htbp]
\centering
\begin{tikzpicture}[>=Stealth, scale=1.0]
  % axes
  \draw[->] (-2.6,0) -- (2.6,0) node[right]{\scriptsize \(\omega\)};
  \draw[->] (0,-1.6) -- (0,1.8);
  % bounds at +-C
  \draw[dashed, gray] (-2.4,1.0) -- (2.4,1.0) node[right]{\scriptsize \(+C\)};
  \draw[dashed, gray] (-2.4,-1.0) -- (2.4,-1.0) node[right]{\scriptsize \(-C\)};
  % unbounded f (red): a sin-like curve that overshoots +-C
  \draw[thick, highlightred, smooth, samples=80, domain=-2.4:2.4]
    plot (\x, {1.45*sin(deg(1.4*\x))});
  % truncated g (blue): same curve clipped to [-C,C]
  \draw[thick, exampleblue, smooth, samples=80, domain=-2.4:2.4]
    plot (\x, {max(-1, min(1, 1.45*sin(deg(1.4*\x))))});
  \node[highlightred] at (-2.05,1.55) {\scriptsize \(f\)};
  \node[exampleblue] at (-1.55,0.75) {\scriptsize \(g\)};
\end{tikzpicture}
\caption{Truncation step in von~Neumann's proof: the unbounded
\(f\) (red) is clipped to a bounded \(g=\min\{\max\{-C,f\},C\}\) (blue);
the tails are absorbed in \(\|f-g\|_p<\varepsilon/3\), and dominated
convergence handles \(g\).}
\label{fig:truncation}
\end{figure}

\subsection{Application: the strong law of large numbers, again}

The two ergodic theorems give an almost free derivation of the SLLN by
running the canonical i.i.d.\ construction through the shift map.

Let \((\Omega,\Fcal,P)\) be a probability space carrying i.i.d.\
real-valued random variables \(\{X_i\}_{i=1}^\infty\) with common
distribution \(F\). Set \((S,\Scal)=(\R^{\N},\Scal)\) where \(\Scal\) is
generated by the \(\pi\)-system of cylinder sets
\[
\Acal \;=\; \Bigl\{\textstyle\prod_{n\in\N}A_n
   \,:\, A_n\in\Bcal(\R)\;\forall n,\ A_n=\R\text{ eventually}\Bigr\}.
\]
The map \(X\colon\Omega\to\R^{\N}\), \(X(\omega)=(X_1(\omega),X_2(\omega),\dots)\),
induces the product measure
\[
\mu(A) \;=\; P\circ X^{-1}(A) \;=\; \prod_{n\in\N}F(A_n),\qquad A=\textstyle\prod A_n\in\Acal.
\]

\begin{definition}{Shift map on $\R^{\N}$}{shift}
The \emph{shift map} \(T\colon\R^{\N}\to\R^{\N}\) drops the first
coordinate:
\[
T(x_1,x_2,x_3,\dots) \;=\; (x_2,x_3,x_4,\dots).
\]
\end{definition}

\begin{proposition}{The shift is measure-preserving and ergodic}{shift-ergodic}
Under the i.i.d.\ product measure \(\mu\) above, the shift map \(T\) is
measure preserving and ergodic. Ergodicity follows from Kolmogorov's
zero-one law: a shift-invariant event \(A\) satisfies \(A=T^{-n}(A)\)
for every \(n\), so it depends only on the coordinates beyond the
\(n\)th; hence \(A\) lies in the tail
\(\sigma\)-field \(\bigcap_n\sigma(X_n,X_{n+1},\dots)\) and so has
probability \(0\) or \(1\).
\end{proposition}

\begin{theorem}{Strong law of large numbers, again}{slln-ergodic}
Let \(\{X_i\}_{i=1}^\infty\) be i.i.d.\ real-valued random variables
with \(\E|X_i|<\infty\). Then
\[
\frac{S_n}{n} \;=\; \frac{X_1+\cdots+X_n}{n}
   \;\xrightarrow{\text{a.s.}}\; \E X_i.
\]
\end{theorem}

\begin{remark}
Take \(f\colon\R^{\N}\to\R\) to be the first-coordinate projection
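\(f(x_1,x_2,\dots)=x_1\); it lies in \(L^1(\mu)\) because
\(\int|f|\,d\mu=\E|X_1|<\infty\). With \(T\) the shift,
\(f\circ T^{k}(x)=x_{k+1}\), so along the sequence \(X=(X_1,X_2,\dots)\)
the Birkhoff sums recover the partial sums:
\[
S_n(f)\circ X \;=\; \bigl(f + f\circ T + \cdots + f\circ T^{n-1}\bigr)\circ X \;=\; X_1+\cdots+X_n.
\]
\Cref{thm:birkhoff} gives an invariant \(\bar f\in L^1\) with
\(n^{-1}S_n(f)\to\bar f\) \(\mu\)-a.e., hence \(n^{-1}S_n\to\bar f\circ X\)
a.s.\ because \(\mu=P\circ X^{-1}\). Since the shift is ergodic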
(\cref{prop:shift-ergodic}), \cref{def:ergodic} forces \(\bar f\) to be
constant a.e.; identifying that constant via \cref{thm:von-neumann} at
\(p=1\),
\[
\bar f \;=\; \int \bar f\,d\mu \;=\; \lim_{n\to\infty}\int n^{-1}S_n(f)\,d\mu \;=\; \E X_i,
\]
which is the SLLN.
\end{remark}