% =================================================================
%  Lecture 12
%  Primary source: handwritten notes (Mathpix mmd, lines 527--644)
%  Fallback: kashlak.pdf §3.4 (only for OCR/notation/curriculum clarity)
% =================================================================
\section[Lecture 12 -- Central Limit Theorem; Characteristic Functions]{Lecture 12 \textemdash{} Central Limit Theorem; Characteristic Functions}
\label{sec:lec12}

The previous lecture closed the strong law of large numbers. We now turn
to fluctuations: properly normalised, sums \(S_n=X_1+\dots+X_n\) of iid
mean-zero random vectors converge in distribution to a Gaussian. The
proof rests on three tools\,---\,uniform tightness and Prohorov's
theorem (\cref{thm:prohorov}, already established), the characteristic
function and its uniqueness, and L\'evy's continuity lemma\,---\,from
which the central limit theorem drops out by a Taylor expansion.

\subsection{Gaussian measures}

\begin{definition}{Gaussian measure on \texorpdfstring{$\R$}{R}}{gaussian-r}
A Borel measure \(\gamma\) on \((\R,\Bcal)\) is \emph{Gaussian} with
mean \(m\in\R\) and variance \(\sigma^2>0\) if, for all real \(a<b\),
\[
\gamma\bigl((a,b]\bigr) \;=\;
\frac{1}{\sigma\sqrt{2\pi}}\int_{a}^{b}
\exp\!\left[-\frac{1}{2\sigma^2}(x-m)^2\right]\,d\lambda(x).
\]
For \(\sigma=0\) we set \(\gamma=\delta_m\) (Dirac mass at \(m\)) and
call \(\gamma\) a \emph{degenerate} Gaussian measure.
\end{definition}

\begin{definition}{Gaussian measure on \texorpdfstring{$\R^d$}{Rd}}{gaussian-rd}
A Borel measure \(\gamma\) on \((\R^d,\Bcal)\) is \emph{Gaussian} if
for every linear functional \(f\colon\R^d\to\R\) the induced measure
\(\gamma\circ f^{-1}\) on \((\R,\Bcal)\) is Gaussian, possibly
degenerate (the zero functional always induces \(\delta_0\)).
Equivalently, every linear combination of the coordinates is
one-dimensional Gaussian.
\end{definition}

\begin{definition}{Gaussian random variable}{gaussian-rv}
A random variable \(Z\) from a probability space \((\Omega,\Fcal,\mu)\)
to \((\R^d,\Bcal)\) is \emph{Gaussian} if its law
\(\gamma:=\mu\circ Z^{-1}\) is a Gaussian measure on \((\R^d,\Bcal)\).
\end{definition}

\begin{remark}
For vectors \(u,v\in\R^d\) we use the Euclidean inner product
\(\langle u,v\rangle=\sum_{i=1}^{d}u_i v_i\) and write
\(|u|^2=\langle u,u\rangle\). A collection \(\{X_i\}_{i=1}^{\infty}\)
is \emph{iid} if the \(X_i\) are mutually independent and share a
common law, i.e.\ all the \(X_i\) induce the same measure on \(\R^d\).
\end{remark}

\subsection{Characteristic functions}

The characteristic function is the Fourier transform of a probability
measure; it linearises convolution and, by uniqueness below, encodes
the measure completely.

\begin{definition}{Characteristic function}{char-fn}
For a probability measure \(\mu\) on \((\R^d,\Bcal)\), the
\emph{characteristic function} \(\tilde\mu\colon\R^d\to\C\) is
\[
\tilde\mu(t) \;:=\; \int \exp\!\bigl\{\,i\langle x,t\rangle\,\bigr\}\,d\mu(x).
\]
When \(\tilde\mu\) is integrable against Lebesgue measure on \(\R^d\),
the inverse transform recovers a density:
\[
p(x) \;=\; (2\pi)^{-d}\!\int \tilde\mu(t)\,
\exp\!\bigl\{\,-i\langle x,t\rangle\,\bigr\}\,d\lambda(t),
\qquad \lambda\text{-a.e.,}
\]
with \(p\) the probability density function of \(\mu\).
\end{definition}

\begin{definition}{Convolution of measures}{convolution}
For two probability measures \(\mu,\nu\) on \((\R^d,\Bcal)\), the
\emph{convolution} \(\mu*\nu\) is the measure
\[
(\mu*\nu)(B) \;:=\; \int \nu(B-x)\,d\mu(x),
\qquad B\in\Bcal,
\]
where \(B-x=\{y\in\R^d:y+x\in B\}\). The operation \(*\) is
associative and commutative; the characteristic function of \(\mu*\nu\)
is \(\tilde\mu\,\tilde\nu\); and if \(X,Y\) are independent with laws
\(\mu,\nu\), then \(X+Y\) has law \(\mu*\nu\).
\end{definition}

\begin{theorem}{Uniqueness of characteristic functions}{char-unique}
Let \(\mu\) and \(\nu\) be probability measures on \((\R^d,\Bcal)\). If
\(\tilde\mu=\tilde\nu\), then \(\mu=\nu\).
\end{theorem}

\begin{remark}
The proof goes via Gaussian smoothing. Let \(\gamma_\sigma\) be the
mean-zero Gaussian on \(\R^d\) with covariance \(\sigma^2 I\) and put
\(\mu^{(\sigma)}:=\mu*\gamma_\sigma\), \(\nu^{(\sigma)}:=\nu*\gamma_\sigma\).
The smoothed measures admit explicit densities
\[
q^{(\sigma)}(x) \;=\; (2\pi)^{-d}\!\int \tilde\nu(t)\,
\exp\!\left[\,-i\langle x,t\rangle - \tfrac12\sigma^2|t|^2\right]
d\lambda(t),
\]
and similarly for \(p^{(\sigma)}\) with \(\tilde\mu\). Hence
\(\tilde\mu=\tilde\nu\) forces \(\mu^{(\sigma)}=\nu^{(\sigma)}\) for
every \(\sigma>0\). Realising \(\mu^{(\sigma)}\) as the law of
\(X+\sigma Z\) (with \(X\sim\mu\), \(Z\sim\gamma_1\) independent) and
letting \(\sigma\downarrow 0\) gives \(X+\sigma Z\to X\) almost surely,
hence in probability and so in distribution:
\(\mu^{(\sigma)}\Rightarrow\mu\), and likewise
\(\nu^{(\sigma)}\Rightarrow\nu\). Uniqueness of weak limits gives
\(\mu=\nu\).
\end{remark}

\subsection{L\texorpdfstring{\'e}{e}vy's continuity lemma}

Convergence of characteristic functions, plus tightness, controls weak
convergence of the underlying measures.

\begin{lemma}{L\'evy continuity}{levy-continuity}
Let \(\{\mu_i\}_{i=1}^{\infty}\) be a uniformly tight sequence of
probability measures on \(\R^d\). If the characteristic functions
satisfy \(\tilde\mu_i(v)\to\tilde\mu(v)\) for every \(v\in\R^d\), then
\(\mu_i\Rightarrow\mu\), where \(\mu\) is the (unique) probability
measure with characteristic function \(\tilde\mu\).
\end{lemma}

\begin{remark}
By Prohorov (\cref{thm:prohorov}), every subsequence \(\mu_{i_k}\) has
a further weakly convergent subsubsequence
\(\mu_{i_{k_r}}\Rightarrow\mu^*\). Since
\(x\mapsto\exp\{i\langle x,v\rangle\}\) is bounded and continuous, weak
convergence gives
\(\tilde\mu_{i_{k_r}}(v)\to\widetilde{\mu^*}(v)\) for every
\(v\in\R^d\); comparing with the hypothesis forces
\(\widetilde{\mu^*}=\tilde\mu\) on all of \(\R^d\), and uniqueness of
characteristic functions (\cref{thm:char-unique}) identifies
\(\mu^*=\mu\). The standard subsubsequence trick (every subsequence
has a further subsubsequence with the same weak limit) then promotes
this to convergence of the full sequence.
\end{remark}

\subsection{The central limit theorem}

We can now prove the headline result. The hypotheses are just iid,
mean zero, and a finite second moment.

\begin{theorem}{Central limit theorem}{clt}
Let \((\Omega,\Fcal,\mu)\) be a probability space and let
\(\{X_n\}_{n=1}^{\infty}\) be iid random vectors on \((\R^d,\Bcal)\)
with
\[
\E X_n \;=\; 0
\qquad\text{and}\qquad
\E\,|X_n|^2 \;<\; \infty.
\]
Set \(S_n=\sum_{j=1}^{n}X_j\). Then
\[
n^{-\tfrac12}\,S_n \;\xrightarrow{d}\; Z,
\]
where \(Z\) is a Gaussian random vector on \(\R^d\) with mean zero and
covariance \(\Sigma\) given by \(\Sigma_{jk}=\E[X_{nj}X_{nk}]\).
\end{theorem}

The proof proceeds in two steps: \emph{tightness} of the
normalised sums via a second-moment Chebyshev bound, and
\emph{characteristic-function convergence} via Taylor expansion. The
two ingredients meet in L\'evy's lemma.

\begin{remark}[tightness via Chebyshev]
Since the \(X_j\) are mean zero and independent,
\(\E\langle X_j,X_k\rangle=\langle\E X_j,\E X_k\rangle=0\) for
\(j\ne k\), so
\[
\E\,\bigl|n^{-\tfrac12}S_n\bigr|^2
   \;=\; \frac{1}{n}\,\E\!\left[\,\sum_{j,k=1}^{n}\langle X_j,X_k\rangle\right]
   \;=\; \frac{1}{n}\sum_{j=1}^{n}\E\,|X_j|^2
   \;=\; \E\,|X_1|^2.
\]
For any \(\varepsilon>0\), choose \(M_\varepsilon>0\) with
\(\E|X_1|^2/M_\varepsilon^2<\varepsilon\); Chebyshev's inequality gives
\(\P(|n^{-\tfrac12}S_n|>M_\varepsilon)
\le\E|X_1|^2/M_\varepsilon^2<\varepsilon\), uniformly in
\(n\). The sequence \(\{n^{-\tfrac12}S_n\}\) is therefore uniformly
tight.
\end{remark}

\begin{remark}[characteristic-function expansion]
Fix \(v\in\R^d\). The scalars \(\langle v,X_j\rangle\) are iid
real-valued with \(\E\langle v,X_j\rangle=0\) and
\(\E\langle v,X_j\rangle^2<\infty\). Define
\[
h(v) \;:=\; \E\exp\!\bigl(\,i\langle v,X_j\rangle\,\bigr).
\]
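Because \(\E|X_j|^2<\infty\), dominated convergence allows
differentiating twice under the expectation, so \(h\) is twice
continuously differentiable with \(h(0)=1\),
\(\nabla h(0)=i\,\E X_j=0\) and
\(\nabla^2 h(0)=-\Sigma\) where \(\Sigma=\E[X_j X_j^{\top}]\). Taylor's
theorem gives, as \(v\to 0\),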
\[
h(v) \;=\; 1 \;-\; \tfrac12\,v^{\top}\Sigma\,v \;+\; o(|v|^2).
\]
Independence then yields, for any fixed \(v\),
\[
\E\exp\!\bigl\{\,i\langle n^{-\tfrac12}S_n,v\rangle\,\bigr\}
   \;=\; h\!\bigl(n^{-\tfrac12}v\bigr)^{n}
   \;=\; \left(1-\frac{v^{\top}\Sigma v}{2n} + o\!\left(\frac{|v|^2}{n}\right)\right)^{\!n}
   \;\longrightarrow\; \exp\!\bigl\{-\tfrac12 v^{\top}\Sigma v\bigr\}
\]
as \(n\to\infty\), because \((1+z_n/n)^n\to e^{z}\) whenever complex
numbers \(z_n\to z\). The right-hand side is the characteristic function
of the mean-zero Gaussian \(Z\) on \(\R^d\) with covariance \(\Sigma\).
Combining with tightness and L\'evy's continuity
(\cref{lem:levy-continuity}) gives \(n^{-\tfrac12}S_n\xRightarrow{} Z\)
\textemdash{} convergence in distribution.
\end{remark}

\begin{figure}[htbp]
\centering
\begin{tikzpicture}[>=Stealth, node distance=10mm and 18mm, font=\small]
  \node[draw, rounded corners, fill=defbodybg, align=center,
        text width=34mm, minimum height=12mm]
    (tight) {Tightness\\ \(\{n^{-1/2}S_n\}\) uniformly tight\\ (Chebyshev)};
  \node[draw, rounded corners, fill=defbodybg, align=center,
        text width=34mm, minimum height=12mm,
        right=of tight]
    (cf) {Characteristic functions\\ \(h(n^{-1/2}v)^n\to e^{-\tfrac12 v^\top\!\Sigma v}\)\\ (Taylor)};
  \node[draw, rounded corners, fill=lembodybg, align=center,
        text width=44mm, minimum height=12mm,
        below=12mm of $(tight)!0.5!(cf)$]
    (levy) {L\'evy continuity \(+\) uniqueness\\ of characteristic functions};
  \node[draw, rounded corners, fill=thmbodybg, align=center,
        text width=44mm, minimum height=12mm,
        below=10mm of levy]
    (clt) {\(n^{-1/2}S_n\xRightarrow{}Z\sim\mathcal N(0,\Sigma)\)};
  \draw[->, thick] (tight.south) -- (levy.north west);
  \draw[->, thick] (cf.south)    -- (levy.north east);
  \draw[->, thick] (levy.south)  -- (clt.north);
\end{tikzpicture}
\caption{Architecture of the CLT proof: tightness and pointwise
convergence of characteristic functions feed into L\'evy's lemma; the
limiting characteristic function identifies the Gaussian \(Z\).}
\label{fig:clt-architecture}
\end{figure}

\begin{remark}
The covariance entry \(\Sigma_{jk}=\E[X_{nj}X_{nk}]\) is independent of
\(n\) by the iid hypothesis; the limiting Gaussian is the same
regardless of which copy of \(X_n\) one uses to compute it. In the
scalar case \(d=1\) the conclusion reduces to the familiar
\(n^{-1/2}S_n\xRightarrow{}\mathcal N(0,\sigma^2)\) with
\(\sigma^2=\E X_1^2\).
\end{remark}