% =================================================================
%  Lecture 12
%  Source: handwritten notes (Mathpix-converted) + Kashlak STAT 571
% =================================================================
\section[Lecture 12 -- Central Limit Theorem; Characteristic Functions]{Lecture 12 \textemdash{} Central Limit Theorem; Characteristic Functions}
\label{sec:lec12}

To state the central limit theorem we first need to know what a Gaussian
random variable is, and to prove it (next lecture) we will route everything
through the \emph{characteristic function} \textemdash{} the Fourier transform
of a probability measure. This lecture collects the cast of characters:
Gaussian measures on \(\R\) and \(\R^d\), the characteristic function and
its inversion formula, convolution, the uniqueness theorem, L\'evy's
continuity lemma, and the CLT itself.

\subsection{Gaussian measures}

\begin{definition}{Gaussian Measure on $\R$}{gaussian-measure-r}
A Borel measure \(\gamma\) on \((\R,\Bcal)\) is \emph{Gaussian with mean
\(m\in\R\) and variance \(\sigma^{2}>0\)} if, for all \(a<b\),
\[
\gamma\bigl((a,b]\bigr)
   \;=\; \frac{1}{\sigma\sqrt{2\pi}}
         \int_{a}^{b}\exp\!\left[-\frac{1}{2\sigma^{2}}(x-m)^{2}\right]
         d\lambda(x).
\]
If \(\sigma=0\) we set \(\gamma=\delta_{m}\) (a Dirac mass at \(m\)) and
call \(\gamma\) a \emph{degenerate} Gaussian measure.
\end{definition}

\begin{definition}{Gaussian Measure on $\R^{d}$}{gaussian-measure-rd}
A Borel measure \(\gamma\) on \((\R^{d},\Bcal)\) is \emph{Gaussian} if for
every linear functional \(f:\R^{d}\to\R\), the induced (push-forward)
measure \(\gamma\circ f^{-1}\) on \((\R,\Bcal)\) is Gaussian in the sense
of \cref{def:gaussian-measure-r}. Equivalently, every linear combination
of the coordinates is (one-dimensional) Gaussian.
\end{definition}

\begin{definition}{Gaussian Random Variable}{gaussian-rv}
A random variable \(Z:(\Omega,\Fcal,\mu)\to(\R^{d},\Bcal)\) is
\emph{Gaussian} if its law \(\gamma:=\mu\circ Z^{-1}\) is a Gaussian
measure on \((\R^{d},\Bcal)\).
\end{definition}

\begin{remark}
For \(u,v\in\R^{d}\) we use the standard inner product
\(\langle u,v\rangle=\sum_{i=1}^{d}u_{i}v_{i}\) and write
\(|u|^{2}:=\langle u,u\rangle\). A collection \(\{X_{i}\}_{i=1}^{\infty}\)
of random variables is \emph{i.i.d.} if the \(X_{i}\) are mutually
independent (that is, every finite subcollection is independent; pairwise
independence alone is not enough) and the induced measures
\(\mu\circ X_{i}^{-1}\) all coincide.
\end{remark}

\subsection{Characteristic functions}

The strategy for proving the CLT is to show that the characteristic
functions of \(n^{-1/2}S_{n}\) converge to that of a Gaussian, then invoke
L\'evy's continuity lemma.

\begin{definition}{Characteristic Function}{characteristic-function}
For a probability measure \(\mu\) on \((\R^{d},\Bcal)\), the
\emph{characteristic function} (Fourier transform)
\(\widetilde{\mu}:\R^{d}\to\C\) is
\[
\widetilde{\mu}(t)
   \;:=\; \int \exp\!\bigl\{i\langle x,t\rangle\bigr\}\,d\mu(x).
\]
\end{definition}

\begin{remark}[Inversion]
If \(\widetilde{\mu}\) is integrable with respect to Lebesgue measure on
\(\R^{d}\) then \(\mu\) has a density \(p\) and
\[
p(x) \;=\; (2\pi)^{-d}\int \widetilde{\mu}(t)\,
            \exp\!\bigl\{-i\langle x,t\rangle\bigr\}\,d\lambda(t),
\qquad \lambda\text{-a.e.}
\]
\end{remark}

\begin{definition}{Convolution}{convolution}
For two finite measures \(\mu,\nu\) on \((\R^{d},\Bcal)\) (in particular,
probability measures), the \emph{convolution} \(\mu*\nu\) is the measure
\[
(\mu*\nu)(B)
   \;:=\; \int \nu(B-x)\,d\mu(x), \qquad B\in\Bcal,
\]
where \(B-x=\{y\in\R^{d}: y+x\in B\}\).
\end{definition}

\begin{remark}
Convolution is associative and commutative. The characteristic function
of \(\mu*\nu\) factorises:
\(\widetilde{\mu*\nu}=\widetilde{\mu}\,\widetilde{\nu}\).
Moreover, if \(X\) and \(Y\) are independent random variables with laws
\(\mu\) and \(\nu\), the law of \(X+Y\) is \(\mu*\nu\).
\end{remark}

\begin{theorem}{Uniqueness of Characteristic Functions}{cf-uniqueness}
Let \(\mu\) and \(\nu\) be probability measures on \((\R^{d},\Bcal)\). If
\(\widetilde{\mu}=\widetilde{\nu}\), then \(\mu=\nu\).
\end{theorem}

\begin{remark}
The proof goes via convolution with a small Gaussian: let \(\gamma_\sigma\)
be mean-zero Gaussian on \(\R^{d}\) with covariance \(\sigma^{2}I\) and
set \(\mu^{(\sigma)}=\mu*\gamma_{\sigma}\). Its characteristic function
\(\widetilde{\mu}(t)\exp\{-\tfrac{1}{2}\sigma^{2}|t|^{2}\}\) is integrable,
so inversion gives the density \(p^{(\sigma)}\) explicitly in terms of
\(\widetilde{\mu}\); equality of characteristic functions therefore yields
\(\mu^{(\sigma)}=\nu^{(\sigma)}\) for every \(\sigma>0\). Letting
\(\sigma\downarrow 0\) gives \(\mu^{(\sigma)}\Rightarrow\mu\) and
\(\nu^{(\sigma)}\Rightarrow\nu\), so \(\mu=\nu\) by uniqueness of weak
limits.
\end{remark}

\subsection{L\'evy's continuity lemma and the CLT}

The bridge from pointwise convergence of characteristic functions to weak
convergence of measures is L\'evy's lemma; together with the uniqueness
theorem and Prohorov's theorem (\cref{sec:lec11}, in spirit) it is
exactly what the CLT proof needs.

\begin{lemma}{L\'evy's Continuity Lemma}{levy-continuity}
Let \(\{\mu_{i}\}\) be a uniformly tight sequence of probability measures
on \(\R^{d}\) and let \(\mu\) be a probability measure on \(\R^{d}\). If
for every \(v\in\R^{d}\),
\[
\widetilde{\mu_{i}}(v) \;\longrightarrow\; \widetilde{\mu}(v),
\]
then \(\mu_{i}\Rightarrow\mu\). (By \cref{thm:cf-uniqueness}, \(\mu\) is
the unique probability measure with characteristic function
\(\widetilde{\mu}\).)
\end{lemma}

\begin{remark}
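Sketch: by uniform tightness and Prohorov's theorem, every subsequence of
\(\{\mu_{i}\}\) has a further subsequence converging weakly to some
probability measure \(\nu\). Since \(x\mapsto\exp\{i\langle x,v\rangle\}\)
is bounded and continuous, \(\widetilde{\nu}\) is the pointwise limit of
the characteristic functions along that subsequence, i.e.\
\(\widetilde{\nu}=\widetilde{\mu}\), and \cref{thm:cf-uniqueness} forces
\(\nu=\mu\). The subsequence principle then promotes this to convergence
of the whole sequence.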
\end{remark}

\begin{theorem}{Central Limit Theorem}{clt}
Let \((\Omega,\Fcal,\mu)\) be a probability space and let
\(\{X_{n}\}_{n=1}^{\infty}\) be i.i.d.\ random variables on
\((\R^{d},\Bcal)\) with \(\E X_{n}=0\) and \(\E|X_{n}|^{2}<\infty\). Set
\(S_{n}=\sum_{j=1}^{n}X_{j}\). Then
\[
n^{-1/2} S_{n} \;\xrightarrow{\;d\;}\; Z,
\]
where \(Z\) is a Gaussian random variable on \(\R^{d}\) with mean zero
and covariance matrix \(\Sigma\) with \((j,k)\)-entry
\(\Sigma_{jk}=\E[X_{1,j}X_{1,k}]\), \(X_{1,j}\) denoting the \(j\)-th
coordinate of \(X_{1}\).
\end{theorem}

\begin{remark}
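The proof has two beats. (i) \emph{Tightness}: since the \(X_{j}\) are
independent with mean zero, the cross terms vanish and
\(\E|n^{-1/2}S_{n}|^{2}=n^{-1}\sum_{j=1}^{n}\E|X_{j}|^{2}=\E|X_{1}|^{2}\);
Chebyshev's inequality then yields, for every \(n\),
\(\P(|n^{-1/2}S_{n}|>M)\le \E|X_{1}|^{2}/M^{2}\), which is less than
\(\varepsilon\) once \(M=M_{\varepsilon}\) is large enough, so the sequence
is uniformly tight. (ii) \emph{Convergence of characteristic functions}: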
writing \(h(u)=\E\exp\bigl(i\langle u,X_{1}\rangle\bigr)\) for
\(u\in\R^{d}\), Taylor's theorem at \(0\) (the first-order term vanishes
because \(\E X_{1}=0\)) gives
\(h(u)=1-\tfrac{1}{2}u^{\top}\Sigma u + o(|u|^{2})\) as \(u\to 0\); hence,
for each fixed \(v\in\R^{d}\),
\[
\E\exp\!\bigl\{i\langle n^{-1/2}S_{n}, v\rangle\bigr\}
   \;=\; h\!\bigl(n^{-1/2}v\bigr)^{n}
   \;\longrightarrow\; \exp\!\Bigl\{-\tfrac{1}{2}v^{\top}\Sigma v\Bigr\},
\]
the characteristic function of the claimed Gaussian. Apply
\cref{lem:levy-continuity}.
\end{remark}