% =================================================================
% Lecture 12
% Primary source: handwritten notes (Mathpix mmd, lines 527--644)
% Fallback: kashlak.pdf §3.4 (only for OCR/notation/curriculum clarity)
% =================================================================
\section[Lecture 12 -- Central Limit Theorem; Characteristic Functions]{Lecture 12 \textemdash{} Central Limit Theorem; Characteristic Functions}
\label{sec:lec12}
The previous lecture closed the strong law of large numbers.
We now turn to fluctuations: properly normalised, sums \(S_n=X_1+\dots+X_n\) of iid mean-zero random vectors with finite second moments converge in distribution to a Gaussian.
The proof rests on three tools\,---\,uniform tightness and Prohorov's theorem (\cref{thm:prohorov}, already established), the characteristic function and its uniqueness, and L\'evy's continuity lemma\,---\,from which the central limit theorem drops out by a Taylor expansion.
\subsection{Gaussian measures}
\begin{definition}{Gaussian measure on \texorpdfstring{$\R$}{R}}{gaussian-r}
  A Borel measure \(\gamma\) on \((\R,\Bcal)\) is \emph{Gaussian} with mean \(m\in\R\) and variance \(\sigma^2>0\) if, for all \(a<b\),
  \[
    \gamma\bigl((a,b]\bigr) \;=\; \frac{1}{\sigma\sqrt{2\pi}}\int_{a}^{b} \exp\!\left[-\frac{1}{2\sigma^2}(x-m)^2\right]\,d\lambda(x).
  \]
  For \(\sigma=0\) we set \(\gamma=\delta_m\) (Dirac mass at \(m\)) and call \(\gamma\) a \emph{degenerate} Gaussian measure.
\end{definition}
\begin{definition}{Gaussian measure on \texorpdfstring{$\R^d$}{Rd}}{gaussian-rd}
  A Borel measure \(\gamma\) on \((\R^d,\Bcal)\) is \emph{Gaussian} if for every linear functional \(f\colon\R^d\to\R\) the induced measure \(\gamma\circ f^{-1}\) on \((\R,\Bcal)\) is a (possibly degenerate) Gaussian measure.
  Equivalently, every linear combination of the coordinates is one-dimensional Gaussian.
\end{definition}
\begin{definition}{Gaussian random variable}{gaussian-rv}
  A random variable \(Z\) from a probability space \((\Omega,\Fcal,\mu)\) to \((\R^d,\Bcal)\) is \emph{Gaussian} if its law \(\gamma:=\mu\circ Z^{-1}\) is a Gaussian measure on \((\R^d,\Bcal)\).
\end{definition}
\begin{remark}
  For vectors \(u,v\in\R^d\) we use the Euclidean inner product \(\langle u,v\rangle=\sum_{i=1}^{d}u_i v_i\) and write \(|u|^2=\langle u,u\rangle\).
  A collection \(\{X_i\}_{i=1}^{\infty}\) is \emph{iid} (independent and identically distributed) if the \(X_i\) are mutually independent and share a common law (recall that every random variable induces a measure on its target space, namely its law).
\end{remark}
\subsection{Characteristic functions}
The characteristic function is the Fourier transform of a probability measure; it turns convolution into pointwise multiplication and, by the uniqueness theorem below, encodes the measure completely.
\begin{definition}{Characteristic function}{char-fn}
  For a probability measure \(\mu\) on \((\R^d,\Bcal)\), the \emph{characteristic function} \(\tilde\mu\colon\R^d\to\C\) is
  \[
    \tilde\mu(t) \;:=\; \int \exp\!\bigl\{\,i\langle x,t\rangle\,\bigr\}\,d\mu(x).
  \]
  When \(\tilde\mu\) is integrable against Lebesgue measure on \(\R^d\), the inverse transform recovers a density:
  \[
    p(x) \;=\; (2\pi)^{-d}\!\int \tilde\mu(t)\, \exp\!\bigl\{\,-i\langle x,t\rangle\,\bigr\}\,d\lambda(t), \qquad \lambda\text{-a.e.,}
  \]
  with \(p\) the probability density function of \(\mu\).
\end{definition}
\begin{definition}{Convolution of measures}{convolution}
  For two probability measures \(\mu,\nu\) on \((\R^d,\Bcal)\), the \emph{convolution} \(\mu*\nu\) is the measure
  \[
    (\mu*\nu)(B) \;:=\; \int \nu(B-x)\,d\mu(x), \qquad B\in\Bcal,
  \]
  where \(B-x=\{y\in\R^d:y+x\in B\}\).
  The operation \(*\) is associative and commutative; the characteristic function of \(\mu*\nu\) is \(\tilde\mu\,\tilde\nu\); and if \(X,Y\) are independent with laws \(\mu,\nu\), then \(X+Y\) has law \(\mu*\nu\).
\end{definition} \begin{theorem}{Uniqueness of characteristic functions}{char-unique} Let \(\mu\) and \(\nu\) be probability measures on \((\R^d,\Bcal)\). If \(\tilde\mu=\tilde\nu\), then \(\mu=\nu\). \end{theorem} \begin{remark} The proof goes via Gaussian smoothing. Let \(\gamma_\sigma\) be the mean-zero Gaussian on \(\R^d\) with covariance \(\sigma^2 I\) and put \(\mu^{(\sigma)}:=\mu*\gamma_\sigma\), \(\nu^{(\sigma)}:=\nu*\gamma_\sigma\). The smoothed measures admit explicit densities \[ q^{(\sigma)}(x) \;=\; (2\pi)^{-d}\!\int \tilde\nu(t)\, \exp\!\left[\,-i\langle x,t\rangle - \tfrac12\sigma^2|t|^2\right] d\lambda(t), \] and similarly for \(p^{(\sigma)}\) with \(\tilde\mu\). Hence \(\tilde\mu=\tilde\nu\) forces \(\mu^{(\sigma)}=\nu^{(\sigma)}\) for every \(\sigma>0\). Realising \(\mu^{(\sigma)}\) as the law of \(X+\sigma Z\) (with \(X\sim\mu\), \(Z\sim\gamma_1\) independent) and letting \(\sigma\downarrow 0\) gives \(X+\sigma Z\to X\) almost surely, hence in probability and so in distribution: \(\mu^{(\sigma)}\Rightarrow\mu\), and likewise \(\nu^{(\sigma)}\Rightarrow\nu\). Uniqueness of weak limits gives \(\mu=\nu\). \end{remark} \subsection{L\texorpdfstring{\'e}{e}vy's continuity lemma} Convergence of characteristic functions, plus tightness, controls weak convergence of the underlying measures. \begin{lemma}{L\'evy continuity}{levy-continuity} Let \(\{\mu_i\}_{i=1}^{\infty}\) be a uniformly tight sequence of probability measures on \(\R^d\). If the characteristic functions satisfy \(\tilde\mu_i(v)\to\tilde\mu(v)\) for every \(v\in\R^d\), then \(\mu_i\Rightarrow\mu\), where \(\mu\) is the (unique) probability measure with characteristic function \(\tilde\mu\). \end{lemma} \begin{remark} By Prohorov (\cref{thm:prohorov}), every subsequence \(\mu_{i_k}\) has a further weakly convergent subsubsequence \(\mu_{i_{k_r}}\Rightarrow\mu^*\). 
  Since \(x\mapsto\exp\{i\langle x,v\rangle\}\) is bounded and continuous for each \(v\in\R^d\), weak convergence along this subsubsequence gives \(\tilde\mu_{i_{k_r}}(v)\to\widetilde{\mu^*}(v)\); comparing with the hypothesis forces \(\widetilde{\mu^*}=\tilde\mu\) on all of \(\R^d\), and uniqueness of characteristic functions (\cref{thm:char-unique}) identifies \(\mu^*=\mu\).
  The standard subsubsequence trick (every subsequence has a further subsubsequence with the same weak limit) then promotes this to convergence of the full sequence.
\end{remark}
\subsection{The central limit theorem}
We can now prove the headline result.
The hypothesis is just iid with mean zero and a finite second moment.
\begin{theorem}{Central limit theorem}{clt}
  Let \((\Omega,\Fcal,\mu)\) be a probability space and let \(\{X_n\}_{n=1}^{\infty}\) be iid random vectors on \((\R^d,\Bcal)\) with
  \[
    \E X_n \;=\; 0 \qquad\text{and}\qquad \E\,|X_n|^2 \;<\; \infty.
  \]
  Set \(S_n=\sum_{j=1}^{n}X_j\).
  Then
  \[
    n^{-\tfrac12}\,S_n \;\xrightarrow{d}\; Z,
  \]
  where \(Z\) is a Gaussian random vector on \(\R^d\) with mean zero and covariance \(\Sigma\) given by \(\Sigma_{jk}=\E[X_{nj}X_{nk}]\), with \(X_{nj}\) denoting the \(j\)th coordinate of \(X_n\).
\end{theorem}
The proof proceeds in two steps: \emph{tightness} of the normalised sums via a second-moment Chebyshev bound, and \emph{characteristic-function convergence} via Taylor expansion.
The two ingredients meet in L\'evy's lemma.
\begin{remark}[tightness via Chebyshev]
  Since the \(X_j\) are mean zero and independent, \(\E\langle X_j,X_k\rangle=0\) for \(j\ne k\), so
  \[
    \E\,\bigl|n^{-\tfrac12}S_n\bigr|^2 \;=\; \frac{1}{n}\,\E\!\left[\,\sum_{j,k=1}^{n}\langle X_j,X_k\rangle\right] \;=\; \frac{1}{n}\sum_{j=1}^{n}\E\,|X_j|^2 \;=\; \E\,|X_1|^2.
  \]
  For any \(\varepsilon>0\), choose \(M_\varepsilon>0\) with \(\E|X_1|^2/M_\varepsilon^2<\varepsilon\); Chebyshev's inequality gives \(\P(|n^{-\tfrac12}S_n|>M_\varepsilon)\le\E|X_1|^2/M_\varepsilon^2<\varepsilon\), uniformly in \(n\).
  The sequence \(\{n^{-\tfrac12}S_n\}\) is therefore uniformly tight.
\end{remark}
\begin{remark}[characteristic-function expansion]
  Fix \(v\in\R^d\).
  The scalars \(\langle v,X_j\rangle\) are iid real-valued with \(\E\langle v,X_j\rangle=0\) and \(\E\langle v,X_j\rangle^2<\infty\).
  Define
  \[
    h(v) \;:=\; \E\exp\!\bigl(\,i\langle v,X_j\rangle\,\bigr).
  \]
  Because \(\E|X_j|^2<\infty\), the function \(h\) is twice continuously differentiable (differentiate under the expectation), with \(h(0)=1\), \(\nabla h(0)=i\,\E X_j=0\) and \(\nabla^2 h(0)=-\Sigma\) where \(\Sigma=\E[X_j X_j^{\top}]\).
  Taylor's theorem gives
  \[
    h(v) \;=\; 1 \;-\; \tfrac12\,v^{\top}\Sigma\,v \;+\; o(|v|^2) \qquad\text{as } v\to 0.
  \]
  Independence then yields, for any fixed \(v\),
  \[
    \E\exp\!\bigl\{\,i\langle n^{-\tfrac12}S_n,v\rangle\,\bigr\} \;=\; h\!\bigl(n^{-\tfrac12}v\bigr)^{n} \;=\; \left(1-\frac{v^{\top}\Sigma v}{2n} + o\!\left(\frac{|v|^2}{n}\right)\right)^{\!n} \;\longrightarrow\; \exp\!\bigl\{-\tfrac12 v^{\top}\Sigma v\bigr\}
  \]
  as \(n\to\infty\), using that \((1+a_n/n)^n\to e^{a}\) whenever \(a_n\to a\) in \(\C\).
  The right-hand side is the characteristic function of the mean-zero Gaussian \(Z\) on \(\R^d\) with covariance \(\Sigma\).
  Combining with tightness and L\'evy's continuity (\cref{lem:levy-continuity}) gives \(n^{-\tfrac12}S_n\xRightarrow{} Z\) \textemdash{} convergence in distribution.
\end{remark}
\begin{figure}[htbp]
  \centering
  \begin{tikzpicture}[>=Stealth, node distance=10mm and 18mm, font=\small]
    \node[draw, rounded corners, fill=defbodybg, align=center, text width=34mm, minimum height=12mm] (tight)
      {Tightness\\ \(\{n^{-1/2}S_n\}\) uniformly tight\\ (Chebyshev)};
    \node[draw, rounded corners, fill=defbodybg, align=center, text width=34mm, minimum height=12mm, right=of tight] (cf)
      {Characteristic functions\\ \(h(n^{-1/2}v)^n\to e^{-\tfrac12 v^\top\!\Sigma v}\)\\ (Taylor)};
    \node[draw, rounded corners, fill=lembodybg, align=center, text width=44mm, minimum height=12mm, below=12mm of $(tight)!0.5!(cf)$] (levy)
      {L\'evy continuity \(+\) uniqueness\\ of characteristic functions};
    \node[draw, rounded corners, fill=thmbodybg, align=center, text width=44mm, minimum height=12mm, below=10mm of levy] (clt)
      {\(n^{-1/2}S_n\xRightarrow{}Z\sim\mathcal N(0,\Sigma)\)};
    \draw[->, thick] (tight.south) -- (levy.north west);
    \draw[->, thick] (cf.south) -- (levy.north east);
    \draw[->, thick] (levy.south) -- (clt.north);
  \end{tikzpicture}
  \caption{Architecture of the CLT proof: tightness and pointwise
convergence of characteristic functions feed into L\'evy's lemma; the limiting characteristic function identifies the Gaussian \(Z\).} \label{fig:clt-architecture} \end{figure} \begin{remark} The covariance entry \(\Sigma_{jk}=\E[X_{nj}X_{nk}]\) is independent of \(n\) by the iid hypothesis; the limiting Gaussian is the same regardless of which copy of \(X_n\) one uses to compute it. In the scalar case \(d=1\) the conclusion reduces to the familiar \(n^{-1/2}S_n\xRightarrow{}\mathcal N(0,\sigma^2)\) with \(\sigma^2=\E X_1^2\). \end{remark}