% =================================================================
% Lecture 12
% Source: handwritten notes (Mathpix-converted) + Kashlak STAT 571
% =================================================================
\section[Lecture 12 -- Central Limit Theorem; Characteristic Functions]{Lecture 12 \textemdash{} Central Limit Theorem; Characteristic Functions}
\label{sec:lec12}

To state the central limit theorem we first need to know what a Gaussian random variable is, and to prove it (next lecture) we will route everything through the \emph{characteristic function} \textemdash{} the Fourier transform of a probability measure.
This lecture collects the cast of characters: Gaussian measures on \(\R\) and \(\R^d\), the characteristic function and its inversion formula, convolution, the uniqueness theorem, L\'evy's continuity lemma, and the CLT itself.

\subsection{Gaussian measures}

\begin{definition}{Gaussian Measure on $\R$}{gaussian-measure-r}
A Borel measure \(\gamma\) on \((\R,\Bcal)\) is \emph{Gaussian with mean \(m\in\R\) and variance \(\sigma^{2}>0\)} if, for all \(a<b\),
\[
\gamma\bigl((a,b]\bigr) \;=\; \frac{1}{\sigma\sqrt{2\pi}} \int_{a}^{b}\exp\!\left[-\frac{1}{2\sigma^{2}}(x-m)^{2}\right] d\lambda(x).
\]
If \(\sigma=0\) we set \(\gamma=\delta_{m}\) (a Dirac mass at \(m\)) and call \(\gamma\) a \emph{degenerate} Gaussian measure.
\end{definition}

\begin{definition}{Gaussian Measure on $\R^{d}$}{gaussian-measure-rd}
A Borel measure \(\gamma\) on \((\R^{d},\Bcal)\) is \emph{Gaussian} if for every linear functional \(f:\R^{d}\to\R\), the induced (push-forward) measure \(\gamma\circ f^{-1}\) on \((\R,\Bcal)\) is Gaussian in the sense of \cref{def:gaussian-measure-r}.
Equivalently, every linear combination of the coordinates is (one-dimensional) Gaussian.
\end{definition}

\begin{definition}{Gaussian Random Variable}{gaussian-rv}
A random variable \(Z:(\Omega,\Fcal,\mu)\to(\R^{d},\Bcal)\) is \emph{Gaussian} if its law \(\gamma:=\mu\circ Z^{-1}\) is a Gaussian measure on \((\R^{d},\Bcal)\).
\end{definition}

\begin{remark}
For \(u,v\in\R^{d}\) we use the standard inner product \(\langle u,v\rangle=\sum_{i=1}^{d}u_{i}v_{i}\) and write \(|u|^{2}:=\langle u,u\rangle\).
A collection \(\{X_{i}\}_{i=1}^{\infty}\) of random variables is \emph{i.i.d.} if the \(X_{i}\) are mutually independent (that is, every finite subcollection is independent, not merely every pair) and the induced measures \(\mu\circ X_{i}^{-1}\) all coincide.
\end{remark}

\subsection{Characteristic functions}

The strategy for proving the CLT is to show that the characteristic functions of \(n^{-1/2}S_{n}\) converge to that of a Gaussian, then invoke L\'evy's continuity lemma.

\begin{definition}{Characteristic Function}{characteristic-function}
For a probability measure \(\mu\) on \((\R^{d},\Bcal)\), the \emph{characteristic function} (Fourier transform) \(\widetilde{\mu}:\R^{d}\to\C\) is
\[
\widetilde{\mu}(t) \;:=\; \int \exp\!\bigl\{i\langle x,t\rangle\bigr\}\,d\mu(x).
\]
\end{definition}

\begin{remark}[Inversion]
If \(\widetilde{\mu}\) is integrable with respect to Lebesgue measure \(\lambda\) on \(\R^{d}\) then \(\mu\) has a density \(p\) with respect to \(\lambda\) and
\[
p(x) \;=\; (2\pi)^{-d}\int \widetilde{\mu}(t)\, \exp\!\bigl\{-i\langle x,t\rangle\bigr\}\,d\lambda(t), \qquad \lambda\text{-a.e.}
\]
\end{remark}

\begin{definition}{Convolution}{convolution}
For two measures \(\mu,\nu\) on \((\R^{d},\Bcal)\), the \emph{convolution} \(\mu*\nu\) is the measure
\[
(\mu*\nu)(B) \;:=\; \int \nu(B-x)\,d\mu(x), \qquad B\in\Bcal,
\]
where \(B-x=\{y\in\R^{d}: y+x\in B\}\).
\end{definition}

\begin{remark}
Convolution is associative and commutative.
The characteristic function of \(\mu*\nu\) factorises: \(\widetilde{\mu*\nu}=\widetilde{\mu}\,\widetilde{\nu}\).
Moreover, if \(X\) and \(Y\) are independent random variables with laws \(\mu\) and \(\nu\), the law of \(X+Y\) is \(\mu*\nu\).
\end{remark}

\begin{theorem}{Uniqueness of Characteristic Functions}{cf-uniqueness}
Let \(\mu\) and \(\nu\) be probability measures on \((\R^{d},\Bcal)\).
If \(\widetilde{\mu}=\widetilde{\nu}\), then \(\mu=\nu\).
\end{theorem}

\begin{remark}
The proof goes via convolution with a small Gaussian: let \(\gamma_\sigma\) be mean-zero Gaussian on \(\R^{d}\) with covariance \(\sigma^{2}I\) and set \(\mu^{(\sigma)}=\mu*\gamma_{\sigma}\) and \(\nu^{(\sigma)}=\nu*\gamma_{\sigma}\).
Since \(\widetilde{\gamma_{\sigma}}(t)=\exp\{-\tfrac{1}{2}\sigma^{2}|t|^{2}\}\), the characteristic function of \(\mu^{(\sigma)}\) is \(\widetilde{\mu}(t)\exp\{-\tfrac{1}{2}\sigma^{2}|t|^{2}\}\), which is Lebesgue-integrable because \(|\widetilde{\mu}|\le 1\).
Inversion therefore gives the density \(p^{(\sigma)}\) of \(\mu^{(\sigma)}\) explicitly in terms of \(\widetilde{\mu}\); equality of characteristic functions therefore yields \(\mu^{(\sigma)}=\nu^{(\sigma)}\).
Letting \(\sigma\downarrow 0\) gives \(\mu^{(\sigma)}\Rightarrow\mu\) and \(\nu^{(\sigma)}\Rightarrow\nu\), so \(\mu=\nu\).
\end{remark}

\subsection{L\'evy's continuity lemma and the CLT}

The bridge from pointwise convergence of characteristic functions to weak convergence of measures is L\'evy's lemma; together with the uniqueness theorem and Prohorov's theorem (\cref{sec:lec11}, in spirit) it is exactly what the CLT proof needs.

\begin{lemma}{L\'evy's Continuity Lemma}{levy-continuity}
Let \(\{\mu_{i}\}\) be a uniformly tight sequence of probability measures on \(\R^{d}\), and let \(\mu\) be a probability measure on \(\R^{d}\).
If for every \(v\in\R^{d}\),
\[
\widetilde{\mu_{i}}(v) \;\longrightarrow\; \widetilde{\mu}(v),
\]
then \(\mu_{i}\Rightarrow\mu\).
(By \cref{thm:cf-uniqueness}, \(\mu\) is the unique probability measure with characteristic function \(\widetilde{\mu}\).)
\end{lemma}

\begin{remark}
Sketch: uniform tightness plus Prohorov shows that every subsequence has a further subsequence that converges weakly; the assumed pointwise convergence of characteristic functions, combined with \cref{thm:cf-uniqueness}, forces every limit point to equal \(\mu\); the subsequence principle then promotes this to convergence of the whole sequence.
\end{remark}

\begin{theorem}{Central Limit Theorem}{clt}
Let \((\Omega,\Fcal,\mu)\) be a probability space and let \(\{X_{n}\}_{n=1}^{\infty}\) be i.i.d.\ random variables taking values in \((\R^{d},\Bcal)\) with \(\E X_{n}=0\) and \(\E|X_{n}|^{2}<\infty\).
Set \(S_{n}=\sum_{j=1}^{n}X_{j}\).
Then
\[
n^{-1/2} S_{n} \;\xrightarrow{\;d\;}\; Z,
\]
where \(Z\) is a Gaussian random variable on \(\R^{d}\) with mean zero and covariance matrix \(\Sigma\) with \((j,k)\)-entry \(\Sigma_{jk}=\E[X_{1,j}X_{1,k}]\), where \(X_{1,j}\) denotes the \(j\)th coordinate of \(X_{1}\) (by identical distribution, any \(X_{n}\) gives the same \(\Sigma\)).
\end{theorem}

\begin{remark}
The proof has two beats.
(i) \emph{Tightness}: since the \(X_{j}\) are independent with mean zero, the cross terms \(\E\langle X_{j},X_{k}\rangle\), \(j\neq k\), vanish, so the second-moment hypothesis gives \(\E|n^{-1/2}S_{n}|^{2}=\E|X_{1}|^{2}\); Chebyshev's inequality then yields \(\P(|n^{-1/2}S_{n}|>M_{\varepsilon})\le \E|X_{1}|^{2}/M_{\varepsilon}^{2}<\varepsilon\) for every \(n\) once \(M_{\varepsilon}^{2}>\E|X_{1}|^{2}/\varepsilon\), so the sequence is uniformly tight.
(ii) \emph{Convergence of characteristic functions}: writing \(h(v)=\E\exp\bigl(i\langle v,X_{1}\rangle\bigr)\) for \(v\in\R^{d}\), Taylor's theorem at \(0\) gives \(h(v)=1-\tfrac{1}{2}v^{\top}\Sigma v + o(|v|^{2})\) as \(v\to 0\) (the linear term vanishes because \(\E X_{1}=0\)); hence, for each fixed \(v\in\R^{d}\), independence gives
\[
\E\exp\!\bigl\{i\langle n^{-1/2}S_{n}, v\rangle\bigr\}
\;=\; h\!\bigl(n^{-1/2}v\bigr)^{n}
\;=\; \Bigl(1-\tfrac{1}{2n}v^{\top}\Sigma v+o(n^{-1})\Bigr)^{n}
\;\longrightarrow\; \exp\!\Bigl\{-\tfrac{1}{2}v^{\top}\Sigma v\Bigr\},
\]
the characteristic function of the claimed Gaussian.
Apply \cref{lem:levy-continuity}.
\end{remark}