% =================================================================
% Lecture 11
% Source: handwritten notes (Mathpix-converted) + Kashlak STAT 571
% =================================================================
\section[Lecture 11 -- Law of Large Numbers]{Lecture 11 \textemdash{} Law of Large Numbers}
\label{sec:lec11}

The two laws of large numbers are the workhorses of measure-theoretic probability:
they say that the empirical average \(S_n/n\) of a sample converges to the common expectation of its terms,
in two distinct senses (in probability versus almost surely).
Throughout we fix a probability space \((\Omega,\Fcal,\P)\) and a sequence of random variables
\(\{X_i\}_{i=1}^{\infty}\) from \((\Omega,\Fcal)\) to \((\R,\Bcal)\).
For \(A\in\Bcal\) we write
\[
  \P(X\in A) \;:=\; \P\bigl(\{\omega\in\Omega : X(\omega)\in A\}\bigr),
  \qquad
  \E X \;=\; \int_{\Omega} X(\omega)\,d\P(\omega),
\]
and define the partial sum \(S_n=\sum_{i=1}^{n}X_i\), which is itself a random variable.

\subsection{Independence of random variables}
Before stating the laws of large numbers we need to record what independence means for random variables, not just for events.

\begin{definition}{Independence of random variables}{indep-rv}
Let \(X\) and \(Y\) be random variables on the same probability space \((\Omega,\Fcal,\P)\)
with possibly different codomains \((\mathbb{X},\mathcal{X})\) and \((\mathbb{Y},\mathcal{Y})\).
Then \(X\) and \(Y\) are \emph{independent} if
\[
  \P\bigl(\{X\in A\}\cap\{Y\in B\}\bigr) \;=\; \P(X\in A)\,\P(Y\in B)
  \qquad \forall\,A\in\mathcal{X},\ B\in\mathcal{Y}.
\]
This extends to a finite collection \(\{X_i\}_{i=1}^{n}\), where \(X_i\) takes values in \((\mathbb{X}_i,\mathcal{X}_i)\), by requiring
\[
  \P\!\left(\bigcap_{i=1}^{n}\{X_i\in A_i\}\right) \;=\; \prod_{i=1}^{n}\P(X_i\in A_i)
  \qquad \forall\,A_i\in\mathcal{X}_i,\ i=1,\dots,n.
\]
An infinite collection \(\{X_i\}_{i=1}^{\infty}\) is independent if every finite subcollection is independent.
\end{definition}

\begin{remark}
Since \(\{X\in A\}=X^{-1}(A)\), the random variables \(X\) and \(Y\) are independent if and only if
the \(\sigma\)-fields \(\sigma(X)\) and \(\sigma(Y)\) are independent in the sense of
\cref{def:indep-sigma-fields} from Lecture~4.
\end{remark}

\begin{definition}{Identically distributed}{iid}
The \emph{law} (or \emph{distribution}) of a random variable \(X:\Omega\to\R\) is the pushforward measure
\(\P\circ X^{-1}\) on \((\R,\Bcal)\).
Two random variables \(X,Y\) are \emph{identically distributed} if \(\P\circ X^{-1}=\P\circ Y^{-1}\);
a sequence \(\{X_i\}_{i=1}^{\infty}\) is \emph{i.i.d.}\ if it is independent and all of its members have the same law.
\end{definition}

\subsection{The weak law}
The first law trades sharp conclusions for cheap hypotheses: only uncorrelatedness and a uniform second moment are needed.

\begin{theorem}{Weak Law of Large Numbers}{wlln}
Let \((\Omega,\Fcal,\P)\) be a probability space and let \(\{X_i\}_{i=1}^{\infty}\) be random variables
from \(\Omega\) to \(\R\) satisfying
\begin{enumerate}
  \item \(\E X_i = c\in\R\) for all \(i\);
  \item \(\E X_i^{2} = 1\) for all \(i\);
  \item \(\E\bigl[(X_i-c)(X_j-c)\bigr] = 0\) for all \(i\neq j\) (i.e.\ the \(X_i\) are pairwise uncorrelated).
\end{enumerate}
Then
\[
  \frac{S_n}{n} \;\xrightarrow{\ \P\ }\; c \qquad \text{as } n\to\infty.
\]
\end{theorem}

\begin{remark}
The proof uses only Chebyshev's inequality applied to \(S_n/n\);
in particular only \emph{uncorrelatedness} (not full independence) is required.
The next theorem strengthens convergence in probability to almost-sure convergence
at the price of (i) genuine independence and (ii) identical distribution,
but in compensation drops the second-moment hypothesis entirely.
\end{remark}

\subsection{The strong law}
The strong law makes no variance assumption; the variance enters only as notation used in its proof:
\[
  \Var(X) \;=\; \int_{\Omega} \bigl(X(\omega)-\E X\bigr)^{2}\,d\P(\omega).
\]

\begin{theorem}{Strong Law of Large Numbers}{slln}
Let \(\{X_i\}_{i=1}^{\infty}\) be i.i.d.\ random variables from \(\Omega\) to \(\R\).
\begin{enumerate}
  \item If \(\E|X_i|<\infty\), then
  \[
    \frac{S_n}{n} \;\xrightarrow{\text{a.s.}}\; c \qquad \text{for } c=\E X_1.
  \]
  \item If \(\E|X_i|=\infty\), then almost surely \(S_n/n\) does \emph{not} converge to any finite limit.
\end{enumerate}
\end{theorem}

\begin{remark}
The contrapositive of part~(2) is informative:
existence of an almost-sure finite limit for \(S_n/n\) forces \(\E|X_1|<\infty\).
So the hypothesis in part~(1) is sharp.
\end{remark}

\begin{remark}
The proof of (2) is short.
On the event that \(n^{-1}S_n\to c\in\R\) we have
\(n^{-1}X_n=n^{-1}(S_n-S_{n-1})=n^{-1}S_n-\tfrac{n-1}{n}\,(n-1)^{-1}S_{n-1}\to c-c=0\).
But since the \(X_n\) are identically distributed, \(\E|X_1|=\infty\) implies
\(\sum_{n=1}^{\infty}\P(|X_n|>n)=\sum_{n=1}^{\infty}\P(|X_1|>n)\ge\E|X_1|-1=\infty\),
and the events \(\{|X_n|>n\}\) are independent,
so by the second Borel--Cantelli lemma \(|X_n|>n\) infinitely often almost surely.
This contradicts \(n^{-1}X_n\to 0\), so the event that \(S_n/n\) converges to a finite limit has probability zero.
\end{remark}

\begin{remark}
The proof of (1) is the heart of the lecture.
Writing \(X_i=X_i^{+}-X_i^{-}\) and treating the positive and negative parts separately,
we may assume \(X_i\ge 0\).
The strategy is then to truncate \(Y_i=X_i\indic_{X_i\le i}\) so that the variances become finite;
consider \(T_n=\sum_{i=1}^{n}Y_i\) along the geometric subsequence \(k_n=\lfloor\delta^n\rfloor\) for \(\delta>1\);
apply Chebyshev plus the first Borel--Cantelli lemma to obtain \(k_n^{-1}|T_{k_n}-\E T_{k_n}|\to 0\) a.s.;
observe that \(\E Y_n\uparrow\E X_1\) by monotone convergence, so that \(k_n^{-1}\E T_{k_n}\to\E X_1\);
transfer the conclusion from \(T_{k_n}\) to \(S_{k_n}\) via
\(\sum_i\P(X_i\neq Y_i)=\sum_i\P(X_1>i)\le\E X_1<\infty\) and the first Borel--Cantelli lemma again;
and finally interpolate between subsequence indices, using that \(S_i\) is nondecreasing in \(i\) because \(X_i\ge 0\), to get
\[
  \delta^{-2}\,\E X_1 \;\le\; \liminf_{i\to\infty}\frac{S_i}{i}
  \;\le\; \limsup_{i\to\infty}\frac{S_i}{i} \;\le\; \delta^{2}\,\E X_1
  \qquad\text{a.s.},
\]
then send \(\delta\downarrow 1\).
\end{remark}

\begin{example}[Empirical mean of i.i.d.\ Bernoullis]
Let \(\{X_i\}_{i=1}^{\infty}\) be i.i.d.\ \(\mathrm{Bernoulli}(p)\), \(p\in(0,1)\).
Then \(\E|X_1|=p<\infty\), so by \cref{thm:slln} \(S_n/n\to p\) almost surely.
This is the rigorous statement underlying the frequentist interpretation:
with probability one, the long-run frequency of successes equals the success probability.
\end{example}

\begin{example}[Cauchy distribution: the empirical mean fails to converge]
If \(\{X_i\}_{i=1}^{\infty}\) are i.i.d.\ standard Cauchy, then \(\E|X_1|=\infty\).
By part~(2) of \cref{thm:slln}, \(S_n/n\) does not converge to any finite limit;
in fact \(S_n/n\) is itself standard Cauchy for every~\(n\), so the empirical mean never settles down.
\end{example}