% =================================================================
% Lecture 11
% Primary source: handwritten notes (Mathpix mmd)
% Fallback: kashlak.pdf (only for OCR/notation/curriculum clarity)
% =================================================================
\section[Lecture 11 -- Law of Large Numbers]{Lecture 11 \textemdash{} Law of Large Numbers}
\label{sec:lec11}

The Borel--Cantelli machinery of Lecture~10 finally pays off.
We fix a sequence \(\{X_i\}_{i=1}^{\infty}\) of random variables on a common probability space \((\Omega,\Fcal,\P)\), valued in \((\R,\Bcal)\), and ask in what sense the sample averages \(n^{-1}S_n=n^{-1}\sum_{i=1}^{n}X_i\) approach the common mean.
Two answers --- one in probability under uncorrelation plus a second moment, one almost sure under independence and only a first moment --- are the content of this lecture.

\subsection{Setup: independence and identical distribution}

Throughout, \(X\colon\Omega\to\R\) is a random variable with law \(\P(X\in A)=\P(\{\omega\in\Omega:X(\omega)\in A\})\) for \(A\in\Bcal\), expectation \(\E X=\int X(\omega)\,d\P(\omega)\), and partial sums \(S_n=\sum_{i=1}^{n}X_i\).

\begin{definition}{Independence of random variables}{indep-rvs}
Random variables \(X\) and \(Y\) on \((\Omega,\Fcal,\P)\), valued in measurable spaces \((\Xset,\Xcal)\) and \((\Yset,\Ycal)\) respectively, are \emph{independent} if
\[
\P\bigl(\{X\in A\}\cap\{Y\in B\}\bigr) \;=\; \P(X\in A)\,\P(Y\in B) \qquad\text{for all }A\in\Xcal,\;B\in\Ycal.
\]
The definition extends to a finite collection \(\{X_i\}_{i=1}^{n}\) by requiring \(\P\!\left(\bigcap_{i=1}^{n}\{X_i\in A_i\}\right)=\prod_{i=1}^{n}\P(X_i\in A_i)\) for all measurable sets \(A_1,\dots,A_n\) in the respective target spaces.
An infinite collection \(\{X_i\}_{i=1}^{\infty}\) is independent if every finite subcollection is.
\end{definition}

\begin{remark}
Since \(\{X\in A\}=X^{-1}(A)\), independence of the random variables \(X\) and \(Y\) is equivalent to independence of the generated \(\sigma\)-fields \(\sigma(X)\) and \(\sigma(Y)\) in the sense of \cref{def:indep-sigma-fields}.
\end{remark}

\begin{definition}{Identically distributed; i.i.d.}{iid}
Random variables \(X\) and \(Y\) are \emph{identically distributed} if the pushforward laws \(\P\circ X^{-1}\) and \(\P\circ Y^{-1}\) coincide on \(\Bcal\).
A sequence \(\{X_i\}_{i=1}^{\infty}\) is i.i.d.\ (\emph{independent and identically distributed}) if it is independent and the \(X_i\) share a common law.
\end{definition}

\subsection{Weak law of large numbers}

The weak law trades a strong moment hypothesis for a very mild dependence hypothesis: not full independence, only \hblue{pairwise uncorrelation} (a strictly weaker condition).

\begin{theorem}{Weak law of large numbers}{wlln}
Let \((\Omega,\Fcal,\P)\) be a probability space and \(\{X_i\}_{i=1}^{\infty}\) random variables with
\[
\E X_i \;=\; c\in\R,\qquad \E X_i^{2} \;=\; 1 \quad\text{for all } i,
\]
and \(\E\!\left[(X_i-c)(X_j-c)\right]=0\) for all \(i\neq j\).
Then
\[
\frac{S_n}{n} \;\xrightarrow{\;\P\;}\; c,
\]
i.e.\ for every \(\varepsilon>0\), \(\P\!\left(\bigl|n^{-1}S_n-c\bigr|\geq\varepsilon\right)\to 0\) as \(n\to\infty\).
\end{theorem}

\begin{remark}
Write \(\sigma^{2}=\E(X_i-c)^{2}=\E X_i^{2}-c^{2}=1-c^{2}\leq 1\) for the common variance.
The proof reduces to \(c=0\) by replacing \(X_i\) with \(X_i-c\) (which changes the common second moment from \(1\) to \(\sigma^{2}\)), then applies Chebyshev: for any \(t>0\),
\[
\P\!\left(\frac{|S_n|}{n}\geq t\right) \;\le\; \frac{\E S_n^{2}}{t^{2}n^{2}} \;=\; \frac{1}{t^{2}n^{2}}\sum_{i,j=1}^{n}\E[X_i X_j] \;=\; \frac{\sigma^{2}}{n t^{2}} \;\to\; 0,
\]
where the cross terms vanish by uncorrelation and the diagonal sums to \(n\sigma^{2}\leq n\), since each centered variable has second moment \(\sigma^{2}\).
\end{remark}

\begin{remark}
Uncorrelation is genuinely weaker than independence: independence of \((X,Y)\) implies independence of \((f(X),g(Y))\) for any measurable \(f,g\), hence \(\Cov(f(X),g(Y))=0\) for every choice with \(f(X)\) and \(g(Y)\) square-integrable; uncorrelation asks this only for \(f=g=\mathrm{id}\).
\end{remark}

\subsection{Strong law of large numbers}

The strong law promotes ``in probability'' to ``almost surely'' and removes the second moment hypothesis, but pays for it with full independence and identical distribution.
Recall the variance
\[
\Var(X) \;=\; \int (X(\omega)-\E X)^{2}\,d\P(\omega).
\]

\begin{theorem}{Strong law of large numbers}{slln}
Let \(\{X_i\}_{i=1}^{\infty}\) be i.i.d.\ random variables from \((\Omega,\Fcal,\P)\) to \((\R,\Bcal)\).
Then:
\begin{enumerate}
\item If \(\E|X_1|<\infty\), then \(\displaystyle \frac{S_n}{n}\xrightarrow{\text{a.s.}} c\) where \(c=\E X_1\).
\item If \(\E|X_1|=\infty\), then \(S_n/n\) does not converge to any finite limit (almost surely).
\end{enumerate}
\end{theorem}

\begin{remark}
Compared with \cref{thm:wlln}, no second-moment assumption is made on the \(X_i\); only \(L^{1}\) is needed.
The trade-off is full independence (not just uncorrelation) and identical distribution.
The ``a.s.'' qualifier means convergence holds outside a \(\P\)-null set \(N\subset\Omega\).
\end{remark}

\begin{remark}
The divergence half (part~2) is the easier direction.
The heuristic: if \(\E|X_1|=\infty\) then, because the \(X_n\) are identically distributed, \(\sum_{n}\P(|X_n|>n)=\sum_{n}\P(|X_1|>n)=\infty\); since the events \(\{|X_n|>n\}\) are independent, the second Borel--Cantelli lemma gives \(|X_n|>n\) infinitely often, almost surely.
But on \(\{n^{-1}S_n\to c\}\) with \(c\) finite one has \(n^{-1}X_n=n^{-1}(S_n-S_{n-1})\to 0\), because both \(S_n/n\) and \(S_{n-1}/n\) tend to \(c\), contradicting \(|X_n|/n>1\) i.o.
\end{remark}

\begin{remark}
The forward direction (part~1) is far more delicate.
The standard route: reduce to \(X_i\geq 0\) by writing \(X_i=X_i^{+}-X_i^{-}\) (independence of \(X,Y\) passes to \(X^{+},Y^{+}\)), truncate \(Y_i=X_i\indic_{\{X_i\leq i\}}\) so that variances are finite, control the truncated partial sums \(T_n=\sum_{i=1}^{n}Y_i\) along a geometric subsequence \(k_n=\lfloor\delta^{n}\rfloor\) with \(\delta>1\) using Chebyshev plus the first Borel--Cantelli lemma, then sandwich the full sums \(S_i\) for \(k_n\leq i\leq k_{n+1}\) and let \(\delta\downarrow 1\).
\end{remark}

\begin{example}[i.i.d.\ Bernoulli sample mean]
Let \(X_i\overset{\text{i.i.d.}}{\sim}\mathrm{Bernoulli}(p)\), so \(\E X_i=p\) and \(\Var(X_i)=p(1-p)\).
The first moment is finite, so \(n^{-1}S_n\to p\) almost surely by \cref{thm:slln}.
The \(X_i\) are also pairwise uncorrelated with finite second moment \(\E X_i^{2}=p\); this matches the normalization \(\E X_i^{2}=1\) of \cref{thm:wlln} only when \(p=1\), but the same Chebyshev argument gives \(\P\bigl(|n^{-1}S_n-p|\geq\varepsilon\bigr)\leq p(1-p)/(n\varepsilon^{2})\to 0\), so \(n^{-1}S_n\to p\) in probability as well.
In particular, the empirical frequency of successes in \(n\) Bernoulli trials converges almost surely to the true success probability~\(p\) --- the formal statement behind the everyday claim ``the average converges to the mean''.
\end{example}