% =================================================================
% Lecture 8
% Primary source: handwritten notes (Mathpix mmd)
% Fallback: kashlak.pdf (only for OCR/notation/curriculum clarity)
% =================================================================
\section[Lecture 8 -- Lp Spaces and Classical Inequalities]{Lecture 8 \textemdash{} \texorpdfstring{$L^p$}{Lp} Spaces and Classical Inequalities} \label{sec:lec08} Fix a measure space \((\Omega,\Fcal,\mu)\). This lecture introduces the spaces \(L^p(\Omega,\Fcal,\mu)\), the three workhorse tail-bound inequalities (Markov, Chebyshev, Chernoff), Jensen's inequality for convex functions, and the two inequalities (H\"older and Minkowski) that give \(L^p\) the structure of a normed vector space. We close with a density theorem that lets one approximate any \(L^p\) function with \(p<\infty\) by simple functions supported on sets of finite measure. \subsection{The spaces \texorpdfstring{$L^p$}{Lp}} \begin{definition}{$L^p$ space, $1\le p<\infty$}{Lp-finite} For \(p\in[1,\infty)\) and a measurable function \(f\colon\Omega\to\R\), define \[ \|f\|_p \;=\; \left(\int_\Omega |f|^p\,d\mu\right)^{1/p}. \] The space \(L^p(\Omega,\Fcal,\mu)\) consists of all measurable \(f\) with \(\|f\|_p<\infty\), modulo equality \(\mu\)-almost everywhere. \end{definition} \begin{definition}{Essential supremum and $L^\infty$}{Linfty} The \emph{essential supremum} of a measurable function \(f\) is \[ \|f\|_\infty \;=\; \esssup |f| \;=\; \inf\bigl\{t\in[0,\infty]:\mu(\{|f|>t\})=0\bigr\}. \] The space \(L^\infty(\Omega,\Fcal,\mu)\) consists of all measurable \(f\) with \(\|f\|_\infty<\infty\), again modulo \(\mu\)-a.e.\ equality. \end{definition} \begin{remark} The endpoint case \(p=\infty\) is an honest analogue of the finite-\(p\) definition (which already covers the other endpoint \(p=1\)): as \(p\to\infty\) one has \(\|f\|_p\to\|f\|_\infty\) when \(\mu\) is finite and \(f\) is bounded.
\end{remark} \subsection{Markov, Chebyshev, Chernoff} The next three results all spring from the same one-line trick: bound the integrand below on the set \(\{f\ge t\}\). \begin{theorem}{Markov's inequality}{markov} Let \(f\ge 0\) be measurable and \(t>0\). Then \[ \mu(\{f\ge t\}) \;\le\; \frac{1}{t}\int_\Omega f\,d\mu. \] \end{theorem} \begin{corollary}{Chebyshev's inequality}{chebyshev} For any measurable \(f\) and \(m\in\R\), \(t>0\), \[ \mu(\{|f-m|\ge t\}) \;\le\; t^{-2}\int_\Omega (f-m)^2\,d\mu. \] \end{corollary} \begin{corollary}{Chernoff's inequality}{chernoff} For any measurable \(f\), \(t\in\R\), and \(\eta\ge 0\), \[ \mu(\{f\ge t\}) \;\le\; e^{-\eta t}\int_\Omega e^{\eta f}\,d\mu. \] \end{corollary} \begin{remark} Both Chebyshev and Chernoff follow from \cref{thm:markov} applied to a non-negative transform: \((f-m)^2\) for Chebyshev, \(e^{\eta f}\) for Chernoff. Chernoff is sharpest when one optimises over \(\eta\ge 0\). \end{remark} \subsection{Convexity and Jensen's inequality} \begin{definition}{Convex function}{convex} Let \(I\subseteq\R\) be an interval. A function \(\phi\colon I\to\R\) is \emph{convex} if for all \(x,y\in I\) and \(t\in[0,1]\), \[ \phi\bigl(tx+(1-t)y\bigr) \;\le\; t\,\phi(x) + (1-t)\,\phi(y). \] Geometrically: between \(x\) and \(y\), the secant line through \((x,\phi(x))\) and \((y,\phi(y))\) lies on or above the graph.
\end{definition} \begin{figure}[htbp] \centering \begin{tikzpicture}[>=Stealth, scale=1]
% axes
\draw[->,thick] (-0.4,0) -- (5.2,0) node[right] {\small \(t\)};
\draw[->,thick] (0,-0.4) -- (0,3.4);
% convex curve y = 0.35 (x-2.5)^2 + 0.5
\draw[thick, deepnavy, domain=0.3:4.7, smooth, samples=60] plot (\x, {0.35*(\x-2.5)*(\x-2.5)+0.5});
% two points x=1, x=4
\pgfmathsetmacro{\ya}{0.35*(1-2.5)*(1-2.5)+0.5}
\pgfmathsetmacro{\yb}{0.35*(4-2.5)*(4-2.5)+0.5}
\fill[exampleblue] (1,\ya) circle (1.6pt);
\fill[exampleblue] (4,\yb) circle (1.6pt);
% secant
\draw[thick, exampleblue] (1,\ya) -- (4,\yb);
% midpoint comparison
\pgfmathsetmacro{\xm}{2.5}
\pgfmathsetmacro{\ymcurve}{0.35*(\xm-2.5)*(\xm-2.5)+0.5}
\pgfmathsetmacro{\ymsec}{(\ya+\yb)/2}
\fill[highlightred] (\xm,\ymcurve) circle (1.4pt);
\fill[highlightred] (\xm,\ymsec) circle (1.4pt);
\draw[dashed, highlightred] (\xm,\ymcurve) -- (\xm,\ymsec);
% labels
\node[below] at (1,0) {\small \(x\)};
\node[below] at (4,0) {\small \(y\)};
\node[below] at (\xm,0){\small \(tx{+}(1{-}t)y\)};
\node[right=3pt, exampleblue] at (4,\yb) {\small secant};
\node[left=2pt, highlightred] at (\xm,\ymsec) {\small \(t\phi(x){+}(1{-}t)\phi(y)\)};
\node[right=2pt, deepnavy] at (4.7,2.6) {\small \(\phi\)};
\end{tikzpicture} \caption{A convex function: the secant joining \((x,\phi(x))\) and \((y,\phi(y))\) lies above the graph.} \label{fig:convex} \end{figure} \begin{theorem}{Jensen's inequality}{jensen} Let \((\Omega,\Fcal,\mu)\) be a probability space (so \(\mu(\Omega)=1\)), let \(X\colon\Omega\to I\) be integrable with \(\E X=\int X\,d\mu\) lying in the interior of \(I\), and let \(\phi\colon I\to\R\) be convex with \(\E[\phi(X)]\) well defined. Then \[ \phi\bigl(\E X\bigr) \;\le\; \E[\phi(X)]. \] Equivalently, in integral notation and under the same hypotheses (in particular \(\mu(\Omega)=1\) and \(X\) integrable), \(\phi\!\left(\int X\,d\mu\right)\le\int \phi(X)\,d\mu\).
\end{theorem} \begin{remark} Convexity guarantees a supporting line: at \(m=\E X\) one can choose \(a,b\in\R\) with \(\phi(x)\ge ax+b\) for all \(x\in I\) and \(\phi(m)=am+b\). Taking expectations of the inequality gives the result; \(\E[\phi(X)]\) is well defined in \((-\infty,+\infty]\) because \(\phi^-(x)\le |a|\,|x|+|b|\), so \(\phi^-(X)\) is integrable. \end{remark} \subsection{H\"older and Minkowski} We now turn to the two inequalities that pin down the geometry of \(L^p\). Throughout, \(p,q\in[1,\infty]\) are called \emph{conjugate exponents} when \[ \frac{1}{p}+\frac{1}{q} \;=\; 1, \] with the conventions \(1/\infty=0\) and \((p,q)\in\{(1,\infty),(\infty,1)\}\) included. \begin{theorem}{H\"older's inequality}{holder} Let \(p,q\in[1,\infty]\) be conjugate exponents and let \(f,g\) be measurable. Then \[ \|fg\|_1 \;=\; \int_\Omega |fg|\,d\mu \;\le\; \|f\|_p\,\|g\|_q. \] In particular, if \(f\in L^p\) and \(g\in L^q\), then \(fg\in L^1\). \end{theorem} \begin{corollary}{Cauchy--Schwarz}{cauchy-schwarz} The choice \(p=q=2\) in \cref{thm:holder} gives \[ \int_\Omega |fg|\,d\mu \;\le\; \left(\int_\Omega f^2\,d\mu\right)^{1/2} \left(\int_\Omega g^2\,d\mu\right)^{1/2} \;=\; \|f\|_2\,\|g\|_2. \] \end{corollary} \begin{theorem}{Minkowski's inequality}{minkowski} Let \(p\in[1,\infty]\) and let \(f,g\) be measurable. Then \[ \|f+g\|_p \;\le\; \|f\|_p + \|g\|_p. \] In particular, \(L^p(\Omega,\Fcal,\mu)\) is closed under addition and \(\|\cdot\|_p\) is a seminorm; on the quotient by \(\mu\)-a.e.\ equality it is a norm. \end{theorem} \begin{remark} For \(1<p<\infty\), Minkowski's bound is obtained by writing \(|f+g|^p\le 2^{p-1}(|f|^p+|g|^p)\) (so \(f+g\in L^p\)) and then applying H\"older to each term of the bound \(\int |f+g|^p \;\le\; \int |f|\,|f+g|^{p-1} + \int |g|\,|f+g|^{p-1}\) with conjugate exponents \(p\) and \(q=p/(p-1)\); the cases \(p=1\) and \(p=\infty\) follow directly from the pointwise triangle inequality \(|f+g|\le|f|+|g|\). \end{remark} \subsection{Approximation in \texorpdfstring{$L^p$}{Lp}} The next theorem says that ``simple functions supported on sets of finite measure'' are dense in \(L^p\) for \(p\in[1,\infty)\).
It is the standard tool for reducing analytic statements to a check on indicator functions. \begin{theorem}{Density of simple functions in $L^p$}{Lp-density} Let \((\Omega,\Fcal,\mu)\) be a measure space and assume there exist \(A_n\in\Fcal\) with \(A_n\uparrow\Omega\) and \(\mu(A_n)<\infty\) for all \(n\) (i.e.\ \(\mu\) is \(\sigma\)-finite). Let \[ V_0 \;=\; \mathrm{span}\bigl\{\indic_A : A\in\Fcal,\ \mu(A)<\infty\bigr\} \] denote the simple functions supported on sets of finite measure. Then for every \(p\in[1,\infty)\), \(V_0\subseteq L^p\) and for every \(f\in L^p\) and every \(\varepsilon>0\) there exists \(v\in V_0\) with \[ \|f-v\|_p \;<\; \varepsilon. \] \end{theorem} \begin{remark} The proof has the usual three-step shape: (i) a Dynkin \(\pi\)-\(\lambda\) argument shows the class \(\Lcal=\{A\in\Fcal:\indic_A\text{ is approximable}\}\) is a \(\lambda\)-system containing the generating \(\pi\)-system, hence all of \(\Fcal\); (ii) for non-negative \(f\in L^p\) the truncations \(f_n=\min(n,2^{-n}\lfloor 2^n f\rfloor)\) satisfy \(|f-f_n|^p\to 0\) pointwise with \(|f-f_n|^p\le |f|^p\), so dominated convergence gives \(\|f-f_n\|_p\to 0\); (iii) general \(f\) is handled by splitting \(f=f^+-f^-\) and restricting to the exhausting sets \(A_n\). \end{remark}