% =================================================================
%  Lecture 8
%  Primary source: handwritten notes (Mathpix mmd)
%  Fallback: kashlak.pdf (only for OCR/notation/curriculum clarity)
% =================================================================
\section[Lecture 8 -- Lp Spaces and Classical Inequalities]{Lecture 8 \textemdash{} \texorpdfstring{$L^p$}{Lp} Spaces and Classical Inequalities}
\label{sec:lec08}

Fix a measure space \((\Omega,\Fcal,\mu)\). This lecture introduces the
spaces \(L^p(\Omega,\Fcal,\mu)\), three workhorse tail-bound
inequalities (Markov, Chebyshev, Chernoff), Jensen's inequality for
convex functions, and the two
inequalities (H\"older and Minkowski) that give \(L^p\) the structure of
a normed vector space. We close with a density theorem that lets one
approximate any \(L^p\) function, \(1\le p<\infty\), by simple functions
supported on sets of finite measure.

\subsection{The spaces \texorpdfstring{$L^p$}{Lp}}

\begin{definition}{$L^p$ space, $1\le p<\infty$}{Lp-finite}
For \(p\in[1,\infty)\) and a measurable function
\(f\colon\Omega\to\R\), define
\[
\|f\|_p \;=\; \left(\int_\Omega |f|^p\,d\mu\right)^{1/p}.
\]
The space \(L^p(\Omega,\Fcal,\mu)\) consists of all measurable \(f\)
with \(\|f\|_p<\infty\), modulo equality \(\mu\)-almost everywhere.
\end{definition}

\begin{definition}{Essential supremum and $L^\infty$}{Linfty}
The \emph{essential supremum} of a measurable function \(f\) is
\[
\|f\|_\infty
   \;=\; \esssup |f|
   \;=\; \inf\bigl\{t\in[0,\infty]:\mu(\{|f|>t\})=0\bigr\}.
\]
The space \(L^\infty(\Omega,\Fcal,\mu)\) consists of all measurable
\(f\) with \(\|f\|_\infty<\infty\), again modulo \(\mu\)-a.e.\ equality.
\end{definition}

\begin{remark}
The endpoint \(p=1\) is already part of the finite-\(p\) definition, and
the endpoint \(p=\infty\) is its honest limiting analogue: if \(\mu\) is
finite and \(f\in L^\infty\), then \(\|f\|_p\to\|f\|_\infty\) as
\(p\to\infty\).
\end{remark}

\subsection{Markov, Chebyshev, Chernoff}

The next three results all spring from the same one-line trick: bound
the integrand below on the set \(\{f\ge t\}\).

\begin{theorem}{Markov's inequality}{markov}
Let \(f\ge 0\) be measurable and \(t>0\). Then
\[
\mu(\{f\ge t\}) \;\le\; \frac{1}{t}\int_\Omega f\,d\mu.
\]
\end{theorem}

\begin{corollary}{Chebyshev's inequality}{chebyshev}
For any measurable \(f\) and \(m\in\R\), \(t>0\),
\[
\mu(\{|f-m|\ge t\}) \;\le\; t^{-2}\int_\Omega (f-m)^2\,d\mu.
\]
\end{corollary}

\begin{corollary}{Chernoff's inequality}{chernoff}
For any measurable \(f\), \(t\in\R\), and \(\eta\ge 0\),
\[
\mu(\{f\ge t\}) \;\le\; e^{-\eta t}\int_\Omega e^{\eta f}\,d\mu.
\]
\end{corollary}

\begin{remark}
Both Chebyshev and Chernoff follow from \cref{thm:markov} applied to a
non-negative transform: \((f-m)^2\) for Chebyshev, \(e^{\eta f}\) for
Chernoff. Chernoff is sharpest when one optimises over \(\eta\ge 0\).
\end{remark}

\subsection{Convexity and Jensen's inequality}

\begin{definition}{Convex function}{convex}
Let \(I\subseteq\R\) be an interval. A function \(\phi\colon I\to\R\)
is \emph{convex} if for all \(x,y\in I\) and \(t\in[0,1]\),
\[
\phi\bigl(tx+(1-t)y\bigr) \;\le\; t\,\phi(x) + (1-t)\,\phi(y).
\]
Geometrically: the chord joining any two points of the graph lies on or
above the graph between them.
\end{definition}

\begin{figure}[h]
\centering
\begin{tikzpicture}[>=Stealth, scale=1]
  % axes
  \draw[->,thick] (-0.4,0) -- (5.2,0) node[right] {\small \(t\)};
  \draw[->,thick] (0,-0.4) -- (0,3.4);
  % convex curve y = 0.35 (x-2.5)^2 + 0.5
  \draw[thick, deepnavy, domain=0.3:4.7, smooth, samples=60]
    plot (\x, {0.35*(\x-2.5)*(\x-2.5)+0.5});
  % two points x=1, x=4
  \pgfmathsetmacro{\ya}{0.35*(1-2.5)*(1-2.5)+0.5}
  \pgfmathsetmacro{\yb}{0.35*(4-2.5)*(4-2.5)+0.5}
  \fill[exampleblue] (1,\ya) circle (1.6pt);
  \fill[exampleblue] (4,\yb) circle (1.6pt);
  % secant
  \draw[thick, exampleblue] (1,\ya) -- (4,\yb);
  % midpoint comparison
  \pgfmathsetmacro{\xm}{2.5}
  \pgfmathsetmacro{\ymcurve}{0.35*(\xm-2.5)*(\xm-2.5)+0.5}
  \pgfmathsetmacro{\ymsec}{(\ya+\yb)/2}
  \fill[highlightred] (\xm,\ymcurve) circle (1.4pt);
  \fill[highlightred] (\xm,\ymsec)  circle (1.4pt);
  \draw[dashed, highlightred] (\xm,\ymcurve) -- (\xm,\ymsec);
  % labels
  \node[below] at (1,0)  {\small \(x\)};
  \node[below] at (4,0)  {\small \(y\)};
  \node[below] at (\xm,0){\small \(tx{+}(1{-}t)y\)};
  \node[right=3pt, exampleblue] at (4,\yb) {\small secant};
  \node[left=2pt,  highlightred]  at (\xm,\ymsec)  {\small \(t\phi(x){+}(1{-}t)\phi(y)\)};
  \node[right=2pt, deepnavy] at (4.7,2.6) {\small \(\phi\)};
\end{tikzpicture}
\caption{A convex function: the secant joining \((x,\phi(x))\) and
\((y,\phi(y))\) lies above the graph.}
\label{fig:convex}
\end{figure}

\begin{theorem}{Jensen's inequality}{jensen}
Let \((\Omega,\Fcal,\mu)\) be a probability space (so
\(\mu(\Omega)=1\)), let \(X\colon\Omega\to I\) be integrable with
\(\E X=\int X\,d\mu\) lying in the interior of \(I\), and let
\(\phi\colon I\to\R\) be convex with \(\E[\phi(X)]\) well defined. Then
\[
\phi\bigl(\E X\bigr) \;\le\; \E[\phi(X)].
\]
In integral notation, under the same hypotheses (\(X\) integrable and
\(\mu(\Omega)=1\)),
\(\phi\!\left(\int X\,d\mu\right)\le\int \phi(X)\,d\mu\).
\end{theorem}

\begin{remark}
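Convexity guarantees a supporting line: at \(m=\E X\) one can choose
\(a,b\in\R\) with \(\phi(x)\ge ax+b\) for all \(x\in I\) and
\(\phi(m)=am+b\). Since \(\phi^-(x)\le |a|\,|x|+|b|\) and \(X\) is
integrable on a probability space, the negative part of \(\phi(X)\) is
integrable, so \(\E[\phi(X)]\) is well defined in \((-\infty,\infty]\).
Taking expectations of the inequality then gives
\(\E[\phi(X)]\ge a\,\E X+b=\phi(m)\), where \(\mu(\Omega)=1\) is used to
get \(\E[b]=b\).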
\end{remark}

\subsection{H\"older and Minkowski}

We now turn to the two inequalities that pin down the geometry of
\(L^p\). Throughout, \(p,q\in[1,\infty]\) are called \emph{conjugate
exponents} when
\[
\frac{1}{p}+\frac{1}{q} \;=\; 1,
\]
with the convention \(1/\infty=0\), so that the endpoint pairs
\((p,q)=(1,\infty)\) and \((p,q)=(\infty,1)\) are included.

\begin{theorem}{H\"older's inequality}{holder}
Let \(p,q\in[1,\infty]\) be conjugate exponents and let \(f,g\) be
measurable. Then
\[
\|fg\|_1 \;=\; \int_\Omega |fg|\,d\mu
   \;\le\; \|f\|_p\,\|g\|_q.
\]
In particular, if \(f\in L^p\) and \(g\in L^q\), then \(fg\in L^1\).
\end{theorem}

\begin{corollary}{Cauchy--Schwarz}{cauchy-schwarz}
The choice \(p=q=2\) in \cref{thm:holder} gives
\[
\int_\Omega |fg|\,d\mu
   \;\le\; \left(\int_\Omega f^2\,d\mu\right)^{1/2}
           \left(\int_\Omega g^2\,d\mu\right)^{1/2}
   \;=\; \|f\|_2\,\|g\|_2.
\]
\end{corollary}

\begin{theorem}{Minkowski's inequality}{minkowski}
Let \(p\in[1,\infty]\) and let \(f,g\) be measurable. Then
\[
\|f+g\|_p \;\le\; \|f\|_p + \|g\|_p.
\]
In particular, \(L^p(\Omega,\Fcal,\mu)\) is closed under addition and
\(\|\cdot\|_p\) is a seminorm; on the quotient by \(\mu\)-a.e.\ equality
it is a norm.
\end{theorem}

\begin{remark}
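For \(1<p<\infty\), Minkowski's bound is obtained by writing \(|f+g|^p\le
2^{p-1}(|f|^p+|g|^p)\) (so \(f+g\in L^p\)) and then applying H\"older
to each term of the bound
\(\int |f+g|^p \;\le\; \int |f|\,|f+g|^{p-1} + \int |g|\,|f+g|^{p-1}\),
which follows from the pointwise triangle inequality
\(|f+g|\le|f|+|g|\), with conjugate exponents \(p\) and \(q=p/(p-1)\).
The cases \(p=1\) and \(p=\infty\) follow directly from the pointwise
triangle inequality.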
\end{remark}

\subsection{Approximation in \texorpdfstring{$L^p$}{Lp}}

The next theorem says that ``simple functions supported on sets of
finite measure'' are dense in \(L^p\) for \(p\in[1,\infty)\). It is
the standard tool for reducing analytic statements to a check on
indicator functions.

\begin{theorem}{Density of simple functions in $L^p$}{Lp-density}
Let \((\Omega,\Fcal,\mu)\) be a measure space and assume there exist
\(A_n\in\Fcal\) with \(A_n\uparrow\Omega\) and
\(\mu(A_n)<\infty\) for all \(n\) (i.e.\ \(\mu\) is
\(\sigma\)-finite). Let
\[
V_0 \;=\; \mathrm{span}\bigl\{\indic_A : A\in\Fcal,\ \mu(A)<\infty\bigr\}
\]
denote the simple functions supported on sets of finite measure. Then
for every \(p\in[1,\infty)\), \(V_0\subseteq L^p\) and for every
\(f\in L^p\) and every \(\varepsilon>0\) there exists \(v\in V_0\) with
\[
\|f-v\|_p \;<\; \varepsilon.
\]
\end{theorem}

\begin{remark}
The proof has the usual three-step shape: (i) indicators: every
\(\indic_A\) with \(\mu(A)<\infty\) lies in \(V_0\) by definition; if
\(V_0\) is instead spanned only by indicators of sets from a generating
\(\pi\)-system, a Dynkin \(\pi\)-\(\lambda\) argument shows that, for
each fixed \(n\), the class
\(\Lcal_n=\{A\in\Fcal:\indic_{A\cap A_n}\) is approximable\(\}\) is a
\(\lambda\)-system containing that \(\pi\)-system, hence all
of \(\Fcal\); (ii) for non-negative \(f\in L^p\) the dyadic truncations
\(f_n=\min(n,2^{-n}\lfloor 2^n f\rfloor)\) satisfy
\(|f-f_n|^p\to 0\) pointwise with \(|f-f_n|^p\le |f|^p\), so dominated
convergence gives \(\|f-f_n\|_p\to 0\); (iii) general \(f\) is handled
by splitting \(f=f^+-f^-\) and restricting to the exhausting sets
\(A_n\).
\end{remark}