% DEFINE some information that will be populated throughout the course notes.
\def \coursename {Advanced Linear Algebra}
\def \coursecode {MATH 3221}
\def \courseterm {Fall 2020}
\def \instructorname {Nathan Johnston}
% END DEFINITIONS

% IMPORT the course note formatting and templates
\input{course_notes_template}
% END IMPORT

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\setcounter{chapter}{4} % Set to one less than the week number
\chapter{Inner Products and Orthogonality}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

{\large This week we will learn about:
\begin{itemize}
\item Inner products (and the dot product again),
\item The norm induced by the inner product,
\item The Cauchy--Schwarz and triangle inequalities, and
\item Orthogonality.
\end{itemize}\bigskip\bigskip

\noindent Extra reading and watching:
\begin{itemize}
\item Sections 1.3.4 and 1.4.1 in the textbook
\item Lecture videos \href{https://www.youtube.com/watch?v=NpkFp-14M7M&list=PLOAf1ViVP13jdhvy-wVS7aR02xnDxueuL&index=18}{17}, \href{https://www.youtube.com/watch?v=-tvsZ7Un8_g&list=PLOAf1ViVP13jdhvy-wVS7aR02xnDxueuL&index=19}{18}, \href{https://www.youtube.com/watch?v=G2X7zfSyFqk&list=PLOAf1ViVP13jdhvy-wVS7aR02xnDxueuL&index=20}{19}, \href{https://www.youtube.com/watch?v=0ogMWnPMyz8&list=PLOAf1ViVP13jdhvy-wVS7aR02xnDxueuL&index=21}{20}, \href{https://www.youtube.com/watch?v=sjuRbORUvOE&list=PLOAf1ViVP13jdhvy-wVS7aR02xnDxueuL&index=22}{21}, and \href{https://www.youtube.com/watch?v=uFAtC5EYJVM&list=PLOAf1ViVP13jdhvy-wVS7aR02xnDxueuL&index=23}{22} on YouTube
\item \href{http://en.wikipedia.org/wiki/Inner_product_space}{Inner product space} at Wikipedia
\item \href{http://en.wikipedia.org/wiki/Cauchy%E2%80%93Schwarz_inequality}{Cauchy--Schwarz inequality} at Wikipedia
\item \href{http://en.wikipedia.org/wiki/Gram%E2%80%93Schmidt_process}{Gram--Schmidt process} at Wikipedia
\end{itemize}\bigskip\bigskip

\noindent Extra textbook problems:
\begin{itemize}
\item[$\star$] 1.3.3, 1.3.4, 1.4.1
\item[$\phantom{\star}\star\star$] 1.3.9, 1.3.10, 1.3.12, 1.3.13, 1.4.2, 1.4.5(a,d)
\item[$\star\star\star$] 1.3.11, 1.3.14, 1.3.15, 1.3.25, 1.4.16
\item[$\skull$] 1.3.18
\end{itemize}}

\newpage

There are many times when we would like to be able to talk about the angle between vectors in a vector space $\V$, and in particular orthogonality of vectors, just like we did in $\R^n$ in the previous course. This requires us to have a generalization of the dot product to arbitrary vector spaces.

% DEFINITION: Inner Product
\begin{definition}[Inner Product]\label{defn:inner_product}
Suppose that $\mathbb{F} = \R$ or $\mathbb{F} = \C$, and $\V$ is a vector space over $\mathbb{F}$. Then an \textbf{inner product} on $\V$ is a function $\langle \cdot,\cdot \rangle : \V \times \V \rightarrow \mathbb{F}$ such that the following three properties hold for all $c \in \mathbb{F}$ and all $\v,\w,\x \in \V$:\smallskip
\begin{enumerate}[label=\alph*)]
\item $\langle \v,\w \rangle = \overline{\langle \w,\v \rangle}$ \hfill {\color{gray}(conjugate symmetry)}
\item $\langle \v,\w+c\x \rangle = \langle \v,\w \rangle + c\langle \v,\x \rangle$ \hfill {\color{gray}(linearity in 2nd entry)}
\item $\langle \v,\v \rangle \geq 0$, with equality if and only if $\v = \0$. \hfill {\color{gray}(positive definiteness)}
\end{enumerate}
\end{definition}

% Clarify notation above (i.e., double-input function notation), since it likely is scary to students
\begin{itemize}
\item Why those three properties?
\horlines{2}\vspace*{-0.6cm}
% Because those are the three properties that the dot product satisfies.
% Just like vector spaces mimic R^n, linear transformations mimic matrices, etc.

\item Inner products are \emph{not} linear in their first argument...
\horlines{2}\vspace*{-0.6cm}
% They are conjugate linear. Show that <v + cw, x> = <v,x> + \overline{c}<w,x>.

\item OK, so why does property~(a) have that weird complex conjugation in it?
\horlines{2}\vspace*{-0.6cm}
% Suppose v is a non-zero vector and property (a) held without the conjugate (so the inner product were linear in both entries). Then <v,v> > 0, but <iv,iv> = i<iv,v> = i<v,iv> = i^2<v,v> = -<v,v> < 0, which is a contradiction. Thus we *need* the complex conjugate.

\item For this reason, they are sometimes called ``sesquilinear'', which means...
\horlines{1}\vspace*{-0.6cm}
% "one-and-a-half" linear (not quite "bilinear").
\end{itemize}

\newpage

\exx[6]{Show that the following function is an inner product on $\mathbb{C}^n$:
\[\langle \v,\w \rangle = \v^*\w = \sum_{i=1}^n \overline{v_{i}}w_{i} \quad \text{for all} \quad \v,\w \in \C^n.\]\vspace*{-0.1in}}
% Just check the properties. Call this the "complex dot product", and introduce the v \cdot w notation for it.

\exx[6]{Let $a < b$ be real numbers and let $\mathcal{C}[a,b]$ be the vector space of continuous functions on the interval $[a,b]$. Show that the following function is an inner product on $\mathcal{C}[a,b]$:
\[\langle f,g \rangle = \int_a^b f(x)g(x) \diff x \quad \text{for all} \quad f,g \in \mathcal{C}[a,b].\]\vspace*{-0.15in}}
% Again, just check the properties. Briefly explain this vector space first (e.g., why it is a vector space).

The previous examples are the ``standard'' inner products on those vector spaces. However, inner products can also be much uglier. The following example illustrates how the same vector space can have multiple different inner products, and at first glance they might look nothing like the standard inner products.

\newpage

\exx[6]{Show that the following function is an inner product on $\mathbb{R}^2$:
\[\langle \v,\w \rangle = v_1w_1 + 2v_1w_2 + 2v_2w_1 + 5v_2w_2 \quad \text{for all} \quad \v,\w \in \R^2.\]\vspace*{-0.15in}}
% Properties (a) and (b) are easy, but (c) is not. For (c), rewrite <v,w> = (v_1 + 2v_2)(w_1 + 2w_2) + v_2w_2 and then it's easier.

There is also a ``standard'' inner product on $\M_n$, but before being able to explain it, we need to introduce the following helper function:

\begin{definition}[Trace]\label{defn:trace}
Let $A \in \M_n$ be a square matrix. Then the \textbf{trace} of $A$, denoted by $\mathrm{tr}(A)$, is the sum of its diagonal entries:
\[ \mathrm{tr}(A) \defeq a_{1,1} + a_{2,2} + \cdots + a_{n,n}.\]
\end{definition}

\exx[2]{Compute the following matrix traces:}
% Ask class for a 2x2 or 3x3 example and compute its trace. Trivial. Don't dwell on it.

The reason why the trace is such a wonderful function is that it makes matrix multiplication ``kind of'' commutative:

\begin{theorem}[Commutativity of the Trace]\label{thm:trace_commute}
Let $A \in \M_{m,n}$ and $B \in \M_{n,m}$ be matrices. Then
\[\mathrm{tr}(AB) = \mathrm{tr}(BA).\]
\end{theorem}

\newpage

\begin{proof}
Just directly compute the diagonal entries of $AB$ and $BA$:
\horlines{4}\vspace*{-1.3cm}
% Then sum them up and see (by switching order of sums) that they are equal.
\end{proof}

The trace also has some other nice properties that are easier to see:
\horlines{3}
% tr(A+B) = tr(A) + tr(B)
% tr(cA) = c tr(A) (it's a linear transformation!)
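As a quick sanity check of Theorem~\ref{thm:trace_commute} and of these properties (a made-up example, not from the lecture outline), take
\[
A = \begin{bmatrix}1 & 2 \\ 3 & 4\end{bmatrix} \quad \text{and} \quad B = \begin{bmatrix}0 & 1 \\ 1 & 0\end{bmatrix}, \quad \text{so that} \quad AB = \begin{bmatrix}2 & 1 \\ 4 & 3\end{bmatrix} \quad \text{and} \quad BA = \begin{bmatrix}3 & 4 \\ 1 & 2\end{bmatrix}.
\]
Then $\mathrm{tr}(AB) = 5 = \mathrm{tr}(BA)$ even though $AB \neq BA$, and also $\mathrm{tr}(A+B) = 5 = \mathrm{tr}(A) + \mathrm{tr}(B)$.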
% tr(A^T) = tr(A)

With the trace in hand, we can now introduce the standard inner product on the vector space of matrices:

\exx[7]{Show that the following function is an inner product on $\M_{m,n}$:
\[\langle A,B \rangle = \mathrm{tr}(A^*B) \quad \text{for all} \quad A,B \in \M_{m,n}.\]\vspace*{-0.15in}}
% Define A^* for students if you haven't already done so on an assignment
% Can either directly check the properties (NOT RECOMMENDED), or we can note that <A,B> = \sum_{i,j} \overline{a_{i,j}}b_{i,j}, so this is just the complex dot product of the vectorizations of A and B.

The above inner product is typically called the \textbf{Frobenius inner product} or \textbf{Hilbert--Schmidt inner product}. Also, a vector space together with a particular inner product is called an \textbf{inner product space}.

\newpage

\section*{Norm Induced by the Inner Product}

Now that we have inner products, we can define the length of a vector in a manner completely analogous to how we did it with the dot product in $\R^n$. However, in this more general setting, we are a bit beyond the point of being able to draw a geometric picture of what length means (for example, what is the ``length'' of a continuous function?), so we change terminology slightly and instead call this function a ``norm.''

% DEFINITION: Norm Induced from Inner Product
\begin{definition}[Norm Induced by the Inner Product]
Suppose that $\V$ is an inner product space. Then the \textbf{norm induced by the inner product} is the function $\|\cdot\| : \V \rightarrow \R$ defined by
\[\|\v\| \defeq \sqrt{ \langle \v, \v \rangle } \quad \text{for all} \quad \v \in \V.\]
\end{definition}
% END DEFINITION

\exx[1]{What is the norm induced by the standard inner product on $\C^n$?}
% Just the standard norm \sqrt{|v1|^2 + ... + |vn|^2}.

\exx[1]{What is the norm induced by the standard inner product on $\mathcal{C}[a,b]$?}
% sqrt(\int_a^b f(x)^2 dx)

\exx[1]{What is the norm induced by the standard (Frobenius) inner product on $\M_{m,n}$?}
% sqrt(\sum_{i,j} |a_ij|^2) = sqrt(tr(A^*A)).

Perhaps not surprisingly, the norm induced by an inner product satisfies the same basic properties as the length of a vector in $\R^n$. These properties are summarized in the following theorem.

\begin{theorem}[Properties of the Norm Induced by the I.P.]
Suppose that $\V$ is an inner product space, $\v \in \V$ is a vector, and $c \in \mathbb{F}$ is a scalar. Then the following properties of the norm induced by the inner product hold:
\begin{enumerate}[label=\alph*)]
\item $\|c\v\| = |c|\|\v\|$, and
\item $\|\v\| \geq 0$, with equality if and only if $\v = \0$.
\end{enumerate}
\end{theorem}
%You can try proving these properties on your own (they each only take a line or two from the definitions).

\newpage

The two other main theorems that we proved for the length in $\R^n$ were the Cauchy--Schwarz inequality and the triangle inequality. We now show that these same properties hold for the norm induced by any inner product.

\begin{theorem}[Cauchy--Schwarz Inequality]
Suppose that $\V$ is an inner product space and $\v,\w \in \V$. Then
\[|\langle \v, \w \rangle| \leq \|\v\|\|\w\|.\]Furthermore, equality holds if and only if $\{\v,\w\}$ is a linearly dependent set.
\end{theorem}

\begin{proof}
Let $c,d \in \mathbb{F}$ be arbitrary scalars, and expand $\|c\v + d\w\|^2$ in terms of the inner product:
\horlines{9}\vspace*{-1.3cm}
% You get |c|^2\|\v\|^2 + 2\mathrm{Re}\big(\overline{c}d\langle\v,\w\rangle\big) + |d|^2\|\w\|^2, which we now know must be >= 0.
% This holds for ALL c and d.
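% (Hedged aside, not from the original notes: with the choices of c and d suggested below, the expansion collapses to ||v||^2||w||^2 - |<v,w>|^2 >= 0, which rearranges to exactly the claimed inequality. The case w = 0 is immediate, since then both sides of the inequality are 0 and no division by ||w|| is needed.)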
% Well, choose $c = \|\w\|$ and $d = -\langle\w,\v\rangle/\|\w\|$ and let's see what happens. Simplify and you get exactly what we want.
% Mention how the equality argument goes briefly (i.e., equality iff cv + dw = 0, so they are collinear), but don't dwell on it.
\end{proof}

For example, if we apply the Cauchy--Schwarz inequality to the Frobenius inner product on $\M_{m,n}$, it tells us that
\horlines{1}
% \[\big|\mathrm{tr}(A^*B)\big|^2 \leq \mathrm{tr}(A^*A)\mathrm{tr}(B^*B) \quad \text{for all} \quad A,B \in \M_{m,n},\]

\noindent and if we apply it to the standard inner product on $\mathcal{C}[a,b]$ then it says that
\horlines{1}
% \[ \left(\int_a^b f(x)g(x) \diff x\right)^2 \leq \left(\int_a^b f(x)^2\diff x\right)\left(\int_a^b g(x)^2 \diff x\right) \quad \text{for all} \quad f,g \in \mathcal{C}[a,b].\]

\noindent Neither of the above inequalities is particularly pleasant to prove directly.

\newpage

Just as was the case in $\R^n$, the triangle inequality now follows very quickly from the Cauchy--Schwarz inequality.

\begin{theorem}[The Triangle Inequality]
Suppose that $\V$ is an inner product space and $\v,\w \in \V$. Then
\[\|\v+\w\| \leq \|\v\| + \|\w\|.\]Furthermore, equality holds if and only if $\v$ and $\w$ point in the same direction (i.e., $\v = \0$ or $\w = c\v$ for some $0 \leq c \in \R$).
\end{theorem}

\begin{proof}
Start by expanding $\|\v+\w\|^2$ in terms of the inner product:
\horlines{6}\vspace*{-1.3cm}
% Just do the obvious expansion and use C-S when you have to, in order to clear the cross term.
% Mention how the equality argument goes briefly (i.e., check each step that has an actual inequality), but don't dwell on it. Equality iff \mathrm{Re}(\langle\v,\w\rangle) = |\langle\v,\w\rangle| = \|\v\|\|\w\|, which is where c >= 0 comes from.
\end{proof}

\section*{Orthogonality}

The most useful thing that we can do with an inner product is re-introduce orthogonality in this more general setting:

% DEFINITION: Orthogonality
\begin{definition}[Orthogonality]
Suppose $\V$ is an inner product space. Then two vectors $\v,\w \in \V$ are called \textbf{orthogonal} if $\ip{\v}{\w} = 0$.
\end{definition}
% END DEFINITION

In $\R^n$, we could think of ``orthogonal'' as a synonym for ``perpendicular'', since two vectors were orthogonal if and only if the angle between them was $\pi/2$. In general inner product spaces this geometric picture makes much less sense (for example, what does it mean for the angle between two polynomials to be $\pi/2$?), so it is perhaps better to think of orthogonal vectors as ones that are ``as linearly independent as possible.''

\newpage

\vspace*{1.85in}
% Draw a picture like the one from the textbook, with one picture for dependence of two vectors (collinear), one for independence, and then one for orthogonality.

With this intuition in mind, it is useful to extend orthogonality to \emph{sets} of vectors, rather than just pairs of vectors:

\begin{definition}[Orthonormal Bases]
A basis $B$ of an inner product space $\V$ is called an \textbf{orthonormal basis} of $\V$ if
\begin{enumerate}[label=\alph*)]
\item $\ip{\v}{\w} = 0$ for all $\v\neq\w \in B$, and \hfill {\color{gray}(mutual orthogonality)}
\item $\|\v\| = 1$ for all $\v \in B$.
\hfill {\color{gray}(normalization)}
\end{enumerate}
\end{definition}

\exx[4]{Examples of orthonormal bases in our ``standard'' vector spaces include...}
% For example, standard bases of F^n and \M_{m,n} are orthonormal
% OrthoNORMAL means orthogonal and normalized
% NOT {1,x,x^2} in P^2

Orthogonal and orthonormal bases often greatly simplify calculations. For example, the following theorem shows us that linear independence comes for free when we know that the vectors in a set are mutually orthogonal.

\begin{theorem}[Orthogonality Implies Linear Independence]
Let $\V$ be an inner product space and suppose that the set $B = \{\mathbf{v}_1,\mathbf{v}_2,\ldots,\mathbf{v}_n\} \subset \V$ consists of non-zero mutually orthogonal vectors (i.e., $\langle \mathbf{v}_i,\mathbf{v}_j\rangle = 0$ whenever $i \neq j$). Then $B$ is linearly independent.
\end{theorem}

\newpage

\begin{proof}
Suppose $c_1\v_1 + c_2\v_2 + \cdots + c_n\v_n = \0$. Then...
\horlines{5}\vspace*{-1.3cm}
% Take inner product with v1 to get c1||v1||^2 = 0, so c1 = 0. Similar for other coefficients.
\end{proof}

A fairly quick consequence of the previous theorem is the fact that if a set of non-zero vectors is mutually orthogonal, and the number of vectors in it matches the dimension of the vector space, then...
\horlines{1}
% they must form an orthogonal basis of the inner product space.

\exx[6]{Show that the set of Pauli matrices
\begin{align*}
B = \left\{\begin{bmatrix}1 & 0 \\ 0 & 1\end{bmatrix}, \begin{bmatrix}0 & 1 \\ 1 & 0\end{bmatrix}, \begin{bmatrix}0 & -i \\ i & 0\end{bmatrix}, \begin{bmatrix}1 & 0 \\ 0 & -1\end{bmatrix}\right\}
\end{align*}
is an orthogonal basis of $\M_{2}(\mathbb{C})$. How could you turn it into an orthonormal basis?}
% Just check orthogonality. 6 pairs, but easy. Since there are 4 of them in a 4-dimensional space, DONE. Easy-peasy. To make orthoNORMAL, divide each matrix by its norm induced by the inner product (the Frobenius norm), which is sqrt(2) in all cases (i.e., multiply each matrix by 1/sqrt(2)).

We already learned that every $n$-dimensional vector space over $\mathbb{F}$ is isomorphic (i.e., ``essentially the same'') to $\mathbb{F}^n$. It thus seems natural to ask the corresponding question about inner products---do all inner products on $\mathbb{F}^n$ look like the usual dot product on $\mathbb{F}^n$ in some basis? Orthonormal bases let us show that the answer is ``yes.''

\newpage

\begin{theorem}[All Inner Products Look Like the Dot Product]
Suppose that $B$ is an orthonormal basis of a finite-dimensional inner product space $\V$. Then
\[ \ip{\v}{\w} = [\v]_B \cdot [\w]_B \quad \text{for all} \quad \v,\w \in \V. \]
\end{theorem}

\begin{proof}
Write $B = \{\u_1,\u_2,\ldots,\u_n\}$. Since $B$ is a basis of $\V$, we can write $\v = c_1\u_1 + \cdots + c_n\u_n$ and $\w = d_1\u_1 + \cdots + d_n\u_n$. Then...
\horlines{6}\vspace*{-1.3cm}
% Just compute directly by subbing in the linear combinations.
\end{proof}

If we specialize even further to $\mathbb{C}^n$ rather than to an arbitrary finite-dimensional vector space $\V$, then we can say even more. Specifically, recall that if $\v,\w \in \C^n$, $E$ is the standard basis of $\C^n$, and $B$ is any basis of $\C^n$, then
\horlines{1}
%P_{B\leftarrow E}\v = [\v]_{B} \quad \text{and} \quad P_{B\leftarrow E}\w = [\w]_{B}.
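% (Hedged reminder, not from the original notes: the change-of-basis matrix P_{B\leftarrow E} here is invertible --- it is the inverse of the matrix whose columns are the vectors of B written in the standard basis --- which is what lets the corollary below ask for an invertible matrix P.)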
By plugging this fact into the above characterization of finite-dimensional inner product spaces (and assuming that $B$ is orthonormal), we see that every inner product on $\C^n$ has the form
\horlines{1}
%\[
%\langle \v,\w \rangle = [\v]_{B} \cdot [\w]_{B} = (P_{B\leftarrow E}\v) \cdot (P_{B\leftarrow E}\w) = \v^*(P_{B\leftarrow E}^*P_{B\leftarrow E})\w \quad \text{for all} \quad \v,\w \in \C^n.
%\]

We state this fact in a slightly cleaner form below:

\begin{corollary}[Invertible Matrices Make Inner Products]
A function $\langle \cdot,\cdot \rangle : \mathbb{F}^n \times \mathbb{F}^n \rightarrow \mathbb{F}$ is an inner product if and only if there exists an invertible matrix $P \in \M_n(\mathbb{F})$ such that
\[\langle \v,\w \rangle = \v^*(P^*P)\w \quad \text{for all} \quad \v,\w \in \mathbb{F}^n.\]
\end{corollary}

\newpage

For example, the usual inner product (i.e., the dot product) on $\C^n$ arises when $P = I$. Similarly, the weird inner product on $\R^2$ from a few pages ago, defined by
\[ \ip{\v}{\w} = v_1w_1 + 2v_1w_2 + 2v_2w_1 + 5v_2w_2 \quad \text{for all} \quad \v,\w \in \R^2, \]
is what we get if we choose $P = \begin{bmatrix}1 & 2 \\ 0 & 1 \end{bmatrix}$. To see this, we verify that
\horlines{4}
% $P^*P = \begin{bmatrix}1 & 2 \\ 2 & 5
%\end{bmatrix}$, so
%\[
%\v^*(P^*P)\w = \begin{bmatrix}v_1 & v_2\end{bmatrix}\begin{bmatrix}1 & 2 \\ 2 & 5
%\end{bmatrix}\begin{bmatrix}w_1 \\ w_2\end{bmatrix} = v_1w_1 + 2v_1w_2 + 2v_2w_1 + 5v_2w_2 = \ip{\v}{\w}.
%\]

\section*{Orthogonalization}

We already showed how to determine whether or not a particular set \emph{is} an orthonormal basis, so let's turn to the question of how to \emph{construct} an orthonormal basis. While this is reasonably intuitive in familiar inner product spaces like $\R^n$ or $\M_{m,n}(\C)$, it becomes a bit more delicate when working with stranger inner products.\\
The process works one vector at a time to turn the vectors from some (not necessarily orthonormal) basis $B = \{\v_1,\v_2,\ldots,\v_m\}$ into an orthonormal basis $C = \{\u_1,\u_2,\ldots,\u_m\}$. We start by simply defining
\horlines{1}
% \u_1 = \v_1 / \|\v_1\|, which will be the first vector in our orthonormal basis. It clearly has length 1.

To construct the next member of our orthonormal basis, we define
\horlines{2}
% \[\w_2 = \v_2 - (\u_1 \cdot \v_2)\u_1, \qquad\qquad \u_2 = \w_2 / \|\w_2\|, \]
where we recall from Section~\ref{sec:lin_transform_examples} that $(\u_1 \cdot \v_2)\u_1$ is the projection of $\v_2$ onto the line in the direction of $\u_1$ (i.e., this is the quantity that we denoted by $P_{\u_1}(\v_2)$ in that section). In words, we are subtracting the portion of $\v_2$ that points in the direction of $\u_1$, leaving behind only the piece of it that is orthogonal to $\u_1$, as illustrated on the next page.

\newpage

\vspace*{4in}
% Draw something like the 4-image figure from the textbook

In higher dimensions, we would then continue in this way, adjusting each vector in the basis so that it is orthogonal to each of the previous vectors, and then normalizing it. The following theorem makes this precise and tells us that the result is indeed always an orthonormal basis.

\begin{theorem}[Gram--Schmidt Process]
Suppose $B = \{\v_1,\v_2,\ldots,\v_n\}$ is a basis of an inner product space $\V$. Define\\[1in]
%$\u_1 = \v_1 / \|\v_1\|$ and
% \begin{align*}
% \u_k = \frac{\v_k - \sum_{i=1}^{k-1}\ip{\u_i}{\v_k}\u_i}{\left\| \v_k - \sum_{i=1}^{k-1}\ip{\u_i}{\v_k}\u_i \right\|} \quad \text{for all} \quad k = 2, \ldots, n.
% \end{align*}
Then $C = \{\u_1,\u_2,\ldots,\u_n\}$ is an orthonormal basis of $\V$.
\end{theorem}
% MENTION that yes, it looks like a horrible mess. But it's just "subtract the piece pointing in the direction of u1, u2, ..., and then normalize."

\begin{proof}
We actually prove that, not only is $C$ an orthonormal basis of $\V$, but also that
\horlines{2}
% \{\u_1,\u_2,\ldots,\u_k\} is an ONB of \mathrm{span}(\{\v_1,\v_2,\ldots,\v_k\}) for all $1 \leq k \leq n$.

\newpage

We prove this result by induction on $k$. For the base case of $k = 1$, ...
\horlines{15}\vspace*{-1.3cm}
% BASE CASE: simply note that $\u_1$ is indeed a unit vector and $\mathrm{span}(\{\u_1\}) = \mathrm{span}(\{\v_1\})$ since $\u_1$ and $\v_1$ are scalar multiples of each other.
%
% For the inductive step, suppose that for some particular $k$ we know that $\{\u_1,\u_2,\ldots,\u_k\}$ is a mutually orthogonal set of unit vectors and
% \mathrm{span}(\{\u_1,\u_2,\ldots,\u_k\}) = \mathrm{span}(\{\v_1,\v_2,\ldots,\v_k\}).
% Then $\v_{k+1} \not\in \mathrm{span}(\{\v_1,\v_2,\ldots,\v_k\})$, since $B$ is linearly independent. Thus $\v_{k+1} \not\in \mathrm{span}(\{\u_1,\u_2,\ldots,\u_k\})$ as well, so the definition of $\u_{k+1}$ makes sense (i.e., $\v_{k+1} - \sum_{i=1}^{k}\ip{\u_i}{\v_{k+1}}\u_i \neq \0$, so we are not dividing by $0$) and is a unit vector.
%
% To see that $\u_{k+1}$ is orthogonal to each of $\u_1, \u_2, \ldots, \u_k$, suppose that $1 \leq j \leq k$ and compute
% \ip{\u_j}{\u_{k+1}} & = \left\langle \u_j, \frac{\v_{k+1} - \sum_{i=1}^{k}\ip{\u_i}{\v_{k+1}}\u_i}{\left\| \v_{k+1} - \sum_{i=1}^{k}\ip{\u_i}{\v_{k+1}}\u_i \right\|} \right\rangle & & \text{\color{gray}(definition of $\u_{k+1}$)} \\[0.75ex]
% & = \frac{\ip{\u_j}{\v_{k+1}} - \sum_{i=1}^{k}\ip{\u_i}{\v_{k+1}}\ip{\u_j}{\u_i}}{\left\| \v_{k+1} - \sum_{i=1}^{k}\ip{\u_i}{\v_{k+1}}\u_i \right\|} & & \text{\color{gray}(expand the inner product)} \\[0.75ex]
% & = \frac{\ip{\u_j}{\v_{k+1}} - \ip{\u_j}{\v_{k+1}}}{\left\| \v_{k+1} - \sum_{i=1}^{k}\ip{\u_i}{\v_{k+1}}\u_i \right\|} & & \text{\color{gray}($j \leq k$, so $\ip{\u_j}{\u_i} = 0$ if $i \neq j$)} \\[0.75ex]
% & = 0.
%
% All that remains is to show that
% \mathrm{span}(\{\u_1,\u_2,\ldots,\u_{k+1}\}) = \mathrm{span}(\{\v_1,\v_2,\ldots,\v_{k+1}\}).
% By rearranging the definition of $\u_{k+1}$, we see that $\v_{k+1} \in \mathrm{span}(\{\u_1,\u_2,\ldots,\u_{k+1}\})$. When we combine this fact with Equation~\eqref{eq:gram_schmidt_span}, this implies
% \mathrm{span}(\{\u_1,\u_2,\ldots,\u_{k+1}\}) \supseteq \mathrm{span}(\{\v_1,\v_2,\ldots,\v_{k+1}\}).
% The $\v_i$'s are linearly independent (they form a subset of the basis $B$), so the span on the right has dimension $k+1$. Similarly, the $\u_i$'s are linearly independent (they are mutually orthogonal, so linear independence follows from earlier theorem), so the span on the left also has dimension $k+1$, and thus the two spans must in fact be equal.
\end{proof}

Since finite-dimensional inner product spaces (by definition) have a basis consisting of finitely many vectors, and the Gram--Schmidt process tells us how to convert that basis into an orthonormal basis, we now know that every finite-dimensional inner product space has an orthonormal basis:

\begin{corollary}[Existence of Orthonormal Bases]
Every finite-dimensional inner product space has an orthonormal basis.
\end{corollary}

\exx[22]{Find an orthonormal basis for $\mathcal{P}^2[-1,1]$ with respect to the inner product
\[ \langle f,g \rangle = \int_{-1}^1 f(x)g(x) \diff x.
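% (Hedged instructor sketch, not from the original notes, of how this computation can go: start from the basis {1, x, x^2}.
% Step 1: ||1||^2 = \int_{-1}^1 1 dx = 2, so u_1 = 1/sqrt(2).
% Step 2: <u_1, x> = 0 (odd integrand), so w_2 = x, and ||x||^2 = 2/3, giving u_2 = sqrt(3/2) x.
% Step 3: <u_1, x^2> u_1 = 1/3 and <u_2, x^2> = 0, so w_3 = x^2 - 1/3, and ||w_3||^2 = 8/45, giving u_3 = sqrt(45/8) (x^2 - 1/3).
% These are scalar multiples of the first three Legendre polynomials, matching the comment below.)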
\]}
% Start with standard basis {1,x,x^2}.
% You get the Legendre polynomials (up to scaling)

\end{document}