% DEFINE some information that will be populated throughout the course notes.
\def \coursename {Advanced Linear Algebra}
\def \coursecode {MATH 3221}
\def \courseterm {Fall 2020}
\def \instructorname {Nathan Johnston}
% END DEFINITIONS


% IMPORT the course note formatting and templates
\input{course_notes_template}
% END IMPORT


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\setcounter{chapter}{8} % Set to one less than the week number
\chapter{The Singular Value Decomposition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

{\large This week we will learn about:
	
	\begin{itemize}
		\item The singular value decomposition (SVD),
		\item Orthogonality of the fundamental matrix subspaces, and
		\item How the SVD relates to other matrix decompositions,
	\end{itemize}\bigskip\bigskip
	
	\noindent Extra reading and watching:
	\begin{itemize}
		\item Section 2.3.1 and 2.3.2 in the textbook
		\item Lecture videos \href{https://www.youtube.com/watch?v=g86N23CZ5c8&list=PLOAf1ViVP13jdhvy-wVS7aR02xnDxueuL&index=35}{34}, \href{https://www.youtube.com/watch?v=3WqZaEYFzWA&list=PLOAf1ViVP13jdhvy-wVS7aR02xnDxueuL&index=36}{35}, \href{https://www.youtube.com/watch?v=mDFag07Un8o&list=PLOAf1ViVP13jdhvy-wVS7aR02xnDxueuL&index=37}{36}, and \href{https://www.youtube.com/watch?v=8x67QZAvXaM&list=PLOAf1ViVP13jdhvy-wVS7aR02xnDxueuL&index=38}{37} on YouTube
		\item \href{https://en.wikipedia.org/wiki/Singular_value_decomposition}{Singular value decomposition} at Wikipedia
		\item \href{https://en.wikipedia.org/wiki/Fundamental_theorem_of_linear_algebra}{Fundamental Theorem of Linear Algebra} at Wikipedia
	\end{itemize}\bigskip\bigskip
	
	\noindent Extra textbook problems:
	\begin{itemize}
		\item[$\star$] 2.3.1, 2.3.4(a,b,c,f,g,i)
		\item[$\phantom{\star}\star\star$] 2.3.3, 2.3.5, 2.3.7
		\item[$\star\star\star$] 2.3.14, 2.3.20
		\item[$\skull$] 2.3.26
\end{itemize}}


\newpage

If the Schur decomposition theorem from last week was ``big'', then the upcoming theorem is ``super-mega-gigantic''. The singular value decomposition is possibly the biggest and most widely-used theorem in all of linear algebra (and is my personal favourite), so we're going to spend some time focusing on it.

\begin{theorem}[Singular Value Decomposition (SVD)]\label{thm:svd}
	Suppose $\mathbb{F} = \R$ or $\mathbb{F} = \C$, and $A \in \M_{m,n}(\mathbb{F})$. Then there exist unitary matrices $U \in \M_m(\mathbb{F})$ and $V \in \M_n(\mathbb{F})$ and a diagonal matrix $\Sigma \in \M_{m,n}(\R)$ with non-negative entries such that\\[0.1cm]
	\[
		{}%A = U\Sigma V^*.
	\]
	Furthermore, the diagonal entries of $\Sigma$ (called the \textbf{singular values} of $A$) are the non-negative square roots of the eigenvalues of $A^*A$.
\end{theorem}
% Make a note about what "diagonal" means for rectangular matrices and give a 2x3 example

Let's compare how this decomposition theorem is good and bad compared to our previous decomposition theorems:

\begin{itemize}
	\item Good:
	
	\vspace*{-1cm}\horlines{1} % Works for every matrix (EVEN RECTANGULAR!!)
	
	\item Good:
	
	\vspace*{-1cm}\horlines{2} % Gets us to a diagonal matrix (one that even has non-negative reals on diagonal)
	
	\item Kinda good, kinda bad:
	
	\vspace*{-1cm}\horlines{2} % Need *two* unitary matrices. Better than invertible matrices, I guess.
\end{itemize}

\begin{proof}
	Consider the matrix $A^*A$ and assume that $m \geq n$...
	
	\horlines{4}\vspace*{-1.3cm}
	% Essentially follows from unitary freedom of PSD decompositions.
	%	(if $m < n$ then we instead use the matrix $AA^*$, but otherwise the proof is almost identical). We know that $A^*A$ is positive semidefinite and thus has a spectral decomposition $A^*A = VDV^*$, where $V \in \M_n(\mathbb{F})$ is unitary and $D \in \M_n(\R)$ is a diagonal matrix with non-negative diagonal entries equal to the eigenvalues of $A^*A$. Then define $\Sigma \in \M_{m,n}$ by $[\Sigma]_{i,i} = \sqrt{d_{i,i}}$ \marginnote{In other words, $\Sigma$ is the principal square root of $D$, but padded with extra zero rows so that it has the same size as $A$.}[0.25cm] for all $i$, and $[\Sigma]_{i,j} = 0$ if $i \neq j$.
	%	
	%	It then follows that \[A^*A = VDV^* = V\Sigma^*\Sigma V^* = (\Sigma V^*)^*(\Sigma V^*),\]so Theorem~\ref{thm:psd_decomp_characterize} tells us that there exists a unitary matrix $U \in \M_m(\mathbb{F})$ such that $A = U(\Sigma V^*)$, which is exactly what we wanted to show.
	
	
	\newpage
	
	
	\horlines{6}\vspace*{-1.3cm}
	
\end{proof}

Some notes about the SVD are in order:

\begin{itemize}
	\item The singular values of $A$ are exactly the square roots of the eigenvalues of $A^*A$. Alternatively...
	
	\horlines{1}
	% sq. roots of eigs of $AA^*$
	
	\item Even though the singular values are uniquely determined by $A$, the diagonal matrix $\Sigma$ isn't.
	
	\horlines{1}
	% they can appear in any order on the diagonal. we usually choose s1 >= s2 >= ...
	
	\item The unitary matrices $U$ and $V$ are often not uniquely determined by $A$. Example:
	
	\horlines{1}
	% Identity matrix: V = U^*, U is ANY unitary matrix.
\end{itemize}


\exx[5]{Let's find the singular values of a matrix.}
% Ask students for a 2x2 example. Do eigenvalues/eigenvectors of A^*A.


\newpage


\horlines{3}

To compute a full singular value decomposition (not just the singular values), we again leech off of diagonalization. Notice that

\horlines{3}
% if A = U\Sigma V^* then A^*A = V\Sigma\Sigma^* V^*, which is a diagonalized PSD matrix.
% Thus eigenvectors of A^*A are the columns of V.

\noindent Similarly, the columns of $U$ are eigenvectors of $AA^*$, but a slightly quicker (and slightly more correct) way to compute the columns of $U$ is to notice that
% Need left and right vectors to "match up", hence "more correct"

\horlines{5}
% A = U\Sigma V^*, so [Av_1 | ... | Av_n] = AV = U\Sigma = [\sigma_1 u_1 | ... | \sigma_n u_n], so u_j = Av_j/sigma_j.
% If sigma_j = 0 then just extend to an ONB via gram-schmidt


\exx[3]{Compute a singular value decomposition of the matrix \[A = \begin{bmatrix}1 & 2 & 3 \\
	-1 & 0 & 1 \\
	3 & 2 & 1
	\end{bmatrix}.\]}


\newpage


\horlines{15}
%	From our previous discussion, we know that we want to start by finding matrices $V$ and $\Sigma$ such that $\Sigma$ is diagonal with the square roots of the eigenvalues of $A^*A$ down its diagonal, and $V$ contains the corresponding (normalized) eigenvectors as its columns.
%	
%	Well, direct calculation shows that
%	\begin{align*}
%	A^*A = \begin{bmatrix}
%	11 & 8 & 5 \\
%	8 & 8 & 8 \\
%	5 & 8 & 11
%	\end{bmatrix},
%	\end{align*}
%	which has characteristic polynomial
%	\begin{align*}
%	p(\lambda) & = \det(A^*A - \lambda I) = -\lambda^3 + 30\lambda^2 - 144\lambda = -\lambda(\lambda-6)(\lambda-24).
%	\end{align*}
%	Thus the eigenvalues of $A^*A$ are $\lambda_1 = 24$, $\lambda_2 = 6$, and $\lambda_3 = 0$, so the singular values of $A$ are $\sigma_1 = \sqrt{24} = 2\sqrt{6}$, $\sigma_2 = \sqrt{6}$, and \marginnote{In particular, $A$ has rank $2$ since $2$ of its singular values are non-zero.} $\sigma_3 = 0$. The normalized eigenvectors corresponding to these eigenvalues are
%	\begin{align*}
%	\v_1 = \frac{1}{\sqrt{3}}\begin{bmatrix}
%	1 \\ 1 \\ 1
%	\end{bmatrix}, \qquad \v_2 = \frac{1}{\sqrt{2}}\begin{bmatrix}
%	-1 \\ 0 \\ 1
%	\end{bmatrix}, \quad \text{and} \quad \v_3 = \frac{1}{\sqrt{6}}\begin{bmatrix}
%	-1 \\ 2 \\ -1
%	\end{bmatrix},
%	\end{align*}
%	respectively. We then place these eigenvectors into the matrix $V$ as columns and obtain\marginnote{We just pulled a $1/\sqrt{6}$ factor out of $V$ to avoid having to write fractional entries---simplifying in another way (e.g., explicitly writing its top-left entry as $1/\sqrt{3}$) is fine too.}[1cm]
%	\begin{align*}
%	\Sigma & = \begin{bmatrix}
%	2\sqrt{6} & 0 & 0 \\ 0 & \sqrt{6} & 0 \\ 0 & 0 & 0
%	\end{bmatrix} \quad \text{and} \quad V = \frac{1}{\sqrt{6}}\begin{bmatrix}
%	\sqrt{2} & -\sqrt{3} & -1 \\
%	\sqrt{2} & 0 & 2 \\
%	\sqrt{2} & \sqrt{3} & -1
%	\end{bmatrix}.
%	\end{align*}
%	
%	Since $2$ of the singular values are non-zero, we know that $\rank(A) = 2$. Thus we compute the first $2$ columns of $U$ via
%	\begin{align*}
%	\u_1 & = \frac{1}{\sigma_1}A\v_1 = \frac{1}{2\sqrt{6}}\begin{bmatrix}1 & 2 & 3 \\
%	-1 & 0 & 1 \\
%	3 & 2 & 1
%	\end{bmatrix}\left(\frac{1}{\sqrt{3}}\begin{bmatrix}
%	1 \\ 1 \\ 1
%	\end{bmatrix}\right) = \frac{1}{\sqrt{2}}\begin{bmatrix}
%	1 \\ 0 \\ 1
%	\end{bmatrix}, \\
%	\u_2 & = \frac{1}{\sigma_2}A\v_2 = \frac{1}{\sqrt{6}}\begin{bmatrix}1 & 2 & 3 \\
%	-1 & 0 & 1 \\
%	3 & 2 & 1
%	\end{bmatrix}\left(\frac{1}{\sqrt{2}}\begin{bmatrix}
%	-1 \\ 0 \\ 1
%	\end{bmatrix}\right) = \frac{1}{\sqrt{3}}\begin{bmatrix}
%	1 \\ 1 \\ -1
%	\end{bmatrix}.
%	\end{align*}
%	The third column of $U$ can be found by extending $\{\u_1,\u_2\}$ to an orthonormal basis of $\R^3$, which just means that we need to find a unit vector orthogonal to each of $\u_1$ and $\u_2$. We could do this by solving a linear system, using the Gram--Schmidt process, or via the cross product. Any of these methods quickly lead us to the vector \marginnote{Alternatively, $\u_3 = (-1,2,1)/\sqrt{6}$ would be fine too.}[-0.5cm] $\u_3 = (1,-2,-1)/\sqrt{6}$, so the singular value decomposition $A = U\Sigma V^*$ is completed by choosing\marginnote{Again, we just pulled a $1/\sqrt{6}$ factor out of $U$ to avoid fractions.}[1cm]
%	\begin{align*}
%	U & = \frac{1}{\sqrt{6}}\begin{bmatrix}
%	\sqrt{3} & \sqrt{2} & 1 \\
%	0 & \sqrt{2} & -2 \\
%	\sqrt{3} & -\sqrt{2} & -1
%	\end{bmatrix}.
%	\end{align*}


Before delving into what makes the singular value decomposition so useful, it is worth noting that if $A \in \M_{m,n}(\mathbb{F})$ has singular value decomposition $A = U\Sigma V^*$ then $A^T$ and $A^*$ have singular value decompositions

\horlines{1}
% A^T & = \overline{V}\Sigma^TU^T and A^* & = V\Sigma^* U^*.

\noindent In particular,

\horlines{1}
% $\Sigma$, $\Sigma^T$, and $\Sigma^*$ all have the same diagonal entries, so $A$, $A^T$, and $A^*$ all have the same singular values.


\newpage


\section*{Geometric Interpretation}

Recall that we think of unitary matrices as arbitrary-dimensional rotations and/or reflections. Using this intuition gives the singular value decomposition a simple geometric interpretation. Specifically, it says that every matrix $A = U\Sigma V^* \in \M_{m,n}(\mathbb{F})$ acts as a linear transformation from $\mathbb{F}^n$ to $\mathbb{F}^m$ in the following way:\medskip

\begin{itemize}
	\item First,
	
	\vspace*{-1cm}\horlines{1}
	% it rotates and/or reflects $\mathbb{F}^n$ (i.e., $V^*$ acts on $\mathbb{F}^n$).\smallskip
	
	\item Then,
	
	\vspace*{-1cm}\horlines{2}
	% it stretches and/or shrinks $\mathbb{F}^n$ along the standard axes (i.e., the diagonal matrix $\Sigma$ acts on $\mathbb{F}^n$) and then embeds it in $\mathbb{F}^m$ (i.e., either adds $m-n$ extra dimensions if $m > n$ or ignores $n-m$ of the dimensions if $m < n$)
	
	\item Finally,
	
	\vspace*{-1cm}\horlines{1}
	% it rotates and/or reflects $\mathbb{F}^m$ (i.e., $U$ acts on $\mathbb{F}^m$).
\end{itemize}

Let's illustrate this geometric interpretation in the $m = n = 2$ case:

\horlines{10}
% TODO: FOR next time, add more space here (until the end of the page) so that you can draw 3 pictures and the square grid on each picture
% Draw a picture like Figure~\ref{fig:svd_2d_visualize} in the textbook. Maybe 3 images: one of the unit circle, then one of it stretched, then one of it rotated after stretched


\newpage


In particular, it is worth keeping track not only of how the linear transformation changes a unit square grid on $\R^2$ into a parallelogram grid, but also how it transforms...


\horlines{1}
% the unit circle into an ellipse.

\noindent Furthermore, the two radii of the ellipse are exactly

\horlines{1}
% the singular values $\sigma_1$ and $\sigma_2$ of the matrix, so we see that singular values provide yet another way of measuring how much a linear transformation expands space.

In higher dimensions, linear transformations send (hyper-)ellipsoids to (hyper-)ellipsoids. For example, the matrix\[A = \begin{bmatrix}1 & 2 & 3 \\
-1 & 0 & 1 \\
3 & 2 & 1
\end{bmatrix}\]from earlier deforms the unit sphere as follows:

\horlines{5}
% INSERT week8_svd_3d.png

The fact that the unit sphere is turned into a 2D ellipse by this matrix corresponds to the fact that...

\horlines{2}
% it has rank $2$, so its range is $2$-dimensional.

In fact, the first two left singular vectors $\u_1$ and $\u_2$ (which point in the directions of the major and minor axes of the ellipse) form an orthonormal basis of the range.

\horlines{2}
% Similarly, the third right singular vector, $\v_3$ has the property that $A\v_3 = U\Sigma \V^*\v_3 = U\Sigma \e_3 = \sigma_3 U\e_3 = \0$, since $\sigma_3 = 0$. Thus $\v_3$ is in the null space of $A$.


\newpage


This same type of argument works in general and leads to the following theorem:

\begin{theorem}[Bases of the Fundamental Subspaces]\label{thm:matrix_subspaces_onb_svd}
	Let $A \in \M_{m,n}$ be a matrix with rank $r$ and singular value decomposition $A = U\Sigma V^*$, where \[U = \big[ \ \u_1 \ {\color{gray}|} \ \u_2 \ {\color{gray}|} \ \cdots \ {\color{gray}|} \ \u_m \ \big] \quad \text{and} \quad V = \big[ \ \v_1 \ {\color{gray}|} \ \v_2 \ {\color{gray}|} \ \cdots \ {\color{gray}|} \ \v_n \ \big].\]Then
	\begin{enumerate}
		\item[a)] $\{\u_1,\u_2,\ldots,\u_r\}$ is an orthonormal basis of $\mathrm{range}(A)$,
		
		\item[b)] $\{\u_{r+1},\u_{r+2},\ldots,\u_m\}$ is an orthonormal basis of $\mathrm{null}(A^*)$,
		
		\item[c)] $\{\v_1,\v_2,\ldots,\v_r\}$ is an orthonormal basis of $\mathrm{range}(A^*)$, and
		
		\item[d)] $\{\v_{r+1},\v_{r+2},\ldots,\v_n\}$ is an orthonormal basis of $\mathrm{null}(A)$.
	\end{enumerate}
\end{theorem}

\begin{proof}
	Let's compute $A\v_j$:
	
	\horlines{9}\vspace*{-1.3cm}
	%	\[A\v_j = U\Sigma V^*\v_j = U\Sigma \e_j = \sigma_jU\e_j = \sigma_j\u_j \quad \text{for all} \quad 1 \leq j \leq r.\]Dividing both sides by $\sigma_j$ then shows that $\u_1, \u_2, \ldots, \u_r$ are all in the range of $A$. Since $\mathrm{range}(A)$ is (by definition) $r$-dimensional, and $\{\u_1,\u_2,\ldots,\u_r\}$ is a set of $r$ mutually orthogonal unit vectors, it must be an orthonormal basis of $\mathrm{range}(A)$.
	%	
	%	Similarly, $\sigma_j = 0$ whenever $j \geq r+1$, so \[A\v_j = U\Sigma V^*\v_j = U\Sigma \e_j = \0 \quad \text{for all} \quad j \geq r+1.\]Thus $\v_{r+1},\v_{r+2},\ldots,\v_n$ are all in the null space of $A$. Since $\mathrm{null}(A)$ has dimension $n-r$ (by the rank--nullity theorem), and $\{\v_{r+1},\v_{r+2},\ldots,\v_n\}$ is a set of $n-r$ mutually orthogonal unit vectors, it must be an orthonormal basis of $\mathrm{null}(A)$.
	%	
	%	The corresponding facts about $\mathrm{range}(A^*)$ and $\mathrm{null}(A^*)$ follow by applying these same arguments to $A^*$ instead of $A$.
\end{proof}

\begin{corollary}[Orthogonality of the Fundamental Subspaces]\label{cor:fund_sub_orth}
	Suppose $\mathbb{F} = \R$ or $\mathbb{F} = \C$, and $A \in \M_{m,n}(\mathbb{F})$. Then
	\begin{enumerate}
		\item[a)] $\mathrm{range}(A)$ is orthogonal to $\mathrm{null}(A^*)$, and
		
		\item[b)] $\mathrm{null}(A)$ is orthogonal to $\mathrm{range}(A^*)$.
	\end{enumerate}
\end{corollary}


\newpage


In this corollary, when we say that one subspace is orthogonal to another, we mean that

\horlines{2}
% Every vector in the first is orthogonal to every vector in the other


\exx[15]{Compute a singular value decomposition of the matrix \[A = \begin{bmatrix}1 & 1 & 1 & -1 \\ 0 & 1 & 1 & 0 \\ -1 & 1 & 1 & 1\end{bmatrix},\]and use it to construct bases of the four fundamental subspaces of $A$.}


\newpage


\horlines{7}
%	To compute the SVD of $A$, we could start by computing $A^*A$ as in the previous example, but recall that we can also construct an SVD from $AA^*$ instead. Since $A^*A$ is a $4 \times 4$ matrix and $AA^*$ is a $3 \times 3$ matrix, working with $AA^*$ will likely be easier, so that's what we do.
%
%AA^* = 
% 4 & 2 & 0
% 2 & 2 & 2
% 0 & 2 & 4
%
%which has eigenvalues $6,4,0$, so the singular values of $A$ are $\sqrt{6},2,0$. The normalized eigenvectors corresponding to these eigenvalues are
%\u_1 = (1,1,1)/\sqrt{3}
%\u_2 = (-1,0,1)/\sqrt{2}
%\u_3 = (-1,2,-1)/\sqrt{6}
%respectively. We then place these eigenvectors into the matrix $U$ as columns and obtain (NOTE THAT SIGMA HAS SAME SHAPE AS A!!)
%
% Sigma =
% \sqrt{6} & 0 & 0 & 0
% 0        & 2 & 0 & 0
% 0        & 0 & 0 & 0
%   and
% U = \frac{1}{\sqrt{6}}\begin{bmatrix}
%\sqrt{2} & -\sqrt{3} & -1 \\
%\sqrt{2} & 0 & 2 \\
%\sqrt{2} & \sqrt{3} & -1
%\end{bmatrix}.
%
%Since $2$ of the singular values are non-zero, we know that $\rank(A) = 2$. Thus we compute the first $2$ columns of $V$ via
%\begin{align*}
%\v_1 & = (0 \\ 1 \\ 1 \\ 0)/sqrt(2)
%\v_2 & = (-1 \\ 0 \\ 0 \\ 1)/sqrt(2)
%The third and fourth columns of $V$ can be found by extending $\{\v_1,\v_2\}$ to an orthonormal basis of $\R^4$. We could do this via the Gram--Schmidt process, but in this case it is simple enough to ``eyeball'' vectors that work: we can choose 
%\v_3 = (0,1,-1,0)/\sqrt{2}
%\v_4 = (1,0,0,1)/\sqrt{2}$.
%Thus the singular value decomposition $A = U\Sigma V^*$ is completed by choosing
%V & = \frac{1}{\sqrt{2}}\begin{bmatrix}
%0 & -1 & 0 & 1 \\
%1 & 0 & 1 & 0 \\
%1 & 0 & -1 & 0 \\
%0 & 1 & 0 & 1
%\end{bmatrix}.
%
%We can the construct orthonormal bases of the four fundamental subspaces directly from Theorem~\ref{thm:matrix_subspaces_onb_svd} (recall that the rank of $A$ is $r = 2$):
%
%\begin{itemize}
%	\item $\mathrm{range}(A)$: $\{\u_1,\u_2\} = \{(1,1,1)/\sqrt{3},(-1,0,1)/\sqrt{2}\}$,
%	
%	\item $\mathrm{null}(A^*)$: $\{\u_{3}\} = \{(-1,2,-1)/\sqrt{6}\}$,
%	
%	\item $\mathrm{range}(A^*)$: $\{\v_1,\v_2\} = \{(0,1,1,0)/\sqrt{2},(-1,0,0,1)/\sqrt{2}\}$, and
%	
%	\item $\mathrm{null}(A)$: $\{\v_3,\v_4\} = \{(0,1,-1,0)/\sqrt{2},(1,0,0,1)/\sqrt{2}\}$.
%\end{itemize}


\section*{Relationship With Other Matrix Decompositions}

We now make sure that we really understand where the SVD fits into our world of matrix decompositions. For example, one way of rephrasing the singular value decomposition is as saying that we can always write a rank-$r$ matrix as a sum of $r$ rank-$1$ matrices in a very special way:

\begin{theorem}[Orthogonal Rank-One Sum Decomposition]\label{thm:sing_value_rank_sum}
	Suppose $\mathbb{F} = \R$ or $\mathbb{F} = \C$, and $A \in \M_{m,n}(\mathbb{F})$ is a matrix with $\mathrm{rank}(A) = r$. Then there exist orthonormal sets of vectors $\{ \u_i \}_{i=1}^{r} \subset \mathbb{F}^m$ and $\{ \v_i \}_{i=1}^{r} \subset \mathbb{F}^n$ such that\\[0.1cm]
	\begin{align*}
		{}% A = \sum_{i=1}^{r}\sigma_i\u_i\v_i^*,
	\end{align*}
	where $\sigma_1 \geq \sigma_2 \geq \cdots \geq \sigma_r > 0$ are the non-zero singular values of $A$.
\end{theorem}

\begin{itemize}
	\item This formulation is sometimes useful because...
	
	\vspace*{-1cm}\horlines{2}
	%  we like to think of the SVD as breaking A down into r "pieces" -- one for each singular value (s1 is most important, s2 is second-most-important, etc.)
		
	\item In fields other than $\R$ and $\C$, ...
	
	\vspace*{-1cm}\horlines{2}
	% You can do the same thing, but without "orthonormal" -- they are just linearly independent
\end{itemize}


\newpage


\begin{proof}
	For simplicity, we again assume that $m \leq n$ throughout this proof, and then we just do block matrix multiplication in the singular value decompositon:
	
	\horlines{7}\vspace*{-1.3cm}
	
	%	Use the singular value decomposition to write $A = U\Sigma V^*$, where $U$ and $V$ are unitary and $\Sigma$ is diagonal, and then write $U$ and $V$ in terms of their columns and $\Sigma$ in terms of its diagonal entries:
	%	\begin{align*}
	%		U & = \big[ \ \u_1 \ {\color{gray}|} \ \u_2 \ {\color{gray}|} \ \cdots \ {\color{gray}|} \ \u_m \ \big], \quad V = \big[ \	\v_1 \ {\color{gray}|} \ \v_2 \ {\color{gray}|} \ \cdots \ {\color{gray}|} \ \v_n \ \big], \quad \text{and} \\
	%		\Sigma & = \begin{bmatrix}
	%			\sigma_1 & 0 & \cdots & 0 & 0 & \cdots & 0 \\
	%			0 & \sigma_2 & \cdots & 0 & 0 & \cdots & 0 \\
	%			\vdots & \vdots & \ddots & \vdots & \vdots & \ddots & \vdots \\
	%			0 & 0 & \cdots & \sigma_m & 0 & \cdots & 0
	%		\end{bmatrix}.
	%	\end{align*}
	%	Then performing block matrix multiplication reveals that
	%	\begin{align*}
	%		A & = U\Sigma V^* \\
	%		& = \big[ \ \u_1 \ {\color{gray}|} \ \u_2 \ {\color{gray}|} \ \cdots \ {\color{gray}|} \ \u_m \ \big]\begin{bmatrix}
	%			\sigma_1 & 0 & \cdots & 0 & 0 & \cdots & 0 \\
	%			0 & \sigma_2 & \cdots & 0 & 0 & \cdots & 0 \\
	%			\vdots & \vdots & \ddots & \vdots & \vdots & \ddots & \vdots \\
	%			0 & 0 & \cdots & \sigma_m & 0 & \cdots & 0
	%		\end{bmatrix}\left[\begin{array}{c}
	%			\v_1^* \\\hline
	%			\v_2^* \\\hline
	%			\vdots \\\hline
	%			\v_n^*
	%		\end{array}\right] \\
	%		& = \big[ \	\sigma_1\u_1 \ {\color{gray}|} \ \sigma_2\u_2 \ {\color{gray}|} \ \cdots \ {\color{gray}|} \ \sigma_m\u_m \ {\color{gray}|} \ \0 \ {\color{gray}|} \ \cdots \ {\color{gray}|} \ \0 \ \big]\left[\begin{array}{c}
	%			\v_1^* \\\hline
	%			\v_2^* \\\hline
	%			\vdots \\\hline
	%			\v_n^*
	%		\end{array}\right] \\
	%		& = \sum_{i=1}^m \sigma_i\u_i\v_i^*.
	%	\end{align*}
	%	By recalling that $\sigma_i = 0$ whenever $i > r$, we see that this is exactly what we wanted.
\end{proof}

In fact the singular value decomposition and the orthogonal rank-one sum decomposition are ``equivalent'' in the sense that you can prove one to quickly prove the other, and vice-versa. Sometimes they are both just called the singular value decomposition.

\exx[8]{Compute an orthogonal rank-one sum decomposition of the matrix \[A = \begin{bmatrix}1 & 1 & 1 & -1 \\ 0 & 1 & 1 & 0 \\ -1 & 1 & 1 & 1\end{bmatrix}.\]}
%	This is the same matrix from the end of last week, which has singular value decomposition
%\begin{align*}
%	U & = \frac{1}{\sqrt{6}}\begin{bmatrix}
%		\sqrt{2} & -\sqrt{3} & -1 \\
%		\sqrt{2} & 0 & 2 \\
%		\sqrt{2} & \sqrt{3} & -1
%	\end{bmatrix}, \quad \Sigma = \begin{bmatrix}
%		\sqrt{6} & 0 & 0 \\ 0 & 2 & 0 \\ 0 & 0 & 0
%	\end{bmatrix} \quad \text{and} \\
%	V & = \frac{1}{\sqrt{2}}\begin{bmatrix}
%		0 & -1 & 0 & 1 \\
%		1 & 0 & 1 & 0 \\
%		1 & 0 & -1 & 0 \\
%		0 & 1 & 0 & 1
%	\end{bmatrix}.
%\end{align*}
%Then the orthogonal rank-one sum decomposition is computed by adding up the outer products of the columns of $U$ and $V$, scaled by the diagonal entries of \marginnote{Its straightforward to perform the matrix multiplications and additions to verify that the sum on the right equals $A$.} $\Sigma$:
%\begin{align*}
%	A & = \sigma_1\u_1\v_1^* + \sigma_2\u_2\v_2^* = \begin{bmatrix}1 \\ 1 \\ 1\end{bmatrix}\begin{bmatrix}0 & 1 & 1 & 0\end{bmatrix} + \begin{bmatrix}-1 \\ 0 \\ 1\end{bmatrix}\begin{bmatrix}-1 & 0 & 0 & 1\end{bmatrix}.
%\end{align*}


\newpage


Similarly, the singular value decomposition is also ``essentially equivalent'' to the polar decomposition:

\horlines{3}
% If $A \in \M_n$ has singular value decomposition $A = U\Sigma V^*$, then $A = (UV^*)(V\Sigma V^*)$ is a polar decomposition of $A$, since $UV^*$ is unitary and $V\Sigma V^*$ is positive semidefinite.

\noindent In the opposite direction,

\horlines{3}
% If you start with a polar decomposition $A = UP$ and apply the spectral decomposition to $P = V\Sigma V^*$, you get $A = (UV)\Sigma V^*$ which is a singular value decomposition of $A$.

If $A \in \M_n$ is positive semidefinite, then the singular value decomposition coincides exactly with the spectral decomposition:

\horlines{3}
% Spectral: A = UDU^* with D >= 0, so D contains singular values, and left sing vects coincide with right sing vects (i.e., V = U). Corrollary: eigenvalues equal singular values.

\noindent A slight generalization of this type of argument leads to the following theorem:

\begin{theorem}[Singular Values of Normal Matrices]\label{thm:sing_value_normal_mat}
	Suppose $A \in \M_{n}$ is a normal matrix. Then the singular values of $A$ are the absolute values of its eigenvalues.
\end{theorem}

\begin{proof}
	Since $A$ is normal, we can use the spectral decomposition to write $A = UDU^*$, where $U$ is unitary and $D$ is diagonal...
	
	\horlines{3}
		
	\newpage
	
	\horlines{7}\vspace*{-1.3cm}
	
	%	(but with the not-necessarily-positive eigenvalues of $A$ on its diagonal). Write each $\lambda_j$ in its polar form: $\lambda_j = |\lambda_j|e^{i\theta_j}$. Then
	%	\begin{center}
	%		\begin{tikzpicture}[scale=1]%
	%		\draw (0,0) node[anchor=north]{$\begin{aligned}D = \begin{bmatrix}
	%			\lambda_1 & 0 & \cdots & 0 \\
	%			0 & \lambda_2 & \cdots & 0 \\
	%			\vdots & \vdots & \ddots & \vdots \\
	%			0 & 0 & \cdots & \lambda_n.
	%			\end{bmatrix} = \begin{bmatrix}
	%			|\lambda_1| & 0 & \cdots & 0 \\
	%			0 & |\lambda_2| & \cdots & 0 \\
	%			\vdots & \vdots & \ddots & \vdots \\
	%			0 & 0 & \cdots & |\lambda_n|.
	%			\end{bmatrix}\begin{bmatrix}
	%			e^{\theta_1} & 0 & \cdots & 0 \\
	%			0 & e^{\theta_2} & \cdots & 0 \\
	%			\vdots & \vdots & \ddots & \vdots \\
	%			0 & 0 & \cdots & e^{\theta_n}
	%			\end{bmatrix},\end{aligned}$};
	%		\draw [thick,gray,decoration={brace,mirror,raise=0.5cm},decorate] (-1.35,-2.35) --node[anchor=north,shift={(0,-0.7)}]{call this $\Sigma$} (2.21,-2.35);
	%		\draw [thick,gray,decoration={brace,mirror,raise=0.5cm},decorate] (2.54,-2.35) --node[anchor=north,shift={(0,-0.7)}]{call this $\Theta$} (5.6,-2.35);
	%		\end{tikzpicture}
	%	\end{center}
	%	so that $A = UDU^* = U\Sigma \Theta U^* = U\Sigma (U\Theta^*)^*$. Since $U$ and $\Theta$ are both unitary, so is $U\Theta^*$, so this is a singular value decomposition of $A$. Thus the diagonal entries of $\Sigma$ (i.e., $|\lambda_1|, |\lambda_2|, \ldots, |\lambda_n|$) are the singular values of $A$.
\end{proof}

To see that the above theorem does not hold for non-normal matrices, consider the following example:

\exx[7]{Compute the eigenvalues and singular values of the matrix \[A = \begin{bmatrix}1 & 1 \\ 0 & 1\end{bmatrix}.\]}
% eigenvalues 1,1 since upper triangular
% singular values (sqrt(5) \pm 1)/2

\end{document}