% DEFINE some information that will be populated throughout the course notes.
\def \coursename {Advanced Linear Algebra}
\def \coursecode {MATH 3221}
\def \courseterm {Fall 2020}
\def \instructorname {Nathan Johnston}
% END DEFINITIONS


% IMPORT the course note formatting and templates
\input{course_notes_template}
% END IMPORT


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\setcounter{chapter}{7} % Set to one less than the week number
\chapter{Positive (Semi)Definiteness}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

{\large This week we will learn about:

\begin{itemize}
	\item Positive definite and positive semidefinite matrices,
	\item Gershgorin discs and diagonal dominance,
	\item The principal square root of a matrix, and
	\item The polar decomposition.
\end{itemize}\bigskip\bigskip

\noindent Extra reading and watching:
\begin{itemize}
	\item Section 2.2 in the textbook
	\item Lecture videos \href{https://www.youtube.com/watch?v=bo8q_HW00wo&list=PLOAf1ViVP13jdhvy-wVS7aR02xnDxueuL&index=31}{30}, \href{https://www.youtube.com/watch?v=gu-RKEAlguQ&list=PLOAf1ViVP13jdhvy-wVS7aR02xnDxueuL&index=32}{31}, \href{https://www.youtube.com/watch?v=jLTrYS3yB9Y&list=PLOAf1ViVP13jdhvy-wVS7aR02xnDxueuL&index=33}{32}, and \href{https://www.youtube.com/watch?v=d8DXx5aLUcQ&list=PLOAf1ViVP13jdhvy-wVS7aR02xnDxueuL&index=34}{33} on YouTube
	\item \href{https://en.wikipedia.org/wiki/Positive-definite_matrix}{Positive-definite matrix} at Wikipedia
	\item \href{https://en.wikipedia.org/wiki/Gershgorin_circle_theorem}{Gershgorin circle theorem} at Wikipedia
	\item \href{https://en.wikipedia.org/wiki/Square_root_of_a_matrix}{Square root of a matrix} at Wikipedia
	\item \href{https://en.wikipedia.org/wiki/Polar_decomposition}{Polar decomposition} at Wikipedia
\end{itemize}\bigskip\bigskip

\noindent Extra textbook problems:
\begin{itemize}
	\item[$\star$] 2.2.1, 2.2.2
	\item[$\phantom{\star}\star\star$] 2.2.3, 2.2.5--2.2.10, 2.2.12
	\item[$\star\star\star$] 2.2.11, 2.2.14, 2.2.16, 2.2.19, 2.2.22
	\item[$\skull$] 2.2.18, 2.2.27, 2.2.28
\end{itemize}}


\newpage

% Should really point out that Hermitian matrices have real eigenvalues before this

Recall that normal matrices play a particularly important role in linear algebra (they can be diagonalized by unitary matrices). There is one particularly important family of normal matrices that we now focus our attention on.

\begin{definition}[Positive (Semi)Definite Matrices]\label{defn:psd_matrix}
	Suppose $\mathbb{F} = \R$ or $\mathbb{F} = \C$, and $A = A^* \in \M_n(\mathbb{F})$. Then $A$ is called
	\begin{enumerate}[label=\alph*)]
		\item \textbf{positive semidefinite (PSD)} if $\v^*A\v \geq 0$ for all $\v \in \mathbb{F}^n$, and
		
		\item \textbf{positive definite (PD)} if $\v^*A\v > 0$ for all $\v \neq \0$.
	\end{enumerate}
\end{definition}

Positive (semi)definiteness is somewhat difficult to eyeball from the entries of a matrix, and we should emphasize that it does \emph{not} mean that the entries of the matrix need to be positive. For example, if \[A = \begin{bmatrix}
1 & -1 \\ -1 & 1
\end{bmatrix} \quad \text{and} \quad B = \begin{bmatrix}
1 & 2 \\ 2 & 1
\end{bmatrix},\]
then...

\horlines{10}
%$A$ is positive semidefinite since \[\v^*A\v = \begin{bmatrix}\overline{v_1} & \overline{v_2}\end{bmatrix}\begin{bmatrix}
%1 & -1 \\ -1 & 1
%\end{bmatrix}\begin{bmatrix}v_1 \\ v_2\end{bmatrix} = |v_1|^2 - \overline{v_1}v_2 - \overline{v_2}v_1 + |v_2|^2 = |v_1 - v_2|^2 \geq 0\] for all $\v \in \C^2$. On the other hand, $B$ is \emph{not} positive semidefinite since if $\v = (1,-1)$ then $\v^*B\v = -2$.

% NOTE that a $1 \times 1$ matrix (i.e., a scalar) is PSD if and only if it is non-negative. We often think of PSD matrices as the ``matrix version'' of the non-negative real numbers.

The definition of positive semidefinite matrices perhaps looks a bit odd at first glance. The next theorem characterizes these matrices in several other equivalent ways, some of which are hopefully a bit more illuminating and easier to work with.


\newpage


\begin{theorem}[Characterization of PSD and PD Matrices]\label{thm:psd_characterize}
	Suppose $\mathbb{F} = \R$ or $\mathbb{F} = \C$, and $A = A^* \in \M_n(\mathbb{F})$. The following are equivalent:
	\begin{enumerate}[label=\alph*)]
		\item $A$ is positive ({\color{orng}semidefinite} | {\color{ocre}definite}),
		
		\item All of the eigenvalues of $A$ are ({\color{orng}non-negative} | {\color{ocre}strictly positive}),
		
		\item There exists a diagonal $D \in \M_n(\R)$ with ({\color{orng}non-negative} | {\color{ocre}strictly positive}) diagonal entries and a unitary matrix $U \in \M_n(\mathbb{F})$ such that $A = UDU^*$, and
		
		\item There exists ({\color{orng}a matrix} | {\color{ocre}an invertible matrix}) $B \in \M_n(\mathbb{F})$ such that $A = B^*B$.
%		\item There exists ({\color{orng}a set} | {\color{ocre} a linearly independent set}) $\{\v_1,\v_2,\ldots,\v_n\} \subset \mathbb{F}^n$ such that \[a_{i,j} = \v_i \cdot \v_j \quad \text{for all} \quad 1 \leq i,j \leq n.\]
	\end{enumerate}
\end{theorem}

\begin{proof}
	We prove the theorem by showing that (a) $\implies$ (b) $\implies$ (c) $\implies$ (d) $\implies$ (a).
	
	\horlines{15}\vspace*{-1.3cm}
%	To see that (a) $\implies$ (b), let $\v$ be an eigenvector of $A$ with associated eigenvalue $\lambda$. Then $A\v = \lambda\v$, and multiplying this equation on the left shows that $\v^*A\v = \lambda\v^*\v = \lambda\|\v\|^2$. Since $A$ is positive semidefinite, we know that $\v^*A\v \geq 0$, so it follows that $\lambda \geq 0$ too.\smallskip
%
%To see that (b) $\implies$ (c), we just apply the spectral decomposition theorem (Theorem~??) to $A$.\smallskip
%
%To see why (c) $\implies$ (d), define $B = \sqrt{D}U^*$ (recall that the square root of a diagonal matrix is obtained just by taking the square root of each of the diagonal entries). Then $B^*B = (\sqrt{D}U^*)^*(\sqrt{D}U^*) = U\sqrt{D}^*\sqrt{D}U^* = UDU^* = A$.\smallskip
%
%To show that (d) $\implies$ (e), let $\{\v_1,\v_2,\ldots,\v_n\}$ be the columns of $B$ (i.e., $B = [ \
%\v_1 \ {\color{gray}|} \ \v_2 \ {\color{gray}|} \ \cdots \ {\color{gray}|} \ \v_n \
%]$). It then follows that \marginnote{This matrix of all possible dot products of $\{\v_1,\v_2,\ldots,\v_n\}$ is sometimes called the \textbf{Gram matrix} of those vectors. See Exercise~\ref{exer:gram_lin_indep} for one use of Gram matrices.}[0.5cm] \[A = B^*B = \left[\begin{array}{c}
%\v_1^* \\\hline \v_2^* \\\hline \vdots \\\hline \v_n^*
%\end{array}\right][ \
%\v_1 \ {\color{gray}|} \ \v_2 \ {\color{gray}|} \ \cdots \ {\color{gray}|} \ \v_n \
%] = \begin{bmatrix}
%\v_1^*\v_1 & \v_1^*\v_2 & \cdots & \v_1^*\v_n \\
%\v_2^*\v_1 & \v_2^*\v_2 & \cdots & \v_2^*\v_n \\
%\vdots & \vdots & \ddots & \vdots \\
%\v_n^*\v_1 & \v_n^*\v_2 & \cdots & \v_n^*\v_n\end{bmatrix}.\] In other words, $a_{i,j} = \v_i^*\v_j = \v_i \cdot \v_j$.\smallskip
%
%Finally, to see that (e) $\implies$ (a), we let $\w \in \C^n$ be any vector and we compute $\w^*A\w$:
%\begin{align*}
%	\w^*A\w & = \begin{bmatrix}
%		\overline{w_1} & \overline{w_2} & \cdots & \overline{w_n}
%	\end{bmatrix}\begin{bmatrix}
%		\v_1^*\v_1 & \v_1^*\v_2 & \cdots & \v_1^*\v_n \\
%		\v_2^*\v_1 & \v_2^*\v_2 & \cdots & \v_2^*\v_n \\
%		\vdots & \vdots & \ddots & \vdots \\
%		\v_n^*\v_1 & \v_n^*\v_2 & \cdots & \v_n^*\v_n\end{bmatrix}\begin{bmatrix}
%		w_1 \\ w_2 \\ \vdots \\ w_n
%	\end{bmatrix} \\
%	& = \begin{bmatrix}
%		\overline{w_1} & \overline{w_2} & \cdots & \overline{w_n}
%	\end{bmatrix}\begin{bmatrix}
%		w_1\v_1^*\v_1 + w_2\v_1^*\v_2 + \cdots + w_n\v_1^*\v_n \\
%		w_1\v_2^*\v_1 + w_2\v_2^*\v_2 + \cdots + w_n\v_2^*\v_n \\
%		\vdots \\
%		w_1\v_n^*\v_1 + w_2\v_n^*\v_2 + \cdots + w_n\v_n^*\v_n\end{bmatrix} \\
%	& = \overline{w_1}(w_1\v_1^*\v_1 + \cdots + w_n\v_1^*\v_n) + \cdots + \overline{w_n}(w_1\v_n^*\v_1 + \cdots + w_n\v_n^*\v_n) \\
%	& = (w_1\v_1 + w_2\v_2 + \cdots + w_n\v_n)^*(w_1\v_1 + w_2\v_2 + \cdots + w_n\v_n) \\
%	& = \|w_1\v_1 + w_2\v_2 + \cdots + w_n\v_n\|^2 \\
%	& \geq 0.
%\end{align*}
%It follows that $A$ is positive semidefinite, so the proof is complete.
\end{proof}


\newpage


\exx[10]{Show that $A = \begin{bmatrix}
	1 & -1 \\ -1 & 1
	\end{bmatrix}$ is PSD, but not PD, in several different ways.}
%	We already showed that $\v^*A\v = |v_1 - v_2|^2 \geq 0$ for all $\v \in \C^2$ at the start of this section, which shows that $A$ is PSD. We now verify that properties (b)--(e) of Theorem~\ref{thm:psd_characterize} are all satisfied as well.\smallskip
%
%For property~(b), \marginnote{In practice, checking non-negativity of its eigenvalues is the simplest way to determine whether or not a matrix is positive semidefinite.} we can explicitly compute the eigenvalues of $A$: \[\det\left(\begin{bmatrix}1-\lambda & -1 \\ -1 & 1-\lambda\end{bmatrix}\right) = (1-\lambda)^2 - 1 = \lambda^2 - 2\lambda = \lambda(\lambda - 2) = 0,\]so the eigenvalues of $A$ are $0$ and $2$, which are indeed non-negative.\smallskip
%
%For property~(c), we want to find a unitary matrix $U$ such that $A = UDU^*$, where $D$ has $2$ and $0$ (the eigenvalues of $A$) along its diagonal. We know from the spectral decomposition that we can construct $U$ by placing the normalized eigenvectors of $A$ into $U$ as columns. Eigenvectors corresponding to the eigenvalues $2$ and $0$ are $\v = (1,-1)$ and $\v = (1,1)$, respectively. Thus \[U = \frac{1}{\sqrt{2}}\begin{bmatrix}1 & 1 \\ -1 & 1\end{bmatrix} \quad \text{and} \quad D = \begin{bmatrix}2 & 0 \\ 0 & 0\end{bmatrix}.\]It is then straightforward to verify that $A = UDU^*$, as desired.\smallskip
%
%For property~(d), we let \[B = \sqrt{D}U^* = \frac{1}{\sqrt{2}}\begin{bmatrix}\sqrt{2} & 0 \\ 0 & 0\end{bmatrix}\begin{bmatrix}1 & -1 \\ 1 & 1\end{bmatrix} = \begin{bmatrix}1 & -1 \\ 0 & 0\end{bmatrix}.\] It is then straightforward to verify that $A = B^*B$.\smallskip
%
%For property~(e), we let $\{\v_1,\v_2\}$ be the columns of $B$: $\v_1 = (1,0)$, $\v_2 = (-1,0)$. We then see that
%\begin{align*}
%	\v_1\cdot\v_1 & = 1 & \v_1\cdot\v_2 & = -1 \\
%	\v_2\cdot\v_1 & = -1 & \v_2\cdot\v_2 & = 1,
%\end{align*}
%so
%\begin{align*}
%	\begin{bmatrix}
%		\v_1\cdot\v_1 & \v_1\cdot\v_2 \\ \v_2\cdot\v_1 & \v_2\cdot\v_2
%	\end{bmatrix} = \begin{bmatrix}1 & -1 \\ -1 & 1\end{bmatrix} = A
%\end{align*}
%as desired.

\exx[8]{Show that $A = \begin{bmatrix}
	2 & -1 & i \\ -1 & 2 & 1 \\ -i & 1 & 2 
	\end{bmatrix}$ is positive definite.}
%   Eigenvalues are 2 and 2 \pm sqrt(3)


\newpage

OK, let's look at another way of determining whether or not a matrix is positive definite, which has the advantage of not requiring us to compute eigenvalues.

\begin{theorem}[Sylvester's Criterion]\label{thm:pd_sylvester_criterion}
	Let $A = A^* \in \M_n$. Then $A$ is positive definite if and only if the determinant of the top-left $k \times k$ block of $A$ is strictly positive for all $1 \leq k \leq n$.
\end{theorem}

We won't prove Sylvester's Criterion (a proof is in the textbook if you're curious), but instead let's jump right to an example to illustrate how it works.

\exx[6]{Use Sylvester's criterion to show that $A = \begin{bmatrix}
	2 & -1 & i \\ -1 & 2 & 1 \\ -i & 1 & 2 
	\end{bmatrix}$ is positive definite.}
% Compute the 3 determinants, all positive.

% Extending Sylvester's criterion to positive \emph{semi}definite matrices is a bit of a pain, so we don't do so here.

Let's wrap up this section by reminding ourselves of something that we already proved about positive definite matrices a few weeks ago:

\begin{theorem}[Positive Definite Matrices Make Inner Products]\label{thm:pd_inner_product}
	A function $\langle \cdot,\cdot \rangle : \mathbb{F}^n \times \mathbb{F}^n \rightarrow \mathbb{F}$ is an inner product if and only if there exists a positive definite matrix $A \in \M_n(\mathbb{F})$ such that \[\langle \v,\w \rangle = \v^*A\w \quad \text{for all} \quad \v,\w \in \mathbb{F}^n.\]
\end{theorem}

\horlines{2}
% We proved the above theorem in Week 5 in the form <v,w> = v^*P^*Pw with P invertible. Well, A = P^*P.


\newpage


\section*{Diagonal Dominance and Gershgorin Discs}

In order to motivate this next section, let's think a bit about what Sylvester's criterion says when the matrix $A$ is $2 \times 2$.

\begin{theorem}[Positive Definiteness for $\mathbf{2 \times 2}$ Matrices]\label{thm:2x2_psd}
	Let $a,d \in \R$, $b \in \C$, and suppose that $\displaystyle A = \begin{bmatrix}
	a & b \\ \overline{b} & d
	\end{bmatrix}.$
	\begin{enumerate}[label=\alph*)]
		\item $A$ is positive semidefinite if and only if $a,d \geq 0$ and $|b|^2 \leq ad$, and
		
		\item $A$ is positive definite if and only if $a > 0$ and $|b|^2 < ad$.
	\end{enumerate}
\end{theorem}

Indeed, case~(b) is exactly Sylvester's criterion. For case~(a)...

\horlines{3}
% Think of it like a limiting case, but d >= 0 is needed to rule out things like [0 0;0 -1], which is not PSD.

\exx[6]{Show that $A = \begin{bmatrix}
	1 & -1 \\ -1 & 1
	\end{bmatrix}$ is positive semidefinite, but not positive definite.}

The previous theorem basically says that a $2 \times 2$ matrix is positive (semi)definite as long as its off-diagonal entries are ``small enough'' compared to its diagonal entries. This same intuition is well-founded even for larger matrices. However, to clarify exactly what we mean, we first need the following result that helps us bound the eigenvalues of a matrix based on simple information about its entries.


\newpage


\begin{theorem}[Gershgorin Disc Theorem]\label{thm:gerchgorin}
	Let $A \in \M_n(\C)$ and define the following objects:\smallskip
	\begin{itemize}
		\item $\displaystyle r_i = \sum_{j\neq i}|a_{i,j}|$ (the sum of the off-diagonal entries of the $i$-th row of $A$),
		
		\item $D(a_{i,i},r_i)$ is the closed disc in the complex plane centered at $a_{i,i}$ with radius $r_i$.\smallskip
	\end{itemize}
	Then every eigenvalue of $A$ is in at least one of the $D(a_{i,i},r_i)$ (called \textbf{Gershgorin discs}).
\end{theorem}

\exx[6]{Draw the Gershgorin discs for...}
% Ask students for a 2x2 or 3x3 matrix and then draw it. Also plot the eigenvalues and see that they're in the discs.

\begin{proof}[Proof of Theorem~\ref{thm:gerchgorin}.]
	Let $\lambda$ be an eigenvalue of $A$ with associated eigenvector $\v$. Then...
	
	\horlines{8}\vspace*{-1.3cm}
	% Since $\v \neq \0$, we can scale it so that it largest entry is $v_i = 1$, and all other entries are no larger than $1$ ($|v_j| \leq 1$ for all $j \neq i$). By looking at the $i$-th entry of the vectors $A\v = \lambda\v$, we see that
%	\[ \sum_{j} a_{i,j} v_j = \lambda v_i = \lambda, since v_i = 1 \]
%	Now notice that the sum on the left can be split up into the form
%	\[\sum_{j} a_{i,j} v_j = \sum_{j\neq i} a_{i,j} v_j + a_{i,i}, again, since $v_i = 1$\]
%	By combining and slightly rearranging the two equations above, we get
%	\[\lambda - a_{i,i} = \sum_{j\neq i} a_{i,j} v_j.\]
%	By taking the absolute value of both sides of this equation, we finally see that
%	|\lambda - a_{i,i}| = \left|\sum_{j \neq i} a_{i,j} v_j\right| \leq \sum_{j \neq i} |a_{i,j}| |v_j| \leq \sum_{j \neq i} |a_{i,j}| = r_i
%	which means exactly that $\lambda \in D(a_{i,i},r_i)$.
\end{proof}


\newpage


The Gershgorin disc theorem is an approximation theorem. For diagonal matrices we have $r_i = 0$ for all $i$, so the Gershgorin discs have radius $0$ and thus the eigenvalues are exactly the diagonal entries (which we already knew from the previous course). However, as the off-diagonal entries increase, the radii of the Gershgorin discs increase so the eigenvalues can wiggle around a bit.

\horlines{3}
% draw 2 or 3 pictures of matrices like [1 0;0 2], [1 0.1;0.1 2], etc.

In order to connect Gershgorin discs to positive semidefiniteness, we introduce one additional family of matrices:

\begin{definition}[Diagonally Dominant Matrices]\label{defn:diag_dom}
	Suppose that $A \in \M_n(\C)$. Then $A$ is called
	\begin{enumerate}[label=\alph*)]
		\item \textbf{diagonally dominant} if $\displaystyle |a_{i,i}| \geq \sum_{j\neq i}|a_{i,j}|$ for all $1 \leq i \leq n$, and
		
		\item \textbf{strictly diagonally dominant} if $\displaystyle |a_{i,i}| > \sum_{j\neq i}|a_{i,j}|$ for all $1 \leq i \leq n$.
	\end{enumerate}
\end{definition}

\exx[6]{Show that the matrix \[A = \begin{bmatrix}
	2 & 0 & i \\ 0 & 3 & 1 \\ -i & 1 & 5
	\end{bmatrix}\] is strictly diagonally dominant, and draw its Gershgorin discs.}
% Do this. Then note that its eigenvalues are contained in the interval [1,7], thus positive definite.
% Compute its eigenvalues using software and put them here.


\newpage


In particular, since the eigenvalues of the previous matrix were positive, it was necessarily positive definite. This same type of argument works in general, and leads immediately to the following theorem:

\begin{theorem}[Diagonal Dominance Implies PSD]\label{thm:diag_dom_psd}
	Suppose that $A = A^* \in \M_n(\C)$ has non-negative diagonal entries.
	\begin{enumerate}[label=\alph*)]
		\item If $A$ is diagonally dominant then it is positive semidefinite.
		
		\item If $A$ is strictly diagonally dominant then it is positive definite.
	\end{enumerate}
\end{theorem}

{\color{red}\textbf{Be careful:}} this is a one-way theorem! DD implies PSD, but PSD does not imply DD. For example,

\horlines{3}
% A = [1 1 1;1 1 1;1 1 1] is PSD with eigenvalues 3, 0, 0, but not diagonally dominant.


\section*{Unitary Freedom of PSD Decompositions}

We saw earlier that for every positive semidefinite matrix $A$ we can find a matrix $B$ such that $A = B^*B$. However, this matrix $B$ is not unique, since if $U$ is a unitary matrix and we define $C = UB$ then

\horlines{1}
% $C^*C = (UB)^*(UB) = B^*U^*UB = B^*B = A$ as well.

\noindent The following theorem says that we can find \emph{all} decompositions of $A$ using this same procedure:

% Double-check that this proof still works if F = R (originally wrote it for C, but need R too)

\begin{theorem}[Unitary Freedom of PSD Decompositions]\label{thm:psd_decomp_characterize}
	Suppose $\mathbb{F} = \R$ or $\mathbb{F} = \C$, and $B,C \in \M_{m,n}(\mathbb{F})$. Then $B^*B = C^*C$ if and only if there exists a unitary matrix $U \in \M_{m}(\mathbb{F})$ such that $C = UB$.
\end{theorem}

For the purpose of saving time, we do not show the ``only if'' direction of the proof here (it is in the textbook, in case you are interested).


\newpage

The previous theorem raises the question of how simple we can make the matrix $B$ in a positive semidefinite decomposition $A = B^*B$. The following theorem provides one possible answer: we can choose $B$ so that it is also positive semidefinite.

\begin{theorem}[Principal Square Root of a Matrix]\label{thm:principal_square_root}
	Suppose $\mathbb{F} = \R$ or $\mathbb{F} = \C$, and $A \in \M_n(\mathbb{F})$ is positive semidefinite. Then there exists a unique positive semidefinite matrix $P \in \M_n(\mathbb{F})$, called the \textbf{principal square root} of $A$, such that
	\[
		A = P^2% = P^*P (draw this in)
	\]
\end{theorem}

\begin{proof}
	To see that such a matrix $P$ exists, we use our usual diagonalization arguments.
	
	\horlines{7}\vspace*{-1.3cm}
%	Specifically, we use the spectral decomposition to write $A = UDU^*$, where $U$ is unitary and $D$ is diagonal with non-negative real numbers (the eigenvalues of $A$) as its diagonal entries. If we then define $P = U\sqrt{D}U^*$, where $\sqrt{D}$ is the diagonal matrix that contains the non-negative square roots of the entries of $D$, then \[P^2 = (U\sqrt{D}U^*)(U\sqrt{D}U^*) = U\sqrt{D}^2U^* = UDU^* = A,\]as desired.
%	
% Uniqueness is a bit of a pain (see textbook), but the basic idea is that if Q has particular eigs, then Q^2 has same eigenvectors with squared eigenvalues.
\end{proof}

The principal square root $P$ of a matrix $A$ is typically denoted by $P = \sqrt{A}$, and is in analogy with the principal square root of a non-negative real number (indeed, for $1 \times 1$ matrices they are the exact same thing).

\exx[6]{Find the principal square root of...} % ask class for a 2x2 psd matrix. Easy to make sure it's PSD via previous theorem (diag. dominant for example)
% eigs are 16 and 4. Sqrt is [3, -1;-1 3]


\newpage


\horlines{3}

By combining our previous two theorems, we also recover a new matrix decomposition, which answers the question of how simple we can make a matrix by multiplying it on the left by a unitary matrix---we can always make it positive semidefinite.

\begin{theorem}[Polar Decomposition]\label{thm:polar_decomp}
	Suppose $\mathbb{F} = \R$ or $\mathbb{F} = \C$, and $A \in \M_n(\mathbb{F})$. Then there exists a unitary matrix $U \in \M_n(\mathbb{F})$ and a positive semidefinite matrix $P \in \M_n(\mathbb{F})$ such that
	\[
		A = UP.
	\]
\end{theorem}

\begin{proof}
	Since $A^*A$ is positive semidefinite, we know from the previous theorem that
	
	\horlines{3}\vspace*{-1.3cm}
	% we can define $P = \sqrt{A^*A}$ so that $A^*A = P^2 = P^*P$ and $P$ is positive semidefinite. We then know from Theorem~\ref{thm:psd_decomp_characterize} that there exists a unitary matrix $U \in \M_n(\C)$ such that $A = UP$.
\end{proof}

The matrix $\sqrt{A^*A}$ in the polar decomposition can be thought of as the ``matrix version'' of the absolute value of a complex number $|z| = \sqrt{\overline{z}z}$. In fact, this matrix is sometimes even denoted by $|A| = \sqrt{A^*A}$. Similarly, the polar decomposition of a matrix generalizes the polar form of a complex number:

\horlines{5}
% generalizes the fact that that every complex number $z \in \C$ can be written in the form $z = re^{i\theta}$, where $r = |z|$ is non-negative and $e^{i\theta}$ lies on the unit circle in the complex plane.
% Makes sense since unitaries are like rotations, and PSD matrices are like matrices that don't rotate too much (but can stretch). Put them together, you get all matrices.

We don't know how to compute the polar decomposition yet (since we skipped a proof earlier this week), but we will learn a method soon.


\newpage


Over the past couple of weeks, we learned about several new families of matrices. It is worth drawing a diagram illustrating their relationships with each other:

\horlines{6}
% DRAW a picture with diagonalizable, normal, unitary, hermitian, and skew-hermitian like in the textbook. Maybe diagonally dominant too

It is also worth noting that many of these families of matrices are analogous to important subsets of the complex plane:

\horlines{6}
% Something like the complex plane, but with real line as Hermitian, etc.

\end{document}