% DEFINE some information that will be populated throughout the course notes.
\def \coursename {Advanced Linear Algebra}
\def \coursecode {MATH 3221}
\def \courseterm {Fall 2020}
\def \instructorname {Nathan Johnston}
% END DEFINITIONS

% IMPORT the course note formatting and templates
\input{course_notes_template}
% END IMPORT

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\setcounter{chapter}{5} % Set to one less than the week number
\chapter{Adjoints and Unitaries}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

{\large This week we will learn about:
\begin{itemize}
\item The adjoint of a linear transformation, and
\item Unitary transformations and matrices.
\end{itemize}\bigskip\bigskip

\noindent Extra reading and watching:
\begin{itemize}
\item Sections 1.4.2 and 1.4.3 in the textbook
\item Lecture videos \href{https://www.youtube.com/watch?v=G_pSlt7wVY4&list=PLOAf1ViVP13jdhvy-wVS7aR02xnDxueuL&index=24}{23} and \href{https://www.youtube.com/watch?v=AJUw6ooN2Ig&list=PLOAf1ViVP13jdhvy-wVS7aR02xnDxueuL&index=25}{24} on YouTube
\item \href{https://en.wikipedia.org/wiki/Unitary_matrix}{Unitary matrix} at Wikipedia
\end{itemize}\bigskip\bigskip

\noindent Extra textbook problems:
\begin{itemize}
\item[$\star$] 1.4.5(b,c,e,f), 1.4.8
\item[$\phantom{\star}\star\star$] 1.4.3, 1.4.9--1.4.14, 1.4.21, 1.4.22
\item[$\star\star\star$] 1.4.6, 1.4.15, 1.4.18
\item[$\skull$] 1.4.19, 1.4.28
\end{itemize}}

\newpage

We now introduce the adjoint of a linear transformation, which we can think of as a way of generalizing the transpose of a real matrix to linear transformations between arbitrary inner product spaces.

\begin{definition}[Adjoint Transformations]
Suppose that $\V$ and $\W$ are inner product spaces and $T : \V \rightarrow \W$ is a linear transformation. Then a linear transformation $T^* : \W \rightarrow \V$ is called the \textbf{adjoint} of $T$ if\\[0.1cm]
\[ {}%
\langle T(\v),\w \rangle = \langle\v,T^*(\w)\rangle \quad \text{for all} \quad \v \in \V \ \text{and} \ \w \in \W.
\]
\end{definition}

For example, the adjoint of a matrix $A \in \M_{m,n}(\R)$ is

\horlines{8}

% Think of A as a linear transformation from R^n to R^m. We want a linear transformation B from R^m to R^n that satisfies (Av) \cdot w = v \cdot (Bw) for all v, w. Choose v and w to be standard basis vectors to see that B = A^T is the only solution. Thus the adjoint of A is A^T.

Similarly, the adjoint of a matrix $A \in \M_{m,n}(\C)$ is

\horlines{3}

% A^*. Don't actually do calculation, just note that it's similar to above. Note that this is WHY we use A^* with complex matrices instead of A^T.

So far, we have been a bit careless and referred to ``the'' adjoint of a matrix (linear transformation), even though, for all we know so far, a linear transformation might have more than one adjoint. The following theorem shows that, at least in finite dimensions, this is not actually a problem.

\newpage

\begin{theorem}[Existence and Uniqueness of Adjoints]
Suppose that $\V$ and $\W$ are finite-dimensional inner product spaces. For every linear transformation $T : \V \rightarrow \W$ there exists a unique adjoint transformation $T^* : \W \rightarrow \V$. Furthermore, if $B$ and $C$ are orthonormal bases of $\V$ and $\W$ respectively, then\\[0.1cm]
\[ {}%
\big[T^*\big]_{B\leftarrow C} = [T]_{C\leftarrow B}^*.
\]
\end{theorem}

% Emphasize that the final claim about standard matrices does NOT necessarily hold in non-orthonormal bases (there is an example in the textbook to illustrate this).
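Before proving this in general, it is worth sanity-checking the formula in the concrete setting from the previous page (a quick sketch, using the standard bases of $\R^n$ and $\R^m$, which are orthonormal): if $A \in \M_{m,n}(\R)$, then for all $\v \in \R^n$ and $\w \in \R^m$ we have
\[
(A\v)\cdot\w = (A\v)^T\w = \v^T\big(A^T\w\big) = \v\cdot\big(A^T\w\big),
\]
which confirms that $A^T$ satisfies the defining property of the adjoint of $A$, exactly as the theorem predicts.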
\begin{proof}
To prove uniqueness of $T^*$, suppose that $T^*$ exists, let $\v \in \V$ and $\w \in \W$, and compute $\langle T(\v), \w \rangle$ in two different ways:

\horlines{16}\vspace*{-1.3cm}

%\begin{align*}
%\langle T(\v), \w \rangle & = \langle \v, T^*(\w) \rangle & & \text{\color{gray}(definition of $T^*$)} \\
%& = [\v]_B \cdot [T^*(\w)]_B & & \text{\color{gray}(Theorem~\ref{thm:inner_product_finite_dim})} \\
%& = [\v]_B \cdot \big([T^*]_{B\leftarrow C}[\w]_C\big) & & \text{\color{gray}(definition of standard matrix)} \\
%& = [\v]_B^*[T^*]_{B\leftarrow C}[\w]_C. & & \text{\color{gray}(definition of dot product)}
%\end{align*}
%Similarly,
%\begin{align*}
%\langle T(\v), \w \rangle & = [T(\v)]_C \cdot [\w]_C & & \text{\color{gray}(Theorem~\ref{thm:inner_product_finite_dim})} \\
%& = \big([T]_{C\leftarrow B}[\v]_B\big) \cdot [\w]_C & & \text{\color{gray}(definition of standard matrix)} \\
%& = [\v]_B^*[T]_{C\leftarrow B}^*[\w]_C. & & \text{\color{gray}(definition of dot product)}
%\end{align*}
%It follows that $[\v]_B^*[T^*]_{B\leftarrow C}[\w]_C = [\v]_B^*[T]_{C\leftarrow B}^*[\w]_C$ for all $[\v]_B \in \mathbb{F}^n$ and all $[\w]_C \in \mathbb{F}^m$. Well, if we choose $\v$ to be the $i$-th vector in the basis $B$ and $\w$ to be the $j$-th vector in $C$, then $[\v]_B = \e_i$ and $[\w]_C = \e_j$, so $[\v]_B^*[T^*]_{B\leftarrow C}[\w]_C = \e_i^T[T^*]_{B\leftarrow C}\e_j$ is the $(i,j)$-entry of $[T^*]_{B\leftarrow C}$ and $[\v]_B^*[T]_{C\leftarrow B}^*[\w]_C = \e_i^T[T]_{C\leftarrow B}^*\e_j$ is the $(i,j)$-entry of $[T]_{C\leftarrow B}^*$. Since these are equal for all $i$ and $j$, it follows that $[T^*]_{B\leftarrow C} = [T]_{C\leftarrow B}^*$.
%
%Uniqueness of $T^*$ now follows immediately from uniqueness of standard matrices. Existence of $T^*$ follows from the fact that we can choose $T^*$ to be the linear transformation with standard matrix $[T]_{C\leftarrow B}^*$ and then follow the above argument backward to verify that $\langle T(\v), \w \rangle = \langle \v, T^*(\w) \rangle$ for all $\v \in \V$ and $\w \in \W$.
\end{proof}

\newpage

\exx[8]{Show that the adjoint of the transposition map $T : \M_{m,n} \rightarrow \M_{n,m}$, with respect to the Frobenius inner product, is also the transposition map.}
% METHOD 1:
% Our goal is to show that $\ip{A^T}{B} = \ip{A}{B^T}$ for all $A \in \M_{m,n}(\mathbb{F}), B \in \M_{n,m}(\mathbb{F})$. Recall that the Frobenius inner product is defined by $\ip{A}{B} = \tr(A^*B)$, so this is equivalent to
%\[
%\tr\big( \overline{A}B \big) = \tr\big( A^*B^T \big) \quad \text{for all} \quad A \in \M_{m,n}(\mathbb{F}), B \in \M_{n,m}(\mathbb{F}).
%\]
%These two quantities can be shown to be equal by brute-force calculation of the traces and matrix multiplications in terms of the entries of $A$ and $B$, but a more elegant way is to use properties of the trace and transpose:
%\begin{align*}
%\tr\big( \overline{A}B \big) & = \tr\big( \big(\overline{A}B\big)^T \big) & & \text{\color{gray}(transpose does not change trace)} \\
%& = \tr\big( B^TA^* \big) & & \text{\color{gray}(transpose of a product)} \\
%& = \tr\big( A^*B^T \big). & & \text{\color{gray}(cyclic commutativity of trace)}
%\end{align*}
% METHOD 2:
% Recall the standard matrix with respect to the standard (orthonormal) basis from last week (in the n = 2 case anyway). Conjugate transpose is the same, so T is self-adjoint.
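As a quick numerical spot check of the previous example (with matrices chosen here purely for illustration), let
\[
A = \begin{bmatrix} 1 & 2 \\ 3 & 4 \end{bmatrix} \quad \text{and} \quad B = \begin{bmatrix} 0 & 1 \\ 2 & 3 \end{bmatrix}.
\]
Then $\ip{A^T}{B} = 0 + 3 + 4 + 12 = 19$ and $\ip{A}{B^T} = 0 + 4 + 3 + 12 = 19$, which agree, just as the equation $\ip{T(A)}{B} = \ip{A}{T^*(B)}$ requires.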
The situation presented in the above example, where a linear transformation is its own adjoint, is important enough that we give it a name:

\begin{definition}[Self-Adjoint Transformations]\label{defn:self_adjoint}
Suppose that $\V$ is an inner product space. Then a linear transformation $T : \V \rightarrow \V$ is called \textbf{self-adjoint} if $T^* = T$.
\end{definition}

For example, a matrix in $\M_n(\R)$ is self-adjoint if and only if it is...

\horlines{1}\vspace*{-0.25cm}

% symmetric (i.e., $A = A^T$),

\noindent and a matrix in $\M_n(\C)$ is self-adjoint if and only if it is...

\horlines{1}\vspace*{-0.25cm}

% Hermitian (i.e., $A = A^*$).

\noindent Furthermore, a linear transformation is self-adjoint if and only if its standard matrix...

\horlines{2}

% is symmetric/Hermitian (with respect to some orthonormal basis). We saw this with the transpose map in the previous example.

\newpage

\section*{Unitary Transformations and Matrices}

In situations where the norm of a vector is important, it is often desirable to work with linear transformations that do not alter that norm. We now start investigating these linear transformations.

\begin{definition}[Unitary Transformations]\label{defn:unitary_transformation}
Let $\V$ and $\W$ be inner product spaces and let $T : \V \rightarrow \W$ be a linear transformation. Then $T$ is said to be \textbf{unitary} if
\[
\|T(\v)\| = \|\v\| \quad \text{for all} \quad \v \in \V.
\]
\end{definition}

We also say that a \emph{matrix} is unitary if it acts as a unitary linear transformation on $\mathbb{F}^n$.

\exx[11]{Show that the matrix $\displaystyle U = \frac{1}{\sqrt{2}}\begin{bmatrix}1 & -1 \\ 1 & 1\end{bmatrix}$ is unitary.}
% We have to show that $\|U\v\| = \|\v\|$ for all $\v \in \R^2$. We can verify this directly by computing
%\begin{align*}
% \|U\v\| & = \sqrt{\frac{1}{2}(v_1-v_2)^2 + \frac{1}{2}(v_1+v_2)^2} = (expand both brackets) = \sqrt{v_1^2 + v_2^2} = \|\v\|
%\end{align*}
%for all $\v \in \R^2$.\smallskip
% MENTION that this makes sense since this is a rotation matrix (CCW by pi/4)

Fortunately, there is a much simpler method of checking whether or not a matrix (or a linear transformation) is unitary, as demonstrated by the following theorem.

\newpage

\begin{theorem}[Characterization of Unitary Matrices]\label{thm:unitary_characterize}
Suppose $\mathbb{F} = \R$ or $\mathbb{F} = \C$, and $U \in \M_n(\mathbb{F})$. The following are equivalent:\smallskip
\begin{enumerate}[label=\alph*)]
\item $U$ is unitary,
\item $U^*U = I$,
\item $UU^* = I$,
\item $(U\v) \cdot (U\w) = \v \cdot \w$ for all $\v,\w \in \mathbb{F}^n$,
\item The columns of $U$ are an orthonormal basis of $\mathbb{F}^n$, and
\item The rows of $U$ are an orthonormal basis of $\mathbb{F}^n$.
\end{enumerate}
\end{theorem}

It is worth comparing these properties to corresponding properties of invertible matrices:

\horlines{5}

% Table 1.2 from text.
% Invertible: P-1 exists. Unitary: U-1 = U*
% ||Pv|| \neq 0 whenever ||v|| \neq 0, VERSUS ||Uv|| = ||v|| for all v
% columns of P are a basis, VERSUS columns of U are an ONB
% Also worth noting that (d) says unitaries are exactly the matrices that preserve angles (even though by definition they just preserve norms)
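For instance, property (e) is easy to verify for the matrix $U = \frac{1}{\sqrt{2}}\begin{bmatrix}1 & -1 \\ 1 & 1\end{bmatrix}$ from the previous example: its columns $\u_1 = \frac{1}{\sqrt{2}}(1,1)$ and $\u_2 = \frac{1}{\sqrt{2}}(-1,1)$ satisfy $\u_1 \cdot \u_1 = \u_2 \cdot \u_2 = 1$ and $\u_1 \cdot \u_2 = 0$, so they form an orthonormal basis of $\R^2$.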
\begin{proof}[Proof of Theorem~\ref{thm:unitary_characterize}.]
We do not prove all equivalences of this theorem -- for that you can see the textbook. But we will demonstrate some of them in order to give an idea of why this theorem is true.\\
The equivalence of (b) and (c) follows from the fact that

\horlines{1}

% a one-sided inverse is necessarily a two-sided inverse (a theorem from MATH~2221).

To see that (d) $\implies$ (b), note that if we rearrange the equation $(U\v)\cdot(U\w) = \v \cdot \w$ slightly, we get

\horlines{2}

\newpage

\horlines{2}

% \v\cdot(U^*U\w) = \v \cdot \w, so \v\cdot((U^*U - I)\w) = 0 for all v, w.
% If we choose $\v = (U^*U - I)\w$ then this implies
% \|(U^*U - I)\w\|^2 = 0 for all w.
% This implies $(U^*U - I)\w = 0$ for all $\w$, so $U^*U = I$.

To see that (b) implies (a), suppose $U^*U = I$. Then for all $\v \in \mathbb{F}^n$ we have

\horlines{2}

% \|U\v\|^2 = (U\v)\cdot(U\v) = \v\cdot(U^*U\v) = \v\cdot\v = \|\v\|^2,
% so U is unitary.

To see that (b) is equivalent to (e), write $U$ in terms of its columns $U = \big[ \ \u_1 \ {\color{gray}|} \ \u_2 \ {\color{gray}|} \ \cdots \ {\color{gray}|} \ \u_n \ \big]$ and then use block matrix multiplication to multiply by $U^*$:

\horlines{6}

%\begin{align*}
% U^*U & = \left[\begin{array}{c}
% \u_1^* \\\hline
% \u_2^* \\\hline
% \vdots \\\hline
% \u_n^*
% \end{array}\right]\begin{bmatrix} \ \u_1 \ {\color{gray}|} \ \u_2 \ {\color{gray}|} \ \cdots \ {\color{gray}|} \ \u_n \ \ \end{bmatrix} = \begin{bmatrix}
% \u_1\cdot\u_1 & \u_1\cdot\u_2 & \cdots & \u_1\cdot\u_n \\
% \u_2\cdot\u_1 & \u_2\cdot\u_2 & \cdots & \u_2\cdot\u_n \\
% \vdots & \vdots & \ddots & \vdots \\
% \u_n\cdot\u_1 & \u_n\cdot\u_2 & \cdots & \u_n\cdot\u_n
% \end{bmatrix},
%\end{align*}
%which equals $I$ if and only if its diagonal entries equal $1$ and its off-diagonal entries equal $0$. In other words, $U^*U = I$ if and only if $\u_i \cdot \u_i = 1$ for all $i$ and $\u_i \cdot \u_j = 0$ whenever $i \neq j$. This says exactly that $\{\u_i\}$ is a set of mutually orthogonal unit vectors, and since it consists of $n$ vectors, it is an orthonormal basis of $\mathbb{F}^n$.

\noindent The remaining implications can be proved using similar techniques.
\end{proof}

Checking whether or not a matrix is unitary is now quite simple, since we just have to check whether or not $U^*U = I$. For example, if we again return to the matrix
\[
U = \frac{1}{\sqrt{2}}\begin{bmatrix} 1 & -1 \\ 1 & 1\end{bmatrix}
\]
from earlier:

\horlines{2}

%\[
%U^*U = \frac{1}{2}\begin{bmatrix} 1 & 1 \\ -1 & 1\end{bmatrix}\begin{bmatrix} 1 & -1 \\ 1 & 1\end{bmatrix} = \begin{bmatrix} 1 & 0 \\ 0 & 1\end{bmatrix}.
%\]
% Thus unitary.
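The same test works just as well over the complex numbers. For example (a matrix chosen here purely for illustration), if
\[
U = \frac{1}{\sqrt{2}}\begin{bmatrix} 1 & i \\ i & 1\end{bmatrix} \quad \text{then} \quad U^*U = \frac{1}{2}\begin{bmatrix} 1 & -i \\ -i & 1\end{bmatrix}\begin{bmatrix} 1 & i \\ i & 1\end{bmatrix} = \begin{bmatrix} 1 & 0 \\ 0 & 1\end{bmatrix},
\]
so this $U$ is unitary too. Notice that the conjugate transpose really is needed here, since $U^TU \neq I$.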
More generally, every rotation matrix and reflection matrix is unitary, as we now demonstrate.

\newpage

\exx[8]{Show that every rotation matrix $U \in \M_2(\R)$ is unitary.}
% Since rotation matrices do not change the length ($2$-norm) of vectors, we know that rotation matrices must be unitary. To see this algebraically, we recall that rotation matrices have the form (JOKE ABOUT THIS -- STUDENTS WILL HAVE FORGOTTEN)
%\[
%U = \begin{bmatrix}
%\cos(\theta) & -\sin(\theta) \\
%\sin(\theta) & \cos(\theta)
%\end{bmatrix}
%\]
%for some $\theta \in \R$, and we compute $U^*U$:
%\begin{align*}
% U^*U & = \begin{bmatrix}
% \cos(\theta) & \sin(\theta) \\
% -\sin(\theta) & \cos(\theta)
% \end{bmatrix}\begin{bmatrix}
% \cos(\theta) & -\sin(\theta) \\
% \sin(\theta) & \cos(\theta)
% \end{bmatrix} \\
% & = \begin{bmatrix}
% \cos^2(\theta) + \sin^2(\theta) & -\cos(\theta)\sin(\theta)+\sin(\theta)\cos(\theta) \\
% -\sin(\theta)\cos(\theta)+\cos(\theta)\sin(\theta) & \sin^2(\theta) + \cos^2(\theta)
% \end{bmatrix} \\
% & = \begin{bmatrix}
% 1 & 0 \\
% 0 & 1
% \end{bmatrix}.
%\end{align*}
%Since $U^*U = I$, we conclude that $U$ is unitary.

\exx[8]{Show that every reflection matrix $U \in \M_n(\R)$ is unitary.}
% Again, reflection matrices do not change the length ($2$-norm) of vectors, so we know that reflection matrices must be unitary. To see this algebraically, we recall that reflection matrices have the form $U = 2\u\u^T - I$ for some unit vector $\u \in \R^n$, and we compute $U^*U$:
%\begin{align*}
% U^*U = (2\u\u^T - I)^*(2\u\u^T - I) & = 4\u(\u^T\u)\u^T - 4\u\u^T + I \\
% & = 4\u\u^T - 4\u\u^T + I = I,
%\end{align*}
%where the third equality comes from the fact that $\u$ is a unit vector, so $\u^T\u = \|\u\|^2 = 1$. Since $U^*U = I$, we conclude that $U$ is unitary.

In fact, the previous two examples provide exactly the intuition that you should have for unitary matrices---they are the ones that rotate and/or reflect $\mathbb{F}^n$, but do not stretch, shrink, or otherwise ``distort'' it. They can be thought of as ``rigid'' linear transformations that leave the size and shape of $\mathbb{F}^n$ intact, but possibly change its orientation.

\newpage

\end{document}