% DEFINE some information that will be populated throughout the course notes.
\def \coursename {Linear Algebra}
\def \coursecode {MATH 2221}
\def \courseterm {Winter 2020}
\def \instructorname {Nathan Johnston}
% END DEFINITIONS

% IMPORT the course note formatting and templates
\input{course_notes_template}
% END IMPORT

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\setcounter{chapter}{2} % Set to one less than the week number
\chapter{Matrices and Matrix Operations}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

{\large This week we will learn about:
\begin{itemize}
	\item Matrices,
	\item Matrix addition, scalar multiplication, and matrix multiplication,
	\item The transpose, and
	\item Matrix powers and adjacency matrices of graphs.
\end{itemize}\bigskip\bigskip

\noindent Extra reading and watching:
\begin{itemize}
	\item Section 1.3 in the textbook
	\item Lecture videos \href{https://www.youtube.com/watch?v=ytOfBfUZKZM&list=PLOAf1ViVP13jmawPabxnAa00YFIetVqbd&index=8}{8}, \href{https://www.youtube.com/watch?v=VoT5g5e3hy8&list=PLOAf1ViVP13jmawPabxnAa00YFIetVqbd&index=9}{9}, \href{https://www.youtube.com/watch?v=ow7K6_3f33I&list=PLOAf1ViVP13jmawPabxnAa00YFIetVqbd&index=10}{10}, \href{https://www.youtube.com/watch?v=IbCpt-Vm7J8&list=PLOAf1ViVP13jmawPabxnAa00YFIetVqbd&index=11}{11}, and \href{https://www.youtube.com/watch?v=KCUgWj5nhYc&list=PLOAf1ViVP13jmawPabxnAa00YFIetVqbd&index=12}{12} on YouTube
	\item \href{http://en.wikipedia.org/wiki/Matrix_multiplication}{Matrix multiplication} at Wikipedia
	\item \href{http://en.wikipedia.org/wiki/Transpose}{Transpose} at Wikipedia
\end{itemize}\bigskip\bigskip

\noindent Extra textbook problems:
\begin{itemize}
	\item[$\star$] 1.3.1, 1.3.2, 1.3.4, 1.3.12%, 1.B.1, 1.B.2, 1.B.4(a--c), 1.B.5(a--c)
	\item[$\star \, \star$] 1.3.3, 1.3.5--1.3.7, 1.3.9, 1.3.11, 1.3.13--1.3.15%, 1.B.6(a--f), 1.B.7, 1.B.8
	\item[$\star\star\star$] 1.3.8%, 1.B.9, 1.B.10
	\item[$\skull$] none this week
\end{itemize}}

\newpage

\section*{Matrices}

Previously, we introduced vectors, which can be thought of as 1D lists of numbers. Now we start working with matrices, which are 2D arrays of numbers:

\begin{definition}[Matrices]
	A \textbf{matrix} is a rectangular array of numbers. Those numbers are called the \textbf{entries} or \textbf{elements} of the matrix.
\end{definition}

\exx{Examples of matrices.}
% Ask for examples from class. Give a non-example where the rows and columns don't match up, maybe.

The \textbf{size} of a matrix is a description of the number of rows and columns that it has. A matrix with $m$ rows and $n$ columns has size $m \times n$.
% Emphasize that rows are the first number, columns second. Give sizes of matrices above.
% Use "RC car" or "RC cola" mnemonic if it helps.

\horlines{4}

A $1 \times n$ matrix is called a \textbf{row matrix} or \textbf{row vector}. An $m \times 1$ matrix is called a \textbf{column matrix} or \textbf{column vector}. An $n \times n$ matrix is called \textbf{square}.
% Give an example of each. Maybe note that non-square matrices are sometimes called rectangular?

\horlines{4}

\newpage

We use double subscripts to specify individual entries of a matrix: the entry of the matrix $A$ in row $i$ and column $j$ is denoted by $a_{i,j}$. For example, if
% Make up a 2-by-3 matrix

\horlines{3}

\noindent then $a_{1,3} = \quad \quad \quad$ and $a_{2,2} = \quad \quad \quad$. \\

Similarly, when we say ``the $(i,j)$-entry of $A$'', we mean $a_{i,j}$.
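For instance, with the $2 \times 3$ matrix (made up just for illustration)
\[
B = \begin{bmatrix} 7 & 0 & -2 \\ 1 & 5 & 3 \end{bmatrix},
\]
we have $b_{1,3} = -2$ and $b_{2,1} = 1$: the row index always comes first.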
Another notation for $a_{i,j}$ is $[A]_{i,j}$, and we will see some examples shortly where this notation is advantageous. \\

With this notation in mind, a general $m \times n$ matrix $A$ has the following form:
\begin{align*}
	A = \begin{bmatrix}
		a_{1,1} & a_{1,2} & \cdots & a_{1,n} \\
		a_{2,1} & a_{2,2} & \cdots & a_{2,n} \\
		\vdots & \vdots & \ddots & \vdots \\
		a_{m,1} & a_{m,2} & \cdots & a_{m,n}
	\end{bmatrix}.
\end{align*}

Two matrices are \textbf{equal} if they have the same size and all of their entries (in the same positions) are equal to each other.

\exx[5]{Some (un)equal matrices.}
% Make up some examples. Have some that are a different size, some that are the same size.
% Also do a matrix and its transpose. Same entries, but not equal.

\noindent We use $\M_{m,n}$ to denote the set of $m \times n$ matrices, and the shorthand $\M_n$ for the set of $n \times n$ matrices.

\newpage

Just like we could add vectors and multiply vectors by scalars, we can also add matrices and multiply matrices by scalars, and the definitions of these operations are exactly what you would expect:

% DEFINITION: Matrix operations
\begin{definition}[Matrix Addition and Scalar Multiplication]\label{defn:matrix_operations}
	Suppose $A$ and $B$ are $m \times n$ matrices, and $c\in\R$ is a scalar. Then their \textbf{sum} $A+B$ is the matrix whose $(i,j)$-entry is $a_{i,j}+b_{i,j}$, and the \textbf{scalar multiple} $cA$ is the matrix whose $(i,j)$-entry is $ca_{i,j}$.
\end{definition}
% END DEFINITION

In other words, these operations are performed entry-wise. Note that the sum $A+B$ only makes sense when $A$ and $B$ have the same size.

\exx[8]{Matrix addition and scalar multiplication.}
% Just get examples from class. Easy. Do one where sizes are different so we can't add them.

Matrix subtraction is defined analogously:
% A - B = A + (-1)*B is the matrix whose (i,j)-entry is a_{i,j}-b_{i,j}.
% Do a simple 2x2 example.

\horlines{4}

\newpage

Matrix addition, subtraction, and scalar multiplication satisfy all of the ``natural'' properties you might expect (e.g., $A+B = B+A$). We state these properties as a theorem:

\begin{theorem}[Properties of Matrix Operations]
	Let $A,B,C \in \M_{m,n}$ and let $c,d \in \R$ be scalars. Then
	\begin{enumerate}[label=\alph*)]
		\item $A + B = B + A$ \hfill {\color{gray}(commutativity)}
		\item $(A + B) + C = A + (B + C)$ \hfill {\color{gray}(associativity)}
		\item $c(A + B) = cA + cB$ \hfill {\color{gray}(distributivity)}
		\item $(c+d)A = cA + dA$ \hfill {\color{gray}(distributivity)}
		\item $c(dA) = (cd)A$
	\end{enumerate}
\end{theorem}

\begin{proof}
	We will only prove part~(c) of the theorem. The remaining parts of the theorem can be proved similarly: just use the definition of matrix addition and use the fact that all of these properties hold for addition of real numbers.

	\horlines{5}
	% (i,j)-entry of c(A+B) is c(a_ij + b_ij) = ca_ij + cb_ij, which is the (i,j)-entry of cA + cB
	% Use square bracket notation when proving. i.e., [c(A+B)]_{i,j} = c[A+B]_{i,j} = ...

	\noindent This completes the proof.
\end{proof}

% END OF CLASS 6
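As a quick numerical sanity check of part~(c) (with a scalar and matrices chosen at random, not from the textbook):
\[
2\left(\begin{bmatrix} 1 & 2 \\ 3 & 4 \end{bmatrix} + \begin{bmatrix} 0 & 1 \\ 1 & 0 \end{bmatrix}\right)
= 2\begin{bmatrix} 1 & 3 \\ 4 & 4 \end{bmatrix}
= \begin{bmatrix} 2 & 6 \\ 8 & 8 \end{bmatrix}
= \begin{bmatrix} 2 & 4 \\ 6 & 8 \end{bmatrix} + \begin{bmatrix} 0 & 2 \\ 2 & 0 \end{bmatrix}.
\]
A single numerical check like this is not a proof, but it is a good way to catch a wrong formula.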
\section*{Matrix Multiplication}

What about matrix multiplication? Recall that multiplication was a bit tricky with vectors: we only saw the dot product, which ``multiplied'' two vectors to give us a number. Matrix multiplication is a bit different from this, and looks quite messy and ugly at first glance. So hold onto your hats...
\\ \newpage

\begin{definition}[Matrix Multiplication]
	If $A$ is an $m \times n$ matrix and $B$ is an $n \times p$ matrix, then their \textbf{product} $AB$ is the $m \times p$ matrix whose $(i,j)$-entry is
	\begin{align*}
		[AB]_{i,j} \defeq a_{i,1}b_{1,j} + a_{i,2}b_{2,j} + \cdots + a_{i,n}b_{n,j}.
	\end{align*}
\end{definition}

In other words, the product $AB$ is the matrix whose entries are all of the possible dot products of the rows of $A$ with the columns of $B$.

\horlines{5}
% Draw an illustration like the first figure in Section 1.3.2 of the textbook.
% MAYBE INSERT week3/matrix_dot_product.png

We emphasize that the matrix product $AB$ only makes sense if $A$ has the same number of \emph{columns} as $B$ has \emph{rows}. For example, it does not make sense to multiply a $2 \times 3$ matrix by another $2\times 3$ matrix, but it does make sense to multiply a $2 \times 3$ matrix by a $3 \times 7$ matrix.

\exx[8]{Compute the product of two matrices.}
% Get a 2-by-3 and 2-by-3 and 3-by-2(?) example from the class.
% Be very slow and methodical when computing. Compute the first 2 or 3 entries very explicitly. Highlight rows and columns when multiplying them.

When performing matrix multiplication, double-check that the sizes of your matrices actually make sense. In particular, the inner dimensions of the matrices must be equal, and the outer dimensions of the matrices will be the dimensions of the matrix product:

\horlines{2}
% Figure with arrows pointing to sizes like the image above Theorem 1.3.2 in the textbook.

As always, we have defined a new operation (matrix multiplication), so we want to know what properties it satisfies.

\begin{theorem}[Properties of Matrix Multiplication]
	Let $A,B$, and $C$ be matrices (with sizes such that all of the multiplications below make sense) and let $c \in \R$ be a scalar. Then
	\begin{enumerate}[label=\alph*)]
		\item $(AB)C = A(BC)$ \hfill {\color{gray}(associativity)}
		\item $A(B + C) = AB + AC$ \hfill {\color{gray}(left distributivity)}
		\item $(A+B)C = AC + BC$ \hfill {\color{gray}(right distributivity)}
		\item $c(AB) = (cA)B = A(cB)$
	\end{enumerate}
\end{theorem}

\begin{proof}
	We will only prove part~(b) of the theorem. The remaining parts of the theorem can be proved similarly: just use the definition of matrix multiplication.

	\horlines{7}\vspace*{-1.4cm}
	% The $(i,j)$-entry of $A(B+C)$ is
	%     a_{i,1}(b_{1,j}+c_{1,j}) + a_{i,2}(b_{2,j}+c_{2,j}) + \cdots + a_{i,n}(b_{n,j}+c_{n,j}),
	% whereas the $(i,j)$-entry of $AB+AC$ is
	%     (a_{i,1}b_{1,j} + a_{i,2}b_{2,j} + \cdots + a_{i,n}b_{n,j})+(a_{i,1}c_{1,j} + a_{i,2}c_{2,j} + \cdots + a_{i,n}c_{n,j}).
	% It is straightforward to see that these two quantities are equal. Thus each entry of $A(B+C)$ equals the corresponding entry of $AB+AC$, so the matrices themselves are the same.
\end{proof}

\newpage

Notice that we did \emph{not} say anything about commutativity (i.e., we did not claim that $AB = BA$). Why not?

\exx[8]{Commutativity of matrix multiplication?}
% Ask for example from the class. Just do 2x2 example. Will be non-commutative.
% Note that *sometimes* the same, but not always.
% In fact, BA might not even exist, even if AB does exist.
% In general, operations are not commutative: Would you rather I take all your money and then give you a million dollars, or give you a million dollars and then take all your money?

\exx[6]{FOILing matrices.}
% Does (A+B)^2 = A^2 + 2AB + B^2?
% No, only if commute.
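In case you would like a concrete non-commuting pair to check your work against later (these two matrices were made up for illustration):
\[
\begin{bmatrix} 1 & 1 \\ 0 & 1 \end{bmatrix}\begin{bmatrix} 1 & 0 \\ 1 & 1 \end{bmatrix} = \begin{bmatrix} 2 & 1 \\ 1 & 1 \end{bmatrix},
\quad \text{whereas} \quad
\begin{bmatrix} 1 & 0 \\ 1 & 1 \end{bmatrix}\begin{bmatrix} 1 & 1 \\ 0 & 1 \end{bmatrix} = \begin{bmatrix} 1 & 1 \\ 1 & 2 \end{bmatrix}.
\]
For the same reason, FOILing only gives $(A+B)^2 = A^2 + AB + BA + B^2$, which collapses to $A^2 + 2AB + B^2$ exactly when $AB = BA$.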
One particularly important square matrix is the one that consists entirely of $0$ entries, except with $1$s on its diagonal. This is called the \textbf{identity matrix}, and it is denoted by $I$ (or sometimes $I_n$ if we want to emphasize that it is $n \times n$). \\

Similarly, the \textbf{zero matrix} is the one with all entries equal to $0$. We denote it by $O$ (or $O_{m,n}$ if we care that it is $m \times n$).

\newpage

\exx[5]{The identity matrix and zero matrix.}
% Multiply the identity matrix by some 2-by-2 matrix. Do same with O matrix.

\noindent The previous example suggests the following general result, which is indeed true:

\begin{theorem}[Multiplication by Identity or Zero]\label{thm:identity_matrix}
	If $A \in \M_{m,n}$ then $AI_n = A = I_mA$ and $AO_n = O_{m,n} = O_mA$.
\end{theorem}

\noindent We won't prove the above theorem, but hopefully it seems believable enough.
% Mention that we can think of I and O as the "matrix versions" of the numbers 1 and 0.

\exx[4]{Diagonal matrices.}
% Define diagonal matrix.
% Multiply two specific diagonal matrices.

In general, the product of two diagonal matrices is just the entry-wise product of the two matrices:

\horlines{4}
%\begin{bmatrix}
%c_1 & 0 & \cdots & 0 \\ 0 & c_2 & \cdots & 0 \\ \vdots & \vdots & \ddots & \vdots \\ 0 & 0 & \cdots & c_n\end{bmatrix}\begin{bmatrix}
%d_1 & 0 & \cdots & 0 \\ 0 & d_2 & \cdots & 0 \\ \vdots & \vdots & \ddots & \vdots \\ 0 & 0 & \cdots & d_n\end{bmatrix} = \begin{bmatrix}
%c_1d_1 & 0 & \cdots & 0 \\ 0 & c_2d_2 & \cdots & 0 \\ \vdots & \vdots & \ddots & \vdots \\ 0 & 0 & \cdots & c_nd_n\end{bmatrix}

\newpage

% END OF CLASS 7

\section*{The Transpose of a Matrix}

We now introduce an operation on matrices that changes the shape of a matrix, but not its contents. Specifically, it swaps the roles of the rows and columns of a matrix:

% DEFINITION: Transpose
\begin{definition}[The Transpose]
	Suppose $A \in \M_{m,n}$ is an $m \times n$ matrix. Then its \textbf{transpose}, which we denote by $A^T$, is the $n \times m$ matrix whose $(i,j)$-entry is $a_{j,i}$.
\end{definition}
% END DEFINITION

Intuitively, the transpose of a matrix is obtained by mirroring it across its main diagonal.

\exx[5]{Let's compute a transpose or two.}
% Examples from class. Easy to do.
% Also draw general picture with a short fat matrix being transposed into a tall skinny one.

Let's now think about some basic properties that the transpose satisfies:

\begin{theorem}[Properties of the Transpose]\label{thm:transpose_properties}
	Let $A$ and $B$ be matrices (with sizes such that the operations below make sense) and let $c \in \R$ be a scalar. Then
	\begin{enumerate}[label=\alph*)]
		\item $(A^T)^T = A$
		\item $(A+B)^T = A^T + B^T$
		\item $(AB)^T = B^TA^T$
		\item $(cA)^T = cA^T$
	\end{enumerate}
\end{theorem}

\newpage

\begin{proof}
	Parts~(a),~(b), and~(d) of the theorem are intuitive enough, so we will only prove part~(c):

	\horlines{10}\vspace*{-1.4cm}
	% Compute [(AB)^T]_{i,j} = [AB]_{j,i} = a_{j,1}b_{1,i} + a_{j,2}b_{2,i} + \cdots + a_{j,n}b_{n,i},
	% and
	% [B^TA^T]_{i,j} = [B^T]_{i,1}[A^T]_{1,j} + [B^T]_{i,2}[A^T]_{2,j} + \cdots + [B^T]_{i,n}[A^T]_{n,j} = b_{1,i}a_{j,1} + b_{2,i}a_{j,2} + \cdots + b_{n,i}a_{j,n}.
	% Since these two quantities are the same, we conclude that $[(AB)^T]_{i,j} = [B^TA^T]_{i,j}$ for all $i,j$, so $(AB)^T = B^TA^T$, as desired.
\end{proof}

As a bit of a side note: would you have initially guessed that $(AB)^T = A^TB^T$? Situations like this are why we prove things rather than just guessing based on what ``looks believable''.
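If you would like a numerical example of part~(c) to check against (matrices invented for this purpose), take
\[
A = \begin{bmatrix} 1 & 2 \\ 3 & 4 \end{bmatrix} \quad \text{and} \quad B = \begin{bmatrix} 0 & 1 \\ 1 & 1 \end{bmatrix},
\quad \text{so that} \quad
(AB)^T = \begin{bmatrix} 2 & 3 \\ 4 & 7 \end{bmatrix}^T = \begin{bmatrix} 2 & 4 \\ 3 & 7 \end{bmatrix} = B^TA^T,
\]
whereas $A^TB^T = \begin{bmatrix} 3 & 4 \\ 4 & 6 \end{bmatrix}$, which is not equal to $(AB)^T$.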
\exx[8]{Let's compute some more transposes.}
% Compute A^TB^T (AB)^T and B^TA^T for some randomly-chosen A and B. Make A have size 3-by-2 and B have size 2-by-2 so that A^TB^T does not even exist.

\newpage

\exx[3]{Transpose of the product of many matrices.}
% (ABC)^T = C^TB^TA^T
% etc.

The transpose has the useful property that it converts a column vector into the corresponding row vector, and vice-versa. Furthermore, if $\v,\w \in \R^n$ are column vectors, then we can use our usual matrix multiplication rule to see that

\vspace*{1.5em}
\[
\v^T\w = \qquad \qquad \qquad \qquad \qquad \qquad \qquad \qquad \qquad \qquad \qquad \qquad \qquad \qquad
%\begin{bmatrix}v_1 & v_2 & \cdots & v_n\end{bmatrix}\begin{bmatrix}w_1 \\ w_2 \\ \vdots \\ w_n\end{bmatrix} = v_1w_1 + v_2w_2 + \cdots + v_nw_n = \v\cdot\w.
\]\vspace*{1em}

In other words, we can use matrix multiplication to recover the dot product.

\section*{Matrix Powers}% and Graph Theory

Matrix multiplication also lets us define \emph{powers} of square matrices. For an integer $k \geq 1$, we define

\horlines{1}
% A^k = \underbrace{AA\cdots A}_{k\text{ copies}},
% Note that we already did a k=2 example back in the "FOILing matrices" example a couple pages ago.

\noindent and we also define $A^0 = I$ (analogously to how we define $a^0 = 1$ whenever $a$ is a non-zero real number). The next theorem follows almost immediately from this definition:

\begin{theorem}[Properties of Matrix Powers]
	If $A$ is square and $k$ and $r$ are nonnegative integers, then
	\begin{itemize}
		\item $A^kA^r = A^{k+r}$, and
		\item $(A^k)^r = A^{kr}$.
	\end{itemize}
\end{theorem}

\newpage

% make note that exponent rules with one matrix in the base still work, but exponent rules with two matrices in the base (e.g., (AB)^k = A^kB^k) don't, since commutativity fails.

\exx[10]{Compute some matrix powers.}
% Ask class for a 2x2 matrix and compute its square. Note that it is *not* the same as the entrywise square.
% Then compute its 4th power using the theorem (just square the square, which saves us 1 multiplication). Note that we could then compute the $8$th power with just one more multiplication, and so on.

\noindent Later in the course, we will see how to define things like $\quad \quad \quad \quad$ and $\quad \quad \quad \quad$. \\
% A^{-3} and A^(1/2) and A^pi.

%Let's take a brief detour and discuss some graph theory. This is one area of mathematics where linear algebra comes in so handy that it almost feels like magic. First off, a \textbf{graph} is a collection of vertices (dots) and edges connecting those vertices. Something like this:
%
%\horlines{7}
%% Draw two graphs -- one directed, one undirected. Label each one as directed and undirected
%
%\newpage
%
%\noindent For example, vertices and edges might represent:
%\begin{itemize}
%	\item Cities (vertices) and roads between them (edges), \medskip
%
%	\item Web pages (vertices) and links from one page to another (edges), \medskip
%
%	\item ${}$
%
%	\vspace*{-0.4in}\horlines{3} % Ask for other examples from the class. Some other potential ones: computers (vertices) and network connections between them (edges), or particles and chemical bonds? People (vertices) and friendships (edges) on Facebook? Family tree (people/parental relationships)? Satellite communication network?
%\end{itemize}
%
%% END OF CLASS 8
%
%A problem that comes up fairly frequently is how to count the number of paths of a certain length between different vertices on a graph.
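For instance (with a matrix chosen purely for convenience), the second property above lets us compute high powers quickly: if $A = \begin{bmatrix} 1 & 1 \\ 0 & 1 \end{bmatrix}$, then
\[
A^2 = \begin{bmatrix} 1 & 2 \\ 0 & 1 \end{bmatrix} \quad \text{and} \quad A^4 = (A^2)^2 = \begin{bmatrix} 1 & 4 \\ 0 & 1 \end{bmatrix},
\]
so $A^4$ requires only two matrix multiplications instead of three.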
%For example, this could tell us:
%\begin{itemize}
%	\item ${}$
%
%	\vspace*{-0.4in}\horlines{2} % number of ways to drive from City A to City B, passing through exactly one other city on the way
%
%	\item ${}$ % Number of "friends of friends" on Facebook
%
%	\vspace*{-0.4in}\horlines{2} % number of ways to start at the "Kevin Bacon" article on Wikipedia, click on exactly 12 links, and end up at the "Mount Allison" page
%\end{itemize}
%
%\exx[7]{Count the number of paths in a small graph.}
%% Draw a graph (maybe same one as before) and count paths of length 1, 2, and 3.
%% SHOULD HAVE EXACTLY 4 VERTICES, but not too many edges
%% What about paths of length 8? Ugh, don't wanna do this way!
%
%\newpage
%
%As a better way of counting the number of paths of a specific length, we construct a matrix from the graph by putting a $1$ in its $(i,j)$-entry if there is an edge from vertex $i$ to $j$, and we put a $0$ in its $(i,j)$-entry otherwise. This is called the \textbf{adjacency matrix} of the graph:
%
%\horlines{7}
%% Write the adjacency matrix of the graph in the last example
%% Put both a directed and undirected one here
%
%The entries of $A$ count the number of paths of length $1$ between different vertices. What about the entries of $A^2$?
%
%\horlines{8}
%% Observe that the (i,j)-entry of A^2 is ai1*a1j + ai2*a2j + ... + ain*anj.
%% This is the number of paths from i to another vertex to j.
%% i.e., the number of paths of length 2.
%
%It's not too tough to show that this pattern continues: the entries of $A^3$ give the number of paths of length $3$ from one vertex to another. In general, the entries of $A^k$ give the number of paths of length $k$ from one vertex to another.
%
%\newpage
%
%\exx[8]{Compute the number of paths of length $4$ between each pair of vertices in the graph from earlier.}
%% Compute A^2 and square it to get A^4
%
%\exx[6]{Compute the number of paths of length $8$ from vertex $1$ to vertex $2$ in the graph from earlier.}
%% Just compute the $(1,2)$-entry of A^8 by using A^4.

\section*{Block Matrices}

Oftentimes, there are clear ``patterns'' in the entries of a large matrix, and it might be useful to break that matrix down into smaller chunks based on some partition of its rows and columns. For example:
\[
A = \begin{bmatrix}
	1 & 0 & 0 & 1 & 0 & 0 \\
	0 & 1 & 0 & 0 & 1 & 0 \\
	0 & 0 & 1 & 0 & 0 & 1 \\
	0 & 0 & 0 & 2 & 1 & -1 \\
	0 & 0 & 0 & 0 & -2 & 3
\end{bmatrix}
\quad \text{ and } \quad
B = \begin{bmatrix}
	1 & 2 & 0 & 0 \\
	2 & 1 & 0 & 0 \\
	-1 & 1 & 0 & 0 \\
	0 & 0 & 1 & 2 \\
	0 & 0 & 2 & 1 \\
	0 & 0 & -1 & 1
\end{bmatrix}
\]
% Draw partition lines in the above matrices.

\newpage

\horlines{5}
% A = [I I;O C] and B = [D O;O D], where C = [2 1 -1;0 -2 3] and D = [1 2;2 1;-1 1].
% Emphasize that we can think of A as a 2x2 block matrix or a 5x6 matrix. Difference is perspective only: still the same matrix.

When $A$ and $B$ are written in this way, as matrices whose elements are themselves matrices, they are called \textbf{block matrices}. The lines that we draw inside block matrices do not matter in any mathematical sense: they just make it easier for us to visualize the blocks.
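As a smaller illustration (with a matrix made up just for this purpose), here is one natural way to partition a $4 \times 4$ matrix into $2 \times 2$ blocks:
\[
\begin{bmatrix}
	1 & 0 & 2 & 3 \\
	0 & 1 & 4 & 5 \\
	0 & 0 & 6 & 7 \\
	0 & 0 & 8 & 9
\end{bmatrix}
= \begin{bmatrix} I_2 & X \\ O & Y \end{bmatrix},
\quad \text{where} \quad
X = \begin{bmatrix} 2 & 3 \\ 4 & 5 \end{bmatrix}
\quad \text{and} \quad
Y = \begin{bmatrix} 6 & 7 \\ 8 & 9 \end{bmatrix}.
\]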
Viewing matrices in this way often simplifies calculations and reveals structure, especially when the matrix has a lot of zeroes.\\

Remarkably, multiplication of block matrices works exactly as it does for regular matrices:

\horlines{10}
% AB = [I I;O C][D O;O D] = [D D;O CD]
% Directly compute CD = [5 4 ; -7 1] and then just plug the matrices into their spots to see that AB = [1 & 2 & 1 & 2 \\ 2 & 1 & 2 & 1 \\ -1 & 1 & -1 & 1 \\ 0 & 0 & 5 & 4 \\ 0 & 0 & -7 & 1].

\noindent And indeed, this is the exact same answer we would have gotten if we computed $AB$ the ``long way''.

\newpage

We have to be careful when performing block matrix multiplication: it is only valid if we choose the sizes of the blocks so that every matrix multiplication being performed makes sense.

\exx[17]{Suppose \\[5em]
% A = [Ask for a 2x2 matrix], B = [Ask for a 2x3 matrix]
Which of the following block matrix multiplications make sense?}

\vspace*{-17.3cm}

\noindent $\begin{bmatrix} A & B \\ B & A \end{bmatrix}^2$
% NO. Matrix itself doesn't make sense (blocks don't align with each other, wrong sizes)

\vspace*{2.4cm}

\noindent $\begin{bmatrix} A & B \\ O & I_3 \end{bmatrix}\begin{bmatrix} A & A \\ O & A \\ I_2 & O \end{bmatrix}$
% NO. Block matrix sizes don't match up.

\vspace*{2.4cm}

\noindent $\begin{bmatrix} A & B \\ O & I_3 \end{bmatrix}\begin{bmatrix} A & A \\ O & A \end{bmatrix}$
% NO. Do the block matrix multiplication and get a term BA, which is undefined.

\vspace*{2.7cm}

\noindent $\begin{bmatrix} A & B \\ O & I_3 \end{bmatrix}\begin{bmatrix} B & O \\ I_3 & I_3 \end{bmatrix}$
% YES. The product is [ AB+B, B ; I_3, I_3 ]. Just compute and plug in.

% END OF CLASS 9

\newpage

Partitioning matrices in different ways can lead to new insights about how matrix multiplication works.

\begin{theorem}[Matrix--Vector Multiplication]\label{thm:matrix_vect_mult}
	Suppose $A \in \M_{m,n}$ has columns $\a_1,\a_2,\ldots,\a_n$ and $\v \in \R^n$ is a column vector. Then
	\[
	A\v = v_1\a_1 + v_2\a_2 + \cdots + v_n\a_n.
	\]
\end{theorem}

\begin{proof}
	We simply perform block matrix multiplication:

	\horlines{5}\vspace*{-1.4cm}
	% A\v = \begin{bmatrix} \a_1 \ | \ \a_2 \ | \ \cdots \ | \ \a_n \end{bmatrix}[v_1 ; v_2 ; ... ; v_n] = v_1\a_1 + v_2\a_2 + \cdots + v_n\a_n
	% where the final equality comes from thinking of $A$ as a $1\times n$ block matrix and $\v$ as an $n \times 1$ block matrix, and performing block matrix multiplication.
\end{proof}

\noindent Of course, we can compute $A\v$ directly from the definition, but it's nice to have multiple ways to think about things.

\begin{theorem}[Matrix Multiplication is Column-Wise]\label{thm:matrix_mult_alternate}
	Suppose $A \in \M_{m,n}$ and $B \in \M_{n,p}$ are matrices. If $\b_j$ is the $j$-th column of $B$, then
	\[
	AB = \begin{bmatrix} \ A\b_1 \ {\color{gray}|} \ A\b_2 \ {\color{gray}|} \ \cdots \ {\color{gray}|} \ A\b_p \ \ \end{bmatrix}.
	\]
\end{theorem}

\begin{proof}
	Again, we perform block matrix multiplication:

	\horlines{5}\vspace*{-1.4cm}
	% AB = A\begin{bmatrix} \b_1 \ | \ \b_2 \ | \ \cdots \ | \ \b_p \end{bmatrix} = \begin{bmatrix} A\b_1 \ | \ A\b_2 \ | \ \cdots \ | \ A\b_p \end{bmatrix}
	% where the final equality comes from thinking of $A$ as a $1\times 1$ block matrix and $B$ as a $1 \times p$ block matrix, and performing block matrix multiplication.
\end{proof}
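For a quick worked instance of Theorem~\ref{thm:matrix_vect_mult} (with numbers chosen arbitrarily):
\[
\begin{bmatrix} 1 & 2 \\ 3 & 4 \end{bmatrix}\begin{bmatrix} 5 \\ 6 \end{bmatrix}
= 5\begin{bmatrix} 1 \\ 3 \end{bmatrix} + 6\begin{bmatrix} 2 \\ 4 \end{bmatrix}
= \begin{bmatrix} 17 \\ 39 \end{bmatrix},
\]
which agrees with the row-times-column definition of matrix multiplication.

\end{document}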