\documentclass[12pt]{article}
% Course Summary Transcription -- MATH3161 & MATH5165
% VERBATIM transcription of the provided PDF (Course Summary-2025.pdf)
% Compiles with LuaLaTeX / pdfLaTeX without additional styling packages.
\usepackage{amsmath,amssymb,amsfonts}
\usepackage{geometry}
\geometry{margin=1in}
\setlength{\parindent}{0pt}

\begin{document}

\begin{center}
\textbf{The University of New South Wales}\\
School of Mathematics and Statistics\\[1.2em]
\textbf{MATH3161 \& MATH5165 -- Optimization, Term 1, 2025}\footnote{Prof Jeya Jeyakumar, School of Mathematics and Statistics, UNSW Sydney.}\\[1.2em]
\textbf{OPTIMIZATION -- COURSE SUMMARY}
\end{center}

\bigskip

\section*{TOPIC 1 -- MODEL FORMULATION}

\textbf{Standard Formulation:}
\begin{equation*}
\begin{aligned}
&\min_{x \in \mathbb{R}^n} && f(x) \\
&\text{s.t.}\; && c_i(x)=0,\; i = 1,\ldots,m_E,\\
& && c_i(x)\le 0,\; i = m_E+1,\ldots,m.
\end{aligned}
\end{equation*}

\textbf{Conversions:}
\begin{itemize}
\item Maximize to minimize: $\displaystyle \max f(x) = -\min\bigl(-f(x)\bigr)$
\item Constraint right-hand side: $c_i(x)=b_i \;\Leftrightarrow\; c_i(x)-b_i = 0$
\item Greater-than-or-equal-to inequalities: $c_i(x)\ge 0 \;\Leftrightarrow\; -c_i(x)\le 0$
\item Strict inequality: $c_i(x) < 0 \;\Leftrightarrow\; c_i(x)+\varepsilon \le 0,\; \text{for some } \varepsilon>0$
\end{itemize}

\bigskip

\section*{TOPIC 2 -- MATHEMATICAL BACKGROUND}

\begin{itemize}
\item \textbf{Existence of Global Extrema:} Let $\Omega$ be a compact set and $f$ be continuous on $\Omega$. Then the global extrema of $f$ over $\Omega$ exist.
\item \textbf{Gradient:} Let $f: \mathbb{R}^n \to \mathbb{R}$ be continuously differentiable. The gradient $\nabla f : \mathbb{R}^n \to \mathbb{R}^n$ of $f$ at $x$ is
\[
\nabla f(x) = \begin{bmatrix} \dfrac{\partial f(x)}{\partial x_1} \\ \vdots \\ \dfrac{\partial f(x)}{\partial x_n} \end{bmatrix}.
\]
\item \textbf{Hessian:} Let $f: \mathbb{R}^n \to \mathbb{R}$ be twice continuously differentiable. The Hessian $\nabla^2 f : \mathbb{R}^n \to \mathbb{R}^{n\times n}$ of $f$ at $x$ is
\[
\nabla^2 f(x)=\begin{bmatrix}
\dfrac{\partial^2 f(x)}{\partial x_1^2} & \dfrac{\partial^2 f(x)}{\partial x_1\partial x_2} & \cdots & \dfrac{\partial^2 f(x)}{\partial x_1\partial x_n}\\
\dfrac{\partial^2 f(x)}{\partial x_2\partial x_1} & \dfrac{\partial^2 f(x)}{\partial x_2^2} & \cdots & \dfrac{\partial^2 f(x)}{\partial x_2\partial x_n}\\
\vdots & \vdots & \ddots & \vdots\\
\dfrac{\partial^2 f(x)}{\partial x_n\partial x_1} & \dfrac{\partial^2 f(x)}{\partial x_n\partial x_2} & \cdots & \dfrac{\partial^2 f(x)}{\partial x_n^2}
\end{bmatrix}.
\]
\item Let $A\in\mathbb{R}^{n\times n}$ be any real square matrix (not necessarily symmetric). Then $A$ is:
\begin{itemize}
\item \emph{positive definite} $\Leftrightarrow x^{\top}Ax>0$ for all $x\in\mathbb{R}^n$, $x\ne 0$.
\item \emph{positive semi-definite} $\Leftrightarrow x^{\top}Ax\ge 0$ for all $x\in\mathbb{R}^n$.
\item \emph{negative definite} $\Leftrightarrow x^{\top}Ax<0$ for all $x\in\mathbb{R}^n$, $x\ne 0$.
\item \emph{negative semi-definite} $\Leftrightarrow x^{\top}Ax\le 0$ for all $x\in\mathbb{R}^n$.
\item \emph{indefinite} $\Leftrightarrow$ there exist $x,y\in\mathbb{R}^n$ such that $x^{\top}Ax<0$ and $y^{\top}Ay>0$.
\end{itemize}
\item A real square matrix $A\in\mathbb{R}^{n\times n}$ is positive definite (resp.\ positive semi-definite, negative definite, negative semi-definite, indefinite) \emph{iff} its symmetric part $\dfrac{A+A^{\top}}{2}$ is positive definite (resp.\ positive semi-definite, negative definite, negative semi-definite, indefinite).
\item Let $A\in\mathbb{R}^{n\times n}$ be a symmetric matrix. Then:
\begin{itemize}
\item $A$ has $n$ real eigenvalues.
\item There exists an orthogonal matrix $Q$ ($Q^{\top}Q=I$) such that $A=QDQ^{\top}$, where $D=\operatorname{diag}(\lambda_1,\ldots,\lambda_n)$ and $Q=[v_1\;\cdots\;v_n]$ with $v_i$ an eigenvector of $A$ corresponding to eigenvalue $\lambda_i$.
\item $\det(A)=\prod_{i=1}^n \lambda_i$ and $\operatorname{tr}(A)=\sum_{i=1}^n \lambda_i=\sum_{i=1}^n A_{ii}$.
\item $A$ is positive definite $\Leftrightarrow \lambda_i>0$ for all $i=1,\ldots,n$.
\item $A$ is positive semi-definite $\Leftrightarrow \lambda_i\ge 0$ for all $i=1,\ldots,n$.
\item $A$ is indefinite $\Leftrightarrow$ there exist $i,j$ with $\lambda_i>0$ and $\lambda_j<0$.
\end{itemize}
\end{itemize}
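\medskip
\textbf{Example (definiteness via eigenvalues).} As a quick illustration of the eigenvalue tests above, the symmetric matrices
\[
A=\begin{bmatrix} 2 & 1\\ 1 & 2 \end{bmatrix},
\qquad
B=\begin{bmatrix} 1 & 2\\ 2 & 1 \end{bmatrix}
\]
have eigenvalues $\{3,1\}$ and $\{3,-1\}$ respectively (consistent with $\det A=3$, $\operatorname{tr}A=4$ and $\det B=-3$, $\operatorname{tr}B=2$), so $A$ is positive definite while $B$ is indefinite.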
\bigskip

\section*{TOPIC 3 -- CONVEXITY OF SETS AND FUNCTIONS}

\begin{itemize}
\item \textbf{Convex set:} A set $\Omega\subseteq \mathbb{R}^n$ is convex $\Leftrightarrow \theta x + (1-\theta) y\in\Omega$ for all $\theta\in[0,1]$ and for all $x,y\in\Omega$.
\item \textbf{Convex function:} Let $\Omega\subseteq \mathbb{R}^n$ be a convex set. The function $f:\Omega\to\mathbb{R}$ is convex \emph{iff}
\[
f\bigl(\theta x + (1-\theta) y\bigr) \le \theta f(x) + (1-\theta) f(y)\quad \forall\,\theta\in[0,1],\;\forall\,x,y\in\Omega.
\]
\item \textbf{Convex inequality:} Let $c: \mathbb{R}^n \to \mathbb{R}$ be a convex function. Then $\Omega=\{x\in\mathbb{R}^n: c(x)\le 0\}$ is a convex set.
\item Let $\Omega\subset \mathbb{R}^n$ be convex and $f\in C^2(\Omega)$. Then
\begin{itemize}
\item $\nabla^2 f(x)$ is positive semi-definite $\forall x\in\Omega \;\Rightarrow\; f$ is convex on $\Omega$.
\item $\nabla^2 f(x)$ is positive definite $\forall x\in\Omega \;\Rightarrow\; f$ is strictly convex on $\Omega$.
\item If, in addition, $\Omega$ is open: $\nabla^2 f(x)$ is positive semi-definite $\forall x\in\Omega \;\Leftrightarrow\; f$ is convex on $\Omega$.
\end{itemize}
\item \textbf{Epigraph:} For $f: \Omega\to\mathbb{R}$, $\operatorname{epi} f = \{(x,r)\in\mathbb{R}^n\times\mathbb{R}: x\in\Omega,\; f(x)\le r\}$.
\item \textbf{Epigraph of a convex function:} $f$ is convex on a convex set $\Omega$ $\Leftrightarrow\; \operatorname{epi} f$ is convex in $\mathbb{R}^n\times\mathbb{R}$.
\end{itemize}

\bigskip

\section*{TOPIC 4 -- UNCONSTRAINED OPTIMIZATION}

\begin{itemize}
\item \textbf{Problem form:} $\displaystyle \min_{x\in\Omega} f(x)$ with $\Omega = \mathbb{R}^n$.
\item \textbf{First-order necessary condition:} If $x^*$ is a local minimizer and $f\in C^1(\mathbb{R}^n)$, then $\nabla f(x^*) = 0$.
\item \textbf{Unconstrained stationary point:} $x^*$ with $\nabla f(x^*)=0$. Such points may be minimizers, maximizers or saddle points.
\item \textbf{Second-order necessary conditions:} If $f\in C^2(\mathbb{R}^n)$ then
\begin{itemize}
\item Local minimizer $\Rightarrow \nabla f(x^*)=0$ \emph{and} $\nabla^2 f(x^*)$ positive semi-definite.
\item Local maximizer $\Rightarrow \nabla f(x^*)=0$ \emph{and} $\nabla^2 f(x^*)$ negative semi-definite.
\end{itemize}
\item \textbf{Second-order sufficient conditions:} If $f\in C^2(\mathbb{R}^n)$ and $\nabla f(x^*)=0$, then
\begin{itemize}
\item $\nabla^2 f(x^*)$ positive definite $\Rightarrow$ $x^*$ is a \emph{strict} local minimizer.
\item $\nabla^2 f(x^*)$ negative definite $\Rightarrow$ $x^*$ is a \emph{strict} local maximizer.
\item $\nabla^2 f(x^*)$ indefinite $\Rightarrow$ $x^*$ is a saddle point.
\end{itemize}
\item \textbf{Sufficiency under convexity/concavity:} For $f\in C^2(\mathbb{R}^n)$:
\begin{itemize}
\item $f$ convex $\Rightarrow$ any stationary point is a global minimizer.
\item $f$ \emph{strictly} convex $\Rightarrow$ a stationary point is the \emph{unique} global minimizer.
\item $f$ concave $\Rightarrow$ any stationary point is a global maximizer.
\item $f$ \emph{strictly} concave $\Rightarrow$ a stationary point is the \emph{unique} global maximizer.
\end{itemize}
\end{itemize}
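\medskip
\textbf{Example (classifying stationary points).} To illustrate the conditions above, take $f(x_1,x_2)=x_1^3-3x_1+x_2^2$. Then $\nabla f(x)=\bigl[\,3x_1^2-3,\;2x_2\,\bigr]^{\top}$, so the stationary points are $(1,0)$ and $(-1,0)$, and $\nabla^2 f(x)=\operatorname{diag}(6x_1,\,2)$. At $(1,0)$ the Hessian $\operatorname{diag}(6,2)$ is positive definite, so $(1,0)$ is a strict local minimizer; at $(-1,0)$ the Hessian $\operatorname{diag}(-6,2)$ is indefinite, so $(-1,0)$ is a saddle point. Since $f(x_1,0)\to-\infty$ as $x_1\to-\infty$, there is no global minimizer.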
\bigskip

\section*{TOPIC 5 -- EQUALITY CONSTRAINTS}

\textbf{Standard form:}
\begin{equation*}
\min_{x\in\mathbb{R}^n} \bigl\{ f(x) : c_i(x)=0,\; i=1,\ldots,m \bigr\}. \tag{PE}
\end{equation*}

\textbf{Lagrangian:} For $x\in\mathbb{R}^n$, $\lambda\in\mathbb{R}^m$,
\[
L(x,\lambda) := f(x) + \sum_{i=1}^m \lambda_i c_i(x).
\]

\begin{itemize}
\item \textbf{Regular point:} A feasible point $x$ is \emph{regular} $\Leftrightarrow$ the gradients $\nabla c_i(x)$, $i=1,\ldots,m$, are linearly independent.
\item \textbf{First-order necessary optimality conditions:} If $x^*$ is a local minimizer and a regular point of (PE), then $\exists\,\lambda^*\in\mathbb{R}^m$ such that
\[
\nabla_x L(x^*,\lambda^*) = 0, \qquad \nabla_{\lambda} L(x^*,\lambda^*) = 0.
\]
\item \textbf{Constrained stationary point:} Any $x^*$ for which $\exists\,\lambda^*$ satisfying the above equalities.
\item \textbf{Second-order sufficient conditions:} Let $(x^*,\lambda^*)$ be a constrained stationary point. Define
\[
A(x^*) = \begin{bmatrix} \nabla c_1(x^*) & \cdots & \nabla c_m(x^*) \end{bmatrix},
\]
and let $Z^* \in \mathbb{R}^{n\times(n-t^*)}$, where $t^*=\operatorname{rank}A(x^*)$, be a matrix whose columns form a basis for the null space of $A(x^*)^{\top}$, so that $(Z^*)^{\top}A(x^*) = 0$. The \emph{reduced Hessian} of $L$ is $W_Z^* = (Z^*)^{\top}\nabla_{xx}^2 L(x^*,\lambda^*) Z^*$. If $W_Z^*$ is positive definite, then $x^*$ is a strict local minimizer.
\end{itemize}

\bigskip

\section*{TOPIC 6 -- INEQUALITY CONSTRAINTS}

\textbf{Standard form (with equalities and inequalities):}
\begin{equation*}
\min_{x\in\mathbb{R}^n} \bigl\{ f(x) : c_i(x)=0,\; i\in E,\; c_i(x)\le 0,\; i\in I \bigr\}. \tag{NLP}
\end{equation*}

\begin{itemize}
\item \textbf{Active set at $x^*$:} $A(x^*) = \{ i\in E\cup I : c_i(x^*) = 0 \}.$
\item \textbf{Regular point:} Feasible $x^*$ such that $\{\nabla c_i(x^*) : i\in A(x^*)\}$ are linearly independent.
\item \textbf{Constrained stationary point:} Feasible $x^*$ for which $\exists\, \lambda_i^*$ ($i\in A(x^*)$) with $\displaystyle \nabla f(x^*) + \sum_{i\in A(x^*)} \lambda_i^* \nabla c_i(x^*) = 0$.
\item \textbf{Karush--Kuhn--Tucker (KKT) necessary conditions:} If $x^*$ is a local minimizer and a regular point, then $\exists\, \lambda_i^*$ ($i\in A(x^*)$) such that
\[
\nabla f(x^*) + \sum_{i\in A(x^*)} \lambda_i^* \nabla c_i(x^*) = 0,
\]
with $c_i(x^*)=0$ $(i\in E)$, $c_i(x^*)\le 0$ $(i\in I)$, $\lambda_i^*\ge 0$ $(i\in I)$, and $\lambda_i^*=0$ for $i\notin A(x^*)$.
\item \textbf{Second-order sufficient conditions for strict local minimum:} Let $t^*=|A(x^*)|$ and $A^*=[\nabla c_i(x^*)\mid i\in A(x^*)]$. Let $Z^*\in\mathbb{R}^{n\times(n-t^*)}$ be a matrix whose columns form a basis for the null space of $(A^*)^{\top}$, and let $W_Z^*=(Z^*)^{\top}\nabla_{xx}^2 L(x^*,\lambda^*)Z^*$ be the reduced Hessian of the Lagrangian. If $\lambda_i^*>0\;\forall i\in I\cap A(x^*)$ and $W_Z^*$ is positive definite, then $x^*$ is a strict local minimizer.
\item \textbf{Convex problem:} (NLP) is a \emph{convex optimization} problem if $f$ is convex on the feasible set, $c_i$ is affine $\forall i\in E$, and $c_i$ is convex $\forall i\in I$.
\item \textbf{KKT sufficient conditions for global minimum:} If (NLP) is convex and $x^*$ satisfies the KKT conditions with $\lambda_i^*\ge 0$ for all $i\in I\cap A(x^*)$, then $x^*$ is a global minimizer.
\item \textbf{Dual problem (Wolfe dual):}
\begin{equation*}
\max_{y\in\mathbb{R}^n,\,\lambda\in\mathbb{R}^m} \left\{ f(y) + \sum_{i=1}^m \lambda_i c_i(y) : \nabla f(y) + \sum_{i=1}^m \lambda_i \nabla c_i(y) = 0,\; \lambda_i\ge 0\ (i\in I) \right\}. \tag{D}
\end{equation*}
\end{itemize}
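\medskip
\textbf{Example (KKT conditions).} As a small illustration, consider the convex problem
\[
\min_{x\in\mathbb{R}^2} \bigl\{ x_1^2 + x_2^2 : 1 - x_1 - x_2 \le 0 \bigr\}.
\]
The objective is convex and the single inequality constraint is affine, so any KKT point is a global minimizer. Stationarity $\nabla f(x) + \lambda \nabla c(x) = 0$ gives $2x_1 = 2x_2 = \lambda$. If the constraint were inactive we would need $\lambda=0$ and $x=(0,0)$, which is infeasible; so the constraint is active, $x_1 + x_2 = 1$, giving $x^* = (\tfrac12,\tfrac12)$ with multiplier $\lambda^* = 1 \ge 0$. Hence $x^*$ is the global minimizer.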
\bigskip

\section*{TOPIC 7 -- NUMERICAL METHODS}

\begin{itemize}
\item \textbf{Rates of convergence of iterative methods.} If $x_k\to x^*$ and $\displaystyle \frac{\lVert x_{k+1}-x^*\rVert}{\lVert x_k - x^*\rVert^{\alpha}} \to \beta$ as $k\to\infty$, the method has \emph{$\alpha$-th order} convergence. Key cases: $\alpha=1$ with $\beta\in(0,1)$ (\emph{linear}), $\alpha=1$ with $\beta=0$ (\emph{superlinear}), $\alpha=2$ (\emph{quadratic}).
\item \textbf{Line search methods.} Given a search direction $s^{(k)}$ at $x^{(k)}$, set $x^{(k+1)} = x^{(k)} + \alpha^{(k)} s^{(k)}$, where $\alpha^{(k)}$ minimizes or approximately minimizes $\ell_k(\alpha) = f(x^{(k)} + \alpha s^{(k)})$.
\item \textbf{Descent direction:} $(g^{(k)})^{\top} s^{(k)} < 0$.
\item \textbf{Exact line search condition:} $\ell_k'(\alpha) = g(x^{(k)} + \alpha s^{(k)})^{\top} s^{(k)} = 0$.
\item If $s^{(k)}$ is a descent direction, a line search yields $\alpha^{(k)}>0$ with $f^{(k+1)} < f^{(k)}$.
\item \emph{Global convergence}: convergence to a stationary point from any starting point $x^{(1)}$.
\item \emph{Quadratic termination}: the method finds the minimizer of a strictly convex quadratic in a finite, known number of iterations.
\end{itemize}

\subsection*{Steepest Descent Method}
\begin{itemize}
\item Search direction: $s^{(k)} = -g^{(k)}$.
\item Descent direction: Yes.
\item Global convergence: Yes.
\item Quadratic termination: No.
\item Rate: Linear with exact line searches. If $f$ is a strictly convex quadratic, then for each $k$,
\[
\lVert x^{(k+1)} - x^* \rVert \le \left( \frac{\kappa-1}{\kappa+1} \right)^{\!k} \lVert x^{(1)} - x^* \rVert,
\]
where $\kappa$ is the condition number of $\nabla^2 f$.
\end{itemize}

\subsection*{Basic Newton's Method}
\begin{itemize}
\item Search direction: solve $G^{(k)} \delta^{(k)} = -g^{(k)}$, where $G^{(k)}$ is the Hessian.
\item Descent direction: Yes, if $G^{(k)}$ is positive definite.
\item Global convergence: No (the Hessian may be singular).
\item Quadratic termination: Yes (one iteration for strictly convex quadratics).
\item Rate: Quadratic if $G^*$ is positive definite.
\item Usage: When the Hessian can be evaluated and is positive definite.
\end{itemize}

\subsection*{Conjugate Gradient Method}
\begin{itemize}
\item Search direction: $s^{(k)} = -g^{(k)} + \beta^{(k)} s^{(k-1)}$.
\item Descent direction: Yes.
\item Quadratic termination: Yes, with exact line searches.
\item Usage: Large $n$; stores only vectors and avoids solving linear systems.
\end{itemize}

\bigskip

\section*{TOPIC 8 -- PENALTY FUNCTION METHODS}

\textbf{Standard formulation:} Same as (NLP).

\textbf{Penalty problem:}
\begin{equation*}
\min_{x\in\mathbb{R}^n} \bigl( f(x) + \mu P(x) \bigr), \tag{$P_{\mu}$}
\end{equation*}
with
\[
P(x) = \sum_{i=1}^{m_E} [c_i(x)]^2 + \sum_{i=m_E+1}^{m} [c_i(x)]_{+}^2, \qquad [c_i(x)]_{+}=\max\{c_i(x),0\}.
\]

\begin{itemize}
\item If $c: \mathbb{R}^n \to \mathbb{R}$ is convex, then $\max\{c(x),0\}^2$ is convex.
\item $\displaystyle \frac{\partial}{\partial x_j} \bigl( \max\{c(x),0\}^2 \bigr) = 2\,\max\{c(x),0\}\, \frac{\partial c}{\partial x_j}$.
\item \textbf{Convergence Theorem:} For each $\mu>0$ let $x_{\mu}$ minimize $(P_{\mu})$ and set $\theta(\mu)=f(x_{\mu})+\mu P(x_{\mu})$. Suppose $\{x_{\mu}\}$ lies in a closed bounded set. Then
\[
\min_{x} \{ f(x) : c_i(x)=0,\, i\in E,\; c_i(x)\le 0,\, i\in I \} = \lim_{\mu\to\infty} \theta(\mu).
\]
Moreover, any cluster point $x^*$ of $\{x_{\mu}\}$ solves the original problem, and $\mu P(x_{\mu})\to 0$ as $\mu\to\infty$.
\end{itemize}
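\medskip
\textbf{Example (quadratic penalty).} As an illustration of the convergence theorem, consider $\min\{x^2 : 1-x\le 0\}$, whose solution is $x^*=1$ with $f(x^*)=1$. The penalty problem is $\min_x \bigl(x^2+\mu\max\{1-x,0\}^2\bigr)$; for each $\mu>0$ its minimizer satisfies $2x-2\mu(1-x)=0$, i.e.\ $x_{\mu}=\dfrac{\mu}{1+\mu}<1$. As $\mu\to\infty$, $x_{\mu}\to 1=x^*$, $\mu P(x_{\mu})=\dfrac{\mu}{(1+\mu)^2}\to 0$, and $\theta(\mu)=\dfrac{\mu}{1+\mu}\to 1=f(x^*)$, exactly as the theorem predicts.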
\bigskip

\section*{TOPIC 9 -- OPTIMAL CONTROL}

A typical \emph{autonomous} optimal control problem with fixed target is
\begin{equation*}
(C)\quad \min_{u(t)\in U} \int_{t_0}^{t_1} f_0\bigl( x(t),u(t) \bigr)\,dt
\quad\text{subject to}\quad x'(t) = f\bigl( x(t),u(t) \bigr),\; x(t_0)=x_0,\; x(t_1)=x_1.
\end{equation*}

\textbf{Hamiltonian:}
\[
H(x,\hat z,u) = \hat z^{\top} \dot{\hat x} = \sum_{i=0}^n z_i(t) f_i\bigl( x(t),u(t) \bigr),
\]
where $\hat z(t)=[z_0(t),\ldots,z_n(t)]^{\top}$, $\hat x(t)=[x_0(t),\ldots,x_n(t)]^{\top}$, and $x_0$ is the accumulated cost, i.e.\ $x_0'(t)=f_0(x(t),u(t))$ with $x_0(t_0)=0$.

\textbf{Co-state equations:} $\dot{\hat z} = -\dfrac{\partial H}{\partial \hat x}$.

\medskip

\textbf{Pontryagin Maximum Principle (autonomous, fixed targets).} Suppose $(x^*,u^*)$ is optimal for (C). Then
\begin{itemize}
\item $z_0 = -1$ (\emph{normal} case), so
\[
H\bigl( x,\hat z,u \bigr) = -f_0\bigl( x,u \bigr) + \sum_{i=1}^n z_i(t) f_i\bigl( x,u \bigr).
\]
\item The co-state equations admit a solution $\hat z^*$.
\item $u^*$ maximizes $H(x^*,\hat z^*,u)$ over $u\in U$.
\item $x^*$ satisfies the state equation with $x^*(t_0)=x_0$, $x^*(t_1)=x_1$.
\item The Hamiltonian is constant along the optimal path and equals $0$ if $t_1$ is free:
\[
H(x^*,\hat z^*,u^*) = \begin{cases} \text{constant}, & t_1 \text{ fixed},\\ 0, & t_1 \text{ free}. \end{cases}
\]
\end{itemize}

\textbf{Partially free targets:} If the target is the intersection of $k$ surfaces $g_i(x_1)=0$, $i=1,\ldots,k$, then the transversality condition is $z(t_1) = \sum_{i=1}^k c_i \nabla g_i(x_1)$ for some constants $c_i$.

\textbf{Completely free target:} If $x(t_1)$ is unrestricted, then $z(t_1)=0$.

\medskip

\textbf{Non-autonomous problems.} For state equation $x' = f(x,u,t)$ and cost $J=\int_{t_0}^{t_1} f_0(x,u,t)\,dt$, introduce the extra state $x_{n+1}$ with $x_{n+1}' = 1$, $x_{n+1}(t_0)=t_0$, $x_{n+1}(t_1)=t_1$.

\end{document}