{ "cells": [ { "cell_type": "markdown", "id": "a56cf352-227e-4e7c-b3a3-08bcee89474b", "metadata": {}, "source": [ "## 2.1 Data Manipulation" ] }, { "cell_type": "code", "execution_count": 2, "id": "d6bdec3c-efd6-4ef7-a9a8-81e3ab934c02", "metadata": {}, "outputs": [], "source": [ "import torch" ] }, { "cell_type": "code", "execution_count": 6, "id": "ae2b9f54-eb49-4280-9d85-2c7b9f187046", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11.])" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x = torch.arange(12, dtype=torch.float32)\n", "x" ] }, { "cell_type": "code", "execution_count": 7, "id": "84cb49da-a4d6-44f2-8844-f366e087d0e7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "12" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.numel() # num elements; presumably" ] }, { "cell_type": "code", "execution_count": 8, "id": "d86a96f7-c85b-4cf1-b5de-65c97800b36d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "12" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(x)" ] }, { "cell_type": "code", "execution_count": 11, "id": "77d50fa1-6a4c-4636-97d5-ce0d4a0df847", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([12])" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.shape # no brackets! it's an attribute of the tensor not a method." 
] }, { "cell_type": "code", "execution_count": 12, "id": "910234a0-3cdf-4ddb-b7a0-f6a35f3d8a58", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[ 0., 1., 2., 3.],\n", " [ 4., 5., 6., 7.],\n", " [ 8., 9., 10., 11.]])" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X = x.reshape(3, 4) # == x.reshape(3, -1) == x.reshape(-1, 4)\n", "X" ] }, { "cell_type": "code", "execution_count": 13, "id": "9a4ab7c1-7750-4d2a-af51-1661e16624a8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[[0., 0., 0., 0.],\n", " [0., 0., 0., 0.],\n", " [0., 0., 0., 0.]],\n", "\n", " [[0., 0., 0., 0.],\n", " [0., 0., 0., 0.],\n", " [0., 0., 0., 0.]]])" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.zeros((2,3,4))" ] }, { "cell_type": "code", "execution_count": 16, "id": "d5d156ab-cd43-4f6e-8e7e-ea4d43d154be", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([[[1., 1., 1., 1.],\n", " [1., 1., 1., 1.],\n", " [1., 1., 1., 1.]],\n", " \n", " [[1., 1., 1., 1.],\n", " [1., 1., 1., 1.],\n", " [1., 1., 1., 1.]]]),\n", " 2)" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.ones((2,3,4)), len(torch.ones((2,3,4))) # seems that length is just along the 0th axis." 
] }, { "cell_type": "code", "execution_count": 17, "id": "77355468-24c9-43c9-94ca-ce2460d00021", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[-0.8725, 1.0880, 2.1401, 0.2476],\n", " [ 0.9554, 0.5242, -1.0524, -2.1604],\n", " [ 0.5442, 0.0042, 0.6839, 0.7259]])" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.randn(3,4) # standard deviation = 1, mean = 0" ] }, { "cell_type": "code", "execution_count": 18, "id": "e8185351-bc9c-4e33-9ea7-b53fd71d2491", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([ 8., 9., 10., 11.]),\n", " tensor([[ 4., 5., 6., 7.],\n", " [ 8., 9., 10., 11.]]))" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X[-1], X[1:3] # applied along axis 0." ] }, { "cell_type": "code", "execution_count": 19, "id": "f238292a-84e3-444f-8e86-7ec015602cd7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[ 0., 1., 2., 3.],\n", " [ 4., 5., 17., 7.],\n", " [ 8., 9., 10., 11.]])" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X[1, 2] = 17\n", "X" ] }, { "cell_type": "code", "execution_count": 21, "id": "5aeca8bd-c8e4-4c7c-9ded-73d5b479f4eb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[12., 12., 12., 12.],\n", " [12., 12., 12., 12.],\n", " [ 8., 9., 10., 11.]])" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X[:2, :] = 12\n", "X" ] }, { "cell_type": "code", "execution_count": 22, "id": "2e97ae00-23e6-463a-a5ab-eb7d71e76465", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([162754.7969, 162754.7969, 162754.7969, 162754.7969, 162754.7969,\n", " 162754.7969, 162754.7969, 162754.7969, 2980.9580, 8103.0840,\n", " 22026.4648, 59874.1406])" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.exp(x)" ] }, { "cell_type": "code", 
"execution_count": 24, "id": "52e99dd5-c530-4982-92f9-ff4be5182bfb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([[ 0., 1., 2., 3.],\n", " [ 4., 5., 6., 7.],\n", " [ 8., 9., 10., 11.],\n", " [ 2., 1., 4., 3.],\n", " [ 1., 2., 3., 4.],\n", " [ 4., 3., 2., 1.]]),\n", " tensor([[ 0., 1., 2., 3., 2., 1., 4., 3.],\n", " [ 4., 5., 6., 7., 1., 2., 3., 4.],\n", " [ 8., 9., 10., 11., 4., 3., 2., 1.]]))" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X = torch.arange(12, dtype=torch.float32).reshape((3,4))\n", "Y = torch.tensor([[2.0,1,4,3], [1,2,3,4], [4,3,2,1]])\n", "torch.cat((X,Y), dim=0), torch.cat((X,Y), dim=1)" ] }, { "cell_type": "code", "execution_count": 29, "id": "ea277935-483a-49bc-977b-a62b44fcd235", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([[False, True, False, True],\n", " [False, False, False, False],\n", " [False, False, False, False]]),\n", " tensor([[ True, False, True, False],\n", " [False, False, False, False],\n", " [False, False, False, False]]),\n", " tensor([[False, False, False, False],\n", " [ True, True, True, True],\n", " [ True, True, True, True]]),\n", " tensor([[False, True, False, True],\n", " [ True, True, True, True],\n", " [ True, True, True, True]]),\n", " tensor([[ True, True, True, True],\n", " [False, False, False, False],\n", " [False, False, False, False]]))" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X == Y, X < Y, X > Y, X >= Y, X <= Y" ] }, { "cell_type": "code", "execution_count": 30, "id": "189a0c85-5482-48c1-b5e2-24580db02909", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor(66.)" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X.sum()" ] }, { "cell_type": "code", "execution_count": 32, "id": "7ea9d17e-f1d6-4674-86fb-1664a8d8517c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([[0],\n", " [1],\n", " 
[2]]),\n", " tensor([[0, 1]]))" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a = torch.arange(3).reshape((3,1))\n", "b = torch.arange(2).reshape((1,2))\n", "a, b" ] }, { "cell_type": "code", "execution_count": 33, "id": "54bc2963-a134-4c2c-aae9-80d2555fd607", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[0, 1],\n", " [1, 2],\n", " [2, 3]])" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a + b" ] }, { "cell_type": "code", "execution_count": 36, "id": "f628b2f8-2445-4547-a2fc-68ed2fe506ee", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "before = id(Y)\n", "Y = Y + X\n", "id(Y) == before" ] }, { "cell_type": "code", "execution_count": 39, "id": "9102b526-6643-43cd-901c-b3bd1222c6ff", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id(Z): 132091087213568\n", "id(Z): 132091087213568\n" ] } ], "source": [ "Z = torch.zeros_like(Y)\n", "print('id(Z):', id(Z))\n", "Z[:] = X + Y\n", "print('id(Z):', id(Z))" ] }, { "cell_type": "code", "execution_count": 42, "id": "6f78c10d-daa1-4de7-842c-6634b2b777b5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "before = id(X)\n", "X += Y\n", "id(X) == before" ] }, { "cell_type": "code", "execution_count": 43, "id": "11d6b2c7-b81c-4307-8b84-2bb374edeeaa", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(numpy.ndarray, torch.Tensor)" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "A = X.numpy()\n", "B = torch.from_numpy(A)\n", "type(A), type(B)" ] }, { "cell_type": "code", "execution_count": 44, "id": "02b88ed7-7e1d-4338-b0c7-ffb0fd26de5d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ 
"(tensor([3.5000]), 3.5, 3.5, 3)" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a = torch.tensor([3.5])\n", "a, a.item(), float(a), int(a)" ] }, { "cell_type": "code", "execution_count": 56, "id": "44d3bbcf-77ec-4837-a40f-be3008e56e14", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([[[ 0, 1],\n", " [ 2, 3]],\n", " \n", " [[ 4, 5],\n", " [ 6, 7]],\n", " \n", " [[ 8, 9],\n", " [10, 11]],\n", " \n", " [[12, 13],\n", " [14, 15]]]),\n", " tensor([0, 1]),\n", " tensor([[[ 0, 2],\n", " [ 2, 4]],\n", " \n", " [[ 4, 6],\n", " [ 6, 8]],\n", " \n", " [[ 8, 10],\n", " [10, 12]],\n", " \n", " [[12, 14],\n", " [14, 16]]]))" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d = torch.arange(16).reshape((4,2,-1))\n", "e = torch.arange(2) # shape (2,): broadcasting aligns trailing axes, so e is added to every length-2 row of d (shape (4, 2, 2))\n", "d, e, d + e" ] }, { "cell_type": "markdown", "id": "eb1f1cd9-a13f-432a-9f41-dd3b609f0879", "metadata": {}, "source": [ "## 2.2 Data Preprocessing" ] }, { "cell_type": "code", "execution_count": 57, "id": "bccaf97c-04ae-4c52-86d5-2164d671aa4a", "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "os.makedirs(os.path.join('..', 'data'), exist_ok=True)\n", "data_file = os.path.join('..', 'data', 'house_tiny.csv')\n", "# Build the CSV from explicit rows. An indented triple-quoted literal puts leading\n", "# spaces in front of every data row, so pandas reads ' NA' / ' 2' as strings and\n", "# NumRooms gets one-hot encoded instead of being parsed as a number with NaNs.\n", "csv_rows = [\n", "    'NumRooms,RoofType,Price',\n", "    'NA,NA,127500',\n", "    '2,NA,106000',\n", "    '4,Slate,178100',\n", "    'NA,NA,140000',\n", "]\n", "with open(data_file, 'w') as f:\n", "    f.write('\\n'.join(csv_rows))" ] }, { "cell_type": "code", "execution_count": 58, "id": "179526f8-53e0-4523-9892-6bc29c918988", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "   NumRooms RoofType   Price\n", "0       NaN      NaN  127500\n", "1       2.0      NaN  106000\n", "2       4.0    Slate  178100\n", "3       NaN      NaN  140000\n" ] } ], "source": [ "import pandas as pd\n", "data = pd.read_csv(data_file)\n", "print(data)" ] }, { "cell_type": "code", "execution_count": 60, "id": "65aad730-8ec6-4a37-9b8a-781e2e7d19cb", "metadata": {}, "outputs": [ { 
"name": "stdout", "output_type": "stream", "text": [ "   NumRooms  RoofType_Slate  RoofType_nan\n", "0       NaN           False          True\n", "1       2.0           False          True\n", "2       4.0            True         False\n", "3       NaN           False          True\n" ] } ], "source": [ "inputs , targets = data.iloc[:, 0:-1], data.iloc[:, -1]\n", "inputs = pd.get_dummies(inputs, dummy_na=True)\n", "print(inputs)" ] }, { "cell_type": "code", "execution_count": 61, "id": "1203db17-c648-4e94-b306-a2d36ce4c8dc", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "   NumRooms  RoofType_Slate  RoofType_nan\n", "0       3.0           False          True\n", "1       2.0           False          True\n", "2       4.0            True         False\n", "3       3.0           False          True\n" ] } ], "source": [ "inputs = inputs.fillna(inputs.mean())\n", "print(inputs)" ] }, { "cell_type": "code", "execution_count": 62, "id": "3cf97c46-b4c9-4073-b37a-7b1d59ba8f8d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([[3., 0., 1.],\n", "         [2., 0., 1.],\n", "         [4., 1., 0.],\n", "         [3., 0., 1.]], dtype=torch.float64),\n", " tensor([127500., 106000., 178100., 140000.], dtype=torch.float64))" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X = torch.tensor(inputs.to_numpy(dtype=float))\n", "y = torch.tensor(targets.to_numpy(dtype=float))\n", "X, y" ] }, { "cell_type": "markdown", "id": "10c4d254-79cf-4463-b61c-bb081e909300", "metadata": {}, "source": [ "## 2.3 Linear Algebra" ] }, { "cell_type": "code", "execution_count": 66, "id": "da157cb9-c0a3-498c-aad2-06da2f2da85c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[0, 3],\n", " [1, 4],\n", " [2, 5]])" ] }, 
"execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "A = torch.arange(6).reshape(2,-1)\n", "A.T" ] }, { "cell_type": "code", "execution_count": 67, "id": "e59887cd-bf1f-4383-b974-5b1440bfd176", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[0, 1, 2],\n", " [3, 4, 5]])" ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [ "B = A.clone()\n", "B" ] }, { "cell_type": "code", "execution_count": 69, "id": "9272df75-d999-4cd4-a356-3cebc03bd6ac", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[ 0, 1, 4],\n", " [ 9, 16, 25]])" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "A * B # hadamard product" ] }, { "cell_type": "code", "execution_count": 81, "id": "b55fbe97-aef5-45de-853c-dc5fddce716a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([[0, 1, 2],\n", " [3, 4, 5]]),\n", " tensor([3, 5, 7]),\n", " tensor([ 3, 12]))" ] }, "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ "A, A.sum(axis=0), A.sum(axis=1)" ] }, { "cell_type": "code", "execution_count": 82, "id": "a5b32189-6da6-4766-88f1-b6224eab99f9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([[ 3],\n", " [12]]),\n", " torch.Size([2, 1]))" ] }, "execution_count": 82, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sum_A = A.sum(axis=1, keepdims=True)\n", "sum_A, sum_A.shape" ] }, { "cell_type": "code", "execution_count": 83, "id": "5db207b7-476a-4a0d-8afe-aef98d05faab", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[0.0000, 0.3333, 0.6667],\n", " [0.2500, 0.3333, 0.4167]])" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# an artifact of maintaining the shape is:\n", "A / sum_A # where now the rows sum to 1." 
] }, { "cell_type": "code", "execution_count": 84, "id": "6b22eb9f-95df-42b0-b998-e0a1263cd063", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[0, 1, 2],\n", " [3, 5, 7]])" ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "A.cumsum(axis=0)" ] }, { "cell_type": "code", "execution_count": 88, "id": "41b4e2e9-f6ae-4ec5-b8b5-fb17110ddf36", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([0., 1., 2.]), tensor([1., 1., 1.]), tensor(3.))" ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y = torch.ones(3, dtype = torch.float32)\n", "x = torch.arange(3, dtype=torch.float32)\n", "x, y, torch.dot(x, y)" ] }, { "cell_type": "code", "execution_count": 89, "id": "b8070b1f-b33e-4800-ba58-9e6f3bf09135", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor(3.)" ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.sum(x*y) # equiv to the dot product." ] }, { "cell_type": "code", "execution_count": 92, "id": "f4d11b8e-3281-47f4-be54-05d2215571bc", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(torch.Size([2, 3]), torch.Size([3]), tensor([ 5, 14]), tensor([ 5, 14]))" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x = torch.arange(3, )\n", "A.shape, x.shape, torch.mv(A, x), A@x # mv -> matrix vector multiplication." 
] }, { "cell_type": "code", "execution_count": 101, "id": "3432a174-d08a-48d5-8899-75c61c514efe", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([[ 3, 3, 3, 3],\n", " [12, 12, 12, 12]]),\n", " tensor([[ 3, 3, 3, 3],\n", " [12, 12, 12, 12]]))" ] }, "execution_count": 101, "metadata": {}, "output_type": "execute_result" } ], "source": [ "B = torch.ones((3,4), dtype=torch.int64) # i need to be more careful about the initialisation data types.\n", "torch.mm(A, B), A@B" ] }, { "cell_type": "code", "execution_count": 102, "id": "e3184c18-5b5b-468c-9165-aae7891ad1ea", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor(5.)" ] }, "execution_count": 102, "metadata": {}, "output_type": "execute_result" } ], "source": [ "u = torch.tensor([3.0,-4.0])\n", "torch.norm(u)" ] }, { "cell_type": "code", "execution_count": 103, "id": "195286ac-8e6c-4b95-9348-d310b6ae70ef", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor(7.)" ] }, "execution_count": 103, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.abs(u).sum()" ] }, { "cell_type": "code", "execution_count": 3, "id": "9c9c0631-5895-4bec-9b58-8830b37fe354", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor(6.)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.norm(torch.ones((4,9)))" ] }, { "cell_type": "markdown", "id": "ec1aad5b-952e-446a-b92b-d6e2f795b0a3", "metadata": {}, "source": [ "## 2.4 Calculus" ] }, { "cell_type": "code", "execution_count": 6, "id": "25aa1728-169a-460f-b168-4baf792485a4", "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "import numpy as np\n", "from matplotlib_inline import backend_inline\n", "from d2l import torch as d2l" ] }, { "cell_type": "code", "execution_count": 7, "id": "9ce615c0-8392-4a89-8a9b-b9782ca7f215", "metadata": {}, "outputs": [], "source": [ "def f(x):\n", " return 3 * x ** 2 - 4 * x" ] }, { "cell_type": "code", "execution_count": 11, 
"id": "08bbbc3f-0e50-4363-a025-9e16bc852f6c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "h=0.10000,numerical limit=2.30000\n", "h=0.01000,numerical limit=2.03000\n", "h=0.00100,numerical limit=2.00300\n", "h=0.00010,numerical limit=2.00030\n", "h=0.00001,numerical limit=2.00003\n" ] } ], "source": [ "for h in 10.0**np.arange(-1,-6,-1):\n", " print(f'h={h:.5f},numerical limit={(f(1+h)-f(1))/h:.5f}')" ] }, { "cell_type": "code", "execution_count": 12, "id": "90d88ea1-03d2-461e-893e-ace17393f74a", "metadata": {}, "outputs": [], "source": [ "#@save\n", "def use_svg_display():\n", " \"\"\"Use the svg format to display a plot in Jupyter\"\"\"\n", " backend_inline.set_matplotlib_formats('svg')" ] }, { "cell_type": "code", "execution_count": 14, "id": "887e17df-ccbb-419f-8b0e-efc8f37b6270", "metadata": {}, "outputs": [], "source": [ "def set_figsize(figsize=(3.5, 2.5)): #@save\n", " \"\"\"set the figure size for matplotlib\"\"\"\n", " use_svg_display()\n", " d2l.plt.rcParams['figure.figsize'] = figsize" ] }, { "cell_type": "code", "execution_count": 15, "id": "a3833ef9-32cd-4f25-bfb2-a362413a9ab9", "metadata": {}, "outputs": [], "source": [ "#@save\n", "def set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend):\n", " \"\"\"set the axes for matplotlib\"\"\"\n", " axes.set_xlabel(xlabel), axes.set_ylabel(ylabel)\n", " axes.set_xscale(xscale), axes.set_yscale(yscale)\n", " axes.set_xlim(xlim), axes.set_ylim(ylim)\n", " if legend:\n", " axes.legend(legend)\n", " axes.grid()" ] }, { "cell_type": "code", "execution_count": 21, "id": "8bf5a390-1478-45c8-b56d-5021874c2998", "metadata": {}, "outputs": [], "source": [ "#@save\n", "def plot(X, Y=None, xlabel=None, ylabel=None, legend=[], xlim=None,\n", " ylim=None, xscale='linear', yscale='linear',\n", " fmts=('-', 'm--', 'g-.', 'r:'), figsize=(3.5, 2.5), axes=None):\n", " \"\"\"plot data points\"\"\"\n", " def has_one_axis(X): # true if X (tensor or list) has 1 axis\n", " return 
(hasattr(X, \"ndim\") and X.ndim == 1 or isinstance(X, list)\n", " and not hasattr(X[0], \"__len__\"))\n", "\n", " if has_one_axis(X): X = [X]\n", " if Y is None:\n", " X, Y = [[]] * len(X), X\n", " elif has_one_axis(Y):\n", " Y = [Y]\n", " if len(X) != len(Y):\n", " X = X * len(Y)\n", "\n", " set_figsize(figsize)\n", " if axes is None:\n", " axes = d2l.plt.gca()\n", "\n", " axes.cla()\n", " for x, y, fmt in zip(X, Y, fmts):\n", " axes.plot(x,y,fmt) if len(x) else axes.plot(y, fmt)\n", " set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend)" ] }, { "cell_type": "code", "execution_count": 22, "id": "6de06b77-3348-41df-bda3-1a8682f0d626", "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", " \n", " \n", " \n", " \n", " 2025-06-08T15:14:15.585636\n", " image/svg+xml\n", " \n", " \n", " Matplotlib v3.7.2, https://matplotlib.org/\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n" ], "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "x = np.arange(0, 3, 0.1)\n", "plot(x, [f(x), 2 * x - 3], 'x', 'f(x)', legend=['f(x)', 'Tangent line (x=1)'])" ] }, { "cell_type": "code", "execution_count": 24, "id": "24df052d-2ce5-439f-b13e-3a2723400058", "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", " \n", " \n", " \n", " \n", " 2025-06-08T16:06:28.047091\n", " image/svg+xml\n", " \n", " \n", " Matplotlib v3.7.2, https://matplotlib.org/\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n" ], "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "def l2_norm(x):\n", "    \"\"\"Return the Euclidean (L2) norm of ``x``.\"\"\"\n", "    return np.linalg.norm(x)\n", "\n", "def grad_l2_norm(x):\n", "    \"\"\"Return the gradient of the L2 norm at ``x``, i.e. ``x / ||x||``.\n", "\n", "    The gradient is undefined at the origin; a zero vector shaped like ``x``\n", "    is returned there, so the function works in any dimension.\n", "    \"\"\"\n", "    x = np.asarray(x, dtype=float)\n", "    norm = l2_norm(x)\n", "    if norm == 0:\n", "        return np.zeros_like(x)  # or raise an exception\n", "    return x / norm\n", "\n", "# Create a grid of points in R2\n", "x_vals = np.linspace(-2, 2, 20)\n", "y_vals = np.linspace(-2, 2, 20)\n", "X, Y = np.meshgrid(x_vals, y_vals)\n", "U = np.zeros_like(X)\n", "V = np.zeros_like(Y)\n", "\n", "# Compute gradient vectors\n", "for i in range(X.shape[0]):\n", "    for j in range(X.shape[1]):\n", "        point = np.array([X[i, j], Y[i, j]])\n", "        grad = grad_l2_norm(point)\n", "        U[i, j], V[i, j] = grad[0], grad[1]\n", "\n", "# Plot the vector field\n", "plt.figure(figsize=(6, 6))\n", "plt.quiver(X, Y, U, V, color='blue')\n", "plt.title('Gradient of L2 Norm in $\\\\mathbb{R}^2$')\n", "plt.xlabel('x')\n", "plt.ylabel('y')\n", "plt.grid(True)\n", "plt.gca().set_aspect('equal')\n", "plt.show()\n", "# notice that the gradients are all pointing in the direction of _steepest ascent_.\n", "# with loss functions we would step in the negative of this direction, \n", "# i.e. towards the centre; dragging weights to 0 as expected." 
] }, { "cell_type": "markdown", "id": "5316904d-af02-4fd5-b22d-c16f9b65da4f", "metadata": {}, "source": [ "## 2.5 Automatic Differentiation" ] }, { "cell_type": "code", "execution_count": 32, "id": "fb16c41b-5fa7-4190-aec8-5a35d999f8dc", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([0., 1., 2., 3.])" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x = torch.arange(4.0)\n", "x" ] }, { "cell_type": "code", "execution_count": 33, "id": "c0227d73-b684-44f9-b573-468bd29a1602", "metadata": {}, "outputs": [], "source": [ "x.requires_grad_(True) # note the trailing underscore: requires_grad_() modifies x in place\n", "x.grad" ] }, { "cell_type": "code", "execution_count": 34, "id": "b9ed55bd-7838-404f-a988-05e6e787e3b5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor(28., grad_fn=)" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y = 2 * torch.dot(x, x)\n", "y" ] }, { "cell_type": "code", "execution_count": 35, "id": "8f482ebf-9eb7-4056-90d9-ec424c2d263e", "metadata": {}, "outputs": [], "source": [ "y.backward()" ] }, { "cell_type": "code", "execution_count": 37, "id": "7f53b3ab-f271-4d6f-9941-06d47aceff7d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([0., 1., 2., 3.], requires_grad=True)" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x" ] }, { "cell_type": "code", "execution_count": 38, "id": "41b3bf75-e394-4fb1-ad7a-c1df099df021", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([ 0., 4., 8., 12.])" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.grad" ] }, { "cell_type": "code", "execution_count": 39, "id": "94d9ec23-690f-41f3-b20e-eec46e86687d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([1., 1., 1., 1.])" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.grad.zero_() # trailing underscore (in-place op) 
again; resets the gradient -- important to do!\n", "y = x.sum()\n", "y.backward()\n", "x.grad" ] }, { "cell_type": "code", "execution_count": 40, "id": "0fdb539c-7ef8-4fb4-b2b8-53ef0c11229f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([0., 2., 4., 6.])" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.grad.zero_()\n", "y = x*x\n", "y.backward(gradient=torch.ones(len(y))) # faster is y.sum().backward()\n", "x.grad" ] }, { "cell_type": "code", "execution_count": 41, "id": "ba6b30e2-ab9e-4c54-aea6-393b9610da7d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([True, True, True, True])" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.grad.zero_()\n", "y = x*x\n", "u = y.detach()\n", "z = u*x\n", "z.sum().backward()\n", "x.grad == u" ] }, { "cell_type": "code", "execution_count": 43, "id": "119d4827-c90f-49ef-92f0-d04af0442481", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([True, True, True, True])" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.grad.zero_()\n", "y.sum().backward()\n", "x.grad == 2 * x" ] }, { "cell_type": "code", "execution_count": 44, "id": "b44f0c1e-d13f-408e-8e0a-b0204f7d0858", "metadata": {}, "outputs": [], "source": [ "def f(a):\n", " b = a * 2\n", " while b.norm() < 1000:\n", " b = b * 2\n", " if b.sum() > 0:\n", " c = b\n", " else:\n", " c = 100 * b\n", " return c\n", " " ] }, { "cell_type": "code", "execution_count": 45, "id": "5a67be82-b886-47c1-8778-1711acb1cf93", "metadata": {}, "outputs": [], "source": [ "a = torch.randn(size=(), requires_grad=True) # here requires_grad is a keyword argument, not the in-place requires_grad_() method\n", "d = f(a)\n", "d.backward()" ] }, { "cell_type": "code", "execution_count": 46, "id": "55f1e5ce-7f3b-4e99-b615-d22d75fa78e0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor(True)" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } 
], "source": [ "a.grad == d / a" ] }, { "cell_type": "code", "execution_count": null, "id": "e6233a45-f0c1-46aa-b34f-74647df39be6", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.23" } }, "nbformat": 4, "nbformat_minor": 5 }