{
"cells": [
{
"cell_type": "markdown",
"id": "a56cf352-227e-4e7c-b3a3-08bcee89474b",
"metadata": {},
"source": [
"## 2.1 Data Manipulation"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "d6bdec3c-efd6-4ef7-a9a8-81e3ab934c02",
"metadata": {},
"outputs": [],
"source": [
"import torch"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "ae2b9f54-eb49-4280-9d85-2c7b9f187046",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11.])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = torch.arange(12, dtype=torch.float32)\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "84cb49da-a4d6-44f2-8844-f366e087d0e7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"12"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x.numel() # num elements; presumably"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "d86a96f7-c85b-4cf1-b5de-65c97800b36d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"12"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(x)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "77d50fa1-6a4c-4636-97d5-ce0d4a0df847",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([12])"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x.shape # no brackets! it's an attribute of the tensor not a method."
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "910234a0-3cdf-4ddb-b7a0-f6a35f3d8a58",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 0., 1., 2., 3.],\n",
" [ 4., 5., 6., 7.],\n",
" [ 8., 9., 10., 11.]])"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X = x.reshape(3, 4) # == x.reshape(3, -1) == x.reshape(-1, 4)\n",
"X"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "9a4ab7c1-7750-4d2a-af51-1661e16624a8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[[0., 0., 0., 0.],\n",
" [0., 0., 0., 0.],\n",
" [0., 0., 0., 0.]],\n",
"\n",
" [[0., 0., 0., 0.],\n",
" [0., 0., 0., 0.],\n",
" [0., 0., 0., 0.]]])"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"torch.zeros((2,3,4))"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "d5d156ab-cd43-4f6e-8e7e-ea4d43d154be",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([[[1., 1., 1., 1.],\n",
" [1., 1., 1., 1.],\n",
" [1., 1., 1., 1.]],\n",
" \n",
" [[1., 1., 1., 1.],\n",
" [1., 1., 1., 1.],\n",
" [1., 1., 1., 1.]]]),\n",
" 2)"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"torch.ones((2,3,4)), len(torch.ones((2,3,4))) # seems that length is just along the 0th axis."
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "77355468-24c9-43c9-94ca-ce2460d00021",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[-0.8725, 1.0880, 2.1401, 0.2476],\n",
" [ 0.9554, 0.5242, -1.0524, -2.1604],\n",
" [ 0.5442, 0.0042, 0.6839, 0.7259]])"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"torch.randn(3,4) # standard deviation = 1, mean = 0"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "e8185351-bc9c-4e33-9ea7-b53fd71d2491",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([ 8., 9., 10., 11.]),\n",
" tensor([[ 4., 5., 6., 7.],\n",
" [ 8., 9., 10., 11.]]))"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X[-1], X[1:3] # applied along axis 0."
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "f238292a-84e3-444f-8e86-7ec015602cd7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 0., 1., 2., 3.],\n",
" [ 4., 5., 17., 7.],\n",
" [ 8., 9., 10., 11.]])"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X[1, 2] = 17\n",
"X"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "5aeca8bd-c8e4-4c7c-9ded-73d5b479f4eb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[12., 12., 12., 12.],\n",
" [12., 12., 12., 12.],\n",
" [ 8., 9., 10., 11.]])"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X[:2, :] = 12\n",
"X"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "2e97ae00-23e6-463a-a5ab-eb7d71e76465",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([162754.7969, 162754.7969, 162754.7969, 162754.7969, 162754.7969,\n",
" 162754.7969, 162754.7969, 162754.7969, 2980.9580, 8103.0840,\n",
" 22026.4648, 59874.1406])"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"torch.exp(x)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "52e99dd5-c530-4982-92f9-ff4be5182bfb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([[ 0., 1., 2., 3.],\n",
" [ 4., 5., 6., 7.],\n",
" [ 8., 9., 10., 11.],\n",
" [ 2., 1., 4., 3.],\n",
" [ 1., 2., 3., 4.],\n",
" [ 4., 3., 2., 1.]]),\n",
" tensor([[ 0., 1., 2., 3., 2., 1., 4., 3.],\n",
" [ 4., 5., 6., 7., 1., 2., 3., 4.],\n",
" [ 8., 9., 10., 11., 4., 3., 2., 1.]]))"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X = torch.arange(12, dtype=torch.float32).reshape((3,4))\n",
"Y = torch.tensor([[2.0,1,4,3], [1,2,3,4], [4,3,2,1]])\n",
"torch.cat((X,Y), dim=0), torch.cat((X,Y), dim=1)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "ea277935-483a-49bc-977b-a62b44fcd235",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([[False, True, False, True],\n",
" [False, False, False, False],\n",
" [False, False, False, False]]),\n",
" tensor([[ True, False, True, False],\n",
" [False, False, False, False],\n",
" [False, False, False, False]]),\n",
" tensor([[False, False, False, False],\n",
" [ True, True, True, True],\n",
" [ True, True, True, True]]),\n",
" tensor([[False, True, False, True],\n",
" [ True, True, True, True],\n",
" [ True, True, True, True]]),\n",
" tensor([[ True, True, True, True],\n",
" [False, False, False, False],\n",
" [False, False, False, False]]))"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X == Y, X < Y, X > Y, X >= Y, X <= Y"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "189a0c85-5482-48c1-b5e2-24580db02909",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(66.)"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X.sum()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "7ea9d17e-f1d6-4674-86fb-1664a8d8517c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([[0],\n",
" [1],\n",
" [2]]),\n",
" tensor([[0, 1]]))"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a = torch.arange(3).reshape((3,1))\n",
"b = torch.arange(2).reshape((1,2))\n",
"a, b"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "54bc2963-a134-4c2c-aae9-80d2555fd607",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[0, 1],\n",
" [1, 2],\n",
" [2, 3]])"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a + b"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "f628b2f8-2445-4547-a2fc-68ed2fe506ee",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"before = id(Y)\n",
"Y = Y + X\n",
"id(Y) == before"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "9102b526-6643-43cd-901c-b3bd1222c6ff",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id(Z): 132091087213568\n",
"id(Z): 132091087213568\n"
]
}
],
"source": [
"Z = torch.zeros_like(Y)\n",
"print('id(Z):', id(Z))\n",
"Z[:] = X + Y\n",
"print('id(Z):', id(Z))"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "6f78c10d-daa1-4de7-842c-6634b2b777b5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"before = id(X)\n",
"X += Y\n",
"id(X) == before"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "11d6b2c7-b81c-4307-8b84-2bb374edeeaa",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(numpy.ndarray, torch.Tensor)"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"A = X.numpy()\n",
"B = torch.from_numpy(A)\n",
"type(A), type(B)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "02b88ed7-7e1d-4338-b0c7-ffb0fd26de5d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([3.5000]), 3.5, 3.5, 3)"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a = torch.tensor([3.5])\n",
"a, a.item(), float(a), int(a)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "44d3bbcf-77ec-4837-a40f-be3008e56e14",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([[[ 0, 1],\n",
" [ 2, 3]],\n",
" \n",
" [[ 4, 5],\n",
" [ 6, 7]],\n",
" \n",
" [[ 8, 9],\n",
" [10, 11]],\n",
" \n",
" [[12, 13],\n",
" [14, 15]]]),\n",
" tensor([0, 1]),\n",
" tensor([[[ 0, 2],\n",
" [ 2, 4]],\n",
" \n",
" [[ 4, 6],\n",
" [ 6, 8]],\n",
" \n",
" [[ 8, 10],\n",
" [10, 12]],\n",
" \n",
" [[12, 14],\n",
" [14, 16]]]))"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d = torch.arange(16).reshape((4,2,-1))\n",
"e = torch.arange(2) # I do not understand n-dimensional broadcasting!\n",
"d, e, d + e"
]
},
{
"cell_type": "markdown",
"id": "eb1f1cd9-a13f-432a-9f41-dd3b609f0879",
"metadata": {},
"source": [
"## 2.2 Data Preprocessing"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "bccaf97c-04ae-4c52-86d5-2164d671aa4a",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.makedirs(os.path.join('..', 'data'), exist_ok=True)\n",
"data_file = os.path.join('..', 'data', 'house_tiny.csv')\n",
"with open(data_file, 'w') as f:\n",
" f.write('''NumRooms,RoofType,Price\n",
" NA,NA,127500\n",
" 2,NA,106000\n",
" 4,Slate,178100\n",
" NA,NA,140000''')"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "179526f8-53e0-4523-9892-6bc29c918988",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" NumRooms RoofType Price\n",
"0 NA NaN 127500\n",
"1 2 NaN 106000\n",
"2 4 Slate 178100\n",
"3 NA NaN 140000\n"
]
}
],
"source": [
"import pandas as pd\n",
"data = pd.read_csv(data_file)\n",
"print(data)"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "65aad730-8ec6-4a37-9b8a-781e2e7d19cb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" NumRooms_ 2 NumRooms_ 4 NumRooms_ NA NumRooms_nan \\\n",
"0 False False True False \n",
"1 True False False False \n",
"2 False True False False \n",
"3 False False True False \n",
"\n",
" RoofType_Slate RoofType_nan \n",
"0 False True \n",
"1 False True \n",
"2 True False \n",
"3 False True \n"
]
}
],
"source": [
"inputs , targets = data.iloc[:, 0:-1], data.iloc[:, -1]\n",
"inputs = pd.get_dummies(inputs, dummy_na=True)\n",
"print(inputs)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "1203db17-c648-4e94-b306-a2d36ce4c8dc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" NumRooms_ 2 NumRooms_ 4 NumRooms_ NA NumRooms_nan \\\n",
"0 False False True False \n",
"1 True False False False \n",
"2 False True False False \n",
"3 False False True False \n",
"\n",
" RoofType_Slate RoofType_nan \n",
"0 False True \n",
"1 False True \n",
"2 True False \n",
"3 False True \n"
]
}
],
"source": [
"inputs = inputs.fillna(inputs.mean())\n",
"print(inputs)"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "3cf97c46-b4c9-4073-b37a-7b1d59ba8f8d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([[0., 0., 1., 0., 0., 1.],\n",
" [1., 0., 0., 0., 0., 1.],\n",
" [0., 1., 0., 0., 1., 0.],\n",
" [0., 0., 1., 0., 0., 1.]], dtype=torch.float64),\n",
" tensor([127500., 106000., 178100., 140000.], dtype=torch.float64))"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X = torch.tensor(inputs.to_numpy(dtype=float))\n",
"y = torch.tensor(targets.to_numpy(dtype=float))\n",
"X, y"
]
},
{
"cell_type": "markdown",
"id": "10c4d254-79cf-4463-b61c-bb081e909300",
"metadata": {},
"source": [
"## 2.3 Linear Algebra"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "da157cb9-c0a3-498c-aad2-06da2f2da85c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[0, 3],\n",
" [1, 4],\n",
" [2, 5]])"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"A = torch.arange(6).reshape(2,-1)\n",
"A.T"
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "e59887cd-bf1f-4383-b974-5b1440bfd176",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[0, 1, 2],\n",
" [3, 4, 5]])"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"B = A.clone()\n",
"B"
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "9272df75-d999-4cd4-a356-3cebc03bd6ac",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 0, 1, 4],\n",
" [ 9, 16, 25]])"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"A * B # hadamard product"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "b55fbe97-aef5-45de-853c-dc5fddce716a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([[0, 1, 2],\n",
" [3, 4, 5]]),\n",
" tensor([3, 5, 7]),\n",
" tensor([ 3, 12]))"
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"A, A.sum(axis=0), A.sum(axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "a5b32189-6da6-4766-88f1-b6224eab99f9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([[ 3],\n",
" [12]]),\n",
" torch.Size([2, 1]))"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sum_A = A.sum(axis=1, keepdims=True)\n",
"sum_A, sum_A.shape"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "5db207b7-476a-4a0d-8afe-aef98d05faab",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[0.0000, 0.3333, 0.6667],\n",
" [0.2500, 0.3333, 0.4167]])"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# an artifact of maintaining the shape is:\n",
"A / sum_A # where now the rows sum to 1."
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "6b22eb9f-95df-42b0-b998-e0a1263cd063",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[0, 1, 2],\n",
" [3, 5, 7]])"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"A.cumsum(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "41b4e2e9-f6ae-4ec5-b8b5-fb17110ddf36",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([0., 1., 2.]), tensor([1., 1., 1.]), tensor(3.))"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y = torch.ones(3, dtype = torch.float32)\n",
"x = torch.arange(3, dtype=torch.float32)\n",
"x, y, torch.dot(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "b8070b1f-b33e-4800-ba58-9e6f3bf09135",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(3.)"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"torch.sum(x*y) # equiv to the dot product."
]
},
{
"cell_type": "code",
"execution_count": 92,
"id": "f4d11b8e-3281-47f4-be54-05d2215571bc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(torch.Size([2, 3]), torch.Size([3]), tensor([ 5, 14]), tensor([ 5, 14]))"
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = torch.arange(3, )\n",
"A.shape, x.shape, torch.mv(A, x), A@x # mv -> matrix vector multiplication."
]
},
{
"cell_type": "code",
"execution_count": 101,
"id": "3432a174-d08a-48d5-8899-75c61c514efe",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([[ 3, 3, 3, 3],\n",
" [12, 12, 12, 12]]),\n",
" tensor([[ 3, 3, 3, 3],\n",
" [12, 12, 12, 12]]))"
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"B = torch.ones((3,4), dtype=torch.int64) # i need to be more careful about the initialisation data types.\n",
"torch.mm(A, B), A@B"
]
},
{
"cell_type": "code",
"execution_count": 102,
"id": "e3184c18-5b5b-468c-9165-aae7891ad1ea",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(5.)"
]
},
"execution_count": 102,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"u = torch.tensor([3.0,-4.0])\n",
"torch.norm(u)"
]
},
{
"cell_type": "code",
"execution_count": 103,
"id": "195286ac-8e6c-4b95-9348-d310b6ae70ef",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(7.)"
]
},
"execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"torch.abs(u).sum()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "9c9c0631-5895-4bec-9b58-8830b37fe354",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(6.)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"torch.norm(torch.ones((4,9)))"
]
},
{
"cell_type": "markdown",
"id": "ec1aad5b-952e-446a-b92b-d6e2f795b0a3",
"metadata": {},
"source": [
"## 2.4 Calculus"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "25aa1728-169a-460f-b168-4baf792485a4",
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import numpy as np\n",
"from matplotlib_inline import backend_inline\n",
"from d2l import torch as d2l"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "9ce615c0-8392-4a89-8a9b-b9782ca7f215",
"metadata": {},
"outputs": [],
"source": [
"def f(x):\n",
" return 3 * x ** 2 - 4 * x"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "08bbbc3f-0e50-4363-a025-9e16bc852f6c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"h=0.10000,numerical limit=2.30000\n",
"h=0.01000,numerical limit=2.03000\n",
"h=0.00100,numerical limit=2.00300\n",
"h=0.00010,numerical limit=2.00030\n",
"h=0.00001,numerical limit=2.00003\n"
]
}
],
"source": [
"for h in 10.0**np.arange(-1,-6,-1):\n",
" print(f'h={h:.5f},numerical limit={(f(1+h)-f(1))/h:.5f}')"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "90d88ea1-03d2-461e-893e-ace17393f74a",
"metadata": {},
"outputs": [],
"source": [
"#@save\n",
"def use_svg_display():\n",
"    \"\"\"Ask the inline matplotlib backend to render figures as SVG.\"\"\"\n",
"    svg_formats = ('svg',)\n",
"    backend_inline.set_matplotlib_formats(*svg_formats)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "887e17df-ccbb-419f-8b0e-efc8f37b6270",
"metadata": {},
"outputs": [],
"source": [
"def set_figsize(figsize=(3.5, 2.5)):  #@save\n",
"    \"\"\"Select SVG rendering, then store `figsize` as matplotlib's default figure size.\"\"\"\n",
"    use_svg_display()\n",
"    d2l.plt.rcParams.update({'figure.figsize': figsize})"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "a3833ef9-32cd-4f25-bfb2-a362413a9ab9",
"metadata": {},
"outputs": [],
"source": [
"#@save\n",
"def set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend):\n",
" \"\"\"set the axes for matplotlib\"\"\"\n",
" axes.set_xlabel(xlabel), axes.set_ylabel(ylabel)\n",
" axes.set_xscale(xscale), axes.set_yscale(yscale)\n",
" axes.set_xlim(xlim), axes.set_ylim(ylim)\n",
" if legend:\n",
" axes.legend(legend)\n",
" axes.grid()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "8bf5a390-1478-45c8-b56d-5021874c2998",
"metadata": {},
"outputs": [],
"source": [
"#@save\n",
"def plot(X, Y=None, xlabel=None, ylabel=None, legend=None, xlim=None,\n",
"         ylim=None, xscale='linear', yscale='linear',\n",
"         fmts=('-', 'm--', 'g-.', 'r:'), figsize=(3.5, 2.5), axes=None):\n",
"    \"\"\"Plot one or more curves on a single matplotlib axes.\n",
"\n",
"    X holds the x-values and Y the y-values; each may be a single 1-D sequence\n",
"    or a list of them. If Y is None, X is taken as the y-values and each curve\n",
"    is drawn without explicit x-values. When the number of x- and y-sequences\n",
"    differs, X is repeated len(Y) times so one shared X can serve every curve.\n",
"    Curves are paired with `fmts` through zip, so curves beyond len(fmts) are\n",
"    not drawn. `legend` is forwarded to set_axes; None or empty means no legend.\n",
"    The target axes (the current axes when `axes` is None) is cleared first.\n",
"    \"\"\"\n",
"    def has_one_axis(data):\n",
"        \"\"\"True if `data` is a 1-D array/tensor or a flat (non-nested) list.\"\"\"\n",
"        if hasattr(data, \"ndim\") and data.ndim == 1:\n",
"            return True\n",
"        if not isinstance(data, list):\n",
"            return False\n",
"        # Guard the empty list: there is no first element to inspect.\n",
"        return not data or not hasattr(data[0], \"__len__\")\n",
"\n",
"    if has_one_axis(X):\n",
"        X = [X]\n",
"    if Y is None:\n",
"        X, Y = [[]] * len(X), X\n",
"    elif has_one_axis(Y):\n",
"        Y = [Y]\n",
"    if len(X) != len(Y):\n",
"        X = X * len(Y)\n",
"\n",
"    set_figsize(figsize)\n",
"    if axes is None:\n",
"        axes = d2l.plt.gca()\n",
"\n",
"    axes.cla()\n",
"    for x, y, fmt in zip(X, Y, fmts):\n",
"        # An empty x means \"no explicit x-values\": plot y on its own.\n",
"        if len(x):\n",
"            axes.plot(x, y, fmt)\n",
"        else:\n",
"            axes.plot(y, fmt)\n",
"    set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "6de06b77-3348-41df-bda3-1a8682f0d626",
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"x = np.arange(0, 3, 0.1)\n",
"plot(x, [f(x), 2 * x - 3], 'x', 'f(x)', legend=['f(x)', 'Tangent line (x=1)'])"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "24df052d-2ce5-439f-b13e-3a2723400058",
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# L2 norm function\n",
"def l2_norm(x):\n",
" return np.linalg.norm(x)\n",
"\n",
"# Gradient of L2 norm (undefined at origin, so we handle that)\n",
"def grad_l2_norm(x):\n",
" norm = np.linalg.norm(x)\n",
" if norm == 0:\n",
" return np.array([0.0, 0.0]) # or raise an exception\n",
" return x / norm\n",
"\n",
"# Create a grid of points in R2\n",
"x_vals = np.linspace(-2, 2, 20)\n",
"y_vals = np.linspace(-2, 2, 20)\n",
"X, Y = np.meshgrid(x_vals, y_vals)\n",
"U = np.zeros_like(X)\n",
"V = np.zeros_like(Y)\n",
"\n",
"# Compute gradient vectors\n",
"for i in range(X.shape[0]):\n",
" for j in range(X.shape[1]):\n",
" point = np.array([X[i, j], Y[i, j]])\n",
" grad = grad_l2_norm(point)\n",
" U[i, j], V[i, j] = grad[0], grad[1]\n",
"\n",
"# Plot the vector field\n",
"plt.figure(figsize=(6, 6))\n",
"plt.quiver(X, Y, U, V, color='blue')\n",
"plt.title('Gradient of L2 Norm in $\\\\mathbb{R}^2$')\n",
"plt.xlabel('x')\n",
"plt.ylabel('y')\n",
"plt.grid(True)\n",
"plt.gca().set_aspect('equal')\n",
"plt.show()\n",
"# notice that the gradients are all pointing in the direction of _steepest ascent_.\n",
"# with loss functions we would step in the negative of this direction, \n",
"# i.e. towards the centre; dragging weights to 0 as expected."
]
},
{
"cell_type": "markdown",
"id": "5316904d-af02-4fd5-b22d-c16f9b65da4f",
"metadata": {},
"source": [
"## 2.5 Automatic Differentiation"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "fb16c41b-5fa7-4190-aec8-5a35d999f8dc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([0., 1., 2., 3.])"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = torch.arange(4.0)\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "c0227d73-b684-44f9-b573-468bd29a1602",
"metadata": {},
"outputs": [],
"source": [
"x.requires_grad_(True) # notice the dunder after grad!\n",
"x.grad"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "b9ed55bd-7838-404f-a988-05e6e787e3b5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(28., grad_fn=)"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y = 2 * torch.dot(x, x)\n",
"y"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "8f482ebf-9eb7-4056-90d9-ec424c2d263e",
"metadata": {},
"outputs": [],
"source": [
"y.backward()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "7f53b3ab-f271-4d6f-9941-06d47aceff7d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([0., 1., 2., 3.], requires_grad=True)"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "41b3bf75-e394-4fb1-ad7a-c1df099df021",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([ 0., 4., 8., 12.])"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x.grad"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "94d9ec23-690f-41f3-b20e-eec46e86687d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([1., 1., 1., 1.])"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x.grad.zero_() # dunder again; resets the gradient -- important to do!\n",
"y = x.sum()\n",
"y.backward()\n",
"x.grad"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "0fdb539c-7ef8-4fb4-b2b8-53ef0c11229f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([0., 2., 4., 6.])"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x.grad.zero_()\n",
"y = x*x\n",
"y.backward(gradient=torch.ones(len(y))) # faster is y.sum().backward()\n",
"x.grad"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "ba6b30e2-ab9e-4c54-aea6-393b9610da7d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([True, True, True, True])"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x.grad.zero_()\n",
"y = x*x\n",
"u = y.detach()\n",
"z = u*x\n",
"z.sum().backward()\n",
"x.grad == u"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "119d4827-c90f-49ef-92f0-d04af0442481",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([True, True, True, True])"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x.grad.zero_()\n",
"y.sum().backward()\n",
"x.grad == 2 * x"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "b44f0c1e-d13f-408e-8e0a-b0204f7d0858",
"metadata": {},
"outputs": [],
"source": [
"def f(a):\n",
" b = a * 2\n",
" while b.norm() < 1000:\n",
" b = b * 2\n",
" if b.sum() > 0:\n",
" c = b\n",
" else:\n",
" c = 100 * b\n",
" return c\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "5a67be82-b886-47c1-8778-1711acb1cf93",
"metadata": {},
"outputs": [],
"source": [
"a = torch.randn(size=(), requires_grad=True) # note no dunder!\n",
"d = f(a)\n",
"d.backward()"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "55f1e5ce-7f3b-4e99-b615-d22d75fa78e0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(True)"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.grad == d / a"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e6233a45-f0c1-46aa-b34f-74647df39be6",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.23"
}
},
"nbformat": 4,
"nbformat_minor": 5
}