In [32]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import sklearn.metrics as metrics
import numpy as np
from torchvision import datasets, transforms
class NetBNormConv(nn.Module):  # 92% accuracy
    """CNN with batch normalization after every convolution.

    Three conv blocks (1 -> 32 -> 64 -> 128 channels) interleaved with
    max-pooling and dropout, then two fully connected layers ending in
    log_softmax. Batch norm gives much faster convergence.
    """

    def __init__(self):
        super(NetBNormConv, self).__init__()
        # Block 1: 1 -> 32 channels, padding keeps 28x28.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        # Block 2: 32 -> 64 channels.
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.25)
        # Block 3: 64 -> 128 channels.
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.relu = nn.ReLU()
        # 28x28 input pools down 28 -> 14 -> 7 -> 3, so 128*3*3 features.
        self.fc1 = nn.Linear(128 * 3 * 3, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        # conv -> batch norm -> relu -> pool for the first two blocks.
        out = self.conv1(x)
        out = self.pool(self.relu(self.bn1(out)))
        out = self.conv2(out)
        out = self.pool(self.relu(self.bn2(out)))
        out = self.dropout(out)
        # Third block: activate first, then pool separately.
        out = self.relu(self.bn3(self.conv3(out)))
        out = self.pool2(out)
        # Flatten per-sample feature maps and classify.
        flat = out.view(out.size(0), -1)
        hidden = self.dropout(self.relu(self.fc1(flat)))
        return F.log_softmax(self.fc2(hidden), dim=1)
class NetConv(nn.Module):  # 87% accuracy
    """Two convolutional layers and one fully connected layer.

    All layers use ReLU; the classifier output goes through log_softmax.
    """

    def __init__(self):
        super(NetConv, self).__init__()
        self.conv1 = nn.Conv2d(1, 128, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(128, 256, kernel_size=5, stride=2, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # 28x28 input: conv2 (stride 2) -> 13x13, pool -> 6x6 maps.
        self.fc = nn.Linear(256 * 6 * 6, 10)
        self.relu = nn.ReLU()
        self.act = nn.LogSoftmax(dim=1)

    def forward(self, x):
        feats = self.relu(self.conv1(x))
        feats = self.pool(self.relu(self.conv2(feats)))
        # Flatten to (batch, 256*6*6) before the classifier head.
        flat = feats.view(-1, 256 * 6 * 6)
        return self.act(self.fc(flat))
def train(model, device, train_loader, optimizer, epoch):
    """Run one training epoch over ``train_loader``.

    Performs one optimizer step per batch using negative log-likelihood
    loss (the model is expected to emit log-probabilities), printing
    progress every 100 batches.
    """
    model.train()
    n_total = len(train_loader.dataset)
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        loss = F.nll_loss(model(data), target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            seen = batch_idx * len(data)
            pct = 100. * batch_idx / len(train_loader)
            print(f'Train Epoch: {epoch} [{seen}/{n_total} ({pct:.0f}%)]\tLoss: {loss.item():.6f}')
def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader``.

    Accumulates the summed NLL loss, counts correct predictions, and
    builds a 10x10 confusion matrix over all batches, then prints the
    matrix followed by the average loss and accuracy.
    """
    model.eval()
    total_loss = 0.0
    n_correct = 0
    conf_matrix = np.zeros((10, 10))  # rows: true class, cols: prediction
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Sum (not mean) so the division by dataset size below is exact.
            total_loss += F.nll_loss(output, target, reduction='sum').item()
            # Class with the highest log-probability is the prediction.
            pred = output.argmax(dim=1, keepdim=True)
            n_correct += pred.eq(target.view_as(pred)).sum().item()
            # Accumulate this batch's confusion counts.
            conf_matrix += metrics.confusion_matrix(
                target.cpu(), pred.cpu(), labels=list(range(10)))
    np.set_printoptions(precision=4, suppress=True)
    print(conf_matrix)
    avg_loss = total_loss / len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        avg_loss, n_correct, len(test_loader.dataset),
        100. * n_correct / len(test_loader.dataset)))
def main(model, lr=0.01, mom=0.9, epochs=20):
    """Train and evaluate a network on FashionMNIST.

    Args:
        model: architecture name -- 'lin', 'full', 'bn-conv'; anything
            else selects the plain conv net.
        lr: SGD learning rate.
        mom: SGD momentum. Previously this parameter was accepted but
            silently ignored in favor of a hard-coded 0.9; it is now
            honored, with the default raised to 0.9 so default calls
            behave as before.
        epochs: number of training epochs.
    """
    use_mps = torch.backends.mps.is_available()
    device = torch.device('mps' if use_mps else 'cpu')
    # Normalize pixel values to [-1, 1].
    transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.5,), (0.5,))])
    # Fetch and load training data; shuffle each epoch (fixes the previous
    # shuffle=False, which fed training batches in a fixed order).
    trainset = datasets.FashionMNIST(root='./data', train=True,
                                     download=True, transform=transform)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=64,
                                               shuffle=True)
    # Fetch and load test data (order does not matter for evaluation).
    testset = datasets.FashionMNIST(root='./data', train=False,
                                    download=True, transform=transform)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=64,
                                              shuffle=False)
    # Choose network architecture.
    if model == 'lin':
        net = NetLin().to(device)
    elif model == 'full':
        net = NetFull().to(device)
    elif model == 'bn-conv':
        net = NetBNormConv().to(device)
        print(net)
    else:
        net = NetConv().to(device)
    # SGD with momentum and light weight decay. (The always-true
    # `if list(net.parameters()):` guard was removed.)
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=mom,
                          weight_decay=0.00001)
    # Training and testing loop.
    for epoch in range(1, epochs + 1):
        train(net, device, train_loader, optimizer, epoch)
        test(net, device, test_loader)
main('bn-conv')
NetBNormConv( (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (dropout): Dropout(p=0.25, inplace=False) (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (relu): ReLU() (fc1): Linear(in_features=1152, out_features=128, bias=True) (fc2): Linear(in_features=128, out_features=10, bias=True) ) Train Epoch: 1 [0/60000 (0%)] Loss: 2.269554 Train Epoch: 1 [6400/60000 (11%)] Loss: 0.577421 Train Epoch: 1 [12800/60000 (21%)] Loss: 0.379949 Train Epoch: 1 [19200/60000 (32%)] Loss: 0.544332 Train Epoch: 1 [25600/60000 (43%)] Loss: 0.486600 Train Epoch: 1 [32000/60000 (53%)] Loss: 0.408367 Train Epoch: 1 [38400/60000 (64%)] Loss: 0.385972 Train Epoch: 1 [44800/60000 (75%)] Loss: 0.602968 Train Epoch: 1 [51200/60000 (85%)] Loss: 0.379398 Train Epoch: 1 [57600/60000 (96%)] Loss: 0.430390 [[856. 0. 18. 44. 12. 2. 63. 0. 5. 0.] [ 1. 962. 1. 26. 7. 0. 1. 0. 2. 0.] [ 18. 0. 713. 15. 196. 0. 57. 0. 1. 0.] [ 18. 3. 6. 911. 48. 0. 14. 0. 0. 0.] [ 1. 1. 21. 31. 910. 1. 35. 0. 0. 0.] [ 0. 0. 0. 0. 0. 943. 0. 51. 0. 6.] [191. 0. 68. 32. 153. 0. 547. 0. 9. 0.] [ 0. 0. 0. 0. 0. 5. 0. 986. 0. 9.] [ 1. 1. 5. 8. 12. 9. 2. 6. 955. 1.] [ 0. 0. 0. 0. 0. 5. 1. 59. 0. 
935.]] Test set: Average loss: 0.3528, Accuracy: 8718/10000 (87%) Train Epoch: 2 [0/60000 (0%)] Loss: 0.248701 Train Epoch: 2 [6400/60000 (11%)] Loss: 0.382996 Train Epoch: 2 [12800/60000 (21%)] Loss: 0.265890 Train Epoch: 2 [19200/60000 (32%)] Loss: 0.390715 Train Epoch: 2 [25600/60000 (43%)] Loss: 0.316079 Train Epoch: 2 [32000/60000 (53%)] Loss: 0.438563 Train Epoch: 2 [38400/60000 (64%)] Loss: 0.320268 Train Epoch: 2 [44800/60000 (75%)] Loss: 0.469024 Train Epoch: 2 [51200/60000 (85%)] Loss: 0.364491 Train Epoch: 2 [57600/60000 (96%)] Loss: 0.357442 [[833. 3. 7. 41. 7. 3. 102. 0. 4. 0.] [ 3. 974. 0. 18. 3. 0. 0. 0. 2. 0.] [ 18. 1. 629. 14. 210. 0. 125. 0. 3. 0.] [ 14. 5. 3. 915. 39. 0. 23. 0. 1. 0.] [ 1. 1. 12. 35. 870. 0. 80. 0. 1. 0.] [ 0. 0. 0. 0. 0. 901. 0. 83. 0. 16.] [160. 2. 41. 37. 83. 0. 667. 0. 10. 0.] [ 0. 0. 0. 0. 0. 1. 0. 986. 0. 13.] [ 1. 1. 0. 4. 6. 2. 6. 7. 973. 0.] [ 0. 0. 1. 0. 0. 3. 0. 56. 0. 940.]] Test set: Average loss: 0.3520, Accuracy: 8688/10000 (87%) Train Epoch: 3 [0/60000 (0%)] Loss: 0.352757 Train Epoch: 3 [6400/60000 (11%)] Loss: 0.290876 Train Epoch: 3 [12800/60000 (21%)] Loss: 0.218306 Train Epoch: 3 [19200/60000 (32%)] Loss: 0.233687 Train Epoch: 3 [25600/60000 (43%)] Loss: 0.257908 Train Epoch: 3 [32000/60000 (53%)] Loss: 0.386713 Train Epoch: 3 [38400/60000 (64%)] Loss: 0.186729 Train Epoch: 3 [44800/60000 (75%)] Loss: 0.380331 Train Epoch: 3 [51200/60000 (85%)] Loss: 0.325970 Train Epoch: 3 [57600/60000 (96%)] Loss: 0.189678 [[852. 1. 3. 31. 4. 4. 100. 1. 4. 0.] [ 2. 971. 0. 22. 3. 0. 0. 0. 2. 0.] [ 26. 1. 647. 20. 183. 0. 121. 0. 2. 0.] [ 11. 4. 0. 941. 22. 0. 21. 0. 0. 1.] [ 2. 1. 7. 41. 871. 0. 78. 0. 0. 0.] [ 0. 0. 0. 0. 0. 952. 0. 36. 0. 12.] [124. 2. 26. 41. 80. 0. 718. 0. 9. 0.] [ 0. 0. 0. 0. 0. 2. 0. 970. 0. 28.] [ 1. 1. 0. 5. 3. 3. 2. 3. 981. 1.] [ 0. 0. 0. 0. 0. 3. 0. 33. 0. 
964.]] Test set: Average loss: 0.3135, Accuracy: 8867/10000 (89%) Train Epoch: 4 [0/60000 (0%)] Loss: 0.227521 Train Epoch: 4 [6400/60000 (11%)] Loss: 0.265327 Train Epoch: 4 [12800/60000 (21%)] Loss: 0.168143 Train Epoch: 4 [19200/60000 (32%)] Loss: 0.323850 Train Epoch: 4 [25600/60000 (43%)] Loss: 0.337085 Train Epoch: 4 [32000/60000 (53%)] Loss: 0.312168 Train Epoch: 4 [38400/60000 (64%)] Loss: 0.217497 Train Epoch: 4 [44800/60000 (75%)] Loss: 0.357237 Train Epoch: 4 [51200/60000 (85%)] Loss: 0.300967 Train Epoch: 4 [57600/60000 (96%)] Loss: 0.208505 [[899. 0. 6. 20. 4. 2. 65. 0. 4. 0.] [ 1. 975. 0. 20. 2. 0. 0. 0. 2. 0.] [ 27. 1. 787. 15. 96. 1. 72. 0. 1. 0.] [ 16. 2. 3. 942. 17. 0. 19. 0. 0. 1.] [ 2. 1. 22. 50. 855. 1. 69. 0. 0. 0.] [ 0. 0. 0. 0. 0. 964. 0. 23. 0. 13.] [158. 3. 50. 34. 72. 0. 673. 0. 10. 0.] [ 0. 0. 0. 0. 0. 2. 0. 982. 0. 16.] [ 1. 1. 1. 6. 2. 2. 2. 3. 982. 0.] [ 0. 0. 0. 0. 0. 3. 0. 36. 0. 961.]] Test set: Average loss: 0.2750, Accuracy: 9020/10000 (90%) Train Epoch: 5 [0/60000 (0%)] Loss: 0.147354 Train Epoch: 5 [6400/60000 (11%)] Loss: 0.251549 Train Epoch: 5 [12800/60000 (21%)] Loss: 0.091474 Train Epoch: 5 [19200/60000 (32%)] Loss: 0.234651 Train Epoch: 5 [25600/60000 (43%)] Loss: 0.307294 Train Epoch: 5 [32000/60000 (53%)] Loss: 0.314786 Train Epoch: 5 [38400/60000 (64%)] Loss: 0.208747 Train Epoch: 5 [44800/60000 (75%)] Loss: 0.434271 Train Epoch: 5 [51200/60000 (85%)] Loss: 0.326049 Train Epoch: 5 [57600/60000 (96%)] Loss: 0.215203 [[907. 0. 3. 15. 4. 4. 62. 0. 5. 0.] [ 3. 971. 0. 21. 3. 0. 0. 0. 2. 0.] [ 32. 1. 726. 7. 129. 1. 99. 0. 5. 0.] [ 21. 0. 3. 910. 36. 0. 28. 0. 0. 2.] [ 3. 1. 10. 24. 875. 1. 86. 0. 0. 0.] [ 0. 0. 0. 0. 0. 981. 0. 15. 0. 4.] [155. 1. 33. 26. 61. 0. 706. 0. 18. 0.] [ 0. 0. 0. 0. 0. 9. 0. 963. 0. 28.] [ 2. 0. 1. 4. 1. 3. 0. 5. 984. 0.] [ 0. 0. 0. 0. 0. 6. 0. 28. 0. 
966.]] Test set: Average loss: 0.2795, Accuracy: 8989/10000 (90%) Train Epoch: 6 [0/60000 (0%)] Loss: 0.184679 Train Epoch: 6 [6400/60000 (11%)] Loss: 0.244871 Train Epoch: 6 [12800/60000 (21%)] Loss: 0.102463 Train Epoch: 6 [19200/60000 (32%)] Loss: 0.234760 Train Epoch: 6 [25600/60000 (43%)] Loss: 0.401345 Train Epoch: 6 [32000/60000 (53%)] Loss: 0.287975 Train Epoch: 6 [38400/60000 (64%)] Loss: 0.224324 Train Epoch: 6 [44800/60000 (75%)] Loss: 0.329089 Train Epoch: 6 [51200/60000 (85%)] Loss: 0.288648 Train Epoch: 6 [57600/60000 (96%)] Loss: 0.125468 [[931. 0. 5. 13. 3. 2. 38. 0. 8. 0.] [ 3. 977. 0. 16. 2. 0. 0. 0. 2. 0.] [ 22. 1. 800. 10. 109. 1. 53. 0. 4. 0.] [ 19. 1. 6. 929. 24. 0. 19. 0. 1. 1.] [ 2. 1. 22. 37. 886. 1. 51. 0. 0. 0.] [ 0. 0. 0. 0. 0. 977. 0. 14. 0. 9.] [178. 1. 46. 24. 70. 0. 667. 0. 14. 0.] [ 0. 0. 0. 0. 0. 8. 0. 967. 0. 25.] [ 3. 1. 1. 2. 1. 2. 0. 3. 987. 0.] [ 0. 0. 0. 0. 0. 5. 0. 23. 0. 972.]] Test set: Average loss: 0.2526, Accuracy: 9093/10000 (91%) Train Epoch: 7 [0/60000 (0%)] Loss: 0.150053 Train Epoch: 7 [6400/60000 (11%)] Loss: 0.164031 Train Epoch: 7 [12800/60000 (21%)] Loss: 0.106214 Train Epoch: 7 [19200/60000 (32%)] Loss: 0.257319 Train Epoch: 7 [25600/60000 (43%)] Loss: 0.331515 Train Epoch: 7 [32000/60000 (53%)] Loss: 0.274458 Train Epoch: 7 [38400/60000 (64%)] Loss: 0.197483 Train Epoch: 7 [44800/60000 (75%)] Loss: 0.263878 Train Epoch: 7 [51200/60000 (85%)] Loss: 0.206326 Train Epoch: 7 [57600/60000 (96%)] Loss: 0.109148 [[925. 0. 5. 15. 4. 1. 45. 0. 5. 0.] [ 1. 983. 0. 13. 1. 0. 0. 0. 2. 0.] [ 23. 1. 799. 11. 89. 0. 74. 0. 3. 0.] [ 26. 3. 6. 920. 22. 0. 22. 0. 0. 1.] [ 2. 1. 19. 34. 872. 0. 72. 0. 0. 0.] [ 0. 0. 0. 0. 0. 967. 0. 24. 0. 9.] [164. 2. 38. 27. 60. 0. 699. 0. 10. 0.] [ 0. 0. 0. 0. 0. 1. 0. 976. 0. 23.] [ 2. 1. 1. 3. 1. 2. 1. 2. 987. 0.] [ 0. 0. 0. 0. 0. 5. 0. 21. 0. 
974.]] Test set: Average loss: 0.2468, Accuracy: 9102/10000 (91%) Train Epoch: 8 [0/60000 (0%)] Loss: 0.121899 Train Epoch: 8 [6400/60000 (11%)] Loss: 0.193384 Train Epoch: 8 [12800/60000 (21%)] Loss: 0.082954 Train Epoch: 8 [19200/60000 (32%)] Loss: 0.254026 Train Epoch: 8 [25600/60000 (43%)] Loss: 0.221451 Train Epoch: 8 [32000/60000 (53%)] Loss: 0.404695 Train Epoch: 8 [38400/60000 (64%)] Loss: 0.190577 Train Epoch: 8 [44800/60000 (75%)] Loss: 0.283892 Train Epoch: 8 [51200/60000 (85%)] Loss: 0.252844 Train Epoch: 8 [57600/60000 (96%)] Loss: 0.110405 [[923. 0. 4. 13. 3. 2. 53. 0. 2. 0.] [ 4. 977. 0. 14. 2. 0. 1. 0. 2. 0.] [ 20. 1. 752. 8. 141. 0. 77. 0. 1. 0.] [ 22. 0. 7. 920. 26. 0. 24. 0. 0. 1.] [ 2. 1. 8. 26. 935. 0. 28. 0. 0. 0.] [ 0. 0. 0. 0. 0. 973. 0. 19. 0. 8.] [154. 1. 31. 21. 91. 0. 695. 0. 7. 0.] [ 0. 0. 0. 0. 0. 1. 0. 981. 0. 18.] [ 2. 1. 0. 3. 6. 2. 1. 3. 982. 0.] [ 0. 0. 0. 0. 0. 5. 0. 29. 0. 966.]] Test set: Average loss: 0.2563, Accuracy: 9104/10000 (91%) Train Epoch: 9 [0/60000 (0%)] Loss: 0.136938 Train Epoch: 9 [6400/60000 (11%)] Loss: 0.247591 Train Epoch: 9 [12800/60000 (21%)] Loss: 0.090397 Train Epoch: 9 [19200/60000 (32%)] Loss: 0.241102 Train Epoch: 9 [25600/60000 (43%)] Loss: 0.269329 Train Epoch: 9 [32000/60000 (53%)] Loss: 0.310535 Train Epoch: 9 [38400/60000 (64%)] Loss: 0.168209 Train Epoch: 9 [44800/60000 (75%)] Loss: 0.234917 Train Epoch: 9 [51200/60000 (85%)] Loss: 0.158344 Train Epoch: 9 [57600/60000 (96%)] Loss: 0.131066 [[926. 0. 5. 12. 2. 1. 48. 0. 6. 0.] [ 4. 979. 0. 14. 1. 0. 0. 0. 2. 0.] [ 28. 1. 795. 10. 97. 0. 68. 0. 1. 0.] [ 21. 0. 6. 952. 8. 0. 12. 0. 0. 1.] [ 2. 1. 17. 38. 890. 0. 51. 0. 1. 0.] [ 0. 0. 0. 0. 0. 976. 0. 19. 0. 5.] [163. 1. 34. 30. 63. 0. 703. 0. 6. 0.] [ 0. 0. 0. 0. 0. 5. 0. 979. 0. 16.] [ 3. 1. 1. 2. 1. 2. 0. 3. 987. 0.] [ 1. 0. 0. 0. 0. 4. 0. 37. 0. 
958.]] Test set: Average loss: 0.2417, Accuracy: 9145/10000 (91%) Train Epoch: 10 [0/60000 (0%)] Loss: 0.104825 Train Epoch: 10 [6400/60000 (11%)] Loss: 0.194985 Train Epoch: 10 [12800/60000 (21%)] Loss: 0.117805 Train Epoch: 10 [19200/60000 (32%)] Loss: 0.224127 Train Epoch: 10 [25600/60000 (43%)] Loss: 0.160759 Train Epoch: 10 [32000/60000 (53%)] Loss: 0.293535 Train Epoch: 10 [38400/60000 (64%)] Loss: 0.247059 Train Epoch: 10 [44800/60000 (75%)] Loss: 0.283217 Train Epoch: 10 [51200/60000 (85%)] Loss: 0.169100 Train Epoch: 10 [57600/60000 (96%)] Loss: 0.112807 [[918. 0. 8. 12. 2. 3. 51. 0. 6. 0.] [ 2. 977. 0. 18. 1. 0. 0. 0. 2. 0.] [ 24. 1. 819. 10. 84. 1. 57. 0. 4. 0.] [ 20. 0. 4. 949. 9. 0. 16. 0. 0. 2.] [ 2. 1. 14. 38. 916. 0. 27. 0. 1. 1.] [ 0. 0. 0. 0. 0. 981. 0. 14. 0. 5.] [168. 0. 38. 28. 93. 1. 662. 0. 10. 0.] [ 0. 0. 0. 0. 0. 8. 0. 976. 0. 16.] [ 3. 1. 0. 2. 2. 2. 0. 3. 987. 0.] [ 1. 0. 0. 0. 0. 4. 0. 29. 0. 966.]] Test set: Average loss: 0.2386, Accuracy: 9151/10000 (92%) Train Epoch: 11 [0/60000 (0%)] Loss: 0.151567 Train Epoch: 11 [6400/60000 (11%)] Loss: 0.146887 Train Epoch: 11 [12800/60000 (21%)] Loss: 0.062417 Train Epoch: 11 [19200/60000 (32%)] Loss: 0.335674 Train Epoch: 11 [25600/60000 (43%)] Loss: 0.181816 Train Epoch: 11 [32000/60000 (53%)] Loss: 0.277126 Train Epoch: 11 [38400/60000 (64%)] Loss: 0.147858 Train Epoch: 11 [44800/60000 (75%)] Loss: 0.188083 Train Epoch: 11 [51200/60000 (85%)] Loss: 0.163272 Train Epoch: 11 [57600/60000 (96%)] Loss: 0.123491 [[948. 0. 6. 11. 3. 2. 27. 0. 3. 0.] [ 0. 987. 0. 11. 1. 0. 0. 0. 1. 0.] [ 22. 1. 840. 8. 77. 0. 51. 0. 1. 0.] [ 22. 0. 6. 942. 15. 0. 13. 0. 0. 2.] [ 1. 1. 29. 30. 913. 0. 26. 0. 0. 0.] [ 0. 0. 0. 0. 0. 973. 0. 19. 0. 8.] [201. 2. 39. 24. 86. 1. 640. 0. 7. 0.] [ 0. 0. 0. 0. 0. 2. 0. 972. 0. 26.] [ 3. 1. 0. 3. 2. 2. 0. 3. 985. 1.] [ 0. 0. 0. 0. 0. 5. 0. 21. 0. 
974.]] Test set: Average loss: 0.2390, Accuracy: 9174/10000 (92%) Train Epoch: 12 [0/60000 (0%)] Loss: 0.159876 Train Epoch: 12 [6400/60000 (11%)] Loss: 0.125542 Train Epoch: 12 [12800/60000 (21%)] Loss: 0.112106 Train Epoch: 12 [19200/60000 (32%)] Loss: 0.224706 Train Epoch: 12 [25600/60000 (43%)] Loss: 0.265347 Train Epoch: 12 [32000/60000 (53%)] Loss: 0.293587 Train Epoch: 12 [38400/60000 (64%)] Loss: 0.249993 Train Epoch: 12 [44800/60000 (75%)] Loss: 0.196611 Train Epoch: 12 [51200/60000 (85%)] Loss: 0.170177 Train Epoch: 12 [57600/60000 (96%)] Loss: 0.163824 [[935. 0. 4. 13. 3. 3. 37. 0. 5. 0.] [ 0. 986. 0. 11. 1. 0. 0. 0. 2. 0.] [ 23. 1. 844. 7. 76. 0. 47. 0. 2. 0.] [ 20. 1. 5. 939. 20. 0. 14. 0. 0. 1.] [ 2. 2. 22. 27. 926. 0. 21. 0. 0. 0.] [ 0. 0. 0. 0. 0. 982. 0. 13. 0. 5.] [173. 1. 45. 34. 91. 0. 646. 0. 10. 0.] [ 0. 0. 0. 0. 0. 2. 0. 980. 0. 18.] [ 3. 1. 1. 3. 1. 2. 0. 5. 984. 0.] [ 0. 0. 0. 0. 0. 5. 0. 26. 0. 969.]] Test set: Average loss: 0.2351, Accuracy: 9191/10000 (92%) Train Epoch: 13 [0/60000 (0%)] Loss: 0.158473 Train Epoch: 13 [6400/60000 (11%)] Loss: 0.158532 Train Epoch: 13 [12800/60000 (21%)] Loss: 0.038748 Train Epoch: 13 [19200/60000 (32%)] Loss: 0.163933 Train Epoch: 13 [25600/60000 (43%)] Loss: 0.261832 Train Epoch: 13 [32000/60000 (53%)] Loss: 0.280879 Train Epoch: 13 [38400/60000 (64%)] Loss: 0.167006 Train Epoch: 13 [44800/60000 (75%)] Loss: 0.268756 Train Epoch: 13 [51200/60000 (85%)] Loss: 0.225810 Train Epoch: 13 [57600/60000 (96%)] Loss: 0.110739 [[922. 0. 5. 9. 3. 2. 50. 0. 9. 0.] [ 1. 980. 0. 13. 2. 0. 1. 0. 3. 0.] [ 22. 1. 822. 7. 93. 0. 54. 0. 1. 0.] [ 18. 1. 6. 923. 28. 0. 23. 0. 0. 1.] [ 1. 1. 12. 16. 942. 0. 28. 0. 0. 0.] [ 0. 0. 0. 0. 0. 976. 0. 18. 0. 6.] [151. 0. 36. 22. 87. 0. 698. 0. 6. 0.] [ 0. 0. 0. 0. 0. 1. 0. 971. 0. 28.] [ 4. 1. 0. 3. 3. 2. 0. 2. 985. 0.] [ 0. 0. 0. 0. 1. 4. 0. 18. 0. 
977.]] Test set: Average loss: 0.2319, Accuracy: 9196/10000 (92%) Train Epoch: 14 [0/60000 (0%)] Loss: 0.115108 Train Epoch: 14 [6400/60000 (11%)] Loss: 0.188998 Train Epoch: 14 [12800/60000 (21%)] Loss: 0.067578 Train Epoch: 14 [19200/60000 (32%)] Loss: 0.158400 Train Epoch: 14 [25600/60000 (43%)] Loss: 0.254126 Train Epoch: 14 [32000/60000 (53%)] Loss: 0.337398 Train Epoch: 14 [38400/60000 (64%)] Loss: 0.123709 Train Epoch: 14 [44800/60000 (75%)] Loss: 0.205702 Train Epoch: 14 [51200/60000 (85%)] Loss: 0.138448 Train Epoch: 14 [57600/60000 (96%)] Loss: 0.153552 [[942. 0. 7. 9. 3. 2. 31. 0. 6. 0.] [ 1. 976. 0. 17. 1. 0. 2. 0. 3. 0.] [ 20. 1. 897. 5. 51. 0. 25. 0. 1. 0.] [ 16. 1. 6. 941. 20. 0. 15. 0. 1. 0.] [ 2. 1. 44. 22. 907. 0. 24. 0. 0. 0.] [ 0. 0. 0. 0. 0. 971. 0. 22. 1. 6.] [192. 0. 67. 28. 77. 0. 626. 0. 10. 0.] [ 0. 0. 0. 0. 0. 2. 0. 982. 0. 16.] [ 4. 1. 0. 3. 1. 1. 0. 2. 988. 0.] [ 1. 0. 0. 0. 0. 4. 0. 31. 0. 964.]] Test set: Average loss: 0.2374, Accuracy: 9194/10000 (92%) Train Epoch: 15 [0/60000 (0%)] Loss: 0.155847 Train Epoch: 15 [6400/60000 (11%)] Loss: 0.116800 Train Epoch: 15 [12800/60000 (21%)] Loss: 0.039833 Train Epoch: 15 [19200/60000 (32%)] Loss: 0.171820 Train Epoch: 15 [25600/60000 (43%)] Loss: 0.162665 Train Epoch: 15 [32000/60000 (53%)] Loss: 0.248377 Train Epoch: 15 [38400/60000 (64%)] Loss: 0.156792 Train Epoch: 15 [44800/60000 (75%)] Loss: 0.187477 Train Epoch: 15 [51200/60000 (85%)] Loss: 0.152571 Train Epoch: 15 [57600/60000 (96%)] Loss: 0.049584 [[922. 0. 6. 15. 6. 1. 44. 0. 6. 0.] [ 1. 985. 0. 11. 1. 0. 0. 0. 2. 0.] [ 24. 1. 832. 6. 102. 0. 34. 0. 1. 0.] [ 10. 3. 5. 935. 28. 0. 17. 0. 1. 1.] [ 2. 1. 15. 21. 948. 0. 13. 0. 0. 0.] [ 0. 0. 0. 0. 0. 973. 0. 18. 0. 9.] [144. 1. 48. 29. 114. 0. 655. 0. 9. 0.] [ 0. 0. 0. 0. 0. 3. 0. 974. 0. 23.] [ 2. 1. 1. 3. 1. 2. 1. 3. 986. 0.] [ 1. 0. 0. 0. 0. 3. 0. 25. 0. 
971.]] Test set: Average loss: 0.2455, Accuracy: 9181/10000 (92%) Train Epoch: 16 [0/60000 (0%)] Loss: 0.117903 Train Epoch: 16 [6400/60000 (11%)] Loss: 0.107160 Train Epoch: 16 [12800/60000 (21%)] Loss: 0.051522 Train Epoch: 16 [19200/60000 (32%)] Loss: 0.121555 Train Epoch: 16 [25600/60000 (43%)] Loss: 0.120275 Train Epoch: 16 [32000/60000 (53%)] Loss: 0.232415 Train Epoch: 16 [38400/60000 (64%)] Loss: 0.120694 Train Epoch: 16 [44800/60000 (75%)] Loss: 0.246462 Train Epoch: 16 [51200/60000 (85%)] Loss: 0.144428 Train Epoch: 16 [57600/60000 (96%)] Loss: 0.069679 [[940. 0. 11. 7. 2. 1. 34. 0. 5. 0.] [ 2. 980. 1. 11. 3. 0. 0. 0. 3. 0.] [ 22. 1. 890. 7. 51. 0. 26. 0. 3. 0.] [ 20. 2. 4. 935. 21. 0. 15. 0. 2. 1.] [ 2. 0. 54. 23. 902. 0. 16. 0. 1. 2.] [ 0. 0. 0. 0. 0. 969. 0. 21. 0. 10.] [189. 1. 67. 26. 101. 0. 605. 0. 11. 0.] [ 0. 0. 0. 0. 0. 3. 0. 978. 0. 19.] [ 2. 1. 0. 2. 0. 3. 0. 1. 991. 0.] [ 0. 0. 0. 0. 1. 2. 0. 27. 0. 970.]] Test set: Average loss: 0.2465, Accuracy: 9160/10000 (92%) Train Epoch: 17 [0/60000 (0%)] Loss: 0.123562 Train Epoch: 17 [6400/60000 (11%)] Loss: 0.105745 Train Epoch: 17 [12800/60000 (21%)] Loss: 0.045860 Train Epoch: 17 [19200/60000 (32%)] Loss: 0.108356 Train Epoch: 17 [25600/60000 (43%)] Loss: 0.101529 Train Epoch: 17 [32000/60000 (53%)] Loss: 0.245731 Train Epoch: 17 [38400/60000 (64%)] Loss: 0.096277 Train Epoch: 17 [44800/60000 (75%)] Loss: 0.128771 Train Epoch: 17 [51200/60000 (85%)] Loss: 0.132103 Train Epoch: 17 [57600/60000 (96%)] Loss: 0.143218 [[944. 0. 6. 7. 3. 1. 34. 0. 5. 0.] [ 2. 981. 1. 11. 1. 0. 2. 0. 2. 0.] [ 23. 1. 875. 7. 61. 0. 32. 0. 1. 0.] [ 21. 6. 5. 936. 14. 0. 17. 0. 1. 0.] [ 2. 0. 36. 22. 914. 0. 26. 0. 0. 0.] [ 0. 0. 0. 0. 0. 973. 0. 16. 0. 11.] [175. 0. 48. 21. 73. 0. 674. 0. 9. 0.] [ 0. 0. 0. 0. 0. 1. 0. 975. 0. 24.] [ 4. 1. 0. 3. 0. 3. 0. 2. 987. 0.] [ 1. 0. 0. 0. 0. 2. 0. 20. 0. 
977.]] Test set: Average loss: 0.2334, Accuracy: 9236/10000 (92%) Train Epoch: 18 [0/60000 (0%)] Loss: 0.143297 Train Epoch: 18 [6400/60000 (11%)] Loss: 0.209471 Train Epoch: 18 [12800/60000 (21%)] Loss: 0.083080 Train Epoch: 18 [19200/60000 (32%)] Loss: 0.173864 Train Epoch: 18 [25600/60000 (43%)] Loss: 0.143225 Train Epoch: 18 [32000/60000 (53%)] Loss: 0.234640 Train Epoch: 18 [38400/60000 (64%)] Loss: 0.128212 Train Epoch: 18 [44800/60000 (75%)] Loss: 0.141312 Train Epoch: 18 [51200/60000 (85%)] Loss: 0.164612 Train Epoch: 18 [57600/60000 (96%)] Loss: 0.079647 [[949. 0. 8. 6. 1. 1. 31. 0. 4. 0.] [ 1. 986. 0. 10. 1. 0. 0. 0. 2. 0.] [ 21. 1. 917. 6. 26. 0. 28. 0. 1. 0.] [ 20. 5. 7. 933. 14. 0. 19. 0. 1. 1.] [ 4. 1. 72. 26. 862. 0. 34. 0. 1. 0.] [ 0. 0. 0. 0. 0. 981. 0. 16. 0. 3.] [193. 3. 83. 23. 53. 0. 636. 0. 9. 0.] [ 0. 0. 0. 0. 0. 1. 0. 975. 0. 24.] [ 5. 1. 1. 3. 0. 1. 0. 2. 987. 0.] [ 1. 0. 0. 0. 0. 5. 0. 17. 0. 977.]] Test set: Average loss: 0.2397, Accuracy: 9203/10000 (92%) Train Epoch: 19 [0/60000 (0%)] Loss: 0.140702 Train Epoch: 19 [6400/60000 (11%)] Loss: 0.074412 Train Epoch: 19 [12800/60000 (21%)] Loss: 0.039120 Train Epoch: 19 [19200/60000 (32%)] Loss: 0.173565 Train Epoch: 19 [25600/60000 (43%)] Loss: 0.213276 Train Epoch: 19 [32000/60000 (53%)] Loss: 0.242456 Train Epoch: 19 [38400/60000 (64%)] Loss: 0.119806 Train Epoch: 19 [44800/60000 (75%)] Loss: 0.139165 Train Epoch: 19 [51200/60000 (85%)] Loss: 0.104396 Train Epoch: 19 [57600/60000 (96%)] Loss: 0.093405 [[931. 0. 14. 8. 2. 1. 38. 0. 6. 0.] [ 0. 990. 1. 6. 1. 0. 0. 0. 2. 0.] [ 20. 1. 913. 6. 33. 0. 26. 0. 1. 0.] [ 17. 4. 8. 933. 18. 0. 17. 0. 2. 1.] [ 1. 0. 48. 22. 898. 0. 30. 0. 1. 0.] [ 0. 0. 0. 0. 0. 972. 0. 20. 0. 8.] [173. 1. 65. 20. 70. 0. 658. 0. 13. 0.] [ 0. 0. 0. 0. 0. 2. 0. 973. 0. 25.] [ 2. 1. 0. 2. 1. 2. 0. 2. 990. 0.] [ 0. 0. 0. 0. 0. 2. 0. 21. 1. 
976.]] Test set: Average loss: 0.2422, Accuracy: 9234/10000 (92%) Train Epoch: 20 [0/60000 (0%)] Loss: 0.093597 Train Epoch: 20 [6400/60000 (11%)] Loss: 0.094620 Train Epoch: 20 [12800/60000 (21%)] Loss: 0.030099 Train Epoch: 20 [19200/60000 (32%)] Loss: 0.139641 Train Epoch: 20 [25600/60000 (43%)] Loss: 0.218915 Train Epoch: 20 [32000/60000 (53%)] Loss: 0.222010 Train Epoch: 20 [38400/60000 (64%)] Loss: 0.083391 Train Epoch: 20 [44800/60000 (75%)] Loss: 0.193114 Train Epoch: 20 [51200/60000 (85%)] Loss: 0.141759 Train Epoch: 20 [57600/60000 (96%)] Loss: 0.078522 [[934. 0. 8. 6. 3. 1. 45. 0. 3. 0.] [ 2. 980. 0. 13. 1. 0. 1. 0. 3. 0.] [ 19. 1. 892. 7. 43. 0. 38. 0. 0. 0.] [ 17. 2. 7. 936. 16. 0. 21. 0. 1. 0.] [ 2. 0. 41. 18. 902. 0. 37. 0. 0. 0.] [ 0. 0. 0. 0. 0. 976. 0. 20. 0. 4.] [144. 1. 58. 20. 59. 0. 708. 0. 10. 0.] [ 0. 0. 0. 0. 0. 3. 0. 977. 0. 20.] [ 4. 1. 0. 3. 1. 1. 0. 2. 988. 0.] [ 1. 0. 0. 0. 0. 3. 0. 23. 0. 973.]] Test set: Average loss: 0.2341, Accuracy: 9266/10000 (93%)
In [ ]: