# Fit a small network to a four-sample toy dataset with plain gradient descent.
# NOTE(review): MLP is defined elsewhere; presumably the first argument is the
# input width (3) and the list gives the layer sizes -- confirm against MLP.
n = MLP(3, [4, 4, 1])

# Training inputs: every sample carries exactly three features.
# The first row previously read `[2.0, 3.0, -1, 0]` (four values), a typo for
# `-1.0` that made it inconsistent with the other rows and the 3-wide network.
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0],
]
# Desired targets, one per row of `xs`.
ys = [1.0, -1.0, -1.0, 1.0]

for k in range(20):
    # Forward pass: evaluate the network on every sample, then sum the
    # squared errors against the targets.
    ypred = [n(x) for x in xs]
    loss = sum((yout - ygt) ** 2 for ygt, yout in zip(ys, ypred))

    # Reset gradients before backprop: without this, you accumulate the
    # gradients across iterations and create an artificial momentum.
    for p in n.parameters():
        p.grad = 0.0
    loss.backward()

    # Gradient-descent step with a fixed step size of 0.01.
    for p in n.parameters():
        p.data += -0.01 * p.grad

    print(k, loss.data)

# Trailing bare expression kept from the original (displays the value in a
# notebook/REPL). Note these are the predictions from the last forward pass,
# i.e. computed before the final parameter update.
ypred