Writing a simple NN
What lies underneath neural networks?
Goal
Understand the inner workings of an NN (heavily modified/reproduced from the fastbook notebook)
Concrete Problem and Plan
Recognize 3's and 7's from the MNIST dataset using the following methods:
- Start with a baseline
- Write a Linear NN
- Write a Linear NN using pytorch functions (nn)
- Write a Linear NN using fastai functions
- Write a non-linear NN
- Write a non-linear NN using pytorch
- Write a non-linear NN using fastai
- Write a deeper (18-layer) non-linear NN using fastai
- Compare all approaches on epochs, learning rate (lr) and accuracy
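The image path lists used below (threes_tr, sevens_tr, threes_vd, sevens_vd) are assumed to exist already; a minimal sketch of how they could be built with fastai's MNIST_SAMPLE download (the names and split are assumptions, following the fastbook setup):
# Assumed setup, not shown in the original notebook: download the fastai
# MNIST_SAMPLE subset (only 3s and 7s) and collect the image paths
from fastai.vision.all import *

path = untar_data(URLs.MNIST_SAMPLE)
threes_tr = (path/'train'/'3').ls().sorted()
sevens_tr = (path/'train'/'7').ls().sorted()
threes_vd = (path/'valid'/'3').ls().sorted()
sevens_vd = (path/'valid'/'7').ls().sorted()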
Stack tensors with a list comprehension
def stack_tensors(paths):
    lcomp_tensors = [tensor(Image.open(o)) for o in paths]
    print(len(lcomp_tensors))
    return torch.stack(lcomp_tensors).float()/255
stacked_threes_tr = stack_tensors(threes_tr)
stacked_threes_vd = stack_tensors(threes_vd)
stacked_sevens_tr = stack_tensors(sevens_tr)
stacked_sevens_vd = stack_tensors(sevens_vd)
stacked_threes_tr.shape
Mean of stacked tensors
mean_3_2d = stacked_threes_tr.mean((0))
mean_7_2d = stacked_sevens_tr.mean((0))
show_image(mean_3_2d), show_image(stacked_threes_tr[10])
Distance measurement
def l1_norm(a,b): return (a-b).abs().mean((-1,-2))
def l2_norm(a,b): return ((a-b)**2).mean((-1,-2)).sqrt()
# F.l1_loss(a_3.float(),mean7), F.mse_loss(a_3,mean7).sqrt() #pytorch functions
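As a quick sanity check (an assumed example, not in the original notebook), a single validation 3 should be closer to the mean 3 than to the mean 7. Because the mean is taken over the last two axes only, the same functions also work on a whole stacked set at once via broadcasting:
# Assumed check: distance from one 3 to each mean image
a_3 = stacked_threes_vd[0]
l2_norm(a_3, mean_3_2d), l2_norm(a_3, mean_7_2d)      # the first value should be smaller
# The same call on the full [N,28,28] stack returns one distance per image
l2_norm(stacked_threes_vd, mean_3_2d).shape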
Is a tensor a 3 or a 7?
def is_3(stacked_tensor, mean_3, mean_7):
    return l2_norm(stacked_tensor, mean_3) < l2_norm(stacked_tensor, mean_7)
Checking the accuracy
accuracy_3s_1 = is_3(stacked_threes_vd,mean_3_2d,mean_7_2d).float().mean()
accuracy_3s_2 = 1-is_3(stacked_sevens_vd,mean_3_2d,mean_7_2d).float().mean()
print("Accuracy of prediction is: ",(accuracy_3s_1+accuracy_3s_2)/2)
## no need to do 7 separately
Result: 97% accuracy.
Steps
- Initialize parameters (w, b)
- Predict with the training vectors (X@w + b)
- Calculate the loss (something that varies with a tiny variation of the parameters)
- Calculate the gradient at the parameter values (params.grad)
- Take a step based on the gradient (w -= w.grad * lr); a toy example follows this list
- Repeat n times!
- Calculate the validation accuracy (number of predictions matching the targets)
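A toy illustration of one such gradient step on made-up data (an assumed example, not from the original notebook; the _toy names are invented to avoid clashing with variables used later):
# Assumed toy example: fit y = 2*x with a single weight, one SGD step
x_toy = tensor([1., 2., 3.])
y_toy = tensor([2., 4., 6.])
w_toy = tensor(1.).requires_grad_()          # start far from the true value 2
loss = ((x_toy*w_toy - y_toy)**2).mean()     # simple MSE loss
loss.backward()                              # fills w_toy.grad
w_toy.data -= 0.1 * w_toy.grad               # one gradient step; w_toy moves towards 2
w_toy.grad.zero_()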
Zip the data into (x, y) tuples
dset= list(zip(train_x,train_y))
valid_dset = list(zip(valid_x,valid_y))
x,y = dset[0]
x.shape,y,len(dset),type(dset[0])
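train_x, train_y, valid_x and valid_y are not defined above; one way they could be built from the stacked tensors (an assumption, mirroring the fastbook approach of flattening each image into a 784-long vector and labelling 3s as 1, with a trailing unit dimension on the labels):
# Assumed construction of the flattened feature/target tensors (not in the original)
train_x = torch.cat([stacked_threes_tr, stacked_sevens_tr]).view(-1, 28*28)
train_y = tensor([1]*len(threes_tr) + [0]*len(sevens_tr)).unsqueeze(1)
valid_x = torch.cat([stacked_threes_vd, stacked_sevens_vd]).view(-1, 28*28)
valid_y = tensor([1]*len(threes_vd) + [0]*len(sevens_vd)).unsqueeze(1)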
Using DataLoaders to load the data in batches
dl = DataLoader(dset,batch_size=256,shuffle=False)
dl_vd = DataLoader(valid_dset,batch_size=256,shuffle=False)
# Not sure why shuffle=True is not working, even on the validation set; somehow it gets shuffled incorrectly
xb,yb = first(dl_vd)
xb.shape, yb.shape
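If the labels are built with a trailing unit dimension as in the sketch above, this first batch should come out as torch.Size([256, 784]) for xb (256 flattened 28*28 images) and torch.Size([256, 1]) for yb (one 0/1 label per image).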
Step 1
def init_params():
    w = torch.randn(28*28).requires_grad_()
    b = torch.randn(1).requires_grad_()
    return (w, b)
Step 2
def linear1(tens):
    return tens@w + b
Step 3
def mnist_loss(prediction, target):
    prediction = prediction.sigmoid()
    return torch.where(target==1, 1-prediction, prediction).mean()
## Check what happens if you use sum (guess: nothing different)
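A quick way to see how this loss behaves (an assumed example, not part of the original notebook): predictions that are confidently correct contribute values near 0, confidently wrong ones contribute values near 1.
# Assumed sanity check for mnist_loss
preds = tensor([3.0, -3.0, 3.0])   # raw model outputs, before sigmoid
targs = tensor([1,    0,   0])     # 1 = "is a 3", 0 = "is a 7"
mnist_loss(preds, targs)           # first two terms are ~0.05, last is ~0.95 -> mean ~0.35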
Step 4
def calc_grad(xb, yb, model):
    pred = model(xb)
    loss = mnist_loss(pred, yb)
    loss.backward()
Steps 2-5
def train_epoch(dl, model, params):
    for xb,yb in dl:
        calc_grad(xb, yb, model)
        for p in params:
            # print(b,params[1])
            p.data -= p.grad*lr
            # print(b,params[1])
            p.grad.zero_()
## p.data is needed, otherwise we get a leaf-variable error
Step 7
def batch_accuracy(xb, yb):
    preds = xb.sigmoid()
    correct = (preds>0.5) == yb
    return correct.float().mean()

def validate_epoch(dl, model):
    accs = [batch_accuracy(model(xb),yb) for xb,yb in dl]
    return round(torch.stack(accs).mean().item(), 4)
Setting parameters
lr=1
torch.manual_seed(0)
w,b = init_params()
params=w,b
Training
for i in range(20):
    train_epoch(dl, linear1, params)
    print(validate_epoch(dl_vd, linear1), end=' ')
Result: 96% with 20 epochs
SGD
class BasicOptim:
    def __init__(self, params, lr): self.params, self.lr = list(params), lr
    def step(self):
        for p in self.params: p.data -= p.grad * self.lr
    def zero_grad(self):
        for p in self.params: p.grad = None  # p.grad.zero_() also OK
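For reference, BasicOptim replicates what PyTorch's built-in optimizer already provides; an assumed drop-in equivalent (not used in this notebook), once a model such as linear2 below is defined:
# Assumed equivalent of BasicOptim using PyTorch's built-in SGD;
# it exposes the same step()/zero_grad() interface
opt = torch.optim.SGD(linear2.parameters(), lr=lr)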
def train_epoch(dl, model):
    for xb,yb in dl:
        calc_grad(xb, yb, model)
        opt.step()
        opt.zero_grad()
def train_model(dl, model, no_epochs):
    for i in range(no_epochs):
        train_epoch(dl, model)
        print(validate_epoch(dl_vd, model), end=' ')  # use the passed-in model, not the global linear2
Setting parameters
lr=1
torch.manual_seed(0)
linear2 = nn.Linear(28*28,1)
opt = BasicOptim(linear2.parameters(),lr)
train_model(dl, linear2,20)
Result: 98% with 20 epochs
Everything in 3 lines
dls = DataLoaders(dl,dl_vd)
learn = Learner(dls, nn.Linear(28*28,1),opt_func=SGD, loss_func=mnist_loss,metrics=batch_accuracy)
learn.fit(10, lr=lr)  ## fit is the part that runs the for-loop of training and validating
Result: 97% with 10 epochs
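Conceptually, learn.fit(10) is running the same loop written by hand above; a rough sketch of the idea using the helpers defined earlier (model3 is a hypothetical name, and this is not fastai's actual implementation):
# Rough conceptual sketch, not fastai's real code: each epoch trains over all
# batches and then reports the metric on the validation set
model3 = learn.model                        # the nn.Linear(28*28,1) inside the Learner
opt = BasicOptim(model3.parameters(), lr)   # rebind the global opt used by train_epoch
for epoch in range(10):
    train_epoch(dl, model3)
    print(validate_epoch(dl_vd, model3), end=' ')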
Non-linear NN with ReLU (Rectified Linear Unit)
def simple_net(xb):
    res = xb@w1 + b1
    res = res.max(tensor(0.0))   # ReLU: clamp negative activations to zero
    res = res@w2 + b2
    return res
Initializing parameters
def init_params2(size): return torch.randn(size).requires_grad_()
w1 = init_params2((28*28,30))
b1 = init_params2(1)
w2 = init_params2(30)
b2 = init_params2(1)
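The training call for this hand-written net is not shown in the original; assuming the BasicOptim and train_model helpers from above (and that prediction and target shapes line up), it could look roughly like:
# Assumed training call, not in the original notebook: optimise the four
# hand-initialized parameter tensors with the same helpers as before
opt = BasicOptim([w1, b1, w2, b2], 0.1)   # guessed, smaller learning rate than the linear model
train_model(dl, simple_net, 20)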
Non-linear NN with PyTorch
simple_net2 = nn.Sequential(
    nn.Linear(28*28, 30),
    nn.ReLU(),
    nn.Linear(30, 1)
)
learn2 = Learner(dls, simple_net2,opt_func=SGD, loss_func=mnist_loss, metrics=batch_accuracy)
learn2.fit(20,0.1)
18-layer model (resnet18)
dls = ImageDataLoaders.from_folder(path)
learn = cnn_learner(dls, resnet18, pretrained=False,
                    loss_func=F.cross_entropy, metrics=accuracy)
learn.fit_one_cycle(1, 0.1)
Result: 99.7% with 1 epoch