import matplotlib.pyplot as plt

figure = plt.figure()
num_of_images = 9
for index in range(num_of_images):
    plt.subplot(3, 3, index + 1)
    plt.axis('off')
    plt.title("Ground Truth: {}".format(labels[index]))
    plt.imshow(images[index].numpy().squeeze(), cmap='gray_r')
Define the network
Let’s define this network:
tensor.view(-1, n) returns a new tensor with the same data as the original tensor but with a different shape; a size of -1 is inferred from the other dimensions.
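As a quick illustration (a minimal sketch, not part of the training code), flattening a batch of 4 feature maps of shape 16x5x5 into vectors of length 400:

import torch

x = torch.randn(4, 16, 5, 5)     # a batch of 4 feature maps
flat = x.view(-1, 16 * 5 * 5)    # -1 is inferred as 4
print(flat.shape)                # torch.Size([4, 400])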
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 5*5 from image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        # x = x.view(-1, self.num_flat_features(x))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()
print(net)
You just have to define the forward function, and the backward function (where gradients are computed) is automatically defined for you using autograd. You can use any of the Tensor operations in the forward function.
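For instance (a minimal sketch; the 32x32 input size is an assumption implied by the 16*5*5 fully connected layer), a forward pass only requires calling the module on an input batch:

sample_input = torch.randn(1, 1, 32, 32)  # one random single-channel image
out = net(sample_input)
print(out)          # 10 raw scores, one per digit class
print(out.shape)    # torch.Size([1, 10])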
Loss Function and Optimization
A loss function takes an (output, target) pair of inputs and computes a value that estimates how far the output is from the target. With the criterion defined below, the loss is computed as loss = criterion(outputs, labels).
import torch.optim as optim

# loss function
criterion = nn.CrossEntropyLoss()
# Optimization method
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
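As a quick check (a sketch with dummy data, not part of the training pipeline), the criterion can be applied to raw logits and integer class labels:

dummy_outputs = torch.randn(4, 10)         # raw logits for a batch of 4
dummy_labels = torch.tensor([3, 7, 1, 0])  # ground-truth class indices
loss = criterion(dummy_outputs, dummy_labels)
print(loss.item())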
Zero the gradient buffers of all parameters and backpropagate with random gradients:
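A sketch of that step (the random gradient here just exercises autograd; the real training loss is used in the loop below):

sample_input = torch.randn(1, 1, 32, 32)

net.zero_grad()                    # clear the gradient buffers of all parameters
out = net(sample_input)
out.backward(torch.randn(1, 10))   # backprop with random gradients
print(net.conv1.bias.grad)         # gradients are now populated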
Train network
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net.to(device)

for epoch in range(5):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        images, labels = data[0].to(device), data[1].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 100 == 99:  # print every 100 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print('Finished Training')
We have trained the network for 5 passes over the training dataset. But we need to check whether the network has learnt anything at all.
We will check this by predicting the class label that the neural network outputs, and checking it against the ground-truth. If the prediction is correct, we add the sample to the list of correct predictions.
Okay, first step. Let us display an image from the test set to get familiar.
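A minimal sketch (assuming testloader yields (images, labels) batches, as set up earlier): grab one batch and show its first nine images with their ground-truth labels.

dataiter = iter(testloader)
images, labels = next(dataiter)

figure = plt.figure()
for index in range(9):
    plt.subplot(3, 3, index + 1)
    plt.axis('off')
    plt.title("Ground Truth: {}".format(labels[index]))
    plt.imshow(images[index].numpy().squeeze(), cmap='gray_r')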
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the %d test images: %d%%' %
      (len(testloader.dataset), 100 * correct / total))
Accuracy of the network on the 10000 test images: 98 %
Visualize test results
To visualize tensors that live on the GPU, you first need to move them back to the CPU with Tensor.cpu(), e.g. images.cpu().
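For example (a minimal sketch, reusing the plotting pattern from the start of this post), a batch evaluated on the GPU can be brought back to the CPU for display like this:

dataiter = iter(testloader)
images, labels = next(dataiter)
images, labels = images.to(device), labels.to(device)

outputs = net(images)
_, predicted = torch.max(outputs, 1)

# move tensors back to the CPU before converting to NumPy for matplotlib
figure = plt.figure()
for index in range(9):
    plt.subplot(3, 3, index + 1)
    plt.axis('off')
    plt.title("Predicted: {}".format(predicted[index].item()))
    plt.imshow(images[index].cpu().numpy().squeeze(), cmap='gray_r')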
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# continued_net is assumed to carry the weights trained above
# (e.g. the same net, or a copy loaded from a saved state_dict)
continued_net.to(device)

def train_continue(epoch):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        images, labels = data[0].to(device), data[1].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = continued_net(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 100 == 99:  # print every 100 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

for epoch in range(6, 10):  # loop over the dataset a few more times
    train_continue(epoch)

print('Finished Training')
# Accuracy of continued training
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = continued_net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the %d test images: %d%%' %
      (len(testloader.dataset), 100 * correct / total))
Accuracy of the network on the 10000 test images: 98 %