Torch - nn

For PyTorch to track operations, you need to wrap a tensor in a Variable. (Since PyTorch 0.4, Variable has been merged into Tensor, so creating a tensor with requires_grad=True has the same effect.)

You can get the tensor back with the .data attribute of the Variable.

The gradients of some variable z are computed with respect to its inputs by calling z.backward().
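
A minimal sketch tying these three points together (pre-0.4 API; in later versions Variable is a thin alias for Tensor):

import torch
from torch.autograd import Variable

x = Variable(torch.ones(2, 2), requires_grad=True)  # wrap the tensor so operations are tracked
z = (3 * x).sum()  # z now depends on x through the recorded graph
z.backward()       # compute dz/dx
print(x.grad)      # gradients: a 2x2 tensor of 3s
print(x.data)      # the raw tensor inside the Variable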

Train

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision import datasets, transforms

# Define a transform to normalize the data: x -> (x - mean) / std,
# which maps pixel values from [0, 1] to [-1, 1].
# MNIST images have a single channel, so mean and std are 1-tuples.
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,)),
                               ])
# Download and load the training data
trainset = datasets.MNIST('MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Download and load the test data
testset = datasets.MNIST('MNIST_data/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)

class Network(nn.Module):
    def __init__(self):
        super().__init__()
        # Define the fully connected layers: 200, 50, and 10 units respectively
        self.fc1 = nn.Linear(784, 200)
        self.fc2 = nn.Linear(200, 50)
        # Output layer, 10 units - one for each digit
        self.fc3 = nn.Linear(50, 10)

    def forward(self, x):
        ''' Forward pass through the network, returns the output logits '''
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)

        return x

    def predict(self, x):
        ''' Predict class probabilities by applying softmax to the output logits '''
        logits = self.forward(x)
        return F.softmax(logits, dim=1)

net = Network()
criterion = nn.CrossEntropyLoss() # combines nn.LogSoftmax() and nn.NLLLoss() for efficiency & numerical stability (see the check after the sample output)
optimizer = optim.Adam(net.parameters(), lr=0.001)
epochs = 1
steps = 0
running_loss = 0
print_every = 10
for e in range(epochs):
    for images, labels in trainloader:
        steps += 1
        # Flatten each MNIST image into a 784-long vector
        images.resize_(images.size(0), 784)
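        # (equivalent, without mutating the batch in place:
        #  images = images.view(images.size(0), -1))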

        inputs = Variable(images)
        targets = Variable(labels)
        optimizer.zero_grad()

        # - - - - - - - - - - - - - 
        output = net(inputs)  # calling the module invokes forward()
        loss = criterion(output, targets)
        loss.backward() # calculate gradients
        optimizer.step() # Take a step with the optimizer to update the weights
        # - - - - - - - - - - - - - 

        running_loss += loss.data[0]
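        # (in PyTorch 0.4+, loss is a zero-dim tensor; use loss.item() instead of loss.data[0])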

        # - - - - - - - - - - - - - - - - - - - - - - - - 
        # Display
        if steps % print_every == 0:
            # Test accuracy
            accuracy = 0
            for ii, (images, labels) in enumerate(testloader):

                images = images.resize_(images.size(0), 784)
                inputs = Variable(images, volatile=True)
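                # volatile=True skips graph construction during inference;
                # in PyTorch 0.4+ use a torch.no_grad() block instead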

                ps = net.predict(inputs).data
                equality = (labels == ps.max(1)[1])  # compare predicted class indices to labels
                accuracy += equality.float().mean()

            print("Epoch: {}/{}".format(e+1, epochs),
                  "Loss: {:.4f}".format(running_loss/print_every),
                  "Test accuracy: {:.4f}".format(accuracy/(ii+1)))
            running_loss = 0

Epoch: 1/1 Loss: 2.1246 Test accuracy: 0.4726
Epoch: 1/1 Loss: 1.5973 Test accuracy: 0.5754
Epoch: 1/1 Loss: 1.2325 Test accuracy: 0.7483
Epoch: 1/1 Loss: 0.9512 Test accuracy: 0.7622
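
The comment on the criterion above can be checked directly. A small sketch (random logits and arbitrary labels, purely for illustration) showing nn.CrossEntropyLoss agreeing with nn.LogSoftmax followed by nn.NLLLoss:

import torch
from torch import nn
from torch.autograd import Variable

logits = Variable(torch.randn(4, 10))               # 4 samples, 10 classes
targets = Variable(torch.LongTensor([1, 0, 4, 9]))  # arbitrary class labels

ce = nn.CrossEntropyLoss()(logits, targets)
nll = nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), targets)
print(ce, nll)  # both print the same loss value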

Test

dataiter = iter(testloader)
images, labels = next(dataiter)
img = images[0]
ps = net.predict(Variable(img.resize_(1, 784)))
helper.view_classify(img.resize_(1, 28, 28), ps)
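
helper here is an external plotting utility (typically a helper.py sitting next to the notebook); it must be importable for the line above to run. If it is not available, the predicted class can be read off directly; a quick sketch:

probs = ps.data.numpy().squeeze()          # class probabilities as a 10-element array
print("Predicted digit:", probs.argmax()) # index of the most likely class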
