CNN Tutorial

This tutorial describes how to implement a convolutional neural network (CNN) in MinPy. CNNs are surprisingly effective on computer vision and natural language processing tasks, and they are widely used in real-world applications.

However, these tasks are also extremely demanding computationally, so training CNN models effectively calls for GPU acceleration. This tutorial explains how to use MinPy's ability to run the same model you developed for CPU transparently on GPU.
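Concretely, switching to the GPU is a two-line change. The sketch below shows the context lines that appear (commented out) near the top of both listings in this tutorial; it assumes a GPU-enabled MXNet build is installed.

# Run everything that follows on the first GPU.
from minpy.context import set_context, gpu
set_context(gpu(0))  # set the global context to gpu(0)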

This is also a gentle introduction to using the model builder to specify an otherwise complex network.

We suggest you start with the Complete solver and optimizer guide to get familiar with MinPy's conventional solver architecture.

Dataset: CIFAR-10

We use the CIFAR-10 dataset for our CNN model. CIFAR-10 consists of 60,000 32x32 color images in 10 classes, split into 50,000 training and 10,000 test images.
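For reference, the helper used in both listings below returns the dataset as a dictionary of arrays keyed by split. A minimal sketch (the exact number of training and test examples depends on the helper's preprocessing defaults):

from examples.utils.data_utils import get_CIFAR10_data

data = get_CIFAR10_data('path/to/cifar10')    # hypothetical data directory
print(data['X_train'].shape)   # (num_train, 3, 32, 32) color images
print(data['y_train'].shape)   # (num_train,) integer labels in [0, 10)
print(data['X_test'].shape)    # (num_test, 3, 32, 32)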

CNN on MinPy

In the Complete solver and optimizer guide, we introduced a simple model/solver architecture. Following that convention, implementing a CNN in MinPy is straightforward; the only part that changes is the model. For the performance-critical CNN layers, it is important to use MXNet symbols, which have been carefully optimized for performance on GPU. The following MinPy code defines a classical CNN to classify the CIFAR-10 dataset.
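Distilled from the full listing below, the pattern is: build an MXNet symbol for the layers, then wrap it in minpy.core.Function so it can be called inside the model and its parameters can be registered. A minimal sketch with a single fully connected layer:

import mxnet as mx
from minpy.core import Function

# A toy symbol: one fully connected layer followed by a softmax output.
net = mx.sym.Variable(name='X')
net = mx.sym.FullyConnected(data=net, name='fc', num_hidden=10)
net = mx.sym.SoftmaxOutput(data=net, name='softmax')

# Bind the symbol to concrete input shapes. The resulting Function is a
# normal callable; its learnable parameters are exposed via get_params().
f = Function(net,
             input_shapes={'X': (128, 3 * 32 * 32),
                           'softmax_label': (128,)},
             name='toy')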

If you are running on a server with a GPU, uncomment the set_context lines near the top of the listing to train on the GPU!

"""Convolution Neural Network example using only MXNet symbol."""
import sys
import argparse

from minpy.nn.io import NDArrayIter
# Can also use MXNet IO here
# from mxnet.io import NDArrayIter
from minpy.core import Function
from minpy.nn import layers
from minpy.nn.model import ModelBase
from minpy.nn.solver import Solver
from examples.utils.data_utils import get_CIFAR10_data

# Please uncomment following if you have GPU-enabled MXNet installed.
#from minpy.context import set_context, gpu
#set_context(gpu(0)) # set the global context as gpu(0)

import mxnet as mx

batch_size=128
input_size=(3, 32, 32)
flattened_input_size=3 * 32 * 32
hidden_size=512
num_classes=10

class ConvolutionNet(ModelBase):
    def __init__(self):
        super(ConvolutionNet, self).__init__()
        # Define symbols that use convolution and max pooling to extract better
        # features from the input image.
        net = mx.sym.Variable(name='X')
        net = mx.sym.Convolution(
                data=net, name='conv', kernel=(7, 7), num_filter=32)
        net = mx.sym.Activation(
                data=net, act_type='relu')
        net = mx.sym.Pooling(
                data=net, name='pool', pool_type='max', kernel=(2, 2),
                stride=(2, 2))
        net = mx.sym.Flatten(data=net)
        net = mx.sym.FullyConnected(
                data=net, name='fc1', num_hidden=hidden_size)
        net = mx.sym.Activation(
                data=net, act_type='relu')
        net = mx.sym.FullyConnected(
                data=net, name='fc2', num_hidden=num_classes)
        net = mx.sym.SoftmaxOutput(data=net, name='softmax', normalization='batch')
        # Create forward function and add parameters to this model.
        input_shapes = {'X': (batch_size,) + input_size, 'softmax_label': (batch_size,)}
        self.cnn = Function(net, input_shapes=input_shapes, name='cnn')
        self.add_params(self.cnn.get_params())

    def forward_batch(self, batch, mode):
        out = self.cnn(X=batch.data[0],
                       softmax_label=batch.label[0],
                       **self.params)
        return out

    def loss(self, predict, y):
        return layers.softmax_cross_entropy(predict, y)

def main(args):
    # Create model.
    model = ConvolutionNet()
    # Create data iterators for training and testing sets.
    data = get_CIFAR10_data(args.data_dir)
    train_dataiter = NDArrayIter(data=data['X_train'],
                                 label=data['y_train'],
                                 batch_size=batch_size,
                                 shuffle=True)
    test_dataiter = NDArrayIter(data=data['X_test'],
                                label=data['y_test'],
                                batch_size=batch_size,
                                shuffle=False)
    # Create solver.
    solver = Solver(model,
                    train_dataiter,
                    test_dataiter,
                    num_epochs=10,
                    init_rule='gaussian',
                    init_config={
                        'stdvar': 0.001
                    },
                    update_rule='sgd_momentum',
                    optim_config={
                        'learning_rate': 1e-3,
                        'momentum': 0.9
                    },
                    verbose=True,
                    print_every=20)
    # Initialize model parameters.
    solver.init()
    # Train!
    solver.train()


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Multi-layer perceptron example using minpy operators")
    parser.add_argument('--data_dir',
                        type=str,
                        required=True,
                        help='Directory that contains cifar10 data')
    main(parser.parse_args())
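The script takes the CIFAR-10 data directory on the command line via --data_dir. If you prefer to launch training from another script or a notebook, a hypothetical equivalent (assuming the listing above is saved as cnn_symbol.py and the data lives under ./cifar10/) is:

from argparse import Namespace

import cnn_symbol  # hypothetical module name for the listing above

# Equivalent to: python cnn_symbol.py --data_dir ./cifar10/
cnn_symbol.main(Namespace(data_dir='./cifar10/'))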

Build Your Network with minpy.model_builder

minpy.model_builder provides an interface that helps you implement models more efficiently. The model builder generates models compatible with MinPy's solver; you only need to specify the basic layer configuration of your model, and the model builder handles the rest. Below is a model builder implementation of the same CNN. Please refer to the Complete model builder guide for details.

Uncomment the set_context lines near the top of the listing to train on the GPU.

'''
  This example demonstrates how to use minpy model builder to construct neural networks.

  For details about how to train a model with solver, please refer to:
    http://minpy.readthedocs.io/en/latest/tutorial/complete.html

  More models are available in minpy.nn.model_gallery.
'''

import sys
import argparse

import minpy.nn.model_builder as builder
from minpy.nn.solver import Solver
from minpy.nn.io import NDArrayIter
from examples.utils.data_utils import get_CIFAR10_data

# Please uncomment following if you have GPU-enabled MXNet installed.
#from minpy.context import set_context, gpu
#set_context(gpu(0)) # set the global context as gpu(0)

batch_size = 128
hidden_size = 512
num_classes = 10

def main(args):
    # Define a convolutional neural network, the same as the symbol version above
    net = builder.Sequential(
        builder.Convolution((7, 7), 32),
        builder.ReLU(),
        builder.Pooling('max', (2, 2), (2, 2)),
        builder.Flatten(),
        builder.Affine(hidden_size),
        builder.Affine(num_classes),
    )

    # Cast the definition to a model compatible with MinPy's solver
    model = builder.Model(net, 'softmax', (3 * 32 * 32,))

    data = get_CIFAR10_data(args.data_dir)

    train_dataiter = NDArrayIter(data['X_train'],
                         data['y_train'],
                         batch_size=batch_size,
                         shuffle=True)

    test_dataiter = NDArrayIter(data['X_test'],
                         data['y_test'],
                         batch_size=batch_size,
                         shuffle=False)

    solver = Solver(model,
                    train_dataiter,
                    test_dataiter,
                    num_epochs=10,
                    init_rule='gaussian',
                    init_config={
                        'stdvar': 0.001
                    },
                    update_rule='sgd_momentum',
                    optim_config={
                        'learning_rate': 1e-3,
                        'momentum': 0.9
                    },
                    verbose=True,
                    print_every=20)
    solver.init()
    solver.train()

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Multi-layer perceptron example using minpy operators")
    parser.add_argument('--data_dir',
                        type=str,
                        required=True,
                        help='Directory that contains cifar10 data')
    main(parser.parse_args())
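Note that this builder-based network omits the ReLU between the two fully connected layers that the MXNet-symbol version uses. If you want the two definitions to match exactly, the Sequential definition can be extended with one more layer, using only builder components that already appear above:

net = builder.Sequential(
    builder.Convolution((7, 7), 32),
    builder.ReLU(),
    builder.Pooling('max', (2, 2), (2, 2)),
    builder.Flatten(),
    builder.Affine(hidden_size),
    builder.ReLU(),              # nonlinearity between the two affine layers
    builder.Affine(num_classes),
)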