Example #1
def test_mnist():
    '''
    Tests load_mnist().

    Checks test & train sets' formats and sizes, but not content.
    '''
    train_set, test_set = load_mnist()

    for mnist, expected_size in safe_izip((train_set, test_set),
                                          (60000, 10000)):
        assert_equal(mnist.num_examples(), expected_size)

    expected_formats = [DenseFormat(shape=[-1, 28, 28],
                                    axes=['b', '0', '1'],
                                    dtype='uint8'),
                        DenseFormat(shape=[-1],
                                    axes=['b'],
                                    dtype='uint8')]
    expected_names = [u'images', u'labels']
    expected_sizes = [60000, 10000]

    for dataset, expected_size in safe_izip((train_set, test_set),
                                            expected_sizes):
        assert_all_equal(dataset.names, expected_names)
        assert_all_equal(dataset.formats, expected_formats)

        for tensor, fmt in safe_izip(dataset.tensors, dataset.formats):
            fmt.check(tensor)
            assert_equal(tensor.shape[0], expected_size)

        labels = dataset.tensors[dataset.names.index('labels')]
        assert_true(numpy.logical_and(labels[...] >= 0,
                                      labels[...] < 10).all())
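The test above pins down what load_mnist() is expected to return: a train/test pair of datasets whose .names and .tensors attributes expose uint8 image and label arrays of 60000 and 10000 examples. A minimal interactive check, assuming exactly the behaviour the test asserts, might look like this:

from simplelearn.data.mnist import load_mnist

train_set, test_set = load_mnist()
images = train_set.tensors[train_set.names.index('images')]
labels = train_set.tensors[train_set.names.index('labels')]
print(images.shape, images.dtype)   # expected: (60000, 28, 28) uint8
print(labels.shape, labels.dtype)   # expected: (60000,) uint8
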
Example #2
def make_pickle(file_path):
    '''
    Pickles the MNIST dataset.
    '''
    hdf5_data = load_mnist()
    with open(file_path, 'wb') as pickle_file:
        cPickle.dump(hdf5_data,
                     pickle_file,
                     protocol=cPickle.HIGHEST_PROTOCOL)
Example #3
def test_pickle_h5_dataset():
    '''
    Tests pickling and unpickling of H5Dataset.
    '''

    # Path for the pickle file, not the .h5 file.
    file_path = '/tmp/test_mnist_test_pickle_hdf5_data.pkl'

    def make_pickle(file_path):
        '''
        Pickles the MNIST dataset.
        '''
        hdf5_data = load_mnist()
        with open(file_path, 'wb') as pickle_file:
            cPickle.dump(hdf5_data,
                         pickle_file,
                         protocol=cPickle.HIGHEST_PROTOCOL)

    make_pickle(file_path)
    assert_less(_file_size_in_bytes(file_path), 1024 * 5)

    def load_pickle(file_path):
        '''
        Loads the MNIST dataset pickled above.
        '''
        with open(file_path, 'rb') as pickle_file:
            return cPickle.load(pickle_file)

    mnist_datasets = load_mnist()
    pickled_mnist_datasets = load_pickle(file_path)

    for (mnist_dataset,
         pickled_mnist_dataset) in safe_izip(mnist_datasets,
                                             pickled_mnist_datasets):
        for (name,
             expected_name,
             fmt,
             expected_fmt,
             tensor,
             expected_tensor) in safe_izip(pickled_mnist_dataset.names,
                                           mnist_dataset.names,
                                           pickled_mnist_dataset.formats,
                                           mnist_dataset.formats,
                                           pickled_mnist_dataset.tensors,
                                           mnist_dataset.tensors):
            assert_equal(name, expected_name)
            assert_equal(fmt, expected_fmt)
            assert_array_equal(tensor, expected_tensor)
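The size assertion above calls _file_size_in_bytes(), which is not shown in this excerpt. A minimal sketch, assuming it is just a thin wrapper over the standard library:

import os

def _file_size_in_bytes(file_path):
    '''
    Returns the size of the file at file_path, in bytes.
    '''
    return os.path.getsize(file_path)
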
import numpy
import theano
from simplelearn.utils import safe_izip
from simplelearn.data.dataset import Dataset
from simplelearn.data.mnist import load_mnist
from simplelearn.formats import DenseFormat
from simplelearn.nodes import (RescaleImage, FormatNode, Conv2dLayer,
                               SoftmaxLayer, CrossEntropy, Misclassification,
                               AffineLayer)
from simplelearn.training import (Sgd, SgdParameterUpdater, SavesAtMinimum,
                                  AverageMonitor, ValidationCallback,
                                  StopsOnStagnation, LimitsNumEpochs)
from simplelearn.io import SerializableModel
import time
import pdb


###### HERE IS THE MAIN EXAMPLE ##########

training_set, testing_set = load_mnist()

training_tensors = [t[:50000, ...] for t in training_set.tensors]  # the first 50000 examples
validation_tensors = [t[50000:, ...] for t in training_set.tensors]  # the remaining 10000 examples
training_set, validation_set = [Dataset(tensors=t,
                                        names=training_set.names,
                                        formats=training_set.formats)
                                for t in (training_tensors, validation_tensors)]

training_iter = training_set.iterator(iterator_type='sequential', batch_size=100)

image_node, label_node = training_iter.make_input_nodes()

float_image_node = RescaleImage(image_node)

input_shape = float_image_node.output_format.shape
Example #5
def main():
    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/multilayer_perceptron/:
    #   multilayer_perceptron.ipynb
    #   mlp_tutorial_part_3.yaml

    sizes = [500, 500, 10]
    sparse_init_counts = [15, 15]
    assert_equal(len(sparse_init_counts), len(sizes) - 1)

    assert_equal(sizes[-1], 10)

    mnist_training, mnist_testing = load_mnist()

    # split training set into training and validation sets
    tensors = mnist_training.tensors
    training_tensors = [t[: -args.validation_size, ...] for t in tensors]
    validation_tensors = [t[-args.validation_size :, ...] for t in tensors]

    if not args.no_shuffle_dataset:

        def shuffle_in_unison_inplace(a, b):
            assert len(a) == len(b)
            p = numpy.random.permutation(len(a))
            return a[p], b[p]

        [training_tensors[0], training_tensors[1]] = shuffle_in_unison_inplace(training_tensors[0], training_tensors[1])
        [validation_tensors[0], validation_tensors[1]] = shuffle_in_unison_inplace(
            validation_tensors[0], validation_tensors[1]
        )

    all_images_shared = theano.shared(numpy.vstack([training_tensors[0], validation_tensors[0]]))
    all_labels_shared = theano.shared(numpy.concatenate([training_tensors[1], validation_tensors[1]]))

    length_training = training_tensors[0].shape[0]
    length_validation = validation_tensors[0].shape[0]
    indices_training = numpy.asarray(range(length_training))
    indices_validation = numpy.asarray(range(length_training, length_training + length_validation))
    indices_training_dataset = Dataset(
        tensors=[indices_training], names=["indices"], formats=[DenseFormat(axes=["b"], shape=[-1], dtype="int64")]
    )
    indices_validation_dataset = Dataset(
        tensors=[indices_validation], names=["indices"], formats=[DenseFormat(axes=["b"], shape=[-1], dtype="int64")]
    )
    indices_training_iterator = indices_training_dataset.iterator(
        iterator_type="sequential", batch_size=args.batch_size
    )
    indices_validation_iterator = indices_validation_dataset.iterator(iterator_type="sequential", batch_size=10000)

    mnist_validation_iterator = indices_validation_iterator
    mnist_training_iterator = indices_training_iterator

    input_indices_symbolic, = indices_training_iterator.make_input_nodes()
    image_lookup_node = ImageLookeupNode(input_indices_symbolic, all_images_shared)
    label_lookup_node = LabelLookeupNode(input_indices_symbolic, all_labels_shared)

    image_node = CastNode(image_lookup_node, "floatX")
    # image_node = RescaleImage(image_uint8_node)

    rng = numpy.random.RandomState(34523)
    theano_rng = RandomStreams(23845)

    (affine_nodes, output_node) = build_fc_classifier(
        image_node, sizes, sparse_init_counts, args.dropout_include_rates, rng, theano_rng
    )

    loss_node = CrossEntropy(output_node, label_lookup_node)
    loss_sum = loss_node.output_symbol.mean()
    max_epochs = 200

    #
    # Makes parameter updaters
    #

    parameters = []
    parameter_updaters = []
    momentum_updaters = []
    for affine_node in affine_nodes:
        for params in (affine_node.linear_node.params, affine_node.bias_node.params):
            parameters.append(params)
            gradients = theano.gradient.grad(loss_sum, params)
            parameter_updater = SgdParameterUpdater(
                params, gradients, args.learning_rate, args.initial_momentum, args.nesterov
            )
            parameter_updaters.append(parameter_updater)

            momentum_updaters.append(
                LinearlyInterpolatesOverEpochs(
                    parameter_updater.momentum, args.final_momentum, args.epochs_to_momentum_saturation
                )
            )

    #
    # Makes batch and epoch callbacks
    #

    """
    def make_output_basename(args):
        assert_equal(os.path.splitext(args.output_prefix)[1], "")
        if os.path.isdir(args.output_prefix) and \
           not args.output_prefix.endswith('/'):
            args.output_prefix += '/'

        output_dir, output_prefix = os.path.split(args.output_prefix)
        if output_prefix != "":
            output_prefix = output_prefix + "_"

        output_prefix = os.path.join(output_dir, output_prefix)

        return "{}lr-{}_mom-{}_nesterov-{}_bs-{}".format(
            output_prefix,
            args.learning_rate,
            args.initial_momentum,
            args.nesterov,
            args.batch_size)
    """

    assert_equal(os.path.splitext(args.output_prefix)[1], "")
    if os.path.isdir(args.output_prefix) and not args.output_prefix.endswith("/"):
        args.output_prefix += "/"

    output_dir, output_prefix = os.path.split(args.output_prefix)
    if output_prefix != "":
        output_prefix = output_prefix + "_"

    output_prefix = os.path.join(output_dir, output_prefix)

    epoch_logger = EpochLogger(output_prefix + "SGD_nesterov.h5")

    # misclassification_node = Misclassification(output_node, label_node)
    # mcr_logger = LogsToLists()
    # training_stopper = StopsOnStagnation(max_epochs=10,
    #                                      min_proportional_decrease=0.0)

    misclassification_node = Misclassification(output_node, label_lookup_node)

    validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[])
    epoch_logger.subscribe_to("validation mean loss", validation_loss_monitor)

    validation_misclassification_monitor = MeanOverEpoch(
        misclassification_node, callbacks=[print_mcr, StopsOnStagnation(max_epochs=20, min_proportional_decrease=0.0)]
    )

    epoch_logger.subscribe_to("validation misclassification", validation_misclassification_monitor)

    # batch callback (monitor)
    # training_loss_logger = LogsToLists()
    training_loss_monitor = MeanOverEpoch(loss_node, callbacks=[print_loss])
    epoch_logger.subscribe_to("training mean loss", training_loss_monitor)

    training_misclassification_monitor = MeanOverEpoch(misclassification_node, callbacks=[])
    epoch_logger.subscribe_to("training misclassification %", training_misclassification_monitor)

    # epoch callbacks
    # validation_loss_logger = LogsToLists()

    def make_output_filename(args, best=False):
        basename = make_output_basename(args)
        return "{}{}.pkl".format(basename, "_best" if best else "")

    # model = SerializableModel([input_indices_symbolic], [output_node])
    # saves_best = SavesAtMinimum(model, make_output_filename(args, best=True))

    validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[])

    epoch_logger.subscribe_to("validation loss", validation_loss_monitor)

    epoch_timer = EpochTimer2()
    epoch_logger.subscribe_to("epoch duration", epoch_timer)

    validation_callback = ValidationCallback(
        inputs=[input_indices_symbolic.output_symbol],
        input_iterator=mnist_validation_iterator,
        epoch_callbacks=[validation_loss_monitor, validation_misclassification_monitor],
    )

    trainer = Sgd(
        [input_indices_symbolic],
        mnist_training_iterator,
        callbacks=(
            parameter_updaters
            + momentum_updaters
            + [  # training_loss_monitor,
                # training_misclassification_monitor,
                validation_callback,
                LimitsNumEpochs(max_epochs),
                epoch_timer,
            ]
        ),
    )
    # validation_loss_monitor]))

    # stuff_to_pickle = OrderedDict(
    #     (('model', model),
    #      ('validation_loss_logger', validation_loss_logger)))

    # Pickling the trainer doesn't work when there are Dropout nodes.
    # stuff_to_pickle = OrderedDict(
    #     (('trainer', trainer),
    #      ('validation_loss_logger', validation_loss_logger),
    #      ('model', model)))

    # trainer.epoch_callbacks += (momentum_updaters +
    #                             [PicklesOnEpoch(stuff_to_pickle,
    #                                             make_output_filename(args),
    #                                             overwrite=False),
    #                              validation_callback,
    #                              LimitsNumEpochs(max_epochs)])

    start_time = time.time()
    trainer.train()
    elapsed_time = time.time() - start_time
    print("Total elapsed time is for training is: ", elapsed_time)
Example #6
def main():
    '''
    Entry point of this script.
    '''

    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/convolutional_network/:
    #   convolutional_network.ipynb

    filter_counts = [64, 64]
    filter_init_uniform_ranges = [.05] * len(filter_counts)
    filter_shapes = [(5, 5), (5, 5)]
    pool_shapes = [(4, 4), (4, 4)]
    pool_strides = [(2, 2), (2, 2)]
    affine_output_sizes = [10]
    affine_init_stddevs = [.05] * len(affine_output_sizes)
    dropout_include_rates = ([.5 if args.dropout else 1.0] *
                             (len(filter_counts) + len(affine_output_sizes)))

    assert_equal(affine_output_sizes[-1], 10)

    mnist_training, mnist_testing = load_mnist()

    # split training set into training and validation sets
    tensors = mnist_training.tensors
    training_tensors = [t[:-args.validation_size, ...] for t in tensors]
    validation_tensors = [t[-args.validation_size:, ...] for t in tensors]

    if not args.no_shuffle_dataset:
        def shuffle_in_unison_inplace(a, b):
            assert len(a) == len(b)
            p = numpy.random.permutation(len(a))
            return a[p], b[p]

        [training_tensors[0], training_tensors[1]] = shuffle_in_unison_inplace(
            training_tensors[0], training_tensors[1])
        [validation_tensors[0], validation_tensors[1]] = shuffle_in_unison_inplace(
            validation_tensors[0], validation_tensors[1])

    all_images_shared = theano.shared(
        numpy.vstack([training_tensors[0], validation_tensors[0]]))
    all_labels_shared = theano.shared(
        numpy.concatenate([training_tensors[1], validation_tensors[1]]))

    length_training = training_tensors[0].shape[0]
    length_validation = validation_tensors[0].shape[0]
    indices_training = numpy.asarray(range(length_training))
    indices_validation = numpy.asarray(range(length_training, length_training + length_validation))
    indices_training_dataset = Dataset(
        tensors=[indices_training],
        names=['indices'],
        formats=[DenseFormat(axes=['b'], shape=[-1], dtype='int64')])
    indices_validation_dataset = Dataset(
        tensors=[indices_validation],
        names=['indices'],
        formats=[DenseFormat(axes=['b'], shape=[-1], dtype='int64')])
    indices_training_iterator = indices_training_dataset.iterator(
        iterator_type='sequential', batch_size=args.batch_size)
    indices_validation_iterator = indices_validation_dataset.iterator(
        iterator_type='sequential', batch_size=args.batch_size)

    mnist_validation_iterator = indices_validation_iterator
    mnist_training_iterator = indices_training_iterator

    input_indices_symbolic, = indices_training_iterator.make_input_nodes()
    image_lookup_node = ImageLookeupNode(input_indices_symbolic, all_images_shared)
    label_lookup_node = LabelLookeupNode(input_indices_symbolic, all_labels_shared)

    image_node = RescaleImage(image_lookup_node)

    rng = numpy.random.RandomState(129734)
    theano_rng = RandomStreams(2387845)

    (conv_layers,
     affine_layers,
     output_node) = build_conv_classifier(image_node,
                                          filter_shapes,
                                          filter_counts,
                                          filter_init_uniform_ranges,
                                          pool_shapes,
                                          pool_strides,
                                          affine_output_sizes,
                                          affine_init_stddevs,
                                          dropout_include_rates,
                                          rng,
                                          theano_rng)

    loss_node = CrossEntropy(output_node, label_lookup_node)
    scalar_loss = loss_node.output_symbol.mean()

    if args.weight_decay != 0.0:
        for conv_layer in conv_layers:
            filters = conv_layer.conv2d_node.filters
            filter_loss = args.weight_decay * theano.tensor.sqr(filters).sum()
            scalar_loss = scalar_loss + filter_loss

        for affine_layer in affine_layers:
            weights = affine_layer.affine_node.linear_node.params
            weight_loss = args.weight_decay * theano.tensor.sqr(weights).sum()
            scalar_loss = scalar_loss + weight_loss

    max_epochs = 200

    #
    # Makes parameter updaters
    #

    parameters = []
    parameter_updaters = []

    def add_updaters(parameter,
                     scalar_loss,
                     parameter_updaters):
        '''
        Adds a ParameterUpdater to parameter_updaters, and a
        LinearlyInterpolatesOverEpochs to momentum_updaters.
        '''
        gradient = theano.gradient.grad(scalar_loss, parameter)
        parameter_updaters.append(SgdParameterUpdater(parameter,
                                                      gradient,
                                                      args.learning_rate))

    for conv_layer in conv_layers:
        filters = conv_layer.conv2d_node.filters
        parameters.append(filters)
        add_updaters(filters,
                     scalar_loss,
                     parameter_updaters)

        if args.max_filter_norm != numpy.inf:
            limit_param_norms(parameter_updaters[-1],
                              filters,
                              args.max_filter_norm,
                              (1, 2, 3))

        bias = conv_layer.bias_node.params
        parameters.append(bias)
        add_updaters(bias,
                     scalar_loss,
                     parameter_updaters)

    for affine_layer in affine_layers:
        weights = affine_layer.affine_node.linear_node.params
        parameters.append(weights)
        add_updaters(weights,
                     scalar_loss,
                     parameter_updaters)
        if args.max_col_norm != numpy.inf:
            limit_param_norms(parameter_updater=parameter_updaters[-1],
                              param=weights,
                              max_norm=args.max_col_norm,
                              input_axes=[0])

        biases = affine_layer.affine_node.bias_node.params
        parameters.append(biases)
        add_updaters(biases,
                     scalar_loss,
                     parameter_updaters)

    #
    # Makes batch and epoch callbacks
    #

    '''
    def make_output_filename(args, best=False):
        """
        Constructs a filename that reflects the command-line params.
        """
            assert_equal(os.path.splitext(args.output_prefix)[1], "")

            if os.path.isdir(args.output_prefix):
                output_dir, output_prefix = args.output_prefix, ""
            else:
                output_dir, output_prefix = os.path.split(args.output_prefix)
                assert_true(os.path.isdir(output_dir))

            if output_prefix != "":
                output_prefix = output_prefix + "_"

            output_prefix = os.path.join(output_dir, output_prefix)

            return ("%slr-%g_mom-%g_nesterov-%s_bs-%d%s.pkl" %
                    (output_prefix,
                     args.learning_rate,
                     args.initial_momentum,
                     args.nesterov,
                     args.batch_size,
                     "_best" if best else ""))
    '''

    assert_equal(os.path.splitext(args.output_prefix)[1], "")
    if os.path.isdir(args.output_prefix) and \
       not args.output_prefix.endswith('/'):
        args.output_prefix += '/'

    output_dir, output_prefix = os.path.split(args.output_prefix)
    if output_prefix != "":
        output_prefix = output_prefix + "_"

    output_prefix = os.path.join(output_dir, output_prefix)

    epoch_logger = EpochLogger(output_prefix + "SGD.h5")


    misclassification_node = Misclassification(output_node, label_lookup_node)

    validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[])
    epoch_logger.subscribe_to('validation mean loss', validation_loss_monitor)

    training_stopper = StopsOnStagnation(max_epochs=20,
                                         min_proportional_decrease=0.0)
    validation_misclassification_monitor = MeanOverEpoch(
        misclassification_node,
        callbacks=[print_misclassification_rate, training_stopper])

    epoch_logger.subscribe_to('validation misclassification',
                              validation_misclassification_monitor)

    # batch callback (monitor)
    #training_loss_logger = LogsToLists()
    training_loss_monitor = MeanOverEpoch(loss_node,
                                          callbacks=[print_loss])
    epoch_logger.subscribe_to("training loss", training_loss_monitor)

    training_misclassification_monitor = MeanOverEpoch(misclassification_node,
                                                       callbacks=[])
    epoch_logger.subscribe_to('training misclassification %',
                              training_misclassification_monitor)

    epoch_timer = EpochTimer2()
    epoch_logger.subscribe_to('epoch duration', epoch_timer)
    # epoch_logger.subscribe_to('epoch time', epoch_timer)
    #################


    #model = SerializableModel([input_indices_symbolic], [output_node])
    #saves_best = SavesAtMinimum(model, make_output_filename(args, best=True))

    validation_loss_monitor = MeanOverEpoch(loss_node,
                                            callbacks=[])
    epoch_logger.subscribe_to("Validation Loss", validation_loss_monitor)

    validation_callback = ValidationCallback(
        inputs=[input_indices_symbolic.output_symbol],
        input_iterator=mnist_validation_iterator,
        epoch_callbacks=[validation_loss_monitor,
                         validation_misclassification_monitor])

    # trainer = Sgd((image_node.output_symbol, label_node.output_symbol),
    trainer = Sgd([input_indices_symbolic],
                  mnist_training_iterator,
                  callbacks=(parameter_updaters + [#training_loss_monitor,
                              #training_misclassification_monitor,
                              validation_callback]))

    '''
    stuff_to_pickle = OrderedDict(
        (('model', model),
         ('validation_loss_logger', validation_loss_logger)))

    # Pickling the trainer doesn't work when there are Dropout nodes.
    # stuff_to_pickle = OrderedDict(
    #     (('trainer', trainer),
    #      ('validation_loss_logger', validation_loss_logger),
    #      ('model', model)))

    trainer.epoch_callbacks += (momentum_updaters +
                                [EpochTimer(),
                                 PicklesOnEpoch(stuff_to_pickle,
                                                make_output_filename(args),
                                                overwrite=False),
                                 validation_callback,
                                 LimitsNumEpochs(max_epochs)])
    '''
    trainer.epoch_callbacks += ([LimitsNumEpochs(max_epochs),
                                 epoch_timer])

    start_time = time.time()
    trainer.train()
    elapsed_time = time.time() - start_time

    print("Total elapsed time is for training is: ", elapsed_time)
Example #7
def test_mnist_against_pylearn2():
    '''
    Tests the content of the MNIST dataset loaded by load_mnist().

    Compares against pylearn2's MNIST wrapper. No-op if pylearn2 is not
    installed.
    '''
    if not PYLEARN2_IS_INSTALLED:
        raise nose.SkipTest()

    train_set, test_set = load_mnist()

    simplelearn_datasets = (train_set, test_set)
    pylearn2_datasets = [Pylearn2Mnist(which_set=w) for w in ('train', 'test')]

    def get_convert_function():
        '''
        Converts simplelearn's mnist data (uint8, [0, 255]), to
        pylearn2's iterator output (float32, [0.0, 1.0]).
        '''
        iterator = simplelearn_datasets[0].iterator(iterator_type='sequential',
                                                    batch_size=1)
        input_nodes = iterator.make_input_nodes()
        sl_batch_converter = RescaleImage(input_nodes[0])
        return theano.function([input_nodes[0].output_symbol],
                               sl_batch_converter.output_symbol)

    convert_s_batch = get_convert_function()

    def check_equal(s_mnist, p_mnist):
        '''
        Compares simplelearn and pylearn2's MNIST datasets.
        '''
        batch_size = 100

        s_iter = s_mnist.iterator(batch_size=batch_size,
                                  iterator_type='sequential',
                                  loop_style='divisible')

        def get_pylearn2_iterator():
            image_space = Conv2DSpace(shape=[28, 28],
                                      num_channels=1,
                                      axes=('b', 0, 1, 'c'),
                                      # pylearn2.MNIST forces this dtype
                                      dtype='float32')
            # label_space = VectorSpace(dim=10, dtype='uint8')
            label_space = IndexSpace(max_labels=10, dim=1, dtype='uint8')
            space = CompositeSpace([image_space, label_space])
            source = ('features', 'targets')
            specs = (space, source)

            return p_mnist.iterator(batch_size=batch_size,
                                    mode='sequential',
                                    data_specs=specs)

        p_iter = get_pylearn2_iterator()
        keep_going = True
        count = 0

        while keep_going:
            s_image, s_label = s_iter.next()
            p_image, p_label = p_iter.next()

            s_image = convert_s_batch(s_image)[..., numpy.newaxis]
            s_label = s_label[:, numpy.newaxis]

            assert_allclose(s_image, p_image)
            assert_array_equal(s_label, p_label)
            count += s_image.shape[0]
            keep_going = not s_iter.next_is_new_epoch()

        assert_equal(count, s_mnist.tensors[0].shape[0])
        assert_equal(count, p_mnist.X.shape[0])

    for s_mnist, p_mnist in safe_izip(simplelearn_datasets,
                                      pylearn2_datasets):
        check_equal(s_mnist, p_mnist)
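The conversion function above relies on RescaleImage to map simplelearn's uint8 images in [0, 255] to the float32 range [0.0, 1.0] produced by pylearn2's iterator. A hedged numpy equivalent of that conversion (not simplelearn's actual implementation):

import numpy

def rescale_image_batch(uint8_batch):
    '''
    Maps a uint8 image batch in [0, 255] to float32 in [0.0, 1.0].
    '''
    return uint8_batch.astype('float32') / 255.0
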
def main():
    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/multilayer_perceptron/:
    #   multilayer_perceptron.ipynb
    #   mlp_tutorial_part_3.yaml

    sizes = [500, 500, 10]
    sparse_init_counts = [15, 15]
    assert_equal(len(sparse_init_counts), len(sizes) - 1)

    assert_equal(sizes[-1], 10)

    mnist_training, mnist_testing = load_mnist()

    if args.validation_size == 0:
        # use testing set as validation set
        mnist_validation = mnist_testing
    else:
        # split training set into training and validation sets
        tensors = mnist_training.tensors
        training_tensors = [t[:-args.validation_size, ...] for t in tensors]
        validation_tensors = [t[-args.validation_size:, ...] for t in tensors]
        mnist_training = Dataset(tensors=training_tensors,
                                 names=mnist_training.names,
                                 formats=mnist_training.formats)
        mnist_validation = Dataset(tensors=validation_tensors,
                                   names=mnist_training.names,
                                   formats=mnist_training.formats)

    mnist_validation_iterator = mnist_validation.iterator(
        iterator_type='sequential',
        batch_size=args.batch_size)
    image_uint8_node, label_node = mnist_validation_iterator.make_input_nodes()
    image_node = CastNode(image_uint8_node, 'floatX')
    # image_node = RescaleImage(image_uint8_node)

    rng = numpy.random.RandomState(34523)
    theano_rng = RandomStreams(23845)

    (affine_nodes,
     output_node) = build_fc_classifier(image_node,
                                        sizes,
                                        sparse_init_counts,
                                        args.dropout_include_rates,
                                        rng,
                                        theano_rng)

    loss_node = CrossEntropy(output_node, label_node)
    loss_sum = loss_node.output_symbol.mean()
    max_epochs = 10000

    #
    # Makes parameter updaters
    #

    parameters = []
    parameter_updaters = []
    momentum_updaters = []
    for affine_node in affine_nodes:
        for params in (affine_node.linear_node.params,
                       affine_node.bias_node.params):
            parameters.append(params)
            gradients = theano.gradient.grad(loss_sum, params)
            parameter_updater = SgdParameterUpdater(params,
                                                    gradients,
                                                    args.learning_rate,
                                                    args.initial_momentum,
                                                    args.nesterov)
            parameter_updaters.append(parameter_updater)

            momentum_updaters.append(LinearlyInterpolatesOverEpochs(
                parameter_updater.momentum,
                args.final_momentum,
                args.epochs_to_momentum_saturation))

    #
    # Makes batch and epoch callbacks
    #

    def make_output_basename(args):
        assert_equal(os.path.splitext(args.output_prefix)[1], "")
        if os.path.isdir(args.output_prefix) and \
           not args.output_prefix.endswith('/'):
            args.output_prefix += '/'

        output_dir, output_prefix = os.path.split(args.output_prefix)
        if output_prefix != "":
            output_prefix = output_prefix + "_"

        output_prefix = os.path.join(output_dir, output_prefix)

        return "{}lr-{}_mom-{}_nesterov-{}_bs-{}".format(
            output_prefix,
            args.learning_rate,
            args.initial_momentum,
            args.nesterov,
            args.batch_size)

    epoch_logger = EpochLogger(make_output_basename(args) + "_log.h5")

    # misclassification_node = Misclassification(output_node, label_node)
    # mcr_logger = LogsToLists()
    # training_stopper = StopsOnStagnation(max_epochs=10,
    #                                      min_proportional_decrease=0.0)
    misclassification_node = Misclassification(output_node, label_node)

    validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[])
    epoch_logger.subscribe_to('validation mean loss', validation_loss_monitor)

    validation_misclassification_monitor = MeanOverEpoch(
        misclassification_node,
        callbacks=[print_mcr,
                   StopsOnStagnation(max_epochs=10,
                                     min_proportional_decrease=0.0)])

    epoch_logger.subscribe_to('validation misclassification',
                              validation_misclassification_monitor)

    # batch callback (monitor)
    # training_loss_logger = LogsToLists()
    training_loss_monitor = MeanOverEpoch(loss_node, callbacks=[print_loss])
    epoch_logger.subscribe_to('training mean loss', training_loss_monitor)

    training_misclassification_monitor = MeanOverEpoch(misclassification_node,
                                                       callbacks=[])
    epoch_logger.subscribe_to('training misclassification %',
                              training_misclassification_monitor)

    # epoch callbacks
    # validation_loss_logger = LogsToLists()


    def make_output_filename(args, best=False):
        basename = make_output_basename(args)
        return "{}{}.pkl".format(basename, '_best' if best else "")

    model = SerializableModel([image_uint8_node], [output_node])
    saves_best = SavesAtMinimum(model, make_output_filename(args, best=True))

    validation_loss_monitor = MeanOverEpoch(
        loss_node,
        callbacks=[saves_best])

    epoch_logger.subscribe_to('validation loss', validation_loss_monitor)

    validation_callback = ValidationCallback(
        inputs=[image_uint8_node.output_symbol, label_node.output_symbol],
        input_iterator=mnist_validation_iterator,
        epoch_callbacks=[validation_loss_monitor,
                         validation_misclassification_monitor])

    trainer = Sgd([image_uint8_node, label_node],
                  mnist_training.iterator(iterator_type='sequential',
                                          batch_size=args.batch_size),
                  callbacks=(parameter_updaters +
                             momentum_updaters +
                             [training_loss_monitor,
                              training_misclassification_monitor,
                              validation_callback,
                              LimitsNumEpochs(max_epochs)]))
                                                   # validation_loss_monitor]))

    # stuff_to_pickle = OrderedDict(
    #     (('model', model),
    #      ('validation_loss_logger', validation_loss_logger)))

    # Pickling the trainer doesn't work when there are Dropout nodes.
    # stuff_to_pickle = OrderedDict(
    #     (('trainer', trainer),
    #      ('validation_loss_logger', validation_loss_logger),
    #      ('model', model)))

    # trainer.epoch_callbacks += (momentum_updaters +
    #                             [PicklesOnEpoch(stuff_to_pickle,
    #                                             make_output_filename(args),
    #                                             overwrite=False),
    #                              validation_callback,
    #                              LimitsNumEpochs(max_epochs)])

    trainer.train()
def main():
    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/multilayer_perceptron/:
    #   multilayer_perceptron.ipynb
    #   mlp_tutorial_part_3.yaml

    sizes = [500, 500, 10]
    sparse_init_counts = [15, 15]
    assert_equal(len(sparse_init_counts), len(sizes) - 1)

    assert_equal(sizes[-1], 10)

    mnist_training, mnist_testing = load_mnist()

    if args.validation_size == 0:
        # use testing set as validation set
        mnist_validation = mnist_testing
    else:
        # split training set into training and validation sets
        tensors = mnist_training.tensors
        size_tensors = tensors[0].shape[0]
        training_tensors = [t[:-args.validation_size, ...] for t in tensors]
        validation_tensors = [t[size_tensors - args.validation_size:, ...] for t in tensors]

        shuffle_dataset = True
        if shuffle_dataset:
            def shuffle_in_unison_inplace(a, b):
                assert len(a) == len(b)
                p = numpy.random.permutation(len(a))
                return a[p], b[p]

            [training_tensors[0], training_tensors[1]] = shuffle_in_unison_inplace(
                training_tensors[0], training_tensors[1])
            [validation_tensors[0], validation_tensors[1]] = shuffle_in_unison_inplace(
                validation_tensors[0], validation_tensors[1])

        mnist_training = Dataset(tensors=training_tensors,
                                 names=mnist_training.names,
                                 formats=mnist_training.formats)
        mnist_validation = Dataset(tensors=validation_tensors,
                                   names=mnist_training.names,
                                   formats=mnist_training.formats)

    mnist_validation_iterator = mnist_validation.iterator(
        iterator_type='sequential',
        batch_size=args.batch_size)
    image_uint8_node, label_node = mnist_validation_iterator.make_input_nodes()
    image_node = CastNode(image_uint8_node, 'floatX')
    # image_node = RescaleImage(image_uint8_node)

    rng = numpy.random.RandomState(34523)
    theano_rng = RandomStreams(23845)

    (affine_nodes,
     output_node) = build_fc_classifier(image_node,
                                        sizes,
                                        sparse_init_counts,
                                        args.dropout_include_rates,
                                        rng,
                                        theano_rng)

    loss_node = CrossEntropy(output_node, label_node)
    loss_sum = loss_node.output_symbol.mean()
    max_epochs = 10000

    #
    # Makes parameter updaters
    #

    parameters = []
    parameters_peek_ahead = []

    for affine_node in affine_nodes:
        for params in (affine_node.linear_node.params,
                       affine_node.bias_node.params):
            parameters.append(params)
            parameter_peek_ahead = theano.shared(numpy.zeros(params.get_value().shape, dtype=params.dtype))
            parameters_peek_ahead.append(parameter_peek_ahead)

    loss_sum2 = theano.clone(
        loss_sum,
        replace={parameter: parameter_peek_ahead
                 for parameter, parameter_peek_ahead
                 in safe_izip(parameters, parameters_peek_ahead)})
    #
    # Makes parameter updaters
    #
    training_iterator = mnist_training.iterator(iterator_type='sequential',
                                                batch_size=args.batch_size)

    parameter_updaters = []
    momentum_updaters = []
    for params, params_peek_ahead in safe_izip(parameters, parameters_peek_ahead):
        gradient_peek_ahead = theano.gradient.grad(loss_sum2, params_peek_ahead)
        parameter_updater = RMSpropSgdParameterUpdater(params,
                                                       params_peek_ahead,
                                                       gradient_peek_ahead,
                                                       args.learning_rate,
                                                       args.initial_momentum,
                                                       args.nesterov)
        parameter_updaters.append(parameter_updater)

        momentum_updaters.append(LinearlyInterpolatesOverEpochs(
            parameter_updater.momentum,
            args.final_momentum,
            args.epochs_to_momentum_saturation))

    updates = [updater.updates.values()[0] - updater.updates.keys()[0]
               for updater in parameter_updaters]
    update_norm_monitors = [UpdateNormMonitor("layer %d %s" %
                                              (i // 2,
                                               "weights" if i % 2 == 0 else
                                               "bias"),
                                              update)
                            for i, update in enumerate(updates)]

    #
    # Makes batch and epoch callbacks
    #

    misclassification_node = Misclassification(output_node, label_node)
    mcr_logger = LogsToLists()
    training_stopper = StopsOnStagnation(max_epochs=10,
                                         min_proportional_decrease=0.0)
    mcr_monitor = AverageMonitor(misclassification_node.output_symbol,
                                 misclassification_node.output_format,
                                 callbacks=[print_mcr,
                                            mcr_logger,
                                            training_stopper])

    # batch callback (monitor)
    training_loss_logger = LogsToLists()
    training_loss_monitor = AverageMonitor(loss_node.output_symbol,
                                           loss_node.output_format,
                                           callbacks=[print_loss,
                                                      training_loss_logger])

    # print out 10-D feature vector
    # feature_vector_monitor = AverageMonitor(affine_nodes[-1].output_symbol,
    #                                         affine_nodes[-1].output_format,
    #                                         callbacks=[print_feature_vector])

    # epoch callbacks
    validation_loss_logger = LogsToLists()

    def make_output_filename(args, best=False):
        assert_equal(os.path.splitext(args.output_prefix)[1], "")
        if os.path.isdir(args.output_prefix) and \
           not args.output_prefix.endswith('/'):
            args.output_prefix += '/'

        output_dir, output_prefix = os.path.split(args.output_prefix)
        if output_prefix != "":
            output_prefix = output_prefix + "_"

        output_prefix = os.path.join(output_dir, output_prefix)

        return ("%slr-%g_mom-%g_nesterov-%s_bs-%d%s.pkl" %
                (output_prefix,
                 args.learning_rate,
                 args.initial_momentum,
                 args.nesterov,
                 args.batch_size,
                 "_best" if best else ""))

    model = SerializableModel([image_uint8_node], [output_node])
    saves_best = SavesAtMinimum(model, make_output_filename(args, best=True))

    validation_loss_monitor = AverageMonitor(
        loss_node.output_symbol,
        loss_node.output_format,
        callbacks=[validation_loss_logger, saves_best])

    validation_callback = ValidationCallback(
        inputs=[image_uint8_node.output_symbol, label_node.output_symbol],
        input_iterator=mnist_validation_iterator,
        monitors=[validation_loss_monitor, mcr_monitor])

    trainer = RMSpropSgd([image_uint8_node, label_node],
                  training_iterator,
                  parameters,
                  parameter_updaters,
                  monitors=[training_loss_monitor],
                  epoch_callbacks=[])

    stuff_to_pickle = OrderedDict(
        (('model', model),
         ('validation_loss_logger', validation_loss_logger)))

    # Pickling the trainer doesn't work when there are Dropout nodes.
    # stuff_to_pickle = OrderedDict(
    #     (('trainer', trainer),
    #      ('validation_loss_logger', validation_loss_logger),
    #      ('model', model)))

    trainer.epoch_callbacks = (momentum_updaters +
                               [PicklesOnEpoch(stuff_to_pickle,
                                               make_output_filename(args),
                                               overwrite=False),
                                validation_callback,
                                LimitsNumEpochs(max_epochs)])

    trainer.train()
Example #10
from simplelearn.data.mnist import load_mnist
from simplelearn.formats import DenseFormat
from simplelearn.training import (SgdParameterUpdater,
                                  Sgd,
                                  # LogsToLists,
                                  SavesAtMinimum,
                                  MeanOverEpoch,
                                  LimitsNumEpochs,
                                  LinearlyInterpolatesOverEpochs,
                                  # PicklesOnEpoch,
                                  ValidationCallback,
                                  StopsOnStagnation,
                                  EpochLogger)
import pdb

mnist_training, mnist_testing = load_mnist()

# split training set into training and validation sets
tensors = mnist_training.tensors
training_tensors = [t[:-args.validation_size, ...] for t in tensors]
validation_tensors = [t[-args.validation_size:, ...] for t in tensors]

if not args.no_shuffle_dataset:
    def shuffle_in_unison_inplace(a, b):
        assert len(a) == len(b)
        p = numpy.random.permutation(len(a))
        return a[p], b[p]

    [training_tensors[0], training_tensors[1]] = shuffle_in_unison_inplace(
        training_tensors[0], training_tensors[1])
    [validation_tensors[0], validation_tensors[1]] = shuffle_in_unison_inplace(
        validation_tensors[0], validation_tensors[1])
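This fragment refers to an args object that is never created here; in the surrounding examples it comes from a parse_args() helper. A minimal sketch of such a parser, assuming hypothetical --validation-size and --no-shuffle-dataset options that map onto the attributes used above:

import argparse

def parse_args():
    '''
    Parses the command-line options used by the snippet above.
    '''
    parser = argparse.ArgumentParser(
        description="Trains an MNIST classifier with simplelearn.")
    parser.add_argument('--validation-size',
                        type=int,
                        default=10000,
                        help="Number of training examples held out for validation.")
    parser.add_argument('--no-shuffle-dataset',
                        action='store_true',
                        help="If given, don't shuffle before splitting.")
    return parser.parse_args()

args = parse_args()
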
Example #11
def main():
    '''
    Entry point of this script.
    '''

    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/convolutional_network/:
    #   convolutional_network.ipynb

    filter_counts = [64, 64]
    filter_init_uniform_ranges = [.05] * len(filter_counts)
    filter_shapes = [(5, 5), (5, 5)]
    pool_shapes = [(4, 4), (4, 4)]
    pool_strides = [(2, 2), (2, 2)]
    affine_output_sizes = [10]
    affine_init_stddevs = [.05] * len(affine_output_sizes)
    dropout_include_rates = ([.5 if args.dropout else 1.0] *
                             (len(filter_counts) + len(affine_output_sizes)))

    assert_equal(affine_output_sizes[-1], 10)

    mnist_training, mnist_testing = load_mnist()

    if args.validation_size == 0:
        # use testing set as validation set
        mnist_validation = mnist_testing
    else:
        # split training set into training and validation sets
        tensors = mnist_training.tensors
        training_tensors = [t[:-args.validation_size, ...] for t in tensors]
        validation_tensors = [t[-args.validation_size:, ...] for t in tensors]
        mnist_training = Dataset(tensors=training_tensors,
                                 names=mnist_training.names,
                                 formats=mnist_training.formats)
        mnist_validation = Dataset(tensors=validation_tensors,
                                   names=mnist_training.names,
                                   formats=mnist_training.formats)

    mnist_validation_iterator = mnist_validation.iterator(
        iterator_type='sequential',
        loop_style='divisible',
        batch_size=args.batch_size)

    image_uint8_node, label_node = mnist_validation_iterator.make_input_nodes()
    image_node = RescaleImage(image_uint8_node)

    rng = numpy.random.RandomState(1234)
    theano_rng = RandomStreams(23845)

    (conv_layers,
     affine_layers,
     output_node) = build_conv_classifier(image_node,
                                          filter_shapes,
                                          filter_counts,
                                          filter_init_uniform_ranges,
                                          pool_shapes,
                                          pool_strides,
                                          affine_output_sizes,
                                          affine_init_stddevs,
                                          dropout_include_rates,
                                          rng,
                                          theano_rng)

    loss_node = CrossEntropy(output_node, label_node)
    scalar_loss = loss_node.output_symbol.mean()

    if args.weight_decay != 0.0:
        for conv_layer in conv_layers:
            filters = conv_layer.conv2d_node.filters
            filter_loss = args.weight_decay * theano.tensor.sqr(filters).sum()
            scalar_loss = scalar_loss + filter_loss

        for affine_layer in affine_layers:
            weights = affine_layer.affine_node.linear_node.params
            weight_loss = args.weight_decay * theano.tensor.sqr(weights).sum()
            scalar_loss = scalar_loss + weight_loss

    max_epochs = 500

    #
    # Makes parameter updaters
    #

    parameters = []
    parameter_updaters = []
    momentum_updaters = []

    def add_updaters(parameter,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters):
        '''
        Adds a ParameterUpdater to parameter_updaters, and a
        LinearlyInterpolatesOverEpochs to momentum_updaters.
        '''
        gradient = theano.gradient.grad(scalar_loss, parameter)
        parameter_updaters.append(SgdParameterUpdater(parameter,
                                                      gradient,
                                                      args.learning_rate,
                                                      args.initial_momentum,
                                                      not args.no_nesterov))
        momentum_updaters.append(LinearlyInterpolatesOverEpochs(
            parameter_updaters[-1].momentum,
            args.final_momentum,
            args.epochs_to_momentum_saturation))

    for conv_layer in conv_layers:
        filters = conv_layer.conv2d_node.filters
        parameters.append(filters)
        add_updaters(filters,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

        if args.max_filter_norm != numpy.inf:
            limit_param_norms(parameter_updaters[-1],
                              filters,
                              args.max_filter_norm,
                              (1, 2, 3))

        bias = conv_layer.bias_node.params
        parameters.append(bias)
        add_updaters(bias,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

    for affine_layer in affine_layers:
        weights = affine_layer.affine_node.linear_node.params
        parameters.append(weights)
        add_updaters(weights,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)
        if args.max_col_norm != numpy.inf:
            limit_param_norms(parameter_updater=parameter_updaters[-1],
                              param=weights,
                              max_norm=args.max_col_norm,
                              input_axes=[0])

        biases = affine_layer.affine_node.bias_node.params
        parameters.append(biases)
        add_updaters(biases,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

    #
    # Makes batch and epoch callbacks
    #

    def make_misclassification_monitor():
        '''
        Returns a MeanOverEpoch of the misclassification rate.
        '''
        misclassification_node = Misclassification(output_node, label_node)
        mcr_logger = LogsToLists()
        training_stopper = StopsOnStagnation(max_epochs=10,
                                             min_proportional_decrease=0.0)
        return MeanOverEpoch(misclassification_node,
                             callbacks=[print_misclassification_rate,
                                        mcr_logger,
                                        training_stopper])

    mcr_monitor = make_misclassification_monitor()

    # batch callback (monitor)
    training_loss_logger = LogsToLists()
    training_loss_monitor = MeanOverEpoch(loss_node,
                                          callbacks=[print_loss,
                                                     training_loss_logger])

    # epoch callbacks
    validation_loss_logger = LogsToLists()

    def make_output_filename(args, best=False):
        '''
        Constructs a filename that reflects the command-line params.
        '''
        assert_equal(os.path.splitext(args.output_prefix)[1], "")

        if os.path.isdir(args.output_prefix):
            output_dir, output_prefix = args.output_prefix, ""
        else:
            output_dir, output_prefix = os.path.split(args.output_prefix)
            assert_true(os.path.isdir(output_dir))

        if output_prefix != "":
            output_prefix = output_prefix + "_"

        output_prefix = os.path.join(output_dir, output_prefix)

        return ("%slr-%g_mom-%g_nesterov-%s_bs-%d%s.pkl" %
                (output_prefix,
                 args.learning_rate,
                 args.initial_momentum,
                 not args.no_nesterov,
                 args.batch_size,
                 "_best" if best else ""))

    model = SerializableModel([image_uint8_node], [output_node])
    saves_best = SavesAtMinimum(model, make_output_filename(args, best=True))

    validation_loss_monitor = MeanOverEpoch(loss_node,
                                            callbacks=[validation_loss_logger,
                                                       saves_best])

    validation_callback = ValidationCallback(
        inputs=[image_uint8_node.output_symbol, label_node.output_symbol],
        input_iterator=mnist_validation_iterator,
        epoch_callbacks=[validation_loss_monitor, mcr_monitor])

    # trainer = Sgd((image_node.output_symbol, label_node.output_symbol),
    trainer = Sgd([image_uint8_node, label_node],
                  mnist_training.iterator(iterator_type='sequential',
                                          loop_style='divisible',
                                          batch_size=args.batch_size),
                  callbacks=(parameter_updaters + [training_loss_monitor]))

    stuff_to_pickle = OrderedDict(
        (('model', model),
         ('validation_loss_logger', validation_loss_logger)))

    # Pickling the trainer doesn't work when there are Dropout nodes.
    # stuff_to_pickle = OrderedDict(
    #     (('trainer', trainer),
    #      ('validation_loss_logger', validation_loss_logger),
    #      ('model', model)))

    trainer.epoch_callbacks += (momentum_updaters +
                                [EpochTimer(),
                                 PicklesOnEpoch(stuff_to_pickle,
                                                make_output_filename(args),
                                                overwrite=False),
                                 validation_callback,
                                 LimitsNumEpochs(max_epochs)])

    trainer.train()
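Several of the examples above hand a LinearlyInterpolatesOverEpochs callback the initial momentum, the final momentum, and args.epochs_to_momentum_saturation. A hedged sketch of the schedule the name implies (not the library's actual code):

def interpolated_value(epoch, initial_value, final_value, saturation_epochs):
    '''
    Linearly interpolates from initial_value to final_value, saturating
    after saturation_epochs epochs.
    '''
    alpha = min(float(epoch) / saturation_epochs, 1.0)
    return (1.0 - alpha) * initial_value + alpha * final_value

# e.g. halfway through a 10-epoch ramp from 0.5 to 0.99:
# interpolated_value(5, 0.5, 0.99, 10) == 0.745
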
Example #12
def main():
    args = parse_args()
    model = args.model

    assert_is_instance(model, SerializableModel)
    assert_equal(len(model.output_nodes), 1)
    assert_equal(len(model.input_nodes), 1)

    output_node = model.output_nodes[0]
    assert_is_instance(output_node, (Softmax, SoftmaxLayer))
    assert_equal(output_node.output_format.axes, ('b', 'f'))

    input_uint8_node = model.input_nodes[0]
    function = model.compile_function()

    def get_input_float_node(output_node):
        '''
        Walks back from output_node towards the inputs and returns the first
        CastNode or RescaleImage node found, raising if it hits an InputNode.
        '''
        assert_equal(len(output_node.inputs), 1)
        while not isinstance(output_node, (CastNode, RescaleImage)):
            if isinstance(output_node, InputNode):
                raise RuntimeError("Expected model to contain a CastNode or "
                                   "RescaleImage node, but didn't find one.")
            output_node = output_node.inputs[0]

        return output_node

    input_float_node = get_input_float_node(output_node)

    mnist_test = load_mnist()[1]
    mnist_test_iterator = mnist_test.iterator(iterator_type='sequential',
                                              batch_size=1)
    label_node = mnist_test_iterator.make_input_nodes()[1]

    cross_entropy = CrossEntropy(output_node, label_node)

    #
    # Create shared-variable versions of the float image and label nodes,
    # and swap them into the computational graph.
    #

    shared_input_float = theano.shared(
        input_float_node.output_format.make_batch(is_symbolic=False,
                                                  batch_size=1))

    shared_label = theano.shared(
        label_node.output_format.make_batch(batch_size=1, is_symbolic=False))

    cross_entropy.output_symbol = theano.clone(
        cross_entropy.output_symbol,
        replace={input_float_node.output_symbol: shared_input_float,
                 label_node.output_symbol: shared_label})

    loss_symbol = cross_entropy.output_symbol.mean()
    output_node.output_format.check(output_node.output_symbol)

    def get_optimized_images(float_image):

        optimized_images = input_float_node.output_format.make_batch(
            is_symbolic=False,
            batch_size=10)

        for i in xrange(model.output_nodes[0].output_format.shape[1]):
            print("optimizing image w.r.t. '%d' label" % i)
            param_updater = SgdParameterUpdater(
                shared_input_float,
                loss_symbol,
                learning_rate=args.learning_rate,
                momentum=args.momentum,
                use_nesterov=args.nesterov)

            sgd = Sgd(input_nodes=[],
                      input_iterator=DummyIterator(),
                      callbacks=[param_updater,
                                 LimitsNumEpochs(args.max_iterations)])

            shared_input_float.set_value(float_image)
            shared_label.set_value(numpy.asarray([i],
                                                 dtype=shared_label.dtype))
            sgd.train()

            optimized_images[i, ...] = shared_input_float.get_value()[0, ...]

        return optimized_images

    figure = pyplot.figure(figsize=numpy.array([5, 5]) * [3, 5])

    image_axes = figure.add_subplot(3, 5, 1)

    optimized_image_axes = []
    for r in range(2, 4):
        for c in range(1, 6):
            optimized_image_axes.append(figure.add_subplot(3,
                                                           5,
                                                           (r - 1) * 5 + c))

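    # Compile a function that maps a raw uint8 image batch to the float
    # (rescaled) representation the model operates on.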
    get_float_image = theano.function([input_uint8_node.output_symbol],
                                      input_float_node.output_symbol)

    def update_display(uint8_image, target_label):
        float_image = get_float_image(uint8_image)

        # Normalize the input range from [0, 255] to [0.0, 1.0], even
        # though we didn't during training. If we don't do this here,
        # the gradient descent is overpowered by the strength of the
        # original signal, and doesn't make progress (comment out to see).
        float_image /= 255.0

        normalize_images = True
        norm = (None if normalize_images
                else matplotlib.colors.NoNorm())

        image_axes.imshow(float_image[0, ...], cmap='gray', norm=norm)

        optimized_images = get_optimized_images(float_image)
        for image, axes in safe_izip(optimized_images, optimized_image_axes):
            axes.imshow(image, cmap='gray', norm=matplotlib.colors.NoNorm())

        figure.canvas.draw()

    def on_key_press(event):
        if event.key == ' ':
            update_display(*mnist_test_iterator.next())
        elif event.key == 'q':
            sys.exit(0)

    figure.canvas.mpl_connect('key_press_event', on_key_press)

    update_display(*mnist_test_iterator.next())
    pyplot.show()
Example #13
def main():
    '''
    Entry point of script.
    '''

    def parse_args():
        '''
        Parses command-line args.
        '''

        parser = argparse.ArgumentParser(
            description="Viewer for MNIST's images and labels.")

        parser.add_argument('--which-set',
                            required=True,
                            choices=['test', 'train'],
                            help=("Which set to view ('test' or 'train'"))

        return parser.parse_args()

    args = parse_args()

    figure, image_axes = pyplot.subplots(1,
                                         1,
                                         squeeze=True,
                                         figsize=(4, 4))

    figure.canvas.set_window_title("MNIST's %sing set" % args.which_set)

    image_axes.get_xaxis().set_visible(False)
    image_axes.get_yaxis().set_visible(False)

    dataset = load_mnist()[0 if args.which_set == 'train' else 1]
    iterator = dataset.iterator(batch_size=1, iterator_type='sequential')

    index = [0]
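    # A one-element list serves as a mutable counter that the nested
    # show_next() can update (Python 2 has no 'nonlocal').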

    def show_next():
        '''
        Shows the next image and label.
        '''

        images, labels = iterator.next()
        image = images[0]
        label = labels[0]

        image_axes.imshow(image, cmap='gray')

        image_num = index[0] % dataset.num_examples()

        image_axes.set_title('label: %d (%d of %d)' %
                             (label, image_num + 1, dataset.num_examples()))

        figure.canvas.draw()
        index[0] += 1

    show_next()

    def on_key_press(event):
        if event.key == 'q':
            sys.exit(0)
        elif event.key == ' ':
            show_next()

    figure.canvas.mpl_connect('key_press_event', on_key_press)
    pyplot.show()
Example #14
def main():
    '''
    Entry point of this script.
    '''

    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/convolutional_network/:
    #   convolutional_network.ipynb

    filter_counts = [64, 64]
    filter_init_uniform_ranges = [.05] * len(filter_counts)
    filter_shapes = [(5, 5), (5, 5)]
    pool_shapes = [(4, 4), (4, 4)]
    pool_strides = [(2, 2), (2, 2)]
    affine_output_sizes = [10]
    affine_init_stddevs = [.05] * len(affine_output_sizes)
    dropout_include_rates = ([.5 if args.dropout else 1.0] *
                             (len(filter_counts) + len(affine_output_sizes)))

    assert_equal(affine_output_sizes[-1], 10)

    mnist_training, mnist_testing = load_mnist()

    if args.validation_size == 0:
        # use testing set as validation set
        mnist_validation = mnist_testing
    else:
        # split training set into training and validation sets
        tensors = mnist_training.tensors
        training_tensors = [t[:-args.validation_size, ...] for t in tensors]
        validation_tensors = [t[-args.validation_size:, ...] for t in tensors]

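        # Optionally shuffle each split. shuffle_in_unison_inplace applies
        # one random permutation to both images and labels so the pairs stay
        # aligned (despite its name, it returns re-indexed copies).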
        if args.shuffle_dataset:
            def shuffle_in_unison_inplace(a, b):
                assert len(a) == len(b)
                p = numpy.random.permutation(len(a))
                return a[p], b[p]

            training_tensors[0], training_tensors[1] = \
                shuffle_in_unison_inplace(training_tensors[0],
                                          training_tensors[1])
            validation_tensors[0], validation_tensors[1] = \
                shuffle_in_unison_inplace(validation_tensors[0],
                                          validation_tensors[1])


        mnist_training = Dataset(tensors=training_tensors,
                                 names=mnist_training.names,
                                 formats=mnist_training.formats)
        mnist_validation = Dataset(tensors=validation_tensors,
                                   names=mnist_training.names,
                                   formats=mnist_training.formats)

    mnist_validation_iterator = mnist_validation.iterator(
        iterator_type='sequential',
        loop_style='divisible',
        batch_size=args.batch_size)

    mnist_training_iterator = mnist_training.iterator(
        iterator_type='sequential',
        loop_style='divisible',
        batch_size=args.batch_size)

    image_uint8_node, label_node = mnist_validation_iterator.make_input_nodes()
    image_node = RescaleImage(image_uint8_node)

    rng = numpy.random.RandomState(129734)
    theano_rng = RandomStreams(2387845)

    (conv_layers,
     affine_layers,
     output_node,
     params_flat,
     params_old_flat) = build_conv_classifier(image_node,
                                              filter_shapes,
                                              filter_counts,
                                              filter_init_uniform_ranges,
                                              pool_shapes,
                                              pool_strides,
                                              affine_output_sizes,
                                              affine_init_stddevs,
                                              dropout_include_rates,
                                              rng,
                                              theano_rng)

    loss_node = CrossEntropy(output_node, label_node)
    scalar_loss = loss_node.output_symbol.mean()
#    scalar_loss2 = theano.clone(scalar_loss, replace = {params_flat: params_old_flat})

    if args.weight_decay != 0.0:
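        # L2 weight decay: add the squared sum of every convolutional filter
        # bank and affine weight matrix to the loss, scaled by
        # args.weight_decay.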
        for conv_layer in conv_layers:
            filters = conv_layer.conv2d_node.filters
            filter_loss = args.weight_decay * theano.tensor.sqr(filters).sum()
            scalar_loss = scalar_loss + filter_loss

        for affine_layer in affine_layers:
            weights = affine_layer.affine_node.linear_node.params
            weight_loss = args.weight_decay * theano.tensor.sqr(weights).sum()
            scalar_loss = scalar_loss + weight_loss

    max_epochs = 500

    #
    # Makes parameter updater
    #

    gradient = theano.gradient.grad(scalar_loss, params_flat)

    loss_function = theano.function([image_uint8_node.output_symbol,
                                     label_node.output_symbol],
                                    scalar_loss)
    gradient_function = theano.function([image_uint8_node.output_symbol,
                                         label_node.output_symbol],
                                        gradient)

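    # Sanity check: evaluate the loss and gradient on one training batch
    # before handing everything to the trainer.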
    cost_arguments = mnist_training_iterator.next()
    print(loss_function(*cost_arguments))
    grads = gradient_function(*cost_arguments)
    print(grads)
    print(grads.shape)

    #
    # Makes batch and epoch callbacks
    #
    def make_output_filename(args, best=False):
        '''
        Constructs a filename that reflects the command-line params.
        '''
        assert_equal(os.path.splitext(args.output_prefix)[1], "")

        if os.path.isdir(args.output_prefix):
            output_dir, output_prefix = args.output_prefix, ""
        else:
            output_dir, output_prefix = os.path.split(args.output_prefix)
            assert_true(os.path.isdir(output_dir))

        if output_prefix != "":
            output_prefix = output_prefix + "_"

        output_prefix = os.path.join(output_dir, output_prefix)

        return ("%slr-%g_mom-%g_nesterov-%s_bs-%d%s.pkl" %
                (output_prefix,
                 args.learning_rate,
                 args.initial_momentum,
                 not args.no_nesterov,
                 args.batch_size,
                 "_best" if best else ""))


    # Set up the loggers
    epoch_logger = EpochLogger(make_output_filename(args) + "_log.h5")
    misclassification_node = Misclassification(output_node, label_node)

    validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[])
    epoch_logger.subscribe_to('validation mean loss', validation_loss_monitor)

    training_stopper = StopsOnStagnation(max_epochs=100,
                                         min_proportional_decrease=0.0)
    validation_misclassification_monitor = MeanOverEpoch(
        misclassification_node,
        callbacks=[print_misclassification_rate, training_stopper])

    epoch_logger.subscribe_to('validation misclassification',
                              validation_misclassification_monitor)

    # batch callback (monitor)
    #training_loss_logger = LogsToLists()
    training_loss_monitor = MeanOverEpoch(loss_node,
                                          callbacks=[print_loss])
    epoch_logger.subscribe_to("training loss", training_loss_monitor)

    training_misclassification_monitor = MeanOverEpoch(misclassification_node,
                                                       callbacks=[])
    epoch_logger.subscribe_to('training misclassification %',
                              training_misclassification_monitor)

    epoch_timer = EpochTimer()
    # epoch_logger.subscribe_to('epoch time', epoch_timer)


    model = SerializableModel([image_uint8_node], [output_node])
    saves_best = SavesAtMinimum(model, make_output_filename(args, best=True))

    validation_loss_monitor = MeanOverEpoch(loss_node,
                                            callbacks=[saves_best])
    epoch_logger.subscribe_to("Validation Loss", validation_loss_monitor)

    validation_callback = ValidationCallback(
        inputs=[image_uint8_node.output_symbol, label_node.output_symbol],
        input_iterator=mnist_validation_iterator,
        epoch_callbacks=[validation_loss_monitor,
                         validation_misclassification_monitor])

    # trainer = Sgd((image_node.output_symbol, label_node.output_symbol),
    trainer = Bgfs(inputs=[image_node, label_node],
                   parameters=params_flat,
                   old_parameters=params_old_flat,
                   gradient=gradient,
                   learning_rate=args.learning_rate,
                   training_iterator=mnist_training_iterator,
                   training_set=mnist_training,
                   scalar_loss=scalar_loss,
                   epoch_callbacks=[
                       # training_loss_monitor,
                       # training_misclassification_monitor,
                       validation_callback,
                       LimitsNumEpochs(max_epochs),
                       EpochTimer()])

    '''
    stuff_to_pickle = OrderedDict(
        (('model', model),
         ('validation_loss_logger', validation_loss_logger)))

    # Pickling the trainer doesn't work when there are Dropout nodes.
    # stuff_to_pickle = OrderedDict(
    #     (('trainer', trainer),
    #      ('validation_loss_logger', validation_loss_logger),
    #      ('model', model)))

    trainer.epoch_callbacks += (momentum_updaters +
                                [EpochTimer(),
                                 PicklesOnEpoch(stuff_to_pickle,
                                                make_output_filename(args),
                                                overwrite=False),
                                 validation_callback,
                                 LimitsNumEpochs(max_epochs)])
    '''

    start_time = time.time()
    trainer.train()
    elapsed_time = time.time() - start_time

    print("Total elapsed time is for training is: ", elapsed_time)