def main():
    '''
    Entry point of this script.

    Loads MNIST, holds out the last args.validation_size examples of the
    training set for validation, builds the classifier returned by
    build_fc_classifier, and trains it with SemiSgd. Per-epoch statistics
    are sent to an EpochLogger writing to "<output prefix>S2GD_plus.h5".

    Reads these attributes of the parsed command-line args:
    validation_size, no_shuffle_dataset, batch_size, batch_size_full,
    dropout_include_rates, learning_rate, initial_momentum, final_momentum,
    epochs_to_momentum_saturation, method, nesterov, output_prefix.

    Side effects: may append '/' to args.output_prefix, draws from the
    global numpy RNG when shuffling, creates the log file, prints progress.
    '''
    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/multilayer_perceptron/:
    #   multilayer_perceptron.ipynb
    #   mlp_tutorial_part_3.yaml
    sizes = [500, 500, 10]
    sparse_init_counts = [15, 15]
    assert_equal(len(sparse_init_counts), len(sizes) - 1)
    assert_equal(sizes[-1], 10)
    max_epochs = 200

    #
    # Splits the MNIST training set into training and validation sets
    #

    mnist_training, _ = load_mnist()

    def split_off_validation(tensor):
        '''
        Returns (training part, validation part) of a tensor.

        Uses an explicit split index rather than tensor[:-validation_size]:
        with validation_size == 0 the latter is tensor[:0], i.e. an empty
        training set. Identical to the negative-index form for
        1 <= validation_size, including validation_size > len(tensor).
        '''
        split = max(tensor.shape[0] - args.validation_size, 0)
        return tensor[:split, ...], tensor[split:, ...]

    splits = [split_off_validation(t) for t in mnist_training.tensors]
    training_tensors = [training for training, _ in splits]
    validation_tensors = [validation for _, validation in splits]

    def shuffled_together(images, labels):
        '''
        Returns copies of both arrays, reordered by one shared permutation.
        '''
        assert_equal(len(images), len(labels))
        order = numpy.random.permutation(len(images))
        return images[order], labels[order]

    if not args.no_shuffle_dataset:
        # Training is shuffled before validation; the order matters because
        # both draw from the global numpy RNG.
        training_tensors[0], training_tensors[1] = shuffled_together(
            training_tensors[0], training_tensors[1])
        validation_tensors[0], validation_tensors[1] = shuffled_together(
            validation_tensors[0], validation_tensors[1])

    #
    # Stores all images and labels in shared variables; the datasets that
    # the iterators walk over contain only row indices into them.
    #

    all_images_shared = theano.shared(
        numpy.vstack([training_tensors[0], validation_tensors[0]]))
    all_labels_shared = theano.shared(
        numpy.concatenate([training_tensors[1], validation_tensors[1]]))

    length_training = training_tensors[0].shape[0]
    length_validation = validation_tensors[0].shape[0]

    def make_indices_dataset(start, stop):
        '''
        Returns a Dataset holding the row indices start, ..., stop - 1.
        '''
        # dtype is explicit so that it matches the declared format on every
        # platform and for an empty range.
        indices = numpy.arange(start, stop, dtype='int64')
        return Dataset(tensors=[indices],
                       names=['indices'],
                       formats=[DenseFormat(axes=['b'],
                                            shape=[-1],
                                            dtype='int64')])

    indices_training_dataset = make_indices_dataset(0, length_training)
    # Validation rows follow the training rows in the stacked arrays.
    indices_validation_dataset = make_indices_dataset(
        length_training, length_training + length_validation)

    training_iterator = indices_training_dataset.iterator(
        iterator_type='sequential', batch_size=args.batch_size)
    validation_iterator = indices_validation_dataset.iterator(
        iterator_type='sequential', batch_size=10000)
    # Second pass over the same training indices with its own batch size.
    training_iterator_full = indices_training_dataset.iterator(
        iterator_type='sequential', batch_size=args.batch_size_full)

    #
    # Builds the model and the loss
    #

    input_indices_symbolic, = training_iterator.make_input_nodes()
    image_lookup_node = ImageLookeupNode(input_indices_symbolic,
                                         all_images_shared)
    label_lookup_node = LabelLookeupNode(input_indices_symbolic,
                                         all_labels_shared)
    image_node = CastNode(image_lookup_node, 'floatX')

    rng = numpy.random.RandomState(34523)
    theano_rng = RandomStreams(23845)

    affine_nodes, output_node = build_fc_classifier(
        image_node,
        sizes,
        sparse_init_counts,
        args.dropout_include_rates,
        rng,
        theano_rng)

    loss_node = CrossEntropy(output_node, label_lookup_node)
    scalar_loss = loss_node.output_symbol.mean()

    #
    # Collects the parameters, and a zero-initialised shared variable of the
    # same shape and dtype for each of them.
    # NOTE(review): presumably SemiSgd stores a snapshot of the parameters
    # in these "old" variables -- confirm against SemiSgd.
    #

    parameters = []
    for affine_node in affine_nodes:
        parameters.append(affine_node.linear_node.params)
        parameters.append(affine_node.bias_node.params)

    old_parameters = [
        theano.shared(numpy.zeros(parameter.get_value().shape,
                                  dtype=parameter.dtype))
        for parameter in parameters]

    # The same loss expression, with every parameter replaced by its "old"
    # counterpart.
    scalar_loss_at_old_params = theano.clone(
        scalar_loss,
        replace=dict(safe_izip(parameters, old_parameters)))

    #
    # Makes parameter updaters
    #

    parameter_updaters = []
    momentum_updaters = []

    for parameter, old_parameter in safe_izip(parameters, old_parameters):
        gradient = theano.gradient.grad(scalar_loss, parameter)
        gradient_at_old_params = theano.gradient.grad(
            scalar_loss_at_old_params, old_parameter)

        parameter_updater = SemiSgdParameterUpdater(parameter,
                                                    gradient,
                                                    gradient_at_old_params,
                                                    args.learning_rate,
                                                    args.initial_momentum,
                                                    args.method,
                                                    training_iterator,
                                                    training_iterator_full,
                                                    args.nesterov)
        parameter_updaters.append(parameter_updater)

        momentum_updaters.append(LinearlyInterpolatesOverEpochs(
            parameter_updater.momentum,
            args.final_momentum,
            args.epochs_to_momentum_saturation))

    #
    # Makes batch and epoch callbacks
    #

    # The prefix must not carry a file extension. A directory prefix gets a
    # trailing '/', so that os.path.split() yields an empty file-name part.
    assert_equal(os.path.splitext(args.output_prefix)[1], "")
    if (os.path.isdir(args.output_prefix) and
            not args.output_prefix.endswith('/')):
        args.output_prefix += '/'

    output_dir, output_prefix = os.path.split(args.output_prefix)
    if output_prefix != "":
        output_prefix += "_"

    output_prefix = os.path.join(output_dir, output_prefix)

    epoch_logger = EpochLogger(output_prefix + "S2GD_plus.h5")

    misclassification_node = Misclassification(output_node, label_lookup_node)

    # NOTE(review): the monitor logged as 'validation mean loss' is never
    # handed to ValidationCallback (the one logged as 'validation loss'
    # further down is). It is kept so that the set of names registered with
    # the logger stays the same.
    orphan_validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[])
    epoch_logger.subscribe_to('validation mean loss',
                              orphan_validation_loss_monitor)

    validation_misclassification_monitor = MeanOverEpoch(
        misclassification_node,
        callbacks=[print_mcr,
                   StopsOnStagnation(max_epochs=20,
                                     min_proportional_decrease=0.0)])
    epoch_logger.subscribe_to('validation misclassification',
                              validation_misclassification_monitor)

    # NOTE(review): the two training monitors are registered with the logger
    # but are not among the trainer's epoch callbacks below.
    training_loss_monitor = MeanOverEpoch(loss_node, callbacks=[print_loss])
    epoch_logger.subscribe_to('training mean loss', training_loss_monitor)

    training_misclassification_monitor = MeanOverEpoch(misclassification_node,
                                                       callbacks=[])
    epoch_logger.subscribe_to('training misclassification %',
                              training_misclassification_monitor)

    epoch_timer = EpochTimer2()
    epoch_logger.subscribe_to('epoch duration', epoch_timer)

    validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[])
    epoch_logger.subscribe_to('validation loss', validation_loss_monitor)

    validation_callback = ValidationCallback(
        inputs=[input_indices_symbolic.output_symbol],
        input_iterator=validation_iterator,
        epoch_callbacks=[validation_loss_monitor,
                         validation_misclassification_monitor])

    # NOTE: an earlier, disabled version of this script pickled the trainer
    # each epoch; its comment said that pickling the trainer doesn't work
    # when there are Dropout nodes.
    trainer = SemiSgd([input_indices_symbolic],
                      training_iterator,
                      parameters,
                      old_parameters,
                      parameter_updaters,
                      training_iterator_full,
                      epoch_callbacks=(parameter_updaters +
                                       momentum_updaters +
                                       [validation_callback,
                                        LimitsNumEpochs(max_epochs),
                                        epoch_timer]))

    # Each message is one pre-formatted string, so the output is the same
    # whether print is a statement (Python 2) or a function.
    print("{}  is now executed".format(args.method))

    start_time = time.time()
    trainer.train()
    elapsed_time = time.time() - start_time

    print("Total elapsed time is for training is:  {}".format(elapsed_time))
def main():
    '''
    Entry point of this script.

    Loads MNIST, holds out the last args.validation_size examples of the
    training set for validation, builds the classifier returned by
    build_conv_classifier, and trains it with SemiSgd. Per-epoch statistics
    are sent to an EpochLogger writing to "<output prefix>S2GD_plus.h5".

    Reads these attributes of the parsed command-line args:
    dropout, validation_size, no_shuffle_dataset, batch_size,
    batch_size_full, weight_decay, max_filter_norm, max_col_norm,
    learning_rate, initial_momentum, final_momentum,
    epochs_to_momentum_saturation, method, nesterov, output_prefix.

    Side effects: may append '/' to args.output_prefix, draws from the
    global numpy RNG when shuffling, creates the log file, prints progress.
    '''
    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/convolutional_network/:
    #   convolutional_network.ipynb
    filter_counts = [64, 64]
    filter_init_uniform_ranges = [.05] * len(filter_counts)
    filter_shapes = [(5, 5), (5, 5)]
    pool_shapes = [(4, 4), (4, 4)]
    pool_strides = [(2, 2), (2, 2)]
    affine_output_sizes = [10]
    affine_init_stddevs = [.05] * len(affine_output_sizes)
    dropout_include_rates = ([.5 if args.dropout else 1.0] *
                             (len(filter_counts) + len(affine_output_sizes)))
    assert_equal(affine_output_sizes[-1], 10)
    max_epochs = 200

    #
    # Splits the MNIST training set into training and validation sets
    #

    mnist_training, _ = load_mnist()

    def split_off_validation(tensor):
        '''
        Returns (training part, validation part) of a tensor.

        Uses an explicit split index rather than tensor[:-validation_size]:
        with validation_size == 0 the latter is tensor[:0], i.e. an empty
        training set. Identical to the negative-index form for
        1 <= validation_size, including validation_size > len(tensor).
        '''
        split = max(tensor.shape[0] - args.validation_size, 0)
        return tensor[:split, ...], tensor[split:, ...]

    splits = [split_off_validation(t) for t in mnist_training.tensors]
    training_tensors = [training for training, _ in splits]
    validation_tensors = [validation for _, validation in splits]

    def shuffled_together(images, labels):
        '''
        Returns copies of both arrays, reordered by one shared permutation.
        '''
        assert_equal(len(images), len(labels))
        order = numpy.random.permutation(len(images))
        return images[order], labels[order]

    if not args.no_shuffle_dataset:
        # Training is shuffled before validation; the order matters because
        # both draw from the global numpy RNG.
        training_tensors[0], training_tensors[1] = shuffled_together(
            training_tensors[0], training_tensors[1])
        validation_tensors[0], validation_tensors[1] = shuffled_together(
            validation_tensors[0], validation_tensors[1])

    #
    # Stores all images and labels in shared variables; the datasets that
    # the iterators walk over contain only row indices into them.
    #

    all_images_shared = theano.shared(
        numpy.vstack([training_tensors[0], validation_tensors[0]]))
    all_labels_shared = theano.shared(
        numpy.concatenate([training_tensors[1], validation_tensors[1]]))

    length_training = training_tensors[0].shape[0]
    length_validation = validation_tensors[0].shape[0]

    def make_indices_dataset(start, stop):
        '''
        Returns a Dataset holding the row indices start, ..., stop - 1.
        '''
        # dtype is explicit so that it matches the declared format on every
        # platform and for an empty range.
        indices = numpy.arange(start, stop, dtype='int64')
        return Dataset(tensors=[indices],
                       names=['indices'],
                       formats=[DenseFormat(axes=['b'],
                                            shape=[-1],
                                            dtype='int64')])

    indices_training_dataset = make_indices_dataset(0, length_training)
    # Validation rows follow the training rows in the stacked arrays.
    indices_validation_dataset = make_indices_dataset(
        length_training, length_training + length_validation)

    training_iterator = indices_training_dataset.iterator(
        iterator_type='sequential', batch_size=args.batch_size)
    validation_iterator = indices_validation_dataset.iterator(
        iterator_type='sequential', batch_size=10000)
    # Second pass over the same training indices with its own batch size.
    training_iterator_full = indices_training_dataset.iterator(
        iterator_type='sequential', batch_size=args.batch_size_full)

    #
    # Builds the model and the loss
    #

    input_indices_symbolic, = training_iterator.make_input_nodes()
    image_lookup_node = ImageLookeupNode(input_indices_symbolic,
                                         all_images_shared)
    label_lookup_node = LabelLookeupNode(input_indices_symbolic,
                                         all_labels_shared)
    image_node = RescaleImage(image_lookup_node)

    rng = numpy.random.RandomState(129734)
    theano_rng = RandomStreams(2387845)

    conv_layers, affine_layers, output_node = build_conv_classifier(
        image_node,
        filter_shapes,
        filter_counts,
        filter_init_uniform_ranges,
        pool_shapes,
        pool_strides,
        affine_output_sizes,
        affine_init_stddevs,
        dropout_include_rates,
        rng,
        theano_rng)

    loss_node = CrossEntropy(output_node, label_lookup_node)
    scalar_loss = loss_node.output_symbol.mean()

    if args.weight_decay != 0.0:
        # Adds weight_decay * sum(w ** 2) for every filter bank and weight
        # matrix; biases are not penalised.
        decayed_tensors = (
            [layer.conv2d_node.filters for layer in conv_layers] +
            [layer.affine_node.linear_node.params for layer in affine_layers])

        for decayed_tensor in decayed_tensors:
            penalty = args.weight_decay * theano.tensor.sqr(decayed_tensor).sum()
            scalar_loss = scalar_loss + penalty

    #
    # Collects the parameters. Each entry pairs a parameter with either None
    # or the (max_norm, input_axes) arguments for limit_param_norms.
    #

    constrained_parameters = []

    for conv_layer in conv_layers:
        constrained_parameters.append((conv_layer.conv2d_node.filters,
                                       (args.max_filter_norm, (1, 2, 3))))
        constrained_parameters.append((conv_layer.bias_node.params, None))

    for affine_layer in affine_layers:
        affine_node = affine_layer.affine_node
        constrained_parameters.append((affine_node.linear_node.params,
                                       (args.max_col_norm, [0])))
        constrained_parameters.append((affine_node.bias_node.params, None))

    parameters = [parameter for parameter, _ in constrained_parameters]

    # One zero-initialised shared variable per parameter, with the same
    # shape and dtype.
    # NOTE(review): presumably SemiSgd stores a snapshot of the parameters
    # in these "old" variables -- confirm against SemiSgd.
    old_parameters = [
        theano.shared(numpy.zeros(parameter.get_value().shape,
                                  dtype=parameter.dtype))
        for parameter in parameters]

    # The same loss expression, with every parameter replaced by its "old"
    # counterpart.
    scalar_loss_at_old_params = theano.clone(
        scalar_loss,
        replace=dict(safe_izip(parameters, old_parameters)))

    #
    # Creates the parameter updaters
    #

    parameter_updaters = []
    momentum_updaters = []

    for (parameter, norm_limit), old_parameter in safe_izip(
            constrained_parameters, old_parameters):
        gradient = theano.gradient.grad(scalar_loss, parameter)
        gradient_at_old_params = theano.gradient.grad(
            scalar_loss_at_old_params, old_parameter)

        parameter_updater = SemiSgdParameterUpdater(parameter,
                                                    gradient,
                                                    gradient_at_old_params,
                                                    args.learning_rate,
                                                    args.initial_momentum,
                                                    args.method,
                                                    training_iterator,
                                                    training_iterator_full,
                                                    args.nesterov)
        parameter_updaters.append(parameter_updater)

        momentum_updaters.append(LinearlyInterpolatesOverEpochs(
            parameter_updater.momentum,
            args.final_momentum,
            args.epochs_to_momentum_saturation))

        if norm_limit is not None:
            max_norm, input_axes = norm_limit
            # An infinite maximum norm means "no limit".
            if max_norm != numpy.inf:
                limit_param_norms(parameter_updater=parameter_updater,
                                  param=parameter,
                                  max_norm=max_norm,
                                  input_axes=input_axes)

    #
    # Makes batch and epoch callbacks
    #

    # The prefix must not carry a file extension. A directory prefix gets a
    # trailing '/', so that os.path.split() yields an empty file-name part.
    assert_equal(os.path.splitext(args.output_prefix)[1], "")
    if (os.path.isdir(args.output_prefix) and
            not args.output_prefix.endswith('/')):
        args.output_prefix += '/'

    output_dir, output_prefix = os.path.split(args.output_prefix)
    if output_prefix != "":
        output_prefix += "_"

    output_prefix = os.path.join(output_dir, output_prefix)

    epoch_logger = EpochLogger(output_prefix + "S2GD_plus.h5")

    misclassification_node = Misclassification(output_node, label_lookup_node)

    # NOTE(review): the monitor logged as 'validation mean loss' is never
    # handed to ValidationCallback (the one logged as "Validation Loss"
    # further down is). It is kept so that the set of names registered with
    # the logger stays the same.
    orphan_validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[])
    epoch_logger.subscribe_to('validation mean loss',
                              orphan_validation_loss_monitor)

    training_stopper = StopsOnStagnation(max_epochs=20,
                                         min_proportional_decrease=0.0)
    validation_misclassification_monitor = MeanOverEpoch(
        misclassification_node,
        callbacks=[print_misclassification_rate, training_stopper])
    epoch_logger.subscribe_to('validation misclassification',
                              validation_misclassification_monitor)

    # NOTE(review): the two training monitors are registered with the logger
    # but are not among the trainer's epoch callbacks below.
    training_loss_monitor = MeanOverEpoch(loss_node, callbacks=[print_loss])
    epoch_logger.subscribe_to("training loss", training_loss_monitor)

    training_misclassification_monitor = MeanOverEpoch(misclassification_node,
                                                       callbacks=[])
    epoch_logger.subscribe_to('training misclassification %',
                              training_misclassification_monitor)

    epoch_timer = EpochTimer2()
    epoch_logger.subscribe_to('epoch duration', epoch_timer)

    validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[])
    epoch_logger.subscribe_to("Validation Loss", validation_loss_monitor)

    validation_callback = ValidationCallback(
        inputs=[input_indices_symbolic.output_symbol],
        input_iterator=validation_iterator,
        epoch_callbacks=[validation_loss_monitor,
                         validation_misclassification_monitor])

    # NOTE: an earlier, disabled version of this script pickled the trainer
    # each epoch; its comment said that pickling the trainer doesn't work
    # when there are Dropout nodes.
    trainer = SemiSgd([input_indices_symbolic],
                      training_iterator,
                      parameters,
                      old_parameters,
                      parameter_updaters,
                      training_iterator_full,
                      epoch_callbacks=(parameter_updaters +
                                       momentum_updaters +
                                       [validation_callback,
                                        LimitsNumEpochs(max_epochs),
                                        epoch_timer]))

    # Each message is one pre-formatted string, so the output is the same
    # whether print is a statement (Python 2) or a function.
    print("{}  is now executed".format(args.method))

    start_time = time.time()
    trainer.train()
    elapsed_time = time.time() - start_time

    print("Total elapsed time is for training is:  {}".format(elapsed_time))