Example #1
    def test_gradient_clipping(self):
        """
        Create a known gradient and check whether it is being clipped
        correctly
        """
        mlp = MLP(layers=[Linear(dim=1, irange=0, layer_name='linear')],
                  nvis=1)
        W, b = mlp.layers[0].get_params()
        W.set_value([[10]])

        X = mlp.get_input_space().make_theano_batch()
        y = mlp.get_output_space().make_theano_batch()

        cost = Default()
        gradients, _ = cost.get_gradients(mlp, (X, y))

        clipped_cost = GradientClipping(20, Default())
        clipped_gradients, _ = clipped_cost.get_gradients(mlp, (X, y))

        # The MLP defines f(x) = (x W)^2, with df/dW = 2 W x^2
        f = function([X, y], [gradients[W].sum(), clipped_gradients[W].sum()],
                     allow_input_downcast=True)

        # df/dW = df/db = 20 for W = 10, x = 1, so the norm is 20 * sqrt(2)
        # and the gradients should be clipped to 20 / sqrt(2)
        np.testing.assert_allclose(f([[1]], [[0]]), [20, 20 / np.sqrt(2)])
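A minimal NumPy sketch (an addition, not part of the test above) of the norm-based rescaling rule the test verifies: when the joint L2 norm of all gradients exceeds the threshold, every gradient is scaled by threshold / norm.

import numpy as np

def clip_by_global_norm(grads, threshold):
    # Rescale a list of gradients so their joint L2 norm is at most `threshold`.
    norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
    if norm > threshold:
        grads = [g * (threshold / norm) for g in grads]
    return grads

# W = 10, x = 1, y = 0  =>  dcost/dW = dcost/db = 20, joint norm = 20 * sqrt(2)
clipped = clip_by_global_norm([np.array([[20.0]]), np.array([20.0])], threshold=20.0)
print(clipped[0])  # ~14.14 == 20 / sqrt(2), matching the assertion above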
Example #3
    def test_gradient(self):
        """
        Testing to see whether the gradient can be calculated.
        """
        rnn = RNN(input_space=SequenceSpace(VectorSpace(dim=1)),
                  layers=[Recurrent(dim=2, layer_name='recurrent',
                                    irange=0, nonlinearity=lambda x: x),
                          Linear(dim=1, layer_name='linear', irange=0)])

        X_data, X_mask = rnn.get_input_space().make_theano_batch()
        y_data, y_mask = rnn.get_output_space().make_theano_batch()

        default_cost = Default()
        cost = default_cost.expr(rnn, ((X_data, X_mask), (y_data, y_mask)))
        tensor.grad(cost, rnn.get_params(), disconnected_inputs='ignore')
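A hedged follow-up (not part of the original test): the gradient expressions returned by tensor.grad can be compiled for inspection; theano and the symbols defined in the test above are assumed to be in scope.

import theano

grads = tensor.grad(cost, rnn.get_params(), disconnected_inputs='ignore')
grad_fn = theano.function([X_data, X_mask, y_data, y_mask], grads,
                          on_unused_input='ignore')
# grad_fn can now be evaluated on concrete sequence batches and masks to
# check that every parameter gradient is finite.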
Example #4
    def __init__(self,
                 layers,
                 batch_size=None,
                 input_space=None,
                 input_source='features',
                 target_source='targets',
                 nvis=None,
                 seed=None,
                 layer_name=None,
                 monitor_targets=True,
                 dataset_adaptor=VectorDataset(),
                 trainer=SGDTrainer(cost=Default()),
                 **kwargs):
        self.configs = {
            'layers': layers,
            'batch_size': batch_size,
            'input_space': input_space,
            'input_source': input_source,
            'target_source': target_source,
            'nvis': nvis,
            'seed': seed,
            'layer_name': layer_name,
            'monitor_targets': monitor_targets,
            # 'kwargs': kwargs,
        }

        self.dataset_adaptor = dataset_adaptor
        self.trainer = trainer
Example #5
    def set_training_criteria(self,
                              learning_rate=0.05,
                              cost=Default(),
                              batch_size=10,
                              max_epochs=10):

        self.training_alg = SGD(learning_rate=learning_rate,
                                cost=cost,
                                batch_size=batch_size,
                                monitoring_dataset=self.datasets,
                                termination_criterion=EpochCounter(max_epochs))
Example #6
def get_layer_trainer_logistic(layer, trainset, validset):
    # SGD configuration
    config = {'learning_rate': 0.1,
              'cost': Default(),
              'batch_size': 150,
              'monitoring_dataset': validset,
              'termination_criterion': MonitorBased(channel_name='y_misclass',
                                                    N=10, prop_decrease=0),
              'update_callbacks': None
              }

    train_algo = SGD(**config)
    model = layer
    return Train(model=model,
                 dataset=trainset,
                 algorithm=train_algo,
                 extensions=None)
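Hypothetical usage of the helper above, as a sketch; softmax_layer, trainset, and validset are assumed to be constructed elsewhere.

trainer = get_layer_trainer_logistic(softmax_layer, trainset, validset)
trainer.main_loop()  # runs SGD until the MonitorBased criterion stops training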
Example #7
def get_layer_trainer_logistic(layer, trainset):
    # SGD configuration
    config = {'learning_rate': 0.1,
              'cost': Default(),
              'batch_size': 10,
              'monitoring_batches': 10,
              'monitoring_dataset': trainset,
              'termination_criterion': EpochCounter(max_epochs=MAX_EPOCHS_SUPERVISED),
              'update_callbacks': None
              }

    train_algo = SGD(**config)
    model = layer
    return Train(model=model,
                 dataset=trainset,
                 algorithm=train_algo,
                 extensions=None)
Example #8
def test_correctness():
    """
    Test that the cost function works with float64
    """
    x_train, y_train, x_valid, y_valid = create_dataset()

    trainset = DenseDesignMatrix(X=np.array(x_train), y=y_train)
    validset = DenseDesignMatrix(X=np.array(x_valid), y=y_valid)

    n_inputs = trainset.X.shape[1]
    n_outputs = 1
    n_hidden = 10

    hidden_istdev = 4 * (6 / float(n_inputs + n_hidden)) ** 0.5
    output_istdev = 4 * (6 / float(n_hidden + n_outputs)) ** 0.5

    model = MLP(layers=[Sigmoid(dim=n_hidden, layer_name='hidden',
                                istdev=hidden_istdev),
                        Sigmoid(dim=n_outputs, layer_name='output',
                                istdev=output_istdev)],
                nvis=n_inputs, seed=[2013, 9, 16])

    termination_criterion = And([EpochCounter(max_epochs=1),
                                 MonitorBased(prop_decrease=1e-7,
                                 N=2)])

    cost = SumOfCosts([(0.99, Default()),
                       (0.01, L1WeightDecay({}))])

    algo = SGD(1e-1,
               update_callbacks=[ExponentialDecay(decay_factor=1.00001,
                                 min_lr=1e-10)],
               cost=cost,
               monitoring_dataset=validset,
               termination_criterion=termination_criterion,
               monitor_iteration_mode='even_shuffled_sequential',
               batch_size=2)

    train = Train(model=model, dataset=trainset, algorithm=algo)
    train.main_loop()
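A hedged post-training check (an addition, not part of the original test): compile the trained MLP's forward pass and run it on the validation design matrix; `function` is assumed to be theano.function, as in the tests above.

X_sym = model.get_input_space().make_theano_batch()
predict = function([X_sym], model.fprop(X_sym), allow_input_downcast=True)
valid_predictions = predict(validset.X)  # shape (n_valid, n_outputs)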
Example #9
    def get_default_cost(self):
        return Default()
Example #10
def get_layer_MLP(layers, trainset, validset):
    
    #processor = Standardize();
    
#    trainset = BlackBoxDataset( which_set = 'train',
#                                start = 0,
#                                stop = 900,
#                                preprocessor = Standardize(),
#                                fit_preprocessor = True,
#                                fit_test_preprocessor = True,
#                                )
#    
#    validset = BlackBoxDataset( which_set = 'train',
#                                start = 900,
#                                stop = 1000 ,
#                                preprocessor = Standardize(),
#                                fit_preprocessor = True,
#                                fit_test_preprocessor = False,
#                                )
    
    dropCfg = {'input_include_probs': {'h0': .8},
               'input_scales': {'h0': 1.}
               }

    config = {'learning_rate': .1,
              'init_momentum': .5,
              'cost': Default(),  # Dropout(**dropCfg),
              'monitoring_dataset': {'train': trainset,
                                     'valid': validset
                                     },
              'termination_criterion': MonitorBased(channel_name='valid_y_misclass',
                                                    N=10, prop_decrease=0),
              'update_callbacks': None
              }

#    configCfg0 = {'layer_name' : 'h0',
#                'dim' : 1875,
#                'irange' : .05,
#                # Rather than using weight decay, we constrain the norms of the weight vectors
#                 'max_col_norm' : 1.}
#    
#    configCfg1 = {'layer_name' : 'h1',
#                'dim' : 1875,
#                'irange' : .05,
#                # Rather than using weight decay, we constrain the norms of the weight vectors
#                 'max_col_norm' : 1.}
    
    sftmaxCfg = {
                'layer_name': 'y',
                'init_bias_target_marginals': trainset,
                # Initialize the weights to all 0s
                'irange': .0,
                'n_classes': 9
            }
    
    layers.append(Softmax(**sftmaxCfg)) 

    train_algo = SGD(**config)
    model = MLP(batch_size=10, layers=layers, nvis=1875)
    return Train(model=model,
                 dataset=trainset,
                 algorithm=train_algo,
                 extensions=None,  # [LinearDecayOverEpoch(start=5, saturate=100, decay_factor=.01)]
                 save_path="best_dbn_model.pkl",
                 save_freq=100)
Example #11
def supervisedLayerwisePRL(trainset, testset):
    '''
    The supervised layerwise training as used in the PRL paper.

    Input
    ------
    trainset : A path to an hdf5 file created through h5py.
    testset  : A path to an hdf5 file created through h5py.
    '''
    batch_size = 100

    # Both train and test h5py files are expected to have 'topo_view' and 'y'
    # datasets inside them, where 'topo_view' follows the 'b01c' data format
    # used in pylearn2 and 'y' holds the one-hot encoded labels
    trn = HDF5Dataset(filename=trainset,
                      topo_view='topo_view',
                      y='y',
                      load_all=False)
    tst = HDF5Dataset(filename=testset,
                      topo_view='topo_view',
                      y='y',
                      load_all=False)
    '''
    The 1st Convolution and Pooling Layers are added below.
    '''
    h1 = mlp.ConvRectifiedLinear(layer_name='h1',
                                 output_channels=64,
                                 irange=0.05,
                                 kernel_shape=[4, 4],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)

    fc = mlp.RectifiedLinear(layer_name='fc', dim=1500, irange=0.05)
    output = mlp.Softmax(layer_name='y',
                         n_classes=171,
                         irange=.005,
                         max_col_norm=1.9365)

    layers = [h1, fc, output]

    mdl = mlp.MLP(layers,
                  input_space=Conv2DSpace(shape=(70, 70), num_channels=1))

    trainer = sgd.SGD(
        learning_rate=0.002,
        batch_size=batch_size,
        learning_rule=learning_rule.RMSProp(),
        cost=SumOfCosts(
            costs=[Default(),
                   WeightDecay(coeffs=[0.0005, 0.0005, 0.0005])]),
        train_iteration_mode='shuffled_sequential',
        monitor_iteration_mode='sequential',
        termination_criterion=EpochCounter(max_epochs=15),
        monitoring_dataset={
            'test': tst,
            # NOTE: `vld` (a validation HDF5Dataset) is not defined in this
            # snippet; it needs to be constructed alongside `trn` and `tst`.
            'valid': vld
        })

    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_y_misclass',
        save_path='./Saved Models/conv_supervised_layerwise_best1.pkl')

    decay = sgd.LinearDecayOverEpoch(start=8, saturate=15, decay_factor=0.1)

    experiment = Train(
        dataset=trn,
        model=mdl,
        algorithm=trainer,
        extensions=[watcher, decay],
    )

    experiment.main_loop()

    del mdl
    mdl = serial.load('./Saved Models/conv_supervised_layerwise_best1.pkl')
    mdl = push_monitor(mdl, 'k')
    '''
    The 2nd Convolution and Pooling Layers are added below.
    '''
    h2 = mlp.ConvRectifiedLinear(layer_name='h2',
                                 output_channels=64,
                                 irange=0.05,
                                 kernel_shape=[4, 4],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)

    fc = mlp.RectifiedLinear(layer_name='fc', dim=1500, irange=0.05)
    output = mlp.Softmax(layer_name='y',
                         n_classes=171,
                         irange=.005,
                         max_col_norm=1.9365)

    del mdl.layers[-1]
    mdl.layer_names.remove('y')
    del mdl.layers[-1]
    mdl.layer_names.remove('fc')
    mdl.add_layers([h2, fc, output])

    trainer = sgd.SGD(learning_rate=0.002,
                      batch_size=batch_size,
                      learning_rule=learning_rule.RMSProp(),
                      cost=SumOfCosts(costs=[
                          Default(),
                          WeightDecay(coeffs=[0.0005, 0.0005, 0.0005, 0.0005])
                      ]),
                      train_iteration_mode='shuffled_sequential',
                      monitor_iteration_mode='sequential',
                      termination_criterion=EpochCounter(max_epochs=15),
                      monitoring_dataset={
                          'test': tst,
                          'valid': vld
                      })

    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_y_misclass',
        save_path='./Saved Models/conv_supervised_layerwise_best2.pkl')

    decay = sgd.LinearDecayOverEpoch(start=8, saturate=15, decay_factor=0.1)

    experiment = Train(
        dataset=trn,
        model=mdl,
        algorithm=trainer,
        extensions=[watcher, decay],
    )

    experiment.main_loop()

    del mdl
    mdl = serial.load('./Saved Models/conv_supervised_layerwise_best2.pkl')
    mdl = push_monitor(mdl, 'l')
    '''
    The 3rd Convolution and Pooling Layers are added below.
    '''
    h3 = mlp.ConvRectifiedLinear(layer_name='h3',
                                 output_channels=64,
                                 irange=0.05,
                                 kernel_shape=[4, 4],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)

    fc = mlp.RectifiedLinear(layer_name='fc', dim=1500, irange=0.05)
    output = mlp.Softmax(layer_name='y',
                         n_classes=171,
                         irange=.005,
                         max_col_norm=1.9365)

    del mdl.layers[-1]
    mdl.layer_names.remove('y')
    del mdl.layers[-1]
    mdl.layer_names.remove('fc')
    mdl.add_layers([h3, fc, output])

    trainer = sgd.SGD(
        learning_rate=.002,
        batch_size=batch_size,
        learning_rule=learning_rule.RMSProp(),
        cost=SumOfCosts(costs=[
            Default(),
            WeightDecay(coeffs=[0.0005, 0.0005, 0.0005, 0.0005, 0.0005])
        ]),
        train_iteration_mode='shuffled_sequential',
        monitor_iteration_mode='sequential',
        termination_criterion=EpochCounter(max_epochs=15),
        monitoring_dataset={
            'test': tst,
            'valid': vld
        })

    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_y_misclass',
        save_path='./Saved Models/conv_supervised_layerwise_best3.pkl')

    decay = sgd.LinearDecayOverEpoch(start=8, saturate=15, decay_factor=0.1)

    experiment = Train(
        dataset=trn,
        model=mdl,
        algorithm=trainer,
        extensions=[watcher, decay],
    )

    experiment.main_loop()
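The comments at the top of supervisedLayerwisePRL describe the HDF5 layout the function expects. Below is a minimal h5py sketch of such a file, an illustration under assumed shapes rather than part of the original code: a 'topo_view' dataset in b01c order (batch, rows, cols, channels) matching the 70x70 single-channel input space, and a one-hot 'y' dataset with 171 columns.

import h5py
import numpy as np

n_examples, n_classes = 1000, 171
with h5py.File('train.h5', 'w') as f:
    f.create_dataset('topo_view',
                     data=np.zeros((n_examples, 70, 70, 1), dtype='float32'))
    labels = np.random.randint(0, n_classes, size=n_examples)
    one_hot = np.zeros((n_examples, n_classes), dtype='float32')
    one_hot[np.arange(n_examples), labels] = 1.0
    f.create_dataset('y', data=one_hot)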