Example #1
def get_term_monitorbased(self, term_obj):
    print 'monitor_based'
    return MonitorBased(
        prop_decrease=term_obj.proportional_decrease,
        N=term_obj.max_epochs,
        channel_name=term_obj.channel_name
    )
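
For orientation: MonitorBased keeps training while the monitored channel keeps improving, and stops once the channel has gone N epochs without dropping by at least prop_decrease relative to the best value seen so far. Below is a minimal standalone sketch of that rule, an illustration rather than pylearn2's actual implementation:

# Hypothetical sketch of a MonitorBased-style stopping rule (not pylearn2's code).
class MonitorBasedSketch(object):
    def __init__(self, prop_decrease, N, channel_name):
        self.prop_decrease = prop_decrease  # required proportional improvement
        self.N = N                          # patience, measured in epochs
        self.channel_name = channel_name
        self.best = float('inf')
        self.countdown = N

    def continue_learning(self, channels):
        value = channels[self.channel_name]
        # Reset the patience counter only on a sufficiently large improvement.
        if value < (1. - self.prop_decrease) * self.best:
            self.countdown = self.N
        else:
            self.countdown -= 1
        self.best = min(self.best, value)
        return self.countdown > 0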
Example #2
def train_example(dataset=None):
    model = GaussianBinaryRBM(nvis=1296,
                              nhid=61,
                              irange=0.5,
                              energy_function_class=grbm_type_1(),
                              learn_sigma=True,
                              init_sigma=.4,
                              init_bias_hid=2.,
                              mean_vis=False,
                              sigma_lr_scale=1e-3)
    cost = SMD(corruptor=GaussianCorruptor(stdev=0.4))
    algorithm = SGD(learning_rate=.1,
                    batch_size=5,
                    monitoring_batches=20,
                    monitoring_dataset=dataset,
                    cost=cost,
                    termination_criterion=MonitorBased(prop_decrease=0.01,
                                                       N=1))
    train = Train(dataset=dataset,
                  model=model,
                  save_path="./experiment/training.pkl",
                  save_freq=10,
                  algorithm=algorithm,
                  extensions=[])
    train.main_loop()
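
Note the aggressive setting above: with prop_decrease=0.01 and N=1, training halts the first time the monitored channel fails to drop by at least 1% relative to its best value. A quick hedged check against the sketch shown after Example #1:

# Hedged numeric illustration of prop_decrease=0.01, N=1, using the sketch above.
crit = MonitorBasedSketch(prop_decrease=0.01, N=1, channel_name='objective')
assert crit.continue_learning({'objective': 1.000})      # the first value always counts as progress
assert not crit.continue_learning({'objective': 0.995})  # only a 0.5% drop, so training stops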
Example #3
    def _create_trainer(self, dataset):
        sgd.log.setLevel(logging.WARNING)

        # Aggregate all the dropout parameters into shared dictionaries.
        probs, scales = {}, {}
        for l in [l for l in self.layers if l.dropout is not None]:
            incl = 1.0 - l.dropout
            probs[l.name] = incl
            scales[l.name] = 1.0 / incl

        if self.cost == "Dropout" or len(probs) > 0:
            # Use the globally specified dropout rate when there are no layer-specific ones.
            incl = 1.0 - self.dropout
            default_prob, default_scale = incl, 1.0 / incl

            # Pass all the parameters to pylearn2 as a custom cost function.
            self.cost = Dropout(default_input_include_prob=default_prob,
                                default_input_scale=default_scale,
                                input_include_probs=probs,
                                input_scales=scales)

        logging.getLogger('pylearn2.monitor').setLevel(logging.WARNING)
        if dataset is not None:
            termination_criterion = MonitorBased(channel_name='objective',
                                                 N=self.n_stable,
                                                 prop_decrease=self.f_stable)
        else:
            termination_criterion = None

        return sgd.SGD(cost=self.cost,
                       batch_size=self.batch_size,
                       learning_rule=self._learning_rule,
                       learning_rate=self.learning_rate,
                       termination_criterion=termination_criterion,
                       monitoring_dataset=dataset)
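
The dictionaries built above follow the usual inverted-dropout arithmetic: a layer with dropout rate p is kept with probability 1 - p and rescaled by 1 / (1 - p), which leaves the expected activation unchanged. A small check in plain Python (the numbers are made up):

# Check of the include-probability / scale pairing used above (illustrative values).
dropout = 0.25
incl = 1.0 - dropout   # probability a unit is kept
scale = 1.0 / incl     # compensating scale factor
activation = 0.8
# Expected post-dropout activation: kept with probability incl, scaled by scale.
expected = incl * (activation * scale)
assert abs(expected - activation) < 1e-12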
Example #4
def model1():
    #pdb.set_trace()
    # train set X has dim (60,000, 784), y has dim (60,000, 10)
    train_set = MNIST(which_set='train', one_hot=True)
    # valid and test sets both reuse the MNIST test split: X has dim (10,000, 784), y has dim (10,000, 10)
    valid_set = MNIST(which_set='test', one_hot=True)
    test_set = MNIST(which_set='test', one_hot=True)

    #import pdb
    #pdb.set_trace()
    #print train_set.X.shape[1]

    # =====<Create the MLP Model>=====

    h2_layer = NoisyRELU(layer_name='h1',
                         sparse_init=15,
                         noise_factor=5,
                         dim=1000,
                         desired_active_rate=0.2,
                         bias_factor=20,
                         max_col_norm=1)
    #h2_layer = RectifiedLinear(layer_name='h2', dim=100, sparse_init=15, max_col_norm=1)
    #print h1_layer.get_params()
    #h2 = RectifiedLinear(layer_name='h2', dim=500, sparse_init=15, max_col_norm=1)
    y_layer = Softmax(layer_name='y', n_classes=10, irange=0., max_col_norm=1)

    mlp = MLP(batch_size=200,
              input_space=VectorSpace(dim=train_set.X.shape[1]),
              layers=[h2_layer, y_layer])

    # =====<Create the SGD algorithm>=====
    sgd = SGD(init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset={'valid': valid_set},
              cost=MethodCost('cost_from_X'),
              termination_criterion=MonitorBased(
                  channel_name='valid_y_misclass', prop_decrease=0.001, N=50))
    #sgd.setup(model=mlp, dataset=train_set)

    # =====<Extensions>=====
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.9)]

    # =====<Create Training Object>=====
    save_path = './mlp_model1.pkl'
    train_obj = Train(dataset=train_set,
                      model=mlp,
                      algorithm=sgd,
                      extensions=ext,
                      save_path=save_path,
                      save_freq=0)
    #train_obj.setup_extensions()

    #import pdb
    #pdb.set_trace()
    train_obj.main_loop()

Example #5
def create_algorithm(self):
    cost_crit = MonitorBased(channel_name=self.optimize_for,
                             prop_decrease=0.,
                             N=10)
    epoch_cnt_crit = EpochCounter(max_epochs=self.max_epochs)
    term = And(criteria=[cost_crit, epoch_cnt_crit])
    self.algorithm = SGD(batch_size=100,
                         learning_rate=.01,
                         monitoring_dataset=self.alg_datasets,
                         termination_criterion=term)
Example #6
def model2():
    #pdb.set_trace()
    # train set X has dim (60,000, 784), y has dim (60,000, 10)
    train_set = MNIST(which_set='train', one_hot=True)
    # test set X has dim (10,000, 784), y has dim (10,000, 10)
    test_set = MNIST(which_set='test', one_hot=True)

    # =====<Create the MLP Model>=====

    h1_layer = RectifiedLinear(layer_name='h1', dim=1000, irange=0.5)
    #print h1_layer.get_params()
    h2_layer = RectifiedLinear(layer_name='h2',
                               dim=1000,
                               sparse_init=15,
                               max_col_norm=1)
    y_layer = Softmax(layer_name='y',
                      n_classes=train_set.y.shape[1],
                      irange=0.5)

    mlp = MLP(batch_size=100,
              input_space=VectorSpace(dim=train_set.X.shape[1]),
              layers=[h1_layer, h2_layer, y_layer])

    # =====<Create the SGD algorithm>=====
    sgd = SGD(batch_size=100,
              init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset={
                  'valid': train_set,
                  'test': test_set
              },
              cost=SumOfCosts(costs=[
                  MethodCost('cost_from_X'),
                  WeightDecay(coeffs=[0.00005, 0.00005, 0.00005])
              ]),
              termination_criterion=MonitorBased(
                  channel_name='valid_y_misclass', prop_decrease=0.0001, N=5))
    #sgd.setup(model=mlp, dataset=train_set)

    # =====<Extensions>=====
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.99)]

    # =====<Create Training Object>=====
    save_path = './mlp_model2.pkl'
    train_obj = Train(dataset=train_set,
                      model=mlp,
                      algorithm=sgd,
                      extensions=ext,
                      save_path=save_path,
                      save_freq=0)
    #train_obj.setup_extensions()

    train_obj.main_loop()
Example #7
def model3():
    #pdb.set_trace()
    # SVHN training set
    train_set = SVHN_On_Memory(which_set='train')
    # SVHN test set
    test_set = SVHN_On_Memory(which_set='test')

    # =====<Create the MLP Model>=====

    h1_layer = NoisyRELU(layer_name='h1',
                         dim=2000,
                         threshold=5,
                         sparse_init=15,
                         max_col_norm=1)
    #print h1_layer.get_params()
    #h2_layer = NoisyRELU(layer_name='h2', dim=100, threshold=15, sparse_init=15, max_col_norm=1)

    y_layer = Softmax(layer_name='y',
                      n_classes=train_set.y.shape[1],
                      irange=0.5)

    mlp = MLP(batch_size=64,
              input_space=VectorSpace(dim=train_set.X.shape[1]),
              layers=[h1_layer, y_layer])

    # =====<Create the SGD algorithm>=====
    sgd = SGD(batch_size=64,
              init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset={
                  'valid': train_set,
                  'test': test_set
              },
              cost=MethodCost('cost_from_X'),
              termination_criterion=MonitorBased(
                  channel_name='valid_y_misclass', prop_decrease=0.001, N=50))
    #sgd.setup(model=mlp, dataset=train_set)

    # =====<Extensions>=====
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.9)]

    # =====<Create Training Object>=====
    save_path = './mlp_model.pkl'
    train_obj = Train(dataset=train_set,
                      model=mlp,
                      algorithm=sgd,
                      extensions=ext,
                      save_path=save_path,
                      save_freq=10)
    #train_obj.setup_extensions()

    train_obj.main_loop()
Example #8
def get_term_monitorbased(self, term_id):
    row = self.db.executeSQL(
        """
        SELECT proportional_decrease, max_epoch, channel_name
        FROM hps3.term_monitorBased
        WHERE term_id = %s
        """, (term_id,), self.db.FETCH_ONE)
    if not row:
        raise HPSData("No monitorBased term for term_id=" + str(term_id))
    print 'monitor_based'
    (proportional_decrease, max_epochs, channel_name) = row
    return MonitorBased(prop_decrease=proportional_decrease,
                        N=max_epochs,
                        channel_name=channel_name)
Example #9
def get_layer_trainer_logistic(layer, trainset, validset):
    # config for the SGD algorithm
    config = {'learning_rate': 0.1,
              'cost': Default(),
              'batch_size': 150,
              'monitoring_dataset': validset,
              'termination_criterion': MonitorBased(channel_name='y_misclass',
                                                    N=10,
                                                    prop_decrease=0),
              'update_callbacks': None
              }

    train_algo = SGD(**config)
    model = layer
    return Train(model=model,
                 dataset=trainset,
                 algorithm=train_algo,
                 extensions=None)
Example #10
    def create_algorithm(self, data, save_best_path=None):
        self.set_dataset(data)
        self.create_adjustors()
        term = EpochCounter(max_epochs=self.max_epochs)
        if self.valid_stop:
            cost_crit = MonitorBased(channel_name='valid_objective',
                                     prop_decrease=.0,
                                     N=3)
            term = And(criteria=[cost_crit, term])

        #(layers, A_weight_decay)
        coeffs = None
        if self.reg_factors:
            rf = self.reg_factors
            lhdims = len(self.tagger.hdims)
            l_inputlayer = len(self.tagger.layers[0].layers)
            coeffs = ([[rf] * l_inputlayer] + ([rf] * lhdims) + [rf], rf)
        cost = SeqTaggerCost(coeffs, self.dropout)
        self.cost = cost

        self.mbsb = MonitorBasedSaveBest(channel_name='valid_objective',
                                         save_path=save_best_path)
        mon_dataset = dict(self.dataset)
        if not self.monitor_train:
            del mon_dataset['train']

        _learning_rule = (self.momentum_rule if self.use_momentum else None)
        self.algorithm = SGD(
            batch_size=1,
            learning_rate=self.lr,
            termination_criterion=term,
            monitoring_dataset=mon_dataset,
            cost=cost,
            learning_rule=_learning_rule,
        )

        self.algorithm.setup(self, self.dataset['train'])
        if self.plot_monitor:
            cn = ["valid_objective", "test_objective"]
            if self.monitor_train:
                cn.append("train_objective")
            plots = Plots(channel_names=cn, save_path=self.plot_monitor)
            self.pm = PlotManager([plots], freq=1)
            self.pm.setup(self, None, self.algorithm)
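
The coeffs structure built in this method is easiest to see with concrete numbers; a hedged illustration (the sizes are made up, and the exact interpretation belongs to SeqTaggerCost):

# Hypothetical illustration of the coeffs layout with rf=0.01,
# an input layer of 2 sublayers, and 3 hidden dims.
rf = 0.01
l_inputlayer = 2
lhdims = 3
coeffs = ([[rf] * l_inputlayer] + ([rf] * lhdims) + [rf], rf)
print coeffs
# -> ([[0.01, 0.01], 0.01, 0.01, 0.01, 0.01], 0.01)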
Example #11
def test_correctness():
    """
    Test that the cost function works with float64
    """
    x_train, y_train, x_valid, y_valid = create_dataset()

    trainset = DenseDesignMatrix(X=np.array(x_train), y=y_train)
    validset = DenseDesignMatrix(X=np.array(x_valid), y=y_valid)

    n_inputs = trainset.X.shape[1]
    n_outputs = 1
    n_hidden = 10

    hidden_istdev = 4 * (6 / float(n_inputs + n_hidden)) ** 0.5
    output_istdev = 4 * (6 / float(n_hidden + n_outputs)) ** 0.5

    model = MLP(layers=[Sigmoid(dim=n_hidden, layer_name='hidden',
                                istdev=hidden_istdev),
                        Sigmoid(dim=n_outputs, layer_name='output',
                                istdev=output_istdev)],
                nvis=n_inputs, seed=[2013, 9, 16])

    termination_criterion = And([EpochCounter(max_epochs=1),
                                 MonitorBased(prop_decrease=1e-7,
                                              N=2)])

    cost = SumOfCosts([(0.99, Default()),
                       (0.01, L1WeightDecay({}))])

    algo = SGD(1e-1,
               update_callbacks=[ExponentialDecay(decay_factor=1.00001,
                                 min_lr=1e-10)],
               cost=cost,
               monitoring_dataset=validset,
               termination_criterion=termination_criterion,
               monitor_iteration_mode='even_shuffled_sequential',
               batch_size=2)

    train = Train(model=model, dataset=trainset, algorithm=algo)
    train.main_loop()
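
The And criterion above continues only while every sub-criterion would continue, so this run is capped at one epoch regardless of what MonitorBased reports; Example #19 uses Or for the complementary behaviour. A hedged sketch of both composition rules (illustrative, not pylearn2's source):

# Hypothetical sketches of composite termination criteria.
class AndSketch(object):
    def __init__(self, criteria):
        self.criteria = criteria

    def continue_learning(self, model):
        # Keep training only while ALL criteria want to continue.
        return all(c.continue_learning(model) for c in self.criteria)

class OrSketch(object):
    def __init__(self, criteria):
        self.criteria = criteria

    def continue_learning(self, model):
        # Keep training while ANY criterion wants to continue.
        return any(c.continue_learning(model) for c in self.criteria)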
Example #12
def get_trainer(model, trainset, validset, save_path):

    monitoring = dict(valid=validset, train=trainset)
    termination = MonitorBased(channel_name='valid_y_misclass',
                               prop_decrease=.001, N=100)
    extensions = [MonitorBasedSaveBest(channel_name='valid_y_misclass',
                                       save_path=save_path),
                  #MomentumAdjustor(start=1, saturate=100, final_momentum=.9),
                  LinearDecayOverEpoch(start=1, saturate=200, decay_factor=0.01)]

    config = {
        'learning_rate': .01,
        #'learning_rule': Momentum(0.5),
        'learning_rule': RMSProp(),
        'train_iteration_mode': 'shuffled_sequential',
        'batch_size': 1200,  #250,
        #'batches_per_iter': 100,
        'monitoring_dataset': monitoring,
        'monitor_iteration_mode': 'shuffled_sequential',
        'termination_criterion': termination,
    }

    return Train(model=model,
                 algorithm=SGD(**config),
                 dataset=trainset,
                 extensions=extensions)
Example #13
def main():

    #creating layers
        #2 convolutional rectified layers, border mode valid
    batch_size = 48
    lr = 1.0 #0.1/4
    finMomentum = 0.9
    maxout_units = 2000
    num_pcs = 4
    lay1_reg = lay2_reg = maxout_reg = None
    #save_path = './models/no_maxout/titan_lr_0.1_btch_64_momFinal_0.9_maxout_2000_4.joblib'
    #best_path = '/models/no_maxout/titan_bart10_gpu2_best.joblib'
    #save_path = './models/'+params.host+'_'+params.device+'_'+sys.argv[1]+'.joblib'
    #best_path = './models/'+params.host+'_'+params.device+'_'+sys.argv[1]+'best.joblib'
    save_path = '/Tmp/zumerjer/bart10_sumcost_adadelta_drop_perturb.joblib'
    best_path = '/Tmp/zumerjer/bart10_sumcost_adadelta_drop_perturb_best.joblib'

    #numBatches = 400000/batch_size

    '''
    print 'Applying preprocessing'
    ddmTrain = EmotiwKeypoints(start=0, stop =40000)
    ddmValid = EmotiwKeypoints(start=40000, stop = 44000)
    ddmTest = EmotiwKeypoints(start=44000)

    stndrdz = preprocessing.Standardize()
    stndrdz.applyLazily(ddmTrain, can_fit=True, name = 'train')
    stndrdz.applyLazily(ddmValid, can_fit=False, name = 'val')
    stndrdz.applyLazily(ddmTest, can_fit=False, name = 'test')

    GCN = preprocessing.GlobalContrastNormalization(batch_size = 1000)
    GCN.apply(ddmTrain, can_fit =True, name = 'train')
    GCN.apply(ddmValid, can_fit =False, name = 'val')
    GCN.apply(ddmTest, can_fit = False, name = 'test')
    return
    '''

    ddmTrain = ComboDatasetPyTable('/Tmp/zumerjer/perturbed_', which_set='train')
    ddmValid = ComboDatasetPyTable('/Tmp/zumerjer/perturbed_', which_set='valid')
    #ddmSmallTrain = ComboDatasetPyTable('/Tmp/zumerjer/all_', which_set='small_train')

    layer1 = ConvRectifiedLinear(layer_name = 'convRect1',
                     output_channels = 64,
                     irange = .05,
                     kernel_shape = [5, 5],
                     pool_shape = [4, 4],
                     pool_stride = [2, 2],
                     W_lr_scale = 0.1,
                     max_kernel_norm = lay1_reg)
    layer2 = ConvRectifiedLinear(layer_name = 'convRect2',
                     output_channels = 128,
                     irange = .05,
                     kernel_shape = [5, 5],
                     pool_shape = [3, 3],
                     pool_stride = [2, 2],
                     W_lr_scale = 0.1,
                     max_kernel_norm = lay2_reg)

        # Rectified linear units
    #layer3 = RectifiedLinear(dim = 3000,
    #                         sparse_init = 15,
    #                 layer_name = 'RectLin3')

    #Maxout layer
    maxout = Maxout(layer_name= 'maxout',
                    irange= .005,
                    num_units= maxout_units,
                    num_pieces= num_pcs,
                    W_lr_scale = 0.1,
                    max_col_norm= maxout_reg)

    #multisoftmax
    n_groups = 196
    n_classes = 96
    layer_name = 'multisoftmax'
    layerMS = MultiSoftmax(n_groups=n_groups,irange = 0.05, n_classes=n_classes, layer_name= layer_name)

    #setting up MLP
    MLPerc = MLP(batch_size=batch_size,
                 input_space=Conv2DSpace(shape=[96, 96],
                                         num_channels=3,
                                         axes=('b', 0, 1, 'c')),
                 layers=[layer1, layer2, maxout, layerMS])

    #mlp_cost
    missing_target_value = -1
    mlp_cost = MLPCost(cost_type='default',
                            missing_target_value=missing_target_value )
    mlp_cost.setup_dropout(input_include_probs= { 'convRect1' : 1.0 }, input_scales= { 'convRect1': 1. })

    #dropout_cost = Dropout(input_include_probs= { 'convRect1' : .8 },
    #                      input_scales= { 'convRect1': 1. })

    #algorithm
    monitoring_dataset = {'validation':ddmValid}#, 'mini-train':ddmSmallTrain}

    term_crit  = MonitorBased(prop_decrease = 1e-7, N = 100, channel_name = 'validation_objective')

    kp_ada = KeypointADADELTA(decay_factor=0.95,
                              #init_momentum=0.5,
                              monitoring_dataset=monitoring_dataset,
                              batch_size=batch_size,
                              termination_criterion=term_crit,
                              cost=mlp_cost)

    #train extension
    #train_ext = ExponentialDecayOverEpoch(decay_factor = 0.998, min_lr_scale = 0.001)
    #train_ext = LinearDecayOverEpoch(start= 1,saturate= 250,decay_factor= .01)
    #train_ext = ADADELTA(0.95)

    #train object
    train = Train(dataset=ddmTrain,
                  save_path=save_path,
                  save_freq=10,
                  model=MLPerc,
                  algorithm=kp_ada,
                  extensions=[#train_ext,
                      MonitorBasedSaveBest(channel_name='validation_objective',
                                           save_path=best_path),
                      #MomentumAdjustor(start=1,
                      #                 saturate=25,
                      #                 final_momentum=finMomentum)
                  ])
    train.main_loop()
    train.save()
Example #14
initial_momentum = .5
final_momentum = .99
start = 1
saturate = 20
momentum_adjustor = learning_rule.MomentumAdjustor(final_momentum, start, saturate)
momentum_rule = learning_rule.Momentum(initial_momentum)
 
# learning rate
start = .1
saturate = 20
decay_factor = .00001
learning_rate_adjustor = sgd.LinearDecayOverEpoch(start, saturate, decay_factor)

# termination criterion that stops after 20 epochs without
# any decrease in the objective on the validation set
termination_criterion = MonitorBased(channel_name='objective', N=20, prop_decrease=0.0)
 
# create Stochastic Gradient Descent trainer 
trainer = sgd.SGD(learning_rate=.001,
                    batch_size=10,
                    monitoring_dataset=ds_valid, 
                    termination_criterion=termination_criterion, 
                    cost=L1_cost)
#learning_rule=momentum_rule,
trainer.setup(ann, ds_train) 

# add monitor for saving the model with best score
monitor_save_best = best_params.MonitorBasedSaveBest('objective','./tmp/best.pkl')
 

#####################################
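
This excerpt sets up the trainer, the termination criterion, and the save-best monitor, but stops before the training loop itself. A hedged sketch of how these pieces are typically driven, following the manual-loop pattern visible in Example #22 (it assumes ann and ds_train were created earlier in the same script):

# Hedged sketch of the missing training loop, modeled on Example #22.
while True:
    trainer.train(dataset=ds_train)
    ann.monitor()
    monitor_save_best.on_monitor(ann, ds_train, trainer)
    if not trainer.continue_learning(ann):
        break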
Example #15
def main():

    #creating layers
    #2 convolutional rectified layers, border mode valid
    batch_size = params.batch_size
    lr = params.lr
    finMomentum = params.momentum
    maxout_units = params.units
    num_pcs = params.pieces
    lay1_reg = lay2_reg = maxout_reg = params.norm_reg
    #save_path = './models/no_maxout/titan_lr_0.1_btch_64_momFinal_0.9_maxout_2000_4.joblib'
    #best_path = '/models/no_maxout/titan_bart10_gpu2_best.joblib'
    save_path = './models/' + params.host + '_' + params.device + '_' + sys.argv[
        1] + '.joblib'
    best_path = './models/' + params.host + '_' + params.device + '_' + sys.argv[
        1] + 'best.joblib'
    numBatches = 400000 / batch_size

    from emotiw.common.datasets.faces.EmotiwKeypoints import EmotiwKeypoints
    '''
    print 'Applying preprocessing'
    ddmTrain = EmotiwKeypoints(start=0, stop =40000)
    ddmValid = EmotiwKeypoints(start=40000, stop = 44000)
    ddmTest = EmotiwKeypoints(start=44000)
    
    stndrdz = preprocessing.Standardize()
    stndrdz.applyLazily(ddmTrain, can_fit=True, name = 'train')
    stndrdz.applyLazily(ddmValid, can_fit=False, name = 'val')
    stndrdz.applyLazily(ddmTest, can_fit=False, name = 'test')

    GCN = preprocessing.GlobalContrastNormalization(batch_size = 1000)
    GCN.apply(ddmTrain, can_fit =True, name = 'train')
    GCN.apply(ddmValid, can_fit =False, name = 'val')
    GCN.apply(ddmTest, can_fit = False, name = 'test')
    return
    '''

    ddmTrain = EmotiwKeypoints(hack='train', preproc='STD')
    ddmValid = EmotiwKeypoints(hack='val', preproc='STD')

    layer1 = ConvRectifiedLinear(layer_name='convRect1',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 W_lr_scale=0.1,
                                 max_kernel_norm=lay1_reg)
    layer2 = ConvRectifiedLinear(layer_name='convRect2',
                                 output_channels=128,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 W_lr_scale=0.1,
                                 max_kernel_norm=lay2_reg)

    # Rectified linear units
    #layer3 = RectifiedLinear(dim = 3000,
    #                         sparse_init = 15,
    #                 layer_name = 'RectLin3')

    #Maxout layer
    maxout = Maxout(layer_name='maxout',
                    irange=.005,
                    num_units=maxout_units,
                    num_pieces=num_pcs,
                    W_lr_scale=0.1,
                    max_col_norm=maxout_reg)

    #multisoftmax
    n_groups = 196
    n_classes = 96
    irange = 0
    layer_name = 'multisoftmax'
    layerMS = MultiSoftmax(n_groups=n_groups,
                           irange=0.05,
                           n_classes=n_classes,
                           layer_name=layer_name)

    #setting up MLP
    MLPerc = MLP(batch_size=batch_size,
                 input_space=Conv2DSpace(shape=[96, 96], num_channels=3),
                 layers=[layer1, layer2, maxout, layerMS])

    #mlp_cost
    missing_target_value = -1
    mlp_cost = MLPCost(cost_type='default',
                       missing_target_value=missing_target_value)
    mlp_cost.setup_dropout(input_include_probs={'convRect1': 1.0},
                           input_scales={'convRect1': 1.})

    #dropout_cost = Dropout(input_include_probs= { 'convRect1' : .8 },
    #                      input_scales= { 'convRect1': 1. })

    #algorithm
    monitoring_dataset = {'validation': ddmValid}

    term_crit = MonitorBased(prop_decrease=1e-7,
                             N=100,
                             channel_name='validation_objective')

    kpSGD = KeypointSGD(learning_rate=lr,
                        init_momentum=0.5,
                        monitoring_dataset=monitoring_dataset,
                        batch_size=batch_size,
                        termination_criterion=term_crit,
                        cost=mlp_cost)

    #train extension
    #train_ext = ExponentialDecayOverEpoch(decay_factor = 0.998, min_lr_scale = 0.001)
    train_ext = LinearDecayOverEpoch(start=1, saturate=250, decay_factor=.01)

    #train object
    train = Train(dataset=ddmTrain,
                  save_path=save_path,
                  save_freq=10,
                  model=MLPerc,
                  algorithm=kpSGD,
                  extensions=[
                      train_ext,
                      MonitorBasedSaveBest(channel_name='validation_objective',
                                           save_path=best_path),
                      MomentumAdjustor(start=1,
                                       saturate=25,
                                       final_momentum=finMomentum)
                  ])
    train.main_loop()
    train.save()
Example #16
def main():
    training_data, validation_data, test_data, std_scale = load_training_data()
    kaggle_test_features = load_test_data(std_scale)

    ###############
    # pylearn2 ML
    hl1 = mlp.Sigmoid(layer_name='hl1', dim=200, irange=.1, init_bias=1.)
    hl2 = mlp.Sigmoid(layer_name='hl2', dim=100, irange=.1, init_bias=1.)

    # create Softmax output layer
    output_layer = mlp.Softmax(9, 'output', irange=.1)
    # create a Stochastic Gradient Descent trainer with monitor-based early stopping
    trainer = sgd.SGD(learning_rate=.05,
                      batch_size=300,
                      learning_rule=learning_rule.Momentum(.5),
                      termination_criterion=MonitorBased(
                          channel_name='valid_objective',
                          prop_decrease=0.,
                          N=10),
                      monitoring_dataset={
                          'valid': validation_data,
                          'train': training_data
                      })

    layers = [hl1, hl2, output_layer]
    # create neural net
    model = mlp.MLP(layers, nvis=93)

    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_objective',
        save_path='pylearn2_results/pylearn2_test.pkl')

    velocity = learning_rule.MomentumAdjustor(final_momentum=.6,
                                              start=1,
                                              saturate=250)
    decay = sgd.LinearDecayOverEpoch(start=1, saturate=250, decay_factor=.01)
    ######################

    experiment = Train(dataset=training_data,
                       model=model,
                       algorithm=trainer,
                       extensions=[watcher, velocity, decay])

    experiment.main_loop()

    #load best model and test
    ################
    model = serial.load('pylearn2_results/pylearn2_test.pkl')
    # get a prediction of the accuracy from the test_data
    test_results = model.fprop(theano.shared(test_data[0],
                                             name='test_data')).eval()

    print test_results.shape
    loss = multiclass_log_loss(test_data[1], test_results)

    print 'Test multiclass log loss:', loss

    out_file = 'pylearn2_results/' + str(loss) + 'ann'
    #exp.save(out_file + '.pkl')

    #save the kaggle results

    results = model.fprop(
        theano.shared(kaggle_test_features, name='kaggle_test_data')).eval()
    save_results(out_file + '.csv', kaggle_test_features, results)
Example #17
def get_layer_MLP(layers, trainset, validset):
    
    #processor = Standardize();
    
#    trainset = BlackBoxDataset( which_set = 'train',
#                                start = 0,
#                                stop = 900,
#                                preprocessor = Standardize(),
#                                fit_preprocessor = True,
#                                fit_test_preprocessor = True,
#                                )
#    
#    validset = BlackBoxDataset( which_set = 'train',
#                                start = 900,
#                                stop = 1000 ,
#                                preprocessor = Standardize(),
#                                fit_preprocessor = True,
#                                fit_test_preprocessor = False,
#                                )
    
    dropCfg = { 'input_include_probs': { 'h0' : .8 } ,
                'input_scales': { 'h0': 1.}
              }
    
    config = { 'learning_rate': .1,
                'init_momentum': .5,
                'cost' :  Default(), #Dropout(**dropCfg),
                'monitoring_dataset':  { 'train' : trainset,
                                         'valid' : validset
                                        },
                'termination_criterion': MonitorBased(channel_name='valid_y_misclass',N=10,prop_decrease=0),
                'update_callbacks': None
              }
     
#    configCfg0 = {'layer_name' : 'h0',
#                'dim' : 1875,
#                'irange' : .05,
#                # Rather than using weight decay, we constrain the norms of the weight vectors
#                 'max_col_norm' : 1.}
#    
#    configCfg1 = {'layer_name' : 'h1',
#                'dim' : 1875,
#                'irange' : .05,
#                # Rather than using weight decay, we constrain the norms of the weight vectors
#                 'max_col_norm' : 1.}
    
    sftmaxCfg = {
                'layer_name': 'y',
                'init_bias_target_marginals': trainset,
                # Initialize the weights to all 0s
                'irange': .0,
                'n_classes': 9
            }
    
    layers.append(Softmax(**sftmaxCfg)) 

    train_algo = SGD(**config)
    model = MLP(batch_size=10,layers=layers,nvis=1875)
    return Train(model = model,
            dataset = trainset,
            algorithm = train_algo,
            extensions = None, #[LinearDecayOverEpoch(start= 5, saturate= 100, decay_factor= .01)], 
            save_path = "best_dbn_model.pkl",
            save_freq = 100)
Example #18
def test_works():
    load = True
    if not load:
        ddmTrain = FacialKeypoint(which_set='train', start=0, stop=6000)
        ddmValid = FacialKeypoint(which_set='train', start=6000, stop=7049)
        # valid can_fit = false
        pipeline = preprocessing.Pipeline()
        stndrdz = preprocessing.Standardize()
        stndrdz.apply(ddmTrain, can_fit=True)
        #doubt, how about can_fit = False?
        stndrdz.apply(ddmValid, can_fit=False)
        GCN = preprocessing.GlobalContrastNormalization()
        GCN.apply(ddmTrain, can_fit=True)
        GCN.apply(ddmValid, can_fit=False)

        pcklFile = open('kpd.pkl', 'wb')
        obj = (ddmTrain, ddmValid)
        pickle.dump(obj, pcklFile)
        pcklFile.close()
        return
    else:
        pcklFile = open('kpd.pkl', 'rb')
        (ddmTrain, ddmValid) = pickle.load(pcklFile)
        pcklFile.close()

    #creating layers
    #2 convolutional rectified layers, border mode valid
    layer1 = ConvRectifiedLinear(layer_name='convRect1',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)
    layer2 = ConvRectifiedLinear(layer_name='convRect2',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)

    # Rectified linear units
    layer3 = RectifiedLinear(dim=3000, sparse_init=15, layer_name='RectLin3')

    #multisoftmax
    n_groups = 30
    n_classes = 98
    irange = 0
    layer_name = 'multisoftmax'
    layerMS = MultiSoftmax(n_groups=n_groups,
                           irange=0.05,
                           n_classes=n_classes,
                           layer_name=layer_name)

    #setting up MLP
    MLPerc = MLP(batch_size=8,
                 input_space=Conv2DSpace(shape=[96, 96], num_channels=1),
                 layers=[layer1, layer2, layer3, layerMS])

    #mlp_cost
    missing_target_value = -1
    mlp_cost = MLPCost(cost_type='default',
                       missing_target_value=missing_target_value)

    #algorithm

    # learning rate, momentum, batch size, monitoring dataset, cost, termination criteria

    term_crit = MonitorBased(prop_decrease=0.00001,
                             N=30,
                             channel_name='validation_objective')
    kpSGD = KeypointSGD(learning_rate=0.001,
                        init_momentum=0.5,
                        monitoring_dataset={
                            'validation': ddmValid,
                            'training': ddmTrain
                        },
                        batch_size=8,
                        batches_per_iter=750,
                        termination_criterion=term_crit,
                        train_iteration_mode='random_uniform',
                        cost=mlp_cost)

    #train extension
    train_ext = ExponentialDecayOverEpoch(decay_factor=0.998,
                                          min_lr_scale=0.01)
    #train object
    train = Train(dataset=ddmTrain,
                  save_path='kpd_model2.pkl',
                  save_freq=1,
                  model=MLPerc,
                  algorithm=kpSGD,
                  extensions=[
                      train_ext,
                      MonitorBasedSaveBest(channel_name='validation_objective',
                                           save_path='kpd_best.pkl'),
                      MomentumAdjustor(start=1, saturate=20, final_momentum=.9)
                  ])
    train.main_loop()
    train.save()
Example #19
                              dim=13,
                              istdev=0.01,
                              monitor_style="bit_vector_class")],
          nvis=26)

print "[MESSAGE] The model is built"

### build algorithm

algorithm=SGD(batch_size=100,
              learning_rate=0.05,
              monitoring_dataset={'train':valid_data,
                                  'valid':valid_data,
                                  'test':test_data},
              termination_criterion=Or(criteria=[MonitorBased(channel_name="valid_objective",
                                                              prop_decrease=0.00001,
                                                              N=40),
                                                 EpochCounter(max_epochs=200)]),
              cost = Dropout(input_include_probs={'hidden_0':1., 'hidden_1':1., 'y':0.5},
                             input_scales={ 'hidden_0': 1., 'hidden_1':1., 'y':2.}),
              update_callbacks=ExponentialDecay(decay_factor=1.0000003,
                                                min_lr=.000001))

print "[MESSAGE] Training algorithm is built"

### build training

idpath = os.path.splitext(os.path.abspath(__file__))[0]  # ID for output files.
save_path = idpath + '.pkl'

train=Train(dataset=train_data,
Example #20
# create datasets
ds_train = Pima()
ds_train, ds_valid = ds_train.split(0.7)
ds_valid, ds_test = ds_valid.split(0.7)

# create sigmoid hidden layer with 20 nodes, init weights in range -0.05 to 0.05 and add
# a bias with value 1
hidden_layer = mlp.Sigmoid(layer_name='hidden', dim=20, irange=.05, init_bias=1.)
# softmax output layer
output_layer = mlp.Softmax(2, 'output', irange=.05)
layers = [hidden_layer, output_layer]

# termination criterion that stops after 50 epochs without
# any decrease in misclassification on the validation set
termination_criterion = MonitorBased(channel_name='output_misclass',
                                     N=50, prop_decrease=0.0)

# momentum
initial_momentum = .5
final_momentum = .99
start = 1
saturate = 50
momentum_adjustor = learning_rule.MomentumAdjustor(final_momentum, start, saturate)
momentum_rule = learning_rule.Momentum(initial_momentum)

# learning rate
start = 1
saturate = 50
decay_factor = .1
learning_rate_adjustor = sgd.LinearDecayOverEpoch(start, saturate, decay_factor)
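
The excerpt ends before the trainer and the Train object are assembled. A hedged completion in the style of the surrounding examples (a reconstruction, not the original file's code; it assumes Pima exposes X like the other datasets here):

# Hedged completion, modeled on the other examples rather than the source file.
ann = mlp.MLP(layers, nvis=ds_train.X.shape[1])
trainer = sgd.SGD(learning_rate=.05,
                  batch_size=10,
                  learning_rule=momentum_rule,
                  monitoring_dataset=ds_valid,
                  termination_criterion=termination_criterion)
train_job = Train(dataset=ds_train,
                  model=ann,
                  algorithm=trainer,
                  extensions=[momentum_adjustor, learning_rate_adjustor])
train_job.main_loop()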
Example #21
print 'Val Dataset Loaded'
last_ndim = 240
n_classes = 7

#import pdb
#pdb.set_trace()

algorithm = SGD(
    batch_size=batch_size,
    learning_rate=learning_rate,
    init_momentum=.5,
    monitoring_dataset={'valid': val_ds},
    cost=Dropout(input_include_probs={'h0': .8}, input_scales={'h0': 1.}),
    termination_criterion=MonitorBased(channel_name="valid_y_misclass",
                                       prop_decrease=0.,
                                       N=100),
    #termination_criterion: !obj:pylearn2.termination_criteria.EpochCounter {max_epochs: 1},
    update_callbacks=ExponentialDecay(decay_factor=1.00004, min_lr=.000001))

extensions = [
    MonitorBasedSaveBest(channel_name='valid_y_misclass',
                         save_path=save_best_path),
    MomentumAdjustor(start=1, saturate=250, final_momentum=.7)
]

model = MLP(batch_size=batch_size,
            input_space=Conv2DSpace(shape=[48, 48],
                                    num_channels=num_chan,
                                    axes=['c', 0, 1, 'b']),
            layers=[
Example #22
    nn.monitor()
    save_best.on_monitor(nn, train, algo)
    if not algo.continue_learning(nn):
        break

# SoftPlus with Dropout

h0 = mlp.Softplus(layer_name='h0', dim=60, sparse_init=0)
h1 = mlp.Softplus(layer_name='h1', dim=60, sparse_init=0)
y0 = mlp.Softmax(layer_name='y0', n_classes=5, irange=0)
layers = [h0, h1, y0]

model = mlp.MLP(layers, nvis=train.X.shape[1])

monitoring = dict(valid=valid)
termination = MonitorBased(channel_name="valid_y0_misclass", N=5)
extensions = [
    best_params.MonitorBasedSaveBest(channel_name="valid_y0_misclass",
                                     save_path="train_best.pkl")
]

algorithm = sgd.SGD(0.1,
                    batch_size=100,
                    cost=Dropout(),
                    monitoring_dataset=monitoring,
                    termination_criterion=termination)

print 'Running training'
train_job = Train(train,
                  model,
                  algorithm,
Example #23
def get_layer_MLP():
    
    extraset = BlackBoxDataset(which_set='extra')

    processor = Standardize()

    processor.apply(extraset, can_fit=True)
    
    trainset = BlackBoxDataset( which_set = 'train',
                                start = 0,
                                stop = 900,
                                preprocessor = processor,
                                fit_preprocessor = True,
                                fit_test_preprocessor = True,
                                )
    
    validset = BlackBoxDataset( which_set = 'train',
                                start = 900,
                                stop = 1000 ,
                                preprocessor = processor,
                                fit_preprocessor = True,
                                fit_test_preprocessor = False,
                                )
    
    dropCfg = { 'input_include_probs': { 'h0' : .8 } ,
                'input_scales': { 'h0': 1.}
              }
    
    config = { 'learning_rate': .05,
                'init_momentum': .00,
                'cost' : Dropout(**dropCfg), 
                'monitoring_dataset':  { 'train' : trainset,
                                         'valid' : validset
                                        },
                'termination_criterion': MonitorBased(channel_name='valid_y_misclass',N=100,prop_decrease=0),
                'update_callbacks': None
              }
     
    config0 = {
                'layer_name': 'h0',
                'num_units': 1875,
                'num_pieces': 2,
                'irange': .05,
                # Rather than using weight decay, we constrain the norms of the weight vectors
                'max_col_norm': 2.
    }
    
    config1 = {
                'layer_name': 'h1',
                'num_units': 700,
                'num_pieces': 2,
                'irange': .05,
                # Rather than using weight decay, we constrain the norms of the weight vectors
                'max_col_norm': 2.
    }
    
    sftmaxCfg = {
                'layer_name': 'y',
                'init_bias_target_marginals': trainset,
                # Initialize the weights to all 0s
                'irange': .0,
                'n_classes': 9
            }
    
    l1 = Maxout(**config0)
    l2 = Maxout(**config1)
    l3 = Softmax(**sftmaxCfg)

    train_algo = SGD(**config)
    model = MLP(batch_size=75,layers=[l1,l2,l3],nvis=1875)
    return Train(model = model,
            dataset = trainset,
            algorithm = train_algo,
            extensions = None, 
            save_path = "maxout_best_model.pkl",
            save_freq = 1)
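
A hedged usage sketch for the factory above (a hypothetical call site; main_loop drives training exactly as in the other examples):

# Hypothetical call site for get_layer_MLP().
train_obj = get_layer_MLP()
train_obj.main_loop()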
Example #24
def main( x ):

	l1_dim = x[0]
	l2_dim = x[1]
	learning_rate = x[2]
	momentum = x[3]
	l1_dropout = x[4]
	decay_factor = x[5]
	
	min_lr = 1e-7

	#

	train = np.loadtxt( train_file, delimiter = ',' )
	x_train = train[:,0:-1]
	y_train = train[:,-1]
	y_train.shape = ( y_train.shape[0], 1 )

	# 

	validation = np.loadtxt( validation_file, delimiter = ',' )
	x_valid = validation[:,0:-1]
	y_valid = validation[:,-1]
	y_valid.shape = ( y_valid.shape[0], 1 )

	#

	#input_space = VectorSpace( dim = x.shape[1] )
	full = DenseDesignMatrix( X = x_train, y = y_train )
	valid = DenseDesignMatrix( X = x_valid, y = y_valid )

	l1 = mlp.RectifiedLinear( 
		layer_name='l1',
		irange=.001,
		dim = l1_dim,
		# "Rather than using weight decay, we constrain the norms of the weight vectors"
		max_col_norm=1.
	)

	l2 = mlp.RectifiedLinear(
		layer_name='l2',
		irange=.001,
		dim = l2_dim,
		max_col_norm=1.
	)

	output = mlp.Linear( dim = 1, layer_name='y', irange=.0001 )

	layers = [l1, l2, output]
	nvis = x_train.shape[1]

	mdl = mlp.MLP( layers, nvis = nvis )	# input_space = input_space

	#lr = .001
	#epochs = 100
	
	decay = sgd.ExponentialDecay( decay_factor = decay_factor, min_lr = min_lr )

	trainer = sgd.SGD(
		learning_rate = learning_rate,
		batch_size=128,
		learning_rule=learning_rule.Momentum( momentum ),
		
		update_callbacks = [ decay ],

		# Remember, default dropout is .5
		cost = Dropout( input_include_probs = {'l1': l1_dropout},
				   input_scales={'l1': 1.}),

		#termination_criterion = EpochCounter(epochs),
		termination_criterion = MonitorBased(
			channel_name = "valid_objective",
			prop_decrease = 0.001,				# 0.1% of objective
			N = 10	
		),

		# valid_objective is MSE

		monitoring_dataset = { 'train': full, 'valid': valid }
	)

	watcher = best_params.MonitorBasedSaveBest( channel_name = 'valid_objective', save_path = output_model_file )
	
	experiment = Train( dataset = full, model = mdl, algorithm = trainer, extensions = [ watcher ] )
	experiment.main_loop()

	###

	error = get_error_from_model( output_model_file )
	print "*** error: {} ***".format( error )
	return error
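
Because main(x) returns a single scalar error, it can be dropped into any black-box hyperparameter search. A hedged illustration of the call convention (the values below are arbitrary):

# Hypothetical call: x packs [l1_dim, l2_dim, learning_rate, momentum,
# l1_dropout, decay_factor] in the order unpacked at the top of main().
x = [512, 256, 0.001, 0.9, 0.5, 1.0001]
error = main(x)
print "error for this configuration:", error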