def _create_trainer(self, dataset):
    sgd.log.setLevel(logging.WARNING)

    # Aggregate all the dropout parameters into shared dictionaries.
    probs, scales = {}, {}
    for l in [l for l in self.layers if l.dropout is not None]:
        incl = 1.0 - l.dropout
        probs[l.name] = incl
        scales[l.name] = 1.0 / incl

    if self.cost == "Dropout" or len(probs) > 0:
        # Use the globally specified dropout rate when there are no layer-specific ones.
        incl = 1.0 - self.dropout
        default_prob, default_scale = incl, 1.0 / incl

        # Pass all the parameters to pylearn2 as a custom cost function.
        self.cost = Dropout(
            default_input_include_prob=default_prob,
            default_input_scale=default_scale,
            input_include_probs=probs,
            input_scales=scales)

    logging.getLogger('pylearn2.monitor').setLevel(logging.WARNING)

    if dataset is not None:
        termination_criterion = MonitorBased(
            channel_name='objective',
            N=self.n_stable,
            prop_decrease=self.f_stable)
    else:
        termination_criterion = None

    return sgd.SGD(
        cost=self.cost,
        batch_size=self.batch_size,
        learning_rule=self._learning_rule,
        learning_rate=self.learning_rate,
        termination_criterion=termination_criterion,
        monitoring_dataset=dataset)
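A small worked example of the dropout aggregation performed above; the layer names and rates are hypothetical, not taken from the original code.

# Worked example (hypothetical layers): a layer with dropout=0.2 is kept with
# probability 0.8 and its kept inputs are rescaled by 1/0.8 at training time.
layer_dropout = {'h1': 0.2, 'h2': 0.5}
probs = {name: 1.0 - rate for name, rate in layer_dropout.items()}
scales = {name: 1.0 / p for name, p in probs.items()}
# probs  == {'h1': 0.8, 'h2': 0.5}
# scales == {'h1': 1.25, 'h2': 2.0}
# Layers without a layer-specific rate fall back to default_input_include_prob
# and default_input_scale, derived from the global self.dropout.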
def get_trainer(model, trainset, validset, epochs=20, batch_size=200):
    monitoring_batches = None if validset is None else 20
    train_algo = SGD(
        batch_size=batch_size,
        init_momentum=0.5,
        learning_rate=0.1,
        monitoring_batches=monitoring_batches,
        monitoring_dataset=validset,
        cost=Dropout(
            input_include_probs={'h0': 0.8, 'h1': 0.8, 'h2': 0.8, 'h3': 0.8, 'y': 0.5},
            input_scales={'h0': 1. / 0.8, 'h1': 1. / 0.8, 'h2': 1. / 0.8,
                          'h3': 1. / 0.8, 'y': 1. / 0.5},
            default_input_include_prob=0.5,
            default_input_scale=1. / 0.5),
        termination_criterion=EpochCounter(epochs),
        update_callbacks=ExponentialDecay(decay_factor=1.0001, min_lr=0.001))
    return Train(model=model, algorithm=train_algo, dataset=trainset,
                 save_freq=0, save_path='epoch',
                 extensions=[MomentumAdjustor(final_momentum=0.9, start=0,
                                              saturate=int(epochs * 0.8))])
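A minimal usage sketch for a helper like `get_trainer`; it assumes `model`, `trainset`, and `validset` are an already-built pylearn2 MLP and DenseDesignMatrix objects (the names are illustrative, not from the original code).

# Hedged usage sketch: model/trainset/validset are assumed to exist already.
train_job = get_trainer(model, trainset, validset, epochs=20, batch_size=200)
train_job.main_loop()  # runs SGD with the Dropout cost until EpochCounter stops it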
def __init__(self, layers, dropout=False, input_scaler=None, output_scaler=None,
             learning_rate=0.01, verbose=0):
    """
    :param layers: List of tuples pairing each layer's type with its number of neurons
    :param dropout: If True, train with pylearn2's Dropout cost
    :param input_scaler: Optional scaler applied to the network inputs
    :param output_scaler: Optional scaler applied to the network outputs
    :param learning_rate: The learning rate shared by all layers
    :param verbose: Verbosity level
    """
    self.layers = layers
    self.ds = None
    self.f = None
    self.verbose = verbose

    cost = None
    if dropout:
        cost = Dropout()
    self.trainer = sgd.SGD(learning_rate=learning_rate, cost=cost, batch_size=100)

    self.input_normaliser = input_scaler
    self.output_normaliser = output_scaler
def get_trainer(model, trainset, validset, epochs=20, batch_size=100):
    monitoring_batches = None if validset is None else 20
    train_algo = SGD(
        batch_size=batch_size,
        init_momentum=0.5,
        learning_rate=0.05,
        monitoring_batches=monitoring_batches,
        monitoring_dataset=validset,
        cost=Dropout(input_include_probs={'h0': 0.8},
                     input_scales={'h0': 1.},
                     default_input_include_prob=0.5,
                     default_input_scale=1. / 0.5),
        # termination_criterion=MonitorBased(channel_name='y_misclass', prop_decrease=0., N=50),
        termination_criterion=EpochCounter(epochs),
        update_callbacks=ExponentialDecay(decay_factor=1.00002, min_lr=0.0001))
    return Train(model=model, algorithm=train_algo, dataset=trainset,
                 save_freq=0, save_path='epoch',
                 extensions=[MomentumAdjustor(final_momentum=0.7, start=0,
                                              saturate=int(0.8 * epochs))])
def get_layer_trainer_sgd(model, trainset):
    drop_cost = Dropout(input_include_probs={'h0': .4},
                        input_scales={'h0': 1.})

    # configure SGD
    train_algo = SGD(
        train_iteration_mode='batchwise_shuffled_sequential',
        learning_rate=0.2,
        cost=drop_cost,
        monitoring_dataset=trainset,
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS),
        update_callbacks=None)

    extensions = [MonitorBasedSaveBest(channel_name="y_kl",
                                       save_path="./convnet_test_best.pkl")]

    return Train(model=model,
                 algorithm=train_algo,
                 extensions=extensions,
                 dataset=trainset)
def get_finetuner(model, trainset, batch_size=100, epochs=100):
    train_algo = SGD(
        batch_size=batch_size,
        learning_rule=Momentum(init_momentum=0.5),
        learning_rate=0.5,
        monitoring_batches=batch_size,
        monitoring_dataset=trainset,
        cost=Dropout(input_include_probs={'h0': .5},
                     input_scales={'h0': 2.}),
        termination_criterion=EpochCounter(epochs))
    path = DATA_DIR + 'model' + str(SUBMODEL) + 'saved_daex.pkl'
    return Train(model=model,
                 algorithm=train_algo,
                 dataset=trainset,
                 save_path=path,
                 save_freq=10,
                 extensions=[
                     MomentumAdjustor(final_momentum=0.9, start=0,
                                      saturate=int(epochs * 0.8)),
                     LinearDecayOverEpoch(start=1, saturate=int(epochs * 0.7),
                                          decay_factor=.02)
                 ])
def get_trainer2(model, trainset, epochs=50):
    train_algo = SGD(
        batch_size=bsize,
        learning_rate=0.5,
        learning_rule=Momentum(init_momentum=0.5),
        monitoring_batches=bsize,
        monitoring_dataset=trainset,
        cost=Dropout(input_include_probs={'h0': .8},
                     input_scales={'h0': 1.}),
        termination_criterion=EpochCounter(epochs),
    )
    path = DATA_DIR + 'model2saved_conv.pkl'
    return Train(model=model,
                 algorithm=train_algo,
                 dataset=trainset,
                 save_path=path,
                 save_freq=1,
                 extensions=[
                     MomentumAdjustor(final_momentum=0.7, start=0,
                                      saturate=int(epochs * 0.5)),
                     LinearDecayOverEpoch(start=1, saturate=int(epochs * 0.8),
                                          decay_factor=.01)
                 ])
images_train = images[train_index]
y_train = y[train_index]
images_train, y_train = shuffle(images_train, y_train, random_state=7)
X_train = DenseDesignMatrix(X=images_train, y=y_train, view_converter=view_converter)

images_test = images[test_index]
y_test = y[test_index]
X_test = DenseDesignMatrix(X=images_test, y=y_test, view_converter=view_converter)

if retrain:
    print "training on", X_train.X.shape, 'testing on', X_test.X.shape
    trainer = sgd.SGD(
        learning_rate=learn_rate,
        batch_size=batch_size,
        learning_rule=learning_rule.Momentum(momentum_start),
        cost=Dropout(
            input_include_probs={'l1': 1., 'l2': 1., 'l3': 1., 'l4': 1., 'l5': 1., 'l6': 1.},
            input_scales={'l1': 1., 'l2': 1., 'l3': 1., 'l4': 1., 'l5': 1., 'l6': 1.}),
        termination_criterion=EpochCounter(max_epochs=max_epochs),
        monitoring_dataset={'train': X_train, 'valid': X_test},
    )

    input_space = Conv2DSpace(shape=(central_window_shape, central_window_shape),
                              axes=axes,
                              num_channels=1)
    ann = mlp.MLP(layers, input_space=input_space)
    velocity = learning_rule.MomentumAdjustor(final_momentum=momentum_end,
                                              start=1,
                                              saturate=momentum_saturate)
output = mlp.Softmax(layer_name='y',
                     n_classes=10,
                     irange=.005,
                     max_col_norm=1.9365)

layers = [l1, l2, l3, l4, output]

mdl = mlp.MLP(layers, input_space=in_space)

trainer = sgd.SGD(learning_rate=.17,
                  batch_size=128,
                  learning_rule=learning_rule.Momentum(.5),
                  # Remember, default dropout is .5
                  cost=Dropout(input_include_probs={'l1': .8},
                               input_scales={'l1': 1.}),
                  termination_criterion=EpochCounter(max_epochs=475),
                  monitoring_dataset={'valid': tst, 'train': trn})

preprocessor = Pipeline([GlobalContrastNormalization(scale=55.), ZCA()])
trn.apply_preprocessor(preprocessor=preprocessor, can_fit=True)
tst.apply_preprocessor(preprocessor=preprocessor, can_fit=False)
serial.save('kaggle_cifar10_preprocessor.pkl', preprocessor)

watcher = best_params.MonitorBasedSaveBest(
    channel_name='valid_y_misclass',
    save_path='kaggle_cifar10_maxout_zca.pkl')

velocity = learning_rule.MomentumAdjustor(final_momentum=.65, start=1,
def main(x):

    l1_dim = x[0]
    l2_dim = x[1]
    learning_rate = x[2]
    momentum = x[3]
    l1_dropout = x[4]
    decay_factor = x[5]

    min_lr = 1e-7

    #
    train = np.loadtxt(train_file, delimiter=',')
    x_train = train[:, 0:-1]
    y_train = train[:, -1]
    y_train.shape = (y_train.shape[0], 1)

    #
    validation = np.loadtxt(validation_file, delimiter=',')
    x_valid = validation[:, 0:-1]
    y_valid = validation[:, -1]
    y_valid.shape = (y_valid.shape[0], 1)

    #
    # input_space = VectorSpace(dim=x.shape[1])
    full = DenseDesignMatrix(X=x_train, y=y_train)
    valid = DenseDesignMatrix(X=x_valid, y=y_valid)

    l1 = mlp.RectifiedLinear(layer_name='l1',
                             irange=.001,
                             dim=l1_dim,
                             # "Rather than using weight decay, we constrain the norms of the weight vectors"
                             max_col_norm=1.)

    l2 = mlp.RectifiedLinear(layer_name='l2',
                             irange=.001,
                             dim=l2_dim,
                             max_col_norm=1.)

    output = mlp.Linear(dim=1,
                        layer_name='y',
                        irange=.0001)

    layers = [l1, l2, output]
    nvis = x_train.shape[1]

    mdl = mlp.MLP(layers, nvis=nvis)  # input_space = input_space

    # lr = .001
    # epochs = 100

    decay = sgd.ExponentialDecay(decay_factor=decay_factor, min_lr=min_lr)

    trainer = sgd.SGD(learning_rate=learning_rate,
                      batch_size=128,
                      learning_rule=learning_rule.Momentum(momentum),
                      update_callbacks=[decay],
                      # Remember, default dropout is .5
                      cost=Dropout(input_include_probs={'l1': l1_dropout},
                                   input_scales={'l1': 1.}),
                      # termination_criterion=EpochCounter(epochs),
                      termination_criterion=MonitorBased(
                          channel_name="valid_objective",
                          prop_decrease=0.001,  # 0.1% of objective
                          N=10),
                      # valid_objective is MSE
                      monitoring_dataset={'train': full, 'valid': valid})

    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_objective',
        save_path=output_model_file)

    experiment = Train(dataset=full,
                       model=mdl,
                       algorithm=trainer,
                       extensions=[watcher])

    experiment.main_loop()

    ###

    error = get_error_from_model(output_model_file)
    print "*** error: {} ***".format(error)
    return error
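A hedged example of calling `main`: the six-element vector matches the unpacking order at the top of the function, the concrete values are purely illustrative, and `train_file`, `validation_file`, `output_model_file`, and `get_error_from_model` are assumed to be defined in the enclosing module.

# Hypothetical hyperparameter vector:
#   [l1_dim, l2_dim, learning_rate, momentum, l1_dropout, decay_factor]
error = main([512, 256, 0.01, 0.9, 0.8, 1.0001])
print "validation error: {}".format(error)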
def get_cost_fn(self):
    if self.model_params.get('dropout'):
        return Dropout()
    return None
def get_cost_fn(self):
    return Dropout(input_include_probs={'h0': .8},
                   input_scales={'h0': 1.})
def get_cost_fn(self):
    return Dropout()
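A sketch of how a cost factory like the `get_cost_fn` variants above is typically consumed; the surrounding trainer setup and its parameters are assumptions for illustration, not part of the original code.

# Hedged sketch (illustrative names): plug the returned cost into SGD.
cost = self.get_cost_fn()
algorithm = sgd.SGD(learning_rate=0.05,
                    batch_size=100,
                    cost=cost)  # cost=None makes SGD fall back to the model's default cost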
              nvis=26)

print "[MESSAGE] The model is built"

### build algorithm
algorithm = SGD(batch_size=100,
                learning_rate=0.05,
                monitoring_dataset={'train': valid_data,
                                    'valid': valid_data,
                                    'test': test_data},
                termination_criterion=Or(criteria=[
                    MonitorBased(channel_name="valid_objective",
                                 prop_decrease=0.00001,
                                 N=40),
                    EpochCounter(max_epochs=200)]),
                cost=Dropout(input_include_probs={'hidden_0': 1.,
                                                  'hidden_1': 1.,
                                                  'y': 0.5},
                             input_scales={'hidden_0': 1.,
                                           'hidden_1': 1.,
                                           'y': 2.}),
                update_callbacks=ExponentialDecay(decay_factor=1.0000003,
                                                  min_lr=.000001))

print "[MESSAGE] Training algorithm is built"

### build training
idpath = os.path.splitext(os.path.abspath(__file__))[0]  # ID for output files.
save_path = idpath + '.pkl'

train = Train(dataset=train_data,
              model=model,
              algorithm=algorithm,
              save_path=save_path,
              save_freq=100)
def get_layer_MLP():
    extraset = BlackBoxDataset(which_set='extra')

    processor = Standardize()
    processor.apply(extraset, can_fit=True)

    trainset = BlackBoxDataset(which_set='train',
                               start=0,
                               stop=900,
                               preprocessor=processor,
                               fit_preprocessor=True,
                               fit_test_preprocessor=True)

    validset = BlackBoxDataset(which_set='train',
                               start=900,
                               stop=1000,
                               preprocessor=processor,
                               fit_preprocessor=True,
                               fit_test_preprocessor=False)

    dropCfg = {'input_include_probs': {'h0': .8},
               'input_scales': {'h0': 1.}}

    config = {'learning_rate': .05,
              'init_momentum': .00,
              'cost': Dropout(**dropCfg),
              'monitoring_dataset': {'train': trainset, 'valid': validset},
              'termination_criterion': MonitorBased(channel_name='valid_y_misclass',
                                                    N=100,
                                                    prop_decrease=0),
              'update_callbacks': None}

    config0 = {'layer_name': 'h0',
               'num_units': 1875,
               'num_pieces': 2,
               'irange': .05,
               # Rather than using weight decay, we constrain the norms of the weight vectors
               'max_col_norm': 2.}

    config1 = {'layer_name': 'h1',
               'num_units': 700,
               'num_pieces': 2,
               'irange': .05,
               # Rather than using weight decay, we constrain the norms of the weight vectors
               'max_col_norm': 2.}

    sftmaxCfg = {'layer_name': 'y',
                 'init_bias_target_marginals': trainset,
                 # Initialize the weights to all 0s
                 'irange': .0,
                 'n_classes': 9}

    l1 = Maxout(**config0)
    l2 = Maxout(**config1)
    l3 = Softmax(**sftmaxCfg)

    train_algo = SGD(**config)
    model = MLP(batch_size=75, layers=[l1, l2, l3], nvis=1875)
    return Train(model=model,
                 dataset=trainset,
                 algorithm=train_algo,
                 extensions=None,
                 save_path="maxout_best_model.pkl",
                 save_freq=1)
                     irange=ir, dim=dim, max_col_norm=1.)
l3 = RectifiedLinear(layer_name='l3',
                     irange=ir, dim=dim, max_col_norm=1.)
output = Softmax(layer_name='y', n_classes=9,
                 irange=ir, max_col_norm=mcn_out)
mdl = MLP([l1, l2, l3, output], nvis=X2.shape[1])

trainer = sgd.SGD(learning_rate=lr,
                  batch_size=bs,
                  learning_rule=learning_rule.Momentum(mm),
                  cost=Dropout(default_input_include_prob=ip,
                               default_input_scale=1 / ip),
                  termination_criterion=EpochCounter(epochs),
                  seed=seed)
decay = sgd.LinearDecayOverEpoch(start=2, saturate=20, decay_factor=.1)
experiment = Train(dataset=training, model=mdl,
                   algorithm=trainer, extensions=[decay])
experiment.main_loop()

epochs_current = epochs
for s in range(n_add):
    trainer = sgd.SGD(learning_rate=lr * .1,
                      batch_size=bs,
                      learning_rule=learning_rule.Momentum(mm),
                      cost=Dropout(default_input_include_prob=ip,
                                   default_input_scale=1 / ip),
seed = i + 3819
R = RImatrix(X.shape[1], m, k, rm_dup_cols=True, seed=seed)
R = np.abs(R.todense().astype(np.float32))
dim1 = R.shape[1]
l1 = RectifiedLinear(layer_name='l1', irange=ir1, dim=dim1, mask_weights=R)
l2 = RectifiedLinear(layer_name='l2', irange=ir2, dim=dim2, max_col_norm=1.)
l3 = RectifiedLinear(layer_name='l3', irange=ir2, dim=dim2, max_col_norm=1.)
l4 = RectifiedLinear(layer_name='l4', irange=ir2, dim=dim2, max_col_norm=1.)
output = Softmax(layer_name='y', n_classes=9, irange=ir_out, max_col_norm=mcn_out)
mdl = MLP([l1, l2, l3, l4, output], nvis=X2.shape[1])

trainer = sgd.SGD(learning_rate=lr,
                  batch_size=bs,
                  learning_rule=learning_rule.Momentum(mm),
                  cost=Dropout(input_include_probs={'l1': 1.},
                               input_scales={'l1': 1.},
                               default_input_include_prob=ip,
                               default_input_scale=1 / ip),
                  termination_criterion=EpochCounter(epochs),
                  seed=seed)
decay = sgd.LinearDecayOverEpoch(start=2, saturate=20, decay_factor=.1)
experiment = Train(dataset=training, model=mdl,
                   algorithm=trainer, extensions=[decay])
experiment.main_loop()

epochs_current = epochs
for s in range(n_add):
    del mdl.monitor
    trainer = sgd.SGD(learning_rate=lr * .1,
                      batch_size=bs,
                      learning_rule=learning_rule.Momentum(mm),
                      cost=Dropout(input_include_probs={'l1': 1.},
                                   input_scales={'l1': 1.},
                                   default_input_include_prob=ip,
                                   default_input_scale=1 / ip),
def get_layer_MLP(layers, trainset, validset):
    # processor = Standardize()

    # trainset = BlackBoxDataset(which_set='train',
    #                            start=0,
    #                            stop=900,
    #                            preprocessor=Standardize(),
    #                            fit_preprocessor=True,
    #                            fit_test_preprocessor=True)
    #
    # validset = BlackBoxDataset(which_set='train',
    #                            start=900,
    #                            stop=1000,
    #                            preprocessor=Standardize(),
    #                            fit_preprocessor=True,
    #                            fit_test_preprocessor=False)

    dropCfg = {'input_include_probs': {'h0': .8},
               'input_scales': {'h0': 1.}}

    config = {'learning_rate': .05,
              'init_momentum': .00,
              'cost': Dropout(**dropCfg),  # Default()
              'monitoring_dataset': {'train': trainset, 'valid': validset},
              'termination_criterion': MonitorBased(channel_name='valid_y_misclass',
                                                    N=50,
                                                    prop_decrease=0),
              'update_callbacks': None}

    # configCfg0 = {'layer_name': 'h0',
    #               'dim': 1875,
    #               'irange': .05,
    #               # Rather than using weight decay, we constrain the norms of the weight vectors
    #               'max_col_norm': 1.}
    #
    # configCfg1 = {'layer_name': 'h1',
    #               'dim': 1875,
    #               'irange': .05,
    #               # Rather than using weight decay, we constrain the norms of the weight vectors
    #               'max_col_norm': 1.}

    sftmaxCfg = {'layer_name': 'y',
                 'init_bias_target_marginals': trainset,
                 # Initialize the weights to all 0s
                 'irange': .0,
                 'n_classes': 9}

    layers.append(Softmax(**sftmaxCfg))

    train_algo = SGD(**config)
    model = MLP(batch_size=50, layers=layers, nvis=1875)
    return Train(model=model,
                 dataset=trainset,
                 algorithm=train_algo,
                 extensions=None,  # [LinearDecayOverEpoch(start=5, saturate=100, decay_factor=.01)],
                 save_path="sae_2_best_model.pkl",
                 save_freq=100)
y = Softmax(n_classes=2, layer_name="y", irange=0.1)

inputSpace = Conv2DSpace(shape=[cropSize, cropSize], num_channels=3)

model = MLP(layers=[h0, h1, y], batch_size=batchSize, input_space=inputSpace)

algorithm = SGD(learning_rate=1E-3,
                cost=SumOfCosts([
                    MethodCost("cost_from_X"),
                    Dropout(default_input_include_prob=0.25,
                            default_input_scale=1.3333)
                ]),
                batch_size=batchSize,
                monitoring_batch_size=batchSize,
                monitoring_dataset={'train': train, 'valid': valid},
                monitor_iteration_mode="even_batchwise_shuffled_sequential",
                termination_criterion=EpochCounter(max_epochs=200),
                learning_rule=Momentum(init_momentum=0.0),
                train_iteration_mode="even_batchwise_shuffled_sequential")

train = Train(dataset=train,
              model=model,
              algorithm=algorithm,
              save_path="ConvNet8.pkl",
              save_freq=1,
                     irange=ir, dim=dim, max_col_norm=1.)
l3 = RectifiedLinear(layer_name='l3',
                     irange=ir, dim=dim, max_col_norm=1.)
output = Softmax(layer_name='y', n_classes=9,
                 irange=ir, max_col_norm=mcn_out)
mdl = MLP([l1, l2, l3, output], nvis=X2.shape[1])

trainer = sgd.SGD(learning_rate=lr,
                  batch_size=bs,
                  learning_rule=learning_rule.Momentum(mm),
                  cost=Dropout(default_input_include_prob=ip,
                               default_input_scale=1 / ip),
                  termination_criterion=EpochCounter(epochs),
                  seed=seed)
decay = sgd.LinearDecayOverEpoch(start=2, saturate=20, decay_factor=.1)

# fname = path + 'model/TRI_' + 'kmax_' + str(k_max) + '_seed_' + str(seed) + '.pkl'
experiment = Train(dataset=training, model=mdl,
                   algorithm=trainer, extensions=[decay])
                   # save_path=fname, save_freq=epochs)
experiment.main_loop()

pred_train = predict(mdl, X2[:num_train].astype(np.float32))
pred_test = predict(mdl, X2[num_train:].astype(np.float32))
predAll_train += pred_train
predAll_test += pred_test
sc1 = log_loss(yMat, pred_train)
h1 = mlp.Softplus(layer_name='h1', dim=60, sparse_init=0)
y0 = mlp.Softmax(layer_name='y0', n_classes=5, irange=0)
layers = [h0, h1, y0]

model = mlp.MLP(layers, nvis=train.X.shape[1])
monitoring = dict(valid=valid)
termination = MonitorBased(channel_name="valid_y0_misclass", N=5)
extensions = [best_params.MonitorBasedSaveBest(channel_name="valid_y0_misclass",
                                               save_path="train_best.pkl")]

algorithm = sgd.SGD(0.1,
                    batch_size=100,
                    cost=Dropout(),
                    monitoring_dataset=monitoring,
                    termination_criterion=termination)

print 'Running training'
train_job = Train(train, model, algorithm,
                  extensions=extensions,
                  save_path="train.pkl",
                  save_freq=1)
train_job.main_loop()

# Rectified Linear with Momentum
from pylearn2.training_algorithms import sgd, learning_rule