def train_model():
    global ninput, noutput

    # Load and preprocess the simulation data
    simdata = SimulationData(
        sim_path="../../javaDataCenter/generarDadesV1/CA_SDN_topo1/")
    simdata.load_data()
    simdata.preprocessor()
    dataset = simdata.get_matrix()

    # Build a stacked autoencoder, one layer per structure pair
    structure = get_structure()
    layers = []
    for pair in structure:
        layers.append(get_autoencoder(pair))
    model = DeepComposedAutoencoder(layers)

    # Train with SGD plus monitor-based learning-rate adjustment,
    # checkpointing periodically
    training_alg = SGD(learning_rate=1e-3,
                       cost=MeanSquaredReconstructionError(),
                       batch_size=1296,
                       monitoring_dataset=dataset,
                       termination_criterion=EpochCounter(max_epochs=50))
    extensions = [MonitorBasedLRAdjuster()]
    experiment = Train(dataset=dataset,
                       model=model,
                       algorithm=training_alg,
                       save_path='training2.pkl',
                       save_freq=10,
                       allow_overwrite=True,
                       extensions=extensions)
    experiment.main_loop()
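
# The snippets in this file assume the surrounding pylearn2 imports. A minimal
# sketch of the imports train_model() above would need; the module paths are
# assumed from the pylearn2 source layout of that era and may need adjusting.
# SimulationData, get_structure and get_autoencoder are project-local helpers
# defined elsewhere in the experiment's code base.
from pylearn2.train import Train
from pylearn2.training_algorithms.sgd import SGD, MonitorBasedLRAdjuster
from pylearn2.termination_criteria import EpochCounter
from pylearn2.costs.autoencoder import MeanSquaredReconstructionError
from pylearn2.models.autoencoder import DeepComposedAutoencoder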
def testing_multiple_datasets_with_specified_dataset_in_monitor_based_lr():
    # Tests that MonitorBasedLRAdjuster (in sgd.py) can properly use the
    # dataset_name specified in the constructor when multiple datasets exist.
    dim = 3
    m = 10

    rng = np.random.RandomState([6, 2, 2014])
    X = rng.randn(m, dim)
    Y = rng.randn(m, dim)

    learning_rate = 1e-2
    batch_size = 5

    # We need to include this so the test actually stops running at some point
    epoch_num = 1

    # Including monitoring datasets lets us test that the monitor works
    # with supervised data
    monitoring_train = DenseDesignMatrix(X=X)
    monitoring_test = DenseDesignMatrix(X=Y)

    cost = DummyCost()
    model = SoftmaxModel(dim)
    dataset = DenseDesignMatrix(X=X)
    termination_criterion = EpochCounter(epoch_num)

    monitoring_dataset = {'train': monitoring_train, 'test': monitoring_test}

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    monitoring_batches=2,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    dataset_name = monitoring_dataset.keys()[0]
    monitor_lr = MonitorBasedLRAdjuster(dataset_name=dataset_name)

    train = Train(dataset,
                  model,
                  algorithm,
                  save_path=None,
                  save_freq=0,
                  extensions=[monitor_lr])
    train.main_loop()
def test_train_supervised():
    """
    Train a supervised GSN.
    """
    # initialize the GSN
    gsn = GSN.new(
        layer_sizes=[ds.X.shape[1], 1000, ds.y.shape[1]],
        activation_funcs=["sigmoid", "tanh", rescaled_softmax],
        pre_corruptors=[GaussianCorruptor(0.5)] * 3,
        post_corruptors=[
            SaltPepperCorruptor(.3), None, SmoothOneHotCorruptor(.5)
        ],
        layer_samplers=[BinomialSampler(), None, MultinomialSampler()],
        tied=False)

    # average over costs rather than summing
    _rcost = MeanBinaryCrossEntropy()
    reconstruction_cost = lambda a, b: _rcost.cost(a, b) / ds.X.shape[1]

    _ccost = MeanBinaryCrossEntropy()
    classification_cost = lambda a, b: _ccost.cost(a, b) / ds.y.shape[1]

    # combine costs into GSNCost object
    c = GSNCost(
        [
            # reconstruction on layer 0 with weight 1.0
            (0, 1.0, reconstruction_cost),

            # classification on layer 2 with weight 2.0
            (2, 2.0, classification_cost)
        ],
        walkback=WALKBACK,
        mode="supervised")

    alg = SGD(
        LEARNING_RATE,
        init_momentum=MOMENTUM,
        cost=c,
        termination_criterion=EpochCounter(MAX_EPOCHS),
        batches_per_iter=BATCHES_PER_EPOCH,
        batch_size=BATCH_SIZE,
        monitoring_dataset=ds,
        monitoring_batches=10,
    )

    trainer = Train(ds, gsn, algorithm=alg,
                    save_path="gsn_sup_example.pkl", save_freq=10,
                    extensions=[MonitorBasedLRAdjuster()])
    trainer.main_loop()

    print("done training")
def get_layer_trainer_sgd_rbm(layer, trainset):
    train_algo = SGD(
        learning_rate=1e-1,
        batch_size=5,
        # "batches_per_iter" : 2000,
        monitoring_batches=20,
        monitoring_dataset=trainset,
        cost=SMD(corruptor=GaussianCorruptor(stdev=0.4)),
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS_UNSUPERVISED),
    )
    model = layer
    extensions = [MonitorBasedLRAdjuster()]
    return Train(model=model, algorithm=train_algo,
                 save_path='grbm.pkl', save_freq=1,
                 extensions=extensions, dataset=trainset)
def testing_multiple_datasets_in_monitor_based_lr():
    # Tests that MonitorBasedLRAdjuster (in sgd.py) rejects a monitoring setup
    # with multiple datasets when more than one channel ending in '_objective'
    # exists. This happens when the user has specified neither channel_name
    # nor dataset_name in the constructor.
    dim = 3
    m = 10

    rng = np.random.RandomState([6, 2, 2014])
    X = rng.randn(m, dim)
    Y = rng.randn(m, dim)

    learning_rate = 1e-2
    batch_size = 5

    # We need to include this so the test actually stops running at some point
    epoch_num = 1

    # Including monitoring datasets lets us test that the monitor works
    # with supervised data
    monitoring_train = DenseDesignMatrix(X=X)
    monitoring_test = DenseDesignMatrix(X=Y)

    cost = DummyCost()
    model = SoftmaxModel(dim)
    dataset = DenseDesignMatrix(X=X)
    termination_criterion = EpochCounter(epoch_num)

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    monitoring_batches=2,
                    monitoring_dataset={'train': monitoring_train,
                                        'test': monitoring_test},
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    monitor_lr = MonitorBasedLRAdjuster()

    train = Train(dataset,
                  model,
                  algorithm,
                  save_path=None,
                  save_freq=0,
                  extensions=[monitor_lr])

    try:
        train.main_loop()
    except ValueError:
        return

    raise AssertionError("MonitorBasedLRAdjuster should raise a ValueError "
                         "when given multiple datasets with more than one "
                         "'objective' channel and the user has specified "
                         "neither channel_name nor dataset_name in the "
                         "constructor to disambiguate.")
def get_layer_trainer_sgd_rbm(layer, trainset):
    train_algo = SGD(
        learning_rate=1e-1,
        batch_size=5,
        # "batches_per_iter" : 2000,
        monitoring_batches=20,
        monitoring_dataset=trainset,
        cost=SMD(corruptor=GaussianCorruptor(stdev=0.4)),
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS),
        # another option:
        # MonitorBasedTermCrit(prop_decrease=0.01, N=10),
    )
    model = layer
    callbacks = [MonitorBasedLRAdjuster(), ModelSaver()]
    return Train(model=model, algorithm=train_algo,
                 callbacks=callbacks, dataset=trainset)
def create_adjustors(self):
    initial_momentum = .5
    final_momentum = .99
    start = 1
    saturate = self.max_epochs
    self.momentum_adjustor = learning_rule.MomentumAdjustor(
        final_momentum, start, saturate)
    self.momentum_rule = learning_rule.Momentum(
        initial_momentum, nesterov_momentum=True)

    if self.lr_monitor_decay:
        self.learning_rate_adjustor = MonitorBasedLRAdjuster(
            high_trigger=1., shrink_amt=0.9,
            low_trigger=.95, grow_amt=1.1,
            channel_name='train_objective')
    elif self.lr_lin_decay:
        self.learning_rate_adjustor = LinearDecayOverEpoch(
            start, saturate, self.lr_lin_decay)
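
# create_adjustors() only builds the learning rule and the extensions; they
# still have to be handed to the training algorithm and the Train loop. Below
# is a minimal sketch of that wiring, following the pattern of the other
# snippets here and assuming an SGD constructor that accepts a learning_rule
# argument. build_trainer and its parameters are hypothetical names, not part
# of the original class.
def build_trainer(self, dataset, model, cost):
    self.create_adjustors()
    # The 'train' key of the monitoring dataset matches the 'train_objective'
    # channel_name used by the MonitorBasedLRAdjuster above.
    algorithm = SGD(learning_rate=1e-2,
                    cost=cost,
                    batch_size=100,
                    learning_rule=self.momentum_rule,
                    monitoring_dataset={'train': dataset},
                    termination_criterion=EpochCounter(self.max_epochs))
    extensions = [self.momentum_adjustor, self.learning_rate_adjustor]
    return Train(dataset=dataset, model=model,
                 algorithm=algorithm, extensions=extensions)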
def test_monitor_based_lr():
    # Tests that MonitorBasedLRAdjuster (in sgd.py) adjusts the learning rate
    # properly over the training epochs. It trains a small softmax model and
    # at the end checks the learning-rate values. It runs 2 loops; each loop
    # exercises one of the if clauses when checking the observation channels.
    # Otherwise, longer training would be needed to observe both the if and
    # the elif cases.
    high_trigger = 1.0
    shrink_amt = 0.99
    low_trigger = 0.99
    grow_amt = 1.01
    min_lr = 1e-7
    max_lr = 1.

    dim = 3
    m = 10

    rng = np.random.RandomState([25, 9, 2012])
    X = rng.randn(m, dim)
    dataset = DenseDesignMatrix(X=X)

    m = 15
    X = rng.randn(m, dim)

    learning_rate = 1e-2
    batch_size = 5

    # We need to include this so the test actually stops running at some point
    epoch_num = 5

    # Including a monitoring dataset lets us test that the monitor works
    # with supervised data
    monitoring_dataset = DenseDesignMatrix(X=X)

    cost = DummyCost()

    for i in xrange(2):
        if i == 1:
            high_trigger = 0.99

        model = SoftmaxModel(dim)
        termination_criterion = EpochCounter(epoch_num)

        algorithm = SGD(learning_rate,
                        cost,
                        batch_size=batch_size,
                        monitoring_batches=3,
                        monitoring_dataset=monitoring_dataset,
                        termination_criterion=termination_criterion,
                        update_callbacks=None,
                        init_momentum=None,
                        set_batch_size=False)

        monitor_lr = MonitorBasedLRAdjuster(high_trigger=high_trigger,
                                            shrink_amt=shrink_amt,
                                            low_trigger=low_trigger,
                                            grow_amt=grow_amt,
                                            min_lr=min_lr,
                                            max_lr=max_lr)

        train = Train(dataset,
                      model,
                      algorithm,
                      save_path=None,
                      save_freq=0,
                      extensions=[monitor_lr])
        train.main_loop()

        v = model.monitor.channels['objective'].val_record
        lr = model.monitor.channels['learning_rate'].val_record
        lr_monitor = learning_rate

        # Replay the adjustment rule and compare against the recorded rates
        # (use j here to avoid shadowing the outer loop variable)
        for j in xrange(2, epoch_num + 1):
            if v[j - 1] > high_trigger * v[j - 2]:
                lr_monitor *= shrink_amt
            elif v[j - 1] > low_trigger * v[j - 2]:
                lr_monitor *= grow_amt
            lr_monitor = max(min_lr, lr_monitor)
            lr_monitor = min(max_lr, lr_monitor)
            assert np.allclose(lr_monitor, lr[j])
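
# The inner loop at the end of test_monitor_based_lr() replays the adjustment
# rule the extension is expected to apply after each monitoring step. Pulled
# out as a standalone helper (a sketch of the expected behaviour only; the
# real logic lives in MonitorBasedLRAdjuster in sgd.py):
def adjusted_learning_rate(objectives, learning_rate,
                           high_trigger=1.0, shrink_amt=0.99,
                           low_trigger=0.99, grow_amt=1.01,
                           min_lr=1e-7, max_lr=1.0):
    """Replay the learning-rate schedule implied by a monitored objective.

    Shrink the rate when the objective rose above high_trigger times its
    previous value, grow it when it merely stayed above low_trigger times
    the previous value (i.e. barely improved), and clip to [min_lr, max_lr]
    after every step.
    """
    lr = learning_rate
    for prev, cur in zip(objectives[:-1], objectives[1:]):
        if cur > high_trigger * prev:
            lr *= shrink_amt
        elif cur > low_trigger * prev:
            lr *= grow_amt
        lr = max(min_lr, min(max_lr, lr))
    return lr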
def test_bad_monitoring_input_in_monitor_based_lr():
    # Tests that MonitorBasedLRAdjuster (in sgd.py) rejects invalid settings
    # of channel_name or dataset_name in the constructor.
    dim = 3
    m = 10

    rng = np.random.RandomState([6, 2, 2014])
    X = rng.randn(m, dim)

    learning_rate = 1e-2
    batch_size = 5

    # We need to include this so the test actually stops running at some point
    epoch_num = 2

    dataset = DenseDesignMatrix(X=X)

    # Including a monitoring dataset lets us test that the monitor works
    # with supervised data
    monitoring_dataset = DenseDesignMatrix(X=X)

    cost = DummyCost()
    model = SoftmaxModel(dim)
    termination_criterion = EpochCounter(epoch_num)

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    monitoring_batches=2,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    # testing for bad dataset_name input
    dummy = 'void'

    monitor_lr = MonitorBasedLRAdjuster(dataset_name=dummy)

    train = Train(dataset,
                  model,
                  algorithm,
                  save_path=None,
                  save_freq=0,
                  extensions=[monitor_lr])

    try:
        train.main_loop()
    except ValueError:
        pass
    except Exception:
        reraise_as(AssertionError("MonitorBasedLRAdjuster should raise a "
                                  "ValueError when given an invalid "
                                  "dataset_name"))

    # testing for bad channel_name input
    monitor_lr2 = MonitorBasedLRAdjuster(channel_name=dummy)

    model2 = SoftmaxModel(dim)
    train2 = Train(dataset,
                   model2,
                   algorithm,
                   save_path=None,
                   save_freq=0,
                   extensions=[monitor_lr2])

    try:
        train2.main_loop()
    except ValueError:
        pass
    except Exception:
        reraise_as(AssertionError("MonitorBasedLRAdjuster should raise a "
                                  "ValueError when given an invalid "
                                  "channel_name"))

    return
def test_bad_monitoring_input_in_monitor_based_lr():
    # Tests that MonitorBasedLRAdjuster (in sgd.py) rejects invalid settings
    # of channel_name or dataset_name in the constructor, and that it raises
    # the expected error messages.
    dim = 3
    m = 10

    rng = np.random.RandomState([6, 2, 2014])
    X = rng.randn(m, dim)

    learning_rate = 1e-2
    batch_size = 5

    # We need to include this so the test actually stops running at some point
    epoch_num = 2

    dataset = DenseDesignMatrix(X=X)

    # Including a monitoring dataset lets us test that the monitor works
    # with supervised data
    monitoring_dataset = DenseDesignMatrix(X=X)

    cost = DummyCost()
    model = SoftmaxModel(dim)
    termination_criterion = EpochCounter(epoch_num)

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    monitoring_batches=2,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    # testing for bad dataset_name input
    dummy = 'void'

    monitor_lr = MonitorBasedLRAdjuster(dataset_name=dummy)

    train = Train(dataset,
                  model,
                  algorithm,
                  save_path=None,
                  save_freq=0,
                  extensions=[monitor_lr])

    try:
        train.main_loop()
    except ValueError as e:
        err_input = 'The dataset_name \'' + dummy + '\' is not valid.'
        channel_name = dummy + '_objective'
        err_message = ('There is no monitoring channel named \'' +
                       channel_name +
                       '\'. You probably need to specify a valid monitoring '
                       'channel by using either dataset_name or channel_name '
                       'in the MonitorBasedLRAdjuster constructor. ' +
                       err_input)
        assert err_message == str(e)
    except:
        raise AssertionError("MonitorBasedLRAdjuster should raise a "
                             "ValueError when given an invalid dataset_name")

    # testing for bad channel_name input
    monitor_lr2 = MonitorBasedLRAdjuster(channel_name=dummy)

    model2 = SoftmaxModel(dim)
    train2 = Train(dataset,
                   model2,
                   algorithm,
                   save_path=None,
                   save_freq=0,
                   extensions=[monitor_lr2])

    try:
        train2.main_loop()
    except ValueError as e:
        err_input = 'The channel_name \'' + dummy + '\' is not valid.'
        err_message = ('There is no monitoring channel named \'' + dummy +
                       '\'. You probably need to specify a valid monitoring '
                       'channel by using either dataset_name or channel_name '
                       'in the MonitorBasedLRAdjuster constructor. ' +
                       err_input)
        assert err_message == str(e)
    except:
        raise AssertionError("MonitorBasedLRAdjuster should raise a "
                             "ValueError when given an invalid channel_name")

    return