def yaml_file_execution(file_path):
    """Load the YAML experiment at ``file_path`` and run it for two epochs.

    The loaded object's ``algorithm.termination_criterion`` is replaced by
    ``EpochCounter(max_epochs=2)`` before ``main_loop()`` is called.

    Parameters
    ----------
    file_path : str
        Path handed to ``yaml_parse.load_path``.

    Raises
    ------
    SkipTest
        If ``NoDataPathError`` is raised while loading or running.
    """
    try:
        experiment = yaml_parse.load_path(file_path)
        two_epochs = EpochCounter(max_epochs=2)
        experiment.algorithm.termination_criterion = two_epochs
        experiment.main_loop()
    except NoDataPathError:
        raise SkipTest("PYLEARN2_DATA_PATH environment variable not defined")
def test_train_ae():
    """Train a two-layer GSN on the module-level dataset ``ds``.

    The run is configured by the module-level constants ``WALKBACK``,
    ``LEARNING_RATE``, ``MOMENTUM``, ``MAX_EPOCHS``, ``BATCHES_PER_EPOCH``
    and ``BATCH_SIZE``; the ``Train`` object is given the save path
    ``gsn_ae_example.pkl``.
    """
    noise = GaussianCorruptor
    model = GSN.new(
        layer_sizes=[ds.X.shape[1], 1000],
        activation_funcs=["sigmoid", "tanh"],
        pre_corruptors=[None, noise(1.0)],
        post_corruptors=[SaltPepperCorruptor(0.5), noise(1.0)],
        layer_samplers=[BinomialSampler(), None],
        tied=False,
    )

    cross_entropy = MeanBinaryCrossEntropy()

    def reconstruction_cost(a, b):
        # average MBCE over example rather than sum it
        return cross_entropy.cost(a, b) / ds.X.shape[1]

    gsn_cost = GSNCost([(0, 1.0, reconstruction_cost)], walkback=WALKBACK)

    optimiser = SGD(
        LEARNING_RATE,
        init_momentum=MOMENTUM,
        cost=gsn_cost,
        termination_criterion=EpochCounter(MAX_EPOCHS),
        batches_per_iter=BATCHES_PER_EPOCH,
        batch_size=BATCH_SIZE,
        monitoring_dataset=ds,
        monitoring_batches=10,
    )

    experiment = Train(ds, model, algorithm=optimiser,
                       save_path="gsn_ae_example.pkl", save_freq=5)
    experiment.main_loop()
    print("done training")
def get_layer_trainer_sgd_autoencoder(layer, trainset, batch_size=10,
                                      learning_rate=0.1, max_epochs=100,
                                      name=''):
    """Build a ``Train`` object that fits ``layer`` on ``trainset`` with SGD.

    Parameters
    ----------
    layer : object
        Model passed to ``Train`` as ``model``.
    trainset : object
        Used both as the training and as the monitoring dataset.
    batch_size : int
        Batch size handed to ``SGD``.
    learning_rate : float
        Learning rate handed to ``SGD``.
    max_epochs : int
        Limit given to the ``EpochCounter`` termination criterion.
    name : str
        Passed to ``LoggingCallback``.

    Returns
    -------
    Train
        Configured with the SGD algorithm and three extensions
        (``LoggingCallback``, ``OneOverEpoch``, ``MomentumAdjustor``).
    """
    # configs on sgd
    sgd_settings = dict(
        learning_rate=learning_rate,
        # learning_rule=AdaDelta(),
        learning_rule=Momentum(init_momentum=0.5),
        cost=MeanSquaredReconstructionError(),
        batch_size=batch_size,
        monitoring_dataset=trainset,
        termination_criterion=EpochCounter(max_epochs=max_epochs),
        update_callbacks=None,
    )
    optimiser = SGD(**sgd_settings)

    callbacks = [
        LoggingCallback(name),
        OneOverEpoch(start=1, half_life=5),
        MomentumAdjustor(final_momentum=0.7, start=10, saturate=100),
    ]
    return Train(model=layer, algorithm=optimiser, extensions=callbacks,
                 dataset=trainset)
def train_with_monitoring_datasets(train_dataset, monitoring_datasets,
                                   model_force_batch_size,
                                   train_iteration_mode,
                                   monitor_iteration_mode):
    """Run a two-epoch SGD training with the given iteration modes.

    ``dim``, ``learning_rate`` and ``batch_size`` are not parameters; they
    are read from the enclosing scope.

    Parameters
    ----------
    train_dataset : object
        Dataset handed to ``Train``.
    monitoring_datasets : object
        Passed to ``SGD`` as ``monitoring_dataset``.
    model_force_batch_size : int or falsy
        When truthy, stored on the model as ``force_batch_size``.
    train_iteration_mode, monitor_iteration_mode : object
        Forwarded unchanged to ``SGD``.
    """
    model = SoftmaxModel(dim)
    if model_force_batch_size:
        model.force_batch_size = model_force_batch_size

    dummy_cost = DummyCost()
    sgd_kwargs = dict(
        batch_size=batch_size,
        train_iteration_mode=train_iteration_mode,
        monitor_iteration_mode=monitor_iteration_mode,
        monitoring_dataset=monitoring_datasets,
        termination_criterion=EpochCounter(2),
    )
    optimiser = SGD(learning_rate, dummy_cost, **sgd_kwargs)

    experiment = Train(train_dataset, model, optimiser,
                       save_path=None, save_freq=0, extensions=None)
    experiment.main_loop()
def test_execution_order():
    """Check that save is called directly after monitoring.

    The check itself lives in ``ParamMonitor`` (per the original note it
    compares parameter values in ``on_monitor`` and ``on_save``); here the
    real ``Train.save`` is replaced by ``only_run_extensions`` before the
    one-epoch run starts.
    """
    softmax = Softmax(layer_name='y', n_classes=2, irange=0.)
    model = MLP(layers=[softmax], nvis=3)

    features = np.random.normal(size=(6, 3))
    targets = np.random.normal(size=(6, 2))
    dataset = DenseDesignMatrix(X=features, y=targets)

    one_epoch = EpochCounter(max_epochs=1)
    optimiser = SGD(batch_size=2, learning_rate=0.1,
                    termination_criterion=one_epoch)

    monitor_extension = ParamMonitor()
    experiment = Train(dataset=dataset,
                       model=model,
                       algorithm=optimiser,
                       extensions=[monitor_extension],
                       save_freq=1,
                       save_path="save.pkl")

    # mock save
    experiment.save = MethodType(only_run_extensions, experiment)
    experiment.main_loop()
def get_ae_pretrainer(layer, data, batch_size, epochs=30):
    """Build a ``Train`` object that pretrains ``layer`` on ``data`` with SGD.

    Parameters
    ----------
    layer : object
        Model passed to ``Train``.
    data : object
        Used as both the training and the monitoring dataset.
    batch_size : int
        Passed to ``SGD`` as ``batch_size`` and also as
        ``monitoring_batches``.
    epochs : int
        Limit given to the ``EpochCounter`` termination criterion.

    Returns
    -------
    Train
    """
    init_lr = 0.05

    # Alternative costs noted by the original author:
    # for ContractiveAutoencoder:
    # cost=cost.SumOfCosts(costs=[[1., MeanSquaredReconstructionError()],
    #                             [0.5, cost.MethodCost(method='contraction_penalty')]]),
    # for HigherOrderContractiveAutoencoder:
    # cost=cost.SumOfCosts(costs=[[1., MeanSquaredReconstructionError()],
    #                             [0.5, cost.MethodCost(method='contraction_penalty')],
    #                             [0.5, cost.MethodCost(method='higher_order_penalty')]]),
    sgd_settings = dict(
        batch_size=batch_size,
        learning_rate=init_lr,
        learning_rule=Momentum(init_momentum=0.5),
        monitoring_batches=batch_size,
        monitoring_dataset=data,
        # for DenoisingAutoencoder:
        cost=MeanSquaredReconstructionError(),
        termination_criterion=EpochCounter(epochs),
    )
    optimiser = SGD(**sgd_settings)

    schedule = [
        MomentumAdjustor(final_momentum=0.9, start=0, saturate=25),
        LinearDecayOverEpoch(start=1, saturate=25, decay_factor=.02),
    ]
    return Train(model=layer, algorithm=optimiser, dataset=data,
                 extensions=schedule)
def cnn_run_dropout_maxout(data_path, num_rows, num_cols, num_channels,
                           input_path, pred_path):
    """Train a two-conv-layer network on SAR data, save it and predict.

    Parameters
    ----------
    data_path : object
        Not used by this function (see the review note below).
    num_rows, num_cols : int
        Shape of the ``Conv2DSpace`` input; ``num_cols`` is also passed to
        ``gen_center_sub_window``.
    num_channels : int
        Number of channels of the input space.
    input_path : object
        Passed to ``read_hhv`` to obtain the image to predict on.
    pred_path : object
        Passed to ``sar_predict``.

    Returns
    -------
    tuple
        ``(ann, sar_predict(ann, image, pred_path))``.
    """
    t = time.time()
    sub_window = gen_center_sub_window(76, num_cols)
    # NOTE(review): `ds` is not defined in this function and `data_path` is
    # never read; presumably `ds` is a module-level (train, valid, test)
    # split loaded elsewhere -- confirm.
    trn = SarDataset(ds[0][0], ds[0][1], sub_window)
    vld = SarDataset(ds[1][0], ds[1][1], sub_window)
    tst = SarDataset(ds[2][0], ds[2][1], sub_window)
    print('Take {}s to read data'.format(time.time() - t))

    t = time.time()
    batch_size = 100
    hidden_layer = mlp.ConvRectifiedLinear(layer_name='h2',
                                           output_channels=8,
                                           irange=0.05,
                                           kernel_shape=[5, 5],
                                           pool_shape=[2, 2],
                                           pool_stride=[2, 2],
                                           max_kernel_norm=1.9365)
    hidden_layer2 = mlp.ConvRectifiedLinear(layer_name='h3',
                                            output_channels=8,
                                            irange=0.05,
                                            kernel_shape=[5, 5],
                                            pool_shape=[2, 2],
                                            pool_stride=[2, 2],
                                            max_kernel_norm=1.9365)
    # output_layer = mlp.Softplus(dim=1, layer_name='output', irange=0.1)
    output_layer = mlp.Linear(dim=1, layer_name='output', irange=0.05)

    # The same batch size is used for the algorithm and for the MLP.
    trainer = sgd.SGD(learning_rate=0.001,
                      batch_size=batch_size,
                      termination_criterion=EpochCounter(2000),
                      cost=dropout.Dropout(),
                      train_iteration_mode='even_shuffled_sequential',
                      monitor_iteration_mode='even_shuffled_sequential',
                      monitoring_dataset={
                          'test': tst,
                          'valid': vld,
                          'train': trn
                      })

    layers = [hidden_layer, hidden_layer2, output_layer]
    input_space = space.Conv2DSpace(shape=[num_rows, num_cols],
                                    num_channels=num_channels)
    ann = mlp.MLP(layers, input_space=input_space, batch_size=batch_size)

    watcher = best_params.MonitorBasedSaveBest(channel_name='valid_objective',
                                               save_path='sar_cnn_mlp.pkl')
    experiment = Train(dataset=trn, model=ann, algorithm=trainer,
                       extensions=[watcher])
    print('Take {}s to compile code'.format(time.time() - t))

    t = time.time()
    experiment.main_loop()
    print('Training time: {}s'.format(time.time() - t))

    serial.save('cnn_hhv_{0}_{1}.pkl'.format(num_rows, num_cols), ann,
                on_overwrite='backup')

    # read hh and hv into a 3D numpy
    image = read_hhv(input_path)
    return ann, sar_predict(ann, image, pred_path)
def test_mnist():
    """
    Test the mnist.yaml file from the maxout paper on random input
    """
    skip_if_no_gpu()
    train = load_train_file(
        os.path.join(pylearn2.__path__[0],
                     "scripts/papers/maxout/mnist.yaml"))

    # Load fake MNIST data.  The previous value of the flag is restored in a
    # ``finally`` so that a failure while building the dataset cannot leave
    # ``control.load_data`` switched off for whatever runs afterwards.
    init_value = control.load_data
    control.load_data = [False]
    try:
        train.dataset = MNIST(which_set='train',
                              one_hot=1,
                              axes=['c', 0, 1, 'b'],
                              start=0,
                              stop=100)
        train.algorithm._set_monitoring_dataset(train.dataset)
    finally:
        control.load_data = init_value

    # Train shortly and prevent saving
    train.algorithm.termination_criterion = EpochCounter(max_epochs=1)
    train.extensions.pop(0)
    train.save_freq = 0
    train.main_loop()
def train_model():
    """Train a ``DeepComposedAutoencoder`` on the simulation data set.

    The data is loaded from a fixed relative path, one autoencoder is built
    per entry of ``get_structure()``, and the stack is trained with SGD
    under an ``EpochCounter(max_epochs=50)``.  The ``Train`` object is given
    the save path ``training2.pkl`` with ``save_freq=10``.
    """
    global ninput, noutput

    simdata = SimulationData(
        sim_path="../../javaDataCenter/generarDadesV1/CA_SDN_topo1/")
    simdata.load_data()
    simdata.preprocessor()
    dataset = simdata.get_matrix()

    # One autoencoder per entry of the structure description.
    layers = [get_autoencoder(pair) for pair in get_structure()]
    model = DeepComposedAutoencoder(layers)

    training_alg = SGD(learning_rate=1e-3,
                       cost=MeanSquaredReconstructionError(),
                       batch_size=1296,
                       monitoring_dataset=dataset,
                       termination_criterion=EpochCounter(max_epochs=50))

    experiment = Train(dataset=dataset,
                       model=model,
                       algorithm=training_alg,
                       save_path='training2.pkl',
                       save_freq=10,
                       allow_overwrite=True,
                       extensions=[MonitorBasedLRAdjuster()])
    experiment.main_loop()
def test_sgd_sup():
    """Smoke test: SGD runs on a supervised cost without dying.

    Does not test for correctness at all.
    """
    dim = 3
    rng = np.random.RandomState([25, 9, 2012])

    def random_one_hot_dataset(num_examples):
        # Draw features first, then the class indices, so the RNG stream is
        # consumed in the same order for every dataset.
        features = rng.randn(num_examples, dim)
        labels = rng.randint(0, dim, (num_examples, ))
        one_hot = np.zeros((num_examples, dim))
        one_hot[np.arange(num_examples), labels] = 1
        return DenseDesignMatrix(X=features, y=one_hot)

    dataset = random_one_hot_dataset(10)

    # Including a monitoring dataset lets us test that
    # the monitor works with supervised data
    monitoring_dataset = random_one_hot_dataset(15)

    model = SoftmaxModel(dim)
    learning_rate = 1e-3
    batch_size = 5
    cost = SupervisedDummyCost()

    # We need to include this so the test actually stops running at some point
    stop_after = EpochCounter(5)

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    monitoring_batches=3,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=stop_after,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    experiment = Train(dataset, model, algorithm,
                       save_path=None, save_freq=0, extensions=None)
    experiment.main_loop()
def get_trainer(model, trainset, validset, epochs=20, batch_size=200):
    """Build a ``Train`` object that uses SGD with a ``Dropout`` cost.

    Parameters
    ----------
    model : object
        Model passed to ``Train``.
    trainset : object
        Training dataset.
    validset : object or None
        Monitoring dataset; when it is ``None`` no ``monitoring_batches``
        value is given (``None``), otherwise 20.
    epochs : int
        Limit for the ``EpochCounter``; the momentum adjustor saturates at
        ``int(epochs*0.8)``.
    batch_size : int
        Batch size handed to ``SGD``.

    Returns
    -------
    Train
    """
    if validset is None:
        monitoring_batches = None
    else:
        monitoring_batches = 20

    # Per-layer inclusion probabilities; each scale is the reciprocal.
    include_probs = {'h0': 0.8, 'h1': 0.8, 'h2': 0.8, 'h3': 0.8, 'y': 0.5}
    input_scales = {}
    for layer_name in include_probs:
        input_scales[layer_name] = 1. / include_probs[layer_name]

    dropout_cost = Dropout(input_include_probs=include_probs,
                           input_scales=input_scales,
                           default_input_include_prob=0.5,
                           default_input_scale=1. / 0.5)

    train_algo = SGD(batch_size=batch_size,
                     init_momentum=0.5,
                     learning_rate=0.1,
                     monitoring_batches=monitoring_batches,
                     monitoring_dataset=validset,
                     cost=dropout_cost,
                     termination_criterion=EpochCounter(epochs),
                     update_callbacks=ExponentialDecay(decay_factor=1.0001,
                                                       min_lr=0.001))

    momentum_schedule = MomentumAdjustor(final_momentum=0.9,
                                         start=0,
                                         saturate=int(epochs*0.8))
    return Train(model=model,
                 algorithm=train_algo,
                 dataset=trainset,
                 save_freq=0,
                 save_path='epoch',
                 extensions=[momentum_schedule])
def test_multiple_inputs():
    """
    Create a VectorSpacesDataset with two inputs (features0 and features1)
    and train an MLP which takes both inputs for 1 epoch.
    """
    float_x = theano.config.floatX

    branches = CompositeLayer(
        'composite',
        [Linear(10, 'h0', 0.1), Linear(10, 'h1', 0.1)],
        {0: [1], 1: [0]})
    model = MLP(
        layers=[FlattenerLayer(branches), Softmax(5, 'softmax', 0.1)],
        input_space=CompositeSpace([VectorSpace(15), VectorSpace(20)]),
        input_source=('features0', 'features1'))

    # The arrays are drawn in the order in which the sources are listed.
    features1 = np.random.rand(20, 20).astype(float_x)
    features0 = np.random.rand(20, 15).astype(float_x)
    targets = np.random.rand(20, 5).astype(float_x)
    data_space = CompositeSpace(
        [VectorSpace(20), VectorSpace(15), VectorSpace(5)])
    data_sources = ('features1', 'features0', 'targets')
    dataset = VectorSpacesDataset((features1, features0, targets),
                                  (data_space, data_sources))

    experiment = Train(dataset, model, SGD(0.1, batch_size=5))
    experiment.algorithm.termination_criterion = EpochCounter(1)
    experiment.main_loop()
def runAutoencoder():
    """Train a denoising autoencoder on ``StockPrice`` and reconstruct a sample.

    The training loop is driven by hand (``setup`` / ``train`` /
    ``continue_learning``).  Afterwards a reconstruction of
    ``ds.train[0][0]`` is computed with NumPy from the learned parameters;
    that value is not returned or printed.
    """
    ds = StockPrice()
    # print ds.train[0][0]
    # Not used below; the call still draws from NumPy's global RNG.
    data = np.random.randn(10, 5).astype(config.floatX)
    # print data
    print(BinomialCorruptor(.2))

    ae = DenoisingAutoencoder(BinomialCorruptor(corruption_level=.2),
                              1000,
                              100,
                              act_enc='sigmoid',
                              act_dec='linear',
                              tied_weights=False)
    trainer = sgd.SGD(learning_rate=.005,
                      batch_size=5,
                      termination_criterion=EpochCounter(3),
                      cost=cost_ae.MeanSquaredReconstructionError(),
                      monitoring_batches=5,
                      monitoring_dataset=ds)
    trainer.setup(ae, ds)

    keep_training = True
    while keep_training:
        trainer.train(dataset=ds)
        ae.monitor()
        ae.monitor.report_epoch()
        keep_training = trainer.continue_learning(ae)

    # print ds.train[0][0]
    # print ae.reconstruct(ds.train[0][0])
    weights = ae.weights.get_value()
    # ae.hidbias.set_value(np.random.randn(1000).astype(config.floatX))
    hidden_bias = ae.hidbias.get_value()
    # ae.visbias.set_value(np.random.randn(100).astype(config.floatX))
    visible_bias = ae.visbias.get_value()
    d = tensor.matrix()

    # Sigmoid of the encoder pre-activation, then the linear decode.
    pre_activation = np.dot(ds.train[0][0], weights)
    hidden = 1. / (1 + np.exp(-hidden_bias - pre_activation))
    result = np.dot(hidden, weights.T) + visible_bias
def runSP():
    """Train a one-hidden-layer MLP on ``StockPrice`` and report accuracy.

    After training, each prediction on ``ds.valid[0]`` is printed next to
    the matching entry of ``ds.valid[1]`` and fed to ``Accuracy``.
    """
    ds = StockPrice()

    # sigmoid hidden layer with 10000 units, irange .1 and an initial bias of 1
    hidden_layer = mlp.Sigmoid(layer_name='hidden',
                               dim=10000,
                               irange=.1,
                               init_bias=1.)
    # linear output layer with a single unit
    output_layer = mlp.Linear(layer_name='output',
                              dim=1,
                              irange=.1,
                              init_bias=1.)
    # Stochastic Gradient Descent trainer limited by EpochCounter(10)
    trainer = sgd.SGD(learning_rate=.005,
                      batch_size=500,
                      termination_criterion=EpochCounter(10))

    # neural net with 1000 visible units
    ann = mlp.MLP([hidden_layer, output_layer], nvis=1000)
    trainer.setup(ann, ds)

    # train neural net until the termination criterion is true
    keep_training = True
    while keep_training:
        trainer.train(dataset=ds)
        ann.monitor.report_epoch()
        ann.monitor()
        keep_training = trainer.continue_learning(ann)

    # accuracy = Accuracy()
    acc = Accuracy()
    valid_inputs = theano.shared(ds.valid[0], name='inputs')
    predictions = ann.fprop(valid_inputs).eval()
    for i, predict in enumerate(predictions):
        # Same output as the two-item print statement: both values,
        # separated by one space.
        print("%s %s" % (predict, ds.valid[1][i]))
        acc.evaluatePN(predict[0], ds.valid[1][i][0])
    acc.printResult()
def set_training_criteria(self, learning_rate=0.05, batch_size=10,
                          max_epochs=10):
    """Create the training algorithm and store it on ``self.training_alg``.

    Parameters
    ----------
    learning_rate : float
        Accepted but not used by this method.
    batch_size : int
        Batch size for ``DefaultTrainingAlgorithm``.
    max_epochs : int
        Limit given to the ``EpochCounter`` termination criterion.
    """
    stop_rule = EpochCounter(max_epochs)
    algorithm = DefaultTrainingAlgorithm(batch_size=batch_size,
                                         monitoring_dataset=self.datasets,
                                         termination_criterion=stop_rule)
    self.training_alg = algorithm
def test_bgd_unsup():
    """Smoke test: BGD runs on an unsupervised cost without dying.

    Does not test for correctness at all.
    """
    dim = 3
    rng = np.random.RandomState([25, 9, 2012])

    dataset = DenseDesignMatrix(X=rng.randn(10, dim))

    # including a monitoring dataset lets us test that
    # the monitor works with unsupervised data
    monitoring_dataset = DenseDesignMatrix(X=rng.randn(15, dim))

    model = SoftmaxModel(dim)
    batch_size = 5

    class DummyCost(Cost):
        """Mean squared difference between the model output and its input."""

        def expr(self, model, data):
            input_space = self.get_data_specs(model)[0]
            input_space.validate(data)
            return T.square(model(data) - data).mean()

        def get_data_specs(self, model):
            return (model.get_input_space(), model.get_input_source())

    cost = DummyCost()

    # We need to include this so the test actually stops running at some point
    stop_after = EpochCounter(5)

    algorithm = BGD(cost,
                    batch_size=batch_size,
                    monitoring_batches=2,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=stop_after)

    experiment = Train(dataset, model, algorithm,
                       save_path=None, save_freq=0, extensions=None)
    experiment.main_loop()
def create_algorithm(self):
    """Create the SGD algorithm and store it on ``self.algorithm``.

    The termination criterion is an ``And`` of a ``MonitorBased`` criterion
    on the channel named by ``self.optimize_for`` and an ``EpochCounter``
    limited to ``self.max_epochs``.
    """
    criteria = [
        MonitorBased(channel_name=self.optimize_for,
                     prop_decrease=0.,
                     N=10),
        EpochCounter(max_epochs=self.max_epochs),
    ]
    stop_rule = And(criteria=criteria)
    self.algorithm = SGD(batch_size=100,
                         learning_rate=.01,
                         monitoring_dataset=self.alg_datasets,
                         termination_criterion=stop_rule)
def testing_multiple_datasets_with_specified_dataset_in_monitor_based_lr():
    """Check ``MonitorBasedLRAdjuster`` with an explicit ``dataset_name``.

    Tests that the class MonitorBasedLRAdjuster in sgd.py can properly use
    the specified dataset_name in the constructor when multiple monitoring
    datasets exist.
    """
    dim = 3
    m = 10
    # Plain decimal seed values.  The earlier spellings ``06`` and ``02``
    # are octal literals in Python 2 (same values) and a SyntaxError in
    # Python 3.
    rng = np.random.RandomState([6, 2, 2014])
    X = rng.randn(m, dim)
    Y = rng.randn(m, dim)

    learning_rate = 1e-2
    batch_size = 5

    # We need to include this so the test actually stops running at some point
    epoch_num = 1

    # two monitoring datasets, so that a dataset name has to be chosen
    monitoring_train = DenseDesignMatrix(X=X)
    monitoring_test = DenseDesignMatrix(X=Y)

    cost = DummyCost()
    model = SoftmaxModel(dim)
    dataset = DenseDesignMatrix(X=X)
    termination_criterion = EpochCounter(epoch_num)
    monitoring_dataset = {'train': monitoring_train, 'test': monitoring_test}

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    monitoring_batches=2,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    # First key in iteration order; unlike ``keys()[0]`` this also works
    # where ``dict.keys()`` is not indexable.
    dataset_name = next(iter(monitoring_dataset))
    monitor_lr = MonitorBasedLRAdjuster(dataset_name=dataset_name)

    train = Train(dataset,
                  model,
                  algorithm,
                  save_path=None,
                  save_freq=0,
                  extensions=[monitor_lr])
    train.main_loop()
def test_train_supervised():
    """
    Train a supervised GSN.
    """
    n_inputs = ds.X.shape[1]
    n_targets = ds.y.shape[1]

    # initialize the GSN
    model = GSN.new(
        layer_sizes=[n_inputs, 1000, n_targets],
        activation_funcs=["sigmoid", "tanh", rescaled_softmax],
        # one corruptor instance, referenced for all three layers
        pre_corruptors=[GaussianCorruptor(0.5)] * 3,
        post_corruptors=[
            SaltPepperCorruptor(.3), None, SmoothOneHotCorruptor(.5)
        ],
        layer_samplers=[BinomialSampler(), None, MultinomialSampler()],
        tied=False)

    # average over costs rather than summing
    input_xent = MeanBinaryCrossEntropy()

    def reconstruction_cost(a, b):
        return input_xent.cost(a, b) / ds.X.shape[1]

    target_xent = MeanBinaryCrossEntropy()

    def classification_cost(a, b):
        return target_xent.cost(a, b) / ds.y.shape[1]

    # combine costs into GSNCost object
    cost_terms = [
        # reconstruction on layer 0 with weight 1.0
        (0, 1.0, reconstruction_cost),
        # classification on layer 2 with weight 2.0
        (2, 2.0, classification_cost),
    ]
    gsn_cost = GSNCost(cost_terms, walkback=WALKBACK, mode="supervised")

    optimiser = SGD(
        LEARNING_RATE,
        init_momentum=MOMENTUM,
        cost=gsn_cost,
        termination_criterion=EpochCounter(MAX_EPOCHS),
        batches_per_iter=BATCHES_PER_EPOCH,
        batch_size=BATCH_SIZE,
        monitoring_dataset=ds,
        monitoring_batches=10,
    )

    experiment = Train(ds,
                       model,
                       algorithm=optimiser,
                       save_path="gsn_sup_example.pkl",
                       save_freq=10,
                       extensions=[MonitorBasedLRAdjuster()])
    experiment.main_loop()
    print("done training")
def set_training_criteria(self, learning_rate=0.05,
                          cost=MeanSquaredReconstructionError(),
                          batch_size=10, max_epochs=10):
    """Create the SGD algorithm and store it on ``self.training_alg``.

    Parameters
    ----------
    learning_rate : float
        Learning rate for ``SGD``.
    cost : object
        Cost for ``SGD``.  The default instance is created once, when the
        function is defined, and is therefore shared by every call that
        relies on the default.
    batch_size : int
        Batch size for ``SGD``.
    max_epochs : int
        Limit given to the ``EpochCounter`` termination criterion.
    """
    stop_rule = EpochCounter(max_epochs)
    sgd_settings = {
        'learning_rate': learning_rate,
        'cost': cost,
        'batch_size': batch_size,
        'monitoring_dataset': self.datasets,
        'termination_criterion': stop_rule,
    }
    self.training_alg = SGD(**sgd_settings)
def set_training_criteria(self, learning_rate=0.05, cost=Default(),
                          batch_size=10, max_epochs=10):
    """Create the SGD algorithm and store it on ``self.training_alg``.

    Parameters
    ----------
    learning_rate : float
        Learning rate for ``SGD``.
    cost : object
        Cost for ``SGD``.  The default ``Default()`` instance is created
        once, when the function is defined, and is therefore shared by
        every call that relies on the default.
    batch_size : int
        Batch size for ``SGD``.
    max_epochs : int
        Limit given to the ``EpochCounter`` termination criterion.
    """
    stop_rule = EpochCounter(max_epochs)
    sgd_settings = {
        'learning_rate': learning_rate,
        'cost': cost,
        'batch_size': batch_size,
        'monitoring_dataset': self.datasets,
        'termination_criterion': stop_rule,
    }
    self.training_alg = SGD(**sgd_settings)
def get_term_epochcounter(self, term_id):
    """Build an ``EpochCounter`` from the stored row for ``term_id``.

    Parameters
    ----------
    term_id : object
        Value bound to the ``term_id`` placeholder of the query.

    Returns
    -------
    EpochCounter
        Constructed from the first column of the fetched row.

    Raises
    ------
    HPSData
        If the query yields no row (or an empty one).
    """
    query = """
        SELECT max_epoch
        FROM hps3.term_epochcounter
        WHERE term_id = %s
        """
    row = self.db.executeSQL(query, (term_id, ), self.db.FETCH_ONE)
    # A ``None`` row is falsy as well, so one check covers both cases.
    if not row:
        raise HPSData("No epochCounter term for term_id="
                      + str(term_id))
    return EpochCounter(row[0])
def get_trainer(model, trainset, validset, epochs=50):
    """Build a ``Train`` object that uses SGD with the ``cost_from_X`` cost.

    Parameters
    ----------
    model : object
        Model passed to ``Train``.
    trainset : object
        Training dataset.
    validset : object or None
        Monitoring dataset; when it is ``None`` no ``monitoring_batches``
        value is given (``None``), otherwise 50.
    epochs : int
        Limit for the ``EpochCounter``; the momentum adjustor saturates at
        ``int(epochs*0.8)``.

    Returns
    -------
    Train
    """
    if validset is None:
        monitoring_batches = None
    else:
        monitoring_batches = 50

    sgd_settings = dict(
        batch_size=200,
        init_momentum=0.5,
        learning_rate=0.5,
        monitoring_batches=monitoring_batches,
        monitoring_dataset=validset,
        cost=MethodCost(method='cost_from_X', supervised=1),
        termination_criterion=EpochCounter(epochs),
        update_callbacks=ExponentialDecay(decay_factor=1.0005, min_lr=0.001),
    )
    train_algo = SGD(**sgd_settings)

    momentum_schedule = MomentumAdjustor(final_momentum=0.95,
                                         start=0,
                                         saturate=int(epochs*0.8))
    return Train(model=model,
                 algorithm=train_algo,
                 dataset=trainset,
                 save_freq=0,
                 save_path='epoch',
                 extensions=[momentum_schedule])
def create_layer_one(self):
    """Train five denoising-autoencoder sub-layers in separate processes.

    A single dataset, model and SGD algorithm are created.  For each index
    0..4 a ``Train`` object saving to ``self.save_path + str(i) + ".pkl"``
    is built from them and its ``main_loop`` is started in its own
    ``Process``; the method returns once all five have been joined.
    """
    # Examples [0, 800) of the training set.  (The original note speaks of
    # "8,000 samples" as the point where the objective stops improving --
    # TODO confirm which value is intended.)
    # Per the original note, GridPatchCIFAR10 randomly selects 16x16 patches
    # from each image -- not verifiable from this function.
    dataset = GridPatchCIFAR10(which_set="train",
                               one_hot=True,
                               start=0,
                               stop=800)

    # Denoising autoencoder hyper-parameters; act_dec=None is the linear
    # decoder activation according to the original comment.
    model = DenoisingAutoencoder(
        nvis=768,
        nhid=512,
        irange=0.05,
        corruptor=BinomialCorruptor(corruption_level=0.2),
        act_enc="tanh",
        act_dec=None)

    # SGD learning algorithm
    algorithm = SGD(learning_rate=0.001,
                    batch_size=100,
                    monitoring_batches=5,
                    monitoring_dataset=dataset,
                    cost=MeanSquaredReconstructionError(),
                    termination_criterion=EpochCounter(max_epochs=10))

    workers = []
    for i in range(5):
        # Same output as the two-item print statement: label, space, index.
        print("%s %s" % ("Training DAE Sub-Layer: ", i))
        train = Train(dataset=dataset,
                      model=model,
                      algorithm=algorithm,
                      save_path=self.save_path + str(i) + ".pkl",
                      save_freq=1)
        worker = Process(target=train.main_loop, args=())
        worker.start()
        workers.append(worker)

    for worker in workers:
        worker.join()
def test_sgd_sequential():
    """Check that ``train_iteration_mode='sequential'`` visits data in order.

    The cost is a ``CallbackCost`` whose callback asserts that every batch
    holds consecutive values, that the example just before the batch was
    already seen, and that no example of the batch was seen before.  After
    one call to ``train`` every example must have been seen.
    """
    dim = 1
    num_examples = 5 * 3
    # Batch size used by the algorithm and by the callback below.
    batch_size = 5

    dataset = ArangeDataset(num_examples)
    model = SoftmaxModel(dim)
    learning_rate = 1e-3

    seen = [False] * num_examples

    def visit(X):
        assert X.shape[1] == 1
        assert np.all(X[1:] == X[0:-1] + 1)
        first = int(X[0, 0])
        if first > 0:
            assert seen[first - 1]
        for offset in range(batch_size):
            assert not seen[first + offset]
            seen[first + offset] = 1

    data_specs = (model.get_input_space(), model.get_input_source())
    cost = CallbackCost(visit, data_specs)

    # We need to include this so the test actually stops running at some point
    stop_after = EpochCounter(5)

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    train_iteration_mode='sequential',
                    monitoring_dataset=None,
                    termination_criterion=stop_after,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    algorithm.setup(dataset=dataset, model=model)
    algorithm.train(dataset)

    assert all(seen)
def test_sgd_topo():
    """Smoke test: SGD runs on data with topology without dying.

    Does not test for correctness at all.
    """
    rows, cols, channels = 3, 4, 2
    rng = np.random.RandomState([25, 9, 2012])

    dataset = get_topological_dataset(rng, rows, cols, channels, 10)

    # including a monitoring dataset lets us test that
    # the monitor works with topological data
    monitoring_dataset = get_topological_dataset(rng, rows, cols, channels,
                                                 15)

    model = TopoSoftmaxModel(rows, cols, channels)
    learning_rate = 1e-3
    cost = CrossEntropy()

    # We need to include this so the test actually stops running at some point
    stop_after = EpochCounter(5)

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=5,
                    monitoring_batches=3,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=stop_after,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    experiment = Train(dataset, model, algorithm,
                       save_path=None, save_freq=0, extensions=None)
    experiment.main_loop()
def get_layer_trainer_sgd_rbm(layer, trainset):
    """Build a ``Train`` object that fits ``layer`` on ``trainset`` with SGD.

    The cost is ``SMD`` with a Gaussian corruptor and the number of epochs
    is limited by the module-level ``MAX_EPOCHS_UNSUPERVISED``.

    Parameters
    ----------
    layer : object
        Model passed to ``Train``.
    trainset : object
        Used as both the training and the monitoring dataset.

    Returns
    -------
    Train
        Given the save path ``grbm.pkl`` with ``save_freq=1``.
    """
    sgd_settings = dict(
        learning_rate=1e-1,
        batch_size=5,
        # "batches_per_iter" : 2000,
        monitoring_batches=20,
        monitoring_dataset=trainset,
        cost=SMD(corruptor=GaussianCorruptor(stdev=0.4)),
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS_UNSUPERVISED),
    )
    train_algo = SGD(**sgd_settings)

    return Train(model=layer,
                 algorithm=train_algo,
                 save_path='grbm.pkl',
                 save_freq=1,
                 extensions=[MonitorBasedLRAdjuster()],
                 dataset=trainset)
def limited_epoch_train(file_path, max_epochs=1):
    """
    Train the experiment described by a YAML file for a limited
    number of epochs.

    Parameters
    ----------
    file_path : str
        The path to the YAML file to be trained
    max_epochs : int
        The number of epochs to train this YAML file for.
        Defaults to 1.
    """
    experiment = load_train_file(file_path)
    epoch_limit = EpochCounter(max_epochs=max_epochs)
    experiment.algorithm.termination_criterion = epoch_limit
    experiment.main_loop()
def testing_multiple_datasets_in_monitor_based_lr():
    """Check that an ambiguous ``MonitorBasedLRAdjuster`` is rejected.

    Two monitoring datasets are supplied and the adjuster is constructed
    without ``channel_name`` or ``dataset_name``; the test passes only if
    ``main_loop`` raises ``ValueError``.

    Raises
    ------
    AssertionError
        If training completes without a ``ValueError``.
    """
    dim = 3
    m = 10
    # Plain decimal seed values.  The earlier spellings ``06`` and ``02``
    # are octal literals in Python 2 (same values) and a SyntaxError in
    # Python 3.
    rng = np.random.RandomState([6, 2, 2014])
    X = rng.randn(m, dim)
    Y = rng.randn(m, dim)

    learning_rate = 1e-2
    batch_size = 5

    # We need to include this so the test actually stops running at some point
    epoch_num = 1

    # two monitoring datasets, so that more than one objective channel exists
    monitoring_train = DenseDesignMatrix(X=X)
    monitoring_test = DenseDesignMatrix(X=Y)

    cost = DummyCost()
    model = SoftmaxModel(dim)
    dataset = DenseDesignMatrix(X=X)
    termination_criterion = EpochCounter(epoch_num)

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    monitoring_batches=2,
                    monitoring_dataset={'train': monitoring_train,
                                        'test': monitoring_test},
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    monitor_lr = MonitorBasedLRAdjuster()

    train = Train(dataset,
                  model,
                  algorithm,
                  save_path=None,
                  save_freq=0,
                  extensions=[monitor_lr])

    try:
        train.main_loop()
    except ValueError:
        # Expected: the adjuster cannot pick between the objective channels.
        return

    raise AssertionError("MonitorBasedLRAdjuster takes multiple dataset names in which more than one \"objective\" channel exist and the user has not specified " +
                         "either channel_name or dataset_name in the constructor to disambiguate.")
def get_finetuner(model, cost, trainset, validset=None, batch_size=100,
                  iters=100):
    """Build a ``Train`` object that fine-tunes ``model`` with SGD.

    Parameters
    ----------
    model : object
        Model passed to ``Train``.
    cost : object
        Cost handed to ``SGD``.
    trainset : object
        Training dataset.
    validset : object, optional
        Accepted but not used; monitoring is commented out below.
    batch_size : int
        Batch size handed to ``SGD``.
    iters : int
        Limit for the ``EpochCounter``; the momentum adjustor saturates at
        ``int(0.8*iters)``.

    Returns
    -------
    Train
    """
    sgd_settings = dict(
        batch_size=batch_size,
        init_momentum=0.5,
        learning_rate=0.5,
        # monitoring_batches = 100/batch_size,
        # monitoring_dataset = {'train': trainset, 'valid': validset},
        cost=cost,
        termination_criterion=EpochCounter(iters),
        update_callbacks=sgd.ExponentialDecay(decay_factor=1.005,
                                              min_lr=0.05),
    )
    train_algo = sgd.SGD(**sgd_settings)

    momentum_schedule = sgd.MomentumAdjustor(final_momentum=0.9,
                                             start=0,
                                             saturate=int(0.8*iters))
    return Train(model=model,
                 algorithm=train_algo,
                 dataset=trainset,
                 save_freq=0,
                 extensions=[momentum_schedule])