def test_train_ae():
    """Train a three-layer GSN on the MNIST training set.

    Only the layer-0 reconstruction cost is optimised.  The model is saved
    to ``./results/gsn_ae_trained.pkl`` every 5 epochs and a
    ``MonitorBasedLRAdjuster`` extension is attached to the run.
    """
    ds = MNIST(which_set='train',
               one_hot=True,
               all_labelled=ALL_LABELLED,
               supervised=SUPERVISED)

    n_visible = ds.X.shape[1]
    gsn = GSN.new(
        layer_sizes=[n_visible, HIDDEN_SIZE, n_visible],
        activation_funcs=["sigmoid", "tanh", rescaled_softmax],
        # the same corruptor instance is shared by all three layers
        pre_corruptors=[GaussianCorruptor(GAUSSIAN_NOISE)] * 3,
        post_corruptors=[SaltPepperCorruptor(SALT_PEPPER_NOISE),
                         None,
                         SmoothOneHotCorruptor(GAUSSIAN_NOISE)],
        layer_samplers=[BinomialSampler(), None, MultinomialSampler()],
        tied=False
    )

    cross_entropy = MeanBinaryCrossEntropy()

    def reconstruction_cost(a, b):
        # scale the cross entropy by the width of the input
        return cross_entropy.cost(a, b) / ds.X.shape[1]

    gsn_cost = GSNCost([(0, 1.0, reconstruction_cost)], walkback=WALKBACK)

    alg = SGD(
        LEARNING_RATE,
        init_momentum=MOMENTUM,
        cost=gsn_cost,
        termination_criterion=EpochCounter(MAX_EPOCHS),
        batches_per_iter=BATCHES_PER_EPOCH,
        batch_size=BATCH_SIZE,
        monitoring_dataset=ds,
        monitoring_batches=MONITORING_BATCHES
    )

    trainer = Train(ds, gsn,
                    algorithm=alg,
                    save_path="./results/gsn_ae_trained.pkl",
                    save_freq=5,
                    extensions=[MonitorBasedLRAdjuster()])
    trainer.main_loop()
    print("done training")
class RBMTraining:
    """Wire a SimulationData set, an RBM model and a Train experiment together.

    The methods store their results on the instance, so they have to be
    called in an order that satisfies the attributes each one reads:
    ``set_structure`` -> ``get_model`` -> ``set_training_criteria`` ->
    ``set_extensions`` -> ``define_training_experiment`` ->
    ``train_experiment``.
    """

    def __init__(self, data_path="./datasets/", save_path="training.pkl",
                 simulation_data=None, identifier=0):
        """Store the paths and obtain the simulation data.

        When ``simulation_data`` is omitted a ``SimulationData`` is built
        from ``data_path`` and loaded through :meth:`load_data`.
        """
        self.id = identifier
        self.data_path = data_path
        self.save_path = save_path

        # Identity test instead of "!= None": it cannot be fooled by a
        # custom __ne__/__eq__ on the data object.
        if simulation_data is not None:
            self.sim_data = simulation_data
            # NOTE(review): save_data_loaded is not defined in this class;
            # confirm it is provided by a subclass or mixin, otherwise this
            # branch raises AttributeError.
            self.save_data_loaded()
        else:
            self.sim_data = SimulationData(data_path)
            self.load_data()

    def load_data(self):
        """Load, preprocess and split the data, then cache its dimensions."""
        self.sim_data.load_data()
        self.sim_data.preprocessor()
        train_part, test_part = self.sim_data.split_train_test()[:2]
        self.datasets = {'train': train_part, 'test': test_part}
        self.num_simulations = self.sim_data.num_simulations
        self.input_values = self.sim_data.input_values
        self.output_values = self.sim_data.output_values

    def set_structure(self, num_layers=4, shape='linear'):
        """Set and return ``[visible, hidden]`` sizes.

        The sizes are taken from ``input_values`` / ``output_values``;
        ``num_layers`` and ``shape`` are accepted but currently unused.
        """
        self.vis = self.input_values
        self.hid = self.output_values
        return [self.vis, self.hid]

    def get_model(self):
        """Build the RBM from the sizes chosen by :meth:`set_structure`."""
        self.model = RBM(nvis=self.vis, nhid=self.hid, irange=.05)
        return self.model

    def set_training_criteria(self, learning_rate=0.05, batch_size=10,
                              max_epochs=10):
        """Create the training algorithm.

        ``learning_rate`` is accepted but not forwarded to the algorithm.
        """
        self.training_alg = DefaultTrainingAlgorithm(
            batch_size=batch_size,
            monitoring_dataset=self.datasets,
            termination_criterion=EpochCounter(max_epochs))

    def set_extensions(self, extensions=None):
        """Disable extensions; the ``extensions`` argument is ignored."""
        self.extensions = None

    def set_attributes(self, attributes):
        """Remember ``attributes`` on the instance."""
        self.attributes = attributes

    def define_training_experiment(self, save_freq=10):
        """Create the ``Train`` object for the 'train' split."""
        self.experiment = Train(dataset=self.datasets['train'],
                                model=self.model,
                                algorithm=self.training_alg,
                                save_path=self.save_path,
                                save_freq=save_freq,
                                allow_overwrite=True,
                                extensions=self.extensions)

    def train_experiment(self):
        """Run the main loop of the previously defined experiment."""
        self.experiment.main_loop()
def test_pylearn2_trainin():
    """Train a wrapped two-layer sigmoid MLP with Pylearn2's SGD.

    The MLP and its squared-error cost are wrapped in ``BlocksModel`` /
    ``BlocksCost`` and trained on random dense design matrices with a
    time budget of 3 passed to the main loop.
    """
    # Model and cost
    network = MLP(activations=[Sigmoid(), Sigmoid()],
                  dims=[784, 100, 784],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(0.01))
    network.initialize()
    wrapped_cost = BlocksCost(SquaredError())
    wrapped_model = BlocksModel(network, (VectorSpace(dim=784), 'features'))

    # Random data; the training set is drawn before the validation set
    rng = numpy.random.RandomState(14)
    train_dataset = random_dense_design_matrix(rng, 1024, 784, 10)
    valid_dataset = random_dense_design_matrix(rng, 1024, 784, 10)

    # Only let Pylearn2 report errors
    logging.getLogger(pylearn2.__name__).setLevel(logging.ERROR)

    # Training algorithm and loop
    algorithm = SGD(learning_rate=0.01,
                    cost=wrapped_cost,
                    batch_size=128,
                    monitoring_dataset=valid_dataset)
    experiment = Train(train_dataset, wrapped_model, algorithm=algorithm)
    experiment.main_loop(time_budget=3)
def train_example(dataset=None):
    """Train a GaussianBinaryRBM on ``dataset`` using the SMD cost.

    ``dataset`` is used both for training and for monitoring.  The model
    is saved to ``./experiment/training.pkl`` every 10 epochs.
    """
    rbm = GaussianBinaryRBM(
        nvis=1296,
        nhid=61,
        irange=0.5,
        energy_function_class=grbm_type_1(),
        learn_sigma=True,
        init_sigma=.4,
        init_bias_hid=2.,
        mean_vis=False,
        sigma_lr_scale=1e-3,
    )
    smd_cost = SMD(corruptor=GaussianCorruptor(stdev=0.4))
    stop_rule = MonitorBased(prop_decrease=0.01, N=1)
    sgd = SGD(learning_rate=.1,
              batch_size=5,
              monitoring_batches=20,
              monitoring_dataset=dataset,
              cost=smd_cost,
              termination_criterion=stop_rule)

    experiment = Train(dataset=dataset,
                       model=rbm,
                       save_path="./experiment/training.pkl",
                       save_freq=10,
                       algorithm=sgd,
                       extensions=[])
    experiment.main_loop()
def finish_one_layer(X_img_train, X_txt_train, y_train,
                     X_img_test, X_txt_test, y_test,
                     h_units, epochs, lr=0.1, model_type='FullModal',
                     alpha=0.8, layer_num='1', prefix='', suffix='',
                     save_path=''):
    """Prepare, train and test one complete layer.

    Returns the propagated image/text design matrices for the train and
    test sets, in the order
    ``(img_train, txt_train, img_test, txt_test)``.
    """
    # 1. Build the datasets
    dsit_train, dsit_test = make_dataset(
        X_img_train=X_img_train, X_txt_train=X_txt_train, y_train=y_train,
        X_img_test=X_img_test, X_txt_test=X_txt_test, y_test=y_test)

    # 2. Train the single-layer model
    ae_model = MyMultimodalAutoEncoder(
        model_type=model_type,
        alpha=alpha,
        n_vis_img=X_img_train.shape[1],
        n_vis_txt=X_txt_train.shape[1],
        n_hid_img=h_units,
        n_hid_txt=h_units,
        dec_f_img=True,
        dec_f_txt=True)
    alg = SGD(learning_rate=lr,
              cost=None,
              batch_size=20,
              init_momentum=None,
              monitoring_dataset={'train': dsit_train, 'test': dsit_test},
              termination_criterion=EpochCounter(max_epochs=epochs))
    train = Train(dataset=dsit_train,
                  model=ae_model,
                  algorithm=alg,
                  save_path='multi_ae_save_layer' + layer_num + '.pkl',
                  save_freq=10)

    started = time.clock()
    train.main_loop()
    print('training time for layer%s: %f' % (layer_num, time.clock() - started))

    # 3. Compute the design matrices propagated through the trained model
    propagated = propup_design_matrix(X_train=dsit_train.X,
                                      X_test=dsit_test.X,
                                      ae_model=ae_model)
    (X_img_propup_train, X_txt_propup_train,
     X_img_propup_test, X_txt_propup_test,
     X_propup_train, X_propup_test) = propagated

    # 4. Test the classification performance of the trained model
    print('!!!evaluate model on dataset+++++++++++++++++++++++++++++++++++++++++++++++++++++++')
    model_evaluate(X_img_train=X_img_propup_train,
                   X_txt_train=X_txt_propup_train,
                   y_train=y_train,
                   X_img_test=X_img_propup_test,
                   X_txt_test=X_txt_propup_test,
                   y_test=y_test,
                   layer_num=layer_num,
                   prefix=prefix,
                   suffix=suffix,
                   save_path=save_path)

    return (X_img_propup_train, X_txt_propup_train,
            X_img_propup_test, X_txt_propup_test)
def __init__(self, runner, model_params, resume=False, resume_data=None,
             s3_data=None, **kwargs):
    """Assemble the ``Train`` object and store it in ``self.train_obj``.

    The model is either resumed via ``resume_model`` (which may also
    replace ``model_params``) or freshly created via ``new_model``.
    ``s3_data`` and ``kwargs`` are accepted but not used here.
    """
    dataset = create_dense_design_matrix(x=runner.dp.train_set_x)

    if not resume:
        model = self.new_model(model_params, dataset=dataset)
    else:
        model, model_params = self.resume_model(model_params, resume_data)

    stop_rule = MaxEpochNumber(model_params['maxnum_iter'])
    sgd = SGD(learning_rate=model_params['learning_rate']['init'],
              monitoring_dataset=dataset,
              cost=MeanSquaredReconstructionError(),
              termination_criterion=stop_rule,
              batch_size=model_params['batch_size'])
    reporter = AutoEncoderStatReporter(runner,
                                       resume=resume,
                                       resume_data=resume_data,
                                       save_freq=model_params['save_freq'])

    self.train_obj = Train(dataset=dataset,
                           model=model,
                           algorithm=sgd,
                           extensions=[reporter])
def my_train():
    """Train a two-source MLP on CIN_FEATURE2 with conjugate BGD.

    Two linear branches ('h1', 'h2') are grouped in a composite layer 'c'
    that feeds a 2-unit linear output layer 'o'.
    """
    trainset = CIN_FEATURE2(which_set='train')
    # loaded as in the original although it is not passed to the trainer
    validset = CIN_FEATURE2(which_set='valid')

    branches = [
        Linear(layer_name='h1', dim=850, irange=0.05),
        Linear(layer_name='h2', dim=556, irange=0.05),
    ]
    composite = CompositeLayerWithSource(layer_name='c', layers=branches)
    output = Linear(layer_name='o', dim=2, irange=0.05)

    input_space = CompositeSpace(
        components=[VectorSpace(dim=850), VectorSpace(dim=556)])
    model = MLPWithSource(batch_size=1140,
                          layers=[composite, output],
                          input_space=input_space,
                          input_source=['feature850', 'feature556'])

    algorithm = BGD(
        conjugate=1,
        line_search_mode='exhaustive',
        cost=Default(),
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS))

    experiment = Train(dataset=trainset, model=model, algorithm=algorithm)
    experiment.main_loop()
def cnn_run_dropout_maxout(data_path, num_rows, num_cols, num_channels,
                           input_path, pred_path):
    """Train a two-conv-layer network on SAR data and predict one image.

    Returns ``(ann, prediction)`` where ``prediction`` is the result of
    ``sar_predict`` on the image read from ``input_path``.
    """
    tick = time.time()
    sub_window = gen_center_sub_window(76, num_cols)
    # NOTE(review): ``ds`` is not bound inside this function and
    # ``data_path`` is never read -- presumably ``ds`` is a module-level
    # name; confirm.
    trn = SarDataset(ds[0][0], ds[0][1], sub_window)
    vld = SarDataset(ds[1][0], ds[1][1], sub_window)
    tst = SarDataset(ds[2][0], ds[2][1], sub_window)
    print('Take {}s to read data'.format(time.time() - tick))

    tick = time.time()
    batch_size = 100
    # NOTE(review): this maxout layer is built but never added to the model.
    h1 = maxout.Maxout(layer_name='h2', num_units=1, num_pieces=100,
                       irange=.1)
    conv_settings = dict(output_channels=8,
                         irange=0.05,
                         kernel_shape=[5, 5],
                         pool_shape=[2, 2],
                         pool_stride=[2, 2],
                         max_kernel_norm=1.9365)
    hidden_layer = mlp.ConvRectifiedLinear(layer_name='h2', **conv_settings)
    hidden_layer2 = mlp.ConvRectifiedLinear(layer_name='h3', **conv_settings)
    output_layer = mlp.Linear(dim=1, layer_name='output', irange=0.05)

    trainer = sgd.SGD(
        learning_rate=0.001,
        batch_size=100,
        termination_criterion=EpochCounter(2000),
        cost=dropout.Dropout(),
        train_iteration_mode='even_shuffled_sequential',
        monitor_iteration_mode='even_shuffled_sequential',
        monitoring_dataset={'test': tst, 'valid': vld, 'train': trn})

    input_space = space.Conv2DSpace(shape=[num_rows, num_cols],
                                    num_channels=num_channels)
    ann = mlp.MLP([hidden_layer, hidden_layer2, output_layer],
                  input_space=input_space,
                  batch_size=batch_size)
    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_objective',
        save_path='sar_cnn_mlp.pkl')
    experiment = Train(dataset=trn, model=ann, algorithm=trainer,
                       extensions=[watcher])
    print('Take {}s to compile code'.format(time.time() - tick))

    tick = time.time()
    experiment.main_loop()
    print('Training time: {}s'.format(time.time() - tick))

    serial.save('cnn_hhv_{0}_{1}.pkl'.format(num_rows, num_cols), ann,
                on_overwrite='backup')

    # read hh and hv into a 3D numpy
    image = read_hhv(input_path)
    return ann, sar_predict(ann, image, pred_path)
def test_serialization_guard():
    """Check that Train refuses to serialize the dataset.

    The dataset is attached to the model so that saving the model would
    also pickle the dataset; ``main_loop`` is expected to raise
    ``RuntimeError``.
    """
    n_features = 2
    n_examples = 11

    rng = np.random.RandomState([28, 9, 2012])
    dataset = DenseDesignMatrix(X=rng.randn(n_examples, n_features))

    model = DummyModel(n_features)
    # make the dataset part of the model, so it will get serialized
    model.dataset = dataset

    Monitor.get_monitor(model)

    train = Train(dataset, model, DummyAlgorithm(),
                  save_path='_tmp_unit_test.pkl',
                  save_freq=1,
                  extensions=None)

    complained = False
    try:
        train.main_loop()
    except RuntimeError:
        complained = True
    # train did not complain, this is a bug
    assert complained
def test_empty_monitoring_datasets():
    """
    Check that an empty monitoring-datasets dictionary does not make
    training fail.
    """
    n_features = 3
    rng = np.random.RandomState([25, 9, 2012])
    train_dataset = DenseDesignMatrix(X=rng.randn(10, n_features))

    model = SoftmaxModel(n_features)
    # positional arguments: learning rate, then cost
    algorithm = SGD(1e-3,
                    DummyCost(),
                    batch_size=5,
                    monitoring_dataset={},
                    termination_criterion=EpochCounter(2))

    Train(train_dataset, model, algorithm,
          save_path=None,
          save_freq=0,
          extensions=None).main_loop()
def test_serialization_guard():
    """Check that Train refuses to serialize the dataset.

    Same scenario as the ``extensions=`` flavour of this test, but the
    ``Train`` object is built with ``callbacks=None``.
    """
    n_features = 2
    n_examples = 11

    rng = np.random.RandomState([28, 9, 2012])
    dataset = DenseDesignMatrix(X=rng.randn(n_examples, n_features))

    model = DummyModel(n_features)
    # make the dataset part of the model, so it will get serialized
    model.dataset = dataset

    Monitor.get_monitor(model)

    train = Train(dataset, model, DummyAlgorithm(),
                  save_path='_tmp_unit_test.pkl',
                  save_freq=1,
                  callbacks=None)

    complained = False
    try:
        train.main_loop()
    except RuntimeError:
        complained = True
    # train did not complain, this is a bug
    assert complained
def test_multiple_inputs():
    """
    Build a VectorSpacesDataset with two inputs (features0 and features1)
    and train an MLP consuming both of them for 1 epoch.
    """
    # Model: layers first, then the input space (same order as a single
    # nested constructor call would evaluate them).
    branches = [Linear(10, 'h0', 0.1), Linear(10, 'h1', 0.1)]
    routing = {0: [1], 1: [0]}
    composite = CompositeLayer('composite', branches, routing)
    layers = [FlattenerLayer(composite), Softmax(5, 'softmax', 0.1)]
    mlp = MLP(layers=layers,
              input_space=CompositeSpace([VectorSpace(15), VectorSpace(20)]),
              input_source=('features0', 'features1'))

    # Data: drawn in the order features1, features0, targets
    floatX = theano.config.floatX
    features1 = np.random.rand(20, 20).astype(floatX)
    features0 = np.random.rand(20, 15).astype(floatX)
    targets = np.random.rand(20, 5).astype(floatX)
    data_space = CompositeSpace(
        [VectorSpace(20), VectorSpace(15), VectorSpace(5)])
    dataset = VectorSpacesDataset(
        (features1, features0, targets),
        (data_space, ('features1', 'features0', 'targets')))

    train = Train(dataset, mlp, SGD(0.1, batch_size=5))
    # the stopping rule is attached after construction
    train.algorithm.termination_criterion = EpochCounter(1)
    train.main_loop()
def test_train_ae():
    """Train a two-layer GSN autoencoder on ``ds``.

    ``ds`` and the upper-case hyper-parameters are not defined in this
    function and are read from the surrounding scope.  The model is saved
    to ``gsn_ae_example.pkl`` every 5 epochs.
    """
    gaussian = GaussianCorruptor

    gsn = GSN.new(
        layer_sizes=[ds.X.shape[1], 1000],
        activation_funcs=["sigmoid", "tanh"],
        pre_corruptors=[None, gaussian(1.0)],
        post_corruptors=[SaltPepperCorruptor(0.5), gaussian(1.0)],
        layer_samplers=[BinomialSampler(), None],
        tied=False
    )

    cross_entropy = MeanBinaryCrossEntropy()

    def reconstruction_cost(a, b):
        # average MBCE over example rather than sum it
        return cross_entropy.cost(a, b) / ds.X.shape[1]

    gsn_cost = GSNCost([(0, 1.0, reconstruction_cost)], walkback=WALKBACK)

    alg = SGD(
        LEARNING_RATE,
        init_momentum=MOMENTUM,
        cost=gsn_cost,
        termination_criterion=EpochCounter(MAX_EPOCHS),
        batches_per_iter=BATCHES_PER_EPOCH,
        batch_size=BATCH_SIZE,
        monitoring_dataset=ds,
        monitoring_batches=10
    )

    trainer = Train(ds, gsn,
                    algorithm=alg,
                    save_path="gsn_ae_example.pkl",
                    save_freq=5)
    trainer.main_loop()
    print("done training")
def test_execution_order():
    """Ensure save is called directly after monitoring.

    The check itself happens in the ``ParamMonitor`` extension (parameter
    values in ``on_monitor`` and ``on_save``); ``Train.save`` is replaced
    by ``only_run_extensions`` so no file is written by the save step.
    """
    softmax = Softmax(layer_name='y', n_classes=2, irange=0.)
    model = MLP(layers=[softmax], nvis=3)

    # features are drawn before targets
    features = np.random.normal(size=(6, 3))
    targets = np.random.normal(size=(6, 2))
    dataset = DenseDesignMatrix(X=features, y=targets)

    algorithm = SGD(batch_size=2,
                    learning_rate=0.1,
                    termination_criterion=EpochCounter(max_epochs=1))

    train = Train(dataset=dataset,
                  model=model,
                  algorithm=algorithm,
                  extensions=[ParamMonitor()],
                  save_freq=1,
                  save_path="save.pkl")

    # mock save
    train.save = MethodType(only_run_extensions, train)
    train.main_loop()
def test_training_a_model():
    """
    Check whether a SparseDataset can be used to train a dummy model.
    """
    n_features = 3
    n_examples = 10

    rng = np.random.RandomState([22, 4, 2014])
    sparse_data = csr_matrix(rng.randn(n_examples, n_features))
    dataset = SparseDataset(from_scipy_sparse_dataset=sparse_data)

    model = SoftmaxModel(n_features)

    stop_rule = EpochCounter(2)
    # positional arguments: learning rate, then cost
    algorithm = SGD(1e-1,
                    DummyCost(),
                    batch_size=5,
                    termination_criterion=stop_rule,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    Train(dataset, model, algorithm,
          save_path=None,
          save_freq=0,
          extensions=None).main_loop()
def train():
    """Train a GSN on ``FunnelDistribution`` and save it to funnel_gsn.pkl.

    The network has layer sizes 10-200-10 and uses neither corruptors nor
    samplers; the model is saved every 50 epochs.
    """
    LEARNING_RATE = 1e-4
    MOMENTUM = 0.25
    MAX_EPOCHS = 500
    BATCHES_PER_EPOCH = 100
    BATCH_SIZE = 1000

    dataset = FunnelDistribution()
    cost = FunnelGSNCost([(0, 1.0, MSR())], walkback=1)

    # Built as in the original, but not handed to the GSN below.
    gc = GaussianCorruptor(0.75)
    dc = DropoutCorruptor(.5)

    no_op = [None] * 3
    gsn = GSN.new([10, 200, 10],
                  [None, "tanh", "tanh"],   # activation
                  no_op,                    # pre corruption
                  [None] * 3,               # post corruption
                  [None] * 3,               # layer samplers
                  tied=False)
    gsn._bias_switch = False

    alg = SGD(LEARNING_RATE,
              init_momentum=MOMENTUM,
              cost=cost,
              termination_criterion=EpochCounter(MAX_EPOCHS),
              batches_per_iter=BATCHES_PER_EPOCH,
              batch_size=BATCH_SIZE,
              monitoring_batches=100,
              monitoring_dataset=dataset)

    trainer = Train(dataset, gsn,
                    algorithm=alg,
                    save_path="funnel_gsn.pkl",
                    extensions=[MonitorBasedLRAdjuster()],
                    save_freq=50)
    trainer.main_loop()
    print("done training")
def train_with_monitoring_datasets(train_dataset, monitoring_datasets,
                                   model_force_batch_size,
                                   train_iteration_mode,
                                   monitor_iteration_mode):
    """Run a 2-epoch SGD training with the given monitoring set-up.

    ``dim``, ``learning_rate`` and ``batch_size`` are not parameters; they
    are read from the surrounding scope.  ``force_batch_size`` is only set
    on the model when ``model_force_batch_size`` is truthy.
    """
    model = SoftmaxModel(dim)
    if model_force_batch_size:
        model.force_batch_size = model_force_batch_size

    sgd_options = dict(
        batch_size=batch_size,
        train_iteration_mode=train_iteration_mode,
        monitor_iteration_mode=monitor_iteration_mode,
        monitoring_dataset=monitoring_datasets,
        termination_criterion=EpochCounter(2),
    )
    algorithm = SGD(learning_rate, DummyCost(), **sgd_options)

    experiment = Train(train_dataset, model, algorithm,
                       save_path=None,
                       save_freq=0,
                       extensions=None)
    experiment.main_loop()
def test_batch_size_specialization():
    """Train with batch size 1 while monitoring with another batch size.

    Tests that using a batch size of 1 for training and a batch size
    other than 1 for monitoring does not result in a crash.  This catches
    a bug reported on the pylearn-dev mailing list under the subject
    "[pylearn-dev] monitor assertion error: channel_X.type != X.type":
    the training data was specialized to a row matrix (theano tensor with
    first dim broadcastable) and the monitor ended up with expressions
    mixing the specialized and non-specialized version of the expression.
    """
    n_examples = 2
    # created as in the original; no numbers are drawn from it
    rng = np.random.RandomState([25, 9, 2012])

    dataset = DenseDesignMatrix(X=np.zeros((n_examples, 1)))
    model = SoftmaxModel(1)

    # positional arguments: learning rate, then cost
    algorithm = SGD(1e-3,
                    DummyCost(),
                    batch_size=1,
                    monitoring_batches=1,
                    monitoring_dataset=dataset,
                    termination_criterion=EpochCounter(max_epochs=1),
                    update_callbacks=None,
                    set_batch_size=False)

    Train(dataset, model, algorithm,
          save_path=None,
          save_freq=0,
          extensions=None).main_loop()
def finish_one_layer(X_train, y_train, X_test, y_test, img_units, txt_units,
                     h_units, epochs, lr=0.1, model_type='FullModal',
                     alpha=0.5, beta=0.5, layer_num='1', prefix='',
                     suffix='', save_path=''):
    """
    Prepare, train and test one complete layer.

    For now the single modality is assumed to be an image that is split
    evenly into two halves.

    Returns ``(X_propup_train, X_propup_test)``.
    """
    # 0. Parameter check
    print('img_units=' + ' ' + str(img_units))
    print('txt_units=' + ' ' + str(txt_units))
    print('X_train.shape[1]=' + ' ' + str(X_train.shape[1]))
    total_units = img_units + txt_units
    assert total_units == X_train.shape[1]
    assert total_units == X_test.shape[1]

    # 1. Build the datasets
    dsit_train, dsit_test = make_dataset_single_modal(
        X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)

    # 2. Train the single-layer model
    print('in finish_one_layer, alpha=%f, beta=%f' % (alpha, beta))
    ae_model = AdjustableMultimodalAutoEncoder(
        model_type=model_type,
        alpha=alpha,
        beta=beta,
        n_vis_img=img_units,
        n_vis_txt=txt_units,
        n_hid_img=h_units,
        n_hid_txt=h_units,
        dec_f_img=True,
        dec_f_txt=True)
    # cost=None so that the cost returned by the model's own
    # get_default_cost() is used
    alg = SGD(learning_rate=lr,
              cost=None,
              batch_size=20,
              init_momentum=None,
              monitoring_dataset={'train': dsit_train, 'test': dsit_test},
              termination_criterion=EpochCounter(max_epochs=epochs))
    train = Train(dataset=dsit_train,
                  model=ae_model,
                  algorithm=alg,
                  save_path='multi_ae_save_layer' + layer_num + '.pkl',
                  save_freq=10)

    started = time.clock()
    train.main_loop()
    print('training time for layer%s: %f' % (layer_num, time.clock() - started))

    # 3. Compute the design matrices propagated through the trained model
    propagated = propup_design_matrix(X_train=dsit_train.X,
                                      X_test=dsit_test.X,
                                      ae_model=ae_model)
    (X_img_propup_train, X_txt_propup_train,
     X_img_propup_test, X_txt_propup_test,
     X_propup_train, X_propup_test) = propagated

    # 4. Test the classification performance of the trained model
    print('!!!evaluate model on dataset+++++++++++++++++++++++++++++++++++++++++++++++++++++++')
    model_evaluate(X_img_train=X_img_propup_train,
                   X_txt_train=X_txt_propup_train,
                   y_train=y_train,
                   X_img_test=X_img_propup_test,
                   X_txt_test=X_txt_propup_test,
                   y_test=y_test,
                   layer_num=layer_num,
                   prefix=prefix,
                   suffix=suffix,
                   save_path=save_path)

    return X_propup_train, X_propup_test
def train_model():
    """Train a DeepComposedAutoencoder on the simulation data matrix.

    One autoencoder is created per entry returned by ``get_structure()``.
    The experiment is saved to ``training2.pkl`` every 10 epochs.
    """
    global ninput, noutput

    simdata = SimulationData(
        sim_path="../../javaDataCenter/generarDadesV1/CA_SDN_topo1/")
    simdata.load_data()
    simdata.preprocessor()
    dataset = simdata.get_matrix()

    layers = [get_autoencoder(pair) for pair in get_structure()]
    model = DeepComposedAutoencoder(layers)

    training_alg = SGD(
        learning_rate=1e-3,
        cost=MeanSquaredReconstructionError(),
        batch_size=1296,
        monitoring_dataset=dataset,
        termination_criterion=EpochCounter(max_epochs=50))

    experiment = Train(dataset=dataset,
                       model=model,
                       algorithm=training_alg,
                       save_path='training2.pkl',
                       save_freq=10,
                       allow_overwrite=True,
                       extensions=[MonitorBasedLRAdjuster()])
    experiment.main_loop()
def test_train_ae():
    """Train a two-layer GSN autoencoder on ``ds``.

    ``ds`` and the upper-case hyper-parameters are not defined in this
    function and are read from the surrounding scope.  The model is saved
    to ``gsn_ae_example.pkl`` every 5 epochs.
    """
    make_gaussian = GaussianCorruptor

    network = GSN.new(
        layer_sizes=[ds.X.shape[1], 1000],
        activation_funcs=["sigmoid", "tanh"],
        pre_corruptors=[None, make_gaussian(1.0)],
        post_corruptors=[SaltPepperCorruptor(0.5), make_gaussian(1.0)],
        layer_samplers=[BinomialSampler(), None],
        tied=False,
    )

    mbce = MeanBinaryCrossEntropy()

    def reconstruction_cost(a, b):
        # average MBCE over example rather than sum it
        return mbce.cost(a, b) / ds.X.shape[1]

    total_cost = GSNCost([(0, 1.0, reconstruction_cost)], walkback=WALKBACK)

    sgd = SGD(
        LEARNING_RATE,
        init_momentum=MOMENTUM,
        cost=total_cost,
        termination_criterion=EpochCounter(MAX_EPOCHS),
        batches_per_iter=BATCHES_PER_EPOCH,
        batch_size=BATCH_SIZE,
        monitoring_dataset=ds,
        monitoring_batches=10,
    )

    Train(ds, network,
          algorithm=sgd,
          save_path="gsn_ae_example.pkl",
          save_freq=5).main_loop()
    print("done training")
def test_multiple_inputs():
    """
    Build a VectorSpacesDataset with two inputs (features0 and features1)
    and train an MLP consuming both of them for 1 epoch.
    """
    # Model: layers first, then the input space (same order as a single
    # nested constructor call would evaluate them).
    h0 = Linear(10, 'h0', 0.1)
    h1 = Linear(10, 'h1', 0.1)
    flattened = FlattenerLayer(
        CompositeLayer('composite', [h0, h1], {0: [1], 1: [0]}))
    classifier = Softmax(5, 'softmax', 0.1)
    mlp = MLP(
        layers=[flattened, classifier],
        input_space=CompositeSpace([VectorSpace(15), VectorSpace(20)]),
        input_source=('features0', 'features1')
    )

    # Data: one random array per source, drawn in the listed order
    floatX = theano.config.floatX
    widths = (20, 15, 5)
    arrays = tuple(np.random.rand(20, width).astype(floatX)
                   for width in widths)
    spaces = CompositeSpace([VectorSpace(width) for width in widths])
    dataset = VectorSpacesDataset(
        arrays,
        (spaces, ('features1', 'features0', 'targets')))

    train = Train(dataset, mlp, SGD(0.1, batch_size=5))
    # the stopping rule is attached after construction
    train.algorithm.termination_criterion = EpochCounter(1)
    train.main_loop()
def test_sgd_sup():
    """Run SGD on a supervised cost.

    Does not test for correctness at all, just that the algorithm runs
    without dying.
    """
    dim = 3
    rng = np.random.RandomState([25, 9, 2012])

    def random_one_hot_dataset(num_examples):
        # features are drawn before the class indices
        features = rng.randn(num_examples, dim)
        classes = rng.randint(0, dim, (num_examples, ))
        targets = np.zeros((num_examples, dim))
        targets[np.arange(num_examples), classes] = 1
        return DenseDesignMatrix(X=features, y=targets)

    dataset = random_one_hot_dataset(10)

    # Including a monitoring dataset lets us test that
    # the monitor works with supervised data
    monitoring_dataset = random_one_hot_dataset(15)

    model = SoftmaxModel(dim)
    cost = SupervisedDummyCost()

    # We need to include this so the test actually stops running at some
    # point
    stop_rule = EpochCounter(5)

    # positional arguments: learning rate, then cost
    algorithm = SGD(1e-3,
                    cost,
                    batch_size=5,
                    monitoring_batches=3,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=stop_rule,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    Train(dataset, model, algorithm,
          save_path=None,
          save_freq=0,
          extensions=None).main_loop()
def test_flattener_layer_state_separation_for_softmax():
    """
    Wrap two Softmax layers in a CompositeLayer and make sure the state
    gets correctly picked apart during a 1-epoch training run.
    """
    softmaxes = [Softmax(5, 'sf1', 0.1), Softmax(5, 'sf2', 0.1)]
    flattened = FlattenerLayer(CompositeLayer('composite', softmaxes))
    mlp = MLP(layers=[flattened], nvis=2)

    floatX = theano.config.floatX
    # features are drawn before targets
    features = np.random.rand(20, 2).astype(floatX)
    targets = np.random.rand(20, 10).astype(floatX)
    dataset = DenseDesignMatrix(X=features, y=targets)

    algorithm = SGD(0.1, batch_size=5, monitoring_dataset=dataset)
    train = Train(dataset, mlp, algorithm)
    # the stopping rule is attached after construction
    train.algorithm.termination_criterion = EpochCounter(1)
    train.main_loop()
def define_training_experiment(self, save_freq=10):
    """Create the ``Train`` object and store it in ``self.experiment``.

    Reads ``self.datasets['train']``, ``self.model``,
    ``self.training_alg``, ``self.save_path`` and ``self.extensions``.
    """
    train_settings = dict(
        dataset=self.datasets['train'],
        model=self.model,
        algorithm=self.training_alg,
        save_path=self.save_path,
        save_freq=save_freq,
        allow_overwrite=True,
        extensions=self.extensions,
    )
    self.experiment = Train(**train_settings)
def model1():
    """Train a NoisyRELU + Softmax MLP on MNIST with SGD.

    The MNIST 'test' split is used as the monitored 'valid' set; training
    stops according to the ``valid_y_misclass`` channel.  ``save_freq=0``
    is passed together with ``./mlp_model1.pkl`` as save path.
    """
    # train set X has dim (60,000, 784), y has dim (60,000, 10)
    train_set = MNIST(which_set='train', one_hot=True)
    # test set X has dim (10,000, 784), y has dim (10,000, 10)
    valid_set = MNIST(which_set='test', one_hot=True)

    # =====<Create the MLP Model>=====
    h2_layer = NoisyRELU(layer_name='h1',
                         sparse_init=15,
                         noise_factor=5,
                         dim=1000,
                         desired_active_rate=0.2,
                         bias_factor=20,
                         max_col_norm=1)
    y_layer = Softmax(layer_name='y', n_classes=10, irange=0.,
                      max_col_norm=1)
    mlp = MLP(batch_size=200,
              input_space=VectorSpace(dim=train_set.X.shape[1]),
              layers=[h2_layer, y_layer])

    # =====<Create the SGD algorithm>=====
    sgd = SGD(init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset={'valid': valid_set},
              cost=MethodCost('cost_from_X'),
              termination_criterion=MonitorBased(
                  channel_name='valid_y_misclass',
                  prop_decrease=0.001,
                  N=50))

    # =====<Extensions>=====
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.9)]

    # =====<Create Training Object>=====
    save_path = './mlp_model1.pkl'
    train_obj = Train(dataset=train_set,
                      model=mlp,
                      algorithm=sgd,
                      extensions=ext,
                      save_path=save_path,
                      save_freq=0)

    # =====<Run the training>=====
    train_obj.main_loop()
def MultiPIECV():
    """Train/evaluate a maxout MLP on MultiPIE for several settings.

    For every ``(learning rate, number of pieces)`` pair a three-hidden-
    layer maxout network is trained (or a previously saved model is loaded
    when ``args.train`` is false and the save file exists), the test-set
    accuracy is computed, and at the end all accuracies are printed.
    """
    # (learning rate, number of maxout pieces)
    parms = [(0.1, 2), (0.1, 3), (0.01, 2), (0.01, 3)]
    accuracies = []

    for learning_rate, num_pieces in parms:
        hidden = [maxout.Maxout(layer_name=name,
                                num_units=1500,
                                num_pieces=num_pieces,
                                W_lr_scale=1.0,
                                irange=0.005,
                                b_lr_scale=1.0)
                  for name in ('h0', 'h1', 'h2')]
        outlayer = mlp.Softmax(layer_name='y', n_classes=6, irange=0)
        model = mlp.MLP(hidden + [outlayer], nvis=1200)

        trainIndices, validationIndices, testIndices = getMultiPIEindices()
        train = MultiPIE('train', indices=trainIndices)
        valid = MultiPIE('valid', indices=validationIndices)
        test = MultiPIE('test', indices=testIndices)

        monitoring = dict(valid=valid)
        termination = MonitorBased(channel_name="valid_y_misclass", N=100)
        extensions = [best_params.MonitorBasedSaveBest(
            channel_name="valid_y_misclass",
            save_path="/data/mcr10/train_best.pkl")]
        algorithm = sgd.SGD(learning_rate,
                            batch_size=100,
                            cost=Dropout(),
                            monitoring_dataset=monitoring,
                            termination_criterion=termination)

        save_path = "/data/mcr10/train_best.pkl"
        if not args.train and os.path.exists(save_path):
            model = serial.load(save_path)
        else:
            print('Running training')
            train_job = Train(train, model, algorithm,
                              extensions=extensions,
                              save_path="/data/mcr10/trainpie.pkl",
                              save_freq=1)
            train_job.main_loop()

        # Compile a function mapping inputs to predicted class indices.
        X = model.get_input_space().make_batch_theano()
        Y = model.fprop(X)
        y = T.argmax(Y, axis=1)
        f = function(inputs=[X], outputs=y, allow_input_downcast=True)
        yhat = f(test.X)

        print(sum(yhat))
        print(yhat.shape)

        y = np.argmax(np.squeeze(test.get_targets()), axis=1)
        # Float divisor: with two integers Python 2 floors the quotient,
        # which reports 0 for every accuracy below 100%.
        accuracy = (y == yhat).sum() / float(y.size)
        accuracies.append(accuracy)

    # TODO: some confusion matrix?
    for i, accuracy in enumerate(accuracies):
        print("for parameter" + str(i))
        print("the correct rate was " + str(accuracy))
def set_model(self, model, algorithm, trainset):
    """Store model, algorithm and dataset and build ``self.trainer``.

    When no save path has been set yet, ``<model.name>.pkl`` is used.
    """
    self.model = model
    self.algorithm = algorithm

    # fall back to a file named after the model
    if self.save_path is None:
        self.save_path = model.name + ".pkl"

    self.dataset = trainset
    trainer_settings = dict(
        model=self.model,
        algorithm=algorithm,
        save_path=self.save_path,
        save_freq=1,
        extensions=self.train_extensions,
        dataset=trainset,
    )
    self.trainer = Train(**trainer_settings)
def fit(X,y=None):
    # Unfinished stub: stores the training parameters, then builds a Train
    # object from ``self.model`` and ``self.train_params`` and runs it.
    #
    # NOTE(review): the body reads ``self`` but the signature has no
    # ``self`` parameter; ``dataset``, ``algorithm`` and ``extension`` are
    # not bound inside this function either.  ``X`` and ``y`` are not used
    # yet.  Confirm the intended signature before relying on this.
    #TODO dataset=
    self.set_train_params(train_params={
        # NOTE(review): 'dtataset' looks like a typo for 'dataset' --
        # confirm against what set_train_params / Train expect.
        'dtataset':dataset,
        'algorithm':algorithm,
        'extensions':extension})#which influenced by X,y
    #real data or symbol?
    train=Train(model=self.model,**self.train_params)
    train.main_loop()
def run(self, start_config_id=None):
    """Train one configuration after another.

    The first configuration is ``start_config_id`` when given; every
    later one comes from ``select_next_config``.  The loop has no exit
    condition inside this method.
    """
    self.db = DatabaseHandler()
    print('running')

    while True:
        if start_config_id is not None:
            config = self.select_config(start_config_id)
            # only the very first iteration uses the explicit id
            start_config_id = None
        else:
            config = self.select_next_config(self.experiment_id)
        (config_id, model_id, ext_id, train_id,
         dataset_id, random_seed, batch_size) = config

        dataset_desc, input_space_id = self.select_dataset(dataset_id)
        input_space = self.get_space(input_space_id)

        # build model
        model = self.get_model(model_id, random_seed, batch_size,
                               input_space)

        # extensions
        extensions = self.get_extensions(ext_id)

        # prepare monitor
        self.prep_valtest_monitor(model, batch_size)

        # monitor based save best
        if self.mbsb_channel_name is not None:
            best_path = self.save_prefix + str(config_id) + "_best.pkl"
            extensions.append(MonitorBasedSaveBest(
                channel_name=self.mbsb_channel_name,
                save_path=best_path,
                cost=False))

        # HPS Logger
        extensions.append(HPSLog(self.log_channel_names, self.db, config_id))

        # training algorithm
        algorithm = self.get_trainingAlgorithm(train_id, batch_size)
        print('sgd complete')

        learner = Train(dataset=self.train_ddm,
                        model=model,
                        algorithm=algorithm,
                        extensions=extensions)
        print('learning')
        learner.main_loop()

        self.set_end_time(config_id)
def test_bgd_unsup():
    """Run BGD on a cost that only uses the inputs.

    Does not test for correctness at all, just that the algorithm runs
    without dying.
    """
    dim = 3
    rng = np.random.RandomState([25, 9, 2012])

    dataset = DenseDesignMatrix(X=rng.randn(10, dim))

    # including a monitoring dataset lets us test that the monitor works
    # alongside training
    monitoring_dataset = DenseDesignMatrix(X=rng.randn(15, dim))

    model = SoftmaxModel(dim)

    class DummyCost(Cost):
        """Mean squared difference between the model output and its input."""

        def expr(self, model, data):
            self.get_data_specs(model)[0].validate(data)
            inputs = data
            return T.square(model(inputs) - inputs).mean()

        def get_data_specs(self, model):
            return (model.get_input_space(), model.get_input_source())

    cost = DummyCost()

    # We need to include this so the test actually stops running at some
    # point
    stop_rule = EpochCounter(5)

    algorithm = BGD(cost,
                    batch_size=5,
                    monitoring_batches=2,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=stop_rule)

    Train(dataset, model, algorithm,
          save_path=None,
          save_freq=0,
          extensions=None).main_loop()
def __call__(self):
    """Fit ``self.cnmf`` on ``self.X`` and return the assignments.

    Returns a pair ``(labels, results)`` where ``labels`` is the arg-max
    of ``H`` along axis 1 and ``results`` holds the values of ``W`` and
    ``H``.
    """
    cnmf = self.cnmf
    cnmf.termination_criterion = self.termination_criterion
    cnmf.set_W(self.W)

    Train(DenseDesignMatrix(X=self.X), cnmf).main_loop()

    # a new Monitor is attached once training has finished
    cnmf.monitor = Monitor(cnmf)

    H = cnmf.H.get_value()
    W = cnmf.W.get_value()
    results = {"W": W, "H": H}
    return numpy.argmax(H, axis=1), results
def test_kmeans():
    """Train a 5-cluster KMeans model on random 10-dimensional data."""
    # features are drawn before the labels
    features = np.random.random(size=(100, 10))
    labels = np.random.randint(5, size=(100, 1))

    dataset = DenseDesignMatrix(features, y=labels)
    model = KMeans(k=5, nvis=10)

    Train(model=model, dataset=dataset).main_loop()
def test_sgd_topo():
    """Run SGD on data with topology.

    Does not test for correctness at all, just that the algorithm runs
    without dying.
    """
    rows, cols, channels = 3, 4, 2
    dim = rows * cols * channels
    rng = np.random.RandomState([25, 9, 2012])

    def random_topo_dataset(num_examples):
        # the topological view is drawn before the class indices
        topo = rng.randn(num_examples, rows, cols, channels)
        classes = rng.randint(0, dim, (num_examples,))
        targets = np.zeros((num_examples, dim))
        targets[np.arange(num_examples), classes] = 1
        return DenseDesignMatrix(topo_view=topo, y=targets)

    dataset = random_topo_dataset(10)

    # including a monitoring dataset lets us test that
    # the monitor works with supervised data
    monitoring_dataset = random_topo_dataset(15)

    model = TopoSoftmaxModel(rows, cols, channels)
    cost = CrossEntropy()

    # We need to include this so the test actually stops running at some
    # point
    stop_rule = EpochCounter(5)

    # positional arguments: learning rate, then cost
    algorithm = SGD(1e-3,
                    cost,
                    batch_size=5,
                    monitoring_batches=3,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=stop_rule,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    Train(dataset, model, algorithm,
          save_path=None,
          save_freq=0,
          extensions=None).main_loop()
def testing_multiple_datasets_with_specified_dataset_in_monitor_based_lr():
    """Check MonitorBasedLRAdjuster with an explicit ``dataset_name``.

    Tests that the class MonitorBasedLRAdjuster in sgd.py can properly use
    the specified dataset_name in the constructor when multiple monitoring
    datasets exist.
    """
    dim = 3
    m = 10

    # Plain decimal seeds: the former "06, 02" spelling is an octal
    # literal on Python 2 (same values) and a syntax error on Python 3.
    rng = np.random.RandomState([6, 2, 2014])

    X = rng.randn(m, dim)
    Y = rng.randn(m, dim)

    learning_rate = 1e-2
    batch_size = 5

    # We need to include this so the test actually stops running at some
    # point
    epoch_num = 1

    # two monitoring datasets so that a name has to be selected
    monitoring_train = DenseDesignMatrix(X=X)
    monitoring_test = DenseDesignMatrix(X=Y)

    cost = DummyCost()
    model = SoftmaxModel(dim)
    dataset = DenseDesignMatrix(X=X)

    termination_criterion = EpochCounter(epoch_num)
    monitoring_dataset = {'train': monitoring_train,
                          'test': monitoring_test}

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    monitoring_batches=2,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    # First key in iteration order; unlike ``keys()[0]`` this also works
    # where ``dict.keys()`` returns a non-indexable view.
    dataset_name = next(iter(monitoring_dataset))
    monitor_lr = MonitorBasedLRAdjuster(dataset_name=dataset_name)

    train = Train(dataset, model, algorithm,
                  save_path=None,
                  save_freq=0,
                  extensions=[monitor_lr])
    train.main_loop()
def test_train_supervised():
    """
    Train a supervised GSN.

    Builds a three-layer GSN sized from the module-level dataset ``ds``,
    combines a reconstruction cost (layer 0) with a classification cost
    (layer 2), and runs the SGD training loop.
    """
    # network description, one entry per layer
    sizes = [ds.X.shape[1], 1000, ds.y.shape[1]]
    activations = ["sigmoid", "tanh", rescaled_softmax]
    noise_before = [GaussianCorruptor(0.5)] * 3
    noise_after = [SaltPepperCorruptor(.3), None, SmoothOneHotCorruptor(.5)]
    samplers = [BinomialSampler(), None, MultinomialSampler()]

    gsn = GSN.new(
        layer_sizes=sizes,
        activation_funcs=activations,
        pre_corruptors=noise_before,
        post_corruptors=noise_after,
        layer_samplers=samplers,
        tied=False
    )

    # divide each cost by the layer width so costs are averaged, not summed
    recon_xent = MeanBinaryCrossEntropy()
    reconstruction_cost = lambda a, b: recon_xent.cost(a, b) / ds.X.shape[1]

    class_xent = MeanBinaryCrossEntropy()
    classification_cost = lambda a, b: class_xent.cost(a, b) / ds.y.shape[1]

    # (layer index, weight, cost function) triples
    cost_terms = [
        (0, 1.0, reconstruction_cost),   # reconstruction on layer 0
        (2, 2.0, classification_cost),   # classification on layer 2
    ]
    c = GSNCost(cost_terms, walkback=WALKBACK, mode="supervised")

    stop_rule = EpochCounter(MAX_EPOCHS)
    alg = SGD(
        LEARNING_RATE,
        init_momentum=MOMENTUM,
        cost=c,
        termination_criterion=stop_rule,
        batches_per_iter=BATCHES_PER_EPOCH,
        batch_size=BATCH_SIZE,
        monitoring_dataset=ds,
        monitoring_batches=10,
    )

    lr_adjuster = MonitorBasedLRAdjuster()
    trainer = Train(ds, gsn, algorithm=alg,
                    save_path="gsn_sup_example.pkl", save_freq=10,
                    extensions=[lr_adjuster])
    trainer.main_loop()
    print("done training")
def train_example(dataset=None):
    """Train a Gaussian-binary RBM on ``dataset`` with denoising score matching.

    The same dataset is used both for training and for monitoring. Training
    stops according to a ``MonitorBased`` criterion and the experiment is
    saved to ``./experiment/training.pkl``.
    """
    rbm_settings = dict(
        nvis=1296,
        nhid=61,
        irange=0.5,
        energy_function_class=grbm_type_1(),
        learn_sigma=True,
        init_sigma=.4,
        init_bias_hid=2.,
        mean_vis=False,
        sigma_lr_scale=1e-3,
    )
    model = GaussianBinaryRBM(**rbm_settings)

    noise = GaussianCorruptor(stdev=0.4)
    cost = SMD(corruptor=noise)

    stop_rule = MonitorBased(prop_decrease=0.01, N=1)
    algorithm = SGD(
        learning_rate=.1,
        batch_size=5,
        monitoring_batches=20,
        monitoring_dataset=dataset,
        cost=cost,
        termination_criterion=stop_rule,
    )

    train = Train(
        dataset=dataset,
        model=model,
        save_path="./experiment/training.pkl",
        save_freq=10,
        algorithm=algorithm,
        extensions=[],
    )
    train.main_loop()
def model2():
    """Train a two-hidden-layer rectifier MLP on MNIST and save it to disk.

    Uses SGD with momentum, a weight-decay-regularised cost, early stopping
    on the ``valid_y_misclass`` channel, and a momentum schedule extension.
    """
    # Per the original notes: train X is (60,000, 784), y is (60,000, 10);
    # test X is (10,000, 784), y is (10,000, 10).
    train_set = MNIST(which_set='train', one_hot=True)
    test_set = MNIST(which_set='test', one_hot=True)

    # ----- MLP model -----
    h1_layer = RectifiedLinear(layer_name='h1', dim=1000, irange=0.5)
    h2_layer = RectifiedLinear(layer_name='h2', dim=1000, sparse_init=15,
                               max_col_norm=1)
    n_classes = train_set.y.shape[1]
    y_layer = Softmax(layer_name='y', n_classes=n_classes, irange=0.5)

    n_inputs = train_set.X.shape[1]
    mlp = MLP(batch_size=100,
              input_space=VectorSpace(dim=n_inputs),
              layers=[h1_layer, h2_layer, y_layer])

    # ----- SGD algorithm -----
    # Note that the 'valid' monitoring channel is fed the training set.
    monitors = {'valid': train_set, 'test': test_set}
    total_cost = SumOfCosts(costs=[
        MethodCost('cost_from_X'),
        WeightDecay(coeffs=[0.00005, 0.00005, 0.00005]),
    ])
    stop_rule = MonitorBased(channel_name='valid_y_misclass',
                             prop_decrease=0.0001,
                             N=5)
    sgd = SGD(batch_size=100,
              init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset=monitors,
              cost=total_cost,
              termination_criterion=stop_rule)

    # ----- extensions -----
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.99)]

    # ----- training object -----
    save_path = './mlp_model2.pkl'
    train_obj = Train(dataset=train_set,
                      model=mlp,
                      algorithm=sgd,
                      extensions=ext,
                      save_path=save_path,
                      save_freq=0)
    train_obj.main_loop()
def test(layers):
    """Train an MLP built from ``layers`` on the Iris dataset.

    The Iris data is used for both training and monitoring; the MLP is
    declared with 4 visible units and a batch size of 10.
    """
    # Imports are kept local to the function, as in the original.
    from pylearn2.datasets.iris import Iris
    from pylearn2.models.mlp import MLP
    from pylearn2.costs.mlp import Default
    from pylearn2.training_algorithms.sgd import SGD
    from pylearn2.train import Train

    iris_data = Iris()
    network = MLP(layers=layers, nvis=4, batch_size=10)
    objective = Default()
    optimiser = SGD(learning_rate=0.01, cost=objective,
                    monitoring_dataset=iris_data)

    Train(dataset=iris_data, model=network, algorithm=optimiser).main_loop()
def __init__(self, runner, model_params, resume=False, resume_data=None,
             s3_data=None, **kwargs):
    """Assemble the ``Train`` object for an MLP run.

    Parameters
    ----------
    runner
        Provides ``runner.model['out_nonlin']`` and the data provider
        ``runner.dp`` (train/test sets and ``uniq_outputs_num``).
    model_params : dict
        Hyper-parameters; this method reads the ``learning_rate``,
        ``momentum``, ``maxnum_iter`` and ``save_freq`` entries.
    resume : bool
        When true the model is restored via ``resume_model`` and the
        initial learning rate is divided by
        ``decay_factor ** batches_seen``.
    resume_data
        Passed through to ``resume_model``, ``construct_update`` and the
        stat reporter.
    s3_data, **kwargs
        Accepted but not used here.
    """
    self.model_params = model_params
    self.out_nonlin = runner.model['out_nonlin']

    # A linear-Gaussian output uses neither an output count nor a cost.
    linear_gaussian = self.out_nonlin == 'LINEARGAUSSIAN'
    outputs_num = None if linear_gaussian else runner.dp.uniq_outputs_num
    cost = None if linear_gaussian else self.get_cost_fn()

    data = runner.dp
    dataset = self.construct_datasets(data.train_set_x, data.train_set_y,
                                      outputs_num)
    valid_dataset = self.construct_datasets(data.test_set_x, data.test_set_y,
                                            outputs_num)

    if resume:
        model = self.resume_model(model_params, resume_data)
    else:
        model = self.new_model(model_params, dataset=dataset)

    lr_params = model_params['learning_rate']
    lr_init = lr_params['init']
    if resume:
        batches_seen = model.monitor.get_batches_seen()
        lr_init = lr_init / (lr_params['decay_factor'] ** batches_seen)

    batches_per_iter = get_batches_per_iter(model_params, dataset)
    termination_criterion = MaxEpochNumber(model_params['maxnum_iter'])
    update_callbacks, extensions = construct_update(model_params, resume,
                                                    resume_data)

    monitors = {'valid': valid_dataset, 'train': dataset}
    algorithm = SGD(learning_rate=lr_init,
                    init_momentum=model_params['momentum']['init'],
                    monitoring_dataset=monitors,
                    cost=cost,
                    termination_criterion=termination_criterion,
                    update_callbacks=update_callbacks,
                    batches_per_iter=batches_per_iter)

    self.train_obj = Train(dataset=dataset, model=model,
                           algorithm=algorithm, extensions=extensions)

    # The stat reporter is appended after construction, as a last extension.
    reporter = MLPStatReporter(model, runner, resume=resume,
                               resume_data=resume_data,
                               save_freq=model_params['save_freq'])
    self.train_obj.extensions.append(reporter)
def model3():
    """Train a single-hidden-layer NoisyRELU MLP on SVHN and save it to disk.

    Uses SGD with momentum, early stopping on the ``valid_y_misclass``
    channel, and a momentum schedule extension.
    """
    # NOTE(review): the original comments quoted MNIST shapes here
    # ((60,000, 784) / (10,000, 784)); the actual SVHN_On_Memory shapes are
    # not visible from this function - confirm before relying on them.
    train_set = SVHN_On_Memory(which_set='train')
    test_set = SVHN_On_Memory(which_set='test')

    # ----- MLP model -----
    h1_layer = NoisyRELU(layer_name='h1', dim=2000, threshold=5,
                         sparse_init=15, max_col_norm=1)
    n_classes = train_set.y.shape[1]
    y_layer = Softmax(layer_name='y', n_classes=n_classes, irange=0.5)

    n_inputs = train_set.X.shape[1]
    mlp = MLP(batch_size=64,
              input_space=VectorSpace(dim=n_inputs),
              layers=[h1_layer, y_layer])

    # ----- SGD algorithm -----
    # Note that the 'valid' monitoring channel is fed the training set.
    monitors = {'valid': train_set, 'test': test_set}
    objective = MethodCost('cost_from_X')
    stop_rule = MonitorBased(channel_name='valid_y_misclass',
                             prop_decrease=0.001,
                             N=50)
    sgd = SGD(batch_size=64,
              init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset=monitors,
              cost=objective,
              termination_criterion=stop_rule)

    # ----- extensions -----
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.9)]

    # ----- training object -----
    save_path = './mlp_model.pkl'
    train_obj = Train(dataset=train_set,
                      model=mlp,
                      algorithm=sgd,
                      extensions=ext,
                      save_path=save_path,
                      save_freq=10)
    train_obj.main_loop()
def main():
    """Train a small tanh MLP on ``sinDataset`` and report the elapsed time.

    Command-line options:
      -p           collect predictions during monitoring and plot them
                   after training.
      -f/--file    output filename handed to the plot call.
    """
    # time.clock() was removed in Python 3.8; use perf_counter() where it
    # exists and fall back to clock() only on interpreters that lack it.
    timer = getattr(time, 'perf_counter', None) or time.clock
    start_time = timer()

    # option parser
    parser = OptionParser()
    parser.add_option("-p",
                      dest="plot_prediction",
                      action="store_true",
                      default=False,
                      help="plot model prediction transitions")
    # NOTE(review): the help text mentions an "-a" option, but only "-p" is
    # defined in this function - confirm which flag is meant.
    parser.add_option("-f", "--file",
                      dest="out_filename",
                      default=None,
                      help="write animation to FILE (require -a option)",
                      metavar="FILE")
    (options, args) = parser.parse_args()

    # make dataset
    ds = sinDataset(SIZE_DATA)

    # make layers
    hidden_layer1 = mlp.Tanh(layer_name='hidden1', dim=20, irange=0.5,
                             init_bias=1.0)
    hidden_layer2 = mlp.Tanh(layer_name='hidden2', dim=4, irange=0.5,
                             init_bias=1.0)
    output_layer = mlp.Linear(layer_name='out', dim=1, irange=0.5,
                              init_bias=1)

    # set layers
    layers = [hidden_layer1, hidden_layer2, output_layer]
    model = mlp.MLP(layers, nvis=1)

    # set training rule and extensions
    algorithm = sgd.SGD(
        learning_rate=0.01,
        batch_size=1,
        monitoring_batch_size=1,
        monitoring_batches=1,
        monitoring_dataset=ds,
        termination_criterion=EpochCounter(MAX_EPOCHS)
    )
    extensions = [sgd.MonitorBasedLRAdjuster()]
    if options.plot_prediction:
        plotEx = PlotPredictionOnMonitor()
        extensions.append(plotEx)

    trainer = Train(model=model,
                    algorithm=algorithm,
                    dataset=ds,
                    extensions=extensions,
                    save_path='./funcmodel.pkl',
                    save_freq=500)

    # training loop
    trainer.main_loop()

    elapsed = timer() - start_time
    print("tortal_seconds_this_learning : %f s(%f min)"
          % (elapsed, elapsed / 60))

    if options.plot_prediction:
        plotEx.plot(out_filename=options.out_filename)
def test_bgd_unsup():
    """Smoke-test BGD with an unsupervised cost.

    Only checks that the training loop runs to completion without raising;
    it makes no claim about the correctness of the learned parameters.
    """
    dim = 3
    rng = np.random.RandomState([25, 9, 2012])

    # Training data: features only, no targets.
    m = 10
    X = rng.randn(m, dim)
    dataset = DenseDesignMatrix(X=X)

    # Including a monitoring dataset lets us test that the monitor works
    # alongside the training algorithm.
    m = 15
    X = rng.randn(m, dim)
    monitoring_dataset = DenseDesignMatrix(X=X)

    model = SoftmaxModel(dim)

    batch_size = 5

    class DummyCost(Cost):
        """Mean squared difference between the model output and its input."""

        def expr(self, model, data):
            self.get_data_specs(model)[0].validate(data)
            X = data
            return T.square(model(X) - X).mean()

        def get_data_specs(self, model):
            return (model.get_input_space(), model.get_input_source())

    cost = DummyCost()

    # We need to include this so the test actually stops running at some
    # point.
    termination_criterion = EpochCounter(5)

    algorithm = BGD(cost,
                    batch_size=batch_size,
                    monitoring_batches=2,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion)

    train = Train(dataset, model, algorithm,
                  save_path=None, save_freq=0, extensions=None)

    train.main_loop()
def test_kmeans():
    """Smoke-test that a KMeans model can be trained through ``Train``.

    Builds a random 100 x 10 design matrix with integer labels in [0, 5)
    and runs the main loop; passing means no exception was raised.
    """
    n_examples, n_features, n_clusters = 100, 10, 5

    features = np.random.random(size=(n_examples, n_features))
    labels = np.random.randint(n_clusters, size=(n_examples, 1))
    dataset = DenseDesignMatrix(features, y=labels)

    model = KMeans(k=n_clusters, nvis=n_features)

    Train(model=model, dataset=dataset).main_loop()
def MultiPIEmain():
    """Train (or load) a maxout MLP on MultiPIE and print its test accuracy.

    If the module-level ``args.train`` flag is false and a best-model
    checkpoint already exists, that checkpoint is loaded; otherwise a new
    model is trained. The model is then compiled into a prediction function
    and evaluated on the test split.
    """
    h0 = maxout.Maxout(layer_name='h0', num_units=1000, num_pieces=2,
                       W_lr_scale=1.0, irange=0.005, b_lr_scale=1.0)
    h1 = maxout.Maxout(layer_name='h1', num_units=1000, num_pieces=2,
                       W_lr_scale=1.0, irange=0.005, b_lr_scale=1.0)
    h2 = maxout.Maxout(layer_name='h2', num_units=1000, num_pieces=2,
                       W_lr_scale=1.0, irange=0.005, b_lr_scale=1.0)
    outlayer = mlp.Softmax(layer_name='y', n_classes=6, irange=0)

    layers = [h0, h1, h2, outlayer]
    model = mlp.MLP(layers, nvis=1200)

    trainIndices, validationIndices, testIndices = getMultiPIEindices()
    train = MultiPIE('train', indices=trainIndices)
    valid = MultiPIE('valid', indices=validationIndices)
    test = MultiPIE('test', indices=testIndices)

    # The best-model checkpoint is written by MonitorBasedSaveBest and read
    # back below, so both must use the same path.
    save_path = "/data/mcr10/train_best.pkl"

    monitoring = dict(valid=valid)
    termination = MonitorBased(channel_name="valid_y_misclass", N=100)
    extensions = [
        best_params.MonitorBasedSaveBest(channel_name="valid_y_misclass",
                                         save_path=save_path),
        MomentumAdjustor(final_momentum=0.7, start=1, saturate=250),
    ]

    algorithm = sgd.SGD(0.05, batch_size=20, cost=Dropout(),
                        learning_rule=Momentum(0.5),
                        monitoring_dataset=monitoring,
                        termination_criterion=termination)

    if not args.train and os.path.exists(save_path):
        model = serial.load(save_path)
    else:
        print('Running training')
        train_job = Train(train, model, algorithm, extensions=extensions,
                          save_path="/data/mcr10/trainpie.pkl", save_freq=50)
        train_job.main_loop()

    # Compile a function mapping an input batch to predicted class indices.
    X = model.get_input_space().make_batch_theano()
    Y = model.fprop(X)
    y = T.argmax(Y, axis=1)
    f = function(inputs=[X], outputs=y, allow_input_downcast=True)
    yhat = f(test.X)

    print(sum(yhat))
    print(yhat.shape)

    y = np.argmax(np.squeeze(test.get_targets()), axis=1)
    # Force true division: on Python 2 the integer quotient of two counts
    # truncates to 0 for any accuracy below 100%.
    accuracy = float((y == yhat).sum()) / y.size
    print('accuracy %s' % (accuracy,))
def testing_multiple_datasets_in_monitor_based_lr():
    """Check MonitorBasedLRAdjuster rejects ambiguous monitoring channels.

    Tests that the class MonitorBasedLRAdjuster in sgd.py does not accept
    multiple datasets in which multiple channels ending in '_objective'
    exist. This case happens when the user has not specified either
    channel_name or dataset_name in the constructor. The test passes when
    the training loop raises ValueError.
    """
    dim = 3
    m = 10

    # Plain decimal literals: the former 06 / 02 spelling has the same value
    # on Python 2 but is a SyntaxError on Python 3.
    rng = np.random.RandomState([6, 2, 2014])

    X = rng.randn(m, dim)
    Y = rng.randn(m, dim)

    learning_rate = 1e-2
    batch_size = 5

    # We need to include this so the test actually stops running at some
    # point.
    epoch_num = 1

    # Two monitoring datasets and no dataset/channel name: ambiguous.
    monitoring_train = DenseDesignMatrix(X=X)
    monitoring_test = DenseDesignMatrix(X=Y)

    cost = DummyCost()

    model = SoftmaxModel(dim)

    dataset = DenseDesignMatrix(X=X)

    termination_criterion = EpochCounter(epoch_num)

    algorithm = SGD(learning_rate, cost,
                    batch_size=batch_size,
                    monitoring_batches=2,
                    monitoring_dataset={'train': monitoring_train,
                                        'test': monitoring_test},
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    monitor_lr = MonitorBasedLRAdjuster()

    train = Train(dataset, model, algorithm,
                  save_path=None, save_freq=0, extensions=[monitor_lr])

    try:
        train.main_loop()
    except ValueError:
        # Expected: the adjuster cannot pick an objective channel.
        return

    raise AssertionError("MonitorBasedLRAdjuster takes multiple dataset names in which more than one \"objective\" channel exist and the user has not specified " +
                         "either channel_name or dataset_name in the constructor to disambiguate.")
class AutoEncoderTrainer(BaseTrainer):
    """Trainer that fits a denoising autoencoder with SGD.

    The training set doubles as the monitoring set, and progress is
    reported through an ``AutoEncoderStatReporter`` extension.
    """

    def __init__(self, runner, model_params, resume=False, resume_data=None,
                 s3_data=None, **kwargs):
        """Build the dataset, model, algorithm and ``Train`` object.

        ``s3_data`` and ``**kwargs`` are accepted but not used here.
        """
        dataset = create_dense_design_matrix(x=runner.dp.train_set_x)

        if not resume:
            model = self.new_model(model_params, dataset=dataset)
        else:
            model, model_params = self.resume_model(model_params,
                                                    resume_data)

        stop_rule = MaxEpochNumber(model_params['maxnum_iter'])
        algorithm = SGD(
            learning_rate=model_params['learning_rate']['init'],
            monitoring_dataset=dataset,
            cost=MeanSquaredReconstructionError(),
            termination_criterion=stop_rule,
            batch_size=model_params['batch_size'],
        )

        reporter = AutoEncoderStatReporter(
            runner,
            resume=resume,
            resume_data=resume_data,
            save_freq=model_params['save_freq'],
        )
        self.train_obj = Train(dataset=dataset, model=model,
                               algorithm=algorithm, extensions=[reporter])

    def train(self):
        """Run the training main loop."""
        self.train_obj.main_loop()

    def resume_model(self, model_params, resume_data):
        """Return the stored model (with its monitor pushed) and the params."""
        # TODO: FIX IT
        restored = pylearn2.monitor.push_monitor(resume_data['model'],
                                                 'monitor_validation', True)
        return restored, model_params

    def new_model(self, model_params, dataset):
        """Create a fresh ``DenoisingAutoencoder`` sized to ``dataset``."""
        noise = BinomialCorruptor(
            corruption_level=model_params['noise_level'])
        return DenoisingAutoencoder(
            nvis=dataset.X.shape[1],
            nhid=model_params['hidden_outputs'],
            irange=model_params['irange'],
            corruptor=noise,
            act_enc='tanh',
            act_dec=None,
        )
def test_train_supervised():
    """
    Train a supervised GSN.

    Builds a three-layer GSN sized from the module-level dataset ``ds``,
    combines a reconstruction cost (layer 0) with a classification cost
    (layer 2), and runs the SGD training loop.
    """
    # initialize the GSN
    gsn = GSN.new(
        layer_sizes=[ds.X.shape[1], 1000, ds.y.shape[1]],
        activation_funcs=["sigmoid", "tanh", rescaled_softmax],
        pre_corruptors=[GaussianCorruptor(0.5)] * 3,
        post_corruptors=[SaltPepperCorruptor(.3), None,
                         SmoothOneHotCorruptor(.5)],
        layer_samplers=[BinomialSampler(), None, MultinomialSampler()],
        tied=False
    )

    # average over costs rather than summing
    _rcost = MeanBinaryCrossEntropy()
    reconstruction_cost = lambda a, b: _rcost.cost(a, b) / ds.X.shape[1]

    _ccost = MeanBinaryCrossEntropy()
    classification_cost = lambda a, b: _ccost.cost(a, b) / ds.y.shape[1]

    # combine costs into GSNCost object
    c = GSNCost(
        [
            # reconstruction on layer 0 with weight 1.0
            (0, 1.0, reconstruction_cost),

            # classification on layer 2 with weight 2.0
            (2, 2.0, classification_cost)
        ],
        walkback=WALKBACK,
        mode="supervised"
    )

    alg = SGD(
        LEARNING_RATE,
        init_momentum=MOMENTUM,
        cost=c,
        termination_criterion=EpochCounter(MAX_EPOCHS),
        batches_per_iter=BATCHES_PER_EPOCH,
        batch_size=BATCH_SIZE,
        monitoring_dataset=ds,
        monitoring_batches=10,
    )

    trainer = Train(ds, gsn, algorithm=alg,
                    save_path="gsn_sup_example.pkl", save_freq=10,
                    extensions=[MonitorBasedLRAdjuster()])
    trainer.main_loop()
    # Call form prints the same text on Python 2 and is valid on Python 3,
    # where the bare print statement is a SyntaxError.
    print("done training")
def test_sgd_topo():
    """Smoke-test SGD on data that carries a topological view.

    Only checks that the training loop runs to completion without raising;
    it makes no claim about the correctness of the learned parameters.
    """
    rows = 3
    cols = 4
    channels = 2
    rng = np.random.RandomState([25, 9, 2012])

    m = 10
    dataset = get_topological_dataset(rng, rows, cols, channels, m)

    # Including a monitoring dataset lets us test that the monitor works
    # with supervised data.
    m = 15
    monitoring_dataset = get_topological_dataset(rng, rows, cols, channels, m)

    model = TopoSoftmaxModel(rows, cols, channels)

    learning_rate = 1e-3
    batch_size = 5

    cost = CrossEntropy()

    # We need to include this so the test actually stops running at some
    # point.
    termination_criterion = EpochCounter(5)

    algorithm = SGD(learning_rate, cost,
                    batch_size=batch_size,
                    monitoring_batches=3,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    train = Train(dataset, model, algorithm,
                  save_path=None, save_freq=0, extensions=None)

    train.main_loop()
def testing_multiple_datasets_in_monitor_based_lr():
    """Check MonitorBasedLRAdjuster rejects ambiguous monitoring channels.

    Tests that the class MonitorBasedLRAdjuster in sgd.py does not accept
    multiple datasets in which multiple channels ending in '_objective'
    exist. This case happens when the user has not specified either
    channel_name or dataset_name in the constructor. The test passes when
    the training loop raises ValueError.
    """
    dim = 3
    m = 10

    # Plain decimal literals: the former 06 / 02 spelling has the same value
    # on Python 2 but is a SyntaxError on Python 3.
    rng = np.random.RandomState([6, 2, 2014])

    X = rng.randn(m, dim)
    Y = rng.randn(m, dim)

    learning_rate = 1e-2
    batch_size = 5

    # We need to include this so the test actually stops running at some
    # point.
    epoch_num = 1

    # Two monitoring datasets and no dataset/channel name: ambiguous.
    monitoring_train = DenseDesignMatrix(X=X)
    monitoring_test = DenseDesignMatrix(X=Y)

    cost = DummyCost()

    model = SoftmaxModel(dim)

    dataset = DenseDesignMatrix(X=X)

    termination_criterion = EpochCounter(epoch_num)

    algorithm = SGD(learning_rate, cost,
                    batch_size=batch_size,
                    monitoring_batches=2,
                    monitoring_dataset={'train': monitoring_train,
                                        'test': monitoring_test},
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    monitor_lr = MonitorBasedLRAdjuster()

    train = Train(dataset, model, algorithm,
                  save_path=None, save_freq=0, extensions=[monitor_lr])

    try:
        train.main_loop()
    except ValueError:
        # Expected: the adjuster cannot pick an objective channel.
        return

    raise AssertionError("MonitorBasedLRAdjuster takes multiple dataset names in which more than one \"objective\" channel exist and the user has not specified " +
                         "either channel_name or dataset_name in the constructor to disambiguate.")
def get_config(self):
    """Prepare the experiment and return ``(model, learner, algorithm)``.

    Loads the dataset and the model, sets up the monitor, then builds the
    training algorithm, the extensions and the ``Train`` learner, in that
    order. Channel setup is not performed by this method.
    """
    # Preparation steps; each one is called for its effect on ``self``.
    self.load_dataset()
    self.load_model()
    self.setup_monitor()

    algorithm = self.get_train()
    extensions = self.get_extensions()

    learner = Train(
        dataset=self.train_ddm,
        model=self.model,
        algorithm=algorithm,
        extensions=extensions,
    )
    return self.model, learner, algorithm
def get_trainer(model, trainset, validset, epochs=20, batch_size=200):
    """Return a ``Train`` object running dropout SGD on ``model``.

    Parameters
    ----------
    model
        Model to train.
    trainset
        Training dataset.
    validset
        Monitoring dataset, or None; with None no monitoring batch count
        is requested.
    epochs : int
        Number of epochs before termination; also sets when the momentum
        schedule saturates (80% of ``epochs``).
    batch_size : int
        SGD mini-batch size.
    """
    if validset is None:
        monitoring_batches = None
    else:
        monitoring_batches = 20

    # Per-layer input keep probabilities; each scale is the reciprocal.
    include_probs = {'h0': 0.8, 'h1': 0.8, 'h2': 0.8, 'h3': 0.8, 'y': 0.5}
    scales = dict((name, 1. / prob) for name, prob in include_probs.items())
    dropout_cost = Dropout(input_include_probs=include_probs,
                           input_scales=scales,
                           default_input_include_prob=0.5,
                           default_input_scale=1. / 0.5)

    stop_rule = EpochCounter(epochs)
    lr_decay = ExponentialDecay(decay_factor=1.0001, min_lr=0.001)
    train_algo = SGD(batch_size=batch_size,
                     init_momentum=0.5,
                     learning_rate=0.1,
                     monitoring_batches=monitoring_batches,
                     monitoring_dataset=validset,
                     cost=dropout_cost,
                     termination_criterion=stop_rule,
                     update_callbacks=lr_decay)

    momentum_schedule = MomentumAdjustor(final_momentum=0.9, start=0,
                                         saturate=int(epochs*0.8))
    return Train(model=model,
                 algorithm=train_algo,
                 dataset=trainset,
                 save_freq=0,
                 save_path='epoch',
                 extensions=[momentum_schedule])
def get_config(self, start_config_id=None):
    """Select a configuration and return ``(config_id, model, learner, algorithm)``.

    With ``start_config_id`` given, that configuration is selected;
    otherwise the next one is taken via ``select_next_config``. The
    dataset, model, monitor, algorithm, extensions and channels are then
    set up in that order before the ``Train`` learner is built.
    """
    if start_config_id is None:
        selection = self.select_next_config()
    else:
        selection = self.select_config(start_config_id)
    # The random seed is part of the selection tuple but is not used here.
    config_id, _seed, ext_array, dataset_id, channel_array = selection

    self.load_dataset(dataset_id)
    self.load_model(config_id)
    self.setup_monitor()

    algorithm = self.get_train(config_id)
    extensions = self.get_extensions(ext_array, config_id)
    self.setup_channels(channel_array)

    learner = Train(
        dataset=self.train_ddm,
        model=self.model,
        algorithm=algorithm,
        extensions=extensions,
    )
    return config_id, self.model, learner, algorithm
def get_ae_pretrainer(layer, data, batch_size, epochs=30):
    """Return a ``Train`` object that pre-trains an autoencoder ``layer``.

    ``data`` is used for both training and monitoring. The cost is the mean
    squared reconstruction error, which the original notes associate with
    DenoisingAutoencoder layers; those notes suggest that contractive
    variants would instead combine it, through ``SumOfCosts``, with
    ``MethodCost`` terms for 'contraction_penalty' (and additionally
    'higher_order_penalty' for the higher-order variant).
    """
    init_lr = 0.05

    momentum_rule = Momentum(init_momentum=0.5)
    reconstruction_cost = MeanSquaredReconstructionError()
    stop_rule = EpochCounter(epochs)

    # NOTE(review): monitoring_batches is set to the batch size, as in the
    # original - confirm that this is the intended number of batches.
    train_algo = SGD(
        batch_size=batch_size,
        learning_rate=init_lr,
        learning_rule=momentum_rule,
        monitoring_batches=batch_size,
        monitoring_dataset=data,
        cost=reconstruction_cost,
        termination_criterion=stop_rule,
    )

    schedules = [
        MomentumAdjustor(final_momentum=0.9, start=0, saturate=25),
        LinearDecayOverEpoch(start=1, saturate=25, decay_factor=.02),
    ]
    return Train(model=layer, algorithm=train_algo, dataset=data,
                 extensions=schedules)
def __init__(self, dataset_iterator, model, algorithm=None,
             save_path=None, save_freq=0, extensions=None,
             allow_overwrite=True, save_folds=False, cv_extensions=None):
    """Build one isolated ``Train`` object per cross-validation fold.

    Parameters
    ----------
    dataset_iterator
        Iterable yielding, for each fold, a dict that maps dataset names to
        datasets; it must contain a 'train' entry.
    model
        A single model (deep-copied for every fold) or a list holding one
        model per fold.
    algorithm
        Training algorithm, deep-copied for every fold; may be None.
    save_path, save_freq
        Only used per fold when ``save_folds`` is true; the fold index is
        inserted before the file extension.
    extensions
        Train extensions, deep-copied for every fold.
    allow_overwrite
        Forwarded to every ``Train`` object.
    save_folds : bool
        Whether individual folds are saved.
    cv_extensions
        Stored on the instance; defaults to an empty list.

    Raises
    ------
    AssertionError
        If the iterator yields something that is not a dict.
    KeyError
        If a yielded dict has no 'train' entry.
    """
    self.dataset_iterator = dataset_iterator
    trainers = []
    for k, datasets in enumerate(dataset_iterator):
        # Validate explicitly rather than with `assert`, which is stripped
        # under -O. Checking before Train() is built also keeps unrelated
        # AssertionError/KeyError raised inside Train from being relabelled.
        if not isinstance(datasets, dict):
            raise AssertionError("Dataset iterator must be a dict with " +
                                 "dataset names (e.g. 'train') as keys.")
        if 'train' not in datasets:
            raise KeyError("Dataset iterator must yield training data.")

        if save_folds and save_path is not None:
            path, ext = os.path.splitext(save_path)
            this_save_path = path + '-{}'.format(k) + ext
            this_save_freq = save_freq
        else:
            this_save_path = None
            this_save_freq = 0

        # setup model, including any pretrained layers
        if isinstance(model, list):
            this_model = deepcopy(model[k])
        else:
            this_model = deepcopy(model)
        if hasattr(this_model, 'layers'):
            for i, layer in enumerate(this_model.layers):
                if isinstance(layer, PretrainedLayerCV):
                    this_model.layers[i] = layer.select_fold(k)

        # setup monitoring datasets; the default algorithm=None has no
        # monitor to configure
        this_algorithm = deepcopy(algorithm)
        if this_algorithm is not None:
            this_algorithm._set_monitoring_dataset(datasets)

        # extensions
        this_extensions = deepcopy(extensions)

        # construct an isolated Train object
        # no shared references between trainers are allowed
        # (hence all the deepcopy operations)
        trainer = Train(datasets['train'], this_model, this_algorithm,
                        this_save_path, this_save_freq,
                        this_extensions, allow_overwrite)
        trainers.append(trainer)

    self.trainers = trainers
    self.save_path = save_path
    self.allow_overwrite = allow_overwrite
    if cv_extensions is None:
        self.cv_extensions = []
    else:
        self.cv_extensions = cv_extensions
def get_layer_trainer_sgd_autoencoder(layer, trainset, batch_size=10,
                                      learning_rate=0.1, max_epochs=100,
                                      name=''):
    """Return a ``Train`` object that fits an autoencoder ``layer`` with SGD.

    ``trainset`` is used for both training and monitoring. The cost is the
    mean squared reconstruction error and training stops after
    ``max_epochs`` epochs. ``name`` is handed to the ``LoggingCallback``
    extension.
    """
    # SGD configuration (momentum learning rule)
    momentum_rule = Momentum(init_momentum=0.5)
    reconstruction_cost = MeanSquaredReconstructionError()
    stop_rule = EpochCounter(max_epochs=max_epochs)
    train_algo = SGD(
        learning_rate=learning_rate,
        learning_rule=momentum_rule,
        cost=reconstruction_cost,
        batch_size=batch_size,
        monitoring_dataset=trainset,
        termination_criterion=stop_rule,
        update_callbacks=None,
    )

    log_callback = LoggingCallback(name)

    # Logging first, then the learning-rate and momentum schedules.
    schedule = [
        log_callback,
        OneOverEpoch(start=1, half_life=5),
        MomentumAdjustor(final_momentum=0.7, start=10, saturate=100),
    ]
    return Train(model=layer,
                 algorithm=train_algo,
                 extensions=schedule,
                 dataset=trainset)