def train(ladder, batch_size=100, num_train_examples=60000, num_epochs=150,
          lrate_decay=0.67):
    # Setting Logger
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = 'results/mnist_100_standard_' + timestr
    log_path = os.path.join(save_path, 'log.txt')
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    model = Model(ladder.costs.total)
    all_params = model.parameters
    print len(all_params)
    print all_params

    training_algorithm = GradientDescent(
        cost=ladder.costs.total, parameters=all_params,
        step_rule=Adam(learning_rate=ladder.lr))

    # Fetch all batch normalization updates. They are in the clean path.
    # In addition to actual training, also do BN variable approximations
    bn_updates = ComputationGraph([ladder.costs.class_clean]).updates
    training_algorithm.add_updates(bn_updates)

    monitored_variables = [
        ladder.costs.class_corr, ladder.costs.class_clean,
        ladder.error, training_algorithm.total_gradient_norm,
        ladder.costs.total] + ladder.costs.denois.values()

    train_data_stream, test_data_stream = get_mixed_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(
        variables=monitored_variables,
        prefix="train",
        after_epoch=True)

    valid_monitoring = DataStreamMonitoring(
        variables=monitored_variables,
        data_stream=test_data_stream,
        prefix="test",
        after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=model,
        extensions=[
            train_monitoring,
            valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams('test_CE_corr', model, save_path),
            SaveLog(save_path, after_epoch=True),
            LRDecay(lr=ladder.lr,
                    decay_first=num_epochs * lrate_decay,
                    decay_last=num_epochs,
                    after_epoch=True),
            Printing()])
    main_loop.run()
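
# Hedged sketch (not part of the original code): the LRDecay extension used
# above is imported from the project's utilities and not shown here. The
# following is a minimal linear-decay variant, assuming the Blocks
# SimpleExtension API and that `lr` is a Theano shared variable; the schedule
# (constant until `decay_first`, then annealed linearly to zero by
# `decay_last`) is an assumption about the intended behaviour.
import numpy as np
from blocks.extensions import SimpleExtension


class LRDecay(SimpleExtension):
    def __init__(self, lr, decay_first, decay_last, **kwargs):
        super(LRDecay, self).__init__(**kwargs)
        self.lr = lr                    # Theano shared variable
        self.decay_first = decay_first
        self.decay_last = decay_last
        self.init_lr = lr.get_value()

    def do(self, which_callback, *args):
        epoch = self.main_loop.log.status['epochs_done']
        if epoch > self.decay_first:
            # Linearly anneal the shared learning rate towards zero.
            frac = 1.0 - (epoch - self.decay_first) / float(
                self.decay_last - self.decay_first)
            self.lr.set_value(np.float32(max(0.0, frac) * self.init_lr))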
def train(cli_params):
    cli_params['save_dir'] = prepare_dir(cli_params['save_to'])
    logfile = os.path.join(cli_params['save_dir'], 'log.txt')

    # Log also DEBUG to a file
    fh = logging.FileHandler(filename=logfile)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    logger.info('Logging into %s' % logfile)

    p, loaded = load_and_log_params(cli_params)
    in_dim, data, whiten, cnorm = setup_data(p, test_set=False)
    if not loaded:
        # Set the zero layer to match input dimensions
        p.encoder_layers = (in_dim,) + p.encoder_layers

    ladder = setup_model(p)

    # Training
    all_params = ComputationGraph([ladder.costs.total]).parameters
    logger.info('Found the following parameters: %s' % str(all_params))

    # Fetch all batch normalization updates. They are in the clean path.
    bn_updates = ComputationGraph([ladder.costs.class_clean]).updates
    assert 'counter' in [u.name for u in bn_updates.keys()], \
        'No batch norm params in graph - the graph has been cut?'

    training_algorithm = GradientDescent(
        cost=ladder.costs.total, params=all_params,
        step_rule=Adam(learning_rate=ladder.lr))
    # In addition to actual training, also do BN variable approximations
    training_algorithm.add_updates(bn_updates)

    short_prints = {
        "train": {
            'T_C_class': ladder.costs.class_corr,
            'T_C_de': ladder.costs.denois.values(),
        },
        "valid_approx": OrderedDict([
            ('V_C_class', ladder.costs.class_clean),
            ('V_E', ladder.error.clean),
            ('V_C_de', ladder.costs.denois.values()),
        ]),
        "valid_final": OrderedDict([
            ('VF_C_class', ladder.costs.class_clean),
            ('VF_E', ladder.error.clean),
            ('VF_C_de', ladder.costs.denois.values()),
        ]),
    }

    main_loop = MainLoop(
        training_algorithm,
        # Datastream used for training
        make_datastream(data.train, data.train_ind,
                        p.batch_size,
                        n_labeled=p.labeled_samples,
                        n_unlabeled=p.unlabeled_samples,
                        whiten=whiten, cnorm=cnorm),
        model=Model(ladder.costs.total),
        extensions=[
            FinishAfter(after_n_epochs=p.num_epochs),

            # This will estimate the validation error using
            # running average estimates of the batch normalization
            # parameters, mean and variance
            ApproxTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean]
                + ladder.costs.denois.values(),
                make_datastream(data.valid, data.valid_ind,
                                p.valid_batch_size, whiten=whiten,
                                cnorm=cnorm, scheme=ShuffledScheme),
                prefix="valid_approx"),

            # This Monitor is slower, but more accurate since it will first
            # estimate batch normalization parameters from training data and
            # then do another pass to calculate the validation error.
            FinalTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean]
                + ladder.costs.denois.values(),
                make_datastream(data.train, data.train_ind,
                                p.batch_size,
                                n_labeled=p.labeled_samples,
                                whiten=whiten, cnorm=cnorm,
                                scheme=ShuffledScheme),
                make_datastream(data.valid, data.valid_ind,
                                p.valid_batch_size,
                                n_labeled=len(data.valid_ind),
                                whiten=whiten, cnorm=cnorm,
                                scheme=ShuffledScheme),
                prefix="valid_final", after_n_epochs=p.num_epochs),

            TrainingDataMonitoring(
                [ladder.costs.total, ladder.costs.class_corr,
                 training_algorithm.total_gradient_norm]
                + ladder.costs.denois.values(),
                prefix="train", after_epoch=True),

            SaveParams(None, all_params, p.save_dir, after_epoch=True),
            SaveExpParams(p, p.save_dir, before_training=True),
            SaveLog(p.save_dir, after_training=True),
            ShortPrinting(short_prints),
            LRDecay(ladder.lr, p.num_epochs * p.lrate_decay, p.num_epochs,
                    after_epoch=True),
        ])
    main_loop.run()

    # Get results
    df = main_loop.log.to_dataframe()
    col = 'valid_final_error_rate_clean'
    logger.info('%s %g' % (col, df[col].iloc[-1]))

    if main_loop.log.status['epoch_interrupt_received']:
        return None
    return df
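
# Hedged sketch (not part of the original code): SaveParams is imported from
# the project's utilities and not shown here. This minimal version matches the
# SaveParams(None, all_params, p.save_dir, after_epoch=True) call above,
# assuming the first argument names an optional log channel to track
# (None = save every time the extension fires) and that parameters are dumped
# with numpy.savez; the real implementation may store them differently.
import os
import numpy as np
from blocks.extensions import SimpleExtension


class SaveParams(SimpleExtension):
    def __init__(self, trigger_var, params, save_dir, **kwargs):
        super(SaveParams, self).__init__(**kwargs)
        self.trigger_var = trigger_var
        self.params = params
        self.save_dir = save_dir
        self.best_value = np.inf

    def do(self, which_callback, *args):
        if self.trigger_var is not None:
            # Only save when the tracked log channel improves.
            current = self.main_loop.log.current_row.get(self.trigger_var)
            if current is None or current >= self.best_value:
                return
            self.best_value = current
        values = {'%d_%s' % (i, p.name): p.get_value()
                  for i, p in enumerate(self.params)}
        np.savez(os.path.join(self.save_dir, 'trained_params.npz'), **values)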
def train(self):
    """
    Setup and train the model
    """
    to_train = ComputationGraph([self.tagger.total_cost]).parameters
    logger.info('Found the following parameters: %s' % str(to_train))

    step_rule = Adam(learning_rate=self.p.lr)

    training_algorithm = GradientDescent(
        cost=self.tagger.total_cost,
        parameters=to_train,
        step_rule=step_rule,
        on_unused_sources='warn',
        theano_func_kwargs={'on_unused_input': 'warn'})

    # TRACKED GRAPH NODES
    train_params = {
        'Train_Denoising_Cost': self.tagger.corr.denoising_cost,
    }
    if self.p.class_cost_x > 0:
        train_params['Train_Classification_Cost'] = \
            self.tagger.corr.class_cost
        train_params['Train_Classification_Error'] = \
            self.tagger.clean.class_error

    valid_params = {
        'Validation_Denoising_Cost': self.tagger.corr.denoising_cost,
    }
    if self.p.class_cost_x > 0:
        valid_params['Validation_Classification_Cost'] = \
            self.tagger.corr.class_cost
        valid_params['Validation_Classification_Error'] = \
            self.tagger.clean.class_error

    test_params = {
        'Test_AMI_Score': self.tagger.clean.ami_score,
        'Test_Denoising_Cost': self.tagger.corr.denoising_cost,
    }
    if self.p.class_cost_x > 0:
        test_params['Test_Classification_Cost'] = \
            self.tagger.corr.class_cost
        test_params['Test_Classification_Error'] = \
            self.tagger.clean.class_error

    short_prints = {
        "train": train_params,
        "valid": valid_params,
        "test": test_params,
    }

    main_loop = MainLoop(
        training_algorithm,
        # Datastream used for training
        self.streams['train'],
        model=Model(self.tagger.total_cost),
        extensions=[
            FinishAfter(after_n_epochs=self.p.num_epochs),
            SaveParams(self.p.get('save_freq', 0), self.tagger,
                       self.save_dir, before_epoch=True),
            DataStreamMonitoring(
                valid_params.values(),
                self.streams['valid'],
                prefix="valid"),
            FinalTestMonitoring(
                test_params.values(),
                self.streams['train'],
                {'valid': self.streams['valid'],
                 'test': self.streams['test']},
                after_training=True),
            TrainingDataMonitoring(
                train_params.values(),
                prefix="train",
                after_epoch=True),
            SaveExpParams(self.p, self.save_dir, before_training=True),
            Timing(after_epoch=True),
            ShortPrinting(short_prints, after_epoch=True),
        ])
    logger.info('Running the main loop')
    main_loop.run()
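
# Hedged sketch (not part of the original code): a minimal ShortPrinting-style
# extension consistent with the `short_prints` structure above. Short labels
# map to monitored Theano variables (or lists of them), and the corresponding
# "<prefix>_<variable name>" log channels are printed after each epoch. The
# real extension may format its output differently.
from blocks.extensions import SimpleExtension


class ShortPrinting(SimpleExtension):
    def __init__(self, to_print, **kwargs):
        kwargs.setdefault('after_epoch', True)
        super(ShortPrinting, self).__init__(**kwargs)
        self.to_print = to_print

    def do(self, which_callback, *args):
        log = self.main_loop.log
        print 'Epoch %d' % log.status['epochs_done']
        for prefix, channels in self.to_print.items():
            for label, variables in channels.items():
                # Entries may be a single variable or a list of variables
                # (e.g. the per-layer denoising costs).
                if not isinstance(variables, (list, tuple)):
                    variables = [variables]
                values = [log.current_row.get('%s_%s' % (prefix, v.name))
                          for v in variables]
                print '  %s: %s' % (label, values)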
def train(model, configs):
    get_streams = configs['get_streams']
    save_path = configs['save_path']
    num_epochs = configs['num_epochs']
    batch_size = configs['batch_size']
    lrs = configs['lrs']
    until_which_epoch = configs['until_which_epoch']
    grad_clipping = configs['grad_clipping']
    monitorings = model.monitorings

    # Training
    if configs['weight_noise'] > 0:
        cg = ComputationGraph(model.cost)
        weights = VariableFilter(roles=[WEIGHT])(cg.variables)
        cg = apply_noise(cg, weights, configs['weight_noise'])
        model.cost = cg.outputs[0].copy(name='CE')

    if configs['l2_reg'] > 0:
        cg = ComputationGraph(model.cost)
        weights = VariableFilter(roles=[WEIGHT])(cg.variables)
        new_cost = model.cost + configs['l2_reg'] * sum(
            [(weight ** 2).sum() for weight in weights])
        model.cost = new_cost.copy(name='CE')

    blocks_model = Model(model.cost)
    all_params = blocks_model.parameters
    print "Number of found parameters: " + str(len(all_params))
    print all_params

    default_lr = np.float32(configs['lrs'][0])
    lr_var = theano.shared(default_lr, name="learning_rate")

    clipping = StepClipping(threshold=np.cast[floatX](grad_clipping))
    # sgd_momentum = Momentum(
    #     learning_rate=0.0001,
    #     momentum=0.95)
    # step_rule = CompositeRule([clipping, sgd_momentum])
    adam = Adam(learning_rate=lr_var)
    step_rule = CompositeRule([clipping, adam])
    training_algorithm = GradientDescent(
        cost=model.cost, parameters=all_params,
        step_rule=step_rule, on_unused_sources='warn')

    monitored_variables = [
        lr_var,
        aggregation.mean(training_algorithm.total_gradient_norm)] + monitorings

    # Track the L2 norm of every parameter and of its gradient.
    for param in all_params:
        name = param.tag.annotations[0].name + "." + param.name
        to_monitor = training_algorithm.gradients[param].norm(2)
        to_monitor.name = name + "_grad_norm"
        monitored_variables.append(to_monitor)
        to_monitor = param.norm(2)
        to_monitor.name = name + "_norm"
        monitored_variables.append(to_monitor)

    train_data_stream, valid_data_stream = get_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(
        variables=monitored_variables,
        prefix="train",
        after_epoch=True)

    valid_monitoring = DataStreamMonitoring(
        variables=monitored_variables,
        data_stream=valid_data_stream,
        prefix="valid",
        after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=blocks_model,
        extensions=[
            train_monitoring,
            valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams('valid_CE', blocks_model, save_path, after_epoch=True),
            SaveLog(after_epoch=True),
            ProgressBar(),
            # ErrorPerVideo(model, after_epoch=True, on_interrupt=True),
            LRDecay(lr_var, lrs, until_which_epoch, after_epoch=True),
            Printing(after_epoch=True)])
    main_loop.run()
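
# Hedged sketch (not part of the original code): a hypothetical `configs`
# dictionary for the train() function above. The keys are exactly those read
# inside the function; the concrete values, and the `get_streams` callable and
# `model` object, are illustrative assumptions only.
configs = {
    'get_streams': get_streams,           # callable: batch_size -> (train, valid) streams
    'save_path': 'results/example_run',
    'num_epochs': 200,
    'batch_size': 100,
    'lrs': [1e-3, 1e-4, 1e-5],            # learning rates handed to LRDecay
    'until_which_epoch': [50, 150, 200],  # epochs at which each rate stops applying
    'grad_clipping': 10,
    'weight_noise': 0.0,                  # > 0 applies noise to WEIGHT variables
    'l2_reg': 0.0,                        # > 0 adds an L2 penalty to the cost
}
train(model, configs)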
def train(model, batch_size=100, num_epochs=1000):
    cost = model.cost
    monitorings = model.monitorings

    # Setting Logger
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = 'results/CMV_V2_' + timestr
    log_path = os.path.join(save_path, 'log.txt')
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    blocks_model = Model(cost)
    all_params = blocks_model.parameters
    print "Number of found parameters: " + str(len(all_params))
    print all_params

    clipping = StepClipping(threshold=np.cast[floatX](10))
    adam = Adam(learning_rate=model.lr_var)
    step_rule = CompositeRule([adam, clipping])
    training_algorithm = GradientDescent(
        cost=cost, parameters=all_params, step_rule=step_rule)

    monitored_variables = [
        model.lr_var,
        cost,
        aggregation.mean(training_algorithm.total_gradient_norm)] + monitorings

    # Track the L2 norm of every parameter and of its gradient.
    params_dicts = blocks_model.get_parameter_dict()
    for name, param in params_dicts.iteritems():
        to_monitor = training_algorithm.gradients[param].norm(2)
        to_monitor.name = name + "_grad_norm"
        monitored_variables.append(to_monitor)
        to_monitor = param.norm(2)
        to_monitor.name = name + "_norm"
        monitored_variables.append(to_monitor)

    train_data_stream, valid_data_stream = get_cmv_v2_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(
        variables=monitored_variables,
        prefix="train",
        after_epoch=True)

    valid_monitoring = DataStreamMonitoring(
        variables=monitored_variables,
        data_stream=valid_data_stream,
        prefix="valid",
        after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=blocks_model,
        extensions=[
            train_monitoring,
            valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams('valid_misclassificationrate_apply_error_rate',
                       blocks_model, save_path),
            SaveLog(save_path, after_epoch=True),
            ProgressBar(),
            LRDecay(model.lr_var,
                    [0.001, 0.0001, 0.00001, 0.000001],
                    [8, 15, 30, 1000],
                    after_epoch=True),
            Printing()])
    main_loop.run()
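
# Hedged sketch (not part of the original code): SaveLog is imported from the
# project's utilities and not shown here. This minimal version is consistent
# with the SaveLog(save_path, after_epoch=True) calls above, assuming the
# Blocks log exposes to_dataframe() (as used in the second train() function)
# and that a CSV dump is an acceptable format for the saved log.
import os
from blocks.extensions import SimpleExtension


class SaveLog(SimpleExtension):
    def __init__(self, save_path, **kwargs):
        super(SaveLog, self).__init__(**kwargs)
        self.save_path = save_path

    def do(self, which_callback, *args):
        # Persist the full training log so runs can be inspected offline.
        df = self.main_loop.log.to_dataframe()
        df.to_csv(os.path.join(self.save_path, 'log.csv'))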