def analyze(cli_params):
    p, _ = load_and_log_params(cli_params)
    _, data = setup_data(p, test_set=True)
    ladder = setup_model(p)

    # Analyze activations
    dset, indices, calc_batchnorm = {
        'train': (data.train, data.train_ind, False),
        'valid': (data.valid, data.valid_ind, True),
        'test': (data.test, data.test_ind, True),
    }[p.data_type]

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(
            extensions=[
                FinalTestMonitoring(
                    [ladder.costs.class_clean, ladder.error.clean]
                    + list(ladder.costs.denois.values()),
                    make_datastream(data.train, data.train_ind,
                                    # These need to match with the training
                                    p.batch_size,
                                    n_labeled=p.labeled_samples,
                                    n_unlabeled=len(data.train_ind),
                                    scheme=ShuffledScheme),
                    make_datastream(data.valid, data.valid_ind,
                                    p.valid_batch_size,
                                    n_labeled=len(data.valid_ind),
                                    n_unlabeled=len(data.valid_ind),
                                    scheme=ShuffledScheme),
                    prefix="valid_final", before_training=True),
                ShortPrinting({
                    "valid_final": OrderedDict([
                        ('VF_C_class', ladder.costs.class_clean),
                        ('VF_E', ladder.error.clean),
                        ('VF_C_de', [ladder.costs.denois.get(0),
                                     ladder.costs.denois.get(1),
                                     ladder.costs.denois.get(2),
                                     ladder.costs.denois.get(3)]),
                    ]),
                }, after_training=True, use_log=False),
            ])
        main_loop.run()

    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset, indices,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(indices),
                         n_unlabeled=len(indices),
                         balanced_classes=False,
                         scheme=SequentialScheme)

    # We want the values after softmax
    outputs = ladder.act.clean.labeled.h[len(ladder.layers) - 1]

    # Replace the batch normalization parameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _, outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []
    inputs = {'features_labeled': [],
              'targets_labeled': [],
              'features_unlabeled': []}
    # Loop over one epoch
    for d in it:
        # Store all inputs
        for k, v in d.items():
            inputs[k] += [v]
        # Store outputs
        res += [f(*[d[str(inp)] for inp in cg.inputs])]

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]
    inputs = {k: numpy.vstack(v) for k, v in inputs.items()}

    return inputs['targets_labeled'], res[0]
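
# A minimal, self-contained sketch (plain numpy, no Theano/Blocks) of how the
# epoch loop above regroups results: `res` holds one entry per minibatch, each
# entry itself a list of output arrays, and `zip(*res)` + `numpy.vstack` turns
# that into one array per output spanning the whole epoch. The names below are
# illustrative only and are not part of the repository code.
def _concat_minibatch_outputs_sketch():
    import numpy
    # Two fake "compiled function" outputs (e.g. a softmax and a hidden layer)
    # for three minibatches of 4 examples each.
    res = [[numpy.ones((4, 10)) * i, numpy.ones((4, 32)) * i]
           for i in range(3)]
    stacked = [numpy.vstack(minibatches) for minibatches in zip(*res)]
    assert stacked[0].shape == (12, 10) and stacked[1].shape == (12, 32)
    return stacked
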
def train(cli_params):
    cli_params['save_dir'] = prepare_dir(cli_params['save_to'])
    logfile = os.path.join(cli_params['save_dir'], 'log.txt')

    # Log also DEBUG to a file
    fh = logging.FileHandler(filename=logfile)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    logger.info('Logging into %s' % logfile)

    p, loaded = load_and_log_params(cli_params)
    in_dim, data = setup_data(p, test_set=False)
    if not loaded:
        # Set the zero layer to match input dimensions
        p.encoder_layers = (in_dim, ) + p.encoder_layers

    ladder = setup_model(p)

    # Training
    all_params = ComputationGraph([ladder.costs.total]).parameters
    logger.info('Found the following parameters: %s' % str(all_params))

    # Fetch all batch normalization updates. They are in the clean path.
    bn_updates = ComputationGraph([ladder.costs.class_clean]).updates
    assert 'counter' in [u.name for u in list(bn_updates.keys())], \
        'No batch norm params in graph - the graph has been cut?'

    training_algorithm = GradientDescent(
        cost=ladder.costs.total, params=all_params,
        step_rule=Adam(learning_rate=ladder.lr))
    # In addition to actual training, also do BN variable approximations
    training_algorithm.add_updates(bn_updates)

    short_prints = {
        "train": {
            'T_C_class': ladder.costs.class_corr,
            'T_C_de': list(ladder.costs.denois.values()),
        },
        "valid_approx": OrderedDict([
            ('V_C_class', ladder.costs.class_clean),
            ('V_E', ladder.error.clean),
            ('V_C_de', list(ladder.costs.denois.values())),
        ]),
        "valid_final": OrderedDict([
            ('VF_C_class', ladder.costs.class_clean),
            ('VF_E', ladder.error.clean),
            ('VF_C_de', list(ladder.costs.denois.values())),
        ]),
    }

    main_loop = MainLoop(
        training_algorithm,
        # Datastream used for training
        make_datastream(data.train, data.train_ind,
                        p.batch_size,
                        n_labeled=p.labeled_samples,
                        n_unlabeled=p.unlabeled_samples),
        model=Model(theano.tensor.cast(ladder.costs.total, "float32")),
        extensions=[
            FinishAfter(after_n_epochs=p.num_epochs),

            # This will estimate the validation error using
            # running average estimates of the batch normalization
            # parameters, mean and variance
            ApproxTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean]
                + list(ladder.costs.denois.values()),
                make_datastream(data.valid, data.valid_ind,
                                p.valid_batch_size,
                                scheme=ShuffledScheme),
                prefix="valid_approx"),

            TrainingDataMonitoring(
                [ladder.costs.total, ladder.costs.class_corr,
                 training_algorithm.total_gradient_norm]
                + list(ladder.costs.denois.values()),
                prefix="train", after_epoch=True),

            SaveParams(None, all_params, p.save_dir, after_epoch=True),
            SaveExpParams(p, p.save_dir, before_training=True),
            ShortPrinting(short_prints),
            LRDecay(ladder.lr, p.num_epochs * p.lrate_decay,
                    p.num_epochs, after_epoch=True),
        ])
    main_loop.run()

    # Get results
    df = main_loop.log.to_dataframe()
    col = 'valid_final_error_rate_clean'
    logger.info('%s %g' % (col, df[col].iloc[-1]))

    if main_loop.log.status['epoch_interrupt_received']:
        return None
    return df
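
# The `bn_updates` fetched above maintain running estimates of the batch
# normalization statistics via a shared 'counter' variable. The actual update
# rule lives in the ladder model code, not here; the function below is only a
# plain-numpy sketch of the general idea of a counter-based cumulative moving
# average of per-batch mean and variance, with made-up names.
def _bn_running_average_sketch(batches):
    import numpy
    running_mean, running_var, counter = 0.0, 0.0, 0
    for x in batches:  # x: 2D array, one minibatch of activations
        counter += 1
        # Cumulative moving average: each batch contributes weight 1/counter.
        running_mean += (x.mean(axis=0) - running_mean) / counter
        running_var += (x.var(axis=0) - running_var) / counter
    return running_mean, running_var
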
def train(cli_params):
    fn = 'noname'
    if 'save_to' in nodefaultargs or not cli_params.get('load_from'):
        fn = cli_params['save_to']
    cli_params['save_dir'] = prepare_dir(fn)
    nodefaultargs.append('save_dir')

    logfile = os.path.join(cli_params['save_dir'], 'log.txt')

    # Log also DEBUG to a file
    fh = logging.FileHandler(filename=logfile)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    logger.info('Logging into %s' % logfile)

    p, loaded = load_and_log_params(cli_params)
    in_dim, data, whiten, cnorm = setup_data(p, test_set=False)
    if not loaded:
        # Set the zero layer to match input dimensions
        p.encoder_layers = (in_dim, ) + p.encoder_layers

    ladder = setup_model(p)

    # Training
    all_params = ComputationGraph([ladder.costs.total]).parameters
    logger.info('Found the following parameters: %s' % str(all_params))

    # Fetch all batch normalization updates. They are in the clean path.
    # BN can be turned off by setting is_normalizing = False in ladder.py.
    bn_updates = ComputationGraph([ladder.costs.class_clean]).updates
    assert not bn_updates or 'counter' in [u.name for u in bn_updates.keys()], \
        'No batch norm params in graph - the graph has been cut?'

    training_algorithm = GradientDescent(
        cost=ladder.costs.total, parameters=all_params,
        step_rule=Adam(learning_rate=ladder.lr))
    # In addition to actual training, also do BN variable approximations
    if bn_updates:
        training_algorithm.add_updates(bn_updates)

    short_prints = {
        "train": OrderedDict([
            ('T_E', ladder.error.clean),
            ('T_O', ladder.oos.clean),
            ('T_C_class', ladder.costs.class_corr),
            ('T_C_de', ladder.costs.denois.values()),
            ('T_T', ladder.costs.total),
        ]),
        "valid_approx": OrderedDict([
            ('V_C_class', ladder.costs.class_clean),
            ('V_E', ladder.error.clean),
            ('V_O', ladder.oos.clean),
            ('V_C_de', ladder.costs.denois.values()),
            ('V_T', ladder.costs.total),
        ]),
        "valid_final": OrderedDict([
            ('VF_C_class', ladder.costs.class_clean),
            ('VF_E', ladder.error.clean),
            ('VF_O', ladder.oos.clean),
            ('VF_C_de', ladder.costs.denois.values()),
            ('VF_T', ladder.costs.total),
        ]),
    }

    if len(data.valid_ind):
        main_loop = MainLoop(
            training_algorithm,
            # Datastream used for training
            make_datastream(data.train, data.train_ind,
                            p.batch_size,
                            n_labeled=p.labeled_samples,
                            n_unlabeled=p.unlabeled_samples,
                            whiten=whiten, cnorm=cnorm,
                            balanced_classes=p.balanced_classes,
                            dseed=p.dseed),
            model=Model(ladder.costs.total),
            extensions=[
                FinishAfter(after_n_epochs=p.num_epochs),

                # This will estimate the validation error using
                # running average estimates of the batch normalization
                # parameters, mean and variance
                ApproxTestMonitoring(
                    [ladder.costs.class_clean, ladder.error.clean,
                     ladder.oos.clean, ladder.costs.total]
                    + ladder.costs.denois.values(),
                    make_datastream(data.valid, data.valid_ind,
                                    p.valid_batch_size,
                                    whiten=whiten, cnorm=cnorm,
                                    balanced_classes=p.balanced_classes,
                                    scheme=ShuffledScheme),
                    prefix="valid_approx"),

                # This monitor is slower, but more accurate since it will first
                # estimate batch normalization parameters from training data and
                # then do another pass to calculate the validation error.
                FinalTestMonitoring(
                    [ladder.costs.class_clean, ladder.error.clean,
                     ladder.oos.clean, ladder.costs.total]
                    + ladder.costs.denois.values(),
                    make_datastream(data.train, data.train_ind,
                                    p.batch_size,
                                    n_labeled=p.labeled_samples,
                                    whiten=whiten, cnorm=cnorm,
                                    balanced_classes=p.balanced_classes,
                                    scheme=ShuffledScheme),
                    make_datastream(data.valid, data.valid_ind,
                                    p.valid_batch_size,
                                    n_labeled=len(data.valid_ind),
                                    whiten=whiten, cnorm=cnorm,
                                    balanced_classes=p.balanced_classes,
                                    scheme=ShuffledScheme),
                    prefix="valid_final",
                    after_n_epochs=p.num_epochs,
                    after_training=True),

                TrainingDataMonitoring(
                    [ladder.error.clean, ladder.oos.clean, ladder.costs.total,
                     ladder.costs.class_corr,
                     training_algorithm.total_gradient_norm]
                    + ladder.costs.denois.values(),
                    prefix="train", after_epoch=True),

                # Save the model whenever the validation result is the best so
                # far. Other possible triggers: ladder.costs.class_clean or
                # ('train', ladder.costs.total).
                SaveParams(('valid_approx', ladder.error.clean),
                           all_params, p.save_dir, after_epoch=True),
                SaveExpParams(p, p.save_dir, before_training=True),
                SaveLog(p.save_dir, after_training=True),
                ShortPrinting(short_prints),
                LRDecay(ladder.lr, p.num_epochs * p.lrate_decay,
                        p.num_epochs, lrmin=p.lrmin, after_epoch=True),
            ])
    else:
        main_loop = MainLoop(
            training_algorithm,
            # Datastream used for training
            make_datastream(data.train, data.train_ind,
                            p.batch_size,
                            n_labeled=p.labeled_samples,
                            n_unlabeled=p.unlabeled_samples,
                            whiten=whiten, cnorm=cnorm,
                            balanced_classes=p.balanced_classes,
                            dseed=p.dseed),
            model=Model(ladder.costs.total),
            extensions=[
                FinishAfter(after_n_epochs=p.num_epochs),

                TrainingDataMonitoring(
                    [ladder.error.clean, ladder.oos.clean, ladder.costs.total,
                     ladder.costs.class_corr,
                     training_algorithm.total_gradient_norm]
                    + ladder.costs.denois.values(),
                    prefix="train", after_epoch=True),

                # No validation set: save the model whenever the training
                # result is the best so far.
                SaveParams(('train', ladder.error.clean),
                           all_params, p.save_dir, after_epoch=True),
                SaveExpParams(p, p.save_dir, before_training=True),
                SaveLog(p.save_dir, after_training=True),
                ShortPrinting(short_prints),
                LRDecay(ladder.lr, p.num_epochs * p.lrate_decay,
                        p.num_epochs, lrmin=p.lrmin, after_epoch=True),
            ])

    main_loop.run()

    # Get results
    if len(data.valid_ind) == 0:
        return None

    df = DataFrame.from_dict(main_loop.log, orient='index')
    col = 'valid_final_error_rate_clean'
    logger.info('%s %g' % (col, df[col].iloc[-1]))

    if main_loop.log.status['epoch_interrupt_received']:
        return None
    return df
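
# LRDecay above is given a decay start point (p.num_epochs * p.lrate_decay),
# an end point (p.num_epochs) and a floor (p.lrmin). Its real behaviour is
# defined in the repository's extensions module; the helper below is only a
# sketch of the linear schedule those arguments suggest, for intuition, and is
# not the actual implementation.
def _linear_lr_decay_sketch(lr0, epoch, decay_start, num_epochs, lrmin=0.0):
    if epoch < decay_start:
        return lr0
    # Scale linearly from lr0 at decay_start down towards lrmin at num_epochs.
    frac = float(num_epochs - epoch) / max(num_epochs - decay_start, 1)
    return max(lr0 * frac, lrmin)
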
def dump_unlabeled_encoder(cli_params):
    """
    Called when dumping.

    :return: result (encoder outputs for the selected layer)
    """
    p, _ = load_and_log_params(cli_params)
    _, data, whiten, cnorm = setup_data(p, test_set=(p.data_type == 'test'))
    ladder = setup_model(p)

    # Analyze activations
    if p.data_type == 'train':
        dset, indices, calc_batchnorm = data.train, data.train_ind, False
    elif p.data_type == 'valid':
        dset, indices, calc_batchnorm = data.valid, data.valid_ind, True
    elif p.data_type == 'test':
        dset, indices, calc_batchnorm = data.test, data.test_ind, True
    else:
        raise Exception("Unknown data-type %s" % p.data_type)

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(
            extensions=[
                FinalTestMonitoring(
                    [ladder.costs.class_clean, ladder.error.clean,
                     ladder.oos.clean]
                    + ladder.costs.denois.values(),
                    make_datastream(data.train, data.train_ind,
                                    # These need to match with the training
                                    p.batch_size,
                                    n_labeled=p.labeled_samples,
                                    n_unlabeled=len(data.train_ind),
                                    balanced_classes=p.balanced_classes,
                                    cnorm=cnorm, whiten=whiten,
                                    scheme=ShuffledScheme),
                    make_datastream(data.valid, data.valid_ind,
                                    p.valid_batch_size,
                                    n_labeled=len(data.valid_ind),
                                    n_unlabeled=len(data.valid_ind),
                                    balanced_classes=p.balanced_classes,
                                    cnorm=cnorm, whiten=whiten,
                                    scheme=ShuffledScheme),
                    prefix="valid_final", before_training=True),
                ShortPrinting({
                    "valid_final": OrderedDict([
                        ('VF_C_class', ladder.costs.class_clean),
                        ('VF_E', ladder.error.clean),
                        ('VF_O', ladder.oos.clean),
                        ('VF_C_de', [ladder.costs.denois.get(0),
                                     ladder.costs.denois.get(1),
                                     ladder.costs.denois.get(2),
                                     ladder.costs.denois.get(3)]),
                    ]),
                }, after_training=True, use_log=False),
            ])
        main_loop.run()

    all_ind = numpy.arange(dset.num_examples)

    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset, all_ind,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(all_ind),
                         n_unlabeled=len(all_ind),
                         balanced_classes=False,
                         whiten=whiten, cnorm=cnorm,
                         scheme=SequentialScheme)

    # If layer=-1, we want the values after softmax
    if p.layer < 0:
        # ladder.act.clean.unlabeled.h is a dict, not a list
        outputs = ladder.act.clean.labeled.h[len(ladder.layers) + p.layer]
    else:
        outputs = ladder.act.clean.labeled.h[p.layer]

    # Replace the batch normalization parameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _, outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []

    # Loop over one epoch
    for d in it:
        # Store outputs
        res += [f(*[d[str(inp)] for inp in cg.inputs])]

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]

    return res[0]
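
# `p.layer` above selects an encoder activation: non-negative values index
# from the input side, negative values count back from the softmax output,
# mirroring Python's negative indexing. A tiny standalone illustration with a
# plain list (the layer names are made up for the example):
def _layer_selection_sketch(layers, layer):
    # layers: list of per-layer activations; layer: int, may be negative.
    return layers[len(layers) + layer] if layer < 0 else layers[layer]

# e.g. _layer_selection_sketch(['h0', 'h1', 'h2', 'softmax'], -1) -> 'softmax'
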
def analyze(cli_params):
    """
    Called when evaluating.

    :return: targets, result
    """
    p, _ = load_and_log_params(cli_params)
    _, data, whiten, cnorm = setup_data(p, test_set=(p.data_type == 'test'))
    ladder = setup_model(p)

    # Analyze activations
    if p.data_type == 'train':
        dset, indices, calc_batchnorm = data.train, data.train_ind, False
    elif p.data_type == 'valid':
        dset, indices, calc_batchnorm = data.valid, data.valid_ind, True
    elif p.data_type == 'test':
        dset, indices, calc_batchnorm = data.test, data.test_ind, True
    else:
        raise Exception("Unknown data-type %s" % p.data_type)

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(
            extensions=[
                FinalTestMonitoring(
                    [ladder.costs.class_clean, ladder.error.clean,
                     ladder.oos.clean]
                    + ladder.costs.denois.values(),
                    make_datastream(data.train, data.train_ind,
                                    # These need to match with the training
                                    p.batch_size,
                                    n_labeled=p.labeled_samples,
                                    n_unlabeled=len(data.train_ind),
                                    cnorm=cnorm,
                                    balanced_classes=p.balanced_classes,
                                    whiten=whiten,
                                    scheme=ShuffledScheme),
                    make_datastream(data.valid, data.valid_ind,
                                    p.valid_batch_size,
                                    n_labeled=len(data.valid_ind),
                                    n_unlabeled=len(data.valid_ind),
                                    balanced_classes=p.balanced_classes,
                                    cnorm=cnorm, whiten=whiten,
                                    scheme=ShuffledScheme),
                    prefix="valid_final", before_training=True),
                ShortPrinting({
                    "valid_final": OrderedDict([
                        ('VF_C_class', ladder.costs.class_clean),
                        ('VF_E', ladder.error.clean),
                        ('VF_O', ladder.oos.clean),
                        ('VF_C_de', [ladder.costs.denois.get(0),
                                     ladder.costs.denois.get(1),
                                     ladder.costs.denois.get(2),
                                     ladder.costs.denois.get(3)]),
                    ]),
                }, after_training=True, use_log=False),
            ])
        main_loop.run()

        # df = DataFrame.from_dict(main_loop.log, orient='index')
        # col = 'valid_final_error_rate_clean'
        # logger.info('%s %g' % (col, df[col].iloc[-1]))

    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset, indices,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(indices),
                         n_unlabeled=len(indices),
                         balanced_classes=False,
                         whiten=whiten, cnorm=cnorm,
                         scheme=SequentialScheme)

    # We want the values after softmax (the last layer)
    outputs = ladder.act.clean.labeled.h[len(ladder.layers) - 1]

    # Replace the batch normalization parameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _, outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []
    inputs = {'features_labeled': [],
              'targets_labeled': [],
              'features_unlabeled': []}
    # Loop over one epoch
    for d in it:
        # Store all inputs
        for k, v in d.iteritems():
            inputs[k] += [v]
        # Store outputs
        res += [f(*[d[str(inp)] for inp in cg.inputs])]

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]
    inputs = {k: numpy.concatenate(v) for k, v in inputs.iteritems()}

    return inputs['targets_labeled'], res[0]
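
# The call `f(*[d[str(inp)] for inp in cg.inputs])` above feeds each named
# Theano input from the minibatch dict in the positional order the compiled
# function expects. The sketch below mimics that pattern without Theano:
# `input_names` stands in for `cg.inputs` and any ordinary callable stands in
# for `f`; the names are illustrative, not part of the repository.
def _call_by_input_names_sketch(f, input_names, minibatch):
    # minibatch: dict mapping source name -> numpy array, as yielded by
    # get_epoch_iterator(as_dict=True).
    return f(*[minibatch[name] for name in input_names])
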
def train(self):
    """ Setup and train the model """
    to_train = ComputationGraph([self.tagger.total_cost]).parameters
    logger.info('Found the following parameters: %s' % str(to_train))

    step_rule = Adam(learning_rate=self.p.lr)
    training_algorithm = GradientDescent(
        cost=self.tagger.total_cost,
        parameters=to_train,
        step_rule=step_rule,
        on_unused_sources='warn',
        theano_func_kwargs={'on_unused_input': 'warn'})

    # TRACKED GRAPH NODES
    train_params = {
        'Train_Denoising_Cost': self.tagger.corr.denoising_cost,
    }
    if self.p.class_cost_x > 0:
        train_params['Train_Classification_Cost'] = self.tagger.corr.class_cost
        train_params['Train_Classification_Error'] = self.tagger.clean.class_error

    valid_params = {
        'Validation_Denoising_Cost': self.tagger.corr.denoising_cost,
    }
    if self.p.class_cost_x > 0:
        valid_params['Validation_Classification_Cost'] = self.tagger.corr.class_cost
        valid_params['Validation_Classification_Error'] = self.tagger.clean.class_error

    test_params = {
        'Test_AMI_Score': self.tagger.clean.ami_score,
        'Test_Denoising_Cost': self.tagger.corr.denoising_cost,
    }
    if self.p.class_cost_x > 0:
        test_params['Test_Classification_Cost'] = self.tagger.corr.class_cost
        test_params['Test_Classification_Error'] = self.tagger.clean.class_error

    short_prints = {
        "train": train_params,
        "valid": valid_params,
        "test": test_params,
    }

    main_loop = MainLoop(
        training_algorithm,
        # Datastream used for training
        self.streams['train'],
        model=Model(self.tagger.total_cost),
        extensions=[
            FinishAfter(after_n_epochs=self.p.num_epochs),
            SaveParams(self.p.get('save_freq', 0), self.tagger,
                       self.save_dir, before_epoch=True),
            DataStreamMonitoring(valid_params.values(),
                                 self.streams['valid'],
                                 prefix="valid"),
            FinalTestMonitoring(test_params.values(),
                                self.streams['train'],
                                {'valid': self.streams['valid'],
                                 'test': self.streams['test']},
                                after_training=True),
            TrainingDataMonitoring(train_params.values(),
                                   prefix="train", after_epoch=True),
            SaveExpParams(self.p, self.save_dir, before_training=True),
            Timing(after_epoch=True),
            ShortPrinting(short_prints, after_epoch=True),
        ])
    logger.info('Running the main loop')
    main_loop.run()
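
# Both trainers above use Adam as the step rule via blocks.algorithms.Adam.
# The function below restates the standard Adam update for a single parameter
# array in plain numpy, purely for reference; the hyperparameter defaults are
# the usual textbook ones, not necessarily the ones used by this repository.
def _adam_step_sketch(param, grad, m, v, t, lr=1e-3,
                      beta1=0.9, beta2=0.999, eps=1e-8):
    import numpy
    # t is the 1-based timestep; m and v are the running first/second moments.
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad ** 2
    m_hat = m / (1 - beta1 ** t)   # bias-corrected first moment
    v_hat = v / (1 - beta2 ** t)   # bias-corrected second moment
    param = param - lr * m_hat / (numpy.sqrt(v_hat) + eps)
    return param, m, v
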
def train_ladder(cli_params, dataset=None, save_to='results/ova_all_full'):
    cli_params['save_dir'] = prepare_dir(save_to)
    logfile = os.path.join(cli_params['save_dir'], 'log.txt')

    # Log also DEBUG to a file
    fh = logging.FileHandler(filename=logfile)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    logger.info('Logging into %s' % logfile)

    p, loaded = load_and_log_params(cli_params)
    ladder = setup_model(p)

    # Training
    all_params = ComputationGraph([ladder.costs.total]).parameters
    logger.info('Found the following parameters: %s' % str(all_params))

    # Fetch all batch normalization updates. They are in the clean path.
    bn_updates = ComputationGraph([ladder.costs.class_clean]).updates
    assert 'counter' in [u.name for u in bn_updates.keys()], \
        'No batch norm params in graph - the graph has been cut?'

    training_algorithm = GradientDescent(
        cost=ladder.costs.total, params=all_params,
        step_rule=Adam(learning_rate=ladder.lr))
    # In addition to actual training, also do BN variable approximations
    training_algorithm.add_updates(bn_updates)

    short_prints = {
        "train": {
            'T_C_class': ladder.costs.class_corr,
            'T_C_de': ladder.costs.denois.values(),
        },
        "valid_approx": OrderedDict([
            ('V_C_class', ladder.costs.class_clean),
            ('V_E', ladder.error.clean),
            ('V_C_de', ladder.costs.denois.values()),
        ]),
        "valid_final": OrderedDict([
            ('VF_C_class', ladder.costs.class_clean),
            ('VF_E', ladder.error.clean),
            ('VF_C_de', ladder.costs.denois.values()),
        ]),
    }

    ovadataset = dataset['ovadataset']
    train_indexes = dataset['train_indexes']
    val_indexes = dataset['val_indexes']

    main_loop = MainLoop(
        training_algorithm,
        # Datastream used for training
        make_datastream(ovadataset, train_indexes,
                        p.batch_size, scheme=ShuffledScheme),
        model=Model(ladder.costs.total),
        extensions=[
            FinishAfter(after_n_epochs=p.num_epochs),

            # This will estimate the validation error using
            # running average estimates of the batch normalization
            # parameters, mean and variance
            ApproxTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean]
                + ladder.costs.denois.values(),
                make_datastream(ovadataset, val_indexes, p.batch_size),
                prefix="valid_approx"),

            # This monitor is slower, but more accurate since it will first
            # estimate batch normalization parameters from training data and
            # then do another pass to calculate the validation error.
            FinalTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean_mc]
                + ladder.costs.denois.values(),
                make_datastream(ovadataset, train_indexes, p.batch_size),
                make_datastream(ovadataset, val_indexes, p.batch_size),
                prefix="valid_final", after_n_epochs=p.num_epochs),

            TrainingDataMonitoring(
                [ladder.costs.total, ladder.costs.class_corr,
                 training_algorithm.total_gradient_norm]
                + ladder.costs.denois.values(),
                prefix="train", after_epoch=True),

            ShortPrinting(short_prints),
            LRDecay(ladder.lr, p.num_epochs * p.lrate_decay,
                    p.num_epochs, after_epoch=True),
        ])
    main_loop.run()

    # Get results
    df = main_loop.log.to_dataframe()
    col = 'valid_final_error_matrix_cost'
    logger.info('%s %g' % (col, df[col].iloc[-1]))

    ds = make_datastream(ovadataset, val_indexes, p.batch_size)
    outputs = ladder.act.clean.labeled.h[len(ladder.layers) - 1]
    outputreplacer = TestMonitoring()
    _, _, outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []
    inputs = {'features_labeled': [],
              'targets_labeled': [],
              'features_unlabeled': []}
    # Loop over one epoch
    for d in it:
        # Store all inputs
        for k, v in d.iteritems():
            inputs[k] += [v]
        # Store outputs
        res += [f(*[d[str(inp)] for inp in cg.inputs])]

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]
    inputs = {k: numpy.vstack(v) for k, v in inputs.iteritems()}

    if main_loop.log.status['epoch_interrupt_received']:
        return None
    return res[0], inputs
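
# `train_ladder` expects the caller to supply `ovadataset` plus train/val
# index arrays; that preparation happens elsewhere in the repository. The
# helper below is only a hypothetical sketch of the usual one-vs-all target
# construction (the chosen class against the rest), to show what such a
# dataset preparation step might do. The names are made up for illustration.
def _make_ova_targets_sketch(targets, positive_class):
    import numpy
    targets = numpy.asarray(targets)
    # 1 for the chosen class, 0 for everything else.
    return (targets == positive_class).astype('int64')
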