def run_epoch(trainobj):
    """
    Runs an epoch.
    Returns True to continue or False to terminate.
    """
    if trainobj.first_callbacks_and_monitoring:
        trainobj.run_callbacks_and_monitoring()
        trainobj.first_callbacks_and_monitoring = False
        return True

    rval = True
    if trainobj.algorithm is None:
        rval = trainobj.model.train_all(dataset=trainobj.dataset)
        if rval is not None:
            raise ValueError("Model.train_all should not return "
                             "anything. Use Model.continue_learning "
                             "to control whether learning continues.")
        rval = post_epoch(trainobj)
    else:
        with log_timing(logger, None, level=logging.DEBUG,
                        callbacks=[trainobj.total_seconds.set_value]):
            with log_timing(logger, None, final_msg='Time this epoch:',
                            callbacks=[trainobj.training_seconds.set_value]):
                rval = trainobj.algorithm.train(dataset=trainobj.dataset)
        if rval is not None:
            raise ValueError("TrainingAlgorithm.train should not "
                             "return anything. Use "
                             "TrainingAlgorithm.continue_learning "
                             "to control whether learning "
                             "continues.")
        rval = post_epoch(trainobj)

    return rval
def main():
    parser = argparse.ArgumentParser(description='Collect statistics.')
    parser.add_argument('infile', nargs='+', type=argparse.FileType('r'),
                        help="The pickle files to read.")
    parser.add_argument('-O', '--output', type=argparse.FileType('w'),
                        help="Output CSV file to write.")
    args = parser.parse_args()

    names = [(a.name, a) for a in args.infile]
    indices = [b[0] for b in names]
    columns = ['weight_scaling_error', 'geometric_error', 'arithmetic_error']
    df = pd.DataFrame(index=indices, columns=columns)

    try:
        dataset = None
        for i, (name, model_handle) in enumerate(names):
            with log_timing(log, "Processing %s [%d / %d]"
                            % (name, i + 1, len(names))):
                with model_handle as f:
                    model = cPickle.load(f)
                if dataset is None:
                    with log_timing(log, "Loading test set",
                                    final_msg="Loaded."):
                        d = None
                        # HACK HACK HACK
                        for k in model.monitor._datasets:
                            if 'valid' in k or '50000' in k:
                                d = k
                                break
                        if d is None:
                            log.warning("No validation set found, using "
                                        "first dataset in monitor.")
                            d = model.monitor._datasets[0]
                        dataset = yload(d).get_test_set()
                d = compare_ensemble(model, dataset,
                                     input_scales={'h1': 2., 'y': 2.})
                df['weight_scaling_error'][name] = d['weight_scaling_error']
                df['geometric_error'][name] = d['geometric_error']
                df['arithmetic_error'][name] = d['arithmetic_error']
                np.save(model_handle.name + '.sca.npy',
                        d['weight_scaling_output'])
                np.save(model_handle.name + '.geo.npy', d['geometric_output'])
                np.save(model_handle.name + '.ari.npy', d['arithmetic_output'])
    finally:
        df.to_csv(args.output)
def process_dataset(model, dataset, data_specs=None, output_fn=None,
                    batch_size=128):
    if data_specs is None:
        data_specs = (CompositeSpace((model.get_input_space(),
                                      model.get_output_space())),
                      ("features", "targets"))

    if output_fn is None:
        with log_timing(log, 'compiling output_fn'):
            minibatch = model.get_input_space().make_theano_batch()
            output_fn = theano.function(inputs=[minibatch],
                                        outputs=model.fprop(minibatch))

    it = dataset.iterator(mode='sequential',
                          batch_size=batch_size,
                          data_specs=data_specs)
    y_pred = []
    y_real = []
    output = []
    for minibatch, target in it:
        out = output_fn(minibatch)  # this hangs for convnet on Jeep2
        output.append(out)
        y_pred.append(np.argmax(out, axis=1))
        y_real.append(np.argmax(target, axis=1))
    y_pred = np.hstack(y_pred)
    y_real = np.hstack(y_real)
    output = np.vstack(output)

    return y_real, y_pred, output
def on_monitor(self, model, dataset, algorithm):
    """
    Looks whether the model performs better than earlier.
    If it's the case, saves the model.

    Parameters
    ----------
    model : pylearn2.models.model.Model
        model.monitor must contain a channel with name given by
        self.channel_name
    dataset : pylearn2.datasets.dataset.Dataset
        Not used
    algorithm : TrainingAlgorithm
        Not used
    """
    monitor = model.monitor
    channels = monitor.channels
    channel = channels[self.channel_name]
    val_record = channel.val_record
    new_cost = val_record[-1]

    if self.coeff * new_cost < self.coeff * self.best_cost:
        self.best_cost = new_cost
        # Update the tag of the model object before saving it.
        self._update_tag(model)
        if self.store_best_model:
            self.best_model = deepcopy(model)
        if self.save_path is not None:
            with log_timing(log, 'Saving to ' + self.save_path):
                serial.save(self.save_path, model, on_overwrite='backup')
def process_dataset(model, dataset, data_specs=None, output_fn=None):
    if data_specs is None:
        data_specs = (CompositeSpace((model.get_input_space(),
                                      model.get_output_space())),
                      ("features", "targets"))

    if output_fn is None:
        with log_timing(log, 'compiling output_fn'):
            minibatch = model.get_input_space().make_theano_batch()
            output_fn = theano.function(inputs=[minibatch],
                                        outputs=model.fprop(minibatch))

    it = dataset.iterator('sequential',
                          batch_size=100,
                          data_specs=data_specs)
    y_pred = []
    y_real = []
    output = []
    for minibatch, target in it:
        out = output_fn(minibatch)  # this hangs for convnet on Jeep2
        output.append(out)
        y_pred.append(np.argmax(out, axis=1))
        y_real.append(np.argmax(target, axis=1))
    y_pred = np.hstack(y_pred)
    y_real = np.hstack(y_real)
    output = np.vstack(output)

    return y_real, y_pred, output
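# A minimal usage sketch (not from the source): evaluating the labels returned by
# process_dataset() with scikit-learn. `model` and `dataset` are assumed to be an
# already-loaded pylearn2 model and dataset.
from sklearn.metrics import classification_report, confusion_matrix

y_real, y_pred, output = process_dataset(model, dataset)
print classification_report(y_real, y_pred)
print confusion_matrix(y_real, y_pred)
print 'misclassification rate:', (y_real != y_pred).mean()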
def on_monitor(self, model, dataset, algorithm):
    epoch = algorithm.monitor._epochs_seen
    model_file = self.save_path + self.save_prefix + str(epoch) + '.pkl'
    with log_timing(log, 'saving model to {}'.format(model_file)):
        serial.save(model_file, model, on_overwrite='backup')
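# A minimal sketch (assumed wiring, not from the source): attaching an epoch-saving
# extension like the on_monitor() above to a pylearn2 Train object. `EpochSaver`,
# `my_model`, `my_dataset` and `my_algorithm` are hypothetical names.
from pylearn2.train import Train

train = Train(dataset=my_dataset,
              model=my_model,
              algorithm=my_algorithm,
              extensions=[EpochSaver(save_path='epochs/', save_prefix='epoch')])
train.main_loop()  # on_monitor() is called after each epoch's monitoring step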
def on_monitor(self, model, dataset, algorithm):
    """
    Looks whether the model performs better than earlier - or equally
    good (modification). If it's the case, saves the model.

    Parameters
    ----------
    model : pylearn2.models.model.Model
        model.monitor must contain a channel with name given by
        self.channel_name
    dataset : pylearn2.datasets.dataset.Dataset
        Not used
    algorithm : TrainingAlgorithm
        Not used
    """
    monitor = model.monitor
    channels = monitor.channels
    channel = channels[self.channel_name]
    val_record = channel.val_record
    new_cost = val_record[-1]

    if self.coeff * new_cost <= self.coeff * self.best_cost and \
            monitor._epochs_seen >= self.start_epoch:
        self.best_cost = new_cost
        # Update the tag of the model object before saving it.
        self._update_tag(model)
        if self.store_best_model:
            self.best_model = deepcopy(model)
        if self.save_path is not None:
            with log_timing(log, 'Saving to ' + self.save_path):
                serial.save(self.save_path, model, on_overwrite='backup')
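# A small self-contained illustration (assumed convention, not from the source) of the
# coeff-based comparison used above: coeff=+1 keeps the minimum of a channel (e.g. an
# error rate), coeff=-1 keeps the maximum (e.g. an accuracy).
def is_improvement(new_value, best_value, coeff=1.0):
    return coeff * new_value <= coeff * best_value

assert is_improvement(0.40, 0.50, coeff=1.0)   # lower error -> improvement
assert is_improvement(0.92, 0.90, coeff=-1.0)  # higher accuracy -> improvement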
def extract_output(config, best_epoch):
    # load best model
    model_file = os.path.join(config.experiment_root,
                              'epochs', 'epoch{}.pkl'.format(best_epoch))
    print 'loading ' + model_file
    model = serial.load(model_file)

    # additional dataset params
    config.start_sample = 11200
    config.stop_sample = 12800
    config.name = 'test'

    # load dataset
    dataset, dataset_yaml = load_yaml_file(
        os.path.join(os.path.dirname(__file__), '..', 'run',
                     'dataset_template.yaml'),
        params=config,
    )

    with log_timing(log, 'processing dataset'):
        y_real, y_pred, output = process_dataset(model, dataset)

    return y_real, y_pred, output
def main_loop(self):
    """
    Repeatedly runs an epoch of the training algorithm, runs any
    epoch-level callbacks, and saves the model.
    """
    if self.algorithm is None:
        self.model.monitor = Monitor.get_monitor(self.model)
        self.setup_extensions()
        self.run_callbacks_and_monitoring()
        while True:
            rval = self.model.train_all(dataset=self.dataset)
            if rval is not None:
                raise ValueError("Model.train_all should not return "
                                 "anything. Use Model.continue_learning "
                                 "to control whether learning continues.")
            self.model.monitor.report_epoch()
            if self.save_freq > 0 and \
                    self.model.monitor.epochs_seen % self.save_freq == 0:
                self.save()
            continue_learning = self.model.continue_learning()
            assert continue_learning in [True, False, 0, 1]
            if not continue_learning:
                break
    else:
        self.algorithm.setup(model=self.model, dataset=self.dataset)
        self.setup_extensions()
        if not hasattr(self.model, 'monitor'):
            # TODO: is this really necessary? I just put this error here
            # to prevent an AttributeError later, but I think we could
            # rewrite to avoid the AttributeError
            raise RuntimeError("The algorithm is responsible for setting"
                               " up the Monitor, but failed to.")
        if len(self.model.monitor._datasets) > 0:
            # This monitoring channel keeps track of a shared variable,
            # which does not need inputs nor data.
            self.model.monitor.add_channel(
                name="monitor_seconds_per_epoch",
                ipt=None,
                val=self.monitor_time,
                data_specs=(NullSpace(), ''),
                dataset=self.model.monitor._datasets[0])
        self.run_callbacks_and_monitoring()
        while True:
            with log_timing(log, None, final_msg='Time this epoch:',
                            callbacks=[self.monitor_time.set_value]):
                rval = self.algorithm.train(dataset=self.dataset)
            if rval is not None:
                raise ValueError("TrainingAlgorithm.train should not "
                                 "return anything. Use "
                                 "TrainingAlgorithm.continue_learning "
                                 "to control whether learning continues.")
            self.model.monitor.report_epoch()
            self.run_callbacks_and_monitoring()
            if self.save_freq > 0 and \
                    self.model.monitor._epochs_seen % self.save_freq == 0:
                self.save()
            continue_learning = self.algorithm.continue_learning(self.model)
            assert continue_learning in [True, False, 0, 1]
            if not continue_learning:
                break

    self.model.monitor.training_succeeded = True
    if self.save_freq > 0:
        self.save()
def load_results(experiment_root):
    # load the model (mlp_best.pkl)
    model_file = os.path.join(experiment_root, 'mlp_best.pkl')
    with log_timing(log, 'loading model from {}'.format(model_file)):
        model = serial.load(model_file)

    # load train
    train_yaml_file = os.path.join(experiment_root, 'train.yaml')
    train_yaml = load_yaml_template(train_yaml_file)

    # fix dataset path
    localizer = PathLocalizer()
    train_yaml = localizer.localize_yaml(train_yaml)

    with log_timing(log, 'loading train from {}'.format(train_yaml_file)):
        train = load_yaml(train_yaml)[0]

    return train, model
def train_mlp(params):
    train, yaml_str = load_yaml_file(
        os.path.join(os.path.dirname(__file__), 'cross_trial_template.yaml'),
        params=params,
    )
    save_yaml_file(yaml_str,
                   os.path.join(params.experiment_root, 'settings.yaml'))

    with log_timing(log, 'training network'):
        train.main_loop()
def save_yaml_file(yaml_str, yaml_file_path):
    if yaml_file_path is not None:
        with log_timing(log, 'saving yaml to {}'.format(yaml_file_path)):
            save_dir = os.path.dirname(yaml_file_path)
            if save_dir == '':
                save_dir = '.'
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)
            with open(yaml_file_path, 'w') as yaml_file:
                yaml_file.write(yaml_str)
def extract_output(experiment_root):
    train, model = load_results(experiment_root)

    # get the datasets with their names from the monitor
    for key, dataset in train.algorithm.monitoring_dataset.items():
        # process each dataset
        with log_timing(log, 'processing dataset \'{}\''.format(key)):
            y_real, y_pred, output = process_dataset(model, dataset)
            save(os.path.join(experiment_root, 'cache',
                              key + '_output.pklz'),
                 (y_real, y_pred, output))
def load_data_file(filename):
    with log_timing(log, 'loading data from {}'.format(filename)):
        data = np.genfromtxt(filename, dtype=theano.config.floatX,
                             delimiter=' ', skip_header=1, autostrip=True)
    log.info('loaded {}'.format(data.shape))
    return data
def __init__(self, filepath):
    self.filepath = filepath

    with log_timing(log, 'loading data from {}'.format(filepath)):
        tmp = load(filepath)
        if len(tmp) == 2:
            self.data, self.metadata = tmp
            self.targets = None
        elif len(tmp) == 3:
            self.data, self.metadata, self.targets = tmp
        else:
            raise ValueError(
                'got {} objects instead of 2 or 3.'.format(len(tmp)))
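# A minimal stand-in (assumption, not from the source) for the load()/save() helpers
# used above: the file is assumed to hold a gzip-compressed pickle containing either
# (data, metadata) or (data, metadata, targets).
import gzip
import cPickle

def save(filepath, obj):
    with gzip.open(filepath, 'wb') as f:
        cPickle.dump(obj, f, protocol=cPickle.HIGHEST_PROTOCOL)

def load(filepath):
    with gzip.open(filepath, 'rb') as f:
        return cPickle.load(f)

# e.g. save('trial.pklz', (data, metadata, targets))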
def train_convnet(config):
    train, yaml_str = load_yaml_file(
        os.path.join(os.path.dirname(__file__),
                     'train_convnet_template.yaml'),
        params=config,
    )
    save_yaml_file(yaml_str,
                   os.path.join(config.experiment_root, 'settings.yaml'))

    with log_timing(log, 'training network'):
        train.main_loop()
def load_yaml(yaml_template, params=None):
    log.debug('params: {}'.format(params))

    if params is not None:
        yaml_str = yaml_template % params
    else:
        yaml_str = yaml_template
    log.debug(yaml_str)

    with log_timing(log, 'parsing yaml'):
        obj = yaml_parse.load(yaml_str)

    return obj, yaml_str
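# A small usage sketch (hypothetical template and params, not from the source): the
# template uses Python %-style placeholders that are filled from a dict (or dict-like
# params object) before the string is handed to pylearn2's yaml_parse.load().
template = """
!obj:pylearn2.datasets.dense_design_matrix.DenseDesignMatrix {
    X: !obj:numpy.zeros { shape: [%(n_examples)i, %(n_features)i] },
}
"""
dataset, yaml_str = load_yaml(template, {'n_examples': 10, 'n_features': 5})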
def main_loop(self):
    """
    Repeatedly runs an epoch of the training algorithm, runs any
    epoch-level callbacks, and saves the model.
    """
    if self.algorithm is None:
        self.model.monitor = Monitor.get_monitor(self.model)
        self.setup_extensions()
        self.run_callbacks_and_monitoring()
        while True:
            rval = self.model.train_all(dataset=self.dataset)
            if rval is not None:
                raise ValueError("Model.train_all should not return "
                                 "anything. Use Model.continue_learning "
                                 "to control whether learning continues.")
            self.model.monitor.report_epoch()
            if self.save_freq > 0 and \
                    self.model.monitor.epochs_seen % self.save_freq == 0:
                self.save()
            continue_learning = self.model.continue_learning()
            assert continue_learning in [True, False, 0, 1]
            if not continue_learning:
                break
    else:
        self.algorithm.setup(model=self.model, dataset=self.dataset)
        self.setup_extensions()
        if not hasattr(self.model, 'monitor'):
            # TODO: is this really necessary? I just put this error here
            # to prevent an AttributeError later, but I think we could
            # rewrite to avoid the AttributeError
            raise RuntimeError("The algorithm is responsible for setting"
                               " up the Monitor, but failed to.")
        self.run_callbacks_and_monitoring()
        while True:
            with log_timing(log, None, final_msg='Time this epoch:'):
                rval = self.algorithm.train(dataset=self.dataset)
            if rval is not None:
                raise ValueError("TrainingAlgorithm.train should not "
                                 "return anything. Use "
                                 "TrainingAlgorithm.continue_learning "
                                 "to control whether learning continues.")
            self.model.monitor.report_epoch()
            self.run_callbacks_and_monitoring()
            if self.save_freq > 0 and \
                    self.model.monitor._epochs_seen % self.save_freq == 0:
                self.save()
            continue_learning = self.algorithm.continue_learning(self.model)
            assert continue_learning in [True, False, 0, 1]
            if not continue_learning:
                break

    self.model.monitor.training_succeeded = True
    if self.save_freq > 0:
        self.save()
def __init__(self, save_dir):
    PYLEARN2_TRAIN_DIR = preprocess('${PYLEARN2_TRAIN_DIR}')
    PYLEARN2_TRAIN_BASE_NAME = preprocess('${PYLEARN2_TRAIN_BASE_NAME}')

    src = os.path.join(PYLEARN2_TRAIN_DIR, PYLEARN2_TRAIN_BASE_NAME)
    dst = os.path.join(save_dir, PYLEARN2_TRAIN_BASE_NAME)

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    if os.path.exists(save_dir) and not os.path.isdir(save_dir):
        raise IOError("save path %s exists, not a directory" % save_dir)
    elif not os.access(save_dir, os.W_OK):
        raise IOError("permission error creating %s" % dst)

    with log_timing(log, 'copying yaml from {} to {}'.format(src, dst)):
        copyfile(src, dst)
def run_one_epoch(self, datasets, remember_best):
    batch_generator = self.iterator.get_batches(datasets['train'],
                                                shuffle=True)
    with log_timing(log, None, final_msg='Time updates following epoch:'):
        for inputs, targets in batch_generator:
            if self.batch_modifier is not None:
                inputs, targets = self.batch_modifier.process(inputs,
                                                              targets)
            # could happen that batch modifier has removed all inputs...
            if len(inputs) > 0:
                self.train_func(inputs, targets)

    self.monitor_epoch(datasets)
    self.print_epoch()
    if remember_best:
        self.remember_extension.remember_epoch(self.monitor_chans,
                                               self.all_params)
def train_mlp(params):
    # check whether pre-trained SDA is there
    pretrained = True
    for i in xrange(len(params.hidden_layers_sizes)):
        sda_layer_file = params.get('layer{}_content'.format(i))
        if not os.path.isfile(sda_layer_file):
            log.info('did not find pre-trained SDA layer model at {}. '
                     're-computing SDA'.format(sda_layer_file))
            pretrained = False
            break
        else:
            log.info('found pre-trained SDA layer model at {}'.format(
                sda_layer_file))

    if not pretrained:
        train_sda(params)

    n_layers = len(params.hidden_layers_sizes)
    if params.learning_rule == 'AdaDelta':
        yaml_template = 'train_sda_mlp_template.AdaDelta.yaml'
    else:
        if n_layers == 3:
            yaml_template = 'train_sda_mlp_template.Momentum.yaml'
        elif n_layers == 2:
            yaml_template = 'train_sda_mlp_template.Momentum.2layers.yaml'
        else:
            raise ValueError('{} layers not supported'.format(n_layers))

    train, train_yaml_str = load_yaml_file(
        os.path.join(os.path.dirname(__file__), yaml_template),
        params=params,
    )
    save_yaml_file(train_yaml_str,
                   os.path.join(params.experiment_root, 'mlp_train.yaml'))

    with log_timing(log, 'training MLP'):
        train.main_loop()

    log.info('done')
def save(self):
    """Saves the model."""
    # TODO-- save state of training algorithm so training can be
    # resumed after a crash
    for extension in self.extensions:
        extension.on_save(self.model, self.dataset, self.algorithm)
    if self.save_path is not None:
        with log_timing(log, 'Saving to ' + self.save_path):
            if self.first_save and (not self.allow_overwrite) \
                    and os.path.exists(self.save_path):
                # Every job overwrites its own output on the second save
                # and every save thereafter. The "allow_overwrite" flag
                # only pertains to overwriting the output of previous jobs.
                raise IOError("Trying to overwrite file when not allowed.")
            try:
                # Make sure that saving does not serialize the dataset
                self.dataset._serialization_guard = SerializationGuard()
                serial.save(self.save_path, self.model,
                            on_overwrite='backup')
            finally:
                self.dataset._serialization_guard = None
        self.first_save = False
def split_trial(path, trial_len):
    log.info('processing {}'.format(path))

    datafile = glob.glob(os.path.join(path, '*.txt'))[0]
    metafile = glob.glob(os.path.join(path, '*_Trials_Onsets.xlsx'))[0]
    log.debug('data file: {}'.format(datafile))
    log.debug('meta file: {}'.format(metafile))

    onsets = load_xlsx_meta_file(metafile)
    data = load_data_file(datafile)
    log.debug(onsets)

    onsets.append([len(data), 'end'])  # artificial last marker

    trials = {}
    for i in xrange(len(onsets) - 1):
        onset, label = onsets[i]
        next_onset = onsets[i + 1][0]

        # rounding to integers
        onset = int(math.floor(float(onset)))
        next_onset = int(math.floor(float(next_onset)))
        next_onset = min(onset + trial_len, next_onset)

        log.debug('[{}..{}) -> {}'.format(onset, next_onset, label))
        trial_data = np.vstack(data[onset:next_onset])
        log.debug('{} samples extracted'.format(trial_data.shape))

        trials[label] = trial_data

    filename = os.path.join(path, 'trials.pklz')
    with log_timing(log, 'saving to {}'.format(filename)):
        save(filename, trials)

    return trials
def process_markers(self, markers):
    # Check if a trial has ended with last samples
    # need marker samples with some overlap
    # so we do not miss trial boundaries in between two sample blocks
    marker_samples_with_overlap = np.copy(
        self.marker_buffer[-len(markers) - 2:])
    trial_has_ended = np.sum(np.diff(marker_samples_with_overlap) < 0) > 0
    if trial_has_ended:
        trial_starts, trial_stops = self.get_trial_start_stop_indices(
            self.marker_buffer)
        trial_start = trial_starts[-1]
        trial_stop = trial_stops[-1]
        log.info("Trial has ended for class {:d}".format(
            self.marker_buffer[trial_start]))
        assert trial_start < trial_stop, \
            ("trial start {:d} should be before trial stop {:d}, "
             "markers: {:s}").format(trial_start, trial_stop,
                                     str(marker_samples_with_overlap))
        self.add_blocks(trial_start + self.trial_start_offset, trial_stop,
                        self.data_processor.sample_buffer,
                        self.marker_buffer)
        log.info("Now {:d} trials (including breaks)".format(
            len(self.data_batches)))
        with log_timing(log, None, final_msg='Time for training:'):
            self.train()
    trial_has_started = np.sum(np.diff(marker_samples_with_overlap) > 0) > 0
    if trial_has_started:
        trial_end_in_marker_buffer = np.sum(
            np.diff(self.marker_buffer) < 0) > 0
        if trial_end_in_marker_buffer:
            # +1 necessary since diff removes one index
            trial_start = np.flatnonzero(
                np.diff(self.marker_buffer) > 0)[-1] + 1
            trial_stop = np.flatnonzero(
                np.diff(self.marker_buffer) < 0)[-1] + 1
            assert trial_start > trial_stop, \
                ("If trial has just started "
                 "expect this to be after stop of last trial")
            self.add_break(break_start=trial_stop,
                           break_stop=trial_start,
                           all_samples=self.data_processor.sample_buffer,
                           all_markers=self.marker_buffer)
def main_loop(self, time_budget=None):
    """
    Repeatedly runs an epoch of the training algorithm, runs any
    epoch-level callbacks, and saves the model.

    Parameters
    ----------
    time_budget : int, optional
        The maximum number of seconds before interrupting
        training. Default is `None`, no time limit.
    """
    t0 = datetime.now()
    self.setup()
    if self.algorithm is None:
        self.run_callbacks_and_monitoring()
        while True:
            if self.exceeded_time_budget(t0, time_budget):
                break

            rval = self.model.train_all(dataset=self.dataset)
            if rval is not None:
                raise ValueError("Model.train_all should not return "
                                 "anything. Use Model.continue_learning "
                                 "to control whether learning continues.")
            self.model.monitor.report_epoch()
            extension_continue = self.run_callbacks_and_monitoring()
            freq = self.save_freq
            if freq > 0 and self.model.monitor.get_epochs_seen() % freq == 0:
                self.save()
            continue_learning = (self.model.continue_learning() and
                                 extension_continue)
            assert continue_learning in [True, False, 0, 1]
            if not continue_learning:
                break
    else:
        if not hasattr(self.model, 'monitor'):
            # TODO: is this really necessary? I just put this error here
            # to prevent an AttributeError later, but I think we could
            # rewrite to avoid the AttributeError
            raise RuntimeError("The algorithm is responsible for setting"
                               " up the Monitor, but failed to.")
        if len(self.model.monitor._datasets) > 0:
            # This monitoring channel keeps track of a shared variable,
            # which does not need inputs nor data.
            self.training_seconds.__doc__ = """\
The number of seconds that were spent in actual training during the most
recent epoch. This excludes seconds that were spent running callbacks for
the extensions, computing monitoring channels, etc."""
            self.model.monitor.add_channel(
                name="training_seconds_this_epoch",
                ipt=None,
                val=self.training_seconds,
                data_specs=(NullSpace(), ''),
                dataset=self.model.monitor._datasets[0])
            self.total_seconds.__doc__ = """\
The number of seconds that were spent on the entirety of processing for the
previous epoch. This includes not only training but also the computation of
the monitoring channels, running TrainExtension callbacks, etc. This value
is reported for the *previous* epoch because the amount of time spent on
monitoring for this epoch is not known until the monitoring channels have
already been reported."""
            self.model.monitor.add_channel(
                name="total_seconds_last_epoch",
                ipt=None,
                val=self.total_seconds,
                data_specs=(NullSpace(), ''),
                dataset=self.model.monitor._datasets[0])
        self.run_callbacks_and_monitoring()
        while True:
            if self.exceeded_time_budget(t0, time_budget):
                break

            with log_timing(log, None, level=logging.DEBUG,
                            callbacks=[self.total_seconds.set_value]):
                with log_timing(log, None, final_msg='Time this epoch:',
                                callbacks=[self.training_seconds.set_value]):
                    rval = self.algorithm.train(dataset=self.dataset)
                if rval is not None:
                    raise ValueError("TrainingAlgorithm.train should not "
                                     "return anything. Use "
                                     "TrainingAlgorithm.continue_learning "
                                     "to control whether learning "
                                     "continues.")
                self.model.monitor.report_epoch()
                extension_continue = self.run_callbacks_and_monitoring()
                if self.save_freq > 0 and \
                   self.model.monitor.get_epochs_seen() % self.save_freq == 0:
                    self.save()
            continue_learning = (
                self.algorithm.continue_learning(self.model) and
                extension_continue
            )
            assert continue_learning in [True, False, 0, 1]
            if not continue_learning:
                break

    self.model.monitor.training_succeeded = True

    if self.save_freq > 0:
        self.save()
def main_loop(self, time_budget=None):
    """
    Repeatedly runs an epoch of the training algorithm, runs any
    epoch-level callbacks, and saves the model.

    Parameters
    ----------
    time_budget : int, optional
        The maximum number of seconds before interrupting
        training. Default is `None`, no time limit.
    """
    t0 = datetime.now()
    if self.algorithm is None:
        self.model.monitor = Monitor.get_monitor(self.model)
        self.model.monitor.time_budget_exceeded = False
        self.setup_extensions()

        # Model.censor_updates is used by the training algorithm to
        # enforce constraints after each step of learning. Here we
        # make sure the constraints are enforced from the start.
        self.model.enforce_constraints()

        self.run_callbacks_and_monitoring()
        while True:
            if self.exceeded_time_budget(t0, time_budget):
                break

            rval = self.model.train_all(dataset=self.dataset)
            if rval is not None:
                raise ValueError("Model.train_all should not return "
                                 "anything. Use Model.continue_learning "
                                 "to control whether learning continues.")
            self.model.monitor.report_epoch()
            extension_continue = self.run_callbacks_and_monitoring()
            freq = self.save_freq
            if freq > 0 and self.model.monitor.epochs_seen % freq == 0:
                self.save()
            continue_learning = (self.model.continue_learning() and
                                 extension_continue)
            assert continue_learning in [True, False, 0, 1]
            if not continue_learning:
                break
    else:
        self.algorithm.setup(model=self.model, dataset=self.dataset)
        self.setup_extensions()

        # Model.censor_updates is used by the training algorithm to
        # enforce constraints after each step of learning. Here we
        # make sure the constraints are enforced from the start.
        self.model.enforce_constraints()

        if not hasattr(self.model, 'monitor'):
            # TODO: is this really necessary? I just put this error here
            # to prevent an AttributeError later, but I think we could
            # rewrite to avoid the AttributeError
            raise RuntimeError("The algorithm is responsible for setting"
                               " up the Monitor, but failed to.")
        if len(self.model.monitor._datasets) > 0:
            # This monitoring channel keeps track of a shared variable,
            # which does not need inputs nor data.
            self.model.monitor.add_channel(
                name="training_seconds_this_epoch",
                ipt=None,
                val=self.training_seconds,
                data_specs=(NullSpace(), ''),
                dataset=self.model.monitor._datasets[0])
            self.model.monitor.add_channel(
                name="total_seconds_last_epoch",
                ipt=None,
                val=self.total_seconds,
                data_specs=(NullSpace(), ''),
                dataset=self.model.monitor._datasets[0])
        self.run_callbacks_and_monitoring()
        while True:
            if self.exceeded_time_budget(t0, time_budget):
                break

            with log_timing(log, None, level=logging.DEBUG,
                            callbacks=[self.total_seconds.set_value]):
                with log_timing(log, None, final_msg='Time this epoch:',
                                callbacks=[self.training_seconds.set_value]):
                    rval = self.algorithm.train(dataset=self.dataset)
                if rval is not None:
                    raise ValueError("TrainingAlgorithm.train should not "
                                     "return anything. Use "
                                     "TrainingAlgorithm.continue_learning "
                                     "to control whether learning continues.")
                self.model.monitor.report_epoch()
                extension_continue = self.run_callbacks_and_monitoring()
                if self.save_freq > 0 and \
                   self.model.monitor._epochs_seen % self.save_freq == 0:
                    self.save()
            continue_learning = (
                self.algorithm.continue_learning(self.model) and
                extension_continue
            )
            assert continue_learning in [True, False, 0, 1]
            if not continue_learning:
                break

    self.model.monitor.training_succeeded = True

    if self.save_freq > 0:
        self.save()
def redo_theano(self):
    """
    Recompiles Theano functions used by this monitor.

    This is called any time we need to evaluate the channels and the
    channel definitions have changed since last we called it, or if the
    theano functions are unavailable for any other reason (first time they
    are needed after construction or deserialization, etc.)

    All channels are compiled as part of the same theano function so that
    the theano optimizations can eliminate subexpressions that are shared
    between multiple channels.
    """
    self._dirty = False

    # Recompute the data specs, since the channels may have changed.
    self._build_data_specs()

    init_names = dir(self)
    self.prereqs = OrderedDict()
    for channel in self.channels.values():
        if channel.prereqs is not None:
            dataset = channel.dataset
            if dataset not in self.prereqs:
                self.prereqs[dataset] = []
            prereqs = self.prereqs[dataset]
            for prereq in channel.prereqs:
                if prereq not in prereqs:
                    prereqs.append(prereq)

    updates = OrderedDict()
    for channel in self.channels.values():
        updates[channel.val_shared] = np.cast[config.floatX](0.0)
    with log_timing(log, "compiling begin_record_entry"):
        self.begin_record_entry = function(
            inputs=[],
            updates=updates,
            mode=self.theano_function_mode,
            name='Monitor.begin_record_entry')
    updates = OrderedDict()
    givens = OrderedDict()

    # Get the appropriate kind of theano variable to represent the data
    # the model acts on
    batch_names = ['monitoring_%s' % s for s in self._flat_data_specs[1]]
    theano_args = self._flat_data_specs[0].make_theano_batch(batch_names)

    # Get a symbolic expression of the batch size
    # We do it here, rather than for each channel, because channels with an
    # empty data_specs do not use data, and are unable to extract the batch
    # size. The case where the whole data specs is empty is not supported.
    batch_size = self._flat_data_specs[0].batch_size(theano_args)

    # Also get a nested representation, for joint iteration
    # with each of channel.graph_input
    nested_theano_args = self._data_specs_mapping.nest(theano_args)
    if not isinstance(nested_theano_args, tuple):
        nested_theano_args = (nested_theano_args,)
    assert len(nested_theano_args) == (len(self.channels) + 1)

    log.info('Monitored channels: ')
    for key in sorted(self.channels.keys()):
        mode = self.theano_function_mode
        if mode is not None and hasattr(mode, 'record'):
            mode.record.handle_line('compiling monitor including ' +
                                    'channel ' + key + '\n')
        log.info('\t%s' % key)

    it = [d.iterator(mode=i, num_batches=n, batch_size=b,
                     data_specs=self._flat_data_specs,
                     return_tuple=True)
          for d, i, n, b in safe_izip(self._datasets, self._iteration_mode,
                                      self._num_batches, self._batch_size)]
    self.num_examples = [np.cast[config.floatX](float(i.num_examples))
                         for i in it]

    givens = [OrderedDict() for d in self._datasets]
    updates = [OrderedDict() for d in self._datasets]
    for i, channel in enumerate(self.channels.values()):
        index = self._datasets.index(channel.dataset)
        d = self._datasets[index]
        g = givens[index]
        cur_num_examples = self.num_examples[index]
        u = updates[index]

        # Flatten channel.graph_input and the appropriate part of
        # nested_theano_args, to iterate jointly over them.
        c_mapping = DataSpecsMapping(channel.data_specs)
        channel_inputs = c_mapping.flatten(channel.graph_input,
                                           return_tuple=True)
        inputs = c_mapping.flatten(nested_theano_args[i + 1],
                                   return_tuple=True)

        for (channel_X, X) in safe_izip(channel_inputs, inputs):
            assert channel_X not in g or g[channel_X] is X
            assert channel_X.type == X.type, (channel_X.type, X.type)
            g[channel_X] = X

        if batch_size == 0:
            # No channel does need any data, so there is not need to
            # average results, and we will call the accum functions only
            # once.
            # TODO: better handling of channels not needing data when
            # some other channels need data.
            assert len(self._flat_data_specs[1]) == 0
            val = channel.val
        else:
            if cur_num_examples == 0:
                raise ValueError("Iterating over 0 examples results in "
                                 "divide by 0")
            val = (channel.val * T.cast(batch_size, config.floatX) /
                   cur_num_examples)
        u[channel.val_shared] = channel.val_shared + val

    with log_timing(log, "Compiling accum"):
        # Check type of update expressions
        for up in updates:
            for key in up:
                if key.dtype != up[key].dtype:
                    raise TypeError('Monitoring channel shared variable ' +
                                    key.name + ' has dtype ' + key.dtype +
                                    ' but is driven by an expression ' +
                                    'with type ' + up[key].dtype)

        self.accum = []
        for idx, packed in enumerate(safe_izip(givens, updates)):
            g, u = packed
            mode = self.theano_function_mode
            if mode is not None and hasattr(mode, 'record'):
                for elem in g:
                    mode.record.handle_line('g key ' +
                                            var_descriptor(elem) + '\n')
                    mode.record.handle_line('g val ' +
                                            var_descriptor(g[elem]) + '\n')
                for elem in u:
                    mode.record.handle_line('u key ' +
                                            var_descriptor(elem) + '\n')
                    mode.record.handle_line('u val ' +
                                            var_descriptor(u[elem]) + '\n')
            function_name = 'Monitor.accum[%d]' % idx
            if mode is not None and hasattr(mode, 'record'):
                mode.record.handle_line('compiling supervised accum\n')
            # Some channels may not depend on the data, ie, they might just
            # monitor the model parameters, or some shared variable updated
            # by the training algorithm, so we need to ignore the unused
            # input error
            self.accum.append(function(theano_args,
                                       givens=g,
                                       updates=u,
                                       mode=self.theano_function_mode,
                                       name=function_name))
        for a in self.accum:
            if mode is not None and hasattr(mode, 'record'):
                for elem in a.maker.fgraph.outputs:
                    mode.record.handle_line('accum output ' +
                                            var_descriptor(elem) + '\n')
            log.info("graph size: %d" % len(a.maker.fgraph.toposort()))

    final_names = dir(self)
    self.register_names_to_del([name for name in final_names
                                if name not in init_names])
def analyze(config):
    output_path = config.get('output_path')
    # model_file = os.path.join(output_path, 'eeg', 'conv3', 'convolutional_network.pkl')
    # model_file = os.path.join(output_path, 'eeg', 'conv10', 'epochs', 'cnn_epoch94.pkl')
    model_file = '../../../debug/debug_run4/debug_network.pkl'

    with log_timing(log, 'loading convnet model from {}'.format(model_file)):
        model = serial.load(model_file)

    input_shape = model.get_input_space().shape

    config = config.eeg
    hyper_params = {
        # 25+151-1+301-1
        # this should leave a single value per channel after convolution
        'input_length': input_shape[0],
        'hop_size': 5,  # reduce amount of data by factor 5
        'dataset_root': config.get('dataset_root'),
        'dataset_suffix': config.get('dataset_suffix'),
        'save_path': config.get('save_path'),
    }

    dataset_yaml = '''
!obj:deepthought.datasets.rwanda2013rhythms.EEGDataset.EEGDataset {
    name : 'testset',
    path : %(dataset_root)s,
    suffix : '_channels',  # %(dataset_suffix)s,
    subjects : [0],
    resample : [400, 100],
    start_sample : 2500,
    stop_sample : 3200,  # None (empty) = end of sequence
    # FIXME:
    # n_fft : 24,
    # frame_size : 10,  # %(input_length)i,
    frame_size : %(input_length)i,
    hop_size : %(hop_size)i,
    label_mode : 'rhythm_type',
    # save_matrix_path: '../../../debug/debug.pkl'
}
'''
    dataset_yaml = dataset_yaml % hyper_params
    print dataset_yaml

    with log_timing(log, 'parsing yaml'):
        testset = yaml_parse.load(dataset_yaml)

    seq_starts = testset.sequence_partitions

    # get per-frame predictions from the convnet
    minibatch = model.get_input_space().make_theano_batch()
    output_fn = theano.function(inputs=[minibatch],
                                outputs=T.argmax(model.fprop(minibatch),
                                                 axis=1))
    print "function compiled"

    data_specs = (CompositeSpace((model.get_input_space(),
                                  model.get_output_space())),
                  ("features", "targets"))
    it = testset.iterator('sequential',
                          batch_size=100,
                          data_specs=data_specs)
    print "iterator ready"

    y_pred = []
    y_real = []
    for minibatch, target in it:
        y_pred.append(output_fn(minibatch))
        y_real.append(np.argmax(target, axis=1))
    y_pred = np.hstack(y_pred)
    y_real = np.hstack(y_real)
    print y_pred[0:1000]

    print classification_report(y_real, y_pred)
    print confusion_matrix(y_real, y_pred)

    misclass = (y_real != y_pred)
    print misclass.mean()

    # aggregate frame-level predictions per sequence
    correct = 0
    s_real = []
    s_pred = []
    s_pred_agg = []

    n_channels = 16
    channel_scores = np.zeros(n_channels, dtype=np.int)

    for i in xrange(len(seq_starts)):
        start = seq_starts[i]
        if i < len(seq_starts) - 1:
            stop = seq_starts[i + 1]
        else:
            stop = None

        s_real.append(y_real[start])
        s_pred.append(np.argmax(np.bincount(y_pred[start:stop])))
        # works only for binary classification
        s_pred_agg.append(np.mean(y_pred[start:stop]))

        seq_misclass = misclass[start:stop].mean()
        if seq_misclass < 0.5:  # more correct than incorrect
            correct += 1
            channel_scores[i % n_channels] += 1

    s_real = np.hstack(s_real)
    s_pred = np.hstack(s_pred)

    print s_real
    print s_pred
    print s_pred_agg

    print 'aggregated'
    print classification_report(s_real, s_pred)
    print confusion_matrix(s_real, s_pred)

    s_misclass = (s_real != s_pred)
    print s_misclass.mean()
    print channel_scores

    return

    # unreachable alternative kept from the original: batch prediction via
    # DefaultViewConverter instead of the dataset iterator
    input_shape = model.get_input_space().shape
    print input_shape

    view_converter = DefaultViewConverter((input_shape[0], input_shape[1], 1))
    data = view_converter.design_mat_to_topo_view(testset.X)
    print data.shape

    X = model.get_input_space().make_theano_batch()
    Y = model.fprop(X)
    Y = T.argmax(Y, axis=1)  # needed - otherwise not single value
    output_fn = theano.function([X], Y)

    batch_size = 1000
    y_pred = []
    batch_start = 0
    while batch_start < data.shape[0]:
        batch_stop = min(data.shape[0], batch_start + batch_size)
        y_pred.append(output_fn(data[batch_start:batch_stop]))
        batch_start = batch_stop
    y_pred = np.hstack(y_pred)

    print testset.labels[0:1000]
    print y_pred[0:1000]

    print classification_report(testset.labels, y_pred)
    print confusion_matrix(testset.labels, y_pred)

    labels = np.argmax(testset.y, axis=1)
    print classification_report(labels, y_pred)
    print confusion_matrix(labels, y_pred)

    misclass = (labels != y_pred).mean()
    print misclass

    log.debug('done')
def redo_theano(self): """ Recompiles Theano functions used by this monitor. This is needed so that if new channels are added, Theano's optimizations make sure (to the extent that they can) that the new channels and old channels don't have any redundant calculations. It is also needed to regenerate Theano functions after pickling and unpickling, since Theano functions should not be pickled. """ self._dirty = False init_names = dir(self) self.prereqs = OrderedDict() for channel in self.channels.values(): if channel.prereqs is not None: dataset = channel.dataset if dataset not in self.prereqs: self.prereqs[dataset] = [] prereqs = self.prereqs[dataset] for prereq in channel.prereqs: if prereq not in prereqs: prereqs.append(prereq) updates = OrderedDict() for channel in self.channels.values(): updates[channel.val_shared] = np.cast[config.floatX](0.0) with log_timing(log, "compiling begin_record_entry"): self.begin_record_entry = function( inputs=[], updates=updates, mode=self.theano_function_mode, name='Monitor.begin_record_entry') updates = OrderedDict() givens = OrderedDict() # Get the appropriate kind of theano variable to represent the data the model # acts on X = self.model.get_input_space().make_theano_batch(name="monitoring_X") if config.compute_test_value != 'off': m = self.model.get_test_batch_size() test_value = self.model.get_input_space().get_origin_batch(m) X.tag.test_value = np.cast[X.type.dtype](test_value) if self.require_label: Y = self.model.get_output_space().make_theano_batch( name="monitoring_Y") log.info('Monitored channels: ') for key in sorted(self.channels.keys()): mode = self.theano_function_mode if mode is not None and hasattr(mode, 'record'): mode.record.handle_line( 'compiling monitor including channel ' + key + '\n') log.info('\t%s' % key) it = [d.iterator(mode=i, num_batches=n, batch_size=b, topo=self.topo) \ for d, i, n, b in safe_izip(self._datasets, self._iteration_mode, self._num_batches, self._batch_size)] self.num_examples = [ np.cast[config.floatX](float(i.num_examples)) for i in it ] givens = [OrderedDict() for d in self._datasets] updates = [OrderedDict() for d in self._datasets] for channel in self.channels.values(): index = self._datasets.index(channel.dataset) d = self._datasets[index] g = givens[index] cur_num_examples = self.num_examples[index] u = updates[index] if isinstance(channel.graph_input, (list, tuple)): channel_X, channel_Y = channel.graph_input assert channel_X not in g or g[channel_X] is X assert channel_Y not in g or g[channel_Y] is Y g[channel_X] = X g[channel_Y] = Y else: channel_X = channel.graph_input assert channel_X not in g or g[channel_X] is X g[channel_X] = X if n == 0: raise ValueError( "Iterating over 0 examples results in divide by 0") if self.topo: batch_index = d.get_topo_batch_axis() else: batch_index = 0 val = channel.val * T.cast(X.shape[batch_index], config.floatX) / cur_num_examples u[channel.val_shared] = channel.val_shared + val with log_timing(log, "Compiling accum"): # Check type of update expressions for up in updates: for key in up: if key.dtype != up[key].dtype: raise TypeError('Monitoring channel shared variable ' \ + key.name + ' has dtype ' + key.dtype + \ ' but is driven by an expression with type ' + \ up[key].dtype) self.accum = [] for idx, packed in enumerate(safe_izip(givens, updates)): g, u = packed mode = self.theano_function_mode if mode is not None and hasattr(mode, 'record'): for elem in g: mode.record.handle_line('g key ' + var_descriptor(elem) + '\n') mode.record.handle_line('g val ' + 
var_descriptor(g[elem]) + '\n') for elem in u: mode.record.handle_line('u key ' + var_descriptor(elem) + '\n') mode.record.handle_line('u val ' + var_descriptor(u[elem]) + '\n') function_name = 'Monitor.accum[%d]' % idx if self.require_label: if mode is not None and hasattr(mode, 'record'): mode.record.handle_line('compiling supervised accum\n') # Some channels may not depend on the data, ie, they might just monitor the model # parameters, or some shared variable updated by the training algorithm, so we # need to ignore the unused input error self.accum.append( function([X, Y], givens=g, updates=u, mode=self.theano_function_mode, name=function_name)) else: if mode is not None and hasattr(mode, 'record'): mode.record.handle_line( 'compiling unsupervised accum\n') self.accum.append( function([X], givens=g, updates=u, mode=self.theano_function_mode, name=function_name)) for a in self.accum: if mode is not None and hasattr(mode, 'record'): for elem in a.maker.fgraph.outputs: mode.record.handle_line('accum output ' + var_descriptor(elem) + '\n') log.info("graph size: %d" % len(a.maker.fgraph.toposort())) final_names = dir(self) self.register_names_to_del( [name for name in final_names if name not in init_names])
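Each compiled `accum` function adds `channel.val * batch_size / num_examples` into a shared variable that `begin_record_entry` has reset to zero, so a monitoring channel ends the epoch holding a dataset-wide mean even when batches are uneven. A minimal NumPy sketch of that bookkeeping, with hypothetical names and no Theano:

import numpy as np

def monitor_epoch(batches, channel_fn):
    # batches: list of arrays; channel_fn: per-batch mean of the monitored quantity
    num_examples = float(sum(len(b) for b in batches))
    val_shared = 0.0                               # reset by begin_record_entry
    for batch in batches:
        batch_mean = channel_fn(batch)             # channel.val for this batch
        val_shared += batch_mean * len(batch) / num_examples   # weighted accumulation
    return val_shared                              # dataset-wide mean of the channel

# mean over a dataset split into uneven batches equals the overall mean (0.8)
print(monitor_epoch([np.arange(3.), np.arange(2.)], lambda b: b.mean()))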
def __init__( self, path, name='', # optional name # selectors subjects='all', # optional selector (list) or 'all' trial_types='all', # optional selector (list) or 'all' trial_numbers='all', # optional selector (list) or 'all' conditions='all', # optional selector (list) or 'all' partitioner=None, channel_filter=NoChannelFilter( ), # optional channel filter, default: keep all channel_names=None, # optional channel names (for metadata) label_map=None, # optional conversion of labels remove_dc_offset=False, # optional subtraction of channel mean, usually done already earlier resample=None, # optional down-sampling # optional sub-sequences selection start_sample=0, stop_sample=None, # optional for selection of sub-sequences # optional signal filter to by applied before spitting the signal signal_filter=None, # windowing parameters frame_size=-1, hop_size=-1, # values > 0 will lead to windowing hop_fraction=None, # alternative to specifying absolute hop_size # optional spectrum parameters, n_fft = 0 keeps raw data n_fft=0, n_freq_bins=None, spectrum_log_amplitude=False, spectrum_normalization_mode=None, include_phase=False, flatten_channels=False, layout='tf', # (0,1)-axes layout tf=time x features or ft=features x time save_matrix_path=None, keep_metadata=False, ): ''' Constructor ''' # save params self.params = locals().copy() del self.params['self'] # print self.params # TODO: get the whole filtering into an extra class datafiles_metadata, metadb = load_datafiles_metadata(path) # print datafiles_metadata def apply_filters(filters, node): if isinstance(node, dict): filtered = [] keepkeys = filters[0] for key, value in node.items(): if keepkeys == 'all' or key in keepkeys: filtered.extend(apply_filters(filters[1:], value)) return filtered else: return node # [node] # keep only files that match the metadata filters self.datafiles = apply_filters( [subjects, trial_types, trial_numbers, conditions], datafiles_metadata) # copy metadata for retained files self.metadb = {} for datafile in self.datafiles: self.metadb[datafile] = metadb[datafile] # print self.datafiles # print self.metadb self.name = name if partitioner is not None: self.datafiles = partitioner.get_partition(self.name, self.metadb) self.include_phase = include_phase self.spectrum_normalization_mode = spectrum_normalization_mode self.spectrum_log_amplitude = spectrum_log_amplitude self.sequence_partitions = [ ] # used to keep track of original sequences # metadata: [subject, trial_no, stimulus, channel, start, ] self.metadata = [] sequences = [] labels = [] n_sequences = 0 if frame_size > 0 and hop_size == -1 and hop_fraction is not None: hop_size = np.ceil(frame_size / hop_fraction) for i in xrange(len(self.datafiles)): with log_timing(log, 'loading data from {}'.format(self.datafiles[i])): # save start of next sequence self.sequence_partitions.append(n_sequences) data, metadata = load(os.path.join(path, self.datafiles[i])) label = metadata['label'] if label_map is not None: label = label_map[label] multi_channel_frames = [] # process 1 channel at a time for channel in xrange(data.shape[1]): # filter channels if not channel_filter.keep_channel(channel): continue samples = data[:, channel] # subtract channel mean if remove_dc_offset: samples -= samples.mean() # down-sample if requested if resample is not None and resample[0] != resample[1]: samples = librosa.resample(samples, resample[0], resample[1]) # apply optional signal filter after down-sampling -> requires lower order if signal_filter is not None: samples = 
signal_filter.process(samples) # get sub-sequence in resampled space # log.info('using samples {}..{} of {}'.format(start_sample,stop_sample, samples.shape)) samples = samples[start_sample:stop_sample] if n_fft is not None and n_fft > 0: # Optionally: ### frequency spectrum branch ### # transform to spectogram hop_length = n_fft / 4 ''' from http://theremin.ucsd.edu/~bmcfee/librosadoc/librosa.html >>> # Get a power spectrogram from a waveform y >>> S = np.abs(librosa.stft(y)) ** 2 >>> log_S = librosa.logamplitude(S) ''' S = librosa.core.stft(samples, n_fft=n_fft, hop_length=hop_length) # mag = np.abs(S) # magnitude spectrum mag = np.abs(S)**2 # power spectrum # include phase information if requested if self.include_phase: # phase = np.unwrap(np.angle(S)) phase = np.angle(S) # Optionally: cut off high bands if n_freq_bins is not None: mag = mag[0:n_freq_bins, :] if self.include_phase: phase = phase[0:n_freq_bins, :] if self.spectrum_log_amplitude: mag = librosa.logamplitude(mag) s = mag # for normalization ''' NOTE on normalization: It depends on the structure of a neural network and (even more) on the properties of data. There is no best normalization algorithm because if there would be one, it would be used everywhere by default... In theory, there is no requirement for the data to be normalized at all. This is a purely practical thing because in practice convergence could take forever if your input is spread out too much. The simplest would be to just normalize it by scaling your data to (-1,1) (or (0,1) depending on activation function), and in most cases it does work. If your algorithm converges well, then this is your answer. If not, there are too many possible problems and methods to outline here without knowing the actual data. ''' ## normalize to mean 0, std 1 if self.spectrum_normalization_mode == 'mean0_std1': # s = preprocessing.scale(s, axis=0); mean = np.mean(s) std = np.std(s) s = (s - mean) / std ## normalize by linear transform to [0,1] elif self.spectrum_normalization_mode == 'linear_0_1': s = s / np.max(s) ## normalize by linear transform to [-1,1] elif self.spectrum_normalization_mode == 'linear_-1_1': s = -1 + 2 * (s - np.min(s)) / (np.max(s) - np.min(s)) elif self.spectrum_normalization_mode is not None: raise ValueError( 'unsupported spectrum normalization mode {}'. format(self.spectrum_normalization_mode)) #print s.mean(axis=0) #print s.std(axis=0) # include phase information if requested if self.include_phase: # normalize phase to [-1.1] phase = phase / np.pi s = np.vstack([s, phase]) # transpose to fit pylearn2 layout s = np.transpose(s) # print s.shape ### end of frequency spectrum branch ### else: ### raw waveform branch ### # normalize to max amplitude 1 s = librosa.util.normalize(samples) # add 2nd data dimension s = s.reshape(s.shape[0], 1) # print s.shape ### end of raw waveform branch ### s = np.asfarray(s, dtype='float32') if frame_size > 0 and hop_size > 0: s = s.copy( ) # FIXME: THIS IS NECESSARY IN MultiChannelEEGSequencesDataset - OTHERWISE, THE FOLLOWING OP DOES NOT WORK!!!! 
frames = frame(s, frame_length=frame_size, hop_length=hop_size) else: frames = s del s # print frames.shape if flatten_channels: # add artificial channel dimension frames = frames.reshape( (frames.shape[0], frames.shape[1], frames.shape[2], 1)) # print frames.shape sequences.append(frames) # increment counter by new number of frames n_sequences += frames.shape[0] if keep_metadata: # determine channel name channel_name = None if channel_names is not None: channel_name = channel_names[channel] elif 'channels' in metadata: channel_name = metadata['channels'][channel] self.metadata.append({ 'subject': metadata['subject'], # subject 'trial_type': metadata['trial_type'], # trial_type 'trial_no': metadata['trial_no'], # trial_no 'condition': metadata['condition'], # condition 'channel': channel, # channel 'channel_name': channel_name, 'start': self.sequence_partitions[-1], # start 'stop': n_sequences # stop }) for _ in xrange(frames.shape[0]): labels.append(label) else: multi_channel_frames.append(frames) ### end of channel iteration ### if not flatten_channels: # turn list into array multi_channel_frames = np.asfarray(multi_channel_frames, dtype='float32') # [channels x frames x time x freq] -> cb01 # [channels x frames x time x 1] -> cb0. # move channel dimension to end multi_channel_frames = np.rollaxis(multi_channel_frames, 0, 4) # print multi_channel_frames.shape # log.debug(multi_channel_frames.shape) sequences.append(multi_channel_frames) # increment counter by new number of frames n_sequences += multi_channel_frames.shape[0] if keep_metadata: self.metadata.append({ 'subject': metadata['subject'], # subject 'trial_type': metadata['trial_type'], # trial_type 'trial_no': metadata['trial_no'], # trial_no 'condition': metadata['condition'], # condition 'channel': 'all', # channel 'start': self.sequence_partitions[-1], # start 'stop': n_sequences # stop }) for _ in xrange(multi_channel_frames.shape[0]): labels.append(label) ### end of datafile iteration ### # turn into numpy arrays sequences = np.vstack(sequences) # print sequences.shape; labels = np.hstack(labels) # one_hot_y = one_hot(labels) one_hot_formatter = OneHotFormatter(labels.max() + 1) # FIXME! one_hot_y = one_hot_formatter.format(labels) self.labels = labels if layout == 'ft': # swap axes to (batch, feature, time, channels) sequences = sequences.swapaxes(1, 2) log.debug('final dataset shape: {} (b,0,1,c)'.format(sequences.shape)) super(MultiChannelEEGDataset, self).__init__(topo_view=sequences, y=one_hot_y, axes=['b', 0, 1, 'c']) log.info( 'generated dataset "{}" with shape X={}={} y={} labels={} '.format( self.name, self.X.shape, sequences.shape, self.y.shape, self.labels.shape)) if save_matrix_path is not None: matrix = DenseDesignMatrix(topo_view=sequences, y=one_hot_y, axes=['b', 0, 1, 'c']) with log_timing( log, 'saving DenseDesignMatrix to {}'.format(save_matrix_path)): serial.save(save_matrix_path, matrix)
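The `spectrum_normalization_mode` branches above are standard rescalings of the (log-)power spectrogram. A small pure-NumPy sketch of the same three modes, using a hypothetical `normalize_spectrum` helper:

import numpy as np

def normalize_spectrum(s, mode):
    if mode == 'mean0_std1':            # zero mean, unit standard deviation
        return (s - np.mean(s)) / np.std(s)
    elif mode == 'linear_0_1':          # scale so the maximum becomes 1
        return s / np.max(s)
    elif mode == 'linear_-1_1':         # affine map of [min, max] onto [-1, 1]
        return -1 + 2 * (s - np.min(s)) / (np.max(s) - np.min(s))
    elif mode is None:                  # keep the spectrum unchanged
        return s
    raise ValueError('unsupported spectrum normalization mode {}'.format(mode))

s = np.abs(np.random.randn(64, 20)) ** 2          # stand-in for a power spectrogram
print(normalize_spectrum(s, 'mean0_std1').std())  # ~1.0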
def get_func(learn_discriminator, learn_generator, dont_you_fucking_dare_touch_the_generator=False): updates = OrderedDict() assert (learn_discriminator or learn_generator) and not (learn_discriminator and learn_generator) if learn_discriminator: cur_params = model.discriminator.get_params() else: cur_params = model.generator.get_params() def check(): for param in params: if param not in cur_params: assert param not in updates cur_grads = OrderedDict() for param in cur_params: cur_grads[param] = grads[param] for param in grads: if grads[param].name is None and cost_value is not None: grads[param].name = ('grad(%(costname)s, %(paramname)s)' % {'costname': cost_value.name, 'paramname': param.name}) assert grads[param].dtype == param.dtype cur_lr_scalers = OrderedDict() for param in cur_params: if param in lr_scalers: lr_scaler = lr_scalers[param] cur_lr_scalers[param] = lr_scaler log.info('Parameter and initial learning rate summary:') for param in cur_params: param_name = param.name if param_name is None: param_name = 'anon_param' lr = learning_rate.get_value() * cur_lr_scalers.get(param,1.) log.info('\t' + param_name + ': ' + str(lr)) updates.update(self.learning_rule.get_updates( learning_rate, cur_grads, cur_lr_scalers)) check() for param in cur_params: if updates[param].name is None: updates[param].name = 'sgd_update(' + param.name + ')' check() model.modify_updates(updates) check() for param in cur_params: update = updates[param] if update.name is None: update.name = 'censor(sgd_update(' + param.name + '))' for update_val in get_debug_values(update): if np.any(np.isinf(update_val)): raise ValueError("debug value of %s contains infs" % update.name) if np.any(np.isnan(update_val)): raise ValueError("debug value of %s contains nans" % update.name) check() if dont_you_fucking_dare_touch_the_generator: for param in model.generator.get_params(): assert param not in updates with log_timing(log, 'Compiling sgd_update'): return function(theano_args, updates=updates, name='sgd_update', on_unused_input='ignore', mode=self.theano_function_mode)
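`get_func` compiles one update function per sub-model: only the selected parameter set receives updates, and the other sub-model's parameters are asserted absent when it must stay frozen. The sketch below shows the same gating on hypothetical NumPy parameter dictionaries, without Theano or pylearn2:

import numpy as np

def make_updates(disc_params, gen_params, grads, lr, learn_discriminator):
    # pick exactly one of the two parameter sets, mirroring the assert above
    cur_params = disc_params if learn_discriminator else gen_params
    updates = {name: p - lr * grads[name] for name, p in cur_params.items()}
    # the frozen sub-model's parameters must not appear in the updates
    frozen = gen_params if learn_discriminator else disc_params
    assert not any(name in updates for name in frozen)
    return updates

disc = {'W_d': np.ones(3)}
gen = {'W_g': np.ones(3)}
grads = {'W_d': np.full(3, 0.1), 'W_g': np.full(3, 0.2)}
print(make_updates(disc, gen, grads, lr=0.5, learn_discriminator=True))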
def setup(self, model, dataset): if self.cost is None: self.cost = model.get_default_cost() inf_params = [ param for param in model.get_params() if np.any(np.isinf(param.get_value())) ] if len(inf_params) > 0: raise ValueError("These params are Inf: "+str(inf_params)) if any([np.any(np.isnan(param.get_value())) for param in model.get_params()]): nan_params = [ param for param in model.get_params() if np.any(np.isnan(param.get_value())) ] raise ValueError("These params are NaN: "+str(nan_params)) self.model = model batch_size = self.batch_size if hasattr(model, "force_batch_size"): if model.force_batch_size > 0: if batch_size is not None: if batch_size != model.force_batch_size: if self.set_batch_size: model.set_batch_size(batch_size) else: raise ValueError("batch_size argument to SGD conflicts with model's force_batch_size attribute") else: self.batch_size = model.force_batch_size model._test_batch_size = self.batch_size self.monitor = Monitor.get_monitor(model) # TODO: come up with some standard scheme for associating training runs # with monitors / pushing the monitor automatically, instead of just # enforcing that people have called push_monitor assert self.monitor.get_examples_seen() == 0 self.monitor._sanity_check() X = model.get_input_space().make_theano_batch(name="%s[X]" % self.__class__.__name__) self.topo = not X.ndim == 2 if config.compute_test_value == 'raise': if self.topo: X.tag.test_value = dataset.get_batch_topo(self.batch_size) else: X.tag.test_value = dataset.get_batch_design(self.batch_size) Y = T.matrix(name="%s[Y]" % self.__class__.__name__) if self.cost.supervised: if config.compute_test_value == 'raise': _, Y.tag.test_value = dataset.get_batch_design(self.batch_size, True) self.supervised = True cost_value = self.cost(model, X, Y) else: self.supervised = False cost_value = self.cost(model, X) if cost_value is not None and cost_value.name is None: if self.supervised: cost_value.name = 'objective(' + X.name + ', ' + Y.name + ')' else: cost_value.name = 'objective(' + X.name + ')' # Set up monitor to model the objective value, learning rate, # momentum (if applicable), and extra channels defined by # the cost learning_rate = self.learning_rate if self.monitoring_dataset is not None: self.monitor.setup(dataset=self.monitoring_dataset, cost=self.cost, batch_size=self.batch_size, num_batches=self.monitoring_batches, extra_costs=self.monitoring_costs ) if self.supervised: ipt = (X, Y) else: ipt = X dataset_name = self.monitoring_dataset.keys()[0] monitoring_dataset = self.monitoring_dataset[dataset_name] #TODO: have Monitor support non-data-dependent channels self.monitor.add_channel(name='learning_rate', ipt=ipt, val=learning_rate, dataset=monitoring_dataset) if self.momentum: self.monitor.add_channel(name='momentum', ipt=ipt, val=self.momentum, dataset=monitoring_dataset) params = list(model.get_params()) assert len(params) > 0 for i, param in enumerate(params): if param.name is None: param.name = 'sgd_params[%d]' % i if self.cost.supervised: grads, updates = self.cost.get_gradients(model, X, Y) else: grads, updates = self.cost.get_gradients(model, X) for param in grads: assert param in params for param in params: assert param in grads for param in grads: if grads[param].name is None and cost_value is not None: grads[param].name = ('grad(%(costname)s, %(paramname)s)' % {'costname': cost_value.name, 'paramname': param.name}) lr_scalers = model.get_lr_scalers() for key in lr_scalers: if key not in params: raise ValueError("Tried to scale the learning rate on " +\ str(key)+" 
which is not an optimization parameter.") log.info('Parameter and initial learning rate summary:') for param in params: param_name = param.name if param_name is None: param_name = 'anon_param' lr = learning_rate.get_value() * lr_scalers.get(param,1.) log.info('\t' + param_name + ': ' + str(lr)) if self.momentum is None: updates.update( dict(safe_zip(params, [param - learning_rate * \ lr_scalers.get(param, 1.) * grads[param] for param in params]))) else: for param in params: inc = sharedX(param.get_value() * 0.) if param.name is not None: inc.name = 'inc_'+param.name updated_inc = self.momentum * inc - learning_rate * lr_scalers.get(param, 1.) * grads[param] updates[inc] = updated_inc updates[param] = param + updated_inc for param in params: if updates[param].name is None: updates[param].name = 'sgd_update(' + param.name + ')' model.censor_updates(updates) for param in params: update = updates[param] if update.name is None: update.name = 'censor(sgd_update(' + param.name + '))' for update_val in get_debug_values(update): if np.any(np.isinf(update_val)): raise ValueError("debug value of %s contains infs" % update.name) if np.any(np.isnan(update_val)): raise ValueError("debug value of %s contains nans" % update.name) with log_timing(log, 'Compiling sgd_update'): if self.supervised: fn_inputs = [X, Y] else: fn_inputs = [X] self.sgd_update = function(fn_inputs, updates=updates, name='sgd_update', on_unused_input='ignore', mode=self.theano_function_mode) self.params = params
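The momentum branch above keeps one velocity (`inc`) per parameter and applies inc <- momentum * inc - lr * scale * grad, then param <- param + inc. A minimal NumPy sketch of that rule with hypothetical arrays:

import numpy as np

def sgd_momentum_step(param, grad, inc, lr=0.01, momentum=0.9, lr_scale=1.0):
    # inc plays the role of the per-parameter velocity shared variable
    inc = momentum * inc - lr * lr_scale * grad    # updated_inc in the code above
    param = param + inc                            # updates[param] = param + updated_inc
    return param, inc

w = np.zeros(4)
v = np.zeros_like(w)                               # inc starts as param * 0.
for _ in range(3):
    w, v = sgd_momentum_step(w, grad=np.ones(4), inc=v)
print(w, v)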
def setup(self, model, dataset): """ Compiles the theano functions needed for the train method. Parameters ---------- model : a Model instance dataset : Dataset """ if self.cost is None: self.cost = model.get_default_cost() inf_params = [param for param in model.get_params() if contains_inf(param.get_value())] if len(inf_params) > 0: raise ValueError("These params are Inf: "+str(inf_params)) if any([contains_nan(param.get_value()) for param in model.get_params()]): nan_params = [param for param in model.get_params() if contains_nan(param.get_value())] raise ValueError("These params are NaN: "+str(nan_params)) self.model = model self._synchronize_batch_size(model) model._test_batch_size = self.batch_size self.monitor = Monitor.get_monitor(model) self.monitor._sanity_check() # test if force batch size and batch size has_force_batch_size = getattr(model, "force_batch_size", False) train_dataset_is_uneven = \ dataset.get_num_examples() % self.batch_size != 0 has_monitoring_datasets = \ self.monitoring_dataset is not None and \ self.monitoring_dataset.values() > 0 if has_monitoring_datasets: monitoring_datasets_are_uneven = \ any(d.get_num_examples() % self.batch_size != 0 for d in self.monitoring_dataset.values()) else: monitoring_datasets_are_uneven = False # or True it doesn't matter if has_force_batch_size and train_dataset_is_uneven and \ not has_uniform_batch_size(self.train_iteration_mode): raise ValueError("Dataset size is not a multiple of batch size." "You should set train_iteration_mode (and " "maybe monitor_iteration_mode) to " "even_sequential, even_shuffled_sequential or " "even_batchwise_shuffled_sequential") if has_force_batch_size and has_monitoring_datasets and \ monitoring_datasets_are_uneven and \ not has_uniform_batch_size(self.monitor_iteration_mode): raise ValueError("Dataset size is not a multiple of batch size." "You should set monitor_iteration_mode to " "even_sequential, even_shuffled_sequential or " "even_batchwise_shuffled_sequential") data_specs = self.cost.get_data_specs(self.model) mapping = DataSpecsMapping(data_specs) space_tuple = mapping.flatten(data_specs[0], return_tuple=True) source_tuple = mapping.flatten(data_specs[1], return_tuple=True) # Build a flat tuple of Theano Variables, one for each space. # We want that so that if the same space/source is specified # more than once in data_specs, only one Theano Variable # is generated for it, and the corresponding value is passed # only once to the compiled Theano function. theano_args = [] for space, source in safe_zip(space_tuple, source_tuple): name = '%s[%s]' % (self.__class__.__name__, source) arg = space.make_theano_batch(name=name, batch_size=self.batch_size) theano_args.append(arg) theano_args = tuple(theano_args) # Methods of `self.cost` need args to be passed in a format compatible # with data_specs nested_args = mapping.nest(theano_args) fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args) self.on_load_batch = fixed_var_descr.on_load_batch cost_value = self.cost.expr(model, nested_args, ** fixed_var_descr.fixed_vars) if cost_value is not None and cost_value.name is None: # Concatenate the name of all tensors in theano_args !? 
cost_value.name = 'objective' learning_rate = self.learning_rate params = list(model.get_params()) assert len(params) > 0 for i, param in enumerate(params): if param.name is None: param.name = 'sgd_params[%d]' % i grads, updates = self.cost.get_gradients(model, nested_args, ** fixed_var_descr.fixed_vars) if not isinstance(grads, OrderedDict): raise TypeError(str(type(self.cost)) + ".get_gradients returned " + "something with" + str(type(grads)) + "as its " + "first member. Expected OrderedDict.") for param in grads: assert param in params for param in params: assert param in grads for param in grads: if grads[param].name is None and cost_value is not None: grads[param].name = ('grad(%(costname)s, %(paramname)s)' % {'costname': cost_value.name, 'paramname': param.name}) assert grads[param].dtype == param.dtype lr_scalers = model.get_lr_scalers() for key in lr_scalers: if key not in params: raise ValueError("Tried to scale the learning rate on " +\ str(key)+" which is not an optimization parameter.") log.info('Parameter and initial learning rate summary:') for param in params: param_name = param.name if param_name is None: param_name = 'anon_param' lr = learning_rate.get_value() * lr_scalers.get(param,1.) log.info('\t' + param_name + ': ' + str(lr)) if self.learning_rule: updates.update(self.learning_rule.get_updates( learning_rate, grads, lr_scalers)) else: # Use standard SGD updates with fixed learning rate. updates.update( dict(safe_zip(params, [param - learning_rate * \ lr_scalers.get(param, 1.) * grads[param] for param in params]))) for param in params: if updates[param].name is None: updates[param].name = 'sgd_update(' + param.name + ')' model.modify_updates(updates) for param in params: update = updates[param] if update.name is None: update.name = 'censor(sgd_update(' + param.name + '))' for update_val in get_debug_values(update): if contains_inf(update_val): raise ValueError("debug value of %s contains infs" % update.name) if contains_nan(update_val): raise ValueError("debug value of %s contains nans" % update.name) # Set up monitor to model the objective value, learning rate, # momentum (if applicable), and extra channels defined by # the cost. # We have to do that after learning_rule.get_updates has been # called, since it may have an effect on # learning_rule.add_channels_to_monitor (that is currently the case # for AdaDelta and RMSProp). self._setup_monitor() with log_timing(log, 'Compiling sgd_update'): self.sgd_update = function(theano_args, updates=updates, name='sgd_update', on_unused_input='ignore', mode=self.theano_function_mode) self.params = params
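The loop over `safe_zip(space_tuple, source_tuple)` creates exactly one Theano variable per unique (space, source) slot of the flattened data_specs, so a source requested by several cost terms is passed to the compiled function only once and `mapping.nest` re-expands it afterwards. A plain-Python sketch of that dedup-and-renest idea (hypothetical helpers, not the pylearn2 DataSpecsMapping API; spaces are just strings here):

def flatten_specs(pairs):
    # keep one slot per unique (space, source) pair and remember where each
    # original position points, so values can later be mapped back
    uniques, positions = [], []
    for pair in pairs:
        if pair not in uniques:
            uniques.append(pair)
        positions.append(uniques.index(pair))
    return tuple(uniques), positions

def nest_values(values, positions):
    # re-expand the deduplicated values into the original argument layout
    return tuple(values[i] for i in positions)

specs = [('VectorSpace(784)', 'features'),
         ('VectorSpace(10)', 'targets'),
         ('VectorSpace(784)', 'features')]   # the same source requested twice
unique, pos = flatten_specs(specs)
print(unique)                                # only two unique slots survive
print(nest_values(['X', 'Y'], pos))          # ('X', 'Y', 'X')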
def setup(self, model, dataset): """ Compiles the theano functions needed for the train method. """ if self.cost is None: self.cost = model.get_default_cost() inf_params = [param for param in model.get_params() if np.any(np.isinf(param.get_value()))] if len(inf_params) > 0: raise ValueError("These params are Inf: "+str(inf_params)) if any([np.any(np.isnan(param.get_value())) for param in model.get_params()]): nan_params = [param for param in model.get_params() if np.any(np.isnan(param.get_value()))] raise ValueError("These params are NaN: "+str(nan_params)) self.model = model batch_size = self.batch_size if hasattr(model, "force_batch_size"): if model.force_batch_size > 0: if batch_size is not None: if batch_size != model.force_batch_size: if self.set_batch_size: model.set_batch_size(batch_size) else: raise ValueError("batch_size argument to SGD " + "conflicts with model's " + "force_batch_size attribute") else: self.batch_size = model.force_batch_size model._test_batch_size = self.batch_size self.monitor = Monitor.get_monitor(model) self.monitor._sanity_check() data_specs = self.cost.get_data_specs(self.model) mapping = DataSpecsMapping(data_specs) space_tuple = mapping.flatten(data_specs[0], return_tuple=True) source_tuple = mapping.flatten(data_specs[1], return_tuple=True) # Build a flat tuple of Theano Variables, one for each space. # We want that so that if the same space/source is specified # more than once in data_specs, only one Theano Variable # is generated for it, and the corresponding value is passed # only once to the compiled Theano function. theano_args = [] for space, source in safe_zip(space_tuple, source_tuple): name = '%s[%s]' % (self.__class__.__name__, source) arg = space.make_theano_batch(name=name, batch_size=self.batch_size) theano_args.append(arg) theano_args = tuple(theano_args) # Methods of `self.cost` need args to be passed in a format compatible # with data_specs nested_args = mapping.nest(theano_args) fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args) self.on_load_batch = fixed_var_descr.on_load_batch cost_value = self.cost.expr(model, nested_args, ** fixed_var_descr.fixed_vars) if cost_value is not None and cost_value.name is None: # Concatenate the name of all tensors in theano_args !? cost_value.name = 'objective' # Set up monitor to model the objective value, learning rate, # momentum (if applicable), and extra channels defined by # the cost learning_rate = self.learning_rate if self.monitoring_dataset is not None: self.monitor.setup( dataset=self.monitoring_dataset, cost=self.cost, batch_size=self.batch_size, num_batches=self.monitoring_batches, extra_costs=self.monitoring_costs, mode=self.monitor_iteration_mode ) dataset_name = self.monitoring_dataset.keys()[0] monitoring_dataset = self.monitoring_dataset[dataset_name] #TODO: have Monitor support non-data-dependent channels self.monitor.add_channel(name='learning_rate', ipt=None, val=learning_rate, data_specs=(NullSpace(), ''), dataset=monitoring_dataset) if self.learning_rule: self.learning_rule.add_channels_to_monitor( self.monitor, monitoring_dataset) params = list(model.get_params()) assert len(params) > 0 for i, param in enumerate(params): if param.name is None: param.name = 'sgd_params[%d]' % i grads, updates = self.cost.get_gradients(model, nested_args, ** fixed_var_descr.fixed_vars) if not isinstance(grads, OrderedDict): raise TypeError(str(type(self.cost)) + ".get_gradients returned " + "something with" + str(type(grads)) + "as its " + "first member. 
Expected OrderedDict.") for param in grads: assert param in params for param in params: assert param in grads for param in grads: if grads[param].name is None and cost_value is not None: grads[param].name = ('grad(%(costname)s, %(paramname)s)' % {'costname': cost_value.name, 'paramname': param.name}) assert grads[param].dtype == param.dtype lr_scalers = model.get_lr_scalers() for key in lr_scalers: if key not in params: raise ValueError("Tried to scale the learning rate on " +\ str(key)+" which is not an optimization parameter.") log.info('Parameter and initial learning rate summary:') for param in params: param_name = param.name if param_name is None: param_name = 'anon_param' lr = learning_rate.get_value() * lr_scalers.get(param,1.) log.info('\t' + param_name + ': ' + str(lr)) if self.learning_rule: updates.update(self.learning_rule.get_updates( learning_rate, grads, lr_scalers)) else: # Use standard SGD updates with fixed learning rate. updates.update( dict(safe_zip(params, [param - learning_rate * \ lr_scalers.get(param, 1.) * grads[param] for param in params]))) for param in params: if updates[param].name is None: updates[param].name = 'sgd_update(' + param.name + ')' model.censor_updates(updates) for param in params: update = updates[param] if update.name is None: update.name = 'censor(sgd_update(' + param.name + '))' for update_val in get_debug_values(update): if np.any(np.isinf(update_val)): raise ValueError("debug value of %s contains infs" % update.name) if np.any(np.isnan(update_val)): raise ValueError("debug value of %s contains nans" % update.name) with log_timing(log, 'Compiling sgd_update'): self.sgd_update = function(theano_args, updates=updates, name='sgd_update', on_unused_input='ignore', mode=self.theano_function_mode) self.params = params
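`add_channel(name='learning_rate', ipt=None, ..., data_specs=(NullSpace(), ''))` registers a channel that just reads a shared value instead of computing a statistic of the monitoring batches. A tiny sketch of that distinction with a hypothetical channel registry (batches are assumed equal-sized so a plain average suffices):

channels = {}

def add_channel(name, fn, data_dependent):
    # fn returns the channel value; data-independent channels (learning rate,
    # momentum) ignore the monitoring data entirely
    channels[name] = (fn, data_dependent)

def record_epoch(batches):
    report = {}
    for name, (fn, data_dependent) in channels.items():
        if data_dependent:
            report[name] = sum(fn(b) for b in batches) / float(len(batches))
        else:
            report[name] = fn(None)
    return report

learning_rate = [0.1]    # stands in for a shared variable updated elsewhere
add_channel('learning_rate', lambda _: learning_rate[0], data_dependent=False)
add_channel('mean_input', lambda b: sum(b) / float(len(b)), data_dependent=True)
print(record_epoch([[1.0, 2.0], [3.0, 5.0]]))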
def setup(self, model, dataset): """ Compiles the theano functions needed for the train method. """ if self.cost is None: self.cost = model.get_default_cost() inf_params = [param for param in model.get_params() if np.any(np.isinf(param.get_value()))] if len(inf_params) > 0: raise ValueError("These params are Inf: "+str(inf_params)) if any([np.any(np.isnan(param.get_value())) for param in model.get_params()]): nan_params = [param for param in model.get_params() if np.any(np.isnan(param.get_value()))] raise ValueError("These params are NaN: "+str(nan_params)) self.model = model self._synchronize_batch_size(model) model._test_batch_size = self.batch_size self.monitor = Monitor.get_monitor(model) self.monitor._sanity_check() data_specs = self.cost.get_data_specs(self.model) mapping = DataSpecsMapping(data_specs) space_tuple = mapping.flatten(data_specs[0], return_tuple=True) source_tuple = mapping.flatten(data_specs[1], return_tuple=True) # Build a flat tuple of Theano Variables, one for each space. # We want that so that if the same space/source is specified # more than once in data_specs, only one Theano Variable # is generated for it, and the corresponding value is passed # only once to the compiled Theano function. theano_args = [] for space, source in safe_zip(space_tuple, source_tuple): name = '%s[%s]' % (self.__class__.__name__, source) arg = space.make_theano_batch(name=name, batch_size=self.batch_size) theano_args.append(arg) theano_args = tuple(theano_args) # Methods of `self.cost` need args to be passed in a format compatible # with data_specs nested_args = mapping.nest(theano_args) fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args) self.on_load_batch = fixed_var_descr.on_load_batch cost_value = self.cost.expr(model, nested_args, ** fixed_var_descr.fixed_vars) if cost_value is not None and cost_value.name is None: # Concatenate the name of all tensors in theano_args !? cost_value.name = 'objective' # Set up monitor to model the objective value, learning rate, # momentum (if applicable), and extra channels defined by # the cost learning_rate = self.learning_rate if self.monitoring_dataset is not None: self.monitor.setup(dataset=self.monitoring_dataset, cost=self.cost, batch_size=self.batch_size, num_batches=self.monitoring_batches, extra_costs=self.monitoring_costs, mode=self.monitor_iteration_mode) dataset_name = self.monitoring_dataset.keys()[0] monitoring_dataset = self.monitoring_dataset[dataset_name] #TODO: have Monitor support non-data-dependent channels self.monitor.add_channel(name='learning_rate', ipt=None, val=learning_rate, data_specs=(NullSpace(), ''), dataset=monitoring_dataset) if self.learning_rule: self.learning_rule.add_channels_to_monitor( self.monitor, monitoring_dataset) params = list(model.get_params()) assert len(params) > 0 for i, param in enumerate(params): if param.name is None: param.name = 'sgd_params[%d]' % i grads, updates = self.cost.get_gradients(model, nested_args, ** fixed_var_descr.fixed_vars) if not isinstance(grads, OrderedDict): raise TypeError(str(type(self.cost)) + ".get_gradients returned " + "something with" + str(type(grads)) + "as its " + "first member. 
Expected OrderedDict.") for param in grads: assert param in params for param in params: assert param in grads for param in grads: if grads[param].name is None and cost_value is not None: grads[param].name = ('grad(%(costname)s, %(paramname)s)' % {'costname': cost_value.name, 'paramname': param.name}) assert grads[param].dtype == param.dtype lr_scalers = model.get_lr_scalers() for key in lr_scalers: if key not in params: raise ValueError("Tried to scale the learning rate on " +\ str(key)+" which is not an optimization parameter.") log.info('Parameter and initial learning rate summary:') for param in params: param_name = param.name if param_name is None: param_name = 'anon_param' lr = learning_rate.get_value() * lr_scalers.get(param,1.) log.info('\t' + param_name + ': ' + str(lr)) if self.learning_rule: updates.update(self.learning_rule.get_updates( learning_rate, grads, lr_scalers)) else: # Use standard SGD updates with fixed learning rate. updates.update( dict(safe_zip(params, [param - learning_rate * \ lr_scalers.get(param, 1.) * grads[param] for param in params]))) for param in params: if updates[param].name is None: updates[param].name = 'sgd_update(' + param.name + ')' model.censor_updates(updates) for param in params: update = updates[param] if update.name is None: update.name = 'censor(sgd_update(' + param.name + '))' for update_val in get_debug_values(update): if np.any(np.isinf(update_val)): raise ValueError("debug value of %s contains infs" % update.name) if np.any(np.isnan(update_val)): raise ValueError("debug value of %s contains nans" % update.name) with log_timing(log, 'Compiling sgd_update'): self.sgd_update = function(theano_args, updates=updates, name='sgd_update', on_unused_input='ignore', mode=self.theano_function_mode) self.params = params
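The Inf/NaN screening at the top of `setup` simply scans every parameter array before anything gets compiled. A minimal NumPy version of the same check, with hypothetical parameter names:

import numpy as np

def check_params_finite(params):
    # raise early if any parameter contains Inf or NaN (cf. inf_params / nan_params)
    inf_params = [name for name, value in params.items() if np.any(np.isinf(value))]
    if inf_params:
        raise ValueError("These params are Inf: " + str(inf_params))
    nan_params = [name for name, value in params.items() if np.any(np.isnan(value))]
    if nan_params:
        raise ValueError("These params are NaN: " + str(nan_params))

check_params_finite({'W': np.zeros((2, 2)), 'b': np.zeros(2)})   # passes silently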
def setup(self, model, dataset): if self.cost is None: self.cost = model.get_default_cost() inf_params = [ param for param in model.get_params() if np.any(np.isinf(param.get_value())) ] if len(inf_params) > 0: raise ValueError("These params are Inf: " + str(inf_params)) if any([ np.any(np.isnan(param.get_value())) for param in model.get_params() ]): nan_params = [ param for param in model.get_params() if np.any(np.isnan(param.get_value())) ] raise ValueError("These params are NaN: " + str(nan_params)) self.model = model batch_size = self.batch_size if hasattr(model, "force_batch_size"): if model.force_batch_size > 0: if batch_size is not None: if batch_size != model.force_batch_size: if self.set_batch_size: model.set_batch_size(batch_size) else: raise ValueError( "batch_size argument to SGD conflicts with model's force_batch_size attribute" ) else: self.batch_size = model.force_batch_size model._test_batch_size = self.batch_size self.monitor = Monitor.get_monitor(model) self.monitor._sanity_check() X = model.get_input_space().make_theano_batch(name="%s[X]" % self.__class__.__name__) self.topo = not X.ndim == 2 if config.compute_test_value == 'raise': if self.topo: X.tag.test_value = dataset.get_batch_topo(self.batch_size) else: X.tag.test_value = dataset.get_batch_design(self.batch_size) Y = T.matrix(name="%s[Y]" % self.__class__.__name__) fixed_var_descr = self.cost.get_fixed_var_descr(model, X, Y) self.on_load_batch = fixed_var_descr.on_load_batch if self.cost.supervised: if config.compute_test_value == 'raise': _, Y.tag.test_value = dataset.get_batch_design( self.batch_size, True) self.supervised = True cost_value = self.cost(model, X, Y, **fixed_var_descr.fixed_vars) else: self.supervised = False cost_value = self.cost(model, X, **fixed_var_descr.fixed_vars) if cost_value is not None and cost_value.name is None: if self.supervised: cost_value.name = 'objective(' + X.name + ', ' + Y.name + ')' else: cost_value.name = 'objective(' + X.name + ')' # Set up monitor to model the objective value, learning rate, # momentum (if applicable), and extra channels defined by # the cost learning_rate = self.learning_rate if self.monitoring_dataset is not None: self.monitor.setup(dataset=self.monitoring_dataset, cost=self.cost, batch_size=self.batch_size, num_batches=self.monitoring_batches, extra_costs=self.monitoring_costs, mode=self.monitor_iteration_mode) if self.supervised: ipt = (X, Y) else: ipt = X dataset_name = self.monitoring_dataset.keys()[0] monitoring_dataset = self.monitoring_dataset[dataset_name] #TODO: have Monitor support non-data-dependent channels self.monitor.add_channel(name='learning_rate', ipt=ipt, val=learning_rate, dataset=monitoring_dataset) if self.momentum: self.monitor.add_channel(name='momentum', ipt=ipt, val=self.momentum, dataset=monitoring_dataset) params = list(model.get_params()) assert len(params) > 0 for i, param in enumerate(params): if param.name is None: param.name = 'sgd_params[%d]' % i if self.cost.supervised: grads, updates = self.cost.get_gradients( model, X, Y, **fixed_var_descr.fixed_vars) else: grads, updates = self.cost.get_gradients( model, X, **fixed_var_descr.fixed_vars) for param in grads: assert param in params for param in params: assert param in grads for param in grads: if grads[param].name is None and cost_value is not None: grads[param].name = ('grad(%(costname)s, %(paramname)s)' % { 'costname': cost_value.name, 'paramname': param.name }) lr_scalers = model.get_lr_scalers() for key in lr_scalers: if key not in params: raise ValueError("Tried to 
scale the learning rate on " +\ str(key)+" which is not an optimization parameter.") log.info('Parameter and initial learning rate summary:') for param in params: param_name = param.name if param_name is None: param_name = 'anon_param' lr = learning_rate.get_value() * lr_scalers.get(param, 1.) log.info('\t' + param_name + ': ' + str(lr)) if self.momentum is None: updates.update( dict(safe_zip(params, [param - learning_rate * \ lr_scalers.get(param, 1.) * grads[param] for param in params]))) else: for param in params: inc = sharedX(param.get_value() * 0.) if param.name is not None: inc.name = 'inc_' + param.name updated_inc = self.momentum * inc - learning_rate * lr_scalers.get( param, 1.) * grads[param] updates[inc] = updated_inc updates[param] = param + updated_inc for param in params: if updates[param].name is None: updates[param].name = 'sgd_update(' + param.name + ')' model.censor_updates(updates) for param in params: update = updates[param] if update.name is None: update.name = 'censor(sgd_update(' + param.name + '))' for update_val in get_debug_values(update): if np.any(np.isinf(update_val)): raise ValueError("debug value of %s contains infs" % update.name) if np.any(np.isnan(update_val)): raise ValueError("debug value of %s contains nans" % update.name) with log_timing(log, 'Compiling sgd_update'): if self.supervised: fn_inputs = [X, Y] else: fn_inputs = [X] self.sgd_update = function(fn_inputs, updates=updates, name='sgd_update', on_unused_input='ignore', mode=self.theano_function_mode) self.params = params
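`lr_scalers` lets the model request a different effective step size per parameter; the setup code first validates that every scaled key really is an optimization parameter and then logs learning_rate * scaler for each one. A short sketch of that logic on hypothetical names:

def effective_learning_rates(params, learning_rate, lr_scalers):
    # every key in lr_scalers must name an actual optimization parameter
    for key in lr_scalers:
        if key not in params:
            raise ValueError("Tried to scale the learning rate on " + str(key) +
                             " which is not an optimization parameter.")
    # missing entries default to a scaler of 1., as with lr_scalers.get(param, 1.)
    return {name: learning_rate * lr_scalers.get(name, 1.) for name in params}

print(effective_learning_rates(['W', 'b'], 0.01, {'b': 2.0}))   # {'W': 0.01, 'b': 0.02}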
def main_loop(self, time_budget=None): """ Repeatedly runs an epoch of the training algorithm, runs any epoch-level callbacks, and saves the model. Parameters ---------- time_budget : int, optional The maximum number of seconds before interrupting training. Default is `None`, no time limit. """ t0 = datetime.now() if self.algorithm is None: self.model.monitor = Monitor.get_monitor(self.model) self.model.monitor.time_budget_exceeded = False self.setup_extensions() # Model.censor_updates is used by the training algorithm to # enforce constraints after each step of learning. Here we # make sure the constraints are enforced from the start. self.model.enforce_constraints() self.run_callbacks_and_monitoring() while True: if self.exceeded_time_budget(t0, time_budget): break rval = self.model.train_all(dataset=self.dataset) if rval is not None: raise ValueError("Model.train_all should not return " + "anything. Use Model.continue_learning " + "to control whether learning continues.") self.model.monitor.report_epoch() extension_continue = self.run_callbacks_and_monitoring() freq = self.save_freq if freq > 0 and self.model.monitor.epochs_seen % freq == 0: self.save() continue_learning = (self.model.continue_learning() and extension_continue) assert continue_learning in [True, False, 0, 1] if not continue_learning: break else: self.algorithm.setup(model=self.model, dataset=self.dataset) self.setup_extensions() # Model.censor_updates is used by the training algorithm to # enforce constraints after each step of learning. Here we # make sure the constraints are enforced from the start. self.model.enforce_constraints() if not hasattr(self.model, 'monitor'): # TODO: is this really necessary? I just put this error here # to prevent an AttributeError later, but I think we could # rewrite to avoid the AttributeError raise RuntimeError("The algorithm is responsible for setting" " up the Monitor, but failed to.") if len(self.model.monitor._datasets) > 0: # This monitoring channel keeps track of a shared variable, # which does not need inputs nor data. self.training_seconds.__doc__ = """\ The number of seconds that were spent in actual training during the most recent epoch. This excludes seconds that were spent running callbacks for the extensions, computing monitoring channels, etc.""" self.model.monitor.add_channel( name="training_seconds_this_epoch", ipt=None, val=self.training_seconds, data_specs=(NullSpace(), ''), dataset=self.model.monitor._datasets[0]) self.total_seconds.__doc__ = """\ The number of seconds that were spent on the entirety of processing for the previous epoch. This includes not only training but also the computation of the monitoring channels, running TrainExtension callbacks, etc. This value is reported for the *previous* epoch because the amount of time spent on monitoring for this epoch is not known until the monitoring channels have already been reported.""" self.model.monitor.add_channel( name="total_seconds_last_epoch", ipt=None, val=self.total_seconds, data_specs=(NullSpace(), ''), dataset=self.model.monitor._datasets[0]) self.run_callbacks_and_monitoring() while True: if self.exceeded_time_budget(t0, time_budget): break with log_timing(log, None, level=logging.DEBUG, callbacks=[self.total_seconds.set_value]): with log_timing( log, None, final_msg='Time this epoch:', callbacks=[self.training_seconds.set_value]): rval = self.algorithm.train(dataset=self.dataset) if rval is not None: raise ValueError("TrainingAlgorithm.train should not " "return anything. 
Use " "TrainingAlgorithm.continue_learning " "to control whether learning " "continues.") self.model.monitor.report_epoch() extension_continue = self.run_callbacks_and_monitoring() if self.save_freq > 0 and \ self.model.monitor._epochs_seen % self.save_freq == 0: self.save() continue_learning = (self.algorithm.continue_learning( self.model) and extension_continue) assert continue_learning in [True, False, 0, 1] if not continue_learning: break self.model.monitor.training_succeeded = True if self.save_freq > 0: self.save()
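Stripped of monitoring and Theano, `main_loop` is an epoch loop gated by the time budget, the algorithm's `continue_learning`, and the extensions, with periodic saving. A schematic sketch with hypothetical callables standing in for the real objects:

import time

def main_loop(train_epoch, continue_learning, save, save_freq=1, time_budget=None):
    # train_epoch(): run one epoch; continue_learning(): bool; save(): write a checkpoint
    t0 = time.time()
    epochs_seen = 0
    while True:
        if time_budget is not None and time.time() - t0 > time_budget:
            break                                  # stop once the budget is exceeded
        train_epoch()
        epochs_seen += 1
        if save_freq > 0 and epochs_seen % save_freq == 0:
            save()                                 # periodic checkpoint
        if not continue_learning():
            break                                  # algorithm or an extension said stop
    if save_freq > 0:
        save()                                     # final save once training ends

main_loop(train_epoch=lambda: None,
          continue_learning=lambda: False,
          save=lambda: print('checkpoint written'))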
def get_func(learn_discriminator, learn_generator): updates = OrderedDict() assert (learn_discriminator or learn_generator ) and not (learn_discriminator and learn_generator) if learn_discriminator: cur_params = model.discriminator.get_params() else: cur_params = model.generator.get_params() cur_grads = OrderedDict() for param in cur_params: cur_grads[param] = grads[param] for param in grads: if grads[param].name is None and cost_value is not None: grads[param].name = ('grad(%(costname)s, %(paramname)s)' % { 'costname': cost_value.name, 'paramname': param.name }) assert grads[param].dtype == param.dtype cur_lr_scalers = OrderedDict() for param in cur_params: if param in lr_scalers: lr_scaler = lr_scalers[param] cur_lr_scalers[param] = lr_scaler log.info('Parameter and initial learning rate summary:') for param in cur_params: param_name = param.name if param_name is None: param_name = 'anon_param' lr = learning_rate.get_value() * cur_lr_scalers.get(param, 1.) log.info('\t' + param_name + ': ' + str(lr)) if self.learning_rule: updates.update( self.learning_rule.get_updates(learning_rate, cur_grads, cur_lr_scalers)) else: # Use standard SGD updates with fixed learning rate, restricted to the # parameters of the sub-model currently being trained. updates.update( dict(safe_zip(cur_params, [param - learning_rate * \ cur_lr_scalers.get(param, 1.) * cur_grads[param] for param in cur_params]))) for param in cur_params: if updates[param].name is None: updates[param].name = 'sgd_update(' + param.name + ')' model.modify_updates(updates) for param in cur_params: update = updates[param] if update.name is None: update.name = 'censor(sgd_update(' + param.name + '))' for update_val in get_debug_values(update): if np.any(np.isinf(update_val)): raise ValueError("debug value of %s contains infs" % update.name) if np.any(np.isnan(update_val)): raise ValueError("debug value of %s contains nans" % update.name) with log_timing(log, 'Compiling sgd_update'): return function(theano_args, updates=updates, name='sgd_update', on_unused_input='ignore', mode=self.theano_function_mode)
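`self.learning_rule.get_updates(learning_rate, grads, lr_scalers)` is the pluggable hook that replaces the fixed-learning-rate update above; a rule only has to return an ordered mapping to new parameter values. A minimal sketch of such a rule (hypothetical class, not pylearn2's LearningRule; an explicit params dict is added because NumPy arrays, unlike Theano shared variables, cannot key the update dictionary):

from collections import OrderedDict
import numpy as np

class PlainSGDRule(object):
    # smallest possible learning rule: param <- param - lr * scale * grad
    def get_updates(self, learning_rate, params, grads, lr_scalers):
        updates = OrderedDict()
        for name, param in params.items():
            scale = lr_scalers.get(name, 1.)       # default scaler of 1., as above
            updates[name] = param - learning_rate * scale * grads[name]
        return updates

rule = PlainSGDRule()
params = {'W': np.zeros(2), 'b': np.zeros(1)}
grads = {'W': np.ones(2), 'b': np.ones(1)}
print(rule.get_updates(0.1, params, grads, {'b': 2.0}))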