def test_contains_inf():
    """
    Check that pylearn2.utils.contains_inf reports `np.inf` and `-np.inf`
    entries in an array, and reports nothing for finite values or NaNs.
    """
    values = np.random.random(100)

    # Freshly drawn random values are all finite.
    assert not contains_inf(values)

    # A NaN is not an infinity and must not be flagged as one.
    values[0] = np.nan
    assert not contains_inf(values)

    # Both signs of infinity must be detected.
    for infinite in (np.inf, -np.inf):
        values[1] = infinite
        assert contains_inf(values)
def main_loop(self):
    """
    Set up `self.algorithm`, then train indefinitely.

    After setup and after every call to `self.algorithm.train()`, each
    parameter of `self.agent` is asserted to be free of NaNs and Infs.
    Every 1000 training steps the agent is saved to `self.save_path`.
    This method never returns normally; it only exits via an exception.
    """
    def check_params(step):
        # The assertion message carries the step count and parameter name
        # so that a failure can be traced back to a parameter.
        for param in self.agent.get_params():
            assert not contains_nan(param.get_value()), (step, param.name)
            assert not contains_inf(param.get_value()), (step, param.name)

    self.algorithm.setup(agent=self.agent, environment=self.environment)

    step = 0
    check_params(step)

    while True:
        result = self.algorithm.train()
        assert result is None
        step += 1
        check_params(step)

        if step % 1000 != 0:
            continue
        serial.save(self.save_path, self.agent)
        logger.info('saved!')
def do_check_on(var, nd, f, is_input):
    """
    Checks `var` for NaNs / Infs / big values. If any enabled check fails,
    logs information about `nd`, `f`, and `is_input` to help the user
    determine the cause of the invalid values, then raises.

    Which checks run is controlled by `nan_is_error`, `inf_is_error` and
    `big_is_error`, which are not parameters: they are read from the
    enclosing scope (defined outside this block).

    Parameters
    ----------
    var : numpy.ndarray
        The value to be checked.
    nd : theano.gof.Apply
        The Apply node being executed
    f : callable
        The thunk for the apply node
    is_input : bool
        If True, `var` is an input to `nd`. If False, it is an output.

    Raises
    ------
    AssertionError
        If at least one enabled check detected an invalid value.
    """
    error = False
    if nan_is_error and contains_nan(var):
        logger.error('NaN detected')
        error = True
    if inf_is_error and contains_inf(var):
        logger.error('Inf detected')
        error = True
    # `max()` of a zero-size array raises ValueError, and an empty array
    # cannot contain a big value, so the check is skipped for it.
    if big_is_error and var.size > 0 and np.abs(var).max() > 1e10:
        logger.error('Big value detected')
        error = True

    if not error:
        return

    if is_input:
        logger.error('In an input')
    else:
        logger.error('In an output')
    logger.error('Inputs: ')
    for ivar, ival in zip(nd.inputs, f.inputs):
        logger.error('var')
        logger.error(ivar)
        logger.error(theano.printing.min_informative_str(ivar))
        logger.error('val')
        logger.error(ival)
    logger.error('Node:')
    logger.error(nd)
    # Raised explicitly instead of `assert False`: assert statements are
    # removed when Python runs with -O, which would silently disable the
    # whole check. The exception type is unchanged.
    raise AssertionError('Invalid value detected; see the error log '
                         'for details')
def stochastic_max_pool_bc01(bc01, pool_shape, pool_stride, image_shape,
                             rng=None):
    """
    Stochastic max pooling for training as defined in:

    Stochastic Pooling for Regularization of Deep Convolutional Neural
    Networks
    Matthew D. Zeiler, Rob Fergus

    The input is zero-padded so that every pixel falls in at least one
    pool, unravelled into one window per pool, and a single element of
    each window is selected by a multinomial draw whose probabilities are
    the window values divided by the window sum.

    Parameters
    ----------
    bc01 : theano 4-tensor
        in format (batch size, channels, rows, cols),
        IMPORTANT: All values should be positive
    pool_shape : tuple
        shape of the pool region (rows, cols)
    pool_stride : tuple
        strides between pooling regions (row stride, col stride)
    image_shape : tuple
        (rows, cols) of the input; given as plain numbers to avoid doing
        some of the arithmetic in theano
    rng : theano random stream
        passed through `make_theano_rng` with default seed 2022

    Returns
    -------
    theano tensor
        the pooled result, cast to `theano.config.floatX`
    """
    img_rows, img_cols = image_shape
    pool_rows, pool_cols = pool_shape
    stride_rows, stride_cols = pool_stride

    n_batch = bc01.shape[0]
    n_channel = bc01.shape[1]

    rng = make_theano_rng(rng, 2022, which_method='multinomial')

    def last_pool(im_shp, p_shp, p_strd):
        # Index, in pooled space, of the last pool that is needed
        # (needed = each input pixel must appear in at least one pool).
        idx = int(numpy.ceil(float(im_shp - p_shp) / p_strd))
        assert p_strd * idx + p_shp >= im_shp
        assert p_strd * (idx - 1) + p_shp < im_shp
        return idx

    # Starting row / column of the last pool.
    last_row_start = last_pool(img_rows, pool_rows, stride_rows) * stride_rows
    last_col_start = last_pool(img_cols, pool_cols, stride_cols) * stride_cols

    # Image size needed for all the pool indexes to work out.
    padded_rows = last_row_start + pool_rows
    padded_cols = last_col_start + pool_cols

    # Shape of the pooled result.
    out_rows = int(numpy.floor(last_row_start / stride_rows)) + 1
    out_cols = int(numpy.floor(last_col_start / stride_cols)) + 1

    for debug_value in get_debug_values(bc01):
        assert not contains_inf(debug_value)
        assert debug_value.shape[2] == img_rows
        assert debug_value.shape[3] == img_cols

    # Zero-pad the input up to the required size.
    base_name = bc01.name if bc01.name is not None else 'anon_bc01'
    canvas = tensor.alloc(0.0, n_batch, n_channel, padded_rows, padded_cols)
    bc01 = tensor.set_subtensor(canvas[:, :, 0:img_rows, 0:img_cols], bc01)
    bc01.name = 'zero_padded_' + base_name

    # Unravel: window[b, c, i, j] holds the (pool_rows, pool_cols) patch
    # that feeds output position (i, j).
    window = tensor.alloc(0.0, n_batch, n_channel, out_rows, out_cols,
                          pool_rows, pool_cols)
    window.name = 'unravlled_winodows_' + base_name

    for row_offset in xrange(pool_rows):
        row_stop = last_row_start + row_offset + 1
        for col_offset in xrange(pool_cols):
            col_stop = last_col_start + col_offset + 1
            cell = bc01[:, :,
                        row_offset:row_stop:stride_rows,
                        col_offset:col_stop:stride_cols]
            window = tensor.set_subtensor(
                window[:, :, :, :, row_offset, col_offset], cell)

    # Normalise each window; an all-zero window is divided by 1 instead
    # of 0.
    window_sum = window.sum(axis=[4, 5])
    window_sum = tensor.switch(tensor.eq(window_sum, 0.0), 1.0, window_sum)
    pvals = window / window_sum.dimshuffle(0, 1, 2, 3, 'x', 'x')

    # Draw from a multinomial over the elements of each window.
    flat_pvals = pvals.reshape((n_batch * n_channel * out_rows * out_cols,
                                pool_rows * pool_cols))
    draw = rng.multinomial(pvals=flat_pvals, dtype='float32')

    # Keep only the drawn element of every window.
    mask = draw.reshape((n_batch, n_channel, out_rows, out_cols,
                         pool_rows, pool_cols))
    pooled = (window * mask).max(axis=5).max(axis=4)
    pooled.name = 'pooled_' + base_name

    return tensor.cast(pooled, theano.config.floatX)
def main():
    """
    Interactively plot the monitor channels of one or more saved models.

    Command-line arguments (parsed with argparse):

    - ``model_paths`` : one or more paths, each loaded with `serial.load`
    - ``--out`` : optional file name; when given the figure is saved there
      instead of being shown

    The channels of every model are merged into `channels` (a name
    defined outside this function; presumably a module-level dict). When
    more than one channel is available the user is prompted for the
    channels to plot and may also choose the x axis or smooth all
    channels; with a single channel it is plotted without prompting.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out")
    parser.add_argument("model_paths", nargs='+')
    options = parser.parse_args()
    model_paths = options.model_paths

    if options.out is not None:
        # The backend has to be chosen before pyplot is imported.
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    print('generating names...')
    # '.pkl' is replaced by a '!' marker while the unique substrings are
    # computed; the marker is stripped again afterwards.
    model_names = [model_path.replace('.pkl', '!')
                   for model_path in model_paths]
    model_names = unique_substrings(model_names, min_size=10)
    model_names = [model_name.replace('!', '') for model_name in model_names]
    print('...done')

    for i, arg in enumerate(model_paths):
        try:
            model = serial.load(arg)
        except Exception:
            if arg.endswith('.yaml'):
                sys.stderr.write(arg + " is a yaml config file," +
                                 "you need to load a trained model.\n")
                sys.exit(-1)
            raise
        this_model_channels = model.monitor.channels

        # Channel names only need a per-model suffix when several models
        # are plotted. The number of model paths is tested rather than
        # len(sys.argv), which is also inflated by the --out option.
        if len(model_paths) > 1:
            postfix = ":" + model_names[i]
        else:
            postfix = ""

        for channel in this_model_channels:
            channels[channel + postfix] = this_model_channels[channel]
        del model
        gc.collect()

    # 'batche' is intentional: the axis label appends an 's' to x_axis.
    axis_codes = {'e': 'epoch', 'b': 'batche', 's': 'second', 'h': 'hour'}

    # Number of preceding points averaged with each point when smoothing.
    k = 5

    while True:
        # Make a list of short codes for each channel so user can specify
        # them easily
        tag_generator = _TagGenerator()
        codebook = {}
        sorted_codes = []
        for channel_name in sorted(channels,
                                   key=number_aware_alphabetical_key):
            code = tag_generator.get_tag()
            codebook[code] = channel_name
            codebook['<' + channel_name + '>'] = channel_name
            sorted_codes.append(code)

        x_axis = 'example'
        print('set x_axis to example')

        if len(channels) == 0:
            print("there are no channels to plot")
            break

        # If there is more than one channel in the monitor ask which ones
        # to plot
        prompt = len(channels) > 1

        if prompt:
            # Display the codebook
            for code in sorted_codes:
                print(code + '. ' + codebook[code])

            print('')

            print("Put e, b, s or h in the list somewhere to plot " +
                  "epochs, batches, seconds, or hours, respectively.")
            response = raw_input('Enter a list of channels to plot ' +
                                 '(example: A, C,F-G, h, <test_err>) or q to quit' +
                                 ' or o for options: ')

            if response == 'o':
                print('1: smooth all channels')
                print('any other response: do nothing, go back to plotting')
                response = raw_input('Enter your choice: ')
                if response == '1':
                    for channel in channels.values():
                        new_val_record = []
                        for i in xrange(len(channel.val_record)):
                            # Mean of the current value and up to k
                            # values before it.
                            new_val = 0.
                            count = 0.
                            for j in xrange(max(0, i - k), i + 1):
                                new_val += channel.val_record[j]
                                count += 1.
                            new_val_record.append(new_val / count)
                        channel.val_record = new_val_record
                continue

            if response == 'q':
                break

            # Remove spaces, then split into a list of codes
            codes = response.replace(' ', '').split(',')

            final_codes = set()

            for code in codes:
                if code in axis_codes:
                    x_axis = axis_codes[code]
                elif code.startswith('<'):
                    assert code.endswith('>')
                    final_codes.add(code)
                elif '-' in code:
                    # The current list element is a range of codes
                    rng = code.split('-')

                    if len(rng) != 2:
                        print("Input not understood: " + code)
                        sys.exit(-1)

                    try:
                        first = sorted_codes.index(rng[0])
                    except ValueError:
                        print("Invalid code: " + rng[0])
                        sys.exit(-1)

                    # The end of the range is only searched for at or
                    # after its start.
                    try:
                        last = sorted_codes.index(rng[1], first)
                    except ValueError:
                        print("Invalid code: " + rng[1])
                        sys.exit(-1)

                    final_codes.update(sorted_codes[first:last + 1])
                else:
                    # The current list element is just a single code
                    final_codes.add(code)
        else:
            # Exactly one channel: plot it. The short codes are used
            # rather than codebook.keys(), because the codebook also holds
            # a '<name>' alias per channel and would yield two entries.
            final_codes = set(sorted_codes)

        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
        styles = list(colors)
        styles += [color + '--' for color in colors]
        styles += [color + ':' for color in colors]

        fig = plt.figure()
        ax = plt.subplot(1, 1, 1)

        # plot the requested channels
        for idx, code in enumerate(sorted(final_codes)):
            channel_name = codebook[code]
            channel = channels[channel_name]

            y = np.asarray(channel.val_record)

            if contains_nan(y):
                print(channel_name + ' contains NaNs')

            if contains_inf(y):
                print(channel_name + ' contains infinite values')

            if x_axis == 'example':
                x = np.asarray(channel.example_record)
            elif x_axis == 'batche':
                x = np.asarray(channel.batch_record)
            elif x_axis == 'epoch':
                try:
                    x = np.asarray(channel.epoch_record)
                except AttributeError:
                    # older saved monitors won't have epoch_record
                    x = np.arange(len(channel.batch_record))
            elif x_axis == 'second':
                x = np.asarray(channel.time_record)
            elif x_axis == 'hour':
                x = np.asarray(channel.time_record) / 3600.
            else:
                raise AssertionError('unknown x_axis: ' + x_axis)

            ax.plot(x,
                    y,
                    styles[idx % len(styles)],
                    marker='.',  # add point markers to lines
                    label=channel_name)

        plt.xlabel('# ' + x_axis + 's')
        ax.ticklabel_format(scilimits=(-3, 3), axis='both')

        handles, labels = ax.get_legend_handles_labels()
        ax.legend(handles, labels, loc='upper center',
                  bbox_to_anchor=(0.5, -0.1))
        # 0.046 is the size of 1 legend box
        fig.subplots_adjust(bottom=0.11 + 0.046 * len(final_codes))

        if options.out is None:
            plt.show()
        else:
            plt.savefig(options.out)

        if not prompt:
            break
def main():
    """
    Interactively plot the monitor channels of one or more saved models.

    Command-line arguments (parsed with argparse):

    - ``model_paths`` : one or more paths, each loaded with `serial.load`
    - ``--out`` : optional file name; when given the figure is saved there
      instead of being shown
    - ``--yrange`` : optional ``ymin:ymax`` pair used as the y limits

    The channels of every model are merged into `channels` (a name
    defined outside this function; presumably a module-level dict). When
    more than one channel is available the user is prompted for the
    channels to plot and may also choose the x axis or smooth all
    channels; with a single channel it is plotted without prompting.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out")
    parser.add_argument("model_paths", nargs='+')
    parser.add_argument("--yrange",
                        help='The y-range to be used for plotting, e.g. 0:1')
    options = parser.parse_args()
    model_paths = options.model_paths

    if options.out is not None:
        # The backend has to be chosen before pyplot is imported.
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    print('generating names...')
    # '.pkl' is replaced by a '!' marker while the unique substrings are
    # computed; the marker is stripped again afterwards.
    model_names = [model_path.replace('.pkl', '!')
                   for model_path in model_paths]
    model_names = unique_substrings(model_names, min_size=10)
    model_names = [model_name.replace('!', '') for model_name in model_names]
    print('...done')

    for i, arg in enumerate(model_paths):
        try:
            model = serial.load(arg)
        except Exception:
            if arg.endswith('.yaml'):
                # Only the message goes to stderr; passing sys.stderr as
                # a positional argument would print the stream's repr too.
                print(arg + " is a yaml config file," +
                      "you need to load a trained model.", file=sys.stderr)
                sys.exit(-1)
            raise
        this_model_channels = model.monitor.channels

        # Channel names only need a per-model suffix when several models
        # are plotted. The number of model paths is tested rather than
        # len(sys.argv), which is also inflated by --out and --yrange.
        if len(model_paths) > 1:
            postfix = ":" + model_names[i]
        else:
            postfix = ""

        for channel in this_model_channels:
            channels[channel + postfix] = this_model_channels[channel]
        del model
        gc.collect()

    # 'batche' is intentional: the axis label appends an 's' to x_axis.
    axis_codes = {'e': 'epoch', 'b': 'batche', 's': 'second', 'h': 'hour'}

    # Number of preceding points averaged with each point when smoothing.
    k = 5

    while True:
        # Make a list of short codes for each channel so user can specify
        # them easily
        tag_generator = _TagGenerator()
        codebook = {}
        sorted_codes = []
        for channel_name in sorted(channels,
                                   key=number_aware_alphabetical_key):
            code = tag_generator.get_tag()
            codebook[code] = channel_name
            codebook['<' + channel_name + '>'] = channel_name
            sorted_codes.append(code)

        x_axis = 'example'
        print('set x_axis to example')

        if len(channels) == 0:
            print("there are no channels to plot")
            break

        # If there is more than one channel in the monitor ask which ones
        # to plot
        prompt = len(channels) > 1

        if prompt:
            # Display the codebook
            for code in sorted_codes:
                print(code + '. ' + codebook[code])

            print()

            print("Put e, b, s or h in the list somewhere to plot " +
                  "epochs, batches, seconds, or hours, respectively.")
            response = input('Enter a list of channels to plot ' +
                             '(example: A, C,F-G, h, <test_err>) or q to quit' +
                             ' or o for options: ')

            if response == 'o':
                print('1: smooth all channels')
                print('any other response: do nothing, go back to plotting')
                response = input('Enter your choice: ')
                if response == '1':
                    for channel in channels.values():
                        new_val_record = []
                        for i in xrange(len(channel.val_record)):
                            # Mean of the current value and up to k
                            # values before it.
                            new_val = 0.
                            count = 0.
                            for j in xrange(max(0, i - k), i + 1):
                                new_val += channel.val_record[j]
                                count += 1.
                            new_val_record.append(new_val / count)
                        channel.val_record = new_val_record
                continue

            if response == 'q':
                break

            # Remove spaces, then split into a list of codes
            codes = response.replace(' ', '').split(',')

            final_codes = set()

            for code in codes:
                if code in axis_codes:
                    x_axis = axis_codes[code]
                elif code.startswith('<'):
                    assert code.endswith('>')
                    final_codes.add(code)
                elif '-' in code:
                    # The current list element is a range of codes
                    rng = code.split('-')

                    if len(rng) != 2:
                        print("Input not understood: " + code)
                        sys.exit(-1)

                    try:
                        first = sorted_codes.index(rng[0])
                    except ValueError:
                        print("Invalid code: " + rng[0])
                        sys.exit(-1)

                    # The end of the range is only searched for at or
                    # after its start.
                    try:
                        last = sorted_codes.index(rng[1], first)
                    except ValueError:
                        print("Invalid code: " + rng[1])
                        sys.exit(-1)

                    final_codes.update(sorted_codes[first:last + 1])
                else:
                    # The current list element is just a single code
                    final_codes.add(code)
        else:
            # Exactly one channel: plot it. The short codes are used
            # rather than codebook.keys(), because the codebook also holds
            # a '<name>' alias per channel and would yield two entries.
            final_codes = set(sorted_codes)

        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
        styles = list(colors)
        styles += [color + '--' for color in colors]
        styles += [color + ':' for color in colors]

        fig = plt.figure()
        ax = plt.subplot(1, 1, 1)

        # plot the requested channels
        for idx, code in enumerate(sorted(final_codes)):
            channel_name = codebook[code]
            channel = channels[channel_name]

            y = np.asarray(channel.val_record)

            if contains_nan(y):
                print(channel_name + ' contains NaNs')

            if contains_inf(y):
                print(channel_name + ' contains infinite values')

            if x_axis == 'example':
                x = np.asarray(channel.example_record)
            elif x_axis == 'batche':
                x = np.asarray(channel.batch_record)
            elif x_axis == 'epoch':
                try:
                    x = np.asarray(channel.epoch_record)
                except AttributeError:
                    # older saved monitors won't have epoch_record
                    x = np.arange(len(channel.batch_record))
            elif x_axis == 'second':
                x = np.asarray(channel.time_record)
            elif x_axis == 'hour':
                x = np.asarray(channel.time_record) / 3600.
            else:
                raise AssertionError('unknown x_axis: ' + x_axis)

            ax.plot(x,
                    y,
                    styles[idx % len(styles)],
                    marker='.',  # add point markers to lines
                    label=channel_name)

        plt.xlabel('# ' + x_axis + 's')
        ax.ticklabel_format(scilimits=(-3, 3), axis='both')

        handles, labels = ax.get_legend_handles_labels()
        ax.legend(handles, labels, loc='upper center',
                  bbox_to_anchor=(0.5, -0.1))
        # 0.046 is the size of 1 legend box
        fig.subplots_adjust(bottom=0.11 + 0.046 * len(final_codes))

        if options.yrange is not None:
            ymin, ymax = map(float, options.yrange.split(':'))
            plt.ylim(ymin, ymax)

        if options.out is None:
            plt.show()
        else:
            plt.savefig(options.out)

        if not prompt:
            break
def setup(self, model, dataset):
    """
    Compiles the theano functions needed for the train method.

    Validates the model parameters and the batch-size configuration,
    builds the symbolic cost, gradients and updates, sets up the monitor
    through `self._setup_monitor()`, and stores the compiled update
    function in `self.sgd_update`.

    Parameters
    ----------
    model : a Model instance
    dataset : Dataset

    Raises
    ------
    ValueError
        If a parameter contains Inf or NaN, if a dataset size is not a
        multiple of the batch size while the model forces the batch size
        and the iteration mode is not uniform, if a learning rate scaler
        refers to something that is not a parameter, or if a debug value
        of an update contains Inf or NaN.
    TypeError
        If `self.cost.get_gradients` does not return an OrderedDict as
        its first value.
    """
    if self.cost is None:
        self.cost = model.get_default_cost()

    inf_params = [param for param in model.get_params()
                  if contains_inf(param.get_value())]
    if len(inf_params) > 0:
        raise ValueError("These params are Inf: "+str(inf_params))
    nan_params = [param for param in model.get_params()
                  if contains_nan(param.get_value())]
    if len(nan_params) > 0:
        raise ValueError("These params are NaN: "+str(nan_params))
    self.model = model

    self._synchronize_batch_size(model)
    model._test_batch_size = self.batch_size
    self.monitor = Monitor.get_monitor(model)
    self.monitor._sanity_check()

    # test if force batch size and batch size
    has_force_batch_size = getattr(model, "force_batch_size", False)
    train_dataset_is_uneven = \
        dataset.get_num_examples() % self.batch_size != 0

    # The number of datasets is compared, not the collection itself:
    # `values() > 0` is always True on Python 2 and a TypeError on
    # Python 3.
    has_monitoring_datasets = \
        self.monitoring_dataset is not None and \
        len(self.monitoring_dataset.values()) > 0

    if has_monitoring_datasets:
        monitoring_datasets_are_uneven = \
            any(d.get_num_examples() % self.batch_size
                != 0 for d in self.monitoring_dataset.values())
    else:
        monitoring_datasets_are_uneven = False  # or True it doesn't matter

    if has_force_batch_size and train_dataset_is_uneven and \
       not has_uniform_batch_size(self.train_iteration_mode):

        raise ValueError("Dataset size is not a multiple of batch size."
                         "You should set train_iteration_mode (and "
                         "maybe monitor_iteration_mode) to "
                         "even_sequential, even_shuffled_sequential or "
                         "even_batchwise_shuffled_sequential")

    if has_force_batch_size and has_monitoring_datasets and \
       monitoring_datasets_are_uneven and \
       not has_uniform_batch_size(self.monitor_iteration_mode):

        raise ValueError("Dataset size is not a multiple of batch size."
                         "You should set monitor_iteration_mode to "
                         "even_sequential, even_shuffled_sequential or "
                         "even_batchwise_shuffled_sequential")

    data_specs = self.cost.get_data_specs(self.model)
    mapping = DataSpecsMapping(data_specs)
    space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
    source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

    # Build a flat tuple of Theano Variables, one for each space.
    # We want that so that if the same space/source is specified
    # more than once in data_specs, only one Theano Variable
    # is generated for it, and the corresponding value is passed
    # only once to the compiled Theano function.
    theano_args = []
    for space, source in safe_zip(space_tuple, source_tuple):
        name = '%s[%s]' % (self.__class__.__name__, source)
        arg = space.make_theano_batch(name=name,
                                      batch_size=self.batch_size)
        theano_args.append(arg)
    theano_args = tuple(theano_args)

    # Methods of `self.cost` need args to be passed in a format compatible
    # with data_specs
    nested_args = mapping.nest(theano_args)
    fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
    self.on_load_batch = fixed_var_descr.on_load_batch

    cost_value = self.cost.expr(model, nested_args,
                                ** fixed_var_descr.fixed_vars)

    if cost_value is not None and cost_value.name is None:
        # Concatenate the name of all tensors in theano_args !?
        cost_value.name = 'objective'

    learning_rate = self.learning_rate
    params = list(model.get_params())
    assert len(params) > 0
    for i, param in enumerate(params):
        if param.name is None:
            param.name = 'sgd_params[%d]' % i

    grads, updates = self.cost.get_gradients(model, nested_args,
                                             ** fixed_var_descr.fixed_vars)
    if not isinstance(grads, OrderedDict):
        raise TypeError(str(type(self.cost)) + ".get_gradients returned " +
                        "something with" + str(type(grads)) + "as its " +
                        "first member. Expected OrderedDict.")

    # The gradients must cover exactly the model parameters.
    for param in grads:
        assert param in params
    for param in params:
        assert param in grads

    for param in grads:
        if grads[param].name is None and cost_value is not None:
            grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                 {'costname': cost_value.name,
                                  'paramname': param.name})
        assert grads[param].dtype == param.dtype

    lr_scalers = model.get_lr_scalers()

    for key in lr_scalers:
        if key not in params:
            raise ValueError("Tried to scale the learning rate on " +
                             str(key)+" which is not an optimization parameter.")

    log.info('Parameter and initial learning rate summary:')
    for param in params:
        param_name = param.name
        if param_name is None:
            param_name = 'anon_param'
        lr = learning_rate.get_value() * lr_scalers.get(param, 1.)
        log.info('\t' + param_name + ': ' + str(lr))

    if self.learning_rule:
        updates.update(self.learning_rule.get_updates(
            learning_rate, grads, lr_scalers))
    else:
        # Use standard SGD updates with fixed learning rate.
        updates.update(dict(safe_zip(
            params,
            [param - learning_rate * lr_scalers.get(param, 1.) * grads[param]
             for param in params])))

    for param in params:
        if updates[param].name is None:
            updates[param].name = 'sgd_update(' + param.name + ')'
    model.modify_updates(updates)
    for param in params:
        update = updates[param]
        if update.name is None:
            update.name = 'censor(sgd_update(' + param.name + '))'
        for update_val in get_debug_values(update):
            if contains_inf(update_val):
                raise ValueError("debug value of %s contains infs" %
                                 update.name)
            if contains_nan(update_val):
                raise ValueError("debug value of %s contains nans" %
                                 update.name)

    # Set up monitor to model the objective value, learning rate,
    # momentum (if applicable), and extra channels defined by
    # the cost.
    # We have to do that after learning_rule.get_updates has been
    # called, since it may have an effect on
    # learning_rule.add_channels_to_monitor (that is currently the case
    # for AdaDelta and RMSProp).
    self._setup_monitor()

    with log_timing(log, 'Compiling sgd_update'):
        self.sgd_update = function(theano_args,
                                   updates=updates,
                                   name='sgd_update',
                                   on_unused_input='ignore',
                                   mode=self.theano_function_mode)
    self.params = params
def setup(self, model, dataset):
    """
    Compiles the theano functions needed for the train method.

    Validates the model parameters and the batch-size configuration,
    builds the symbolic cost, sets up the monitor (including a
    "learning_rate" channel) when monitoring datasets are configured,
    builds the gradients and updates, and stores the compiled update
    function in `self.sgd_update`.

    Parameters
    ----------
    model : a Model instance
    dataset : Dataset

    Raises
    ------
    ValueError
        If a parameter contains Inf or NaN, if a dataset size is not a
        multiple of the batch size while the model forces the batch size
        and the iteration mode is not uniform, if a learning rate scaler
        refers to something that is not a parameter, or if a debug value
        of an update contains Inf or NaN.
    TypeError
        If `self.cost.get_gradients` does not return an OrderedDict as
        its first value.
    """
    if self.cost is None:
        self.cost = model.get_default_cost()

    inf_params = [param for param in model.get_params()
                  if contains_inf(param.get_value())]
    if len(inf_params) > 0:
        raise ValueError("These params are Inf: " + str(inf_params))
    nan_params = [param for param in model.get_params()
                  if contains_nan(param.get_value())]
    if len(nan_params) > 0:
        raise ValueError("These params are NaN: " + str(nan_params))
    self.model = model

    self._synchronize_batch_size(model)
    model._test_batch_size = self.batch_size
    self.monitor = Monitor.get_monitor(model)
    self.monitor._sanity_check()

    # test if force batch size and batch size
    has_force_batch_size = getattr(model, "force_batch_size", False)
    train_dataset_is_uneven = (
        dataset.get_num_examples() % self.batch_size != 0
    )

    # The number of datasets is compared, not the collection itself:
    # `values() > 0` is always True on Python 2 and a TypeError on
    # Python 3.
    has_monitoring_datasets = (
        self.monitoring_dataset is not None
        and len(self.monitoring_dataset.values()) > 0
    )

    if has_monitoring_datasets:
        monitoring_datasets_are_uneven = any(
            d.get_num_examples() % self.batch_size != 0
            for d in self.monitoring_dataset.values()
        )
    else:
        monitoring_datasets_are_uneven = False  # or True it doesn't matter

    if (
        has_force_batch_size
        and train_dataset_is_uneven
        and not has_uniform_batch_size(self.train_iteration_mode)
    ):
        raise ValueError(
            "Dataset size is not a multiple of batch size."
            "You should set train_iteration_mode (and "
            "maybe monitor_iteration_mode) to "
            "even_sequential, even_shuffled_sequential or "
            "even_batchwise_shuffled_sequential"
        )

    if (
        has_force_batch_size
        and has_monitoring_datasets
        and monitoring_datasets_are_uneven
        and not has_uniform_batch_size(self.monitor_iteration_mode)
    ):
        raise ValueError(
            "Dataset size is not a multiple of batch size."
            "You should set monitor_iteration_mode to "
            "even_sequential, even_shuffled_sequential or "
            "even_batchwise_shuffled_sequential"
        )

    data_specs = self.cost.get_data_specs(self.model)
    mapping = DataSpecsMapping(data_specs)
    space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
    source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

    # Build a flat tuple of Theano Variables, one for each space.
    # We want that so that if the same space/source is specified
    # more than once in data_specs, only one Theano Variable
    # is generated for it, and the corresponding value is passed
    # only once to the compiled Theano function.
    theano_args = []
    for space, source in safe_zip(space_tuple, source_tuple):
        name = "%s[%s]" % (self.__class__.__name__, source)
        arg = space.make_theano_batch(name=name, batch_size=self.batch_size)
        theano_args.append(arg)
    theano_args = tuple(theano_args)

    # Methods of `self.cost` need args to be passed in a format compatible
    # with data_specs
    nested_args = mapping.nest(theano_args)
    fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
    self.on_load_batch = fixed_var_descr.on_load_batch

    cost_value = self.cost.expr(model, nested_args,
                                **fixed_var_descr.fixed_vars)

    if cost_value is not None and cost_value.name is None:
        # Concatenate the name of all tensors in theano_args !?
        cost_value.name = "objective"

    # Set up monitor to model the objective value, learning rate,
    # momentum (if applicable), and extra channels defined by
    # the cost
    learning_rate = self.learning_rate
    if self.monitoring_dataset is not None:
        if (self.monitoring_batch_size is None
                and self.monitoring_batches is None):
            self.monitoring_batch_size = self.batch_size
            self.monitoring_batches = self.batches_per_iter
        self.monitor.setup(
            dataset=self.monitoring_dataset,
            cost=self.cost,
            batch_size=self.monitoring_batch_size,
            num_batches=self.monitoring_batches,
            extra_costs=self.monitoring_costs,
            mode=self.monitor_iteration_mode,
        )
        # The keys are materialized into a list before indexing: on
        # Python 3 `keys()` returns a view that cannot be subscripted.
        dataset_name = list(self.monitoring_dataset.keys())[0]
        monitoring_dataset = self.monitoring_dataset[dataset_name]
        # TODO: have Monitor support non-data-dependent channels
        self.monitor.add_channel(
            name="learning_rate",
            ipt=None,
            val=learning_rate,
            data_specs=(NullSpace(), ""),
            dataset=monitoring_dataset,
        )

        if self.learning_rule:
            self.learning_rule.add_channels_to_monitor(self.monitor,
                                                       monitoring_dataset)

    params = list(model.get_params())
    assert len(params) > 0
    for i, param in enumerate(params):
        if param.name is None:
            param.name = "sgd_params[%d]" % i

    grads, updates = self.cost.get_gradients(model, nested_args,
                                             **fixed_var_descr.fixed_vars)
    if not isinstance(grads, OrderedDict):
        raise TypeError(
            str(type(self.cost))
            + ".get_gradients returned "
            + "something with"
            + str(type(grads))
            + "as its "
            + "first member. Expected OrderedDict."
        )

    # The gradients must cover exactly the model parameters.
    for param in grads:
        assert param in params
    for param in params:
        assert param in grads

    for param in grads:
        if grads[param].name is None and cost_value is not None:
            grads[param].name = "grad(%(costname)s, %(paramname)s)" % {
                "costname": cost_value.name,
                "paramname": param.name,
            }
        assert grads[param].dtype == param.dtype

    lr_scalers = model.get_lr_scalers()

    for key in lr_scalers:
        if key not in params:
            raise ValueError(
                "Tried to scale the learning rate on "
                + str(key)
                + " which is not an optimization parameter."
            )

    log.info("Parameter and initial learning rate summary:")
    for param in params:
        param_name = param.name
        if param_name is None:
            param_name = "anon_param"
        lr = learning_rate.get_value() * lr_scalers.get(param, 1.0)
        log.info("\t" + param_name + ": " + str(lr))

    if self.learning_rule:
        updates.update(
            self.learning_rule.get_updates(learning_rate, grads, lr_scalers)
        )
    else:
        # Use standard SGD updates with fixed learning rate.
        updates.update(
            dict(
                safe_zip(
                    params,
                    [
                        param
                        - learning_rate
                        * lr_scalers.get(param, 1.0)
                        * grads[param]
                        for param in params
                    ],
                )
            )
        )

    for param in params:
        if updates[param].name is None:
            updates[param].name = "sgd_update(" + param.name + ")"
    model.modify_updates(updates)
    for param in params:
        update = updates[param]
        if update.name is None:
            update.name = "censor(sgd_update(" + param.name + "))"
        for update_val in get_debug_values(update):
            if contains_inf(update_val):
                raise ValueError("debug value of %s contains infs"
                                 % update.name)
            if contains_nan(update_val):
                raise ValueError("debug value of %s contains nans"
                                 % update.name)

    with log_timing(log, "Compiling sgd_update"):
        self.sgd_update = function(
            theano_args,
            updates=updates,
            name="sgd_update",
            on_unused_input="ignore",
            mode=self.theano_function_mode,
        )
    self.params = params
def weighted_max_pool_bc01(bc01, pool_shape, pool_stride, image_shape,
                           rng=None):
    """
    This implements test time probability weighted pooling defined in:

    Stochastic Pooling for Regularization of Deep Convolutional Neural
    Networks
    Matthew D. Zeiler, Rob Fergus

    The input is zero-padded so that every pixel falls in at least one
    pool and unravelled into one window per pool. Each output is the sum
    of the window values weighted by the values divided by the window
    sum.

    Parameters
    ----------
    bc01 : theano 4-tensor
        minibatch in format (batch size, channels, rows, cols),
        IMPORTANT: All values should be positive
    pool_shape : tuple
        shape of the pool region (rows, cols)
    pool_stride : tuple
        strides between pooling regions (row stride, col stride)
    image_shape : tuple
        (rows, cols) of the input; given as plain numbers to avoid doing
        some of the arithmetic in theano
    rng : theano random stream
        passed through `make_theano_rng`; the resulting stream is not
        used by the computation below

    Returns
    -------
    theano tensor
        the pooled result
    """
    img_rows, img_cols = image_shape
    pool_rows, pool_cols = pool_shape
    stride_rows, stride_cols = pool_stride

    n_batch = bc01.shape[0]
    n_channel = bc01.shape[1]

    # NOTE(review): nothing below reads `rng`; the call is kept so that
    # behaviour is unchanged.
    rng = make_theano_rng(rng, 2022, which_method="multinomial")

    def last_pool(im_shp, p_shp, p_strd):
        # Index, in pooled space, of the last pool that is needed
        # (needed = each input pixel must appear in at least one pool).
        idx = int(numpy.ceil(float(im_shp - p_shp) / p_strd))
        assert p_strd * idx + p_shp >= im_shp
        assert p_strd * (idx - 1) + p_shp < im_shp
        return idx

    # Starting row / column of the last pool.
    last_row_start = last_pool(img_rows, pool_rows, stride_rows) * stride_rows
    last_col_start = last_pool(img_cols, pool_cols, stride_cols) * stride_cols

    # Image size needed for all the pool indexes to work out.
    padded_rows = last_row_start + pool_rows
    padded_cols = last_col_start + pool_cols

    # Shape of the pooled result.
    out_rows = int(numpy.floor(last_row_start / stride_rows)) + 1
    out_cols = int(numpy.floor(last_col_start / stride_cols)) + 1

    for debug_value in get_debug_values(bc01):
        assert not contains_inf(debug_value)
        assert debug_value.shape[2] == img_rows
        assert debug_value.shape[3] == img_cols

    # Zero-pad the input up to the required size.
    base_name = bc01.name if bc01.name is not None else "anon_bc01"
    canvas = tensor.alloc(0.0, n_batch, n_channel, padded_rows, padded_cols)
    bc01 = tensor.set_subtensor(canvas[:, :, 0:img_rows, 0:img_cols], bc01)
    bc01.name = "zero_padded_" + base_name

    # Unravel: window[b, c, i, j] holds the (pool_rows, pool_cols) patch
    # that feeds output position (i, j).
    window = tensor.alloc(0.0, n_batch, n_channel, out_rows, out_cols,
                          pool_rows, pool_cols)
    window.name = "unravlled_winodows_" + base_name

    for row_offset in xrange(pool_rows):
        row_stop = last_row_start + row_offset + 1
        for col_offset in xrange(pool_cols):
            col_stop = last_col_start + col_offset + 1
            cell = bc01[:, :,
                        row_offset:row_stop:stride_rows,
                        col_offset:col_stop:stride_cols]
            window = tensor.set_subtensor(
                window[:, :, :, :, row_offset, col_offset], cell)

    # Normalise each window; an all-zero window is divided by 1 instead
    # of 0.
    window_sum = window.sum(axis=[4, 5])
    window_sum = tensor.switch(tensor.eq(window_sum, 0.0), 1.0, window_sum)
    weights = window / window_sum.dimshuffle(0, 1, 2, 3, "x", "x")

    # Weighted sum over each window.
    pooled = (window * weights).sum(axis=[4, 5])
    pooled.name = "pooled_" + base_name

    return pooled.reshape((n_batch, n_channel, out_rows, out_cols))
def main():
    """
    Interactively plot the monitor channels of one or more saved models.

    Command-line arguments (parsed with argparse):

    - ``model_paths`` : one or more paths, each loaded with `serial.load`
    - ``--out`` : optional file name; when given the figure is saved there
      instead of being shown
    - ``--yrange`` : optional ``ymin:ymax`` pair used as the y limits

    The channels of every model are merged into `channels` (a name
    defined outside this function; presumably a module-level dict). When
    more than one channel is available the user is prompted for the
    channels to plot and may also choose the x axis or smooth all
    channels; with a single channel it is plotted without prompting.
    The legend is placed to the right of the axis and the figure is
    resized to hold both.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out")
    parser.add_argument("model_paths", nargs='+')
    parser.add_argument("--yrange",
                        help='The y-range to be used for plotting, e.g. 0:1')
    options = parser.parse_args()
    model_paths = options.model_paths

    if options.out is not None:
        # The backend has to be chosen before pyplot is imported.
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    print('generating names...')
    # '.pkl' is replaced by a '!' marker while the unique substrings are
    # computed; the marker is stripped again afterwards.
    model_names = [model_path.replace('.pkl', '!')
                   for model_path in model_paths]
    model_names = unique_substrings(model_names, min_size=10)
    model_names = [model_name.replace('!', '') for model_name in model_names]
    print('...done')

    for i, arg in enumerate(model_paths):
        try:
            model = serial.load(arg)
        except Exception:
            if arg.endswith('.yaml'):
                # Only the message goes to stderr; passing sys.stderr as
                # a positional argument would print the stream's repr too.
                print(arg + " is a yaml config file," +
                      "you need to load a trained model.", file=sys.stderr)
                sys.exit(-1)
            raise
        this_model_channels = model.monitor.channels

        # Channel names only need a per-model suffix when several models
        # are plotted. The number of model paths is tested rather than
        # len(sys.argv), which is also inflated by --out and --yrange.
        if len(model_paths) > 1:
            postfix = ":" + model_names[i]
        else:
            postfix = ""

        for channel in this_model_channels:
            channels[channel + postfix] = this_model_channels[channel]
        del model
        gc.collect()

    # 'batche' is intentional: the axis label appends an 's' to x_axis.
    axis_codes = {'e': 'epoch', 'b': 'batche', 's': 'second', 'h': 'hour'}

    # Number of preceding points averaged with each point when smoothing.
    k = 5

    while True:
        # Make a list of short codes for each channel so user can specify
        # them easily
        tag_generator = _TagGenerator()
        codebook = {}
        sorted_codes = []
        for channel_name in sorted(channels,
                                   key=number_aware_alphabetical_key):
            code = tag_generator.get_tag()
            codebook[code] = channel_name
            codebook['<' + channel_name + '>'] = channel_name
            sorted_codes.append(code)

        x_axis = 'example'
        print('set x_axis to example')

        if len(channels) == 0:
            print("there are no channels to plot")
            break

        # If there is more than one channel in the monitor ask which ones
        # to plot
        prompt = len(channels) > 1

        if prompt:
            # Display the codebook
            for code in sorted_codes:
                print(code + '. ' + codebook[code])

            print()

            print("Put e, b, s or h in the list somewhere to plot " +
                  "epochs, batches, seconds, or hours, respectively.")
            response = input('Enter a list of channels to plot ' +
                             '(example: A, C,F-G, h, <test_err>) or q to quit' +
                             ' or o for options: ')

            if response == 'o':
                print('1: smooth all channels')
                print('any other response: do nothing, go back to plotting')
                response = input('Enter your choice: ')
                if response == '1':
                    for channel in channels.values():
                        new_val_record = []
                        for i in xrange(len(channel.val_record)):
                            # Mean of the current value and up to k
                            # values before it.
                            new_val = 0.
                            count = 0.
                            for j in xrange(max(0, i - k), i + 1):
                                new_val += channel.val_record[j]
                                count += 1.
                            new_val_record.append(new_val / count)
                        channel.val_record = new_val_record
                continue

            if response == 'q':
                break

            # Remove spaces, then split into a list of codes
            codes = response.replace(' ', '').split(',')

            final_codes = set()

            for code in codes:
                if code in axis_codes:
                    x_axis = axis_codes[code]
                elif code.startswith('<'):
                    assert code.endswith('>')
                    final_codes.add(code)
                elif '-' in code:
                    # The current list element is a range of codes
                    rng = code.split('-')

                    if len(rng) != 2:
                        print("Input not understood: " + code)
                        sys.exit(-1)

                    try:
                        first = sorted_codes.index(rng[0])
                    except ValueError:
                        print("Invalid code: " + rng[0])
                        sys.exit(-1)

                    # The end of the range is only searched for at or
                    # after its start.
                    try:
                        last = sorted_codes.index(rng[1], first)
                    except ValueError:
                        print("Invalid code: " + rng[1])
                        sys.exit(-1)

                    final_codes.update(sorted_codes[first:last + 1])
                else:
                    # The current list element is just a single code
                    final_codes.add(code)
        else:
            # Exactly one channel: plot it. The short codes are used
            # rather than codebook.keys(), because the codebook also holds
            # a '<name>' alias per channel and would yield two entries.
            final_codes = set(sorted_codes)

        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
        styles = list(colors)
        styles += [color + '--' for color in colors]
        styles += [color + ':' for color in colors]

        fig = plt.figure()
        ax = plt.subplot(1, 1, 1)

        # plot the requested channels
        for idx, code in enumerate(sorted(final_codes)):
            channel_name = codebook[code]
            channel = channels[channel_name]

            y = np.asarray(channel.val_record)

            if contains_nan(y):
                print(channel_name + ' contains NaNs')

            if contains_inf(y):
                print(channel_name + ' contains infinite values')

            if x_axis == 'example':
                x = np.asarray(channel.example_record)
            elif x_axis == 'batche':
                x = np.asarray(channel.batch_record)
            elif x_axis == 'epoch':
                try:
                    x = np.asarray(channel.epoch_record)
                except AttributeError:
                    # older saved monitors won't have epoch_record
                    x = np.arange(len(channel.batch_record))
            elif x_axis == 'second':
                x = np.asarray(channel.time_record)
            elif x_axis == 'hour':
                x = np.asarray(channel.time_record) / 3600.
            else:
                raise AssertionError('unknown x_axis: ' + x_axis)

            ax.plot(x,
                    y,
                    styles[idx % len(styles)],
                    marker='.',  # add point markers to lines
                    label=channel_name)

        plt.xlabel('# ' + x_axis + 's')
        ax.ticklabel_format(scilimits=(-3, 3), axis='both')

        handles, labels = ax.get_legend_handles_labels()
        ax.legend(handles, labels, loc='upper left',
                  bbox_to_anchor=(1.05, 1.02))

        # Get the axis positions and the height and width of the legend
        plt.draw()
        ax_pos = ax.get_position()
        pad_width = ax_pos.x0 * fig.get_size_inches()[0]
        pad_height = ax_pos.y0 * fig.get_size_inches()[1]
        dpi = fig.get_dpi()
        lgd_width = ax.get_legend().get_frame().get_width() / dpi
        lgd_height = ax.get_legend().get_frame().get_height() / dpi

        # Adjust the bounding box to encompass both legend and axis.
        # Axis should be 3x3 inches.
        # I had trouble getting everything to align vertically.
        ax_width = 3
        ax_height = 3
        total_width = 2 * pad_width + ax_width + lgd_width
        total_height = 2 * pad_height + np.maximum(ax_height, lgd_height)

        fig.set_size_inches(total_width, total_height)
        ax.set_position([pad_width / total_width,
                         1 - 6 * pad_height / total_height,
                         ax_width / total_width,
                         ax_height / total_height])

        if options.yrange is not None:
            ymin, ymax = map(float, options.yrange.split(':'))
            plt.ylim(ymin, ymax)

        if options.out is None:
            plt.show()
        else:
            plt.savefig(options.out)

        if not prompt:
            break