def test_mean_aggregator():
    num_examples = 4
    batch_size = 2

    features = numpy.array([[0, 3], [2, 9], [2, 4], [5, 1]],
                           dtype=theano.config.floatX)
    dataset = IndexableDataset(OrderedDict([('features', features)]))
    data_stream = DataStream(dataset,
                             iteration_scheme=SequentialScheme(num_examples,
                                                               batch_size))

    x = tensor.matrix('features')
    y = (x**2).mean(axis=0)
    y.name = 'y'
    z = y.sum()
    z.name = 'z'

    y.tag.aggregation_scheme = Mean(y, 1.)
    z.tag.aggregation_scheme = Mean(z, 1.)

    assert_allclose(DatasetEvaluator([y]).evaluate(data_stream)['y'],
                    numpy.array([8.25, 26.75], dtype=theano.config.floatX))
    assert_allclose(DatasetEvaluator([z]).evaluate(data_stream)['z'],
                    numpy.array([35], dtype=theano.config.floatX))
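# A quick numpy hand-check (not part of the original test) of the constants
# asserted above. With Mean(y, 1.) the evaluator accumulates
# sum(y_batch) / sum(1) across batches, i.e. the mean of per-batch means;
# the batches here are equal-sized, so this equals the global mean of squares.
import numpy

features = numpy.array([[0, 3], [2, 9], [2, 4], [5, 1]], dtype='float64')
print((features ** 2).mean(axis=0))        # [ 8.25  26.75]
print((features ** 2).mean(axis=0).sum())  # 35.0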
def _test_mean_like_aggregator(scheme, func):
    """Common test function for both Mean and Perplexity."""
    features = numpy.array([[0, 3], [2, 9], [2, 4], [5, 1], [6, 7]],
                           dtype=theano.config.floatX)
    num_examples = features.shape[0]
    batch_size = 2

    dataset = IndexableDataset(OrderedDict([('features', features)]))
    data_stream = DataStream(dataset,
                             iteration_scheme=SequentialScheme(num_examples,
                                                               batch_size))

    x = tensor.matrix('features')
    y = (x**0.5).sum(axis=0)
    y.name = 'y'
    z = y.sum()
    z.name = 'z'

    y.tag.aggregation_scheme = scheme(y, x.shape[0])
    z.tag.aggregation_scheme = scheme(z, x.shape[0])

    y_desired = func((features**0.5).mean(axis=0))
    z_desired = func((features**0.5).sum(axis=1).mean(axis=0))

    assert_allclose(DatasetEvaluator([y]).evaluate(data_stream)['y'],
                    numpy.array(y_desired, dtype=theano.config.floatX))
    assert_allclose(DatasetEvaluator([z]).evaluate(data_stream)['z'],
                    numpy.array(z_desired, dtype=theano.config.floatX))
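# The suite presumably drives this helper once per scheme. A minimal sketch,
# assuming `Perplexity` (the exponential of the running mean) is importable
# alongside `Mean` from blocks.monitoring.aggregation; the driver names here
# are illustrative:
def test_mean_aggregator_via_helper():
    _test_mean_like_aggregator(Mean, lambda x: x)


def test_perplexity_aggregator():
    _test_mean_like_aggregator(Perplexity, numpy.exp)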
def build_evaluator(model_name):
    config = importlib.import_module('.%s' % model_name, 'config')
    config.batch_size = 1
    config.shuffle_questions = False

    # Build datastream
    valid_path = os.path.join(os.getcwd(),
                              "squad_rare/dev-v1.0_tokenized.json")
    vocab_path = os.path.join(os.getcwd(), "squad_rare/vocab.txt")

    import data
    ds, valid_stream = data.setup_squad_datastream(valid_path, vocab_path,
                                                   config)

    dump_path = os.path.join("model_params", model_name + ".pkl")

    # Build model
    m = config.Model(config, ds.vocab_size)
    model = Model(m.sgd_cost)

    if os.path.isfile(dump_path):
        # Pickles are binary; open the dump in 'rb' mode.
        with open(dump_path, 'rb') as f:
            print("Analysing %s from best dump" % model_name)
            model.set_parameter_values(cPickle.load(f))
    else:
        print("Analysing %s with random parameters" % model_name)

    evaluator = DatasetEvaluator(m.analyse_vars)
    return evaluator, valid_stream, ds
def __init__(self, variables, data_stream, prefix=None, **kwargs):
    kwargs.setdefault("after_every_epoch", True)
    kwargs.setdefault("before_first_epoch", True)
    super(DataStreamMonitoring, self).__init__(**kwargs)
    self._evaluator = DatasetEvaluator(variables)
    self.data_stream = data_stream
    self.prefix = prefix
def test_dataset_evaluators():
    X = theano.tensor.matrix('X')
    brick = TestBrick(name='test_brick')
    Y = brick.apply(X)
    graph = ComputationGraph([Y])
    monitor_variables = [v for v in graph.auxiliary_variables]
    validator = DatasetEvaluator(monitor_variables)

    data = [numpy.arange(1, 5, dtype=floatX).reshape(2, 2),
            numpy.arange(10, 16, dtype=floatX).reshape(3, 2)]
    data_stream = IterableDataset(dict(X=data)).get_example_stream()

    values = validator.evaluate(data_stream)
    assert values['test_brick_apply_V_squared'] == 4
    numpy.testing.assert_allclose(values['test_brick_apply_mean_row_mean'],
                                  numpy.vstack(data).mean())
    per_batch_mean = numpy.mean([batch.mean() for batch in data])
    numpy.testing.assert_allclose(
        values['test_brick_apply_mean_batch_element'], per_batch_mean)

    with assert_raises(Exception) as ar:
        data_stream = IterableDataset(dict(X2=data)).get_example_stream()
        validator.evaluate(data_stream)
    assert "Not all data sources" in ar.exception.args[0]
def __init__(self, output_vars, train_data_stream, test_data_streams,
             **kwargs):
    super(FinalTestMonitoring, self).__init__(**kwargs)
    if not isinstance(test_data_streams, dict):
        # Wrap a single stream in a dict keyed by this extension's prefix
        # (the original used a set literal here, which was a bug).
        self.tst_streams = {self.prefix: test_data_streams}
    else:
        self.tst_streams = test_data_streams
    self._tst_evaluator = DatasetEvaluator(output_vars)
def __init__(self, output_vars, train_data_stream, test_data_stream,
             **kwargs):
    output_vars = self.replicate_vars(output_vars)
    super(FinalTestMonitoring, self).__init__(**kwargs)
    self.trn_stream = train_data_stream
    self.tst_stream = test_data_stream

    bn_ps, bn_share, output_vars_replaced = self._get_bn_params(output_vars)

    if self._bn:
        updates = self._get_updates(bn_ps, bn_share)
        trn_evaluator = DatasetEvaluator(bn_ps, updates=updates)
    else:
        trn_evaluator = None

    self._trn_evaluator = trn_evaluator
    self._tst_evaluator = DatasetEvaluator(output_vars_replaced)
def test_min_max_aggregators():
    num_examples = 4
    batch_size = 2

    features = numpy.array([[2, 3], [2, 9], [2, 4], [5, 1]],
                           dtype=theano.config.floatX)
    dataset = IndexableDataset(OrderedDict([('features', features)]))
    data_stream = DataStream(dataset,
                             iteration_scheme=SequentialScheme(num_examples,
                                                               batch_size))

    x = tensor.matrix('features')
    y = (x**2).sum(axis=0)
    y.name = 'y'
    z = y.min()
    z.name = 'z'

    y.tag.aggregation_scheme = Maximum(y)
    z.tag.aggregation_scheme = Minimum(z)

    assert_allclose(DatasetEvaluator([y]).evaluate(data_stream)['y'],
                    numpy.array([29, 90], dtype=theano.config.floatX))
    assert_allclose(DatasetEvaluator([z]).evaluate(data_stream)['z'],
                    numpy.array([8], dtype=theano.config.floatX))

    # Make sure accumulators are reset.
    features = numpy.array([[2, 1], [1, 3], [1, -1], [2.5, 1]],
                           dtype=theano.config.floatX)
    dataset = IndexableDataset(OrderedDict([('features', features)]))
    data_stream = DataStream(dataset,
                             iteration_scheme=SequentialScheme(num_examples,
                                                               batch_size))

    assert_allclose(DatasetEvaluator([y]).evaluate(data_stream)['y'],
                    numpy.array([7.25, 10], dtype=theano.config.floatX))
    assert_allclose(DatasetEvaluator([z]).evaluate(data_stream)['z'],
                    numpy.array([2], dtype=theano.config.floatX))
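# Hand-check (not part of the original test) for the first pair of
# assertions: Maximum/Minimum aggregate elementwise across the per-batch
# values of the expression, not across individual examples.
import numpy

features = numpy.array([[2, 3], [2, 9], [2, 4], [5, 1]], dtype='float64')
batches = [features[:2], features[2:]]        # SequentialScheme, size 2
ys = [(b ** 2).sum(axis=0) for b in batches]  # [8, 90] and [29, 17]
print(numpy.maximum(*ys))                     # [29. 90.]
print(min(y.min() for y in ys))               # 8.0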
def evaluate(self):
    evaluator = DatasetEvaluator([self.cost, self.error] +
                                 self.experiment.get_quantitites_vars())
    print('subset\tcost\terror\tf_score\tauc')
    for split in ['train', 'dev', 'test']:
        stream = getattr(self, split + '_stream')
        print('{0}\t{cost}\t{error}\t{f_score}\t{auc}'.format(
            split, **evaluator.evaluate(stream)))
    y_prob, y_test = self.get_targets(self.test_stream)
    report_performance(y_test, y_prob, self.params['threshold'])
def __init__(self, variables, data_stream, updates=None,
             noise_parameters=None, **kwargs):
    kwargs.setdefault("after_epoch", True)
    kwargs.setdefault("before_first_epoch", True)
    super(DataStreamMonitoring, self).__init__(**kwargs)
    self._evaluator = DatasetEvaluator(variables, updates)
    self.data_stream = data_stream
    self.noise_parameters = noise_parameters
def create_act_table(self, save_to, act_table):
    batch_size = 500
    image_size = (28, 28)
    output_size = 10
    convnet = create_lenet_5()
    layers = convnet.layers

    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Normalize input and apply the convnet
    probs = convnet.apply(x)
    cg = ComputationGraph([probs])

    def full_brick_name(brick):
        return '/'.join([''] + [b.name for b in brick.get_unique_path()])

    # Find layer outputs to probe
    outmap = OrderedDict(
        (full_brick_name(get_brick(out)), out)
        for out in VariableFilter(
            roles=[OUTPUT], bricks=[Convolutional, Linear])(cg.variables))
    # Generate pics for biases
    biases = VariableFilter(roles=[BIAS])(cg.parameters)

    # Generate parallel array, in the same order, for outputs
    outs = [outmap[full_brick_name(get_brick(b))] for b in biases]

    # Figure work count
    error_rate = (MisclassificationRate().apply(y.flatten(), probs)
                  .copy(name='error_rate'))
    max_activation_table = (MaxActivationTable().apply(outs)
                            .copy(name='max_activation_table'))
    max_activation_table.tag.aggregation_scheme = (
        Concatenate(max_activation_table))

    model = Model([error_rate, max_activation_table])

    # Load it with trained parameters
    params = load_parameters(open(save_to, 'rb'))
    model.set_parameter_values(params)

    mnist_test_stream = DataStream.default_stream(
        self.mnist_test,
        iteration_scheme=SequentialScheme(
            self.mnist_test.num_examples, batch_size))

    evaluator = DatasetEvaluator([error_rate, max_activation_table])
    results = evaluator.evaluate(mnist_test_stream)
    table = results['max_activation_table']
    pickle.dump(table, open(act_table, 'wb'))
    return table
def test_concatenate_aggregator():
    num_examples = 4
    batch_size = 2

    features = numpy.array([[2, 3], [2, 9], [2, 4], [5, 1]],
                           dtype=theano.config.floatX)
    dataset = IndexableDataset(OrderedDict([('features', features)]))
    data_stream = DataStream(dataset,
                             iteration_scheme=SequentialScheme(num_examples,
                                                               batch_size))

    x = tensor.matrix('features')
    y = x.sum(axis=0).copy('y')
    z = y.sum(axis=0).copy('z')

    y.tag.aggregation_scheme = Concatenate(y)
    z.tag.aggregation_scheme = Concatenate(z)

    assert_allclose(DatasetEvaluator([y]).evaluate(data_stream)['y'],
                    numpy.array([[4, 12], [7, 5]],
                                dtype=theano.config.floatX))
    assert_allclose(DatasetEvaluator([z]).evaluate(data_stream)['z'],
                    numpy.array([16, 12], dtype=theano.config.floatX))
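# Hand-check (not part of the original test): Concatenate stacks each
# batch's value along a new leading axis, so a per-batch vector becomes a
# matrix and a per-batch scalar becomes a vector.
import numpy

features = numpy.array([[2, 3], [2, 9], [2, 4], [5, 1]], dtype='float64')
batches = [features[:2], features[2:]]
ys = [b.sum(axis=0) for b in batches]      # [4, 12] and [7, 5]
print(numpy.stack(ys))                     # [[ 4. 12.]  [ 7.  5.]]
print(numpy.array([y.sum() for y in ys]))  # [16. 12.]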
def __init__(self, variables, data_stream, sharedDataTrain,
             sharedDataActualTest, updates=None, saveEveryXIteration=10,
             **kwargs):
    super(DataStreamMonitoringPlot, self).__init__(**kwargs)
    self._evaluator = DatasetEvaluator(variables, updates)
    self.data_stream = data_stream
    self.dataTrain = sharedDataTrain
    self.dataTest = sharedDataActualTest
    self.saveEveryXIteration = saveEveryXIteration
    self.curTime = time.time()
def test_dataset_evaluators():
    X = theano.tensor.vector('X')
    Y = theano.tensor.vector('Y')

    data = [numpy.arange(1, 7, dtype=theano.config.floatX).reshape(3, 2),
            numpy.arange(11, 17, dtype=theano.config.floatX).reshape(3, 2)]
    data_stream = IterableDataset(dict(X=data[0],
                                       Y=data[1])).get_example_stream()

    validator = DatasetEvaluator([
        CrossEntropy(requires=[X, Y], name="monitored_cross_entropy0"),
        # Monitor the same quantity twice to make sure its state is reset
        # between evaluations.
        CrossEntropy(requires=[X, Y], name="monitored_cross_entropy1"),
        CategoricalCrossEntropy().apply(X, Y)])
    values = validator.evaluate(data_stream)
    numpy.testing.assert_allclose(
        values['monitored_cross_entropy1'],
        values['categoricalcrossentropy_apply_cost'])
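# The CrossEntropy above is a MonitoredQuantity rather than a Theano
# expression: DatasetEvaluator drives it through the initialize /
# aggregate / get_aggregated_value protocol. A minimal sketch of one
# plausible implementation (the exact arithmetic is an assumption):
import numpy
from blocks.monitoring.aggregation import MonitoredQuantity


class CrossEntropy(MonitoredQuantity):
    """Cross-entropy averaged over the batches seen so far."""

    def initialize(self):
        self.total, self.batches_seen = 0.0, 0

    def aggregate(self, x, y):
        # Called once per batch with the numeric values of `requires`.
        self.total += -(x * numpy.log(y)).sum()
        self.batches_seen += 1

    def get_aggregated_value(self):
        return self.total / self.batches_seen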
def __init__(self, actpic_variables=None, pics=None, case_labels=None,
             label_count=None, data_stream=None, rectify=False, **kwargs):
    center_val = 0.5
    self.input_pics = pics
    self.actpic_variables = actpic_variables

    # Attribute pics to labels: (cases, picy, picx) becomes
    # (cases, labels, picy, picx).
    zeroed_pics = pics - center_val
    attributed_pics = tensor.batched_tensordot(
        tensor.extra_ops.to_one_hot(case_labels.flatten(), label_count),
        zeroed_pics[:, 0, :, :], axes=0)
    self.actpics = [self._create_actpic_image_for(
        name + '_actpic', var, attributed_pics, rectify)
        for name, var in self.actpic_variables.items()]

    self.evaluator = DatasetEvaluator(self.actpics)
    self.data_stream = data_stream
    self.results = None
    super(ActpicExtension, self).__init__(**kwargs)
o4 = Logistic().apply(n4)
a5 = l5.apply(o4)
n5, M5, S5 = normalize(a5, output_dim=10)
probs = Softmax().apply(n5)

statistics_list = [(M1, S1, a1), (M2, S2, a2), (M3, S3, a3),
                   (M4, S4, a4), (M5, S5, a5)]

# Initialize the shared variables: for each (M, S) pair, compute the mean
# and variance of the corresponding activation over the whole dataset.
if normalization == 'bn2':
    for m, s, var in statistics_list:
        var.tag.aggregation_scheme = MeanAndVariance(var, var.shape[0],
                                                     axis=0)
        init_mn, init_var = DatasetEvaluator([var]).evaluate(
            stream_train)[var.name]
        m.set_value(init_mn.astype(floatX))
        s.set_value(sqrt(init_var).astype(floatX))

cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
cost.name = 'cost'
error_rate = MisclassificationRate().apply(y.flatten(), probs)
error_rate.name = 'error_rate'

cg = ComputationGraph([cost])
parameters = cg.parameters

# Also learn M and S by gradient descent
if normalization == 'bn2':
    for m, s, var in statistics_list:
        parameters.extend([m, s])
def train_lstm(train, test, input_dim, hidden_dimension, columns, epochs,
               save_file, execution_name, batch_size, plot):
    stream_train = build_stream(train, batch_size, columns)
    stream_test = build_stream(test, batch_size, columns)

    # The train stream returns x as (TimeSequence, BatchSize, Dimensions)
    # and y as (TimeSequence, BatchSize, 1).
    x = T.tensor3('x')
    y = T.tensor3('y')
    y = y.reshape((y.shape[1], y.shape[0], y.shape[2]))

    linear_lstm = LinearLSTM(input_dim, 1, hidden_dimension,
                             # print_intermediate=True,
                             print_attrs=['__str__', 'shape'])

    y_hat = linear_lstm.apply(x)
    linear_lstm.initialize()

    c_test = AbsolutePercentageError().apply(y, y_hat)
    c_test.name = 'mape'

    c = SquaredError().apply(y, y_hat)
    c.name = 'cost'

    cg = ComputationGraph(c_test)

    def one_perc_min(current_value, best_value):
        if (1 - best_value / current_value) > 0.01:
            return best_value
        else:
            return current_value

    extensions = []
    extensions.append(DataStreamMonitoring(variables=[c, c_test],
                                           data_stream=stream_test,
                                           prefix='test',
                                           after_epoch=False,
                                           every_n_epochs=100))
    extensions.append(TrainingDataMonitoring(variables=[c_test],
                                             prefix='train',
                                             after_epoch=True))
    extensions.append(FinishAfter(after_n_epochs=epochs))
    # extensions.append(Printing())
    # extensions.append(ProgressBar())
    extensions.append(TrackTheBest('test_mape', choose_best=one_perc_min))
    extensions.append(TrackTheBest('test_cost', choose_best=one_perc_min))
    extensions.append(FinishIfNoImprovementAfter('test_cost_best_so_far',
                                                 epochs=500))

    # Save only the parameters, not the whole main loop, and only when
    # test_cost_best_so_far is updated.
    checkpoint = Checkpoint(save_file, save_main_loop=False,
                            after_training=False)
    checkpoint.add_condition(['after_epoch'],
                             predicate=OnLogRecord('test_cost_best_so_far'))
    extensions.append(checkpoint)

    if BOKEH_AVAILABLE and plot:
        extensions.append(Plot(execution_name, channels=[[
            # 'train_cost',
            'test_cost']]))

    step_rule = Adam()
    algorithm = GradientDescent(cost=c_test, parameters=cg.parameters,
                                step_rule=step_rule)
    main_loop = MainLoop(algorithm, stream_train, model=Model(c_test),
                         extensions=extensions)
    main_loop.run()

    test_mape = 0
    if main_loop.log.status.get('best_test_mape', None) is None:
        with open(save_file, 'rb') as f:
            parameters = load_parameters(f)
        model = main_loop.model
        model.set_parameter_values(parameters)
        ev = DatasetEvaluator([c_test])
        test_mape = ev.evaluate(stream_test)['mape']
    else:
        test_mape = main_loop.log.status['best_test_mape']

    return test_mape, main_loop.log.status['epochs_done']
def main(save_to, hist_file):
    batch_size = 365
    feature_maps = [6, 16]
    mlp_hiddens = [120, 84]
    conv_sizes = [5, 5]
    pool_sizes = [2, 2]
    image_size = (28, 28)
    output_size = 10

    # The above are from LeCun's paper. The blocks example had:
    #   feature_maps = [20, 50]
    #   mlp_hiddens = [500]

    # Use ReLUs everywhere and softmax for the final prediction
    conv_activations = [Rectifier() for _ in feature_maps]
    mlp_activations = [Rectifier() for _ in mlp_hiddens] + [Softmax()]
    convnet = LeNet(conv_activations, 1, image_size,
                    filter_sizes=list(zip(conv_sizes, conv_sizes)),
                    feature_maps=feature_maps,
                    pooling_sizes=list(zip(pool_sizes, pool_sizes)),
                    top_mlp_activations=mlp_activations,
                    top_mlp_dims=mlp_hiddens + [output_size],
                    border_mode='valid',
                    weights_init=Uniform(width=.2),
                    biases_init=Constant(0))

    # We push initialization config to set different initialization schemes
    # for convolutional layers.
    convnet.push_initialization_config()
    convnet.layers[0].weights_init = Uniform(width=.2)
    convnet.layers[1].weights_init = Uniform(width=.09)
    convnet.top_mlp.linear_transformations[0].weights_init = \
        Uniform(width=.08)
    convnet.top_mlp.linear_transformations[1].weights_init = \
        Uniform(width=.11)
    convnet.initialize()

    logging.info("Input dim: {} {} {}".format(
        *convnet.children[0].get_dim('input_')))
    for i, layer in enumerate(convnet.layers):
        if isinstance(layer, Activation):
            logging.info("Layer {} ({})".format(
                i, layer.__class__.__name__))
        else:
            logging.info("Layer {} ({}) dim: {} {} {}".format(
                i, layer.__class__.__name__, *layer.get_dim('output')))

    mnist_test = MNIST(("test",), sources=['features', 'targets'])

    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Normalize input and apply the convnet
    probs = convnet.apply(x)
    error_rate = (MisclassificationRate().apply(y.flatten(), probs)
                  .copy(name='error_rate'))
    confusion = (ConfusionMatrix().apply(y.flatten(), probs)
                 .copy(name='confusion'))
    confusion.tag.aggregation_scheme = Sum(confusion)

    model = Model([error_rate, confusion])

    # Load it with trained parameters
    params = load_parameters(open(save_to, 'rb'))
    model.set_parameter_values(params)

    def full_brick_name(brick):
        return '/'.join([''] + [b.name for b in brick.get_unique_path()])

    # Find layer outputs to probe
    outs = OrderedDict(
        (full_brick_name(get_brick(out)), out)
        for out in VariableFilter(
            roles=[OUTPUT], bricks=[Convolutional, Linear])(model.variables))

    # Load histogram information
    with open(hist_file, 'rb') as handle:
        histograms = pickle.load(handle)

    # Corpora
    mnist_train = MNIST(("train",))
    mnist_train_stream = DataStream.default_stream(
        mnist_train,
        iteration_scheme=ShuffledScheme(
            mnist_train.num_examples, batch_size))

    mnist_test = MNIST(("test",))
    mnist_test_stream = DataStream.default_stream(
        mnist_test,
        iteration_scheme=ShuffledScheme(
            mnist_test.num_examples, batch_size))

    # Probe the given layer
    target_layer = '/lenet/mlp/linear_0'
    next_layer_param = '/lenet/mlp/linear_1.W'
    sample = extract_sample(outs[target_layer], mnist_test_stream)
    print('sample shape', sample.shape)

    # Figure out which neurons to ablate
    hist = histograms[('linear_1', 'b')]
    targets = [i for i in range(hist.shape[1])
               if hist[2, i] * hist[7, i] < 0]
    print('ablating', len(targets), ':', targets)

    # Now adjust the next layer weights based on the probe
    param = model.get_parameter_dict()[next_layer_param]
    print('param shape', param.get_value().shape)
    new_weights = ablate_inputs(targets, sample, param.get_value(),
                                compensate=False)
    param.set_value(new_weights)

    # Evaluation pass
    evaluator = DatasetEvaluator([error_rate, confusion])
    print(evaluator.evaluate(mnist_test_stream))
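# Toy illustration (not from the source) of the Sum aggregation attached to
# `confusion` above: per-batch count matrices simply add up across the
# stream, shown here with 2x2 stand-ins for the 10x10 MNIST matrices.
import numpy

batch_confusions = [numpy.array([[3, 1], [0, 4]]),
                    numpy.array([[2, 0], [1, 5]])]
print(sum(batch_confusions))  # [[5 1]
                              #  [1 9]]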
on_unused_input="warn") for batch in train_stream.get_epoch_iterator(as_dict=True): estimate_fn(**batch) new_popstats = dict((popstat, popstat.get_value()) for popstat, _ in updates) from blocks.monitoring.evaluators import DatasetEvaluator results = dict() for situation in "training inference".split(): results[situation] = dict() outputs, = [ extension._evaluator.theano_variables for extension in main_loop.extensions if getattr(extension, "prefix", None) == "valid_%s" % situation ] evaluator = DatasetEvaluator(outputs) for which_set in "train valid test".split(): results[situation][which_set] = OrderedDict( (length, evaluator.evaluate( get_stream(which_set=which_set, batch_size=100, augment=False, length=length))) for length in [50, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]) results["proper_test"] = evaluator.evaluate( get_stream(which_set="test", batch_size=1, length=446184)) import cPickle cPickle.dump(
def __init__(self, save_to):
    batch_size = 500
    image_size = (28, 28)
    output_size = 10
    convnet = create_lenet_5()
    layers = convnet.layers

    logging.info("Input dim: {} {} {}".format(
        *convnet.children[0].get_dim('input_')))
    for i, layer in enumerate(convnet.layers):
        if isinstance(layer, Activation):
            logging.info("Layer {} ({})".format(
                i, layer.__class__.__name__))
        else:
            logging.info("Layer {} ({}) dim: {} {} {}".format(
                i, layer.__class__.__name__, *layer.get_dim('output')))

    mnist_test = MNIST(("test",), sources=['features', 'targets'])
    basis = create_fair_basis(mnist_test, 10, 10)

    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Normalize input and apply the convnet
    probs = convnet.apply(x)
    cg = ComputationGraph([probs])

    def full_brick_name(brick):
        return '/'.join([''] + [b.name for b in brick.get_unique_path()])

    # Find layer outputs to probe
    outs = OrderedDict(
        (full_brick_name(get_brick(out)), out)
        for out in VariableFilter(
            roles=[OUTPUT], bricks=[Convolutional, Linear])(cg.variables))

    error_rate = (MisclassificationRate().apply(y.flatten(), probs)
                  .copy(name='error_rate'))
    confusion = (ConfusionMatrix().apply(y.flatten(), probs)
                 .copy(name='confusion'))
    confusion.tag.aggregation_scheme = Sum(confusion)
    confusion_image = (ConfusionImage().apply(y.flatten(), probs, x)
                       .copy(name='confusion_image'))
    confusion_image.tag.aggregation_scheme = Sum(confusion_image)

    model = Model([error_rate, confusion, confusion_image] +
                  list(outs.values()))

    # Load it with trained parameters
    params = load_parameters(open(save_to, 'rb'))
    model.set_parameter_values(params)

    mnist_test = MNIST(("test",))
    mnist_test_stream = DataStream.default_stream(
        mnist_test,
        iteration_scheme=SequentialScheme(
            mnist_test.num_examples, batch_size))

    self.model = model
    self.mnist_test_stream = mnist_test_stream
    self.evaluator = DatasetEvaluator(
        [error_rate, confusion, confusion_image])
    self.base_results = self.evaluator.evaluate(mnist_test_stream)

    # TODO: allow target layer to be parameterized
    self.target_layer = '/lenet/mlp/linear_0'
    self.next_layer_param = '/lenet/mlp/linear_1.W'
    self.base_sample = extract_sample(
        outs[self.target_layer], mnist_test_stream)
    self.base_param_value = (
        model.get_parameter_dict()[
            self.next_layer_param].get_value().copy())
def __init__(self, save_to):
    batch_size = 500
    image_size = (28, 28)
    output_size = 10
    convnet = create_lenet_5()
    layers = convnet.layers

    mnist_test = MNIST(("test",), sources=['features', 'targets'])

    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Normalize input and apply the convnet
    probs = convnet.apply(x)
    cg = ComputationGraph([probs])

    def full_brick_name(brick):
        return '/'.join([''] + [b.name for b in brick.get_unique_path()])

    # Find layer outputs to probe
    outmap = OrderedDict(
        (full_brick_name(get_brick(out)), out)
        for out in VariableFilter(
            roles=[OUTPUT], bricks=[Convolutional, Linear])(cg.variables))
    # Generate pics for biases
    biases = VariableFilter(roles=[BIAS])(cg.parameters)

    # Generate parallel array, in the same order, for outputs
    outs = [outmap[full_brick_name(get_brick(b))] for b in biases]

    # Figure work count
    error_rate = (MisclassificationRate().apply(y.flatten(), probs)
                  .copy(name='error_rate'))
    sensitive_unit_count = (SensitiveUnitCount().apply(
        y.flatten(), probs, biases).copy(name='sensitive_unit_count'))
    sensitive_unit_count.tag.aggregation_scheme = (
        Concatenate(sensitive_unit_count))
    active_unit_count = (ActiveUnitCount().apply(outs)
                         .copy(name='active_unit_count'))
    active_unit_count.tag.aggregation_scheme = (
        Concatenate(active_unit_count))
    ignored_unit_count = (IgnoredUnitCount().apply(
        y.flatten(), probs, biases, outs).copy(name='ignored_unit_count'))
    ignored_unit_count.tag.aggregation_scheme = (
        Concatenate(ignored_unit_count))

    model = Model([error_rate, sensitive_unit_count,
                   active_unit_count, ignored_unit_count])

    # Load it with trained parameters
    params = load_parameters(open(save_to, 'rb'))
    model.set_parameter_values(params)

    mnist_test = MNIST(("test",))
    mnist_test_stream = DataStream.default_stream(
        mnist_test,
        iteration_scheme=SequentialScheme(
            mnist_test.num_examples, batch_size))

    evaluator = DatasetEvaluator([error_rate, sensitive_unit_count,
                                  active_unit_count, ignored_unit_count])
    results = evaluator.evaluate(mnist_test_stream)

    def save_ranked_image(scores, filename):
        sorted_instances = scores.argsort()
        filmstrip = Filmstrip(image_shape=(28, 28), grid_shape=(100, 100))
        for i, index in enumerate(sorted_instances):
            filmstrip.set_image((i // 100, i % 100),
                                mnist_test.get_data(request=index)[0])
        filmstrip.save(filename)

    save_ranked_image(results['sensitive_unit_count'], 'sensitive.jpg')
    save_ranked_image(results['active_unit_count'], 'active.jpg')
    save_ranked_image(results['ignored_unit_count'], 'ignored.jpg')
def evaluate(args, main_loop):
    # Load parameters of the trained model
    trained_main_loop = load(args.evaluate)
    transfer_parameters(trained_main_loop, main_loop)
    del trained_main_loop

    # Extract population statistic updates
    updates = [
        update for update in main_loop.algorithm.updates
        # FRAGILE
        if re.search("_(mean|var)$", update[0].name)
    ]
    print(updates)
    old_popstats = dict((popstat, popstat.get_value())
                        for popstat, _ in updates)

    # The baseline doesn't need all this
    if updates:
        train_stream = get_stream(which_set="train", batch_size=1000,
                                  length=args.length)
        nbatches = len(list(train_stream.get_epoch_iterator()))

        # Destructure the moving-average expression to construct a new one
        new_updates = []
        for popstat, value in updates:
            # FRAGILE
            assert value.owner.op.scalar_op == theano.scalar.add
            terms = value.owner.inputs
            # right multiplicand of second term is popstat
            assert popstat in theano.gof.graph.ancestors(
                [terms[1].owner.inputs[1]])
            # right multiplicand of first term is batchstat
            batchstat = terms[0].owner.inputs[1]

            old_popstats[popstat] = popstat.get_value()
            # FRAGILE: assume population statistics are not used in the
            # computation of batch statistics; otherwise the popstat should
            # always have a reasonable value
            popstat.set_value(0 * popstat.get_value(borrow=True))
            new_updates.append(
                (popstat, popstat + batchstat / float(nbatches)))

        # FRAGILE: assume all the other algorithm updates are unneeded for
        # the computation of batch statistics
        estimate_fn = theano.function(main_loop.algorithm.inputs, [],
                                      updates=new_updates,
                                      on_unused_input="warn")
        print("averaging batch statistics over", nbatches, "batches")
        for batch in train_stream.get_epoch_iterator(as_dict=True):
            estimate_fn(**batch)
            sys.stdout.write(".")
            sys.stdout.flush()
        print()

    new_popstats = dict((popstat, popstat.get_value())
                        for popstat, _ in updates)

    from blocks.monitoring.evaluators import DatasetEvaluator
    results = dict()
    for situation in "training inference".split():
        results[situation] = dict()
        outputs, = [extension._evaluator.theano_variables
                    for extension in main_loop.extensions
                    if getattr(extension, "prefix", None) ==
                    "valid_%s" % situation]
        evaluator = DatasetEvaluator(outputs)
        for which_set in "valid test".split():
            print(situation, which_set)
            results[situation][which_set] = OrderedDict(
                (length, evaluator.evaluate(get_stream(
                    which_set=which_set, batch_size=100, length=length)))
                for length in [1000])

    try:
        results["proper_test"] = evaluator.evaluate(
            get_stream(which_set="test", batch_size=1, length=5 * 10**6))
    except:
        # that will probably run out of memory
        pass

    import cPickle
    cPickle.dump(
        dict(results=results, old_popstats=old_popstats,
             new_popstats=new_popstats),
        open(sys.argv[1] + "_popstat_results.pkl", "w"))
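# Toy check (not from the source) of the popstat trick above: zero the
# statistic, then apply `popstat += batchstat / nbatches` once per batch;
# after one epoch the result is the plain average of per-batch statistics.
import numpy

batchstats = numpy.array([0.5, 0.7, 0.6, 0.8])
nbatches = len(batchstats)
popstat = 0.0
for batchstat in batchstats:
    popstat += batchstat / nbatches
print(popstat, batchstats.mean())  # both 0.65 (up to float rounding)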
def analyse(model_name):
    config = importlib.import_module('.%s' % model_name, 'config')
    config.batch_size = 1
    config.shuffle_questions = False

    # Build datastream
    valid_path = os.path.join(os.getcwd(),
                              "squad_rare/dev-v1.0_tokenized.json")
    vocab_path = os.path.join(os.getcwd(), "squad_rare/vocab.txt")

    ds, valid_stream = data.setup_squad_datastream(valid_path, vocab_path,
                                                   config)

    dump_path = os.path.join("model_params", model_name + ".pkl")
    analysis_path = os.path.join("analysis", model_name + ".html")

    # Build model
    m = config.Model(config, ds.vocab_size)
    model = Model(m.sgd_cost)

    if os.path.isfile(dump_path):
        # Pickles are binary; open the dump in 'rb' mode.
        with open(dump_path, 'rb') as f:
            print("Analysing %s from best dump" % model_name)
            model.set_parameter_values(cPickle.load(f))
    else:
        print("Analysing %s with random parameters" % model_name)

    printed = 0
    evaluator = DatasetEvaluator(m.analyse_vars)

    f = open(analysis_path, 'w')
    f.write('<html>')
    f.write('<body style="background-color:white">')

    for batch in valid_stream.get_epoch_iterator(as_dict=True):
        if batch["context"].shape[1] > 150:
            continue

        evaluator.initialize_aggregators()
        evaluator.process_batch(batch)
        analysis_results = evaluator.get_aggregated_values()

        if analysis_results["cost"] > 0:
            f.write('<p>')
            for i in range(0, batch["question"].shape[1]):
                f.write('{0} '.format(ds.vocab[batch["question"][0][i]]))
            f.write('</p>')

        if analysis_results["cost"] > 0:
            foreground = 'red'
        else:
            foreground = 'green'
        f.write('<p style="color:{0}">'.format(foreground))
        for a in batch["answer"][0]:
            f.write('{0} '.format(ds.vocab[a]))
        f.write('</p>')

        for key in analysis_results:
            if "att" in key:
                analysis_result = analysis_results[key].T
                lower = 0  # numpy.min(analysis_result)
                upper = 1  # numpy.max(analysis_result)
                if abs(upper - lower) < 0.0001:
                    lower = 0
                    upper = 1
                f.write('<p>{0}: {1}, {2}</p>'.format(key, lower, upper))
                f.write('<p>')
                for i in range(0, analysis_result.shape[1]):
                    att = analysis_result[0][i]
                    att_norm = (att - lower) / (upper - lower)
                    background = (1 - att_norm, 1 - (0.8 * att_norm),
                                  1 - (0.6 * att_norm))
                    if att_norm > 0.7:
                        foreground = 'white'
                    else:
                        foreground = 'black'
                    if batch["context"][0][i] in analysis_results["pred"]:
                        # and att > 0.25:
                        foreground = 'red'
                    if batch["context"][0][i] in batch["answer"][0]:
                        foreground = 'green'
                    f.write('<span style="color:{0};'
                            'background-color:{1}">{2} </span>'.format(
                                foreground, colors.rgb2hex(background),
                                ds.vocab[batch["context"][0][i]]))
                f.write('</p>')

        f.write('<hr>')
        printed += 1
        if printed >= 20:
            break

    f.write('</body>')
    f.write('</html>')
    f.close()