def get_updates(self, learning_rate, grads, lr_scalers):
    """Return the wrapped rule's updates extended with accumulator updates.

    The call is first delegated to the wrapped learning rule. Then, for
    every monitored value, an update is added that increases the matching
    accumulator by the value, with the value's inputs replaced by inputs
    of the gradients.

    The replacement is name-based and admittedly hacky: in PyLearn2 the
    inputs of the gradients are typically named `$ALGO[$SOURCE]`, where
    `$ALGO` is the algorithm name and `$SOURCE` is a source name from the
    data specification. Inputs of the monitored values are expected to be
    named after the sources, so an input called `name` is matched with the
    single gradient input whose name ends with `[name]`.

    """
    updates = self.learning_rule.get_updates(learning_rate, grads,
                                             lr_scalers)
    grad_inputs = ComputationGraph(list(grads.values())).dict_of_inputs()
    for monitored, accumulator in zip(self.values, self.accumulators):
        monitored_inputs = ComputationGraph(monitored).dict_of_inputs()
        substitutions = {}
        for source_name, placeholder in monitored_inputs.items():
            # Exactly one gradient input must carry the `[source]` suffix.
            suffix = '[{}]'.format(source_name)
            candidates = [key for key in grad_inputs
                          if key.endswith(suffix)]
            matched = grad_inputs[unpack(candidates, singleton=True)]
            substitutions[placeholder] = tensor.unbroadcast(
                matched, *range(matched.ndim))
        updates[accumulator] = accumulator + theano.clone(monitored,
                                                          substitutions)
    self._callback_called = True
    updates.update(self.updates)
    return updates
def get_cost_graph(self, batch=True, prediction=None, prediction_mask=None):
    """Build the computation graph of the cost.

    Parameters
    ----------
    batch : bool, optional
        If ``True``, the batch inputs, labels and masks stored on the
        instance are used. Otherwise ``self.single_inputs`` is converted
        with ``self.bottom.single_to_batch_inputs``, the ground truth is
        ``self.single_labels[:, None]`` and no ground truth mask is used.
    prediction : variable, optional
        Passed to ``self.cost`` as ``labels``. Defaults to the ground
        truth.
    prediction_mask : variable, optional
        Passed to ``self.cost`` as ``labels_mask``. Defaults to the ground
        truth mask.

    Returns
    -------
    cost_cg : ComputationGraph
        The graph of the cost. When the criterion name starts with
        ``"mse"``, the variable named ``groundtruth`` in that graph is
        replaced by the ground truth selected above.

    """
    if batch:
        inputs = self.inputs
        inputs_mask = self.inputs_mask
        groundtruth = self.labels
        groundtruth_mask = self.labels_mask
    else:
        inputs, inputs_mask = self.bottom.single_to_batch_inputs(
            self.single_inputs)
        groundtruth = self.single_labels[:, None]
        groundtruth_mask = None

    # Compare against None explicitly: truth-testing a symbolic variable
    # is not a reliable "was it given?" check and can raise TypeError.
    if prediction is None:
        prediction = groundtruth
    if prediction_mask is None:
        prediction_mask = groundtruth_mask

    cost = self.cost(inputs_mask=inputs_mask,
                     labels=prediction,
                     labels_mask=prediction_mask,
                     **inputs)
    cost_cg = ComputationGraph(cost)
    if self.criterion['name'].startswith("mse"):
        # Exactly one variable named 'groundtruth' is expected here.
        placeholder, = VariableFilter(theano_name='groundtruth')(cost_cg)
        cost_cg = cost_cg.replace({placeholder: groundtruth})
    return cost_cg
def test_replace():
    """Check that `ComputationGraph.replace` works on an output variable."""
    x = tensor.scalar()
    y = x + 1
    original_cg = ComputationGraph([y])
    doubled_cg = original_cg.replace([(y, 2 * y)])
    doubled_output = doubled_cg.outputs[0]
    # With x = 2 the replaced output is 2 * (2 + 1).
    assert doubled_output.eval({x: 2}) == 6.0
def test_snapshot():
    """Check the snapshot size for an MLP with two identity activations."""
    x = tensor.matrix('x')
    linear = MLP([Identity(), Identity()], [10, 10, 10],
                 weights_init=Constant(1), biases_init=Constant(2))
    linear.initialize()
    cg = ComputationGraph(linear.apply(x))
    batch = numpy.zeros((1, 10), dtype=floatX)
    snapshot = cg.get_snapshot({'x': batch})
    assert len(snapshot) == 14
def test_replace_variable_is_auxiliary():
    """Check that replacing an AUXILIARY variable emits one warning."""
    with warnings.catch_warnings(record=True) as w:
        # Record every warning, whatever filters are active and whatever
        # was already emitted earlier in the process; otherwise the count
        # below depends on the order in which tests are run.
        warnings.simplefilter("always")
        x = tensor.scalar()
        y = x + 1
        add_role(y, AUXILIARY)
        cg = ComputationGraph([y])
        cg.replace([(y, 2 * y)])
    assert len(w) == 1
    assert "auxiliary" in str(w[-1].message)
def test_replace_variable_not_in_graph():
    """Check that replacing a variable outside the graph emits one warning."""
    with warnings.catch_warnings(record=True) as w:
        # Record every warning, whatever filters are active and whatever
        # was already emitted earlier in the process; otherwise the count
        # below depends on the order in which tests are run.
        warnings.simplefilter("always")
        x = tensor.scalar()
        y = x + 1
        z = tensor.scalar()  # deliberately not connected to `y`
        cg = ComputationGraph([y])
        cg.replace([(y, 2 * y), (z, 2 * z)])
    assert len(w) == 1
    assert "not a part of" in str(w[-1].message)
def __init__(self, data_stream, variables, path=None, **kwargs):
    """Store the settings and compile a function for `variables`.

    Parameters
    ----------
    data_stream : object
        Stored as ``self.data_stream``.
    variables : list
        Variables handed to :class:`ComputationGraph`; the Theano
        function of that graph is stored as ``self.theano_function``.
    path : optional
        Stored as ``self.path``.
    \*\*kwargs
        Forwarded to the parent constructor. ``after_training`` defaults
        to ``True`` when not given.

    """
    self.path = path
    self.prediction = None
    self.variables = variables
    self.data_stream = data_stream
    kwargs.setdefault("after_training", True)
    super(PredictDataStream, self).__init__(**kwargs)
    graph = ComputationGraph(variables)
    self.theano_function = graph.get_theano_function()
def __init__(self, generator, steps=320, n_samples=10, mean_data=0,
             std_data=1, sample_rate=8000, save_name="sample_", **kwargs):
    """Compile the sampling function and store the output settings.

    Parameters
    ----------
    generator : object
        Its ``generate`` method is called with ``iterate=True`` to build
        the sampling graph.
    steps : int, optional
        Passed to ``generator.generate`` as ``n_steps``.
    n_samples : int, optional
        Passed to ``generator.generate`` as ``batch_size``.
    mean_data, std_data, sample_rate, save_name : optional
        Stored unchanged on the instance under the same names.
    \*\*kwargs
        Forwarded to the parent constructor.

    """
    super(Speak, self).__init__(**kwargs)
    # `steps` used to be overwritten with a hard-coded 300 at this point,
    # which made the argument (and its default) silently ineffective.
    sample = ComputationGraph(
        generator.generate(n_steps=steps,
                           batch_size=n_samples,
                           iterate=True))
    self.sample_fn = sample.get_theano_function()
    self.mean_data = mean_data
    self.std_data = std_data
    self.sample_rate = sample_rate
    self.save_name = save_name
def __init__(self, data_stream, variables, path=None, **kwargs):
    """Store the settings and compile a function for `variables`.

    Parameters
    ----------
    data_stream : object
        Stored as ``self.data_stream``.
    variables : list
        Variables handed to :class:`ComputationGraph`; the Theano
        function of that graph is stored as ``self.theano_function``.
    path : optional
        Stored as ``self.path``.
    \*\*kwargs
        Forwarded to the parent constructor. ``after_training`` defaults
        to ``True`` when not given.

    """
    self.path = path
    self.prediction = None
    self.variables = variables
    self.data_stream = data_stream
    kwargs.setdefault('after_training', True)
    super(PredictDataStream, self).__init__(**kwargs)
    graph = ComputationGraph(variables)
    self.theano_function = graph.get_theano_function()
def test_computation_graph():
    """Exercise inputs, outputs, variables, replacement and scan lookup."""
    x = tensor.matrix('x')
    y = tensor.matrix('y')
    z = x + y
    z.name = 'z'
    a = z.copy()
    a.name = 'a'
    b = z.copy()
    b.name = 'b'
    r = tensor.matrix('r')

    # Basic bookkeeping of a graph with two outputs sharing `z`.
    cg = ComputationGraph([a, b])
    assert set(cg.inputs) == {x, y}
    assert set(cg.outputs) == {a, b}
    assert set(cg.variables) == {x, y, z, a, b}
    assert cg.variables[2] is z
    assert ComputationGraph(a).inputs == cg.inputs

    # Replacing the shared intermediate keeps the output names.
    cg2 = cg.replace({z: r})
    assert set(cg2.inputs) == {r}
    assert {output.name for output in cg2.outputs} == {'a', 'b'}

    # Shared variables, alone and inside expressions.
    W = theano.shared(numpy.zeros((3, 3), dtype=theano.config.floatX))
    cg3 = ComputationGraph([z + W])
    assert set(cg3.shared_variables) == {W}

    cg4 = ComputationGraph([W])
    assert cg4.variables == [W]

    w1 = W ** 2
    cg5 = ComputationGraph([w1])
    assert W in cg5.variables
    assert w1 in cg5.variables

    # Test scan
    def accumulate(inp, accum):
        return accum + inp

    s, _ = theano.scan(accumulate,
                       sequences=x,
                       outputs_info=tensor.zeros_like(x[0]))
    scan = s.owner.inputs[0].owner.op
    cg6 = ComputationGraph(s)
    assert cg6.scans == [scan]
    assert all(variable in cg6.scan_variables
               for variable in scan.inputs + scan.outputs)
def test_computation_graph():
    """Exercise inputs, outputs, variables and replacement of a graph."""
    x = tensor.matrix('x')
    y = tensor.matrix('y')
    z = x + y
    a = z.copy()
    a.name = 'a'
    b = z.copy()
    b.name = 'b'
    r = tensor.matrix('r')

    # Basic bookkeeping of a graph with two outputs sharing `z`.
    cg = ComputationGraph([a, b])
    assert set(cg.inputs) == {x, y}
    assert set(cg.outputs) == {a, b}
    assert set(cg.variables) == {x, y, z, a, b}
    assert ComputationGraph(a).inputs == cg.inputs

    # Replacing the shared intermediate keeps the output names.
    cg2 = cg.replace({z: r})
    assert set(cg2.inputs) == {r}
    assert {output.name for output in cg2.outputs} == {'a', 'b'}
def _get_bn_params(self, output_vars):
    """Find batch normalization parameters and their shared counterparts.

    Sets ``self._bn``, ``self._counter`` and ``self._counter_max`` as a
    side effect.

    Parameters
    ----------
    output_vars : list
        Output variables from which the computation graph is built.

    Returns
    -------
    bn_ps : list
        Variables of the graph that have the ``BNPARAM`` role.
    bn_share : list
        For every entry of `bn_ps`, the key of ``cg.updates`` named
        ``shared_<parameter name>``. Empty when `bn_ps` is empty.
    output_vars_replaced : list
        `output_vars` itself when `bn_ps` is empty; otherwise the outputs
        of the graph in which every entry of `bn_ps` was replaced by the
        matching entry of `bn_share`.

    """
    # Pick out the nodes with batch normalization vars
    cg = ComputationGraph(output_vars)
    bn_ps = VariableFilter(roles=[BNPARAM])(cg.variables)

    if len(bn_ps) == 0:
        logger.warning('No batch normalization parameters found - is' +
                       ' batch normalization turned off?')
        self._bn = False
        self._counter = None
        self._counter_max = None
        bn_share = []
        output_vars_replaced = output_vars
    else:
        self._bn = True
        assert len(set([p.name for p in bn_ps])) == len(bn_ps), \
            'Some batch norm params have the same name'
        logger.info('Batch norm parameters: %s' %
                    ', '.join([p.name for p in bn_ps]))

        # Filter out the shared variables from the model updates
        def filter_share(par):
            lst = [up for up in cg.updates
                   if up.name == 'shared_%s' % par.name]
            assert len(lst) == 1
            return lst[0]

        # Build a real list: on Python 3 `map` yields a one-shot iterator
        # that the `zip` below would exhaust, so callers would receive an
        # empty `bn_share`.
        bn_share = [filter_share(par) for par in bn_ps]

        # Replace the BN coefficients in the test data model - Replace the
        # theano variables in the test graph with the shareds
        output_vars_replaced = cg.replace(
            list(zip(bn_ps, bn_share))).outputs

        # Pick out the counter
        self._counter = self._param_from_updates(cg.updates, 'counter')
        self._counter_max = self._param_from_updates(cg.updates,
                                                     'counter_max')

    return bn_ps, bn_share, output_vars_replaced
def _create_model(with_dropout):
    """Build a `Model` from the ALI losses, optionally with dropout.

    With `with_dropout` set, dropout with probability 0.2 is applied to
    the inputs of a selection of discriminator layers: the first and every
    third layer starting at index 2 of the x discriminator, and every
    second layer of the z and joint discriminators.

    """
    cg = ComputationGraph(ali.compute_losses(x, z))
    if with_dropout:
        x_layers = ali.discriminator.x_discriminator.layers
        z_layers = ali.discriminator.z_discriminator.layers
        joint_layers = ali.discriminator.joint_discriminator.layers
        dropout_bricks = ([x_layers[0]] + x_layers[2::3] +
                          z_layers[::2] + joint_layers[::2])
        select_inputs = VariableFilter(bricks=dropout_bricks, roles=[INPUT])
        cg = apply_dropout(cg, select_inputs(cg.variables), 0.2)
    return Model(cg.outputs)
def test_application_graph_auxiliary_vars():
    """Check that a variable named 'test_val' is part of the graph."""
    X = tensor.matrix('X')
    brick = TestBrick(0)
    Y = brick.access_application_call(X)
    graph = ComputationGraph(outputs=[Y])
    assert any(var.name == 'test_val' for var in graph.variables)
def construct_graphs(task, hyperparameters, **kwargs):
    """Build the model and its per-dataset graphs, outputs and updates.

    Parameters
    ----------
    task : object
        Provides the input variables, the emitter and the names of the
        monitored outputs.
    hyperparameters : dict
        Forwarded as keyword arguments to ``construct_model``,
        ``task.get_emitter`` and ``prepare_mode``.
    \*\*kwargs
        Accepted but not used.

    Returns
    -------
    graphs_by_set : OrderedDict
        A :class:`ComputationGraph` for each of "train", "valid", "test".
    outputs_by_set : OrderedDict
        For each set, an ordered mapping from output name to variable.
    updates_by_set : OrderedDict
        For each set, the updates of the corresponding mode.

    """
    x, x_shape, y = task.get_variables()

    convnet = construct_model(task=task, **hyperparameters)
    convnet.initialize()

    h = convnet.apply(x)
    h = h.flatten(ndim=2)

    emitter = task.get_emitter(
        input_dim=np.prod(convnet.get_dim("output")),
        **hyperparameters)
    emitter.initialize()

    emitter_outputs = emitter.emit(h, y)
    cost = emitter_outputs.cost.copy(name="cost")

    # gather all the outputs we could possibly care about for training
    # *and* monitoring; prepare_graphs will do graph transformations
    # after which we may *only* use these to access *any* variables.
    outputs_by_name = OrderedDict([("x", x),
                                   ("x_shape", x_shape),
                                   ("cost", cost)])
    for key in task.monitor_outputs():
        outputs_by_name[key] = emitter_outputs[key]
    outputs = list(outputs_by_name.values())

    # construct training and inference graphs
    mode_by_set = OrderedDict([("train", "training"),
                               ("valid", "inference"),
                               ("test", "inference")])
    outputs_by_mode = OrderedDict()
    updates_by_mode = OrderedDict()
    for mode in ("training", "inference"):
        mode_outputs, mode_updates = prepare_mode(
            mode, outputs, convnet=convnet, emitter=emitter,
            **hyperparameters)
        outputs_by_mode[mode] = mode_outputs
        updates_by_mode[mode] = mode_updates

    # inference updates may make sense at some point but don't know
    # where to put them now
    assert not updates_by_mode["inference"]

    # assign by set for convenience
    graphs_by_set = OrderedDict()
    for which_set, mode in mode_by_set.items():
        graphs_by_set[which_set] = ComputationGraph(outputs_by_mode[mode])

    outputs_by_set = OrderedDict()
    for which_set, mode in mode_by_set.items():
        outputs_by_set[which_set] = OrderedDict(
            util.equizip(outputs_by_name.keys(), outputs_by_mode[mode]))

    updates_by_set = OrderedDict()
    for which_set, mode in mode_by_set.items():
        updates_by_set[which_set] = updates_by_mode[mode]

    return graphs_by_set, outputs_by_set, updates_by_set
def __init__(self, quantities):
    """Collect the requirements of `quantities` and build their graph.

    Parameters
    ----------
    quantities : list
        Objects with ``name`` and ``requires`` attributes. The distinct
        entries of all ``requires`` lists become ``self.requires`` and the
        outputs of the computation graph.

    """
    self.quantities = quantities
    collected = []
    for quantity in quantities:
        collected.extend(quantity.requires)
    # Drop repeated requirements (the resulting order is that of a set).
    self.requires = list(set(collected))
    self._initialized = False
    self.quantity_names = [quantity.name for quantity in self.quantities]
    self._computation_graph = ComputationGraph(self.requires)
    self.inputs = self._computation_graph.inputs
def test_apply_noise():
    """Check `apply_noise` against a noise sample from the same seed."""
    x = tensor.scalar()
    y = tensor.scalar()
    z = x + y
    cg = ComputationGraph([z])
    noised_cg = apply_noise(cg, [y], 1, 1)
    actual = noised_cg.outputs[0].eval({x: 1., y: 1.})
    # 1 + 1 plus the noise drawn from a stream seeded identically.
    expected = 2 + MRG_RandomStreams(1).normal(tuple()).eval()
    assert_allclose(actual, expected)
def _get_updates(self, bn_ps, bn_share):
    """Select the updates for the counter and the shared BN variables.

    Parameters
    ----------
    bn_ps : list
        Variables from which the computation graph is built.
    bn_share : list
        Update targets to keep besides the one named ``counter``.

    Returns
    -------
    updates : OrderedDict
        The entries of the graph's updates whose key is named
        ``counter`` or is contained in `bn_share`.

    """
    cg = ComputationGraph(bn_ps)

    # Only store updates that relate to params or the counter
    updates = OrderedDict()
    for target in cg.updates:
        if target.name == 'counter' or target in bn_share:
            updates[target] = cg.updates[target]

    assert self._counter == self._param_from_updates(cg.updates, 'counter')
    assert self._counter_max == self._param_from_updates(cg.updates,
                                                         'counter_max')
    assert len(updates) == len(bn_ps) + 1, \
        'Counter or var missing from update'
    return updates
def test_convolutional_sequence_use_bias():
    """Check that `use_bias=False` leaves only the 'W' parameters."""
    layers = []
    for _ in range(3):
        layers = layers + [Convolutional(filter_size=(1, 1), num_filters=1),
                           Rectifier()]
    cnn = ConvolutionalSequence(layers,
                                num_channels=1,
                                image_size=(1, 1),
                                use_bias=False)
    cnn.allocate()
    x = tensor.tensor4()
    y = cnn.apply(x)
    params = ComputationGraph(y).parameters
    # One weight tensor per convolution and nothing else.
    assert len(params) == 3 and all(param.name == 'W' for param in params)
def __init__(self, generator, N=8, steps=1200, path='samples', **kwargs):
    """Compile a sampling function producing ``N * N`` sequences.

    Parameters
    ----------
    generator : object
        Its ``generate`` method is called with ``iterate=True`` to build
        the sampling graph.
    N : int, optional
        Stored as ``self.N``; the batch size used is ``N * N``.
    steps : int, optional
        Passed to ``generator.generate`` as ``n_steps``.
    path : str, optional
        Stored as ``self.path``.
    \*\*kwargs
        Forwarded to the parent constructor.

    """
    self.N = N
    self.path = path
    super(Sample, self).__init__(**kwargs)
    n_sequences = self.N * self.N
    generated = generator.generate(n_steps=steps,
                                   batch_size=n_sequences,
                                   iterate=True)
    self.sample = ComputationGraph(generated).get_theano_function()
def __init__(self, variables, use_take_last=False):
    """Validate `variables`, build their graph and compile.

    Parameters
    ----------
    variables : list
        Variables checked with ``_validate_variable_names`` and used as
        outputs of the computation graph.
    use_take_last : bool, optional
        Stored as ``self.use_take_last``.

    """
    _validate_variable_names(variables)
    self.variables = variables
    self.variable_names = [variable.name for variable in self.variables]
    self.use_take_last = use_take_last
    graph = ComputationGraph(self.variables)
    self._computation_graph = graph
    self.inputs = graph.inputs
    self._initialized = False
    self._create_aggregators()
    self._compile()
def do(self, which_callback, *args):
    """Stop in the ``ipdb`` debugger, then look up a few objects.

    NOTE(review): this looks like unfinished debugging scaffolding. It
    unconditionally enters an interactive breakpoint, computes values that
    are never used and returns ``None`` -- confirm the intended behaviour
    before relying on this callback.

    """
    # Unconditional interactive breakpoint; blocks until the debugger
    # session is continued.
    import ipdb
    ipdb.set_trace()
    # Data stream of the second extension of the main loop.
    vds = self.main_loop.extensions[1].data_stream
    # NOTE(review): `num_batches` is not used afterwards; `/` is integer
    # division only on Python 2 -- confirm which one is intended.
    num_batches = 1 + vds.data_stream.dataset.num_examples / vds.batch_size
    # for i in range(9):
    #     batch = vds.get_epoch_iterator().next()
    #     import ipdb; ipdb.set_trace()
    mlp = self.main_loop.model.top_bricks[1]
    probs = mlp.apply_outputs
    # The graph inputs are computed but the result is discarded.
    ComputationGraph(probs).inputs
def do(self, which_callback, *args, **kwargs):
    """Locate the learning rate before training and reset it per epoch.

    Parameters
    ----------
    which_callback : str
        ``'before_training'`` looks up the variable named
        ``learning_rate`` in the graph of the algorithm's total step norm;
        ``'after_epoch'`` sets that variable to
        ``self._annealing_learning_rate``.

    Raises
    ------
    ValueError
        For any other callback name.

    """
    if which_callback == 'before_training':
        cg = ComputationGraph(self.main_loop.algorithm.total_step_norm)
        matches = VariableFilter(theano_name='learning_rate')(cg)
        # Exactly one such variable is expected.
        (self._learning_rate_var,) = matches
        logger.debug("Annealing extension is initialized")
        return

    if which_callback == 'after_epoch':
        logger.debug("Annealing the learning rate to {}".format(
            self._annealing_learning_rate))
        self._learning_rate_var.set_value(self._annealing_learning_rate)
        return

    raise ValueError("don't know what to do")
def _create_model(with_dropout):
    """Build a `Model` from the ALI losses, optionally with dropout.

    With `with_dropout` set, dropout is applied in two passes: first with
    probability 0.5 to the inputs of all but the first child of the joint
    discriminator, then with probability 0.2 to the inputs of the joint
    discriminator itself.

    """
    cg = ComputationGraph(ali.compute_losses(x, z))
    if with_dropout:
        select_inner = VariableFilter(
            bricks=ali.discriminator.joint_discriminator.children[1:],
            roles=[INPUT])
        cg = apply_dropout(cg, select_inner(cg.variables), 0.5)

        # The second filter runs on the graph returned by the first pass.
        select_outer = VariableFilter(
            bricks=[ali.discriminator.joint_discriminator],
            roles=[INPUT])
        cg = apply_dropout(cg, select_outer(cg.variables), 0.2)
    return Model(cg.outputs)
def test_batchnorm_rolling():
    """Check the layer's `u` and `s` values after 100 identical batches."""
    layer = BatchNormalization(input_dim=5, rolling_accumulate=True)
    layer.initialize()

    x = T.matrix()
    # All ones, except for a single outlier in the first column.
    x_val = np.ones((6, 5), dtype=theano.config.floatX)
    x_val[0, 0] = 10.0

    forward = ComputationGraph([layer.apply(x)]).get_theano_function()
    for _ in range(100):
        forward(x_val)

    u = layer.u.get_value()
    for index, expected in enumerate((1.58491838, 0.6339674)):
        assert_allclose(u[index], expected)

    s = layer.s.get_value()
    for index, expected in enumerate((7.13214684, 0.)):
        assert_allclose(s[index], expected)
def __init__(self, generator, steps=320, n_samples=10, mean_data=0,
             std_data=1, sample_rate=8000, save_name="sample_", **kwargs):
    """Compile the sampling function and store the output settings.

    Parameters
    ----------
    generator : object
        Its ``generate`` method is called with ``iterate=True`` to build
        the sampling graph.
    steps : int, optional
        Passed to ``generator.generate`` as ``n_steps``.
    n_samples : int, optional
        Passed to ``generator.generate`` as ``batch_size``.
    mean_data, std_data, sample_rate, save_name : optional
        Stored unchanged on the instance under the same names.
    \*\*kwargs
        Forwarded to the parent constructor.

    """
    super(Speak, self).__init__(**kwargs)
    # `steps` used to be overwritten with a hard-coded 300 at this point,
    # which made the argument (and its default) silently ineffective.
    sample = ComputationGraph(
        generator.generate(n_steps=steps,
                           batch_size=n_samples,
                           iterate=True))
    self.sample_fn = sample.get_theano_function()
    self.mean_data = mean_data
    self.std_data = std_data
    self.sample_rate = sample_rate
    self.save_name = save_name
def __init__(self, worker, experiment, config):
    """Set up a one-epoch CIFAR10 training main loop.

    Parameters
    ----------
    worker : object
        Its ``path`` attribute is the directory under which the
        ``updates`` folder holding ``log.csv`` is created.
    experiment : object
        Not used at the moment (only by extensions that are disabled).
    config : object
        Passed to ``ModelHelper`` to build the model.

    """
    # Data
    dataset = CIFAR10('train', flatten=False)
    test_dataset = CIFAR10('test', flatten=False)
    batch_size = 128

    scheme = ShuffledScheme(dataset.num_examples, batch_size)
    datastream = DataStream(dataset, iteration_scheme=scheme)

    test_scheme = ShuffledScheme(test_dataset.num_examples, batch_size)
    test_stream = DataStream(test_dataset, iteration_scheme=test_scheme)

    # Model
    m = ModelHelper(config)

    # Only referenced by the disabled DistributeWhetlabFinish extension.
    def score_func(mainloop):
        scores = mainloop.log.to_dataframe()["test_accur"].values
        return np.mean(np.sort(scores)[-4:-1])

    # Algorithm
    cg = ComputationGraph([m.cost])
    algorithm = GradientDescent(cost=m.cost,
                                params=cg.parameters,
                                step_rule=AdaM())

    # job_name = os.path.basename(worker.running_job)
    job_name = os.path.basename(".")
    update_path = os.path.join(os.path.join(worker.path, "updates"),
                               job_name)
    # `makedirs` also creates the intermediate "updates" directory; a
    # plain `mkdir` fails when that parent does not exist yet.
    if not os.path.isdir(update_path):
        os.makedirs(update_path)

    self.main_loop = MainLoop(
        algorithm,
        datastream,
        model=Model(m.cost),
        extensions=[
            Timing(),
            TrainingDataMonitoring([m.cost, m.accur],
                                   prefix="train",
                                   after_epoch=True),
            DataStreamMonitoring([m.cost, m.accur],
                                 test_stream,
                                 prefix="test"),
            FinishAfter(after_n_epochs=1),
            LogToFile(os.path.join(update_path, "log.csv")),
            Printing(),
            EpochProgress(dataset.num_examples // batch_size + 1)
            # , DistributeUpdate(worker, every_n_epochs=1)
            # , DistributeWhetlabFinish(worker, experiment, score_func)
            # , Plot('cifar10',
            #        channels=[['train_cost', 'test_cost'],
            #                  ['train_accur', 'test_accur']])
        ])
def __init__(self, samples):
    """Extract the generator and the scan inputs behind `samples`.

    Parameters
    ----------
    samples : variable
        A variable whose brick must be a :class:`BaseSequenceGenerator`
        and whose application call must be that generator's ``generate``.

    Raises
    ------
    ValueError
        If either of the two conditions above does not hold.

    """
    # Extracting information from the sampling computation graph
    self.cg = ComputationGraph(samples)
    self.inputs = self.cg.inputs
    self.generator = get_brick(samples)
    if not isinstance(self.generator, BaseSequenceGenerator):
        raise ValueError
    self.generate_call = get_application_call(samples)
    if not (self.generate_call.application == self.generator.generate):
        raise ValueError
    self.inner_cg = ComputationGraph(self.generate_call.inner_outputs)

    # Fetching names from the sequence generator
    self.context_names = self.generator.generate.contexts
    self.state_names = self.generator.generate.states

    def generator_inputs(name):
        # Inputs of the generator with this name inside the inner graph.
        return VariableFilter(bricks=[self.generator],
                              name=name,
                              roles=[INPUT])(self.inner_cg)

    # Parsing the inner computation graph of sampling scan
    self.contexts = [generator_inputs(name)[0]
                     for name in self.context_names]

    # Includes only those state names that were actually used
    # in 'generate'
    self.input_states = []
    self.input_state_names = []
    for name in self.generator.generate.states:
        found = generator_inputs(name)
        if found:
            self.input_state_names.append(name)
            self.input_states.append(found[0])

    self.tv_overlap_name = ['tw_vocab_overlap']
    self.tv_overlap = [generator_inputs(self.tv_overlap_name[0])[0]]
def __init__(self, gate_values, updates, dataset, ploting_path=None,
             **kwargs):
    """Compile one function per LSTM gate for step-wise generation.

    Parameters
    ----------
    gate_values : dict
        Must contain the variables under the keys ``"in_gates"``,
        ``"out_gates"`` and ``"forget_gates"``. The graph of each must
        have a single input named ``features``.
    updates : list of pairs
        ``(state, new_state)`` pairs. Every state is substituted by a new
        shared variable which is then updated with ``new_state``.
    dataset : object
        Stored as ``self.dataset``.
    ploting_path : optional
        Accepted but not used here.
    \*\*kwargs
        Forwarded to the parent constructor. ``after_batch`` defaults to
        ``1`` when not given.

    """
    kwargs.setdefault("after_batch", 1)
    self.text_length = 300
    self.dataset = dataset
    super(VisualizeGateLSTM, self).__init__(**kwargs)

    in_gates = gate_values["in_gates"]
    out_gates = gate_values["out_gates"]
    forget_gates = gate_values["forget_gates"]

    cg_in = ComputationGraph(in_gates)
    cg_out = ComputationGraph(out_gates)
    cg_forget = ComputationGraph(forget_gates)
    for cg in (cg_in, cg_forget, cg_out):
        assert (len(cg.inputs) == 1)
        assert (cg.inputs[0].name == "features")

    # One shared variable per state, initialised with zeros shaped like
    # the first row of the state.
    state_vars = []
    for state, _ in updates:
        initial_value = state[0:1, :].zeros_like().eval()
        state_vars.append(theano.shared(initial_value,
                                        state.name + '-gen'))

    givens = [(state, shared)
              for (state, _), shared in zip(updates, state_vars)]
    f_updates = [(shared, new_state)
                 for shared, (_, new_state) in zip(state_vars, updates)]

    def compile_generator(graph, gates):
        return theano.function(inputs=graph.inputs,
                               outputs=gates,
                               givens=givens,
                               updates=f_updates)

    self.generate_in = compile_generator(cg_in, in_gates)
    self.generate_out = compile_generator(cg_out, out_gates)
    self.generate_forget = compile_generator(cg_forget, forget_gates)
def build_mlp(features_car_cat, features_car_int, features_nocar_cat,
              features_nocar_int, features_cp, features_hascar, means,
              labels):
    """Build the gated car/no-car MLP and its MAPE cost with dropout.

    The prediction is the output of the "no car" MLP, plus the output of
    the "car" MLP gated by `features_hascar`, plus the first value
    returned by ``build_mlp_onlyloc``, all multiplied by a one-unit linear
    MLP applied to the first column of `features_nocar_int`.

    Parameters
    ----------
    features_car_cat, features_car_int : variable
        Concatenated along axis 1 as input of the "car" MLP.
    features_nocar_cat, features_nocar_int : variable
        Concatenated along axis 1 as input of the "no car" MLP.
    features_cp, means : variable
        Only forwarded to ``build_mlp_onlyloc``.
    features_hascar : variable
        Gate of the "car" MLP; its axis 1 is made broadcastable.
    labels : variable
        Targets of the MAPE cost.

    Returns
    -------
    prediction : variable
    cost_dropout1 : variable
        The cost with dropout (probability 0.4) applied to the INPUT-role
        variables at positions 6 and 7 of the cost graph.
    parameters : list
        Parameters of the graph with dropout.
    cost : variable
        The cost without dropout.

    """
    mlp_car = MLP(activations=[Rectifier(), Rectifier(), None],
                  dims=[8 + 185, 200, 200, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_interval_car')
    mlp_car.initialize()
    mlp_nocar = MLP(activations=[Rectifier(), Rectifier(), None],
                    dims=[5 + 135, 200, 200, 1],
                    weights_init=IsotropicGaussian(.1),
                    biases_init=Constant(0),
                    name='mlp_interval_nocar')
    mlp_nocar.initialize()

    feature_car = tensor.concatenate((features_car_cat, features_car_int),
                                     axis=1)
    feature_nocar = tensor.concatenate(
        (features_nocar_cat, features_nocar_int), axis=1)
    prediction = mlp_nocar.apply(feature_nocar)
    # gating with the last feature : does the dude own a car
    prediction += (tensor.addbroadcast(features_hascar, 1) *
                   mlp_car.apply(feature_car))

    prediction_loc, _, _, _, = \
        build_mlp_onlyloc(features_car_cat, features_car_int,
                          features_nocar_cat, features_nocar_int,
                          features_cp, features_hascar, means, labels)
    prediction += prediction_loc

    # add crm
    mlp_crm = MLP(activations=[None],
                  dims=[1, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_crm')
    mlp_crm.initialize()
    crm = features_nocar_int[:, 0][:, None]
    prediction = prediction * mlp_crm.apply(crm)

    cost = MAPECost().apply(labels, prediction)

    cg = ComputationGraph(cost)
    input_var = VariableFilter(roles=[INPUT])(cg.variables)
    # Call form of print: identical output for a single argument on
    # Python 2 and valid syntax on Python 3.
    print(input_var)

    cg_dropout1 = apply_dropout(cg, [input_var[6], input_var[7]], .4)
    cost_dropout1 = cg_dropout1.outputs[0]

    return prediction, cost_dropout1, cg_dropout1.parameters, cost
def test_batchnorm_rolling():
    """Check the layer's `u` and `s` values after 100 identical batches."""
    layer = BatchNormalization(input_dim=5, rolling_accumulate=True)
    layer.initialize()

    x = T.matrix()
    # All ones, except for a single outlier in the first column.
    x_val = np.ones((6, 5), dtype=theano.config.floatX)
    x_val[0, 0] = 10.0

    forward = ComputationGraph([layer.apply(x)]).get_theano_function()
    for _ in range(100):
        forward(x_val)

    u = layer.u.get_value()
    for index, expected in enumerate((1.58491838, 0.6339674)):
        assert_allclose(u[index], expected)

    s = layer.s.get_value()
    for index, expected in enumerate((7.13214684, 0.)):
        assert_allclose(s[index], expected)
def test_apply_dropout_custom_divisor():
    """Check `apply_dropout` with a custom divisor against a manual mask."""
    x = tensor.vector()
    y = tensor.vector()
    z = x - y
    cg = ComputationGraph([z])
    scaled_dropped_cg = apply_dropout(cg, [y], 0.8, seed=2,
                                      custom_divisor=2.5)

    x_ = numpy.array([9., 8., 9.], dtype=theano.config.floatX)
    y_ = numpy.array([4., 5., 6.], dtype=theano.config.floatX)

    actual = scaled_dropped_cg.outputs[0].eval({x: x_, y: y_})
    # Same seed and keep probability (1 - 0.8) as the dropout above.
    mask = MRG_RandomStreams(2).binomial((3,), p=0.2).eval()
    assert_allclose(actual, x_ - (y_ * mask / 2.5))
def __init__(self, samples):
    """Extract the generator and the scan inputs behind `samples`.

    Parameters
    ----------
    samples : variable
        A variable whose brick must be a :class:`BaseSequenceGenerator`
        and whose application call must be that generator's ``generate``.

    Raises
    ------
    ValueError
        If either of the two conditions above does not hold.

    """
    # Extracting information from the sampling computation graph
    self.cg = ComputationGraph(samples)
    self.inputs = self.cg.inputs
    self.generator = get_brick(samples)
    if not isinstance(self.generator, BaseSequenceGenerator):
        raise ValueError
    self.generate_call = get_application_call(samples)
    if not (self.generate_call.application == self.generator.generate):
        raise ValueError
    self.inner_cg = ComputationGraph(self.generate_call.inner_outputs)

    # Fetching names from the sequence generator
    self.context_names = self.generator.generate.contexts
    self.state_names = self.generator.generate.states

    # WORKING: new function which returns all the outputs of the generate
    # function as auxiliary variables
    # WORKING: keep all the outputs of the generate function on the beam,
    # parse them at the end
    self.output_names = self.generator.generate.outputs

    def generator_inputs(name):
        # Inputs of the generator with this name inside the inner graph.
        return VariableFilter(bricks=[self.generator],
                              name=name,
                              roles=[INPUT])(self.inner_cg)

    # Parsing the inner computation graph of sampling scan
    self.contexts = [generator_inputs(name)[0]
                     for name in self.context_names]

    # Includes only those state names that were actually used
    # in 'generate'
    self.input_states = []
    self.input_state_names = []
    for name in self.generator.generate.states:
        found = generator_inputs(name)
        if found:
            self.input_state_names.append(name)
            self.input_states.append(found[0])

    self.compiled = False
def __init__(self, outputs, return_vars, stream):
    """Compile a function computing `outputs` from their graph inputs.

    Parameters
    ----------
    outputs : variable or list
        Wrapped in a list unless it already is one.
    return_vars : object or list
        Wrapped in a list unless it already is one.
    stream : object
        Stored as ``self.stream``.

    """
    self.outputs = outputs if isinstance(outputs, list) else [outputs]
    self.return_vars = (return_vars if isinstance(return_vars, list)
                        else [return_vars])
    self.stream = stream

    graph = ComputationGraph(self.outputs)
    self.input_names = [variable.name for variable in graph.inputs]
    self.f = theano.function(inputs=graph.inputs, outputs=self.outputs)
def use_decoder_on_representations(decoder, training_representation,
                                   sampling_representation):
    """Build the training cost and the sampling graph of `decoder`.

    Parameters
    ----------
    decoder : object
        Provides ``cost``, ``generate`` and ``sequence_generator``.
    training_representation : variable
        First argument of ``decoder.cost``.
    sampling_representation : variable
        Argument of ``decoder.generate``.

    Returns
    -------
    cost : variable
    samples : variable
        Second of the two variables named ``outputs`` that belong to the
        decoder's sequence generator.
    search_model : Model
        Model of everything returned by ``decoder.generate``.
    punctuation_marks : variable
    punctuation_marks_mask : variable

    """
    punctuation_marks = tensor.lmatrix('punctuation_marks')
    punctuation_marks_mask = tensor.matrix('punctuation_marks_mask')

    # NOTE(review): the punctuation mask is passed both as the second and
    # as the fourth argument -- confirm that this is intended.
    cost = decoder.cost(training_representation, punctuation_marks_mask,
                        punctuation_marks, punctuation_marks_mask)

    generated = decoder.generate(sampling_representation)
    search_model = Model(generated)

    select_outputs = VariableFilter(bricks=[decoder.sequence_generator],
                                    name="outputs")
    # Exactly two matches are expected; only the second one is needed.
    _, samples = select_outputs(ComputationGraph(generated[1]))

    return (cost, samples, search_model, punctuation_marks,
            punctuation_marks_mask)
def test_apply_dropout():
    """Check `apply_dropout` against a manually drawn and scaled mask."""
    x = tensor.vector()
    y = tensor.vector()
    z = x * y
    cg = ComputationGraph([z])
    dropped_cg = apply_dropout(cg, [x], 0.4, seed=1)

    x_ = numpy.array([5., 6., 7.], dtype=theano.config.floatX)
    y_ = numpy.array([1., 2., 3.], dtype=theano.config.floatX)

    actual = dropped_cg.outputs[0].eval({x: x_, y: y_})
    # Same seed and keep probability (1 - 0.4) as the dropout above.
    mask = MRG_RandomStreams(1).binomial((3,), p=0.6).eval()
    assert_allclose(actual, x_ * y_ * mask / 0.6)
def __init__(self, variables, use_take_last=False):
    """Check that names are unique, build the graph and compile.

    Parameters
    ----------
    variables : list
        Variables used as outputs of the computation graph.
    use_take_last : bool, optional
        Stored as ``self.use_take_last``.

    Raises
    ------
    ValueError
        If two of the variables have the same name.

    """
    self.variables = variables
    self.use_take_last = use_take_last
    self.variable_names = [variable.name for variable in self.variables]

    distinct_names = set(self.variable_names)
    if len(distinct_names) < len(self.variables):
        raise ValueError("variables should have different names")

    graph = ComputationGraph(self.variables)
    self._computation_graph = graph
    self.inputs = graph.inputs
    self._initialized = False
    self._create_aggregators()
    self._compile()
def init_beam_search(self, beam_size):
    """Compile beam search and set the beam size.

    See Blocks issue #500.

    Parameters
    ----------
    beam_size : int
        Stored as ``self.beam_size`` and passed to :class:`BeamSearch`.

    """
    self.beam_size = beam_size
    generated = self.get_generate_graph()
    outputs_graph = ComputationGraph(generated['outputs'])
    select_samples = VariableFilter(
        applications=[self.generator.generate], name="outputs")
    # Exactly one matching variable is expected.
    (samples,) = select_samples(outputs_graph)
    self._beam_search = BeamSearch(beam_size, samples)
    self._beam_search.compile()
def main(save_to, num_epochs):
    """Train a 784-100-10 MLP on MNIST and monitor it.

    Parameters
    ----------
    save_to : str
        Passed to the :class:`Checkpoint` extension.
    num_epochs : int
        Number of epochs after which training finishes.

    """
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()

    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    # Add a squared penalty on both weight matrices to the cost.
    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(cost=cost,
                                params=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))

    train_stream = DataStream(
        mnist_train,
        iteration_scheme=SequentialScheme(mnist_train.num_examples, 50))

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring(
            [cost, error_rate],
            DataStream(mnist_test,
                       iteration_scheme=SequentialScheme(
                           mnist_test.num_examples, 500)),
            prefix="test"),
        TrainingDataMonitoring(
            [cost, error_rate,
             aggregation.mean(algorithm.total_gradient_norm)],
            prefix="train",
            after_epoch=True),
        Checkpoint(save_to),
        Plot('MNIST example',
             channels=[['test_final_cost',
                        'test_misclassificationrate_apply_error_rate'],
                       ['train_total_gradient_norm']]),
        Printing(),
    ]

    main_loop = MainLoop(algorithm,
                         train_stream,
                         model=Model(cost),
                         extensions=extensions)
    main_loop.run()
def create_act_table(self, save_to, act_table):
    """Evaluate the maximum activation table on MNIST test and pickle it.

    Parameters
    ----------
    save_to : str
        Path of the file the trained parameters are loaded from.
    act_table : str
        Path of the file the resulting table is pickled to.

    Returns
    -------
    table
        The ``max_activation_table`` entry of the evaluation results.

    """
    batch_size = 500
    convnet = create_lenet_5()

    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Normalize input and apply the convnet
    probs = convnet.apply(x)
    cg = ComputationGraph([probs])

    def full_brick_name(brick):
        return '/'.join([''] + [b.name for b in brick.get_unique_path()])

    # Find layer outputs to probe
    outmap = OrderedDict(
        (full_brick_name(get_brick(out)), out)
        for out in VariableFilter(roles=[OUTPUT],
                                  bricks=[Convolutional,
                                          Linear])(cg.variables))

    # Generate pics for biases
    biases = VariableFilter(roles=[BIAS])(cg.parameters)

    # Generate parallel array, in the same order, for outputs
    outs = [outmap[full_brick_name(get_brick(b))] for b in biases]

    # Figure work count
    error_rate = (MisclassificationRate().apply(
        y.flatten(), probs).copy(name='error_rate'))
    max_activation_table = (MaxActivationTable().apply(outs).copy(
        name='max_activation_table'))
    max_activation_table.tag.aggregation_scheme = (
        Concatenate(max_activation_table))

    model = Model([error_rate, max_activation_table])

    # Load it with trained parameters; the context manager makes sure the
    # file is closed even if loading fails.
    with open(save_to, 'rb') as source:
        model.set_parameter_values(load_parameters(source))

    mnist_test_stream = DataStream.default_stream(
        self.mnist_test,
        iteration_scheme=SequentialScheme(self.mnist_test.num_examples,
                                          batch_size))

    evaluator = DatasetEvaluator([error_rate, max_activation_table])
    results = evaluator.evaluate(mnist_test_stream)
    table = results['max_activation_table']

    # Close (and thereby flush) the output file deterministically.
    with open(act_table, 'wb') as destination:
        pickle.dump(table, destination)
    return table
def train_model(cost, train_stream, valid_stream, valid_freq, valid_rare,
                load_location=None, save_location=None):
    """Train a model on `cost` while monitoring three validation streams.

    Parameters
    ----------
    cost : variable
        The cost to minimize; it is renamed to ``nll``.
    train_stream : object
        Data stream used for training.
    valid_stream, valid_freq, valid_rare : object
        Streams monitored with the prefixes ``valid_all``,
        ``valid_frequent`` and ``valid_rare`` respectively.
    load_location : optional
        If given, parameter values are loaded from it before training.
    save_location : optional
        If given, the main loop is dumped to it after training.

    """
    cost.name = 'nll'
    perplexity = 2 ** (cost / tensor.log(2))
    perplexity.name = 'ppl'

    # Define the model
    model = Model(cost)

    # Load the parameters from a dumped model
    if load_location is not None:
        logger.info('Loading parameters...')
        model.set_param_values(load_parameter_values(load_location))

    cg = ComputationGraph(cost)
    algorithm = GradientDescent(cost=cost,
                                step_rule=Scale(learning_rate=0.01),
                                params=cg.parameters)

    def monitor(stream, prefix, every_n_batches):
        return DataStreamMonitoring([cost, perplexity],
                                    stream,
                                    prefix=prefix,
                                    every_n_batches=every_n_batches)

    extensions = [
        monitor(valid_stream, 'valid_all', 5000),
        # Overfitting of rare words occurs between 3000 and 4000 iterations
        monitor(valid_rare, 'valid_rare', 500),
        monitor(valid_freq, 'valid_frequent', 5000),
        Printing(every_n_batches=500),
    ]
    main_loop = MainLoop(model=model,
                         data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()

    # Save the main loop
    if save_location is not None:
        logger.info('Saving the main loop...')
        dump_manager = MainLoopDumpManager(save_location)
        dump_manager.dump(main_loop)
        logger.info('Saved')
def initialize(self):
    """Initialize the training algorithm.

    Infers the inputs from the new values of ``self.updates`` and compiles
    a function that applies the updates and returns the Theano variable of
    every prunable layer.

    """
    logger.info("Initializing the training algorithm")

    update_values = []
    for _, new_value in self.updates:
        update_values.append(new_value)
    activity_variables = []
    for layer in self.prunable_layers:
        activity_variables.append(layer.theano_variable)

    logger.debug("Inferring graph inputs...")
    self.inputs = ComputationGraph(update_values).inputs

    logger.debug("Compiling training function...")
    self._function = theano.function(self.inputs,
                                     activity_variables,
                                     updates=self.updates,
                                     **self.theano_func_kwargs)

    logger.info("The training algorithm is initialized")
def get_cost_graph(self, batch=True, prediction=None, prediction_mask=None):
    """Build the computation graph of the cost.

    Parameters
    ----------
    batch : bool, optional
        If ``True``, the batch recordings, labels and masks stored on the
        instance are used. Otherwise the recordings are
        ``self.single_recording[:, None, :]`` with a mask of ones, the
        ground truth is ``self.single_transcription[:, None]`` and no
        ground truth mask is used.
    prediction : variable, optional
        Third argument of ``self.cost``. Defaults to the ground truth.
    prediction_mask : variable, optional
        Fourth argument of ``self.cost``. Defaults to the ground truth
        mask.

    Returns
    -------
    cost_cg : ComputationGraph
        The graph of the cost. When the criterion name starts with
        ``"mse"``, the variable named ``groundtruth`` in that graph is
        replaced by the ground truth selected above.

    """
    if batch:
        recordings = self.recordings
        recordings_mask = self.recordings_mask
        groundtruth = self.labels
        groundtruth_mask = self.labels_mask
    else:
        recordings = self.single_recording[:, None, :]
        recordings_mask = tensor.ones_like(recordings[:, :, 0])
        groundtruth = self.single_transcription[:, None]
        groundtruth_mask = None

    # Compare against None explicitly: truth-testing a symbolic variable
    # is not a reliable "was it given?" check and can raise TypeError.
    if prediction is None:
        prediction = groundtruth
    if prediction_mask is None:
        prediction_mask = groundtruth_mask

    cost = self.cost(recordings, recordings_mask, prediction,
                     prediction_mask)
    cost_cg = ComputationGraph(cost)
    if self.criterion['name'].startswith("mse"):
        # Exactly one variable named 'groundtruth' is expected here.
        placeholder, = VariableFilter(theano_name='groundtruth')(cost_cg)
        cost_cg = cost_cg.replace({placeholder: groundtruth})
    return cost_cg
def __init__(self, graph, data_stream, n_batches, **kwargs):
    """Collect the batch normalization bricks of `graph` and compile.

    Parameters
    ----------
    graph : object
        Passed to ``get_batch_norm_bricks``.
    data_stream : object
        Stored as ``self.data_stream``.
    n_batches : int
        Stored as ``self.n_batches``.
    \*\*kwargs
        Forwarded to the parent constructor. ``after_epoch`` and
        ``before_first_epoch`` default to ``True`` when not given.

    """
    for trigger in ("after_epoch", "before_first_epoch"):
        kwargs.setdefault(trigger, True)
    super(BatchNormExtension, self).__init__(**kwargs)

    self.n_batches = n_batches
    self.bricks = get_batch_norm_bricks(graph)
    self.data_stream = data_stream
    self.updates = self._get_updates()

    training_outputs = []
    for brick in self.bricks:
        training_outputs.append(brick.training_output)
    self._computation_graph = ComputationGraph(training_outputs)

    # Keep every graph input only once.
    self.inputs = self._computation_graph.inputs
    self.inputs = list(set(self.inputs))
    self.inputs_names = [variable.name for variable in self.inputs]
    self._compile()
def buildObjective(self):
    """Builds the approximate objective corresponding to L_elbo in the
    GMVAE article.

    Sets ``self.L_elbo`` and ``self.L_elbo_modif`` (the latter uses the
    ``*_modif`` variants of the w and z priors) and stores in
    ``self.params`` all variables of the ``L_elbo`` graph that have the
    ``PARAMETER`` role.

    """
    # self.z_prior might be the modified version
    self.L_elbo = T.mean(
        self.reconst + self.conditional_prior + self.w_prior + self.z_prior)

    self.L_elbo_modif = T.mean(
        self.reconst + self.conditional_prior + self.w_prior_modif +
        self.z_prior_modif)

    # ---Getting model parameter---#
    elbo_graph = ComputationGraph(self.L_elbo)
    parameter_filter = VariableFilter(roles=[PARAMETER])
    self.params = parameter_filter(elbo_graph.variables)
def _get_bn_params(self, output_vars):
    """Find batch normalization parameters and their shared counterparts.

    Sets ``self._bn``, ``self._counter`` and ``self._counter_max`` as a
    side effect.

    Parameters
    ----------
    output_vars : list
        Output variables from which the computation graph is built.

    Returns
    -------
    bn_ps : list
        Variables of the graph that have the ``BNPARAM`` role.
    bn_share : list
        For every entry of `bn_ps`, the key of ``cg.updates`` named
        ``shared_<parameter name>``. Empty when `bn_ps` is empty.
    output_vars_replaced : list
        `output_vars` itself when `bn_ps` is empty; otherwise the outputs
        of the graph in which every entry of `bn_ps` was replaced by the
        matching entry of `bn_share`.

    """
    # Pick out the nodes with batch normalization vars
    cg = ComputationGraph(output_vars)
    bn_ps = VariableFilter(roles=[BNPARAM])(cg.variables)

    if len(bn_ps) == 0:
        logger.warning('No batch normalization parameters found - is' +
                       ' batch normalization turned off?')
        self._bn = False
        self._counter = None
        self._counter_max = None
        bn_share = []
        output_vars_replaced = output_vars
    else:
        self._bn = True
        assert len(set([p.name for p in bn_ps])) == len(bn_ps), \
            'Some batch norm params have the same name'
        logger.info('Batch norm parameters: %s' %
                    ', '.join([p.name for p in bn_ps]))

        # Filter out the shared variables from the model updates
        def filter_share(par):
            lst = [up for up in cg.updates
                   if up.name == 'shared_%s' % par.name]
            assert len(lst) == 1
            return lst[0]

        # Build a real list: on Python 3 `map` yields a one-shot iterator
        # that the `zip` below would exhaust, so callers would receive an
        # empty `bn_share`.
        bn_share = [filter_share(par) for par in bn_ps]

        # Replace the BN coefficients in the test data model - Replace the
        # theano variables in the test graph with the shareds
        output_vars_replaced = cg.replace(
            list(zip(bn_ps, bn_share))).outputs

        # Pick out the counter
        self._counter = self._param_from_updates(cg.updates, 'counter')
        self._counter_max = self._param_from_updates(cg.updates,
                                                     'counter_max')

    return bn_ps, bn_share, output_vars_replaced
def __init__(self, variables, use_take_last=False):
    """Validate variable names, then build and compile the aggregators.

    Parameters
    ----------
    variables : list
        Variables to aggregate; their ``name`` attributes must be
        pairwise distinct.
    use_take_last : bool
        Stored on the instance as ``use_take_last``.

    Raises
    ------
    ValueError
        If two or more variables share a name. Several unnamed variables
        (name ``None``) count as duplicates of each other.
    """
    self.variables = variables
    self.use_take_last = use_take_last
    self.variable_names = [v.name for v in self.variables]

    # Single pass; duplicates are reported in first-seen order so the
    # message is deterministic.
    seen = set()
    duplicates = []
    for vname in self.variable_names:
        if vname in seen:
            if vname not in duplicates:
                duplicates.append(vname)
        else:
            seen.add(vname)
    if duplicates:
        # str() keeps the join from failing with TypeError when the
        # duplicated name is None.
        raise ValueError("variables should have different names!"
                         " Duplicates: {}".format(
                             ', '.join(str(vname) for vname in duplicates)))

    self._computation_graph = ComputationGraph(self.variables)
    self.inputs = self._computation_graph.inputs

    self._initialized = False
    self._create_aggregators()
    self._compile()
class Pylearn2Cost(pylearn2.costs.cost.Cost):
    """Wraps a Theano cost to support the PyLearn2 Cost interface.

    Parameters
    ----------
    cost : Theano variable
        The Theano variable corresponding to the end of the cost
        computation graph.

    Notes
    -----
    The inputs of the computation graph must have names compatible with
    the names of the data sources. This is necessary in order to replace
    them with the ones given by PyLearn2.

    """
    def __init__(self, cost):
        self.cost = cost
        # Mapping of the cost graph's inputs, as returned by
        # `ComputationGraph.dict_of_inputs`.
        self.inputs = ComputationGraph(self.cost).dict_of_inputs()

    def expr(self, model, data, **kwargs):
        """Return a clone of the cost with its inputs replaced by `data`."""
        assert not model.supervised
        unbroadcasted = [tensor.unbroadcast(var, *range(var.ndim))
                         for var in pack(data)]
        # Graph inputs and data variables are paired positionally.
        replacements = dict(zip(self.inputs.values(), unbroadcasted))
        return theano.clone(self.cost, replace=replacements)

    def get_gradients(self, model, data, **kwargs):
        """Return ``(gradients, updates)``; ``updates`` is always empty.

        The gradient expressions are built on the first call and cached
        in ``self._grads``; later calls reuse them whatever `data` is.
        """
        if not hasattr(self, "_grads"):
            grads = []
            for param in model.get_params():
                grads.append(tensor.grad(self.expr(model, data), param))
            self._grads = grads
        gradients = OrderedDict(zip(model.get_params(), self._grads))
        return gradients, OrderedDict()

    def get_monitoring_channels(self, model, data, **kwargs):
        """Return an empty ``OrderedDict``: no monitoring channels."""
        return OrderedDict()

    def get_data_specs(self, model):
        """Return the model's ``data_specs``."""
        return model.data_specs
def apply_adaptive_noise(computation_graph,
                         cost,
                         variables,
                         num_examples,
                         parameters=None,
                         init_sigma=1e-6,
                         model_cost_coefficient=1.0,
                         seed=None,
                         gradients=None,
                         ):
    """Add adaptive noise to parameters of a model.

    Each of the given variables will be replaced by a normal
    distribution with learned mean and standard deviation.

    A model cost is computed based on the precision of the
    distributions associated with each variable. It is added to the given
    cost used to train the model.

    See: A. Graves "Practical Variational Inference for Neural Networks",
         NIPS 2011

    Parameters
    ----------
    computation_graph : instance of :class:`ComputationGraph`
        The computation graph.
    cost : :class:`~tensor.TensorVariable`
        The cost without weight noise. It should be a member of the
        computation_graph.
    variables : :class:`~tensor.TensorVariable`
        Variables to add noise to.
    num_examples : int
        Number of training examples. The cost of the model is divided by
        the number of training examples, please see
        A. Graves "Practical Variational Inference for Neural Networks"
        for justification
    parameters : list of :class:`~tensor.TensorVariable`
        parameters of the model, if gradients are given the list will not
        be used. Otherwise, it will be used to compute the gradients
    init_sigma : float,
        initial standard deviation of noise variables
    model_cost_coefficient : float,
        the weight of the model cost
    seed : int, optional
        The seed with which
        :class:`~theano.sandbox.rng_mrg.MRG_RandomStreams` is initialized.
        Any falsy value (``None`` or ``0``) falls back to
        ``config.default_seed``.
    gradients : dict, optional
        Adaptive weight noise introduces new parameters for which new cost
        and gradients must be computed. Unless the gradients parameter is
        given, it will use theano.grad to get the gradients

    Returns
    -------
    cost : :class:`~tensor.TensorVariable`
        The new cost
    computation_graph : instance of :class:`ComputationGraph`
        new graph with added noise.
    gradients : dict
        a dictionary of gradients for all parameters: the original ones
        and the adaptive noise ones
    noise_brick : :class:~lvsr.graph.NoiseBrick
        the brick that holds all noise parameters and whose .apply method
        can be used to find variables added by adaptive noise

    Raises
    ------
    ValueError
        If `cost` is not an output of `computation_graph`, or if neither
        `gradients` nor `parameters` is given.
    """
    if not seed:
        seed = config.default_seed
    rng = MRG_RandomStreams(seed)

    try:
        cost_index = computation_graph.outputs.index(cost)
    except ValueError:
        raise ValueError("cost is not part of the computation_graph")

    if gradients is None:
        if parameters is None:
            raise ValueError("Either gradients or parameters must be given")
        logger.info("Taking the cost gradient")
        gradients = dict(equizip(parameters,
                                 tensor.grad(cost, parameters)))
    else:
        if parameters is not None:
            # The two literals used to join as "ignoreparameters".
            logger.warning("Both gradients and parameters given, will ignore"
                           " parameters")
        parameters = gradients.keys()

    gradients = OrderedDict(gradients)

    log_sigma_scale = 2048.0

    P_noisy = variables  # We will add noise to these
    Beta = []  # will hold means, log_stdev and stdevs
    P_with_noise = []  # will hold params with added noise

    # These don't change
    P_clean = list(set(parameters).difference(P_noisy))

    noise_brick = NoiseBrick()

    for p in P_noisy:
        p_u = p
        p_val = p.get_value(borrow=True)
        # Scaled log-variance, initialised so that
        # exp(p_ls2 * log_sigma_scale) == init_sigma ** 2.
        p_ls2 = theano.shared((numpy.zeros_like(p_val) +
                               numpy.log(init_sigma) * 2. / log_sigma_scale
                               ).astype(dtype=numpy.float32))
        p_ls2.name = __get_name(p_u)
        noise_brick.parameters.append(p_ls2)
        p_s2 = tensor.exp(p_ls2 * log_sigma_scale)
        Beta.append((p_u, p_ls2, p_s2))

        p_noisy = p_u + rng.normal(size=p_val.shape) * tensor.sqrt(p_s2)
        p_noisy = tensor.patternbroadcast(p_noisy, p.type.broadcastable)
        P_with_noise.append(p_noisy)

    # compute the prior mean and variation
    temp_sum = 0.0
    temp_param_count = 0.0
    for p_u, unused_p_ls2, unused_p_s2 in Beta:
        temp_sum = temp_sum + p_u.sum()
        temp_param_count = temp_param_count + p_u.shape.prod()

    prior_u = tensor.cast(temp_sum / temp_param_count, 'float32')

    temp_sum = 0.0
    for p_u, unused_ls2, p_s2 in Beta:
        temp_sum = temp_sum + (p_s2).sum() + (((p_u-prior_u)**2).sum())

    prior_s2 = tensor.cast(temp_sum/temp_param_count, 'float32')

    # convert everything to use the noisy parameters
    # list(...) keeps the concatenation valid when values() is a view.
    full_computation_graph = ComputationGraph(computation_graph.outputs +
                                              list(gradients.values()))
    full_computation_graph = full_computation_graph.replace(
        dict(zip(P_noisy, P_with_noise)))

    LC = 0.0  # model cost
    for p_u, p_ls2, p_s2 in Beta:
        LC = (LC +
              0.5 * ((tensor.log(prior_s2) - p_ls2 * log_sigma_scale).sum()) +
              1.0 / (2.0 * prior_s2) * (((p_u - prior_u)**2) + p_s2 - prior_s2
                                        ).sum()
              )

    LC = LC / num_examples * model_cost_coefficient

    train_cost = noise_brick.apply(
        full_computation_graph.outputs[cost_index].copy(), LC,
        prior_u, prior_s2)

    # The gradients were appended after the original outputs, so they are
    # the last len(gradients) outputs of the replaced graph.
    gradients = OrderedDict(
        zip(gradients.keys(),
            full_computation_graph.outputs[-len(gradients):]))

    #
    # Delete the gradients from the computational graph
    #
    del full_computation_graph.outputs[-len(gradients):]

    new_grads = {p: gradients.pop(p) for p in P_clean}

    #
    # Warning!!!
    # This only works for batch size 1 (we want that the sum of squares
    # be the square of the sum!
    #
    diag_hessian_estimate = {p: g**2 for p, g in gradients.items()}

    for p_u, p_ls2, p_s2 in Beta:
        p_grad = gradients[p_u]
        p_u_grad = (model_cost_coefficient * (p_u - prior_u) /
                    (num_examples*prior_s2) + p_grad)

        p_ls2_grad = (numpy.float32(model_cost_coefficient *
                                    0.5 / num_examples * log_sigma_scale) *
                      (p_s2/prior_s2 - 1.0) +
                      (0.5*log_sigma_scale) * p_s2 * diag_hessian_estimate[p_u]
                      )
        new_grads[p_u] = p_u_grad
        new_grads[p_ls2] = p_ls2_grad

    return train_cost, full_computation_graph, new_grads, noise_brick
def train(cli_params):
    """Build the ladder model, run the training main loop, return its log.

    Parameters
    ----------
    cli_params : dict
        Must contain ``"save_to"``. ``"save_dir"`` is written into it
        before it is handed to ``load_and_log_params``.

    Returns
    -------
    The main loop log converted with ``to_dataframe()``, or ``None`` when
    the log status has ``epoch_interrupt_received`` set.

    Side effects: attaches a DEBUG-level ``FileHandler`` for
    ``<save_dir>/log.txt`` to ``logger``; the handler is not removed here.
    """
    cli_params["save_dir"] = prepare_dir(cli_params["save_to"])
    logfile = os.path.join(cli_params["save_dir"], "log.txt")

    # Log also DEBUG to a file
    fh = logging.FileHandler(filename=logfile)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    logger.info("Logging into %s" % logfile)

    p, loaded = load_and_log_params(cli_params)
    in_dim, data, whiten, cnorm = setup_data(p, test_set=False)
    if not loaded:
        # Set the zero layer to match input dimensions
        p.encoder_layers = (in_dim,) + p.encoder_layers

    ladder = setup_model(p)

    # Training
    all_params = ComputationGraph([ladder.costs.total]).parameters
    logger.info("Found the following parameters: %s" % str(all_params))

    # Fetch all batch normalization updates. They are in the clean path.
    bn_updates = ComputationGraph([ladder.costs.class_clean]).updates
    # Fail early if the clean-path graph carries no update named "counter".
    assert "counter" in [u.name for u in bn_updates.keys()], \
        "No batch norm params in graph - the graph has been cut?"

    training_algorithm = GradientDescent(
        cost=ladder.costs.total,
        params=all_params,
        step_rule=Adam(learning_rate=ladder.lr)
    )
    # In addition to actual training, also do BN variable approximations
    training_algorithm.add_updates(bn_updates)

    model = Model(ladder.costs.total)

    monitored_variables = [
        ladder.costs.class_corr,
        ladder.costs.class_clean,
        ladder.error,
        # training_algorithm.total_gradient_norm,
        ladder.costs.total,
    ]
    # + ladder.costs.denois.values()

    # Make a global history recorder so that we can get summary at end of
    # training when we write to Sentinel
    # global_history records all relevant monitoring vars
    # updated by SaveLog every time
    global_history = {}

    main_loop = MainLoop(
        training_algorithm,
        # Datastream used for training
        make_datastream(
            data.train,
            data.train_ind,
            p.batch_size,
            n_labeled=p.labeled_samples,
            n_unlabeled=p.unlabeled_samples,
            whiten=whiten,
            cnorm=cnorm,
        ),
        model=model,
        extensions=[
            FinishAfter(after_n_epochs=p.num_epochs),
            # write out to sentinel file for experiment automator to work
            SentinelWhenFinish(save_dir=p.save_dir,
                               global_history=global_history),
            # This will estimate the validation error using
            # running average estimates of the batch normalization
            # parameters, mean and variance
            ApproxTestMonitoring(
                monitored_variables,
                make_datastream(
                    data.valid,
                    data.valid_ind,
                    p.valid_batch_size,
                    whiten=whiten,
                    cnorm=cnorm,
                    scheme=ShuffledScheme
                ),
                prefix="valid_approx",
            ),
            # This Monitor is slower, but more accurate since it will first
            # estimate batch normalization parameters from training data and
            # then do another pass to calculate the validation error.
            FinalTestMonitoring(
                monitored_variables,
                make_datastream(
                    data.train,
                    data.train_ind,
                    p.batch_size,
                    n_labeled=p.labeled_samples,
                    whiten=whiten,
                    cnorm=cnorm,
                    scheme=ShuffledScheme,
                ),
                make_datastream(
                    data.valid,
                    data.valid_ind,
                    p.valid_batch_size,
                    n_labeled=len(data.valid_ind),
                    whiten=whiten,
                    cnorm=cnorm,
                    scheme=ShuffledScheme,
                ),
                prefix="valid_final",
                after_n_epochs=p.num_epochs,
            ),
            TrainingDataMonitoring(variables=monitored_variables,
                                   prefix="train", after_epoch=True),
            # NOTE(review): the first argument looks like the log channel
            # used to select which parameters to save - confirm against
            # SaveParams.
            SaveParams("valid_approx_cost_class_corr", model, p.save_dir),
            # SaveParams(None, all_params, p.save_dir, after_epoch=True),
            SaveExpParams(p, p.save_dir, before_training=True),
            SaveLog(save_dir=p.save_dir, after_epoch=True,
                    global_history=global_history),
            Printing(),
            # ShortPrinting(short_prints),
            LRDecay(ladder.lr, p.num_epochs * p.lrate_decay, p.num_epochs,
                    after_epoch=True),
        ],
    )
    main_loop.run()

    # Get results
    df = main_loop.log.to_dataframe()
    col = "valid_final_error_rate"
    # Report the last recorded value of the final validation error rate.
    logger.info("%s %g" % (col, df[col].iloc[-1]))

    if main_loop.log.status["epoch_interrupt_received"]:
        return None
    return df
def analyze(cli_params):
    """Run the clean labeled path over one epoch and collect its outputs.

    The data set is chosen by ``p.data_type`` ('train', 'valid' or
    'test'). For 'valid' and 'test' a ``FinalTestMonitoring`` pass runs
    first, and the batch-norm parameters in the output graph are then
    replaced via ``TestMonitoring._get_bn_params``.

    Parameters
    ----------
    cli_params
        Passed to ``load_and_log_params``.

    Returns
    -------
    tuple
        ``(targets, outputs)``: the stacked ``'targets_labeled'`` batches
        and the stacked first output of the compiled function.
    """
    p, _ = load_and_log_params(cli_params)
    _, data, whiten, cnorm = setup_data(p, test_set=True)
    ladder = setup_model(p)

    # Analyze activations
    # Third element: whether batch-norm statistics must be computed first.
    dset, indices, calc_batchnorm = {
        'train': (data.train, data.train_ind, False),
        'valid': (data.valid, data.valid_ind, True),
        'test': (data.test, data.test_ind, True),
    }[p.data_type]

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(
            extensions=[
                FinalTestMonitoring(
                    [ladder.costs.class_clean, ladder.error.clean]
                    + ladder.costs.denois.values(),
                    make_datastream(data.train, data.train_ind,
                                    # These need to match with the training
                                    p.batch_size,
                                    n_labeled=p.labeled_samples,
                                    n_unlabeled=len(data.train_ind),
                                    cnorm=cnorm,
                                    whiten=whiten, scheme=ShuffledScheme),
                    make_datastream(data.valid, data.valid_ind,
                                    p.valid_batch_size,
                                    n_labeled=len(data.valid_ind),
                                    n_unlabeled=len(data.valid_ind),
                                    cnorm=cnorm,
                                    whiten=whiten, scheme=ShuffledScheme),
                    prefix="valid_final", before_training=True),
                ShortPrinting({
                    "valid_final": OrderedDict([
                        ('VF_C_class', ladder.costs.class_clean),
                        ('VF_E', ladder.error.clean),
                        ('VF_C_de', [ladder.costs.denois.get(0),
                                     ladder.costs.denois.get(1),
                                     ladder.costs.denois.get(2),
                                     ladder.costs.denois.get(3)]),
                    ]),
                }, after_training=True, use_log=False),
            ])
        main_loop.run()

    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset, indices,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(indices),
                         n_unlabeled=len(indices),
                         balanced_classes=False,
                         whiten=whiten,
                         cnorm=cnorm,
                         scheme=SequentialScheme)

    # We want out the values after softmax
    outputs = ladder.act.clean.labeled.h[len(ladder.layers) - 1]

    # Replace the batch normalization parameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _, outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []
    # Only these three source names are pre-registered; a batch with any
    # other key raises KeyError in the loop below.
    inputs = {'features_labeled': [],
              'targets_labeled': [],
              'features_unlabeled': []}
    # Loop over one epoch
    for d in it:
        # Store all inputs
        for k, v in d.iteritems():
            inputs[k] += [v]
        # Store outputs
        # Batch entries are matched to graph inputs through str(inp).
        res += [f(*[d[str(inp)] for inp in cg.inputs])]

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]
    inputs = {k: numpy.vstack(v) for k, v in inputs.iteritems()}

    return inputs['targets_labeled'], res[0]
def _create_main_loop(self): # hyper parameters hp = self.params batch_size = hp['batch_size'] biases_init = Constant(0) batch_normalize = hp['batch_normalize'] ### Build fprop tensor5 = T.TensorType(config.floatX, (False,)*5) X = tensor5("images") #X = T.tensor4("images") y = T.lvector('targets') gnet_params = OrderedDict() #X_shuffled = X[:, :, :, :, [2, 1, 0]] #X_shuffled = gpu_contiguous(X.dimshuffle(0, 1, 4, 2, 3)) * 255 X = X[:, :, :, :, [2, 1, 0]] X_shuffled = X.dimshuffle((0, 1, 4, 2, 3)) * 255 X_r = X_shuffled.reshape((X_shuffled.shape[0], X_shuffled.shape[1]*X_shuffled.shape[2], X_shuffled.shape[3], X_shuffled.shape[4])) X_r = X_r - (np.array([104, 117, 123])[None, :, None, None]).astype('float32') expressions, input_data, param = stream_layer_exp(inputs = ('data', X_r), mode='rgb') res = expressions['outloss'] y_hat = res.flatten(ndim=2) import pdb; pdb.set_trace() ### Build Cost cost = CategoricalCrossEntropy().apply(y, y_hat) cost = T.cast(cost, theano.config.floatX) cost.name = 'cross_entropy' y_pred = T.argmax(y_hat, axis=1) misclass = T.cast(T.mean(T.neq(y_pred, y)), theano.config.floatX) misclass.name = 'misclass' monitored_channels = [] monitored_quantities = [cost, misclass, y_hat, y_pred] model = Model(cost) training_cg = ComputationGraph(monitored_quantities) inference_cg = ComputationGraph(monitored_quantities) ### Get evaluation function #training_eval = training_cg.get_theano_function(additional_updates=bn_updates) training_eval = training_cg.get_theano_function() #inference_eval = inference_cg.get_theano_function() # Dataset test = JpegHDF5Dataset('test', #name='jpeg_data_flows.hdf5', load_in_memory=True) #mean = np.load(os.path.join(os.environ['UCF101'], 'mean.npy')) import pdb; pdb.set_trace() ### Eval labels = np.zeros(test.num_video_examples) y_hat = np.zeros((test.num_video_examples, 101)) labels_flip = np.zeros(test.num_video_examples) y_hat_flip = np.zeros((test.num_video_examples, 101)) ### Important to shuffle list for batch 
normalization statistic #rng = np.random.RandomState() #examples_list = range(test.num_video_examples) #import pdb; pdb.set_trace() #rng.shuffle(examples_list) nb_frames=1 for i in xrange(24): scheme = HDF5SeqScheme(test.video_indexes, examples=test.num_video_examples, batch_size=batch_size, f_subsample=i, nb_subsample=25, frames_per_video=nb_frames) #for crop in ['upleft', 'upright', 'downleft', 'downright', 'center']: for crop in ['center']: stream = JpegHDF5Transformer( input_size=(240, 320), crop_size=(224, 224), #input_size=(256, 342), crop_size=(224, 224), crop_type=crop, translate_labels = True, flip='noflip', nb_frames = nb_frames, data_stream=ForceFloatX(DataStream( dataset=test, iteration_scheme=scheme))) stream_flip = JpegHDF5Transformer( input_size=(240, 320), crop_size=(224, 224), #input_size=(256, 342), crop_size=(224, 224), crop_type=crop, translate_labels = True, flip='flip', nb_frames = nb_frames, data_stream=ForceFloatX(DataStream( dataset=test, iteration_scheme=scheme))) ## Do the evaluation epoch = stream.get_epoch_iterator() for j, batch in enumerate(epoch): output = training_eval(batch[0], batch[1]) # import cv2 # cv2.imshow('img', batch[0][0, 0, :, :, :]) # cv2.waitKey(160) # cv2.destroyAllWindows() #import pdb; pdb.set_trace() labels_flip[batch_size*j:batch_size*(j+1)] = batch[1] y_hat_flip[batch_size*j:batch_size*(j+1), :] += output[2] preds = y_hat_flip.argmax(axis=1) misclass = np.sum(labels_flip != preds) / float(len(preds)) print i, crop, "flip Misclass:", misclass epoch = stream_flip.get_epoch_iterator() for j, batch in enumerate(epoch): output = training_eval(batch[0], batch[1]) labels[batch_size*j:batch_size*(j+1)] = batch[1] y_hat[batch_size*j:batch_size*(j+1), :] += output[2] preds = y_hat.argmax(axis=1) misclass = np.sum(labels != preds) / float(len(preds)) print i, crop, "noflip Misclass:", misclass y_merge = y_hat + y_hat_flip preds = y_merge.argmax(axis=1) misclass = np.sum(labels != preds) / float(len(preds)) print i, crop, 
"avg Misclass:", misclass ### Compute misclass y_hat += y_hat_flip preds = y_hat.argmax(axis=1) misclass = np.sum(labels != preds) / float(len(preds)) print "Misclass:", misclass
def train(cli_params):
    """Train the ladder model and evaluate on the test set at the end.

    This variant loads the data with ``test_set=True``. Its
    ``FinalTestMonitoring`` extension runs after ``p.num_epochs`` epochs
    on the test stream, under the ``final_test`` prefix. Parameters are
    saved after every epoch.

    Parameters
    ----------
    cli_params : dict
        Must contain ``'save_to'``. ``'save_dir'`` is written into it
        before it is handed to ``load_and_log_params``.

    Returns
    -------
    The main loop log converted with ``to_dataframe()``, or ``None`` when
    the log status has ``epoch_interrupt_received`` set.

    Side effects: attaches a DEBUG-level ``FileHandler`` for
    ``<save_dir>/log.txt`` to ``logger``; the handler is not removed here.
    """
    cli_params['save_dir'] = prepare_dir(cli_params['save_to'])
    logfile = os.path.join(cli_params['save_dir'], 'log.txt')

    # Log also DEBUG to a file
    fh = logging.FileHandler(filename=logfile)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    logger.info('Logging into %s' % logfile)

    p, loaded = load_and_log_params(cli_params)
    in_dim, data, whiten, cnorm = setup_data(p, test_set=True)
    if not loaded:
        # Set the zero layer to match input dimensions
        p.encoder_layers = (in_dim,) + p.encoder_layers

    ladder = setup_model(p)

    # Training
    all_params = ComputationGraph([ladder.costs.total]).parameters
    logger.info('Found the following parameters: %s' % str(all_params))

    # Fetch all batch normalization updates. They are in the clean path.
    bn_updates = ComputationGraph([ladder.costs.class_clean]).updates
    # Fail early if the clean-path graph carries no update named 'counter'.
    assert 'counter' in [u.name for u in bn_updates.keys()], \
        'No batch norm params in graph - the graph has been cut?'

    training_algorithm = GradientDescent(
        cost=ladder.costs.total, params=all_params,
        step_rule=Adam(learning_rate=ladder.lr))
    # In addition to actual training, also do BN variable approximations
    training_algorithm.add_updates(bn_updates)

    model = Model(ladder.costs.total)

    monitored_variables = [
        ladder.costs.class_corr, ladder.costs.class_clean,
        ladder.error,
        # training_algorithm.total_gradient_norm,
        ladder.costs.total]
    # + ladder.costs.denois.values()

    # Make a global history recorder so that we can get summary at end of
    # training when we write to Sentinel
    # global_history records all relevant monitoring vars
    # updated by SaveLog every time
    global_history = {}

    main_loop = MainLoop(
        training_algorithm,
        # Datastream used for training
        make_datastream(data.train, data.train_ind,
                        p.batch_size,
                        n_labeled=p.labeled_samples,
                        n_unlabeled=p.unlabeled_samples,
                        whiten=whiten,
                        cnorm=cnorm),
        model=model,
        extensions=[
            FinishAfter(after_n_epochs=p.num_epochs),

            # This will estimate the validation error using
            # running average estimates of the batch normalization
            # parameters, mean and variance
            ApproxTestMonitoring(
                monitored_variables,
                make_datastream(data.valid, data.valid_ind,
                                p.valid_batch_size, whiten=whiten, cnorm=cnorm,
                                scheme=ShuffledScheme),
                prefix="valid_approx"),

            # This Monitor is slower, but more accurate since it will first
            # estimate batch normalization parameters from training data and
            # then do another pass to calculate the validation error.
            FinalTestMonitoring(
                monitored_variables,
                make_datastream(data.train, data.train_ind,
                                p.batch_size,
                                n_labeled=p.labeled_samples,
                                whiten=whiten, cnorm=cnorm,
                                scheme=ShuffledScheme),
                # DEPREC: we directly test now
                # make_datastream(data.valid, data.valid_ind,
                #                 p.valid_batch_size,
                #                 n_labeled=len(data.valid_ind),
                #                 whiten=whiten, cnorm=cnorm,
                #                 scheme=ShuffledScheme),
                # prefix="valid_final",
                make_datastream(data.test, data.test_ind,
                                p.batch_size,
                                n_labeled=len(data.test_ind),
                                whiten=whiten, cnorm=cnorm,
                                scheme=ShuffledScheme),
                prefix="final_test",
                after_n_epochs=p.num_epochs),

            TrainingDataMonitoring(
                variables=monitored_variables,
                prefix="train", after_epoch=True),

            # write out to sentinel file for experiment automator to work
            # REMOVE THIS if you're running test mode with early stopping immediately after
            SentinelWhenFinish(save_dir=p.save_dir,
                               global_history=global_history),

            # originally use 'valid_approx_cost_class_clean'
            # turns out should use ER as early stopping
            # use CE as a fallback (secondary early stopvar) if ER is the same
            # SaveParams(('valid_approx_error_rate', 'valid_approx_cost_class_clean'),
            #            model, p.save_dir),
            # doesn't do early stopping now
            SaveParams(None, model, p.save_dir, after_epoch=True),
            SaveExpParams(p, p.save_dir, before_training=True),
            SaveLog(save_dir=p.save_dir,
                    after_epoch=True,
                    global_history=global_history),
            Printing(),
            # ShortPrinting(short_prints),
            LRDecay(ladder.lr, p.num_epochs * p.lrate_decay, p.num_epochs,
                    after_epoch=True),
        ])
    main_loop.run()

    # ================= Add testing at end of training =================
    # DEPREC don't do early stopping anymore
    # The branch below is disabled by the constant False and never runs;
    # `main_loop` therefore still refers to the training loop afterwards.
    if False:
        p.load_from = p.save_dir
        ladder = setup_model(p)

        logger.info('Start testing on trained_params_best')

        main_loop = DummyLoop(
            extensions=[
                # write to global history
                SaveLog(save_dir=p.save_dir,
                        after_training=True,
                        global_history=global_history),

                # write out to sentinel file for experiment automator to work
                SentinelWhenFinish(save_dir=p.save_dir,
                                   global_history=global_history),

                FinalTestMonitoring(
                    [ladder.costs.class_clean, ladder.error]
                    + ladder.costs.denois.values(),
                    make_datastream(data.train, data.train_ind,
                                    # These need to match with the training
                                    p.batch_size,
                                    n_labeled=p.labeled_samples,
                                    n_unlabeled=len(data.train_ind),
                                    cnorm=cnorm,
                                    whiten=whiten, scheme=ShuffledScheme),
                    make_datastream(data.test, data.test_ind,
                                    p.batch_size,
                                    n_labeled=len(data.test_ind),
                                    n_unlabeled=len(data.test_ind),
                                    cnorm=cnorm,
                                    whiten=whiten, scheme=ShuffledScheme),
                    prefix="test", before_training=True)
            ])
        main_loop.run()

    # Get results
    df = main_loop.log.to_dataframe()
    # col = 'valid_final_error_rate'
    # logger.info('%s %g' % (col, df[col].iloc[-1]))

    if main_loop.log.status['epoch_interrupt_received']:
        return None
    return df
# Symbolic graph for a generator/discriminator pair built from two MLPs.
features = tensor.matrix('features')
noise = tensor.matrix('noise')

# g = MLP(activations=[Logistic()], dims=[100, 784])
# d = MLP(activations=[Identity()], dims=[784, 1])
# Generator `g`: 100 -> 784 through four 2400-unit layers.
# Discriminator `d`: 784 -> 1.
g = MLP(activations=[Identity(), Identity(), Identity(), Identity(),
                     Rectifier()],
        dims=[100, 2400, 2400, 2400, 2400, 784])
d = MLP(activations=[Tanh(), Tanh(), Identity()], dims=[784, 1200, 1200, 1])

generated_samples = g.apply(noise)
# The same discriminator is applied to real features and to generated
# samples.
discriminated_features = d.apply(features)
discriminated_samples = d.apply(generated_samples)

generator_cg = ComputationGraph(generated_samples)
discriminator_cg = ComputationGraph(discriminated_features)
dsamples_cg = ComputationGraph(discriminated_samples)
generator_parameters = generator_cg.parameters

# NOTE(review): `m` is not used anywhere in this excerpt.
m = 100
# NOTE(review): `/` on a symbolic integer; presumably integer division is
# intended (Python 2 semantics) - confirm.
b_size = discriminated_features.shape[0] / 2

# Mean over samples of log(1 + exp(-D(G(noise)))).
cost_generator = tensor.sum(
    tensor.log(1 + tensor.exp(-discriminated_samples))
) / discriminated_samples.shape[0].astype('float32')
# Sum of D over the first `b_size` rows plus sum of log(1 + exp(-D)) over
# all rows, divided by `b_size`.
# NOTE(review): presumably the first half of the batch holds one class of
# examples - verify against the data pipeline.
cost_discriminator = (
    tensor.sum(discriminated_features[:b_size]) +
    tensor.sum(tensor.log(1 + tensor.exp(-discriminated_features)))
) / b_size.astype('float32')

# Initialisation schemes are assigned after `apply` and before
# `initialize()`.
g.weights_init = IsotropicGaussian(0.05)
d.weights_init = IsotropicGaussian(0.005)
g.biases_init = d.biases_init = Constant(0)
g.initialize()
d.initialize()
class AggregationBuffer(object):
    """Intermediate results of aggregating values of Theano variables.

    Wraps the aggregators of a list of Theano variables: it gathers their
    updates and offers routines to initialize them and to read the
    aggregated values back.

    Parameters
    ----------
    variables : list of :class:`~tensor.TensorVariable`
        Variables to aggregate. Their names become the record names in
        the logs, so every name must be unique.
    use_take_last : bool
        When ``True``, variables that do not need data to be computed use
        the :class:`TakeLast` aggregation scheme instead of
        :class:`_DataIndependent`.

    Attributes
    ----------
    initialization_updates : list of tuples
        Initialization updates of the aggregators.
    accumulation_updates : list of tuples
        Accumulation updates of the aggregators.
    readout_variables : dict
        Maps record names to the :class:`~tensor.TensorVariable` holding
        the aggregated value.
    inputs : list of :class:`~tensor.TensorVariable`
        The list of inputs needed for accumulation.

    """
    def __init__(self, variables, use_take_last=False):
        _validate_variable_names(variables)
        self.use_take_last = use_take_last
        self.variables = variables
        self.variable_names = [variable.name for variable in variables]

        graph = ComputationGraph(variables)
        self._computation_graph = graph
        self.inputs = graph.inputs

        self._initialized = False
        self._create_aggregators()
        self._compile()

    def _default_scheme(self, variable):
        """Pick the scheme for a variable that has none attached."""
        if self._computation_graph.has_inputs(variable):
            logger.debug('Using the default '
                         ' (average over minibatches)'
                         ' aggregation scheme for %s', variable.name)
            return Mean(variable, 1.0)
        scheme = TakeLast if self.use_take_last else _DataIndependent
        logger.debug('Using %s aggregation scheme'
                     ' for %s since it does not depend on'
                     ' the data', scheme.__name__, variable.name)
        return scheme(variable)

    def _create_aggregators(self):
        """Create aggregators and collect updates."""
        self.initialization_updates = []
        self.accumulation_updates = []
        self.readout_variables = OrderedDict()

        for variable in self.variables:
            logger.debug('variable to evaluate: %s', variable.name)
            if not hasattr(variable.tag, 'aggregation_scheme'):
                variable.tag.aggregation_scheme = self._default_scheme(
                    variable)

            aggregator = variable.tag.aggregation_scheme.get_aggregator()
            self.initialization_updates.extend(
                aggregator.initialization_updates)
            self.accumulation_updates.extend(
                aggregator.accumulation_updates)
            self.readout_variables[variable.name] = (
                aggregator.readout_variable)

    def _compile(self):
        """Compiles Theano functions.

        .. todo::

            The current compilation method does not account for updates
            attached to `ComputationGraph` elements. Compiling should
            be out-sourced to `ComputationGraph` to deal with it.

        """
        logger.debug("Compiling initialization and readout functions")
        # No initialization function is compiled when nothing needs it.
        self._initialize_fun = (
            theano.function([], [], updates=self.initialization_updates)
            if self.initialization_updates else None)

        # We need to call `as_tensor_variable` here
        # to avoid returning `CudaNdarray`s to the user, which
        # happens otherwise under some circumstances (see
        # https://groups.google.com/forum/#!topic/theano-users/H3vkDN-Shok)
        readouts = [tensor.as_tensor_variable(readout)
                    for readout in self.readout_variables.values()]
        self._readout_fun = theano.function([], readouts)
        logger.debug("Initialization and readout functions compiled")

    def initialize_aggregators(self):
        """Initialize the aggregators."""
        self._initialized = True
        initialize = self._initialize_fun
        if initialize is not None:
            initialize()

    def get_aggregated_values(self):
        """Readout the aggregated values."""
        if not self._initialized:
            raise Exception("To readout you must first initialize, then "
                            "process batches!")
        values = self._readout_fun()
        return OrderedDict(equizip(self.variable_names, values))
# Sampling script: load a saved main loop, generate one sequence from its
# top brick and save the result as an image.
sp_mean = data_stats['sp_mean']
sp_std = data_stats['sp_std']

# Results live under $RESULTS_DIR/blizzard/.
save_dir = os.environ['RESULTS_DIR']
save_dir = os.path.join(save_dir,'blizzard/')

experiment_name = "sp_only_0"

# NOTE(review): `load` presumably unpickles the file; only use it on
# trusted checkpoints - confirm which loader this is.
main_loop = load(save_dir+"pkl/best_"+experiment_name+".pkl")

# The first top-level brick of the saved model is used as the generator.
generator = main_loop.model.get_top_bricks()[0]

steps = 2048
n_samples = 1

sample = ComputationGraph(generator.generate(n_steps=steps,
                                             batch_size=n_samples,
                                             iterate=True))
sample_fn = sample.get_theano_function()

# Second-to-last output of the compiled function.
# NOTE(review): presumably the emitted sequence - verify against the
# output order of `generate`.
outputs = sample_fn()[-2]

# Rescale with the stored statistics, swap the first two axes and keep
# index 0 of the new leading axis.
outputs = outputs*sp_std + sp_mean
outputs = outputs.swapaxes(0,1)
outputs = outputs[0]
print outputs.max(), outputs.min()

# Draw the transposed array with an inverted y axis and save it.
pyplot.figure(figsize=(100,15))
pyplot.imshow(outputs.T)
pyplot.colorbar()
pyplot.gca().invert_yaxis()
pyplot.savefig(save_dir+"samples/best_"+experiment_name+"9.png")
def __init__(self, config, vocab_size):
    """Build the symbolic question-answering graph and initialize its bricks.

    Declares the symbolic inputs, embeds the question and the context with
    a shared lookup table, encodes both via ``make_bidir_lstm_stack``,
    applies an MLP attention over the context encoding and derives cost
    and error-rate variables. The results are stored on ``self.probs``,
    ``self.cost``, ``self.monitor_vars`` and ``self.monitor_vars_valid``;
    finally every collected brick is initialized.

    NOTE(review): the multiple-choice scoring section in the middle of
    this method looks unfinished (see the inline notes there) -- confirm
    before relying on ``pred`` / ``error_rate``.

    Parameters
    ----------
    config : object
        Configuration object. Attributes read here: ``embed_size``,
        ``question_lstm_size``, ``question_skip_connections``,
        ``ctx_lstm_size``, ``ctx_skip_connections``,
        ``attention_mlp_hidden``, ``attention_mlp_activations``,
        ``out_mlp_hidden``, ``out_mlp_activations``, ``n_entities``,
        ``w_noise``, ``dropout``, ``weights_init``, ``biases_init``.
    vocab_size
        Passed to ``LookupTable`` as its first argument and to
        ``tensor.zeros_like`` (presumably an int -- TODO confirm).
    """
    question = tensor.imatrix('question')  # set up 32-bit integer matrices
    question_mask = tensor.imatrix('question_mask')
    context = tensor.imatrix('context')
    context_mask = tensor.imatrix('context_mask')
    answer = tensor.ivector('answer')
    # NOTE(review): candidates / candidates_mask are only referenced by
    # commented-out code further down in this method.
    candidates = tensor.imatrix('candidates')
    candidates_mask = tensor.imatrix('candidates_mask')

    # and the multiple choice answers:
    ans1 = tensor.ivector('ans1')
    ans1_mask = tensor.ivector('ans1_mask')
    ans2 = tensor.ivector('ans2')
    ans2_mask = tensor.ivector('ans2_mask')
    ans3 = tensor.ivector('ans3')
    ans3_mask = tensor.ivector('ans3_mask')
    ans4 = tensor.ivector('ans4')
    ans4_mask = tensor.ivector('ans4_mask')

    # Every brick created below is collected here so that it can be
    # initialized at the end of this method.
    bricks = []

    # inverts 1st and 2nd dimensions of matrix
    question = question.dimshuffle(1, 0)
    question_mask = question_mask.dimshuffle(1, 0)
    context = context.dimshuffle(1, 0)
    context_mask = context_mask.dimshuffle(1, 0)

    # Embed questions and context (one shared lookup table)
    embed = LookupTable(vocab_size, config.embed_size, name='question_embed')
    bricks.append(embed)
    qembed = embed.apply(question)
    cembed = embed.apply(context)
    # NOTE(review): a1embed..a4embed are not used again in this method.
    a1embed = embed.apply(ans1)
    a2embed = embed.apply(ans2)
    a3embed = embed.apply(ans3)
    a4embed = embed.apply(ans4)

    qlstms, qhidden_list = make_bidir_lstm_stack(qembed, config.embed_size, question_mask.astype(theano.config.floatX),
                                                 config.question_lstm_size, config.question_skip_connections, 'q')
    clstms, chidden_list = make_bidir_lstm_stack(cembed, config.embed_size, context_mask.astype(theano.config.floatX),
                                                 config.ctx_lstm_size, config.ctx_skip_connections, 'ctx')
    bricks = bricks + qlstms + clstms

    # Calculate question encoding (concatenate layer1).
    # With skip connections every hidden sequence contributes its last
    # step; otherwise only the last two entries of the list are used.
    if config.question_skip_connections:
        qenc_dim = 2*sum(config.question_lstm_size)
        qenc = tensor.concatenate([h[-1,:,:] for h in qhidden_list], axis=1)
    else:
        qenc_dim = 2*config.question_lstm_size[-1]
        qenc = tensor.concatenate([h[-1,:,:] for h in qhidden_list[-2:]], axis=1)
    qenc.name = 'qenc'

    # Calculate context encoding (concatenate layer1).
    # Unlike the question, the whole hidden sequences are kept and
    # concatenated along axis 2.
    if config.ctx_skip_connections:
        cenc_dim = 2*sum(config.ctx_lstm_size)
        cenc = tensor.concatenate(chidden_list, axis=2)
    else:
        cenc_dim = 2*config.ctx_lstm_size[-1]
        cenc = tensor.concatenate(chidden_list[-2:], axis=2)
    cenc.name = 'cenc'

    # Attention mechanism MLP
    attention_mlp = MLP(dims=config.attention_mlp_hidden + [1],
                        activations=config.attention_mlp_activations[1:] + [Identity()],
                        name='attention_mlp')
    attention_qlinear = Linear(input_dim=qenc_dim, output_dim=config.attention_mlp_hidden[0], name='attq')
    attention_clinear = Linear(input_dim=cenc_dim, output_dim=config.attention_mlp_hidden[0], use_bias=False, name='attc')
    bricks += [attention_mlp, attention_qlinear, attention_clinear]
    # The context encoding is flattened over its first two axes for the
    # linear brick and reshaped back; the question projection is added
    # along a new leading axis.
    layer1 = Tanh().apply(attention_clinear.apply(cenc.reshape((cenc.shape[0]*cenc.shape[1], cenc.shape[2])))
                                    .reshape((cenc.shape[0],cenc.shape[1],config.attention_mlp_hidden[0]))
                          + attention_qlinear.apply(qenc)[None, :, :])
    layer1.name = 'layer1'
    att_weights = attention_mlp.apply(layer1.reshape((layer1.shape[0]*layer1.shape[1], layer1.shape[2])))
    att_weights.name = 'att_weights_0'
    att_weights = att_weights.reshape((layer1.shape[0], layer1.shape[1]))
    att_weights.name = 'att_weights'

    # Softmax-weighted sum of the context encoding over axis 0.
    attended = tensor.sum(cenc * tensor.nnet.softmax(att_weights.T).T[:, :, None], axis=0)
    attended.name = 'attended'

    # Now we can calculate our output
    out_mlp = MLP(dims=[cenc_dim + qenc_dim] + config.out_mlp_hidden + [config.n_entities],
                  activations=config.out_mlp_activations + [Identity()],
                  name='out_mlp')
    bricks += [out_mlp]
    probs = out_mlp.apply(tensor.concatenate([attended, qenc], axis=1))
    probs.name = 'probs'

    # not needed anymore, since we're not only looking at entities
    # is_candidate = tensor.eq(tensor.arange(config.n_entities, dtype='int32')[None, None, :],
    #                          tensor.switch(candidates_mask, candidates, -tensor.ones_like(candidates))[:, :, None]).sum(axis=1)
    # probs = tensor.switch(is_candidate, probs, -1000 * tensor.ones_like(probs))

    # Calculate prediction, cost and error rate

    # Worked example kept from the author (commented out):
    # vocab = tensor.arange(10)
    # probs = numpy.asarray([0, 0.8, 0, 0.2], dtype=numpy.float32)
    # context = numpy.asarray([3, 2, 8, 1], dtype=numpy.int32)
    # ans3 = numpy.asarray([2, 8, 1], dtype=numpy.int32)
    # ans1 = numpy.asarray([1, 3, 4], dtype=numpy.int32)
    # ans2 = numpy.asarray([1, 1, 4], dtype=numpy.int32)

    # convert probs vector to one that's the same size as vocab, with all zeros except probs:
    # probs = tensor.switch(is_candidate, probs, -1000 * tensor.ones_like(probs))
    # NOTE(review): zeros_like receives vocab_size itself rather than a
    # shape -- presumably a zero vector of length vocab_size was intended
    # (cf. the example above); confirm.
    probsPadded = tensor.zeros_like(vocab_size, dtype=numpy.float32)
    # NOTE(review): the index here is cembed, i.e. the output of
    # embed.apply(context); the example above indexes with the integer
    # context ids instead -- confirm which one is meant.
    probsSubset = probsPadded[cembed] #TODO this should be masked
    b = tensor.set_subtensor(probsSubset, probs)

    # get the similarity score of each (masked) answer with the context probs:
    # NOTE(review): a1enc..a4enc are not assigned anywhere in this method;
    # unless they exist at module level these lines raise NameError.
    # Presumably ans1..ans4 (or a1embed..a4embed) were intended -- confirm.
    ans1probs = b[a1enc]
    # Only the first answer is masked; answers 2-4 are summed unmasked.
    ans1score = tensor.switch(ans1_mask, ans1probs, tensor.zeros_like(ans1probs)).sum()
    ans2probs = b[a2enc]
    ans2score = ans2probs.sum()
    ans3probs = b[a3enc]
    ans3score = ans3probs.sum()
    ans4probs = b[a4enc]
    ans4score = ans4probs.sum()

    # and pick the best one:
    allans = tensor.stacklists([ans1score, ans2score, ans3score, ans4score])
    pred = tensor.argmax(allans)

    # NOTE(review): this compiles a function and calls it with no
    # arguments while the model is being constructed; `out` is not used
    # afterwards. Looks like leftover debugging code -- confirm.
    cg = ComputationGraph([ans1probs, ans1score, ans2probs, ans2score, ans3probs, ans3score, ans4probs, ans4score, allans, pred])
    f = cg.get_theano_function()
    out = f()

    #pred = probs.argmax(axis=1)
    #print "pred"
    #print pred
    # TODO CHANGE THIS!
    cost = Softmax().categorical_cross_entropy(answer, probs).mean()
    error_rate = tensor.neq(answer, pred).mean()

    # Apply dropout (and weight noise) to get the regularized variants of
    # cost and error rate; `cg` is rebound to a fresh graph here.
    cg = ComputationGraph([cost, error_rate])
    if config.w_noise > 0:
        noise_vars = VariableFilter(roles=[WEIGHT])(cg)
        cg = apply_noise(cg, noise_vars, config.w_noise)
    if config.dropout > 0:
        cg = apply_dropout(cg, qhidden_list + chidden_list, config.dropout)
    [cost_reg, error_rate_reg] = cg.outputs

    # Other stuff
    cost_reg.name = cost.name = 'cost'
    error_rate_reg.name = error_rate.name = 'error_rate'

    self.probs = probs
    self.probs.name = "probs"
    self.cost = cost
    self.cost.name = "cost"
    # self.sgd_cost = cost_reg
    # Regularized variables in monitor_vars, unregularized ones in
    # monitor_vars_valid.
    self.monitor_vars = [[cost_reg], [error_rate_reg]]
    self.monitor_vars_valid = [[cost], [error_rate]]

    # Initialize bricks
    for brick in bricks:
        brick.weights_init = config.weights_init
        brick.biases_init = config.biases_init
        brick.initialize()
def __init__(self, variable, **kwargs):
    """Compile and keep a Theano function for ``variable``.

    Parameters
    ----------
    variable
        Passed to ``ComputationGraph``; the function compiled from that
        graph is stored as ``self.theano_function``.
    **kwargs
        Forwarded unchanged to the parent class constructor.
    """
    super(SaveComputationGraph, self).__init__(**kwargs)
    # Build the graph and compile it in one step; only the compiled
    # function is kept on the instance.
    self.theano_function = ComputationGraph(variable).get_theano_function()
def train(cli_params):
    """Train a ladder model and return the training log as a DataFrame.

    Prepares the save directory, mirrors DEBUG logging into ``log.txt``
    there, builds data and model, runs the main loop with approximate and
    final validation monitoring, and converts the main-loop log into a
    ``DataFrame``.

    Parameters
    ----------
    cli_params : dict
        Command-line parameters. ``cli_params['save_to']`` is read and
        ``cli_params['save_dir']`` is written (the dict is mutated).

    Returns
    -------
    DataFrame or None
        The main-loop log indexed by row, or ``None`` when the log status
        reports ``epoch_interrupt_received``.
    """
    cli_params['save_dir'] = prepare_dir(cli_params['save_to'])
    logfile = os.path.join(cli_params['save_dir'], 'log.txt')

    # Log also DEBUG to a file
    fh = logging.FileHandler(filename=logfile)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)
    logger.info('Logging into %s', logfile)

    p, loaded = load_and_log_params(cli_params)
    in_dim, data, whiten, cnorm = setup_data(p, test_set=False)
    if not loaded:
        # Set the zero layer to match input dimensions
        p.encoder_layers = (in_dim,) + p.encoder_layers

    ladder = setup_model(p)

    # Training
    all_params = ComputationGraph([ladder.costs.total]).parameters
    logger.info('Found the following parameters: %s', all_params)

    # Fetch all batch normalization updates. They are in the clean path.
    bn_updates = ComputationGraph([ladder.costs.class_clean]).updates
    assert 'counter' in [u.name for u in bn_updates.keys()], \
        'No batch norm params in graph - the graph has been cut?'

    training_algorithm = GradientDescent(
        cost=ladder.costs.total, parameters=all_params,
        step_rule=Adam(learning_rate=ladder.lr.get_value()))
    # In addition to actual training, also do BN variable approximations
    training_algorithm.add_updates(bn_updates)

    short_prints = {
        "train": {
            'T_C_class': ladder.costs.class_corr,
            'T_C_de': ladder.costs.denois.values(),
        },
        "valid_approx": OrderedDict([
            ('V_C_class', ladder.costs.class_clean),
            ('V_E', ladder.error.clean),
            ('V_C_de', ladder.costs.denois.values()),
        ]),
        "valid_final": OrderedDict([
            ('VF_C_class', ladder.costs.class_clean),
            ('VF_E', ladder.error.clean),
            ('VF_C_de', ladder.costs.denois.values()),
        ]),
    }

    main_loop = MainLoop(
        training_algorithm,
        # Datastream used for training
        make_datastream(data.train, data.train_ind,
                        p.batch_size,
                        n_labeled=p.labeled_samples,
                        n_unlabeled=p.unlabeled_samples,
                        whiten=whiten,
                        cnorm=cnorm),
        model=Model(ladder.costs.total),
        extensions=[
            FinishAfter(after_n_epochs=p.num_epochs),

            # This will estimate the validation error using
            # running average estimates of the batch normalization
            # parameters, mean and variance
            ApproxTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean]
                + ladder.costs.denois.values(),
                make_datastream(data.valid, data.valid_ind,
                                p.valid_batch_size, whiten=whiten, cnorm=cnorm,
                                scheme=ShuffledScheme),
                prefix="valid_approx"),

            # This Monitor is slower, but more accurate since it will first
            # estimate batch normalization parameters from training data and
            # then do another pass to calculate the validation error.
            FinalTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean]
                + ladder.costs.denois.values(),
                make_datastream(data.train, data.train_ind,
                                p.batch_size,
                                n_labeled=p.labeled_samples,
                                whiten=whiten, cnorm=cnorm,
                                scheme=ShuffledScheme),
                make_datastream(data.valid, data.valid_ind,
                                p.valid_batch_size,
                                n_labeled=len(data.valid_ind),
                                whiten=whiten, cnorm=cnorm,
                                scheme=ShuffledScheme),
                prefix="valid_final",
                after_n_epochs=p.num_epochs),

            TrainingDataMonitoring(
                [ladder.costs.total, ladder.costs.class_corr,
                 training_algorithm.total_gradient_norm]
                + ladder.costs.denois.values(),
                prefix="train", after_epoch=True),

            SaveParams(None, all_params, p.save_dir, after_epoch=True),
            SaveExpParams(p, p.save_dir, before_training=True),
            SaveLog(p.save_dir, after_training=True),
            ShortPrinting(short_prints),
            LRDecay(ladder.lr, p.num_epochs * p.lrate_decay, p.num_epochs,
                    after_epoch=True),
        ])
    main_loop.run()

    # Get results
    df = DataFrame.from_dict(main_loop.log, orient='index')
    col = 'valid_final_error_rate_clean'
    if col in df.columns:
        logger.info('%s %g', col, df[col].iloc[-1])
    else:
        # The "valid_final" monitoring above is scheduled only after
        # p.num_epochs epochs, so a run that stops earlier (e.g. on an
        # interrupt) presumably never logs this column. Indexing it
        # unconditionally would raise KeyError and the interrupt check
        # below would never be reached.
        logger.warning('%s is missing from the training log', col)

    if main_loop.log.status['epoch_interrupt_received']:
        return None
    return df
def dump_unlabeled_encoder(cli_params):
    """Compute one layer's clean-path activations over a whole data set.

    Loads parameters and data, optionally runs a final-test monitoring
    pass (for the 'valid' and 'test' data types), then evaluates the
    layer selected by ``p.layer`` on every example of the data set named
    by ``p.data_type``, in sequential order.

    :param cli_params: command-line parameters handed to
        ``load_and_log_params``
    :return: the minibatch results of the first graph output, stacked
        with ``numpy.vstack``
    :raises Exception: if ``p.data_type`` is not 'train', 'valid' or 'test'
    """
    p, _ = load_and_log_params(cli_params)
    want_test_set = (p.data_type == 'test')
    _, data, whiten, cnorm = setup_data(p, test_set=want_test_set)
    ladder = setup_model(p)

    # Analyze activations: pick the data set and decide whether the
    # monitoring pass below has to run first.
    data_type = p.data_type
    if data_type == 'train':
        dset, _indices, calc_batchnorm = data.train, data.train_ind, False
    elif data_type == 'valid':
        dset, _indices, calc_batchnorm = data.valid, data.valid_ind, True
    elif data_type == 'test':
        dset, _indices, calc_batchnorm = data.test, data.test_ind, True
    else:
        raise Exception("Unknown data-type %s"%p.data_type)

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')

        monitored = ([ladder.costs.class_clean,
                      ladder.error.clean,
                      ladder.oos.clean]
                     + ladder.costs.denois.values())
        # These need to match with the training
        train_stream = make_datastream(data.train, data.train_ind,
                                       p.batch_size,
                                       n_labeled=p.labeled_samples,
                                       n_unlabeled=len(data.train_ind),
                                       balanced_classes=p.balanced_classes,
                                       cnorm=cnorm,
                                       whiten=whiten,
                                       scheme=ShuffledScheme)
        valid_stream = make_datastream(data.valid, data.valid_ind,
                                       p.valid_batch_size,
                                       n_labeled=len(data.valid_ind),
                                       n_unlabeled=len(data.valid_ind),
                                       balanced_classes=p.balanced_classes,
                                       cnorm=cnorm,
                                       whiten=whiten,
                                       scheme=ShuffledScheme)
        final_monitor = FinalTestMonitoring(monitored,
                                            train_stream,
                                            valid_stream,
                                            prefix="valid_final",
                                            before_training=True)
        printer = ShortPrinting({
            "valid_final": OrderedDict([
                ('VF_C_class', ladder.costs.class_clean),
                ('VF_E', ladder.error.clean),
                ('VF_O', ladder.oos.clean),
                ('VF_C_de', [ladder.costs.denois.get(i) for i in range(4)]),
            ]),
        }, after_training=True, use_log=False)

        main_loop = DummyLoop(extensions=[final_monitor, printer])
        main_loop.run()

    # Make a datastream that has all the indices in the labeled pathway
    all_ind = numpy.arange(dset.num_examples)
    n_all = len(all_ind)
    ds = make_datastream(dset, all_ind,
                         batch_size=p.get('batch_size'),
                         n_labeled=n_all,
                         n_unlabeled=n_all,
                         balanced_classes=False,
                         whiten=whiten,
                         cnorm=cnorm,
                         scheme=SequentialScheme)

    # A negative layer counts from the end (layer=-1 gives the values
    # after softmax). The index is made non-negative explicitly because
    # the activations container is a dict, not a list.
    if p.layer < 0:
        layer_key = len(ladder.layers) + p.layer
    else:
        layer_key = p.layer
    outputs = ladder.act.clean.labeled.h[layer_key]

    # Replace the batch normalization parameters with the shared variables
    if calc_batchnorm:
        _, _, outputs = TestMonitoring()._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    # Loop over one epoch, feeding each graph input from the batch entry
    # of the same name, and keep every minibatch result.
    per_batch = [f(*[batch[str(inp)] for inp in cg.inputs])
                 for batch in ds.get_epoch_iterator(as_dict=True)]

    # Concatenate all minibatches, output by output
    stacked = [numpy.vstack(chunks) for chunks in zip(*per_batch)]
    return stacked[0]