def prepare_opti(cost, test, *args):
    """Build the model, the optimizer and the main-loop extensions.

    Args:
        cost: Cost variable to minimize; it is also wrapped in a ``Model``.
        test: Not used in this function (monitoring reads the module-level
            ``test_stream``); kept so existing callers keep working.
        *args: Additional variables to monitor alongside the cost.

    Returns:
        tuple: ``(model, algorithm, extensions)``.

    Relies on the module-level names ``nb_epoch``, ``patience``,
    ``test_stream``, ``check``, ``save_every``, ``path``, ``dataset``,
    ``resume`` and ``logger``.
    """
    model = Model(cost)
    logger.info("Model created")

    algorithm = GradientDescent(cost=cost,
                                parameters=model.parameters,
                                step_rule=Adam(learning_rate=0.0015),
                                on_unused_sources='ignore')

    # Extending with an empty tuple is a no-op, so no emptiness check needed.
    to_monitor = [algorithm.cost]
    to_monitor.extend(args)

    extensions = [
        FinishAfter(after_n_epochs=nb_epoch),
        FinishIfNoImprovementAfter(notification_name='loglikelihood_nat',
                                   epochs=patience),
        TrainingDataMonitoring(to_monitor, prefix="train", after_epoch=True),
        DataStreamMonitoring(to_monitor, test_stream, prefix="test"),
        Printing(),
        ProgressBar(),
        ApplyMask(before_first_epoch=True, after_batch=True),
        Checkpoint(check, every_n_epochs=save_every),
        SaveModel(name=path + '/' + 'pixelcnn_{}'.format(dataset),
                  every_n_epochs=save_every),
        GenerateSamples(every_n_epochs=save_every),
        #Checkpoint(path+'/'+'exp.log', save_separately=['log'],every_n_epochs=save_every),
    ]

    if resume:
        logger.info("Restoring from previous checkpoint")
        # Add the loader to the extensions instead of replacing them:
        # otherwise a resumed run has no stopping criterion, no monitoring
        # and no checkpointing.
        extensions.append(Load(path + '/' + check))

    return model, algorithm, extensions
def setup_mainloop(extension):
    """Build a minimal main loop around ``extension`` for progress bar tests.

    A small quadratic cost over a 101-example stream is optimized for one
    epoch; ``extension`` is registered after ``FinishAfter``.
    """
    # progressbar2 3.6.0 replaced the `maxval` kwarg by `max_value`, whose
    # default is 100. Feeding 101 examples makes a stale use of `maxval`
    # fail with an out-of-range complaint from the progress bar.
    float_type = theano.config.floatX
    example_rows = [[1, 2]] * 101
    features = [numpy.array(row, dtype=float_type) for row in example_rows]
    dataset = IterableDataset(dict(features=features))

    W = shared_floatx([0, 0], name='W')
    x = tensor.vector('features')
    cost = tensor.sum((x-W)**2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost, parameters=[W],
                                step_rule=Scale(1e-3))

    loop_extensions = [FinishAfter(after_n_epochs=1), extension]
    return MainLoop(model=None,
                    data_stream=dataset.get_example_stream(),
                    algorithm=algorithm,
                    extensions=loop_extensions)
def test_shared_variable_modifier_two_params():
    """Check the learning rate after a two-argument modifier ran for 3 batches.

    The modifier multiplies the current value by 0.2, so after one epoch of
    three examples the rate is expected to be ``0.001 * 0.2**3``.
    """
    true_weights = numpy.array([-1, 1], dtype=floatX)
    features = [numpy.array(row, dtype=floatX)
                for row in [[1, 2], [3, 4], [5, 6]]]
    targets = [(true_weights * feature).sum() for feature in features]
    n_batches = 3
    dataset = ContainerDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    cost = ((x * W).sum() - y)**2
    cost.name = 'cost'

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, params=[W], step_rule=step_rule)

    def shrink(_, val):
        # First argument is ignored; only the current value is used.
        return numpy.cast[floatX](val * 0.2)

    modifier = SharedVariableModifier(step_rule.learning_rate, shrink)
    main_loop = MainLoop(model=None,
                         data_stream=dataset.get_default_stream(),
                         algorithm=sgd,
                         extensions=[FinishAfter(after_n_epochs=1), modifier])
    main_loop.run()

    expected = 0.001 * 0.2**n_batches
    assert_allclose(step_rule.learning_rate.get_value(), expected, atol=1e-5)
def main():
    """Train a 784-500-20-500-784 stack of layers and show its output.

    The layers come from ``get_typical_layer``; the last one uses a
    ``Logistic`` activation and is named ``"last_before_output"``. Training
    runs for five epochs on ``get_data_stream()`` with Adam and a binary
    cross-entropy cost, then ``showcase`` is called on the graph.
    """
    x = tensor.matrix("features")

    # Encoder: 784 -> 500 -> 20, decoder: 20 -> 500 -> 784.
    hidden_in = get_typical_layer(x, 784, 500)
    latent = get_typical_layer(hidden_in, 500, 20)
    hidden_out = get_typical_layer(latent, 20, 500)
    reconstruction = get_typical_layer(hidden_out, 500, 784, Logistic())
    reconstruction.name = "last_before_output"

    from blocks.bricks.cost import SquaredError, AbsoluteError, BinaryCrossEntropy
    from blocks.graph import ComputationGraph
    from blocks.algorithms import Adam, GradientDescent, Scale
    from blocks.roles import WEIGHT

    cost = BinaryCrossEntropy(name="error").apply(x, reconstruction)
    cg = ComputationGraph(cost)
    # Only needed by the (disabled) L2 regularization:
    # cost += 0.0001 * tensor.sum(map(lambda x: (x**2).sum(), weights))
    # cost.name = "regularized error"
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)

    gd = GradientDescent(cost=cost, parameters=cg.parameters,
                         step_rule=Adam())

    from blocks.main_loop import MainLoop
    from blocks.extensions import FinishAfter, Printing, ProgressBar
    from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring

    monitor = TrainingDataMonitoring([cost], after_epoch=True)
    extensions = [
        monitor,
        FinishAfter(after_n_epochs=5),
        ProgressBar(),
        Printing(),
    ]
    main_loop = MainLoop(data_stream=get_data_stream(),
                         algorithm=gd,
                         extensions=extensions)
    main_loop.run()

    showcase(cg, "last_before_output")
def build_bprop_graph(self):
    """Create the ``GradientDescent`` algorithm and store it on ``self``.

    Two mutually exclusive modes are supported, depending on the values
    returned by ``self.link_here('costs')``:

    * every cost is a ``ParametersLink``: gradients are computed here, per
      link, with respect to the parameters of the link's architectures plus
      its own parameters, and passed as ``gradients`` (``cost`` is ``None``);
    * no cost is a ``ParametersLink``: the costs are summed and passed as
      ``cost`` (``gradients`` is ``None``).

    Raises:
        AssertionError: if bound and unbound costs are mixed, if a bound
            link does not hold exactly one raw variable, or if there are
            no costs at all.
    """
    optimizer = self.get_optimizer()
    # Materialize the values: they are indexed and sliced below, which a
    # dict view (Python 3) does not support.
    costs = list(self.link_here('costs').values())

    # there are either costs assigned to specific params
    isinstance_check = [isinstance(c, ParametersLink) for c in costs]
    if any(isinstance_check):
        assert all(isinstance_check), (
            "Some costs have parameters associated " +
            "to them and others don't. None or all costs need to be bound.")
        grads = OrderedDict()
        for paramlink in costs:
            cost = paramlink.raw_var
            assert len(cost) == 1
            params = flatten(
                [self.architecture[arch].parameters
                 for arch in paramlink.architectures] + paramlink.parameters)
            grads.update(zip(params, theano.grad(cost[0], params)))
        cost = None
    # OR let blocks do the gradient
    else:
        assert len(costs) >= 1, "No cost variables?"
        cost = costs[0]
        for c in costs[1:]:
            cost += c
        grads = None

    algorithm = GradientDescent(cost=cost,
                                gradients=grads,
                                parameters=self.parameters,
                                step_rule=optimizer,
                                on_unused_sources='warn')
    self.algorithm = algorithm
def main(save_to, num_batches, continue_=False):
    """Train a small 1-10-1 MLP for ``num_batches`` batches.

    Args:
        save_to: Destination passed to ``Dump`` (and to ``LoadFromDump``
            when resuming).
        num_batches: Number of batches after which training finishes.
        continue_: When true, a ``LoadFromDump`` extension is placed first.

    Returns:
        The ``MainLoop`` after it has run.
    """
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()

    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    prediction = mlp.apply(x[:, None])
    cost = SquaredError().apply(y[:, None], prediction)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost,
                                params=ComputationGraph(cost).parameters,
                                step_rule=Scale(learning_rate=0.001))
    train_stream = get_data_stream(range(100))
    model = Model(cost)

    if continue_:
        extensions = [LoadFromDump(save_to)]
    else:
        extensions = []
    extensions = extensions + [
        Timing(),
        FinishAfter(after_n_batches=num_batches),
        DataStreamMonitoring([cost], get_data_stream(range(100, 200)),
                             prefix="test"),
        TrainingDataMonitoring([cost], after_epoch=True),
        Dump(save_to),
        Printing(),
    ]

    main_loop = MainLoop(algorithm, train_stream, model=model,
                         extensions=extensions)
    main_loop.run()
    return main_loop
def setup_mainloop(extension):
    """Build a minimal main loop around ``extension`` for progress bar tests.

    Three two-dimensional examples are streamed for one epoch while a
    quadratic cost in ``W`` is minimized; ``extension`` is registered after
    ``FinishAfter``.
    """
    float_type = theano.config.floatX
    features = [numpy.array(row, dtype=float_type)
                for row in [[1, 2], [3, 4], [5, 6]]]
    dataset = IterableDataset(dict(features=features))

    W = shared_floatx([0, 0], name='W')
    x = tensor.vector('features')
    cost = tensor.sum((x - W)**2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost, params=[W], step_rule=Scale(1e-3))

    loop_extensions = [FinishAfter(after_n_epochs=1), extension]
    return MainLoop(model=None,
                    data_stream=dataset.get_example_stream(),
                    algorithm=algorithm,
                    extensions=loop_extensions)
def test_training_data_monitoring():
    """Compare ``TrainingDataMonitoring`` records against a hand-computed cost.

    One monitor records after every batch (prefix ``train1``), another
    aggregates over the epoch (prefix ``train2``); a helper extension writes
    the reference cost to the log before each batch.
    """
    float_type = theano.config.floatX
    true_weights = numpy.array([-1, 1], dtype=float_type)
    features = [numpy.array(row, dtype=float_type)
                for row in [[1, 2], [3, 4], [5, 6]]]
    targets = [(true_weights * feature).sum() for feature in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    V = shared_floatx(7, name='V')
    W_sum = named_copy(W.sum(), 'W_sum')
    cost = ((x * W).sum() - y)**2
    cost.name = 'cost'

    class TrueCostExtension(TrainingExtension):
        """Log the cost computed with numpy from the current value of W."""

        def before_batch(self, data):
            residual = ((W.get_value() * data["features"]).sum() -
                        data["targets"])
            self.main_loop.log.current_row['true_cost'] = residual**2

    per_batch_monitor = TrainingDataMonitoring(
        [W_sum, cost, V], prefix="train1", after_batch=True)
    per_epoch_monitor = TrainingDataMonitoring(
        [aggregation.mean(W_sum), cost], prefix="train2", after_epoch=True)

    main_loop = MainLoop(
        model=None,
        data_stream=dataset.get_example_stream(),
        algorithm=GradientDescent(cost=cost, parameters=[W],
                                  step_rule=Scale(0.001)),
        extensions=[FinishAfter(after_n_epochs=1),
                    per_batch_monitor,
                    per_epoch_monitor,
                    TrueCostExtension()])
    main_loop.run()

    log = main_loop.log

    # Monitoring of a shared variable.
    assert_allclose(log.current_row['train1_V'], 7.0)

    # The reference cost is logged before a batch is processed, whereas the
    # monitor writes after it; hence the offset of one between the rows.
    for i in range(n_batches):
        assert_allclose(log[i]['true_cost'], log[i + 1]['train1_cost'])

    true_costs = [log[i]['true_cost'] for i in range(n_batches)]
    assert_allclose(log[n_batches]['train2_cost'],
                    sum(true_costs) / n_batches)

    batch_w_sums = [log[i]['train1_W_sum']
                    for i in range(1, n_batches + 1)]
    assert_allclose(log[n_batches]['train2_W_sum'],
                    sum(batch_w_sums) / n_batches)
def main(save_to, num_epochs):
    """Train a 784-100-10 MLP on MNIST with weight-decayed cross-entropy.

    Args:
        save_to: Path handed to the ``Checkpoint`` extension.
        num_epochs: Number of epochs after which training finishes.
    """
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()

    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    flat_targets = y.flatten()
    cost = CategoricalCrossEntropy().apply(flat_targets, probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    # Add an L2 penalty on both weight matrices.
    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train",))
    mnist_test = MNIST(("test",))

    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))

    test_stream = Flatten(
        DataStream.default_stream(
            mnist_test,
            iteration_scheme=SequentialScheme(mnist_test.num_examples, 500)),
        which_sources=('features',))
    train_channels = [cost, error_rate,
                      aggregation.mean(algorithm.total_gradient_norm)]

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate], test_stream, prefix="test"),
        TrainingDataMonitoring(train_channels, prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing(),
    ]

    if BLOCKS_EXTRAS_AVAILABLE:
        plot_channels = [
            ['test_final_cost',
             'test_misclassificationrate_apply_error_rate'],
            ['train_total_gradient_norm'],
        ]
        extensions.append(Plot('MNIST example', channels=plot_channels))

    train_stream = Flatten(
        DataStream.default_stream(
            mnist_train,
            iteration_scheme=SequentialScheme(mnist_train.num_examples, 50)),
        which_sources=('features',))

    main_loop = MainLoop(algorithm, train_stream, model=Model(cost),
                         extensions=extensions)
    main_loop.run()
def test_shared_variable_modifier():
    """Check the learning rate set by a one-argument modifier function.

    The modifier maps its argument ``n`` to ``10. / n``; after one epoch of
    three examples the rate is expected to equal ``10. / 3``.
    """
    float_type = theano.config.floatX
    true_weights = numpy.array([-1, 1], dtype=float_type)
    features = [numpy.array(row, dtype=float_type)
                for row in [[1, 2], [3, 4], [5, 6]]]
    targets = [(true_weights * feature).sum() for feature in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, parameters=[W], step_rule=step_rule)

    def schedule(n):
        return numpy.cast[theano.config.floatX](10. / n)

    modifier = SharedVariableModifier(step_rule.learning_rate, schedule)
    main_loop = MainLoop(model=None,
                         data_stream=dataset.get_example_stream(),
                         algorithm=sgd,
                         extensions=[FinishAfter(after_n_epochs=1), modifier])
    main_loop.run()

    expected = numpy.cast[theano.config.floatX](10. / n_batches)
    assert_allclose(step_rule.learning_rate.get_value(), expected)
def setup_mainloop(extensions):
    """Build a one-epoch main loop with a minimal model and cost.

    ``extensions`` (a list) is appended after a ``FinishAfter`` extension.
    The only parameter, ``W``, is tagged with the ``PARAMETER`` role and the
    cost is wrapped in a ``Model``.
    """
    features = [numpy.array(row, dtype=floatX)
                for row in [[1, 2], [3, 4], [5, 6]]]
    datastream = DataStream(IterableDataset(dict(features=features)))

    W = shared_floatx([0, 0], name='W')
    add_role(W, PARAMETER)
    x = tensor.vector('features')
    cost = tensor.sum((x - W)**2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost, parameters=[W],
                                step_rule=Scale(1e-3))

    all_extensions = [FinishAfter(after_n_epochs=1)] + extensions
    return MainLoop(model=Model(cost),
                    data_stream=datastream,
                    algorithm=algorithm,
                    extensions=all_extensions)
def algorithm(self):
    """Return the training algorithm, building and caching it on first use.

    The algorithm is a ``GradientDescent`` over ``self.cost`` and
    ``self.parameters`` whose step rule composes ``self.step_rules``.
    """
    if self._algorithm is not None:
        return self._algorithm

    step_rule = CompositeRule(self.step_rules)
    self._algorithm = GradientDescent(cost=self.cost,
                                      parameters=self.parameters,
                                      step_rule=step_rule)
    return self._algorithm
def test_gradient_descent_updates_keyword():
    """Updates given via the ``updates`` keyword end up in the algorithm."""
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    z = shared_floatx(5)

    gradients = OrderedDict([(W, W / 2)])
    extra_updates = [(z, z + 1)]
    algorithm = GradientDescent(gradients=gradients, updates=extra_updates)

    # One update for W plus the extra update for z.
    assert len(algorithm.updates) == 2
    assert z in dict(algorithm.updates)
def prepare_opti(cost, test):
    """Build the model, the RMSProp optimizer and the main-loop extensions.

    Args:
        cost: Cost variable to minimize; it is also wrapped in a ``Model``.
        test: Not used in this function (monitoring reads the module-level
            ``test_stream``); kept so existing callers keep working.

    Returns:
        tuple: ``(model, algorithm, extensions)``.

    Relies on the module-level names ``nb_epoch``, ``patience``,
    ``test_stream``, ``path`` and ``resume``.
    """
    model = Model(cost)

    algorithm = GradientDescent(
        cost=cost,
        parameters=model.parameters,
        step_rule=RMSProp(),
        on_unused_sources='ignore'
    )

    extensions = [
        FinishAfter(after_n_epochs=nb_epoch),
        FinishIfNoImprovementAfter(notification_name='test_cross_entropy',
                                   epochs=patience),
        TrainingDataMonitoring(
            [algorithm.cost],
            prefix="train",
            after_epoch=True),
        DataStreamMonitoring(
            [algorithm.cost],
            test_stream,
            prefix="test"),
        Printing(),
        ProgressBar(),
        #Checkpoint(path, after_epoch=True)
    ]

    if resume:
        # Call form of print: same output on Python 2 for a single string,
        # and valid syntax on Python 3.
        print("Restoring from previous breakpoint")
        extensions.extend([
            Load(path)
        ])
    return model, algorithm, extensions
def train_base_model(self, train_data, test_data, input_dim):
    """Train the base model with dropout and return its MLP.

    Args:
        train_data: Data stream used for training.
        test_data: Data stream on which ``mis_cost`` is monitored.
        input_dim: Forwarded to ``self.create_base_model``.

    Returns:
        The ``mlp`` returned by ``self.create_base_model``.
    """
    x = T.matrix('features')
    y = T.matrix('targets')
    mlp, cost, mis_cost = self.create_base_model(x, y, input_dim)

    cg = ComputationGraph([cost])
    inputs = VariableFilter(roles=[INPUT])(cg.variables)
    # apply_dropout returns a *new* graph; the original `cost` variable is
    # left untouched. The optimizer must therefore minimize the output of
    # the dropout graph, otherwise dropout has no effect on training.
    dropout_cg = apply_dropout(cg, inputs, 0.2)
    dropout_cost = dropout_cg.outputs[0]

    algorithm = GradientDescent(cost=dropout_cost,
                                parameters=dropout_cg.parameters,
                                step_rule=Adam(learning_rate=0.001))

    data_stream = train_data
    data_stream_test = test_data

    # Monitoring uses mis_cost from the graph without dropout.
    monitor = DataStreamMonitoring(variables=[mis_cost],
                                   data_stream=data_stream_test,
                                   prefix="test")
    plot_ext = Plot('F1-measure',
                    channels=[['test_MisclassificationRate']],
                    after_batch=True)

    main_loop = MainLoop(data_stream=data_stream,
                         algorithm=algorithm,
                         extensions=[
                             monitor,
                             FinishAfter(after_n_epochs=50),
                             Printing(),
                             plot_ext
                         ])
    main_loop.run()
    return mlp
def test_load():
    """Checkpoint a main loop, then restore it with the ``Load`` extension.

    Also runs ``Load`` against a checkpoint path that was never written.
    """
    checkpoint_path = 'myweirdmodel.picklebarrel'

    # Train for five batches and write a checkpoint.
    mlp = MLP(activations=[None], dims=[10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[0].W
    x = tensor.vector('data')
    cost = mlp.apply(x).mean()
    data = numpy.random.rand(10, 10).astype(theano.config.floatX)
    data_stream = IterableDataset(data).get_example_stream()

    main_loop = MainLoop(
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[FinishAfter(after_n_batches=5),
                    Checkpoint(checkpoint_path)])
    main_loop.run()

    # Perturb the weights, then restore parameters, log and iteration state.
    old_value = W.get_value()
    W.set_value(old_value * 2)
    loader = Load(checkpoint_path, load_iteration_state=True, load_log=True)
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[loader])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
    assert_allclose(W.get_value(), old_value)

    # Loading must also work when the model was never saved before.
    missing_loader = Load('mynonexisting.picklebarrel',
                          load_iteration_state=True, load_log=True)
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[missing_loader])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
def test_gradient_descent_finds_inputs_additional_updates():
    """Inputs used only by updates added later are listed in ``inputs``."""
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    counter = shared_floatx(1)
    increment = tensor.scalar('m')

    gradients = OrderedDict([(W, W + 1)])
    algorithm = GradientDescent(gradients=gradients)
    algorithm.add_updates([(counter, counter + increment)])
    algorithm.initialize()

    assert increment in algorithm.inputs
def test_gradient_descent_spurious_sources():
    """Check how ``GradientDescent`` handles a batch with an unused source.

    By default an unexpected source must make ``process_batch`` fail; with
    ``on_unused_sources='ignore'`` the batch is processed and ``W`` is
    updated.
    """
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    W_start_value = W.get_value()
    cost = tensor.sum(W**2)

    algorithm = GradientDescent(cost=cost, parameters=[W])
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    # assert_raises takes the exception class first. Passing only a lambda
    # used it as the "exception class", so the call was never executed and
    # nothing was asserted.
    assert_raises(ValueError,
                  lambda: algorithm.process_batch(dict(example_id='test')))

    algorithm = GradientDescent(cost=cost, parameters=[W],
                                on_unused_sources='ignore')
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    algorithm.process_batch(dict(example_id='test'))
    # W - 0.75 * 2 * W == -0.5 * W
    assert_allclose(W.get_value(), -0.5 * W_start_value)
def construct_main_loop(name, task_name, patch_shape, batch_size,
                        n_spatial_dims, n_patches, n_epochs,
                        learning_rate, hyperparameters, **kwargs):
    """Assemble the task, model, optimizer and extensions into a main loop.

    Args:
        name: Experiment name; combined with ``task_name`` to prefix the
            dump and log outputs.
        task_name: Appended to ``name`` as ``"<name>_<task_name>"``.
        patch_shape: Not used directly here.
        batch_size: Not used directly here.
        n_spatial_dims: Number of trailing dimensions of the preprocessed
            input taken as the image shape.
        n_patches: Forwarded to ``ram.compute`` and the emitter cost.
        n_epochs: Number of epochs after which training finishes.
        learning_rate: Learning rate for Adam.
        hyperparameters: Dict forwarded to the task, model and monitor
            constructors. It is modified in place (``"name"``,
            ``"n_channels"`` and ``"image_shape"`` are set).
        **kwargs: Accepted and ignored.

    Returns:
        The constructed ``MainLoop`` (not yet run).
    """
    name = "%s_%s" % (name, task_name)
    hyperparameters["name"] = name

    task = get_task(**hyperparameters)
    hyperparameters["n_channels"] = task.n_channels

    x_uncentered, y = task.get_variables()
    x = task.preprocess(x_uncentered)

    # this is a theano variable; it may depend on the batch
    hyperparameters["image_shape"] = x.shape[-n_spatial_dims:]

    ram = construct_model(task=task, **hyperparameters)
    ram.initialize()

    hs = ram.compute(x, n_patches)
    cost = ram.emitter.cost(hs, y, n_patches)
    cost.name = "cost"

    # Call form of print: same output on Python 2 for a single string, and
    # valid syntax on Python 3.
    print("setting up main loop...")
    graph = ComputationGraph(cost)
    uselessflunky = Model(cost)
    algorithm = GradientDescent(cost=cost,
                                parameters=graph.parameters,
                                step_rule=Adam(learning_rate=learning_rate))
    monitors = construct_monitors(
        x=x, x_uncentered=x_uncentered, y=y, hs=hs, cost=cost,
        algorithm=algorithm, task=task, model=uselessflunky, ram=ram,
        graph=graph, **hyperparameters)
    main_loop = MainLoop(
        data_stream=task.get_stream("train"),
        algorithm=algorithm,
        extensions=(monitors + [
            FinishAfter(after_n_epochs=n_epochs),
            DumpMinimum(name + '_best', channel_name='valid_error_rate'),
            Dump(name + '_dump', every_n_epochs=10),
            #Checkpoint(name+'_checkpoint.pkl', every_n_epochs=10, on_interrupt=False),
            ProgressBar(),
            Timing(),
            Printing(),
            PrintingTo(name + "_log")]),
        model=uselessflunky)
    return main_loop
def test_gradient_descent():
    """One step on ``sum(W**2)`` with rate 0.75 maps ``W`` to ``-0.5 * W``."""
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    initial_value = W.get_value()

    algorithm = GradientDescent(cost=tensor.sum(W**2), parameters=[W])
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    algorithm.process_batch(dict())

    # W - 0.75 * 2 * W == -0.5 * W
    expected = -0.5 * initial_value
    assert_allclose(W.get_value(), expected)
# Entry point that ends by building a MainLoop from `train_stream`,
# `algorithm` and `extensions`, and running it.
# NOTE(review): this definition looks corrupted. The `host_plot` URL string
# (credentials masked) runs straight into a `%` format expression and the tail
# of an extension list, and `algorithm`, `train_stream` and `extensions` are
# never assigned in the visible text. Part of the body was presumably lost;
# restore it from the original file before editing. The code below is verbatim.
def run(model_name): running_on_laptop = socket.gethostname() == 'yop' X = tensor.tensor4('image_features', dtype='float32') T = tensor.matrix('targets', dtype='float32') image_border_size = 100 if running_on_laptop: host_plot = 'http://*****:*****@ %s' % (model_name, datetime.datetime.now(), socket.gethostname()), channels=[['loss', 'valid_loss_test'], ['valid_error']], after_epoch=True, server_url=host_plot), Printing(), Checkpoint('train2') ] main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm, extensions=extensions) main_loop.run()
def _test(f):
    """Run one gradient step using gradients passed through ``f``.

    ``f`` receives an ``OrderedDict`` mapping ``W`` to the gradient of
    ``sum(W**2)``; its return value is given to ``GradientDescent`` as
    ``gradients``. With learning rate 0.75, ``W`` must become ``-0.5 * W``.
    """
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    initial_value = W.get_value()
    cost = tensor.sum(W**2)

    gradients = OrderedDict([(W, tensor.grad(cost, W))])

    algorithm = GradientDescent(gradients=f(gradients))
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    algorithm.process_batch(dict())

    expected = -0.5 * initial_value
    assert_allclose(W.get_value(), expected)
def train(cost, error_rate, batch_size=100, num_epochs=150):
    """Train with Adam, logging to a fresh timestamped results directory.

    Args:
        cost: Cost variable to minimize.
        error_rate: Variable monitored alongside the cost.
        batch_size: Forwarded to ``get_mnist_video_streams``.
        num_epochs: Number of epochs after which training finishes.

    Creates ``results/memory_<timestamp>`` (``os.makedirs`` raises if it
    already exists) and attaches a file handler for ``log.txt`` in it to the
    module-level ``logger``.
    """
    # Set up the logger
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = 'results/memory_' + timestr
    log_path = os.path.join(save_path, 'log.txt')
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    blocks_model = Model(cost)
    all_params = blocks_model.parameters
    # Call form of print with a single argument: same output on Python 2,
    # and valid syntax on Python 3.
    print("Number of found parameters:" + str(len(all_params)))
    print(all_params)

    training_algorithm = GradientDescent(
        cost=cost, parameters=all_params,
        step_rule=Adam(learning_rate=0.001))

    # training_algorithm = GradientDescent(
    #     cost=cost, params=all_params,
    #     step_rule=Scale(learning_rate=model.default_lr))

    monitored_variables = [cost, error_rate]

    # the rest is for validation
    # train_data_stream, valid_data_stream = get_mnist_streams(
    #     50000, batch_size)
    train_data_stream, valid_data_stream = get_mnist_video_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(
        variables=monitored_variables,
        prefix="train",
        after_epoch=True)

    valid_monitoring = DataStreamMonitoring(
        variables=monitored_variables,
        data_stream=valid_data_stream,
        prefix="valid",
        after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=blocks_model,
        extensions=[
            train_monitoring,
            valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams('valid_misclassificationrate_apply_error_rate',
                       blocks_model, save_path),
            SaveLog(save_path, after_epoch=True),
            ProgressBar(),
            Printing()])
    main_loop.run()
def __init__(self, worker, experiment, config):
    """Prepare a one-epoch CIFAR-10 training loop in ``self.main_loop``.

    Args:
        worker: Object whose ``path`` attribute is the base directory; the
            log is written below ``<worker.path>/updates``.
        experiment: Only referenced by a disabled extension.
        config: Passed to ``ModelHelper`` to build the model.
    """
    # Data
    dataset = CIFAR10('train', flatten=False)
    test_dataset = CIFAR10('test', flatten=False)
    batch_size = 128

    scheme = ShuffledScheme(dataset.num_examples, batch_size)
    datastream = DataStream(dataset, iteration_scheme=scheme)

    test_scheme = ShuffledScheme(test_dataset.num_examples, batch_size)
    test_stream = DataStream(test_dataset, iteration_scheme=test_scheme)

    # Model
    m = ModelHelper(config)

    # Only referenced by the disabled DistributeWhetlabFinish extension.
    def score_func(mainloop):
        scores = mainloop.log.to_dataframe()["test_accur"].values
        return np.mean(np.sort(scores)[-4:-1])

    # Algorithm
    cg = ComputationGraph([m.cost])
    algorithm = GradientDescent(
        cost=m.cost, params=cg.parameters,
        step_rule=AdaM())

    #job_name = os.path.basename(worker.running_job)
    job_name = os.path.basename(".")

    update_path = (os.path.join(os.path.join(worker.path, "updates"),
                                job_name))
    if not os.path.exists(update_path):
        # job_name is ".", so this branch is only reached when the
        # "updates" directory itself is missing. os.mkdir cannot create
        # "<...>/updates/." in that case; os.makedirs creates the missing
        # parent directories.
        os.makedirs(update_path)

    self.main_loop = MainLoop(
        algorithm,
        datastream,
        model=Model(m.cost),
        extensions=[
            Timing(),
            TrainingDataMonitoring(
                [m.cost, m.accur],
                prefix="train",
                after_epoch=True),
            DataStreamMonitoring(
                [m.cost, m.accur],
                test_stream,
                prefix="test"),
            FinishAfter(after_n_epochs=1),
            LogToFile(os.path.join(update_path, "log.csv")),
            Printing(),
            EpochProgress(dataset.num_examples // batch_size + 1)
            #, DistributeUpdate(worker, every_n_epochs=1)
            #, DistributeWhetlabFinish(worker, experiment, score_func)
            #, Plot('cifar10',
                #channels=[['train_cost', 'test_cost'], ['train_accur', 'test_accur']])
        ])
def test_theano_profile_for_sgd_function():
    """``theano_func_kwargs={'profile': True}`` yields a profiled function."""
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    initial_value = W.get_value()
    cost = tensor.sum(W ** 2)

    function_kwargs = {'profile': True}
    algorithm = GradientDescent(cost=cost, parameters=[W],
                                theano_func_kwargs=function_kwargs)
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    algorithm.process_batch(dict())

    # The step itself must behave as without profiling.
    assert_allclose(W.get_value(), -0.5 * initial_value)
    assert isinstance(algorithm._function.profile, ProfileStats)
def train_model(cost, train_stream, valid_stream, valid_freq, valid_rare,
                load_location=None, save_location=None):
    """Train on ``train_stream`` while monitoring cost and perplexity.

    Args:
        cost: Cost variable; it is renamed to ``'nll'``.
        train_stream: Training data stream.
        valid_stream: Stream monitored every 5000 batches (``valid_all``).
        valid_freq: Stream monitored every 5000 batches (``valid_frequent``).
        valid_rare: Stream monitored every 500 batches (``valid_rare``).
        load_location: If given, parameter values are loaded from it before
            training.
        save_location: If given, the main loop is dumped there after
            training.
    """
    cost.name = 'nll'
    perplexity = 2**(cost / tensor.log(2))
    perplexity.name = 'ppl'
    monitored = [cost, perplexity]

    # Define the model
    model = Model(cost)

    # Optionally start from previously dumped parameters
    if load_location is not None:
        logger.info('Loading parameters...')
        model.set_param_values(load_parameter_values(load_location))

    cg = ComputationGraph(cost)
    algorithm = GradientDescent(cost=cost,
                                step_rule=Scale(learning_rate=0.01),
                                params=cg.parameters)

    extensions = [
        DataStreamMonitoring(monitored, valid_stream,
                             prefix='valid_all', every_n_batches=5000),
        # Overfitting of rare words occurs between 3000 and 4000 iterations
        DataStreamMonitoring(monitored, valid_rare,
                             prefix='valid_rare', every_n_batches=500),
        DataStreamMonitoring(monitored, valid_freq,
                             prefix='valid_frequent', every_n_batches=5000),
        Printing(every_n_batches=500),
    ]
    main_loop = MainLoop(model=model,
                         data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()

    # Save the main loop
    if save_location is None:
        return
    logger.info('Saving the main loop...')
    dump_manager = MainLoopDumpManager(save_location)
    dump_manager.dump(main_loop)
    logger.info('Saved')
def main(save_to, num_epochs):
    """Train a 784-100-10 MLP on MNIST with weight-decayed cross-entropy.

    Args:
        save_to: Destination handed to ``SerializeMainLoop``.
        num_epochs: Number of epochs after which training finishes.
    """
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()

    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    # Add an L2 penalty on both weight matrices.
    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHTS])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(cost=cost,
                                step_rule=SteepestDescent(learning_rate=0.1))

    train_stream = DataStream(
        mnist_train,
        iteration_scheme=SequentialScheme(mnist_train.num_examples, 50))

    test_stream = DataStream(
        mnist_test,
        iteration_scheme=SequentialScheme(mnist_test.num_examples, 500))
    train_channels = [cost, error_rate,
                      aggregation.mean(algorithm.total_gradient_norm)]
    plot_channels = [
        ['test_final_cost', 'test_misclassificationrate_apply_error_rate'],
        ['train_total_gradient_norm'],
    ]
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate], test_stream, prefix="test"),
        TrainingDataMonitoring(train_channels, prefix="train",
                               after_every_epoch=True),
        SerializeMainLoop(save_to),
        Plot('MNIST example', channels=plot_channels),
        Printing(),
    ]

    main_loop = MainLoop(mlp, train_stream, algorithm, extensions=extensions)
    main_loop.run()
def run():
    """Train the CelebA classifier for 50 epochs with Adam.

    Batch normalization population statistics are updated as an exponential
    moving average together with the parameter updates. The training cost is
    monitored once after training, validation cost and accuracy every five
    epochs, and a checkpoint is written every five epochs.
    """
    main_loop_stream, train_monitor_stream, valid_monitor_stream = (
        create_celeba_streams(training_batch_size=100,
                              monitoring_batch_size=500,
                              include_targets=True)[:3])

    cg, bn_dropout_cg = create_training_computation_graphs()

    # Exponential moving average updates for the batch normalization
    # population statistics.
    decay_rate = 0.05
    extra_updates = []
    for population, minibatch in get_batch_normalization_updates(
            bn_dropout_cg):
        averaged = minibatch * decay_rate + population * (1 - decay_rate)
        extra_updates.append((population, averaged))

    # Prepare algorithm
    algorithm = GradientDescent(cost=bn_dropout_cg.outputs[0],
                                parameters=bn_dropout_cg.parameters,
                                step_rule=Adam())
    algorithm.add_updates(extra_updates)

    # Prepare monitoring
    train_cost = bn_dropout_cg.outputs[0]
    train_cost.name = 'cost'
    train_monitoring = DataStreamMonitoring(
        [train_cost], train_monitor_stream, prefix="train",
        before_first_epoch=False, after_epoch=False, after_training=True,
        updates=extra_updates)

    valid_cost, accuracy = cg.outputs
    valid_cost.name = 'cost'
    accuracy.name = 'accuracy'
    valid_monitoring = DataStreamMonitoring(
        [valid_cost, accuracy], valid_monitor_stream, prefix="valid",
        before_first_epoch=False, after_epoch=False, every_n_epochs=5)

    # Prepare checkpoint
    checkpoint = Checkpoint('celeba_classifier.zip', every_n_epochs=5,
                            use_cpickle=True)

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=50),
        train_monitoring,
        valid_monitoring,
        checkpoint,
        Printing(),
        ProgressBar(),
    ]
    main_loop = MainLoop(data_stream=main_loop_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
def main(save_to, num_epochs, batch_size):
    """Train a 3072-4096-1024-512-10 MLP on CIFAR-10 with Adam.

    Args:
        save_to: Path handed to the ``Checkpoint`` extension.
        num_epochs: Number of epochs after which training finishes.
        batch_size: Batch size of the training and validation streams.
    """
    data_dir = '/home/belohlavek/data/cifar10'

    mlp = MLP([Tanh(), Tanh(), Tanh(), Softmax()],
              [3072, 4096, 1024, 512, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()

    x = tt.tensor4('features', dtype='float32')
    y = tt.vector('label', dtype='int32')

    flat_images = x.reshape((-1, 3072))
    probs = mlp.apply(flat_images)
    cost = CategoricalCrossEntropy().apply(y, probs)
    error_rate = MisclassificationRate().apply(y, probs)

    # Add an L2 penalty over every weight matrix in the graph.
    cg = ComputationGraph([cost])
    ws = VariableFilter(roles=[WEIGHT])(cg.variables)
    penalties = [(w**2).sum() for w in ws]
    cost = cost + .00005 * sum(penalties)
    cost.name = 'final_cost'

    train_dataset = Cifar10Dataset(data_dir=data_dir, is_train=True)
    valid_dataset = Cifar10Dataset(data_dir=data_dir, is_train=False)
    train_stream = train_dataset.get_stream(batch_size)
    valid_stream = valid_dataset.get_stream(batch_size)

    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=Adam(learning_rate=0.001))

    train_channels = [cost, error_rate,
                      aggregation.mean(algorithm.total_gradient_norm)]
    extensions = [
        Timing(),
        LogExtension('/home/belohlavek/ALI/mlp.log'),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate], valid_stream, prefix="test"),
        TrainingDataMonitoring(train_channels, prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing(),
    ]

    main_loop = MainLoop(algorithm, train_stream, model=Model(cost),
                         extensions=extensions)
    main_loop.run()
def align_with_nam(config, args):
    """Main method for using the Neural Alignment Model.

    Only the parameter named ``alignment_matrix`` is trained; the weights of
    the NMT model are loaded from disk before the main loop runs. A
    ``StopIteration`` raised by the main loop is treated as the end of the
    alignment.

    Args:
        config (dict): NMT configuration. ``config['attention']`` is
            overwritten with ``'parameterized'``.
        args (object): ArgumentParser object containing the command
                       line arguments

    Returns:
        list. List of alignments, where alignments are represented as
        numpy matrices containing confidences between 0 and 1.
    """
    # The module-level list is rebound here and returned at the end.
    global alignments
    config['attention'] = 'parameterized'
    alignments = []
    nmt_model = NMTModel(config)
    nmt_model.set_up()
    align_stream = _get_align_stream(**config)
    extensions = [
        FinishAfter(after_epoch=True),
        TrainingDataMonitoring([nmt_model.cost], after_batch=True),
        PrintCurrentLogRow(after_batch=True),
        NextSentenceExtension(align_stream=align_stream,
                              every_n_batches=args.iterations,
                              before_training=True)
    ]
    # Train only the first parameter named exactly 'alignment_matrix'.
    # (`p.name in 'alignment_matrix'` was a reversed substring test: it
    # matched any fragment of that string and raised on a None name.)
    train_params = []
    for p in nmt_model.cg.parameters:
        if p.name == 'alignment_matrix':
            train_params.append(p)
            break
    algorithm = GradientDescent(cost=nmt_model.cost,
                                parameters=train_params)
    main_loop = MainLoop(model=nmt_model.training_model,
                         algorithm=algorithm,
                         data_stream=align_stream,
                         extensions=extensions)
    nmt_model_path = get_nmt_model_path(args.nmt_model_selector, config)
    loader = LoadNMTUtils(nmt_model_path,
                          config['saveto'],
                          nmt_model.training_model)
    loader.load_weights()
    try:
        main_loop.run()
    except StopIteration:
        logging.info("Alignment finished")
    return alignments