Example #1
    def train(self):
        print "Loading data"
        datafile = self.get_datafile()
        nbexamples = datafile.num_examples
        nbexamples -= nbexamples % (self.sequence_dim * self.time_dim)

        train_stream = ReshapeTransformer(
            DataStream(dataset=datafile,
                       iteration_scheme=ShuffledBatchChunkScheme(
                           nbexamples, self.sequence_dim * self.time_dim)),
            self.sequence_dim, self.time_dim)

        if self.image_size is not None:
            train_stream = Mapping(train_stream,
                                   spec_mapping,
                                   add_sources=['spectrogram'])

        print "Building Theano Graph"
        algorithm, self.fprop = self.build_theano_functions()

        main_loop = MainLoop(algorithm=algorithm,
                             data_stream=train_stream,
                             model=self.model,
                             extensions=[
                                 FinishAfter(after_n_epochs=EPOCHS),
                                 TrainingDataMonitoring(
                                     [aggregation.mean(self.model.outputs[0])],
                                     prefix="train",
                                     after_epoch=True),
                                 Printing(),
                                 SaveParams(EXP_PATH + NAME, after_epoch=True)
                             ])

        main_loop.run()
Example #2
def test_main_loop():

    class TestDataStream(object):

        def __init__(self):
            self.epochs = self._generate_data()

        def _generate_data(self):
            def wrap_in_dicts(iterable):
                for x in iterable:
                    yield dict(data=x)
            yield iter(wrap_in_dicts([1, 2, 3]))
            yield iter(wrap_in_dicts([4, 5]))
            yield iter(wrap_in_dicts([6, 7, 8, 9]))

        def get_epoch_iterator(self, as_dict):
            assert as_dict is True
            return next(self.epochs)

    finish_extension = FinishAfter()
    finish_extension.add_condition(
        'after_epoch', predicate=lambda log: log.status.epochs_done == 2)
    main_loop = MainLoop(MockAlgorithm(), TestDataStream(),
                         extensions=[WriteBatchExtension(),
                                     finish_extension])
    main_loop.run()

    assert main_loop.log.status.iterations_done == 5
    assert main_loop.log.status._epoch_ends == [3, 5]
    assert len(list(main_loop.log)) == 7
    for i in range(1, 6):
        assert main_loop.log[i].batch == dict(data=i)
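The MockAlgorithm and WriteBatchExtension helpers used above are not shown; a minimal sketch of what the test assumes, loosely modeled on the helpers in blocks.utils.testing (details inferred from usage, not the originals):

from blocks.algorithms import TrainingAlgorithm
from blocks.extensions import TrainingExtension


class MockAlgorithm(TrainingAlgorithm):
    """Records the batches it is fed instead of doing any training."""
    def initialize(self):
        self.batch = None

    def process_batch(self, batch):
        self.batch = batch


class WriteBatchExtension(TrainingExtension):
    """Copies each processed batch into the current log row."""
    def after_batch(self, batch):
        self.main_loop.log.current_row['batch'] = batch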
Example #3
def test_shared_variable_modifier_two_params():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, params=[W],
                          step_rule=step_rule)
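    # SharedVariableModifier calls the two-argument function below with
    # (iterations_done, current_value), so the learning rate is multiplied
    # by 0.2 after every batch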
    modifier = SharedVariableModifier(
        step_rule.learning_rate,
        lambda _, val: numpy.cast[theano.config.floatX](val * 0.2))
    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=sgd,
        extensions=[FinishAfter(after_n_epochs=1), modifier])

    main_loop.run()

    new_value = step_rule.learning_rate.get_value()
    assert_allclose(new_value,
                    0.001 * 0.2 ** n_batches,
                    atol=1e-5)
Example #4
def main():
    print("Fetching dataset...")
    trainset, validset, testset = load_jsb_chorales()

    print("Initializing model...")
    lstm = LstmBlocks(trainset.input_size, 100, trainset.target_size)

    print("Building DataStream...")
    dataset_train = IterableDataset({'x': trainset.inputs, 'y': trainset.targets})
    dataset_valid = IterableDataset({'x': validset.inputs, 'y': validset.targets})

    stream_train = DataStream(dataset=dataset_train)
    stream_valid = DataStream(dataset=dataset_valid)

    print("Build training process...")
    algorithm = GradientDescent(cost=lstm.cost, parameters=lstm.computation_graph.parameters, step_rule=Adam())

    valid_monitor = DataStreamMonitoring(variables=[lstm.cost], data_stream=stream_valid, prefix="valid")
    train_monitor = TrainingDataMonitoring(variables=[lstm.cost], prefix="train", after_epoch=True)

    main_loop = MainLoop(data_stream=stream_train, algorithm=algorithm,
                         extensions=[valid_monitor, train_monitor, FinishAfter(after_n_epochs=N_EPOCHS), Printing(),
                                     ProgressBar()])

    print("Training...")
    main_loop.run()
Example #5
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(
            cost=cost, params=ComputationGraph(cost).parameters,
            step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) +
        [Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)),
                prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Dump(save_to),
            Printing()])
    main_loop.run()
    return main_loop
Example #6
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
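    # add L2 weight decay on both weight matrices to the cross-entropy cost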
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train", ))
    mnist_test = MNIST(("test", ))

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate],
                             Flatten(DataStream.default_stream(
                                 mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                                     which_sources=('features', )),
                             prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[[
                     'test_final_cost',
                     'test_misclassificationrate_apply_error_rate'
                 ], ['train_total_gradient_norm']]))

    main_loop = MainLoop(algorithm,
                         Flatten(DataStream.default_stream(
                             mnist_train,
                             iteration_scheme=SequentialScheme(
                                 mnist_train.num_examples, 50)),
                                 which_sources=('features', )),
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
Example #7
    def train(self):
        print "Loading data"
        datafile = self.get_datafile()
        nbexamples = datafile.num_examples

        train_stream = DataStream(
            dataset=datafile,
            iteration_scheme=OverlapSequentialScheme(
                nbexamples, self.time_dim))

        print "Building Theano Graph"
        algorithm, self.fprop = self.build_theano_functions()

        main_loop = MainLoop(
            algorithm=algorithm,
            data_stream=train_stream,
            extensions=[
                FinishAfter(after_n_epochs=EPOCHS),
                TrainingDataMonitoring(
                    [self.model.outputs[0]],
                    prefix="train",
                    after_epoch=True,
                    every_n_batches=4000),
                #ProgressBar(),
                Printing()
            ])

        main_loop.run()
Example #8
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0),
              seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(cost=cost,
                        params=ComputationGraph(cost).parameters,
                        step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) + [
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)), prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Dump(save_to),
            Printing()
        ])
    main_loop.run()
    return main_loop
Example #9
def test_training_data_monitoring():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [
        numpy.array(f, dtype=theano.config.floatX)
        for f in [[1, 2], [3, 4], [5, 6]]
    ]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    V = shared_floatx(7, name='V')
    W_sum = named_copy(W.sum(), 'W_sum')
    cost = ((x * W).sum() - y)**2
    cost.name = 'cost'

    class TrueCostExtension(TrainingExtension):
        def before_batch(self, data):
            self.main_loop.log.current_row['true_cost'] = ((
                (W.get_value() * data["features"]).sum() - data["targets"])**2)

    main_loop = MainLoop(model=None,
                         data_stream=dataset.get_example_stream(),
                         algorithm=GradientDescent(cost=cost,
                                                   parameters=[W],
                                                   step_rule=Scale(0.001)),
                         extensions=[
                             FinishAfter(after_n_epochs=1),
                             TrainingDataMonitoring([W_sum, cost, V],
                                                    prefix="train1",
                                                    after_batch=True),
                             TrainingDataMonitoring(
                                 [aggregation.mean(W_sum), cost],
                                 prefix="train2",
                                 after_epoch=True),
                             TrueCostExtension()
                         ])

    main_loop.run()

    # Check monitoring of a shared variable
    assert_allclose(main_loop.log.current_row['train1_V'], 7.0)

    for i in range(n_batches):
        # The ground truth is written to the log before the batch is
        # processed, whereas the extension writes after the batch is
        # processed. This is why the iteration numbers differ here.
        assert_allclose(main_loop.log[i]['true_cost'],
                        main_loop.log[i + 1]['train1_cost'])
    assert_allclose(
        main_loop.log[n_batches]['train2_cost'],
        sum([main_loop.log[i]['true_cost']
             for i in range(n_batches)]) / n_batches)
    assert_allclose(
        main_loop.log[n_batches]['train2_W_sum'],
        sum([
            main_loop.log[i]['train1_W_sum'] for i in range(1, n_batches + 1)
        ]) / n_batches)
Example #10
def train_model(cost, train_stream, valid_stream, args):
    step_rule = learning_algorithm(args)
    cg = ComputationGraph(cost)

    algorithm = GradientDescent(cost=cost, step_rule=step_rule,
                                parameters=cg.parameters)

    extensions = []

    # Training and Validation score monitoring
    extensions.extend([
        TrainingDataMonitoring([cost],
                               prefix='train',
                               every_n_batches=args.monitoring_freq),
        DataStreamMonitoring([cost],
                             stream=valid_stream,
                             prefix='valid',
                             every_n_batches=args.monitoring_freq)]
                      )

    # Printing
    extensions.append(ProgressBar())
    extensions.append(Printing(every_n_batches=args.monitoring_freq))

    main_loop = MainLoop(model=Model(cost),
                         data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions
                         )

    # This is where the magic happens!
    main_loop.run()
Example #11
def main():
  x = tensor.matrix("features")
  input_to_hidden1 = get_typical_layer(x, 784, 500)
  #hidden1_to_hidden2 = get_typical_layer(input_to_hidden1, 500, 300)
  hidden1_to_latent = get_typical_layer(input_to_hidden1, 500, 20)

  latent_to_hidden2 = get_typical_layer(hidden1_to_latent, 20, 500)
  #hidden3_to_hidden4 = get_typical_layer(latent_to_hidden3, 300, 500)
  hidden2_to_output = get_typical_layer(latent_to_hidden2, 500, 784, Logistic())
  hidden2_to_output.name = "last_before_output"
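  # the Logistic output keeps reconstructions in (0, 1), matching the
  # BinaryCrossEntropy cost against the input pixels below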

  from blocks.bricks.cost import SquaredError, AbsoluteError, BinaryCrossEntropy
  from blocks.graph import ComputationGraph
  from blocks.algorithms import Adam, GradientDescent, Scale
  from blocks.roles import WEIGHT

  cost = BinaryCrossEntropy(name="error").apply(x, hidden2_to_output)
  cg = ComputationGraph(cost)
  weights = VariableFilter(roles=[WEIGHT])(cg.variables)
#  cost += 0.0001 * tensor.sum(map(lambda x: (x**2).sum(), weights))
#  cost.name = "regularized error"
  gd = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Adam())

  from blocks.main_loop import MainLoop
  from blocks.extensions import FinishAfter, Printing, ProgressBar
  from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
  monitor = TrainingDataMonitoring([cost], after_epoch=True)
  main_loop = MainLoop(data_stream=get_data_stream(), algorithm=gd,
                       extensions=[monitor, FinishAfter(after_n_epochs=5),
                                   ProgressBar(), Printing()])

  main_loop.run()
  showcase(cg, "last_before_output")
Example #12
def main(max_seq_length, lstm_dim, batch_size, num_batches, num_epochs):
    dataset_train = IterableDataset(generate_data(max_seq_length, batch_size,
                                                  num_batches))
    dataset_test = IterableDataset(generate_data(max_seq_length, batch_size,
                                                 100))

    stream_train = DataStream(dataset=dataset_train)
    stream_test = DataStream(dataset=dataset_test)

    x = T.tensor3('x')
    y = T.matrix('y')

    # we need to provide data for the LSTM layer of size 4 * lstm_dim, see
    # LSTM layer documentation for the explanation
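    # (the four chunks feed the LSTM's input, forget, cell and output gates)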
    x_to_h = Linear(1, lstm_dim * 4, name='x_to_h',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))
    lstm = LSTM(lstm_dim, name='lstm',
                weights_init=IsotropicGaussian(),
                biases_init=Constant(0.0))
    h_to_o = Linear(lstm_dim, 1, name='h_to_o',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))

    x_transform = x_to_h.apply(x)
    h, c = lstm.apply(x_transform)

    # only values of hidden units of the last timeframe are used for
    # the classification
    y_hat = h_to_o.apply(h[-1])
    y_hat = Logistic().apply(y_hat)

    cost = BinaryCrossEntropy().apply(y, y_hat)
    cost.name = 'cost'

    lstm.initialize()
    x_to_h.initialize()
    h_to_o.initialize()

    cg = ComputationGraph(cost)

    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=Adam())
    test_monitor = DataStreamMonitoring(variables=[cost],
                                        data_stream=stream_test, prefix="test")
    train_monitor = TrainingDataMonitoring(variables=[cost], prefix="train",
                                           after_epoch=True)

    main_loop = MainLoop(algorithm, stream_train,
                         extensions=[test_monitor, train_monitor,
                                     FinishAfter(after_n_epochs=num_epochs),
                                     Printing(), ProgressBar()])
    main_loop.run()

    print('Learned weights:')
    for layer in (x_to_h, lstm, h_to_o):
        print("Layer '%s':" % layer.name)
        for param in layer.parameters:
            print(param.name, ': ', param.get_value())
        print()
Example #13
    def do_test(with_serialization):
        data_stream = ContainerDataset(range(10)).get_default_stream()
        main_loop = MainLoop(None,
                             data_stream,
                             MockAlgorithm(),
                             extensions=[FinishAfter(after_n_batches=14)])
        main_loop.run()
        assert main_loop.log.status.iterations_done == 14

        if with_serialization:
            string_io = BytesIO()
            dill.dump(main_loop, string_io, fmode=dill.CONTENTS_FMODE)
            string_io.seek(0)
            main_loop = dill.load(string_io)

        finish_after = unpack(
            [ext for ext in main_loop.extensions
             if isinstance(ext, FinishAfter)],
            singleton=True)
        finish_after.add_condition(
            "after_batch",
            predicate=lambda log: log.status.iterations_done == 27)
        main_loop.run()
        assert main_loop.log.status.iterations_done == 27
        assert main_loop.log.status.epochs_done == 2
        for i in range(27):
            assert main_loop.log[i].batch == {"data": i % 10}
Example #14
    def run(self):
        self.build_extensions_list()
        print("Calling MainLoop")
        main_loop = MainLoop(data_stream=self.streams['mainloop'],
                             algorithm=self.model.algorithm,
                             extensions=self.extensions)
        main_loop.run()
Example #15
def test_main_loop():

    class TestDataStream(object):

        def __init__(self):
            self.epochs = self._generate_data()

        def _generate_data(self):
            def wrap_in_dicts(iterable):
                for x in iterable:
                    yield dict(data=x)
            yield iter(wrap_in_dicts([1, 2, 3]))
            yield iter(wrap_in_dicts([4, 5]))
            yield iter(wrap_in_dicts([6, 7, 8, 9]))

        def get_epoch_iterator(self, as_dict):
            assert as_dict is True
            return next(self.epochs)

    finish_extension = FinishAfter()
    finish_extension.add_condition(
        'after_epoch', predicate=lambda log: log.status['epochs_done'] == 2)
    main_loop = MainLoop(MockAlgorithm(), TestDataStream(),
                         extensions=[WriteBatchExtension(),
                                     finish_extension])
    main_loop.run()

    assert main_loop.log.status['iterations_done'] == 5
    assert main_loop.log.status['_epoch_ends'] == [3, 5]
    assert len(main_loop.log) == 5
    for i in range(1, 6):
        assert main_loop.log[i]['batch'] == dict(data=i)
Example #16
def test_shared_variable_modifier_two_params():
    weights = numpy.array([-1, 1], dtype=floatX)
    features = [numpy.array(f, dtype=floatX) for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = ContainerDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    cost = ((x * W).sum() - y)**2
    cost.name = 'cost'

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, params=[W], step_rule=step_rule)
    modifier = SharedVariableModifier(
        step_rule.learning_rate, lambda _, val: numpy.cast[floatX](val * 0.2))
    main_loop = MainLoop(model=None,
                         data_stream=dataset.get_default_stream(),
                         algorithm=sgd,
                         extensions=[FinishAfter(after_n_epochs=1), modifier])

    main_loop.run()

    new_value = step_rule.learning_rate.get_value()
    assert_allclose(new_value, 0.001 * 0.2**n_batches, atol=1e-5)
Example #17
def run(discriminative_regularization=True):
    streams = create_celeba_streams(training_batch_size=100,
                                    monitoring_batch_size=500,
                                    include_targets=False)
    main_loop_stream, train_monitor_stream, valid_monitor_stream = streams[:3]

    # Compute parameter updates for the batch normalization population
    # statistics. They are updated following an exponential moving average.
    rval = create_training_computation_graphs(discriminative_regularization)
    cg, bn_cg, variance_parameters = rval
    pop_updates = list(
        set(get_batch_normalization_updates(bn_cg, allow_duplicates=True)))
    decay_rate = 0.05
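    # each population statistic p tracks its minibatch estimate m through an
    # exponential moving average: p <- decay_rate * m + (1 - decay_rate) * p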
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]

    model = Model(bn_cg.outputs[0])
    selector = Selector(
        find_bricks(
            model.top_bricks,
            lambda brick: brick.name in ('encoder_convnet', 'encoder_mlp',
                                         'decoder_convnet', 'decoder_mlp')))
    parameters = list(selector.get_parameters().values()) + variance_parameters

    # Prepare algorithm
    step_rule = Adam()
    algorithm = GradientDescent(cost=bn_cg.outputs[0],
                                parameters=parameters,
                                step_rule=step_rule)
    algorithm.add_updates(extra_updates)

    # Prepare monitoring
    monitored_quantities_list = []
    for graph in [bn_cg, cg]:
        cost, kl_term, reconstruction_term = graph.outputs
        cost.name = 'nll_upper_bound'
        avg_kl_term = kl_term.mean(axis=0)
        avg_kl_term.name = 'avg_kl_term'
        avg_reconstruction_term = -reconstruction_term.mean(axis=0)
        avg_reconstruction_term.name = 'avg_reconstruction_term'
        monitored_quantities_list.append(
            [cost, avg_kl_term, avg_reconstruction_term])
    train_monitoring = DataStreamMonitoring(
        monitored_quantities_list[0], train_monitor_stream, prefix="train",
        updates=extra_updates, after_epoch=False, before_first_epoch=False,
        every_n_epochs=5)
    valid_monitoring = DataStreamMonitoring(
        monitored_quantities_list[1], valid_monitor_stream, prefix="valid",
        after_epoch=False, before_first_epoch=False, every_n_epochs=5)

    # Prepare checkpoint
    save_path = 'celeba_vae_{}regularization.zip'.format(
        '' if discriminative_regularization else 'no_')
    checkpoint = Checkpoint(save_path, every_n_epochs=5, use_cpickle=True)

    extensions = [Timing(), FinishAfter(after_n_epochs=75), train_monitoring,
                  valid_monitoring, checkpoint, Printing(), ProgressBar()]
    main_loop = MainLoop(data_stream=main_loop_stream,
                         algorithm=algorithm, extensions=extensions)
    main_loop.run()
Example #18
def main(save_to, num_batches):
    linear = Linear()
    rnn = SORN()
    x = tensor.vector('numbers')
    states_E, states_I, updates = rnn.apply(linear.apply(x[None, :]))
    y = linear.apply(states_E[-1])
    cost = SquaredError().apply(y[:, None], mlp.apply(states_E[-1]))
    # consider updates about linear from x and to y
    # 1. make all in SORN
    # 2. gradient?
    main_loop = MainLoop(
        UpdatesAlgorithm(
            updates=updates),
        get_data_stream(range(100)),
        model=Model(),
        extensions=[
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)),
                prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Checkpoint(save_to),
            Printing()])
    main_loop.run()
    return main_loop
Example #19
    def do_test(with_serialization):
        data_stream = IterableDataset(range(10)).get_example_stream()
        main_loop = MainLoop(MockAlgorithm(),
                             data_stream,
                             extensions=[
                                 WriteBatchExtension(),
                                 FinishAfter(after_n_batches=14)
                             ])
        main_loop.run()
        assert main_loop.log.status['iterations_done'] == 14

        if with_serialization:
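            # round-trip the main loop through pickle to check that training
            # can resume from a deserialized state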
            main_loop = cPickle.loads(cPickle.dumps(main_loop))

        finish_after = unpack(
            [ext for ext in main_loop.extensions
             if isinstance(ext, FinishAfter)],
            singleton=True)
        finish_after.add_condition(
            ["after_batch"],
            predicate=lambda log: log.status['iterations_done'] == 27)
        main_loop.run()
        assert main_loop.log.status['iterations_done'] == 27
        assert main_loop.log.status['epochs_done'] == 2
        for i in range(27):
            assert main_loop.log[i + 1]['batch'] == {"data": i % 10}
Example #20
    def train(self):
        print "Loading data"
        datafile = self.get_datafile()
        nbexamples = datafile.num_examples
        nbexamples -= nbexamples%(self.sequence_dim*self.time_dim)

        train_stream = ReshapeTransformer(
            DataStream(
                dataset=datafile,
                iteration_scheme=ShuffledBatchChunkScheme(
                    nbexamples, self.sequence_dim * self.time_dim)),
            self.sequence_dim,
            self.time_dim)

        if self.image_size is not None:
            train_stream = Mapping(train_stream, spec_mapping,
                                   add_sources=['spectrogram'])

        print "Building Theano Graph"
        algorithm, self.fprop = self.build_theano_functions()

        main_loop = MainLoop(
            algorithm=algorithm,
            data_stream=train_stream,
            model=self.model,
            extensions=[
                FinishAfter(after_n_epochs=EPOCHS),
                TrainingDataMonitoring(
                    [aggregation.mean(self.model.outputs[0])],
                    prefix="train",
                    after_epoch=True),
                Printing(),
                SaveParams(EXP_PATH + NAME, after_epoch=True)
            ])

        main_loop.run()
Example #21
def test_shared_variable_modifier():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, parameters=[W],
                          step_rule=step_rule)
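    # with a one-argument function the modifier receives only
    # iterations_done, so after batch n the learning rate is set to 10/n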
    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=sgd,
        extensions=[
            FinishAfter(after_n_epochs=1),
            SharedVariableModifier(
                step_rule.learning_rate,
                lambda n: numpy.cast[theano.config.floatX](10. / n)
            )])

    main_loop.run()

    assert_allclose(step_rule.learning_rate.get_value(),
                    numpy.cast[theano.config.floatX](10. / n_batches))
Example #22
def test_shared_variable_modifier():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX) for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector("features")
    y = tensor.scalar("targets")
    W = shared_floatx([0, 0], name="W")
    cost = ((x * W).sum() - y) ** 2
    cost.name = "cost"

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, parameters=[W], step_rule=step_rule)
    main_loop = MainLoop(
        model=None,
        data_stream=dataset.get_example_stream(),
        algorithm=sgd,
        extensions=[
            FinishAfter(after_n_epochs=1),
            SharedVariableModifier(step_rule.learning_rate, lambda n: numpy.cast[theano.config.floatX](10.0 / n)),
        ],
    )

    main_loop.run()

    assert_allclose(step_rule.learning_rate.get_value(), numpy.cast[theano.config.floatX](10.0 / n_batches))
Example #23
    def train_base_model(self, train_data, test_data, input_dim):
        x = T.matrix('features')
        y = T.matrix('targets')
        mlp, cost, mis_cost = self.create_base_model(x, y, input_dim)
        cg = ComputationGraph([cost])
        inputs = VariableFilter(roles=[INPUT])(cg.variables)
        cg = apply_dropout(cg, inputs, 0.2)
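        # apply_dropout rewrites the graph so that each INPUT variable is
        # dropped with probability 0.2 during training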
        algorithm = GradientDescent(cost=cost,
                                    parameters=cg.parameters,
                                    step_rule=Adam(learning_rate=0.001))
        data_stream = train_data
        data_stream_test = test_data
        monitor = DataStreamMonitoring(variables=[mis_cost],
                                       data_stream=data_stream_test,
                                       prefix="test")
        plot_ext = Plot('F1-measure',
                        channels=[['test_MisclassificationRate']],
                        after_batch=True)
        main_loop = MainLoop(data_stream=data_stream,
                             algorithm=algorithm,
                             extensions=[
                                 monitor,
                                 FinishAfter(after_n_epochs=50),
                                 Printing(), plot_ext
                             ])
        main_loop.run()
        return mlp
Example #24
def maxout_vae_mnist_test(path_vae_mnist):

    # load vae model on mnist
    vae_mnist = load(path_vae_mnist)
    maxout = Maxout()
    x = T.matrix('features')
    y = T.imatrix('targets')
    batch_size = 128
    z, _ = vae_mnist.sampler.sample(vae_mnist.encoder_mlp.apply(x))
    predict = maxout.apply(z)

    cost = Softmax().categorical_cross_entropy(y.flatten(), predict)
    y_hat = Softmax().apply(predict)
    cost.name = 'cost'
    cg = ComputationGraph(cost)

    temp = cg.parameters
    for i, t in enumerate(temp):
        t.name = t.name + str(i) + "maxout"

    error_brick = MisclassificationRate()
    error_rate = error_brick.apply(y, y_hat) 

    # training
    step_rule = RMSProp(0.01, 0.9)
    #step_rule = Momentum(0.2, 0.9)
    train_set = MNIST('train')
    test_set = MNIST("test")

    data_stream_train = Flatten(DataStream.default_stream(
        train_set,
        iteration_scheme=SequentialScheme(train_set.num_examples, batch_size)))

    data_stream_test = Flatten(DataStream.default_stream(
        test_set,
        iteration_scheme=SequentialScheme(test_set.num_examples, batch_size)))

    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], prefix="train")
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate], data_stream=data_stream_test, prefix="test")


    extensions = [monitor_train,
                  monitor_valid,
                  FinishAfter(after_n_epochs=50),
                  Printing(every_n_epochs=1)]

    main_loop = MainLoop(data_stream=data_stream_train,
                         algorithm=algorithm, model=Model(cost),
                         extensions=extensions)
    main_loop.run()

    # save here
    from blocks.serialization import dump
    with closing(open('../data_mnist/maxout', 'wb')) as f:
        dump(maxout, f)
Example #25
class Runner(object):
    def __init__(self, worker, experiment, config):
        # Data
        dataset = CIFAR10('train', flatten=False)
        test_dataset = CIFAR10('test', flatten=False)
        batch_size = 128

        scheme = ShuffledScheme(dataset.num_examples, batch_size)
        datastream = DataStream(dataset, iteration_scheme=scheme)

        test_scheme = ShuffledScheme(test_dataset.num_examples, batch_size)
        test_stream = DataStream(test_dataset, iteration_scheme=test_scheme)

        # Model
        m = ModelHelper(config)

        def score_func(mainloop):
            scores = mainloop.log.to_dataframe()["test_accur"].values
            return np.mean(np.sort(scores)[-4:-1])

        # Algorithm
        cg = ComputationGraph([m.cost])
        algorithm = GradientDescent(cost=m.cost,
                                    params=cg.parameters,
                                    step_rule=AdaM())

        #job_name = os.path.basename(worker.running_job)
        job_name = os.path.basename(".")
        update_path = (os.path.join(os.path.join(worker.path, "updates"),
                                    job_name))
        if not os.path.exists(update_path):
            os.mkdir(update_path)

        self.main_loop = MainLoop(
            algorithm,
            datastream,
            model=Model(m.cost),
            extensions=[
                Timing(),
                TrainingDataMonitoring([m.cost, m.accur],
                                       prefix="train",
                                       after_epoch=True),
                DataStreamMonitoring([m.cost, m.accur],
                                     test_stream,
                                     prefix="test"),
                FinishAfter(after_n_epochs=1),
                LogToFile(os.path.join(update_path, "log.csv")),
                Printing(),
                EpochProgress(dataset.num_examples // batch_size + 1)
                #, DistributeUpdate(worker, every_n_epochs=1)
                #, DistributeWhetlabFinish(worker, experiment, score_func)
                #, Plot('cifar10',
                #channels=[['train_cost', 'test_cost'], ['train_accur', 'test_accur']])
            ])

    def run(self):
        self.main_loop.run()
Example #26
def train(cost, error_rate, batch_size=100, num_epochs=150):
    # Setting logger
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = 'results/memory_' + timestr
    log_path = os.path.join(save_path, 'log.txt')
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    blocks_model = Model(cost)
    all_params = blocks_model.parameters
    print "Number of found parameters:" + str(len(all_params))
    print all_params

    training_algorithm = GradientDescent(
        cost=cost, parameters=all_params,
        step_rule=Adam(learning_rate=0.001))

    # training_algorithm = GradientDescent(
    #     cost=cost, params=all_params,
    #     step_rule=Scale(learning_rate=model.default_lr))

    monitored_variables = [cost, error_rate]

    # the rest is for validation
    # train_data_stream, valid_data_stream = get_mnist_streams(
    #     50000, batch_size)
    train_data_stream, valid_data_stream = get_mnist_video_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(
        variables=monitored_variables,
        prefix="train",
        after_epoch=True)

    valid_monitoring = DataStreamMonitoring(
        variables=monitored_variables,
        data_stream=valid_data_stream,
        prefix="valid",
        after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=blocks_model,
        extensions=[
            train_monitoring,
            valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams('valid_misclassificationrate_apply_error_rate',
                       blocks_model, save_path),
            SaveLog(save_path, after_epoch=True),
            ProgressBar(),
            Printing()])
    main_loop.run()
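SaveParams and SaveLog above are project-specific extensions, not part of Blocks. A plausible minimal sketch of SaveParams under that assumption: watch the named log channel and snapshot the model parameters whenever it improves (the class name and call signature come from the example above; the body is inferred, not the original):

import os

import numpy
from blocks.extensions import SimpleExtension


class SaveParams(SimpleExtension):
    """Save model parameters whenever the tracked channel hits a new best.

    track_var names a log channel, e.g.
    'valid_misclassificationrate_apply_error_rate'.
    """
    def __init__(self, track_var, model, save_path, **kwargs):
        kwargs.setdefault('after_epoch', True)
        super(SaveParams, self).__init__(**kwargs)
        self.track_var = track_var
        self.model = model
        self.save_path = save_path
        self.best_value = numpy.inf

    def do(self, which_callback, *args):
        value = self.main_loop.log.current_row.get(self.track_var)
        if value is not None and value < self.best_value:
            self.best_value = value
            numpy.savez(os.path.join(self.save_path, 'best_params.npz'),
                        **self.model.get_parameter_values())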
Example #27
def main(save_to, num_epochs, bokeh=False):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(
        cost=cost, params=cg.parameters,
        step_rule=Scale(learning_rate=0.1))
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      DataStream(mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    if bokeh:
        extensions.append(Plot(
            'MNIST example',
            channels=[
                ['test_final_cost',
                 'test_misclassificationrate_apply_error_rate'],
                ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        DataStream(mnist_train,
                   iteration_scheme=SequentialScheme(
                       mnist_train.num_examples, 50)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()
Example #28
def run(model_name):

    running_on_laptop = socket.gethostname() == 'yop'

    X = tensor.tensor4('image_features', dtype='float32')
    T = tensor.matrix('targets', dtype='float32')

    image_border_size = 100

    if running_on_laptop:
        host_plot = 'http://*****:*****@ %s' %
             (model_name, datetime.datetime.now(), socket.gethostname()),
             channels=[['loss', 'valid_loss_test'], ['valid_error']],
             after_epoch=True,
             server_url=host_plot),
        Printing(),
        Checkpoint('train2')
    ]

    main_loop = MainLoop(data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
Example #29
class Runner(object):
    def __init__(self, worker, experiment, config):
        # Data
        dataset = CIFAR10('train', flatten=False)
        test_dataset = CIFAR10('test', flatten=False)
        batch_size = 128

        scheme = ShuffledScheme(dataset.num_examples, batch_size)
        datastream = DataStream(dataset, iteration_scheme=scheme)

        test_scheme = ShuffledScheme(test_dataset.num_examples, batch_size)
        test_stream = DataStream(test_dataset, iteration_scheme=test_scheme)

        # Model
        m = ModelHelper(config)

        def score_func(mainloop):
            scores = mainloop.log.to_dataframe()["test_accur"].values
            return np.mean(np.sort(scores)[-4:-1])

        # Algorithm
        cg = ComputationGraph([m.cost])
        algorithm = GradientDescent(cost=m.cost, params=cg.parameters,
                                    step_rule=AdaM())

        #job_name = os.path.basename(worker.running_job)
        job_name = os.path.basename(".")
        update_path = (os.path.join(os.path.join(worker.path, "updates"), job_name))
        if not os.path.exists(update_path):
            os.mkdir(update_path)

        self.main_loop = MainLoop(
            algorithm,
            datastream,
            model=Model(m.cost),
            extensions=[
                Timing(),
                TrainingDataMonitoring(
                    [m.cost, m.accur], prefix="train", after_epoch=True)
                , DataStreamMonitoring(
                    [m.cost, m.accur],
                    test_stream,
                    prefix="test")
                , FinishAfter(after_n_epochs=1)
                , LogToFile(os.path.join(update_path, "log.csv"))
                , Printing()
                , EpochProgress(dataset.num_examples // batch_size + 1)
                #, DistributeUpdate(worker, every_n_epochs=1)
                #, DistributeWhetlabFinish(worker, experiment, score_func)
                #, Plot('cifar10',
                    #channels=[['train_cost', 'test_cost'], ['train_accur', 'test_accur']])
                ])
    def run(self):
        self.main_loop.run()
Example #30
def main():

    import configurations
    from stream import DStream
    logger = logging.getLogger(__name__)
    cfig = getattr(configurations, 'get_config_penn')()

    rnnlm = Rnnlm(cfig['vocabsize'], cfig['nemb'], cfig['nhids'])
    rnnlm.weights_init = IsotropicGaussian(0.1)
    rnnlm.biases_init = Constant(0.)
    rnnlm.push_initialization_config()
    rnnlm.generator.transition.weights_init = Orthogonal()

    sentence = tensor.lmatrix('sentence')
    sentence_mask = tensor.matrix('sentence_mask')
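    # the cost is summed over the batch; aggregation.mean divides by the
    # number of sequences to report a per-sequence log-likelihood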
    batch_cost = rnnlm.cost(sentence, sentence_mask).sum()
    batch_size = sentence.shape[1].copy(name='batch_size')
    cost = aggregation.mean(batch_cost, batch_size)
    cost.name = "sequence_log_likelihood"
    logger.info("Cost graph is built")

    model = Model(cost)
    parameters = model.get_parameter_dict()
    logger.info("Parameters:\n" +
                pprint.pformat(
                    [(key, value.get_value().shape) for key, value
                        in parameters.items()],
                    width=120))

    for brick in model.get_top_bricks():
        brick.initialize()
    cg = ComputationGraph(cost)
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=CompositeRule([StepClipping(10.0), Scale(0.01)]))

    gradient_norm = aggregation.mean(algorithm.total_gradient_norm)
    step_norm = aggregation.mean(algorithm.total_step_norm)
    monitored_vars = [cost, gradient_norm, step_norm]

    train_monitor = TrainingDataMonitoring(variables=monitored_vars, after_batch=True,
                                           before_first_epoch=True, prefix='tra')

    extensions = [train_monitor, Timing(), Printing(after_batch=True),
                  FinishAfter(after_n_epochs=1000),
                  Printing(every_n_batches=1)]

    train_stream = DStream(datatype='train', config=cfig)
    main_loop = MainLoop(model=model,
                         data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)

    main_loop.run()
Example #31
def test_training_data_monitoring():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    V = shared_floatx(7, name='V')
    W_sum = named_copy(W.sum(), 'W_sum')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    class TrueCostExtension(TrainingExtension):

        def before_batch(self, data):
            self.main_loop.log.current_row['true_cost'] = (
                ((W.get_value() * data["features"]).sum() -
                 data["targets"]) ** 2)

    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=GradientDescent(cost=cost, params=[W],
                                  step_rule=Scale(0.001)),
        extensions=[
            FinishAfter(after_n_epochs=1),
            TrainingDataMonitoring([W_sum, cost, V], prefix="train1",
                                   after_batch=True),
            TrainingDataMonitoring([aggregation.mean(W_sum), cost],
                                   prefix="train2", after_epoch=True),
            TrueCostExtension()])

    main_loop.run()

    # Check monitoring of a shared variable
    assert_allclose(main_loop.log.current_row['train1_V'], 7.0)

    for i in range(n_batches):
        # The ground truth is written to the log before the batch is
        # processed, whereas the extension writes after the batch is
        # processed. This is why the iteration numbers differ here.
        assert_allclose(main_loop.log[i]['true_cost'],
                        main_loop.log[i + 1]['train1_cost'])
    assert_allclose(
        main_loop.log[n_batches]['train2_cost'],
        sum([main_loop.log[i]['true_cost']
             for i in range(n_batches)]) / n_batches)
    assert_allclose(
        main_loop.log[n_batches]['train2_W_sum'],
        sum([main_loop.log[i]['train1_W_sum']
             for i in range(1, n_batches + 1)]) / n_batches)
Example #32
def train(cost, error_rate, batch_size=100, num_epochs=150):
    # Setting logger
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = 'results/memory_' + timestr
    log_path = os.path.join(save_path, 'log.txt')
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    blocks_model = Model(cost)
    all_params = blocks_model.parameters
    print "Number of found parameters:" + str(len(all_params))
    print all_params

    training_algorithm = GradientDescent(cost=cost,
                                         parameters=all_params,
                                         step_rule=Adam(learning_rate=0.001))

    # training_algorithm = GradientDescent(
    #     cost=cost, params=all_params,
    #     step_rule=Scale(learning_rate=model.default_lr))

    monitored_variables = [cost, error_rate]

    # the rest is for validation
    # train_data_stream, valid_data_stream = get_mnist_streams(
    #     50000, batch_size)
    train_data_stream, valid_data_stream = get_mnist_video_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(variables=monitored_variables,
                                              prefix="train",
                                              after_epoch=True)

    valid_monitoring = DataStreamMonitoring(variables=monitored_variables,
                                            data_stream=valid_data_stream,
                                            prefix="valid",
                                            after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=blocks_model,
        extensions=[
            train_monitoring, valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams('valid_misclassificationrate_apply_error_rate',
                       blocks_model, save_path),
            SaveLog(save_path, after_epoch=True),
            ProgressBar(),
            Printing()
        ])
    main_loop.run()
Example #33
def train_model(m, train_stream, valid_stream, load_location=None, save_location=None):

    # Define the model
    model = Model(m.cost_reg)

    ae_excl_vars = set()
    if hasattr(m, 'ae_costs'):
        for i, cost in enumerate(m.ae_costs):
            print "Trianing stacked AE layer", i+1
            # train autoencoder component separately
            cost.name = 'ae_cost%d' % i

            cg = ComputationGraph(cost)
            params = set(cg.parameters) - ae_excl_vars
            ae_excl_vars = ae_excl_vars | params

            algorithm = GradientDescent(cost=cost, step_rule=config.step_rule, params=list(params))
            main_loop = MainLoop(
                data_stream=NoData(train_stream),
                algorithm=algorithm,
                extensions=[
                    TrainingDataMonitoring([cost], prefix='train', every_n_epochs=1),
                    Printing(every_n_epochs=1),
                    FinishAfter(every_n_epochs=1000),
                ]
            )
            main_loop.run()

    cg = ComputationGraph(m.cost_reg)
    params = list(set(cg.parameters) - ae_excl_vars)
    algorithm = GradientDescent(cost=m.cost_reg, step_rule=config.step_rule,
                                params=params)
    main_loop = MainLoop(
        model=model,
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            TrainingDataMonitoring(
                [m.cost_reg, m.ber_reg, m.cost, m.ber],
                prefix='train', every_n_epochs=1*config.pt_freq),
            DataStreamMonitoring([m.cost, m.ber], valid_stream, prefix='valid',
                                 after_epoch=False, every_n_epochs=5*config.pt_freq),

            Printing(every_n_epochs=1*config.pt_freq, after_epoch=False),
            Plot(document='tr_'+model_name+'_'+config.param_desc,
                 channels=[['train_cost', 'train_cost_reg', 'valid_cost'],
                           ['train_ber', 'train_ber_reg', 'valid_ber']],
                 server_url='http://eos21:4201',
                 every_n_epochs=1*config.pt_freq, after_epoch=False),

            FinishAfter(every_n_epochs=10000)
        ]
    )
    main_loop.run()
Example #34
def run(model_name, port_train, port_valid):

	running_on_laptop = socket.gethostname() == 'yop'

	X = tensor.tensor4('image_features', dtype='float32')
	T = tensor.matrix('targets', dtype='float32')

	image_border_size = (100, 100)

	if running_on_laptop:
		host_plot = 'http://*****:*****@ %s' % (model_name, datetime.datetime.now(), socket.gethostname()), channels=[['loss'], ['error', 'valid_error']], after_epoch=True, server_url=host_plot),
		Printing(),
		Checkpoint('train2')
	]

	main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
	                     extensions=extensions)
	main_loop.run()
Example #35
def test_main_loop():

    main_loop = MainLoop(
        MockAlgorithm(), IterableDataset(range(10)).get_example_stream(),
        extensions=[WriteBatchExtension(), FinishAfter(after_n_epochs=2)])
    main_loop.run()
    assert_raises(AttributeError, getattr, main_loop, 'model')

    assert main_loop.log.status['iterations_done'] == 20
    assert main_loop.log.status['_epoch_ends'] == [10, 20]
    assert len(main_loop.log) == 20
    for i in range(20):
        assert main_loop.log[i + 1]['batch'] == {'data': i % 10}
Example #36
def align_with_nam(config, args):
    """Main method for using the Neural Alignment Model.
    
    Args:
        config (dict): NMT configuration
        args (object): ArgumentParser object containing the command
                       line arguments
    
    Returns:
        list. List of alignments, where alignments are represented as
        numpy matrices containing confidences between 0 and 1.
    """
    global alignments
    config['attention'] = 'parameterized'
    alignments = []
    nmt_model = NMTModel(config)
    nmt_model.set_up()
    align_stream = _get_align_stream(**config)
    extensions = [
        FinishAfter(after_epoch=True),
        TrainingDataMonitoring([nmt_model.cost], after_batch=True),
        PrintCurrentLogRow(after_batch=True),
        NextSentenceExtension(align_stream=align_stream,
                              every_n_batches=args.iterations,
                              before_training=True)
    ]
    train_params = []
    for p in nmt_model.cg.parameters:
        if p.name == 'alignment_matrix':
            train_params.append(p)
            break
    algorithm = GradientDescent(
        cost=nmt_model.cost,
        parameters=train_params
    )
    main_loop = MainLoop(
        model=nmt_model.training_model,
        algorithm=algorithm,
        data_stream=align_stream,
        extensions=extensions
    )
    nmt_model_path = get_nmt_model_path(args.nmt_model_selector, config)
    loader = LoadNMTUtils(nmt_model_path,
                          config['saveto'],
                          nmt_model.training_model)
    loader.load_weights()
    try:
        main_loop.run()
    except StopIteration:
        logging.info("Alignment finished")
    return alignments
Example #37
def train_model(cost,
                train_stream,
                valid_stream,
                valid_freq,
                valid_rare,
                load_location=None,
                save_location=None):
    cost.name = 'nll'
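    # cost is the average NLL in nats; dividing by log(2) converts it to
    # bits, and 2 ** bits is the conventional perplexity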
    perplexity = 2**(cost / tensor.log(2))
    perplexity.name = 'ppl'

    # Define the model
    model = Model(cost)

    # Load the parameters from a dumped model
    if load_location is not None:
        logger.info('Loading parameters...')
        model.set_param_values(load_parameter_values(load_location))

    cg = ComputationGraph(cost)
    algorithm = GradientDescent(cost=cost,
                                step_rule=Scale(learning_rate=0.01),
                                params=cg.parameters)
    main_loop = MainLoop(
        model=model,
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            DataStreamMonitoring([cost, perplexity],
                                 valid_stream,
                                 prefix='valid_all',
                                 every_n_batches=5000),
            # Overfitting of rare words occurs between 3000 and 4000 iterations
            DataStreamMonitoring([cost, perplexity],
                                 valid_rare,
                                 prefix='valid_rare',
                                 every_n_batches=500),
            DataStreamMonitoring([cost, perplexity],
                                 valid_freq,
                                 prefix='valid_frequent',
                                 every_n_batches=5000),
            Printing(every_n_batches=500)
        ])
    main_loop.run()

    # Save the main loop
    if save_location is not None:
        logger.info('Saving the main loop...')
        dump_manager = MainLoopDumpManager(save_location)
        dump_manager.dump(main_loop)
        logger.info('Saved')
Example #38
def infer_population(data_stream, model, n_batches):
    """ Sets the population parameters for a given model"""
    # construct a main loop with algorithm
    algorithm = BatchNormAccumulate(model)
    main_loop = MainLoop(
        algorithm=algorithm,
        data_stream=data_stream,
        model=model,
        extensions=[FinishAfter(after_n_batches=n_batches), ProgressBar()])
    main_loop.run()
    parameters = get_batchnorm_parameters(model)
    batchnorm_bricks = set([get_brick(p) for p in parameters])
    for b in batchnorm_bricks:
        b.use_population = True
Example #39
def train(model, batch_size=50, num_epochs=1500):
    # Setting Logger
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = "results/memory_" + timestr
    log_path = os.path.join(save_path, "log.txt")
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    cost = model.outputs["cost"]
    blocks_model = Model(cost)
    all_params = blocks_model.parameters
    print "Number of found parameters:" + str(len(all_params))
    print all_params

    training_algorithm = GradientDescent(cost=cost, params=all_params, step_rule=Adam(learning_rate=model.default_lr))

    # training_algorithm = GradientDescent(
    #     cost=cost, params=all_params,
    #     step_rule=Scale(learning_rate=model.default_lr))

    monitored_variables = [cost]

    # the rest is for validation
    # train_data_stream, valid_data_stream = get_mnist_streams(
    #     50000, batch_size)
    train_data_stream, valid_data_stream = get_memory_streams(20, 10)

    train_monitoring = TrainingDataMonitoring(variables=monitored_variables, prefix="train", after_epoch=True)

    valid_monitoring = DataStreamMonitoring(
        variables=monitored_variables, data_stream=valid_data_stream, prefix="valid", after_epoch=True
    )

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=blocks_model,
        extensions=[
            train_monitoring,
            valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams("valid_MSE", blocks_model, save_path),
            SaveLog(save_path, after_epoch=True),
            Printing(),
        ],
    )
    main_loop.run()
Example #40
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHTS])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(cost=cost,
                                step_rule=SteepestDescent(learning_rate=0.1))
    main_loop = MainLoop(
        mlp,
        DataStream(mnist_train,
                   iteration_scheme=SequentialScheme(mnist_train.num_examples,
                                                     50)),
        algorithm,
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=num_epochs),
            DataStreamMonitoring([cost, error_rate],
                                 DataStream(mnist_test,
                                            iteration_scheme=SequentialScheme(
                                                mnist_test.num_examples, 500)),
                                 prefix="test"),
            TrainingDataMonitoring([
                cost, error_rate,
                aggregation.mean(algorithm.total_gradient_norm)
            ],
                                   prefix="train",
                                   after_every_epoch=True),
            SerializeMainLoop(save_to),
            Plot('MNIST example',
                 channels=[[
                     'test_final_cost',
                     'test_misclassificationrate_apply_error_rate'
                 ], ['train_total_gradient_norm']]),
            Printing()
        ])
    main_loop.run()
Example #41
def run(get_model, model_name):
	train_stream = ServerDataStream(('cases', 'image_position', 'multiplier', 'sax', 'sax_features', 'targets'), False, hwm=10)
	valid_stream = ServerDataStream(('cases', 'image_position', 'multiplier', 'sax', 'sax_features', 'targets'), False, hwm=10, port=5558)

	ftensor5 = tensor.TensorType('float32', (False,)*5)

	input_var  = ftensor5('sax_features')
	target_var = tensor.matrix('targets')
	multiply_var = tensor.matrix('multiplier')
	multiply_var = T.addbroadcast(multiply_var, 1)

	prediction, test_prediction, test_pred_mid, params_bottom, params_top = get_model(input_var, multiply_var)

	# load parameters
	cg = ComputationGraph(test_pred_mid)
	params_val = numpy.load('sunnybrook/best_weights.npz')
	
	for p, value in zip(cg.shared_variables, params_val['arr_0']):
		p.set_value(value)

	crps = tensor.abs_(test_prediction - target_var).mean()

	loss = squared_error(prediction, target_var).mean()

	loss.name = 'loss'
	crps.name = 'crps'

	algorithm = GradientDescent(
		cost=loss,
		parameters=params_top,
		step_rule=Adam(),
		on_unused_sources='ignore'
	)

	host_plot = 'http://localhost:5006'

	extensions = [
		Timing(),
		TrainingDataMonitoring([loss], after_epoch=True),
		DataStreamMonitoring(variables=[crps, loss], data_stream=valid_stream, prefix="valid"),
		Plot('%s %s %s' % (model_name, datetime.date.today(), time.strftime('%H:%M')), channels=[['loss','valid_loss'], ['valid_crps']], after_epoch=True, server_url=host_plot),
		Printing(),
		Checkpoint('train'),
		FinishAfter(after_n_epochs=20)
	]

	main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
	                     extensions=extensions)
	main_loop.run()
Example #42
def run():
    streams = create_celeba_streams(training_batch_size=100,
                                    monitoring_batch_size=500,
                                    include_targets=True)
    main_loop_stream = streams[0]
    train_monitor_stream = streams[1]
    valid_monitor_stream = streams[2]

    cg, bn_dropout_cg = create_training_computation_graphs()

    # Compute parameter updates for the batch normalization population
    # statistics. They are updated following an exponential moving average.
    pop_updates = get_batch_normalization_updates(bn_dropout_cg)
    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]

    # Prepare algorithm
    step_rule = Adam()
    algorithm = GradientDescent(cost=bn_dropout_cg.outputs[0],
                                parameters=bn_dropout_cg.parameters,
                                step_rule=step_rule)
    algorithm.add_updates(extra_updates)

    # Prepare monitoring
    cost = bn_dropout_cg.outputs[0]
    cost.name = 'cost'
    train_monitoring = DataStreamMonitoring(
        [cost], train_monitor_stream, prefix="train",
        before_first_epoch=False, after_epoch=False, after_training=True,
        updates=extra_updates)

    cost, accuracy = cg.outputs
    cost.name = 'cost'
    accuracy.name = 'accuracy'
    monitored_quantities = [cost, accuracy]
    valid_monitoring = DataStreamMonitoring(
        monitored_quantities, valid_monitor_stream, prefix="valid",
        before_first_epoch=False, after_epoch=False, every_n_epochs=5)

    # Prepare checkpoint
    checkpoint = Checkpoint(
        'celeba_classifier.zip', every_n_epochs=5, use_cpickle=True)

    extensions = [Timing(), FinishAfter(after_n_epochs=50), train_monitoring,
                  valid_monitoring, checkpoint, Printing(), ProgressBar()]
    main_loop = MainLoop(data_stream=main_loop_stream, algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
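
# The exponential moving average applied to the population statistics above,
# restated for a single scalar (a minimal sketch with made-up values):
decay_rate = 0.05
p = 0.0                    # population statistic
for m in [1.0, 1.2, 0.9]:  # successive minibatch statistics
    p = m * decay_rate + p * (1 - decay_rate)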
Example #43
def main(save_to, num_epochs, batch_size):
    mlp = MLP([Tanh(), Tanh(), Tanh(), Softmax()], [3072, 4096, 1024, 512, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tt.tensor4('features', dtype='float32')
    y = tt.vector('label', dtype='int32')

    probs = mlp.apply(x.reshape((-1, 3072)))
    cost = CategoricalCrossEntropy().apply(y, probs)
    error_rate = MisclassificationRate().apply(y, probs)

    cg = ComputationGraph([cost])
    ws = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * sum(([(w**2).sum() for w in ws]))
    cost.name = 'final_cost'

    train_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10',
                                   is_train=True)
    valid_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10',
                                   is_train=False)

    train_stream = train_dataset.get_stream(batch_size)
    valid_stream = valid_dataset.get_stream(batch_size)

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Adam(learning_rate=0.001))
    extensions = [
        Timing(),
        LogExtension('/home/belohlavek/ALI/mlp.log'),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate], valid_stream, prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    main_loop = MainLoop(algorithm,
                         train_stream,
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
Example #44
def infer_population(data_stream, model, n_batches):
    """ Sets the population parameters for a given model"""
    # construct a main loop with algorithm
    algorithm = BatchNormAccumulate(model)
    main_loop = MainLoop(
        algorithm=algorithm,
        data_stream=data_stream,
        model=model,
        extensions=[FinishAfter(after_n_batches=n_batches),
                    ProgressBar()])
    main_loop.run()
    parameters = get_batchnorm_parameters(model)
    batchnorm_bricks = set([get_brick(p) for p in parameters])
    for b in batchnorm_bricks:
        b.use_population = True
Example #45
def train(config, save_path, bokeh_name, params, bokeh_server, bokeh, test_tag,
          use_load_ext, load_log, fast_start):

    model, algorithm, data, extensions = initialize_all(
        config, save_path, bokeh_name, params, bokeh_server, bokeh, test_tag,
        use_load_ext, load_log, fast_start)

    # Save the config into the status
    log = NDarrayLog()
    log.status['_config'] = repr(config)
    main_loop = MainLoop(model=model,
                         log=log,
                         algorithm=algorithm,
                         data_stream=data.get_stream("train"),
                         extensions=extensions)
    main_loop.run()
Example #46
def work():
    config_dict = yaml.load(open(sys.argv[1], 'r'))
    print config_dict

    if config_dict['working_mode'] == 'train_new':
        train, valid, alphabet = build_datasets(config_dict)
        generator, cost = build_model(len(alphabet), config_dict)
        algorithm = build_algorithm(generator, cost, config_dict)
        extensions = build_extensions(cost, algorithm, valid, config_dict)
        main_loop = MainLoop(algorithm=algorithm, data_stream=train,
                             model=Model(cost), extensions=extensions)
        main_loop.run()

    elif config_dict['working_mode'] == 'train_resume':
        # TODO
        pass
Example #47
def test_load():
    # Create a main loop and checkpoint it
    mlp = MLP(activations=[None],
              dims=[10, 10],
              weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[0].W
    x = tensor.vector('data')
    cost = mlp.apply(x).mean()
    data = numpy.random.rand(10, 10).astype(theano.config.floatX)
    data_stream = IterableDataset(data).get_example_stream()

    main_loop = MainLoop(data_stream=data_stream,
                         algorithm=GradientDescent(cost=cost, parameters=[W]),
                         extensions=[
                             FinishAfter(after_n_batches=5),
                             Checkpoint('myweirdmodel.picklebarrel')
                         ])
    main_loop.run()

    # Load the parameters, log and iteration state
    old_value = W.get_value()
    W.set_value(old_value * 2)
    main_loop = MainLoop(model=Model(cost),
                         data_stream=data_stream,
                         algorithm=GradientDescent(cost=cost, parameters=[W]),
                         extensions=[
                             Load('myweirdmodel.picklebarrel',
                                  load_iteration_state=True,
                                  load_log=True)
                         ])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
    assert_allclose(W.get_value(), old_value)

    # Make sure things work too if the model was never saved before
    main_loop = MainLoop(model=Model(cost),
                         data_stream=data_stream,
                         algorithm=GradientDescent(cost=cost, parameters=[W]),
                         extensions=[
                             Load('mynonexisting.picklebarrel',
                                  load_iteration_state=True,
                                  load_log=True)
                         ])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
Example #48
def run(get_model, model_name):
	train_stream = ServerDataStream(('cases', 'image_features', 'image_targets', 'multiplier'), False, hwm=10)
	valid_stream = ServerDataStream(('cases', 'image_features', 'image_targets', 'multiplier'), False, hwm=10, port=5558)

	input_var  = tensor.tensor4('image_features')
	target_var = tensor.tensor4('image_targets')
	multiply_var = tensor.matrix('multiplier')
	multiply_var = T.addbroadcast(multiply_var, 1)

	test_prediction, prediction, params = get_model(input_var, target_var, multiply_var)

	loss = binary_crossentropy(prediction, target_var).mean()


	loss.name = 'loss'

	valid_error = T.neq((test_prediction>0.5)*1., target_var).mean()
	valid_error.name = 'error'

	scale = Scale(0.1)
	algorithm = GradientDescent(
		cost=loss,
		parameters=params,
		step_rule=scale,
		#step_rule=Adam(),
		on_unused_sources='ignore'
	)

	host_plot = 'http://localhost:5006'

	extensions = [
		Timing(),
		TrainingDataMonitoring([loss], after_epoch=True),
		DataStreamMonitoring(variables=[loss, valid_error], data_stream=valid_stream, prefix="valid"),
		Plot('%s %s %s' % (model_name, datetime.date.today(), time.strftime('%H:%M')), channels=[['loss','valid_loss'],['valid_error']], after_epoch=True, server_url=host_plot),
		Printing(),
		# Checkpoint('train'),
		FinishAfter(after_n_epochs=10)
	]

	main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
	                     extensions=extensions)
	cg = ComputationGraph(test_prediction)
	while True:
		main_loop.run()
		scale.learning_rate.set_value(numpy.float32(scale.learning_rate.get_value()*0.7))
		numpy.savez('best_weights.npz', [param.get_value() for param in cg.shared_variables])
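
# The outer loop above anneals the learning rate geometrically: after n
# restarts it is 0.1 * 0.7 ** n. A quick illustration of the schedule:
rates = [0.1 * 0.7 ** n for n in range(5)]
# -> [0.1, 0.07, 0.049, 0.0343, 0.02401]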
Example #49
def test_main_loop():
    old_config_profile_value = config.profile
    config.profile = True

    main_loop = MainLoop(
        MockAlgorithm(), IterableDataset(range(10)).get_example_stream(),
        extensions=[WriteBatchExtension(), FinishAfter(after_n_epochs=2)])
    main_loop.run()
    assert_raises(AttributeError, getattr, main_loop, 'model')

    assert main_loop.log.status['iterations_done'] == 20
    assert main_loop.log.status['_epoch_ends'] == [10, 20]
    assert len(main_loop.log) == 20
    for i in range(20):
        assert main_loop.log[i + 1]['batch'] == {'data': i % 10}

    config.profile = old_config_profile_value
Example #50
def train(config, save_path, bokeh_name,
          params, bokeh_server, bokeh, test_tag, use_load_ext,
          load_log, fast_start):

    model, algorithm, data, extensions = initialize_all(
        config, save_path, bokeh_name,
        params, bokeh_server, bokeh, test_tag, use_load_ext,
        load_log, fast_start)

    # Save the config into the status
    log = NDarrayLog()
    log.status['_config'] = repr(config)
    main_loop = MainLoop(
        model=model, log=log, algorithm=algorithm,
        data_stream=data.get_stream("train"),
        extensions=extensions)
    main_loop.run()
Example #51
def align_with_nam(config, args):
    """Main method for using the Neural Alignment Model.
    
    Args:
        config (dict): NMT configuration
        args (object): ArgumentParser object containing the command
                       line arguments
    
    Returns:
        list. List of alignments, where alignments are represented as
        numpy matrices containing confidences between 0 and 1.
    """
    global alignments
    config['attention'] = 'parameterized'
    alignments = []
    nmt_model = NMTModel(config)
    nmt_model.set_up()
    align_stream = _get_align_stream(**config)
    extensions = [
        FinishAfter(after_epoch=True),
        TrainingDataMonitoring([nmt_model.cost], after_batch=True),
        PrintCurrentLogRow(after_batch=True),
        NextSentenceExtension(align_stream=align_stream,
                              every_n_batches=args.iterations,
                              before_training=True)
    ]
    train_params = []
    for p in nmt_model.cg.parameters:
        if p.name == 'alignment_matrix':
            train_params.append(p)
            break
    algorithm = GradientDescent(cost=nmt_model.cost, parameters=train_params)
    main_loop = MainLoop(model=nmt_model.training_model,
                         algorithm=algorithm,
                         data_stream=align_stream,
                         extensions=extensions)
    nmt_model_path = get_nmt_model_path(args.nmt_model_selector, config)
    loader = LoadNMTUtils(nmt_model_path, config['saveto'],
                          nmt_model.training_model)
    loader.load_weights()
    try:
        main_loop.run()
    except StopIteration:
        logging.info("Alignment finished")
    return alignments
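
# One way the returned alignment matrices might be consumed (a sketch; the
# target-major axis order and the sizes are assumptions for illustration):
import numpy
alignment = numpy.random.rand(5, 7)        # 5 target words x 7 source words
hard_alignment = alignment.argmax(axis=1)  # best source position per target word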
Example #52
def test_checkpointing():
    # Create a main loop and checkpoint it
    mlp = MLP(activations=[None], dims=[10, 10], weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[0].W
    x = tensor.vector('data')
    cost = mlp.apply(x).mean()
    data = numpy.random.rand(10, 10).astype(theano.config.floatX)
    data_stream = IterableDataset(data).get_example_stream()

    main_loop = MainLoop(
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[FinishAfter(after_n_batches=5),
                    Checkpoint('myweirdmodel.tar', parameters=[W])]
    )
    main_loop.run()

    # Load it again
    old_value = W.get_value()
    W.set_value(old_value * 2)
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('myweirdmodel.tar')]
    )
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
    assert_allclose(W.get_value(), old_value)

    # Make sure things work too if the model was never saved before
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('mynonexisting.tar')]
    )
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')

    # Cleaning
    if os.path.exists('myweirdmodel.tar'):
        os.remove('myweirdmodel.tar')
Example #53
def train_model(cost,
                error_rate,
                train_stream,
                load_location=None,
                save_location=None):

    cost.name = "Cross_entropy"
    error_rate.name = 'Error_rate'

    # Define the model
    model = Model(cost)

    # Load the parameters from a dumped model
    if load_location is not None:
        logger.info('Loading parameters...')
        model.set_param_values(load_parameter_values(load_location))

    cg = ComputationGraph(cost)
    step_rule = Momentum(learning_rate=0.1, momentum=0.9)
    algorithm = GradientDescent(cost=cost,
                                step_rule=step_rule,
                                params=cg.parameters)
    main_loop = MainLoop(
        model=model,
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            # DataStreamMonitoring([cost], test_stream, prefix='test',
            #                      after_epoch=False, every_n_epochs=10),
            DataStreamMonitoring([cost],
                                 train_stream,
                                 prefix='train',
                                 after_epoch=True),
            Printing(after_epoch=True)
        ])
    main_loop.run()

    # Save the main loop
    if save_location is not None:
        logger.info('Saving the main loop...')
        dump_manager = MainLoopDumpManager(save_location)
        dump_manager.dump(main_loop)
        logger.info('Saved')
Example #54
def train(config, save_path, bokeh_name,
          params, bokeh_server, bokeh, test_tag, use_load_ext,
          load_log, fast_start):

    conf_dump = pickle.dumps(config, protocol=0)

    model, algorithm, data, extensions = initialize_all(
        config, test_tag, save_path, bokeh_name,
        params, bokeh_server, bokeh, use_load_ext,
        load_log, fast_start)
    data.get_stream("train", **data_params_train)

    dataset_dump = pickle.dumps(data.info_dataset, protocol=0)
    postfix_dump = pickle.dumps(data.postfix_manager, protocol=0) 

    # Save the config into the status
    log = NDarrayLog()
    log.status['_config'] = repr(config)
    log.status['_config_pickle'] = repr(conf_dump)
    log.status['_dataset_pickle'] = repr(dataset_dump)
    log.status['_postfix_pickle'] = repr(postfix_dump)

    main_loop = MainLoop(
        model=model, log=log, algorithm=algorithm,
        data_stream=data.get_stream("train", **data_params_train),
        extensions=extensions)
    main_loop.conf_pickle_shared = theano.shared(
        numpy.frombuffer(conf_dump, numpy.byte),
        name='_config_pickle')
    main_loop.data_pickle_shared = theano.shared(
        numpy.frombuffer(dataset_dump, numpy.byte),
        name='_dataset_pickle')
    main_loop.post_pickle_shared = theano.shared(
        numpy.frombuffer(postfix_dump, numpy.byte),
        name='_postfix_pickle')
    main_loop.run()
    return main_loop
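
# Stashing the pickles in shared variables presumably lets them travel with
# a serialized main loop. The byte round trip itself is straightforward
# (a self-contained sketch using a stand-in config dict):
import pickle
import numpy
blob = pickle.dumps({'example': 1}, protocol=0)
arr = numpy.frombuffer(blob, numpy.byte)
assert pickle.loads(arr.tobytes()) == {'example': 1}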
Example #55
    def train(self, training_data):

        step_rules = [Adam(), StepClipping(1.0)]

        algorithm = GradientDescent(
            cost=self.Cost,
            parameters=self.ComputationGraph.parameters,
            step_rule=CompositeRule(step_rules))

        train_stream = DataStream.default_stream(
            training_data,
            iteration_scheme=SequentialScheme(training_data.num_examples,
                                              batch_size=20))

        main = MainLoop(model=Model(self.Cost),
                        data_stream=train_stream,
                        algorithm=algorithm,
                        extensions=[
                            FinishAfter(),
                            Printing(),
                            Checkpoint('trainingdata.tar', every_n_epochs=10)
                        ])

        main.run()
Example #56
cg = ComputationGraph(cost)
model = Model(cost)

#################
# Algorithm
#################

algorithm = GradientDescent(cost=cost,
                            parameters=cg.parameters,
                            step_rule=CompositeRule(
                                [StepClipping(10.0),
                                 Adam(lr)]))

train_monitor = TrainingDataMonitoring(variables=[cost],
                                       after_epoch=True,
                                       prefix="train")

extensions = [
    train_monitor,
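    # TrackTheBest watches the "train_sequence_log_likelihood" channel, i.e.
    # the train_monitor prefix plus the cost variable's name (assuming the
    # surrounding script set cost.name = 'sequence_log_likelihood'):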
    TrackTheBest('train_sequence_log_likelihood'),
    Printing(after_epoch=True)
]

main_loop = MainLoop(model=model,
                     data_stream=data_stream,
                     algorithm=algorithm,
                     extensions=extensions)

main_loop.run()
Example #57
def main(name, dataset, epochs, batch_size, learning_rate, attention, n_iter,
         enc_dim, dec_dim, z_dim, oldmodel):

    image_size, data_train, data_valid, data_test = datasets.get_data(dataset)

    train_stream = Flatten(
        DataStream(data_train,
                   iteration_scheme=SequentialScheme(data_train.num_examples,
                                                     batch_size)))
    valid_stream = Flatten(
        DataStream(data_valid,
                   iteration_scheme=SequentialScheme(data_valid.num_examples,
                                                     batch_size)))
    test_stream = Flatten(
        DataStream(data_test,
                   iteration_scheme=SequentialScheme(data_test.num_examples,
                                                     batch_size)))

    if name is None:
        name = dataset

    img_height, img_width = image_size
    x_dim = img_height * img_width

    rnninits = {
        #'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        #'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    if attention != "":
        read_N, write_N = attention.split(',')

        read_N = int(read_N)
        write_N = int(write_N)
        read_dim = 2 * read_N**2

        reader = AttentionReader(x_dim=x_dim,
                                 dec_dim=dec_dim,
                                 width=img_width,
                                 height=img_height,
                                 N=read_N,
                                 **inits)
        writer = AttentionWriter(input_dim=dec_dim,
                                 output_dim=x_dim,
                                 width=img_width,
                                 height=img_height,
                                 N=write_N,
                                 **inits)
        attention_tag = "r%d-w%d" % (read_N, write_N)
    else:
        read_dim = 2 * x_dim

        reader = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
        writer = Writer(input_dim=dec_dim, output_dim=x_dim, **inits)

        attention_tag = "full"

    #----------------------------------------------------------------------

    # Learning rate
    def lr_tag(value):
        """ Convert a float into a short tag-usable string representation. E.g.:
            0.1   -> 11
            0.01  -> 12
            0.001 -> 13
            0.005 -> 53
        """
        exp = np.floor(np.log10(value))
        leading = ("%e" % value)[0]
        return "%s%d" % (leading, -exp)

    lr_str = lr_tag(learning_rate)

    subdir = time.strftime("%Y%m%d-%H%M%S") + "-" + name
    longname = "%s-%s-t%d-enc%d-dec%d-z%d-lr%s" % (
        dataset, attention_tag, n_iter, enc_dim, dec_dim, z_dim, lr_str)
    pickle_file = subdir + "/" + longname + ".pkl"

    print("\nRunning experiment %s" % longname)
    print("               dataset: %s" % dataset)
    print("          subdirectory: %s" % subdir)
    print("         learning rate: %g" % learning_rate)
    print("             attention: %s" % attention)
    print("          n_iterations: %d" % n_iter)
    print("     encoder dimension: %d" % enc_dim)
    print("           z dimension: %d" % z_dim)
    print("     decoder dimension: %d" % dec_dim)
    print("            batch size: %d" % batch_size)
    print("                epochs: %d" % epochs)
    print()

    #----------------------------------------------------------------------

    encoder_rnn = LSTM(dim=enc_dim, name="RNN_enc", **rnninits)
    decoder_rnn = LSTM(dim=dec_dim, name="RNN_dec", **rnninits)
    encoder_mlp = MLP([Identity()], [(read_dim + dec_dim), 4 * enc_dim],
                      name="MLP_enc",
                      **inits)
    decoder_mlp = MLP([Identity()], [z_dim, 4 * dec_dim],
                      name="MLP_dec",
                      **inits)
    q_sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, **inits)

    draw = DrawModel(n_iter,
                     reader=reader,
                     encoder_mlp=encoder_mlp,
                     encoder_rnn=encoder_rnn,
                     sampler=q_sampler,
                     decoder_mlp=decoder_mlp,
                     decoder_rnn=decoder_rnn,
                     writer=writer)
    draw.initialize()

    #------------------------------------------------------------------------
    x = tensor.matrix('features')

    #x_recons = 1. + x
    x_recons, kl_terms = draw.reconstruct(x)
    #x_recons, _, _, _, _ = draw.silly(x, n_steps=10, batch_size=100)
    #x_recons = x_recons[-1,:,:]

    #samples = draw.sample(100)
    #x_recons = samples[-1, :, :]
    #x_recons = samples[-1, :, :]

    recons_term = BinaryCrossEntropy().apply(x, x_recons)
    recons_term.name = "recons_term"

    cost = recons_term + kl_terms.sum(axis=0).mean()
    cost.name = "nll_bound"

    #------------------------------------------------------------
    cg = ComputationGraph([cost])
    params = VariableFilter(roles=[PARAMETER])(cg.variables)

    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=CompositeRule([
            StepClipping(10.),
            Adam(learning_rate),
        ])
        #step_rule=RMSProp(learning_rate),
        #step_rule=Momentum(learning_rate=learning_rate, momentum=0.95)
    )
    #algorithm.add_updates(scan_updates)

    #------------------------------------------------------------------------
    # Setup monitors
    monitors = [cost]
    for t in range(n_iter):
        kl_term_t = kl_terms[t, :].mean()
        kl_term_t.name = "kl_term_%d" % t

        #x_recons_t = T.nnet.sigmoid(c[t,:,:])
        #recons_term_t = BinaryCrossEntropy().apply(x, x_recons_t)
        #recons_term_t = recons_term_t.mean()
        #recons_term_t.name = "recons_term_%d" % t

        monitors += [kl_term_t]

    train_monitors = monitors[:]
    train_monitors += [aggregation.mean(algorithm.total_gradient_norm)]
    train_monitors += [aggregation.mean(algorithm.total_step_norm)]
    # Live plotting...
    plot_channels = [
        ["train_nll_bound", "test_nll_bound"],
        ["train_kl_term_%d" % t for t in range(n_iter)],
        #["train_recons_term_%d" % t for t in range(n_iter)],
        ["train_total_gradient_norm", "train_total_step_norm"]
    ]

    #------------------------------------------------------------

    if not os.path.exists(subdir):
        os.makedirs(subdir)

    main_loop = MainLoop(
        model=Model(cost),
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=epochs),
            TrainingDataMonitoring(train_monitors,
                                   prefix="train",
                                   after_epoch=True),
            #            DataStreamMonitoring(
            #                monitors,
            #                valid_stream,
            ##                updates=scan_updates,
            #                prefix="valid"),
            DataStreamMonitoring(
                monitors,
                test_stream,
                #                updates=scan_updates,
                prefix="test"),
            Checkpoint(name,
                       before_training=False,
                       after_epoch=True,
                       save_separately=['log', 'model']),
            #Checkpoint(image_size=image_size, save_subdir=subdir, path=pickle_file, before_training=False, after_epoch=True, save_separately=['log', 'model']),
            Plot(name, channels=plot_channels),
            ProgressBar(),
            Printing()
        ])

    if oldmodel is not None:
        print("Initializing parameters with old model %s" % oldmodel)
        with open(oldmodel, "rb") as f:
            oldmodel = pickle.load(f)
            main_loop.model.set_param_values(oldmodel.get_param_values())
        del oldmodel

    main_loop.run()