def prepare_opti(cost, test, *args):
    model = Model(cost)
    logger.info("Model created")

    algorithm = GradientDescent(cost=cost,
                                parameters=model.parameters,
                                step_rule=Adam(learning_rate=0.0015),
                                on_unused_sources='ignore')

    to_monitor = [algorithm.cost]
    if args:
        to_monitor.extend(args)

    extensions = [
        FinishAfter(after_n_epochs=nb_epoch),
        FinishIfNoImprovementAfter(notification_name='loglikelihood_nat',
                                   epochs=patience),
        TrainingDataMonitoring(to_monitor, prefix="train", after_epoch=True),
        DataStreamMonitoring(to_monitor, test_stream, prefix="test"),
        Printing(),
        ProgressBar(),
        ApplyMask(before_first_epoch=True, after_batch=True),
        Checkpoint(check, every_n_epochs=save_every),
        SaveModel(name=path + '/' + 'pixelcnn_{}'.format(dataset),
                  every_n_epochs=save_every),
        GenerateSamples(every_n_epochs=save_every),
        #Checkpoint(path+'/'+'exp.log', save_separately=['log'],every_n_epochs=save_every),
    ]

    if resume:
        logger.info("Restoring from previous checkpoint")
        # Keep the extensions built above; only add the Load extension that
        # restores the checkpointed state.
        extensions.append(Load(path + '/' + check))

    return model, algorithm, extensions
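A minimal usage sketch for the helper above (hypothetical names: cost and test come from the caller, train_stream is a Fuel stream of the training data, and MainLoop is imported from blocks.main_loop); prepare_opti only builds the pieces, so the caller still wires them into a main loop:

model, algorithm, extensions = prepare_opti(cost, test)
main_loop = MainLoop(data_stream=train_stream,
                     algorithm=algorithm,
                     model=model,
                     extensions=extensions)
main_loop.run()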
Example #2
def setup_mainloop(extension):
    """Set up a simple main loop for progress bar tests.

    Create a MainLoop, register the given extension, supply it with a
    DataStream and a minimal model/cost to optimize.

    """
    # Since progressbar2 3.6.0, the `maxval` kwarg has been replaced by
    # `max_value`, which has a default value of 100. If we're still using
    # `maxval` by accident, this test should fail complaining that
    # the progress bar has received a value out of range.
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2]] * 101]
    dataset = IterableDataset(dict(features=features))

    W = shared_floatx([0, 0], name='W')
    x = tensor.vector('features')
    cost = tensor.sum((x-W)**2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost, parameters=[W],
                                step_rule=Scale(1e-3))

    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=algorithm,
        extensions=[
            FinishAfter(after_n_epochs=1),
            extension])

    return main_loop
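A minimal usage sketch, assuming ProgressBar is imported from blocks.extensions; the 101 one-example batches above exercise the progressbar2 max_value default mentioned in the comment:

main_loop = setup_mainloop(ProgressBar())
main_loop.run()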
Example #3
def test_shared_variable_modifier_two_params():
    weights = numpy.array([-1, 1], dtype=floatX)
    features = [numpy.array(f, dtype=floatX) for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = ContainerDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    cost = ((x * W).sum() - y)**2
    cost.name = 'cost'

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, params=[W], step_rule=step_rule)
    modifier = SharedVariableModifier(
        step_rule.learning_rate, lambda _, val: numpy.cast[floatX](val * 0.2))
    main_loop = MainLoop(model=None,
                         data_stream=dataset.get_default_stream(),
                         algorithm=sgd,
                         extensions=[FinishAfter(after_n_epochs=1), modifier])

    main_loop.run()

    new_value = step_rule.learning_rate.get_value()
    assert_allclose(new_value, 0.001 * 0.2**n_batches, atol=1e-5)
Example #4
def main():
  x = tensor.matrix("features")
  input_to_hidden1 = get_typical_layer(x, 784, 500)
  #hidden1_to_hidden2 = get_typical_layer(input_to_hidden1, 500, 300)
  hidden1_to_latent = get_typical_layer(input_to_hidden1, 500, 20)

  latent_to_hidden2 = get_typical_layer(hidden1_to_latent, 20, 500)
  #hidden3_to_hidden4 = get_typical_layer(latent_to_hidden3, 300, 500)
  hidden2_to_output = get_typical_layer(latent_to_hidden2, 500, 784, Logistic())
  hidden2_to_output.name = "last_before_output"

  from blocks.bricks.cost import SquaredError, AbsoluteError, BinaryCrossEntropy
  from blocks.graph import ComputationGraph
  from blocks.algorithms import Adam, GradientDescent, Scale
  from blocks.roles import WEIGHT

  cost = BinaryCrossEntropy(name="error").apply(x, hidden2_to_output)
  cg = ComputationGraph(cost)
  weights = VariableFilter(roles=[WEIGHT])(cg.variables)
#  cost += 0.0001 * tensor.sum(map(lambda x: (x**2).sum(), weights))
#  cost.name = "regularized error"
  gd = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Adam())

  from blocks.main_loop import MainLoop
  from blocks.extensions import FinishAfter, Printing, ProgressBar
  from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
  monitor = TrainingDataMonitoring([cost], after_epoch=True)
  main_loop = MainLoop(data_stream=get_data_stream(), algorithm=gd,
                       extensions=[monitor, FinishAfter(after_n_epochs=5),
                                   ProgressBar(), Printing()])

  main_loop.run()
  showcase(cg, "last_before_output")
Example #5
    def build_bprop_graph(self):
        optimizer = self.get_optimizer()
        costs = self.link_here('costs').values()

        # Either the costs have specific parameters bound to them (ParametersLink)...
        isinstance_check = [isinstance(c, ParametersLink) for c in costs]
        if any(isinstance_check):
            assert all(isinstance_check), "Some costs have parameters associated "+\
                    "to them and others don't. None or all costs need to be bound."
            grads = OrderedDict()
            for paramlink in costs:
                cost = paramlink.raw_var
                assert len(cost) == 1
                params = flatten([self.architecture[arch].parameters for arch in \
                                  paramlink.architectures] + paramlink.parameters)
                grads.update(zip(params, theano.grad(cost[0], params)))
            cost = None
        # ...or we let Blocks compute the gradients of a single summed cost.
        else:
            assert len(costs) >= 1, "No cost variables?"
            cost = costs[0]
            for c in costs[1:]:
                cost += c
            grads = None

        algorithm = GradientDescent(cost=cost,
                                    gradients=grads,
                                    parameters=self.parameters,
                                    step_rule=optimizer,
                                    on_unused_sources='warn')

        self.algorithm = algorithm
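A small standalone sketch of the two modes the comments above distinguish (using only the public Blocks/Theano APIs seen elsewhere in these examples): Blocks can either differentiate a single summed cost itself, or accept precomputed per-parameter gradients.

from collections import OrderedDict

import theano
from theano import tensor
from blocks.algorithms import GradientDescent
from blocks.utils import shared_floatx

W = shared_floatx([0., 0.], name='W')
x = tensor.vector('features')
cost = tensor.sum((x - W) ** 2)
cost.name = 'cost'

# Mode 1: hand Blocks the summed cost and let it compute the gradients.
algorithm_from_cost = GradientDescent(cost=cost, parameters=[W])

# Mode 2: pass explicit per-parameter gradients; no cost variable is needed.
gradients = OrderedDict([(W, theano.grad(cost, W))])
algorithm_from_gradients = GradientDescent(gradients=gradients)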
Example #6
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0),
              seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(cost=cost,
                        params=ComputationGraph(cost).parameters,
                        step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) + [
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)), prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Dump(save_to),
            Printing()
        ])
    main_loop.run()
    return main_loop
Example #7
def setup_mainloop(extension):
    """Set up a simple main loop for progress bar tests.

    Create a MainLoop, register the given extension, supply it with a
    DataStream and a minimal model/cost to optimize.

    """
    features = [
        numpy.array(f, dtype=theano.config.floatX)
        for f in [[1, 2], [3, 4], [5, 6]]
    ]
    dataset = IterableDataset(dict(features=features))

    W = shared_floatx([0, 0], name='W')
    x = tensor.vector('features')
    cost = tensor.sum((x - W)**2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost, params=[W], step_rule=Scale(1e-3))

    main_loop = MainLoop(model=None,
                         data_stream=dataset.get_example_stream(),
                         algorithm=algorithm,
                         extensions=[FinishAfter(after_n_epochs=1), extension])

    return main_loop
Example #8
def test_training_data_monitoring():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [
        numpy.array(f, dtype=theano.config.floatX)
        for f in [[1, 2], [3, 4], [5, 6]]
    ]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    V = shared_floatx(7, name='V')
    W_sum = named_copy(W.sum(), 'W_sum')
    cost = ((x * W).sum() - y)**2
    cost.name = 'cost'

    class TrueCostExtension(TrainingExtension):
        def before_batch(self, data):
            self.main_loop.log.current_row['true_cost'] = ((
                (W.get_value() * data["features"]).sum() - data["targets"])**2)

    main_loop = MainLoop(model=None,
                         data_stream=dataset.get_example_stream(),
                         algorithm=GradientDescent(cost=cost,
                                                   parameters=[W],
                                                   step_rule=Scale(0.001)),
                         extensions=[
                             FinishAfter(after_n_epochs=1),
                             TrainingDataMonitoring([W_sum, cost, V],
                                                    prefix="train1",
                                                    after_batch=True),
                             TrainingDataMonitoring(
                                 [aggregation.mean(W_sum), cost],
                                 prefix="train2",
                                 after_epoch=True),
                             TrueCostExtension()
                         ])

    main_loop.run()

    # Check monitoring of a shared variable
    assert_allclose(main_loop.log.current_row['train1_V'], 7.0)

    for i in range(n_batches):
        # The ground truth is written to the log before the batch is
        # processed, whereas the extension writes after the batch is
        # processed. This is why the iteration numbers differ here.
        assert_allclose(main_loop.log[i]['true_cost'],
                        main_loop.log[i + 1]['train1_cost'])
    assert_allclose(
        main_loop.log[n_batches]['train2_cost'],
        sum([main_loop.log[i]['true_cost']
             for i in range(n_batches)]) / n_batches)
    assert_allclose(
        main_loop.log[n_batches]['train2_W_sum'],
        sum([
            main_loop.log[i]['train1_W_sum'] for i in range(1, n_batches + 1)
        ]) / n_batches)
Example #9
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train", ))
    mnist_test = MNIST(("test", ))

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate],
                             Flatten(DataStream.default_stream(
                                 mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                                     which_sources=('features', )),
                             prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[[
                     'test_final_cost',
                     'test_misclassificationrate_apply_error_rate'
                 ], ['train_total_gradient_norm']]))

    main_loop = MainLoop(algorithm,
                         Flatten(DataStream.default_stream(
                             mnist_train,
                             iteration_scheme=SequentialScheme(
                                 mnist_train.num_examples, 50)),
                                 which_sources=('features', )),
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
Example #10
def test_shared_variable_modifier():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, parameters=[W],
                          step_rule=step_rule)
    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=sgd,
        extensions=[
            FinishAfter(after_n_epochs=1),
            SharedVariableModifier(
                step_rule.learning_rate,
                lambda n: numpy.cast[theano.config.floatX](10. / n)
            )])

    main_loop.run()

    assert_allclose(step_rule.learning_rate.get_value(),
                    numpy.cast[theano.config.floatX](10. / n_batches))
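A hedged variant of the same pattern, assuming SharedVariableModifier accepts the usual SimpleExtension trigger keywords: overriding the default after_batch trigger makes the learning rate decay once per epoch instead of once per batch.

# Halve the learning rate at the end of every epoch (sketch only).
modifier = SharedVariableModifier(
    step_rule.learning_rate,
    lambda _, lr: numpy.cast[theano.config.floatX](lr * 0.5),
    after_batch=False, after_epoch=True)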
Example #11
def setup_mainloop(extensions):
    """Create a MainLoop, register the given extension, supply it with a
        DataStream and a minimal model/cost to optimize.
    """
    features = [numpy.array(f, dtype=floatX) for f in [[1, 2], [3, 4], [5, 6]]]
    dataset = IterableDataset(dict(features=features))
    datastream = DataStream(dataset)

    W = shared_floatx([0, 0], name='W')
    add_role(W, PARAMETER)
    x = tensor.vector('features')
    cost = tensor.sum((x - W)**2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost,
                                parameters=[W],
                                step_rule=Scale(1e-3))

    main_loop = MainLoop(model=Model(cost),
                         data_stream=datastream,
                         algorithm=algorithm,
                         extensions=[
                             FinishAfter(after_n_epochs=1),
                         ] + extensions)

    return main_loop
Example #12
 def algorithm(self):
     if self._algorithm is None:
         self._algorithm = GradientDescent(cost=self.cost,
                                           parameters=self.parameters,
                                           step_rule=CompositeRule(
                                               self.step_rules))
     return self._algorithm
Example #13
def test_gradient_descent_updates_keyword():
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    z = shared_floatx(5)
    algorithm = GradientDescent(gradients=OrderedDict([(W, W / 2)]),
                                updates=[(z, z + 1)])
    assert len(algorithm.updates) == 2
    assert z in dict(algorithm.updates)
Example #14
def prepare_opti(cost, test):
    model = Model(cost)

    algorithm = GradientDescent(
        cost=cost,
        parameters=model.parameters,
        step_rule=RMSProp(),
        on_unused_sources='ignore'
    )

    extensions = [
        FinishAfter(after_n_epochs=nb_epoch),
        FinishIfNoImprovementAfter(notification_name='test_cross_entropy', epochs=patience),
        TrainingDataMonitoring(
            [algorithm.cost],
            prefix="train",
            after_epoch=True),
        DataStreamMonitoring(
            [algorithm.cost],
            test_stream,
            prefix="test"),
        Printing(),
        ProgressBar(),
        #Checkpoint(path, after_epoch=True)
    ]

    if resume:
        print "Restoring from previous breakpoint"
        extensions.extend([
            Load(path)
        ])
    return model, algorithm, extensions
Example #15
 def train_base_model(self, train_data, test_data, input_dim):
     x = T.matrix('features')
     y = T.matrix('targets')
     mlp, cost, mis_cost = self.create_base_model(x, y, input_dim)
     cg = ComputationGraph([cost])
     inputs = VariableFilter(roles=[INPUT])(cg.variables)
     cg = apply_dropout(cg, inputs, 0.2)
     algorithm = GradientDescent(cost=cost,
                                 parameters=cg.parameters,
                                 step_rule=Adam(learning_rate=0.001))
     data_stream = train_data
     data_stream_test = test_data
     monitor = DataStreamMonitoring(variables=[mis_cost],
                                    data_stream=data_stream_test,
                                    prefix="test")
     plot_ext = Plot('F1-measure',
                     channels=[['test_MisclassificationRate']],
                     after_batch=True)
     main_loop = MainLoop(data_stream=data_stream,
                          algorithm=algorithm,
                          extensions=[
                              monitor,
                              FinishAfter(after_n_epochs=50),
                              Printing(), plot_ext
                          ])
     main_loop.run()
     return mlp
Example #16
def test_load():
    # Create a main loop and checkpoint it
    mlp = MLP(activations=[None],
              dims=[10, 10],
              weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[0].W
    x = tensor.vector('data')
    cost = mlp.apply(x).mean()
    data = numpy.random.rand(10, 10).astype(theano.config.floatX)
    data_stream = IterableDataset(data).get_example_stream()

    main_loop = MainLoop(data_stream=data_stream,
                         algorithm=GradientDescent(cost=cost, parameters=[W]),
                         extensions=[
                             FinishAfter(after_n_batches=5),
                             Checkpoint('myweirdmodel.picklebarrel')
                         ])
    main_loop.run()

    # Load the parameters, log and iteration state
    old_value = W.get_value()
    W.set_value(old_value * 2)
    main_loop = MainLoop(model=Model(cost),
                         data_stream=data_stream,
                         algorithm=GradientDescent(cost=cost, parameters=[W]),
                         extensions=[
                             Load('myweirdmodel.picklebarrel',
                                  load_iteration_state=True,
                                  load_log=True)
                         ])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
    assert_allclose(W.get_value(), old_value)

    # Make sure things work too if the model was never saved before
    main_loop = MainLoop(model=Model(cost),
                         data_stream=data_stream,
                         algorithm=GradientDescent(cost=cost, parameters=[W]),
                         extensions=[
                             Load('mynonexisting.picklebarrel',
                                  load_iteration_state=True,
                                  load_log=True)
                         ])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
Example #17
def test_gradient_descent_finds_inputs_additional_updates():
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    n = shared_floatx(1)
    m = tensor.scalar('m')
    algorithm = GradientDescent(gradients=OrderedDict([(W, W + 1)]))
    algorithm.add_updates([(n, n + m)])
    algorithm.initialize()
    assert m in algorithm.inputs
Example #18
def test_gradient_descent_spurious_sources():
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    W_start_value = W.get_value()
    cost = tensor.sum(W**2)

    algorithm = GradientDescent(cost=cost, parameters=[W])
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    assert_raises(lambda: algorithm.process_batch(dict(example_id='test')))

    algorithm = GradientDescent(cost=cost,
                                parameters=[W],
                                on_unused_sources='ignore')
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    algorithm.process_batch(dict(example_id='test'))
    assert_allclose(W.get_value(), -0.5 * W_start_value)
Example #19
def construct_main_loop(name, task_name, patch_shape, batch_size,
                        n_spatial_dims, n_patches, n_epochs, learning_rate,
                        hyperparameters, **kwargs):
    name = "%s_%s" % (name, task_name)
    hyperparameters["name"] = name

    task = get_task(**hyperparameters)
    hyperparameters["n_channels"] = task.n_channels

    x_uncentered, y = task.get_variables()

    x = task.preprocess(x_uncentered)

    # this is a theano variable; it may depend on the batch
    hyperparameters["image_shape"] = x.shape[-n_spatial_dims:]

    ram = construct_model(task=task, **hyperparameters)
    ram.initialize()

    hs = ram.compute(x, n_patches)
    cost = ram.emitter.cost(hs, y, n_patches)
    cost.name = "cost"

    print "setting up main loop..."
    graph = ComputationGraph(cost)
    uselessflunky = Model(cost)
    algorithm = GradientDescent(cost=cost,
                                parameters=graph.parameters,
                                step_rule=Adam(learning_rate=learning_rate))
    monitors = construct_monitors(x=x,
                                  x_uncentered=x_uncentered,
                                  y=y,
                                  hs=hs,
                                  cost=cost,
                                  algorithm=algorithm,
                                  task=task,
                                  model=uselessflunky,
                                  ram=ram,
                                  graph=graph,
                                  **hyperparameters)
    main_loop = MainLoop(
        data_stream=task.get_stream("train"),
        algorithm=algorithm,
        extensions=(
            monitors + [
                FinishAfter(after_n_epochs=n_epochs),
                DumpMinimum(name + '_best', channel_name='valid_error_rate'),
                Dump(name + '_dump', every_n_epochs=10),
                #Checkpoint(name+'_checkpoint.pkl', every_n_epochs=10, on_interrupt=False),
                ProgressBar(),
                Timing(),
                Printing(),
                PrintingTo(name + "_log")
            ]),
        model=uselessflunky)
    return main_loop
Example #20
def test_gradient_descent():
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    W_start_value = W.get_value()
    cost = tensor.sum(W**2)

    algorithm = GradientDescent(cost=cost, parameters=[W])
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    algorithm.process_batch(dict())
    assert_allclose(W.get_value(), -0.5 * W_start_value)
Example #21
def run(model_name):

    running_on_laptop = socket.gethostname() == 'yop'

    X = tensor.tensor4('image_features', dtype='float32')
    T = tensor.matrix('targets', dtype='float32')

    image_border_size = 100

    if running_on_laptop:
        host_plot = 'http://*****:*****@ %s' %
             (model_name, datetime.datetime.now(), socket.gethostname()),
             channels=[['loss', 'valid_loss_test'], ['valid_error']],
             after_epoch=True,
             server_url=host_plot),
        Printing(),
        Checkpoint('train2')
    ]

    main_loop = MainLoop(data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
Example #22
 def _test(f):
     W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
     W_start_value = W.get_value()
     cost = tensor.sum(W**2)
     gradients = OrderedDict()
     gradients[W] = tensor.grad(cost, W)
     algorithm = GradientDescent(gradients=f(gradients))
     algorithm.step_rule.learning_rate.set_value(0.75)
     algorithm.initialize()
     algorithm.process_batch(dict())
     assert_allclose(W.get_value(), -0.5 * W_start_value)
Example #23
def train(cost, error_rate, batch_size=100, num_epochs=150):
    # Set up the logger
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = 'results/memory_' + timestr
    log_path = os.path.join(save_path, 'log.txt')
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    blocks_model = Model(cost)
    all_params = blocks_model.parameters
    print "Number of found parameters:" + str(len(all_params))
    print all_params

    training_algorithm = GradientDescent(cost=cost,
                                         parameters=all_params,
                                         step_rule=Adam(learning_rate=0.001))

    # training_algorithm = GradientDescent(
    #     cost=cost, params=all_params,
    #     step_rule=Scale(learning_rate=model.default_lr))

    monitored_variables = [cost, error_rate]

    # the rest is for validation
    # train_data_stream, valid_data_stream = get_mnist_streams(
    #     50000, batch_size)
    train_data_stream, valid_data_stream = get_mnist_video_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(variables=monitored_variables,
                                              prefix="train",
                                              after_epoch=True)

    valid_monitoring = DataStreamMonitoring(variables=monitored_variables,
                                            data_stream=valid_data_stream,
                                            prefix="valid",
                                            after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=blocks_model,
        extensions=[
            train_monitoring, valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams('valid_misclassificationrate_apply_error_rate',
                       blocks_model, save_path),
            SaveLog(save_path, after_epoch=True),
            ProgressBar(),
            Printing()
        ])
    main_loop.run()
Example #24
    def __init__(self, worker, experiment, config):
        # Data
        dataset = CIFAR10('train', flatten=False)
        test_dataset = CIFAR10('test', flatten=False)
        batch_size = 128

        scheme = ShuffledScheme(dataset.num_examples, batch_size)
        datastream = DataStream(dataset, iteration_scheme=scheme)

        test_scheme = ShuffledScheme(test_dataset.num_examples, batch_size)
        test_stream = DataStream(test_dataset, iteration_scheme=test_scheme)

        # Model
        m = ModelHelper(config)

        def score_func(mainloop):
            scores = mainloop.log.to_dataframe()["test_accur"].values
            return np.mean(np.sort(scores)[-4:-1])

        # Algorithm
        cg = ComputationGraph([m.cost])
        algorithm = GradientDescent(cost=m.cost,
                                    params=cg.parameters,
                                    step_rule=AdaM())

        #job_name = os.path.basename(worker.running_job)
        job_name = os.path.basename(".")
        update_path = (os.path.join(os.path.join(worker.path, "updates"),
                                    job_name))
        if not os.path.exists(update_path):
            os.mkdir(update_path)

        self.main_loop = MainLoop(
            algorithm,
            datastream,
            model=Model(m.cost),
            extensions=[
                Timing(),
                TrainingDataMonitoring([m.cost, m.accur],
                                       prefix="train",
                                       after_epoch=True),
                DataStreamMonitoring([m.cost, m.accur],
                                     test_stream,
                                     prefix="test"),
                FinishAfter(after_n_epochs=1),
                LogToFile(os.path.join(update_path, "log.csv")),
                Printing(),
                EpochProgress(dataset.num_examples // batch_size + 1)
                #, DistributeUpdate(worker, every_n_epochs=1)
                #, DistributeWhetlabFinish(worker, experiment, score_func)
                #, Plot('cifar10',
                #channels=[['train_cost', 'test_cost'], ['train_accur', 'test_accur']])
            ])
Example #25
def test_theano_profile_for_sgd_function():
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    W_start_value = W.get_value()
    cost = tensor.sum(W ** 2)

    algorithm = GradientDescent(
        cost=cost, parameters=[W], theano_func_kwargs={'profile': True})
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    algorithm.process_batch(dict())
    assert_allclose(W.get_value(), -0.5 * W_start_value)
    assert isinstance(algorithm._function.profile, ProfileStats)
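A hedged follow-up, assuming Theano's ProfileStats API: with theano_func_kwargs={'profile': True} the compiled update function collects timing data, and a per-Op breakdown can be printed after a few batches have been processed.

# Print the profiling summary of the compiled SGD function to stderr.
algorithm._function.profile.summary()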
Example #26
def train_model(cost,
                train_stream,
                valid_stream,
                valid_freq,
                valid_rare,
                load_location=None,
                save_location=None):
    cost.name = 'nll'
    perplexity = 2**(cost / tensor.log(2))
    perplexity.name = 'ppl'

    # Define the model
    model = Model(cost)

    # Load the parameters from a dumped model
    if load_location is not None:
        logger.info('Loading parameters...')
        model.set_param_values(load_parameter_values(load_location))

    cg = ComputationGraph(cost)
    algorithm = GradientDescent(cost=cost,
                                step_rule=Scale(learning_rate=0.01),
                                params=cg.parameters)
    main_loop = MainLoop(
        model=model,
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            DataStreamMonitoring([cost, perplexity],
                                 valid_stream,
                                 prefix='valid_all',
                                 every_n_batches=5000),
            # Overfitting of rare words occurs between 3000 and 4000 iterations
            DataStreamMonitoring([cost, perplexity],
                                 valid_rare,
                                 prefix='valid_rare',
                                 every_n_batches=500),
            DataStreamMonitoring([cost, perplexity],
                                 valid_freq,
                                 prefix='valid_frequent',
                                 every_n_batches=5000),
            Printing(every_n_batches=500)
        ])
    main_loop.run()

    # Save the main loop
    if save_location is not None:
        logger.info('Saving the main loop...')
        dump_manager = MainLoopDumpManager(save_location)
        dump_manager.dump(main_loop)
        logger.info('Saved')
Example #27
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHTS])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(cost=cost,
                                step_rule=SteepestDescent(learning_rate=0.1))
    main_loop = MainLoop(
        mlp,
        DataStream(mnist_train,
                   iteration_scheme=SequentialScheme(mnist_train.num_examples,
                                                     50)),
        algorithm,
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=num_epochs),
            DataStreamMonitoring([cost, error_rate],
                                 DataStream(mnist_test,
                                            iteration_scheme=SequentialScheme(
                                                mnist_test.num_examples, 500)),
                                 prefix="test"),
            TrainingDataMonitoring([
                cost, error_rate,
                aggregation.mean(algorithm.total_gradient_norm)
            ],
                                   prefix="train",
                                   after_every_epoch=True),
            SerializeMainLoop(save_to),
            Plot('MNIST example',
                 channels=[[
                     'test_final_cost',
                     'test_misclassificationrate_apply_error_rate'
                 ], ['train_total_gradient_norm']]),
            Printing()
        ])
    main_loop.run()
Example #28
def run():
    streams = create_celeba_streams(training_batch_size=100,
                                    monitoring_batch_size=500,
                                    include_targets=True)
    main_loop_stream = streams[0]
    train_monitor_stream = streams[1]
    valid_monitor_stream = streams[2]

    cg, bn_dropout_cg = create_training_computation_graphs()

    # Compute parameter updates for the batch normalization population
    # statistics. They are updated following an exponential moving average.
    pop_updates = get_batch_normalization_updates(bn_dropout_cg)
    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]

    # Prepare algorithm
    step_rule = Adam()
    algorithm = GradientDescent(cost=bn_dropout_cg.outputs[0],
                                parameters=bn_dropout_cg.parameters,
                                step_rule=step_rule)
    algorithm.add_updates(extra_updates)

    # Prepare monitoring
    cost = bn_dropout_cg.outputs[0]
    cost.name = 'cost'
    train_monitoring = DataStreamMonitoring(
        [cost], train_monitor_stream, prefix="train",
        before_first_epoch=False, after_epoch=False, after_training=True,
        updates=extra_updates)

    cost, accuracy = cg.outputs
    cost.name = 'cost'
    accuracy.name = 'accuracy'
    monitored_quantities = [cost, accuracy]
    valid_monitoring = DataStreamMonitoring(
        monitored_quantities, valid_monitor_stream, prefix="valid",
        before_first_epoch=False, after_epoch=False, every_n_epochs=5)

    # Prepare checkpoint
    checkpoint = Checkpoint(
        'celeba_classifier.zip', every_n_epochs=5, use_cpickle=True)

    extensions = [Timing(), FinishAfter(after_n_epochs=50), train_monitoring,
                  valid_monitoring, checkpoint, Printing(), ProgressBar()]
    main_loop = MainLoop(data_stream=main_loop_stream, algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
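A small numeric illustration of the exponential moving average used above for the batch-normalization population statistics (decay_rate = 0.05): each update moves the stored population value 5% of the way towards the current minibatch statistic.

pop, decay_rate = 0.0, 0.05
for minibatch_stat in [1.0, 1.0, 1.0]:
    pop = minibatch_stat * decay_rate + pop * (1 - decay_rate)
print(pop)  # ~0.1426; pop slowly approaches the minibatch statistic of 1.0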
Example #29
File: cifar_mlp.py  Project: oplatek/ALI
def main(save_to, num_epochs, batch_size):
    mlp = MLP([Tanh(), Tanh(), Tanh(), Softmax()], [3072, 4096, 1024, 512, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tt.tensor4('features', dtype='float32')
    y = tt.vector('label', dtype='int32')

    probs = mlp.apply(x.reshape((-1, 3072)))
    cost = CategoricalCrossEntropy().apply(y, probs)
    error_rate = MisclassificationRate().apply(y, probs)

    cg = ComputationGraph([cost])
    ws = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * sum(([(w**2).sum() for w in ws]))
    cost.name = 'final_cost'

    train_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10',
                                   is_train=True)
    valid_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10',
                                   is_train=False)

    train_stream = train_dataset.get_stream(batch_size)
    valid_stream = valid_dataset.get_stream(batch_size)

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Adam(learning_rate=0.001))
    extensions = [
        Timing(),
        LogExtension('/home/belohlavek/ALI/mlp.log'),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate], valid_stream, prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    main_loop = MainLoop(algorithm,
                         train_stream,
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
Example #30
def align_with_nam(config, args):
    """Main method for using the Neural Alignment Model.
    
    Args:
        config (dict): NMT configuration
        args (object): ArgumentParser object containing the command
                       line arguments
    
    Returns:
        list. List of alignments, where alignments are represented as
        numpy matrices containing confidences between 0 and 1.
    """
    global alignments
    config['attention'] = 'parameterized'
    alignments = []
    nmt_model = NMTModel(config)
    nmt_model.set_up()
    align_stream = _get_align_stream(**config)
    extensions = [
        FinishAfter(after_epoch=True),
        TrainingDataMonitoring([nmt_model.cost], after_batch=True),
        PrintCurrentLogRow(after_batch=True),
        NextSentenceExtension(align_stream=align_stream,
                              every_n_batches=args.iterations,
                              before_training=True)
    ]
    train_params = []
    for p in nmt_model.cg.parameters:
        if p.name in 'alignment_matrix':
            train_params.append(p)
            break
    algorithm = GradientDescent(cost=nmt_model.cost, parameters=train_params)
    main_loop = MainLoop(model=nmt_model.training_model,
                         algorithm=algorithm,
                         data_stream=align_stream,
                         extensions=extensions)
    nmt_model_path = get_nmt_model_path(args.nmt_model_selector, config)
    loader = LoadNMTUtils(nmt_model_path, config['saveto'],
                          nmt_model.training_model)
    loader.load_weights()
    try:
        main_loop.run()
    except StopIteration:
        logging.info("Alignment finished")
    return alignments