Example #1
def test_training_data_monitoring():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [
        numpy.array(f, dtype=theano.config.floatX)
        for f in [[1, 2], [3, 4], [5, 6]]
    ]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    V = shared_floatx(7, name='V')
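    # named_copy is a helper from older Blocks releases; it is equivalent to
    # W.sum().copy(name='W_sum').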
    W_sum = named_copy(W.sum(), 'W_sum')
    cost = ((x * W).sum() - y)**2
    cost.name = 'cost'

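    # Extension that logs, before each batch, the cost computed with the
    # current (pre-update) parameter values; the test compares it against
    # the monitored channels below.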
    class TrueCostExtension(TrainingExtension):
        def before_batch(self, data):
            self.main_loop.log.current_row['true_cost'] = ((
                (W.get_value() * data["features"]).sum() - data["targets"])**2)

    main_loop = MainLoop(model=None,
                         data_stream=dataset.get_example_stream(),
                         algorithm=GradientDescent(cost=cost,
                                                   parameters=[W],
                                                   step_rule=Scale(0.001)),
                         extensions=[
                             FinishAfter(after_n_epochs=1),
                             TrainingDataMonitoring([W_sum, cost, V],
                                                    prefix="train1",
                                                    after_batch=True),
                             TrainingDataMonitoring(
                                 [aggregation.mean(W_sum), cost],
                                 prefix="train2",
                                 after_epoch=True),
                             TrueCostExtension()
                         ])

    main_loop.run()

    # Check monitoring of a shared variable
    assert_allclose(main_loop.log.current_row['train1_V'], 7.0)

    for i in range(n_batches):
        # The ground truth is written to the log before the batch is
        # processed, whereas the extension writes after the batch is
        # processed. This is why the iteration numbers differ here.
        assert_allclose(main_loop.log[i]['true_cost'],
                        main_loop.log[i + 1]['train1_cost'])
    assert_allclose(
        main_loop.log[n_batches]['train2_cost'],
        sum([main_loop.log[i]['true_cost']
             for i in range(n_batches)]) / n_batches)
    assert_allclose(
        main_loop.log[n_batches]['train2_W_sum'],
        sum([
            main_loop.log[i]['train1_W_sum'] for i in range(1, n_batches + 1)
        ]) / n_batches)
Example #2
def create_training_data_monitoring(self):
    if "every_n_epochs" in self.args.evaluate_every_n:
        return TrainingDataMonitoring(vars_to_monitor,
                                      prefix='train',
                                      after_epoch=True)
    else:
        return TrainingDataMonitoring(vars_to_monitor,
                                      prefix='train',
                                      after_epoch=True,
                                      **self.args.evaluate_every_n)
Example #3
def prepare_opti(cost, test):
    model = Model(cost)

    algorithm = GradientDescent(
        cost=cost,
        parameters=model.parameters,
        step_rule=RMSProp(),
        on_unused_sources='ignore'
    )

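    # Train for at most nb_epoch epochs, stopping early when the
    # 'test_cross_entropy' notification shows no improvement for
    # `patience` epochs.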
    extensions = [
        FinishAfter(after_n_epochs=nb_epoch),
        FinishIfNoImprovementAfter(notification_name='test_cross_entropy', epochs=patience),
        TrainingDataMonitoring(
            [algorithm.cost],
            prefix="train",
            after_epoch=True),
        DataStreamMonitoring(
            [algorithm.cost],
            test_stream,
            prefix="test"),
        Printing(),
        ProgressBar(),
        #Checkpoint(path, after_epoch=True)
    ]

    if resume:
        print "Restoring from previous breakpoint"
        extensions.extend([
            Load(path)
        ])
    return model, algorithm, extensions
Example #4
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0),
              seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

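    # Dump/LoadFromDump are the checkpointing extensions of early Blocks
    # releases (later superseded by Checkpoint/Load).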
    main_loop = MainLoop(
        GradientDescent(cost=cost,
                        params=ComputationGraph(cost).parameters,
                        step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) + [
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)), prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Dump(save_to),
            Printing()
        ])
    main_loop.run()
    return main_loop
Example #5
    def train(self):
        print "Loading data"
        datafile = self.get_datafile()
        nbexamples = datafile.num_examples
        nbexamples -= nbexamples % (self.sequence_dim * self.time_dim)

        train_stream = ReshapeTransformer(
            DataStream(dataset=datafile,
                       iteration_scheme=ShuffledBatchChunkScheme(
                           nbexamples, self.sequence_dim * self.time_dim)),
            self.sequence_dim, self.time_dim)

        if self.image_size is not None:
            train_stream = Mapping(train_stream,
                                   spec_mapping,
                                   add_sources=['spectrogram'])

        print "Building Theano Graph"
        algorithm, self.fprop = self.build_theano_functions()

        main_loop = MainLoop(algorithm=algorithm,
                             data_stream=train_stream,
                             model=self.model,
                             extensions=[
                                 FinishAfter(after_n_epochs=EPOCHS),
                                 TrainingDataMonitoring(
                                     [aggregation.mean(self.model.outputs[0])],
                                     prefix="train",
                                     after_epoch=True),
                                 Printing(),
                                 SaveParams(EXP_PATH + NAME, after_epoch=True)
                             ])

        main_loop.run()
Example #6
def test_training_data_monitoring_updates_algorithm():
    features = [
        numpy.array(f, dtype=theano.config.floatX)
        for f in [[1, 2], [3, 5], [5, 8]]
    ]
    targets = numpy.array([f.sum() for f in features])
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    m = x.mean().copy(name='features_mean')
    t = y.sum().copy(name='targets_sum')

    main_loop = MainLoop(
        model=None,
        data_stream=dataset.get_example_stream(),
        algorithm=UpdatesAlgorithm(),
        extensions=[
            TrainingDataMonitoring([m, t], prefix="train1", after_batch=True)
        ],
    )
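    # MainLoop.run() would normally do this wiring; it is done by hand here
    # so the test can check that 'before_training' installs the monitor's
    # aggregation updates into the algorithm.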
    main_loop.extensions[0].main_loop = main_loop
    assert len(main_loop.algorithm.updates) == 0
    main_loop.extensions[0].do('before_training')
    assert len(main_loop.algorithm.updates) > 0
Example #7
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

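    # Add L2 weight decay on both weight matrices to the cost.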
    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train", ))
    mnist_test = MNIST(("test", ))

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate],
                             Flatten(DataStream.default_stream(
                                 mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                                     which_sources=('features', )),
                             prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[[
                     'test_final_cost',
                     'test_misclassificationrate_apply_error_rate'
                 ], ['train_total_gradient_norm']]))

    main_loop = MainLoop(algorithm,
                         Flatten(DataStream.default_stream(
                             mnist_train,
                             iteration_scheme=SequentialScheme(
                                 mnist_train.num_examples, 50)),
                                 which_sources=('features', )),
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
Example #8
def prepare_opti(cost, test, *args):
    model = Model(cost)
    logger.info("Model created")

    algorithm = GradientDescent(cost=cost,
                                parameters=model.parameters,
                                step_rule=Adam(learning_rate=0.0015),
                                on_unused_sources='ignore')

    to_monitor = [algorithm.cost]
    if args:
        to_monitor.extend(args)

    extensions = [
        FinishAfter(after_n_epochs=nb_epoch),
        FinishIfNoImprovementAfter(notification_name='loglikelihood_nat',
                                   epochs=patience),
        TrainingDataMonitoring(to_monitor, prefix="train", after_epoch=True),
        DataStreamMonitoring(to_monitor, test_stream, prefix="test"),
        Printing(),
        ProgressBar(),
        ApplyMask(before_first_epoch=True, after_batch=True),
        Checkpoint(check, every_n_epochs=save_every),
        SaveModel(name=path + '/' + 'pixelcnn_{}'.format(dataset),
                  every_n_epochs=save_every),
        GenerateSamples(every_n_epochs=save_every),
        #Checkpoint(path+'/'+'exp.log', save_separately=['log'],every_n_epochs=save_every),
    ]

    if resume:
        logger.info("Restoring from previous checkpoint")
        extensions = [Load(path + '/' + check)]

    return model, algorithm, extensions
Example #9
    def train(self):
        print "Loading data"
        datafile = self.get_datafile()
        nbexamples = datafile.num_examples

        train_stream = DataStream(
            dataset=datafile,
            iteration_scheme=OverlapSequentialScheme(
                nbexamples, self.time_dim))

        print "Building Theano Graph"
        algorithm, self.fprop = self.build_theano_functions()

        main_loop = MainLoop(
            algorithm=algorithm,
            data_stream=train_stream,
            extensions=[
                FinishAfter(after_n_epochs=EPOCHS),
                TrainingDataMonitoring(
                    [self.model.outputs[0]],
                    prefix="train",
                    after_epoch=True,
                    every_n_batches=4000),
                #ProgressBar(),
                Printing()
            ])

        main_loop.run()
Example #10
def main():
  x = tensor.matrix("features")
  input_to_hidden1 = get_typical_layer(x, 784, 500)
  #hidden1_to_hidden2 = get_typical_layer(input_to_hidden1, 500, 300)
  hidden1_to_latent = get_typical_layer(input_to_hidden1, 500, 20)

  latent_to_hidden2 = get_typical_layer(hidden1_to_latent, 20, 500)
  #hidden3_to_hidden4 = get_typical_layer(latent_to_hidden3, 300, 500)
  hidden2_to_output = get_typical_layer(latent_to_hidden2, 500, 784, Logistic())
  hidden2_to_output.name = "last_before_output"

  from blocks.bricks.cost import SquaredError, AbsoluteError, BinaryCrossEntropy
  from blocks.graph import ComputationGraph
  from blocks.algorithms import Adam, GradientDescent, Scale
  from blocks.roles import WEIGHT

  cost = BinaryCrossEntropy(name="error").apply(x, hidden2_to_output)
  cg = ComputationGraph(cost)
  weights = VariableFilter(roles=[WEIGHT])(cg.variables)
#  cost += 0.0001 * tensor.sum(map(lambda x: (x**2).sum(), weights))
#  cost.name = "regularized error"
  gd = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Adam())

  from blocks.main_loop import MainLoop
  from blocks.extensions import FinishAfter, Printing, ProgressBar
  from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
  monitor = TrainingDataMonitoring([cost], after_epoch=True)
  main_loop = MainLoop(data_stream=get_data_stream(), algorithm=gd,
                       extensions=[monitor, FinishAfter(after_n_epochs=5),
                                   ProgressBar(), Printing()])

  main_loop.run()
  showcase(cg, "last_before_output")
Example #11
def run(model_name):

    running_on_laptop = socket.gethostname() == 'yop'

    X = tensor.tensor4('image_features', dtype='float32')
    T = tensor.matrix('targets', dtype='float32')

    image_border_size = 100

    if running_on_laptop:
        host_plot = 'http://*****:*****'

    extensions = [
        Plot('%s %s @ %s' %
             (model_name, datetime.datetime.now(), socket.gethostname()),
             channels=[['loss', 'valid_loss_test'], ['valid_error']],
             after_epoch=True,
             server_url=host_plot),
        Printing(),
        Checkpoint('train2')
    ]

    main_loop = MainLoop(data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
Example #12
def train(cost, error_rate, batch_size=100, num_epochs=150):
    # Set up logger
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = 'results/memory_' + timestr
    log_path = os.path.join(save_path, 'log.txt')
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    blocks_model = Model(cost)
    all_params = blocks_model.parameters
    print "Number of found parameters:" + str(len(all_params))
    print all_params

    training_algorithm = GradientDescent(cost=cost,
                                         parameters=all_params,
                                         step_rule=Adam(learning_rate=0.001))

    # training_algorithm = GradientDescent(
    #     cost=cost, params=all_params,
    #     step_rule=Scale(learning_rate=model.default_lr))

    monitored_variables = [cost, error_rate]

    # the rest is for validation
    # train_data_stream, valid_data_stream = get_mnist_streams(
    #     50000, batch_size)
    train_data_stream, valid_data_stream = get_mnist_video_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(variables=monitored_variables,
                                              prefix="train",
                                              after_epoch=True)

    valid_monitoring = DataStreamMonitoring(variables=monitored_variables,
                                            data_stream=valid_data_stream,
                                            prefix="valid",
                                            after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=blocks_model,
        extensions=[
            train_monitoring, valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams('valid_misclassificationrate_apply_error_rate',
                       blocks_model, save_path),
            SaveLog(save_path, after_epoch=True),
            ProgressBar(),
            Printing()
        ])
    main_loop.run()
Example #13
    def __init__(self, worker, experiment, config):
        # Data
        dataset = CIFAR10('train', flatten=False)
        test_dataset = CIFAR10('test', flatten=False)
        batch_size = 128

        scheme = ShuffledScheme(dataset.num_examples, batch_size)
        datastream = DataStream(dataset, iteration_scheme=scheme)

        test_scheme = ShuffledScheme(test_dataset.num_examples, batch_size)
        test_stream = DataStream(test_dataset, iteration_scheme=test_scheme)

        # Model
        m = ModelHelper(config)

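        # Score a run by the mean of its 2nd- through 4th-best test
        # accuracies (np.sort is ascending, so [-4:-1] skips the single
        # best value).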
        def score_func(mainloop):
            scores = mainloop.log.to_dataframe()["test_accur"].values
            return np.mean(np.sort(scores)[-4:-1])

        # Algorithm
        cg = ComputationGraph([m.cost])
        algorithm = GradientDescent(cost=m.cost,
                                    params=cg.parameters,
                                    step_rule=AdaM())

        #job_name = os.path.basename(worker.running_job)
        job_name = os.path.basename(".")
        update_path = (os.path.join(os.path.join(worker.path, "updates"),
                                    job_name))
        if not os.path.exists(update_path):
            os.mkdir(update_path)

        self.main_loop = MainLoop(
            algorithm,
            datastream,
            model=Model(m.cost),
            extensions=[
                Timing(),
                TrainingDataMonitoring([m.cost, m.accur],
                                       prefix="train",
                                       after_epoch=True),
                DataStreamMonitoring([m.cost, m.accur],
                                     test_stream,
                                     prefix="test"),
                FinishAfter(after_n_epochs=1),
                LogToFile(os.path.join(update_path, "log.csv")),
                Printing(),
                EpochProgress(dataset.num_examples // batch_size + 1)
                #, DistributeUpdate(worker, every_n_epochs=1)
                #, DistributeWhetlabFinish(worker, experiment, score_func)
                #, Plot('cifar10',
                #channels=[['train_cost', 'test_cost'], ['train_accur', 'test_accur']])
            ])
Example #14
def monitor_stream(self, stream, prefix, **kwargs):
    variables = self.monitored_vars + self.get_quantitites_vars()
    if stream == self.train_stream:
        monitor = TrainingDataMonitoring(variables,
                                         prefix=prefix,
                                         **kwargs)
    else:
        monitor = DataStreamMonitoring(variables,
                                       data_stream=stream,
                                       prefix=prefix,
                                       **kwargs)
    self.extensions.insert(0, monitor)
Example #15
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHTS])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(cost=cost,
                                step_rule=SteepestDescent(learning_rate=0.1))
    main_loop = MainLoop(
        mlp,
        DataStream(mnist_train,
                   iteration_scheme=SequentialScheme(mnist_train.num_examples,
                                                     50)),
        algorithm,
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=num_epochs),
            DataStreamMonitoring([cost, error_rate],
                                 DataStream(mnist_test,
                                            iteration_scheme=SequentialScheme(
                                                mnist_test.num_examples, 500)),
                                 prefix="test"),
            TrainingDataMonitoring([
                cost, error_rate,
                aggregation.mean(algorithm.total_gradient_norm)
            ],
                                   prefix="train",
                                   after_every_epoch=True),
            SerializeMainLoop(save_to),
            Plot('MNIST example',
                 channels=[[
                     'test_final_cost',
                     'test_misclassificationrate_apply_error_rate'
                 ], ['train_total_gradient_norm']]),
            Printing()
        ])
    main_loop.run()
Example #16
def main(save_to, num_epochs, batch_size):
    mlp = MLP([Tanh(), Tanh(), Tanh(), Softmax()], [3072, 4096, 1024, 512, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tt.tensor4('features', dtype='float32')
    y = tt.vector('label', dtype='int32')

    probs = mlp.apply(x.reshape((-1, 3072)))
    cost = CategoricalCrossEntropy().apply(y, probs)
    error_rate = MisclassificationRate().apply(y, probs)

    cg = ComputationGraph([cost])
    ws = VariableFilter(roles=[WEIGHT])(cg.variables)
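    # Collect all weight matrices and add L2 weight decay to the cost.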
    cost = cost + .00005 * sum(([(w**2).sum() for w in ws]))
    cost.name = 'final_cost'

    train_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10',
                                   is_train=True)
    valid_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10',
                                   is_train=False)

    train_stream = train_dataset.get_stream(batch_size)
    valid_stream = valid_dataset.get_stream(batch_size)

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Adam(learning_rate=0.001))
    extensions = [
        Timing(),
        LogExtension('/home/belohlavek/ALI/mlp.log'),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate], valid_stream, prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    main_loop = MainLoop(algorithm,
                         train_stream,
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
Example #17
def align_with_nam(config, args):
    """Main method for using the Neural Alignment Model.
    
    Args:
        config (dict): NMT configuration
        args (object): ArgumentParser object containing the command
                       line arguments
    
    Returns:
        list. List of alignments, where alignments are represented as
        numpy matrices containing confidences between 0 and 1.
    """
    global alignments
    config['attention'] = 'parameterized'
    alignments = []
    nmt_model = NMTModel(config)
    nmt_model.set_up()
    align_stream = _get_align_stream(**config)
    extensions = [
        FinishAfter(after_epoch=True),
        TrainingDataMonitoring([nmt_model.cost], after_batch=True),
        PrintCurrentLogRow(after_batch=True),
        NextSentenceExtension(align_stream=align_stream,
                              every_n_batches=args.iterations,
                              before_training=True)
    ]
    train_params = []
    for p in nmt_model.cg.parameters:
        if p.name == 'alignment_matrix':
            train_params.append(p)
            break
    algorithm = GradientDescent(cost=nmt_model.cost, parameters=train_params)
    main_loop = MainLoop(model=nmt_model.training_model,
                         algorithm=algorithm,
                         data_stream=align_stream,
                         extensions=extensions)
    nmt_model_path = get_nmt_model_path(args.nmt_model_selector, config)
    loader = LoadNMTUtils(nmt_model_path, config['saveto'],
                          nmt_model.training_model)
    loader.load_weights()
    try:
        main_loop.run()
    except StopIteration:
        logging.info("Alignment finished")
    return alignments
Example #18
def prepare_opti(cost, test):
    model = Model(cost)
    algorithm = GradientDescent(cost=cost,
                                parameters=model.parameters,
                                step_rule=Adam(),
                                on_unused_sources='ignore')

    extensions = [
        FinishAfter(after_n_epochs=nb_epoch),
        FinishIfNoImprovementAfter(notification_name='test_vae_cost',
                                   epochs=patience),
        TrainingDataMonitoring([algorithm.cost], after_epoch=True),
        DataStreamMonitoring([algorithm.cost], test, prefix="test"),
        Printing(),
        ProgressBar(),
        #SaveModel(name='vae', after_n_epochs=save_every)
    ]
    return model, algorithm, extensions
Example #19
def construct_common_graph(situation, args, outputs, dummy_states, Wy, by, y):

    ytilde = T.dot(outputs["h"][-1], Wy) + by
    yhat = ytilde[:, 0]

    ## mse cost
    mse = ((yhat - y)**2).mean()
    error_rate = mse.mean().copy(name="MSE")
    cost = error_rate.copy(name="cost")
    graph = ComputationGraph([cost, error_rate])

    state_grads = dict((k, T.grad(cost, v)) for k, v in dummy_states.items())

    extensions = []
    # extensions = [
    #     DumpVariables("%s_hiddens" % situation, graph.inputs,
    #                   [v.copy(name="%s%s" % (k, suffix))
    #                    for suffix, things in [("", outputs), ("_grad", state_grads)]
    #                    for k, v in things.items()],
    #                   batch=next(get_stream(which_set="train",
    #                                         batch_size=args.batch_size,
    #                                         num_examples=args.batch_size)
    #                              .get_epoch_iterator(as_dict=True)),
    #                   before_training=True, every_n_epochs=10)]

    h_grads = T.grad(mse, dummy_states['h'])
    c_grads = T.grad(mse, dummy_states['c'])
    grad_timesubsample = 1
    grad_norms = []
    for t in range(0, args.length, grad_timesubsample):
        grad_norms.append(
            h_grads[t, :, :].norm(2).copy(name="grad_norm:h_%04i" % t))
    for t in range(0, args.length, grad_timesubsample):
        grad_norms.append(
            c_grads[t, :, :].norm(2).copy(name="grad_norm:c_%04i" % t))
    extensions.append(
        TrainingDataMonitoring(grad_norms,
                               prefix="iteration",
                               after_batch=True))

    return graph, extensions
Example #20
    def build_extensions_list(self):
        epochs = self.config['epochs']
        save_freq = self.config['save_freq']

        original_save = self.config.get('original_save', False)

        tracked_train = self.model.linksholder.filter_and_broadcast_request(
            'train', 'TrackingLink')
        tracked_valid = self.model.linksholder.filter_and_broadcast_request(
            'valid', 'TrackingLink')

        extensions = [
            Timing(),
            Experiment(self.filemanager.exp_name, self.filemanager.local_path,
                       self.filemanager.network_path,
                       self.filemanager.full_dump),
            FinishAfter(after_n_epochs=epochs),
            TrainingDataMonitoring(tracked_train,
                                   prefix="train",
                                   after_epoch=True),
            DataStreamMonitoring(tracked_valid,
                                 self.streams['valid'],
                                 prefix="valid",
                                 after_epoch=True),
            #Ipdb(after_batch=True),
            SaveExperiment(self.model.parameters,
                           original_save=original_save,
                           every_n_epochs=save_freq),
        ]

        extensions += [
            #LoadExperiment(
            #    parameters,
            #    full_load=False),
            ProgressBar(),
            Printing(),
        ]

        self.extensions = extensions
Example #21
    def train(self, data_file, output_data_file, n_epochs=0):

        training_data = dataset.T_H5PYDataset(data_file, which_sets=('train',))
        test_data = dataset.T_H5PYDataset(data_file, which_sets=('test',))

        session = Session(root_url='http://localhost:5006')

        if self.MainLoop is None:
            step_rules = [RMSProp(learning_rate=0.2, decay_rate=0.95), StepClipping(1)]

            algorithm = GradientDescent(cost=self.Cost,
                                        parameters=self.ComputationGraph.parameters,
                                        step_rule=CompositeRule(step_rules),
                                        on_unused_sources='ignore')

            train_stream = DataStream.default_stream(
                training_data, iteration_scheme=SequentialScheme(
                    training_data.num_examples, batch_size=100))

            test_stream = DataStream.default_stream(
                test_data, iteration_scheme=SequentialScheme(
                    test_data.num_examples, batch_size=100))

            self.MainLoop = MainLoop(
                model=Model(self.Cost),
                data_stream=train_stream,
                algorithm=algorithm,
                extensions=[
                    FinishAfter(after_n_epochs=n_epochs),
                    Printing(),
                    Checkpoint(output_data_file, every_n_epochs=50),
                    TrainingDataMonitoring([self.Cost], after_batch=True, prefix='train'),
                    DataStreamMonitoring([self.Cost], after_batch=True, data_stream=test_stream, prefix='test'),
                    Plot(output_data_file, channels=[['train_cost', 'test_cost']])
                ])

        self.MainLoop.run()
Example #22
cost.name = "sequence_log_likelihood"

cg = ComputationGraph(cost)
model = Model(cost)

#################
# Algorithm
#################

algorithm = GradientDescent(cost=cost,
                            parameters=cg.parameters,
                            step_rule=CompositeRule(
                                [StepClipping(10.0),
                                 Adam(lr)]))

train_monitor = TrainingDataMonitoring(variables=[cost],
                                       after_epoch=True,
                                       prefix="train")

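# TrackTheBest records the best value of the monitored channel so far and
# fires a '..._best_so_far' notification when it improves.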
extensions = [
    train_monitor,
    TrackTheBest('train_sequence_log_likelihood'),
    Printing(after_epoch=True)
]

main_loop = MainLoop(model=model,
                     data_stream=data_stream,
                     algorithm=algorithm,
                     extensions=extensions)

main_loop.run()
Example #23
model = Model(cost)

#################
# Algorithm
#################

n_batches = 139 * 16

algorithm = GradientDescent(cost=cost,
                            parameters=cg.parameters,
                            step_rule=CompositeRule(
                                [StepClipping(10.0),
                                 Adam(lr)]))

train_monitor = TrainingDataMonitoring(variables=[cost],
                                       every_n_batches=n_batches,
                                       prefix="train")

valid_monitor = DataStreamMonitoring(
    [cost],
    valid_stream,
    after_epoch=True,
    #before_first_epoch = False,
    prefix="valid")

extensions = [
    Timing(every_n_batches=n_batches), train_monitor, valid_monitor,
    TrackTheBest('valid_nll', after_epoch=True),
    Plot(save_dir + experiment_name + ".png", [['train_nll', 'valid_nll']],
         every_n_batches=4 * n_batches,
         email=True),
Example #24
    # Build model
    m = config.Model(config, ds.vocab_size)

    # Build the Blocks stuff for training
    model = Model(m.sgd_cost)
    # test_model = Model(m.generations)

    algorithm = GradientDescent(cost=m.sgd_cost,
                                step_rule=config.step_rule,
                                parameters=model.parameters,
                                on_unused_sources='ignore')

    extensions = [
        TrainingDataMonitoring([v for l in m.monitor_vars for v in l],
                               prefix='train',
                               after_epoch=True)
    ]
    if config.save_freq is not None and dump_path is not None:
        extensions += [
            SaveLoadParams(path=dump_path,
                           model=model,
                           before_training=False,
                           after_training=True,
                           after_epoch=True)
        ]
    if valid_stream is not None and config.valid_freq != -1:
        extensions += [
            DataStreamMonitoring([v for l in m.monitor_vars_valid for v in l],
                                 valid_stream,
                                 prefix='valid'),
Example #25
def main(name, dataset, epochs, batch_size, learning_rate, attention, n_iter,
         enc_dim, dec_dim, z_dim, oldmodel):

    image_size, data_train, data_valid, data_test = datasets.get_data(dataset)

    train_stream = Flatten(
        DataStream(data_train,
                   iteration_scheme=SequentialScheme(data_train.num_examples,
                                                     batch_size)))
    valid_stream = Flatten(
        DataStream(data_valid,
                   iteration_scheme=SequentialScheme(data_valid.num_examples,
                                                     batch_size)))
    test_stream = Flatten(
        DataStream(data_test,
                   iteration_scheme=SequentialScheme(data_test.num_examples,
                                                     batch_size)))

    if name is None:
        name = dataset

    img_height, img_width = image_size
    x_dim = img_height * img_width

    rnninits = {
        #'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        #'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    if attention != "":
        read_N, write_N = attention.split(',')

        read_N = int(read_N)
        write_N = int(write_N)
        read_dim = 2 * read_N**2

        reader = AttentionReader(x_dim=x_dim,
                                 dec_dim=dec_dim,
                                 width=img_width,
                                 height=img_height,
                                 N=read_N,
                                 **inits)
        writer = AttentionWriter(input_dim=dec_dim,
                                 output_dim=x_dim,
                                 width=img_width,
                                 height=img_height,
                                 N=write_N,
                                 **inits)
        attention_tag = "r%d-w%d" % (read_N, write_N)
    else:
        read_dim = 2 * x_dim

        reader = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
        writer = Writer(input_dim=dec_dim, output_dim=x_dim, **inits)

        attention_tag = "full"

    #----------------------------------------------------------------------

    # Learning rate
    def lr_tag(value):
        """ Convert a float into a short tag-usable string representation. E.g.:
            0.1   -> 11
            0.01  -> 12
            0.001 -> 13
            0.005 -> 53
        """
        exp = np.floor(np.log10(value))
        leading = ("%e" % value)[0]
        return "%s%d" % (leading, -exp)

    lr_str = lr_tag(learning_rate)

    subdir = time.strftime("%Y%m%d-%H%M%S") + "-" + name
    longname = "%s-%s-t%d-enc%d-dec%d-z%d-lr%s" % (
        dataset, attention_tag, n_iter, enc_dim, dec_dim, z_dim, lr_str)
    pickle_file = subdir + "/" + longname + ".pkl"

    print("\nRunning experiment %s" % longname)
    print("               dataset: %s" % dataset)
    print("          subdirectory: %s" % subdir)
    print("         learning rate: %g" % learning_rate)
    print("             attention: %s" % attention)
    print("          n_iterations: %d" % n_iter)
    print("     encoder dimension: %d" % enc_dim)
    print("           z dimension: %d" % z_dim)
    print("     decoder dimension: %d" % dec_dim)
    print("            batch size: %d" % batch_size)
    print("                epochs: %d" % epochs)
    print()

    #----------------------------------------------------------------------

    encoder_rnn = LSTM(dim=enc_dim, name="RNN_enc", **rnninits)
    decoder_rnn = LSTM(dim=dec_dim, name="RNN_dec", **rnninits)
    encoder_mlp = MLP([Identity()], [(read_dim + dec_dim), 4 * enc_dim],
                      name="MLP_enc",
                      **inits)
    decoder_mlp = MLP([Identity()], [z_dim, 4 * dec_dim],
                      name="MLP_dec",
                      **inits)
    q_sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, **inits)

    draw = DrawModel(n_iter,
                     reader=reader,
                     encoder_mlp=encoder_mlp,
                     encoder_rnn=encoder_rnn,
                     sampler=q_sampler,
                     decoder_mlp=decoder_mlp,
                     decoder_rnn=decoder_rnn,
                     writer=writer)
    draw.initialize()

    #------------------------------------------------------------------------
    x = tensor.matrix('features')

    #x_recons = 1. + x
    x_recons, kl_terms = draw.reconstruct(x)
    #x_recons, _, _, _, _ = draw.silly(x, n_steps=10, batch_size=100)
    #x_recons = x_recons[-1,:,:]

    #samples = draw.sample(100)
    #x_recons = samples[-1, :, :]
    #x_recons = samples[-1, :, :]

    recons_term = BinaryCrossEntropy().apply(x, x_recons)
    recons_term.name = "recons_term"

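    # Variational bound: reconstruction cross-entropy plus the KL terms
    # summed over the draw iterations and averaged over the batch.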
    cost = recons_term + kl_terms.sum(axis=0).mean()
    cost.name = "nll_bound"

    #------------------------------------------------------------
    cg = ComputationGraph([cost])
    params = VariableFilter(roles=[PARAMETER])(cg.variables)

    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=CompositeRule([
            StepClipping(10.),
            Adam(learning_rate),
        ])
        #step_rule=RMSProp(learning_rate),
        #step_rule=Momentum(learning_rate=learning_rate, momentum=0.95)
    )
    #algorithm.add_updates(scan_updates)

    #------------------------------------------------------------------------
    # Setup monitors
    monitors = [cost]
    for t in range(n_iter):
        kl_term_t = kl_terms[t, :].mean()
        kl_term_t.name = "kl_term_%d" % t

        #x_recons_t = T.nnet.sigmoid(c[t,:,:])
        #recons_term_t = BinaryCrossEntropy().apply(x, x_recons_t)
        #recons_term_t = recons_term_t.mean()
        #recons_term_t.name = "recons_term_%d" % t

        monitors += [kl_term_t]

    train_monitors = monitors[:]
    train_monitors += [aggregation.mean(algorithm.total_gradient_norm)]
    train_monitors += [aggregation.mean(algorithm.total_step_norm)]
    # Live plotting...
    plot_channels = [
        ["train_nll_bound", "test_nll_bound"],
        ["train_kl_term_%d" % t for t in range(n_iter)],
        #["train_recons_term_%d" % t for t in range(n_iter)],
        ["train_total_gradient_norm", "train_total_step_norm"]
    ]

    #------------------------------------------------------------

    if not os.path.exists(subdir):
        os.makedirs(subdir)

    main_loop = MainLoop(
        model=Model(cost),
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=epochs),
            TrainingDataMonitoring(train_monitors,
                                   prefix="train",
                                   after_epoch=True),
            #            DataStreamMonitoring(
            #                monitors,
            #                valid_stream,
            ##                updates=scan_updates,
            #                prefix="valid"),
            DataStreamMonitoring(
                monitors,
                test_stream,
                #                updates=scan_updates,
                prefix="test"),
            Checkpoint(name,
                       before_training=False,
                       after_epoch=True,
                       save_separately=['log', 'model']),
            #Checkpoint(image_size=image_size, save_subdir=subdir, path=pickle_file, before_training=False, after_epoch=True, save_separately=['log', 'model']),
            Plot(name, channels=plot_channels),
            ProgressBar(),
            Printing()
        ])

    if oldmodel is not None:
        print("Initializing parameters with old model %s" % oldmodel)
        with open(oldmodel, "rb") as f:
            oldmodel = pickle.load(f)
            main_loop.model.set_param_values(oldmodel.get_param_values())
        del oldmodel

    main_loop.run()
Example #26
def main(save_to, num_epochs,
         regularization=0.0003, subset=None, num_batches=None,
         histogram=None, resume=False):
    batch_size = 500
    output_size = 10
    convnet = create_lenet_5()
    layers = convnet.layers

    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Normalize input and apply the convnet
    probs = convnet.apply(x)
    cost = (CategoricalCrossEntropy().apply(y.flatten(), probs)
            .copy(name='cost'))
    components = (ComponentwiseCrossEntropy().apply(y.flatten(), probs)
            .copy(name='components'))
    error_rate = (MisclassificationRate().apply(y.flatten(), probs)
                  .copy(name='error_rate'))
    confusion = (ConfusionMatrix().apply(y.flatten(), probs)
                  .copy(name='confusion'))
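    # Sum the per-batch confusion matrices instead of averaging them.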
    confusion.tag.aggregation_scheme = Sum(confusion)

    cg = ComputationGraph([cost, error_rate, components])

    # Apply regularization to the cost
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    l2_norm = sum([(W ** 2).sum() for W in weights])
    l2_norm.name = 'l2_norm'
    cost = cost + regularization * l2_norm
    cost.name = 'cost_with_regularization'

    if subset:
        start = 30000 - subset // 2
        mnist_train = MNIST(("train",), subset=slice(start, start+subset))
    else:
        mnist_train = MNIST(("train",))
    mnist_train_stream = DataStream.default_stream(
        mnist_train, iteration_scheme=ShuffledScheme(
            mnist_train.num_examples, batch_size))

    mnist_test = MNIST(("test",))
    mnist_test_stream = DataStream.default_stream(
        mnist_test,
        iteration_scheme=ShuffledScheme(
            mnist_test.num_examples, batch_size))

    # Train with simple SGD
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=AdaDelta(decay_rate=0.99))

    # `Timing` extension reports time for reading data, aggregating a batch
    # and monitoring;
    # `ProgressBar` displays a nice progress bar during training.
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs,
                              after_n_batches=num_batches),
                  DataStreamMonitoring(
                      [cost, error_rate, confusion],
                      mnist_test_stream,
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate, l2_norm,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  ProgressBar(),
                  Printing()]

    if histogram:
        attribution = AttributionExtension(
            components=components,
            parameters=cg.parameters,
            components_size=output_size,
            after_batch=True)
        extensions.insert(0, attribution)

    if resume:
        extensions.append(Load(save_to, True, True))

    model = Model(cost)

    main_loop = MainLoop(
        algorithm,
        mnist_train_stream,
        model=model,
        extensions=extensions)

    main_loop.run()

    if histogram:
        save_attributions(attribution, filename=histogram)

    with open('execution-log.json', 'w') as outfile:
        json.dump(main_loop.log, outfile, cls=NumpyEncoder)
Example #27
error_rate = error.copy(name='error_rate')
error_rate2 = error.copy(name='error_rate2')
cg = ComputationGraph([cost, error_rate])
#################
# Training algorithm
#################
algorithm = GradientDescent(cost=cost,
                            parameters=cg.parameters,
                            step_rule=Adam(learning_rate=0.0005))
extensions = [
    Timing(),
    FinishAfter(after_n_epochs=num_epochs),
    DataStreamMonitoring([cost, error_rate, error_rate2],
                         stream_valid,
                         prefix="valid"),
    TrainingDataMonitoring(
        [cost, error_rate,
         aggregation.mean(algorithm.total_gradient_norm)],
        prefix="train",
        after_epoch=True),
    Checkpoint("catsVsDogs256.pkl"),
    ProgressBar(),
    Printing()
]
# Add a live plot with the Bokeh server

extensions.append(
    Plot('CatsVsDogs_256',
         channels=[['train_error_rate', 'valid_error_rate'],
                   ['valid_cost', 'valid_error_rate2'],
                   ['train_total_gradient_norm']],
         after_epoch=True))
Example #28
def main(config,
         tr_stream,
         dev_stream,
         source_vocab,
         target_vocab,
         use_bokeh=False):

    # Create Theano variables
    logger.info('Creating theano variables')
    source_sentence = tensor.lmatrix('source')
    source_sentence_mask = tensor.matrix('source_mask')
    target_sentence = tensor.lmatrix('target')
    target_sentence_mask = tensor.matrix('target_mask')
    initial_context = tensor.matrix('initial_context')

    # Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(config['src_vocab_size'],
                                   config['enc_embed'], config['enc_nhids'])

    # let user specify the target transition class name in config,
    # eval it and pass to decoder
    target_transition_name = config.get(
        'target_transition', 'GRUInitialStateWithInitialStateSumContext')
    target_transition = eval(target_transition_name)
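    # NOTE: the transition class name from the config is passed to eval(),
    # so it must name a class that is importable in this module's scope.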

    logger.info('Using target transition: {}'.format(target_transition_name))
    decoder = InitialContextDecoder(config['trg_vocab_size'],
                                    config['dec_embed'], config['dec_nhids'],
                                    config['enc_nhids'] * 2,
                                    config['context_dim'], target_transition)

    cost = decoder.cost(encoder.apply(source_sentence, source_sentence_mask),
                        source_sentence_mask, target_sentence,
                        target_sentence_mask, initial_context)

    cost.name = 'decoder_cost'

    # Initialize model
    logger.info('Initializing model')
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(
        config['weight_scale'])
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    logger.info('Creating computational graph')
    cg = ComputationGraph(cost)

    # GRAPH TRANSFORMATIONS FOR BETTER TRAINING
    # TODO: validate performance with/without regularization
    if config.get('l2_regularization', False) is True:
        l2_reg_alpha = config['l2_regularization_alpha']
        logger.info(
            'Applying l2 regularization with alpha={}'.format(l2_reg_alpha))
        model_weights = VariableFilter(roles=[WEIGHT])(cg.variables)

        for W in model_weights:
            cost = cost + (l2_reg_alpha * (W**2).sum())

        # why do we need to name the cost variable? Where did the original name come from?
        cost.name = 'decoder_cost_cost'

    cg = ComputationGraph(cost)

    # apply dropout for regularization
    if config['dropout'] < 1.0:
        # dropout is applied to the output of maxout in GroundHog
        # this is the probability of dropping out, so you probably want to
        # keep it <= 0.5
        logger.info('Applying dropout')
        dropout_inputs = [
            x for x in cg.intermediary_variables
            if x.name == 'maxout_apply_output'
        ]
        cg = apply_dropout(cg, dropout_inputs, config['dropout'])

    # Print shapes
    shapes = [param.get_value().shape for param in cg.parameters]
    logger.info("Parameter shapes: ")
    for shape, count in Counter(shapes).most_common():
        logger.info('    {:15}: {}'.format(shape, count))
    logger.info("Total number of parameters: {}".format(len(shapes)))

    # Print parameter names
    enc_dec_param_dict = merge(
        Selector(encoder).get_parameters(),
        Selector(decoder).get_parameters())
    logger.info("Parameter names: ")
    for name, value in enc_dec_param_dict.items():
        logger.info('    {:15}: {}'.format(value.get_value().shape, name))
    logger.info("Total number of parameters: {}".format(
        len(enc_dec_param_dict)))

    # Set up training model
    logger.info("Building model")
    training_model = Model(cost)

    # create the training directory, and copy this config there if directory doesn't exist
    if not os.path.isdir(config['saveto']):
        os.makedirs(config['saveto'])
        shutil.copy(config['config_file'], config['saveto'])

    # Set extensions

    # TODO: add checking for existing model and loading
    logger.info("Initializing extensions")
    extensions = [
        FinishAfter(after_n_batches=config['finish_after']),
        TrainingDataMonitoring([cost], after_batch=True),
        Printing(after_batch=True),
        CheckpointNMT(config['saveto'], every_n_batches=config['save_freq'])
    ]

    # Create the theano variables that we need for the sampling graph
    sampling_input = tensor.lmatrix('input')
    sampling_context = tensor.matrix('context_input')

    # Set up beam search and sampling computation graphs if necessary
    if config['hook_samples'] >= 1 or config.get('bleu_script',
                                                 None) is not None:
        logger.info("Building sampling model")
        sampling_representation = encoder.apply(
            sampling_input, tensor.ones(sampling_input.shape))

        generated = decoder.generate(sampling_input, sampling_representation,
                                     sampling_context)
        search_model = Model(generated)
        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[1]))  # generated[1] is next_outputs

    # Add sampling
    if config['hook_samples'] >= 1:
        logger.info("Building sampler")
        extensions.append(
            Sampler(
                model=search_model,
                data_stream=tr_stream,
                hook_samples=config['hook_samples'],
                every_n_batches=config['sampling_freq'],
                src_vocab=source_vocab,
                trg_vocab=target_vocab,
                src_vocab_size=config['src_vocab_size'],
            ))

    # Add early stopping based on bleu
    if config.get('bleu_script', None) is not None:
        logger.info("Building bleu validator")
        extensions.append(
            BleuValidator(sampling_input,
                          sampling_context,
                          samples=samples,
                          config=config,
                          model=search_model,
                          data_stream=dev_stream,
                          src_vocab=source_vocab,
                          trg_vocab=target_vocab,
                          normalize=config['normalized_bleu'],
                          every_n_batches=config['bleu_val_freq']))

    # Add early stopping based on Meteor
    if config.get('meteor_directory', None) is not None:
        logger.info("Building meteor validator")
        extensions.append(
            MeteorValidator(sampling_input,
                            sampling_context,
                            samples=samples,
                            config=config,
                            model=search_model,
                            data_stream=dev_stream,
                            src_vocab=source_vocab,
                            trg_vocab=target_vocab,
                            normalize=config['normalized_bleu'],
                            every_n_batches=config['bleu_val_freq']))

    # Reload model if necessary
    if config['reload']:
        extensions.append(LoadNMT(config['saveto']))

    # Plot cost in bokeh if necessary
    if use_bokeh and BOKEH_AVAILABLE:
        extensions.append(
            Plot(config['model_save_directory'],
                 channels=[[
                     'decoder_cost', 'validation_set_bleu_score',
                     'validation_set_meteor_score'
                 ]],
                 every_n_batches=10))

    # Set up training algorithm
    logger.info("Initializing training algorithm")
    # if there is dropout or random noise, we need to use the output of the modified graph
    if config['dropout'] < 1.0 or config['weight_noise_ff'] > 0.0:
        algorithm = GradientDescent(cost=cg.outputs[0],
                                    parameters=cg.parameters,
                                    step_rule=CompositeRule([
                                        StepClipping(config['step_clipping']),
                                        eval(config['step_rule'])()
                                    ]))
    else:
        algorithm = GradientDescent(cost=cost,
                                    parameters=cg.parameters,
                                    step_rule=CompositeRule([
                                        StepClipping(config['step_clipping']),
                                        eval(config['step_rule'])()
                                    ]))

    # enrich the logged information
    extensions.append(Timing(every_n_batches=100))

    # Initialize main loop
    logger.info("Initializing main loop")
    main_loop = MainLoop(model=training_model,
                         algorithm=algorithm,
                         data_stream=tr_stream,
                         extensions=extensions)

    # Train!
    main_loop.run()
Example #29
#################
# Algorithm
#################

n_batches = 200

algorithm = GradientDescent(cost=cost,
                            parameters=cg.parameters,
                            step_rule=CompositeRule(
                                [StepClipping(10.0),
                                 Adam(lr)]))
algorithm.add_updates(extra_updates)
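# Attach additional (non-gradient) updates, e.g. from scan, so they are
# applied alongside each gradient step.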

train_monitor = TrainingDataMonitoring(variables=monitoring_variables +
                                       data_monitoring,
                                       every_n_batches=n_batches,
                                       prefix="train")

valid_monitor = DataStreamMonitoring(monitoring_variables,
                                     valid_stream,
                                     every_n_batches=n_batches,
                                     prefix="valid")

extensions = [
    ProgressBar(),
    Timing(every_n_batches=n_batches), train_monitor, valid_monitor,
    TrackTheBest('valid_nll', every_n_batches=n_batches),
    Plot(save_dir + "progress/" + experiment_name + ".png",
         [['train_nll', 'valid_nll'], ['valid_learning_rate']],
         every_n_batches=n_batches,
         email=False),
Example #30
def main(save_to, model, train, test, num_epochs, input_size=(150, 150),
         learning_rate=0.01, batch_size=50, num_batches=None,
         flatten_stream=False):
    """
    save_to : where to save the trained model
    model : model given as input; must already be initialised (works with
        convnet and mlp)
    input_size : the shape the input image is resized to (before flattening
        is applied, if flatten_stream is True)
    """
    if flatten_stream:
        x = tensor.matrix('image_features')
    else:
        x = tensor.tensor4('image_features')
    y = tensor.lmatrix('targets')

    #Data augmentation
    #insert data augmentation here 
    
    #Generating stream
    train_stream = DataStream.default_stream(
        train,
        iteration_scheme=ShuffledScheme(train.num_examples, batch_size)
    )

    test_stream = DataStream.default_stream(
        test,
        iteration_scheme=ShuffledScheme(test.num_examples, batch_size)
    )
    
    
    #Reshaping procedure
    #Add a crop option in scikitresize so that the image is not deformed
    
    #Resize to desired square shape
    train_stream = ScikitResize(train_stream, input_size, which_sources=('image_features',))
    test_stream = ScikitResize(test_stream, input_size, which_sources=('image_features',))
    
    #Flattening the stream
    if flatten_stream is True:
        train_stream = Flatten(train_stream, which_sources=('image_features',))
        test_stream = Flatten(test_stream, which_sources=('image_features',))
    
    # Apply input to model
    probs = model.apply(x)
    
    #Defining cost and various indices to watch
    #print(probs)
    #cost = SquaredError().apply(y.flatten(),probs)

    cost = CategoricalCrossEntropy().apply(y.flatten(), probs).copy(name='cost')
    error_rate = MisclassificationRate().apply(y.flatten(), probs).copy(
            name='error_rate')

    #Building Computation Graph
    cg = ComputationGraph([cost, error_rate])

    # Train with simple SGD
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=Scale(learning_rate=learning_rate))
    
    # Define extensions: `Timing` reports time for reading data, aggregating
    # a batch, and monitoring; `ProgressBar` displays a progress bar during
    # training.
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs,
                              after_n_batches=num_batches),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train", every_n_batches=5),
                  DataStreamMonitoring(
                      [cost, error_rate], test_stream,
                      prefix="test", every_n_batches=25),
                  Checkpoint(save_to),
                  ProgressBar(),
                  Printing(every_n_batches=5)]


    model = Model(cost)

    main_loop = MainLoop(
        algorithm,
        train_stream,
        model=model,
        extensions=extensions)

    main_loop.run()