Пример #1
0
def test_gradient_descent_finds_inputs_additional_updates():
    """Inputs used only by extra updates must show up in ``inputs``.

    The scalar ``m`` is absent from the gradients; it enters the
    algorithm solely through the update registered with ``add_updates``.
    """
    weights = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    counter = shared_floatx(1)
    m = tensor.scalar('m')

    gradients = OrderedDict()
    gradients[weights] = weights + 1
    algorithm = GradientDescent(gradients=gradients)

    # The only place `m` is referenced is this additional update.
    extra_updates = [(counter, counter + m)]
    algorithm.add_updates(extra_updates)
    algorithm.initialize()

    assert m in algorithm.inputs
Пример #2
0
def test_gradient_descent_finds_inputs_additional_updates():
    """Check that ``algorithm.inputs`` includes inputs of added updates.

    ``m`` is a free scalar that appears only in the update passed to
    ``add_updates`` and not in the gradient expressions.
    """
    param = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    accumulator = shared_floatx(1)
    m = tensor.scalar('m')

    fake_gradients = OrderedDict([(param, param + 1)])
    algorithm = GradientDescent(gradients=fake_gradients)
    algorithm.add_updates([(accumulator, accumulator + m)])
    algorithm.initialize()

    discovered_inputs = algorithm.inputs
    assert m in discovered_inputs
Пример #3
0
def test_gradient_descent():
    """One training step on ``sum(W ** 2)`` must turn ``W`` into ``-0.5 * W``.

    The gradient of the cost is ``2 * W``; assuming the default step rule
    applies a plain ``learning_rate``-scaled step, a rate of 0.75 gives
    ``W - 1.5 * W``.
    """
    weights = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    initial_value = weights.get_value()
    squared_norm = tensor.sum(weights ** 2)

    algorithm = GradientDescent(cost=squared_norm, parameters=[weights])
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()

    # The cost depends on the shared variable only, so the batch is empty.
    algorithm.process_batch({})

    expected_value = -0.5 * initial_value
    assert_allclose(weights.get_value(), expected_value)
Пример #4
0
def test_gradient_descent():
    """Run a single gradient-descent step and compare against ``-0.5 * W``.

    With cost ``sum(W ** 2)`` the gradient is ``2 * W``; the expected
    result presumes the default step rule subtracts
    ``learning_rate * gradient`` (0.75 * 2 * W here).
    """
    start = numpy.array([[1, 2], [3, 4]])
    param = shared_floatx(start)
    before = param.get_value()

    cost = tensor.sum(param ** 2)
    algorithm = GradientDescent(cost=cost, parameters=[param])
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    # No data sources are involved, hence the empty batch.
    algorithm.process_batch({})

    after = param.get_value()
    assert_allclose(after, -0.5 * before)
Пример #5
0
 def _test(f):
     """Run one descent step using explicitly supplied gradients.

     ``f`` is applied to the ``OrderedDict`` mapping the parameter to its
     gradient, and the result is passed as ``gradients`` to
     ``GradientDescent``.
     """
     weights = shared_floatx(numpy.array([[1, 2], [3, 4]]))
     initial_value = weights.get_value()
     cost = tensor.sum(weights ** 2)

     gradients = OrderedDict([(weights, tensor.grad(cost, weights))])
     transformed = f(gradients)

     algorithm = GradientDescent(gradients=transformed)
     algorithm.step_rule.learning_rate.set_value(0.75)
     algorithm.initialize()
     algorithm.process_batch({})

     # Gradient is 2 * W, so a 0.75-scaled step leaves -0.5 * W
     # (assuming the default step rule is a plain scaled step).
     assert_allclose(weights.get_value(), -0.5 * initial_value)
Пример #6
0
 def _test(f):
     """Check a descent step when gradients are given instead of a cost.

     The gradient mapping is first passed through ``f`` so that different
     container types can be fed to ``GradientDescent``.
     """
     param = shared_floatx(numpy.array([[1, 2], [3, 4]]))
     before = param.get_value()

     cost = tensor.sum(param ** 2)
     grad_map = OrderedDict()
     grad_map[param] = tensor.grad(cost, param)

     algorithm = GradientDescent(gradients=f(grad_map))
     learning_rate = algorithm.step_rule.learning_rate
     learning_rate.set_value(0.75)
     algorithm.initialize()
     # Nothing but the shared parameter feeds the graph: empty batch.
     algorithm.process_batch({})

     expected = -0.5 * before
     assert_allclose(param.get_value(), expected)
Пример #7
0
def test_theano_profile_for_sgd_function():
    """``theano_func_kwargs`` must reach the compiled training function.

    Requests profiling through ``theano_func_kwargs`` and verifies both
    that the step still produces ``-0.5 * W`` and that the compiled
    function carries a ``ProfileStats`` object.
    """
    weights = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    initial_value = weights.get_value()
    cost = tensor.sum(weights ** 2)

    compile_options = {'profile': True}
    algorithm = GradientDescent(cost=cost,
                                parameters=[weights],
                                theano_func_kwargs=compile_options)
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    algorithm.process_batch({})

    assert_allclose(weights.get_value(), -0.5 * initial_value)
    profile = algorithm._function.profile
    assert isinstance(profile, ProfileStats)
def test_theano_profile_for_sgd_function():
    """Profiling enabled via ``theano_func_kwargs`` yields ``ProfileStats``.

    Besides the profile check, the usual single-step result
    (``W`` becoming ``-0.5 * W``) is asserted as well.
    """
    param = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    before = param.get_value()

    algorithm = GradientDescent(
        cost=tensor.sum(param ** 2),
        parameters=[param],
        theano_func_kwargs=dict(profile=True))
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    algorithm.process_batch({})

    expected = -0.5 * before
    assert_allclose(param.get_value(), expected)

    # The profile object lives on the private compiled function.
    compiled = algorithm._function
    assert isinstance(compiled.profile, ProfileStats)
Пример #9
0
def test_gradient_descent_spurious_sources():
    """Unused batch sources are rejected by default and can be ignored.

    First a batch with an unexpected ``example_id`` source is fed to an
    algorithm built with the default ``on_unused_sources`` policy, which
    must raise.  Then the same batch is fed to an algorithm built with
    ``on_unused_sources='ignore'``, which must perform a normal step.
    """
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    W_start_value = W.get_value()
    cost = tensor.sum(W ** 2)

    algorithm = GradientDescent(cost=cost, parameters=[W])
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    # assert_raises needs the exception class as its first argument.
    # Passing only a lambda made the check vacuous: the callable was
    # treated as the "exception class" and never invoked.
    # NOTE(review): blocks is expected to report unused sources with
    # ValueError under the default policy -- confirm against the
    # installed version.
    assert_raises(ValueError, algorithm.process_batch,
                  dict(example_id='test'))

    algorithm = GradientDescent(cost=cost, parameters=[W],
                                on_unused_sources='ignore')
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    algorithm.process_batch(dict(example_id='test'))
    # The rejected batch above must not have updated W, so exactly one
    # step has been applied at this point.
    assert_allclose(W.get_value(), -0.5 * W_start_value)
Пример #10
0
def test_gradient_descent_spurious_sources():
    """Unused batch sources are rejected by default and can be ignored.

    A batch carrying an unexpected ``example_id`` source must make the
    default-configured algorithm raise, while an algorithm created with
    ``on_unused_sources='ignore'`` must process it as a normal step.
    """
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    W_start_value = W.get_value()
    cost = tensor.sum(W**2)

    algorithm = GradientDescent(cost=cost, parameters=[W])
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    # assert_raises takes the exception class first; with only a lambda
    # the callable was never executed and the check always passed.
    # NOTE(review): blocks is expected to report unused sources with
    # ValueError under the default policy -- confirm against the
    # installed version.
    assert_raises(ValueError, algorithm.process_batch,
                  dict(example_id='test'))

    algorithm = GradientDescent(cost=cost,
                                parameters=[W],
                                on_unused_sources='ignore')
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    algorithm.process_batch(dict(example_id='test'))
    # Only this second, accepted batch may have modified W.
    assert_allclose(W.get_value(), -0.5 * W_start_value)
Пример #11
0
def construct_main_loop(name, task_name, batch_size, max_epochs,
                        patience_epochs, learning_rate, hyperparameters,
                        **kwargs):
    """Assemble the blocks ``MainLoop`` that trains the model.

    Builds the task and the computation graphs from ``hyperparameters``,
    wraps the training cost in a ``GradientDescent`` algorithm, collects
    monitoring, stopping, checkpointing and logging extensions, prints a
    table of parameter sizes and returns the resulting main loop.

    Parameters
    ----------
    name : str
        Prefix for the output files ``name + "_best.zip"`` and
        ``name + "_log"``.
    task_name, batch_size
        Accepted but not used directly by this function.
    max_epochs : int
        Passed to ``FinishAfter(after_n_epochs=...)``.
    patience_epochs : int
        Passed to ``FinishIfNoImprovementAfter(epochs=...)`` on
        ``"best_valid_error_rate"``.
    learning_rate : float
        Learning rate handed to ``Adam``.
    hyperparameters : dict
        Forwarded as keyword arguments to ``tasks.get_task``,
        ``construct_graphs`` and ``construct_monitors``.  Must contain
        ``"checkpoint_save_path"``.  It is modified in place: the key
        ``"n_channels"`` is set from the task.
    **kwargs
        Ignored.

    Returns
    -------
    main_loop
        The ``blocks.main_loop.MainLoop`` instance (not yet run).
    """
    task = tasks.get_task(**hyperparameters)
    # Set before the dict is forwarded to construct_graphs and
    # construct_monitors; note that this mutates the caller's dict.
    hyperparameters["n_channels"] = task.n_channels

    extensions = []

    # Parenthesised single-argument print works on Python 2 and 3 alike.
    print("constructing graphs...")
    graphs, outputs, updates = construct_graphs(task=task, **hyperparameters)

    print("setting up main loop...")

    from blocks.model import Model
    model = Model(outputs["train"]["cost"])

    from blocks.algorithms import GradientDescent, CompositeRule, StepClipping, Adam
    # Step rules are listed in order: clip at 1e1, Adam, clip at 1e2.
    algorithm = GradientDescent(cost=outputs["train"]["cost"],
                                parameters=graphs["train"].parameters,
                                step_rule=CompositeRule([
                                    StepClipping(1e1),
                                    Adam(learning_rate=learning_rate),
                                    StepClipping(1e2)
                                ]),
                                on_unused_sources="warn")
    algorithm.add_updates(updates["train"])

    extensions.extend(
        construct_monitors(algorithm=algorithm,
                           task=task,
                           model=model,
                           graphs=graphs,
                           outputs=outputs,
                           **hyperparameters))

    from blocks.extensions import FinishAfter, Printing, ProgressBar, Timing
    from blocks.extensions.stopping import FinishIfNoImprovementAfter
    from blocks.extensions.training import TrackTheBest
    from blocks.extensions.saveload import Checkpoint
    from dump import DumpBest, LightCheckpoint, PrintingTo
    extensions.extend([
        TrackTheBest("valid_error_rate", "best_valid_error_rate"),
        FinishIfNoImprovementAfter("best_valid_error_rate",
                                   epochs=patience_epochs),
        FinishAfter(after_n_epochs=max_epochs),
        DumpBest("best_valid_error_rate", name + "_best.zip"),
        Checkpoint(hyperparameters["checkpoint_save_path"],
                   on_interrupt=False,
                   every_n_epochs=5,
                   before_training=True,
                   use_cpickle=True),
        ProgressBar(),
        Timing(),
        Printing(),
        PrintingTo(name + "_log")
    ])

    from blocks.main_loop import MainLoop
    main_loop = MainLoop(data_stream=task.get_stream("train"),
                         algorithm=algorithm,
                         extensions=extensions,
                         model=model)

    # note blocks will crash and burn because it cannot deal with an
    # already-initialized Algorithm, so this should be enabled only for
    # debugging
    if False:
        with open("graph", "w") as graphfile:
            algorithm.initialize()
            theano.printing.debugprint(algorithm._function, file=graphfile)

    from tabulate import tabulate
    print("parameter sizes:")
    size_rows = []
    for key, value in main_loop.model.get_parameter_dict().items():
        # Fetch the value once per parameter instead of once per column.
        array = value.get_value()
        size_rows.append((key, "x".join(map(str, array.shape)), array.size))
    print(tabulate(size_rows))

    return main_loop
Пример #12
0
def construct_main_loop(name, task_name, batch_size, max_epochs,
                        patience_epochs, learning_rate,
                        hyperparameters, **kwargs):
    """Assemble the blocks ``MainLoop`` that trains the model.

    Builds the task and the computation graphs from ``hyperparameters``,
    wraps the training cost in a ``GradientDescent`` algorithm, collects
    monitoring, stopping, checkpointing and logging extensions, prints a
    table of parameter sizes and returns the resulting main loop.

    Parameters
    ----------
    name : str
        Prefix for the output files ``name+"_best.zip"`` and
        ``name+"_log"``.
    task_name, batch_size
        Accepted but not used directly by this function.
    max_epochs : int
        Passed to ``FinishAfter(after_n_epochs=...)``.
    patience_epochs : int
        Passed to ``FinishIfNoImprovementAfter(epochs=...)`` on
        ``"best_valid_error_rate"``.
    learning_rate : float
        Learning rate handed to ``Adam``.
    hyperparameters : dict
        Forwarded as keyword arguments to ``tasks.get_task``,
        ``construct_graphs`` and ``construct_monitors``.  Must contain
        ``"checkpoint_save_path"``.  It is modified in place: the key
        ``"n_channels"`` is set from the task.
    **kwargs
        Ignored.

    Returns
    -------
    main_loop
        The ``blocks.main_loop.MainLoop`` instance (not yet run).
    """
    task = tasks.get_task(**hyperparameters)
    # Set before the dict is forwarded to construct_graphs and
    # construct_monitors; note that this mutates the caller's dict.
    hyperparameters["n_channels"] = task.n_channels

    extensions = []

    # Parenthesised single-argument print works on Python 2 and 3 alike.
    print("constructing graphs...")
    graphs, outputs, updates = construct_graphs(task=task, **hyperparameters)

    print("setting up main loop...")

    from blocks.model import Model
    model = Model(outputs["train"]["cost"])

    from blocks.algorithms import GradientDescent, CompositeRule, StepClipping, Adam
    # Step rules are listed in order: Adam, then clipping at 1e3.
    algorithm = GradientDescent(
        cost=outputs["train"]["cost"],
        parameters=graphs["train"].parameters,
        step_rule=CompositeRule([Adam(learning_rate=learning_rate),
                                 StepClipping(1e3)]),
        on_unused_sources="warn")
    algorithm.add_updates(updates["train"])

    extensions.extend(construct_monitors(
        algorithm=algorithm, task=task, model=model, graphs=graphs,
        outputs=outputs, updates=updates, **hyperparameters))

    from blocks.extensions import FinishAfter, Printing, ProgressBar, Timing
    from blocks.extensions.stopping import FinishIfNoImprovementAfter
    from blocks.extensions.training import TrackTheBest
    from blocks.extensions.saveload import Checkpoint
    from dump import DumpBest, LightCheckpoint, PrintingTo
    extensions.extend([
        TrackTheBest("valid_error_rate", "best_valid_error_rate"),
        FinishIfNoImprovementAfter("best_valid_error_rate", epochs=patience_epochs),
        FinishAfter(after_n_epochs=max_epochs),
        DumpBest("best_valid_error_rate", name+"_best.zip"),
        Checkpoint(hyperparameters["checkpoint_save_path"],
                   on_interrupt=False, every_n_epochs=5,
                   before_training=True, use_cpickle=True),
        ProgressBar(), Timing(), Printing(), PrintingTo(name+"_log")])

    from blocks.main_loop import MainLoop
    main_loop = MainLoop(data_stream=task.get_stream("train"),
                         algorithm=algorithm,
                         extensions=extensions,
                         model=model)

    # note blocks will crash and burn because it cannot deal with an
    # already-initialized Algorithm, so this should be enabled only for
    # debugging
    if False:
        with open("graph", "w") as graphfile:
            algorithm.initialize()
            theano.printing.debugprint(algorithm._function, file=graphfile)

    from tabulate import tabulate
    print("parameter sizes:")
    size_rows = []
    for key, value in main_loop.model.get_parameter_dict().items():
        # Fetch the value once per parameter instead of once per column.
        array = value.get_value()
        size_rows.append((key, "x".join(map(str, array.shape)), array.size))
    print(tabulate(size_rows))

    return main_loop