Example #1
def test_shared_variable_modifier_two_params():
    weights = numpy.array([-1, 1], dtype=floatX)
    features = [numpy.array(f, dtype=floatX) for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = ContainerDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    cost = ((x * W).sum() - y)**2
    cost.name = 'cost'

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, params=[W], step_rule=step_rule)
    # Multiply the learning rate by 0.2 after every batch
    modifier = SharedVariableModifier(
        step_rule.learning_rate, lambda _, val: numpy.cast[floatX](val * 0.2))
    main_loop = MainLoop(model=None,
                         data_stream=dataset.get_default_stream(),
                         algorithm=sgd,
                         extensions=[FinishAfter(after_n_epochs=1), modifier])

    main_loop.run()

    new_value = step_rule.learning_rate.get_value()
    assert_allclose(new_value, 0.001 * 0.2**n_batches, atol=1e-5)
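
The final assertion follows from the modifier above: the callback multiplies the
learning rate by 0.2 once per batch, and the single epoch contains three batches.
A minimal sketch of that arithmetic, independent of Blocks:

expected = 0.001
for _ in range(3):       # one epoch of n_batches == 3 batches
    expected *= 0.2      # the same factor the SharedVariableModifier applies
print(expected)          # 8e-06, the value checked by assert_allclose above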
Example #2
def test_floatx():
    x = [numpy.array(d, dtype="float64") for d in [[1, 2], [3, 4]]]
    y = [numpy.array(d, dtype="int64") for d in [1, 2, 3]]
    dataset = ContainerDataset(OrderedDict([("x", x), ("y", y)]))
    data = next(ForceFloatX(dataset.get_default_stream()).get_epoch_iterator())
    assert str(data[0].dtype) == floatX
    assert str(data[1].dtype) == "int64"
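
ForceFloatX is expected to cast only floating-point sources; the integer source
passes through unchanged. A small sketch of the equivalent NumPy cast, assuming
floatX is "float32" (the usual Theano default):

import numpy

floatX = "float32"                   # assumption: value of theano.config.floatX
x = numpy.array([1, 2], dtype="float64")
y = numpy.array(1, dtype="int64")
print(x.astype(floatX).dtype)        # float32, like data[0] above
print(y.dtype)                       # int64, like data[1] above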
Example #3
def get_data_stream(iterable):
    dataset = ContainerDataset({'numbers': iterable})
    data_stream = DataStreamMapping(dataset.get_default_stream(),
                                    _data_sqrt,
                                    add_sources=('roots', ))
    data_stream = DataStreamMapping(data_stream, _array_tuple)
    return BatchDataStream(data_stream, ConstantScheme(20))
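
The helpers _data_sqrt and _array_tuple are not shown in this snippet; Example #4
below inlines the same transformations as lambdas. Plausible module-level
definitions consistent with that example (named functions rather than lambdas,
e.g. so the stream can be pickled) would be, as a sketch:

import math

import numpy
from theano import config

floatX = config.floatX

def _data_sqrt(data):
    # sketch inferred from Example #4, not the original module code:
    # 'data' is a one-element tuple drawn from the 'numbers' source
    return (math.sqrt(data[0]),)

def _array_tuple(data):
    # sketch inferred from Example #4: cast every element to a floatX ndarray
    return tuple(numpy.asarray(d, dtype=floatX) for d in data)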
Example #4
def get_data_stream(iterable):
    dataset = ContainerDataset({'numbers': iterable})
    data_stream = DataStreamMapping(dataset.get_default_stream(),
                                    lambda data: (math.sqrt(data[0]), ),
                                    add_sources=('roots', ))
    data_stream = DataStreamMapping(
        data_stream,
        lambda data: tuple(numpy.asarray(d, dtype=floatX) for d in data))
    return BatchDataStream(data_stream, ConstantScheme(20))
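
Assuming the stream API used above (get_epoch_iterator, as in Example #2), a
stream built by this factory could be consumed like the following sketch; each
batch carries 20 examples per the ConstantScheme:

stream = get_data_stream(range(100))
batch = next(stream.get_epoch_iterator())
# 'batch' is a tuple of floatX arrays, one per source ('numbers' plus the
# added 'roots'), each of length 20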
Example #5
def test_training_data_monitoring():
    weights = numpy.array([-1, 1], dtype=floatX)
    features = [numpy.array(f, dtype=floatX) for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = ContainerDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    V = shared_floatx(7, name='V')
    W_sum = named_copy(W.sum(), 'W_sum')
    cost = ((x * W).sum() - y)**2
    cost.name = 'cost'

    class TrueCostExtension(TrainingExtension):
        def before_batch(self, data):
            self.main_loop.log.current_row.true_cost = ((
                (W.get_value() * data["features"]).sum() - data["targets"])**2)

    main_loop = MainLoop(model=None,
                         data_stream=dataset.get_default_stream(),
                         algorithm=GradientDescent(cost=cost,
                                                   params=[W],
                                                   step_rule=Scale(0.001)),
                         extensions=[
                             FinishAfter(after_n_epochs=1),
                             TrainingDataMonitoring([W_sum, cost, V],
                                                    prefix="train1",
                                                    after_every_batch=True),
                             TrainingDataMonitoring(
                                 [aggregation.mean(W_sum), cost],
                                 prefix="train2",
                                 after_every_epoch=True),
                             TrueCostExtension()
                         ])

    main_loop.run()

    # Check monitoring of a shared variable
    assert_allclose(main_loop.log.current_row.train1_V, 7.0)

    for i in range(n_batches):
        # The ground truth is written to the log before the batch is
        # processed, whereas the extension writes after the batch is
        # processed. This is why the iteration numbers differ here.
        assert_allclose(main_loop.log[i].true_cost,
                        main_loop.log[i + 1].train1_cost)
    assert_allclose(
        main_loop.log[n_batches].train2_cost,
        sum([main_loop.log[i].true_cost
             for i in range(n_batches)]) / n_batches)
    assert_allclose(
        main_loop.log[n_batches].train2_W_sum,
        sum([main_loop.log[i].train1_W_sum
             for i in range(1, n_batches + 1)]) / n_batches)
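
The indexing in the checks above mirrors the comment inside the loop: row i of
the log holds the true cost computed before batch i + 1, while the monitoring
channels for that batch land in row i + 1. A short sketch of inspecting that
alignment on the same main_loop object:

for i in range(n_batches):
    print(main_loop.log[i].true_cost,        # written by TrueCostExtension before the batch
          main_loop.log[i + 1].train1_cost)  # written by TrainingDataMonitoring after it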
Example #6
def setup_mainloop(extension):
    """Create a MainLoop, register the given extension, supply it with a
        DataStream and a minimal model/cost to optimize.
    """
    features = [numpy.array(f, dtype=floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    dataset = ContainerDataset(dict(features=features))

    W = shared_floatx([0, 0], name='W')
    x = tensor.vector('features')
    cost = tensor.sum((x-W)**2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost, params=[W],
                                step_rule=Scale(1e-3))

    main_loop = MainLoop(
        model=None, data_stream=dataset.get_default_stream(),
        algorithm=algorithm,
        extensions=[
            FinishAfter(after_n_epochs=1),
            extension])

    return main_loop
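
A sketch of how this fixture would typically be used in a test, assuming
my_extension is any TrainingExtension instance under test:

main_loop = setup_mainloop(my_extension)
main_loop.run()  # trains for one epoch; my_extension's callbacks have now
                 # fired and their effects can be asserted on main_loop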