Example #1
def test_grad_scale():
    x = scalar()

    z = grad(grad_scale(x, 2) ** 2, x)
    z2 = grad(x ** 2, x)

    f = aesara.function([x], outputs=[z, z2])

    if config.mode != "FAST_COMPILE":
        topo = f.maker.fgraph.toposort()
        assert not any(isinstance(node.op, GradScale) for node in topo)
    out = f(2.0)

    assert np.allclose(out, (8, 4))
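
For reference, a minimal sketch of what grad_scale does, using the same aesara.gradient helper exercised by the test above: the forward value is left unchanged, and only the gradient flowing back through it is multiplied.

import aesara
import aesara.tensor as at
from aesara.gradient import grad_scale

x = at.scalar("x")
y = grad_scale(x, 2.0) ** 2      # forward value is still x ** 2
g = aesara.grad(y, x)            # gradient is scaled: 2 * (2 * x)
f = aesara.function([x], [y, g])
print(f(3.0))                    # [9.0, 12.0]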
Example #2
def test_grad_clip():
    x = scalar()

    z = grad(grad_clip(x, -1, 1)**2, x)
    z2 = grad(x**2, x)

    f = aesara.function([x], outputs=[z, z2])

    if config.mode != "FAST_COMPILE":
        topo = f.maker.fgraph.toposort()
        assert not any(isinstance(node.op, GradClip) for node in topo)
    out = f(2.0)
    assert np.allclose(out, (1, 4))
    assert not np.allclose(out[0], out[1])
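
Similarly, a minimal sketch of grad_clip with the same helper: the forward value passes through untouched, while the gradient flowing back through it is clipped to the given bounds.

import aesara
import aesara.tensor as at
from aesara.gradient import grad_clip

x = at.scalar("x")
z = grad_clip(x, -1.0, 1.0) ** 2   # forward value is still x ** 2
g = aesara.grad(z, x)              # the incoming gradient 2 * x is clipped to [-1, 1]
f = aesara.function([x], [z, g])
print(f(2.0))                      # [4.0, 1.0]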
Example #3
def test_known_grads():
    # Tests that calling grad with no known_grads matches the result of
    # passing each variable's own gradient in via known_grads

    full_range = aet.arange(10)
    x = scalar("x")
    t = iscalar("t")
    ft = full_range[t]
    ft.name = "ft"
    coeffs = vector("c")
    ct = coeffs[t]
    ct.name = "ct"
    p = x**ft
    p.name = "p"
    y = ct * p
    y.name = "y"
    cost = sqr(y)
    cost.name = "cost"

    layers = [[cost], [y], [ct, p], [ct, x, ft], [coeffs, t, full_range, x]]

    inputs = [coeffs, t, x]

    rng = np.random.default_rng([2012, 11, 15])
    values = [
        rng.standard_normal((10)),
        rng.integers(10),
        rng.standard_normal()
    ]
    values = [np.cast[ipt.dtype](value) for ipt, value in zip(inputs, values)]

    true_grads = grad(cost, inputs, disconnected_inputs="ignore")
    true_grads = aesara.function(inputs, true_grads)
    true_grads = true_grads(*values)

    for layer in layers:
        first = grad(cost, layer, disconnected_inputs="ignore")
        known = OrderedDict(zip(layer, first))
        full = grad(cost=None,
                    known_grads=known,
                    wrt=inputs,
                    disconnected_inputs="ignore")
        full = aesara.function(inputs, full)
        full = full(*values)
        assert len(true_grads) == len(full)
        for a, b, var in zip(true_grads, full, inputs):
            assert np.allclose(a, b)
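
The pattern behind this test in isolation, as a minimal sketch that assumes only the grad(cost=None, known_grads=...) API exercised above: you supply the gradient of an intermediate variable yourself, and grad completes the chain rule from there.

import aesara
import aesara.tensor as at

x = at.scalar("x")
y = x ** 2
g_y = at.scalar("g_y")    # externally supplied gradient d(cost)/d(y)
g_x = aesara.grad(cost=None, wrt=x, known_grads={y: g_y})
f = aesara.function([x, g_y], g_x)
print(f(2.0, 3.0))        # g_y * dy/dx = 3.0 * 2 * 2.0 = 12.0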
Example #4
    def test_xent_thing_int32(self):
        x = matrix("x")
        y = lvector("y")
        yi = aet.cast(y, "int32")
        expressions = [
            aet_sum(-log(softmax(x)[aet.arange(yi.shape[0]), yi])),
            -aet_sum(log(softmax(x)[aet.arange(yi.shape[0]), yi])),
            -aet_sum(log(softmax(x))[aet.arange(yi.shape[0]), yi]),
            aet_sum(-log(softmax(x))[aet.arange(yi.shape[0]), yi]),
        ]

        for expr in expressions:
            fgraph = FunctionGraph([x, y], [expr])
            optdb.query(OPT_FAST_RUN).optimize(fgraph)

            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 5
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            assert not any(isinstance(o, AdvancedSubtensor) for o in ops)

            # Also verify the gradient wrt x
            fgraph = FunctionGraph([x, y], [grad(expr, x)])
            optdb.query(OPT_FAST_RUN).optimize(fgraph)

            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 3
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_legacy in ops
            assert softmax_grad_legacy not in ops
Example #5
    def test_connection_pattern_override(self, cls_ofg):
        x, y = vectors("xy")

        def f1(x, y):
            del x
            # but we know how to backpropagate for x for some reason,
            # and we don't care about the gradient wrt y.
            return y + aet_round(y)

        def f1_back(inputs, output_gradients):
            return [output_gradients[0], disconnected_type()]

        op = cls_ofg(
            inputs=[x, y],
            outputs=[f1(x, y)],
            grad_overrides=f1_back,
            connection_pattern=[[True], [False]],  # This is new
            on_unused_input="ignore",
        )  # This is new

        c = op(x, y)

        g1 = grad(c.sum(), x)

        out = g1.eval({
            x: np.ones((5, ), dtype=np.float32),
            y: np.ones((5, ), dtype=np.float32)
        })
        assert np.allclose(out, [1.0] * 5)
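
A stripped-down sketch of the same grad_overrides mechanism, assuming OpFromGraph accepts a callable with the (inputs, output_gradients) signature used above; here the override passes the output gradient straight through, even though round() has a zero gradient almost everywhere.

import aesara
import aesara.tensor as at
from aesara.compile.builders import OpFromGraph

x = at.vector("x")

def straight_through(inputs, output_gradients):
    # Pretend the op is the identity for backprop purposes.
    return [output_gradients[0]]

op = OpFromGraph([x], [at.round(x)], grad_overrides=straight_through)
g = aesara.grad(op(x).sum(), x)    # all ones instead of all zeros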
Example #6
 def test_zero_gradient_shape(self):
     # Ensure that a zero gradient has the proper shape.
     x = dmatrix()
     f = aesara.function([x], grad(dscalar(), x, disconnected_inputs="ignore"))
     a = np.ones((3, 7))
     assert (f(a) == 0).all()  # Zero gradient
     assert a.shape == f(a).shape  # With proper shape
Example #7
 def test_grad_name(self):
     A = matrix("A")
     x = vector("x")
     f = dot(x, dot(A, x))
     f.name = "f"
     g = grad(f, x)
     assert g.name == "(df/dx)"
Example #8
    def test_scipy_paper_example2(self):
        """ This just sees if things compile well and if they run """
        rng = np.random

        x = matrix()
        y = vector()
        w = shared(rng.randn(100))
        b = shared(np.zeros(()))

        # Construct Aesara expression graph
        p_1 = 1 / (1 + exp(-dot(x, w) - b))
        xent = -y * log(p_1) - (1 - y) * log(1 - p_1)
        prediction = p_1 > 0.5
        cost = xent.mean() + 0.01 * (w ** 2).sum()
        gw, gb = grad(cost, [w, b])

        # Compile expressions to functions
        train = function(
            inputs=[x, y],
            outputs=[prediction, xent],
            updates=[(w, w - 0.1 * gw), (b, b - 0.1 * gb)],
        )
        function(inputs=[x], outputs=prediction)

        N = 4
        feats = 100
        D = (rng.randn(N, feats), rng.randint(size=4, low=0, high=2))
        training_steps = 10
        for i in range(training_steps):
            pred, err = train(D[0], D[1])
Example #9
 def test_Nparam(self):
     # grad: Test passing multiple variable params
     o = TestGrad.Obj1()
     a1 = o.make_node()
     g0, g1 = grad(a1.outputs[0], a1.inputs)
     g0.name = None
     assert o.gval0 is g0
     assert o.gval1 is g1
Example #10
def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    # This is a basic test for GpuCrossentropySoftmaxArgmax1HotWithBias.
    # We check that we loop when there are too many threads.

    n_in = 1000
    batch_size = 4097
    n_out = 1250

    if not isinstance(mode_with_gpu, aesara.compile.debugmode.DebugMode):
        n_in = 4098
        n_out = 4099

    y = lvector("y")

    b = fvector("b")

    # We precompute the dot product with a big shape beforehand so that the
    # GpuCrossentropySoftmax1HotWithBiasDx test does not fail with the error
    # "the launch timed out and was terminated" on GPU cards that are not
    # powerful enough. We need the big shape to check the corner case.
    dot_result = fmatrix("dot_result")

    xx = np.asarray(np.random.rand(batch_size, n_in), dtype=np.float32)
    yy = np.ones((batch_size, ), dtype="int32")
    b_values = np.zeros((n_out, ), dtype="float32")
    W_values = np.asarray(np.random.rand(n_in, n_out), dtype="float32")

    dot_value = np.asarray(np.dot(xx, W_values), dtype="float32")
    del W_values
    p_y_given_x = aesara.tensor.nnet.softmax(dot_result + b)
    y_pred = argmax(p_y_given_x, axis=-1)
    loss = -mean(log(p_y_given_x)[aet.arange(y.shape[0]), y])
    dW = grad(loss, dot_result)
    classify = aesara.function(inputs=[y, b, dot_result],
                               outputs=[loss, y_pred, dW],
                               mode=mode_without_gpu)
    classify_gpu = aesara.function(inputs=[y, b, dot_result],
                                   outputs=[loss, y_pred, dW],
                                   mode=mode_with_gpu)

    assert any(
        isinstance(node.op,
                   aesara.tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias)
        for node in classify.maker.fgraph.toposort()
    )
    assert any(
        isinstance(node.op, GpuCrossentropySoftmaxArgmax1HotWithBias)
        for node in classify_gpu.maker.fgraph.toposort()
    )

    out = classify(yy, b_values, dot_value)
    gout = classify_gpu(yy, b_values, dot_value)

    assert len(out) == len(gout) == 3
    utt.assert_allclose(out[0], gout[0])
    utt.assert_allclose(out[2], gout[2], atol=3e-6)
    utt.assert_allclose(out[1], gout[1])
Example #11
    def test_disconnected_paths(self):
        # Test that taking the gradient through a disconnected
        # path raises an exception
        a = np.asarray(self.rng.randn(5, 5), dtype=config.floatX)

        x = matrix("x")

        # This MUST raise a DisconnectedInputError.
        # This also raises an additional warning from gradients.py.
        with pytest.raises(DisconnectedInputError):
            grad(disconnected_grad(x).sum(), x)

        # This MUST NOT raise a DisconnectedInputError.
        y = grad((x + disconnected_grad(x)).sum(), x)

        a = matrix("a")
        b = matrix("b")
        y = a + disconnected_grad(b)
        # This MUST raise a DisconnectedInputError.
        # This also raises an additional warning from gradients.py.
        with pytest.raises(DisconnectedInputError):
            grad(y.sum(), b)

        # This MUST NOT raise a DisconnectedInputError.
        grad(y.sum(), a)
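
A minimal standalone sketch of disconnected_grad, using the same aesara.gradient helper: the wrapped expression keeps its value but contributes nothing to the gradient.

import numpy as np
import aesara
import aesara.tensor as at
from aesara.gradient import disconnected_grad

x = at.vector("x")
y = (x ** 2 + disconnected_grad(x ** 3)).sum()
g = aesara.grad(y, x)                # only the x ** 2 term contributes
xv = np.array([1.0, 2.0], dtype=aesara.config.floatX)
print(aesara.function([x], g)(xv))   # [2. 4.], i.e. just 2 * x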
Example #12
 def test_compute_test_value(self):
     x = scalar("x")
     x.tag.test_value = np.array(1.0, dtype=config.floatX)
     op = OpFromGraph([x], [x**3])
     y = scalar("y")
     y.tag.test_value = np.array(1.0, dtype=config.floatX)
     f = op(y)
     grad_f = grad(f, y)
     assert grad_f.tag.test_value is not None
Example #13
    def test_shared_grad(self, cls_ofg):
        x, y, z = matrices("xyz")
        s = shared(np.random.rand(2, 2).astype(config.floatX))
        e = x + y * z + s
        op = cls_ofg([x, y, z], [e])
        f = op(x, y, z)
        f = f - grad(tt_sum(f), y)
        fn = function([x, y, z], f)
        xv = np.ones((2, 2), dtype=config.floatX)
        yv = np.ones((2, 2), dtype=config.floatX) * 3
        zv = np.ones((2, 2), dtype=config.floatX) * 5
        assert np.allclose(11.0 + s.get_value(), fn(xv, yv, zv))

        # grad again, this time wrt the shared variable
        f = op(x, y, z)
        f = f - grad(tt_sum(f), s)
        fn = function([x, y, z], f)
        assert np.allclose(15.0 + s.get_value(), fn(xv, yv, zv))
Example #14
 def test_Rop_dot_bug_18Oct2013_Jeremiah(self):
     # This test refers to a bug reported by Jeremiah Lowin on 18th Oct
     # 2013. The bug occurs when, through a dot operation, there is only
     # one differentiable path (i.e. there is no gradient wrt one of
     # the inputs).
     x = aet.arange(20.0).reshape([1, 20])
     v = aesara.shared(np.ones([20]))
     d = dot(x, v).sum()
     Rop(grad(d, v), v, v)
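
As a reminder of what Rop computes in isolation (a minimal sketch, assuming aesara.gradient.Rop as used above): Rop(y, x, v) is the Jacobian of y with respect to x, right-multiplied by v.

import numpy as np
import aesara
import aesara.tensor as at
from aesara.gradient import Rop

x = at.vector("x")
v = at.vector("v")
jv = Rop(x ** 2, x, v)             # elementwise: 2 * x * v
f = aesara.function([x, v], jv)
xv = np.array([1.0, 2.0], dtype=aesara.config.floatX)
vv = np.array([0.5, 0.5], dtype=aesara.config.floatX)
print(f(xv, vv))                   # [1. 2.]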
Example #15
    def test_downsample(self):
        rng = np.random.RandomState(utt.fetch_seed())
        # ws, shp
        examples = (
            ((2,), (16,)),
            ((2,), (4, 16)),
            ((2,), (4, 2, 16)),
            ((1, 1), (4, 2, 16, 16)),
            ((2, 2), (4, 2, 16, 16)),
            ((3, 3), (4, 2, 16, 16)),
            ((3, 2), (4, 2, 16, 16)),
            ((3, 2, 2), (3, 2, 16, 16, 16)),
            ((2, 3, 2), (3, 2, 16, 16, 16)),
            ((2, 2, 3), (3, 2, 16, 16, 16)),
            ((2, 2, 3, 2), (3, 2, 6, 6, 6, 5)),
        )

        for example, ignore_border in itertools.product(
                examples, [True, False]):
            (ws, shp) = example
            vx = rng.rand(*shp)
            vex = rng.rand(*shp)

            x = aesara.shared(vx)
            ex = aesara.shared(vex)

            maxpool_op = Pool(ignore_border, ndim=len(ws))
            a_pooled = maxpool_op(x, ws).flatten()
            yv = Rop(a_pooled, x, ex)
            mode = None
            if aesara.config.mode == "FAST_COMPILE":
                mode = "FAST_RUN"
            rop_f = function([], yv, on_unused_input="ignore", mode=mode)
            sy, _ = aesara.scan(
                lambda i, y, x, v: (grad(y[i], x) * v).sum(),
                sequences=aet.arange(a_pooled.shape[0]),
                non_sequences=[a_pooled, x, ex],
                mode=mode,
            )
            scan_f = function([], sy, on_unused_input="ignore", mode=mode)
            v1 = rop_f()
            v2 = scan_f()
            assert np.allclose(v1, v2), f"Rop mismatch: {v1} {v2}"
Example #16
    def test_1D_grad(self):
        c = vector()
        p_y = exp(c) / exp(c).sum()

        # test that function contains softmax and no div.
        g = aesara.function([c], grad(p_y.sum(), c), mode=self.mode)
        g_ops = [n.op for n in g.maker.fgraph.toposort()]
        assert len(g_ops) == 2
        assert isinstance(g_ops[0], Softmax)
        assert isinstance(g_ops[1], SoftmaxGrad)
Example #17
    def __init__(
        self,
        input=None,
        target=None,
        n_input=1,
        n_hidden=1,
        n_output=1,
        lr=1e-3,
        **kw,
    ):
        super().__init__(**kw)

        if input is None:
            input = dvector("input")
        if target is None:
            target = dvector("target")

        self.input = input
        self.target = target
        self.lr = shared(lr, "learning_rate")
        self.w1 = shared(np.zeros((n_hidden, n_input)), "w1")
        self.w2 = shared(np.zeros((n_output, n_hidden)), "w2")
        # print self.lr.type

        self.hidden = sigmoid(dot(self.w1, self.input))
        self.output = dot(self.w2, self.hidden)
        self.cost = aet_sum((self.output - self.target)**2)

        self.sgd_updates = {
            self.w1: self.w1 - self.lr * grad(self.cost, self.w1),
            self.w2: self.w2 - self.lr * grad(self.cost, self.w2),
        }

        self.sgd_step = pfunc(
            params=[self.input, self.target],
            outputs=[self.output, self.cost],
            updates=self.sgd_updates,
        )

        self.compute_output = pfunc([self.input], self.output)

        self.output_from_hidden = pfunc([self.hidden], self.output)
Example #18
    def test_shared_grad(self, cls_ofg):
        x, y, z = matrices("xyz")
        s = shared(np.random.random((2, 2)).astype(config.floatX))
        e = x + y * z + s
        op = cls_ofg([x, y, z], [e])
        f = op(x, y, z)
        f = f - grad(at_sum(f), y)
        fn = function([x, y, z], f)
        xv = np.ones((2, 2), dtype=config.floatX)
        yv = np.ones((2, 2), dtype=config.floatX) * 3
        zv = np.ones((2, 2), dtype=config.floatX) * 5
        np.testing.assert_array_almost_equal(11.0 + s.get_value(),
                                             fn(xv, yv, zv), 4)

        # grad again, this time wrt the shared variable
        f = op(x, y, z)
        f = f - grad(at_sum(f), s)
        fn = function([x, y, z], f)
        np.testing.assert_array_almost_equal(15.0 + s.get_value(),
                                             fn(xv, yv, zv), 4)
Example #19
    def test_transpose_grad(self):
        # this should be a transposed softmax
        c = matrix()
        p_y = exp(c) / exp(c).sum(axis=0)

        # test that function contains softmax and no div.
        g = aesara.function([c], grad(p_y.sum(), c), mode=self.mode)
        g_ops = [n.op for n in g.maker.fgraph.toposort()]
        assert len(g_ops) == 2
        assert isinstance(g_ops[0], Softmax)
        assert isinstance(g_ops[1], SoftmaxGrad)
Example #20
        def get_outputs(x, w):
            features, _ = scan(
                outer_scan_step,
                sequences=[x],
                non_sequences=[w],
                strict=True,
                name="the_outer_scan",
            )

            return_val = grad(features.sum(), w)
            return return_val
Example #21
    def test_grad_constant(self):
        # Test that the gradient handles Constants and consider_constant variables
        # consistently

        x = scalar()
        y = scalar()
        z_x = x + y
        z_one = one + y
        g_x = grad(z_x, x, consider_constant=[x])
        g_one = grad(z_one, one)

        f = aesara.function([x, y], [g_x, g_one])

        g_x, g_one = f(1, 0.5)

        if not np.allclose(g_x, g_one):
            raise AssertionError(
                "Gradient using consider constant is " + str(g_x) +
                " but gradient with respect to the same Constant is " +
                str(g_one))
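
A minimal sketch of consider_constant on its own (the same grad keyword used above): listed expressions are not backpropagated through, so only the direct uses of w contribute to the gradient.

import aesara
import aesara.tensor as at

x = at.scalar("x")
w = at.scalar("w")
h = x * w
cost = h * w                       # w appears directly and inside h
g_w = aesara.grad(cost, w, consider_constant=[h])
f = aesara.function([x, w], g_w)
print(f(2.0, 3.0))                 # 6.0 (== h), not 12.0 (== h + x * w)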
Example #22
 def test_grad(self, cls_ofg):
     x, y, z = matrices("xyz")
     e = x + y * z
     op = cls_ofg([x, y, z], [e])
     f = op(x, y, z)
     f = f - grad(tt_sum(f), y)
     fn = function([x, y, z], f)
     xv = np.ones((2, 2), dtype=config.floatX)
     yv = np.ones((2, 2), dtype=config.floatX) * 3
     zv = np.ones((2, 2), dtype=config.floatX) * 5
     assert np.all(11.0 == fn(xv, yv, zv))
Example #23
    def test_undefined_grad_grad(self):
        # tests that undefined grads are caught in the grad method

        class DummyOp(Op):
            __props__ = ()

            def make_node(self, x):
                return Apply(self, [x], [x.type()])

            def grad(self, inputs, output_grads):
                return [grad_undefined(self, 0, inputs[0])]

            def perform(self, *args, **kwargs):
                raise NotImplementedError()

        a = scalar()
        b = DummyOp()(a)

        with pytest.raises(TypeError):
            grad(b, a)
Example #24
 def test_NNone_rval(self):
     # grad: Test returning some zero value from grad
     o = TestGrad.Obj1()
     a1 = o.make_node()
     g0, g1, g2 = grad(a1.outputs[0],
                       a1.inputs + [scalar("z")],
                       disconnected_inputs="ignore")
     assert o.gval0 is g0
     assert o.gval1 is g1
     assert g2.owner.op == aet.fill
     assert g2.owner.inputs[1].data == 0
Example #25
 def setup_method(self):
     self.k = iscalar("k")
     self.A = vector("A")
     result, _ = scan(
         fn=lambda prior_result, A: prior_result * A,
         outputs_info=ones_like(self.A),
         non_sequences=self.A,
         n_steps=self.k,
     )
     result_check, _ = scan_checkpoints(
         fn=lambda prior_result, A: prior_result * A,
         outputs_info=ones_like(self.A),
         non_sequences=self.A,
         n_steps=self.k,
         save_every_N=100,
     )
     self.result = result[-1]
     self.result_check = result_check[-1]
     self.grad_A = grad(self.result.sum(), self.A)
     self.grad_A_check = grad(self.result_check.sum(), self.A)
Example #26
def test_disconnected_cost_grad():
    # Tests that if we say the cost is disconnected via the
    # known_grads mechanism, it is treated as such by the rest of the
    # system.
    # This is so that Ops that are built around minigraphs like OpFromGraph
    # and scan can implement Op.grad by passing ograds to known_grads

    x = iscalar()
    y = iscalar()
    cost = x + y
    assert cost.dtype in discrete_dtypes
    try:
        grad(
            cost,
            [x, y],
            known_grads={cost: DisconnectedType()()},
            disconnected_inputs="raise",
        )
    except DisconnectedInputError:
        return
    raise AssertionError("A disconnected gradient has been ignored.")
Example #27
 def setup_gpu_op(self,
                  activations,
                  labels,
                  input_length,
                  compute_grad=True):
     gpu_ctc_cost = gpu_ctc(activations, labels, input_length)
     outputs = [gpu_ctc_cost]
     if compute_grad:
         # Symbolic gradient of CTC cost
         gpu_ctc_grad = grad(mean(gpu_ctc_cost), activations)
         outputs += [gpu_ctc_grad]
     return aesara.function([], outputs, mode=mode_with_gpu)
Example #28
def test_gradient_scan():
    # Test for a crash when using MRG inside scan and taking the gradient
    # See https://groups.google.com/d/msg/theano-dev/UbcYyU5m-M8/UO9UgXqnQP0J
    aesara_rng = MRG_RandomStream(10)
    w = shared(np.ones(1, dtype="float32"))

    def one_step(x):
        return x + aesara_rng.uniform((1, ), dtype="float32") * w

    x = vector(dtype="float32")
    values, updates = scan(one_step, outputs_info=x, n_steps=10)
    gw = grad(aet_sum(values[-1]), w)
    f = function([x], gw)
    f(np.arange(1, dtype="float32"))
Example #29
def test_observed():
    rv_var = normal(0, 1, size=3)
    obs_var = observed(rv_var, np.array([0.2, 0.1, -2.4], dtype=config.floatX))

    assert obs_var.owner.inputs[0] is rv_var

    with raises(TypeError):
        observed(rv_var, np.array([1, 2], dtype=int))

    with raises(TypeError):
        observed(rv_var, np.array([[1.0, 2.0]], dtype=rv_var.dtype))

    obs_rv = observed(None, np.array([0.2, 0.1, -2.4], dtype=config.floatX))

    assert isinstance(obs_rv.owner.inputs[0].type, NoneTypeT)

    rv_val = vector()
    rv_val.tag.test_value = np.array([0.2, 0.1, -2.4], dtype=config.floatX)

    obs_var = observed(rv_var, rv_val)

    with raises(NullTypeGradError):
        grad(obs_var.sum(), [rv_val])
Example #30
def test_dxdx():
    # Tests that the gradient of a scalar with respect to itself is 1
    # I use an integer in this case because people keep changing this
    # gradient to be 0 on integers but according to our interpretation
    # of the gradient as defined in the Op contract, it should be 1.
    # If you feel the need to change this unit test you are probably
    # modifying the Op contract and should definitely get the approval
    # of multiple people on aesara-dev.

    x = iscalar()
    g = grad(x, x)

    g = g.eval({x: 12})

    assert np.allclose(g, 1.0)