Exemplo n.º 1
0
def test_subgraph_grad():
    # Tests that the grad method with no known_grads
    # matches what happens if you use successive subgraph_grads

    x = fvector("x")
    t = fvector("t")
    w1 = aesara.shared(np.random.randn(3, 4))
    w2 = aesara.shared(np.random.randn(4, 2))
    a1 = tanh(dot(x, w1))
    a2 = tanh(dot(a1, w2))
    cost2 = sqr(a2 - t).sum()
    cost2 += sqr(w2.sum())
    cost1 = sqr(w1.sum())

    params = [[w2], [w1]]
    costs = [cost2, cost1]
    grad_ends = [[a1], [x]]

    inputs = [t, x]
    rng = np.random.RandomState([2012, 11, 15])
    values = [rng.randn(2), rng.randn(3)]
    values = [np.cast[ipt.dtype](value) for ipt, value in zip(inputs, values)]

    wrt = [w2, w1]
    cost = cost2 + cost1
    true_grads = grad(cost, wrt)
    true_grads = aesara.function(inputs, true_grads)
    true_grads = true_grads(*values)
    next_grad = None
    param_grads = []
    for i in range(2):
        param_grad, next_grad = subgraph_grad(wrt=params[i],
                                              end=grad_ends[i],
                                              start=next_grad,
                                              cost=costs[i])
        next_grad = OrderedDict(zip(grad_ends[i], next_grad))
        param_grads.extend(param_grad)

    pgrads = aesara.function(inputs, param_grads)
    pgrads = pgrads(*values)

    for true_grad, pgrad in zip(true_grads, pgrads):
        assert np.sum(np.abs(true_grad - pgrad)) < 0.00001
Exemplo n.º 2
0
def test_known_grads():
    # Tests that the grad method with no known_grads
    # matches what happens if you put its own known_grads
    # in for each variable

    full_range = aet.arange(10)
    x = scalar("x")
    t = iscalar("t")
    ft = full_range[t]
    ft.name = "ft"
    coeffs = vector("c")
    ct = coeffs[t]
    ct.name = "ct"
    p = x**ft
    p.name = "p"
    y = ct * p
    y.name = "y"
    cost = sqr(y)
    cost.name = "cost"

    layers = [[cost], [y], [ct, p], [ct, x, ft], [coeffs, t, full_range, x]]

    inputs = [coeffs, t, x]

    rng = np.random.default_rng([2012, 11, 15])
    values = [
        rng.standard_normal((10)),
        rng.integers(10),
        rng.standard_normal()
    ]
    values = [np.cast[ipt.dtype](value) for ipt, value in zip(inputs, values)]

    true_grads = grad(cost, inputs, disconnected_inputs="ignore")
    true_grads = aesara.function(inputs, true_grads)
    true_grads = true_grads(*values)

    for layer in layers:
        first = grad(cost, layer, disconnected_inputs="ignore")
        known = OrderedDict(zip(layer, first))
        full = grad(cost=None,
                    known_grads=known,
                    wrt=inputs,
                    disconnected_inputs="ignore")
        full = aesara.function(inputs, full)
        full = full(*values)
        assert len(true_grads) == len(full)
        for a, b, var in zip(true_grads, full, inputs):
            assert np.allclose(a, b)