# Example no. 1
def quick_fdcheck(func, w, g, n_checks=20, eps=1e-5, verbose=1, progressbar=1):
    """
    Check gradient along random directions (a faster alternative to axis-aligned directions).

    Tim Vieira (2017) "How to test gradient implementations"

    For each of `n_checks` directions `d = spherical(dim)`, the analytical
    directional derivative `g.dot(d)` is paired with the centered finite
    difference `(func() at w + eps*d  -  func() at w - eps*d) / (2*eps)`.

    Parameters:
      func: zero-argument callable returning the objective value. It is called
        after `w` has been overwritten in place, so it is expected to read the
        parameters through memory shared with `w`.
      w: parameter array; perturbed in place during the check and reset to its
        initial values afterwards.
      g: candidate gradient, with as many elements as `w`.
      n_checks: number of random directions to test.
      eps: step size of the centered difference.
      verbose: forwarded to `compare`.
      progressbar: if truthy, the loop is wrapped in `iterview`.

    Returns:
      Whatever `compare(H, G, verbose=verbose)` returns, where `H` holds the
      finite-difference estimates and `G` the analytical values, keyed by
      `'rand_<i>'`.
    """
    keys = ['rand_%s' % i for i in range(n_checks)]
    H = {}   # finite-difference estimates
    G = {}   # analytical directional derivatives

    # Snapshot of the starting parameters (`flatten` always copies).
    was = w.flatten()

    # Use linear indexing in case `w` / `g` are multidimensional.
    # NOTE(review): assumes `np.asarray(w.flat)` shares memory with the
    # caller's array so that `func` observes the perturbation -- TODO confirm
    # for non-contiguous inputs.
    w = np.asarray(w.flat)
    g = np.asarray(g.flat)

    dim = len(w)

    for k in (iterview(keys) if progressbar else keys):
        d = spherical(dim)
        G[k] = g.dot(d)
        try:
            w[:] = was + eps*d
            b = func()
            w[:] = was - eps*d
            a = func()
        finally:
            # Always put the parameters back, even if `func` raises.
            w[:] = was
        H[k] = (b-a) / (2*eps)

    return compare(H, G, verbose=verbose)
# Example no. 3
def test_gradients(M):
    """
    Finite-difference checks of several expressions for the gradient of `M.J`.

    A policy `π` is drawn with `random_dist(M.S, M.A)`; each candidate gradient
    of `J() = M.J(π)` is passed to `fdcheck` together with `J` and `π`. The
    Jacobian callbacks returned by `M.d(π, jac=True)` and `M.V(π, jac=True)`
    are checked the same way against `z @ M.d(π)` and `z @ M.V(π)` for a
    direction `z = spherical(M.S)`.

    Parameters:
      M: model object; this function reads `M.S`, `M.A`, `M.γ`, `M.r`, `M.s0`
        and calls `M.J`, `M.d`, `M.Q`, `M.V`.
    """

    def J():
        return M.J(π)

    def full_jacobian(grad):
        # Extract the full Jacobian, flatten SA dim of policy: row `s` is
        # `grad` applied to the s-th one-hot vector.
        jac = np.zeros((M.S, M.S * M.A))
        for s in range(M.S):
            jac[s, :] = grad(onehot(s, M.S)).flat
        return jac

    π = random_dist(M.S, M.A)
    r = M.r

    # The policy gradient theorem
    # NOTE(review): the opening of this call is missing from the excerpt; it is
    # reconstructed as `ok = fdcheck(J, π, ...)` by analogy with the other
    # checks below and because `ok` is printed right after -- confirm.
    ok = fdcheck(
        J, π,
        1 / (1 - M.γ) * M.d(π)[:, None] *
        M.Q(π),  # Note: Q is not interchangeable with Advantage!
    )  #.show(title='policy gradient v1.')

    print('[policy gradient theorem]', ok)

    # Jacobians of the implicit d(p) and v(p) functions.
    z = spherical(M.S)
    _d, d_grad = M.d(π, jac=True)
    fdcheck(lambda: z @ M.d(π), π, d_grad(z))  #.show(title='implicit d')
    _v, v_grad = M.V(π, jac=True)
    fdcheck(lambda: z @ M.V(π), π, v_grad(z))  #.show(title='implicit v')

    # check that the implicit functions are consistent with the other methods for computing them.
    assert np.allclose(_d, M.d(π))
    assert np.allclose(_v, M.V(π))

    # The policy gradient theorem (disabled variant)
    #    fdcheck(J, p,
    #            1/(1-M.γ) * (
    #                np.einsum('s,sa->sa', M.d(p), M.Advantage(p))
    #                + (M.d(p) * M.V(p))[:,None]
    #            )
    #    ) .show(title='policy gradient v1.')

    Jdp = full_jacobian(d_grad)

    # The stuff below is the chaining from J to derivatives thru π
    fdcheck(J, π, 1 / (1 - M.γ) * (np.einsum('sa,sa->s', r, π) @ Jdp +
                                   np.einsum('s,sa->sa', M.d(π), r).flatten())
            )  #.show(title='policy gradient v2.')

    Jvp = full_jacobian(v_grad)
    fdcheck(J, π, M.s0 @ Jvp)  #.show(title='policy gradient v2a.')
    fdcheck(J, π, v_grad(M.s0))  #.show(title='policy gradient v2b.')
# Example no. 4
# File: util.py Project: timvieira/rl
def test():
    """
    Visual smoke test for `Halfspaces`.

    Ten times over: draw `[a, b, c] = spherical(3)`, build a `Halfspaces`
    object from it, call its `viz` method on the square [-r, r] x [-r, r],
    and scatter ten uniformly sampled points coloured green where `H([x, y])`
    is truthy and red otherwise.
    """
    from arsenal.maths import spherical

    # Fixed constraint data; only referenced by the disabled per-row loop
    # below. (An earlier pair of values that was immediately overwritten has
    # been dropped.)
    A = np.array([[0.22032854, 0.65650877], [0.78670232, 0.24127292],
                  [0.08878384, 0.68543539]])
    B = np.array([1.47872332, 0.24802336, 1.60300663])

    #    A *= -1
    #    B *= -1

    for _ in range(10):
        [a, b, c] = spherical(3)

        print([a, b, c])

        #    for i in range(len(B)):
        #        [a,b],c = A[i], B[i]
        #        print('>>>>', a, b, c)

        # NOTE(review): the tail of this call is missing from the excerpt. The
        # second argument `[c]` is reconstructed from the plot title
        # ('a x + b y <= c') and the disabled loop above -- confirm against
        # the `Halfspaces` constructor.
        H = Halfspaces(
            [[a, b]],
            [c],
        )

        r = 10
        H.viz([-r, r], [-r, r])

        pts = np.random.uniform(-r, r, size=(10, 2))

        for x, y in pts:
            pl.scatter([x], [y], c='g' if H([x, y]) else 'r')

        pl.title(f'{a} x + {b} y <= {c}')
# Example no. 6
def main():
    """
    Compare the derivations found by `semiring_mert` with those found by a
    brute-force grid of `semiring_linesearch` calls on a toy sentence.

    Features are hashed into `2**bits` dimensions with `FeatureHashing`;
    `weights` and `direction` are both drawn with `spherical(D)`. The run ends
    by asserting that every brute-force derivation is also among the MERT
    derivations.
    """
    from arsenal.integerizer import FeatureHashing
    from arsenal.maths import spherical

    bits = 8
    D = 2**bits

    alphabet = FeatureHashing(lambda x: abs(hash(x)), bits)
    weights = spherical(D)
    direction = spherical(D)

    #sentence = 'Papa ate the caviar with the spoon in the park .'.split()
    sentence = 'Papa ate the caviar with the spoon .'.split()

    # NOTE(review): the closing `""")` of this literal is missing from the
    # excerpt and has been restored after the last visible rule.
    grammar = load_grammar("""
    S       X .
    X       X X
    X       Papa
    X       ate
    X       the
    X       caviar
    X       with
    X       spoon
    X       in
    X       park
    """)

    if 1:
        # This code branch enumerates all (exponentially many) valid
        # derivations.
        root = semiring_enumeration(sentence, grammar)
        for d in root:
            # NOTE(review): the loop body is missing from the excerpt; printing
            # each derivation is a guess -- confirm.
            print(d)
            # print(post_process(lambda e: e, d))
        assert len(list(root)) == len(set(root))

    def binary_features(_, X, Y, Z, i, j, k):
        return alphabet(['%s -> %s %s [%s,%s,%s]' % (X, Y, Z, i, j, k)])

    def unary_features(_, X, Y, i, k):
        return alphabet(['%s -> %s [%s,%s]' % (X, Y, i, k)])

    root = semiring_mert(sentence, grammar, weights, direction,
                         binary_features, unary_features)

    mert_derivations = []  #set()
    for x in root:
        # NOTE(review): the loop body is missing from the excerpt; collecting
        # `derivation(x)` mirrors the brute-force loop below but is a guess --
        # confirm how a derivation is obtained from a MERT point.
        mert_derivations.append(derivation(x))

    assert len(mert_derivations) == len(set(mert_derivations))

    # Compare the set of derivations found by the MERT semiring to 'brute force'
    # linesearch. Note: Linesearch might only find a subset of derivations found
    # by MERT if the grid isn't fine enough.
    brute_derivations = set()
    for step in np.linspace(-20, 20, 1000):
        root = semiring_linesearch(sentence, grammar, weights, step, direction,
                                   binary_features, unary_features)
        d = derivation(root)
        # NOTE(review): storing `d` is missing from the excerpt; restored,
        # since `brute_derivations` would otherwise stay empty -- confirm.
        brute_derivations.add(d)

    # NOTE: need to take upper hull of mert (so it's currently an over estimate)
    print('mert:', len(mert_derivations))
    print('brute:', len(brute_derivations))
    assert brute_derivations.issubset(mert_derivations)
# Example no. 7
def main():
    """
    Compare the derivations found by `semiring_mert` with those found by a
    brute-force grid of `semiring_linesearch` calls on a toy sentence.

    Feature strings are mapped through `Alphabet(random_int=D)`; `weights` and
    `direction` are both drawn with `spherical(D)`. The run ends by asserting
    that every brute-force derivation is also among the MERT derivations.
    """
    from arsenal.alphabet import Alphabet
    from arsenal.maths import spherical

    D = 100

    alphabet = Alphabet(random_int=D)
    weights = spherical(D)
    direction = spherical(D)

    #sentence = 'Papa ate the caviar with the spoon in the park .'.split()
    sentence = 'Papa ate the caviar with the spoon .'.split()

    # NOTE(review): the closing quotes of both grammar literals and the `else:`
    # between them are missing from the excerpt; they are restored here so that
    # `if 0` keeps selecting the second (single-nonterminal) grammar -- confirm.
    if 0:
        grammar = """
        S       S .
        S       NP VP
        NP      D N
        NP      NP PP
        VP      V NP
        VP      VP PP
        PP      P NP
        NP      Papa
        N       caviar
        N       spoon
        N       park
        V       ate
        P       with
        P       in
        D       the
        """
    else:
        grammar = """
        S       X .
        X       X X
        X       Papa
        X       ate
        X       the
        X       caviar
        X       with
        X       spoon
        X       in
        X       park
        """

    rhs = load_grammar(grammar)

    if 1:
        # This code branch enumerates all (exponentially many) valid
        # derivations.
        root = semiring_enumeration(sentence, rhs)
        for d in root.x:
            # NOTE(review): the loop body is missing from the excerpt; printing
            # each derivation is a guess -- confirm.
            print(d)
        assert len(root.x) == len(set(root.x))

    def binary_features(sentence, X, Y, Z, i, j, k):
        return alphabet.map(['%s -> %s %s [%s,%s,%s]' % (X, Y, Z, i, j, k)])

    def unary_features(sentence, X, Y, i, k):
        return alphabet.map(['%s -> %s [%s,%s]' % (X, Y, i, k)])

    root = semiring_mert(sentence, rhs, weights, direction, binary_features, unary_features)

    mert_derivations = []  #set()
    for x in root.points:
        d = x.derivation()
        # NOTE(review): storing `d` is missing from the excerpt; restored,
        # since the list would otherwise stay empty -- confirm.
        mert_derivations.append(d)

    assert len(mert_derivations) == len(set(mert_derivations))

    # Compare the set of derivations found by the MERT semiring to 'brute force'
    # linesearch. Note: Linesearch might only find a subset of derivations found
    # by MERT if the grid isn't fine enough.
    brute_derivations = set()
    for step in np.linspace(-20, 20, 1000):
        root = semiring_linesearch(sentence, rhs, weights, step, direction, binary_features, unary_features)
        d = root.derivation()
        # NOTE(review): storing `d` is missing from the excerpt; restored,
        # since the set would otherwise stay empty -- confirm.
        brute_derivations.add(d)

    # NOTE: need to take upper hull of mert (so it's currently an over estimate)
    print('mert:', len(mert_derivations))
    print('brute:', len(brute_derivations))
    assert brute_derivations.issubset(mert_derivations)