Пример #1
0
 def compare(self, expect='expect', got='got', show_regression=1, scatter=1, **kw):
     """
     Draw an `arsenal.maths.compare` plot of `self.df` on this object's axes.

     - `expect`, `got`: forwarded to `arsenal.maths.compare` together with
       `data=self.df` (presumably column names of `self.df` -- confirm).
     - `show_regression`, `scatter`: accepted but not used by this method.
     - `kw`: extra keyword arguments handed to the comparison's `.plot`.

     A new figure/axes is created when `self.ax` is None.  Nothing is drawn
     when the data frame is empty.  Always returns None.
     """
     from arsenal.maths import compare as make_comparison

     # Lazily create the axes; this happens even if there is nothing to plot.
     if self.ax is None:
         figure = pl.figure()
         self.ax = figure.add_subplot(111)

     if not self.df.empty:
         with update_ax(self.ax):
             comparison = make_comparison(expect, got, data=self.df)
             comparison.plot(ax=self.ax, **kw)
Пример #2
0
 def compare(self, expect='expect', got='got', show_regression=1, scatter=1, **kw):
     """
     Plot `expect` against `got` from `self.df` using `arsenal.maths.compare`.

     - `expect`, `got`: passed straight to `arsenal.maths.compare` along with
       `data=self.df`.
     - `show_regression`, `scatter`: currently ignored by this method.
     - `kw`: forwarded unchanged to `.plot`.

     Creates `self.ax` on demand; skips plotting for an empty data frame.
     """
     from arsenal.maths import compare as _compare

     no_axes_yet = self.ax is None
     if no_axes_yet:
         self.ax = pl.figure().add_subplot(111)

     has_rows = not self.df.empty
     if has_rows:
         with update_ax(self.ax):
             result = _compare(expect, got, data=self.df)
             result.plot(ax=self.ax, **kw)
Пример #3
0
 def compare(self, want='want', have='have', show_regression=1, scatter=1, **kw):
     """
     Plot `want` against `have` from `self.df` using `arsenal.maths.compare`.

     - `want`, `have`: passed to `arsenal.maths.compare` along with
       `data=self.df` (presumably column names -- confirm).
     - `show_regression`, `scatter`: accepted but unused here.
     - `kw`: forwarded unchanged to `.plot`.

     The axes is created on first use; an empty data frame produces no plot.
     """
     from arsenal.maths import compare as build_comparison

     if self.ax is None:
         new_figure = pl.figure()
         self.ax = new_figure.add_subplot(111)

     if not self.df.empty:
         with update_ax(self.ax):
             cmp_result = build_comparison(want, have, data=self.df)
             cmp_result.plot(ax=self.ax, **kw)
Пример #4
0
def test():
    """
    Compare the samplers in `methods` against `p_perm`, then time them.

    Part 1 draws samples from each method on a random 4-element distribution
    and compares the tabulated counts with `p_perm` for every permutation.
    Part 2 times each method on distributions of size 2**1 .. 2**14 and plots
    the recorded timings.
    """
    methods = [
        swor_heap1,
        # swor_heap2 is currently excluded.
        swor_heap3,
    ]

    # ---- Part 1: sampled frequencies versus p_perm ----
    n_samples = 50_000
    v = random_dist(4)

    samples = {f.__name__: f(v, n_samples) for f in methods}
    tables = {name: counts(drawn) for name, drawn in samples.items()}

    exact = {}
    n = len(v)
    for z in permute(range(n)):
        exact[z] = p_perm(v, z)
        # Touch the key in every table (presumably so that permutations that
        # were never sampled still get an entry -- confirm `counts` returns a
        # defaultdict-like mapping).
        for table in tables.values():
            table[z] += 0

    # Check that p_perm sums to one.
    np.testing.assert_allclose(sum(exact.values()), 1)
    for name in sorted(tables):
        compare(exact, tables[name])  #.show(title=name);

    # ---- Part 2: timing ----
    T = timers()
    reps = 50
    for exponent in range(1, 15):
        n = 2**exponent
        for _ in range(reps):
            v = random_dist(n)
            # Randomize the evaluation order on every repetition.
            np.random.shuffle(methods)
            for f in methods:
                label = f.__name__
                with T[label](n=n):
                    out = f(v, R=1)
                assert out.shape == (1, n)  # some sort of sanity check
    print('done')

    fig, (linear_ax, log_ax) = pl.subplots(ncols=2, figsize=(12, 5))
    T.plot_feature('n', ax=linear_ax)
    fig.tight_layout()
    T.plot_feature('n', ax=log_ax)
    log_ax.set_yscale('log')
    log_ax.set_xscale('log')
    T.compare()

    pl.show()
Пример #5
0
def quick_fdcheck(func, w, g, n_checks=20, eps=1e-5, verbose=1, progressbar=1):
    """
    Check gradient along random directions (a faster alternative to axis-aligned directions).

    Tim Vieira (2017) "How to test gradient implementations"
    https://timvieira.github.io/blog/post/2017/04/21/how-to-test-gradient-implementations/

    - `func`: zero-argument function, re-evaluated after `w` is perturbed in place.
    - `w`: parameter array (needs `.flatten()` and `.flat`).
    - `g`: gradient estimate to compare against (needs `.flat`).
    - `n_checks`: number of random directions to test.
    - `eps`: perturbation size.
    - `verbose`: forwarded to `compare`.
    - `progressbar`: when truthy, iterate through `iterview`.

    Returns `compare(finite-difference estimates, directional derivatives from g)`.

    """
    names = ['rand_%s' % i for i in range(n_checks)]
    numeric = {}
    analytic = {}

    # Snapshot of the unperturbed parameters (`flatten` returns a copy).
    w0 = w.flatten()

    # Presumably a 1-d view onto the caller's array, so that the in-place
    # writes below are visible to `func` -- confirm for non-contiguous `w`.
    w = np.asarray(w.flat)
    g = np.asarray(g.flat)

    dim = len(w)

    for name in (iterview(names) if progressbar else names):
        direction = spherical(dim)
        analytic[name] = g.dot(direction)
        step = eps * direction
        w[:] = w0 + step
        f_plus = func()
        w[:] = w0 - step
        f_minus = func()
        w[:] = w0      # restore the parameters before the next direction
        numeric[name] = (f_plus - f_minus) / (2 * eps)

    return compare(numeric, analytic, verbose=verbose)
Пример #6
0
def fdcheck(func, w, g, keys=None, eps=1e-5):
    """
    Finite-difference check.

    Returns the result of `compare(finite differences, g)` over `keys`.

    - `func`: zero argument function, which references `w` in caller's scope.
    - `w`: parameters; perturbed in place one key at a time and restored.
    - `g`: gradient estimate to compare against
    - `keys`: dimensions to check; defaults to `w.keys()` when `w` has that
      method, otherwise to `range(len(w))`.
    - `eps`: perturbation size

    """
    if keys is None:
        keys = list(w.keys()) if hasattr(w, 'keys') else list(range(len(w)))

    estimates = {}
    for k in iterview(keys):
        original = w[k]
        w[k] = original + eps
        upper = func()
        w[k] = original - eps
        lower = func()
        w[k] = original      # put the parameter back
        # Central difference along coordinate k.
        estimates[k] = (upper - lower) / (2 * eps)

    numeric = [estimates[k] for k in keys]
    analytic = [g[k] for k in keys]
    return compare(numeric, analytic)
Пример #7
0
def quick_fdcheck(func, w, g, n_checks=20, eps=1e-5, verbose=1, progressbar=1):
    """
    Check gradient along random directions (a faster alternative to axis-aligned directions).

    Tim Vieira (2017) "How to test gradient implementations"
    https://timvieira.github.io/blog/post/2017/04/21/how-to-test-gradient-implementations/

    For each of `n_checks` directions drawn with `spherical`, the central
    finite difference of `func` along that direction is paired with `g.dot(d)`;
    both collections are then handed to `compare`, whose result is returned.

    """
    labels = ['rand_%s' % i for i in range(n_checks)]
    fd_estimates = {}
    grad_estimates = {}

    # Copy of the starting point, used to rebuild `w` after each perturbation.
    start = w.flatten()

    # Presumably flat views of the caller's arrays (writes to `w` must reach
    # whatever `func` reads) -- confirm for non-contiguous inputs.
    w = np.asarray(w.flat)
    g = np.asarray(g.flat)

    n_params = len(w)

    for label in (iterview(labels) if progressbar else labels):
        d = spherical(n_params)
        grad_estimates[label] = g.dot(d)
        offset = eps * d
        w[:] = start + offset
        ahead = func()
        w[:] = start - offset
        behind = func()
        w[:] = start
        fd_estimates[label] = (ahead - behind) / (2 * eps)

    return compare(fd_estimates, grad_estimates, verbose=verbose)
Пример #8
0
def test_stationary(M):
    """
    Test `M`'s stationary distribution and performance against simulation.

    Draws `π = random_dist(M.S, M.A)`, combines it with `M` via `M | π`, and
    then checks that

    - `M.d()` and `M.d_by_eigen()` agree (max relative error below 1e-5), and
    - running averages computed from `M.run()` stay within loose
      `1/sqrt(n)` bounds of `M.J()` and `M.d()`.
    """
    print('[test stationary]')

    π = random_dist(M.S, M.A)
    M = M | π
    [_, _, γ, r] = M
    T = 1 / (1 - γ)

    assert compare(M.d(), M.d_by_eigen()).max_relative_error < 1e-5

    J_ref = M.J()
    d_ref = M.d()

    def estimate(N):
        # Running means of the visited-state indicator and the scaled reward;
        # a row [t, J error, d error] is reported every 1000 steps.
        d_hat = np.zeros(M.S)
        J_hat = 0.0
        for t, [s, reward, _] in enumerate(M.run(), start=1):
            if t >= N:
                break

            d_hat += (onehot(s, M.S) - d_hat) / t

            # Note the 'importance sampling correction' T, which accounts for
            # the (1-γ)-resetting dynamics.
            J_hat += (reward * T - J_hat) / t

            if t % 1000 == 0:
                J_gap = 0.5 * abs(J_hat - J_ref)
                d_gap = 0.5 * abs(d_hat - d_ref).sum()
                yield [t, J_gap, d_gap]

    rows = list(estimate(1_000_000))
    ns, J_err, d_err = np.array(rows).T

    dmax = 1
    # scaled by T because of the importance sampling correction.
    Jmax = T * r.max()

    # Very loose bounds on total variation distance
    J_bnd = Jmax / np.sqrt(ns)
    d_bnd = M.S * dmax / np.sqrt(ns)

    if 0:
        # Disabled diagnostic plots: error decays at a rate of 1/sqrt(N)
        pl.title('performance estimate')
        pl.loglog(ns, J_bnd, label='error bound')
        pl.loglog(ns, J_err, label='error observed')
        pl.show()

        pl.title('distribution estimate')
        pl.loglog(ns, d_bnd, label='error bound')
        pl.loglog(ns, d_err, label='error observed')
        pl.show()

    assert (J_err <= J_bnd).all()
    assert (d_err <= d_bnd).all()
Пример #9
0
def fdcheck(func, w, g, keys=None, eps=1e-5, quiet=0, verbose=1, progressbar=1):
    """
    Finite-difference check.

    Returns the result of `compare(finite differences, g, verbose=verbose)`.

    - `func`: zero argument function, which references `w` in caller's scope.
    - `w`: parameters; perturbed in place one key at a time and restored.
    - `g`: gradient estimate to compare against
    - `keys`: dimensions to check; when None, every key (dict-like `w`) or
      every flat index (array `w`) is checked.
    - `eps`: perturbation size
    - `quiet`: when truthy, forces `verbose` and `progressbar` to 0.
    - `verbose`: forwarded to `compare`.
    - `progressbar`: when truthy, iterate through `iterview`.

    """
    if quiet:
        verbose = progressbar = 0

    if keys is not None:
        d = {}
    elif hasattr(w, 'keys'):
        # support for sparse vectors represented as a dictionary-like object.
        keys = list(w.keys())
        d = {}
    else:
        # use flat views, if need be.
        if len(w.shape) > 1:
            w = w.flat
        if len(g.shape) > 1:
            g = g.flat
        d = np.zeros_like(w)
        # TODO: these keys have lost their names. So not good for debugging.
        keys = list(range(len(w)))

    for k in (iterview(keys) if progressbar else keys):
        saved = w[k]
        w[k] = saved + eps
        f_hi = func()
        w[k] = saved - eps
        f_lo = func()
        w[k] = saved      # restore the parameter
        d[k] = (f_hi - f_lo) / (2 * eps)

    fd_values = [d[k] for k in keys]
    g_values = [g[k] for k in keys]
    return compare(fd_values, g_values, verbose=verbose)
Пример #10
0
def fdcheck(func, w, g, keys=None, eps=1e-5, quiet=0, verbose=1, progressbar=1):
    """
    Finite-difference check.

    Perturbs `w[k]` by `+eps` and `-eps` for every `k` in `keys`, evaluates
    `func` each time, and compares the central differences with `g[k]`.
    Returns the result of `compare(..., verbose=verbose)`.

    - `func`: zero argument function, which references `w` in caller's scope.
    - `w`: parameters.
    - `g`: gradient estimate to compare against
    - `keys`: dimensions to check (all of them when None)
    - `eps`: perturbation size
    - `quiet`: shorthand that switches off both `verbose` and `progressbar`.

    """
    if quiet:
        verbose = progressbar = 0

    if keys is not None:
        approx = {}
    elif hasattr(w, 'keys'):
        # support for sparse vectors represented as a dictionary-like object.
        keys = list(w.keys())
        approx = {}
    else:
        # use flat views, if need be.
        if len(w.shape) > 1:
            w = w.flat
        if len(g.shape) > 1:
            g = g.flat
        approx = np.zeros_like(w)
        # TODO: these keys have lost their names. So not good for debugging.
        keys = list(range(len(w)))

    for key in (iterview(keys) if progressbar else keys):
        before = w[key]
        w[key] = before + eps
        plus = func()
        w[key] = before - eps
        minus = func()
        w[key] = before
        approx[key] = (plus - minus) / (2 * eps)

    return compare(
        [approx[key] for key in keys],
        [g[key] for key in keys],
        verbose=verbose,
    )
Пример #11
0
def test_lp_solver(M):
    """
    Cross-check the LP solvers of `M` against policy iteration.

    The primary testing strategy is to compare the linear programming (LP)
    solutions with another solver.  Besides the policy itself, the objective
    value, the state distribution derived from `mu`, the value function, and
    the agreement between the primal and dual LPs are all checked.
    """
    baseline = M.solve_by_policy_iteration()

    dual = M.solve_by_lp_dual()
    primal = M.solve_by_lp_primal()

    π = primal['policy']
    assert np.allclose(π, baseline['policy'])
    print('[lp-solver] policy', ok)

    # Objective value matches the baseline solution (within 1% relative error).
    assert abs(dual['obj'] - baseline['obj']) / abs(baseline['obj']) < 0.01
    print('[lp-solver] objective value', ok)

    # Summing `mu` over axis 1 should give a valid distribution that matches M.d(π).
    d = dual['mu'].sum(axis=1)
    assert is_distribution(d), 'stationary distribution is not a valid distribution.'
    assert compare(dual['mu'].sum(axis=1), M.d(π), verbose=False).max_err < 1e-5
    print('[lp-solver] stationary distribution', ok)

    assert np.allclose(baseline['V'], dual['V'])
    print('[lp-solver] value function', ok)

    # Test the relationships between primal and dual LPs.  The policies are
    # not compared directly because behavior with ties is different.
    assert np.allclose(primal['mu'], dual['mu'])
    print('[dual-lp-solver]', ok)

    # Test that the objectives match
    for solution in (dual, primal):
        assert np.allclose(solution['obj'], M.J(π))
    print('[lp-objectives]', ok)
Пример #12
0
def quick_fdcheck(func, w, g, n_checks=20, eps=1e-5, verbose=1, progressbar=1):
    """
    Check gradient along random directions (a faster alternative to axis-aligned directions).

    - `func`: zero-argument function, evaluated after `w` is perturbed in place.
    - `w`: parameter array; `g`: gradient estimate to check.
    - `n_checks`: number of random directions; `eps`: perturbation size.
    - `verbose`: forwarded to `compare`; `progressbar`: use `iterview` if truthy.

    Returns `compare(finite differences, g projected on each direction)`.
    """
    check_ids = ['rand_%s' % i for i in range(n_checks)]
    measured = {}
    predicted = {}

    # Copy of the original parameter values.
    origin = w.flatten()

    # Presumably views that write through to the caller's arrays -- confirm.
    w = np.asarray(w.flat)
    g = np.asarray(g.flat)

    size = len(w)

    for check in (iterview(check_ids) if progressbar else check_ids):
        u = spherical(size)
        predicted[check] = g.dot(u)
        delta = eps * u
        w[:] = origin + delta
        forward = func()
        w[:] = origin - delta
        backward = func()
        w[:] = origin
        measured[check] = (forward - backward) / (2 * eps)

    return compare(measured, predicted, verbose=verbose)
Пример #13
0
def fdcheck(E, root, eps=1e-4):
    """Finite-difference approximation of gradient of numerator and denominator wrt
    edge probability.

    - `E`: mapping from edge `e` to a triple `(_, r, f)`; only `r` and the
      feature vector `f` are read here, the first component is ignored.
    - `root`: root node of the hypergraph.
    - `eps`: perturbation size for the central differences.

    Draws random weights `W`, obtains gradients from `inside_outside_speedup`,
    computes central finite differences of `fn` for every feature, and shows
    one `arsenal.maths.compare` report each for Z, rbar and risk.  Returns
    nothing.

    """
    def fn(W):
        "Evaluate numerator and denominator of risk; returns (Q.p, Q.r, Q.r/Q.p) as reals."
        g = Hypergraph()
        g.root = root
        for e, [_, r, f] in list(E.items()):
            # Edge weight p = exp(f . W).
            p = LogVal(np.exp(f.dot(W).to_real()))
            g.edge(Semiring1(p, p * r), *e)
        B = g.inside(Semiring1.Zero)
        Q = B[g.root]
        return Q.p.to_real(), Q.r.to_real(), (Q.r / Q.p).to_real()

    # Collect every feature key that appears on some edge and draw a random
    # weight in [-1, 1) for each.
    features = {k for [_, _, f] in E.values() for k in f}
    W = LogValVector({k: LogVal(np.random.uniform(-1, 1)) for k in features})

    # For gradient of risk we use <p, p*r, D[p], r*D[p]>, but my code computes
    # <p, p*r, p*s, p*r*s>, so we pass in s=D[p]/p.
    #
    # D[p] = D[exp(f.dot(W))] = exp(f.dot(W))*D[f.dot(W)] = exp(f.dot(W))*f
    #
    # therefore D[p]/p = f
    if 0:
        # Disabled alternative: the `insideout3` variant is given f*p instead of f.
        E1 = {}
        for e, [_, r, f] in list(E.items()):
            p = LogVal(np.exp(f.dot(W).to_real()))
            E1[e] = (p, r, f * p)

        #S = secondorder_expectation_semiring(E, root)
        from hypergraphs.insideout3 import inside_outside_speedup
        khat, xhat = inside_outside_speedup(E1, root)

    else:
        # Active path: rebuild the edge table with p = exp(f . W) and s = f.
        E1 = {}
        for e, [_, r, f] in list(E.items()):
            p = LogVal(np.exp(f.dot(W).to_real()))
            E1[e] = (p, r, f)

        #S = secondorder_expectation_semiring(E, root)
        from hypergraphs.insideout import inside_outside_speedup
        khat, xhat = inside_outside_speedup(E1, root)

    # Gradients (ad_*) and values taken from the inside-outside result.
    ad_Z = xhat.s
    ad_rbar = xhat.t
    Z = khat.p
    rbar = khat.r

    # Quotient rule for risk = rbar / Z.
    ad_risk = ad_rbar / Z - rbar * ad_Z / Z / Z

    dd = []
    for k in features:
        # Central difference in coordinate k; W.x[k] is restored afterwards.
        was = W[k]
        W.x[k] = was + LogVal(eps)
        b_Z, b_rbar, b_risk = fn(W)
        W.x[k] = was - LogVal(eps)
        a_Z, a_rbar, a_risk = fn(W)
        W.x[k] = was

        fd_rbar = (b_rbar - a_rbar) / (2 * eps)
        fd_Z = (b_Z - a_Z) / (2 * eps)
        fd_risk = (b_risk - a_risk) / (2 * eps)

        dd.append({
            'key': k,
            'ad_risk': ad_risk[k].to_real(),
            'fd_risk': fd_risk,
            'ad_Z': ad_Z[k].to_real(),
            'fd_Z': fd_Z,
            'ad_rbar': ad_rbar[k].to_real(),
            'fd_rbar': fd_rbar
        })

    from arsenal.maths import compare
    from pandas import DataFrame
    # One row per feature; each report pairs the finite-difference column with
    # the corresponding `ad_*` column.
    df = DataFrame(dd)
    compare(df.fd_Z, df.ad_Z, alphabet=df.key).show()
    compare(df.fd_rbar, df.ad_rbar, alphabet=df.key).show()
    compare(df.fd_risk, df.ad_risk, alphabet=df.key).show()
Пример #14
0
    def active_set(self):
        """
        Alternate between inner optimization and growing the context set.

        Runs `self.outer_iterations` rounds.  Each round calls
        `self.inner_optimization(self.inner_iterations)`; every round except
        the last then "grows" the model: the active features are extended by
        every symbol in `self.sigma`, optionally intersected with
        `self.allowed_contexts`, the data structures are rebuilt via
        `self.update` / `self.dense.set_groups`, and the previous `w` and `q`
        values are copied over for contexts that already existed.

        `TEST_EXPECT` is a local debugging switch (off by default) that
        compares `S.d_dense` values recorded before and after growing.
        """
        for outer in range(1, self.outer_iterations+1):
            print()
            print(colors.green % '=====================')
            print(colors.green % 'Outer %s' % outer)

            self.inner_optimization(self.inner_iterations)

            # No growing step after the final round.
            if outer != self.outer_iterations:
                print()
                print(colors.yellow % 'Grow %s' % outer)

                # old feature index, plus copies of the current w and q so that
                # they survive the reset/rebuild below.
                old = {c: self.context_feature_id(c) for c in self.C}
                w = self.dense.w.copy()
                q = np.array(self.dense.q, copy=1)

                TEST_EXPECT = 0

                if TEST_EXPECT:
                    # Record expectations under previous model. Technically,
                    # this is observed-expected features.
                    predictions = []
                    for x in self.train:
                        S = ScoringModel(x, self.A, self.feature_backoff, self.sparse, self.dense)
                        self.gradient(x.N, x.tags, S)   # don't backprop thru scoring model because we don't change the parameters.
                        predictions.append({k: S.d_dense[i] for k,i in old.items()})

                # "Grow" Z by extending active features with one more character.
                active = self.active_features()

                # Heuristic: Use an intelligent guess for 'new' q values in the
                # next iterations.
                #
                # This improves active set's ability to monotonically improve
                # after growing. Otherwise, adagrad will update too aggressively
                # compared to the sensible alternative of starting at the last
                # seen value (if possible) or at the fudge value.
                #
                # In other words, new features get huge learning rates compared
                # to existing ones. Features that used to exist also get pretty
                # big learning rates too. This is because adagrad learning rates
                # decrease quickly with time as they are 1/sqrt(sum-of-squares).
                #
                # I found that guessing the mean q works better than min or max.
                self.dense.w[:] = 0
                self.dense.q[:] = float(q.mean())   # [2018-08-13 Mon] the use of `float` is a workaround for "BufferError: Object is not writable."

                # Grow active contexts to the right.
                cc = {p+(y,) for p in active for y in self.sigma}

                ####
                # Note that just because we extended a bunch of active elements
                # by all elements of sigma, this does not mean that we are
                # last-character closed.
                #
                # Feel free to check via the following (failing) assertion
                #
                #   assert set(prefix_closure(cc)) == set(last_char_sub_closure(self.sigma, prefix_closure(cc)))
                #
                # The reason is that some elements go to zero and, thus, get
                # pruned. This is the same reason why `active` is not
                # automatically prefix closed.

                ####
                # Is the growing set prefix closed by construction?
                #
                # No. The grown set is not prefix closed either, because
                # it's possible for a parent to be zero with nonzero children.
                #
                # Here is an assertion that will fail.
                #
                # assert set(prefix_closure(cc)) == set(cc)
                #
                #cc = set(prefix_closure(cc))

                ####
                # XXX: In general, we probably do not want to do last-char-sub
                # closure. I've added it in because it seems to help us
                # more closely preserve the distribution after manipulating the
                # active set.
                #cc = set(last_char_sub_closure(self.sigma, cc))

                # Filter active set by allowed-context constraints, if supplied.
                if self.allowed_contexts:
                    cc &= set(self.allowed_contexts)

                # Update DFA and group lasso data structures.
                self.update(self.sigma, cc)
                self.dense.set_groups(self.group_structure())
                print(colors.yellow % '=> new', '|C| = %s' % len(self.C))

                # Copy previous weights; contexts that did not exist before
                # keep the reset values assigned above.
                for c in self.C:
                    i = self.context_feature_id(c)
                    if c in old:
                        o = old[c]
                        self.dense.w[i] = w[o]
                        self.dense.q[i] = q[o]

                if 0:
                    # Disabled debugging aid.
                    print()
                    print(colors.light.red % 'is accuracy the same???????')
                    self.after_inner_pass()
                    print(colors.light.red % '^^^^^^^^^^^^^^^^^^^^^^^^^^^')
                    print()

                if TEST_EXPECT:
                    # DEBUGGING: check that expectations match
                    #
                    # I'm not sure this test is implemented perfectly because we
                    # need to compute the expected value of all the old features
                    # under the new model.
                    #
                    # We get away using the new model because it has backoff
                    # features.
                    #
                    # In the case of a unigram model (order-0 model), this test
                    # fails. Why? Are the unigrams used incorrectly?
                    #
                    new = {c: self.context_feature_id(c) for c in self.C}

                    for x, want in zip(self.train, predictions):
                        S = ScoringModel(x, self.A, self.feature_backoff, self.sparse, self.dense)
                        self.gradient(x.N, x.tags, S)    # don't backprop thru scoring model because we don't change the parameters.

                        # just check on *old* features.
                        E = {k: 0 for k in want}
                        E.update({k: S.d_dense[new[k]] for k in want if k in new})

                        # XXX: filter down to features in both vectors, I guess?
                        E = {k: v for k, v in E.items() if k in new}
                        want = {k: v for k, v in want.items() if k in new}

                        c = compare(want, E, verbose=1)

                        # Only show the report when the correlation is poor.
                        if c.pearson <= .99:
                            c.show()
Пример #15
0

#    sample = lazy_sampler()
    sample = iter(sampler())

    for r in range(1, 1+reps):
        _, z = next(sample)
        c[z] += 1
        if r % 10_000 == 0:
            print(f'err({r})=', 0.5*np.abs(p - c/r).sum())

    c /= reps
    print(p)
    print(c)

    compare(p, c)
    #pl.plot(c/reps)
    #pl.plot(p)
    #pl.show()


from hypergraphs.apps.parser2 import parse, load_grammar
def parser(sentence, grammar, w, Weights):
    """
    Run `parse` on `sentence` and return the chart entry `[0, len(sentence), 'S']`.

    - `sentence`, `grammar`: forwarded to `parse`.
    - `w`: scoring callable, invoked as `w(X, Y, Z, i, j, k)` and `w(X, Y, i, k)`.
    - `Weights`: two-argument constructor for chart values; `Weights.zero` is
      passed to `parse` as `zero`.
    """
    # Callbacks handed to `parse`; each wraps a score and a label in `Weights`.
    def binary(sentence, X, Y, Z, i, j, k):
        score = w(X, Y, Z, i, j, k)
        return Weights(score, X)

    def unary(sentence, X, Y, i, k):
        score = w(X, Y, i, k)
        return Weights(score, X)

    def terminal(sentence, W, i):
        # Terminals always get the constant score 1.0.
        return Weights(1.0, W)

    chart = parse(sentence, grammar, binary, unary, terminal, zero=Weights.zero)
    goal = (0, len(sentence), 'S')
    return chart[goal]