Example #1
File: util.py Project: blastbao/arsenal
 def compare(self, expect='expect', got='got', show_regression=1, scatter=1, **kw):
     from arsenal.math import compare
     if self.ax is None:
         self.ax = pl.figure().add_subplot(111)
     if self.df.empty:
         return
     with update_ax(self.ax):
         compare(expect, got, data=self.df).plot(ax=self.ax, **kw)
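Not from the project above, but for orientation: a rough, self-contained sketch of what a `compare(expect, got, ..., scatter=1, show_regression=1)` call boils down to, assuming only numpy and matplotlib. It scatters the two series against each other, fits a least-squares regression line, and reports the Pearson correlation; the data here are synthetic.

import numpy as np
import pylab as pl

rng = np.random.RandomState(0)
expect = rng.randn(200)
got = expect + 0.05 * rng.randn(200)                 # noisy copy of `expect`

slope, intercept = np.polyfit(expect, got, deg=1)    # least-squares regression line
corr = np.corrcoef(expect, got)[0, 1]                # Pearson correlation

ax = pl.figure().add_subplot(111)
ax.scatter(expect, got, lw=0, alpha=0.5)
xs = np.array([expect.min(), expect.max()])
ax.plot(xs, slope * xs + intercept, c='k')
ax.set_title('pearson = %.4f' % corr)
pl.show()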
Example #3
def fdcheck(func, w, g, keys=None, eps=1e-5):
    """
    Finite-difference check.

    Returns `arsenal.math.compare` instance.

    - `func`: zero argument function, which references `w` in caller's scope.
    - `w`: parameters.
    - `g`: gradient estimate to compare against
    - `keys`: dimensions to check
    - `eps`: perturbation size

    """
    if keys is None:
        if hasattr(w, 'keys'):
            keys = w.keys()
        else:
            keys = range(len(w))
    fd = {}
    for key in iterview(keys):
        was = w[key]
        w[key] = was + eps
        b = func()
        w[key] = was - eps
        a = func()
        w[key] = was
        fd[key] = (b - a) / (2 * eps)

    return compare([fd[k] for k in keys], [g[k] for k in keys])
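Not part of arsenal; a minimal, self-contained sketch of the same central-difference recipe `fdcheck` uses, assuming a toy quadratic objective so the analytic gradient is known exactly.

import numpy as np

rng = np.random.RandomState(0)
A = rng.randn(5, 5)
w = rng.randn(5)

def func():
    return 0.5 * w.dot(A).dot(w)      # zero-argument closure over w, as fdcheck expects

g = 0.5 * (A + A.T).dot(w)            # analytic gradient to check against

eps = 1e-5
fd = np.zeros_like(w)
for k in range(len(w)):
    was = w[k]
    w[k] = was + eps
    b = func()
    w[k] = was - eps
    a = func()
    w[k] = was
    fd[k] = (b - a) / (2 * eps)       # central difference

print(np.max(np.abs(fd - g)))         # should be tiny, on the order of 1e-9 or smaller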
Example #4
File: crf.py Project: TPLink32/nlp
    def test_gradient(self, data, subsetsize=100):
        def fd(x, i, eps=1e-5):
            """Compute `i`th component of the finite-difference approximation to the
            gradient of log-likelihood at current parameters on example `x`.

            """

            was = self.W[i]  # record value

            self.W[i] = was + eps
            b = self.likelihood(x)

            self.W[i] = was - eps
            a = self.likelihood(x)

            self.W[i] = was  # restore original value

            return (b - a) / 2 / eps

        for x in iterview(data, msg='test grad'):

            g = defaultdict(float)
            for k, v in self.expectation(x).iteritems():
                g[k] -= 1 * v
            for k in x.target_features:
                g[k] += 1

            # pick a subset of features to test
            d = np.random.choice(g.keys(), subsetsize, replace=0)

            f = {}
            for i in iterview(d, msg='fd approx'):  # loop over active features
                f[i] = fd(x, i)

            from arsenal.math import compare
            compare([f[k] for k in d], [g[k] for k in d],
                    name='test gradient %s' % x,
                    scatter=1,
                    show_regression=1)
            import pylab as pl
            pl.show()
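A small aside (not from either CRF project): why these tests use the symmetric `(b - a) / (2 * eps)` form rather than a one-sided difference. On a toy function with a known derivative, the central difference is accurate to roughly `eps**2`, while the forward difference is only accurate to roughly `eps`.

import numpy as np

x, eps = 1.0, 1e-5
f = np.exp                                          # f'(x) = exp(x), so the exact answer is known

forward = (f(x + eps) - f(x)) / eps                 # one-sided: error ~ eps
central = (f(x + eps) - f(x - eps)) / (2 * eps)     # symmetric: error ~ eps**2

exact = np.exp(x)
print(abs(forward - exact))    # roughly 1e-05
print(abs(central - exact))    # roughly 1e-10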
Example #5
File: crf.py Project: MahdiehNejati/crf
    def test_gradient(self, data, subsetsize=100):

        def fd(x, i, eps=1e-5):
            """Compute `i`th component of the finite-difference approximation to the
            gradient of log-likelihood at current parameters on example `x`.

            """

            was = self.W[i]   # record value

            self.W[i] = was+eps
            b = self.likelihood(x)

            self.W[i] = was-eps
            a = self.likelihood(x)

            self.W[i] = was   # restore original value

            return (b - a) / 2 / eps

        for x in iterview(data, msg='test grad'):

            g = defaultdict(float)
            for k, v in self.expectation(x).iteritems():
                g[k] -= 1*v
            for k in x.target_features:
                g[k] += 1

            # pick a subset of features to test
            d = np.random.choice(g.keys(), subsetsize, replace=0)

            f = {}
            for i in iterview(d, msg='fd approx'):     # loop over active features
                f[i] = fd(x, i)

            from arsenal.math import compare
            compare([f[k] for k in d],
                    [g[k] for k in d], name='test gradient %s' % x, scatter=1, show_regression=1)
            import pylab as pl
            pl.show()
Example #6
def fdcheck(func,
            w,
            g,
            keys=None,
            eps=1e-5,
            quiet=0,
            verbose=1,
            progressbar=1):
    """
    Finite-difference check.

    Returns `arsenal.math.compare` instance.

    - `func`: zero argument function, which references `w` in caller's scope.
    - `w`: parameters.
    - `g`: gradient estimate to compare against
    - `keys`: dimensions to check
    - `eps`: perturbation size

    """
    if quiet:
        verbose = 0
        progressbar = 0

    if keys is None:
        if hasattr(w, 'keys'):
            keys = w.keys()
            d = {}
        else:
            d = np.zeros_like(w)

            # use flat views, if need be.
            if len(w.shape) > 1:
                w = w.flat
            if len(g.shape) > 1:
                g = g.flat
            if len(d.shape) > 1:
                d = d.flat

            keys = range(len(w))

    for k in (iterview(keys) if progressbar else keys):
        was = w[k]
        w[k] = was + eps
        b = func()
        w[k] = was - eps
        a = func()
        w[k] = was
        d[k] = (b - a) / (2 * eps)

    return compare([d[k] for k in keys], [g[k] for k in keys], verbose=verbose)
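Not from arsenal; a sketch of the flat-view trick above for matrix-valued parameters, assuming a toy objective: `W.flat` writes through to `W`, so perturbing flat indices checks every entry of the matrix.

import numpy as np

rng = np.random.RandomState(0)
W = rng.randn(3, 4)                   # matrix-valued parameter
X = rng.randn(4)

def func():
    return np.sum(W.dot(X) ** 2)      # zero-argument closure over W

g = 2.0 * np.outer(W.dot(X), X)       # analytic gradient, same shape as W

w_flat = W.flat                       # flat view: assignments write through to W
eps = 1e-5
fd = np.zeros(W.size)
for k in range(W.size):
    was = w_flat[k]
    w_flat[k] = was + eps
    b = func()
    w_flat[k] = was - eps
    a = func()
    w_flat[k] = was
    fd[k] = (b - a) / (2 * eps)

print(np.max(np.abs(fd - g.ravel())))   # should be tiny, roughly 1e-9 or smaller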
Example #7
def test(e, grammar, m):
    """
    Compare runtime of running parser v. inside algorithm.

    Note: This isn't the same as rollouts.
    """

    M = m*1.0
    steps = 2

    # TODO: compare_hypergraph_to_cky(grammar, example, m, steps)

    grammar.exp_the_grammar_weights()   # Do exp outside the timing loop. It will be cached in the Grammar object.

    with T['dp'](N=e.N):
        expected_recall = InsideOut(e, grammar, M*1.0, steps=steps, with_gradient=0).val

    with T['parse'](N=e.N):
        state = pruned_parser(e.tokens, grammar, m)
        coarse = grammar.coarse_derivation(state.derivation)
        c,_,w = e.recall(coarse)
        recall = c/w

    data.append({'example': e,
                 'N': e.N,
                 'expected_recall': expected_recall,
                 'recall': recall})

    T.compare()

    with axman('compare runtimes') as ax:
        T.plot_feature('N', ax=ax, loglog=1, show='scatter')

    df = DataFrame(data)
    with axman('compare rewards') as ax:
        compare(df.recall, df.expected_recall, scatter=1, show_regression=1, ax=ax)
Example #8
def fdcheck(func, w, g, keys = None, eps = 1e-5, quiet=0, verbose=1, progressbar=1):
    """
    Finite-difference check.

    Returns `arsenal.math.compare` instance.

    - `func`: zero argument function, which references `w` in caller's scope.
    - `w`: parameters.
    - `g`: gradient estimate to compare against
    - `keys`: dimensions to check
    - `eps`: perturbation size

    """
    if quiet:
        verbose = 0
        progressbar = 0

    if keys is None:
        if hasattr(w, 'keys'):
            keys = w.keys()
            d = {}
        else:
            d = np.zeros_like(w)

            # use flat views, if need be.
            if len(w.shape) > 1:
                w = w.flat
            if len(g.shape) > 1:
                g = g.flat
            if len(d.shape) > 1:
                d = d.flat

            keys = range(len(w))

    for k in (iterview(keys) if progressbar else keys):
        was = w[k]
        w[k] = was + eps
        b = func()
        w[k] = was - eps
        a = func()
        w[k] = was
        d[k] = (b-a) / (2*eps)

    return compare([d[k] for k in keys],
                   [g[k] for k in keys],
                   verbose=verbose)
Example #9
def quick_fdcheck(func, w, g, n_checks, eps=1e-5, verbose=1, progressbar=1):
    "Check gradient along random directions (a faster alternative to axis-aligned directions)."
    keys = ['rand_%s' % i for i in range(n_checks)]
    H = {}
    G = {}

    was = w.copy()
    for k in (iterview(keys) if progressbar else keys):
        d = spherical(w.shape[0])
        G[k] = g.dot(d)
        w[:] = was + eps * d
        b = func()
        w[:] = was - eps * d
        a = func()
        w[:] = was
        H[k] = (b - a) / (2 * eps)

    return compare(H, G, verbose=verbose)
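Not from arsenal; `quick_fdcheck` rests on the directional-derivative identity `g.dot(d) ≈ (f(w + eps*d) - f(w - eps*d)) / (2*eps)`. A minimal sketch of one such check, with a normalized Gaussian vector standing in for arsenal's `spherical` helper:

import numpy as np

rng = np.random.RandomState(0)
A = rng.randn(10, 10)
w = rng.randn(10)

def func():
    return 0.5 * w.dot(A).dot(w)      # zero-argument closure over w

g = 0.5 * (A + A.T).dot(w)            # analytic gradient

eps = 1e-5
d = rng.randn(w.shape[0])
d /= np.linalg.norm(d)                # random unit direction (stand-in for `spherical`)

was = w.copy()
w[:] = was + eps * d
b = func()
w[:] = was - eps * d
a = func()
w[:] = was

print(g.dot(d), (b - a) / (2 * eps))  # the two numbers should agree closely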
Example #10
def quick_fdcheck(func, w, g, n_checks, eps = 1e-5, verbose=1, progressbar=1):
    "Check gradient along random directions (a faster alternative to axis-aligned directions)."
    keys = ['rand_%s' % i for i in range(n_checks)]
    H = {}
    G = {}

    was = w.copy()
    for k in (iterview(keys) if progressbar else keys):
        d = spherical(w.shape[0])
        G[k] = g.dot(d)
        w[:] = was + eps*d
        b = func()
        w[:] = was - eps*d
        a = func()
        w[:] = was
        H[k] = (b-a) / (2*eps)

    return compare(H, G, verbose=verbose)
Example #11
def aggregate_multiple_runtime_trials(Ds, Ps):
    """Collapse multiple dataframes `Ds` from different timing runes into a single
    one, by taking the min over runtimes (i.e., new runtime will be "best-of k"
    where k=|Ds|).

    Actually, this function does more than that. It appears to collapse over
    sentence too, e.g., computing corpus-EVALB and avg[best-of-k runtimes].

    """
    D0 = Ds[0]

    # Append trials together
    foo = Ds[0]
    for dd in Ds[1:]:
        foo = foo.append(dd)

    # Take min over time_total for this policy-example pair.
    minz = foo[['policy','example','time_total']].groupby(['policy','example']).min()

    data = []
    for policy in iterview(Ps):
        dump = path(policy).dirname()
        args = cPickle.load(file(dump / 'args.pkl'))
        log = pd.read_csv(dump / 'log.csv')

        # TODO: will need to add extra cases.
        if 'DP' in args.roll_out:
            type_ = 'DP'
        elif 'CP' in args.roll_out:
            type_ = 'CP'
        elif 'HY' in args.roll_out:
            type_ = 'HY'
        elif 'BODEN' in args.roll_out:
            type_ = 'baseline'
        else:
            raise ValueError(args.roll_out)

        min_times = minz.ix[policy]['time_total']

        P = D0[D0.policy == policy]
        f = cgw_f(P.want_and_got.sum(), P.got.sum(), P.want.sum())

        #pl.scatter(df.avg_bestof_time, df.evalb, c=C[name], lw=0)
        #show_frontier(df.avg_bestof_time, df.evalb, c=C[name], interpolation='linear', label=name)
        #[w,b] = np.polyfit(df.pushes, df.avg_bestof_time, deg=1)
        #show_frontier(df.pushes*w + b, df.evalb, interpolation='linear', c=C[name])

        if 0:
            # log-log plot of pushes v. seconds. Really great correlation!
            PP = P[['example','pushes']].join(min_times, on='example')
            PP['log(pushes)'] = np.log(PP.pushes)
            PP['log(seconds)'] = np.log(PP.time_total)
            compare('log(pushes)', 'log(seconds)', data=PP, scatter=1, show_regression=1)
            #pl.figure()
            # pushes v. seconds. Really great correlation!
            #PP = P[['example','pushes']].join(min_times, on='example')
            #compare('pushes', 'time_total', data=PP, scatter=1, show_regression=1)
            pl.ioff(); pl.show()

        if 0:
            # empirical runtime estimates

            # scatter plot sentence length against runtime.
            n_by_time = P[['example','N']].join(min_times, on='example')
            pl.scatter(n_by_time.N, n_by_time.time_total, alpha=0.5, lw=0)

            # highlight median runtime per sentence length.
            n_by_median_time = n_by_time.groupby('N').median()
            pl.plot(n_by_median_time.index, n_by_median_time.time_total, c='k', lw=2)

            # empirical exponent and constant factor
            compare(np.log(n_by_time.time_total), np.log(n_by_time.N), scatter=1, show_regression=1)
            pl.ioff(); pl.show()

        # use early stopping on dev to pick the policy.
        dev = log.ix[log['dev_new_policy_reward'].argmax()]

        row = {'avg_bestof_time': np.mean(min_times),
               'wps': np.mean(P.N) / np.mean(min_times),
               'pushes': np.mean(P.pushes),
               'pops': np.mean(P.pops),
               'policy': policy,
               'dev_pushes': dev.dev_new_policy_pushes,
               'dev_evalb': dev.dev_new_policy_evalb_corpus,
               'type': type_,
               'evalb': f}

        row.update({'args_'+k: v for k,v in args.__dict__.items()})

        data.append(row)

    # remove unused baselines (sorry this is a bit ugly).
    ddd = pd.DataFrame(data)
    others = ddd[ddd.type != 'baseline']
    B = ddd[ddd.type == 'baseline']
    used = set()
    for _, z in others.iterrows():
        [ix] = B[B.policy == z.args_init_weights].index
        used.add(ix)
    B = B.ix[list(used)]
    ddd = others.append(B)

    return ddd
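Not from the project above; a toy pandas sketch of the "best-of-k" collapse this function starts with: stack the per-trial dataframes and keep the minimum `time_total` per `(policy, example)` pair. The column names mirror the function, the data are made up, and `pd.concat` is used in place of the now-removed `DataFrame.append`.

import pandas as pd

# Two hypothetical timing trials over the same (policy, example) pairs.
D1 = pd.DataFrame({'policy': ['p1', 'p1'], 'example': [0, 1], 'time_total': [1.9, 3.2]})
D2 = pd.DataFrame({'policy': ['p1', 'p1'], 'example': [0, 1], 'time_total': [1.7, 3.5]})

# Stack the trials, then keep the best (minimum) runtime for each policy-example pair.
both = pd.concat([D1, D2])
best = both.groupby(['policy', 'example'], as_index=False)['time_total'].min()
print(best)   # example 0 -> 1.7, example 1 -> 3.2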
Example #12
    def active_set(self):
        for outer in xrange(1, self.outer_iterations + 1):
            print
            print colors.green % '====================='
            print colors.green % 'Outer %s' % outer

            self.inner_optimization(self.inner_iterations)

            if outer != self.outer_iterations:
                print
                print colors.yellow % 'Grow %s' % outer

                # old feature index
                old = {c: self.context_feature_id(c) for c in self.C}
                w = self.dense.w.copy()
                q = np.array(self.dense.q, copy=1)

                TEST_EXPECT = 0

                if TEST_EXPECT:
                    # Record expectations under previous model. Technically,
                    # this is observed-expected features.
                    predictions = []
                    for x in self.train:
                        S = ScoringModel(x, self.A, self.feature_backoff,
                                         self.sparse, self.dense)
                        self.gradient(
                            x.N, x.tags, S
                        )  # don't backprop thru scoring model because we don't change the parameters.
                        predictions.append(
                            {k: S.d_dense[i]
                             for k, i in old.iteritems()})

                # "Grow" Z by extending active features with on more character.
                active = self.active_features()

                # Heuristic: Use an intelligent guess for 'new' q values in the
                # next iterations.
                #
                # This improves the active set's ability to monotonically improve
                # after growing. Otherwise, adagrad will update too aggressively
                # compared to the sensible alternative of starting at the last seen
                # value (if possible) or at the fudge value.
                #
                # In other words, new features get huge learning rates compared
                # to existing ones. Features that used to exist also get pretty
                # big learning rates. This is because adagrad learning rates
                # decrease quickly with time as they are 1/sqrt(sum-of-squares).
                #
                # I found that guessing the mean q works better than min or max.
                self.dense.w[:] = 0
                self.dense.q[:] = q.mean()

                # Grow active contexts to the right.
                cc = {p + (y, ) for p in active for y in self.sigma}

                ####
                # Note that just because we extended a bunch of active elements
                # by all elements of sigma, this does not mean that we are
                # last-character closed.
                #
                # Feel free to check via the following (failing) assertion
                #
                #   assert set(prefix_closure(cc)) == set(last_char_sub_closure(self.sigma, prefix_closure(cc)))
                #
                # The reason is that some elements go to zero and, thus, get
                # pruned. This is the same reason why `active` is not
                # automatically prefix closed.

                ####
                # Is the growing set prefix closed by construction?
                #
                # No. The grown set is not prefix closed either, because
                # it's possible for a parent to be zero with nonzero children.
                #
                # Here is an assertion that will fail.
                #
                # assert set(prefix_closure(cc)) == set(cc)
                #
                #cc = set(prefix_closure(cc))

                ####
                # XXX: In general, we probably do not want to do last-char-sub
                # closure. I've added it because it seems to help us more
                # closely preserve the distribution after manipulating the
                # active set.
                #cc = set(last_char_sub_closure(self.sigma, cc))

                # Filter active set by allowed-context constraints, if supplied.
                if self.allowed_contexts:
                    cc &= set(self.allowed_contexts)

                # Update DFA and group lasso data structures.
                self.update(self.sigma, cc)
                self.dense.set_groups(self.group_structure())
                print colors.yellow % '=> new', '|C| = %s' % len(self.C)

                # Copy previous weights
                for c in self.C:
                    i = self.context_feature_id(c)
                    if c in old:
                        o = old[c]
                        self.dense.w[i] = w[o]
                        self.dense.q[i] = q[o]

                if 0:
                    print
                    print colors.light_red % 'is accuracy the same???????'
                    self.after_inner_pass()
                    print colors.light_red % '^^^^^^^^^^^^^^^^^^^^^^^^^^^'
                    print

                if TEST_EXPECT:
                    # DEBUGGING: check that expectations match
                    #
                    # I'm not sure this test is implemented perfectly because we
                    # need to compute the expected value of all the old features
                    # under the new model.
                    #
                    # We get away using the new model because it has backoff
                    # features.
                    #
                    # In the case of a unigram model (order-0 model), this test
                    # fails. Why? Are the unigrams used incorrectly?
                    #
                    new = {c: self.context_feature_id(c) for c in self.C}

                    for x, want in zip(self.train, predictions):
                        S = ScoringModel(x, self.A, self.feature_backoff,
                                         self.sparse, self.dense)
                        self.gradient(
                            x.N, x.tags, S
                        )  # don't backprop thru scoring model because we don't change the parameters.

                        # just check on *old* features.
                        E = {k: 0 for k in want}
                        E.update(
                            {k: S.d_dense[new[k]]
                             for k in want if k in new})

                        # XXX: filter down to features in both vectors, I guess?
                        E = {k: v for k, v in E.iteritems() if k in new}
                        want = {k: v for k, v in want.iteritems() if k in new}

                        c = compare(want, E, verbose=1)

                        if c.cosine < .99:
                            c.show()
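Not from the project above; a tiny numeric illustration of the adagrad learning-rate heuristic discussed in the comments: the effective per-coordinate step size is roughly `eta / sqrt(q)`, so brand-new features (with `q` at the fudge value) take enormous steps unless `q` is seeded with something like the mean of the old `q`. The numbers are made up.

import numpy as np

# Per-coordinate adagrad state: q accumulates squared gradients, and the
# effective step size is roughly eta / sqrt(q), shrinking as a feature is updated.
eta, fudge = 0.1, 1e-4
q_old = np.array([250.0, 400.0])     # features that have been trained for a while
q_new = np.array([fudge, fudge])     # features initialized at the fudge value

print(eta / np.sqrt(q_old))          # small steps, around 0.005-0.006
print(eta / np.sqrt(q_new))          # huge steps, 10.0
print(eta / np.sqrt(q_old.mean()))   # the heuristic: seed new features' q near mean(old q)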
Example #13
def __test_gradient(example, grammar, m, gamma):
    """
    Finite-difference test for gradient of numerator and denominator.
    """

    assert gamma == 1, 'gamma = %g no longer supported' % gamma

    M = m*1.0
    steps = 2

#    print 'steps = %s' % steps
#    print colors.cyan % '>>> roll-in'

    test_grad      = 1
#    test_onebest   = 0
    test_linearity = 0
#    test_viterbi   = 0    # test that hg-viterbi alg matches cky
#    show_items     = 0
#    get_marginals  = 0

#    if show_items:
#        get_marginals = 1

#    if test_viterbi:
#        compare_hypergraph_to_cky(grammar, example, m, steps)

    f_c = InsideOut(example, grammar, M*1.0, steps=steps, with_gradient=True)
#    f_c = InsideOut2(example, grammar, M*1.0, steps=steps, with_gradient=True, DEBUG=True, IOSPEEDUP=True)
#    f_c = InsideOut2(example, grammar, M*1.0, steps=steps, with_gradient=True, DEBUG=True, IOSPEEDUP=False)

    est = f_c.est

#    import ldp.dp.risk2
#    io2 = ldp.dp.risk2.InsideOut(example, grammar, M*1.0, steps=steps, with_gradient=True)
#    debug = 0
#    from arsenal.math import assert_equal
#    zoo = []
#    for k,v in io2.est.items():
#        assert_equal(v, est[k], name='est %s' % (k,), throw=0, tol=1e-4)
#        zoo.append([v, est[k]])
#    assert_equal(f_c.val, io2.val, name='rollin', throw=0, tol=1e-4)
#    boo,bar = zip(*zoo)
#    compare(boo, bar, show_regression=1, name='compare to old version', scatter=1)
#    pl.show()
#    return

#    from arsenal.debug import ip; ip()

#    if test_onebest:
#        for k,v in f_c.marginals().iteritems():
#            assert 0 <= v <= 1.000001, [k,v]
#            if 0.05 <= v <= 0.95:   # not entirely saturated
#                print 'tie (rollin):', nice(grammar, k), v

    # initial roll-in
#    if test_onebest:
#        rollout(grammar, example, m)

    old_mask = M.copy()

    del m, M

    data = []

#    for x in iterview(example.nodes, msg='fd'):
    for x in example.nodes:
        d = {'span': x, 'action': 'prune' if old_mask[x] else 'unprune'}

#        print '--------------------------------'
#        print colors.cyan % '>>>', d['action'], x

        ad_num = f_c.A2_r[x[0], x[1]]
        ad_den = f_c.A2_p[x[0], x[1]]

        was = old_mask[x]
        new_mask = old_mask*1
        new_mask[x] = 1-was

        tie = False

#        if test_viterbi:
#            compare_hypergraph_to_cky(grammar, example, new_mask, steps)

        if test_grad:

            S = InsideOut(example, grammar, new_mask*1.0, steps=steps, with_gradient=0)
            #S = InsideOut2(example, grammar, new_mask*1.0, steps=steps, with_gradient=True, DEBUG=True, IOSPEEDUP=True)

            # Note: Since the function is multi-linear (for gamma=1) we can
            # extrapolate as far as we'd like. Thus, we take the full step
            # (change 0->1 and 1->0).
            if was == 1:
                f_m = S
                f_p = f_c
            else:
                f_p = S
                f_m = f_c

            surrogate = S.val

#            if show_items:
#                dump_items(S)

#            if get_marginals:
#                for k,v in S.marginals().iteritems():
#                    assert 0 <= v <= 1.000001, [k,v]
#                    if 0.05 <= v <= 0.95:   # not entirely saturated
#                        print 'tie (unprune, %s):' % (x,), nice(grammar, k), v
#                        tie = True

            fd_num = (f_p.num - f_m.num)
            fd_den = (f_p.den - f_m.den)

            if 0:
                assert fd_num >= 0 and fd_den >= 0, [fd_num, fd_den]
                assert ad_num >= 0 and ad_den >= 0, [ad_num, ad_den]

                # Yup, gradient should always be positive! Why? Well, for
                # unnormalized risk and Z it's always beneficial to increase the
                # score of an edge -- there is no competition among edges (until
                # there is normalization; the gradient of normalized risk would
                # have variation in sign). Thus, the gradient is always
                # positive.
                assert f_c.A2_p[x[0], x[1]] >= 0
                assert f_c.A2_r[x[0], x[1]] >= 0

            d.update({'surrogate': surrogate,
                      'fd_num': fd_num,
                      'fd_den': fd_den})

            if test_linearity:
                cross_section(example, grammar, steps, old_mask*1.0, x)

                # NOTE: this test fails when gamma != 1.
                mid_mask = old_mask*1.0
                mid_mask[x] = 0.5
                mid = InsideOut(example, grammar, mid_mask, steps=steps, with_gradient=False)

                # Multilinearity check. Check that three points form a line
                #assert abs(mid.den - (0.5 * (f_p.den - f_m.den) / 1 + f_m.den)) < 1e-8
                #assert abs(mid.num - (0.5 * (f_p.num - f_m.num) / 1 + f_m.num)) < 1e-8
                assert abs(mid.den - 0.5 * f_p.den) < 1e-8
                assert abs(mid.num - 0.5 * f_p.num) < 1e-8

        show = False

#        if test_onebest:
#            # one-best rollout
#            r1 = rollout(grammar, example, new_mask)
#            d['onebest'] = r1
#            if abs(surrogate - r1) > 0.0001:
#                print colors.red % '** error **', \
#                    'surrogate does not equal onebest'
#                show = True

        estimate = est[x]

        d.update({'estimate': estimate,
                  'ad_num': ad_num,
                  'ad_den': ad_den,
                  'rel-error': relative_error(surrogate, estimate),
                  'abs-error': abs(surrogate-estimate),
                  'tie': tie})

        if abs(f_c.val - surrogate) > 0.001:  # need a big enough change.
            if surrogate < f_c.val:
                d['delta_type'] = 'decr'
            else:
                d['delta_type'] = 'incr'
        else:
            d['delta_type'] = 'same'


        is_error = abs(surrogate - estimate) > 0.001 or not np.isfinite(estimate)

        if is_error:
            print "%s: estimate doesn't match surrogate" % (colors.red % 'error')
            show = True

        # Taylor expansion should match brute-force method.
        #
        # Note: we're not comparing directly to onebest, since it'll just result
        # in confusion.
        Errors.data.append({'action': d['action'],
                            'delta': d['delta_type'],
                            #'zero': float(d['surrogate']==0),
                            'n_error': int(is_error),
                            'tie': tie})

        if show:
            for k, v in sorted(d.items()):
                if isinstance(v, float):
                    print '%30s: %g' % (k, v)
                else:
                    print '%30s: %s' % (k, v)

            #foobar(f_c)

        data.append(d)

    df = DataFrame(data)

#    print df

    if df.empty:
        print '** dataframe empty **'
        return

    Errors.show()

    if 0:
        #scale = 1
        scale = max(np.abs(df.fd_den).max(), np.abs(df.ad_den).max()) or 1
        compare(df.fd_den/scale,
                df.ad_den/scale,
                alphabet=example.nodes,
                show_regression=1, scatter=1,
                name='test_grad denominator')
        #scale = 1
        scale = max(np.abs(df.fd_num).max(), np.abs(df.ad_num).max()) or 1
        compare(df.fd_num/scale,
                df.ad_num/scale,
                scatter=1, show_regression=1,
                alphabet=example.nodes,
                name='test_grad numerator')

#        if test_onebest:
#            compare(df.onebest,
#                    df.estimate,
#                    alphabet=example.nodes,
#                    show_regression=1,
#                    scatter=1,
#                    name='onebest v. estimate')

    if 0:
        compare(df.surrogate, df.estimate,
                alphabet=example.nodes,
                show_regression=1, scatter=1,
                name='surrogate v. estimate')

#    if 1:
#        if test_grad and test_onebest:
#            compare(df.onebest, df.surrogate,
#                    alphabet=example.nodes,
#                    show_regression=1, scatter=1,
#                    name='onebest v. surrogate')

#    goal = {d['span']: d['surrogate'] for d in data}

    if 0:
        pl.ioff()
        pl.show()
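Not from the project above; a toy illustration of the multilinearity property the `mid_mask` check relies on: for a function that is linear in each mask entry taken separately, the value at `mask[x] = 0.5` lies exactly halfway between the values at 0 and 1.

import numpy as np

rng = np.random.RandomState(0)
w = rng.rand(4)

def f(mask):
    # multilinear: linear in each coordinate of `mask` taken separately
    return w.dot(mask) + 3.0 * mask[0] * mask[2]

lo, hi, mid = np.ones(4), np.ones(4), np.ones(4)
lo[1], hi[1], mid[1] = 0.0, 1.0, 0.5     # perturb a single coordinate

# For a multilinear f the three points are collinear, so f(mid) == (f(lo) + f(hi)) / 2.
print(f(mid), 0.5 * (f(lo) + f(hi)))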