Exemplo n.º 1
0
def test_max_marginals():
    """
    Check max-marginals against the score of the best path.

    For every hypergraph yielded by ``hypergraphs()``: the best path score
    must be non-zero, no node or edge max-marginal may exceed that score by
    more than 1e-4, and every edge contained in the best path must have a
    max-marginal almost equal to the best score.
    """
    slack = 1e-4
    for graph in hypergraphs():
        weights = utils.random_viterbi_potentials(graph)
        print(weights.show(graph))

        best_path = ph.best_path(graph, weights)
        best_score = weights.dot(best_path)
        print("BEST")

        # One "label : weight" row per edge on the best path.
        rows = ["%20s : %s" % (e.label, weights[e]) for e in best_path.edges]
        print("\n".join(rows))
        print(best_score)
        nt.assert_not_equal(best_score, 0.0)

        max_marginals = ph.compute_marginals(graph, weights)
        upper_bound = best_score + slack
        for node in graph.nodes:
            nt.assert_less_equal(max_marginals[node], upper_bound)

        for edge in graph.edges:
            value = max_marginals[edge]
            nt.assert_less_equal(value, upper_bound)
            if edge in best_path:
                nt.assert_almost_equal(value, best_score)
Exemplo n.º 2
0
    def get_marginals(self):
        """
        Return the marginal of every hyperedge divided by the total potential.

        ``self.get_potentials()`` is called first when ``self.potentials`` is
        falsy, and ``self.sum_potentials()`` is called when
        ``self.total_potentials`` is falsy (presumably each one fills in the
        corresponding attribute -- confirm against their definitions).

        Returns:
            dict mapping ``edge.id`` to
            ``marginal_values[edge] / self.total_potentials`` for every edge
            of ``self.hypergraph``.

        Raises:
            AssertionError: if the total potential is not positive; the
                message contains the words of the sentence.
        """
        if not self.potentials:
            self.get_potentials()

        marginal_values = ph.compute_marginals(self.hypergraph,
                                               self.potentials)

        # Compute the normalizer on demand.
        if not self.total_potentials:
            self.sum_potentials()
        root_value = self.total_potentials

        # A non-positive normalizer would make the division below meaningless.
        assert root_value > 0, "sentence is " + " ".join(self.words)

        return {edge.id: marginal_values[edge] / root_value
                for edge in self.hypergraph.edges}
Exemplo n.º 3
0
def em(distribution_table, label_map, hypergraph, base=None, epochs=10):
    """
    Re-estimate ``distribution_table`` for ``epochs`` rounds.

    Each round builds log-prob potentials from ``base`` plus the log of the
    table's array, computes marginals over ``hypergraph``, increments the
    table for every node by ``exp(margs[node] - margs[root])`` under the label
    ``label_map(node.label)``, and then calls ``reestimate()``.

    Args:
        distribution_table: object providing ``to_array``, ``inc`` and
            ``reestimate``.
        label_map: callable applied to ``node.label``; also passed to
            ``to_array``.
        hypergraph: hypergraph exposing ``nodes`` and ``root``.
        base: optional array added to the log table scores; defaults to
            zeros of length ``len(hypergraph)``.
        epochs: number of rounds to run.

    Returns:
        list with the root marginal of every round (presumably the
        log-likelihood trace -- confirm with the caller).
    """
    if base is None:
        base_potentials = np.zeros([len(hypergraph)])
    else:
        base_potentials = base

    root_scores = []
    for epoch in range(epochs):
        print("epoch: %s" % (epoch,))
        builder = ph.LogProbPotentials(hypergraph)
        table_scores = np.log(
            distribution_table.to_array(hypergraph, label_map))
        potentials = builder.from_array(base_potentials + table_scores)

        print("start")
        margs = ph.compute_marginals(hypergraph, potentials)
        print("stop")
        for node in hypergraph.nodes:
            label = label_map(node.label)
            weight = math.exp(margs[node] - margs[hypergraph.root])
            distribution_table.inc(label, weight)
        distribution_table.reestimate()

        root_score = margs[hypergraph.root]
        print(root_score)
        root_scores.append(root_score)
    return root_scores
Exemplo n.º 4
0
def test_semirings():
    """
    Compare semiring-specific entry points with the plain ``ph`` functions.

    Using constant potentials on every hypergraph from ``hypergraphs()``:
    ``ph.inside`` and ``ph.inside_values`` must give equal values for every
    node, and ``ph.LogViterbi.compute_marginals`` must almost equal
    ``ph.compute_marginals`` for every edge. The ``ph.Viterbi`` and
    ``ph.Inside`` calls are only executed; nothing is asserted about their
    results.
    """
    for graph in hypergraphs():
        # Executed only to make sure the calls go through.
        viterbi_potentials = ph.ViterbiPotentials(graph).build(
            lambda _label: 10.0)
        ph.Viterbi.compute_marginals(graph, viterbi_potentials)

        # Two independently built, identical sets of potentials.
        first = ph.LogViterbiPotentials(graph).build(lambda _label: 10.0)
        second = ph.LogViterbiPotentials(graph).build(lambda _label: 10.0)
        inside_chart = ph.inside(graph, first)
        values_chart = ph.inside_values(graph, second)
        for node in graph.nodes:
            nt.assert_equal(inside_chart[node], values_chart[node])

        semiring_marginals = ph.LogViterbi.compute_marginals(graph, first)
        plain_marginals = ph.compute_marginals(graph, second)
        for edge in graph.edges:
            nt.assert_almost_equal(semiring_marginals[edge],
                                   plain_marginals[edge])

        inside_potentials = ph.Inside.Potentials(graph).build(
            lambda _label: 0.5)
        ph.Inside.inside(graph, inside_potentials)

        # Built a second time; the result is not used within this function.
        ph.Inside.Potentials(graph).build(lambda _label: 0.5)
Exemplo n.º 5
0
def test_pruning():
    """
    Exercise ``ph.prune_hypergraph`` on every hypergraph from ``hypergraphs()``.

    First, with a threshold argument of -0.99, every edge of the original
    best path must be contained in the best path of the pruned hypergraph,
    that path must pass ``valid_path``, and both paths must score almost
    equally. Second, with a threshold argument of 0.001, every edge of the
    pruned hypergraph (matched to the original by label) must keep its
    potential and have a max-marginal greater than the threshold.
    """
    for graph in hypergraphs():
        weights = utils.random_viterbi_potentials(graph)

        original_path = ph.best_path(graph, weights)
        pruned_graph, pruned_weights = ph.prune_hypergraph(
            graph, weights, -0.99)
        pruned_path = ph.best_path(pruned_graph, pruned_weights)
        assert len(original_path.edges) > 0
        for edge in original_path.edges:
            assert edge in pruned_path
        valid_path(pruned_graph, pruned_path)

        original_score = weights.dot(original_path)
        print(original_score)
        print(pruned_weights.dot(pruned_path))
        nt.assert_almost_equal(original_score,
                               pruned_weights.dot(pruned_path))

        # Test pruning amount.
        threshold = 0.001
        max_marginals = ph.compute_marginals(graph, weights)
        pruned_graph, pruned_weights = ph.prune_hypergraph(
            graph, weights, threshold)

        assert len(pruned_graph.edges) > 0
        # Index both edge sets by label so surviving edges can be matched up.
        edges_by_label = {edge.label: edge for edge in graph.edges}
        survivors_by_label = {edge.label: edge for edge in pruned_graph.edges}

        for label, survivor in survivors_by_label.items():
            source_edge = edges_by_label[label]
            nt.assert_almost_equal(weights[source_edge],
                                   pruned_weights[survivor])
            nt.assert_greater(max_marginals[source_edge], threshold)
Exemplo n.º 6
0
def test_posteriors():
    """
    Check the posteriors by enumeration.

    For each hypergraph, every path from ``utils.all_paths`` is scored and
    its score is accumulated per edge id. The normalized marginal of each
    edge must match the enumerated ratio, and the inside value at the root
    must match the summed path score (both to 4 places).
    """
    for graph in hypergraphs():
        weights = utils.random_inside_potentials(graph)
        marginals = ph.compute_marginals(graph, weights)

        # Brute force: add each path's score to every edge it contains.
        edge_mass = defaultdict(float)
        total_score = 0.0
        for path in utils.all_paths(graph):
            score = weights.dot(path)
            total_score += score
            for edge in path:
                edge_mass[edge.id] += score

        for edge in graph.edges:
            computed = marginals[edge] / marginals[graph.root]
            enumerated = edge_mass[edge.id] / total_score
            nt.assert_almost_equal(computed, enumerated, places=4)

        inside_chart = ph.inside(graph, weights)
        nt.assert_almost_equal(inside_chart[graph.root], total_score,
                               places=4)
Exemplo n.º 7
0
def em(distribution_table, label_map, hypergraph, base=None,
       epochs=10):
    """
    Run ``epochs`` update rounds on ``distribution_table``.

    Per round: potentials are built from ``base`` plus the log of the table
    array, marginals are computed, each node contributes
    ``exp(margs[node] - margs[root])`` to the table entry for
    ``label_map(node.label)``, and the table is re-estimated.

    Args:
        distribution_table: table offering ``to_array``, ``inc`` and
            ``reestimate``.
        label_map: callable mapping a node label to a table key.
        hypergraph: hypergraph with ``nodes`` and ``root``.
        base: optional offset array; zeros of length ``len(hypergraph)``
            when omitted.
        epochs: how many rounds to run.

    Returns:
        list of the root marginal recorded after each round.
    """
    base_potentials = (np.zeros([len(hypergraph)])
                       if base is None else base)

    history = []
    for round_index in range(epochs):
        print("epoch: %s" % (round_index,))
        potential_factory = ph.LogProbPotentials(hypergraph)
        log_table = np.log(distribution_table.to_array(hypergraph, label_map))
        potentials = potential_factory.from_array(base_potentials + log_table)

        print("start")
        margs = ph.compute_marginals(hypergraph, potentials)
        print("stop")
        for node in hypergraph.nodes:
            key = label_map(node.label)
            # Node marginal relative to the root, taken out of log space.
            amount = math.exp(margs[node] - margs[hypergraph.root])
            distribution_table.inc(key, amount)
        distribution_table.reestimate()

        root_value = margs[hypergraph.root]
        print(root_value)
        history.append(root_value)
    return history
Exemplo n.º 8
0
# In[ ]:

def build_potentials(arc):
    """Print ``arc`` and return a fresh ``random.random()`` value for it."""
    print(arc)
    value = random.random()
    return value


# Potentials for ``hypergraph``, built by calling ``build_potentials``.
potentials = ph.Potentials(hypergraph).build(build_potentials)

# phyper, ppotentials = ph.prune_hypergraph(hypergraph, potentials, 0.5)


# In[ ]:

# Best path, its score, and the marginals under the current potentials.
path = ph.best_path(hypergraph, potentials)
best = potentials.dot(path)
maxmarginals = ph.compute_marginals(hypergraph, potentials)

# Mean marginal over all edges.
avg = sum((float(maxmarginals[edge]) for edge in hypergraph.edges), 0.0)
avg = avg / float(len(hypergraph.edges))

# Threshold: 0.9 of the best score plus 0.1 of the mean marginal.
thres = 0.9 * best + 0.1 * avg

# Ids of the edges whose marginal reaches the threshold.
kept = set()
for edge in hypergraph.edges:
    score = float(maxmarginals[edge])
    if score >= thres:
        kept.add(edge.id)


# In[ ]: