def test_max_marginals():
    """
    Check max-marginals against the score of the best path.

    For every hypergraph yielded by ``hypergraphs()``: the best path score
    must be non-zero, no node or edge marginal may exceed that score by more
    than 1e-4, and every edge on the best path must have a marginal almost
    equal to that score.
    """
    tolerance = 1e-4
    for graph in hypergraphs():
        weights = utils.random_viterbi_potentials(graph)
        print(weights.show(graph))

        best_path = ph.best_path(graph, weights)
        best = weights.dot(best_path)

        # Dump the best path, one "label : weight" row per edge.
        print("BEST")
        rows = ["%20s : %s" % (e.label, weights[e]) for e in best_path.edges]
        print("\n".join(rows))
        print(best)
        nt.assert_not_equal(best, 0.0)

        max_marginals = ph.compute_marginals(graph, weights)
        upper_bound = best + tolerance

        for node in graph.nodes:
            nt.assert_less_equal(max_marginals[node], upper_bound)

        for edge in graph.edges:
            edge_marginal = max_marginals[edge]
            nt.assert_less_equal(edge_marginal, upper_bound)
            # Edges on the best path must attain the best score.
            if edge in best_path:
                nt.assert_almost_equal(edge_marginal, best)
def get_marginals(self):
    """
    Return the edge marginals divided by ``self.total_potentials``.

    Calls ``self.get_potentials()`` first when ``self.potentials`` is falsy,
    and ``self.sum_potentials()`` when ``self.total_potentials`` is falsy.

    Returns:
        dict mapping ``edge.id`` to the marginal of that edge divided by
        ``self.total_potentials``, for every edge of ``self.hypergraph``.

    Raises:
        AssertionError: if ``self.total_potentials`` is not positive; the
            message includes the words of the sentence.
    """
    if not self.potentials:
        self.get_potentials()
    raw_marginals = ph.compute_marginals(self.hypergraph, self.potentials)

    if not self.total_potentials:
        self.sum_potentials()
    normalizer = self.total_potentials
    assert normalizer > 0, "sentence is " + " ".join(self.words)

    return {
        edge.id: raw_marginals[edge] / normalizer
        for edge in self.hypergraph.edges
    }
def em(distribution_table, label_map, hypergraph, base=None, epochs=10):
    """
    Run ``epochs`` rounds of re-estimation of ``distribution_table``.

    Each round builds log-prob potentials from ``base`` plus the log of the
    table's array, computes marginals, increments the table for every node
    by ``exp(marginal(node) - marginal(root))`` and calls
    ``distribution_table.reestimate()``.

    Args:
        distribution_table: object providing ``to_array``, ``inc`` and
            ``reestimate``.
        label_map: callable applied to ``node.label``; also passed to
            ``distribution_table.to_array``.
        hypergraph: the hypergraph to run on.
        base: array added to the log table values; defaults to zeros of
            length ``len(hypergraph)``.
        epochs: number of rounds.

    Returns:
        list with the root marginal of each round.
    """
    offsets = np.zeros([len(hypergraph)]) if base is None else base

    root_history = []
    for epoch in range(epochs):
        print("epoch: %s" % epoch)

        builder = ph.LogProbPotentials(hypergraph)
        log_table = np.log(distribution_table.to_array(hypergraph, label_map))
        potentials = builder.from_array(offsets + log_table)

        print("start")
        margs = ph.compute_marginals(hypergraph, potentials)
        print("stop")

        root_marginal = margs[hypergraph.root]
        for node in hypergraph.nodes:
            # Values are in log space, so subtract the root before exp().
            weight = math.exp(margs[node] - root_marginal)
            distribution_table.inc(label_map(node.label), weight)
        distribution_table.reestimate()

        print(root_marginal)
        root_history.append(root_marginal)
    return root_history
def test_semirings():
    """
    Compare semiring-specific entry points with the generic ones.

    For every hypergraph yielded by ``hypergraphs()``, with constant
    potentials:
      * ``ph.inside`` and ``ph.inside_values`` must give equal node values
        for log-Viterbi potentials;
      * ``ph.LogViterbi.compute_marginals`` and ``ph.compute_marginals``
        must give almost equal edge marginals.
    The Viterbi and Inside calls are only exercised; their results are not
    checked.
    """
    def ten(l):
        return 10.0

    def half(l):
        return 0.5

    for hypergraph in hypergraphs():
        # Viterbi semiring: result is not inspected.
        viterbi_potentials = ph.ViterbiPotentials(hypergraph).build(ten)
        viterbi_marginals = ph.Viterbi.compute_marginals(
            hypergraph, viterbi_potentials)

        # Two separately built, identically valued log-Viterbi potentials.
        log_potentials = ph.LogViterbiPotentials(hypergraph).build(ten)
        generic_potentials = ph.LogViterbiPotentials(hypergraph).build(ten)

        chart_a = ph.inside(hypergraph, log_potentials)
        chart_b = ph.inside_values(hypergraph, generic_potentials)
        for node in hypergraph.nodes:
            nt.assert_equal(chart_a[node], chart_b[node])

        marginals_a = ph.LogViterbi.compute_marginals(
            hypergraph, log_potentials)
        marginals_b = ph.compute_marginals(hypergraph, generic_potentials)
        for edge in hypergraph.edges:
            nt.assert_almost_equal(marginals_a[edge], marginals_b[edge])

        # Inside semiring: results are not inspected.
        inside_potentials = ph.Inside.Potentials(hypergraph).build(half)
        inside_chart = ph.Inside.inside(hypergraph, inside_potentials)
        inside_potentials = ph.Inside.Potentials(hypergraph).build(half)
def test_pruning():
    """
    Exercise ``ph.prune_hypergraph`` with two thresholds.

    With threshold -0.99, every edge of the original best path must be in
    the best path of the pruned hypergraph, that path must pass
    ``valid_path`` and its score must be almost equal to the original one.

    With threshold 0.001, the pruned hypergraph must keep at least one edge;
    each kept edge (matched to the original by label) must keep its
    potential and have an original marginal greater than the threshold.
    """
    for graph in hypergraphs():
        weights = utils.random_viterbi_potentials(graph)
        best_before = ph.best_path(graph, weights)

        pruned_graph, pruned_weights = ph.prune_hypergraph(
            graph, weights, -0.99)
        best_after = ph.best_path(pruned_graph, pruned_weights)

        assert len(best_before.edges) > 0
        for path_edge in best_before.edges:
            assert path_edge in best_after
        valid_path(pruned_graph, best_after)

        score_before = weights.dot(best_before)
        print(score_before)
        print(pruned_weights.dot(best_after))
        nt.assert_almost_equal(score_before, pruned_weights.dot(best_after))

        # Test pruning amount.
        threshold = 0.001
        max_marginals = ph.compute_marginals(graph, weights)
        pruned_graph, pruned_weights = ph.prune_hypergraph(
            graph, weights, threshold)
        assert len(pruned_graph.edges) > 0

        # Edges of the two hypergraphs are matched through their labels.
        before_by_label = {e.label: e for e in graph.edges}
        after_by_label = {e.label: e for e in pruned_graph.edges}

        for label, kept_edge in after_by_label.items():
            source_edge = before_by_label[label]
            nt.assert_almost_equal(
                weights[source_edge], pruned_weights[kept_edge])
            nt.assert_greater(max_marginals[source_edge], threshold)
def test_posteriors():
    """
    Check the posteriors by enumeration.

    For every hypergraph yielded by ``hypergraphs()``, all paths are
    enumerated and scored. Each edge's share of the total path score must
    match its marginal divided by the root marginal, and the inside value at
    the root must match the total score (both to 4 places).
    """
    for graph in hypergraphs():
        weights = utils.random_inside_potentials(graph)
        marginals = ph.compute_marginals(graph, weights)
        all_paths = utils.all_paths(graph)

        # Accumulate, per edge id, the score of every path through the edge.
        edge_mass = defaultdict(float)
        partition = 0.0
        for path in all_paths:
            score = weights.dot(path)
            partition += score
            for path_edge in path:
                edge_mass[path_edge.id] += score

        for edge in graph.edges:
            computed = marginals[edge] / marginals[graph.root]
            enumerated = edge_mass[edge.id] / partition
            nt.assert_almost_equal(computed, enumerated, places=4)

        inside_chart = ph.inside(graph, weights)
        nt.assert_almost_equal(inside_chart[graph.root], partition, places=4)
def em(distribution_table, label_map, hypergraph, base=None, epochs=10):
    """
    Iteratively re-estimate ``distribution_table`` on ``hypergraph``.

    Every epoch:
      1. builds log-prob potentials from the base array plus the log of
         ``distribution_table.to_array(hypergraph, label_map)``;
      2. computes marginals with ``ph.compute_marginals``;
      3. for each node, increments the table entry ``label_map(node.label)``
         by ``exp(marginal(node) - marginal(root))``;
      4. calls ``distribution_table.reestimate()``.

    Args:
        distribution_table: table object exposing ``to_array``, ``inc`` and
            ``reestimate``.
        label_map: callable mapping a node label to a table key.
        hypergraph: hypergraph to compute marginals on.
        base: optional array added to the log table values; when ``None`` a
            zero array of length ``len(hypergraph)`` is used.
        epochs: number of epochs to run.

    Returns:
        list of the root marginal recorded at each epoch.
    """
    if base is not None:
        base_potentials = base
    else:
        base_potentials = np.zeros([len(hypergraph)])

    log_likelihoods = []
    for epoch_index in range(epochs):
        print("epoch: %s" % epoch_index)

        factory = ph.LogProbPotentials(hypergraph)
        table_array = distribution_table.to_array(hypergraph, label_map)
        potentials = factory.from_array(base_potentials + np.log(table_array))

        print("start")
        margs = ph.compute_marginals(hypergraph, potentials)
        print("stop")

        log_z = margs[hypergraph.root]
        for node in hypergraph.nodes:
            key = label_map(node.label)
            # Difference of log-space values, exponentiated.
            distribution_table.inc(key, math.exp(margs[node] - log_z))
        distribution_table.reestimate()

        print(log_z)
        log_likelihoods.append(log_z)

    return log_likelihoods
# In[ ]: def build_potentials(arc): print arc return random.random() potentials = ph.Potentials(hypergraph).build(build_potentials) # phyper, ppotentials = ph.prune_hypergraph(hypergraph, potentials, 0.5) # In[ ]: path = ph.best_path(hypergraph, potentials) best = potentials.dot(path) maxmarginals = ph.compute_marginals(hypergraph, potentials) avg = 0.0 for edge in hypergraph.edges: avg += float(maxmarginals[edge]) avg = avg / float(len(hypergraph.edges)) thres = ((0.9) * best + (0.1) * avg) kept = set() for edge in hypergraph.edges: score = float(maxmarginals[edge]) if score >= thres: kept.add(edge.id) # In[ ]: