def test_max_marginals():
    """
    Check max-marginals against the score of the best path.

    For every generated hypergraph, no node or edge max-marginal may exceed
    the best path score (within 1e-4), and every edge contained in the best
    path must have a max-marginal almost equal to that score.
    """
    tolerance = 1e-4
    for graph in hypergraphs():
        potentials = utils.random_viterbi_potentials(graph)
        print(potentials.show(graph))

        best_path = ph.best_path(graph, potentials)
        best_score = potentials.dot(best_path)

        # Dump the edges of the best path and its score for debugging.
        print("BEST")
        report = ["%20s : %s"%(edge.label, potentials[edge])
                  for edge in best_path.edges]
        print("\n".join(report))
        print(best_score)
        nt.assert_not_equal(best_score, 0.0)

        marginals = ph.compute_marginals(graph, potentials)
        for node in graph.nodes:
            nt.assert_less_equal(marginals[node], best_score + tolerance)
        for edge in graph.edges:
            edge_marginal = marginals[edge]
            nt.assert_less_equal(edge_marginal, best_score + tolerance)
            if edge in best_path:
                nt.assert_almost_equal(edge_marginal, best_score)
def inference(self, x, w, relaxed=False):
    """
    Decode input `x` under weights `w` and return the set of edge labels
    of the resulting path.

    When the model is unconstrained the path comes from `ph.best_path`;
    otherwise a hypergraph LP is built, the constraints from
    `self.constraints` are added, and it is solved with GUROBI or GLPK
    depending on `self._use_gurobi`.  `relaxed` (or `self._use_relaxed`)
    switches the LP to its non-integral form and decodes the fractional
    solution.  With `self._debug` set, timings are written to stderr.
    """
    relaxed = relaxed or self._use_relaxed

    if self._debug:
        started = time.time()
    hypergraph = self._build_hypergraph(x)
    if self._debug:
        print >>sys.stderr, "BUILD HYPERGRAPH:", time.time() - started
        started = time.time()
    potentials = self._build_potentials(hypergraph, x, w)
    if self._debug:
        print >>sys.stderr, "BUILD POTENTIALS:", time.time() - started

    if self._constrained:
        if self._debug:
            started = time.time()
        constraints = self.constraints(x, hypergraph)
        hyperlp = lp.HypergraphLP.make_lp(hypergraph,
                                          potentials,
                                          integral=not relaxed)
        hyperlp.add_constraints(constraints)
        if self._debug:
            print >>sys.stderr, "BUILD LP:", time.time() - started
            started = time.time()

        # mip=1 asks the solver for an integral solution, mip=0 for the
        # relaxation.
        if self._use_gurobi:
            solver_class = pulp.solvers.GUROBI
        else:
            solver_class = pulp.solvers.GLPK
        hyperlp.solve(solver_class(mip=0 if relaxed else 1))
        if self._debug:
            print >>sys.stderr, "SOLVE LP:", time.time() - started

        path = hyperlp.decode_fractional() if relaxed else hyperlp.path
    else:
        if self._debug:
            started = time.time()
        path = ph.best_path(hypergraph, potentials)
        if self._debug:
            print >>sys.stderr, "BEST PATH:", time.time() - started

    if self._debug:
        # Blank line separating this call's debug output from the next.
        print
    return set(edge.label for edge in path)
def fn(x):
    """
    Evaluate the objective at the multiplier vector `x`.

    Uses `potentials`, `weight_potentials` and `graph` from the enclosing
    scope.  Returns a `(score, subgrad, path)` triple: the score of the best
    path under the combined weights, a dense vector of length `len(x)`
    filled from the `(index, value)` pairs of `potentials.dot(path)`, and
    the path itself.
    """
    adjusted = ph.pairwise_dot(potentials, x)
    dual_weights = weight_potentials.times(adjusted)
    path = ph.best_path(graph, dual_weights)
    score = dual_weights.dot(path)

    sparse_entries = potentials.dot(path)
    subgrad = np.zeros(len(x))
    for index, value in sparse_entries:
        subgrad[index] = value
    return score, subgrad, path
def test_subgradient():
    """
    Check that constrained decoding returns a path containing the edge
    named by a random "have" constraint.
    """
    for graph in hypergraphs():
        potentials = utils.random_log_viterbi_potentials(graph)
        constraints, required_edge = random_have_constraint(graph)

        unconstrained = ph.best_path(graph, potentials)
        match = constraints.check(unconstrained)
        if required_edge not in unconstrained:
            # The unconstrained path misses the edge, so the check must
            # report the "have" constraint first.
            nt.assert_equal(match[0], "have")

        constrained = opt.best_constrained_path(graph, potentials, constraints)
        assert required_edge in constrained
def test_pruning():
    """
    Check hypergraph pruning.

    With a threshold of -0.99 the best path must survive pruning with an
    unchanged score.  With a threshold of 0.001 every surviving edge must
    keep its potential and have a max-marginal above the threshold.
    """
    for graph in hypergraphs():
        potentials = utils.random_viterbi_potentials(graph)

        original_path = ph.best_path(graph, potentials)
        pruned_graph, pruned_potentials = \
            ph.prune_hypergraph(graph, potentials, -0.99)
        prune_path = ph.best_path(pruned_graph, pruned_potentials)

        assert len(original_path.edges) > 0
        for edge in original_path.edges:
            assert edge in prune_path
        valid_path(pruned_graph, prune_path)

        original_score = potentials.dot(original_path)
        print(original_score)
        print(pruned_potentials.dot(prune_path))
        nt.assert_almost_equal(original_score,
                               pruned_potentials.dot(prune_path))

        # Test pruning amount.
        prune = 0.001
        max_marginals = ph.compute_marginals(graph, potentials)
        pruned_graph, pruned_potentials = \
            ph.prune_hypergraph(graph, potentials, prune)
        assert (len(pruned_graph.edges) > 0)

        # Pair up surviving edges with their originals through the labels.
        original_edges = {edge.label: edge for edge in graph.edges}
        new_edges = {edge.label: edge for edge in pruned_graph.edges}
        for name, new_edge in new_edges.iteritems():
            orig = original_edges[name]
            nt.assert_almost_equal(potentials[orig],
                                   pruned_potentials[new_edge])
            nt.assert_greater(max_marginals[orig], prune)
def backtrace(self, item):
    """
    Finish the chart, score its hypergraph and return the node labels of
    the best path.

    Stores `self.hypergraph`, `self.scores`, `self.skips` and `self.pot`
    as side effects.  When `self.m` is set, the hypergraph is additionally
    extended by a counting potential (also stored as `self.counts`) so that
    the decoded path is taken from the count-extended hypergraph.

    `item` is accepted for interface compatibility and is not used here.
    """
    self.hypergraph = self.chart.finish()
    num_edges = len(self.hypergraph.edges)

    scores = np.zeros(num_edges)
    self.skips = np.zeros(num_edges)
    self.scores = scores
    for edge_num, label, tail_labels in self.hypergraph.node_labels():
        scores[edge_num] = self.score(label, len(tail_labels))
    self.pot = ph.LogViterbiPotentials(self.hypergraph) \
        .from_array(scores)

    # Identity test: `!= None` would dispatch to a user-defined __ne__.
    if self.m is None:
        path = ph.best_path(self.hypergraph, self.pot)
        return [node.label for node in path.nodes]

    counts = np.zeros(num_edges, dtype=np.int32)
    if self.m > (self.n / 2):
        # Count span widths on single-tail right triangles and bound the
        # total by n - m instead of m.
        limit = self.n - self.m
        for edge_num, label, tail_labels in self.hypergraph.node_labels():
            typ, d, s, t, _ = label
            if (typ == interface.Tri and d == interface.Right
                    and len(tail_labels) == 1 and t != s):
                counts[edge_num] = (t - s)
    else:
        # Count one per edge whose head label is a trapezoid.
        limit = self.m
        for edge_num, label in self.hypergraph.head_labels():
            counts[edge_num] = 1 if (label[0] == interface.Trap) else 0

    self.counts = ph.CountingPotentials(self.hypergraph) \
        .from_array(counts)
    hmap = ph.extend_hypergraph_by_count(self.hypergraph,
                                         self.counts,
                                         0, limit, limit)
    new_pot = self.pot.up_project(hmap.domain_hypergraph, hmap)
    path = ph.best_path(hmap.domain_hypergraph, new_pot)
    return [node.label for node in path.nodes]
def test_best_path():
    """
    Test viterbi path finding.

    The returned path must be valid, have a non-zero score, score at least
    as high as every enumerated path, and be equal to one of them.
    """
    for graph in hypergraphs():
        potentials = utils.random_log_viterbi_potentials(graph)
        path = ph.best_path(graph, potentials)
        nt.assert_not_equal(potentials.dot(path), 0.0)
        valid_path(graph, path)

        found = False
        for candidate in utils.all_paths(graph):
            assert potentials.dot(path) >= potentials.dot(candidate)
            if path == candidate:
                found = True
        assert found
def regen(self, penalty, counts):
    """
    Re-decode with the stored edge scores shifted by `penalty * counts`.

    Replaces `self.pot` with potentials built from the adjusted scores and
    returns the node labels of the best path in `self.hypergraph`.
    """
    # The base scores are read from self.scores rather than recomputed
    # per edge here.
    adjusted = self.scores + (penalty * counts)
    self.pot = ph.LogViterbiPotentials(self.hypergraph).from_array(adjusted)

    best = ph.best_path(self.hypergraph, self.pot)
    labels = []
    for node in best.nodes:
        labels.append(node.label)
    return labels
def test_outside():
    """
    Test outside chart properties.

    For every node the product of its inside and outside values must not
    exceed the best path score (within 1e-4).  For every terminal node in
    the tail of a best-path edge that product must almost equal the best
    path score.
    """
    for h in hypergraphs():
        w = utils.random_viterbi_potentials(h)
        path = ph.best_path(h, w)
        chart = ph.inside_values(h, w)
        best = w.dot(path)
        nt.assert_not_equal(best, 0.0)
        out_chart = ph.outside_values(h, w, chart)

        for node in h.nodes:
            other = chart[node] * out_chart[node]
            nt.assert_less_equal(other, best + 1e-4)

        for edge in path.edges:
            for node in edge.tail:
                if node.is_terminal:
                    # Compute the product for this node; reusing the value
                    # left over from the loop above would test an
                    # unrelated node.
                    other = chart[node] * out_chart[node]
                    nt.assert_almost_equal(other, best)
def test_variables():
    """
    Test variable constraint checking.

    Checks the best path against a random constraint set: "have" must be
    reported exactly when the chosen edge is missing from the path, and in
    every case exactly one entry is reported.
    """
    for graph in hypergraphs():
        potentials = utils.random_viterbi_potentials(graph)
        variables, edge = random_constraint_trans(graph)
        best = ph.best_path(graph, potentials)
        reported = list(variables.check(best))

        if edge in best:
            print(" ".join(("Should have", str(edge.id))))
            assert "have" not in reported
        else:
            print(" ".join(("Should not have", str(edge.id))))
            assert "have" in reported
            assert "not" not in reported
        nt.assert_equal(len(reported), 1)
def parse(self, sentence):
    """
    Build a parse hypergraph for `sentence`, decode it and print the best
    path.

    The sentence is split on single spaces.  Words that are not in
    `self.terminals`, or whose count there is below 5, are replaced by
    '_RARE_'.  One node is added per word and per matching unary rule;
    binary rules are then combined bottom-up over all spans.  Each edge of
    the best path is printed with its potential.  Returns None.
    """
    words = sentence.strip().split(" ")
    n = len(words)
    nodes = {}
    for i, word in enumerate(words):
        if word not in self.terminals or self.terminals[word] < 5:
            words[i] = '_RARE_'

    sentence_graph = ph.Hypergraph()
    with sentence_graph.builder() as b:
        # Width-one spans: the word itself plus every unary rule above it.
        for i, word in enumerate(words, start=1):
            relevant_rules = (rule for rule in self.get_unary_rules()
                              if rule.rhs_first == word)
            r = RuleSpan(word, i, i)
            nodes[r] = b.add_node(label=r)
            for rule in relevant_rules:
                nodes[RuleSpan(rule.lhs, i, i)] = b.add_node(
                    [([nodes[RuleSpan(rule.rhs_first, i, i)]], rule)],
                    label=RuleSpan(rule.lhs, i, i))

        # Wider spans [i, j], shortest first, split at every point s.
        for l in xrange(1, n):
            for i in xrange(1, n-l+1):
                j = i+l
                for nonterminal in list(self.multinomials) + ["S"]:
                    edgelist = []
                    for rule in self.multinomials[nonterminal]:
                        if rule.unary:
                            continue
                        for s in xrange(i, j):
                            rule_span1 = RuleSpan(rule.rhs_first, i, s)
                            rule_span2 = RuleSpan(rule.rhs_second, s+1, j)
                            # Test membership on the dict itself; on
                            # Python 2 `.keys()` builds a list and scans it.
                            if rule_span1 in nodes and rule_span2 in nodes:
                                edgelist.append((
                                    [nodes[rule_span1], nodes[rule_span2]],
                                    rule))
                    if edgelist:
                        rs = RuleSpan(nonterminal, i, j)
                        nodes[rs] = b.add_node(edgelist, label=rs)

    weights = ph.Potentials(sentence_graph).build(self.build_potentials)
    path = ph.best_path(sentence_graph, weights)
    for edge in path.edges:
        print("%s %s" % (edge.label, self.build_potentials(edge.label)))
def test_lp():
    """
    Check the hypergraph LP against `ph.best_path`.

    The unconstrained LP solution must score the same as the best path and
    use only edges of that path.  After adding a random "have" constraint,
    the LP solution must contain the constrained edge.
    """
    import pydecode.lp as lp
    for graph in hypergraphs():
        potentials = utils.random_log_viterbi_potentials(graph)

        program = lp.HypergraphLP.make_lp(graph, potentials)
        program.solve()
        lp_path = program.path
        dp_path = ph.best_path(graph, potentials)
        nt.assert_almost_equal(potentials.dot(lp_path),
                               potentials.dot(dp_path))
        for edge in lp_path.edges:
            assert edge in dp_path

        # Constraint.
        constraints, required_edge = random_have_constraint(graph)
        constrained = lp.HypergraphLP.make_lp(graph, potentials)
        constrained.add_constraints(constraints)
        constrained.solve()
        assert required_edge in constrained.path
# 0.398276388645 # 0.686978042126 # 0.0620245561004 # 0.156915932894 # 0.227964177728 # 0.761591732502 # 0.0153166977689 # 0.402361214161 # 0.468028366566 # 0.351031720638 # 0.130741521716 # # In[55]: path = hyper.best_path(hyper1, potentials) potentials.dot(path) # Out[55]: # 3.458035945892334 # In[56]: import pydecode.optimization as opt cpath = opt.best_constrained_path(hyper1, potentials, constraints) # In[57]:
def best_path(self):
    """
    Return the best path in `self.hypergraph` under the potentials
    produced by `self.viterbi_potentials()`.
    """
    # Build the potentials first, then hand both to the decoder.
    potentials = self.viterbi_potentials()
    hypergraph = self.hypergraph
    return ph.best_path(hypergraph, potentials)
def test_diff_potentials_fail():
    """
    Call `ph.best_path` with potentials that were built for a different
    hypergraph.

    NOTE(review): judging by the name this call is presumably expected to
    raise; the expectation itself is not expressed inside this function
    (possibly via a decorator) -- confirm.
    """
    first_graph, _unused_potentials = random_hypergraph()
    _unused_graph, second_potentials = random_hypergraph()
    ph.best_path(first_graph, second_potentials)
# Show every edge of hyper1 with its label and potential.
for edge in hyper1.edges:
    print hyper1.label(edge), potentials[edge]

# Out[]:

#     First Edge 1.0
#     Second Edge 5.0
#     Third Edge 5.0
#

# Next, we compute the best path under these potentials.

# In[ ]:

path = ph.best_path(hyper1, potentials)

# In[ ]:

# Score of the best path.
print potentials.dot(path)

# Out[]:

#     6.0
#

# In[ ]:

# Render the hypergraph inline in the notebook.
display.HypergraphFormatter(hyper1).to_ipython()
# NOTE(review): `b`, `n1`, `n2` and `hyp` are defined before this excerpt.
# The add_node calls below presumably continue a builder block opened
# above -- confirm their indentation against the full notebook.
n3 = b.add_node(label = "c")
n4 = b.add_node(label = "d")
# "e" has one incoming edge ("edge1") with tail n1, n2.
n5 = b.add_node((([n1, n2], "edge1"),), label = "e")
# "root" has two alternative incoming edges: "edge3" and "edge2".
b.add_node([([n5], "edge3"), ([n3, n4], "edge2")], label = "root")

def build_potentials(label):
    """Return the fixed score for the edge labelled `label`.

    Raises KeyError for any label other than "edge1", "edge2" or "edge3".
    """
    return {"edge1" : 3, "edge2" : 1, "edge3" : 1}[label]
potentials = ph.Potentials(hyp).build(build_potentials)

# Draw the graph

# In[3]:

display.HypergraphPotentialFormatter(hyp, potentials).to_ipython()

# Out[3]:

#     <IPython.core.display.Image at 0x2fb1410>

# In[4]:

# Find the best path and draw it on the hypergraph.
path = ph.best_path(hyp, potentials)
display.HypergraphPathFormatter(hyp, [path]).to_ipython()

# Out[4]:

#     <IPython.core.display.Image at 0x33e2910>
# NOTE(review): the line below is the tail of an error traceback from a
# previous cell whose start is outside this excerpt.  It is notebook
# output, not code, so it is kept here as a comment -- confirm.
# TypeError: not all arguments converted during string formatting

# In[ ]:

def build_potentials(arc):
    """Print `arc` and return a random score from `random.random()`."""
    print arc
    return random.random()
potentials = ph.Potentials(hypergraph).build(build_potentials)
# phyper, ppotentials = ph.prune_hypergraph(hypergraph, potentials, 0.5)

# In[ ]:

path = ph.best_path(hypergraph, potentials)
best = potentials.dot(path)
maxmarginals = ph.compute_marginals(hypergraph, potentials)

# Mean of the per-edge marginal values.
avg = 0.0
for edge in hypergraph.edges:
    avg += float(maxmarginals[edge])
avg = avg / float(len(hypergraph.edges))

# Threshold: 90% of the best path score plus 10% of the mean marginal.
thres = ((0.9) * best + (0.1) * avg)

# Collect the ids of the edges whose marginal reaches the threshold.
kept = set()
for edge in hypergraph.edges:
    score = float(maxmarginals[edge])
    if score >= thres:
        kept.add(edge.id)
# Show every edge of hyper1 with its label and weight.
for edge in hyper1.edges:
    print hyper1.label(edge), weights[edge]

# Out[14]:

#     First Edge 1.0
#     Second Edge 5.0
#     Third Edge 5.0
#

# Next, we compute the best path under these weights.

# In[15]:

path = ph.best_path(hyper1, weights)

# In[16]:

# Score of the best path.
print weights.dot(path)

# Out[16]:

#     6.0
#

# In[17]:

# Render the hypergraph inline in the notebook.
display.HypergraphFormatter(hyper1).to_ipython()
def decode_fractional(self):
    """
    Decode a path from the current values of the LP edge variables.

    Reads the value of each edge's LP variable, in the order of
    `self.hypergraph.edges`, uses those values as potentials and returns
    the best path under them.
    """
    edge_values = []
    for edge in self.hypergraph.edges:
        edge_values.append(pulp.value(self.edge_vars[edge.id]))
    fractional = ph.LogViterbiPotentials(self.hypergraph).from_vector(
        edge_values)
    return ph.best_path(self.hypergraph, fractional)