def emit_tree(idx, lines):
    """Assemble a ``DepTreeRNNGSequenceSentence`` from 7-field token rows.

    Every row of ``lines`` is unpacked as
    ``(node_id, form, lemma, pos, feat, head, deprel)``; only ``node_id``,
    ``form``, ``pos``, ``head`` and ``deprel`` are used. ``lines`` is
    traversed twice, so it has to be re-iterable.

    One extra node carrying ``vocabs.Vocab.ES_STR`` is created with the id
    ``(largest node_id) + 1`` and is attached to the root, i.e. the last row
    whose ``head`` equals 0 (or ``-1`` when no such row exists).

    NOTE: ``self`` is not a parameter here; it is looked up in the enclosing
    scope, which is outside this block.

    Args:
        idx: Sentence index forwarded to the sentence constructor.
        lines: Iterable of 7-tuples as described above.

    Returns:
        The ``sent.DepTreeRNNGSequenceSentence`` wrapping the built graph.
    """
    tree_nodes = {}
    last_id = -1
    for node_id, form, _lemma, pos, _feat, _head, _deprel in lines:
        tree_nodes[node_id] = sent.SyntaxTreeNode(
            node_id=node_id, value=form, head=pos)
        if node_id > last_id:
            last_id = node_id

    # Synthetic end-of-sentence node placed right after the largest id.
    eos_id = last_id + 1
    tree_nodes[eos_id] = sent.SyntaxTreeNode(
        node_id=eos_id,
        value=vocabs.Vocab.ES_STR,
        head=vocabs.Vocab.ES_STR)

    root_id = -1
    arcs = []
    for node_id, _form, _lemma, _pos, _feat, head, deprel in lines:
        if head != 0:
            arcs.append(HyperEdge(head, [node_id], None, deprel))
            continue
        # A head of 0 marks the root row; it produces no incoming edge.
        root_id = node_id

    # The end-of-sentence node hangs off the root.
    arcs.append(HyperEdge(root_id, [eos_id], None, vocabs.Vocab.ES_STR))

    return sent.DepTreeRNNGSequenceSentence(
        idx,
        score=None,
        graph=HyperGraph(arcs, tree_nodes),
        surface_vocab=self.value_vocab,
        nt_vocab=self.node_vocab,
        edge_vocab=self.edge_vocab,
        all_surfaces=True,
        output_procs=self.output_procs)
def read_sent(self, line, idx):
    """Turn one input line into a ``GraphSentence`` over lattice nodes.

    Two input formats are handled:

    * ``self.text_input`` truthy: ``line`` is whitespace-separated words.
      The words are wrapped between a ``vocabs.Vocab.SS`` node and a
      ``vocabs.Vocab.ES`` node and chained linearly.
    * otherwise: ``line`` is a Python literal ``(node_list, arc_list)``
      parsed with ``ast.literal_eval``. Each node item provides the word at
      index 0 and the forward, marginal and backward log probabilities at
      indices 1, 2 and 3. With ``self.flatten`` the arcs are replaced by a
      linear chain and ``reset_prob()`` is called on every node; without it
      the ``(from, to)`` pairs of ``arc_list`` are used as edges.

    Args:
        line: Raw input line in one of the formats above.
        idx: Sentence index stored on the returned sentence.

    Returns:
        ``sent.GraphSentence`` built from the resulting hypergraph.

    Raises:
        AssertionError: If the parsed lattice does not start with SS and end
            with ES, or the graph does not have exactly one root and one
            leaf.
    """
    arcs = []
    if not self.text_input:
        raw_nodes, raw_arcs = ast.literal_eval(line)
        nodes = []
        for position, entry in enumerate(raw_nodes):
            nodes.append(
                sent.LatticeNode(node_id=position,
                                 value=self.vocab.convert(entry[0]),
                                 fwd_log_prob=entry[1],
                                 marginal_log_prob=entry[2],
                                 bwd_log_prob=entry[3]))
        if self.flatten:
            # Ignore the given arcs: link consecutive nodes and reset the
            # probabilities of every node, including the last one.
            for position in range(len(nodes) - 1):
                arcs.append(HyperEdge(position, [position + 1]))
                nodes[position].reset_prob()
            nodes[-1].reset_prob()
        else:
            arcs.extend(HyperEdge(src, [dst]) for src, dst in raw_arcs)
        assert nodes[0].value == self.vocab.SS and nodes[-1].value == self.vocab.ES
    else:
        # Plain text: <SS> w1 ... wn <ES> as a linear chain.
        nodes = [sent.LatticeNode(node_id=0, value=vocabs.Vocab.SS)]
        for position, word in enumerate(line.strip().split(), start=1):
            nodes.append(
                sent.LatticeNode(node_id=position,
                                 value=self.vocab.convert(word)))
        nodes.append(
            sent.LatticeNode(node_id=len(nodes), value=vocabs.Vocab.ES))
        for position in range(len(nodes) - 1):
            arcs.append(HyperEdge(position, [position + 1]))

    nodes_by_id = {node.node_id: node for node in nodes}
    graph = HyperGraph(arcs, nodes_by_id)
    assert len(graph.roots()) == 1  # single start node
    assert len(graph.leaves()) == 1  # single end node
    return sent.GraphSentence(idx=idx, graph=graph, vocab=self.vocab)
def test_toposort(self):
    """Check ``HyperGraph.topo_sort`` on a fixed six-node graph.

    The graph is the example from
    https://www.geeksforgeeks.org/topological-sorting/
    """
    nodes = {i: HyperNode(i, i) for i in range(6)}
    adjacency = [
        (5, [2, 0]),
        (4, [0, 1]),
        (2, [3]),
        (3, [1]),
    ]
    edges = [HyperEdge(source, targets) for source, targets in adjacency]
    graph = HyperGraph(edges, nodes)

    expected_order = [5, 4, 2, 3, 1, 0]
    self.assertListEqual(graph.topo_sort(), expected_order)
def normalize_space_at_conll(tree):
    """Split nodes whose value contains whitespace into one node per piece.

    Nodes ``1 .. len_nodes - 1`` of ``tree.graph`` are visited in order
    (node 0 is not visited). The first whitespace-separated piece of a value
    keeps the node's id; each further piece becomes a new node with a fresh
    id (starting at ``graph.len_nodes``) that copies the original ``head``
    and ``node_type`` and is linked from the original node by an edge
    labelled ``"[whtsp]"``.

    All edges of the input graph are carried over except the last one
    yielded by ``graph.iter_edges()``.
    NOTE(review): presumably that dropped edge is a trailing
    end-of-sentence edge -- confirm against the code that builds the tree.

    Args:
        tree: Object exposing the hypergraph as ``tree.graph``.

    Returns:
        Whatever ``remap_id`` returns for the collected nodes, edges and
        leaves (leaves are listed in visiting order).
    """
    graph = tree.graph
    next_id = graph.len_nodes
    ordered_leaves = []
    split_nodes = {}

    # Copy the edges, leaving out the final one.
    kept_edges = list(graph.iter_edges())[:-1]

    for index in range(1, graph.len_nodes):
        source = graph[index]
        for position, piece in enumerate(source.value.split()):
            is_first = position == 0
            # The first piece reuses the original id, the rest get new ids.
            piece_id = source.node_id if is_first else next_id
            fresh = SyntaxTreeNode(piece_id, piece, source.head,
                                   source.node_type)
            split_nodes[piece_id] = fresh
            ordered_leaves.append(fresh)
            if not is_first:
                kept_edges.append(
                    HyperEdge(source.node_id, [next_id], None, "[whtsp]"))
                next_id += 1

    return remap_id(split_nodes, kept_edges, ordered_leaves)
def setUp(self):
    """Create the shared fixture: five nodes and a four-edge graph.

    Nodes 1..5 carry the labels 'a'..'e'. Node 1 points to 2 and 3, and
    node 2 points to 4 and 5. The node dict is stored on ``self.nodes`` and
    the graph on ``self.graph``.
    """
    nodes = {
        key: HyperNode(label, key)
        for key, label in enumerate("abcde", start=1)
    }
    links = ((1, 2), (1, 3), (2, 4), (2, 5))
    edg_list = [HyperEdge(parent, [child]) for parent, child in links]

    self.nodes = nodes
    self.graph = HyperGraph(edg_list, nodes)
def emit_tree(idx, lines):
    """Assemble an ``RNNGSequenceSentence`` from 7-field token rows.

    Every row of ``lines`` is unpacked as
    ``(node_id, form, lemma, pos, feat, head, deprel)``. A node is created
    for each row; an edge ``head -> node_id`` labelled ``deprel`` is created
    for each row unless ``head`` is 0 or ``deprel`` is ``"ROOT"``.
    ``lines`` is traversed twice, so it has to be re-iterable.

    NOTE: ``self`` is not a parameter here; it is looked up in the enclosing
    scope, which is outside this block.

    Args:
        idx: Sentence index forwarded to the sentence constructor.
        lines: Iterable of 7-tuples as described above.

    Returns:
        The ``sent.RNNGSequenceSentence`` wrapping the built graph.
    """
    tree_nodes = {
        node_id: sent.SyntaxTreeNode(node_id=node_id, value=form, head=pos)
        for node_id, form, _lemma, pos, _feat, _head, _deprel in lines
    }
    # Root rows (head 0 or relation "ROOT") contribute no incoming edge.
    arcs = [
        HyperEdge(head, [node_id], None, deprel)
        for node_id, _form, _lemma, _pos, _feat, head, deprel in lines
        if not (head == 0 or deprel == "ROOT")
    ]
    return sent.RNNGSequenceSentence(idx,
                                     HyperGraph(arcs, tree_nodes),
                                     self.surface_vocab,
                                     self.nt_vocab,
                                     all_surfaces=True)
def write_changes(buffer, idx, now_id):
    """Replace node ``idx`` by one node per element of ``buffer``.

    ``buffer[0]`` becomes the value of a rebuilt node stored under ``idx``.
    Every further element becomes a new node with id ``now_id``,
    ``now_id + 1``, ... whose head is the original head wrapped in square
    brackets, linked from ``idx`` by an edge labelled ``"[sp]"``. All
    created nodes are appended to ``leaves`` in order.

    NOTE: ``graph``, ``node_list``, ``leaves``, ``edge_list`` and ``edge``
    are not parameters; they come from the enclosing scope, which is
    outside this block.

    Args:
        buffer: Non-empty indexable sequence of node values.
        idx: Id of the node in ``graph`` being rewritten.
        now_id: First unused node id.

    Returns:
        The next unused node id after all new nodes have been added.
    """
    origin = graph[idx]

    first = SyntaxTreeNode(origin.node_id, buffer[0], origin.head,
                           origin.node_type)
    node_list[idx] = first
    leaves.append(first)

    next_free = now_id
    for position in range(1, len(buffer)):
        extra = SyntaxTreeNode(next_free, buffer[position],
                               "[" + origin.head + "]", origin.node_type)
        node_list[next_free] = extra
        edge_list.append(HyperEdge(idx, [next_free], edge.features, "[sp]"))
        leaves.append(extra)
        next_free += 1
    return next_free
def remap_id(node_list, edge_list, leaves):
    """Renumber nodes by their position in ``leaves`` and rebuild the graph.

    The node at position ``k`` (0-based) of ``leaves`` receives the new id
    ``k + 1``. Every node in ``node_list`` and every edge in ``edge_list``
    is re-created with the new ids. Only the first target of each edge
    (``edge.node_to[0]``) is carried over.

    Args:
        node_list: Mapping of old node id to node.
        edge_list: Iterable of edges referring to old node ids.
        leaves: Nodes in the order that defines the new numbering.

    Returns:
        A ``HyperGraph`` built from the renumbered edges and nodes.

    Raises:
        KeyError: If a node or edge refers to an id that is absent from
            ``leaves``.
    """
    new_id_of = {
        leaf.node_id: rank for rank, leaf in enumerate(leaves, start=1)
    }

    renumbered_nodes = {}
    for old_id, node in node_list.items():
        new_id = new_id_of[old_id]
        renumbered_nodes[new_id] = SyntaxTreeNode(new_id, node.value,
                                                  node.head, node.node_type)

    renumbered_edges = [
        HyperEdge(new_id_of[edge.node_from],
                  [new_id_of[edge.node_to[0]]],
                  edge.features,
                  edge.label)
        for edge in edge_list
    ]
    return HyperGraph(renumbered_edges, renumbered_nodes)
def _read_tree_from_line(self, line):
    """Parse one bracketed tree string into a ``HyperGraph``.

    ``line`` is split on whitespace. A token starting with ``"("`` opens a
    non-terminal node whose head is the rest of the token. Any other token
    is a terminal: the text before its first ``")"`` (if non-empty) becomes
    the node value, and every character from that first ``")"`` to the end
    of the token is treated as one closing bracket. A terminal token with
    no ``")"`` at all is a plain word that closes nothing.

    On each closing bracket the most recent nodes that share the same depth
    are popped and attached, in pop order, to the node left on top of the
    stack. Node ids are assigned in order of appearance, starting at 0.

    Args:
        line: One tree in bracketed notation, e.g. ``"(S (NP a b) (VP c))"``.

    Returns:
        ``HyperGraph`` holding one edge per parent/child pair and all nodes
        keyed by id.

    Raises:
        IndexError: If ``line`` has more closing brackets than open nodes.
    """
    stack = []  # entries are [depth, node]
    edges = []
    nodes = {}
    now_depth = 0
    now_id = 0
    for token in line.split():
        # Process "("
        if token.startswith("("):
            node = sent.SyntaxTreeNode(now_id, None, token[1:],
                                       sent.SyntaxTreeNode.Type.NT)
            stack.append([now_depth, node])
            nodes[now_id] = node
            now_id += 1
            now_depth += 1
            continue

        # str.index raises ValueError (not IndexError) when ")" is missing,
        # so use str.find and handle the "not found" result explicitly.
        end_idx = token.find(")")
        if end_idx < 0:
            end_idx = len(token)

        if end_idx != 0:
            node = sent.SyntaxTreeNode(now_id, token[:end_idx], None,
                                       sent.SyntaxTreeNode.Type.T)
            stack.append([now_depth, node])
            nodes[now_id] = node
            now_id += 1

        # Process ")": one closing step per remaining character.
        for _ in range(end_idx, len(token)):
            depth, child = stack.pop()
            children = [child]
            # Gather the siblings that sit at the same depth.
            while stack and stack[-1][0] == depth:
                children.append(stack.pop()[1])
            if stack:
                parent = stack[-1][1]
                edges.extend(
                    HyperEdge(parent.node_id, [kid.node_id])
                    for kid in children)
            now_depth -= 1
    return HyperGraph(edges, nodes)