예제 #1
0
 def emit_tree(idx, lines):
     """Assemble a ``sent.DepTreeRNNGSequenceSentence`` from dependency rows.

     Every element of ``lines`` is unpacked as
     ``(node_id, form, lemma, pos, feat, head, deprel)``. ``lines`` is
     walked twice, so it must be re-iterable. ``self`` is not a parameter;
     it is taken from the enclosing scope.
     """
     # Pass 1: one tree node per row, remembering the largest id seen.
     tree_nodes = {}
     highest_id = -1
     for node_id, form, _lemma, pos, _feat, _head, _deprel in lines:
         tree_nodes[node_id] = sent.SyntaxTreeNode(node_id=node_id,
                                                   value=form,
                                                   head=pos)
         if node_id > highest_id:
             highest_id = node_id

     # Extra node carrying Vocab.ES_STR, placed right after the largest id.
     closing_id = highest_id + 1
     tree_nodes[closing_id] = sent.SyntaxTreeNode(
         node_id=closing_id,
         value=vocabs.Vocab.ES_STR,
         head=vocabs.Vocab.ES_STR)

     # Pass 2: a row whose head is 0 names the root; every other row
     # contributes a head -> node edge labelled with its deprel.
     dependency_edges = []
     root_id = -1
     for node_id, _form, _lemma, _pos, _feat, head, deprel in lines:
         if head != 0:
             dependency_edges.append(
                 HyperEdge(head, [node_id], None, deprel))
             continue
         root_id = node_id

     # Attach the extra node underneath the root (or -1 if none was found).
     dependency_edges.append(
         HyperEdge(root_id, [closing_id], None, vocabs.Vocab.ES_STR))

     tree_graph = HyperGraph(dependency_edges, tree_nodes)
     return sent.DepTreeRNNGSequenceSentence(
         idx,
         score=None,
         graph=tree_graph,
         surface_vocab=self.value_vocab,
         nt_vocab=self.node_vocab,
         edge_vocab=self.edge_vocab,
         all_surfaces=True,
         output_procs=self.output_procs)
예제 #2
0
  def read_sent(self, line, idx):
    """Turn one input line into a ``sent.GraphSentence``.

    When ``self.text_input`` is set, ``line`` is whitespace-split into words
    that are chained linearly between a Vocab.SS node and a Vocab.ES node.
    Otherwise ``line`` is parsed with ``ast.literal_eval`` into
    ``(node_list, arc_list)``; with ``self.flatten`` the arcs are ignored,
    the nodes are chained linearly and their probabilities are reset.

    The resulting graph must have exactly one root and one leaf.
    """
    if self.text_input:
      # Start marker, then one node per word, then the end marker.
      nodes = [sent.LatticeNode(node_id=0, value=vocabs.Vocab.SS)]
      nodes.extend(
        sent.LatticeNode(node_id=position, value=self.vocab.convert(token))
        for position, token in enumerate(line.strip().split(), start=1))
      nodes.append(sent.LatticeNode(node_id=len(nodes), value=vocabs.Vocab.ES))
      # Linear chain: each node points at its successor.
      edge_list = [HyperEdge(src, [src + 1]) for src in range(len(nodes) - 1)]
    else:
      node_list, arc_list = ast.literal_eval(line)
      nodes = []
      for position, item in enumerate(node_list):
        nodes.append(sent.LatticeNode(node_id=position,
                                      value=self.vocab.convert(item[0]),
                                      fwd_log_prob=item[1],
                                      marginal_log_prob=item[2],
                                      bwd_log_prob=item[3]))
      if not self.flatten:
        edge_list = [HyperEdge(src, [dst]) for src, dst in arc_list]
      else:
        # Ignore the arcs: chain the nodes and reset every node's probs.
        edge_list = [HyperEdge(src, [src + 1]) for src in range(len(nodes) - 1)]
        for node in nodes[:-1]:
          node.reset_prob()
        nodes[-1].reset_prob()

      first_value, last_value = nodes[0].value, nodes[-1].value
      assert first_value == self.vocab.SS and last_value == self.vocab.ES

    # Build the graph keyed by node id and sanity-check its shape.
    nodes_by_id = {node.node_id: node for node in nodes}
    graph = HyperGraph(edge_list, nodes_by_id)
    assert len(graph.roots()) == 1 # <SOS>
    assert len(graph.leaves()) == 1 # <EOS>
    return sent.GraphSentence(idx=idx, graph=graph, vocab=self.vocab)
예제 #3
0
 def test_toposort(self):
     """topo_sort on a six-node DAG yields the expected node order."""
     # Graph taken from https://www.geeksforgeeks.org/topological-sorting/
     nodes = {label: HyperNode(label, label) for label in range(6)}
     adjacency = (
         (5, [2, 0]),
         (4, [0, 1]),
         (2, [3]),
         (3, [1]),
     )
     edges = [HyperEdge(source, targets) for source, targets in adjacency]
     graph = HyperGraph(edges, nodes)
     expected_order = [5, 4, 2, 3, 1, 0]
     self.assertListEqual(graph.topo_sort(), expected_order)
예제 #4
0
def normalize_space_at_conll(tree):
    """Split whitespace-containing node values into separate nodes.

    Nodes 1 .. len_nodes-1 of ``tree.graph`` are visited. The first
    whitespace-separated piece of a node's value keeps the node's id; each
    further piece becomes a new node (ids continue from ``len_nodes``)
    attached to the original node by a "[whtsp]" edge. All edges of the
    input graph except the last one yielded by ``iter_edges`` are kept.
    The result is renumbered through ``remap_id``.
    """
    graph = tree.graph
    next_id = graph.len_nodes
    # Copy the existing edges, dropping the final one.
    edges = list(graph.iter_edges())[:-1]

    rebuilt = {}
    ordered_leaves = []
    for index in range(1, graph.len_nodes):
        node = graph[index]
        pieces = node.value.split()
        if not pieces:
            # Nothing to emit for an empty / whitespace-only value.
            continue

        # First piece reuses the original node id.
        primary = SyntaxTreeNode(node.node_id, pieces[0], node.head,
                                 node.node_type)
        rebuilt[node.node_id] = primary
        ordered_leaves.append(primary)

        # Remaining pieces get fresh ids and hang off the original node.
        for piece in pieces[1:]:
            extra = SyntaxTreeNode(next_id, piece, node.head, node.node_type)
            rebuilt[next_id] = extra
            ordered_leaves.append(extra)
            edges.append(HyperEdge(node.node_id, [next_id], None, "[whtsp]"))
            next_id += 1

    return remap_id(rebuilt, edges, ordered_leaves)
예제 #5
0
    def setUp(self):
        """Create a five-node fixture graph: 1 -> {2, 3}, 2 -> {4, 5}."""
        labelled_nodes = {
            node_id: HyperNode(label, node_id)
            for node_id, label in enumerate("abcde", start=1)
        }
        parent_child_pairs = ((1, 2), (1, 3), (2, 4), (2, 5))
        fixture_edges = [
            HyperEdge(parent, [child]) for parent, child in parent_child_pairs
        ]

        self.nodes = labelled_nodes
        self.graph = HyperGraph(fixture_edges, labelled_nodes)
예제 #6
0
 def emit_tree(idx, lines):
   """Build a ``sent.RNNGSequenceSentence`` from dependency rows.

   Every element of ``lines`` is unpacked as
   ``(node_id, form, lemma, pos, feat, head, deprel)``; ``lines`` is walked
   twice. Rows with head 0 or deprel "ROOT" produce no edge. ``self`` comes
   from the enclosing scope.
   """
   tree_nodes = {
     node_id: sent.SyntaxTreeNode(node_id=node_id, value=form, head=pos)
     for node_id, form, _lemma, pos, _feat, _head, _deprel in lines
   }

   dependency_edges = []
   for node_id, _form, _lemma, _pos, _feat, head, deprel in lines:
     # Root rows have no incoming edge.
     if head == 0 or deprel == "ROOT":
       continue
     dependency_edges.append(HyperEdge(head, [node_id], None, deprel))

   tree_graph = HyperGraph(dependency_edges, tree_nodes)
   return sent.RNNGSequenceSentence(idx,
                                    tree_graph,
                                    self.surface_vocab,
                                    self.nt_vocab,
                                    all_surfaces=True)
예제 #7
0
 def write_changes(buffer, idx, now_id):
     """Record the pieces in ``buffer`` as nodes for graph node ``idx``.

     ``buffer[0]`` replaces the entry for ``idx``; each later piece becomes
     a new node with consecutive ids starting at ``now_id``, a bracketed
     copy of the source node's head, and an "[sp]" edge from ``idx``.
     Mutates ``node_list``, ``leaves`` and ``edge_list`` and reads ``graph``
     and ``edge`` from the enclosing scope.

     Returns the next unused node id.
     """
     source = graph[idx]

     # The first piece keeps the source node's own id and head.
     primary = SyntaxTreeNode(source.node_id, buffer[0], source.head,
                              source.node_type)
     node_list[idx] = primary
     leaves.append(primary)

     next_id = now_id
     for position in range(1, len(buffer)):
         extra = SyntaxTreeNode(next_id, buffer[position],
                                "[" + source.head + "]", source.node_type)
         node_list[next_id] = extra
         edge_list.append(HyperEdge(idx, [next_id], edge.features, "[sp]"))
         leaves.append(extra)
         next_id += 1
     return next_id
예제 #8
0
def remap_id(node_list, edge_list, leaves):
    """Renumber nodes by their position in ``leaves`` and rebuild the graph.

    The node at position ``k`` (0-based) of ``leaves`` receives id ``k + 1``.
    Every node in ``node_list`` and both endpoints of every edge in
    ``edge_list`` (only the first target of each edge is used) are
    translated through that mapping; an id missing from the mapping raises
    ``KeyError``.

    Returns a new ``HyperGraph`` built from the translated copies.
    """
    new_id_of = {
        leaf.node_id: position
        for position, leaf in enumerate(leaves, start=1)
    }

    # Copy nodes under their new ids.
    renumbered_nodes = {}
    for old_id, node in node_list.items():
        new_id = new_id_of[old_id]
        renumbered_nodes[new_id] = SyntaxTreeNode(new_id, node.value,
                                                  node.head, node.node_type)

    # Copy edges, translating the source and the first target.
    renumbered_edges = [
        HyperEdge(new_id_of[edge.node_from], [new_id_of[edge.node_to[0]]],
                  edge.features, edge.label)
        for edge in edge_list
    ]
    return HyperGraph(renumbered_edges, renumbered_nodes)
예제 #9
0
 def _read_tree_from_line(self, line):
     """Parse one whitespace-tokenised bracketed tree into a HyperGraph.

     A token starting with "(" opens a non-terminal node whose head is the
     rest of the token. Any other token is a terminal: the text before the
     first ")" becomes the node value (no node is created when that text
     is empty), and every character from the first ")" to the end of the
     token closes one open constituent.

     Node ids are assigned in creation order starting at 0. For each closed
     constituent one edge per child is added from the parent to the child.

     Args:
         line: the bracketed tree string.

     Returns:
         A HyperGraph built from the collected edges and the id -> node map.

     Raises:
         IndexError: if a ")" is met while the stack is empty (more closing
             brackets than open constituents).
     """
     stack = []  # entries are [depth, node]
     edges = []
     nodes = {}
     now_depth = 0
     now_id = 0
     for token in line.split():
         # Process "("
         if token.startswith("("):
             stack.append([
                 now_depth,
                 sent.SyntaxTreeNode(now_id, None, token[1:],
                                     sent.SyntaxTreeNode.Type.NT)
             ])
             nodes[now_id] = stack[-1][1]
             now_id += 1
             now_depth += 1
         else:
             # str.index raises ValueError (not IndexError) when ")" is
             # absent, so the old fallback never ran and bare terminals
             # crashed. str.find reports the miss as -1 instead.
             end_idx = token.find(")")
             if end_idx == -1:
                 end_idx = len(token)
             if end_idx != 0:
                 stack.append([
                     now_depth,
                     sent.SyntaxTreeNode(now_id, token[:end_idx], None,
                                         sent.SyntaxTreeNode.Type.T)
                 ])
                 nodes[now_id] = stack[-1][1]
                 now_id += 1
             # Process ")": one iteration per trailing character.
             for _ in range(end_idx, len(token)):
                 depth, child = stack.pop()
                 children = [child]
                 # Gather the siblings, i.e. stack entries at the same depth.
                 while len(stack) > 0 and stack[-1][0] == depth:
                     children.append(stack.pop()[1])
                 if len(stack) > 0:
                     # The entry left on top is the parent of the siblings.
                     parent = stack[-1][1]
                     for sibling in children:
                         edges.append(
                             HyperEdge(parent.node_id, [sibling.node_id]))
                 now_depth -= 1
     return HyperGraph(edges, nodes)