Пример #1
0
    def test_equals(self):
        """Grow two passages in lockstep and check Passage.equals() at each step."""
        first = core.Passage("1")
        second = core.Passage("2")
        first_l0 = layer0.Layer0(first)
        second_l0 = layer0.Layer0(second)
        first_l1 = layer1.Layer1(first)
        second_l1 = layer1.Layer1(second)
        tags = layer1.EdgeTags

        def check_same():
            # Equality has to hold in both directions.
            self.assertTrue(first.equals(second) and second.equals(first))

        def check_differ():
            # Neither direction may report equality.
            self.assertFalse(first.equals(second) or second.equals(first))

        check_same()

        # Terminals: same words on both sides, then a tag mismatch on "3"
        # that is repaired by replacing the word with a punctuation terminal.
        for terminals_layer in (first_l0, second_l0):
            for text in ("0", "1", "2"):
                terminals_layer.add_terminal(text, False)
        check_same()
        second_punct = second_l0.add_terminal("3", True)
        check_differ()
        placeholder = first_l0.add_terminal("3", False)
        check_differ()
        placeholder.destroy()
        first_punct = first_l0.add_terminal("3", True)
        check_same()

        # Layer-1 nodes and edges.
        scene1 = first_l1.add_fnode(None, tags.ParallelScene)
        check_differ()
        scene2 = second_l1.add_fnode(None, tags.ParallelScene)
        check_same()
        first_l1.add_fnode(scene1, tags.Participant)
        check_differ()
        self.assertTrue(scene1.equals(scene2, recursive=False))
        second_l1.add_fnode(scene2, tags.Process)
        check_differ()
        second_l1.add_fnode(scene2, tags.Participant)
        check_differ()
        first_l1.add_fnode(scene1, tags.Process)
        check_same()
        # The children were added in a different order on each side, so the
        # order-sensitive comparison must fail.
        equal_when_ordered = (first.equals(second, ordered=True)
                              or second.equals(first, ordered=True))
        self.assertFalse(equal_when_ordered)

        # Attribute differences: "implicit" on a node, "remote" on an edge.
        first_l1.add_fnode(scene1, tags.Adverbial, implicit=True)
        adverbial2 = second_l1.add_fnode(scene2, tags.Adverbial)
        check_differ()
        adverbial2.attrib["implicit"] = True
        check_same()
        scene2[2].attrib["remote"] = True
        check_differ()
        scene1[2].attrib["remote"] = True
        check_same()

        # Punctuation units, then an extra layer on one side only.
        first_l1.add_punct(None, first_punct)
        check_differ()
        second_l1.add_punct(None, second_punct)
        check_same()
        core.Layer("2", first)
        check_differ()
Пример #2
0
def crossing():
    """Build a two-paragraph :class:`Passage` whose edges cross.

    The Process unit lives under the second scene and is also attached to
    the first scene through a remote edge.

    Passage: [1 2 [3 P(remote)] H] .
             [[3 P] . 4 . H]

    """
    passage = core.Passage("1")
    l0 = layer0.Layer0(passage)
    l1 = layer1.Layer1(passage)
    tags = layer1.EdgeTags

    # Paragraph 1: two words and a full stop.
    terms = [l0.add_terminal(text, is_punct)
             for text, is_punct in (("1", False), ("2", False), (".", True))]
    # Paragraph 2: word, stop, word, stop.
    terms += [l0.add_terminal(text, is_punct, paragraph=2)
              for text, is_punct in (("3", False), (".", True),
                                     ("4", False), (".", True))]

    scene1 = l1.add_fnode(None, tags.ParallelScene)
    scene2 = l1.add_fnode(None, tags.ParallelScene)
    process = l1.add_fnode(scene2, tags.Process)
    l1.add_remote(scene1, tags.Process, process)

    # One (parent, is_punctuation) entry per terminal, in terminal order.
    layout = (
        (scene1, False),
        (scene1, False),
        (None, True),
        (process, False),
        (scene2, True),
        (scene2, False),
        (scene2, True),
    )
    for term, (parent, is_punct) in zip(terms, layout):
        if is_punct:
            l1.add_punct(parent, term)
        else:
            parent.add(tags.Terminal, term)
    return passage
Пример #3
0
def function1():
    """Build passage "1": two scenes, a root-level Function and a remote Participant.

    Layout over the five terminals (the fifth is punctuation):
        Scene #1: [H [P 1] [A 2]]
        Function: [F 3], attached at the root
        Scene #2: [H [S 4] [A IMPLICIT] [U 5]], plus a remote A that points
            at the Participant of scene #1
    """
    passage = core.Passage("1")
    l0 = layer0.Layer0(passage)
    l1 = layer1.Layer1(passage)
    tags = layer1.EdgeTags
    word1, word2, word3, word4, full_stop = (
        l0.add_terminal(text=str(number), punct=(number == 5))
        for number in range(1, 6))

    # Scene #1 with its Process and Participant.
    scene1 = l1.add_fnode(None, tags.ParallelScene)
    process = l1.add_fnode(scene1, tags.Process)
    participant = l1.add_fnode(scene1, tags.Participant)
    process.add(tags.Terminal, word1)
    participant.add(tags.Terminal, word2)

    # Function over terminal 3; where it sits should not affect evaluation.
    function = l1.add_fnode(None, tags.Function)
    function.add(tags.Terminal, word3)

    # Scene #2 with a State and an implicit Participant; the implicit unit
    # should not affect evaluation.
    scene2 = l1.add_fnode(None, tags.ParallelScene)
    state = l1.add_fnode(scene2, tags.State)
    state.add(tags.Terminal, word4)
    l1.add_fnode(scene2, tags.Participant, implicit=True)

    # Punctuation 5 goes under scene #2; it should not affect evaluation.
    l1.add_punct(scene2, full_stop)

    # Scene #2 takes the Participant of scene #1 as a remote argument.
    l1.add_remote(scene2, tags.Participant, participant)

    return passage
Пример #4
0
def _from_site_terminals(elem, passage, elem2node):
    """Create the Layer0 of a passage from the site XML format.

    Part of each terminal's metadata (here: whether it is punctuation) is
    stored on the wrapper unit element that directly contains the terminal
    element, so both elements are used when creating each
    :class:layer0.Terminal object.

    Args:
        elem: root element of the XML hierarchy
        passage: passage that receives a new Layer0 holding the Terminals
        elem2node: mapping that is updated, through SiteUtil.set_node, with
            every (word wrapper, created Terminal) pair

    """
    # The layer instance is not kept here; it is looked up again through
    # passage.layer() when terminals are added.
    layer0.Layer0(passage)
    paragraphs = elem.iterfind(SiteCfg.Paths.Paragraphs)
    # Paragraph numbers are 1-based, hence start=1.
    for para_num, paragraph in enumerate(paragraphs, start=1):
        words = list(paragraph.iter(SiteCfg.Tags.Terminal))
        # For every word, collect the unit(s) that have it as a direct
        # child; the XML is hierarchical, so that is a single wrapper.
        wrappers = [
            unit
            for word in words
            for unit in paragraph.iter(SiteCfg.Tags.Unit)
            if word in list(unit)
        ]
        for word, wrapper in zip(words, wrappers):
            is_punct = (
                wrapper.get(SiteCfg.Attr.ElemTag) == SiteCfg.Types.Punct)
            token_text = SiteUtil.unescape(word.text)
            terminal = passage.layer(layer0.LAYER_ID).add_terminal(
                token_text, is_punct, para_num)
            SiteUtil.set_id(word, terminal.ID)
            SiteUtil.set_node(wrapper, terminal, elem2node)
Пример #5
0
def discontiguous():
    """Creates a Passage whose units are split over non-adjacent terminals."""
    passage = core.Passage("1")
    l0 = layer0.Layer0(passage)
    l1 = layer1.Layer1(passage)
    tags = layer1.EdgeTags
    # Terminals "1".."20"; every tenth one ("10" and "20") is punctuation.
    terms = [l0.add_terminal(text=str(number), punct=(number % 10 == 0))
             for number in range(1, 21)]

    # Scene 1 covers terms[0:10] (numbers below are indices into ``terms``).
    # A dash marks a unit that is interrupted and resumed later:
    # [PS [D [E 0] [C- 1] [E 2] [-C 3]]
    #     [A- 4] [P- 5 6] [-A 7] [F 8] [-P [U 9]]]
    # D additionally takes P as a remote Ground.
    scene1 = l1.add_fnode(None, tags.ParallelScene)
    adverbial = l1.add_fnode(scene1, tags.Adverbial)
    elaborator_a = l1.add_fnode(adverbial, tags.Elaborator)
    center = l1.add_fnode(adverbial, tags.Center)
    elaborator_b = l1.add_fnode(adverbial, tags.Elaborator)
    participant1 = l1.add_fnode(scene1, tags.Participant)
    process1 = l1.add_fnode(scene1, tags.Process)
    function = l1.add_fnode(scene1, tags.Function)
    l1.add_remote(adverbial, tags.Ground, process1)
    owners = (elaborator_a, center, elaborator_b, center, participant1,
              process1, process1, participant1, function)
    for owner, term in zip(owners, terms):  # pairs up terms[0:9]
        owner.add(tags.Terminal, term)
    l1.add_punct(process1, terms[9])

    # Scene 2 covers terms[10:14] and terms[17:20]:
    # [PS- [D IMPLICIT] [G IMPLICIT] [P 10 11 12 13]]
    # [-PS [A 17 18 [U 19]]]
    scene2 = l1.add_fnode(None, tags.ParallelScene)
    for implicit_tag in (tags.Adverbial, tags.Ground):
        l1.add_fnode(scene2, implicit_tag, implicit=True)
    process2 = l1.add_fnode(scene2, tags.Process)
    participant2 = l1.add_fnode(scene2, tags.Participant)
    for term in terms[10:14]:
        process2.add(tags.Terminal, term)
    for term in terms[17:19]:
        participant2.add(tags.Terminal, term)
    l1.add_punct(participant2, terms[19])

    # Scene 3 covers terms[14:17]:
    # [PS [P IMPLICIT] 14 [A 15 16]]
    scene3 = l1.add_fnode(None, tags.ParallelScene)
    scene3.add(tags.Terminal, terms[14])
    l1.add_fnode(scene3, tags.Process, implicit=True)
    participant3 = l1.add_fnode(scene3, tags.Participant)
    for term in terms[15:17]:
        participant3.add(tags.Terminal, term)

    return passage
Пример #6
0
def function2():
    """Build passage "2": a Function inside scene #1 and a remote Adverbial.

    Layout over the five terminals (the fifth is punctuation):
        Scene #1: [H [S 1] [D 2] [F 3]]
        Scene #2: [H [S 4]], plus a remote D that points at the Adverbial
            of scene #1
        Punctuation 5: attached at the root, outside both scenes
    """
    passage = core.Passage("2")
    l0 = layer0.Layer0(passage)
    l1 = layer1.Layer1(passage)
    tags = layer1.EdgeTags
    word1, word2, word3, word4, full_stop = (
        l0.add_terminal(text=str(number), punct=(number == 5))
        for number in range(1, 6))

    # Scene #1: State, Adverbial and Function, one terminal each.
    scene1 = l1.add_fnode(None, tags.ParallelScene)
    state1 = l1.add_fnode(scene1, tags.State)
    adverbial = l1.add_fnode(scene1, tags.Adverbial)
    state1.add(tags.Terminal, word1)
    adverbial.add(tags.Terminal, word2)
    function = l1.add_fnode(scene1, tags.Function)
    function.add(tags.Terminal, word3)

    # Scene #2: a single State over terminal 4.
    scene2 = l1.add_fnode(None, tags.ParallelScene)
    state2 = l1.add_fnode(scene2, tags.State)
    state2.add(tags.Terminal, word4)

    # Punctuation 5 is not placed under any scene.
    l1.add_punct(None, full_stop)

    # Scene #2 takes the Adverbial of scene #1 as a remote argument.
    l1.add_remote(scene2, tags.Adverbial, adverbial)

    return passage
Пример #7
0
    def test_terminals(self):
        """Tests :class:layer0.Terminal new and inherited functionality."""

        def new_terminal(passage, ID, tag, text, paragraph, para_pos):
            # Construct the Terminal directly instead of going through
            # Layer0.add_terminal().
            return layer0.Terminal(
                ID=ID, root=passage, tag=tag,
                attrib={"text": text,
                        "paragraph": paragraph,
                        "paragraph_position": para_pos})

        original = core.Passage("1")
        layer0.Layer0(original)
        word_tag = layer0.NodeTags.Word
        terms = [
            new_terminal(original, "0.1", word_tag, "1", 1, 1),
            new_terminal(original, "0.2", word_tag, "2", 2, 1),
            new_terminal(original, "0.3", layer0.NodeTags.Punct, ".", 2, 2),
        ]

        # A second passage holding one matching and one differing terminal.
        duplicate = core.Passage("2")
        layer0.Layer0(duplicate)
        equal_term = new_terminal(duplicate, "0.1", word_tag, "1", 1, 1)
        unequal_term = new_terminal(duplicate, "0.2", word_tag, "two", 2, 1)

        # Property values, listed per attribute in terminal order.
        expected_by_attr = (
            ("punct", [False, False, True]),
            ("text", ["1", "2", "."]),
            ("position", [1, 2, 3]),
            ("paragraph", [1, 2, 2]),
            ("para_pos", [1, 1, 2]),
        )
        for attr, expected in expected_by_attr:
            self.assertSequenceEqual([getattr(t, attr) for t in terms],
                                     expected)

        # == between terminals: distinct ones differ, a terminal equals itself.
        for i, j in ((0, 1), (0, 2), (1, 2)):
            self.assertFalse(terms[i] == terms[j])
        self.assertTrue(terms[0] == terms[0])

        # equals() across passages.
        self.assertTrue(terms[0].equals(equal_term))
        self.assertFalse(terms[1].equals(unequal_term))
Пример #8
0
def test_layer0():
    """Check Layer0 bookkeeping for three terminals spread over two paragraphs."""
    passage = core.Passage("1")
    terminals_layer = layer0.Layer0(passage)
    first_word = terminals_layer.add_terminal(text="1", punct=False)
    terminals_layer.add_terminal(text="2", punct=True, paragraph=2)
    second_word = terminals_layer.add_terminal(
        text="3", punct=False, paragraph=2)

    # The first item of every pair runs 1..3.
    leading_items = [pair[0] for pair in terminals_layer.pairs]
    assert leading_items == [1, 2, 3]

    # Positions restart inside the second paragraph.
    paragraph_positions = [term.para_pos for term in terminals_layer.all]
    assert paragraph_positions == [1, 1, 2]

    # The punctuation terminal is left out of ``words``.
    assert terminals_layer.words == (first_word, second_word)
Пример #9
0
def create_passage(num_terms=3, *punct):
    """Create passage "1" with ``num_terms`` terminals and an empty Layer1.

    Args:
        num_terms: number of terminals; their texts are "1".."num_terms".
        *punct: the numbers (1-based) of the terminals that are punctuation.

    Returns:
        a tuple (passage, layer1 object, list of terminals).
    """
    passage = core.Passage("1")
    terminals_layer = layer0.Layer0(passage)
    units_layer = layer1.Layer1(passage)
    numbers = range(1, num_terms + 1)
    terminals = [
        terminals_layer.add_terminal(text=str(number),
                                     punct=(number in punct))
        for number in numbers
    ]
    return passage, units_layer, terminals
Пример #10
0
    def _build_passage(self):
        """Assemble a Passage from the nodes, remotes, linkages and terminals held on self.

        Reads ``self.pending_nodes`` (emptied as nodes are created),
        ``self.node_by_id`` (extended with every created node),
        ``self.node_ids_with_children``, ``self.remotes``, ``self.linkages``
        and ``self.terminals``.

        Returns:
            the new Passage, whose ID is ``self.sentence_id`` or, when that
            is falsy, ``self.passage_id``.

        Raises:
            ValueError: if some pending nodes refer to a parent that never
                becomes known, or if a terminal refers to an unknown parent.
        """
        p = core.Passage(self.sentence_id or self.passage_id)
        l0 = layer0.Layer0(p)
        l1 = layer1.Layer1(p)
        paragraph = 1

        # add normal nodes: sweep the pending list (from the end, so that
        # deleting by index is safe) until every node has been attached
        while self.pending_nodes:
            attached_any = False
            for i in reversed(range(len(self.pending_nodes))):
                parent_id, edge_tag, node_id = self.pending_nodes[i]
                if parent_id not in self.node_by_id:
                    continue  # parent not created yet; retry on a later sweep
                # The stored parent may itself be None (see the terminal
                # handling below), hence the membership test above.
                parent = self.node_by_id[parent_id]
                del self.pending_nodes[i]
                # nodes that never appear as a parent are created implicit
                implicit = node_id not in self.node_ids_with_children
                node = l1.add_fnode(parent, edge_tag, implicit=implicit)
                if edge_tag == EdgeTags.Punctuation:
                    node.tag = layer1.NodeTags.Punctuation
                self.node_by_id[node_id] = node
                attached_any = True
            if not attached_any:
                # A full sweep attached nothing, so another sweep cannot
                # either: fail instead of looping forever.
                raise ValueError(
                    "Nodes with unknown parents (%s) in passage %s" %
                    (", ".join(str(parent_id)
                               for parent_id, _, _ in self.pending_nodes),
                     p.ID))

        # add remotes
        for parent_id, edge_tag, node_id in self.remotes:
            l1.add_remote(self.node_by_id[parent_id], edge_tag,
                          self.node_by_id[node_id])

        # add linkages
        for children in self.linkages.values():
            link_relation = next(self.node_by_id[i] for i, t in children
                                 if t == EdgeTags.LinkRelation)
            link_arguments = [
                self.node_by_id[i] for i, t in children
                if t == EdgeTags.LinkArgument
            ]
            l1.add_linkage(link_relation, *link_arguments)

        # add terminals
        for text, tag, edge_tag, parent_id in self.terminals:
            punctuation = (tag == layer0.NodeTags.Punct)
            terminal = l0.add_terminal(text=text,
                                       punct=punctuation,
                                       paragraph=paragraph)
            try:
                parent = self.node_by_id[parent_id]
            except KeyError as e:
                raise ValueError(
                    "Terminal ('%s') with bad parent (%s) in passage %s" %
                    (text, parent_id, p.ID)) from e
            if parent is None:
                # warn, then give the terminal a unit of its own to hang from
                print("Terminal is a child of the root: '%s'" % text,
                      file=sys.stderr)
                parent = l1.add_fnode(parent, edge_tag)
            if edge_tag != EdgeTags.Terminal:
                print("Terminal with incoming %s edge: '%s'" %
                      (edge_tag, text),
                      file=sys.stderr)
            parent.add(EdgeTags.Terminal, terminal)

        return p
Пример #11
0
 def test_layer0(self):
     """Check Layer0 bookkeeping for three terminals spread over two paragraphs."""
     passage = core.Passage("1")
     terminals_layer = layer0.Layer0(passage)
     first_word = terminals_layer.add_terminal(text="1", punct=False)
     terminals_layer.add_terminal(text="2", punct=True, paragraph=2)
     second_word = terminals_layer.add_terminal(
         text="3", punct=False, paragraph=2)

     # The first item of every pair runs 1..3.
     leading_items = [pair[0] for pair in terminals_layer.pairs]
     self.assertSequenceEqual(leading_items, [1, 2, 3])

     # Positions restart inside the second paragraph.
     paragraph_positions = [term.para_pos for term in terminals_layer.all]
     self.assertSequenceEqual(paragraph_positions, [1, 1, 2])

     # The punctuation terminal is left out of ``words``.
     self.assertSequenceEqual(terminals_layer.words,
                              (first_word, second_word))
Пример #12
0
 def build_passage(self, graph, terminals_only=False):
     """Create a Passage with ID ``graph.id`` from ``graph``.

     Sets ``self.is_ucca`` according to the graph's format, and records
     ``self.format`` in ``passage.extra["format"]`` when the graph has no
     format or has this converter's format.

     Args:
         graph: the graph to convert.
         terminals_only: if true, only Layer0 is created and filled.

     Returns:
         the created Passage.
     """
     passage = core.Passage(graph.id)
     source_format = graph.format
     self.is_ucca = source_format == "ucca"
     if source_format is None or source_format == self.format:
         passage.extra["format"] = self.format

     terminals_layer = layer0.Layer0(passage)
     self.create_terminals(graph, terminals_layer)
     if terminals_only:
         return passage

     units_layer = layer1.Layer1(passage)
     self.create_non_terminals(graph, units_layer)
     graph.link_pre_terminals()
     return passage
Пример #13
0
def _add_related_terminal(preterminal, sent, unit, role, l0):
    """Attach the heuristic ``role`` ('gov' or 'obj') token of ``unit`` under ``preterminal``."""
    # 'local_<role>' is a 1-based token number within the sentence.
    rel = sent['toks'][unit['heuristic_relation'][f'local_{role}'] - 1]
    rel_unit = sent['swes'].get(str(rel['#']))
    if rel_unit is None:
        # Fall back to the 'smwes' table, keyed by the first item of the
        # token's 'smwe' entry ('-1' when the token has none).
        rel_unit = sent['smwes'].get(str(rel.get('smwe', [-1, -1])[0]), None)
    term = create_terminal(rel, rel_unit, l0, False)
    preterminal.add(ul1.EdgeTags.Terminal, term)


def main(args):
    """Write one small passage per expression unit of a STREUSLE file.

    Each passage holds a single scene with three children: a 'gov' unit,
    a unit labelled with the expression's 'ss' value that covers the
    expression's own tokens, and an 'obj' unit. The 'gov' and 'obj' units
    stay empty when the heuristic relation has no such token; 'obj' also
    stays empty when the unit's 'lexcat' is 'PP'.

    Args:
        args: sequence whose first item is the STREUSLE input file and
            whose second item is the output directory.
    """
    streusle_file = args[0]
    outpath = args[1]

    for doc_id, doc in get_streusle_docs(streusle_file).items():
        for unit in doc['exprs'].values():
            local_toknums = unit["local_toknums"]
            passage_id = (f'{doc_id}_{unit["sent_offs"]}_'
                          f'{local_toknums[0]}-{local_toknums[-1]}')
            # 'sent_offs' is 1-based
            sent = doc['sents'][int(unit['sent_offs']) - 1]
            relation = unit['heuristic_relation']

            p = ucore.Passage(passage_id)
            l0 = ul0.Layer0(p)
            l1 = ul1.Layer1(p)

            root = l1.add_fnode(l1._head_fnode, ul1.EdgeTags.ParallelScene)

            # gov
            preterminal = l1.add_fnode(root, 'gov')
            if relation['gov'] is not None:
                _add_related_terminal(preterminal, sent, unit, 'gov', l0)

            # P unit: the expression's own tokens, numbered 1-based within
            # the whole document
            preterminal = l1.add_fnode(root, unit['ss'])
            for i in unit["toknums"]:
                tok = doc['toks'][i - 1]
                term = create_terminal(tok, unit, l0, True)
                preterminal.add(ul1.EdgeTags.Terminal, term)

            # obj
            preterminal = l1.add_fnode(root, 'obj')
            if relation['obj'] is not None and unit['lexcat'] != 'PP':
                _add_related_terminal(preterminal, sent, unit, 'obj', l0)

            uconv.passage2file(p, f'{outpath}/{passage_id}.xml')
Пример #14
0
def main(args):
    """Write one terminals-only passage for every input line.

    Lines come from ``gen_lines(args.filenames)`` and are numbered from 1;
    the passage ID is ``args.format % number``. Every whitespace-separated
    token becomes a terminal, flagged as punctuation when all of its
    characters are in ``PUNCTUATION``.
    """
    progress = tqdm(gen_lines(args.filenames),
                    unit=" lines",
                    desc="Creating passages")
    for number, line in enumerate(progress, start=1):
        passage = core.Passage(args.format % number)
        terminals_layer = layer0.Layer0(passage)
        layer1.Layer1(passage)  # created but left without any units
        for token in line.split():
            is_punct = PUNCTUATION.issuperset(token)
            terminals_layer.add_terminal(text=token, punct=is_punct)
        write_passage(passage,
                      outdir=args.out_dir,
                      binary=args.binary,
                      verbose=False)
Пример #15
0
    def _build_passage(self, stream):
        """Parse ``stream`` and assemble a Passage from the collected nodes and terminals.

        Reads ``self.pending_nodes`` (emptied as nodes are created),
        ``self.node_by_id`` (extended with every created node),
        ``self.node_ids_with_children`` and ``self.terminals``.

        Args:
            stream: input handed to ``self.parse``.

        Returns:
            the new Passage, whose ID is ``self.passage_id``.

        Raises:
            ValueError: if some pending nodes refer to a parent that never
                becomes known, or if a terminal refers to an unknown parent.
        """
        p = core.Passage(self.passage_id)
        l0 = layer0.Layer0(p)
        l1 = layer1.Layer1(p)
        paragraph = 1

        # Advance the parser by one item.
        # NOTE(review): presumably this is what fills pending_nodes and
        # terminals - confirm against self.parse.
        next(self.parse(stream))

        # add normal nodes: the list is reversed first because each sweep
        # below walks it from the end (which keeps deletion by index safe)
        self.pending_nodes = list(reversed(self.pending_nodes))
        while self.pending_nodes:
            attached_any = False
            for i in reversed(range(len(self.pending_nodes))):
                parent_id, edge_tag, node_id = self.pending_nodes[i]
                if parent_id not in self.node_by_id:
                    continue  # parent not created yet; retry on a later sweep
                # The stored parent may itself be None (see the terminal
                # handling below), hence the membership test above.
                parent = self.node_by_id[parent_id]
                del self.pending_nodes[i]
                # nodes that never appear as a parent are created implicit
                implicit = node_id not in self.node_ids_with_children
                node = l1.add_fnode(parent, edge_tag, implicit=implicit)
                if edge_tag == EdgeTags.Punctuation:
                    node.tag = layer1.NodeTags.Punctuation
                self.node_by_id[node_id] = node
                attached_any = True
            if not attached_any:
                # A full sweep attached nothing, so another sweep cannot
                # either: fail instead of looping forever.
                raise ValueError(
                    "Nodes with unknown parents (%s) in passage %s" %
                    (", ".join(str(parent_id)
                               for parent_id, _, _ in self.pending_nodes),
                     p.ID))

        # add terminals
        for text, tag, edge_tag, parent_id in self.terminals:
            punctuation = (tag == layer0.NodeTags.Punct)
            terminal = l0.add_terminal(text=text,
                                       punct=punctuation,
                                       paragraph=paragraph)
            try:
                parent = self.node_by_id[parent_id]
            except KeyError as e:
                raise ValueError(
                    "Terminal ('%s') with bad parent (%s) in passage %s" %
                    (text, parent_id, p.ID)) from e
            if parent is None:
                # warn, then give the terminal a unit of its own to hang from
                print("Terminal is a child of the root: '%s'" % text,
                      file=sys.stderr)
                parent = l1.add_fnode(parent, edge_tag)
            if edge_tag != EdgeTags.Terminal:
                print("Terminal with incoming %s edge: '%s'" %
                      (edge_tag, text),
                      file=sys.stderr)
            parent.add(EdgeTags.Terminal, terminal)
        return p
Пример #16
0
def multi_sent_with_quotes():
    """Build a :class:`Passage` with several sentences, two paragraphs and quote marks.

    Passage: [1 2 [" U] [3 P] H] . [" U] [[5 6 . P] H]
             [[8 P] . 10 . H]

    """
    passage = core.Passage("1")
    l0 = layer0.Layer0(passage)
    l1 = layer1.Layer1(passage)
    tags = layer1.EdgeTags

    # (text, is_punctuation) for each terminal, one tuple per paragraph.
    first_paragraph = (
        ("1", False), ("2", False), ('"', True), ("3", False), (".", True),
        ('"', True), ("5", False), ("6", False), (".", True),
    )
    second_paragraph = (
        ("8", False), (".", True), ("10", False), (".", True),
    )
    terms = [l0.add_terminal(text, is_punct)
             for text, is_punct in first_paragraph]
    terms.extend(l0.add_terminal(text, is_punct, paragraph=2)
                 for text, is_punct in second_paragraph)

    # Three parallel scenes, then one Process under each of them.
    h1, h2, h3 = (l1.add_fnode(None, tags.ParallelScene) for _ in range(3))
    p1, p2, p3 = (l1.add_fnode(scene, tags.Process)
                  for scene in (h1, h2, h3))

    # One (parent, is_punctuation) entry per terminal, in terminal order.
    layout = (
        (h1, False),
        (h1, False),
        (None, True),
        (p1, False),
        (None, True),
        (None, True),
        (p2, False),
        (p2, False),
        (p2, True),
        (p3, False),
        (h3, True),
        (h3, False),
        (h3, True),
    )
    for term, (parent, is_punct) in zip(terms, layout):
        if is_punct:
            l1.add_punct(parent, term)
        else:
            parent.add(tags.Terminal, term)
    return passage
Пример #17
0
def graph2passage(graph, input):
    """Convert ``graph`` into a UCCA Passage over the text ``input``.

    Terminals are created from the distinct anchors of the graph's nodes,
    in sorted order. Layer-1 units are then created by walking the graph
    first-in-first-out, starting from the nodes accepted by
    ``is_primary_root``. Edge groups for which ``is_remote`` holds are set
    aside during the walk and added after it has finished.

    Args:
        graph: source graph; read through ``id``, ``nodes`` and
            ``find_node``.
        input: the text that the anchors' "from"/"to" offsets slice into.

    Returns:
        the newly built Passage, whose ID is ``graph.id``.
    """
    passage = core.Passage(graph.id)
    l0 = layer0.Layer0(passage)
    # (from, to, is_punct) triples; being a set, a repeated anchor yields a
    # single terminal.
    anchors = {(anchor["from"], anchor["to"], is_punct(node))
               for node in graph.nodes for anchor in node.anchors or ()}
    # Terminals are added in sorted anchor order and indexed by their span.
    terminals = {(i, j): l0.add_terminal(text=input[i:j], punct=punct)
                 for i, j, punct in sorted(anchors)}

    l1 = layer1.Layer1(passage)
    # Work list of (graph node, layer-1 unit) pairs. A top node is paired
    # with None; any other primary root gets a FoundationalNode that is
    # constructed directly on the layer's root.
    queue = [(node, None if node.is_top else layer1.FoundationalNode(
        root=l1.root, tag=layer1.NodeTags.Foundational, ID=l1.next_id()))
             for node in graph.nodes if is_primary_root(node)]

    id_to_unit = {node.id: unit for (node, unit) in queue}
    remotes = []  # (parent unit, labels, target node id), added at the end
    while queue:
        parent, parent_unit = queue.pop(0)
        # Sort by target first so that groupby sees all edges leading to the
        # same target as one group.
        for tgt, edges in groupby(sorted(parent.outgoing_edges,
                                         key=attrgetter("tgt")),
                                  key=attrgetter("tgt")):
            edges = list(edges)
            labels = [edge.lab for edge in edges]
            # The first edge of the group decides for the whole group.
            if is_remote(edges[0]):
                remotes.append((parent_unit, labels, tgt))
            else:
                child = graph.find_node(tgt)
                # Create the child unit, remember it by node id, and queue
                # the child so that its own edges are processed later.
                child_unit = id_to_unit[tgt] = l1.add_fnode_multiple(
                    parent_unit, labels, implicit=is_implicit(child))
                queue.append((child, child_unit))
        for anchor in parent.anchors or ():
            if parent_unit is None:  # Terminal children of the root are not valid in UCCA, so warn but be faithful
                print(
                    "graph2passage(): anchors of the root node converted to Terminal children in ‘{}’."
                    "".format(graph.id),
                    file=sys.stderr)
                # From here on the first head of the layer stands in for the
                # missing unit.
                parent_unit = l1.heads[0]
            parent_unit.add(layer1.EdgeTags.Terminal, terminals[anchor["from"],
                                                                anchor["to"]])
    # Every unit that was created is now in id_to_unit, so the deferred
    # remote edges can be resolved.
    for parent, labels, tgt in remotes:
        l1.add_remote_multiple(parent, labels, id_to_unit[tgt])
    return passage
Пример #18
0
def from_text(text, passage_id='1'):
    """Converts from tokenized strings to a Passage object.

    Tokens are obtained by splitting each string on whitespace. A token is
    marked as punctuation when it consists only of characters from
    ``string.punctuation``.

    Args:
        text: a sequence of strings, where each one will be a new paragraph.
        passage_id: ID given to the created Passage.

    Returns:
        a Passage object with only Terminals units.

    """
    p = core.Passage(passage_id)
    l0 = layer0.Layer0(p)
    # string.punctuation contains characters that are special inside a
    # character class (backslash, ']', '^', '-'); escape them so that each
    # one is matched literally.
    punct = re.compile('^[{}]+$'.format(re.escape(string.punctuation)))

    # Paragraph numbers are 1-based, hence start=1.
    for paragraph, par in enumerate(text, start=1):
        for token in par.split():
            l0.add_terminal(text=token,
                            punct=bool(punct.match(token)),
                            paragraph=paragraph)
    return p
Пример #19
0
    def create_multi_passage():
        """Build a :class:Passage with several sentences and two paragraphs.

        Passage: [1 2 [3 P] H] . [[5 6 . P] H]
                 [[8 P] . 10 . H]

        """
        passage = core.Passage('1')
        l0 = layer0.Layer0(passage)
        l1 = layer1.Layer1(passage)
        tags = layer1.EdgeTags

        # (text, is_punctuation) for each terminal, one tuple per paragraph.
        first_paragraph = (
            ('1', False), ('2', False), ('3', False), ('.', True),
            ('5', False), ('6', False), ('.', True),
        )
        second_paragraph = (
            ('8', False), ('.', True), ('10', False), ('.', True),
        )
        terms = [l0.add_terminal(text, is_punct)
                 for text, is_punct in first_paragraph]
        terms.extend(l0.add_terminal(text, is_punct, paragraph=2)
                     for text, is_punct in second_paragraph)

        # Three parallel scenes, then one Process under each of them.
        h1, h2, h3 = (l1.add_fnode(None, tags.ParallelScene)
                      for _ in range(3))
        p1, p2, p3 = (l1.add_fnode(scene, tags.Process)
                      for scene in (h1, h2, h3))

        # One (parent, is_punctuation) entry per terminal, in terminal order.
        layout = (
            (h1, False),
            (h1, False),
            (p1, False),
            (None, True),
            (p2, False),
            (p2, False),
            (p2, True),
            (p3, False),
            (h3, True),
            (h3, False),
            (h3, True),
        )
        for term, (parent, is_punct) in zip(terms, layout):
            if is_punct:
                l1.add_punct(parent, term)
            else:
                parent.add(tags.Terminal, term)
        return passage
Пример #20
0
def empty():
    """Return passage "1" holding a Layer0 and a Layer1 with nothing added to them."""
    passage = core.Passage(ID="1")
    # Constructing a layer on the passage is all that is needed; the layer
    # objects themselves are not kept.
    for make_layer in (layer0.Layer0, layer1.Layer1):
        make_layer(passage)
    return passage
Пример #21
0
def l1_passage():
    """Build the 20-terminal Passage "1" used to exercise layer1 objects.

    Terminals carry the texts "1".."20"; 10 and 20 are punctuation.
    Units created with parent ``None`` (terminal numbers are 1-based):

        1: Linker #1
        2-10: Parallel scene #1 -- 2-5 ==> Process #1,
            6-9 ==> Participant #1, 10 ==> Punctuation,
            remote Participant is Adverbial #2
        11-15: Parallel scene #2 -- 11-14 ==> Participant #2,
            15 ==> Adverbial #2, remote Process is Process #1
        16: Linker #2
        17-19: Parallel scene #3 -- 17-18 ==> State #3,
            19 ==> Participant #3, plus an implicit Participant
        20: Punctuation (added with parent ``None``)

    Linkages: Linker #1 -> scene #1; Linker #2 -> scenes #2 and #3.

    """
    passage = core.Passage("1")
    l0 = layer0.Layer0(passage)
    l1 = layer1.Layer1(passage)
    tags = layer1.EdgeTags

    # Terminals "1".."20"; every tenth one is punctuation.
    terms = []
    for number in range(1, 21):
        terms.append(l0.add_terminal(text=str(number),
                                     punct=(number % 10 == 0)))

    def attach(unit, first, last):
        # Hang terminals number first..last (1-based, inclusive) on unit.
        for term in terms[first - 1:last]:
            unit.add(tags.Terminal, term)

    # Linker #1: [1 L]
    linker1 = l1.add_fnode(None, tags.Linker)
    attach(linker1, 1, 1)

    # Scene #1: [[2 3 4 5 P] [6 7 8 9 A] [10 U] H]
    scene1 = l1.add_fnode(None, tags.ParallelScene)
    process1 = l1.add_fnode(scene1, tags.Process)
    participant1 = l1.add_fnode(scene1, tags.Participant)
    attach(process1, 2, 5)
    attach(participant1, 6, 9)
    l1.add_punct(scene1, terms[9])

    # Scene #2: [[11 12 13 14 A] [15 D] H]
    scene2 = l1.add_fnode(None, tags.ParallelScene)
    participant2 = l1.add_fnode(scene2, tags.Participant)
    attach(participant2, 11, 14)
    adverbial2 = l1.add_fnode(scene2, tags.Adverbial)
    attach(adverbial2, 15, 15)

    # Linker #2: [16 L]
    linker2 = l1.add_fnode(None, tags.Linker)
    attach(linker2, 16, 16)

    # Scene #3: [[17 18 S] [19 A] (implicit participant) H]
    scene3 = l1.add_fnode(None, tags.ParallelScene)
    state3 = l1.add_fnode(scene3, tags.State)
    attach(state3, 17, 18)
    participant3 = l1.add_fnode(scene3, tags.Participant)
    attach(participant3, 19, 19)
    l1.add_fnode(scene3, tags.Participant, implicit=True)

    # Punctuation #20 is not placed under any scene.
    l1.add_punct(None, terms[19])

    # Remote edges: scene #1 borrows Adverbial #2 as a Participant,
    # scene #2 borrows Process #1 as its Process.
    l1.add_remote(scene1, tags.Participant, adverbial2)
    l1.add_remote(scene2, tags.Process, process1)

    # Linkages: L1 -> H1 and H2 <- L2 -> H3.
    l1.add_linkage(linker1, scene1)
    l1.add_linkage(linker2, scene2, scene3)

    return passage
Пример #22
0
    def create_passage(self, verify=True):
        """
        Create final passage from temporary representation.

        A new core.Passage is built with the ID of self.passage. Layer 0 is
        filled from self.terminals, then layer 1 is populated from self.nodes
        in three phases: primary (non-remote) edges, remote edges, and finally
        linkage nodes. Remote edges and linkages are deferred because they
        refer to layer-1 nodes that must already exist.

        :param verify: fail if this results in an improper passage; when false,
                       a remote edge or linkage whose assertions fail is
                       skipped instead of raising
        :return: core.Passage created from self.nodes
        :raises AssertionError: if verify is true and a consistency check fails
        """
        # NOTE(review): the checks below are plain `assert` statements, so they
        # are stripped when Python runs with -O; verify has no effect then.
        passage = core.Passage(self.passage.ID)
        l0 = layer0.Layer0(passage)
        # Recreate every terminal in order; punctuation is detected by tag.
        terminals = [
            l0.add_terminal(text=terminal.text,
                            punct=terminal.tag == layer0.NodeTags.Punct,
                            paragraph=terminal.paragraph)
            for terminal in self.terminals
        ]
        l1 = layer1.Layer1(passage)
        # Bind the temporary root to the first head of the new layer 1.
        self.root.node = l1.heads[0]
        self.root.set_node_label()
        if self.labeled:  # We have a reference passage
            self.root.set_node_id()
            self.fix_terminal_tags(terminals)
        remotes = []  # To be handled after all nodes are created
        linkages = []  # To be handled after all non-linkage nodes are created
        self.topological_sort()  # Sort self.nodes
        for node in self.nodes:
            if self.labeled and verify:
                # A node with no text, no outgoing edges and not implicit is
                # a leaf that is not a terminal.
                assert node.text or node.outgoing or node.implicit, "Non-terminal leaf node: %s" % node
            if node.is_linkage:
                linkages.append(node)
            else:
                for edge in node.outgoing:
                    if edge.remote:
                        remotes.append((node, edge))
                    else:
                        # Primary edge: let the child create itself in layer 1
                        # under this node.
                        edge.child.add_to_l1(l1, node, edge.tag, terminals,
                                             self.labeled)

        for node, edge in remotes:  # Add remote edges
            try:
                assert node.node is not None, "Remote edge from nonexistent node"
                assert edge.child.node is not None, "Remote edge to nonexistent node"
                l1.add_remote(node.node, edge.tag, edge.child.node)
            except AssertionError:
                # Without verify, an impossible remote edge is silently dropped.
                if verify:
                    raise

        for node in linkages:  # Add linkage nodes and edges
            try:
                link_relation = None
                link_args = []
                # Split the outgoing edges into the single link relation and
                # the link arguments; edges with any other tag are ignored.
                for edge in node.outgoing:
                    assert edge.child.node is not None, "Linkage edge to nonexistent node"
                    if edge.tag == EdgeTags.LinkRelation:
                        assert link_relation is None, \
                            "Multiple link relations: %s, %s" % (link_relation, edge.child.node)
                        link_relation = edge.child.node
                    elif edge.tag == EdgeTags.LinkArgument:
                        link_args.append(edge.child.node)
                assert link_relation is not None, "No link relations: %s" % node
                # if len(link_args) < 2:
                #     print("Less than two link arguments for linkage %s" % node, file=sys.stderr)
                node.node = l1.add_linkage(link_relation, *link_args)
                if node.node_id:  # We are in training and we have a gold passage
                    node.node.extra["remarks"] = node.node_id  # For reference
            except AssertionError:
                # Without verify, a malformed linkage is skipped as a whole.
                if verify:
                    raise

        return passage
Пример #23
0
    def test_terminals(self):
        """Exercise :class:layer0.Terminal attributes, ``==`` and ``equals``."""

        def make_terminal(root, ID, tag, text, paragraph, para_pos):
            # Build a Terminal directly, bypassing Layer0.add_terminal.
            return layer0.Terminal(ID=ID,
                                   root=root,
                                   tag=tag,
                                   attrib={
                                       'text': text,
                                       'paragraph': paragraph,
                                       'paragraph_position': para_pos
                                   })

        word = layer0.NodeTags.Word
        punct = layer0.NodeTags.Punct

        passage = core.Passage('1')
        layer0.Layer0(passage)
        terms = [
            make_terminal(passage, *spec)
            for spec in (('0.1', word, '1', 1, 1),
                         ('0.2', word, '2', 2, 1),
                         ('0.3', punct, '.', 2, 2))
        ]

        # A second passage holds one matching and one differing terminal.
        other = core.Passage('2')
        layer0.Layer0(other)
        equal_term = make_terminal(other, '0.1', word, '1', 1, 1)
        unequal_term = make_terminal(other, '0.2', word, 'two', 2, 1)

        # Attribute name -> expected value for each of the three terminals.
        expected = (
            ('punct', [False, False, True]),
            ('text', ['1', '2', '.']),
            ('position', [1, 2, 3]),
            ('paragraph', [1, 2, 2]),
            ('para_pos', [1, 1, 2]),
        )
        for attr, values in expected:
            self.assertSequenceEqual([getattr(t, attr) for t in terms],
                                     values)

        # ``==`` holds only between a terminal and itself.
        for left, right in ((0, 1), (0, 2), (1, 2)):
            self.assertFalse(terms[left] == terms[right])
        self.assertTrue(terms[0] == terms[0])

        # ``equals`` compares terminals across passages.
        self.assertTrue(terms[0].equals(equal_term))
        self.assertFalse(terms[1].equals(unequal_term))
Пример #24
0
def n_evaluate(sent_tensor, model, attn, ori_sent, dev_passage, pos,
               pos_tensor):
    """
    Predict an unlabeled passage for one sentence.

    Tokens are consumed left to right. Each token (or run of proper-noun
    tokens) becomes a layer-0 terminal plus a layer-1 node that is pushed on
    a stack (``l1_node_list``). The attention model then picks an earlier
    token index; when it does, every stack entry back to that token's parent
    node is popped and grouped under a newly created node. Edge tags of the
    grouped children are the running counter ``k`` rendered as a string.

    :param sent_tensor: encoded sentence, passed to ``model`` unchanged
    :param model: callable invoked as ``model(sent_tensor, pos_tensor)``;
                  returns ``(output, hidden)`` where ``output`` is indexed
                  by token position
    :param attn: callable mapping one encoding to attention weights whose
                 top-1 index is read as a token position
    :param ori_sent: sequence of the sentence's tokens
    :param dev_passage: reference passage; only its ``ID`` is read
    :param pos: POS tags, index-aligned with ``ori_sent``
    :param pos_tensor: encoded POS tags, passed to ``model`` unchanged
    :return: the newly built core.Passage
    """

    # print("original sent")
    # print(ori_sent)

    # NOTE(review): not read anywhere in this function.
    create_by_leftmost = True

    max_recur = 5  # upper bound on regrouping attempts per token
    i = 0  # index of the token currently being consumed
    k = 0  # running counter; str(k) is used as the tag of grouping edges
    l1_node_list = []  # stack of layer-1 nodes not yet grouped under a parent
    l0_node_list = []  # layer-0 terminals, index-aligned with ori_sent

    output, hidden = model(sent_tensor, pos_tensor)

    # initialize passage
    passageID = dev_passage.ID
    passage = core.Passage(passageID)
    l0 = layer0.Layer0(root=passage)
    l1 = layer1.Layer1(passage)

    while i < len(ori_sent):
        terminal_token = ori_sent[i]
        pos_tag = pos[i]

        # proper nouns (only use when there are more than one consecutive PROPNs
        # The branch is taken for PROPN followed by PROPN/NUM, or for DET
        # followed by PROPN.
        if pos_tag == "PROPN" and i + 1 < len(ori_sent) and (pos[i + 1] == "PROPN" or pos[i + 1] == "NUM") \
                or (pos_tag == "DET" and i + 1 < len(ori_sent) and pos[i + 1] == "PROPN"):

            left_most_idx = i
            output_i = output[i]
            combine_list = []  # terminals merged into one layer-1 node

            # For cases like "April(PROPN) 30(NUM) ,(PUNCT) 2008(NUM)"
            if i + 3 < len(ori_sent) and pos[i + 1] == "NUM" and pos[
                    i + 2] == "PUNCT" and pos[i + 3] == "NUM":
                for _ in range(4):
                    # create terminal node in l0
                    terminal_token = ori_sent[i]
                    is_punc = terminal_token in punc
                    terminal_node = l0.add_terminal(terminal_token, is_punc)
                    l0_node_list.append(terminal_node)
                    combine_list.append(terminal_node)
                    i += 1

            # elif pos_tag == "PROPN":
            #     while True:
            #         if pos[i] != "PROPN":
            #             break
            #         # create terminal node in l0
            #         terminal_token = ori_sent[i]
            #         is_punc = terminal_token in punc
            #         terminal_node = l0.add_terminal(terminal_token, is_punc)
            #         l0_node_list.append(terminal_node)
            #         combine_list.append(terminal_node)
            #         i += 1
            # else:
            #     # for cases like "The Bahamas"
            #     while True:
            #         # create terminal node in l0
            #         terminal_token = ori_sent[i]
            #         is_punc = terminal_token in punc
            #         terminal_node = l0.add_terminal(terminal_token, is_punc)
            #         l0_node_list.append(terminal_node)
            #         combine_list.append(terminal_node)
            #         i += 1
            #         if pos[i] != "PROPN":
            #             break

            # including cases like "The Bahamas"
            else:
                # Always consume the current token, then keep consuming while
                # the following tokens are PROPN (or a ":" followed by PROPN).
                while True:
                    # create terminal node in l0
                    terminal_token = ori_sent[i]
                    is_punc = terminal_token in punc
                    terminal_node = l0.add_terminal(terminal_token, is_punc)
                    l0_node_list.append(terminal_node)
                    combine_list.append(terminal_node)
                    i += 1

                    if i >= len(ori_sent):
                        break
                    # for cases like "Lara Croft: Tomb Raider"
                    if ori_sent[i] == ":" and i + 1 < len(pos) and pos[
                            i + 1] == "PROPN":
                        continue
                    elif pos[i] != "PROPN":
                        break

            # combine the nodes in combine_list to one node in l1
            # The node is constructed directly with an ID derived from the
            # current size of l1._all.
            l1_position = len(l1._all) + 1
            ID = "{}{}{}".format("1", core.Node.ID_SEPARATOR, l1_position)
            terminal_node_in_l1 = FoundationalNode(
                ID, passage, tag=layer1.NodeTags.Foundational)
            for terminal_node in combine_list:
                terminal_node_in_l1.add(terminal_tag, terminal_node)
            l1_node_list.append(terminal_node_in_l1)

            # The loops above leave i one past the last consumed token; step
            # back so i names the last consumed token for the regrouping loop
            # and the final `i += 1` moves to the next unconsumed one.
            i -= 1

        else:
            # create terminal node in l0
            is_punc = terminal_token in punc
            terminal_node = l0.add_terminal(terminal_token, is_punc)
            l0_node_list.append(terminal_node)

            # Wrap the terminal in its own layer-1 node and push it.
            l1_position = len(l1._all) + 1
            ID = "{}{}{}".format("1", core.Node.ID_SEPARATOR, l1_position)
            terminal_node_in_l1 = FoundationalNode(
                ID,
                passage,
                tag=layer1.NodeTags.Punctuation
                if is_punc else layer1.NodeTags.Foundational)
            terminal_node_in_l1.add(terminal_tag, terminal_node)
            l1_node_list.append(terminal_node_in_l1)

            output_i = output[i]
            attn_i = attn(output_i)
            top_k_value, top_k_ind = torch.topk(attn_i, 1)

            # for debugging
            # NOTE(review): tki is not read afterwards; the double index
            # assumes top_k_ind has at least two dimensions.
            tki = top_k_ind.data[0][0]

            # attend to the current terminal itself
            # (an index at or beyond i means there is nothing to group;
            # the regrouping loop below is skipped for this token)
            if top_k_ind.data[0] >= i:
                i += 1
                continue
            else:
                top_k_node = l0_node_list[top_k_ind]
                parent_node = get_parent_node(top_k_node)
                new_node_position = len(l1._all) + 1
                new_node_ID = "{}{}{}".format("1", core.Node.ID_SEPARATOR,
                                              new_node_position)
                new_node = FoundationalNode(new_node_ID,
                                            passage,
                                            tag=layer1.NodeTags.Foundational)
                children = []
                # Pop the stack until the attended token's parent is reached;
                # everything popped becomes a child of new_node.
                # NOTE(review): pop() raises IndexError if parent_node is not
                # on the stack.
                while True:
                    item_node = l1_node_list.pop()
                    # itemid / pid are not read afterwards (debugging aids).
                    itemid = item_node.ID
                    pid = parent_node.ID
                    children.append(item_node)
                    if item_node.ID == parent_node.ID:
                        # Children are added in pop order, i.e. right to left.
                        for child in children:
                            new_node.add(str(k), child)
                            k += 1
                        l1_node_list.append(new_node)
                        break
                left_most_idx = get_left_most_id(new_node)

        # recursive call to see if need to create new node
        # Up to max_recur times, ask the attention model whether the span
        # [left_most_idx, i] should be grouped with an earlier node.
        for r in range(1, max_recur + 1):
            # NOTE(review): on the proper-noun path output_i was taken at
            # left_most_idx, so this difference is zero on the first pass -
            # confirm that is intended.
            new_node_output = output_i - output[left_most_idx]
            new_node_attn_weight = attn(new_node_output)
            r_top_k_value, r_top_k_ind = torch.topk(new_node_attn_weight, 1)
            #predict out of boundary
            if r_top_k_ind > i:
                break
            # attend to the new node itself
            elif left_most_idx <= r_top_k_ind <= i:
                break
            # create new node
            else:
                r_top_k_node = l0_node_list[r_top_k_ind]
                r_parent_node = get_parent_node(r_top_k_node)
                new_node_position = len(l1._all) + 1
                new_node_ID = "{}{}{}".format("1", core.Node.ID_SEPARATOR,
                                              new_node_position)
                new_node = FoundationalNode(new_node_ID,
                                            passage,
                                            tag=layer1.NodeTags.Foundational)
                children = []
                # Same pop-until-parent grouping as above.
                while True:
                    item_node = l1_node_list.pop()
                    children.append(item_node)
                    if item_node.ID == r_parent_node.ID:
                        for child in children:
                            new_node.add(str(k), child)
                            k += 1
                        l1_node_list.append(new_node)
                        break
                # The grouped span now starts further left.
                left_most_idx = get_left_most_id(new_node)

        i += 1

        # print(passage)

    # check if Node(1.1) is empty
    # If the first layer-1 head has no terminals, hang every node still on
    # the stack under it.
    head_node = l1.heads[0]
    if len(head_node.get_terminals()) == 0:
        for node in l1_node_list:
            head_node.add(str(k), node)
            k += 1

    return passage
Пример #25
0
def passage2():
    """Build Passage "2": 20 terminals, three scenes and a grouping scene.

    Terminals carry the texts "1".."20"; 10 and 20 are punctuation.
    Structure as created below (terminal numbers are 1-based):

        1: Linker #1
        2-10: Parallel scene #1 -- 2-5 ==> Process, 6-9 ==> Participant,
            10 ==> Punctuation; it also owns the Elaborator over 15
        11-19: Parallel scene #23, parent of scene #2, Linker #2 and
            scene #3
        11-14: unit tagged ParallelScene under scene #2
        16: Linker #2
        17-19: Parallel scene #3 -- 17-18 ==> Process, 19 ==> Participant,
            plus an implicit Participant
        20: Punctuation added with parent ``None``

    The trailing ``# true`` / ``# false`` markers are kept from the original
    author; presumably they flag whether a unit agrees with a reference
    annotation -- confirm before relying on them.

    """
    passage = core.Passage("2")
    l0 = layer0.Layer0(passage)
    l1 = layer1.Layer1(passage)
    tags = layer1.EdgeTags

    # Terminals "1".."20"; every tenth one is punctuation.
    terms = []
    for number in range(1, 21):
        terms.append(l0.add_terminal(text=str(number),
                                     punct=(number % 10 == 0)))

    def attach(unit, first, last):
        # Hang terminals number first..last (1-based, inclusive) on unit.
        for term in terms[first - 1:last]:
            unit.add(tags.Terminal, term)

    # Linker #1: [1 L]
    linker1 = l1.add_fnode(None, tags.Linker)  # true
    attach(linker1, 1, 1)

    # Scene #1: [[2 3 4 5 P] [6 7 8 9 A] [10 U] H]
    scene1 = l1.add_fnode(None, tags.ParallelScene)  # true
    process1 = l1.add_fnode(scene1, tags.Process)  # true
    participant1 = l1.add_fnode(scene1, tags.Participant)  # true
    attach(process1, 2, 5)
    attach(participant1, 6, 9)
    l1.add_punct(scene1, terms[9])

    # Scene #23 groups scene #2, Linker #2 and scene #3.
    # Scene #2 holds [11 12 13 14 H]; the Elaborator over 15 is attached
    # to scene #1, not to scene #2.
    scene23 = l1.add_fnode(None, tags.ParallelScene)  # true
    scene2 = l1.add_fnode(scene23, tags.ParallelScene)  # true
    inner2 = l1.add_fnode(scene2, tags.ParallelScene)  # false
    attach(inner2, 11, 14)
    elaborator = l1.add_fnode(scene1, tags.Elaborator)  # false
    attach(elaborator, 15, 15)

    # Linker #2: [16 L]
    linker2 = l1.add_fnode(scene23, tags.Linker)  # true
    attach(linker2, 16, 16)

    # Scene #3: [[17 18 P] [19 A] (implicit participant) H]
    scene3 = l1.add_fnode(scene23, tags.ParallelScene)  # true
    process3 = l1.add_fnode(scene3, tags.Process)  # false
    attach(process3, 17, 18)
    participant3 = l1.add_fnode(scene3, tags.Participant)  # true
    attach(participant3, 19, 19)
    l1.add_fnode(scene3, tags.Participant, implicit=True)

    # Punctuation #20 is not placed under any scene.
    l1.add_punct(None, terms[19])

    # Remote edges: scene #1 gets the Elaborator and Participant #3 as
    # remote Participants; scene #2 gets Process #1 as a remote State.
    l1.add_remote(scene1, tags.Participant, elaborator)
    l1.add_remote(scene1, tags.Participant, participant3)
    l1.add_remote(scene2, tags.State, process1)

    # Linkages: L1 -> H1 and H2 <- L2 -> H3.
    l1.add_linkage(linker1, scene1)
    l1.add_linkage(linker2, scene2, scene3)

    return passage
Пример #26
0
def evaluate_with_label(sent_tensor, model, a_model, label_model, s_model, rm_model, rm_lstm_model,
                        ori_sent, dev_passage, pos,
                        pos_tensor, labels, label2index, ent, ent_tensor, case_tensor, unroll):
    """

    :param sent_tensor:
    :param model:
    :param a_model:
    :param label_model:
    :param ori_sent:
    :param dev_passage:
    :param pos:
    :param pos_tensor:
    :param labels:
    :param label2index:
    :return:
    """

    # print("original sent")
    # print(ori_sent)

    create_by_leftmost = True

    using_s_model = False
    if not isinstance(s_model, str):
        using_s_model = True

    using_rm_model = False
    if not isinstance(rm_model, str):
        using_rm_model = True
        output_rm, hidden_rm = rm_lstm_model(sent_tensor, pos_tensor, ent_tensor, case_tensor, unroll)
        output_2d_rm = output_rm.squeeze(1)

    max_recur = 7
    i = 0
    sent_length = len(ori_sent)

    l1_node_list = []
    l0_node_list = []
    node_encoding = {}
    ck_node_encoding = {}

    output, hidden = model(sent_tensor, pos_tensor, ent_tensor, case_tensor, unroll)

    output_2d = output.squeeze(1)

    # initialize passage
    passageID = dev_passage.ID
    passage = core.Passage(passageID)
    l0 = layer0.Layer0(root=passage)
    l1 = layer1.Layer1(passage)

    predicted_scene = False

    already_in_propn = []
    rm_to_add = defaultdict(list)

    while i < sent_length:
        terminal_token = ori_sent[i]
        pos_tag = pos[i]
        ent_type = ent[i]

        if not predict_l1:
            # moved to l0_l1_rule.py
            pass
        # predict l0 to l1
        else:
            # create terminal node in l0
            is_punc = terminal_token in punc
            terminal_node = l0.add_terminal(terminal_token, is_punc)
            l0_node_list.append(terminal_node)

            l1_position = len(l1._all) + 1
            ID = "{}{}{}".format("1", core.Node.ID_SEPARATOR, l1_position)
            terminal_node_in_l1 = FoundationalNode(ID, passage, tag=layer1.NodeTags.Punctuation if
                                                   is_punc else layer1.NodeTags.Foundational)
            terminal_node_in_l1.add(terminal_tag, terminal_node)
            l1_node_list.append(terminal_node_in_l1)
            node_encoding[terminal_node_in_l1] = output[i]
            ck_node_encoding[terminal_node_in_l1] = [i, i]

            output_i = output[i]
            attn_i = a_model(output_i, output_2d, i)
            top_k_value, top_k_ind = torch.topk(attn_i, 1)

            # for debugging
            tki = top_k_ind.data[0][0]

            # attend to the current terminal itself
            if top_k_ind.data[0] >= i:

                # # remote node to a node to the right of the parent
                # if i in rm_to_add:
                #     for remote_pred in rm_to_add[i]:
                #         rm_parent, rm_label = remote_pred
                #         rm_parent.add(rm_label, terminal_node_in_l1, edge_attrib={'remote': True})

                i += 1
                continue
            else:
                top_k_node = l0_node_list[top_k_ind]
                parent_node = get_parent_node(top_k_node)
                # new_node_position = len(l1._all) + 1
                # new_node_ID = "{}{}{}".format("1", core.Node.ID_SEPARATOR, new_node_position)
                # new_node = FoundationalNode(new_node_ID, passage, tag=layer1.NodeTags.Foundational)
                """TODO: check this. not sure if it should be the left most child or top_k_ind"""
                debug_left_most_id = get_left_most_id(parent_node)
                # debug_left_most_id = top_k_ind

                # if using_s_model:
                #     output_boundary = output[debug_left_most_id: i + 1]
                #     if unroll and debug_left_most_id > 0:
                #         new_node_enc, combine_l0 = s_model(output_boundary, inp_hidden=hidden[debug_left_most_id - 1],
                #                                            layer0=True)
                #     else:
                output_boundary = output[debug_left_most_id: i + 1]
                new_node_enc, combine_l0, is_dis = s_model(output_boundary, layer0=True, dis=True)
                if using_rm_model:
                    output_boundary_rm = output_rm[debug_left_most_id: i + 1]
                    new_node_enc_rm, _ = s_model(output_boundary_rm)
                # else:
                #     new_node_enc = output[i] - output[debug_left_most_id]

                propn_topk_value, propn_topk_ind = torch.topk(combine_l0, 1)
                dis_topk_value, dis_topk_ind = torch.topk(is_dis, 1)
                # need to combine nodes in l0

                if dis_topk_ind.data[0] == 1 and propn_topk_ind.data[0] == 1:
                    dis_left_node_l0 = l0_node_list[top_k_ind]
                    dis_left_node_l1 = dis_left_node_l0.parents[0]
                    dis_left_node_l0._incoming = []
                    dis_left_node_l1._outgoing = []
                    terminal_node_in_l1.add(terminal_tag, dis_left_node_l0)

                    # i += 1
                    # continue

                combined = False
                if propn_topk_ind.data[0] == 1 and dis_topk_ind.data[0] == 0 and \
                        debug_left_most_id not in already_in_propn:
                    # check if within the left and right boundary if there is already a node in propn
                    valid_attention = True
                    for j in range(debug_left_most_id, i + 1):
                        if j in already_in_propn:
                            valid_attention = False

                    if valid_attention:
                        combine_list = []
                        while True:
                            item_node = l1_node_list.pop()
                            l1_node_to_l0_idx = get_left_most_id(item_node)
                            itemid = item_node.ID
                            pid = parent_node.ID
                            combine_list.append(item_node)
                            if l1_node_to_l0_idx == debug_left_most_id:
                                break

                        # make sure not to attend to a node with parents
                        for ck_node in combine_list:
                            # ck_node can be a combined node
                            ck_node_l0 = l0_node_list[get_left_most_id(ck_node)]
                            ck_node_l1 = ck_node_l0.parents[0]
                            if len(ck_node_l1.parents) > 0:
                                valid_attention = False
                                break
                        # push back without change
                        if not valid_attention:
                            combined = False
                            # to be consistent with popping, we loop in the reverse order
                            for ck_node in reversed(combine_list):
                                l1_node_list.append(ck_node)
                        else:
                            combined = True
                            l1_position = len(l1._all) + 1
                            ID = "{}{}{}".format("1", core.Node.ID_SEPARATOR, l1_position)
                            terminal_node_in_l1 = FoundationalNode(ID, passage, tag=layer1.NodeTags.Foundational)
                            for l1_node in combine_list:
                                assert len(l1_node.children) == 1, "l1_node has more than 1 children"
                                terminal_node = l1_node.children[0]
                                # remove node_in_l1
                                # cannot use "remove" function
                                # l1_node.remove(terminal_node)
                                terminal_node._incoming = []
                                l1_node._outgoing = []
                                # if remove node from l1 then ID will be a problem
                                # try:
                                #     l1._remove_node(l1_node)
                                # except:
                                #     pass
                                # combine nodes
                                terminal_node_in_l1.add(terminal_tag, terminal_node)
                                already_in_propn.append(get_left_most_id(terminal_node))
                            l1_node_list.append(terminal_node_in_l1)
                            left_most_idx = get_left_most_id(terminal_node_in_l1)
                            node_encoding[terminal_node_in_l1] = new_node_enc
                            ck_node_encoding[terminal_node_in_l1] = [debug_left_most_id, i]

                # # remote node to a node to the right of the parent
                # if i in rm_to_add:
                #     for remote_pred in rm_to_add[i]:
                #         rm_parent, rm_label = remote_pred
                #         rm_parent.add(rm_label, terminal_node_in_l1, edge_attrib={'remote': True})
                        
                if not combined:
                    children = []
                    new_node_position = len(l1._all) + 1
                    new_node_ID = "{}{}{}".format("1", core.Node.ID_SEPARATOR, new_node_position)
                    new_node = FoundationalNode(new_node_ID, passage, tag=layer1.NodeTags.Foundational)
                    while True:
                        item_node = l1_node_list.pop()
                        itemid = item_node.ID
                        pid = parent_node.ID
                        children.append(item_node)
                        if item_node.ID == parent_node.ID:
                            for child in children:
                                child_enc = node_encoding[child]
                                ck_child_enc = ck_node_encoding[child]
                                label_weight = label_model(new_node_enc, child_enc)

                                # restrict predicting "H" label
                                label_top_k_value, label_top_k_ind = torch.topk(label_weight, 1)
                                # label_top_k_values, label_top_k_inds = torch.topk(label_weight, 2)
                                # label_top_k_ind = label_top_k_inds[0][0]
                                # if label_top_k_ind == label2index["H"]:
                                #     if not (debug_left_most_id == 0 and i == len(ori_sent) - 1):
                                #         label_top_k_ind = label_top_k_inds[0][1]
                                #     else:
                                #         predicted_scene = True

                                pred_label = labels[label_top_k_ind]
                                new_node.add(pred_label, child)

                            # predict remote edge
                            if using_rm_model:
                                rm_weight = rm_model(new_node_enc_rm, output_2d_rm, sent_length)
                                rm_top_k_value, rm_top_k_ind = torch.topk(rm_weight, 1)
                                if rm_top_k_ind < get_left_most_id(new_node):
                                    rm_pred_label = "A"
                                    new_node.add(rm_pred_label, get_primary_parent(l0_node_list[rm_top_k_ind]),
                                                 edge_attrib={'remote': True})
                                elif rm_top_k_ind > get_right_most_id(new_node):
                                    rm_pred_label = "A"
                                    # new_node.add(rm_pred_label, get_primary_parent(l0_node_list[rm_top_k_ind]),
                                    #              edge_attrib={'remote': True})
                                    rm_to_add[rm_top_k_ind.data.cpu().numpy()[0][0]].append((new_node, rm_pred_label))

                            l1_node_list.append(new_node)
                            node_encoding[new_node] = new_node_enc
                            ck_node_encoding[new_node] = [debug_left_most_id, i]
                            break
                    left_most_idx = get_left_most_id(new_node)

                    if left_most_idx > top_k_ind:
                        left_most_idx = top_k_ind

        # recursive call to see if need to create new node
        for r in range(1, max_recur + 1):
            if using_s_model:
                output_boundary = output[left_most_idx: i + 1]
                if left_most_idx >= i + 1:
                    print("ERROR:")
                    print("Combined?")
                    print(combined)
                    print("left_most_idx")
                    print(left_most_idx)
                    print("i")
                    print(i)
                if unroll and left_most_idx > 0:
                    new_node_output, combine_l0 = s_model(output_boundary, inp_hidden=hidden[left_most_idx - 1])
                else:
                    new_node_output, combine_l0 = s_model(output_boundary)
            else:
                new_node_output = output[i] - output[left_most_idx]

            new_node_attn_weight = a_model(new_node_output, output_2d, i)
            r_top_k_value, r_top_k_ind = torch.topk(new_node_attn_weight, 1)

            # predict out of boundary
            if r_top_k_ind > i:
                break
            # attend to the new node itself
            elif left_most_idx <= r_top_k_ind <= i:
                break
            # create new node
            else:
                r_top_k_node = l0_node_list[r_top_k_ind]
                r_parent_node = get_parent_node(r_top_k_node)
                new_node_position = len(l1._all) + 1
                new_node_ID = "{}{}{}".format("1", core.Node.ID_SEPARATOR, new_node_position)
                new_node = FoundationalNode(new_node_ID, passage, tag=layer1.NodeTags.Foundational)
                """TODO: same as before. check this. not sure if it should be the left most child or top_k_ind"""
                debug_left_most_id = get_left_most_id(r_parent_node)

                if debug_left_most_id > r_top_k_ind:
                    debug_left_most_id = r_top_k_ind

                if using_s_model:
                    output_boundary = output[debug_left_most_id: i + 1]
                    if unroll and debug_left_most_id > 0:
                        r_new_node_enc, combine_l0 = s_model(output_boundary, inp_hidden=hidden[debug_left_most_id - 1])
                    else:
                        r_new_node_enc, combine_l0 = s_model(output_boundary)

                        if using_rm_model:
                            output_boundary_rm = output_rm[debug_left_most_id: i + 1]
                            r_new_node_enc_rm, _ = s_model(output_boundary_rm)
                else:
                    r_new_node_enc = output[i] - output[debug_left_most_id]

                # r_new_node_enc = output[i] - output[get_left_most_id(r_parent_node)]
                children = []
                while True:
                    item_node = l1_node_list.pop()
                    children.append(item_node)
                    if item_node.ID == r_parent_node.ID:
                        for child in children:
                            child_enc = node_encoding[child]
                            ck_child_enc = ck_node_encoding[child]
                            label_weight = label_model(r_new_node_enc, child_enc)

                            # restrict predicting "H" label
                            label_top_k_value, label_top_k_ind = torch.topk(label_weight, 1)
                            # label_top_k_values, label_top_k_inds = torch.topk(label_weight, 2)
                            # label_top_k_ind = label_top_k_inds[0][0]
                            # if label_top_k_ind == label2index["H"]:
                            #     if not (debug_left_most_id == 0 and i == len(ori_sent) - 1):
                            #         label_top_k_ind = label_top_k_inds[0][1]
                            #     else:
                            #         predicted_scene = True

                            pred_label = labels[label_top_k_ind]
                            new_node.add(pred_label, child)

                        # predict remote edge
                        if using_rm_model:
                            rm_weight = rm_model(r_new_node_enc_rm, output_2d_rm, sent_length)
                            rm_top_k_value, rm_top_k_ind = torch.topk(rm_weight, 1)
                            if rm_top_k_ind < get_left_most_id(new_node):
                                rm_pred_label = "A"
                                new_node.add(rm_pred_label, get_primary_parent(l0_node_list[rm_top_k_ind]),
                                             edge_attrib={'remote': True})
                            elif rm_top_k_ind > get_right_most_id(new_node):
                                rm_pred_label = "A"
                                # new_node.add(rm_pred_label, get_primary_parent(l0_node_list[rm_top_k_ind]),
                                #              edge_attrib={'remote': True})
                                rm_to_add[rm_top_k_ind.data.cpu().numpy()[0][0]].append((new_node, rm_pred_label))

                        l1_node_list.append(new_node)
                        """WARNING: seems this is wrong. changed"""
                        # node_encoding[new_node] = output[i] - r_new_node_enc
                        node_encoding[new_node] = r_new_node_enc
                        ck_node_encoding[new_node] = [debug_left_most_id, i]
                        break
                left_most_idx = get_left_most_id(new_node)

        i += 1

    # # check if Node(1.1) is empty
    # if not predicted_scene:
    #     head_node = l1.heads[0]
    #     head_node_enc = output[-1] - output[0]
    #     for node in l1_node_list:
    #         # print(node.get_terminals())
    #         current_node_encoding = node_encoding[node]
    #         label_weight = label_model(head_node_enc, current_node_encoding)
    #         label_top_k_value, label_top_k_ind = torch.topk(label_weight, 1)
    #         pred_label = labels[label_top_k_ind]
    #         head_node.add(pred_label, node)

    # passage = clean_nodes(passage)

    # print(passage.ID)
    # ioutil.write_passage(passage, outdir="pred_test/")

    return passage
Пример #27
0
def test_terminals():
    """Check :class:`layer0`.Terminal properties, equality and layer copying.

    Three terminals are created with passage "1" as their root and two more
    with passage "2" as their root.  The test then verifies the per-terminal
    properties (``punct``, ``text``, ``position``, ``paragraph``,
    ``para_pos``), ``==`` between terminals of the same passage, ``equals``
    between terminals of different passages, and that copying layer 0 of
    each passage yields a passage that ``equals`` the original.
    """

    def build(root, term_id, tag, text, paragraph, para_pos):
        # Single place where the Terminal constructor is spelled out, so the
        # fixtures below read as compact rows.
        return layer0.Terminal(
            ID=term_id,
            root=root,
            tag=tag,
            attrib={
                "text": text,
                "paragraph": paragraph,
                "paragraph_position": para_pos,
            },
        )

    p = core.Passage("1")
    layer0.Layer0(p)
    word_tag = layer0.NodeTags.Word
    punct_tag = layer0.NodeTags.Punct
    terms = [
        build(p, "0.1", word_tag, "1", 1, 1),
        build(p, "0.2", word_tag, "2", 2, 1),
        build(p, "0.3", punct_tag, ".", 2, 2),
    ]

    p_copy = core.Passage("2")
    layer0.Layer0(p_copy)
    # Same ID and attributes as terms[0], but rooted in the second passage.
    equal_term = build(p_copy, "0.1", word_tag, "1", 1, 1)
    # Same ID as terms[1] but with a different "text" attribute.
    unequal_term = build(p_copy, "0.2", word_tag, "two", 2, 1)

    # Property name -> expected value for each of the three terminals.
    expected_by_property = (
        ("punct", [False, False, True]),
        ("text", ["1", "2", "."]),
        ("position", [1, 2, 3]),
        ("paragraph", [1, 2, 2]),
        ("para_pos", [1, 1, 2]),
    )
    for prop_name, expected in expected_by_property:
        assert [getattr(t, prop_name) for t in terms] == expected

    # Distinct terminals never compare equal; a terminal equals itself.
    # The ``not (a == b)`` form is kept so that ``__eq__`` is what gets tested.
    for left, right in ((0, 1), (0, 2), (1, 2)):
        assert not (terms[left] == terms[right])
    assert terms[0] == terms[0]

    # ``equals`` is used for the cross-passage comparisons.
    assert terms[0].equals(equal_term)
    assert not (terms[1].equals(unequal_term))

    # Copying layer 0 must give a passage that equals its source.
    for passage in (p, p_copy):
        assert passage.copy(layer0.LAYER_ID).equals(passage)