Пример #1
0
def get_relations_minimal_centers(ucca_passage: Passage):
    """
    Return the tokens of the innermost centers of each scene's main relations.

    Scenes are the layer-"1" nodes tagged "FN" whose ``is_scene()`` is true.
    For every child of a scene reached through a 'P' or 'S' edge, the chain of
    'C' edges is followed down to the deepest non-empty level of centers; a
    relation without centers contributes the scene's whole relation list.

    Each entry of the returned list is a list of token strings, looked up in
    layer "0" by terminal position, with consecutive equal strings collapsed.
    """
    scene_nodes = [
        node for node in ucca_passage.layer("1").all
        if node.tag == "FN" and node.is_scene()
    ]

    center_groups = []
    for scene in scene_nodes:
        relations = [edge.child for edge in scene.outgoing if edge.tag in ('P', 'S')]
        for relation in relations:
            level = [edge.child for edge in relation.outgoing if edge.tag == 'C']
            if not level:
                # No centers under this relation: the full relation list is recorded.
                center_groups.append(relations)
                continue
            while level:
                innermost = level
                # NOTE(review): only the LAST center of the current level is
                # descended into; sub-centers of the other centers are ignored.
                level = [edge.child for edge in level[-1].outgoing if edge.tag == 'C']
            center_groups.append(innermost)

    terminal_layer = ucca_passage.layer("0")
    result = []
    for group in center_groups:
        for unit in group:
            tokens = []
            for terminal in unit.get_terminals(False, True):
                token = str(terminal_layer.by_position(terminal.position))
                # Skip a token identical to the one just emitted.
                if not tokens or token != tokens[-1]:
                    tokens.append(token)

        # NOTE(review): appended once per group, after the inner loop, so only
        # the tokens of the group's last unit are kept.
        result.append(tokens)

    return result
Пример #2
0
 def get_yields(self, passage: Passage):
     """
     Yield one list of terminals per coordinated element found in the passage.

     Every unit of layer ``layer1.LAYER_ID`` is inspected. Units with a truthy
     ``tag`` are handled by the UCCA branch, units without one by the UD branch
     (labels taken from the inline comments).
     """
     for unit in passage.layer(layer1.LAYER_ID).all:
         if unit.tag:  # UCCA
             if unit.tag == layer1.NodeTags.Foundational:
                 if unit.connector:  # nominal coordination
                     # One yield per center of the coordinated unit.
                     yield from map(self.get_terminals, unit.centers)
                 else:  # predicate coordination, expressed as linkers + parallel scenes
                     # IDs of the linkers whose terminals all carry the CCONJ part of speech.
                     ccs = {l.ID for l in unit.linkers if all(t.tok[Attr.POS.value] == CCONJ for t in l.get_terminals())}
                     if ccs:
                         # Terminals accumulated for the conjunct currently being built.
                         terminals = []
                         for edge in unit:
                             if edge.child.ID in ccs:
                                 # A coordinating linker with nothing accumulated before it
                                 # stops the scan of this unit; otherwise control falls
                                 # through to the flush below.
                                 if not terminals:
                                     break
                             elif edge.tag in (layer1.EdgeTags.Linker, layer1.EdgeTags.ParallelScene):
                                 # A parallel scene that does not start right after the last
                                 # accumulated terminal closes the current conjunct first.
                                 if edge.tag == layer1.EdgeTags.ParallelScene and terminals and \
                                         terminals[-1].position + 1 < edge.child.start_position:
                                     yield terminals
                                     terminals = []
                                 terminals += self.get_terminals(edge.child)
                                 continue
                             # Reached for a coordinating linker (with accumulated terminals)
                             # or for any edge that is neither a linker nor a parallel scene:
                             # flush the current conjunct.
                             if terminals:
                                 yield terminals
                                 terminals = []
                         # Flush whatever is left after the last edge.
                         if terminals:
                             yield terminals
         else:  # UD
             children = [e.child for e in unit if e.tag == "conj"]
             if children:  # UD and there is a coordination
                 # The head unit and each "conj" child yield their terminals,
                 # with COORDINATION_UD_TAGS passed as excluded_tags.
                 yield from (self.get_terminals(c, excluded_tags=COORDINATION_UD_TAGS) for c in [unit] + children)
Пример #3
0
 def get_yields(self, passage: Passage):
     """
     Yield ``(units, edge_tag)`` pairs for the units of layer ``layer1.LAYER_ID``.

     Units with a truthy ``tag`` yield, for each outgoing edge whose tag is in
     ``self.relations`` and that is neither remote nor leading to an implicit
     child, the terminals of the child together with the edge tag.
     Units without a tag whose token has part of speech "VERB" yield
     ``[unit]`` once per outgoing edge that is not tagged as punctuation.
     """
     for node in passage.layer(layer1.LAYER_ID).all:
         if not node.tag:
             token = node.token
             if not token or token.pos != "VERB":
                 continue
             for out_edge in node:
                 if out_edge.tag != EdgeTags.Punctuation:
                     yield [node], out_edge.tag
             continue

         for out_edge in node:
             if out_edge.tag not in self.relations:
                 continue
             # Remote edges and implicit children are left out.
             if out_edge.attrib.get("remote") or out_edge.child.attrib.get("implicit"):
                 continue
             yield self.get_terminals(out_edge.child), out_edge.tag
Пример #4
0
 def get_yields(self, passage: Passage):
     """
     Yield the terminals of each clause-like unit found in the passage.

     For tagged (UCCA) foundational units, the candidates are the unit's
     participants when ``self.relations == ["xcomp"]`` and its elaborators
     otherwise; only candidates accepted by ``FoundationalNode.is_scene`` are
     yielded. For untagged (UD) units, the children reached through an edge
     whose tag is in ``self.relations`` are yielded.
     """
     to_terminals = self.get_terminals
     for node in passage.layer(layer1.LAYER_ID).all:
         if not node.tag:  # UD
             clauses = [out_edge.child for out_edge in node if out_edge.tag in self.relations]
             for clause in clauses:  # empty when there is no subordinate clause
                 yield to_terminals(clause)
             continue

         # UCCA
         if node.tag != layer1.NodeTags.Foundational:
             continue
         if self.relations == ["xcomp"]:
             candidates = node.participants
         else:
             candidates = node.elaborators
         is_scene = layer1.FoundationalNode.is_scene
         for candidate in candidates:
             if is_scene(candidate):
                 yield to_terminals(candidate)
Пример #5
0
 def get_yields(self, passage: Passage):
     """
     Yield ``(terminals, tag)`` pairs for the predicate units of the passage.

     A unit whose ``ftag`` is Process or State yields its own terminals and
     that ftag. Otherwise, only when ``passage.extra["format"]`` is "conllu",
     the tag of the unit's first incoming edge is taken; while that tag is in
     LINK_RELATIONS the first-parent chain is climbed. If the tag reached is
     in PREDICATE_RELATIONS or there is none, the chain of "head" children is
     followed down from the unit and the terminals of the last head are
     yielded with the unit's original incoming tag.
     """
     for node in passage.layer(layer1.LAYER_ID).all:
         if node.ftag in (EdgeTags.Process, EdgeTags.State):
             yield self.get_terminals(node), node.ftag
             continue
         if passage.extra.get("format") != "conllu":
             continue

         # UD predicate
         own_tag = node.incoming[0].tag if node.incoming else None
         ancestor, ancestor_tag = node, own_tag
         while ancestor_tag in LINK_RELATIONS:
             ancestor = ancestor.incoming[0].parent
             ancestor_tag = ancestor.incoming[0].tag if ancestor.incoming else None
         if ancestor_tag not in PREDICATE_RELATIONS and ancestor_tag is not None:
             continue

         # Descend through the first "head" child until a unit has none.
         head = node
         while True:
             sub_heads = [out_edge.child for out_edge in head if out_edge.tag == "head"]
             if not sub_heads:
                 break
             head = sub_heads[0]
         yield self.get_terminals(head), own_tag
Пример #6
0
 def get_yields(self, passage: Passage):
     """
     Yield the terminals of the members of coordinations nested in coordinations.

     Tagged (UCCA) foundational units qualify when both the unit and its
     ``fparent`` have a connector; each of the unit's centers is yielded.
     Untagged (UD) units qualify when they have at least one outgoing "conj"
     edge and at least one incoming "conj" edge; the unit and its "conj"
     children are yielded, with COORDINATION_UD_TAGS passed as excluded_tags.
     """
     for node in passage.layer(layer1.LAYER_ID).all:
         if node.tag:  # UCCA
             if node.tag != layer1.NodeTags.Foundational:
                 continue
             if not node.connector:
                 continue
             parent = node.fparent
             if parent and parent.connector:  # nominal coordination
                 for center in node.centers:
                     yield self.get_terminals(center)
             continue

         # UD
         conjuncts = [out_edge.child for out_edge in node if out_edge.tag == "conj"]
         if conjuncts and any(in_edge.tag == "conj" for in_edge in node.incoming):
             for member in [node] + conjuncts:
                 yield self.get_terminals(member, excluded_tags=COORDINATION_UD_TAGS)
Пример #7
0
def get_scenes(ucca_passage: Passage):
    """
    Return the words of every UCCA scene in the given passage.

    Scenes are the layer-'1' nodes tagged "FN" whose ``is_scene()`` is true.
    The result holds one list of word strings per scene, in the order in
    which ``get_terminals(False, True)`` returns the terminals.

    The terminal listing can return the same terminal more than once.
    Duplicates are recognised by terminal position rather than by comparing
    word text, so a word that is genuinely repeated in the sentence
    (e.g. "had had") is kept while a terminal that is listed twice is
    emitted only once.
    """
    text_scenes = []
    for scene in ucca_passage.layer('1').all:
        if scene.tag != "FN" or not scene.is_scene():
            continue
        words = []
        seen_positions = set()
        for terminal in scene.get_terminals(False, True):
            position = terminal.position
            if position in seen_positions:
                # Same terminal listed again; skip it.
                continue
            seen_positions.add(position)
            words.append(terminal.text)
        text_scenes.append(words)
    return text_scenes
Пример #8
0
def get_minimal_centers_from_relations(ucca_passage: Passage):
    """
    Return the tokens of the innermost centers of each scene's main relations.

    The scenes come from ``get_scenes_ucca`` and the groups of center units
    from ``_get_minimal_centers_from_scene``, concatenated over all scenes.
    Each entry of the returned list is a list of token strings, looked up in
    layer "0" by terminal position, with consecutive equal strings collapsed.
    """
    center_groups = []
    for scene in get_scenes_ucca(ucca_passage):
        center_groups.extend(_get_minimal_centers_from_scene(scene))

    terminal_layer = ucca_passage.layer("0")
    result = []
    for group in center_groups:
        for unit in group:
            tokens = []
            for terminal in unit.get_terminals(False, True):
                token = str(terminal_layer.by_position(terminal.position))
                # Skip a token identical to the one just emitted.
                if not tokens or token != tokens[-1]:
                    tokens.append(token)

        # NOTE(review): appended once per group, after the inner loop, so only
        # the tokens of the group's last unit are kept; an empty group would
        # reuse the previous group's tokens (or fail if it is the first one).
        result.append(tokens)

    return result
Пример #9
0
def get_scenes_ucca(ucca_passage: Passage):
    """Return the layer-'1' nodes tagged "FN" whose ``is_scene()`` is true, in layer order."""
    scene_nodes = []
    for node in ucca_passage.layer('1').all:
        if node.tag != "FN":
            continue
        if node.is_scene():
            scene_nodes.append(node)
    return scene_nodes
Пример #10
0
def get_minimal_centers_from_participants(P: Passage):
    """
    P is a ucca passage. Return all the minimal participant centers in each scene.

    The function works in four passes:

    1. For every scene (from ``get_scenes_ucca``), collect one group of
       "minimal center" units per participant (child of an 'A' edge).
    2. Split every group holding more than one unit into single-unit groups,
       remembering in ``I`` where such a group was found.
    3. Turn every single-unit group into a span with ``flatten_unit``
       (defined elsewhere; its result is appended to below, so it is
       presumably a list of tokens).
    4. Merge the spans that came from the same multi-unit group.

    The result is a list with one entry per scene, each a list of spans.
    """
    scenes = get_scenes_ucca(P)
    # n[scene_index] is the list of unit groups of that scene.
    n = []
    for sc in scenes:  # find participant nodes
        minimal_centers = []
        participants = [e.child for e in sc.outgoing if e.tag == 'A']
        for pa in participants:
            centers = [e.child for e in pa.outgoing if e.tag == 'C']
            if centers:
                # Walk down level by level; lcenters keeps the last non-empty level.
                while centers:
                    for c in centers:
                        ccenters = [e.child for e in c.outgoing if e.tag in ['C', 'P', 'S']]
                        # 'P' and 'S' are followed too, so centers that are Scenes are handled.
                        # NOTE(review): ccenters is overwritten on every iteration, so only
                        # the children of the last center survive the loop.
                    lcenters = centers
                    centers = ccenters
                minimal_centers.append(lcenters)
            elif pa.is_scene():  # address the case of Participant Scenes
                scene_centers = [e.child for e in pa.outgoing if e.tag in ['P', 'S']]
                for scc in scene_centers:
                    centers = [e.child for e in scc.outgoing if e.tag == 'C']
                    if centers:
                        # Same level-by-level descent, following 'C' edges only.
                        while centers:
                            for c in centers:
                                ccenters = [e.child for e in c.outgoing if e.tag == 'C']
                            lcenters = centers
                            centers = ccenters
                        minimal_centers.append(lcenters)
                    else:
                        # No centers under this relation: the whole relation list is recorded.
                        minimal_centers.append(scene_centers)
            elif any(e.tag == "H" for e in pa.outgoing):  # address the case of multiple parallel Scenes inside a participant
                hscenes = [e.child for e in pa.outgoing if e.tag == 'H']
                # mh gathers one unit per relation of every parallel scene.
                mh = []
                for h in hscenes:
                    hrelations = [e.child for e in h.outgoing if e.tag in ['P', 'S']]  # in case of multiple
                    # parallel scenes we generate new multiple centers
                    for hr in hrelations:
                        centers = [e.child for e in hr.outgoing if e.tag == 'C']
                        if centers:
                            while centers:
                                for c in centers:
                                    ccenters = [e.child for e in c.outgoing if e.tag == 'C']
                                lcenters = centers
                                centers = ccenters
                            # Only the first unit of the deepest level is kept.
                            mh.append(lcenters[0])
                        else:
                            # NOTE(review): the first relation is appended, not hr itself.
                            mh.append(hrelations[0])
                # mh is appended even when it is empty (no 'P'/'S' relation found).
                minimal_centers.append(mh)
            else:
                # Plain participant: it is its own minimal center.
                minimal_centers.append([pa])

        n.append(minimal_centers)

    # NOTE(review): this value of y is not read before y is rebound further down.
    y = P.layer("0")  # find cases of multiple centers
    output = []
    # s mirrors n, with every multi-unit group replaced by single-unit groups.
    s = []
    # I holds [scene_index, group_index] for every multi-unit group, with both
    # indices obtained through list.index on n (i.e. the first equal element).
    I = []
    for scp in n:
        r = []
        u = n.index(scp)
        for par in scp:
            if len(par) > 1:
                d = scp.index(par)
                # Split [a, b, c] into [[a], [b], [c]].
                par = [par[i:i+1] for i in range(len(par))]
                for c in par:
                    r.append(c)
                I.append([u, d])
            else:
                r.append(par)
        s.append(r)

    for scp in s:  # find the spans of the participant nodes
        output1 = []
        for par in scp:
            # TODO: sometimes "par" does not contain anything, which caused the original implementation (without the if) to crash when unpacking
            if len(par) != 1:
                # output2 = []
                continue
            else:
                [par] = par
                output2 = flatten_unit(par)
            output1.append(output2)
        output.append(output1)

    y = []  # unify spans in case of multiple centers
    for scp in output:
        x = []
        # NOTE(review): u is assigned here but not read afterwards.
        u = output.index(scp)
        for par in scp:
            # NOTE(review): the group index v[1] was computed on n (before splitting
            # and skipping) but is used here to index output — confirm they line up.
            for v in I:
                if par == output[v[0]][v[1]]:
                    # par is the first span of a multi-unit group: extend it in place with
                    # the first element of each following span of that group.
                    for l in range(1, len(n[v[0]][v[1]])):
                        par.append((output[v[0]][v[1]+l])[0])

                    x.append(par)
                elif all(par != output[v[0]][v[1]+l] for l in range(1, len(n[v[0]][v[1]]))):
                    # par is not one of the follow-up spans of this group: keep it.
                    # This runs once per entry of I, so par may be appended several times.
                    x.append(par)
            if not I:
                # No multi-unit group anywhere: every span is kept as is.
                x.append(par)
        y.append(x)

    return y
Пример #11
0
def get_participants_minimal_centers(P: Passage):
    """
    P is a ucca passage. Return all the minimal participant centers in each scene.

    The function works in four passes:

    1. For every scene (layer-"1" node tagged "FN" with a true ``is_scene()``),
       collect one group of "minimal center" units per participant (child of
       an 'A' edge).
    2. Split every group holding more than one unit into single-unit groups,
       remembering in ``I`` where such a group was found.
    3. Turn every single-unit group into a span: the strings of its terminals,
       looked up in layer "0" by position, with consecutive equal strings
       collapsed.
    4. Merge the spans that came from the same multi-unit group.

    The result is a list with one entry per scene, each a list of spans.
    """
    scenes = [x for x in P.layer("1").all if x.tag == "FN" and x.is_scene()]
    # n[scene_index] is the list of unit groups of that scene.
    n = []
    for sc in scenes:  # find participant nodes
        m = []
        participants = [e.child for e in sc.outgoing if e.tag == 'A']
        for pa in participants:
            centers = [e.child for e in pa.outgoing if e.tag == 'C']
            if centers:
                # Walk down level by level; lcenters keeps the last non-empty level.
                while centers:
                    for c in centers:
                        # NOTE(review): ccenters is overwritten on every iteration, so only
                        # the children of the last center survive the loop.
                        ccenters = [e.child for e in c.outgoing if e.tag == 'C' or e.tag == 'P' or e.tag == 'S']   # 'P'/'S' are followed too, so center Scenes are handled
                    lcenters = centers
                    centers = ccenters
                m.append(lcenters)
            elif pa.is_scene():  # address the case of Participant Scenes
                scenters = [e.child for e in pa.outgoing if e.tag == 'P' or e.tag == 'S']
                for scc in scenters:
                    centers = [e.child for e in scc.outgoing if e.tag == 'C']
                    if centers:
                        # Same level-by-level descent, following 'C' edges only.
                        while centers:
                            for c in centers:
                                ccenters = [e.child for e in c.outgoing if e.tag == 'C']
                            lcenters = centers
                            centers = ccenters
                        m.append(lcenters)
                    else:
                        # No centers under this relation: the whole relation list is recorded.
                        m.append(scenters)
            elif any(e.tag == "H" for e in pa.outgoing):  # address the case of multiple parallel Scenes inside a participant
                hscenes = [e.child for e in pa.outgoing if e.tag == 'H']
                # mh gathers one unit per relation of every parallel scene.
                mh = []
                for h in hscenes:
                    hrelations = [e.child for e in h.outgoing if e.tag == 'P' or e.tag == 'S']  # in case of multiple parallel scenes we generate new multiple centers
                    for hr in hrelations:
                        centers = [e.child for e in hr.outgoing if e.tag == 'C']
                        if centers:
                            while centers:
                                for c in centers:
                                    ccenters = [e.child for e in c.outgoing if e.tag == 'C']
                                lcenters = centers
                                centers = ccenters
                            # Only the first unit of the deepest level is kept.
                            mh.append(lcenters[0])
                        else:
                            # NOTE(review): the first relation is appended, not hr itself.
                            mh.append(hrelations[0])
                # mh is appended even when it is empty (no 'P'/'S' relation found).
                m.append(mh)
            else:
                # Plain participant: it is its own minimal center.
                m.append([pa])

        n.append(m)

    # y is the terminal layer here; it is rebound to the result list further down.
    y = P.layer("0")  # find cases of multiple centers
    output = []
    # s mirrors n, with every multi-unit group replaced by single-unit groups.
    s = []
    # I holds [scene_index, group_index] for every multi-unit group, with both
    # indices obtained through list.index on n (i.e. the first equal element).
    I = []
    for scp in n:
        r = []
        u = n.index(scp)
        for par in scp:
            if len(par) > 1:
                d = scp.index(par)
                # Split [a, b, c] into [[a], [b], [c]].
                par = [par[i:i+1] for i in range(len(par))]
                for c in par:
                    r.append(c)
                I.append([u,d])
            else:
                r.append(par)
        s.append(r)

    for scp in s:  # find the spans of the participant nodes
        output1 = []
        # Each entry is unpacked as a one-element list; an entry of any other
        # length (e.g. an empty mh group) makes this unpacking raise.
        for [par] in scp:
            output2 = []
            # p collects the positions of the unit's terminals.
            p = []
            d = par.get_terminals(False,True)
            for i in range(0, len(d)):
                p.append(d[i].position)

            for k in p:
                # Skip a token identical to the one just emitted.
                if len(output2) == 0:
                    output2.append(str(y.by_position(k)))
                elif str(y.by_position(k)) != output2[-1]:
                    output2.append(str(y.by_position(k)))
            output1.append(output2)
        output.append(output1)

    y = []  # unify spans in case of multiple centers
    for scp in output:
        x = []
        # NOTE(review): u is assigned here but not read afterwards.
        u = output.index(scp)
        for par in scp:
            # NOTE(review): the group index v[1] was computed on n (before splitting)
            # but is used here to index output — confirm they line up.
            for v in I:
                if par == output[v[0]][v[1]]:
                    # par is the first span of a multi-unit group: extend it in place with
                    # the first token of each following span of that group.
                    for l in range(1,len(n[v[0]][v[1]])):
                        par.append((output[v[0]][v[1]+l])[0])

                    x.append(par)
                elif all(par != output[v[0]][v[1]+l] for l in range(1, len(n[v[0]][v[1]]))):
                    # par is not one of the follow-up spans of this group: keep it.
                    # This runs once per entry of I, so par may be appended several times.
                    x.append(par)
            if not I:
                # No multi-unit group anywhere: every span is kept as is.
                x.append(par)
        y.append(x)

    return y
Пример #12
0
def get_num_scenes(ucca_passage: Passage):
    """
    Count the scenes of the passage.

    A scene is a layer-"1" node tagged "FN" whose ``is_scene()`` is true.
    """
    count = 0
    for node in ucca_passage.layer("1").all:
        if node.tag == "FN" and node.is_scene():
            count += 1
    return count
Пример #13
0
 def get_units(self, passage: Passage):
     """
     Yield ``(child, edge_tag)`` for the selected edges of layer ``layer1.LAYER_ID``.

     An edge is selected when its tag is in ``self.relations`` (any tag if
     ``self.relations`` is None), it is not a Terminal edge, it is not marked
     "remote", and its child is not marked "implicit".
     """
     for node in passage.layer(layer1.LAYER_ID).all:
         for out_edge in node:
             label = out_edge.tag
             if self.relations is not None and label not in self.relations:
                 continue
             if label == layer1.EdgeTags.Terminal:
                 continue
             if out_edge.attrib.get("remote") or out_edge.child.attrib.get("implicit"):
                 continue
             yield out_edge.child, label