Пример #1
0
def and_or(dep_graph: DependencyGraph):
    """
    Collapse an "and ... or" conjunction pair into a single node.

    The pattern looks for a conjunct that has both an "and" and an "or"
    attached through ``cc``, with the "or" additionally attached to the
    "and" through ``conj``.  Both conjunctions, together with every node
    located between them, are merged into one node, and the ``conj:*`` edge
    leading to the conjunct is relabelled with the merged node's FORM.

    :param dep_graph: dependency graph to rewrite; it is modified in place
    :return: None
    """

    # A node with one of these tags between "and" and "or" blocks the merge.
    content_upos = {"VERB", "NOUN", "ADJ", "ADP", "ADV"}

    pattern = DependencyGraph()

    head = pattern.create_node()
    conjunct = pattern.create_node()
    and_word = pattern.create_node(LEMMA=r"\band\b")
    or_word = pattern.create_node(LEMMA=r"\bor\b")

    pattern.add_dependency(head, conjunct, r'\bconj:\w*')
    pattern.add_dependency(conjunct, and_word, r'\bcc\b')
    pattern.add_dependency(conjunct, or_word, r'\bcc\b')
    pattern.add_dependency(and_word, or_word, r'\bconj')

    # Materialise all matches first: the loop body rewrites dep_graph.
    for found in list(dep_graph.match(pattern)):
        dep_head = found[head]
        dep_conjunct = found[conjunct]
        dep_and = found[and_word]
        dep_or = found[or_word]

        label = dep_graph.get_dependency(dep_head, dep_conjunct)
        if not (label.startswith("conj:and") or label.startswith("conj:or")):
            continue

        between = [
            node for node in dep_graph.nodes()
            if dep_and.LOC < node.LOC < dep_or.LOC
        ]

        content_words = [node for node in between if node.UPOS in content_upos]
        if content_words:
            continue

        # "and", "or" and everything in between, ordered by position.
        span = sorted(between + [dep_and, dep_or], key=lambda node: node.LOC)

        # Skip the match when any lookup comes back falsy
        # (presumably the node was consumed by an earlier merge -- confirm).
        lookups = [dep_graph.get_node(node.ID) for node in span]
        if not all(lookups):
            continue

        merged = merge_dep_nodes(span,
                                 UPOS=dep_and.UPOS,
                                 LOC=dep_and.LOC,
                                 FEATS=dep_and.FEATS)

        dep_graph.replace_nodes(span, merged)
        dep_graph.set_dependency(dep_head, dep_conjunct, "conj:" + merged.FORM)
Пример #2
0
def amod_xcomp_to_acl(dep_graph: DependencyGraph):
    """
    Rewrite ``NOUN -amod-> ADJ -xcomp-> VERB`` into an ``acl`` construction.

    For every match, the adjective is merged behind a synthetic "(be)"
    token into a single VERB node.  ``mark`` children of the adjective's
    xcomp dependents that are located between the adjective and that
    dependent are included in the merge.  The noun is then linked to the
    merged node with ``acl`` and, for each xcomp dependent, the merged node
    is linked to the matched verb with ``obj``.

    :param dep_graph: dependency graph to rewrite; it is modified in place
    :return: None
    :raises Exception: when more than one xcomp dependent of the adjective
        contributes ``mark`` nodes
    """

    pattern = DependencyGraph()

    noun = pattern.create_node(UPOS="NOUN")
    adj = pattern.create_node(UPOS="ADJ")
    verb = pattern.create_node(UPOS="VERB")

    pattern.add_dependency(noun, adj, r'amod')
    pattern.add_dependency(adj, verb, r"xcomp")

    def is_xcomp(n, l):
        return l.startswith("xcomp")

    # Materialise all matches first: the loop body rewrites dep_graph.
    for found in list(dep_graph.match(pattern)):
        dep_noun = found[noun]
        dep_verb = found[verb]
        dep_adj = found[adj]

        try:
            for node in (dep_noun, dep_verb, dep_adj):
                dep_graph.get_node(node.ID)
        except Exception:
            # One of the nodes has been processed by a previous match.
            continue

        xcomp_children = [
            child
            for child, _ in dep_graph.children(dep_adj, filter=is_xcomp)
        ]

        marks_per_xcomp = []
        for xcomp_child in xcomp_children:

            def is_inner_mark(n, l):
                # Only markers sitting between the adjective and this xcomp.
                return (l.startswith("mark")
                        and dep_adj.LOC < n.LOC < xcomp_child.LOC)

            marks = [
                child for child, _ in dep_graph.children(
                    xcomp_child, filter=is_inner_mark)
            ]
            if marks:
                marks_per_xcomp.append(marks)

        if len(marks_per_xcomp) > 1:
            raise Exception("Unexpected Situation Happened")

        parts = [dep_adj]
        if marks_per_xcomp:
            parts.extend(marks_per_xcomp[0])
            parts.sort(key=lambda node: node.LOC)

        # The literal "(be)" is placed first, so parts[-1] is always a node.
        parts = ["(be)", *parts]

        merged = merge_dep_nodes(parts,
                                 UPOS="VERB",
                                 LOC=parts[-1].LOC,
                                 FEATS={"VerbForm": "Ger"})

        dep_graph.replace_nodes(parts, merged)

        dep_graph.set_dependency(dep_noun, merged, "acl")

        # NOTE(review): the removed edge runs xcomp -> merged and the obj edge
        # always targets the matched verb rather than the loop variable --
        # confirm both are intended.
        for xcomp_child in xcomp_children:
            dep_graph.remove_dependency(xcomp_child, merged)
            dep_graph.set_dependency(merged, dep_verb, "obj")
Пример #3
0
def continuous_asas(dep_graph: DependencyGraph):
    """
    Merge contiguous "as ... as" comparisons into a single ADP node.

    Observations behind the two patterns used here (as far as is known):

    * the first "as" is always the advmod of a following element X, which
      lies within the "as ... as" range;
    * the second "as" is always a dependent of some element B;
    * B sometimes depends on the first "as" and sometimes depends on X;
    * sometimes X has a head that is also within the "as ... as" range.

    For every match whose nodes are ordered ``first as < X < second as < B``,
    all nodes located from the first "as" through X, plus the second "as",
    are merged into one ADP node.  B is then attached to the matched head
    with ``advcl:<form>`` (head is a VERB) or ``obl:<form>`` (otherwise),
    and the merged node is attached to B as ``mark`` or ``case``
    respectively.

    :param dep_graph: dependency graph to rewrite; it is modified in place
    :return: None
    """

    verb_node = DependencyGraphNode(UPOS="VERB|NOUN|PRON|PROPN")
    adv_node = DependencyGraphNode(UPOS="ADV|ADJ")
    as1_node = DependencyGraphNode(LEMMA="as")
    as2_node = DependencyGraphNode(LEMMA="as")
    verb2_node = DependencyGraphNode(UPOS="VERB|ADJ|NOUN|PROPN|PRON")
    # ADJ is for "as soon as possible"

    # The two patterns differ only in the governor of B (verb2_node):
    # the first "as" in pattern 1, X (adv_node) in pattern 2.
    patterns = []
    for verb2_governor in (as1_node, adv_node):
        pattern = DependencyGraph()
        pattern.add_nodes(
            [verb_node, adv_node, as1_node, as2_node, verb2_node])
        pattern.add_dependency(verb_node, adv_node, r'advmod|amod')
        pattern.add_dependency(adv_node, as1_node, r'\w*advmod\w*')
        pattern.add_dependency(verb2_governor, verb2_node,
                               r'advcl:as|obl:as|advmod')
        pattern.add_dependency(verb2_node, as2_node, r'mark|case')
        patterns.append(pattern)

    # Collect every match of both patterns before touching the graph: the
    # loop body below rewrites dep_graph.
    matches = [
        match for pattern in patterns for match in dep_graph.match(pattern)
    ]

    for match in matches:

        dep_verb_node = match[verb_node]
        dep_adv_node = match[adv_node]
        dep_as1_node = match[as1_node]
        dep_as2_node = match[as2_node]
        dep_verb2_node = match[verb2_node]

        # Only handle the surface order "as X as B".
        if not (dep_as1_node.LOC < dep_adv_node.LOC < dep_as2_node.LOC <
                dep_verb2_node.LOC):
            continue

        # Everything from the first "as" through X, plus the second "as".
        pred = [
            node for node in dep_graph.nodes()
            if dep_as1_node.LOC <= node.LOC <= dep_adv_node.LOC
        ]
        pred.append(dep_as2_node)
        pred.sort(key=lambda x: x.LOC)

        dep_asas_node = merge_dep_nodes(pred, UPOS="ADP", LOC=dep_adv_node.LOC)

        dep_graph.replace_nodes(pred, dep_asas_node)
        dep_graph.remove_dependency(dep_verb2_node, dep_asas_node)
        dep_graph.remove_dependency(dep_asas_node, dep_verb2_node)
        dep_graph.remove_dependency(dep_verb_node, dep_asas_node)

        # A verbal head takes a clause (advcl/mark); any other head takes an
        # oblique (obl/case).
        if dep_verb_node.UPOS == "VERB":
            relation, marker = "advcl:", "mark"
        else:
            relation, marker = "obl:", "case"

        dep_graph.set_dependency(dep_verb_node, dep_verb2_node,
                                 relation + dep_asas_node.FORM)
        dep_graph.set_dependency(dep_verb2_node, dep_asas_node, marker)
Пример #4
0
def gradation(dep_graph: DependencyGraph):
    """
    Merge comparative ("... than") constructions into a single ADP node.

    TODO: does not match the tech report, and the verb is not considered.

    Targeted constructions (comparative / periphrastic gradation):

    * He runs faster than her
    * Martin is more intelligent than Donald
    * He is a nicer person than Tom
    * She is more than a regular cook

    For every match the comparative (or, when it has a qualifying
    amod/advmod child, the result of ``dep_graph.offsprings`` for that
    child) is merged with "than".  The compared element is then attached to
    the matched head with ``advcl:<form>`` (head is a VERB) or
    ``obl:<form>`` (otherwise), and the merged node is attached to the
    compared element as ``mark`` or ``case`` respectively.

    :param dep_graph: dependency graph to rewrite; it is modified in place
    :return: None
    """

    pattern = DependencyGraph()
    head = pattern.create_node(UPOS="VERB|NOUN|PRON|PROPN|SYM")
    comparative = pattern.create_node(UPOS="ADJ|ADV", FEATS={"Degree": "Cmp"})
    than_word = pattern.create_node(FORM="than")
    compared = pattern.create_node()

    pattern.add_dependency(head, comparative, r'advmod|amod')
    pattern.add_dependency(comparative, compared,
                           r'\w*(nmod:than|obl:than|advcl:than)\w*')
    pattern.add_dependency(compared, than_word, r'\w*case|mark\w*')

    # Materialise all matches first: the loop body rewrites dep_graph.
    for found in list(dep_graph.match(pattern)):
        dep_head = found[head]
        dep_comparative = found[comparative]
        dep_than = found[than_word]
        dep_compared = found[compared]

        def is_qualifying_modifier(n, l):
            # amod/advmod children that also pass the in_interval check
            # against the comparative.
            if l != "amod" and l != "advmod":
                return False
            return in_interval(n, None, dep_comparative)

        modifiers = list(
            dep_graph.children(dep_comparative, filter=is_qualifying_modifier))

        if modifiers:
            # Only the first qualifying modifier is used; the comparative
            # itself is not part of the merge in this branch.
            first_modifier = modifiers[0][0]
            to_merge = dep_graph.offsprings(first_modifier) + [dep_than]
        else:
            to_merge = (dep_comparative, dep_than)

        merged = merge_dep_nodes(to_merge, UPOS="ADP", LOC=dep_than.LOC)

        dep_graph.replace_nodes(to_merge, merged)
        dep_graph.remove_dependency(dep_compared, merged)
        dep_graph.remove_dependency(merged, dep_compared)
        dep_graph.remove_dependency(dep_head, merged)

        # A verbal head takes a clause (advcl/mark); any other head takes an
        # oblique (obl/case).
        if dep_head.UPOS == "VERB":
            relation, marker = "advcl:", "mark"
        else:
            relation, marker = "obl:", "case"

        dep_graph.set_dependency(dep_head, dep_compared,
                                 relation + merged.FORM)
        dep_graph.set_dependency(dep_compared, merged, marker)