Пример #1
0
def ccomp_mark_sconj(dep_graph: DependencyGraph):
    """
    Fold the subordinator "as" into the verb that governs its clause.

    Matches VERB -ccomp-> X -mark-> SCONJ. When the SCONJ lemma is "as",
    the mark edge is removed and the verb and "as" are merged into a single
    node built from [verb, "{1}", as, "{2}"].
    Example: "see them as they are".
    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    template = DependencyGraph()
    governor = template.create_node(UPOS="VERB")
    clause_head = template.create_node()
    subordinator = template.create_node(UPOS="SCONJ")

    template.add_dependency(governor, clause_head, r'ccomp')
    template.add_dependency(clause_head, subordinator, 'mark')

    # Materialise the matches first: the graph is edited inside the loop.
    for hit in list(dep_graph.match(template)):
        verb = hit[governor]
        clause = hit[clause_head]
        sconj = hit[subordinator]

        if sconj.LEMMA != "as":
            continue

        dep_graph.remove_dependency(clause, sconj)

        # "{1}" / "{2}" are handed to merge_dep_nodes verbatim
        # (presumably argument placeholders -- confirm against that helper).
        pieces = [verb, "{1}", sconj, "{2}"]
        merged_verb = merge_dep_nodes(pieces, UPOS=verb.UPOS, LOC=verb.LOC)
        dep_graph.replace_nodes(pieces, merged_verb)
Пример #2
0
def whose_noun(dep_graph: DependencyGraph):
    """
    Merge a relative "whose" with the noun it possesses.

    Matches noun -nmod:poss-> owner -ref-> "whose". For every match the
    owner -> whose edge and the noun -nmod:poss-> owner edge are removed,
    and [whose, noun] is replaced by one merged node that keeps the noun's
    UPOS and LOC.
    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    pattern = DependencyGraph()
    noun_node = pattern.create_node(UPOS="NOUN|PROPN|PRON|X|NUM|SYM")
    owner_node = pattern.create_node()
    whose_node = pattern.create_node(LEMMA="whose")

    pattern.add_dependency(noun_node, owner_node, "nmod:poss")
    pattern.add_dependency(owner_node, whose_node, "ref")

    # Collect every match before touching the graph.
    whose_noun_phrases = [
        (match[owner_node], match[whose_node], match[noun_node])
        for match in dep_graph.match(pattern)
    ]

    for owner, whose, noun in whose_noun_phrases:
        merged_noun = merge_dep_nodes([whose, noun],
                                      UPOS=noun.UPOS,
                                      LOC=noun.LOC)
        # The edges to drop live between the *matched* graph nodes. The
        # pattern placeholder `owner_node` is not a node of dep_graph, so
        # passing it here (as the code previously did) never addressed the
        # intended edges.
        dep_graph.remove_dependency(owner, whose)
        dep_graph.remove_dependency(noun, owner, "nmod:poss")
        dep_graph.replace_nodes([whose, noun], merged_noun)
def ever_since(dep_graph: DependencyGraph):
    """
    Merge an "ever" that is immediately followed by "since" into one node.

    An "ever" at LOC k is paired with the first collected "since" whose LOC
    is k + 1. Every advmod edge pointing at a paired "ever" is removed, then
    each pair is replaced by a merged node carrying the UPOS and LOC of
    "since".
    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    tokens = list(dep_graph.nodes())
    evers = [tok for tok in tokens if tok.LEMMA == "ever"]
    sinces = [tok for tok in tokens if tok.LEMMA == "since"]
    if not (evers and sinces):
        return

    since_locs = [tok.LOC for tok in sinces]
    pairs = []
    stale_edges = []
    for ever in evers:
        follower_loc = ever.LOC + 1
        if follower_loc in since_locs:
            pairs.append((ever, sinces[since_locs.index(follower_loc)]))
            stale_edges.extend(
                (parent, ever, 'advmod')
                for parent, label in dep_graph.parents(ever)
                if 'advmod' in label)

    # All removals happen before any merge.
    for source, target, label in stale_edges:
        dep_graph.remove_dependency(source, target, label)

    for ever, since in pairs:
        merged = merge_dep_nodes([ever, since],
                                 UPOS=since.UPOS,
                                 LOC=since.LOC)
        dep_graph.replace_nodes([ever, since], merged)
Пример #4
0
def separated_asas(dep_graph: DependencyGraph):
    """
    Equality comparison: "A is as X a C as B".

    - the first "as" is always the advmod of a following element X that lies
      inside the as ... as range
    - the second "as" is always a dependent of B
    - B sometimes depends on the first "as" and sometimes on X
    - X sometimes has a head that is also inside the as ... as range

    Matches NOUN -amod-> ADJ -advmod-> as1 -advcl:as-> B -mark-> as2 with the
    surface order as1 < ADJ < NOUN < as2 < B. Every node from as1 through ADJ
    plus as2 is merged into one ADJ node, and the noun is linked to B with
    "acl:" + the merged FORM.
    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    template = DependencyGraph()

    adj_slot = DependencyGraphNode(UPOS="ADJ")
    noun_slot = DependencyGraphNode(UPOS="NOUN")
    as1_slot = DependencyGraphNode(FORM="as")
    as2_slot = DependencyGraphNode(FORM="as")
    obj_slot = DependencyGraphNode()

    template.add_nodes([noun_slot, adj_slot, as1_slot, as2_slot, obj_slot])
    template.add_dependency(noun_slot, adj_slot, r'amod')
    template.add_dependency(adj_slot, as1_slot, r'\w*advmod\w*')
    template.add_dependency(as1_slot, obj_slot, r'\w*advcl:as\w*')
    template.add_dependency(obj_slot, as2_slot, r'mark')

    rewrites = []
    for hit in dep_graph.match(template):
        noun = hit[noun_slot]
        adj = hit[adj_slot]
        first_as = hit[as1_slot]
        second_as = hit[as2_slot]
        compared = hit[obj_slot]

        in_surface_order = (first_as.LOC < adj.LOC < noun.LOC
                            < second_as.LOC < compared.LOC)
        if not in_surface_order:
            continue

        # Everything between the first "as" and the adjective, plus the
        # second "as", forms the comparison predicate.
        span = [
            tok for tok in dep_graph.nodes()
            if first_as.LOC <= tok.LOC <= adj.LOC
        ]
        span.append(second_as)
        span = sorted(span, key=lambda tok: tok.LOC)

        merged = merge_dep_nodes(span, UPOS="ADJ", LOC=second_as.LOC)
        rewrites.append((span, merged, noun, compared))

    for span, merged, noun, compared in rewrites:
        dep_graph.replace_nodes(span, merged)

        dep_graph.remove_dependency(merged, compared)
        dep_graph.remove_dependency(noun, merged)

        dep_graph.add_dependency(noun, compared, "acl:" + merged.FORM)
Пример #5
0
def be_adp_phrase(dep_graph: DependencyGraph):
    """
    Merge a copula with the adposition(s) that follow it ("is for xxx").

    Not meant to apply when:
    1. xxx is an adjective -- be_adj_verb handles that;
    2. xxx is a NOUN -- copula_phrase handles that.
    There may be several adpositions: "the insurgency is out of the picture".

    Matches X -cop-> AUX together with X -case-> ADP. All ADP case children
    of X must sit strictly between the copula and X; the copula and those
    adpositions are merged into one AUX node re-attached to X as "cop".
    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    template = DependencyGraph()

    head_slot = template.create_node()

    adp_slot = template.create_node(UPOS="ADP")
    be_slot = template.create_node(UPOS="AUX")

    template.add_dependency(head_slot, be_slot, r'cop')
    template.add_dependency(head_slot, adp_slot, r'case')

    queued = []

    for hit in dep_graph.match(template):
        copula = hit[be_slot]
        head = hit[head_slot]

        adpositions = [
            child for child, _ in dep_graph.children(
                head,
                filter=lambda n, l: "case" in l and n.UPOS == "ADP")
        ]

        # Skip unless every adposition lies between the copula and the head.
        if any(not (copula.LOC < adp.LOC < head.LOC) for adp in adpositions):
            continue

        queued.append((head, [copula] + adpositions, copula))

    for head, parts, anchor in queued:

        if not all(dep_graph.get_node(part.ID) for part in parts):
            continue  # has been processed

        merged = merge_dep_nodes(parts, UPOS="AUX", LOC=anchor.LOC)

        for part in parts:
            dep_graph.remove_dependency(head, part)
        dep_graph.replace_nodes(parts, merged)
        dep_graph.add_dependency(head, merged, "cop")
Пример #6
0
def amod_obl(dep_graph: DependencyGraph):
    """
    Turn "ADJ + ADP" after a noun into a single adposition
    (includes: "more than", "successful by").

    Matches NOUN|PRON -amod-> ADJ -obl:*-> Y -case-> ADP where the adjective
    has at most one obl child, the ADP form appears among the values of the
    ADJ -> Y relation, and the surface order is noun < adj < adp < Y.
    ADJ and ADP are merged into one ADP node and the noun is linked to Y with
    "nmod:" + the merged FORM.
    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    template = DependencyGraph()

    noun_slot = DependencyGraphNode(UPOS=r"NOUN|PRON")
    adj_slot = DependencyGraphNode(UPOS="ADJ")
    adp_slot = DependencyGraphNode(UPOS="ADP")
    obl_slot = DependencyGraphNode()

    template.add_nodes([noun_slot, adj_slot, adp_slot, obl_slot])
    template.add_dependency(noun_slot, adj_slot, r'amod')
    template.add_dependency(adj_slot, obl_slot, r'obl:\w+')
    template.add_dependency(obl_slot, adp_slot, r'case')

    candidates = []
    for hit in dep_graph.match(template):
        noun = hit[noun_slot]
        adj = hit[adj_slot]
        oblique = hit[obl_slot]
        adp = hit[adp_slot]

        obliques_of_adj = list(
            dep_graph.children(adj, filter=lambda n, l: "obl" in l))
        if len(obliques_of_adj) > 1:
            # similar in form to the one
            continue

        relation_values = dep_graph.get_dependency(adj, oblique).values()
        if adp.FORM not in relation_values:
            continue

        if not (noun.LOC < adj.LOC < adp.LOC < oblique.LOC):
            continue

        candidates.append((noun, adj, oblique, adp))

    for noun, adj, oblique, adp in candidates:
        parts = [adj, adp]
        merged_adp = merge_dep_nodes(parts, UPOS="ADP", LOC=adp.LOC)

        dep_graph.remove_dependency(noun, adj)
        dep_graph.remove_dependency(adj, oblique)

        dep_graph.replace_nodes([adj, adp], merged_adp)
        dep_graph.add_dependency(noun, oblique, "nmod:" + merged_adp.FORM)
def process_head_conj(dep_graph: DependencyGraph):
    """
    Re-attach a sentence-initial "and" / "but".

    When the node at LOC 0 has lemma "and" or "but", every parent that
    governs it with exactly the "cc" label has that edge removed and is
    instead attached under the conjunction with label "arg_conj:1".
    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    leading = dep_graph.get_node_by_loc(0)
    if not leading or leading.LEMMA not in {"and", "but"}:
        return

    # Gather the governors first so the edges are not edited mid-iteration.
    governors = []
    for parent, label in dep_graph.parents(leading):
        if label == "cc":
            governors.append(parent)

    for governor in governors:
        dep_graph.remove_dependency(governor, leading)
        dep_graph.add_dependency(leading, governor, "arg_conj:1")
Пример #8
0
def multi_word_sconj(dep_graph: DependencyGraph):
    """
    Merge a multi-word subordinating conjunction into a single mark node.

    Matches VERB -advcl:*-> VERB2 -mark-> SCONJ where the SCONJ lemma appears
    among the values of the VERB -> VERB2 relation. All offsprings of the
    SCONJ are merged (in LOC order) into one node, and the advcl edge is
    relabelled "advcl:" + the merged LEMMA.
    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    template = DependencyGraph()

    main_slot = template.create_node(UPOS="VERB")
    sub_slot = template.create_node(UPOS="VERB")
    mark_slot = template.create_node(UPOS="SCONJ")

    template.add_dependency(main_slot, sub_slot, r'advcl:\w*')
    template.add_dependency(sub_slot, mark_slot, r'mark')

    queued = []
    for hit in dep_graph.match(template):
        main_verb = hit[main_slot]
        sub_verb = hit[sub_slot]
        marker = hit[mark_slot]

        relation_values = dep_graph.get_dependency(main_verb, sub_verb).values()
        if marker.LEMMA not in relation_values:
            continue

        marker_words = list(dep_graph.offsprings(marker))
        if len(marker_words) == 1:
            # nothing to merge
            continue

        queued.append((main_verb, sub_verb, marker,
                       sorted(marker_words, key=lambda n: n.LOC)))

    for main_verb, sub_verb, marker, marker_words in queued:

        # Skip when any word is no longer present in the graph.
        if not all([dep_graph.get_node(word.ID) for word in marker_words]):
            continue

        dep_graph.remove_dependency(sub_verb, marker)
        dep_graph.remove_dependency(main_verb, sub_verb)

        merged_marker = merge_dep_nodes(marker_words,
                                        UPOS=marker.UPOS,
                                        LOC=marker.LOC)

        dep_graph.replace_nodes(marker_words, merged_marker)
        dep_graph.add_dependency(main_verb, sub_verb,
                                 "advcl:" + merged_marker.LEMMA)
        dep_graph.add_dependency(sub_verb, merged_marker, "mark")
Пример #9
0
def multi_words_mark(dep_graph: DependencyGraph):
    """
    Merge multi-word markers into one "mark" node.

    Example: in "arise on to" the "on to" should be combined.

    For each node, the offsprings of all its mark children are gathered.
    Groups of two or more words that contain no NOUN/NUM/VERB/ADJ/ADV/PRON
    are widened to every graph node between their first and last LOC, and --
    unless that span contains a NOUN_UPOS word -- merged into a single node
    attached to the head as "mark".
    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    content_upos = {"NOUN", "NUM", "VERB", "ADJ", "ADV", "PRON"}
    queued = []

    for head in dep_graph.nodes():
        words = []
        for n, l in dep_graph.children(head, filter=lambda n, l: "mark" in l):
            words.extend(dep_graph.offsprings(n))

        if len(words) < 2:
            continue
        if any(word.UPOS in content_upos for word in words):
            continue

        queued.append((head, sorted(words, key=lambda n: n.LOC)))

    for head, words in queued:
        # Skip groups whose words are no longer all present in the graph.
        if not all([dep_graph.get_node(word.ID) for word in words]):
            continue

        first_loc = words[0].LOC
        last_loc = words[-1].LOC
        span = sorted(
            (n for n in dep_graph.nodes() if first_loc <= n.LOC <= last_loc),
            key=lambda n: n.LOC)

        if any(word.UPOS in NOUN_UPOS for word in span):
            continue

        merged_mark = merge_dep_nodes(span,
                                      UPOS=span[0].UPOS,
                                      LOC=span[0].LOC)
        for word in span:
            dep_graph.remove_dependency(head, word)
        dep_graph.replace_nodes(span, merged_mark)
        dep_graph.add_dependency(head, merged_mark, "mark")
def acl_loop(dep_graph: DependencyGraph):
    """
    Break two-node cycles introduced by relative clauses.

    For every edge n1 -> n2 whose labels include "acl:relcl", the reverse
    edge n2 -> n1 is removed when it carries any of
    obl / nsubj / obj / mark / advmod.
    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    back_labels = ("obl", "nsubj", "obj", "mark", "advmod")

    # Iterate over a snapshot: edges are removed inside the loop, and
    # dependencies() may be a live view of the underlying graph. This mirrors
    # the list(dep_graph.match(...)) idiom used by the other mutating passes.
    for n1, n2, deps in list(dep_graph.dependencies()):
        if "acl:relcl" not in deps:
            continue

        back_deps = dep_graph.get_dependency(n2, n1)
        if any(label in back_deps for label in back_labels):
            dep_graph.remove_dependency(n2, n1)
Пример #11
0
def multi_words_cc(dep_graph: DependencyGraph):
    """
    Merge multi-word coordinating conjunctions into one "cc" node.

    For each node, the offsprings of all its children attached with exactly
    the "cc" label are gathered. Groups of two or more words that contain no
    NOUN/NUM/VERB are widened to every graph node between their first and
    last LOC and -- unless that span contains a NOUN/NUM/VERB -- merged into
    a single node that is re-attached to the head as "cc".
    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    content_upos = {"NOUN", "NUM", "VERB"}
    cc_phrases = []

    for node in dep_graph.nodes():
        marks = []
        for n, l in dep_graph.children(node, filter=lambda n, l: "cc" == l):
            marks.extend(dep_graph.offsprings(n))

        if len(marks) < 2:
            continue
        if any(x.UPOS in content_upos for x in marks):
            continue

        marks.sort(key=lambda n: n.LOC)
        cc_phrases.append((node, marks))

    for node, marks in cc_phrases:

        mark_min_loc = marks[0].LOC
        mark_max_loc = marks[-1].LOC
        marks = [n for n in dep_graph.nodes() if mark_min_loc <= n.LOC <= mark_max_loc]
        # Keep surface order, as multi_words_mark does: marks[0] supplies the
        # UPOS/LOC of the merged node and must be the leftmost word, which
        # the iteration order of dep_graph.nodes() does not visibly guarantee.
        marks.sort(key=lambda n: n.LOC)

        if any(x.UPOS in content_upos for x in marks):
            continue
        # NOTE(review): this check runs on the freshly re-collected span, so
        # it cannot detect that the original cc words were already merged by
        # an earlier iteration -- confirm whether that is intended.
        if not all([dep_graph.get_node(x.ID) for x in marks]):
            continue
        new_mark_node = merge_dep_nodes(marks,
                                        UPOS=marks[0].UPOS,
                                        LOC=marks[0].LOC
                                        )

        dep_graph.replace_nodes(marks, new_mark_node)
        for mark in marks:
            dep_graph.remove_dependency(node, mark)

        # The head itself may have been part of the merged span.
        if dep_graph.get_node(node.ID):
            dep_graph.add_dependency(node, new_mark_node, "cc")
Пример #12
0
def be_not_phrase(dep_graph: DependencyGraph):
    """
    Merge "be" with a "not" that is attached to its object.

    Matches BE -*obj*-> OBJ -*advmod*-> NOT where "be" is one of the words of
    BE's lemma, "not" is one of the words of NOT's lemma, and the surface
    order satisfies BE <= NOT <= OBJ. The OBJ -advmod-> NOT edge is removed
    and [BE, NOT] is replaced by a merged node with BE's UPOS and LOC.
    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    template = DependencyGraph()

    be_slot = template.create_node()  # contain the be verb
    obj_slot = template.create_node()
    not_slot = template.create_node()

    for slot in (be_slot, obj_slot, not_slot):
        template.add_node(slot)

    template.add_dependency(be_slot, obj_slot, r'\w*obj\w*')
    template.add_dependency(obj_slot, not_slot, r'\w*advmod\w*')

    triples = []
    for hit in dep_graph.match(template):
        copula = hit[be_slot]
        target = hit[obj_slot]
        negation = hit[not_slot]

        if "be" not in copula.LEMMA.split(" "):
            continue

        if "not" not in negation.LEMMA.split(" "):
            continue

        # "not" must lie between the copula and the object.
        if not (copula.LOC <= negation.LOC <= target.LOC):
            continue

        triples.append((copula, target, negation))

    for copula, target, negation in triples:
        dep_graph.remove_dependency(target, negation, 'advmod')
        merged_verb = merge_dep_nodes((copula, negation),
                                      UPOS=copula.UPOS,
                                      LOC=copula.LOC)
        dep_graph.replace_nodes([copula, negation], merged_verb)
Пример #13
0
def be_not_phrase2(dep_graph: DependencyGraph):
    """
    Merge "be" with a "not" particle attached to one of its objects.

    Scans every node whose lemma contains the word "be". For each of its
    children whose relation starts with "obj" (visited in LOC order), a PART
    child attached with exactly "advmod" and whose lemma contains "not" is
    looked up; at most one such child is expected per object. The
    object -advmod-> not edge is removed and [be, not] is replaced by a
    merged node with the UPOS and LOC of "be".
    :param dep_graph: dependency graph, rewritten in place
    :return: None
    :raises AssertionError: if an object has more than one matching "not"
    """

    def is_not_particle(candidate):
        # that conj is omitted
        return candidate.UPOS == "PART" and "not" in candidate.LEMMA.split(" ")

    triples = []
    for predicate in dep_graph.nodes():
        if "be" not in predicate.LEMMA.split(" "):
            continue

        objects = [
            child for child, rel in dep_graph.children(predicate)
            if rel.startswith('obj')
        ]

        for target in sorted(objects, key=lambda x: x.LOC):
            negations = [
                n for n, l in dep_graph.children(
                    target,
                    filter=lambda n, l: l == "advmod" and is_not_particle(n))
            ]
            if not negations:
                continue
            assert len(negations) == 1
            triples.append((predicate, target, negations[0]))

    for copula, target, negation in triples:
        dep_graph.remove_dependency(target, negation, 'advmod')
        merged_verb = merge_dep_nodes((copula, negation),
                                      UPOS=copula.UPOS,
                                      LOC=copula.LOC)
        dep_graph.replace_nodes([copula, negation], merged_verb)
Пример #14
0
def advp_phrase(dep_graph: DependencyGraph):
    """
    Merge an adposition with the adjacent adverbial modifiers to its left
    into a single ADV node.

    For every ADP node, each parent's children are scanned right-to-left;
    ADJ/ADV siblings attached as advmod that form an unbroken run ending
    directly before the adposition are collected (expanded through
    valid_adjv_element) and later merged with the adposition.
    :param dep_graph: dependency graph, rewritten in place
    :return: None
    case: english-UD-12774
    """
    # (merge list sorted by LOC, adposition) pairs to merge after the scan
    phrases = []
    # (parent, adposition, 'case') edges to drop before merging
    remove_rels = []
    for node in dep_graph.nodes(filter=lambda n: n.UPOS in {"ADP"}):
        need_merge_node = set()

        for parent, rel in dep_graph.parents(node):

            # Stop looking at further parents when the adposition is a case
            # marker whose FORM/LEMMA already appears among the values of a
            # relation pointing at this parent.
            # NOTE: this `break` does not discard nodes already collected
            # from earlier parents of the same adposition.
            if "case" in rel and \
                    any(node.FORM in l.values() or node.LEMMA in l.values() for x, l in dep_graph.parents(parent)):
                break

            remove_rel = False

            # Find the neighbouring adjectives/adverbs among the siblings.
            silibings = list(dep_graph.children(parent))
            silibings.sort(key=lambda x: x[0].LOC)

            # start_loc is the position the next collected modifier must
            # immediately precede; it moves left as modifiers are collected.
            start_loc = -1
            for child, ch_rel in reversed(silibings):
                # Siblings at or to the right of the adposition only move
                # the anchor; they are never merged.
                if child.LOC >= node.LOC:
                    start_loc = child.LOC
                    continue

                if "advmod" in ch_rel and child.UPOS in {
                        "ADJ", "ADV"
                } and child.LOC == start_loc - 1:
                    need_merge_node.update(
                        set(valid_adjv_element(child, dep_graph)))
                    remove_rel = True
                    start_loc = child.LOC
            if remove_rel:
                # Only a "case" edge to the adposition is scheduled for
                # removal; other relation labels are left untouched.
                if 'case' in rel:
                    remove_rels.append((parent, node, 'case'))
        if len(need_merge_node) == 0:
            continue
        need_merge_node.add(node)
        adjv_element = sorted(list(need_merge_node), key=lambda x: x.LOC)
        phrases.append((adjv_element, node))
    # Apply all edge removals first, then the merges.
    for src, trg, rel in remove_rels:
        dep_graph.remove_dependency(src, trg, rel)
    for adjv_phrase, node in phrases:
        # The merged node is tagged ADV (not the adposition's own UPOS) and
        # keeps the adposition's LOC.
        advp_node = merge_dep_nodes(
            adjv_phrase,
            UPOS='ADV',
            LOC=node.LOC)
        dep_graph.replace_nodes(adjv_phrase, advp_node)
Пример #15
0
def multi_words_case(dep_graph: DependencyGraph):
    """
    Merge multi-word case markers into a single node and relabel the
    governing relation accordingly.

    Matches noun -<rel with ":">-> X -case-> C. All offsprings of X's direct
    case children are gathered; when there is more than one and the case
    part of the noun -> X relation occurs in their "_"-joined lemmas, the
    case words are merged and the noun -> X relation is replaced by
    "<rel>:" + their space-joined lemmas.
    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    pattern = DependencyGraph()

    noun_node = DependencyGraphNode()
    x_node = DependencyGraphNode()
    case_node = DependencyGraphNode()

    pattern.add_node(noun_node)
    pattern.add_node(x_node)
    pattern.add_node(case_node)

    pattern.add_dependency(noun_node, x_node, r'\w*:\w*')
    pattern.add_dependency(x_node, case_node, r'\bcase\b')

    # Snapshot the matches: the graph is rewritten inside the loop.
    for match in list(dep_graph.match(pattern)):

        multiword_cases = []

        dep_noun_node = match[noun_node]
        dep_x_node = match[x_node]
        dep_case_node = match[case_node]

        # The case node may already have been merged by an earlier match.
        if not dep_graph.has_node(dep_case_node):
            continue

        direct_case_nodes = [n for n, l in dep_graph.children(dep_x_node, filter=lambda n, l: "case" == l)]
        all_case_nodes = set()
        for node in direct_case_nodes:
            all_case_nodes.update(dep_graph.offsprings(node))

        if len(all_case_nodes) == 1:
            continue

        all_case_nodes = sorted(all_case_nodes, key=lambda n: n.LOC)
        logger.debug("multi case discovered")
        for node in all_case_nodes:
            logger.debug(str(node))

        x_rel = dep_graph.get_dependency(dep_noun_node, dep_x_node)

        for rel in x_rel:
            if ":" in rel:
                # Split on the LAST colon only: labels with several colons
                # (e.g. "nsubj:pass:xsubj") would otherwise raise ValueError
                # when unpacked into two names.
                rel_str, case_str = rel.rsplit(":", 1)
                # some times, the rel only contains one word
                # Example :
                # that OBSF values within the extended trial balance may be misstated due to data issues ( above and beyond existing conversations with AA on model simplifications)
                if case_str in "_".join([x.LEMMA for x in all_case_nodes]):
                    multiword_cases.append((dep_noun_node, dep_x_node, dep_case_node, all_case_nodes, rel_str))

        for dep_noun_node, dep_x_node, dep_case_node, case_nodes, rel_str in multiword_cases:

            logger.debug("we are merging:")
            for node in case_nodes:
                logger.debug(str(node))

            # A previous entry of this loop may already have merged them.
            if not all([dep_graph.has_node(x) for x in case_nodes]):
                continue

            new_case_node = merge_dep_nodes(case_nodes,
                                            UPOS=dep_case_node.UPOS,
                                            LOC=dep_case_node.LOC
                                            )
            dep_graph.replace_nodes(case_nodes, new_case_node)
            dep_graph.remove_dependency(dep_noun_node, dep_x_node)
            dep_graph.add_dependency(dep_noun_node, dep_x_node,
                                     rel_str + ":" + " ".join([x.LEMMA for x in case_nodes]))
Пример #16
0
def amod_xcomp_to_acl(dep_graph: DependencyGraph):
    """
    Rewrite NOUN -amod-> ADJ -xcomp-> VERB as an "acl" predicate
    (example: "something extracted by").

    The adjective, together with any mark children of its xcomp that lie
    between the adjective and that xcomp, is merged behind an implicit
    "(be)" into a VERB node (VerbForm=Ger). The noun is attached to that
    node with "acl", and the node takes the matched verb as "obj".
    :param dep_graph: dependency graph, rewritten in place
    :return: None
    :raises Exception: if more than one xcomp child contributes mark nodes
    """

    template = DependencyGraph()

    noun_slot = template.create_node(UPOS="NOUN")
    adj_slot = template.create_node(UPOS="ADJ")
    verb_slot = template.create_node(UPOS="VERB")

    template.add_dependency(noun_slot, adj_slot, r'amod')
    template.add_dependency(adj_slot, verb_slot, r"xcomp")

    for hit in list(dep_graph.match(template)):
        noun = hit[noun_slot]
        verb = hit[verb_slot]
        adj = hit[adj_slot]

        try:
            for matched in (noun, verb, adj):
                dep_graph.get_node(matched.ID)
        except Exception:
            # has been processed by previous match
            continue

        complements = [
            child for child, _ in dep_graph.children(
                adj, filter=lambda n, l: l.startswith("xcomp"))
        ]

        marker_groups = []
        for complement in complements:
            markers = [
                child for child, _ in dep_graph.children(
                    complement,
                    filter=lambda n, l: l.startswith("mark")
                    and adj.LOC < n.LOC < complement.LOC)
            ]
            if markers:
                marker_groups.append(markers)

        if len(marker_groups) > 1:
            raise Exception("Unexpected Situation Happened")

        phrase = [adj]
        if marker_groups:
            phrase.extend(marker_groups[0])
            phrase.sort(key=lambda x: x.LOC)

        # The merged node takes the LOC of the rightmost real word.
        anchor_loc = phrase[-1].LOC
        phrase.insert(0, "(be)")

        new_node = merge_dep_nodes(phrase,
                                   UPOS="VERB",
                                   LOC=anchor_loc,
                                   FEATS={"VerbForm": "Ger"})

        dep_graph.replace_nodes(phrase, new_node)

        dep_graph.set_dependency(noun, new_node, "acl")

        for complement in complements:
            dep_graph.remove_dependency(complement, new_node)
            dep_graph.set_dependency(new_node, verb, "obj")
Пример #17
0
def acl_verb_obl_case(dep_graph: DependencyGraph):
    """
    Absorb the case marker(s) of an oblique into an acl verb
    (example: "something extracted by").

    Matches S -acl-> VERB -obl:*-> O -case-> C. A match is used only when
    the verb has a single obl child, no child whose relation contains "obj"
    or "comp", at most one subject, and C's FORM appears among the values of
    the VERB -> O relation. The verb and all case children of O that lie
    between the verb and O are merged into one verb node, which then takes
    O as "obj".
    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    template = DependencyGraph()

    subj_slot = template.create_node()
    verb_slot = template.create_node(UPOS="VERB")
    obj_slot = template.create_node()
    case_slot = template.create_node()

    template.add_dependency(subj_slot, verb_slot, r'acl')
    template.add_dependency(verb_slot, obj_slot, r'obl:\w*')
    template.add_dependency(obj_slot, case_slot, r'case')

    queued = []

    for hit in dep_graph.match(template):
        verb = hit[verb_slot]
        oblique = hit[obj_slot]
        case = hit[case_slot]

        obliques = [
            child for child, _ in dep_graph.children(
                verb, filter=lambda n, l: l.startswith("obl"))
        ]
        if len(obliques) > 1:
            continue

        competing_args = [
            child for child, _ in dep_graph.children(
                verb, filter=lambda n, l: "obj" in l or "comp" in l)
        ]
        if competing_args:
            continue

        if case.FORM not in dep_graph.get_dependency(verb, oblique).values():
            continue

        # there may be other cases, join them all
        cases = [
            child for child, _ in dep_graph.children(
                oblique,
                filter=lambda n, l: l.startswith("case")
                and verb.LOC < n.LOC < oblique.LOC)
        ]

        subjects = list(
            dep_graph.children(verb, filter=lambda n, l: "subj" in l))
        if len(subjects) > 1:
            continue

        queued.append((verb, oblique, cases))

    for verb, oblique, cases in queued:
        verb_phrase = [verb] + cases
        logging.debug("acl_verb_obl_case: we are merging nodes")
        logging.debug("\n".join(str(node) for node in verb_phrase))

        merged_verb = merge_dep_nodes(verb_phrase,
                                      UPOS=verb.UPOS,
                                      LOC=verb.LOC,
                                      FEATS=verb.FEATS)

        logging.debug("acl_verb_obl_case: we obtain a new node")
        logging.debug(str(merged_verb))

        dep_graph.remove_dependency(verb, oblique)
        for case in cases:
            dep_graph.remove_dependency(oblique, case)

        dep_graph.replace_nodes(verb_phrase, merged_verb)
        dep_graph.add_dependency(merged_verb, oblique, "obj")
def adv_ccomp(dep_graph: DependencyGraph, oia_graph: OIAGraph,
              context: UD2OIAContext):
    """
    Turn an adverb-like word that governs a clausal complement into an OIA
    predicate.

    Matches ADV|X|NOUN|PART -ccomp|xcomp-> C, skipping pairs already related
    in the OIA graph. Case children of C lying between the adverb and C are
    folded into the predicate. If the adverb modifies (advmod) an
    ADV/X/NOUN parent, that parent becomes argument 1 (mod=True); otherwise
    C becomes argument 2.
    :param dep_graph: dependency graph, rewritten in place when a predicate
        spans several words
    :param oia_graph: OIA graph that receives the predicate and argument
    :param context: conversion context (not used by this function)
    :return: None
    :raises Exception: if the adverb has more than one qualifying parent
    """

    template = DependencyGraph()

    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    adv_slot = template.create_node(UPOS="ADV|X|NOUN|PART")  # part is for "not"
    ccomp_slot = template.create_node()

    template.add_dependency(adv_slot, ccomp_slot, r"ccomp|xcomp")

    work_items = []
    for hit in dep_graph.match(template):
        adv = hit[adv_slot]
        complement = hit[ccomp_slot]

        if oia_graph.has_relation(adv, complement):
            continue

        cases = [
            child for child, _ in dep_graph.children(
                complement,
                filter=lambda n, l: "case" == l
                and adv.LOC < n.LOC < complement.LOC)
        ]

        if cases:
            predicate = [adv] + continuous_component(cases, cases[0])
            predicate.sort(key=lambda n: n.LOC)
        else:
            predicate = [adv]

        modified = [
            parent for parent, _ in dep_graph.parents(
                adv,
                filter=lambda n, l: "advmod" == l
                and n.UPOS in {"ADV", "X", "NOUN"})
        ]
        if len(modified) > 1:
            raise Exception("Multiple subject")
        subject = modified[0] if len(modified) > 0 else None

        work_items.append([subject, predicate, complement])

    for subject, predicate, complement in work_items:

        if len(predicate) <= 1:
            pred_node = predicate[0]
        else:
            # Collapse the multi-word predicate into one auxiliary ADV node.
            pred_node = dep_graph.create_node(
                ID=" ".join([part.ID for part in predicate]),
                FORM=" ".join([part.FORM for part in predicate]),
                LEMMA=" ".join([part.LEMMA for part in predicate]),
                UPOS="ADV",
                LOC=predicate[0].LOC)

            pred_node.aux = True

            dep_graph.replace_nodes(predicate, pred_node)

            dep_graph.remove_dependency(complement, pred_node)

        oia_pred = oia_graph.add_words(pred_node.position)

        if subject:
            oia_subject = oia_graph.add_words(subject.position)
            oia_graph.add_argument(oia_pred, oia_subject, 1, mod=True)
        else:
            oia_complement = oia_graph.add_words(complement.position)
            oia_graph.add_argument(oia_pred, oia_complement, 2)
def secondary_predicate(dep_graph: DependencyGraph):
    """
    Rewrite a non-verbal xcomp that acts as a secondary predicate.

    The matched shape is: a predicate with an ``xcomp`` child whose UPOS is
    not VERB, where the ``nsubj`` of that xcomp is at the same time the
    ``obj`` of the predicate. The three matched edges are removed and an
    implicit "(be)" predicate is put between the predicate and the xcomp
    subject.

    :param dep_graph: the dependency graph, modified in place
    :return: None
    """

    pattern = DependencyGraph()

    pred_node = pattern.create_node()
    xcomp_node = pattern.create_node(UPOS=r'(?!VERB\b)\b\w+')
    xcomp_subj_node = pattern.create_node()

    pattern.add_dependency(pred_node, xcomp_node, "xcomp")
    pattern.add_dependency(xcomp_node, xcomp_subj_node, "nsubj")
    pattern.add_dependency(pred_node, xcomp_subj_node, "obj")

    # The matches are materialised first because the graph is edited below.
    for match in list(dep_graph.match(pattern)):

        head = match[pred_node]
        comp = match[xcomp_node]
        subj = match[xcomp_subj_node]

        # Two word orders are handled; any other order is left untouched.
        # The xcomp may precede the predicate in questions, e.g. "how ominous" in
        # "I can't tell you how ominous I found Bush's performance in that interview."
        if head.LOC < subj.LOC < comp.LOC:
            fronted = False
        elif comp.LOC < head.LOC:
            fronted = True
        else:
            continue

        # Drop the three edges that made up the match.
        for source, target in ((head, comp), (head, subj), (comp, subj)):
            dep_graph.remove_dependency(source, target)

        modifier_like = any(comp.UPOS == tag for tag in ("ADJ", "ADV"))

        if modifier_like and not fronted:
            # "(be)" is fused with the ADJ/ADV xcomp into one verbal node.
            be_node = merge_dep_nodes(["(be)", comp],
                                      UPOS="VERB",
                                      LOC=comp.LOC)
            dep_graph.add_node(be_node)
        else:
            # A stand-alone "(be)" node located just before the xcomp.
            be_node = dep_graph.create_node(FORM="(be)",
                                            LEMMA="(be)",
                                            UPOS="VERB",
                                            LOC=comp.LOC - 0.5)
            be_node.aux = True

        dep_graph.add_dependency(head, be_node, "obj")
        dep_graph.add_dependency(be_node, subj, "nsubj")

        if fronted:
            comp_rel = "amod" if modifier_like else "obj"
            dep_graph.add_dependency(be_node, comp, comp_rel)
        elif modifier_like:
            # The fused node takes over the xcomp's children, then the
            # original xcomp node is dropped.
            for child, label in list(dep_graph.children(comp)):
                dep_graph.remove_dependency(comp, child)
                dep_graph.add_dependency(be_node, child, label)

            dep_graph.remove_node(comp)
        else:
            dep_graph.add_dependency(be_node, comp, "obj")
Пример #20
0
def gradation(dep_graph: DependencyGraph):
    """
    Comparative / periphrastic gradation with "than".

    TODO: does not match the tech report, and the verb is not considered.

    Example sentences:
        He runs faster than her
        Martin is more intelligent than Donald
        He is a nicer person than Tom
        She is more than a regular cook

    The comparative part and "than" are merged into one ADP node, which is
    then attached to the compared object as its mark/case.

    :param dep_graph: the dependency graph, modified in place
    :return: None
    """

    pattern = DependencyGraph()
    head_pat = pattern.create_node(UPOS="VERB|NOUN|PRON|PROPN|SYM")
    cmp_pat = pattern.create_node(UPOS="ADJ|ADV", FEATS={"Degree": "Cmp"})
    than_pat = pattern.create_node(FORM="than")
    target_pat = pattern.create_node()

    pattern.add_dependency(head_pat, cmp_pat, r'advmod|amod')
    pattern.add_dependency(cmp_pat, target_pat,
                           r'\w*(nmod:than|obl:than|advcl:than)\w*')
    pattern.add_dependency(target_pat, than_pat, r'\w*case|mark\w*')

    # The matches are materialised first because the graph is edited below.
    for match in list(dep_graph.match(pattern)):

        head = match[head_pat]
        comparative = match[cmp_pat]
        than = match[than_pat]
        target = match[target_pat]

        # amod/advmod children of the comparative that pass the
        # in_interval(n, None, comparative) check.
        degree_mods = list(
            dep_graph.children(
                comparative,
                filter=lambda n, l: (l == "amod" or l == "advmod") and
                in_interval(n, None, comparative)))

        if degree_mods:
            # Merge the first such modifier's offsprings with "than".
            first_mod = degree_mods[0][0]
            merged_parts = dep_graph.offsprings(first_mod) + [than]
        else:
            # Otherwise merge the comparative itself with "than".
            merged_parts = (comparative, than)

        marker = merge_dep_nodes(merged_parts, UPOS="ADP", LOC=than.LOC)

        dep_graph.replace_nodes(merged_parts, marker)

        for source, dest in ((target, marker), (marker, target),
                             (head, marker)):
            dep_graph.remove_dependency(source, dest)

        # A verbal head takes the target as a clause, any other head as an oblique.
        if head.UPOS == "VERB":
            rel_prefix, marker_rel = "advcl:", "mark"
        else:
            rel_prefix, marker_rel = "obl:", "case"

        dep_graph.set_dependency(head, target, rel_prefix + marker.FORM)
        dep_graph.set_dependency(target, marker, marker_rel)
Пример #21
0
def continuous_asas(dep_graph: DependencyGraph):
    """
    Merge a continuous "as ... as" construction into a single ADP node.

    Example: "as far as I know".

    Notes on how the construction shows up in the parse:
      - the first 'as' is always the advmod of a following element, X,
        which is within the range of as ... as
      - the second 'as' is always the dependent of B
      - B sometimes depends on the first 'as', sometimes depends on X
      - sometimes X has a head that is also within the range of as ... as

    For every match, all nodes located from the first 'as' up to the
    adverb/adjective, plus the second 'as', are merged into one ADP node.
    That node becomes the mark (verbal head) or case (other head) of the
    second element, which is re-attached to the head with an
    ``advcl:<form>`` / ``obl:<form>`` relation.

    :param dep_graph: the dependency graph, modified in place
    :return: None
    """

    verb_node = DependencyGraphNode(UPOS="VERB|NOUN|PRON|PROPN")
    adv_node = DependencyGraphNode(UPOS="ADV|ADJ")
    as1_node = DependencyGraphNode(LEMMA="as")
    as2_node = DependencyGraphNode(LEMMA="as")
    verb2_node = DependencyGraphNode(UPOS="VERB|ADJ|NOUN|PROPN|PRON")
    # ADJ is for as soon as possible

    # pattern1: the second element hangs off the first 'as'
    pattern1 = DependencyGraph()
    pattern1.add_nodes([verb_node, adv_node, as1_node, as2_node, verb2_node])
    pattern1.add_dependency(verb_node, adv_node, r'advmod|amod')
    pattern1.add_dependency(adv_node, as1_node, r'\w*advmod\w*')
    pattern1.add_dependency(as1_node, verb2_node, r'advcl:as|obl:as|advmod')
    pattern1.add_dependency(verb2_node, as2_node, r'mark|case')

    # pattern2: the second element hangs off the adverb/adjective
    pattern2 = DependencyGraph()
    pattern2.add_nodes([verb_node, adv_node, as1_node, as2_node, verb2_node])
    pattern2.add_dependency(verb_node, adv_node, r'advmod|amod')
    pattern2.add_dependency(adv_node, as1_node, r'\w*advmod\w*')
    pattern2.add_dependency(adv_node, verb2_node, r'advcl:as|obl:as|advmod')
    pattern2.add_dependency(verb2_node, as2_node, r'mark|case')

    # Both match lists are materialised before the graph is edited.
    for match in list(dep_graph.match(pattern1)) + list(
            dep_graph.match(pattern2)):

        dep_verb_node = match[verb_node]
        dep_adv_node = match[adv_node]
        dep_as1_node = match[as1_node]
        dep_as2_node = match[as2_node]
        dep_verb2_node = match[verb2_node]

        # Only the surface order "as1 < adv < as2 < verb2" is handled.
        if not (dep_as1_node.LOC < dep_adv_node.LOC < dep_as2_node.LOC <
                dep_verb2_node.LOC):
            continue

        # Everything from the first 'as' through the adverb, plus the
        # second 'as', forms the merged predicate.
        pred = [
            node for node in dep_graph.nodes()
            if dep_as1_node.LOC <= node.LOC <= dep_adv_node.LOC
        ]
        pred.append(dep_as2_node)
        pred.sort(key=lambda x: x.LOC)

        dep_asas_node = merge_dep_nodes(pred,
                                        UPOS="ADP",
                                        LOC=dep_adv_node.LOC)

        dep_graph.replace_nodes(pred, dep_asas_node)
        dep_graph.remove_dependency(dep_verb2_node, dep_asas_node)
        dep_graph.remove_dependency(dep_asas_node, dep_verb2_node)
        dep_graph.remove_dependency(dep_verb_node, dep_asas_node)

        if dep_verb_node.UPOS == "VERB":

            dep_graph.set_dependency(dep_verb_node, dep_verb2_node,
                                     "advcl:" + dep_asas_node.FORM)
            dep_graph.set_dependency(dep_verb2_node, dep_asas_node, "mark")
        else:
            dep_graph.set_dependency(dep_verb_node, dep_verb2_node,
                                     "obl:" + dep_asas_node.FORM)
            dep_graph.set_dependency(dep_verb2_node, dep_asas_node, "case")
def process_conjunction(dep_graph: DependencyGraph, root: DependencyGraphNode):
    """
    Build a conjunction node for ``root`` and its ``conj*`` children and
    rewire the surrounding dependencies onto it.

    Children that themselves have ``conj*`` children are handled first by a
    recursive call and are replaced by the conjunction node that call returns.

    :param dep_graph: the dependency graph, modified in place
    :param root: the node whose ``conj*`` children are processed; it must
        have at least one such child (enforced with an assert)
    :return: a pair ``(conj_node, parallel_components)``: the node returned by
        ``build_conjunction_node`` and the LOC-sorted list made of ``root``
        plus its (possibly replaced) conj children
    """
    # Direct children of root attached with a relation starting with "conj".
    conj_childs = [
        child for child, rels in dep_graph.children(
            root, filter=lambda n, l: l.startswith("conj"))
    ]

    assert conj_childs

    parallel_components = [root]

    for child in conj_childs:

        # A child with its own conj children is a nested conjunction.
        is_nest = any(
            grand_rels.startswith("conj")
            for grand_sun, grand_rels in dep_graph.children(child))
        if is_nest:
            logger.debug("nested conj is found ")
            logger.debug(str(child))

            conj_node, parallel_nodes = process_conjunction(dep_graph, child)
            logger.debug("conj_node is created ")
            logger.debug(str(conj_node))

            # Move every conj relation root -> nested member onto
            # root -> nested conjunction node.
            for node in parallel_nodes:
                logger.debug("Containing nodes  ")
                logger.debug(str(node))
                # Copied to a list because the dependency is removed below.
                rels = list(dep_graph.get_dependency(root, node))
                for rel in rels:
                    if rel.startswith("conj"):
                        logger.debug("remove dependency {0}".format(
                            (root.ID, node.ID, rel)))

                        dep_graph.remove_dependency(root, node, rel)
                        dep_graph.add_dependency(root, conj_node, rel)
            # From here on the nested conjunction node stands in for the child.
            child = conj_node

        parallel_components.append(child)

    parallel_components.sort(key=lambda x: x.LOC)

    # if all(n.UPOS in NOUN_UPOS for n in parallel_components):
    #
    #     logger.debug("Processing all noun conjunction")
    #
    #     is_pure_noun = True
    #
    #     merging_noun_nodes = []
    #     min_loc = 10000
    #     max_loc = -1
    #     for child in parallel_components:
    #         if isinstance(child, DependencyGraphNode):
    #             min_loc = min(min_loc, child.LOC)
    #             max_loc = max(min_loc, child.LOC)
    #         elif isinstance(child, DependencyGraphSuperNode):
    #             min_loc = min(min_loc, min([x.LOC for x in child.nodes]))
    #             max_loc = max(max_loc, max([x.LOC for x in child.nodes]))
    #         merging_noun_nodes.extend(dep_graph.offsprings(child))
    #
    #         logger.debug("Checking acl for {0}".format(child))
    #         for n, l in dep_graph.children(child):
    #             logger.debug(n)
    #             logger.debug("label {0}".format(l))
    #             if "acl" in l:
    #                 is_pure_noun = False
    #                 break
    #
    #     if is_pure_noun:
    #         merging_noun_nodes = [n for n in merging_noun_nodes if min_loc <= n.LOC <= max_loc]
    #         is_pure_noun = not any(n.UPOS in {"ADP", "VERB", "SCONJ", "AUX"} for n in merging_noun_nodes)
    #
    #     if is_pure_noun:
    #         # merged_noun_nodes.sort(key=lambda x: x.LOC)
    #         for node in merging_noun_nodes:
    #             logger.debug("merging {0}".format(node))
    #
    #         new_noun = merge_dep_nodes(merging_noun_nodes, UPOS=root.UPOS, LOC=root.LOC)
    #         dep_graph.replace_nodes(merging_noun_nodes, new_noun)
    #
    #         return new_noun, []

    # Distinct parents of root, ordered by LOC.
    root_parents = list(set(parent
                            for parent, rels in dep_graph.parents(root)))
    root_parents.sort(key=lambda x: x.LOC)

    # ic(list(map(str, root_parents)))

    conj_node, with_arg_palceholder = build_conjunction_node(
        dep_graph, root, root_parents, parallel_components)

    # Indexed by parent ID below; each entry unpacks to
    # (prefix, shared_prefix, required_mark).
    relation_to_conj = get_relation_to_conj(dep_graph, root, root_parents,
                                            parallel_components)

    # For each component, its children whose label contains
    # "case", "mark" or "cc".
    case_marks = dict()
    for index, node in enumerate(parallel_components):
        case_marks[node.ID] = [(n, l) for n, l in dep_graph.children(node)
                               if ("case" in l or "mark" in l or "cc" in l)]
    for key, values in case_marks.items():
        for v in values:
            logger.debug("case_marker = {} {} {}".format(
                key, v[0].ID, v[1].rels))

    logger.debug("relation_to_conj = {}".format(relation_to_conj))

    for parent in root_parents:
        # ic(parent)

        prefix, shared_prefix, required_mark = relation_to_conj[parent.ID]
        # The parent is attached to the conjunction node as a whole when the
        # relation prefix contains subj/obj/ccomp/xcomp, when no mark is
        # required, or when all required marks are the same.
        if any(x in prefix for x in {"subj", "obj", "ccomp", "xcomp"}) \
                or not required_mark or len(set(required_mark)) == 1:

            for node in parallel_components:
                dep_graph.remove_dependency(parent, node)

            relation = prefix

            if required_mark and len(set(required_mark)) == 1:
                ## with same mark

                mark_lemma = list(set(required_mark))[0]

                relation += ":" + mark_lemma

                mark_node = find_mark(case_marks, parallel_components,
                                      mark_lemma)

                if mark_node:

                    mark_node, mark_rel = mark_node

                    # Removing and re-adding the node drops its existing
                    # dependencies before it is attached to conj_node.
                    dep_graph.remove_node(mark_node)
                    dep_graph.add_node(mark_node)  # clear the dependency

                    dep_graph.add_dependency(conj_node, mark_node, mark_rel)
                else:
                    logger.error("cannot find the mark node")

            dep_graph.add_dependency(parent, conj_node, relation)

        else:

            # Components need different marks: the parent is attached to each
            # component separately, with a relation built from its own mark.
            complete_missing_case_mark(dep_graph, root, root_parents,
                                       parallel_components, relation_to_conj,
                                       case_marks)

            if not required_mark:
                required_mark = [None] * len(parallel_components)

            for index, (node, mark) in enumerate(
                    zip(parallel_components, required_mark)):
                if mark:
                    rel = prefix + ":" + mark
                else:
                    rel = prefix

                # if rel.startswith("conj"):
                #    continue
                logger.debug("add dependency {0}".format(
                    (parent.ID, node.ID, rel)))

                dep_graph.add_dependency(parent, node, rel)

        # NOTE(review): this loop is nested inside the per-parent loop, so it
        # runs once per parent and not at all when root has no parents;
        # confirm that this is intended.
        for idx, node in enumerate(parallel_components):
            if node != root:
                # NOTE(review): unlike the nested-conj case above, rels is not
                # copied before remove_dependency is called; confirm that
                # get_dependency returns a snapshot.
                rels = dep_graph.get_dependency(root, node)
                for rel in rels:
                    if rel.startswith("conj"):
                        dep_graph.remove_dependency(root, node)

            if with_arg_palceholder:
                index = idx + 1
            else:
                # a, but b, b should be the arg1 and a be the arg2
                index = len(parallel_components) - idx

            dep_graph.add_dependency(conj_node, node,
                                     "arg_conj:{0}".format(index))

    return conj_node, parallel_components