示例#1
0
def parallel_list(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                  context: UD2OIAContext):
    """
    Turn every group of ``list``-related dependency nodes into a LIST predicate.

    For each dependency node that has at least one child reached through a
    relation containing ``"list"``, the node and those children are ordered by
    ``LOC`` and attached, in that order, as arguments 1..n of a newly added
    ``LIST`` auxiliary node in the OIA graph.

    :param dep_graph: dependency graph that is scanned
    :param oia_graph: OIA graph that receives the LIST nodes and arguments
    :param context: conversion context; not used by this converter
    :return: None
    """

    def _is_list_relation(_node, label):
        return "list" in label

    # Phase 1: collect all list groups before touching the OIA graph.
    grouped = []
    for head in dep_graph.nodes():
        members = [
            child
            for child, _ in dep_graph.children(head, filter=_is_list_relation)
        ]
        if members:
            members.append(head)
            grouped.append(sorted(members, key=lambda item: item.LOC))

    # Phase 2: emit one LIST predicate per group, arguments numbered from 1.
    for members in grouped:
        list_pred = oia_graph.add_aux("LIST")
        for arg_index, member in enumerate(members, start=1):
            oia_member = oia_graph.add_words(member.position)
            oia_graph.add_argument(list_pred, oia_member, arg_index)
示例#2
0
def and_or_conjunction(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                       context: UD2OIAContext):
    """
    Coordination: copy ``arg_con*`` dependencies into the OIA graph.

    Example: "I like apples, bananas and oranges." (conj:and/or with punct)

    Every dependency node that has children attached through a relation
    starting with ``"arg_con"`` is added as a words node; each such child is
    added as an argument of it, with the argument index read from the
    relation.

    :param dep_graph: dependency graph that is scanned
    :param oia_graph: OIA graph that receives the word nodes and arguments
    :param context: conversion context; not used by this converter
    :return: None
    """

    def _is_conj_arg(_node, label):
        return label.startswith("arg_con")

    for head in dep_graph.nodes():
        components = list(dep_graph.children(head, filter=_is_conj_arg))
        if components:
            oia_head = oia_graph.add_words(head.position)
            for member, relation in components:
                oia_member = oia_graph.add_words(member.position)
                # assumes relation.values() is indexable and that its first
                # entry is the numeric argument index -- TODO confirm
                oia_graph.add_argument(oia_head, oia_member,
                                       int(relation.values()[0]))
示例#3
0
def adverbial_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                     context: UD2OIAContext):
    """
    Adverbial clause (``advcl``).

    Examples:
        "run in order to catch it."          -- advcl with a mark ("in order to")
        "he worked hard, replacing his feud." -- advcl without a mark

    For every ``advcl`` edge that is not yet processed and not yet related in
    the OIA graph:

    * if the modifier has a child reached through a relation containing
      ``"mark"``, the first such child becomes the predicate, with the verb as
      argument 1 (``mod=True``) and the modifier as argument 2 -- unless the
      mark's lemma is listed in ``CONJUNCTION_WORDS[language]``, in which case
      nothing is linked;
    * otherwise the modifier is attached to the verb with ``add_mod``.

    :param dep_graph: dependency graph that is matched against
    :param oia_graph: OIA graph that receives the nodes and relations
    :param context: conversion context, consulted for already processed edges
    :return: None
    """
    pattern = DependencyGraph()
    verb_node = pattern.create_node()
    modifier_node = pattern.create_node()
    pattern.add_dependency(verb_node, modifier_node, "advcl")

    def _is_mark(_node, relation):
        return "mark" in relation

    # The matches are materialised before the OIA graph is modified.
    for match in list(dep_graph.match(pattern)):
        governor = match[verb_node]
        clause_head = match[modifier_node]

        if context.is_processed(governor, clause_head):
            continue

        oia_governor = oia_graph.add_words(governor.position)
        oia_clause = oia_graph.add_words(clause_head.position)

        logger.debug("adverbial clause: verb={0}, modifier={1}".format(
            governor.position, clause_head.position))

        if oia_graph.has_relation(oia_governor, oia_clause):
            continue

        marks = list(dep_graph.children(clause_head, filter=_is_mark))
        if not marks:
            # No marker word: the clause simply modifies the verb.
            oia_graph.add_mod(oia_clause, oia_governor)
            continue

        mark_node, _ = marks[0]
        oia_mark = oia_graph.add_words(mark_node.position)
        if oia_mark is None:
            continue

        # NOTE(review): `language` is not defined in this function; it is
        # presumably a module-level name -- confirm.
        if mark_node.LEMMA in CONJUNCTION_WORDS[language]:
            continue

        oia_graph.add_argument(oia_mark, oia_governor, 1, mod=True)
        oia_graph.add_argument(oia_mark, oia_clause, 2)
def nmod_with_case(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                   context: UD2OIAContext):
    """
    Nominal modifier introduced by a case word (``nmod:x``).

    Examples: "the office of the chair", "Istanbul in Turkey".

    For every ``parent -nmod-> child -case-> case_word`` match whose parent
    and child are not already related in the OIA graph, the case word becomes
    the predicate, the parent its argument 1 (``mod=True``) and the child its
    argument 2.

    :param dep_graph: dependency graph that is matched against
    :param oia_graph: OIA graph that receives the nodes and arguments
    :param context: conversion context; not used by this converter
    :return: None
    """

    pattern = DependencyGraph()

    parent_node = DependencyGraphNode()
    child_node = DependencyGraphNode()
    case_node = DependencyGraphNode()

    pattern.add_nodes([parent_node, child_node, case_node])

    pattern.add_dependency(parent_node, child_node, r'\w*nmod\w*')
    pattern.add_dependency(child_node, case_node, r'\w*case\w*')

    for match in dep_graph.match(pattern):

        dep_parent_node = match[parent_node]
        dep_child_node = match[child_node]
        dep_case_node = match[case_node]

        # NOTE(review): has_relation is given dependency nodes here, while
        # some sibling converters pass OIA nodes -- confirm both are accepted.
        if oia_graph.has_relation(dep_parent_node, dep_child_node):
            continue

        # The case word is the predicate whether or not the nmod sub-type
        # (e.g. "nmod:of") spells out its lemma or form (vs -> versus,
        # according -> accord), so no label comparison is needed.
        pred_node = oia_graph.add_words(dep_case_node.position)

        arg1_node = oia_graph.add_words(dep_parent_node.position)
        arg2_node = oia_graph.add_words(dep_child_node.position)

        oia_graph.add_argument(pred_node, arg1_node, 1, mod=True)
        oia_graph.add_argument(pred_node, arg2_node, 2)
def oblique_without_prep(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                         context: UD2OIAContext):
    """
    Oblique dependents (``obl``, ``obl:tmod``, ``obl:npmod``).

    For every matching edge whose two ends are not already related in the OIA
    graph (checked with ``direct_link=False``):

    * if the relation's values contain ``"tmod"``, a ``TIME_IN`` auxiliary node
      becomes the predicate, with the head as argument 1 (``mod=True``) and
      the oblique as argument 2;
    * otherwise the oblique is attached to the head with ``add_mod``.

    :param dep_graph: dependency graph that is matched against
    :param oia_graph: OIA graph that receives the nodes and relations
    :param context: conversion context; not used by this converter
    :return: None
    """

    pattern = DependencyGraph()
    verb_node = DependencyGraphNode(UPOS="VERB|NOUN|ADJ|PROPN|PRON")
    oblique_node = DependencyGraphNode()
    pattern.add_node(verb_node)
    pattern.add_node(oblique_node)
    pattern.add_dependency(verb_node, oblique_node, r'obl:tmod|obl:npmod|obl')

    for match in dep_graph.match(pattern):
        head, dependent = match[verb_node], match[oblique_node]

        if oia_graph.has_relation(head, dependent, direct_link=False):
            continue

        relation_types = dep_graph.get_dependency(head, dependent).values()

        if "tmod" not in relation_types:
            # "npmod" and every other oblique: a plain modifier of the head.
            oia_head = oia_graph.add_words(head.position)
            oia_dependent = oia_graph.add_words(dependent.position)
            oia_graph.add_mod(oia_dependent, oia_head)
            continue

        # Temporal oblique: link both ends through a TIME_IN predicate.
        time_pred = oia_graph.add_aux("TIME_IN")
        oia_head = oia_graph.add_words(head.position)
        oia_dependent = oia_graph.add_words(dependent.position)
        oia_graph.add_argument(time_pred, oia_head, 1, mod=True)
        oia_graph.add_argument(time_pred, oia_dependent, 2)
示例#6
0
def fallback_sconj(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                   context: UD2OIAContext):
    """
    Fallback for subordinating conjunctions that no converter has picked up.

    A dependency node is handled when its words are not yet in the OIA graph,
    its UPOS is ``SCONJ`` and its lemma is one of a fixed set of conjunctions.
    If it has a parent through a relation containing ``"mark"`` and that edge
    is not marked as processed, the conjunction becomes a predicate with the
    parent as its argument 1.

    :param dep_graph: dependency graph that is scanned
    :param oia_graph: OIA graph that receives the nodes and arguments
    :param context: conversion context, consulted for already processed edges
    :return: None
    :raises AssertionError: if a conjunction has more than one ``mark`` parent
    """
    fallback_lemmas = {
        "because", "so", "if", "then", "otherwise", "after", "before",
        "and", "or", "but"
    }

    for node in dep_graph.nodes():

        if oia_graph.has_word(node.position):
            continue

        if node.UPOS != "SCONJ" or node.LEMMA not in fallback_lemmas:
            continue

        parents = [n for n, l in dep_graph.parents(node) if "mark" in l]

        if not parents:
            continue

        # A marker is expected to hang off exactly one clause head.
        assert len(parents) == 1

        parent = parents[0]

        logger.debug("context = " + str(context.processed_edges))

        if context.is_processed(parent, node):
            continue

        oia_parent_node = oia_graph.add_words(parent.position)
        oia_sconj_node = oia_graph.add_words(node.position)

        # Every lemma, "because"/"if" included, gets the same single link.
        oia_graph.add_argument(oia_sconj_node, oia_parent_node, 1)
def it_be_adjv_that(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                    context: UD2OIAContext):
    """
    Expletive construction "it is xxx that ...".

    Matches a VERB node with an ``expl`` child whose lemma is "it" and a
    ``csubj`` child tagged ADJ or ADV that itself has a ``mark`` child whose
    lemma is "that". For each match whose (verb, it) edge is not yet
    processed, "it" becomes argument 1 of the verb, a ``ref`` is added from
    the csubj node to "it", and the (verb, it) edge is marked as processed.

    :param dep_graph: dependency graph that is matched against
    :param oia_graph: OIA graph that receives the nodes and relations
    :param context: conversion context, read and updated for the (verb, it) edge
    :return: None
    """

    pattern = DependencyGraph()

    it_node = pattern.create_node(LEMMA="it")
    be_node = pattern.create_node(UPOS="VERB")
    csubj_node = pattern.create_node(UPOS="ADJ|ADV")
    that_node = pattern.create_node(LEMMA="that")

    pattern.add_dependency(be_node, it_node, r'expl')
    pattern.add_dependency(be_node, csubj_node, r'csubj')
    pattern.add_dependency(csubj_node, that_node, r'mark')

    for match in dep_graph.match(pattern):
        # "that" only constrains the match; it gets no OIA node here.
        dep_be = match[be_node]
        dep_it = match[it_node]
        dep_csubj = match[csubj_node]

        if context.is_processed(dep_be, dep_it):
            continue

        oia_it = oia_graph.add_words(dep_it.position)
        oia_csubj = oia_graph.add_words(dep_csubj.position)
        oia_be = oia_graph.add_words(dep_be.position)

        oia_graph.add_argument(oia_be, oia_it, 1)
        oia_graph.add_ref(oia_csubj, oia_it)

        context.processed(dep_be, dep_it)
def it_verb_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                   context: UD2OIAContext):
    """
    Expletive construction "it is xxx to do".

    Matches a VERB node with an ``expl`` child whose lemma is "it" and an
    ``nsubj``/``csubj`` child tagged NOUN, PRON, PROPN or VERB. For each match
    whose (verb, it) edge is not yet processed, both "it" and the subject are
    attached to the verb under the same argument index -- 1 when "it"
    precedes the subject (by ``LOC``), 2 otherwise -- and a ``ref`` is added
    from "it" to the subject. Both dependency edges are then marked as
    processed.

    :param dep_graph: dependency graph that is matched against
    :param oia_graph: OIA graph that receives the nodes and relations
    :param context: conversion context, read and updated for the handled edges
    :return: None
    """

    pattern = DependencyGraph()

    it_node = pattern.create_node(LEMMA="it")
    verb_node = pattern.create_node(UPOS="VERB")
    subj_node = pattern.create_node(UPOS="NOUN|PRON|PROPN|VERB")

    pattern.add_dependency(verb_node, it_node, r'expl')
    pattern.add_dependency(verb_node, subj_node, r'nsubj|csubj')

    for match in dep_graph.match(pattern):
        dep_verb = match[verb_node]
        dep_it = match[it_node]
        dep_subj = match[subj_node]

        if context.is_processed(dep_verb, dep_it):
            continue

        oia_it = oia_graph.add_words(dep_it.position)
        oia_subj = oia_graph.add_words(dep_subj.position)
        oia_verb = oia_graph.add_words(dep_verb.position)

        # "it VERB subj that ..." -> slot 1; "subj VERB it that ..." -> slot 2.
        shared_slot = 1 if dep_it.LOC < dep_subj.LOC else 2

        oia_graph.add_argument(oia_verb, oia_it, shared_slot)
        oia_graph.add_argument(oia_verb, oia_subj, shared_slot)
        oia_graph.add_ref(oia_it, oia_subj)

        context.processed(dep_verb, dep_it)
        context.processed(dep_verb, dep_subj)
示例#9
0
def two_node_with_case(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                       context: UD2OIAContext):
    """
    Handle a sentence that consists of just a case word and one other word.

    When exactly two nodes remain after dropping ROOT and PUNCT nodes, and the
    later one (by ``LOC``) governs the earlier one through a ``case``
    dependency, the earlier word becomes a predicate with the later word as
    its argument 2.

    :param dep_graph: dependency graph that is inspected
    :param oia_graph: OIA graph that receives the two nodes and the argument
    :param context: conversion context; not used by this converter
    :return: None
    """
    skipped_tags = {"ROOT", "PUNCT"}
    content_nodes = [
        node for node in dep_graph.nodes() if node.UPOS not in skipped_tags
    ]

    if len(content_nodes) != 2:
        return

    first, second = sorted(content_nodes, key=lambda item: item.LOC)

    if dep_graph.get_dependency(second, first) != "case":
        return

    oia_first = oia_graph.add_words(first.position)
    oia_second = oia_graph.add_words(second.position)
    oia_graph.add_argument(oia_first, oia_second, 2)
示例#10
0
def adv_relative_clause(dep_graph, oia_graph: OIAGraph,
                        context: UD2OIAContext):
    """
    When/where relative clause (``acl:relcl`` with a relative adverb).

    Examples: "a time when US troops won", "a place where US troops won".

    Matches ``modified -acl:relcl-> modifier -advmod-> adverb`` and keeps the
    matches whose adverb lemma contains one of "when", "where", "how", "why"
    or "what". Unless modifier and modified are already related in the OIA
    graph, the adverb becomes the predicate, with the modified word as
    argument 1 (``mod=True``) and the modifier as argument 2.

    :param dep_graph: dependency graph that is matched against
    :param oia_graph: OIA graph that receives the nodes and arguments
    :param context: conversion context; not used by this converter
    :return: None
    """
    relative_keywords = ("when", "where", "how", "why", "what")

    pattern = DependencyGraph()
    modified_node = pattern.create_node()
    modifier_node = pattern.create_node()
    adv_rel_node = pattern.create_node()

    pattern.add_dependency(modified_node, modifier_node, r'acl:relcl\w*')
    pattern.add_dependency(modifier_node, adv_rel_node, r'advmod')

    for match in dep_graph.match(pattern):
        antecedent = match[modified_node]
        clause_head = match[modifier_node]
        rel_adverb = match[adv_rel_node]

        # Substring test: the keyword may be only part of the lemma.
        lemma = rel_adverb.LEMMA
        if not any(keyword in lemma for keyword in relative_keywords):
            continue

        oia_adverb = oia_graph.add_words(rel_adverb.position)
        oia_antecedent = oia_graph.add_words(antecedent.position)
        oia_clause = oia_graph.add_words(clause_head.position)

        if oia_graph.has_relation(oia_clause, oia_antecedent):
            continue

        oia_graph.add_argument(oia_adverb, oia_antecedent, 1, mod=True)
        oia_graph.add_argument(oia_adverb, oia_clause, 2)
def oblique_with_prep(dep_graph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Oblique dependent introduced by a preposition-like word.

    Example: "cut X by a knife".

    Matches ``head -obl*-> oblique -case|mark-> prep`` and, unless head and
    oblique are already related in the OIA graph, links them through a
    predicate taken from the oblique's case/mark words: the head becomes
    argument 1 (``mod=True``) and the oblique argument 2.

    The predicate word is chosen from ``continuous_component(case/mark
    children, prep)``: the last of those nodes whose lower-cased lemma appears
    in the values of the ``obl`` relation, or, when none does, the last node.

    :param dep_graph: dependency graph that is matched against
    :param oia_graph: OIA graph that receives the nodes and arguments
    :param context: conversion context; not used by this converter
    :return: None
    """

    pattern = DependencyGraph()
    # adj is for "has more on", adv is for "south of XXXX"
    verb_node = DependencyGraphNode(UPOS="VERB|ADJ|ADV|NOUN|X|PROPN|PRON")
    # verb is for including/according, adj is for "prior to"
    prep_node = DependencyGraphNode(UPOS=r"PRON|ADP|VERB|SCONJ|ADJ")

    oblique_node = DependencyGraphNode()
    pattern.add_node(verb_node)
    pattern.add_node(prep_node)
    pattern.add_node(oblique_node)
    pattern.add_dependency(verb_node, oblique_node, r'\bobl')
    pattern.add_dependency(oblique_node, prep_node, r"case|mark")

    for match in dep_graph.match(pattern):

        dep_prep_node = match[prep_node]
        dep_verb_node = match[verb_node]
        dep_oblique_node = match[oblique_node]

        if oia_graph.has_relation(dep_verb_node, dep_oblique_node):
            continue

        oblique_edge = dep_graph.get_dependency(dep_verb_node,
                                                dep_oblique_node)
        oblique_cases = oblique_edge.values()

        prop_nodes = [
            x for x, l in dep_graph.children(
                dep_oblique_node,
                filter=lambda n, l: l == "case" or l == "mark")
        ]
        connected_case_nodes = continuous_component(prop_nodes, dep_prep_node)

        # Prefer the case word named by the relation itself; when several
        # qualify the last one wins.
        head_node = None
        for node in connected_case_nodes:
            if node.LEMMA.lower() in oblique_cases:
                head_node = node

        if head_node is None:
            # NOTE(review): assumes continuous_component never returns an
            # empty sequence here -- confirm.
            head_node = connected_case_nodes[-1]

        pred_node = oia_graph.add_words(head_node.position)
        arg1_node = oia_graph.add_words(dep_verb_node.position)
        arg2_node = oia_graph.add_words(dep_oblique_node.position)

        oia_graph.add_argument(pred_node, arg1_node, 1, mod=True)
        oia_graph.add_argument(pred_node, arg2_node, 2)
def adv_verb_modifier(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                      context: UD2OIAContext):
    """
    Adverbial modifier (``advmod``) of a verb-like head.

    The adverb before the verb should be handled by verb_phrase; this
    converter handles the adverb after the verb ("verb1 in order to verb2").

    For every ``head -advmod-> adv`` match that is neither processed nor
    already related in the OIA graph:

    * if the adverb has exactly one ``obl*`` child and that child has no
      ``case`` child, the adverb is the predicate: head as argument 1
      (``mod=True``), the oblique as argument 2;
    * otherwise, if the adverb has a ``mark*`` child, the first such child is
      the predicate: head as argument 1 (``mod=True``), adverb as argument 2;
    * otherwise the adverb is attached to the head with ``add_mod``.

    :param dep_graph: dependency graph that is matched against
    :param oia_graph: OIA graph that receives the nodes and relations
    :param context: conversion context, consulted for already processed edges
    :return: None
    """

    pattern = DependencyGraph()

    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    verb_node = DependencyGraphNode(
        UPOS="VERB|NOUN|PROPN|AUX|PRON")  # aux is for be word
    adv_node = DependencyGraphNode(UPOS="ADV|X|NOUN|ADJ|VERB")

    pattern.add_nodes([verb_node, adv_node])
    pattern.add_dependency(verb_node, adv_node, r'advmod')

    def _children_where(parent, predicate):
        return [
            child for child, _ in dep_graph.children(parent, filter=predicate)
        ]

    for match in dep_graph.match(pattern):
        head = match[verb_node]
        adverb = match[adv_node]

        if context.is_processed(head, adverb):
            continue

        if oia_graph.has_relation(head, adverb):
            continue

        obliques = _children_where(adverb,
                                   lambda n, l: l.startswith("obl"))

        sole_oblique = None
        oblique_has_case = False
        if len(obliques) == 1:
            sole_oblique = obliques[0]
            # An oblique with a case word is left to the oblique converter.
            if _children_where(sole_oblique, lambda n, l: "case" in l):
                oblique_has_case = True

        marks = _children_where(adverb, lambda n, l: l.startswith("mark"))

        oia_head = oia_graph.add_words(head.position)
        oia_adverb = oia_graph.add_words(adverb.position)

        if sole_oblique and not oblique_has_case:
            oia_oblique = oia_graph.add_words(sole_oblique.position)
            oia_graph.add_argument(oia_adverb, oia_head, 1, mod=True)
            oia_graph.add_argument(oia_adverb, oia_oblique, 2)
        elif marks:
            oia_mark = oia_graph.add_words(marks[0].position)
            oia_graph.add_argument(oia_mark, oia_head, 1, mod=True)
            oia_graph.add_argument(oia_mark, oia_adverb, 2)
        else:
            oia_graph.add_mod(oia_adverb, oia_head)
def adv_ccomp(dep_graph: DependencyGraph, oia_graph: OIAGraph,
              context: UD2OIAContext):
    """
    Adverb-like word that governs a clausal complement (``ccomp``/``xcomp``).

    Works in two passes.

    Pass 1 collects, for every ``adv -ccomp|xcomp-> clause`` match not already
    related in the OIA graph:

    * the predicate words: the adverb plus, when present, the
      ``continuous_component`` of the clause's ``case`` children located
      between the adverb and the clause (by ``LOC``), sorted by ``LOC``;
    * an optional subject: the adverb's ``advmod`` parent tagged ADV, X or
      NOUN.

    Pass 2 merges multi-word predicates into one new dependency node (this
    modifies ``dep_graph``) and adds the predicate to the OIA graph, linked to
    the subject as argument 1 (``mod=True``) when there is one, otherwise to
    the clause as argument 2.

    :param dep_graph: dependency graph; multi-word predicates are merged in it
    :param oia_graph: OIA graph that receives the nodes and arguments
    :param context: conversion context; not used by this converter
    :return: None
    :raises Exception: if the adverb has more than one qualifying parent
    """

    pattern = DependencyGraph()

    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    adv_node = pattern.create_node(UPOS="ADV|X|NOUN|PART")  # part is for "not"
    ccomp_node = pattern.create_node()
    pattern.add_dependency(adv_node, ccomp_node, r"ccomp|xcomp")

    subject_tags = {"ADV", "X", "NOUN"}

    # Pass 1: gather everything first; dep_graph is rewritten in pass 2.
    collected = []
    for match in dep_graph.match(pattern):
        dep_adv = match[adv_node]
        dep_clause = match[ccomp_node]

        if oia_graph.has_relation(dep_adv, dep_clause):
            continue

        def _case_in_between(n, l):
            return "case" == l and dep_adv.LOC < n.LOC < dep_clause.LOC

        case_nodes = [
            n for n, _ in dep_graph.children(dep_clause,
                                             filter=_case_in_between)
        ]

        if case_nodes:
            case_nodes = continuous_component(case_nodes, case_nodes[0])
            words = [dep_adv] + case_nodes
            words.sort(key=lambda n: n.LOC)
        else:
            words = [dep_adv]

        def _is_subject(n, l):
            return "advmod" == l and n.UPOS in subject_tags

        subjects = [
            n for n, _ in dep_graph.parents(dep_adv, filter=_is_subject)
        ]
        if len(subjects) > 1:
            raise Exception("Multiple subject")
        subject = subjects[0] if len(subjects) > 0 else None

        collected.append([subject, words, dep_clause])

    # Pass 2: build the predicate nodes and link them.
    for subject, words, dep_clause in collected:

        if len(words) > 1:
            merged = dep_graph.create_node(
                ID=" ".join(w.ID for w in words),
                FORM=" ".join(w.FORM for w in words),
                LEMMA=" ".join(w.LEMMA for w in words),
                UPOS="ADV",
                LOC=words[0].LOC)
            merged.aux = True

            dep_graph.replace_nodes(words, merged)
            dep_graph.remove_dependency(dep_clause, merged)
            predicate = merged
        else:
            predicate = words[0]

        oia_pred = oia_graph.add_words(predicate.position)

        # NOTE(review): the clause is linked only when there is no subject;
        # confirm the subject case is not meant to link the clause as well.
        if subject:
            oia_subject = oia_graph.add_words(subject.position)
            oia_graph.add_argument(oia_pred, oia_subject, 1, mod=True)
        else:
            oia_clause = oia_graph.add_words(dep_clause.position)
            oia_graph.add_argument(oia_pred, oia_clause, 2)
示例#14
0
def single_root(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                context: UD2OIAContext):
    """
    Rewire the OIA graph so that a single node has no incoming edge.

    Steps:

    1. Collect the nodes with in-degree 0. With none, return; with one, it is
       the root; with several, a root is chosen (see ``_pick_root``). If no
       root can be chosen the function logs an error and returns.
    2. For every other zero-in-degree node: if ``is_conj_node`` holds, each
       outgoing ``pred.arg.N`` edge is reversed and relabelled
       ``as:pred.arg.N``; otherwise each outgoing ``ref`` edge is reversed and
       relabelled ``as:ref``.
    3. While zero-in-degree nodes other than the root remain, find where their
       descendants meet the root's descendants and rewire the edges at that
       meeting point. The loop stops with an error log when no meeting point
       is found.

    :param dep_graph: dependency graph, used to rank root candidates
    :param oia_graph: OIA graph that is modified in place
    :param context: conversion context; not used by this converter
    :return: None
    """
    zero_degree_nodes = _zero_in_degree_nodes(oia_graph)

    if not zero_degree_nodes:
        return

    if len(zero_degree_nodes) == 1:
        root = zero_degree_nodes[0]
    else:
        root = _pick_root(dep_graph, oia_graph, zero_degree_nodes)
        if root is None:
            return

    # root obtained, change other zero-in-degree node
    logger.debug("Root obtained ")
    logger.debug(root)

    for node in zero_degree_nodes:
        if root.ID == node.ID:
            continue

        if is_conj_node(node, dep_graph):
            # The children list is copied because edges are edited in the loop.
            for child, rel in list(oia_graph.children(node)):
                label = rel.label
                if "pred.arg." in label:
                    arg_no = label.split(".")[-1]
                    oia_graph.remove_relation(node, child)
                    oia_graph.add_relation(child, node,
                                           "as:pred.arg." + arg_no)
            continue

        ref_children = [
            child for child, rel in oia_graph.children(node)
            if rel.label == "ref"
        ]
        for child in ref_children:
            oia_graph.remove_relation(node, child)
            oia_graph.add_relation(child, node, "as:ref")

    orphans = [
        n for n in _zero_in_degree_nodes(oia_graph) if n.ID != root.ID
    ]

    # NOTE(review): nothing bounds this loop; it relies on every pass
    # changing the graph -- confirm it cannot spin on a pass with no effect.
    while orphans:

        logger.debug("we found zero_degree_nodes: ")
        for node in orphans:
            logger.debug(node)

        root_offsprings = set(oia_graph.offsprings(root))

        logger.debug("root offsprings :")
        for n in root_offsprings:
            logger.debug(n)

        intersections = []
        for node in orphans:
            found = _find_top_intersection(oia_graph, root_offsprings, node)
            if found is not None:
                intersections.append(found)

        if not intersections:
            logger.error("seems we have disconnected compoenent")
            break

        for intersection_point, parents_to_root, parents_to_other in intersections:
            for node in parents_to_other:
                _relink_at_intersection(oia_graph, node, intersection_point,
                                        parents_to_root)

        orphans = [
            n for n in _zero_in_degree_nodes(oia_graph) if n.ID != root.ID
        ]


# Score of the strongest separator found between a candidate's children.
_ROOT_SEPARATOR_SCORES = {":": 40, "\"": 30, ";": 20, ",": 10, "(": -10}


def _zero_in_degree_nodes(oia_graph):
    """Return the OIA nodes that currently have no incoming edge."""
    return [
        node for node in oia_graph.nodes()
        if oia_graph.g.in_degree(node.ID) == 0
    ]


def _resolve_dep_nodes(dep_graph, oia_node):
    """Return the dependency nodes behind an OIA words node, else an empty list."""
    if not isinstance(oia_node, OIAWordsNode):
        return []

    dep_node = dep_graph.get_node_by_spans(oia_node.spans)
    if not dep_node:
        return []
    if isinstance(dep_node, DependencyGraphNode):
        return [dep_node]
    if isinstance(dep_node, list):
        return list(dep_node)

    logger.error("get_node_by_spans return type unknown.")
    return []


def _pick_root(dep_graph, oia_graph, zero_degree_nodes):
    """Choose the root among several zero-in-degree nodes, or None on failure.

    Candidates are ranked by the shortest undirected path, in the dependency
    graph, between the first child of dependency node "0" and any dependency
    node behind the candidate or its children. Ties are broken by
    ``_score_root_candidate``; remaining ties are either merged (three or
    more PARATAXIS nodes) or resolved by taking the earliest word (two nodes).
    ``zero_degree_nodes`` is updated in place when nodes are merged away.
    """
    dep_root = dep_graph.get_node("0")
    real_dep_root = next(n for n, l in dep_graph.children(dep_root))
    # Built once: the dependency graph is not modified while ranking.
    undirected = dep_graph.g.to_undirected()

    dists_to_root = []
    for oia_node in zero_degree_nodes:
        related_dep_nodes = set(_resolve_dep_nodes(dep_graph, oia_node))
        for child, _ in oia_graph.children(oia_node):
            related_dep_nodes.update(_resolve_dep_nodes(dep_graph, child))

        # A candidate with no resolvable dependency node can never be the
        # closest one, so it is ranked last instead of crashing min().
        min_dist_to_root = min(
            (len(nx.shortest_path(undirected, real_dep_root.ID, dep_node.ID))
             for dep_node in related_dep_nodes),
            default=float("inf"))

        dists_to_root.append((oia_node, min_dist_to_root))

    dists_to_root.sort(key=lambda x: x[1])
    min_dist = dists_to_root[0][1]
    root_candidates = [
        oia_node for oia_node, dist in dists_to_root if dist == min_dist
    ]

    if len(root_candidates) == 1:
        return root_candidates[0]

    scores = [(cand, _score_root_candidate(dep_graph, oia_graph, cand))
              for cand in root_candidates]
    scores.sort(key=lambda x: x[1], reverse=True)
    top_score = scores[0][1]
    top_nodes = [node for node, score in scores if score == top_score]

    if len(top_nodes) == 1:
        return top_nodes[0]

    if len(top_nodes) >= 3:
        return _merge_parataxis_tops(oia_graph, top_nodes, zero_degree_nodes)

    # Exactly two top nodes: the one whose word comes first wins.
    dep_tops = [(top, dep_node)
                for top in top_nodes
                for dep_node in _resolve_dep_nodes(dep_graph, top)]

    if not dep_tops:
        logger.error("Multiple AUX head ")
        return None

    dep_tops.sort(key=lambda x: x[1].LOC)
    return dep_tops[0][0]


def _score_root_candidate(dep_graph, oia_graph, cand):
    """Return the tie-break score of one root candidate."""
    score = -100
    if any("func" in rel.label for n, rel in oia_graph.children(cand)):
        score = 100

    dep_children = []
    for child, _ in oia_graph.children(cand):
        dep_children.extend(_resolve_dep_nodes(dep_graph, child))
    dep_children.sort(key=lambda x: x.LOC)

    # Check what lies between the candidate's first and last child word;
    # with no resolvable child there is no span to inspect.
    if dep_children:
        first_loc = dep_children[0].LOC
        last_loc = dep_children[-1].LOC
        for node in dep_graph.nodes():
            if node.LOC is None:
                continue
            if (first_loc < node.LOC < last_loc
                    and node.FORM in _ROOT_SEPARATOR_SCORES):
                score = max(score, _ROOT_SEPARATOR_SCORES[node.FORM])

    if isinstance(cand, OIAWordsNode):
        for dep_node in _resolve_dep_nodes(dep_graph, cand):
            if dep_node.LEMMA in IMPORTANT_CONNECTION_WORDS:
                score += 8
    elif isinstance(cand, OIAAuxNode) and cand.label == "PARATAXIS":
        score += 4

    return score


def _merge_parataxis_tops(oia_graph, top_nodes, zero_degree_nodes):
    """Merge equally ranked PARATAXIS nodes into one new root, or return None."""
    if not all(
            isinstance(node, OIAAuxNode) and node.label == "PARATAXIS"
            for node in top_nodes):
        logger.error("Deep intersection point, currently cannot process")
        return None

    next_nodes = []
    for top in top_nodes:
        for n, l in list(oia_graph.children(top)):
            next_nodes.append(n)
        oia_graph.remove_node(top)
        # Rebuilt in place rather than calling remove() while iterating.
        zero_degree_nodes[:] = [
            node for node in zero_degree_nodes if node.ID != top.ID
        ]

    root = oia_graph.add_aux("PARATAXIS")
    oia_graph.add_node(root)
    next_nodes.sort(key=lambda x: x.ID)
    # NOTE(review): argument indices start at 0 here, whereas LIST arguments
    # elsewhere start at 1 -- confirm this is intended.
    for index, second_node in enumerate(next_nodes):
        oia_graph.add_argument(root, second_node, index)
    return root


def _find_top_intersection(oia_graph, root_offsprings, node):
    """Return (point, parents_to_root, parents_to_other) where node meets root, or None."""
    node_offspring = set(oia_graph.offsprings(node))

    logger.debug("node offsprings :")
    for n in node_offspring:
        logger.debug(n)

    intersection = root_offsprings.intersection(node_offspring)

    logger.debug("we found {0} initial intersection :".format(
        len(intersection)))
    for n in intersection:
        logger.debug(n)

    if not intersection:
        return None

    # The top point is a shared descendant none of whose parents is shared.
    for x in intersection:
        parents = set([n for n, l in oia_graph.parents(x)])
        if not parents.intersection(intersection):
            top_intersection_point = x
            parents_to_root = parents.intersection(root_offsprings)
            parents_to_other = parents.intersection(node_offspring)
            break
    else:
        logger.error("It seems we have a problem ")
        return None

    logger.debug("we found a intersections: ")
    logger.debug(top_intersection_point)

    logger.debug("Its parents to root: ")
    for x in parents_to_root:
        logger.debug(x)

    logger.debug("Its parents to other: ")
    for x in parents_to_other:
        logger.debug(x)

    return top_intersection_point, parents_to_root, parents_to_other


def _relink_at_intersection(oia_graph, node, intersection_point,
                            parents_to_root):
    """Rewire the edges around one non-root parent of an intersection point."""
    is_aux = isinstance(node, OIAAuxNode)

    if is_aux and node.label == "WHETHER":
        if len(list(oia_graph.parents(node))) != 0:
            logger.error(
                "it seems different with what we have thought for WHETHER "
            )

        # Put WHETHER in between: the root-side parents now point at it.
        for parent in parents_to_root:
            relation = oia_graph.get_edge(parent, intersection_point)
            oia_graph.remove_relation(parent, intersection_point)
            oia_graph.add_relation(parent, node, relation.label)
        return

    if is_aux and node.label == "LIST":
        logger.error("lets see what happens for LIST")
        if len(list(oia_graph.parents(node))) != 0:
            logger.error(
                "it seems different with what we have thought for LIST "
            )

    # Default (and LIST): reverse the edge and mark it with the "as:" prefix.
    relation = oia_graph.get_edge(node, intersection_point)
    oia_graph.remove_relation(node, intersection_point)
    oia_graph.add_relation(intersection_point, node, "as:" + relation.label)
示例#15
0
def object_relative_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                           context: UD2OIAContext):
    """
    Convert object-extracted / referred relative clauses into OIA relations.

    Example: "the person that Andy knows" -- ``person`` is the entity,
    ``knows`` the relative-clause verb and ``Andy`` its subject.

    Every ``entity -acl:relcl-> verb -*subj*-> subject`` match in
    ``dep_graph`` is handled as follows:

    * matches whose subject lemma is "what", "who", "which" or "that" are
      skipped, as are matches already marked processed in ``context`` and
      matches whose entity/verb pair is already related in ``oia_graph``;
    * the subject becomes argument 1 of the verb;
    * the right-most ``ref`` child of the entity located between entity and
      verb (if any) is linked to the entity with ``add_ref`` and attached to
      the verb according to its dependency on the verb;
    * the entity becomes (modifying) argument 2 of the verb and every
      ``ccomp`` child of the verb becomes argument 3.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the nodes and relations
    :param context: conversion context used to query / record which
        dependency edges have been processed
    :raises Exception: if the ref word depends on the verb, has no ``case``
        child, and the dependency is neither ``obj``-like nor ``advmod``
    """

    pattern = DependencyGraph()
    verb_node = DependencyGraphNode()
    entity_node = DependencyGraphNode()
    subj_node = DependencyGraphNode()

    pattern.add_nodes([verb_node, entity_node, subj_node])

    pattern.add_dependency(verb_node, subj_node, r'\w*subj\w*')
    pattern.add_dependency(entity_node, verb_node, r'\w*acl:relcl\w*')

    for match in dep_graph.match(pattern):

        dep_entity_node = match[entity_node]
        dep_subj_node = match[subj_node]
        dep_verb_node = match[verb_node]

        # A relative pronoun in subject position is not an object-extracted
        # clause, so this converter leaves it alone.
        if dep_subj_node.LEMMA in {"what", "who", "which", "that"}:
            continue

        logger.debug("we found a objective relative clause")
        logger.debug("entity: {0}".format(dep_entity_node))
        logger.debug("subject: {0}".format(dep_subj_node))
        logger.debug("verb: {0}".format(dep_verb_node))

        if context.is_processed(dep_entity_node, dep_verb_node):
            logger.debug("processed")
            continue

        context.processed(dep_verb_node, dep_subj_node)
        context.processed(dep_entity_node, dep_verb_node)

        oia_entity_node = oia_graph.add_words(dep_entity_node.position)
        oia_verb_node = oia_graph.add_words(dep_verb_node.position)
        oia_subj_node = oia_graph.add_words(dep_subj_node.position)

        if oia_graph.has_relation(oia_entity_node, oia_verb_node):
            logger.debug("has relation between entity and verb")
            continue

        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)

        def _is_ref_inside_clause(n, l):
            # only "ref" children sitting between the entity and the verb
            return (l == "ref"
                    and dep_entity_node.LOC < n.LOC < dep_verb_node.LOC)

        ref_nodes = sorted(
            (n for n, l in dep_graph.children(dep_entity_node,
                                              filter=_is_ref_inside_clause)),
            key=lambda x: x.LOC)

        if ref_nodes:
            # the ref word closest to the verb wins
            ref_node = ref_nodes[-1]

            oia_ref_node = oia_graph.add_words(ref_node.position)

            oia_graph.add_ref(oia_entity_node, oia_ref_node)

            logger.debug("we are coping with ref between:")
            logger.debug(dep_verb_node)
            logger.debug(ref_node)

            ref_relation = dep_graph.get_dependency(dep_verb_node, ref_node)

            case_nodes = sorted(
                (n for n, l in dep_graph.children(
                    ref_node, filter=lambda n, l: "case" in l)),
                key=lambda x: x.LOC)

            if ref_relation:
                if case_nodes:
                    # "with which xxxx": the case word ("with") becomes the
                    # root predicate over the verb and the ref word
                    case_node = case_nodes[-1]
                    oia_case_node = oia_graph.add_words(case_node.position)

                    oia_graph.add_argument(oia_case_node,
                                           oia_verb_node,
                                           1,
                                           mod=True)
                    oia_graph.add_argument(oia_case_node, oia_ref_node, 2)
                    oia_graph.add_mod(oia_verb_node, oia_entity_node)
                elif "obj" in ref_relation:
                    oia_graph.add_argument(oia_verb_node, oia_ref_node, 2)
                elif ref_relation == "advmod":
                    oia_graph.add_mod(oia_ref_node, oia_verb_node)
                else:
                    raise Exception(
                        "unknown relation: {}".format(ref_relation))

        # NOTE(review): this repeats the subject argument added before the
        # ref handling; presumably add_argument is idempotent -- confirm
        # before removing either call.
        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)
        oia_graph.add_argument(oia_verb_node, oia_entity_node, 2, mod=True)

        for node, l in dep_graph.children(dep_verb_node):
            if l == "ccomp":
                oia_ccomp_node = oia_graph.add_words(node.position)
                oia_graph.add_argument(oia_verb_node, oia_ccomp_node, 3)
示例#16
0
def simple_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                  context: UD2OIAContext):
    """
    Link each predicate to its subject, objects and clausal complements.

    Pass 1: every node tagged VERB, ADJ, NOUN, AUX or ADV is a candidate
    predicate.  Its ``nsubj``/``csubj`` child is the subject (argument 1);
    ``iobj``, ``obj`` and ``ccomp``/``xcomp`` children follow in that order
    as arguments 2, 3, ...  An expletive child is placed in front of the
    other objects and, when it precedes the predicate, also replaces the
    subject.  If an object carries a ``mark`` child that is one of the
    adverbs when/where/how/whether, that adverb takes the argument slot and
    is connected to the object with ``add_function``.

    Pass 2: where a "what"/"who" word depends on both a parent predicate
    and its child predicate, the parent->child and child->question-word
    relations are replaced by a ``mod_by:`` relation from the question word
    to the child predicate.

    TODO: known bad case -- "Attached is a new link".

    :param dep_graph: dependency graph that is read
    :param oia_graph: OIA graph that is extended in place
    :param context: conversion context (not referenced by this function)
    """
    # ADJ covers "With the demand so high,", NOUN covers "X the best for Y",
    # AUX covers "have" in "I have a cat".
    predicate_tags = {"VERB", "ADJ", "NOUN", "AUX", "ADV"}
    question_adverbs = {"when", "where", "how", "whether"}

    def _is_question_mark(n, l):
        return l == "mark" and (n.UPOS == "ADV"
                                and n.LEMMA in question_adverbs)

    for dep_pred in dep_graph.nodes(
            filter=lambda x: x.UPOS in predicate_tags):
        expletive = None
        nsubj = None
        # (dependency node, sort weight) pairs; lower weight = earlier slot
        complements = []

        for child, rel in dep_graph.children(dep_pred):
            if 'nsubj' in rel or "csubj" in rel:
                nsubj = child
            elif rel.startswith('obj'):
                complements.append((child, 1))
            elif rel.startswith('iobj'):
                complements.append((child, 0))
            elif 'ccomp' in rel or "xcomp" in rel:
                complements.append((child, 2))
            elif "expl" in rel:
                expletive = child

        subject = nsubj if nsubj else None

        if expletive:
            # "It VERB subj that ..." -> the expletive is also the subject;
            # "VERB subj it that ..." -> it is only the leading object.
            if expletive.LOC < dep_pred.LOC:
                subject = expletive
            complements.insert(0, (expletive, -1))

        if not (subject or complements):
            continue

        oia_pred = oia_graph.add_words(dep_pred.position)
        if not oia_pred:
            continue

        # NOTE(review): has_relation receives the dependency node here (and
        # for objects below), not an OIA node -- confirm this is intended.
        if subject is not None and not oia_graph.has_relation(
                oia_pred, subject):
            oia_subject = oia_graph.add_words(subject.position)
            oia_graph.add_argument(oia_pred, oia_subject, 1)

        complements.sort(key=lambda item: item[1])

        # slot 1 stays reserved for the subject even when there is none
        for slot, (dep_obj, _weight) in enumerate(complements, start=2):
            oia_obj = oia_graph.add_words(dep_obj.position)

            question_marks = [
                n for n, l in dep_graph.children(dep_obj,
                                                 filter=_is_question_mark)
            ]

            if question_marks:
                assert len(question_marks) == 1
                oia_question = oia_graph.add_words(
                    question_marks[0].position)
                oia_graph.add_argument(oia_pred, oia_question, slot)
                oia_graph.add_function(oia_question, oia_obj)
            elif not oia_graph.has_relation(oia_pred, dep_obj):
                oia_graph.add_argument(oia_pred, oia_obj, slot)

    # Pass 2: question word shared by a parent and a child predicate.
    core_relation = r'subj|nsubj|iobj|obj|xcomp|ccomp'

    pattern = DependencyGraph()
    parent_pred = pattern.create_node()
    child_pred = pattern.create_node()
    question_word = pattern.create_node(LEMMA=r'what|who')

    for head, dependent in ((parent_pred, child_pred),
                            (parent_pred, question_word),
                            (child_pred, question_word)):
        pattern.add_dependency(head, dependent, core_relation)

    for match in dep_graph.match(pattern):
        matched_nodes = [
            match[x] for x in (parent_pred, child_pred, question_word)
        ]
        oia_parent_pred, oia_child_pred, oia_question_word = [
            oia_graph.add_words(dep_node.position)
            for dep_node in matched_nodes
        ]

        oia_question_word.is_func = True

        # remember the old label before the edge is removed
        old_edge = oia_graph.get_edge(oia_child_pred, oia_question_word)

        oia_graph.remove_relation(oia_child_pred, oia_question_word)
        oia_graph.remove_relation(oia_parent_pred, oia_child_pred)

        oia_graph.add_relation(oia_question_word, oia_child_pred,
                               "mod_by:" + old_edge.label)
示例#17
0
def parataxis(dep_graph: DependencyGraph, oia_graph: OIAGraph,
              context: UD2OIAContext):
    """
    Group a head and its ``parataxis`` children under one OIA predicate.

    The head and its ``parataxis`` dependents are ordered by ``LOC``.  For
    every adjacent pair a connector located between the two is looked up:
    first an ``advmod`` child of the later node that is an SCONJ or has the
    lemma "so", otherwise a ``:``, ``;``, ``--`` or ``,`` punctuation child
    of the head.

    * no connector at all -> an auxiliary "PARATAXIS" node is the predicate;
    * a single pair with a connector -> the connector's words are the
      predicate;
    * several pairs -> the connectors are woven into a
      ``{1} ... {2} ... {3}`` template that becomes the predicate.

    Each grouped node is then attached as argument 1, 2, ... in ``LOC``
    order.

    :param dep_graph: dependency graph that is read
    :param oia_graph: OIA graph that is extended in place
    :param context: conversion context (not referenced by this function)
    """

    for head in list(dep_graph.nodes()):

        members = [
            child for child, label in dep_graph.children(head)
            if "parataxis" == label
        ]
        if not members:
            continue

        members.append(head)
        members.sort(key=lambda member: member.LOC)

        # one entry per adjacent pair: the connecting node, or None
        connectors = []

        for former, latter in more_itertools.pairwise(members):

            def _in_gap(n):
                return former.LOC < n.LOC < latter.LOC

            def _is_adverbial_link(n, l):
                return ("advmod" in l and _in_gap(n)
                        and (n.UPOS == "SCONJ" or n.LEMMA in {"so"}))

            def _is_punct_link(n, l):
                return ("punct" in l and n.FORM in {":", ";", "--", ","}
                        and _in_gap(n))

            adverbial_links = [
                n for n, l in dep_graph.children(latter,
                                                 filter=_is_adverbial_link)
            ]
            punct_links = [
                n for n, l in dep_graph.children(head,
                                                 filter=_is_punct_link)
            ]

            # The adverbial connector wins over punctuation.  Its dependency
            # is left in place; removing it would let adv_modifier recover
            # the word again and may cause further trouble.
            if adverbial_links:
                connectors.append(adverbial_links[0])
            elif punct_links:
                connectors.append(punct_links[0])
            else:
                connectors.append(None)

        if all(link is None for link in connectors):
            oia_pred_node = oia_graph.add_aux("PARATAXIS")
        elif len(connectors) == 1:
            oia_pred_node = oia_graph.add_words(connectors[0].position)
        else:
            template = ["{1}"]
            for slot, link in enumerate(connectors, start=2):
                if link is not None:
                    template.extend(link.position)
                template.append("{{{0}}}".format(slot))
            oia_pred_node = oia_graph.add_words(template)

        for arg_index, member in enumerate(members, start=1):
            oia_arg_node = oia_graph.add_words(member.position)
            oia_graph.add_argument(oia_pred_node, oia_arg_node, arg_index)
示例#18
0
def advcl_mark_sconj(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                     context: UD2OIAContext):
    """
    Turn marker-introduced adverbial clauses into OIA predicates.

    Matches ``pred1 -advcl*-> pred2 -mark-> marker`` in ``dep_graph`` and
    keeps only matches whose marker lemma is listed in
    ``CONJUNCTION_WORDS[language]``.  The marker word becomes an OIA node
    with the clause (``pred2``) as argument 1 and the main predicate
    (``pred1``) as argument 2.  For the marker "if", an ``advmod`` child
    "then" of ``pred1`` is merged with it into a single
    ``if {1} then {2}`` node.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the nodes and arguments
    :param context: conversion context; the handled dependency edges are
        recorded on it via ``processed``
    """

    pattern = DependencyGraph()
    pred1_node = pattern.create_node()
    pred2_node = pattern.create_node()
    # sconj_node = pattern.create_node(UPOS="SCONJ")
    # The marker is left unconstrained here; it is filtered by lemma below.
    sconj_node = pattern.create_node()

    pattern.add_dependency(pred1_node, pred2_node, r'advcl\w*')
    # pattern.add_dependency(pred1_node, pred2_node, r'\w*')
    # pattern.add_dependency(pred2_node, sconj_node, r'mark|advmod')
    pattern.add_dependency(pred2_node, sconj_node, 'mark')

    # The matches are materialised into a list before the loop runs.
    for match in list(dep_graph.match(pattern)):

        dep_pred1_node = match[pred1_node]
        dep_pred2_node = match[pred2_node]
        dep_sconj_node = match[sconj_node]
        # advcl_rel = dep_graph.get_dependency(dep_pred1_node, dep_pred2_node)
        # NOTE(review): `language` is neither a parameter nor a local of this
        # function -- presumably a module-level name; confirm it is defined.
        if dep_sconj_node.LEMMA not in CONJUNCTION_WORDS[language]:
            continue

        # Record both edges as handled on the shared context.
        context.processed(dep_pred2_node, dep_sconj_node)
        context.processed(dep_pred1_node, dep_pred2_node)

        oia_pred1_node = oia_graph.add_words(dep_pred1_node.position)
        oia_pred2_node = oia_graph.add_words(dep_pred2_node.position)

        if dep_sconj_node.LEMMA == "if":
            # check whether there is "then"
            dep_then_nodes = [
                n for n, l in dep_graph.children(dep_pred1_node)
                if n.LEMMA == "then" and l == "advmod"
            ]

            if dep_then_nodes:
                assert len(dep_then_nodes) == 1
                dep_then_node = dep_then_nodes[0]
                context.processed(dep_pred1_node, dep_then_node)

                # Build one "if {1} then {2}" predicate; "{1}" / "{2}" are
                # placeholder entries in the position list.
                if_then_position = dep_sconj_node.position + [
                    "{1}"
                ] + dep_then_node.position + ["{2}"]
                oia_condition_node = oia_graph.add_words(if_then_position)
            else:
                oia_condition_node = oia_graph.add_words(
                    dep_sconj_node.position)

            # The condition clause is argument 1, the main predicate 2.
            oia_graph.add_argument(oia_condition_node, oia_pred2_node, 1)
            oia_graph.add_argument(oia_condition_node, oia_pred1_node, 2)
        else:
            oia_condition_node = oia_graph.add_words(dep_sconj_node.position)
            # NOTE(review): the guard at the top of the loop already skips
            # lemmas outside CONJUNCTION_WORDS[language], so this test looks
            # always true and the `else` branch below appears unreachable --
            # confirm and simplify.
            if dep_sconj_node.LEMMA in CONJUNCTION_WORDS[language]:
                oia_graph.add_argument(oia_condition_node, oia_pred2_node, 1)
                oia_graph.add_argument(oia_condition_node, oia_pred1_node, 2)
            else:
                oia_graph.add_argument(oia_condition_node,
                                       oia_pred1_node,
                                       1,
                                       mod=True)

                oia_graph.add_argument(oia_condition_node, oia_pred2_node, 2)