Пример #1
0
def det_adjv_phrase(dep_graph: DependencyGraph):
    """
    Merge "determiner ... adjective/adverb" spans into a single NOUN node.

    For every ADJ/ADV node that has a ``det`` child, the right-most such
    child is taken as the determiner.  If its lemma is one of
    the/a/an/some/any/all, the offspring of the ADJ/ADV node located between
    the determiner and the node itself (both inclusive) are merged into one
    node with ``UPOS="NOUN"`` and the LOC of the ADJ/ADV node.

    A node is skipped when one of its incoming relations is in
    ``valid_adj_form`` or is ``amod``/``advmod``.

    :param dep_graph: dependency graph; modified in place
    :return: None
    """
    phrases = []

    for node in dep_graph.nodes(filter=lambda n: n.UPOS in {"ADJ", "ADV"}):

        # Materialise the relations once.  A bare ``itertools.chain`` object
        # is single-pass: the first ``any`` below would exhaust it and the
        # amod/advmod check would then never see a relation.
        parent_rels = list(itertools.chain.from_iterable(
            rel for parent, rel in dep_graph.parents(node)))

        if any(rel in valid_adj_form for rel in parent_rels):
            continue

        if any(rel in {"amod", "advmod"} for rel in parent_rels):
            continue

        det_children = [
            n for n, l in dep_graph.children(node,
                                             filter=lambda n, l: l == "det")
        ]

        if not det_children:
            continue

        # the determiner closest to the end of the sentence wins
        det_children.sort(key=lambda x: x.LOC)
        det = det_children[-1]

        if det.LEMMA not in {"the", "a", "an", "some", "any", "all"}:
            continue

        root = node

        # keep only the offspring lying between the determiner and the root
        np_elements = sorted(
            dep_graph.offsprings(
                root, filter=lambda n: det.LOC <= n.LOC <= root.LOC),
            key=lambda x: x.LOC)

        # NOTE(review): the span is not verified to be contiguous; the
        # original continuity check was disabled.

        phrases.append((np_elements, root))

    # rewrite the graph only after the scan is complete
    for np, root in phrases:
        noun_node = merge_dep_nodes(np, UPOS="NOUN", LOC=root.LOC)
        dep_graph.replace_nodes(np, noun_node)
Пример #2
0
def multi_word_sconj(dep_graph: DependencyGraph):
    """
    Merge a multi-word subordinating conjunction into a single node.

    Looks for ``VERB -advcl:*-> VERB -mark-> SCONJ`` where the lemma of the
    SCONJ node is among the values of the advcl dependency.  When the SCONJ
    node has more than one offspring, all of them are merged into one node
    (UPOS and LOC taken from the SCONJ node), and the two dependencies are
    re-created as ``advcl:<merged lemma>`` and ``mark``.

    :param dep_graph: dependency graph; modified in place
    :return: None
    """

    template = DependencyGraph()

    head_verb = template.create_node(UPOS="VERB")
    clause_verb = template.create_node(UPOS="VERB")
    marker = template.create_node(UPOS="SCONJ")

    template.add_dependency(head_verb, clause_verb, r'advcl:\w*')
    template.add_dependency(clause_verb, marker, r'mark')

    # phase 1: collect candidates without touching the graph
    candidates = []
    for found in dep_graph.match(template):

        g_head = found[head_verb]
        g_clause = found[clause_verb]
        g_marker = found[marker]

        advcl_rels = dep_graph.get_dependency(g_head, g_clause)
        if g_marker.LEMMA not in advcl_rels.values():
            continue

        span = list(dep_graph.offsprings(g_marker))
        if len(span) == 1:
            # a one-element span has nothing to merge
            continue

        span.sort(key=lambda n: n.LOC)
        candidates.append((g_head, g_clause, g_marker, span))

    # phase 2: rewrite the graph
    for g_head, g_clause, g_marker, span in candidates:

        # skip candidates whose nodes were consumed by an earlier merge
        still_present = [dep_graph.get_node(member.ID) for member in span]
        if not all(still_present):
            continue

        dep_graph.remove_dependency(g_clause, g_marker)
        dep_graph.remove_dependency(g_head, g_clause)

        merged = merge_dep_nodes(span, UPOS=g_marker.UPOS, LOC=g_marker.LOC)

        dep_graph.replace_nodes(span, merged)
        dep_graph.add_dependency(g_head, g_clause, "advcl:" + merged.LEMMA)
        dep_graph.add_dependency(g_clause, merged, "mark")
Пример #3
0
def multi_words_mark(dep_graph: DependencyGraph):
    """
    Combine a multi-word marker into a single node.

    Example: in "arise on to" the words "on to" should be combined.

    For every node, the offspring of all children attached with a relation
    containing "mark" are gathered.  When more than one node is found and
    none of them is a NOUN/NUM/VERB/ADJ/ADV/PRON, every graph node located
    between the first and the last of them is merged into one node, which
    is then attached to the head with a "mark" dependency.

    :param dep_graph: dependency graph; modified in place
    :return: None
    """
    content_upos = {"NOUN", "NUM", "VERB", "ADJ", "ADV", "PRON"}
    candidates = []

    # phase 1: collect (head, marker span) pairs
    for head in dep_graph.nodes():
        span = []
        for child, rel in dep_graph.children(
                head, filter=lambda n, l: "mark" in l):
            span.extend(dep_graph.offsprings(child))

        # nothing, or a single word: no merge needed
        if len(span) <= 1:
            continue

        if any(member.UPOS in content_upos for member in span):
            continue

        candidates.append((head, sorted(span, key=lambda n: n.LOC)))

    # phase 2: rewrite the graph
    for head, span in candidates:
        # skip spans whose nodes were consumed by an earlier merge
        if not all([dep_graph.get_node(member.ID) for member in span]):
            continue

        first_loc = span[0].LOC
        last_loc = span[-1].LOC

        # widen the span to every node lying inside the LOC interval
        covered = sorted(
            (n for n in dep_graph.nodes() if first_loc <= n.LOC <= last_loc),
            key=lambda n: n.LOC)

        if any(member.UPOS in NOUN_UPOS for member in covered):
            continue

        leftmost = covered[0]
        merged = merge_dep_nodes(covered,
                                 UPOS=leftmost.UPOS,
                                 LOC=leftmost.LOC)

        for member in covered:
            dep_graph.remove_dependency(head, member)
        dep_graph.replace_nodes(covered, merged)
        dep_graph.add_dependency(head, merged, "mark")
Пример #4
0
def multi_words_cc(dep_graph: DependencyGraph):
    """
    Combine a multi-word coordinating conjunction into a single node.

    For every node, the offspring of all children attached with the exact
    relation "cc" are gathered.  When more than one node is found and none
    of them is a NOUN/NUM/VERB, every graph node located between the first
    and the last of them is merged into one node (UPOS and LOC taken from
    the left-most node), which is attached to the head with a "cc"
    dependency if the head still exists.

    :param dep_graph: dependency graph; modified in place
    :return: None
    """
    content_upos = {"NOUN", "NUM", "VERB"}

    mark_phrases = []

    # phase 1: collect (head, cc span) pairs
    for node in dep_graph.nodes():
        marks = []
        for n, l in dep_graph.children(node, filter=lambda n, l: "cc" == l):
            marks.extend(dep_graph.offsprings(n))

        # nothing, or a single word: no merge needed
        if len(marks) <= 1:
            continue

        if any(x.UPOS in content_upos for x in marks):
            continue

        marks.sort(key=lambda n: n.LOC)
        mark_phrases.append((node, marks))

    # phase 2: rewrite the graph
    for node, marks in mark_phrases:

        # Check the *stored* nodes: they may have been consumed by an earlier
        # merge.  Running this check after re-collecting the span from the
        # graph would be vacuous, because those nodes exist by construction.
        if not all([dep_graph.get_node(x.ID) for x in marks]):
            continue

        mark_min_loc = marks[0].LOC
        mark_max_loc = marks[-1].LOC

        # widen the span to every node inside the LOC interval, ordered by
        # LOC so that marks[0] really is the left-most node
        marks = [n for n in dep_graph.nodes()
                 if mark_min_loc <= n.LOC <= mark_max_loc]
        marks.sort(key=lambda n: n.LOC)

        if any(x.UPOS in content_upos for x in marks):
            continue

        new_mark_node = merge_dep_nodes(marks,
                                        UPOS=marks[0].UPOS,
                                        LOC=marks[0].LOC
                                        )

        dep_graph.replace_nodes(marks, new_mark_node)
        for mark in marks:
            dep_graph.remove_dependency(node, mark)

        # the head itself may have been inside the merged interval
        if dep_graph.get_node(node.ID):
            dep_graph.add_dependency(node, new_mark_node, "cc")
def nmod_without_case(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                      context: UD2OIAContext):
    """
    Add a modification link to the OIA graph for every nmod-like dependency.

    Every pair connected by a relation matching ``\\w*nmod\\w*`` is turned
    into an ``add_mod(modifier, center)`` call on the OIA graph, unless

    * the relation set contains "nmod:poss" and the center is among the
      offspring of the modifier (the "whose" construction), or
    * the OIA graph already reports a relation between the two nodes
      (``direct_link=False``).

    :param dep_graph: dependency graph to scan
    :param oia_graph: OIA graph that receives the new nodes and links
    :param context: conversion context (not used here)
    :return: None
    """

    template = DependencyGraph()

    head = template.create_node()
    dependent = template.create_node()

    template.add_dependency(head, dependent, r'\w*nmod\w*')

    for found in dep_graph.match(template):

        g_head = found[head]
        g_dependent = found[dependent]

        relations = dep_graph.get_dependency(g_head, g_dependent)

        # "whose"-style possessive: the head sits below its own modifier
        possessive_loop = ("nmod:poss" in relations
                           and g_head in set(dep_graph.offsprings(g_dependent)))

        if possessive_loop or oia_graph.has_relation(
                g_head, g_dependent, direct_link=False):
            continue

        oia_head = oia_graph.add_words(g_head.position)
        oia_dependent = oia_graph.add_words(g_dependent.position)

        oia_graph.add_mod(oia_dependent, oia_head)
Пример #6
0
def noun_phrase(dep_graph: DependencyGraph):
    """
    Collect noun phrases around nominal heads and merge each into one node.

    The function works in three passes:

    1. For every node whose UPOS is NOUN/PROPN/X/NUM/SYM, gather the root
       plus the offspring of its accepted children, cut everything located
       before the last determiner, and strip leading dashes, adpositions,
       coordinating conjunctions and punctuation.
    2. Compare all candidate phrases pairwise; a phrase fully contained in
       another one is recorded as a sub-phrase, a partial overlap is an error.
    3. Merge every phrase that was not recorded as a sub-phrase into a single
       node (UPOS of the head, LOC of the last element), after attaching
       quote characters that directly surround it.

    :param dep_graph: dependency graph; modified in place
    :return: None
    :raises Exception: when two candidate phrases overlap only partially
    """
    nouns = []
    # we first find np roots
    for root in dep_graph.nodes(
            filter=lambda x: x.UPOS in {"NOUN", "PROPN", "X", "NUM", "SYM"}):

        logger.debug("checking the node:")
        logger.debug(str(root))

        # np_elements = valid_np_element(root, dep_graph)
        # All values of the incoming relations of the root, with "_" turned
        # into " " so they can be compared against lemmas/forms below.
        parent_rels = set(
            itertools.chain.from_iterable(l.values()
                                          for n, l in dep_graph.parents(root)))
        parent_rels = set(rel.replace("_", " ") for rel in parent_rels)

        # "case" children whose lemma or form already appears among the
        # incoming relation values; they are kept out of the phrase.
        escaped_case_node = set()
        if parent_rels:
            case_nodes = [
                x
                for x, l in dep_graph.children(root,
                                               filter=lambda n, l: l == "case")
            ]
            for node in case_nodes:
                if node.LEMMA.lower() in parent_rels or node.FORM.lower(
                ) in parent_rels:
                    # lemma is for including
                    escaped_case_node.add(node)

        # children accepted by the project helper is_valid_np_child
        valid_np_children = [(n, l) for n, l in dep_graph.children(
            root, filter=lambda n, l: is_valid_np_child(dep_graph, root, l, n))
                             ]
        logger.debug("noun_phrase: valid_np_children:")
        for node, l in valid_np_children:
            logger.debug(str(node))

        np_elements = [root]

        for n, l in valid_np_children:
            # adpositions never join the phrase
            if n.UPOS == "ADP":
                continue
            # a child located after the root is kept only for the relation
            # prefixes listed here
            if n.LOC > root.LOC and \
                    not any(l.startswith(x)
                            for x in {"fixed", "compound", "nummod",
                                      "nmod:tmod", "flat", "nmod:npmod", "dep"}):
                continue
            if n in escaped_case_node:
                continue

            # super nodes flagged as conjunctions are left alone
            if isinstance(n, DependencyGraphSuperNode) and n.is_conj:
                continue

            offsprings = list(dep_graph.offsprings(n))
            valid_np_component = True

            # Reject the whole subtree when any of its nodes has an incoming
            # relation for which one of these labels passes the ``in rels``
            # test.
            # NOTE(review): whether ``in rels`` is an exact-label or a
            # substring test depends on the relation type - confirm.
            for x in offsprings:
                for parent, rels in dep_graph.parents(x):
                    if any(x in rels
                           for x in {"acl", "obl", "advcl", "subj", "obj"}):
                        valid_np_component = False
                        break
                if not valid_np_component:
                    break
            if valid_np_component:
                np_elements.extend(offsprings)

        logger.debug("noun_phrase: candidate np_elements:")
        for node in np_elements:
            logger.debug(str(node))

        # determiners of the root located at or before it, ordered by LOC
        det = [
            n for n, l in dep_graph.children(root,
                                             filter=lambda n, l: l == "det")
        ]
        det = [x for x in det if x.LOC <= root.LOC]
        det.sort(key=lambda x: x.LOC)

        if det:
            # raise Exception("noun phrase without det ")

            # the last determiner marks the left edge of the phrase
            det = det[-1]
            # check the element should be continuous
            np_elements = [x for x in np_elements if det.LOC <= x.LOC]
            logger.debug(
                "noun_phrase: det found, cut the nodes before the det")

        filtered_np_elements = sorted(list(np_elements), key=lambda x: x.LOC)
        # if np_elements[-1].LOC - np_elements[0].LOC != len(np_elements) - 1:
        #     print ("root", root)
        #     for n in np_elements:
        #         print("np element", n.LOC, n)
        #     raise Exception("Bad Business Logic")
        # Strip leading dashes, adpositions, coordinating conjunctions and
        # punctuation until the first element is none of these.
        changed = True
        while changed:
            changed = False
            if filtered_np_elements and filtered_np_elements[0].LEMMA in {
                    "-", "--"
            }:
                filtered_np_elements.pop(0)
                changed = True
            if filtered_np_elements and filtered_np_elements[0].UPOS in {
                    "ADP", "CCONJ", "PUNCT"
            }:
                filtered_np_elements.pop(0)
                changed = True

        if filtered_np_elements:
            nouns.append((set(filtered_np_elements), root))

    # Pairwise overlap check: phrasex is the longer phrase and phrasey the
    # shorter one (on equal length phrasex is phrase2).
    sub_nouns = []
    for idx1, (phrase1, head1) in enumerate(nouns):
        for idx2, (phrase2, head2) in enumerate(nouns):
            if idx1 == idx2:
                continue

            phrasex, phrasey = (
                phrase1, phrase2) if len(phrase1) > len(phrase2) else (phrase2,
                                                                       phrase1)
            common = phrasex.intersection(phrasey)

            if not common:
                continue
            elif len(common) == len(phrasey):
                # node2 is a sub np of node1, delete
                sub_nouns.append(phrasey)
            else:
                # partial overlap: dump both phrases to stdout, then abort
                print("Phrase 1", [x.ID for x in phrase1])
                print("Phrase 2", [x.ID for x in phrase2])
                # return
                raise Exception("duplicate words found")

    for idx, (phrase, head) in enumerate(nouns):

        # set equality against the recorded sub-phrases
        if phrase in sub_nouns:
            continue

        phrase = sorted(list(phrase), key=lambda x: x.LOC)

        # Attach a quote character that is a child of a phrase member and
        # sits directly before the first or directly after the last element.
        # NOTE(review): ``phrase`` is mutated while it is being iterated.
        for node in phrase:
            for child, _ in dep_graph.children(node):
                if child.LOC == phrase[0].LOC - 1 and child.LEMMA in {
                        "\"", "\'"
                }:
                    phrase.insert(0, child)
                if child.LOC == phrase[-1].LOC + 1 and child.LEMMA in {
                        "\"", "\'"
                }:
                    phrase.append(child)

        # the merged node keeps the head's UPOS and the LOC of the last element
        noun_node = merge_dep_nodes(phrase, UPOS=head.UPOS, LOC=phrase[-1].LOC)
        # print("Noun detected", noun_node.ID)
        dep_graph.replace_nodes(phrase, noun_node)
def general_question(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                     context: UD2OIAContext):
    """
    Mark verbs that head a general (yes/no) question in the OIA graph.

    A verb is considered only if none of its offspring has one of the words
    what/how/why/when/where in its lemma, and if it either has an auxiliary
    or is a "be" verb.  It is treated as a question when

    * it is a merged "there/here be" verb and "be" precedes "there"/"here", or
    * it is a "be" verb located before its subject, or
    * its auxiliary is located before its subject.

    For a question, an auxiliary OIA node "WHETHER" is added and linked to
    the verb with ``add_function``.

    :param dep_graph: dependency graph to scan
    :param oia_graph: OIA graph that receives the new nodes and links
    :param context: conversion context (not used here)
    :return: None
    """
    wh_words = {"what", "how", "why", "when", "where"}

    for verb in dep_graph.nodes(filter=lambda n: n.UPOS == "VERB"):

        # Compare whole words of the (space-joined) lemma.  A plain substring
        # test would also fire on lemmas such as "show" or "somewhere" and
        # wrongly rule the verb out.
        if any(not wh_words.isdisjoint(n.LEMMA.split(' '))
               for n in dep_graph.offsprings(verb)):
            continue

        # check subj and aux; the last matching child wins
        subj = None
        aux = None
        for child, rel in dep_graph.children(verb):
            if "subj" in rel:
                subj = child
            if "aux" in rel:
                aux = child

        is_be_verb = False

        if isinstance(verb, DependencyGraphSuperNode):
            # a merged verb is expected to carry its auxiliary inside
            assert aux is None
            for n in verb.nodes:
                if isinstance(n, DependencyGraphNode):
                    if n.LEMMA == "be":
                        is_be_verb = True
                    if n.UPOS == "AUX":
                        aux = n
        else:
            is_be_verb = verb.LEMMA == "be"

        if aux is None and not is_be_verb:
            # cannot be a general question
            continue

        # an expletive ("there") takes the place of the subject
        expl_child = [n for n, l in dep_graph.children(verb) if l == "expl"]
        if expl_child:
            assert len(expl_child) == 1
            subj = expl_child[0]

        if subj is None:
            logger.warning(
                "subject is none, cannot decide whether it is a question")
            continue

        # registered before the decision is made, as in the original flow
        oia_verb_node = oia_graph.add_words(verb.position)

        lemma_words = verb.LEMMA.split(' ')
        is_there_be_verb = is_be_verb and ("there" in lemma_words
                                           or "here" in lemma_words)

        is_question = False

        if is_there_be_verb:

            assert isinstance(verb, DependencyGraphSuperNode)
            be_node = [n for n in verb.nodes if n.LEMMA == "be"][0]
            there_node = [
                n for n in verb.nodes
                if n.LEMMA == "there" or n.LEMMA == "here"
            ][0]
            # "is there ..." is a question, "there is ..." is not
            if be_node.LOC < there_node.LOC:
                is_question = True

        elif is_be_verb and verb.LOC < subj.LOC:

            is_question = True

        elif aux is not None and aux.LOC < subj.LOC:

            is_question = True

        if is_question:
            oia_question_node = oia_graph.add_aux("WHETHER")

            oia_graph.add_function(oia_question_node, oia_verb_node)
Пример #8
0
def verb_phrase(dep_graph: DependencyGraph):
    """
    Merge a verb with its auxiliaries, adverbial modifiers and compounds.

    For every VERB/AUX node (except an AUX attached to a parent through
    ``aux``, and a past-tense node that is an ``amod`` of a later parent) the
    group consists of

    * the node itself,
    * the offspring of its ``advmod`` children, unless they contain a
      VERB/NOUN/AUX/PRON node,
    * its ``compound`` children,

    restricted to nodes at or before the verb (compounds are kept on either
    side) and reduced by ``continuous_component``; afterwards all ``aux``
    children are added.  Groups with more than one node are merged into a
    single node with ``UPOS="VERB"`` and the LOC and FEATS of the verb.

    :param dep_graph: dependency graph; modified in place
    :return: None
    """
    verb_phrases = []

    for node in dep_graph.nodes(filter=lambda x: x.UPOS in {"VERB", "AUX"}):

        if node.UPOS == "AUX":
            # an auxiliary attached to a head is merged with that head
            aux_heads = [
                n for n, l in dep_graph.parents(node,
                                                filter=lambda n, l: l == "aux")
            ]
            if aux_heads:
                continue

        if "Tense" in node.FEATS and "Past" in node.FEATS["Tense"]:
            # if the verb is before the noun, it will be processed by
            # noun_phrase and taken as a part of the noun
            modified_heads = [
                n for n, l in dep_graph.parents(
                    node, filter=lambda n, l: l == "amod" and node.LOC < n.LOC)
            ]
            if modified_heads:
                continue

        root = node
        verbs = [root]
        for n, l in dep_graph.children(root):
            # skip children that also point back at the root
            if dep_graph.get_dependency(n, root):
                continue

            if n.LEMMA in {"so", "also", "why"}:
                continue

            if "advmod" in l:
                offsprings = list(dep_graph.offsprings(n))
                if any(x.UPOS in {"VERB", "NOUN", "AUX", "PRON"}
                       for x in offsprings):
                    continue

                verbs.extend(offsprings)
            elif "compound" in l:
                verbs.append(n)

        # keep what precedes the verb; compounds may also follow it
        verbs = [
            x for x in verbs if x.LOC <= root.LOC
            or "compound" in dep_graph.get_dependency(root, x)
        ]

        verbs = continuous_component(verbs, root)

        # add aux
        verbs.extend(n for n, l in dep_graph.children(root) if "aux" in l)

        verbs.sort(key=lambda x: x.LOC)

        # The former unused local ``last_loc = verbs[-1].LOC`` was dropped;
        # its only effect was an IndexError on an empty group, which is now
        # simply skipped by the length test.
        if len(verbs) > 1:
            verb_phrases.append((verbs, root))

    # rewrite the graph only after the scan is complete
    for verbs, root in verb_phrases:
        verb_node = merge_dep_nodes(verbs,
                                    UPOS="VERB",
                                    LOC=root.LOC,
                                    FEATS=root.FEATS)

        dep_graph.replace_nodes(verbs, verb_node)
Пример #9
0
def gradation(dep_graph: DependencyGraph):
    """
    Merge a comparative with "than" into a single ADP node.

    TODO: does not match the tech report, and the verb is not considered

    Examples:
        He runs faster than her
        Martin is more intelligent than Donald
        He is a nicer person than Tom
        She is more than a regular cook

    Pattern: ``head -advmod|amod-> comparative ADJ/ADV -(nmod|obl|advcl):than->
    obj -case|mark-> "than"``.  If the comparative has an amod/advmod child
    accepted by ``in_interval(n, None, comparative)``, the offspring of the
    first such child are merged with "than"; otherwise the comparative
    itself is merged with "than".  The merged node gets ``UPOS="ADP"`` and
    the LOC of "than".  The object is then attached to the head with
    ``advcl:<form>`` / ``mark`` when the head is a VERB, and with
    ``obl:<form>`` / ``case`` otherwise.

    :param dep_graph: dependency graph; modified in place
    :return: None
    """

    pattern = DependencyGraph()
    verb_node = pattern.create_node(UPOS="VERB|NOUN|PRON|PROPN|SYM")
    advj_node = pattern.create_node(UPOS="ADJ|ADV", FEATS={"Degree": "Cmp"})
    than_node = pattern.create_node(FORM="than")
    obj_node = pattern.create_node()

    pattern.add_dependency(verb_node, advj_node, r'advmod|amod')
    pattern.add_dependency(advj_node, obj_node,
                           r'\w*(nmod:than|obl:than|advcl:than)\w*')
    pattern.add_dependency(obj_node, than_node, r'\w*case|mark\w*')

    # matches are materialised first because the graph is rewritten below
    for match in list(dep_graph.match(pattern)):

        dep_verb_node = match[verb_node]
        dep_advj_node = match[advj_node]
        dep_than_node = match[than_node]
        dep_obj_node = match[obj_node]

        def __valid_mod(n, l):
            return (l == "amod" or l == "advmod") and in_interval(
                n, None, dep_advj_node)

        modifiers = list(dep_graph.children(dep_advj_node, filter=__valid_mod))

        if modifiers:
            aux_node = modifiers[0][0]
            # offsprings() is wrapped in list()/set()/extend() at every other
            # call site; wrap it here too, so the concatenation does not
            # depend on it returning an actual list.
            more_than_nodes = list(dep_graph.offsprings(aux_node)) + [dep_than_node]
        else:
            more_than_nodes = [dep_advj_node, dep_than_node]

        dep_more_than_node = merge_dep_nodes(more_than_nodes,
                                             UPOS="ADP",
                                             LOC=dep_than_node.LOC)

        dep_graph.replace_nodes(more_than_nodes, dep_more_than_node)

        # drop the links around the merged node before re-attaching the object
        dep_graph.remove_dependency(dep_obj_node, dep_more_than_node)
        dep_graph.remove_dependency(dep_more_than_node, dep_obj_node)
        dep_graph.remove_dependency(dep_verb_node, dep_more_than_node)

        if dep_verb_node.UPOS == "VERB":

            dep_graph.set_dependency(dep_verb_node, dep_obj_node,
                                     "advcl:" + dep_more_than_node.FORM)
            dep_graph.set_dependency(dep_obj_node, dep_more_than_node, "mark")
        else:
            dep_graph.set_dependency(dep_verb_node, dep_obj_node,
                                     "obl:" + dep_more_than_node.FORM)
            dep_graph.set_dependency(dep_obj_node, dep_more_than_node, "case")
Пример #10
0
def multi_word_fix_flat(dep_graph: DependencyGraph):
    """
    Merge multi-word expressions linked by fixed/flat/compound relations.

    A node is a phrase head when it has no parent through a relation
    containing "fixed", "flat" or "compound" but has children through such
    a relation.  The phrase is the head plus the offspring of those
    children.  Longer phrases are handled first.  For each phrase, the
    offspring of its members that fall inside the phrase's LOC interval
    (plus a quote character directly before or after it) are merged into
    one node carrying the head's UPOS and LOC.

    :param dep_graph: dependency graph; modified in place
    :return: None
    """

    fixed_rels = {"fixed", "flat", "compound"}

    phrases = []

    # phase 1: collect (phrase, head) pairs
    for node in dep_graph.nodes():

        parents = [n for n, l in dep_graph.parents(
            node, filter=lambda n, l: any(x in l for x in fixed_rels))]

        # only the top of a fixed/flat/compound chain acts as head
        if parents:
            continue

        phrase = []
        for n, l in dep_graph.children(
                node, filter=lambda n, l: any(x in l for x in fixed_rels)):
            phrase.extend(dep_graph.offsprings(n))

        if not phrase:
            continue

        phrase.append(node)

        if len(phrase) > 1:
            phrase.sort(key=lambda n: n.LOC)
            phrases.append((phrase, node))

    # longest phrases first, so nested ones are found already consumed
    phrases.sort(key=lambda x: len(x[0]), reverse=True)

    # phase 2: rewrite the graph
    for phrase, head in phrases:

        if not all([dep_graph.get_node(x.ID) for x in phrase]):
            continue  # already been processed

        merging_nodes = set()
        min_loc = 10000
        max_loc = -1
        for child in phrase:
            if isinstance(child, DependencyGraphNode):
                min_loc = min(min_loc, child.LOC)
                # compare against the running maximum; the earlier
                # ``max(min_loc, ...)`` could discard a larger maximum
                # already recorded from a super node
                max_loc = max(max_loc, child.LOC)
            elif isinstance(child, DependencyGraphSuperNode):
                inner_locs = [x.LOC for x in child.nodes]
                min_loc = min(min_loc, min(inner_locs))
                max_loc = max(max_loc, max(inner_locs))
            merging_nodes.update(dep_graph.offsprings(child))

        merged_nodes = set(n for n in merging_nodes
                           if min_loc <= n.LOC <= max_loc)
        # pull in a quote character directly before or after the interval
        for node in merging_nodes:
            if node.LOC == min_loc - 1 and node.LEMMA in {"\"", "\'"}:
                merged_nodes.add(node)
            if node.LOC == max_loc + 1 and node.LEMMA in {"\"", "\'"}:
                merged_nodes.add(node)
        merged_nodes = sorted(merged_nodes, key=lambda x: x.LOC)

        logger.debug("multi_word_fix_flat: we are merging ")
        logger.debug("\n".join(str(node) for node in merged_nodes))
        logger.debug("with head \n" + str(head))
        new_node = merge_dep_nodes(merged_nodes, UPOS=head.UPOS, LOC=head.LOC)

        dep_graph.replace_nodes(merged_nodes, new_node)
Пример #11
0
def multi_words_case(dep_graph: DependencyGraph):
    """
    Merge a multi-word case marker into a single node.

    TODO: add example case

    Pattern: ``head -<rel>:<case>-> x -case-> case node``.  All offspring of
    the direct ``case`` children of ``x`` form the case span.  For each
    relation between head and ``x`` that contains ":" and whose part after
    the colon occurs in the "_"-joined lemmas of the span, the span is
    merged into one node (UPOS and LOC of the matched case node) and the
    head -> x dependency is replaced by ``<rel>:<space-joined lemmas>``.

    :param dep_graph: dependency graph; modified in place
    :return: None
    """

    template = DependencyGraph()

    head = DependencyGraphNode()
    dependent = DependencyGraphNode()
    case = DependencyGraphNode()

    for template_node in (head, dependent, case):
        template.add_node(template_node)

    template.add_dependency(head, dependent, r'\w*:\w*')
    template.add_dependency(dependent, case, r'\bcase\b')

    # matches are materialised first because the graph is rewritten below
    for found in list(dep_graph.match(template)):

        g_head = found[head]
        g_dependent = found[dependent]
        g_case = found[case]

        # the case node may have been consumed by an earlier merge
        if not dep_graph.has_node(g_case):
            continue

        direct_cases = [
            n for n, l in dep_graph.children(g_dependent,
                                             filter=lambda n, l: "case" == l)
        ]
        case_span = set()
        for direct_case in direct_cases:
            case_span.update(dep_graph.offsprings(direct_case))

        if len(case_span) == 1:
            # single-word case marker: nothing to merge
            continue

        ordered_span = sorted(case_span, key=lambda n: n.LOC)
        logger.debug("multi case discovered")
        for member in ordered_span:
            logger.debug(str(member))

        pending = []
        for rel in dep_graph.get_dependency(g_head, g_dependent):
            if ":" not in rel:
                continue

            rel_name, case_name = rel.split(":")
            # some times, the rel only contains one word
            # Example :
            # that OBSF values within the extended trial balance may be
            # misstated due to data issues ( above and beyond existing
            # conversations with AA on model simplifications)
            if case_name in "_".join([x.LEMMA for x in ordered_span]):
                pending.append((ordered_span, rel_name))

        for span, rel_name in pending:

            logger.debug("we are merging:")
            for member in span:
                logger.debug(str(member))

            if not all([dep_graph.has_node(x) for x in span]):
                continue

            merged_case = merge_dep_nodes(span,
                                          UPOS=g_case.UPOS,
                                          LOC=g_case.LOC)
            dep_graph.replace_nodes(span, merged_case)
            dep_graph.remove_dependency(g_head, g_dependent)

            new_rel = rel_name + ":" + " ".join([x.LEMMA for x in span])
            dep_graph.add_dependency(g_head, g_dependent, new_rel)