def graph_match_metric(pred_graph: OIAGraph, truth_graph: OIAGraph):
    """
    Count how many nodes and edges of a predicted graph also occur in a
    reference graph.

    Nodes are compared by their text. Edges are compared as
    ``(source text, label, target text)`` triples, where surrounding double
    quotes and spaces are stripped from the label.

    :param pred_graph: the predicted graph
    :param truth_graph: the reference graph
    :return: ``(node_counts, edge_counts, exact_same)``; each counts tuple is
        ``(predicted, true, matched)``, where ``matched`` is the number of
        predicted items that also occur in the reference graph.
        ``exact_same`` is True only when, for both nodes and edges, the three
        counts are equal.
    """

    def edge_triples(graph):
        # Textual form of every edge so that edges of different graphs compare.
        return [(graph.node_text(n1), edge.label.strip("\" "),
                 graph.node_text(n2))
                for n1, edge, n2 in graph.edges()]

    pred_nodes = [pred_graph.node_text(n) for n in pred_graph.nodes()]
    true_nodes = [truth_graph.node_text(n) for n in truth_graph.nodes()]

    node_true_num = len(true_nodes)
    node_pred_num = len(pred_nodes)

    # Set membership keeps the count identical to the list-based test
    # (duplicated predictions are still counted once each) without the
    # quadratic scan.
    true_node_set = set(true_nodes)
    node_match_num = sum(node in true_node_set for node in pred_nodes)

    pred_edges = edge_triples(pred_graph)
    true_edges = edge_triples(truth_graph)

    logger.debug(pred_edges)
    logger.debug(true_edges)

    edge_true_num = len(true_edges)
    edge_pred_num = len(pred_edges)

    true_edge_set = set(true_edges)
    edge_match_num = sum(edge in true_edge_set for edge in pred_edges)

    exact_same = node_match_num == node_true_num == node_pred_num and \
                 edge_match_num == edge_true_num == edge_pred_num

    return (node_pred_num, node_true_num, node_match_num), \
           (edge_pred_num, edge_true_num, edge_match_num), exact_same
Exemplo n.º 2
0
    def backward(self, oia_graph: OIAGraph, **kwargs):
        """
        Merge the argument children of conjunction nodes into one span node.

        For every node accepted by ``is_conjunction_without_args`` whose
        ``pred.arg.*`` children are all leaves, a new span node covering the
        smallest to the largest word index of those children is created, the
        children are removed, and the conjunction node is replaced by the new
        node.

        @param oia_graph: the graph to rewrite in place
        @type oia_graph: OIAGraph
        @param kwargs: not used by this method
        @type kwargs: dict
        @return: True if at least one conjunction was merged
        @rtype: bool
        """

        fixed = False
        for node in list(oia_graph.nodes()):
            if not is_conjunction_without_args(node, oia_graph):
                continue

            relations = [(child, edge.label)
                         for child, edge in oia_graph.children(node)
                         if edge.label.startswith("pred.arg.")]

            if not relations:
                continue

            if any(list(oia_graph.children(child)) for child, _ in relations):
                # an argument has children of its own: do not merge
                continue

            # Only integer entries are word indices; other entries are skipped.
            word_indices = [word
                            for child, _ in relations
                            for word in child.words()
                            if isinstance(word, int)]
            if not word_indices:
                # No index to build a span from; min()/max() would raise.
                logger.debug("No word index found among the arguments, skip merging")
                continue

            new_node = oia_graph.add_spans(
                [(min(word_indices), max(word_indices))])

            # Read the child texts while the children are still in the graph.
            child_texts = [oia_graph.node_text(child) for child, _ in relations]

            fixed = True
            for child, _ in relations:
                oia_graph.remove_node(child)

            oia_graph.replace(node, new_node)

            logger.debug("Merging {0} to {1}".format("|".join(child_texts),
                                                     oia_graph.node_text(new_node)))

        return fixed
    def forward(self, oia_graph: OIAGraph, **kwargs):
        """
        Split noun phrases that contain "of" into a chain of nodes.

        According to the previous merge operation, if there is any
        modification to the part after the "of", the noun phrase is not
        merged. So noun phrases with "of" do not have any modification on the
        second part.

        A node "A of B of C" becomes ``A -as:pred.arg.1-> of -pred.arg.2-> B
        -as:pred.arg.1-> of -pred.arg.2-> C``. Parents and children of the
        original node are re-attached to the first part (A), and the original
        node is removed.

        @param oia_graph: the graph to rewrite in place
        @type oia_graph: OIAGraph
        @param kwargs: not used by this method
        @type kwargs: dict
        @return: None
        @rtype: None
        """

        for node in list(oia_graph.nodes()):

            node_words = oia_graph.node_text(node).split(" ")
            if "of" not in node_words:
                continue

            if len(node_words) == 1:  # the node is just "of"
                continue

            # Alternating sequence: [words], of_index, [words], of_index, ...
            of_split_words = []
            current_words = []
            for span in node.spans:
                if isinstance(span, str):
                    current_words.append(span)
                else:
                    start, end = span
                    for idx in range(start, end + 1):
                        if oia_graph.words[idx] == "of":
                            of_split_words.append(current_words)
                            of_split_words.append(idx)
                            current_words = []
                        else:
                            current_words.append(idx)

            if not current_words:
                # of is the ending, warning, maybe something like "because of "
                logger.warning("We found a of at the last of the phrase: " +
                               oia_graph.node_text(node))
                continue

            of_split_words.append(current_words)

            # NOTE(review): a phrase that starts with "of" (or has two adjacent
            # "of") yields an empty word list here; what add_words([]) does is
            # not visible from this block -- confirm.
            first_part_words = of_split_words[0]
            first_node = oia_graph.add_words(first_part_words)
            previous_node = first_node

            # The first part takes over every incoming edge of the old node.
            for p, l in list(oia_graph.parents(node)):
                oia_graph.add_relation(p, first_node, l.label)
                oia_graph.remove_relation(p, node)

            children = list(oia_graph.children(node))
            if children:
                logger.warning(
                    "noun of noun has {0} children, be careful!!!".format(
                        len(children)))
                for c, l in children:
                    logger.warning("Child: {} {}".format(
                        l.label, oia_graph.node_text(c)))
                    oia_graph.add_relation(first_node, c, l.label)
                    oia_graph.remove_relation(node, c)

            oia_graph.remove_node(node)

            # The remainder is (of_index, [words]) pairs; chain them.
            for of_word, noun_words in more_itertools.chunked(
                    of_split_words[1:], 2):
                of_node = oia_graph.add_words([of_word])
                next_node = oia_graph.add_words(noun_words)

                oia_graph.add_relation(previous_node, of_node, "as:pred.arg.1")
                oia_graph.add_relation(of_node, next_node, "pred.arg.2")
                previous_node = next_node
Exemplo n.º 4
0
    def forward(self, oia_graph: OIAGraph, **kwargs):
        """
        Replace auxiliary relation nodes by direct labelled edges.

        Handles ``OIAAuxNode`` instances labelled VOC, APPOS, DISCOURSE,
        REPARANDUM, TOPIC or TIME_IN:

        - two children (``pred.arg.1`` and ``pred.arg.2``): an edge
          ``arg1 -> arg2`` with the mapped label is added and every parent of
          the auxiliary node is re-attached to ``arg1``;
        - one ``pred.arg.1`` child: the other argument is the parent linked by
          ``as:pred.arg.2``; an edge ``arg2 -> arg1`` labelled
          ``"as:" + mapped label`` is added;
        - one ``pred.arg.2`` child: the other argument is the parent linked by
          ``as:pred.arg.1``; an edge ``arg1 -> arg2`` with the mapped label is
          added.

        In every case the auxiliary node is removed afterwards.

        @param oia_graph: the graph to rewrite in place
        @type oia_graph: OIAGraph
        @param kwargs: not used by this method
        @type kwargs: dict
        @return: None
        @rtype: None
        @raise AssertionError: if an auxiliary node does not have the expected
            arguments
        @raise Exception: if the single child has an unexpected edge label
        """

        # auxiliary node label -> label of the edge that replaces it
        node_edge_mapping = {
            "VOC": "vocative",
            "APPOS": "appos",
            "DISCOURSE": "discourse",
            "REPARANDUM": "reparandum",
            "TOPIC": "topic",
            "TIME_IN": "mod"
        }

        for node in list(oia_graph.nodes()):

            if not (isinstance(node, OIAAuxNode)
                    and node.label in node_edge_mapping):
                continue

            children = list(oia_graph.children(node))
            parents = list(oia_graph.parents(node))

            assert 0 < len(children) <= 2

            if len(children) == 2:
                arg1 = [
                    child for child, edge in children
                    if edge.label == "pred.arg.1"
                ]
                arg2 = [
                    child for child, edge in children
                    if edge.label == "pred.arg.2"
                ]
                assert len(arg1) == 1 and len(arg2) == 1
                arg1 = arg1[0]
                arg2 = arg2[0]

                oia_graph.add_relation(arg1, arg2,
                                       node_edge_mapping[node.label])

                for parent, edge in parents:
                    oia_graph.add_relation(parent, arg1, edge.label)

                oia_graph.remove_node(node)
            else:
                child, edge = children[0]
                if edge.label == "pred.arg.1":
                    arg1 = child
                    # Compare the edge's label, not the edge object itself.
                    arg2 = [
                        p for p, l in parents if l.label == "as:pred.arg.2"
                    ]
                    assert len(arg2) == 1, [l.label for p, l in parents]
                    arg2 = arg2[0]

                    oia_graph.add_relation(
                        arg2, arg1, "as:" + node_edge_mapping[node.label])
                    oia_graph.remove_node(node)
                elif edge.label == "pred.arg.2":
                    arg2 = child
                    arg1 = [
                        p for p, l in parents if l.label == "as:pred.arg.1"
                    ]
                    assert len(arg1) == 1, [l.label for p, l in parents]
                    arg1 = arg1[0]

                    oia_graph.add_relation(arg1, arg2,
                                           node_edge_mapping[node.label])
                    oia_graph.remove_node(node)
                else:
                    raise Exception("Unknow edges: {}".format(edge.label))
Exemplo n.º 5
0
def _dep_nodes_by_spans(dep_graph, spans):
    """Return the dependency nodes found for *spans* as a (possibly empty) list."""
    found = dep_graph.get_node_by_spans(spans)
    if not found:
        return []
    if isinstance(found, DependencyGraphNode):
        return [found]
    if isinstance(found, list):
        return list(found)
    logger.error("get_node_by_spans return type unknown.")
    return []


def _zero_in_degree_nodes(oia_graph, excluded=None):
    """Return the nodes without incoming edges, leaving out *excluded* if given."""
    return [
        node for node in oia_graph.nodes()
        if oia_graph.g.in_degree(node.ID) == 0
        and (excluded is None or node.ID != excluded.ID)
    ]


def single_root(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                context: UD2OIAContext):
    """
    Rewrite the OIA graph in place so that it has a single root.

    Steps:

    1. Collect the nodes without incoming edges. With none, nothing is done;
       with one, it is the root. With several, the root is chosen by
       (a) the smallest distance in the undirected dependency graph to the
       first child of dependency node "0", then (b) a score based on "func"
       relations, the punctuation found between the candidate's children,
       important connection words and PARATAXIS nodes, then (c) for exactly
       two candidates the one occurring first in the sentence, and for three
       or more PARATAXIS candidates a newly created merged PARATAXIS node.
    2. For the other zero-in-degree nodes, conjunction nodes get their
       ``pred.arg.N`` edges reversed to ``as:pred.arg.N``, and ``ref`` edges
       are reversed to ``as:ref``.
    3. While zero-in-degree nodes other than the root remain, the topmost node
       shared between their descendants and the root's descendants is located
       and the edges into it from the non-root side are reversed (or, for
       WHETHER nodes, the root-side edges are redirected to the WHETHER node).

    :param dep_graph: the dependency graph of the sentence
    :param oia_graph: the OIA graph to rewrite in place
    :param context: conversion context; not used by this function
    :return: None
    """

    zero_degree_nodes = _zero_in_degree_nodes(oia_graph)

    if len(zero_degree_nodes) == 0:
        return
    elif len(zero_degree_nodes) == 1:
        root = zero_degree_nodes[0]
    else:
        # len(zero_degree_nodes) >= 2

        # These do not change while the candidates are ranked, so they are
        # computed once.
        dep_root = dep_graph.get_node("0")
        real_dep_root = next(n for n, l in dep_graph.children(dep_root))
        undirected_dep = dep_graph.g.to_undirected()

        dists_to_root = []
        for oia_node in zero_degree_nodes:

            # dependency nodes of the candidate itself and of its children
            related_dep_nodes = set()
            if isinstance(oia_node, OIAWordsNode):
                related_dep_nodes.update(
                    _dep_nodes_by_spans(dep_graph, oia_node.spans))

            for child, _ in list(oia_graph.children(oia_node)):
                if isinstance(child, OIAWordsNode):
                    related_dep_nodes.update(
                        _dep_nodes_by_spans(dep_graph, child.spans))

            # A candidate without any dependency node cannot be measured; it
            # is ranked as the farthest instead of raising on an empty min().
            min_dist_to_root = min(
                (len(nx.shortest_path(undirected_dep, real_dep_root.ID,
                                      dep_node.ID))
                 for dep_node in related_dep_nodes),
                default=float("inf"))

            dists_to_root.append((oia_node, min_dist_to_root))

        dists_to_root.sort(key=lambda x: x[1])

        min_dist = dists_to_root[0][1]
        root_candidates = [
            oia_node for oia_node, dist in dists_to_root if dist == min_dist
        ]

        if len(root_candidates) == 1:

            root = root_candidates[0]

        else:

            scores = []

            score_map = {":": 40, "\"": 30, ";": 20, ",": 10, "(": -10}

            for cand in root_candidates:

                score = -100
                if any("func" in rel.label
                       for n, rel in oia_graph.children(cand)):
                    score = 100

                dep_children = []
                for child, _ in list(oia_graph.children(cand)):
                    if isinstance(child, OIAWordsNode):
                        dep_children.extend(
                            _dep_nodes_by_spans(dep_graph, child.spans))

                # check what is between the first and the last child
                dep_children.sort(key=lambda x: x.LOC)

                if dep_children:
                    first_loc = dep_children[0].LOC
                    last_loc = dep_children[-1].LOC
                    for node in dep_graph.nodes():
                        if node.LOC is None:
                            continue
                        if first_loc < node.LOC < last_loc:
                            if node.FORM in score_map:
                                score = max(score, score_map[node.FORM])

                if isinstance(cand, OIAWordsNode):
                    for dep_node in _dep_nodes_by_spans(dep_graph, cand.spans):
                        if dep_node.LEMMA in IMPORTANT_CONNECTION_WORDS:
                            score += 8

                elif isinstance(cand,
                                OIAAuxNode) and cand.label == "PARATAXIS":
                    score += 4

                scores.append((cand, score))

            scores.sort(key=lambda x: x[1], reverse=True)

            top_nodes = [
                node for node, score in scores if score == scores[0][1]
            ]

            if len(top_nodes) == 1:
                root = top_nodes[0]

            elif len(top_nodes) >= 3:
                # multiple top node found, merge them to one
                if all(
                        isinstance(node, OIAAuxNode)
                        and node.label == "PARATAXIS" for node in top_nodes):
                    next_nodes = []
                    for top in top_nodes:
                        for n, l in list(oia_graph.children(top)):
                            next_nodes.append(n)
                        oia_graph.remove_node(top)
                        # Rebuild the list instead of removing from it while
                        # it is being iterated.
                        zero_degree_nodes = [
                            node for node in zero_degree_nodes
                            if node.ID != top.ID
                        ]
                    root = oia_graph.add_aux("PARATAXIS")
                    oia_graph.add_node(root)
                    next_nodes.sort(key=lambda x: x.ID)
                    for index, second_node in enumerate(next_nodes):
                        oia_graph.add_argument(root, second_node, index)
                else:
                    logger.error(
                        "Deep intersection point, currently cannot process")
                    return

            else:  # len(top_nodes) == 2:
                # check who is prev, and who is next

                dep_tops = []

                for top in top_nodes:
                    if isinstance(top, OIAWordsNode):
                        dep_tops.extend(
                            (top, dep_node) for dep_node in
                            _dep_nodes_by_spans(dep_graph, top.spans))

                if not len(dep_tops) >= 1:
                    logger.error("Multiple AUX head ")
                    return

                dep_tops.sort(key=lambda x: x[1].LOC)

                root = dep_tops[0][0]

    # root obtained, change other zero-in-degree node

    logger.debug("Root obtained ")
    logger.debug(root)

    for node in zero_degree_nodes:
        if root.ID == node.ID:
            continue

        if is_conj_node(node, dep_graph):
            # turn "conj -pred.arg.N-> child" into "child -as:pred.arg.N-> conj"
            for child, rel in list(oia_graph.children(node)):
                label = rel.label
                if "pred.arg." in label:
                    arg_no = label.split(".")[-1]
                    new_rel = "as:pred.arg." + arg_no
                    oia_graph.remove_relation(node, child)
                    oia_graph.add_relation(child, node, new_rel)

            continue

        ref_childs = [
            child for child, rel in oia_graph.children(node)
            if rel.label == "ref"
        ]

        if ref_childs:
            for child in ref_childs:
                oia_graph.remove_relation(node, child)
                oia_graph.add_relation(child, node, "as:ref")

            continue

    zero_degree_nodes = _zero_in_degree_nodes(oia_graph, excluded=root)

    # NOTE(review): apart from the "no intersection" break below, this loop
    # only ends when a pass leaves no zero-in-degree node besides the root --
    # confirm every rewrite below guarantees progress.
    while len(zero_degree_nodes) > 0:

        logger.debug("we found zero_degree_nodes: ")
        for node in zero_degree_nodes:
            logger.debug(node)

        root_offsprings = set(oia_graph.offsprings(root))

        logger.debug("root offsprings :")
        for n in root_offsprings:
            logger.debug(n)

        intersections = []
        for node in zero_degree_nodes:

            node_offspring = set(oia_graph.offsprings(node))

            logger.debug("node offsprings :")
            for n in node_offspring:
                logger.debug(n)

            intersection = root_offsprings.intersection(node_offspring)

            logger.debug("we found {0} initial intersection :".format(
                len(intersection)))
            for n in intersection:
                logger.debug(n)

            if intersection:

                # The top intersection point is a shared node none of whose
                # parents is itself shared.
                top_intersection_point = None
                parents_to_root = None
                parents_to_other = None
                for x in intersection:
                    parents = {n for n, l in oia_graph.parents(x)}
                    if not parents.intersection(intersection):
                        top_intersection_point = x
                        parents_to_root = parents.intersection(root_offsprings)
                        parents_to_other = parents.intersection(node_offspring)
                        break

                if top_intersection_point is None:
                    logger.error("It seems we have a problem ")
                    continue

                logger.debug("we found a intersections: ")
                logger.debug(top_intersection_point)

                logger.debug("Its parents to root: ")
                for x in parents_to_root:
                    logger.debug(x)

                logger.debug("Its parents to other: ")
                for x in parents_to_other:
                    logger.debug(x)

                intersections.append((top_intersection_point, parents_to_root,
                                      parents_to_other))

        if len(intersections) == 0:
            logger.error("seems we have disconnected compoenent")
            break

        for intersection_point, parents_to_root, parents_to_other in intersections:

            for node in parents_to_other:

                if isinstance(node, OIAAuxNode) and node.label == "LIST":
                    logger.error("lets see what happens for LIST")
                    if len(list(oia_graph.parents(node))) != 0:
                        logger.error(
                            "it seems different with what we have thought for LIST "
                        )

                    # reverse the edge so the shared node points to LIST
                    relation = oia_graph.get_edge(node, intersection_point)
                    oia_graph.remove_relation(node, intersection_point)
                    oia_graph.add_relation(intersection_point, node,
                                           "as:" + relation.label)
                elif (isinstance(node, OIAAuxNode)
                      and node.label == "WHETHER"):

                    if len(list(oia_graph.parents(node))) != 0:
                        logger.error(
                            "it seems different with what we have thought for WHETHER "
                        )

                    # redirect the root-side edges to the WHETHER node
                    for parent in parents_to_root:
                        relation = oia_graph.get_edge(parent,
                                                      intersection_point)
                        oia_graph.remove_relation(parent, intersection_point)
                        oia_graph.add_relation(parent, node, relation.label)
                else:

                    # reverse the edge so the shared node points to this parent
                    relation = oia_graph.get_edge(node, intersection_point)
                    oia_graph.remove_relation(node, intersection_point)
                    oia_graph.add_relation(intersection_point, node,
                                           "as:" + relation.label)

        zero_degree_nodes = _zero_in_degree_nodes(oia_graph, excluded=root)
Exemplo n.º 6
0
    def forward(self, oia_graph: OIAGraph, dep_graph: DependencyGraph=None, **kwargs):
        """
        Split nodes whose text contains "and"/"or" into a conjunction node
        with one argument node per conjunct.

        With one conjunction word the new node is that word. With several,
        the new node is the template ``{1} conj {2} conj {3} ...``. Each
        argument is attached with ``pred.arg.N`` (N starting at 1), the
        parents and children of the original node are moved to the
        conjunction node, and the original node is removed.

        Nodes whose text already contains a ``{...}`` placeholder, nodes
        whose arguments are all empty or punctuation, and nodes where no
        conjunction is found among the indexed words are left untouched.

        @param oia_graph: the graph to rewrite in place
        @type oia_graph: OIAGraph
        @param dep_graph: not used by this method
        @type dep_graph: DependencyGraph
        @param kwargs: not used by this method
        @type kwargs: dict
        @return: None
        @rtype: None
        """

        for node in list(oia_graph.nodes()):

            node_words = oia_graph.node_text(node).split(" ")

            # NOTE(review): this pre-filter is case-sensitive while the split
            # below lower-cases each word, so "And"/"Or" nodes are never
            # split -- confirm which behaviour is intended.
            if not any(x in {"and", "or"} for x in node_words):
                continue

            if any("{" in x and "}" in x for x in node_words):
                continue

            arguments = []
            conjs = []
            current_words = []

            for span in node.spans:
                if isinstance(span, str):
                    current_words.append(span)
                else:
                    start, end = span
                    for idx in range(start, end + 1):
                        if oia_graph.words[idx].lower() in {"and", "or"}:
                            arguments.append(current_words)
                            conjs.append(idx)
                            current_words = []
                        else:
                            current_words.append(idx)

            arguments.append(current_words)

            logger.debug("conj found = {}".format(conjs))
            logger.debug("argument found = {}".format(arguments))

            if not conjs:
                # The conjunction only occurs in a string span, so there is
                # nothing to split on; otherwise the multi-conjunction branch
                # below would build a bogus "{1}" node.
                continue

            if all(not arg or all(oia_graph.words[x] in {",", ";", ".", " "} for x in arg) for arg in arguments):  # single words
                continue

            if len(conjs) == 1:
                conj_words = conjs

            else:  # len(conjs) >= 2:
                logger.warning("We are processing conjs with more than two args")
                conj_words = ['{1}']
                for idx, conj in enumerate(conjs):
                    conj_words.append(conj)
                    conj_words.append("{{{0}}}".format(idx + 2))

            conj_node = oia_graph.add_words(conj_words)

            for idx, arg in enumerate(arguments):
                arg_node = oia_graph.add_words(arg)
                oia_graph.add_relation(conj_node, arg_node, "pred.arg.{0}".format(idx + 1))

            # The conjunction node takes over all edges of the old node.
            for p, l in list(oia_graph.parents(node)):
                oia_graph.add_relation(p, conj_node, l.label)
                oia_graph.remove_relation(p, node)

            for c, l in list(oia_graph.children(node)):
                oia_graph.add_relation(conj_node, c, l.label)
                oia_graph.remove_relation(node, c)

            oia_graph.remove_node(node)