def parallel_list(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                  context: UD2OIAContext):
    """
    Turn ``list`` dependencies into auxiliary ``LIST`` predicates.

    Every dependency node that has at least one child attached through a
    relation containing ``"list"`` is grouped with those children.  Each group
    is ordered by ``LOC`` and its members become the 1-based arguments of a
    freshly added ``LIST`` aux node in the OIA graph.

    :param dep_graph: dependency graph scanned for list relations
    :param oia_graph: OIA graph receiving the ``LIST`` nodes and arguments
    :param context: conversion context (not used by this converter)
    :return: None
    """

    def _is_list_edge(_node, label):
        return "list" in label

    # All groups are collected first; the OIA graph is only written afterwards.
    groups = []
    for head in dep_graph.nodes():
        members = [
            child
            for child, _ in dep_graph.children(head, filter=_is_list_edge)
        ]
        if members:
            members.append(head)
            groups.append(sorted(members, key=lambda item: item.LOC))

    for members in groups:
        list_pred = oia_graph.add_aux("LIST")
        for arg_no, member in enumerate(members, start=1):
            oia_member = oia_graph.add_words(member.position)
            oia_graph.add_argument(list_pred, oia_member, arg_no)
def and_or_conjunction(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                       context: UD2OIAContext):
    """
    #### Coordination ####
    #### I like apples, bananas and oranges. conj:and/or with punct ####

    For every dependency node that has children attached through a relation
    starting with ``arg_con``, the node is added to the OIA graph and each such
    child becomes one of its arguments.  The argument index is read from the
    first value of the child's relation.

    :param dep_graph: dependency graph scanned for ``arg_con*`` relations
    :param oia_graph: OIA graph receiving the conjunction and its arguments
    :param context: conversion context (not used by this converter)
    :return: None
    """

    def _is_conj_arg(_node, label):
        return label.startswith("arg_con")

    for head in dep_graph.nodes():
        components = list(dep_graph.children(head, filter=_is_conj_arg))
        if components:
            conj_root = oia_graph.add_words(head.position)
            for member, relation in components:
                oia_member = oia_graph.add_words(member.position)
                oia_graph.add_argument(conj_root, oia_member,
                                       int(relation.values()[0]))
def adverbial_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                     context: UD2OIAContext):
    """
    Adverbial Clause
    ##### run in order to catch it. advcl with mark (in order to) #####
    ##### he worked hard, replacing his feud. advcl without mark #####

    For each unprocessed ``advcl`` edge (verb -> modifier) that is not yet
    related in the OIA graph:

    * without a ``mark`` child on the modifier, the modifier is attached to the
      verb with ``add_mod``;
    * with a ``mark`` child, the first mark becomes the predicate with the verb
      as argument 1 (``mod=True``) and the modifier as argument 2, unless the
      mark could not be added or its lemma is in
      ``CONJUNCTION_WORDS[language]``.

    :param dep_graph: dependency graph to match against
    :param oia_graph: OIA graph that is extended
    :param context: conversion context, queried through ``is_processed``
    :return: None
    """
    pattern = DependencyGraph()
    verb_node = pattern.create_node()
    modifier_node = pattern.create_node()
    pattern.add_dependency(verb_node, modifier_node, "advcl")

    def _is_mark(_node, label):
        return "mark" in label

    for match in list(dep_graph.match(pattern)):
        head = match[verb_node]
        clause = match[modifier_node]
        if context.is_processed(head, clause):
            continue

        # Both words are added before the relation check, as has_relation is
        # queried with the OIA nodes.
        oia_head = oia_graph.add_words(head.position)
        oia_clause = oia_graph.add_words(clause.position)
        logger.debug("adverbial clause: verb={0}, modifier={1}".format(
            head.position, clause.position))

        if oia_graph.has_relation(oia_head, oia_clause):
            continue

        marks = list(dep_graph.children(clause, filter=_is_mark))
        if not marks:
            oia_graph.add_mod(oia_clause, oia_head)
            continue

        mark_node, _ = marks[0]
        oia_mark = oia_graph.add_words(mark_node.position)
        if oia_mark is None or mark_node.LEMMA in CONJUNCTION_WORDS[language]:
            continue

        oia_graph.add_argument(oia_mark, oia_head, 1, mod=True)
        oia_graph.add_argument(oia_mark, oia_clause, 2)
def nmod_with_case(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                   context: UD2OIAContext):
    """
    #################### nmod:x ########################
    ##### the office of the chair #####
    ##### Istanbul in Turkey #####

    Matches ``parent -[nmod]-> child -[case]-> case`` and, unless parent and
    child are already related in the OIA graph, adds the case word as a
    predicate with the parent as argument 1 (``mod=True``) and the child as
    argument 2.

    :param dep_graph: dependency graph to match against
    :param oia_graph: OIA graph that is extended
    :param context: conversion context (not used by this converter)
    :return: None
    """
    pattern = DependencyGraph()
    parent_node = DependencyGraphNode()
    child_node = DependencyGraphNode()
    case_node = DependencyGraphNode()
    pattern.add_nodes([parent_node, child_node, case_node])
    pattern.add_dependency(parent_node, child_node, r'\w*nmod\w*')
    pattern.add_dependency(child_node, case_node, r'\w*case\w*')

    for match in dep_graph.match(pattern):
        dep_parent_node = match[parent_node]
        dep_child_node = match[child_node]
        dep_case_node = match[case_node]

        if oia_graph.has_relation(dep_parent_node, dep_child_node):
            continue

        # The nmod label may carry the lemma or the surface form of the case
        # word (vs -> versus, according -> accord).  The earlier code compared
        # the label against both but ran the same statement in either branch,
        # so the case word is the predicate unconditionally.
        pred_node = oia_graph.add_words(dep_case_node.position)
        arg1_node = oia_graph.add_words(dep_parent_node.position)
        arg2_node = oia_graph.add_words(dep_child_node.position)

        oia_graph.add_argument(pred_node, arg1_node, 1, mod=True)
        oia_graph.add_argument(pred_node, arg2_node, 2)
def oblique_without_prep(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                         context: UD2OIAContext):
    """
    Convert oblique dependents matched by ``obl:tmod|obl:npmod|obl``.

    For each matching edge whose endpoints are not yet related in the OIA
    graph (checked with ``direct_link=False``):

    * if ``"tmod"`` is among the edge values, an aux ``TIME_IN`` predicate is
      added with the head as argument 1 (``mod=True``) and the oblique as
      argument 2;
    * otherwise (``npmod`` and others) the oblique is attached to the head
      with ``add_mod``.

    :param dep_graph: dependency graph to match against
    :param oia_graph: OIA graph that is extended
    :param context: conversion context (not used by this converter)
    :return: None
    """
    # cut X by a knife
    pattern = DependencyGraph()
    verb_node = DependencyGraphNode(UPOS="VERB|NOUN|ADJ|PROPN|PRON")
    oblique_node = DependencyGraphNode()
    for pattern_node in (verb_node, oblique_node):
        pattern.add_node(pattern_node)
    pattern.add_dependency(verb_node, oblique_node,
                           r'obl:tmod|obl:npmod|obl')

    for match in dep_graph.match(pattern):
        head = match[verb_node]
        oblique = match[oblique_node]

        if oia_graph.has_relation(head, oblique, direct_link=False):
            continue

        edge_types = dep_graph.get_dependency(head, oblique).values()

        if "tmod" not in edge_types:
            # "npmod" in edge_types and others: plain modification
            oia_head = oia_graph.add_words(head.position)
            oia_oblique = oia_graph.add_words(oblique.position)
            oia_graph.add_mod(oia_oblique, oia_head)
            continue

        time_pred = oia_graph.add_aux("TIME_IN")
        oia_head = oia_graph.add_words(head.position)
        oia_oblique = oia_graph.add_words(oblique.position)
        oia_graph.add_argument(time_pred, oia_head, 1, mod=True)
        oia_graph.add_argument(time_pred, oia_oblique, 2)
def fallback_sconj(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                   context: UD2OIAContext):
    """
    Fallback for subordinating conjunctions missing from the OIA graph.

    A node is handled when its words are not yet in the OIA graph, its UPOS is
    ``SCONJ`` and its lemma is one of a fixed set of conjunctions.  If it has
    a parent through a relation containing ``"mark"`` and that edge is not
    marked processed, the conjunction is added as a predicate with the parent
    as argument 1.

    :param dep_graph: dependency graph that is scanned
    :param oia_graph: OIA graph that is extended
    :param context: conversion context; ``processed_edges`` is logged and
        ``is_processed`` is queried
    :return: None
    :raises AssertionError: if the conjunction has more than one ``mark``
        parent
    """
    fallback_lemmas = {
        "because", "so", "if", "then", "otherwise", "after", "before", "and",
        "or", "but"
    }

    for node in dep_graph.nodes():
        if oia_graph.has_word(node.position):
            continue
        if node.UPOS != "SCONJ" or node.LEMMA not in fallback_lemmas:
            continue

        parents = [n for n, l in dep_graph.parents(node) if "mark" in l]
        if not parents:
            continue

        assert len(parents) == 1
        parent = parents[0]

        logger.debug("context = " + str(context.processed_edges))
        if context.is_processed(parent, node):
            continue

        oia_parent_node = oia_graph.add_words(parent.position)
        oia_sconj_node = oia_graph.add_words(node.position)

        # The earlier code branched on "because"/"if" but ran the same call in
        # both branches, so every handled lemma takes its parent as argument 1.
        oia_graph.add_argument(oia_sconj_node, oia_parent_node, 1)
def it_be_adjv_that(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                    context: UD2OIAContext):
    """
    ##### Expletive #####
    ##### it is xxx that #####

    Matches a ``VERB`` node with an ``expl`` child of lemma ``it`` and a
    ``csubj`` child tagged ``ADJ|ADV`` that has a ``mark`` child of lemma
    ``that``.  For each match whose (verb, it) edge is unprocessed, ``it``
    becomes argument 1 of the verb, a ref is added from the csubj node to
    ``it``, and the (verb, it) edge is marked processed.

    :param dep_graph: dependency graph to match against
    :param oia_graph: OIA graph that is extended
    :param context: conversion context used to read and record processed edges
    :return: None
    """
    pattern = DependencyGraph()
    it_node = pattern.create_node(LEMMA="it")
    be_node = pattern.create_node(UPOS="VERB")
    csubj_node = pattern.create_node(UPOS="ADJ|ADV")
    that_node = pattern.create_node(LEMMA="that")

    for head, dependent, relation in (
            (be_node, it_node, r'expl'),
            (be_node, csubj_node, r'csubj'),
            (csubj_node, that_node, r'mark'),
    ):
        pattern.add_dependency(head, dependent, relation)

    for match in dep_graph.match(pattern):
        # The "that" marker only constrains the match; it is not converted.
        dep_be = match[be_node]
        dep_it = match[it_node]
        dep_csubj = match[csubj_node]

        if context.is_processed(dep_be, dep_it):
            continue

        oia_it = oia_graph.add_words(dep_it.position)
        oia_csubj = oia_graph.add_words(dep_csubj.position)
        oia_be = oia_graph.add_words(dep_be.position)

        oia_graph.add_argument(oia_be, oia_it, 1)
        oia_graph.add_ref(oia_csubj, oia_it)

        context.processed(dep_be, dep_it)
def it_verb_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                   context: UD2OIAContext):
    """
    ##### Expletive #####
    ##### it is xxx to do #####

    Matches a ``VERB`` node with an ``expl`` child of lemma ``it`` and an
    ``nsubj|csubj`` child tagged ``NOUN|PRON|PROPN|VERB``.  For each match
    whose (verb, it) edge is unprocessed, both ``it`` and the subject are
    added as arguments of the verb under the same index: 1 when ``it``
    precedes the subject by ``LOC``, 2 otherwise.  A ref from ``it`` to the
    subject is added and both dependency edges are marked processed.

    :param dep_graph: dependency graph to match against
    :param oia_graph: OIA graph that is extended
    :param context: conversion context used to read and record processed edges
    :return: None
    """
    pattern = DependencyGraph()
    it_node = pattern.create_node(LEMMA="it")
    verb_node = pattern.create_node(UPOS="VERB")
    subj_node = pattern.create_node(UPOS="NOUN|PRON|PROPN|VERB")
    pattern.add_dependency(verb_node, it_node, r'expl')
    pattern.add_dependency(verb_node, subj_node, r'nsubj|csubj')

    for match in dep_graph.match(pattern):
        dep_verb = match[verb_node]
        dep_it = match[it_node]
        dep_subj = match[subj_node]

        if context.is_processed(dep_verb, dep_it):
            continue

        oia_it = oia_graph.add_words(dep_it.position)
        oia_subj = oia_graph.add_words(dep_subj.position)
        oia_verb = oia_graph.add_words(dep_verb.position)

        # "it VERB subj that ..." -> slot 1; "subj VERB it that ..." -> slot 2
        slot = 1 if dep_it.LOC < dep_subj.LOC else 2
        oia_graph.add_argument(oia_verb, oia_it, slot)
        oia_graph.add_argument(oia_verb, oia_subj, slot)
        oia_graph.add_ref(oia_it, oia_subj)

        for dependent in (dep_it, dep_subj):
            context.processed(dep_verb, dependent)
def two_node_with_case(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                       context: UD2OIAContext):
    """
    Handle a graph consisting of exactly a case word followed by one word.

    Nodes tagged ``ROOT`` or ``PUNCT`` are ignored.  If exactly two nodes
    remain and the later one (by ``LOC``) governs the earlier one through a
    dependency equal to ``"case"``, the case word becomes a predicate with the
    other word as argument 2.

    :param dep_graph: dependency graph that is inspected
    :param oia_graph: OIA graph that is extended
    :param context: conversion context (not used by this converter)
    :return: None
    """
    ignored_upos = {"ROOT", "PUNCT"}
    content_nodes = [
        node for node in dep_graph.nodes() if node.UPOS not in ignored_upos
    ]
    if len(content_nodes) != 2:
        return

    case_node, noun_node = sorted(content_nodes, key=lambda item: item.LOC)
    if dep_graph.get_dependency(noun_node, case_node) != "case":
        return

    oia_case = oia_graph.add_words(case_node.position)
    oia_noun = oia_graph.add_words(noun_node.position)
    oia_graph.add_argument(oia_case, oia_noun, 2)
def adv_relative_clause(dep_graph, oia_graph: OIAGraph,
                        context: UD2OIAContext):
    """
    #### When/Where Relative clause #####
    #### a time when US troops won/ a place where US troops won.
    #### acl:relcl with time/place

    Matches ``modified -[acl:relcl*]-> modifier -[advmod]-> adverb`` where the
    adverb's lemma contains one of when/where/how/why/what.  Unless modifier
    and modified are already related in the OIA graph, the adverb becomes a
    predicate with the modified word as argument 1 (``mod=True``) and the
    clause head as argument 2.

    :param dep_graph: dependency graph to match against
    :param oia_graph: OIA graph that is extended
    :param context: conversion context (not used by this converter)
    :return: None
    """
    relative_adverbs = {"when", "where", "how", "why", "what"}

    pattern = DependencyGraph()
    modified_node = pattern.create_node()
    modifier_node = pattern.create_node()
    adv_rel_node = pattern.create_node()
    pattern.add_dependency(modified_node, modifier_node, r'acl:relcl\w*')
    pattern.add_dependency(modifier_node, adv_rel_node, r'advmod')

    for match in dep_graph.match(pattern):
        dep_modified = match[modified_node]
        dep_modifier = match[modifier_node]
        dep_adverb = match[adv_rel_node]

        # Substring test: the lemma only has to contain one of the adverbs.
        lemma = dep_adverb.LEMMA
        if all(word not in lemma for word in relative_adverbs):
            continue

        oia_pred = oia_graph.add_words(dep_adverb.position)
        oia_modified = oia_graph.add_words(dep_modified.position)
        oia_modifier = oia_graph.add_words(dep_modifier.position)

        if not oia_graph.has_relation(oia_modifier, oia_modified):
            oia_graph.add_argument(oia_pred, oia_modified, 1, mod=True)
            oia_graph.add_argument(oia_pred, oia_modifier, 2)
def oblique_with_prep(dep_graph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Convert oblique dependents that are introduced by a case/mark word.

    Matches ``head -[obl...]-> oblique -[case|mark]-> prep``.  Unless head and
    oblique are already related in the OIA graph, one of the case/mark words
    connected to ``prep`` (as returned by ``continuous_component``) becomes
    the predicate, with the head as argument 1 (``mod=True``) and the oblique
    as argument 2.  The predicate is the last connected word whose lowercased
    lemma is among the values of the oblique edge, or the last connected word
    when none qualifies.

    :param dep_graph: dependency graph to match against
    :param oia_graph: OIA graph that is extended
    :param context: conversion context (not used by this converter)
    :return: None
    """
    # cut X by a knife
    pattern = DependencyGraph()
    # adj is for "has more on ", adv is for "south of XXXX"
    verb_node = DependencyGraphNode(UPOS="VERB|ADJ|ADV|NOUN|X|PROPN|PRON")
    # verb is for including/according, adj is for "prior to"
    prep_node = DependencyGraphNode(UPOS=r"PRON|ADP|VERB|SCONJ|ADJ")
    oblique_node = DependencyGraphNode()
    pattern.add_node(verb_node)
    pattern.add_node(prep_node)
    pattern.add_node(oblique_node)
    pattern.add_dependency(verb_node, oblique_node, r'\bobl')
    pattern.add_dependency(oblique_node, prep_node, r"case|mark")

    for match in dep_graph.match(pattern):
        dep_prep_node = match[prep_node]
        dep_verb_node = match[verb_node]
        dep_oblique_node = match[oblique_node]

        if oia_graph.has_relation(dep_verb_node, dep_oblique_node):
            continue

        oblique_edge = dep_graph.get_dependency(dep_verb_node,
                                                dep_oblique_node)
        oblique_cases = oblique_edge.values()

        prop_nodes = [
            x for x, l in dep_graph.children(
                dep_oblique_node,
                filter=lambda n, l: l == "case" or l == "mark")
        ]
        connected_case_nodes = continuous_component(prop_nodes, dep_prep_node)

        # Last connected word named by the edge wins; otherwise fall back to
        # the last connected word.
        named_by_edge = [
            node for node in connected_case_nodes
            if node.LEMMA.lower() in oblique_cases
        ]
        head_node = ((named_by_edge[-1] if named_by_edge else None)
                     or connected_case_nodes[-1])

        pred_node = oia_graph.add_words(head_node.position)
        arg1_node = oia_graph.add_words(dep_verb_node.position)
        arg2_node = oia_graph.add_words(dep_oblique_node.position)

        oia_graph.add_argument(pred_node, arg1_node, 1, mod=True)
        oia_graph.add_argument(pred_node, arg2_node, 2)
def adv_verb_modifier(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                      context: UD2OIAContext):
    """
    Convert ``advmod`` edges from a predicate-like head to an adverbial.

    the adv before the verb should be processed by verb_phrase
    this converter should process the adv after the verb
    verb1 in order to verb2

    For each unprocessed, not yet related ``head -[advmod]-> adv`` match:

    * if ``adv`` has exactly one ``obl*`` child and that child has no ``case``
      child, ``adv`` becomes a predicate with the head as argument 1
      (``mod=True``) and the oblique as argument 2;
    * otherwise, if ``adv`` has a ``mark*`` child, the first mark becomes the
      predicate with the head as argument 1 (``mod=True``) and ``adv`` as
      argument 2;
    * otherwise ``adv`` is attached to the head with ``add_mod``.

    :param dep_graph: dependency graph to match against
    :param oia_graph: OIA graph that is extended
    :param context: conversion context, queried through ``is_processed``
    :return: None
    """
    pattern = DependencyGraph()
    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    # aux is for be word
    verb_node = DependencyGraphNode(UPOS="VERB|NOUN|PROPN|AUX|PRON")
    adv_node = DependencyGraphNode(UPOS="ADV|X|NOUN|ADJ|VERB")
    pattern.add_nodes([verb_node, adv_node])
    pattern.add_dependency(verb_node, adv_node, r'advmod')

    def _is_obl(_node, label):
        return label.startswith("obl")

    def _is_case(_node, label):
        return "case" in label

    def _is_mark(_node, label):
        return label.startswith("mark")

    for match in dep_graph.match(pattern):
        head = match[verb_node]
        adverb = match[adv_node]

        if context.is_processed(head, adverb):
            continue
        if oia_graph.has_relation(head, adverb):
            continue

        obliques = [n for n, _ in dep_graph.children(adverb, filter=_is_obl)]

        oblique = None
        oblique_has_case = False
        if len(obliques) == 1:
            oblique = obliques[0]
            # if obl with case, let the oblique converter process it
            oblique_has_case = bool(
                [n for n, _ in dep_graph.children(oblique, filter=_is_case)])

        marks = [n for n, _ in dep_graph.children(adverb, filter=_is_mark)]

        oia_head = oia_graph.add_words(head.position)
        oia_adverb = oia_graph.add_words(adverb.position)

        if oblique and not oblique_has_case:
            oia_oblique = oia_graph.add_words(oblique.position)
            oia_graph.add_argument(oia_adverb, oia_head, 1, mod=True)
            oia_graph.add_argument(oia_adverb, oia_oblique, 2)
        elif marks:
            oia_mark = oia_graph.add_words(marks[0].position)
            oia_graph.add_argument(oia_mark, oia_head, 1, mod=True)
            oia_graph.add_argument(oia_mark, oia_adverb, 2)
        else:
            oia_graph.add_mod(oia_adverb, oia_head)
def adv_ccomp(dep_graph: DependencyGraph, oia_graph: OIAGraph,
              context: UD2OIAContext):
    """
    Convert ``ccomp``/``xcomp`` clauses governed by an adverb-like word.

    First pass (read only): for every ``adv -[ccomp|xcomp]-> comp`` match not
    yet related in the OIA graph, collect the predicate words (the adverb plus
    any ``case`` children of ``comp`` located between the two, narrowed by
    ``continuous_component``) and the optional ``advmod`` parent of the adverb
    tagged ``ADV``, ``X`` or ``NOUN``.

    Second pass (mutating): multi-word predicates are merged into one new
    dependency node; the predicate is added to the OIA graph with either the
    ``advmod`` parent as argument 1 (``mod=True``) or, when there is none, the
    complement as argument 2.

    :param dep_graph: dependency graph; modified when predicate words merge
    :param oia_graph: OIA graph that is extended
    :param context: conversion context (not used by this converter)
    :return: None
    :raises Exception: if the adverb has more than one qualifying parent
    """
    pattern = DependencyGraph()
    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    # part is for "not"
    adv_node = pattern.create_node(UPOS="ADV|X|NOUN|PART")
    ccomp_node = pattern.create_node()
    pattern.add_dependency(adv_node, ccomp_node, r"ccomp|xcomp")

    collected = []
    for match in dep_graph.match(pattern):
        dep_adv = match[adv_node]
        dep_ccomp = match[ccomp_node]

        if oia_graph.has_relation(dep_adv, dep_ccomp):
            continue

        def _case_between(n, l):
            return "case" == l and dep_adv.LOC < n.LOC < dep_ccomp.LOC

        def _advmod_head(n, l):
            return "advmod" == l and n.UPOS in {"ADV", "X", "NOUN"}

        case_nodes = [
            n for n, _ in dep_graph.children(dep_ccomp, filter=_case_between)
        ]
        predicate_words = [dep_adv]
        if case_nodes:
            case_nodes = continuous_component(case_nodes, case_nodes[0])
            predicate_words = predicate_words + case_nodes
            predicate_words.sort(key=lambda n: n.LOC)

        heads = [
            n for n, _ in dep_graph.parents(dep_adv, filter=_advmod_head)
        ]
        if len(heads) > 1:
            raise Exception("Multiple subject")
        dep_subj = heads[0] if heads else None

        collected.append([dep_subj, predicate_words, dep_ccomp])

    for dep_subj, predicate_words, dep_ccomp in collected:
        if len(predicate_words) > 1:
            # Fuse the adverb and its case words into a single aux-flagged
            # node.
            merged = dep_graph.create_node(
                ID=" ".join([w.ID for w in predicate_words]),
                FORM=" ".join([w.FORM for w in predicate_words]),
                LEMMA=" ".join([w.LEMMA for w in predicate_words]),
                UPOS="ADV",
                LOC=predicate_words[0].LOC)
            merged.aux = True
            dep_graph.replace_nodes(predicate_words, merged)
            dep_graph.remove_dependency(dep_ccomp, merged)
            dep_pred = merged
        else:
            dep_pred = predicate_words[0]

        oia_pred = oia_graph.add_words(dep_pred.position)

        # NOTE(review): the complement is only attached when there is no
        # advmod parent; nesting reconstructed from a whitespace-collapsed
        # source - confirm.
        if dep_subj:
            oia_subj = oia_graph.add_words(dep_subj.position)
            oia_graph.add_argument(oia_pred, oia_subj, 1, mod=True)
        else:
            oia_ccomp = oia_graph.add_words(dep_ccomp.position)
            oia_graph.add_argument(oia_pred, oia_ccomp, 2)
def single_root(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                context: UD2OIAContext):
    """
    Pick one root for the OIA graph and re-hang the other in-degree-0 nodes.

    Steps, as implemented below:

    1. Collect the OIA nodes whose in-degree in ``oia_graph.g`` is 0.  With
       none the function returns; with one, that node is the root.
    2. With several, each candidate is mapped to dependency nodes (its own
       spans plus those of its children) and ranked by the shortest
       undirected path from the first child of dependency node ``"0"``.
    3. Ties are broken by a score (``func`` child relations, punctuation
       between the candidate's children, ``IMPORTANT_CONNECTION_WORDS``,
       ``PARATAXIS`` aux nodes).  Remaining ties: one winner is the root, two
       winners are ordered by ``LOC``, three or more are merged when they are
       all ``PARATAXIS`` aux nodes and otherwise abort the function.
    4. For every other in-degree-0 node, ``pred.arg.N`` edges (conjunction
       nodes) or ``ref`` edges are reversed into ``as:...`` edges.
    5. While in-degree-0 nodes other than the root remain, the top-most node
       they share with the root's offspring is located and the connecting
       edges are reversed or re-attached.

    NOTE(review): the block nesting was reconstructed from a
    whitespace-collapsed source; confirm against version control.

    :param dep_graph: dependency graph used for span lookup and distances
    :param oia_graph: OIA graph, modified in place
    :param context: conversion context (not used by this function)
    :return: None
    """
    in_degrees = [(node, oia_graph.g.in_degree(node.ID))
                  for node in oia_graph.nodes()]

    zero_degree_nodes = [n for n, degree in in_degrees if degree == 0]

    if len(zero_degree_nodes) == 0:
        return
    elif len(zero_degree_nodes) == 1:
        root = zero_degree_nodes[0]
    else:
        # len(zero_degree_nodes) >= 2
        dists_to_root = []

        for oia_node in zero_degree_nodes:
            # Dependency nodes covered by this candidate and by its children.
            related_dep_nodes = set()
            if isinstance(oia_node, OIAWordsNode):
                dep_node = dep_graph.get_node_by_spans(oia_node.spans)
                if dep_node:
                    if isinstance(dep_node, DependencyGraphNode):
                        related_dep_nodes.add(dep_node)
                    elif isinstance(dep_node, list):
                        for node in dep_node:
                            related_dep_nodes.add(node)
                    else:
                        logger.error("get_node_by_spans return type unknown.")

            children = [n for n, l in oia_graph.children(oia_node)]

            for child in children:
                if isinstance(child, OIAWordsNode):
                    dep_node = dep_graph.get_node_by_spans(child.spans)
                    if dep_node:
                        if isinstance(dep_node, DependencyGraphNode):
                            related_dep_nodes.add(dep_node)
                        elif isinstance(dep_node, list):
                            for node in dep_node:
                                related_dep_nodes.add(node)
                        else:
                            logger.error(
                                "get_node_by_spans return type unknown.")

            # The first child of node "0" is taken as the real root.
            dep_root = dep_graph.get_node("0")
            real_dep_root = next(n for n, l in dep_graph.children(dep_root))

            # Distance is the node count of the shortest undirected path.
            # NOTE(review): min() raises ValueError when related_dep_nodes is
            # empty, and to_undirected() is rebuilt for every related node.
            min_dist_to_root = min([
                len(
                    nx.shortest_path(dep_graph.g.to_undirected(),
                                     real_dep_root.ID, dep_node.ID))
                for dep_node in related_dep_nodes
            ])

            dists_to_root.append((oia_node, min_dist_to_root))

        dists_to_root.sort(key=lambda x: x[1])

        # Keep every candidate that shares the minimal distance.
        root_candidates = []
        min_dist = dists_to_root[0][1]
        for oia_node, dist in dists_to_root:
            if dist == min_dist:
                root_candidates.append(oia_node)

        if len(root_candidates) == 1:
            root = root_candidates[0]
        else:
            scores = []
            # Score contributed by a punctuation FORM found between the first
            # and last dependency child of a candidate.
            score_map = {":": 40, "\"": 30, ";": 20, ",": 10, "(": -10}

            for cand in root_candidates:
                score = -100
                if any([
                        "func" in rel.label
                        for n, rel in oia_graph.children(cand)
                ]):
                    score = 100

                children = [n for n, l in oia_graph.children(cand)]
                dep_children = []

                for child in children:
                    if isinstance(child, OIAWordsNode):
                        dep_node = dep_graph.get_node_by_spans(child.spans)
                        if dep_node:
                            if isinstance(dep_node, DependencyGraphNode):
                                dep_children.append(dep_node)
                            elif isinstance(dep_node, list):
                                for node in dep_node:
                                    dep_children.append(node)
                            else:
                                logger.error(
                                    "get_node_by_spans return type unknown.")

                # check what between them
                dep_children.sort(key=lambda x: x.LOC)

                # NOTE(review): dep_children[0] raises IndexError when the
                # candidate has no words-node children - confirm that cannot
                # happen.
                for node in dep_graph.nodes():
                    if node.LOC is None:
                        continue
                    if dep_children[0].LOC < node.LOC < dep_children[-1].LOC:
                        if node.FORM in score_map:
                            score = max(score, score_map[node.FORM])

                if isinstance(cand, OIAWordsNode):
                    dep_node = dep_graph.get_node_by_spans(cand.spans)
                    if dep_node:
                        if isinstance(dep_node, DependencyGraphNode):
                            if dep_node.LEMMA in IMPORTANT_CONNECTION_WORDS:
                                score += 8
                        elif isinstance(dep_node, list):
                            for node in dep_node:
                                if node.LEMMA in IMPORTANT_CONNECTION_WORDS:
                                    score += 8
                        else:
                            logger.error(
                                "get_node_by_spans return type unknown.")
                elif isinstance(cand, OIAAuxNode) and cand.label == "PARATAXIS":
                    score += 4

                scores.append((cand, score))

            scores.sort(key=lambda x: x[1], reverse=True)

            # Every candidate sharing the best score.
            top_nodes = []
            for node, score in scores:
                if score == scores[0][1]:
                    top_nodes.append(node)

            if len(top_nodes) == 1:
                root = top_nodes[0]
            elif len(top_nodes) >= 3:
                # multiple top node found, merge them to one
                if all(
                        isinstance(node, OIAAuxNode)
                        and node.label == "PARATAXIS" for node in top_nodes):
                    next_nodes = []
                    for top in top_nodes:
                        for n, l in list(oia_graph.children(top)):
                            next_nodes.append(n)

                        oia_graph.remove_node(top)
                        # NOTE(review): removes from zero_degree_nodes while
                        # iterating over it; harmless only if IDs are unique.
                        for node in zero_degree_nodes:
                            if node.ID == top.ID:
                                zero_degree_nodes.remove(node)

                    root = oia_graph.add_aux("PARATAXIS")
                    # NOTE(review): add_aux is followed by add_node on the
                    # same node - possibly redundant, confirm.
                    oia_graph.add_node(root)
                    next_nodes.sort(key=lambda x: x.ID)
                    # NOTE(review): argument indices start at 0 here.
                    for index, second_node in enumerate(next_nodes):
                        oia_graph.add_argument(root, second_node, index)
                else:
                    logger.error(
                        "Deep intersection point, currently cannot process")
                    return
                    # raise Exception("Two top nodes? I think it is not possible ")
            else:
                # len(top_nodes) == 2:
                # check who is prev, and who is next
                dep_tops = []

                for top in top_nodes:
                    if isinstance(top, OIAWordsNode):
                        dep_node = dep_graph.get_node_by_spans(top.spans)
                        if dep_node:
                            if isinstance(dep_node, DependencyGraphNode):
                                dep_tops.append((top, dep_node))
                            elif isinstance(dep_node, list):
                                for node in dep_node:
                                    dep_tops.append((top, node))
                            else:
                                logger.error(
                                    "get_node_by_spans return type unknown.")

                # Neither top node maps to a dependency node: give up.
                if not len(dep_tops) >= 1:
                    logger.error("Multiple AUX head ")
                    return

                # The top node whose dependency node has the lowest LOC wins.
                dep_tops.sort(key=lambda x: x[1].LOC)
                root = dep_tops[0][0]

    # root obtained, change other zero-in-degree node
    logger.debug("Root obtained ")
    logger.debug(root)

    for node in zero_degree_nodes:
        if root.ID == node.ID:
            continue

        if is_conj_node(node, dep_graph):
            # Reverse every "pred.arg.N" edge into child -[as:pred.arg.N]-> node.
            for child, rel in list(oia_graph.children(node)):
                label = rel.label
                if "pred.arg." in label:
                    arg_no = label.split(".")[-1]
                    new_rel = "as:pred.arg." + arg_no
                    oia_graph.remove_relation(node, child)
                    oia_graph.add_relation(child, node, new_rel)
            continue

        ref_childs = [
            child for child, rel in oia_graph.children(node)
            if rel.label == "ref"
        ]

        if ref_childs:
            # Reverse every "ref" edge into child -[as:ref]-> node.
            for child in ref_childs:
                oia_graph.remove_relation(node, child)
                oia_graph.add_relation(child, node, "as:ref")
            continue

    # Recompute the in-degree-0 nodes, now excluding the root.
    in_degrees = [(node, oia_graph.g.in_degree(node.ID))
                  for node in oia_graph.nodes()]

    zero_degree_nodes = [
        n for n, degree in in_degrees if degree == 0 and n.ID != root.ID
    ]

    # NOTE(review): the loop ends when no in-degree-0 node is left or no
    # intersection is found; an iteration that finds intersections but changes
    # no edge would repeat forever - confirm that cannot happen.
    while len(zero_degree_nodes) > 0:
        logger.debug("we found zero_degree_nodes: ")
        for node in zero_degree_nodes:
            logger.debug(node)

        root_offsprings = set(oia_graph.offsprings(root))
        logger.debug("root offsprings :")
        for n in root_offsprings:
            logger.debug(n)

        intersections = []
        for node in zero_degree_nodes:
            node_offspring = set(oia_graph.offsprings(node))
            logger.debug("node offsprings :")
            for n in node_offspring:
                logger.debug(n)

            intersection = root_offsprings.intersection(node_offspring)
            logger.debug("we found {0} initial intersection :".format(
                len(intersection)))
            for n in intersection:
                logger.debug(n)

            if intersection:
                top_intersection_point = None
                parents_to_root = None
                parents_to_other = None
                # Pick a shared node none of whose parents is itself shared.
                for x in intersection:
                    parents = set([n for n, l in oia_graph.parents(x)])
                    if not parents.intersection(intersection):
                        top_intersection_point = x
                        parents_to_root = parents.intersection(root_offsprings)
                        parents_to_other = parents.intersection(node_offspring)
                        break

                if top_intersection_point is None:
                    logger.error("It seems we have a problem ")
                    continue

                logger.debug("we found a intersections: ")
                logger.debug(top_intersection_point)
                logger.debug("Its parents to root: ")
                for x in parents_to_root:
                    logger.debug(x)
                logger.debug("Its parents to other: ")
                for x in parents_to_other:
                    logger.debug(x)

                intersections.append((top_intersection_point, parents_to_root,
                                      parents_to_other))

        if len(intersections) == 0:
            logger.error("seems we have disconnected compoenent")
            break
            # raise Exception("Unexpected situation")

        for intersection_point, parents_to_root, parents_to_other in intersections:
            for node in parents_to_other:
                if isinstance(node, OIAAuxNode) and node.label == "LIST":
                    logger.error("lets see what happens for LIST")
                    if len(list(oia_graph.parents(node))) != 0:
                        logger.error(
                            "it seems different with what we have thought for LIST "
                        )

                    # Reverse the edge: intersection -[as:<label>]-> LIST node.
                    relation = oia_graph.get_edge(node, intersection_point)
                    oia_graph.remove_relation(node, intersection_point)
                    oia_graph.add_relation(intersection_point, node,
                                           "as:" + relation.label)
                elif (isinstance(node, OIAAuxNode)
                      and node.label == "WHETHER"):
                    if len(list(oia_graph.parents(node))) != 0:
                        logger.error(
                            "it seems different with what we have thought for WHETHER "
                        )

                    # Re-attach the root-side parents to the WHETHER node,
                    # keeping each edge label.
                    for parent in parents_to_root:
                        relation = oia_graph.get_edge(parent,
                                                      intersection_point)
                        oia_graph.remove_relation(parent, intersection_point)
                        oia_graph.add_relation(parent, node, relation.label)
                else:
                    # Default: reverse the edge into an "as:<label>" edge.
                    relation = oia_graph.get_edge(node, intersection_point)
                    oia_graph.remove_relation(node, intersection_point)
                    oia_graph.add_relation(intersection_point, node,
                                           "as:" + relation.label)

        # Re-evaluate which nodes are still detached from the root.
        in_degrees = [(node, oia_graph.g.in_degree(node.ID))
                      for node in oia_graph.nodes()]

        zero_degree_nodes = [
            n for n, degree in in_degrees if degree == 0 and n.ID != root.ID
        ]
def object_relative_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                           context: UD2OIAContext):
    """
    ##### Object-extracted/referred relative clause #####
    ##### the person that Andy knows #####

    Matches ``entity -[acl:relcl]-> verb -[subj]-> subj`` where the subject's
    lemma is not what/who/which/that.  For each unprocessed match, both
    dependency edges are marked processed, the subject becomes argument 1 of
    the verb and the entity becomes argument 2 (``mod=True``).  A ``ref``
    child of the entity located between entity and verb is linked with
    ``add_ref`` and attached according to its own relation to the verb.
    ``ccomp`` children of the verb become argument 3.

    NOTE(review): the block nesting, in particular which statements follow the
    ``if ref_nodes:`` block at loop level, was reconstructed from a
    whitespace-collapsed source; confirm against version control.

    :param dep_graph: dependency graph to match against
    :param oia_graph: OIA graph that is extended
    :param context: conversion context used to read and record processed edges
    :return: None
    :raises Exception: if the ref word has a relation to the verb that is
        neither an ``obj`` relation nor ``advmod`` and has no case child
    """
    pattern = DependencyGraph()
    verb_node = DependencyGraphNode()
    entity_node = DependencyGraphNode()
    subj_node = DependencyGraphNode()
    pattern.add_nodes([verb_node, entity_node, subj_node])
    pattern.add_dependency(verb_node, subj_node, r'\w*subj\w*')
    pattern.add_dependency(entity_node, verb_node, r'\w*acl:relcl\w*')

    for match in dep_graph.match(pattern):
        dep_entity_node = match[entity_node]
        dep_subj_node = match[subj_node]
        dep_verb_node = match[verb_node]

        # A relative pronoun as subject is not an object-extracted clause.
        if dep_subj_node.LEMMA in {"what", "who", "which", "that"}:
            continue

        logger.debug("we found a objective relative clause")
        logger.debug("entity: {0}".format(dep_entity_node))
        logger.debug("subject: {0}".format(dep_subj_node))
        logger.debug("verb: {0}".format(dep_verb_node))

        if context.is_processed(dep_entity_node, dep_verb_node):
            logger.debug("processed")
            continue

        # Edges are marked processed before the OIA relation check below.
        context.processed(dep_verb_node, dep_subj_node)
        context.processed(dep_entity_node, dep_verb_node)

        oia_entity_node = oia_graph.add_words(dep_entity_node.position)
        oia_verb_node = oia_graph.add_words(dep_verb_node.position)
        oia_subj_node = oia_graph.add_words(dep_subj_node.position)

        if oia_graph.has_relation(oia_entity_node, oia_verb_node):
            logger.debug("has relation between entity and verb")
            continue

        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)

        # Accept only "ref" children located between the entity and the verb.
        def __valid_ref(n, l):
            return l == "ref" and dep_entity_node.LOC < n.LOC < dep_verb_node.LOC

        ref_nodes = list(n for n, l in dep_graph.children(dep_entity_node,
                                                          filter=__valid_ref))
        ref_nodes.sort(key=lambda x: x.LOC)

        if ref_nodes:
            # The ref word closest to the verb is used.
            ref_node = ref_nodes[-1]

            oia_ref_node = oia_graph.add_words(ref_node.position)
            oia_graph.add_ref(oia_entity_node, oia_ref_node)

            logger.debug("we are coping with ref between:")
            logger.debug(dep_verb_node)
            logger.debug(ref_node)

            ref_relation = dep_graph.get_dependency(dep_verb_node, ref_node)

            case_nodes = list(n for n, l in dep_graph.children(
                ref_node, filter=lambda n, l: "case" in l))
            case_nodes.sort(key=lambda x: x.LOC)

            if ref_relation:
                if case_nodes:
                    # with which xxxx, the with will become the root pred
                    case_node = case_nodes[-1]
                    oia_case_node = oia_graph.add_words(case_node.position)

                    oia_graph.add_argument(oia_case_node, oia_verb_node, 1,
                                           mod=True)
                    oia_graph.add_argument(oia_case_node, oia_ref_node, 2)
                    oia_graph.add_mod(oia_verb_node, oia_entity_node)
                else:
                    if "obj" in ref_relation:
                        oia_graph.add_argument(oia_verb_node, oia_ref_node, 2)
                    elif ref_relation == "advmod":
                        oia_graph.add_mod(oia_ref_node, oia_verb_node)
                    else:
                        raise Exception(
                            "unknown relation: {}".format(ref_relation))

        # NOTE(review): the subject was already added as argument 1 above;
        # this second call looks redundant - confirm add_argument is
        # idempotent.
        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)
        oia_graph.add_argument(oia_verb_node, oia_entity_node, 2, mod=True)

        # NOTE(review): rels is fetched but never used; the check that used it
        # (rels.endswith("obj")) is disabled.
        rels = dep_graph.get_dependency(dep_entity_node, dep_verb_node)

        #if rels.endswith("obj"):
        for node, l in dep_graph.children(dep_verb_node):
            if l == "ccomp":
                oia_ccomp_node = oia_graph.add_words(node.position)
                oia_graph.add_argument(oia_verb_node, oia_ccomp_node, 3)
def simple_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext): """ :TODO badcase Attached is a new link :param dep_graph: :param oia_graph: :return: """ # for node in dep_graph.nodes(): # print('node:',node) for pred_node in dep_graph.nodes( filter=lambda x: x.UPOS in {"VERB", "ADJ", "NOUN", "AUX", "ADV"}): # ADJ is for "With the demand so high," # NOUN is for "X the best for Y" # AUX is for have in "I have a cat" # print('pred_node', pred_node) expl = None nsubj = None subj = None objs = [] for child, rel in dep_graph.children(pred_node): # print('child node:', child) # print('child rel:', rel) if ('nsubj' in rel or "csubj" in rel): # and ":xsubj" not in rel: nsubj = child elif rel.startswith('obj'): objs.append((child, 1)) elif rel.startswith('iobj'): objs.append((child, 0)) elif 'ccomp' in rel or "xcomp" in rel: # and child.UPOS == "VERB": objs.append((child, 2)) elif "expl" in rel: expl = child if nsubj: # if pred_node.LOC < nsubj.LOC: # # TODO: in what situation? 
# objs.insert(0, nsubj) # else: subj = nsubj if expl: # It VERB subj that # VERB subj it that if expl.LOC < pred_node.LOC: subj = expl objs.insert(0, (subj, -1)) else: # expl.LOC > pred_node.LOC: objs.insert(0, (expl, -1)) if not subj and not objs: continue pred_node = oia_graph.add_words(pred_node.position) if not pred_node: continue arg_index = 1 if subj is not None: if not oia_graph.has_relation(pred_node, subj): subj_node = oia_graph.add_words(subj.position) oia_graph.add_argument(pred_node, subj_node, arg_index) arg_index += 1 objs.sort(key=lambda x: x[1]) for obj, weight in objs: # print('obj:',obj) oia_obj_node = oia_graph.add_words(obj.position) # def __sconj_node(n): # # that conj is ommited # return (n.UPOS == "SCONJ" and n.LEMMA not in {"that"}) def __adv_question_node(n): return ((n.UPOS == "ADV" and n.LEMMA in {"when", "where", "how", "whether"})) # # def __pron_question_node(n): # return (n.UPOS == "PRON" and n.LEMMA in {"what", "who", "which"}) # def __interested_node2(n): # # that conj is ommited # return (n.UPOS == "PART") # sconj_nodes = [n for n, l in dep_graph.children(obj, # filter=lambda n,l: l == "mark" and __sconj_node(n))] adv_question_nodes = [ n for n, l in dep_graph.children( obj, filter=lambda n, l: l == "mark" and __adv_question_node(n)) ] # subj_question_nodes = [n for n, l in dep_graph.children(obj, # filter=lambda n,l: "subj" in l and __pron_question_node(n))] # # obj_question_nodes = [n for n, l in dep_graph.children(obj, # filter=lambda n, # l: ("obj" in l or "comp") in l and __pron_question_node( # n))] # nodes_of_interests2 = [n for n, l in dep_graph.children(obj, # filter=lambda n,l: l == "advmod" and __interested_node2(n))] # print('nodes_of_interests:', nodes_of_interests) # if nodes_of_interests2: # assert len(nodes_of_interests2) == 1 # interest_node = nodes_of_interests2[0] # oia_interest_node = oia_graph.add_word_with_head(interest_node.LOC) # oia_graph.add_argument(pred_node, oia_interest_node, arg_index) # # 
oia_graph.add_function(oia_interest_node, oia_obj_node) # arg_index += 1 # oia_graph.add_argument(oia_interest_node, oia_obj_node, arg_index) # arg_index += 1 if adv_question_nodes: assert len(adv_question_nodes) == 1 interest_node = adv_question_nodes[0] oia_interest_node = oia_graph.add_words(interest_node.position) oia_graph.add_argument(pred_node, oia_interest_node, arg_index) oia_graph.add_function(oia_interest_node, oia_obj_node) else: if not oia_graph.has_relation(pred_node, obj): oia_graph.add_argument(pred_node, oia_obj_node, arg_index) arg_index += 1 pattern = DependencyGraph() parent_pred = pattern.create_node() child_pred = pattern.create_node() question_word = pattern.create_node(LEMMA=r'what|who') pattern.add_dependency(parent_pred, child_pred, r'subj|nsubj|iobj|obj|xcomp|ccomp') pattern.add_dependency(parent_pred, question_word, r'subj|nsubj|iobj|obj|xcomp|ccomp') pattern.add_dependency(child_pred, question_word, r'subj|nsubj|iobj|obj|xcomp|ccomp') for match in dep_graph.match(pattern): dep_parent_pred, dep_child_pred, dep_question_word = [ match[x] for x in [parent_pred, child_pred, question_word] ] oia_parent_pred, oia_child_pred, oia_question_word = [ oia_graph.add_words(x.position) for x in [dep_parent_pred, dep_child_pred, dep_question_word] ] oia_question_word.is_func = True rel = oia_graph.get_edge(oia_child_pred, oia_question_word) oia_graph.remove_relation(oia_child_pred, oia_question_word) oia_graph.remove_relation(oia_parent_pred, oia_child_pred) oia_graph.add_relation(oia_question_word, oia_child_pred, "mod_by:" + rel.label)
def parataxis(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Group a head word and its ``parataxis`` children under one OIA predicate.

    For every dependency node that has at least one child attached by exactly
    the ``parataxis`` relation, the head and those children are ordered by
    ``LOC``. Each adjacent pair may contribute a connector located strictly
    between the two (by ``LOC``): an ``advmod`` child of the later member that
    is an ``SCONJ`` or has lemma "so" is preferred, otherwise a ``punct``
    child of the head whose form is one of ``:``, ``;``, ``--`` or ``,``.

    If no pair has a connector, an auxiliary "PARATAXIS" node is the predicate.
    With exactly one pair, the connector's words are the predicate. Otherwise
    the predicate is a template of the form ``{1} <con> {2} <con> {3} ...``.
    The ordered members are attached as arguments 1..n.

    :param dep_graph: dependency graph that is scanned
    :param oia_graph: OIA graph that receives the predicate and its arguments
    :param context: conversion context; not used by this rule
    :return: None
    """

    def _connector_between(head, left, right):
        # Both lookups are always performed; the adverbial one wins on a tie.
        adverb_hits = [
            cand for cand, _ in dep_graph.children(
                right,
                filter=lambda n, l: "advmod" in l
                and (left.LOC < n.LOC < right.LOC)
                and (n.UPOS == "SCONJ" or n.LEMMA in {"so"}))
        ]
        punct_hits = [
            cand for cand, _ in dep_graph.children(
                head,
                filter=lambda n, l: "punct" in l
                and n.FORM in {":", ";", "--", ","}
                and (left.LOC < n.LOC < right.LOC))
        ]

        if adverb_hits:
            # NOTE (carried over from the original author): the dependency to
            # this connector is left in dep_graph; the original comment
            # suggests adv_modifier may then pick the word up again.
            return adverb_hits[0]
        if punct_hits:
            return punct_hits[0]
        return None

    for head in list(dep_graph.nodes()):
        members = [
            child for child, rel in dep_graph.children(head)
            if "parataxis" == rel
        ]
        if not members:
            continue

        # The head takes part in the parataxis too; order by surface position.
        members.append(head)
        members.sort(key=lambda member: member.LOC)

        # One (possibly missing) connector per adjacent pair of members.
        connectors = [
            _connector_between(head, left, right)
            for left, right in more_itertools.pairwise(members)
        ]

        if all(con is None for con in connectors):
            oia_pred = oia_graph.add_aux("PARATAXIS")
        elif len(connectors) == 1:
            oia_pred = oia_graph.add_words(connectors[0].position)
        else:
            # Build "{1} con {2} con {3} ..."; a missing connector leaves
            # two neighbouring placeholders with nothing between them.
            template = ["{1}"]
            for slot, con in enumerate(connectors, start=2):
                if con is not None:
                    template.extend(con.position)
                template.append("{{{0}}}".format(slot))
            oia_pred = oia_graph.add_words(template)

        for arg_no, member in enumerate(members, start=1):
            oia_member = oia_graph.add_words(member.position)
            oia_graph.add_argument(oia_pred, oia_member, arg_no)
def advcl_mark_sconj(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Convert adverbial clauses introduced by a conjunction-word marker.

    Matches ``pred1 -advcl*-> pred2 -mark-> marker`` (the marker node carries
    no UPOS constraint). Matches whose marker lemma is not in
    ``CONJUNCTION_WORDS[language]`` are skipped. For the remaining ones:

    * both dependency edges (pred2 -> marker, pred1 -> pred2) are reported
      via ``context.processed``;
    * an OIA node is created for the marker, with pred2 (the clause) as
      argument 1 and pred1 as argument 2;
    * for the marker "if", an ``advmod`` child of pred1 with lemma "then" is
      folded into the predicate as ``if {1} then {2}`` and its edge is also
      reported as processed.

    The previous version re-checked membership in ``CONJUNCTION_WORDS`` after
    the early ``continue`` and kept a ``mod=True`` fallback; that fallback
    could never run and has been removed. The sequence of graph calls is
    unchanged.

    :param dep_graph: dependency graph that is matched against
    :param oia_graph: OIA graph that receives the condition node and arguments
    :param context: conversion context used to flag processed dependencies
    :return: None
    :raises AssertionError: if pred1 has more than one "then" ``advmod`` child
    """
    pattern = DependencyGraph()
    pred1_node = pattern.create_node()
    pred2_node = pattern.create_node()
    sconj_node = pattern.create_node()
    pattern.add_dependency(pred1_node, pred2_node, r'advcl\w*')
    pattern.add_dependency(pred2_node, sconj_node, 'mark')

    for match in list(dep_graph.match(pattern)):
        dep_pred1_node = match[pred1_node]
        dep_pred2_node = match[pred2_node]
        dep_sconj_node = match[sconj_node]

        # Only conjunction-word markers are handled by this rule.
        if dep_sconj_node.LEMMA not in CONJUNCTION_WORDS[language]:
            continue

        context.processed(dep_pred2_node, dep_sconj_node)
        context.processed(dep_pred1_node, dep_pred2_node)

        oia_pred1_node = oia_graph.add_words(dep_pred1_node.position)
        oia_pred2_node = oia_graph.add_words(dep_pred2_node.position)

        # "if ... then ...": look for a matching "then" on the main predicate.
        dep_then_node = None
        if dep_sconj_node.LEMMA == "if":
            dep_then_nodes = [
                n for n, l in dep_graph.children(dep_pred1_node)
                if n.LEMMA == "then" and l == "advmod"
            ]
            if dep_then_nodes:
                # Kept from the original: more than one "then" is unexpected.
                assert len(dep_then_nodes) == 1
                dep_then_node = dep_then_nodes[0]

        if dep_then_node is not None:
            context.processed(dep_pred1_node, dep_then_node)
            condition_position = (dep_sconj_node.position + ["{1}"] +
                                  dep_then_node.position + ["{2}"])
        else:
            condition_position = dep_sconj_node.position

        oia_condition_node = oia_graph.add_words(condition_position)

        # The subordinate clause is argument 1, the main predicate argument 2.
        oia_graph.add_argument(oia_condition_node, oia_pred2_node, 1)
        oia_graph.add_argument(oia_condition_node, oia_pred1_node, 2)