def adj_modifier(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Link each ``amod`` dependent to its head with a modifier edge.

    Per the original note, adjectives placed before the noun
    ("a pretty little boy") are dealt with by noun-phrase handling; this
    converter is meant for the adjective that follows the noun.

    :param dep_graph: dependency graph searched for ``head -amod-> dependent``
    :param oia_graph: OIA graph receiving the two word nodes and the mod edge
    :param context: conversion context; not consulted by this converter
    :return: None
    """
    amod_pattern = DependencyGraph()
    head_slot = amod_pattern.create_node()  # UPOS="NOUN|PRON|PROPN" filter left disabled
    dependent_slot = amod_pattern.create_node()  # UPOS="ADJ|NOUN" filter left disabled
    amod_pattern.add_dependency(head_slot, dependent_slot, r'amod')

    for hit in dep_graph.match(amod_pattern):
        head, dependent = hit[head_slot], hit[dependent_slot]

        oia_head = oia_graph.add_words(head.position)
        oia_dependent = oia_graph.add_words(dependent.position)

        logger.debug("adj_modifier: ")
        for item in (head.position, oia_head, dependent.position, oia_dependent):
            logger.debug(item)

        oia_graph.add_mod(oia_dependent, oia_head)
def and_or_conjunction(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Turn coordination edges into OIA arguments.

    Example from the original note: "I like apples, bananas and oranges."
    Every node with outgoing edges whose label starts with ``arg_con`` becomes
    a predicate, and each such child is attached as an argument whose index is
    read from the edge itself.

    :param dep_graph: dependency graph whose nodes are scanned
    :param oia_graph: OIA graph receiving the word nodes and argument edges
    :param context: conversion context; not consulted by this converter
    :return: None
    """

    def _is_conj_arg(_node, label):
        return label.startswith("arg_con")

    for head in dep_graph.nodes():
        conjuncts = list(dep_graph.children(head, filter=_is_conj_arg))
        if not conjuncts:
            continue

        oia_head = oia_graph.add_words(head.position)
        for conjunct, edge in conjuncts:
            oia_conjunct = oia_graph.add_words(conjunct.position)
            # The first value carried by the edge is used as the argument position.
            position = int(edge.values()[0])
            oia_graph.add_argument(oia_head, oia_conjunct, position)
def adverbial_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Convert ``advcl`` dependencies into OIA relations.

    Examples from the original note:
      * "run in order to catch it."  -- advcl with a mark ("in order to")
      * "he worked hard, replacing his feud."  -- advcl without a mark

    With a mark, the mark word becomes a predicate taking the governing verb
    (argument 1, ``mod=True``) and the clause head (argument 2), unless the
    mark's lemma is listed in ``CONJUNCTION_WORDS[language]``. Without a mark,
    the clause head is added as a modifier of the verb.

    :param dep_graph: dependency graph searched for ``verb -advcl-> modifier``
    :param oia_graph: OIA graph that is extended in place
    :param context: conversion context; edges it reports as processed are skipped
    :return: None
    """
    advcl_pattern = DependencyGraph()
    governor_slot = advcl_pattern.create_node()
    clause_slot = advcl_pattern.create_node()
    advcl_pattern.add_dependency(governor_slot, clause_slot, "advcl")

    # The matches are materialised into a list before the loop starts.
    for hit in list(dep_graph.match(advcl_pattern)):
        governor = hit[governor_slot]
        clause_head = hit[clause_slot]

        if context.is_processed(governor, clause_head):
            continue

        oia_governor = oia_graph.add_words(governor.position)
        oia_clause = oia_graph.add_words(clause_head.position)

        logger.debug("adverbial clause: verb={0}, modifier={1}".format(
            governor.position, clause_head.position))

        if oia_graph.has_relation(oia_governor, oia_clause):
            continue

        marks = list(
            dep_graph.children(clause_head, filter=lambda n, rel: "mark" in rel))

        if not marks:
            # No marker: the clause simply modifies the verb.
            oia_graph.add_mod(oia_clause, oia_governor)
            continue

        # Only the first mark child is considered.
        mark_word, _ = marks[0]
        oia_mark = oia_graph.add_words(mark_word.position)
        if oia_mark is None:
            continue
        if mark_word.LEMMA in CONJUNCTION_WORDS[language]:
            continue

        oia_graph.add_argument(oia_mark, oia_governor, 1, mod=True)
        oia_graph.add_argument(oia_mark, oia_clause, 2)
def it_verb_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Handle the expletive construction "it is xxx to do".

    Looks for a VERB that governs both an expletive "it" (``expl``) and a
    subject (``nsubj``/``csubj``). Both are attached to the verb under the same
    argument index -- 1 when "it" precedes the subject, 2 otherwise -- and a
    ref edge is added from "it" to the subject. Both dependency edges are then
    recorded as processed in the context.

    :param dep_graph: dependency graph to search
    :param oia_graph: OIA graph that is extended in place
    :param context: conversion context used to skip and to record processed edges
    :return: None
    """
    expl_pattern = DependencyGraph()
    it_slot = expl_pattern.create_node(LEMMA="it")
    verb_slot = expl_pattern.create_node(UPOS="VERB")
    subj_slot = expl_pattern.create_node(UPOS="NOUN|PRON|PROPN|VERB")
    expl_pattern.add_dependency(verb_slot, it_slot, r'expl')
    expl_pattern.add_dependency(verb_slot, subj_slot, r'nsubj|csubj')

    for hit in dep_graph.match(expl_pattern):
        verb = hit[verb_slot]
        it_word = hit[it_slot]
        subject = hit[subj_slot]

        if context.is_processed(verb, it_word):
            continue

        oia_it = oia_graph.add_words(it_word.position)
        oia_subject = oia_graph.add_words(subject.position)
        oia_verb = oia_graph.add_words(verb.position)

        # "it VERB subj that ..." -> index 1; "subj VERB it that ..." -> index 2
        shared_index = 1 if it_word.LOC < subject.LOC else 2
        oia_graph.add_argument(oia_verb, oia_it, shared_index)
        oia_graph.add_argument(oia_verb, oia_subject, shared_index)
        oia_graph.add_ref(oia_it, oia_subject)

        context.processed(verb, it_word)
        context.processed(verb, subject)
def nmod_with_case(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Convert ``nmod`` dependents that carry a case marker into predicate form.

    Examples from the original note: "the office of the chair",
    "Istanbul in Turkey". The case word becomes the predicate, the head noun
    is argument 1 (``mod=True``) and the nmod dependent is argument 2.

    :param dep_graph: dependency graph searched for
        ``parent -nmod-> child -case-> case word``
    :param oia_graph: OIA graph that is extended in place
    :param context: conversion context; not consulted by this converter
    :return: None
    """
    pattern = DependencyGraph()
    parent_node = DependencyGraphNode()
    child_node = DependencyGraphNode()
    case_node = DependencyGraphNode()
    pattern.add_nodes([parent_node, child_node, case_node])
    pattern.add_dependency(parent_node, child_node, r'\w*nmod\w*')
    pattern.add_dependency(child_node, case_node, r'\w*case\w*')

    for match in dep_graph.match(pattern):
        dep_parent_node = match[parent_node]
        dep_child_node = match[child_node]
        dep_case_node = match[case_node]

        if oia_graph.has_relation(dep_parent_node, dep_child_node):
            continue

        # The predicate is always the case word itself. The code used to
        # compare the edge label against "nmod:<lemma>" / "nmod:<form>" (the
        # lemma and the form can differ, e.g. "vs" / "versus"), but both
        # outcomes of that comparison built exactly this node, so the
        # comparison and the edge lookup feeding it were removed.
        pred_node = oia_graph.add_words(dep_case_node.position)
        arg1_node = oia_graph.add_words(dep_parent_node.position)
        arg2_node = oia_graph.add_words(dep_child_node.position)

        oia_graph.add_argument(pred_node, arg1_node, 1, mod=True)
        oia_graph.add_argument(pred_node, arg2_node, 2)
def obl_modifier(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Attach ``obl`` dependents to their head as plain modifiers.

    The original note describes the intent as handling the modifier that
    follows the verb (the one before the verb being left to verb-phrase
    handling). Pairs that the OIA graph already relates (checked with
    ``direct_link=False``) are left untouched.

    :param dep_graph: dependency graph searched for ``head -obl-> dependent``
    :param oia_graph: OIA graph that is extended in place
    :param context: conversion context; not consulted by this converter
    :return: None
    """
    obl_pattern = DependencyGraph()
    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    head_slot = DependencyGraphNode()
    dependent_slot = DependencyGraphNode()
    obl_pattern.add_nodes([head_slot, dependent_slot])
    obl_pattern.add_dependency(head_slot, dependent_slot, r'\bobl')

    for hit in dep_graph.match(obl_pattern):
        head = hit[head_slot]
        dependent = hit[dependent_slot]

        already_linked = oia_graph.has_relation(head, dependent, direct_link=False)
        if not already_linked:
            oia_head = oia_graph.add_words(head.position)
            oia_dependent = oia_graph.add_words(dependent.position)
            oia_graph.add_mod(oia_dependent, oia_head)
def parallel_list(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Build a "LIST" predicate for every group of ``list``-related nodes.

    A group is a node together with all of its children reached through an
    edge whose label contains "list", ordered by ``LOC``. Each group gets its
    own auxiliary "LIST" node with the members attached as arguments 1..k.

    :param dep_graph: dependency graph whose nodes are scanned
    :param oia_graph: OIA graph that is extended in place
    :param context: conversion context; not consulted by this converter
    :return: None
    """
    groups = []
    for head in dep_graph.nodes():
        members = [
            child
            for child, _label in dep_graph.children(
                head, filter=lambda _n, label: "list" in label)
        ]
        if not members:
            continue
        # The head itself is part of the list; order everything by position.
        groups.append(sorted(members + [head], key=lambda item: item.LOC))

    for group in groups:
        list_pred = oia_graph.add_aux("LIST")
        for arg_index, member in enumerate(group, start=1):
            oia_member = oia_graph.add_words(member.position)
            oia_graph.add_argument(list_pred, oia_member, arg_index)
def oblique_without_prep(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Convert oblique dependents (``obl``, ``obl:tmod``, ``obl:npmod``).

    Original example: "cut X by a knife". When the edge carries "tmod" an
    auxiliary "TIME_IN" predicate is created with the head as argument 1
    (``mod=True``) and the oblique as argument 2; any other oblique is added
    as a modifier of the head. Pairs already related in the OIA graph
    (checked with ``direct_link=False``) are skipped.

    :param dep_graph: dependency graph to search
    :param oia_graph: OIA graph that is extended in place
    :param context: conversion context; not consulted by this converter
    :return: None
    """
    obl_pattern = DependencyGraph()
    head_slot = DependencyGraphNode(UPOS="VERB|NOUN|ADJ|PROPN|PRON")
    oblique_slot = DependencyGraphNode()
    obl_pattern.add_node(head_slot)
    obl_pattern.add_node(oblique_slot)
    obl_pattern.add_dependency(head_slot, oblique_slot, r'obl:tmod|obl:npmod|obl')

    for hit in dep_graph.match(obl_pattern):
        head = hit[head_slot]
        oblique = hit[oblique_slot]

        if oia_graph.has_relation(head, oblique, direct_link=False):
            continue

        edge_types = dep_graph.get_dependency(head, oblique).values()

        if "tmod" not in edge_types:
            # "npmod" and every other oblique: plain modifier of the head
            oia_head = oia_graph.add_words(head.position)
            oia_oblique = oia_graph.add_words(oblique.position)
            oia_graph.add_mod(oia_oblique, oia_head)
            continue

        time_pred = oia_graph.add_aux("TIME_IN")
        oia_head = oia_graph.add_words(head.position)
        oia_oblique = oia_graph.add_words(oblique.position)
        oia_graph.add_argument(time_pred, oia_head, 1, mod=True)
        oia_graph.add_argument(time_pred, oia_oblique, 2)
def acl_mod_verb(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Attach an ``acl`` clause verb to the nominal it modifies.

    According to the original note this runs after ``adnominal_clause_mark``,
    so the clause is expected to have no mark. The verb is added as a
    modifier of the nominal unless the edge was already processed or the two
    nodes are already related in the OIA graph.

    :param dep_graph: dependency graph searched for ``nominal -acl-> verb``
    :param oia_graph: OIA graph that is extended in place
    :param context: conversion context; edges it reports as processed are skipped
    :return: None
    """
    acl_pattern = DependencyGraph()
    # ADJ covers "many/some" standing in for "many X/some X" (acting as a NOUN);
    # ADV covers "here" as in "i am here thinking xxx".
    nominal_slot = acl_pattern.create_node(UPOS="NOUN|PRON|PROPN|ADJ|ADV|NUM")
    # AUX covers "can"/"have" when the true verb is omitted.
    clause_slot = acl_pattern.create_node(UPOS="VERB|AUX")
    acl_pattern.add_nodes([nominal_slot, clause_slot])
    acl_pattern.add_dependency(nominal_slot, clause_slot, r'acl')

    for hit in dep_graph.match(acl_pattern):
        nominal = hit[nominal_slot]
        clause_verb = hit[clause_slot]

        if context.is_processed(nominal, clause_verb):
            continue
        if oia_graph.has_relation(nominal, clause_verb, direct_link=False):
            continue

        oia_clause_verb = oia_graph.add_words(clause_verb.position)
        oia_nominal = oia_graph.add_words(nominal.position)

        edge = dep_graph.get_dependency(nominal, clause_verb)
        subtyped = [label for label in edge.rels if label.startswith("acl:")]
        if subtyped:
            # At most one "acl:<subtype>" label is expected on the edge. The
            # subtype is no longer turned into a predicate; the clause is
            # always attached as a plain modifier below.
            assert len(subtyped) == 1

        oia_graph.add_mod(oia_clause_verb, oia_nominal)
def adv_question(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Promote adverbial question words (how/what/where/when/why ...) above the
    verb they modify.

    For each VERB/AUX with an ``advmod``/``amod`` dependent matching the
    question-word pattern: the verb->question relation is removed, every
    incoming relation of the verb whose ``mod`` flag is falsy is moved onto
    the question word, and a function edge question->verb is added.

    :param dep_graph: dependency graph to search
    :param oia_graph: OIA graph that is rewired in place
    :param context: conversion context; not consulted by this converter
    :return: None
    """
    question_pattern = DependencyGraph()
    # NOTE(review): "why" has no leading \b, unlike the other alternatives --
    # confirm whether that asymmetry is intended.
    wh_slot = question_pattern.create_node(
        UPOS="ADV|ADJ",
        LEMMA=r"(\bhow\b|\bwhat\b|\bwhere\b|\bwhen\b|why\b)\w*")
    verb_slot = question_pattern.create_node(UPOS="VERB|AUX")
    question_pattern.add_dependency(verb_slot, wh_slot, "advmod|amod")

    for hit in list(dep_graph.match(question_pattern)):
        wh_word = hit[wh_slot]
        verb = hit[verb_slot]

        oia_wh = oia_graph.add_words(wh_word.position)
        oia_verb = oia_graph.add_words(verb.position)

        oia_graph.remove_relation(oia_verb, oia_wh)

        # Snapshot the parents first: the loop edits the verb's incoming edges.
        incoming = list(oia_graph.parents(oia_verb))
        for parent, relation in incoming:
            if not relation.mod:
                oia_graph.remove_relation(parent, oia_verb)
                oia_graph.add_relation(parent, oia_wh, relation)

        oia_graph.add_function(oia_wh, oia_verb)
def single_node(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Add the only content word of a one-word sentence to the OIA graph.

    Nodes whose UPOS is "ROOT" or "PUNCT" are ignored when counting; if exactly
    one node remains, its words are added to ``oia_graph``.

    :param dep_graph: dependency graph whose nodes are counted
    :param oia_graph: OIA graph that may receive a single word node
    :param context: conversion context; not consulted by this converter
    :return: None
    """
    ignored_upos = {"ROOT", "PUNCT"}
    content_nodes = [
        candidate for candidate in dep_graph.nodes()
        if candidate.UPOS not in ignored_upos
    ]

    if len(content_nodes) != 1:
        return

    (only_node,) = content_nodes
    oia_graph.add_words(only_node.position)
def fallback_sconj(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Attach subordinating conjunctions that no earlier converter has placed.

    For every SCONJ node whose words are not yet in ``oia_graph`` and whose
    lemma is one of a fixed set of connectives, the node it marks (its parent
    via a ``mark`` edge) becomes argument 1 of the conjunction, unless that
    edge is already recorded as processed in ``context``.

    :param dep_graph: dependency graph whose nodes are scanned
    :param oia_graph: OIA graph that is extended in place
    :param context: conversion context; edges it reports as processed are skipped
    :return: None
    :raises AssertionError: if a conjunction marks more than one parent
    """
    fallback_lemmas = {
        "because", "so", "if", "then", "otherwise", "after", "before", "and",
        "or", "but"
    }

    for node in dep_graph.nodes():
        if oia_graph.has_word(node.position):
            continue
        if node.UPOS != "SCONJ" or node.LEMMA not in fallback_lemmas:
            continue

        parents = [n for n, l in dep_graph.parents(node) if "mark" in l]
        if not parents:
            continue

        assert len(parents) == 1
        parent = parents[0]

        logger.debug("context = " + str(context.processed_edges))

        if context.is_processed(parent, node):
            continue

        oiar_node = oia_graph.add_words(parent.position)
        oia_sconj_node = oia_graph.add_words(node.position)

        # The code used to branch on "because"/"if" versus the other lemmas,
        # but both branches made this identical call, so the branch was
        # collapsed.
        # NOTE(review): if the two groups were meant to use different argument
        # slots, that distinction was never implemented -- confirm the intent.
        oia_graph.add_argument(oia_sconj_node, oiar_node, 1)
def it_be_adjv_that(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Handle the expletive construction "it is xxx that ...".

    Matches a VERB with an expletive "it" (``expl``) and an ADJ/ADV ``csubj``
    dependent that itself carries a "that" ``mark``. "it" becomes argument 1
    of the verb, a ref edge is added from the csubj node to "it", and the
    verb->it edge is recorded as processed.

    :param dep_graph: dependency graph to search
    :param oia_graph: OIA graph that is extended in place
    :param context: conversion context used to skip and to record processed edges
    :return: None
    """
    expl_pattern = DependencyGraph()
    it_slot = expl_pattern.create_node(LEMMA="it")
    be_slot = expl_pattern.create_node(UPOS="VERB")
    csubj_slot = expl_pattern.create_node(UPOS="ADJ|ADV")
    that_slot = expl_pattern.create_node(LEMMA="that")
    expl_pattern.add_dependency(be_slot, it_slot, r'expl')
    expl_pattern.add_dependency(be_slot, csubj_slot, r'csubj')
    expl_pattern.add_dependency(csubj_slot, that_slot, r'mark')

    for hit in dep_graph.match(expl_pattern):
        be_verb = hit[be_slot]
        it_word = hit[it_slot]
        csubj = hit[csubj_slot]
        # "that" only constrains the pattern; it gets no OIA node of its own here.

        if context.is_processed(be_verb, it_word):
            continue

        oia_it = oia_graph.add_words(it_word.position)
        oia_csubj = oia_graph.add_words(csubj.position)
        oia_be = oia_graph.add_words(be_verb.position)

        oia_graph.add_argument(oia_be, oia_it, 1)
        oia_graph.add_ref(oia_csubj, oia_it)

        context.processed(be_verb, it_word)
def nmod_without_case(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Attach remaining ``nmod`` dependents to their head as plain modifiers.

    Two kinds of match are skipped: an ``nmod:poss`` edge whose head is also a
    descendant of the modifier (annotated "whose in there" in the original),
    and pairs the OIA graph already relates (checked with
    ``direct_link=False``).

    :param dep_graph: dependency graph searched for ``center -nmod-> modifier``
    :param oia_graph: OIA graph that is extended in place
    :param context: conversion context; not consulted by this converter
    :return: None
    """
    nmod_pattern = DependencyGraph()
    center_slot = nmod_pattern.create_node()
    modifier_slot = nmod_pattern.create_node()
    nmod_pattern.add_dependency(center_slot, modifier_slot, r'\w*nmod\w*')

    for hit in dep_graph.match(nmod_pattern):
        center = hit[center_slot]
        modifier = hit[modifier_slot]

        edge = dep_graph.get_dependency(center, modifier)

        # whose in there
        if "nmod:poss" in edge and center in set(dep_graph.offsprings(modifier)):
            continue

        if oia_graph.has_relation(center, modifier, direct_link=False):
            continue

        oia_center = oia_graph.add_words(center.position)
        oia_modifier = oia_graph.add_words(modifier.position)
        oia_graph.add_mod(oia_modifier, oia_center)
def adv_relative_clause(dep_graph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Convert when/where-style relative clauses.

    Original examples: "a time when US troops won" / "a place where US troops
    won" (``acl:relcl`` with a time/place adverb). The relative adverb becomes
    a predicate with the modified node as argument 1 (``mod=True``) and the
    clause head as argument 2. Matches whose adverb lemma contains none of
    when/where/how/why/what are ignored, as are pairs that already have a
    clause->modified relation in the OIA graph.

    :param dep_graph: dependency graph to search
    :param oia_graph: OIA graph that is extended in place
    :param context: conversion context; not consulted by this converter
    :return: None
    """
    relative_adverbs = ("when", "where", "how", "why", "what")

    relcl_pattern = DependencyGraph()
    modified_slot = relcl_pattern.create_node()
    clause_slot = relcl_pattern.create_node()
    adverb_slot = relcl_pattern.create_node()
    relcl_pattern.add_dependency(modified_slot, clause_slot, r'acl:relcl\w*')
    relcl_pattern.add_dependency(clause_slot, adverb_slot, r'advmod')

    for hit in dep_graph.match(relcl_pattern):
        modified = hit[modified_slot]
        clause_head = hit[clause_slot]
        adverb = hit[adverb_slot]

        # Substring test on the lemma, not an exact comparison.
        lemma = adverb.LEMMA
        if all(word not in lemma for word in relative_adverbs):
            continue

        oia_adverb = oia_graph.add_words(adverb.position)
        oia_modified = oia_graph.add_words(modified.position)
        oia_clause = oia_graph.add_words(clause_head.position)

        if oia_graph.has_relation(oia_clause, oia_modified):
            continue

        oia_graph.add_argument(oia_adverb, oia_modified, 1, mod=True)
        oia_graph.add_argument(oia_adverb, oia_clause, 2)
def det_predet(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Add a modifier edge for every ``det:predet`` dependency.

    For each dependency ``(n1, n2, dep)`` whose label contains "det:predet",
    both nodes are added to the OIA graph and ``n2`` is made a modifier of
    ``n1``.

    :param dep_graph: dependency graph whose dependencies are scanned
    :param oia_graph: OIA graph that is extended in place
    :param context: conversion context; not consulted by this converter
    :return: None
    """
    for head, predeterminer, label in dep_graph.dependencies():
        if "det:predet" not in label:
            continue

        oia_head = oia_graph.add_words(head.position)
        oia_predeterminer = oia_graph.add_words(predeterminer.position)
        oia_graph.add_mod(oia_predeterminer, oia_head)
def two_node_with_case(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Handle a two-word fragment made of a case word followed by a noun.

    Nodes with UPOS "ROOT" or "PUNCT" are ignored. If exactly two nodes remain
    and the later one governs the earlier one through a dependency equal to
    "case", the case word becomes a predicate with the noun as argument 2.

    :param dep_graph: dependency graph to inspect
    :param oia_graph: OIA graph that is extended in place
    :param context: conversion context; not consulted by this converter
    :return: None
    """
    ignored_upos = {"ROOT", "PUNCT"}
    content_nodes = [
        candidate for candidate in dep_graph.nodes()
        if candidate.UPOS not in ignored_upos
    ]

    if len(content_nodes) != 2:
        return

    # Earlier word is taken as the case marker, later word as the noun.
    first_word, second_word = sorted(content_nodes, key=lambda item: item.LOC)

    if dep_graph.get_dependency(second_word, first_word) != "case":
        return

    oia_case = oia_graph.add_words(first_word.position)
    oia_noun = oia_graph.add_words(second_word.position)
    oia_graph.add_argument(oia_case, oia_noun, 2)
def adv_verb_modifier(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Convert ``advmod`` dependents of a verb-like head.

    The original note describes the intent: the adverb before the verb is
    handled by verb-phrase processing, this converter handles the one after
    the verb (e.g. "verb1 in order to verb2").

    Three outcomes per match:
      * the adverb has exactly one ``obl`` child and that child has no ``case``
        child: the adverb is a predicate over the head (argument 1,
        ``mod=True``) and the oblique (argument 2);
      * otherwise, if the adverb has a ``mark`` child: the first mark is a
        predicate over the head (argument 1, ``mod=True``) and the adverb
        (argument 2);
      * otherwise the adverb is a plain modifier of the head.

    :param dep_graph: dependency graph searched for ``head -advmod-> adverb``
    :param oia_graph: OIA graph that is extended in place
    :param context: conversion context; edges it reports as processed are skipped
    :return: None
    """
    advmod_pattern = DependencyGraph()
    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    head_slot = DependencyGraphNode(UPOS="VERB|NOUN|PROPN|AUX|PRON")  # aux is for be word
    adverb_slot = DependencyGraphNode(UPOS="ADV|X|NOUN|ADJ|VERB")
    advmod_pattern.add_nodes([head_slot, adverb_slot])
    advmod_pattern.add_dependency(head_slot, adverb_slot, r'advmod')

    for hit in dep_graph.match(advmod_pattern):
        head = hit[head_slot]
        adverb = hit[adverb_slot]

        if context.is_processed(head, adverb):
            continue
        if oia_graph.has_relation(head, adverb):
            continue

        obliques = [
            child for child, _ in dep_graph.children(
                adverb, filter=lambda n, l: l.startswith("obl"))
        ]

        lone_oblique = None
        oblique_is_cased = False
        if len(obliques) == 1:
            lone_oblique = obliques[0]
            # An oblique with a case word is left to the oblique converters.
            oblique_is_cased = bool([
                child for child, _ in dep_graph.children(
                    lone_oblique, filter=lambda n, l: "case" in l)
            ])

        marks = [
            child for child, _ in dep_graph.children(
                adverb, filter=lambda n, l: l.startswith("mark"))
        ]

        oia_head = oia_graph.add_words(head.position)
        oia_adverb = oia_graph.add_words(adverb.position)

        if lone_oblique and not oblique_is_cased:
            oia_oblique = oia_graph.add_words(lone_oblique.position)
            oia_graph.add_argument(oia_adverb, oia_head, 1, mod=True)
            oia_graph.add_argument(oia_adverb, oia_oblique, 2)
        elif marks:
            oia_mark = oia_graph.add_words(marks[0].position)
            oia_graph.add_argument(oia_mark, oia_head, 1, mod=True)
            oia_graph.add_argument(oia_mark, oia_adverb, 2)
        else:
            oia_graph.add_mod(oia_adverb, oia_head)
def adv_ccomp(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Convert an adverb-like word that governs a ``ccomp``/``xcomp`` clause.

    Works in two phases. The first phase only collects matches: for each
    adverb -> complement pair not already related in ``oia_graph`` it gathers
    the ``case`` children of the complement located between the two words
    (these join the adverb to form the predicate) and the adverb's
    ``advmod`` parent, if any (called the "subject" here). The second phase
    builds the OIA nodes; when the predicate spans several words they are
    first merged into one new node inside ``dep_graph``.

    :param dep_graph: dependency graph to read; modified in place when
        predicate words are merged
    :param oia_graph: OIA graph that is extended in place
    :param context: conversion context; not consulted by this converter
    :return: None
    :raises Exception: "Multiple subject" when the adverb has more than one
        qualifying ``advmod`` parent
    """

    pattern = DependencyGraph()

    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    # verb_node = pattern.create_node(UPOS="VERB|NOUN|PROPN")
    adv_node = pattern.create_node(UPOS="ADV|X|NOUN|PART")  # part is for "not"
    ccomp_node = pattern.create_node()

    # pattern.add_dependency(verb_node, adv_node, r'advmod')
    pattern.add_dependency(adv_node, ccomp_node, r"ccomp|xcomp")

    # Collected as [subject-or-None, predicate nodes, complement node]; the
    # graph edits happen only after matching has finished.
    patterns = []
    for match in dep_graph.match(pattern):

        # dep_verb_node = match[verb_node]
        dep_adv_node = match[adv_node]
        dep_ccomp_node = match[ccomp_node]

        if oia_graph.has_relation(dep_adv_node, dep_ccomp_node):
            continue

        # Case words of the complement that sit strictly between the adverb
        # and the complement.
        dep_case_nodes = [
            n for n, l in
            dep_graph.children(dep_ccomp_node,
                               filter=lambda n, l: "case" == l and dep_adv_node.LOC < n.LOC < dep_ccomp_node.LOC)
        ]

        if dep_case_nodes:
            # continuous_component is defined elsewhere; it is seeded with the
            # first case node and its result replaces the case-node list.
            dep_case_nodes = continuous_component(dep_case_nodes, dep_case_nodes[0])
            predicate_nodes = [dep_adv_node] + dep_case_nodes
            predicate_nodes.sort(key=lambda n: n.LOC)
        else:
            predicate_nodes = [dep_adv_node]

        # Parents that the adverb modifies via advmod, restricted by UPOS.
        dep_subj_nodes = [
            n for n, l in dep_graph.parents(dep_adv_node,
                                            filter=lambda n, l: "advmod" == l and n.UPOS in {"ADV", "X", "NOUN"})
        ]
        if len(dep_subj_nodes) > 1:
            raise Exception("Multiple subject")
        elif len(dep_subj_nodes) > 0:
            dep_subj_node = dep_subj_nodes[0]
        else:
            dep_subj_node = None

        patterns.append([dep_subj_node, predicate_nodes, dep_ccomp_node])

    for dep_subj_node, predicate_nodes, dep_ccomp_node in patterns:

        if len(predicate_nodes) > 1:
            # Merge the multi-word predicate into a single node whose ID, FORM
            # and LEMMA are the space-joined values of its parts.
            new_pred_node = dep_graph.create_node(
                ID=" ".join([x.ID for x in predicate_nodes]),
                FORM=" ".join([x.FORM for x in predicate_nodes]),
                LEMMA=" ".join([x.LEMMA for x in predicate_nodes]),
                UPOS="ADV",
                LOC=predicate_nodes[0].LOC)

            new_pred_node.aux = True

            dep_graph.replace_nodes(predicate_nodes, new_pred_node)

            dep_graph.remove_dependency(dep_ccomp_node, new_pred_node)

        else:
            new_pred_node = predicate_nodes[0]

        oia_pred_node = oia_graph.add_words(new_pred_node.position)

        # NOTE(review): the complement is attached only when there is no
        # subject; with a subject present only argument 1 is added -- confirm
        # this either/or is intended.
        if dep_subj_node:
            oia_subj_node = oia_graph.add_words(dep_subj_node.position)
            oia_graph.add_argument(oia_pred_node, oia_subj_node, 1, mod=True)

        else:
            oia_ccomp_node = oia_graph.add_words(dep_ccomp_node.position)
            oia_graph.add_argument(oia_pred_node, oia_ccomp_node, 2)
def forward(self, oia_graph: OIAGraph, dep_graph: DependencyGraph=None, **kwargs):
    """
    Split OIA nodes whose text contains "and"/"or" into a conjunction node
    plus one argument node per conjunct.

    Nodes whose text already contains a ``{...}`` placeholder are left alone,
    as are nodes where every piece around the conjunctions is empty or made of
    separator tokens only. The new conjunction node takes over all parent and
    child edges of the node it replaces, and the original node is removed.

    @param oia_graph: graph that is rewritten in place
    @type oia_graph: OIAGraph
    @param dep_graph: not used by this method
    @type dep_graph: DependencyGraph
    @param kwargs: ignored
    @type kwargs: dict
    @return: None
    @rtype: None
    """
    conj_tokens = {"and", "or"}
    separators = {",", ";", ".", " "}

    def _is_trivial(piece):
        # Empty, or nothing but separator tokens.
        return not piece or all(oia_graph.words[w] in separators for w in piece)

    for node in list(oia_graph.nodes()):
        text_tokens = oia_graph.node_text(node).split(" ")

        if not any(token in conj_tokens for token in text_tokens):
            continue
        if any("{" in token and "}" in token for token in text_tokens):
            continue

        pieces = []          # word-index lists between conjunctions
        conj_positions = []  # word indices of the conjunctions themselves
        pending = []
        for span in node.spans:
            if isinstance(span, str):
                pending.append(span)
                continue
            first, last = span
            for word_idx in range(first, last + 1):
                if oia_graph.words[word_idx].lower() in conj_tokens:
                    pieces.append(pending)
                    conj_positions.append(word_idx)
                    pending = []
                else:
                    pending.append(word_idx)
        pieces.append(pending)

        logger.debug("conj found = {}".format(conj_positions))
        logger.debug("argument found = {}".format(pieces))

        if all(_is_trivial(piece) for piece in pieces):
            # single words
            continue

        if len(conj_positions) == 1:
            conj_words = conj_positions
        else:
            # NOTE(review): this branch is also taken when no conjunction index
            # was collected at all -- confirm whether that can happen.
            logger.warning("We are processing conjs with more than two args")
            conj_words = ['{1}']
            for slot, conj_idx in enumerate(conj_positions, start=2):
                conj_words.append(conj_idx)
                conj_words.append("{{{0}}}".format(slot))

        conj_node = oia_graph.add_words(conj_words)

        for arg_no, piece in enumerate(pieces, start=1):
            piece_node = oia_graph.add_words(piece)
            oia_graph.add_relation(conj_node, piece_node,
                                   "pred.arg.{0}".format(arg_no))

        # Move every edge of the old node onto the conjunction node.
        for parent, edge in list(oia_graph.parents(node)):
            oia_graph.add_relation(parent, conj_node, edge.label)
            oia_graph.remove_relation(parent, node)

        for child, edge in list(oia_graph.children(node)):
            oia_graph.add_relation(conj_node, child, edge.label)
            oia_graph.remove_relation(node, child)

        oia_graph.remove_node(node)
def simple_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext): """ :TODO badcase Attached is a new link :param dep_graph: :param oia_graph: :return: """ # for node in dep_graph.nodes(): # print('node:',node) for pred_node in dep_graph.nodes( filter=lambda x: x.UPOS in {"VERB", "ADJ", "NOUN", "AUX", "ADV"}): # ADJ is for "With the demand so high," # NOUN is for "X the best for Y" # AUX is for have in "I have a cat" # print('pred_node', pred_node) expl = None nsubj = None subj = None objs = [] for child, rel in dep_graph.children(pred_node): # print('child node:', child) # print('child rel:', rel) if ('nsubj' in rel or "csubj" in rel): # and ":xsubj" not in rel: nsubj = child elif rel.startswith('obj'): objs.append((child, 1)) elif rel.startswith('iobj'): objs.append((child, 0)) elif 'ccomp' in rel or "xcomp" in rel: # and child.UPOS == "VERB": objs.append((child, 2)) elif "expl" in rel: expl = child if nsubj: # if pred_node.LOC < nsubj.LOC: # # TODO: in what situation? 
# objs.insert(0, nsubj) # else: subj = nsubj if expl: # It VERB subj that # VERB subj it that if expl.LOC < pred_node.LOC: subj = expl objs.insert(0, (subj, -1)) else: # expl.LOC > pred_node.LOC: objs.insert(0, (expl, -1)) if not subj and not objs: continue pred_node = oia_graph.add_words(pred_node.position) if not pred_node: continue arg_index = 1 if subj is not None: if not oia_graph.has_relation(pred_node, subj): subj_node = oia_graph.add_words(subj.position) oia_graph.add_argument(pred_node, subj_node, arg_index) arg_index += 1 objs.sort(key=lambda x: x[1]) for obj, weight in objs: # print('obj:',obj) oia_obj_node = oia_graph.add_words(obj.position) # def __sconj_node(n): # # that conj is ommited # return (n.UPOS == "SCONJ" and n.LEMMA not in {"that"}) def __adv_question_node(n): return ((n.UPOS == "ADV" and n.LEMMA in {"when", "where", "how", "whether"})) # # def __pron_question_node(n): # return (n.UPOS == "PRON" and n.LEMMA in {"what", "who", "which"}) # def __interested_node2(n): # # that conj is ommited # return (n.UPOS == "PART") # sconj_nodes = [n for n, l in dep_graph.children(obj, # filter=lambda n,l: l == "mark" and __sconj_node(n))] adv_question_nodes = [ n for n, l in dep_graph.children( obj, filter=lambda n, l: l == "mark" and __adv_question_node(n)) ] # subj_question_nodes = [n for n, l in dep_graph.children(obj, # filter=lambda n,l: "subj" in l and __pron_question_node(n))] # # obj_question_nodes = [n for n, l in dep_graph.children(obj, # filter=lambda n, # l: ("obj" in l or "comp") in l and __pron_question_node( # n))] # nodes_of_interests2 = [n for n, l in dep_graph.children(obj, # filter=lambda n,l: l == "advmod" and __interested_node2(n))] # print('nodes_of_interests:', nodes_of_interests) # if nodes_of_interests2: # assert len(nodes_of_interests2) == 1 # interest_node = nodes_of_interests2[0] # oia_interest_node = oia_graph.add_word_with_head(interest_node.LOC) # oia_graph.add_argument(pred_node, oia_interest_node, arg_index) # # 
oia_graph.add_function(oia_interest_node, oia_obj_node) # arg_index += 1 # oia_graph.add_argument(oia_interest_node, oia_obj_node, arg_index) # arg_index += 1 if adv_question_nodes: assert len(adv_question_nodes) == 1 interest_node = adv_question_nodes[0] oia_interest_node = oia_graph.add_words(interest_node.position) oia_graph.add_argument(pred_node, oia_interest_node, arg_index) oia_graph.add_function(oia_interest_node, oia_obj_node) else: if not oia_graph.has_relation(pred_node, obj): oia_graph.add_argument(pred_node, oia_obj_node, arg_index) arg_index += 1 pattern = DependencyGraph() parent_pred = pattern.create_node() child_pred = pattern.create_node() question_word = pattern.create_node(LEMMA=r'what|who') pattern.add_dependency(parent_pred, child_pred, r'subj|nsubj|iobj|obj|xcomp|ccomp') pattern.add_dependency(parent_pred, question_word, r'subj|nsubj|iobj|obj|xcomp|ccomp') pattern.add_dependency(child_pred, question_word, r'subj|nsubj|iobj|obj|xcomp|ccomp') for match in dep_graph.match(pattern): dep_parent_pred, dep_child_pred, dep_question_word = [ match[x] for x in [parent_pred, child_pred, question_word] ] oia_parent_pred, oia_child_pred, oia_question_word = [ oia_graph.add_words(x.position) for x in [dep_parent_pred, dep_child_pred, dep_question_word] ] oia_question_word.is_func = True rel = oia_graph.get_edge(oia_child_pred, oia_question_word) oia_graph.remove_relation(oia_child_pred, oia_question_word) oia_graph.remove_relation(oia_parent_pred, oia_child_pred) oia_graph.add_relation(oia_question_word, oia_child_pred, "mod_by:" + rel.label)
def advcl_mark_sconj(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Convert adverbial clauses introduced by a known conjunction word.

    Matches ``pred1 -advcl-> pred2 -mark-> conjunction`` and keeps only the
    matches whose conjunction lemma is listed in
    ``CONJUNCTION_WORDS[language]``. The conjunction becomes a predicate with
    the clause predicate (pred2) as argument 1 and the governing predicate
    (pred1) as argument 2. For "if", an ``advmod`` child "then" of pred1 is
    folded into the predicate as ``if {1} then {2}``.

    Both dependency edges (and the pred1 -> "then" edge when present) are
    recorded as processed in ``context``.

    :param dep_graph: dependency graph to search
    :param oia_graph: OIA graph that is extended in place
    :param context: conversion context that records the processed edges
    :return: None
    :raises AssertionError: if pred1 has more than one "then" advmod child
    """
    pattern = DependencyGraph()
    pred1_node = pattern.create_node()
    pred2_node = pattern.create_node()
    # No UPOS restriction: the lemma check below decides what qualifies.
    sconj_node = pattern.create_node()

    pattern.add_dependency(pred1_node, pred2_node, r'advcl\w*')
    pattern.add_dependency(pred2_node, sconj_node, 'mark')

    for match in list(dep_graph.match(pattern)):
        dep_pred1_node = match[pred1_node]
        dep_pred2_node = match[pred2_node]
        dep_sconj_node = match[sconj_node]

        if dep_sconj_node.LEMMA not in CONJUNCTION_WORDS[language]:
            continue

        context.processed(dep_pred2_node, dep_sconj_node)
        context.processed(dep_pred1_node, dep_pred2_node)

        oia_pred1_node = oia_graph.add_words(dep_pred1_node.position)
        oia_pred2_node = oia_graph.add_words(dep_pred2_node.position)

        condition_position = dep_sconj_node.position

        if dep_sconj_node.LEMMA == "if":
            # check whether there is "then"
            dep_then_nodes = [
                n for n, l in dep_graph.children(dep_pred1_node)
                if n.LEMMA == "then" and l == "advmod"
            ]

            if dep_then_nodes:
                assert len(dep_then_nodes) == 1
                dep_then_node = dep_then_nodes[0]
                context.processed(dep_pred1_node, dep_then_node)
                condition_position = dep_sconj_node.position + [
                    "{1}"
                ] + dep_then_node.position + ["{2}"]

        oia_condition_node = oia_graph.add_words(condition_position)

        # Every lemma that reaches this point is a conjunction word (see the
        # guard above), so the clause is always argument 1 and the governing
        # predicate argument 2. The code used to carry an extra branch for
        # non-conjunction lemmas here; the guard made it unreachable, so it
        # was removed.
        oia_graph.add_argument(oia_condition_node, oia_pred2_node, 1)
        oia_graph.add_argument(oia_condition_node, oia_pred1_node, 2)
def object_relative_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Convert object-extracted/referred relative clauses.

    Original example: "the person that Andy knows". Matches
    ``entity -acl:relcl-> verb -subj-> subject`` and skips matches whose
    subject lemma is what/who/which/that. The subject becomes argument 1 of
    the verb and the entity argument 2 (``mod=True``). A ``ref`` child of the
    entity located between the entity and the verb gets a ref edge from the
    entity and is attached according to its relation to the verb. ``ccomp``
    children of the verb become argument 3.

    :param dep_graph: dependency graph to search
    :param oia_graph: OIA graph that is extended in place
    :param context: conversion context used to skip and to record processed edges
    :return: None
    :raises Exception: "unknown relation: ..." when a ref word without a case
        child relates to the verb through something other than an obj-like
        relation or "advmod"
    """

    pattern = DependencyGraph()
    verb_node = DependencyGraphNode()
    entity_node = DependencyGraphNode()
    subj_node = DependencyGraphNode()
    pattern.add_nodes([verb_node, entity_node, subj_node])
    pattern.add_dependency(verb_node, subj_node, r'\w*subj\w*')
    pattern.add_dependency(entity_node, verb_node, r'\w*acl:relcl\w*')

    for match in dep_graph.match(pattern):

        dep_entity_node = match[entity_node]
        dep_subj_node = match[subj_node]
        dep_verb_node = match[verb_node]

        # A relative pronoun as subject is not handled by this converter.
        if dep_subj_node.LEMMA in {"what", "who", "which", "that"}:
            continue

        logger.debug("we found a objective relative clause")
        logger.debug("entity: {0}".format(dep_entity_node))
        logger.debug("subject: {0}".format(dep_subj_node))
        logger.debug("verb: {0}".format(dep_verb_node))

        if context.is_processed(dep_entity_node, dep_verb_node):
            logger.debug("processed")
            continue

        context.processed(dep_verb_node, dep_subj_node)
        context.processed(dep_entity_node, dep_verb_node)

        oia_entity_node = oia_graph.add_words(dep_entity_node.position)
        oia_verb_node = oia_graph.add_words(dep_verb_node.position)
        oia_subj_node = oia_graph.add_words(dep_subj_node.position)

        if oia_graph.has_relation(oia_entity_node, oia_verb_node):
            logger.debug("has relation between entity and verb")
            continue

        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)

        def __valid_ref(n, l):
            # A "ref" child located strictly between the entity and the verb.
            return l == "ref" and dep_entity_node.LOC < n.LOC < dep_verb_node.LOC

        ref_nodes = list(n for n, l in dep_graph.children(dep_entity_node,
                                                          filter=__valid_ref))
        ref_nodes.sort(key=lambda x: x.LOC)

        if ref_nodes:
            # Use the ref word closest to the verb.
            ref_node = ref_nodes[-1]

            oia_ref_node = oia_graph.add_words(ref_node.position)

            oia_graph.add_ref(oia_entity_node, oia_ref_node)

            logger.debug("we are coping with ref between:")
            logger.debug(dep_verb_node)
            logger.debug(ref_node)

            ref_relation = dep_graph.get_dependency(dep_verb_node, ref_node)

            case_nodes = list(n for n, l in dep_graph.children(
                ref_node, filter=lambda n, l: "case" in l))
            case_nodes.sort(key=lambda x: x.LOC)

            if ref_relation:
                if case_nodes:
                    # with which xxxx, the with will become the root pred
                    case_node = case_nodes[-1]
                    oia_case_node = oia_graph.add_words(case_node.position)

                    oia_graph.add_argument(oia_case_node, oia_verb_node, 1,
                                           mod=True)
                    oia_graph.add_argument(oia_case_node, oia_ref_node, 2)
                    oia_graph.add_mod(oia_verb_node, oia_entity_node)
                else:
                    if "obj" in ref_relation:
                        oia_graph.add_argument(oia_verb_node, oia_ref_node, 2)
                    elif ref_relation == "advmod":
                        oia_graph.add_mod(oia_ref_node, oia_verb_node)
                    else:
                        raise Exception(
                            "unknown relation: {}".format(ref_relation))
                    # oia_graph.add_argument(oia_verb_node, oia_entity_node, 2, mod=True)

        # NOTE(review): in this layout the two calls below run for every match
        # (the subject argument is therefore added a second time) -- confirm
        # against the intended nesting.
        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)
        oia_graph.add_argument(oia_verb_node, oia_entity_node, 2, mod=True)

        # Currently unused; a disabled check on this relation used to guard
        # the loop below.
        rels = dep_graph.get_dependency(dep_entity_node, dep_verb_node)

        # if rels.endswith("obj"):
        for node, l in dep_graph.children(dep_verb_node):
            if l == "ccomp":
                oia_ccomp_node = oia_graph.add_words(node.position)
                oia_graph.add_argument(oia_verb_node, oia_ccomp_node, 3)
def forward(self, oia_graph: OIAGraph, **kwargs):
    """
    Split every noun phrase node that contains "of" into a chain of nodes.

    According to the previous merge operation, a noun phrase is not merged
    when the part after "of" carries a modification, so the phrases handled
    here have no modification on their second part.

    A node "A of B of C" is replaced by
    ``A -as:pred.arg.1-> of -pred.arg.2-> B -as:pred.arg.1-> of -pred.arg.2-> C``.
    Relations from the parents of the original node are moved to the first
    part; relations to its children (unexpected, hence the warning) are moved
    to the first part as well.

    @param oia_graph: graph that is rewritten in place
    @type oia_graph: OIAGraph
    @param kwargs: unused
    @type kwargs: dict
    @return: None
    @rtype: None
    """
    for node in list(oia_graph.nodes()):
        node_words = oia_graph.node_text(node).split(" ")

        # Nothing to split when the text has no "of" or consists of "of" alone.
        if "of" not in node_words or len(node_words) == 1:
            continue

        # Alternating layout: [words, of_idx, words, of_idx, ..., words]
        of_split_words = []
        current_words = []
        for span in node.spans:
            if isinstance(span, str):
                current_words.append(span)
                continue
            start, end = span
            for idx in range(start, end + 1):
                if oia_graph.words[idx] == "of":
                    of_split_words.append(current_words)
                    of_split_words.append(idx)
                    current_words = []
                else:
                    current_words.append(idx)

        if not current_words:
            # of is the ending, warning, maybe something like "because of "
            logger.warning("We found a of at the last of the phrase: " +
                           oia_graph.node_text(node))
            continue

        of_split_words.append(current_words)

        if len(of_split_words) == 1:
            # No word-index "of" was located (e.g. it only occurs inside a string
            # span), so there is no split point. Rebuilding the node from the very
            # same words and then removing the original would at best be a no-op
            # and could drop the node if add_words hands back the node itself
            # (assumption - confirm against OIAGraph.add_words).
            continue

        first_node = oia_graph.add_words(of_split_words[0])

        # Parents of the whole phrase now point at its first part.
        for p, l in list(oia_graph.parents(node)):
            oia_graph.add_relation(p, first_node, l.label)
            oia_graph.remove_relation(p, node)

        children = list(oia_graph.children(node))
        if children:
            logger.warning(
                "noun of noun has {0} children, be careful!!!".format(
                    len(children)))
        for c, l in children:
            logger.warning("Child: {} {}".format(
                l.label, oia_graph.node_text(c)))
            oia_graph.add_relation(first_node, c, l.label)
            oia_graph.remove_relation(node, c)

        oia_graph.remove_node(node)

        # Chain the remaining (of, noun words) pairs behind the first part.
        previous_node = first_node
        for of_word, noun_words in more_itertools.chunked(
                of_split_words[1:], 2):
            of_node = oia_graph.add_words([of_word])
            next_node = oia_graph.add_words(noun_words)

            oia_graph.add_relation(previous_node, of_node, "as:pred.arg.1")
            oia_graph.add_relation(of_node, next_node, "pred.arg.2")
            previous_node = next_node
def general_question(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Attach a "WHETHER" function node to verbs that head a general question.

    A verb is skipped when any of its offsprings has a lemma containing
    "what", "how", "why", "when" or "where", when it has neither an
    auxiliary nor a "be" lemma, or when no subject/expletive child is found.
    Otherwise it counts as a question when "be" is located before
    "there"/"here" inside a there-be verb, or when the "be" verb or the
    auxiliary is located before the subject.

    :param dep_graph: dependency graph whose VERB nodes are inspected
    :param oia_graph: OIA graph that receives the question marker
    :param context: conversion context (not used by this rule)
    :return: None; ``oia_graph`` is updated in place
    """
    wh_words = {"what", "how", "why", "when", "where"}

    for verb in dep_graph.nodes(filter=lambda n: n.UPOS == "VERB"):

        # A wh-word below the verb rules out a general question.
        if any(any(wh in offspring.LEMMA for wh in wh_words)
               for offspring in dep_graph.offsprings(verb)):
            continue

        # NOTE(review): only needed by the disabled root check below; currently unused.
        parents = [parent for parent, _ in dep_graph.parents(verb)]
        # if not(len(parents) == 1 and parents[0].ID == "0"):
        #    continue

        # Look for subject and auxiliary children; the last match of each kind wins.
        subj, aux = None, None
        for child, rel in dep_graph.children(verb):
            if "subj" in rel:
                subj = child
            if "aux" in rel:
                aux = child

        if isinstance(verb, DependencyGraphSuperNode):
            # A super node is expected to have no separate aux child; "be" and
            # the auxiliary are searched among its member nodes instead.
            assert aux is None
            is_be_verb = False
            for member in verb.nodes:
                if not isinstance(member, DependencyGraphNode):
                    continue
                if member.LEMMA == "be":
                    is_be_verb = True
                if member.UPOS == "AUX":
                    aux = member
        else:
            is_be_verb = verb.LEMMA == "be"

        if not is_be_verb and aux is None:
            # cannot be a general question
            continue

        # An expletive child takes over the subject role.
        expletives = [child for child, rel in dep_graph.children(verb)
                      if rel == "expl"]
        if expletives:
            assert len(expletives) == 1
            subj = expletives[0]

        if subj is None:
            logger.warning(
                "subject is none, cannot decide whether it is a question")
            continue

        oia_verb_node = oia_graph.add_words(verb.position)

        is_there_be_verb = False
        if is_be_verb:
            lemma_tokens = verb.LEMMA.split(' ')
            is_there_be_verb = "there" in lemma_tokens or "here" in lemma_tokens

        if is_there_be_verb:
            # "is there ..." vs "there is ...": compare the positions inside the verb.
            assert isinstance(verb, DependencyGraphSuperNode)
            be_node = [m for m in verb.nodes if m.LEMMA == "be"][0]
            there_node = [
                m for m in verb.nodes
                if m.LEMMA == "there" or m.LEMMA == "here"
            ][0]
            is_question = be_node.LOC < there_node.LOC
        else:
            # The "be" verb or, failing that, the auxiliary is located before the subject.
            is_question = ((is_be_verb and verb.LOC < subj.LOC)
                           or (aux is not None and aux.LOC < subj.LOC))

        if not is_question:
            continue

        # if aux is not None and aux.LEMMA == "do":
        #    oia_question_node = oia_graph.add_word_with_head(aux.LOC)
        # else:
        oia_question_node = oia_graph.add_aux("WHETHER")
        oia_graph.add_function(oia_question_node, oia_verb_node)
def parataxis(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Link a node and its "parataxis" children under one connecting predicate.

    The node and its parataxis children are ordered by LOC. For each adjacent
    pair a connector located between the two is looked up: first an "advmod"
    child of the latter node that is an SCONJ or has the lemma "so",
    otherwise a ":", ";", "--" or "," punctuation child of the head node.
    Without any connector the auxiliary "PARATAXIS" node is the predicate;
    with a single pair the connector itself is; with several pairs the
    predicate is built from the connectors interleaved with "{k}" slots.
    Every ordered node then becomes argument k of the predicate.

    :param dep_graph: dependency graph that is scanned for parataxis relations
    :param oia_graph: OIA graph that receives the predicate and its arguments
    :param context: conversion context (not used by this rule)
    :return: None; ``oia_graph`` is updated in place
    """
    connector_punct = {":", ";", "--", ","}

    for head in list(dep_graph.nodes()):

        clauses = [child for child, rel in dep_graph.children(head)
                   if "parataxis" == rel]
        if not clauses:
            continue

        clauses.append(head)
        clauses.sort(key=lambda item: item.LOC)

        # One entry per adjacent pair: the connecting node, or None.
        connectors = []
        for former, latter in more_itertools.pairwise(clauses):

            def _is_adverbial_link(n, l):
                return ("advmod" in l
                        and (former.LOC < n.LOC < latter.LOC)
                        and (n.UPOS == "SCONJ" or n.LEMMA in {"so"}))

            def _is_punct_link(n, l):
                return ("punct" in l
                        and n.FORM in connector_punct
                        and (former.LOC < n.LOC < latter.LOC))

            adverbial_links = [
                n for n, _ in dep_graph.children(latter, filter=_is_adverbial_link)
            ]
            punct_links = [
                n for n, _ in dep_graph.children(head, filter=_is_punct_link)
            ]

            # An adverbial connector takes precedence over punctuation.
            # dep_graph.remove_dependency(para, dep_con)
            # otherwise, the dep_con will be recovered by adv_modifier, may cause further question
            candidates = adverbial_links or punct_links
            connectors.append(candidates[0] if candidates else None)

        if all(connector is None for connector in connectors):
            oia_pred_node = oia_graph.add_aux("PARATAXIS")
        elif len(connectors) == 1:
            oia_pred_node = oia_graph.add_words(connectors[0].position)
        else:
            # Template such as: {1} <connector> {2} <connector> {3}
            template = ["{1}"]
            for slot, connector in enumerate(connectors, start=2):
                if connector is not None:
                    template.extend(connector.position)
                template.append("{{{0}}}".format(slot))
            oia_pred_node = oia_graph.add_words(template)

        for arg_index, clause in enumerate(clauses, start=1):
            oia_arg_node = oia_graph.add_words(clause.position)
            oia_graph.add_argument(oia_pred_node, oia_arg_node, arg_index)
def oblique_with_prep(dep_graph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Turn an oblique with a case/mark word into a predicate with two arguments.

    Example: "cut X by a knife". For every match of
    ``head -obl-> oblique -case|mark-> prep`` the case/mark word chosen as
    head of the connected case words becomes a predicate whose argument 1
    (with ``mod=True``) is the governing word and whose argument 2 is the
    oblique.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the predicate and its arguments
    :param context: conversion context (not used by this rule)
    :return: None; ``oia_graph`` is updated in place
    """
    # cut X by a knife
    pattern = DependencyGraph()

    # adj is for "has more on ", adv is for "south of XXXX"
    verb_node = DependencyGraphNode(UPOS="VERB|ADJ|ADV|NOUN|X|PROPN|PRON")
    # verb is for including/according, adj is for "prior to"
    prep_node = DependencyGraphNode(UPOS=r"PRON|ADP|VERB|SCONJ|ADJ")
    oblique_node = DependencyGraphNode()

    for pattern_node in (verb_node, prep_node, oblique_node):
        pattern.add_node(pattern_node)
    pattern.add_dependency(verb_node, oblique_node, r'\bobl')
    pattern.add_dependency(oblique_node, prep_node, r"case|mark")

    def _is_case_or_mark(n, l):
        return l == "case" or l == "mark"

    for match in dep_graph.match(pattern):
        dep_verb_node = match[verb_node]
        dep_oblique_node = match[oblique_node]
        dep_prep_node = match[prep_node]

        # NOTE(review): dependency nodes (not OIA nodes) are handed to the OIA
        # graph here - confirm that has_relation supports this.
        if oia_graph.has_relation(dep_verb_node, dep_oblique_node):
            continue

        # Values carried by the obl edge, compared against the case lemmas below.
        oblique_cases = dep_graph.get_dependency(dep_verb_node,
                                                 dep_oblique_node).values()

        # if dep_prop_node.LEMMA.lower() not in cases:
        #    continue

        case_children = [
            child for child, _ in dep_graph.children(dep_oblique_node,
                                                     filter=_is_case_or_mark)
        ]
        case_chain = continuous_component(case_children, dep_prep_node)

        # NOTE(review): computed but not used afterwards.
        predicate = tuple(node.ID for node in case_chain)

        # Prefer the last case word named on the obl edge, else the last case word.
        named_on_edge = [
            node for node in case_chain
            if node.LEMMA.lower() in oblique_cases
        ]
        head_node = named_on_edge[-1] if named_on_edge else None
        if not head_node:
            head_node = case_chain[-1]

        pred_node = oia_graph.add_words(head_node.position)
        arg1_node = oia_graph.add_words(dep_verb_node.position)
        arg2_node = oia_graph.add_words(dep_oblique_node.position)

        oia_graph.add_argument(pred_node, arg1_node, 1, mod=True)
        oia_graph.add_argument(pred_node, arg2_node, 2)