def obl_modifier(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                 context: UD2OIAContext):
    """
    Turn every ``obl`` dependency into a modifier relation in the OIA graph.

    For each head/dependent pair joined by an edge matching ``obl``, the
    dependent is attached to the head with ``add_mod``. Pairs for which
    ``oia_graph.has_relation(..., direct_link=False)`` is already true are
    left untouched.

    :param dep_graph: dependency graph searched for ``obl`` edges
    :param oia_graph: OIA graph that receives the words and the relation
    :param context: conversion context; not used by this converter
    :return: None
    """
    pattern = DependencyGraph()
    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    head_slot = DependencyGraphNode()
    dependent_slot = DependencyGraphNode()
    pattern.add_nodes([head_slot, dependent_slot])
    pattern.add_dependency(head_slot, dependent_slot, r'\bobl')

    for match in dep_graph.match(pattern):
        dep_head, dep_dependent = match[head_slot], match[dependent_slot]

        already_linked = oia_graph.has_relation(
            dep_head, dep_dependent, direct_link=False)
        if not already_linked:
            oia_head = oia_graph.add_words(dep_head.position)
            oia_dependent = oia_graph.add_words(dep_dependent.position)
            oia_graph.add_mod(oia_dependent, oia_head)
def parallel_list(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                  context: UD2OIAContext):
    """
    Build a ``LIST`` auxiliary predicate for every group of list items.

    A group is a node together with all of its children reached through a
    relation containing ``"list"``; the members are ordered by ``LOC`` and
    become arguments 1..n of a fresh ``LIST`` aux node.

    :param dep_graph: dependency graph to scan
    :param oia_graph: OIA graph that receives the aux node and arguments
    :param context: conversion context; not used by this converter
    :return: None
    """
    groups = []
    for head in dep_graph.nodes():
        members = [
            child for child, _ in dep_graph.children(
                head, filter=lambda n, l: "list" in l)
        ]
        if members:
            # The head itself is one of the list items.
            members.append(head)
            members.sort(key=lambda item: item.LOC)
            groups.append(members)

    # All groups are collected first; the OIA graph is only written afterwards.
    for members in groups:
        list_pred = oia_graph.add_aux("LIST")
        for arg_index, member in enumerate(members, start=1):
            oia_member = oia_graph.add_words(member.position)
            oia_graph.add_argument(list_pred, oia_member, arg_index)
def adj_modifier(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                 context: UD2OIAContext):
    """
    Attach ``amod`` dependents to their head as modifiers.

    Adjectives placed before the noun are handled by the noun-phrase
    processing; this converter covers the remaining ``amod`` edges
    (e.g. "a pretty little boy").

    :param dep_graph: dependency graph searched for ``amod`` edges
    :param oia_graph: OIA graph that receives the words and the relation
    :param context: conversion context; not used by this converter
    :return: None
    """
    pattern = DependencyGraph()
    noun_node = pattern.create_node()  # UPOS="NOUN|PRON|PROPN")
    adj_node = pattern.create_node()  # UPOS="ADJ|NOUN")
    pattern.add_dependency(noun_node, adj_node, r'amod')

    for match in dep_graph.match(pattern):
        dep_noun, dep_adj = match[noun_node], match[adj_node]

        oia_noun = oia_graph.add_words(dep_noun.position)
        oia_adj = oia_graph.add_words(dep_adj.position)

        # One debug record per item, in the same order as before.
        for item in ("adj_modifier: ", dep_noun.position, oia_noun,
                     dep_adj.position, oia_adj):
            logger.debug(item)

        oia_graph.add_mod(oia_adj, oia_noun)
def standardize(self, dep_graph: DependencyGraph, oia_graph: OIAGraph,
                context: UD2OIAContext, options: WorkFlowOptions):
    """
    Run every standardizer in ``self.standardizers`` over ``oia_graph``.

    A context hook is installed on the graph that sets ``self.standardized``
    whenever it is invoked; in debug mode a snapshot is recorded after each
    standardizer for which the hook fired.

    @param dep_graph: not used by this method
    @type dep_graph: DependencyGraph
    @param oia_graph: graph that is standardized in place
    @type oia_graph: OIAGraph
    @param context: provides ``debugger.record`` when ``options.debug`` is set
    @type context: UD2OIAContext
    @param options: only ``options.debug`` is read
    @type options: WorkFlowOptions
    @return: None
    @raise Exception: whatever a standardizer raises, after it is logged
    """

    def _mark_standardized():
        # Called through the graph's context hook.
        self.standardized = True
        return self.working_stack()

    oia_graph.set_context_hook(_mark_standardized)

    if options.debug:
        context.debugger.record("standardize", "init", oia_graph)

    for standardizer in self.standardizers:
        try:
            self.standardized = False
            standardizer.forward(oia_graph)
            if options.debug and self.standardized:
                context.debugger.record("standardize",
                                        type(standardizer).__name__,
                                        oia_graph)
        except Exception:
            # The "{}" placeholder is required: without it the standardizer
            # name passed as a format argument never appears in the log.
            logger.opt(exception=True).error(
                "Error when running Standardize: {}",
                type(standardizer).__name__)
            raise
def and_or_conjunction(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                       context: UD2OIAContext):
    """
    Wire coordination ("I like apples, bananas and oranges.") into the OIA graph.

    Every node that has children via a relation starting with ``arg_con``
    becomes a predicate; each such child becomes one of its arguments, with
    the argument index read from the relation itself.

    :param dep_graph: dependency graph to scan
    :param oia_graph: OIA graph that receives the words and arguments
    :param context: conversion context; not used by this converter
    :return: None
    """
    for head in dep_graph.nodes():
        conjuncts = list(
            dep_graph.children(head,
                               filter=lambda n, l: l.startswith("arg_con")))
        if conjuncts:
            oia_head = oia_graph.add_words(head.position)
            for conjunct, rels in conjuncts:
                oia_conjunct = oia_graph.add_words(conjunct.position)
                # NOTE(review): assumes rels.values() is indexable and its
                # first item is the argument number - confirm against the
                # relation type.
                oia_graph.add_argument(oia_head, oia_conjunct,
                                       int(rels.values()[0]))
def acl_mod_verb(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                 context: UD2OIAContext):
    """
    Attach an ``acl`` clause (without a mark) to the word it modifies.

    This is meant to run after adnominal_clause_mark, so no mark is expected
    here. For each nominal/verb pair joined by an ``acl`` edge that is neither
    processed in ``context`` nor already related in the OIA graph, the verb is
    added as a modifier of the nominal.

    :param dep_graph: dependency graph searched for ``acl`` edges
    :param oia_graph: OIA graph that receives the words and the relation
    :param context: conversion context; consulted through ``is_processed``
    :return: None
    :raises AssertionError: if the edge carries more than one ``acl:`` sub-label
    """
    pattern = DependencyGraph()

    # ADJ is for the cases that "many/some" are abbreviations of
    # "many X/some X", representing a NOUN.
    # ADV is for the case of "here" in "i am here thinking xxx".
    noun_node = pattern.create_node(UPOS="NOUN|PRON|PROPN|ADJ|ADV|NUM")
    # AUX is for "can"/"have" when the true verb is omitted.
    verb_node = pattern.create_node(UPOS="VERB|AUX")

    pattern.add_nodes([noun_node, verb_node])
    pattern.add_dependency(noun_node, verb_node, r'acl')

    for match in dep_graph.match(pattern):
        dep_noun_node = match[noun_node]
        dep_verb_node = match[verb_node]

        if context.is_processed(dep_noun_node, dep_verb_node):
            continue

        if oia_graph.has_relation(dep_noun_node, dep_verb_node,
                                  direct_link=False):
            continue

        oia_verb_node = oia_graph.add_words(dep_verb_node.position)
        oia_noun_node = oia_graph.add_words(dep_noun_node.position)

        # The sub-label (e.g. "acl:xxx") used to be extracted into a predicate
        # name, but nothing consumed it; only the sanity check is retained.
        dep = dep_graph.get_dependency(dep_noun_node, dep_verb_node)
        labels = [x for x in dep.rels if x.startswith("acl:")]
        if labels:
            assert len(labels) == 1

        oia_graph.add_mod(oia_verb_node, oia_noun_node)
def adverbial_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                     context: UD2OIAContext):
    """
    Convert ``advcl`` edges into OIA relations.

    * "run in order to catch it."          - advcl with a mark
    * "he worked hard, replacing his feud." - advcl without a mark

    With a mark, the mark becomes a predicate taking the head as argument 1
    (``mod=True``) and the clause as argument 2, unless the mark's lemma is
    listed in ``CONJUNCTION_WORDS[language]``. Without a mark, the clause is
    added as a plain modifier of the head.

    :param dep_graph: dependency graph searched for ``advcl`` edges
    :param oia_graph: OIA graph that receives the words and relations
    :param context: conversion context; consulted through ``is_processed``
    :return: None
    """
    pattern = DependencyGraph()
    verb_node = pattern.create_node()
    modifier_node = pattern.create_node()
    pattern.add_dependency(verb_node, modifier_node, "advcl")

    for match in list(dep_graph.match(pattern)):
        dep_head = match[verb_node]
        dep_clause = match[modifier_node]

        if context.is_processed(dep_head, dep_clause):
            continue

        oia_head = oia_graph.add_words(dep_head.position)
        oia_clause = oia_graph.add_words(dep_clause.position)

        logger.debug("adverbial clause: verb={0}, modifier={1}".format(
            dep_head.position, dep_clause.position))

        if oia_graph.has_relation(oia_head, oia_clause):
            continue

        marks = list(
            dep_graph.children(dep_clause,
                               filter=lambda n, rel: "mark" in rel))

        if not marks:
            oia_graph.add_mod(oia_clause, oia_head)
            continue

        # Only the first mark is considered.
        mark_node, _ = marks[0]
        pred_node = oia_graph.add_words(mark_node.position)

        if pred_node is None or mark_node.LEMMA in CONJUNCTION_WORDS[language]:
            continue

        oia_graph.add_argument(pred_node, oia_head, 1, mod=True)
        oia_graph.add_argument(pred_node, oia_clause, 2)
def det_predet(dep_graph: DependencyGraph, oia_graph: OIAGraph,
               context: UD2OIAContext):
    """
    Attach every ``det:predet`` dependent to its head as a modifier.

    :param dep_graph: dependency graph whose dependencies are scanned
    :param oia_graph: OIA graph that receives the words and the relation
    :param context: conversion context; not used by this converter
    :return: None
    """
    for head, dependent, dep in dep_graph.dependencies():
        if "det:predet" not in dep:
            continue

        oia_head = oia_graph.add_words(head.position)
        oia_dependent = oia_graph.add_words(dependent.position)
        oia_graph.add_mod(oia_dependent, oia_head)
def nmod_with_case(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                   context: UD2OIAContext):
    """
    Convert ``nmod`` edges whose dependent carries a ``case`` child.

    * "the office of the chair"
    * "Istanbul in Turkey"

    The case word becomes a predicate with the parent as argument 1
    (``mod=True``) and the nmod child as argument 2. Pairs already related in
    the OIA graph are skipped.

    The predicate is always the case word itself. An earlier comparison of
    the relation label against ``"nmod:" + LEMMA/FORM`` selected between two
    identical branches and has been removed (the label can differ from the
    lemma, e.g. "vs"/"versus", "according"/"accord").

    :param dep_graph: dependency graph searched for nmod+case patterns
    :param oia_graph: OIA graph that receives the words and arguments
    :param context: conversion context; not used by this converter
    :return: None
    """
    pattern = DependencyGraph()

    parent_node = DependencyGraphNode()
    child_node = DependencyGraphNode()
    case_node = DependencyGraphNode()

    pattern.add_nodes([parent_node, child_node, case_node])
    pattern.add_dependency(parent_node, child_node, r'\w*nmod\w*')
    pattern.add_dependency(child_node, case_node, r'\w*case\w*')

    for match in dep_graph.match(pattern):
        dep_parent_node = match[parent_node]
        dep_child_node = match[child_node]
        dep_case_node = match[case_node]

        if oia_graph.has_relation(dep_parent_node, dep_child_node):
            continue

        pred_node = oia_graph.add_words(dep_case_node.position)
        arg1_node = oia_graph.add_words(dep_parent_node.position)
        arg2_node = oia_graph.add_words(dep_child_node.position)

        oia_graph.add_argument(pred_node, arg1_node, 1, mod=True)
        oia_graph.add_argument(pred_node, arg2_node, 2)
def single_node(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                context: UD2OIAContext):
    """
    Add the only content word to the OIA graph when the sentence has just one.

    Nodes whose ``UPOS`` is ``ROOT`` or ``PUNCT`` are ignored when counting.

    :param dep_graph: dependency graph to inspect
    :param oia_graph: OIA graph that receives the single word
    :param context: conversion context; not used by this converter
    :return: None
    """
    skipped_tags = {"ROOT", "PUNCT"}
    content_nodes = [
        candidate for candidate in dep_graph.nodes()
        if candidate.UPOS not in skipped_tags
    ]

    if len(content_nodes) != 1:
        return

    (only_node,) = content_nodes
    oia_graph.add_words(only_node.position)
def backward(self, oia_graph: OIAGraph, ** kwargs):
    """
    Merge argument-only conjunction nodes back into a single span node.

    For every node accepted by ``is_conjunction_without_args`` whose
    ``pred.arg.*`` children are all leaves, the children are removed and the
    node is replaced by one span running from the smallest to the largest
    integer word index found among the children.

    @param oia_graph: graph that is modified in place
    @type oia_graph: OIAGraph
    @param kwargs: not used
    @type kwargs: dict
    @return: True if at least one merge was performed
    @rtype: bool
    """
    fixed = False

    for node in list(oia_graph.nodes()):
        if not is_conjunction_without_args(node, oia_graph):
            continue

        relations = [
            (child, edge.label)
            for child, edge in oia_graph.children(node)
            if edge.label.startswith("pred.arg.")
        ]
        if not relations:
            continue

        # A child that has children of its own blocks the merge.
        if any(list(oia_graph.children(child)) for child, _ in relations):
            continue

        merged_words = [
            word for child, _ in relations for word in child.words()
        ]
        # Only integer entries are word indices; anything else is ignored.
        indices = [word for word in merged_words if isinstance(word, int)]
        new_node = oia_graph.add_spans([(min(indices), max(indices))])

        fixed = True

        for child, _ in relations:
            oia_graph.remove_node(child)

        oia_graph.replace(node, new_node)

        merged_text = "|".join(
            oia_graph.node_text(child) for child, _ in relations)
        logger.debug("Merging {0} to {1}".format(
            merged_text, oia_graph.node_text(new_node)))

    return fixed
def make_dag(dep_graph: DependencyGraph, oia_graph: OIAGraph,
             context: UD2OIAContext):
    """
    Break cycles in the OIA graph.

    The simple cycles of ``oia_graph.g`` are enumerated once, up front.
    Self loops are deleted. For longer cycles, every ``ref`` edge on the cycle
    is reversed and relabelled ``as:ref``; edges with any other label are left
    alone, so a cycle without a ``ref`` edge is not broken here.

    :param dep_graph: not used by this converter
    :param oia_graph: OIA graph that is modified in place
    :param context: conversion context; not used by this converter
    :return: None
    """
    cycles = list(nx.algorithms.cycles.simple_cycles(oia_graph.g))

    for cycle in cycles:
        if len(cycle) == 1:
            # self loop
            oia_graph.g.remove_edge(cycle[0], cycle[0])
            continue

        # Close the cycle so that pairwise() also yields the last->first edge.
        cycle.append(cycle[0])

        for v1, v2 in pairwise(cycle):
            n1 = oia_graph.get_node(v1)
            n2 = oia_graph.get_node(v2)

            relation = oia_graph.get_edge(n1, n2)
            if relation is None:
                # The edge may already have been reversed for another cycle.
                continue

            if relation.label == "ref":
                oia_graph.remove_relation(n1, n2)
                oia_graph.add_relation(n2, n1, "as:ref")
def nmod_without_case(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                      context: UD2OIAContext):
    """
    Attach the remaining ``nmod`` dependents to their head as modifiers.

    An ``nmod:poss`` edge is skipped when the head is itself among the
    offspring of the dependent (the "whose" case), and any pair for which
    ``has_relation(..., direct_link=False)`` is already true is skipped too.

    :param dep_graph: dependency graph searched for ``nmod`` edges
    :param oia_graph: OIA graph that receives the words and the relation
    :param context: conversion context; not used by this converter
    :return: None
    """
    pattern = DependencyGraph()
    center_node = pattern.create_node()
    modifier_node = pattern.create_node()
    pattern.add_dependency(center_node, modifier_node, r'\w*nmod\w*')

    for match in dep_graph.match(pattern):
        dep_center = match[center_node]
        dep_modifier = match[modifier_node]

        rels = dep_graph.get_dependency(dep_center, dep_modifier)

        if "nmod:poss" in rels:
            # whose in there
            if dep_center in set(dep_graph.offsprings(dep_modifier)):
                continue

        if oia_graph.has_relation(dep_center, dep_modifier,
                                  direct_link=False):
            continue

        oia_center = oia_graph.add_words(dep_center.position)
        oia_modifier = oia_graph.add_words(dep_modifier.position)
        oia_graph.add_mod(oia_modifier, oia_center)
def two_node_with_case(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                       context: UD2OIAContext):
    """
    Handle a two-word sentence made of a case word followed by its head.

    ``ROOT`` and ``PUNCT`` nodes are ignored. If exactly two nodes remain and
    the later one (by ``LOC``) governs the earlier one through ``case``, the
    case word becomes a predicate with the other word as its argument 2.

    :param dep_graph: dependency graph to inspect
    :param oia_graph: OIA graph that receives the words and the argument
    :param context: conversion context; not used by this converter
    :return: None
    """
    skipped_tags = {"ROOT", "PUNCT"}
    content_nodes = [
        candidate for candidate in dep_graph.nodes()
        if candidate.UPOS not in skipped_tags
    ]

    if len(content_nodes) != 2:
        return

    case_node, noun_node = sorted(content_nodes, key=lambda x: x.LOC)

    if dep_graph.get_dependency(noun_node, case_node) != "case":
        return

    oia_case_node = oia_graph.add_words(case_node.position)
    oia_noun_node = oia_graph.add_words(noun_node.position)
    oia_graph.add_argument(oia_case_node, oia_noun_node, 2)
def standardize_oia_repo(output_file_path, old_standard):
    """
    Standardize every graph of the default OIA repo into a new repo.

    The "train", "dev" and "test" splits are processed in that order; each
    parsed graph is deep-copied, optionally upgraded with
    ``upgrade_to_new_standard``, standardized, and the results of a split are
    inserted into the target repo in one call.

    @param output_file_path: passed to ``OIARepo`` to create the target repo
    @type output_file_path: presumably a path string - confirm against OIARepo
    @param old_standard: when truthy, graphs are upgraded before standardizing
    @type old_standard: bool
    @return: None
    @rtype: None
    @raise Exception: re-raised from the standardizer after logging the sentence
    """
    source_oia_repo = OIARepo()
    target_oia_repo = OIARepo(output_file_path)

    standardizer = OIAStandardizer()
    context = UD2OIAContext()
    options = WorkFlowOptions()

    for source in ["train", "dev", "test"]:
        progress = tqdm.tqdm(source_oia_repo.all(source),
                             "Standardize {}:".format(source))

        standardized_graphs = []
        for oia in progress:
            origin_oia_graph = OIAGraph.parse(oia)
            uri = origin_oia_graph.meta["uri"]

            # Work on a copy so the parsed original stays untouched.
            oia_graph = copy.deepcopy(origin_oia_graph)

            logger.info("Standardizing {}:{}".format(source, uri))

            if old_standard:
                upgrade_to_new_standard(oia_graph)
                logger.info("Update to new standard {}:{}".format(source, uri))

            try:
                standardizer.standardize(None, oia_graph, context, options)
            except Exception as e:
                logger.error("Sentence {0} standardize error".format(uri))
                logger.error("Sentence = " + " ".join(oia_graph.words))
                raise e

            standardized_graphs.append(oia_graph)

        target_oia_repo.insert(source, standardized_graphs)
def graph_match_metric(pred_graph: OIAGraph, truth_graph: OIAGraph):
    """
    Count matching nodes and edges between a predicted and a reference graph.

    Nodes are compared by their text; edges are compared as
    ``(source text, label, target text)`` triples, with the label stripped of
    surrounding double quotes and spaces. A predicted item counts as matched
    when an equal item occurs anywhere in the reference list.

    :param pred_graph: predicted OIA graph
    :param truth_graph: reference OIA graph
    :return: ``((node_pred, node_true, node_match),
              (edge_pred, edge_true, edge_match), exact_same)`` where
              ``exact_same`` is True when all three node counts are equal and
              all three edge counts are equal
    """

    def _edge_triples(graph):
        # Textual (source, label, target) form of every edge of ``graph``.
        return [(graph.node_text(n1), edge.label.strip("\" "),
                 graph.node_text(n2)) for n1, edge, n2 in graph.edges()]

    pred_nodes = [pred_graph.node_text(n) for n in pred_graph.nodes()]
    true_nodes = [truth_graph.node_text(n) for n in truth_graph.nodes()]

    node_counts = (
        len(pred_nodes),
        len(true_nodes),
        sum(text in true_nodes for text in pred_nodes),
    )

    pred_edges = _edge_triples(pred_graph)
    true_edges = _edge_triples(truth_graph)

    logger.debug(pred_edges)
    logger.debug(true_edges)

    edge_counts = (
        len(pred_edges),
        len(true_edges),
        sum(triple in true_edges for triple in pred_edges),
    )

    nodes_agree = node_counts[2] == node_counts[1] == node_counts[0]
    edges_agree = edge_counts[2] == edge_counts[1] == edge_counts[0]
    exact_same = nodes_agree and edges_agree

    return node_counts, edge_counts, exact_same
def fallback_sconj(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                   context: UD2OIAContext):
    """
    Recover subordinating conjunctions that no other converter consumed.

    A node qualifies when its position is not yet in the OIA graph, its UPOS
    is ``SCONJ`` and its lemma is one of a fixed set of connectives. Its
    ``mark`` parent then becomes argument 1 of the conjunction, unless the
    (parent, conjunction) edge is already marked as processed in ``context``.

    The previous ``because``/``if`` special case selected between two
    identical calls, so every qualifying lemma is handled the same way.

    :param dep_graph: dependency graph to scan
    :param oia_graph: OIA graph that receives the words and the argument
    :param context: conversion context; ``processed_edges`` is logged and
        ``is_processed`` is consulted
    :return: None
    :raises AssertionError: if a qualifying node has more than one ``mark``
        parent
    """
    fallback_lemmas = {
        "because", "so", "if", "then", "otherwise", "after", "before", "and",
        "or", "but"
    }

    for node in dep_graph.nodes():
        if oia_graph.has_word(node.position):
            continue

        if node.UPOS == "SCONJ" and node.LEMMA in fallback_lemmas:
            parents = [n for n, l in dep_graph.parents(node) if "mark" in l]
            if not parents:
                continue

            assert len(parents) == 1
            parent = parents[0]

            logger.debug("context = " + str(context.processed_edges))

            if context.is_processed(parent, node):
                continue

            oiar_node = oia_graph.add_words(parent.position)
            oia_sconj_node = oia_graph.add_words(node.position)

            oia_graph.add_argument(oia_sconj_node, oiar_node, 1)
def it_be_adjv_that(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                    context: UD2OIAContext):
    """
    Handle the expletive construction "it is xxx that ...".

    The pattern is a VERB with an ``expl`` child whose lemma is "it" and a
    ``csubj`` child (ADJ or ADV) that in turn has a ``mark`` child with lemma
    "that". "it" becomes argument 1 of the verb, a ``ref`` is added from the
    csubj node to "it", and the (verb, it) edge is marked as processed.

    :param dep_graph: dependency graph searched for the pattern
    :param oia_graph: OIA graph that receives the words and relations
    :param context: conversion context; read and updated for the (verb, it) edge
    :return: None
    """
    pattern = DependencyGraph()

    it_node = pattern.create_node(LEMMA="it")
    be_node = pattern.create_node(UPOS="VERB")
    csubj_node = pattern.create_node(UPOS="ADJ|ADV")
    that_node = pattern.create_node(LEMMA="that")

    pattern.add_dependency(be_node, it_node, r'expl')
    pattern.add_dependency(be_node, csubj_node, r'csubj')
    pattern.add_dependency(csubj_node, that_node, r'mark')

    for match in dep_graph.match(pattern):
        dep_be = match[be_node]
        dep_it = match[it_node]
        _dep_that = match[that_node]  # matched, but not added to the graph
        dep_csubj = match[csubj_node]

        if context.is_processed(dep_be, dep_it):
            continue

        oia_it = oia_graph.add_words(dep_it.position)
        oia_csubj = oia_graph.add_words(dep_csubj.position)
        oia_be = oia_graph.add_words(dep_be.position)

        oia_graph.add_argument(oia_be, oia_it, 1)
        oia_graph.add_ref(oia_csubj, oia_it)

        context.processed(dep_be, dep_it)
def generate(self, dep_graph: DependencyGraph, context: UD2OIAContext,
             options: WorkFlowOptions):
    """
    Build an OIA graph by running every generator in ``self.generators``.

    The graph's word list is the ``FORM`` of the original dependency nodes
    with ``LOC >= 0``, de-duplicated by ``ID`` and ordered by ``LOC``. A
    generator that raises is logged (and recorded in debug mode) and the
    remaining generators still run.

    @param dep_graph: dependency graph handed to every generator
    @type dep_graph: DependencyGraph
    @param context: passed to the generators; supplies ``debugger`` in debug mode
    @type context: UD2OIAContext
    @param options: only ``options.debug`` is read
    @type options: WorkFlowOptions
    @return: the generated graph
    @rtype: OIAGraph
    """
    oia_graph = OIAGraph()

    nodes_by_id = {
        node.ID: node
        for node in dep_graph.origin_nodes() if node.LOC >= 0
    }
    ordered_nodes = sorted(nodes_by_id.values(), key=lambda node: node.LOC)
    oia_graph.set_words([node.FORM for node in ordered_nodes])

    logger.debug(oia_graph.words)

    def _mark_updated():
        # Called through the graph's context hook.
        self.oia_updated = True
        return self.working_stack()

    oia_graph.set_context_hook(_mark_updated)

    for generator in self.generators:
        try:
            self.oia_updated = False
            generator(dep_graph, oia_graph, context)
            if options.debug and self.oia_updated:
                context.debugger.record("generate", generator.__name__,
                                        oia_graph)
        except Exception as e:
            # Best effort: report the failure and carry on with the next one.
            if options.debug:
                context.debugger.record("generate", generator.__name__,
                                        oia_graph, str(e))
            traceback.print_exc(file=sys.stderr)
            logger.error(e)

    return oia_graph
def adv_relative_clause(dep_graph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Convert when/where-style relative clauses.

    Example: "a time when US troops won" / "a place where US troops won".

    The pattern is an ``acl:relcl`` edge whose clause head has an ``advmod``
    child; the child is accepted when its lemma contains one of "when",
    "where", "how", "why" or "what". That word becomes a predicate with the
    modified word as argument 1 (``mod=True``) and the clause head as
    argument 2.

    :param dep_graph: dependency graph searched for the pattern
    :param oia_graph: OIA graph that receives the words and arguments
    :param context: conversion context; not used by this converter
    :return: None
    """
    pattern = DependencyGraph()

    modified_node = pattern.create_node()
    modifier_node = pattern.create_node()
    adv_rel_node = pattern.create_node()

    pattern.add_dependency(modified_node, modifier_node, r'acl:relcl\w*')
    pattern.add_dependency(modifier_node, adv_rel_node, r'advmod')

    for match in dep_graph.match(pattern):
        dep_target = match[modified_node]
        dep_clause = match[modifier_node]
        dep_relativizer = match[adv_rel_node]

        # Substring test on the lemma, not equality.
        is_relativizer = any(
            x in dep_relativizer.LEMMA
            for x in {"when", "where", "how", "why", "what"})
        if not is_relativizer:
            continue

        oia_pred = oia_graph.add_words(dep_relativizer.position)
        oia_target = oia_graph.add_words(dep_target.position)
        oia_clause = oia_graph.add_words(dep_clause.position)

        if oia_graph.has_relation(oia_clause, oia_target):
            continue

        oia_graph.add_argument(oia_pred, oia_target, 1, mod=True)
        oia_graph.add_argument(oia_pred, oia_clause, 2)
def parataxis(dep_graph: DependencyGraph, oia_graph: OIAGraph,
              context: UD2OIAContext):
    """
    Convert ``parataxis`` groups into a single predicate over all members.

    A group is a node plus its ``parataxis`` children, ordered by ``LOC``.
    Between each pair of neighbours a connector is looked for: first an
    ``advmod`` child of the later member (SCONJ, or lemma "so"), otherwise a
    ``punct`` child of the group head among ":", ";", "--", ",", in both
    cases located strictly between the two members.

    * no connector at all: a ``PARATAXIS`` aux node is the predicate;
    * a single pair with a connector: the connector is the predicate;
    * otherwise: a template "{1} <con> {2} <con> {3} ..." is the predicate.

    :param dep_graph: dependency graph to scan
    :param oia_graph: OIA graph that receives the predicate and arguments
    :param context: conversion context; not used by this converter
    :return: None
    """
    for dep_node in list(dep_graph.nodes()):
        parallel_nodes = [
            child for child, label in dep_graph.children(dep_node)
            if "parataxis" == label
        ]
        if not parallel_nodes:
            continue

        parallel_nodes.append(dep_node)
        parallel_nodes.sort(key=lambda x: x.LOC)

        # One entry (connector node or None) per neighbouring pair.
        predicates = []

        for former, latter in more_itertools.pairwise(parallel_nodes):
            advcon = [
                n for n, l in dep_graph.children(
                    latter,
                    filter=lambda n, l: "advmod" in l and
                    (former.LOC < n.LOC < latter.LOC) and
                    (n.UPOS == "SCONJ" or n.LEMMA in {"so"}))
            ]

            coloncon = [
                n for n, l in dep_graph.children(
                    dep_node,
                    filter=lambda n, l: "punct" in l and n.FORM in
                    {":", ";", "--", ","} and
                    (former.LOC < n.LOC < latter.LOC))
            ]

            # The adverbial connector wins over punctuation. Its dependency is
            # kept in dep_graph; removing it was considered because
            # adv_modifier may pick the connector up again.
            if advcon:
                connector = advcon[0]
            else:
                connector = coloncon[0] if coloncon else None

            predicates.append(connector)

        if all(x is None for x in predicates):
            oia_pred_node = oia_graph.add_aux("PARATAXIS")
        elif len(predicates) == 1:
            oia_pred_node = oia_graph.add_words(predicates[0].position)
        else:
            position = ["{1}"]
            for slot, connector in enumerate(predicates, start=2):
                if connector is not None:
                    position.extend(connector.position)
                position.append("{{{0}}}".format(slot))
            oia_pred_node = oia_graph.add_words(position)

        for arg_index, member in enumerate(parallel_nodes, start=1):
            oia_arg_node = oia_graph.add_words(member.position)
            oia_graph.add_argument(oia_pred_node, oia_arg_node, arg_index)
def simple_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext): """ :TODO badcase Attached is a new link :param dep_graph: :param oia_graph: :return: """ # for node in dep_graph.nodes(): # print('node:',node) for pred_node in dep_graph.nodes( filter=lambda x: x.UPOS in {"VERB", "ADJ", "NOUN", "AUX", "ADV"}): # ADJ is for "With the demand so high," # NOUN is for "X the best for Y" # AUX is for have in "I have a cat" # print('pred_node', pred_node) expl = None nsubj = None subj = None objs = [] for child, rel in dep_graph.children(pred_node): # print('child node:', child) # print('child rel:', rel) if ('nsubj' in rel or "csubj" in rel): # and ":xsubj" not in rel: nsubj = child elif rel.startswith('obj'): objs.append((child, 1)) elif rel.startswith('iobj'): objs.append((child, 0)) elif 'ccomp' in rel or "xcomp" in rel: # and child.UPOS == "VERB": objs.append((child, 2)) elif "expl" in rel: expl = child if nsubj: # if pred_node.LOC < nsubj.LOC: # # TODO: in what situation? 
# objs.insert(0, nsubj) # else: subj = nsubj if expl: # It VERB subj that # VERB subj it that if expl.LOC < pred_node.LOC: subj = expl objs.insert(0, (subj, -1)) else: # expl.LOC > pred_node.LOC: objs.insert(0, (expl, -1)) if not subj and not objs: continue pred_node = oia_graph.add_words(pred_node.position) if not pred_node: continue arg_index = 1 if subj is not None: if not oia_graph.has_relation(pred_node, subj): subj_node = oia_graph.add_words(subj.position) oia_graph.add_argument(pred_node, subj_node, arg_index) arg_index += 1 objs.sort(key=lambda x: x[1]) for obj, weight in objs: # print('obj:',obj) oia_obj_node = oia_graph.add_words(obj.position) # def __sconj_node(n): # # that conj is ommited # return (n.UPOS == "SCONJ" and n.LEMMA not in {"that"}) def __adv_question_node(n): return ((n.UPOS == "ADV" and n.LEMMA in {"when", "where", "how", "whether"})) # # def __pron_question_node(n): # return (n.UPOS == "PRON" and n.LEMMA in {"what", "who", "which"}) # def __interested_node2(n): # # that conj is ommited # return (n.UPOS == "PART") # sconj_nodes = [n for n, l in dep_graph.children(obj, # filter=lambda n,l: l == "mark" and __sconj_node(n))] adv_question_nodes = [ n for n, l in dep_graph.children( obj, filter=lambda n, l: l == "mark" and __adv_question_node(n)) ] # subj_question_nodes = [n for n, l in dep_graph.children(obj, # filter=lambda n,l: "subj" in l and __pron_question_node(n))] # # obj_question_nodes = [n for n, l in dep_graph.children(obj, # filter=lambda n, # l: ("obj" in l or "comp") in l and __pron_question_node( # n))] # nodes_of_interests2 = [n for n, l in dep_graph.children(obj, # filter=lambda n,l: l == "advmod" and __interested_node2(n))] # print('nodes_of_interests:', nodes_of_interests) # if nodes_of_interests2: # assert len(nodes_of_interests2) == 1 # interest_node = nodes_of_interests2[0] # oia_interest_node = oia_graph.add_word_with_head(interest_node.LOC) # oia_graph.add_argument(pred_node, oia_interest_node, arg_index) # # 
oia_graph.add_function(oia_interest_node, oia_obj_node) # arg_index += 1 # oia_graph.add_argument(oia_interest_node, oia_obj_node, arg_index) # arg_index += 1 if adv_question_nodes: assert len(adv_question_nodes) == 1 interest_node = adv_question_nodes[0] oia_interest_node = oia_graph.add_words(interest_node.position) oia_graph.add_argument(pred_node, oia_interest_node, arg_index) oia_graph.add_function(oia_interest_node, oia_obj_node) else: if not oia_graph.has_relation(pred_node, obj): oia_graph.add_argument(pred_node, oia_obj_node, arg_index) arg_index += 1 pattern = DependencyGraph() parent_pred = pattern.create_node() child_pred = pattern.create_node() question_word = pattern.create_node(LEMMA=r'what|who') pattern.add_dependency(parent_pred, child_pred, r'subj|nsubj|iobj|obj|xcomp|ccomp') pattern.add_dependency(parent_pred, question_word, r'subj|nsubj|iobj|obj|xcomp|ccomp') pattern.add_dependency(child_pred, question_word, r'subj|nsubj|iobj|obj|xcomp|ccomp') for match in dep_graph.match(pattern): dep_parent_pred, dep_child_pred, dep_question_word = [ match[x] for x in [parent_pred, child_pred, question_word] ] oia_parent_pred, oia_child_pred, oia_question_word = [ oia_graph.add_words(x.position) for x in [dep_parent_pred, dep_child_pred, dep_question_word] ] oia_question_word.is_func = True rel = oia_graph.get_edge(oia_child_pred, oia_question_word) oia_graph.remove_relation(oia_child_pred, oia_question_word) oia_graph.remove_relation(oia_parent_pred, oia_child_pred) oia_graph.add_relation(oia_question_word, oia_child_pred, "mod_by:" + rel.label)
def advcl_mark_sconj(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                     context: UD2OIAContext):
    """
    Convert ``advcl`` clauses introduced by a known conjunction word.

    The pattern is ``pred1 -advcl-> pred2 -mark-> sconj``. Only marks whose
    lemma is in ``CONJUNCTION_WORDS[language]`` are handled; both edges are
    then marked as processed and the conjunction becomes a predicate with the
    subordinate clause (pred2) as argument 1 and the main clause (pred1) as
    argument 2.

    For "if", an ``advmod`` child "then" of pred1 is folded into the predicate
    as the template "if {1} then {2}".

    A former fallback branch for marks outside ``CONJUNCTION_WORDS`` could
    never run, because such marks are skipped at the top of the loop; it has
    been removed.

    :param dep_graph: dependency graph searched for the pattern
    :param oia_graph: OIA graph that receives the words and arguments
    :param context: conversion context; updated through ``processed``
    :return: None
    :raises AssertionError: if pred1 has more than one "then" advmod child
    """
    pattern = DependencyGraph()
    pred1_node = pattern.create_node()
    pred2_node = pattern.create_node()
    # The mark is not restricted to UPOS="SCONJ"; the lemma check below decides.
    sconj_node = pattern.create_node()

    pattern.add_dependency(pred1_node, pred2_node, r'advcl\w*')
    pattern.add_dependency(pred2_node, sconj_node, 'mark')

    for match in list(dep_graph.match(pattern)):
        dep_pred1_node = match[pred1_node]
        dep_pred2_node = match[pred2_node]
        dep_sconj_node = match[sconj_node]

        if dep_sconj_node.LEMMA not in CONJUNCTION_WORDS[language]:
            continue

        context.processed(dep_pred2_node, dep_sconj_node)
        context.processed(dep_pred1_node, dep_pred2_node)

        oia_pred1_node = oia_graph.add_words(dep_pred1_node.position)
        oia_pred2_node = oia_graph.add_words(dep_pred2_node.position)

        condition_position = dep_sconj_node.position

        if dep_sconj_node.LEMMA == "if":
            # check whether there is "then"
            dep_then_nodes = [
                n for n, l in dep_graph.children(dep_pred1_node)
                if n.LEMMA == "then" and l == "advmod"
            ]

            if dep_then_nodes:
                assert len(dep_then_nodes) == 1
                dep_then_node = dep_then_nodes[0]
                context.processed(dep_pred1_node, dep_then_node)

                condition_position = dep_sconj_node.position + [
                    "{1}"
                ] + dep_then_node.position + ["{2}"]

        oia_condition_node = oia_graph.add_words(condition_position)

        oia_graph.add_argument(oia_condition_node, oia_pred2_node, 1)
        oia_graph.add_argument(oia_condition_node, oia_pred1_node, 2)
def forward(self, oia_graph: OIAGraph, **kwargs):
    """
    Replace selected auxiliary predicate nodes by a direct edge.

    Handles ``OIAAuxNode`` instances labelled VOC, APPOS, DISCOURSE,
    REPARANDUM, TOPIC or TIME_IN. The aux node is removed and its two
    arguments are linked directly with the edge label from the mapping below:

    * two children (``pred.arg.1`` and ``pred.arg.2``): ``arg1 -> arg2``, and
      every parent edge of the aux node is re-attached to ``arg1``;
    * one child ``pred.arg.1`` plus a parent via ``as:pred.arg.2``:
      ``parent -> child`` with the label prefixed by ``as:``;
    * one child ``pred.arg.2`` plus a parent via ``as:pred.arg.1``:
      ``parent -> child`` with the plain label.

    @param oia_graph: graph that is modified in place
    @type oia_graph: OIAGraph
    @param kwargs: not used
    @type kwargs: dict
    @return: None
    @rtype: None
    @raise AssertionError: if the aux node does not have the expected
        argument/parent structure
    @raise Exception: if a single child is attached by an unexpected label
    """
    node_edge_mapping = {
        "VOC": "vocative",
        "APPOS": "appos",
        "DISCOURSE": "discourse",
        "REPARANDUM": "reparandum",
        "TOPIC": "topic",
        "TIME_IN": "mod"
    }

    for node in list(oia_graph.nodes()):
        # The handled labels are exactly the keys of the mapping.
        if not (isinstance(node, OIAAuxNode) and
                node.label in node_edge_mapping):
            continue

        children = list(oia_graph.children(node))
        parents = list(oia_graph.parents(node))

        assert 0 < len(children) <= 2

        if len(children) == 2:
            arg1 = [
                child for child, edge in children
                if edge.label == "pred.arg.1"
            ]
            arg2 = [
                child for child, edge in children
                if edge.label == "pred.arg.2"
            ]
            assert len(arg1) == 1 and len(arg2) == 1
            arg1 = arg1[0]
            arg2 = arg2[0]

            oia_graph.add_relation(arg1, arg2, node_edge_mapping[node.label])

            # Whatever pointed at the aux node now points at its first argument.
            for parent, edge in parents:
                oia_graph.add_relation(parent, arg1, edge.label)

            oia_graph.remove_node(node)
            continue

        child, edge = children[0]

        if edge.label == "pred.arg.1":
            arg1 = child
            # Compare the edge's label, as the sibling branch does; comparing
            # the edge object itself to a string would not select the parent.
            arg2 = [p for p, l in parents if l.label == "as:pred.arg.2"]
            assert len(arg2) == 1, [l.label for p, l in parents]
            arg2 = arg2[0]

            oia_graph.add_relation(arg2, arg1,
                                   "as:" + node_edge_mapping[node.label])
            oia_graph.remove_node(node)

        elif edge.label == "pred.arg.2":
            arg2 = child
            arg1 = [p for p, l in parents if l.label == "as:pred.arg.1"]
            assert len(arg1) == 1, [l.label for p, l in parents]
            arg1 = arg1[0]

            oia_graph.add_relation(arg1, arg2, node_edge_mapping[node.label])
            oia_graph.remove_node(node)

        else:
            raise Exception("Unknow edges: {}".format(edge.label))
def adv_verb_modifier(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                      context: UD2OIAContext):
    """
    Convert ``advmod`` edges that were not consumed earlier.

    Adverbs placed before the verb are expected to be handled by the verb
    phrase processing; this converter deals with the rest, e.g.
    "verb1 in order to verb2". Three outcomes are possible:

    * the adverb has exactly one ``obl`` child and that child has no ``case``
      child: the adverb is a predicate over (head, obl child);
    * otherwise, if the adverb has a ``mark`` child: the first mark is a
      predicate over (head, adverb);
    * otherwise the adverb is a plain modifier of the head.

    An ``obl`` child that has a ``case`` child is left to the oblique
    processing.

    :param dep_graph: dependency graph searched for ``advmod`` edges
    :param oia_graph: OIA graph that receives the words and relations
    :param context: conversion context; consulted through ``is_processed``
    :return: None
    """
    pattern = DependencyGraph()

    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    verb_node = DependencyGraphNode(
        UPOS="VERB|NOUN|PROPN|AUX|PRON")  # aux is for be word
    adv_node = DependencyGraphNode(UPOS="ADV|X|NOUN|ADJ|VERB")

    pattern.add_nodes([verb_node, adv_node])
    pattern.add_dependency(verb_node, adv_node, r'advmod')

    for match in dep_graph.match(pattern):
        dep_head = match[verb_node]
        dep_adv = match[adv_node]

        if context.is_processed(dep_head, dep_adv):
            continue
        if oia_graph.has_relation(dep_head, dep_adv):
            continue

        obl_children = [
            x for x, l in dep_graph.children(
                dep_adv, filter=lambda n, l: l.startswith("obl"))
        ]

        obl_node = None
        obl_has_case = False
        if len(obl_children) == 1:
            obl_node = obl_children[0]
            case_nodes = [
                n for n, l in dep_graph.children(
                    obl_node, filter=lambda n, l: "case" in l)
            ]
            # if obl with case, let the oblique to process it
            obl_has_case = bool(case_nodes)

        mark_children = [
            x for x, l in dep_graph.children(
                dep_adv, filter=lambda n, l: l.startswith("mark"))
        ]

        oia_head = oia_graph.add_words(dep_head.position)
        oia_adv = oia_graph.add_words(dep_adv.position)

        if obl_node and not obl_has_case:
            oia_arg = oia_graph.add_words(obl_node.position)
            oia_graph.add_argument(oia_adv, oia_head, 1, mod=True)
            oia_graph.add_argument(oia_adv, oia_arg, 2)
        elif mark_children:
            oia_pred = oia_graph.add_words(mark_children[0].position)
            oia_graph.add_argument(oia_pred, oia_head, 1, mod=True)
            oia_graph.add_argument(oia_pred, oia_adv, 2)
        else:
            oia_graph.add_mod(oia_adv, oia_head)
def adv_ccomp(dep_graph: DependencyGraph, oia_graph: OIAGraph,
              context: UD2OIAContext):
    """
    Convert adverbial heads of ``ccomp``/``xcomp`` clauses into predicates.

    The work is done in two passes. The first pass only reads the graphs:
    for every (adverbial, complement) match that is not yet related in
    ``oia_graph`` it records the predicate words (the adverbial plus the
    ``case`` children of the complement located between the two), and the
    optional ``advmod`` parent of the adverbial. The second pass merges
    multi-word predicates into a single node of ``dep_graph`` and adds the
    arguments to ``oia_graph``.

    :param dep_graph: dependency graph; multi-word predicates are merged
        into a new node in place
    :param oia_graph: OIA graph that receives the predicate and arguments
    :param context: not used by this converter
    :return: None
    :raises Exception: when the adverbial has more than one matching
        ``advmod`` parent
    """
    pattern = DependencyGraph()

    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    adv_pat = pattern.create_node(UPOS="ADV|X|NOUN|PART")  # PART is for "not"
    comp_pat = pattern.create_node()
    pattern.add_dependency(adv_pat, comp_pat, r"ccomp|xcomp")

    pending = []

    for match in dep_graph.match(pattern):
        dep_adv = match[adv_pat]
        dep_comp = match[comp_pat]

        if oia_graph.has_relation(dep_adv, dep_comp):
            continue

        def _case_in_between(n, l):
            # `case` child located strictly between adverbial and complement
            return "case" == l and dep_adv.LOC < n.LOC < dep_comp.LOC

        def _advmod_head(n, l):
            return "advmod" == l and n.UPOS in {"ADV", "X", "NOUN"}

        case_nodes = [
            n for n, _ in dep_graph.children(dep_comp,
                                             filter=_case_in_between)
        ]

        parts = [dep_adv]
        if case_nodes:
            case_nodes = continuous_component(case_nodes, case_nodes[0])
            parts = parts + case_nodes
            parts.sort(key=lambda n: n.LOC)

        heads = [
            n for n, _ in dep_graph.parents(dep_adv, filter=_advmod_head)
        ]
        if len(heads) > 1:
            raise Exception("Multiple subject")
        dep_subj = heads[0] if len(heads) > 0 else None

        pending.append([dep_subj, parts, dep_comp])

    for dep_subj, parts, dep_comp in pending:

        if len(parts) > 1:
            # Fuse the adverbial and its case words into one predicate node.
            dep_pred = dep_graph.create_node(
                ID=" ".join(p.ID for p in parts),
                FORM=" ".join(p.FORM for p in parts),
                LEMMA=" ".join(p.LEMMA for p in parts),
                UPOS="ADV",
                LOC=parts[0].LOC)
            dep_pred.aux = True

            dep_graph.replace_nodes(parts, dep_pred)
            dep_graph.remove_dependency(dep_comp, dep_pred)
        else:
            dep_pred = parts[0]

        oia_pred = oia_graph.add_words(dep_pred.position)

        if dep_subj:
            oia_subj = oia_graph.add_words(dep_subj.position)
            oia_graph.add_argument(oia_pred, oia_subj, 1, mod=True)
        else:
            oia_comp = oia_graph.add_words(dep_comp.position)
            oia_graph.add_argument(oia_pred, oia_comp, 2)
def forward(self, oia_graph: OIAGraph, **kwargs):
    """
    split the noun phrase with of in it

    According to the previous merge operation, if there is any modification
    to the part after the of, the noun phrase will be not merged. So the
    noun phrases with of do not have any modification to the second part.

    Every multi-word node whose text contains the word "of" is replaced by
    a chain ``first part -> of -> next part [-> of -> next part ...]``.
    The parents and children of the original node are re-attached to the
    node built from the first part, and the original node is removed.
    Nodes whose phrase ends with "of" are left untouched and a warning is
    logged.

    @param oia_graph: the graph to rewrite in place
    @type oia_graph: OIAGraph
    @param kwargs: not used
    @type kwargs: dict
    @return: None
    @rtype: None
    """
    # Iterate over a snapshot: nodes are added and removed inside the loop.
    for node in list(oia_graph.nodes()):

        node_words = oia_graph.node_text(node).split(" ")

        # Nothing to split when there is no "of", or when the node is the
        # bare word "of" itself.
        if "of" not in node_words or len(node_words) == 1:
            continue

        # Alternating layout: [words, of_idx, words, of_idx, ..., words]
        of_split_words = []
        current_words = []

        for span in node.spans:
            if isinstance(span, str):
                # String spans are kept verbatim and never split.
                current_words.append(span)
                continue

            start, end = span  # inclusive word-index range
            for idx in range(start, end + 1):
                if oia_graph.words[idx] == "of":
                    of_split_words.append(current_words)
                    of_split_words.append(idx)
                    current_words = []
                else:
                    current_words.append(idx)

        if not current_words:
            # of is the ending, warning, maybe something like "because of "
            logger.warning("We found a of at the last of the phrase: " +
                           oia_graph.node_text(node))
            continue

        of_split_words.append(current_words)

        first_part_words = of_split_words[0]
        first_node = oia_graph.add_words(first_part_words)
        previous_node = first_node

        # Move every incoming edge of the original node to the first part.
        for p, l in list(oia_graph.parents(node)):
            oia_graph.add_relation(p, first_node, l.label)
            oia_graph.remove_relation(p, node)

        # Outgoing edges are not expected here; move them as well, loudly.
        children = list(oia_graph.children(node))
        if children:
            logger.warning(
                "noun of noun has {0} children, be careful!!!".format(
                    len(children)))

        for c, l in children:
            logger.warning("Child: {} {}".format(l.label,
                                                 oia_graph.node_text(c)))
            oia_graph.add_relation(first_node, c, l.label)
            oia_graph.remove_relation(node, c)

        oia_graph.remove_node(node)

        # The remaining entries come in (of index, following words) pairs.
        for of_word, noun_words in more_itertools.chunked(
                of_split_words[1:], 2):
            of_node = oia_graph.add_words([of_word])
            next_node = oia_graph.add_words(noun_words)

            oia_graph.add_relation(previous_node, of_node, "as:pred.arg.1")
            oia_graph.add_relation(of_node, next_node, "pred.arg.2")
            previous_node = next_node
def adv_question(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                 context: UD2OIAContext):
    """
    Make a question adverb (how/what/where/when/why) the head of its verb.

    For every verb or auxiliary carrying such a word through an ``advmod``
    or ``amod`` edge, the existing OIA relation from the verb to the
    question word is removed, every incoming relation of the verb whose
    ``mod`` flag is not set is redirected to the question word, and the
    question word is finally linked to the verb with ``add_function``.

    :param dep_graph: dependency graph searched for question words
    :param oia_graph: OIA graph that is rewritten in place
    :param context: not used by this converter
    :return: None
    """
    pattern = DependencyGraph()

    wh_pat = pattern.create_node(
        UPOS="ADV|ADJ",
        LEMMA=r"(\bhow\b|\bwhat\b|\bwhere\b|\bwhen\b|why\b)\w*")
    verb_pat = pattern.create_node(UPOS="VERB|AUX")
    pattern.add_dependency(verb_pat, wh_pat, "advmod|amod")

    # Materialise the matches before the OIA graph is edited.
    for match in list(dep_graph.match(pattern)):
        dep_wh = match[wh_pat]
        dep_verb = match[verb_pat]

        oia_wh = oia_graph.add_words(dep_wh.position)
        oia_verb = oia_graph.add_words(dep_verb.position)

        oia_graph.remove_relation(oia_verb, oia_wh)

        # Snapshot the parents: the edges are changed while looping.
        for parent, rel in list(oia_graph.parents(oia_verb)):
            if not rel.mod:
                oia_graph.remove_relation(parent, oia_verb)
                oia_graph.add_relation(parent, oia_wh, rel)

        oia_graph.add_function(oia_wh, oia_verb)
def general_question(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                     context: UD2OIAContext):
    """
    Mark yes/no ("general") questions with an auxiliary ``WHETHER`` node.

    For every node whose UPOS is ``VERB`` the converter:

    1. skips the verb when the lemma of any of its offsprings contains
       "what", "how", "why", "when" or "where";
    2. picks a subject (child whose relation contains "subj") and an
       auxiliary (child whose relation contains "aux"); an ``expl`` child,
       when present, replaces the subject;
    3. skips the verb when it has neither an auxiliary nor a "be" lemma,
       or when no subject was found (a warning is logged in that case);
    4. treats the clause as a question when "be" precedes "there"/"here"
       inside a there-be super node, or when the be-verb or the auxiliary
       is located before the subject;
    5. for a question, adds an auxiliary ``WHETHER`` node and links it to
       the verb with ``add_function``.

    :param dep_graph: dependency graph that is inspected (not modified here)
    :param oia_graph: OIA graph that receives the verb and ``WHETHER`` nodes
    :param context: not used by this converter
    :return: None
    """
    for verb in dep_graph.nodes(filter=lambda n: n.UPOS == "VERB"):

        # A wh-word below the verb: not handled as a general question.
        # Note this is a substring test on the lemma.
        if any(
                any(x in n.LEMMA
                    for x in {"what", "how", "why", "when", "where"})
                for n in dep_graph.offsprings(verb)):
            continue

        # Only referenced by the disabled root check below.
        parents = [n for n, _ in dep_graph.parents(verb)]

        # if not(len(parents) == 1 and parents[0].ID == "0"):
        #    continue

        # check subj and aux
        # When several children qualify, the last one iterated wins.
        subj = None
        aux = None
        for child, rel in dep_graph.children(verb):
            if "subj" in rel:
                subj = child
            if "aux" in rel:
                aux = child

        is_be_verb = False

        if not isinstance(verb, DependencyGraphSuperNode):
            is_be_verb = verb.LEMMA == "be"
        else:
            assert isinstance(verb, DependencyGraphSuperNode)
            # A super node is expected to hold its auxiliary internally.
            assert aux is None
            for n in verb.nodes:
                if isinstance(n, DependencyGraphNode):
                    if n.LEMMA == "be":
                        is_be_verb = True
                    # NOTE(review): taken as a sibling of the "be" test
                    # above, not nested under it - confirm.
                    if n.UPOS == "AUX":
                        aux = n

        if aux is None and not is_be_verb:
            # cannot be a general question
            continue

        # An expletive child takes precedence over the subject found above.
        expl_child = [n for n, l in dep_graph.children(verb) if l == "expl"]
        if expl_child:
            assert len(expl_child) == 1
            subj = expl_child[0]

        if subj is None:
            logger.warning(
                "subject is none, cannot decide whether it is a question")
            continue

        # The verb node is added before the question decision is made.
        oia_verb_node = oia_graph.add_words(verb.position)

        # "there be" / "here be": the verb lemma holds one of these words.
        is_there_be_verb = is_be_verb and ("there" in verb.LEMMA.split(' ')
                                           or "here" in verb.LEMMA.split(' '))

        is_question = False

        if is_there_be_verb:
            assert isinstance(verb, DependencyGraphSuperNode)
            be_node = [n for n in verb.nodes if n.LEMMA == "be"][0]
            there_node = [
                n for n in verb.nodes
                if n.LEMMA == "there" or n.LEMMA == "here"
            ][0]
            # "be" located before "there"/"here" is read as a question.
            if be_node.LOC < there_node.LOC:
                is_question = True
        elif (is_be_verb and verb.LOC < subj.LOC):
            # be-verb located before its subject
            is_question = True
        elif (aux is not None and aux.LOC < subj.LOC):
            # auxiliary located before the subject
            is_question = True

        if is_question:
            # Disabled alternative: use the "do" auxiliary as question node.
            # if aux is not None and aux.LEMMA == "do":
            #    oia_question_node = oia_graph.add_word_with_head(aux.LOC)
            # else:
            oia_question_node = oia_graph.add_aux("WHETHER")

            oia_graph.add_function(oia_question_node, oia_verb_node)
def object_relative_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                           context: UD2OIAContext):
    """
    ##### Object-extracted/referred relative clause #####
    ##### the person that Andy knows #####

    Matches an entity with an ``acl:relcl`` dependent verb that has its own
    subject. Matches whose subject lemma is "what", "who", "which" or
    "that" are skipped, as are matches already marked in ``context`` or
    already related in ``oia_graph``. For the others, the subject becomes
    argument 1 of the verb and the entity argument 2 (``mod=True``). A
    ``ref`` child of the entity located between entity and verb is linked
    to the entity with ``add_ref`` and attached to the verb according to
    its relation. ``ccomp`` children of the verb become argument 3.

    :param dep_graph: dependency graph searched for relative clauses
    :param oia_graph: OIA graph that receives the nodes and relations
    :param context: conversion context; the (verb, subject) and
        (entity, verb) pairs are marked as processed
    :return: None
    :raises Exception: when a ref node without case child is related to the
        verb by something other than an ``obj``-like or ``advmod`` relation
    """

    pattern = DependencyGraph()
    verb_node = DependencyGraphNode()
    entity_node = DependencyGraphNode()
    subj_node = DependencyGraphNode()

    pattern.add_nodes([verb_node, entity_node, subj_node])

    pattern.add_dependency(verb_node, subj_node, r'\w*subj\w*')
    pattern.add_dependency(entity_node, verb_node, r'\w*acl:relcl\w*')

    for match in dep_graph.match(pattern):

        dep_entity_node = match[entity_node]
        dep_subj_node = match[subj_node]
        dep_verb_node = match[verb_node]

        # A relative pronoun as subject is not handled by this converter.
        if dep_subj_node.LEMMA in {"what", "who", "which", "that"}:
            continue

        logger.debug("we found a objective relative clause")
        logger.debug("entity: {0}".format(dep_entity_node))
        logger.debug("subject: {0}".format(dep_subj_node))
        logger.debug("verb: {0}".format(dep_verb_node))

        if context.is_processed(dep_entity_node, dep_verb_node):
            logger.debug("processed")
            continue

        context.processed(dep_verb_node, dep_subj_node)
        context.processed(dep_entity_node, dep_verb_node)

        oia_entity_node = oia_graph.add_words(dep_entity_node.position)
        oia_verb_node = oia_graph.add_words(dep_verb_node.position)
        oia_subj_node = oia_graph.add_words(dep_subj_node.position)

        # The pair was already marked as processed above, so a skipped
        # match is not revisited through `context.is_processed`.
        if oia_graph.has_relation(oia_entity_node, oia_verb_node):
            logger.debug("has relation between entity and verb")
            continue

        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)

        def __valid_ref(n, l):
            # `ref` child located strictly between the entity and the verb
            return l == "ref" and dep_entity_node.LOC < n.LOC < dep_verb_node.LOC

        ref_nodes = list(n for n, l in dep_graph.children(dep_entity_node,
                                                          filter=__valid_ref))
        ref_nodes.sort(key=lambda x: x.LOC)

        if ref_nodes:
            # Use the ref node with the largest LOC.
            ref_node = ref_nodes[-1]

            oia_ref_node = oia_graph.add_words(ref_node.position)

            oia_graph.add_ref(oia_entity_node, oia_ref_node)

            logger.debug("we are coping with ref between:")
            logger.debug(dep_verb_node)
            logger.debug(ref_node)

            ref_relation = dep_graph.get_dependency(dep_verb_node, ref_node)

            case_nodes = list(n for n, l in dep_graph.children(
                ref_node, filter=lambda n, l: "case" in l))
            case_nodes.sort(key=lambda x: x.LOC)

            if ref_relation:
                if case_nodes:
                    # with which xxxx, the with will become the root pred
                    case_node = case_nodes[-1]
                    oia_case_node = oia_graph.add_words(case_node.position)

                    oia_graph.add_argument(oia_case_node, oia_verb_node, 1,
                                           mod=True)
                    oia_graph.add_argument(oia_case_node, oia_ref_node, 2)
                    oia_graph.add_mod(oia_verb_node, oia_entity_node)
                else:
                    if "obj" in ref_relation:
                        oia_graph.add_argument(oia_verb_node, oia_ref_node, 2)
                    elif ref_relation == "advmod":
                        oia_graph.add_mod(oia_ref_node, oia_verb_node)
                    else:
                        raise Exception(
                            "unknown relation: {}".format(ref_relation))
                    # oia_graph.add_argument(oia_verb_node, oia_entity_node, 2, mod=True)

        # NOTE(review): the statements below are taken to run for every
        # match, with or without a ref node - confirm the nesting level.
        # The first call repeats the subject argument already added above.
        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)
        oia_graph.add_argument(oia_verb_node, oia_entity_node, 2, mod=True)

        # Currently unused; only the disabled check below refers to it.
        rels = dep_graph.get_dependency(dep_entity_node, dep_verb_node)

        #if rels.endswith("obj"):
        # Clausal complements of the verb become its third argument.
        for node, l in dep_graph.children(dep_verb_node):
            if l == "ccomp":
                oia_ccomp_node = oia_graph.add_words(node.position)
                oia_graph.add_argument(oia_verb_node, oia_ccomp_node, 3)