def obl_modifier(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Record every ``obl`` dependent as a modifier of its head in the OIA graph.

    A two-node pattern (head -> dependent, relation label matching ``obl``) is
    matched against ``dep_graph``.  Pairs for which
    ``oia_graph.has_relation(..., direct_link=False)`` is already true are
    skipped; for every other pair both words are added to ``oia_graph`` and the
    dependent is attached to the head through ``add_mod``.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the word nodes and the mod edge
    :param context: conversion context; not used by this function
    :return: None
    """
    obl_pattern = DependencyGraph()

    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    head = DependencyGraphNode()
    dependent = DependencyGraphNode()
    obl_pattern.add_nodes([head, dependent])
    obl_pattern.add_dependency(head, dependent, r'\bobl')

    for hit in dep_graph.match(obl_pattern):
        dep_head = hit[head]
        dep_dependent = hit[dependent]

        already_linked = oia_graph.has_relation(
            dep_head, dep_dependent, direct_link=False)
        if not already_linked:
            oia_head = oia_graph.add_words(dep_head.position)
            oia_dependent = oia_graph.add_words(dep_dependent.position)
            oia_graph.add_mod(oia_dependent, oia_head)
def aclwhose(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Convert possessive relative clauses such as
    "the person whose/who's cat is cute".

    The pattern has four nodes: the modified noun ("person"), a pronoun with
    ``PronType=Int`` ("whose"), the possessee ("cat") and the head of the
    relative clause ("cute").  Required edges are noun -> clause (acl:relcl),
    clause -> possessee (nsubj/obj/iobj) and possessee -> whose (nmod:poss).

    For every match the four words are added to ``oia_graph`` and three edges
    are created: ``add_function(clause, noun)``, ``add_function(whose,
    possessee)`` and ``add_ref(whose, noun)``.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the nodes and edges
    :param context: conversion context; not used by this function
    :return: None
    """
    whose_pattern = DependencyGraph()
    noun = DependencyGraphNode()  # person
    whose = DependencyGraphNode(FEATS={"PronType": "Int"})  # whose
    possessee = DependencyGraphNode()  # cat
    clause = DependencyGraphNode()  # cute
    whose_pattern.add_nodes([noun, whose, possessee, clause])

    whose_pattern.add_dependency(noun, clause, r'.*acl:relcl.*')
    # NOTE(review): the alternation is not parenthesised, so the regex parses
    # as `.*nsubj` | `obj` | `iobj.*` -- confirm this is what is intended.
    whose_pattern.add_dependency(clause, possessee, r'.*nsubj|obj|iobj.*')
    whose_pattern.add_dependency(possessee, whose, r'.*nmod:poss.*')
    # A `ref` edge from "whose" back to the noun is not part of the pattern
    # (that constraint is disabled).

    for hit in dep_graph.match(whose_pattern):
        dep_noun = hit[noun]
        dep_whose = hit[whose]
        dep_possessee = hit[possessee]
        dep_clause = hit[clause]

        oia_noun = oia_graph.add_words(dep_noun.position)
        oia_whose = oia_graph.add_words(dep_whose.position)
        oia_possessee = oia_graph.add_words(dep_possessee.position)
        oia_clause = oia_graph.add_words(dep_clause.position)

        oia_graph.add_function(oia_clause, oia_noun)
        oia_graph.add_function(oia_whose, oia_possessee)
        oia_graph.add_ref(oia_whose, oia_noun)
def adv_adj_modifier(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Attach ``advmod`` dependents of adjectives as modifiers in the OIA graph.

    The pattern is an ``ADJ`` head with an ``advmod`` dependent whose UPOS
    matches ``ADV|X|NOUN``.  Pairs that already have a relation in
    ``oia_graph`` are skipped; otherwise both words are added and the
    dependent is attached to the adjective through ``add_mod``.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the word nodes and the mod edge
    :param context: conversion context; not used by this function
    :return: None
    """
    advmod_pattern = DependencyGraph()

    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    adjective = DependencyGraphNode(UPOS="ADJ")
    adverb = DependencyGraphNode(UPOS="ADV|X|NOUN")
    advmod_pattern.add_nodes([adjective, adverb])
    advmod_pattern.add_dependency(adjective, adverb, r'advmod')

    for hit in dep_graph.match(advmod_pattern):
        dep_adjective = hit[adjective]
        dep_adverb = hit[adverb]

        if not oia_graph.has_relation(dep_adjective, dep_adverb):
            oia_adjective = oia_graph.add_words(dep_adjective.position)
            oia_adverb = oia_graph.add_words(dep_adverb.position)
            oia_graph.add_mod(oia_adverb, oia_adjective)
def negation(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Wrap every "not" and its governor in an auxiliary ``SCOPE`` predicate.

    The pattern is any node with a dependent whose LEMMA is ``not``, whatever
    the relation label.  For each match an auxiliary node labelled ``SCOPE``
    is created, the "not" word and its parent are added to ``oia_graph``, and
    both are attached to the auxiliary node as arguments.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the nodes and edges
    :param context: conversion context; not used by this function
    :return: None
    """
    negation_pattern = DependencyGraph()
    negator = DependencyGraphNode(LEMMA="not")
    governor = DependencyGraphNode()
    negation_pattern.add_nodes([negator, governor])
    negation_pattern.add_dependency(governor, negator, r'\w*')

    for hit in dep_graph.match(negation_pattern):
        dep_negator = hit[negator]
        dep_governor = hit[governor]

        scope = oia_graph.add_aux(label="SCOPE")
        oia_negator = oia_graph.add_words(dep_negator.position)
        oia_governor = oia_graph.add_words(dep_governor.position)

        # NOTE(review): both arguments use index 1 -- confirm the governor
        # is not meant to be argument 2.
        for argument in (oia_negator, oia_governor):
            oia_graph.add_argument(scope, argument, 1)
def oblique_relative_clause(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Convert oblique relative clauses, e.g.
    "An announcement, in which he stated that".

    The pattern has four nodes: the antecedent, a case marker, a relative
    pronoun (``PronType=Rel``) and the head of the relative clause.  Required
    edges are antecedent -> clause (acl:relcl), antecedent -> pronoun (ref),
    clause -> pronoun (obl) and pronoun -> case marker (case).

    For every match the four words are added to ``oia_graph``; the case marker
    takes the clause head as argument 1 and the pronoun as argument 2, and a
    ref edge is added from the pronoun to the antecedent.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the nodes and edges
    :param context: conversion context; not used by this function
    :return: None
    """
    relcl_pattern = DependencyGraph()
    antecedent = DependencyGraphNode()
    case_marker = DependencyGraphNode()
    rel_pronoun = DependencyGraphNode(FEATS={"PronType": "Rel"})
    clause_head = DependencyGraphNode()
    relcl_pattern.add_nodes([antecedent, case_marker, rel_pronoun, clause_head])

    relcl_pattern.add_dependency(antecedent, clause_head, r'acl:relcl\w*')
    relcl_pattern.add_dependency(antecedent, rel_pronoun, r'ref')
    relcl_pattern.add_dependency(clause_head, rel_pronoun, r'obl')
    relcl_pattern.add_dependency(rel_pronoun, case_marker, r'case')

    for hit in dep_graph.match(relcl_pattern):
        dep_antecedent = hit[antecedent]
        dep_case = hit[case_marker]
        dep_pronoun = hit[rel_pronoun]
        dep_clause = hit[clause_head]

        oia_antecedent = oia_graph.add_words(dep_antecedent.position)
        oia_case = oia_graph.add_words(dep_case.position)
        oia_pronoun = oia_graph.add_words(dep_pronoun.position)
        oia_clause = oia_graph.add_words(dep_clause.position)

        oia_graph.add_argument(oia_case, oia_clause, 1)
        oia_graph.add_argument(oia_case, oia_pronoun, 2)
        oia_graph.add_ref(oia_pronoun, oia_antecedent)
def appositive_phrase(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Convert appositions, e.g. "Trump, president of US, came".

    Every edge whose label contains ``appos`` is matched.  The appositive and
    its head are added to ``oia_graph``; when both resulting OIA nodes are
    truthy, an auxiliary ``APPOS`` predicate is created with the head as
    argument 1 and the appositive as argument 2.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the nodes and edges
    :param context: conversion context; not used by this function
    :return: None
    """
    appos_pattern = DependencyGraph()
    head = DependencyGraphNode()
    appositive = DependencyGraphNode()
    appos_pattern.add_nodes([head, appositive])
    appos_pattern.add_dependency(head, appositive, r'\w*appos\w*')

    for hit in dep_graph.match(appos_pattern):
        dep_head = hit[head]
        dep_appositive = hit[appositive]

        # The appositive is registered before its head, as in the original.
        oia_appositive = oia_graph.add_words(dep_appositive.position)
        oia_head = oia_graph.add_words(dep_head.position)

        if not (oia_appositive and oia_head):
            continue

        appos_pred = oia_graph.add_aux(label="APPOS")
        oia_graph.add_argument(appos_pred, oia_head, 1)
        oia_graph.add_argument(appos_pred, oia_appositive, 2)
def separated_asas(dep_graph: DependencyGraph):
    """
    Rewrite equality comparisons of the form "A is as X a C as B".

    Pattern: a NOUN with an ``amod`` ADJ, the ADJ with an ``advmod`` "as", that
    first "as" with an ``advcl:as`` dependent (the compared object), and the
    object with a ``mark`` "as".  A match is kept only when the words appear in
    the order first "as" < adjective < noun < second "as" < object.

    For every kept match, all nodes located from the first "as" up to the
    adjective, plus the second "as", are merged into one ADJ node.  After all
    matches have been collected, the graph is rewritten: the merged node
    replaces its parts, its edges to the object and from the noun are removed,
    and the noun gets an ``acl:<merged form>`` edge to the object.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()
    adj_node = DependencyGraphNode(UPOS="ADJ")
    noun_node = DependencyGraphNode(UPOS="NOUN")
    as1_node = DependencyGraphNode(FORM="as")
    as2_node = DependencyGraphNode(FORM="as")
    obj_node = DependencyGraphNode()
    pattern.add_nodes([noun_node, adj_node, as1_node, as2_node, obj_node])

    pattern.add_dependency(noun_node, adj_node, r'amod')
    pattern.add_dependency(adj_node, as1_node, r'\w*advmod\w*')
    pattern.add_dependency(as1_node, obj_node, r'\w*advcl:as\w*')
    pattern.add_dependency(obj_node, as2_node, r'mark')

    # Phase 1: collect the rewrites without touching the graph.
    rewrites = []
    for hit in dep_graph.match(pattern):
        noun, adj, first_as, second_as, obj = (
            hit[p] for p in (noun_node, adj_node, as1_node, as2_node, obj_node)
        )

        in_order = (first_as.LOC < adj.LOC < noun.LOC
                    < second_as.LOC < obj.LOC)
        if not in_order:
            continue

        span = [n for n in dep_graph.nodes()
                if first_as.LOC <= n.LOC <= adj.LOC]
        span = sorted(span + [second_as], key=lambda n: n.LOC)

        merged = merge_dep_nodes(span, UPOS="ADJ", LOC=second_as.LOC)
        rewrites.append((span, merged, noun, obj))

    # Phase 2: apply the rewrites.
    for span, merged, noun, obj in rewrites:
        dep_graph.replace_nodes(span, merged)
        dep_graph.remove_dependency(merged, obj)
        dep_graph.remove_dependency(noun, merged)
        dep_graph.add_dependency(noun, obj, "acl:" + merged.FORM)
def acl_mod_verb(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Attach a verb that modifies a nominal through ``acl`` as a modifier.

    This is called after adnominal_clause_mark, which means there is no mark.

    Pairs that the context reports as processed, or that already have a
    relation in ``oia_graph`` (``direct_link=False``), are skipped.  For the
    rest, both words are added and the verb is attached to the nominal through
    ``add_mod``.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the word nodes and the mod edge
    :param context: conversion context, queried through ``is_processed``
    :return: None
    """
    pattern = DependencyGraph()

    # ADJ is for the cases that "many/some" are abbrv of many X/some X,
    # representing NOUN.
    # ADV is for the case of "here" for "i am here thinking xxx".
    noun_node = pattern.create_node(UPOS="NOUN|PRON|PROPN|ADJ|ADV|NUM")
    # AUX is for can, have which omits the true verb.
    verb_node = pattern.create_node(UPOS="VERB|AUX")
    pattern.add_nodes([noun_node, verb_node])
    pattern.add_dependency(noun_node, verb_node, r'acl')

    for hit in dep_graph.match(pattern):
        dep_noun = hit[noun_node]
        dep_verb = hit[verb_node]

        if (context.is_processed(dep_noun, dep_verb)
                or oia_graph.has_relation(dep_noun, dep_verb, direct_link=False)):
            continue

        oia_verb = oia_graph.add_words(dep_verb.position)
        oia_noun = oia_graph.add_words(dep_noun.position)

        # Extract the acl subtype (e.g. "acl:xxx" -> "xxx"), treating "relcl"
        # as no subtype.  The value is currently not used: the variant that
        # turned it into an auxiliary predicate is disabled, so a plain mod
        # edge is always added.  The lookup and assertion are kept because
        # they can still raise.
        relation = dep_graph.get_dependency(dep_noun, dep_verb)
        acl_labels = [rel for rel in relation.rels if rel.startswith("acl:")]
        subtype = None
        if acl_labels:
            assert len(acl_labels) == 1
            subtype = acl_labels[0].split(":")[1]
            if subtype == "relcl":
                subtype = None

        oia_graph.add_mod(oia_verb, oia_noun)
def amod_obl(dep_graph: DependencyGraph):
    """
    Merge an adjective and the preposition of its oblique into one ADP node.

    Examples include "more than" and "successful by".

    Pattern: a NOUN/PRON with an ``amod`` ADJ, the ADJ with an ``obl:<x>``
    dependent, and that dependent with a ``case`` ADP.  A match is kept only
    when the adjective has at most one child whose label contains "obl", the
    ADP's FORM is among the values of the adjective -> oblique dependency, and
    the words appear in the order noun < adjective < ADP < oblique.

    For every kept match the adjective and the ADP are merged into a single
    ADP node, the noun -> adjective and adjective -> oblique edges are removed,
    the merged node replaces its parts, and the noun gets an
    ``nmod:<merged form>`` edge to the oblique.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()
    noun_node = DependencyGraphNode(UPOS=r"NOUN|PRON")
    adj_node = DependencyGraphNode(UPOS="ADJ")
    adp_node = DependencyGraphNode(UPOS="ADP")
    obl_node = DependencyGraphNode()
    pattern.add_nodes([noun_node, adj_node, adp_node, obl_node])

    pattern.add_dependency(noun_node, adj_node, r'amod')
    pattern.add_dependency(adj_node, obl_node, r'obl:\w+')
    pattern.add_dependency(obl_node, adp_node, r'case')

    # Phase 1: collect the candidates without touching the graph.
    candidates = []
    for hit in dep_graph.match(pattern):
        noun, adj, obl, adp = (
            hit[p] for p in (noun_node, adj_node, obl_node, adp_node)
        )

        obl_children = list(
            dep_graph.children(adj, filter=lambda n, l: "obl" in l))
        if len(obl_children) > 1:
            # similar in form to the one
            continue

        if adp.FORM not in dep_graph.get_dependency(adj, obl).values():
            continue

        if noun.LOC < adj.LOC < adp.LOC < obl.LOC:
            candidates.append((noun, adj, obl, adp))

    # Phase 2: apply the rewrites.
    for noun, adj, obl, adp in candidates:
        merged = merge_dep_nodes([adj, adp], UPOS="ADP", LOC=adp.LOC)

        dep_graph.remove_dependency(noun, adj)
        dep_graph.remove_dependency(adj, obl)
        dep_graph.replace_nodes([adj, adp], merged)
        dep_graph.add_dependency(noun, obl, "nmod:" + merged.FORM)
def nmod_with_case(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Convert nominal modifiers introduced by a case marker.

    Examples: "the office of the chair", "Istanbul in Turkey".

    Pattern: a parent with an ``nmod`` child, and that child with a ``case``
    dependent.  Pairs that already have a relation in ``oia_graph`` are
    skipped.  Otherwise the case word becomes the predicate, with the parent
    as argument 1 (``mod=True``) and the child as argument 2.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the nodes and edges
    :param context: conversion context; not used by this function
    :return: None
    """
    pattern = DependencyGraph()
    parent_node = DependencyGraphNode()
    child_node = DependencyGraphNode()
    case_node = DependencyGraphNode()
    pattern.add_nodes([parent_node, child_node, case_node])

    pattern.add_dependency(parent_node, child_node, r'\w*nmod\w*')
    pattern.add_dependency(child_node, case_node, r'\w*case\w*')

    for match in dep_graph.match(pattern):
        dep_parent_node = match[parent_node]
        dep_child_node = match[child_node]
        dep_case_node = match[case_node]

        if oia_graph.has_relation(dep_parent_node, dep_child_node):
            continue

        # The case word is the predicate whether or not the nmod subtype
        # equals "nmod:<LEMMA>" or "nmod:<FORM>" (e.g. "vs" has lemma
        # "versus", "according" has lemma "accord").  The previous code
        # tested that condition but ran the same statement in both branches,
        # so the test and the dependency lookup feeding it were removed.
        pred_node = oia_graph.add_words(dep_case_node.position)
        arg1_node = oia_graph.add_words(dep_parent_node.position)
        arg2_node = oia_graph.add_words(dep_child_node.position)

        oia_graph.add_argument(pred_node, arg1_node, 1, mod=True)
        oia_graph.add_argument(pred_node, arg2_node, 2)
def such_that(dep_graph: DependencyGraph):
    """
    Merge "such ... that" into one SCONJ node, e.g. "such a high price that".

    Pattern: a NOUN with a ``det:predet`` "such", "such" with an ``advcl:that``
    VERB, and that verb with a ``mark`` "that".  A match is kept only when the
    words appear in the order such < noun < that < clause verb.

    For every kept match "such" and "that" are merged into a SCONJ node that is
    added to the graph; the noun gets an ``advcl:<merged form>`` edge to the
    clause verb, the clause verb gets a ``mark`` edge to the merged node, and
    the original "such" and "that" nodes are removed.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()
    noun_node = DependencyGraphNode(UPOS="NOUN")
    such_node = DependencyGraphNode(FORM="such")
    clause_pred_node = DependencyGraphNode(UPOS="VERB")
    that_node = DependencyGraphNode(FORM="that")
    pattern.add_nodes([noun_node, such_node, clause_pred_node, that_node])

    pattern.add_dependency(noun_node, such_node, r'det:predet')
    pattern.add_dependency(such_node, clause_pred_node, r'advcl:that')
    pattern.add_dependency(clause_pred_node, that_node, r'mark')

    # Phase 1: collect the candidates without touching the graph.
    candidates = []
    for hit in dep_graph.match(pattern):
        noun, such, clause, that = (
            hit[p] for p in (noun_node, such_node, clause_pred_node, that_node)
        )
        if such.LOC < noun.LOC < that.LOC < clause.LOC:
            candidates.append((noun, such, clause, that))

    # Phase 2: apply the rewrites.
    for noun, such, clause, that in candidates:
        merged = merge_dep_nodes([such, that], UPOS="SCONJ", LOC=that.LOC)

        dep_graph.add_node(merged)
        dep_graph.add_dependency(noun, clause, "advcl:" + merged.FORM)
        dep_graph.add_dependency(clause, merged, "mark")

        for obsolete in (such, that):
            dep_graph.remove_node(obsolete)
def acl_mod_adjv(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Attach an adjective or adverb that modifies a nominal through ``acl``.

    Pattern: a node whose UPOS matches ``NOUN|PRON|PROPN|NUM`` with an ``acl``
    dependent whose UPOS matches ``ADJ|ADV``.  For every match both words are
    added to ``oia_graph`` and the dependent is attached to the nominal through
    ``add_mod``.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the word nodes and the mod edge
    :param context: conversion context; not used by this function
    :return: None
    """
    acl_pattern = DependencyGraph()
    nominal = DependencyGraphNode(UPOS="NOUN|PRON|PROPN|NUM")
    modifier = DependencyGraphNode(UPOS="ADJ|ADV")
    acl_pattern.add_nodes([nominal, modifier])
    acl_pattern.add_dependency(nominal, modifier, r'acl')

    for hit in dep_graph.match(acl_pattern):
        dep_nominal = hit[nominal]
        dep_modifier = hit[modifier]

        oia_nominal = oia_graph.add_words(dep_nominal.position)
        oia_modifier = oia_graph.add_words(dep_modifier.position)
        oia_graph.add_mod(oia_modifier, oia_nominal)
def adv_verb_modifier(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Convert ``advmod`` dependents of verbs (and other listed heads).

    The adv before the verb should be processed by verb_phrase; this converter
    should process the adv after the verb, e.g. "verb1 in order to verb2".

    Pairs that the context reports as processed, or that already have a
    relation in ``oia_graph``, are skipped.  For the rest, one of three shapes
    is produced:

    * the adverb has exactly one ``obl`` child and that child has no ``case``
      dependent: the adverb becomes a predicate with the head as argument 1
      (``mod=True``) and the oblique as argument 2;
    * otherwise, if the adverb has a ``mark`` child: the first mark becomes a
      predicate with the head as argument 1 (``mod=True``) and the adverb as
      argument 2;
    * otherwise the adverb is attached to the head through ``add_mod``.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the nodes and edges
    :param context: conversion context, queried through ``is_processed``
    :return: None
    """
    pattern = DependencyGraph()

    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    # AUX is for be word
    verb_node = DependencyGraphNode(UPOS="VERB|NOUN|PROPN|AUX|PRON")
    adv_node = DependencyGraphNode(UPOS="ADV|X|NOUN|ADJ|VERB")
    pattern.add_nodes([verb_node, adv_node])
    pattern.add_dependency(verb_node, adv_node, r'advmod')

    for hit in dep_graph.match(pattern):
        dep_verb = hit[verb_node]
        dep_adv = hit[adv_node]

        if (context.is_processed(dep_verb, dep_adv)
                or oia_graph.has_relation(dep_verb, dep_adv)):
            continue

        obl_children = [
            child for child, _ in dep_graph.children(
                dep_adv, filter=lambda n, l: l.startswith("obl"))
        ]

        obl_node, obl_has_case = None, False
        if len(obl_children) == 1:
            obl_node = obl_children[0]
            # if obl with case, let the oblique to process it
            obl_has_case = bool([
                child for child, _ in dep_graph.children(
                    obl_node, filter=lambda n, l: "case" in l)
            ])

        mark_children = [
            child for child, _ in dep_graph.children(
                dep_adv, filter=lambda n, l: l.startswith("mark"))
        ]

        oia_verb = oia_graph.add_words(dep_verb.position)
        oia_adv = oia_graph.add_words(dep_adv.position)

        if obl_node and not obl_has_case:
            oia_arg = oia_graph.add_words(obl_node.position)
            oia_graph.add_argument(oia_adv, oia_verb, 1, mod=True)
            oia_graph.add_argument(oia_adv, oia_arg, 2)
        elif mark_children:
            oia_mark = oia_graph.add_words(mark_children[0].position)
            oia_graph.add_argument(oia_mark, oia_verb, 1, mod=True)
            oia_graph.add_argument(oia_mark, oia_adv, 2)
        else:
            oia_graph.add_mod(oia_adv, oia_verb)
def object_relative_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Convert object-extracted/referred relative clauses, e.g.
    "the person that Andy knows".

    Pattern: an entity with an ``acl:relcl`` verb, and that verb with a subject
    dependent.  Matches whose subject LEMMA is "what", "who", "which" or
    "that" are skipped, as are entity/verb pairs that the context reports as
    processed or that already have a relation in ``oia_graph``.

    For the remaining matches the verb takes the subject as argument 1 and the
    entity as argument 2 (``mod=True``).  If the entity has a ``ref`` child
    located between the entity and the verb, a ref edge is added and, when the
    verb also governs that ref word, it is attached either through its case
    marker or directly (``obj`` -> argument 2, ``advmod`` -> mod).  Every
    ``ccomp`` child of the verb becomes argument 3.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the nodes and edges
    :param context: conversion context; read through ``is_processed`` and
        updated through ``processed``
    :return: None
    :raises Exception: when the verb -> ref relation is neither an ``obj``
        relation nor ``advmod`` and the ref word has no case marker
    """
    pattern = DependencyGraph()
    verb_node = DependencyGraphNode()
    entity_node = DependencyGraphNode()
    subj_node = DependencyGraphNode()
    pattern.add_nodes([verb_node, entity_node, subj_node])

    pattern.add_dependency(verb_node, subj_node, r'\w*subj\w*')
    pattern.add_dependency(entity_node, verb_node, r'\w*acl:relcl\w*')

    for match in dep_graph.match(pattern):
        dep_entity_node = match[entity_node]
        dep_subj_node = match[subj_node]
        dep_verb_node = match[verb_node]

        # A relative pronoun as subject is not handled here.
        if dep_subj_node.LEMMA in {"what", "who", "which", "that"}:
            continue

        logger.debug("we found a objective relative clause")
        logger.debug("entity: {0}".format(dep_entity_node))
        logger.debug("subject: {0}".format(dep_subj_node))
        logger.debug("verb: {0}".format(dep_verb_node))

        if context.is_processed(dep_entity_node, dep_verb_node):
            logger.debug("processed")
            continue

        # Mark both edges as handled before building the OIA structure.
        context.processed(dep_verb_node, dep_subj_node)
        context.processed(dep_entity_node, dep_verb_node)

        oia_entity_node = oia_graph.add_words(dep_entity_node.position)
        oia_verb_node = oia_graph.add_words(dep_verb_node.position)
        oia_subj_node = oia_graph.add_words(dep_subj_node.position)

        if oia_graph.has_relation(oia_entity_node, oia_verb_node):
            logger.debug("has relation between entity and verb")
            continue

        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)

        # Accept only `ref` children located strictly between entity and verb.
        def __valid_ref(n, l):
            return l == "ref" and dep_entity_node.LOC < n.LOC < dep_verb_node.LOC

        ref_nodes = list(n for n, l in dep_graph.children(dep_entity_node,
                                                          filter=__valid_ref))
        ref_nodes.sort(key=lambda x: x.LOC)

        if ref_nodes:
            # Use the ref word with the largest LOC.
            ref_node = ref_nodes[-1]

            oia_ref_node = oia_graph.add_words(ref_node.position)

            oia_graph.add_ref(oia_entity_node, oia_ref_node)

            logger.debug("we are coping with ref between:")
            logger.debug(dep_verb_node)
            logger.debug(ref_node)

            ref_relation = dep_graph.get_dependency(dep_verb_node, ref_node)

            case_nodes = list(n for n, l in dep_graph.children(
                ref_node, filter=lambda n, l: "case" in l))
            case_nodes.sort(key=lambda x: x.LOC)

            if ref_relation:
                if case_nodes:
                    # with which xxxx, the with will become the root pred
                    case_node = case_nodes[-1]
                    oia_case_node = oia_graph.add_words(case_node.position)

                    oia_graph.add_argument(oia_case_node, oia_verb_node, 1,
                                           mod=True)
                    oia_graph.add_argument(oia_case_node, oia_ref_node, 2)
                    oia_graph.add_mod(oia_verb_node, oia_entity_node)
                else:
                    if "obj" in ref_relation:
                        oia_graph.add_argument(oia_verb_node, oia_ref_node, 2)
                    elif ref_relation == "advmod":
                        oia_graph.add_mod(oia_ref_node, oia_verb_node)
                    else:
                        raise Exception(
                            "unknown relation: {}".format(ref_relation))
                    # oia_graph.add_argument(oia_verb_node, oia_entity_node, 2, mod=True)

        # NOTE(review): this repeats the verb -> subject argument added above.
        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)
        oia_graph.add_argument(oia_verb_node, oia_entity_node, 2, mod=True)

        # NOTE(review): `rels` is assigned but not read below.
        rels = dep_graph.get_dependency(dep_entity_node, dep_verb_node)

        #if rels.endswith("obj"):
        for node, l in dep_graph.children(dep_verb_node):
            if l == "ccomp":
                oia_ccomp_node = oia_graph.add_words(node.position)
                oia_graph.add_argument(oia_verb_node, oia_ccomp_node, 3)
def subject_relative_clause_loop(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Convert subject-extracted/referred relative clauses.

    Examples:
    "the person who is tall / that is killed" -- with ref;
    "the person waiting for the baby" -- without ref.

    The loop version is because that the match algorithm does not match part
    of the loop, see test_match for more detail.

    Two passes are made over ``dep_graph``:

    1. entity <-> clause head loops (clause -subj-> entity and
       entity -acl:relcl-> clause).  If the entity has a ``ref`` child located
       between the entity and the clause head, that word (or its case marker,
       when it has one) takes the subject role and a ref edge is added;
       otherwise the entity itself becomes argument 1 with ``mod=True``.
    2. entity -acl:relcl-> verb with a subject whose LEMMA matches
       ``what|who|which|that``.  The verb modifies the entity, the entity
       refers to the subject, and the subject is argument 1 of the verb.  Both
       edges are marked as processed in the context.

    :param dep_graph: dependency graph that is searched for the patterns
    :param oia_graph: OIA graph that receives the nodes and edges
    :param context: conversion context, updated through ``processed``
    :return: None
    """
    # ---- Pass 1: the subject of the relative clause is the entity itself ----
    pattern = DependencyGraph()
    entity_node = DependencyGraphNode()
    relcl_node = DependencyGraphNode()
    pattern.add_node(entity_node)
    pattern.add_node(relcl_node)
    pattern.add_dependency(relcl_node, entity_node, r'\w*subj\w*')
    pattern.add_dependency(entity_node, relcl_node, r'\w*acl:relcl\w*')

    for match in dep_graph.match(pattern):
        dep_entity_node = match[entity_node]
        dep_relcl_node = match[relcl_node]

        oia_verb_node = oia_graph.add_words(dep_relcl_node.position)
        oia_enitity_node = oia_graph.add_words(dep_entity_node.position)

        # Accept only `ref` children located strictly between the entity and
        # the clause head.
        def __valid_ref(n, l):
            return l == "ref" and dep_entity_node.LOC < n.LOC < dep_relcl_node.LOC

        ref_nodes = list(n for n, l in dep_graph.children(dep_entity_node,
                                                          filter=__valid_ref))
        ref_nodes.sort(key=lambda x: x.LOC)

        if ref_nodes:
            # Use the ref word with the largest LOC.
            ref_node = ref_nodes[-1]

            oia_ref_node = oia_graph.add_words(ref_node.position)

            dep_case_nodes = list(n for n, l in dep_graph.children(
                ref_node, filter=lambda n, l: "case" in l))
            dep_case_nodes.sort(key=lambda x: x.LOC)

            if dep_case_nodes:
                # with which xxxx, the with will become the root pred
                dep_case_node = dep_case_nodes[-1]
                oia_case_node = oia_graph.add_words(dep_case_node.position)

                oia_graph.add_argument(oia_case_node, oia_verb_node, 1)
                oia_graph.add_argument(oia_case_node, oia_ref_node, 2)
                oia_graph.add_ref(oia_enitity_node, oia_ref_node)
            else:
                oia_graph.add_argument(oia_verb_node, oia_ref_node, 1)
                oia_graph.add_ref(oia_enitity_node, oia_ref_node)
        else:
            oia_graph.add_argument(
                oia_verb_node, oia_enitity_node, 1,
                mod=True)  # function and pred, seems we need another label

    # ---- Pass 2: the subject of the relative clause is a relative word ----
    pattern = DependencyGraph()
    verb_node = DependencyGraphNode()
    entity_node = DependencyGraphNode()
    subj_node = DependencyGraphNode(LEMMA=r"what|who|which|that")
    pattern.add_nodes([verb_node, entity_node, subj_node])

    pattern.add_dependency(verb_node, subj_node, r'\w*subj\w*')
    pattern.add_dependency(entity_node, verb_node, r'\w*acl:relcl\w*')

    for match in dep_graph.match(pattern):
        dep_entity_node = match[entity_node]
        dep_verb_node = match[verb_node]
        dep_subj_node = match[subj_node]

        # Mark both edges as handled in the context.
        context.processed(dep_verb_node, dep_subj_node)
        context.processed(dep_entity_node, dep_verb_node)

        oia_verb_node = oia_graph.add_words(dep_verb_node.position)
        oia_enitity_node = oia_graph.add_words(dep_entity_node.position)
        oia_subj_node = oia_graph.add_words(dep_subj_node.position)

        oia_graph.add_mod(oia_verb_node, oia_enitity_node)
        oia_graph.add_ref(oia_enitity_node, oia_subj_node)
        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)
def continuous_asas(dep_graph: DependencyGraph):
    """
    Merge contiguous "as X as" expressions, e.g. "as far as I know".

    The first 'as' is always the advmod of a following element, X, which is
    within the range of as... as.  The second 'as' is always the dependent of
    B.  B sometimes depends on the first 'as', sometimes depends on X.
    Sometimes X has a head that is also within the range of as...as.

    Two patterns are matched; they differ only in whether B hangs from the
    first "as" (pattern1) or from X (pattern2).  A match is kept only when the
    words appear in the order first "as" < X < second "as" < B.  For every
    kept match, all nodes located from the first "as" up to X, plus the second
    "as", are merged into one ADP node that replaces them.  The edges between
    the merged node and B, and from the head to the merged node, are removed.
    The head is then linked to B with ``advcl:<merged form>`` (B marks the
    merged node with ``mark``) when the head's UPOS is VERB, otherwise with
    ``obl:<merged form>`` (and ``case``).

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    # The same pattern nodes are shared by both patterns below.
    verb_node = DependencyGraphNode(UPOS="VERB|NOUN|PRON|PROPN")
    adv_node = DependencyGraphNode(UPOS="ADV|ADJ")
    as1_node = DependencyGraphNode(LEMMA="as")
    as2_node = DependencyGraphNode(LEMMA="as")
    verb2_node = DependencyGraphNode(UPOS="VERB|ADJ|NOUN|PROPN|PRON")
    # ADJ is for as soon as possible

    # pattern1: B depends on the first "as".
    pattern1 = DependencyGraph()
    pattern1.add_nodes([verb_node, adv_node, as1_node, as2_node, verb2_node])
    pattern1.add_dependency(verb_node, adv_node, r'advmod|amod')
    pattern1.add_dependency(adv_node, as1_node, r'\w*advmod\w*')
    pattern1.add_dependency(as1_node, verb2_node, r'advcl:as|obl:as|advmod')
    pattern1.add_dependency(verb2_node, as2_node, r'mark|case')

    # pattern2: B depends on X.
    pattern2 = DependencyGraph()
    pattern2.add_nodes([verb_node, adv_node, as1_node, as2_node, verb2_node])
    pattern2.add_dependency(verb_node, adv_node, r'advmod|amod')
    pattern2.add_dependency(adv_node, as1_node, r'\w*advmod\w*')
    pattern2.add_dependency(adv_node, verb2_node, r'advcl:as|obl:as|advmod')
    pattern2.add_dependency(verb2_node, as2_node, r'mark|case')

    # NOTE(review): this list is appended to but not read in this function.
    as_as_pred = []

    # Both match results are turned into lists before the loop body starts
    # rewriting dep_graph.
    for match in list(dep_graph.match(pattern1)) + list(
            dep_graph.match(pattern2)):

        dep_verb_node = match[verb_node]
        dep_adv_node = match[adv_node]
        dep_as1_node = match[as1_node]
        dep_as2_node = match[as2_node]
        dep_verb2_node = match[verb2_node]

        # Keep only matches in the surface order: as1 < X < as2 < B.
        if not (dep_as1_node.LOC < dep_adv_node.LOC < dep_as2_node.LOC <
                dep_verb2_node.LOC):
            continue

        as_as_pred.append((dep_as1_node, dep_as2_node, dep_adv_node,
                           dep_verb_node, dep_verb2_node))

        # Everything located from the first "as" up to X, plus the second
        # "as", sorted by LOC.
        pred = [
            node for node in dep_graph.nodes()
            if dep_as1_node.LOC <= node.LOC <= dep_adv_node.LOC
        ]
        pred.append(dep_as2_node)
        pred.sort(key=lambda x: x.LOC)
        head = dep_adv_node

        dep_asas_node = merge_dep_nodes(pred, UPOS="ADP", LOC=head.LOC)

        dep_graph.replace_nodes(pred, dep_asas_node)
        dep_graph.remove_dependency(dep_verb2_node, dep_asas_node)
        dep_graph.remove_dependency(dep_asas_node, dep_verb2_node)
        dep_graph.remove_dependency(dep_verb_node, dep_asas_node)

        # The relation labels depend on the UPOS of the head.
        if dep_verb_node.UPOS == "VERB":
            dep_graph.set_dependency(dep_verb_node, dep_verb2_node,
                                     "advcl:" + dep_asas_node.FORM)
            dep_graph.set_dependency(dep_verb2_node, dep_asas_node, "mark")
        else:
            dep_graph.set_dependency(dep_verb_node, dep_verb2_node,
                                     "obl:" + dep_asas_node.FORM)
            dep_graph.set_dependency(dep_verb2_node, dep_asas_node, "case")