def extract_PP_spec(cls, origin, full_sentence, element, node_index, dependencies):
    """Append prepositional-phrase specifiers to ``element``.

    A PREP/PREPC dependency is considered when its governor is ``node_index``
    (or its governor gloss equals the copula of an ``Action`` element) and
    ``cls.part_rc_mod`` does not flag it. For each such dependency the
    enclosing PP phrase is rendered as text and stored in a ``Specifier`` of
    type PP.

    Args:
        origin: Passed through to ``Specifier`` and ``cls.create_object``.
        full_sentence: Tree searched with ``Search.find_dep_in_tree``.
        element: Receives the specifiers via ``element.f_specifiers``.
        node_index: Index of the node whose prepositional phrases are wanted.
        dependencies: Dependency dicts of the sentence.
    """
    prep_deps = Search.find_dependencies(dependencies, (PREP, PREPC))
    rc_mod = Search.find_dependencies(dependencies, RCMOD)

    for dep in prep_deps:
        cop = element.f_cop if isinstance(element, Action) else None
        belongs_to_node = dep['governor'] == node_index or dep['governorGloss'] == cop
        if not belongs_to_node or cls.part_rc_mod(full_sentence, rc_mod, dep):
            continue

        dep_in_tree = Search.find_dep_in_tree(full_sentence, dep['dependent'])
        phrase_tree = Search.get_full_phrase_tree(dep_in_tree, PP)
        if not phrase_tree:
            continue

        # Nested clauses are removed before the phrase is rendered as text.
        phrase_tree = cls.delete_branches(phrase_tree, (S, SBAR))
        phrase = " ".join(phrase_tree.leaves())
        space_index = phrase.find(" ")
        if space_index < 0:
            # A one-word phrase yields no specifier.
            continue

        specific = dep['spec']
        if specific:
            # Replace the first word of the phrase by the dependency's 'spec'.
            phrase = specific + phrase[space_index:]

        spec = Specifier(origin, dep['dependent'], phrase)
        spec.f_type = PP
        if dep_in_tree.parent().label().startswith(NP):
            spec.f_object = cls.create_object(origin, full_sentence, dep['dependent'], dependencies)
        spec.f_headWord = specific
        element.f_specifiers.append(spec)
def determine_subjects(self, sentence, dependencies, active):
    """Build the subject actors of a sentence.

    Args:
        sentence: Passed to ``self.exclude_relative_clauses``.
        dependencies: Dependency dicts of the sentence.
        active: When true NSUBJ dependencies are used, otherwise AGENT.

    Returns:
        A list holding the main actor followed by the actors returned by
        ``self.check_conjunctions``; empty unless exactly one subject
        dependency remains after filtering.
    """
    # Find main actor
    subject_relation = NSUBJ if active else AGENT
    candidates = Search.find_dependencies(dependencies, subject_relation)
    candidates = self.exclude_relative_clauses(sentence, candidates)

    main_actor_index = None
    candidate_count = len(candidates)
    if candidate_count == 1:
        main_actor_index = candidates[0]['dependent']
    elif candidate_count == 0:
        self.logger.debug("Sentence contains no subject!")
    else:
        self.logger.info("Sentence has more then one subject")
        self.logger.debug(candidates)

    # Find all actors
    if not main_actor_index:
        return []

    main_actor = Builder.create_actor(self.f_stanford_sentence, self.f_full_sentence,
                                      main_actor_index, dependencies)
    main_actor.f_subjectRole = True
    main_actor.f_passive = not active
    actors = [main_actor]

    for conjunct in self.check_conjunctions(dependencies, main_actor, True, True, active):
        conjunct.f_subjectRole = True
        conjunct.f_passive = not active
        actors.append(conjunct)

    return actors
def determine_object_from_dobj(self, verb, dependencies):
    """Collect the object of ``verb`` from the dependency list.

    Lookup order as implemented below:

    1. DOBJ dependencies filtered by ``Search.filter_by_gov`` for ``verb``.
    2. If none were found (and the verb's xcomp does not already carry an
       object): DOBJ dependencies located after an action that is linked
       through ``self.f_analyzed_sentence.f_conjs``.
    3. If still none: a single matching PREP dependency, and after that a
       single COP dependency, each accepted only when the node's parent is
       labelled NP.

    Args:
        verb: Action whose object is searched.
        dependencies: Dependency dicts of the sentence.

    Returns:
        A list with the created object, or an empty list.
    """
    objects = []
    dobjs = Search.find_dependencies(dependencies, DOBJ)
    dobjs_filtered = Search.filter_by_gov(dobjs, verb)

    if len(dobjs_filtered) == 0:
        if not verb.f_xcomp or not verb.f_xcomp.f_object:
            # NOTE(review): each iteration overwrites dobjs_filtered, so only
            # the last conjunction decides the result -- confirm this is intended.
            for conj in self.f_analyzed_sentence.f_conjs:
                if conj.f_to == verb:
                    dobjs_filtered = [dep for dep in dobjs if conj.f_to.f_word_index < dep['dependent']]
                else:
                    dobjs_filtered = [dep for dep in dobjs if conj.f_from.f_word_index < dep['dependent']]

    if len(dobjs_filtered) == 0:
        # No direct object: fall back to prepositional objects.
        preps = Search.find_dependencies(dependencies, PREP)
        preps_filtered = []
        for dep in preps:
            if dep['governorGloss'] in verb.f_name \
                    and dep['governor'] > verb.f_word_index:
                preps_filtered.append(dep)

        if len(preps_filtered) == 0:
            # No prepositional object either: fall back to the copula.
            cops = Search.find_dependencies(dependencies, COP)
            if len(cops) == 0:
                self.logger.debug("No Object found")
            elif len(cops) > 1:
                self.logger.info("Sentence with more than one copula object!")
                self.logger.debug(cops)
            else:
                # The governor of the copula relation is taken as the object.
                dep_index = cops[0]['governor']
                dep_in_tree = Search.find_dep_in_tree(self.f_full_sentence, dep_index)
                if dep_in_tree.parent().label() == NP:
                    obj = Builder.create_object(self.f_stanford_sentence, self.f_full_sentence, dep_index, dependencies)
                    objects.append(obj)
                    self.check_np_sub_sentences(dep_index, dependencies, obj)
                else:
                    self.logger.debug("No object found")
        elif len(preps_filtered) > 1:
            self.logger.info("Sentence with more than one prepositional object!")
            self.logger.debug(preps_filtered)
        else:
            dep_index = preps_filtered[0]['dependent']
            dep_in_tree = Search.find_dep_in_tree(self.f_full_sentence, dep_index)
            if dep_in_tree.parent().label() == NP:
                obj = Builder.create_object(self.f_stanford_sentence, self.f_full_sentence, dep_index, dependencies)
                objects.append(obj)
                self.check_np_sub_sentences(dep_index, dependencies, obj)
            else:
                self.logger.debug("No object found")
    else:
        # Only the first remaining direct object is used.
        dep_index = dobjs_filtered[0]['dependent']
        obj = Builder.create_object(self.f_stanford_sentence, self.f_full_sentence, dep_index,
                                    dependencies)
        objects.append(obj)
        self.check_np_sub_sentences(dep_index, dependencies, obj)

    return objects
def find_INFMOD_specifiers(cls, origin, node_index, dependencies, element):
    """Append an INFMOD specifier for the first INFMOD governed by ``node_index``.

    The specifier name is the dependent's gloss, preceded by the glosses of
    all AUX/COP/NEG dependencies governed by that dependent, joined by spaces.

    Args:
        origin: Passed through to ``Specifier``.
        node_index: Index of the governing node.
        dependencies: Dependency dicts of the sentence.
        element: Receives the specifier via ``element.f_specifiers``.
    """
    for dep in Search.find_dependencies(dependencies, INFMOD):
        if dep['governor'] != node_index:
            continue

        helper_deps = Search.find_dependencies(dependencies, (AUX, COP, NEG))
        words = [helper['dependentGloss'] for helper in helper_deps
                 if helper['governor'] == dep['dependent']]
        words.append(dep['dependentGloss'])

        spec = Specifier(origin, dep['dependent'], " ".join(words))
        spec.f_type = INFMOD
        element.f_specifiers.append(spec)
        # Only the first matching dependency is used.
        break
def is_active_sentence(self, sentence, dependencies):
    """Decide whether a sentence is in active voice.

    Args:
        sentence: Passed to ``self.exclude_relative_clauses``.
        dependencies: Dependency dicts of the sentence.

    Returns:
        True when an NSUBJ, CSUBJ or DOBJ dependency remains after
        ``self.exclude_relative_clauses``; False otherwise. When no
        NSUBJPASS/CSUBJPASS/AGENT dependency remains either, a debug message
        is logged and False is returned.
    """
    active_markers = self.exclude_relative_clauses(
        sentence, Search.find_dependencies(dependencies, (NSUBJ, CSUBJ, DOBJ)))
    if len(active_markers) > 0:
        return True

    passive_markers = self.exclude_relative_clauses(
        sentence, Search.find_dependencies(dependencies, (NSUBJPASS, CSUBJPASS, AGENT)))
    if len(passive_markers) == 0:
        self.logger.debug("It is not clear whether this sentence is active or passive!")
    return False
def analyze_recursive(self, main_sentence, dependencies):
    """Extract elements from a sentence tree, descending into sub-sentences.

    Works on a deep copy of ``main_sentence``. With no sub-sentence the
    elements are extracted directly. With exactly one, it is analysed first,
    removed from the copy, and the remainder is analysed with the
    dependencies left over. With several, each one is analysed on its own.

    Args:
        main_sentence: Sentence tree; not modified by this method.
        dependencies: Dependency dicts belonging to ``main_sentence``.
    """
    main_sentence = deepcopy(main_sentence)
    sub_sentence_count = self.determine_sub_sentence_count(main_sentence)

    if sub_sentence_count == 0:
        self.extract_elements(main_sentence, dependencies)
        return

    if sub_sentence_count != 1:
        for sub_sentence in self.find_sub_sentences(main_sentence):
            sub_deps = self.filter_dependencies(sub_sentence, dependencies)
            self.analyze_recursive(sub_sentence, sub_deps)
        return

    sub_sentence = self.find_sub_sentences(main_sentence)[0]
    sub_deps = self.filter_dependencies(sub_sentence, dependencies)
    self.analyze_recursive(sub_sentence, sub_deps)

    # Cut the analysed sub-sentence out of the copied tree.
    position = sub_sentence.treeposition()
    del main_sentence[position]

    # Keep RCMOD relations plus everything the sub-sentence did not claim.
    remaining = [dep for dep in dependencies
                 if dep['dep'] == RCMOD or dep not in sub_deps]
    core_relations = Search.find_dependencies(remaining, (NSUBJ, AGENT, NSUBJPASS, DOBJ))
    if len(core_relations) > 0:
        self.extract_elements(main_sentence, remaining)
def determine_object(self, sentence, verb, dependencies, active):
    """Determine the objects belonging to ``verb``.

    If ``verb`` has an xcomp, its object is resolved first and stored on it.
    For passive sentences the first NSUBJPASS dependent becomes the object
    (flagged with ``f_subjectRole``); otherwise the lookup is delegated to
    ``self.determine_object_from_dobj``. Conjuncts of the first object that
    are ``Element`` instances are appended.

    Args:
        sentence: Passed to ``self.exclude_relative_clauses``.
        verb: Action whose objects are wanted.
        dependencies: Dependency dicts of the sentence.
        active: Whether the sentence is in active voice.

    Returns:
        List of objects, possibly empty.
    """
    if verb.f_xcomp:
        xcomp_objects = self.determine_object(sentence, verb.f_xcomp, dependencies, active)
        if len(xcomp_objects) > 0:
            verb.f_xcomp.f_object = xcomp_objects[0]

    # Passive subjects are only looked up for passive sentences.
    passive_subjects = []
    if not active:
        passive_subjects = Search.find_dependencies(dependencies, NSUBJPASS)
        passive_subjects = self.exclude_relative_clauses(sentence, passive_subjects)

    objects = []
    if len(passive_subjects) == 0:
        objects.extend(self.determine_object_from_dobj(verb, dependencies))
    else:
        if len(passive_subjects) > 1:
            self.logger.debug("Passive sentence with more than one subject!")
        subject_index = passive_subjects[0]['dependent']
        passive_object = Builder.create_object(self.f_stanford_sentence, self.f_full_sentence,
                                               subject_index, dependencies)
        passive_object.f_subjectRole = True
        objects.append(passive_object)
        self.check_np_sub_sentences(subject_index, dependencies, passive_object)

    if len(objects) > 0:
        linked = self.check_conjunctions(dependencies, objects[0], True, False, active)
        objects.extend(item for item in linked if isinstance(item, Element))

    return objects
def is_negated(cls, node_index, dependencies):
    """Report whether the node at ``node_index`` carries a negation.

    If the node is the dependent of a COP dependency, the governor of the
    first such dependency is checked instead of the node itself.

    Args:
        node_index: Index of the node to check.
        dependencies: Dependency dicts of the sentence.

    Returns:
        True when a NEG dependency is governed by the checked index.
    """
    copulas = Search.find_dependencies(dependencies, COP)
    target_index = next(
        (cop['governor'] for cop in copulas if cop['dependent'] == node_index),
        node_index,
    )

    for negation in Search.find_dependencies(dependencies, NEG):
        if negation['governor'] == target_index:
            return True
    return False
def get_modifiers(cls, node_index, dependencies):
    """Return the index of the first qualifying modifier of ``node_index``.

    A modifier qualifies when it is an ADVMOD/ACOMP dependency governed by
    ``node_index``, its dependent index is greater than the governor index,
    and its gloss is not listed in ``f_sequenceIndicators``.

    Args:
        node_index: Index of the governing node.
        dependencies: Dependency dicts of the sentence.

    Returns:
        The dependent index, or None when nothing qualifies.
    """
    for dep in Search.find_dependencies(dependencies, (ADVMOD, ACOMP)):
        if dep['governor'] != node_index:
            continue
        if dep['governor'] < dep['dependent'] and dep['dependentGloss'] not in f_sequenceIndicators:
            return dep['dependent']
    return None
def find_determiner(cls, node_index, dependencies, element):
    """Store the determiner of ``node_index`` on ``element``.

    The gloss of the first POSS/DET dependency governed by ``node_index`` is
    written to ``element.f_determiner``; without a match nothing is changed.

    Args:
        node_index: Index of the governing node.
        dependencies: Dependency dicts of the sentence.
        element: Object whose ``f_determiner`` is set.
    """
    candidates = Search.find_dependencies(dependencies, (POSS, DET))
    first_match = next((dep for dep in candidates if dep['governor'] == node_index), None)
    if first_match is not None:
        element.f_determiner = first_match['dependentGloss']
def check_global_conjunctions(self):
    """Link actions that are joined by a CONJ dependency.

    For every CONJ dependency in ``self.f_dependencies`` the actions
    containing governor and dependent are looked up; when both exist and
    ``self.find_conjunction`` reports no existing conjunction, a link is built.
    """
    for dep in Search.find_dependencies(self.f_dependencies, CONJ):
        source_action = self.get_action_containing(dep['governor'])
        target_action = self.get_action_containing(dep['dependent'])
        if not (source_action and target_action):
            continue
        if self.find_conjunction(source_action, dep, target_action):
            continue
        self.build_link(source_action, dep, target_action)
def create_action(cls, origin, full_sentence, node_index, dependencies, active):
    """Create an ``Action`` for the verb at ``node_index`` and fill its details.

    Populates auxiliaries, modifier, negation, copula, particle, indirect
    object specifier, xcomp and the SBAR/PP/RCMOD specifiers.

    Args:
        origin: Passed through to ``Action`` and ``Specifier``.
        full_sentence: Tree searched with ``Search.find_dep_in_tree``.
        node_index: Index of the verb node.
        dependencies: Dependency dicts of the sentence.
        active: Whether the sentence is active; for passive sentences
            ``cls.check_dobj`` is run additionally.

    Returns:
        The populated ``Action``.
    """
    node = Search.find_dep_in_tree(full_sentence, node_index)
    action = Action(origin, node_index, node[0])

    aux = cls.get_auxiliars(node_index, dependencies)
    if len(aux) > 0:
        action.f_aux = aux

    mod_index = cls.get_modifiers(node_index, dependencies)
    if mod_index:
        mod = Search.find_dep_in_tree(full_sentence, mod_index)
        action.f_mod = mod[0]
        action.f_modPos = mod_index

    # is_negated compares its argument against the integer governor/dependent
    # fields of the dependencies, so it needs the index, not the tree node.
    action.f_negated = cls.is_negated(node_index, dependencies)

    cop_index = cls.get_cop(node_index, dependencies)
    if cop_index:
        cop = Search.find_dep_in_tree(full_sentence, cop_index)
        action.f_cop = cop[0]
        action.f_copIndex = cop_index

    prt = cls.get_prt(node_index, dependencies)
    if prt:
        action.f_prt = prt

    iobj_index = cls.get_iobj(node_index, dependencies)
    if iobj_index:
        iobj = Search.find_dep_in_tree(full_sentence, iobj_index)
        spec = Specifier(origin, iobj_index, " ".join(iobj.leaves()))
        spec.f_type = IOBJ
        action.f_specifiers.append(spec)

    if not active:
        cls.check_dobj(node_index, dependencies, action, origin, full_sentence)

    # The first usable XCOMP/DEP dependency becomes the open clausal complement.
    to_check = Search.find_dependencies(dependencies, (XCOMP, DEP))
    for dep in to_check:
        if dep['governor'] == node_index:
            dep_in_tree = Search.find_dep_in_tree(full_sentence, dep['dependent'])
            if dep['dep'] == DEP:
                # A DEP relation is accepted only when its dependent's label
                # starts with "V" and it does not precede the governor.
                if dep_in_tree.label()[0] != "V" or dep['dependent'] < dep['governor']:
                    continue
            xcomp = cls.create_action(origin, full_sentence, dep['dependent'], dependencies, True)
            action.f_xcomp = xcomp
            break

    vp_head = Search.get_full_phrase_tree(node, VP)
    cls.extract_SBAR_spec(origin, full_sentence, action, vp_head)
    cls.extract_PP_spec(origin, full_sentence, action, node_index, dependencies)
    cls.extract_RCMOD_spec(origin, full_sentence, action, node_index, dependencies)

    cls.logger.debug("Identified action {}".format(action))
    return action
def get_PARTMOD_specifiers(cls, origin, full_sentence, node_index, dependencies, element):
    """Append one PARTMOD specifier per PARTMOD dependency of ``node_index``.

    The specifier name is the text of the VP phrase found around the
    dependent; it is empty when no such phrase is found.

    Args:
        origin: Passed through to ``Specifier``.
        full_sentence: Tree searched with ``Search.find_dep_in_tree``.
        node_index: Index of the governing node.
        dependencies: Dependency dicts of the sentence.
        element: Receives the specifiers via ``element.f_specifiers``.
    """
    for dep in Search.find_dependencies(dependencies, PARTMOD):
        if dep['governor'] != node_index:
            continue

        dep_in_tree = Search.find_dep_in_tree(full_sentence, dep['dependent'])
        phrase_tree = Search.get_full_phrase_tree(dep_in_tree, VP)
        if phrase_tree:
            words = phrase_tree.leaves()
        else:
            words = []

        spec = Specifier(origin, dep['dependent'], " ".join(words))
        spec.f_type = PARTMOD
        element.f_specifiers.append(spec)
def get_specifier_from_dependencies(cls, origin, node_index, dependencies, element, dep_type):
    """Combine all ``dep_type`` dependents of ``node_index`` into one specifier.

    The glosses of all matching dependents are joined with spaces. When a
    dependent governs a CONJ dependency, the conjunction's ``'spec'`` value
    and the conjunct's gloss are inserted after it (e.g. "red and blue").
    The specifier is positioned at the first matching dependent.

    Args:
        origin: Passed through to ``Specifier``.
        node_index: Index of the governing node.
        dependencies: Dependency dicts of the sentence.
        element: Receives the specifier via ``element.f_specifiers``.
        dep_type: Dependency relation to collect; also used as specifier type.
    """
    to_check = Search.find_dependencies(dependencies, dep_type)
    # Loop-invariant, so looked up once instead of once per matching dependent.
    conjs = Search.find_dependencies(dependencies, CONJ)

    index = None
    words = []
    for dep in to_check:
        if dep['governor'] != node_index:
            continue
        words.append(dep['dependentGloss'])
        for conj in conjs:
            if conj['governor'] == dep['dependent']:
                # Append the conjunct itself; the previous code repeated the
                # gloss of ``dep`` here, producing e.g. "red and red".
                words.append(conj['spec'])
                words.append(conj['dependentGloss'])
        if not index:
            index = dep['dependent']

    if index:
        spec = Specifier(origin, index, " ".join(words))
        spec.f_type = dep_type
        element.f_specifiers.append(spec)
def find_NN_specifiers(cls, origin, node_index, dependencies, element):
    """Append noun-compound specifiers of ``node_index`` to ``element``.

    First delegates to ``cls.get_specifier_from_dependencies`` for NN
    dependencies. Then every DEP dependency governed by ``node_index`` whose
    dependent index is exactly ``governor + 1`` is added as an NNAFTER
    specifier.

    Args:
        origin: Passed through to ``Specifier``.
        node_index: Index of the governing node.
        dependencies: Dependency dicts of the sentence.
        element: Receives the specifiers via ``element.f_specifiers``.
    """
    cls.get_specifier_from_dependencies(origin, node_index, dependencies, element, NN)

    for dep in Search.find_dependencies(dependencies, DEP):
        if dep['governor'] == node_index and dep['governor'] + 1 == dep['dependent']:
            trailing = Specifier(origin, dep['dependent'], dep['dependentGloss'])
            trailing.f_type = NNAFTER
            element.f_specifiers.append(trailing)
def check_conjunctions(self, dependencies, element, obj, actor, active):
    """Create the elements that are conjoined with ``element``.

    A CONJ dependency is followed when its governor gloss equals
    ``element.f_name`` and the governor has no COP dependency, or when the
    governor gloss occurs in the base form of the element's xcomp.

    Args:
        dependencies: Dependency dicts of the sentence.
        element: Element whose conjuncts are searched.
        obj: When true an actor/object is created, otherwise an action.
        actor: With ``obj`` true, selects actor (true) or object (false).
        active: Passed to ``Builder.create_action`` for plain action conjuncts.

    Returns:
        List of the newly created elements that were linked to ``element``.
    """
    results = []
    conjs = Search.find_dependencies(dependencies, CONJ)
    cops = Search.find_dependencies(dependencies, COP)

    if len(conjs) > 0:
        # Only actions can have an xcomp.
        action = element if isinstance(element, Action) else None
        for conj in conjs:
            x_comp_hit = True if action and action.f_xcomp and conj['governorGloss'] in action.f_xcomp.f_baseForm else False
            if (conj['governorGloss'] == element.f_name and len(Search.filter_by_gov(cops, conj['governor'])) == 0) \
                    or x_comp_hit:
                dep_index = conj['dependent']
                if obj:
                    if actor:
                        new_ele = Builder.create_actor(
                            self.f_stanford_sentence, self.f_full_sentence, dep_index, dependencies)
                    else:
                        new_ele = Builder.create_object(
                            self.f_stanford_sentence, self.f_full_sentence, dep_index, dependencies)
                    self.check_np_sub_sentences(dep_index, dependencies, new_ele)
                else:
                    if x_comp_hit:
                        # The conjunct belongs to the xcomp: copy the action
                        # and give the copy a new xcomp.
                        new_ele = copy(action)
                        new_ele.f_xcomp = Builder.create_action(
                            self.f_stanford_sentence, self.f_full_sentence, dep_index, dependencies, True)
                    else:
                        new_ele = Builder.create_action(
                            self.f_stanford_sentence, self.f_full_sentence, dep_index, dependencies, active)

                # Skip conjunctions that point back to their own governor.
                if conj['dependent'] != conj['governor']:
                    results.append(new_ele)
                    self.build_link(element, conj, new_ele)

    return results
def check_dobj(cls, node_index, dependencies, action, origin, full_sentence):
    """Turn direct objects of ``node_index`` into DOBJ specifiers of ``action``.

    Each DOBJ dependency governed by ``node_index`` is logged as an error and
    added as a specifier named after the full noun, with the created object
    attached.

    Args:
        node_index: Index of the governing node.
        dependencies: Dependency dicts of the sentence.
        action: Receives the specifiers via ``action.f_specifiers``.
        origin: Passed through to ``Specifier`` and ``cls.create_object``.
        full_sentence: Tree searched with ``Search.find_dep_in_tree``.
    """
    for dep in Search.find_dependencies(dependencies, DOBJ):
        if dep['governor'] != node_index:
            continue

        cls.logger.error("Dobj was found in a passive sentence")
        dobj_node = Search.find_dep_in_tree(full_sentence, dep['dependent'])
        noun_text = cls.get_full_noun(dobj_node, dep['dependent'], dependencies)

        spec = Specifier(origin, dep['dependent'], noun_text)
        spec.f_type = DOBJ
        spec.f_object = cls.create_object(origin, full_sentence, dep['dependent'], dependencies)
        action.f_specifiers.append(spec)
def find_node_action(self, dep_index, action_list, deps):
    """Find the action located at ``dep_index``.

    When no action sits at that index, the search continues at the dependent
    of the first COP dependency governed by ``dep_index``.

    Args:
        dep_index: Word index to look up.
        action_list: Actions to search; matched via ``f_word_index``.
        deps: Dependency dicts of the sentence.

    Returns:
        The matching action, or None.
    """
    direct_hit = next(
        (candidate for candidate in action_list if candidate.f_word_index == dep_index),
        None,
    )
    if direct_hit is not None:
        return direct_hit

    for cop in Search.find_dependencies(deps, COP):
        if cop['governor'] == dep_index:
            return self.find_node_action(cop['dependent'], action_list, deps)
    return None
def find_dependants(cls, node_index, dependencies, deps, is_governor):
    """Return the glosses of the words related to ``node_index``.

    Args:
        node_index: Index of the node of interest.
        dependencies: Dependency dicts of the sentence.
        deps: Dependency relation(s) passed to ``Search.find_dependencies``.
        is_governor: When true, collect dependents governed by ``node_index``;
            otherwise collect governors of ``node_index``.

    Returns:
        The collected glosses joined by single spaces ("" when none match).
    """
    if is_governor:
        match_key, gloss_key = 'governor', 'dependentGloss'
    else:
        match_key, gloss_key = 'dependent', 'governorGloss'

    glosses = [dep[gloss_key]
               for dep in Search.find_dependencies(dependencies, deps)
               if dep[match_key] == node_index]
    return " ".join(glosses)
def extract_RCMOD_spec(cls, origin, full_sentence, element, node_index, dependencies):
    """Append relative-clause (RCMOD) specifiers to ``element``.

    An RCMOD dependency is used when its dependent is ``node_index`` or its
    dependent gloss equals the copula of an ``Action`` element. The specifier
    text is the PP phrase around the governor, with S/SBAR branches removed.

    Args:
        origin: Passed through to ``Specifier``.
        full_sentence: Tree searched with ``Search.find_dep_in_tree``.
        element: Receives the specifiers via ``element.f_specifiers``.
        node_index: Index of the node of interest.
        dependencies: Dependency dicts of the sentence.
    """
    for dep in Search.find_dependencies(dependencies, RCMOD):
        cop = element.f_cop if isinstance(element, Action) else None
        if not (dep['dependent'] == node_index or dep['dependentGloss'] == cop):
            continue

        governor_node = Search.find_dep_in_tree(full_sentence, dep['governor'])
        phrase_tree = Search.get_full_phrase_tree(governor_node, PP)
        if not phrase_tree:
            continue

        phrase_tree = cls.delete_branches(phrase_tree, (S, SBAR))
        spec = Specifier(origin, dep['dependent'], " ".join(phrase_tree.leaves()))
        spec.f_type = RCMOD
        element.f_specifiers.append(spec)
def check_sub_sentences(self, head, dependencies, obj, is_np):
    """Search the tree below ``head`` for SBAR sub-sentences and analyse them.

    An SBAR is skipped when it contains a CCOMP whose complementizer is THAT,
    or when the xcomp of ``obj`` (if it is an ``Action``) lies inside it.
    PP, VP, NP and S nodes are searched recursively; other nodes are ignored.

    Args:
        head: Tree node to inspect; falsy values and strings are ignored.
        dependencies: Dependency dicts of the sentence.
        obj: Element the sub-sentence would belong to.
        is_np: Passed through unchanged to the recursive calls.
    """
    if not head or isinstance(head, str):
        return

    if head.label() != SBAR:
        if head.label() in (PP, VP, NP, S):
            for child in head:
                self.check_sub_sentences(child, dependencies, obj, is_np)
        return

    start_index = Search.find_sentence_index(self.f_full_sentence, head)
    end_index = start_index + len(head.leaves())

    for ccomp in Search.find_dependencies(dependencies, CCOMP):
        if not (start_index < ccomp['dependent'] < end_index):
            continue
        for complm in Search.find_dependencies(dependencies, COMPLM):
            if complm['governor'] == ccomp['dependent'] and complm['dependentGloss'] == THAT:
                return

    action = obj if isinstance(obj, Action) else None
    xcomp_outside = (
        not action
        or not action.f_xcomp
        or start_index > action.f_xcomp.f_word_index
        or end_index < action.f_xcomp.f_word_index
    )
    if xcomp_outside:
        self.analyze_recursive(head, self.filter_dependencies(head, dependencies))
def get_full_noun(cls, node, node_index, dependencies):
    """Build the full noun text for the node at ``node_index``.

    NN dependents governed by the node are placed in front of ``node[0]``.
    DEP dependents are placed behind it, but only when their index is exactly
    ``governor + 1``.

    Args:
        node: Tree node of the noun; ``node[0]`` is used as its word.
        node_index: Index of the noun.
        dependencies: Dependency dicts of the sentence.

    Returns:
        The words joined by single spaces.
    """
    leading = []
    trailing = []
    for dep in Search.find_dependencies(dependencies, (NN, DEP)):
        if dep['governor'] != node_index:
            continue
        if dep['dep'] != DEP:
            leading.append(dep['dependentGloss'])
        elif dep['governor'] + 1 == dep['dependent']:
            trailing.append(dep['dependentGloss'])

    return " ".join(leading + [node[0]] + trailing)
def create_actor(cls, origin, full_sentence, node_index, dependencies):
    """Create the actor for the noun at ``node_index``.

    When ``WordNetWrapper.person_or_system`` accepts the noun, it becomes the
    actor. Otherwise, for CD nodes or words accepted by
    ``WordNetWrapper.can_be_group_action``, a PREP dependent introduced by one
    of ``f_realActorPPIndicators`` may be used as the actor. If no such
    dependent qualifies, the original noun is used and flagged ``f_unreal``.

    Args:
        origin: Passed through to ``cls.create_internal_actor``.
        full_sentence: Tree searched with ``Search.find_dep_in_tree``.
        node_index: Index of the noun.
        dependencies: Dependency dicts of the sentence.

    Returns:
        The created actor.
    """
    node = Search.find_dep_in_tree(full_sentence, node_index)
    full_noun = cls.get_full_noun(node, node_index, dependencies)

    if WordNetWrapper.person_or_system(full_noun, node[0]):
        actor = cls.create_internal_actor(origin, full_sentence, node, node_index, dependencies)
    else:
        actor = None
        if node.label() == CD or WordNetWrapper.can_be_group_action(node[0]):
            for prep in Search.find_dependencies(dependencies, PREP):
                if prep['spec'] not in f_realActorPPIndicators or prep['governor'] != node_index:
                    continue
                candidate_index = prep['dependent']
                candidate_node = Search.find_dep_in_tree(full_sentence, candidate_index)
                full_noun = cls.get_full_noun(candidate_node, candidate_index, dependencies)
                if WordNetWrapper.person_or_system(full_noun, prep['dependentGloss']):
                    actor = cls.create_internal_actor(origin, full_sentence, candidate_node,
                                                      candidate_index, dependencies)
                    break

        if not actor:
            # Nothing better was found: keep the noun but flag it as unreal.
            actor = cls.create_internal_actor(origin, full_sentence, node, node_index, dependencies)
            actor.f_unreal = True

    cls.logger.debug("Identified actor {}".format(actor))
    return actor
def marker_detection(self):
    """Annotate the extracted actions with markers and pre-adverbial modifiers.

    Runs five passes over ``self.f_analyzed_sentences``:

    1. markers from MARK, ADVMOD and COMPLM dependencies,
    2. markers derived from PP/RCMOD/SBAR specifiers,
    3. an implicit THEN for actions following a conditional action,
    4. propagation of markers to actions linked by conjunctions,
    5. normalisation of IFCOMPLM to IF and reordering of IF actions.
    """
    # Pass 1: markers taken directly from the dependencies.
    for analyzed_sentence in self.f_analyzed_sentences:
        stanford_sentence = analyzed_sentence.f_sentence
        deps = stanford_sentence.f_dependencies

        markers = Search.find_dependencies(deps, MARK)
        for dep in markers:
            action = self.find_node_action(dep['governor'], analyzed_sentence.f_actions, deps)
            if action:
                value = dep['dependentGloss']
                self.logger.debug("Marking {} with marker {}".format(
                    action, value))
                action.f_marker = value
                action.f_markerPos = dep['dependent']

        markers = Search.find_dependencies(deps, ADVMOD)
        for dep in markers:
            action = self.find_node_action(dep['governor'], analyzed_sentence.f_actions, deps)
            # Only adverbial modifiers located before the action are used.
            if action and action.f_word_index > dep['dependent']:
                value = dep['dependentGloss']
                if value in f_parallelIndicators:
                    action.f_marker = WHILE
                    action.f_markerPos = dep['dependent']
                elif value != ALSO:
                    self.logger.debug("Marking {} with advmod {}".format(
                        action, value))
                    action.f_preAdvMod = value
                    action.f_preAdvModPos = dep['dependent']

        markers = Search.find_dependencies(deps, COMPLM)
        for dep in markers:
            if dep['dependentGloss'] != THAT:
                action = self.find_node_action(dep['governor'], analyzed_sentence.f_actions, deps)
                if action:
                    value = dep['dependentGloss']
                    if value in f_conditionIndicators:
                        # Replaced by IF in the last pass.
                        value = IFCOMPLM
                    self.logger.debug(
                        "Marking {} with marker-complm {}".format(
                            action, value))
                    action.f_marker = value
                    action.f_markerPos = dep['dependent']

    # Pass 2: markers derived from the specifiers of each action.
    for analyzed_sentence in self.f_analyzed_sentences:
        stanford_sentence = analyzed_sentence.f_sentence
        for action in self.f_world.get_actions_of_sentence(
                stanford_sentence):
            specs = action.get_specifiers(PP)
            if action.f_object:
                specs.extend(action.f_object.get_specifiers(PP))
            specs.extend(action.get_specifiers(RCMOD))
            specs.extend(action.get_specifiers(SBAR))

            for spec in specs:
                if Search.starts_with(f_conditionIndicators, spec.f_name) and not action.f_marker:
                    self.logger.debug(
                        "Marking {} with marker {} if".format(
                            action, spec.f_name))
                    action.f_marker = IF
                    action.f_markerPos = \
                        spec.f_word_index
                    action.realMarker = spec.f_name
                    if spec.f_name not in f_conditionIndicators:
                        action.f_markerFromPP = True

                for indic in f_sequenceIndicators:
                    if spec.f_name.startswith(
                            indic) and not action.f_preAdvMod:
                        action.f_preAdvMod = indic
                        action.f_preAdvModPos = spec.f_word_index
                        action.preAdvModFromSpec = True

                if spec.f_name in f_parallelIndicators and not action.f_marker:
                    self.logger.debug(
                        "Marking {} with marker {} while".format(
                            action, spec.f_name))
                    action.f_marker = WHILE
                    action.f_markerPos = spec.f_word_index
                    action.realMarker = spec.f_name

    # Pass 3: actions following a conditional action get an implicit THEN.
    for analyzed_sentence in self.f_analyzed_sentences:
        stanford_sentence = analyzed_sentence.f_sentence
        linked = []
        next_mark = None
        actions = self.f_world.get_actions_of_sentence(stanford_sentence)
        for action in actions:
            if next_mark and not action.f_preAdvMod:
                action.f_preAdvMod = next_mark
                # -1 is used as position for modifiers that are not in the text.
                action.f_preAdvModPos = -1
                self.logger.debug(
                    "Marking {} with implicit advmod {}".format(
                        action, next_mark))
            if action in linked:
                next_mark = None
            if (action.f_marker in f_conditionIndicators and not action.f_markerFromPP
                    ) or action.f_preAdvMod in f_conditionIndicators:
                next_mark = THEN
                self.determine_conjunct_elements(
                    copy(analyzed_sentence.f_conjs), action, linked, actions)

    # Pass 4: propagate modifiers and markers to conjoined actions.
    for analyzed_sentence in self.f_analyzed_sentences:
        actions = analyzed_sentence.f_actions
        for action in actions:
            linked = []
            self.determine_conjunct_elements(
                copy(analyzed_sentence.f_conjs), action, linked, actions)
            if len(linked) > 1:
                for linked_action in linked:
                    if not linked_action.f_preAdvMod:
                        linked_action.f_preAdvMod = action.f_preAdvMod
                        linked_action.f_preAdvModPos = -1
                    if not linked_action.f_marker and action.f_marker:
                        if Search.starts_with(finishedIndicators, action.f_marker):
                            linked_action.f_marker = action.f_marker
                            linked_action.f_markerPos = action.f_markerPos

    # Pass 5: normalise IFCOMPLM and move an IF action in front of its predecessor.
    for analyzed_sentence in self.f_analyzed_sentences:
        actions = analyzed_sentence.f_actions
        for index, action in enumerate(actions):
            if action.f_marker == IFCOMPLM:
                action.f_marker = \
                    IF
            elif action.f_marker == IF:
                if index > 0:
                    previous_action = actions[index - 1]
                    actions[index - 1] = action
                    actions[index] = previous_action
                    self.f_world.switch_actions(action, previous_action)
                    # The list was modified, so stop iterating over it.
                    break
def get_iobj(cls, node_index, dependencies):
    """Return the index of the indirect object governed by ``node_index``.

    In an IOBJ relation the governor is the verb and the dependent is the
    indirect object. The previous implementation matched ``node_index``
    against the dependent and returned the governor, so a verb index passed
    by the caller never produced its indirect object.

    Args:
        node_index: Index of the governing (verb) node.
        dependencies: Dependency dicts of the sentence.

    Returns:
        The dependent index of the first matching IOBJ dependency, or None.
    """
    for dep in Search.find_dependencies(dependencies, IOBJ):
        if dep['governor'] == node_index:
            return dep['dependent']
    return None
def determine_verbs(self, sentence, dependencies, active):
    """Determine the actions (verbs) of a sentence.

    The main predicate is taken from the governor of the single
    NSUBJ/NSUBJPASS dependency (for active sentences without NSUBJ, from the
    first DOBJ). If that predicate governs a COP dependency, the copula's
    dependent is used instead. Without a main predicate the single VP found
    in the tree is used. Conjuncts of the first action are appended.

    Args:
        sentence: Tree searched for verb phrases and used to exclude
            relative clauses.
        dependencies: Dependency dicts of the sentence.
        active: Whether the sentence is in active voice.

    Returns:
        List of actions, possibly empty.
    """
    main_predicate_index = None

    # Determine main predicate
    if not active:
        passive_subjects = Search.find_dependencies(dependencies, NSUBJPASS)
        passive_subjects = self.exclude_relative_clauses(sentence, passive_subjects)
        if len(passive_subjects) == 1:
            main_predicate_index = passive_subjects[0]['governor']
        elif len(passive_subjects) > 1:
            self.logger.info("Sentence has more than one passive predicate")
            self.logger.debug(passive_subjects)
    else:
        subjects = Search.find_dependencies(dependencies, NSUBJ)
        subjects = self.exclude_relative_clauses(sentence, subjects)
        if len(subjects) == 0:
            direct_objects = Search.find_dependencies(dependencies, DOBJ)
            direct_objects = self.exclude_relative_clauses(sentence, direct_objects)
            if len(direct_objects) >= 1:
                main_predicate_index = direct_objects[0]['governor']
        elif len(subjects) == 1:
            main_predicate_index = subjects[0]['governor']
            copulas = Search.find_dependencies(dependencies, COP)
            copulas = self.exclude_relative_clauses(sentence, copulas)
            for copula in copulas:
                if copula['governor'] == main_predicate_index:
                    main_predicate_index = copula['dependent']
                    break
        else:
            self.logger.info("Sentence has more than one active predicate")
            self.logger.debug(subjects)

    # Find all actions
    actions = []
    if main_predicate_index:
        main_predicate = Search.find_dep_in_tree(self.f_full_sentence, main_predicate_index)
        vp_head = Search.get_full_phrase_tree(main_predicate, VP)
        main_action = Builder.create_action(self.f_stanford_sentence, self.f_full_sentence,
                                            main_predicate_index, dependencies, active)
        self.check_sub_sentences(vp_head, dependencies, main_action, False)
        actions.append(main_action)
    else:
        verb_phrases = Search.find_in_tree(sentence, VP, (SBAR, S))
        if len(verb_phrases) == 0:
            self.logger.info("Sentence contains no action")
        elif len(verb_phrases) > 1:
            self.logger.info("Sentence has more than one verb phrase")
        else:
            verb_phrase = verb_phrases[0]
            main_action = Builder.create_action_syntax(self.f_stanford_sentence,
                                                       self.f_full_sentence, verb_phrase)
            self.check_sub_sentences(verb_phrase, dependencies, main_action, False)
            actions.append(main_action)

    if len(actions) > 0:
        actions.extend(self.check_conjunctions(dependencies, actions[0], False, False, active))

    return actions