def determine_object_from_dobj(self, verb, dependencies):
    """Look up the object that belongs to ``verb``.

    Candidates are tried in this order and the first usable one wins:

    1. DOBJ dependencies filtered by ``Search.filter_by_gov`` for the verb.
    2. DOBJ dependencies located after a conjunction partner (only when
       the verb has no ``f_xcomp`` carrying an ``f_object``).
    3. A single PREP dependency whose governor matches the verb.
    4. A single COP dependency.

    For 3 and 4 the candidate is only accepted when its parent node in
    the full sentence tree is labelled NP.

    :param verb: action being analysed; ``f_xcomp``, ``f_name`` and
        ``f_word_index`` are read from it.
    :param dependencies: dependency dicts of the sentence.
    :return: list with the created object, or an empty list.
    """
    found = []
    all_dobjs = Search.find_dependencies(dependencies, DOBJ)
    candidates = Search.filter_by_gov(all_dobjs, verb)

    if len(candidates) == 0 and not (verb.f_xcomp and verb.f_xcomp.f_object):
        # Every conjunction overwrites the previous result, so the filter
        # computed for the last conjunction is the one that is kept.
        for conj in self.f_analyzed_sentence.f_conjs:
            anchor = conj.f_to if conj.f_to == verb else conj.f_from
            candidates = [dep for dep in all_dobjs
                          if anchor.f_word_index < dep['dependent']]

    if len(candidates) > 0:
        # A direct object needs no NP check; take the first candidate.
        dep_index = candidates[0]['dependent']
        obj = Builder.create_object(self.f_stanford_sentence, self.f_full_sentence,
                                    dep_index, dependencies)
        found.append(obj)
        self.check_np_sub_sentences(dep_index, dependencies, obj)
        return found

    # Prepositions whose governor word occurs in the verb name and which
    # are positioned after the verb.
    verb_preps = [
        dep for dep in Search.find_dependencies(dependencies, PREP)
        if dep['governorGloss'] in verb.f_name
        and dep['governor'] > verb.f_word_index
    ]

    if len(verb_preps) > 1:
        self.logger.info("Sentence with more than one prepositional object!")
        self.logger.debug(verb_preps)
        return found

    if len(verb_preps) == 1:
        dep_index = verb_preps[0]['dependent']
    else:
        cops = Search.find_dependencies(dependencies, COP)
        if len(cops) == 0:
            self.logger.debug("No Object found")
            return found
        if len(cops) > 1:
            self.logger.info("Sentence with more than one copula object!")
            self.logger.debug(cops)
            return found
        # For a copula the governor, not the dependent, is the candidate.
        dep_index = cops[0]['governor']

    dep_in_tree = Search.find_dep_in_tree(self.f_full_sentence, dep_index)
    if dep_in_tree.parent().label() == NP:
        obj = Builder.create_object(self.f_stanford_sentence, self.f_full_sentence,
                                    dep_index, dependencies)
        found.append(obj)
        self.check_np_sub_sentences(dep_index, dependencies, obj)
    else:
        self.logger.debug("No object found")

    return found
def create_action(cls, origin, full_sentence, node_index, dependencies, active):
    """Build an ``Action`` for the word at ``node_index``.

    The action is enriched with auxiliaries, modifier, negation flag,
    copula, particle, an indirect-object specifier, an optional open
    clausal complement (``f_xcomp``) and SBAR / PP / RCMOD specifiers.

    :param origin: passed through to every created element.
    :param full_sentence: tree searched with ``Search.find_dep_in_tree``.
    :param node_index: index of the word the action is built from.
    :param dependencies: dependency dicts of the sentence.
    :param active: when false, direct objects are additionally recorded
        as specifiers via ``check_dobj``.
    :return: the populated ``Action``.
    """
    node = Search.find_dep_in_tree(full_sentence, node_index)
    action = Action(origin, node_index, node[0])

    auxiliaries = cls.get_auxiliars(node_index, dependencies)
    if len(auxiliaries) > 0:
        action.f_aux = auxiliaries

    modifier_index = cls.get_modifiers(node_index, dependencies)
    if modifier_index:
        modifier = Search.find_dep_in_tree(full_sentence, modifier_index)
        action.f_mod = modifier[0]
        action.f_modPos = modifier_index

    action.f_negated = cls.is_negated(node, dependencies)

    copula_index = cls.get_cop(node_index, dependencies)
    if copula_index:
        copula = Search.find_dep_in_tree(full_sentence, copula_index)
        action.f_cop = copula[0]
        action.f_copIndex = copula_index

    particle = cls.get_prt(node_index, dependencies)
    if particle:
        action.f_prt = particle

    iobj_index = cls.get_iobj(node_index, dependencies)
    if iobj_index:
        iobj = Search.find_dep_in_tree(full_sentence, iobj_index)
        iobj_spec = Specifier(origin, iobj_index, " ".join(iobj.leaves()))
        iobj_spec.f_type = IOBJ
        action.f_specifiers.append(iobj_spec)

    if not active:
        cls.check_dobj(node_index, dependencies, action, origin, full_sentence)

    # Only the first acceptable XCOMP/DEP dependency governed by this node
    # becomes the complement.
    for dep in Search.find_dependencies(dependencies, (XCOMP, DEP)):
        if dep['governor'] != node_index:
            continue
        dep_in_tree = Search.find_dep_in_tree(full_sentence, dep['dependent'])
        # A generic DEP relation is only accepted when the dependent's label
        # starts with "V" and the dependent does not precede the governor.
        if dep['dep'] == DEP and (dep_in_tree.label()[0] != "V"
                                  or dep['dependent'] < dep['governor']):
            continue
        action.f_xcomp = cls.create_action(origin, full_sentence, dep['dependent'],
                                           dependencies, True)
        break

    vp_head = Search.get_full_phrase_tree(node, VP)
    cls.extract_SBAR_spec(origin, full_sentence, action, vp_head)
    cls.extract_PP_spec(origin, full_sentence, action, node_index, dependencies)
    cls.extract_RCMOD_spec(origin, full_sentence, action, node_index, dependencies)

    cls.logger.debug("Identified action {}".format(action))
    return action
def extract_PP_spec(cls, origin, full_sentence, element, node_index, dependencies):
    """Append prepositional-phrase specifiers to ``element``.

    Every PREP/PREPC dependency that is governed by ``node_index`` (or
    whose governor word equals the element's copula) and that is not
    rejected by ``part_rc_mod`` is turned into a ``Specifier`` of type PP.

    :param origin: passed through to the created specifiers and objects.
    :param full_sentence: tree searched with ``Search.find_dep_in_tree``.
    :param element: receives the specifiers in ``f_specifiers``; its
        ``f_cop`` is consulted only when it is an ``Action``.
    :param node_index: index of the word the element was built from.
    :param dependencies: dependency dicts of the sentence.
    :return: None; ``element`` is modified in place.
    """
    to_check = Search.find_dependencies(dependencies, (PREP, PREPC))
    rc_mod = Search.find_dependencies(dependencies, RCMOD)
    # The copula does not depend on the dependency being inspected, so it
    # is resolved once rather than on every iteration.
    cop = element.f_cop if isinstance(element, Action) else None

    for dep in to_check:
        attached = dep['governor'] == node_index or dep['governorGloss'] == cop
        if not attached or cls.part_rc_mod(full_sentence, rc_mod, dep):
            continue

        dep_in_tree = Search.find_dep_in_tree(full_sentence, dep['dependent'])
        phrase_tree = Search.get_full_phrase_tree(dep_in_tree, PP)
        # NOTE(review): the source was whitespace-collapsed; everything below
        # is assumed to sit under the ``if phrase_tree`` guard, since the
        # phrase cannot be built without a tree -- confirm.
        if not phrase_tree:
            continue

        phrase_tree = cls.delete_branches(phrase_tree, (S, SBAR))
        phrase = " ".join(phrase_tree.leaves())
        space_index = phrase.find(" ")
        if space_index < 0:
            # A single-word phrase yields no specifier.
            continue

        specific = dep['spec']
        if specific:
            # Replace the first word of the phrase with the dependency's
            # own 'spec' value.
            phrase = specific + phrase[space_index:]

        spec = Specifier(origin, dep['dependent'], phrase)
        spec.f_type = PP
        if dep_in_tree.parent().label().startswith(NP):
            spec.f_object = cls.create_object(origin, full_sentence,
                                              dep['dependent'], dependencies)
        spec.f_headWord = specific
        element.f_specifiers.append(spec)
def exclude_relative_clauses(self, sentence, dependencies):
    """Return ``dependencies`` without those nested in a sub-clause.

    For every dependency whose relation is not RCMOD, the tree is walked
    upwards from the dependent word until a ROOT-labelled node.  The
    dependency is dropped when a node labelled SBAR, S or PRN (whose
    parent is not SBAR) is reached first.  The walk stops early, keeping
    the dependency, at a node that has the same label as ``sentence`` and
    a sentence index not greater than that of ``sentence``.

    :param sentence: tree of the (sub-)sentence currently analysed.
    :param dependencies: dependency dicts to filter.
    :return: new list with the remaining dependencies in original order.
    """
    candidates = [dep for dep in dependencies if dep['dep'] != RCMOD]
    if not candidates:
        # Nothing to inspect; skip the sentence-index lookup entirely.
        return list(dependencies)

    # Identical for every dependency, so it is looked up once up front
    # instead of once per loop iteration.
    sentence_index = Search.find_sentence_index(self.f_full_sentence, sentence)

    relative_clauses = []
    for dep in candidates:
        dep_in_tree = Search.find_dep_in_tree(self.f_full_sentence, dep['dependent'])
        while dep_in_tree.label() != ROOT:
            if sentence.label() == dep_in_tree.label():
                part_index = Search.find_sentence_index(self.f_full_sentence, dep_in_tree)
                if sentence_index >= part_index:
                    break
            if dep_in_tree.label() in (SBAR, S, PRN) and dep_in_tree.parent().label() != SBAR:
                relative_clauses.append(dep)
                break
            dep_in_tree = dep_in_tree.parent()

    return [dep for dep in dependencies if dep not in relative_clauses]
def get_PARTMOD_specifiers(cls, origin, full_sentence, node_index, dependencies, element):
    """Append one PARTMOD specifier per matching dependency to ``element``.

    A dependency matches when its relation is PARTMOD and its governor is
    ``node_index``.  The specifier text is the space-joined leaves of the
    VP phrase around the dependent, or an empty string when no such
    phrase tree is found.

    :param origin: passed through to the created specifiers.
    :param full_sentence: tree searched with ``Search.find_dep_in_tree``.
    :param node_index: governor index to match.
    :param dependencies: dependency dicts of the sentence.
    :param element: receives the specifiers in ``f_specifiers``.
    :return: None; ``element`` is modified in place.
    """
    for dep in Search.find_dependencies(dependencies, PARTMOD):
        if dep['governor'] != node_index:
            continue

        dependent_node = Search.find_dep_in_tree(full_sentence, dep['dependent'])
        vp_tree = Search.get_full_phrase_tree(dependent_node, VP)
        if vp_tree:
            words = vp_tree.leaves()
        else:
            words = []

        partmod_spec = Specifier(origin, dep['dependent'], " ".join(words))
        partmod_spec.f_type = PARTMOD
        element.f_specifiers.append(partmod_spec)
def part_rc_mod(cls, full_sentence, rc_mod, dep):
    """Tell whether ``dep`` heads a condition-indicator phrase with an rcmod.

    Returns True only when some entry of ``rc_mod`` is governed by the
    dependent of ``dep`` and the PP phrase around that dependent (with S
    and SBAR branches removed) is contained in ``f_conditionIndicators``.

    :param full_sentence: tree searched with ``Search.find_dep_in_tree``.
    :param rc_mod: RCMOD dependency dicts of the sentence.
    :param dep: the dependency dict under inspection.
    :return: True or False as described above.
    """
    if not any(rcm['governor'] == dep['dependent'] for rcm in rc_mod):
        return False

    # The phrase depends only on ``dep``, so it is built once instead of
    # once per matching rcmod entry.
    dep_in_tree = Search.find_dep_in_tree(full_sentence, dep['dependent'])
    phrase_tree = Search.get_full_phrase_tree(dep_in_tree, PP)
    if not phrase_tree:
        # Without a PP phrase there is nothing to compare; the sibling
        # extract_* methods guard the same lookup in the same way.
        return False

    phrase_tree = cls.delete_branches(phrase_tree, (S, SBAR))
    phrase = " ".join(phrase_tree.leaves())
    return phrase in f_conditionIndicators
def check_dobj(cls, node_index, dependencies, action, origin, full_sentence):
    """Record direct objects of ``node_index`` as DOBJ specifiers.

    Each DOBJ dependency governed by ``node_index`` is logged as an error
    (the message says it was found in a passive sentence) and then added
    to ``action.f_specifiers`` as a specifier that carries the full noun
    as text and the created object in ``f_object``.

    :param node_index: governor index to match.
    :param dependencies: dependency dicts of the sentence.
    :param action: receives the specifiers.
    :param origin: passed through to the created specifier and object.
    :param full_sentence: tree searched with ``Search.find_dep_in_tree``.
    :return: None; ``action`` is modified in place.
    """
    for dep in Search.find_dependencies(dependencies, DOBJ):
        if dep['governor'] != node_index:
            continue

        cls.logger.error("Dobj was found in a passive sentence")
        dependent_index = dep['dependent']
        dependent_node = Search.find_dep_in_tree(full_sentence, dependent_index)
        noun_text = cls.get_full_noun(dependent_node, dependent_index, dependencies)

        dobj_spec = Specifier(origin, dependent_index, noun_text)
        dobj_spec.f_type = DOBJ
        dobj_spec.f_object = cls.create_object(origin, full_sentence,
                                               dependent_index, dependencies)
        action.f_specifiers.append(dobj_spec)
def create_object(cls, origin, full_sentence, node_index, dependencies):
    """Create the object element for the word at ``node_index``.

    An internal actor is created when ``WordNetWrapper.person_or_system``
    accepts the full noun or ``Processing.can_be_person_pronoun`` accepts
    the word; otherwise a ``Resource`` is created and its noun specifiers
    are determined.  In both cases ``f_subjectRole`` is set to False.

    :param origin: passed through to the created element.
    :param full_sentence: tree searched with ``Search.find_dep_in_tree``.
    :param node_index: index of the object word.
    :param dependencies: dependency dicts of the sentence.
    :return: the created actor or resource.
    """
    node = Search.find_dep_in_tree(full_sentence, node_index)
    word = node[0]
    full_noun = cls.get_full_noun(node, node_index, dependencies)

    acts_as_actor = (WordNetWrapper.person_or_system(full_noun, word)
                     or Processing.can_be_person_pronoun(word))
    if not acts_as_actor:
        element = Resource(origin, node_index, word)
        cls.determine_noun_specifiers(origin, full_sentence, node, node_index,
                                      dependencies, element)
    else:
        element = cls.create_internal_actor(origin, full_sentence, node,
                                            node_index, dependencies)

    element.f_subjectRole = False
    cls.logger.debug("Identified object {}".format(element))
    return element
def extract_RCMOD_spec(cls, origin, full_sentence, element, node_index, dependencies):
    """Append RCMOD specifiers to ``element``.

    An RCMOD dependency is used when its dependent is ``node_index`` or
    its dependent word equals the element's copula.  The specifier text
    is the PP phrase around the dependency's governor with S and SBAR
    branches removed.

    :param origin: passed through to the created specifiers.
    :param full_sentence: tree searched with ``Search.find_dep_in_tree``.
    :param element: receives the specifiers in ``f_specifiers``; its
        ``f_cop`` is consulted only when it is an ``Action``.
    :param node_index: index of the word the element was built from.
    :param dependencies: dependency dicts of the sentence.
    :return: None; ``element`` is modified in place.
    """
    to_check = Search.find_dependencies(dependencies, RCMOD)
    # The copula is the same for every dependency, so it is resolved once
    # rather than on every iteration.
    cop = element.f_cop if isinstance(element, Action) else None

    for dep in to_check:
        if dep['dependent'] != node_index and dep['dependentGloss'] != cop:
            continue

        governor_node = Search.find_dep_in_tree(full_sentence, dep['governor'])
        phrase_tree = Search.get_full_phrase_tree(governor_node, PP)
        # NOTE(review): the source was whitespace-collapsed; the specifier
        # creation is assumed to sit under the ``if phrase_tree`` guard,
        # since the phrase cannot be built without a tree -- confirm.
        if not phrase_tree:
            continue

        phrase_tree = cls.delete_branches(phrase_tree, (S, SBAR))
        phrase = " ".join(phrase_tree.leaves())
        spec = Specifier(origin, dep['dependent'], phrase)
        spec.f_type = RCMOD
        element.f_specifiers.append(spec)
def create_actor(cls, origin, full_sentence, node_index, dependencies):
    """Create the actor for the word at ``node_index``.

    When ``WordNetWrapper.person_or_system`` accepts the word, an internal
    actor is created for it directly.  Otherwise, if the node is labelled
    CD or can be a group action, the PREP dependencies governed by the
    node whose 'spec' is in ``f_realActorPPIndicators`` are searched for
    a dependent that is accepted; the first hit becomes the actor.  If
    nothing is found, the actor is built from the original node and
    flagged with ``f_unreal = True``.

    :param origin: passed through to the created actor.
    :param full_sentence: tree searched with ``Search.find_dep_in_tree``.
    :param node_index: index of the actor word.
    :param dependencies: dependency dicts of the sentence.
    :return: the created actor.
    """
    node = Search.find_dep_in_tree(full_sentence, node_index)
    full_noun = cls.get_full_noun(node, node_index, dependencies)

    if WordNetWrapper.person_or_system(full_noun, node[0]):
        actor = cls.create_internal_actor(origin, full_sentence, node,
                                          node_index, dependencies)
    else:
        actor = None
        if node.label() == CD or WordNetWrapper.can_be_group_action(node[0]):
            for prep in Search.find_dependencies(dependencies, PREP):
                if not (prep['spec'] in f_realActorPPIndicators
                        and prep['governor'] == node_index):
                    continue
                candidate_index = prep['dependent']
                candidate_node = Search.find_dep_in_tree(full_sentence, candidate_index)
                full_noun = cls.get_full_noun(candidate_node, candidate_index, dependencies)
                # NOTE(review): the source was whitespace-collapsed; the
                # ``break`` is assumed to belong to this branch -- confirm.
                if WordNetWrapper.person_or_system(full_noun, prep['dependentGloss']):
                    actor = cls.create_internal_actor(origin, full_sentence, candidate_node,
                                                      candidate_index, dependencies)
                    break

        if not actor:
            # No real actor behind the word: keep it, but mark it unreal.
            actor = cls.create_internal_actor(origin, full_sentence, node,
                                              node_index, dependencies)
            actor.f_unreal = True

    cls.logger.debug("Identified actor {}".format(actor))
    return actor
def check_np_sub_sentences(self, dep_index, dependencies, obj):
    """Run the sub-sentence check on the NP phrase around ``dep_index``.

    Does nothing when ``self.f_ignore_np_subsentences`` is set.

    :param dep_index: index of the word whose NP phrase is inspected.
    :param dependencies: dependency dicts of the sentence.
    :param obj: element handed on to ``check_sub_sentences``.
    :return: None.
    """
    if self.f_ignore_np_subsentences:
        return

    word_node = Search.find_dep_in_tree(self.f_full_sentence, dep_index)
    np_head = Search.get_full_phrase_tree(word_node, NP)
    self.check_sub_sentences(np_head, dependencies, obj, True)
def determine_verbs(self, sentence, dependencies, active):
    """Extract the actions of ``sentence``.

    First the main predicate is located: for active sentences through a
    single NSUBJ dependency (replaced by the dependent of a COP governed
    by that predicate, if any) or, without any NSUBJ, through the first
    DOBJ; for passive sentences through a single NSUBJPASS.  Dependencies
    removed by ``exclude_relative_clauses`` are ignored.  Without a main
    predicate, a single VP found in the sentence tree is used instead.
    Finally the actions returned by ``check_conjunctions`` for the first
    action are appended.

    :param sentence: tree of the (sub-)sentence to analyse.
    :param dependencies: dependency dicts of the sentence.
    :param active: True for active voice, False for passive voice.
    :return: list of created actions, possibly empty.
    """
    predicate_index = None

    # Determine main predicate
    if active:
        subjects = self.exclude_relative_clauses(
            sentence, Search.find_dependencies(dependencies, NSUBJ))
        if len(subjects) > 1:
            self.logger.info("Sentence has more than one active predicate")
            self.logger.debug(subjects)
        elif len(subjects) == 1:
            predicate_index = subjects[0]['governor']
            copulas = self.exclude_relative_clauses(
                sentence, Search.find_dependencies(dependencies, COP))
            # The first copula governed by the predicate takes its place.
            for copula in copulas:
                if copula['governor'] == predicate_index:
                    predicate_index = copula['dependent']
                    break
        else:
            direct_objects = self.exclude_relative_clauses(
                sentence, Search.find_dependencies(dependencies, DOBJ))
            if len(direct_objects) >= 1:
                predicate_index = direct_objects[0]['governor']
    else:
        passive_subjects = self.exclude_relative_clauses(
            sentence, Search.find_dependencies(dependencies, NSUBJPASS))
        if len(passive_subjects) > 1:
            self.logger.info("Sentence has more than one passive predicate")
            self.logger.debug(passive_subjects)
        elif len(passive_subjects) == 1:
            predicate_index = passive_subjects[0]['governor']

    # Find all actions
    actions = []
    if predicate_index:
        predicate_node = Search.find_dep_in_tree(self.f_full_sentence, predicate_index)
        vp_head = Search.get_full_phrase_tree(predicate_node, VP)
        action = Builder.create_action(self.f_stanford_sentence, self.f_full_sentence,
                                       predicate_index, dependencies, active)
        self.check_sub_sentences(vp_head, dependencies, action, False)
        actions.append(action)
    else:
        verb_phrases = Search.find_in_tree(sentence, VP, (SBAR, S))
        phrase_count = len(verb_phrases)
        if phrase_count == 0:
            self.logger.info("Sentence contains no action")
        elif phrase_count > 1:
            self.logger.info("Sentence has more than one verb phrase")
        else:
            verb_phrase = verb_phrases[0]
            action = Builder.create_action_syntax(self.f_stanford_sentence,
                                                  self.f_full_sentence, verb_phrase)
            self.check_sub_sentences(verb_phrase, dependencies, action, False)
            actions.append(action)

    if len(actions) > 0:
        actions.extend(self.check_conjunctions(dependencies, actions[0],
                                               False, False, active))

    return actions