def exclude_relative_clauses(self, sentence, dependencies):
    """Return ``dependencies`` without those nested in a clause below ``sentence``.

    For every dependency that is not an RCMOD relation, the tree node of its
    dependent is followed upwards until ROOT. The walk stops early when a
    node carrying the same label as ``sentence`` is reached whose sentence
    index is not greater than the index of ``sentence``. If an SBAR, S or
    PRN node whose parent is not an SBAR is met first, the dependency is
    dropped from the result.

    Args:
        sentence: Tree node of the (sub-)sentence being analysed.
        dependencies: Dependency dicts (reads ``'dep'`` and ``'dependent'``).

    Returns:
        A new list with the dependencies that were not excluded.
    """
    excluded = []
    for candidate in dependencies:
        if candidate['dep'] == RCMOD:
            # RCMOD relations are never filtered out here.
            continue
        sentence_index = Search.find_sentence_index(self.f_full_sentence, sentence)
        node = Search.find_dep_in_tree(self.f_full_sentence, candidate['dependent'])
        while node.label() != ROOT:
            if node.label() == sentence.label():
                part_index = Search.find_sentence_index(self.f_full_sentence, node)
                if part_index <= sentence_index:
                    # Reached the level of the analysed sentence: keep it.
                    break
            is_clause = node.label() in (SBAR, S, PRN)
            if is_clause and node.parent().label() != SBAR:
                excluded.append(candidate)
                break
            node = node.parent()
    return [dep for dep in dependencies if dep not in excluded]
def extract_PP_spec(cls, origin, full_sentence, element, node_index, dependencies):
    """Attach prepositional-phrase specifiers to ``element``.

    Looks at every PREP/PREPC dependency whose governor is ``node_index``
    (or whose governor gloss equals the copula of ``element`` when it is an
    ``Action``) and that ``part_rc_mod`` does not claim. For each one the
    enclosing PP is fetched, S/SBAR branches are removed, and - when the
    phrase contains a space - a ``Specifier`` of type PP is appended to
    ``element.f_specifiers``.

    Args:
        origin: Passed through to ``Specifier`` / ``create_object``.
        full_sentence: Parse tree of the whole sentence.
        element: Object whose ``f_specifiers`` list is extended.
        node_index: Index of the word the prepositions must be governed by.
        dependencies: Dependency dicts of the sentence.
    """
    prep_deps = Search.find_dependencies(dependencies, (PREP, PREPC))
    rc_mod = Search.find_dependencies(dependencies, RCMOD)
    cop = element.f_cop if isinstance(element, Action) else None

    for dep in prep_deps:
        attached = dep['governor'] == node_index or dep['governorGloss'] == cop
        if not attached or cls.part_rc_mod(full_sentence, rc_mod, dep):
            continue

        dep_in_tree = Search.find_dep_in_tree(full_sentence, dep['dependent'])
        phrase_tree = Search.get_full_phrase_tree(dep_in_tree, PP)
        if not phrase_tree:
            continue

        phrase_tree = cls.delete_branches(phrase_tree, (S, SBAR))
        phrase = " ".join(phrase_tree.leaves())
        space_index = phrase.find(" ")
        if space_index < 0:
            # Single-word phrases do not yield a specifier.
            continue

        specific = dep['spec']
        if specific:
            # Replace the first word of the phrase with the dependency's
            # specific value.
            phrase = specific + phrase[space_index:]

        spec = Specifier(origin, dep['dependent'], phrase)
        spec.f_type = PP
        if dep_in_tree.parent().label().startswith(NP):
            spec.f_object = cls.create_object(
                origin, full_sentence, dep['dependent'], dependencies)
        spec.f_headWord = specific
        element.f_specifiers.append(spec)
def determine_subjects(self, sentence, dependencies, active):
    """Build the actors that act as subject of ``sentence``.

    The subject candidates are the NSUBJ dependencies for an active
    sentence and the AGENT dependencies otherwise, after
    ``exclude_relative_clauses`` has filtered them. Only when exactly one
    candidate remains is an actor created; conjunctions of that actor are
    added as further actors.

    Args:
        sentence: Tree node of the sentence being analysed.
        dependencies: Dependency dicts of the sentence.
        active: Truthy for an active sentence.

    Returns:
        List of created actors (possibly empty).
    """
    actors = []
    main_actor_index = None

    # Find main actor
    if active:
        subj = Search.find_dependencies(dependencies, NSUBJ)
    else:
        subj = Search.find_dependencies(dependencies, AGENT)
    subj = self.exclude_relative_clauses(sentence, subj)

    subject_count = len(subj)
    if subject_count == 0:
        self.logger.debug("Sentence contains no subject!")
    elif subject_count == 1:
        main_actor_index = subj[0]['dependent']
    else:
        self.logger.info("Sentence has more then one subject")
        self.logger.debug(subj)

    if not main_actor_index:
        return actors

    # Find all actors
    actor = Builder.create_actor(self.f_stanford_sentence, self.f_full_sentence,
                                 main_actor_index, dependencies)
    actor.f_subjectRole = True
    actor.f_passive = not active
    actors.append(actor)

    for conjoined in self.check_conjunctions(dependencies, actor, True, True, active):
        conjoined.f_subjectRole = True
        conjoined.f_passive = not active
        actors.append(conjoined)
    return actors
def extract_PP_spec_syntax(cls, origin, full_sentence, element, vphead):
    """Append one PP specifier to ``element`` per PP found below ``vphead``.

    The PPs come from ``Search.find_in_tree`` called with the label tuple
    ``(SBAR, S, NP, PRN)`` as third argument. Each specifier carries the
    sentence index of the PP and the PP's leaves joined by single spaces.
    """
    for pp in Search.find_in_tree(vphead, PP, (SBAR, S, NP, PRN)):
        position = Search.find_sentence_index(full_sentence, pp)
        text = " ".join(pp.leaves())
        spec = Specifier(origin, position, text)
        spec.f_type = PP
        element.f_specifiers.append(spec)
def part_rc_mod(cls, full_sentence, rc_mod, dep):
    """Tell whether ``dep`` heads a relative clause introduced by a condition phrase.

    ``dep`` qualifies when some dependency in ``rc_mod`` has ``dep``'s
    dependent as its governor and the PP phrase around that dependent (with
    S/SBAR branches removed) is contained in ``f_conditionIndicators``.

    Args:
        full_sentence: Parse tree of the whole sentence.
        rc_mod: RCMOD dependency dicts (reads ``'governor'``).
        dep: Dependency dict under test (reads ``'dependent'``).

    Returns:
        True if the condition above holds, otherwise False.
    """
    if not any(rcm['governor'] == dep['dependent'] for rcm in rc_mod):
        return False

    # The phrase only depends on ``dep``, so it is computed once instead of
    # once per matching relative-clause dependency.
    dep_in_tree = Search.find_dep_in_tree(full_sentence, dep['dependent'])
    phrase_tree = Search.get_full_phrase_tree(dep_in_tree, PP)
    if not phrase_tree:
        # Same guard the other callers of get_full_phrase_tree apply
        # (e.g. extract_PP_spec): without an enclosing PP there is no
        # phrase to compare.
        return False

    phrase_tree = cls.delete_branches(phrase_tree, (S, SBAR))
    phrase = " ".join(phrase_tree.leaves())
    return phrase in f_conditionIndicators
def get_PARTMOD_specifiers(cls, origin, full_sentence, node_index, dependencies, element):
    """Append a PARTMOD specifier for each PARTMOD dependency governed by ``node_index``.

    The specifier text is the space-joined leaves of the VP around the
    dependent, or an empty string when no such phrase tree is found.
    """
    for dep in Search.find_dependencies(dependencies, PARTMOD):
        if dep['governor'] != node_index:
            continue
        dep_in_tree = Search.find_dep_in_tree(full_sentence, dep['dependent'])
        phrase_tree = Search.get_full_phrase_tree(dep_in_tree, VP)
        if phrase_tree:
            words = phrase_tree.leaves()
        else:
            words = []
        spec = Specifier(origin, dep['dependent'], " ".join(words))
        spec.f_type = PARTMOD
        element.f_specifiers.append(spec)
def extract_SBAR_spec(cls, origin, full_sentence, element, phrase_head):
    """Append an SBAR specifier for every SBAR located after ``phrase_head``.

    Does nothing when ``phrase_head`` is falsy. Only SBARs whose sentence
    index is strictly greater than the index of ``phrase_head`` are used;
    their leaves, joined by spaces, become the specifier text.
    """
    if not phrase_head:
        return

    sbar_list = Search.find_in_tree(phrase_head, SBAR, [])
    phrase_index = Search.find_sentence_index(full_sentence, phrase_head)
    for sbar in sbar_list:
        sbar_index = Search.find_sentence_index(full_sentence, sbar)
        if sbar_index <= phrase_index:
            continue
        spec = Specifier(origin, sbar_index, " ".join(sbar.leaves()))
        spec.f_type = SBAR
        element.f_specifiers.append(spec)
def check_dobj(cls, node_index, dependencies, action, origin, full_sentence):
    """Record direct objects governed by ``node_index`` as DOBJ specifiers of ``action``.

    Every hit is logged as an error ("Dobj was found in a passive
    sentence"). The specifier is named after the full noun of the
    dependent and additionally carries the object built by
    ``create_object``.
    """
    for dep in Search.find_dependencies(dependencies, DOBJ):
        if dep['governor'] != node_index:
            continue
        cls.logger.error("Dobj was found in a passive sentence")
        dependent = dep['dependent']
        node = Search.find_dep_in_tree(full_sentence, dependent)
        noun = cls.get_full_noun(node, dependent, dependencies)
        spec = Specifier(origin, dependent, noun)
        spec.f_type = DOBJ
        spec.f_object = cls.create_object(origin, full_sentence, dependent, dependencies)
        action.f_specifiers.append(spec)
def find_sub_sentences(self, sentence):
    """Collect the sub-sentence nodes of ``sentence``.

    Sub-sentences are the children matching ``self.f_sentenceTags`` as
    reported by ``Search.find_children``. Besides the direct children of
    ``sentence``, the children of every "PP"/"ADVP" child and of each of
    that child's own children are searched as well.

    Returns:
        The list returned by the first ``Search.find_children`` call,
        extended in place with the additional matches.
    """
    # NOTE(review): determine_sub_sentence_count only descends into
    # grandchildren labelled NP, while this method descends into all of
    # them - confirm whether the difference is intended.
    tags = self.f_sentenceTags
    found = Search.find_children(sentence, tags)
    for child in sentence:
        if child.label() not in ("PP", "ADVP"):
            continue
        found.extend(Search.find_children(child, tags))
        for grandchild in child:
            found.extend(Search.find_children(grandchild, tags))
    return found
def extract_RCMOD_spec(cls, origin, full_sentence, element, node_index, dependencies):
    """Attach RCMOD specifiers to ``element``.

    An RCMOD dependency is used when its dependent is ``node_index`` or its
    dependent gloss equals the copula of ``element`` (only when ``element``
    is an ``Action``). The specifier text is the PP phrase around the
    dependency's governor with S/SBAR branches removed; dependencies
    without such a phrase are skipped.
    """
    cop = element.f_cop if isinstance(element, Action) else None
    for dep in Search.find_dependencies(dependencies, RCMOD):
        if not (dep['dependent'] == node_index or dep['dependentGloss'] == cop):
            continue
        governor_node = Search.find_dep_in_tree(full_sentence, dep['governor'])
        phrase_tree = Search.get_full_phrase_tree(governor_node, PP)
        if not phrase_tree:
            continue
        phrase_tree = cls.delete_branches(phrase_tree, (S, SBAR))
        text = " ".join(phrase_tree.leaves())
        spec = Specifier(origin, dep['dependent'], text)
        spec.f_type = RCMOD
        element.f_specifiers.append(spec)
def is_active_sentence(self, sentence, dependencies):
    """Decide whether ``sentence`` is in active voice.

    The sentence is active when, after ``exclude_relative_clauses``, at
    least one NSUBJ, CSUBJ or DOBJ dependency remains. Otherwise it is
    treated as not active; if there is no NSUBJPASS, CSUBJPASS or AGENT
    dependency either, a debug message notes the ambiguity.

    Returns:
        True for an active sentence, False otherwise.
    """
    active_markers = self.exclude_relative_clauses(
        sentence, Search.find_dependencies(dependencies, (NSUBJ, CSUBJ, DOBJ)))
    if active_markers:
        return True

    passive_markers = self.exclude_relative_clauses(
        sentence, Search.find_dependencies(dependencies, (NSUBJPASS, CSUBJPASS, AGENT)))
    if not passive_markers:
        self.logger.debug("It is not clear whether this sentence is active or passive!")
    return False
def find_INFMOD_specifiers(cls, origin, node_index, dependencies, element):
    """Append an INFMOD specifier for the first INFMOD dependency of ``node_index``.

    The specifier name consists of the glosses of all AUX/COP/NEG
    dependents of the INFMOD word followed by the INFMOD word itself,
    separated by single spaces. Only the first matching dependency is
    processed.
    """
    for dep in Search.find_dependencies(dependencies, INFMOD):
        if dep['governor'] != node_index:
            continue
        helpers = Search.find_dependencies(dependencies, (AUX, COP, NEG))
        words = [helper['dependentGloss'] for helper in helpers
                 if helper['governor'] == dep['dependent']]
        words.append(dep['dependentGloss'])

        spec = Specifier(origin, dep['dependent'], " ".join(words))
        spec.f_type = INFMOD
        element.f_specifiers.append(spec)
        break
def determine_object(self, sentence, verb, dependencies, active):
    """Determine the objects belonging to ``verb``.

    If ``verb`` has an xcomp, its objects are determined first and the
    first one is stored on ``verb.f_xcomp.f_object``. For an active
    sentence the objects come from ``determine_object_from_dobj``. For a
    passive sentence the first NSUBJPASS dependency outside relative
    clauses becomes the object (flagged with ``f_subjectRole = True``);
    without one, the dobj lookup is used as well. Elements conjoined with
    the first object are appended.

    Returns:
        List of objects (possibly empty).
    """
    objects = []

    xcomp = verb.f_xcomp
    if xcomp:
        xcomp_objects = self.determine_object(sentence, xcomp, dependencies, active)
        if len(xcomp_objects) > 0:
            verb.f_xcomp.f_object = xcomp_objects[0]

    if active:
        objects.extend(self.determine_object_from_dobj(verb, dependencies))
    else:
        passive_subjects = self.exclude_relative_clauses(
            sentence, Search.find_dependencies(dependencies, NSUBJPASS))
        if len(passive_subjects) == 0:
            objects.extend(self.determine_object_from_dobj(verb, dependencies))
        else:
            if len(passive_subjects) > 1:
                self.logger.debug("Passive sentence with more than one subject!")
            dep_index = passive_subjects[0]['dependent']
            obj = Builder.create_object(self.f_stanford_sentence, self.f_full_sentence,
                                        dep_index, dependencies)
            obj.f_subjectRole = True
            objects.append(obj)
            self.check_np_sub_sentences(dep_index, dependencies, obj)

    if len(objects) > 0:
        for conj in self.check_conjunctions(dependencies, objects[0], True, False, active):
            if isinstance(conj, Element):
                objects.append(conj)
    return objects
def analyze_recursive(self, main_sentence, dependencies):
    """Analyse ``main_sentence`` and, recursively, its sub-sentences.

    Works on a deep copy of ``main_sentence``:

    * no sub-sentence: elements are extracted directly;
    * exactly one: the sub-sentence is analysed with its filtered
      dependencies, removed from the copy, and the remainder is analysed
      with the RCMOD dependencies plus those not handed to the
      sub-sentence - but only if an NSUBJ, AGENT, NSUBJPASS or DOBJ
      dependency is left;
    * several: each sub-sentence is analysed on its own.
    """
    main_sentence = deepcopy(main_sentence)
    sub_sentence_count = self.determine_sub_sentence_count(main_sentence)

    if sub_sentence_count == 0:
        self.extract_elements(main_sentence, dependencies)
        return

    if sub_sentence_count != 1:
        for sub_sentence in self.find_sub_sentences(main_sentence):
            sub_deps = self.filter_dependencies(sub_sentence, dependencies)
            self.analyze_recursive(sub_sentence, sub_deps)
        return

    sub_sentence = self.find_sub_sentences(main_sentence)[0]
    filtered_dependencies = self.filter_dependencies(sub_sentence, dependencies)
    self.analyze_recursive(sub_sentence, filtered_dependencies)

    # Cut the analysed sub-sentence out of the copied tree.
    sub_sentence_index = sub_sentence.treeposition()
    del main_sentence[sub_sentence_index]

    remaining = [dep for dep in dependencies
                 if dep['dep'] == RCMOD or dep not in filtered_dependencies]
    core_deps = Search.find_dependencies(remaining, (NSUBJ, AGENT, NSUBJPASS, DOBJ))
    if len(core_deps) > 0:
        self.extract_elements(main_sentence, remaining)
def combine_actions(self):
    """Merge actions with the action they refer to, or copy attributes over.

    For every action in ``self.f_world.f_actions`` a reference action is
    looked up: the reference of its ``f_actorFrom`` if that is an
    ``Action``, otherwise the reference of its ``f_object`` (used directly
    when it is an ``Action``, else resolved through ``Search.get_action``).
    If ``can_be_merged(..., False)`` holds the two are merged; else if
    ``can_be_merged(..., True)`` holds, actor, object and copula data are
    copied from the reference action onto the action.
    """
    # NOTE(review): the list is iterated directly; if merge() changes
    # f_world.f_actions this loop may skip entries - confirm against merge().
    for action in self.f_world.f_actions:
        reference_action = None
        if action.f_actorFrom and isinstance(action.f_actorFrom.f_reference, Action):
            reference_action = action.f_actorFrom.f_reference
        elif action.f_object and action.f_object.f_reference:
            referenced = action.f_object.f_reference
            if isinstance(referenced, Action):
                reference_action = referenced
            else:
                reference_action = Search.get_action(self.f_world.f_actions, referenced)

        if not reference_action:
            continue

        if self.can_be_merged(reference_action, action, False):
            self.logger.debug("Merging {} and {}".format(reference_action, action))
            self.merge(reference_action, action, False)
        elif self.can_be_merged(reference_action, action, True):
            self.logger.debug(
                "Copying attributes from {} to {}".format(reference_action, action))
            action.f_actorFrom = reference_action.f_actorFrom
            action.f_object = reference_action.f_object
            action.f_cop = reference_action.f_cop
            action.f_copIndex = reference_action.f_copIndex
def is_negated(cls, node_index, dependencies):
    """Return True if the word at ``node_index`` is negated.

    If ``node_index`` is the dependent of a COP dependency, the governor
    of the first such dependency is checked instead. The word counts as
    negated when any NEG dependency is governed by the checked index.
    """
    target = node_index
    for cop in Search.find_dependencies(dependencies, COP):
        if cop['dependent'] == node_index:
            target = cop['governor']
            break

    negations = Search.find_dependencies(dependencies, NEG)
    return any(neg['governor'] == target for neg in negations)
def get_modifiers(cls, node_index, dependencies):
    """Return the index of the first usable modifier of ``node_index``.

    A modifier is an ADVMOD or ACOMP dependency governed by ``node_index``
    whose dependent index is greater than the governor index and whose
    gloss is not listed in ``f_sequenceIndicators``.

    Returns:
        The dependent index of the first match, or None if there is none.
    """
    for dep in Search.find_dependencies(dependencies, (ADVMOD, ACOMP)):
        if dep['governor'] != node_index:
            continue
        follows_governor = dep['governor'] < dep['dependent']
        if follows_governor and dep['dependentGloss'] not in f_sequenceIndicators:
            return dep['dependent']
    return None
def find_determiner(cls, node_index, dependencies, element):
    """Store the determiner of ``node_index`` on ``element.f_determiner``.

    The gloss of the first POSS or DET dependency governed by
    ``node_index`` is used; ``element`` is left untouched when there is
    none.
    """
    candidates = Search.find_dependencies(dependencies, (POSS, DET))
    match = next((dep for dep in candidates if dep['governor'] == node_index), None)
    if match is not None:
        element.f_determiner = match['dependentGloss']
def reference_resolution(self):
    """Resolve the reference of every actor and resource that needs it.

    For each object flagged with ``f_needsResolve``:

    * if ``(sentence id, word index)`` is a key of ``self.f_reference_map``,
      the mapped target is converted with ``to_element`` and used;
    * else, if its name is an action-resolution determiner, the result of
      ``find_action`` is used;
    * otherwise ``find_reference`` is called with the object's animate
      type and an invert-role flag derived from the containing action's
      copula or the name being a relative-clause pronoun.

    The result is stored in ``obj.f_reference``.
    """
    candidates = list(self.f_world.f_actors) + list(self.f_world.f_resources)

    for obj in candidates:
        if not obj.f_needsResolve:
            continue
        self.logger.debug("Resolving {}".format(obj))

        key = (obj.f_sentence.f_id, obj.f_word_index)
        if key in self.f_reference_map:
            element = self.to_element(self.f_reference_map[key])
            obj.f_reference = element
            self.logger.debug("Manual resolution of {}".format(element))
            continue

        sentence_id = obj.f_sentence.f_id
        if Processing.is_action_resolution_determiner(obj.f_name):
            resolved = self.find_action(sentence_id, obj)
        else:
            animate = self.determine_animate_type(obj)
            sentence_actions = self.f_world.get_actions_of_sentence(obj.f_sentence)
            containing_action = Search.get_action(sentence_actions, obj)
            invert_role_match = (containing_action.f_cop
                                 or Processing.is_RC_pronoun(obj.f_name))
            resolved = self.find_reference(sentence_id, obj, animate, invert_role_match)

        obj.f_reference = resolved
        self.logger.debug("Resolution result: {}".format(resolved))
def determine_sub_sentence_count(self, sentence):
    """Count the sub-sentences of ``sentence``.

    Starts with the number of children matching ``self.f_sentenceTags``.
    A single match is discarded when the first child of ``sentence`` is a
    WHNP. Matches inside PP/ADVP children, and inside NP children of
    those, are added.
    """
    tags = self.f_sentenceTags
    count = Search.count_children(sentence, tags)
    if count == 1 and sentence[0].label() == WHNP:
        count -= 1

    for child in sentence:
        if child.label() not in (PP, ADVP):
            continue
        count += Search.count_children(child, tags)
        for grandchild in child:
            if grandchild.label() == NP:
                count += Search.count_children(grandchild, tags)
    return count
def check_global_conjunctions(self):
    """Link actions that are joined by a CONJ dependency.

    For every CONJ dependency in ``self.f_dependencies`` the actions
    containing its governor and its dependent are looked up. When both
    exist and ``find_conjunction`` reports nothing for them, a link is
    created with ``build_link``.
    """
    for dep in Search.find_dependencies(self.f_dependencies, CONJ):
        action_from = self.get_action_containing(dep['governor'])
        action_to = self.get_action_containing(dep['dependent'])
        if not (action_from and action_to):
            continue
        if not self.find_conjunction(action_from, dep, action_to):
            self.build_link(action_from, dep, action_to)
def create_action_syntax(cls, origin, full_sentence, vphead):
    """Create an ``Action`` from the verb phrase ``vphead`` using the tree only.

    The action name is the space-joined result of ``extract_verb_parts``
    and its index is the sentence index of ``vphead``. SBAR and PP
    specifiers found below ``vphead`` are attached before the action is
    returned.
    """
    name = " ".join(cls.extract_verb_parts(vphead))
    position = Search.find_sentence_index(full_sentence, vphead)
    action = Action(origin, position, name)

    cls.extract_SBAR_spec(origin, full_sentence, action, vphead)
    cls.extract_PP_spec_syntax(origin, full_sentence, action, vphead)

    cls.logger.debug("Identified action {}".format(action))
    return action
def get_specifier_from_dependencies(cls, origin, node_index, dependencies, element, dep_type):
    """Append one combined specifier built from all ``dep_type`` dependents of a word.

    The glosses of every ``dep_type`` dependency governed by ``node_index``
    are joined with single spaces. When such a dependent itself governs a
    CONJ dependency, the conjunction's ``'spec'`` value and the gloss of
    the conjoined word are inserted after it. The specifier is indexed by
    the first matching dependent; nothing is appended without a match.

    Args:
        origin: Passed through to ``Specifier``.
        node_index: Index of the governing word.
        dependencies: Dependency dicts of the sentence.
        element: Object whose ``f_specifiers`` list is extended.
        dep_type: Relation to look for; also used as the specifier type.
    """
    to_check = Search.find_dependencies(dependencies, dep_type)
    # The conjunction list does not depend on the loop variable.
    conjs = Search.find_dependencies(dependencies, CONJ)

    index = None
    words = []
    for dep in to_check:
        if dep['governor'] != node_index:
            continue
        words.append(dep['dependentGloss'])
        for conj in conjs:
            if conj['governor'] == dep['dependent']:
                # Add the conjoined word itself. The previous code repeated
                # dep['dependentGloss'] here, yielding e.g. "red and red"
                # instead of "red and blue".
                words.append(conj['spec'])
                words.append(conj['dependentGloss'])
        if not index:
            index = dep['dependent']

    if index:
        spec = Specifier(origin, index, " ".join(words))
        spec.f_type = dep_type
        element.f_specifiers.append(spec)
def find_NN_specifiers(cls, origin, node_index, dependencies, element):
    """Attach noun-compound specifiers to ``element``.

    First delegates to ``get_specifier_from_dependencies`` for NN
    relations. Then every DEP dependency governed by ``node_index`` whose
    dependent directly follows the governor (index + 1) is added as an
    NNAFTER specifier.
    """
    cls.get_specifier_from_dependencies(origin, node_index, dependencies, element, NN)

    for dep in Search.find_dependencies(dependencies, DEP):
        is_next_word = dep['governor'] + 1 == dep['dependent']
        if dep['governor'] == node_index and is_next_word:
            spec = Specifier(origin, dep['dependent'], dep['dependentGloss'])
            spec.f_type = NNAFTER
            element.f_specifiers.append(spec)
def check_conjunctions(self, dependencies, element, obj, actor, active):
    """Create the elements that are conjoined with ``element``.

    A CONJ dependency applies when its governor gloss equals
    ``element.f_name`` and no COP dependency is governed by the same word,
    or when ``element`` is an ``Action`` whose xcomp base form contains the
    governor gloss. Depending on the flags an actor, an object or an
    action is built for the conjoined word; for an xcomp hit a shallow
    copy of the action with a new xcomp is used. Unless governor and
    dependent are the same word, the new element is linked to ``element``
    via ``build_link`` and returned.

    Args:
        dependencies: Dependency dicts of the sentence.
        element: Element whose conjunctions are searched.
        obj: Truthy to create actors/objects, falsy to create actions.
        actor: With ``obj`` truthy, truthy selects actors over objects.
        active: Passed to ``Builder.create_action`` for plain actions.

    Returns:
        List of newly created elements.
    """
    results = []
    conjs = Search.find_dependencies(dependencies, CONJ)
    cops = Search.find_dependencies(dependencies, COP)
    if len(conjs) == 0:
        return results

    action = element if isinstance(element, Action) else None
    for conj in conjs:
        x_comp_hit = bool(action and action.f_xcomp
                          and conj['governorGloss'] in action.f_xcomp.f_baseForm)
        same_head = (conj['governorGloss'] == element.f_name
                     and len(Search.filter_by_gov(cops, conj['governor'])) == 0)
        if not (same_head or x_comp_hit):
            continue

        dep_index = conj['dependent']
        if obj:
            factory = Builder.create_actor if actor else Builder.create_object
            new_ele = factory(self.f_stanford_sentence, self.f_full_sentence,
                              dep_index, dependencies)
            self.check_np_sub_sentences(dep_index, dependencies, new_ele)
        elif x_comp_hit:
            new_ele = copy(action)
            new_ele.f_xcomp = Builder.create_action(
                self.f_stanford_sentence, self.f_full_sentence,
                dep_index, dependencies, True)
        else:
            new_ele = Builder.create_action(
                self.f_stanford_sentence, self.f_full_sentence,
                dep_index, dependencies, active)

        if conj['dependent'] != conj['governor']:
            results.append(new_ele)
            self.build_link(element, conj, new_ele)
    return results
def create_action(cls, origin, full_sentence, node_index, dependencies, active):
    """Create an ``Action`` for the verb at ``node_index`` from the dependencies.

    Besides the verb itself the action receives, when present: auxiliaries,
    a modifier, the negation flag, a copula, a particle, an indirect-object
    specifier, an xcomp action (built recursively) and SBAR/PP/RCMOD
    specifiers. For a passive sentence direct objects are recorded through
    ``check_dobj``.

    Args:
        origin: Passed through to the created elements.
        full_sentence: Parse tree of the whole sentence.
        node_index: Index of the verb.
        dependencies: Dependency dicts of the sentence.
        active: Falsy for a passive sentence.

    Returns:
        The created ``Action``.
    """
    node = Search.find_dep_in_tree(full_sentence, node_index)
    action = Action(origin, node_index, node[0])

    aux = cls.get_auxiliars(node_index, dependencies)
    if len(aux) > 0:
        action.f_aux = aux

    mod_index = cls.get_modifiers(node_index, dependencies)
    if mod_index:
        mod = Search.find_dep_in_tree(full_sentence, mod_index)
        action.f_mod = mod[0]
        action.f_modPos = mod_index

    # is_negated compares its first argument with the integer
    # 'dependent'/'governor' indices of the dependencies, so it needs the
    # word index; passing the tree node could never match.
    action.f_negated = cls.is_negated(node_index, dependencies)

    cop_index = cls.get_cop(node_index, dependencies)
    if cop_index:
        cop = Search.find_dep_in_tree(full_sentence, cop_index)
        action.f_cop = cop[0]
        action.f_copIndex = cop_index

    prt = cls.get_prt(node_index, dependencies)
    if prt:
        action.f_prt = prt

    iobj_index = cls.get_iobj(node_index, dependencies)
    if iobj_index:
        iobj = Search.find_dep_in_tree(full_sentence, iobj_index)
        spec = Specifier(origin, iobj_index, " ".join(iobj.leaves()))
        spec.f_type = IOBJ
        action.f_specifiers.append(spec)

    if not active:
        cls.check_dobj(node_index, dependencies, action, origin, full_sentence)

    for dep in Search.find_dependencies(dependencies, (XCOMP, DEP)):
        if dep['governor'] != node_index:
            continue
        dep_in_tree = Search.find_dep_in_tree(full_sentence, dep['dependent'])
        if dep['dep'] == DEP:
            # A generic DEP relation only counts when it points to a
            # following word whose tree label starts with "V".
            if dep_in_tree.label()[0] != "V" or dep['dependent'] < dep['governor']:
                continue
        action.f_xcomp = cls.create_action(
            origin, full_sentence, dep['dependent'], dependencies, True)
        break

    vp_head = Search.get_full_phrase_tree(node, VP)
    cls.extract_SBAR_spec(origin, full_sentence, action, vp_head)
    cls.extract_PP_spec(origin, full_sentence, action, node_index, dependencies)
    cls.extract_RCMOD_spec(origin, full_sentence, action, node_index, dependencies)

    cls.logger.debug("Identified action {}".format(action))
    return action
def find_node_action(self, dep_index, action_list, deps):
    """Find the action in ``action_list`` that belongs to word ``dep_index``.

    An action matches when its ``f_word_index`` equals ``dep_index``. If
    none matches, the first COP dependency governed by ``dep_index`` is
    followed and the search is repeated for its dependent.

    Returns:
        The matching action, or None.
    """
    direct = next((action for action in action_list
                   if action.f_word_index == dep_index), None)
    if direct is not None:
        return direct

    for cop in Search.find_dependencies(deps, COP):
        if cop['governor'] == dep_index:
            return self.find_node_action(cop['dependent'], action_list, deps)
    return None
def filter_dependencies(self, sentence, dependencies):
    """Return the dependencies that lie inside ``sentence``.

    The span of ``sentence`` starts at its sentence index within
    ``self.f_full_sentence`` and covers as many positions as it has
    leaves. A dependency is kept when it is an RCMOD relation or when both
    its governor and its dependent fall into that span.
    """
    start_index = Search.find_sentence_index(self.f_full_sentence, sentence)
    end_index = start_index + len(sentence.leaves())

    def in_span(position):
        return start_index <= position < end_index

    return [
        dep for dep in dependencies
        if dep['dep'] == RCMOD
        or (in_span(dep['governor']) and in_span(dep['dependent']))
    ]
def create_object(cls, origin, full_sentence, node_index, dependencies):
    """Create the object element for the word at ``node_index``.

    If ``WordNetWrapper.person_or_system`` accepts the full noun, or the
    word can be a person pronoun, an internal actor is created; otherwise
    a ``Resource`` with its noun specifiers. The result is always marked
    with ``f_subjectRole = False``.

    Returns:
        The created actor or resource.
    """
    node = Search.find_dep_in_tree(full_sentence, node_index)
    head_word = node[0]
    full_noun = cls.get_full_noun(node, node_index, dependencies)

    is_actor = (WordNetWrapper.person_or_system(full_noun, head_word)
                or Processing.can_be_person_pronoun(head_word))
    if is_actor:
        result = cls.create_internal_actor(
            origin, full_sentence, node, node_index, dependencies)
    else:
        result = Resource(origin, node_index, head_word)
        cls.determine_noun_specifiers(
            origin, full_sentence, node, node_index, dependencies, result)

    result.f_subjectRole = False
    cls.logger.debug("Identified object {}".format(result))
    return result
def find_dependants(cls, node_index, dependencies, deps, is_governor):
    """Return the words related to ``node_index`` through ``deps`` relations.

    With ``is_governor`` truthy, the dependent glosses of all matching
    dependencies governed by ``node_index`` are collected; otherwise the
    governor glosses of those whose dependent is ``node_index``.

    Returns:
        The collected glosses joined by single spaces ("" if none).
    """
    if is_governor:
        own_key, other_key = 'governor', 'dependentGloss'
    else:
        own_key, other_key = 'dependent', 'governorGloss'

    related = Search.find_dependencies(dependencies, deps)
    return " ".join(dep[other_key] for dep in related if dep[own_key] == node_index)