示例#1
0
    def exclude_relative_clauses(self, sentence, dependencies):
        """Return ``dependencies`` without the entries found inside a nested clause.

        Dependencies of type ``RCMOD`` are always kept.  For every other
        dependency the tree node of its dependent is looked up in
        ``self.f_full_sentence`` and its ancestors are walked until a node
        labelled ``ROOT`` is reached.  The walk stops early, keeping the
        dependency, at a node that carries the same label as ``sentence`` and
        whose sentence index is not greater than that of ``sentence``.  The
        dependency is dropped when a node labelled ``SBAR``, ``S`` or ``PRN``
        whose parent is not labelled ``SBAR`` is reached first.

        :param sentence: tree node of the (sub-)sentence under analysis.
        :param dependencies: list of dependency dicts.
        :return: new list with the surviving dependencies, in original order.
        """
        rejected = []

        for candidate in dependencies:
            if candidate['dep'] == RCMOD:
                continue

            own_index = Search.find_sentence_index(self.f_full_sentence,
                                                   sentence)
            node = Search.find_dep_in_tree(self.f_full_sentence,
                                           candidate['dependent'])

            while node.label() != ROOT:
                if sentence.label() == node.label():
                    node_index = Search.find_sentence_index(
                        self.f_full_sentence, node)
                    if own_index >= node_index:
                        # Reached the analysed sentence itself: keep it.
                        break

                crosses_clause = node.label() in (SBAR, S, PRN)
                if crosses_clause and node.parent().label() != SBAR:
                    rejected.append(candidate)
                    break

                node = node.parent()

        kept = []
        for candidate in dependencies:
            if candidate not in rejected:
                kept.append(candidate)
        return kept
示例#2
0
    def extract_PP_spec(cls, origin, full_sentence, element, node_index, dependencies):
        """Append prepositional-phrase specifiers to ``element.f_specifiers``.

        ``PREP``/``PREPC`` dependencies are considered when their governor is
        ``node_index`` or, for an ``Action`` element, when their governor
        gloss equals the action's ``f_cop``.  Dependencies for which
        ``part_rc_mod`` returns true are skipped.  For the remaining ones the
        enclosing ``PP`` phrase is fetched, passed through ``delete_branches``
        with ``(S, SBAR)`` and turned into the specifier text; when
        ``dep['spec']`` is set it replaces the first word of that text.
        Phrases without a space produce no specifier.

        :param origin: passed through to ``Specifier`` / ``create_object``.
        :param full_sentence: tree searched for the dependent nodes.
        :param element: object whose ``f_specifiers`` list is extended.
        :param node_index: index of the word the specifiers belong to.
        :param dependencies: list of dependency dicts.
        """
        candidates = Search.find_dependencies(dependencies, (PREP, PREPC))
        relative_mods = Search.find_dependencies(dependencies, RCMOD)

        for dep in candidates:
            if isinstance(element, Action):
                cop = element.f_cop
            else:
                cop = None

            governed = dep['governor'] == node_index or dep['governorGloss'] == cop
            if not governed or cls.part_rc_mod(full_sentence, relative_mods, dep):
                continue

            node = Search.find_dep_in_tree(full_sentence, dep['dependent'])
            pp_tree = Search.get_full_phrase_tree(node, PP)
            if not pp_tree:
                continue

            pp_tree = cls.delete_branches(pp_tree, (S, SBAR))
            text = " ".join(pp_tree.leaves())
            first_space = text.find(" ")
            if first_space < 0:
                # Single-word phrase: nothing to attach.
                continue

            head_word = dep['spec']
            if head_word:
                # Swap the first word of the phrase for the dependency's
                # 'spec' value.
                text = head_word + text[first_space:]

            specifier = Specifier(origin, dep['dependent'], text)
            specifier.f_type = PP
            if node.parent().label().startswith(NP):
                specifier.f_object = cls.create_object(
                    origin, full_sentence, dep['dependent'], dependencies)
            specifier.f_headWord = head_word
            element.f_specifiers.append(specifier)
示例#3
0
    def determine_subjects(self, sentence, dependencies, active):
        """Build the actors that act as subject of ``sentence``.

        ``NSUBJ`` dependencies are used for active sentences and ``AGENT``
        dependencies otherwise, after filtering them through
        ``exclude_relative_clauses``.  Only when exactly one candidate remains
        is a main actor created; actors returned by ``check_conjunctions`` for
        that main actor are appended as well.  Every returned actor has
        ``f_subjectRole`` set to ``True`` and ``f_passive`` set to
        ``not active``.

        :return: list of actors; empty when no single subject was found.
        """
        relation = NSUBJ if active else AGENT
        candidates = Search.find_dependencies(dependencies, relation)
        candidates = self.exclude_relative_clauses(sentence, candidates)

        # Find main actor
        main_actor_index = None
        if len(candidates) == 1:
            main_actor_index = candidates[0]['dependent']
        elif len(candidates) == 0:
            self.logger.debug("Sentence contains no subject!")
        else:
            self.logger.info("Sentence has more then one subject")
            self.logger.debug(candidates)

        if not main_actor_index:
            return []

        # Find all actors
        main_actor = Builder.create_actor(self.f_stanford_sentence,
                                          self.f_full_sentence,
                                          main_actor_index, dependencies)
        main_actor.f_subjectRole = True
        main_actor.f_passive = not active
        actors = [main_actor]

        linked_actors = self.check_conjunctions(dependencies, main_actor,
                                                True, True, active)
        for linked in linked_actors:
            linked.f_subjectRole = True
            linked.f_passive = not active
            actors.append(linked)

        return actors
示例#4
0
    def extract_PP_spec_syntax(cls, origin, full_sentence, element, vphead):
        """Append one ``PP`` specifier per PP node found under ``vphead``.

        The PP nodes come from ``Search.find_in_tree(vphead, PP, (SBAR, S, NP,
        PRN))``; the third argument presumably lists labels the search does
        not descend into (verify against ``Search``).  Each specifier's text
        is the space-joined leaves of the PP node and its index is the node's
        sentence index within ``full_sentence``.
        """
        for phrase in Search.find_in_tree(vphead, PP, (SBAR, S, NP, PRN)):
            position = Search.find_sentence_index(full_sentence, phrase)
            text = " ".join(phrase.leaves())
            specifier = Specifier(origin, position, text)
            specifier.f_type = PP
            element.f_specifiers.append(specifier)
示例#5
0
    def part_rc_mod(cls, full_sentence, rc_mod, dep):
        """Tell whether ``dep`` heads a conditional phrase governing a relative clause.

        Returns ``True`` when some entry of ``rc_mod`` has ``dep['dependent']``
        as its governor and the text of the ``PP`` phrase enclosing that
        dependent (after ``delete_branches`` with ``(S, SBAR)``) is contained
        in ``f_conditionIndicators``.

        :param full_sentence: tree searched for the dependent node.
        :param rc_mod: list of relative-clause dependency dicts.
        :param dep: dependency dict under test.
        :return: ``True`` or ``False``.
        """
        # The phrase depends only on ``dep``, so one membership scan is enough
        # and the tree lookup happens at most once.
        if not any(rcm['governor'] == dep['dependent'] for rcm in rc_mod):
            return False

        dep_in_tree = Search.find_dep_in_tree(full_sentence, dep['dependent'])
        phrase_tree = Search.get_full_phrase_tree(dep_in_tree, PP)
        if not phrase_tree:
            # Other callers guard against a falsy result from
            # get_full_phrase_tree; without a PP there is no phrase to compare.
            return False

        phrase_tree = cls.delete_branches(phrase_tree, (S, SBAR))
        phrase = " ".join(phrase_tree.leaves())
        return phrase in f_conditionIndicators
示例#6
0
    def get_PARTMOD_specifiers(cls, origin, full_sentence, node_index, dependencies, element):
        """Append a ``PARTMOD`` specifier for each PARTMOD dependency governed by ``node_index``.

        The specifier text is the space-joined leaves of the ``VP`` phrase
        enclosing the dependent, or the empty string when no such phrase is
        found.
        """
        for dep in Search.find_dependencies(dependencies, PARTMOD):
            if dep['governor'] != node_index:
                continue

            node = Search.find_dep_in_tree(full_sentence, dep['dependent'])
            vp_tree = Search.get_full_phrase_tree(node, VP)
            if vp_tree:
                words = vp_tree.leaves()
            else:
                words = []

            specifier = Specifier(origin, dep['dependent'], " ".join(words))
            specifier.f_type = PARTMOD
            element.f_specifiers.append(specifier)
示例#7
0
    def extract_SBAR_spec(cls, origin, full_sentence, element, phrase_head):
        """Append ``SBAR`` specifiers for SBAR nodes located after ``phrase_head``.

        Does nothing when ``phrase_head`` is falsy.  Otherwise every SBAR node
        found under ``phrase_head`` whose sentence index is greater than the
        sentence index of ``phrase_head`` itself becomes a specifier whose
        text is the node's space-joined leaves.
        """
        if not phrase_head:
            return

        clauses = Search.find_in_tree(phrase_head, SBAR, [])
        head_index = Search.find_sentence_index(full_sentence, phrase_head)

        for clause in clauses:
            clause_index = Search.find_sentence_index(full_sentence, clause)
            if clause_index > head_index:
                text = " ".join(clause.leaves())
                specifier = Specifier(origin, clause_index, text)
                specifier.f_type = SBAR
                element.f_specifiers.append(specifier)
示例#8
0
    def check_dobj(cls, node_index, dependencies, action, origin, full_sentence):
        """Attach ``DOBJ`` specifiers for direct objects governed by ``node_index``.

        Each matching ``DOBJ`` dependency is logged at error level, then a
        specifier named after ``get_full_noun`` of the dependent is created,
        given the object built by ``create_object`` and appended to
        ``action.f_specifiers``.
        """
        for dep in Search.find_dependencies(dependencies, DOBJ):
            if dep['governor'] != node_index:
                continue

            cls.logger.error("Dobj was found in a passive sentence")
            target_index = dep['dependent']
            tree_node = Search.find_dep_in_tree(full_sentence, target_index)
            noun = cls.get_full_noun(tree_node, target_index, dependencies)

            specifier = Specifier(origin, target_index, noun)
            specifier.f_type = DOBJ
            specifier.f_object = cls.create_object(origin, full_sentence,
                                                   target_index, dependencies)
            action.f_specifiers.append(specifier)
示例#9
0
    def find_sub_sentences(self, sentence):
        """Collect the sub-sentence nodes of ``sentence``.

        Starts from ``Search.find_children(sentence, self.f_sentenceTags)``
        and, for each direct child labelled ``"PP"`` or ``"ADVP"``, adds the
        matches found in that child and in each of the child's own children.

        :return: the combined result list.
        """
        tags = self.f_sentenceTags
        found = Search.find_children(sentence, tags)

        for child in sentence:
            if child.label() not in ("PP", "ADVP"):
                continue

            found.extend(Search.find_children(child, tags))
            for grandchild in child:
                found.extend(Search.find_children(grandchild, tags))

        return found
示例#10
0
    def extract_RCMOD_spec(cls, origin, full_sentence, element, node_index, dependencies):
        """Append ``RCMOD`` specifiers to ``element.f_specifiers``.

        An ``RCMOD`` dependency qualifies when its dependent is ``node_index``
        or, for an ``Action`` element, when its dependent gloss equals the
        action's ``f_cop``.  The specifier text is taken from the ``PP``
        phrase enclosing the dependency's governor, after ``delete_branches``
        with ``(S, SBAR)``; dependencies without such a phrase are skipped.
        The specifier is indexed by the dependency's dependent.
        """
        for dep in Search.find_dependencies(dependencies, RCMOD):
            if isinstance(element, Action):
                cop = element.f_cop
            else:
                cop = None

            if not (dep['dependent'] == node_index or dep['dependentGloss'] == cop):
                continue

            governor_node = Search.find_dep_in_tree(full_sentence, dep['governor'])
            pp_tree = Search.get_full_phrase_tree(governor_node, PP)
            if not pp_tree:
                continue

            pp_tree = cls.delete_branches(pp_tree, (S, SBAR))
            text = " ".join(pp_tree.leaves())
            specifier = Specifier(origin, dep['dependent'], text)
            specifier.f_type = RCMOD
            element.f_specifiers.append(specifier)
示例#11
0
    def is_active_sentence(self, sentence, dependencies):
        """Decide whether ``sentence`` is in active voice.

        Returns ``True`` when an ``NSUBJ``, ``CSUBJ`` or ``DOBJ`` dependency
        survives ``exclude_relative_clauses``.  Otherwise returns ``False``;
        a debug message is logged when no ``NSUBJPASS``, ``CSUBJPASS`` or
        ``AGENT`` dependency survives either, i.e. the voice is unclear.
        """
        active_hits = self.exclude_relative_clauses(
            sentence,
            Search.find_dependencies(dependencies, (NSUBJ, CSUBJ, DOBJ)))
        if len(active_hits) > 0:
            return True

        passive_hits = self.exclude_relative_clauses(
            sentence,
            Search.find_dependencies(dependencies,
                                     (NSUBJPASS, CSUBJPASS, AGENT)))
        if len(passive_hits) == 0:
            self.logger.debug("It is not clear whether this sentence is active or passive!")

        return False
示例#12
0
    def find_INFMOD_specifiers(cls, origin, node_index, dependencies, element):
        """Append an ``INFMOD`` specifier for the first INFMOD dependency of ``node_index``.

        The specifier name consists of the glosses of all ``AUX``, ``COP`` and
        ``NEG`` dependents of the infinitive, followed by the infinitive's own
        gloss, separated by single spaces.  Only the first matching
        dependency is processed.
        """
        for dep in Search.find_dependencies(dependencies, INFMOD):
            if dep['governor'] != node_index:
                continue

            helpers = Search.find_dependencies(dependencies, (AUX, COP, NEG))
            words = [helper['dependentGloss']
                     for helper in helpers
                     if helper['governor'] == dep['dependent']]
            words.append(dep['dependentGloss'])

            specifier = Specifier(origin, dep['dependent'], " ".join(words))
            specifier.f_type = INFMOD
            element.f_specifiers.append(specifier)
            return
                break
示例#13
0
    def determine_object(self, sentence, verb, dependencies, active):
        """Determine the objects of ``verb``.

        When ``verb.f_xcomp`` is set, the method first recurses on it and
        stores the first resulting object in ``verb.f_xcomp.f_object``.

        For active sentences the objects come from
        ``determine_object_from_dobj``.  For passive sentences the first
        ``NSUBJPASS`` dependency that survives ``exclude_relative_clauses`` is
        turned into an object with ``f_subjectRole = True``; when there is
        none, ``determine_object_from_dobj`` is used as well.

        Finally, ``Element`` instances returned by ``check_conjunctions`` for
        the first object are appended.

        :return: list of objects, possibly empty.
        """
        if verb.f_xcomp:
            nested = self.determine_object(sentence, verb.f_xcomp,
                                           dependencies, active)
            if len(nested) > 0:
                verb.f_xcomp.f_object = nested[0]

        objects = []

        if active:
            objects.extend(self.determine_object_from_dobj(verb, dependencies))
        else:
            passive_subjects = Search.find_dependencies(dependencies, NSUBJPASS)
            passive_subjects = self.exclude_relative_clauses(sentence,
                                                             passive_subjects)
            if len(passive_subjects) == 0:
                objects.extend(
                    self.determine_object_from_dobj(verb, dependencies))
            else:
                if len(passive_subjects) > 1:
                    self.logger.debug("Passive sentence with more than one subject!")
                subject_index = passive_subjects[0]['dependent']
                subject = Builder.create_object(self.f_stanford_sentence,
                                                self.f_full_sentence,
                                                subject_index, dependencies)
                subject.f_subjectRole = True
                objects.append(subject)
                self.check_np_sub_sentences(subject_index, dependencies,
                                            subject)

        if len(objects) > 0:
            linked = self.check_conjunctions(dependencies, objects[0],
                                             True, False, active)
            objects.extend(item for item in linked
                           if isinstance(item, Element))

        return objects
示例#14
0
    def analyze_recursive(self, main_sentence, dependencies):
        """Analyse ``main_sentence`` and, recursively, its sub-sentences.

        Works on a deep copy of ``main_sentence``.  Depending on the value of
        ``determine_sub_sentence_count``:

        * ``0`` - elements are extracted from the sentence directly;
        * ``1`` - the single sub-sentence is analysed first with its filtered
          dependencies, then removed from the copy; the remainder is analysed
          with the ``RCMOD`` dependencies plus those not handed to the
          sub-sentence, but only if an ``NSUBJ``, ``AGENT``, ``NSUBJPASS`` or
          ``DOBJ`` dependency is left among them;
        * otherwise - every sub-sentence is analysed recursively and the main
          sentence itself is not.
        """
        main_sentence = deepcopy(main_sentence)
        count = self.determine_sub_sentence_count(main_sentence)

        if count == 0:
            self.extract_elements(main_sentence, dependencies)
            return

        if count != 1:
            for nested in self.find_sub_sentences(main_sentence):
                nested_deps = self.filter_dependencies(nested, dependencies)
                self.analyze_recursive(nested, nested_deps)
            return

        nested = self.find_sub_sentences(main_sentence)[0]
        nested_deps = self.filter_dependencies(nested, dependencies)
        self.analyze_recursive(nested, nested_deps)

        # Cut the analysed sub-sentence out of the copy before looking at
        # what remains of the main sentence.
        position = nested.treeposition()
        del main_sentence[position]

        remaining = [dep for dep in dependencies
                     if dep['dep'] == RCMOD or dep not in nested_deps]
        core = Search.find_dependencies(remaining,
                                        (NSUBJ, AGENT, NSUBJPASS, DOBJ))
        if len(core) > 0:
            self.extract_elements(main_sentence, remaining)
示例#15
0
    def combine_actions(self):
        """Merge or align actions that refer to another action.

        For every action in ``self.f_world.f_actions`` a reference action is
        looked up: the ``f_reference`` of its ``f_actorFrom`` when that is an
        ``Action``; otherwise the ``f_reference`` of its ``f_object``, either
        directly (when an ``Action``) or resolved through
        ``Search.get_action``.  When ``can_be_merged(..., False)`` holds the
        two are merged; else, when ``can_be_merged(..., True)`` holds, the
        actor, object and copula attributes of the reference action are
        copied onto the action.
        """
        for action in self.f_world.f_actions:
            target = None
            actor = action.f_actorFrom

            if actor and isinstance(actor.f_reference, Action):
                target = actor.f_reference
            elif action.f_object and action.f_object.f_reference:
                linked = action.f_object.f_reference
                if isinstance(linked, Action):
                    target = linked
                else:
                    target = Search.get_action(self.f_world.f_actions, linked)

            if not target:
                continue

            if self.can_be_merged(target, action, False):
                self.logger.debug("Merging {} and {}".format(target, action))
                self.merge(target, action, False)
            elif self.can_be_merged(target, action, True):
                self.logger.debug(
                    "Copying attributes from {} to {}".format(target, action))
                action.f_actorFrom = target.f_actorFrom
                action.f_object = target.f_object
                action.f_cop = target.f_cop
                action.f_copIndex = target.f_copIndex
示例#16
0
    def is_negated(cls, node_index, dependencies):
        """Return ``True`` when the word at ``node_index`` is negated.

        If ``node_index`` is the dependent of a ``COP`` dependency, the
        governor of the first such dependency is checked instead.  The word
        counts as negated when it governs any ``NEG`` dependency.
        """
        target = node_index
        for cop in Search.find_dependencies(dependencies, COP):
            if cop['dependent'] == node_index:
                target = cop['governor']
                break

        negations = Search.find_dependencies(dependencies, NEG)
        return any(neg['governor'] == target for neg in negations)
示例#17
0
    def get_modifiers(cls, node_index, dependencies):
        """Return the index of the first usable modifier of ``node_index``.

        A modifier is an ``ADVMOD`` or ``ACOMP`` dependency governed by
        ``node_index`` whose dependent index is greater than the governor
        index and whose gloss is not in ``f_sequenceIndicators``.

        :return: the dependent index, or ``None`` when nothing qualifies.
        """
        for dep in Search.find_dependencies(dependencies, (ADVMOD, ACOMP)):
            if dep['governor'] != node_index:
                continue

            follows_governor = dep['governor'] < dep['dependent']
            if follows_governor and dep['dependentGloss'] not in f_sequenceIndicators:
                return dep['dependent']

        return None
示例#18
0
    def find_determiner(cls, node_index, dependencies, element):
        """Store the determiner of ``node_index`` in ``element.f_determiner``.

        Uses the dependent gloss of the first ``POSS`` or ``DET`` dependency
        governed by ``node_index``; ``element`` is left untouched when there
        is none.
        """
        for dep in Search.find_dependencies(dependencies, (POSS, DET)):
            if dep['governor'] == node_index:
                element.f_determiner = dep['dependentGloss']
                return
示例#19
0
    def reference_resolution(self):
        """Resolve the references of all actors and resources that need it.

        For every actor and resource of ``self.f_world`` with
        ``f_needsResolve`` set, ``f_reference`` is assigned from, in order of
        precedence:

        1. ``self.f_reference_map``, keyed by ``(sentence id, word index)``
           and converted with ``to_element``;
        2. ``find_action``, when the object's name satisfies
           ``Processing.is_action_resolution_determiner``;
        3. ``find_reference``, with role matching inverted when the
           containing action has a copula or the name satisfies
           ``Processing.is_RC_pronoun``.
        """
        candidates = list(self.f_world.f_actors) + list(self.f_world.f_resources)

        for obj in candidates:
            if not obj.f_needsResolve:
                continue

            self.logger.debug("Resolving {}".format(obj))
            key = (obj.f_sentence.f_id, obj.f_word_index)

            if key in self.f_reference_map:
                element = self.to_element(self.f_reference_map[key])
                obj.f_reference = element
                self.logger.debug("Manual resolution of {}".format(element))
                continue

            sentence_id = obj.f_sentence.f_id

            if Processing.is_action_resolution_determiner(obj.f_name):
                action = self.find_action(sentence_id, obj)
                obj.f_reference = action
                self.logger.debug("Resolution result: {}".format(action))
                continue

            animate = self.determine_animate_type(obj)
            sentence_actions = self.f_world.get_actions_of_sentence(
                obj.f_sentence)
            containing_action = Search.get_action(sentence_actions, obj)
            invert_role_match = (containing_action.f_cop
                                 or Processing.is_RC_pronoun(obj.f_name))
            reference = self.find_reference(sentence_id, obj, animate,
                                            invert_role_match)
            obj.f_reference = reference
            self.logger.debug("Resolution result: {}".format(reference))
示例#20
0
    def determine_sub_sentence_count(self, sentence):
        """Count the sub-sentences contained in ``sentence``.

        Starts from ``Search.count_children(sentence, self.f_sentenceTags)``;
        a single hit is discounted when the first child of ``sentence`` is
        labelled ``WHNP``.  For each direct child labelled ``PP`` or ``ADVP``
        the count of that child is added, plus the counts of its own children
        that are labelled ``NP``.
        """
        tags = self.f_sentenceTags
        total = Search.count_children(sentence, tags)

        if total == 1 and sentence[0].label() == WHNP:
            total -= 1

        for child in sentence:
            if child.label() not in (PP, ADVP):
                continue

            total += Search.count_children(child, tags)
            for grandchild in child:
                if grandchild.label() == NP:
                    total += Search.count_children(grandchild, tags)

        return total
示例#21
0
    def check_global_conjunctions(self):
        """Link actions joined by a ``CONJ`` dependency that are not linked yet.

        For each ``CONJ`` dependency in ``self.f_dependencies`` the actions
        containing its governor and its dependent are looked up; when both
        exist and ``find_conjunction`` reports no existing link, one is
        created with ``build_link``.
        """
        for dep in Search.find_dependencies(self.f_dependencies, CONJ):
            source = self.get_action_containing(dep['governor'])
            target = self.get_action_containing(dep['dependent'])

            if not (source and target):
                continue
            if self.find_conjunction(source, dep, target):
                continue

            self.build_link(source, dep, target)
示例#22
0
    def create_action_syntax(cls, origin, full_sentence, vphead):
        """Create an ``Action`` from the verb phrase ``vphead`` using syntax only.

        The action name is the space-joined result of ``extract_verb_parts``
        and its index is the sentence index of ``vphead``.  SBAR and PP
        specifiers found under ``vphead`` are attached before the action is
        logged and returned.
        """
        name = " ".join(cls.extract_verb_parts(vphead))
        position = Search.find_sentence_index(full_sentence, vphead)
        result = Action(origin, position, name)

        cls.extract_SBAR_spec(origin, full_sentence, result, vphead)
        cls.extract_PP_spec_syntax(origin, full_sentence, result, vphead)

        cls.logger.debug("Identified action {}".format(result))
        return result
示例#23
0
    def get_specifier_from_dependencies(cls, origin, node_index, dependencies, element, dep_type):
        """Append one combined specifier built from all ``dep_type`` dependents of ``node_index``.

        The glosses of all ``dep_type`` dependencies governed by
        ``node_index`` are joined with single spaces.  When such a dependent
        itself governs a ``CONJ`` dependency, the conjunction's ``spec`` value
        and the gloss of the conjoined word follow it.  The specifier is
        indexed by the first matching dependent; nothing is appended when no
        dependency matches.

        :param origin: passed through to ``Specifier``.
        :param node_index: index of the governing word.
        :param dependencies: list of dependency dicts.
        :param element: object whose ``f_specifiers`` list is extended.
        :param dep_type: dependency type to collect; also used as the
            specifier's ``f_type``.
        """
        to_check = Search.find_dependencies(dependencies, dep_type)
        conjs = Search.find_dependencies(dependencies, CONJ)
        index = None
        parts = []

        for dep in to_check:
            if dep['governor'] != node_index:
                continue

            parts.append(dep['dependentGloss'])
            for conj in conjs:
                if conj['governor'] == dep['dependent']:
                    # Use the conjoined word itself; repeating the gloss of
                    # ``dep`` here would duplicate the first modifier and
                    # drop the second one.
                    parts.append(conj['spec'])
                    parts.append(conj['dependentGloss'])
            if not index:
                index = dep['dependent']

        if index:
            spec = Specifier(origin, index, " ".join(parts))
            spec.f_type = dep_type
            element.f_specifiers.append(spec)
示例#24
0
    def find_NN_specifiers(cls, origin, node_index, dependencies, element):
        """Append noun-compound specifiers of ``node_index`` to ``element``.

        First delegates to ``get_specifier_from_dependencies`` for ``NN``
        dependencies.  Then every ``DEP`` dependency governed by
        ``node_index`` whose dependent directly follows the governor
        (index + 1) is added as an ``NNAFTER`` specifier.
        """
        cls.get_specifier_from_dependencies(origin, node_index, dependencies,
                                            element, NN)

        for dep in Search.find_dependencies(dependencies, DEP):
            if dep['governor'] == node_index and dep['governor'] + 1 == dep['dependent']:
                specifier = Specifier(origin, dep['dependent'],
                                      dep['dependentGloss'])
                specifier.f_type = NNAFTER
                element.f_specifiers.append(specifier)
示例#25
0
    def check_conjunctions(self, dependencies, element, obj, actor, active):
        """Create the elements that are conjoined with ``element``.

        A ``CONJ`` dependency is relevant when its governor gloss equals
        ``element.f_name`` and its governor has no ``COP`` dependency, or when
        ``element`` is an ``Action`` whose ``f_xcomp.f_baseForm`` contains the
        governor gloss.  For each relevant dependency a new element is built
        for the dependent:

        * an actor when ``obj`` and ``actor`` are both true;
        * an object when only ``obj`` is true;
        * otherwise an action - for an xcomp hit a shallow copy of the action
          with a fresh ``f_xcomp``.

        The new element is linked to ``element`` through ``build_link`` and
        returned, unless the dependency points at itself.

        :return: list of the newly created, linked elements.
        """
        linked = []
        conjs = Search.find_dependencies(dependencies, CONJ)
        cops = Search.find_dependencies(dependencies, COP)

        if len(conjs) == 0:
            return linked

        action = element if isinstance(element, Action) else None

        for conj in conjs:
            x_comp_hit = bool(action and action.f_xcomp
                              and conj['governorGloss'] in action.f_xcomp.f_baseForm)
            name_hit = (conj['governorGloss'] == element.f_name
                        and len(Search.filter_by_gov(cops, conj['governor'])) == 0)
            if not (name_hit or x_comp_hit):
                continue

            dep_index = conj['dependent']

            if obj and actor:
                new_ele = Builder.create_actor(
                    self.f_stanford_sentence, self.f_full_sentence,
                    dep_index, dependencies)
            elif obj:
                new_ele = Builder.create_object(
                    self.f_stanford_sentence, self.f_full_sentence,
                    dep_index, dependencies)
                self.check_np_sub_sentences(dep_index, dependencies, new_ele)
            elif x_comp_hit:
                new_ele = copy(action)
                new_ele.f_xcomp = Builder.create_action(
                    self.f_stanford_sentence, self.f_full_sentence,
                    dep_index, dependencies, True)
            else:
                new_ele = Builder.create_action(
                    self.f_stanford_sentence, self.f_full_sentence,
                    dep_index, dependencies, active)

            # A dependency from a word to itself yields no link.
            if conj['dependent'] != conj['governor']:
                linked.append(new_ele)
                self.build_link(element, conj, new_ele)

        return linked
示例#26
0
    def create_action(cls, origin, full_sentence, node_index, dependencies, active):
        """Create an ``Action`` for the verb at ``node_index``.

        The action is named after the first leaf of the verb's tree node and
        enriched with, where present: auxiliaries, a modifier, the negation
        flag, a copula, a particle, an indirect-object specifier, direct
        object specifiers (passive sentences only), an open clausal
        complement (``f_xcomp``, built recursively) and SBAR / PP / RCMOD
        specifiers.

        :param origin: passed through to the created elements.
        :param full_sentence: tree searched for the tree nodes.
        :param node_index: index of the verb.
        :param dependencies: list of dependency dicts.
        :param active: whether the sentence is in active voice.
        :return: the populated ``Action``.
        """
        node = Search.find_dep_in_tree(full_sentence, node_index)
        action = Action(origin, node_index, node[0])

        aux = cls.get_auxiliars(node_index, dependencies)
        if len(aux) > 0:
            action.f_aux = aux

        mod_index = cls.get_modifiers(node_index, dependencies)
        if mod_index:
            mod = Search.find_dep_in_tree(full_sentence, mod_index)
            action.f_mod = mod[0]
            action.f_modPos = mod_index

        # is_negated compares its argument with the integer 'dependent' and
        # 'governor' entries of the dependencies, so it needs the word index;
        # passing the tree node could never match.
        action.f_negated = cls.is_negated(node_index, dependencies)

        cop_index = cls.get_cop(node_index, dependencies)
        if cop_index:
            cop = Search.find_dep_in_tree(full_sentence, cop_index)
            action.f_cop = cop[0]
            action.f_copIndex = cop_index

        prt = cls.get_prt(node_index, dependencies)
        if prt:
            action.f_prt = prt

        iobj_index = cls.get_iobj(node_index, dependencies)
        if iobj_index:
            iobj = Search.find_dep_in_tree(full_sentence, iobj_index)
            spec = Specifier(origin, iobj_index, " ".join(iobj.leaves()))
            spec.f_type = IOBJ
            action.f_specifiers.append(spec)

        if not active:
            cls.check_dobj(node_index, dependencies, action, origin, full_sentence)

        to_check = Search.find_dependencies(dependencies, (XCOMP, DEP))

        for dep in to_check:
            if dep['governor'] == node_index:
                dep_in_tree = Search.find_dep_in_tree(full_sentence, dep['dependent'])
                if dep['dep'] == DEP:
                    # A generic DEP relation only counts as a complement when
                    # its node label starts with "V" and it does not precede
                    # the governor.
                    if dep_in_tree.label()[0] != "V" or dep['dependent'] < dep['governor']:
                        continue

                # Only the first qualifying complement is used.
                xcomp = cls.create_action(origin, full_sentence, dep['dependent'], dependencies, True)
                action.f_xcomp = xcomp
                break

        vp_head = Search.get_full_phrase_tree(node, VP)
        cls.extract_SBAR_spec(origin, full_sentence, action, vp_head)
        cls.extract_PP_spec(origin, full_sentence, action, node_index, dependencies)
        cls.extract_RCMOD_spec(origin, full_sentence, action, node_index, dependencies)

        cls.logger.debug("Identified action {}".format(action))
        return action
示例#27
0
    def find_node_action(self, dep_index, action_list, deps):
        """Find the action in ``action_list`` that belongs to ``dep_index``.

        Returns the first action whose ``f_word_index`` equals ``dep_index``.
        When there is none and ``dep_index`` governs a ``COP`` dependency,
        the lookup is repeated for the dependent of the first such
        dependency.

        :return: the matching action or ``None``.
        """
        direct = next((candidate for candidate in action_list
                       if candidate.f_word_index == dep_index), None)
        if direct is not None:
            return direct

        for cop in Search.find_dependencies(deps, COP):
            if cop['governor'] == dep_index:
                return self.find_node_action(cop['dependent'], action_list,
                                             deps)

        return None
示例#28
0
    def filter_dependencies(self, sentence, dependencies):
        """Return the dependencies that belong to ``sentence``.

        A dependency is kept when it is of type ``RCMOD`` or when both its
        governor and its dependent lie in the half-open index range that
        starts at the sentence index of ``sentence`` within
        ``self.f_full_sentence`` and spans as many positions as ``sentence``
        has leaves.
        """
        first = Search.find_sentence_index(self.f_full_sentence, sentence)
        limit = first + len(sentence.leaves())

        def inside(word_index):
            return first <= word_index < limit

        return [dep for dep in dependencies
                if dep['dep'] == RCMOD
                or (inside(dep['governor']) and inside(dep['dependent']))]
示例#29
0
    def create_object(cls, origin, full_sentence, node_index, dependencies):
        """Create the object element for the word at ``node_index``.

        An internal actor is created when
        ``WordNetWrapper.person_or_system`` accepts the full noun or
        ``Processing.can_be_person_pronoun`` accepts the head word; otherwise
        a ``Resource`` with noun specifiers is created.  The result always
        has ``f_subjectRole`` set to ``False``.
        """
        tree_node = Search.find_dep_in_tree(full_sentence, node_index)
        full_noun = cls.get_full_noun(tree_node, node_index, dependencies)

        is_actor = (WordNetWrapper.person_or_system(full_noun, tree_node[0])
                    or Processing.can_be_person_pronoun(tree_node[0]))
        if is_actor:
            created = cls.create_internal_actor(origin, full_sentence,
                                                tree_node, node_index,
                                                dependencies)
        else:
            created = Resource(origin, node_index, tree_node[0])
            cls.determine_noun_specifiers(origin, full_sentence, tree_node,
                                          node_index, dependencies, created)

        created.f_subjectRole = False
        cls.logger.debug("Identified object {}".format(created))
        return created
示例#30
0
    def find_dependants(cls, node_index, dependencies, deps, is_governor):
        """Return the space-joined glosses of words related to ``node_index``.

        With ``is_governor`` true, the dependent glosses of all ``deps``
        dependencies governed by ``node_index`` are collected; otherwise the
        governor glosses of those whose dependent is ``node_index``.

        :return: the glosses separated by single spaces; ``""`` when there
            are none.
        """
        relations = Search.find_dependencies(dependencies, deps)

        if is_governor:
            match_key, gloss_key = 'governor', 'dependentGloss'
        else:
            match_key, gloss_key = 'dependent', 'governorGloss'

        glosses = [dep[gloss_key] for dep in relations
                   if dep[match_key] == node_index]
        return " ".join(glosses)