Exemplo n.º 1
0
 def lexical_polarity(word):
     '''Classify a word as positive, negative or neutral by lexicon lookup.

     The lower-cased word form is searched first in
     WordBasedExtractors.POSITIVE_WORDS and then in
     WordBasedExtractors.NEGATIVE_WORDS.

     Returns a WordBasedResult wrapping 'pos', 'neg' or 'neu'.
     '''
     token = word.word_form.lower()
     if token in WordBasedExtractors.POSITIVE_WORDS:
         label = 'pos'
     elif token in WordBasedExtractors.NEGATIVE_WORDS:
         label = 'neg'
     else:
         label = 'neu'
     return WordBasedResult(label)
Exemplo n.º 2
0
 def parse_start_or_end_child_in_s_clause(sentence):
     '''Flag the leaves lying on the first or last branch of their clause.

     Suggested by Marilena Di Bari: temporal expressions are typically at
     the very end or beginning of a well-formed English sentence.

     For every leaf of the parse tree, the function starts from the node
     right above the leaf and climbs until it meets a node whose label
     starts with 'S' or equals 'ROOT'. It then checks whether the branch
     leading down to the leaf is the first or the last child of that node.

     Returns a SentenceBasedResult holding one boolean WordBasedResult per
     leaf; a leaf whose processing raises any exception yields False.
     '''
     whole_tree = sentence.parsetree
     flags = []
     for leaf_position in whole_tree.treepositions(order='leaves'):
         try:
             ancestor = whole_tree[leaf_position[:-1]]
             levels = 1
             # Not every leaf is a descendant of an S node (all of them are
             # below ROOT, though). A sentence showing this when parsed:
             # -  "And Rosneft benefits from BP's expertise in exploring in
             #     difficult and potentially hazardous conditions."
             while True:
                 label = ancestor.node
                 if label.startswith('S') or label == 'ROOT':
                     break
                 ancestor = ancestor.parent()
                 levels += 1
             # index, among the ancestor's children, of the branch that
             # contains the leaf
             child_index = leaf_position[len(leaf_position) - levels]
             on_boundary = child_index in (0, len(ancestor) - 1)
             flags.append(WordBasedResult(on_boundary))
         except Exception:
             flags.append(WordBasedResult(False))
     return SentenceBasedResult(tuple(flags))
Exemplo n.º 3
0
 def dependency_incoming_granfather_pos_collapsed(word):
     '''Part of speech found two hops up the collapsed dependencies.

     The first incoming 'collapsed' dependency is followed twice, starting
     from `word`; the part of speech of the word reached is returned.

     Returns a WordBasedResult wrapping that part of speech, or
     WordBasedResult(False) if an IndexError is raised along the way.
     '''
     try:
         parent = word.dependencies_in('collapsed')[0][1]
         grandparent = parent.dependencies_in('collapsed')[0][1]
         outcome = WordBasedResult(grandparent.part_of_speech)
     except IndexError:
         outcome = WordBasedResult(False)
     return outcome
Exemplo n.º 4
0
 def dependency_incoming_granfather_relations_collapsed(word):
     '''First item of the second-hop incoming collapsed dependency.

     The first incoming 'collapsed' dependency of `word` is followed to the
     word it points to; the first item of that word's own first incoming
     'collapsed' dependency is returned (presumably the relation label --
     confirm against `dependencies_in`).

     Returns a WordBasedResult wrapping that item, or WordBasedResult(False)
     if an IndexError is raised along the way.
     '''
     try:
         parent = word.dependencies_in('collapsed')[0][1]
         incoming = parent.dependencies_in('collapsed')
         outcome = WordBasedResult(incoming[0][0])
     except IndexError:
         outcome = WordBasedResult(False)
     return outcome
Exemplo n.º 5
0
 def from_temp_dct(from_obj, to_obj, document):
     '''Tell whether `from_obj` is a temporal expression equal to the DCT.

     Compares `from_obj.value` stripped of '-' with `document.dct` stripped
     of '_'. `to_obj` is not used.

     Returns WordBasedResult(bool) when `from_obj` is a TemporalExpression,
     WordBasedResult('_') otherwise.
     '''
     if not isinstance(from_obj, TemporalExpression):
         return WordBasedResult('_')
     # NOTE(review): the two sides strip different separators ('-' vs '_');
     # confirm this matches the format `document.dct` is stored in.
     value = from_obj.value.replace('-', '')
     dct = document.dct.replace('_', '')
     return WordBasedResult(value == dct)
Exemplo n.º 6
0
 def parse_3_levels_up_childs(word):
     '''Labels of the children of an ancestor of `word` in the parse tree.

     The ancestor is reached from `word.constituency_parent` by calling
     `parent()` twice; the `node` labels of its children are joined with
     '_'.

     Returns a WordBasedResult wrapping the joined labels, or
     WordBasedResult('_^_') when an AttributeError or TypeError is raised
     (e.g. there is no such ancestor).
     '''
     try:
         ancestor = word.constituency_parent.parent().parent()
         labels = []
         for child in ancestor:
             labels.append(child.node)
         return WordBasedResult('_'.join(labels))
     except (AttributeError, TypeError):
         return WordBasedResult('_^_')
Exemplo n.º 7
0
 def word_distance(from_obj, to_obj, document):
     '''Distance in words between two objects of the same sentence.

     The distance is measured from the last word of whichever object starts
     first to the first word of the other one, as an absolute value.
     `document` is not used.

     Returns a WordBasedResult wrapping the distance, or
     WordBasedResult('_') when the objects are in different sentences.
     '''
     if from_obj.id_sentence() != to_obj.id_sentence():
         return WordBasedResult('_')
     if from_obj.id_first_word() < to_obj.id_first_word():
         gap = to_obj.id_first_word() - from_obj.id_last_word()
     else:
         gap = from_obj.id_first_word() - to_obj.id_last_word()
     return WordBasedResult(abs(gap))
Exemplo n.º 8
0
 def direction(from_obj, to_obj, document):
     '''Encode the relative order of two objects as '>' or '<'.

     Sentence ids are compared first; only when both objects are in the
     same sentence are the ids of their first words compared. `document` is
     not used.

     Returns a WordBasedResult wrapping '>' or '<'.
     '''
     sentence_delta = from_obj.id_sentence() - to_obj.id_sentence()
     if sentence_delta < 0:
         return WordBasedResult('>')
     if sentence_delta > 0:
         return WordBasedResult('<')
     # NOTE(review): across sentences '>' means that `from_obj` comes
     # first, whereas within a sentence '>' is returned when `from_obj`
     # comes AFTER `to_obj`. One of the two branches looks inverted;
     # behaviour is kept as it was -- confirm which one is intended.
     word_delta = from_obj.id_first_word() - to_obj.id_first_word()
     symbol = '>' if word_delta > 0 else '<'
     return WordBasedResult(symbol)
Exemplo n.º 9
0
 def dominant_verb_collapsed(word):
     '''Part of speech reached by climbing the collapsed dependencies.

     Starting from `word`, the first incoming 'collapsed' dependency is
     followed until a word whose part of speech starts with 'V' is met or
     `max_steps` hops have been made.

     Returns a WordBasedResult wrapping the part of speech of the word the
     climb stopped on (not necessarily a verb if the hop budget ran out),
     or WordBasedResult(False) if a hop could not be made.
     '''
     if word.part_of_speech.startswith('V'):
         return WordBasedResult(word.part_of_speech)

     steps = max_steps
     while not word.part_of_speech.startswith('V') and steps:
         try:
             word = word.dependencies_in('collapsed')[0][1]
         except Exception:
             # Best effort: no incoming dependency to follow (or a
             # malformed one). `Exception` rather than a bare `except` so
             # that KeyboardInterrupt/SystemExit are not swallowed.
             return WordBasedResult(False)
         steps -= 1
     return WordBasedResult(word.part_of_speech)
Exemplo n.º 10
0
 def temporal_temporal_adverbs_relationships_in_time_indefinite(word):
     '''Tell whether the word is one of a fixed list of time adverbs.

     The lower-cased word form is compared against the list below
     (adverbs such as 'already', 'later', 'soon', 'yet').

     Returns a WordBasedResult wrapping a boolean.
     '''
     lowered = word.word_form.lower()
     is_listed = lowered in (
         'after', 'afterwards', 'already', 'before', 'earlier', 'earliest',
         'early', 'eventually', 'finally', 'first', 'formerly', 'just',
         'last', 'late', 'lately', 'later', 'latest', 'next', 'previously',
         'recently', 'since', 'soon', 'still', 'yet',
     )
     return WordBasedResult(is_listed)
Exemplo n.º 11
0
 def temporal_temporal_adverbs_frequency_indefinite(word):
     '''Tell whether the word is one of a fixed list of frequency adverbs.

     The lower-cased word form is compared against the list below
     (adverbs such as 'always', 'often', 'seldom', 'usually').

     Returns a WordBasedResult wrapping a boolean.
     '''
     lowered = word.word_form.lower()
     is_listed = lowered in (
         'always', 'constantly', 'continually', 'ever', 'frequently',
         'generally', 'infrequently', 'never', 'normally', 'occasionally',
         'often', 'periodically', 'rarely', 'regularly', 'repeatedly',
         'seldom', 'sometimes', 'usually',
     )
     return WordBasedResult(is_listed)
Exemplo n.º 12
0
 def temporal_pod(word):
     '''Tell whether the word names a part of the day.

     The lower-cased word form must match, as a whole, one of the listed
     names ('morning', 'noon', 'a.m.', ...) optionally followed by 's'.

     Returns a WordBasedResult wrapping a boolean.
     '''
     # Raw literals: the entries are regex fragments, and '\.' in a plain
     # string literal is an invalid escape sequence.
     pods = ('morning', 'afternoon', 'evening', 'night', 'noon', 'midnight',
             'midday', 'sunrise', 'dusk', 'sunset', 'dawn', 'overnight',
             'noonday', 'noontide', 'nightfall', 'midafternoon',
             'daybreak', 'gloaming', r'a\.?m\.?', r'p\.?m\.?')
     pattern = r'^({})s?$'.format('|'.join(pods))
     matched = re.search(pattern, word.word_form.lower()) is not None
     return WordBasedResult(matched)
Exemplo n.º 13
0
 def parse_2_levels_up_nodes(word):
     '''Labels of two consecutive ancestors of `word` in the parse tree.

     Reads the `node` label of `word.constituency_parent.parent()` and of
     that node's own parent, and joins them with '_' with the higher
     ancestor first. If an AttributeError or TypeError is raised part-way,
     only the labels collected so far are used (possibly none, giving an
     empty string).

     Returns a WordBasedResult wrapping the joined labels.
     '''
     labels = []
     try:
         lower = word.constituency_parent.parent()
         labels.append(lower.node)
         labels.append(lower.parent().node)
     except (AttributeError, TypeError):
         pass
     return WordBasedResult('_'.join(labels[::-1]))
Exemplo n.º 14
0
    def sentences_linked_by_coref(from_obj, to_obj, document):
        """Check whether the sentences of the two objects are connected.

        Asks the sentence containing `from_obj` whether it is
        `connected_to` the id of the sentence containing `to_obj`
        (a coreference link, according to the original author).

        Returns a WordBasedResult wrapping the answer of `connected_to`.

        """
        source_sentence = document.sentences[from_obj.id_sentence()]
        is_connected = source_sentence.connected_to
        return WordBasedResult(is_connected(to_obj.id_sentence()))
Exemplo n.º 15
0
    def to_is_root(from_obj, to_obj, document):
        """Tell whether any word of `to_obj` is ROOT in the dependencies.

        Every token id of `to_obj` is tested with `is_root` on the basic
        dependencies of the sentence containing `to_obj`. `from_obj` is
        not used.

        Returns a WordBasedResult wrapping a boolean.

        """
        sentence = document.sentences[to_obj.id_sentence()]
        token_ids = tuple(token.id_token for token in to_obj.words)
        root_flags = [sentence.basic_dependencies.is_root(token_id)
                      for token_id in token_ids]
        return WordBasedResult(any(root_flags))
Exemplo n.º 16
0
 def temporal_period(word):
     '''Tell whether the word names a period of time.

     The lower-cased word form must match, as a whole, one of the listed
     units ('century', 'decade', 'year', ..., 'second', 'fortnight') in
     singular or plural form.

     Returns a WordBasedResult wrapping a boolean.
     '''
     periods = (
         # `(?:y|ies)` is an alternation; the former `[y|ies]` was a
         # character class that accepted exactly one of 'y', '|', 'i', 'e',
         # 's' and therefore never matched "centuries".
         r'centur(?:y|ies)', r'decades?', r'years?', r'months?', r'days?',
         r'week\-?ends?', r'weeks?', r'hours?', r'minutes?', r'seconds?',
         r'fortnights?',
     )
     pattern = r'^({pattern})$'.format(pattern='|'.join(periods))
     matched = re.search(pattern, word.word_form.lower()) is not None
     return WordBasedResult(matched)
Exemplo n.º 17
0
def matching_gazetteer(gazetteer, sentence):
    '''Tag the words of the sentence that belong to a gazetteer entry.

    Every gazetteer item is searched in the sequence of word forms of the
    sentence through `search_subsequence(..., end=True)`, whose results are
    consumed as (start, end) index pairs with `end` inclusive. Words covered
    by a match are tagged 'I', all the others 'O'.

    Example:
    sentence = ['I', 'live', 'in', 'New', 'York', '.']
    gazetteer = { ..., ('New', 'York'), ...}

    returns SentenceBasedResult(W('O'), W('O'), W('O'), W('I'), W('I'), W('O'))

    Returns a SentenceBasedResult built from a tuple with one
    WordBasedResult per word of the sentence.
    '''
    word_forms = [token.word_form for token in sentence.words]
    # One distinct object per word: `[obj] * n` would make every 'O'
    # position alias the very same instance.
    result = [WordBasedResult('O') for _ in word_forms]
    for gazetteer_item in gazetteer:
        subsequences = search_subsequence(word_forms, gazetteer_item, end=True)
        for start, end in subsequences:
            for index in range(start, end + 1):
                result[index] = WordBasedResult('I')
    return SentenceBasedResult(tuple(result))
Exemplo n.º 18
0
 def to_parse_common_ancestor(from_obj, to_obj, document):
     '''Label of the parse node taken as ancestor of the words of `to_obj`.

     Among the tree positions of the leaves from the first to the last word
     of `to_obj`, the shortest one is picked (the earliest in case of ties)
     and the `node` label of the node right above that leaf is returned.
     `from_obj` is not used.

     Returns a WordBasedResult wrapping that label.
     '''
     first = to_obj.id_first_word()
     last = to_obj.id_last_word()
     sentence = document.sentences[to_obj.words[0].id_sentence]
     leaf_positions = list(sentence.parsetree.treepositions(order='leaves'))
     shallowest = leaf_positions[first]
     for candidate in leaf_positions[first + 1:last + 1]:
         if len(candidate) < len(shallowest):
             shallowest = candidate
     ancestor = sentence.parsetree[shallowest[:-1]]
     return WordBasedResult(ancestor.node)
Exemplo n.º 19
0
    def dependency_relation_type(from_obj, to_obj, document):
        '''Basic dependency relations linking the words of the two objects.

        For every pair (word of `from_obj`, word of `to_obj`) the basic
        dependencies between the two words are looked up in both
        directions; the first item of the first dependency found is
        collected, preferring the direction queried on the `from_obj`
        word. Pairs raising a KeyError are skipped. `document` is not
        used.

        Returns a WordBasedResult wrapping the collected items joined by a
        space, or an empty string when the objects are in different
        sentences.
        '''
        if from_obj.id_sentence() != to_obj.id_sentence():
            return WordBasedResult('')

        relations = []
        for source in from_obj.words:
            for target in to_obj.words:
                try:
                    incoming = source.dependencies_in('basic', target)
                    outgoing = target.dependencies_in('basic', source)
                    link = incoming or outgoing
                    if link:
                        relations.append(link[0][0])
                except KeyError:
                    pass
        return WordBasedResult(' '.join(relations))
Exemplo n.º 20
0
 def temporal_difference(from_obj, to_obj, document):
     '''Difference in days between two dated temporal expressions.

     Both objects must be TemporalExpression instances whose `value` starts
     with a YYYY-MM-DD date. `document` is not used.

     Returns a WordBasedResult wrapping the number of days of
     `from_obj` date minus `to_obj` date, or WordBasedResult('_') when
     either object is not a TemporalExpression, either value has no leading
     date, or the digits do not form a valid calendar date.
     '''
     both_temporal = (isinstance(from_obj, TemporalExpression) and
                      isinstance(to_obj, TemporalExpression))
     if not both_temporal:
         return WordBasedResult('_')

     leading_date = r'^([0-9]{4})-([0-9]{2})-([0-9]{2})'
     from_match = re.match(leading_date, from_obj.value)
     to_match = re.match(leading_date, to_obj.value)
     if not (from_match and to_match):
         return WordBasedResult('_')

     try:
         from_day = date(*[int(part) for part in from_match.groups()])
         to_day = date(*[int(part) for part in to_match.groups()])
         return WordBasedResult((from_day - to_day).days)
     except ValueError:
         # e.g. month 13 or day 32
         return WordBasedResult('_')
Exemplo n.º 21
0
 def morphological_extended_pattern(word):
     '''Character-class shape of the word form.

     Upper-case characters become 'X', lower-case ones 'x', digits 'd' and
     whitespace ' '; any other character is kept as it is.

     Returns a WordBasedResult wrapping the resulting string.
     '''
     def shape_of(char):
         if char.isupper():
             return 'X'
         if char.islower():
             return 'x'
         if char.isdigit():
             return 'd'
         if char.isspace():
             return ' '
         return char

     shape = ''.join(shape_of(char) for char in word.word_form)
     return WordBasedResult(shape)
Exemplo n.º 22
0
    def linked_by_dependency_relation(from_obj, to_obj, document):
        '''Tell whether two objects are linked through one of their words.

        Only objects of the same sentence are examined. For every pair
        (word of `from_obj`, word of `to_obj`) the basic dependencies
        between the two words are queried in both directions, and the
        first pair with a non-empty answer decides the result.
        `document` is not used.

        Returns WordBasedResult('<') when the query on the `from_obj` word
        is non-empty, WordBasedResult('>') when only the query on the
        `to_obj` word is, and WordBasedResult(False) when no pair is
        linked or the sentences differ.

        '''
        if from_obj.id_sentence() != to_obj.id_sentence():
            return WordBasedResult(False)

        for source in from_obj.words:
            for target in to_obj.words:
                incoming = source.dependencies_in('basic', target)
                outgoing = target.dependencies_in('basic', source)
                if incoming:
                    return WordBasedResult('<')
                if outgoing:
                    return WordBasedResult('>')
        return WordBasedResult(False)
Exemplo n.º 23
0
    def dependency_incoming_relations_basic(word):
        '''One boolean feature per dependency label in `dep_labels`.

        For each label, the feature is named
        'basic_dependency_incoming_<label>' and is True when
        `word.basic_dependencies_in` holds a truthy entry for that label:

            label1  label2  label3  ...  labelN
           [  F   ,   F   ,   T   , ... ,  T   ]

        Dependency relations are taken from:
        http://nlp.stanford.edu/software/dependencies_manual.pdf

        Returns a WordBasedResults built from a tuple of
        (feature name, WordBasedResult(bool)) pairs.

        '''
        prefix = 'basic_dependency_incoming_'
        features = []
        for label in dep_labels:
            present = bool(word.basic_dependencies_in.get(label, False))
            features.append((prefix + label, WordBasedResult(present)))
        return WordBasedResults(tuple(features))
Exemplo n.º 24
0
    def to_governor_verb_pos(from_obj, to_obj, document):
        '''Parts of speech of the words of `to_obj` chain governed by a verb.

        For every word of `to_obj`, the first incoming basic dependency is
        followed upwards until a word is reached that has a verb (part of
        speech starting with 'V') among its governors; the part of speech
        of that word is collected. A word whose chain reaches the top
        without meeting a verb, or whose processing raises an exception,
        contributes nothing. `from_obj` and `document` are not used.

        Returns a WordBasedResult wrapping the distinct parts of speech,
        sorted and joined with '-' (an empty string if none was found).
        '''
        def governed_by_verb(word):
            return any(
                governor.part_of_speech.startswith('V')
                for _, governor in word.dependencies_in('basic'))

        governors_pos = set()
        for word in to_obj.words:
            try:
                while not governed_by_verb(word):
                    # Climb from the current word. The previous code queried
                    # the sentence object instead and never left the loop
                    # when no parent was returned.
                    parents = word.dependencies_in('basic')
                    if not parents:
                        break
                    word = parents[0][1]
                else:
                    # reached only when the loop ended without `break`,
                    # i.e. a verb governor was found
                    governors_pos.add(word.part_of_speech)
            except Exception:
                # best effort: skip words whose dependencies are unusable
                continue
        return WordBasedResult('-'.join(sorted(governors_pos)))
Exemplo n.º 25
0
 def lexical_tense(word):
     '''Map the part-of-speech tag of a verb to a coarse tense label.

     The tags handled are the verb tags of the be/do/have/lexical families
     (VB*, VD*, VH*, VV*).

     Returns a WordBasedResult wrapping 'BASE', 'PASTPARTICIPLE', 'PAST',
     'GERUND' or 'PRESENT', or 'NONE' for any other tag.
     '''
     tenses = (
         ('BASE', ('VB', 'VD', 'VH', 'VV')),
         ('PASTPARTICIPLE', ('VBN', 'VDN', 'VHN', 'VVN')),
         ('PAST', ('VBD', 'VDD', 'VHD', 'VVD')),
         ('GERUND', ('VBG', 'VDG', 'VHG', 'VVG')),
         # 'VVZ'/'VVP' were missing, so present-tense lexical verbs were
         # labelled 'NONE' unlike every other tense of the VV* family.
         ('PRESENT', ('VBZ', 'VBP', 'VDZ', 'VDP', 'VHZ', 'VHP',
                      'VVZ', 'VVP')),
     )
     postag = word.part_of_speech
     for tense, tags in tenses:
         if postag in tags:
             return WordBasedResult(tense)
     return WordBasedResult('NONE')
Exemplo n.º 26
0
    def parse_distance_from_s_node(sentence):
        '''Number of levels between each leaf and its closest S-like node.

        For every leaf of the parse tree the count starts at 1 on the node
        right above the leaf and grows by one for each `parent()` step
        taken until a node whose label starts with 'S' or equals 'ROOT' is
        met. If an AttributeError interrupts the climb, the count reached
        so far is used.

        Returns a SentenceBasedResult holding one integer WordBasedResult
        per leaf.

        '''
        parsetree = sentence.parsetree
        distances = []
        for position in parsetree.treepositions(order='leaves'):
            current = parsetree[position[:-1]]
            distance = 1
            # Not every leaf is a descendant of an S node (all of them are
            # below ROOT, though). A sentence showing this when parsed:
            # -  "And Rosneft benefits from BP's expertise in exploring in
            #     difficult and potentially hazardous conditions."
            try:
                while True:
                    label = current.node
                    if label.startswith('S') or label == 'ROOT':
                        break
                    current = current.parent()
                    distance += 1
            except AttributeError:
                pass

            distances.append(WordBasedResult(distance))
        return SentenceBasedResult(tuple(distances))
Exemplo n.º 27
0
 def same_temp_modality(from_obj, to_obj, document):
     '''Tell whether two temporal expressions share the same `mod` value.

     `document` is not used.

     Returns WordBasedResult(from_obj.mod == to_obj.mod) when both objects
     are TemporalExpression instances, WordBasedResult(False) otherwise.
     '''
     both_temporal = (isinstance(from_obj, TemporalExpression) and
                      isinstance(to_obj, TemporalExpression))
     if not both_temporal:
         return WordBasedResult(False)
     return WordBasedResult(from_obj.mod == to_obj.mod)
Exemplo n.º 28
0
 def to_temp_modality(from_obj, to_obj, document):
     '''`mod` value of `to_obj` when it is a temporal expression.

     `from_obj` and `document` are not used.

     Returns WordBasedResult(to_obj.mod) for a TemporalExpression,
     WordBasedResult('_') otherwise.
     '''
     is_temporal = isinstance(to_obj, TemporalExpression)
     modality = to_obj.mod if is_temporal else '_'
     return WordBasedResult(modality)
Exemplo n.º 29
0
 def same_temp_type(from_obj, to_obj, document):
     '''Tell whether two temporal expressions share the same `ttype` value.

     `document` is not used.

     Returns WordBasedResult(from_obj.ttype == to_obj.ttype) when both
     objects are TemporalExpression instances, WordBasedResult(False)
     otherwise.
     '''
     both_temporal = (isinstance(from_obj, TemporalExpression) and
                      isinstance(to_obj, TemporalExpression))
     if not both_temporal:
         return WordBasedResult(False)
     return WordBasedResult(from_obj.ttype == to_obj.ttype)
Exemplo n.º 30
0
 def to_temp_type(from_obj, to_obj, document):
     '''`ttype` value of `to_obj` when it is a temporal expression.

     `from_obj` and `document` are not used.

     Returns WordBasedResult(to_obj.ttype) for a TemporalExpression,
     WordBasedResult('_') otherwise.
     '''
     is_temporal = isinstance(to_obj, TemporalExpression)
     temporal_type = to_obj.ttype if is_temporal else '_'
     return WordBasedResult(temporal_type)