Пример #1
0
    def filter_pos_infinitive(self, s, category_list=None, allowed=False):
        '''
          Filter the words of a string by Part-of-Speech (POS) tag and reduce
          the words that are kept to a base form.

          The text is tagged with self.pos_tagging, whose items are read as
          'word:TAG' strings. Every kept word is normalized according to its tag:
              'NNS'                                      -> singularize(word)
              'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'    -> conjugate(word, INFINITIVE)
              'JJ', 'JJR', 'JJS'                         -> predicative(word)
              any other tag                              -> word left unchanged

          If allowed is set to True it only allows POS in category_list.
          If allowed is set to False it allows all POS except those in category_list.

          If category_list is empty (or None) the text is returned as is:
          nothing is filtered and nothing is normalized.

          POS that can be in category list:
          nouns        = ['NN', 'NNS', 'NNP', 'NNPS']
          verbs        = ['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ']
          adjectives   = ['JJ','JJR','JJS']
          determiners  = ['DT']
          conjunctions = ['IN', 'CC']
          adverbs      = ['RB','RBR', 'RBS']
          modals       = ['MD']
          utterances   = ['UH']

          In:
              (s:string, category_list:list of strings, allowed:boolean)
          Out:
              (string) kept words joined by single spaces, or s itself when
              category_list is empty
        '''
        # Byte strings are decoded up front (``unicode`` is the Python 2 builtin).
        # NOTE(review): "xmlcharrefreplace" is documented as an encode-only error
        # handler -- confirm this decode behaves as intended on invalid UTF-8.
        if isinstance(s, str):
            s = unicode(s, "utf-8", "xmlcharrefreplace")

        # Nothing to filter on: return before running the tagger, whose result
        # would be thrown away anyway.
        if not category_list:
            return s

        verb_tags = ('VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ')
        adjective_tags = ('JJ', 'JJR', 'JJS')

        # Kept as an equality test (not truthiness) so that non-bool values of
        # ``allowed`` select the same mode they always did.
        only_listed = not (allowed == False)

        words = []
        for tagged in self.pos_tagging(s):
            parts = tagged.split(':')
            word, tag = parts[0], parts[1]

            # Whitelist mode keeps listed tags; blacklist mode keeps the rest.
            if (tag in category_list) != only_listed:
                continue

            if tag == 'NNS':
                word = singularize(word)
            elif tag in verb_tags:
                word = conjugate(word, INFINITIVE)
            elif tag in adjective_tags:
                word = predicative(word)
            words.append(word)
        return u' '.join(words)
    def pos_tagging_infinitive(self, s):
        '''
          Grammatical category of each word a.k.a. Part-of-Speech (pos) tagging,
          but transforming adjectives to predicative form, singularizing plural
          nouns and putting verbs in infinitive form. The tag of a transformed
          word is collapsed to its base tag:
              'NNS'                                      -> singularize(word) + ':NN'
              'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'    -> conjugate(word, INFINITIVE) + ':VB'
              'JJ', 'JJR', 'JJS'                         -> predicative(word) + ':JJ'
              any other tag                              -> word + ':' + tag

          Illustrative example (from the original notes):
              ella:PRP maneja:VBD carros:NNS rojos:JJ
                PRP: pronoun ---------------> ella
                VBD: verb in past tense ----> manejar (infinitive)
                NNS: noun in plural --------> carro (singularized)
                JJ: adjective --------------> rojo (predicative)
          In:
                (s:string) string text
          Out:
                (list) list with grammatical categories in the form 'word:category'
        '''
        # Decode before parsing so the parser receives the decoded text.
        # Previously the decode ran after parse() and its result was never used.
        # (``unicode`` is the Python 2 builtin.)
        if isinstance(s, str):
            s = unicode(s, "utf-8", "xmlcharrefreplace")

        verb_tags = ('VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ')
        adjective_tags = ('JJ', 'JJR', 'JJS')

        tagged_words = []
        # parse(s).split() is iterated two levels deep; each inner item is
        # indexed as [0] = word, [1] = tag.
        for sentence in parse(s).split():
            for token in sentence:
                word, tag = token[0], token[1]
                if tag == 'NNS':
                    tagged_words.append(singularize(word) + ":NN")
                elif tag in verb_tags:
                    tagged_words.append(conjugate(word, INFINITIVE) + ":VB")
                elif tag in adjective_tags:
                    tagged_words.append(predicative(word) + ":JJ")
                else:
                    tagged_words.append(word + ':' + tag)
        return tagged_words
Пример #3
0
 def test_predicative(self):
     # Accuracy check for the predicative algorithm ("horribles" => "horrible").
     # Word forms tagged "j" in the corpus are grouped by lemma; for each lemma
     # the longest form is passed to es.predicative() and the result must equal
     # the lemma in more than 92% of the cases.
     from pattern.db import Datasheet
     corpus_path = os.path.join(PATH, "corpora", "wordforms-es-davies.csv")
     forms_by_lemma = {}
     for form, lemma, tag, _unused in Datasheet.load(corpus_path):
         if tag != "j":
             continue
         if lemma not in forms_by_lemma:
             forms_by_lemma[lemma] = []
         forms_by_lemma[lemma].append(form)
     hits = 0
     for lemma, forms in forms_by_lemma.items():
         # First of the longest forms, in corpus order.
         longest = max(forms, key=len)
         if es.predicative(longest) == lemma:
             hits += 1
     total = len(forms_by_lemma)
     self.assertTrue(float(hits) / total > 0.92)
     print("pattern.es.predicative()")
Пример #4
0
 def test_predicative(self):
     # Assert the accuracy of the predicative algorithm ("horribles" => "horrible").
     # Word forms tagged "j" are grouped under their lemma; the longest form of
     # each lemma must be mapped back to that lemma by es.predicative() in more
     # than 92% of the cases.
     from pattern.db import Datasheet
     test = {}
     for w, lemma, tag, f in Datasheet.load(os.path.join(PATH, "corpora", "wordforms-es-davies.csv")):
         if tag == "j":
             test.setdefault(lemma, []).append(w)
     i, n = 0, 0
     for pred, attr in test.items():
         # Longest recorded form of this lemma (first one on ties).
         attr = max(attr, key=len)
         if es.predicative(attr) == pred:
             i += 1
         n += 1
     self.assertTrue(float(i) / n > 0.92)
     # Call form instead of the Python 2 print statement: same output on
     # Python 2, and no longer a syntax error on Python 3.
     print("pattern.es.predicative()")
Пример #5
0
 def unify_tokens(self):
     """
     Normalize self._tokens in place and return the list.

     For each position, the first element of self._analysis[i][1] selects the
     transformation: 'n' -> singularize, 'v' -> conjugate to INFINITIVE,
     'a' -> predicative. Tokens with any other marker are left untouched.

     :return: Tokens (the same list object as self._tokens)
     :raises Exception: if self._analysis is None
     """
     if self._analysis is None:
         raise Exception('It\'s necessary execute first analize')
     for index, token in enumerate(self._tokens):
         kind = self._analysis[index][1][0]
         if kind == 'n':
             self._tokens[index] = singularize(token)
         elif kind == 'v':
             self._tokens[index] = conjugate(token, INFINITIVE)
         elif kind == 'a':
             self._tokens[index] = predicative(token)
     return self._tokens
Пример #6
0
 def unify_tokens(self):
     """
     Rewrite every token of self._tokens to a unified form, in place.

     The marker read from self._analysis[i][1][0] decides what happens to the
     token at position i:
       'n' -> singularize(token)
       'v' -> conjugate(token, INFINITIVE)
       'a' -> predicative(token)
     Any other marker leaves the token as it is.

     :return: Tokens
     :raises Exception: when self._analysis is None
     """
     if self._analysis is None:
         raise Exception('It\'s necessary execute first analize')
     tokens = self._tokens
     total = len(tokens)
     position = 0
     while position < total:
         marker = self._analysis[position][1][0]
         current = position
         position += 1
         if marker not in ('n', 'v', 'a'):
             continue
         if marker == 'n':
             tokens[current] = singularize(tokens[current])
         elif marker == 'v':
             tokens[current] = conjugate(tokens[current], INFINITIVE)
         else:
             tokens[current] = predicative(tokens[current])
     return self._tokens
Пример #7
0
def adjetives():
    """Print the result of predicative('hermosos'), then the result of
    attributive('hermoso', gender=FEMALE + PLURAL)."""
    predicative_form = predicative('hermosos')
    print(predicative_form)
    attributive_form = attributive('hermoso', gender=FEMALE + PLURAL)
    print(attributive_form)
Пример #8
0
def standard_value(value):
    """Return ``value`` lower-cased, stripped of surrounding whitespace and
    then passed through ``predicative``."""
    lowered = value.lower()
    trimmed = lowered.strip()
    return predicative(trimmed)