def test_tag(self):
    """Tag ``self.text`` and check the word half of every (word, tag) pair."""
    expected_words = [
        'Simple', 'is', 'better', 'than', 'complex', '.',
        'Complex', 'is', 'better', 'than', 'complicated', '.',
    ]
    tagger = PerceptronTagger()
    tagged_pairs = tagger.tag(self.text)
    # Only the words are compared; the predicted tags are deliberately ignored.
    actual_words = [word for word, _tag in tagged_pairs]
    assert_equal(actual_words, expected_words)
                if word in SYMBOLS:
                    pos = 'SYM'
                elif word == "'" and pos == 'POS' and has_open_left_single_quote:
                    pos = "''"
                    has_open_left_single_quote = False
                elif word == "'" and pos == "''":
                    has_open_left_single_quote = False
                elif word == '`' and pos == '``':
                    has_open_left_single_quote = True
                word = reverse_map_paren(word)
                tokens.append((word, pos))
                prev2 = prev
                prev = pos
        return tokens

    perceptron_tagger.tag = partial(_tag, perceptron_tagger)
except:  # pragma: no cover
    raise NotImplementedError(
        'PerceptronTagger from textblob_aptagger does not exist!')


def tag(text):
    """Returns the POS tags of the text using PerceptronTagger

    Parameters
    ----------
    text : str or iterable
        This is the text to be processed.
        If it's a str, it will be sentence tokenized and word tokenized using nltk
        If it's an iterable, it will be assumed to be a list of tokens
Exemplo n.º 3
0
 def test_tag(self):
     """Verify the tagger returns the expected words for ``self.text``."""
     tagged = PerceptronTagger().tag(self.text)
     # Drop the tag from each (word, tag) pair; only the words are asserted.
     words_only = [token for token, _ in tagged]
     wanted = [
         'Simple', 'is', 'better', 'than', 'complex', '.',
         'Complex', 'is', 'better', 'than', 'complicated', '.',
     ]
     assert_equal(words_only, wanted)