def test_detect_non_ascii(self):
    """Check that the translator detects the language of non-ASCII text."""
    # (sample text, expected language code), checked in this order.
    cases = (
        ("关于中文维基百科", 'zh-CN'),
        ("известен още с псевдонимите", "bg"),
        ("Избранная статья", "ru"),
    )
    for sample, expected_code in cases:
        detected = self.translator.detect(unicode(sample))
        assert_equal(detected, expected_code)
def test_translate_non_ascii(self):
    """Check translation of Arabic and Chinese blobs into English."""
    # Arabic -> English
    arabic_blob = tb.TextBlob(unicode("ذات سيادة كاملة"))
    from_arabic = arabic_blob.translate(from_lang="ar", to="en")
    assert_equal(from_arabic, "With full sovereignty")

    # Simplified Chinese -> English
    chinese_blob = tb.TextBlob(unicode("美丽优于丑陋"))
    from_chinese = chinese_blob.translate(from_lang="zh-CN", to="en")
    assert_equal(from_chinese, "Beautiful is better than ugly")
def pos_tags(self):
    '''Return a list of ``(word, POS tag)`` tuples for the raw text.

    Each word is wrapped in a ``Word`` carrying its tag, and the tag itself
    is returned as a unicode string. Entries whose tag matches
    ``PUNCTUATION_REGEX`` are left out.

    Example:
    ::

        [('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
                ('Thursday', 'NNP'), ('morning', 'NN')]

    :rtype: list of tuples
    '''
    tagged = []
    for token, tag in self.pos_tagger.tag(self.raw):
        # Skip anything the tagger labelled as punctuation.
        if PUNCTUATION_REGEX.match(unicode(tag)):
            continue
        tagged.append((Word(token, pos_tag=tag), unicode(tag)))
    return tagged
def __repr__(self):
    '''Return a debugging representation of this collection.

    Collections of up to 60 items are shown in full; longer ones show only
    the first three and last three items separated by ``...``.
    '''
    name = self.__class__.__name__
    # Unicode form of every item in the underlying collection.
    items = list(map(unicode, self._collection))
    if len(self) <= 60:
        return '{cls}({lst})'.format(cls=name, lst=items)
    head, tail = items[:3], items[-3:]
    return '{cls}({beginning}...{end})'.format(cls=name,
                                               beginning=head,
                                               end=tail)
def test_detect_non_ascii(self):
    """Check that a blob of Arabic text is detected as ``"ar"``."""
    arabic_blob = tb.TextBlob(unicode("ذات سيادة كاملة"))
    detected = arabic_blob.detect_language()
    assert_equal(detected, "ar")
def test_translate_detects_language_by_default(self):
    """Check that ``translate()`` called with no arguments handles Arabic."""
    arabic_blob = tb.TextBlob(unicode("ذات سيادة كاملة"))
    # No from_lang/to given: the call relies on translate()'s defaults.
    result = arabic_blob.translate()
    assert_equal(result, "With full sovereignty")
def parse(s, *args, **kwargs):
    """Return a tagged Unicode string for the given input.

    The input is coerced to unicode, then handed to ``parser.parse``
    together with any extra positional and keyword arguments.
    """
    text = unicode(s)
    return parser.parse(text, *args, **kwargs)
def positive(s, threshold=0.1, **kwargs):
    """Tell whether the given sentence has a positive sentiment.

    Returns True when ``polarity`` of the sentence is greater than or equal
    to ``threshold``. Extra keyword arguments are forwarded to ``polarity``.
    """
    score = polarity(unicode(s), **kwargs)
    return score >= threshold
def subjectivity(s, **kwargs):
    """Return the sentence subjectivity (objective/subjective), 0.0 to 1.0.

    This is the second element of the result of ``sentiment``; extra keyword
    arguments are forwarded to it.
    """
    scores = sentiment(unicode(s), **kwargs)
    return scores[1]
def polarity(s, **kwargs):
    """Return the sentence polarity (positive/negative), -1.0 to 1.0.

    This is the first element of the result of ``sentiment``; extra keyword
    arguments are forwarded to it.
    """
    scores = sentiment(unicode(s), **kwargs)
    return scores[0]
def parsetree(s, *args, **kwargs):
    """Return a parsed ``Text`` built from the given string.

    The string is coerced to unicode and run through ``parse`` (with any
    extra arguments forwarded); the parse result is wrapped in ``Text``.
    """
    tagged = parse(unicode(s), *args, **kwargs)
    return Text(tagged)