Example #1
0
 def word_counts(self):
     '''Dictionary of word frequencies in this text.

     Keys are the ``lowerstrip``-normalized forms of ``self.words``;
     values are occurrence counts. Returned as a ``defaultdict(int)``,
     so missing words look up as 0.
     '''
     counts = defaultdict(int)
     for raw_word in self.words:
         counts[lowerstrip(raw_word)] += 1
     return counts
Example #2
0
def contains_extractor(document):
    '''A basic document feature extractor that returns a dict of words that
    the document contains.

    :param document: The text to extract features from. Can be a string or
        an iterable of already-tokenized words.
    :rtype: dict mapping ``u'contains(<word>)'`` to ``True``
    '''
    tokenizer = WordTokenizer()
    if isinstance(document, basestring):
        # Raw string: tokenize, lower-case, and drop punctuation tokens.
        tokens = {w.lower()
                  for w in tokenizer.itokenize(document, include_punc=False)}
    else:
        # Pre-tokenized input: normalize each token
        # (``all=False`` — partial strip; lowerstrip defined elsewhere).
        tokens = {lowerstrip(w, all=False) for w in document}
    # One feature per distinct token.
    return {u'contains({0})'.format(w): True for w in tokens}
Example #3
0
 def __init__(self, text, tokenizer=None,
              pos_tagger=None, np_extractor=None, analyzer=None,
              parser=None, classifier=None, clean_html=False):
     '''Initialize the blob from a string of text.

     :raises TypeError: if ``text`` is not a string.
     :raises NotImplementedError: if ``clean_html`` is passed (removed
         feature; use BeautifulSoup's ``get_text()`` instead).
     '''
     # ``text`` must be a (byte or unicode) string.
     if not isinstance(text, basestring):
         raise TypeError('The `text` argument passed to `__init__(text)` '
                         'must be a string, not {0}'.format(type(text)))
     # ``clean_html`` is no longer supported; fail loudly.
     if clean_html:
         raise NotImplementedError("clean_html has been deprecated. "
                                   "To remove HTML markup, use BeautifulSoup's "
                                   "get_text() function")
     self.raw = text
     self.string = text
     # Fully lower-cased/stripped copy of the raw text (``all=True``).
     self.stripped = lowerstrip(self.raw, all=True)
     _initialize_models(self, tokenizer, pos_tagger, np_extractor, analyzer,
                        parser, classifier)
Example #4
0
def basic_extractor(document, train_set):
    '''A basic document feature extractor that returns a dict indicating
    what words in ``train_set`` are contained in ``document``.

    :param document: The text to extract features from. Can be a string or an iterable.
    :param train_set: Training data set, a list of tuples of the form
        ``(words, label)``.
    :rtype: dict mapping ``u'contains(<word>)'`` to a bool
    '''
    tokenizer = WordTokenizer()
    word_features = _get_words_from_dataset(train_set)
    if isinstance(document, basestring):
        # Raw string: tokenize, lower-case, and drop punctuation tokens.
        tokens = {w.lower()
                  for w in tokenizer.itokenize(document, include_punc=False)}
    else:
        # Pre-tokenized input: normalize each token
        # (``all=False`` — partial strip; lowerstrip defined elsewhere).
        tokens = {lowerstrip(w, all=False) for w in document}
    # One boolean feature per word known from the training set.
    return {u'contains({0})'.format(word): word in tokens
            for word in word_features}
Example #5
0
 def test_lowerstrip(self):
     '''lowerstrip(self.text) yields the expected normalized string.'''
     expected = 'this has punctuation'
     assert_equal(lowerstrip(self.text), expected)
Example #6
0
 def test_lowerstrip(self):
     '''lowerstrip(self.text) yields the expected normalized string.'''
     result = lowerstrip(self.text)
     assert_equal(result, 'this. has. punctuation')