Example #1
def demo():
    """
    A demonstration that shows the output of several different
    tokenizers on the same string.
    """

    from en.parser.nltk_lite import tokenize

    # Define the test string.
    s = "Good muffins cost $3.88\nin New York.  Please buy me\ntwo of them.\n\nThanks."
    print 'Input text:'
    print repr(s)
    print
    print 'Tokenize using whitespace:'
    _display(tokenize.whitespace(s))
    print
    print 'Tokenize sequences of alphanumeric characters:'
    _display(tokenize.regexp(s, pattern=r'\w+', gaps=False))
    print
    print 'Tokenize sequences of letters and sequences of nonletters:'
    _display(tokenize.wordpunct(s))
    print
    print 'Tokenize by lines:'
    _display(tokenize.line(s))
    print
    print 'Tokenize by blank lines:'
    _display(tokenize.blankline(s))
    print
    print 'A simple sentence tokenizer:'
    _display(tokenize.regexp(s, pattern=r'\.(\s+|$)', gaps=True))
    print
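A minimal usage sketch for the tokenizers exercised above, assuming the same en.parser.nltk_lite package is importable (Python 2, matching the code above); tokenize.whitespace and tokenize.wordpunct both return iterators of token strings:

# Minimal sketch: tokenize a string directly, outside of demo().
from en.parser.nltk_lite import tokenize

s = "Good muffins cost $3.88\nin New York."
for token in tokenize.wordpunct(s):
    print token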
Example #2
import os
from en.parser.nltk_lite import tokenize

# get_basedir() and items are defined by the surrounding corpus module.
def raw(files=items):
    # Accept a single file name as well as a sequence of names.
    if isinstance(files, str):
        files = (files,)

    for file in files:
        path = os.path.join(get_basedir(), "state_union", file + ".txt")
        f = open(path)
        text = f.read()
        f.close()
        # Yield word and punctuation tokens from the whole document.
        for t in tokenize.wordpunct(text):
            yield t
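A hedged usage sketch for this generator: get_basedir and items come from the surrounding corpus module (not shown here), and '1945-Truman' is an assumed file name under the state_union directory, chosen only for illustration:

# Hypothetical usage; '1945-Truman' is an assumed corpus file name.
for token in raw('1945-Truman'):
    print token,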
Example #3
import os
from en.parser.nltk_lite import tokenize

# get_basedir() and items are defined by the surrounding corpus module.
def raw(files=items):
    # Accept a single file name as well as a sequence of names.
    if isinstance(files, str):
        files = (files,)

    for file in files:
        path = os.path.join(get_basedir(), "gutenberg", file + ".txt")
        f = open(path)
        # Skip the Project Gutenberg header; the text proper starts on
        # the line after the '*END*' marker.
        preamble = True
        for line in f:
            if not preamble:
                for t in tokenize.wordpunct(line):
                    yield t
            if line.startswith('*END*'):
                preamble = False
        f.close()
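As with the previous example, a sketch of how the generator might be consumed; 'austen-emma' is an assumed item name in the gutenberg corpus, and itertools.islice is used only to cap the output:

# Hypothetical usage; prints the first 20 tokens after the preamble.
import itertools
for token in itertools.islice(raw('austen-emma'), 20):
    print token,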