Exemplos de wordpunct em Python

Linguagem de programação: Python

Namespace / nome do pacote: nodebox_linguistics_extended.parser.nltk_lite.tokenize

Método / Função: wordpunct

Exemplos em hotexamples.com: 3

wordpunct em Python - 3 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de nodebox_linguistics_extended.parser.nltk_lite.tokenize.wordpunct em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Exemplo n.º 1

0

Exibir arquivo

Arquivo: regexp.py Projeto: srinivas365/nodebox_linguistics_extended

def demo():
    """
    A demonstration that shows the output of several different
    tokenizers on the same string.

    Each tokenizer's result is handed to the module-level ``_display``
    helper, preceded by a heading describing the tokenizer.  Nothing is
    returned.
    """
    from nodebox_linguistics_extended.parser.nltk_lite import tokenize

    # Define the test string.
    s = "Good muffins cost $3.88\nin New York. Please buy me\ntwo of them.\n\nThanks."

    # NOTE: ``print('')`` (rather than a bare ``print`` statement or
    # ``print()``) emits exactly one empty line on both Python 2 and 3.
    print('Input text:')
    print(repr(s))  # repr() replaces the Python-2-only backtick syntax
    print('')

    print('Tokenize using whitespace:')
    _display(tokenize.whitespace(s))
    print('')

    print('Tokenize sequences of alphanumeric characters:')
    _display(tokenize.regexp(s, pattern=r'\w+', gaps=False))
    print('')

    print('Tokenize sequences of letters and sequences of nonletters:')
    _display(tokenize.wordpunct(s))
    print('')

    print('Tokenize by lines:')
    _display(tokenize.line(s))
    print('')

    print('Tokenize by blank lines:')
    _display(tokenize.blankline(s))
    print('')

    print('A simple sentence tokenizer:')
    _display(tokenize.regexp(s, pattern=r'\.(\s+|$)', gaps=True))
    print('')

Exemplo n.º 2

0

Exibir arquivo

def raw(files = items):
    """
    Yield the tokens of one or more "state_union" text files.

    :param files: a single file name (without the ``.txt`` extension) or
        an iterable of such names; defaults to the module-level ``items``.
    :return: a generator over the tokens produced by
        ``tokenize.wordpunct`` for the full text of each file, in order.
    """
    # A lone name is wrapped so the loop below can treat both cases alike.
    if isinstance(files, str):
        files = (files,)

    for name in files:
        path = os.path.join(get_basedir(), "state_union", name + ".txt")
        # Read the whole file up front and close it before yielding, so the
        # handle is not left open while the consumer drives the generator.
        with open(path) as f:
            text = f.read()
        for t in tokenize.wordpunct(text):
            yield t

Exemplo n.º 3

0

Exibir arquivo

Arquivo: gutenberg.py Projeto: srinivas365/nodebox_linguistics_extended

def raw(files=items):
    """
    Yield the tokens of one or more "gutenberg" text files, skipping
    each file's preamble.

    Everything up to and including the first line that starts with
    ``*END*`` is treated as preamble and produces no tokens; every later
    line is tokenized with ``tokenize.wordpunct``.

    :param files: a single file name (without the ``.txt`` extension) or
        an iterable of such names; defaults to the module-level ``items``.
    :return: a generator over the tokens of the non-preamble lines of
        each file, in order.
    """
    # A lone name is wrapped so the loop below can treat both cases alike.
    if isinstance(files, str):
        files = (files, )

    for name in files:
        path = os.path.join(get_basedir(), "gutenberg", name + ".txt")
        # The context manager closes the file once it has been consumed
        # (or when the generator is closed early).
        with open(path) as f:
            preamble = True  # reset for every file
            for line in f:
                if not preamble:
                    for t in tokenize.wordpunct(line):
                        yield t
                # Checked after tokenizing so the marker line itself is
                # never emitted; only the lines following it are.
                if line.startswith('*END*'):
                    preamble = False