import os
import re

# Old NLTK-Lite-style corpus readers: items, get_basedir, tokenize, tree,
# _list_sent and _parse_entry come from the surrounding corpus modules.
def chunked(files=items, chunk_types=('NP',)):
    # Accept a single filename as well as a sequence of filenames.
    if isinstance(files, str): files = (files,)
    for file in files:
        path = os.path.join(get_basedir(), "conll2000", file + ".txt")
        s = open(path).read()
        for sent in tokenize.blankline(s):
            yield tree.conll_chunk(sent, chunk_types)
def tagged(files=items):
    if isinstance(files, str): files = (files,)
    for file in files:
        path = os.path.join(get_basedir(), "conll2000", file + ".txt")
        s = open(path).read()
        for sent in tokenize.blankline(s):
            # Keep the (word, tag) pairs; discard the chunk column.
            yield [(word, tag) for (word, tag, chunk) in _list_sent(sent)]
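Both CoNLL-2000 readers above depend on blank-line tokenization to recover sentence blocks from the three-column word/tag/chunk format. A minimal self-contained sketch of that splitting step, with a hypothetical blankline_split helper standing in for tokenize.blankline and invented sample data:

import re

def blankline_split(text):
    # Hypothetical stand-in for tokenize.blankline: split a text into
    # blocks wherever one or more blank lines occur.
    return [b for b in re.split(r'\n\s*\n', text.strip()) if b]

# Invented CoNLL-2000-style data: one "word tag chunk" triple per line,
# with sentences separated by a blank line.
sample = "He PRP B-NP\nsaw VBD B-VP\n\nShe PRP B-NP\nleft VBD B-VP\n"
for sent in blankline_split(sample):
    print([tuple(line.split()) for line in sent.splitlines()])
# -> [('He', 'PRP', 'B-NP'), ('saw', 'VBD', 'B-VP')]
# -> [('She', 'PRP', 'B-NP'), ('left', 'VBD', 'B-VP')]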
def _read(files, conversion_function):
    if isinstance(files, str): files = (files,)

    for file in files:
        path = os.path.join(get_basedir(), "brown", file)
        f = open(path).read()
        # Each blank-line-separated block is one sentence; hand it to the
        # caller-supplied conversion function.
        for sent in tokenize.blankline(f):
            yield conversion_function(sent)
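The private `_read` above is a dispatcher: each public reader supplies a conversion function that turns one sentence block into the desired representation. A hedged sketch of that wrapping pattern (both helper names below are illustrative, not the module's own):

def _to_words(sent):
    # Illustrative conversion function: split a sentence block into tokens.
    return sent.split()

def raw_sketch(files):
    # How a public reader might delegate to the _read above.
    return _read(files, _to_words)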
def raw(files='rotokas'):
    """
    @param files: One or more Shoebox dictionary files to be processed
    @type files: L{string} or L{tuple(string)}
    @rtype: iterator over L{list(string)}
    """

    # Just one file to process?  If so, convert to a tuple so we can iterate.
    if isinstance(files, str): files = (files,)

    for file in files:
        path = os.path.join(get_basedir(), "shoebox", file + ".dic")
        f = open(path).read()
        # Dictionary entries are separated by blank lines.
        for entry in tokenize.blankline(f):
            yield list(_parse_entry(entry))
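`_parse_entry` is private to the Shoebox module and not shown here. Shoebox records consist of backslash-marked fields ("\lx", "\ps", and so on), so a rough, hypothetical parser along those lines would yield one value per field; the helper and data below are invented for illustration:

import re

def _parse_entry_sketch(entry):
    # Hypothetical stand-in for _parse_entry: yield the value of each
    # backslash-marked field in one Shoebox record.
    for line in entry.splitlines():
        m = re.match(r'\\(\S+)\s*(.*)', line)
        if m:
            yield m.group(2)

entry = "\\lx kaakau\n\\ps N\n\\ge bamboo"
print(list(_parse_entry_sketch(entry)))   # -> ['kaakau', 'N', 'bamboo']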
def _read(files, conversion_function):
    if isinstance(files, str): files = (files,)

    # Strip YCOE markup before conversion: <...>_CODE tokens and
    # whitespace-preceded ..._ID tokens.  Compile the pattern once,
    # outside the file loop.
    rx_pattern = re.compile(r"""
            <.*>_CODE
            |\s.*_ID
        """, re.VERBOSE | re.UNICODE)

    for file in files:
        path = os.path.join(get_basedir(), "ycoe/pos", file)
        f = open(path).read()
        for sent in tokenize.blankline(f):
            sent = re.sub(rx_pattern, "", sent)
            if sent != "":
                yield conversion_function(sent, sep="_")
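To see what the markup-stripping regex does, here is a self-contained run on an invented YCOE-style sentence block (the token data is made up):

import re

rx_pattern = re.compile(r"""
        <.*>_CODE
        |\s.*_ID
    """, re.VERBOSE | re.UNICODE)

# Invented word_TAG tokens, a trailing code token, and an id on its own line.
sent = "He_PRO com_VBD <T123>_CODE\n (ID_1)_ID"
print(re.sub(rx_pattern, "", sent))   # prints: 'He_PRO com_VBD ' (markup gone)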
def processParagraphs(self, corpus):
    # Method of a corpus-processing class (the class itself is not shown):
    # split the corpus into blank-line-separated paragraphs.
    paragraphs = tokenize.blankline(corpus)
    return paragraphs