def test_split_on_first_whitespace():
    """Exercise utils.split_on_first_whitespace on typical and edge inputs."""
    cases = [
        # Only two words.
        ('hello world', ('hello', 'world')),
        # More than two words: split happens only at the first gap.
        ('hello world two', ('hello', 'world two')),
        # Should not fail on empty string.
        ('', ('', '')),
    ]
    for text, expected in cases:
        assert utils.split_on_first_whitespace(text) == expected
def build_LM(in_file):
    """Build a language model for each label found in *in_file*.

    Each line in *in_file* contains a label and its text separated by
    whitespace (the original note says: a label and a URL separated by
    a tab).

    Args:
        in_file: path to the training file.

    Returns:
        dict mapping label -> model.Model. After counting, every model
        is told about every gram seen across *all* labels via
        register_gram, so all models share one vocabulary.
    """
    # Fix: original used the Python-2 `print` statement, a SyntaxError
    # under Python 3.
    print('Building language models...')
    tokenizer = get_tokenizer()
    language_models = {}
    all_grams = set()
    with open(in_file) as in_file_contents:
        for line in in_file_contents:
            # NOTE(review): `line` is not stripped, so the trailing
            # newline reaches the tokenizer — preserved from the
            # original; confirm the tokenizer tolerates it.
            lang, text = utils.split_on_first_whitespace(line)
            # setdefault returns the stored model, avoiding the
            # original's redundant second dict lookup.
            language_model = language_models.setdefault(lang, model.Model())
            for gram in tokenizer(text):
                all_grams.add(gram)
                language_model.incr_gram_count(gram)
    # Make every model aware of the full cross-label vocabulary.
    for language_model in language_models.values():
        for gram in all_grams:
            language_model.register_gram(gram)
    return language_models