Python Vocabulary.factory示例

编程语言: Python

命名空间/包名称: tfrnnlm.text

类/类型: Vocabulary

方法/功能: factory

hotexamples.com的示例: 8

Python Vocabulary.factory - 共找到8个示例。这些是从开源项目中提取的、评分最高的tfrnnlm.text.Vocabulary.factory真实Python示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示隐藏

Vocabulary(7)

factory(4)

index_string(2)

index(1)

示例#1

显示文件

 def test_invalid_partition(self):
     """Check that from_text raises ValueError for the partition name "bogus".

     The text data only has "train", "validate" and "test" partitions, so
     "bogus" does not name any of them.
     """
     documents = {
         "train": ["red red red", "blue blue green"],
         "validate": ["red blue blue orange"],
         "test": ["green green red black"]
     }
     # Build the factory outside the assertion scope so that only the
     # from_text call itself is checked for ValueError.
     vocabulary_factory = Vocabulary.factory(WordTokenizer(True))
     with self.assertRaises(ValueError):
         PartitionedData.from_text(documents, ["bogus"], vocabulary_factory)

示例#2

显示文件

文件： __init__.py 项目： ycchuang/tfrnnlm

def create_partitioned_data():
    """Return a PartitionedData built from a small fixed set of documents.

    The documents are split into "train", "test" and "validate" partitions.
    ``["train"]`` is passed as the second argument to
    ``PartitionedData.from_text`` (presumably the partitions used to build
    the vocabulary -- confirm against ``from_text``), together with a
    vocabulary factory built from ``WordTokenizer(True)``.
    """
    documents = {
        "train": ["blue blue green", "red red red"],
        "test": ["green green red black"],
        "validate": ["red blue blue orange"]
    }
    vocabulary_factory = Vocabulary.factory(WordTokenizer(True))
    return PartitionedData.from_text(documents, ["train"], vocabulary_factory)

示例#3

显示文件

文件： command.py 项目： wpm/tfrnnlm

def create_data_set(args):
    """Build a partitioned data set from text files and serialize it.

    Reads the following attributes of ``args``:

    * ``partitions`` -- mapping from partition name to an iterable of file names
    * ``vocabulary_partitions`` -- partition names passed to
      ``PartitionedData.from_text``; when ``None`` it is replaced (on ``args``
      itself) by all the keys of ``partitions``
    * ``tokenizer`` -- one of "whitespace", "word" or "character"
    * ``case_normalized`` -- passed to the tokenizer constructor
    * ``min_frequency``, ``max_vocabulary``, ``out_of_vocabulary`` -- passed
      through to ``Vocabulary.factory``
    * ``directory`` -- where the partitioned data is serialized

    :raises KeyError: if ``args.tokenizer`` is not one of the known names
    """
    if args.vocabulary_partitions is None:
        args.vocabulary_partitions = args.partitions.keys()
    tokenizer = {"whitespace": WhitespaceTokenizer,
                 "word": WordTokenizer,
                 "character": CharacterTokenizer}[args.tokenizer](args.case_normalized)
    factory = Vocabulary.factory(tokenizer,
                                 min_frequency=args.min_frequency, max_vocabulary=args.max_vocabulary,
                                 out_of_vocabulary=args.out_of_vocabulary)
    partitions = {}
    for partition, filenames in args.partitions.items():
        documents = []
        for filename in filenames:
            # Use a context manager so every file handle is closed promptly
            # instead of being left for the garbage collector.
            with open(filename) as text_file:
                documents.append(text_file.read())
        partitions[partition] = documents
    partitioned_data = PartitionedData.from_text(partitions, args.vocabulary_partitions, factory)
    partitioned_data.serialize(args.directory)
    # Let the logger do the formatting so it only happens if the record is emitted.
    logger.info("Created %s in %s", partitioned_data, args.directory)

示例#4

显示文件

文件： command.py 项目： ycchuang/tfrnnlm

def create_data_set(args):
    """Create a partitioned data set from text files and write it to disk.

    ``args`` supplies the configuration through these attributes:

    * ``partitions`` -- mapping from partition name to an iterable of file names
    * ``vocabulary_partitions`` -- partition names passed to
      ``PartitionedData.from_text``; if ``None``, it is set on ``args`` to
      the keys of ``partitions``
    * ``tokenizer`` -- "whitespace", "word" or "character"
    * ``case_normalized`` -- argument for the selected tokenizer class
    * ``min_frequency``, ``max_vocabulary``, ``out_of_vocabulary`` -- keyword
      arguments forwarded to ``Vocabulary.factory``
    * ``directory`` -- target passed to ``PartitionedData.serialize``

    :raises KeyError: if ``args.tokenizer`` is not a recognized tokenizer name
    """
    if args.vocabulary_partitions is None:
        args.vocabulary_partitions = args.partitions.keys()
    tokenizer = {
        "whitespace": WhitespaceTokenizer,
        "word": WordTokenizer,
        "character": CharacterTokenizer
    }[args.tokenizer](args.case_normalized)
    factory = Vocabulary.factory(tokenizer,
                                 min_frequency=args.min_frequency,
                                 max_vocabulary=args.max_vocabulary,
                                 out_of_vocabulary=args.out_of_vocabulary)
    partitions = {}
    for partition, filenames in args.partitions.items():
        texts = []
        for filename in filenames:
            # Close each file as soon as it has been read; the bare
            # open(...).read() form leaves the handle open.
            with open(filename) as source:
                texts.append(source.read())
        partitions[partition] = texts
    partitioned_data = PartitionedData.from_text(partitions,
                                                 args.vocabulary_partitions,
                                                 factory)
    partitioned_data.serialize(args.directory)
    # Pass the values as logger arguments so formatting is deferred to the
    # logging framework.
    logger.info("Created %s in %s", partitioned_data, args.directory)

示例#5

显示文件

文件： __init__.py 项目： wpm/tfrnnlm

def create_partitioned_data():
    """Build the fixed PartitionedData fixture of color-word documents.

    Calls ``PartitionedData.from_text`` with three partitions ("train",
    "test", "validate"), the partition list ``["train"]`` and a vocabulary
    factory created from ``WordTokenizer(True)``.
    """
    text_by_partition = {
        "train": ["blue blue green", "red red red"],
        "test": ["green green red black"],
        "validate": ["red blue blue orange"]
    }
    factory = Vocabulary.factory(WordTokenizer(True))
    partitioned = PartitionedData.from_text(text_by_partition, ["train"], factory)
    return partitioned

示例#6

显示文件

 def test_vocabulary_factory(self):
     """Check that a factory-made vocabulary equals a directly built one.

     Both are created from the same sentence, tokenizer settings and
     max_vocabulary limit.
     """
     sentence = "to be or not to be"
     make_vocabulary = Vocabulary.factory(WordTokenizer(True), max_vocabulary=2)
     expected = Vocabulary([sentence], WordTokenizer(True), max_vocabulary=2)
     actual = make_vocabulary([sentence])
     self.assertEqual(expected, actual)

示例#7

显示文件

文件： test_text.py 项目： wpm/tfrnnlm

 def test_vocabulary_factory(self):
     """Verify that Vocabulary.factory yields the same vocabulary as the constructor.

     The factory is configured with WordTokenizer(True) and max_vocabulary=2,
     then applied to the same one-document corpus given to Vocabulary directly.
     """
     text = "to be or not to be"
     vocabulary_factory = Vocabulary.factory(WordTokenizer(True), max_vocabulary=2)
     direct = Vocabulary([text], WordTokenizer(True), max_vocabulary=2)
     from_factory = vocabulary_factory([text])
     self.assertEqual(direct, from_factory)

示例#8

显示文件

文件： test_text.py 项目： wpm/tfrnnlm

 def test_invalid_partition(self):
     """Expect ValueError when from_text is given the partition name "bogus".

     The supplied text has only "train", "validate" and "test" partitions.
     """
     text_by_partition = {
         "train": ["red red red", "blue blue green"],
         "validate": ["red blue blue orange"],
         "test": ["green green red black"]
     }
     # Create the factory before entering the assertion so that only the
     # from_text call is expected to raise.
     factory = Vocabulary.factory(WordTokenizer(True))
     with self.assertRaises(ValueError):
         PartitionedData.from_text(text_by_partition, ["bogus"], factory)