Python CWSPipe示例

编程语言: Python

命名空间/包名称: fastNLP.io.pipe.cws

类/类型: CWSPipe

hotexamples.com的示例: 6

Python CWSPipe - 共找到 6 个示例。以下是从开源项目中提取的、评价最高的 fastNLP.io.pipe.cws.CWSPipe 真实 Python 用法示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示/隐藏

CWSPipe(6)

get_vocab(2)

示例#1

显示文件

 def test_process_from_file(self):
     """Run CWSPipe with bigrams and trigrams over each test fixture.

     Each dataset name is exercised in its own subTest, and the resulting
     data bundle is printed.
     """
     for name in ('msra', 'cityu', 'as', 'pku'):
         with self.subTest(dataset_name=name):
             pipe = CWSPipe(bigrams=True, trigrams=True)
             fixture_path = f'test/data_for_tests/io/cws_{name}'
             print(pipe.process_from_file(fixture_path))

示例#2

显示文件

 def test_process_from_file(self):
     """Process each named CWS dataset without passing an explicit path.

     Each dataset name is exercised in its own subTest, and the resulting
     data bundle is printed.
     """
     for name in ('pku', 'cityu', 'as', 'msra'):
         with self.subTest(dataset_name=name):
             pipe = CWSPipe(dataset_name=name)
             # No path argument is given here.
             # NOTE(review): presumably the pipe resolves the data location
             # from dataset_name -- confirm against CWSPipe.
             print(pipe.process_from_file())

示例#3

显示文件

 def test_process_from_file(self):
     """Run a default-configured CWSPipe over the 'msra' test fixture.

     The loop and subTest are kept so further dataset names can be added;
     the resulting data bundle is printed.
     """
     for name in ('msra',):
         with self.subTest(dataset_name=name):
             fixture_path = f'test/data_for_tests/io/cws_{name}'
             bundle = CWSPipe().process_from_file(fixture_path)
             print(bundle)

示例#4

显示文件

def get_data():
    """Build the processed data bundle and its char/bigram static embeddings.

    The dataset is selected by ``dataname``, which is not defined in this
    function and must be supplied by the enclosing scope.

    Returns:
        A ``(data_bundle, char_embed, bigram_embed)`` tuple.
    """
    pipe = CWSPipe(dataset_name=dataname, bigrams=True, trigrams=False)
    data_bundle = pipe.process_from_file()

    # Dropout settings shared by both embeddings.
    shared_kwargs = dict(dropout=0.33, word_dropout=0.01)

    char_vocab = data_bundle.get_vocab('chars')
    char_embed = StaticEmbedding(
        char_vocab,
        model_dir_or_name='~/exps/CWS/pretrain/vectors/1grams_t3_m50_corpus.txt',
        **shared_kwargs
    )

    bigram_vocab = data_bundle.get_vocab('bigrams')
    bigram_embed = StaticEmbedding(
        bigram_vocab,
        min_freq=3,
        model_dir_or_name='~/exps/CWS/pretrain/vectors/2grams_t3_m50_corpus.txt',
        **shared_kwargs
    )

    return data_bundle, char_embed, bigram_embed

示例#5

显示文件

 def test_demo(self):
     """Check that processing a raw sentence leaves '<' out of the char vocab.

     Related to issue
     https://github.com/fastnlp/fastNLP/issues/324#issue-705081091
     """
     from fastNLP import DataSet, Instance
     from fastNLP.io import DataBundle
     data_bundle = DataBundle()
     ds = DataSet()
     ds.append(Instance(raw_words="截流 进入 最后 冲刺 （ 附 图片 １ 张 ）"))
     data_bundle.set_dataset(ds, name='train')
     data_bundle = CWSPipe().process(data_bundle)
     # assertNotIn reports the member and the container on failure, whereas
     # assertFalse(x in y) only says "True is not false".
     self.assertNotIn('<', data_bundle.get_vocab('chars'))

示例#6

显示文件

 def test_replace_number(self):
     """Check that '<', '>' and '<NUM>' do not map to index 1 in the char vocab.

     The pku test fixture is processed with ``bigrams=True`` and
     ``replace_num_alpha=True``.
     """
     # Plain string literal: the original f-string had no placeholders.
     pipe = CWSPipe(bigrams=True, replace_num_alpha=True)
     data_bundle = pipe.process_from_file('test/data_for_tests/io/cws_pku')
     # Look the vocabulary up once instead of on every iteration.
     char_vocab = data_bundle.get_vocab('chars')
     for word in ['<', '>', '<NUM>']:
         # NOTE(review): index 1 is presumably the vocabulary's unknown-token
         # index -- confirm against fastNLP's Vocabulary.
         self.assertNotEqual(char_vocab.to_index(word), 1)