示例#1
0
 def _load_opensubtitles(invalid_vocab_times=0):
     """Create an ``OpenSubtitles`` dataloader backed by a BERT tokenizer.

     A ``BertTokenizer`` is built from the dummy BERT vocabulary file,
     wrapped in ``PretrainedTokenizer`` and handed to ``OpenSubtitles``
     together with ``pretrained='bert'``.

     Args:
         invalid_vocab_times: Forwarded to ``OpenSubtitles`` as its
             ``min_rare_vocab_times`` keyword argument. Defaults to 0.

     Returns:
         The ``OpenSubtitles`` instance constructed over the dummy
         opensubtitles resource.
     """
     # Imported lazily so that ``transformers`` is only needed when called.
     from transformers import BertTokenizer

     bert_tokenizer = BertTokenizer(
         './tests/dataloader/dummy_bertvocab/vocab.txt')
     wrapped_tokenizer = PretrainedTokenizer(bert_tokenizer)

     loader_options = {
         'tokenizer': wrapped_tokenizer,
         'pretrained': 'bert',
         'min_rare_vocab_times': invalid_vocab_times,
     }
     return OpenSubtitles(
         "./tests/dataloader/dummy_opensubtitles#OpenSubtitles",
         **loader_options)
示例#2
0
 def _load_opensubtitles(invalid_vocab_times=0):
     """Create an ``OpenSubtitles`` dataloader backed by a GPT-2 tokenizer.

     A ``GPT2Tokenizer`` is built from the dummy GPT-2 vocabulary and
     merges files, wrapped in ``PretrainedTokenizer`` and handed to
     ``OpenSubtitles`` together with ``pretrained='gpt2'``.

     Args:
         invalid_vocab_times: Forwarded to ``OpenSubtitles`` as its
             ``min_rare_vocab_times`` keyword argument. Defaults to 0.

     Returns:
         The ``OpenSubtitles`` instance constructed over the dummy
         opensubtitles resource.
     """
     # Imported lazily so that ``transformers`` is only needed when called.
     from transformers import GPT2Tokenizer

     vocab_path = './tests/dataloader/dummy_gpt2vocab/vocab.json'
     merges_path = './tests/dataloader/dummy_gpt2vocab/merges.txt'
     gpt2_tokenizer = GPT2Tokenizer(vocab_path, merges_path)
     wrapped_tokenizer = PretrainedTokenizer(gpt2_tokenizer)

     loader_options = {
         'tokenizer': wrapped_tokenizer,
         'pretrained': 'gpt2',
         'min_rare_vocab_times': invalid_vocab_times,
     }
     return OpenSubtitles(
         "./tests/dataloader/dummy_opensubtitles#OpenSubtitles",
         **loader_options)
示例#3
0
 def _load_opensubtitles(invalid_vocab_times=0):
     """Create an ``OpenSubtitles`` dataloader over the dummy test resource.

     Args:
         invalid_vocab_times: Forwarded unchanged to ``OpenSubtitles`` as
             its ``invalid_vocab_times`` keyword argument. Defaults to 0.

     Returns:
         The constructed ``OpenSubtitles`` instance.
     """
     resource_id = "./tests/dataloader/dummy_opensubtitles#OpenSubtitles"
     dataloader = OpenSubtitles(
         resource_id, invalid_vocab_times=invalid_vocab_times)
     return dataloader