def _load_opensubtitles(invalid_vocab_times=0):
    """Build an ``OpenSubtitles`` dataloader on the dummy data with a BERT tokenizer.

    Args:
        invalid_vocab_times: Value forwarded to ``OpenSubtitles`` as its
            ``min_rare_vocab_times`` keyword argument. Defaults to 0.

    Returns:
        The ``OpenSubtitles`` instance constructed from the dummy
        dataset path and the wrapped BERT tokenizer.
    """
    # Function-scope import: ``transformers`` is only imported when this
    # loader is actually called.
    from transformers import BertTokenizer

    bert_tokenizer = BertTokenizer('./tests/dataloader/dummy_bertvocab/vocab.txt')
    wrapped_tokenizer = PretrainedTokenizer(bert_tokenizer)
    return OpenSubtitles(
        "./tests/dataloader/dummy_opensubtitles#OpenSubtitles",
        tokenizer=wrapped_tokenizer,
        pretrained='bert',
        min_rare_vocab_times=invalid_vocab_times,
    )
def _load_opensubtitles(invalid_vocab_times=0):
    """Build an ``OpenSubtitles`` dataloader on the dummy data with a GPT-2 tokenizer.

    Args:
        invalid_vocab_times: Value forwarded to ``OpenSubtitles`` as its
            ``min_rare_vocab_times`` keyword argument. Defaults to 0.

    Returns:
        The ``OpenSubtitles`` instance constructed from the dummy
        dataset path and the wrapped GPT-2 tokenizer.
    """
    # Function-scope import: ``transformers`` is only imported when this
    # loader is actually called.
    from transformers import GPT2Tokenizer

    # The GPT-2 tokenizer is built from two files, passed positionally
    # in this order: the vocabulary JSON, then the merges file.
    vocab_path = './tests/dataloader/dummy_gpt2vocab/vocab.json'
    merges_path = './tests/dataloader/dummy_gpt2vocab/merges.txt'
    gpt2_tokenizer = GPT2Tokenizer(vocab_path, merges_path)
    wrapped_tokenizer = PretrainedTokenizer(gpt2_tokenizer)
    return OpenSubtitles(
        "./tests/dataloader/dummy_opensubtitles#OpenSubtitles",
        tokenizer=wrapped_tokenizer,
        pretrained='gpt2',
        min_rare_vocab_times=invalid_vocab_times,
    )
def _load_opensubtitles(invalid_vocab_times=0):
    """Build an ``OpenSubtitles`` dataloader on the dummy data with default tokenization.

    Args:
        invalid_vocab_times: Value forwarded to ``OpenSubtitles`` under
            the keyword of the same name. Defaults to 0.

    Returns:
        The ``OpenSubtitles`` instance constructed from the dummy
        dataset path.
    """
    # NOTE(review): the tokenizer-based loaders of the same name pass this
    # value as ``min_rare_vocab_times``; confirm that ``invalid_vocab_times``
    # is the keyword the ``OpenSubtitles`` constructor in use still accepts.
    dataloader = OpenSubtitles(
        "./tests/dataloader/dummy_opensubtitles#OpenSubtitles",
        invalid_vocab_times=invalid_vocab_times,
    )
    return dataloader