# Loader factories for the dummy Ubuntu corpus used by these tests.
# The import paths below are assumed from cotk's dataloader API; adjust if the package layout differs.
from cotk.dataloader import UbuntuCorpus, PretrainedTokenizer


def _load_ubuntucorpus(min_rare_vocab_times=0):
    # Default loader: builds the corpus with cotk's built-in tokenizer.
    return UbuntuCorpus("./tests/dataloader/dummy_ubuntucorpus#Ubuntu",
                        min_rare_vocab_times=min_rare_vocab_times)
def _load_ubuntucorpus_gpt2(min_rare_vocab_times=0):
    # GPT-2 variant: wraps a transformers GPT2Tokenizer built from the dummy vocab/merges files.
    from transformers import GPT2Tokenizer
    toker = PretrainedTokenizer(GPT2Tokenizer('./tests/dataloader/dummy_gpt2vocab/vocab.json',
                                              './tests/dataloader/dummy_gpt2vocab/merges.txt'))
    return UbuntuCorpus("./tests/dataloader/dummy_ubuntucorpus#Ubuntu",
                        min_rare_vocab_times=min_rare_vocab_times, tokenizer=toker, pretrained="gpt2")
def _load_ubuntucorpus_bert(min_rare_vocab_times=0):
    # BERT variant: wraps a transformers BertTokenizer built from the dummy vocab file.
    from transformers import BertTokenizer
    toker = PretrainedTokenizer(BertTokenizer('./tests/dataloader/dummy_bertvocab/vocab.txt'))
    return UbuntuCorpus("./tests/dataloader/dummy_ubuntucorpus#Ubuntu",
                        min_rare_vocab_times=min_rare_vocab_times, tokenizer=toker, pretrained="bert")
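# A minimal usage sketch showing how the loader factories above could be exercised.
# Assumptions: pytest drives this suite, and the test name and assertion here are
# illustrative only, not part of the original test set.
import pytest


@pytest.mark.parametrize("load", [
    _load_ubuntucorpus,
    _load_ubuntucorpus_gpt2,
    _load_ubuntucorpus_bert,
])
def test_ubuntucorpus_smoke(load):
    # Each factory should construct a dataloader without raising.
    dl = load(min_rare_vocab_times=0)
    assert isinstance(dl, UbuntuCorpus)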