Example #1
	from cotk.dataloader import UbuntuCorpus

	def _load_ubuntucorpus(min_rare_vocab_times=0):
		return UbuntuCorpus("./tests/dataloader/dummy_ubuntucorpus#Ubuntu", min_rare_vocab_times=min_rare_vocab_times)
Example #2
	def _load_ubuntucorpus(min_rare_vocab_times=0):
		from transformers import GPT2Tokenizer
		from cotk.dataloader import UbuntuCorpus, PretrainedTokenizer
		# Wrap a GPT-2 tokenizer built from the dummy vocabulary and merges files.
		toker = PretrainedTokenizer(GPT2Tokenizer('./tests/dataloader/dummy_gpt2vocab/vocab.json', './tests/dataloader/dummy_gpt2vocab/merges.txt'))
		return UbuntuCorpus("./tests/dataloader/dummy_ubuntucorpus#Ubuntu", min_rare_vocab_times=min_rare_vocab_times, tokenizer=toker, pretrained="gpt2")
Example #3
	def _load_ubuntucorpus(min_rare_vocab_times=0):
		from transformers import BertTokenizer
		from cotk.dataloader import UbuntuCorpus, PretrainedTokenizer
		# Wrap a BERT tokenizer built from the dummy vocabulary file.
		toker = PretrainedTokenizer(BertTokenizer('./tests/dataloader/dummy_bertvocab/vocab.txt'))
		return UbuntuCorpus("./tests/dataloader/dummy_ubuntucorpus#Ubuntu", min_rare_vocab_times=min_rare_vocab_times, tokenizer=toker, pretrained="bert")
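Each helper above returns a cotk UbuntuCorpus dataloader. The sketch below shows how such a loader is typically consumed; it assumes cotk's standard restart/get_next_batch batching interface, and the set name and batch size are illustrative, not taken from the examples above.

	# Minimal usage sketch, assuming cotk's standard dataloader interface.
	loader = _load_ubuntucorpus(min_rare_vocab_times=2)
	loader.restart("train", batch_size=4, shuffle=True)
	batch = loader.get_next_batch("train")  # dict of padded arrays for this batch
	print(batch.keys())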