示例#1
0
	def _load_switchboardcorpus(min_rare_vocab_times=0):
		# Build a BERT-backed pretrained tokenizer from the dummy vocab,
		# then load the dummy Switchboard corpus with it.
		from transformers import BertTokenizer
		bert_toker = PretrainedTokenizer(
			BertTokenizer('./tests/dataloader/dummy_bertvocab/vocab.txt'))
		return SwitchboardCorpus(
			"./tests/dataloader/dummy_switchboardcorpus#SwitchboardCorpus",
			min_rare_vocab_times=min_rare_vocab_times,
			tokenizer=bert_toker,
			pretrained="bert")
示例#2
0
	def _load_switchboardcorpus(min_rare_vocab_times=0):
		# Load the dummy Switchboard corpus with the default tokenizer,
		# forwarding the rare-vocab threshold unchanged.
		corpus_path = "./tests/dataloader/dummy_switchboardcorpus#SwitchboardCorpus"
		return SwitchboardCorpus(corpus_path,
								 min_rare_vocab_times=min_rare_vocab_times)
示例#3
0
	def _load_switchboardcorpus(min_rare_vocab_times=0):
		# Wrap a GPT-2 tokenizer (built from the dummy vocab/merges files)
		# and load the dummy Switchboard corpus with it.
		from transformers import GPT2Tokenizer
		gpt2_toker = PretrainedTokenizer(GPT2Tokenizer(
			'./tests/dataloader/dummy_gpt2vocab/vocab.json',
			'./tests/dataloader/dummy_gpt2vocab/merges.txt'))
		return SwitchboardCorpus(
			"./tests/dataloader/dummy_switchboardcorpus#SwitchboardCorpus",
			min_rare_vocab_times=min_rare_vocab_times,
			tokenizer=gpt2_toker,
			pretrained="gpt2")
 def _load_switchboardcorpus(invalid_vocab_times=0):
     # Load the dummy Switchboard corpus, forwarding the invalid-vocab
     # threshold unchanged.
     return SwitchboardCorpus("./tests/dataloader/dummy_switchboardcorpus#SwitchboardCorpus",
                              invalid_vocab_times=invalid_vocab_times)