Example #1
0
def load_and_cache_examples(args, tokenizer):
    # A list of tokenizers selects the dual-tokenizer dataset;
    # a single tokenizer falls back to the split dataset.
    if isinstance(tokenizer, list):
        dataset = TextDataset_2Tokenizers(tokenizer,
                                          args,
                                          args.train_data_file,
                                          block_size=args.block_size)
    else:
        dataset = TextDataset_Split(tokenizer,
                                    args,
                                    args.train_data_file,
                                    block_size=args.block_size)
    return dataset
Example #2
0
def load_and_cache_examples(args, tokenizer, evaluate=False):
    # Identical dispatch to Example #1, but evaluate=True selects the
    # evaluation file instead of the training file.
    file_path = args.eval_data_file if evaluate else args.train_data_file
    if isinstance(tokenizer, list):
        dataset = TextDataset_2Tokenizers(tokenizer,
                                          args,
                                          file_path=file_path,
                                          block_size=args.block_size)
    else:
        dataset = TextDataset_Split(tokenizer,
                                    args,
                                    file_path=file_path,
                                    block_size=args.block_size)
    return dataset
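
For readers who want to exercise the dispatch logic above in isolation, here is a minimal, self-contained sketch. TextDataset_2Tokenizers and TextDataset_Split are project-specific classes whose implementations are not shown here, so the stubs below only record the arguments they receive; the args namespace and file names are illustrative assumptions, not values from the source.

from types import SimpleNamespace

# Hypothetical stubs standing in for the real project classes;
# they only record which file path and block size they were given.
class TextDataset_2Tokenizers:
    def __init__(self, tokenizers, args, file_path=None, block_size=None):
        self.file_path, self.block_size = file_path, block_size

class TextDataset_Split:
    def __init__(self, tokenizer, args, file_path=None, block_size=None):
        self.file_path, self.block_size = file_path, block_size

def load_and_cache_examples(args, tokenizer, evaluate=False):
    # Same dispatch as Example #2: a list of tokenizers selects the
    # dual-tokenizer dataset, and evaluate picks the data file.
    file_path = args.eval_data_file if evaluate else args.train_data_file
    cls = TextDataset_2Tokenizers if isinstance(tokenizer, list) else TextDataset_Split
    return cls(tokenizer, args, file_path=file_path, block_size=args.block_size)

# Illustrative arguments (assumed, not taken from the source).
args = SimpleNamespace(train_data_file="train.txt",
                       eval_data_file="eval.txt",
                       block_size=512)

assert load_and_cache_examples(args, "tok", evaluate=True).file_path == "eval.txt"
assert load_and_cache_examples(args, ["tok_a", "tok_b"]).file_path == "train.txt"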