        num_workers=args.num_workers, collate_fn=lambda x: x, drop_last=drop_last, pin_memory=True)
    # evaluator = LXMERTEvaluator(dset)
    evaluator = None
    print()

    return DataTuple(dataset=dset, torchdset=tset, loader=data_loader, evaluator=evaluator)


# Create pretrain.jsonl & traindev data
clean_data("./data")

train_tuple = get_tuple(args.train, args.batch_size, shuffle=True, drop_last=True)
valid_tuple = None


class InputFeatures(object):
    """A single set of features of data."""

    def __init__(self, input_ids, input_mask, segment_ids, lm_label_ids,
                 visual_feats, obj_labels, is_matched, ans):
        self.input_ids = input_ids
        self.input_mask = input_mask
        self.segment_ids = segment_ids
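# clean_data() is defined elsewhere in the repo; the sketch below is only an
# assumed illustration of the "Create pretrain.jsonl & traindev data" step above:
# concatenate the jsonl splits into pretrain.jsonl and merge train + dev into a
# traindev split. The function name, file names, and fields are assumptions, not
# the actual implementation.
import json
import os


def clean_data_sketch(data_dir):
    """Hypothetical stand-in for clean_data(): writes pretrain.jsonl and traindev.jsonl."""
    def read_jsonl(name):
        with open(os.path.join(data_dir, name)) as f:
            return [json.loads(line) for line in f]

    def write_jsonl(rows, name):
        with open(os.path.join(data_dir, name), "w") as f:
            for row in rows:
                f.write(json.dumps(row) + "\n")

    train, dev, test = (read_jsonl(n) for n in ("train.jsonl", "dev.jsonl", "test.jsonl"))
    write_jsonl(train + dev + test, "pretrain.jsonl")  # all image/text pairs for pretraining
    write_jsonl(train + dev, "traindev.jsonl")         # enlarged training split for final runs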
                if steps % args.save_steps == 0:
                    save_path = os.path.join(
                        args.checkpoints, "step_" + str(steps) + str(args.split))
                    print("save_path:", save_path)
                    fluid.io.save_persistables(exe, save_path, train_program)

                time_end = time.time()
                used_time = time_end - time_begin
                time_begin = time_end  # reset the timer for the next interval
                print("used_time:", used_time)

                if steps == args.stop_steps:
                    break
            except fluid.core.EOFException:
                train_pyreader.reset()
                break


if __name__ == '__main__':
    print_arguments(args)

    if args.task_name == "hm":
        # Create pretrain.jsonl & traindev data
        clean_data("./data/hm")
        # This handles formatting for the E-Models: they need a label column,
        # and some data must be copied to the end to meet length requirements.
        double_data("./data/hm")

    main(args)
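# double_data() is likewise defined elsewhere in the repo. Based on the comment
# above, a hypothetical version could look roughly like this sketch: it makes
# sure every row has a label column (dummy labels for unlabeled test data) and
# copies rows to the end of each file until an assumed minimum length is met.
# The function name, the min_rows value, and the file handling are assumptions.
import json
import os


def double_data_sketch(data_dir, min_rows=1000):
    """Hypothetical stand-in for double_data(): pad jsonl files for the E-Models."""
    for name in os.listdir(data_dir):
        if not name.endswith(".jsonl"):
            continue
        path = os.path.join(data_dir, name)
        with open(path) as f:
            rows = [json.loads(line) for line in f]
        if not rows:
            continue
        for row in rows:
            row.setdefault("label", 0)  # add a dummy label column where none exists
        original = list(rows)
        i = 0
        while len(rows) < min_rows:  # copy data to the end to meet the length requirement
            rows.append(dict(original[i % len(original)]))
            i += 1
        with open(path, "w") as f:
            for row in rows:
                f.write(json.dumps(row) + "\n")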