def evaluate_models_from(GPT_saved_models_folder, eval_file, enc, args):
    """Re-evaluate every checkpoint listed in a folder's eval log and compare.

    Reads ``eval_log.txt`` inside *GPT_saved_models_folder* (CSV rows of
    ``epoch,ckpt_no,_,loss,perplexity``), and for each row:
      * tracks the best ("old") perplexity recorded in the log, and
      * reloads the corresponding ``GP2-pretrain-step-<n>.pkl`` checkpoint and
        recomputes ("new") loss/perplexity on *eval_file*.

    Args:
        GPT_saved_models_folder: directory holding the checkpoints and log.
        eval_file: path to the evaluation data file.
        enc: tokenizer/encoder passed through to the data loaders.
        args: namespace providing normalize_data, eval_batch_size,
            max_seq_length and the options consumed by ``load_model``.

    Returns:
        Tuple ``(best_old_ckpt, best_old_ppl, best_new_ckpt, best_new_ppl)``;
        the ckpt entries are ``None`` if the log had no data rows.
    """
    # Prepare eval data.
    eval_dataloader_loss = DynamicBatchingLoader(
        eval_file, enc, args.normalize_data,
        args.eval_batch_size, args.max_seq_length)
    # NOTE(review): this generation loader is never used below — kept only in
    # case its construction has side effects relied on elsewhere; confirm and
    # delete if not.
    eval_dataloader_gen = get_eval_list_same_length(
        eval_file, enc, args.eval_batch_size, True)

    eval_loss_log_file = os.path.join(GPT_saved_models_folder, "eval_log.txt")

    min_ckpt_old_perplexity = None
    min_ckpt_new_perplexity = None
    # float("inf") instead of an arbitrary 1e6 sentinel, so the minimum is
    # found correctly even if every logged perplexity exceeds 1e6.
    min_old_perplexity = float("inf")
    min_new_perplexity = float("inf")

    # Checkpoint filename pattern is loop-invariant; build it once.
    model_filename = "GP2-pretrain-step-{}.pkl"

    with open(eval_loss_log_file, "r", encoding="utf-8") as reader:
        next(reader)  # skip the CSV header row
        for line in reader:
            line = line.strip()
            epoch, ckpt_no, _, loss, perplexity = line.split(",")
            epoch = int(epoch)
            # Log stores 1-based step numbers; filenames are 0-based.
            # NOTE(review): presumed from the original "- 1" — confirm against
            # the code that writes eval_log.txt.
            ckpt_no = int(ckpt_no) - 1
            loss = float(loss)
            perplexity = float(perplexity)
            # end="" matches the original output format; eval_model_loss is
            # presumably expected to continue the same console line.
            print(ckpt_no, loss, perplexity, end="")

            # Best perplexity as recorded in the log ("old").
            if min_old_perplexity > perplexity:
                min_old_perplexity = perplexity
                min_ckpt_old_perplexity = ckpt_no

            # Recompute loss/perplexity with a freshly loaded checkpoint
            # ("new").  `config` is a module-level name defined elsewhere in
            # this file.
            model = load_model(
                GPT2LMHeadModel(config),
                os.path.join(GPT_saved_models_folder,
                             model_filename.format(ckpt_no)),
                args, verbose=True)
            eval_loss, eval_ppl = eval_model_loss(
                model, enc, eval_dataloader_loss, epoch, args)
            if min_new_perplexity > eval_ppl:
                min_new_perplexity = eval_ppl
                min_ckpt_new_perplexity = ckpt_no

    print("Old best ckpt and perplexity:",
          min_ckpt_old_perplexity, min_old_perplexity)
    print("New best ckpt and perplexity:",
          min_ckpt_new_perplexity, min_new_perplexity)
    return (min_ckpt_old_perplexity, min_old_perplexity,
            min_ckpt_new_perplexity, min_new_perplexity)
train_dataloader = BucketingDataLoader(args.train_input_file, args.train_batch_size, args.max_seq_length) else: pass # train_dataloader = DistributedBucketingDataLoader( # get_rank(), get_world_size(), # args.train_input_file, args.train_batch_size, # args.max_seq_length) eval_dataloader_loss = DynamicBatchingLoader(args.eval_input_file, enc, args.normalize_data, args.eval_batch_size, args.max_seq_length) eval_dataloader_gen = get_eval_list_same_length(args.eval_input_file, enc, args.eval_batch_size, True) ######################################################################### # Prepare Model and Optimizer ########################################################################## model = load_model(GPT2LMHeadModel(config), args.init_checkpoint, args, verbose=True) if args.local_rank != -1: # when from scratch make sure initial models are the same params = [p.data for p in model.parameters()] all_reduce_and_rescale_tensors(params, float(torch.distributed.get_world_size())) model_parameters = filter(lambda p: p.requires_grad, model.parameters())