def evaluate_models_from(GPT_saved_models_folder, eval_file, enc, args):
    # Prepare eval data
    eval_dataloader_loss = DynamicBatchingLoader(
        eval_file, enc, args.normalize_data,
        args.eval_batch_size, args.max_seq_length)
    eval_dataloader_gen = get_eval_list_same_length(
        eval_file, enc, args.eval_batch_size, True)

    # read eval_loss log file
    eval_loss_log_file = os.path.join(GPT_saved_models_folder, "eval_log.txt")
    min_ckpt_old_perplexity = None
    min_ckpt_new_perplexity = None
    min_old_perplexity = 1000000.0
    min_new_perplexity = 1000000.0
    with open(eval_loss_log_file, "r") as reader:
        head_row = next(reader)  # skip the header row
        for line in reader:
            line = line.strip()
            epoch, ckpt_no, _, loss, perplexity = line.split(",")
            epoch = int(epoch)
            ckpt_no = int(ckpt_no) - 1  # shift to match checkpoint filenames
            loss = float(loss)
            perplexity = float(perplexity)
            print(ckpt_no, loss, perplexity, end="")

            # best checkpoint according to the perplexity logged during training
            if min_old_perplexity > perplexity:
                min_old_perplexity = perplexity
                min_ckpt_old_perplexity = ckpt_no

            # calculate new loss and perplexity by re-evaluating the checkpoint
            # NOTE: `config` is not defined in this function; it must exist in the enclosing scope
            model_filename = "GP2-pretrain-step-{}.pkl"
            model = load_model(
                GPT2LMHeadModel(config),
                os.path.join(GPT_saved_models_folder, model_filename.format(ckpt_no)),
                args, verbose=True)
            eval_loss, eval_ppl = eval_model_loss(
                model, enc, eval_dataloader_loss, epoch, args)
            if min_new_perplexity > eval_ppl:
                min_new_perplexity = eval_ppl
                min_ckpt_new_perplexity = ckpt_no

    print("Old best ckpt and perplexity:", min_ckpt_old_perplexity, min_old_perplexity)
    print("New best ckpt and perplexity:", min_ckpt_new_perplexity, min_new_perplexity)
    return (min_ckpt_old_perplexity, min_old_perplexity,
            min_ckpt_new_perplexity, min_new_perplexity)
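# A minimal sketch (not part of the original script) of the eval_log.txt row
# format that the loop above assumes: one header row, then comma-separated
# values of epoch, checkpoint number, an ignored middle field, loss, and
# perplexity. The helper name and the sample row below are illustrative only.
def parse_eval_log_row(line):
    epoch, ckpt_no, _, loss, perplexity = line.strip().split(",")
    # ckpt_no is shifted by one to match the "GP2-pretrain-step-{}.pkl" filenames
    return int(epoch), int(ckpt_no) - 1, float(loss), float(perplexity)

# Example: parse_eval_log_row("1,5,0.0001,3.21,24.78") -> (1, 4, 3.21, 24.78)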
def convert_to_dialogpt(args):
    # build a GPT-2 model skeleton from the config JSON
    config = GPT2Config.from_json_file(args.config_path)
    model = load_model(GPT2LMHeadModel(config), None, args, verbose=True)

    # load the Megatron checkpoint and massage its state dict into GPT-2 form
    model_state_dict = torch.load(args.megatron_checkpoint_path)
    model_state_dict = fix_state_dict_namespace(model_state_dict['model'])
    model_state_dict = fix_model_shapes(model_state_dict)

    start_model = model
    if (hasattr(model, "transformer")
            and all(not s.startswith('transformer.')
                    for s in model_state_dict.keys())):
        logger.info('loading transformer only')
        start_model = model.transformer
    start_model.load_state_dict(model_state_dict)

    torch.save(start_model.state_dict(), args.dialogpt_output_path)
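# A hedged usage sketch for convert_to_dialogpt. The attribute names mirror the
# ones the function reads; the paths are hypothetical placeholders, and
# load_model may require additional attributes (e.g. fp16/device flags) that are
# not shown here.
from argparse import Namespace

example_args = Namespace(
    config_path='config.json',                     # GPT-2 config JSON (placeholder)
    megatron_checkpoint_path='megatron_ckpt.pt',   # Megatron-LM checkpoint (placeholder)
    dialogpt_output_path='dialogpt_converted.pkl'  # output path for converted weights (placeholder)
)
# convert_to_dialogpt(example_args)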
# get_rank(), get_world_size(),
# args.train_input_file, args.train_batch_size,
# args.max_seq_length)
eval_dataloader_loss = DynamicBatchingLoader(
    args.eval_input_file, enc, args.normalize_data,
    args.eval_batch_size, args.max_seq_length)
eval_dataloader_gen = get_eval_list_same_length(
    args.eval_input_file, enc, args.eval_batch_size, True)

#########################################################################
# Prepare Model and Optimizer
#########################################################################
model = load_model(GPT2LMHeadModel(config), args.init_checkpoint,
                   args, verbose=True)
if args.local_rank != -1:
    # when training from scratch, make sure the initial models are the same
    params = [p.data for p in model.parameters()]
    all_reduce_and_rescale_tensors(params,
                                   float(torch.distributed.get_world_size()))

model_parameters = filter(lambda p: p.requires_grad, model.parameters())
total_params = sum([np.prod(p.size()) for p in model_parameters])
logger.info('Number of parameters = {}'.format(total_params))

param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'ln']  # no decay for bias and LayerNorm (ln)
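# A minimal sketch of how the no_decay list above is typically used to build the
# optimizer parameter groups; the 0.01 weight-decay value is an assumption for
# illustration, not necessarily what this script configures.
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)],
     'weight_decay': 0.01},
    {'params': [p for n, p in param_optimizer
                if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0}
]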
tokenizer = GPT2Tokenizer.from_pretrained(args.model_name_or_path)

# load config and attach persona-specific settings
config = GPT2Config.from_json_file(
    os.path.join(args.model_name_or_path, 'config.json'))
config.no_token_id = args.no_token_id
config.persona_emb_type = args.persona_emb_type
config.PersonaNum = args.PersonaNum
config.do_persona_linear = args.do_persona_linear
config.persona_n_embd = args.persona_n_embd

args.n_gpu = 1
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
args.device = device

model = load_model(GPT2LMHeadModel(config), model_file, args, verbose=True)  # fix misused key value
model.eval()
model.to(device)

# decode_size = len(open(decode_file, 'rU').readlines())
output_lines = []
# with open(decode_file, 'r') as fin:
with codecs.open(decode_file, 'r', encoding='utf-8') as fin:
    print(decode_file)
    lines = fin.readlines()
    assert args.decode_num <= len(lines)
    if args.decode_num == -1:
        decode_size = len(lines)
    else:
    get_rank(), get_world_size(),
    args.train_input_file, args.train_batch_size,
    args.max_seq_length)
eval_dataloader_loss = DynamicBatchingLoader(
    args.eval_input_file, enc, args.normalize_data,
    args.eval_batch_size, args.max_seq_length)
eval_dataloader_gen = get_eval_list_same_length(
    args.eval_input_file, enc, args.eval_batch_size, True)

#########################################################################
# Prepare Model and Optimizer
#########################################################################
model = load_model(GPT2LMHeadModel(config), args.init_checkpoint,
                   args, verbose=True)
if args.local_rank != -1:
    # when training from scratch, make sure the initial models are the same
    params = [p.data for p in model.parameters()]
    all_reduce_and_rescale_tensors(
        params, float(torch.distributed.get_world_size()))

model_parameters = filter(lambda p: p.requires_grad, model.parameters())
total_params = sum([np.prod(p.size()) for p in model_parameters])
logger.info('Number of parameters = {}'.format(total_params))

param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'ln']  # no decay for bias and LayerNorm (ln)
optimizer_grouped_parameters = [