def vaild_loop(config, valid_generator, inference_program, model_handle):
    """Run one pass over the validation set and report mean losses.

    Args:
        config: run configuration (uses batch_size, max_len).
        valid_generator: callable returning an iterable of validation batches.
        inference_program: fluid program used for loss-only evaluation.
        model_handle: [exe, place, bow_loss, kl_loss, nll_loss, final_loss].

    Returns:
        [mean_bow_loss, mean_kl_loss, mean_nll_loss, mean_final_loss].
        All zeros if no batch produced a usable feed (guards ZeroDivisionError).
    """
    [exe, place, bow_loss, kl_loss, nll_loss, final_loss] = model_handle
    valid_num = 0.0
    total_valid_bow_loss = 0.0
    total_valid_kl_loss = 0.0
    total_valid_nll_loss = 0.0
    total_valid_final_loss = 0.0
    for batch_id, data in enumerate(valid_generator()):
        # NOTE(review): is_training=True here mirrors the training feed layout
        # (loss fetches need the full feed) — presumably intentional; confirm
        # against build_data_feed.
        data_feed = build_data_feed(data, place,
                                    batch_size=config.batch_size,
                                    is_training=True,
                                    bow_max_len=config.max_len,
                                    pretrain_epoch=False)
        if data_feed is None:
            # Skip batches the feeder could not assemble (e.g. ragged tail).
            continue

        val_fetch_outs = exe.run(inference_program,
                                 feed=data_feed,
                                 fetch_list=[bow_loss.name, kl_loss.name,
                                             nll_loss.name, final_loss.name])

        # Fetched losses are per-batch means; re-weight by batch size so the
        # final division yields a true per-sample mean.
        total_valid_bow_loss += val_fetch_outs[0] * config.batch_size
        total_valid_kl_loss += val_fetch_outs[1] * config.batch_size
        total_valid_nll_loss += val_fetch_outs[2] * config.batch_size
        total_valid_final_loss += val_fetch_outs[3] * config.batch_size
        valid_num += config.batch_size

    # Robustness fix: if every batch was skipped, the original code divided by
    # zero. Report and return neutral losses instead of crashing mid-training.
    if valid_num == 0:
        print("valid dataset: no usable batches; returning zero losses")
        return [0.0, 0.0, 0.0, 0.0]

    print("valid dataset: bow loss %0.6f kl loss %0.6f nll loss %0.6f total loss %0.6f" % \
          (total_valid_bow_loss / valid_num, total_valid_kl_loss / valid_num, \
           total_valid_nll_loss / valid_num, total_valid_final_loss / valid_num))

    return [total_valid_bow_loss / valid_num, total_valid_kl_loss / valid_num, \
            total_valid_nll_loss / valid_num, total_valid_final_loss / valid_num]
def predict(model_handle, text):
    """Decode a single input line and return the top-1 generated text.

    Args:
        model_handle: [exe, place, final_score, final_ids, final_index,
                       processors, id_dict_array].
        text: one raw input line to run through the model.

    Returns:
        The decoded string for the first (and only) sentence.
    """
    batch_size = 1
    [exe, place, final_score, final_ids, final_index,
     processors, id_dict_array] = model_handle

    data_generator = processors.preprocessing_for_lines([text],
                                                        batch_size=batch_size)

    results = []
    for batch_id, data in enumerate(data_generator()):
        data_feed, sent_num = build_data_feed(data, place,
                                              batch_size=batch_size)
        batch_score, batch_ids, batch_pre_index = exe.run(
            feed=data_feed,
            fetch_list=[final_score.name, final_ids.name, final_index.name])

        # Split the fetched tensors along axis 1 into one slice per sentence.
        per_sent = zip(np.split(batch_score, batch_size, axis=1),
                       np.split(batch_ids, batch_size, axis=1),
                       np.split(batch_pre_index, batch_size, axis=1))

        for pos, (score, ids, pre_index) in enumerate(per_sent, start=1):
            # Walk the beam back-pointers for the best (top-1) hypothesis;
            # EOS token id is 3 in this vocabulary.
            trace_ids, trace_score = trace_fianl_result(score, ids, pre_index,
                                                        topk=1, EOS=3)
            # Drop the trailing EOS before converting ids back to text.
            results.append(id_to_text(trace_ids[0][:-1], id_dict_array))
            if pos >= sent_num:
                # Remaining slices are padding beyond the real sentence count.
                break

    return results[0]
def test(config):
    """Run batch decoding over the test split and write results to a file.

    Loads trained parameters from config.model_path, decodes every test batch
    with the knowledge seq2seq model, and writes one generated line per input
    sentence to config.output.
    """
    batch_size = config.batch_size

    # Fix: close the vocab file instead of leaking the handle.
    with open(config.vocab_path) as vocab_file:
        config.vocab_size = len(vocab_file.readlines())

    final_score, final_ids, final_index = knowledge_seq2seq(config)

    # Keep decode outputs alive across exe.run fetches.
    final_score.persistable = True
    final_ids.persistable = True
    final_index.persistable = True

    main_program = fluid.default_main_program()

    if config.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    exe = Executor(place)
    exe.run(framework.default_startup_program())

    fluid.io.load_params(executor=exe, dirname=config.model_path,
                         main_program=main_program)
    # Fix: corrected garbled message ("laod params finsihed").
    print("load params finished")

    # test data generator
    processors = KnowledgeCorpus(
        data_dir=config.data_dir,
        data_prefix=config.data_prefix,
        vocab_path=config.vocab_path,
        min_len=config.min_len,
        max_len=config.max_len)
    test_generator = processors.data_generator(
        batch_size=config.batch_size,
        phase="test",
        shuffle=False)

    # load dict
    id_dict_array = load_id2str_dict(config.vocab_path)

    out_file = config.output
    # Fix: with-block guarantees the output file is closed even on error.
    with open(out_file, 'w') as fout:
        for batch_id, data in enumerate(test_generator()):
            data_feed, sent_num = build_data_feed(data, place,
                                                  batch_size=batch_size)
            if data_feed is None:
                break
            out = exe.run(feed=data_feed,
                          fetch_list=[final_score.name,
                                      final_ids.name,
                                      final_index.name])
            batch_score = out[0]
            batch_ids = out[1]
            batch_pre_index = out[2]

            # One slice per sentence along axis 1.
            batch_score_arr = np.split(batch_score, batch_size, axis=1)
            batch_ids_arr = np.split(batch_ids, batch_size, axis=1)
            batch_pre_index_arr = np.split(batch_pre_index, batch_size, axis=1)

            index = 0
            for (score, ids, pre_index) in zip(batch_score_arr,
                                               batch_ids_arr,
                                               batch_pre_index_arr):
                # Trace the top-1 beam hypothesis (EOS id is 3), strip EOS,
                # and write the decoded line.
                trace_ids, trace_score = trace_fianl_result(score, ids,
                                                            pre_index,
                                                            topk=1, EOS=3)
                fout.write(id_to_text(trace_ids[0][:-1], id_dict_array))
                fout.write('\n')

                index += 1
                if index >= sent_num:
                    # Slices past sent_num are padding in the final batch.
                    break
def _validate_and_checkpoint(config, valid_generator, inference_program,
                             model_handle, exe, main_program, stage,
                             total_step, best_score):
    """Run validation, then (for stage != 0) save a step checkpoint and,
    on improvement, the best model.

    Returns the possibly-updated best validation NLL loss.
    """
    eval_bow_loss, eval_kl_loss, eval_nll_loss, eval_total_loss = \
        vaild_loop(config, valid_generator, inference_program, model_handle)
    if stage != 0:
        param_path = config.save_dir + "/" + str(total_step)
        fluid.io.save_params(executor=exe, dirname=param_path,
                             main_program=main_program)
        if eval_nll_loss < best_score:
            # New best validation NLL — overwrite the best-model checkpoint.
            best_model_path = config.save_dir + "/best_model"
            print("save to best", eval_nll_loss, best_model_path)
            fluid.io.save_params(executor=exe, dirname=best_model_path,
                                 main_program=main_program)
            best_score = eval_nll_loss
    return best_score


def train_loop(config, train_generator, valid_generator,
               main_program, inference_program,
               model_handle, param_name_list, opt_var_name_list):
    """Main training loop over the configured epoch range.

    Stage 0 covers the BOW pre-training epochs and dumps final
    params/optimizer state to .npz files for the next stage; any other
    stage trains to config.num_epochs with periodic validation and
    checkpointing.

    Args:
        config: run configuration (stage, epochs, batch/log/valid steps, ...).
        train_generator / valid_generator: batch generator factories.
        main_program: fluid program with the training ops.
        inference_program: loss-only program used for validation.
        model_handle: [exe, place, bow_loss, kl_loss, nll_loss, final_loss].
        param_name_list: parameter variable names (for stage-0 npz export).
        opt_var_name_list: optimizer state variable names (stage-0 export).
    """
    stage = config.stage
    [exe, place, bow_loss, kl_loss, nll_loss, final_loss] = model_handle

    # Total number of training batches processed across all epochs;
    # used as the checkpoint directory name.
    total_step = 0
    start_epoch = 0 if stage == 0 else config.pretrain_epoch
    end_epoch = config.pretrain_epoch if stage == 0 else config.num_epochs
    print("stage" + str(stage) + "--- start epoch/end epoch: ",
          start_epoch, end_epoch)

    best_score = float('inf')
    for epoch_idx in range(start_epoch, end_epoch):
        total_bow_loss = 0
        total_kl_loss = 0
        total_nll_loss = 0
        total_final_loss = 0
        sample_num = 0

        for batch_id, data in enumerate(train_generator()):
            data_feed = build_data_feed(data, place,
                                        batch_size=config.batch_size,
                                        is_training=True,
                                        bow_max_len=config.max_len,
                                        pretrain_epoch=epoch_idx < config.pretrain_epoch)

            if data_feed is None:
                break

            out = exe.run(main_program, feed=data_feed,
                          fetch_list=[bow_loss.name, kl_loss.name,
                                      nll_loss.name, final_loss.name])

            total_step += 1
            total_bow_loss += out[0]
            total_kl_loss += out[1]
            total_nll_loss += out[2]
            total_final_loss += out[3]
            sample_num += 1

            if batch_id > 0 and batch_id % config.log_steps == 0:
                # Report running means since the last log, then reset them.
                print("epoch %d step %d | "
                      "bow loss %0.6f kl loss %0.6f nll loss %0.6f total loss %0.6f" % \
                      (epoch_idx, batch_id,
                       total_bow_loss / sample_num, total_kl_loss / sample_num, \
                       total_nll_loss / sample_num, total_final_loss / sample_num))
                total_bow_loss = 0
                total_kl_loss = 0
                total_nll_loss = 0
                total_final_loss = 0
                sample_num = 0

            # Every config.valid_steps batches: validate and checkpoint
            # (keeping the best model seen so far).
            if batch_id > 0 and batch_id % config.valid_steps == 0:
                best_score = _validate_and_checkpoint(
                    config, valid_generator, inference_program, model_handle,
                    exe, main_program, stage, total_step, best_score)

        # End-of-epoch validation and checkpointing.
        best_score = _validate_and_checkpoint(
            config, valid_generator, inference_program, model_handle,
            exe, main_program, stage, total_step, best_score)

    if stage == 0:
        # Save the last model and optimizer state to .npz files so the next
        # stage can initialize from the pre-trained weights.
        save_model_file = config.save_dir + "/model_stage_0"
        save_opt_state_file = config.save_dir + "/opt_state_stage_0"

        model_stage_0 = {}
        for name in param_name_list:
            t = np.asarray(fluid.global_scope().find_var(name).get_tensor())
            model_stage_0[name] = t
        np.savez(save_model_file, **model_stage_0)

        opt_state_stage_0 = {}
        for name in opt_var_name_list:
            t_data = np.asarray(fluid.global_scope().find_var(name).get_tensor())
            opt_state_stage_0[name] = t_data
        np.savez(save_opt_state_file, **opt_state_stage_0)