def __init__(self, src_ids, position_ids, sentence_ids, input_mask,
             enc_input, enc_input_mask, config: Model_Config,
             weight_sharing=True, cache=None, use_fp16=False):
    # model hyper-parameters
    self._emb_size = config["HIDDEN_SIZE"]
    self._n_layer = config["NUM_HIDDEN_LAYERS"]
    self._n_head = config["NUM_ATTENTION_HEADS"]
    self._voc_size = config["VOCAB_SIZE"]
    self._max_position_seq_len = config["MAX_POSITION_EMBEDDINGS"]
    self._sent_types = config["TYPE_VOCAB_SIZE"]
    self._hidden_act = config["HIDDEN_ACT"]
    self._prepostprocess_dropout = config["HIDDEN_DROPOUT_PROB"]
    self._attention_dropout = config["ATTENTION_PROBS_DROPOUT_PROB"]
    self._weight_sharing = weight_sharing

    # parameter names
    self.decoder_name = "decoder"
    self._word_emb_name = "enc_word_embedding"
    self._pos_emb_name = "enc_pos_embedding"
    self._seg_emb_name = self.decoder_name + "dec_seg_embedding"
    self._dtype = "float32"
    self._inttype = "int32"

    # task parameters
    self.goal_type_num = config["GOAL_TYPE_NUM"]
    self.goal_entity_num = config["GOAL_ENTITY_NUM"]
    self.knowledge_s_num = config["KNOWLEDGE_S_NUM"]
    self.knowledge_p_num = config["KNOWLEDGE_P_NUM"]

    # precomputed position-embedding table
    self.pos_embed = Dataset.get_position_embed(SEQ_MAX_LEN, HIDDEN_SIZE)
    # self._param_initializer = fluid.initializer.TruncatedNormal(
    #     scale=config['initializer_range'])

    self._build_model(src_ids, position_ids, sentence_ids, input_mask,
                      enc_input, enc_input_mask, cache=cache)
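# --- hedged sketch (not from the original source) --------------------------
# `Dataset.get_position_embed(SEQ_MAX_LEN, HIDDEN_SIZE)` above is a project
# helper whose implementation is not shown here. Assuming it returns the
# standard fixed sinusoidal table of shape [max_len, hidden_size] from
# "Attention Is All You Need", a minimal NumPy version could look like this:
import numpy as np

def get_position_embed_sketch(max_len, hidden_size):
    """Hypothetical sketch of a [max_len, hidden_size] sinusoidal table."""
    pos = np.arange(max_len, dtype="float32")[:, None]      # [max_len, 1]
    dim = np.arange(hidden_size, dtype="float32")[None, :]  # [1, hidden_size]
    angle = pos / np.power(10000.0, 2.0 * (dim // 2) / hidden_size)
    table = np.zeros((max_len, hidden_size), dtype="float32")
    table[:, 0::2] = np.sin(angle[:, 0::2])  # even dimensions: sine
    table[:, 1::2] = np.cos(angle[:, 1::2])  # odd dimensions: cosine
    return table
# ----------------------------------------------------------------------------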
def check_params(dataset: Dataset):
    """Check that the module-level constants match the dataset's parameters."""
    params = dataset.get_params()
    expected = {
        "VOCAB_SIZE": VOCAB_SIZE,
        "GOAL_TYPE_NUM": GOAL_TYPE_NUM,
        "GOAL_ENTITY_NUM": GOAL_ENTITY_NUM,
        "KNOWLEDGE_S_NUM": KNOWLEDGE_S_NUM,
        "KNOWLEDGE_P_NUM": KNOWLEDGE_P_NUM,
        "TYPE_VOCAB_SIZE": TYPE_VOCAB_SIZE,
    }
    for name, value in expected.items():
        assert value == params[name], \
            "Parameter Error, %s should be %d, but it is %d. Please check!" % \
            (name, params[name], value)
    check_info = "Parameter checked."
    print(check_info)
    return check_info
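# --- hedged sketch (not from the original source) --------------------------
# `write_iterable`, used below to dump parameter names to disk, is a project
# helper whose implementation is not shown. Judging from its call sites it
# writes one element per line; a minimal version consistent with that could be:
def write_iterable_sketch(filename, iterable):
    """Hypothetical sketch: write each element of `iterable` on its own line."""
    with open(filename, "w", encoding="utf8") as f:
        for item in iterable:
            f.write(str(item) + "\n")
# ----------------------------------------------------------------------------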
def fine_tunning():
    # logging tools
    tgt_base_dir = set_base(__file__)
    LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
    DATE_FORMAT = "%m/%d/%Y %H:%M:%S %p"
    log_filename = os.path.join(tgt_base_dir, "dec_pre_training.log")
    logger = log(log_filename)
    config = Model_Config()

    # define programs
    train_prog = fluid.Program()
    startup_prog = fluid.Program()

    # define the model
    with fluid.program_guard(train_prog, startup_prog):
        token_ids = fluid.layers.data(
            name="token_ids", shape=[None, SEQ_MAX_LEN], dtype='int64')
        segment_ids = fluid.layers.data(
            name="segment_ids", shape=[None, SEQ_MAX_LEN], dtype='int64')
        pos_ids = fluid.layers.data(
            name="pos_ids", shape=[None, SEQ_MAX_LEN], dtype='int64')
        enc_slf_attn = fluid.layers.data(
            name='enc_slf_attn', shape=[None, SEQ_MAX_LEN, SEQ_MAX_LEN], dtype='int64')
        lm_label_mat = fluid.layers.data(
            name='lm_label_mat', shape=[None, SEQ_MAX_LEN], dtype='int64')
        lm_pos_mask = fluid.layers.data(
            name='lm_pos_mask', shape=[None, SEQ_MAX_LEN, SEQ_MAX_LEN], dtype='int64')
        lm_pos_len = fluid.layers.data(
            name='lm_pos_len', shape=[None, 1], dtype='int64')
        goal_type_pos = fluid.layers.data(
            name="goal_type_pos", shape=[None, 2], dtype='int64')
        goal_type_label = fluid.layers.data(
            name="goal_type_label", shape=[None], dtype='int64')

        # zero-filled encoder outputs: the decoder is pre-trained without a
        # real encoder, so its cross-attention inputs are placeholders
        enc_input = fluid.layers.fill_constant(
            shape=[BATCH_SIZE, SEQ_MAX_LEN, HIDDEN_SIZE], dtype='float32', value=0.0)
        enc_mask = fluid.layers.fill_constant(
            shape=[BATCH_SIZE, SEQ_MAX_LEN, SEQ_MAX_LEN], dtype='float32', value=0.0)

        decode = Decoder(token_ids, pos_ids, segment_ids, enc_slf_attn,
                         config=config, enc_input=enc_input,
                         enc_input_mask=enc_mask)
        # output, loss, acc = decode.mask_goal_type(goal_type_pos, goal_type_label)
        loss, goal_type_acc = decode.pretrain(
            goal_type_pos, goal_type_label, lm_label_mat, lm_pos_mask, lm_pos_len)
        # loss = decode.lm_task(lm_label_mat, lm_pos_mask, lm_pos_len)

        adam = fluid.optimizer.AdamOptimizer()
        adam.minimize(loss)

    # define the executor
    place = fluid.CUDAPlace(0) if USE_CUDA else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # initialize parameters
    exe.run(startup_prog)
    params_list = train_prog.block(0).all_parameters()
    params_name_list = [p.name for p in params_list]
    write_iterable("decoder_params.param", params_name_list)

    # load persistables if available
    if LOAD_PERSISTABLE:
        try:
            print("begin to load %s" % LOAD_PERSISTABLE_FILE)
            fluid.io.load_persistables(exe, tgt_base_dir,
                                       main_program=train_prog,
                                       filename=LOAD_PERSISTABLE_FILE)
            info_msg = "Load %s success!" % LOAD_PERSISTABLE_FILE
            logger.info(info_msg)
            print(info_msg)
        except Exception:
            load_error = "the persistable model cannot be loaded."
            logger.error(load_error)

    # load a saved model if available
    if LOAD_MODEL:
        try:
            model_file = os.path.join(tgt_base_dir, LOAD_MODEL_FILE)
            print("begin to load %s" % model_file)
            load_model(model_file, params_name_list, place,
                       opt_state_init_file=LOAD_OPTL_FILE if LOAD_OPT else "")
            info_msg = "Load %s success!" % LOAD_MODEL_FILE
            logger.info(info_msg)
            print(info_msg)
        except Exception:
            load_error = "the vars model cannot be loaded."
            logger.error(load_error)

    # build the dataset and verify its parameters
    dataset = Dataset(limit=LIMIT)
    check_params(dataset)

    # clock and message
    start_time = time.time()
    recoder = time.time()
    logger.info("Begin training")
    print("Begin training")
    for epoch_id in range(EPOCH_NUM):
        data_gen = dataset.generate_dec_batch(batch_size=BATCH_SIZE)
        for batch_id, item in enumerate(data_gen):
            token_id_arr, segment_id_arr, pos_arr, slf_attn_mask_arr, \
                lm_mask_arr, lm_len_arr, lm_label_arr, \
                goal_pos, goal_type_list = item
            feed_dict = {
                "token_ids": token_id_arr,
                "segment_ids": segment_id_arr,
                "pos_ids": pos_arr,
                "enc_slf_attn": slf_attn_mask_arr,
                "goal_type_pos": goal_pos,
                "goal_type_label": goal_type_list,
                "lm_label_mat": lm_label_arr,
                "lm_pos_mask": lm_mask_arr,
                "lm_pos_len": lm_len_arr
            }
            res = exe.run(train_prog, feed=feed_dict,
                          fetch_list=[loss, goal_type_acc])

            # print a progress message
            if batch_id % PRINT_BATCH == 0:
                now = time.time()
                info_msg = "Now epoch: %d, batch: %d, avg loss: %.3f, task accuracy: %.3f, spend %d s, speed %.2f batch/s" % \
                    (epoch_id, batch_id, res[0], res[1],
                     now - start_time, PRINT_BATCH / (now - recoder))
                logger.info(info_msg)
                print(info_msg)
                recoder = time.time()

            # save the model
            if batch_id % SAVE_BATCH == 0:
                save_msg = "save model at %d epoch, %d batch" % (epoch_id, batch_id)
                model_name = "dec_model" + "_epoch_%d_batch_%d" % (epoch_id, batch_id)
                logger.info(save_msg)
                fluid.io.save_vars(exe, tgt_base_dir, main_program=train_prog,
                                   filename=model_name + ".vars",
                                   predicate=lambda var: isinstance(
                                       var, fluid.framework.Parameter))
                fluid.io.save_persistables(exe, tgt_base_dir,
                                           main_program=train_prog,
                                           filename=model_name + ".pers")
                # also save the optimizer state next to the parameters
                opt_var_name_list = adam.get_opti_var_name_list()
                save_model_info_msg = save_model(
                    tgt_base_dir,
                    param_name_list=params_name_list,
                    opt_var_name_list=opt_var_name_list,
                    name=model_name)
                logger.info(save_msg + save_model_info_msg)

                # rotate checkpoints: drop the two oldest files once the
                # directory exceeds MAX_SAVE entries
                if len(os.listdir(tgt_base_dir)) > MAX_SAVE:
                    file_list = [
                        (os.path.join(tgt_base_dir, item),
                         os.path.getmtime(os.path.join(tgt_base_dir, item)))
                        for item in os.listdir(tgt_base_dir)
                    ]
                    delete_file = sorted(file_list, key=lambda x: x[1])
                    os.remove(delete_file[0][0])
                    os.remove(delete_file[1][0])
                    del_msg = "delete the model files %s and %s." % (
                        delete_file[0][0], delete_file[1][0])
                    logger.info(del_msg)
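# --- hedged sketch (not from the original source) --------------------------
# `log(log_filename)` used in `fine_tunning` is a project helper that is
# assumed to build a file logger from the LOG_FORMAT / DATE_FORMAT strings
# defined there; a minimal version under that assumption could be:
import logging

def log_sketch(log_filename):
    """Hypothetical sketch: file logger using fine_tunning's format strings."""
    logger = logging.getLogger(log_filename)
    logger.setLevel(logging.INFO)
    handler = logging.FileHandler(log_filename, encoding="utf8")
    handler.setFormatter(logging.Formatter(
        "%(asctime)s - %(levelname)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S %p"))
    logger.addHandler(handler)
    return logger
# ----------------------------------------------------------------------------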
                         enc_input=enc_input, enc_input_mask=enc_mask,
                         config=config)
        # output, loss, acc = decode.mask_goal_type(goal_type_pos, goal_type_label)
        # loss, goal_type_acc = decode.pretrain(goal_type_pos, goal_type_label,
        #                                       lm_label_mat, lm_pos_mask, lm_pos_len)
        loss = decode.lm_task(lm_label_mat, lm_pos_mask, lm_pos_len)

        adam = fluid.optimizer.AdamOptimizer()
        adam.minimize(loss)

    # define the executor
    place = fluid.CUDAPlace(0) if USE_CUDA else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # initialize parameters
    exe.run(startup_prog)
    dataset = Dataset(limit=LIMIT)

    # dump every program variable for inspection
    with open("paramslist.txt", 'w', encoding='utf8') as f:
        for item in train_prog.list_vars():
            f.write(str(item) + "\n")

    # load persistables if available
    if LOAD_PERSISTABLE:
        try:
            print("begin to load %s" % LOAD_PERSISTABLE_FILE)
            fluid.io.load_persistables(exe, tgt_base_dir,
                                       main_program=train_prog,
                                       filename=LOAD_PERSISTABLE_FILE)
            info_msg = "Load %s success!" % LOAD_PERSISTABLE_FILE
        except Exception:
            load_error = "the model params cannot be loaded."
            logger.error(load_error)

    print("begin to load %s" % LOAD_VARS_FILE)
    info_msg = "Load %s success!" % LOAD_VARS_FILE
    logger.info(info_msg)
    print(info_msg)

    # build the dataset and verify its parameters
    dataset = Dataset(limit=LIMIT)
    check_params(dataset)

    # clock and message
    start_time = time.time()
    recoder = time.time()
    logger.info("Begin training")
    print("Begin training")
    for epoch_id in range(EPOCH_NUM):
        data_gen = dataset.generate_fine_tunning_batch(batch_size=BATCH_SIZE)
        for batch_id, item in enumerate(data_gen):
            input_name_list = [
                enc_token_ids.name, enc_segment_ids.name, enc_pos_ids.name,
        'goal_entity_label': goal_entity_label,
        'knowledge_s_pos': knowledge_s_pos,
        'knowledge_s_label': knowledge_s_label,
        'knowledge_p_pos': knowledge_p_pos,
        'knowledge_p_label': knowledge_p_label
    }

    encode = Encoder(token_ids, pos_ids, segment_ids, input_length, config)
    output = encode.get_sequence_output()
    loss, mean_mesure = encode.get_pretrain_output(pretrian_data)

    adam = fluid.optimizer.AdamOptimizer()
    adam.minimize(loss)

    place = fluid.CUDAPlace(0) if USE_CUDA else fluid.CPUPlace()
    exe = fluid.Executor(place)

    data = Dataset(limit=LIMIT)
    check_info = check_params(data)
    logger.info(check_info)

    params_list = train_prog.block(0).all_parameters()
    params_name_list = [p.name for p in params_list]
    write_iterable("encoder_params.param", params_name_list)

    # start up the program
    exe.run(train_startup)

    if LOAD_PERSISTABLE:
        try:
            print("Begin to Load!")
            cpu_exe = fluid.Executor(fluid.CPUPlace())
            fluid.io.load_persistables(cpu_exe, tgt_base_dir,
                                       main_program=train_prog,