def save_predict_result(results, result_save_path):
    # Load the test set
    test_df = pd.read_csv(test_data_path)
    # Fill in the predictions (the test set is assumed to have 20,000 rows,
    # matching the --num_to_test default below)
    test_df['Prediction'] = results[:20000]
    # Keep only the ID and prediction columns
    test_df = test_df[['QID', 'Prediction']]
    # Save the submission file
    test_df.to_csv(result_save_path, index=None, sep=',')
def get_params():
    # Parse command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", default='train', help="run mode", type=str)
    parser.add_argument("--max_enc_len", default=400, help="Encoder input max sequence length", type=int)
    parser.add_argument("--max_dec_len", default=100, help="Decoder input max sequence length", type=int)
    parser.add_argument("--batch_size", default=batch_size, help="batch size", type=int)
    parser.add_argument("--epochs", default=epochs, help="train epochs", type=int)
    parser.add_argument("--vocab_path", default=vocab_path, help="vocab path", type=str)
    parser.add_argument("--learning_rate", default=0.15, help="Learning rate", type=float)
    parser.add_argument("--adagrad_init_acc", default=0.1,
                        help="Adagrad optimizer initial accumulator value. "
                             "Please refer to the Adagrad optimizer API documentation "
                             "on the TensorFlow site for more details.", type=float)
    parser.add_argument('--rand_unif_init_mag', default=0.02,
                        help='magnitude for LSTM cells random uniform initialization', type=float)
    parser.add_argument('--trunc_norm_init_std', default=1e-4,
                        help='std of truncated normal init, used for initializing everything else', type=float)
    parser.add_argument('--cov_loss_wt', default=1.0,
                        help='Weight of coverage loss (lambda in the paper). '
                             'If zero, then no incentive to minimize coverage loss.', type=float)
    parser.add_argument('--max_grad_norm', default=2.0, help='for gradient clipping', type=float)
    parser.add_argument("--vocab_size", default=vocab_size, help="max vocab size, None -> max", type=int)
    parser.add_argument("--beam_size", default=batch_size,
                        help="beam size for beam search decoding (must be equal to batch size in decode mode)",
                        type=int)
    parser.add_argument("--embed_size", default=300, help="Word embedding dimension", type=int)
    parser.add_argument("--enc_units", default=256, help="Encoder GRU cell units number", type=int)
    parser.add_argument("--dec_units", default=256, help="Decoder GRU cell units number", type=int)
    parser.add_argument("--attn_units", default=256,
                        help="[context vector, decoder state, decoder input] feedforward "
                             "result dimension - this result is used to compute the attention weights",
                        type=int)
    parser.add_argument("--train_seg_x_dir", default=train_x_seg_path, help="train_seg_x_dir", type=str)
    parser.add_argument("--train_seg_y_dir", default=train_y_seg_path, help="train_seg_y_dir", type=str)
    parser.add_argument("--test_seg_x_dir", default=test_x_seg_path, help="test_seg_x_dir", type=str)
    parser.add_argument("--checkpoint_dir", default=checkpoint_dir, help="checkpoint_dir", type=str)
    parser.add_argument("--checkpoints_save_steps", default=5, help="Save checkpoints every N steps", type=int)
    parser.add_argument("--min_dec_steps", default=4, help="min_dec_steps", type=int)
    parser.add_argument("--max_train_steps", default=sample_total // batch_size, help="max_train_steps", type=int)
    # NOTE: argparse's type=bool does not parse 'False' correctly; see the
    # str2bool sketch below.
    parser.add_argument("--save_batch_train_data", default=False, help="save batch train data to pickle", type=bool)
    parser.add_argument("--load_batch_train_data", default=False, help="load batch train data from pickle", type=bool)
    parser.add_argument("--test_save_dir", default=save_result_dir, help="test_save_dir", type=str)
    parser.add_argument("--pointer_gen", default=False, help="pointer_gen", type=bool)
    parser.add_argument("--use_coverage", default=False, help="use_coverage", type=bool)
    parser.add_argument("--greedy_decode", default=False, help="greedy_decode", type=bool)
    parser.add_argument("--result_save_path", default=get_result_filename(batch_size, epochs, 200, 300),
                        help='result_save_path', type=str)

    args = parser.parse_args()
    params = vars(args)
    return params
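# The boolean flags above use argparse's `type=bool`, which treats any
# non-empty string (including "False") as True. A minimal fix is a converter
# like the sketch below (an assumption, not part of the original code); pass it
# as `type=str2bool` in place of `type=bool`.
def str2bool(v):
    """Parse common true/false spellings from the command line."""
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    if v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')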
def train_models(checkpoint_dir, test_sentence, vocab_path, reverse_vocab_path, test=False):
    # Build the training and test sets
    train_data_X, train_data_Y, test_data_X, wv_model, X_max_len, train_y_max_len = build_dataset(
        train_data_path, test_data_path, save_wv_model_path, testOnly=test)

    # Vocabulary size (vocab / reverse_vocab are assumed to be loaded from
    # vocab_path / reverse_vocab_path elsewhere in the module)
    vocab_size = len(vocab)

    params = {}
    params['vocab_size'] = vocab_size
    params['input_length'] = train_data_X.shape[1]

    vocab_inp_size = vocab_size
    vocab_tar_size = vocab_size
    input_length = train_data_X.shape[1]
    output_length = train_data_Y.shape[1]
    BUFFER_SIZE = len(train_data_X)
    steps_per_epoch = len(train_data_X) // BATCH_SIZE
    start_index = train_ids_y[0][0]

    # Dataset generator
    dataset = tf.data.Dataset.from_tensor_slices(
        (train_data_X, train_data_Y)).shuffle(BUFFER_SIZE)
    dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

    # create sample input and target
    # example_input_batch, example_target_batch = next(iter(dataset))

    # create encoder model
    encoder = Encoder(vocab_inp_size, embedding_dim, embedding_matrix, input_length, units, BATCH_SIZE)
    # create decoder model
    decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE)
    # model = Seq2Seq(params)

    # Define the optimizer and the loss function
    # optimizer = tf.keras.optimizers.Adam(1e-3)
    optimizer = tf.keras.optimizers.Adagrad(1e-3)

    # Checkpoints (object-based saving)
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, encoder=encoder, decoder=decoder)

    if test:
        # Test only and plot results.
        #
        # * The evaluate function is similar to the training loop, except we don't
        #   use *teacher forcing* here. The input to the decoder at each time step
        #   is its previous prediction, along with the hidden state and the
        #   encoder output.
        # * Stop predicting when the model predicts the *end token*.
        # * Store the *attention weights for every time step*.
        #
        # Note: the encoder output is calculated only once per input.

        # Restore the latest checkpoint in checkpoint_dir
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        # result, sentence, attention_plot = evaluate(encoder, decoder, test_sentence, vocab,
        #                                             reverse_vocab, units, input_length,
        #                                             train_y_max_len, start_index)
        results = model_predict(encoder, decoder, test_data_X, BATCH_SIZE, vocab,
                                reverse_vocab, train_y_max_len, start_index)
        # print(results[1005])

        # Load the submission data
        test_df = pd.read_csv(test_data_path)
        test_df.head()
        # Report any empty predictions
        for idx, result in enumerate(results):
            if result == '':
                print(idx)
        # Fill in the predictions
        test_df['Prediction'] = results
        # Keep only the ID and prediction columns
        test_df = test_df[['QID', 'Prediction']]
        test_df.head()
        # Check for missing values
        # for predic in test_df['Prediction']:
        #     if type(predic) != str:
        #         print(predic)
        test_df['Prediction'] = test_df['Prediction'].apply(submit_proc)
        test_df.head()
        # Build the result save path
        result_save_path = get_result_filename(
            BATCH_SIZE, EPOCHS, X_max_len, embedding_dim, commit='_4_1_submit_seq2seq_code')
        # Save the results
        test_df.to_csv(result_save_path, index=None, sep=',')
        # Read the results back
        test_df = pd.read_csv(result_save_path)
        # Inspect the format
        test_df.head(10)
        # print('Input: %s' % (sentence))
        # print('Predicted report: {}'.format(result))
        # attention_plot = attention_plot[:len(result.split(' ')), :len(sentence.split(' '))]
        # plot_attention(attention_plot, sentence.split(' '), result.split(' '))
    else:
        # Training:
        #
        # 1. Pass the *input* through the *encoder*, which returns the *encoder
        #    output* and the *encoder hidden state*.
        # 2. The encoder output, encoder hidden state and the decoder input (which
        #    is the *start token*) are passed to the decoder.
        # 3. The decoder returns the *predictions* and the *decoder hidden state*.
        # 4. The decoder hidden state is then passed back into the model and the
        #    predictions are used to calculate the loss.
        # 5. Use *teacher forcing* to decide the next input to the decoder.
        # 6. *Teacher forcing* is the technique where the *target word* is passed
        #    as the *next input* to the decoder.
        # 7. The final step is to calculate the gradients, apply them with the
        #    optimizer and backpropagate. (See the train_step sketch below.)
        for epoch in range(EPOCHS):
            start = time.time()
            total_loss = 0
            for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
                batch_loss = train_step(encoder, decoder, inp, targ, optimizer, start_index)
                total_loss += batch_loss
                if batch % 100 == 0:
                    print('Epoch {} Batch {} Loss {:.4f}'.format(
                        epoch + 1, batch, batch_loss.numpy()))
            # Save a checkpoint every epoch
            checkpoint.save(file_prefix=checkpoint_prefix)
            print('Epoch {} Loss {:.4f}'.format(epoch + 1, total_loss / steps_per_epoch))
            print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))
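# The loop above calls train_step, which is not shown in this section. Below is
# a minimal sketch of a matching implementation (an assumption reconstructed
# from steps 1-7 in the comments, following the standard TensorFlow NMT
# pattern). It assumes encoder(inp) returns (enc_output, enc_hidden), that
# decoder(dec_input, dec_hidden, enc_output) returns (predictions, dec_hidden,
# attention_weights) with logits output, and that padding tokens have id 0.
import tensorflow as tf

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
    # Mask out padding positions so they do not contribute to the loss
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    loss_ *= tf.cast(mask, dtype=loss_.dtype)
    return tf.reduce_mean(loss_)

def train_step(encoder, decoder, inp, targ, optimizer, start_index):
    loss = 0
    with tf.GradientTape() as tape:
        # 1. Encode the whole input sequence once
        enc_output, enc_hidden = encoder(inp)
        dec_hidden = enc_hidden
        # 2. The first decoder input is the start token for every sequence in the batch
        dec_input = tf.expand_dims([start_index] * inp.shape[0], 1)
        for t in range(1, targ.shape[1]):
            # 3./4. Decode one step and accumulate the masked loss
            predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)
            loss += loss_function(targ[:, t], predictions)
            # 5./6. Teacher forcing: feed the ground-truth token as the next input
            dec_input = tf.expand_dims(targ[:, t], 1)
    # 7. Compute gradients over both models and apply them
    batch_loss = loss / int(targ.shape[1])
    variables = encoder.trainable_variables + decoder.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return batch_loss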
def main():
    # Parse command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", default='train', help="run mode", type=str)
    parser.add_argument("--max_enc_len", default=400, help="Encoder input max sequence length", type=int)
    parser.add_argument("--max_dec_len", default=100, help="Decoder input max sequence length", type=int)
    parser.add_argument("--batch_size", default=batch_size, help="batch size", type=int)
    parser.add_argument("--epochs", default=epochs, help="train epochs", type=int)
    parser.add_argument("--vocab_path", default=vocab_path, help="vocab path", type=str)
    parser.add_argument("--learning_rate", default=0.15, help="Learning rate", type=float)
    parser.add_argument("--adagrad_init_acc", default=0.1,
                        help="Adagrad optimizer initial accumulator value. "
                             "Please refer to the Adagrad optimizer API documentation "
                             "on the TensorFlow site for more details.", type=float)
    parser.add_argument('--rand_unif_init_mag', default=0.02,
                        help='magnitude for LSTM cells random uniform initialization', type=float)
    parser.add_argument('--eps', default=1e-12, help='eps', type=float)
    parser.add_argument('--trunc_norm_init_std', default=1e-4,
                        help='std of truncated normal init, used for initializing everything else', type=float)
    parser.add_argument('--cov_loss_wt', default=1.0,
                        help='Weight of coverage loss (lambda in the paper). '
                             'If zero, then no incentive to minimize coverage loss.', type=float)
    parser.add_argument('--max_grad_norm', default=2.0, help='for gradient clipping', type=float)
    parser.add_argument("--vocab_size", default=50000, help="max vocab size, None -> max", type=int)
    parser.add_argument("--max_vocab_size", default=50000, help="max vocab size, None -> max", type=int)
    parser.add_argument("--beam_size", default=batch_size,
                        help="beam size for beam search decoding (must be equal to batch size in decode mode)",
                        type=int)
    parser.add_argument("--embed_size", default=300, help="Word embedding dimension", type=int)
    parser.add_argument("--enc_units", default=128, help="Encoder GRU cell units number", type=int)
    parser.add_argument("--dec_units", default=256, help="Decoder GRU cell units number", type=int)
    parser.add_argument("--attn_units", default=256,
                        help="[context vector, decoder state, decoder input] feedforward "
                             "result dimension - this result is used to compute the attention weights",
                        type=int)
    parser.add_argument("--train_seg_x_dir", default=train_x_seg_path, help="train_seg_x_dir", type=str)
    parser.add_argument("--train_seg_y_dir", default=train_y_seg_path, help="train_seg_y_dir", type=str)
    parser.add_argument("--val_seg_x_dir", default=val_x_seg_path, help="val_seg_x_dir", type=str)
    parser.add_argument("--val_seg_y_dir", default=val_y_seg_path, help="val_seg_y_dir", type=str)
    parser.add_argument("--test_seg_x_dir", default=test_x_seg_path, help="test_seg_x_dir", type=str)
    parser.add_argument("--checkpoint_dir", default=checkpoint_dir, help="checkpoint_dir", type=str)
    parser.add_argument("--checkpoints_save_steps", default=5, help="Save checkpoints every N steps", type=int)
    parser.add_argument("--min_dec_steps", default=4, help="min_dec_steps", type=int)
    # Cast to int: argparse only applies type=int to command-line strings, not
    # to defaults, so a plain division would leave a float here
    parser.add_argument("--max_train_steps", default=int(500000 / (batch_size / 8)),
                        help="max_train_steps", type=int)
    # parser.add_argument("--max_train_steps", default=50, help="max_train_steps", type=int)
    parser.add_argument("--save_batch_train_data", default=False, help="save batch train data to pickle", type=bool)
    parser.add_argument("--load_batch_train_data", default=False, help="load batch train data from pickle", type=bool)
    parser.add_argument("--test_save_dir", default=save_result_dir, help="test_save_dir", type=str)
    parser.add_argument("--pointer_gen", default=True, help="pointer_gen", type=bool)
    parser.add_argument("--use_coverage", default=True, help="use_coverage", type=bool)
    parser.add_argument("--greedy_decode", default=False, help="greedy_decode", type=bool)
    parser.add_argument("--result_save_path", default=get_result_filename(batch_size, epochs, 200, 300),
                        help='result_save_path', type=str)
    parser.add_argument("--max_num_to_eval", default=5, help="max_num_to_eval", type=int)
    parser.add_argument("--num_to_test", default=20000, help="num_to_test", type=int)
    parser.add_argument("--gpu_memory", default=30, help="gpu_memory GB", type=int)

    args = parser.parse_args()
    params = vars(args)
    # print(params)

    if params["mode"] == "train":
        train(params)
    elif params["mode"] == "test":
        params['beam_size'] = 2
        params['batch_size'] = 2
        result_save_path = params['result_save_path']
        predict_result(params, result_save_path)
        # test_and_save(params)
    elif params["mode"] == "eval":
        evaluate(params)
    elif params['mode'] == 'auto':
        # PGN training
        params['mode'] = 'train'
        # params['use_coverage'] = False
        # params['epochs'] = 30
        params['use_coverage'] = True
        params['epochs'] = 30
        train(params)
        # Predict results
        params['mode'] = 'test'
        params['beam_size'] = 2
        params['batch_size'] = 2
        result_save_path = params['result_save_path']
        predict_result(params, result_save_path)
        # Evaluate
        params['mode'] = 'eval'
        evaluate(params)
def test_and_save(params):
    assert params["test_save_dir"], "provide a dir to save the results"
    gen = test(params)
    results = []
    with tqdm(total=params["num_to_test"], position=0, leave=True) as pbar:
        for i in range(params["num_to_test"]):
            trial = next(gen)
            results.append(trial.abstract)
            pbar.update(1)
    return results


def predict_result(params, result_save_path):
    # Generate predictions
    results = test_and_save(params)
    # Save the submission file
    save_predict_result(results, result_save_path)


if __name__ == '__main__':
    # Parse command-line arguments (note: main() above is never invoked here;
    # this entry point hard-codes test mode)
    params = get_params()
    params['batch_size'] = 3
    params['beam_size'] = 3
    params['mode'] = 'test'
    result_save_path = get_result_filename(params['batch_size'], 30, 400, 300)
    predict_result(params, result_save_path)
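# train_models above applies submit_proc to clean predictions before saving,
# but the helper is not shown in this section. A minimal sketch of what it
# likely does (an assumption; the fallback phrase is a hypothetical
# placeholder, not the project's actual default):
def submit_proc(sentence):
    # Replace empty or non-string predictions so the submission file has no blanks
    if not isinstance(sentence, str) or sentence.strip() == '':
        return '随时联系。'  # hypothetical fallback; substitute a domain-appropriate default
    return sentence.strip()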