print("### Loading Train Data ###") data_agent = data_manager(train_path, train=True) # In[6]: print("### Loading Test Data ###") test_agent = data_manager(test_path, train=False) # ## Preprocessing and Padding # In[7]: idx_in_sen, idx_out_sen, mask_in, mask_out, length_in, idx2word, word2idx, remain_idx = transform_orig( [data_agent.orig_data, data_agent.out_sen], min_count=min_count, max_len=[Encoder_max_len, Decoder_max_len], path="Attn_ver1/tmp/tokenizer.pkl") # In[8]: pickle.dump({"orig_word": [idx2word, word2idx]}, open(os.path.join(tmp_path, "tokenizer.pkl"), "wb")) # ## Build Model # In[9]: def Encoder(inputs, dim,
# In[6]:
# Build per-split CSV path lists; only the "all" subset is used here.
train_path = ["data/{}/train.csv".format(x) for x in ["all"]]
test_path = ["data/{}/test.csv".format(x) for x in ["all"]]

print("### Loading Train Data ###")
data_agent = data_manager(train_path, train=True)
print("### Loading Test Data ###")
test_agent = data_manager(test_path, train=False)

# In[7]:
print("\n### Preprocessing ###")
# Index-encode the raw input/output sentences, producing padded index
# sequences, masks, input/output lengths, the vocabulary maps, and the
# indices of retained samples.
idx_in_sen, idx_out_sen, mask_in, mask_out, length_in, length_out, idx2word, word2idx, remain_idx = transform_orig(
    [data_agent.orig_data, data_agent.out_sen],
    min_count=min_count,
    max_len=[Encoder_max_len, Decoder_max_len])

# In[8]:
# Persist the vocabulary maps.
# Fix: use a context manager so the file handle is closed even if
# pickle.dump raises (the original passed a bare open() and never closed it).
with open(os.path.join(tmp_path, "tokenizer.pkl"), "wb") as f:
    pickle.dump({"orig_word": [idx2word, word2idx]}, f)

# In[9]:
print("""
##################################################################################################
####################################### Building Model #########################################
##################################################################################################
""")
# Timestamp taken just before model construction (elapsed time is presumably
# reported later in the file -- not visible in this chunk).
start_time = time.time()