# --- Fragment: dataset load, embedding build, model + optimizer setup, and
# optional restore of pretrained sub-modules.
# NOTE(review): the target list of this unpack starts on an earlier line; only
# the trailing names are visible here — confirm against the full file.
test_sql_data, test_table_data, \
    TRAIN_DB, DEV_DB, TEST_DB = load_dataset(use_small=USE_SMALL)

#word_emb = load_word_emb('glove/glove.%dB.%dd.txt'%(B_word,N_word), \
#        load_used=args.train_emb, use_small=USE_SMALL)

# Build word embeddings: db_content == 0 builds type-aware embeddings
# conditioned on the dev split; otherwise concatenate GloVe + paragram vectors.
if args.db_content == 0:
    word_emb = load_word_and_type_emb('glove/glove.42B.300d.txt',
                                      "para-nmt-50m/data/paragram_sl999_czeng.txt",
                                      val_sql_data, val_table_data,
                                      args.db_content,
                                      is_list=True, use_htype=False)
else:
    word_emb = load_concat_wemb(
        'glove/glove.42B.300d.txt',
        "para-nmt-50m/data/paragram_sl999_czeng.txt")

model = SQLNet(word_emb, N_word=N_word, gpu=GPU,
               trainable_emb=args.train_emb, db_content=args.db_content)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0)

agg_m, sel_m, cond_m, agg_e, sel_e, cond_e = best_model_name(args)

if args.train_emb:  # Load pretrained model.
    agg_lm, sel_lm, cond_lm = best_model_name(args, for_load=True)
    # FIX: Python 2 `print` statements replaced with print() calls — the
    # parenthesized single-argument form is valid under both Python 2 and 3
    # and consistent with the py3-style chunks elsewhere in this file.
    print("Loading from %s" % agg_lm)
    model.agg_pred.load_state_dict(torch.load(agg_lm))
    print("Loading from %s" % sel_lm)
    model.selcond_pred.load_state_dict(torch.load(sel_lm))
    print("Loading from %s" % cond_lm)
# NOTE(review): fragment starts inside an if/elif chain selecting the word
# embedding by N_word; the opening `if` branch is outside this view, so the
# two leading prints belong to that earlier branch.
    print("Using GloVe 50d")
    print()
elif N_word == 600:
    # Concatenate GloVe (300d) with paragram vectors; each source is loaded at
    # N_word/2 dimensions so the concatenation is N_word wide.
    word_emb = load_concat_wemb(
        './glove/glove.42B.300d.txt',
        './para-nmt-50m/data/paragram_sl999_czeng.txt',
        dim=int(N_word / 2))
    print("Using GloVe 300d")
    print()

# 'mixed' ensemble: one plain-GloVe SQLNet plus one BERT-augmented SQLNet.
# NOTE(review): indentation context is ambiguous in the collapsed source —
# presumably top level; confirm against the full file.
if args.ensemble == 'mixed':
    model_1 = SQLNet(word_emb, N_word=N_word, gpu=GPU,
                     trainable_emb=args.train_emb,
                     db_content=args.db_content,
                     word_emb_bert=None,
                     BERT=False,
                     types=args.types,
                     POS=args.POS)
    model_2 = SQLNet(word_emb, N_word=N_word, gpu=GPU,
                     trainable_emb=args.train_emb,
                     db_content=args.db_content,
                     word_emb_bert=bert_tuple,
                     BERT=args.BERT,
                     types=args.types,
                     POS=args.POS)
#TODO: Change optimizer to RAdam as soon as there is an implementation available in PyTorch
# NOTE(review): tail of a multi-target unpack — the leading target names are on
# an earlier line outside this view.
test_sql_data, test_table_data, \
    TRAIN_DB, DEV_DB, TEST_DB = load_dataset(use_small=USE_SMALL)

#word_emb = load_word_emb('glove/glove.%dB.%dd.txt'%(B_word,N_word), \
#        load_used=args.train_emb, use_small=USE_SMALL)

# Choose the word embedding: db_content == 0 builds type-aware embeddings from
# the dev split; otherwise concatenate GloVe and paragram vectors.
if args.db_content == 0:
    word_emb = load_word_and_type_emb('glove/glove.42B.300d.txt',
                                      "para-nmt-50m/data/paragram_sl999_czeng.txt",
                                      val_sql_data, val_table_data,
                                      args.db_content,
                                      is_list=True, use_htype=False)
else:
    word_emb = load_concat_wemb(
        'glove/glove.42B.300d.txt',
        "para-nmt-50m/data/paragram_sl999_czeng.txt")

model = SQLNet(word_emb, N_word=N_word, gpu=GPU,
               trainable_emb=args.train_emb, db_content=args.db_content)

# Restore each SQLNet sub-module from its best saved checkpoint.
agg_m, sel_m, cond_m, agg_e, sel_e, cond_e = best_model_name(args)
print("Loading from %s" % agg_m)
model.agg_pred.load_state_dict(torch.load(agg_m))
print("Loading from %s" % sel_m)
model.selcond_pred.load_state_dict(torch.load(sel_m))
print("Loading from %s" % cond_m)
model.op_str_pred.load_state_dict(torch.load(cond_m))
#only for loading trainable embedding
print("Loading from %s" % agg_e)
model.agg_type_embed_layer.load_state_dict(torch.load(agg_e))
print("Loading from %s" % sel_e)
model.sel_type_embed_layer.load_state_dict(torch.load(sel_e))
# --- Standalone evaluation / prediction script (Chinese SQLNet variant). ---
n_word = 600
if args.toy:
    use_small = True
    gpu = args.gpu
    batch_size = 16
else:
    use_small = False
    gpu = args.gpu
    batch_size = 128

dev_sql, dev_table, dev_db, test_sql, test_table, test_db = \
    load_dataset(use_small=use_small, mode='test')

#word_emb = load_word_emb('data_zhuiyi/sgns.baidubaike.bigram-char')
word_emb = load_concat_wemb('data_zhuiyi/sgns.baidubaike.bigram-char',
                            'data_zhuiyi/hanlp-wiki-vec-zh')

model = SQLNet(word_emb, N_word=n_word, use_ca=args.ca, gpu=gpu,
               trainable_emb=args.train_emb, db_content=args.db_content)

model_path = 'saved_model/best_model'
# FIX: Python 2 `print` statements replaced with print() calls — the
# parenthesized single-argument form behaves identically under Python 2 and 3
# and matches the py3-style chunks elsewhere in this file.
print("Loading from %s" % model_path)
model.load_state_dict(torch.load(model_path))
print("Loaded model from %s" % model_path)

# dev_acc layout: index 1 = logic-form accuracy, index 2 = execution accuracy
# (presumably — TODO confirm against epoch_acc's return value).
dev_acc = epoch_acc(model, batch_size, dev_sql, dev_table, dev_db, args.db_content)
print('Dev Logic Form Accuracy: %.3f, Execution Accuracy: %.3f' % (dev_acc[1], dev_acc[2]))

print("Start to predict test set")
predict_test(model, batch_size, test_sql, test_table, args.output_dir, args.db_content)
print("Output path of prediction result is %s" % args.output_dir)

# import json
# import torch
help='set model save directory') parser.add_argument('--train_emb', action='store_true', help='Use trained word embedding for SQLNet.') args = parser.parse_args() _, _, val_sql_data, val_table_data, \ test_sql_data, test_table_data, \ TRAIN_DB, DEV_DB, TEST_DB = load_dataset() word_emb = load_concat_wemb("glove/glove.42B.300d.txt", "para-nmt-50m/paragram_sl999_czeng.txt") model = SQLNet(word_emb, N_word=600, gpu=False, trainable_emb=False, db_content=1) agg_m, sel_m, cond_m, agg_e, sel_e, cond_e = best_model_name(args) print "Loading from %s" % agg_m model.agg_pred.load_state_dict(torch.load(agg_m)) print "Loading from %s" % sel_m model.selcond_pred.load_state_dict(torch.load(sel_m)) print "Loading from %s" % cond_m model.op_str_pred.load_state_dict(torch.load(cond_m)) # only for loading trainable embedding print "Loading from %s" % agg_e model.agg_type_embed_layer.load_state_dict(torch.load(agg_e)) print "Loading from %s" % sel_e model.sel_type_embed_layer.load_state_dict(torch.load(sel_e))
def train(params, glove):
    """Train the three SQLNet slot-filling models (AGG, SEL, COND).

    Loads the tokenized WikiSQL-style splits, builds word embeddings and a
    SQLNet model, trains for a fixed number of epochs while checkpointing the
    best validation accuracy per slot, prints a metrics summary, and optionally
    evaluates on the test set.

    params: positional config list —
        params[1]: model save directory
        params[2]: '0'/'1' — whether DB content is available
        params[3]: "true"/"false" — use the GPU
        params[4]: optimizer name ("adam" | "sgd" | "adadelta")
        params[5]: number of hidden units
        params[6]: "yes" to run test-set evaluation after training
    glove:  filename of the GloVe embedding file under glove/.
    """
    print()
    # ---------------------------- Initialization ----------------------------
    N_word = 600
    learning_rate = 1e-3
    BATCH_SIZE = 64
    EPOCHS = 7
    UNITS = int(params[5])
    if (params[3].lower() == "true"):
        GPU = True
    else:
        GPU = False
    print("GPU Available:", GPU)
    # FIX: typo in the progress message ("Numbe-Of-Units").
    print("Number-Of-Units:", UNITS)
    if params[2].lower() == '0':
        print("DB Content: Not_Available")
    else:
        print("DB Content: Available")
    print()
    print()
    # $Slots filling problem (3 Models)
    # $Slots_Models: (Model_AGG, Model_SEL, Model_COND)
    TEST_ENTRY = (True, True, True)
    TRAIN_ENTRY = (True, True, True)
    TRAIN_AGG, TRAIN_SEL, TRAIN_COND = TRAIN_ENTRY  # (AGG, SEL, COND)
    print("Training $Slots_Models:")
    print(" TRAIN_AGG=", TRAIN_AGG)
    print(" TRAIN_SEL=", TRAIN_SEL)
    print(" TRAIN_COND", TRAIN_COND)
    print()

    # ---------------------------- Load DataSets ----------------------------
    # Data for training:   (train_tok/train_tok.tables)
    # Data for testing:    (test_tok/test_tok.tables)
    # Data for evaluation: (dev_tok/dev_tok.tables)
    print("Load data...")
    train_sql_data, table_data = load_data('data/train_tok.jsonl',
                                           'data/train_tok.tables.jsonl')
    val_sql_data, val_table_data = load_data('data/dev_tok.jsonl',
                                             'data/dev_tok.tables.jsonl')
    test_sql_data, test_table_data = load_data('data/test_tok.jsonl',
                                               'data/test_tok.tables.jsonl')
    TRAIN_DB = 'data/train.db'
    DEV_DB = 'data/dev.db'
    TEST_DB = 'data/test.db'
    print()

    # ---------------------------- Models_Location ----------------------------
    agg_m, sel_m, cond_m, agg_e, sel_e, cond_e = Best_model_name(params)
    print("Location:", params[1])
    print(' ', agg_m, sel_m, cond_m)
    print(' ', agg_e, sel_e, cond_e)
    print()

    # ------------------ Load Glove_Embeddings & SQLNet_Model ------------------
    # content=0: Training with no tables' entries
    # content=1: Training with tables' entries available
    train_emb = False
    if params[2].lower() == '0':
        word_emb = load_word_and_type_emb('glove/' + glove,
                                          "para-nmt-50m/data/paragram_sl999_czeng.txt",
                                          val_sql_data, val_table_data, 0,
                                          is_list=True, use_htype=False)
        print()
        model = SQLNet(word_emb, N_word=N_word, gpu=GPU,
                       trainable_emb=train_emb, N_h=UNITS, db_content=0)
    else:
        word_emb = load_concat_wemb('glove/' + glove,
                                    "para-nmt-50m/data/paragram_sl999_czeng.txt")
        print()
        model = SQLNet(word_emb, N_word=N_word, gpu=GPU,
                       trainable_emb=train_emb, N_h=UNITS, db_content=1)
    print()

    if (params[4].lower() == "adam"):
        print("Optimizer: ADAM")
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=learning_rate, weight_decay=0)
    elif (params[4].lower() == "sgd"):
        print("Optimizer: SGD")
        optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    elif (params[4].lower() == "adadelta"):
        print("Optimizer: AdaDelta")
        optimizer = torch.optim.Adadelta(model.parameters(), lr=1.0, rho=0.9,
                                         eps=1e-06, weight_decay=0)
    else:
        # FIX: previously an unknown optimizer name left `optimizer` unbound
        # and crashed later with a confusing NameError.
        raise ValueError("Unknown optimizer: %r" % params[4])
    print()

    # ---------------------------- Training ----------------------------
    print(" --------------- Start Training --------------- ")
    print(" train data", len(train_sql_data), "table len", len(table_data))
    print(" val. data ", len(val_sql_data), "table len", len(val_table_data))
    print(" test data", len(test_sql_data), "table len", len(test_table_data))

    # Initial (untrained) validation accuracy; seeds the best-so-far trackers.
    init_acc = epoch_acc(model, BATCH_SIZE, val_sql_data, val_table_data,
                         TRAIN_ENTRY, int(params[2]))
    print()
    best_agg_acc = init_acc[1][0]
    best_agg_idx = 0
    best_sel_acc = init_acc[1][1]
    best_sel_idx = 0
    best_cond_acc = init_acc[1][2]
    best_cond_idx = 0
    print("Accuracy", init_acc[0])
    print(" $agg:", np.round(init_acc[1][0], 3))
    print(" $sel:", np.round(init_acc[1][1], 3))
    print(" cond:", np.round(init_acc[1][2], 3),
          "[$cond_num, $cond_col,$cond_op,$cond_val]",
          np.round(init_acc[1][3:], 3))
    # Save the initial weights so a checkpoint always exists.
    if TRAIN_AGG:
        torch.save(model.agg_pred.state_dict(), agg_m)
        torch.save(model.agg_type_embed_layer.state_dict(), agg_e)
    if TRAIN_SEL:
        torch.save(model.selcond_pred.state_dict(), sel_m)
        torch.save(model.sel_type_embed_layer.state_dict(), sel_e)
    if TRAIN_COND:
        torch.save(model.op_str_pred.state_dict(), cond_m)
        torch.save(model.cond_type_embed_layer.state_dict(), cond_e)
    print()

    # Total validation
    Train_Loss = []
    Train_Acc = []
    Val_Acc = []
    Time = []
    # Training Set
    tr_agg = []
    tr_sel = []
    tr_cond = []
    tr_num = []
    tr_col = []
    tr_opr = []
    tr_val = []
    # Validation Set
    val_agg = []
    val_sel = []
    val_cond = []
    val_num = []
    val_col = []
    val_opr = []
    val_val = []

    for i in range(EPOCHS):
        print(" * * * * * * * * * Epoch ", i + 1, " * * * * * * * * *")
        # FIX: time.clock() was removed in Python 3.8; perf_counter() measures
        # elapsed wall-clock intervals.
        tic = time.perf_counter()
        loss = epoch_train(model, optimizer, BATCH_SIZE, train_sql_data,
                           table_data, TRAIN_ENTRY, int(params[2]))
        Train_Loss.append(loss)
        print(' Loss = ', loss)
        acc = epoch_acc(model, BATCH_SIZE, train_sql_data, table_data,
                        TRAIN_ENTRY, int(params[2]))
        Train_Acc.append(acc[0])
        tr_agg.append(acc[1][0])
        tr_sel.append(acc[1][1])
        tr_cond.append(acc[1][2])
        tr_num.append(acc[1][3])
        tr_col.append(acc[1][4])
        tr_opr.append(acc[1][5])
        tr_val.append(acc[1][6])
        print()
        Print_Acc(acc, "Train")

        # Validation set
        val_acc = epoch_acc(model, BATCH_SIZE, val_sql_data, val_table_data,
                            TRAIN_ENTRY, int(params[2]), False)
        Val_Acc.append(val_acc[0])
        val_agg.append(val_acc[1][0])
        val_sel.append(val_acc[1][1])
        val_cond.append(val_acc[1][2])
        val_num.append(val_acc[1][3])
        val_col.append(val_acc[1][4])
        val_opr.append(val_acc[1][5])
        val_val.append(val_acc[1][6])
        print()
        Print_Acc(val_acc)

        # Training time for each epoch
        toc = time.perf_counter()
        Time.append(toc - tic)
        # FIX: previously printed the raw counter value `toc`; the epoch
        # duration is toc - tic (seconds, then minutes).
        print("Epoch duration:", toc - tic, (toc - tic) / 60)

        # Checkpoint each slot model when its validation accuracy improves.
        # FIX: the AGG branch was missing even though best_agg_acc/best_agg_idx
        # are tracked — mirrored from the SEL/COND branches below.
        if TRAIN_AGG:
            if val_acc[1][0] > best_agg_acc:
                best_agg_acc = val_acc[1][0]
                best_agg_idx = i + 1
                torch.save(model.agg_pred.state_dict(),
                           params[1] + '/epoch%d.agg_model%s' % (i + 1, ''))
                torch.save(model.agg_pred.state_dict(), agg_m)
                torch.save(model.agg_type_embed_layer.state_dict(),
                           params[1] + '/epoch%d.agg_embed%s' % (i + 1, ''))
                torch.save(model.agg_type_embed_layer.state_dict(), agg_e)
        if TRAIN_SEL:
            if val_acc[1][1] > best_sel_acc:
                best_sel_acc = val_acc[1][1]
                best_sel_idx = i + 1
                torch.save(model.selcond_pred.state_dict(),
                           params[1] + '/epoch%d.sel_model%s' % (i + 1, ''))
                torch.save(model.selcond_pred.state_dict(), sel_m)
                torch.save(model.sel_type_embed_layer.state_dict(),
                           params[1] + '/epoch%d.sel_embed%s' % (i + 1, ''))
                torch.save(model.sel_type_embed_layer.state_dict(), sel_e)
        if TRAIN_COND:
            if val_acc[1][2] > best_cond_acc:
                best_cond_acc = val_acc[1][2]
                best_cond_idx = i + 1
                torch.save(model.op_str_pred.state_dict(),
                           params[1] + '/epoch%d.cond_model%s' % (i + 1, ''))
                torch.save(model.op_str_pred.state_dict(), cond_m)
                torch.save(model.cond_type_embed_layer.state_dict(),
                           params[1] + '/epoch%d.cond_embed%s' % (i + 1, ''))
                torch.save(model.cond_type_embed_layer.state_dict(), cond_e)

    print('\n* * * * * * * * * * * * Results * * * * * * * * * * * *')
    print(params)
    print()
    print('Exec time:', sum(Time))
    print('Avg time:', sum(Time) / len(Time))
    print("Time=", np.round(Time, 3))
    print('Exec time:', sum(Time) / 60)
    print('Avg time:', (sum(Time) / len(Time)) / 60)
    print("Time=", np.round(Time, 3) / 60)
    print()
    print("Train_Loss=", np.round(Train_Loss, 3))
    print("Train_Acc=", np.round(Train_Acc, 3))
    print("Val_Acc=", np.round(Val_Acc, 3))
    print()
    print("tr_agg=", np.round(tr_agg, 3))
    print("tr_sel=", np.round(tr_sel, 3))
    print("tr_cond=", np.round(tr_cond, 3))
    print("tr_num=", np.round(tr_num, 3))
    print("tr_col=", np.round(tr_col, 3))
    print("tr_opr =", np.round(tr_opr, 3))
    print("tr_val=", np.round(tr_val, 3))
    print()
    print("val_agg=", np.round(val_agg, 3))
    print("val_sel=", np.round(val_sel, 3))
    print("val_cond=", np.round(val_cond, 3))
    print("val_num=", np.round(val_num, 3))
    print("val_col=", np.round(val_col, 3))
    print("val_opr =", np.round(val_opr, 3))
    print("val_val=", np.round(val_val, 3))
    print()
    print('Exec time:', sum(Time) / 60)
    print('Avg time:', (sum(Time) / len(Time)) / 60)
    print("Time=", [a / 60 for a in np.round(Time, 3)])

    if (params[6].lower() == "yes"):
        print()
        print(" --------------- Testing --------------- ")
        # FIX: test metrics were computed twice — once inline inside the print
        # and again into ACC/exec_Acc — doubling evaluation cost and printing
        # duplicate lines. Evaluate once and print once.
        ACC = epoch_acc(model, BATCH_SIZE, test_sql_data, test_table_data,
                        TEST_ENTRY, int(params[2]))
        exec_Acc = epoch_exec_acc(model, BATCH_SIZE, test_sql_data,
                                  test_table_data, TEST_DB, int(params[2]))
        print("Test acc_qm: %s;\n breakdown on (agg, sel, where): %s" % ACC)
        print("Test execution acc: %s" % exec_Acc)
# Augment the test SQL tokens with POS tags before feature extraction.
test_sql_data = update_sql_data_pos(test_sql_data)
print("SQL data has been updated with POS tags for each token")
print()

# Ensemble setup: two member models whose checkpoint names both come from
# best_model_name(args).
if args.ensemble != 'single':
    agg_m1, sel_m1, cond_m1, agg_e1, sel_e1, cond_e1, \
        agg_m2, sel_m2, cond_m2, agg_e2, sel_e2, cond_e2 = best_model_name(args)
    if args.ensemble == 'mixed':
        # 'mixed': one plain model (no BERT) and one BERT-augmented model.
        model_1 = SQLNet(word_emb, N_word=N_word, gpu=GPU,
                         trainable_emb=args.train_emb,
                         db_content=args.db_content,
                         word_emb_bert=None,
                         BERT=False,
                         types=args.types,
                         POS=args.POS)
        model_2 = SQLNet(word_emb, N_word=N_word, gpu=GPU,
                         trainable_emb=args.train_emb,
                         db_content=args.db_content,
                         word_emb_bert=bert_tuple,
                         BERT=args.BERT,
                         types=args.types,
                         POS=args.POS)
    # NOTE(review): the body of this branch continues beyond this view.
    elif args.ensemble == 'homogeneous' and args.BERT:
use_small = True gpu = args.gpu batch_size = 2 else: use_small = False gpu = args.gpu batch_size = args.bs learning_rate = 1e-3 # load dataset train_sql, train_table, train_db, dev_sql, dev_table, dev_db = load_dataset(use_small=use_small) #word_emb = load_word_emb('data_zhuiyi/sgns.baidubaike.bigram-char') word_emb = load_concat_wemb('data_zhuiyi/sgns.baidubaike.bigram-char', 'data_zhuiyi/sgns.baidubaike.bigram-char') #word_emb = load_concat_wemb('data_zhuiyi/char_embedding', 'data_zhuiyi/char_embedding') model = SQLNet(word_emb, N_word=n_word, use_ca=args.ca, gpu=gpu, trainable_emb=args.train_emb, db_content=args.db_content) optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0) if args.restore: model_path = 'saved_model/best_model' print "Loading trained model from %s" % model_path model.load_state_dict(torch.load(model_path)) best_sn, best_sc, best_sa, best_wn, best_wc, best_wo, best_wv, best_wr = 0, 0, 0, 0, 0, 0, 0, 0 best_sn_idx, best_sc_idx, best_sa_idx, best_wn_idx, best_wc_idx, best_wo_idx, best_wv_idx, best_wr_idx = 0, 0, 0, 0, 0, 0, 0, 0 best_lf, best_lf_idx = 0.0, 0 best_ex, best_ex_idx = 0.0, 0 print "#" * 20 + " Star to Train " + "#" * 20 for i in range(args.epoch): print 'Epoch %d' % (i + 1)
# Evaluate the db_content == 0 configuration using a hand-picked epoch-7
# checkpoint rather than the automatically chosen best model.
TEST_ENTRY = (True, True, True)  # (AGG, SEL, COND)

# Give manually the path to the saved Models.
agg_m = "Sav_Models/Saved_Model_Con0/epoch7.agg_model"
sel_m = "Sav_Models/Saved_Model_Con0/epoch7.sel_model"
cond_m = "Sav_Models/Saved_Model_Con0/epoch7.cond_model"
agg_e = "Sav_Models/Saved_Model_Con0/epoch7.agg_embed"
sel_e = "Sav_Models/Saved_Model_Con0/epoch7.sel_embed"
cond_e = "Sav_Models/Saved_Model_Con0/epoch7.cond_embed"

# Type-aware embeddings built from the dev split (db_content == 0 path).
word_emb = load_word_and_type_emb(
    'glove/' + glove,
    "para-nmt-50m/data/paragram_sl999_czeng.txt",
    val_sql_data, val_table_data, 0,
    is_list=True, use_htype=False)

model = SQLNet(word_emb, N_word=N_word, gpu=GPU,
               trainable_emb=False, db_content=0, N_h=Cont0_UNITS)

#agg_m, sel_m, cond_m, agg_e, sel_e, cond_e = best_model_n(args)

# Restore the prediction heads, then the type-embedding layers (the latter are
# only needed for the trainable-embedding setup). Same order and messages as
# loading each checkpoint line by line.
for _ckpt_path, _sub_module in (
        (agg_m, model.agg_pred),
        (sel_m, model.selcond_pred),
        (cond_m, model.op_str_pred),
        (agg_e, model.agg_type_embed_layer),
        (sel_e, model.sel_type_embed_layer)):
    print(" Loading from", _ckpt_path)
    _sub_module.load_state_dict(torch.load(_ckpt_path))