# Data setup for the tagged post/response corpus: builds the word and tag
# dictionaries via get_data.processing and saves them with save_data_dic.
#
# NOTE(review): `get_data`, `save_data_dic` and `batch_size` are not defined
# in this fragment; they are assumed to be provided earlier in the script.

# full_data_len = 190363
# full_data_len = 10
x_path = "data/t-post.txt"
x_tag_path = "data/t-post-tag.txt"
y_path = "data/t-response.txt"
y_tag_path = "data/t-response-tag.txt"
# test_path = "data/toy2.txt"
threshold = 0

# _, _, i2w, w2i, tf, _ = get_data.processing(x_path, y_path, threshold, 0, 1, 1)
# NOTE(review): the `0, 4` arguments look like a sample range; confirm
# against get_data.processing.
X_seqs, y_seqs, y_tag_seqs, i2w, w2i, t2i, i2t, tf, data_x_y = get_data.processing(
    x_path, y_path, x_tag_path, y_tag_path, threshold, 0, 4, batch_size)
# test_data_x_y = get_data.test_processing(test_path, i2w, w2i, batch_size)

# Single-argument print(...) emits the same text under Python 2 and Python 3.
print("#dic = " + str(len(w2i)))
# print "unknown = " + str(tf["<UNknown>"])

# Input and output dimensions both equal the size of the word dictionary.
dim_x = len(w2i)
dim_y = len(w2i)
dim_tag = len(t2i)
num_sents = batch_size

print("save data dic...")
save_data_dic("data/i2w-test-t.pkl", "data/w2i-test-t.pkl", i2w, w2i)
save_data_dic("data/i2t-test-t.pkl", "data/t2i-test-t.pkl", i2t, t2i)
# Model/optimizer selection and data setup for the 10000-sample
# post / left-response / right-response files.
#
# NOTE(review): `get_data`, `save_data_dic` and `batch_size` are not defined
# in this fragment; they are assumed to be provided earlier in the script.
cell = "gru"
# try: sgd, momentum, rmsprop, adagrad, adadelta, adam
optimizer = "adadelta"

x_path = "data/10000X.txt"
y_left_path = "data/10000left.txt"
y_right_path = "data/10000right.txt"
# x_path = "data/post.txt"
# y_left_path = "data/left_r.txt"
# y_right_path = "data/right_r.txt"
threshold = 1

# The dictionaries used below (i2w, w2i) come from this first call.
xs, yls, yrs, i2w, w2i, tf, data_4 = get_data.processing(
    x_path, y_left_path, y_right_path, threshold, 4, 5, 1)
# The xs4/yls4/yrs4/i2w4/w2i4/tf4 names are overwritten by the second call
# below; only data_t1 and data_t2 stay distinct.
xs4, yls4, yrs4, i2w4, w2i4, tf4, data_t1 = get_data.processing(
    x_path, y_left_path, y_right_path, threshold, 10000, 10001, 1)
xs4, yls4, yrs4, i2w4, w2i4, tf4, data_t2 = get_data.processing(
    x_path, y_left_path, y_right_path, threshold, 10001, 10002, 1)
# assert len(data_190353) == 1

# Single-argument print(...) emits the same text under Python 2 and Python 3.
print("#dic = " + str(len(w2i)))
# print "unknown = " + str(tf["<UNknown>"])

# Input and output dimensions both equal the size of the word dictionary.
dim_x = len(w2i)
dim_y = len(w2i)
num_sents = batch_size

print("save data dic...")
save_data_dic("data/i2w10000.pkl", "data/w2i10000.pkl", i2w, w2i)
# Model/optimizer selection and data setup for the tagged NBA
# post/response files; saves the word and tag dictionaries.
#
# NOTE(review): `get_data`, `save_data_dic` and `batch_size` are not defined
# in this fragment; they are assumed to be provided earlier in the script.

# try: gru, lstm
cell = "gru"
# try: sgd, momentum, rmsprop, adagrad, adadelta, adam
optimizer = "sgd"

x_path = "data/nba/basketball-post_0527.txt"
x_tag_path = "data/nba/basketball-post_tag_0527.txt"
y_path = "data/nba/basketball-response_0527.txt"
y_tag_path = "data/nba/basketball-response-tag_0527.txt"
# test_path = "data/toy2.txt"
# test_path = "data/test-100.post"
threshold = 0

# _, _, i2w, w2i, tf, _ = get_data.processing(x_path, y_path, threshold, 0, 1, 1)
# NOTE(review): the `0, 4000` arguments look like a sample range; confirm
# against get_data.processing.
X_seqs, y_seqs, y_tag_seqs, i2w, w2i, t2i, i2t, tf, data_x_y = get_data.processing(
    x_path, y_path, x_tag_path, y_tag_path, threshold, 0, 4000, batch_size)
# test_data_x_y = get_data.test_processing(test_path, i2w, w2i, batch_size)

# Single-argument print(...) emits the same text under Python 2 and Python 3.
print("#dic = " + str(len(w2i)))
# print "unknown = " + str(tf["<UNknown>"])

# Input and output dimensions both equal the size of the word dictionary.
dim_x = len(w2i)
dim_y = len(w2i)
dim_tag = len(t2i)
num_sents = batch_size

print("save data dic...")
save_data_dic("data/nba/i2w-test-t.pkl", "data/nba/w2i-test-t.pkl", i2w, w2i)
save_data_dic("data/nba/i2t-test-t.pkl", "data/nba/t2i-test-t.pkl", i2t, t2i)

# Items are space-joined, matching what a comma-separated Python 2 print
# statement emits, while staying valid under Python 3.
print(" ".join(["#features = ", str(dim_x), "#labels = ", str(dim_y)]))
# Hyper-parameters and data loading for the SMT-train-8000 post/response
# files. Everything after the processing call is currently commented out.
#
# NOTE(review): `get_data` is not defined in this fragment; it is assumed to
# be provided earlier in the script.
read_data_batch = 8000
# full_data_len = 190363
full_data_len = 10000
hidden_size = [100, 200]
# try: gru, lstm
cell = "gru"
# try: sgd, momentum, rmsprop, adagrad, adadelta, adam
optimizer = "adadelta"

x_path = "data/SMT-train-8000.post"
y_path = "data/SMT-train-8000.response"
threshold = 0

xs, ys, i2w, w2i, tf, data_x_y = get_data.processing(x_path, y_path, threshold)
# txs, tys, data_tx_ty = get_data.test_processing

# print "#dic = " + str(len(w2i))
# # print "unknown = " + str(tf["<UNknown>"])
# dim_x = len(w2i)
# dim_y = len(w2i)
# num_sents = batch_size
# print "save data dic..."
# save_data_dic("data/i2w10000.pkl", "data/w2i10000.pkl", i2w, w2i)
# print "#features = ", dim_x, "#labels = ", dim_y
# Data setup for the 10000-sample post / left-response / right-response
# files, using the 9902..10002 arguments, followed by dimension reporting.
#
# NOTE(review): `get_data` and `batch_size` are not defined in this fragment;
# they are assumed to be provided earlier in the script.
x_path = "data/10000X.txt"
y_left_path = "data/10000left.txt"
y_right_path = "data/10000right.txt"
# x_path = "data/post.txt"
# y_left_path = "data/left_r.txt"
# y_right_path = "data/right_r.txt"
threshold = 0

# xs, yls, yrs, i2w, w2i, tf, data_4 = get_data.processing(x_path, y_left_path, y_right_path, threshold, 4, 5, 1)
# xs4, yls4, yrs4, i2w4, w2i4, tf4, data_t1 = get_data.processing(x_path, y_left_path, y_right_path, threshold, 10000, 10001, 1)
# xs4, yls4, yrs4, i2w4, w2i4, tf4, data_t2 = get_data.processing(x_path, y_left_path, y_right_path, threshold, 10001, 10002, 1)
# assert len(data_190353) == 1
xs, yls, yrs, i2w, w2i, tf, data_4 = get_data.processing(
    x_path, y_left_path, y_right_path, threshold, 9902, 10002, batch_size)

# Single-argument print(...) emits the same text under Python 2 and Python 3.
print("#dic = " + str(len(w2i)))
# print "unknown = " + str(tf["<UNknown>"])

# Input and output dimensions both equal the size of the word dictionary.
dim_x = len(w2i)
dim_y = len(w2i)
num_sents = batch_size

# The message is still printed although the save call itself is disabled.
print("save data dic...")
# save_data_dic("data/i2w10000.pkl", "data/w2i10000.pkl", i2w, w2i)

# Items are space-joined, matching what a comma-separated Python 2 print
# statement emits, while staying valid under Python 3.
print(" ".join(["#features = ", str(dim_x), "#labels = ", str(dim_y)]))
print("compiling...")
# Builds the dictionaries from the training files, prepares the test data
# with them, and saves the dictionaries.
#
# NOTE(review): `x_path`, `y_path`, `test_path` and `batch_size` are only
# present here as commented-out assignments but are used below; they are
# assumed to be defined earlier in the script -- confirm. `get_data` and
# `save_data_dic` are likewise defined outside this fragment.

# batch_size = 10
# read_data_batch = 10
# # full_data_len = 190363
# full_data_len = 10
# x_path = "data/toy2.txt"
# y_path = "data/toy3.txt"
# test_path = "data/toy2.txt"
threshold = 1

# Only the dictionaries and `tf` are kept from this call; the other return
# values are discarded.
_, _, i2w, w2i, tf, _ = get_data.processing(x_path, y_path, threshold, 0, 1, 1)
test_data_x_y = get_data.test_processing(test_path, i2w, w2i, batch_size)

# Single-argument print(...) emits the same text under Python 2 and Python 3.
print("#dic = " + str(len(w2i)))
# print "unknown = " + str(tf["<UNknown>"])

# Input and output dimensions both equal the size of the word dictionary.
dim_x = len(w2i)
dim_y = len(w2i)
num_sents = batch_size

print("save data dic...")
save_data_dic("data/i2w8000.pkl", "data/w2i8000.pkl", i2w, w2i)

# Items are space-joined, matching what a comma-separated Python 2 print
# statement emits, while staying valid under Python 3.
print(" ".join(["#features = ", str(dim_x), "#labels = ", str(dim_y)]))
# Hyper-parameters and data setup for the post / left-response /
# right-response files; saves the word dictionaries.
#
# NOTE(review): `get_data`, `save_data_dic` and `batch_size` are not defined
# in this fragment; they are assumed to be provided earlier in the script.
read_data_batch = 500
full_data_len = 190363
hidden_size = [200]
# try: gru, lstm
cell = "gru"
# try: sgd, momentum, rmsprop, adagrad, adadelta, adam
optimizer = "adadelta"

x_path = "data/10000X.txt"
# NOTE(review): the "left" path points at the "r" file and the "right" path
# at the "l" file -- confirm this is intentional and not a swap.
y_left_path = "data/10000r.txt"
y_right_path = "data/10000l.txt"
threshold = 0

# The dictionaries used below (i2w, w2i) come from this first call.
xs, yls, yrs, i2w, w2i, tf, data_x_yl_yr = get_data.processing(
    x_path, y_left_path, y_right_path, threshold, 0, 10000, batch_size)
# The xs4/yls4/yrs4/i2w4/w2i4/tf4 names are overwritten by the second call
# below; only data_49522 and data_49540 stay distinct.
xs4, yls4, yrs4, i2w4, w2i4, tf4, data_49522 = get_data.processing(
    x_path, y_left_path, y_right_path, threshold, 49522, 49523, batch_size)
# NOTE(review): this call passes 49540 for both numeric arguments, whereas
# the previous call passes 49522, 49523 -- confirm whether 49541 was meant.
xs4, yls4, yrs4, i2w4, w2i4, tf4, data_49540 = get_data.processing(
    "data/post.txt", "data/left_r.txt", "data/right_r.txt",
    threshold, 49540, 49540, batch_size)
# assert len(data_190353) == 1

# Single-argument print(...) emits the same text under Python 2 and Python 3.
print("#dic = " + str(len(w2i)))
# print "unknown = " + str(tf["<UNknown>"])

# Input and output dimensions both equal the size of the word dictionary.
dim_x = len(w2i)
dim_y = len(w2i)
num_sents = batch_size

print("save data dic...")
save_data_dic("data/i2w.pkl", "data/w2i.pkl", i2w, w2i)
# Prepares the data and dictionaries, builds a FANN model sized from the
# word dictionary, and passes it to load_model together with a saved
# model file.
#
# NOTE(review): `x_path`, `y_path`, `batch_size`, `hidden_size`, `cell`,
# `optimizer`, `drop_rate`, `get_data`, `FANN` and `load_model` are not
# defined in this fragment; they are assumed to be provided earlier in the
# script -- confirm.

# batch_size = 10
# read_data_batch = 10
# # full_data_len = 190363
# full_data_len = 10
# x_path = "data/toy2.txt"
# y_path = "data/toy3.txt"
# test_path = "data/toy2.txt"
threshold = 0

# _, _, i2w, w2i, tf, _ = get_data.processing(x_path, y_path, threshold, 0, 1, 1)
X_seqs, y_seqs, i2w, w2i, tf, data_x_y = get_data.processing(
    x_path, y_path, threshold, 0, 200, batch_size)
# test_data_x_y = get_data.test_processing(test_path, i2w, w2i, batch_size)

# Single-argument print(...) emits the same text under Python 2 and Python 3.
print("#dic = " + str(len(w2i)))
# print "unknown = " + str(tf["<UNknown>"])

# Input and output dimensions both equal the size of the word dictionary.
dim_x = len(w2i)
dim_y = len(w2i)
num_sents = batch_size

# Items are space-joined, matching what a comma-separated Python 2 print
# statement emits, while staying valid under Python 3.
print(" ".join(["#features = ", str(dim_x), "#labels = ", str(dim_y)]))
print("compiling...")

model = FANN(dim_x, dim_y, hidden_size, cell, optimizer, drop_rate, num_sents)
load_model("data/GRU-200_best.model", model)