model = Seq2Seq(dim_x, dim_y, hidden_size_encoder, hidden_size_decoder,
                cell, optimizer, drop_rate, num_sents)
print 'loading...'
load_model("0420-new.model", model)
print 'model done'

print "predicting..."
# test_data_x_y = get_data.test_processing_long(r'data/SMT-test-100.post', i2w, w2i, 100, 100)
# t_sents = model.predict(test_data_x_y[0][0], test_data_x_y[0][1], test_data_x_y[0][3], 100)
test_data_x_y = get_data.test_sentence_input_processing_long("a b c d", i2w, w2i, 5, 1)
t_sents = model.predict(test_data_x_y[0][0], test_data_x_y[0][1], test_data_x_y[0][3], 1)
get_data.print_sentence(t_sents[0], dim_y, i2w)


def response(sentence_seg, model, i2w, w2i):
    test_data_x_y = get_data.test_sentence_input_processing_long(sentence_seg, i2w, w2i, 100, 1)
    t_sents = model.predict(test_data_x_y[0][0], test_data_x_y[0][1], test_data_x_y[0][3], 1)
    get_data.print_sentence(t_sents[0], dim_y, i2w)
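# Usage sketch (not part of the original scripts): a minimal interactive loop on top of
# the response() helper above. It assumes a Python 2 environment, that model, i2w and
# w2i are already built and loaded as shown, and that the input is a space-separated,
# word-segmented sentence.
while True:
    line = raw_input("post> ").strip()
    if not line:
        break
    response(line, model, i2w, w2i)  # prints the decoded reply for one sentence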
mask_y_right = xy[5]
local_batch_size = xy[6]
cost, sents_left, sents_right, test, test2, test3, test4, test5, test6, test7, test8, test9 = \
    model.train(X, YL, YR, mask, mask_y_left, mask_y_right, lr, local_batch_size)
read_data_batch_error += cost
error += cost
in_b_time = time.time() - in_b_start
# break

l, r = model.predict(data_t1[0][0], data_t1[0][1], data_t1[0][3], data_t1[0][5], 1)
l2, r2 = model.predict(data_t2[0][0], data_t2[0][1], data_t2[0][3], data_t2[0][5], 1)
l3, r3 = model.predict(data_4[0][0], data_4[0][1], data_4[0][3], data_4[0][5], 1)

# print results
print "left : "
get_data.print_sentence(l, dim_y, i2w)
get_data.print_sentence(l2, dim_y, i2w)
get_data.print_sentence(l3, dim_y, i2w)
print "right : "
get_data.print_sentence(r, dim_y, i2w)
get_data.print_sentence(r2, dim_y, i2w)
get_data.print_sentence(r3, dim_y, i2w)

read_data_batch_error /= len(data_x_yl_yr)
del X_seqs
del yl_seqs
del data_x_yl_yr
gc.collect()

if read_data_batch_error < g_error:
    g_error = read_data_batch_error
cost, sents_y, sents_t = model.train(X, Y, Yt, mask, mask_y, lr, local_batch_size)
error += cost
# break
in_b_time = time.time() - in_start
# break
# l, r = model.predict(data_t1[0][0], data_t1[0][1], data_t1[0][3], data_t1[0][5], 1)
# l2, r2 = model.predict(data_t2[0][0], data_t2[0][1], data_t2[0][3], data_t2[0][5], 1)
# l3, r3 = model.predict(data_4[0][0], data_4[0][1], data_4[0][3], data_4[0][5], 1)
# t_sents = model.predict(test_data_x_y[0][0], test_data_x_y[0][1], test_data_x_y[0][3], batch_size)

# print results
# print "Test : "
# get_data.print_sentence(l, dim_y, i2w)
# get_data.print_sentence(l2, dim_y, i2w)
# get_data.print_sentence_last_n(t_sents[0], dim_y, i2w, 5)

error /= len(data_x_y)
print "Iter = " + str(i) + " Error = " + str(error) + ", Time = " + str(in_b_time)
get_data.print_sentence(sents_y, dim_y, i2w)
get_data.print_sentence(sents_t, dim_tag, i2t)

if error <= e:
    break

print "Finished. Time = " + str(time.time() - start)
print "save model..."
save_model("0504-new.model", model)
                hidden_size_encoder, hidden_size_decoder, cell, optimizer, drop_rate, num_sents)
load_model('model/12_0526.model', model)
print 'loading...'
load_model("model/0525.model", model)
print 'model done'

print "predicting..."
t_bleu = []
for tlen in xrange(len(test_data_x_y)):
    p_sents_y, p_sents_t = model.predict(test_data_x_y[tlen][0], test_data_x_y[tlen][1],
                                         test_data_x_y[tlen][4], test_batch)
    get_data.print_sentence(p_sents_y, dim_y, i2w)
    candidate_dic = get_data.get_candidate_dic_for_test_pos(p_sents_y, dim_y, i2w)
    batch_bleu, _ = print_bleu_normal_batch(candidate_dic, reference_dic, tlen * test_batch)
    t_bleu.append(batch_bleu)
print "~~~~~~~~~~~~~Test Bleu is ", float(sum(t_bleu)) / len(t_bleu), "~~~~~~~~~~~~~~~~"

# t_sents = model.predict(test_data_x_y[0][0], test_data_x_y[0][1], test_data_x_y[0][3], 100)
# test_data_x_y = get_data.test_sentence_input_processing_long("a b c d", i2w, w2i, 5, 1)
# sents_y, sents_t = model.predict(test_data_x_y[0][0], test_data_x_y[0][1], test_data_x_y[0][4], 2)
# get_data.print_sentence(sents_y, dim_y, i2w)
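# Refactoring sketch (an assumption, not part of the original script): the BLEU loop
# above wrapped as a reusable helper so that several checkpoints can be scored against
# the same test set. It reuses the project's own calls (model.predict,
# get_data.get_candidate_dic_for_test_pos, print_bleu_normal_batch) exactly as above
# and, like the script, reads dim_y and i2w from the enclosing scope.
def evaluate_test_bleu(model, test_data_x_y, reference_dic, test_batch):
    t_bleu = []
    for tlen in xrange(len(test_data_x_y)):
        p_sents_y, p_sents_t = model.predict(test_data_x_y[tlen][0], test_data_x_y[tlen][1],
                                             test_data_x_y[tlen][4], test_batch)
        candidate_dic = get_data.get_candidate_dic_for_test_pos(p_sents_y, dim_y, i2w)
        batch_bleu, _ = print_bleu_normal_batch(candidate_dic, reference_dic, tlen * test_batch)
        t_bleu.append(batch_bleu)
    # average of the per-batch BLEU scores, matching the "Test Bleu" print above
    return float(sum(t_bleu)) / len(t_bleu)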
error += cost
# break
in_b_time = time.time() - in_start
# break
# l, r = model.predict(data_t1[0][0], data_t1[0][1], data_t1[0][3], data_t1[0][5], 1)
# l2, r2 = model.predict(data_t2[0][0], data_t2[0][1], data_t2[0][3], data_t2[0][5], 1)
# l3, r3 = model.predict(data_4[0][0], data_4[0][1], data_4[0][3], data_4[0][5], 1)
# t_sents = model.predict(test_data_x_y[0][0], test_data_x_y[0][1], test_data_x_y[0][3], batch_size)

# print results
# print "Test : "
# get_data.print_sentence(l, dim_y, i2w)
# get_data.print_sentence(l2, dim_y, i2w)
# get_data.print_sentence_last_n(t_sents[0], dim_y, i2w, 5)

error /= len(data_x_y)
print "Iter = " + str(i) + " Error = " + str(error) + ", Time = " + str(in_b_time)
get_data.print_sentence(sents_y, dim_y, i2w)
get_data.print_sentence(sents_t, dim_tag, i2t)

if error <= e:
    break

print "Finished. Time = " + str(time.time() - start)
print "save model..."
save_model("0504-new.model", model)
def response(sentence_seg, model, i2w, w2i):
    test_data_x_y = get_data.test_sentence_input_processing_long(sentence_seg, i2w, w2i, 100, 1)
    t_sents = model.predict(test_data_x_y[0][0], test_data_x_y[0][1], test_data_x_y[0][3], 1)
    get_data.print_sentence(t_sents[0], dim_y, i2w)
print "compiling..." model = Seq2Seq(dim_x, dim_y, hidden_size_encoder, hidden_size_decoder, cell, optimizer, drop_rate, num_sents) print 'loading...' load_model("0420-new.model", model) print 'model done' print "predicting..." # test_data_x_y = get_data.test_processing_long(r'data/SMT-test-100.post', i2w, w2i, 100, 100) # t_sents = model.predict(test_data_x_y[0][0], test_data_x_y[0][1],test_data_x_y[0][3], 100) test_data_x_y = get_data.test_sentence_input_processing_long( "a b c d", i2w, w2i, 5, 1) t_sents = model.predict(test_data_x_y[0][0], test_data_x_y[0][1], test_data_x_y[0][3], 1) get_data.print_sentence(t_sents[0], dim_y, i2w) def response(sentence_seg, model, i2w, w2i): test_data_x_y = get_data.test_sentence_input_processing_long( sentence_seg, i2w, w2i, 100, 1) t_sents = model.predict(test_data_x_y[0][0], test_data_x_y[0][1], test_data_x_y[0][3], 1) get_data.print_sentence(t_sents[0], dim_y, i2w)
local_batch_size = xy[6]
cost, sents_left, sents_right, test, test2, test3, test4, test5, test6, test7, test8, test9 = \
    model.train(X, YL, YR, mask, mask_y_left, mask_y_right, lr, local_batch_size)
error += cost
all_error += cost

if (batch_id + 1) % 500 == 0:
    in_time = time.time() - in_start
    l, r = model.predict(data_49522[0][0], data_49522[0][1], data_49522[0][3], data_49522[0][5], 1)
    l1, r1 = model.predict(data_49540[0][0], data_49540[0][1], data_49540[0][3], data_49540[0][5], 1)
    in_b_time = time.time() - in_b_start
    in_b_start = time.time()
    # break

    # print results
    print "left : "
    get_data.print_sentence(l, dim_y, i2w)
    get_data.print_sentence(l1, dim_y, i2w)
    print "right : "
    get_data.print_sentence(r, dim_y, i2w)
    get_data.print_sentence(r1, dim_y, i2w)

    error /= 500.0
    if error < g_error:
        g_error = error
        save_model("10000.model", model)
    print "Iter = " + str(i) + ", " + str(100.0 * (batch_id + 1) / len(data_x_yl_yr)) + "%, Error = " + str(error) + ", Time = " + str(in_b_time)

if all_error / len(data_x_yl_yr) <= e:
    break
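# Sketch (an assumption, not in the original sources): the "checkpoint only when the
# running error improves" pattern used above and below, factored into a small helper.
# save_model is the project's own serializer; best_error plays the role of g_error.
def save_if_improved(model, error, best_error, path):
    if error < best_error:
        print 'new smaller cost, save param...'
        save_model(path, model)
        return error       # the new best error becomes the threshold for the next check
    return best_error      # otherwise keep the previous best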
# l, r = model.predict(data_t1[0][0], data_t1[0][1], data_t1[0][3], data_t1[0][5], 1)
# l2, r2 = model.predict(data_t2[0][0], data_t2[0][1], data_t2[0][3], data_t2[0][5], 1)
# l3, r3 = model.predict(data_4[0][0], data_4[0][1], data_4[0][3], data_4[0][5], 1)
# t_sents = model.predict(test_data_x_y[0][0], test_data_x_y[0][1], test_data_x_y[0][3], batch_size)

# print results
# print "Test : "
# get_data.print_sentence(l, dim_y, i2w)
# get_data.print_sentence(l2, dim_y, i2w)
# get_data.print_sentence_last_n(t_sents[0], dim_y, i2w, 5)

error /= len(data_x_y)
print "Iter = " + str(i) + " Error = " + str(error) + ", Time = " + str(in_b_time)

if error < g_error:
    g_error = error
    print 'new smaller cost, save param...'
    save_model("GRU_hidden200-200_post200.model", model)

if error < 2.0:
    print "train_last :"
    get_data.print_sentence(sents, dim_y, i2w)

if error <= e:
    break

print "Finished. Time = " + str(time.time() - start)
print "save model..."
save_model("GRU_hidden200-200_post200-final.model", model)
reference_dic = cPickle.load(open(r'data/nba2/reference_dic_for_nba_2-500.pkl', 'rb'))
print "done."

print "compiling..."
model = Seq2Seq(dim_x + dim_tag, dim_y + dim_tag, dim_y, dim_tag,
                hidden_size_encoder, hidden_size_decoder, cell, optimizer,
                drop_rate, num_sents)
print 'loading model...'
load_model(r'data/nba2/model/0531 - 0.122.model', model)
print 'model done'

p_sents_y, p_sents_t = model.predict(test_data_x_y[0][0], test_data_x_y[0][1],
                                     test_data_x_y[0][4], test_batch)
get_data.print_sentence(p_sents_y, dim_y, i2w)

# print "predicting..."
# t_bleu = []
# for tlen in xrange(len(test_data_x_y)):
#     p_sents_y, p_sents_t = model.predict(test_data_x_y[tlen][0], test_data_x_y[tlen][1], test_data_x_y[tlen][4], test_batch)
#     get_data.print_sentence(p_sents_y, dim_y, i2w)
#     candidate_dic = get_data.get_candidate_dic_for_test_pos(p_sents_y, dim_y, i2w)
#     batch_bleu, _ = print_bleu_normal_batch(candidate_dic, reference_dic, tlen * test_batch)
#     t_bleu.append(batch_bleu)
# print "~~~~~~~~~~~~~Test Bleu is ", float(sum(t_bleu)) / len(t_bleu), "~~~~~~~~~~~~~~~~"
# t_sents = model.predict(test_data_x_y[0][0], test_data_x_y[0][1], test_data_x_y[0][3], 100)
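# Sketch (an assumption): the tag-aware model above also returns a predicted tag
# sequence (p_sents_t); it can be printed the same way the training code prints it,
# using the tag vocabulary i2t and dim_tag, which are assumed to be loaded alongside i2w.
get_data.print_sentence(p_sents_t, dim_tag, i2t)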