def test_step_report(logger, session, PadZeroBegin, max_length, test_path,
                     dropout_keep_prob, step, out_dir, char_alphabet,
                     label_alphabet, word_alphabet, word_column, label_column,
                     char_embedd_dim, max_char_per_word):
    """Decode the test set with tensors looked up by name in the default graph.

    The test file is read with ``dp.read_conll_sequence_labeling``, word and
    label index sequences are padded to ``max_length``, character indices are
    generated and padded, and a feed dict is built with
    ``create_feed_Dict_Eval``. The tensors ``'output/logits:0'`` and
    ``'transitions:0'`` are then evaluated in ``session`` and the results are
    handed to ``viterbi_decode`` with ``prefix_filename="test"``.

    Args:
        logger: Logger used for progress messages.
        session: Session whose ``run`` evaluates the two tensors.
        PadZeroBegin: Forwarded as ``beginZero`` / ``PadZeroBegin`` to the
            padding, feed-dict and decoding helpers.
        max_length: Padding length for word and label sequences; also used as
            ``max_sent_length`` for character padding.
        test_path: Path of the test file to read.
        dropout_keep_prob: Forwarded to ``create_feed_Dict_Eval``.
        step: Not used by this function.
        out_dir: Not used by this function.
        char_alphabet: Character alphabet passed to the ``dp`` helpers.
        label_alphabet: Label alphabet used for reading and decoding.
        word_alphabet: Word alphabet used for reading and decoding.
        word_column: Word column passed to the reader.
        label_column: Label column passed to the reader.
        char_embedd_dim: Not used by this function.
        max_char_per_word: Forwarded to ``dp.construct_padded_char``.

    Returns:
        None.
    """
    default_graph = tf.get_default_graph()

    # read test data
    logger.info("Reading data from test set...")
    (sentences,
     _,
     word_ids,
     label_ids) = dp.read_conll_sequence_labeling(
        test_path, word_alphabet, label_alphabet, word_column, label_column)

    logger.info("Padding test text and lables ...")
    padded_word_ids, seq_lengths = utils.padSequence(
        word_ids, max_length, beginZero=PadZeroBegin)
    # The padded labels are not consumed below; the call is kept so the
    # sequence of helper invocations stays the same.
    _padded_label_ids, _ = utils.padSequence(
        label_ids, max_length, beginZero=PadZeroBegin)

    logger.info("Creating character set FROM test set ...")
    char_ids, _ = dp.generate_character_data(
        sentences, char_alphabet=char_alphabet, setType="Test")

    logger.info("Padding Test set ...")
    padded_char_ids = dp.construct_padded_char(
        char_ids,
        char_alphabet,
        max_sent_length=max_length,
        max_char_per_word=max_char_per_word,
    )

    print(type(padded_char_ids))
    print(type(padded_word_ids))

    feed = create_feed_Dict_Eval(
        default_graph,
        PadZeroBegin=PadZeroBegin,
        max_length=max_length,
        x_batch=padded_word_ids,
        act_seq_lengths=seq_lengths,
        dropout_keep_prob=dropout_keep_prob,
        char_batch=padded_char_ids,
    )

    # Tensors are fetched by name from the default graph.
    fetches = [
        default_graph.get_tensor_by_name('output/logits:0'),
        default_graph.get_tensor_by_name('transitions:0'),
    ]
    logits, transition_params = session.run(fetches, feed)

    viterbi_decode(
        logits=logits,
        transition_params=transition_params,
        seq_length=seq_lengths,
        x_batch=padded_word_ids,
        word_alphabet=word_alphabet,
        label_alphabet=label_alphabet,
        prefix_filename="test",
        beginZero=PadZeroBegin,
    )
    return None
def test_step(logger, session, BiLSTM, PadZeroBegin, max_length, test_path,
              dropout_keep_prob, step, out_dir, char_alphabet, label_alphabet,
              word_alphabet, word_column, label_column, char_embedd_dim,
              max_char_per_word):
    """Evaluate the model on the test set and pickle selected fetched values.

    The test file is read with ``dp.read_conll_sequence_labeling``, word and
    label index sequences are padded to ``max_length``, character indices are
    generated and padded, and a feed dict is built with
    ``create_feed_Dict_Test``. ``session.run`` fetches ``logits``,
    ``transition_params``, ``W_char``, ``W_word``, ``char_pool_flat`` and
    ``input_x`` from ``BiLSTM``. Accuracy is computed by
    ``predictAccuracyAndWrite`` and printed. Finally five pickle files
    (``input_x_test_<step>.pkl``, ``char_pool_flat_<step>.pkl``,
    ``act_seq_len_<step>.pkl``, ``embedded_char_<step>.pkl``,
    ``embedded_words_<step>.pkl``) are written to
    ``<out_dir>/checkpoints_test``, which is created if it does not exist.

    Args:
        logger: Logger used for progress messages.
        session: Session whose ``run`` evaluates the fetched attributes.
        BiLSTM: Model object providing the fetched attributes; also passed to
            ``create_feed_Dict_Test``.
        PadZeroBegin: Forwarded as ``beginZero`` / ``PadZeroBegin`` to the
            padding, feed-dict and accuracy helpers.
        max_length: Padding length for word and label sequences; also used as
            ``max_sent_length`` for character padding.
        test_path: Path of the test file to read.
        dropout_keep_prob: Forwarded to ``create_feed_Dict_Test``.
        step: Step identifier used in the printed message and file names.
        out_dir: Base directory under which ``checkpoints_test`` is created.
        char_alphabet: Character alphabet passed to the ``dp`` helpers.
        label_alphabet: Label alphabet used for reading and scoring.
        word_alphabet: Word alphabet used for reading and scoring.
        word_column: Word column passed to the reader.
        label_column: Label column passed to the reader.
        char_embedd_dim: Not used by this function.
        max_char_per_word: Forwarded to ``dp.construct_padded_char``.

    Returns:
        The ``(accuracy, accuracy_low_classes)`` pair returned by
        ``predictAccuracyAndWrite``.
    """
    # read test data
    logger.info("Reading data from test set...")
    (word_sentences_test,
     _,
     word_index_sentences_test,
     label_index_sentences_test) = dp.read_conll_sequence_labeling(
        test_path, word_alphabet, label_alphabet, word_column, label_column)

    logger.info("Padding test text and lables ...")
    word_index_sentences_test_pad, test_seq_length = utils.padSequence(
        word_index_sentences_test, max_length, beginZero=PadZeroBegin)
    label_index_sentences_test_pad, _ = utils.padSequence(
        label_index_sentences_test, max_length, beginZero=PadZeroBegin)

    logger.info("Creating character set FROM test set ...")
    char_index_test, _ = dp.generate_character_data(
        word_sentences_test, char_alphabet=char_alphabet, setType="Test")

    logger.info("Padding Test set ...")
    char_index_test_pad = dp.construct_padded_char(
        char_index_test,
        char_alphabet,
        max_sent_length=max_length,
        max_char_per_word=max_char_per_word,
    )

    feed_dict = create_feed_Dict_Test(
        BiLSTM,
        PadZeroBegin=PadZeroBegin,
        max_length=max_length,
        x_batch=word_index_sentences_test_pad,
        y_batch=label_index_sentences_test_pad,
        act_seq_lengths=test_seq_length,
        dropout_keep_prob=dropout_keep_prob,
        char_batch=char_index_test_pad,
    )

    (logits,
     transition_params,
     embedded_char,
     embedded_words,
     char_pool_flat,
     input_x_test) = session.run(
        [BiLSTM.logits, BiLSTM.transition_params, BiLSTM.W_char,
         BiLSTM.W_word, BiLSTM.char_pool_flat, BiLSTM.input_x],
        feed_dict)

    accuracy, accuracy_low_classes = predictAccuracyAndWrite(
        logits, transition_params, test_seq_length,
        label_index_sentences_test_pad, step, word_index_sentences_test_pad,
        word_alphabet, label_alphabet,
        prefix_filename="test", beginZero=PadZeroBegin)

    print("step {}, accuracy on test set {:g}, accuracy for classes except Others: {:g}".format(step,accuracy,accuracy_low_classes))

    checkpoint_dir_test = os.path.abspath(os.path.join(out_dir, "checkpoints_test"))
    if not os.path.exists(checkpoint_dir_test):
        os.makedirs(checkpoint_dir_test)

    # (file-name prefix, object) pairs, written in a fixed order. Each file is
    # opened in a ``with`` block so the handle is flushed and closed right
    # after the dump instead of being left to the garbage collector.
    step_tag = str(step)
    artifacts = (
        ("input_x_test_", input_x_test),
        ("char_pool_flat_", char_pool_flat),
        ("act_seq_len_", test_seq_length),
        ("embedded_char_", embedded_char),
        ("embedded_words_", embedded_words),
    )
    for prefix, payload in artifacts:
        target = os.path.join(checkpoint_dir_test, prefix + step_tag + ".pkl")
        with open(target, 'wb') as handle:
            pickle.dump(payload, handle)

    print("Saved test data checkpoint to {}\n".format(checkpoint_dir_test))
    return accuracy, accuracy_low_classes
# Data locations and column settings are taken from the parsed flags.
train_path, test_path, dev_path = (
    FLAGS.train_path,
    FLAGS.test_path,
    FLAGS.dev_path,
)
word_column, label_column = FLAGS.word_col, FLAGS.label_col

# Output directory for models and summaries: ./runs/<unix timestamp>
timestamp = str(int(time.time()))
out_dir = os.path.abspath(
    os.path.join(os.path.curdir, "runs", timestamp)
)
print("Writing to {}\n".format(out_dir))

# Read the training data.
logger.info("Reading data from training set...")
(word_sentences_train,
 _,
 word_index_sentences_train,
 label_index_sentences_train) = dp.read_conll_sequence_labeling(
    train_path,
    word_alphabet,
    label_alphabet,
    word_column,
    label_column,
    out_dir=out_dir,
)

# When oov is "random" and embeddings are not fine-tuned, close the word
# alphabet before the dev set is read.
if oov == "random" and not fine_tune:
    logger.info("Close word alphabet.")
    word_alphabet.close()

# Read the dev data.
logger.info("Reading data from dev set...")
(word_sentences_dev,
 _,
 word_index_sentences_dev,
 label_index_sentences_dev) = dp.read_conll_sequence_labeling(
    dev_path,
    word_alphabet,
    label_alphabet,
    word_column,
    label_column,
)

# Close alphabets: once closed, no more words can be added to the word vocabulary.