import os
import pickle

import tensorflow as tf  # the TF 1.x graph/session API is assumed throughout

# dp (data processing), utils, Alphabet, create_feed_Dict_Eval/Test,
# viterbi_decode and predictAccuracyAndWrite are project-local helpers,
# assumed to be imported or defined elsewhere in this file.


def test_step_report(logger, session, PadZeroBegin, max_length, test_path,
                     dropout_keep_prob, step, out_dir, char_alphabet,
                     label_alphabet, word_alphabet, word_column, label_column,
                     char_embedd_dim, max_char_per_word):
    """Evaluate the test set against a restored graph and write CRF decodes."""
    graph = tf.get_default_graph()
    # Read the test data.
    logger.info("Reading data from test set...")
    word_sentences_test, _, word_index_sentences_test, label_index_sentences_test = \
        dp.read_conll_sequence_labeling(test_path, word_alphabet, label_alphabet,
                                        word_column, label_column)
    logger.info("Padding test text and labels ...")
    word_index_sentences_test_pad, test_seq_length = utils.padSequence(
        word_index_sentences_test, max_length, beginZero=PadZeroBegin)
    label_index_sentences_test_pad, _ = utils.padSequence(
        label_index_sentences_test, max_length, beginZero=PadZeroBegin)
    logger.info("Creating character set from test set ...")
    char_index_test, _ = dp.generate_character_data(
        word_sentences_test, char_alphabet=char_alphabet, setType="Test")
    logger.info("Padding test set ...")
    char_index_test_pad = dp.construct_padded_char(
        char_index_test, char_alphabet, max_sent_length=max_length,
        max_char_per_word=max_char_per_word)
    feed_dict = create_feed_Dict_Eval(
        graph, PadZeroBegin=PadZeroBegin, max_length=max_length,
        x_batch=word_index_sentences_test_pad,
        act_seq_lengths=test_seq_length,
        dropout_keep_prob=dropout_keep_prob,
        char_batch=char_index_test_pad)
    # Fetch the per-token unary scores and the CRF transition matrix by name
    # from the restored graph, then run Viterbi decoding.
    logit_op = graph.get_tensor_by_name('output/logits:0')
    transition_params_op = graph.get_tensor_by_name('transitions:0')
    logits, transition_params = session.run(
        [logit_op, transition_params_op], feed_dict)
    viterbi_decode(logits=logits, transition_params=transition_params,
                   seq_length=test_seq_length,
                   x_batch=word_index_sentences_test_pad,
                   word_alphabet=word_alphabet, label_alphabet=label_alphabet,
                   prefix_filename="test", beginZero=PadZeroBegin)
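
# --- Illustrative sketch only, not the project's viterbi_decode() above ---
# Per sentence, the decode has to trim the padded unary scores to the true
# length and run TF 1.x's tf.contrib.crf.viterbi_decode with the learned
# transition matrix. _viterbi_decode_batch is a hypothetical name; the real
# helper additionally maps indices back through the alphabets and writes a
# prediction file.
def _viterbi_decode_batch(logits, transition_params, seq_lengths, begin_zero):
    predictions = []
    for scores, length in zip(logits, seq_lengths):
        # scores: [max_length, num_tags]. With begin_zero padding the real
        # tokens sit at the end of the sequence, otherwise at the start.
        trimmed = scores[-length:] if begin_zero else scores[:length]
        viterbi_seq, _ = tf.contrib.crf.viterbi_decode(trimmed, transition_params)
        predictions.append(viterbi_seq)
    return predictions
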
def test_step(logger, session, BiLSTM, PadZeroBegin, max_length, test_path,
              dropout_keep_prob, step, out_dir, char_alphabet, label_alphabet,
              word_alphabet, word_column, label_column, char_embedd_dim,
              max_char_per_word):
    """Evaluate the test set on a live BiLSTM model and dump tensors to disk."""
    # Read the test data.
    logger.info("Reading data from test set...")
    word_sentences_test, _, word_index_sentences_test, label_index_sentences_test = \
        dp.read_conll_sequence_labeling(test_path, word_alphabet, label_alphabet,
                                        word_column, label_column)
    logger.info("Padding test text and labels ...")
    word_index_sentences_test_pad, test_seq_length = utils.padSequence(
        word_index_sentences_test, max_length, beginZero=PadZeroBegin)
    label_index_sentences_test_pad, _ = utils.padSequence(
        label_index_sentences_test, max_length, beginZero=PadZeroBegin)
    logger.info("Creating character set from test set ...")
    char_index_test, _ = dp.generate_character_data(
        word_sentences_test, char_alphabet=char_alphabet, setType="Test")
    logger.info("Padding test set ...")
    char_index_test_pad = dp.construct_padded_char(
        char_index_test, char_alphabet, max_sent_length=max_length,
        max_char_per_word=max_char_per_word)
    feed_dict = create_feed_Dict_Test(
        BiLSTM, PadZeroBegin=PadZeroBegin, max_length=max_length,
        x_batch=word_index_sentences_test_pad,
        y_batch=label_index_sentences_test_pad,
        act_seq_lengths=test_seq_length,
        dropout_keep_prob=dropout_keep_prob,
        char_batch=char_index_test_pad)
    # logits holds the per-sentence unary scores (a list of numpy.ndarray) and
    # transition_params the CRF transition matrix (ndarray). The embedding
    # tables (W_char, W_word), the flattened CNN output and the input ids are
    # fetched as well, for the offline dumps below.
    logits, transition_params, embedded_char, embedded_words, char_pool_flat, input_x_test = \
        session.run([BiLSTM.logits, BiLSTM.transition_params, BiLSTM.W_char,
                     BiLSTM.W_word, BiLSTM.char_pool_flat, BiLSTM.input_x],
                    feed_dict)
    accuracy, accuracy_low_classes = predictAccuracyAndWrite(
        logits, transition_params, test_seq_length,
        label_index_sentences_test_pad, step, word_index_sentences_test_pad,
        word_alphabet, label_alphabet, prefix_filename="test",
        beginZero=PadZeroBegin)
    print("step {}, accuracy on test set {:g}, "
          "accuracy for classes except Others: {:g}".format(
              step, accuracy, accuracy_low_classes))
    # Pickle the fetched tensors so they can be analysed offline.
    checkpoint_dir_test = os.path.abspath(
        os.path.join(out_dir, "checkpoints_test"))
    if not os.path.exists(checkpoint_dir_test):
        os.makedirs(checkpoint_dir_test)
    dumps = {
        "input_x_test_%d.pkl" % step: input_x_test,
        "char_pool_flat_%d.pkl" % step: char_pool_flat,
        "act_seq_len_%d.pkl" % step: test_seq_length,
        "embedded_char_%d.pkl" % step: embedded_char,
        "embedded_words_%d.pkl" % step: embedded_words,
    }
    for fname, data in dumps.items():
        with open(os.path.join(checkpoint_dir_test, fname), 'wb') as f:
            pickle.dump(data, f)
    print("Saved test data checkpoint to {}\n".format(checkpoint_dir_test))
    return accuracy, accuracy_low_classes
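
# Convenience sketch (load_test_dump is not part of the original code): the
# arrays pickled by test_step() can be reloaded later for offline analysis,
# e.g. to inspect the CNN character features produced at a given step.
def load_test_dump(out_dir, step, name="char_pool_flat"):
    path = os.path.join(out_dir, "checkpoints_test", "%s_%d.pkl" % (name, step))
    with open(path, "rb") as f:
        return pickle.load(f)
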
logger.info("label alphabet size: %d" % (label_alphabet.size() - 1)) # get maximum length : this is mainly for padding. max_length_train = utils.get_max_length(word_sentences_train) max_length_dev = utils.get_max_length(word_sentences_dev) #max_length_test = utils.get_max_length(word_sentences_test) max_length = min(dp.MAX_LENGTH, max(max_length_train, max_length_dev)) logger.info("Maximum length (i.e max words ) of training set is %d" % max_length_train) logger.info("Maximum length (i.e max words ) of dev set is %d" % max_length_dev) #logger.info("Maximum length (i.e max words ) of test set is %d" % max_length_test) logger.info("Maximum length (i.e max words ) used for training is %d" % max_length) logger.info("Padding training text and lables ...") word_index_sentences_train_pad, train_seq_length = utils.padSequence( word_index_sentences_train, max_length, beginZero=FLAGS.PadZeroBegin) label_index_sentences_train_pad, _ = utils.padSequence( label_index_sentences_train, max_length, beginZero=FLAGS.PadZeroBegin) logger.info("Padding dev text and lables ...") word_index_sentences_dev_pad, dev_seq_length = utils.padSequence( word_index_sentences_dev, max_length, beginZero=FLAGS.PadZeroBegin) label_index_sentences_dev_pad, _ = utils.padSequence( label_index_sentences_dev, max_length, beginZero=FLAGS.PadZeroBegin) logger.info("Creating character set FROM training set ...") char_alphabet = Alphabet('character') char_index_train, max_char_per_word_train = dp.generate_character_data( word_sentences_train, char_alphabet=char_alphabet, setType="Train") # close character alphabet. WE close it because the embed table is goign to be random char_alphabet.close()