# Imports assumed by these snippets (TF 1.x API); `dp`, `utils`, the
# create_feed_Dict_* / viterbi_decode / predictAccuracyAndWrite helpers,
# FLAGS, logger, and the alphabet objects come from the surrounding project.
import os
import pickle
import time

import tensorflow as tf


def test_step_report(logger, session, PadZeroBegin, max_length, test_path,
                     dropout_keep_prob, step, out_dir, char_alphabet,
                     label_alphabet, word_alphabet, word_column, label_column,
                     char_embedd_dim, max_char_per_word):
    # read test data
    graph = tf.get_default_graph()
    logger.info("Reading data from test set...")
    word_sentences_test, _, word_index_sentences_test, label_index_sentences_test = dp.read_conll_sequence_labeling(
        test_path, word_alphabet, label_alphabet, word_column, label_column)
    logger.info("Padding test text and lables ...")
    word_index_sentences_test_pad,test_seq_length = utils.padSequence(word_index_sentences_test,max_length, beginZero=PadZeroBegin)
    label_index_sentences_test_pad,_= utils.padSequence(label_index_sentences_test,max_length, beginZero=PadZeroBegin)
    logger.info("Creating character set FROM test set ...")
    char_index_test,_= dp.generate_character_data(word_sentences_test, 
                                char_alphabet=char_alphabet, setType="Test")

    logger.info("Padding Test set ...")
    char_index_test_pad = dp.construct_padded_char(char_index_test, char_alphabet, max_sent_length=max_length,max_char_per_word=max_char_per_word)
    print(type(char_index_test_pad))
    print(type(word_index_sentences_test_pad))
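    # Assumed shapes at this point (inferred from the padding helpers above,
    # not verified against the project): word_index_sentences_test_pad is
    # [num_sentences, max_length] and char_index_test_pad is
    # [num_sentences, max_length, max_char_per_word].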
    
    feed_dict = create_feed_Dict_Eval(
        graph, PadZeroBegin=PadZeroBegin, max_length=max_length,
        x_batch=word_index_sentences_test_pad,
        act_seq_lengths=test_seq_length, dropout_keep_prob=dropout_keep_prob,
        char_batch=char_index_test_pad)
    logit_op = graph.get_tensor_by_name('output/logits:0')
    transition_params_op = graph.get_tensor_by_name('transitions:0')
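    # These tensor names assume the graph defines its logits under an
    # "output" name scope and its CRF transition matrix as "transitions";
    # adjust the names if the model was built differently.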
    logits, transition_params = session.run(
        [logit_op, transition_params_op], feed_dict)
    viterbi_decode(
        logits=logits, transition_params=transition_params,
        seq_length=test_seq_length, x_batch=word_index_sentences_test_pad,
        word_alphabet=word_alphabet, label_alphabet=label_alphabet,
        prefix_filename="test", beginZero=PadZeroBegin)
    return
def test_step(logger, session, BiLSTM, PadZeroBegin, max_length, test_path,
              dropout_keep_prob, step, out_dir, char_alphabet, label_alphabet,
              word_alphabet, word_column, label_column, char_embedd_dim,
              max_char_per_word):
    # read test data
    logger.info("Reading data from test set...")
    word_sentences_test, _, word_index_sentences_test, label_index_sentences_test = dp.read_conll_sequence_labeling(
        test_path, word_alphabet, label_alphabet, word_column, label_column)
    logger.info("Padding test text and lables ...")
    word_index_sentences_test_pad,test_seq_length = utils.padSequence(word_index_sentences_test,max_length, beginZero=PadZeroBegin)
    label_index_sentences_test_pad,_= utils.padSequence(label_index_sentences_test,max_length, beginZero=PadZeroBegin)
    logger.info("Creating character set FROM test set ...")
    char_index_test,_= dp.generate_character_data(word_sentences_test, 
                                char_alphabet=char_alphabet, setType="Test")

    logger.info("Padding Test set ...")
    char_index_test_pad = dp.construct_padded_char(char_index_test, char_alphabet, max_sent_length=max_length,max_char_per_word=max_char_per_word)
    
    # test summaries
    #test_summary_op = tf.summary.merge([loss_summary])
    #test_summary_dir = os.path.join(out_dir, "summaries", "test")
    #test_summary_writer = tf.summary.FileWriter(test_summary_dir, sess.graph)

    feed_dict = create_feed_Dict_Test(
        BiLSTM, PadZeroBegin=PadZeroBegin, max_length=max_length,
        x_batch=word_index_sentences_test_pad,
        y_batch=label_index_sentences_test_pad,
        act_seq_lengths=test_seq_length, dropout_keep_prob=dropout_keep_prob,
        char_batch=char_index_test_pad)
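    # Unlike the report path above, this feed also carries y_batch, so the
    # gold labels are available for the accuracy computation below.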
    # logits is a list of numpy.ndarray; transition_params is an ndarray.
    # The embedding matrices and intermediate tensors are fetched as well so
    # they can be pickled for offline inspection below.
    (logits, transition_params, embedded_char, embedded_words,
     char_pool_flat, input_x_test) = session.run(
        [BiLSTM.logits, BiLSTM.transition_params, BiLSTM.W_char,
         BiLSTM.W_word, BiLSTM.char_pool_flat, BiLSTM.input_x], feed_dict)
    
    accuracy, accuracy_low_classes = predictAccuracyAndWrite(
        logits, transition_params, test_seq_length,
        label_index_sentences_test_pad, step, word_index_sentences_test_pad,
        word_alphabet, label_alphabet, prefix_filename="test",
        beginZero=PadZeroBegin)
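    # Sketch of the masked accuracy predictAccuracyAndWrite is assumed to
    # compute (illustrative; the real helper also writes predictions to a
    # "test"-prefixed file, and accuracy_low_classes is understood to exclude
    # the majority "Others" class):
    #
    #   correct = total = 0
    #   for logit, length, gold in zip(logits, test_seq_length,
    #                                  label_index_sentences_test_pad):
    #       pred, _ = tf.contrib.crf.viterbi_decode(logit[:length],
    #                                               transition_params)
    #       correct += sum(p == g for p, g in zip(pred, gold[:length]))
    #       total += length
    #   accuracy = correct / total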

    #test_summary_writer.add_summary(summaries, step)
    print("step {},  accuracy on test set {:g}, accuracy for classes except Others: {:g}".format(step,accuracy,accuracy_low_classes))

    checkpoint_dir_test = os.path.abspath(os.path.join(out_dir, "checkpoints_test"))

    if not os.path.exists(checkpoint_dir_test):
        os.makedirs(checkpoint_dir_test)
    fname_data = "input_x_test_"+str(step)+".pkl"
    fname_conv_out = "char_pool_flat_"+str(step)+".pkl" 
    fname_seqLength = "act_seq_len_"+str(step)+".pkl" 
    fname_embedded_char = "embedded_char_"+str(step)+".pkl" 
    fname_embedded_words = "embedded_words_"+str(step)+".pkl" 
    pickle.dump(input_x_test,open(os.path.join(checkpoint_dir_test, fname_data),'wb'))
    pickle.dump(char_pool_flat,open(os.path.join(checkpoint_dir_test, fname_conv_out),'wb'))
    pickle.dump(test_seq_length,open(os.path.join(checkpoint_dir_test, fname_seqLength),'wb'))
    pickle.dump(embedded_char,open(os.path.join(checkpoint_dir_test, fname_embedded_char),'wb'))
    pickle.dump(embedded_words,open(os.path.join(checkpoint_dir_test, fname_embedded_words),'wb'))
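    # To reload one of the dumps later (illustrative):
    #
    #   with open(os.path.join(checkpoint_dir_test,
    #                          "char_pool_flat_{}.pkl".format(step)), 'rb') as f:
    #       char_pool_flat = pickle.load(f)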
    print("Saved test data checkpoint to {}\n".format(checkpoint_dir_test))
    return accuracy,accuracy_low_classes
# Example No. 3
train_path = FLAGS.train_path
test_path = FLAGS.test_path
dev_path = FLAGS.dev_path

word_column = FLAGS.word_col
label_column = FLAGS.label_col
# Output directory for models and summaries
timestamp = str(int(time.time()))
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
print("Writing to {}\n".format(out_dir))
# read training data
logger.info("Reading data from training set...")
word_sentences_train, _, word_index_sentences_train, label_index_sentences_train = dp.read_conll_sequence_labeling(
    train_path,
    word_alphabet,
    label_alphabet,
    word_column,
    label_column,
    out_dir=out_dir)

# if oov is "random" and do not fine tune, close word_alphabet
if oov == "random" and not fine_tune:
    logger.info("Close word alphabet.")
    word_alphabet.close()

# read dev data
logger.info("Reading data from dev set...")
word_sentences_dev, _, word_index_sentences_dev, label_index_sentences_dev = dp.read_conll_sequence_labeling(
    dev_path, word_alphabet, label_alphabet, word_column, label_column)

# Close alphabets: once closed, no more words can be added to the vocabulary.
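# A sketch of the assumed close/lookup behavior (not the project's actual
# Alphabet class):
#
#   def get_index(self, token):
#       if token not in self.token2idx and not self.closed:
#           self.token2idx[token] = len(self.token2idx)
#       return self.token2idx.get(token, self.default_index)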