def eval():
    """Score one fixed query against the first batch of questions.

    Loads the evaluation pairs from ``FLAGS.eval_data_file``, converts both
    sides with ``word2vec_helpers.SentencesIndex``, repeats the query found
    at position 8 ``FLAGS.batch_size`` times and pairs it with the first
    ``FLAGS.batch_size`` questions. The ``x_vs_xpos`` tensor is then run in
    ``sess``; the raw result and its ``np.argmax`` are printed. Nothing is
    returned.
    """
    # load data
    _, queries, questions = eval_data_helpers.load_data(FLAGS.eval_data_file)

    # One query (index 8) is compared against every question of the batch.
    indexed_queries = word2vec_helpers.SentencesIndex(queries,
                                                      max_document_length)
    anchor_query = indexed_queries[8]
    query_batch = [anchor_query] * FLAGS.batch_size

    indexed_questions = word2vec_helpers.SentencesIndex(questions,
                                                        max_document_length)
    question_batch = indexed_questions[:FLAGS.batch_size]

    # eval
    print("\nEvaluating...\n")

    query_lengths = real_len_func(query_batch)
    question_lengths = real_len_func(question_batch)

    feeds = {input_x: query_batch, input_xpos: question_batch}
    feeds[real_len_x] = query_lengths
    feeds[real_len_xpos] = question_lengths
    feeds[dropout_keep_prob] = 1.0  # keep probability 1.0 is fed for this run
    feeds[batch_size] = FLAGS.batch_size

    similarities = sess.run([x_vs_xpos], feeds)
    print(similarities)

    # argmax is taken over the flattened result (no axis is given).
    best_match = np.argmax(similarities)
    print(best_match)
Exemplo n.º 2
0
from gensim.models import Word2Vec
import eval_data_helpers

#################### config ###################
modelfile = "../wvmodel/size300window5sg1min_count100negative10iter50.model"
pattern_file = "../data/eval_data.txt"
output_file = "../data/phrase_gen.txt"
############### end of config #################

# Word2vec model whose vocabulary and similarity queries are used below.
model = Word2Vec.load(modelfile)

# Only the sentences (second field) are used; size and labels are not read.
data_size, data, label = eval_data_helpers.load_data(pattern_file)

# Occurrence count of every whitespace-separated token across all sentences.
word_freq = {}
for sentence in data:
    for word in sentence.split():
        word_freq[word] = word_freq.get(word, 0) + 1

# most_similar() result for each counted word that the model knows about;
# words missing from the model vocabulary are skipped.
word_mostsim = {
    word: model.wv.most_similar(word)
    for word in word_freq
    if word in model.wv.vocab
}

# One record per line: "<word>\t<list of neighbour words>".
# The trailing newline matters: file.write() adds no terminator, so without
# it every record would run together on a single line.
with open(output_file, 'w') as f:
    for word, sim_word in word_mostsim.items():
        # Keep only the first element of each most_similar entry
        # (the neighbour itself, not its similarity value).
        f.write(word + '\t' + str([item[0] for item in sim_word]) + '\n')
def eval():
    """Evaluate the served classifier on the evaluation set and print metrics.

    Loads sentences and labels from ``FLAGS.eval_data_file``, indexes the
    sentences with ``word2vec_helpers.SentencesIndex`` and sends them one by
    one through ``stub.Predict`` (signature ``predict_sentence`` of model
    ``CNN_classifier``). The ``prediction`` and ``softmax`` outputs of every
    response are collected.

    When labels are available, prints the number of examples, the overall
    accuracy and, for ten thresholds evenly spaced in ``[0, 0.95]``, the
    precision over predictions that are non-zero and whose softmax value for
    the predicted class exceeds the threshold. If no prediction passes a
    threshold the precision is reported as ``0.0`` instead of raising
    ``ZeroDivisionError``.

    Nothing is returned; all results go to stdout.
    """
    # Load data
    _, x_raw, y_test = eval_data_helpers.load_data(FLAGS.eval_data_file)
    x_test = word2vec_helpers.SentencesIndex(x_raw, max_document_length)

    predictions = []
    all_softmax = []

    # These request fields are identical for every example, so set them once.
    predict_request.model_spec.name = "CNN_classifier"
    predict_request.model_spec.signature_name = "predict_sentence"
    predict_request.inputs["dropout_keep_prob"].CopyFrom(
        tf.contrib.util.make_tensor_proto(1.0,
                                          shape=[1],
                                          dtype=tf.float32))

    # Send one predict_request per example
    for x in x_test:
        # NOTE(review): the shape is hard-coded to 22 tokens per sentence;
        # it has to agree with max_document_length - confirm.
        predict_request.inputs["input_x"].CopyFrom(
            tf.contrib.util.make_tensor_proto([x.tolist()],
                                              shape=[1, 22],
                                              dtype=tf.int32))
        result = stub.Predict(predict_request, 10.0)  # 10 secs timeout

        predictions.extend(result.outputs['prediction'].int64_val)
        all_softmax.append(np.array(result.outputs['softmax'].float_val))

    # Build the array once instead of re-concatenating on every iteration.
    all_predictions = np.array(predictions, dtype=np.int64)

    # Print accuracy if y_test is defined
    if y_test is not None:
        correct_predictions = float(sum(all_predictions == y_test))
        print("Total number of test examples: {}".format(len(y_test)))
        print("Accuracy: {:g}".format(correct_predictions /
                                      float(len(y_test))))
        for threshold in np.linspace(0, 0.95, 10):
            true_pos = 0
            false_pos = 0
            for i in range(len(y_test)):
                predicted = all_predictions[i]
                # Predictions of class 0 are left out of the precision count
                # (presumably the negative/background class - confirm).
                if predicted != 0 and all_softmax[i][int(predicted)] > threshold:
                    if predicted == y_test[i]:
                        true_pos += 1
                    else:
                        false_pos += 1

            # Guard the empty case and force true division so the result is
            # also correct under Python 2 integer semantics.
            retrieved = true_pos + false_pos
            precision = float(true_pos) / retrieved if retrieved else 0.0
            print("Precision: {} in {} threshold".format(precision, threshold))
Exemplo n.º 4
0
# Misc Parameters
# Both flags are only registered here; nothing in this section reads them.
tf.flags.DEFINE_boolean("allow_soft_placement", True,
                        "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False,
                        "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
# NOTE(review): _parse_flags() and __flags are underscore-prefixed (private)
# members of the flags object and may not exist in every TensorFlow release -
# confirm against the version in use.
FLAGS._parse_flags()
# Dump every flag as NAME=value, sorted by flag name.
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Load data
# load_data returns three values; they are bound here as the size, the
# queries and the questions of the evaluation file.
eval_size, eval_query, eval_question = eval_data_helpers.load_data(
    FLAGS.eval_data_file)

# Both sides are indexed with the same length argument (50).
max_document_length = 50
word2vec_helpers = Word2VecHelper()
x = word2vec_helpers.SentencesIndex(eval_query, max_document_length)
y = word2vec_helpers.SentencesIndex(eval_question, max_document_length)

# Checkpoint
# Looks for checkpoint state under <checkpoint_dir>/checkpoints; when the
# result is falsy nothing is printed.
ckpt = tf.train.get_checkpoint_state(
    os.path.join(FLAGS.checkpoint_dir, 'checkpoints'))
if ckpt:
    print("Read model parameters from %s" % ckpt.model_checkpoint_path)


def visualization(words, attentions, html_file):
    # NOTE(review): only this counter initialisation is visible; none of the
    # parameters are used yet, so the rest of the body appears to be missing.
    i = 0
# Misc Parameters
# Both flags are only registered here; nothing in this section reads them.
tf.flags.DEFINE_boolean("allow_soft_placement", True,
                        "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False,
                        "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
# NOTE(review): _parse_flags() and __flags are underscore-prefixed (private)
# members of the flags object and may not exist in every TensorFlow release -
# confirm against the version in use.
FLAGS._parse_flags()
# Dump every flag as NAME=value, sorted by flag name.
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Load data
# load_data returns three values; they are bound here as the size, the raw
# sentences and the test labels of the evaluation file.
eval_size, x_raw, y_test = eval_data_helpers.load_data(FLAGS.eval_data_file)

# Sentences are indexed with a length argument of 22.
max_document_length = 22
word2vec_helpers = Word2VecHelper()
x_test = word2vec_helpers.SentencesIndex(x_raw, max_document_length)

# Checkpoint
# Looks for checkpoint state under <checkpoint_dir>/checkpoints; when the
# result is falsy nothing is printed.
ckpt = tf.train.get_checkpoint_state(
    os.path.join(FLAGS.checkpoint_dir, 'checkpoints'))
if ckpt:
    print("Read model parameters from %s" % ckpt.model_checkpoint_path)

# Evaluation
# ==================================================
print("\nEvaluating...\n")