Пример #1
0
def data_construct(input_file, batch_size, vocab, max_length=100, config=None):
    """Load the test set from ``input_file`` and wrap it in an MXNet iterator.

    Args:
        input_file: Path handed to ``data_helpers.load_test_data``.
        batch_size: Batch size of the returned ``NDArrayIter``.
        vocab: Vocabulary object forwarded to the loader.
        max_length: Maximum sequence length forwarded to the loader.
        config: Optional configuration forwarded to the loader. When it is
            truthy the loaded labels are attached to the iterator,
            otherwise the iterator carries no labels.

    Returns:
        Tuple ``(test_iter, contents, labels)``.
    """
    logger.info('Loading data...')

    loaded = data_helpers.load_test_data(input_file, max_length, vocab, config)
    x_test, contents, labels, y_test = loaded

    # Labels are only handed to the iterator when a config was supplied.
    iter_labels = y_test if config else None
    test_iter = mx.io.NDArrayIter(x_test, iter_labels, batch_size)
    return test_iter, contents, labels
Пример #2
0
def main():
    """Run inference on ``data/test.csv`` and write a submission CSV.

    The predictions are printed, then written to ``submission_softmax.csv``
    with a 1-based ``ImageId`` column next to the predicted ``Label``.
    """
    samples = data_helpers.load_test_data('data/test.csv')
    predicted = inference(samples)
    print(predicted)

    # Image ids are 1-based, one per loaded sample.
    image_ids = range(1, len(samples) + 1)
    submission = np.c_[image_ids, predicted]
    np.savetxt('submission_softmax.csv',
               submission,
               delimiter=',',
               header='ImageId,Label',
               comments='',
               fmt='%d')
Пример #3
0
def main():
    """Score the insuranceQA test pool with a trained QA-LSTM checkpoint.

    Loads the embeddings, vocabulary, answers and test pool, restores the
    model from ``checkpoints/model.ckpt``, scores every question/answer
    batch and prints MAP and MRR as computed by ``eval_map_mrr``.
    """
    trained_model = "checkpoints/model.ckpt"
    embedding_size = 100  # Word embedding dimension
    batch_size = 128  # Batch data size
    sequence_length = 300  # Sentence length
    rnn_size = 50  # Number of hidden layer neurons
    attention_matrix_size = 100
    margin = 0.1
    gpu_mem_usage = 0.75
    gpu_device = "/gpu:0"

    embeddings, word2idx = data_helpers.load_embedding('vectors.nobin')
    voc = data_helpers.load_vocab(
        'D:\\DataMining\\Datasets\\insuranceQA\\V1\\vocabulary')
    all_answers = data_helpers.load_answers(
        'D:\\DataMining\\Datasets\\insuranceQA\\V1\\answers.label.token_idx',
        voc)
    # Use the same sequence length the model is built with instead of
    # repeating the literal, so the two cannot drift apart.
    questions, answers, labels, qids, aids = data_helpers.load_test_data(
        'D:\\DataMining\\Datasets\\insuranceQA\\V1\\question.test1.label.token_idx.pool',
        all_answers, voc, word2idx, sequence_length)

    with tf.Graph().as_default(), tf.device(gpu_device):
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_mem_usage)
        session_conf = tf.ConfigProto(allow_soft_placement=True,
                                      gpu_options=gpu_options)
        model = QALSTM(batch_size, sequence_length, embeddings, embedding_size,
                       rnn_size, margin, attention_matrix_size)
        # Entering the Session itself (not ``as_default()``) installs it as
        # the default session AND closes it on exit, releasing its resources.
        with tf.Session(config=session_conf) as sess:
            saver = tf.train.Saver()
            print("Start loading the model")
            saver.restore(sess, trained_model)
            print("The model is loaded")

            scores = []
            for question, answer in data_helpers.test_batch_iter(
                    questions, answers, batch_size):
                feed_dict = {model.qtest: question, model.atest: answer}
                batch_scores = sess.run(model.scores, feed_dict)
                scores.extend(batch_scores.tolist())

    MAP, MRR = eval_map_mrr(qids, aids, scores, labels)
    print('MAP %2.3f\tMRR %2.3f' % (MAP, MRR))
Пример #4
0
# Parse the command-line flags and echo every flag as NAME=value.
FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Data Preparation
# ==================================================

# Load data
print("Loading data...")
x_text, y = data_helpers.load_data_and_labels(FLAGS.positive_data_file,
                                              FLAGS.negative_data_file)
x_eval = data_helpers.load_test_data(FLAGS.test_data_file)

# Pad sentences
# The first call pads training and evaluation sentences together and yields
# max_length; the second call pads only the training sentences, passing that
# max_length in (presumably so both sets share one padded length -- confirm
# against data_helpers.pad_sentences).
sentences_padded_all, max_length = data_helpers.pad_sentences(x_text + x_eval)
sentences_padded, max_length = data_helpers.pad_sentences(x_text, max_length)

# Build vocabulary
# The vocabulary is built from the combined (train + eval) padded sentences,
# while the model input is built from the training sentences only.
vocabulary, vocabulary_inv = data_helpers.build_vocab(sentences_padded_all)
x, y = data_helpers.build_input_data(sentences_padded, y, vocabulary)

# Randomly shuffle data
# The seed is fixed so the permutation is the same on every run.
np.random.seed(10)
shuffle_indices = np.random.permutation(np.arange(len(y)))
x_shuffled = x[shuffle_indices]
y_shuffled = y[shuffle_indices]
Пример #5
0
        seq_len = len(tmp_list)
        if seq_len > max_len:
            seq_len = max_len
        real_len.append(seq_len)
    return real_len

def load_train_params(train_dir):
    """Load the label ordering and parameters saved by a training run.

    Args:
        train_dir: Directory containing ``sorted_label.json`` and
            ``train_params.json``.

    Returns:
        Tuple ``(sorted_label, train_params)`` with the decoded JSON content
        of the two files, in that order.

    Raises:
        IOError / OSError: If either file cannot be opened.
        ValueError: If either file does not contain valid JSON.
    """
    def _read_json(file_name):
        # ``with`` closes the handle even if decoding fails; the original
        # ``open(...).read()`` left closing to the garbage collector.
        with open(os.path.join(train_dir, file_name)) as handle:
            return json.load(handle)

    sorted_label = _read_json('sorted_label.json')
    train_params = _read_json('train_params.json')
    return sorted_label, train_params

# CHANGE THIS: Load data. Load your own data here
# The training artefacts are loaded unconditionally: train_params is needed
# further down for 'max_document_length' even when the built-in sample
# sentences are evaluated. Loading it only in the eval_train branch raised a
# NameError whenever eval_train was false.
train_dir = os.path.join(FLAGS.checkpoint_dir, "..", "trained_results")
sorted_label, train_params = load_train_params(train_dir)
if FLAGS.eval_train:
    x_raw, y_test = data_helpers.load_test_data(FLAGS.test_data_file, sorted_label)
    # Collapse the per-class label rows into class indices.
    y_test = np.argmax(y_test, axis=1)
else:
    x_raw = ["a masterpiece four years in the making", "everything is off."]
    y_test = [1, 0]

# Map data into vocabulary
vocab_path = os.path.join(FLAGS.checkpoint_dir, "..", "vocab")
vocab_processor = learn.preprocessing.VocabularyProcessor.restore(vocab_path)
# Per-sentence lengths, capped at the document length used during training.
x_real_len_test = np.array(get_real_len(x_raw, train_params['max_document_length']))
x_test = np.array(list(vocab_processor.transform(x_raw)))
print("\nEvaluating...\n")

# Evaluation
# ==================================================
checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
Пример #6
0
def eval():
    """Evaluate the latest checkpoint on the test set and dump the results.

    Restores the source/target vocabulary processors stored next to
    ``FLAGS.checkpoint_dir``, decodes every test sentence auto-regressively
    with the latest checkpoint, prints token accuracy (ignoring positions
    whose target id is 0) and the corpus BLEU score, and writes
    source/target/prediction triples to ``FLAGS.output_dir``.
    """
    # Map data into vocabulary
    source_vocab_path = os.path.join(FLAGS.checkpoint_dir, "..",
                                     "source_vocab")
    source_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor.restore(
        source_vocab_path)
    # Transforming a dummy document reveals the length the processor emits.
    source_max_sentence_length = len(
        list(source_vocab_processor.transform(['test']))[0])
    target_vocab_path = os.path.join(FLAGS.checkpoint_dir, "..",
                                     "target_vocab")
    target_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor.restore(
        target_vocab_path)
    target_max_sentence_length = len(
        list(target_vocab_processor.transform(['test']))[0])

    with tf.device('/cpu:0'):
        source_sent, target_sent = data_helpers.load_test_data(
            FLAGS.test_source_dir, FLAGS.test_target_dir,
            source_max_sentence_length, target_max_sentence_length)

    source_eval = np.array(list(source_vocab_processor.transform(source_sent)))
    target_eval = np.array(list(target_vocab_processor.transform(target_sent)))

    checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        # Entering the Session makes it the default session and closes it
        # on exit, so its resources are released once decoding is done.
        with tf.Session(config=session_conf) as sess:
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph(
                "{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            source = graph.get_operation_by_name("encoder_x").outputs[0]
            target = graph.get_operation_by_name("decoder_y").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name(
                "output/predictions").outputs[0]

            # Generate batches for one epoch
            batches = data_helpers.batch_iter(list(source_eval),
                                              FLAGS.batch_size,
                                              1,
                                              shuffle=False)

            # Collect the predictions here
            all_predictions = np.empty([0, target_max_sentence_length], int)
            for batch in batches:
                # Auto-regressive inference: the decoder input starts filled
                # with 1 (presumably the start/placeholder id -- TODO
                # confirm) and is overwritten one position per step.
                # The buffer is sized by the TARGET length; shaping it like
                # the source batch broke whenever the two lengths differed.
                batch_predictions = np.ones(
                    [len(batch), target_max_sentence_length], dtype=int)
                for j in range(target_max_sentence_length):
                    pred = sess.run(predictions,
                                    feed_dict={
                                        source: batch,
                                        target: batch_predictions
                                    })
                    batch_predictions[:, j] = pred[:, j]

                all_predictions = np.concatenate(
                    [all_predictions, batch_predictions])

    # Accuracy over the positions whose target id is non-zero.
    is_target = np.not_equal(target_eval, 0).astype(float)
    accuracy = np.sum(
        np.equal(all_predictions, target_eval).astype(float) *
        is_target) / np.sum(is_target)
    print("Total number of test examples: {}\n".format(
        len(target_eval)))
    print("Accuracy: {:g}".format(accuracy))

    prediction_sent = [
        " ".join(
            target_vocab_processor.vocabulary_.reverse(idx)
            for idx in idx_seq) for idx_seq in all_predictions
    ]

    # BLEU Score
    # corpus_bleu expects (list_of_references, hypotheses): the ground-truth
    # targets are the references and the model outputs are the hypotheses.
    # The two were previously passed the wrong way round.
    list_of_references = []
    hypotheses = []
    for hypothesis, reference in zip(prediction_sent, target_sent):
        hypothesis_tokens = hypothesis.split()
        reference_tokens = reference.split()
        if len(hypothesis_tokens) > 3 and len(reference_tokens) > 3:
            list_of_references.append([reference_tokens])
            hypotheses.append(hypothesis_tokens)
    chencherry = SmoothingFunction()
    score = corpus_bleu(list_of_references,
                        hypotheses,
                        smoothing_function=chencherry.method4)
    print("BLEU Score : {:g}\n".format(score * 100))

    # Samples of Translation Result
    if not os.path.exists('results'):
        os.mkdir('results')
    # ``with`` guarantees the file is flushed and closed even on error.
    with open(FLAGS.output_dir, 'w') as f:
        for idx, (s, t, p) in enumerate(
                zip(source_sent, target_sent, prediction_sent)):
            f.write("Sample #%d\n" % idx)
            f.write("Source : %s\n" % s)
            f.write("Target : %s\n" % t)
            f.write("Predict : %s\n\n" % p)
Пример #7
0
def _predict_pairs(checkpoint_path, x_text, x_position, x_id, mask):
    """Run one saved pair classifier and keep the candidates it labels 1.

    Args:
        checkpoint_path: Checkpoint prefix; ``<prefix>.meta`` holds the graph.
        x_text: Text inputs, one entry per candidate pair.
        x_position: 2-D position inputs aligned with ``x_text``.
        x_id: Array aligned with ``x_text`` (compared against sentence
            indices by the caller); must support NumPy index tuples.
        mask: Value fed to the ``position_mask`` placeholder.

    Returns:
        Tuple ``(ids, scored)`` restricted to candidates predicted as 1:
        ``ids`` are the matching entries of ``x_id`` and ``scored`` are the
        matching rows of ``x_position`` with the column-1 probability
        appended as a last column.
    """
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_allow_growth
        # Entering the Session makes it the default session and closes it
        # on exit, so the first model is released before the second loads.
        with tf.Session(config=session_conf) as sess:
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_path))
            saver.restore(sess, checkpoint_path)

            # Get the placeholders from the graph by name
            input_text = graph.get_operation_by_name("input_text").outputs[0]
            input_position = graph.get_operation_by_name("input_position").outputs[0]
            input_mask = graph.get_operation_by_name("position_mask").outputs[0]
            emb_dropout_keep_prob = graph.get_operation_by_name("emb_dropout_keep_prob").outputs[0]
            rnn_dropout_keep_prob = graph.get_operation_by_name("rnn_dropout_keep_prob").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]
            probablities = graph.get_operation_by_name("output/probabilities").outputs[0]

            # Generate batches for one epoch
            batches = data_helpers.batch_iter(x_text, x_position, None, FLAGS.batch_size, 1, shuffle=False)

            # Collect the predictions here; dropout is disabled (keep = 1.0).
            preds = []
            probs = []
            for x_text_batch, x_position_batch in batches:
                pred, prob = sess.run([predictions, probablities], {input_text: x_text_batch,
                                                                    input_position: x_position_batch,
                                                                    input_mask: mask,
                                                                    emb_dropout_keep_prob: 1.0,
                                                                    rnn_dropout_keep_prob: 1.0,
                                                                    dropout_keep_prob: 1.0})
                preds.append(pred)
                probs.append(prob[:, 1])

    preds = np.concatenate(preds)
    probs = np.concatenate(probs)
    scored = np.concatenate((x_position, probs[:, None]), axis=1)
    positive = np.where(preds == 1)
    return x_id[positive].copy(), scored[positive].copy()


def predict(data, params_path=FLAGS.checkpoint_dir):
    """Attach the combined tuples found in every sentence to ``data``.

    Two saved models are run in turn: ``model_ta_final`` (time/attribute
    pairs) and ``model_av_final`` (attribute/value pairs). The positive pairs
    of each sentence are merged by ``Two2Three``.

    Args:
        data: Sequence of dict-like sentence records; modified in place.
        params_path: Directory containing the two checkpoints.

    Returns:
        ``data``, where every item has a new ``'results'`` entry holding the
        ``Two2Three`` output for that sentence as a list.
    """
    num_sent = len(data)
    mask = np.ones(shape=[FLAGS.sequence_length]).nonzero()
    with tf.device('/cpu:0'):
        x_text_ta, x_position_ta, x_id_ta, x_text_av, x_position_av, x_id_av = data_helpers.load_test_data(data)

    checkpoint_file = params_path + '/'

    # find Time & Attribute tuples
    id_ta, time_attr = _predict_pairs(checkpoint_file + 'model_ta_final',
                                      x_text_ta, x_position_ta, x_id_ta, mask)

    # find Attribute & Value tuples
    id_av, attr_value = _predict_pairs(checkpoint_file + 'model_av_final',
                                       x_text_av, x_position_av, x_id_av, mask)

    # combining (time, attribute, value) tuples
    for sent_idx in range(num_sent):
        pairs = {'time_attr': time_attr[np.where(id_ta == sent_idx)],
                 'attr_val': attr_value[np.where(id_av == sent_idx)]}
        data[sent_idx]['results'] = Two2Three(pairs).tolist()

    return data
Пример #8
0
def test_cnn(test_examples, test_labels, checkpoint_file, vocabulary):
    """Evaluate a saved CNN text classifier on a labelled test set.

    Args:
        test_examples: Test inputs forwarded to ``data_helpers.load_test_data``.
        test_labels: Test labels forwarded to ``data_helpers.load_test_data``.
        checkpoint_file: Checkpoint prefix; ``<prefix>.meta`` holds the graph.
        vocabulary: Vocabulary forwarded to the loader (the loader's returned
            vocabulary replaces it afterwards).

    Returns:
        The concatenated predictions of all batches.
    """
    # The batch size and device-placement flags are expected to be defined
    # by the caller before this function runs.
    FLAGS = tf.flags.FLAGS
    FLAGS._parse_flags()
    print("\nParameters:")
    for attr, value in sorted(FLAGS.__flags.items()):
        print("{}={}".format(attr.upper(), value))
    print("")

    # Load data. Load your own data here
    print("Loading data...")
    x_test, y_test, vocabulary, vocabulary_inv = data_helpers.load_test_data(
        test_examples, test_labels, vocabulary)

    # Collapse the per-class label rows into class indices.
    y_test = np.argmax(y_test, axis=1)
    print("Vocabulary size: {:d}".format(len(vocabulary)))
    print("Test set size {:d}".format(len(y_test)))

    print("\nEvaluating...\n")

    # Evaluation
    # ==================================================
    graph = tf.Graph()
    with graph.as_default():
        gpu_options = tf.GPUOptions(allow_growth=True)

        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement,
            gpu_options=gpu_options)
        # Entering the Session makes it the default session and closes it
        # on exit, releasing its resources after the last batch.
        with tf.Session(config=session_conf) as sess:
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph(
                "{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name(
                "dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name(
                "output/predictions").outputs[0]

            # Generate batches for one epoch
            batches = data_helpers.batch_iter(x_test,
                                              FLAGS.batch_size,
                                              1,
                                              shuffle=False)

            # Collect the predictions here
            all_predictions = []

            for x_test_batch in batches:
                # Dropout is disabled (keep probability 1.0) for evaluation.
                batch_predictions = sess.run(predictions, {
                    input_x: x_test_batch,
                    dropout_keep_prob: 1.0
                })
                all_predictions = np.concatenate(
                    [all_predictions, batch_predictions])

    # Print accuracy
    correct_predictions = float(sum(all_predictions == y_test))
    print("Total number of test examples: {}".format(len(y_test)))
    print("Accuracy: {:g}".format(correct_predictions / float(len(y_test))))

    return all_predictions
        initializer(name, arg_dict[name])

        param_blocks.append( (i, arg_dict[name], args_grad[name], name) )


    data = cnn_exec.arg_dict['data']
    label = cnn_exec.arg_dict['softmax_label']

    return CNNModel(cnn_exec=cnn_exec, symbol=cnn, data=data, label=label, param_blocks=param_blocks)

# Load the word -> index vocabulary.
# NOTE(review): pickle.load can execute arbitrary code; only load a
# vocab.pkl that comes from a trusted source.
# ``with`` closes the file even if unpickling raises.
with open('vocab.pkl', 'rb') as pkl_file:
    vocab = pickle.load(pkl_file)


sentence = data_helpers.load_test_data()
sentences_padded = data_helpers.pad_sentences(sentence)
# Replace every word by its vocabulary index; words missing from the
# vocabulary are mapped to 0.
sentence_test = np.array(
    [[vocab[word] if word in vocab else 0 for word in sent]
     for sent in sentences_padded])

vocab_size = len(vocab)
num_embed = 50
batch_size = 100
Пример #10
0
tf.app.flags.DEFINE_boolean('log_device_placement', False,
                            'Demonstrate which variables are on what device.')

# Expose all defined flags through the FLAGS object.
FLAGS = tf.app.flags.FLAGS

# Both directories must be given as absolute paths; fail fast otherwise.
if not os.path.isabs(FLAGS.train_dir):
    raise ValueError('You must assign absolute path for --train_dir')

if not os.path.isabs(FLAGS.checkpoint_dir):
    raise ValueError('You must assign absolute path for --checkpoint_dir')

# Fetch the dataset if necessary, then load the training and test splits.
maybe_download_and_extract()
images_train, cls_train, labels_train = load_training_data()
images_test, cls_test, labels_test = load_test_data()
# Bundle images and labels of both splits into one dict keyed by name
# (the cls_* values are not included).
tensors_key = ['images_train', 'labels_train', 'images_test', 'labels_test']
tensors = [images_train, labels_train, images_test, labels_test]
data = dict(zip(tensors_key, tensors))

num_train_samples = images_train.shape[0]
# Image geometry -- presumably 32x32 pixels with 3 colour channels;
# TODO confirm against the dataset loader.
height = 32
width = 32
num_channels = 3
print(load_class_names())

graph = tf.Graph()
with graph.as_default():

    global_step = tf.Variable(0, name="global_step", trainable=False)
Пример #11
0
                       "Checkpoint directory from training run")

# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True,
                        "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False,
                        "Log placement of ops on devices")

# Parse the command-line flags and echo every flag as NAME=value.
FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# NOTE(review): the input file is a hard-coded, machine-specific absolute
# path; consider exposing it as a flag.
x_raw = data_helpers.load_test_data(
    '/Users/Winnerineast/Documents/haodaifu/NewData/tobetrained.csv')

# Map data into vocabulary
# The vocabulary (and the padded length it was built with) is restored from
# the "vocab" entry next to the checkpoint directory.
vocab_path = os.path.join(FLAGS.checkpoint_dir, "..", "vocab")
vocabulary, vocabulary_inv, max_length = data_helpers.restore_vocabulary(
    vocab_path)
sentences_padded, tmp_length = data_helpers.pad_sentences(x_raw, max_length)
# No labels are passed (None); what y_test holds in that case depends on
# data_helpers.build_input_data -- TODO confirm.
x_test, y_test = data_helpers.build_input_data(sentences_padded, None,
                                               vocabulary)

print("\nEvaluating...\n")

# Evaluation
# ==================================================
checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
graph = tf.Graph()