Example #1
                cnn._keep_prob: 1.0
            }  # for evaluation
            step, summaries, loss, accuracy = sess.run(
                [global_step, validation_summary_op, cnn._loss, cnn._accuracy],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()

            print("%s: Step: %d,Loss: %.4f,Accuracy: %.4f" %
                  (time_str, step, loss, accuracy))

            if writer:
                writer.add_summary(summaries, step)

        # Generate batches

        batches = data_util.batch_iter(list(zip(x_train, y_train)),
                                       FLAGS.batch_size, FLAGS.num_epochs)

        # Training loop

        for batch in batches:
            x_batch, y_batch = zip(*batch)
            train_step(x_batch, y_batch, writer=train_summary_writer)
            current_step = tf.train.global_step(sess, global_step)
            if current_step % FLAGS.evaluate_interval == 0:
                print("Evaluation:\n")
                validation_step(x_batch,
                                y_batch,
                                writer=validation_summary_writer)
                print("")
            if current_step % FLAGS.checkpoint_interval == 0:
                path = saver.save(sess,
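All of these excerpts call a data_util.batch_iter helper that is not shown on this page. Below is a minimal sketch of such a generator, inferred from the call sites; the signature batch_iter(data, batch_size, num_epochs, shuffle=True) and the use of NumPy are assumptions, not the original implementation (Example #2 even passes a directory instead of a list, so its variant must load the data itself):

import numpy as np

def batch_iter(data, batch_size, num_epochs, shuffle=True):
    # Sketch only: yields mini-batches of `data` for `num_epochs` epochs,
    # reshuffling the examples at the start of every epoch.
    data = list(data)
    data_size = len(data)
    num_batches_per_epoch = (data_size - 1) // batch_size + 1
    for _ in range(num_epochs):
        order = np.random.permutation(data_size) if shuffle else np.arange(data_size)
        for batch_num in range(num_batches_per_epoch):
            start = batch_num * batch_size
            end = min((batch_num + 1) * batch_size, data_size)
            yield [data[i] for i in order[start:end]]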
Example #2
def training():

    # Load data.
    print('Loading data...')
    try:
        with gfile.Open(MODEL_DIR + '/data', 'rb') as f:
            x_data, y_data = pickle.loads(f.read())
        print('  Old data found in {}.'.format(MODEL_DIR + '/data'))
    except Exception:
        print('  Creation of a new set of data.')
        x_data, y_data = zip(*du.load_labels_data(DATA_DIRECTORY))
        with gfile.Open(MODEL_DIR + '/data', 'wb') as f:
            f.write(pickle.dumps((x_data, y_data)))

    # Load and save vocabulary.
    print('Loading vocabulary...')
    try:
        vocab_processor = learn.preprocessing.VocabularyProcessor.restore(
            MODEL_DIR + '/vocab')
        print("  Old vocabulary found in {}.".format(MODEL_DIR + '/vocab'))
    except Exception:
        print("  Creation of a new vocabulary.")
        max_document_length = max([len(x.split(" ")) for x in y_data])
        vocab_processor = learn.preprocessing.VocabularyProcessor(
            max_document_length)
        vocab_processor.fit(y_data)
    vocab_processor_x = learn.preprocessing.VocabularyProcessor(
        4, vocabulary=vocab_processor.vocabulary_)
    vocab_processor.save(MODEL_DIR + '/vocab')
    print("  Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))

    # Write correspondence 'word ID' to 'word'.
    with open(MODEL_DIR + '/correspondance.tsv', 'w') as f:
        f.write('Word ID\tWord\n')
        for word, word_id in vocab_processor.vocabulary_._mapping.items():
            f.write('{}\t{}\n'.format(str(word_id), word))

    with tf.Graph().as_default() as graph:
        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)

        # Create model.
        print('Creating model...')
        model = Regression(number_of_words=len(x_data[0]),
                           sequence_length=LENGTH_MAX,
                           vocab_size=len(vocab_processor.vocabulary_),
                           embedding_size=EMBEDDING_SIZE)

        # Define Training procedure.
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
        grads_and_vars = optimizer.compute_gradients(model.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        # Checkpoint directory.
        checkpoint_path = MODEL_DIR + "/checkpoint.ckpt"
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=3)

    with tf.Session(graph=graph) as sess:

        # Initialize.
        print('Initializing...')
        sess.run(tf.global_variables_initializer())

        # Maybe restore model parameters.
        ckpt = tf.train.get_checkpoint_state(MODEL_DIR)
        if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path + '.index'):
            print("Restoring model parameters from %s." %
                  ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("Fresh parameters for this model.")

        # Tensorboard.
        dir_summary = MODEL_DIR + '/summary/' + datetime.datetime.now(
        ).isoformat()
        train_writer = tf.summary.FileWriter(dir_summary, sess.graph)
        merged_summary = tf.summary.merge_all()

        def train_step(x_batch, y_batch):
            """
            A single training step.
            """
            feed_dict = {model.input_x: x_batch, model.input_y: y_batch}

            summary, _, step, loss = sess.run(
                [merged_summary, train_op, global_step, model.loss], feed_dict)

            train_writer.add_summary(summary, step)
            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {}".format(time_str, step, loss))

        # Generate batches.
        batch_generator = du.batch_iter(DATA_DIRECTORY, BATCH_SIZE, 200000)

        # Training loops.
        while True:
            x_text, y_text = zip(*next(batch_generator))

            x_batch = [" ".join(four_words) for four_words in x_text]
            x_batch = vocab_processor_x.transform(
                x_batch
            )  # list of token-id sequences, e.g. [[1, 2, 3, 4], [5, 6, 7, 8], [7, 8, 9, 10]]
            y_batch = vocab_processor.transform(
                y_text
            )  # list of token-id sequences, e.g. [[1, 3, 2, 5, 6], [7, 8, 9, 10, 12, 15, 16]]

            x_batch = np.array([x for x in x_batch])
            y_batch = np.array([y for y in y_batch])

            # Pad the target sequences out to LENGTH_MAX.
            y_batch = np.concatenate(
                (y_batch, np.zeros(
                    (len(y_batch), LENGTH_MAX - len(y_batch[0])))), 1)

            train_step(x_batch, y_batch)
            current_step = tf.train.global_step(sess, global_step)
            if current_step % SAVE_EVERY == 0:
                path = saver.save(sess,
                                  checkpoint_path,
                                  global_step=current_step)
                print("Saved model checkpoint to {}\n".format(path))
Example #3
        log_device_placement=FLAGS.log_device_placement)

    sess = tf.Session(config=session_config)
    with sess.as_default():

        saver = tf.train.import_meta_graph("%s.meta" % checkpoint_file)
        saver.restore(sess, checkpoint_file)

        input_x = graph.get_operation_by_name("input/x").outputs[0]
        keep_prob = graph.get_operation_by_name("input/keep_prop").outputs[0]

        predictions = graph.get_operation_by_name(
            "output/predictions").outputs[0]

        batches = data_util.batch_iter(list(x_test),
                                       FLAGS.batch_size,
                                       1,
                                       shuffle=False)

        all_predictions = []

        for x_test_batch in batches:

            feed_dict = {input_x: x_test_batch, keep_prob: 1.0}

            batch_predictions = sess.run(predictions, feed_dict)

            all_predictions = np.concatenate(
                [all_predictions, batch_predictions])

if y_test is not None:
    correct_predictions = float(sum(all_predictions == y_test))
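The excerpt stops right after counting the matches. The commented-out block in Example #4 below shows how these evaluation scripts typically finish, roughly:

if y_test is not None:
    correct_predictions = float(sum(all_predictions == y_test))
    print("Total number of test examples: {}".format(len(y_test)))
    print("Accuracy: {:g}".format(correct_predictions / float(len(y_test))))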
Example #4
def classify_text(raw_input):
    # print("Transforming data...")
    vocab_path = os.path.join(checkpoint_dir, "..", "vocab")
    vocab_processor = learn.preprocessing.VocabularyProcessor.restore(
        vocab_path)
    x_test = np.array(list(vocab_processor.transform(raw_input)))
    with graph.as_default():
        sess = tf.Session(config=config_prefs)
        with sess.as_default():
            # load graph
            saver = tf.train.import_meta_graph(
                "{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # get saved vars
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            dropout_keep_probability = graph.get_operation_by_name(
                "dropout_keep_probability").outputs[0]

            # tensors to classify
            predictions = graph.get_operation_by_name(
                "output/predictions").outputs[0]

            # gen batches
            batch_data = data_util.batch_iter(x_test,
                                              batch_size,
                                              1,
                                              shuffle=False)

            predicted_classes = []
            # Classify Batches
            for batch in batch_data:
                batch_pred = sess.run(predictions, {
                    input_x: batch,
                    dropout_keep_probability: 1.0
                })
                predicted_classes = np.concatenate(
                    [predicted_classes, batch_pred])
                # print(predicted_classes)
            # pred = sess.run(predicted_classes, {input_x: x_test, dropout_keep_probability:1.0})
            return predicted_classes


# print("Evaluating...")
# # checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
# graph = tf.Graph()
# with graph.as_default():
# 	# Config preferences for session
# 	config_pref = tf.ConfigProto(allow_soft_placement = allow_soft_placement,
# 								 log_device_placement = log_device_placement)
# 	sess = tf.Session(config=config_pref)
# 	with sess.as_default():
# 		# Load saved graph
# 		saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
# 		saver.restore(sess, checkpoint_file)
#
# 		# Get saved variables by name
# 		input_x = graph.get_operation_by_name("input_x").outputs[0]
# 		dropout_keep_probability = graph.get_operation_by_name("dropout_keep_probability").outputs[0]
#
# 		# Tensors we want to classify
# 		predictions = graph.get_operation_by_name("output/predictions").outputs[0]
#
# 		# Generate batches
# 		batch_data = data_util.batch_iter(x_test, batch_size, 1, shuffle=False)
#
# 		predicted_classes = []
# 		# Classify batches
# 		for test_batch in batch_data:
# 			batch_predictions = sess.run(predictions, {input_x: test_batch, dropout_keep_probability: 1.0})
# 			predicted_classes = np.concatenate([predicted_classes, batch_predictions])
#
# # print accuracy if y_test exists
# if y_test is not None:
# 	correct = float(sum(predicted_classes == y_test))
# 	print("Number of test examples: ", len(y_test))
# 	print("Accuracy: ", correct/float(len(y_test)))
#
# printed_predictions = np.column_stack((np.array(x_unfiltered), predicted_classes))
# output_dir = os.path.join(checkpoint_dir, "..", "predictions.csv")
# print("Saving classes to: ", output_dir)
# with open(output_dir, 'w') as f:
# 	csv.writer(f).writerows(printed_predictions)
#
# for input_text, classification in zip(x_unfiltered, predicted_classes):
# 	print("Input: ", input_text)
# 	if classification:
# 		print("Class: Positive")
# 	else:
# 		print("Class: Negative")
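classify_text expects an iterable of raw strings, since the argument is passed directly to vocab_processor.transform. A hypothetical call might look like this; the input sentences are invented for illustration:

# Hypothetical usage of classify_text; the inputs are made-up examples.
sentences = ["the movie was great", "terrible acting and a dull plot"]
classes = classify_text(sentences)
for text, label in zip(sentences, classes):
    print("{} -> {}".format(text, int(label)))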
Example #5
            cnn.input_x: x_batch,
            cnn.input_y: y_batch,
            cnn.dropout_keep_probability: 1.0
        }
        step, summaries, loss, accuracy = sess.run(
            [global_step, val_summary_op, cnn.loss, cnn.accuracy],
            feed_dict=feed_dict)
        time_str = datetime.datetime.now().isoformat()
        print("{}: step {}, loss{:g}, accuracy{:g}".format(
            time_str, step, loss, accuracy))
        if writer:
            writer.add_summary(summaries, step)

    # Generate batches
    batches = data_util.batch_iter(list(zip(x_train, y_train)),
                                   batch_size=batch_size,
                                   num_epochs=num_epochs)
    # Training loop
    for batch in batches:
        x_batch, y_batch = zip(*batch)
        train_step(x_batch, y_batch)
        current_step = tf.train.global_step(sess, global_step)
        if current_step % evaluate_every == 0:
            print("\n Evaluation: ")
            dev_step(x_test, y_test, writer=val_summary_writer)
            print("")
        if current_step % checkpoint_every == 0:
            path = saver.save(sess,
                              checkpoint_prefix,
                              global_step=current_step)
            print("Saved checkpoint of model to {} \n".format(path))