Example No. 1
def evaluate(experiment_name, step=''):
    logging.info('*' * 50)
    logging.info('RUNNING EVALUATION FOR MODEL: %s', experiment_name)
    if step == '':
        interesting_checkpoint = tf.train.latest_checkpoint(
            os.path.join(CURRENT_DIR, '..', 'checkpoint', experiment_name))
    else:
        interesting_checkpoint = os.path.join(CURRENT_DIR, '..', 'checkpoint',
                                              experiment_name,
                                              'step-{}'.format(step))
    dataset_manager = DatasetManager()
    dataset_manager.boot()

    with tf.Graph().as_default() as gr:
        logging.info('-- Restoring graph for model: %s',
                     interesting_checkpoint)
        saver = tf.train.import_meta_graph(
            '{}.meta'.format(interesting_checkpoint))
        logging.info('-- Restored graph for model named: %s',
                     interesting_checkpoint)

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)).as_default() as sess:
            saver.restore(sess=sess, save_path=interesting_checkpoint)
            logging.info('-- Restored variables for model named: %s',
                         interesting_checkpoint)
            list_predictions = []
            list_labels = []
            # Look the input and prediction tensors up once and reuse them for every batch.
            tf_input = gr.get_tensor_by_name('input/tf_input:0')
            tf_predictions = gr.get_tensor_by_name('prediction:0')
            for docs, labels in dataset_manager.get_test_by_batch(
                    batch_size=FLAGS.BATCH_SIZE):
                prediction = sess.run(tf_predictions,
                                      feed_dict={tf_input: docs})
                list_predictions.extend(prediction)
                list_labels.extend(labels)
                logging.debug('-- Prediction length: %s/%s',
                              len(list_predictions),
                              dataset_manager.test_y.shape[0])
            logging.info('-- Report for model: %s', experiment_name)
            logging.info(
                classification_report(y_true=list_labels,
                                      y_pred=list_predictions,
                                      digits=4))
            logging.info(
                confusion_matrix(y_true=list_labels, y_pred=list_predictions))
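A minimal way to drive this evaluation from a launcher script, assuming the module-level FLAGS and the checkpoint layout shown above are already configured (the experiment name and step used here are purely illustrative):

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    # Evaluate the latest checkpoint of a hypothetical experiment ...
    evaluate(experiment_name='cnn_baseline')
    # ... or pin the evaluation to one saved step (resolves to 'step-2000').
    evaluate(experiment_name='cnn_baseline', step=2000)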
Example No. 2
def predict(list_docs, experiment_name, step='', batch_size=64):

    logging.info('*' * 50)
    logging.info('RUNNING PREDICT FOR MODEL: %s', experiment_name)
    if step == '':
        interesting_checkpoint = tf.train.latest_checkpoint(os.path.join(CURRENT_DIR, '..', 'checkpoint', experiment_name))
    else:
        interesting_checkpoint = os.path.join(CURRENT_DIR, '..', 'checkpoint', experiment_name, 'step-{}'.format(step))
    dataset_manager = DatasetManager()
    dataset_manager.boot()

    list_preprocessed_sentences = preprocessor.preprocess(list_docs)

    list_vecs = dataset_manager.text2vec.doc_to_vec(list_preprocessed_sentences)
    logging.debug('%s', dataset_manager.text2vec.vec_to_doc(list_vecs))
    list_vecs = dataset_manager.equalize_vector_length_to_np(list_vectors=list_vecs,
                                                             max_length=model_v6.SENTENCE_LENGTH_MAX)

    with tf.Graph().as_default() as gr:
        logging.info('-- Restoring graph for model: %s', interesting_checkpoint)
        saver = tf.train.import_meta_graph('{}.meta'.format(interesting_checkpoint))
        logging.info('-- Restored graph for model named: %s', interesting_checkpoint)

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default() as sess:
            saver.restore(sess=sess, save_path=interesting_checkpoint)
            logging.info('-- Restored variables for model named: %s', interesting_checkpoint)
            list_predictions = []

            # Look the tensors up once; reusing them per batch avoids repeated graph queries.
            tf_input = gr.get_tensor_by_name('input/tf_input:0')
            tf_predictions = gr.get_tensor_by_name('prediction:0')

            # Ceil division so the final partial batch is predicted exactly once
            # and no empty batch is ever fed to the session.
            num_steps = (len(list_vecs) + batch_size - 1) // batch_size
            logging.info('There will be %s steps', num_steps)
            for i in range(num_steps):
                prediction = sess.run(tf_predictions, feed_dict={
                    tf_input: list_vecs[i*batch_size: (i+1)*batch_size]
                })
                list_predictions.extend([dataset_manager.LABEL_UNMAPPING[p] for p in prediction])

            return list_predictions
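A short usage sketch for this batched predictor, assuming the same checkpoint layout; the documents and experiment name below are placeholders:

raw_docs = ['first raw document', 'second raw document']
predictions = predict(raw_docs, experiment_name='cnn_baseline', batch_size=32)
for doc, label in zip(raw_docs, predictions):
    logging.info('%s -> %s', doc, label)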
Example No. 3
def predict(list_sentences,
            output_file,
            experiment_name,
            step='',
            list_labels=None):
    # Avoid a mutable default argument; an empty list means "no labels supplied".
    list_labels = list_labels if list_labels is not None else []
    dataset_manager = DatasetManager()
    dataset_manager.boot()
    list_preprocessed_sentences = preprocessor.preprocess(list_sentences)
    list_vecs = dataset_manager.text2vec.doc_to_vec(
        list_preprocessed_sentences)
    list_vecs = dataset_manager.equalize_vector_length_to_np(
        list_vectors=list_vecs, max_length=model_v1.SENTENCE_LENGTH_MAX)
    list_labels = dataset_manager.convert_labels_to_np(list_labels)

    if step == '':
        interesting_checkpoint = tf.train.latest_checkpoint(
            os.path.join(CURRENT_DIR, '..', 'checkpoint', experiment_name))
    else:
        interesting_checkpoint = os.path.join(CURRENT_DIR, '..', 'checkpoint',
                                              experiment_name,
                                              'step-{}'.format(step))

    with tf.Graph().as_default() as gr:
        logging.info('-- Restoring graph for model: %s',
                     interesting_checkpoint)
        saver = tf.train.import_meta_graph(
            '{}.meta'.format(interesting_checkpoint))
        logging.info('-- Restored graph for model named: %s',
                     interesting_checkpoint)

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)).as_default() as sess:
            saver.restore(sess=sess, save_path=interesting_checkpoint)
            logging.info('-- Restored variables for model named: %s',
                         interesting_checkpoint)

            tf_input = gr.get_tensor_by_name('input/tf_input:0')
            tf_predictions = gr.get_tensor_by_name('prediction:0')

            prediction = sess.run(tf_predictions,
                                  feed_dict={tf_input: list_vecs})

            if len(list_labels) != 0:
                logging.info('-- Report for model: %s', experiment_name)
                logging.info(
                    classification_report(y_true=list_labels,
                                          y_pred=prediction))

            result_dict = dict()
            result_dict['sentence'] = list_sentences
            result_dict['pre-processed'] = list_preprocessed_sentences
            result_dict[
                'pre-processed_recover'] = dataset_manager.text2vec.vec_to_doc(
                    list_vecs)
            result_dict['predict'] = prediction

            if len(list_labels) != 0:
                result_dict['label'] = list_labels

            pd.DataFrame(result_dict).to_csv(output_file, index=False)
            logging.debug('Saved result at %s', output_file)
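This variant writes its results to disk instead of returning them; a sketch of a call, with hypothetical sentences and labels (the CSV holds the sentence, pre-processed, pre-processed_recover, predict and, when labels are given, label columns built in result_dict above):

sentences = ['raw sentence one', 'raw sentence two']
true_labels = ['label_a', 'label_b']  # optional; omit to skip the classification report
predict(sentences,
        output_file='predictions.csv',
        experiment_name='cnn_baseline',
        list_labels=true_labels)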
Example No. 4
def run(experiment_name):
    BEST_THRES = 3
    WORST_THRES = 3
    POPULATION_STEPS = 500
    ITERATIONS = 100
    POPULATION_SIZE = 10
    accuracy_hist = np.zeros((POPULATION_SIZE, POPULATION_STEPS))
    l1_scale_hist = np.zeros((POPULATION_SIZE, POPULATION_STEPS))
    best_accuracy_hist = np.zeros((POPULATION_STEPS, ))
    best_l1_scale_hist = np.zeros((POPULATION_STEPS, ))

    with tf.Graph().as_default() as gr:

        with tf.variable_scope('input'):
            tf_input = tf.placeholder(
                dtype=tf.int32,
                shape=[
                    None, model_population_based_tunning.SENTENCE_LENGTH_MAX
                ],
                name='tf_input')
            tf_labels = tf.placeholder(dtype=tf.int32,
                                       shape=[None],
                                       name='tf_labels')

        models = [
            create_model(
                i, is_included_regularization=FLAGS.IS_INCLUDED_REGULARIZATION)
            for i in range(POPULATION_SIZE)
        ]
        # Give every model its own variable scope so the population's parameters do not collide.
        for index, model in enumerate(models):
            with tf.variable_scope(str(index)):
                model.boot(tf_input, tf_labels)

        logging.info('Graph size: %s', utils.count_trainable_variables())

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.GPU)
        with tf.Session(config=tf.ConfigProto(
                gpu_options=gpu_options,
                allow_soft_placement=True,
                log_device_placement=FLAGS.LOG_DEVICE_PLACEMENT)).as_default(
                ) as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())

            dataset_manager = DatasetManager()
            dataset_manager.boot()

            dataset_generator = dataset_manager.get_batch(
                batch_size=FLAGS.BATCH_SIZE,
                number_epochs=10 * FLAGS.NUMBER_EPOCHS)
            for i in range(POPULATION_STEPS):

                # The worst-performing models copy the variables of the current best model
                sess.run([
                    m.get_copy_from_op(models[0])
                    for m in models[-WORST_THRES:]
                ])
                # Perturb the l1 regularization scale of all models except the top performers
                sess.run([m.l1_scale_perturb_op for m in models[BEST_THRES:]])
                # Training
                for _ in range(ITERATIONS):
                    docs, labels = next(dataset_generator)
                    sess.run([m.tf_optimizer for m in models],
                             feed_dict={
                                 tf_input: docs,
                                 tf_labels: labels
                             })
                docs, labels = next(dataset_generator)
                # Evaluate
                l1_scales = sess.run({m: m.l1_scale for m in models})
                accuracies = sess.run({m: m.tf_acc
                                       for m in models},
                                      feed_dict={
                                          tf_input: docs,
                                          tf_labels: labels
                                      })
                models.sort(key=lambda m: accuracies[m], reverse=True)
                # Logging
                best_accuracy_hist[i] = accuracies[models[0]]
                best_l1_scale_hist[i] = l1_scales[models[0]]
                for m in models:
                    l1_scale_hist[m.model_id, i] = l1_scales[m]
                    accuracy_hist[m.model_id, i] = accuracies[m]
            # numpy arrays are not JSON serializable, so convert them to plain lists first.
            with open('temp', 'w') as output_f:
                json.dump(
                    {
                        'accuracy_hist': accuracy_hist.tolist(),
                        'l1_scale_hist': l1_scale_hist.tolist(),
                        'best_accuracy_hist': best_accuracy_hist.tolist(),
                        'best_l1_scale_hist': best_l1_scale_hist.tolist()
                    }, output_f)
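Because the tuning history is dumped as plain JSON, it can be reloaded later for inspection; a minimal sketch, assuming matplotlib is available and the file written above ('temp') sits in the working directory:

import json
import matplotlib.pyplot as plt

with open('temp') as f:
    history = json.load(f)

# Trace how the best model's accuracy and l1 scale evolved over the population steps.
plt.plot(history['best_accuracy_hist'], label='best accuracy')
plt.plot(history['best_l1_scale_hist'], label='best l1 scale')
plt.xlabel('population step')
plt.legend()
plt.show()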
Example No. 5
def run(experiment_name):
    with tf.Graph().as_default() as gr:
        with tf.variable_scope('input'):
            tf_input = tf.placeholder(dtype=tf.int32,
                                      shape=[None, model.SENTENCE_LENGTH_MAX],
                                      name='tf_input')
            tf_labels = tf.placeholder(dtype=tf.int32,
                                       shape=[None],
                                       name='tf_labels')

        tf_logits = model.inference(tf_input)
        tf_loss = model.loss(tf_logits, tf_labels)

        tf_optimizer, tf_global_step = model.optimize(tf_loss)
        model.measure_acc(tf_logits, tf_labels)

        tf_all_summary = tf.summary.merge_all()

        tf_train_writer = tf.summary.FileWriter(logdir=os.path.join(
            CURRENT_DIR, '..', 'summary', 'train_' + experiment_name),
                                                graph=gr)
        tf_test_writer = tf.summary.FileWriter(logdir=os.path.join(
            CURRENT_DIR, '..', 'summary', 'test_' + experiment_name),
                                               graph=gr)

        tf_embedding_writer = tf.summary.FileWriter(logdir=os.path.join(
            CURRENT_DIR, '..', 'checkpoint', experiment_name))

        # Visual word embedding
        config = projector.ProjectorConfig()
        embedding = config.embeddings.add()
        embedding.tensor_name = 'embedding/word_embeddings'  # name defined in model_v6.py
        embedding.metadata_path = os.path.join(CURRENT_DIR, 'data',
                                               DatasetManager.VOCAB_FILE)
        projector.visualize_embeddings(tf_embedding_writer, config)

        saver = tf.train.Saver(max_to_keep=5,
                               keep_checkpoint_every_n_hours=0.03)

        logging.info('Graph size: %s', utils.count_trainable_variables())

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.GPU)
        with tf.Session(config=tf.ConfigProto(
                gpu_options=gpu_options,
                allow_soft_placement=True,
                log_device_placement=FLAGS.LOG_DEVICE_PLACEMENT)).as_default(
                ) as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())

            dataset_manager = DatasetManager()
            dataset_manager.boot()

            for docs, labels in dataset_manager.get_batch(
                    batch_size=FLAGS.BATCH_SIZE,
                    number_epochs=FLAGS.NUMBER_EPOCHS):
                _, global_step = sess.run([tf_optimizer, tf_global_step],
                                          feed_dict={
                                              tf_input: docs,
                                              tf_labels: labels
                                          })
                summary_interval_step = 10
                if global_step % summary_interval_step == 0:
                    logging.debug('Global step: %s', global_step)
                    train_summary_data = sess.run(tf_all_summary,
                                                  feed_dict={
                                                      tf_input: docs,
                                                      tf_labels: labels
                                                  })
                    tf_train_writer.add_summary(train_summary_data,
                                                global_step=global_step)

                    docs_test, labels_test = dataset_manager.get_test_set(
                        FLAGS.TEST_SIZE, is_shuffled=True)
                    test_summary_data = sess.run(tf_all_summary,
                                                 feed_dict={
                                                     tf_input: docs_test,
                                                     tf_labels: labels_test
                                                 })
                    tf_test_writer.add_summary(test_summary_data,
                                               global_step=global_step)

                if global_step % 200 == 0:
                    path_to_save = os.path.join(CURRENT_DIR, '..',
                                                'checkpoint', experiment_name)
                    if not os.path.exists(path_to_save):
                        os.makedirs(path_to_save)
                    saved_file = saver.save(sess,
                                            save_path=os.path.join(
                                                path_to_save, 'step'),
                                            global_step=global_step,
                                            write_meta_graph=True)
                    logging.debug('Saving model at %s', saved_file)
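A training run is typically kicked off from the module's entry point; a sketch with a hypothetical experiment name:

if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    run(experiment_name='cnn_baseline')
    # Train and test curves end up under ../summary/train_cnn_baseline and
    # ../summary/test_cnn_baseline, checkpoints under ../checkpoint/cnn_baseline/step-<global_step>,
    # so progress can be inspected with: tensorboard --logdir ../summary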