# Exemplo n.º 1
# 0
def predict(list_docs, experiment_name, step='', batch_size=64):
    """Run batched inference for a trained checkpoint over a list of documents.

    Args:
        list_docs: iterable of raw documents to classify.
        experiment_name: experiment name; used to locate the checkpoint
            directory under ../checkpoint/<experiment_name>.
        step: specific training step whose checkpoint should be restored;
            '' (default) restores the latest checkpoint.
        batch_size: number of vectorized documents fed per session run.

    Returns:
        List of unmapped label predictions, one per input document.
    """
    logging.info('*' * 50)
    logging.info('RUNNING PREDICT FOR MODEL: %s', experiment_name)

    # Resolve which checkpoint to restore: latest, or an explicit step.
    if step == '':
        interesting_checkpoint = tf.train.latest_checkpoint(
            os.path.join(CURRENT_DIR, '..', 'checkpoint', experiment_name))
    else:
        interesting_checkpoint = os.path.join(
            CURRENT_DIR, '..', 'checkpoint', experiment_name,
            'step-{}'.format(step))

    dataset_manager = DatasetManager()
    dataset_manager.boot()

    list_preprocessed_sentences = preprocessor.preprocess(list_docs)

    list_vecs = dataset_manager.text2vec.doc_to_vec(list_preprocessed_sentences)
    # Round-trip sanity check of the vectorization; was a bare print()
    # leaking to stdout — routed through logging like the rest of the file.
    logging.debug('Recovered docs: %s',
                  dataset_manager.text2vec.vec_to_doc(list_vecs))
    list_vecs = dataset_manager.equalize_vector_length_to_np(
        list_vectors=list_vecs, max_length=model_v6.SENTENCE_LENGTH_MAX)

    with tf.Graph().as_default() as gr:
        logging.info('-- Restoring graph for model: %s', interesting_checkpoint)
        saver = tf.train.import_meta_graph('{}.meta'.format(interesting_checkpoint))
        logging.info('-- Restored graph for model named: %s', interesting_checkpoint)

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default() as sess:
            saver.restore(sess=sess, save_path=interesting_checkpoint)
            logging.info('-- Restored variables for model named: %s', interesting_checkpoint)

            # Tensor lookups are loop-invariant — hoisted out of the batch loop.
            tf_input = gr.get_tensor_by_name('input/tf_input:0')
            tf_predictions = gr.get_tensor_by_name('prediction:0')

            # Ceiling division fixes the original off-by-one: with
            # range(num_steps + 1) a final EMPTY batch was fed to sess.run
            # whenever len(list_vecs) was an exact multiple of batch_size.
            num_steps = (len(list_vecs) + batch_size - 1) // batch_size
            logging.info('There will be %s steps', num_steps)

            list_predictions = []
            for i in range(num_steps):
                prediction = sess.run(tf_predictions, feed_dict={
                    tf_input: list_vecs[i * batch_size:(i + 1) * batch_size]
                })
                list_predictions.extend(
                    dataset_manager.LABEL_UNMAPPING[p] for p in prediction)

            return list_predictions
# Exemplo n.º 2
# 0
def predict(list_sentences,
            output_file,
            experiment_name,
            step='',
            list_labels=None):
    """Predict labels for sentences and write the full results to a CSV file.

    Args:
        list_sentences: iterable of raw sentences to classify.
        output_file: path of the CSV file to write results to.
        experiment_name: experiment name; used to locate the checkpoint
            directory under ../checkpoint/<experiment_name>.
        step: specific training step whose checkpoint should be restored;
            '' (default) restores the latest checkpoint.
        list_labels: optional ground-truth labels; when provided, a
            classification report is logged and a 'label' column is added
            to the output CSV.
    """
    # None sentinel replaces the original mutable default argument ([]),
    # which is shared across calls and a well-known Python pitfall.
    if list_labels is None:
        list_labels = []

    dataset_manager = DatasetManager()
    dataset_manager.boot()
    list_preprocessed_sentences = preprocessor.preprocess(list_sentences)
    list_vecs = dataset_manager.text2vec.doc_to_vec(
        list_preprocessed_sentences)
    list_vecs = dataset_manager.equalize_vector_length_to_np(
        list_vectors=list_vecs, max_length=model_v1.SENTENCE_LENGTH_MAX)
    list_labels = dataset_manager.convert_labels_to_np(list_labels)

    # Resolve which checkpoint to restore: latest, or an explicit step.
    if step == '':
        interesting_checkpoint = tf.train.latest_checkpoint(
            os.path.join(CURRENT_DIR, '..', 'checkpoint', experiment_name))
    else:
        interesting_checkpoint = os.path.join(CURRENT_DIR, '..', 'checkpoint',
                                              experiment_name,
                                              'step-{}'.format(step))

    with tf.Graph().as_default() as gr:
        logging.info('-- Restoring graph for model: %s',
                     interesting_checkpoint)
        saver = tf.train.import_meta_graph(
            '{}.meta'.format(interesting_checkpoint))
        logging.info('-- Restored graph for model named: %s',
                     interesting_checkpoint)

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)).as_default() as sess:
            saver.restore(sess=sess, save_path=interesting_checkpoint)
            logging.info('-- Restored variables for model named: %s',
                         interesting_checkpoint)

            tf_input = gr.get_tensor_by_name('input/tf_input:0')
            tf_predictions = gr.get_tensor_by_name('prediction:0')

            # NOTE(review): unlike the batched variant, the whole input is
            # fed in a single run — assumes it fits in memory.
            prediction = sess.run(tf_predictions,
                                  feed_dict={tf_input: list_vecs})

            if len(list_labels) != 0:
                logging.info('-- Report for model: %s', experiment_name)
                logging.info(
                    classification_report(y_true=list_labels,
                                          y_pred=prediction))

            result_dict = dict()
            result_dict['sentence'] = list_sentences
            result_dict['pre-processed'] = list_preprocessed_sentences
            result_dict[
                'pre-processed_recover'] = dataset_manager.text2vec.vec_to_doc(
                    list_vecs)
            result_dict['predict'] = prediction

            if len(list_labels) != 0:
                result_dict['label'] = list_labels

            # index=False is explicit; the original index=None was relying on
            # None being falsy inside pandas to suppress the index column.
            pd.DataFrame(result_dict).to_csv(output_file, index=False)
            logging.debug('Saved result at %s', output_file)