# Example #1
# Load the EGC corpus from CSV and vectorize it.
# FIX: converted Python 2 print statements to print() calls — the rest of
# this file is Python 3, so the old statements were syntax errors.
print('Load documents from CSV')
corpus = Corpus(source_file_path='input/egc.csv',
                language='french',  # language for stop words
                vectorization='tfidf',  # 'tf' (term-frequency) or 'tfidf' (term-frequency inverse-document-frequency)
                max_relative_frequency=0.8,  # ignore words which relative frequency is > than max_relative_frequency
                min_absolute_frequency=4,  # ignore words which absolute frequency is < than min_absolute_frequency
                preprocessor=FrenchLemmatizer())  # pre-process documents
print('corpus size:', corpus.size)
print('vocabulary size:', len(corpus.vocabulary))
print('Vector representation of document 0:\n', corpus.vector_for_document(0))

# Instantiate a topic model
topic_model = NonNegativeMatrixFactorization(corpus)

# Estimate the optimal number of topics by plotting three stability metrics
viz = Visualization(topic_model)
viz.plot_greene_metric(min_num_topics=10,
                       max_num_topics=30,
                       tao=10, step=1,
                       top_n_words=10)
viz.plot_arun_metric(min_num_topics=5,
                     max_num_topics=30,
                     iterations=10)
viz.plot_consensus_metric(min_num_topics=5,
                          max_num_topics=30,
                          iterations=10)

# Infer topics
print('Inferring topics...')
topic_model.infer_topics(num_topics=15)
# Save model on disk
 def __init__(self, config):
     """Store the configuration and build the Visualization helper.

     Args:
         config: project configuration object, forwarded to Visualization.
     """
     # FIX: dropped the redundant trailing ``pass`` — it is dead code after
     # real statements.
     self.config = config
     self.visualization = Visualization(self.config)
def test(session_name=None, is_visualize=False):
    """Evaluate the trained CLSTM regression model on the testing set.

    Restores the checkpoint identified by *session_name*, runs every testing
    batch, accumulates per-batch MSE, optionally shows each example, and
    finally plots actual vs. predicted labels on a map.

    Args:
        session_name: checkpoint/session identifier; prompted for when None.
        is_visualize: when True, display each evaluated example.

    NOTE(review): a later function in this file is also named ``test`` and
    shadows this one at import time — consider renaming one of them.
    """
    if session_name is None:
        session_name = input("Session name: ")

    config = Configuration()  # general settings
    data_sets = DataSet(config)  # data sets retrieval
    model = Clstmnn(config)  # model builder
    evaluation = Evaluation(config)
    visualization = Visualization(config)

    with tf.Graph().as_default():
        data_set = data_sets.get_data_sets(config.TESTING_BATCH_SIZE)

        print('Building model...')
        predictions_testing = model.inference(x=data_set.testing_set.x,
                                              mode_name=config.MODE.TESTING,
                                              reuse_lstm=None)
        mse = evaluation.loss(predictions=predictions_testing,
                              labels=data_set.testing_set.y,
                              mode_name=config.MODE.TESTING)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        merged = tf.summary.merge_all()
        saver = tf.train.Saver()

        print('Starting session...')
        with tf.Session() as sess:
            sess.run(init_op)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            summary_writer = tf.summary.FileWriter(
                config.OUTPUT_PATH + session_name + '_tested', sess.graph)
            sessions_helper = Sessions(config=config,
                                       session=sess,
                                       saver=saver,
                                       session_name=session_name,
                                       summary_writer=summary_writer,
                                       coordinator=coord,
                                       threads=threads)

            sessions_helper.restore()

            print()
            summary = None
            start_time = time.time()
            mses = []
            actual_labels = []
            predicted_labels = []
            # hoisted loop invariant: number of full batches in the test set
            steps_per_epoch = int(data_set.testing_set.size /
                                  config.TESTING_BATCH_SIZE)
            for epoch in range(config.TESTING_EPOCHS):
                for step in range(steps_per_epoch):
                    sys.stdout.write('\r>> Examples tested: {}/{}'.format(
                        step, steps_per_epoch))
                    sys.stdout.flush()

                    # BUG FIX: the summary used to be fetched with a separate
                    # sess.run(merged), which dequeued an extra batch per step
                    # so the summary and the evaluated tensors came from
                    # DIFFERENT batches. Fetch everything in one run.
                    (summary, example_image, actual_label, predicted_label,
                     mse_result) = sess.run([
                         merged, data_set.testing_set.x,
                         data_set.testing_set.y, predictions_testing, mse
                     ])

                    mses.append(mse_result)
                    actual_labels.append(actual_label)
                    predicted_labels.append(predicted_label)

                    if is_visualize:
                        visualization.show_example(predicted_label,
                                                   actual_label, example_image,
                                                   mse_result)

            summary_writer.add_summary(summary, 1)

            print()
            print('testing completed in %s' % (time.time() - start_time))
            print('%s: MSE @ 1 = %.9f' %
                  (datetime.now(), np.array(mses).mean()))

            visualization.display_on_map(actual_labels, predicted_labels,
                                         session_name,
                                         np.array(mses).mean())

            sessions_helper.end()
def test(session_name=None, is_visualize=False):
    """Evaluate the trained CNN classifier on the testing set.

    Restores the checkpoint identified by *session_name*, runs every testing
    batch, counts correct predictions, prints a confusion matrix, and reports
    overall accuracy.

    Args:
        session_name: checkpoint/session identifier; prompted for when None.
        is_visualize: when True, display each evaluated example.

    NOTE(review): an earlier function in this file is also named ``test``;
    this definition shadows it at import time — consider renaming one.
    """
    if session_name is None:
        session_name = input("Session name: ")

    config = Configuration()  # general settings
    data_sets = DataSet(config)  # data sets retrieval
    model = Cnn(config)  # model builder
    evaluation = Evaluation(config)
    visualization = Visualization(config)

    with tf.Graph().as_default():
        data_set = data_sets.get_data_sets(config.TESTING_BATCH_SIZE)

        print('Building model...')
        predictions_testing = model.inference(x=data_set.testing_set.x,
                                              mode_name=config.MODE.TESTING)
        is_correct = evaluation.correct_number(predictions_testing,
                                               data_set.testing_set.y)
        # collapse per-class scores to the predicted class index
        predictions_testing = tf.argmax(predictions_testing, 1)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        merged = tf.summary.merge_all()
        saver = tf.train.Saver()

        print('Starting session...')
        with tf.Session() as sess:
            sess.run(init_op)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            summary_writer = tf.summary.FileWriter(
                config.OUTPUT_PATH + session_name + '_tested', sess.graph)
            sessions_helper = Sessions(config=config,
                                       session=sess,
                                       saver=saver,
                                       session_name=session_name,
                                       summary_writer=summary_writer,
                                       coordinator=coord,
                                       threads=threads)

            sessions_helper.restore()

            print()
            true_count = 0
            summary = None
            start_time = time.time()
            labels = []
            predictions = []
            # hoisted loop invariant: number of full batches in the test set
            steps_per_epoch = int(data_set.testing_set.size /
                                  config.TESTING_BATCH_SIZE)
            for epoch in range(config.TESTING_EPOCHS):
                for step in range(steps_per_epoch):
                    sys.stdout.write('\r>> Examples tested: {}/{}'.format(
                        step, steps_per_epoch))
                    sys.stdout.flush()

                    # BUG FIX: the summary used to be fetched with a separate
                    # sess.run(merged), which dequeued an extra batch per step
                    # so the summary and the evaluated tensors came from
                    # DIFFERENT batches. Fetch everything in one run.
                    (summary, example_image, actual_label, predicted_label,
                     is_correct_result) = sess.run([
                         merged, data_set.testing_set.x,
                         data_set.testing_set.y, predictions_testing,
                         is_correct
                     ])
                    true_count += np.sum(is_correct_result)

                    labels.append(actual_label)
                    predictions.append(predicted_label)

                    if is_visualize:
                        visualization.show_example(predicted_label,
                                                   actual_label, example_image,
                                                   is_correct_result)

            summary_writer.add_summary(summary, 1)

            np_labels = np.array(labels)
            np_predictions = np.array(predictions)

            conf_matrix = tf.confusion_matrix(
                labels=tf.squeeze(np_labels),
                predictions=tf.squeeze(np_predictions),
                num_classes=config.NUM_CLASSES)
            print()
            c_m = sess.run(conf_matrix)
            print(c_m)

            # renamed from ``precision``: this is overall accuracy, matching
            # the message printed below
            accuracy = true_count / data_set.testing_set.size
            print()
            print('testing completed in %s' % (time.time() - start_time))
            print('%s: accuracy @ 1 = %.3f' %
                  (datetime.now(), accuracy * 100))

            sessions_helper.end()
# Example #5
import time
from flask_cors import CORS
import os
from flask import Flask, render_template, request, jsonify
#from werkzeug import secure_filename
from werkzeug.utils import secure_filename
from visualization.visualization import Visualization
from forecast_model.prophet import ProphetModel
from forecast_model.arima import ArimaModel
from anomaly.model import anomaly
# Module-level singletons used by the route handlers below.
ano = anomaly()           # anomaly-detection model
ar = ArimaModel()         # ARIMA forecasting model
draw = Visualization()    # plotting helper
UPLOAD_FOLDER = '/data'   # where uploaded CSV files are stored

app = Flask(__name__)
# SECURITY FIX: read the session secret from the environment; the original
# hard-coded literal is kept only as a development fallback and must not be
# relied on in production.
app.secret_key = os.environ.get('SECRET_KEY', 'secret key')
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # cap uploads at 16 MiB
CORS(app)  # allow cross-origin requests from the frontend


@app.route('/')
def homepage():
    """Serve the landing page of the application."""
    page = render_template('index.html')
    return page


ALLOWED_EXTENSIONS = set(['csv'])

def visualize():
    """Visualize the processed dataset and output the plots into the result/visualization folder"""
    viz = Visualization()
    viz.run()