Example #1
# Module paths below are assumed from the tom_lib package layout and may differ
# across versions (the home module of FrenchLemmatizer in particular):
# from tom_lib.structure.corpus import Corpus
# from tom_lib.nlp.topic_model import NonNegativeMatrixFactorization
# from tom_lib.visualization.visualization import Visualization

print('Load documents from CSV')
corpus = Corpus(source_file_path='input/egc.csv',
                language='french',  # language for stop words
                vectorization='tfidf',  # 'tf' (term frequency) or 'tfidf' (term frequency-inverse document frequency)
                max_relative_frequency=0.8,  # ignore words whose relative frequency is above this threshold
                min_absolute_frequency=4,  # ignore words whose absolute frequency is below this threshold
                preprocessor=FrenchLemmatizer())  # pre-process documents
print('corpus size:', corpus.size)
print('vocabulary size:', len(corpus.vocabulary))
print('Vector representation of document 0:\n', corpus.vector_for_document(0))
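# vector_for_document(0) prints document 0's representation under the chosen
# vectorization ('tfidf' here), one weight per vocabulary term.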

# Instantiate a topic model
topic_model = NonNegativeMatrixFactorization(corpus)

# Estimate the optimal number of topics
viz = Visualization(topic_model)
viz.plot_greene_metric(min_num_topics=10,
                       max_num_topics=30,
                       tao=10, step=1,
                       top_n_words=10)
viz.plot_arun_metric(min_num_topics=5,
                     max_num_topics=30,
                     iterations=10)
viz.plot_consensus_metric(min_num_topics=5,
                          max_num_topics=30,
                          iterations=10)
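# These plots are inspected by hand: a high Greene stability score and a
# minimum of the Arun divergence are the usual cues for choosing num_topics.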

# Infer topics
print('Inferring topics...')
topic_model.infer_topics(num_topics=15)
# Save model on disk
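# A minimal sketch of persistence; plain pickle is an assumption here, not a
# helper taken from the original example:
import pickle
with open('output/topic_model.pickle', 'wb') as f:
    pickle.dump(topic_model, f)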

Example #2

# TensorFlow 1.x API; the `activation` helper (providing lrelu) and the
# project-local Visualization class are assumed to be importable here.
import tensorflow as tf


class Cnn:
    def __init__(self, config):
        self.config = config
        self.visualization = Visualization(self.config)

    def inference(self, x, mode_name):
        # Extra summaries are recorded in every mode except validation.
        histogram_summary = mode_name != self.config.MODE.VALIDATION
        kernel_image_summary = mode_name != self.config.MODE.VALIDATION
        activation_image_summary = mode_name != self.config.MODE.VALIDATION

        with tf.name_scope('inputs'):
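            # Reshape the flat input into an NHWC batch; -1 lets TensorFlow
            # infer the batch dimension.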
            x = tf.reshape(x, [
                -1, self.config.IMAGE_SIZE.HEIGHT,
                self.config.IMAGE_SIZE.WIDTH, self.config.IMAGE_SIZE.CHANNELS
            ])

        tf.summary.image("/inputs", x, max_outputs=4)

        with tf.variable_scope('convolution1'):
            convolution_1 = self.conv_layer(
                input_tensor=x,
                depth_in=self.config.IMAGE_SIZE.CHANNELS,
                depth_out=64,
                mode_name=mode_name,
                histogram_summary=histogram_summary,
                kernel_image_summary=kernel_image_summary,
                activation_image_summary=activation_image_summary)

        with tf.variable_scope('max_pooling1'):
            max_pooling_1 = tf.layers.max_pooling2d(inputs=convolution_1,
                                                    pool_size=[2, 2],
                                                    strides=2)

        with tf.variable_scope('convolution2'):
            convolution_2 = self.conv_layer(
                input_tensor=max_pooling_1,
                depth_in=64,
                depth_out=128,
                mode_name=mode_name,
                histogram_summary=histogram_summary,
                kernel_image_summary=False,
                activation_image_summary=activation_image_summary)

        with tf.variable_scope('max_pooling2'):
            max_pooling_2 = tf.layers.max_pooling2d(inputs=convolution_2,
                                                    pool_size=[2, 2],
                                                    strides=2)

        with tf.variable_scope('convolution3'):
            convolution_3 = self.conv_layer(
                input_tensor=max_pooling_2,
                depth_in=128,
                depth_out=256,
                mode_name=mode_name,
                histogram_summary=histogram_summary,
                kernel_image_summary=False,
                activation_image_summary=activation_image_summary)

        with tf.variable_scope('max_pooling3'):
            max_pooling_3 = tf.layers.max_pooling2d(inputs=convolution_3,
                                                    pool_size=[2, 2],
                                                    strides=2)

        with tf.variable_scope('convolution4'):
            convolution_4 = self.conv_layer(
                input_tensor=max_pooling_3,
                depth_in=256,
                depth_out=512,
                mode_name=mode_name,
                histogram_summary=histogram_summary,
                kernel_image_summary=False,
                activation_image_summary=activation_image_summary)

        with tf.variable_scope('max_pooling4'):
            max_pooling_4 = tf.layers.max_pooling2d(inputs=convolution_4,
                                                    pool_size=[2, 2],
                                                    strides=2)

        with tf.variable_scope('dense1'):
            # Flatten the final pooling output before the fully connected layer.
            flattened = tf.reshape(max_pooling_4, [
                max_pooling_4.shape[0].value, max_pooling_4.shape[1].value *
                max_pooling_4.shape[2].value * max_pooling_4.shape[3].value
            ])
            dense_1 = tf.layers.dense(inputs=flattened,
                                      units=1024,
                                      activation=activation.lrelu)

        if mode_name == self.config.MODE.TRAINING:
            tf.summary.histogram('dense1', dense_1)

        with tf.variable_scope('logits'):
            logits = tf.layers.dense(inputs=dense_1,
                                     units=self.config.NUM_CLASSES,
                                     activation=activation.lrelu)

        if mode_name == self.config.MODE.TRAINING:
            tf.summary.histogram('logits', logits)
            tf.summary.histogram('softmax', tf.nn.softmax(logits=logits))

        return logits

    def _conv2d(self, x, weights, strides):
        return tf.nn.conv2d(x, weights, strides=strides, padding='SAME')

    def conv_layer(self,
                   mode_name,
                   input_tensor,
                   depth_in,
                   depth_out,
                   kernel_height=3,
                   kernel_width=3,
                   strides=(1, 1, 1, 1),
                   activation_fn=activation.lrelu,
                   histogram_summary=False,
                   kernel_image_summary=False,
                   activation_image_summary=False):

        weights = tf.get_variable(
            "weights", [kernel_height, kernel_width, depth_in, depth_out],
            initializer=tf.truncated_normal_initializer(stddev=0.01))
        biases = tf.get_variable("biases", [depth_out],
                                 initializer=tf.constant_initializer(0.01))
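        # Convolve, add the bias, then apply the activation (leaky ReLU with
        # the configured negative slope).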
        convolutions = self._conv2d(input_tensor, weights, strides=strides)
        activations = activation_fn(convolutions + biases,
                                    self.config.LEAKY_RELU_ALPHA)

        if histogram_summary:
            tf.summary.histogram(mode_name + '_weights', weights)
            tf.summary.histogram(mode_name + '_activations', activations)

        if kernel_image_summary:
            weights_image_grid = self.visualization.kernels_image_grid(
                kernel=weights)
            tf.summary.image(mode_name + '/features',
                             weights_image_grid,
                             max_outputs=1)

        if activation_image_summary:
            activation_image = self.visualization.activation_image(
                activations=activations)
            tf.summary.image("/activated", activation_image)

        return activations
Example #4
print('Load documents from CSV')
corpus = Corpus(source_file_path='input/egc.csv',
                language='french',  # determines the stop words
                vectorization='tf',  # 'tf' (term frequency) or 'tfidf' (term frequency-inverse document frequency)
                max_relative_frequency=0.8,  # ignore words whose relative frequency is above this threshold
                min_absolute_frequency=4,  # ignore words whose absolute frequency is below this threshold
                preprocessor=None)  # determines how documents are pre-processed (e.g. stemming, lemmatization)
print('corpus size:', corpus.size)
print('vocabulary size:', len(corpus.vocabulary))
print('Vector representation of document 2:\n', corpus.vector_for_document(2))

# Instantiate a topic model
topic_model = LatentDirichletAllocation(corpus=corpus)

# Estimate the optimal number of topics with the stability metric of Greene et al.
# (the Arun et al. metric is commented out below)
viz = Visualization(topic_model)
viz.plot_greene_metric(min_num_topics=10, max_num_topics=12, tao=10, step=1,
                       top_n_words=10, file_path='output/greene.png')
# viz.plot_arun_metric(10, 30, 5, '/Users/adrien/Desktop/arun.png')

# Infer topics
topic_model.infer_topics(num_topics=20)

# Print results
print('\nTopics:')
topic_model.print_topics(num_words=10)
print('\nDocument 2:', topic_model.corpus.full_content(2))
print('\nTopic distribution for document 2:', topic_model.topic_distribution_for_document(2))
print('\nMost likely topic for document 2:', topic_model.most_likely_topic_for_document(2))
print('\nTopics frequency:', topic_model.topics_frequency())
print('\nTopic 2 frequency:', topic_model.topic_frequency(2))
Example #5

# Assumes: import sys, time; from datetime import datetime; import numpy as np;
# import tensorflow as tf; plus the project-local Configuration, DataSet, Clstmnn,
# Evaluation, Visualization and Sessions helpers.
def test(session_name=None, is_visualize=False):
    if session_name is None:
        session_name = input("Session name: ")

    config = Configuration()  # general settings
    data_sets = DataSet(config)  # data sets retrieval
    model = Clstmnn(config)  # model builder
    evaluation = Evaluation(config)
    visualization = Visualization(config)

    with tf.Graph().as_default():
        data_set = data_sets.get_data_sets(config.TESTING_BATCH_SIZE)

        print('Building model...')
        predictions_testing = model.inference(x=data_set.testing_set.x,
                                              mode_name=config.MODE.TESTING,
                                              reuse_lstm=None)
        mse = evaluation.loss(predictions=predictions_testing,
                              labels=data_set.testing_set.y,
                              mode_name=config.MODE.TESTING)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        merged = tf.summary.merge_all()
        saver = tf.train.Saver()

        print('Starting session...')
        with tf.Session() as sess:
            sess.run(init_op)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            summary_writer = tf.summary.FileWriter(
                config.OUTPUT_PATH + session_name + '_tested', sess.graph)
            sessions_helper = Sessions(config=config,
                                       session=sess,
                                       saver=saver,
                                       session_name=session_name,
                                       summary_writer=summary_writer,
                                       coordinator=coord,
                                       threads=threads)

            sessions_helper.restore()

            print()
            summary = None
            start_time = time.time()
            mses = []
            actual_labels = []
            predicted_labels = []
            for epoch in range(config.TESTING_EPOCHS):
                for step in range(
                        int(data_set.testing_set.size /
                            config.TESTING_BATCH_SIZE)):
                    summary = sess.run(merged)
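                    # Re-evaluated each step; only the final summary is written
                    # to the event log after the loops finish.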

                    sys.stdout.write('\r>> Examples tested: {}/{}'.format(
                        step,
                        int(data_set.testing_set.size /
                            config.TESTING_BATCH_SIZE)))
                    sys.stdout.flush()

                    example_image, actual_label, predicted_label, mse_result = sess.run(
                        [
                            data_set.testing_set.x, data_set.testing_set.y,
                            predictions_testing, mse
                        ])

                    mses.append(mse_result)
                    actual_labels.append(actual_label)
                    predicted_labels.append(predicted_label)

                    if is_visualize:
                        visualization.show_example(predicted_label,
                                                   actual_label, example_image,
                                                   mse_result)

            summary_writer.add_summary(summary, 1)

            print()
            print('testing completed in %.2f seconds' % (time.time() - start_time))
            print('%s: MSE @ 1 = %.9f' %
                  (datetime.now(), np.array(mses).mean()))

            visualization.display_on_map(actual_labels, predicted_labels,
                                         session_name,
                                         np.array(mses).mean())

            sessions_helper.end()
Example #6

# Same assumed imports as Example #5, with the Cnn model from Example #2.
def test(session_name=None, is_visualize=False):
    if session_name is None:
        session_name = input("Session name: ")

    config = Configuration()  # general settings
    data_sets = DataSet(config)  # data sets retrieval
    model = Cnn(config)  # model builder
    evaluation = Evaluation(config)
    visualization = Visualization(config)

    with tf.Graph().as_default():
        data_set = data_sets.get_data_sets(config.TESTING_BATCH_SIZE)

        print('Building model...')
        predictions_testing = model.inference(x=data_set.testing_set.x,
                                              mode_name=config.MODE.TESTING)
        is_correct = evaluation.correct_number(predictions_testing,
                                               data_set.testing_set.y)
        predictions_testing = tf.argmax(predictions_testing, 1)
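        # Reduce the logits to predicted class ids for the printout and the
        # confusion matrix below.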

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        merged = tf.summary.merge_all()
        saver = tf.train.Saver()

        print('Starting session...')
        with tf.Session() as sess:
            sess.run(init_op)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            summary_writer = tf.summary.FileWriter(
                config.OUTPUT_PATH + session_name + '_tested', sess.graph)
            sessions_helper = Sessions(config=config,
                                       session=sess,
                                       saver=saver,
                                       session_name=session_name,
                                       summary_writer=summary_writer,
                                       coordinator=coord,
                                       threads=threads)

            sessions_helper.restore()

            print()
            true_count = 0
            summary = None
            start_time = time.time()
            labels = []
            predictions = []
            for epoch in range(config.TESTING_EPOCHS):
                for step in range(
                        int(data_set.testing_set.size /
                            config.TESTING_BATCH_SIZE)):
                    summary = sess.run(merged)

                    sys.stdout.write('\r>> Examples tested: {}/{}'.format(
                        step,
                        int(data_set.testing_set.size /
                            config.TESTING_BATCH_SIZE)))
                    sys.stdout.flush()

                    example_image, actual_label, predicted_label, is_correct_result = sess.run(
                        [
                            data_set.testing_set.x, data_set.testing_set.y,
                            predictions_testing, is_correct
                        ])
                    true_count += np.sum(is_correct_result)

                    labels.append(actual_label)
                    predictions.append(predicted_label)

                    if is_visualize:
                        visualization.show_example(predicted_label,
                                                   actual_label, example_image,
                                                   is_correct_result)

            summary_writer.add_summary(summary, 1)

            np_labels = np.array(labels)
            np_predictions = np.array(predictions)

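            # Build the confusion matrix as a TF op over the accumulated labels
            # and predictions, then evaluate it once in the running session.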
            conf_matrix = tf.confusion_matrix(
                labels=tf.squeeze(np_labels),
                predictions=tf.squeeze(np_predictions),
                num_classes=config.NUM_CLASSES)
            print()
            c_m = sess.run(conf_matrix)
            print(c_m)

            accuracy = true_count / data_set.testing_set.size
            print()
            print('testing completed in %.2f seconds' % (time.time() - start_time))
            print('%s: accuracy @ 1 = %.3f%%' %
                  (datetime.now(), accuracy * 100))

            sessions_helper.end()
Example #7
import time
from flask_cors import CORS
import os
from flask import Flask, render_template, request, jsonify
# secure_filename moved to werkzeug.utils in newer Werkzeug releases:
from werkzeug.utils import secure_filename
from visualization.visualization import Visualization
from forecast_model.prophet import ProphetModel
from forecast_model.arima import ArimaModel
from anomaly.model import anomaly
ano = anomaly()

ar = ArimaModel()
draw = Visualization()
UPLOAD_FOLDER = '/data'

app = Flask(__name__)
app.secret_key = "secret key"
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
CORS(app)


@app.route('/')
def homepage():
    return render_template('index.html')


ALLOWED_EXTENSIONS = {'csv'}
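
# The app config above sets UPLOAD_FOLDER, MAX_CONTENT_LENGTH and
# ALLOWED_EXTENSIONS, but the upload endpoint itself is not part of this
# snippet. A minimal sketch of how these settings are typically wired
# together; the route, form field name and allowed_file helper are
# assumptions, not part of the original app:
def allowed_file(filename):
    # Accept only filenames whose extension is listed in ALLOWED_EXTENSIONS.
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


@app.route('/upload', methods=['POST'])
def upload_file():
    file = request.files.get('file')
    if file is None or file.filename == '' or not allowed_file(file.filename):
        return jsonify({'error': 'a .csv file is required'}), 400
    filename = secure_filename(file.filename)
    file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
    return jsonify({'uploaded': filename})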

def visualize():
    """Visualize the processed dataset and output the plots into the result/visualization folder"""
    Visualization().run()