# Load documents from CSV and build a vectorized corpus.
# NOTE: converted Python 2 `print` statements to Python 3 `print()` calls for
# consistency with the rest of the codebase (all other modules use print()).
print('Load documents from CSV')
corpus = Corpus(source_file_path='input/egc.csv',
                language='french',  # language for stop words
                vectorization='tfidf',  # 'tf' (term-frequency) or 'tfidf' (term-frequency inverse-document-frequency)
                max_relative_frequency=0.8,  # ignore words whose relative frequency is > max_relative_frequency
                min_absolute_frequency=4,  # ignore words whose absolute frequency is < min_absolute_frequency
                preprocessor=FrenchLemmatizer())  # pre-process documents
print('corpus size:', corpus.size)
print('vocabulary size:', len(corpus.vocabulary))
print('Vector representation of document 0:\n', corpus.vector_for_document(0))

# Instantiate a topic model
topic_model = NonNegativeMatrixFactorization(corpus)

# Estimate the optimal number of topics by plotting three stability metrics.
viz = Visualization(topic_model)
viz.plot_greene_metric(min_num_topics=10,
                       max_num_topics=30,
                       tao=10,
                       step=1,
                       top_n_words=10)
viz.plot_arun_metric(min_num_topics=5,
                     max_num_topics=30,
                     iterations=10)
viz.plot_consensus_metric(min_num_topics=5,
                          max_num_topics=30,
                          iterations=10)

# Infer topics
print('Inferring topics...')
topic_model.infer_topics(num_topics=15)
# Save model on disk
def __init__(self, config):
    """Store the configuration and build the associated Visualization helper.

    Args:
        config: project configuration object, forwarded to Visualization.
    """
    self.config = config
    self.visualization = Visualization(self.config)
    # Removed a redundant trailing `pass`: it is dead code after real statements.
def test(session_name=None, is_visualize=False):
    """Evaluate a restored CLSTM regression model on the testing set.

    Restores the checkpoint identified by ``session_name``, runs the whole
    testing set through the model, collects per-example MSE, and finally
    prints the mean MSE and renders the predictions on a map.

    Args:
        session_name: checkpoint/session identifier; prompted for on stdin
            when None.
        is_visualize: when True, show each example with its prediction as it
            is evaluated.
    """
    if session_name is None:
        session_name = input("Session name: ")
    config = Configuration()  # general settings
    data_sets = DataSet(config)  # data sets retrieval
    model = Clstmnn(config)  # model builder
    evaluation = Evaluation(config)
    visualization = Visualization(config)
    with tf.Graph().as_default():
        data_set = data_sets.get_data_sets(config.TESTING_BATCH_SIZE)
        print('Building model...')
        # Inference graph for the testing inputs; reuse_lstm=None builds fresh
        # LSTM variables (they are then populated from the checkpoint).
        predictions_testing = model.inference(x=data_set.testing_set.x,
                                              mode_name=config.MODE.TESTING,
                                              reuse_lstm=None)
        # Regression loss (mean squared error) between predictions and labels.
        mse = evaluation.loss(predictions=predictions_testing,
                              labels=data_set.testing_set.y,
                              mode_name=config.MODE.TESTING)
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        merged = tf.summary.merge_all()
        saver = tf.train.Saver()
        print('Starting session...')
        with tf.Session() as sess:
            sess.run(init_op)
            # Input pipeline uses TF1 queue runners; they must be started
            # before any fetch that reads from the queues.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            summary_writer = tf.summary.FileWriter(
                config.OUTPUT_PATH + session_name + '_tested', sess.graph)
            # Sessions helper wraps checkpoint restore and teardown.
            sessions_helper = Sessions(config=config,
                                       session=sess,
                                       saver=saver,
                                       session_name=session_name,
                                       summary_writer=summary_writer,
                                       coordinator=coord,
                                       threads=threads)
            sessions_helper.restore()
            print()
            summary = None
            start_time = time.time()
            mses = []  # per-example MSE values
            actual_labels = []
            predicted_labels = []
            for epoch in range(config.TESTING_EPOCHS):
                for step in range(
                        int(data_set.testing_set.size /
                            config.TESTING_BATCH_SIZE)):
                    summary = sess.run(merged)
                    # Inline progress indicator on one line.
                    sys.stdout.write('\r>> Examples tested: {}/{}'.format(
                        step,
                        int(data_set.testing_set.size /
                            config.TESTING_BATCH_SIZE)))
                    sys.stdout.flush()
                    # Fetch input, label, prediction and loss in a single run
                    # so they all come from the same dequeued example.
                    example_image, actual_label, predicted_label, mse_result = sess.run(
                        [
                            data_set.testing_set.x, data_set.testing_set.y,
                            predictions_testing, mse
                        ])
                    mses.append(mse_result)
                    actual_labels.append(actual_label)
                    predicted_labels.append(predicted_label)
                    if is_visualize:
                        visualization.show_example(predicted_label,
                                                   actual_label,
                                                   example_image,
                                                   mse_result)
            # Only the last evaluated summary is written (single data point).
            summary_writer.add_summary(summary, 1)
            print()
            print('testing completed in %s' % (time.time() - start_time))
            print('%s: MSE @ 1 = %.9f' % (datetime.now(),
                                          np.array(mses).mean()))
            # Plot actual vs. predicted locations together with the mean MSE.
            visualization.display_on_map(actual_labels, predicted_labels,
                                         session_name,
                                         np.array(mses).mean())
            # Stops queue runners, joins threads and closes the session.
            sessions_helper.end()
def test(session_name=None, is_visualize=False):
    """Evaluate a restored CNN classifier on the testing set.

    Restores the checkpoint identified by ``session_name``, runs the whole
    testing set through the model, accumulates the number of correct
    predictions, and prints a confusion matrix plus the final accuracy.

    Args:
        session_name: checkpoint/session identifier; prompted for on stdin
            when None.
        is_visualize: when True, show each example with its prediction as it
            is evaluated.
    """
    if session_name is None:
        session_name = input("Session name: ")
    config = Configuration()  # general settings
    data_sets = DataSet(config)  # data sets retrieval
    model = Cnn(config)  # model builder
    evaluation = Evaluation(config)
    visualization = Visualization(config)
    with tf.Graph().as_default():
        data_set = data_sets.get_data_sets(config.TESTING_BATCH_SIZE)
        print('Building model...')
        predictions_testing = model.inference(x=data_set.testing_set.x,
                                              mode_name=config.MODE.TESTING)
        # Per-example correctness against the ground-truth labels.
        is_correct = evaluation.correct_number(predictions_testing,
                                               data_set.testing_set.y)
        # Collapse class scores to the predicted class index.
        predictions_testing = tf.argmax(predictions_testing, 1)
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        merged = tf.summary.merge_all()
        saver = tf.train.Saver()
        print('Starting session...')
        with tf.Session() as sess:
            sess.run(init_op)
            # Input pipeline uses TF1 queue runners; start them before any
            # fetch that reads from the queues.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            summary_writer = tf.summary.FileWriter(
                config.OUTPUT_PATH + session_name + '_tested', sess.graph)
            # Sessions helper wraps checkpoint restore and teardown.
            sessions_helper = Sessions(config=config,
                                       session=sess,
                                       saver=saver,
                                       session_name=session_name,
                                       summary_writer=summary_writer,
                                       coordinator=coord,
                                       threads=threads)
            sessions_helper.restore()
            print()
            true_count = 0  # running number of correctly classified examples
            summary = None
            start_time = time.time()
            labels = []
            predictions = []
            for epoch in range(config.TESTING_EPOCHS):
                for step in range(
                        int(data_set.testing_set.size /
                            config.TESTING_BATCH_SIZE)):
                    summary = sess.run(merged)
                    # Inline progress indicator on one line.
                    sys.stdout.write('\r>> Examples tested: {}/{}'.format(
                        step,
                        int(data_set.testing_set.size /
                            config.TESTING_BATCH_SIZE)))
                    sys.stdout.flush()
                    # Fetch input, label, prediction and correctness in one
                    # run so they all come from the same dequeued example.
                    example_image, actual_label, predicted_label, is_correct_result = sess.run(
                        [
                            data_set.testing_set.x, data_set.testing_set.y,
                            predictions_testing, is_correct
                        ])
                    true_count += np.sum(is_correct_result)
                    labels.append(actual_label)
                    predictions.append(predicted_label)
                    if is_visualize:
                        visualization.show_example(predicted_label,
                                                   actual_label,
                                                   example_image,
                                                   is_correct_result)
            # Only the last evaluated summary is written (single data point).
            summary_writer.add_summary(summary, 1)
            np_labels = np.array(labels)
            np_predictions = np.array(predictions)
            # Build and evaluate the confusion matrix over all examples.
            conf_matrix = tf.confusion_matrix(
                labels=tf.squeeze(np_labels),
                predictions=tf.squeeze(np_predictions),
                num_classes=config.NUM_CLASSES)
            print()
            c_m = sess.run(conf_matrix)
            print(c_m)
            # Fraction of correctly classified examples.
            precision = true_count / data_set.testing_set.size
            print()
            print('testing completed in %s' % (time.time() - start_time))
            print('%s: accuracy @ 1 = %.3f' % (datetime.now(),
                                               precision * 100))
            # Stops queue runners, joins threads and closes the session.
            sessions_helper.end()
import time
import os

from flask import Flask, render_template, request, jsonify
from flask_cors import CORS
from werkzeug.utils import secure_filename

from visualization.visualization import Visualization
from forecast_model.prophet import ProphetModel
from forecast_model.arima import ArimaModel
from anomaly.model import anomaly

# Shared model/helper instances reused across request handlers.
ano = anomaly()
ar = ArimaModel()
draw = Visualization()

UPLOAD_FOLDER = '/data'

app = Flask(__name__)
# SECURITY: hard-coded secret key — load from an environment variable or a
# config file before deploying to production.
app.secret_key = "secret key"
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # cap uploads at 16 MiB
CORS(app)

# Only CSV uploads are accepted.
ALLOWED_EXTENSIONS = {'csv'}


@app.route('/')
def homepage():
    """Serve the landing page."""
    return render_template('index.html')
def visualize():
    """Generate plots for the processed dataset.

    The plots are written to the result/visualization folder.
    """
    visualizer = Visualization()
    visualizer.run()