Example #1
import os

import classifier


def run(xml_data="../xml/RTE2_dev.xml"):

    # ML step: train the classifier and classify the data set.
    # The classifier imports and runs the features.py file to extract the features.
    # Remember to comment out the two run statements at the end of that file.
    classifier.run()
    classifier.run(False)

    # Evaluate the results of the part-3 classification.
    os.system(os.getcwd() + "/eval_rte.py " + xml_data + " " + os.getcwd() + "/results_part3.txt")
Example #2
def main():

	data_folder = 'data/sources/wikipedia'
	models_folder = 'classifier/models'
	save_loc = '/usr/share/nginx/html/wiki'

	if not os.path.exists(data_folder):
		os.makedirs(data_folder)

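	# next(os.walk(data_folder))[1] lists the immediate subdirectories, so this
	# branch runs only if at least one class folder exists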
	if next(os.walk(data_folder))[1]:
		
		retrain = True
		if retrain:
			input, target, classes = data.sample(data_folder)
			model = classifier.build(input.shape, target.shape)
			classifier.train(model, input, target)
			classifier.save(models_folder, model, classes)

		else:
			model, classes = classifier.load(models_folder, sorted(os.listdir(models_folder))[-1])
		
		for root, dirs, files in os.walk(data_folder):
			for file in files:
				if not file.startswith('.'):
					with open(root+'/'+file) as f:
						input = data.str2mat(f.read())
						output = classifier.run(model, input)
						data.backtest(save_loc+'/'+file, classes, input, output)
	else:
		print("""\nNo data found.\nPut subfolders of files by class, within the 'data' folder.""")
Example #3
# assumed context: a Flask view; `request` and `jsonify` come from Flask, and
# `run` is the local prediction helper defined elsewhere in this module
from flask import jsonify, request


def predict():
    try:
        data = request.get_json()
        query = data['Title'] + ' ' + data['Body']
        stance = data['Stance']
    except Exception:
        return 'bad input or could not process.', 400

    return jsonify(run('oraw1_15k', query).tolist())
Example #4
def main():
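    # Pipeline: cluster the training features with k-means, then score the
    # train/test cluster assignments with the downstream classifier.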
    xs_train = np.loadtxt(FLAGS.path_to_xtrain)
    xs_test = np.loadtxt(FLAGS.path_to_xtest)
    kms = build_kmeans_model_with_random_input(FLAGS.model_dir, 'kmeans',
                                               xs_train,
                                               FLAGS.depict_output_dim)
    outputs_train = kms.predict(xs_train)
    output_test = kms.predict(xs_test)
    metrics = classifier.run(outputs_train, output_test, FLAGS)
    # print(metrics)
    pprint.pprint(metrics)
Example #5
def k_fold_cross_validation(docs, class_labels, type_of_classifier='knn', n_splits=2, k_neighbors=3):
    print('k_neighbors:', k_neighbors)
    vocabulary = build_vocabulary()

    # n-fold cross validation
    seed = 1
    enable_shuffle = False
    # recent scikit-learn versions raise an error if random_state is set while
    # shuffle=False, so only pass the seed when shuffling is enabled
    k_fold = KFold(n_splits=n_splits, shuffle=enable_shuffle,
                   random_state=seed if enable_shuffle else None)

    m_accuracy = 0.0
    m_f1_score = 0.0
    iteration = 0

    # # ros = RandomOverSampler(random_state=1)
    # ros = EditedNearestNeighbours(random_state=1)

    for train_index, test_index in k_fold.split(docs, class_labels):
        iteration += 1

        train = [docs[i] for i in train_index]
        test = [docs[i] for i in test_index]

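        # Fit TF-IDF on the training fold only, then project the test fold onto
        # the same vocabulary so no test information leaks into the features.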
        tf_idf_train, train_vocabulary = preprocess.get_tf_idf_training(train)
        tf_idf_test = preprocess.get_tf_idf_testing(train_vocabulary, test)

        train_labels = [class_labels[i] for i in train_index]
        test_labels = [class_labels[i] for i in test_index]

        # random sampling
        # tf_idf_train_ros, train_labels_ros = ros.fit_sample(tf_idf_train, train_labels)
        predict_labels = classifier.run(tf_idf_train, train_labels, tf_idf_test, type_of_classifier, k_neighbors=k_neighbors)


        accuracy = calculate_accuracy(test_labels, predict_labels)
        m_accuracy += accuracy

        fold_f1 = f1_score(test_labels, predict_labels, average='weighted')
        m_f1_score += fold_f1

        print('iteration:', iteration)
        print('\taccuracy:', accuracy)
        print('\tf1-score:', fold_f1)

    return m_accuracy / n_splits, m_f1_score / n_splits
Example #6
    if type == 'train':
        print('training......')
        accuracy, f1_score = k_fold_cross_validation(
            train_docs,
            train_labels,
            type_of_classifier=type_of_classifier,
            n_splits=10,
            k_neighbors=k_neighbors
        )
        print('average accuracy =', accuracy)
        print('average f1_score =', f1_score)


    if type == 'test':
        print('testing......')
        tf_idf_train, train_vocabulary = preprocess.get_tf_idf_training(train_docs)
        tf_idf_test = preprocess.get_tf_idf_testing(train_vocabulary, test_docs)

        predict_labels = classifier.run(
            tf_idf_train,
            train_labels,
            tf_idf_test,
            type_of_classifier,
            k_neighbors=k_neighbors
        )
        print(len(predict_labels))

        output_file_name = '../data/format.dat'
        with open(output_file_name, 'w') as raw_text:
            for label in predict_labels:
                raw_text.write(label + '\n')
Example #7
import numpy as np
from matplotlib import pyplot
import classifier

X_train, X_test, y_train, y_test = classifier.getdata()

# accuracy vs hyperparameter graphs

#comparing number of epochs with overall accuracy

resEp100 = classifier.run(X_train, X_test, y_train, y_test, 100, 0.01, 0.5)

y = []
x = []
for i, acc in enumerate(resEp100):
    y.append(np.mean(acc))
    x.append(i + 1)

pyplot.plot(x, y)
pyplot.title('Number of Epochs vs Overall Accuracy')
pyplot.xlabel('Number of Epochs')
pyplot.ylabel('Accuracy')
pyplot.ylim(0.94, 0.98)
pyplot.show()

#comparing overall accuracy with different penalty

y = []
x = []
penalties = [0.1, 0.2, 0.5, 1, 2, 5]
for p in penalties:
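    # assumed continuation - the source snippet is truncated here; it mirrors the
    # epoch sweep above, varying only the (assumed) penalty argument
    res = classifier.run(X_train, X_test, y_train, y_test, 100, 0.01, p)
    y.append(np.mean(res[-1]))  # mean accuracy after the final epoch
    x.append(p)

pyplot.plot(x, y)
pyplot.title('Penalty vs Overall Accuracy')
pyplot.xlabel('Penalty')
pyplot.ylabel('Accuracy')
pyplot.show()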
Example #8
import numpy as np

import classifier
from loadMNIST_py import MnistDataloader

mnistDataLoader = MnistDataloader(
    'train-images.idx3-ubyte',
    'train-labels.idx1-ubyte',
    't10k-images.idx3-ubyte',
    't10k-labels.idx1-ubyte')
(trainImages, trainLabels), (t10kImages, t10kLabels) = mnistDataLoader.load_data()

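# Each run starts k-means from fresh Gaussian-random centroids; the third run
# keeps the centroids classifier.run returns so the fourth run can reuse them.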
print("1st run: ")
initial_centroids = np.random.randn(10, 28 * 28)
classifier.run(initial_centroids, trainImages, trainLabels, t10kImages, t10kLabels)

print("2nd run: ")
initial_centroids = np.random.randn(10, 28 * 28)
classifier.run(initial_centroids, trainImages, trainLabels, t10kImages, t10kLabels)

print("3rd run: ")
initial_centroids = np.random.randn(10, 28 * 28)
initial_centroids = classifier.run(initial_centroids, trainImages, trainLabels, t10kImages, t10kLabels)

print("4th run with chosen initialized centroids: ")
classifier.run(initial_centroids, trainImages, trainLabels, t10kImages, t10kLabels)
Example #9
def main():
    tf.logging.set_verbosity(tf.logging.INFO)
    prepare_file_system()

    # FLAGS.eval_step_interval = 1
    # FLAGS.infer_step_interal = 10

    # TODO: OOP
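    # Three separate graphs and sessions (train / eval / infer); they share
    # weights only through checkpoints written by the training session.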
    train_graph = tf.Graph()
    with train_graph.as_default():
        train_filenames, train_iterator, train_elements = \
            build_text_line_reader(shuffle=True, batch_size=FLAGS.train_batch_size)
        train_inputs, train_cost, optimizer = build_train_graph(
            train_elements,
            FLAGS.depict_input_dim,
            FLAGS.depict_output_dim,
            func=FLAGS.loss_function)
        train_saver = tf.train.Saver()
        train_merger = tf.summary.merge_all()
        train_initializer = tf.global_variables_initializer()
        # train_parameters = tf.trainable_variables()
    eval_graph = tf.Graph()
    with eval_graph.as_default():
        eval_filenames, eval_iterator, eval_elements = \
            build_text_line_reader(shuffle=True, batch_size=FLAGS.eval_batch_size)
        eval_inputs, eval_outputs = build_eval_graph(eval_elements,
                                                     FLAGS.depict_input_dim,
                                                     FLAGS.depict_output_dim)
        eval_saver = tf.train.Saver()
        eval_merger = tf.summary.merge_all()
        eval_initializer = tf.global_variables_initializer()
        # eval_parameters = tf.trainable_variables()
    infer_graph = tf.Graph()
    with infer_graph.as_default():
        infer_filenames, infer_iterator, infer_elements = \
            build_text_line_reader(shuffle=False, batch_size=FLAGS.infer_batch_size)
        infer_inputs, infer_outputs = build_infer_graph(
            infer_elements, FLAGS.depict_input_dim, FLAGS.depict_output_dim)
        rbfnn_metrics = build_metrics_graph('rbfnn')
        # kmeans_metrics = build_metrics_graph('kmeans')
        infer_saver = tf.train.Saver()
        infer_merger = tf.summary.merge_all()
        infer_initializer = tf.global_variables_initializer()

    config = tf.ConfigProto(device_count={"CPU": 24, "GPU": 0})
    train_sess = tf.Session(graph=train_graph, config=config)
    eval_sess = tf.Session(graph=eval_graph, config=config)
    infer_sess = tf.Session(graph=infer_graph, config=config)

    # train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', train_graph)
    # validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation', eval_graph)
    # infer_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/inference', infer_graph)

    results = dict()

    train_sess.run(train_initializer)
    train_sess.run(train_iterator.initializer,
                   feed_dict={train_filenames: [FLAGS.path_to_xtrain]})
    # train_sess.run(train_iterator.initializer)
    for i in itertools.count():
        if i > FLAGS.how_many_training_steps:
            break

        try:
            xs_train = train_sess.run(train_elements)
            # print(xs_train)
        except tf.errors.OutOfRangeError:
            train_sess.run(train_iterator.initializer,
                           feed_dict={train_filenames: [FLAGS.path_to_xtrain]})
            xs_train = train_sess.run(train_elements)
        # train_summary, _ = train_sess.run([optimizer, train_merger]) #
        _, training_cost, train_summary = train_sess.run(
            [optimizer, train_cost, train_merger],
            feed_dict={train_inputs: xs_train})
        # train_writer.add_summary(train_summary, i)
        # print('epoch: %6d, training cost: %.8f'%(i, training_cost))
        # time.sleep(1)

        # if i % FLAGS.eval_step_interval == 0:
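        # Evaluate on a logarithmic schedule: every step below 10, every 10th
        # step up to 100, every 100th up to 1000, and so on.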
        if i % pow(10, len(str(i)) - 1) == 0:
            # print(train_sess.run(train_parameters[0]))

            checkpoint_path = train_saver.save(train_sess,
                                               FLAGS.checkpoints_dir +
                                               '/checkpoints',
                                               global_step=i)
            eval_saver.restore(eval_sess, checkpoint_path)
            # print(eval_sess.run(eval_parameters[0]))
            eval_sess.run(eval_iterator.initializer,
                          feed_dict={eval_filenames: [FLAGS.path_to_xtest]})
            while FLAGS.data_to_eval:
                try:
                    xs_eval = eval_sess.run(eval_elements)
                except tf.errors.OutOfRangeError:
                    # eval_sess.run(eval_iterator.initializer,
                    #                feed_dict={eval_filenames: [r'../../data/x_1000_128.txt']})
                    # xs_eval = eval_sess.run(eval_elements)
                    break
                # training_outputs = eval_sess.run(eval_outputs, feed_dict={eval_inputs: xs_train})
                # evaluation_outputs = eval_sess.run(eval_outputs, feed_dict={eval_inputs: xs_eval})
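                # note: the evaluation cost is computed with the *training*
                # graph/session, fed the held-out batch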
                evaluation_cost, eval_summary = train_sess.run(
                    [train_cost, train_merger],
                    feed_dict={train_inputs: xs_eval})
                tf.logging.info("epoch: %d, training cost: %f" %
                                (i, training_cost))
                tf.logging.info("epoch: %d, evaluation cost: %f" %
                                (i, evaluation_cost))
                # validation_writer.add_summary(eval_summary, i)
                break

        # if i % FLAGS.infer_step_interval == 0:
        if i % pow(10, len(str(i)) - 1) == 0:
            checkpoint_path = train_saver.save(train_sess,
                                               FLAGS.checkpoints_dir +
                                               '/checkpoints',
                                               global_step=i)
            train_saver.save(train_sess,
                             FLAGS.saved_model_dir + '/checkpoints_' +
                             str(FLAGS.depict_output_dim),
                             global_step=i)
            infer_saver.restore(infer_sess, checkpoint_path)

            infers_train = []
            infer_sess.run(infer_iterator.initializer,
                           feed_dict={infer_filenames: [FLAGS.path_to_xtrain]})
            while FLAGS.data_to_infer:
                try:
                    xs_infer = infer_sess.run(infer_elements)
                except tf.errors.OutOfRangeError:
                    break
                ys_infer = infer_sess.run(infer_outputs,
                                          feed_dict={infer_inputs: xs_infer})
                infers_train.extend(ys_infer)
            # print(infers_train)

            infers_test = []
            infer_sess.run(infer_iterator.initializer,
                           feed_dict={infer_filenames: [FLAGS.path_to_xtest]})
            while FLAGS.data_to_infer:
                try:
                    xs_infer = infer_sess.run(infer_elements)
                except tf.errors.OutOfRangeError:
                    break
                ys_infer = infer_sess.run(infer_outputs,
                                          feed_dict={infer_inputs: xs_infer})
                print(xs_infer.shape, xs_infer.flatten())
                print(ys_infer.shape, ys_infer.flatten())
                infers_test.extend(ys_infer)
            # print(infers_test)
            metrics = classifier.run(infers_train, infers_test, FLAGS)
            pprint.pprint(metrics)
            # infer_summary = metrics_to_metrics(infer_sess, infer_merger, rbfnn_metrics, metrics)
            # infer_writer.add_summary(infer_summary, i)
            results[i] = metrics

            # TODO:
            with open('../../results/results.txt', 'a') as f:
                line = list()
                line.extend(
                    [FLAGS.rbfnn_num_center, FLAGS.depict_output_dim, i])
                line.extend(metrics['err_train'].tolist())
                line.extend([metrics['acc_train']])
                line.extend(metrics['stsm_train'].tolist())
                line.extend(metrics['err_test'].tolist())
                line.extend([metrics['acc_test']])
                line = [str(item) for item in line]
                line = ' '.join(line)
                f.write(line)
                f.write('\n')
    train_sess.close()
    eval_sess.close()
    infer_sess.close()
    return results
Example #10
def main():
    tf.logging.set_verbosity(tf.logging.INFO)
    prepare_file_system()

    # FLAGS.eval_step_interval = 1
    # FLAGS.infer_step_interal = 10

    # TODO: OOP
    train_graph = tf.Graph()
    with train_graph.as_default():
        train_filenames, train_iterator, train_elements = \
            build_text_line_reader(shuffle=True, batch_size=FLAGS.train_batch_size)
        train_inputs, train_cost, optimizer = build_train_graph(
            train_elements,
            FLAGS.depict_input_dim,
            FLAGS.depict_output_dim,
            func=FLAGS.loss_function)
        train_saver = tf.train.Saver()
        train_merger = tf.summary.merge_all()
        train_initializer = tf.global_variables_initializer()
        # train_parameters = tf.trainable_variables()
    eval_graph = tf.Graph()
    with eval_graph.as_default():
        eval_filenames, eval_iterator, eval_elements = \
            build_text_line_reader(shuffle=True, batch_size=FLAGS.eval_batch_size)
        eval_inputs, eval_outputs = build_eval_graph(eval_elements,
                                                     FLAGS.depict_input_dim,
                                                     FLAGS.depict_output_dim)
        eval_saver = tf.train.Saver()
        eval_merger = tf.summary.merge_all()
        eval_initializer = tf.global_variables_initializer()
        # eval_parameters = tf.trainable_variables()
    infer_graph = tf.Graph()
    with infer_graph.as_default():
        infer_filenames, infer_iterator, infer_elements = \
            build_text_line_reader(shuffle=False, batch_size=FLAGS.infer_batch_size)
        infer_inputs, infer_outputs = build_infer_graph(
            infer_elements, FLAGS.depict_input_dim, FLAGS.depict_output_dim)
        rbfnn_metrics = build_metrics_graph('rbfnn')
        # kmeans_metrics = build_metrics_graph('kmeans')
        infer_saver = tf.train.Saver()
        infer_merger = tf.summary.merge_all()
        infer_initializer = tf.global_variables_initializer()

    config = tf.ConfigProto(device_count={"GPU": 1})
    train_sess = tf.Session(graph=train_graph, config=config)
    eval_sess = tf.Session(graph=eval_graph, config=config)
    infer_sess = tf.Session(graph=infer_graph, config=config)

    # train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', train_graph)
    # validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation', eval_graph)
    # infer_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/inference', infer_graph)

    train_sess.run(train_initializer)
    # eval_sess.run(eval_initializer)
    # infer_sess.run(infer_initializer)

    import utils

    results = dict()
    for epoch in itertools.count():
        if epoch > FLAGS.how_many_training_epoches:
            break

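        # assumed: xtrain and xtest are loaded at module level elsewhere in this script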
        train_generator = utils.build_data_generator(
            xtrain, shuffle=True, batch_size=FLAGS.train_batch_size)
        for batch, xs_train in enumerate(train_generator):
            _, training_cost = train_sess.run(
                [optimizer, train_cost], feed_dict={train_inputs: xs_train})
        if epoch % 1 == 0:
            checkpoint_path = train_saver.save(train_sess,
                                               FLAGS.checkpoints_dir +
                                               '/checkpoints',
                                               global_step=epoch)
            # train_saver.save(train_sess, FLAGS.saved_model_dir + '/checkpoints_' + str(FLAGS.depict_output_dim), global_step=epoch)
            infer_saver.restore(infer_sess, checkpoint_path)

            infers_train = []
            infer_generator = utils.build_data_generator(
                xtrain, shuffle=False, batch_size=FLAGS.infer_batch_size)
            for batch, xs_infer in enumerate(infer_generator):
                ys_infer = infer_sess.run(infer_outputs,
                                          feed_dict={infer_inputs: xs_infer})
                infers_train.extend(ys_infer)
            infers_test = []
            infer_generator = utils.build_data_generator(
                xtest, shuffle=False, batch_size=FLAGS.infer_batch_size)
            for batch, xs_infer in enumerate(infer_generator):
                ys_infer = infer_sess.run(infer_outputs,
                                          feed_dict={infer_inputs: xs_infer})
                infers_test.extend(ys_infer)
            print(len(infers_train), len(infers_test))

            metrics = classifier.run(infers_train, infers_test, FLAGS)
            pprint.pprint(metrics)
            results[epoch] = metrics
            utils.write_results(FLAGS, metrics, epoch)
    train_sess.close()
    eval_sess.close()
    infer_sess.close()
    return results
Example #11
def main():
    tf.logging.set_verbosity(tf.logging.INFO)
    prepare_file_system()

    FLAGS.eval_step_interval = 1
    FLAGS.infer_step_interal = 10

    # TODO: OOP
    train_graph = tf.Graph()
    with train_graph.as_default():
        train_filenames, train_iterator, train_elements = \
            build_text_line_reader(shuffle=True, batch_size=FLAGS.train_batch_size)
        train_inputs, train_cost, optimizer = build_train_graph(
            train_elements,
            FLAGS.depict_input_dim,
            FLAGS.depict_output_dim,
            func='func_02')
        train_saver = tf.train.Saver()
        train_merger = tf.summary.merge_all()
        train_initializer = tf.global_variables_initializer()
        # train_parameters = tf.trainable_variables()
    eval_graph = tf.Graph()
    with eval_graph.as_default():
        eval_filenames, eval_iterator, eval_elements = \
            build_text_line_reader(shuffle=True, batch_size=FLAGS.eval_batch_size)
        eval_inputs, eval_outputs = build_eval_graph(eval_elements,
                                                     FLAGS.depict_input_dim,
                                                     FLAGS.depict_output_dim)
        eval_saver = tf.train.Saver()
        eval_merger = tf.summary.merge_all()
        eval_initializer = tf.global_variables_initializer()
        # eval_parameters = tf.trainable_variables()
    infer_graph = tf.Graph()
    with infer_graph.as_default():
        infer_filenames, infer_iterator, infer_elements = \
            build_text_line_reader(shuffle=False, batch_size=FLAGS.infer_batch_size)
        infer_inputs, infer_outputs = build_infer_graph(
            infer_elements, FLAGS.depict_input_dim, FLAGS.depict_output_dim)
        rbfnn_metrics = build_metrics_graph('rbfnn')
        # kmeans_metrics = build_metrics_graph('kmeans')
        infer_saver = tf.train.Saver()
        infer_merger = tf.summary.merge_all()
        infer_initializer = tf.global_variables_initializer()

    train_sess = tf.Session(graph=train_graph)
    eval_sess = tf.Session(graph=eval_graph)
    infer_sess = tf.Session(graph=infer_graph)

    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                         train_graph)
    validation_writer = tf.summary.FileWriter(
        FLAGS.summaries_dir + '/validation', eval_graph)
    infer_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/inference',
                                         infer_graph)

    train_sess.run(train_initializer)
    train_sess.run(train_iterator.initializer,
                   feed_dict={train_filenames: [FLAGS.path_to_xtrain]})
    # train_sess.run(train_iterator.initializer)
    for i in itertools.count():
        try:
            xs_train = train_sess.run(train_elements)
            # print(xs_train)
        except tf.errors.OutOfRangeError:
            train_sess.run(train_iterator.initializer,
                           feed_dict={train_filenames: [FLAGS.path_to_xtrain]})
            xs_train = train_sess.run(train_elements)
        # train_summary, _ = train_sess.run([optimizer, train_merger]) #
        _, training_cost, train_summary = train_sess.run(
            [optimizer, train_cost, train_merger],
            feed_dict={train_inputs: xs_train})
        train_writer.add_summary(train_summary, i)
        # print('epoch: %6d, training cost: %.8f'%(i, training_cost))
        # time.sleep(1)

        # if i % FLAGS.eval_step_interval == 0:
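        # Checkpoint and evaluate on a logarithmic schedule: 10 ** (digits of i - 1).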
        if i % pow(10, len(str(i)) - 1) == 0:
            # print(train_sess.run(train_parameters[0]))
            checkpoint_path = train_saver.save(train_sess,
                                               FLAGS.checkpoints_dir +
                                               '/checkpoints',
                                               global_step=i)
            eval_saver.restore(eval_sess, checkpoint_path)
            # print(eval_sess.run(eval_parameters[0]))
            eval_sess.run(eval_iterator.initializer,
                          feed_dict={eval_filenames: [FLAGS.path_to_xtest]})
            while FLAGS.data_to_eval:
                try:
                    xs_eval = eval_sess.run(eval_elements)
                except tf.errors.OutOfRangeError:
                    # eval_sess.run(eval_iterator.initializer,
                    #                feed_dict={eval_filenames: [r'../../data/x_1000_128.txt']})
                    # xs_eval = eval_sess.run(eval_elements)
                    break
                # training_outputs = eval_sess.run(eval_outputs, feed_dict={eval_inputs: xs_train})
                # evaluation_outputs = eval_sess.run(eval_outputs, feed_dict={eval_inputs: xs_eval})
                evaluation_cost, eval_summary = train_sess.run(
                    [train_cost, train_merger],
                    feed_dict={train_inputs: xs_eval})
                tf.logging.info("epoch: %d, training cost: %f" %
                                (i, training_cost))
                tf.logging.info("epoch: %d, evaluation cost: %f" %
                                (i, evaluation_cost))
                validation_writer.add_summary(eval_summary, i)
                break

        # if i % FLAGS.infer_step_interval == 0:
        if i % pow(10, len(str(i)) - 1) == 0:
            checkpoint_path = train_saver.save(train_sess,
                                               FLAGS.checkpoints_dir +
                                               '/checkpoints',
                                               global_step=i)
            infer_saver.restore(infer_sess, checkpoint_path)

            infers_train = []
            infer_sess.run(infer_iterator.initializer,
                           feed_dict={infer_filenames: [FLAGS.path_to_xtrain]})
            while FLAGS.data_to_infer:
                try:
                    xs_infer = infer_sess.run(infer_elements)
                except tf.errors.OutOfRangeError:
                    break
                ys_infer = infer_sess.run(infer_outputs,
                                          feed_dict={infer_inputs: xs_infer})
                infers_train.extend(ys_infer)
            # print(infers_train)

            infers_test = []
            infer_sess.run(infer_iterator.initializer,
                           feed_dict={infer_filenames: [FLAGS.path_to_xtest]})
            while FLAGS.data_to_infer:
                try:
                    xs_infer = infer_sess.run(infer_elements)
                except tf.errors.OutOfRangeError:
                    break
                ys_infer = infer_sess.run(infer_outputs,
                                          feed_dict={infer_inputs: xs_infer})
                print(xs_infer.shape, xs_infer.flatten())
                print(ys_infer.shape, ys_infer.flatten())
                infers_test.extend(ys_infer)
            # print(infers_test)
            metrics = classifier.run(infers_train, infers_test, FLAGS)
            # print(metrics)
            pprint.pprint(metrics)
            infer_summary = metrics_to_metrics(infer_sess, infer_merger,
                                               rbfnn_metrics, metrics)
            infer_writer.add_summary(infer_summary, i)
Example #12
xrand = np.loadtxt(FLAGS.path_to_xrand)
print(xtrain.shape, xtest.shape, xrand.shape)

FLAGS.rbfnn_num_center = 120
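# Sweep the k-means codebook size over powers of two, from 2**7 to 2**15.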
for i in range(7, 15 + 1):
    num_cluster = 1 << i
    print(num_cluster)
    FLAGS.depict_output_dim = num_cluster
    FLAGS.rbfnn_input_dim = num_cluster
    pprint.pprint(FLAGS)

    # kms = cluster.build_kmeans_model_with_fixed_input(FLAGS, xrand)
    kms = cluster.build_kmeans_model_with_random_input(FLAGS, xtrain)
    ca_train = kms.predict(xtrain)
    ca_test = kms.predict(xtest)
    metrics = classifier.run(ca_train, ca_test, FLAGS)
    pprint.pprint(metrics)

    # TODO:
    if not os.path.exists(FLAGS.saved_results_dir):
        os.makedirs(FLAGS.saved_results_dir)
    outfile = os.path.join(
        FLAGS.saved_results_dir,
        '%s_r%d_kmeans_results.txt' % (FLAGS.database_name, FLAGS.split_round))
    with open(outfile, 'a') as f:
        line = list()
        line.extend([FLAGS.rbfnn_num_center, FLAGS.depict_output_dim, 0])
        line.extend(metrics['err_train'].tolist())
        line.extend([metrics['acc_train']])
        line.extend(metrics['stsm_train'].tolist())
        line.extend(metrics['err_test'].tolist())
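        # assumed continuation (the source snippet is truncated here); it mirrors
        # the results write-out used by the training script in Example #9
        line.extend([metrics['acc_test']])
        line = [str(item) for item in line]
        line = ' '.join(line)
        f.write(line)
        f.write('\n')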
Example #13
#data_path = 'articles.csv' if args.full else 'split80/test.csv'
data = {seed: setup(seed, path='data/articles.csv') for seed in SEEDS}

from utils import LABELS
analysis = {'embedding': [], 'truth': [], 'text': [], 'pred': [], LABELS: []}
# assumed: `cols` is initialized before this snippet in the source; a minimal stand-in:
cols = {key: [] for key in ['embedding', *METRICS]}

for dim in DIMS:
    for vector in VECTORS:
        #cols['embedding'].append(f'{dim}d_{vector}')
        results = np.zeros(len(METRICS), dtype='float')
        for seed in SEEDS:
        
            TEXT, LABEL, train_data, test_data = data[seed]
        
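            # each seed is evaluated twice: run 'a' trains on train_data and tests on
            # test_data, run 'b' swaps the two splits (a two-fold style evaluation)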
            if vector == 'RANDOM':
                test_results_a = run(seed, 'a', analysis, DIR, None, dim, TEXT, LABEL, train_data, test_data, randomize=True, saved=SAVED)
                test_results_b = run(seed, 'b', analysis, DIR, None, dim, TEXT, LABEL, test_data, train_data, randomize=True, saved=SAVED)
            else:   
                test_results_a = run(seed, 'a', analysis, DIR, vector, dim, TEXT, LABEL, train_data, test_data, randomize=RANDOMIZE, saved=SAVED)
                test_results_b = run(seed, 'b', analysis, DIR, vector, dim, TEXT, LABEL, test_data, train_data, randomize=RANDOMIZE, saved=SAVED)
                    
            cols['embedding'].append(f'{vector}.{dim}d_{seed}a')
            cols['embedding'].append(f'{vector}.{dim}d_{seed}b')
            for j, metric in enumerate(METRICS):
                cols[metric].append(test_results_a[j])
                cols[metric].append(test_results_b[j])
        print(f'finished {vector}.{dim}d')
        
df = pd.DataFrame(cols)
with open(os.path.join('results', DIR, 'results.pkl'), 'wb') as f:
    pickle.dump(df, f)
Example #14
def run(progress=True,
        verbose=False,
        loadFile=False,
        printtweets=False,
        causeFilename="causeSunWedFri",
        outputDivider=900,
        produceResult=False,
        chunkScatter=False):
    if progress:
        classifier.run(Covid=True,
                       verbose=verbose)  #this gets the reference accuracy
        """
        various file input options as [dates]
        """
        #dates = ["../2020-04-19 Coronavirus Tweets.csv","../2020-04-21 Coronavirus Tweets.csv","../2020-04-22 Coronavirus Tweets.csv"]#,"../2020-04-24 Coronavirus Tweets.csv" ]

        # April overall Sun/Wed
        dates = ([
            "../2020-03-29 Coronavirus Tweets.csv",
            "../2020-04-01 Coronavirus Tweets.csv",
            "../2020-04-05 Coronavirus Tweets.csv",
            "../2020-04-08 Coronavirus Tweets.csv"
        ] + [
            "../2020-04-{} Coronavirus Tweets.csv".format(i)
            for i in range(12, 31, 7)
        ] + [
            "../2020-04-{} Coronavirus Tweets.csv".format(i)
            for i in range(15, 31, 7)
        ])

        # April overall Mon/Thu
        #dates = (["../2020-03-30 Coronavirus Tweets.csv","../2020-04-02 Coronavirus Tweets.csv","../2020-04-06 Coronavirus Tweets.csv","../2020-04-09 Coronavirus Tweets.csv"]
        #          +["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(13,31,7)]
        #          +["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(16,31,7)])

        # April overall Sun/Wed/Fri
        #dates = (["../2020-03-29 Coronavirus Tweets.csv","../2020-04-01 Coronavirus Tweets.csv","../2020-04-03 Coronavirus Tweets.csv","../2020-04-06 Coronavirus Tweets.csv","../2020-04-08 Coronavirus Tweets.csv"]
        #          +["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(10,31,7)]
        #          +["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(12,31,7)]
        #          +["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(15,31,7)])

        # April overall Mon/Thu/Sat
        #dates = (["../2020-03-30 Coronavirus Tweets.csv","../2020-04-02 Coronavirus Tweets.csv","../2020-04-04 Coronavirus Tweets.csv","../2020-04-06 Coronavirus Tweets.csv","../2020-04-09 Coronavirus Tweets.csv"]
        #          +["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(13,31,7)]
        #          +["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(16,31,7)]
        #          +["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(11,31,7)])

        # This part handles the loading/saving of the cause file for feedback usage.
        # It is helpful because the expensive cause.run() call is skipped when loadFile=True.
        # If you don't have the files or the computing power, set loadFile=True and use the preset cause.pkl.
        if loadFile:
            loading = open(causeFilename + ".pkl", 'rb')
            xA, xF, xJ, xS, cmFJS, cmAJS, cmAFS, cmAFJ, _A, _F, _J, _S, cm4 = load(
                loading)
            loading.close()
        else:
            xA, xF, xJ, xS, cmFJS, cmAJS, cmAFS, cmAFJ, _A, _F, _J, _S, cm4 = cause.run(
                verbose=verbose,
                dates=dates,
                printtweets=printtweets,
                chunkScatter=chunkScatter)
            saving = open(causeFilename + ".pkl", "wb")
            dump((xA, xF, xJ, xS, cmFJS, cmAJS, cmAFS, cmAFJ, _A, _F, _J, _S,
                  cm4), saving, -1)
            saving.close()

        # This part shows you the accuracy information.
        """ Feedback sandbox examples
        set scorefactor => sf
        sf = 0.2 : exclusive cause reinforce
        classifier.run(Covid = True, verbose = verbose, feed_back = [xA, xF, xJ, xS],sf=0.2)
        sf = -0.4 : non-cause deduction *(Sun/Wed => 0.84)
        classifier.run(Covid = True, verbose = verbose, feed_back = [cmFJS, cmAJS, cmAFS, cmAFJ],sf=-0.4)
        sf = -0.2 : inclusive cause reinforce *(Sun/Wed/Fri => 0.85)
        classifier.run(Covid = True, verbose = verbose, feed_back = [_A,_F,_J,_S],sf=-0.2)
        sf = ? : inclusive cause reinforce
        classifier.run(Covid = True, verbose = verbose, feed_back = [cm4,cm4,cm4,cm4],sf=0)"""
        fb = [cmFJS, cmAJS, cmAFS, cmAFJ]
        scoreFactor = -0.4
        classifier.run(Covid=True,
                       verbose=verbose,
                       feed_back=fb,
                       sf=scoreFactor)
        # ^ This part only tries the feedback during evaluation, to show how accurate
        # the classifier used below will be.
        # v The real work is right below.

        # This part produces the results (e.g., 03-00 - Anger: 4000, Fear: 1000, ...).
        if loadFile and produceResult:
            dateChunks = [  # weekly analysis
                #["../2020-03-00 Coronavirus Tweets (pre 2020-03-12).csv"],
                #["../2020-03-12 Coronavirus Tweets.csv"],
                #["../2020-03-15 Coronavirus Tweets.csv"],
                #["../2020-03-00 Coronavirus Tweets (pre 2020-03-12).csv"]+["../2020-03-12 Coronavirus Tweets.csv"]+["../2020-03-15 Coronavirus Tweets.csv"],
                #["../2020-03-20 Coronavirus Tweets.csv"],
                #["../2020-03-25 Coronavirus Tweets.csv"],
                #["../2020-03-28 Coronavirus Tweets.csv"],
                #["../2020-03-29 Coronavirus Tweets.csv"],
                #["../2020-03-25 Coronavirus Tweets.csv","../2020-03-28 Coronavirus Tweets.csv","../2020-03-29 Coronavirus Tweets.csv"],
                #["../2020-03-30 Coronavirus Tweets.csv","../2020-03-31 Coronavirus Tweets.csv"]
                #+["../2020-04-0{} Coronavirus Tweets.csv".format(i) for i in range(1,6)],
                [
                    "../2020-04-0{} Coronavirus Tweets.csv".format(i)
                    for i in range(6, 10)
                ] + [
                    "../2020-04-{} Coronavirus Tweets.csv".format(i)
                    for i in range(10, 13)
                ],
                [
                    "../2020-04-{} Coronavirus Tweets.csv".format(i)
                    for i in range(13, 20)
                ],
                [
                    "../2020-04-{} Coronavirus Tweets.csv".format(i)
                    for i in range(20, 27)
                ],
                [
                    "../2020-04-{} Coronavirus Tweets.csv".format(i)
                    for i in range(27, 31)
                ]
            ]

            # This part runs and gathers the percentage information.
            # REMEMBER, you should have all the files listed in dateChunks to run this part.
            # If you don't have them, set produceResult = False.
            for d in dateChunks:
                check.run(dates=d,
                          verbose=False,
                          outDeminish=outputDivider,
                          feedback=fb,
                          num_samples=3375,
                          printtweets=printtweets)
            # Side note: check returns the cause chunks from the dataset, in list form.
            #       - see the for loop in check_classifier.py at line 106 ("for check in checks") for more info.
            #       - each check in checks is a list of tweets per emotion (the exact
            #         format is described in check_classifier.py, lines 81-96).

    else:
        # the list "dates" contains the paths of the tweet files

        # original
        # dates = ["../2020-04-19 Coronavirus Tweets.csv","../2020-04-21 Coronavirus Tweets.csv","../2020-04-22 Coronavirus Tweets.csv"]#,"../2020-04-24 Coronavirus Tweets.csv" ]

        # April 16~30
        #dates = ["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(16,31)]

        # April 01~15
        #dates = (["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(10,16)]+["../2020-04-0{} Coronavirus Tweets.csv".format(i) for i in range(1,10)])
        check_classifier.run(verbose=verbose)
Example #15
		e_fn = e_fn + 1
	elif name == 'NEUTROPHIL':
		n_fn = n_fn + 1
	elif name == 'LYMPHOCYTE':
		l_fn = l_fn + 1
	elif name == 'BASOPHIL':
		b_fn = b_fn + 1
	elif name == 'MONOCYTE':
		m_fn = m_fn + 1

classified = []

with open('./output', 'w') as writer:
	for image_path in os.listdir('./images'):
		#print image_path
		name = classifier.run(image_path)
		writer.write(image_path + ', ' + name + '\n')
		#print(name)
		classified.append(name)

with open('./cell_classes') as file:
    lines = file.readlines()

lines = [l.strip() for l in lines] 

correct = 0
total = 0

i = 0

for line in lines:
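	# assumed continuation - the source snippet is truncated here; compare each
	# ground-truth label with the classifier's prediction for the same index
	if line == classified[i]:
		correct = correct + 1
	total = total + 1
	i = i + 1

print('accuracy:', float(correct) / total)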
Example #16
import os
import sys

import classifier
import imageutils

try:
    from importlib import reload  # Python 3: reload lives in importlib
except ImportError:
    pass  # Python 2: reload() is a builtin

path_ad = "D:/Alzheimers/PET_AD_CLEAN"
path_normal = "D:/Alzheimers/PET_NORMAL_CLEAN/"

nr_ad = 48
nr_normal = 48

# CREDIT: https://stackoverflow.com/questions/6687660/keep-persistent-variables-in-memory-between-runs-of-python-script
# Peter Lyons Jul 14 '11
cache = None
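# Keep the slow-to-load PET images cached in memory across runs; each iteration
# only deletes classifier's cached bytecode and reloads the module.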
if __name__ == "__main__":
    while True:
        if not cache:
            pet_ad = imageutils.read_pet_images(path_ad, nr_ad)
            pet_normal = imageutils.read_pet_images(path_normal, nr_normal)
            cache = (pet_ad, pet_normal)

        try:
            classifier.run(cache, nr_ad, nr_normal)

        except Exception as e:
            print("Error in classifier.py")
            print(e)

        print("Press enter to re-run the script, CTRL-C to exit")
        sys.stdin.readline()

        os.remove(getattr(classifier, '__cached__', 'classifier.pyc'))
        reload(classifier)