def run(xml_data="../xml/RTE2_dev.xml"): #the ML, learning and classifying the data set. # the classifier imports and runs the features.py file to extract the features. # remember to out comment the two run statements at end of file. classifier.run() classifier.run(False) #evaluating the results of part3 classification. os.system(os.getcwd() + "/eval_rte.py "+ xml_data+ " " + os.getcwd()+"/results_part3.txt")
def main():
    data_folder = 'data/sources/wikipedia'
    models_folder = 'classifier/models'
    save_loc = '/usr/share/nginx/html/wiki'
    if not os.path.exists(data_folder):
        os.makedirs(data_folder)
    if next(os.walk(data_folder))[1]:
        # retrain is always True here; flip it to False to load the newest saved model instead.
        retrain = True
        if retrain:
            input, target, classes = data.sample(data_folder)
            model = classifier.build(input.shape, target.shape)
            classifier.train(model, input, target)
            classifier.save(models_folder, model, classes)
        else:
            model, classes = classifier.load(models_folder,
                                             sorted(os.listdir(models_folder))[-1])
        for root, dirs, files in os.walk(data_folder):
            for file in files:
                if not file.startswith('.'):
                    with open(root + '/' + file) as f:
                        input = data.str2mat(f.read())
                        output = classifier.run(model, input)
                        data.backtest(save_loc + '/' + file, classes, input, output)
    else:
        print("""\nNo data found.\nPut subfolders of files by class, within the 'data' folder.""")
def predict():
    try:
        data = request.get_json()
        query = data['Title'] + ' ' + data['Body']
        stance = data['Stance']
    except Exception as e:
        return 'bad input or could not process.', 400
    return jsonify(run('oraw1_15k', query).tolist())
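# --- Hedged usage sketch (not part of the original) ---------------------------
# Example client call for the predict handler above. The URL, port, and route
# name are assumptions; the JSON keys match what the handler reads
# ('Title', 'Body', 'Stance').
import requests

resp = requests.post(
    "http://localhost:5000/predict",
    json={"Title": "Example title", "Body": "Example body text", "Stance": "agree"},
)
print(resp.status_code, resp.json())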
def main():
    xs_train = np.loadtxt(FLAGS.path_to_xtrain)
    xs_test = np.loadtxt(FLAGS.path_to_xtest)
    kms = build_kmeans_model_with_random_input(FLAGS.model_dir, 'kmeans',
                                               xs_train, FLAGS.depict_output_dim)
    outputs_train = kms.predict(xs_train)
    output_test = kms.predict(xs_test)
    metrics = classifier.run(outputs_train, output_test, FLAGS)
    # print(metrics)
    pprint.pprint(metrics)
def k_fold_cross_validation(docs, class_labels, type_of_classifier='knn',
                            n_splits=2, k_neighbors=3):
    print('k_neighbors:', k_neighbors)
    vocabulary = build_vocabulary()
    # n-fold cross validation
    seed = 1
    enable_shuffle = False
    # Note: random_state only takes effect when shuffle=True.
    k_fold = KFold(n_splits=n_splits, random_state=seed, shuffle=enable_shuffle)
    m_accuracy = 0.0
    m_f1_score = 0.0
    iteration = 0
    # ros = RandomOverSampler(random_state=1)
    # ros = EditedNearestNeighbours(random_state=1)
    for train_index, test_index in k_fold.split(docs, class_labels):
        iteration += 1
        train = [docs[i] for i in train_index]
        test = [docs[i] for i in test_index]
        tf_idf_train, train_vocabulary = preprocess.get_tf_idf_training(train)
        tf_idf_test = preprocess.get_tf_idf_testing(train_vocabulary, test)
        train_labels = [class_labels[i] for i in train_index]
        test_labels = [class_labels[i] for i in test_index]
        # random sampling
        # tf_idf_train_ros, train_labels_ros = ros.fit_sample(tf_idf_train, train_labels)
        predict_labels = classifier.run(tf_idf_train, train_labels, tf_idf_test,
                                        type_of_classifier, k_neighbors=k_neighbors)
        accuracy = calculate_accuracy(test_labels, predict_labels)
        m_accuracy += accuracy
        m_f1_score += f1_score(test_labels, predict_labels, average='weighted')
        print('iteration:', iteration)
        print('\taccuracy:', accuracy)
        print('\tf1-score:', f1_score(test_labels, predict_labels, average='weighted'))
    return m_accuracy / n_splits, m_f1_score / n_splits
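# --- Hedged variant sketch (not part of the original) -------------------------
# The commented-out RandomOverSampler / EditedNearestNeighbours lines suggest class
# imbalance was a concern. A StratifiedKFold split keeps the label distribution
# similar across folds and is a drop-in replacement for the KFold used above.
from sklearn.model_selection import StratifiedKFold

def stratified_indices(docs, class_labels, n_splits=2, seed=1):
    """Yield (train_index, test_index) pairs with per-fold label balance."""
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    for train_index, test_index in skf.split(docs, class_labels):
        yield train_index, test_index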
if type == 'train':
    print('training......')
    accuracy, f1_score = k_fold_cross_validation(
        train_docs, train_labels,
        type_of_classifier=type_of_classifier,
        n_splits=10,
        k_neighbors=k_neighbors)
    print('average accuracy =', accuracy)
    print('average f1_score =', f1_score)
if type == 'test':
    print('testing......')
    tf_idf_train, train_vocabulary = preprocess.get_tf_idf_training(train_docs)
    tf_idf_test = preprocess.get_tf_idf_testing(train_vocabulary, test_docs)
    predict_labels = classifier.run(
        tf_idf_train, train_labels, tf_idf_test,
        type_of_classifier,
        k_neighbors=k_neighbors)
    print(len(predict_labels))
    output_file_name = '../data/format.dat'
    with open(output_file_name, 'w') as raw_text:
        for label in predict_labels:
            raw_text.write(label + '\n')
import numpy as np
from matplotlib import pyplot

import classifier

X_train, X_test, y_train, y_test = classifier.getdata()

# accuracy vs hyperparameter graphs

# comparing number of epochs with overall accuracy
resEp100 = classifier.run(X_train, X_test, y_train, y_test, 100, 0.01, 0.5)
y = []
x = []
for i, l in enumerate(resEp100):
    y.append(np.mean(resEp100[i]))
    x.append(i + 1)
pyplot.plot(x, y)
pyplot.title('Number of Epochs vs Overall Accuracy')
pyplot.xlabel('Number of Epochs')
pyplot.ylabel('Accuracy')
pyplot.ylim(0.94, 0.98)
pyplot.show()

# comparing overall accuracy with different penalty
y = []
x = []
penalties = [0.1, 0.2, 0.5, 1, 2, 5]
for p in penalties:
import numpy as np

import classifier
from loadMNIST_py import MnistDataloader

mnistDataLoader = MnistDataloader(
    'train-images.idx3-ubyte', 'train-labels.idx1-ubyte',
    't10k-images.idx3-ubyte', 't10k-labels.idx1-ubyte')
(trainImages, trainLabels), (t10kImages, t10kLabels) = mnistDataLoader.load_data()

print("1st run: ")
initial_centroids = np.random.randn(10, 28 * 28)
classifier.run(initial_centroids, trainImages, trainLabels, t10kImages, t10kLabels)

print("2nd run: ")
initial_centroids = np.random.randn(10, 28 * 28)
classifier.run(initial_centroids, trainImages, trainLabels, t10kImages, t10kLabels)

print("3rd run: ")
initial_centroids = np.random.randn(10, 28 * 28)
initial_centroids = classifier.run(initial_centroids, trainImages, trainLabels, t10kImages, t10kLabels)

print("4th run with chosen initialized centroids: ")
# F)
classifier.run(initial_centroids, trainImages, trainLabels, t10kImages, t10kLabels)
def main():
    tf.logging.set_verbosity(tf.logging.INFO)
    prepare_file_system()
    # FLAGS.eval_step_interval = 1
    # FLAGS.infer_step_interal = 10
    # TODO: OOP
    train_graph = tf.Graph()
    with train_graph.as_default():
        train_filenames, train_iterator, train_elements = \
            build_text_line_reader(shuffle=True, batch_size=FLAGS.train_batch_size)
        train_inputs, train_cost, optimizer = build_train_graph(
            train_elements, FLAGS.depict_input_dim, FLAGS.depict_output_dim,
            func=FLAGS.loss_function)
        train_saver = tf.train.Saver()
        train_merger = tf.summary.merge_all()
        train_initializer = tf.global_variables_initializer()
        # train_parameters = tf.trainable_variables()
    eval_graph = tf.Graph()
    with eval_graph.as_default():
        eval_filenames, eval_iterator, eval_elements = \
            build_text_line_reader(shuffle=True, batch_size=FLAGS.eval_batch_size)
        eval_inputs, eval_outputs = build_eval_graph(eval_elements,
                                                     FLAGS.depict_input_dim,
                                                     FLAGS.depict_output_dim)
        eval_saver = tf.train.Saver()
        eval_merger = tf.summary.merge_all()
        eval_initializer = tf.global_variables_initializer()
        # eval_parameters = tf.trainable_variables()
    infer_graph = tf.Graph()
    with infer_graph.as_default():
        infer_filenames, infer_iterator, infer_elements = \
            build_text_line_reader(shuffle=False, batch_size=FLAGS.infer_batch_size)
        infer_inputs, infer_outputs = build_infer_graph(
            infer_elements, FLAGS.depict_input_dim, FLAGS.depict_output_dim)
        rbfnn_metrics = build_metrics_graph('rbfnn')
        # kmeans_metrics = build_metrics_graph('kmeans')
        infer_saver = tf.train.Saver()
        infer_merger = tf.summary.merge_all()
        infer_initializer = tf.global_variables_initializer()

    config = tf.ConfigProto(device_count={"CPU": 24, "GPU": 0})
    train_sess = tf.Session(graph=train_graph, config=config)
    eval_sess = tf.Session(graph=eval_graph, config=config)
    infer_sess = tf.Session(graph=infer_graph, config=config)
    # train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', train_graph)
    # validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation', eval_graph)
    # infer_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/inference', infer_graph)

    results = dict()
    train_sess.run(train_initializer)
    train_sess.run(train_iterator.initializer,
                   feed_dict={train_filenames: [FLAGS.path_to_xtrain]})
    # train_sess.run(train_iterator.initializer)
    for i in itertools.count():
        if i > FLAGS.how_many_training_steps:
            break
        try:
            xs_train = train_sess.run(train_elements)
            # print(xs_train)
        except tf.errors.OutOfRangeError:
            train_sess.run(train_iterator.initializer,
                           feed_dict={train_filenames: [FLAGS.path_to_xtrain]})
            xs_train = train_sess.run(train_elements)
        # train_summary, _ = train_sess.run([optimizer, train_merger])
        _, training_cost, train_summary = train_sess.run(
            [optimizer, train_cost, train_merger], feed_dict={train_inputs: xs_train})
        # train_writer.add_summary(train_summary, i)
        # print('epoch: %6d, training cost: %.8f' % (i, training_cost))
        # time.sleep(1)
        # if i % FLAGS.eval_step_interval == 0:
        if i % pow(10, len(str(i)) - 1) == 0:
            # print(train_sess.run(train_parameters[0]))
            checkpoint_path = train_saver.save(train_sess,
                                               FLAGS.checkpoints_dir + '/checkpoints',
                                               global_step=i)
            eval_saver.restore(eval_sess, checkpoint_path)
            # print(eval_sess.run(eval_parameters[0]))
            eval_sess.run(eval_iterator.initializer,
                          feed_dict={eval_filenames: [FLAGS.path_to_xtest]})
            while FLAGS.data_to_eval:
                try:
                    xs_eval = eval_sess.run(eval_elements)
                except tf.errors.OutOfRangeError:
                    # eval_sess.run(eval_iterator.initializer,
                    #               feed_dict={eval_filenames: [r'../../data/x_1000_128.txt']})
                    # xs_eval = eval_sess.run(eval_elements)
                    break
                # training_outputs = eval_sess.run(eval_outputs, feed_dict={eval_inputs: xs_train})
                # evaluation_outputs = eval_sess.run(eval_outputs, feed_dict={eval_inputs: xs_eval})
                evaluation_cost, eval_summary = train_sess.run(
                    [train_cost, train_merger], feed_dict={train_inputs: xs_eval})
                tf.logging.info("epoch: %d, training cost: %f" % (i, training_cost))
                tf.logging.info("epoch: %d, evaluation cost: %f" % (i, evaluation_cost))
                # validation_writer.add_summary(eval_summary, i)
                break
        # if i % FLAGS.infer_step_interval == 0:
        if i % pow(10, len(str(i)) - 1) == 0:
            checkpoint_path = train_saver.save(train_sess,
                                               FLAGS.checkpoints_dir + '/checkpoints',
                                               global_step=i)
            train_saver.save(train_sess,
                             FLAGS.saved_model_dir + '/checkpoints_' + str(FLAGS.depict_output_dim),
                             global_step=i)
            infer_saver.restore(infer_sess, checkpoint_path)
            infers_train = []
            infer_sess.run(infer_iterator.initializer,
                           feed_dict={infer_filenames: [FLAGS.path_to_xtrain]})
            while FLAGS.data_to_infer:
                try:
                    xs_infer = infer_sess.run(infer_elements)
                except tf.errors.OutOfRangeError:
                    break
                ys_infer = infer_sess.run(infer_outputs, feed_dict={infer_inputs: xs_infer})
                infers_train.extend(ys_infer)
            # print(infers_train)
            infers_test = []
            infer_sess.run(infer_iterator.initializer,
                           feed_dict={infer_filenames: [FLAGS.path_to_xtest]})
            while FLAGS.data_to_infer:
                try:
                    xs_infer = infer_sess.run(infer_elements)
                except tf.errors.OutOfRangeError:
                    break
                ys_infer = infer_sess.run(infer_outputs, feed_dict={infer_inputs: xs_infer})
                print(xs_infer.shape, xs_infer.flatten())
                print(ys_infer.shape, ys_infer.flatten())
                infers_test.extend(ys_infer)
            # print(infers_test)
            metrics = classifier.run(infers_train, infers_test, FLAGS)
            pprint.pprint(metrics)
            # infer_summary = metrics_to_metrics(infer_sess, infer_merger, rbfnn_metrics, metrics)
            # infer_writer.add_summary(infer_summary, i)
            results[i] = metrics
            # TODO:
            with open('../../results/results.txt', 'a') as f:
                line = list()
                line.extend([FLAGS.rbfnn_num_center, FLAGS.depict_output_dim, i])
                line.extend(metrics['err_train'].tolist())
                line.extend([metrics['acc_train']])
                line.extend(metrics['stsm_train'].tolist())
                line.extend(metrics['err_test'].tolist())
                line.extend([metrics['acc_test']])
                line = [str(item) for item in line]
                line = ' '.join(line)
                f.write(line)
                f.write('\n')
    train_sess.close()
    eval_sess.close()
    infer_sess.close()
    return results
def main():
    tf.logging.set_verbosity(tf.logging.INFO)
    prepare_file_system()
    # FLAGS.eval_step_interval = 1
    # FLAGS.infer_step_interal = 10
    # TODO: OOP
    train_graph = tf.Graph()
    with train_graph.as_default():
        train_filenames, train_iterator, train_elements = \
            build_text_line_reader(shuffle=True, batch_size=FLAGS.train_batch_size)
        train_inputs, train_cost, optimizer = build_train_graph(
            train_elements, FLAGS.depict_input_dim, FLAGS.depict_output_dim,
            func=FLAGS.loss_function)
        train_saver = tf.train.Saver()
        train_merger = tf.summary.merge_all()
        train_initializer = tf.global_variables_initializer()
        # train_parameters = tf.trainable_variables()
    eval_graph = tf.Graph()
    with eval_graph.as_default():
        eval_filenames, eval_iterator, eval_elements = \
            build_text_line_reader(shuffle=True, batch_size=FLAGS.eval_batch_size)
        eval_inputs, eval_outputs = build_eval_graph(eval_elements,
                                                     FLAGS.depict_input_dim,
                                                     FLAGS.depict_output_dim)
        eval_saver = tf.train.Saver()
        eval_merger = tf.summary.merge_all()
        eval_initializer = tf.global_variables_initializer()
        # eval_parameters = tf.trainable_variables()
    infer_graph = tf.Graph()
    with infer_graph.as_default():
        infer_filenames, infer_iterator, infer_elements = \
            build_text_line_reader(shuffle=False, batch_size=FLAGS.infer_batch_size)
        infer_inputs, infer_outputs = build_infer_graph(
            infer_elements, FLAGS.depict_input_dim, FLAGS.depict_output_dim)
        rbfnn_metrics = build_metrics_graph('rbfnn')
        # kmeans_metrics = build_metrics_graph('kmeans')
        infer_saver = tf.train.Saver()
        infer_merger = tf.summary.merge_all()
        infer_initializer = tf.global_variables_initializer()

    config = tf.ConfigProto(device_count={"GPU": 1})
    train_sess = tf.Session(graph=train_graph, config=config)
    eval_sess = tf.Session(graph=eval_graph, config=config)
    infer_sess = tf.Session(graph=infer_graph, config=config)
    # train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', train_graph)
    # validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation', eval_graph)
    # infer_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/inference', infer_graph)

    train_sess.run(train_initializer)
    # eval_sess.run(eval_initializer)
    # infer_sess.run(infer_initializer)

    import utils
    results = dict()
    for epoch in itertools.count():
        if epoch > FLAGS.how_many_training_epoches:
            break
        train_generator = utils.build_data_generator(
            xtrain, shuffle=True, batch_size=FLAGS.train_batch_size)
        for batch, xs_train in enumerate(train_generator):
            _, training_cost = train_sess.run(
                [optimizer, train_cost], feed_dict={train_inputs: xs_train})
        if epoch % 1 == 0:
            checkpoint_path = train_saver.save(train_sess,
                                               FLAGS.checkpoints_dir + '/checkpoints',
                                               global_step=epoch)
            # train_saver.save(train_sess, FLAGS.saved_model_dir + '/checkpoints_' + str(FLAGS.depict_output_dim), global_step=epoch)
            infer_saver.restore(infer_sess, checkpoint_path)
            infers_train = []
            infer_generator = utils.build_data_generator(
                xtrain, shuffle=False, batch_size=FLAGS.infer_batch_size)
            for batch, xs_infer in enumerate(infer_generator):
                ys_infer = infer_sess.run(infer_outputs, feed_dict={infer_inputs: xs_infer})
                infers_train.extend(ys_infer)
            infers_test = []
            infer_generator = utils.build_data_generator(
                xtest, shuffle=False, batch_size=FLAGS.infer_batch_size)
            for batch, xs_infer in enumerate(infer_generator):
                ys_infer = infer_sess.run(infer_outputs, feed_dict={infer_inputs: xs_infer})
                infers_test.extend(ys_infer)
            print(len(infers_train), len(infers_test))
            metrics = classifier.run(infers_train, infers_test, FLAGS)
            pprint.pprint(metrics)
            # Was `results[i] = metrics`; `i` is undefined in this loop, so key by epoch.
            results[epoch] = metrics
            utils.write_results(FLAGS, metrics, epoch)
    train_sess.close()
    eval_sess.close()
    infer_sess.close()
    return results
def main():
    tf.logging.set_verbosity(tf.logging.INFO)
    prepare_file_system()
    FLAGS.eval_step_interval = 1
    FLAGS.infer_step_interal = 10
    # TODO: OOP
    train_graph = tf.Graph()
    with train_graph.as_default():
        train_filenames, train_iterator, train_elements = \
            build_text_line_reader(shuffle=True, batch_size=FLAGS.train_batch_size)
        train_inputs, train_cost, optimizer = build_train_graph(
            train_elements, FLAGS.depict_input_dim, FLAGS.depict_output_dim,
            func='func_02')
        train_saver = tf.train.Saver()
        train_merger = tf.summary.merge_all()
        train_initializer = tf.global_variables_initializer()
        # train_parameters = tf.trainable_variables()
    eval_graph = tf.Graph()
    with eval_graph.as_default():
        eval_filenames, eval_iterator, eval_elements = \
            build_text_line_reader(shuffle=True, batch_size=FLAGS.eval_batch_size)
        eval_inputs, eval_outputs = build_eval_graph(eval_elements,
                                                     FLAGS.depict_input_dim,
                                                     FLAGS.depict_output_dim)
        eval_saver = tf.train.Saver()
        eval_merger = tf.summary.merge_all()
        eval_initializer = tf.global_variables_initializer()
        # eval_parameters = tf.trainable_variables()
    infer_graph = tf.Graph()
    with infer_graph.as_default():
        infer_filenames, infer_iterator, infer_elements = \
            build_text_line_reader(shuffle=False, batch_size=FLAGS.infer_batch_size)
        infer_inputs, infer_outputs = build_infer_graph(
            infer_elements, FLAGS.depict_input_dim, FLAGS.depict_output_dim)
        rbfnn_metrics = build_metrics_graph('rbfnn')
        # kmeans_metrics = build_metrics_graph('kmeans')
        infer_saver = tf.train.Saver()
        infer_merger = tf.summary.merge_all()
        infer_initializer = tf.global_variables_initializer()

    train_sess = tf.Session(graph=train_graph)
    eval_sess = tf.Session(graph=eval_graph)
    infer_sess = tf.Session(graph=infer_graph)
    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', train_graph)
    validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation', eval_graph)
    infer_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/inference', infer_graph)

    train_sess.run(train_initializer)
    train_sess.run(train_iterator.initializer,
                   feed_dict={train_filenames: [FLAGS.path_to_xtrain]})
    # train_sess.run(train_iterator.initializer)
    for i in itertools.count():
        try:
            xs_train = train_sess.run(train_elements)
            # print(xs_train)
        except tf.errors.OutOfRangeError:
            train_sess.run(train_iterator.initializer,
                           feed_dict={train_filenames: [FLAGS.path_to_xtrain]})
            xs_train = train_sess.run(train_elements)
        # train_summary, _ = train_sess.run([optimizer, train_merger])
        _, training_cost, train_summary = train_sess.run(
            [optimizer, train_cost, train_merger], feed_dict={train_inputs: xs_train})
        train_writer.add_summary(train_summary, i)
        # print('epoch: %6d, training cost: %.8f' % (i, training_cost))
        # time.sleep(1)
        # if i % FLAGS.eval_step_interval == 0:
        if i % pow(10, len(str(i)) - 1) == 0:
            # print(train_sess.run(train_parameters[0]))
            checkpoint_path = train_saver.save(train_sess,
                                               FLAGS.checkpoints_dir + '/checkpoints',
                                               global_step=i)
            eval_saver.restore(eval_sess, checkpoint_path)
            # print(eval_sess.run(eval_parameters[0]))
            eval_sess.run(eval_iterator.initializer,
                          feed_dict={eval_filenames: [FLAGS.path_to_xtest]})
            while FLAGS.data_to_eval:
                try:
                    xs_eval = eval_sess.run(eval_elements)
                except tf.errors.OutOfRangeError:
                    # eval_sess.run(eval_iterator.initializer,
                    #               feed_dict={eval_filenames: [r'../../data/x_1000_128.txt']})
                    # xs_eval = eval_sess.run(eval_elements)
                    break
                # training_outputs = eval_sess.run(eval_outputs, feed_dict={eval_inputs: xs_train})
                # evaluation_outputs = eval_sess.run(eval_outputs, feed_dict={eval_inputs: xs_eval})
                evaluation_cost, eval_summary = train_sess.run(
                    [train_cost, train_merger], feed_dict={train_inputs: xs_eval})
                tf.logging.info("epoch: %d, training cost: %f" % (i, training_cost))
                tf.logging.info("epoch: %d, evaluation cost: %f" % (i, evaluation_cost))
                validation_writer.add_summary(eval_summary, i)
                break
        # if i % FLAGS.infer_step_interval == 0:
        if i % pow(10, len(str(i)) - 1) == 0:
            checkpoint_path = train_saver.save(train_sess,
                                               FLAGS.checkpoints_dir + '/checkpoints',
                                               global_step=i)
            infer_saver.restore(infer_sess, checkpoint_path)
            infers_train = []
            infer_sess.run(infer_iterator.initializer,
                           feed_dict={infer_filenames: [FLAGS.path_to_xtrain]})
            while FLAGS.data_to_infer:
                try:
                    xs_infer = infer_sess.run(infer_elements)
                except tf.errors.OutOfRangeError:
                    break
                ys_infer = infer_sess.run(infer_outputs, feed_dict={infer_inputs: xs_infer})
                infers_train.extend(ys_infer)
            # print(infers_train)
            infers_test = []
            infer_sess.run(infer_iterator.initializer,
                           feed_dict={infer_filenames: [FLAGS.path_to_xtest]})
            while FLAGS.data_to_infer:
                try:
                    xs_infer = infer_sess.run(infer_elements)
                except tf.errors.OutOfRangeError:
                    break
                ys_infer = infer_sess.run(infer_outputs, feed_dict={infer_inputs: xs_infer})
                print(xs_infer.shape, xs_infer.flatten())
                print(ys_infer.shape, ys_infer.flatten())
                infers_test.extend(ys_infer)
            # print(infers_test)
            metrics = classifier.run(infers_train, infers_test, FLAGS)
            # print(metrics)
            pprint.pprint(metrics)
            infer_summary = metrics_to_metrics(infer_sess, infer_merger, rbfnn_metrics, metrics)
            infer_writer.add_summary(infer_summary, i)
xrand = np.loadtxt(FLAGS.path_to_xrand)
print(xtrain.shape, xtest.shape, xrand.shape)

FLAGS.rbfnn_num_center = 120
for i in range(7, 15 + 1):
    num_cluster = 1 << i
    print(num_cluster)
    FLAGS.depict_output_dim = num_cluster
    FLAGS.rbfnn_input_dim = num_cluster
    pprint.pprint(FLAGS)
    # kms = cluster.build_kmeans_model_with_fixed_input(FLAGS, xrand)
    kms = cluster.build_kmeans_model_with_random_input(FLAGS, xtrain)
    ca_train = kms.predict(xtrain)
    ca_test = kms.predict(xtest)
    metrics = classifier.run(ca_train, ca_test, FLAGS)
    pprint.pprint(metrics)
    # TODO:
    if not os.path.exists(FLAGS.saved_results_dir):
        os.makedirs(FLAGS.saved_results_dir)
    outfile = os.path.join(
        FLAGS.saved_results_dir,
        '%s_r%d_kmeans_results.txt' % (FLAGS.database_name, FLAGS.split_round))
    with open(outfile, 'a') as f:
        line = list()
        line.extend([FLAGS.rbfnn_num_center, FLAGS.depict_output_dim, 0])
        line.extend(metrics['err_train'].tolist())
        line.extend([metrics['acc_train']])
        line.extend(metrics['stsm_train'].tolist())
        line.extend(metrics['err_test'].tolist())
# data_path = 'articles.csv' if args.full else 'split80/test.csv'
data = {seed: setup(seed, path=f'data/articles.csv') for seed in SEEDS}

from utils import LABELS

analysis = {'embedding': [], 'truth': [], 'text': [], 'pred': [], LABELS: []}
# `cols` (a dict of lists keyed by 'embedding' and each metric in METRICS) is
# assumed to be initialized before this loop.
for dim in DIMS:
    for vector in VECTORS:
        # cols['embedding'].append(f'{dim}d_{vector}')
        results = np.zeros(len(METRICS), dtype='float')
        for seed in SEEDS:
            TEXT, LABEL, train_data, test_data = data[seed]
            if vector == 'RANDOM':
                test_results_a = run(seed, 'a', analysis, DIR, None, dim, TEXT, LABEL,
                                     train_data, test_data, randomize=True, saved=SAVED)
                test_results_b = run(seed, 'b', analysis, DIR, None, dim, TEXT, LABEL,
                                     test_data, train_data, randomize=True, saved=SAVED)
            else:
                test_results_a = run(seed, 'a', analysis, DIR, vector, dim, TEXT, LABEL,
                                     train_data, test_data, randomize=RANDOMIZE, saved=SAVED)
                test_results_b = run(seed, 'b', analysis, DIR, vector, dim, TEXT, LABEL,
                                     test_data, train_data, randomize=RANDOMIZE, saved=SAVED)
            cols['embedding'].append(f'{vector}.{dim}d_{seed}a')
            cols['embedding'].append(f'{vector}.{dim}d_{seed}b')
            for j, metric in enumerate(METRICS):
                cols[metric].append(test_results_a[j])
                cols[metric].append(test_results_b[j])
        print(f'finished {vector}.{dim}d')

df = pd.DataFrame(cols)
with open(os.path.join('results', DIR, 'results.pkl'), 'wb') as f:
    pickle.dump(df, f)
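# --- Hedged usage sketch (not part of the original) ---------------------------
# Reading the pickled results table back for inspection; assumes the same DIR
# value that was used when the DataFrame was written above.
import os
import pickle

with open(os.path.join('results', DIR, 'results.pkl'), 'rb') as f:
    df = pickle.load(f)
print(df.head())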
def run(progress=True, verbose=False, loadFile=False, printtweets=False,
        causeFilename="causeSunWedFri", outputDivider=900, produceResult=False,
        chunkScatter=False):
    if progress:
        classifier.run(Covid=True, verbose=verbose)  # this gets the reference accuracy

    """ various file input options as [dates] """
    # dates = ["../2020-04-19 Coronavirus Tweets.csv", "../2020-04-21 Coronavirus Tweets.csv", "../2020-04-22 Coronavirus Tweets.csv"]  # , "../2020-04-24 Coronavirus Tweets.csv"]

    # April overall Sun/Wed
    dates = ([
        "../2020-03-29 Coronavirus Tweets.csv",
        "../2020-04-01 Coronavirus Tweets.csv",
        "../2020-04-05 Coronavirus Tweets.csv",
        "../2020-04-08 Coronavirus Tweets.csv"
    ] + [
        "../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(12, 31, 7)
    ] + [
        "../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(15, 31, 7)
    ])

    # April overall Mon/Thu
    # dates = (["../2020-03-30 Coronavirus Tweets.csv", "../2020-04-02 Coronavirus Tweets.csv", "../2020-04-06 Coronavirus Tweets.csv", "../2020-04-09 Coronavirus Tweets.csv"]
    #          + ["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(13, 31, 7)]
    #          + ["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(16, 31, 7)])

    # April overall Sun/Wed/Fri
    # dates = (["../2020-03-29 Coronavirus Tweets.csv", "../2020-04-01 Coronavirus Tweets.csv", "../2020-04-03 Coronavirus Tweets.csv", "../2020-04-06 Coronavirus Tweets.csv", "../2020-04-08 Coronavirus Tweets.csv"]
    #          + ["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(10, 31, 7)]
    #          + ["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(12, 31, 7)]
    #          + ["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(15, 31, 7)])

    # April overall Mon/Thu/Sat
    # dates = (["../2020-03-30 Coronavirus Tweets.csv", "../2020-04-02 Coronavirus Tweets.csv", "../2020-04-04 Coronavirus Tweets.csv", "../2020-04-06 Coronavirus Tweets.csv", "../2020-04-09 Coronavirus Tweets.csv"]
    #          + ["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(13, 31, 7)]
    #          + ["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(16, 31, 7)]
    #          + ["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(11, 31, 7)])

    # This part handles the loading/saving of the cause file for feedback usage.
    # Because cause.run (the heavy computation) doesn't run when loadFile=True, this is helpful.
    # If you don't have the files or the computation power, set loadFile=True and use the preset cause.pkl.
    if loadFile:
        loading = open(causeFilename + ".pkl", 'rb')
        xA, xF, xJ, xS, cmFJS, cmAJS, cmAFS, cmAFJ, _A, _F, _J, _S, cm4 = load(loading)
        loading.close()
    else:
        xA, xF, xJ, xS, cmFJS, cmAJS, cmAFS, cmAFJ, _A, _F, _J, _S, cm4 = cause.run(
            verbose=verbose, dates=dates, printtweets=printtweets, chunkScatter=chunkScatter)
        saving = open(causeFilename + ".pkl", "wb")
        dump((xA, xF, xJ, xS, cmFJS, cmAJS, cmAFS, cmAFJ, _A, _F, _J, _S, cm4), saving, -1)
        saving.close()

    # This part shows you the accuracy information.
    """ Feedback sandbox examples (set scorefactor => sf)

    sf = 0.2  : exclusive cause reinforce
        classifier.run(Covid=True, verbose=verbose, feed_back=[xA, xF, xJ, xS], sf=0.2)
    sf = -0.4 : non-cause deduction *(Sun/Wed => 0.84)
        classifier.run(Covid=True, verbose=verbose, feed_back=[cmFJS, cmAJS, cmAFS, cmAFJ], sf=-0.4)
    sf = -0.2 : inclusive cause reinforce *(Sun/Wed/Fri => 0.85)
        classifier.run(Covid=True, verbose=verbose, feed_back=[_A, _F, _J, _S], sf=-0.2)
    sf = ?    : inclusive cause reinforce
        classifier.run(Covid=True, verbose=verbose, feed_back=[cm4, cm4, cm4, cm4], sf=0)
    """
    fb = [cmFJS, cmAJS, cmAFS, cmAFJ]
    scoreFactor = -0.4
    classifier.run(Covid=True, verbose=verbose, feed_back=fb, sf=scoreFactor)
    # ^ This part only tries the feedback on evaluation; it just shows how accurate
    #   the classifier we will use below is.
    # v The real work is right below.

    # This part produces the result (e.g., 03-00 - Anger: 4000, Fear: 1000, ...).
    if loadFile and produceResult:
        dateChunks = [
            # weekly analysis
            # ["../2020-03-00 Coronavirus Tweets (pre 2020-03-12).csv"],
            # ["../2020-03-12 Coronavirus Tweets.csv"],
            # ["../2020-03-15 Coronavirus Tweets.csv"],
            # ["../2020-03-00 Coronavirus Tweets (pre 2020-03-12).csv"] + ["../2020-03-12 Coronavirus Tweets.csv"] + ["../2020-03-15 Coronavirus Tweets.csv"],
            # ["../2020-03-20 Coronavirus Tweets.csv"],
            # ["../2020-03-25 Coronavirus Tweets.csv"],
            # ["../2020-03-28 Coronavirus Tweets.csv"],
            # ["../2020-03-29 Coronavirus Tweets.csv"],
            # ["../2020-03-25 Coronavirus Tweets.csv", "../2020-03-28 Coronavirus Tweets.csv", "../2020-03-29 Coronavirus Tweets.csv"],
            # ["../2020-03-30 Coronavirus Tweets.csv", "../2020-03-31 Coronavirus Tweets.csv"]
            # + ["../2020-04-0{} Coronavirus Tweets.csv".format(i) for i in range(1, 6)],
            ["../2020-04-0{} Coronavirus Tweets.csv".format(i) for i in range(6, 10)]
            + ["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(10, 13)],
            ["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(13, 20)],
            ["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(20, 27)],
            ["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(27, 31)]
        ]
        # This part will run and get the percentage information.
        # REMEMBER: you should have all the files listed in dateChunks to run this part.
        # If you don't have them, set produceResult=False.
        for d in dateChunks:
            check.run(dates=d, verbose=False, outDeminish=outputDivider, feedback=fb,
                      num_samples=3375, printtweets=printtweets)
        # Sidenote! The return of check is the cause chunks in list form from the dataset.
        # - Go to the for loop in check_classifier on line 106 ("for check in checks") for more info.
        # - The check in checks are tweets as a list for each emotion. (The exact format can be
        #   learned from lines 81 to 96 of check_classifier.py.)
    else:
        # the list "dates" contains the paths of the tweets' files
        # original
        # dates = ["../2020-04-19 Coronavirus Tweets.csv", "../2020-04-21 Coronavirus Tweets.csv", "../2020-04-22 Coronavirus Tweets.csv"]  # , "../2020-04-24 Coronavirus Tweets.csv"]
        # April 16~30
        # dates = ["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(16, 31)]
        # April 01~15
        # dates = (["../2020-04-{} Coronavirus Tweets.csv".format(i) for i in range(10, 16)] + ["../2020-04-0{} Coronavirus Tweets.csv".format(i) for i in range(1, 10)])
        check_classifier.run(verbose=verbose)
            e_fn = e_fn + 1
        elif name == 'NEUTROPHIL':
            n_fn = n_fn + 1
        elif name == 'LYMPHOCYTE':
            l_fn = l_fn + 1
        elif name == 'BASOPHIL':
            b_fn = b_fn + 1
        elif name == 'MONOCYTE':
            m_fn = m_fn + 1

classified = []
with open('./output', 'w') as writer:
    for image_path in os.listdir('./images'):
        # print(image_path)
        name = classifier.run(image_path)
        writer.write(image_path + ', ' + name + '\n')
        # print(name)
        classified.append(name)

with open('./cell_classes') as file:
    lines = file.readlines()
lines = [l.strip() for l in lines]

correct = 0
total = 0
i = 0
for line in lines:
import os
import sys
from importlib import reload  # reload is not a builtin in Python 3

import classifier
import imageutils

path_ad = "D:/Alzheimers/PET_AD_CLEAN"
path_normal = "D:/Alzheimers/PET_NORMAL_CLEAN/"
nr_ad = 48
nr_normal = 48

# CREDIT: https://stackoverflow.com/questions/6687660/keep-persistent-variables-in-memory-between-runs-of-python-script
# Peter Lyons, Jul 14 '11
cache = None

if __name__ == "__main__":
    while True:
        if not cache:
            # Load the PET images once and keep them in memory across reruns.
            pet_ad = imageutils.read_pet_images(path_ad, nr_ad)
            pet_normal = imageutils.read_pet_images(path_normal, nr_normal)
            cache = (pet_ad, pet_normal)
        try:
            classifier.run(cache, nr_ad, nr_normal)
        except Exception as e:
            print("Error in classifier.py")
            print(e)
        print("Press enter to re-run the script, CTRL-C to exit")
        sys.stdin.readline()
        # Drop the compiled module file and reload classifier so edits take effect.
        os.remove(getattr(classifier, '__cached__', 'classifier.pyc'))
        reload(classifier)