def harvest_ratio(in_path, out_path, classifier):
    """Compute per-domain and overall harvest ratios and save them as CSV.

    Each ``(site, html)`` pair yielded by ``read_file_multiple(in_path)``
    counts as one downloaded page for ``site``; it also counts as a relevant
    page when ``classifier.classify(html)`` is truthy. The harvest ratio is
    ``relevant / downloaded``.

    Args:
        in_path: Passed unchanged to ``read_file_multiple``.
        out_path: Passed unchanged to ``save_csv`` as the destination.
        classifier: Object exposing ``classify(html)``.

    Raises:
        KeyError: If a yielded ``site`` is not a key of ``DOMAINS``.
    """

    def _ratio(relevant, downloaded):
        # A domain with no downloaded pages used to raise ZeroDivisionError;
        # report 0.0 for it instead so the remaining rows are still written.
        return float(relevant) / downloaded if downloaded else 0.0

    relevants = dict.fromkeys(DOMAINS, 0)
    num_pages = dict.fromkeys(DOMAINS, 0)

    for count, (site, html) in enumerate(read_file_multiple(in_path), 1):
        # Progress indicator; single-argument form works on Python 2 and 3.
        print(count)
        num_pages[site] += 1
        if classifier.classify(html):
            relevants[site] += 1

    rows = [[
        'Domain', '# Relevant pages', '# Downloaded pages', '# Harvest Ratio'
    ]]
    total_pages = 0
    total_relevants = 0
    for site in DOMAINS:
        total_pages += num_pages[site]
        total_relevants += relevants[site]
        rows.append([
            site, relevants[site], num_pages[site],
            _ratio(relevants[site], num_pages[site])
        ])

    rows.append([
        'Total', total_relevants, total_pages,
        _ratio(total_relevants, total_pages)
    ])
    save_csv(out_path, rows)
def report(self, n=None, sort='v', directory=None, filename=None):
    """Return the summary restricted to the reporting columns, sorted.

    Args:
        n: When truthy, only the first ``n`` rows of the sorted frame are
            returned (the saved file, if any, still holds every row).
        sort: ``'t'`` sorts by ``final_costs`` then ``duration``; any other
            value sorts by ``final_mse`` then ``duration``.
        directory: When truthy, the sorted frame is handed to ``save_csv``.
        filename: File name for the saved frame; defaults to
            ``self._alg + ' Grid Search.csv'``.

    Returns:
        The selected, sorted (and optionally truncated) frame.

    Raises:
        Exception: If ``self._detail`` is ``None``.
    """
    if self._detail is None:
        raise Exception('Nothing to report')

    # 'stop_metric' used to be listed twice, which produced a duplicated
    # column in the result; each column is now selected exactly once.
    columns = [
        'experiment', 'alg', 'batch_size', 'learning_rate',
        'learning_rate_sched', 'learning_rate_sched_label', 'stop_metric',
        'stop_metric_label', 'time_decay', 'step_decay', 'step_epochs',
        'exp_decay', 'precision', 'i_s', 'maxiter', 'epochs', 'iterations',
        'duration', 'final_costs', 'final_mse'
    ]
    df = self._summary[columns]

    if sort == 't':
        sort_keys = ['final_costs', 'duration']
    else:
        sort_keys = ['final_mse', 'duration']
    df = df.sort_values(by=sort_keys)

    if directory:
        if filename is None:
            filename = self._alg + ' Grid Search.csv'
        save_csv(df, directory, filename)

    if n:
        df = df.iloc[:n]
    return df
def preprocessing_main(filepath, model_name, subset_name):
    """Preprocess a tweet file for the ``dl`` or ``ml`` model and save it.

    Args:
        filepath: Input file handed to ``read_csv``.
        model_name: ``"dl"`` or ``"ml"``; selects the preparation routine,
            the output file prefix and the output directory.
        subset_name: Appended to the model prefix to form the output name.

    Returns:
        None. For any other ``model_name`` a message is printed and nothing
        is read or written.
    """
    # Validate first: previously an unsupported model printed the message and
    # then crashed on the unbound ``preprocessed_tweet_list``.
    if model_name not in ("dl", "ml"):
        print("No Preprocessing for models other than dl and ml")
        return

    columns = ["ID", "tweet", "affect_dimension", "intensity_class"]
    df = read_csv(filepath, columns=columns)
    df = pandas_explode_column(df, "intensity_class", "intensity_scores",
                               "intensity_descriptions", delimiter=":")

    if model_name == "dl":
        preprocessed_tweet_list = prep_dl_data(df)
        preprocessed_filename = "dl_" + subset_name
        preprocessed_filepath = "data/preprocessed_data/DL_data"
    else:
        preprocessed_tweet_list = prep_ml_data(df)
        # Each item is joined back into a single space-separated string.
        preprocessed_tweet_list = [' '.join(x) for x in preprocessed_tweet_list]
        preprocessed_filename = "ml_" + subset_name
        preprocessed_filepath = "data/preprocessed_data/ML_data"

    df["tweets"] = preprocessed_tweet_list
    save_csv(df, preprocessed_filepath, preprocessed_filename, "\t")
    print("Finished Preprocessing Programme")
def main():
    """Cluster the input CSV, plot the dendrograms and save the labelled rows.

    Saves ``<output>_full.png`` for the full dendrogram, ``<output>.png`` for
    the cut trees and ``<output>.csv`` containing every input row with one
    extra ``Classification`` column.
    """
    args = parser.parse_args()
    header, dataset = utils.load_csv(args.input)
    if len(dataset) == 0:
        parser.error("Invalid input: file does not exist or is empty.")

    dendrogram_info = clusterize(standardize(dataset), args.linkage)

    full_figure = plot(dendrogram_info)
    full_figure.savefig(args.output + "_full.png", format="png")
    plt.show()

    weights = [args.average_weight, args.sd_weight]
    trees = cut(dendrogram_info, weights, args.class_range)
    cut_figure = plot(trees)
    cut_figure.savefig(args.output + ".png", format="png")
    plt.show()
    print("%d clusters were generated." % len(trees))

    classified = [header + ["Classification"]]
    clusters = get_clusters(trees)
    # Row i of the dataset is labelled with clusters[i].
    classified.extend(row + [clusters[idx]] for idx, row in enumerate(dataset))
    utils.save_csv(args.output + ".csv", classified)
def codebook_to_csv(k=128, des_name=constants.ORB_FEAT_NAME):
    """Load a stored codebook and write it out as a CSV file.

    Args:
        k: Codebook size used to locate the stored codebook and name the CSV.
        des_name: Descriptor name used the same way.
    """
    files_dir = constants.FILES_DIR_NAME
    if not os.path.exists(files_dir):
        os.makedirs(files_dir)

    codebook = utils.load(filenames.codebook(k, des_name))
    filename = "{0}/codebook_{1}_{2}.csv".format(constants.FILES_DIR_NAME, k, des_name)
    utils.save_csv(filename, codebook)

    message = "Copied codebook into the file with name {0}. Press any key to exit..."
    print(message.format(filename))
    cv2.waitKey()
def save_locations(match_locations):
    """Flatten *match_locations* into rows and save them with ``save_csv``.

    Each value of the mapping contributes one row holding its ``id``,
    ``name``, ``latitude`` and ``longitude`` entries; a missing entry becomes
    ``None``.
    """
    header_locations = ["id", "name", "latitude", "longitude"]
    formatted_results = [
        [match_locations[key].get(column, None) for column in header_locations]
        for key in match_locations
    ]
    save_csv(CSV_OUTPUT_LOCATIONS, header_locations, formatted_results)
def detail(self, directory=None, filename=None):
    """Return ``self._detail``, optionally saving it first.

    Args:
        directory: When not ``None``, the detail is passed to ``save_csv``.
        filename: Name of the saved file; defaults to
            ``self._alg + ' Detail.csv'``.

    Raises:
        Exception: If ``self._alg`` is ``None``.
    """
    if self._alg is None:
        raise Exception('No algorithm selected.')

    if directory is not None:
        target_name = self._alg + ' Detail.csv' if filename is None else filename
        save_csv(self._detail, directory, target_name)
    return self._detail
def eval(self, directory=None, filename=None):
    """Return ``self._eval``, optionally saving it first.

    Args:
        directory: When not ``None``, the evaluation is passed to ``save_csv``.
        filename: Name of the saved file; defaults to
            ``self._alg + ' Evaluation.csv'``.

    Raises:
        Exception: If ``self._eval`` is ``None``.
    """
    if self._eval is None:
        raise Exception('No search results to report.')

    if directory is not None:
        target_name = self._alg + ' Evaluation.csv' if filename is None else filename
        save_csv(self._eval, directory, target_name)
    return self._eval
def summary(self, directory=None, filename=None):
    """Return ``self._summary``, optionally saving it first.

    Args:
        directory: When not ``None``, the summary is passed to ``save_csv``.
        filename: Name of the saved file; defaults to
            ``self._alg + ' Summary.csv'``.

    Raises:
        Exception: If ``self._summary`` is ``None``.
    """
    if self._summary is None:
        raise Exception('No search results to report.')

    if directory is not None:
        target_name = self._alg + ' Summary.csv' if filename is None else filename
        save_csv(self._summary, directory, target_name)
    return self._summary
def compliance_csv(reports, path):
    """Build one compliance row per report and save them under a fixed header.

    Args:
        reports: Iterable of reports; each is converted by
            ``compliance_csv_row``.
        path: Destination passed to ``utils.save_csv``.
    """
    header = [
        "Type", "Name", "Total Domains", "Uses HTTPS", "Enforces HTTPS",
        "HSTS", "Uses HTTPS (%)", "Enforces HTTPS (%)", "HSTS (%)"
    ]
    rows = [compliance_csv_row(entry) for entry in reports]
    utils.save_csv(header, rows, path)
def _benchmark_feature_set(banner, results_filename, vectors, labels):
    """Run every classifier on one feature representation and save the rows."""
    # Single-argument print behaves the same on Python 2 and Python 3.
    print(banner)
    file_path = os.path.join(consts.RESULTS_DIR, results_filename)
    rows = [[
        'Algorithm', 'Training Time', 'Accuracy', 'Precision', 'Recall',
        'F1-Measure'
    ]]
    features_train, features_test, labels_train, labels_test = tools.split_dataset(
        vectors, labels)
    classifiers = (try_naive_bayes, try_regression, try_random_forest,
                   try_svm, try_ada_boost, try_knn)
    for try_classifier in classifiers:
        rows += try_classifier(features_train, features_test, labels_train,
                               labels_test)
    save_csv(file_path, rows)


def compare_classifiers():
    """Benchmark all classifiers on bag-of-words and TF-IDF features.

    Both feature sets are built before any benchmark runs. Results are saved
    to ``bag-of-words-results.csv`` and ``tf-idf-resultss.csv`` inside
    ``consts.RESULTS_DIR``.
    """
    db = tools.load_database()
    X, Y = tools.prepare_database(db)
    _, bag_of_words_vectors = tools.create_bag_of_words(X)
    _, tfidf_vectors = tools.create_TfIdf(X)

    _benchmark_feature_set("Testing Bag of Words",
                           'bag-of-words-results.csv', bag_of_words_vectors, Y)
    # NOTE(review): the doubled "s" in the file name is kept as-is because
    # other tooling may already read this path — confirm before renaming.
    _benchmark_feature_set("Testing TF-IDF", 'tf-idf-resultss.csv',
                           tfidf_vectors, Y)
def codebook_to_csv(k=128, des_name=constants.ORB_FEAT_NAME):
    """Export the stored codebook for ``(k, des_name)`` to a CSV file.

    The output directory ``constants.FILES_DIR_NAME`` is created when it does
    not exist yet. After saving, the function prints the file name and waits
    on ``cv2.waitKey()``.
    """
    output_dir = constants.FILES_DIR_NAME
    directory_missing = not os.path.exists(output_dir)
    if directory_missing:
        os.makedirs(output_dir)

    source_name = filenames.codebook(k, des_name)
    codebook = utils.load(source_name)

    filename = "{0}/codebook_{1}_{2}.csv".format(constants.FILES_DIR_NAME, k,
                                                 des_name)
    utils.save_csv(filename, codebook)
    print(
        "Copied codebook into the file with name {0}. Press any key to exit..."
        .format(filename))
    cv2.waitKey()
def save_players(players):
    """Flatten *players* into ``[id, name]`` rows and save them as CSV.

    Raises:
        KeyError: If a player record lacks one of the header keys.
    """
    header_players = ["id", "name"]
    formatted_results = [
        [players[key][column] for column in header_players]
        for key in players
    ]
    save_csv(CSV_OUTPUT_PLAYERS, header_players, formatted_results)
def save_teams(teams):
    """Flatten *teams* into ``[id, country]`` rows and save them as CSV.

    Raises:
        KeyError: If a team record lacks one of the header keys.
    """
    header_teams = ["id", "country"]
    formatted_results = [
        [teams[key][column] for column in header_teams]
        for key in teams
    ]
    save_csv(CSV_OUTPUT_TEAMS, header_teams, formatted_results)
def summary(self, nbest=0, directory=None, filename=None):
    """Return the lab summary, optionally saved and limited to the best rows.

    Args:
        nbest: When truthy, return only the first ``nbest`` rows after
            sorting by ``final_costs`` then ``duration``.
        directory: When not ``None``, the full summary is passed to
            ``save_csv``.
        filename: Name of the saved file; defaults to
            ``self._alg + ' Lab Summary.csv'``.

    Raises:
        Exception: If ``self._summary`` is ``None``.
    """
    if self._summary is None:
        raise Exception("No summary to report")

    if directory is not None:
        if filename is None:
            filename = self._alg + ' Lab Summary.csv'
        save_csv(self._summary, directory, filename)

    if not nbest:
        return self._summary
    ranked = self._summary.sort_values(by=['final_costs', 'duration'])
    return ranked.head(nbest)
def main():
    """Collect repositories from several code searches and tabulate them.

    Runs the JS, Express, Helmet and two Node-based-Docker-image searches,
    de-duplicates the combined results, sorts them by ``name`` and saves one
    row per repository marking with ``'X'`` which searches it appeared in.
    """
    js_projects = np.array(list(get_repos_for_code_search(QUERY_ALL_JS)))
    js_projects_with_express = np.array(
        list(get_repos_for_code_search(QUERY_WITH_EXPRESS)))
    js_projects_with_helmet = np.array(
        list(get_repos_for_code_search(QUERY_WITH_HELMET)))
    node_based_docker_projects = np.array(
        list(get_repos_for_code_search(QUERY_NODE_BASED_DOCKER_IMAGE)))
    node_based_docker_projects_2 = np.array(
        list(get_repos_for_code_search(QUERY_NODE_BASED_DOCKER_IMAGE_2)))

    concatenated_node_based_docker_projects = np.concatenate(
        (node_based_docker_projects, node_based_docker_projects_2))
    all_concatenated = np.concatenate(
        (js_projects, js_projects_with_express, js_projects_with_helmet,
         concatenated_node_based_docker_projects))

    # De-duplicate: dicts are not hashable, so go through tuples of items.
    all_projects = [
        dict(t) for t in {tuple(d.items()) for d in all_concatenated}
    ]
    all_projects_sorted = sorted(all_projects, key=lambda item: item['name'])

    fieldnames = [
        'Reference', 'Uses Express', 'Node-based docker image',
        'Likely nodejs app', 'Uses Helmet'
    ]

    def mark(flag):
        return 'X' if flag else ''

    def likely_node_app(item):
        return (item in concatenated_node_based_docker_projects) or (
            item in js_projects_with_express)

    rows = [
        {
            fieldnames[0]: item['name'],
            fieldnames[1]: mark(item in js_projects_with_express),
            fieldnames[2]: mark(item in concatenated_node_based_docker_projects),
            fieldnames[3]: mark(likely_node_app(item)),
            # Previously tested the Express results (copy-paste slip), so the
            # Helmet column merely duplicated the Express column.
            fieldnames[4]: mark(item in js_projects_with_helmet),
        }
        for item in all_projects_sorted
    ]
    save_csv(OUTPUT_FILE, fieldnames, rows)
def process_dataset(dataset, colors):
    """Score and export the segment-based classification of one dataset.

    Loads labels, predictions and test mask from ``./data/`` and the segments
    from ``./results/``, then saves the balanced score (JSON), the confusion
    matrix (CSV) and a coloured classification map (image).

    Args:
        dataset: Name prefix of the ``.npy`` inputs and of every output.
        colors: Passed to ``color_true_map`` as ``labels_colors``.
    """
    data_prefix = "./data/" + dataset
    y = np.load(data_prefix + '_labels.npy')
    pred = np.load(data_prefix + '_clasification.npy')
    segments = np.load("./results/" + dataset + '_segments.npy')
    raw_mask = np.load(data_prefix + '_test_mask.npy')
    test_mask = raw_mask.reshape(y.shape)

    sc_pred = classify_segments(pred, segments)
    expected = y[test_mask]
    sc_score = utils.balanced_score(expected, sc_pred[test_mask])
    sc_cm = utils.confusion_matrix(y[test_mask], sc_pred[test_mask])

    utils.save_json({"sc": sc_score}, dataset + "_sc_score")
    utils.save_csv(sc_cm, dataset + "_sc_cm")
    save_image(color_true_map(sc_pred, labels_colors=colors),
               dataset + "_sc_clasification")
def main():
    """Filter the ratings, factorise them with NMF and score the test file.

    The result is saved to ``result/result_nu<min_users>_nm<min_movies>.csv``.
    """
    min_num_users = 50
    min_num_movies = 15
    data_path = 'data/netflix-prize-data/combined_data_1.txt'

    logger.info(' Preprocessing ...')
    t_start_0 = time.time()
    data_matrix, uid_idx_map, mid_idx_map = filtering(
        read_combined_data(data_path),
        min_num_users=min_num_users,
        min_num_movies=min_num_movies)
    # NOTE(review): '{: 2f}' means "space sign, width 2" and prints six
    # decimals; '{:.2f}' may have been intended — confirm before changing.
    preprocessing_seconds = time.time() - t_start_0
    logger.debug(
        'after preprocessed, time used {: 2f} sec.'.format(preprocessing_seconds))
    logger.debug('# of data = {}, user x movie = {}'.format(
        data_matrix.nnz, data_matrix.shape))
    print()

    logger.info(' Training ...')
    t_start = time.time()
    k = 50
    max_iter = 100
    W, H = rs.non_negative_matrix_factorization(data_matrix,
                                                k=k,
                                                max_iter=max_iter)
    del data_matrix
    training_seconds = time.time() - t_start
    logger.debug('after NMF, time used {: 2f} sec.'.format(training_seconds))
    total_seconds = time.time() - t_start_0
    logger.info('Totally used {: 2f} sec.'.format(total_seconds))
    print('>> W ( shape={} )'.format(W.shape))
    print('>> H ( shape={} )'.format(H.shape))

    # Score the qualifying set.
    logger.info(' Testing ...')
    test_path = 'data/netflix-prize-data/qualifying.txt'
    test_df = read_combined_data(test_path, mode='test')
    test_matrix, filtered_test_df = filtering_test(test_df, uid_idx_map,
                                                   mid_idx_map)
    del test_df

    # Element-wise product of the dense test matrix with the reconstruction.
    scored_test_matrix = test_matrix.toarray() * np.inner(W, H.T)
    test_scores = []
    for uid, mid in zip(test_matrix.row, test_matrix.col):
        test_scores.append(scored_test_matrix[uid][mid])
    filtered_test_df['rating'] = test_scores

    out_path = 'result/result_nu{}_nm{}.csv'.format(min_num_users,
                                                    min_num_movies)
    save_csv(filtered_test_df, out_path)
def detail(self, nbest=0, directory=None, filename=None):
    """Return the lab detail, optionally saved and limited to the best runs.

    Args:
        nbest: When truthy, keep only detail rows whose ``experiment`` value
            appears in ``self.summary(nbest=nbest)``.
        directory: When not ``None``, the full detail is passed to
            ``save_csv``.
        filename: Name of the saved file; defaults to
            ``self._alg + ' Lab Detail.csv'``.

    Raises:
        Exception: If ``self._detail`` is ``None``.
    """
    if self._detail is None:
        raise Exception("No detail to report")

    if directory is not None:
        if filename is None:
            filename = self._alg + ' Lab Detail.csv'
        save_csv(self._detail, directory, filename)

    if not nbest:
        return self._detail
    best = self.summary(nbest=nbest)
    records = self._detail
    return records.loc[records['experiment'].isin(best['experiment'])]
def run_training_testing(self, model_weight_path, gpu_memory_fraction):
    """Train the model, checkpoint after every epoch, then predict on 'test'.

    Args:
        model_weight_path: Passed to ``self.model.load_weight`` after the
            variables are initialised.
        gpu_memory_fraction: Assigned to
            ``per_process_gpu_memory_fraction`` for both sessions.

    Training and prediction each run in their own ``tf.Session``; the second
    session restores the latest checkpoint written to ``./checkpoints/``.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source — confirm against the original layout.
    """
    # train the network
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = gpu_memory_fraction
    train_generator_obj = self.obj.train_generator()
    with tf.Session(config=config) as sess:
        # summary_writer is created but not referenced again in this method.
        summary_writer = tf.summary.FileWriter('./checkpoints/', sess.graph)
        # The Saver is built before self.model.optimize() is called.
        saver = tf.train.Saver(max_to_keep=2)
        self.model.optimize()
        sess.run(tf.global_variables_initializer())
        self.model.load_weight(sess, model_weight_path)
        loss = 0
        true_positives = 0
        for epochs in range(1, self.num_epochs+1):
            start_time = time.time()
            for step in range(len(self.obj.train_list)//self.batch_size + 1):
                x_batch, y_batch = get_batch(train_generator_obj, 'train', height=self.model.height, width=self.model.width)
                #temp1 = sess.run([self.pool] , feed_dict={self.model.x:x_batch, self.model.y:y_batch})
                #print(temp1.shape)
                _, loss_curr, predicted = sess.run([self.model.optimizer, self.model.loss, self.model.pred] , feed_dict={self.model.x:x_batch, self.model.y:y_batch})
                # Running loss: 90% previous value, 10% current batch loss.
                loss = 0.9*loss + 0.1*loss_curr
                # Count predictions equal to the arg-max of the batch labels.
                true_positives = true_positives + np.sum(predicted == np.argmax(y_batch,1))
            end_time = time.time()
            print('time_taken', end_time -start_time)
            print('epochs:',epochs, ' train-loss:', loss, 'train-acc:', true_positives*100.0/len(self.obj.train_list))
            # Reset the per-epoch counter; the running loss carries over.
            true_positives = 0
            # NOTE(review): global_step is the last inner-loop index, which is
            # the same every epoch, not the epoch number — confirm intent.
            saver.save(sess, './checkpoints/', global_step=step)
            self.evaluate(sess, 'val')
            print('')
    # predict values for test dataset
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = gpu_memory_fraction
    with tf.Session(config=config) as sess:
        saver.restore(sess, tf.train.latest_checkpoint('./checkpoints/'))
        model_pred = self.predict(sess, 'test')
    #save the results in the required csv format
    save_csv(model_pred, self.obj)
def save_match_player_relations(matches):
    """Flatten match/player relation records into rows and save them as CSV.

    Each value of *matches* contributes one row with its ``player_id``,
    ``match_id``, ``is_substitute``, ``num_goals`` and ``team_id`` entries.

    Raises:
        KeyError: If a record lacks one of the header keys.
    """
    header_rel = [
        "player_id",
        "match_id",
        "is_substitute",
        "num_goals",
        "team_id",
    ]
    formatted_results = [
        [matches[key][column] for column in header_rel] for key in matches
    ]
    save_csv(CSV_OUTPUT_MATCH_PLAYERS_REL, header_rel, formatted_results)
def upload_file(request):
    """Save the uploaded ``file`` and report the outcome as JSON.

    Responds with status 400 when ``utils.save_csv`` returns the string
    ``"address column not found"``; otherwise the default status is used.
    """
    uploaded = request.FILES["file"]
    response = utils.save_csv(uploaded)

    if response == "address column not found":
        return utils.JsonResponse({'response': response}, status=400)
    return utils.JsonResponse({'response': response})
def run(
    dataset: List[List[Any]] = None,
    config: Dict = CONFIG,
):
    """Transform the dataset and save the result as CSV.

    Args:
        dataset (list): Source dataset to transform and save.
        config (dict): Configuration options; ``include_scorer_skills`` and
            ``output_file_path`` are read.
    """
    include_skills = config['include_scorer_skills']
    transformed = transform(
        source=dataset,
        include_scorer_skills=include_skills,
    )
    output_path = config['output_file_path']
    save_csv(array=transformed, path=output_path)
    print('Saved to: ' + config['output_file_path'])
def main():
    """Prompt for a username, fetch that user's favourites and save them.

    The CSV is written under ``downloads/`` (created when missing) with the
    current timestamp in its name.
    """
    username = input(
        "Digite o seu nome de usuário, sem a arroba, e aperte < ENTER >. Exemplo: jack\n"
    )
    api = utils.login()
    favs = utils.fetch_favs(api, username)

    download_dir = "downloads/"
    if not os.path.exists(download_dir):
        os.makedirs(download_dir)

    # Spaces and colons in the timestamp are replaced with dashes.
    stamp = str(datetime.datetime.now())
    now = stamp.replace(" ", "-").replace(":", "-")
    fpath = "".join(["downloads/favs-by-", username, "-", now, ".csv"])
    utils.save_csv(favs, fpath)
def calc_thiessen(hidroweb_dir, inventory, list_ids, shp, poly, attr, buffer, dates, dir_out):
    """Compute the daily Thiessen mean precipitation over a polygon.

    Args:
        hidroweb_dir: Directory passed to ``pre_process`` and ``open_files``.
        inventory: Station inventory passed to ``pre_process``.
        list_ids: Optional CSV path with station IDs; when given, stations
            are selected by ID instead of by polygon membership.
        shp: Shapefile passed to ``getvert``.
        poly: Polygon identifier passed to ``getvert`` and ``save_csv``.
        attr: Attribute name used to find the polygon; defaults to ``'ID'``.
        buffer: Optional buffer; converted to ``float``, or ``False`` when
            not given.
        dates: Two ``'%d/%m/%Y'`` strings: first and last day of the range.
        dir_out: Output directory passed to ``save_csv``.

    Returns:
        None. The per-day means are written through ``save_csv``.
    """
    loc_stations = pre_process(hidroweb_dir, inventory)

    # The polygon vertices are needed by thiessen() for both selection modes.
    # They used to be computed only in the polygon branch, which left
    # ``vertices`` unbound whenever ``list_ids`` was supplied.
    if not attr:
        attr = 'ID'  # default attribute name when none is given
    buffer = float(buffer) if buffer else False  # default when none is given
    # extract the vertices of polygon ``poly`` from the given shapefile
    vertices = getvert(shp, poly, attr=attr, buffer=buffer)

    if list_ids:
        IDS = pd.read_csv(list_ids)
        stations_in = loc_stations[np.where(loc_stations == IDS.values)[0]]
    else:
        # check which stations fall inside the polygon
        isin = isinpoly3(loc_stations[:,2], loc_stations[:,1], vertices)
        # keep only the stations inside the polygon
        stations_in = loc_stations[isin,:]

    # convert the date strings to datetimes and expand to a daily range
    dates = pd.to_datetime(dates, format='%d/%m/%Y')
    dates = pd.date_range(dates[0], dates[1])

    # extract the precipitation of the selected stations for each date
    pr_med = []
    for date in dates:
        pr_estations = open_files(stations_in, hidroweb_dir, date)
        # mean precipitation using the Thiessen method
        pr_med.append([date, thiessen(pr_estations[:,1], pr_estations[:,0],
                                      vertices[:,0], vertices[:,1],
                                      pr_estations[:,2])])

    # save the mean precipitation as .csv; ``date`` is the last day of the range
    save_csv(dir_out, pr_med, date, poly)
    return None
def save_matches(matches):
    """Flatten match records into rows and save them as CSV.

    Each value of *matches* contributes one row whose cells follow the order
    of ``header_matches``.

    Raises:
        KeyError: If a record lacks one of the header keys.
    """
    header_matches = [
        "id",
        "home_team_id",
        "away_team_id",
        "match_date",
        "location_id",
        "competition",
        "winning_team_id",
        "home_team_score",
        "away_team_score",
    ]
    formatted_results = [
        [matches[key][column] for column in header_matches]
        for key in matches
    ]
    save_csv(CSV_OUTPUT_MATCHES, header_matches, formatted_results)
def _recommend(cfg: dict):
    """Load the dataset and model named in *cfg* and produce recommendations.

    Reads the ``top_n``, ``model``, ``dataset`` and ``results`` entries of
    *cfg*. The recommendations are saved to ``cfg['results']['out']`` unless
    ``cfg['results']['save']`` is present and falsy.
    """
    top_n = cfg['top_n']
    cfg_model = cfg['model']
    cfg_dataset = cfg['dataset']
    cfg_results = cfg['results']
    # 'cols' must hold exactly two names; only the second one is used here.
    _, col_files = cfg_dataset['cols']

    raw_df = utils.read_csv(cfg_dataset['path'], usecols=cfg_dataset['cols'])
    x_df = raw_df.pipe(utils.to_list_of_strings, col=col_files)

    model_recommender = model.deserialize(cfg_model['path'])
    y_df = model.recommend(model_recommender,
                           x_df,
                           cfg_dataset['cols'],
                           top_n=top_n)

    should_save = cfg_results.get('save', True)
    if should_save:
        utils.save_csv(cfg_results['out'], y_df)
def main():
    """Build the model from CLI arguments, bind it to nsml and optionally train.

    In ``train`` mode the model is trained and saved, the arguments are
    pickled to ``args.pickle``, test predictions are computed, and a CSV
    writer is bound to nsml before saving the ``'result'`` checkpoint.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source. In particular, whether ``nsml.save()`` belongs to the ``pause``
    branch and whether the prediction steps belong to the ``train`` branch
    could not be determined — confirm against the original layout.
    """
    args = get_args()
    # Normalise a 0 flag to the boolean False.
    if args.use_dropout == 0:
        args.use_dropout = False
    # NOTE(review): exact repeat of the check above; looks redundant.
    if args.use_dropout ==0:
        args.use_dropout = False
    # Echo every parsed argument as a (name, value) tuple.
    for x in vars(args).items():
        print(x)
    #from utils import data_transforms
    #print(data_transforms)
    # lr_sch == 5 is only accepted on torch 0.4.0; abort on other versions.
    if args.lr_sch ==5 and torch.__version__ != '0.4.0' :
        print("for cosine annealing, change to torch==0.4.0 in setup.py")
        raise AssertionError()
    elif args.lr_sch !=5 and torch.__version__ == '0.4.0':
        print("warning : this is torch version {}! nsml report will not be recorded".format(torch.__version__))
    model, optimizer, scheduler = model_all.get_model(args)
    if args.use_gpu:
        if torch.cuda.device_count() > 1:
            print("[gpu] Let's use", torch.cuda.device_count(), "GPUs!")
            # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
            model = torch.nn.DataParallel(model)
        elif torch.cuda.device_count() == 1:
            print("[gpu] Let's use", torch.cuda.device_count(), "GPUs!")
        else:
            print("[gpu] no available gpus")
        # Called for every use_gpu case, including the "no available gpus" one.
        model = model.cuda()
    nsml.bind(infer=infer, model=model, optimizer=optimizer)
    if args.pause:
        nsml.paused(scope=locals())
    nsml.save()
    if args.mode == 'train':
        dataloaders, dataset_sizes = utils.data_loader(args, train=True, batch_size=args.batch_size)
        model = train.train_test(model, optimizer, scheduler, dataloaders, dataset_sizes, args)
        utils.save_model(model, 'model_state')
        with open('args.pickle', 'wb') as farg:
            pickle.dump(args, farg)
        loader = utils.data_loader(args, train=False, batch_size=1)
        # acc is returned but not used below.
        predict, acc = utils.get_forward_result(model, loader, args)
        predict = torch.cat(predict, 0)
        # Re-bind nsml's save hook so saving 'result' writes the predictions.
        nsml.bind(save=lambda x: utils.save_csv(x, data_csv_fname=os.path.join(DATASET_PATH, 'train', 'test') + '/test_data', results=predict, test_loader=loader))
        nsml.save('result')
def process_dataset(dataset, colors):
    """Score and export majority-vote smoothing at several window sizes.

    For each size in ``[3, 5, 9]`` the predictions are smoothed with
    ``majority_vote`` and scored on the test mask. All scores go into one
    JSON file; each size also gets a confusion-matrix CSV and a coloured map.

    Args:
        dataset: Name prefix of the ``.npy`` inputs and of every output.
        colors: Passed to ``color_true_map`` as ``labels_colors``.
    """
    data_prefix = "./data/" + dataset
    y = np.load(data_prefix + '_labels.npy')
    pred = np.load(data_prefix + '_clasification.npy')
    test_mask = np.load(data_prefix + '_test_mask.npy').reshape(y.shape)

    mv_sizes = [3, 5, 9]
    # Keep the original phase order: all votes, then all scores, then all
    # confusion matrices, before anything is written.
    mv_pred = [majority_vote(pred, window) for window in mv_sizes]
    mv_scores = [
        utils.balanced_score(y[test_mask], voted[test_mask])
        for voted in mv_pred
    ]
    mv_cm = [
        utils.confusion_matrix(y[test_mask], voted[test_mask])
        for voted in mv_pred
    ]

    keys = ["_mv_{}".format(window) for window in mv_sizes]
    utils.save_json(dict(zip(keys, mv_scores)), dataset + "_mv_scores")

    for key, matrix, voted in zip(keys, mv_cm, mv_pred):
        utils.save_csv(matrix, dataset + key + "_cm")
        color_map = color_true_map(voted, labels_colors=colors)
        save_image(color_map, dataset + key + "_clasification")
def main():
    """Prompt for a username, fetch that user's tweets and save them as CSV.

    The CSV is written under ``downloads/`` (created when missing) with the
    current timestamp in its name; ``Pronto!`` is printed when done.
    """
    username = input(
        "Digite o nome do usuário que você quer baixar, sem a arroba, e aperte < ENTER >. Exemplo: jack\n"
    )
    api = utils.login()
    output = utils.fetch_tweets(api, username)

    download_dir = "downloads/"
    if not os.path.exists(download_dir):
        os.makedirs(download_dir)

    # Spaces and colons in the timestamp are replaced with dashes.
    now = str(datetime.datetime.now())
    for separator in (" ", ":"):
        now = now.replace(separator, "-")
    fpath = "".join(["downloads/tweets-by-", username, "-", now, ".csv"])

    utils.save_csv(output, fpath)
    print("Pronto!")
def main():
    """Collect JS and TS repositories and tabulate which search found each.

    The combined results are de-duplicated, sorted by ``name`` and saved with
    ``'X'`` markers for the JS search, the TS search and archived status.
    """
    js_projects = np.array(list(get_repos_for_repo_search(QUERY_JS)))
    ts_projects = np.array(list(get_repos_for_repo_search(QUERY_TS)))
    concatenated = np.concatenate((js_projects, ts_projects))

    # De-duplicate: dicts are not hashable, so go through tuples of items.
    unique_items = {tuple(d.items()) for d in concatenated}
    all_projects = [dict(t) for t in unique_items]
    all_projects_sorted = sorted(all_projects, key=lambda item: item['name'])

    fieldnames = ['Reference', 'JS', 'TS', 'Archived']
    rows = [
        {
            fieldnames[0]: item['name'],
            fieldnames[1]: 'X' if (item in js_projects) else '',
            fieldnames[2]: 'X' if (item in ts_projects) else '',
            fieldnames[3]: 'X' if item['archived'] else ''
        }
        for item in all_projects_sorted
    ]
    save_csv(OUTPUT_FILE, fieldnames, rows)
def main(is_interactive=True, k=64, des_option=constants.ORB_FEAT_OPTION, svm_kernel=cv2.SVM_LINEAR):
    """Run one bag-of-visual-words experiment: codebook, SVM training, testing.

    Args:
        is_interactive: When true, the descriptor type, ``k``, SVM kernel and
            codebook source are asked on the console, overriding the
            arguments; when false, a new codebook is always generated and the
            total elapsed time is printed at the end.
        k: Number of cluster centres for the codebook.
        des_option: Descriptor selector; compared with
            ``constants.ORB_FEAT_OPTION`` to choose ORB, otherwise SIFT.
        svm_kernel: OpenCV SVM kernel constant.

    Returns:
        None. Returns early when ``constants.DATASET_PATH`` does not exist.

    NOTE(review): the prompts compare ``input()`` results with integers and
    the function ends with ``raw_input``; this looks like Python 2 code —
    confirm the interpreter version before porting.
    """
    if not is_interactive:
        experiment_start = time.time()
    # Check for the dataset of images
    if not os.path.exists(constants.DATASET_PATH):
        print("Dataset not found, please copy one.")
        return
    dataset = Dataset(constants.DATASET_PATH)
    dataset.generate_sets()
    # Check for the directory where stores generated files
    if not os.path.exists(constants.FILES_DIR_NAME):
        os.makedirs(constants.FILES_DIR_NAME)
    if is_interactive:
        des_option = input("Enter [1] for using ORB features or [2] to use SIFT features.\n")
        k = input("Enter the number of cluster centers you want for the codebook.\n")
        svm_option = input("Enter [1] for using SVM kernel Linear or [2] to use RBF.\n")
        svm_kernel = cv2.SVM_LINEAR if svm_option == 1 else cv2.SVM_RBF
    des_name = constants.ORB_FEAT_NAME if des_option == constants.ORB_FEAT_OPTION else constants.SIFT_FEAT_NAME
    log = Log(k, des_name, svm_kernel)
    codebook_filename = filenames.codebook(k, des_name)
    if is_interactive:
        codebook_option = input("Enter [1] for generating a new codebook or [2] to load one.\n")
    else:
        # Non-interactive runs always build a fresh codebook.
        codebook_option = constants.GENERATE_OPTION
    if codebook_option == constants.GENERATE_OPTION:
        # Calculate all the training descriptors to generate the codebook
        start = time.time()
        des = descriptors.all_descriptors(dataset, dataset.get_train_set(), des_option)
        end = time.time()
        log.train_des_time(end - start)
        # Generates the codebook using K Means
        print("Generating a codebook using K-Means with k={0}".format(k))
        start = time.time()
        codebook = descriptors.gen_codebook(dataset, des, k)
        end = time.time()
        log.codebook_time(end - start)
        # Stores the codebook in a file
        utils.save(codebook_filename, codebook)
        print("Codebook saved in {0}".format(codebook_filename))
    else:
        # Load a codebook from a file
        print("Loading codebook ...")
        codebook = utils.load(codebook_filename)
        print("Codebook with shape = {0} loaded.".format(codebook.shape))
    # Train and test the dataset
    classifier = Classifier(dataset, log)
    svm = classifier.train(svm_kernel, codebook, des_option=des_option, is_interactive=is_interactive)
    print("Training ready. Now beginning with testing")
    result, labels = classifier.test(codebook, svm, des_option=des_option, is_interactive=is_interactive)
    # Store the results from the test
    classes = dataset.get_classes()
    log.classes(classes)
    log.classes_counts(dataset.get_classes_counts())
    result_filename = filenames.result(k, des_name, svm_kernel)
    test_count = len(dataset.get_test_set()[0])
    # One row per class, sized by the length of the first test-set entry.
    result_matrix = np.reshape(result, (len(classes), test_count))
    utils.save_csv(result_filename, result_matrix)
    # Create a confusion matrix
    confusion_matrix = np.zeros((len(classes), len(classes)), dtype=np.uint32)
    for i in range(len(result)):
        predicted_id = int(result[i])
        real_id = int(labels[i])
        # Rows index the real class, columns the predicted class.
        confusion_matrix[real_id][predicted_id] += 1
    print("Confusion Matrix =\n{0}".format(confusion_matrix))
    log.confusion_matrix(confusion_matrix)
    log.save()
    print("Log saved on {0}.".format(filenames.log(k, des_name, svm_kernel)))
    if not is_interactive:
        experiment_end = time.time()
        elapsed_time = utils.humanize_time(experiment_end - experiment_start)
        print("Total time during the experiment was {0}".format(elapsed_time))
    else:
        # Show a plot of the confusion matrix on interactive mode
        utils.show_conf_mat(confusion_matrix)
        raw_input("Press [Enter] to exit ...")