def printPriors():
    # Print the prior (vectorized) evaluation metrics
    vectorized_priors = [
        utils.readFromFile("global_mse", param.global_res_path).item(),
        utils.readFromFile("global_rho", param.global_res_path).item(),
        utils.readFromFile("global_tau", param.global_res_path).item(),
        utils.readFromFile("global_p@10", param.global_res_path).item(),
        utils.readFromFile("global_p@20", param.global_res_path).item()
    ]
    print("\nWithout LSH (VECTORIZED):")
    print_evaluation(vectorized_priors)
def getCollections():
    for i, item in enumerate(COLLECTIONS_ARCHIVE):
        print(f"{i}: {item['name']}")
    while True:
        collectionId = int(input("Select a collection: "))
        if 0 <= collectionId < len(COLLECTIONS_ARCHIVE):
            break
    archivePath = COLLECTIONS_ARCHIVE[collectionId]['path']
    if checkCollections(archivePath) == -1:
        print('ERROR: the collections failed validation')
        return -1
    return (eval(utils.readFromFile(archivePath + COLLECTIONS_FILE_NAME)),
            eval(utils.readFromFile(archivePath + TF_FILE_NAME)))
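# Illustrative sketch of the COLLECTIONS_ARCHIVE structure that getCollections
# above iterates over (assumption: a list of dicts with 'name' and 'path' keys,
# where 'path' is a directory prefix; the entries below are made-up examples).
COLLECTIONS_ARCHIVE_EXAMPLE = [
    {'name': 'small collection', 'path': 'archive/small/'},
    {'name': 'full collection', 'path': 'archive/full/'},
]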
def lshUtilizationCounts():
    for i in utils.readFromFile("utilization_counts", param.lsh_res_path):
        print(i)
    norm_ged_index = utils.readFromFile("global_norm_ged_lsh_index",
                                        param.global_res_path)
    pair_geds = []
    for i in norm_ged_index:
        for j in i:
            if j["lsh_use"]:
                pair_geds.append(j["target_denorm"])
    plot.LSHGEDdistribution(pair_geds, dataset_name, path=param.lsh_res_path)
def vectorizedVSloopy():
    for i in utils.readFromFile("zeroDifsCounts", param.vector_loopy_res_path):
        print(i)
    differences = utils.readFromFile("AbsolutePiorDifs",
                                     param.vector_loopy_res_path)
    printPriors()  # The loop-based prior is printed in the next section
    plot.heatmap(differences, dataset_name,
                 path=param.vector_loopy_res_path + plot_subfolder_name)
    plot.histogram(differences, dataset_name,
                   path=param.vector_loopy_res_path + plot_subfolder_name)
def plot_statistics(statistics_dicts, out_filename):
    tex_str = ''
    offset_strs = list(statistics_dicts.keys())
    offset_strs.sort()
    for offset_str in offset_strs:
        statistics_dict = statistics_dicts[offset_str]
        keys = statistics_dict.keys()
        # all but locally converged (2) and early stopped (5) count as possibly crossed
        lines = ['({}, {})'.format(
            n_hidden,
            1.0 - ((statistics_dict[n_hidden][2] + statistics_dict[n_hidden][5])
                   / np.sum(statistics_dict[n_hidden])))
            for n_hidden in np.sort(np.array(list(keys)))]
        tex_str += ('\\addplot coordinates {\n' + '\n'.join(lines) + '};\n'
                    + '\\addlegendentry{$\\Delta = ' + offset_str + '$}\n')
    print('LaTeX code excerpt:')
    print(tex_str)
    tex_str = utils.readFromFile('tex_head.txt') + tex_str + utils.readFromFile('tex_tail.txt')
    utils.writeToFile(out_filename, tex_str)
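# A minimal usage sketch for plot_statistics (assumption: each inner dict maps
# a hidden-unit count to a vector of run counts in which index 2 counts
# "locally converged" runs and index 5 counts "early stopped" runs, per the
# comment above; the numbers and the output file name are illustrative).
import numpy as np

example_stats = {
    '0.1': {16: np.array([3, 0, 1, 0, 0, 1]),   # 2 of 5 runs excluded -> fraction 0.6
            32: np.array([5, 0, 0, 0, 0, 0])},  # no exclusions -> fraction 1.0
}
# plot_statistics(example_stats, 'crossing_fraction.tex')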
def main():
    tree = None
    if not u.definitions['LoadFromFile']:
        dictionaryArray = u.readFromFile(u.definitions['DictionaryPath'], readLines=True)
        tree = bktree.BKTree(stringmetrics.levenshtein)
        tree.parallelAdd(dictionaryArray)
        u.saveObjectToFile(object=tree, savePath=u.definitions['TreeSavePath'])
    else:
        tree = u.loadObjectFromFile(u.definitions['TreeSavePath'])
    book = u.getBook(glob(u.definitions['BookPathAndExt'])[0])
    p = Pool(processes=cpu_count())
    spellingMistakes = p.map(tree.findMistakes, [(word, book) for word in book])
    spellingMistakes = [x for x in spellingMistakes if x is not None]
    print(spellingMistakes)
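# A minimal sketch of the distance metric the BK-tree above is built on.
# Assumption: stringmetrics.levenshtein computes the standard Levenshtein
# (edit) distance; this stand-in only illustrates that contract.
def levenshtein_sketch(a, b):
    # Dynamic programming over a single rolling row of edit costs.
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, start=1):
        curr = [i]
        for j, cb in enumerate(b, start=1):
            curr.append(min(prev[j] + 1,                  # deletion
                            curr[j - 1] + 1,              # insertion
                            prev[j - 1] + (ca != cb)))    # substitution
        prev = curr
    return prev[-1]

# e.g. levenshtein_sketch("kitten", "sitting") == 3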
def getDatasetName():
    d = ['AIDS700nef', 'LINUX', 'IMDBMulti']
    # num = input("Which dataset statistics? Press the number"
    #             "\n0. {} \n1. {} \n2. {}".format(d[0], d[1], d[2]))
    # dataset_name = d[int(num)]
    dataset_name = d[0]
    param.initGlobals(dataset_name)
    tests.testName(
        dataset_name,
        utils.readFromFile("datasetName", param.temp_runfiles_path, param_type='str'))
    return dataset_name
def prepareByFile(imgObject, detector, w, h):
    '''
    Resize the image and return the object(s) prepared for comparison.
    :param imgObject: the image to read
    :param detector: feature detector used to compute keypoints and descriptors
    :param w: image width after resize
    :param h: image height after resize
    :returns kp: keypoints of the image
    :returns desc: descriptors of the keypoints
    '''
    img = utils.readFromFile(imgObject)
    img = utils.resize(img, w, h)
    print('PREPARE ')
    if not detector:
        raise Exception("Detector can't be None")
    kp, desc = detector.detectAndCompute(img, None)
    return kp, desc
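# A usage sketch for the detector contract above, assuming OpenCV is available
# and that utils.readFromFile / utils.resize return a NumPy image array that
# detectAndCompute accepts. Any object exposing detectAndCompute(img, mask)
# works; the file name and the 256x256 target size are illustrative.
import cv2

orb = cv2.ORB_create()  # keypoint detector + descriptor extractor
# kp, desc = prepareByFile("query.png", orb, 256, 256)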
from utils import readFromFile, transform
from Service import run

if __name__ == '__main__':
    print(
        "Enter: \n 1 - to process easy.txt \n 2 - to process medium.txt \n 3 - to "
        "process hard.txt \n 4 - to process fricker26.txt \n 5 - to process berlin \n"
    )
    ind = int(input())
    if ind == 1:
        fileName = 'data/easy.txt'
    if ind == 2:
        fileName = 'data/medium.txt'
    if ind == 3:
        fileName = 'data/hard.txt'
    if ind == 4:
        fileName = 'data/fricker26.txt'
    if ind == 5:
        fileName = 'data/berlin.in'
        mat = transform('data/berlin.in')
    else:
        mat = readFromFile(fileName)
    problParam = {'matrix': mat, 'noNodes': len(mat)}
    generationParam = {'popSize': 400, 'noGen': 2000}
    run(problParam, generationParam)
def getCollections():
    if checkCollections() == -1:
        print('ERROR: the collections failed validation')
        return -1
    return (eval(utils.readFromFile(COLLECTIONS_FILE_NAME)),
            eval(utils.readFromFile(TF_FILE_NAME)))
from GA import GA
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import warnings
import math
from utils import citire, modularity, afisare, generateNewValue, readFromFile, readFromFileMedium, readFromFileCoordonate
from random import seed

mat = []
nrNoduri, mat = readFromFile(mat, 'easy_01_tsp.txt')
gaParam = {"popSize": 15, "noGen": 50, "network": mat}
problParam = {'function': modularity, 'retea': mat, 'noNodes': nrNoduri}


def main():
    ga = GA(gaParam, problParam)
    ga.initialisation()  # initialisation: generate the initial chromosomes and add them to the population
    ga.evaluation()
    for g in range(gaParam['noGen']):
        ga.oneGenerationElitism()
    bestChromo = ga.bestChromosome()
    # print(bestChromo.repres)
def maxPopulation(k, dataSet):
    Cp, C = [], []
    for i in dataSet.keys():
        Cp.append((i, dataSet[i][0]))
    Cp = sorted(Cp, key=lambda people: people[1], reverse=True)
    Cp = Cp[:k]
    for el in Cp:
        C.append(el[0])
    return C


# Build the data sets
minDataSet = readFromFile("data/unifiedCancerData_111.csv")
mediumDataSet = readFromFile("data/unifiedCancerData_290.csv")
maxDataSet = readFromFile("data/unifiedCancerData_896.csv")
completeDataSet = readFromFile("data/unifiedCancerData_3108.csv")

# Question 1
k = 15  # number of clusters
q = 5   # number of iterations in k-means clustering
P = completeDataSet.keys()
hierarchical_clusters_dict = hierarchicalClustering(P, k, False)
ClusterGraph(hierarchical_clusters_dict)

# Question 2
# Build the list of centers: the 15 coordinates with the largest population
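# A minimal self-contained sketch of the selection performed by maxPopulation
# above (assumption: dataSet maps an identifier to a tuple whose first element
# is the population count; the identifiers and counts below are illustrative).
toy_data = {'A': (120,), 'B': (340,), 'C': (90,), 'D': (500,)}
# maxPopulation(2, toy_data) -> ['D', 'B']  (the two most populous entries)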
def priorVSposterior():
    norm_ged_index = utils.readFromFile("global_norm_ged_lsh_index",
                                        param.global_res_path)
    ged_dif_lsh, ged_dif_nolsh, true_ged = getGlobalGEDDifs(norm_ged_index)

    # PRINT prior LOOPY
    loopy_priors = [
        utils.readFromFile("global_prior_mse", param.global_res_path).item(),
        utils.readFromFile("global_prior_rho", param.global_res_path).item(),
        utils.readFromFile("global_prior_tau", param.global_res_path).item(),
        utils.readFromFile("global_prior_p@10", param.global_res_path).item(),
        utils.readFromFile("global_prior_p@20", param.global_res_path).item()
    ]
    SSE_noLSH = utils.getSSE(ged_dif_nolsh)  # Sum of squared errors
    AVG_REL_ERROR_noLSH = utils.getAvRelEr(ged_dif_nolsh, true_ged)  # Average relative error
    prior_errors = [
        "\nSSE (no LSH) = {}".format(SSE_noLSH),
        "AVG_REL_ERROR (no LSH) = {}".format(AVG_REL_ERROR_noLSH)
    ]
    print("\nWithout LSH (Loop-based):")
    print_evaluation(metrics=loopy_priors, errors=prior_errors)

    # PRINT POSTERIOR
    loopy_posts = [
        utils.readFromFile("global_post_mse", param.global_res_path).item(),
        utils.readFromFile("global_post_rho", param.global_res_path).item(),
        utils.readFromFile("global_post_tau", param.global_res_path).item(),
        utils.readFromFile("global_post_p@10", param.global_res_path).item(),
        utils.readFromFile("global_post_p@20", param.global_res_path).item()
    ]
    SSE_LSH = utils.getSSE(ged_dif_lsh)  # Sum of squared errors
    AVG_REL_ERROR_LSH = utils.getAvRelEr(ged_dif_lsh, true_ged)  # Average relative error
    post_errors = [
        "\nSSE (LSH) = {}".format(SSE_LSH),
        "AVG_REL_ERROR (LSH) = {}".format(AVG_REL_ERROR_LSH)
    ]
    print("\nWith LSH:")
    print_evaluation(metrics=loopy_posts, errors=post_errors)

    # Global distribution and variance of errors
    plot.comparativeDistribution(np.abs(ged_dif_lsh), np.abs(ged_dif_nolsh),
                                 dataset_name,
                                 path=param.global_res_path + plot_subfolder_name)
    plot.comparativeScatterplot(np.abs(ged_dif_lsh), np.abs(ged_dif_nolsh),
                                dataset_name,
                                path=param.global_res_path + plot_subfolder_name)
    return loopy_posts, SSE_LSH, AVG_REL_ERROR_LSH
def drillDownBuckets(drillDownStats):
    path_for_dd_plots = param.global_res_path + 'drill_down_' + plot_subfolder_name
    trainable_buckets = utils.readFromFile("trainable_buckets_dict",
                                           param.lsh_res_path,
                                           param_type='dict')
    dd_index = utils.readFromFile("drill_down_index", param.lsh_res_path)
    bucketpriors = [[] for _ in range(len(trainable_buckets["bucketName"]))]
    bucketposts = [[] for _ in range(len(trainable_buckets["bucketName"]))]
    buckettargets = [[] for _ in range(len(trainable_buckets["bucketName"]))]
    for index_j, drill_dict in enumerate(dd_index):
        bucketpriors[drill_dict["bucket_index"]].append(drill_dict["priorpred"])
        bucketposts[drill_dict["bucket_index"]].append(drill_dict["postpred"])
        buckettargets[drill_dict["bucket_index"]].append(drill_dict["target"])
    for i, b in enumerate(trainable_buckets["bucketName"]):
        prior = np.array(bucketpriors[i])
        ground = np.array(buckettargets[i])
        post = np.array(bucketposts[i])
        # if there are unutilized buckets they should be skipped
        if len(prior) == 0:
            continue
        # error vectors for the later stats
        drill_ged_dif_lsh = post - ground
        drill_ged_dif_nolsh = prior - ground
        # Prior paper stats
        prior_drills = [
            np.mean(F.mse_loss(torch.tensor(prior), torch.tensor(ground),
                               reduction='none').detach().numpy()),
            calculate_ranking_correlation(spearmanr, prior, ground),
            calculate_ranking_correlation(kendalltau, prior, ground),
            calculate_prec_at_k(10, prior, ground),
            calculate_prec_at_k(20, prior, ground)
        ]
        SSE_noLSH_drill = utils.getSSE(drill_ged_dif_nolsh)
        AVG_REL_ERROR_noLSH_drill = utils.getAvRelEr(drill_ged_dif_nolsh, ground)
        prior_errors_drill = [
            "\nSSE (no LSH) = {}".format(SSE_noLSH_drill),
            "AVG_REL_ERROR (no LSH) = {}".format(AVG_REL_ERROR_noLSH_drill)
        ]
        print("\nTable {}, bucket {} ({})".format(
            trainable_buckets["table"][i], b, int(b, 2)))
        print("\nWITHOUT LSH:")
        print_evaluation(prior_drills, prior_errors_drill)
        # Post paper stats
        scoresDRILLPOST = np.mean(
            F.mse_loss(torch.tensor(post), torch.tensor(ground),
                       reduction='none').detach().numpy())
        rho_listDRILLPOST = calculate_ranking_correlation(spearmanr, post, ground)
        tau_listDRILLPOST = calculate_ranking_correlation(kendalltau, post, ground)
        prec_at_10_listDRILLPOST = calculate_prec_at_k(10, post, ground)
        prec_at_20_listDRILLPOST = calculate_prec_at_k(20, post, ground)
        SSE_LSH_drill = utils.getSSE(drill_ged_dif_lsh)
        AVG_REL_ERROR_LSH_drill = utils.getAvRelEr(drill_ged_dif_lsh, ground)
        post_errors_drill = [
            "\nSSE (LSH) = {}".format(SSE_LSH_drill),
            "AVG_REL_ERROR (LSH) = {}".format(AVG_REL_ERROR_LSH_drill)
        ]
        print("\nWITH LSH:")
        print_evaluation([
            scoresDRILLPOST, rho_listDRILLPOST, tau_listDRILLPOST,
            prec_at_10_listDRILLPOST, prec_at_20_listDRILLPOST
        ], post_errors_drill)
        # For bar chart
        label = "Table {}, bucket {}".format(trainable_buckets["table"][i], int(b, 2))
        drillDownStats["labels"].append(label)
        drillDownStats["mse"].append(scoresDRILLPOST)
        drillDownStats["rho"].append(rho_listDRILLPOST)
        drillDownStats["tau"].append(tau_listDRILLPOST)
        drillDownStats["p10"].append(prec_at_10_listDRILLPOST)
        drillDownStats["p20"].append(prec_at_20_listDRILLPOST)
        drillDownStats["sse"].append(SSE_LSH_drill)
        drillDownStats["ale"].append(AVG_REL_ERROR_LSH_drill)
        # Error distribution
        plot.comparativeDistribution(np.abs(drill_ged_dif_lsh),
                                     np.abs(drill_ged_dif_nolsh),
                                     dataset_name,
                                     path=path_for_dd_plots,
                                     address=label)
        plot.comparativeScatterplot(np.abs(drill_ged_dif_lsh),
                                    np.abs(drill_ged_dif_nolsh),
                                    dataset_name,
                                    path=path_for_dd_plots,
                                    address=label)
        # LSH Utilization
        # used_pairs = len(prior)  # how will I get bucket size?
        # bucket_pairs = 0
        # pairspercent = round(used_pairs * 100 / bucket_pairs, 1)
        # print("\nLSH Usage (pairs): {} of {} ({}%)".format(used_pairs, bucket_pairs, pairspercent))

    # Now we plot the drill-down bar chart WITH LSH.
    # First the SSE on its own since it's way bigger than the others.
    plot.drillDownSSE(drillDownStats["labels"], drillDownStats["sse"],
                      dataset_name, path=path_for_dd_plots)
    plot.drillDownMSE(drillDownStats["labels"], drillDownStats["mse"],
                      dataset_name, path=path_for_dd_plots)
    plot.drillDownCorrelation(drillDownStats, dataset_name, path=path_for_dd_plots)
    plot.drillDownStats2(drillDownStats, dataset_name, path=path_for_dd_plots)