def check_good_dominance_interval(iterations):
    """Measure how often each percentile row is hit by the interval check.

    For each of the data sets SHA, EPI and HR, ``n = 100`` alternatives are
    sampled (``random`` is seeded with 0), normalised column-wise by their
    maximum and handed ``iterations`` times to
    ``check_good_interval_iteration``.  That helper returns an iterable of row
    indices; every returned index increments the matching counter.  Counters
    are then divided by ``iterations`` and printed per data set, and a summary
    matrix with one column per data set is printed at the end.

    Args:
        iterations: number of repetitions per data set.  It is used as a
            divisor, so it must be non-zero.
    """
    random.seed(0)
    datasets = ("SHA", "EPI", "HR")
    header = [""] + list(datasets)
    n = 100
    percentiles = (0, 12.5, 25, 37.5, 50, 62.5, 75, 87.5, 100)

    # One row per percentile plus a final 'av' row; one column is appended
    # per data set.
    res_tot = [[p] for p in percentiles]
    res_tot.append(['av'])

    for dataset in datasets:
        print("\n"*2, "-"*35, dataset, "-"*35, "\n")
        filename = 'data/' + dataset + '/raw.csv'
        # Only the alternatives are needed here, so the file is parsed once.
        A = dr.open_raw(filename)[0]
        A = random.sample(A, n)
        A = normalize(A, axis=0, copy=True, norm='max')
        A = [list(alt) for alt in A]
        k = len(A[0])

        res = [[p, 0] for p in percentiles]
        res.append(['av', 0])
        for _ in range(iterations):
            for col in check_good_interval_iteration(A, n, k, percentiles):
                res[col][1] += 1

        # Turn the hit counters into frequencies and copy them to the summary.
        for row, row_tot in zip(res, res_tot):
            row[1] /= iterations
            row_tot.append(row[1])

        helpers.printmatrix(res)
    helpers.printmatrix([header] + res_tot)
def check_if_dominance_interval(iterations=100):
    """Count how often each of three interval outcomes occurs per data set.

    For each of the data sets SHA, EPI and HR, ``n = 100`` alternatives are
    sampled (``random`` is seeded with 0), normalised column-wise by their
    maximum and handed ``iterations`` times to
    ``check_if_interval_iteration``.  That helper returns an iterable of
    indices in ``0..2``; the printed table labels these columns "Neither",
    "OR" and "AND".  The table holds the frequency of each outcome.

    Args:
        iterations: number of repetitions per data set.  It is used as a
            divisor, so it must be non-zero.
    """
    random.seed(0)
    datasets = ("SHA", "EPI", "HR")
    header = ["", "Neither", "OR", "AND"]
    n = 100
    res = []

    for dataset in datasets:
        print("\n"*2, "-"*35, dataset, "-"*35, "\n")
        filename = 'data/' + dataset + '/raw.csv'
        # Only the alternatives are needed here, so the file is parsed once.
        A = dr.open_raw(filename)[0]
        A = random.sample(A, n)
        A = normalize(A, axis=0, copy=True, norm='max')
        A = [list(alt) for alt in A]
        k = len(A[0])

        res_dataset = [0, 0, 0]
        for _ in range(iterations):
            for col in check_if_interval_iteration(A, n, k):
                res_dataset[col] += 1
        res.append([dataset] + [o/iterations for o in res_dataset])

    helpers.printmatrix([header] + res)
def test_ranking(dataset='HDI'):
    """Print the PII and PIIMV rankings side by side on complete data.

    The first five alternatives of ``data/<dataset>/raw.csv`` are ranked with
    both ``prom.PrometheeII`` and ``prom.PrometheeMV``.  For 'HDI' the weights
    are fixed to ``[0.5, 0.5]`` and the ceils to ``[3, 3]``; for any other
    data set the weights of the file are used (``None`` when the file has
    none) together with ``seed=1``.
    """
    path = 'data/' + dataset + '/raw.csv'
    alts = dr.open_raw(path)[0][0:5]
    weights = dr.open_raw(path)[1]
    if weights == []:
        weights = None

    if dataset == 'HDI':
        params = {'weights': [0.5, 0.5], 'ceils': [3, 3]}
    else:
        params = {'weights': weights, 'seed': 1}
    promethee = prom.PrometheeII(alts, **params)
    prometheeMV = prom.PrometheeMV(alts, **params)

    scores = promethee.scores
    scoresMV = prometheeMV.scores
    rank = promethee.ranking
    rankMV = prometheeMV.ranking

    for position, alt in enumerate(rank):
        # NOTE(review): the MV score is looked up with the PII alternative
        # (rank[i]), not with rankMV[i]; confirm this is intended.
        pii_part = str(alt + 1) + '::' + str(scores[alt])
        mv_part = str(rankMV[position] + 1) + '::' + str(scoresMV[alt])
        print(pii_part + " :::: " + mv_part)
def test_guess_eval(dataset="SHA", alt_num=15, del_number=1, seed=0):
    """Run the best-estimation guess on a sample with deleted evaluations.

    Args:
        dataset: name of the directory under ``data/`` holding ``raw.csv``.
        alt_num: number of alternatives sampled from the data set.
        del_number: passed to ``mv.delete_l_evaluations`` as the quantity of
            evaluations to delete.
        seed: passed to ``mv.delete_l_evaluations``.  The sampling itself uses
            the current state of ``random`` and is not seeded here.
    """
    filename = 'data/' + dataset + '/raw.csv'
    # Only the alternatives are needed, so the file is parsed once.
    all_alts = dr.open_raw(filename)[0]
    alts = random.sample(all_alts, alt_num)
    alts = mv.delete_l_evaluations(alts, del_number, seed)
    mv.guess_all_bests_estimations(alts)
def test_check_train_dom(dataset="SHA", alt_num=100):
    """Run ``mv.check_train_dom`` on a random sample of several data sets.

    Args:
        dataset: kept for backward compatibility only.  The function always
            iterates over HR, SHA, EPI and HP and ignores the value passed.
        alt_num: number of alternatives sampled from each data set.
    """
    datasets = ('HR', 'SHA', 'EPI', 'HP')
    for dataset in datasets:
        print('---------------------- ', dataset, ' -----------------------')
        filename = 'data/' + dataset + '/raw.csv'
        # Only the alternatives are needed, so the file is parsed once.
        all_alts = dr.open_raw(filename)[0]
        alts = random.sample(all_alts, alt_num)
        mv.check_train_dom(alts)
def test_ranking(dataset='HDI'):
    """Print the PII ranking so it can be compared with the RobustPII article.

    The first 20 alternatives of ``data/<dataset>/raw.csv`` are ranked.  For
    'HDI' the weights are ``[0.5, 0.5]`` and the ceils ``[3, 3]``; otherwise
    the weights of the file are used (``None`` when the file has none) with
    ``seed=1``.  Each printed line is ``<index + 1>::<score>``.

    Mapping between indices and countries:

        0 - Norway           10 - Singapore
        1 - Australia        11 - Hong Kong
        2 - Switzerland      12 - Liechtenstein
        3 - Denmark          13 - Sweden
        4 - Netherlands      14 - United Kingdom
        5 - Germany          15 - Iceland
        6 - Ireland          16 - Korea
        7 - United States    17 - Israel
        8 - Canada           18 - Luxembourg
        9 - New Zealand      19 - Japan

    Expected ranking:

        2::0.31491228070175437
        1::0.2500000000000007
        8::0.18245614035087707
        11::0.18070175438596484
        19::0.16315789473684195
        17::0.16228070175438677
        9::0.059649122807016945
        13::0.058771929824561676
        0::0.04210526315789358
        5::0.007894736842106042
        14::-0.02543859649122777
        16::-0.02807017543859552
        10::-0.07105263157894759
        4::-0.08070175438596594
        18::-0.09824561403508743
        15::-0.13771929824561518
        6::-0.14999999999999925
        3::-0.17631578947368398
        7::-0.28859649122807074
        12::-0.3657894736842105
    """
    path = 'data/' + dataset + '/raw.csv'
    alts = dr.open_raw(path)[0][0:20]
    weights = dr.open_raw(path)[1]
    if weights == []:
        weights = None

    if dataset == 'HDI':
        params = {'weights': [0.5, 0.5], 'ceils': [3, 3]}
    else:
        params = {'weights': weights, 'seed': 1}
    promethee = prom.PrometheeII(alts, **params)

    print(promethee.ceils, promethee.weights)
    print(sum(promethee.weights))

    scores = promethee.scores
    rank = promethee.ranking
    for alt in rank:
        print(str(alt + 1) + '::' + str(scores[alt]))
def check_dominance_assumption(iterations=10):
    """Test if dominance is still respected once the hidden value is known.

    For each of the data sets SHA, EPI and HR, 100 alternatives are sampled
    and normalised column-wise by their maximum.  Then, ``iterations`` times,
    one alternative ``a`` is taken out, one of its evaluations (criterion
    ``c``) is replaced by ``NULL`` and ``de.train_dom`` selects a list of
    criteria indices.  ``de.count_dominant_alts`` is called twice: once with
    those indices and the incomplete alternative, once with ``c`` added and
    the complete alternative.  The mean and standard deviation of both pairs
    of counts and of their ratios are printed per data set.

    Args:
        iterations: number of random (alternative, criterion) draws per data
            set.
    """
    datasets = ("SHA", "EPI", "HR")
    n = 100
    labels = ["Dom+", "Dc+", "ratio", "dom-", "dc-", "ratio",
              "Tot", "tot_c", "ratio"]

    for dataset in datasets:
        print("\n"*2, "-"*35, dataset, "-"*35, "\n")
        filename = 'data/' + dataset + '/raw.csv'
        # Only the alternatives are needed, so the file is parsed once.
        A = dr.open_raw(filename)[0]
        A = random.sample(A, n)
        A = normalize(A, axis=0, copy=True, norm='max')
        A = [list(alt) for alt in A]
        k = len(A[0])

        # One list of observations per printed line (same order as labels).
        res = [[] for _ in labels]
        for _ in range(iterations):
            i = random.randint(0, n - 1)
            c = random.randint(0, k - 1)

            # Take the alternative out so it is not compared with itself.
            a = A.pop(i)
            a_miss = a[:]
            a_miss[c] = NULL

            indices = de.train_dom(A, c, a_miss)
            dominant, dominated = de.count_dominant_alts(A, indices, a_miss)
            indices.append(c)
            dominant_c, dominated_c = de.count_dominant_alts(A, indices, a)

            total = dominated + dominant
            total_c = dominated_c + dominant_c
            observation = (
                dominant,
                dominant_c,
                dominant_c/dominant if dominant else 0,
                dominated,
                dominated_c,
                dominated_c/dominated if dominated else 0,
                total,
                total_c,
                total_c/total if total else 0,
            )
            for bucket, value in zip(res, observation):
                bucket.append(value)

            # Put the alternative back at its original position.
            A.insert(i, a)

        final_res = [[" ", " ", "MEAN", "STD"]]
        for label, values in zip(labels, res):
            final_res.append([label, " ", np.mean(values), np.std(values)])
        helpers.printmatrix(final_res, width=5)
def test_rr_analysis(data='HDI'):
    """Check that the rank reversals are correct.

    These rank reversals should be compared to the ones occurring in the
    article 'About the computation of robust PROMETHEE II rankings: empirical
    evidence' by De Smet.

    Mapping between indices and countries for the HDI data set:

        0 - Norway           10 - Singapore
        1 - Australia        11 - Hong Kong
        2 - Switzerland      12 - Liechtenstein
        3 - Denmark          13 - Sweden
        4 - Netherlands      14 - United Kingdom
        5 - Germany          15 - Iceland
        6 - Ireland          16 - Korea
        7 - United States    17 - Israel
        8 - Canada           18 - Luxembourg
        9 - New Zealand      19 - Japan

    Args:
        data: one of 'HDI', 'SHA' or 'EPI'.

    Raises:
        ValueError: if ``data`` is not one of the supported data sets.
    """
    # Data initialisation according to the data set
    if data == 'HDI':
        data_set = 'data/HDI/raw.csv'
        alts = dr.open_raw(data_set)[0]
        ceils = [3, 3]
        weights = [0.5, 0.5]
        promethee = prom.PrometheeII(alts, weights=weights, ceils=ceils)
    elif data == 'SHA':
        data_set = 'data/SHA/raw_20.csv'
        # The file provides four fields; the coefficients are not used here.
        alts, weights, _coeff, ceils = dr.open_raw(data_set)
        promethee = prom.PrometheeII(alts, weights=weights, ceils=ceils)
    elif data == 'EPI':
        data_set = 'data/EPI/raw.csv'
        alts = dr.open_raw(data_set)[0]
        alts = alts[0:20]
        seed = 0
        promethee = prom.PrometheeII(alts, seed=seed)
    else:
        # Without this branch `promethee` would be unbound below.
        raise ValueError("unknown data set: " + repr(data))

    print("Rank reversals:")
    rr = promethee.compute_rr_number(True)
    print("rank reverasal quantity: " + str(rr))
    rr_instances = promethee.analyse_rr()
    print('rank reversal recap :')
    print(rr_instances)
def compare_rankings(alt_num=20, it=500, del_num=1):
    """Compare the replacement strategies on several data sets.

    ``random`` is seeded with 1.  For each data set, ``compare_rankings_once``
    is called ``it`` times; it returns a mapping from method name to a value
    (called ``taus`` here).  The printed matrix has one row per method with
    the per-data-set average, followed by the mean of those averages and the
    standard deviation over all individual values.

    Args:
        alt_num: forwarded to ``compare_rankings_once``.
        it: number of repetitions per data set (also used as divisor).
        del_num: forwarded to ``compare_rankings_once``.
    """
    random.seed(1)
    datasets = ('HR', 'SHA', 'EPI', 'HP')
    header = [" "] + list(datasets) + ["mean", "std"]
    methods = {
        'sreg': mv.replace_by_sreg,
        'dom': mv.replace_by_dominance,
        'd_diff': mv.replace_by_dominance_smallest_diff,
        'knn': mv.replace_by_knn,
        'mean': mv.replace_by_mean,
        'med': mv.replace_by_med,
    }

    # Per method: list of per-data-set averages / all individual values.
    averages = {name: [] for name in methods}
    all_values = {name: [] for name in methods}

    for dataset in datasets:
        print('---------------------- ', dataset, ' -----------------------')
        start = time.time()
        values = {name: [] for name in methods}

        filename = 'data/' + dataset + '/raw.csv'
        all_alts = dr.open_raw(filename)[0]
        weights = dr.open_raw(filename)[1]
        if weights == []:
            weights = None

        for _ in range(it):
            taus = compare_rankings_once(all_alts, alt_num, weights, del_num,
                                         methods)
            for name in methods:
                values[name].append(taus[name])

        for name in methods:
            averages[name].append(sum(values[name]) / it)
            all_values[name].extend(values[name])
        print('time:', time.time() - start)

    final_matrix = [header]
    for name in methods:
        row = averages[name]
        row.append(np.mean(row))
        row.append(np.std(all_values[name]))
        final_matrix.append([name] + row)
    helpers.printmatrix(final_matrix)
def compare_refflows():
    """Check the scores of a ReferencedPII object against plain PII.

    A random seed in ``1..1000`` is drawn and printed.  A
    ``prom.ReferencedPII`` object is built on the HDI data set with
    ``prom.strategy2``.  For every alternative, a ``prom.PrometheeII`` object
    is built on a copy of the reference set (``SRP``) with that alternative
    appended; its last score must match the referenced score within 1e-5.
    "ok" is printed on success, otherwise a warning and the PII scores.
    """
    data_set = 'HDI'
    random.seed()
    seed = random.randint(1, 1000)
    print(seed)

    strategy = prom.strategy2
    input_file = 'data/' + str(data_set) + '/raw.csv'
    alternatives = dr.open_raw(input_file)[0]

    referenced = prom.ReferencedPII(alternatives, strategy=strategy,
                                    seed=seed)
    SRP = referenced.SRP
    ref_scores = referenced.scores

    for index, alt in enumerate(alternatives):
        extended = SRP[:]
        extended.append(alt)
        scores = prom.PrometheeII(extended, seed=seed).scores
        if abs(scores[-1] - ref_scores[index]) >= 1e-5:
            print("There is something wrong")
            print(scores)
        else:
            print("ok")
def test_functions():
    """Check the constraint handling of the adaptive procedure.

    Three constraints are added to an ``aqp.Adaptive_procedure`` built on the
    EPI data set.  ``True`` is printed when ``is_admissible`` rejects the
    first sequence below and accepts the second one, ``False`` otherwise.
    """
    input_file = 'data/' + str('EPI') + '/raw.csv'
    alts = dr.open_raw(input_file)[0]
    procedure = aqp.Adaptive_procedure(alts, seed=0, alt_num=10,
                                       ref_number=4, pts_per_random_it=2,
                                       desired_points=10)

    # Constraint verification
    for constraint in ((6, 8), (7, 8), (3, 5)):
        procedure.add_constraint(constraint)

    wrongly_accepted = procedure.is_admissible([1, 2, 9, 4, 5, 3, 6, 7, 8])
    rightly_accepted = procedure.is_admissible([1, 2, 3, 4, 5, 9, 6, 7, 8])
    print((not wrongly_accepted) and bool(rightly_accepted))
def first_search(pop_size=600, mut_prob=0.01, MAXIT=50):
    """Search reference profile sets reproducing the PII ranking.

    ``GS.genetic_search`` is run for seeds 0 to 14, for each of the data sets
    SHA, EPI and GEQ and each subset size in 20, 25, 30, 40 and 50.  A seed
    counts as a success when the returned tau exceeds ``1 - 1e-5``; otherwise
    the seed and its tau (truncated to three decimals) are recorded as a
    failure.  Results are handed to ``save_res_to_file`` once per subset size.
    Seeds that failed can be retried with other parameters afterwards.

    Args:
        pop_size: forwarded to ``GS.genetic_search``.
        mut_prob: forwarded to ``GS.genetic_search``.
        MAXIT: forwarded to ``GS.genetic_search``.
    """
    weights, ceils = None, None
    success_threshold = 1 - 1e-5

    for data_set in ['SHA', 'EPI', 'GEQ']:
        input_file = 'data/' + str(data_set) + '/raw.csv'
        output = 'res/ReferencedPII/genetic_search/' + str(data_set) + '.txt'
        alts = dr.open_raw(input_file)[0]

        for alt_num in [20, 25, 30, 40, 50]:
            succes, failures, failures_tau = [], [], []
            for s in range(15):
                started = time.time()
                tau = GS.genetic_search(alts, seed=s, weights=weights,
                                        ceils=ceils, alt_num=alt_num,
                                        pop_size=pop_size, mut_prob=mut_prob,
                                        MAXIT=MAXIT)
                print(str(s) + ', time: ' + str(time.time() - started)
                      + ', tau: ' + str(tau))

                if tau > success_threshold:
                    succes.append(s)
                    continue
                failures.append(s)
                failures_tau.append(int(tau*1000)/1000)

            save_res_to_file(output, alt_num, succes, failures, failures_tau)
def test_rr_counting_function():
    """Exercise the method counting rank reversals between two rankings.

    The rankings compared are:

        * [1, 2, 3, 4, 5, 6]
        * [6, 4, 3, 1, 5]

    so 7 rank reversals are expected:
    (6,1); (6,3); (6,4); (6,5); (4,3); (4,1); (3,1).

    Both rankings are printed after the call so that one can check that the
    arguments were not modified, followed by the value returned.
    """
    # The parameters do not matter, they only serve to build the object.
    alts = dr.open_raw('data/HDI/raw.csv')[0]
    promethee = prom.PrometheeII(alts, weights=[0.5, 0.5],
                                 coefficients=[0.61224, 1.2])

    # Here starts the actual test.
    ranking_init = [1, 2, 3, 4, 5, 6]
    ranking_new = [6, 4, 3, 1, 5]
    rr = promethee.compare_rankings(ranking_init, ranking_new, 2)

    for item in (ranking_init, ranking_new, rr):
        print(item)
def test():
    """Print the alternatives read from the HDI data set.

    Exercises the data sets and the data_reader module.  The file contains:

        Alternatives
        #####
        81.6,12.6
        82.4,13
        83,12.8
        80.2,12.7
        81.6,11.9
        80.9,13.1
        80.9,12.2
        79.1,12.9
        82,13
        81.8,12.5
        83,10.6
        84,11.2
        80,11.8
        82.2,12.1
        80.7,13.1
        82.6,10.6
        81.9,11.9
        82.4,12.5
        81.7,11.7
        83.5,11.5
    """
    content = dr.open_raw('data/HDI/raw.csv')
    alternatives = content[0]
    print(alternatives)
def count_draws(threshold=0.001):
    """Count the draws reported by ReferencedPII on EPI, SHA and GEQ.

    For every pair (number of references, number of alternatives), the value
    returned by ``draws_quantity`` is summed over 20 seeds and the three data
    sets.  The resulting matrix (one row per number of references) is handed
    to ``print_to_file``.

    Args:
        threshold: forwarded to ``draws_quantity``; it is also part of the
            output file name.
    """
    data_sets = ['SHA', 'EPI', 'GEQ']
    output = "res/ReferencedPII/reference_quantity/thresh_" + str(threshold) \
        + ".txt"

    # Change these parameters if needed
    ref_numbers = [2, 3, 5, 10, 15, 25]
    alternative_numbers = [10, 20, 40, 80]
    seed_list = range(20)
    ref_set_strategy = prom.strategy1

    def total_draws(ref_number, alt_number):
        """Sum the draws over all seeds and data sets for one setting."""
        total = 0
        for seed in seed_list:
            for data_set in data_sets:
                source = "data/" + data_set + "/raw.csv"
                alts = dr.open_raw(source)[0]
                ref_prom = prom.ReferencedPII(alts, alt_num=alt_number,
                                              strategy=ref_set_strategy,
                                              seed=seed, ref_num=ref_number)
                total += ref_prom.draws_quantity(ref_prom.scores, threshold)
        return total

    all_res = [
        [total_draws(ref_number, alt_number)
         for alt_number in alternative_numbers]
        for ref_number in ref_numbers
    ]
    print_to_file(output, ref_numbers, alternative_numbers, seed_list, all_res)
def test_ranking():
    """Print the RobustPII ranking of the HDI data set.

    The output is meant to be compared with the article 'About the
    computation of robust PROMETHEE II rankings: empirical evidence' by
    De Smet.  Each printed line is ``<index>::<score>``.

    Mapping between indices and countries:

        0 - Norway           10 - Singapore
        1 - Australia        11 - Hong Kong
        2 - Switzerland      12 - Liechtenstein
        3 - Denmark          13 - Sweden
        4 - Netherlands      14 - United Kingdom
        5 - Germany          15 - Iceland
        6 - Ireland          16 - Korea
        7 - United States    17 - Israel
        8 - Canada           18 - Luxembourg
        9 - New Zealand      19 - Japan
    """
    alts = dr.open_raw('data/HDI/raw.csv')[0]
    robust = prom.RobustPII(alts, weights=[0.5, 0.5], ceils=[3, 3],
                            R=10000, m=5)
    rank = robust.ranking
    scores = robust.scores
    for alt in rank:
        print(str(alt) + '::' + str(scores[alt]))
def test_PMV(dataset="HDI"):
    """Compare PII and PIIMV when some evaluations are missing.

    The first ten alternatives of the data set are copied, then
    ``mv.delete_evaluations`` is applied to the working list with a proportion
    of 0.2 and seed 1.  Three rankings are computed and printed side by side:
    PII on the complete data, PIIMV on the complete data and PIIMV on the
    incomplete data.
    """
    path = 'data/' + dataset + '/raw.csv'
    alts = dr.open_raw(path)[0][:10]
    proportion, seed = 0.2, 1

    print("complete :")
    prom.printmatrix(alts)
    # Keep an untouched copy before evaluations are deleted.
    original_alts = copy.deepcopy(alts)
    mv.delete_evaluations(alts, proportion, seed)
    print("incomplete :")
    prom.printmatrix(alts)

    print("Promethee:")
    promethee = prom.PrometheeII(original_alts, seed=seed)
    rank = promethee.ranking
    scores = promethee.scores

    print("PrometheeMV without missing:")
    complete_mv = prom.PrometheeMV(original_alts, seed=seed)
    rankMV1 = complete_mv.ranking
    scoresMV1 = complete_mv.scores

    print("PrometheeMV:")
    incomplete_mv = prom.PrometheeMV(alts, seed=seed)
    rankMV = incomplete_mv.ranking
    scoresMV = incomplete_mv.scores

    for position, alt in enumerate(rank):
        # NOTE(review): both MV scores are looked up with the PII alternative
        # (rank[i]); confirm this is intended.
        columns = [
            str(alt + 1) + '::' + str(scores[alt]),
            str(rankMV1[position] + 1) + '::' + str(scoresMV1[alt]),
            str(rankMV[position] + 1) + '::' + str(scoresMV[alt]),
        ]
        print(" :::: ".join(columns))
def test_ranking_SHA(dataset='SHA'):
    """Print the PII ranking of a max-normalised data set.

    The alternatives are normalised column-wise by their maximum and ranked
    with fixed weights ``[0.1, 0.2, 0.2, 0.2, 0.2, 0.1]`` and percentiles
    ``(25, 75)``.  The weights stored in the file are not used.  After the
    ranking (``<index>::<score>`` lines), two debugging values are printed:
    ``promethee.pi[5][6] / 2`` and ``promethee.alternatives[54][3]``.

    Args:
        dataset: name of the directory under ``data/`` holding ``raw.csv``.
            The fixed weights and the indices printed at the end assume at
            least 55 alternatives with six criteria.
    """
    data_set = 'data/' + dataset + '/raw.csv'
    # The weights of the file are replaced below, so parse the file once.
    A = dr.open_raw(data_set)[0]
    A = normalize(A, axis=0, copy=True, norm='max')
    print(A)
    A = [list(alt) for alt in A]

    weights = [0.1, 0.2, 0.2, 0.2, 0.2, 0.1]
    percentiles = (25, 75)
    promethee = prom.PrometheeII(A, weights=weights, percentiles=percentiles)

    scores = promethee.scores
    rank = promethee.ranking
    for alt in rank:
        print(str(alt) + '::' + str(scores[alt]))

    print(promethee.pi[5][6] / 2)
    print(promethee.alternatives[54][3])
def analyse(alt_num=20, seeds=range(0, 3), data_sets=['EPI', 'SHA', 'GEQ'],
            rounds=20, make_pdf=False):
    """Analyse the results of the adaptive questioning procedure.

    For each data set and seed, an ``aqp.Adaptive_procedure`` is executed for
    ``rounds`` rounds and the value it returns is handed to
    ``write_correct_pts``.  Optionally a box plot of
    ``procedure.kendall_taus`` is saved as a PDF.

    Args:
        alt_num: number of alternatives given to the procedure.
        seeds: NOTE(review): currently ignored, the body overrides it with
            ``range(3, 4)``; confirm whether this is a leftover.
        data_sets: names of the directories under ``data/`` to process.  The
            list is only iterated, never modified.
        rounds: forwarded to ``procedure.execute``.
        make_pdf: when true, save one box plot per (data set, seed).
    """
    seeds = range(3, 4)
    output_dir = 'res/ReferencedPII/adaptive_questioning_procedure/'

    # The log file is opened in append mode for the whole run; `with`
    # guarantees that it is closed even if the procedure raises.  Nothing is
    # written to it while the stdout redirection below stays disabled.
    with open(output_dir + "adaptative_questionning_results2.txt",
              "a") as output_file:
        for data_set in data_sets:
            input_file = 'data/' + str(data_set) + '/raw.csv'
            alts = dr.open_raw(input_file)[0]

            for seed in seeds:
                # NOTE(review): this directory differs from `output_dir`;
                # confirm the intended location of the csv files.
                correct_pts_output = (
                    'res/ReferencedPII_questioning_procedure/'
                    + data_set + '/' + str(seed) + '.csv')
                title = data_set + ' with ' + str(alt_num) \
                    + ' alternatives (seed ' + str(seed) + ')'
                title_plot = (
                    'Adaptive questioning procedure on a subset of the '
                    + data_set + ' data set with ' + str(alt_num)
                    + ' alternatives')

                print(title)
                # The block below used to run under
                # redirect_stdout(output_file); the title is therefore
                # printed a second time.
                print(title)
                procedure = aqp.Adaptive_procedure(alts, seed=seed,
                                                   alt_num=alt_num,
                                                   pts_per_random_it=200,
                                                   desired_points=3000)
                corrects = procedure.execute(rounds)
                write_correct_pts(corrects, correct_pts_output)
                print()

                if make_pdf:
                    # Boxplot of the rankings
                    fig = plt.figure(1, figsize=(9, 6))
                    plt.suptitle(title_plot)
                    ax = fig.add_subplot(111)
                    ax.set_ylim(-0.3, 1.1)
                    ax.yaxis.set_major_locator(
                        ticker.FixedLocator([-0.25, 0, 0.25, 0.5, 0.75, 1]))
                    ax.boxplot(procedure.kendall_taus)
                    fig.savefig(output_dir + title + '.pdf',
                                bbox_inches='tight')
                    plt.clf()
def test_replacements():
    """Print pairwise comparisons with missing values before and after.

    A hand-written preference matrix containing '*' entries is printed row by
    row, then the first matrix returned by
    ``prometheeMV.compute_pairwise_comparisons`` for four single-criterion
    alternatives (one of them '*') is printed after the line "second round".
    """
    # Initialisation purpose only
    alts = dr.open_raw('data/HDI/raw.csv')[0]
    prometheeMV = prom.PrometheeMV(alts, seed=1, method='mean')

    alternatives = [[1], [0], ['*'], [2]]
    f = [myf]
    expected_first_criterion = [
        [0, 1, '*', 0],
        [0, 0, '*', 0],
        ['*', '*', 0, '*'],
        [1, 1, '*', 0],
    ]
    for row in expected_first_criterion:
        print(row)

    P = prometheeMV.compute_pairwise_comparisons(alternatives, f)
    print("second round")
    for row in P[0]:
        print(row)
def get_dataset(dataset, n=None, random_alts=False, normalised=True):
    """Return alternatives read from ``data/<dataset>/raw.csv``.

    Args:
        dataset: name of the directory under ``data/``.
        n: number of alternatives to keep; all of them when ``None``.
        random_alts: when true, the alternatives are drawn with
            ``random.sample``; otherwise the first ``n`` are kept.
        normalised: when true, each column is divided by its maximum and the
            rows are converted back to lists.

    Returns:
        The selected alternatives.
    """
    alternatives = dr.open_raw('data/' + dataset + '/raw.csv')[0]
    if n is None:
        n = len(alternatives)

    selected = (random.sample(alternatives, n) if random_alts
                else alternatives[:n])
    if not normalised:
        return selected

    scaled = normalize(selected, axis=0, copy=True, norm='max')
    return [list(alt) for alt in scaled]
def compare(tests_qty=3):
    """Compare the reference-set strategies with the PII ranking.

    For each of the data sets EPI, SHA and GEQ and each seed in
    ``range(tests_qty)``, the ranking of ``prom.PrometheeII`` on 30
    alternatives is compared, through Kendall's tau, with the ranking of
    ``prom.ReferencedPII`` for each of the four strategies.  The taus are
    truncated to three decimals and handed to ``print_to_file``.

    Args:
        tests_qty: number of seeds tried per data set.
    """
    output = "res/ReferencedPII/strategies/comparisons.txt"
    alt_num = 30
    strategies = [
        prom.strategy1,
        prom.strategy2,
        prom.strategy3,
        prom.strategy4,
    ]
    # One list of taus for each strategy
    kendall_taus = [[] for _ in range(4)]
    titles = []

    for data_set in ['EPI', 'SHA', 'GEQ']:
        input_file = 'data/' + str(data_set) + '/raw.csv'
        alternatives = dr.open_raw(input_file)[0]

        for seed in range(0, 0 + tests_qty):
            prom_ranking = prom.PrometheeII(alternatives, seed=seed,
                                            alt_num=alt_num).ranking
            titles.append(data_set + str(seed))

            for index, strategy in enumerate(strategies):
                refrank = prom.ReferencedPII(alternatives, seed=seed,
                                             strategy=strategy,
                                             alt_num=alt_num).ranking
                tau = stats.kendalltau(refrank, prom_ranking)[0]
                kendall_taus[index].append(int(tau * 1000) / 1000)

    print_to_file(output, titles, kendall_taus, tests_qty)
def retry_failed(data_set='SHA', alt_numbers=[20], failed_seeds=[[7, 8]],
                 ref_number=5, maxrep=1, pop_size=600, mut_prob=0.01,
                 MAXIT=50):
    """Retry the search for subsets which failed the first time.

    For every subset size ``alt_numbers[i]`` the seeds ``failed_seeds[i]``
    are tried again: ``GS.genetic_search`` is repeated at most ``maxrep``
    times per seed, keeping the best tau, until it exceeds ``1 - 1e-5``.
    Results are handed to ``save_res_to_file`` once per subset size and the
    total running time is printed at the end.

    Args:
        data_set: name of the directory under ``data/``.
        alt_numbers: subset sizes to retry (only read, never modified).
        failed_seeds: one list of seeds per entry of ``alt_numbers`` (only
            read, never modified).
        ref_number: forwarded to ``GS.genetic_search`` as ``SRP_size``.
        maxrep: maximal number of searches per seed.
        pop_size: forwarded to ``GS.genetic_search``.
        mut_prob: forwarded to ``GS.genetic_search``.
        MAXIT: forwarded to ``GS.genetic_search``.
    """
    weights, ceils = None, None
    success_threshold = 1 - 1e-5

    # Here we retry the seeds failed with different parameters
    t0 = time.time()
    input_file = 'data/' + str(data_set) + '/raw.csv'
    output = 'res/ReferencedPII/genetic_search/' + str(data_set) + '.txt'
    alts = dr.open_raw(input_file)[0]

    for i, alt_num in enumerate(alt_numbers):
        succes = []
        failures = []
        failures_tau = []
        for s in failed_seeds[i]:
            t1 = time.time()
            tau = 0
            it = 0
            while tau < success_threshold and it < maxrep:
                tau2 = GS.genetic_search(alts, seed=s, weights=weights,
                                         SRP_size=ref_number, ceils=ceils,
                                         alt_num=alt_num, pop_size=pop_size,
                                         mut_prob=mut_prob, MAXIT=MAXIT)
                tau = max(tau, tau2)
                print(str(s) + ', total time: ' + str(time.time() - t0)
                      + ", it time: " + str(time.time() - t1)
                      + ', tau: ' + str(tau))
                it += 1

            if tau > success_threshold:
                succes.append(s)
            else:
                failures.append(s)
                failures_tau.append(int(tau*1000)/1000)

        save_res_to_file(output, alt_num, succes, failures, failures_tau)

    # Report the total time: the per-seed timer is stale at this point and
    # does not even exist when no seed was processed.
    print("time :" + str(time.time() - t0))
def SRP_from_aqp(data_set="GEQ", seeds=range(3), alt_num=20):
    """Analyse the correct SRP found with the adaptive procedure.

    For every seed, the reference sets stored in
    ``res/ReferencedPII/adaptive_questioning_procedure/<data_set>/<seed>.csv``
    are compared, criterion by criterion, with the alternatives kept by
    ``prom.PrometheeII`` for that seed: the ratios mean(ref)/mean(alt) and
    var(ref)/var(alt) are computed for each reference set, then the mean and
    the variance of those ratios over all reference sets.  One formatted line
    per seed and per statistic is appended to
    ``res/ReferencedPII/SRP_analysis/<data_set>``.

    Args:
        data_set: name of the data set.
        seeds: seeds whose reference sets are analysed.
        alt_num: number of alternatives of the subsets.
    """
    all_alts = dr.open_raw("data/" + data_set + "/raw.csv")[0]

    # Line template: the seed followed by one signed value per criterion.
    template_ratio = '{0:^d}|' + ''.join(
        '{' + str(position + 1) + ':+.3F}|'
        for position in range(len(all_alts[0])))

    var_var_ratio_str = []
    var_mean_ratio_str = []
    mean_var_ratio_str = []
    mean_mean_ratio_str = []

    # Output
    output_file = "res/ReferencedPII/SRP_analysis/" + data_set

    for seed in seeds:
        # Input
        SRP_prefix = "res/ReferencedPII/adaptive_questioning_procedure/"
        all_SRP = dr.open_raw_RS(SRP_prefix + data_set + "/" + str(seed)
                                 + ".csv")

        # Get the correct subset of alternatives for the concerned seed
        promethee = prom.PrometheeII(all_alts, seed=seed, alt_num=alt_num)
        alts_per_criterion = [list(column)
                              for column in zip(*promethee.alternatives)]

        # Check that the parameters (= alternative subset) are the same
        questioning_procedure = aqp.Adaptive_procedure(all_alts, seed=seed,
                                                       alt_num=alt_num,
                                                       ref_number=4,
                                                       pts_per_random_it=200,
                                                       desired_points=3000)
        same_parameters = prom.check_parameters(
            questioning_procedure.promethee, promethee)
        if not same_parameters:
            print("error")

        # Ratios of every individual SRP: one list per SRP holding one ratio
        # per criterion, e.g. all_mean_ratios[0] is
        # [mean(c1(r1), ..., c1(r4)) / mean(c1(alts)), ...].
        all_mean_ratios = []
        all_var_ratios = []
        for SRP in all_SRP:
            # Matrix = list of criteria which are lists of the evaluations
            # of the references
            refs_per_criterion = [list(column) for column in zip(*SRP)]

            mean_ratios, var_ratios = [], []
            for crit, ref_values in enumerate(refs_per_criterion):
                alt_values = alts_per_criterion[crit]
                mean_ratios.append(
                    numpy.mean(ref_values) / numpy.mean(alt_values))
                var_ratios.append(
                    numpy.var(ref_values) / numpy.var(alt_values))
            all_mean_ratios.append(mean_ratios)
            all_var_ratios.append(var_ratios)

        # Transpose: a list of SRP holding one estimator per criterion
        # becomes a list of criteria holding one estimator per SRP.
        var_ratios_per_crit = [list(column)
                               for column in zip(*all_var_ratios)]
        mean_ratios_per_crit = [list(column)
                                for column in zip(*all_mean_ratios)]

        var_var_ratios = [numpy.var(crit) for crit in var_ratios_per_crit]
        mean_var_ratios = [numpy.mean(crit) for crit in var_ratios_per_crit]
        var_mean_ratios = [numpy.var(crit) for crit in mean_ratios_per_crit]
        mean_mean_ratios = [numpy.mean(crit)
                            for crit in mean_ratios_per_crit]

        # Transform into strings
        var_var_ratio_str.append(
            template_ratio.format(seed, *var_var_ratios))
        var_mean_ratio_str.append(
            template_ratio.format(seed, *var_mean_ratios))
        mean_var_ratio_str.append(
            template_ratio.format(seed, *mean_var_ratios))
        mean_mean_ratio_str.append(
            template_ratio.format(seed, *mean_mean_ratios))

    sections = (
        ("var(var(ref)/var(alt)) \n", var_var_ratio_str),
        ("var(mean(ref)/mean(alt)) \n", var_mean_ratio_str),
        ("mean(var(ref)/var(alt)) \n", mean_var_ratio_str),
        ("mean(mean(ref)/mean(alt)) \n", mean_mean_ratio_str),
    )
    with open(output_file, 'a') as output:
        for heading, lines in sections:
            output.write(heading)
            for line in lines:
                output.write(line)
                output.write("\n")
            output.write("\n")
def analyse_rr(data='SHA', max_rep=20, R_parameter=None, m_parameter=None):
    """Analyse the rank reversals occurring in RobustPII.

    The rank reversal instances of ``prom.PrometheeII`` are compared with
    those accumulated over ``max_rep`` ``prom.RobustPII`` objects.  For every
    key (a pair of alternative indices) one line is produced with the average
    robust count, the PII count and the absolute score differences in both
    methods; everything is handed to ``print_to_file``.

    Args:
        data: 'SHA' selects the SHA data set; 'HDI' is refused (a message is
            printed and the interpreter exits); any other value selects EPI.
        max_rep: number of RobustPII repetitions (also used as divisor).
        R_parameter: overrides the default ``R`` when not ``None``.
        m_parameter: overrides the default ``m`` when not ``None``.
    """
    if data == 'HDI':
        print('try with another dataset')
        exit()

    if data == 'SHA':
        R, m = 5000, 9
        # Do not change these parameters ! They are not saved
        alts = dr.open_raw('data/SHA/raw_20.csv')[0]
        weights = [0.1667, 0.1667, 0.1667, 0.1667, 0.1667, 0.1667]
        ceils = [17.100, 23.7750, 26.100, 27.3750, 17.9250, 13.5750]
        seed = 1
    else:
        data = 'EPI'
        R, m = 5000, 16
        # Do not change these parameters ! They are not saved
        alts = dr.open_raw('data/EPI/raw.csv')[0][0:20]
        weights, ceils = None, None
        seed = 0

    if R_parameter is not None:
        R = R_parameter
    if m_parameter is not None:
        m = m_parameter

    output = 'res/RobustPII/analyse_rank_reversals/' + str(data) + '.txt'

    promethee = prom.PrometheeII(alts, weights=weights, ceils=ceils, seed=seed)
    promethee_rr_instances = promethee.analyse_rr()

    # Accumulate the rank reversal counts over all repetitions.
    all_rr_instances = dict()
    for _ in range(max_rep):
        robust = prom.RobustPII(alts, weights=weights, ceils=ceils, seed=seed,
                                R=R, m=m)
        rr_instances = robust.analyse_rr()
        for key in rr_instances:
            previous = all_rr_instances.get(key, 0)
            all_rr_instances[key] = previous + rr_instances.get(key)

    key_set = set(all_rr_instances.keys()) | set(promethee_rr_instances.keys())
    # The scores of the last RobustPII object are used for the differences.
    all_info = [
        [
            key[0],
            key[1],
            all_rr_instances.get(key, 0) / max_rep,
            promethee_rr_instances.get(key, 0),
            abs(promethee.scores[key[0]] - promethee.scores[key[1]]),
            abs(robust.scores[key[0]] - robust.scores[key[1]]),
        ]
        for key in key_set
    ]
    print_to_file(output, all_info, promethee.scores, robust.scores, max_rep,
                  R, m)
def count_rr(data='HDI', max_rep=10, R_parameter=None, m_parameter=None):
    """Count the rank reversals of RobustPII for several values of R and m.

    For each pair (R, m), ``max_rep`` ``prom.RobustPII`` objects are built
    and the average of ``compute_rr_number()`` is stored.  The matrix (one
    row per R) is handed to ``print_rr_to_file`` together with the number of
    rank reversals of plain ``prom.PrometheeII``.

    Args:
        data: 'HDI' or 'SHA'; any other value selects the EPI data set.
        max_rep: number of repetitions per (R, m) pair (also the divisor).
        R_parameter: list of R values; a data-set specific default is used
            when ``None``.
        m_parameter: list of m values; a data-set specific default is used
            when ``None``.
    """
    # Parameter initialization, the interesting stuff is way lower
    R_list = R_parameter
    m_list = m_parameter
    if data == 'HDI':
        # Change these parameters if needed
        if R_list is None:
            R_list = [500, 1000, 5000, 10000]
        if m_list is None:
            m_list = [3, 5, 6, 7, 8, 10, 15]

        # Do not change these parameters ! They are not saved
        data_set = 'data/HDI/raw.csv'
        alts = dr.open_raw(data_set)[0]
        weights = [0.5, 0.5]
        ceils = [3, 3]
        seed = 0    # Not used, here to match the general signature

    elif data == 'SHA':
        # Change these parameters if needed
        if R_list is None:
            R_list = [1000, 4000, 7000, 12000]
        # Same guard as for the other data sets, so that a caller-supplied
        # m_parameter is honoured here too.
        if m_list is None:
            m_list = [4, 6, 8, 9, 12, 15, 18]

        # Do not change these parameters ! They are not saved
        data_set = 'data/SHA/raw_20.csv'
        alts = dr.open_raw(data_set)[0]
        weights = [0.1667, 0.1667, 0.1667, 0.1667, 0.1667, 0.1667]
        ceils = [17.100, 23.7750, 26.100, 27.3750, 17.9250, 13.5750]
        seed = 0    # Not used, here to match the general signature

    else:
        data = 'EPI'
        # Change these parameters if needed
        if R_list is None:
            R_list = [500, 1000, 5000, 8000]
        if m_list is None:
            m_list = [3, 4, 7, 9, 12, 14, 16, 18]

        # Do not change these parameters ! They are not saved
        data_set = 'data/EPI/raw.csv'
        alts = dr.open_raw(data_set)[0]
        alts = alts[0:20]
        weights, ceils = None, None
        seed = 0

    output_dir = 'res/RobustPII/R_m_influence/'
    output = output_dir + data + '.txt'

    promethee = prom.PrometheeII(alts, weights=weights, ceils=ceils, seed=seed)
    rr_promethee = promethee.compute_rr_number()

    rr_matrix = []
    for R in R_list:
        rr_row = []
        for m in m_list:
            rr = 0
            for _ in range(max_rep):
                # Re-seed from the system so that repetitions differ.
                random.seed()
                robust = prom.RobustPII(alts, weights=weights, ceils=ceils,
                                        seed=seed, R=R, m=m)
                rr += robust.compute_rr_number()
            rr_row.append(rr / max_rep)
        print(rr_row)
        rr_matrix.append(rr_row)

    print_rr_to_file(output, rr_matrix, R_list, m_list, rr_promethee, max_rep)
if __name__ == '__main__':
    # Small hand-made matrix formerly used to try compute_deltas:
    #   A = [[1, 2, 3, 4],
    #        [4, 3, 2, 1],
    #        [0, 0, 0, 0]]
    #   for a in A:
    #       print(a)
    #   print(compute_deltas(A, 0, [1, 2, 3]))

    # "SHA" is the other data set this script has been pointed at.
    dataset = "CPU"
    filename = 'data/' + dataset + '/raw.csv'
    n = 100
    iterations = 1

    # Draw n alternatives, then read an integer from standard input and pass
    # it to compute_criteria.
    A = random.sample(dr.open_raw(filename)[0], n)
    x = int(input())
    crits = compute_criteria(A, x)

    # Disabled experiment on local regression:
    #   for it in range(iterations):
    #       i, c = random.randint(0, len(A)-1), random.randint(0, len(A[0])-1)
    #       a_miss = A[i]
    #       ev = a_miss[c]
    #       a_miss[c] = NULL
    #       estimation = get_estimation_by_local_regression(A)
    #       print('evaluation: ', ev)
    #       print('error: ', ev - estimation)
    #       A[i][c] = ev
# NOTE(review): the two statements below are the end of a definition whose
# beginning is not part of this view; they are kept as they are.
worse_c = [b[c] for b in worse]
return better_c, worse_c


if __name__ == '__main__':
    # Script entry point: run check_dominance_interval once on a random
    # sample of the SHA data set and collect what it returns.
    datasets = ("SHA", "EPI", "HR")
    header = ["", "MEAN", "STD"]
    alt_num = 100
    percentiles = [12.5, 25, 37.5, 50, 62.5, 75, 87.5]
    res = []
    perc = 50
    dataset = "SHA"
    filename = 'data/' + dataset + '/raw.csv'
    # The file is parsed twice, once for each field.
    alts, weights = dr.open_raw(filename)[0], dr.open_raw(filename)[1]
    alts = random.sample(alts, alt_num)
    good_ints, bad_ints, no_ints, int_mean, int_std = \
        check_dominance_interval(alts, perc)
    res.append([dataset, good_ints, bad_ints, no_ints, int_mean, int_std])
    print('finish')
    # Disabled loop over all percentiles and data sets (it is the only code
    # using `datasets` and `percentiles`; `header` is not used at all):
    # for perc in percentiles:
    #     print(perc)
    #     res = []
    #     for dataset in datasets:
    #         filename = 'data/' + dataset + '/raw.csv'
    #         alts, weights = dr.open_raw(filename)[0], dr.open_raw(filename)[1]
    #         alts = random.sample(alts, alt_num)
    #         alts = normalize(alts, axis=0, copy=True, norm='max')
def compare_evaluations(alt_num=100, iterations=2,
                        outputdir='res/local_regression/'):
    """Compare the estimation strategies on several data sets.

    For each data set, ``alt_num`` alternatives are sampled and normalised
    column-wise by their maximum.  Then, ``iterations`` times, one random
    evaluation is replaced by ``NULL`` and ``compare_evaluations_once``
    returns a mapping from method name to a value (called an error here); the
    evaluation is restored afterwards.  Two kinds of tables are printed:

    1. Statistics for each data set: MEAN and STD of the errors per method.
    2. Global statistics: mean and std per method over all data sets.

    The rows ``i, c, ev, <one error per method>`` of every iteration are still
    collected, but the CSV export of the tables is currently disabled, so
    nothing is written to ``outputdir``.

    Args:
        alt_num: number of alternatives sampled from each data set.
        iterations: number of evaluations removed per data set.
        outputdir: prefix of the (currently unused) output file names.
    """
    datasets = ('HDI', 'SHA', 'HP', 'CPU')
    global_header = [" ", "mean", "std"]
    methods = {
        'reg': rg.get_regression,
        'lay_all': layrg.layer_regression_all,
        'lay_guess': layrg.layer_regression_guess_layer,
        'mean': mv.get_mean,
        'med': mv.get_med,
    }
    dataset_header = ['i', 'c', 'ev', 'lay_all', "lay_guess", 'reg', 'mean',
                      'med']
    # Columns following 'i', 'c' and 'ev' name the methods, in row order.
    row_methods_order = dataset_header[3:]

    global_res = {method: [] for method in methods}

    for dataset in datasets:
        print('---------------------- ', dataset, ' -----------------------')
        t0 = time.time()

        # Targets of the disabled CSV export
        dataset_output = outputdir + dataset + '.csv'
        dataset_statistics_output = outputdir + dataset + '_statistics.csv'

        dataset_res = [dataset_header]
        # used for std and mean
        dataset_res_dico = {method: [] for method in methods}

        filename = 'data/' + dataset + '/raw.csv'
        # Only the alternatives are needed, so the file is parsed once.
        all_alts = dr.open_raw(filename)[0]
        A = random.sample(all_alts, alt_num)
        A = normalize(A, axis=0, copy=True, norm='max')
        A = [list(alt) for alt in A]

        for _ in range(iterations):
            i = random.randint(0, len(A) - 1)
            c = random.randint(0, len(A[0]) - 1)

            # Hide the evaluation, estimate it, then put it back.
            ev = A[i][c]
            A[i][c] = NULL
            errors = compare_evaluations_once(A, ev, methods)
            A[i][c] = ev

            res_it = [i, c, ev]
            for m in row_methods_order:
                res_it.append(errors[m])
                dataset_res_dico[m].append(errors[m])
            dataset_res.append(res_it)

        # helpers.matrix_to_csv(dataset_res, dataset_output)

        # Make the matrix for the statistics of the given dataset
        dataset_statistics_res = [[dataset, "MEAN", "STD"]]
        for method in methods:
            # keep all the errors for the global statistics
            global_res[method] += dataset_res_dico[method]
            dataset_statistics_res.append([
                method,
                np.mean(dataset_res_dico[method]),
                np.std(dataset_res_dico[method]),
            ])

        helpers.printmatrix(dataset_statistics_res)
        # helpers.matrix_to_csv(dataset_statistics_res,
        #                       dataset_statistics_output)
        print('time:', time.time() - t0)

    global_matrix = [global_header]
    for m in methods:
        std = np.std(global_res[m])
        mean = np.mean(global_res[m])
        global_matrix.append([m, mean, std])
    helpers.printmatrix(global_matrix)