Exemplos de open_raw em Python, exemplos de data_reader.open_raw em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: dominance_estimation.py Projeto: gdejaege/Missing-values-management-in-Multi-Criteria-Decision-Aid

def check_good_dominance_interval(iterations):
    """Measure how often each percentile row is hit, per data set.

    For each of the SHA, EPI and HR data sets, ``n = 100`` alternatives are
    sampled (random seed fixed to 0) and max-normalised column-wise.
    ``check_good_interval_iteration`` is then called ``iterations`` times;
    every index it yields increments the counter of the matching row (one
    row per percentile plus a final 'av' row).  Counters are divided by
    ``iterations`` and printed per data set, followed by a summary table
    with one column per data set.

    Args:
        iterations: number of repetitions per data set (must be non-zero,
            since the counters are divided by it).
    """
    random.seed(0)
    datasets = ("SHA", "EPI", "HR")
    header = [""] + list(datasets)
    n = 100
    percentiles = (0, 12.5, 25, 37.5, 50, 62.5, 75, 87.5, 100)

    res_tot = [[p] for p in percentiles]
    res_tot.append(['av'])

    for dataset in datasets:
        print("\n"*2, "-"*35, dataset, "-"*35, "\n")
        filename = 'data/' + dataset + '/raw.csv'
        # Parse the file only once; the weights it also holds are not
        # needed here.
        A = dr.open_raw(filename)[0]
        A = random.sample(A, n)
        A = normalize(A, axis=0, copy=True, norm='max')
        A = [list(alt) for alt in A]
        k = len(A[0])

        res = [[p, 0] for p in percentiles]
        res.append(['av', 0])

        for _ in range(iterations):
            for col in check_good_interval_iteration(A, n, k, percentiles):
                res[col][1] += 1

        # Turn the hit counts into frequencies and feed the summary table.
        for row, row_tot in zip(res, res_tot):
            row[1] /= iterations
            row_tot.append(row[1])

        helpers.printmatrix(res)
    helpers.printmatrix([header] + res_tot)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: dominance_estimation.py Projeto: gdejaege/Missing-values-management-in-Multi-Criteria-Decision-Aid

def check_if_dominance_interval(iterations=100):
    """Estimate the frequency of three outcomes for each data set.

    For each of the SHA, EPI and HR data sets, ``n = 100`` alternatives are
    sampled (random seed fixed to 0) and max-normalised column-wise.
    ``check_if_interval_iteration`` is called ``iterations`` times; every
    index it yields increments one of three counters, reported under the
    column labels "Neither", "OR" and "AND".  The resulting frequencies are
    printed as one row per data set.

    Args:
        iterations: number of repetitions per data set (must be non-zero,
            since the counters are divided by it).
    """
    random.seed(0)
    datasets = ("SHA", "EPI", "HR")
    header = ["", "Neither", "OR", "AND"]
    n = 100

    res = []
    for dataset in datasets:
        print("\n"*2, "-"*35, dataset, "-"*35, "\n")
        filename = 'data/' + dataset + '/raw.csv'
        # Parse the file only once; the weights it also holds are unused.
        A = dr.open_raw(filename)[0]
        A = random.sample(A, n)
        A = normalize(A, axis=0, copy=True, norm='max')
        A = [list(alt) for alt in A]
        k = len(A[0])

        res_dataset = [0] * 3
        for _ in range(iterations):
            for col in check_if_interval_iteration(A, n, k):
                res_dataset[col] += 1
        res.append([dataset] + [count / iterations for count in res_dataset])

    helpers.printmatrix([header] + res)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: PrometheeMV_class.py Projeto: gdejaege/Missing-values-management-in-Multi-Criteria-Decision-Aid

def test_ranking(dataset='HDI'):
    """Print the PII and PIIMV rankings side by side (no missing values).

    The first five alternatives of ``data/<dataset>/raw.csv`` are ranked
    with both ``prom.PrometheeII`` and ``prom.PrometheeMV`` using identical
    parameters.  Each printed line shows, for one rank position, the
    1-based alternative index and its score for PII, then the same for
    PIIMV; the two halves should match.

    Args:
        dataset: name of the data set directory under ``data/``.  For
            'HDI' fixed weights ``[0.5, 0.5]`` and ceils ``[3, 3]`` are
            used; otherwise the file's weights (or None if empty) and
            ``seed=1`` are passed.
    """
    data_set = 'data/' + dataset + '/raw.csv'
    # Parse the file once instead of once per returned field.
    raw = dr.open_raw(data_set)
    alts, weights = raw[0][0:5], raw[1]
    if weights == []:
        weights = None
    if dataset == 'HDI':
        weights = [0.5, 0.5]
        ceils = [3, 3]
        promethee = prom.PrometheeII(alts, weights=weights, ceils=ceils)
        prometheeMV = prom.PrometheeMV(alts, weights=weights, ceils=ceils)
    else:
        seed = 1
        promethee = prom.PrometheeII(alts, weights=weights, seed=seed)
        prometheeMV = prom.PrometheeMV(alts, weights=weights, seed=seed)
    scores = promethee.scores
    scoresMV = prometheeMV.scores
    rank = promethee.ranking
    rankMV = prometheeMV.ranking
    for i in range(len(rank)):
        alt, alt_mv = rank[i], rankMV[i]
        # Each score is looked up with the alternative printed next to it
        # (the MV score was previously indexed with the PII ranking, which
        # mislabelled it whenever the two rankings differed).
        print(
            str(alt + 1) + '::' + str(scores[alt]) + " :::: " +
            str(alt_mv + 1) + '::' + str(scoresMV[alt_mv]))

Exemplo n.º 4

0

Exibir arquivo

Arquivo: missing_values_comparison.py Projeto: gdejaege/Missing-values-management-in-Multi-Criteria-Decision-Aid

def test_guess_eval(dataset="SHA", alt_num=15, del_number=1, seed=0):
    """Run the estimation guess on a random subset with deleted evaluations.

    Args:
        dataset: name of the data set directory under ``data/``.
        alt_num: number of alternatives sampled from the data set.
        del_number: forwarded to ``mv.delete_l_evaluations``.
        seed: forwarded to ``mv.delete_l_evaluations``.
    """
    filename = 'data/' + dataset + '/raw.csv'
    # Only the alternatives are needed; parse the file a single time.
    all_alts = dr.open_raw(filename)[0]
    alts = random.sample(all_alts, alt_num)

    alts = mv.delete_l_evaluations(alts, del_number, seed)
    mv.guess_all_bests_estimations(alts)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: missing_values_comparison.py Projeto: gdejaege/Missing-values-management-in-Multi-Criteria-Decision-Aid

def test_check_train_dom(dataset="SHA", alt_num=100):
    """Run ``mv.check_train_dom`` on a random sample of every data set.

    Args:
        dataset: kept for backward compatibility only; it is ignored and
            the HR, SHA, EPI and HP data sets are always all processed.
        alt_num: number of alternatives sampled from each data set.
    """
    datasets = ('HR', 'SHA', 'EPI', 'HP')
    for name in datasets:
        print('---------------------- ', name, ' -----------------------')
        filename = 'data/' + name + '/raw.csv'
        # Only the alternatives are needed; parse the file a single time.
        all_alts = dr.open_raw(filename)[0]
        alts = random.sample(all_alts, alt_num)
        mv.check_train_dom(alts)

Exemplo n.º 6

0

Exibir arquivo

def test_ranking(dataset='HDI'):
    """Test that PII computes the same ranking as in the RobustPII article.

    The first 20 alternatives of ``data/<dataset>/raw.csv`` are ranked with
    ``prom.PrometheeII`` and printed as ``<index>::<score>`` lines.

    The following mapping should be applied between countries and indices:
        0 - Norway            10 - Singapore
        1 - Australia         11 - Hong Kong
        2 - Switzerland       12 - Liechtenstein
        3 - Denmark           13 - Sweden
        4 - Netherlands       14 - United Kingdom
        5 - Germany           15 - Iceland
        6 - Ireland           16 - Korea
        7 - United States     17 - Israel
        8 - Canada            18 - Luxembourg
        9 - New Zealand       19 - Japan

    The expected ranking is listed below.  Note that this listing uses the
    0-based indices of the mapping above, whereas the function prints
    1-based indices (``rank[i] + 1``):
        2::0.31491228070175437
        1::0.2500000000000007
        8::0.18245614035087707
        11::0.18070175438596484
        19::0.16315789473684195
        17::0.16228070175438677
        9::0.059649122807016945
        13::0.058771929824561676
        0::0.04210526315789358
        5::0.007894736842106042
        14::-0.02543859649122777
        16::-0.02807017543859552
        10::-0.07105263157894759
        4::-0.08070175438596594
        18::-0.09824561403508743
        15::-0.13771929824561518
        6::-0.14999999999999925
        3::-0.17631578947368398
        7::-0.28859649122807074
        12::-0.3657894736842105

    Args:
        dataset: name of the data set directory under ``data/``.  For
            'HDI' fixed weights ``[0.5, 0.5]`` and ceils ``[3, 3]`` are
            used; otherwise the file's weights (or None if empty) and
            ``seed=1`` are passed and the resulting ceils/weights printed.
    """
    data_set = 'data/' + dataset + '/raw.csv'
    # Parse the file once instead of once per returned field.
    raw = dr.open_raw(data_set)
    alts, weights = raw[0][0:20], raw[1]
    if weights == []:
        weights = None
    if dataset == 'HDI':
        weights = [0.5, 0.5]
        ceils = [3, 3]
        promethee = prom.PrometheeII(alts, weights=weights, ceils=ceils)
    else:
        seed = 1
        promethee = prom.PrometheeII(alts, weights=weights, seed=seed)
        print(promethee.ceils, promethee.weights)
        print(sum(promethee.weights))
    scores = promethee.scores
    for alt in promethee.ranking:
        print(str(alt + 1) + '::' + str(scores[alt]))

Exemplo n.º 7

0

Exibir arquivo

Arquivo: dominance_estimation.py Projeto: gdejaege/Missing-values-management-in-Multi-Criteria-Decision-Aid

def check_dominance_assumption(iterations=10):
    """Test if dominance is still respected once a criterion is restored.

    For each of the SHA, EPI and HR data sets, ``n = 100`` alternatives are
    sampled and max-normalised column-wise.  At every iteration a random
    alternative is taken out of the set and one random evaluation of it is
    replaced by ``NULL``.  ``de.train_dom`` provides a list of criterion
    indices and ``de.count_dominant_alts`` is evaluated twice: on those
    indices with the incomplete alternative, then with criterion ``c``
    added and the complete alternative.  The mean and standard deviation
    of both pairs of counts, of their sums, and of the corresponding
    ratios (0 when the denominator is 0) are printed per data set.

    Args:
        iterations: number of random (alternative, criterion) draws per
            data set.
    """
    datasets = ("SHA", "EPI", "HR")
    n = 100
    lines = ["Dom+", "Dc+", "ratio", "dom-", "dc-", "ratio",
             "Tot", "tot_c", "ratio"]

    for dataset in datasets:
        print("\n"*2, "-"*35, dataset, "-"*35, "\n")
        filename = 'data/' + dataset + '/raw.csv'
        # Only the alternatives are needed; parse the file a single time.
        A = dr.open_raw(filename)[0]
        A = random.sample(A, n)
        A = normalize(A, axis=0, copy=True, norm='max')
        A = [list(alt) for alt in A]
        k = len(A[0])

        # One series of observations per reported line.
        res = [[] for _ in lines]

        for _ in range(iterations):
            i = random.randint(0, n - 1)
            c = random.randint(0, k - 1)
            # Temporarily remove the alternative so it is not compared
            # with itself; it is re-inserted at the end of the iteration.
            a = A.pop(i)
            a_miss = a[:]
            a_miss[c] = NULL
            indices = de.train_dom(A, c, a_miss)

            dominant, dominated = de.count_dominant_alts(A, indices, a_miss)
            indices.append(c)
            dominant_c, dominated_c = de.count_dominant_alts(A, indices, a)
            total = dominated + dominant
            total_c = dominated_c + dominant_c

            res[0].append(dominant)
            res[1].append(dominant_c)
            res[2].append(dominant_c/dominant if dominant else 0)

            res[3].append(dominated)
            res[4].append(dominated_c)
            res[5].append(dominated_c/dominated if dominated else 0)

            res[6].append(total)
            res[7].append(total_c)
            res[8].append(total_c/total if total else 0)

            A.insert(i, a)

        final_res = [[" ", "   ", "MEAN", "STD"]]
        for label, values in zip(lines, res):
            final_res.append([label, " ", np.mean(values), np.std(values)])

        helpers.printmatrix(final_res, width=5)

Exemplo n.º 8

0

Exibir arquivo

def test_rr_analysis(data='HDI'):
    """Check that the rank reversals are correct.

    These rank reversals should be compared to the ones occurring in the
    article:
        'About the computation of robust PROMETHEE II rankings: empirical
        evidence' by De Smet.

    The following mapping should be applied between countries and indices
    for the HDI data set:
        0 - Norway            10 - Singapore
        1 - Australia         11 - Hong Kong
        2 - Switzerland       12 - Liechtenstein
        3 - Denmark           13 - Sweden
        4 - Netherlands       14 - United Kingdom
        5 - Germany           15 - Iceland
        6 - Ireland           16 - Korea
        7 - United States     17 - Israel
        8 - Canada            18 - Luxembourg
        9 - New Zealand       19 - Japan

    Args:
        data: one of 'HDI', 'SHA' or 'EPI'.

    Raises:
        ValueError: if ``data`` is not one of the supported data sets.
    """
    # Data initialisation according to the data set
    if data == 'HDI':
        data_set = 'data/HDI/raw.csv'
        alts = dr.open_raw(data_set)[0]
        ceils = [3, 3]
        weights = [0.5, 0.5]
        promethee = prom.PrometheeII(alts, weights=weights, ceils=ceils)

    elif data == 'SHA':
        data_set = 'data/SHA/raw_20.csv'
        alts, weights, coeff, ceils = dr.open_raw(data_set)
        promethee = prom.PrometheeII(alts, weights=weights, ceils=ceils)

    elif data == 'EPI':
        data_set = 'data/EPI/raw.csv'
        alts = dr.open_raw(data_set)[0]
        alts = alts[0:20]
        seed = 0
        promethee = prom.PrometheeII(alts, seed=seed)

    else:
        # Fail early with a clear message instead of an unbound-variable
        # error further down.
        raise ValueError("unknown data set: " + repr(data))

    print("Rank reversals:")
    rr = promethee.compute_rr_number(True)
    print("rank reverasal quantity: " + str(rr))
    rr_instances = promethee.analyse_rr()
    print('rank reversal recap :')
    print(rr_instances)

Exemplo n.º 9

0

Exibir arquivo

Arquivo: missing_values_comparison.py Projeto: gdejaege/Missing-values-management-in-Multi-Criteria-Decision-Aid

def compare_rankings(alt_num=20, it=500, del_num=1):
    """Compare missing-value replacement strategies on several data sets.

    For each of the HR, SHA, EPI and HP data sets,
    ``compare_rankings_once`` is called ``it`` times and returns one value
    per strategy (stored under the strategy's key).  The printed matrix has
    one row per strategy: the per-data-set averages, then the mean of
    those averages and the standard deviation over all individual values.

    Args:
        alt_num: forwarded to ``compare_rankings_once``.
        it: number of repetitions per data set (must be non-zero).
        del_num: forwarded to ``compare_rankings_once``.
    """
    random.seed(1)
    datasets = ('HR', 'SHA', 'EPI', 'HP')
    header = ["    "] + list(datasets) + ["mean", "std"]
    # Disabled strategies: mv.replace_by_creg, mv.replace_by_ereg,
    # mv.replace_by_pij.
    methods = {
        'sreg': mv.replace_by_sreg,
        'dom': mv.replace_by_dominance,
        'd_diff': mv.replace_by_dominance_smallest_diff,
        'knn': mv.replace_by_knn,
        'mean': mv.replace_by_mean,
        'med': mv.replace_by_med
    }

    results = {method: [] for method in methods}
    meth_std = {method: [] for method in methods}

    for dataset in datasets:
        print('---------------------- ', dataset, ' -----------------------')
        t0 = time.time()
        results_dataset = {method: [] for method in methods}

        filename = 'data/' + dataset + '/raw.csv'
        # Parse the file once instead of once per returned field.
        raw = dr.open_raw(filename)
        all_alts, weights = raw[0], raw[1]
        if weights == []:
            weights = None

        for _ in range(it):
            taus = compare_rankings_once(all_alts, alt_num, weights, del_num,
                                         methods)
            for method in methods:
                results_dataset[method].append(taus[method])

        for method in methods:
            results[method].append(sum(results_dataset[method]) / it)
            # Keep every individual value for the global standard deviation.
            meth_std[method] += results_dataset[method]
        print('time:', time.time() - t0)

    final_matrix = [header]
    for m in methods:
        results[m].append(np.mean(results[m]))
        results[m].append(np.std(meth_std[m]))
        final_matrix.append([m] + results[m])

    helpers.printmatrix(final_matrix)

Exemplo n.º 10

0

Exibir arquivo

def compare_refflows():
    """Verify the scores computed by a ReferencedPII object.

    A ``prom.ReferencedPII`` is built on the HDI data set with a randomly
    drawn seed (printed first).  For every alternative, a
    ``prom.PrometheeII`` is built on the reference profiles plus that
    alternative; the last score it yields must be within 1e-5 of the score
    stored by the ReferencedPII object.  "ok" is printed on agreement,
    otherwise a warning followed by the PrometheeII scores.
    """
    random.seed()
    seed = random.randint(1, 1000)
    print(seed)

    alternatives = dr.open_raw('data/' + str('HDI') + '/raw.csv')[0]
    referenced = prom.ReferencedPII(alternatives, strategy=prom.strategy2,
                                    seed=seed)

    reference_profiles = referenced.SRP
    expected_scores = referenced.scores
    for index, alternative in enumerate(alternatives):
        # Reference profiles first, candidate alternative last.
        candidates = reference_profiles[:]
        candidates.append(alternative)
        scores = prom.PrometheeII(candidates, seed=seed).scores
        if abs(scores[-1] - expected_scores[index]) < 1e-5:
            print("ok")
            continue
        print("There is something wrong")
        print(scores)

Exemplo n.º 11

0

Exibir arquivo

def test_functions():
    """Check the constraint handling of the adaptive procedure.

    Three constraints are registered on an ``aqp.Adaptive_procedure`` built
    on the EPI data set, then ``is_admissible`` is queried with a sequence
    expected to be rejected and one expected to be accepted.  Prints True
    when both answers are as expected, False otherwise.
    """
    alts = dr.open_raw('data/' + str('EPI') + '/raw.csv')[0]
    procedure = aqp.Adaptive_procedure(alts,
                                       seed=0,
                                       alt_num=10,
                                       ref_number=4,
                                       pts_per_random_it=2,
                                       desired_points=10)

    # Constraint verification
    for constraint in ((6, 8), (7, 8), (3, 5)):
        procedure.add_constraint(constraint)

    wrongly_accepted = procedure.is_admissible([1, 2, 9, 4, 5, 3, 6, 7, 8])
    rightly_accepted = procedure.is_admissible([1, 2, 3, 4, 5, 9, 6, 7, 8])
    print(not wrongly_accepted and bool(rightly_accepted))

Exemplo n.º 12

0

Exibir arquivo

def first_search(pop_size=600, mut_prob=0.01, MAXIT=50):
    """Try to find sets of reference profiles reproducing the PII ranking.

    The genetic search is run with 15 different seeds for every data set
    (SHA, EPI, GEQ) and every subset size (20, 25, 30, 40, 50).  A seed
    counts as a success when the returned tau exceeds 1 - 1e-5; otherwise
    it is recorded as a failure together with tau truncated to three
    decimals.  Results are saved per subset size with ``save_res_to_file``.
    Once some positive results have been found, use the retry function on
    the seeds that failed.
    """
    for data_set in ['SHA', 'EPI', 'GEQ']:
        input_file = 'data/' + str(data_set) + '/raw.csv'
        output = 'res/ReferencedPII/genetic_search/' + str(data_set) + '.txt'
        alts = dr.open_raw(input_file)[0]
        for alt_num in [20, 25, 30, 40, 50]:
            succes, failures, failures_tau = [], [], []
            for s in range(15):
                started = time.time()
                tau = GS.genetic_search(alts, seed=s, weights=None,
                                        ceils=None, alt_num=alt_num,
                                        pop_size=pop_size, mut_prob=mut_prob,
                                        MAXIT=MAXIT)
                elapsed = time.time() - started
                print(str(s) + ', time: ' + str(elapsed) + ', tau: '
                      + str(tau))
                if tau > 1 - 1e-5:
                    succes.append(s)
                    continue
                failures.append(s)
                failures_tau.append(int(tau*1000)/1000)
            save_res_to_file(output, alt_num, succes, failures, failures_tau)

Exemplo n.º 13

0

Exibir arquivo

def test_rr_counting_function():
    """Test the function counting the rank reversals between two rankings.

    The rankings compared are:
        * [1, 2, 3, 4, 5, 6]
        * [6, 4, 3, 1, 5]
    There should therefore be 7 rank reversals:
        (6,1);(6,3);(6,4);(6,5);
        (4,3);(4,1);
        (3,1)
    """
    # The parameters do not matter here: the object is only needed to get
    # access to its compare_rankings method.
    alts = dr.open_raw('data/HDI/raw.csv')[0]
    promethee = prom.PrometheeII(alts, weights=[0.5, 0.5],
                                 coefficients=[0.61224, 1.2])

    # The actual test starts here.
    ranking_init = [1, 2, 3, 4, 5, 6]
    ranking_new = [6, 4, 3, 1, 5]
    rr = promethee.compare_rankings(ranking_init, ranking_new, 2)
    # Printing the inputs shows whether the call modified its arguments.
    print(ranking_init)
    print(ranking_new)
    print(rr)

Exemplo n.º 14

0

Exibir arquivo

Arquivo: data_reader.py Projeto: gdejaege/Missing-values-management-in-Multi-Criteria-Decision-Aid

def test():
    """Print the alternatives read from the HDI data set.

    This exercises the data sets and the data_reader module.  The file
    contains:

        Alternatives
        #####
        81.6,12.6
        82.4,13
        83,12.8
        80.2,12.7
        81.6,11.9
        80.9,13.1
        80.9,12.2
        79.1,12.9
        82,13
        81.8,12.5
        83,10.6
        84,11.2
        80,11.8
        82.2,12.1
        80.7,13.1
        82.6,10.6
        81.9,11.9
        82.4,12.5
        81.7,11.7
        83.5,11.5
    """
    alternatives = dr.open_raw('data/HDI/raw.csv')[0]
    print(alternatives)

Exemplo n.º 15

0

Exibir arquivo

Arquivo: ReferencedPII_references_quantity.py Projeto: gdejaege/Robust-and-Referenced-Promethee-study

def count_draws(threshold=0.001):
    """Count the draws of ReferencedPII on the SHA, EPI and GEQ data sets.

    For every (number of reference profiles, number of alternatives) pair,
    the values returned by ``draws_quantity`` are summed over 20 seeds and
    the three data sets.  The table is written with ``print_to_file``.

    Args:
        threshold: forwarded to ``draws_quantity``; also part of the
            output file name.
    """
    data_sets = ['SHA', 'EPI', 'GEQ']
    output = "res/ReferencedPII/reference_quantity/thresh_" + str(threshold) \
        + ".txt"

    # Change these parameters if needed
    ref_numbers = [2, 3, 5, 10, 15, 25]
    alternative_numbers = [10, 20, 40, 80]
    seed_list = range(20)

    ref_set_strategy = prom.strategy1

    # Read each data set once up front rather than re-parsing the same
    # file for every (ref_number, alt_number, seed) combination.
    alts_by_data_set = {
        data_set: dr.open_raw("data/" + data_set + "/raw.csv")[0]
        for data_set in data_sets
    }

    all_res = []
    for ref_number in ref_numbers:
        res = []
        for alt_number in alternative_numbers:
            tot = 0
            for seed in seed_list:
                for data_set in data_sets:
                    ref_prom = prom.ReferencedPII(alts_by_data_set[data_set],
                                                  alt_num=alt_number,
                                                  strategy=ref_set_strategy,
                                                  seed=seed,
                                                  ref_num=ref_number)
                    tot += ref_prom.draws_quantity(ref_prom.scores, threshold)
            res.append(tot)
        all_res.append(res)
    print_to_file(output, ref_numbers, alternative_numbers, seed_list, all_res)

Exemplo n.º 16

0

Exibir arquivo

def test_ranking():
    """Print the RobustPII ranking of the HDI data set.

    The output is meant to be compared with the ranking of the article:
        'About the computation of robust PROMETHEE II rankings: empirical
        evidence' by De Smet.

    The following mapping should be applied between countries and indices:
        0 - Norway            10 - Singapore
        1 - Australia         11 - Hong Kong
        2 - Switzerland       12 - Liechtenstein
        3 - Denmark           13 - Sweden
        4 - Netherlands       14 - United Kingdom
        5 - Germany           15 - Iceland
        6 - Ireland           16 - Korea
        7 - United States     17 - Israel
        8 - Canada            18 - Luxembourg
        9 - New Zealand       19 - Japan
    """
    alts = dr.open_raw('data/HDI/raw.csv')[0]
    robust = prom.RobustPII(alts, weights=[0.5, 0.5], ceils=[3, 3],
                            R=10000, m=5)

    ranking = robust.ranking
    scores = robust.scores
    # One "<alternative index>::<score>" line per rank position.
    for position in range(len(ranking)):
        alt = ranking[position]
        print(str(alt) + '::' + str(scores[alt]))

Exemplo n.º 17

0

Exibir arquivo

Arquivo: PrometheeMV_class.py Projeto: gdejaege/Missing-values-management-in-Multi-Criteria-Decision-Aid

def test_PMV(dataset="HDI"):
    """Compare PII and PIIMV rankings, this time with missing values.

    The first ten alternatives of ``data/<dataset>/raw.csv`` are printed,
    deep-copied, then passed to ``mv.delete_evaluations`` (proportion 0.2,
    seed 1) and printed again.  Three rankings are computed: PrometheeII
    and PrometheeMV on the untouched copy, and PrometheeMV on the altered
    alternatives.  Each printed line shows, for one rank position, the
    1-based alternative index and its score for each of the three.

    Args:
        dataset: name of the data set directory under ``data/``.
    """
    data_set = 'data/' + dataset + '/raw.csv'
    alts = dr.open_raw(data_set)[0][:10]
    proportion = 0.2
    seed = 1
    print("complete :")
    prom.printmatrix(alts)
    # Keep an untouched copy: the call below works on `alts` directly.
    original_alts = copy.deepcopy(alts)
    mv.delete_evaluations(alts, proportion, seed)
    print("incomplete :")
    prom.printmatrix(alts)

    print("Promethee:")
    promethee = prom.PrometheeII(original_alts, seed=seed)
    rank = promethee.ranking
    scores = promethee.scores

    print("PrometheeMV without missing:")
    prometheeMV1 = prom.PrometheeMV(original_alts, seed=seed)
    rankMV1 = prometheeMV1.ranking
    scoresMV1 = prometheeMV1.scores

    print("PrometheeMV:")
    prometheeMV = prom.PrometheeMV(alts, seed=seed)
    rankMV = prometheeMV.ranking
    scoresMV = prometheeMV.scores
    for i in range(len(rank)):
        alt, alt_mv1, alt_mv = rank[i], rankMV1[i], rankMV[i]
        # Each score is looked up with the alternative printed next to it
        # (both MV scores were previously indexed with the PII ranking,
        # mislabelling them whenever the rankings differed).
        print(
            str(alt + 1) + '::' + str(scores[alt]) + " :::: " +
            str(alt_mv1 + 1) + '::' + str(scoresMV1[alt_mv1]) + " :::: " +
            str(alt_mv + 1) + '::' + str(scoresMV[alt_mv]))

Exemplo n.º 18

0

Exibir arquivo

def test_ranking_SHA(dataset='SHA'):
    """Print the PII ranking of a max-normalised data set.

    The alternatives of ``data/<dataset>/raw.csv`` are normalised
    column-wise (max norm), printed, and ranked with ``prom.PrometheeII``
    using fixed weights and the (25, 75) percentiles.  The ranking is
    printed as ``<index>::<score>`` lines, followed by two spot values
    (``pi[5][6] / 2`` and ``alternatives[54][3]``).

    Args:
        dataset: name of the data set directory under ``data/``.  The
            hard-coded six weights and the spot indices presumably match
            the SHA data set only.
    """
    data_set = 'data/' + dataset + '/raw.csv'
    # The weights stored in the file are ignored (fixed ones are used
    # below), so read the file once and keep only the alternatives.
    A = dr.open_raw(data_set)[0]
    A = normalize(A, axis=0, copy=True, norm='max')
    print(A)
    A = [list(alt) for alt in A]
    weights = [0.1, 0.2, 0.2, 0.2, 0.2, 0.1]
    percentiles = (25, 75)
    promethee = prom.PrometheeII(A, weights=weights, percentiles=percentiles)
    scores = promethee.scores
    for alt in promethee.ranking:
        print(str(alt) + '::' + str(scores[alt]))

    print(promethee.pi[5][6] / 2)
    print(promethee.alternatives[54][3])

Exemplo n.º 19

0

Exibir arquivo

def analyse(alt_num=20,
            seeds=range(0, 3),
            data_sets=['EPI', 'SHA', 'GEQ'],
            rounds=20,
            make_pdf=False):
    """Analyse the results of the adaptive questioning procedure.

    For every data set and seed, an ``aqp.Adaptive_procedure`` is executed
    for ``rounds`` rounds and its result is written with
    ``write_correct_pts``.  Optionally a boxplot of the procedure's
    ``kendall_taus`` is saved as a PDF in the output directory.

    Args:
        alt_num: number of alternatives given to the procedure.
        seeds: currently ignored, see the note below.
        data_sets: names of the data set directories under ``data/``
            (only iterated, never modified).
        rounds: forwarded to ``procedure.execute``.
        make_pdf: when true, save one boxplot PDF per (data set, seed).
    """
    # NOTE(review): the `seeds` argument is overridden here, so only seed 3
    # is ever processed -- this looks like a debugging leftover; confirm
    # before honouring the parameter.
    seeds = range(3, 4)

    output_dir = 'res/ReferencedPII/adaptive_questioning_procedure/'
    # The results file is opened in append mode (which creates it) but
    # nothing is written to it while the stdout redirection stays disabled.
    # The context manager guarantees the handle is closed even if the
    # procedure raises.
    with open(output_dir + "adaptative_questionning_results2.txt", "a"):
        for data_set in data_sets:
            input_file = 'data/' + str(data_set) + '/raw.csv'
            alts = dr.open_raw(input_file)[0]
            for seed in seeds:
                # NOTE(review): this path is not under `output_dir`;
                # verify that it is the intended location.
                correct_pts_output = (
                    'res/ReferencedPII_questioning_procedure/' +
                    data_set + '/' + str(seed) + '.csv')
                title = data_set + ' with ' + str(alt_num) + \
                    ' alternatives (seed ' + str(seed) + ')'
                title_plot = (
                    'Adaptive questioning procedure on a subset of the ' +
                    data_set + ' data set with ' + str(alt_num) +
                    ' alternatives')
                print(title)
                # Second print kept on purpose: it is the one that would go
                # to the results file with `redirect_stdout(output_file)`.
                print(title)
                procedure = aqp.Adaptive_procedure(alts,
                                                   seed=seed,
                                                   alt_num=alt_num,
                                                   pts_per_random_it=200,
                                                   desired_points=3000)
                corrects = procedure.execute(rounds)
                write_correct_pts(corrects, correct_pts_output)
                print()
                if make_pdf:
                    # Boxplot of the rankings
                    fig = plt.figure(1, figsize=(9, 6))
                    plt.suptitle(title_plot)
                    ax = fig.add_subplot(111)
                    ax.set_ylim(-0.3, 1.1)
                    ax.yaxis.set_major_locator(
                        ticker.FixedLocator([-0.25, 0, 0.25, 0.5, 0.75, 1]))
                    ax.boxplot(procedure.kendall_taus)
                    fig.savefig(output_dir + title + '.pdf',
                                bbox_inches='tight')
                    plt.clf()

Exemplo n.º 20

0

Exibir arquivo

Arquivo: PrometheeMV_class.py Projeto: gdejaege/Missing-values-management-in-Multi-Criteria-Decision-Aid

def test_replacements():
    """Test that pij are correctly replaced.

    A hand-written preference matrix is printed row by row, then the first
    matrix returned by ``compute_pairwise_comparisons`` for four
    single-criterion alternatives (one of them being '*') is printed the
    same way after a "second round" separator, for visual comparison.
    """
    # The object is only needed for its method; the HDI data is irrelevant.
    hdi_alts = dr.open_raw('data/HDI/raw.csv')[0]
    prometheeMV = prom.PrometheeMV(hdi_alts, seed=1, method='mean')

    alternatives = [[1], [0], ['*'], [2]]
    pref_functions = [myf]
    reference = [
        [0, 1, '*', 0],
        [0, 0, '*', 0],
        ['*', '*', 0, '*'],
        [1, 1, '*', 0],
    ]

    for row in reference:
        print(row)

    computed = prometheeMV.compute_pairwise_comparisons(alternatives,
                                                        pref_functions)
    print("second round")
    for row in computed[0]:
        print(row)

Exemplo n.º 21

0

Exibir arquivo

Arquivo: helpers.py Projeto: gdejaege/Missing-values-management-in-Multi-Criteria-Decision-Aid

def get_dataset(dataset, n=None, random_alts=False, normalised=True):
    """Load alternatives from ``data/<dataset>/raw.csv``.

    Args:
        dataset: name of the data set directory under ``data/``.
        n: number of alternatives to keep; all of them when None.
        random_alts: when true, draw the ``n`` alternatives with
            ``random.sample``; otherwise take the first ``n``.
        normalised: when true, apply a column-wise max normalisation.

    Returns:
        The selected alternatives as a list of lists.
    """
    alternatives = dr.open_raw('data/' + dataset + '/raw.csv')[0]
    size = len(alternatives) if n is None else n

    if random_alts:
        selected = random.sample(alternatives, size)
    else:
        selected = alternatives[:size]

    if normalised:
        selected = normalize(selected, axis=0, copy=True, norm='max')

    return [list(alt) for alt in selected]

Exemplo n.º 22

0

Exibir arquivo

Arquivo: ReferencedPII_strategies_comparison.py Projeto: gdejaege/Robust-and-Referenced-Promethee-study

def compare(tests_qty=3):
    """Compare the different reference-set strategies.

    For each data set (EPI, SHA, GEQ) and each seed in
    ``range(tests_qty)``, the ranking of a 30-alternative PrometheeII is
    compared with the ranking of a ReferencedPII built with each of the
    four strategies.  Kendall's tau, truncated to three decimals, is
    collected per strategy and written with ``print_to_file``.
    """
    output = "res/ReferencedPII/strategies/comparisons.txt"
    alt_num = 30
    strategies = [
        prom.strategy1, prom.strategy2, prom.strategy3, prom.strategy4
    ]

    # One list of taus per strategy, in the order of `strategies`.
    kendall_taus = [[] for _ in strategies]
    titles = []

    for data_set in ['EPI', 'SHA', 'GEQ']:
        alternatives = dr.open_raw('data/' + str(data_set) + '/raw.csv')[0]

        for seed in range(tests_qty):
            prom_ranking = prom.PrometheeII(alternatives,
                                            seed=seed,
                                            alt_num=alt_num).ranking
            titles.append(data_set + str(seed))

            for taus, strategy in zip(kendall_taus, strategies):
                referenced = prom.ReferencedPII(alternatives,
                                                seed=seed,
                                                strategy=strategy,
                                                alt_num=alt_num)
                tau = stats.kendalltau(referenced.ranking, prom_ranking)[0]
                taus.append(int(tau * 1000) / 1000)

    print_to_file(output, titles, kendall_taus, tests_qty)

Exemplo n.º 23

0

Exibir arquivo

def retry_failed(data_set='SHA', alt_numbers=[20], failed_seeds=[[7, 8]],
                 ref_number=5, maxrep=1, pop_size=600, mut_prob=0.01, MAXIT=50):
    """Retry the search for subsets which failed the first time.

    Args:
        data_set: name of the data set directory under ``data/``.
        alt_numbers: subset sizes to retry (only read, never modified).
        failed_seeds: for each entry of ``alt_numbers``, the list of seeds
            to retry (only read, never modified).
        ref_number: passed to the genetic search as ``SRP_size``.
        maxrep: maximal number of search attempts per seed.
        pop_size: forwarded to ``GS.genetic_search``.
        mut_prob: forwarded to ``GS.genetic_search``.
        MAXIT: forwarded to ``GS.genetic_search``.

    For each seed the best tau over the attempts is kept; the seed is a
    success when it exceeds 1 - 1e-5, otherwise it is recorded as a failure
    with tau truncated to three decimals.  Results are saved per subset
    size with ``save_res_to_file``.
    """
    weights, ceils = None, None

    # Here we retry the seeds failed with different parameters
    t0 = time.time()
    input_file = 'data/' + str(data_set) + '/raw.csv'
    output = 'res/ReferencedPII/genetic_search/' + str(data_set) + '.txt'
    alts = dr.open_raw(input_file)[0]
    for i, alt_num in enumerate(alt_numbers):
        succes = []
        failures = []
        failures_tau = []
        for s in failed_seeds[i]:
            t1 = time.time()
            tau = 0
            it = 0
            while tau < 1 - 1e-5 and it < maxrep:
                tau2 = GS.genetic_search(alts, seed=s, weights=weights,
                                         SRP_size=ref_number, ceils=ceils,
                                         alt_num=alt_num, pop_size=pop_size,
                                         mut_prob=mut_prob, MAXIT=MAXIT)
                tau = max(tau, tau2)
                print(str(s) + ', total time: ' + str(time.time() - t0) +
                      ", it time: " + str(time.time() - t1) + ', tau: '
                      + str(tau))
                it += 1
            if tau > 1 - 1e-5:
                succes.append(s)
            else:
                failures.append(s)
                tau_rounded = int(tau*1000)/1000
                failures_tau.append(tau_rounded)
        save_res_to_file(output, alt_num, succes, failures, failures_tau)
    # Report the total run time.  This used `t1`, which is unbound when no
    # seed is processed and otherwise only covers the last seed.
    print("time :" + str(time.time() - t0))

Exemplo n.º 24

0

Exibir arquivo

Arquivo: ReferencedPII_search_SRP_properties.py Projeto: gdejaege/Robust-and-Referenced-Promethee-study

def SRP_from_aqp(data_set="GEQ", seeds=range(3), alt_num=20):
    """Analyse the correct SRP found with this procedure.

    For every seed, the reference sets stored in
    ``res/ReferencedPII/adaptive_questioning_procedure/<data_set>/<seed>.csv``
    are compared, criterion by criterion, with the alternatives held by a
    ``PrometheeII`` instance built with the same seed: the ratio of the means
    and the ratio of the variances are computed for each reference set, then
    the mean and the variance of those ratios over all reference sets are
    formatted as one line per seed. The four resulting tables are appended
    to ``res/ReferencedPII/SRP_analysis/<data_set>``.

    Args:
        data_set: name of the dataset (sub-directory of ``data/``).
        seeds: iterable of integer seeds to analyse.
        alt_num: forwarded as ``alt_num`` to ``PrometheeII`` and to the
            adaptive procedure.
    """
    all_alts = dr.open_raw("data/" + data_set + "/raw.csv")[0]

    # One text line per seed: the seed followed by one signed value per
    # criterion.
    criteria_count = len(all_alts[0])
    template_ratio = '{0:^d}|' + ''.join(
        '{' + str(position) + ':+.3F}|'
        for position in range(1, criteria_count + 1))

    # Output
    output_file = "res/ReferencedPII/SRP_analysis/" + data_set
    # Input
    SRP_prefix = "res/ReferencedPII/adaptive_questioning_procedure/"

    var_var_lines, var_mean_lines = [], []
    mean_var_lines, mean_mean_lines = [], []

    for seed in seeds:
        all_SRP = dr.open_raw_RS(
            SRP_prefix + data_set + "/" + str(seed) + ".csv")

        # Get the alternative subset used for the concerned seed
        promethee = prom.PrometheeII(all_alts, seed=seed, alt_num=alt_num)
        alts_per_criterion = [list(column)
                              for column in zip(*promethee.alternatives)]

        # Check that the parameters (= alternative subset) are indeed the
        # same as the ones used by the questioning procedure
        questioning_procedure = aqp.Adaptive_procedure(
            all_alts,
            seed=seed,
            alt_num=alt_num,
            ref_number=4,
            pts_per_random_it=200,
            desired_points=3000)
        parameters_match = prom.check_parameters(
            questioning_procedure.promethee, promethee)
        if not parameters_match:
            print("error")

        # Each entry of the two lists below belongs to one SRP and holds one
        # ratio per criterion, e.g.
        #     all_mean_ratios[0] = [mean(c1(refs)) / mean(c1(alts)), ...]
        all_mean_ratios = []
        all_var_ratios = []

        for SRP in all_SRP:
            # matrix = list of criteria which are lists of the references'
            # evaluations
            refs_per_criterion = [list(column) for column in zip(*SRP)]

            # ratio between the estimator on the SRP and the one on the alts
            mean_ratios_of_set, var_ratios_of_set = [], []
            for crit, ref_column in enumerate(refs_per_criterion):
                var_ref = numpy.var(ref_column)
                mean_ref = numpy.mean(ref_column)
                alt_column = alts_per_criterion[crit]
                var_alt = numpy.var(alt_column)
                mean_alt = numpy.mean(alt_column)

                mean_ratios_of_set.append(mean_ref / mean_alt)
                var_ratios_of_set.append(var_ref / var_alt)

            all_mean_ratios.append(mean_ratios_of_set)
            all_var_ratios.append(var_ratios_of_set)

        # Transpose: a list of reference sets (each a list of estimators per
        # criterion) becomes a list of criteria, each holding the estimator
        # of every SRP
        var_ratios_per_crit = [list(row) for row in zip(*all_var_ratios)]
        mean_ratios_per_crit = [list(row) for row in zip(*all_mean_ratios)]

        var_var_ratios = [numpy.var(row) for row in var_ratios_per_crit]
        mean_var_ratios = [numpy.mean(row) for row in var_ratios_per_crit]
        var_mean_ratios = [numpy.var(row) for row in mean_ratios_per_crit]
        mean_mean_ratios = [numpy.mean(row) for row in mean_ratios_per_crit]

        # Transform into strings
        var_var_lines.append(template_ratio.format(seed, *var_var_ratios))
        var_mean_lines.append(template_ratio.format(seed, *var_mean_ratios))
        mean_var_lines.append(template_ratio.format(seed, *mean_var_ratios))
        mean_mean_lines.append(
            template_ratio.format(seed, *mean_mean_ratios))

    # Sections are written in this fixed order, each followed by a blank line
    sections = (
        ("var(var(ref)/var(alt)) \n", var_var_lines),
        ("var(mean(ref)/mean(alt)) \n", var_mean_lines),
        ("mean(var(ref)/var(alt)) \n", mean_var_lines),
        ("mean(mean(ref)/mean(alt)) \n", mean_mean_lines),
    )
    with open(output_file, 'a') as output:
        for title, lines in sections:
            output.write(title)
            for line in lines:
                output.write(line)
                output.write("\n")
            output.write("\n")

Exemplo n.º 25

0

Exibir arquivo

Arquivo: RobustPII_analyse_rank_reversals.py Projeto: gdejaege/Robust-and-Referenced-Promethee-study

def analyse_rr(data='SHA', max_rep=20, R_parameter=None, m_parameter=None):
    """Analyse the rank reversals occurring in RobustPII.

    A ``PrometheeII`` instance and ``max_rep`` ``RobustPII`` instances are
    built on the same alternatives; the rank-reversal counts returned by
    their ``analyse_rr`` methods are gathered per key (the counts of the
    robust runs are summed, then divided by ``max_rep``) and written with
    ``print_to_file`` to ``res/RobustPII/analyse_rank_reversals/<data>.txt``.
    The robust scores reported are those of the last repetition.

    Args:
        data: dataset name. ``'HDI'`` is refused, ``'SHA'`` selects the SHA
            configuration and any other value is treated as ``'EPI'``.
        max_rep: number of RobustPII repetitions; must be at least 1.
        R_parameter: overrides the dataset's default ``R`` when not None.
        m_parameter: overrides the dataset's default ``m`` when not None.

    Raises:
        SystemExit: if ``data`` is ``'HDI'``.
        ValueError: if ``max_rep`` is smaller than 1.
    """
    if data == 'HDI':
        print('try with another dataset')
        # Raised directly: the ``exit()`` helper is injected by the ``site``
        # module for interactive use and is not guaranteed to exist.
        raise SystemExit

    # Without at least one repetition there is no robust instance to report
    # and the averaging below would divide by zero.
    if max_rep < 1:
        raise ValueError('max_rep must be at least 1, got ' + str(max_rep))

    if data == 'SHA':
        R = 5000
        m = 9
        # Do not change these parameters ! They are not saved
        data_set = 'data/SHA/raw_20.csv'
        alts = dr.open_raw(data_set)[0]
        weights = [0.1667, 0.1667, 0.1667, 0.1667, 0.1667, 0.1667]
        ceils = [17.100, 23.7750, 26.100, 27.3750, 17.9250, 13.5750]
        seed = 1
    else:
        # Every other name falls back to the EPI dataset
        data = 'EPI'
        R = 5000
        m = 16
        # Do not change these parameters ! They are not saved
        data_set = 'data/EPI/raw.csv'
        alts = dr.open_raw(data_set)[0]
        alts = alts[0:20]
        weights, ceils = None, None
        seed = 0

    if R_parameter is not None:
        R = R_parameter
    if m_parameter is not None:
        m = m_parameter

    output = 'res/RobustPII/analyse_rank_reversals/' + str(data) + '.txt'

    promethee = prom.PrometheeII(alts, weights=weights, ceils=ceils, seed=seed)
    promethee_rr_instances = promethee.analyse_rr()

    # Sum, per key, the counts observed over all the robust repetitions
    all_rr_instances = dict()
    for _ in range(max_rep):
        robust = prom.RobustPII(alts,
                                weights=weights,
                                ceils=ceils,
                                seed=seed,
                                R=R,
                                m=m)
        rr_instances = robust.analyse_rr()
        for key in rr_instances:
            all_rr_instances[key] = \
                    all_rr_instances.get(key, 0) + rr_instances.get(key)

    all_info = []

    key_set = set(all_rr_instances.keys()) | set(promethee_rr_instances.keys())
    for key in key_set:
        # key[0] and key[1] index the scores of both methods
        line = [
            key[0], key[1],
            all_rr_instances.get(key, 0) / max_rep,
            promethee_rr_instances.get(key, 0),
            abs(promethee.scores[key[0]] - promethee.scores[key[1]]),
            abs(robust.scores[key[0]] - robust.scores[key[1]])
        ]
        all_info.append(line)
    print_to_file(output, all_info, promethee.scores, robust.scores, max_rep,
                  R, m)

Exemplo n.º 26

0

Exibir arquivo

def count_rr(data='HDI', max_rep=10, R_parameter=None, m_parameter=None):
    """Test the number of rank reversals.

    For every pair ``(R, m)`` taken from the two value lists, ``max_rep``
    ``RobustPII`` instances are built and the mean of their
    ``compute_rr_number()`` results is stored. Each finished row (one value
    of ``R``) is printed, and the whole matrix is written with
    ``print_rr_to_file`` to ``res/RobustPII/R_m_influence/<data>.txt``
    together with the value obtained for ``PrometheeII``.

    Args:
        data: dataset name; ``'HDI'`` and ``'SHA'`` have their own
            configuration, any other value is treated as ``'EPI'``.
        max_rep: number of repetitions averaged for each ``(R, m)`` pair.
        R_parameter: list of ``R`` values to test; when None a default list
            depending on the dataset is used.
        m_parameter: list of ``m`` values to test; when None a default list
            depending on the dataset is used.
    """
    # Parameter initialization, the interesting stuff is way lower
    R_list = R_parameter
    m_list = m_parameter
    if data == 'HDI':
        # Change these parameters if needed
        if R_list is None:
            R_list = [500, 1000, 5000, 10000]
        if m_list is None:
            m_list = [3, 5, 6, 7, 8, 10, 15]

        # Do not change these parameters ! They are not saved
        data_set = 'data/HDI/raw.csv'
        alts = dr.open_raw(data_set)[0]
        weights = [0.5, 0.5]
        ceils = [3, 3]
        seed = 0  # Not used, here to match the general signature

    elif data == 'SHA':
        # Change these parameters if needed
        if R_list is None:
            R_list = [1000, 4000, 7000, 12000]
        # The two defaults are independent: a caller-supplied ``m_parameter``
        # must be kept when ``R_parameter`` is omitted, and ``m_list`` must
        # still get a default when only ``R_parameter`` is supplied.
        if m_list is None:
            m_list = [4, 6, 8, 9, 12, 15, 18]

        # Do not change these parameters ! They are not saved
        data_set = 'data/SHA/raw_20.csv'
        alts = dr.open_raw(data_set)[0]
        weights = [0.1667, 0.1667, 0.1667, 0.1667, 0.1667, 0.1667]
        ceils = [17.100, 23.7750, 26.100, 27.3750, 17.9250, 13.5750]
        seed = 0  # Not used, here to match the general signature

    else:
        # Every other name falls back to the EPI dataset
        data = 'EPI'
        # Change these parameters if needed
        if R_list is None:
            R_list = [500, 1000, 5000, 8000]
        if m_list is None:
            m_list = [3, 4, 7, 9, 12, 14, 16, 18]

        # Do not change these parameters ! They are not saved
        data_set = 'data/EPI/raw.csv'
        alts = dr.open_raw(data_set)[0]
        alts = alts[0:20]
        weights, ceils = None, None
        seed = 0

    output_dir = 'res/RobustPII/R_m_influence/'
    output = output_dir + data + '.txt'

    promethee = prom.PrometheeII(alts, weights=weights, ceils=ceils, seed=seed)
    rr_promethee = promethee.compute_rr_number()

    rr_matrix = []
    for R in R_list:
        rr_row = []
        for m in m_list:
            rr = 0
            for repetition in range(max_rep):
                # Re-seed the global generator before every repetition
                random.seed()
                robust = prom.RobustPII(alts,
                                        weights=weights,
                                        ceils=ceils,
                                        seed=seed,
                                        R=R,
                                        m=m)
                rr += robust.compute_rr_number()
            rr = rr / max_rep
            rr_row.append(rr)
        print(rr_row)
        rr_matrix.append(rr_row)
    print_rr_to_file(output, rr_matrix, R_list, m_list, rr_promethee, max_rep)

Exemplo n.º 27

0

Exibir arquivo

Arquivo: local_regression.py Projeto: gdejaege/Missing-values-management-in-Multi-Criteria-Decision-Aid

if __name__ == '__main__':
    # Manual run: sample alternatives from a dataset and compute the criteria
    # for an index typed on standard input.
    #
    # Small hand-made example kept for reference:
    # A = [[1, 2, 3, 4],
    #      [4, 3, 2, 1],
    #      [0, 0, 0, 0]]
    # for a in A:
    #     print(a)
    # print(compute_deltas(A, 0, [1, 2, 3]))

    n, iterations = 100, 1

    # Other dataset that was tried here: "SHA"
    dataset = "CPU"
    filename = 'data/{}/raw.csv'.format(dataset)

    # Keep n alternatives picked at random among all those of the file
    A = random.sample(dr.open_raw(filename)[0], n)

    x = int(input())
    crits = compute_criteria(A, x)

    # Disabled experiment: hide one random evaluation, estimate it and print
    # the error.
    # for it in range(iterations):
    #     i, c = random.randint(0, len(A)-1), random.randint(0, len(A[0])-1)
    #     a_miss = A[i]
    #     ev = a_miss[c]
    #     a_miss[c] = NULL
    #     estimation = get_estimation_by_local_regression(A)
    #     print('evaluation: ', ev)
    #     print('error: ', ev - estimation)
    #     A[i][c] = ev

Exemplo n.º 28

0

Exibir arquivo

    worse_c = [b[c] for b in worse]

    return better_c, worse_c


if __name__ == '__main__':
    # Manual run of the dominance-interval check on one dataset and one
    # percentile. ``datasets``, ``header`` and ``percentiles`` are only used
    # by the commented-out sweep kept further down.
    datasets = ("SHA", "EPI", "HR")
    header = ["", "MEAN", "STD"]
    alt_num = 100
    percentiles = [12.5, 25, 37.5, 50, 62.5, 75, 87.5]
    res = []

    perc = 50
    dataset = "SHA"
    filename = 'data/' + dataset + '/raw.csv'
    # Read the file once instead of once per returned element
    raw_data = dr.open_raw(filename)
    alts, weights = raw_data[0], raw_data[1]
    alts = random.sample(alts, alt_num)
    good_ints, bad_ints, no_ints, int_mean, int_std = \
        check_dominance_interval(alts, perc)
    res.append([dataset, good_ints, bad_ints, no_ints, int_mean, int_std])

    print('finish')

    # for perc in percentiles:
    #     print(perc)
    #     res = []
    #     for dataset in datasets:
    #         filename = 'data/' + dataset + '/raw.csv'
    #         alts, weights = dr.open_raw(filename)[0], dr.open_raw(filename)[1]
    #         alts = random.sample(alts, alt_num)
    #         alts = normalize(alts, axis=0, copy=True, norm='max')

Exemplo n.º 29

0

Exibir arquivo

Arquivo: missing_values_comparison.py Projeto: gdejaege/Missing-values-management-in-Multi-Criteria-Decision-Aid

def compare_evaluations(alt_num=100,
                        iterations=2,
                        outputdir='res/local_regression/'):
    """Compare strategies.

    For each dataset, ``alt_num`` alternatives are sampled and normalised,
    then ``iterations`` times one randomly chosen evaluation is replaced by
    ``NULL``, handed to ``compare_evaluations_once`` with every method and
    restored afterwards. The per-method results are aggregated with their
    mean and standard deviation.

    Intended output in different files:
        1. All the errors for each dataset (prefix dataset):
            i, j, ev, reg, ...

        2. Statistics for each dataset (prefix dataset_statistics):
                 MEAN   STD
            reg
            ...

        3. Global statistics (prefix Global):
                 SHA ... MEAN   STD
            reg
            ...

    NOTE: the CSV exports are currently commented out; the per-dataset and
    global statistics are only printed with ``helpers.printmatrix``.

    Args:
        alt_num: number of alternatives sampled from each dataset.
        iterations: number of evaluations removed and estimated per dataset.
        outputdir: directory prefix of the (currently disabled) CSV exports.
    """
    # Single-dataset alternative for quick runs: ('SHA', )
    datasets = ('HDI', 'SHA', 'HP', 'CPU')
    global_header = ["    ", "mean", "std"]
    methods = {
        'reg': rg.get_regression,
        # 'lrg': lrg.get_estimation_by_local_regression,
        # 'dom': de.get_estimations_by_dominance,
        'lay_all': layrg.layer_regression_all,
        'lay_guess': layrg.layer_regression_guess_layer,
        # 'diff': de.get_estimations_by_dominance_diff,
        # 'dk': de.get_estimations_by_dominance_knn,
        # 'dk2': de.get_estimations_by_dominance_knn_2,
        # 'dk3': de.get_estimations_by_dominance_knn_3,
        # 'dk4': de.get_estimations_by_dominance_knn_4,
        # 'knn': knn.get_knn,
        'mean': mv.get_mean,
        'med': mv.get_med
    }

    dataset_header = [
        'i',
        'c',
        'ev',
        'lay_all',
        "lay_guess",
        # 'lrg',
        'reg',
        # 'dom', 'diff', 'dk', 'dk2',
        # 'dk3', 'dk4', 'knn',
        'mean',
        'med'
    ]

    # The columns after 'i', 'c' and 'ev' are method names; they must match
    # the keys of ``methods``.
    row_methods_order = dataset_header[3:]

    global_res = {method: [] for method in methods}
    # global_std = {method: [] for method in methods}

    for dataset in datasets:
        print('---------------------- ', dataset, ' -----------------------')
        t0 = time.time()

        # output file for dataset (only used by the disabled CSV exports)
        dataset_output = outputdir + dataset + '.csv'
        dataset_statistics_output = outputdir + dataset + '_statistics.csv'

        dataset_res = []
        dataset_res.append(dataset_header)
        # used for std and mean
        dataset_res_dico = {method: [] for method in methods}

        filename = 'data/' + dataset + '/raw.csv'
        # Read the file once instead of once per returned element
        raw_data = dr.open_raw(filename)
        all_alts, weights = raw_data[0], raw_data[1]

        A = random.sample(all_alts, alt_num)
        A = normalize(A, axis=0, copy=True, norm='max')
        A = [list(alt) for alt in A]

        for it in range(iterations):
            res_it = []
            # Position (alternative i, criterion c) of the hidden evaluation
            i = random.randint(0, len(A) - 1)
            c = random.randint(0, len(A[0]) - 1)

            res_it.append(i)
            res_it.append(c)

            # Hide the evaluation, run every method, then put it back
            ev = A[i][c]
            A[i][c] = NULL
            errors = compare_evaluations_once(A, ev, methods)
            A[i][c] = ev

            res_it.append(ev)

            for m in row_methods_order:
                res = errors[m]
                res_it.append(res)
                dataset_res_dico[m].append(res)

            dataset_res.append(res_it)

        # print(dataset_res)
        # helpers.matrix_to_csv(dataset_res, dataset_output)

        # Make the matrix for the statistics of the given dataset
        dataset_statistics_res = []
        dataset_statistics_res.append([dataset, "MEAN", "STD"])

        for method in methods:
            # keep all the errors for the global statistics
            global_res[method] += dataset_res_dico[method]

            line = [
                method,
                np.mean(dataset_res_dico[method]),
                np.std(dataset_res_dico[method])
            ]

            dataset_statistics_res.append(line)

        helpers.printmatrix(dataset_statistics_res)
        # helpers.matrix_to_csv(dataset_statistics_res, dataset_statistics_output)

        print('time:', time.time() - t0)

    global_matrix = [global_header]
    for m in methods:
        std = np.std(global_res[m])
        mean = np.mean(global_res[m])
        global_matrix.append([m, mean, std])

    helpers.printmatrix(global_matrix)