def calculateRRDNormalizer(_user_N, _pro_N, _gf_measure, window):
    """
    Estimate the normalizer of a group fairness measure for a given setting.

    For each fairness probability in [0, 1] (step 0.1), generate
    NORM_ITERATION synthetic unfair rankings, score each with the
    non-normalized measure, and average the scores.  The normalizer is
    the largest of those per-probability averages.

    :param _user_N: The total user number of input ranking
    :param _pro_N: The size of protected group in the input ranking
    :param _gf_measure: The group fairness measure to be used in calculation
    :param window: cut-off point (batch size) forwarded to calculateNDFairness

    :return: the maximum average fairness value, used as normalizer
    """
    cut_point = window
    # range of fairness probabilities to sweep for this measure
    probabilities = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    # per-probability averages over all iterations
    avg_maximums = []
    for prob in probabilities:
        trial_scores = []
        for _ in range(NORM_ITERATION):
            ranking = list(range(_user_N))
            protected = list(range(_pro_N))
            # generate unfair ranking using algorithm
            shuffled = dataGenerator.generateUnfairRanking(ranking, protected, prob)
            # normalizer argument of 1 yields the raw (non-normalized) value
            trial_scores.append(
                calculateNDFairness(shuffled, protected, cut_point, _gf_measure, 1))
        avg_maximums.append(np.mean(trial_scores))
    return max(avg_maximums)
def getExpRR_protNormalizer(rank_len, r):
    """
    Compute the [min, max] range of the ExpRR_prot measure at the two
    extreme fairness probabilities (0.0 and 1.0).

    :param rank_len: total length of the ranking
    :param r: fraction of the ranking belonging to the protected group
    :return: [min_e, max_e] bounds usable for normalization
    """
    f_probs = [0.0, 1.0]
    max_e = 0
    min_e = float('inf')
    pro_size = int(rank_len * r)  # hoisted: same value on every pass
    for fpi in f_probs:
        input_ranking = [x for x in range(rank_len)]
        protected_group = [x for x in range(pro_size)]
        # generate unfair ranking using algorithm
        unfair_ranking = dataGenerator.generateUnfairRanking(
            input_ranking, protected_group, fpi)
        gf = calculaterExpRR_prot(unfair_ranking, protected_group, rank_len,
                                  pro_size)
        max_e = max(max_e, gf)
        # BUG FIX: was `max_e = max(min_e, gf)`, which overwrote max_e with
        # +inf and left min_e untouched; track the minimum instead.
        min_e = min(min_e, gf)
    return [min_e, max_e]
def getExpRRNormalizer(rank_len, r):
    """
    Compute the [min, max] range of the ExpRR measure across fairness
    probabilities 0.0 .. 1.0 (step 0.1), running NORM_ITERATION trials
    per probability.

    :param rank_len: total length of the ranking
    :param r: fraction of the ranking belonging to the protected group
    :return: [overall minimum, overall maximum] ExpRR value observed
    """
    probabilities = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    per_prob_max = []  # highest value seen at each probability
    per_prob_min = []  # lowest value seen at each probability
    for prob in probabilities:
        trial_values = []
        for _ in range(NORM_ITERATION):
            ranking = list(range(rank_len))
            protected = list(range(int(rank_len * r)))
            # generate unfair ranking using algorithm
            shuffled = dataGenerator.generateUnfairRanking(ranking, protected, prob)
            trial_values.append(
                calculaterExpRR(shuffled, protected, rank_len,
                                int(rank_len * r)))
        per_prob_max.append(np.max(trial_values))
        per_prob_min.append(np.min(trial_values))
    return [min(per_prob_min), max(per_prob_max)]
def calculateSkewNormalizer(_user_N, _pro_N, _gf_measure, window):
    """
    Compute the [min, max] normalizer range for a skew-style measure.

    Unlike calculateRRDNormalizer, both bounds are tracked because the
    measure can take negative values.

    :param _user_N: The total user number of input ranking
    :param _pro_N: The size of protected group in the input ranking
    :param _gf_measure: The group fairness measure to be used in calculation
    :param window: cut-off point forwarded to calculateNDFairness
    :return: [overall minimum, overall maximum] across all probabilities
    """
    cut_point = window
    # handle differently due to negative values
    probabilities = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    highs = []
    lows = []
    for prob in probabilities:
        trial_values = []
        for _ in range(NORM_ITERATION):
            ranking = list(range(_user_N))
            protected = list(range(_pro_N))
            # generate unfair ranking using algorithm
            shuffled = dataGenerator.generateUnfairRanking(ranking, protected, prob)
            # normalizer bounds [0, 1] yield the raw (non-normalized) value
            trial_values.append(
                calculateNDFairness(shuffled, protected, cut_point,
                                    _gf_measure, [0, 1]))
        highs.append(np.max(trial_values))
        lows.append(np.min(trial_values))
    return [np.min(lows), np.max(highs)]
# Example #5  (stray pasted text "Пример #5" / "0" — a scrape artifact;
# commented out so the file remains valid Python)
def main(_user_N, _pro_N, _gfmeasure, _cut_point, _rez_fn, _loops):
    """
        Run the group fairness experiments of synthetic unfair rankings.
        Output group fairness results as csv file.

        :param _user_N: The total user number of input ranking
        :param _pro_N: The size of protected group in the input ranking
        :param _gfmeasure: The group fairness measure to be used in calculation
                           one of "rKL", "rND" and "rRD" defined as constant in this py file
        :param _cut_point: The cut off point of set-wise group fairness calculation
        :param _rez_fn: The file name prefix to output group fairness results
        :param _loops: Number of times the whole probability sweep is repeated

        :return: no returns.
    """

    # define the input mixing proportions (0.0 .. 0.9);
    # 0.98 stands in for the extreme case considering the limitation of
    # the random generator
    f_probs = [i / 10 for i in range(10)]
    f_probs.append(0.98)

    # accumulators for the (probability, measure) pairs written to the data csv
    keys = []
    values = []

    # define output file name (fn)
    base_file_name = _rez_fn + "_user" + str(_user_N) + "_pro" + str(_pro_N)
    output_fn = base_file_name + ".csv"

    # calculate the normalizer of the input user number and protected group
    max_GF = measures.getNormalizer(_user_N, _pro_N, _gfmeasure)
    # generate a random input ranking and protected group
    input_ranking = list(range(_user_N))
    sensi_idx = list(range(_pro_N))

    # Single 'w' session replaces the original write-header-then-reopen-append
    # pattern; the context manager also guarantees the file is closed if an
    # iteration raises (the original leaked the handle on error).
    with open(output_fn, 'w') as rez_file:
        rez_file.write(
            "GF_Measure,MP0.0,MP0.1,MP0.2,MP0.3,MP0.4,MP0.5,MP0.6,MP0.7,MP0.8,MP0.9,MP0.98\n"
        )
        for _ in range(_loops):
            gf_results = []
            # loop the input fairness probabilities
            for fp in f_probs:
                gf_iters = 0
                for _ in range(1, NORM_ITERATION + 1):
                    sRFair = dataGenerator.generateUnfairRanking(
                        input_ranking, sensi_idx, fp)
                    gf = measures.calculateNDFairness(sRFair, sensi_idx,
                                                      _cut_point, _gfmeasure,
                                                      max_GF)
                    gf_iters = gf_iters + gf
                avg_gf = gf_iters / NORM_ITERATION  # record average result
                gf_results.append(avg_gf)

                # append the key/value to lists for creation of data csv
                keys.append(fp)
                values.append(avg_gf)

            # output results into csv file (trailing comma kept for
            # backward-compatible output format)
            rez_file.write(
                ",".join([_gfmeasure] + [str(item) for item in gf_results])
                + ",\n")

    df = pd.DataFrame(list(zip(keys, values)), columns=['prob', 'measure'])

    # NOTE(review): assumes ./PMCdata already exists — to_csv will raise
    # otherwise; confirm with callers before adding os.makedirs.
    data_file = os.path.join('.', 'PMCdata', base_file_name + '_data' + '.csv')
    df.to_csv(data_file, index=False)

    return