Exemplo n.º 1
0
def createXingData(k):
    """
    creates the rankings for every query of the Xing data set and writes them to disk

    For each row yielded by the reader's ``entireDataSet.iterrows()`` the rankings are produced by
    dumpRankingsToDisk and the row's original ordering is pickled into the same per-query folder
    as 'OriginalOrdering.pickle'.

    @param k: length of the rankings, handed on unchanged to dumpRankingsToDisk
    """
    # (p, alpha) pairs handed on to dumpRankingsToDisk
    alphaCorrectionPerP = [(0.1, 0.1),  # no real results, skip in evaluation
                           (0.2, 0.1),  # no real results, skip in evaluation
                           (0.3, 0.1),  # no real results, skip in evaluation
                           (0.4, 0.1),  # no real results, skip in evaluation
                           (0.5, 0.0168),
                           (0.6, 0.0321),
                           (0.7, 0.0293),
                           (0.8, 0.0328),
                           (0.9, 0.0375)]

    # the reader is handed a glob pattern for the raw json files
    reader = xingProfilesReader.Reader('../rawData/Xing/*.json')

    for queryString, row in reader.entireDataSet.iterrows():
        # one result folder per query, relative to the current working directory
        rankingFolder = "../results/rankingDumps/Xing" + '/' + queryString + '/'
        dumpRankingsToDisk(row['protected'], row['nonProtected'], k, queryString,
                           rankingFolder, alphaCorrectionPerP)

        # the untouched ordering is stored next to the generated rankings
        originalOrderingFile = (os.getcwd() + '/../results/rankingDumps/Xing/'
                                + '/' + queryString + '/' + 'OriginalOrdering.pickle')
        writePickleToDisk(row['originalOrdering'], originalOrderingFile)
def determineFailProbOfGroupFairnessTesterForStoyanovichRanking():
    """
    determines the probability that the ranked group fairness test fails given an artificial dataset
    created by means of Yang and Stoyanovich ("Ke Yang and Julia Stoyanovich. "Measuring Fairness in
    Ranked Outputs." arXiv preprint arXiv:1610.08559 (2016).") which we believe to be fair.

    Two configurations are simulated for every p in 0.1, 0.2, ..., 0.9:
    * k and alpha as set below
    * k + 500 and alpha + 0.04

    For each configuration and p the failure probability is the sum of the values returned by
    rankedGroupFairnessInYangStoyanovich divided by numTrials. The resulting lists are pickled (one
    file per configuration, named after its k) and plotted together with the two reference curves
    modelAlpha1 and modelAlpha2 (plot file named after numTrials).
    """
    numTrials = 10000  # Set to 100 or 10,000
    alpha = 0.01
    k = 1000  # Set to 1000

    # reference curves drawn next to the simulated ones, one value per entry of ps
    # NOTE(review): presumably the rejection probabilities predicted by the model for the first and
    # second configuration respectively -- confirm
    modelAlpha1 = [0.075378, 0.090049, 0.098331, 0.100432, 0.103713, 0.103976, 0.105475, 0.103502, 0.099602]
    modelAlpha2 = [0.295883, 0.330252, 0.349234, 0.361767, 0.360710, 0.362456, 0.360749, 0.356852, 0.328008]

    # percentage describes the generated amount of protected candidates
    ps = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

    print("numTrials={0}".format(numTrials))

    # (k, alpha) of the two simulated configurations
    configurations = [(k, alpha), (k + 500, alpha + 0.04)]

    failProbsPerConfiguration = []
    for currentK, currentAlpha in configurations:
        failProbs = []
        for p in ps:
            print("currently running: k={0}, p={1}, alpha={2}".format(currentK, p, currentAlpha))
            # the second return value is not needed for the failure probability
            result, _ = rankedGroupFairnessInYangStoyanovich(
                currentAlpha, p, currentK, currentK, currentK, numTrials)
            failProbs.append(sum(result) / numTrials)
        failProbsPerConfiguration.append(failProbs)

    # file names are derived from the actual settings so that they cannot get out of sync when k or
    # numTrials are changed above
    for (currentK, _), failProbs in zip(configurations, failProbsPerConfiguration):
        writePickleToDisk(failProbs,
                          '../results/FailureProbYangMethod/failProbsK={0}.pickle'.format(currentK))

    failProbs1, failProbs2 = failProbsPerConfiguration
    printsAndPlots.plotFourListsInOnePlot(ps, modelAlpha1, failProbs1, modelAlpha2, failProbs2,
                                          'p', 'prob. rejection',
                                          filename='../results/plots/FailureProbability{0}Trials.pdf'.format(numTrials))
 def dumpResults(self, directory):
     """
     writes every result collection held by this object into its own pickle file

     @param directory: appended verbatim to the current working directory, and the file names are
                       appended verbatim to the result, so it should start and end with a path
                       separator. The directory is created if it does not exist yet.
     """
     targetDirectory = os.getcwd() + directory
     if not os.path.exists(targetDirectory):
         os.makedirs(targetDirectory)

     # attribute holding the results -> name of the pickle file, in the order they are written
     resultFiles = (
         ('compasGenderResults', 'CompasGenderResults.pickle'),
         ('compasRaceResults', 'CompasRaceResults.pickle'),
         ('germanCreditAge25Results', 'GermanCreditAge25Results.pickle'),
         ('germanCreditAge35Results', 'GermanCreditAge35Results.pickle'),
         ('germanCreditGenderResults', 'GermanCreditGenderResults.pickle'),
         ('SATResults', 'SATResults.pickle'),
         ('xingResults', 'XingResults.pickle'),
     )
     for attributeName, fileName in resultFiles:
         writePickleToDisk(getattr(self, attributeName), targetDirectory + fileName)
Exemplo n.º 4
0
def rankAndDump(protected, nonProtected, k, dataSetName, directory, pairsOfPAndAlpha):
    """
    creates all rankings we need for one experimental data set and writes them to disk to be used later

    @param protected:        list of protected candidates, assumed to satisfy in-group monotonicity
    @param nonProtected:     list of non-protected candidates, assumed to satisfy in-group monotonicity
    @param k:                length of the rankings we want to create
    @param dataSetName:      determines which data set is used in this experiment, used as prefix of
                             every file name
    @param directory:        directory in which to store the rankings, relative to the current
                             working directory; created if it does not exist
    @param pairsOfPAndAlpha: contains the mapping of a certain alpha correction to be used for a
                             certain p as (p, alpha) pairs; needs an entry for each of
                             p = 0.1, 0.2, ..., 0.9. If several entries share the same p the first
                             one is used.

    @raise IndexError:       if pairsOfPAndAlpha has no entry for one of the required p. This is
                             checked before any ranking is computed or anything is written.

    The experimental setting is as follows: for a given data set of protected and non-
    protected candidates we create the following rankings:
    * a colorblind ranking (fairRanking with p = ESSENTIALLY_ZERO and alpha = 0.1),
    * a ranking as in Feldman et al
    * nine rankings using fairRanking, with p varying from 0.1, 0.2 to 0.9, where alpha is
      taken from pairsOfPAndAlpha for each p

    For every ranking the ranking itself and the second value returned alongside it (stored in the
    '...NotSelected.pickle' files) are pickled.
    """
    # p values of the fair rankings together with the label used in the pickle file names
    labelledPs = [('01', 0.1), ('02', 0.2), ('03', 0.3), ('04', 0.4), ('05', 0.5),
                  ('06', 0.6), ('07', 0.7), ('08', 0.8), ('09', 0.9)]

    # resolve all (p, alpha) pairs up front so that an incomplete mapping fails immediately and
    # with a helpful message instead of after several rankings have already been computed
    fairSettings = []
    for label, p in labelledPs:
        matches = [item for item in pairsOfPAndAlpha if item[0] == p]
        if not matches:
            raise IndexError("pairsOfPAndAlpha contains no entry for p={0}".format(p))
        fairSettings.append((label, matches[0][0], matches[0][1]))

    print("====================================================================")
    print("create rankings of {0}".format(dataSetName))

    outputDirectory = os.getcwd() + '/' + directory + '/'
    os.makedirs(outputDirectory, exist_ok=True)

    print("colorblind ranking", end='', flush=True)
    colorblindRanking, colorblindNotSelected = fairRanking(k, protected, nonProtected, ESSENTIALLY_ZERO, 0.1)
    print(" [Done]")

    print("fair rankings", end='', flush=True)
    fairResults = []
    for label, p, alpha in fairSettings:
        ranking, notSelected = fairRanking(k, protected, nonProtected, p, alpha)
        fairResults.append((label, ranking, notSelected))
    print(" [Done]")

    print("feldman ranking", end='', flush=True)
    feldmanRanking, feldmanNotSelected = fair_ranker.create.feldmanRanking(protected, nonProtected, k)
    print(" [Done]")

    print("Write rankings to disk", end='', flush=True)
    filePrefix = outputDirectory + dataSetName
    writePickleToDisk(colorblindRanking, filePrefix + 'ColorblindRanking.pickle')
    writePickleToDisk(colorblindNotSelected, filePrefix + 'ColorblindRankingNotSelected.pickle')
    writePickleToDisk(feldmanRanking, filePrefix + 'FeldmanRanking.pickle')
    writePickleToDisk(feldmanNotSelected, filePrefix + 'FeldmanRankingNotSelected.pickle')
    for label, ranking, notSelected in fairResults:
        writePickleToDisk(ranking, filePrefix + 'FairRanking' + label + 'PercentProtected.pickle')
        writePickleToDisk(notSelected, filePrefix + 'FairRanking' + label + 'NotSelected.pickle')
    print(" [Done]")