def createXingData(k):
    """
    creates the ranking dumps for every query contained in the Xing data set and stores
    the original ordering of each query's candidates next to them

    @param k: length of the rankings we want to create, handed through unchanged to
              dumpRankingsToDisk
    """
    # (p, alpha) pairs handed to the ranking dump
    pairsOfPAndAlpha = [
        (0.1, 0.1),  # no real results, skip in evaluation
        (0.2, 0.1),  # no real results, skip in evaluation
        (0.3, 0.1),  # no real results, skip in evaluation
        (0.4, 0.1),  # no real results, skip in evaluation
        (0.5, 0.0168),
        (0.6, 0.0321),
        (0.7, 0.0293),
        (0.8, 0.0328),
        (0.9, 0.0375),
    ]

    # the reader receives a glob pattern; glob gets abs/rel paths matching it
    xingReader = xingProfilesReader.Reader('../rawData/Xing/*.json')

    # NOTE(review): entireDataSet is presumably a pandas DataFrame whose index holds the
    # query strings -- confirm against xingProfilesReader
    for queryString, candidates in xingReader.entireDataSet.iterrows():
        # relative dump directory of this query
        queryDirectory = "../results/rankingDumps/Xing" + '/' + queryString + '/'
        dumpRankingsToDisk(candidates['protected'],
                           candidates['nonProtected'],
                           k,
                           queryString,
                           queryDirectory,
                           pairsOfPAndAlpha)

        # absolute location of the pickle that keeps the original ordering of this query
        originalOrderingFile = (os.getcwd() + '/../results/rankingDumps/Xing/' + '/'
                                + queryString + '/' + 'OriginalOrdering.pickle')
        writePickleToDisk(candidates['originalOrdering'], originalOrderingFile)
def determineFailProbOfGroupFairnessTesterForStoyanovichRanking():
    """
    estimates how often the ranked group fairness test fails on artificial data sets
    created by means of Yang and Stoyanovich ("Ke Yang and Julia Stoyanovich. "Measuring
    Fairness in Ranked Outputs." arXiv preprint arXiv:1610.08559 (2016)."), which we
    believe to be fair.

    Two settings are simulated for every p in 0.1, 0.2, ..., 0.9:
      * k = 1000 with alpha = 0.01
      * k = 1500 with alpha = 0.05
    The failure rates of both settings are written to disk as pickles and plotted
    together with the two hard-coded reference curves modelAlpha1 and modelAlpha2.
    """
    numTrials = 10000  # Set to 100 or 10,000
    alpha = 0.01
    k = 1000  # Set to 1000

    # reference curves that are plotted next to the simulated failure probabilities
    # NOTE(review): presumably the model predictions for the first / second setting -- confirm
    modelAlpha1 = [0.075378, 0.090049, 0.098331, 0.100432, 0.103713,
                   0.103976, 0.105475, 0.103502, 0.099602]
    modelAlpha2 = [0.295883, 0.330252, 0.349234, 0.361767, 0.360710,
                   0.362456, 0.360749, 0.356852, 0.328008]

    # percentage describes the generated amount of protected candidates
    ps = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

    def simulateFailProbs(rankingLength, significance):
        # runs numTrials trials per p and returns the share of failed tests for each p
        shares = []
        for p in ps:
            print("currently running: k={0}, p={1}, alpha={2}".format(rankingLength, p, significance))
            # second return value (expected candidates) is not needed here
            result, _ = rankedGroupFairnessInYangStoyanovich(
                significance, p, rankingLength, rankingLength, rankingLength, numTrials)
            shares.append(sum(result) / numTrials)
        return shares

    print("numTrials={0}".format(numTrials))

    failProbs1 = simulateFailProbs(k, alpha)
    failProbs2 = simulateFailProbs(k + 500, alpha + 0.04)

    # to re-plot without re-running the simulation, the two lists can instead be
    # restored with loadPickleFromDisk from the files written below
    writePickleToDisk(failProbs1, '../results/FailureProbYangMethod/failProbsK=1000.pickle')
    writePickleToDisk(failProbs2, '../results/FailureProbYangMethod/failProbsK=1500.pickle')

    printsAndPlots.plotFourListsInOnePlot(ps,
                                          modelAlpha1,
                                          failProbs1,
                                          modelAlpha2,
                                          failProbs2,
                                          'p',
                                          'prob. rejection',
                                          filename='../results/plots/FailureProbability10000Trials.pdf')
def dumpResults(self, directory):
    """
    writes every result collection held by this object into its own pickle file

    @param directory: path that is appended to the current working directory by plain
                      string concatenation (no separator is inserted, neither in front
                      of it nor between it and the file names); the resulting directory
                      is created if it does not exist yet
    """
    targetDirectory = os.getcwd() + directory
    if not os.path.exists(targetDirectory):
        os.makedirs(targetDirectory)

    # attribute holding the results -> name of the pickle file, in the order of writing
    resultFiles = (
        ('compasGenderResults', 'CompasGenderResults.pickle'),
        ('compasRaceResults', 'CompasRaceResults.pickle'),
        ('germanCreditAge25Results', 'GermanCreditAge25Results.pickle'),
        ('germanCreditAge35Results', 'GermanCreditAge35Results.pickle'),
        ('germanCreditGenderResults', 'GermanCreditGenderResults.pickle'),
        ('SATResults', 'SATResults.pickle'),
        ('xingResults', 'XingResults.pickle'),
    )
    for attributeName, fileName in resultFiles:
        writePickleToDisk(getattr(self, attributeName), targetDirectory + fileName)
def rankAndDump(protected, nonProtected, k, dataSetName, directory, pairsOfPAndAlpha):
    """
    creates all rankings we need for one experimental data set and writes them to disk to
    be used later

    @param protected:        list of protected candidates, assumed to satisfy in-group
                             monotonicity
    @param nonProtected:     list of non-protected candidates, assumed to satisfy in-group
                             monotonicity
    @param k:                length of the rankings we want to create
    @param dataSetName:      determines which data set is used in this experiment, also
                             used as prefix of every file name
    @param directory:        directory (relative to the current working directory) in
                             which to store the rankings, created if it does not exist
    @param pairsOfPAndAlpha: sequence of (p, alpha) tuples, contains the alpha correction
                             to be used for a certain p; has to hold an entry for each
                             p in 0.1, 0.2, ..., 0.9, otherwise an IndexError is raised

    The experimental setting is as follows: for a given data set of protected and non-
    protected candidates we create the following rankings:
    * a colorblind ranking (fairRanking with p = ESSENTIALLY_ZERO and alpha = 0.1),
    * a ranking as in Feldman et al
    * nine rankings using fairRanking, with p varying from 0.1, 0.2 to 0.9, each one
      with the alpha that pairsOfPAndAlpha assigns to that p

    For every ranking the candidates that were not selected are stored as well.
    """
    # file name tag and p value of every fair ranking, in the order of creation
    fairSettings = (('01', 0.1), ('02', 0.2), ('03', 0.3),
                    ('04', 0.4), ('05', 0.5), ('06', 0.6),
                    ('07', 0.7), ('08', 0.8), ('09', 0.9))

    print("====================================================================")
    print("create rankings of {0}".format(dataSetName))

    outputDirectory = os.getcwd() + '/' + directory + '/'
    if not os.path.exists(outputDirectory):
        os.makedirs(outputDirectory)

    print("colorblind ranking", end='', flush=True)
    colorblindRanking, colorblindNotSelected = fairRanking(
        k, protected, nonProtected, ESSENTIALLY_ZERO, 0.1)
    print(" [Done]")

    print("fair rankings", end='', flush=True)
    fairResults = []
    for tag, p in fairSettings:
        # first pair that was registered for this p, IndexError if there is none
        pair = [item for item in pairsOfPAndAlpha if item[0] == p][0]
        ranking, notSelected = fairRanking(k, protected, nonProtected, pair[0], pair[1])
        fairResults.append((tag, ranking, notSelected))
    print(" [Done]")

    print("feldman ranking", end='', flush=True)
    feldmanRanking, feldmanNotSelected = fair_ranker.create.feldmanRanking(
        protected, nonProtected, k)
    print(" [Done]")

    print("Write rankings to disk", end='', flush=True)
    # every file lives in the output directory and starts with the data set name
    filePrefix = outputDirectory + dataSetName

    writePickleToDisk(colorblindRanking, filePrefix + 'ColorblindRanking.pickle')
    writePickleToDisk(colorblindNotSelected, filePrefix + 'ColorblindRankingNotSelected.pickle')
    writePickleToDisk(feldmanRanking, filePrefix + 'FeldmanRanking.pickle')
    writePickleToDisk(feldmanNotSelected, filePrefix + 'FeldmanRankingNotSelected.pickle')
    for tag, ranking, notSelected in fairResults:
        writePickleToDisk(ranking, filePrefix + 'FairRanking' + tag + 'PercentProtected.pickle')
        writePickleToDisk(notSelected, filePrefix + 'FairRanking' + tag + 'NotSelected.pickle')
    print(" [Done]")