def randomOneSideMassageData(examples, protectedIndex, protectedValue):
    """Reduce statistical-parity bias by relabeling ("massaging") examples.

    Flips just enough -1 labels to +1 within the non-favored group so that
    its positive rate matches that of the favored group.

    examples: iterable of (features, label) pairs with labels in {-1, +1}.
    protectedIndex: feature position of the protected attribute.
    protectedValue: protected-attribute value of the disadvantaged group
        (assumes a 0/1 encoding — the favored trait is its complement).
    Returns: a new list of (features, label) pairs with some labels flipped.
    """
    bias = signedStatisticalParity(examples, protectedIndex, protectedValue)
    print("Initial bias:", bias)
    favored_trait = 1 - protectedValue

    # break up data by label and by the value of the protected trait
    favored_data = [(x, label) for x, label in examples
                    if x[protectedIndex] == favored_trait]
    nonfavored_data = [(x, label) for x, label in examples
                       if x[protectedIndex] != favored_trait]
    favored_data_positive = [pt for pt in favored_data if pt[1] == 1]
    nonfavored_data_negative = [pt for pt in nonfavored_data if pt[1] == -1]

    # counts are integers: %d is the correct specifier (was %.3f)
    print("len(favored_data): %d" % len(favored_data))
    print("len(nonfavored_data): %d" % len(nonfavored_data))
    print("len(favored_data_positive): %d" % len(favored_data_positive))
    print("len(nonfavored_data_negative): %d" % len(nonfavored_data_negative))

    # calculate number of labels to flip from -1 to +1 on the nonfavored side:
    # want (NFp + k)/NF == Fp/F, hence k = (NF*Fp - F*NFp)/F
    num_nonfavored_positive = len(nonfavored_data) - len(nonfavored_data_negative)
    # label fixed: this is a count, not a len() of anything
    print("num_nonfavored_positive: %d" % num_nonfavored_positive)
    num_to_flip = math.floor(
        (len(nonfavored_data) * len(favored_data_positive)
         - len(favored_data) * num_nonfavored_positive) / len(favored_data))
    print("Number of labels flipped:", num_to_flip)

    to_flip_to_pos = sample(nonfavored_data_negative, num_to_flip)
    flipped_examples = []
    for data in examples:
        # note: list membership flips every duplicate equal to a sampled pair
        if data in to_flip_to_pos:
            flipped_examples.append((data[0], -1 * data[1]))
        else:
            flipped_examples.append(data)
    return flipped_examples
def optimalShiftClassifier(self, goal=None, condition=None, rounds=20):
    """Return the conditional-shift classifier built from the optimal shift.

    goal: callable (data, hypothesis) -> signed objective value; defaults
        to signed statistical parity w.r.t. this instance's protected trait.
    condition: predicate selecting which points get shifted; defaults to
        self.protected.
    rounds: search rounds forwarded to optimalShift.
    """
    # `is None` (identity) per PEP 8, not `== None`; equivalent here since
    # the defaults are plain None sentinels.
    if goal is None:
        goal = lambda d, h: signedStatisticalParity(
            d, self.protectedIndex, self.protectedValue, h)
    if condition is None:
        condition = self.protected
    return self.conditionalShiftClassifier(
        self.optimalShift(goal, condition, rounds), condition)
def statistics(train, test, protectedIndex, protectedValue, learner):
    """Train `learner` and report its metrics.

    Returns (error, bias, ubif): label error and signed statistical parity
    of the trained hypothesis on `test`, plus the unbiased individual
    fairness of the learner measured on `train`.
    """
    h = learner(train, protectedIndex, protectedValue)
    print("Computing error")
    error = labelError(test, h)
    print("Computing bias")
    bias = signedStatisticalParity(test, protectedIndex, protectedValue, h)
    print("Computing UBIF")
    # pass 0.2 by keyword for clarity, consistent with the sibling
    # definition of statistics that already names flipProportion
    ubif = individualFairness(train, learner, flipProportion=0.2,
                              passProtected=True)
    return error, bias, ubif
def statistics(train, test, protectedIndex, protectedValue, learner):
    """Fit a hypothesis on `train` and measure it.

    Returns a (error, bias, ubif) triple: label error and signed
    statistical parity on `test`, and individual fairness of the learner
    computed on `train`.
    """
    hypothesis = learner(train, protectedIndex, protectedValue)
    print("Computing error")
    testError = labelError(test, hypothesis)
    print("Computing bias")
    parity = signedStatisticalParity(test, protectedIndex, protectedValue,
                                     hypothesis)
    print("Computing UBIF")
    fairness = individualFairness(train, learner, flipProportion=0.2,
                                  passProtected=True)
    return testError, parity, fairness
def statistics(massager, trainingData, testData, protectedIndex,
               protectedValue, flipProportion=0.2, learner=None):
    """Massage the training data, train on it, and report metrics.

    Returns (error, bias, ubif): label error and signed statistical parity
    of the trained hypothesis on testData, plus individual fairness of the
    learner measured on the original (un-massaged) training data.
    """
    cleanedData = massager(trainingData, protectedIndex, protectedValue)
    hypothesis = learner(cleanedData)
    testError = labelError(testData, hypothesis)
    testBias = signedStatisticalParity(testData, protectedIndex,
                                       protectedValue, hypothesis)
    fairness = individualFairness(trainingData, learner, flipProportion)
    return testError, testBias, fairness
def thresholdRelabel(h, trainingData, protectedIndex, protectedValue,
                     hypotheses, weights, threshold):
    """Build a relabeled version of classifier h.

    Measures the signed bias of h on trainingData, then returns a labeling
    function that flips h's prediction for points of the biased-against
    class whose ensemble margin falls below `threshold`.
    """
    bias = signedStatisticalParity(trainingData, protectedIndex,
                                   protectedValue, h)
    biasedClass = 1 - zeroOneSign(bias)

    def relabel(pt):
        label = h(pt)
        lowMarginAndBiased = (pt[protectedIndex] == biasedClass
                              and absMargin(pt, hypotheses, weights) < threshold)
        return -label if lowMarginAndBiased else label

    return relabel
def statistics(train, test, protectedIndex, protectedValue, numRounds=20):
    """Boost decision stumps with a fairness-weighted error and report metrics.

    Returns (error, bias, ubif): label error and signed statistical parity
    of the boosted hypothesis on `test`, plus individual fairness of plain
    boosting measured on `train`.
    """
    weight = 0.5
    flipProportion = 0.2
    # was named `error` and then shadowed by the label-error float below,
    # which made the weakLearner closure's capture ambiguous — renamed
    errorFunction = makeErrorFunction(protectedIndex, protectedValue, weight)
    weakLearner = lambda draw: buildDecisionStump(draw,
                                                  errorFunction=errorFunction)
    h = boosting.boost(train, weakLearner=weakLearner)
    bias = ef.signedStatisticalParity(test, protectedIndex, protectedValue, h)
    error = ef.labelError(test, h)
    ubif = ef.individualFairness(train, boosting.boost, flipProportion)
    return error, bias, ubif
def statistics(train, test, protectedIndex, protectedValue, numRounds=20):
    """Boost fair decision stumps and report (error, bias, ubif) on test."""
    stumpWeight = 0.5
    flipFraction = 0.2
    fairError = makeErrorFunction(protectedIndex, protectedValue, stumpWeight)

    # weak learner: decision stumps trained against the fairness-weighted
    # error function above
    def stumpLearner(draw):
        return buildDecisionStump(draw, errorFunction=fairError)

    h = boosting.boost(train, weakLearner=stumpLearner)
    bias = ef.signedStatisticalParity(test, protectedIndex, protectedValue, h)
    error = ef.labelError(test, h)
    ubif = ef.individualFairness(train, boosting.boost, flipFraction)
    return error, bias, ubif
def optimalShift(self, goal=None, condition=None, rounds=20):
    """Find the shift value that drives `goal` to (approximately) zero.

    Uses bisection when `goal` has opposite signs at the extreme shifts;
    otherwise falls back to a grid search minimizing |goal|.

    goal: callable (data, hypothesis) -> signed value; defaults to signed
        statistical parity w.r.t. this instance's protected trait.
    condition: predicate choosing which points get shifted; defaults to
        self.protected.
    rounds: bisection iterations / grid-search steps.
    Returns: the chosen shift value.
    """
    if goal is None:  # `is None` per PEP 8; behavior unchanged
        goal = lambda d, h: signedStatisticalParity(
            d, self.protectedIndex, self.protectedValue, h)
    if condition is None:
        condition = self.protected

    low = self.minShift
    high = self.maxShift
    dataToUse = self.validationData
    minGoalValue = goal(dataToUse,
                        self.conditionalShiftClassifier(low, condition))
    maxGoalValue = goal(dataToUse,
                        self.conditionalShiftClassifier(high, condition))
    #print((low, minGoalValue))
    #print((high, maxGoalValue))
    if sign(minGoalValue) != sign(maxGoalValue):
        # a binary search for zero.  The sign at `low` is loop-invariant:
        # `low` only ever moves to a midpoint whose sign matched it, so we
        # evaluate it once instead of re-running goal(low) every round
        # (assumes goal is deterministic on the fixed validation data —
        # true for statistical parity of a fixed classifier).
        lowSign = sign(minGoalValue)
        midpoint = (low + high) / 2  # defined even if rounds == 0
        for _ in range(rounds):
            midpoint = (low + high) / 2
            midSign = sign(goal(
                dataToUse,
                self.conditionalShiftClassifier(midpoint, condition)))
            if lowSign == midSign:
                low = midpoint
            else:
                high = midpoint
        return midpoint
    else:
        print("Warning: bisection method not applicable")
        # grid search: take the shift whose |goal| is smallest
        bestShift = None
        bestVal = float('inf')
        step = (high - low) / rounds
        for newShift in numpy.arange(low, high, step):
            newVal = goal(dataToUse,
                          self.conditionalShiftClassifier(newShift, condition))
            print(newVal)
            newVal = abs(newVal)
            if newVal < bestVal:
                bestShift = newShift
                bestVal = newVal
        return bestShift
def randomOneSideRelabelData(h, trainingData, protectedIndex, protectedValue):
    """Wrap classifier h with randomized one-sided relabeling.

    Negative predictions on the non-favored group are flipped to positive
    with probability p, where p is sized from the measured bias so the
    expected statistical parity of the wrapped classifier is zero.

    h: hypothesis mapping a feature vector to a label in {-1, +1}.
    trainingData: (features, label) pairs used to measure the bias.
    Returns: a new classifier (callable point -> label).
    """
    bias = signedStatisticalParity(trainingData, protectedIndex,
                                   protectedValue, h)
    favored_trait = zeroOneSign(bias)
    nonfavored_data = [(feats, label) for feats, label in trainingData
                       if not feats[protectedIndex] == favored_trait]
    NF = len(nonfavored_data)
    NFn = len([1 for x, label in nonfavored_data if h(x) == -1])
    # robustness: if h predicts no negatives on the non-favored group there
    # is nothing to flip; the original raised ZeroDivisionError here
    p = NF * abs(bias) / NFn if NFn else 0.0

    def relabeledClassifier(point):
        origClass = h(point)
        # only negative predictions on the non-favored group may flip
        if point[protectedIndex] != favored_trait and origClass == -1:
            if random() < p:
                return -origClass
        return origClass

    return relabeledClassifier
def randomOneSideMassageData(examples, protectedIndex, protectedValue):
    """Reduce statistical-parity bias by relabeling ("massaging") examples.

    Flips just enough -1 labels to +1 within the non-favored group so that
    its positive rate matches that of the favored group.

    examples: iterable of (features, label) pairs with labels in {-1, +1}.
    protectedIndex: feature position of the protected attribute.
    protectedValue: protected-attribute value of the disadvantaged group —
        assumes a 0/1 encoding, since the favored trait is its complement.
    Returns: a new list of (features, label) pairs with some labels flipped.
    """
    bias = signedStatisticalParity(examples, protectedIndex, protectedValue)
    print("Initial bias:", bias)
    favored_trait = 1 - protectedValue
    # break up data by label and by the value of the protected trait
    favored_data = [(x, label) for x, label in examples
                    if x[protectedIndex] == favored_trait]
    nonfavored_data = [(x, label) for x, label in examples
                       if x[protectedIndex] != favored_trait]
    favored_data_positive = [pt for pt in favored_data if pt[1] == 1]
    nonfavored_data_negative = [pt for pt in nonfavored_data if pt[1] == -1]
    print("len(favored_data): %.3f" % len(favored_data))
    print("len(nonfavored_data): %.3f" % len(nonfavored_data))
    print("len(favored_data_positive): %.3f" % len(favored_data_positive))
    print("len(nonfavored_data_negative): %.3f" % len(nonfavored_data_negative))
    # calculate number of labels to flip from -1 to +1 on the nonfavored side:
    # we want (NFp + k)/NF == Fp/F, hence k = (NF*Fp - F*NFp)/F
    num_nonfavored_positive = len(nonfavored_data) - len(
        nonfavored_data_negative)
    print("len(num_nonfavored_positive): %.3f" % num_nonfavored_positive)
    num_to_flip = math.floor(
        (len(nonfavored_data) * len(favored_data_positive) -
         len(favored_data) * num_nonfavored_positive) / len(favored_data))
    print("Number of labels flipped:", num_to_flip)
    # uniformly choose which negative non-favored examples get relabeled
    to_flip_to_pos = sample(nonfavored_data_negative, num_to_flip)
    flipped_examples = []
    for data in examples:
        # list membership: every duplicate equal to a sampled pair is flipped
        if data in to_flip_to_pos:
            flipped_examples.append((data[0], -1 * data[1]))
        else:
            flipped_examples.append(data)
    return flipped_examples
def optimalShift(self, goal=None, condition=None, rounds=20):
    """Find the shift value that drives `goal` to (approximately) zero.

    Uses bisection when `goal` has opposite signs at the two extreme
    shifts; otherwise falls back to a grid search minimizing |goal|.

    goal: callable (data, hypothesis) -> signed value; defaults to signed
        statistical parity w.r.t. this instance's protected trait.
    condition: predicate choosing which points get shifted; defaults to
        self.protected.
    rounds: bisection iterations / grid-search steps.
    Returns: the chosen shift value.
    """
    if goal is None:  # `is None` per PEP 8; behavior unchanged
        goal = lambda d, h: signedStatisticalParity(d, self.protectedIndex,
                                                    self.protectedValue, h)
    if condition is None:
        condition = self.protected
    low = self.minShift
    high = self.maxShift
    dataToUse = self.validationData
    minGoalValue = goal(dataToUse,
                        self.conditionalShiftClassifier(low, condition))
    maxGoalValue = goal(dataToUse,
                        self.conditionalShiftClassifier(high, condition))
    print((low, minGoalValue))
    # BUG FIX: the high endpoint previously printed minGoalValue
    print((high, maxGoalValue))
    if sign(minGoalValue) != sign(maxGoalValue):
        # a binary search for zero
        for _ in range(rounds):
            midpoint = (low + high) / 2
            if (sign(goal(dataToUse,
                          self.conditionalShiftClassifier(low, condition)))
                    == sign(goal(dataToUse,
                                 self.conditionalShiftClassifier(
                                     midpoint, condition)))):
                low = midpoint
            else:
                high = midpoint
        return midpoint
    else:
        print("Warning: bisection method not applicable")
        # grid search: keep the shift whose |goal| is smallest
        bestShift = None
        bestVal = float('inf')
        step = (high - low) / rounds
        for newShift in numpy.arange(low, high, step):
            newVal = goal(dataToUse,
                          self.conditionalShiftClassifier(newShift, condition))
            print(newVal)
            newVal = abs(newVal)
            if newVal < bestVal:
                bestShift = newShift
                bestVal = newVal
        return bestShift
def randomOneSideRelabelData(h, trainingData, protectedIndex, protectedValue):
    """Wrap classifier h with randomized one-sided relabeling.

    Negative predictions on the non-favored group are flipped to positive
    with probability p, sized from the measured bias.

    h: hypothesis mapping a feature vector to a label in {-1, +1}.
    trainingData: (features, label) pairs used to measure the bias.
    Returns: a new classifier (callable point -> label).
    """
    bias = signedStatisticalParity(trainingData, protectedIndex,
                                   protectedValue, h)
    # zeroOneSign(bias) identifies which protected-attribute value h favors
    favored_trait = zeroOneSign(bias)
    nonfavored_data = [(feats, label) for feats, label in trainingData
                       if not feats[protectedIndex] == favored_trait]
    # NF: size of the non-favored group; NFn: how many of them h labels -1
    NF, NFn = (len(nonfavored_data),
               len([1 for x, label in nonfavored_data if h(x) == -1]))
    # NOTE(review): raises ZeroDivisionError when h predicts no negatives
    # on the non-favored group (NFn == 0) — confirm callers avoid this case
    p = NF * abs(bias) / NFn

    def relabeledClassifier(point):
        origClass = h(point)
        # only negative predictions on the non-favored group may flip
        if point[protectedIndex] != favored_trait and origClass == -1:
            if random() < p:
                return -origClass
            else:
                return origClass
        else:
            return origClass

    return relabeledClassifier
def optimalShiftClassifier(self, goal=None, condition=None, rounds=20):
    """Classifier shifted by the optimal amount found by optimalShift.

    Defaults: goal is signed statistical parity w.r.t. this instance's
    protected trait; condition is self.protected.
    """
    if goal is None:
        def goal(d, h):
            return signedStatisticalParity(d, self.protectedIndex,
                                           self.protectedValue, h)
    if condition is None:
        condition = self.protected
    bestShift = self.optimalShift(goal, condition, rounds)
    return self.conditionalShiftClassifier(bestShift, condition)