forked from j2kun/fkl-SDM16
-
Notifications
You must be signed in to change notification settings - Fork 0
/
relabeling.py
63 lines (50 loc) · 2.09 KB
/
relabeling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from errorfunctions import signedStatisticalParity
from boosting import absMargin
from random import random
from utils import zeroOneSign
# return a new hypothesis which flips the label of a data point if it is in the
# protected class and its boosting margin is less than the given threshold in
# magnitude
def thresholdRelabel(h, trainingData, protectedIndex, protectedValue,
                     hypotheses, weights, threshold):
    """Wrap classifier ``h`` so that low-margin points of one class get flipped.

    The signed statistical parity of ``h`` on ``trainingData`` is computed
    once, up front, and converted into the value ``biasedClass`` of the
    protected feature whose points are candidates for relabeling.

    h              -- callable mapping a point to its label
    trainingData   -- data handed to signedStatisticalParity to measure bias
    protectedIndex -- index of the protected feature within a point
    protectedValue -- forwarded to signedStatisticalParity only
    hypotheses, weights -- forwarded to absMargin for each queried point
    threshold      -- points whose absMargin is strictly below this may flip

    Returns a function of one point that yields ``-h(point)`` when the point's
    protected feature equals ``biasedClass`` and its absMargin is below
    ``threshold``, and ``h(point)`` otherwise.
    """
    # NOTE(review): presumably zeroOneSign maps the sign of the bias to 0/1,
    # so that `1 - ...` selects the opposite group -- confirm in utils.
    biasedClass = 1 - zeroOneSign(
        signedStatisticalParity(trainingData, protectedIndex, protectedValue, h))

    def relabel(point):
        label = h(point)
        # The margin is only evaluated for points of the biased class.
        shouldFlip = (point[protectedIndex] == biasedClass
                      and absMargin(point, hypotheses, weights) < threshold)
        return -label if shouldFlip else label

    return relabel
# Randomly flips labels of the input classifier to cancel the bias of the
# feature at index protectedIndex, and outputs the modified classifier.
# Only points on the 'non-favored' side of the feature that were rated -1
# are eligible to be re-rated as 1.
def randomOneSideRelabelData(h, trainingData, protectedIndex, protectedValue):
    """Return a randomized version of ``h`` that flips some -1 labels to +1.

    Only points whose protected feature differs from the favored trait and
    which ``h`` labels -1 are candidates; each such query is flipped
    independently with probability ``p`` (values of ``p`` >= 1 always flip).

    h              -- callable mapping a feature vector to a label
    trainingData   -- iterable of (features, label) pairs
    protectedIndex -- index of the protected feature within a feature vector
    protectedValue -- forwarded to signedStatisticalParity only

    Returns a function of one point. Because it draws from ``random()``,
    repeated calls on the same point may return different labels.
    """
    bias = signedStatisticalParity(trainingData, protectedIndex, protectedValue, h)
    # NOTE(review): presumably zeroOneSign turns the sign of the bias into the
    # 0/1 value of the favored group -- confirm in utils.
    favored_trait = zeroOneSign(bias)

    nonfavoredPoints = [feats for feats, _ in trainingData
                        if feats[protectedIndex] != favored_trait]
    NF = len(nonfavoredPoints)
    # Number of non-favored training points that h currently labels -1.
    NFn = sum(1 for feats in nonfavoredPoints if h(feats) == -1)

    # Flip probability: NF * |bias| spread over the NFn flippable points.
    # With no flippable training points the ratio is undefined (previously a
    # ZeroDivisionError); there is nothing to relabel, so never flip.
    p = NF * abs(bias) / NFn if NFn else 0.0

    def relabeledClassifier(point):
        origClass = h(point)
        # random() is only drawn for candidate points (short-circuit order).
        if (point[protectedIndex] != favored_trait and origClass == -1
                and random() < p):
            return -origClass
        return origClass

    return relabeledClassifier
if __name__ == '__main__':
    # Script entry point: train a boosted classifier on the adult data set
    # and build its randomly relabeled counterpart.
    from boosting import boost
    from data import adult

    trainingData, testData = adult.load()
    protectedIndex, protectedValue = adult.protectedIndex, adult.protectedValue

    # NOTE(review): the 5 is presumably the number of boosting rounds --
    # confirm against boosting.boost.
    h = boost(trainingData, 5)
    rr = randomOneSideRelabelData(
        h, trainingData, protectedIndex, protectedValue)