Exemplo n.º 1
0
def enforce_single_threshold(categorical_results):
    """Find one threshold, shared by all groups, that maximises total accuracy.

    Candidate thresholds run from the smallest to the largest score found
    across all groups (upper bound excluded) in steps of 0.01.  Each candidate
    is applied to every group with ``utils.apply_threshold`` and scored with
    ``utils.get_total_accuracy``; the first candidate reaching the highest
    accuracy wins.

    Args:
        categorical_results: Mapping of group name to a sequence of entries
            whose first element (``entry[0]``) is the score being thresholded.

    Returns:
        A ``(single_threshold_data, thresholds)`` tuple: the thresholded data
        per group and a mapping of every group name to the chosen threshold.
        When there are no scores at all, or no candidate achieves an accuracy
        above 0, the threshold falls back to 0.
    """
    scores = [
        entry[0]
        for entries in categorical_results.values()
        for entry in entries
    ]

    single_thresh = 0
    # Guard: min()/max() raise ValueError on an empty sequence, so the search
    # is skipped entirely when no group contains any entry.
    if scores:
        max_acc = 0
        for thresh in np.arange(min(scores), max(scores), 0.01):
            # Build a fresh dict per candidate so no stale entries leak
            # from one iteration into the next.
            candidate = {
                key: utils.apply_threshold(value, thresh)
                for key, value in categorical_results.items()
            }
            acc = utils.get_total_accuracy(candidate)
            if acc > max_acc:
                max_acc = acc
                single_thresh = thresh

    thresholds = {key: single_thresh for key in categorical_results}
    single_threshold_data = {
        key: utils.apply_threshold(value, single_thresh)
        for key, value in categorical_results.items()
    }
    return single_threshold_data, thresholds
Exemplo n.º 2
0
def enforce_maximum_profit(categorical_results):
    """Choose, independently for each group, the threshold with best accuracy.

    For every group the candidate thresholds run from that group's smallest
    score to its largest score (upper bound excluded) in steps of 0.01.  A
    candidate's accuracy is the fraction of pairs returned by
    ``utils.apply_threshold`` whose prediction equals the label.  The first
    candidate reaching the highest accuracy is kept.

    Args:
        categorical_results: Mapping of group name to a sequence of entries
            whose first element (``entry[0]``) is the score being thresholded.

    Returns:
        A ``(mp_data, thresholds)`` tuple: the thresholded data per group and
        the threshold selected for each group.  A group with no entries, or
        with no candidate scoring above 0, gets threshold 0.
    """
    mp_data = {}
    thresholds = {}

    for key, value in categorical_results.items():
        best_acc = 0
        best_thresh = 0
        scores = [entry[0] for entry in value]
        # Guard: an empty group has no score range to search
        # (min()/max() would raise ValueError).
        if scores:
            for thresh in np.arange(min(scores), max(scores), 0.01):
                total = 0
                correct = 0
                for prediction, label in utils.apply_threshold(value, thresh):
                    total += 1
                    if prediction == label:
                        correct += 1
                acc = correct / total
                if acc > best_acc:
                    best_acc = acc
                    best_thresh = thresh
        thresholds[key] = best_thresh
        mp_data[key] = utils.apply_threshold(value, best_thresh)

    return mp_data, thresholds
Exemplo n.º 3
0
def enforce_equal_opportunity(categorical_results, epsilon):
    """Pick per-group thresholds with near-equal true positive rates.

    For each group, every threshold in ``np.arange(0, 1, 0.01)`` is applied
    with ``utils.apply_threshold`` and scored with
    ``utils.get_true_positive_rate``.  A combination (one threshold per group)
    is feasible when every pair of groups has rates differing by at most
    ``epsilon``.  Among feasible combinations, the first one reaching the
    highest ``utils.get_total_accuracy`` is returned.

    Groups are taken from ``categorical_results`` itself, so thresholds are
    always attached to the group they were computed for, whatever the
    mapping's iteration order or number of groups.

    Args:
        categorical_results: Mapping of group name to that group's data, in
            the form accepted by ``utils.apply_threshold``.
        epsilon: Largest allowed absolute difference between the true
            positive rates of any two groups.

    Returns:
        A ``(equal_opportunity_data, thresholds)`` tuple of dicts keyed by
        group name.  Both are empty when there are no groups or when no
        feasible combination has an accuracy above 0.
    """
    groups = list(categorical_results)
    if not groups:
        return {}, {}

    # One (rate, threshold, thresholded data) entry per candidate threshold.
    # Keeping the thresholded data avoids recomputing it per combination.
    entries = {}
    for group in groups:
        group_entries = []
        for threshold in np.arange(0, 1, 0.01):
            t_data = utils.apply_threshold(categorical_results[group],
                                           threshold)
            group_entries.append(
                (utils.get_true_positive_rate(t_data), threshold, t_data))
        entries[group] = group_entries

    # Grow combinations one group at a time, keeping only those whose new
    # member is within epsilon of every member already chosen.
    combos = [[entry] for entry in entries[groups[0]]]
    for group in groups[1:]:
        combos = [
            combo + [entry]
            for combo in combos
            for entry in entries[group]
            if all(abs(entry[0] - chosen[0]) <= epsilon for chosen in combo)
        ]

    equal_opportunity_data = {}
    thresholds = {}
    max_acc = 0
    for combo in combos:
        candidate = {group: entry[2] for group, entry in zip(groups, combo)}
        acc = utils.get_total_accuracy(candidate)
        if acc > max_acc:
            max_acc = acc
            thresholds = {
                group: entry[1] for group, entry in zip(groups, combo)
            }
            equal_opportunity_data = candidate

    return equal_opportunity_data, thresholds
Exemplo n.º 4
0
def enforce_demographic_parity(categorical_results, epsilon):
    """Pick per-group thresholds with near-equal predicted-positive rates.

    For each group, every threshold in ``np.arange(0, 1, 0.01)`` is applied
    with ``utils.apply_threshold``; the group's rate at that threshold is
    ``utils.get_num_predicted_positives(data) / len(data)``.  A combination
    (one threshold per group) is feasible when every pair of groups has rates
    differing by at most ``epsilon``.  Among feasible combinations, the first
    one reaching the highest ``utils.get_total_accuracy`` is returned.

    Groups are taken from ``categorical_results`` itself, so thresholds are
    always attached to the group they were computed for, whatever the
    mapping's iteration order or number of groups.

    Args:
        categorical_results: Mapping of group name to that group's data, in
            the form accepted by ``utils.apply_threshold``.
        epsilon: Largest allowed absolute difference between the
            predicted-positive rates of any two groups.

    Returns:
        A ``(demographic_parity_data, thresholds)`` tuple of dicts keyed by
        group name.  Both are empty when there are no groups or when no
        feasible combination has an accuracy above 0.
    """
    groups = list(categorical_results)
    if not groups:
        return {}, {}

    # One (rate, threshold, thresholded data) entry per candidate threshold.
    # Keeping the thresholded data avoids recomputing it per combination.
    entries = {}
    for group in groups:
        group_entries = []
        for threshold in np.arange(0, 1, 0.01):
            t_data = utils.apply_threshold(categorical_results[group],
                                           threshold)
            rate = utils.get_num_predicted_positives(t_data) / len(t_data)
            group_entries.append((rate, threshold, t_data))
        entries[group] = group_entries

    # Grow combinations one group at a time, keeping only those whose new
    # member is within epsilon of every member already chosen.
    combos = [[entry] for entry in entries[groups[0]]]
    for group in groups[1:]:
        combos = [
            combo + [entry]
            for combo in combos
            for entry in entries[group]
            if all(abs(entry[0] - chosen[0]) <= epsilon for chosen in combo)
        ]

    demographic_parity_data = {}
    thresholds = {}
    max_acc = 0
    for combo in combos:
        candidate = {group: entry[2] for group, entry in zip(groups, combo)}
        acc = utils.get_total_accuracy(candidate)
        if acc > max_acc:
            max_acc = acc
            thresholds = {
                group: entry[1] for group, entry in zip(groups, combo)
            }
            demographic_parity_data = candidate

    return demographic_parity_data, thresholds
Exemplo n.º 5
0
def enforce_maximum_profit(categorical_results):
    """Grid-search one threshold per group for the best total accuracy.

    Every combination of thresholds drawn from 0.0, 0.1, ..., 0.9 is tried
    for the four groups 'African-American', 'Caucasian', 'Hispanic' and
    'Other'.  Each combination is applied with ``u.apply_threshold`` and
    scored with ``u.get_total_accuracy``; the first combination reaching the
    highest accuracy is kept.

    Args:
        categorical_results: Mapping that must contain the four group names
            above, each mapped to the data passed to ``u.apply_threshold``.

    Returns:
        A ``(mp_data, thresholds)`` tuple of dicts keyed by group name.

    Raises:
        KeyError: If one of the four groups is missing.
        TypeError: If no combination achieves an accuracy above 0, because
            no best combination was ever recorded.
    """
    group_names = ('African-American', 'Caucasian', 'Hispanic', 'Other')
    group_pairs = [categorical_results[name] for name in group_names]
    grid = [step / 10 for step in range(10)]

    mp_data = {}
    top_accuracy = 0
    top_combo = None
    for combo in itertools.product(grid, repeat=len(group_names)):
        thresholded = {
            name: u.apply_threshold(pairs, cut)
            for name, pairs, cut in zip(group_names, group_pairs, combo)
        }
        accuracy = u.get_total_accuracy(thresholded)
        if accuracy > top_accuracy:
            top_accuracy = accuracy
            top_combo = list(combo)
            mp_data = thresholded

    # Indexing (rather than zipping) keeps the failure mode when nothing was
    # selected: subscripting None raises TypeError.
    thresholds = {
        name: top_combo[position]
        for position, name in enumerate(group_names)
    }
    return mp_data, thresholds
Exemplo n.º 6
0
def enforce_single_threshold(categorical_results):
    """Scan 0.00-0.99 for the shared threshold with the best total accuracy.

    Each threshold ``step / 100`` for ``step`` in 0..99 is applied to the four
    groups 'African-American', 'Caucasian', 'Hispanic' and 'Other' with
    ``u.apply_threshold`` and scored with ``u.get_total_accuracy``.  The first
    threshold reaching the highest accuracy is kept.

    Args:
        categorical_results: Mapping that must contain the four group names
            above, each mapped to the data passed to ``u.apply_threshold``.

    Returns:
        A ``(single_threshold_data, thresholds)`` tuple.  ``thresholds`` maps
        each of the four groups to the chosen threshold.  If no threshold
        achieves an accuracy above 0, the data dict is empty and every
        threshold is ``None``.

    Raises:
        KeyError: If one of the four groups is missing.
    """
    group_names = ('African-American', 'Caucasian', 'Hispanic', 'Other')
    group_pairs = [categorical_results[name] for name in group_names]

    single_threshold_data = {}
    top_accuracy = 0
    top_cut = None
    for step in range(100):
        cut = step / 100
        thresholded = {
            name: u.apply_threshold(pairs, cut)
            for name, pairs in zip(group_names, group_pairs)
        }
        accuracy = u.get_total_accuracy(thresholded)
        if accuracy > top_accuracy:
            top_accuracy = accuracy
            top_cut = cut
            single_threshold_data = thresholded

    thresholds = dict.fromkeys(group_names, top_cut)
    return single_threshold_data, thresholds
Exemplo n.º 7
0
def enforce_predictive_parity(categorical_results,
                              epsilon):  # Kedaar Raghavendra Rao
    """Sweep target values and keep the most accurate matching thresholds.

    For each target ``p / 100`` (p in 1..99) and each group, thresholds
    ``t / 100`` (t in 1..99) are tried in ascending order.  The first one
    whose ``u.get_positive_predictive_value`` satisfies
    ``compare_probs(value, target, epsilon)`` is taken for that group.
    Targets for which every group found a threshold are scored with
    ``u.get_total_accuracy``; the first target reaching the highest accuracy
    wins.

    Args:
        categorical_results: Mapping of group name to the data passed to
            ``u.apply_threshold``.
        epsilon: Tolerance forwarded unchanged to ``compare_probs``.

    Returns:
        A ``(predictive_parity_data, thresholds)`` tuple of dicts keyed by
        group name.  Each threshold is stored as a one-element list.  Both
        dicts are empty if no target yields an accuracy above 0.
    """
    import utils as u

    best_accuracy = 0
    best_cutoffs = {}
    best_predictions = {}

    for target_step in range(1, 100):
        target = target_step / 100
        cutoffs = {}
        predictions = {}
        for race, cases in categorical_results.items():
            for cutoff_step in range(1, 100):
                cutoff = cutoff_step / 100
                thresholded = u.apply_threshold(cases, cutoff)
                value = u.get_positive_predictive_value(thresholded)
                if compare_probs(value, target, epsilon):
                    cutoffs[race] = [cutoff]
                    predictions[race] = thresholded
                    break

        # Only targets that every group can meet are eligible.
        if len(predictions) != len(categorical_results):
            continue

        accuracy = u.get_total_accuracy(predictions)
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_predictions = predictions
            best_cutoffs = cutoffs

    return best_predictions, best_cutoffs
Exemplo n.º 8
0
def enforce_equal_opportunity(categorical_results, epsilon):  # Rishabh Sharma
    """Sweep target values and keep the most accurate matching thresholds.

    For each target ``p / 100`` (p in 1..99) and each group, thresholds
    ``t / 100`` (t in 1..99) are tried in ascending order.  The first one
    whose ``u.get_true_positive_rate`` satisfies
    ``compare_probs(rate, target, epsilon)`` is taken for that group.
    Targets for which every group found a threshold are scored with
    ``u.get_total_accuracy``; the first target reaching the highest accuracy
    wins.

    Args:
        categorical_results: Mapping of group name to the data passed to
            ``u.apply_threshold``.
        epsilon: Tolerance forwarded unchanged to ``compare_probs``.

    Returns:
        A ``(equal_opportunity_data, thresholds)`` tuple of dicts keyed by
        group name.  Both are empty if no target yields an accuracy above 0.
    """
    import utils as u

    best_accuracy = 0
    best_cutoffs = {}
    best_predictions = {}

    for target_step in range(1, 100):
        target = target_step / 100
        cutoffs = {}
        predictions = {}
        for race, cases in categorical_results.items():
            for cutoff_step in range(1, 100):
                cutoff = cutoff_step / 100
                thresholded = u.apply_threshold(cases, cutoff)
                rate = u.get_true_positive_rate(thresholded)
                if compare_probs(rate, target, epsilon):
                    cutoffs[race] = cutoff
                    predictions[race] = thresholded
                    break

        # Only targets that every group can meet are eligible.
        if len(predictions) != len(categorical_results):
            continue

        accuracy = u.get_total_accuracy(predictions)
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_predictions = predictions
            best_cutoffs = cutoffs

    return best_predictions, best_cutoffs
Exemplo n.º 9
0
def enforce_equal_opportunity(categorical_results, epsilon):
    """Search per-group thresholds whose ROC values stay within ``epsilon``.

    The first element returned by ``u.get_ROC_data`` for each of the four
    groups 'African-American', 'Caucasian', 'Hispanic' and 'Other' is read as
    a sequence of at least 100 rates.  For every index ``i`` of the
    'African-American' sequence, indices of the other three groups whose rate
    lies within ``epsilon`` of it are combined.  Each combination is turned
    into thresholds ``index / 100``, applied with ``u.apply_threshold`` and
    scored with ``u.get_total_accuracy``.  The first combination reaching the
    highest accuracy is returned.

    NOTE(review): rates are compared against the 'African-American' value
    only, not pairwise between the other groups, and index ``i`` is assumed
    to correspond to threshold ``i / 100`` -- confirm both against
    ``u.get_ROC_data``.

    Args:
        categorical_results: Mapping that must contain the four group names
            above, each mapped to the data passed to ``u.apply_threshold``.
        epsilon: Largest allowed absolute difference between a group's rate
            and the 'African-American' rate.

    Returns:
        A ``(equal_opportunity_data, thresholds)`` tuple of dicts keyed by
        group name.  Both are empty when no combination is feasible or none
        has an accuracy above 0.

    Raises:
        KeyError: If one of the four groups is missing.
    """
    group_names = ('African-American', 'Caucasian', 'Hispanic', 'Other')
    group_pairs = [categorical_results[name] for name in group_names]
    rates = [
        u.get_ROC_data(pairs, name)[0]
        for name, pairs in zip(group_names, group_pairs)
    ]
    reference, second, third, fourth = rates
    steps = range(100)

    # Thresholded data depends only on (group, step); memoise it so the
    # combination loop does not redo identical work.
    cache = [{} for _ in group_names]

    def thresholded(position, step):
        """Return the group's data thresholded at ``step / 100``."""
        per_group = cache[position]
        if step not in per_group:
            per_group[step] = u.apply_threshold(group_pairs[position],
                                                step / 100)
        return per_group[step]

    equal_opportunity_data = {}
    thresholds = {}
    best_accuracy = 0
    for i in steps:
        low = reference[i] - epsilon
        high = reference[i] + epsilon
        # The matching indices of each other group depend only on i, so
        # compute them once instead of inside the nested loops.
        second_hits = [j for j in steps if low <= second[j] <= high]
        third_hits = [k for k in steps if low <= third[k] <= high]
        fourth_hits = [m for m in steps if low <= fourth[m] <= high]
        for j in second_hits:
            for k in third_hits:
                for m in fourth_hits:
                    combo = (i, j, k, m)
                    candidate = {
                        name: thresholded(position, step)
                        for position, (name, step) in enumerate(
                            zip(group_names, combo))
                    }
                    accuracy = u.get_total_accuracy(candidate)
                    if accuracy > best_accuracy:
                        best_accuracy = accuracy
                        equal_opportunity_data = candidate
                        thresholds = {
                            name: step / 100
                            for name, step in zip(group_names, combo)
                        }

    return equal_opportunity_data, thresholds
Exemplo n.º 10
0
def enforce_predictive_parity(categorical_results, epsilon):
    """Search per-group thresholds whose ``func`` values stay within ``epsilon``.

    ``func`` is called on the data of each of the four groups
    'African-American', 'Caucasian', 'Hispanic' and 'Other'; each result is
    read as a sequence of at least 100 values.  For every index ``i`` of the
    'African-American' sequence, indices of the other three groups whose
    value lies within ``epsilon`` of it are combined.  Each combination is
    turned into thresholds ``index / 100``, applied with
    ``u.apply_threshold`` and scored with ``u.get_total_accuracy``.  The
    first combination reaching the highest accuracy is returned.

    Every feasible combination is evaluated; there is no fixed cap on how
    many are considered.

    NOTE(review): presumably ``func`` returns the positive predictive value
    at thresholds 0.00..0.99 -- confirm against its definition.  Values are
    compared against the 'African-American' value only, not pairwise between
    the other groups.

    Args:
        categorical_results: Mapping that must contain the four group names
            above, each mapped to the data passed to ``u.apply_threshold``.
        epsilon: Largest allowed absolute difference between a group's value
            and the 'African-American' value.

    Returns:
        A ``(predictive_parity_data, thresholds)`` tuple of dicts keyed by
        group name.  Both are empty when no combination is feasible or none
        has an accuracy above 0.

    Raises:
        KeyError: If one of the four groups is missing.
    """
    group_names = ('African-American', 'Caucasian', 'Hispanic', 'Other')
    group_pairs = [categorical_results[name] for name in group_names]
    reference, second, third, fourth = [func(pairs) for pairs in group_pairs]
    steps = range(100)

    # Thresholded data depends only on (group, step); memoise it so the
    # combination loop does not redo identical work.
    cache = [{} for _ in group_names]

    def thresholded(position, step):
        """Return the group's data thresholded at ``step / 100``."""
        per_group = cache[position]
        if step not in per_group:
            per_group[step] = u.apply_threshold(group_pairs[position],
                                                step / 100)
        return per_group[step]

    predictive_parity_data = {}
    thresholds = {}
    best_accuracy = 0
    for i in steps:
        low = reference[i] - epsilon
        high = reference[i] + epsilon
        # The matching indices of each other group depend only on i, so
        # compute them once instead of inside the nested loops.
        second_hits = [j for j in steps if low <= second[j] <= high]
        third_hits = [k for k in steps if low <= third[k] <= high]
        fourth_hits = [m for m in steps if low <= fourth[m] <= high]
        for j in second_hits:
            for k in third_hits:
                for m in fourth_hits:
                    combo = (i, j, k, m)
                    candidate = {
                        name: thresholded(position, step)
                        for position, (name, step) in enumerate(
                            zip(group_names, combo))
                    }
                    accuracy = u.get_total_accuracy(candidate)
                    if accuracy > best_accuracy:
                        best_accuracy = accuracy
                        predictive_parity_data = candidate
                        thresholds = {
                            name: step / 100
                            for name, step in zip(group_names, combo)
                        }

    return predictive_parity_data, thresholds
Exemplo n.º 11
0
def enforce_predictive_parity(categorical_results, epsilon):
    """Pick per-group thresholds with near-equal positive predictive values.

    For each group, every threshold in ``np.arange(0, 1, 0.01)`` is applied
    with ``utils.apply_threshold`` and scored with
    ``utils.get_positive_predictive_value``.  A combination (one threshold
    per group) is feasible when every pair of groups has values differing by
    at most ``epsilon``.  Among feasible combinations, the first one reaching
    the highest ``utils.get_total_accuracy`` is returned.

    Groups are taken from ``categorical_results`` itself, so thresholds are
    always attached to the group they were computed for, whatever the
    mapping's iteration order or number of groups.

    Args:
        categorical_results: Mapping of group name to that group's data, in
            the form accepted by ``utils.apply_threshold``.
        epsilon: Largest allowed absolute difference between the positive
            predictive values of any two groups.

    Returns:
        A ``(predictive_parity_data, thresholds)`` tuple of dicts keyed by
        group name.  Both are empty when there are no groups or when no
        feasible combination has an accuracy above 0.
    """
    groups = list(categorical_results)
    if not groups:
        return {}, {}

    # One (value, threshold, thresholded data) entry per candidate threshold.
    # Keeping the thresholded data avoids recomputing it per combination.
    entries = {}
    for group in groups:
        group_entries = []
        for threshold in np.arange(0, 1, 0.01):
            t_data = utils.apply_threshold(categorical_results[group],
                                           threshold)
            group_entries.append(
                (utils.get_positive_predictive_value(t_data), threshold,
                 t_data))
        entries[group] = group_entries

    # Grow combinations one group at a time, keeping only those whose new
    # member is within epsilon of every member already chosen.
    combos = [[entry] for entry in entries[groups[0]]]
    for group in groups[1:]:
        combos = [
            combo + [entry]
            for combo in combos
            for entry in entries[group]
            if all(abs(entry[0] - chosen[0]) <= epsilon for chosen in combo)
        ]

    predictive_parity_data = {}
    thresholds = {}
    max_acc = 0
    for combo in combos:
        candidate = {group: entry[2] for group, entry in zip(groups, combo)}
        acc = utils.get_total_accuracy(candidate)
        if acc > max_acc:
            max_acc = acc
            thresholds = {
                group: entry[1] for group, entry in zip(groups, combo)
            }
            predictive_parity_data = candidate

    return predictive_parity_data, thresholds