Exemplo n.º 1
0
def train_classifier(X_train, X_test, y_train, y_test, sa_index, p_Group,
                     dataset, mutex, mode, base_learners):
    """Fit an AdaFairSP model and append its test performance to a pickle.

    The results file is ``dataset + "_sp_acc"``. It is read, extended with
    one entry produced by ``calculate_performance_SP`` and written back
    while ``mutex`` is held.

    Args:
        X_train, y_train: Training features and labels passed to ``fit``.
        X_test, y_test: Test features and labels used for the evaluation.
        sa_index: Forwarded as ``saIndex`` to the classifier and to
            ``calculate_performance_SP``.
        p_Group: Forwarded as ``saValue`` to the classifier and to
            ``calculate_performance_SP``.
        dataset: Path prefix of the pickled results object.
        mutex: Lock-like object exposing ``acquire()`` and ``release()``.
        mode: ``0`` builds the classifier with ``cumul=False``; ``1`` leaves
            ``cumul`` at the classifier's default.
        base_learners: Forwarded as ``n_estimators``.

    Raises:
        ValueError: If ``mode`` is neither 0 nor 1.
    """
    if mode == 0:
        classifier = AdaFairSP(n_estimators=base_learners,
                               saIndex=sa_index,
                               saValue=p_Group,
                               cumul=False,
                               CSB="CSB1")
    elif mode == 1:
        classifier = AdaFairSP(n_estimators=base_learners,
                               saIndex=sa_index,
                               saValue=p_Group,
                               CSB="CSB1")
    else:
        # Fail before training instead of hitting an unbound local below.
        raise ValueError("unsupported mode: {!r}".format(mode))

    classifier.fit(X_train, y_train)
    y_pred_labels = classifier.predict(X_test)

    results_path = dataset + "_sp_acc"

    mutex.acquire()
    try:
        # NOTE(review): pickle.load is unsafe on untrusted files; this
        # presumes the results file is written only by this pipeline.
        with open(results_path, 'rb') as infile:
            dict_to_ram = pickle.load(infile)

        dict_to_ram.performance.append(
            calculate_performance_SP(X_test, y_test, y_pred_labels, sa_index,
                                     p_Group))

        with open(results_path, 'wb') as outfile:
            pickle.dump(dict_to_ram, outfile)
    finally:
        # Always release, otherwise one failing worker would block every
        # other worker waiting on the same lock.
        mutex.release()
Exemplo n.º 2
0
def train_classifier(X_train, X_test, y_train, y_test, sa_index, p_Group,
                     dataset, mutex, mode, base_learners, c):
    """Train the model selected by ``mode`` and record its test performance.

    The pickled object stored at ``dataset`` is loaded, one entry produced by
    ``calculate_performance_SP`` is appended to its ``performance``
    attribute, and the object is written back, all while ``mutex`` is held.

    Args:
        X_train, y_train: Training features and labels.
        X_test, y_test: Test features and labels used for the evaluation.
        sa_index: Forwarded to the model and to the evaluation.
        p_Group: Forwarded to the model and to the evaluation.
        dataset: Path of the pickled results object.
        mutex: Lock-like object exposing ``acquire()`` and ``release()``.
        mode: ``0`` -> ``AdaCostClassifier``, ``1`` -> ``AdaFairSP``
            (``CSB="CSB2"``), ``2`` -> ``SMOTEBoost``; any other value uses
            ``boostLearner``.
        base_learners: Forwarded as ``n_estimators`` for modes 0, 1 and 2.
        c: Forwarded as ``c`` to ``AdaFairSP`` (mode 1 only).
    """
    if mode == 0:
        classifier = AdaCostClassifier(saIndex=sa_index,
                                       saValue=p_Group,
                                       n_estimators=base_learners,
                                       CSB="CSB1")
    elif mode == 1:
        classifier = AdaFairSP(n_estimators=base_learners,
                               saIndex=sa_index,
                               saValue=p_Group,
                               CSB="CSB2",
                               c=c)
    elif mode == 2:
        classifier = SMOTEBoost(n_estimators=base_learners,
                                saIndex=sa_index,
                                n_samples=10,
                                saValue=p_Group,
                                CSB="CSB1")
    else:
        # boostLearner takes (features, label) pairs. Positional indexing is
        # kept on purpose: iterating the containers directly is not
        # guaranteed to be equivalent for every container type.
        train = [(X_train[i], y_train[i]) for i in range(len(X_train))]
        classifier = boostLearner(train, sa_index, p_Group)

    if mode in (0, 1, 2):
        classifier.fit(X_train, y_train)
        y_pred_probs = classifier.predict_proba(X_test)[:, 1]
        y_pred_labels = classifier.predict(X_test)
    else:
        # The object returned by boostLearner is called once per test row;
        # no probabilities are computed here, so zeros are used as
        # placeholders.
        y_pred_labels = [classifier(x) for x in X_test]
        y_pred_probs = [0] * len(y_pred_labels)

    mutex.acquire()
    try:
        # NOTE(review): pickle.load is unsafe on untrusted files; this
        # presumes the results file is written only by this pipeline.
        with open(dataset, 'rb') as infile:
            dict_to_ram = pickle.load(infile)

        dict_to_ram.performance.append(
            calculate_performance_SP(X_test, y_test, y_pred_labels,
                                     y_pred_probs, sa_index, p_Group))

        with open(dataset, 'wb') as outfile:
            pickle.dump(dict_to_ram, outfile)
    finally:
        # Always release so a failing worker cannot block the others.
        mutex.release()
Exemplo n.º 3
0
def train_classifier(X_train, X_test, y_train, y_test, sa_index, p_Group,
                     dataset, mutex, mode, base_learners, c, dataset_name):
    """Train the model selected by ``mode`` and record its test performance.

    The pickled object stored at ``dataset`` is loaded, one entry produced by
    ``calculate_performance_SP`` is appended to its ``performance``
    attribute, and the object is written back, all while ``mutex`` is held.

    Args:
        X_train, y_train: Training features and labels.
        X_test, y_test: Test features and labels used for the evaluation.
        sa_index: Forwarded to the model and to the evaluation.
        p_Group: Forwarded to the model and to the evaluation.
        dataset: Path of the pickled results object.
        mutex: Lock-like object exposing ``acquire()`` and ``release()``.
        mode: ``0`` -> ``AdaCostClassifier``, ``1`` -> ``AdaFairSP``,
            ``2`` -> ``SMOTEBoost``; any other value uses ``marginAnalyzer``.
        base_learners: Forwarded as ``n_estimators`` for modes 0, 1 and 2.
        c: Forwarded as ``c`` to ``AdaFairSP`` (mode 1 only).
        dataset_name: Dataset identifier used to pick the ``n_samples``
            value for ``SMOTEBoost`` (mode 2 only).
    """
    if mode == 0:
        classifier = AdaCostClassifier(saIndex=sa_index,
                                       saValue=p_Group,
                                       n_estimators=base_learners,
                                       CSB="CSB1")
    elif mode == 1:
        classifier = AdaFairSP(n_estimators=base_learners,
                               saIndex=sa_index,
                               saValue=p_Group,
                               CSB="CSB1",
                               c=c)
    elif mode == 2:
        # The name check must use dataset_name; `dataset` is the path of the
        # results pickle, so comparing it to 'bank' never selected this case.
        if dataset_name in ('adult-gender', 'bank'):
            samples = 100
        elif dataset_name == 'compass-gender':
            samples = 2
        else:
            samples = 500
        classifier = SMOTEBoost(n_estimators=base_learners,
                                saIndex=sa_index,
                                n_samples=samples,
                                saValue=p_Group,
                                CSB="CSB1")
    else:
        classifier = marginAnalyzer(X_train,
                                    y_train,
                                    sa_index=sa_index,
                                    p_Group=p_Group,
                                    numRounds=200)

    if mode in (0, 1, 2):
        classifier.fit(X_train, y_train)
        y_pred_labels = classifier.predict(X_test)
    else:
        # marginAnalyzer receives the training data at construction and
        # exposes predictions through `pred`; no separate `fit` call is made.
        y_pred_labels = classifier.pred(X_test)

    mutex.acquire()
    try:
        # NOTE(review): pickle.load is unsafe on untrusted files; this
        # presumes the results file is written only by this pipeline.
        with open(dataset, 'rb') as infile:
            dict_to_ram = pickle.load(infile)

        dict_to_ram.performance.append(
            calculate_performance_SP(X_test, y_test, y_pred_labels, sa_index,
                                     p_Group))

        with open(dataset, 'wb') as outfile:
            pickle.dump(dict_to_ram, outfile)
    finally:
        # Always release so a failing worker cannot block the others.
        mutex.release()
Exemplo n.º 4
0
def predict(clf, X_test, y_test, sa_index, p_Group):
    """Evaluate ``clf`` on the test data with ``calculate_performance_SP``.

    Column 1 of ``clf.predict_proba(X_test)`` and the output of
    ``clf.predict(X_test)`` are handed, together with the test data and the
    ``sa_index`` / ``p_Group`` arguments, to ``calculate_performance_SP``,
    whose result is returned unchanged.
    """
    probabilities = clf.predict_proba(X_test)
    second_column_scores = probabilities[:, 1]
    predicted_labels = clf.predict(X_test)

    evaluation = calculate_performance_SP(
        X_test,
        y_test,
        predicted_labels,
        second_column_scores,
        sa_index,
        p_Group,
    )
    return evaluation