Example #1
def main(argv=sys.argv):
    if len(argv) != 1:
        usage(argv)

    # Prepare data into FairTest friendly format
    FILENAME = "../../../data/staples/staples.csv"
    data = prepare.data_from_csv(FILENAME, to_drop=['zipcode', 'distance'])
    OUTPUT_DIR = "."

    # Initializing parameters for experiment
    EXPL = []
    SENS = ['income']
    TARGET = 'price'

    data_source = DataSource(data)

    # Instantiate the experiment
    inv = Testing(data_source, SENS, TARGET, EXPL, random_state=0)

    # Train the classifier
    train([inv])

    # Evaluate on the testing set
    test([inv])

    # Create the report
    report([inv], "testing", OUTPUT_DIR)
Example #2
def main(argv=sys.argv):
    if len(argv) != 3:
        usage(argv)

    # Prepare data into FairTest friendly format
    FILENAME = argv[1]
    data = prepare.data_from_csv(FILENAME)
    OUTPUT_DIR = argv[2]

    to_drop = ['logloss', 'Entropy Abs', 'PdDistrict']

    EXPL = ['Entropy Bin']
    SENS = ['Category']
    TARGET = 'Error Str'

    data_source = DataSource(data, train_size=0.5)

    inv = Testing(data_source, SENS, TARGET, EXPL, random_state=0,
                  to_drop=to_drop)
    train([inv])
    test([inv], exact=False)
    if EXPL:
        report(
            [inv],
            "sf" + SENS[0].replace(' ', '') + TARGET.replace(' ','') + EXPL[0].replace(' ', ''),
            OUTPUT_DIR
        )
    else:
        report(
            [inv],
            "sf" + SENS[0].replace(' ', '') + TARGET.replace(' ',''),
            OUTPUT_DIR
        )
Example #3
    def setUp(self):
        FILENAME = "fairtest/tests/data/tiny_predictions_reg.csv"
        self.data = DataSource(prepare.data_from_csv(FILENAME))
        self.SENS = ['Age']
        self.TARGET = 'Prediction'
        self.GROUND_TRUTH = 'Ground_Truth'
        self.EXPL = None
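The fixture above only loads the data; a hypothetical test method built on it, mirroring the ErrorProfiling flow of Example #9 below (the method name and report label are placeholders), might look like this:

    def test_error_profiling(self):
        # sketch only: reuses the fixture columns with the same
        # train/test/report flow shown in Example #9
        inv = ErrorProfiling(self.data, self.SENS, self.TARGET,
                             self.GROUND_TRUTH, self.EXPL, random_state=0)
        train([inv])
        test([inv])
        report([inv], "tiny_reg", ".")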
Example #4
def main(argv=sys.argv):
    if len(argv) != 3:
        usage(argv)

    # Prepare data into FairTest friendly format
    FILENAME = argv[1]
    data = prepare.data_from_csv(FILENAME)
    OUTPUT_DIR = argv[2]

    data_source = DataSource(data)

    # Initializing parameters for experiment
    EXPL = []
    SENS = ['race']
    TARGET = 'price'

    # Instantiate the experiment
    t1 = time()
    inv = Testing(data_source, SENS, TARGET, EXPL, random_state=0)
    # Train the classifier
    t2 = time()
    train([inv])

    # Evaluate on the testing set
    t3 = time()
    test([inv])

    # Create the report
    t4 = time()
    report([inv], "benchmark", OUTPUT_DIR)

    t5 = time()

    print "Testing:Benchmark:Instantiation: %.2f, Train: %.2f, Test: %.2f, " \
          "Report: %.2f" % ((t2-t1), (t3-t2), (t4-t3), (t5-t4))
Example #5
def main(argv=sys.argv):
    if len(argv) != 1:
        usage(argv)

    FILENAME = "../../../data/recommender/recommendations.txt"
    OUTPUT_DIR = "."
    data = prepare.data_from_csv(FILENAME, sep='\t',
                                 to_drop=['Types', 'Avg Movie Age',
                                          'Avg Recommended Rating',
                                          'Avg Seen Rating'])
    SENS = ['Gender']
    TARGET = 'RMSE'
    EXPL = []

    data_source = DataSource(data)

    # Instantiate the experiment
    inv = Testing(data_source, SENS, TARGET, EXPL, random_state=0)
    # Train the classifier
    train([inv])

    # Evaluate on the testing set
    test([inv])

    # Create the report
    report([inv], "error_profiling", OUTPUT_DIR)
Example #6
def demo_run(experiment_dict):
    # retrieve dictionary parameters
    output = experiment_dict['out']
    sens = [experiment_dict['sens']]
    dataset = experiment_dict['dataset']
    experiment_folder = experiment_dict['experiments_folder']

    if 'expl' in experiment_dict:
        expl = experiment_dict['expl']
    else:
        expl = []

    # run the experiment and place the report in the proper place
    try:
        data = prepare.data_from_csv(dataset)
        data_source = DataSource(data)
        inv = Testing(data_source, sens, output, expl)

        print "Experiment parameters:", experiment_dict
        train([inv])
        test([inv])
        report_name = os.path.basename(dataset).split('.')[0]
        tmp_folder = mkdtemp(prefix="fairtest_")
        report([inv], report_name, tmp_folder)
        src_path = os.path.join(tmp_folder, "report_" + report_name + ".txt")
        dst_path = os.path.join(
            experiment_folder,
            report_name + "_" + sens[0] + "_" + output + ".txt")
        print src_path, dst_path
        os.rename(src_path, dst_path)
        os.rmdir(tmp_folder)
    except Exception, error:
        # report any failure during data loading or the experiment run
        print "Error:", error
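A minimal invocation sketch for demo_run; the dataset path and experiments folder are hypothetical placeholders, and the column names are borrowed from the Staples example above:

if __name__ == '__main__':
    demo_run({
        'dataset': 'data/staples/staples.csv',     # hypothetical path
        'out': 'price',
        'sens': 'income',
        'experiments_folder': '/tmp/experiments',  # hypothetical folder
        'expl': []                                 # optional key
    })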
Example #7
def main(argv=sys.argv):
    if len(argv) != 3:
        usage(argv)

    # Prepare data into FairTest friendly format
    FILENAME = argv[1]
    data = prepare.data_from_csv(FILENAME, sep='\\t')
    OUTPUT_DIR = argv[2]

    TARGET = 'Labels'
    SENS = ['Race']
    EXPL = []

    labeled_data = [ast.literal_eval(s) for s in data[TARGET]]
    for l in labeled_data:
        assert len(l) == 5
    label_encoder = preprocessing.MultiLabelBinarizer()
    labeled_data = label_encoder.fit_transform(labeled_data)
    labels = label_encoder.classes_
    df_labels = pd.DataFrame(labeled_data, columns=labels)
    data = pd.concat([data.drop(TARGET, axis=1), df_labels], axis=1)
    TARGET = labels.tolist()

    data_source = DataSource(data)

    # Instantiate the experiment
    t1 = time()
    inv = Discovery(data_source, SENS, TARGET, EXPL, topk=35, random_state=0)

    # Train the classifier
    t2 = time()
    train([inv])

    # Evaluate on the testing set
    t3 = time()
    test([inv])

    # Create the report
    t4 = time()
    report([inv], "overfeat", OUTPUT_DIR)

    t5 = time()

    print "Discovery:Overfeat:Instantiation: %.2f, Train: %.2f, Test: %.2f, " \
          "Report: %.2f" % ((t2-t1), (t3-t2), (t4-t3), (t5-t4))
    print "-" * 80
    print
Example #8
    def setUp(self):
        FILENAME = "../data/images/overfeat_raw.txt"
        data = prepare.data_from_csv(FILENAME, sep='\\t')

        TARGET = 'Labels'
        self.SENS = ['Race']
        self.EXPL = []

        labeled_data = [ast.literal_eval(s) for s in data[TARGET]]
        for l in labeled_data:
            assert len(l) == 5
        label_encoder = preprocessing.MultiLabelBinarizer()
        labeled_data = label_encoder.fit_transform(labeled_data)
        labels = label_encoder.classes_
        df_labels = pd.DataFrame(labeled_data, columns=labels)
        self.data = DataSource(
            pd.concat([data.drop(TARGET, axis=1), df_labels], axis=1))
        self.TARGET = labels.tolist()
Example #9
def main(argv=sys.argv):
    if len(argv) != 3:
        usage(argv)

    # Prepare data into FairTest friendly format
    FILENAME = argv[1]
    data = prepare.data_from_csv(FILENAME)
    OUTPUT_DIR = argv[2]

    data_source = DataSource(data)

    # Initializing parameters for experiment
    TARGET = 'Prediction'
    GROUND_TRUTH = 'Ground_Truth'
    SENS = ['Age']
    EXPL = []

    # Instantiate the experiment
    t1 = time()
    inv = ErrorProfiling(data_source, SENS, TARGET, GROUND_TRUTH, EXPL,
                         random_state=0)

    # Train the classifier
    t2 = time()
    train([inv])

    # Evaluate on the testing set
    t3 = time()
    test([inv])

    # Create the report
    t4 = time()
    report([inv], "medical_reg", OUTPUT_DIR)

    t5 = time()

    print "Error:Health(Cont.):Instantiation: %.2f, Train: %.2f, Test: %.2f, " \
          "Report: %.2f" % ((t2-t1), (t3-t2), (t4-t3), (t5-t4))
    print "-" * 80
    print
Example #10
def main(argv=sys.argv):
    if len(argv) != 1:
        usage(argv)

    FILENAME = "../../../data/recommender/recommendations.txt"
    OUTPUT_DIR = "."
    data = prepare.data_from_csv(FILENAME,
                                 sep='\\t',
                                 to_drop=[
                                     'RMSE', 'Avg Movie Age',
                                     'Avg Recommended Rating',
                                     'Avg Seen Rating', 'Occupation'
                                 ])
    TARGET = 'Types'
    SENS = ['Gender']

    EXPL = []
    labeled_data = [ast.literal_eval(s) for s in data[TARGET]]
    for labels in labeled_data:
        assert len(labels) == 5
    label_encoder = preprocessing.MultiLabelBinarizer()
    labeled_data = label_encoder.fit_transform(labeled_data)
    labels = label_encoder.classes_
    df_labels = pd.DataFrame(labeled_data, columns=labels)
    data = pd.concat([data.drop(TARGET, axis=1), df_labels], axis=1)
    TARGET = labels.tolist()

    data_source = DataSource(data)

    # Instantiate the experiment
    inv = Discovery(data_source, SENS, TARGET, EXPL, topk=10, random_state=0)

    # Train the classifier
    train([inv])

    # Evaluate on the testing set
    test([inv])

    # Create the report
    report([inv], "discovery", OUTPUT_DIR)
Example #11
def main(argv=sys.argv):
    if len(argv) != 3:
        usage(argv)

    '''
    1. Testing (average movie rating across age)
    '''
    # Prepare data into FairTest friendly format
    FILENAME = argv[1]
    data = prepare.data_from_csv(FILENAME, sep='\t')
    OUTPUT_DIR = argv[2]

    # prepare age
    data['Age'] = map(lambda a: 10 if a == 1
                           else 20 if a == 18
                           else 30 if a == 25
                           else 40 if a == 35
                           else 50 if a == 45 or a == 50
                           else 60 if a == 56 else None, data['Age'])

    data['Avg Seen Rating'] = ['low' if x < np.mean(data['Avg Seen Rating'])
                                   else 'high' for x in data['Avg Seen Rating']]

    data_source = DataSource(data)

    # Instantiate the experiments
    t1 = time()

    #
    # Test of associations on movie popularity
    #
    SENS = ['Gender', 'Age']
    TARGET = 'Avg Recommended Rating'
    EXPL = []

    test_ratings = Testing(data_source, SENS, TARGET, EXPL, random_state=0,
                           to_drop=['RMSE', 'Avg Movie Age',
                                    'Types', 'Avg Seen Rating'])

    #
    # Test of associations on movie popularity conditioned on error
    #
    SENS = ['Gender', 'Age']
    TARGET = 'Avg Recommended Rating'
    EXPL = ['Avg Seen Rating']

    test_ratings_expl = Testing(data_source, SENS, TARGET, EXPL, random_state=0,
                                to_drop=['RMSE', 'Avg Movie Age', 'Types'])

    inv = [test_ratings, test_ratings_expl]

    # Train the classifier
    t2 = time()
    train(inv)

    # Evaluate on the testing set
    t3 = time()
    test(inv)

    # Create the report
    t4 = time()
    report(inv, "recommender", OUTPUT_DIR)

    t5 = time()
    print "Testing:Recommender:Instantiation: %.2f, Train: %.2f, Test: %.2f, " \
          "Report: %.2f" % ((t2-t1), (t3-t2), (t4-t3), (t5-t4))
    print "-" * 80
    print
Example #12
    def setUp(self):
        FILENAME = "fairtest/tests/data/tiny_berkeley.csv"
        self.data = prepare.data_from_csv(FILENAME)
        self.SENS = ['gender']
        self.TARGET = 'accepted'
        self.EXPL = None
Example #13
def do_benchmark((contents, feature_range, size_range)):
    """
    main method doing the benchmark
    """
    BASE_FILENAME = "/tmp/temp_fairtest"

    MICROSECONDS = int(
        (datetime.now() - datetime(1970, 1, 1)).total_seconds() * 10**6)
    RANDOM_SEED = MICROSECONDS % 10**8
    seed(RANDOM_SEED)

    BASE_FEATURES = ['state', 'gender', 'race', 'income', 'price']
    N_BASE = len(BASE_FEATURES)

    _contents = deepcopy(contents)

    # create more features without including the last two that will
    # be used as sensitive and output. For each size, create the map
    # once for the maximum feature size.
    range_min = 0
    range_max = N_BASE - 3
    features = [
        randint(range_min, range_max)
        for _ in range(0, feature_range[-1] - N_BASE)
    ]

    # create header
    features_header = ','.join(BASE_FEATURES[:-1]) + ',' + \
                      ','.join([BASE_FEATURES[feature]
                                for feature in features]) + ',price'

    # shuffle entries of the file loaded in memory
    # and copied within this function
    shuffle(_contents)
    _contents = _contents[:size_range[-1]]

    random_suffix = str(randint(1, 99999999))
    current_filename = BASE_FILENAME + random_suffix
    f_temp = open(current_filename, "w+")

    print >> f_temp, features_header
    for content in magnify_contents(_contents, features):
        print >> f_temp, ','.join(content)

    f_temp.close()

    # Prepare data into FairTest friendly format
    data = prepare.data_from_csv(current_filename)

    # shuffle around additional feature values
    data = shuffle_column_contents(data, BASE_FEATURES)
    os.remove(current_filename)

    # Initializing parameters for experiment
    EXPL = []
    SENS = ['income']
    TARGET = 'price'

    # initialize the dictionary
    results = {}
    for n_features in feature_range:
        results[n_features] = {}
        for size in size_range:
            results[n_features][size] = {}

    additional_features_range = [x - N_BASE for x in feature_range]
    for additional_features in additional_features_range:
        n_features = additional_features + N_BASE
        results[n_features] = {}

        for size in size_range:
            # Instantiate the experiment
            _data = data.drop(data.columns[range(
                N_BASE - 1,
                additional_features_range[-1] - 1 - additional_features)],
                              axis=1).head(size)

            data_source = DataSource(_data)
            inv = Testing(data_source,
                          SENS,
                          TARGET,
                          EXPL,
                          random_state=int(random_suffix))

            # Train the classifier
            t1 = time()
            train([inv])
            t2 = time()

            # Evaluate on the testing set
            test([inv])
            t3 = time()

            # Create the report
            _random_suffix = str(randint(1, 99999999))
            report([inv],
                   "nop_benchmark_performance" + _random_suffix,
                   output_dir="/tmp")

            train_time = t2 - t1
            test_time = t3 - t2

            avg_no_of_feat_values = get_avg_no_of_feat_values(_contents[:size])
            results[n_features][size] = [
                train_time, test_time, avg_no_of_feat_values
            ]
            del _data
            # print n_features, size, results[n_features][size]
        # for all sizes
    # for all feature numbers
    return results
Example #14
def do_benchmark((classes, pool, guard_lines)):
    """
    main method doing the benchmark
    """
    results = {}
    BASE_FILENAME = "/tmp/temp_fairtest"

    MICROSECONDS = int(
        (datetime.now() - datetime(1970, 1, 1)).total_seconds() * 10**6)
    # keep last digits of this very large number
    RANDOM_SEED = MICROSECONDS % 10**8
    seed(RANDOM_SEED)

    _classes = deepcopy(classes)
    # iterate over various population sizes,
    # sorted numerically by population size
    for _class in [str(x) for x in sorted([int(y) for y in _classes.keys()])]:

        selected = []
        shuffle(_classes[_class])

        for _ in range(0, 10):
            state_race = _classes[_class].pop()

            # keep the contexts selected to compare later
            # with Fairtest results
            selected.append(state_race)

        results[int(_class)] = {}
        # iterate for various effects
        for effect in [2.5, 5, 10, 15]:
            _pool = deepcopy(pool)
            _selected = deepcopy(selected)
            # TODO: Keep the same populations for a specific size
            # and iterate over different effects. In this way, the
            # graph will be readable on the y-axis since the comparison
            # will be on the same populations -- as per Roxana's suggestion

            random_suffix = str(randint(1, 999999))
            current_filename = BASE_FILENAME + random_suffix
            f_temp = open(current_filename, "w+")
            print >> f_temp, "state,gender,race,income,price"

            lines = 0
            for state_race in _selected:

                # create a pool with exactly (50-effect)% discounts for poor
                # and (50+effect)% discounts for rich, so that these are a
                # bit more deterministic
                poor_price_pool, rich_price_pool =\
                        make_price_pools(pool[state_race], effect)

                for entry in _pool[state_race]:
                    state = entry.split(",")[0]
                    gender = entry.split(",")[1]
                    race = entry.split(",")[2]
                    income = entry.split(",")[3]
                    # TODO: randomize this also
                    if income == 'income >= 50K':
                        price = "low" if rich_price_pool.pop() else "high"
                    else:
                        price = "low" if poor_price_pool.pop() else "high"

                    print >> f_temp, "%s,%s,%s,%s,%s" % (state, gender, race,
                                                         income, price)
                    lines += 1
                del _pool[state_race]

            # print 'bias in populations {} of size {}'.format(_selected,_class)
            # This will be printing the remaining populations
            for state_race in _pool:
                # create exactly 50-50 split of discounts for the rest
                price_pool = [True]*(len(_pool[state_race])/2 + 1) +\
                             [False]*(len(_pool[state_race])/2 + 1)

                for entry in _pool[state_race]:
                    price = "low" if price_pool.pop() else "high"
                    print >> f_temp, "%s,%s,%s,%s,%s" % (
                        entry.split(",")[0], entry.split(",")[1],
                        entry.split(",")[2], entry.split(",")[3], price)
                    lines += 1

            f_temp.close()
            assert guard_lines == lines

            # Prepare data into FairTest friendly format
            data = prepare.data_from_csv(current_filename)
            data_source = DataSource(data)
            os.remove(current_filename)

            # Initializing parameters for experiment
            EXPL = []
            SENS = ['income']
            TARGET = 'price'

            # Instantiate the experiment
            inv = Testing(data_source,
                          SENS,
                          TARGET,
                          EXPL,
                          random_state=RANDOM_SEED)

            # Train the classifier
            train([inv], min_leaf_size=50)

            exact_stats = False
            if int(_class) < 1000:
                exact_stats = True

            # Evaluate on the testing set
            test([inv], exact=exact_stats)

            # Create the report (apply no filtering)
            context_list = report([inv],
                                  "benchmark_" + random_suffix,
                                  output_dir="/tmp",
                                  node_filter='all',
                                  filter_conf=0)

            # count success
            found = 0
            for context in context_list:
                if ('state' in context and 'race' in context) and\
                        (len(context) == 2 or not FIND_CONTEXTS_STRICT):
                    state_race = str(context['state']) + "_" + \
                                 str(context['race'])
                    if state_race in _selected:
                        # remove it so that we don't count multiple
                        # times sub-sub-populations of a population
                        _selected.remove(state_race)
                        found += 1

            results[int(_class)][effect] = found
            del _selected
        # end of iterations on effects
    # end of iterations on classes of sizes
    return results
Example #15
def main(argv=sys.argv):
    if len(argv) != 3:
        usage(argv)

    log.set_params(level=logging.DEBUG)

    # EXPL = ['less_than_median']
    # EXPL = ['juv_fel_count_bin']
    # EXPL = ['score_very_high']
    EXPL = ['']
    # SENS = ['age_cat', 'sex', 'race']
    SENS = ['race']
    TARGET = 'FP'
    to_drop = [
        #        'FP',
        'FN',
        #        'age',
        'priors_count.1',
        'priors_count',
        'decile_score.1',
        'id',
        'name',
        'first',
        'last',
        'start',
        'end',
        'event',
        'is_recid',
        'two_year_recid',
        'compas_screening_date',
        'dob',
        'days_b_screening_arrest',
        'c_jail_in',
        'c_jail_out',
        'c_case_number',
        'c_offense_date',
        'c_charge_desc',
        "r_case_number",
        "c_arrest_date",
        "c_days_from_compas",
        "c_charge_degree",
        "r_charge_degree",
        "r_days_from_arrest",
        "r_offense_date",
        "r_charge_desc",
        "r_jail_in",
        "r_jail_out",
        "violent_recid",
        "is_violent_recid",
        "vr_case_number",
        "vr_charge_degree",
        "vr_offense_date",
        "vr_charge_desc",
        "screening_date",
        "v_screening_date",
        "in_custody",
        "out_custody"
    ]

    # Prepare data into FairTest friendly format
    FILENAME = argv[1]
    data = prepare.data_from_csv(FILENAME, to_drop=to_drop)
    OUTPUT_DIR = argv[2]

    data_source = DataSource(data, train_size=0.5)

    # Initializing parameters for experiment
    inv = Testing(data_source,
                  SENS,
                  TARGET,
                  EXPL,
                  metrics={'race': 'NMI'},
                  random_state=10)
    train([inv], max_bins=5)
    test([inv])
    report([inv], "compas" + "_" + "_".join(SENS + [TARGET] + EXPL),
           OUTPUT_DIR)

    print
Example #16
def main(argv=sys.argv):
    if len(argv) != 3:
        usage(argv)

    # Prepare data into FairTest friendly format
    FILENAME = argv[1]
    data = prepare.data_from_csv(FILENAME)
    OUTPUT_DIR = argv[2]

    data_source = DataSource(data, budget=2)
    """
    First Experiment Without Explanatory Features
    """

    # Initializing parameters for experiment
    EXPL = []
    SENS = ['gender']
    TARGET = 'accepted'

    # Instantiate the experiment
    t1 = time()
    inv = Testing(data_source, SENS, TARGET, EXPL, random_state=0)

    # Train the classifier
    t2 = time()
    train([inv])

    # Evaluate on the testing set
    t3 = time()
    test([inv])

    # Create the report
    t4 = time()
    report([inv], "berkeley", OUTPUT_DIR)

    t5 = time()
    print "Testing:Berkeley:Instantiation: %.2f, Train: %.2f, Test: %.2f, " \
          "Report: %.2f" % ((t2-t1), (t3-t2), (t4-t3), (t5-t4))
    print "-" * 80
    print
    """
    Second Experiment With Explanatory Feature
    """

    # Initializing parameters for experiment
    EXPL = ['department']
    SENS = ['gender']
    TARGET = 'accepted'

    # Instantiate the experiment
    t1 = time()
    inv = Testing(data_source, SENS, TARGET, EXPL, random_state=0)

    # Train the classifier
    t2 = time()
    train([inv])

    # Evaluate on the testing set
    t3 = time()
    test([inv])

    # Create the report
    t4 = time()
    report([inv], "berkeley_expl", OUTPUT_DIR)

    t5 = time()
    print "Testing:Berkeley_Expl:Instantiation: %.2f, Train: %.2f, " \
          "Test: %.2f, Report: %.2f" % ((t2-t1), (t3-t2), (t4-t3), (t5-t4))
    print "-" * 80
    print
Example #17
def main(argv=sys.argv):
    if len(argv) != 3:
        usage(argv)

    # Prepare data into FairTest friendly format
    FILENAME = argv[1]
    data = prepare.data_from_csv(FILENAME)
    OUTPUT_DIR = argv[2]

    # Initializing parameters for experiment
    EXPL = []
    SENS = ['ReportsAbs']
    TARGET = 'PoliceUnitsPerReport'
    GROUND_TRUTH = 'Mean'

    to_drop = [
        # 'Shift',
        # 'Zipcode',
        # 'Reports',
        # 'ReportsAbs',
        # 'Households',
        'MedianAgeMale',
        'MedianAgeFemale',
        'PoliceUnits',
        'HouseHolder65to74',
        'HouseHolder55to59',
        'HouseHolder35to44',
        'HouseHolder45to54',
        'HouseHolder25to34',
        'HouseHolder75to84',
        'HouseHolder85over',
        'HouseHolder15to24',
        'HouseHolder60to64',
        'NonFamilyHouseholds',
        'Households7PlusPerson',
        'Households2Person',
        'Households4Person',
        'Households6Person',
        'HouseholdsWith60Plus',
        'HouseholdsWith75Plus',
    ]

    data[SENS] = np.round(data[SENS])

    data_source = DataSource(data, train_size=0.25)

    # Instantiate the experiment
    t1 = time()
#    inv = Testing(data_source, SENS, TARGET, EXPL,
#                         random_state=0, to_drop=to_drop)
    inv = ErrorProfiling(data_source, SENS, TARGET, GROUND_TRUTH, EXPL,
                         random_state=0, to_drop=to_drop)


    # Train the classifier
    t2 = time()
    train([inv])

    # Evaluate on the testing set
    t3 = time()
    test([inv], exact=False)

    # Create the report
    t4 = time()
    report([inv], "scheduling", OUTPUT_DIR)

    t5 = time()
    print "Testing:Scheduling:Instantiation: %.2f, Train: %.2f, Test: %.2f, " \
          "Report: %.2f" % ((t2-t1), (t3-t2), (t4-t3), (t5-t4))
    print "-" * 80
    print
Example #18
    def setUp(self):
        FILENAME = "../data/adult/adult.csv"
        self.data = DataSource(prepare.data_from_csv(FILENAME))
        self.SENS = ['sex']
        self.TARGET = 'income'
        self.EXPL = None