Example #1
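All of the examples below appear to share one surrounding module. A minimal sketch of the assumed context (`datasets`, `metrics`, `compress_tree`, and `train_oracle_and_predict` are project-local helpers that are not shown here):

import numpy as np
from sklearn.model_selection import KFold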
def robustness_exp(max_tree_depth, num_experiments, dataset, score='accuracy',
                   weight=None, n_splits=10, device='cpu', use_agreement=False, delta=1):

    kf_scores = []
    kf = KFold(n_splits=n_splits)
    x, y, X_test, y_test = datasets.prepare_data(dataset, return_test=True)

    score_func, score_metric = metrics.get_socre_foncs(score)
    c = datasets.get_number_of_classes(dataset)

    trees = []
    nn_score = []  # collect oracle test scores across all folds so nn_av averages every run
    for train, test in kf.split(x):
        X_train, y_train = x[train], y[train]  # the held-out fold is unused; scoring uses the fixed test set
        X_train, X_val, y_train, y_val = datasets.prepare_val(X_train, y_train)
        X_nn = np.concatenate([X_train, X_val], axis=0)
        y_nn = np.concatenate([y_train, y_val], axis=0)
        k_scores = []
        for k in range(num_experiments):
            p_ic, nn_test_score = train_oracle_and_predict(dataset, X_nn, y_nn, X_test, y_test, c, device)
            f_med, f_voting, f_m = compress_tree(max_tree_depth, X_train, y_train, p_ic, weight=weight, X_val=X_val,
                                                 y_val=y_val, score=score_metric, delta=delta)
            k_scores.append(score_func(None, None, f_med, f_voting, f_m, X_train, y_train, X_test, y_test))
            nn_score.append(nn_test_score)

        trees.append((f_m, f_voting, f_med))
        kf_scores.append(metrics.average_scores(k_scores, num_experiments))

    means = metrics.mean_and_std(kf_scores, mean_only=True, show_rf=False)
    output = metrics.agreement_score(trees, X_test) if use_agreement else None

    nn_av = np.mean(nn_score)
    return output, means, nn_av
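A hypothetical invocation of the oracle-based variant above; the dataset key and depth are illustrative, and the first return value is None unless use_agreement=True:

agreement, means, nn_mean = robustness_exp(max_tree_depth=4, num_experiments=3,
                                           dataset='mnist', device='cpu',
                                           use_agreement=True)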
Example #2
def generalization_exp(dataset, max_tree_depth, forest_depth, num_trees, num_experiments=1,
                       score='accuracy', weight=None, n_splits=10, delta=1):

    kf_scores = []
    kf = KFold(n_splits=n_splits)
    x, y, _, _ = datasets.prepare_data(dataset, return_test=False)
    c = datasets.get_number_of_classes(dataset)
    score_func, score_metric = metrics.get_socre_foncs(score)

    for k in range(num_experiments):
        f_scores = []
        for train, test in kf.split(x):
            X_train, X_test, y_train, y_test = x[train], x[test], y[train], y[test]
            X_train, X_val, y_train, y_val = datasets.prepare_val(X_train, y_train)
            rf, _, f_med, f_all, f_m = compress_tree(num_trees, max_tree_depth, forest_depth, X_train, y_train,
                                                     c, weight=weight, X_val=X_val, y_val=y_val,
                                                     score=score_metric, delta=delta)

            f_scores.append(score_func(rf, None, f_med, f_all, f_m, X_train, y_train, X_test, y_test))

        mean_var_win = metrics.mean_and_std(f_scores, mean_only=False)
        kf_scores.append(mean_var_win)

    print('\nFinal results:')
    print(f'Average RF mean {sum(s[0] for s in kf_scores) / num_experiments}, var {sum(s[1] for s in kf_scores) / num_experiments}')
    idx = 2
    for t in ('BM', 'VT', 'MED'):
        t_mean = sum(s[idx] for s in kf_scores) / num_experiments
        t_wins = sum(s[idx + 2] for s in kf_scores) / num_experiments
        idx += 3
        print(f'Average {t} mean {t_mean}, wins {t_wins}')

    return
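A sketch of how this cross-validation experiment might be launched; the argument values are illustrative, borrowed from the sklearn example further below:

generalization_exp(dataset='dermatology', max_tree_depth=4, forest_depth=10,
                   num_trees=100, num_experiments=3)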
Example #3
def robustness_exp(dataset, max_tree_depth, forest_depth, num_trees, num_experiments=1,
                   score='accuracy', weight=None, n_splits=10, use_agreement=False, delta=1):

    kf_scores = []
    kf = KFold(n_splits=n_splits)
    x, y, X_test, y_test = datasets.prepare_data(dataset, return_test=True)
    c = datasets.get_number_of_classes(dataset)
    score_func, score_metric = metrics.get_socre_foncs(score)

    trees = []
    for train, test in kf.split(x):
        X_train, y_train = x[train], y[train]  # the held-out fold is unused; scoring uses the fixed test set
        X_train, X_val, y_train, y_val = datasets.prepare_val(X_train, y_train)
        k_scores = []
        for k in range(num_experiments):
            rf, _, f_med, f_all, f_m = compress_tree(num_trees, max_tree_depth, forest_depth, X_train, y_train,
                                                     c, weight=weight, X_val=X_val, y_val=y_val,
                                                     score=score_metric, delta=delta)

            k_scores.append(score_func(rf, None, f_med, f_all, f_m, X_train, y_train, X_test, y_test))

        trees.append((rf, f_m, f_all, f_med))
        kf_scores.append(metrics.average_scores(k_scores, num_experiments))

    means = metrics.mean_and_std(kf_scores, mean_only=True)
    output = metrics.agreement_score(trees, X_test) if use_agreement else None
    return output, means
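As with the oracle variant, a hypothetical call; here the returned `output` carries the agreement diagnostic because use_agreement=True:

agreement, means = robustness_exp(dataset='dermatology', max_tree_depth=4,
                                  forest_depth=10, num_trees=100,
                                  num_experiments=3, use_agreement=True)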
Example #4
def crembo_sklearn_example():
    # set arguments
    dataset = 'dermatology'
    args = {
        'dataset': dataset,  # keep the args entry consistent with the data actually loaded
        'num_trees': 100,
        'tree_depth': 4,
        'forest_depth': 10,
        'weight': 'balanced',
        'sklearn': True
    }

    # Create train, test, val sets
    x, y, X_test, y_test = datasets.prepare_data(dataset, return_test=True)
    X_train, X_val, y_train, y_val = datasets.prepare_val(x, y)
    train_loader = (X_train, y_train)
    test_loader = (X_test, y_test)
    val_loader = (X_val, y_val)

    # train large model
    M = RandomForestClassifier(n_estimators=args['num_trees'],
                               max_depth=args['forest_depth'],
                               class_weight=args['weight'])
    M.fit(X_train, y_train)

    # define create_model method
    # Here we create a tree model. All sklearn models should be wrapped by a class that
    # inherits from the MCSkLearnConsistensy class.
    create_model_func = MCConsistentTree(depth=args['tree_depth'],
                                         class_weight=args['weight']).get_clone

    # define train_hypothesis method
    train_hypothesis_func = train_sklearn

    # define eval_model method
    eval_model_func = eval_sklearn

    # instantiate the CREMBO class
    crembo = CREMBO(create_model_func,
                    train_hypothesis_func,
                    eval_model_func,
                    args,
                    delta=1)

    # run crembo
    f = crembo(M, train_loader, test_loader, val_loader, device=None)

    # print scores
    f_score = eval_model_func(f, test_loader, None)
    M_score = eval_model_func(M, test_loader, None)
    print(f'M score: {M_score}, CREMBO: {f_score}')

    return f
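Note that the "loaders" in this sklearn path are plain `(X, y)` tuples rather than framework data loaders, so running the whole example comes down to:

compressed_tree = crembo_sklearn_example()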
Example #5
def generalization_exp(max_tree_depth, num_experiments=1, dataset='mnist',
                       score='accuracy', weight=None, device='cpu', delta=1, n_splits=10):

    kf_scores = []
    kf = KFold(n_splits=n_splits)
    x, y, _, _ = datasets.prepare_data(dataset, return_test=False)

    c = datasets.get_number_of_classes(dataset)
    score_func, score_metric = metrics.get_socre_foncs(score)

    nn_score = []
    for k in range(num_experiments):
        print(f'Experiment number {k+1}')
        f_scores = []
        for train, test in kf.split(x):
            X_train, X_test, y_train, y_test = x[train], x[test], y[train], y[test]
            X_train, X_val, y_train, y_val = datasets.prepare_val(X_train, y_train)
            X_nn = np.concatenate([X_train, X_val], axis=0)
            y_nn = np.concatenate([y_train, y_val], axis=0)

            p_ic, nn_test_score = train_oracle_and_predict(dataset, X_nn, y_nn, X_test, y_test, c, device)

            f_med, f_all, f_m = compress_tree(max_tree_depth, X_train, y_train, p_ic, weight=weight, X_val=X_val,
                                                 y_val=y_val, score=score_metric, delta=delta)
            f_scores.append(score_func(None, None, f_med, f_all, f_m, X_train, y_train, X_test, y_test))
            nn_score.append(nn_test_score)

        mean_var_win = metrics.mean_and_std(f_scores, mean_only=False, show_rf=False, nn_score=nn_score)
        kf_scores.append(mean_var_win)

    print('\nFinal results:')
    print(f'Average NN mean {sum(s[0] for s in kf_scores) / num_experiments}, std {sum(s[1] for s in kf_scores) / num_experiments}')
    idx = 2
    for t in ('BM', 'VT', 'MED'):
        t_mean = sum(s[idx] for s in kf_scores) / num_experiments
        t_wins = sum(s[idx + 2] for s in kf_scores) / num_experiments
        idx += 3
        print(f'Average {t} mean {t_mean}, wins {t_wins}')

    return
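Finally, a hypothetical call to this oracle-distillation variant; pass device='cuda' instead of 'cpu' if the oracle trainer can use a GPU:

generalization_exp(max_tree_depth=4, num_experiments=3, dataset='mnist', device='cpu')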