import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold

import datasets
import metrics
# The remaining helpers (compress_tree, train_oracle_and_predict, CREMBO,
# MCConsistentTree, train_sklearn, eval_sklearn) are repo-local; their module
# paths are not shown in this section, so import them from wherever the repo
# defines them.

# NOTE: the NN-oracle and forest-oracle variants below share the names
# robustness_exp / generalization_exp (and call compress_tree with different
# signatures); in the repo they presumably live in separate modules. If kept
# in one file, the later definitions shadow the earlier ones.


def robustness_exp(max_tree_depth, num_experiments, dataset, score='accuracy',
                   weight=None, n_splits=10, device='cpu', use_agreement=False,
                   delta=1):
    """Robustness experiment with a neural-network oracle."""
    kf_scores = []
    kf = KFold(n_splits=n_splits)
    x, y, X_test, y_test = datasets.prepare_data(dataset, return_test=True)
    score_func, score_metric = metrics.get_socre_foncs(score)
    c = datasets.get_number_of_classes(dataset)
    trees = []
    # Accumulate NN test scores across all folds; resetting this per fold (as
    # in the original) would make the final np.mean cover only the last fold.
    nn_score = []
    for train, test in kf.split(x):
        X_train, _, y_train, _ = x[train], x[test], y[train], y[test]
        X_train, X_val, y_train, y_val = datasets.prepare_val(X_train, y_train)
        # The oracle is trained on the train + validation data.
        X_nn = np.concatenate([X_train, X_val], axis=0)
        y_nn = np.concatenate([y_train, y_val], axis=0)
        k_scores = []
        for k in range(num_experiments):
            p_ic, nn_test_score = train_oracle_and_predict(
                dataset, X_nn, y_nn, X_test, y_test, c, device)
            f_med, f_voting, f_m = compress_tree(
                max_tree_depth, X_train, y_train, p_ic, weight=weight,
                X_val=X_val, y_val=y_val, score=score_metric, delta=delta)
            k_scores.append(score_func(None, None, f_med, f_voting, f_m,
                                       X_train, y_train, X_test, y_test))
            nn_score.append(nn_test_score)
            trees.append((f_m, f_voting, f_med))
        kf_scores.append(metrics.average_scores(k_scores, num_experiments))
    means = metrics.mean_and_std(kf_scores, mean_only=True, show_rf=False)
    output = metrics.agreement_score(trees, X_test) if use_agreement else None
    nn_av = np.mean(nn_score)
    return output, means, nn_av
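
# Minimal usage sketch for the NN-oracle robustness experiment (the
# hyperparameters are illustrative assumptions; 'mnist' is simply the default
# dataset of the NN-oracle generalization experiment below):
#
#     agreement, means, nn_av = robustness_exp(
#         max_tree_depth=4, num_experiments=3, dataset='mnist',
#         device='cpu', use_agreement=True)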


def generalization_exp(dataset, max_tree_depth, forest_depth, num_trees,
                       num_experiments=1, score='accuracy', weight=None,
                       n_splits=10, delta=1):
    """Generalization experiment with a random-forest oracle."""
    kf_scores = []
    kf = KFold(n_splits=n_splits)
    x, y, _, _ = datasets.prepare_data(dataset, return_test=False)
    c = datasets.get_number_of_classes(dataset)
    score_func, score_metric = metrics.get_socre_foncs(score)
    for k in range(num_experiments):
        f_scores = []
        for train, test in kf.split(x):
            X_train, X_test, y_train, y_test = x[train], x[test], y[train], y[test]
            X_train, X_val, y_train, y_val = datasets.prepare_val(X_train, y_train)
            rf, _, f_med, f_all, f_m = compress_tree(
                num_trees, max_tree_depth, forest_depth, X_train, y_train, c,
                weight=weight, X_val=X_val, y_val=y_val, score=score_metric,
                delta=delta)
            f_scores.append(score_func(rf, None, f_med, f_all, f_m,
                                       X_train, y_train, X_test, y_test))
        mean_var_win = metrics.mean_and_std(f_scores, mean_only=False)
        kf_scores.append(mean_var_win)
    print('\nFinal results:')
    print(f'Average RF mean {sum(s[0] for s in kf_scores) / num_experiments}, '
          f'var {sum(s[1] for s in kf_scores) / num_experiments}')
    # Each score tuple holds (mean, var) for the oracle followed by
    # (mean, var, wins) triples for the BM, VT, and MED trees.
    idx = 2
    for t in ('BM', 'VT', 'MED'):
        t_mean = sum(s[idx] for s in kf_scores) / num_experiments
        t_wins = sum(s[idx + 2] for s in kf_scores) / num_experiments
        idx += 3
        print(f'Average {t} mean {t_mean}, wins {t_wins}')
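
# Minimal usage sketch for the forest-oracle generalization experiment
# (illustrative values; they mirror the ones used in the sklearn example
# further below):
#
#     generalization_exp('dermatology', max_tree_depth=4, forest_depth=10,
#                        num_trees=100, weight='balanced')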


def robustness_exp(dataset, max_tree_depth, forest_depth, num_trees,
                   num_experiments=1, score='accuracy', weight=None,
                   n_splits=10, use_agreement=False, delta=1):
    """Robustness experiment with a random-forest oracle."""
    kf_scores = []
    kf = KFold(n_splits=n_splits)
    x, y, X_test, y_test = datasets.prepare_data(dataset, return_test=True)
    c = datasets.get_number_of_classes(dataset)
    score_func, score_metric = metrics.get_socre_foncs(score)
    trees = []
    for train, test in kf.split(x):
        X_train, _, y_train, _ = x[train], x[test], y[train], y[test]
        X_train, X_val, y_train, y_val = datasets.prepare_val(X_train, y_train)
        k_scores = []
        for k in range(num_experiments):
            rf, _, f_med, f_all, f_m = compress_tree(
                num_trees, max_tree_depth, forest_depth, X_train, y_train, c,
                weight=weight, X_val=X_val, y_val=y_val, score=score_metric,
                delta=delta)
            k_scores.append(score_func(rf, None, f_med, f_all, f_m,
                                       X_train, y_train, X_test, y_test))
            trees.append((rf, f_m, f_all, f_med))
        kf_scores.append(metrics.average_scores(k_scores, num_experiments))
    means = metrics.mean_and_std(kf_scores, mean_only=True)
    output = metrics.agreement_score(trees, X_test) if use_agreement else None
    return output, means
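
# Minimal usage sketch; the agreement between trees produced across folds and
# repeated runs is only computed when use_agreement=True:
#
#     agreement, means = robustness_exp('dermatology', max_tree_depth=4,
#                                       forest_depth=10, num_trees=100,
#                                       use_agreement=True)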


def crembo_sklearn_example():
    # Set arguments. Note: the original set args['dataset'] to 'iris' while
    # loading 'dermatology'; the two are unified here via the dataset variable.
    dataset = 'dermatology'
    args = {
        'dataset': dataset,
        'num_trees': 100,
        'tree_depth': 4,
        'forest_depth': 10,
        'weight': 'balanced',
        'sklearn': True
    }
    # Create train, test, and validation sets.
    x, y, X_test, y_test = datasets.prepare_data(dataset, return_test=True)
    X_train, X_val, y_train, y_val = datasets.prepare_val(x, y)
    train_loader = (X_train, y_train)
    test_loader = (X_test, y_test)
    val_loader = (X_val, y_val)
    # Train the large model M (the oracle to be compressed).
    M = RandomForestClassifier(n_estimators=args['num_trees'],
                               max_depth=args['forest_depth'],
                               class_weight=args['weight'])
    M.fit(X_train, y_train)
    # Define the create_model method. Here we create a tree model. All sklearn
    # models should be wrapped by a class that inherits from the
    # MCSkLearnConsistensy class.
    create_model_func = MCConsistentTree(depth=args['tree_depth'],
                                         class_weight=args['weight']).get_clone
    # Define the train_hypothesis method.
    train_hypothesis_func = train_sklearn
    # Define the eval_model method.
    eval_model_func = eval_sklearn
    # Instantiate the CREMBO class.
    crembo = CREMBO(create_model_func, train_hypothesis_func, eval_model_func,
                    args, delta=1)
    # Run CREMBO.
    f = crembo(M, train_loader, test_loader, val_loader, device=None)
    # Print scores.
    f_score = eval_model_func(f, test_loader, None)
    M_score = eval_model_func(M, test_loader, None)
    print(f'M score: {M_score}, CREMBO: {f_score}')
    return f
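
# Usage sketch: the demo returns the compressed tree f, which can then be
# scored on new data via eval_sklearn(f, (X_new, y_new), None), assuming the
# same (inputs, labels) tuple convention used above; the exact interface of f
# depends on the repo's MCSkLearnConsistensy wrapper:
#
#     f = crembo_sklearn_example()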


def generalization_exp(max_tree_depth, num_experiments=1, dataset='mnist',
                       score='accuracy', weight=None, device='cpu', delta=1,
                       n_splits=10):
    """Generalization experiment with a neural-network oracle."""
    kf_scores = []
    kf = KFold(n_splits=n_splits)
    x, y, _, _ = datasets.prepare_data(dataset, return_test=False)
    c = datasets.get_number_of_classes(dataset)
    score_func, score_metric = metrics.get_socre_foncs(score)
    nn_score = []
    for k in range(num_experiments):
        print(f'Experiment number {k + 1}')
        f_scores = []
        for train, test in kf.split(x):
            X_train, X_test, y_train, y_test = x[train], x[test], y[train], y[test]
            X_train, X_val, y_train, y_val = datasets.prepare_val(X_train, y_train)
            # The oracle is trained on the train + validation data.
            X_nn = np.concatenate([X_train, X_val], axis=0)
            y_nn = np.concatenate([y_train, y_val], axis=0)
            p_ic, nn_test_score = train_oracle_and_predict(
                dataset, X_nn, y_nn, X_test, y_test, c, device)
            f_med, f_all, f_m = compress_tree(
                max_tree_depth, X_train, y_train, p_ic, weight=weight,
                X_val=X_val, y_val=y_val, score=score_metric, delta=delta)
            f_scores.append(score_func(None, None, f_med, f_all, f_m,
                                       X_train, y_train, X_test, y_test))
            nn_score.append(nn_test_score)
        mean_var_win = metrics.mean_and_std(f_scores, mean_only=False,
                                            show_rf=False, nn_score=nn_score)
        kf_scores.append(mean_var_win)
    print('\nFinal results:')
    print(f'Average NN mean {sum(s[0] for s in kf_scores) / num_experiments}, '
          f'std {sum(s[1] for s in kf_scores) / num_experiments}')
    # Each score tuple holds (mean, std) for the NN oracle followed by
    # (mean, std, wins) triples for the BM, VT, and MED trees.
    idx = 2
    for t in ('BM', 'VT', 'MED'):
        t_mean = sum(s[idx] for s in kf_scores) / num_experiments
        t_wins = sum(s[idx + 2] for s in kf_scores) / num_experiments
        idx += 3
        print(f'Average {t} mean {t_mean}, wins {t_wins}')
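

# Hypothetical driver, not part of the original section; the hyperparameters
# are illustrative. Note that with all definitions in one file, the NN-oracle
# variants defined last are the ones resolved here.
if __name__ == '__main__':
    # NN-oracle generalization on the default 'mnist' dataset.
    generalization_exp(max_tree_depth=4, num_experiments=1)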