def select_candidate_enhanced(train_df, train_labels, best_classifiers, ensemble, i,
                              *, initial_ensemble_size=2, max_candidates=50):
    """Pick the next ensemble member by scoring a random sample of candidates.

    While the ensemble holds fewer than ``initial_ensemble_size`` members, the
    ``i``-th entry of ``best_classifiers.index.values`` is returned unchanged.
    Afterwards, up to ``max_candidates`` index labels are sampled without
    replacement; each one is tentatively appended to the ensemble, the
    row-wise mean of the selected ``train_df`` columns is scored against
    ``train_labels`` with ``common.score``, and the candidate at the position
    chosen by ``common.argbest`` is returned.

    Args:
        train_df: Table indexable by a list of classifier labels; each label
            presumably selects that classifier's predictions (confirm against
            the caller).
        train_labels: Ground truth forwarded to ``common.score``.
        best_classifiers: Object whose ``index.values`` lists the candidate
            labels; ``len()`` of it bounds the sample size.
        ensemble: List of labels already selected.
        i: Position used only during the initial seeding phase.
        initial_ensemble_size: Ensemble size below which members are taken
            directly by position (default 2, the previously hard-coded value).
        max_candidates: Upper bound on the number of sampled candidates
            (default 50, the previously hard-coded value).

    Returns:
        The label of the selected classifier.
    """
    # Seeding phase: no sampling or scoring, just take the i-th label.
    if len(ensemble) < initial_ensemble_size:
        return best_classifiers.index.values[i]

    sample_size = min(max_candidates, len(best_classifiers))
    candidates = choice(best_classifiers.index.values, sample_size, replace=False)
    candidate_scores = [
        common.score(train_labels, train_df[ensemble + [candidate]].mean(axis=1))
        for candidate in candidates
    ]
    return candidates[common.argbest(candidate_scores)]
def select_candidate_sdi(train_df, train_labels, best_classifiers, ensemble, i):
    """Pick the next ensemble member by a weighted accuracy/diversity score.

    While the ensemble holds fewer than ``initial_ensemble_size`` members
    (module-level setting), the ``i``-th entry of
    ``best_classifiers.index.values`` is returned unchanged. Afterwards, up to
    ``max_candidates`` index labels are sampled without replacement and each
    is scored as::

        accuracy_weight * best_classifiers.loc[candidate]
            + (1 - accuracy_weight) * (1 - |diversity_score|)

    where ``diversity_score`` is ``common.diversity_score`` evaluated on the
    ``train_df`` columns of the ensemble plus the candidate. The candidate at
    the position chosen by ``common.argbest`` is returned.

    Args:
        train_df: Table indexable by a list of classifier labels.
        train_labels: Not used by this selector; accepted so the signature
            matches the other ``select_candidate_*`` functions.
        best_classifiers: Label-indexed values used as the accuracy term;
            presumably a pandas Series of per-classifier scores (confirm
            against the caller).
        ensemble: List of labels already selected.
        i: Position used only during the initial seeding phase.

    Returns:
        The label of the selected classifier.
    """
    # Seeding phase: no sampling or scoring, just take the i-th label.
    if len(ensemble) < initial_ensemble_size:
        return best_classifiers.index.values[i]

    sample_size = min(max_candidates, len(best_classifiers))
    candidates = choice(best_classifiers.index.values, sample_size, replace=False)

    # 1 - kappa so larger = more diverse
    candidate_diversity_scores = [
        1 - abs(common.diversity_score(train_df[ensemble + [candidate]].values))
        for candidate in candidates
    ]

    # Candidates are index labels, so use the label-based .loc accessor;
    # the former .ix indexer no longer exists in pandas >= 1.0.
    candidate_scores = [
        accuracy_weight * best_classifiers.loc[candidate]
        + (1 - accuracy_weight) * diversity
        for candidate, diversity in zip(candidates, candidate_diversity_scores)
    ]
    return candidates[common.argbest(candidate_scores)]
def select_candidate_drep(train_df, train_labels, best_classifiers, ensemble, i):
    """Pick the next ensemble member: shortlist by diversity, then by score.

    While the ensemble holds fewer than ``initial_ensemble_size`` members
    (module-level setting), the ``i``-th entry of
    ``best_classifiers.index.values`` is returned unchanged. Afterwards:

    1. Up to ``max_candidates`` index labels are sampled without replacement.
    2. Each candidate gets ``|common.diversity_score(...)|`` computed on the
       ``train_df`` columns of the ensemble plus that candidate.
    3. The ``max_diversity_candidates`` candidates with the smallest absolute
       diversity score form the shortlist (ascending ``argsort``).
    4. Each shortlisted candidate is scored with ``common.score`` on the
       row-wise mean of the ensemble-plus-candidate columns, and the
       shortlisted candidate at the position chosen by ``common.argbest`` is
       returned.

    Args:
        train_df: Table indexable by a list of classifier labels.
        train_labels: Ground truth forwarded to ``common.score``.
        best_classifiers: Object whose ``index.values`` lists the candidate
            labels; ``len()`` of it bounds the sample size.
        ensemble: List of labels already selected.
        i: Position used only during the initial seeding phase.

    Returns:
        The label of the selected classifier.
    """
    # Seeding phase: no sampling or scoring, just take the i-th label.
    if len(ensemble) < initial_ensemble_size:
        return best_classifiers.index.values[i]

    sample_size = min(max_candidates, len(best_classifiers))
    candidates = choice(best_classifiers.index.values, sample_size, replace=False)

    candidate_diversity_scores = [
        abs(common.diversity_score(train_df[ensemble + [candidate]].values))
        for candidate in candidates
    ]
    candidate_diversity_ranks = array(candidate_diversity_scores).argsort()
    diversity_candidates = candidates[
        candidate_diversity_ranks[:max_diversity_candidates]
    ]

    candidate_accuracy_scores = [
        common.score(train_labels, train_df[ensemble + [candidate]].mean(axis=1))
        for candidate in diversity_candidates
    ]
    # The winning position refers to the shortlist, so index the shortlist --
    # indexing the full unsorted sample here would return an unrelated
    # candidate.
    return diversity_candidates[common.argbest(candidate_accuracy_scores)]