def CES_run():
    """Select an ensemble for the current fold and write a report file.

    Reads the module-level settings ``project_path``, ``seed``, ``metric``,
    ``size``, ``fold``, ``RULE`` and ``init_ens_size``.

    The ensemble is seeded with ``init_ens_size`` calls to
    ``select_top_classifier`` and then handed to ``find_ensemble``, which
    records ensembles and their scores in a dict.  The highest-scoring
    recorded ensemble is selected (or the seed ensemble when nothing was
    recorded), scored with ``fmax_score`` on the aggregated "valid" and
    "test" predictions, and the report is written to a ``.fmax`` file under
    ``CES_OUTPUT``.  The destination path and elapsed time are printed.
    """
    started = time.time()
    classifiers, _unused_weight = get_bps(project_path, seed, metric, size)
    # Snapshot taken before the selection helpers remove entries from
    # `classifiers`; it is used for the reported positions and listing.
    original = deepcopy(classifiers)

    ensemble = [
        select_top_classifier(classifiers, seed, fold, RULE)
        for _step in range(init_ens_size)
    ]
    scored_ensembles = {}
    find_ensemble(ensemble, classifiers, seed, fold, RULE, scored_ensembles)

    if scored_ensembles:
        chosen = max(scored_ensembles, key=scored_ensembles.get)
    else:
        chosen = ensemble

    # 1-based positions, within `original`, of the members of the chosen ensemble.
    positions = [
        position
        for position, candidate in enumerate(original, 1)
        if candidate in chosen
    ]

    valid_aggregate = aggregate_predictions(chosen, seed, fold, "valid", RULE)
    val_score = fmax_score(*valid_aggregate)
    test_aggregate = aggregate_predictions(chosen, seed, fold, "test", RULE)
    test_score = fmax_score(*test_aggregate)

    elapsed = time.strftime('%H:%M:%S', time.gmtime(time.time() - started))
    report = "Fold_%i (val = %f) (test = %f) :: (%s) [%s]\n%s" % (
        fold,
        val_score,
        test_score,
        ", ".join(str(position) for position in positions),
        elapsed,
        bps2string(original),
    )

    dst = '%s/CES_OUTPUT/ORDER%i/bp%i_fold%i_seed%i_%s_start-%s.fmax' % (
        project_path, seed, size, fold, seed, RULE, init_ens_size)
    with open(dst, 'wb') as f:
        f.write(report)
    print("\t%s (%s)" % (dst, elapsed))
def CES_ens():
    """Score the stored per-fold ensembles on test data and write a summary.

    Reads the module-level settings ``fold_count``, ``project_path``,
    ``seed``, ``size``, ``RULE``, ``start`` and ``metric``.

    For every fold in ``range(fold_count)`` the fold's file under
    ``CES_OUTPUT`` is loaded through ``get_fold_ens`` and ``get_ens_bps``,
    its "test" predictions are aggregated, and a ``fold_<i>,<score>`` line is
    recorded.  The per-fold labels and predictions are concatenated across
    folds to produce a closing ``final,<score>`` line.  The lines are written
    to a file under ``CES_RESULTS`` and the path and elapsed time are printed.
    """
    began = time.time()
    all_labels = DataFrame(columns=["label"])
    all_predictions = DataFrame(columns=["prediction"])
    lines = []

    for fold in range(fold_count):
        fold_path = '%s/CES_OUTPUT/ORDER%s/bp%s_fold%s_seed%s_%s_start-%s.%s' % (
            project_path, seed, size, fold, seed, RULE, start, metric)
        fold_ensemble = get_fold_ens(fold_path)
        members = get_ens_bps(fold_ensemble, fold_path)
        fold_labels, fold_predictions = aggregate_predictions(
            members, seed, fold, "test", RULE)

        all_labels = concat([all_labels, fold_labels], axis=0)
        all_predictions = concat([all_predictions, fold_predictions], axis=0)

        fold_score = fmax_score(fold_labels, fold_predictions)
        lines.append("fold_%i,%f\n" % (fold, fold_score))

    lines.append("final,%f\n" % fmax_score(all_labels, all_predictions))

    dst = '%s/CES_RESULTS/ORDER%i/CES_bp%i_seed%i_%s_start-%s.%s' % (
        project_path, seed, size, seed, RULE, start, metric)
    with open(dst, 'wb') as f:
        f.write("".join(lines))

    elapsed = time.strftime('%H:%M:%S', time.gmtime(time.time() - began))
    print("\t%s (%s)" % (dst, elapsed))
def select_top_classifier(classifiers, seed, fold, RULE):
    """Remove and return the classifier with the best individual score.

    Each entry of ``classifiers`` is scored on its own: it is passed as a
    one-element list to ``aggregate_predictions`` for the "valid"
    predictions, and the result is fed to ``fmax_score``.  The entry at the
    ``argmax`` of those scores is removed from ``classifiers`` in place and
    returned.
    """
    validation_scores = []
    for candidate in classifiers:
        aggregated = aggregate_predictions(
            [candidate], seed, fold, "valid", RULE)
        validation_scores.append(fmax_score(*aggregated))

    best = classifiers[argmax(validation_scores)]
    classifiers.remove(best)
    return best
def find_ensemble(ensemble, classifiers, seed, fold, RULE, set_ensembles):
    """Greedily grow ``ensemble`` from ``classifiers``, recording every step.

    On each step all remaining classifiers are sampled without replacement
    through ``random.choice``, ``get_potential_ensembles`` builds the
    candidate ensembles from the current ensemble and that sample, and each
    candidate is scored with ``fmax_score`` on its aggregated "valid"
    predictions.  The last member of the best-scoring candidate is appended
    to ``ensemble`` and removed from ``classifiers``.  A copy of the grown
    ensemble is stored in ``set_ensembles`` as a tuple key whose value is the
    best score of that step.

    The search stops once ``classifiers`` is empty or ``len(ensemble)``
    equals the module-level ``max_ens_size``.

    Args:
        ensemble: List of already selected classifiers; extended in place.
        classifiers: List of remaining candidates; shrunk in place.
        seed, fold, RULE: Forwarded unchanged to ``aggregate_predictions``.
        set_ensembles: Dict updated in place with one entry per step.

    Returns:
        The same ``ensemble`` list object.  It is returned on every path;
        the earlier recursive form returned ``None`` whenever at least one
        classifier had been added.
    """
    # Iterative form of the former tail recursion: same sequence of calls,
    # but no additional stack frame per added classifier.
    while classifiers and len(ensemble) != max_ens_size:
        sampled = random.choice(classifiers, len(classifiers), replace=False)
        potential_ensembles = get_potential_ensembles(ensemble, sampled)
        scores = [
            fmax_score(*aggregate_predictions(pe, seed, fold, "valid", RULE))
            for pe in potential_ensembles
        ]
        # The new member is the last element of the best-scoring candidate.
        selected = potential_ensembles[argmax(scores)][-1]
        ensemble.append(selected)
        set_ensembles[tuple(deepcopy(ensemble))] = max(scores)
        classifiers.remove(selected)
    return ensemble
def FULL_ens(parameters):
    """Score the complete classifier set on every fold and write a summary.

    Reads the module-level settings ``fold_count``, ``project_path``,
    ``metric``, ``RULE``, ``directory`` and ``subdirectory``.

    For every fold in ``range(fold_count)`` the first value returned by
    ``get_bps`` is aggregated on the "test" predictions and a
    ``fold_<i>,<score>`` line is recorded.  The per-fold labels and
    predictions are concatenated across folds to produce a closing
    ``final,<score>`` line.  The lines are written to an ``FE_*.fmax`` file
    and the file name is printed.

    Args:
        parameters: A two-item sequence unpacked as ``(size, seed)``.
    """
    size, seed = parameters
    all_labels = DataFrame(columns=["label"])
    all_predictions = DataFrame(columns=["prediction"])
    lines = []

    for fold in range(fold_count):
        members = get_bps(project_path, seed, metric, size)[0]
        fold_labels, fold_predictions = aggregate_predictions(
            members, seed, fold, "test", RULE)

        all_labels = concat([all_labels, fold_labels], axis=0)
        all_predictions = concat([all_predictions, fold_predictions], axis=0)

        fold_score = fmax_score(fold_labels, fold_predictions)
        lines.append("fold_%i,%f\n" % (fold, fold_score))

    lines.append("final,%f\n" % fmax_score(all_labels, all_predictions))

    filename = '%s/%s/%s%i/FE_bp%i_seed%i_%s.fmax' % (
        project_path, directory, subdirectory, seed, size, seed, RULE)
    with open(filename, 'wb') as f:
        f.write("".join(lines))
    print(filename)