示例#1
0
def CES_run():
    """Select an ensemble for the current fold and write a report file.

    Uses names that are not defined in this function (``project_path``,
    ``seed``, ``metric``, ``size``, ``fold``, ``RULE``, ``init_ens_size``),
    so they must be available at module level when this is called.

    Steps:
      1. Load the candidate classifiers with ``get_bps``.
      2. Seed the ensemble with ``init_ens_size`` picks from
         ``select_top_classifier`` (each pick is removed from the pool).
      3. Grow it with ``find_ensemble``, which records every intermediate
         ensemble and its score in ``set_ensembles``.
      4. Keep the recorded ensemble with the highest score, or the seed
         ensemble when nothing was recorded.
      5. Score that ensemble on the "valid" and "test" splits and write a
         report to the ``CES_OUTPUT`` directory.
    """
    started = time.time()
    # get_bps returns a pair; the second element is not needed here.
    classifiers, _ = get_bps(project_path, seed, metric, size)
    # Snapshot of the full pool: `classifiers` is consumed by the selection.
    original = deepcopy(classifiers)
    set_ensembles = {}
    ensemble = [
        select_top_classifier(classifiers, seed, fold, RULE)
        for _ in range(init_ens_size)
    ]
    find_ensemble(ensemble, classifiers, seed, fold, RULE, set_ensembles)

    # Keys of set_ensembles are tuples of classifiers, values are scores.
    sel_ens = (max(set_ensembles, key=set_ensembles.get)
               if set_ensembles else ensemble)

    # 1-based positions (in the original pool) of the selected classifiers.
    actual = [pos + 1 for pos, bp in enumerate(original) if bp in sel_ens]

    valid_predictions = aggregate_predictions(sel_ens, seed, fold, "valid",
                                              RULE)
    val_score = fmax_score(*valid_predictions)
    test_predictions = aggregate_predictions(sel_ens, seed, fold, "test",
                                             RULE)
    test_score = fmax_score(*test_predictions)

    seconds = time.time() - started
    members = ", ".join(str(a) for a in actual)
    elapsed = time.strftime('%H:%M:%S', time.gmtime(seconds))
    string = "Fold_%i (val = %f) (test = %f) :: (%s) [%s]\n%s" % (
        fold, val_score, test_score, members, elapsed, bps2string(original))

    dst = '%s/CES_OUTPUT/ORDER%i/bp%i_fold%i_seed%i_%s_start-%s.fmax' % (
        project_path, seed, size, fold, seed, RULE, init_ens_size)
    # The context manager closes the file; no explicit close() is needed.
    with open(dst, 'wb') as f:
        f.write(string)
    print("\t%s (%s)" % (dst, elapsed))
示例#2
0
def CES_ens():
    """Score the per-fold selected ensembles on the test split.

    Uses names that are not defined in this function (``project_path``,
    ``seed``, ``size``, ``RULE``, ``start``, ``metric``, ``fold_count``),
    so they must be available at module level when this is called.

    For every fold, the per-fold file under ``CES_OUTPUT`` is parsed with
    ``get_fold_ens`` / ``get_ens_bps``, the resulting ensemble is
    aggregated on the "test" split, and one ``fold_<n>,<score>`` line is
    produced. A last ``final,<score>`` line is computed over the
    concatenation of all folds. The lines are written to a file under
    ``CES_RESULTS``.
    """
    started = time.time()

    y_true = DataFrame(columns=["label"])
    y_score = DataFrame(columns=["prediction"])
    lines = []
    for fold in range(fold_count):
        filename_fold = '%s/CES_OUTPUT/ORDER%s/bp%s_fold%s_seed%s_%s_start-%s.%s' % (
            project_path, seed, size, fold, seed, RULE, start, metric)
        ensemble = get_fold_ens(filename_fold)
        ensemble_bps = get_ens_bps(ensemble, filename_fold)
        inner_y_true, inner_y_score = aggregate_predictions(
            ensemble_bps, seed, fold, "test", RULE)
        # Accumulate labels/predictions across folds for the final score.
        y_true = concat([y_true, inner_y_true], axis=0)
        y_score = concat([y_score, inner_y_score], axis=0)
        fold_score = fmax_score(inner_y_true, inner_y_score)
        lines.append("fold_%i,%f\n" % (fold, fold_score))
    lines.append("final,%f\n" % fmax_score(y_true, y_score))
    string = "".join(lines)

    dst = '%s/CES_RESULTS/ORDER%i/CES_bp%i_seed%i_%s_start-%s.%s' % (
        project_path, seed, size, seed, RULE, start, metric)
    # The context manager closes the file; no explicit close() is needed.
    with open(dst, 'wb') as f:
        f.write(string)
    elapsed = time.strftime('%H:%M:%S', time.gmtime(time.time() - started))
    print("\t%s (%s)" % (dst, elapsed))
示例#3
0
def select_top_classifier(classifiers, seed, fold, RULE):
    """Remove and return the best-scoring single classifier.

    Each entry of ``classifiers`` is scored on its own: it is wrapped in a
    one-element list, aggregated on the "valid" split, and passed to
    ``fmax_score``. The entry at the ``argmax`` of those scores is removed
    from ``classifiers`` (the list is modified in place) and returned.

    Args:
        classifiers: mutable list of candidates; shrinks by one element.
        seed, fold, RULE: forwarded unchanged to ``aggregate_predictions``.

    Returns:
        The selected element of ``classifiers``.
    """
    scores = []
    for candidate in classifiers:
        predictions = aggregate_predictions([candidate], seed, fold, "valid",
                                            RULE)
        scores.append(fmax_score(*predictions))
    best = classifiers[argmax(scores)]
    classifiers.remove(best)
    return best
示例#4
0
def find_ensemble(ensemble, classifiers, seed, fold, RULE, set_ensembles):
    """Greedily grow ``ensemble`` from ``classifiers``, recording each step.

    While candidates remain and ``ensemble`` has not reached the
    module-level ``max_ens_size``, one round is performed:

      * ``get_potential_ensembles`` builds candidate ensembles from the
        current ensemble and the remaining classifiers, which are passed
        in a random order (``random.choice`` over the whole list without
        replacement; NOTE(review): this call signature matches
        ``numpy.random.choice`` - confirm the import);
      * every candidate is scored on the "valid" split with ``fmax_score``;
      * the last element of the best-scoring candidate is appended to
        ``ensemble`` and removed from ``classifiers``;
      * a tuple snapshot of ``ensemble`` is stored in ``set_ensembles``
        together with that round's best score.

    Implemented as a loop rather than recursion so the depth does not grow
    with the number of classifiers, and so the function returns
    ``ensemble`` on every path (the recursive form returned ``None``
    whenever at least one round had been executed).

    Args:
        ensemble: list to extend; modified in place.
        classifiers: list of remaining candidates; modified in place.
        seed, fold, RULE: forwarded unchanged to ``aggregate_predictions``.
        set_ensembles: dict filled with ``tuple(ensemble) -> score``.

    Returns:
        The same ``ensemble`` list object that was passed in.
    """
    while classifiers and len(ensemble) != max_ens_size:
        potential_ensembles = get_potential_ensembles(
            ensemble,
            random.choice(classifiers, len(classifiers), replace=False))
        scores = [
            fmax_score(*aggregate_predictions(pe, seed, fold, "valid", RULE))
            for pe in potential_ensembles
        ]
        # Look the winner up once; it is both added to the ensemble and
        # removed from the pool.
        winner = potential_ensembles[argmax(scores)][-1]
        ensemble.append(winner)
        set_ensembles[tuple(deepcopy(ensemble))] = max(scores)
        classifiers.remove(winner)
    return ensemble
示例#5
0
def FULL_ens(parameters):
    """Score the full (unselected) classifier set on the test split.

    Uses names that are not defined in this function (``project_path``,
    ``metric``, ``RULE``, ``fold_count``, ``directory``,
    ``subdirectory``), so they must be available at module level.

    For every fold, the first element returned by ``get_bps`` is
    aggregated on the "test" split and one ``fold_<n>,<score>`` line is
    produced; a last ``final,<score>`` line is computed over the
    concatenation of all folds. The lines are written to an
    ``FE_bp..._seed..._<RULE>.fmax`` file and the file name is printed.

    Args:
        parameters: a ``(size, seed)`` pair.
    """
    size, seed = parameters

    y_true = DataFrame(columns=["label"])
    y_score = DataFrame(columns=["prediction"])
    lines = []
    for fold in range(fold_count):
        ensemble_bps = get_bps(project_path, seed, metric, size)[0]
        inner_y_true, inner_y_score = aggregate_predictions(
            ensemble_bps, seed, fold, "test", RULE)
        # Accumulate labels/predictions across folds for the final score.
        y_true = concat([y_true, inner_y_true], axis=0)
        y_score = concat([y_score, inner_y_score], axis=0)
        fold_score = fmax_score(inner_y_true, inner_y_score)
        lines.append("fold_%i,%f\n" % (fold, fold_score))
    lines.append("final,%f\n" % fmax_score(y_true, y_score))
    string = "".join(lines)

    filename = '%s/%s/%s%i/FE_bp%i_seed%i_%s.fmax' % (
        project_path, directory, subdirectory, seed, size, seed, RULE)

    # The context manager closes the file; no explicit close() is needed.
    with open(filename, 'wb') as f:
        f.write(string)
    print(filename)