示例#1
0
def selection_del(dataset, result=None):
    """Greedy backward elimination of features.

    Starting from ``result`` (or, when it is ``None``, from the full feature
    set scored with ``count_error``), each round tries removing every
    remaining feature in turn and keeps the removal with the lowest error,
    provided that error is not worse than the current one (ties are accepted,
    and among equally good candidates the later one wins).  The search stops
    when no removal is accepted or only one feature is left.

    Returns a dict with keys ``"error"``, ``"features"`` and ``"log"``.
    ``"log"`` holds one ``{"error", "feature_count"}`` entry per evaluation
    made during this call.  If ``result`` was supplied and no removal is
    accepted, that same object is returned untouched, so its own ``"log"`` is
    what the caller gets back.
    """
    feature_count = get_feature_count(dataset)
    log = []

    if result is None:
        everything = list(range(feature_count))
        start_error = count_error(dataset, everything)
        log.append({"error": start_error, "feature_count": feature_count})
        result = {"error": start_error, "features": everything, "log": log}

    while len(result["features"]) > 1:
        best = result

        for dropped in result["features"]:
            # Work on a copy so the current selection stays intact.
            remaining = result["features"][:]
            remaining.remove(dropped)

            trial_error = count_error(dataset, remaining)
            log.append({
                "error": trial_error,
                "feature_count": len(remaining),
            })

            if trial_error <= best["error"]:
                best = {
                    "error": trial_error,
                    "features": remaining,
                    "log": log,
                }

        # `best` is only rebound to a freshly built dict (with a shorter
        # feature list), so an unchanged binding means no removal helped.
        if best is result:
            return result

        result = best

    return result
示例#2
0
def make_plot(dataset, result):
    """Draw the full plot for a selection result.

    Does nothing when ``result["log"]`` is empty.  Otherwise sets the axis
    labels and then delegates, in order, to ``__plot_all``,
    ``__plot_best_in_col`` and ``__plot_best``.
    """
    entries = result["log"]
    if entries:
        feature_count = get_feature_count(dataset)

        __set_labels()

        __plot_all(dataset, entries, feature_count)
        __plot_best_in_col(entries, feature_count)
        __plot_best(result)
示例#3
0
def __sort_features(dataset):
    """Rank features by the error each one achieves on its own.

    Every feature index ``i`` is scored with ``count_error(dataset, [i])``;
    the features are then ordered by ascending error (stable, so equal errors
    keep index order).

    Returns a list of ``{"i": feature_index, "pos": rank}`` dicts in rank
    order, where ``rank`` starts at 0 for the lowest error.
    """
    feature_count = get_feature_count(dataset)

    scored = [
        {"i": index, "error": count_error(dataset, [index])}
        for index in range(feature_count)
    ]
    scored.sort(key=lambda entry: entry["error"])

    return [
        {"i": entry["i"], "pos": rank}
        for rank, entry in enumerate(scored)
    ]
示例#4
0
def selection_full_search(dataset):
    """Evaluate every candidate produced by ``__feat_gen`` and keep the best.

    Each candidate feature list yielded by ``__feat_gen(feature_count)``
    (presumably every feature subset -- confirm against ``__feat_gen``) is
    scored with ``count_error``; the first candidate with the strictly lowest
    error wins.

    Returns a dict with keys ``"error"``, ``"features"`` and ``"log"``, where
    ``"log"`` has one ``{"error", "feature_count"}`` entry per evaluated
    candidate.  If no candidate is accepted (e.g. the generator yields
    nothing), ``"features"`` is empty and ``"error"`` is ``float("inf")``.
    """
    feature_count = get_feature_count(dataset)
    log = []
    # Start from +inf instead of a magic number: with a finite sentinel such
    # as 9999, a dataset whose errors are all larger would never get any
    # candidate accepted and an empty selection would be returned silently.
    best = {"error": float("inf"), "features": [], "log": log}

    for candidate in __feat_gen(feature_count):
        error = count_error(dataset, candidate)

        log.append({"error": error, "feature_count": len(candidate)})

        # Strict comparison: on ties the earlier candidate is kept.
        if error < best["error"]:
            best = {"error": error, "features": candidate, "log": log}

    return best
示例#5
0
def make_compare_plot(dataset, result, draw_ticks=False, **kwargs):
    """Draw the reduced plot used when comparing several selection results.

    Does nothing when ``result["log"]`` is empty.  Otherwise sets the axis
    labels, optionally draws the x ticks (``draw_ticks``), plots the
    per-column best values and the overall best, and finally installs y ticks
    at the positions returned by ``__plot_best_in_col``.  Extra keyword
    arguments are forwarded unchanged to ``__plot_best_in_col`` and
    ``__plot_best``.
    """
    entries = result["log"]
    if entries:
        feature_count = get_feature_count(dataset)

        __set_labels()

        if draw_ticks:
            __draw_x_ticks(feature_count)

        y_ticks = __plot_best_in_col(entries, feature_count, **kwargs)
        __plot_best(result, **kwargs)

        tick_labels = __make_quality_tick_labels(dataset, y_ticks)
        plot.yticks(y_ticks, tick_labels)
示例#6
0
def selection_dfs(dataset):
    """Depth-first feature selection driven by ``__increase``.

    Features are first ranked with ``__sort_features``.  A list of
    ``feature_count + 1`` placeholder results (error 9999, no features) is
    handed to ``__increase`` together with the ranked features, the dataset
    and a shared log list; ``__increase`` is expected to fill these in
    (presumably one slot per subset size -- confirm against ``__increase``).

    Returns the placeholder/result with the lowest ``"error"``, with its
    ``"features"`` flattened through ``__flatten_features`` and sorted, and
    with the shared log attached under ``"log"``.
    """
    feature_count = get_feature_count(dataset)
    ranked_features = __sort_features(dataset)

    logs = []
    # One independent dict (and feature list) per slot.
    results = [
        {"error": 9999, "features": []}
        for _ in range(feature_count + 1)
    ]

    __increase([], ranked_features, results, dataset, logs)

    best = min(results, key=lambda entry: entry["error"])
    flat_features = __flatten_features(best["features"])
    best["features"] = sorted(flat_features)
    best["log"] = logs

    return best
示例#7
0
def selection_bfs(dataset, iter_limit=1):
    """Breadth-first (beam) feature selection.

    Level ``k`` holds candidate feature lists of length ``k``.  At each level
    every candidate is scored with ``count_error``, the candidates are sorted
    by error and only the best ``iter_limit`` are kept.  The search stops as
    soon as the best candidate of a level is strictly worse than the best
    result seen so far; otherwise the kept candidates are each extended by
    every feature they do not contain yet to form the next level.

    Args:
        dataset: passed through to ``get_feature_count`` and ``count_error``.
        iter_limit: number of candidates kept per level (beam width).

    Returns:
        A dict with keys ``"error"``, ``"features"`` (sorted) and ``"log"``
        (one ``{"error", "feature_count"}`` entry per evaluation).  If no
        candidate was ever accepted, ``"features"`` is empty and ``"error"``
        is ``float("inf")``.
    """
    feature_count = get_feature_count(dataset)
    logs = []
    # +inf instead of a magic number: with a finite sentinel such as 9999 the
    # very first level would hit the "worse than best" break whenever all
    # single-feature errors exceed it, returning an empty selection.
    result_best = {"error": float("inf"), "features": [], "log": logs}

    # Level 1: every single feature on its own.
    result = [{"features": [i]} for i in range(feature_count)]

    # At most `feature_count` levels exist (subset sizes 1..feature_count).
    for _ in range(feature_count):
        for res in result:
            res["error"] = count_error(dataset, res["features"])

            logs.append({
                "error": res["error"],
                "feature_count": len(res["features"]),
            })

        result.sort(key=lambda r: r["error"])

        if len(result) > iter_limit:
            result = result[:iter_limit]

        if result[0]["error"] > result_best["error"]:
            break

        # Strict comparison: on ties the earlier (smaller) selection is kept,
        # but the search still continues to the next level.
        if result[0]["error"] < result_best["error"]:
            result_best = result[0]

        # NOTE: with iter_limit > 1 the same subset can be produced from
        # different parents (e.g. [0, 1] and [1, 0]); each ordering is
        # evaluated and logged separately.
        result = [
            {"features": res["features"] + [i]}
            for res in result
            for i in range(feature_count)
            if i not in res["features"]
        ]

    result_best["features"].sort()
    result_best["log"] = logs

    return result_best
示例#8
0
def selection_add(dataset, result=None):
    """Greedy forward selection of features.

    Starting from ``result`` (or, when it is ``None``, from an empty
    selection), each round tries adding every feature not yet selected and
    keeps the addition with the lowest error, provided that error is not
    worse than the current one (ties are accepted, and among equally good
    candidates the later one wins).  The search stops when no addition is
    accepted.

    Returns a dict with keys ``"error"``, ``"features"`` (sorted) and
    ``"log"``.  ``"log"`` holds one ``{"error", "feature_count"}`` entry per
    evaluation made during this call.  If ``result`` was supplied and no
    addition is accepted, that same object is returned untouched, so its own
    ``"log"`` is what the caller gets back.  If the search starts from
    scratch and nothing is accepted, ``"error"`` is ``float("inf")``.
    """
    feature_count = get_feature_count(dataset)
    log = []
    if result is None:
        # +inf instead of a magic number: with a finite sentinel such as 9999
        # no feature would ever be added when all errors exceed it.
        result = {"error": float("inf"), "features": [], "log": log}

    while True:
        best = result

        for feat in range(feature_count):
            if feat in result["features"]:
                continue

            # New sorted list; the current selection is left untouched.
            candidate = sorted(result["features"] + [feat])

            error = count_error(dataset, candidate)

            log.append({
                "error": error,
                "feature_count": len(candidate),
            })

            if error <= best["error"]:
                best = {
                    "error": error,
                    "features": candidate,
                    "log": log,
                }

        # `best` is only rebound to a freshly built dict (with a longer
        # feature list), so an unchanged binding means no addition helped.
        if best is result:
            return result

        result = best