Пример #1
0
def selection_del(dataset, result=None):
    """Greedy backward elimination (DEL) of features.

    Starting from ``result`` (or from the full feature set when ``result``
    is ``None``), repeatedly tries dropping each single feature and keeps
    the drop with the lowest error, as long as that error is not worse
    than the current one. Stops when no drop qualifies or only one
    feature is left.

    Args:
        dataset: Data passed through to ``get_feature_count`` and
            ``count_error``.
        result: Optional starting point, a dict with ``"error"``,
            ``"features"`` and ``"log"`` keys.

    Returns:
        A dict with ``"error"``, ``"features"`` and ``"log"``. If no
        improvement was found, the starting dict itself is returned.

    NOTE: evaluations are always recorded in a list created by this call;
    the log of a supplied ``result`` is not extended.
    """
    feature_count = get_feature_count(dataset)
    log = []

    if result is None:
        # No starting point given: evaluate the complete feature set once.
        every_feature = list(range(feature_count))
        baseline = count_error(dataset, every_feature)
        log.append({"error": baseline, "feature_count": feature_count})
        result = {"error": baseline, "features": every_feature, "log": log}

    while len(result["features"]) > 1:
        best = result

        for dropped in result["features"]:
            remaining = result["features"][:]
            remaining.remove(dropped)

            score = count_error(dataset, remaining)
            log.append({"error": score, "feature_count": len(remaining)})

            # Ties are accepted, so the last equally good drop wins.
            if score <= best["error"]:
                best = {"error": score, "features": remaining, "log": log}

        # ``best`` is only rebound when some drop was accepted.
        if best is result:
            break

        result = best

    return result
Пример #2
0
def __sort_features(dataset):
    """Rank features by the error each one yields when used alone.

    Every feature index is evaluated with ``count_error(dataset, [i])``;
    the indices are then ordered by ascending error (stable sort, so ties
    keep index order).

    Returns:
        A list of ``{"i": feature_index, "pos": rank}`` dicts, in rank
        order, where rank 0 is the feature with the lowest solo error.
    """
    feature_count = get_feature_count(dataset)

    solo_scores = [
        {"i": index, "error": count_error(dataset, [index])}
        for index in range(feature_count)
    ]
    ranked = sorted(solo_scores, key=lambda entry: entry["error"])

    return [
        {"i": entry["i"], "pos": rank}
        for rank, entry in enumerate(ranked)
    ]
Пример #3
0
def selection_full_search(dataset):
    """Exhaustively evaluate every feature subset produced by ``__feat_gen``.

    Each subset is scored with ``count_error`` and recorded in the log;
    the first subset with the strictly lowest error is kept.

    Returns:
        A dict with ``"error"``, ``"features"`` and ``"log"``. When no
        subset scores below the 9999 sentinel, ``"error"`` stays 9999 and
        ``"features"`` stays empty.
    """
    feature_count = get_feature_count(dataset)
    log = []

    # 9999 is the "nothing found yet" sentinel.
    best_error = 9999
    best_features = []

    for subset in __feat_gen(feature_count):
        score = count_error(dataset, subset)
        log.append({"error": score, "feature_count": len(subset)})

        if score < best_error:
            best_error = score
            best_features = subset

    return {"error": best_error, "features": best_features, "log": log}
Пример #4
0
def selection_bfs(dataset, iter_limit=1):
    """Breadth-first (beam) search over feature subsets.

    Level ``k`` of the search holds subsets of ``k`` features. At each
    level every candidate is scored with ``count_error``, the
    ``iter_limit`` best candidates are kept, and each survivor is extended
    by one feature it does not yet contain. The search stops as soon as
    the best candidate of a level is strictly worse than the best result
    seen so far.

    Candidates are deduplicated per level: the same subset reached in a
    different insertion order (e.g. ``[0, 1]`` and ``[1, 0]``) is
    generated only once, so it is not evaluated repeatedly and cannot
    take several beam slots.

    Args:
        dataset: Data passed through to ``get_feature_count`` and
            ``count_error``.
        iter_limit: Beam width, i.e. how many candidates survive a level.

    Returns:
        A dict with ``"error"``, ``"features"`` (sorted ascending) and
        ``"log"`` (one entry per evaluated candidate). If nothing scored
        below the 9999 sentinel, ``"features"`` is empty.
    """
    feature_count = get_feature_count(dataset)
    logs = []
    # 9999 is the "nothing found yet" sentinel.
    result_best = {
        "error": 9999,
        "features": [],
        "log": logs
    }

    frontier = [{"features": [i]} for i in range(feature_count)]

    # One pass per subset size, from 1 up to feature_count.
    for _ in range(feature_count):
        for candidate in frontier:
            candidate["error"] = count_error(dataset, candidate["features"])

            logs.append({
                "error": candidate["error"],
                "feature_count": len(candidate["features"])
            })

        frontier.sort(key=lambda r: r["error"])

        if len(frontier) > iter_limit:
            frontier = frontier[:iter_limit]

        # Nothing survived (e.g. iter_limit == 0): there is no leader to
        # inspect, so stop instead of indexing an empty list.
        if not frontier:
            break

        leader = frontier[0]

        if leader["error"] > result_best["error"]:
            break

        if leader["error"] < result_best["error"]:
            result_best = leader

        # Expand survivors by one feature, skipping subsets already
        # produced on this level under a different ordering.
        seen = set()
        expanded = []
        for candidate in frontier:
            members = frozenset(candidate["features"])
            for i in range(feature_count):
                if i in members:
                    continue
                subset_key = members | {i}
                if subset_key in seen:
                    continue
                seen.add(subset_key)
                expanded.append({"features": candidate["features"] + [i]})

        frontier = expanded

    result_best["features"].sort()
    result_best["log"] = logs

    return result_best
Пример #5
0
def selection_add(dataset, result=None):
    """Greedy forward selection (ADD) of features.

    Starting from ``result`` (or from an empty feature set with the 9999
    sentinel error when ``result`` is ``None``), repeatedly tries adding
    each feature not yet selected and keeps the addition with the lowest
    error, as long as that error is not worse than the current one. Stops
    when no addition qualifies.

    Args:
        dataset: Data passed through to ``get_feature_count`` and
            ``count_error``.
        result: Optional starting point, a dict with ``"error"``,
            ``"features"`` and ``"log"`` keys.

    Returns:
        A dict with ``"error"``, ``"features"`` (kept sorted) and
        ``"log"``. If no improvement was found, the starting dict itself
        is returned.

    NOTE: evaluations are always recorded in a list created by this call;
    the log of a supplied ``result`` is not extended.
    """
    feature_count = get_feature_count(dataset)
    log = []

    if result is None:
        result = {"error": 9999, "features": [], "log": log}

    while True:
        best = result
        unused = (
            feat for feat in range(feature_count)
            if feat not in result["features"]
        )

        for feat in unused:
            candidate = sorted(result["features"] + [feat])
            score = count_error(dataset, candidate)

            log.append({"error": score, "feature_count": len(candidate)})

            # Ties are accepted, so the last equally good addition wins.
            if score <= best["error"]:
                best = {"error": score, "features": candidate, "log": log}

        # ``best`` is only rebound when some addition was accepted.
        if best is result:
            return result

        result = best
Пример #6
0
def __increase(features_cur, features, results, dataset, logs):
    """Recursive step of a depth-first search over feature subsets.

    Scores ``features_cur``, updates the best entry stored for subsets of
    that size in ``results``, and recurses into every extension by a
    feature whose ``"pos"`` is greater than the current maximum position.
    A branch is abandoned when some smaller subset size already has a
    strictly lower error.

    Args:
        features_cur: List of feature dicts selected so far.
        features: All feature dicts; each is read via its ``"pos"`` key.
        results: Indexable by subset size; ``results[k]`` is a dict whose
            ``"error"`` and ``"features"`` are updated in place.
        dataset: Data passed through to ``count_error``.
        logs: List that receives one entry per evaluated subset.
    """
    depth = len(features_cur)
    slot = results[depth]

    # The empty subset is not evaluated; it gets the 9999 sentinel.
    if depth != 0:
        error = count_error(dataset, __flatten_features(features_cur))
    else:
        error = 9999

    if error != 9999:
        logs.append({"error": error, "feature_count": depth})

    # Prune: a smaller subset already does strictly better.
    if any(results[size]["error"] < error for size in range(depth)):
        return

    if error < slot["error"]:
        slot["error"] = error
        slot["features"] = features_cur

    # Only extend with later positions so each subset is visited once.
    highest_pos = __max_feature_pos(features_cur)
    later = (item for item in features if item["pos"] > highest_pos)
    for item in later:
        __increase(features_cur + [item], features, results, dataset, logs)
Пример #7
0
def log(message, func):
    """Run one selection algorithm and print a report about its result.

    Prints ``message`` as a heading, obtains the result of ``func`` via
    ``measure_time``, then prints the error count, the quality report
    (``log_quality``), the number of selected features, the feature
    report (``log_features``) and a separator line.

    Args:
        message: Heading printed before the algorithm runs.
        func: Zero-argument callable handed to ``measure_time``; the value
            that comes back must have ``"error"`` and ``"features"`` keys.
    """
    print(message)

    outcome = measure_time(func)

    print("Количество ошибок:", outcome["error"])
    log_quality(outcome)

    print("Количество признаков: ", len(outcome["features"]))
    log_features(outcome)

    print("-------------------------------------------")


# Driver script: run every selection algorithm on `dataset` and print a
# report for each. `feature_count` and `dataset` are defined outside this
# excerpt.

# Baseline: error of the full feature set, with no selection applied.
features_all = list(range(feature_count))
log(
    "Полный набор признаков", lambda: {
        "error": count_error(dataset, features_all),
        "features": features_all
    })

# Exhaustive search is only run for at most 15 features
# (presumably because the number of subsets grows too quickly - confirm).
if feature_count <= 15:
    log("Полный перебор", lambda: selection_full_search(dataset))

# Greedy forward selection.
log("Алгоритм ADD", lambda: selection_add(dataset))

# Greedy backward elimination.
log("Алгоритм DEL", lambda: selection_del(dataset))

# Combined ADD-DEL; selection_add_del is defined outside this excerpt.
log("Алгоритм ADD-DEL", lambda: selection_add_del(dataset))

# Depth-first search; selection_dfs is defined outside this excerpt.
log('Поиск в глубину', lambda: selection_dfs(dataset))

# Breadth-first (beam) search keeping the 10 best candidates per level.
log('Поиск в ширину', lambda: selection_bfs(dataset, iter_limit=10))