Пример #1
0
def main():
    """Print the features picked by ``select_features`` for a sweep of thresholds.

    Reads the ReliefF list from ``RELIEFF_FILENAME`` and the feature data from
    ``FEATURE_FILENAME``, computes the feature correlations once, and then, for
    every threshold produced by ``frange(1.0, 0.0, -0.1)``, prints the
    threshold, the number of selected features and one line per selected
    feature in the form ``"<index> (<name>)"``.

    Returns:
        None. All results are written to standard output.
    """
    # Read the ReliefF list inside a context manager so the file handle is
    # closed even if reading raises.
    with open(RELIEFF_FILENAME, "r") as relieff_file:
        relieff_lines = relieff_file.readlines()

    relieff_features = parse_relieff_list(relieff_lines)
    # Order entries by their first element, largest first
    # (presumably the ReliefF weight -- confirm against parse_relieff_list).
    relieff_features.sort(key=lambda x: x[0], reverse=True)

    # The first value returned by read_data is not needed here.
    _, features, feature_names = read_data(FEATURE_FILENAME)

    # Correlations do not depend on the threshold, so compute them only once.
    correlations = find_correlation(features)

    for threshold in frange(1.0, 0.0, -0.1):
        selected = select_features(features, relieff_features, correlations,
                                   threshold)

        print("============================================================")
        print("THRESHOLD =", threshold)
        print("Count:", len(selected))
        print("\n".join(
            "{} ({})".format(x, feature_names[x]) for x in selected))

        # Three blank lines separate consecutive threshold reports.
        print()
        print()
        print()
Пример #2
0
def main():
    """Report cliques of strongly correlated features.

    Loads the data set named by the command-line options, appends the results
    as one extra column to the feature matrix, and computes the correlations.
    Every key pair whose first correlation entry exceeds 0.8 becomes an edge
    of a graph. The cliques returned by ``nx.find_cliques`` that have at least
    three members are printed, largest first, followed by a table of how many
    of those cliques each node belongs to.

    Returns:
        None. All results are written to standard output.
    """
    name, fmt = correlation.parse_options(argv)
    results, features, feature_names = correlation.read_data(name, fmt)

    # Turn the results into a single column and glue it onto the right-hand
    # side of the feature matrix.
    result_column = results.reshape((len(results), 1))
    features = N.hstack((features, result_column))

    correlations = correlation.find_correlation(features)

    threshold = 0.8

    # Keep only the pairs whose first correlation entry is above the threshold.
    strong_pairs = [
        pair for pair, stats in correlations.items() if stats[0] > threshold
    ]

    graph = nx.Graph()
    for f1, f2 in strong_pairs:
        graph.add_edge(f1, f2)

    # Drop small cliques first; the stable sort then orders the rest by size,
    # largest first, without disturbing the relative order of equal sizes.
    big_cliques = [
        members for members in nx.find_cliques(graph) if len(members) >= 3
    ]
    big_cliques.sort(key=len, reverse=True)

    print("=====CLIQUES=====")
    for members in big_cliques:
        print()
        print("Clique, length:", len(members))
        for node in sorted(members):
            print("{:>3} ({})".format(node, feature_names[node]))

    print()
    print()
    print("=====CLIQUE COUNT=====")
    print()

    # Count, for every node, the number of reported cliques it appears in.
    membership = {}
    for members in big_cliques:
        for node in members:
            if node in membership:
                membership[node] += 1
            else:
                membership[node] = 1

    # Highest count first; ties are broken by the smaller node index.
    ranked_nodes = sorted(
        membership,
        key=lambda node: (membership[node], -node),
        reverse=True,
    )
    for node in ranked_nodes:
        print("{:>3}: {:>3} ({})".format(
            membership[node], node, feature_names[node]))
def main():
    """Run feature selection at several thresholds and print each result.

    The ReliefF list is loaded from ``RELIEFF_FILENAME`` and sorted by the
    first element of each entry in descending order. The feature data is
    loaded from ``FEATURE_FILENAME`` and its correlations are computed once.
    For each threshold yielded by ``frange(1.0, 0.0, -0.1)`` the function
    prints a separator, the threshold, the number of selected features and
    the selected features as ``"<index> (<name>)"`` lines.

    Returns:
        None. The report goes to standard output.
    """
    # Use a context manager so the ReliefF file is always closed; the
    # previous version left the handle open for the lifetime of the process.
    with open(RELIEFF_FILENAME, "r") as relieff_file:
        relieff_features = parse_relieff_list(relieff_file.readlines())
    relieff_features.sort(key=lambda x: x[0], reverse=True)

    # Only the feature matrix and the names are used below.
    _, features, feature_names = read_data(FEATURE_FILENAME)

    # Computed outside the loop because it is the same for every threshold.
    correlations = find_correlation(features)

    for threshold in frange(1.0, 0.0, -0.1):
        selected = select_features(features, relieff_features, correlations, threshold)

        print("============================================================")
        print("THRESHOLD =", threshold)
        print("Count:", len(selected))
        print("\n".join("{} ({})".format(x, feature_names[x]) for x in selected))

        # Blank lines visually separate one threshold's report from the next.
        print()
        print()
        print()