示例#1
0
def main():
    """Recursively split oversized clusters with spectral clustering.

    Command line:
        network_file            network the initial clustering was run on
        cluster_file            clustering results to refine
        -n/--node_list          optional node list; when given, the network
                                file is loaded with
                                ``io.build_ig_graph_from_matrix`` instead of
                                ``ig.Graph.Read_Ncol``
        -d/--directed           forwarded to ``ig.Graph.Read_Ncol``
        -c/--no_conversion,
        -s/--simple_conversion  forwarded to ``cl.spectral_clustering``

    Every cluster with more than ``MAX_CL_SIZE`` members is bisected on its
    induced subgraph; pieces that are still too large are queued for another
    round. The resulting clusters are passed to ``io.output_clusters`` with
    ``''`` as the output target.
    """
    import sys  # local import: only needed for the no-progress warning

    parser = argparse.ArgumentParser()
    parser.add_argument("network_file", help="Network file used for initial\
                                              clustering")
    parser.add_argument("cluster_file", help="Clustering results file")
    parser.add_argument("-c", "--no_conversion", action="store_true")
    parser.add_argument("-d", "--directed", action="store_true",
                        help="Flag specifying if the input represents\
                              a directed graph. Defaults to false.")
    parser.add_argument("-n", "--node_list", nargs="?",
                        help="Optionally specify a list of the nodes in\
                              the DSD file. Default is all the nodes in the\
                              graph.")
    parser.add_argument("-s", "--simple_conversion", action="store_true")
    opts = parser.parse_args()

    if opts.node_list:
        node_list = io.get_node_list(opts.node_list)
    clusters = io.read_clusters(opts.cluster_file)
    if opts.node_list:
        # NOTE(review): --directed is not forwarded on this path (False is
        # hard-coded) -- confirm that matrix input is always undirected.
        G = io.build_ig_graph_from_matrix(opts.network_file, False, node_list)
    else:
        G = ig.Graph.Read_Ncol(opts.network_file, directed=opts.directed)

    # Partition the input: small clusters are final, large ones get split.
    pending, final_clusters = [], []
    for cluster in clusters:
        if len(cluster) > MAX_CL_SIZE:
            pending.append(cluster)
        else:
            final_clusters.append(cluster)

    # Keep bisecting until no oversized cluster is left.
    while pending:
        current, pending = pending, []

        for parent in current:
            SG = G.subgraph(parent)
            pieces = cl.spectral_clustering(
                SG, 2,
                no_conversion=opts.no_conversion,
                simple_conversion=opts.simple_conversion)
            for piece in pieces:
                # The clustering reports indices into SG; map back to names.
                names = [SG.vs[i]['name'] for i in piece]
                if len(names) <= MAX_CL_SIZE:
                    final_clusters.append(names)
                elif len(names) < len(parent):
                    pending.append(names)
                else:
                    # The split made no progress; re-queueing the same
                    # cluster could loop forever, so keep it as it is.
                    sys.stderr.write(
                        "warning: could not split cluster of size %d; "
                        "keeping it unsplit\n" % len(names))
                    final_clusters.append(names)

    io.output_clusters(final_clusters, '')
示例#2
0
def main():
    """Generate overlapping clusters from a results directory and output them.

    Takes one positional argument, ``gc_results_dir``. The directory is
    passed to ``generate_overlapping_clusters``, the result goes through
    ``filter_clusters``, and the filtered clusters are handed to
    ``io.output_clusters`` with ``''`` as the output target.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("gc_results_dir")
    results_dir = arg_parser.parse_args().gc_results_dir

    overlapping = generate_overlapping_clusters(results_dir)
    io.output_clusters(filter_clusters(overlapping), '')
示例#3
0
def main():
    """Resolve a GeneCentric clustering against another clustering.

    Takes two positional arguments, ``gc_clustering`` and
    ``other_clustering``. Both files are read with ``io.read_clusters``,
    combined by ``resolve_clusters``, and the result is handed to
    ``io.output_clusters`` with ``''`` as the output target.
    """
    arg_parser = argparse.ArgumentParser()
    for positional in ("gc_clustering", "other_clustering"):
        arg_parser.add_argument(positional)
    args = arg_parser.parse_args()

    # Arguments are evaluated left to right, so the GC file is read first.
    resolved = resolve_clusters(io.read_clusters(args.gc_clustering),
                                io.read_clusters(args.other_clustering))
    io.output_clusters(resolved, '')
示例#4
0
def main():
    """Recursively split oversized clusters using a DSD distance matrix.

    Command line:
        dsd_file         distance (DSD) matrix for the network
        cluster_file     clustering results to refine
        -n/--node_list   optional list of the nodes in the DSD file

    Every cluster with more than ``MAX_CL_SIZE`` members is re-clustered with
    ``cl.spectral_clustering`` on the weighted adjacency matrix of its
    induced subgraph. Clusters of more than 200 nodes are split into
    ``size / 100`` parts, smaller ones into 2. Pieces that are still too
    large are queued for another round. The resulting clusters are passed to
    ``io.output_clusters`` with ``''`` as the output target.
    """
    import sys  # local import: only needed for the no-progress warning

    parser = argparse.ArgumentParser()
    parser.add_argument("dsd_file",
                        help="Distance (i.e. DSD) matrix for network")
    parser.add_argument("cluster_file", help="Clustering results file")
    parser.add_argument("-n",
                        "--node_list",
                        nargs="?",
                        help="Optionally specify a list of the nodes in\
                              the DSD file. Default is all the nodes in the\
                              graph.")
    opts = parser.parse_args()

    # --node_list is optional, so only read it when it was supplied.
    if opts.node_list:
        node_list = io.get_node_list(opts.node_list)
    clusters = io.read_clusters(opts.cluster_file)
    if opts.node_list:
        G = io.build_ig_graph_from_matrix(opts.dsd_file, False, node_list)
    else:
        # NOTE(review): assumes the node-list parameter of
        # build_ig_graph_from_matrix is optional -- confirm.
        G = io.build_ig_graph_from_matrix(opts.dsd_file, False)

    # Partition the input: small clusters are final, large ones get split.
    pending, final_clusters = [], []
    for cluster in clusters:
        if len(cluster) > MAX_CL_SIZE:
            pending.append(cluster)
        else:
            final_clusters.append(cluster)

    # Keep splitting until no oversized cluster is left.
    while pending:
        current, pending = pending, []

        for parent in current:
            SG = G.subgraph(parent)

            parent_size = len(parent)
            num_clusters = (int(parent_size / float(100))
                            if parent_size > 200 else 2)
            dist_matrix = np.array(SG.get_adjacency(attribute='weight').data)
            pieces = cl.spectral_clustering(dist_matrix, num_clusters)
            # Release the dense matrix before the next subgraph is expanded.
            del dist_matrix
            for piece in pieces:
                # The clustering reports indices into SG; map back to names.
                names = [SG.vs[i]['name'] for i in piece]
                if len(names) <= MAX_CL_SIZE:
                    final_clusters.append(names)
                elif len(names) < parent_size:
                    pending.append(names)
                else:
                    # The split made no progress; re-queueing the same
                    # cluster could loop forever, so keep it as it is.
                    sys.stderr.write(
                        "warning: could not split cluster of size %d; "
                        "keeping it unsplit\n" % len(names))
                    final_clusters.append(names)

    io.output_clusters(final_clusters, '')
示例#5
0
def main():
    """Filter out clusters smaller than a cutoff and output the rest.

    Command line:
        input_file     clusters file to filter
        -c/--cutoff    minimum cluster size to keep; ``DEFAULT_MIN_SIZE`` is
                       used when the option is absent or given without a
                       value

    Clusters whose length is at least the cutoff are passed to
    ``io.output_clusters`` with ``''`` as the output target.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="Original clusters input file")
    parser.add_argument("-c",
                        "--cutoff",
                        nargs="?",
                        type=int,
                        # with nargs="?" a bare "-c" yields const, which
                        # would otherwise be None and break int()
                        const=DEFAULT_MIN_SIZE,
                        default=DEFAULT_MIN_SIZE,
                        help="Cutoff for filtering cluster size")
    opts = parser.parse_args()

    # argparse only applies type= to string values, so normalise once here
    # in case DEFAULT_MIN_SIZE is not already an int.
    min_size = int(opts.cutoff)

    clusters = io.read_clusters(opts.input_file)
    filtered_clusters = [c for c in clusters if len(c) >= min_size]
    io.output_clusters(filtered_clusters, '')
示例#6
0
def main():
    """Append the nodes missing from a GeneCentric clustering as one cluster.

    Command line:
        gc_file        GeneCentric cluster file
        cluster_file   file containing cluster filepaths

    Nodes returned by ``get_cluster_nodes`` for ``cluster_file`` that are not
    returned for ``gc_file`` are collected into one extra cluster. That
    cluster is appended to the clusters read from ``gc_file`` and everything
    is passed to ``io.output_clusters`` with ``''`` as the output target.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("gc_file", help='GeneCentric cluster file')
    parser.add_argument("cluster_file",
                        help="File containing cluster filepaths")
    opts = parser.parse_args()

    cluster_nodes = get_cluster_nodes(opts.cluster_file)
    gc_nodes = get_cluster_nodes(opts.gc_file)
    gc_clusters = io.read_clusters(opts.gc_file)

    # Collect uncovered nodes in the order get_cluster_nodes yields them,
    # not in set-iteration order, so the output is reproducible when that
    # source order is. Adding to `seen` also removes duplicates.
    seen = set(gc_nodes)
    difference_nodes = []
    for node in cluster_nodes:
        if node not in seen:
            seen.add(node)
            difference_nodes.append(node)

    # Do not emit an empty cluster when every node is already covered.
    if difference_nodes:
        gc_clusters.append(difference_nodes)
    io.output_clusters(gc_clusters, '')
示例#7
0
def main():
    """Cluster a network with the algorithm selected on the command line.

    Command line:
        dsd_file                 network / distance (DSD) input file
        -a/--algorithm           compared against ``SPECTRAL``, ``THRESHOLD``
                                 and ``HIERARCHICAL`` after ``int()``
                                 conversion; defaults to ``DEFAULT_ALG``
        -p/--parameter           algorithm parameter: number of clusters for
                                 spectral (default 100), threshold for
                                 threshold (default 5.0) and hierarchical
                                 (default 1.0) clustering
        -n/--node_list           optional node list file
        -o/--output_file         output target, forwarded to
                                 ``io.output_clusters``
        -d/--directed            forwarded to the graph loader
        -c/--no_conversion,
        -s/--simple_conversion   forwarded to ``cl.spectral_clustering``

    Exits via ``sys.exit`` with a message when the algorithm id is unknown.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("dsd_file",
                        help="Distance (i.e. DSD) matrix for network")
    parser.add_argument(
        "-a",
        "--algorithm",
        nargs="?",
        default=DEFAULT_ALG,
        help="The clustering algorithm to use - 1 for spectral,\
                              2 for threshold clustering, and 3 for simple\
                              shortest-path divisive hierarchical clustering.\
                              Defaults to spectral clustering.")
    parser.add_argument("-c", "--no_conversion", action="store_true")
    parser.add_argument("-d",
                        "--directed",
                        action="store_true",
                        help="Flag specifying if the input represents\
                              a directed graph. Defaults to false.")
    parser.add_argument("-n",
                        "--node_list",
                        nargs="?",
                        help="Optionally specify a list of the nodes in\
                              the DSD file. Default is all the nodes in the\
                              graph.")
    parser.add_argument("-o",
                        "--output_file",
                        nargs="?",
                        default="",
                        help="Optionally specify an output file. Output is to\
                              stdout if no file is specified.")
    parser.add_argument("-p",
                        "--parameter",
                        nargs="?",
                        default='',
                        help="Specify a parameter (i.e. number of clusters,\
                              distance threshold) to be used with clustering\
                              algorithm. If none is provided, a sensible\
                              default is used.")
    parser.add_argument("-s", "--simple_conversion", action="store_true")
    opts = parser.parse_args()

    # Pick the backend module and load the graph with the matching reader.
    if USE_NETWORKX:
        import clustering_algs_nx as cl
        G = io.build_nx_graph_from_edgelist(opts.dsd_file, opts.directed)
    else:
        import clustering_algs_ig as cl
        if opts.node_list:
            G = io.build_ig_graph_from_matrix(opts.dsd_file, opts.directed)
        else:
            # temporary, TODO remove after consensus experiments
            G = ig.Graph.Read_Ncol(opts.dsd_file, directed=opts.directed)

    if opts.node_list:
        nodes = io.get_node_list(opts.node_list)
    else:
        # Vertex names ordered by vertex index. Built directly instead of
        # via zip(...)[1], which fails on Python 3 (zip is an iterator) and
        # on an empty graph.
        # NOTE(review): G.vs is the igraph vertex accessor; with
        # USE_NETWORKX and no node list this presumably fails -- confirm.
        nodes = tuple(v['name']
                      for v in sorted(G.vs, key=lambda v: v.index))

    opts.algorithm = int(opts.algorithm)
    if opts.algorithm == SPECTRAL:
        k_val = int(opts.parameter) if opts.parameter else 100
        clusters = cl.spectral_clustering(
            G,
            n_clusters=k_val,
            node_map=nodes,
            no_conversion=opts.no_conversion,
            simple_conversion=opts.simple_conversion)
    elif opts.algorithm == THRESHOLD:
        filter_weight = float(opts.parameter) if opts.parameter else 5.0
        clusters = cl.threshold_clustering(G,
                                           threshold=filter_weight,
                                           node_map=nodes)
    elif opts.algorithm == HIERARCHICAL:
        filter_weight = float(opts.parameter) if opts.parameter else 1.0
        clusters = cl.hierarchical_clustering(G, threshold=filter_weight)
    else:
        sys.exit('Please pick a valid clustering algorithm')

    io.output_clusters(clusters, opts.output_file)
示例#8
0
def main():
    """Cluster a DSD distance matrix with the selected algorithm.

    Command line:
        dsd_file           distance (DSD) matrix for the network
        -a/--algorithm     compared against ``SPECTRAL``, ``THRESHOLD`` and
                           ``HIERARCHICAL`` after ``int()`` conversion;
                           defaults to ``DEFAULT_ALG``
        -p/--parameter     number of clusters for spectral clustering
                           (default 100) or threshold for threshold
                           clustering (default 5.0)
        -n/--node_list     optional node list file; an empty list is used
                           when it is absent
        -o/--output_file   output target, forwarded to
                           ``io.output_clusters``
        -d/--directed      forwarded to ``io.build_ig_graph_from_matrix``

    Exits via ``sys.exit`` with a message when hierarchical clustering is
    requested (not implemented here) or the algorithm id is unknown.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("dsd_file",
                        help="Distance (i.e. DSD) matrix for network")
    parser.add_argument(
        "-a",
        "--algorithm",
        nargs="?",
        default=DEFAULT_ALG,
        help="The clustering algorithm to use - 1 for spectral,\
                              2 for threshold clustering, and 3 for simple\
                              shortest-path divisive hierarchical clustering.\
                              Defaults to spectral clustering.")
    parser.add_argument("-d",
                        "--directed",
                        action="store_true",
                        help="Flag specifying if the input represents\
                              a directed graph. Defaults to false.")
    parser.add_argument("-n",
                        "--node_list",
                        nargs="?",
                        help="Optionally specify a list of the nodes in\
                              the DSD file. Default is all the nodes in the\
                              graph.")
    parser.add_argument("-o",
                        "--output_file",
                        nargs="?",
                        default="",
                        help="Optionally specify an output file. Output is to\
                              stdout if no file is specified.")
    parser.add_argument("-p",
                        "--parameter",
                        nargs="?",
                        default='',
                        help="Specify a parameter (i.e. number of clusters,\
                              distance threshold) to be used with clustering\
                              algorithm. If none is provided, a sensible\
                              default is used.")
    opts = parser.parse_args()

    G = io.build_ig_graph_from_matrix(opts.dsd_file, opts.directed)

    nodes = io.get_node_list(opts.node_list) if opts.node_list else []

    opts.algorithm = int(opts.algorithm)
    if opts.algorithm == SPECTRAL:
        import numpy as np
        k_val = int(opts.parameter) if opts.parameter else 100
        mat = G.get_adjacency(attribute='weight')
        # Drop the graph, then the adjacency wrapper, as soon as the dense
        # array exists so they are not all held in memory at once.
        del G
        dist_matrix = np.array(mat.data)
        del mat
        clusters = cl.spectral_clustering(dist_matrix,
                                          n_clusters=k_val,
                                          node_map=nodes)
    elif opts.algorithm == THRESHOLD:
        filter_weight = float(opts.parameter) if opts.parameter else 5.0
        clusters = cl.threshold_clustering(G,
                                           threshold=filter_weight,
                                           node_map=nodes)
    elif opts.algorithm == HIERARCHICAL:
        # Adjacent literals instead of a backslash continuation inside the
        # string, which embedded the next line's indentation in the message.
        sys.exit('Hierarchical clustering is not implemented, please choose '
                 'another algorithm')
    else:
        sys.exit('Please pick a valid clustering algorithm')

    io.output_clusters(clusters, opts.output_file)