示例#1
0
def main():
    """Split every oversized cluster in two until all fit MAX_CL_SIZE.

    Command-line driven: loads a graph and an existing clustering, then
    repeatedly runs two-way spectral clustering on the subgraph of each
    cluster whose length exceeds MAX_CL_SIZE.  Clusters that are small
    enough are collected and finally passed to io.output_clusters with an
    empty output path.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("network_file", help="Network file used for initial\
                                              clustering")
    parser.add_argument("cluster_file", help="Clustering results file")
    parser.add_argument("-c", "--no_conversion", action="store_true")
    parser.add_argument("-d", "--directed", action="store_true",
                        help="Flag specifying if the input represents\
                              a directed graph. Defaults to false.")
    parser.add_argument("-n", "--node_list", nargs="?",
                        help="Optionally specify a list of the nodes in\
                              the DSD file. Default is all the nodes in the\
                              graph.")
    parser.add_argument("-s", "--simple_conversion", action="store_true")
    opts = parser.parse_args()

    # Read order is deliberate: node list (if any), clusters, then graph.
    have_node_list = bool(opts.node_list)
    node_list = io.get_node_list(opts.node_list) if have_node_list else None
    clusters = io.read_clusters(opts.cluster_file)
    if have_node_list:
        G = io.build_ig_graph_from_matrix(opts.network_file, False, node_list)
    else:
        G = ig.Graph.Read_Ncol(opts.network_file, directed=opts.directed)

    # Single pass so `clusters` is only iterated once.
    pending, final_clusters = [], []
    for members in clusters:
        bucket = pending if len(members) > MAX_CL_SIZE else final_clusters
        bucket.append(members)

    # Each round bisects everything still too large; pieces that remain
    # too large are queued for the next round.
    while pending:
        current_round, pending = pending, []

        for members in current_round:
            # Result is not used; the call is retained because it may raise
            # or have side effects -- NOTE(review): confirm and drop if not.
            names_to_ids(G, members)
            sub_graph = G.subgraph(members)

            parts = cl.spectral_clustering(
                sub_graph, 2,
                no_conversion=opts.no_conversion,
                simple_conversion=opts.simple_conversion)
            for part in parts:
                too_big = len(part) > MAX_CL_SIZE
                # Parts hold subgraph vertex indices; map back to names.
                named = [sub_graph.vs[i]['name'] for i in part]
                if too_big:
                    pending.append(named)
                else:
                    final_clusters.append(named)

    io.output_clusters(final_clusters, '')
示例#2
0
def main():
    '''
    Run ten shuffle-then-cluster trials on the input graph and write each
    trial's partition to a file named "trial<N>" in the working directory.

    The graph is built from an edge list when --ppi is given, otherwise
    from a distance matrix together with the node list.  Progress messages
    go to stderr.

    Usage: python louvain_clustering.py [-p] [-d] <dsd_file> <node_list>
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument("dsd_file", help = "Distance (i.e. DSD) matrix for network")
    parser.add_argument("node_list", help = "Node list")
    parser.add_argument("-p", "--ppi",  action = "store_true", help = "Flag specifying if the input is a ppi network. Defaults to false.")
    parser.add_argument("-d", "--directed", action = "store_true", help = "Flag specifying if the input represents a directed graph. Defaults to false.")

    opts = parser.parse_args()
    node_list = io.get_node_list(opts.node_list)

    if opts.ppi:
        G = io.build_ig_graph_from_edgelist(opts.dsd_file)
    else:
        G = io.build_ig_graph_from_matrix(opts.dsd_file, node_list = node_list)
    sys.stderr.write("Read in\n")

    for repeat in range(10):
        sys.stderr.write("starting trial " + str(repeat))

        # Apply shuffle() a random number of times.  range(1, x) yields
        # x - 1 iterations (none when x <= 1), same as the original loop.
        x = random.randint(0, G.vcount() - 1)
        for _ in range(1, x):
            G = shuffle(G)
        sys.stderr.write(": shuffled, ")

        partition = build_clusters(G)
        sys.stderr.write("clustered\n")

        # `with` guarantees the trial file is flushed and closed even if a
        # write fails; each entry is written prefixed with its index.
        with open("trial" + str(repeat), "w") as f:
            for i, line in enumerate(partition):
                f.write(str(i) + line)
示例#3
0
def main():
    """Recursively re-cluster oversized clusters of a DSD-matrix graph.

    Loads the node list, an existing clustering and the graph, then keeps
    applying spectral clustering to the weighted adjacency matrix of each
    cluster whose length exceeds MAX_CL_SIZE.  Clusters that fit are
    collected and finally passed to io.output_clusters with an empty
    output path.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("dsd_file",
                        help="Distance (i.e. DSD) matrix for network")
    parser.add_argument("cluster_file", help="Clustering results file")
    parser.add_argument("-n",
                        "--node_list",
                        nargs="?",
                        help="Optionally specify a list of the nodes in\
                              the DSD file. Default is all the nodes in the\
                              graph.")
    opts = parser.parse_args()

    node_list = io.get_node_list(opts.node_list)
    clusters = io.read_clusters(opts.cluster_file)
    G = io.build_ig_graph_from_matrix(opts.dsd_file, False, node_list)

    # Single pass so `clusters` is only iterated once.
    pending, final_clusters = [], []
    for members in clusters:
        bucket = pending if len(members) > MAX_CL_SIZE else final_clusters
        bucket.append(members)

    # Each round splits everything still too large; pieces that remain too
    # large are queued for the next round.
    while pending:
        current_round, pending = pending, []

        for members in current_round:
            # Result is not used; the call is retained because it may raise
            # or have side effects -- NOTE(review): confirm and drop if not.
            names_to_ids(G, members)
            sub_graph = G.subgraph(members)

            # One target cluster per 100 members above 200 members,
            # otherwise a plain bisection.
            size = len(members)
            if size > 200:
                num_clusters = int(size / float(100))
            else:
                num_clusters = 2

            # Drop each large intermediate as soon as it has been consumed.
            adjacency = sub_graph.get_adjacency(attribute='weight')
            dist_matrix = np.array(adjacency.data)
            del adjacency
            parts = cl.spectral_clustering(dist_matrix, num_clusters)
            del dist_matrix

            for part in parts:
                too_big = len(part) > MAX_CL_SIZE
                # Parts hold subgraph vertex indices; map back to names.
                named = [sub_graph.vs[i]['name'] for i in part]
                if too_big:
                    pending.append(named)
                else:
                    final_clusters.append(named)

    io.output_clusters(final_clusters, '')
示例#4
0
def main():
    '''
    Cluster the input graph once with build_clusters(G, 4) and write the
    resulting partition to a file named "trial" in the working directory.

    The graph is built from an edge list when --ppi is given, otherwise
    from a distance matrix together with the node list.

    Usage: python louvain_clustering.py [-p] [-d] <dsd_file> <node_list>
    '''

    parser = argparse.ArgumentParser()
    parser.add_argument("dsd_file",
                        help="Distance (i.e. DSD) matrix for network")
    parser.add_argument("node_list", help="Node list")
    parser.add_argument(
        "-p",
        "--ppi",
        action="store_true",
        help="Flag specifying if the input is a ppi network. Defaults to false."
    )
    parser.add_argument(
        "-d",
        "--directed",
        action="store_true",
        help=
        "Flag specifying if the input represents a directed graph. Defaults to false."
    )

    opts = parser.parse_args()
    node_list = io.get_node_list(opts.node_list)

    if opts.ppi:
        G = io.build_ig_graph_from_edgelist(opts.dsd_file)
    else:
        G = io.build_ig_graph_from_matrix(opts.dsd_file, node_list=node_list)

    # the first level seems to have more clusters
    partition = build_clusters(G, 4)

    # `with` guarantees the file is flushed and closed even if a write
    # fails; each entry is written prefixed with its index.
    with open("trial", "w") as f:
        for i, line in enumerate(partition):
            f.write(str(i) + line)
示例#5
0
def main():
    """Write the weighted edge list of every cluster's subgraph to a file.

    For cluster number ``idx`` the edges of the induced subgraph are
    written to ``<output_prefix><idx>.txt`` as tab-separated
    ``source_name  target_name  weight`` lines.  The prefix falls back to
    './clusters_' when none is supplied.

    Exits via sys.exit with a message if an output file cannot be opened.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("network_file", help="Original network input file")
    parser.add_argument("cluster_file", help="A cluster results file")
    parser.add_argument("-n",
                        "--node_list",
                        nargs="?",
                        default=[],
                        help="Optionally specify a list of the nodes in\
                              the graph.")
    parser.add_argument("-o",
                        "--output_prefix",
                        nargs="?",
                        default="",
                        help="Optionally specify a prefix for output files")
    opts = parser.parse_args()

    nodes = io.get_node_list(opts.node_list) if opts.node_list else []
    output_pref = opts.output_prefix if opts.output_prefix else './clusters_'
    G = io.build_ig_graph_from_matrix(opts.network_file,
                                      is_directed=False,
                                      node_list=nodes)
    if opts.node_list:
        G.vs['name'] = nodes
    clusters = io.read_clusters(opts.cluster_file)

    for idx, cluster in enumerate(clusters):
        # Build the name outside the try so the error handler can always
        # reference it, and so only open() is guarded.
        output_file = '{}{}.txt'.format(output_pref, idx)
        try:
            output_fp = open(output_file, 'w')
        except IOError:
            sys.exit("Could not open file: {}".format(output_file))

        # The context manager closes the file even if the subgraph
        # extraction or a write raises part-way through.
        with output_fp:
            id_cluster = names_to_ids(G, cluster)
            SG = G.subgraph(id_cluster)
            for e in SG.es:
                output_fp.write('{}\t{}\t{}\n'.format(SG.vs[e.source]['name'],
                                                      SG.vs[e.target]['name'],
                                                      e['weight']))
示例#6
0
def main():
    """Cluster a network with the algorithm chosen on the command line.

    Builds the graph with networkx or igraph depending on USE_NETWORKX,
    determines the node names, dispatches on --algorithm (compared against
    the SPECTRAL, THRESHOLD and HIERARCHICAL constants) and passes the
    resulting clusters to io.output_clusters.

    --parameter is interpreted per algorithm: an integer cluster count for
    spectral clustering (default 100), a float threshold for threshold
    clustering (default 5.0) and for hierarchical clustering (default 1.0).

    Exits via sys.exit with a message when the algorithm is not recognised.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("dsd_file",
                        help="Distance (i.e. DSD) matrix for network")
    parser.add_argument(
        "-a",
        "--algorithm",
        nargs="?",
        default=DEFAULT_ALG,
        help="The clustering algorithm to use - 1 for spectral,\
                              2 for threshold clustering, and 3 for simple\
                              shortest-path divisive hierarchical clustering.\
                              Defaults to spectral clustering.")
    parser.add_argument("-c", "--no_conversion", action="store_true")
    parser.add_argument("-d",
                        "--directed",
                        action="store_true",
                        help="Flag specifying if the input represents\
                              a directed graph. Defaults to false.")
    parser.add_argument("-n",
                        "--node_list",
                        nargs="?",
                        help="Optionally specify a list of the nodes in\
                              the DSD file. Default is all the nodes in the\
                              graph.")
    parser.add_argument("-o",
                        "--output_file",
                        nargs="?",
                        default="",
                        help="Optionally specify an output file. Output is to\
                              stdout if no file is specified.")
    parser.add_argument("-p",
                        "--parameter",
                        nargs="?",
                        default='',
                        help="Specify a parameter (i.e. number of clusters,\
                              distance threshold) to be used with clustering\
                              algorithm. If none is provided, a sensible\
                              default is used.")
    parser.add_argument("-s", "--simple_conversion", action="store_true")
    opts = parser.parse_args()

    # The clustering backend is imported lazily so only the selected graph
    # library's algorithms module needs to be importable.
    if USE_NETWORKX:
        import clustering_algs_nx as cl
        G = io.build_nx_graph_from_edgelist(opts.dsd_file, opts.directed)
    else:
        import clustering_algs_ig as cl
        if opts.node_list:
            G = io.build_ig_graph_from_matrix(opts.dsd_file, opts.directed)
        else:
            # Temporary: Read_Ncol stands in for
            # io.build_ig_graph_from_edgelist.
            # TODO remove after consensus experiments
            G = ig.Graph.Read_Ncol(opts.dsd_file, directed=opts.directed)

    if opts.node_list:
        nodes = io.get_node_list(opts.node_list)
    else:
        # Vertex names ordered by vertex index.  Built directly as a tuple
        # because subscripting zip(...) fails on Python 3, where zip
        # returns an iterator.
        # NOTE(review): G.vs looks igraph-specific -- confirm that the
        # USE_NETWORKX path is always run with --node_list.
        nodes = tuple(v['name']
                      for v in sorted(G.vs, key=lambda v: v.index))

    opts.algorithm = int(opts.algorithm)
    if opts.algorithm == SPECTRAL:
        k_val = int(opts.parameter) if opts.parameter else 100
        clusters = cl.spectral_clustering(
            G,
            n_clusters=k_val,
            node_map=nodes,
            no_conversion=opts.no_conversion,
            simple_conversion=opts.simple_conversion)
    elif opts.algorithm == THRESHOLD:
        filter_weight = float(opts.parameter) if opts.parameter else 5.0
        clusters = cl.threshold_clustering(G,
                                           threshold=filter_weight,
                                           node_map=nodes)
    elif opts.algorithm == HIERARCHICAL:
        filter_weight = float(opts.parameter) if opts.parameter else 1.0
        clusters = cl.hierarchical_clustering(G, threshold=filter_weight)
    else:
        sys.exit('Please pick a valid clustering algorithm')

    io.output_clusters(clusters, opts.output_file)
示例#7
0
def main():
    """Cluster a DSD-matrix graph with the algorithm chosen on the CLI.

    Builds the igraph graph from the matrix file, optionally loads a node
    list, dispatches on --algorithm (compared against the SPECTRAL,
    THRESHOLD and HIERARCHICAL constants) and passes the resulting clusters
    to io.output_clusters.

    --parameter is interpreted per algorithm: an integer cluster count for
    spectral clustering (default 100) or a float threshold for threshold
    clustering (default 5.0).

    Exits via sys.exit with a message for hierarchical clustering (not
    implemented here) and for unrecognised algorithm values.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("dsd_file",
                        help="Distance (i.e. DSD) matrix for network")
    parser.add_argument(
        "-a",
        "--algorithm",
        nargs="?",
        default=DEFAULT_ALG,
        help="The clustering algorithm to use - 1 for spectral,\
                              2 for threshold clustering, and 3 for simple\
                              shortest-path divisive hierarchical clustering.\
                              Defaults to spectral clustering.")
    parser.add_argument("-d",
                        "--directed",
                        action="store_true",
                        help="Flag specifying if the input represents\
                              a directed graph. Defaults to false.")
    parser.add_argument("-n",
                        "--node_list",
                        nargs="?",
                        help="Optionally specify a list of the nodes in\
                              the DSD file. Default is all the nodes in the\
                              graph.")
    parser.add_argument("-o",
                        "--output_file",
                        nargs="?",
                        default="",
                        help="Optionally specify an output file. Output is to\
                              stdout if no file is specified.")
    parser.add_argument("-p",
                        "--parameter",
                        nargs="?",
                        default='',
                        help="Specify a parameter (i.e. number of clusters,\
                              distance threshold) to be used with clustering\
                              algorithm. If none is provided, a sensible\
                              default is used.")
    opts = parser.parse_args()

    G = io.build_ig_graph_from_matrix(opts.dsd_file, opts.directed)

    nodes = io.get_node_list(opts.node_list) if opts.node_list else []

    opts.algorithm = int(opts.algorithm)
    if opts.algorithm == SPECTRAL:
        import numpy as np
        k_val = int(opts.parameter) if opts.parameter else 100
        # Release the graph and the intermediate adjacency object as soon
        # as the dense array exists, so only one copy stays alive.
        mat = G.get_adjacency(attribute='weight')
        del G
        dist_matrix = np.array(mat.data)
        del mat
        clusters = cl.spectral_clustering(dist_matrix,
                                          n_clusters=k_val,
                                          node_map=nodes)
    elif opts.algorithm == THRESHOLD:
        filter_weight = float(opts.parameter) if opts.parameter else 5.0
        clusters = cl.threshold_clustering(G,
                                           threshold=filter_weight,
                                           node_map=nodes)
    elif opts.algorithm == HIERARCHICAL:
        # Adjacent literals instead of a backslash continuation inside the
        # string, which embedded the next line's indentation in the message.
        sys.exit('Hierarchical clustering is not implemented, please choose '
                 'another algorithm')
    else:
        sys.exit('Please pick a valid clustering algorithm')

    io.output_clusters(clusters, opts.output_file)