def main():
    """Recursively split oversized clusters with spectral clustering.

    Parses the command line, loads the graph and an existing clustering,
    then repeatedly bisects every cluster whose length exceeds
    ``MAX_CL_SIZE`` until no such cluster is left.  The resulting clusters
    are handed to ``io.output_clusters`` with an empty output target.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("network_file",
                        help="Network file used for initial\
                        clustering")
    parser.add_argument("cluster_file", help="Clustering results file")
    parser.add_argument("-c", "--no_conversion", action="store_true")
    parser.add_argument("-d", "--directed", action="store_true",
                        help="Flag specifying if the input represents\
                        a directed graph. Defaults to false.")
    parser.add_argument("-n", "--node_list", nargs="?",
                        help="Optionally specify a list of the nodes in\
                        the DSD file. Default is all the nodes in the\
                        graph.")
    parser.add_argument("-s", "--simple_conversion", action="store_true")
    opts = parser.parse_args()

    # The node list is only loaded (and only needed) when -n was given.
    node_names = None
    if opts.node_list:
        node_names = io.get_node_list(opts.node_list)

    initial_clusters = io.read_clusters(opts.cluster_file)

    if not opts.node_list:
        G = ig.Graph.Read_Ncol(opts.network_file, directed=opts.directed)
    else:
        G = io.build_ig_graph_from_matrix(opts.network_file, False,
                                          node_names)

    # Sort the initial clusters into "needs splitting" and "small enough".
    pending, final_clusters = [], []
    for members in initial_clusters:
        bucket = pending if len(members) > MAX_CL_SIZE else final_clusters
        bucket.append(members)

    # Each pass splits every pending cluster in two; pieces that are still
    # too large are queued for the next pass.
    while pending:
        this_round, pending = pending, []
        for members in this_round:
            # Result is unused, but the call is retained as in the original
            # (presumably it fails on unknown names -- TODO confirm).
            names_to_ids(G, members)
            sub_graph = G.subgraph(members)

            # Always bisect.  A size-based alternative that was previously
            # disabled here:
            #   int(cluster_size / float(100)) if cluster_size > 200 else 2
            n_parts = 2
            parts = cl.spectral_clustering(
                sub_graph, n_parts,
                no_conversion=opts.no_conversion,
                simple_conversion=opts.simple_conversion)

            for part in parts:
                # Clustering results index into the subgraph; map them back
                # to vertex names before storing.
                part_names = [sub_graph.vs[i]['name'] for i in part]
                if len(part) > MAX_CL_SIZE:
                    pending.append(part_names)
                else:
                    final_clusters.append(part_names)

    io.output_clusters(final_clusters, '')
def main():
    '''
    Runs ten randomized clustering trials on the input graph and writes the
    result of trial N to a file named "trialN" in the working directory.

    Before each trial the graph is passed through shuffle() a random number
    of times and then clustered with build_clusters().  Each element of the
    returned partition is written prefixed with its position.

    The -d/--directed flag is accepted but not used by this function.

    Usage: python louvain_clustering.py <dsd_file> <node_list> [-p]
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument("dsd_file",
                        help="Distance (i.e. DSD) matrix for network")
    parser.add_argument("node_list", help="Node list")
    parser.add_argument("-p", "--ppi", action="store_true",
                        help="Flag specifying if the input is a ppi "
                             "network. Defaults to false.")
    parser.add_argument("-d", "--directed", action="store_true",
                        help="Flag specifying if the input represents a "
                             "directed graph. Defaults to false.")
    opts = parser.parse_args()

    node_list = io.get_node_list(opts.node_list)

    if opts.ppi:
        G = io.build_ig_graph_from_edgelist(opts.dsd_file)
    else:
        G = io.build_ig_graph_from_matrix(opts.dsd_file, node_list=node_list)
    sys.stderr.write("Read in\n")

    for repeat in range(0, 10):
        sys.stderr.write("starting trial " + str(repeat))

        # Shuffle a random number of times (x - 1 passes, possibly zero).
        x = random.randint(0, G.vcount() - 1)
        for _ in range(1, x):
            G = shuffle(G)
        sys.stderr.write(": shuffled, ")

        partition = build_clusters(G)
        sys.stderr.write("clustered\n")

        # 'with' guarantees the trial file is flushed and closed even if a
        # write fails; the original left every handle open.
        with open("trial" + str(repeat), "w") as f:
            for i, line in enumerate(partition):
                f.write(str(i) + line)
def main():
    """Recursively split oversized clusters of a DSD graph.

    Parses the command line, builds the graph from the DSD matrix, reads an
    existing clustering, and then keeps re-clustering every cluster whose
    length exceeds ``MAX_CL_SIZE`` with spectral clustering on the weighted
    adjacency matrix of its induced subgraph.  The final clusters are handed
    to ``io.output_clusters`` with an empty output target.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("dsd_file",
                        help="Distance (i.e. DSD) matrix for network")
    parser.add_argument("cluster_file", help="Clustering results file")
    parser.add_argument("-n", "--node_list", nargs="?",
                        help="Optionally specify a list of the nodes in "
                             "the DSD file. Default is all the nodes in "
                             "the graph.")
    opts = parser.parse_args()

    # -n is optional: only read the node list when one was supplied, and
    # otherwise fall back to an empty list instead of passing None on.
    node_list = io.get_node_list(opts.node_list) if opts.node_list else []
    clusters = io.read_clusters(opts.cluster_file)
    G = io.build_ig_graph_from_matrix(opts.dsd_file, False, node_list)

    # Separate clusters that still need splitting from those small enough.
    clusters_to_process, final_clusters = [], []
    for cluster in clusters:
        if len(cluster) > MAX_CL_SIZE:
            clusters_to_process.append(cluster)
        else:
            final_clusters.append(cluster)

    # If all nodes have been clustered, stop looping; otherwise continue to
    # recurse on each large cluster.
    while clusters_to_process:
        processing = clusters_to_process
        clusters_to_process = []
        for cluster in processing:
            # Result is unused; the call is kept from the original
            # (presumably it fails on unknown names -- TODO confirm).
            names_to_ids(G, cluster)
            SG = G.subgraph(cluster)

            cluster_size = len(cluster)
            # Roughly one sub-cluster per 100 nodes for big clusters,
            # otherwise a plain bisection.
            num_clusters = (int(cluster_size / float(100))
                            if cluster_size > 200 else 2)

            # Release the intermediate matrices as soon as possible to keep
            # peak memory down.
            mat = SG.get_adjacency(attribute='weight')
            dist_matrix = np.array(mat.data)
            del mat
            sub_clusters = cl.spectral_clustering(dist_matrix, num_clusters)
            del dist_matrix

            for sub_cluster in sub_clusters:
                # Results index into the subgraph; map back to names.
                names = [SG.vs[i]['name'] for i in sub_cluster]
                if len(sub_cluster) > MAX_CL_SIZE:
                    clusters_to_process.append(names)
                else:
                    final_clusters.append(names)

    io.output_clusters(final_clusters, '')
def main():
    '''
    Clusters the input graph with build_clusters() and writes the result to
    a file named "trial" in the working directory.

    The graph is read either as an edge list (with -p) or as a DSD matrix
    together with the given node list.  Each element of the returned
    partition is written prefixed with its position.

    The -d/--directed flag is accepted but not used by this function.

    Usage: python louvain_clustering.py <dsd_file> <node_list> [-p]
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument("dsd_file",
                        help="Distance (i.e. DSD) matrix for network")
    parser.add_argument("node_list", help="Node list")
    parser.add_argument(
        "-p", "--ppi", action="store_true",
        help="Flag specifying if the input is a ppi network. "
             "Defaults to false.")
    parser.add_argument(
        "-d", "--directed", action="store_true",
        help="Flag specifying if the input represents a directed graph. "
             "Defaults to false.")
    opts = parser.parse_args()

    node_list = io.get_node_list(opts.node_list)

    if opts.ppi:
        G = io.build_ig_graph_from_edgelist(opts.dsd_file)
    else:
        G = io.build_ig_graph_from_matrix(opts.dsd_file, node_list=node_list)

    # the first level seems to have more clusters
    partition = build_clusters(G, 4)

    # 'with' guarantees the output is flushed and closed; the original never
    # closed the handle.
    with open("trial", "w") as f:
        for i, line in enumerate(partition):
            f.write(str(i) + line)
def main():
    """Write the induced subgraph of every cluster to its own edge file.

    Builds the graph from the network file, reads the clusters, and for
    cluster number ``idx`` writes ``<prefix><idx>.txt`` containing one
    tab-separated ``source  target  weight`` line per edge of the subgraph
    induced by that cluster.  The prefix defaults to ``./clusters_``.

    Exits via ``sys.exit`` if an output file cannot be opened.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("network_file", help="Original network input file")
    parser.add_argument("cluster_file", help="A cluster results file")
    parser.add_argument("-n", "--node_list", nargs="?", default=[],
                        help="Optionally specify a list of the nodes in "
                             "the graph.")
    parser.add_argument("-o", "--output_prefix", nargs="?", default="",
                        help="Optionally specify a prefix for output files")
    opts = parser.parse_args()

    nodes = io.get_node_list(opts.node_list) if opts.node_list else []
    output_pref = opts.output_prefix or './clusters_'

    G = io.build_ig_graph_from_matrix(opts.network_file, is_directed=False,
                                      node_list=nodes)
    if opts.node_list:
        G.vs['name'] = nodes

    clusters = io.read_clusters(opts.cluster_file)
    for idx, cluster in enumerate(clusters):
        output_file = '{}{}.txt'.format(output_pref, idx)
        # Only the open() call can raise the IOError we translate here.
        try:
            output_fp = open(output_file, 'w')
        except IOError:
            sys.exit("Could not open file: {}".format(output_file))

        # 'with' closes the file even if building the subgraph or a write
        # fails; the original leaked the handle in that case.
        with output_fp:
            id_cluster = names_to_ids(G, cluster)
            SG = G.subgraph(id_cluster)
            for e in SG.es:
                output_fp.write('{}\t{}\t{}\n'.format(
                    SG.vs[e.source]['name'],
                    SG.vs[e.target]['name'],
                    e['weight']))
def main():
    """Cluster a network with the algorithm selected on the command line.

    Loads the graph (through the networkx or igraph helpers depending on
    ``USE_NETWORKX``), determines the node name mapping, runs the clustering
    selected with ``-a`` (compared against ``SPECTRAL``, ``THRESHOLD`` and
    ``HIERARCHICAL``) and passes the clusters to ``io.output_clusters``.

    Exits via ``sys.exit`` when the algorithm choice is not recognised.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("dsd_file",
                        help="Distance (i.e. DSD) matrix for network")
    parser.add_argument(
        "-a", "--algorithm", nargs="?", default=DEFAULT_ALG,
        help="The clustering algorithm to use - 1 for spectral, "
             "2 for threshold clustering, and 3 for simple "
             "shortest-path divisive hierarchical clustering. "
             "Defaults to spectral clustering.")
    parser.add_argument("-c", "--no_conversion", action="store_true")
    parser.add_argument("-d", "--directed", action="store_true",
                        help="Flag specifying if the input represents "
                             "a directed graph. Defaults to false.")
    parser.add_argument("-n", "--node_list", nargs="?",
                        help="Optionally specify a list of the nodes in "
                             "the DSD file. Default is all the nodes in "
                             "the graph.")
    parser.add_argument("-o", "--output_file", nargs="?", default="",
                        help="Optionally specify an output file. Output "
                             "is to stdout if no file is specified.")
    parser.add_argument("-p", "--parameter", nargs="?", default='',
                        help="Specify a parameter (i.e. number of "
                             "clusters, distance threshold) to be used "
                             "with clustering algorithm. If none is "
                             "provided, a sensible default is used.")
    parser.add_argument("-s", "--simple_conversion", action="store_true")
    opts = parser.parse_args()

    # The clustering backend is imported lazily so only the selected graph
    # library has to be importable.
    if USE_NETWORKX:
        import clustering_algs_nx as cl
        G = io.build_nx_graph_from_edgelist(opts.dsd_file, opts.directed)
    else:
        import clustering_algs_ig as cl
        if opts.node_list:
            G = io.build_ig_graph_from_matrix(opts.dsd_file, opts.directed)
        else:
            # temporary, TODO remove after consensus experiments
            G = ig.Graph.Read_Ncol(opts.dsd_file, directed=opts.directed)

    if opts.node_list:
        nodes = io.get_node_list(opts.node_list)
    else:
        # Vertex names ordered by vertex index.  Built directly instead of
        # subscripting zip(), which fails on Python 3 (zip is an iterator)
        # and on an empty graph.
        # NOTE(review): this reads G.vs, an igraph attribute; confirm this
        # path is never reached with USE_NETWORKX set.
        nodes = tuple(v['name']
                      for v in sorted(G.vs, key=lambda v: v.index))

    opts.algorithm = int(opts.algorithm)
    if opts.algorithm == SPECTRAL:
        k_val = int(opts.parameter) if opts.parameter else 100
        clusters = cl.spectral_clustering(
            G,
            n_clusters=k_val,
            node_map=nodes,
            no_conversion=opts.no_conversion,
            simple_conversion=opts.simple_conversion)
    elif opts.algorithm == THRESHOLD:
        filter_weight = float(opts.parameter) if opts.parameter else 5.0
        clusters = cl.threshold_clustering(G, threshold=filter_weight,
                                           node_map=nodes)
    elif opts.algorithm == HIERARCHICAL:
        filter_weight = float(opts.parameter) if opts.parameter else 1.0
        clusters = cl.hierarchical_clustering(G, threshold=filter_weight)
    else:
        sys.exit('Please pick a valid clustering algorithm')

    io.output_clusters(clusters, opts.output_file)
def main():
    """Cluster a DSD graph with the algorithm selected on the command line.

    Builds the graph from the DSD matrix, optionally loads a node list, runs
    the clustering selected with ``-a`` (compared against ``SPECTRAL``,
    ``THRESHOLD`` and ``HIERARCHICAL``) and passes the clusters to
    ``io.output_clusters``.

    Exits via ``sys.exit`` when hierarchical clustering is requested (not
    implemented here) or the algorithm choice is not recognised.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("dsd_file",
                        help="Distance (i.e. DSD) matrix for network")
    parser.add_argument(
        "-a", "--algorithm", nargs="?", default=DEFAULT_ALG,
        help="The clustering algorithm to use - 1 for spectral, "
             "2 for threshold clustering, and 3 for simple "
             "shortest-path divisive hierarchical clustering. "
             "Defaults to spectral clustering.")
    parser.add_argument("-d", "--directed", action="store_true",
                        help="Flag specifying if the input represents "
                             "a directed graph. Defaults to false.")
    parser.add_argument("-n", "--node_list", nargs="?",
                        help="Optionally specify a list of the nodes in "
                             "the DSD file. Default is all the nodes in "
                             "the graph.")
    parser.add_argument("-o", "--output_file", nargs="?", default="",
                        help="Optionally specify an output file. Output "
                             "is to stdout if no file is specified.")
    parser.add_argument("-p", "--parameter", nargs="?", default='',
                        help="Specify a parameter (i.e. number of "
                             "clusters, distance threshold) to be used "
                             "with clustering algorithm. If none is "
                             "provided, a sensible default is used.")
    opts = parser.parse_args()

    G = io.build_ig_graph_from_matrix(opts.dsd_file, opts.directed)
    nodes = io.get_node_list(opts.node_list) if opts.node_list else []

    opts.algorithm = int(opts.algorithm)
    if opts.algorithm == SPECTRAL:
        # numpy is only needed for this branch, so it is imported lazily.
        import numpy as np
        k_val = int(opts.parameter) if opts.parameter else 100

        # Drop the graph and the intermediate matrix as soon as they are no
        # longer needed to keep peak memory down.
        mat = G.get_adjacency(attribute='weight')
        del G
        dist_matrix = np.array(mat.data)
        del mat
        clusters = cl.spectral_clustering(dist_matrix, n_clusters=k_val,
                                          node_map=nodes)
    elif opts.algorithm == THRESHOLD:
        filter_weight = float(opts.parameter) if opts.parameter else 5.0
        clusters = cl.threshold_clustering(G, threshold=filter_weight,
                                           node_map=nodes)
    elif opts.algorithm == HIERARCHICAL:
        # Implicit concatenation keeps source indentation out of the
        # user-facing message (the backslash continuation embedded it).
        sys.exit('Hierarchical clustering is not implemented, please '
                 'choose another algorithm')
    else:
        sys.exit('Please pick a valid clustering algorithm')

    io.output_clusters(clusters, opts.output_file)