def main():
    """Recursively bisect oversized clusters with spectral clustering.

    Reads an existing clustering (``cluster_file``) and the network it was
    computed on (``network_file``).  Every cluster with more than
    ``MAX_CL_SIZE`` members is split in two on its induced subgraph, and the
    pieces are re-examined until no oversized cluster is left.  The final
    clusters are passed to ``io.output_clusters`` with an empty output target
    (presumably stdout -- confirm against ``io.output_clusters``).

    Command-line options:
        -c/--no_conversion, -s/--simple_conversion: forwarded unchanged to
            ``cl.spectral_clustering``.
        -d/--directed: read the edge list as a directed graph.
        -n/--node_list: node list file; when given, the network file is
            loaded through ``io.build_ig_graph_from_matrix`` instead of
            being read as an edge list.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("network_file",
                        help="Network file used for initial clustering")
    parser.add_argument("cluster_file", help="Clustering results file")
    parser.add_argument("-c", "--no_conversion", action="store_true")
    parser.add_argument("-d", "--directed", action="store_true",
                        help="Flag specifying if the input represents "
                             "a directed graph. Defaults to false.")
    parser.add_argument("-n", "--node_list", nargs="?",
                        help="Optionally specify a list of the nodes in "
                             "the DSD file. Default is all the nodes in the "
                             "graph.")
    parser.add_argument("-s", "--simple_conversion", action="store_true")
    opts = parser.parse_args()

    use_node_list = bool(opts.node_list)
    if use_node_list:
        node_list = io.get_node_list(opts.node_list)
    initial_clusters = io.read_clusters(opts.cluster_file)

    if use_node_list:
        # NOTE(review): the matrix path always builds an undirected graph, so
        # --directed only affects the edge-list path -- confirm intended.
        G = io.build_ig_graph_from_matrix(opts.network_file, False, node_list)
    else:
        G = ig.Graph.Read_Ncol(opts.network_file, directed=opts.directed)

    # Partition the input into clusters that are already small enough and
    # clusters that still need splitting.
    pending, final_clusters = [], []
    for cluster in initial_clusters:
        if len(cluster) > MAX_CL_SIZE:
            pending.append(cluster)
        else:
            final_clusters.append(cluster)

    # Always bisect; a size-proportional cluster count was tried previously
    # and is disabled.
    num_clusters = 2

    # Keep splitting until no oversized cluster remains.
    # NOTE(review): if the clustering ever returns a part as large as its
    # input, that part is re-queued unchanged -- confirm this cannot loop.
    while pending:
        current_round, pending = pending, []
        for cluster in current_round:
            # Clusters hold vertex names; the subgraph re-indexes vertices,
            # so results are mapped back to names through SG below.
            SG = G.subgraph(cluster)
            parts = cl.spectral_clustering(
                SG, num_clusters,
                no_conversion=opts.no_conversion,
                simple_conversion=opts.simple_conversion)
            for part in parts:
                names = [SG.vs[i]['name'] for i in part]
                if len(part) > MAX_CL_SIZE:
                    pending.append(names)
                else:
                    final_clusters.append(names)

    io.output_clusters(final_clusters, '')
def main():
    """Generate, filter and output overlapping clusters.

    Takes a single positional argument, ``gc_results_dir``, passes it to
    ``generate_overlapping_clusters``, runs the result through
    ``filter_clusters`` and hands the survivors to ``io.output_clusters``
    with an empty output target.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("gc_results_dir")
    results_dir = arg_parser.parse_args().gc_results_dir

    overlapping = generate_overlapping_clusters(results_dir)
    io.output_clusters(filter_clusters(overlapping), '')
def main():
    """Resolve a GeneCentric clustering against another clustering.

    Reads the two cluster files named on the command line
    (``gc_clustering`` first, then ``other_clustering``), combines them with
    ``resolve_clusters`` and passes the result to ``io.output_clusters``
    with an empty output target.
    """
    arg_parser = argparse.ArgumentParser()
    for positional in ("gc_clustering", "other_clustering"):
        arg_parser.add_argument(positional)
    args = arg_parser.parse_args()

    # Arguments are evaluated left to right, so the GC file is read first.
    resolved = resolve_clusters(io.read_clusters(args.gc_clustering),
                                io.read_clusters(args.other_clustering))
    io.output_clusters(resolved, '')
def main():
    """Recursively split oversized clusters using a DSD distance matrix.

    Reads an existing clustering (``cluster_file``) and builds a weighted
    igraph graph from ``dsd_file``.  Every cluster with more than
    ``MAX_CL_SIZE`` members is re-clustered on the weighted adjacency matrix
    of its induced subgraph, and the pieces are re-examined until no
    oversized cluster is left.  The final clusters are passed to
    ``io.output_clusters`` with an empty output target (presumably stdout --
    confirm against ``io.output_clusters``).

    Command-line options:
        -n/--node_list: optional node list file forwarded to the graph
            builder.  When omitted, the builder is called without a node
            list instead of trying to load a missing file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("dsd_file",
                        help="Distance (i.e. DSD) matrix for network")
    parser.add_argument("cluster_file", help="Clustering results file")
    parser.add_argument("-n", "--node_list", nargs="?",
                        help="Optionally specify a list of the nodes in "
                             "the DSD file. Default is all the nodes in the "
                             "graph.")
    opts = parser.parse_args()

    # The node list is optional: only load it when a path was supplied.
    node_list = io.get_node_list(opts.node_list) if opts.node_list else None
    initial_clusters = io.read_clusters(opts.cluster_file)
    if node_list is None:
        G = io.build_ig_graph_from_matrix(opts.dsd_file, False)
    else:
        G = io.build_ig_graph_from_matrix(opts.dsd_file, False, node_list)

    # Partition the input into clusters that are already small enough and
    # clusters that still need splitting.
    pending, final_clusters = [], []
    for cluster in initial_clusters:
        if len(cluster) > MAX_CL_SIZE:
            pending.append(cluster)
        else:
            final_clusters.append(cluster)

    # Keep splitting until no oversized cluster remains.
    # NOTE(review): if the clustering ever returns a part as large as its
    # input, that part is re-queued unchanged -- confirm this cannot loop.
    while pending:
        current_round, pending = pending, []
        for cluster in current_round:
            # Clusters hold vertex names; the subgraph re-indexes vertices,
            # so results are mapped back to names through SG below.
            SG = G.subgraph(cluster)
            cluster_size = len(cluster)
            # Roughly one sub-cluster per 100 nodes for big clusters,
            # otherwise a plain bisection.
            if cluster_size > 200:
                num_clusters = int(cluster_size / float(100))
            else:
                num_clusters = 2

            mat = SG.get_adjacency(attribute='weight')
            dist_matrix = np.array(mat.data)
            del mat  # release the igraph matrix before clustering
            parts = cl.spectral_clustering(dist_matrix, num_clusters)
            del dist_matrix

            for part in parts:
                names = [SG.vs[i]['name'] for i in part]
                if len(part) > MAX_CL_SIZE:
                    pending.append(names)
                else:
                    final_clusters.append(names)

    io.output_clusters(final_clusters, '')
def main():
    """Drop clusters smaller than a size cutoff and output the rest.

    Reads clusters from ``input_file`` and keeps those whose length is at
    least the cutoff, then passes them to ``io.output_clusters`` with an
    empty output target.

    Command-line options:
        -c/--cutoff: minimum cluster size to keep.  Defaults to
            ``DEFAULT_MIN_SIZE``; giving the flag without a value also
            selects the default.  Non-integer values are rejected by
            argparse with a usage error.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="Original clusters input file")
    parser.add_argument("-c", "--cutoff", nargs="?", type=int,
                        default=DEFAULT_MIN_SIZE, const=DEFAULT_MIN_SIZE,
                        help="Cutoff for filtering cluster size")
    opts = parser.parse_args()

    # Convert once up front; argparse leaves non-string defaults untouched,
    # so the default may still need coercing to int.
    min_size = int(opts.cutoff)

    clusters = io.read_clusters(opts.input_file)
    filtered_clusters = [c for c in clusters if len(c) >= min_size]
    io.output_clusters(filtered_clusters, '')
def main():
    """Append nodes missing from the GeneCentric clustering as one cluster.

    Collects the nodes reported by ``get_cluster_nodes`` for ``cluster_file``
    and for ``gc_file``, reads the GeneCentric clusters from ``gc_file``,
    and appends one extra cluster holding every node found in the former
    but not in the latter.  The extended list is passed to
    ``io.output_clusters`` with an empty output target.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("gc_file", help='GeneCentric cluster file')
    arg_parser.add_argument("cluster_file",
                            help="File containing cluster filepaths")
    args = arg_parser.parse_args()

    all_nodes = get_cluster_nodes(args.cluster_file)
    covered_nodes = get_cluster_nodes(args.gc_file)
    result = io.read_clusters(args.gc_file)

    # Nodes that no GeneCentric cluster contains form one extra cluster
    # (appended even when it is empty).
    uncovered = set(all_nodes) - set(covered_nodes)
    result.append(list(uncovered))

    io.output_clusters(result, '')
def main():
    """Cluster a network with the algorithm selected on the command line.

    Builds a graph from ``dsd_file`` (networkx edge list when
    ``USE_NETWORKX`` is set, otherwise igraph), determines the node
    labelling, runs the chosen clustering algorithm and passes the result
    to ``io.output_clusters`` together with ``--output_file``.

    Command-line options:
        -a/--algorithm: integer code compared against ``SPECTRAL``,
            ``THRESHOLD`` and ``HIERARCHICAL``; defaults to ``DEFAULT_ALG``.
        -p/--parameter: cluster count (spectral, default 100) or weight
            threshold (threshold default 5.0, hierarchical default 1.0).
        -c/--no_conversion, -s/--simple_conversion: forwarded to spectral
            clustering.
        -d/--directed, -n/--node_list, -o/--output_file: see their help.

    Exits through ``sys.exit`` with a message when the algorithm code is not
    one of the known values.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("dsd_file",
                        help="Distance (i.e. DSD) matrix for network")
    parser.add_argument("-a", "--algorithm", nargs="?", default=DEFAULT_ALG,
                        help="The clustering algorithm to use - 1 for "
                             "spectral, 2 for threshold clustering, and 3 "
                             "for simple shortest-path divisive hierarchical "
                             "clustering. Defaults to spectral clustering.")
    parser.add_argument("-c", "--no_conversion", action="store_true")
    parser.add_argument("-d", "--directed", action="store_true",
                        help="Flag specifying if the input represents "
                             "a directed graph. Defaults to false.")
    parser.add_argument("-n", "--node_list", nargs="?",
                        help="Optionally specify a list of the nodes in "
                             "the DSD file. Default is all the nodes in the "
                             "graph.")
    parser.add_argument("-o", "--output_file", nargs="?", default="",
                        help="Optionally specify an output file. Output is "
                             "to stdout if no file is specified.")
    parser.add_argument("-p", "--parameter", nargs="?", default='',
                        help="Specify a parameter (i.e. number of clusters, "
                             "distance threshold) to be used with clustering "
                             "algorithm. If none is provided, a sensible "
                             "default is used.")
    parser.add_argument("-s", "--simple_conversion", action="store_true")
    opts = parser.parse_args()

    # The clustering backend is chosen at run time, hence the local imports.
    if USE_NETWORKX:
        import clustering_algs_nx as cl
        G = io.build_nx_graph_from_edgelist(opts.dsd_file, opts.directed)
    else:
        import clustering_algs_ig as cl
        if opts.node_list:
            # NOTE(review): the node list is not passed to the matrix
            # builder here -- confirm that is intended.
            G = io.build_ig_graph_from_matrix(opts.dsd_file, opts.directed)
        else:
            # temporary, TODO remove after consensus experiments
            G = ig.Graph.Read_Ncol(opts.dsd_file, directed=opts.directed)

    if opts.node_list:
        nodes = io.get_node_list(opts.node_list)
    else:
        # Vertex names ordered by vertex index.  Built directly rather than
        # by subscripting zip(), which is an iterator on Python 3 and which
        # raised IndexError on an empty graph.
        # NOTE(review): this reads G.vs, which looks igraph-specific --
        # confirm the networkx path always supplies --node_list.
        nodes = tuple(v['name']
                      for v in sorted(G.vs, key=lambda v: v.index))

    opts.algorithm = int(opts.algorithm)
    if opts.algorithm == SPECTRAL:
        k_val = int(opts.parameter) if opts.parameter else 100
        clusters = cl.spectral_clustering(
            G, n_clusters=k_val, node_map=nodes,
            no_conversion=opts.no_conversion,
            simple_conversion=opts.simple_conversion)
    elif opts.algorithm == THRESHOLD:
        filter_weight = float(opts.parameter) if opts.parameter else 5.0
        clusters = cl.threshold_clustering(G, threshold=filter_weight,
                                           node_map=nodes)
    elif opts.algorithm == HIERARCHICAL:
        filter_weight = float(opts.parameter) if opts.parameter else 1.0
        clusters = cl.hierarchical_clustering(G, threshold=filter_weight)
    else:
        sys.exit('Please pick a valid clustering algorithm')

    io.output_clusters(clusters, opts.output_file)
def main():
    """Cluster a DSD distance matrix with the selected algorithm.

    Builds an igraph graph from ``dsd_file``, runs the chosen clustering
    algorithm and passes the result to ``io.output_clusters`` together with
    ``--output_file``.

    Command-line options:
        -a/--algorithm: integer code compared against ``SPECTRAL``,
            ``THRESHOLD`` and ``HIERARCHICAL``; defaults to ``DEFAULT_ALG``.
        -p/--parameter: cluster count for spectral clustering (default 100)
            or weight threshold for threshold clustering (default 5.0).
        -d/--directed, -n/--node_list, -o/--output_file: see their help.

    Exits through ``sys.exit`` with a message when hierarchical clustering
    is requested (not implemented here) or the algorithm code is unknown.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("dsd_file",
                        help="Distance (i.e. DSD) matrix for network")
    parser.add_argument("-a", "--algorithm", nargs="?", default=DEFAULT_ALG,
                        help="The clustering algorithm to use - 1 for "
                             "spectral, 2 for threshold clustering, and 3 "
                             "for simple shortest-path divisive hierarchical "
                             "clustering. Defaults to spectral clustering.")
    parser.add_argument("-d", "--directed", action="store_true",
                        help="Flag specifying if the input represents "
                             "a directed graph. Defaults to false.")
    parser.add_argument("-n", "--node_list", nargs="?",
                        help="Optionally specify a list of the nodes in "
                             "the DSD file. Default is all the nodes in the "
                             "graph.")
    parser.add_argument("-o", "--output_file", nargs="?", default="",
                        help="Optionally specify an output file. Output is "
                             "to stdout if no file is specified.")
    parser.add_argument("-p", "--parameter", nargs="?", default='',
                        help="Specify a parameter (i.e. number of clusters, "
                             "distance threshold) to be used with clustering "
                             "algorithm. If none is provided, a sensible "
                             "default is used.")
    opts = parser.parse_args()

    G = io.build_ig_graph_from_matrix(opts.dsd_file, opts.directed)
    nodes = io.get_node_list(opts.node_list) if opts.node_list else []

    opts.algorithm = int(opts.algorithm)
    if opts.algorithm == SPECTRAL:
        import numpy as np
        k_val = int(opts.parameter) if opts.parameter else 100
        # Spectral clustering works on the dense weight matrix; drop the
        # graph and the intermediate igraph matrix as soon as possible.
        mat = G.get_adjacency(attribute='weight')
        del G
        dist_matrix = np.array(mat.data)
        del mat
        clusters = cl.spectral_clustering(dist_matrix, n_clusters=k_val,
                                          node_map=nodes)
    elif opts.algorithm == THRESHOLD:
        filter_weight = float(opts.parameter) if opts.parameter else 5.0
        clusters = cl.threshold_clustering(G, threshold=filter_weight,
                                           node_map=nodes)
    elif opts.algorithm == HIERARCHICAL:
        sys.exit('Hierarchical clustering is not implemented, please choose '
                 'another algorithm')
    else:
        sys.exit('Please pick a valid clustering algorithm')

    io.output_clusters(clusters, opts.output_file)