def test_cluster_manta(self):
    """
    Clusters a copy of the module-level graph g and verifies that
    OTU_10 and OTU_6 end up with the same 'cluster' node attribute.
    """
    settings = (limit, max_clusters, min_clusters, min_cluster_size, iterations,
                subset, ratio, edgescale, permutations, verbose)
    # cluster_graph's first return value is the graph carrying cluster labels
    network = cluster_graph(deepcopy(g), *settings)[0]
    assignment = nx.get_node_attributes(network, 'cluster')
    self.assertEqual(assignment['OTU_10'], assignment['OTU_6'])
def test_layout(self):
    """
    Clusters a copy of the module-level graph g, generates a layout for it
    and verifies that the first entry of the layout holds two values.
    """
    settings = (limit, max_clusters, min_clusters, min_cluster_size, iterations,
                subset, ratio, edgescale, permutations, verbose)
    network = cluster_graph(deepcopy(g), *settings)[0]
    coords = generate_layout(network)
    # only the first key is inspected
    first_key = list(coords.keys())[0]
    first_position = coords[first_key]
    self.assertEqual(len(first_position), 2)
def perm_clusters(graph, limit, max_clusters, min_clusters, min_cluster_size,
                  iterations, ratio, partialperms, relperms, subset, error, verbose):
    """
    Calls the rewire_graph function and robustness function
    to compute robustness of cluster assignments.
    Scores close to 1 imply that the scores are robust to perturbation.

    The results are stored on the supplied graph as the node attributes
    'lowerCI', 'upperCI' and 'widthCI'.

    Parameters
    ----------
    :param graph: NetworkX graph of a microbial association network. Cluster assignment should be a network property.
    :param limit: Percentage in error decrease until matrix is considered converged.
    :param max_clusters: Maximum number of clusters to evaluate in K-means clustering.
    :param min_clusters: Minimum number of clusters to evaluate in K-means clustering.
    :param min_cluster_size: Minimum cluster size as fraction of network size
    :param iterations: If algorithm does not converge, it stops here.
    :param ratio: Ratio of scores that need to be positive or negative for a stable edge
    :param partialperms: Number of permutations for partial diffusion.
    :param relperms: Number of permutations for reliability testing.
    :param subset: Fraction of edges used in subsetting procedure
    :param error: Fraction of edges to rewire for reliability metric.
    :param verbose: Verbosity level of function
    :return: None. The graph is annotated in place. If rewire_graph reports a
             failed swap, the function returns early without annotating the graph.
    """
    assignments = list()
    for i in range(relperms):
        permutation, swapfail = rewire_graph(graph, error)
        if swapfail:
            # rewiring did not succeed; abort without touching the graph
            return
        # only the clustered graph is needed; the second return value is discarded
        permutation, _ = cluster_graph(graph=permutation, limit=limit, max_clusters=max_clusters,
                                       min_clusters=min_clusters, min_cluster_size=min_cluster_size,
                                       iterations=iterations, ratio=ratio, edgescale=0,
                                       permutations=partialperms, subset=subset, verbose=False)
        cluster = nx.get_node_attributes(permutation, 'cluster')
        # cluster.values() has same order as permutation.nodes
        assignments.append(cluster)
        if verbose:
            logger.info('Permutation ' + str(i))
    graphclusters = nx.get_node_attributes(graph, 'cluster')
    _, nodejaccards, ci_width = robustness(graphclusters, assignments)
    lowerCI = dict()
    upperCI = dict()
    for node in nodejaccards:
        # np.asscalar was removed from NumPy; np.asarray(...).item() yields the
        # same plain Python scalar and also accepts values that are already scalars
        lowerCI[node] = np.asarray(nodejaccards[node][0]).item()
        upperCI[node] = np.asarray(nodejaccards[node][1]).item()
        ci_width[node] = np.asarray(ci_width[node]).item()
    nx.set_node_attributes(graph, lowerCI, "lowerCI")
    nx.set_node_attributes(graph, upperCI, "upperCI")
    nx.set_node_attributes(graph, ci_width, "widthCI")
    if verbose:
        logger.info("Completed estimation of node Jaccard similarities across bootstraps.")
def test_directed_manta(self):
    """
    Runs clustering on a copy of the module-level graph directg and
    verifies that exactly 10 nodes carry a 'cluster' attribute afterwards.
    """
    settings = (limit, max_clusters, min_clusters, min_cluster_size, iterations,
                subset, ratio, edgescale, permutations, verbose)
    network = cluster_graph(deepcopy(directg), *settings)[0]
    assignment = nx.get_node_attributes(network, 'cluster')
    self.assertEqual(len(assignment), 10)
def test_central_node(self):
    """
    Clusters a copy of the module-level graph g, runs the edge and node
    centrality functions on it and verifies that no node
    carries a 'hub' attribute
    (actually the p-value is too low).
    """
    settings = (limit, max_clusters, min_clusters, min_cluster_size, iterations,
                subset, ratio, edgescale, permutations, verbose)
    network = cluster_graph(deepcopy(g), *settings)[0]
    # edge centrality has to run before node centrality is evaluated
    central_edge(network, percentile, permutations, error, verbose)
    central_node(network)
    node_hubs = nx.get_node_attributes(network, 'hub')
    self.assertEqual(len(node_hubs), 0)
def test_default_manta(self):
    """
    Checks whether the main function
    carries out both clustering and centrality estimates.

    Clustering is verified through the presence of 'cluster' node attributes,
    centrality through the presence of 'hub' edge attributes.
    """
    clustered_graph = cluster_graph(deepcopy(g), limit, max_clusters, min_clusters, min_cluster_size,
                                    iterations, subset, ratio, edgescale, permutations, verbose)
    graph = clustered_graph[0]
    central_edge(graph, percentile, rel, error, verbose)
    central_node(graph)
    clusters = nx.get_node_attributes(graph, 'cluster')
    hubs = nx.get_edge_attributes(graph, 'hub')
    # previously the cluster assignment was fetched but never checked
    self.assertGreater(len(clusters), 0)
    self.assertGreater(len(hubs), 0)
def test_bootstrap(self):
    """
    Verifies that perm_edges returns one entry for every
    candidate hub position found in the matrix returned by cluster_graph.
    """
    settings = (limit, max_clusters, min_clusters, min_cluster_size, iterations,
                subset, ratio, edgescale, permutations, verbose)
    outcome = cluster_graph(deepcopy(g), *settings)
    # calculates the ratio of positive / negative weights
    # note that ratios need to be adapted, because the matrix is symmetric
    scores = outcome[1]
    lower_cut = np.percentile(scores, percentile)
    upper_cut = np.percentile(scores, 100 - percentile)
    # matrix positions at or beyond the cut-offs, stored as index tuples
    neghubs = [tuple(index) for index in np.argwhere(scores <= lower_cut)]
    poshubs = [tuple(index) for index in np.argwhere(scores >= upper_cut)]
    bootmats = perm_edges(g, permutations, percentile, poshubs, neghubs, error=0.1)
    expected_total = len(poshubs) + len(neghubs)
    self.assertEqual(expected_total, len(bootmats))
def test_center_manta(self):
    """
    Verifies that the edge between OTU_3 and OTU_4 is labelled
    'negative hub' after clustering and edge centrality estimation.
    WARNING: at the moment the test indicates
    that centrality measures are not stable.
    """
    settings = (limit, max_clusters, min_clusters, min_cluster_size, iterations,
                subset, ratio, edgescale, permutations, verbose)
    network = cluster_graph(deepcopy(g), *settings)[0]
    central_edge(network, percentile, permutations, error, verbose)
    edge_hubs = nx.get_edge_attributes(network, 'hub')
    flipped = ('OTU_4', 'OTU_3')
    if flipped in edge_hubs:
        # 3.5 test sometimes swaps keys around apparently
        edge_hubs[('OTU_3', 'OTU_4')] = edge_hubs[flipped]
    self.assertEqual(edge_hubs[('OTU_3', 'OTU_4')], 'negative hub')
def main():
    """
    Command-line entry point.

    Parses the arguments defined by set_manta, imports the requested network
    (or the demo network shipped with the package), optionally binarizes the
    edge weights, clusters the network with cluster_graph, optionally estimates
    cluster robustness with perm_clusters, and writes the result to disk
    in the requested format. The process is always terminated through sys.exit.
    """
    args = vars(set_manta().parse_args(sys.argv[1:]))
    if args['version']:
        info = VersionInfo('manta')
        logger.info('Version ' + info.version_string())
        sys.exit(0)
    # Stays None for the demo network, so the direction check below
    # never touches an unbound name.
    extension = None
    if args['graph'] != 'demo':
        extension = args['graph'].split(sep=".")[-1]
        # see if the file can be detected
        # if not, try appending current working directory and then read.
        if not os.path.isfile(args['graph']):
            candidate = os.getcwd() + '/' + args['graph']
            if os.path.isfile(candidate):
                # keep the file name; the directory alone cannot be read
                args['graph'] = candidate
            else:
                logger.error('Could not find the specified file. Is your file path correct?')
                sys.exit()
        readers = {'graphml': nx.read_graphml,
                   'txt': nx.read_weighted_edgelist,
                   'gml': nx.read_gml,
                   'cyjs': read_cyjson}
        reader = readers.get(extension)
        if reader is None:
            # no exception is active here, so no traceback is attached
            logger.warning('Format not accepted. '
                           'Please specify the filename including extension (e.g. test.graphml).')
            sys.exit()
        try:
            network = reader(args['graph'])
        except Exception:
            logger.error('Could not import network file!', exc_info=True)
            sys.exit()
    else:
        path = os.path.dirname(manta.__file__)
        path = path + '//demo.graphml'
        network = nx.read_graphml(path)
    # first need to convert network to undirected
    if args['direction']:
        if extension == 'txt':
            logger.warning('Directed networks from edge lists not supported, use graphml or cyjs! ')
            sys.exit()
    else:
        network = nx.to_undirected(network)
    if args['bin']:
        orig_edges = dict()
        # store original edges for export
        for edge in network.edges:
            orig_edges[edge] = network.edges[edge]['weight']
            network.edges[edge]['weight'] = np.sign(network.edges[edge]['weight'])
    weight_properties = nx.get_edge_attributes(network, 'weight')
    # np.any collapses its input to a single value and cannot be iterated over,
    # so the per-edge test uses the builtin any instead
    if any(value == 0 for value in weight_properties.values()):
        logger.error("Some edges in the network have a weight of exactly 0. \n"
                     "Such edges cannot be clustered. Try converting weights to 1 and -1. ")
    if not weight_properties:
        logger.error("The imported network has no 'weight' edge property. \n"
                     "Please make sure you are formatting the network correctly. ")
    results = cluster_graph(network, limit=args['limit'], max_clusters=args['max'],
                            min_clusters=args['min'], min_cluster_size=args['ms'],
                            iterations=args['iter'], subset=args['subset'],
                            ratio=args['ratio'], edgescale=args['edgescale'],
                            permutations=args['perm'], verbose=args['verbose'])
    graph = results[0]
    if args['cr']:
        perm_clusters(graph=graph, limit=args['limit'], max_clusters=args['max'],
                      min_clusters=args['min'], min_cluster_size=args['ms'],
                      iterations=args['iter'], ratio=args['ratio'],
                      partialperms=args['perm'], relperms=args['rel'],
                      subset=args['subset'], error=args['error'], verbose=args['verbose'])
    layout = None
    if args['bin']:
        # restore the weights that were replaced by their sign before clustering
        for edge in network.edges:
            network.edges[edge]['weight'] = orig_edges[edge]
    if args['layout']:
        layout = generate_layout(graph, args['tax'])
    if args['fp']:
        if args['f'] == 'graphml':
            nx.write_graphml(graph, args['fp'] + '.graphml')
        elif args['f'] == 'csv':
            # one column per node attribute; attribute names are taken from the first node
            node_keys = graph.nodes[list(graph.nodes)[0]].keys()
            properties = {key: nx.get_node_attributes(graph, key) for key in node_keys}
            data = pd.DataFrame(properties)
            data.to_csv(args['fp'] + '.csv')
        elif args['f'] == 'gml':
            nx.write_gml(graph, args['fp'] + '.gml')
        elif args['f'] == 'cyjs':
            write_cyjson(graph=graph, filename=args['fp'] + '.cyjs', layout=layout)
        logger.info('Wrote clustered network to ' + args['fp'] + '.' + args['f'])
    else:
        logger.error('Could not write network to disk, no file path given.')
    sys.exit(0)
def main():
    """
    Command-line entry point.

    Parses the arguments defined by set_manta, imports the requested network
    (or the demo network shipped with the package), optionally binarizes the
    edge weights, clusters the network with cluster_graph, optionally estimates
    cluster robustness with perm_clusters, and writes the result to disk
    in the requested format. The process is always terminated through sys.exit.
    """
    args = vars(set_manta().parse_args(sys.argv[1:]))
    if args['version']:
        info = VersionInfo('manta')
        logger.info('Version ' + info.version_string())
        sys.exit(0)
    # Stays None for the demo network, so the direction check below
    # never touches an unbound name.
    extension = None
    if args['graph'] != 'demo':
        extension = args['graph'].split(sep=".")[-1]
        # see if the file can be detected
        # if not, try appending current working directory and then read.
        if not os.path.isfile(args['graph']):
            candidate = os.getcwd() + '/' + args['graph']
            if os.path.isfile(candidate):
                # keep the file name; the directory alone cannot be read
                args['graph'] = candidate
            else:
                logger.error(
                    'Could not find the specified file. Is your file path correct?'
                )
                sys.exit()
        readers = {'graphml': nx.read_graphml,
                   'txt': nx.read_weighted_edgelist,
                   'gml': nx.read_gml,
                   'cyjs': read_cyjson}
        reader = readers.get(extension)
        if reader is None:
            # no exception is active here, so no traceback is attached
            logger.warning(
                'Format not accepted. '
                'Please specify the filename including extension (e.g. test.graphml).')
            sys.exit()
        try:
            network = reader(args['graph'])
        except Exception:
            logger.error('Could not import network file!', exc_info=True)
            sys.exit()
    else:
        path = os.path.dirname(manta.__file__)
        path = path + '//demo.graphml'
        network = nx.read_graphml(path)
    # first need to convert network to undirected
    if args['direction']:
        if extension == 'txt':
            logger.warning(
                'Directed networks from edge lists not supported, use graphml or cyjs! '
            )
            sys.exit()
    else:
        network = nx.to_undirected(network)
    if args['bin']:
        orig_edges = dict()
        # store original edges for export
        for edge in network.edges:
            orig_edges[edge] = network.edges[edge]['weight']
            network.edges[edge]['weight'] = np.sign(
                network.edges[edge]['weight'])
    results = cluster_graph(network,
                            limit=args['limit'],
                            max_clusters=args['max'],
                            min_clusters=args['min'],
                            min_cluster_size=args['ms'],
                            iterations=args['iter'],
                            subset=args['subset'],
                            ratio=args['ratio'],
                            edgescale=args['edgescale'],
                            permutations=args['perm'],
                            verbose=args['verbose'])
    graph = results[0]
    if args['cr']:
        perm_clusters(graph=graph,
                      limit=args['limit'],
                      max_clusters=args['max'],
                      min_clusters=args['min'],
                      min_cluster_size=args['ms'],
                      iterations=args['iter'],
                      ratio=args['ratio'],
                      partialperms=args['perm'],
                      relperms=args['rel'],
                      subset=args['subset'],
                      error=args['error'],
                      verbose=args['verbose'])
    layout = None
    if args['bin']:
        # restore the weights that were replaced by their sign before clustering
        for edge in network.edges:
            network.edges[edge]['weight'] = orig_edges[edge]
    if args['layout']:
        layout = generate_layout(graph, args['tax'])
    if args['fp']:
        # edge lists and adjacency lists are written as .txt, so the reported
        # path is derived from the real suffix rather than from the format name
        suffix = {'edgelist': 'txt', 'adj': 'txt'}.get(args['f'], args['f'])
        target = args['fp'] + '.' + suffix
        if args['f'] == 'graphml':
            nx.write_graphml(graph, target)
        elif args['f'] == 'edgelist':
            nx.write_weighted_edgelist(graph, target)
        elif args['f'] == 'gml':
            nx.write_gml(graph, target)
        elif args['f'] == 'adj':
            nx.write_multiline_adjlist(graph, target)
        elif args['f'] == 'cyjs':
            write_cyjson(graph=graph, filename=target, layout=layout)
        logger.info('Wrote clustered network to ' + target)
    else:
        logger.error('Could not write network to disk, no file path given.')
    sys.exit(0)