Пример #1
0
 def test_cluster_manta(self):
     """
     Clusters a copy of the test graph and verifies that
     OTU_10 and OTU_6 carry the same cluster ID.
     """
     results = cluster_graph(deepcopy(g), limit, max_clusters, min_clusters, min_cluster_size,
                             iterations, subset, ratio, edgescale, permutations, verbose)
     # the first element of the result is the graph carrying the 'cluster' attribute
     assigned = nx.get_node_attributes(results[0], 'cluster')
     self.assertEqual(assigned['OTU_10'], assigned['OTU_6'])
Пример #2
0
 def test_layout(self):
     """
     Generates a layout for the clustered test graph and verifies that
     the first entry of the returned mapping holds exactly two values.
     """
     results = cluster_graph(deepcopy(g), limit, max_clusters, min_clusters, min_cluster_size,
                             iterations, subset, ratio, edgescale, permutations, verbose)
     positions = generate_layout(results[0])
     first_key = list(positions.keys())[0]
     self.assertEqual(len(positions[first_key]), 2)
Пример #3
0
def perm_clusters(graph, limit, max_clusters, min_clusters, min_cluster_size,
                  iterations, ratio, partialperms, relperms, subset, error, verbose):
    """
    Calls the rewire_graph function and robustness function
    to compute robustness of cluster assignments.
    Scores close to 1 imply that the scores are robust to perturbation.

    The graph is rewired relperms times; every rewired copy is clustered again
    with cluster_graph (edgescale fixed to 0, verbosity switched off) and the
    collected assignments are compared to the assignment stored on the input graph.
    The per-node results are written to the input graph as the node attributes
    'lowerCI', 'upperCI' and 'widthCI'.

    Parameters
    ----------
    :param graph: NetworkX graph of a microbial association network. Cluster assignment should be a network property.
    :param limit: Percentage in error decrease until matrix is considered converged.
    :param max_clusters: Maximum number of clusters to evaluate in K-means clustering.
    :param min_clusters: Minimum number of clusters to evaluate in K-means clustering.
    :param min_cluster_size: Minimum cluster size as fraction of network size
    :param iterations: If algorithm does not converge, it stops here.
    :param ratio: Ratio of scores that need to be positive or negative for a stable edge
    :param partialperms: Number of permutations for partial diffusion.
    :param relperms: Number of permutations for reliability testing.
    :param subset: Fraction of edges used in subsetting procedure
    :param error: Fraction of edges to rewire for reliability metric.
    :param verbose: Verbosity level of function
    :return: None. The graph is annotated in place; if rewire_graph reports a
             failed swap the function returns early without annotating the graph.
    """
    assignments = list()
    for i in range(relperms):
        permutation, swapfail = rewire_graph(graph, error)
        if swapfail:
            # rewiring did not succeed, so no robustness estimate can be made
            return
        # only the clustered graph is needed here, the returned matrix is discarded
        permutation, _ = cluster_graph(graph=permutation, limit=limit, max_clusters=max_clusters,
                                       min_clusters=min_clusters, min_cluster_size=min_cluster_size,
                                       iterations=iterations,
                                       ratio=ratio, edgescale=0, permutations=partialperms,
                                       subset=subset,
                                       verbose=False)
        assignments.append(nx.get_node_attributes(permutation, 'cluster'))
        if verbose:
            logger.info('Permutation ' + str(i))
    graphclusters = nx.get_node_attributes(graph, 'cluster')
    # the cluster-level result of robustness() is not used by this function
    _, nodejaccards, ci_width = robustness(graphclusters, assignments)
    lowerCI = dict()
    upperCI = dict()
    for node in nodejaccards:
        # np.asscalar was removed from NumPy; asarray(...).item() performs the same
        # conversion to a plain Python scalar so the values can be exported
        lowerCI[node] = np.asarray(nodejaccards[node][0]).item()
        upperCI[node] = np.asarray(nodejaccards[node][1]).item()
        ci_width[node] = np.asarray(ci_width[node]).item()
    nx.set_node_attributes(graph, lowerCI, "lowerCI")
    nx.set_node_attributes(graph, upperCI, "upperCI")
    nx.set_node_attributes(graph, ci_width, "widthCI")
    if verbose:
        logger.info("Completed estimation of node Jaccard similarities across bootstraps.")
Пример #4
0
 def test_directed_manta(self):
     """
     Clusters a copy of the directed test graph (directg) and verifies
     that 10 nodes carry a cluster assignment afterwards.
     """
     results = cluster_graph(deepcopy(directg), limit, max_clusters, min_clusters, min_cluster_size,
                             iterations, subset, ratio, edgescale, permutations, verbose)
     assigned = nx.get_node_attributes(results[0], 'cluster')
     self.assertEqual(len(assigned), 10)
Пример #5
0
 def test_central_node(self):
     """
     Runs edge and node centrality on the clustered test graph and
     verifies that no node ends up with a 'hub' attribute
     (actually the p-value is too low).
     """
     network = cluster_graph(deepcopy(g), limit, max_clusters, min_clusters, min_cluster_size,
                             iterations, subset, ratio, edgescale, permutations, verbose)[0]
     central_edge(network, percentile, permutations, error, verbose)
     central_node(network)
     hub_nodes = nx.get_node_attributes(network, 'hub')
     self.assertEqual(len(hub_nodes), 0)
Пример #6
0
 def test_default_manta(self):
     """
     Checks whether the main function carries out both clustering and centrality estimates.

     After clustering and running the centrality functions, the graph must
     carry at least one node 'cluster' attribute and at least one edge 'hub' attribute.
     """
     clustered_graph = cluster_graph(deepcopy(g), limit, max_clusters, min_clusters,
                                     min_cluster_size, iterations, subset, ratio, edgescale, permutations, verbose)
     graph = clustered_graph[0]
     central_edge(graph, percentile, rel,
                  error, verbose)
     central_node(graph)
     clusters = nx.get_node_attributes(graph, 'cluster')
     hubs = nx.get_edge_attributes(graph, 'hub')
     # the cluster assignment was previously fetched but never verified
     self.assertGreater(len(clusters), 0)
     self.assertGreater(len(hubs), 0)
Пример #7
0
 def test_bootstrap(self):
     """
     Verifies that perm_edges returns one entry for every
     candidate hub position taken from the clustering matrix.
     """
     results = cluster_graph(deepcopy(g), limit, max_clusters, min_clusters, min_cluster_size,
                             iterations, subset, ratio, edgescale, permutations, verbose)
     matrix = results[1]
     # matrix positions at or below the lower percentile are the negative candidates,
     # positions at or above the mirrored upper percentile the positive ones
     lower_cutoff = np.percentile(matrix, percentile)
     upper_cutoff = np.percentile(matrix, 100 - percentile)
     neghubs = [tuple(position) for position in np.argwhere(matrix <= lower_cutoff)]
     poshubs = [tuple(position) for position in np.argwhere(matrix >= upper_cutoff)]
     bootmats = perm_edges(g, permutations, percentile, poshubs, neghubs, error=0.1)
     expected_total = len(poshubs) + len(neghubs)
     self.assertEqual(expected_total, len(bootmats))
Пример #8
0
    def test_center_manta(self):
        """
        Verifies that the edge between OTU_3 and OTU_4 is labelled
        as a 'negative hub' after the edge centrality analysis.

        WARNING: at the moment the test indicates that centrality measures
        are not stable.
        """
        network = cluster_graph(deepcopy(g), limit, max_clusters, min_clusters, min_cluster_size,
                                iterations, subset, ratio, edgescale, permutations, verbose)[0]
        central_edge(network, percentile, permutations, error, verbose)
        edge_hubs = nx.get_edge_attributes(network, 'hub')
        reversed_key = ('OTU_4', 'OTU_3')
        if reversed_key in edge_hubs:
            # the edge key can show up with its nodes swapped (seen on Python 3.5),
            # so mirror the entry under the orientation that is asserted below
            edge_hubs['OTU_3', 'OTU_4'] = edge_hubs[reversed_key]
        self.assertEqual(edge_hubs[('OTU_3', 'OTU_4')], 'negative hub')
Пример #9
0
def main():
    """
    Runs manta from the command line.

    Parses the command-line arguments, reads the requested network (or the
    demo.graphml file located next to the manta package), clusters it with
    cluster_graph, optionally estimates cluster robustness with perm_clusters,
    optionally generates a layout and finally writes the clustered network
    to disk in the requested format.

    The function does not return: every path ends in sys.exit.
    """
    args = vars(set_manta().parse_args(sys.argv[1:]))
    if args['version']:
        info = VersionInfo('manta')
        logger.info('Version ' + info.version_string())
        sys.exit(0)
    # Stays None for the demo network, so the edge list check further down
    # never touches an undefined name.
    extension = None
    if args['graph'] != 'demo':
        extension = args['graph'].split(sep=".")[-1]
        # see if the file can be detected
        # if not, try prepending the current working directory and then read.
        if not os.path.isfile(args['graph']):
            candidate = os.path.join(os.getcwd(), args['graph'])
            if os.path.isfile(candidate):
                # keep the file name: the path must point at the file, not the directory
                args['graph'] = candidate
            else:
                logger.error('Could not find the specified file. Is your file path correct?')
                sys.exit()
        readers = {'graphml': nx.read_graphml,
                   'txt': nx.read_weighted_edgelist,
                   'gml': nx.read_gml,
                   'cyjs': read_cyjson}
        reader = readers.get(extension)
        if reader is None:
            # no exception is active here, so no traceback is attached to the message
            logger.warning('Format not accepted. '
                           'Please specify the filename including extension (e.g. test.graphml).')
            sys.exit()
        try:
            network = reader(args['graph'])
        except Exception:
            logger.error('Could not import network file!', exc_info=True)
            sys.exit()
    else:
        path = os.path.join(os.path.dirname(manta.__file__), 'demo.graphml')
        network = nx.read_graphml(path)
    if args['direction']:
        if extension == 'txt':
            logger.warning('Directed networks from edge lists not supported, use graphml or cyjs! ')
            sys.exit()
    else:
        # without the direction flag the network is treated as undirected
        network = nx.to_undirected(network)
    # Validate the weights before they are used. Binarizing with np.sign keeps
    # zeros as zeros, so checking before the conversion gives the same answer.
    weight_properties = nx.get_edge_attributes(network, 'weight')
    if any(weight == 0 for weight in weight_properties.values()):
        logger.error("Some edges in the network have a weight of exactly 0. \n"
                     "Such edges cannot be clustered. Try converting weights to 1 and -1. ")
    if len(weight_properties) == 0:
        logger.error("The imported network has no 'weight' edge property. \n"
                     "Please make sure you are formatting the network correctly. ")
    if args['bin']:
        orig_edges = dict()
        # store original edges for export
        for edge in network.edges:
            orig_edges[edge] = network.edges[edge]['weight']
            network.edges[edge]['weight'] = np.sign(network.edges[edge]['weight'])
    results = cluster_graph(network, limit=args['limit'], max_clusters=args['max'],
                            min_clusters=args['min'], min_cluster_size=args['ms'],
                            iterations=args['iter'], subset=args['subset'],
                            ratio=args['ratio'], edgescale=args['edgescale'],
                            permutations=args['perm'], verbose=args['verbose'])
    graph = results[0]
    if args['cr']:
        perm_clusters(graph=graph, limit=args['limit'], max_clusters=args['max'],
                      min_clusters=args['min'], min_cluster_size=args['ms'],
                      iterations=args['iter'], ratio=args['ratio'],
                      partialperms=args['perm'], relperms=args['rel'], subset=args['subset'],
                      error=args['error'], verbose=args['verbose'])
    layout = None
    if args['bin']:
        # put the original (non-binarized) weights back before export
        for edge in network.edges:
            network.edges[edge]['weight'] = orig_edges[edge]
    if args['layout']:
        layout = generate_layout(graph, args['tax'])
    if args['fp']:
        if args['f'] == 'graphml':
            nx.write_graphml(graph, args['fp'] + '.graphml')
        elif args['f'] == 'csv':
            # the attribute names of the first node define the exported columns
            node_keys = graph.nodes[list(graph.nodes)[0]].keys()
            properties = {key: nx.get_node_attributes(graph, key) for key in node_keys}
            data = pd.DataFrame(properties)
            data.to_csv(args['fp'] + '.csv')
        elif args['f'] == 'gml':
            nx.write_gml(graph, args['fp'] + '.gml')
        elif args['f'] == 'cyjs':
            write_cyjson(graph=graph, filename=args['fp'] + '.cyjs', layout=layout)
        logger.info('Wrote clustered network to ' + args['fp'] + '.' + args['f'])
    else:
        logger.error('Could not write network to disk, no file path given.')
    sys.exit(0)
Пример #10
0
def main():
    """
    Runs manta from the command line.

    Parses the command-line arguments, reads the requested network (or the
    demo.graphml file located next to the manta package), clusters it with
    cluster_graph, optionally estimates cluster robustness with perm_clusters,
    optionally generates a layout and finally writes the clustered network
    to disk in the requested format.

    The function does not return: every path ends in sys.exit.
    """
    args = vars(set_manta().parse_args(sys.argv[1:]))
    if args['version']:
        info = VersionInfo('manta')
        logger.info('Version ' + info.version_string())
        sys.exit(0)
    # Stays None for the demo network, so the edge list check further down
    # never touches an undefined name.
    extension = None
    if args['graph'] != 'demo':
        extension = args['graph'].split(sep=".")[-1]
        # see if the file can be detected
        # if not, try prepending the current working directory and then read.
        if not os.path.isfile(args['graph']):
            candidate = os.path.join(os.getcwd(), args['graph'])
            if os.path.isfile(candidate):
                # keep the file name: the path must point at the file, not the directory
                args['graph'] = candidate
            else:
                logger.error(
                    'Could not find the specified file. Is your file path correct?'
                )
                sys.exit()
        readers = {
            'graphml': nx.read_graphml,
            'txt': nx.read_weighted_edgelist,
            'gml': nx.read_gml,
            'cyjs': read_cyjson,
        }
        reader = readers.get(extension)
        if reader is None:
            # no exception is active here, so no traceback is attached to the message
            logger.warning(
                'Format not accepted. '
                'Please specify the filename including extension (e.g. test.graphml).'
            )
            sys.exit()
        try:
            network = reader(args['graph'])
        except Exception:
            logger.error('Could not import network file!', exc_info=True)
            sys.exit()
    else:
        path = os.path.join(os.path.dirname(manta.__file__), 'demo.graphml')
        network = nx.read_graphml(path)
    if args['direction']:
        if extension == 'txt':
            logger.warning(
                'Directed networks from edge lists not supported, use graphml or cyjs! '
            )
            sys.exit()
    else:
        # without the direction flag the network is treated as undirected
        network = nx.to_undirected(network)
    if args['bin']:
        orig_edges = dict()
        # store original edges for export
        for edge in network.edges:
            orig_edges[edge] = network.edges[edge]['weight']
            network.edges[edge]['weight'] = np.sign(
                network.edges[edge]['weight'])
    results = cluster_graph(network,
                            limit=args['limit'],
                            max_clusters=args['max'],
                            min_clusters=args['min'],
                            min_cluster_size=args['ms'],
                            iterations=args['iter'],
                            subset=args['subset'],
                            ratio=args['ratio'],
                            edgescale=args['edgescale'],
                            permutations=args['perm'],
                            verbose=args['verbose'])
    graph = results[0]
    if args['cr']:
        perm_clusters(graph=graph,
                      limit=args['limit'],
                      max_clusters=args['max'],
                      min_clusters=args['min'],
                      min_cluster_size=args['ms'],
                      iterations=args['iter'],
                      ratio=args['ratio'],
                      partialperms=args['perm'],
                      relperms=args['rel'],
                      subset=args['subset'],
                      error=args['error'],
                      verbose=args['verbose'])
    layout = None
    if args['bin']:
        # put the original (non-binarized) weights back before export
        for edge in network.edges:
            network.edges[edge]['weight'] = orig_edges[edge]
    if args['layout']:
        layout = generate_layout(graph, args['tax'])
    if args['fp']:
        if args['f'] == 'graphml':
            nx.write_graphml(graph, args['fp'] + '.graphml')
        elif args['f'] == 'edgelist':
            nx.write_weighted_edgelist(graph, args['fp'] + '.txt')
        elif args['f'] == 'gml':
            nx.write_gml(graph, args['fp'] + '.gml')
        elif args['f'] == 'adj':
            nx.write_multiline_adjlist(graph, args['fp'] + '.txt')
        elif args['f'] == 'cyjs':
            write_cyjson(graph=graph,
                         filename=args['fp'] + '.cyjs',
                         layout=layout)
        logger.info('Wrote clustered network to ' + args['fp'] + '.' +
                    args['f'])
    else:
        logger.error('Could not write network to disk, no file path given.')
    sys.exit(0)