示例#1
0
def generate_graph(input_data,
                   dis,
                   _eu_dm=None,
                   eps_threshold=95,
                   overlap_params=0.75,
                   min_samples=3,
                   resolution_params="auto",
                   filter_=Filter.PCOA):
    """Project ``dis``, cover the projection and map ``input_data`` to a graph.

    A single lens is built from ``filter_`` (components 0 and 1, a
    ``"precomputed"`` metric and ``random_state=100``) and applied to ``dis``.
    The projection is min-max scaled to build the cover, and ``input_data`` is
    clustered with DBSCAN whose ``eps`` is obtained from
    ``optimize_dbscan_eps``. Timing and graph information are printed when the
    module-level ``global_verbose`` flag is truthy.

    Parameters
    ----------
    input_data
        Data passed to ``optimize_dbscan_eps`` and to ``Mapper.map``.
    dis
        Data passed to ``Mapper.filter``; the lens uses a ``"precomputed"``
        metric, so this is presumably a distance matrix (confirm with caller).
    _eu_dm
        Forwarded to ``optimize_dbscan_eps`` as ``dm``.
    eps_threshold
        Forwarded to ``optimize_dbscan_eps`` as ``threshold``; also the first
        field of the returned name.
    overlap_params
        ``overlap`` argument of the cover.
    min_samples
        ``min_samples`` argument of DBSCAN.
    resolution_params
        ``resolution`` argument of the cover.
    filter_
        Callable used to build the lens.

    Returns
    -------
    tuple
        ``(graph, graph_name, projected_X)`` where ``graph_name`` has the form
        ``"<eps_threshold>_<overlap>_<resolution>_<lens class name>.graph"``.
    """
    tda_mapper = mapper.Mapper(verbose=1)

    # Projection step: one two-component lens applied to ``dis``.
    projection_start = time.time()
    precomputed = Metric(metric="precomputed")
    lens = [filter_(components=[0, 1], metric=precomputed, random_state=100)]
    projected = tda_mapper.filter(dis, lens=lens)
    if global_verbose:
        print("projection takes: ", time.time() - projection_start)

    # Covering, clustering and mapping step.
    mapping_start = time.time()
    eps = optimize_dbscan_eps(input_data, threshold=eps_threshold, dm=_eu_dm)
    clusterer = DBSCAN(eps=eps, min_samples=min_samples)
    scaled_projection = MinMaxScaler().fit_transform(projected)
    cover = Cover(projected_data=scaled_projection,
                  resolution=resolution_params,
                  overlap=overlap_params)
    graph = tda_mapper.map(data=input_data, cover=cover, clusterer=clusterer)
    if global_verbose:
        print(graph.info())
        print("graph time: ", time.time() - mapping_start)

    # The name records the parameters used and the class of the lens.
    lens_name = lens[0].__class__.__name__
    graph_name = "{eps}_{overlap}_{r}_{filter}.graph".format(
        eps=eps_threshold,
        overlap=overlap_params,
        r=resolution_params,
        filter=lens_name)
    return graph, graph_name, projected
示例#2
0
def generate_graph(input_data,
                   dis=None,
                   _eu_dm=None,
                   eps_threshold=95,
                   overlap=0.75,
                   min_samples=3,
                   r=40,
                   filter='PCOA',
                   verbose=1):
    """Build a Mapper graph for ``input_data`` using a filter chosen by name.

    The filter name is looked up in the module-level ``_filter_dict``. The
    resulting class builds a single lens (components 0 and 1, a
    ``"precomputed"`` metric and ``random_state=100``) which is applied to
    ``dis``. The projection is min-max scaled to build the cover, and
    ``input_data`` is clustered with DBSCAN whose ``eps`` is obtained from
    ``optimize_dbscan_eps``.

    Parameters
    ----------
    input_data
        Data passed to ``optimize_dbscan_eps`` and to ``Mapper.map``.
    dis
        Data passed to ``Mapper.filter``; the lens uses a ``"precomputed"``
        metric, so this is presumably a distance matrix (confirm with caller).
    _eu_dm
        Forwarded to ``optimize_dbscan_eps`` as ``dm``.
    eps_threshold
        Forwarded to ``optimize_dbscan_eps`` as ``threshold``.
    overlap
        ``overlap`` argument of the cover.
    min_samples
        ``min_samples`` argument of DBSCAN.
    r
        ``resolution`` argument of the cover.
    filter
        Key into ``_filter_dict`` selecting the lens class.
    verbose
        Passed to ``mapper.Mapper`` and to ``logger`` for progress messages.

    Returns
    -------
    The graph produced by ``Mapper.map``, or ``None`` when ``filter`` is not
    a key of ``_filter_dict`` (a message listing the valid keys is logged).
    """
    # Guard clause: report an unknown filter name and bail out early.
    if filter not in _filter_dict:
        logger("Wrong filter you provide, available filters are",
               ','.join(_filter_dict.keys()),
               verbose=1)
        return None
    # Keep the looked-up class in its own name instead of rebinding the
    # ``filter`` parameter (which already shadows the builtin).
    filter_cls = _filter_dict[filter]

    tm = mapper.Mapper(verbose=verbose)

    # Projection step.
    t1 = time.time()
    metric = Metric(metric="precomputed")
    lens = [filter_cls(components=[0, 1], metric=metric, random_state=100)]
    projected_X = tm.filter(dis, lens=lens)
    logger("projection takes: ", time.time() - t1, verbose=verbose)

    # Covering, clustering and mapping step.
    t1 = time.time()
    eps = optimize_dbscan_eps(input_data, threshold=eps_threshold, dm=_eu_dm)
    clusterer = DBSCAN(eps=eps, min_samples=min_samples)
    cover = Cover(projected_data=MinMaxScaler().fit_transform(projected_X),
                  resolution=r,
                  overlap=overlap)
    graph = tm.map(data=input_data, cover=cover, clusterer=clusterer)
    logger(graph.info(), verbose=verbose)
    logger("graph generator take: ", time.time() - t1, verbose=verbose)
    return graph
示例#3
0
 def __init__(self, metric=Metric(metric="euclidean"), h=0.3):
     """Set up a one-component filter and store the parameter ``h``.

     Parameters
     ----------
     metric
         Passed to the base initialiser; defaults to a ``Metric`` built
         with ``"euclidean"``.
     h
         Stored as ``self.h``; must not be zero. Presumably the kernel
         bandwidth of the density estimate (confirm against its use).

     Raises
     ------
     ValueError
         If ``h`` equals zero.
     """
     # This filter is one-dimensional, so only component 0 is requested.
     super(GaussianDensity, self).__init__(components=[0], metric=metric)
     if h == 0:
         # ValueError is a subclass of Exception, so callers that catch
         # Exception keep working while the error type is now specific.
         raise ValueError("Parameter h must not be zero.")
     self.h = h
示例#4
0
 def __init__(self,
              components=[0, 1],
              metric=Metric(metric="euclidean"),
              random_state=None,
              **kwds):
     """Create the filter and its underlying ``decomposition.PCA`` estimator.

     Parameters
     ----------
     components
         Passed to the base initialiser; the estimator is sized to
         ``max(self.components) + 1`` components.
     metric
         Accepted for signature compatibility but not forwarded: the base
         initialiser always receives ``metric=None``.
     random_state
         Forwarded to ``decomposition.PCA``.
     **kwds
         Additional keyword arguments forwarded to ``decomposition.PCA``.
     """
     # PCA works on raw data and handles euclidean distance internally,
     # which is why no metric is handed to the base class.
     super(PCA, self).__init__(components=components, metric=None)

     # Enough components must be computed to reach the highest index asked for.
     n_components = max(self.components) + 1
     self.pca = decomposition.PCA(n_components=n_components,
                                  random_state=random_state,
                                  **kwds)
示例#5
0
    def __init__(self,
                 components=[0, 1],
                 metric=Metric(metric="euclidean"),
                 **kwds):
        """Create the filter and its underlying ``umap.UMAP`` estimator.

        Parameters
        ----------
        components
            Passed to the base initialiser; the estimator is sized to
            ``max(self.components) + 1`` components.
        metric
            Passed to the base initialiser. When its ``name`` is listed in
            ``_METRIC_BUILT_IN`` that name is given to ``umap.UMAP``;
            otherwise the estimator is created with ``"precomputed"``.
        **kwds
            Additional keyword arguments forwarded to ``umap.UMAP``.
        """
        super(UMAP, self).__init__(components=components, metric=metric)

        # Pick the metric label once, then build the estimator in one place.
        if self.metric.name in _METRIC_BUILT_IN:
            umap_metric = self.metric.name
        else:
            umap_metric = "precomputed"

        self.umap = umap.UMAP(n_components=max(self.components) + 1,
                              metric=umap_metric,
                              **kwds)
示例#6
0
    def __init__(self,
                 components=[0, 1],
                 metric=Metric(metric="euclidean"),
                 **kwds):
        """Create the filter and its underlying ``manifold.TSNE`` estimator.

        Parameters
        ----------
        components
            Passed to the base initialiser; the estimator is sized to
            ``max(self.components) + 1`` components.
        metric
            Passed to the base initialiser. When its ``name`` is listed in
            ``_METRIC_BUILT_IN`` that name is given to ``manifold.TSNE``;
            otherwise the estimator is created with ``"precomputed"``.
        **kwds
            Additional keyword arguments forwarded to ``manifold.TSNE``.
        """
        super(TSNE, self).__init__(components=components, metric=metric)

        # Pick the metric label once, then build the estimator in one place.
        if self.metric.name in _METRIC_BUILT_IN:
            tsne_metric = self.metric.name
        else:
            tsne_metric = "precomputed"

        self.tsne = manifold.TSNE(n_components=max(self.components) + 1,
                                  metric=tsne_metric,
                                  **kwds)
示例#7
0
    def __init__(self,
                 components=[0, 1],
                 metric=Metric(metric="euclidean"),
                 **kwds):
        """Create the filter and its underlying ``manifold.MDS`` estimator.

        Parameters
        ----------
        components
            Passed to the base initialiser; the estimator is sized to
            ``max(self.components) + 1`` components.
        metric
            Passed to the base initialiser. When its ``name`` is
            ``"euclidean"`` the estimator is created with
            ``dissimilarity="euclidean"``; otherwise with
            ``dissimilarity="precomputed"``.
        **kwds
            Additional keyword arguments forwarded to ``manifold.MDS``.
            ``n_jobs`` defaults to ``-1`` when not supplied.
        """
        super(MDS, self).__init__(components=components, metric=metric)

        if self.metric.name == "euclidean":
            dissimilarity = "euclidean"
        else:
            dissimilarity = "precomputed"

        # Apply the n_jobs default through kwds: passing it as a literal
        # keyword next to **kwds raised "multiple values for keyword
        # argument" whenever a caller supplied their own n_jobs.
        kwds.setdefault("n_jobs", -1)

        self.mds = manifold.MDS(n_components=max(self.components) + 1,
                                dissimilarity=dissimilarity,
                                **kwds)
示例#8
0
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import MinMaxScaler

from tmap.tda import mapper, Filter
from tmap.tda.cover import Cover
from tmap.tda.metric import Metric
from tmap.tda.utils import optimize_dbscan_eps
from tmap.test import load_data

# pdist/squareform are needed for the distance matrix below and were not
# imported anywhere in this example.
from scipy.spatial.distance import pdist, squareform

from tmap.netx.SAFE import SAFE_batch, get_SAFE_summary

# load taxa abundance data and sample metadata, then compute a pairwise
# Bray-Curtis distance matrix in square form
X = load_data.FGFP_genus_profile()
metadata = load_data.FGFP_metadata_ready()
dm = squareform(pdist(X, metric='braycurtis'))
############################################################
# Project the distance matrix with a two-component PCOA lens.
tm = mapper.Mapper(verbose=1)
metric = Metric(metric="precomputed")
lens = [Filter.PCOA(components=[0, 1], metric=metric)]  # for quick
projected_X = tm.filter(dm, lens=lens)

# Cluster with DBSCAN (eps derived from the data) over a cover of the
# min-max scaled projection, and build the graph.
eps = optimize_dbscan_eps(X, threshold=95)
clusterer = DBSCAN(eps=eps, min_samples=3)
cover = Cover(projected_data=MinMaxScaler().fit_transform(projected_X),
              resolution=50,
              overlap=0.75)
graph = tm.map(data=X, cover=cover, clusterer=clusterer)
node_data = graph.transform_sn(X)

# Compute SAFE scores for the metadata on the graph and summarise the
# 'enrich' scores; n_iter is shared by both calls.
n_iter = 5000
safe_scores = SAFE_batch(graph,
                         metadata=metadata,
                         n_iter=n_iter,
                         nr_threshold=0.5,
                         _mode='both',
                         random_seed=100)
enriched_SAFE_metadata, declined_SAFE_metadata = safe_scores['enrich'], safe_scores['decline']
safe_summary_metadata = get_SAFE_summary(graph=graph,
                                         metadata=metadata,
                                         safe_scores=enriched_SAFE_metadata,
                                         n_iter=n_iter,
                                         p_value=0.05)
示例#9
0
 def __init__(self, metric=Metric(metric="euclidean")):
     """Set up a one-component filter using ``metric``.

     Parameters
     ----------
     metric
         Passed to the base initialiser; defaults to a ``Metric`` built
         with ``"euclidean"``.
     """
     # This filter is one-dimensional, so only component 0 is requested.
     single_component = [0]
     super(LinfCentrality, self).__init__(components=single_component,
                                          metric=metric)
示例#10
0
 def __init__(self, metric=Metric(metric="euclidean"), **kwds):
     """Initialise the base class with its defaults and store ``metric``.

     Parameters
     ----------
     metric
         Stored as ``self.metric``; defaults to a ``Metric`` built with
         ``"euclidean"``.
     **kwds
         Accepted but not used by this initialiser.
     """
     super(PCOA, self).__init__()
     # Assigned after the base initialiser, so this value is the one kept.
     self.metric = metric
示例#11
0
 def __init__(self,
              components=[0, 1],
              metric=Metric(metric="euclidean"),
              **kwds):
     """Initialise the base class with ``components`` and ``metric``.

     Parameters
     ----------
     components
         Passed to the base initialiser.
     metric
         Passed to the base initialiser and also stored as
         ``self.metric``; defaults to a ``Metric`` built with
         ``"euclidean"``.
     **kwds
         Accepted but not used by this initialiser.
     """
     super(PCOA, self).__init__(components=components, metric=metric)
     # Assigned after the base initialiser, so this value is the one kept.
     self.metric = metric