def generate_graph(input_data, dis, _eu_dm=None, eps_threshold=95, overlap_params=0.75, min_samples=3, resolution_params="auto", filter_=Filter.PCOA):
    """Build a TDA mapper graph from raw data plus a precomputed distance matrix.

    Parameters
    ----------
    input_data : raw feature table used for clustering within the cover.
    dis : precomputed (sample x sample) distance matrix fed to the lens.
    _eu_dm : optional euclidean distance matrix forwarded to the eps optimizer.
    eps_threshold : percentile threshold used by ``optimize_dbscan_eps``.
    overlap_params : fractional overlap between cover hypercubes.
    min_samples : DBSCAN ``min_samples``.
    resolution_params : cover resolution (the project accepts "auto" here).
    filter_ : lens class to project with (defaults to ``Filter.PCOA``).

    Returns
    -------
    (graph, graph_name, projected_X) — the mapper graph, a file-name style
    identifier encoding the parameters, and the 2-D projection.
    """
    tda = mapper.Mapper(verbose=1)

    # Projection step: push the distance matrix through the chosen lens.
    start = time.time()
    lenses = [filter_(components=[0, 1], metric=Metric(metric="precomputed"), random_state=100)]
    projected_X = tda.filter(dis, lens=lenses)
    if global_verbose:  # module-level verbosity flag (defined elsewhere in the file)
        print("projection takes: ", time.time() - start)

    # Covering + clustering step.
    start = time.time()
    eps = optimize_dbscan_eps(input_data, threshold=eps_threshold, dm=_eu_dm)
    clusterer = DBSCAN(eps=eps, min_samples=min_samples)
    r = resolution_params
    scaled_projection = MinMaxScaler().fit_transform(projected_X)
    cover = Cover(projected_data=scaled_projection, resolution=r, overlap=overlap_params)
    graph = tda.map(data=input_data, cover=cover, clusterer=clusterer)
    if global_verbose:
        print(graph.info())
        print("graph time: ", time.time() - start)

    # Encode the parameter combination into a reproducible identifier.
    graph_name = "{eps}_{overlap}_{r}_{filter}.graph".format(
        eps=eps_threshold, overlap=overlap_params, r=r,
        filter=lenses[0].__class__.__name__)
    return graph, graph_name, projected_X
def generate_graph(input_data, dis=None, _eu_dm=None, eps_threshold=95, overlap=0.75, min_samples=3, r=40, filter='PCOA', verbose=1):
    """Build a TDA mapper graph, selecting the lens by name.

    Parameters
    ----------
    input_data : raw feature table used for clustering within the cover.
    dis : precomputed distance matrix fed to the lens.
    _eu_dm : optional euclidean distance matrix for the eps optimizer.
    eps_threshold : percentile threshold used by ``optimize_dbscan_eps``.
    overlap : fractional overlap between cover hypercubes.
    min_samples : DBSCAN ``min_samples``.
    r : cover resolution.
    filter : name of the lens, looked up in ``_filter_dict``.
    verbose : forwarded to the Mapper and logger calls.

    Returns
    -------
    The mapper graph, or ``None`` when ``filter`` is not a known lens name.
    """
    if filter not in _filter_dict:
        # Always report this error (verbose=1), regardless of caller verbosity.
        # Fix: message previously read "available fitler are".
        logger("Wrong filter you provide, available filters are", ','.join(_filter_dict.keys()), verbose=1)
        return None
    # Resolve to the lens class; a separate name avoids shadowing builtin `filter`.
    filter_cls = _filter_dict[filter]

    tm = mapper.Mapper(verbose=verbose)

    # Projection step.
    t1 = time.time()
    metric = Metric(metric="precomputed")
    lens = [filter_cls(components=[0, 1], metric=metric, random_state=100)]
    projected_X = tm.filter(dis, lens=lens)
    logger("projection takes: ", time.time() - t1, verbose=verbose)

    # Covering + clustering step.
    t1 = time.time()
    eps = optimize_dbscan_eps(input_data, threshold=eps_threshold, dm=_eu_dm)
    clusterer = DBSCAN(eps=eps, min_samples=min_samples)
    cover = Cover(projected_data=MinMaxScaler().fit_transform(projected_X), resolution=r, overlap=overlap)
    graph = tm.map(data=input_data, cover=cover, clusterer=clusterer)
    logger(graph.info(), verbose=verbose)
    logger("graph generator take: ", time.time() - t1, verbose=verbose)
    return graph
def __init__(self, metric=Metric(metric="euclidean"), h=0.3):
    """Gaussian-density lens; always produces a single (1-D) component.

    Parameters
    ----------
    metric : distance metric used by the base lens (default: euclidean).
    h : kernel bandwidth; must be non-zero.

    Raises
    ------
    Exception : if ``h`` is zero.
    """
    # Guard clause: reject a zero bandwidth before doing any base-class setup.
    if h == 0:
        raise Exception("Parameter h must not be zero.")
    super(GaussianDensity, self).__init__(components=[0], metric=metric)
    self.h = h  # bandwidth, consumed later by the density computation
def __init__(self, components=None, metric=Metric(metric="euclidean"), random_state=None, **kwds):
    """PCA lens.

    Parameters
    ----------
    components : output component indices (default ``[0, 1]``).
    metric : accepted for interface symmetry with the other lenses but ignored —
        PCA only accepts raw data and calculates euclidean distance internally,
        so the base class receives ``metric=None``.
    random_state : forwarded to ``sklearn.decomposition.PCA``.
    **kwds : extra keyword arguments forwarded to ``sklearn.decomposition.PCA``.
    """
    # Fix: avoid the shared-mutable-default pitfall (previously components=[0, 1]);
    # a fresh list is created on every call, behavior is otherwise unchanged.
    components = [0, 1] if components is None else components
    super(PCA, self).__init__(components=components, metric=None)
    # n_components must cover the highest requested component index.
    self.pca = decomposition.PCA(n_components=max(self.components) + 1, random_state=random_state, **kwds)
def __init__(self, components=None, metric=Metric(metric="euclidean"), **kwds):
    """UMAP lens.

    Parameters
    ----------
    components : output component indices (default ``[0, 1]``).
    metric : distance metric; if its name is a UMAP built-in it is passed
        through, otherwise UMAP is run in ``"precomputed"`` mode.
    **kwds : extra keyword arguments forwarded to ``umap.UMAP``.
    """
    # Fix: avoid the shared-mutable-default pitfall (previously components=[0, 1]).
    components = [0, 1] if components is None else components
    super(UMAP, self).__init__(components=components, metric=metric)
    if self.metric.name in _METRIC_BUILT_IN:
        # UMAP supports this metric natively.
        self.umap = umap.UMAP(n_components=max(self.components) + 1, metric=self.metric.name, **kwds)
    else:
        # Fall back to feeding UMAP a precomputed distance matrix.
        self.umap = umap.UMAP(n_components=max(self.components) + 1, metric="precomputed", **kwds)
def __init__(self, components=None, metric=Metric(metric="euclidean"), **kwds):
    """t-SNE lens.

    Parameters
    ----------
    components : output component indices (default ``[0, 1]``).
    metric : distance metric; if its name is a built-in it is passed through,
        otherwise t-SNE is run in ``"precomputed"`` mode.
    **kwds : extra keyword arguments forwarded to ``sklearn.manifold.TSNE``.
    """
    # Fix: avoid the shared-mutable-default pitfall (previously components=[0, 1]).
    components = [0, 1] if components is None else components
    super(TSNE, self).__init__(components=components, metric=metric)
    if self.metric.name in _METRIC_BUILT_IN:
        # sklearn TSNE supports this metric natively.
        self.tsne = manifold.TSNE(n_components=max(self.components) + 1, metric=self.metric.name, **kwds)
    else:
        # Fall back to a precomputed distance matrix.
        self.tsne = manifold.TSNE(n_components=max(self.components) + 1, metric="precomputed", **kwds)
def __init__(self, components=None, metric=Metric(metric="euclidean"), **kwds):
    """MDS lens.

    Parameters
    ----------
    components : output component indices (default ``[0, 1]``).
    metric : distance metric; euclidean is handled natively by sklearn MDS,
        anything else switches to ``dissimilarity="precomputed"``.
    **kwds : extra keyword arguments forwarded to ``sklearn.manifold.MDS``.
    """
    # Fix: avoid the shared-mutable-default pitfall (previously components=[0, 1]).
    components = [0, 1] if components is None else components
    super(MDS, self).__init__(components=components, metric=metric)
    if self.metric.name == "euclidean":
        self.mds = manifold.MDS(n_components=max(self.components) + 1, dissimilarity="euclidean", n_jobs=-1, **kwds)
    else:
        # Non-euclidean metrics require a precomputed dissimilarity matrix.
        self.mds = manifold.MDS(n_components=max(self.components) + 1, dissimilarity="precomputed", n_jobs=-1, **kwds)
"""Example: build a tmap TDA network on the FGFP genus profile and run SAFE."""
# Fix: `pdist` and `squareform` were used below without ever being imported
# (NameError at runtime); the scipy import adds them. The mid-script SAFE
# import is hoisted to the top with the rest.
from scipy.spatial.distance import pdist, squareform
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import MinMaxScaler

from tmap.netx.SAFE import SAFE_batch, get_SAFE_summary
from tmap.tda import mapper, Filter
from tmap.tda.cover import Cover
from tmap.tda.metric import Metric
from tmap.tda.utils import optimize_dbscan_eps
from tmap.test import load_data

# load taxa abundance data, sample metadata and precomputed distance matrix
X = load_data.FGFP_genus_profile()
metadata = load_data.FGFP_metadata_ready()
# Bray-Curtis dissimilarity between samples, as a square matrix.
dm = squareform(pdist(X, metric='braycurtis'))

############################################################
# TDA pipeline: project with PCoA, cluster with DBSCAN inside the cover.
tm = mapper.Mapper(verbose=1)
metric = Metric(metric="precomputed")
lens = [Filter.PCOA(components=[0, 1], metric=metric)]  # for quick
projected_X = tm.filter(dm, lens=lens)
eps = optimize_dbscan_eps(X, threshold=95)
clusterer = DBSCAN(eps=eps, min_samples=3)
cover = Cover(projected_data=MinMaxScaler().fit_transform(projected_X), resolution=50, overlap=0.75)
graph = tm.map(data=X, cover=cover, clusterer=clusterer)
node_data = graph.transform_sn(X)

# SAFE enrichment/decline analysis of the metadata over the graph.
n_iter = 5000
safe_scores = SAFE_batch(graph, metadata=metadata, n_iter=n_iter, nr_threshold=0.5, _mode='both', random_seed=100)
enriched_SAFE_metadata, declined_SAFE_metadata = safe_scores['enrich'], safe_scores['decline']
safe_summary_metadata = get_SAFE_summary(graph=graph, metadata=metadata, safe_scores=enriched_SAFE_metadata, n_iter=n_iter, p_value=0.05)
def __init__(self, metric=Metric(metric="euclidean")):
    """L-infinity centrality lens.

    Always produces a single (1-D) output component; the metric defaults
    to euclidean.
    """
    super(LinfCentrality, self).__init__(metric=metric, components=[0])
def __init__(self, metric=Metric(metric="euclidean"), **kwds):
    """PCoA lens.

    Parameters
    ----------
    metric : distance metric stored on the instance (default: euclidean).
    **kwds : accepted but not used here — NOTE(review): presumably kept for
        signature symmetry with the other lenses; confirm before removing.
    """
    super(PCOA, self).__init__()
    # Stored after the base-class setup so it is not clobbered by it.
    self.metric = metric
def __init__(self, components=None, metric=Metric(metric="euclidean"), **kwds):
    """PCoA lens.

    Parameters
    ----------
    components : output component indices (default ``[0, 1]``).
    metric : distance metric, forwarded to the base lens and also stored
        directly on the instance.
    **kwds : accepted but not used here — NOTE(review): presumably kept for
        signature symmetry with the other lenses; confirm before removing.
    """
    # Fix: avoid the shared-mutable-default pitfall (previously components=[0, 1]).
    components = [0, 1] if components is None else components
    super(PCOA, self).__init__(components=components, metric=metric)
    # NOTE(review): the base __init__ already received `metric`; this direct
    # assignment looks redundant — confirm the base stores it before removing.
    self.metric = metric