示例#1
0
def tsne_emb(model_path: str, proc_dir: str, layer_name: str,
             perplexities: List[int], n_iter: int):
    """Run multiscale t-SNE on one entry of a saved model and store the result.

    The checkpoint at ``model_path`` is loaded onto the CPU, the tensor stored
    under ``layer_name`` in its ``'model'`` mapping is converted to a NumPy
    array, and that array is embedded with openTSNE (cosine-metric multiscale
    affinities, PCA initialization, FFT negative gradients).

    Two files are written into ``proc_dir``:
      * ``tsne_emb.csv`` -- the embedding coordinates in columns ``x`` and ``y``
      * ``kl_divergence`` -- the final KL divergence, four decimal places

    Args:
        model_path: Path of the checkpoint passed to ``torch.load``.
        proc_dir: Directory receiving the two output files.
        layer_name: Key of the tensor to embed inside ``checkpoint['model']``.
        perplexities: Perplexity values for the multiscale affinities.
        n_iter: Number of optimization iterations.
    """
    checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
    weights = checkpoint['model'][layer_name].numpy()

    # Fixed seeds (3 / 42 / 4) keep the individual stages reproducible.
    affinities = affinity.Multiscale(weights,
                                     perplexities=perplexities,
                                     metric="cosine",
                                     n_jobs=-1,
                                     random_state=3)
    pca_init = initialization.pca(weights, random_state=42)

    start = TSNEEmbedding(pca_init,
                          affinities,
                          negative_gradient_method="fft",
                          n_jobs=-1,
                          random_state=4,
                          verbose=True)
    # Single optimization phase; no exaggeration value is supplied.
    optimized = start.optimize(n_iter=n_iter,
                               exaggeration=None,
                               momentum=0.8,
                               learning_rate="auto")

    coords_file = path.join(proc_dir, 'tsne_emb.csv')
    pd.DataFrame(optimized, columns=['x', 'y']).to_csv(coords_file,
                                                       index=False)

    kl_file = path.join(proc_dir, 'kl_divergence')
    with open(kl_file, 'w') as f:
        f.write(f'{optimized.kl_divergence:.4f}')
示例#2
0
    def test_affinity(self):
        """Fit and partially re-embed ``self.x`` with every affinity class.

        Each affinity object is fitted twice -- once letting ``TSNE`` pick its
        own initialization and once with an explicit random initialization --
        and after each fit the same data is pushed through ``prepare_partial``
        followed by a short in-place optimization. Both the fitted and the
        partial embeddings are handed to ``self.eval_embedding``.
        """
        init = initialization.random(self.x, random_state=0)

        affinity_objects = [
            affinity.PerplexityBasedNN(self.x, perplexity=30),
            affinity.Uniform(self.x, k_neighbors=30),
            affinity.FixedSigmaNN(self.x, sigma=1, k=30),
            affinity.Multiscale(self.x, perplexities=[10, 30]),
            affinity.MultiscaleMixture(self.x, perplexities=[10, 30]),
        ]
        # First variant: no initialization argument; second: explicit one.
        fit_variants = ({}, {"initialization": init})

        for aff in affinity_objects:
            label = aff.__class__.__name__
            for extra_fit_kwargs in fit_variants:
                embedding = TSNE().fit(affinities=aff, **extra_fit_kwargs)
                self.eval_embedding(embedding, self.y, label)

                new_embedding = embedding.prepare_partial(self.x)
                new_embedding.optimize(10, learning_rate=0.1, inplace=True)
                self.eval_embedding(new_embedding, self.y,
                                    f"transform::{label}")
示例#3
0
    def test_affinity_with_precomputed_neighbors(self):
        """Fit TSNE from affinities built on a precomputed neighbor index.

        The 30 nearest neighbors of ``self.x`` are found with
        ``NearestNeighbors`` and wrapped in a ``PrecomputedNeighbors`` index,
        which every affinity class then receives via ``knn_index``. Each
        affinity object is fitted once without and once with an explicit
        random initialization, and each embedding is passed to
        ``self.eval_embedding``.
        """
        searcher = NearestNeighbors(n_neighbors=30)
        searcher.fit(self.x)
        distances, neighbors = searcher.kneighbors(n_neighbors=30)

        knn_index = nearest_neighbors.PrecomputedNeighbors(
            neighbors, distances)
        init = initialization.random(self.x, random_state=0)

        affinity_objects = [
            affinity.PerplexityBasedNN(knn_index=knn_index, perplexity=30),
            affinity.Uniform(knn_index=knn_index, k_neighbors=30),
            affinity.FixedSigmaNN(knn_index=knn_index, sigma=1),
            affinity.Multiscale(knn_index=knn_index, perplexities=[10, 20]),
            affinity.MultiscaleMixture(knn_index=knn_index,
                                       perplexities=[10, 20]),
        ]
        # First variant: no initialization argument; second: explicit one.
        fit_variants = ({}, {"initialization": init})

        for aff in affinity_objects:
            label = aff.__class__.__name__
            for extra_fit_kwargs in fit_variants:
                embedding = TSNE().fit(affinities=aff, **extra_fit_kwargs)
                self.eval_embedding(embedding, self.y, label)
示例#4
0
    def test_affinity_with_precomputed_distances(self):
        """Fit TSNE from affinities built on a precomputed distance matrix.

        The pairwise distances of ``self.x`` are expanded to a square matrix
        and wrapped in a ``PrecomputedDistanceMatrix`` index (``k=30``), which
        every affinity class receives via ``knn_index``. Each affinity object
        is fitted once without and once with an explicit random
        initialization, and each embedding is evaluated against the labels
        ``self.y``.
        """
        d = squareform(pdist(self.x))
        knn_index = nearest_neighbors.PrecomputedDistanceMatrix(d, k=30)
        init = initialization.random(self.x, random_state=0)

        for aff in [
                affinity.PerplexityBasedNN(knn_index=knn_index, perplexity=30),
                affinity.Uniform(knn_index=knn_index, k_neighbors=30),
                affinity.FixedSigmaNN(knn_index=knn_index, sigma=1),
                affinity.Multiscale(knn_index=knn_index, perplexities=[10,
                                                                       20]),
                affinity.MultiscaleMixture(knn_index=knn_index,
                                           perplexities=[10, 20]),
        ]:
            # Without initialization
            embedding = TSNE().fit(affinities=aff)
            # The labels go before the method name, matching the other
            # eval_embedding calls in this test class; previously the class
            # name was passed in the labels position.
            self.eval_embedding(embedding, self.y, aff.__class__.__name__)

            # With initialization
            embedding = TSNE().fit(affinities=aff, initialization=init)
            self.eval_embedding(embedding, self.y, aff.__class__.__name__)
示例#5
0
def art_of_tsne(X: np.ndarray,
                metric: Union[str, Callable] = "euclidean",
                exaggeration: float = -1,
                perplexity: int = 30,
                n_jobs: int = -1) -> TSNEEmbedding:
    """
    Implementation of Dmitry Kobak and Philipp Berens
    "The art of using t-SNE for single-cell transcriptomics" based on openTSNE.
    See https://doi.org/10.1038/s41467-019-13056-x | www.nature.com/naturecommunications

    The strategy is chosen from the number of samples ``n = X.shape[0]``:

    * ``n > 100_000``: a random subset of ``n // 40`` samples is embedded by a
      recursive call (using the same ``metric``, ``perplexity`` and
      ``n_jobs``), the remaining samples are positioned with
      ``prepare_partial``, the combined layout is rescaled and used as the
      initialization for a perplexity-based embedding of the full data.
    * ``3_000 < n <= 100_000``: multiscale affinities with perplexities
      ``[perplexity, n / 100]`` and PCA initialization.
    * ``n <= 3_000``: perplexity-based affinities, PCA initialization and a
      learning rate of ``max(200, n / 12)``.

    Every branch optimizes in two stages: 250 iterations with exaggeration 12
    and momentum 0.5, then 750 iterations with ``exaggeration`` and
    momentum 0.8.

    Args:
        X               The data matrix of shape (n_cells, n_genes) i.e. (n_samples, n_features)
        metric          Any metric allowed by PyNNDescent (default: 'euclidean')
        exaggeration    The exaggeration to use for the second optimization
                        stage. The default -1 selects it automatically:
                        ``1 + n / 333_333`` when ``n > 100_000``, otherwise 1.
        perplexity      The perplexity to use for the embedding
        n_jobs          Number of jobs handed to the openTSNE affinity,
                        embedding and optimize calls (default: -1)

    Returns:
        The embedding as an opentsne.TSNEEmbedding object (which can be cast to an np.ndarray)
    """
    n = X.shape[0]
    if n > 100_000:
        if exaggeration == -1:
            exaggeration = 1 + n / 333_333
        # Subsample, optimize, then add the remaining cells and optimize again
        n_sample = n // 40
        logging.info(f"Creating subset of {n_sample} elements")
        # Shuffle the rows; `reverse` undoes the shuffle so the combined
        # initialization can be put back into the original row order.
        indices = np.random.permutation(n)
        reverse = np.argsort(indices)
        X_sample, X_rest = X[indices[:n_sample]], X[indices[n_sample:]]
        logging.info("Embedding subset")
        # Forward the caller's settings so the subset is embedded with the
        # same metric/perplexity/parallelism as the full data. Exaggeration is
        # deliberately left at its automatic default for the subset.
        Z_sample = art_of_tsne(X_sample,
                               metric=metric,
                               perplexity=perplexity,
                               n_jobs=n_jobs)

        logging.info(
            f"Preparing partial initial embedding of the {n - n_sample} remaining elements"
        )
        # Depending on its size the subset was embedded with either multiscale
        # or single-perplexity affinities; prepare_partial needs the matching
        # keyword in each case.
        if isinstance(Z_sample.affinities, affinity.Multiscale):
            rest_init = Z_sample.prepare_partial(X_rest,
                                                 k=1,
                                                 perplexities=[1 / 3, 1 / 3])
        else:
            rest_init = Z_sample.prepare_partial(X_rest, k=1, perplexity=1 / 3)
        logging.info("Combining the initial embeddings, and standardizing")
        init_full = np.vstack((Z_sample, rest_init))[reverse]
        # Rescale so the first coordinate has a standard deviation of 1e-4
        init_full = init_full / (np.std(init_full[:, 0]) * 10000)

        # The full data uses a single perplexity, not multiscale affinities
        logging.info("Creating perplexity-based affinities")
        affinities = affinity.PerplexityBasedNN(X,
                                                perplexity=perplexity,
                                                metric=metric,
                                                method="approx",
                                                n_jobs=n_jobs)
        logging.info("Creating TSNE embedding")
        Z = TSNEEmbedding(init_full,
                          affinities,
                          negative_gradient_method="fft",
                          n_jobs=n_jobs)
        logging.info("Optimizing, stage 1")
        Z.optimize(n_iter=250,
                   inplace=True,
                   exaggeration=12,
                   momentum=0.5,
                   learning_rate=n / 12,
                   n_jobs=n_jobs)
        logging.info("Optimizing, stage 2")
        Z.optimize(n_iter=750,
                   inplace=True,
                   exaggeration=exaggeration,
                   momentum=0.8,
                   learning_rate=n / 12,
                   n_jobs=n_jobs)
    elif n > 3_000:
        if exaggeration == -1:
            exaggeration = 1
        # Use multiscale perplexity
        affinities_multiscale_mixture = affinity.Multiscale(
            X,
            perplexities=[perplexity, n / 100],
            metric=metric,
            method="approx",
            n_jobs=n_jobs)
        init = initialization.pca(X)
        Z = TSNEEmbedding(init,
                          affinities_multiscale_mixture,
                          negative_gradient_method="fft",
                          n_jobs=n_jobs)
        Z.optimize(n_iter=250,
                   inplace=True,
                   exaggeration=12,
                   momentum=0.5,
                   learning_rate=n / 12,
                   n_jobs=n_jobs)
        Z.optimize(n_iter=750,
                   inplace=True,
                   exaggeration=exaggeration,
                   momentum=0.8,
                   learning_rate=n / 12,
                   n_jobs=n_jobs)
    else:
        if exaggeration == -1:
            exaggeration = 1
        # Just a plain TSNE with high learning rate
        lr = max(200, n / 12)
        aff = affinity.PerplexityBasedNN(X,
                                         perplexity=perplexity,
                                         metric=metric,
                                         method="approx",
                                         n_jobs=n_jobs)
        init = initialization.pca(X)
        Z = TSNEEmbedding(init,
                          aff,
                          learning_rate=lr,
                          n_jobs=n_jobs,
                          negative_gradient_method="fft")
        Z.optimize(250,
                   exaggeration=12,
                   momentum=0.5,
                   inplace=True,
                   n_jobs=n_jobs)
        Z.optimize(750,
                   exaggeration=exaggeration,
                   momentum=0.8,
                   inplace=True,
                   n_jobs=n_jobs)
    return Z
示例#6
0
# Build one 256-column profile row per contig.
total = len(ctgs)
# np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased.
profile = np.ones((total, 256), dtype=np.float64)
# Each contig fills its own row. Previously every contig was written to row 0,
# leaving all other rows at their initial value of one.
for row, ctg in enumerate(ctgs):
    raw = fp[ctg][:]
    # Rescale the values so they total 256 (assumes raw is 1-D -- TODO confirm)
    norm = raw * 256.0 / sum(raw)
    profile[row, ] = norm

## 2. Run t-SNE
seed = 211
threads = int(cores)

# Multiscale affinities combining perplexities 30 and 300, cosine metric
affinities_multiscale_mixture = affinity.Multiscale(
    profile,
    perplexities=[30, 300],
    metric="cosine",
    n_jobs=threads,
    random_state=seed,
)

init = initialization.pca(profile, random_state=seed)

embedding = TSNEEmbedding(
    init,
    affinities_multiscale_mixture,
    negative_gradient_method="fft",
    n_jobs=threads,
)

# Two-stage optimization: 250 iterations with exaggeration 6, then 750 with
# exaggeration 1. Each call's result is kept under its own name.
embedding1 = embedding.optimize(n_iter=250, exaggeration=6, momentum=0.5)
embedding2 = embedding1.optimize(n_iter=750, exaggeration=1, momentum=0.8)