Пример #1
0
def ex3(wv):
    """Show an annotated 2-D t-SNE scatter plot of a fixed set of phrases.

    Every phrase is passed through ``sanitize`` and looked up in ``wv``; the
    resulting vectors are embedded with ``MulticoreTSNE`` and plotted, each
    point labelled with its (unsanitized) phrase.

    Args:
        wv: Word-vector container that can be indexed with a collection of
            keys (presumably a gensim ``KeyedVectors`` -- TODO confirm).
    """
    phrases = [
        "szkoda",
        "strata",
        "uszczerbek",
        "szkoda majątkowa",
        # "uszczerbek na zdrowiu",
        "krzywda",
        "niesprawiedliwość",
        "nieszczęście"
    ]

    tsne = MulticoreTSNE(n_components=2, n_jobs=os.cpu_count())

    # Only the phrase vectors are embedded.  ``fit_transform`` learns the
    # embedding from scratch on whatever it is given, so pre-fitting the model
    # on the whole vocabulary would be expensive work that is then discarded.
    phrase_vectors = wv[[sanitize(phrase) for phrase in phrases]]
    vectors_embedded = tsne.fit_transform(phrase_vectors)

    fig, ax = plt.subplots()
    ax.scatter(vectors_embedded[:, 0], vectors_embedded[:, 1])

    # Label each point with the phrase it represents.
    for i, phrase in enumerate(phrases):
        ax.annotate(phrase, (vectors_embedded[i, 0], vectors_embedded[i, 1]))

    plt.show()
Пример #2
0
def toTsne(train, test, n_component=2, file_name='tf_idf', path='data/'):
    """Embed ``train`` and ``test`` jointly with t-SNE and pickle both parts.

    The two inputs are stacked vertically, a single t-SNE model is fitted on
    the combined matrix, and the embedding is split back at the original
    train/test boundary before being written to disk.

    Args:
        train: 2-D array-like; its ``shape[0]`` marks the split point.
        test: 2-D array-like that can be vertically stacked with ``train``.
        n_component: Dimensionality of the embedding.
        file_name: Suffix inserted into the output file names.
        path: Prefix prepended verbatim to the file names (no separator is
            added, so it should end with one).

    Returns:
        The string ``'Success'``.
    """
    # The keyword is spelled ``n_jobs``; ``njobs`` is rejected by the
    # constructor.  NOTE(review): assumes TSNE is MulticoreTSNE or
    # scikit-learn's TSNE -- confirm against the file's imports.
    tsne = TSNE(n_components=n_component, random_state=1123, n_jobs=-1)
    lentrain = train.shape[0]
    X = np.vstack([train, test])
    tsne.fit(X)
    res = tsne.embedding_

    # Rows [0, lentrain) belong to train, the remainder to test.
    pd.to_pickle(res[:lentrain], path+'train_svd_20_tsne_'+str(n_component)+'_'+file_name+'.pkl')
    pd.to_pickle(res[lentrain:], path+'test_svd_20_tsne_'+str(n_component)+'_'+file_name+'.pkl')
    return 'Success'
Пример #3
0
def tsneError_MultiCpore_Tsne(params, data, perplexity):
    """Print and return the KL divergence MulticoreTSNE reports for a layout.

    A model is created with ``init=params``, ``n_iter=0`` and
    ``early_exaggeration=1`` and fitted on ``data``; the fitted model's
    ``kl_divergence_`` is the result.

    Args:
        params: Value handed to the model as ``init`` (presumably an existing
            embedding to score -- TODO confirm).
        data: Input handed to ``fit``.
        perplexity: Perplexity handed to the model.

    Returns:
        The ``kl_divergence_`` attribute of the fitted model.
    """
    # TODO: avoid recreating P on every call.
    # HACK: a fresh model is built and fitted just to read off the error.
    model_kwargs = {
        'perplexity': perplexity,
        'init': params,
        'n_iter': 0,
        'early_exaggeration': 1,
    }
    fitted = MulticoreTSNE(**model_kwargs).fit(data)
    kl = fitted.kl_divergence_
    print(kl)
    return kl
Пример #4
0
def fit_tsne(X,
             perplexity=PERPLEXITY,
             n_iter=N_ITER,
             learn_rate=200.,
             early_exag=12.):
    """Fit t-SNE on ``X`` and return the resulting embedding.

    ``MulticoreTSNE`` is used when it can be imported; otherwise the function
    falls back to ``TSNEApprox``.  Both are configured identically (fixed
    ``random_state=69``), except that the multicore variant also receives
    ``n_jobs=40``.

    Args:
        X: Data handed to ``tsne.fit``.
        perplexity: t-SNE perplexity.
        n_iter: Number of optimization iterations.
        learn_rate: Passed as ``learning_rate``.
        early_exag: Passed as ``early_exaggeration``.

    Returns:
        The fitted model's ``embedding_`` attribute.
    """
    # Shared settings; ``n_iter`` comes from the caller rather than being
    # hard-coded, so the parameter actually takes effect.
    tsne_kwargs = dict(
        n_iter=n_iter,
        perplexity=perplexity,
        learning_rate=learn_rate,
        early_exaggeration=early_exag,
        random_state=69,
    )
    try:
        from MulticoreTSNE import MulticoreTSNE
        tsne = MulticoreTSNE(n_jobs=40, **tsne_kwargs)
    except ImportError:
        # Optional dependency missing: use the fallback implementation.
        tsne = TSNEApprox(**tsne_kwargs)
    tsne.fit(X)
    return tsne.embedding_
Пример #5
0
def visualize(assembled,
              labels,
              namespace,
              data_names,
              gene_names=None,
              gene_expr=None,
              genes=None,
              n_iter=N_ITER,
              perplexity=PERPLEXITY,
              verbose=VERBOSE,
              learn_rate=200.,
              early_exag=12.,
              embedding=None,
              shuffle_ds=False,
              size=1,
              multicore_tsne=True,
              image_suffix='.svg',
              viz_cluster=False,
              colors=None):
    """Embed the data with t-SNE (unless given) and save cluster plots.

    Args:
        assembled: Sequence of arrays; concatenated with ``np.concatenate``
            and fitted when ``embedding`` is None.
        labels: Per-row labels; must support indexing with a list of row
            indices when ``shuffle_ds`` is set.
        namespace: Prefix for output files; the combined plot is saved to
            ``namespace + image_suffix``.
        data_names: Names used for the per-cluster plots; position ``i`` is
            passed as the cluster index.
        gene_names: Gene names to plot expression for (optional).
        gene_expr: Expression matrix passed to ``visualize_expr`` (optional).
        genes: Gene list passed to ``visualize_expr`` (optional).
        n_iter: Number of t-SNE iterations.
        perplexity: t-SNE perplexity.
        verbose: Verbosity passed to the t-SNE implementation.
        learn_rate: Passed as ``learning_rate``.
        early_exag: Passed as ``early_exaggeration``.
        embedding: Precomputed embedding; skips the t-SNE fit when provided.
        shuffle_ds: Randomly permute the rows of the embedding, labels and
            expression matrix before plotting.
        size: Marker size forwarded to the plotting helpers.
        multicore_tsne: Prefer ``MulticoreTSNE``; ``TSNEApprox`` is used when
            this is False or the package cannot be imported.
        image_suffix: File extension for saved figures.
        viz_cluster: Also plot each cluster individually (skipped when
            ``shuffle_ds`` is set).
        colors: Forwarded to ``plot_clusters``.

    Returns:
        The embedding that was plotted (row-permuted when ``shuffle_ds``).
    """
    # Fit t-SNE.
    if embedding is None:
        tsne_kwargs = dict(n_iter=n_iter,
                           perplexity=perplexity,
                           verbose=verbose,
                           random_state=69,
                           learning_rate=learn_rate,
                           early_exaggeration=early_exag)
        tsne = None
        if multicore_tsne:
            try:
                from MulticoreTSNE import MulticoreTSNE
                tsne = MulticoreTSNE(n_jobs=40, **tsne_kwargs)
            except ImportError:
                # Optional dependency missing: fall through to TSNEApprox.
                tsne = None

        if tsne is None:
            tsne = TSNEApprox(**tsne_kwargs)

        tsne.fit(np.concatenate(assembled))
        embedding = tsne.embedding_

    rand_idx = None
    if shuffle_ds:
        # Shuffle a real list in place; shuffling a temporary copy would
        # leave the index order untouched.
        rand_idx = list(range(embedding.shape[0]))
        random.shuffle(rand_idx)
        embedding = embedding[rand_idx, :]
        labels = labels[rand_idx]

    # Plot clusters together.
    plot_clusters(embedding, labels, s=size, colors=colors)
    plt.title(('Panorama ({} iter, perplexity: {}, sigma: {}, ' +
               'knn: {}, hvg: {}, dimred: {}, approx: {})').format(
                   n_iter, perplexity, SIGMA, KNN, HVG, DIMRED, APPROX))
    plt.savefig(namespace + image_suffix, dpi=500)

    # Plot clusters individually.
    if viz_cluster and not shuffle_ds:
        for i, data_name in enumerate(data_names):
            visualize_cluster(embedding,
                              i,
                              labels,
                              cluster_name=data_name,
                              size=size,
                              viz_prefix=namespace,
                              image_suffix=image_suffix)

    # Plot gene expression levels.
    if (gene_names is not None and
            gene_expr is not None and
            genes is not None):
        if shuffle_ds:
            # Keep expression rows aligned with the permuted embedding.
            gene_expr = gene_expr[rand_idx, :]
        for gene_name in gene_names:
            visualize_expr(gene_expr,
                           embedding,
                           genes,
                           gene_name,
                           size=size,
                           viz_prefix=namespace,
                           image_suffix=image_suffix)

    return embedding