Пример #1
0
def tsne_executor(X, y, logger, path_logs):
    """Compute a t-SNE embedding per task and cell line and save the results.

    For every task in ``config['general']['tasks']`` the inputs are passed
    through ``filter_labels``; the filtered data and labels are then walked in
    lockstep with ``config['general']['cell_lines']`` and one embedding is
    computed per cell line.  Each embedding gets its labels appended as an
    extra column and is stored under the key ``"<task_name>_<cell_line>"``.

    Args:
        X: Data handed unchanged to ``filter_labels``.
        y: Labels handed unchanged to ``filter_labels``.
        logger: Logger that receives the debug messages.
        path_logs: Location forwarded to ``save_tsne`` and ``plot_tsne``.
    """
    check_input_type(
        ['epi'],
        "t-SNE experiment work just with epigenetic data, {} found".format(
            config['general']['input_type']))

    cell_lines = config['general']['cell_lines']
    tasks_dict = config['general']['tasks']

    # Use only half of the available CPUs so the machine is not overloaded,
    # but never fewer than one: cpu_count() // 2 evaluates to 0 on a
    # single-core host.
    cpus = max(1, multiprocessing.cpu_count() // 2)

    results = {}
    for t in tasks_dict:
        task_name, X_filtered, y_filtered = filter_labels(X, y, t)
        logger.debug("TASK: {}".format(task_name))
        logger.debug("Using {} cpus".format(cpus))

        for cl, data, labels in zip(cell_lines, X_filtered, y_filtered):
            logger.debug("Computing t-SNE for {}".format(cl))

            tsne = TSNE(perplexity=config['tsne']['perplexity'],
                        n_jobs=cpus)  # TODO: add parameters
            tsne_results = tsne.fit_transform(data)
            assert len(tsne_results) == len(labels)
            # Append the labels as a last column so they are saved together
            # with the embedding.
            tsne_results = np.c_[tsne_results, labels]
            results["{}_{}".format(task_name, cl)] = tsne_results

    save_tsne(path_logs, "tsne_results", results)
    if config['tsne']['save_plots']:
        plot_tsne(results, path_logs, "tsne_plot")
Пример #2
0
def main(feats_path):
    """Reduce the feature vectors pickled at ``feats_path`` with t-SNE.

    The pickle must contain a mapping of name -> vector.  Entries whose
    vector is ``None`` are ignored.  The projection is pickled to
    ``<input file name without extension>_tsne.pickle`` (a relative path).

    Args:
        feats_path: Path of the pickle file holding the feature mapping.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(feats_path, 'rb') as source:
        loaded = pickle.Unpickler(source).load()

    # Keep only the entries that actually carry a vector.
    vectors = [vec for vec in loaded.values() if vec is not None]
    features = np.asarray(vectors)

    print('[INFO] Conducting t-SNE on ' + feats_path)
    reducer = TSNE(
        metric='braycurtis',
        verbose=1,
        n_iter=5000,
        random_state=42,
        n_jobs=-1,
    )
    projection = reducer.fit_transform(features)

    # Derive the output file name from the input file name.
    stem, _extension = path.splitext(path.basename(feats_path))
    output = stem + '_tsne.pickle'

    print('[INFO] Saving reduced vectors to ' + output)
    with open(output, 'wb') as sink:
        pickle.dump(projection, sink)
Пример #3
0
def calcTSNEMulti(data, iterations, perplexity, learning_rate):
    """Embed ``data`` with t-SNE and return it with ``x``/``y`` columns added.

    Args:
        data: Object passed to ``fit_transform``; it must also provide an
            ``assign`` method accepting ``x`` and ``y`` keyword arguments.
        iterations: Forwarded to TSNE as ``n_iter``.
        perplexity: Forwarded to TSNE as ``perplexity``.
        learning_rate: Forwarded to TSNE as ``learning_rate``.

    Returns:
        The result of ``data.assign`` with the first embedding column as
        ``x`` and the second as ``y``.
    """
    embedding = TSNE(
        n_jobs=4,
        perplexity=perplexity,
        n_iter=iterations,
        learning_rate=learning_rate,
    ).fit_transform(data)
    first_axis = embedding[:, 0]
    second_axis = embedding[:, 1]
    return data.assign(x=first_axis, y=second_axis)
Пример #4
0
 def set_params(self,
                n_components=2,
                perplexity=30.0,
                early_exaggeration=12,
                learning_rate=200,
                n_iter=1000,
                n_iter_without_progress=30,
                min_grad_norm=1e-07,
                metric='euclidean',
                init='random',
                verbose=0,
                random_state=None,
                method='barnes_hut',
                angle=0.5,
                n_jobs=1,
                cheat_metric=True):
     """Build the wrapped ``MulticoreTSNE`` estimator from the given settings.

     Every argument is forwarded unchanged, by keyword of the same name, to
     the ``MulticoreTSNE`` constructor.  The new instance replaces whatever
     was stored in ``self.tsne``.
     """
     settings = {
         'n_components': n_components,
         'perplexity': perplexity,
         'early_exaggeration': early_exaggeration,
         'learning_rate': learning_rate,
         'n_iter': n_iter,
         'n_iter_without_progress': n_iter_without_progress,
         'min_grad_norm': min_grad_norm,
         'metric': metric,
         'init': init,
         'verbose': verbose,
         'random_state': random_state,
         'method': method,
         'angle': angle,
         'n_jobs': n_jobs,
         'cheat_metric': cheat_metric,
     }
     self.tsne = MulticoreTSNE(**settings)
Пример #5
0
def draw(x, y):
    """Embed ``x`` with t-SNE and scatter-plot it, one colour per class.

    The figure is built on a new matplotlib axes; this function neither
    shows nor saves it.

    Args:
        x: Samples passed to ``TSNE.fit_transform``.
        y: Class id of each sample.  The plotting loop looks classes up by
            the ids ``0 .. n_class - 1`` (``n_class`` being the number of
            distinct values in ``y``) and names them 'c1'..'c10', 'open'.
    """
    from matplotlib.colors import ListedColormap
    from MulticoreTSNE import MulticoreTSNE as TSNE

    print("TSNE: fitting start...")
    tsne = TSNE(2, n_jobs=4, perplexity=30)
    Y = tsne.fit_transform(x)

    labels = [
        'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9', 'c10', 'open'
    ]
    id_to_label = dict(enumerate(labels))
    y_true = pd.Series(y)
    plt.style.use('ggplot')
    n_class = y_true.unique().shape[0]
    colors = ('gray', 'lightgreen', 'plum', 'DarkMagenta', 'SkyBlue',
              'PaleTurquoise', 'DeepPink', 'Gold', 'Orange', 'Brown',
              'DarkKhaki')

    fig, ax = plt.subplots(figsize=(9, 6), )
    cmap = ListedColormap(colors)
    # Classes are drawn from the highest id down to 0; the colour is picked
    # by drawing order, not by class id.
    for idx, label in enumerate(range(n_class - 1, -1, -1)):
        ix = y_true[y_true == label].index
        xs = Y[:, 0][ix]
        ys = Y[:, 1][ix]
        # `color=` instead of `c=`: a single RGBA tuple passed as `c` is
        # ambiguous when the class happens to contain 3 or 4 points.
        ax.scatter(xs, ys, color=cmap(idx), label=id_to_label[label],
                   alpha=0.5)

    # Shrink current axis by 20% to leave room for the legend on the right.
    ax.set_title('proto_loss')
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
def generate2dftEmb():
    """Load the fine-tuned embeddings and reduce them with t-SNE.

    The mapping word -> vector is unpickled from ``finetuned_path`` and
    copied row by row into a ``(n_words, 300)`` matrix, in the mapping's
    iteration order.

    Besides returning them, the function publishes its results through the
    module-level names ``w2id``, ``w``, ``tsne`` and ``post_2d`` (and leaves
    the loop variables ``i`` and ``word`` behind as globals).

    Returns:
        Tuple ``(post_2d, w2id, w)``: the t-SNE output, the word -> row index
        mapping and the embedding matrix.
    """
    global w2id, w, i, word, tsne, post_2d

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # The context manager guarantees the handle is closed again.
    with open(finetuned_path, 'rb') as handle:
        embedding_map = pickle.load(handle)

    # Build the embedding matrix (one row per word) and the word -> row map.
    w2id = {}
    w = np.zeros((len(embedding_map), 300))
    for i, word in enumerate(embedding_map):
        w2id[word] = i
        w[i] = embedding_map[word]

    # Reduce the 300-dimensional rows with t-SNE.
    tsne = TSNE(n_jobs=12)
    post_2d = tsne.fit_transform(w)
    return post_2d, w2id, w
def generate2dpre():
    """Load the pre-trained text embeddings and reduce them with t-SNE.

    ``pretrained_path`` is read as a text file with one entry per line: the
    word followed by its vector components, separated by single spaces.  The
    file is scanned twice, first to collect the vocabulary (which fixes the
    matrix size) and then to fill the ``(n_words, 300)`` matrix.

    Besides returning them, the function publishes its results through the
    module-level names ``pre_w2id``, ``tsne`` and ``pre_2d`` (and leaves the
    loop variables ``i`` and ``word`` behind as globals).

    Returns:
        Tuple ``(pre_2d, pre_w2id, pre_w)``: the t-SNE output, the
        word -> row index mapping and the embedding matrix.
    """
    global word, i, pre_w2id, tsne, pre_2d

    pre_vocab = []
    with open(pretrained_path, 'r') as pre:
        # First pass: collect the vocabulary.
        for line in pre:
            word = line.rstrip().split(" ")[0]
            pre_vocab.append(word)

        pre_w = np.zeros((len(pre_vocab), 300))

        # Rewind before the second pass: the first loop exhausted the file
        # iterator, so without this the matrix would stay all zeros.
        pre.seek(0)
        for i, line in enumerate(pre):
            embeds = line.rstrip().split(" ")
            word = embeds[0]
            pre_w[i, :] = embeds[1:]

    # Map every word to its row index (a repeated word keeps its last row).
    pre_w2id = {token: row for row, token in enumerate(pre_vocab)}

    # Reduce the 300-dimensional rows with t-SNE.
    tsne = TSNE(n_jobs=12)
    pre_2d = tsne.fit_transform(pre_w)
    return pre_2d, pre_w2id, pre_w
Пример #8
0
def run_tSNE(natural_embed, n_jobs, perplexity):
    '''
    Embed the input points with the multicore CPU implementation of t-SNE.

    MulticoreTSNE is imported lazily inside the function; it can be
    installed by running
    pip install MulticoreTSNE

    INPUT:
        natural_embed: data to embed, passed as-is to fit_transform
        n_jobs: forwarded to the TSNE constructor
        perplexity: forwarded to the TSNE constructor
    OUTPUT:
        the array returned by fit_transform
    '''
    from MulticoreTSNE import MulticoreTSNE as TSNE

    # Fixed optimisation settings; only n_jobs and perplexity are tunable.
    settings = dict(
        n_jobs=n_jobs,
        perplexity=perplexity,
        n_iter=5000,
        n_iter_without_progress=800,
        learning_rate=20,
        metric='cosine',
    )
    return TSNE(**settings).fit_transform(natural_embed)
Пример #9
0
def ex3(wv):
    """Scatter-plot a t-SNE embedding of a fixed list of Polish phrases.

    Args:
        wv: Word-vector container.  It must expose ``vectors`` and support
            indexing with an iterable of (sanitized) phrases.
    """
    phrases = [
        "szkoda",
        "strata",
        "uszczerbek",
        "szkoda majątkowa",
        # "uszczerbek na zdrowiu",  (disabled entry)
        "krzywda",
        "niesprawiedliwość",
        "nieszczęście"
    ]

    reducer = MulticoreTSNE(n_components=2, n_jobs=os.cpu_count())
    reducer.fit(wv.vectors)

    # NOTE(review): fit_transform below appears to refit on the phrases only,
    # which would make the fit above redundant - confirm before removing.
    sanitized = (sanitize(phrase) for phrase in phrases)
    vectors_embedded = reducer.fit_transform(wv[sanitized])

    xs = vectors_embedded[:, 0]
    ys = vectors_embedded[:, 1]

    fig, ax = plt.subplots()
    ax.scatter(xs, ys)

    # Label every point with the phrase it belongs to.
    for position, phrase in enumerate(phrases):
        ax.annotate(phrase, (xs[position], ys[position]))

    plt.show()
Пример #10
0
def plot_tsne(experience=None, latent_states=None, rewards=None):
    """Plot a two-component t-SNE embedding of latent states, coloured by reward.

    When ``latent_states`` or ``rewards`` is missing, both are rebuilt from
    ``experience`` (``experience['agent_infos']`` and
    ``experience['reward']``).

    Side effects: seeds NumPy's global random generator with 0, writes the
    embedding to ``tsne_results.pkl`` (relative path) and opens a plot window.

    Args:
        experience: Mapping used as the fallback data source.
        latent_states: Array with one latent state per row.
        rewards: Array with one reward per latent state.
    """
    if latent_states is None or rewards is None:
        latent_states = np.array([
            list(rssm_state.prev_state.stoch)
            for rssm_state in experience['agent_infos']
        ])
        rewards = np.array(experience['reward'])

    # Shuffle (at most) the first 10000 samples.  The count is capped by the
    # data size so that smaller inputs no longer fail with an IndexError.
    np.random.seed(0)
    sample_count = min(10000, len(latent_states))
    perm = np.random.permutation(sample_count)
    latent_states = latent_states[perm]
    rewards = rewards[perm]

    feature_cols = ['axis_' + str(i) for i in range(latent_states.shape[1])]
    df = DataFrame(latent_states, columns=feature_cols)
    df['y'] = rewards

    time_start = time()
    tsne = TSNE(n_components=2,
                verbose=1,
                perplexity=1000,
                n_iter=1000,
                n_jobs=16)
    tsne_results = tsne.fit_transform(df[feature_cols].values)
    print('t-SNE done! Time elapsed: {} seconds'.format(time() - time_start))

    # Context manager so the output file is flushed and closed reliably.
    with open('tsne_results.pkl', 'wb') as handle:
        pickle.dump(tsne_results, handle)

    df['tsne-2d-one'] = tsne_results[:, 0]
    df['tsne-2d-two'] = tsne_results[:, 1]
    sns.scatterplot(x="tsne-2d-one",
                    y="tsne-2d-two",
                    hue="y",
                    palette=sns.color_palette("flare", as_cmap=True),
                    data=df,
                    alpha=0.6,
                    s=5)
    plt.show()
Пример #11
0
def main():
    """Parse the CLI arguments, embed the loaded vectors with t-SNE and plot them.

    Command-line options:
        --path      file to load (required)
        --dump_dir  dump directory (parsed but not used here)
        --size      embedding vector size, forwarded to ``load``

    The embedding and the labels are handed to ``show`` together with the
    target ``graph/<file name>.svg``.
    """
    parser = argparse.ArgumentParser(description='main function parser')
    parser.add_argument('--path', type=str, required=True,
                        help='load file path')
    parser.add_argument('--dump_dir', type=str, default=None,
                        help='dump directory')
    parser.add_argument('--size', type=int, default=1000,
                        help='embedding vector size')
    args = parser.parse_args()

    embeddings, labels = load(args.path, args.size)

    # Last path component, used to name the output graph.
    output = args.path.rsplit('/', 1)[-1]

    # t-SNE
    weights = TSNE(n_components=2).fit_transform(embeddings)
    show(weights, labels, f'graph/{output}.svg')
Пример #12
0
def plot_distribution(
        epoch,
        train,
        path,
        data_x,
        pred_y,
        learning_rate=100,
        n_jobs=-1):
    """Embed ``data_x`` with t-SNE and draw it via ``draw_plot``.

    Args:
        epoch: Forwarded to ``draw_plot``.
        train: Forwarded to ``draw_plot``.
        path: Output directory; created when it does not exist yet.
        data_x: Samples to embed.  Inputs with more than two dimensions are
            flattened to one row per sample first.
        pred_y: Forwarded to ``draw_plot``.
        learning_rate: t-SNE learning rate.
        n_jobs: Number of parallel jobs handed to t-SNE.
    """
    print("plotting image on " + path + "...")
    if not os.path.exists(path):
        os.makedirs(path)
    tsne_model = TSNE(n_components=2,
                      learning_rate=learning_rate,
                      n_jobs=n_jobs)

    data_x = np.array(data_x)
    if data_x.ndim > 2:
        # Flatten every sample to a vector (the original spelled the method
        # "rehsape", which raised AttributeError on this path).
        data_x = np.array([sample.reshape(-1) for sample in data_x])

    transformed = tsne_model.fit_transform(data_x)
    xs = transformed[:, 0]
    ys = transformed[:, 1]

    draw_plot(xs, ys, train, epoch, pred_y, path)
Пример #13
0
def display_closestwords_tsnescatterplot(arg_path_to_model, word):
    """Plot every query word and its most similar words in t-SNE space.

    For each entry of ``word`` the vectors of the entry and of the words
    returned by ``model.similar_by_word`` are embedded with t-SNE, scattered
    on the current matplotlib figure and annotated with the words.  The axis
    limits are taken from the coordinates of the last entry processed.

    Args:
        arg_path_to_model: Path of the Word2Vec model to load.
        word: Sequence of query words.
    """
    model = word2vec.Word2Vec.load(arg_path_to_model)
    for query in word:
        # The query word goes first, followed by its closest words.
        word_labels = [query]
        neighbours = model.similar_by_word(query)

        vectors = [model[query]]
        for entry in neighbours:
            neighbour = entry[0]
            vectors.append(model[neighbour])
            word_labels.append(neighbour)

        # Stack onto an empty float32 (0, 300) array, one row per word.
        arr = np.append(np.empty((0, 300), dtype='f'),
                        np.array(vectors),
                        axis=0)

        # find tsne coords for 2 dimensions
        tsne = TSNE(n_components=2, random_state=0)
        np.set_printoptions(suppress=True)
        Y = tsne.fit_transform(arr)

        x_coords, y_coords = Y[:, 0], Y[:, 1]

        # display scatter plot
        plt.scatter(x_coords, y_coords)
        for label, x, y in zip(word_labels, x_coords, y_coords):
            plt.annotate(label,
                         xy=(x, y),
                         xytext=(0, 0),
                         textcoords='offset points')

    # Changing the multiplier changes the zoom of the plot
    # (smaller multiplier = closer zoom).
    zoom = 1
    plt.xlim(x_coords.min() * zoom, x_coords.max() * zoom)
    plt.ylim(y_coords.min() * zoom, y_coords.max() * zoom)
    plt.show()
Пример #14
0
def main():
    """Parse the CLI arguments and plot UMAP and t-SNE projections.

    Command-line options:
        --path      file to load (required)
        --dump_dir  dump directory (parsed but not used here)
        --size      embedding vector size (parsed but not used here)

    One projection is computed per configured UMAP (min_dist, n_neighbors)
    pair and per configured t-SNE perplexity.  The elapsed time of each run
    is printed, the output directory is created and the projection is handed
    to ``show`` together with the target ``.svg`` path.
    """
    parser = argparse.ArgumentParser(description='main function parser')
    cli_options = (
        ('--path', dict(type=str, help='load file path', required=True)),
        ('--dump_dir', dict(type=str, help='dump directory', default=None)),
        ('--size', dict(type=int, default=1000,
                        help='embedding vector size')),
    )
    for flag, options in cli_options:
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    loaded, labels = load(args.path)
    embeddings = np.array(loaded)

    # Last path component, used to name the output directories.
    output = args.path.rsplit('/', 1)[-1]

    # UMAP parameter grid (currently a single combination).
    umap_neighbors = [15]
    umap_min_dists = [0.1]
    for min_dist in umap_min_dists:
        for n_neighbor in umap_neighbors:
            start = time.time()
            reducer = umap.UMAP(n_neighbors=n_neighbor, min_dist=min_dist)
            weights = reducer.fit_transform(embeddings)
            finish = time.time()
            print(f'time: {finish-start} s', flush=True)
            os.makedirs(f'graph/umap/{output}', exist_ok=True)
            show(weights, labels, f'graph/umap/{output}/min_dist:{min_dist}_neighbor:{n_neighbor}.svg')

    # t-SNE perplexity grid (currently a single value).
    tsne_perplexities = [30]
    for perplexity in tsne_perplexities:
        start = time.time()
        weights = TSNE(n_components=2, perplexity=perplexity,
                       n_jobs=10).fit_transform(embeddings)
        finish = time.time()
        print(f'time: {finish-start} s', flush=True)
        os.makedirs(f'graph/tsne/{output}', exist_ok=True)
        show(weights, labels, f'graph/tsne/{output}/perplexity:{perplexity}.svg')
Пример #15
0
def __async_tsne_embedding(x):
    """Return the two-component MulticoreTSNE embedding of ``x``.

    ``x`` is cast to float64 before fitting; 32 jobs are requested.
    """
    samples = x.astype(np.float64)
    return MulticoreTSNE(n_jobs=32, n_components=2).fit_transform(samples)
def tsne_vis(netZ, rn, img_size, real_imgs):
    """Embed the latent codes of the classes below 11 and save a scatter plot.

    The latent codes are read from ``netZ.emb.weight``.  Only samples whose
    label (``netZ.idx2label``) is smaller than 11 are embedded with t-SNE.
    The embedding is handed to ``plot_by_latent``, then min-max normalised
    per axis and saved as ``runs/ims_<rn>/tsne_<rn>.jpg``.

    Args:
        netZ: Model exposing ``emb.weight`` and ``idx2label``.
        rn: Run name used in the output directory and file name.
        img_size: Forwarded to ``plot_by_latent``.
        real_imgs: Images; their count defines how many samples are examined.
    """
    import matplotlib.pyplot as plt
    from MulticoreTSNE import MulticoreTSNE as TSNE

    Zs_real = netZ.emb.weight.data.detach().cpu().numpy()
    if not os.path.isdir("runs"):
        os.mkdir("runs")
    if not os.path.isdir("runs/ims_%s" % rn):
        os.mkdir("runs/ims_%s" % rn)
    tsne = TSNE(n_components=2, perplexity=30, n_jobs=20)
    n_samples = len(real_imgs)
    sample_labels = [netZ.idx2label[x] for x in range(n_samples)]
    targets = np.asarray(sample_labels)

    # Positions of the samples with a label below 11.  The original indexed
    # with the selected label *values*, which picked the wrong rows.
    filtered_indices = np.asarray(
        [pos for pos, lbl in enumerate(sample_labels) if lbl < 11],
        dtype=int)
    targets = targets[filtered_indices]
    Z_filter = Zs_real[filtered_indices]
    print(len(Z_filter))
    reduced_data = tsne.fit_transform(np.asarray(Z_filter, dtype='float64'))
    plot_by_latent(reduced_data,
                   real_imgs,
                   indices=filtered_indices,
                   img_size=img_size,
                   rn=rn,
                   title="G2")

    # Min-max normalise each axis.  Yn is a view, so this rescales
    # reduced_data in place; the extrema are read before any write.
    min_1 = reduced_data[:, 0].min()
    max_1 = reduced_data[:, 0].max()
    min_2 = reduced_data[:, 1].min()
    max_2 = reduced_data[:, 1].max()
    Yn = reduced_data[:]
    Yn[:, 0] = (reduced_data[:, 0] - min_1) / (max_1 - min_1)
    Yn[:, 1] = (reduced_data[:, 1] - min_2) / (max_2 - min_2)

    # Plot the distribution, coloured by class label.
    unique_classes = len(np.unique(targets))
    y_labels_colors = targets

    plt.scatter(Yn[:, 1],
                -Yn[:, 0],
                c=y_labels_colors,
                cmap=plt.cm.get_cmap("tab20", unique_classes),
                s=10,
                edgecolors='k')
    mn = int(np.floor(y_labels_colors.min()))  # colorbar min value
    mx = int(np.ceil(y_labels_colors.max()))  # colorbar max value
    # True midpoint between mn and mx (identical to the former
    # (mx - mn) // 2 whenever mn is 0).
    md = (mn + mx) // 2
    cbar = plt.colorbar()
    cbar.set_ticks([mn, md, mx])
    cbar.set_ticklabels([mn, md, mx])
    plt.savefig(f"runs/ims_{rn}/tsne_{rn}.jpg")
Пример #17
0
def compute_tsne(X, y, n_class=2,
                 savepath=None,
                 xlim=(-50,50), ylim=(-50,50),
                 cls_lbl=['Benign','Tumor'],
                 title=' ',PCADIM=50):
    """Embed ``X`` with t-SNE and draw one scatter series per class.

    Args:
        X: Samples passed to ``TSNE.fit_transform``.
        y: Class id per sample; classes ``0 .. n_class - 1`` are drawn.
        n_class: Number of classes to draw.
        savepath: When truthy, the figure is saved there and a second time
            with '.png' replaced by '.pdf'; otherwise it is shown.
        xlim: ``(low, high)`` x-axis limits, or a falsy value to skip.
        ylim: ``(low, high)`` y-axis limits, or a falsy value to skip.
        cls_lbl: Legend entries, in class order (the default list is never
            modified here).
        title: Figure title.
        PCADIM: Currently unused (the PCA pre-reduction is disabled).
    """
    tsne = TSNE(n_jobs=4, random_state=1337)
    embs = tsne.fit_transform(X)

    plt.figure(figsize=(10,10))
    for i in range(n_class):
        inds = np.where(y == i)[0]
        # `colors` is a module-level sequence indexed by class id.
        plt.scatter(embs[inds, 0], embs[inds, 1], color=colors[i], marker='*', s=30)
    if xlim:
        plt.xlim(xlim[0], xlim[1])
    if ylim:
        plt.ylim(ylim[0], ylim[1])
    plt.legend(cls_lbl)
    # Passed positionally on purpose: the keyword was named `b` in older
    # Matplotlib releases and `visible` in newer ones, so the positional
    # form is the only spelling that works with both.
    plt.grid(None)
    plt.title(title)
    if savepath:
        plt.savefig(savepath, dpi=300, bbox_inches='tight')
        plt.savefig(savepath.replace('.png','.pdf'), dpi=300, bbox_inches='tight')
    else:
        plt.show()
    plt.clf()
Пример #18
0
def tsne_image(
    features, images, img_res=64, res=4000, background_color=255,  max_feature_size=-1, labels=None, point_radius=20, n_threads=0
):
    """
    Lay images out on a canvas at the t-SNE positions of their features.

    Parameters
    ---------
    features: numpy array
        Features to visualize; must be two-dimensional once converted to a
        float32 array.

    images: list or numpy array
        Images corresponding to the features.

    img_res: int
        Resolution at which each image is embedded.

    res: int
        Size of the resulting embedding image in pixels.

    background_color: float or numpy array
        Background color value.

    max_feature_size: int
        When 0 < max_feature_size < feature size, the features are reduced
        to that size with PCA before running t-SNE.

    labels: List or numpy array, optional
        Label of each image, used to draw the circle image.

    point_radius: int
        Size of the circle for the label image.

    n_threads: int
        Number of threads to use for t-SNE; any value <= 0 selects
        ``multiprocessing.cpu_count()``.

    Returns
    -------
    Whatever ``image_util.draw_images_at_locations`` returns.
    """
    features = np.asarray(features, dtype=np.float32)
    assert features.ndim == 2

    print("Starting TSNE")
    s_time = time.time()

    # Optional PCA pre-reduction of over-long feature vectors.
    needs_pca = 0 < max_feature_size < features.shape[-1]
    if needs_pca:
        features = PCA(n_components=max_feature_size).fit_transform(features)

    thread_count = n_threads if n_threads > 0 else multiprocessing.cpu_count()
    model = TSNE(n_components=2, verbose=1, random_state=0, n_jobs=thread_count)
    f2d = model.fit_transform(features)

    print("TSNE done.", (time.time() - s_time))
    print("Starting drawing.")

    return image_util.draw_images_at_locations(
        images, f2d[:, 0], f2d[:, 1], img_res, res, background_color, labels,
        point_radius)
Пример #19
0
def calc_tsne(
    X,
    n_jobs,
    n_components,
    perplexity,
    early_exaggeration,
    learning_rate,
    random_state,
    init="random",
    n_iter=1000,
    n_iter_early_exag=250,
):
    """
    Fit t-SNE on ``X`` and return the embedding.

    All parameters except ``X`` are forwarded by keyword to the ``TSNE``
    constructor, which is always created with ``verbose=1``.  After fitting,
    ``kl_divergence_`` of the estimator is logged as the final error.

    TODO: Typing
    """
    settings = {
        "n_jobs": n_jobs,
        "n_components": n_components,
        "perplexity": perplexity,
        "early_exaggeration": early_exaggeration,
        "learning_rate": learning_rate,
        "random_state": random_state,
        "verbose": 1,
        "init": init,
        "n_iter": n_iter,
        "n_iter_early_exag": n_iter_early_exag,
    }
    estimator = TSNE(**settings)
    embedding = estimator.fit_transform(X)
    logger.info("Final error = {}".format(estimator.kl_divergence_))
    return embedding
Пример #20
0
def tsne_reduction(samples,
                   perplexity,
                   data=None,
                   n_components=2,
                   l_r=200,
                   dim=2,
                   ex=12,
                   iterations=5000,
                   verbosity=0):
    """Reduce ``samples`` with t-SNE and return the result and the estimator.

    Args:
        samples: Data to embed.  When ``None`` and ``data`` is given, every
            column of ``data`` except the last one is used instead.
        perplexity: t-SNE perplexity.
        data: Optional array whose last column is split off from the samples.
        n_components: Not read by this function; the output dimensionality
            is taken from ``dim``.
        l_r: Learning rate.
        dim: Number of output dimensions.
        ex: Early exaggeration.
        iterations: Number of iterations.
        verbosity: Verbosity level handed to t-SNE.

    Returns:
        Tuple ``(reduced_samples, tsne)``.
    """
    use_data = samples is None and data is not None
    if use_data:
        samples = data[:, :-1]
        # The last column is split off here but not used any further.
        targets = data[:, -1]

    options = dict(
        n_components=dim,
        n_jobs=-1,
        learning_rate=l_r,
        perplexity=perplexity,
        early_exaggeration=ex,
        n_iter=iterations,
        random_state=data_handling.RANDOM_SEED,
        verbose=verbosity,
    )
    tsne = TSNE(**options)
    return tsne.fit_transform(samples), tsne
def get_data(n_cmd, n_spk, only_missed=False):
    """Embed the latent codes of the most frequent commands and speakers.

    The ``n_cmd`` most frequent commands and the ``n_spk`` most frequent
    speakers are determined from the module-level ``y_command`` and
    ``y_speaker`` (restricted to the entries listed in ``y_missed`` when
    ``only_missed`` is set).  The matching rows of ``Z_original`` and
    ``Z_maximize`` are embedded together in one t-SNE run and split again.

    Args:
        n_cmd: Number of top commands to keep.
        n_spk: Number of top speakers to keep.
        only_missed: Restrict the selection to the ids in ``y_missed``.

    Returns:
        Tuple ``(t_org, t_max, y_cmd, y_spk)``.
    """
    if only_missed:
        # Frequencies among the entries listed in y_missed only.
        missed_idx = y_missed.astype('int32')
        cmd_freq = itemfreq(y_command[missed_idx])
        spk_freq = itemfreq(y_speaker[missed_idx])
    else:
        spk_freq = itemfreq(y_speaker)
        cmd_freq = itemfreq(y_command)

    # Order both tables by their second column, descending, and keep the
    # first column.
    # NOTE(review): only the speaker counts are cast to int32 before
    # sorting; the command counts are sorted as-is - confirm this is
    # intended.
    cmd_order = np.argsort(cmd_freq[:, 1])
    top_cmd = cmd_freq[cmd_order][::-1][:, 0]

    spk_order = np.argsort(spk_freq[:, 1].astype('int32'))
    top_spk = spk_freq[spk_order][::-1][:, 0]

    ids = get_indices(speaker_set=top_spk[:n_spk],
                      command_set=top_cmd[:n_cmd])
    if only_missed:
        ids = np.array([i for i in ids if i in y_missed], dtype='int32')

    y_cmd, y_spk = y_command[ids], y_speaker[ids]
    z_org, z_max = Z_original[ids], Z_maximize[ids]

    # Embed both code sets jointly, then split at the original row count.
    joint = TSNE(random_state=SEED).fit_transform(
        np.concatenate((z_org, z_max), axis=0))
    n_org = z_org.shape[0]
    return joint[:n_org], joint[n_org:], y_cmd, y_spk
Пример #22
0
    def __init__(self,
                 container,
                 perplexity=30.0,
                 learning_rate=120.0,
                 n_componenets=2,
                 n_jobs=4,
                 n_iter=1000,
                 verbose=1000):
        """
          Store the container and create the MulticoreTSNE engine.

          Args:
            container: EmbeddingContainer
            perplexity, learning_rate, n_jobs, n_iter, verbose:
                forwarded to MulticoreTSNE under the same names.
            n_componenets: forwarded to MulticoreTSNE as `n_components`.
        """
        engine_options = dict(
            perplexity=perplexity,
            learning_rate=learning_rate,
            n_components=n_componenets,
            n_jobs=n_jobs,
            n_iter=n_iter,
            verbose=verbose,
        )
        self._container = container
        self._engine = MulticoreTSNE(**engine_options)

        # Nothing has been computed yet.
        self._results = self._ids = None
        self._label_ids = self._label_names = None
        self._label_names = None
def plot_conti_code_tsne():
    """Plot a t-SNE embedding of the last two code entries, coloured by label.

    The generated-code pickle selected by ``opt`` (dataset, noise, disc,
    conti and epoch settings) is loaded.  The last two columns of its "z"
    array are embedded with t-SNE and one scatter series is drawn per unique
    value of "y".
    """
    code_path = (
        "/home/patrick/repositories/hyperspectral_phenotyping_gan/experiments_{}/generated_code_noise{}_disc{}_conti{}_epoch{}.p"
        .format(opt.dataset, opt.n_noise, opt.n_dis, opt.n_conti,
                opt.epoch))
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # The context manager guarantees the handle is closed again.
    with open(code_path, "rb") as handle:
        data = pickle.load(handle)

    labels = np.array(data["y"]).squeeze()
    labels_unique = np.unique(labels)
    # Only the last two columns of the code are embedded.
    code = np.array(data["z"])[:, -2:]

    tsne = TSNE(n_jobs=26, n_components=2, learning_rate=100)
    Y = tsne.fit_transform(code)

    colors = ["red", "green", "blue"]
    for idx, label in enumerate(labels_unique):
        data_tsne = Y[labels == label]
        # Cycle through the palette so that more than three labels do not
        # raise an IndexError.
        plt.scatter(data_tsne[:, 0],
                    data_tsne[:, 1],
                    c=colors[idx % len(colors)],
                    alpha=0.3,
                    label=str(label))
    plt.legend()
    plt.show()
Пример #24
0
    def run(self, word_embedding):
        """
        Fits t-SNE with the configured parameters on the given embedding.
        :param word_embedding: Word embedding; expected to be only as long as self.num_words.
        :return: Output of fit_transform; also stored in self.tsne_results.
        """
        self.word_embedding = word_embedding

        # Stack the stored vectors into one matrix, one row per entry.
        stacked = numpy.stack(word_embedding['values'].values, axis=0)

        # Note: Since MulticoreTSNE doesn't support metrics other than
        # Euclidean, the vectors are normalized to unit norm so that the
        # Euclidean distance yields results/ordering more similar to the
        # cosine similarity.
        unit_vectors = sklearn.preprocessing.normalize(stacked,
                                                       axis=1,
                                                       norm='l2')

        tsne_options = {
            'n_components': self.num_dimensions,
            'perplexity': self.perplexity,
            'early_exaggeration': self.early_exaggeration,
            'learning_rate': self.learning_rate,
            'n_iter': self.num_iterations,
            'min_grad_norm': self.min_grad_norm,
            'random_state': self.random_state,
            'angle': self.angle,
            'metric': self.metric,
            'init': self.init_method,
            'n_jobs': 2,
        }
        self.tsne_results = MulticoreTSNE(**tsne_options).fit_transform(
            unit_vectors)

        return self.tsne_results
Пример #25
0
    def train(self, parameters):
        """Fit t-SNE on ``self.x_train`` and save the embedding as a pickle.

        Args:
            parameters: Keyword arguments unpacked into the ``TSNE``
                constructor.
        """
        tsne_outputs = TSNE(**parameters).fit_transform(self.x_train)

        # The file is written below the module-level tsne_outputs_path.
        target = tsne_outputs_path + 'tsne_outputs.p'
        utils.save_data_to_pkl(tsne_outputs, target)
Пример #26
0
def run_tSNE(data, n_pc, n_dim, p, verbose = 3, random_state = 0, n_jobs = 20):
  """Reduce ``data`` with PCA, then embed the leading components with t-SNE.

  Args:
    data: Samples handed to PCA.
    n_pc: Number of principal components to compute.
    n_dim: Number of leading principal components passed on to t-SNE.
    p: t-SNE perplexity.
    verbose: Verbosity level handed to MulticoreTSNE.
    random_state: Seed handed to MulticoreTSNE.
    n_jobs: Number of jobs handed to MulticoreTSNE.

  Returns:
    The array returned by ``fit_transform``.
  """
  components = PCA(n_components = n_pc).fit_transform(data)
  # Only the first n_dim principal components are embedded.
  leading = components[:, :n_dim]
  embedder = MulticoreTSNE(perplexity = p,
                           verbose = verbose,
                           random_state = random_state,
                           n_jobs = n_jobs)
  return embedder.fit_transform(leading)
Пример #27
0
    def visualize(self, indices = [], center_num = 0,
                  ref_labels = [], use_colors = True):
        """Scatter-plot the t-SNE embedding, one series per speaker.

        The t-SNE embedding of all ``self.embeddings_`` is computed on the
        first call and cached in ``self.emb_2d_``.

        Args:
            indices: Rows of the embeddings to show; empty selects all rows.
            center_num: Key into ``self.centers_`` selecting the cluster
                configuration; 0 selects ``self.opt_speaker_num_``.
            ref_labels: Reference speaker labels (array) for the shown rows.
                When empty, labels are predicted from the selected centers.
            use_colors: Draw each speaker with its own edge colour.

        Raises:
            RuntimeError: If ``self.speaker_labels_`` is empty, i.e. no
                clustering has been performed.
        """
        # Fail fast, before doing any clustering or t-SNE work.
        if len(self.speaker_labels_) == 0:
            raise RuntimeError("Clustering not performed.")

        # If indices are not given
        if len(indices) == 0:
            indices = np.arange(len(self.embeddings_))

        # If center number is not given
        if center_num == 0:
            center_num = self.opt_speaker_num_

        if len(ref_labels) != 0:
            # Reference labels are used as given.
            speaker_labels = ref_labels
        else:
            # A single k-means step from the stored centers assigns every
            # shown embedding to its speaker; labels are shifted to start
            # at 1.
            spkmeans = SphericalKMeans(
                n_clusters=len(self.centers_[center_num]),
                init=self.centers_[center_num],
                max_iter=1, n_init=1, n_jobs=1).fit(self.embeddings_[indices])
            speaker_labels = spkmeans.labels_ + 1

        # Compute TSNE only once
        if len(self.emb_2d_) == 0:
            print("Computing TSNE transform...")
            tsne = TSNE(n_jobs=4)
            self.emb_2d_ = tsne.fit_transform(self.embeddings_)

        # Visualize
        emb_2d = self.emb_2d_[indices]
        # Builtin int: the np.int alias no longer exists in current NumPy.
        speaker_labels = speaker_labels.astype(int)
        speakers = np.unique(speaker_labels)
        colors = cm.rainbow(np.linspace(0, 1, len(speakers)))
        plt.figure(figsize=(7, 7))

        # One colour per unique speaker, matched by position.  This equals
        # the former colors[speaker - 1] for contiguous labels 1..N and also
        # works for 0-based or non-contiguous labels.
        for color, speaker in zip(colors, speakers):
            speak_ind = np.where(speaker_labels == speaker)[0]
            x, y = np.transpose(emb_2d[speak_ind])
            edge = color if use_colors else "k"
            plt.scatter(x, y, c="k", edgecolors=edge, s=2, label=speaker)

        plt.legend(title = "Speakers", prop={'size': 10})

        if len(ref_labels) == 0:
            plt.title("Predicted speaker clusters")
        else:
            plt.title("Reference speaker clusters")
        plt.show()
Пример #28
0
def get_2D_vector(vectors):
    """
        Use the t-SNE algorithm to map high-dimensional vectors to 2 dimensions.
        http://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html
        https://distill.pub/2016/misread-tsne/
    """
    reducer = TSNE(
        perplexity=25,
        n_components=2,
        init='random',
        n_iter=1000,
        n_jobs=-1,
    )
    embedded = reducer.fit_transform(vectors)
    return embedded
Пример #29
0
def run_tsne(path):
    """Embed the matrix loaded from ``path`` and print the coordinates.

    One line is printed per embedded row: the row's id followed by its first
    two coordinates, separated by single spaces.
    """
    ids, X = load_matrix(path)
    embedding = TSNE(n_jobs=8).fit_transform(X)
    for i, row in enumerate(embedding):
        fields = (ids[i], str(row[0]), str(row[1]))
        print(' '.join(fields))
Пример #30
0
def generate_tsne(path, data, label):
    """Embed ``data`` with t-SNE and save a labelled scatter plot to ``path``.

    Args:
        path: Output file handed to ``plt.savefig``.
        data: Samples passed to ``TSNE.fit_transform``.
        label: Per-sample values used to colour the points.
    """
    # Function-call form: a syntax error-free print on Python 3 that prints
    # the same text on Python 2 (single parenthesised argument).
    print('\nGenerating t-SNE...')
    tsne = TSNE(n_jobs=-1)
    Y = tsne.fit_transform(data)
    plt.figure(figsize=(20, 20))
    plt.scatter(Y[:, 0], Y[:, 1], c=label, s=100, cmap='Set1', alpha=0.2)
    plt.colorbar()
    plt.savefig(path)