Пример #1
0
    def test_set_rand_seed(self):
        """Check that two t-SNE runs with the same ``rand_seed`` agree.

        Only the first embedded point is compared, and only after dividing by
        5 and rounding, so small numeric differences are tolerated.  Both
        embeddings are also drawn into one figure that is saved under
        ``PLOTS_DIR`` (and shown when the ``SHOW_PLOTS`` variable is set).
        """
        features = load_iris().data
        first_run = tsne(features, rand_seed=999)
        second_run = tsne(features, rand_seed=999)

        # Compare x (axis 0) and then y (axis 1) of the first point.
        for axis in (0, 1):
            self.assertEqual(round(first_run[0][axis] / 5),
                             round(second_run[0][axis] / 5))

        for embedding, colour in ((first_run, 'b'), (second_run, 'r')):
            plt.scatter(embedding[:, 0], embedding[:, 1], c=colour)
        plt.savefig(PLOTS_DIR + '/iris_set_rand_seed.png')
        if 'SHOW_PLOTS' in os.environ:
            plt.show()
        plt.close()
Пример #2
0
    def test_without_seed_positions(self):
        """Plot two embeddings of iris made without ``seed_positions``.

        The first run embeds all but the last 10 samples, the second run
        embeds every sample.  Nothing is asserted; the figure saved under
        ``PLOTS_DIR`` is the output (blue: first run, red: the same samples in
        the second run, green: the 10 extra samples).
        """
        iris = load_iris()
        X_b = iris.data
        # Independent copy, so nothing the first run might do to its input
        # can leak into the data used by the second run.
        X_a = X_b[:-10].copy()
        Y_a = tsne(X_a, rand_seed=999)
        Y_b = tsne(X_b, rand_seed=999)

        plt.scatter(Y_a[:, 0], Y_a[:, 1], c='b')
        plt.scatter(Y_b[:-10, 0], Y_b[:-10, 1], c='r')
        plt.scatter(Y_b[-10:, 0], Y_b[-10:, 1], c='g')

        plt.savefig(PLOTS_DIR + '/iris_without_seed_positions.png')
        if os.environ.get('SHOW_PLOTS') is not None:
            plt.show()
        plt.close()
Пример #3
0
def create_docvec_scatter(matrix, document_list):
    """
    Create a scatter plot of document vectors.

    The rows of ``matrix`` are embedded in 2-D with Barnes-Hut t-SNE, coloured
    by the ``"category1"`` entry of ``document_list`` and shown on screen.

    :param matrix: matrix of document vectors (must support ``astype``)
    :param document_list: mapping/table whose ``"category1"`` entry yields one
        category per embedded row
    :return: the embedding returned by ``bhtsne.tsne``
    """
    # The dtype is given by name: ``sp.float64`` is a deprecated NumPy alias
    # that recent SciPy releases no longer export.
    embedding = bhtsne.tsne(matrix.astype("float64"),
                            dimensions=2,
                            perplexity=30.0,
                            theta=0.5,
                            rand_seed=-1)
    doc_tsne = pd.DataFrame({"x": embedding[:, 0], "y": embedding[:, 1]})
    doc_tsne["category"] = list(document_list["category1"])
    sns.set_style("darkgrid")
    sns.set(font="IPAexGothic")
    # NOTE(review): newer seaborn releases renamed ``size`` to ``height`` --
    # confirm against the installed version before changing it.
    sns.lmplot(data=doc_tsne,
               x="x",
               y="y",
               hue="category",
               fit_reg=False,
               size=10)
    # Show the figure.
    plt.show()
    return embedding
Пример #4
0
    def evaluate(self):
        """Fit one clustering model per value in ``self.k_range`` and report scores.

        For every cluster count the method fits the model returned by
        ``self._cluster_model``, draws the 2-D t-SNE embedding coloured by the
        predicted labels, and records the silhouette and Dunn scores.  When
        ``self.supervised_labels`` is present, the adjusted Rand index and the
        normalized mutual information are recorded as well.  Finally the
        scatter plots are shown, ``self._plot_elbow_method`` is called with
        the per-sample distance to the nearest centre, and the scores are
        printed.
        """
        # dimensionality reduction using Barnes-Hut implementation of t-SNE
        data_2d = tsne(np.array(self.data, dtype=np.float64))

        silhouette = {}
        dunn_score = {}
        rand_score = {}
        nmi_score = {}

        # data for the elbow method: distance of each sample to its centre
        distance_list = []

        # ``if labels:`` is ambiguous for NumPy arrays, so test explicitly.
        true_labels = self.supervised_labels
        has_true_labels = true_labels is not None and len(true_labels) > 0

        # Keep the 3x3 grid for up to nine cluster counts and grow it only
        # when more plots are needed.
        k_values = list(self.k_range)
        grid = 3
        while grid * grid < len(k_values):
            grid += 1

        for plot_index, c in enumerate(k_values, start=1):
            print("Number of clusters c:", c)

            # create model
            model = self._cluster_model(self.model_name, c)
            model.fit(self.data)
            labels = model.labels_

            # visualization evaluation - 2D Scatter plot of data
            plt.subplot(grid, grid, plot_index)
            plt.scatter(data_2d[:, 0], data_2d[:, 1], c=labels)

            # elbow method: distance from every sample to its closest centre
            k_dist = cdist(self.data, model.cluster_centers_, 'euclidean')
            distance_list.append(np.min(k_dist, axis=1))

            # internal evaluation methods not using true labels
            silhouette[c] = silhouette_score(np.array(self.data),
                                             labels,
                                             metric='euclidean')
            print("silhoutete with cluster number: ", silhouette[c], c)
            dunn_score[c] = self.dunn_fast(self.data, labels)
            print("dunn with cluster number: ", dunn_score[c], c)

            # external evaluation methods using true labels (supervised datasets)
            if has_true_labels:
                rand_score[c] = adjusted_rand_score(true_labels, labels)
                nmi_score[c] = normalized_mutual_info_score(
                    true_labels, labels)

        plt.show()
        self._plot_elbow_method(distance_list)

        # ``max`` on an empty dict raises, so skip it for an empty k_range.
        if silhouette:
            print(max(silhouette.items(), key=operator.itemgetter(1)))
            print(max(dunn_score.items(), key=operator.itemgetter(1)))

        print("Silhoutete ", silhouette)
        print("Dunn ", dunn_score)
        if has_true_labels:
            print("Adjusted Rand ", rand_score)
            print("NMI ", nmi_score)
	def fit_and_plot(self, X, label_list, cluster_list):
		"""Embed ``X`` with t-SNE and save a scatter plot coloured by cluster.

		One scatter series is drawn for every cluster id in
		``range(number of unique values in label_list)``; a point belongs to
		a series when its entry in ``cluster_list`` equals that id.  The
		figure is written to ``_fig/tsne/clustering_result.png``.

		:param X: data matrix to embed (must support ``astype``)
		:param label_list: labels; only the number of distinct values is used
		:param cluster_list: cluster id of each row of ``X``
		"""
		# ``scipy.float64`` is a deprecated NumPy alias; use NumPy directly.
		data_tsne = bhtsne.tsne(
			X.astype(np.float64),
			dimensions=self.dimensions,
			perplexity=self.perplexity,
			theta=self.theta,
			rand_seed=self.rand_seed)

		xmin, xmax = data_tsne[:,0].min(), data_tsne[:,0].max()
		ymin, ymax = data_tsne[:,1].min(), data_tsne[:,1].max()

		# split each label; the reshape keeps an empty cluster two-dimensional
		# so the column indexing below cannot fail on it
		n_components = data_tsne.shape[1]
		data_dict = {}
		for label in range(len(np.unique(label_list))):
			rows = [data_tsne[idx] for idx, cluster in enumerate(cluster_list) if cluster == label]
			data_dict[str(label)] = np.array(rows).reshape(-1, n_components)
		print([len(data) for data in data_dict.values()])

		plt.figure(figsize=(15,10))
		for m, data in data_dict.items():
			# ``cmap(int(m))`` is a single colour, so it goes to ``color``
			# (the ``cmap`` keyword expects a colormap, not a colour).
			plt.scatter(data[:,0],data[:,1],color=cmap(int(m)),label='label.{}'.format(m))
		plt.legend()
		plt.axis([xmin,xmax,ymin,ymax])
		plt.xlabel('component 0')
		plt.ylabel('component 1')
		plt.title('t-SNE visualization')

		# ``makedirs`` also creates the parent ``_fig`` directory when missing.
		savedir = os.path.join('_fig','tsne')
		if not os.path.isdir(savedir):
			os.makedirs(savedir)

		plt.savefig(os.path.join(savedir,'clustering_result.png'))
		plt.close()
 def fit_transform(self, X):
     """Embed ``X`` with Barnes-Hut t-SNE using the parameters stored on ``self``.

     :param X: data to embed (must support ``astype``)
     :return: whatever ``bhtsne.tsne`` returns for the converted data
     """
     # The dtype is given by name: ``sp.float64`` is a deprecated NumPy alias
     # that recent SciPy releases no longer export.
     return bhtsne.tsne(X.astype("float64"),
                        dimensions=self.dimensions,
                        perplexity=self.perplexity,
                        theta=self.theta,
                        rand_seed=self.rand_seed,
                        max_iter=self.max_iter)
Пример #7
0
 def _dim_reduct(self, vecs):
     """Reduce ``vecs`` to two dimensions with Barnes-Hut t-SNE.

     The call uses a fixed perplexity of 5.0, theta of 0.5 and
     ``rand_seed=-1``.

     :param vecs: array of vectors (converted to ``np.float64``)
     :return: the embedding returned by ``bhtsne.tsne``
     """
     tsne_options = dict(dimensions=2,
                         perplexity=5.0,
                         theta=0.5,
                         rand_seed=-1)
     as_float64 = vecs.astype(np.float64)
     return bhtsne.tsne(as_float64, **tsne_options)
Пример #8
0
 def fit_transform(self, x):
     """Embed ``x`` with Barnes-Hut t-SNE configured from this instance.

     :param x: array to embed (converted to ``np.float64``)
     :return: the embedding returned by ``bhtsne.tsne``
     """
     as_float64 = x.astype(np.float64)
     return bhtsne.tsne(as_float64,
                        dimensions=self.n_dim,
                        perplexity=self.perplexity,
                        theta=self.theta,
                        rand_seed=self.seed)
Пример #9
0
    def fit_transform(
            self,
            data: 'np.ndarray | pd.DataFrame') -> 'np.ndarray | pd.DataFrame':
        """
        Fit the tSNE model to data given the parameters provided during
        initialization and return the embedding.

        The result is also stored on ``self.tsne``.

        :param data: n observation x k feature data array
        :return: tsne results; a DataFrame indexed like ``data`` when a
            DataFrame was passed in, otherwise the raw bhtsne result
        """

        # TODO: run PCA before t-SNE (noted by the original author).

        is_frame = isinstance(data, pd.DataFrame)
        data_ = data.values if is_frame else data

        res = bhtsne.tsne(data_.astype(float),
                          dimensions=self.n_components,
                          **self.kwargs)

        if is_frame:
            # Keep 'x'/'y' for the 2-D case and extend the naming so other
            # dimensionalities no longer fail on a column-count mismatch.
            n_dims = res.shape[1]
            axis_names = ['x', 'y', 'z']
            if n_dims <= len(axis_names):
                columns = axis_names[:n_dims]
            else:
                columns = ['dim_{}'.format(i + 1) for i in range(n_dims)]
            self.tsne = pd.DataFrame(res, index=data.index, columns=columns)
        else:
            self.tsne = res

        return self.tsne
Пример #10
0
def create_wordvec_scatter(matrix, vocab, max_points=241):
    """
    Create an annotated scatter plot of word vectors.

    The rows of ``matrix`` are embedded in 2-D with Barnes-Hut t-SNE; the
    first ``max_points`` embedded points are plotted and each is annotated
    with the corresponding entry of ``vocab``.

    :param matrix: matrix of word vectors (must support ``astype``)
    :param vocab: list of words, paired with the embedded rows in order
    :param max_points: number of leading points to draw (default 241, the
        value that used to be hard-coded)
    :return: None
    """
    # The dtype is given by name: ``sp.float64`` is a deprecated NumPy alias
    # that recent SciPy releases no longer export.
    embedding = bhtsne.tsne(matrix.astype("float64"),
                            dimensions=2,
                            perplexity=30.0,
                            theta=0.5,
                            rand_seed=-1)
    plt.figure(figsize=(32, 24))  # figure size

    # Take the x and y columns separately; passing the whole 2-D slice as
    # the x coordinates gave scatter/annotate mismatched shapes.
    xs = embedding[:max_points, 0]
    ys = embedding[:max_points, 1]
    plt.scatter(xs, ys)

    for label, x, y in zip(vocab, xs, ys):
        plt.annotate(label,
                     xy=(x, y),
                     xytext=(0, 0),
                     textcoords='offset points')

    plt.show()
Пример #11
0
    def tsne(self, n_dims=2, perplexity=30, theta=0.5, rand_seed=0, **kwargs):
        '''Embed the samples of the dataset with Barnes-Hut t-SNE.

        Args:
            n_dims (int): Number of dimensions of the embedding.
            perplexity (float): Perplexity of the algorithm. Must satisfy
                3 * perplexity <= n_samples - 1.
            theta (float): A number between 0 and 1. Higher is faster but
                less accurate (via the Barnes-Hut approximation).
            rand_seed (int): Random seed. -1 randomizes each run.
            **kwargs: Named arguments passed to the t-SNE algorithm.

        Returns:
            pandas.DataFrame: indexed by the columns of the counts table,
            with one 'dimension <i>' column per embedding axis.

        Raises:
            ValueError: if the perplexity is too high for the sample count.
        '''
        from bhtsne import tsne as run_bhtsne

        n_samples = self.dataset.n_samples
        if n_samples - 1 < 3 * perplexity:
            limit = (n_samples - 1.) / 3
            raise ValueError(
                'Perplexity too high, reduce to <= {:}'.format(limit))

        counts = self.dataset.counts.copy()

        # bhtsne does not require pre-whitening of the input
        options = dict(dimensions=n_dims,
                       perplexity=perplexity,
                       theta=theta,
                       rand_seed=rand_seed)
        embedding = run_bhtsne(data=counts.values.T, **options, **kwargs)

        column_names = ['dimension ' + str(i + 1) for i in range(n_dims)]
        return pd.DataFrame(embedding,
                            index=counts.columns,
                            columns=column_names)
Пример #12
0
def get_tsne(matrix, dimensions=2):
    """Embed ``matrix`` with Barnes-Hut t-SNE.

    Uses a perplexity of 30.0, theta of 0.5 and ``rand_seed=-1``.

    :param matrix: data to embed (must support ``astype``)
    :param dimensions: number of output dimensions
    :return: the embedding returned by ``bhtsne.tsne``
    """
    # The dtype is given by name: ``sp.float64`` is a deprecated NumPy alias
    # that recent SciPy releases no longer export.
    return bhtsne.tsne(matrix.astype("float64"),
                       dimensions=dimensions,
                       perplexity=30.0,
                       theta=0.5,
                       rand_seed=-1)
Пример #13
0
    def fit_transform(
            self,
            data: 'np.ndarray | pd.DataFrame') -> 'np.ndarray | pd.DataFrame':
        """
        Fit the tSNE model to data given the parameters provided during
        initialization and return the embedding.

        Optional steps, controlled by attributes of ``self``: NaN/inf values
        are replaced by ``self.fillna`` when it is not None, and the data is
        reduced with PCA first when ``self.run_pca`` is true.  The result is
        also stored on ``self.tsne``.

        :param data: n observation x k feature data array
        :return: tsne results; a DataFrame indexed like ``data`` when a
            DataFrame was passed in, otherwise the raw bhtsne result
        """
        is_frame = isinstance(data, pd.DataFrame)
        data_ = data.values if is_frame else data

        if self.fillna is not None:
            # Fill on a float copy so the caller's array/DataFrame is not
            # overwritten in place.
            data_ = np.array(data_, dtype=float)
            data_[~np.isfinite(data_)] = self.fillna
        if self.run_pca:
            self.pca = PCA(n_components=self.n_pca_components)
            data_ = self.pca.fit_transform(data_)

        res = bhtsne.tsne(data_.astype(float),
                          dimensions=self.n_components,
                          **self.kwargs)

        if is_frame:
            self.tsne = pd.DataFrame(res, index=data.index)
        else:
            self.tsne = res
        return self.tsne
Пример #14
0
def compress_to_2dim(np_mat, split_pos_list, seed=-1, perplexity=30.0):
    """Embed ``np_mat`` in 2-D with t-SNE and split the result by rows.

    :param np_mat: array-like of feature rows (converted to float64)
    :param split_pos_list: split positions handed to ``np.split`` (axis 0)
    :param seed: value passed as ``rand_seed`` to ``bhtsne.tsne``
    :param perplexity: t-SNE perplexity
    :return: list of sub-arrays of the embedding
    """
    features = np.asarray(np_mat, dtype=np.float64)
    tsne_options = dict(dimensions=2,
                        perplexity=perplexity,
                        theta=0.5,
                        rand_seed=seed)
    embedded = bhtsne.tsne(data=features, **tsne_options)
    return np.split(embedded, split_pos_list, axis=0)
Пример #15
0
def run_tsne(data, n_dim=2, perplexity=150, **kwargs):
    """Run tSNE

    :param data: Dataframe of cells X genes. Typically multiscale space diffusion components
    :param n_dim: Number of dimensions for tSNE embedding
    :param perplexity: Perplexity passed to ``bhtsne.tsne``
    :param kwargs: Further keyword arguments passed to ``bhtsne.tsne``
    :return: tSNE embedding of the data as a DataFrame indexed like ``data``
    """
    embedding = bhtsne.tsne(data.values.astype(float),
                            dimensions=n_dim, perplexity=perplexity, **kwargs)

    # Keep 'x'/'y' for the 2-D case; name the columns by the actual width of
    # the embedding so other values of ``n_dim`` no longer fail.
    width = embedding.shape[1]
    axis_names = ['x', 'y', 'z']
    if width <= len(axis_names):
        columns = axis_names[:width]
    else:
        columns = ['dim_{}'.format(i + 1) for i in range(width)]
    return pd.DataFrame(embedding, index=data.index, columns=columns)
Пример #16
0
    def tsne(self, n_dims=2, perplexity=30, theta=0.5, rand_seed=0, **kwargs):
        '''t-SNE algorithm.

        Uses the ``bhtsne`` package with scikit-learn older than 0.19 and
        ``sklearn.manifold.TSNE`` otherwise.

        Args:
            n_dims (int): Number of dimensions to use.
            perplexity (float): Perplexity of the algorithm.
            theta (float): A number between 0 and 1. Higher is faster but
                less accurate (via the Barnes-Hut approximation).
            rand_seed (int): Random seed. -1 randomizes each run.
            **kwargs: Named arguments passed to the t-SNE algorithm. Only
                the bhtsne backend receives them.

        Returns:
            pandas.DataFrame: indexed by the columns of the counts table,
            with one 'dimension <i>' column per embedding axis.

        Raises:
            ValueError: if the perplexity is too high for the sample count.
        '''
        import re

        # scikit-learn's <0.19 has a bug
        import sklearn

        # Read only the leading digits of major/minor so versions such as
        # '1.0rc1' do not break int(); an unparsable version is treated as
        # a modern release.
        version_match = re.match(r'(\d+)\.(\d+)', sklearn.__version__)
        if version_match is None:
            use_bhtsne = False
        else:
            vmaj, vmin = (int(part) for part in version_match.groups())
            use_bhtsne = (vmaj == 0) and (vmin < 19)

        if use_bhtsne:
            from bhtsne import tsne
        else:
            from sklearn.manifold import TSNE

        n = self.dataset.n_samples
        if (n - 1 < 3 * perplexity):
            raise ValueError('Perplexity too high, reduce to <= {:}'.format(
                (n - 1.) / 3))

        X = self.dataset.counts.values

        if use_bhtsne:
            # this version does not require pre-whitening
            Y = tsne(data=X.T,
                     dimensions=n_dims,
                     perplexity=perplexity,
                     theta=theta,
                     rand_seed=rand_seed,
                     **kwargs)
        else:
            # -1 is documented above as "randomize each run", but it is not
            # a valid scikit-learn seed; None requests a fresh random state.
            random_state = None if rand_seed == -1 else rand_seed
            Y = TSNE(
                n_components=n_dims,
                perplexity=perplexity,
                method='barnes_hut' if theta > 0 else 'exact',
                angle=theta,
                random_state=random_state,
            ).fit_transform(X.T)

        vs = pd.DataFrame(
            Y,
            index=self.dataset.counts.columns,
            columns=['dimension ' + str(i + 1) for i in range(n_dims)])
        return vs
Пример #17
0
def word_vec_average(document_list, word2vec_model):
	"""Build one document vector per news item and embed them in 2-D.

	Each entry of ``document_list["news"]`` is turned into a vector of 200
	features by ``plain_word2vec_document_vector``; the stacked vectors are
	then embedded with Barnes-Hut t-SNE.

	:param document_list: mapping/table with a ``"news"`` entry that has a
		``size`` attribute and can be iterated
	:param word2vec_model: model handed to ``plain_word2vec_document_vector``
	:return: the embedding returned by ``bhtsne.tsne``
	"""
	num_features = 200
	news = document_list["news"]
	plainDocVec_all = np.zeros((news.size, num_features), dtype="float32")

	for row, sentence in enumerate(news):
		plainDocVec_all[row] = plain_word2vec_document_vector(sentence, word2vec_model, num_features)

	# ``sp.float64`` is a deprecated NumPy alias; use NumPy directly.
	return bhtsne.tsne(plainDocVec_all.astype(np.float64), dimensions=2, perplexity=30.0, theta=0.5, rand_seed=-1)
Пример #18
0
def show_tSNE(vecs, labels):
    """Embed ``vecs`` in 2-D with t-SNE and plot the points with annotations.

    The figure is saved to ``NLP100_69_result.png`` and then shown.

    :param vecs: sequence of vectors to embed
    :param labels: one annotation per vector, in the same order
    """
    # Embed the vectors with t-SNE.
    embedded = bhtsne.tsne(np.array(vecs).astype(np.float64), dimensions=2)
    # Create the figure (20x20 inches).
    plt.figure(figsize=(20, 20))
    # Scatter plot (``embedded`` holds one 2-D point per row).
    plt.scatter(embedded[:, 0], embedded[:, 1])
    # Annotate each point with the label that was passed in; the parameter
    # used to be ignored in favour of a module-level ``countries`` name.
    for point, label in zip(embedded, labels):
        plt.annotate(label, (point[0], point[1]))
    # Save the figure.
    plt.savefig('NLP100_69_result.png')
    plt.show()
Пример #19
0
def plot(ifPlot=False, path=None, train_motor=None, test_motor=None):
    """Embed sampled train/test LSTM features with t-SNE and plot both sets.

    Reads ``train_lstm_feature.npy``, ``test_lstm_feature.npy``,
    ``train_label.npy`` and ``test_label.npy`` from ``path``, samples up to
    2000 rows from each feature set, embeds the combined sample in one t-SNE
    run, and writes ``lstm_feature_tsne.npy`` (train part of the embedding)
    and ``lstm_feature_tsne.jpg`` back into ``path``.

    The parameter order is unchanged; ``path`` only carries a default
    because a parameter without one cannot follow ``ifPlot=False``.

    :param ifPlot: show the figure interactively when true
    :param path: directory holding the ``.npy`` inputs (required)
    :param train_motor: passed to ``plot_tsne_for_different_motor_left``
    :param test_motor: passed to ``plot_tsne_for_different_motor_right``
    :raises TypeError: if ``path`` is not given
    """
    if path is None:
        raise TypeError("plot() missing required argument: 'path'")

    train_lstm_feature = np.load('{}/train_lstm_feature.npy'.format(path))
    train_lstm_feature = np.reshape(train_lstm_feature,
                                    (train_lstm_feature.shape[0], -1))
    test_lstm_feature = np.load('{}/test_lstm_feature.npy'.format(path))
    test_lstm_feature = np.reshape(test_lstm_feature,
                                   (test_lstm_feature.shape[0], -1))

    train_labels = np.load('{}/train_label.npy'.format(path))
    test_labels = np.load('{}/test_label.npy'.format(path))

    train_lstm_feature = pd.DataFrame(train_lstm_feature)
    train_lstm_feature = train_lstm_feature.fillna(0)
    train_lstm_feature = train_lstm_feature.astype('float64')

    test_lstm_feature = pd.DataFrame(test_lstm_feature)
    test_lstm_feature = test_lstm_feature.fillna(0)
    test_lstm_feature = test_lstm_feature.astype('float64')

    # random.sample raises when asked for more rows than exist, so cap the
    # sample size per data set.
    sample_size = 2000
    n_train = min(sample_size, train_lstm_feature.shape[0])
    n_test = min(sample_size, test_lstm_feature.shape[0])
    train_sample = random.sample(range(0, train_lstm_feature.shape[0]),
                                 n_train)
    test_sample = random.sample(range(0, test_lstm_feature.shape[0]),
                                n_test)

    # Both samples go through a single t-SNE run; the first ``n_train`` rows
    # of the result belong to the train sample.
    fit_feature = pd.concat((train_lstm_feature.iloc[train_sample],
                             test_lstm_feature.iloc[test_sample]),
                            axis=0)
    tsne_result = tsne(fit_feature)

    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))
    np.save('{}/lstm_feature_tsne.npy'.format(path), tsne_result[:n_train])
    plot_tsne_for_different_motor_left(
        tsne_result[:n_train],
        np.reshape(train_labels[train_sample], (n_train,)), axes[0],
        train_motor)

    plot_tsne_for_different_motor_right(
        tsne_result[n_train:],
        np.reshape(test_labels[test_sample], (n_test,)), axes[1],
        test_motor)
    plt.savefig('{}/lstm_feature_tsne.jpg'.format(path))
    if ifPlot:
        plt.show()
Пример #20
0
def embed(edges, components, file_name):
  """Return a t-SNE embedding of ``edges``, cached as a pickle in ``file_name``.

  When ``file_name`` already exists its unpickled content is returned and no
  embedding is computed.  Otherwise ``edges`` is converted to float64,
  embedded with ``tsne`` and the result is pickled to ``file_name``.

  :param edges: array to embed (must support ``astype``)
  :param components: number of output dimensions
  :param file_name: path of the pickle cache
  :return: the embedding
  """
  if os.path.isfile(file_name):
    # Binary mode, matching the "wb" used when the cache is written.
    # NOTE: unpickling executes code from the file; only use cache files
    # this function wrote itself.
    with open(file_name, "rb") as f:
      return cPkl.load(f)
  # An earlier variant embedded a precomputed distance matrix
  # (row maximum minus edges) with scikit-learn's TSNE instead.
  distance_edges = edges.astype(np.float64)
  embeddings = tsne(distance_edges, dimensions=components, rand_seed=RANDOM_STATE)
  print(embeddings)
  print(embeddings.shape)
  with open(file_name, "wb") as f:
    cPkl.dump(embeddings, f, cPkl.HIGHEST_PROTOCOL)
  return embeddings
Пример #21
0
    def test_seed_positions(self):
        """Check that ``seed_positions`` keeps earlier points roughly in place.

        The first run embeds all but the last 10 iris samples.  Its output,
        extended by tiny random positions for the 10 remaining samples, seeds
        a second run over the full data set.  The first point of both runs
        must agree after dividing by 20 and rounding.  A comparison figure is
        saved under ``PLOTS_DIR``.
        """
        iris = load_iris()
        X_b = iris.data
        # Independent copy, so nothing the first run might do to its input
        # can leak into the data used by the second run.
        X_a = X_b[:-10].copy()
        Y_a = tsne(X_a, rand_seed=999)
        # Generate random positions for last 10 items
        n_missing = X_b.shape[0] - Y_a.shape[0]
        remainder_positions = np.array([[
            (random.uniform(0, 1) * 0.0001), (random.uniform(0, 1) * 0.0001)
        ] for _ in range(n_missing)])
        # Append them to previous TSNE output and use as seed_positions in next plot
        seed_positions = np.vstack((Y_a, remainder_positions))
        Y_b = tsne(X_b, seed_positions=seed_positions)

        self.assertEqual(round(Y_a[0][0] / 20), round(Y_b[0][0] / 20))
        self.assertEqual(round(Y_a[0][1] / 20), round(Y_b[0][1] / 20))

        plt.scatter(Y_a[:, 0], Y_a[:, 1], c='b')
        plt.scatter(Y_b[:-10, 0], Y_b[:-10, 1], c='r')
        plt.scatter(Y_b[-10:, 0], Y_b[-10:, 1], c='g')

        plt.savefig(PLOTS_DIR + '/iris_seed_positions.png')
        if os.environ.get('SHOW_PLOTS') is not None:
            plt.show()
        plt.close()
Пример #22
0
def visualize_data(data, labels, model_path, max_iter=1000):
  """Plot a t-SNE embedding of the network's predictions, one colour per class.

  A base network is built for the sample shape of ``data``, its weights are
  loaded from ``model_path``, and its predictions are embedded with ``tsne``.
  Each class id in ``0 .. labels.max()`` is drawn with a random colour.

  :param data: input samples for the network
  :param labels: integer class label of every sample
  :param model_path: path of the weights file to load
  :param max_iter: ``max_iter`` value passed to ``tsne``
  """
  network = create_base_network(data.shape[1:])
  network.load_weights(model_path)
  predictions = network.predict(data, 96, verbose=1)
  embedded = tsne(predictions.astype(np.float64), max_iter=max_iter)

  class_count = labels.max() + 1
  for class_id in range(class_count):
    members = embedded[labels == class_id]
    xs = members[:, 0]
    ys = members[:, 1]
    plt.plot(xs, ys,
             color=np.random.rand(3),
             marker='o', linestyle='', alpha=0.7)
  plt.show()
Пример #23
0
    def create_features(self):
        """Add 3-D t-SNE coordinates of selected columns as ``TSNE_<i>`` features.

        The selected columns of the module-level ``train`` and ``test``
        frames are stacked (train rows first) and embedded in one t-SNE run.
        The embedding is then split back and stored column by column on
        ``self.train`` and ``self.test``.
        """
        # NOTE(review): 'var_133' is listed twice, so that column is selected
        # twice -- confirm whether another variable was intended.
        feats = [
            'var_13', 'var_108', 'var_33', 'var_184', 'var_165', 'var_9',
            'var_169', 'var_166', 'var_127', 'var_76', 'var_6', 'var_22',
            'var_1', 'var_133', 'var_133', 'var_99', 'var_80', 'var_34',
            'var_110', 'var_177'
        ]
        whole = pd.concat([train[feats], test[feats]], axis=0).values

        tsne_array = tsne(data=whole,
                          dimensions=3,
                          perplexity=10.0,
                          theta=0.5,
                          rand_seed=1000)

        # Train rows come first in ``whole``, so the test rows start at
        # len(train), not at len(test).
        n_train = len(train)
        for i in range(tsne_array.shape[1]):
            col_ = 'TSNE_{}'.format(i + 1)
            self.train[col_] = tsne_array[:n_train, i]
            self.test[col_] = tsne_array[n_train:, i]
Пример #24
0
def write_projection(representation_file, output_file, method='PCA'):
    '''Project the output of a representational layer and write it to a .tsv file.

    :param representation_file: tab-separated file holding the representation
    :param output_file: path of the tab-separated file to write
    :param method: 'PCA' (scikit-learn PCA keeping as many components as the
        data allows) or 'tSNE' (bhtsne with 5 dimensions)
    :raises ValueError: if ``method`` is neither 'PCA' nor 'tSNE'
    '''
    # Reject unknown methods up front; previously this only surfaced as an
    # unbound ``transformed_X`` after the input had already been loaded.
    if method not in ('PCA', 'tSNE'):
        raise ValueError(
            "unknown method {!r}; expected 'PCA' or 'tSNE'".format(method))

    # ndmin=2 keeps single-row / single-column files two-dimensional.
    X = np.loadtxt(representation_file, delimiter='\t', ndmin=2)

    if method == 'PCA':
        from sklearn.decomposition import PCA
        # PCA cannot keep more components than min(n_samples, n_features);
        # for the usual n_samples >= n_features this is still X.shape[1].
        model = PCA(n_components=min(X.shape))
        transformed_X = model.fit_transform(X)
    else:
        from bhtsne import tsne
        transformed_X = tsne(X, dimensions=5)

    np.savetxt(output_file, transformed_X, delimiter='\t')
Пример #25
0
def compress_from_path(lst, resize=(64, 64), seed=-1):
    """Embed images with t-SNE and return the embedding split per group.

    :param lst: list of groups, each group being a list of image paths
    :param resize: size every image is resized to before flattening
    :param seed: value passed as ``rand_seed`` to ``bhtsne.tsne``
    :return: list of arrays, one per group of ``lst``, holding the embedded
        points of that group
    """
    # convert path list from 2dim to 1dim
    flatten_lst = flatten(lst)

    # Load, resize and flatten every image; the context manager closes the
    # underlying file as soon as the pixels have been copied out.
    pixel_rows = []
    for image_path in flatten_lst:
        with Image.open(image_path) as img:
            pixel_rows.append(np.ravel(img.resize(resize)))
    img_vec = np.asarray(pixel_rows, np.float64)

    # Run tSNE (Dimensional compression)
    compressed_vec = bhtsne.tsne(img_vec, rand_seed=seed)

    # Split positions are the running totals of the group sizes, without the
    # final total (which would only add an empty trailing piece).
    split_pos = np.cumsum([len(n) for n in lst])[:-1].tolist()

    # Return split vector each domain
    return np.split(compressed_vec, split_pos, axis=0)
Пример #26
0
    def fit_transform(self, X):
        """Embed ``X`` with the t-SNE implementation named by ``self.variant``.

        Supported variants: "bhtsne", "multicore", "sklearn", "optsne",
        "fitsne" and "cuda"; all receive ``self.perplexity``.

        :param X: data to embed
        :return: the embedding, or None when the variant is not recognised
        """
        # Each runner is wrapped in a lambda so only the selected backend is
        # ever touched.
        runners = (
            ("bhtsne",
             lambda: bhtsne.tsne(X, perplexity=self.perplexity)),
            ("multicore",
             lambda: MulticoreTSNE(
                 n_jobs=4, perplexity=self.perplexity).fit_transform(X)),
            ("sklearn",
             lambda: skTSNE(perplexity=self.perplexity).fit_transform(X)),
            ("optsne",
             lambda: OptSNE(perplexity=self.perplexity).fit_transform(X)),
            ("fitsne",
             lambda: FItSNE(X, perplexity=self.perplexity)),
            ("cuda",
             lambda: CudaTSNE(perplexity=self.perplexity).fit_transform(X)),
        )
        for variant_name, run in runners:
            if self.variant == variant_name:
                return run()

        return None
Пример #27
0
 def plotTSNE(self, ax, tsneData, dataLabels, perplexity, theta=0.5):
     '''
     Embed ``tsneData`` with t-SNE, scatter it on ``ax`` and decorate the axes.

     The spines and ticks are hidden and a black horizontal and vertical
     line are drawn through the centre of the axis limits.  The figure is
     shown at the end.

     :param ax: Matplotlib axes to draw on
     :param tsneData: data to embed (converted to float64)
     :param dataLabels: per-point values used as scatter colours
     :param perplexity: t-SNE perplexity
     :param theta: Barnes-Hut theta passed to ``tsne``
     '''
     lineWidth = 1.2
     tsneData = tsneData.astype('float64')
     tsneEmbedded = tsne(tsneData, perplexity=perplexity, theta=theta)
     plt.hsv()  # color model
     ax.scatter(tsneEmbedded[:, 0],
                tsneEmbedded[:, 1],
                s=45,
                c=dataLabels,
                edgecolors='w',
                linewidth=0.30,
                alpha=0.75)
     for side in ('top', 'right', 'bottom', 'left'):
         ax.spines[side].set_visible(False)
     # Booleans, not 'off': a non-empty string is truthy, so current
     # Matplotlib releases would leave the ticks switched on.
     ax.tick_params(axis='x',
                    which='both',
                    bottom=False,
                    top=False,
                    labelbottom=False)
     ax.tick_params(axis='y',
                    which='both',
                    left=False,
                    right=False,
                    labelleft=False)
     # Centre lines: (min + max) / 2 equals the previous
     # (max - abs(min)) / 2 for a negative lower limit and is also correct
     # when the lower limit is positive.
     xmin, xmax = ax.get_xlim()
     ax.axvline((xmin + xmax) / 2.0,
                color='black',
                linewidth=lineWidth)
     ymin, ymax = ax.get_ylim()
     ax.axhline((ymin + ymax) / 2.0,
                color='black',
                linewidth=lineWidth)
     plt.tight_layout()
     plt.show()
Пример #28
0
    def test_iris(self):
        """Run mean shift on raw iris and on its t-SNE embedding.

        On the raw features the first value returned by ``mean_shift`` must
        be 2; on the embedding the cluster count must be 2 or 3.  The
        embedding and the cluster centres are drawn into a figure saved
        under ``PLOTS_DIR``.
        """
        dataset = load_iris()
        features = dataset.data
        self.assertEqual(mean_shift(features)[0], 2)

        embedding = tsne(features)
        plt.scatter(embedding[:, 0], embedding[:, 1], c=dataset.target)

        num_clusters, cluster_centers = mean_shift(embedding)
        self.assertTrue(num_clusters > 1)
        self.assertTrue(num_clusters < 4)

        # Mark every cluster centre with a large red cross.
        marker_style = dict(markerfacecolor='r',
                            markeredgecolor='r',
                            markersize=16)
        for k in range(num_clusters):
            centre = cluster_centers[k]
            plt.plot(centre[0], centre[1], 'x', **marker_style)

        plt.savefig(PLOTS_DIR + '/iris.png')
        if 'SHOW_PLOTS' in os.environ:
            plt.show()
        plt.close()
Пример #29
0
def compress_from_path(lst, class_num, model_path, size, layer, seed=-1):
    """Embed extracted features with t-SNE and return them split per group.

    :param lst: list of groups, each group being a list of paths
    :param class_num: passed to ``FeatureExtractor``
    :param model_path: passed to ``FeatureExtractor``
    :param size: passed to ``FeatureExtractor``
    :param layer: layer argument for ``get_flat_feat``
    :param seed: value passed as ``rand_seed`` to ``bhtsne.tsne``
    :return: list of arrays, one per group of ``lst``
    """
    # Flatten the nested path list into a single list.
    paths = flatten(lst)

    # Build the network wrapper used for feature extraction.
    extractor = FeatureExtractor(class_num, model_path, size, gpu)

    # Extract one feature array per path.
    features = np.asarray(
        [extractor.get_flat_feat(path, layer) for path in paths])

    # Flatten every feature array into a single float64 row for t-SNE.
    rows = np.asarray([np.ravel(feature) for feature in features], np.float64)

    # Run tSNE (dimensional compression).
    embedded = bhtsne.tsne(rows, rand_seed=seed)

    # Split positions: running totals of the group sizes, without the last.
    boundaries = []
    running_total = 0
    for group in lst:
        running_total += len(group)
        boundaries.append(running_total)
    boundaries = boundaries[:len(boundaries) - 1]

    # Return the embedding split per group.
    return np.split(embedded, boundaries, axis=0)
Пример #30
0
def _save_bhtsne_2d(Y, n_vectors, stem, out_dir, with_center):
    """Write a 2-D embedding as ``<stem>.csv/.svg/.png/.pdf`` and ``<stem>_2d.csv``."""
    svg_path = '{}/{}.svg'.format(out_dir, stem)
    pd.DataFrame(Y).to_csv('{}/{}.csv'.format(out_dir, stem))
    if with_center:
        # The first ``n_vectors`` rows are the vectors, the rest the centres.
        plt.scatter(Y[:n_vectors, 0], Y[:n_vectors, 1], s=0.3)
        plt.scatter(Y[n_vectors:, 0],
                    Y[n_vectors:, 1],
                    s=0.8,
                    c='r',
                    marker='x')
    else:
        plt.scatter(Y[:, 0], Y[:, 1], s=0.3)
    plt.savefig(svg_path, bbox_inches='tight')
    plt.savefig('{}/{}.png'.format(out_dir, stem), bbox_inches='tight')
    pd.DataFrame(Y).to_csv('{}/{}_2d.csv'.format(out_dir, stem))
    pvtm_utils.svg_to_pdf(svg_path)
    plt.close()


def _save_bhtsne_3d(Y, n_vectors, red_start, stem, out_dir):
    """Write a 3-D embedding as ``<stem>_3d.svg/.png/.pdf/.csv``."""
    fig = pyplot.figure(frameon=False, figsize=(8, 5))
    # add_subplot attaches the 3-D axes to the figure explicitly; a bare
    # ``Axes3D(fig)`` is no longer added automatically by current Matplotlib.
    ax = fig.add_subplot(111, projection='3d')

    ax.scatter(Y[:n_vectors, 0],
               Y[:n_vectors, 1],
               Y[:n_vectors, 2],
               s=1,
               c='b',
               marker='^')
    ax.scatter(Y[red_start:, 0],
               Y[red_start:, 1],
               Y[red_start:, 2],
               s=20,
               c='r',
               marker='^')

    # Axis limits come from the vector rows only, pulled in by 4 per side.
    lows = Y[:n_vectors].min(axis=0)
    highs = Y[:n_vectors].max(axis=0)
    ax.set_xlim(lows[0] + 4, highs[0] - 4)
    ax.set_ylim(lows[1] + 4, highs[1] - 4)
    ax.set_zlim(lows[2] + 4, highs[2] - 4)

    svg_path = '{}/{}_3d.svg'.format(out_dir, stem)
    pyplot.savefig(svg_path, bbox_inches='tight')
    pyplot.savefig('{}/{}_3d.png'.format(out_dir, stem), bbox_inches='tight')
    pvtm_utils.svg_to_pdf(svg_path)

    pd.DataFrame(Y).to_csv('{}/{}_3d.csv'.format(out_dir, stem))
    # Close the figure; it used to stay open after saving.
    pyplot.close(fig)


def bhtsne(vectors, vecs_with_center, args):
    """Create 2-D and 3-D t-SNE plots and CSV dumps in ``args['path']``.

    ``vectors`` is first reduced to 50 PCA components.  Four embeddings are
    computed and written: 2-D and 3-D of the reduced vectors (file stem
    ``bhtsne``) and 2-D and 3-D of ``vecs_with_center.values`` (file stem
    ``bhtsne_with_center``), where rows after the first ``len(vectors)`` are
    drawn as centres.

    :param vectors: document vectors
    :param vecs_with_center: object whose ``values`` hold the vectors
        followed by the centre rows
    :param args: mapping with the keys ``'path'`` and ``'tsne_perplexity'``
    """
    pca = PCA(n_components=50)
    vectors = pca.fit_transform(vectors)

    out_dir = args['path']
    perplexity = args["tsne_perplexity"]
    n_vectors = len(vectors)

    print('Bhtsne..')
    Y = tsne(vectors, perplexity=perplexity)
    _save_bhtsne_2d(Y, n_vectors, 'bhtsne', out_dir, with_center=False)

    print('Bhtsne with center..')
    Y = tsne(vecs_with_center.values, perplexity=perplexity)
    _save_bhtsne_2d(Y, n_vectors, 'bhtsne_with_center', out_dir,
                    with_center=True)

    print('3D tsne...')

    Y = tsne(vectors, dimensions=3, perplexity=perplexity)
    # NOTE(review): without centres the red series (s=20) is drawn over all
    # points again, exactly as before -- confirm whether that is intended.
    _save_bhtsne_3d(Y, n_vectors, 0, 'bhtsne', out_dir)

    Y = tsne(vecs_with_center.values, dimensions=3, perplexity=perplexity)
    _save_bhtsne_3d(Y, n_vectors, n_vectors, 'bhtsne_with_center', out_dir)
Пример #31
0
def compose(builders, sizes, articles=None, output_dir=OUTPUT_CSV,
            vector=None, keys=None, indices=None, sset=None,
            classes=None):
    """
    Compose a combination of features, embed them with t-SNE and write them out.

    When ``vector`` or ``keys`` is missing, every builder is called with its
    size and the resulting feature matrices are concatenated column-wise.
    The vectors are then embedded with ``tsne`` (perplexity 32, halved after
    each failure), falling back to scikit-learn's TSNE when that never
    succeeds.  Coordinates and clusters are passed to ``output_write``.

    :param builders: feature builders, called as
        ``builder(articles, size, text_dir, sset)``; each returns a pair
        ``(features, model)``
    :param sizes: one size per builder
    :param articles: mapping of articles; loaded from ``text_dir`` if empty
    :param output_dir: passed to ``output_write``
    :param vector: precomputed feature matrix (used together with ``keys``)
    :param keys: keys belonging to the rows of ``vector``
    :param indices: passed to ``output_write``
    :param sset: subset marker; when falsy the results are cached globally
    :param classes: precomputed clusters; k-means is used when None
    :return: the feature matrix
    :raises ValueError: if features must be built but no builder is given
    :raises RuntimeError: if no t-SNE attempt succeeds
    """
    global gvec, cached_model, cached_tsne

    # ``is None`` instead of ``== None``: comparing an array with ``==``
    # is element-wise and cannot be used as a condition.
    if vector is None or keys is None:
        # If vector isn't provided
        sset = None
        if not articles:
            articles = load_data_folder(text_dir)
            gvec = articles
        else:
            sset = set(articles.keys())
        keys = list(articles.keys())

        # Start from None so the first builder initialises the matrix and
        # later ones are appended; the old ``-1`` sentinel made every
        # builder overwrite the previous result.
        vector = None
        for builder, size in zip(builders, sizes):
            features, _model = builder(articles, size, text_dir, sset)
            if vector is None:
                vector = features
            else:
                vector = np.append(vector, features, axis=1)
        if vector is None:
            raise ValueError("compose() needs at least one builder")
        print(vector.shape)

    gvec = vector

    coords = None
    perplexity = 32
    while coords is None and perplexity > 0:
        try:
            printl("trying perplexity", perplexity)
            coords = list(tsne(vector, verbose=True, perplexity=perplexity))
        except Exception as e:  # best effort: retry with a lower perplexity
            print(type(e))
            print(e)
            # Integer halving so the loop ends after 32, 16, ..., 1.
            perplexity //= 2

    if coords is None:
        print("T-SNE failed - all perplexity settings not working")
        print("Use Scikit-Learn's implementation of TSNE")
        perplexity = 32
        while coords is None and perplexity > 0:
            try:
                model = sk_tsne(n_components=2, random_state=0,
                                perplexity=perplexity)
                t1 = time.time()
                output = model.fit_transform(vector)
                coords = list(output)
                t2 = time.time()
                print("Time taken:", t2-t1, "seconds")
            except Exception as e:  # best effort, as above
                print(type(e))
                print(e)
                perplexity //= 2

    if coords is None:
        raise RuntimeError("t-SNE failed for every perplexity setting")

    if classes is None:
        clusters = kmeans_clusters(keys, vector)
    else:
        clusters = classes

    output_write(keys, coords, clusters=clusters, indices=indices,
                 output_dir=output_dir)

    # Caching
    if not sset:
        cached_model = vector
        cached_tsne = coords
    return vector
Пример #32
0
import numpy as np
import os
from bhtsne import tsne

# Directory containing this script; the cache files live in ROOT/cache.
ROOT = os.path.dirname(os.path.realpath(__file__))
# load encoded data; the context manager closes the .npz archive once the
# array has been read from it
with np.load(ROOT + '/cache/gru32_themes128.npz') as archive:
    enc_themes = archive['arr_0']
print(enc_themes.shape)
# load pure data
# themes = np.load(ROOT + '/cache/themes128.npz')
# themes = themes['arr_0']
# themes = themes.reshape(themes.shape[0], -1,)

# Positional arguments after the data: presumably dimensions=2 and
# perplexity=50.0 -- confirm against the installed bhtsne signature.
Y = tsne(enc_themes, 2, 50.0)

np.savez_compressed(ROOT+'/cache/tsne2_themes128.npz', Y)