示例#1
0
_ = ax.text2D(0.8, 0.05, s="n_samples=1500", transform=ax.transAxes)

# %%
# Computing the LLE and t-SNE embeddings, we find that LLE seems to unroll the
# Swiss Roll pretty effectively. t-SNE, on the other hand, is able to preserve
# the general structure of the data but poorly represents the continuous
# nature of our original data. Instead, it seems to unnecessarily clump
# sections of points together.

# LLE with 12 neighbours; the second return value is kept as ``sr_err``.
sr_lle, sr_err = manifold.locally_linear_embedding(
    sr_points, n_neighbors=12, n_components=2
)

# t-SNE with PCA initialisation and a fixed seed.
sr_tsne = manifold.TSNE(
    n_components=2,
    learning_rate="auto",
    perplexity=40,
    init="pca",
    random_state=0,
).fit_transform(sr_points)

# One row per method: LLE on top, t-SNE below, both coloured by ``sr_color``.
fig, axs = plt.subplots(nrows=2, figsize=(8, 8))
axs[0].scatter(sr_lle[:, 0], sr_lle[:, 1], c=sr_color)
axs[1].scatter(sr_tsne[:, 0], sr_tsne[:, 1], c=sr_color)
axs[0].set_title("LLE Embedding of Swiss Roll")
_ = axs[1].set_title("t-SNE Embedding of Swiss Roll")

# %%
# .. note::
#
#     LLE seems to be stretching the points from the center (purple)
#     of the swiss roll. However, we observe that this is simply a byproduct
#     of how the data was generated. There is a higher density of points near the
示例#2
0
assert (np.isclose(
    y_1,
    y_2).all()), "Invalid labels given: different labels for raw and scat data"
y = y_1
labels_uniq = np.unique(y)
fig, axs = plt.subplots(1, 2, figsize=(18, 6))
#fig, subplots = plt.subplots(3, 5, figsize=(15, 8))
if method == 'pca':
    pca = decomposition.PCA(n_components=2)
    X_1 = pca.fit_transform(X_1)
    X_2 = pca.fit_transform(
        X_2)  # TODO: check if doing this without redefining pca is ok
else:
    # TODO: check if setting random_state gives consistent results
    tsne = manifold.TSNE(n_components=2,
                         init='random',
                         random_state=0,
                         perplexity=perplexity)
    X_1 = tsne.fit_transform(X_1)
    X_2 = tsne.fit_transform(X_2)

for label in labels_uniq:
    X_label_1 = X_1[label == y, :]
    X_label_2 = X_2[label == y, :]
    axs[0].scatter(X_label_1[:, 0],
                   X_label_1[:, 1],
                   s=marker_size,
                   label=label)
    axs[1].scatter(X_label_2[:, 0],
                   X_label_2[:, 1],
                   s=marker_size,
                   label=label)
     if stuff_hrep_tr is None:
         stuff_hrep_tr = l[2]
     else:
         stuff_hrep_tr = np.vstack((stuff_hrep_tr, l[2]))
 pkl.dump(
     {
         "x_hint_repr_tst": stuff_hrep_tst,
         "y_tst": testy,
         "ximg_tst": testx.reshape((testx.shape[0], 28 * 28)),
         "x_hint_repr_tr": stuff_hrep_tr,
         "y_tr": trainy,
         "ximg_tr": trainx.reshape((trainx.shape[0], 28 * 28))
     }, fhr)
 # plot t-SNE of the original images
 tx0 = DT.datetime.now()
 tsne_original = manifold.TSNE(n_components=2, init='pca', random_state=0)
 X_tsne_original = tsne_original.fit_transform(
     testx.reshape((testx.shape[0], 28 * 28)))
 fig_tsne_org = plot_representations(
     X_tsne_original, testy, "t-SNE embedding of mnist original images.")
 fig_tsne_org.savefig(fold_exp + "/original_rep_test.eps",
                      format='eps',
                      dpi=1200,
                      bbox_inches='tight')
 print "t-SNE of original images took:", DT.datetime.now() - tx0
 # plot t-SNE of the prediction
 tx0 = DT.datetime.now()
 tsne_lasthidden_rep = manifold.TSNE(n_components=2,
                                     init='pca',
                                     random_state=0)
 X_tsne_lhrep = tsne_original.fit_transform(stuff_hrep_tst)
示例#4
0
def _embed_and_plot(fig, position, label, estimator, X, ids, mat_info):
  """Fit ``estimator`` on ``X``, draw the 2-D result and package it.

  ``position`` is the ``(rows, cols, index)`` triple passed to
  ``fig.add_subplot``; ``label`` is used for both the timing message and the
  subplot title. Returns a dict with the embedding's two coordinates as plain
  lists under ``'x'`` / ``'y'`` plus ``ids`` and ``mat_info`` passed through
  under ``'ids'`` / ``'matInfo'``.
  """
  t0 = time()
  Y = estimator.fit_transform(X)
  t1 = time()
  print("%s: %.2g sec" % (label, t1 - t0))

  ax = fig.add_subplot(*position)
  # No per-point colour values exist here, so no colormap is passed either.
  plt.scatter(Y[:, 0], Y[:, 1])
  plt.title("%s (%.2g sec)" % (label, t1 - t0))
  ax.xaxis.set_major_formatter(NullFormatter())
  ax.yaxis.set_major_formatter(NullFormatter())
  plt.axis('tight')

  return {
    'x': Y[:, 0].tolist(),
    'y': Y[:, 1].tolist(),
    'ids': ids,
    'matInfo': mat_info
  }


def start_manifold_learning(input):
  """Embed ``input['numpyArr']`` in 2-D with four manifold learners.

  ``input`` is a mapping that must provide:

  * ``'numpyArr'`` - a 2-D array with at least three columns (the first three
    are drawn in the 3-D overview panel);
  * ``'ids'`` and ``'matInfo'`` - copied unchanged into every result entry.

  Isomap, MDS, Spectral Embedding and t-SNE are fitted in that order. Each
  run prints its duration and is drawn into a shared 2x5 figure, which is
  neither shown nor saved here.

  Returns a dict keyed ``'Isomap'``, ``'MDS'``, ``'Spectral Embedding'`` and
  ``'TSNE'``; each value holds ``'x'``, ``'y'``, ``'ids'`` and ``'matInfo'``.
  """
  X = input['numpyArr']
  ids = input['ids']
  mat_info = input['matInfo']
  print(type(X))

  n_neighbors = 10
  n_components = 2

  fig = plt.figure(figsize=(15, 8))
  # Report the real number of samples rather than a hard-coded 1000.
  plt.suptitle("Manifold Learning with %i points, %i neighbors"
               % (len(X), n_neighbors), fontsize=14)

  ax = fig.add_subplot(251, projection='3d')
  ax.scatter(X[:, 0], X[:, 1], X[:, 2])
  ax.view_init(4, -72)

  res = {}

  # Estimator options are passed by keyword: recent scikit-learn releases
  # reject positional constructor arguments for Isomap.
  res['Isomap'] = _embed_and_plot(
    fig, (2, 5, 7), "Isomap",
    manifold.Isomap(n_neighbors=n_neighbors, n_components=n_components),
    X, ids, mat_info)
  print('Learning data: ')
  print('x: ' + str(len(res['Isomap']['x'])))
  print('y: ' + str(len(res['Isomap']['y'])))
  print('ids: ' + str(len(res['Isomap']['ids'])))
  print('matInfo: ' + str(len(res['Isomap']['matInfo'])))

  res['MDS'] = _embed_and_plot(
    fig, (2, 5, 8), "MDS",
    manifold.MDS(n_components=n_components, max_iter=100, n_init=1),
    X, ids, mat_info)

  res['Spectral Embedding'] = _embed_and_plot(
    fig, (2, 5, 9), "SpectralEmbedding",
    manifold.SpectralEmbedding(n_components=n_components,
                               n_neighbors=n_neighbors),
    X, ids, mat_info)

  res['TSNE'] = _embed_and_plot(
    fig, (2, 5, 10), "t-SNE",
    manifold.TSNE(n_components=n_components, init='pca', random_state=0),
    X, ids, mat_info)

  return res
#feature selection
lasso_selector = linear_model.Lasso()
lasso_selector.fit(X_train, y_train)
print(lasso_selector.coef_)
utils.plot_feature_importances(lasso_selector, X_train, 40)

X_train1 = utils.select_features(lasso_selector, X_train)

#reduce features for visualization
utils.corr_heatmap(X_train1)
lpca = decomposition.PCA(n_components=0.95)
lpca.fit(X_train1)
print(np.cumsum(lpca.explained_variance_ratio_))
pca_data = lpca.transform(X_train1)

tsne = manifold.TSNE(n_components=3)
tsne_data = tsne.fit_transform(pca_data)
rutils.plot_data_3d_regression(tsne_data, y_train)

#build model with regression machine learning algorithms
scoring = metrics.make_scorer(log_rmse, greater_is_better=False)

knn_estimator = neighbors.KNeighborsRegressor()
knn_grid = {'n_neighbors': list(range(5, 15))}
final_model = utils.grid_search_best_model(knn_estimator,
                                           knn_grid,
                                           pca_data,
                                           y_train,
                                           scoring=scoring)

X_test = house3[house_train.shape[0]:]
示例#6
0
    X[:, c] = n_enc

# 3. what does an embedding of all int values look like?
print('embedding int values..')
plt.figure(1)

# Select the integer-typed columns and work on a float64 copy so NaN can be
# represented and the in-place scaling below is done in floating point.
X_int = X[:, np.array(int_idx)]
X_int = np.float64(X_int)

#replace nan
# NaN is the only value that is not equal to itself, so this mask picks out
# exactly the NaN entries and sets them to 0.
X_int[X_int != X_int] = 0
# Shift every column so that its minimum becomes 0.
X_int -= np.min(X_int, axis=0)
# Divide every column by its maximum plus 0.001; the small offset avoids a
# division by zero for all-zero columns and scales each column into [0, 1).
X_int /= (.001 + np.max(X_int, axis=0)
          )

tsne = manifold.TSNE(n_components=2, init='pca')
Y_int = tsne.fit_transform(X_int)

# The first len(X1) rows are plotted as 'train' (coloured by y1); the
# remaining rows are plotted as 'test'.
plt.scatter(Y_int[len(X1):, 0], Y_int[len(X1):, 1], marker='.', label='test')
sp = plt.scatter(Y_int[:len(X1), 0], Y_int[:len(X1), 1], c=y1, label='train')
plt.legend(prop={'size': 6})
plt.colorbar(sp)
plt.title('t-SNE embedding of int variables')
plt.savefig('t-SNE_int.png')
plt.show()

# 4: what does an embedding of all string values look like?
print('embedding string values...')
plt.figure(2)
X_str = X[:, np.array(cat_idx)]
# replace nan
示例#7
0
def plot_tsne_result(X, y, n_components):
    """Plot snapshots of a t-SNE optimisation next to its similarity matrices.

    ``manifold.TSNE`` is run on ``X`` with scikit-learn's private gradient
    descent temporarily replaced by a recording version, so the embedding and
    the error of every iteration are captured. A 3x4 figure is then drawn:

    * panel 1 - the original data (only when ``X`` has 2 or 3 columns);
    * panel 2 - the pairwise input similarities;
    * panels 3-12 - for five evenly spaced iterations, the embedding (only
      when ``n_components`` is 2 or 3) followed by its low-dimensional
      similarity matrix ``Q``.

    Parameters
    ----------
    X : array of samples, one row per sample.
    y : labels forwarded unchanged to the plotting helpers.
    n_components : int, dimensionality of the t-SNE embedding.
    """
    positions = []
    errors = []

    def _gradient_descent(objective,
                          p0,
                          it,
                          n_iter,
                          n_iter_check=1,
                          n_iter_without_progress=300,
                          momentum=0.8,
                          learning_rate=200.0,
                          min_gain=0.01,
                          min_grad_norm=1e-7,
                          verbose=0,
                          args=None,
                          kwargs=None):
        """Gradient descent with momentum and gains that records each step.

        Drop-in replacement for scikit-learn's private ``_gradient_descent``:
        before every update the current parameters are appended to
        ``positions`` and the objective value to ``errors``.
        Returns ``(p, error, i)``: final parameters, last error, last
        iteration index.
        """
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}

        p = p0.copy().ravel()
        update = np.zeros_like(p)
        gains = np.ones_like(p)
        # ``np.float`` was removed from NumPy; the builtin float is the
        # same type.
        error = np.finfo(float).max
        best_error = np.finfo(float).max
        best_iter = i = it

        tic = time()
        for i in range(it, n_iter):
            positions.append(p.copy())

            error, grad = objective(p, *args, **kwargs)
            errors.append(error)
            grad_norm = linalg.norm(grad)

            inc = update * grad < 0.0
            dec = np.invert(inc)
            gains[inc] += 0.2
            gains[dec] *= 0.8
            np.clip(gains, min_gain, np.inf, out=gains)
            grad *= gains
            update = momentum * update - learning_rate * grad
            p += update

            if (i + 1) % n_iter_check == 0:
                toc = time()
                duration = toc - tic
                tic = toc

                # Report only at check points: ``duration`` is defined here
                # and nowhere else, so printing outside this branch would
                # fail whenever n_iter_check > 1.
                if verbose >= 2:
                    print("[t-SNE] Iteration %d: error = %.7f,"
                          " gradient norm = %.7f"
                          " (%s iterations in %0.3fs)" %
                          (i + 1, error, grad_norm, n_iter_check, duration))

            if error < best_error:
                best_error = error
                best_iter = i
            elif i - best_iter > n_iter_without_progress:
                if verbose >= 2:
                    print("[t-SNE] Iteration %d: did not make any progress "
                          "during the last %d episodes. Finished." %
                          (i + 1, n_iter_without_progress))
                break
            if grad_norm <= min_grad_norm:
                if verbose >= 2:
                    print("[t-SNE] Iteration %d: gradient norm %f. Finished." %
                          (i + 1, grad_norm))
                break

        return p, error, i

    D = pairwise_distances(X, squared=True)
    # NOTE(review): 30. is the second positional argument of the imported
    # ``_joint_probabilities`` helper - presumably the perplexity; confirm.
    P_binary = _joint_probabilities(D, 30., False)
    P_binary_s = squareform(P_binary)

    # scikit-learn >= 0.22 keeps the implementation in ``manifold._t_sne``;
    # older releases expose it as ``manifold.t_sne``. Patch whichever module
    # TSNE actually reads from, and always restore the original afterwards so
    # later TSNE runs are not silently recorded into these lists.
    tsne_module = getattr(manifold, "_t_sne", None) or manifold.t_sne
    original_gradient_descent = tsne_module._gradient_descent
    tsne_module._gradient_descent = _gradient_descent
    try:
        manifold.TSNE(n_components=n_components,
                      random_state=100).fit_transform(X)
    finally:
        tsne_module._gradient_descent = original_gradient_descent

    # np.dstack needs a real sequence; generators are rejected by newer NumPy.
    X_iter = np.dstack(
        [position.reshape(-1, n_components) for position in positions])

    cmap = sns.light_palette("blue", as_cmap=True)

    fig = plt.figure(figsize=(12, 12))
    if X.shape[1] == 3:
        ax1 = fig.add_subplot(3, 4, 1, projection='3d')
        plot_data_3d_classification(X,
                                    y,
                                    ax=ax1,
                                    new_window=False,
                                    title="Original Data")
    elif X.shape[1] == 2:
        ax1 = fig.add_subplot(3, 4, 1)
        plot_data_2d_classification(X,
                                    y,
                                    ax=ax1,
                                    new_window=False,
                                    title="Original Data")

    ax2 = fig.add_subplot(3, 4, 2)
    plot_distance_matrix(P_binary_s, ax2, cmap, 'Pairwise Similarities')

    # Five snapshots, evenly spaced over the recorded iterations. Each one
    # uses two consecutive panels, starting at panel 3.
    iter_size = len(positions) // 5
    k = 2
    for i in range(5):
        iter_index = i * iter_size
        tmp = X_iter[..., iter_index]
        err = round(errors[iter_index], 2)
        title = "Iter: " + str(iter_index) + " Loss:" + str(err)

        k = k + 1
        if X_iter.shape[1] == 3:
            ax3 = fig.add_subplot(3, 4, k, projection='3d')
            plot_data_3d_classification(tmp,
                                        y,
                                        ax=ax3,
                                        new_window=False,
                                        title=title)
        elif X_iter.shape[1] == 2:
            ax3 = fig.add_subplot(3, 4, k)
            plot_data_2d_classification(tmp,
                                        y,
                                        ax=ax3,
                                        new_window=False,
                                        title=title)

        k = k + 1
        ax4 = fig.add_subplot(3, 4, k)
        # Normalised 1 / (1 + squared distance) similarities of the snapshot.
        n = 1. / (pdist(tmp, "sqeuclidean") + 1)
        Q = n / (2.0 * np.sum(n))
        Q = squareform(Q)
        plot_distance_matrix(Q, ax4, cmap, title=title)
    plt.subplots_adjust(wspace=0.1, hspace=0.5)
示例#8
0
            title=title.format(e), show=False)

    # -----------------------------------------------
    # Transform using xdawn
    data_1 = xdawn.transform(epochs_1)[:, :n_components]
    data_2 = xdawn.transform(epochs_2)[:, :n_components]

    # Get data
    X, y, z = epochs_get_MVPA_data([epochs_1, epochs_2])
    Xd = np.concatenate([data_1, data_2], axis=0)
    X.shape, Xd.shape, y.shape, z.shape

    # -----------------------------------------------
    # Calculate TSNE manifold
    vectorizer = mne.decoding.Vectorizer()
    tsne = manifold.TSNE(n_components=2, n_jobs=n_jobs)
    X2 = tsne.fit_transform(vectorizer.fit_transform(Xd))
    X2.shape

    # -----------------------------------------------
    # Plot in TSNE manifold
    plt.style.use('ggplot')
    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    yy = y + z * 10
    for j in np.unique(yy):
        print(j)
        ax.scatter(X2[yy == j, 0], X2[yy == j, 1], alpha=0.5, label=j)
    ax.legend()
    drawer.fig = fig

    # -----------------------------------------------
示例#9
0
    def __init__(self, vis=None):
        """Keep the optional ``vis`` object and create a 2-D t-SNE estimator.

        The estimator uses PCA initialisation and ``random_state=0``.
        """
        tsne_settings = {'n_components': 2, 'init': 'pca', 'random_state': 0}
        self.tsne = manifold.TSNE(**tsne_settings)
        self.vis = vis
示例#10
0
        X_all = o_data.df[o_config.free_parameter_names + o_config.qoi_names]
        X_all_scaled = preprocessing.scale(X_param)
        mds_n_components = 2
        mds_all = MDS(n_components = 2,max_iter=1000, n_init=1)
        Y_all = mds_all.fit_transform(X_all_scaled)
        ax[2].scatter(Y_all[:,0],Y_all[:,1],s=1,c='black')

    elif manifold_learn_config['manifold_type'] == 'tsne':
        manifold_learn_config['config'] = OrderedDict()
        manifold_learn_config['config']['n_components'] = 2
        manifold_learn_config['config']['init'] = 'pca'
        manifold_learn_config['config']['random_state'] = 0

        print('parameter_analysis')
        X_param = o_data.df[o_config.free_parameter_names]
        Y_param = manifold.TSNE(**manifold_learn_config['config']).fit_transform(X_param)
        print(X_param.shape)
        print(Y_param.shape)
        ax[0].scatter(Y_param[:,0],Y_param[:,1],s=1,c='black')

        print('qoi_analysis')
        X_qoi = o_data.df[o_config.qoi_names]
        Y_qoi = manifold.TSNE(**manifold_learn_config['config']).fit_transform(X_qoi)
        print(X_qoi.shape)
        print(Y_qoi.shape)
        ax[1].scatter(Y_qoi[:,0],Y_qoi[:,1],s=1,c='black')

        print('parameter+qoi')
        X_all = o_data.df[o_config.free_parameter_names + o_config.qoi_names]
        Y_all = manifold.TSNE(**manifold_learn_config['config']).fit_transform(X_all)
        print(X_all.shape)
示例#11
0

# Scale and visualize the embedding vectors
def plot_embedding(X, y, title=None):
    """Draw every sample's label as text at its rescaled 2-D position.

    Each column of ``X`` is min-max scaled first. The label of row ``i`` is
    ``y[i, 0]``; its colour is ``plt.cm.Set1(m[label] / 21.)``, where ``m``
    is a mapping defined outside this function. A title is set only when
    one is given.
    """
    lower, upper = np.min(X, 0), np.max(X, 0)
    X = (X - lower) / (upper - lower)

    plt.figure()
    plt.subplot(111)
    for i in range(X.shape[0]):
        label = y[i, 0]
        plt.text(X[i, 0],
                 X[i, 1],
                 label,
                 color=plt.cm.Set1(m[label] / 21.),
                 fontdict=dict(weight='bold', size=9))

    if title is None:
        return
    plt.title(title)


# Embed the digit utterances with default t-SNE settings, plot and save them
print("Computing t-SNE embedding")
tsne = manifold.TSNE()
X_tsne = tsne.fit_transform(X)
plot_embedding(
    X_tsne,
    y,
    title="t-SNE 2D embedding of English and Spanish digit utterances",
)
plt.savefig('tsne.png')
示例#12
0
import matplotlib.colors as colors
import matplotlib.cm as cmx
import matplotlib as mpl

matrix1 = gensim.matutils.corpus2dense(p, num_terms=4)
matrix3=matrix1.T
matrix3

from sklearn import manifold, datasets, decomposition, ensemble,discriminant_analysis, random_projection

def norm(x):
    """Min-max scale ``x`` into the range [0, 1].

    The minimum and maximum are taken over the whole input (no axis is
    given), so every element is scaled by the same global range.

    A constant input has zero range. Instead of dividing by zero, which
    produces NaN and a runtime warning, an all-zero float array of the same
    shape is returned.
    """
    lowest = np.min(x)
    span = np.max(x) - lowest
    if span == 0:
        return np.zeros_like(x, dtype=float)
    return (x - lowest) / span

X=norm(matrix3)

tsne = manifold.TSNE(n_components=2, init='pca', random_state=0,perplexity=50,verbose=1,n_iter=1500)
X_tsne = tsne.fit_transform(X)

### WORK HERE - COMO DESCOBRI QUE TINHA 3 CLUSTERS ???? SORT X_tsne
## DEFINE K-MEANS
plt.hist(X_tsne)

from sklearn.cluster import KMeans
model3=KMeans(n_clusters=4,random_state=0)
model3.fit(X_tsne)
cc=model3.predict(X_tsne)

## ALSO TRY COM X PARA VER QUE TOPICO SELECIONA

tokens2 = word_tokenize(str(sentences2))
示例#13
0
    def __init__(self, components=(0, 1)):
        """Remember the requested components and build a matching t-SNE model.

        ``components`` is a collection of integers (presumably zero-based
        embedding dimension indices - confirm against the callers). The t-SNE
        model is created with ``max(components) + 1`` output dimensions and
        PCA initialisation.

        Raises ``Exception`` when ``components`` is ``None``.
        """
        if components is None:
            raise Exception("Component error.")

        # Keep a private list copy. The default is an immutable tuple and a
        # caller-supplied list is not aliased, so changing
        # ``self.components`` cannot leak into other instances or the caller.
        self.components = list(components)
        self.tsne = manifold.TSNE(n_components=max(self.components) + 1,
                                  init='pca')
示例#14
0
def train_kmeans_tsne(train_dataloader, test_dataloader, autoencoder,
                      Maxepoch):
    """Cluster the test latents into two groups via PCA -> t-SNE -> K-means.

    No training happens here: a checkpoint is loaded from
    ``'model_best.pkl'`` and used to encode every batch of
    ``test_dataloader``. ``train_dataloader`` and ``Maxepoch`` are accepted
    for interface compatibility but are not used.

    Steps:
      1. encode all test batches and flatten each sample's latent;
      2. standardise every latent dimension;
      3. reduce to 32 dimensions with PCA, then to 2 with t-SNE;
      4. save the passed-in ``autoencoder`` to ``'model.pkl'``;
      5. run 2-cluster K-means on the 2-D points.

    Returns ``(latents, result)``: the 2-D t-SNE coordinates and the 0/1
    cluster label of every sample.
    """
    # NOTE(review): torch.load unpickles the file - only load checkpoints
    # from a trusted source.
    autoencoder1 = torch.load('model_best.pkl')

    # Collect the latents. Gradients are not needed for inference, so
    # disable autograd to avoid building graphs for every batch.
    latents = []
    with torch.no_grad():
        for x in test_dataloader:
            latent, _ = autoencoder1(x)
            latents.append(latent.cpu().detach().numpy())

    # One flat row per sample, whatever the size of the test set.
    latents = np.concatenate(latents, axis=0)
    latents = latents.reshape(latents.shape[0], -1)

    # Standardise each dimension. A constant dimension has zero spread; it
    # is divided by 1 so it becomes 0 instead of NaN.
    spread = np.std(latents, axis=0)
    spread[spread == 0] = 1
    latents = (latents - np.mean(latents, axis=0)) / spread

    print("TSNE")

    latents = PCA(n_components=32).fit_transform(latents)
    tsne = manifold.TSNE(n_components=2, init='pca', random_state=8700)
    latents = tsne.fit_transform(latents)
    # Save the whole network.
    # NOTE(review): this saves the ``autoencoder`` argument, not the
    # checkpoint loaded above - confirm that this is intended.
    torch.save(autoencoder, 'model.pkl')

    result = KMeans(n_clusters=2).fit(latents).labels_
    print("KMeans")
    print(latents.shape)
    # The first 5 labels are known to be zeros; if most of them came out as
    # 1, the two cluster ids are swapped and must be flipped.
    if np.sum(result[:5]) >= 3:
        result = 1 - result

    return latents, result
示例#15
0
def reduce_to_2D(X, Y):
    """Embed ``X`` in 2-D with eight manifold learners and show the results.

    Four Locally Linear Embedding variants, Isomap, MDS, Spectral Embedding
    and t-SNE are fitted one after another. Each run prints its duration and
    is drawn into its own panel of a shared 2x5 figure, coloured by ``Y``.
    The figure is shown at the end; nothing is returned.

    Parameters
    ----------
    X : samples to embed, one row per sample.
    Y : per-sample values used as scatter colours.
    """
    color = Y
    n_neighbors = 10
    n_components = 2

    fig = plt.figure(figsize=(15, 8))
    plt.suptitle('Manifold Learning with %i points, %i neighbors'
                 % (len(X), n_neighbors), fontsize=14)

    # (label in the timing message, panel title, panel index, estimator).
    # Estimator options are passed by keyword: recent scikit-learn releases
    # reject positional constructor arguments for LLE and Isomap.
    lle_variants = [('standard', 'LLE'),
                    ('ltsa', 'LTSA'),
                    ('hessian', 'Hessian LLE'),
                    ('modified', 'Modified LLE')]
    runs = [
        (method, panel_title, 2 + i,
         manifold.LocallyLinearEmbedding(n_neighbors=n_neighbors,
                                         n_components=n_components,
                                         eigen_solver='auto',
                                         method=method))
        for i, (method, panel_title) in enumerate(lle_variants)
    ]
    runs += [
        ('Isomap', 'Isomap', 7,
         manifold.Isomap(n_neighbors=n_neighbors,
                         n_components=n_components)),
        ('MDS', 'MDS', 8,
         manifold.MDS(n_components=n_components, max_iter=100, n_init=1)),
        ('SpectralEmbedding', 'SpectralEmbedding', 9,
         manifold.SpectralEmbedding(n_components=n_components,
                                    n_neighbors=n_neighbors)),
        ('t-SNE', 't-SNE', 10,
         manifold.TSNE(n_components=n_components, init='pca',
                       random_state=0)),
    ]

    for log_label, panel_title, panel_index, estimator in runs:
        t0 = time()
        embedding = estimator.fit_transform(X)
        t1 = time()
        print('%s: %.2g sec' % (log_label, t1 - t0))

        ax = fig.add_subplot(2, 5, panel_index)
        plt.scatter(embedding[:, 0], embedding[:, 1], c=color,
                    cmap=plt.cm.Spectral)
        plt.title('%s (%.2g sec)' % (panel_title, t1 - t0))
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        plt.axis('tight')

    plt.show()
示例#16
0
def getTSNE():
    """Return a new ``manifold.TSNE`` estimator built with no arguments."""
    estimator = manifold.TSNE()
    return estimator
示例#17
0
n_components = 3
perplexity = 50
NUM_COLORS = 60

cm = pylab.get_cmap('gist_rainbow')

if __name__ == "__main__":
    X, y = load_data()
    #print(X.shape)
    print(len(X))
    for n_components in range(2, 4):
        for perplexity in range(20, 50, 5):
            for random_state in range(0, 2):
                t0 = time()
                tsne = manifold.TSNE(n_components=n_components,
                                     init='pca',
                                     random_state=random_state,
                                     perplexity=perplexity)
                X_ = tsne.fit_transform(X)
                print(len(X_))
                print(len(X_[0]))
                np.save("data/tsne", X_)
                t1 = time()  # 修改
                print("t-SNE: %.2g sec" % (t1 - t0))
                x_min, x_max = X_.min(0), X_.max(0)
                X_norm = (X_ - x_min) / (x_max - x_min)  # 归一化
                plt.figure(figsize=(8, 8))
                for i in range(X_norm.shape[0]):
                    #plt.text(X_norm[i, 0], X_norm[i, 1], str(y[i]), color=plt.cm.Set1(y[i]),
                    # fontdict={'weight': 'bold', 'size': 9})
                    plt.scatter(X_norm[i, 0],
                                X_norm[i, 1],
def plot_tsne(model,
              df,
              words=None,
              vectors=None,
              target_subfeatures_mask=None,
              perplexity=5,
              title="TSNE",
              colour=False,
              n_components=2):
    """Project word vectors with exact t-SNE and draw an annotated scatter.

    Parameters
    ----------
    model : object exposing ``wv.vocab`` and ``wv.word_vec``; when it is
        not ``None`` it replaces ``words`` and ``vectors``.
        NOTE(review): this looks like the pre-4.0 gensim API - confirm.
    df : data frame, used only when ``colour`` is true. Each column gets one
        viridis colour and every unique value of the column maps to it (a
        value present in several columns takes the colour of the last one).
    words, vectors : labels and their vectors, used when ``model`` is None.
    target_subfeatures_mask : optional boolean mask with one entry per word.
        Without ``colour``, flagged points are red and the rest blue. With
        ``colour``, only the unflagged points are drawn.
    perplexity, n_components : forwarded to ``manifold.TSNE``.
    title : figure title.
    colour : colour points by the data-frame column their word belongs to.

    The figure is saved as ``"tsne_2_components"`` and then shown.
    ``plt.rcParams['font.size']`` is set to 14 as a side effect.
    """
    if model is not None:
        words = list(model.wv.vocab.keys())
        vectors = [model.wv.word_vec(word) for word in words]

    if colour:
        cols = df.columns
        clrs = plt.get_cmap('viridis')(np.linspace(0, 1, len(cols)))
        clr_dict = {}
        for column, column_colour in zip(cols, clrs):
            for value in df[column].unique():
                clr_dict[value] = column_colour
        # Kept as an array so it can be filtered with the same mask as the
        # points below.
        clrs_points = np.asarray([clr_dict[w] for w in words])

    tsne = manifold.TSNE(n_components=n_components,
                         init='pca',
                         random_state=10,
                         method='exact',
                         perplexity=perplexity)
    Y = tsne.fit_transform(vectors)

    plt.figure(figsize=(18, 12))
    if target_subfeatures_mask is None:
        target_subfeatures_mask = np.zeros(Y.shape[0], dtype=bool)
    else:
        # Accept lists and 0/1 arrays: ``~`` is only a logical NOT on
        # boolean arrays.
        target_subfeatures_mask = np.asarray(target_subfeatures_mask,
                                             dtype=bool)
    others = ~target_subfeatures_mask

    plt.rcParams.update({'font.size': 14})  # set everything to this font size
    marker_size = plt.rcParams['lines.markersize']**2 * 5
    if colour:
        # The colours must be filtered like the points, otherwise their
        # number differs from the number of points once any word is flagged.
        plt.scatter(Y[others, 0],
                    Y[others, 1],
                    c=clrs_points[others],
                    s=marker_size)
    else:
        plt.scatter(Y[target_subfeatures_mask, 0],
                    Y[target_subfeatures_mask, 1],
                    c='red',
                    s=marker_size)
        plt.scatter(Y[others, 0],
                    Y[others, 1],
                    c='blue',
                    s=marker_size)

    for i, label in enumerate(words):
        plt.annotate(label, (Y[i, 0], Y[i, 1]), fontsize=22)

    plt.title(title, fontsize=26)
    plt.xlabel("Dimension 1", fontsize=26)
    plt.ylabel("Dimension 2", fontsize=26)
    plt.xticks(fontsize=24)
    plt.yticks(fontsize=24)

    plt.savefig("tsne_2_components")
    plt.show()
示例#19
0
        # we will take the mean over the H and W dimensions
        image_features = torch.mean(torch.mean(image_features, dim=-1), dim=-1)
        # Finally, we can store the computed CNN features
        images_projected_cnn[i, :] = image_features.cpu()

#
# Applying t-SNE
#

# sklearn provides us with a nice t-SNE implementation and good documentation
# https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html
# https://scikit-learn.org/stable/auto_examples/manifold/plot_t_sne_perplexity.html#sphx-glr-auto-examples-manifold-plot-t-sne-perplexity-py
from sklearn import manifold

# Build the t-SNE estimator with a fixed seed
tsne = manifold.TSNE(random_state=1)

# fit_transform fits the model and returns the projected points in one call
images_projected_tsne = tsne.fit_transform(images_projected_cnn)
print(f"t-SNE projected our data to shape {images_projected_tsne.shape}")

# Scatter the first two embedding dimensions, coloured by point_colors
fig, ax = plt.subplots()
ax.scatter(
    x=images_projected_tsne[:, 0],
    y=images_projected_tsne[:, 1],
    c=point_colors,
    s=0.1,
    cmap='nipy_spectral',
)
ax.set(xlabel='x', ylabel='y')
ax.grid(True)

# Save two raster resolutions and one vector version
fig.savefig('tsne.png', dpi=250)
fig.savefig('tsne_large.png', dpi=500)
fig.savefig('tsne.svg')
示例#20
0
def run_expt(X, color, expt_name):
    """Plot 3-D data and its 2-D embedding under several methods.

    First the raw data is drawn as a 3-D scatter (the first three columns of
    ``X``), saved via ``save_fig`` as ``'<expt_name>-data.pdf'`` and shown.
    Then Isomap, truncated SVD (labelled 'PCA'), standard LLE, MDS, Spectral
    Embedding, t-SNE and RBF kernel PCA are fitted in turn. Each result is
    timed, drawn in its own figure, saved as ``'<expt_name>-<label>.pdf'``
    and shown.

    Parameters
    ----------
    X : samples, one row per sample, with at least three columns.
    color : per-sample values used as scatter colours.
    expt_name : prefix for figure titles and file names.
    """
    n_neighbors = 10
    n_components = 2

    # 3-D overview of the raw data
    fig = plt.figure(figsize=(15, 8))
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral)
    ax.view_init(4, -72)
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    ax.zaxis.set_major_formatter(NullFormatter())
    ax.axis('tight')
    ttl = '{}-data'.format(expt_name)
    ax.set_title(ttl)
    save_fig('{}.pdf'.format(ttl))
    plt.show()

    # Estimator options are passed by keyword: recent scikit-learn releases
    # reject positional constructor arguments for LLE and Isomap.
    # Further LLE variants ('ltsa', 'hessian', 'modified') can be added with
    # LLE(method=...).
    LLE = partial(manifold.LocallyLinearEmbedding,
                  n_neighbors=n_neighbors,
                  n_components=n_components,
                  eigen_solver='auto')

    methods = OrderedDict()
    methods['Isomap'] = manifold.Isomap(n_neighbors=n_neighbors,
                                        n_components=n_components)
    # NOTE: TruncatedSVD does not centre the data, so the 'PCA' label is an
    # approximation; it is kept because it is used in titles and file names.
    methods['PCA'] = decomposition.TruncatedSVD(n_components=n_components)
    methods['LLE'] = LLE(method='standard')
    methods['MDS'] = manifold.MDS(n_components=n_components, max_iter=100,
                                  n_init=1)
    methods['SE'] = manifold.SpectralEmbedding(n_components=n_components,
                                               n_neighbors=n_neighbors)
    methods['t-SNE'] = manifold.TSNE(n_components=n_components, init='pca',
                                     random_state=0)
    methods['kPCA'] = decomposition.KernelPCA(n_components=n_components,
                                              kernel='rbf')

    # One figure per method
    for label, method in methods.items():
        t0 = time()
        Y = method.fit_transform(X)
        t1 = time()
        print("%s: %.2g sec" % (label, t1 - t0))
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)
        ttl = '{}-{}'.format(expt_name, label)
        ax.set_title(ttl)
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        ax.axis('tight')
        save_fig('{}.pdf'.format(ttl))
        plt.show()
示例#21
0
def _patient_id_from_path(path):
    """Return the clinical id encoded in an image file name.

    The id is the part of the base name that precedes the first '-', with
    its last character removed.
    """
    return os.path.basename(path).split('-')[0][:-1]


def _group_images_by_patient(test_data_dir):
    """Walk ``test_data_dir`` once and map each clinical id to its image paths."""
    images_by_patient = {}
    for dirpath, _dirnames, filenames in os.walk(test_data_dir):
        for filename in filenames:
            path = os.path.join(dirpath, filename)
            images_by_patient.setdefault(_patient_id_from_path(path),
                                         []).append(path)
    return images_by_patient


def _extract_features(partial_model, image_paths):
    """Return one feature vector (as a plain list) per image path."""
    features = []
    for path in image_paths:
        pixels = image.load_img(path, target_size=(img_width, img_height))
        pixels = image.img_to_array(pixels)
        pixels /= 255
        # The model is fed one image at a time, as a batch of size 1.
        batch = np.array([pixels])
        features.append(partial_model.predict(batch).tolist()[0])
    return features


def _save_tsne_scatter(features, labels, out_path):
    """Embed ``features`` in 2-D with t-SNE and save a two-class scatter plot.

    The embedding is min-max scaled per axis before plotting; label 0 is
    drawn in red and label 1 in blue.
    """
    x_tsne = np.array(features)
    y_tsne = np.array(labels)
    print(x_tsne.shape)
    print(len(y_tsne))
    print('num of class is %d' % len(set(y_tsne)))
    tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
    x_tsne = tsne.fit_transform(x_tsne)

    x_min, x_max = np.min(x_tsne, 0), np.max(x_tsne, 0)
    x_tsne = (x_tsne - x_min) / (x_max - x_min)
    plt.figure()
    for target, color in zip(range(2), ['r', 'b']):
        selected = y_tsne == target
        plt.scatter(x_tsne[selected, 0],
                    x_tsne[selected, 1],
                    c=color,
                    label=target,
                    s=2,
                    lw=1)
    plt.savefig(out_path, dpi=330)


def testModelOnOneDataset(loaded_weights, test_data_dir):
    """Plot t-SNE views of the 'avg_pool' features of a saved model.

    The model at ``loaded_weights`` is loaded and truncated at its
    'avg_pool' layer.  Every image under ``test_data_dir`` that belongs to a
    patient listed in 'dict19.pkl' is pushed through the truncated model.
    Two figures are written to the current directory:

    * './1.png' - one point per image.
    * './2.png' - one point per patient (mean of that patient's image
      features).

    Patients with ``label_bl == 0`` (LTPC) are processed first, then those
    with ``label_bl == 1`` (TPC); patients without any image are skipped.

    Args:
        loaded_weights: path of the saved model passed to ``load_model``.
        test_data_dir: root directory that is searched recursively for
            images.
    """
    base_model = load_model(loaded_weights)
    partial_model = Model(input=base_model.input,
                          output=base_model.get_layer('avg_pool').output)

    # Load the records of the TPC and LTPC patients.
    # NOTE(review): unpickling runs arbitrary code; only use a trusted
    # 'dict19.pkl'.
    with open('dict19.pkl', 'rb') as f:
        samples = cPickle.load(f)

    # One directory walk for all patients instead of one walk per patient.
    images_by_patient = _group_images_by_patient(test_data_dir)

    all_patient_pred_list = []  # one feature vector per image
    all_patient_truelabel_list = []
    all_patient_in_one_pred_list = []  # one mean feature vector per patient
    all_patient_in_one_truelabel_list = []

    # LTPC (label 0) patients first, then TPC (label 1), each in the order
    # in which they appear in ``samples``.
    for label in (0, 1):
        for person in samples:
            if person['label_bl'] != label:
                continue
            image_paths = images_by_patient.get(person['lch'])
            if not image_paths:
                continue

            patient_features = _extract_features(partial_model, image_paths)

            # Per image.
            all_patient_pred_list += patient_features
            all_patient_truelabel_list += [float(label)
                                           ] * len(patient_features)

            # Per patient: average the image features.
            mean_feature = (np.sum(np.array(patient_features), axis=0) /
                            len(patient_features)).tolist()
            all_patient_in_one_pred_list.append(mean_feature)
            all_patient_in_one_truelabel_list.append(label)

    _save_tsne_scatter(all_patient_pred_list, all_patient_truelabel_list,
                       './1.png')
    _save_tsne_scatter(all_patient_in_one_pred_list,
                       all_patient_in_one_truelabel_list, './2.png')
示例#22
0
def tsne(X):
    """Return the t-SNE embedding of ``X``.

    The embedding uses PCA initialisation and a fixed seed (0); its
    dimensionality is taken from the module-level ``n_components``.
    """
    embedder = manifold.TSNE(n_components=n_components,
                             init='pca',
                             random_state=0)
    embedding = embedder.fit_transform(X)
    return embedding
示例#23
0
def do_evaluation(config,
                  qualitative_analysis=True,
                  quantitative_analysis=True,
                  verbose=0):
    """Restore a trained model and run the enabled evaluation steps.

    Two graphs are built from ``config['model_cls']``: a "validation" graph
    used for reconstruction and a "sampling" graph (created with
    ``reuse=True``) used for synthesis.  After the checkpoint is restored the
    function can

    * plot a t-SNE view of samples drawn from hand-picked GMM latent
      components (when the module-level ``run_gmm_eval`` is set), and
    * write real, reconstructed and synthetic stroke samples under
      ``config['eval_dir']`` (when ``qualitative_analysis`` is true).

    Individual steps are switched by module-level flags such as
    ``run_reconstruction``, ``run_biased_sampling``, ``run_unbiased_sampling``,
    ``run_original_sample`` and ``save_plots``.

    Args:
        config: mapping with the keys read here: 'model_cls', 'dataset_cls',
            'validation_data', 'use_bow_labels' (conditional datasets only),
            'model_dir', 'checkpoint_id', 'eval_dir' and, optionally,
            'use_real_pi_labels'.
        qualitative_analysis: generate and save samples when true.
        quantitative_analysis: accepted for compatibility; the branch is
            currently a no-op.
        verbose: accepted for compatibility; not used.

    Raises:
        Exception: if the dataset class is unknown or the checkpoint cannot
            be restored (the original error is chained as the cause).
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    tf.compat.v1.disable_eager_execution()

    # Model and dataset classes are looked up by name in this module.
    Model_cls = getattr(sys.modules[__name__], config['model_cls'])
    Dataset_cls = getattr(sys.modules[__name__], config['dataset_cls'])

    batch_size = 1
    data_sequence_length = None
    # Load validation dataset to fetch statistics.
    if issubclass(Dataset_cls, HandWritingDatasetConditional):
        validation_dataset = Dataset_cls(
            config['validation_data'],
            var_len_seq=True,
            use_bow_labels=config['use_bow_labels'])
    elif issubclass(Dataset_cls, HandWritingDataset):
        validation_dataset = Dataset_cls(config['validation_data'],
                                         var_len_seq=True)
    else:
        raise Exception("Unknown dataset class.")

    strokes = tf.compat.v1.placeholder(tf.float32,
                                       shape=[
                                           batch_size, data_sequence_length,
                                           sum(validation_dataset.input_dims)
                                       ])
    targets = tf.compat.v1.placeholder(tf.float32,
                                       shape=[
                                           batch_size, data_sequence_length,
                                           sum(validation_dataset.target_dims)
                                       ])
    sequence_length = tf.compat.v1.placeholder(tf.int32, shape=[batch_size])

    # Create inference graph.
    with tf.name_scope("validation"):
        inference_model = Model_cls(config,
                                    reuse=False,
                                    input_op=strokes,
                                    target_op=targets,
                                    input_seq_length_op=sequence_length,
                                    input_dims=validation_dataset.input_dims,
                                    target_dims=validation_dataset.target_dims,
                                    batch_size=batch_size,
                                    mode="validation",
                                    data_processor=validation_dataset)
        inference_model.build_graph()
        inference_model.create_image_summary(
            validation_dataset.prepare_for_visualization)

    # Create sampling graph.
    with tf.name_scope("sampling"):
        model = Model_cls(config,
                          reuse=True,
                          input_op=strokes,
                          target_op=None,
                          input_seq_length_op=sequence_length,
                          input_dims=validation_dataset.input_dims,
                          target_dims=validation_dataset.target_dims,
                          batch_size=batch_size,
                          mode="sampling",
                          data_processor=validation_dataset)
        model.build_graph()

    # Create a session object and initialize parameters.
    sess = tf.compat.v1.Session()
    # Restore computation graph.
    try:
        saver = tf.compat.v1.train.Saver()
        # Restore variables.
        if config['checkpoint_id'] is None:
            checkpoint_path = tf.train.latest_checkpoint(config['model_dir'])
        else:
            checkpoint_path = os.path.join(config['model_dir'],
                                           config['checkpoint_id'])

        print("Loading model " + checkpoint_path)
        saver.restore(sess, checkpoint_path)
    except Exception as err:
        # Release the session and keep the real cause attached; a bare
        # ``except`` would also intercept KeyboardInterrupt/SystemExit.
        sess.close()
        raise Exception("Model is not found.") from err

    if run_gmm_eval:
        from sklearn import manifold
        import matplotlib.pyplot as plt
        from matplotlib.ticker import NullFormatter

        gmm_mus, gmm_sigmas = model.evaluate_gmm_latent_space(sess)

        # We have ~70 components. Select a subset of them manually.
        gmm_component_ids = [2, 3, 11, 12, 13, 14, 15, 39, 40]
        gmm_legend_labels = ["1", "2", "a", "b", "c", "d", "e", "C", "D"]
        num_components = len(gmm_component_ids)
        size_components = gmm_mus.shape[1]

        gmm_samples = np.zeros(
            (num_components * gmm_num_samples, size_components))
        gmm_sample_labels = np.zeros(num_components * gmm_num_samples)

        # Draw ``gmm_num_samples`` points from each *selected* component.
        # ``slot`` is the position in the plot/legend, ``component_id`` the
        # row of the mixture parameters to sample from.
        for slot, component_id in enumerate(gmm_component_ids):
            start = slot * gmm_num_samples
            stop = start + gmm_num_samples
            epsilon = np.random.normal(0, 1,
                                       (gmm_num_samples, size_components))
            gmm_samples[start:stop, :] = (
                gmm_mus[component_id] + gmm_sigmas[component_id] * epsilon)
            gmm_sample_labels[start:stop] = slot

        # Creating a discrete colorbar
        colors = plt.cm.jet(np.linspace(0, 1, num_components))

        Y = manifold.TSNE(n_components=2, init='pca',
                          random_state=0).fit_transform(gmm_samples)

        fig = plt.figure(figsize=(15, 8))
        ax = fig.add_subplot(1, 1, 1)

        for i, c in enumerate(colors):
            # Rows of the embedding that were sampled from slot ``i``.
            members = gmm_sample_labels == i
            plt.scatter(Y[members, 0],
                        Y[members, 1],
                        20,
                        lw=.25,
                        marker='o',
                        color=c,
                        label=gmm_legend_labels[i],
                        alpha=0.9,
                        antialiased=True,
                        zorder=3)

        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        plt.legend()
        plt.axis('tight')
        plt.show()

    # Extra arguments forwarded to the sampling calls of VRNNGMM models.
    keyword_args = dict()
    keyword_args['conditional_inputs'] = None
    keyword_args['eoc_threshold'] = eoc_threshold
    keyword_args['cursive_threshold'] = cursive_threshold
    keyword_args['use_sample_mean'] = True

    if quantitative_analysis:
        pass

    if qualitative_analysis:
        print("Generating samples...")
        for real_img_idx in reference_sample_ids:
            _, stroke_model_input, _ = validation_dataset.fetch_sample(
                real_img_idx)
            stroke_sample = stroke_model_input[:, :, 0:3]

            # Biased sampling reuses the state produced by reconstruction.
            if run_reconstruction or run_biased_sampling:
                inference_results = inference_model.reconstruct_given_sample(
                    session=sess, inputs=stroke_model_input)

            if run_original_sample:
                svg_path = os.path.join(
                    config['eval_dir'],
                    "real_image_" + str(real_img_idx) + '.svg')
                visualize.draw_stroke_svg(
                    validation_dataset.undo_normalization(
                        validation_dataset.samples[real_img_idx],
                        detrend_sample=False),
                    factor=0.001,
                    svg_filename=svg_path)

            if run_reconstruction:
                svg_path = os.path.join(
                    config['eval_dir'],
                    "reconstructed_image_" + str(real_img_idx) + '.svg')
                visualize.draw_stroke_svg(
                    validation_dataset.undo_normalization(
                        inference_results[0]['output_sample'][0],
                        detrend_sample=False),
                    factor=0.001,
                    svg_filename=svg_path)

            if concat_ref_and_synthetic_samples:
                reference_sample_in_img = stroke_sample
            else:
                reference_sample_in_img = None

            # Conditional handwriting synthesis.
            for text_id, text in enumerate(conditional_texts):
                keyword_args['conditional_inputs'] = text
                if config.get('use_real_pi_labels', False) and isinstance(
                        model, VRNNGMM):
                    if run_biased_sampling:
                        biased_sampling_results = model.sample_biased(
                            session=sess,
                            seq_len=seq_len,
                            prev_state=inference_results[0]['state'],
                            prev_sample=reference_sample_in_img,
                            **keyword_args)

                        save_name = 'synthetic_biased_ref(' + str(
                            real_img_idx) + ')_(' + str(text_id) + ')'
                        synthetic_sample = validation_dataset.undo_normalization(
                            biased_sampling_results[0]['output_sample'][0],
                            detrend_sample=False)
                        if save_plots:
                            plot_eval_details(biased_sampling_results[0],
                                              synthetic_sample,
                                              config['eval_dir'], save_name)

                        # Without beautification: set False
                        # Apply beautification: set True.
                        # NOTE(review): the flag is already True for the
                        # pass above, so both passes use the same setting.
                        keyword_args['use_sample_mean'] = True
                        biased_sampling_results = model.sample_biased(
                            session=sess,
                            seq_len=seq_len,
                            prev_state=inference_results[0]['state'],
                            prev_sample=reference_sample_in_img,
                            **keyword_args)

                        save_name = 'synthetic_biased_sampled_ref(' + str(
                            real_img_idx) + ')_(' + str(text_id) + ')'
                        synthetic_sample = validation_dataset.undo_normalization(
                            biased_sampling_results[0]['output_sample'][0],
                            detrend_sample=False)
                        if save_plots:
                            plot_eval_details(biased_sampling_results[0],
                                              synthetic_sample,
                                              config['eval_dir'], save_name)

                    if run_unbiased_sampling:
                        unbiased_sampling_results = model.sample_unbiased(
                            session=sess, seq_len=seq_len, **keyword_args)

                        save_name = 'synthetic_unbiased_(' + str(text_id) + ')'
                        synthetic_sample = validation_dataset.undo_normalization(
                            unbiased_sampling_results[0]['output_sample'][0],
                            detrend_sample=False)
                        if save_plots:
                            plot_eval_details(unbiased_sampling_results[0],
                                              synthetic_sample,
                                              config['eval_dir'], save_name)

                        # NOTE(review): labelled "without beautification"
                        # originally, yet the flag is set to True here too.
                        keyword_args['use_sample_mean'] = True
                        unbiased_sampling_results = model.sample_unbiased(
                            session=sess, seq_len=seq_len, **keyword_args)
                        save_name = 'synthetic_unbiased_sampled(' + str(
                            text_id) + ')'
                        synthetic_sample = validation_dataset.undo_normalization(
                            unbiased_sampling_results[0]['output_sample'][0],
                            detrend_sample=False)
                        if save_plots:
                            plot_eval_details(unbiased_sampling_results[0],
                                              synthetic_sample,
                                              config['eval_dir'], save_name)

                else:
                    # Other models are sampled without the extra keyword
                    # arguments.
                    if run_biased_sampling:
                        biased_sampling_results = model.sample_biased(
                            session=sess,
                            seq_len=seq_len,
                            prev_state=inference_results[0]['state'],
                            prev_sample=reference_sample_in_img)

                        save_name = 'synthetic_biased_ref(' + str(
                            real_img_idx) + ')_(' + str(text_id) + ')'
                        synthetic_sample = validation_dataset.undo_normalization(
                            biased_sampling_results[0]['output_sample'][0],
                            detrend_sample=False)
                        if save_plots:
                            plot_eval_details(biased_sampling_results[0],
                                              synthetic_sample,
                                              config['eval_dir'], save_name)

                    if run_unbiased_sampling:
                        unbiased_sampling_results = model.sample_unbiased(
                            session=sess, seq_len=seq_len)

                        save_name = 'synthetic_unbiased_(' + str(text_id) + ')'
                        synthetic_sample = validation_dataset.undo_normalization(
                            unbiased_sampling_results[0]['output_sample'][0],
                            detrend_sample=False)
                        if save_plots:
                            plot_eval_details(unbiased_sampling_results[0],
                                              synthetic_sample,
                                              config['eval_dir'], save_name)
    sess.close()
def compare_mds_tsne(stddev=0,
                     metric='euclidean',
                     tsne_init='pca',
                     random_state=0):
    """Show an S-curve next to its MDS and t-SNE embeddings.

    A 1000-point S-curve is generated, optionally perturbed with Gaussian
    noise, embedded into two dimensions with MDS and with t-SNE, and the
    three scatter plots are displayed in one figure.  The run time of each
    embedding is printed and shown in the subplot title.

    Args:
        stddev: standard deviation of the Gaussian noise added to the
            points; no noise is added when it is not positive.
        metric: distance metric.  For anything other than 'euclidean' the
            pairwise distance matrix is computed with
            ``scipy.spatial.distance.pdist`` and handed to both embeddings
            as precomputed dissimilarities.
        tsne_init: t-SNE initialisation; replaced by 'random' when the
            distances are precomputed.
        random_state: seed passed to both MDS and t-SNE.
    """
    cmap = plt.cm.viridis
    n_points = 1000
    n_components = 2
    X, sample_order = datasets.make_s_curve(n_points, random_state=0)

    # Add noise if requested.
    if stddev > 0:
        X = X + np.random.normal(scale=stddev, size=X.shape)

    fig = plt.figure(figsize=(12, 4))
    plt.suptitle("Manifold Learning with %i points" % (n_points))

    # Plot original data
    ax = fig.add_subplot(131, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=sample_order, cmap=cmap)
    ax.view_init(4, -72)

    mds_kws = dict(n_components=n_components, random_state=random_state)
    tsne_kws = dict(init=tsne_init)
    tsne_kws.update(mds_kws)

    if metric != 'euclidean':
        X = scipy.spatial.distance.squareform(
            scipy.spatial.distance.pdist(X, metric=metric))
        mds_kws['dissimilarity'] = 'precomputed'
        tsne_kws['metric'] = 'precomputed'

        print(
            'FYI not initializing t-SNE with PCA since distances are precomputed'
        )
        # PCA needs feature vectors, not distances; ask for the random
        # initialisation explicitly instead of relying on the default.
        tsne_kws['init'] = 'random'

    # Perform MDS and plot it
    t0 = time()
    mds = manifold.MDS(max_iter=100, n_init=1, **mds_kws)
    Y = mds.fit_transform(X)
    t1 = time()
    print("MDS: %.2g sec" % (t1 - t0))
    ax = fig.add_subplot(1, 3, 2)
    plt.scatter(Y[:, 0],
                Y[:, 1],
                c=sample_order,
                cmap=cmap,
                linewidth=0.5,
                edgecolor='grey')
    plt.title("MDS (%.2g sec)" % (t1 - t0))
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    plt.axis('tight')

    # Perform t-SNE and plot it
    t0 = time()
    tsne = manifold.TSNE(**tsne_kws)
    Y = tsne.fit_transform(X)
    t1 = time()
    print("t-SNE: %.2g sec" % (t1 - t0))
    ax = fig.add_subplot(1, 3, 3)
    plt.scatter(Y[:, 0],
                Y[:, 1],
                c=sample_order,
                cmap=cmap,
                linewidth=0.5,
                edgecolor='grey')
    plt.title("t-SNE (%.2g sec)" % (t1 - t0))
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    plt.axis('tight')
    plt.show()
示例#25
0
def main(args):
    """Interactively inspect clusters of normalised mosaic patches.

    For every batch of the selected dataset the mosaic is cropped by a
    phase offset, cut into ``args.ksize`` x ``args.ksize`` patches taken
    every ``period`` pixels, and each patch is standardised.  1024 randomly
    chosen patches are clustered with MiniBatchKMeans (16 clusters),
    embedded with t-SNE, and sent to the ``torchlib.debug`` helpers.  The
    function then stops in ``ipdb`` before moving on to the next batch.

    Args:
        args: parsed options; the attributes read here are ``xtrans``,
            ``data_dir``, ``batch_size``, ``ksize``, ``offset_x`` and
            ``offset_y``.
    """
    linearize = False
    # The mosaic pattern repeats every ``period`` pixels.
    if args.xtrans:
        period = 6
        dataset_cls = dset.XtransDataset
    else:
        period = 2
        dataset_cls = dset.BayerDataset
    data = dataset_cls(args.data_dir,
                       transform=None,
                       augment=False,
                       linearize=linearize)
    loader = DataLoader(data,
                        batch_size=args.batch_size,
                        shuffle=True,
                        num_workers=8)

    env_name = "demosaic_inspect"
    mask_viz = viz.BatchVisualizer("mask", env=env_name)
    mos_viz = viz.BatchVisualizer("mosaic", env=env_name)
    diff_viz = viz.BatchVisualizer("diff", env=env_name)
    target_viz = viz.BatchVisualizer("target", env=env_name)
    input_hist = viz.HistogramVisualizer("color_hist", env=env_name)

    for sample in loader:
        mosaic = sample["mosaic"]
        mask = sample["mask"]

        # Crop so that the patch grid starts at the requested phase.
        pad = args.ksize // 2
        dx = (pad - args.offset_x) % period
        dy = (pad - args.offset_y) % period
        print("dx {} dy {}".format(dx, dy))
        mosaic = mosaic[..., dy:, dx:]
        mask = mask[..., dy:, dx:]

        # Slide a ksize x ksize window over the last two axes, then flatten
        # batch and window position into a single leading axis.
        windows = mosaic.unfold(2, args.ksize, period)
        windows = windows.unfold(3, args.ksize, period)
        n_img, n_chan, n_rows, n_cols, _, _ = windows.shape
        windows = windows.permute(0, 2, 3, 1, 4, 5).contiguous()
        patches = windows.view(n_img * n_rows * n_cols, n_chan, args.ksize,
                               args.ksize)

        # Standardise every patch with its own mean and standard deviation.
        bs = patches.shape[0]
        per_patch = patches.view(bs, -1)
        means = per_patch.mean(-1).view(bs, 1, 1, 1)
        std = per_patch.std(-1).view(bs, 1, 1, 1)
        print(means.min().item(), means.max().item())

        patches -= means
        patches /= std + 1e-8

        # Keep a random subset (drawn with replacement) of the patches.
        new_bs = 1024
        picked = np.random.randint(0, patches.shape[0], (new_bs, ))
        patches = patches[picked]

        import torchlib.debug as D
        D.tensor(patches)

        flat = patches.view(new_bs, -1).cpu().numpy()

        nclusts = 16
        clst = cluster.MiniBatchKMeans(n_clusters=nclusts)
        clst_idx = clst.fit_predict(flat)
        colors = np.random.uniform(size=(nclusts, 3))

        manif = manifold.TSNE(n_components=2)
        new_coords = manif.fit_transform(flat)
        # One random colour per cluster, scaled to 8-bit values.
        color = (colors[clst_idx, :] * 255).astype(np.uint8)
        print(color.shape)
        xs = th.from_numpy(new_coords[:, 0])
        ys = th.from_numpy(new_coords[:, 1])
        D.scatter(xs, ys, color=color, key="tsne")

        centers = th.from_numpy(clst.cluster_centers_)
        centers = centers.view(nclusts, 3, args.ksize, args.ksize)
        D.tensor(centers, "centers")

        # Show the member patches of each cluster separately.
        for cidx in range(nclusts):
            in_cluster = clst_idx == cidx
            members = th.from_numpy(patches.numpy()[in_cluster])
            D.tensor(members, key="cluster_{:02d}".format(cidx))

        import ipdb
        ipdb.set_trace()
示例#26
0
labels = np.load("data_window_labels.npy")

print(X.columns.values)
print(labels)
# Position of the 'flow=From-Botne' class inside ``labels``.
botnet_label = np.where(labels == 'flow=From-Botne')[0][0]
print(botnet_label)

# Binary target: True where the sample carries that class.
y_bin6 = y == botnet_label
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y_bin6, test_size=0.33, random_state=123456)

print("y", np.unique(y, return_counts=True))
print("y_train", np.unique(y_train, return_counts=True))
print("y_test", np.unique(y_test, return_counts=True))

print("t-SNE")  # Beware: this is very time-consuming
clf = manifold.TSNE(n_components=2, random_state=123456)
clf.fit(
    X[['Dport_nunique', 'TotBytes_sum', 'Dur_sum', 'Dur_mean',
       'TotBytes_std']])

print(clf.embedding_)

# Indices of all positive samples.
y_plot = np.where(y_bin6)[0]
print(len(y_plot))

# Up to 100 negatives per positive, drawn without replacement; the request
# is capped at the number of negatives that actually exist.
negative_idx = np.where(np.logical_not(y_bin6))[0]
y_plot2 = np.random.choice(negative_idx,
                           size=min(len(y_plot) * 100, len(negative_idx)),
                           replace=False)
print(len(y_plot2))

index = list(y_plot) + list(y_plot2)
示例#27
0
# Reduce the random-forest embedding to two dimensions for plotting.
pca = decomposition.TruncatedSVD(n_components=2)
X_reduced = pca.fit_transform(X_transformed)

forest_title = ("Random forest embedding of the digits (time %.2fs)" %
                (time() - t0))
plot_embedding(X_reduced, forest_title)

# ----------------------------------------------------------------------
# Spectral embedding of the digits dataset
print("Computing Spectral embedding")
embedder = manifold.SpectralEmbedding(
    n_components=2,
    random_state=0,
    eigen_solver="arpack",
)
t0 = time()
X_se = embedder.fit_transform(X)

spectral_title = ("Spectral embedding of the digits (time %.2fs)" %
                  (time() - t0))
plot_embedding(X_se, spectral_title)

# ----------------------------------------------------------------------
# t-SNE embedding of the digits dataset
print("Computing t-SNE embedding")
tsne = manifold.TSNE(
    n_components=2,
    init='pca',
    random_state=0,
)
t0 = time()
X_tsne = tsne.fit_transform(X)

tsne_title = "t-SNE embedding of the digits (time %.2fs)" % (time() - t0)
plot_embedding(X_tsne, tsne_title)

plt.show()
                                        ('scaler',
                                         preprocessing.StandardScaler())])

# Preprocessing: route the non-continuous columns through the categorical
# pipeline and the continuous columns through the numerical pipeline.
cat_features = utils.get_non_continuous_features(house_train1)
num_features = utils.get_continuous_features(house_train1)

preprocess_pipeline = compose.ColumnTransformer(transformers=[
    ('cat', categorical_pipeline, cat_features),
    ('num', numerical_pipeline, num_features),
])

# Visualisation: preprocess, keep the principal components that explain
# 95% of the variance, then embed into two dimensions with t-SNE.
viz_pipeline = pipeline.Pipeline(steps=[
    ('preprocess', preprocess_pipeline),
    ('pca', decomposition.PCA(n_components=0.95)),
    ('tsne', manifold.TSNE(n_components=2)),
])

tsne_data = viz_pipeline.fit_transform(house_train1)
rutils.plot_data_3d_regression(tsne_data, house_train['SalePrice'])

# Feature selection: concatenate the PCA output with the features kept by
# an extra-trees based selector.
# NOTE(review): the selector is a classifier while the final model below is
# a regressor - confirm this is intended.
features_pipeline = pipeline.FeatureUnion(transformer_list=[
    ('pca_selector', decomposition.PCA()),
    ('et_selector',
     feature_selection.SelectFromModel(ensemble.ExtraTreesClassifier())),
])

regressor = linear_model.Lasso()
#build complete pipeline with feature selection and ml algorithms
complete_pipeline = pipeline.Pipeline([
    ('preprocess', preprocess_pipeline),
示例#29
0
def test_with_tsne(model_path):
    """Evaluate the model on the test set and plot t-SNE traces of its layers.

    Restores weights via ``load_state(model_path, model)``, runs every batch
    of ``test_loader`` through ``model``, and prints the average loss and the
    accuracy. The four intermediate layer outputs returned by the model are
    then each embedded in 2-D with t-SNE, stacked into one array and saved to
    ``layerout_tsne.npy``. Finally, for at most the first 1000 rows, one line
    per sample is drawn through its four embedded positions; the figure is
    written to ``1.png`` and shown.

    Args:
        model_path: Value handed to ``load_state`` to restore the weights.
    """
    load_state(model_path, model)
    # Evaluation mode; this only affects Dropout and BatchNorm modules.
    model.eval()
    test_loss = 0
    correct = 0
    # Per-batch results are collected in lists and concatenated once after
    # the loop. They are moved to the CPU right away so they can be turned
    # into NumPy arrays even when the model runs on the GPU.
    pred_batches = []
    layer_batches = ([], [], [], [])
    print('start test:')
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        # NOTE(review): `Variable(..., volatile=True)` and `.data[0]` are
        # pre-0.4 PyTorch idioms; confirm the targeted version before
        # modernising them.
        data, target = Variable(data, volatile=True), Variable(target)
        data = data.view(-1, 784)

        output, layer1_out, layer2_out, layer3_out, layer4_out = model(data)
        test_loss += F.nll_loss(output, target).data[0]

        pred = output.data.max(1)[1]  # index of the max log-probability
        pred_batches.append(pred.cpu())
        for batches, layer_out in zip(
                layer_batches,
                (layer1_out, layer2_out, layer3_out, layer4_out)):
            batches.append(layer_out.data.cpu())
        correct += pred.eq(target.data).cpu().sum()

    test_loss /= len(test_loader)  # loss function already averages over batch size
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

    print("Computing t-SNE embedding")
    # Concatenate newest batch first: this is the row order obtained by
    # prepending each batch to the running result. Every batch appears
    # exactly once.
    pred_all = torch.cat(pred_batches[::-1], 0).numpy()
    layer_outputs = [torch.cat(batches[::-1], 0).numpy()
                     for batches in layer_batches]

    tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
    # Stack the four embeddings along a new axis 1, giving an array of shape
    # (n_samples, 4, 2): one 2-D point per layer for every sample.
    layerout_tsne = np.stack(
        [tsne.fit_transform(layer_out) for layer_out in layer_outputs],
        axis=1)
    np.save('layerout_tsne.npy', layerout_tsne)

    # NOTE(review): `colors` only supplies the number of classes here; the
    # lines are drawn with matplotlib's default colour cycle. Passing
    # color=colors[i] to plt.plot may have been intended -- confirm.
    colors = ['red', 'm', 'cyan', 'blue', 'lime', 'lawngreen', 'lightcoral', 'lightyellow', 'mediumorchid', 'mediumpurple']

    plt.figure(figsize=(10, 6))
    print('start plot:')
    # The plot is capped at 1000 rows; clamp to the number of rows available
    # so that smaller test sets do not raise an IndexError.
    n_plotted = min(1000, layerout_tsne.shape[0])
    for i in range(len(colors)):
        for j in range(n_plotted):
            if pred_all[j] == i:
                # One line per sample, through its position in each layer.
                plt.plot(layerout_tsne[j, :, 0], layerout_tsne[j, :, 1])

    plt.xticks([])
    plt.yticks([])
    plt.savefig('1.png', dpi=300,
                bbox_inches='tight')

    plt.show()
示例#30
0
 def __init__(self, source):
     """Scale ``source`` and store its two-component t-SNE embedding.

     The embedding is kept on ``self.return_data``.
     """
     # Rescale the input with a default-configured MinMaxScaler before
     # embedding it.
     min_max_scaler = preprocessing.MinMaxScaler()
     data_source = min_max_scaler.fit_transform(source)
     # Two output components, PCA initialisation, fixed random_state.
     tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
     self.return_data = tsne.fit_transform(data_source)