Example #1
from odin.ml import fast_pca, fast_tsne, fast_umap


def dimension_reduction(*x, algo='pca', **kwargs):
  algo = str(algo).lower()
  assert algo in ('pca', 'tsne', 'umap'), \
    "No support for algorithm: '%s'" % algo
  if x[0].shape[1] == 1:
    raise ValueError("No dimension reduction for input with shape: %s" %
                     str(x[0].shape))
  elif x[0].shape[1] == 2:
    pass
  elif algo == 'tsne':
    x = fast_tsne(*x,
                  n_components=2,
                  perplexity=30.0,
                  learning_rate=200,
                  n_iter=1000,
                  random_state=1234,
                  n_jobs=8,
                  **kwargs)
  elif algo == 'pca':
    x = fast_pca(*x, n_components=2, random_state=1234, **kwargs)
  else:
    x = fast_umap(*x, random_state=1234, **kwargs)
  if len(x) == 1:
    return x[0]
  return x
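
# Hypothetical usage sketch (not part of the original example): the arrays and
# sizes below are made up; fast_pca/fast_tsne/fast_umap come from odin.ml as in
# Example #3.
import numpy as np

X_train = np.random.randn(200, 64)
X_test = np.random.randn(50, 64)

# Several input arrays yield one reduced array per input (see Example #3);
# a single input comes back as a single 2-D array.
Z_train, Z_test = dimension_reduction(X_train, X_test, algo='pca')
Z_single = dimension_reduction(X_train, algo='pca')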
Example #2
def evaluate_latent(fn, feeder, title):
    y_true = []
    Z = []
    for outputs in Progbar(feeder.set_batch(batch_mode='file'),
                           name=title,
                           print_report=True,
                           print_summary=False,
                           count_func=lambda x: x[-1].shape[0]):
        name = str(outputs[0])
        idx = int(outputs[1])
        data = outputs[2:]
        assert idx == 0
        y_true.append(name)
        Z.append(fn(*data))
    Z = np.concatenate(Z, axis=0)
    # ====== visualize spectrogram ====== #
    if Z.ndim >= 3:
        sample = np.random.choice(range(len(Z)), size=3, replace=False)
        spec = Z[sample.astype('int32')]
        y = [y_true[int(i)] for i in sample]
        plot_figure(nrow=6, ncol=6)
        for i, (s, tit) in enumerate(zip(spec, y)):
            s = s.reshape(len(s), -1)
            plot_spectrogram(s.T, ax=(1, 3, i + 1), title=tit)
    # ====== visualize each point ====== #
    # flatten to 2D
    Z = np.reshape(Z, newshape=(len(Z), -1))
    # tsne if necessary
    if Z.shape[-1] > 3:
        Z = fast_tsne(Z,
                      n_components=3,
                      n_jobs=8,
                      random_state=K.get_rng().randint(0, 10e8))
    # color and marker
    Z_color = [digit_color_map[i.split('_')[-1]] for i in y_true]
    Z_marker = [gender_marker_map[i.split('_')[1]] for i in y_true]
    plot_figure(nrow=6, ncol=20)
    for i, azim in enumerate((15, 60, 120)):
        plot_scatter(x=Z[:, 0],
                     y=Z[:, 1],
                     z=Z[:, 2],
                     ax=(1, 3, i + 1),
                     size=4,
                     color=Z_color,
                     marker=Z_marker,
                     azim=azim,
                     legend=legends if i == 1 else None,
                     legend_ncol=11,
                     fontsize=10,
                     title=title)
    plot_save(os.path.join(FIG_PATH, '%s.pdf' % title))
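

# Stand-alone sketch of the flatten-then-t-SNE step above, with made-up shapes;
# fast_tsne comes from odin.ml as in the other examples.
import numpy as np
from odin.ml import fast_tsne

Z = np.random.randn(100, 20, 8)            # e.g. one latent matrix per file
Z = np.reshape(Z, newshape=(len(Z), -1))   # flatten each sample to a vector
if Z.shape[-1] > 3:                        # only embed when more than 3-D
    Z = fast_tsne(Z, n_components=3, random_state=1234)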
Example #3
import os

import numpy as np
import tensorflow as tf
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

from odin import ml
from odin import visual as vs

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

tf.random.set_seed(8)
np.random.seed(8)

X, y = load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

X_umap = ml.fast_umap(X_train, X_test)
X_tsne = ml.fast_tsne(X_train, X_test)
X_pca = ml.fast_pca(X_train, X_test, n_components=2)

styles = dict(size=12, alpha=0.6, centroids=True)

vs.plot_figure(6, 12)
vs.plot_scatter(x=X_pca[0], color=y_train, ax=(1, 2, 1), **styles)
vs.plot_scatter(x=X_pca[1], color=y_test, ax=(1, 2, 2), **styles)

vs.plot_figure(6, 12)
vs.plot_scatter(x=X_tsne[0], color=y_train, ax=(1, 2, 1), **styles)
vs.plot_scatter(x=X_tsne[1], color=y_test, ax=(1, 2, 2), **styles)

vs.plot_figure(6, 12)
vs.plot_scatter(x=X_umap[0], color=y_train, ax=(1, 2, 1), **styles)
vs.plot_scatter(x=X_umap[1], color=y_test, ax=(1, 2, 2), **styles)
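
# Optionally persist the three figures to a single PDF, assuming odin.visual
# exposes plot_save as it is used (unqualified) in Example #2; the output path
# here is arbitrary.
vs.plot_save('/tmp/digits_projections.pdf')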
Example #4
    def plot_imputation_scatter(self,
                                test=True,
                                pca=False,
                                color_by_library=True):
        start_time = time.time()
        n_system = len(self) + 2  # add the original and the corrupted
        data_type = 'test' if test else 'train'

        if n_system <= 5:
            nrow = 1
            ncol = n_system
        else:
            nrow = 2
            ncol = int(np.ceil(n_system / 2))

        post = self.posteriors[0]
        X_org = post.X_test_org if test else post.X_train_org
        X_crr = post.X_test if test else post.X_train
        y = post.y_test if test else post.y_train
        labels = post.labels
        is_binary_classes = post.is_binary_classes
        allV = [X_org, X_crr] + [
            pos.V_test if test else pos.V_train for pos in self.posteriors
        ]
        assert X_org.shape == X_crr.shape and all(v.shape == X_org.shape
                                                  for v in allV)
        all_names = ["[%s]Original" % data_type,
                     "[%s]Corrupted" % data_type
                     ] + [i.short_id_lines for i in self.posteriors]

        # subsample to at most 5000 points before plotting
        if len(X_org) > 5000:
            np.random.seed(5218)
            ids = np.random.permutation(X_org.shape[0])[:5000]
            allV = [v[ids] for v in allV]
            y = y[ids]

        if is_binary_classes:
            y = np.argmax(y, axis=-1)
        else:
            y = ProbabilisticEmbedding().fit_transform(y)
            y = np.argmax(y, axis=-1)

        # log-normalize everything
        allV = [log_norm(v) for v in allV]

        fig = plt.figure(figsize=(min(20, 5 * ncol) + 2, nrow * 5))
        for idx, (name, v) in enumerate(zip(all_names, allV)):
            ax = plt.subplot(nrow, ncol, idx + 1)
            n = np.sum(v, axis=-1)
            v = fast_pca(v, n_components=2) if pca else fast_tsne(
                v, n_components=2)
            with catch_warnings_ignore(Warning):
                if color_by_library:
                    plot_scatter(x=v,
                                 val=n,
                                 ax=ax,
                                 size=8,
                                 legend_enable=False,
                                 grid=False,
                                 title=name)
                else:
                    plot_scatter(x=v,
                                 color=[labels[i] for i in y],
                                 marker=[labels[i] for i in y],
                                 ax=ax,
                                 size=8,
                                 legend_enable=True if idx == 0 else False,
                                 grid=False,
                                 title=name)

        with catch_warnings_ignore(Warning):
            plt.tight_layout()
        self.add_figure(
            'imputation_scatter_%s_%s' %
            ('lib' if color_by_library else 'cell', data_type), fig)
        return self._log(
            'plot_imputation_scatter[%s] %s(s)' %
            (data_type, ctext(time.time() - start_time, 'lightyellow')))
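

# Stand-alone sketch of the per-panel preparation above (subsample, normalize,
# project to 2-D), outside the class. log_norm is not defined in this snippet,
# so np.log1p is used here as a stand-in; the count matrix is synthetic.
import numpy as np
from odin.ml import fast_pca

np.random.seed(5218)
V = np.random.poisson(lam=3.0, size=(8000, 200)).astype('float64')
ids = np.random.permutation(V.shape[0])[:5000]   # cap at 5000 points
V = np.log1p(V[ids])                             # stand-in for log_norm
V_2d = fast_pca(V, n_components=2)               # or fast_tsne(V, n_components=2)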
Example #5
Z = []
U = []
Z_hat = []
Y = []
for x, y in tqdm(valid):
    qz_x, qu_z, qz_u = vae.encode_two_stages(x)
    Z.append(qz_x.mean())
    U.append(qu_z.mean())
    Z_hat.append(qz_u.mean())
    Y.append(np.argmax(y, axis=-1))
Z = np.concatenate(Z, 0)[:5000]
U = np.concatenate(U, 0)[:5000]
Z_hat = np.concatenate(Z_hat, 0)[:5000]
Y = np.concatenate(Y, 0)[:5000]

plt.figure(figsize=(15, 5), dpi=150)
vs.plot_scatter(fast_tsne(Z), color=Y, grid=False, ax=(1, 3, 1))
vs.plot_scatter(fast_tsne(U), color=Y, grid=False, ax=(1, 3, 2))
vs.plot_scatter(fast_tsne(Z_hat), color=Y, grid=False, ax=(1, 3, 3))
plt.tight_layout()

ids = np.argsort(np.mean(qz_x.stddev(), 0))
ids_u = np.argsort(np.mean(qu_z.stddev(), 0))

plt.figure(figsize=(10, 10), dpi=200)
plot_latent_stats(mean=np.mean(qz_x.mean(), 0)[ids],
                  stddev=np.mean(qz_x.stddev(), 0)[ids],
                  ax=(3, 1, 1),
                  name='q(z|x)')
plot_latent_stats(mean=np.mean(qu_z.mean(), 0)[ids_u],
                  stddev=np.mean(qu_z.stddev(), 0)[ids_u],
                  ax=(3, 1, 2),
                  name='q(u|z)')
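
# Stand-alone sketch of the latent-statistics step above, using a synthetic
# batch of Normal posteriors in place of the VAE outputs; tensorflow_probability
# is assumed to be available, since the encoder above returns distribution
# objects with .mean() and .stddev().
import numpy as np
import tensorflow_probability as tfp

qz = tfp.distributions.Normal(loc=np.random.randn(256, 32),
                              scale=np.random.rand(256, 32) + 0.1)
ids = np.argsort(np.mean(qz.stddev(), 0))   # sort latent dims by average stddev
mean = np.mean(qz.mean(), 0)[ids]
stddev = np.mean(qz.stddev(), 0)[ids]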
Example #6
def compare_methods(X, y, dim, title, n_iter='auto', verbose=0, plda=False):
    print(title, ':', dim)
    #
    pca = PCA(n_components=dim, random_state=random_state)
    pca.fit(X)
    X_pca = pca.transform(X)
    #
    if plda:
        plda = ml.PLDA(n_phi=dim, verbose=verbose)
        plda.fit(X=X, y=y)
        X_plda = plda.transform(X)
        n_col = 5
    else:
        plda = None
        X_plda = None
        n_col = 4
    #
    ppca = ml.PPCA(n_components=dim,
                   verbose=verbose,
                   n_iter=n_iter,
                   random_state=random_state)
    ppca.fit(X)
    X_ppca = ppca.transform(X)
    #
    sppca1 = ml.SupervisedPPCA(n_components=dim,
                               verbose=verbose,
                               extractor='supervised',
                               n_iter=n_iter,
                               random_state=random_state)
    sppca1.fit(X, y)
    X_sppca1 = sppca1.transform(X)
    #
    sppca2 = ml.SupervisedPPCA(n_components=dim,
                               verbose=verbose,
                               extractor='unsupervised',
                               n_iter=n_iter,
                               random_state=random_state)
    sppca2.fit(X, y)
    X_sppca2 = sppca2.transform(X)
    # T-SNE if necessary
    if dim > 2:
        X_pca = ml.fast_tsne(X_pca, n_components=2)
        X_ppca = ml.fast_tsne(X_ppca, n_components=2)
        X_sppca1 = ml.fast_tsne(X_sppca1, n_components=2)
        X_sppca2 = ml.fast_tsne(X_sppca2, n_components=2)
        if X_plda is not None:
            X_plda = ml.fast_tsne(X_plda, n_components=2)
    # Plotting
    V.plot_figure(nrow=4, ncol=18)
    plt.subplot(1, n_col, 1)
    plt.scatter(x=X_pca[:, 0], y=X_pca[:, 1], c=y, marker='o', alpha=0.5, s=1)
    plt.xticks([], [])
    plt.yticks([], [])
    plt.title("PCA")

    plt.subplot(1, n_col, 2)
    plt.scatter(x=X_ppca[:, 0],
                y=X_ppca[:, 1],
                c=y,
                marker='o',
                alpha=0.5,
                s=1)
    plt.xticks([], [])
    plt.yticks([], [])
    plt.title("PPCA")

    plt.subplot(1, n_col, 3)
    plt.scatter(x=X_sppca1[:, 0],
                y=X_sppca1[:, 1],
                c=y,
                marker='o',
                alpha=0.5,
                s=1)
    plt.xticks([], [])
    plt.yticks([], [])
    plt.title("S-PPCA (supervised extractor)")

    plt.subplot(1, n_col, 4)
    plt.scatter(x=X_sppca2[:, 0],
                y=X_sppca2[:, 1],
                c=y,
                marker='o',
                alpha=0.5,
                s=1)
    plt.xticks([], [])
    plt.yticks([], [])
    plt.title("S-PPCA (unsupervised extractor")

    if plda is not None:
        plt.subplot(1, n_col, 5)
        plt.scatter(x=X_plda[:, 0],
                    y=X_plda[:, 1],
                    c=y,
                    marker='o',
                    alpha=0.5,
                    s=1)
        plt.xticks([], [])
        plt.yticks([], [])
        plt.title("PLDA")

    plt.suptitle('[%d]%s' % (dim, title))
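

# Hypothetical driver for compare_methods (not in the original snippet). The
# function body also relies on the module-level names np, plt, PCA, ml, V and
# random_state, so they are set up here; the iris data keeps the run small.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from odin import ml
from odin import visual as V

random_state = 1234
X_iris, y_iris = load_iris(return_X_y=True)
compare_methods(X_iris, y_iris, dim=2, title='iris', plda=False)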
Example #7
                              inc_labels=True).concatenate(
                                  sc.create_dataset(batch_size=batch_size,
                                                    partition='test',
                                                    inc_labels=True)):
    X_test.append(x)
    y_test.append(y)
X_test = tf.concat(X_test, axis=0)
y_test = tf.concat(y_test, axis=0)

try:
    from odin.ml import fast_umap
    x_ = fast_umap(X_test.numpy())
    algo = "umap"
except Exception:  # fall back to t-SNE if UMAP is unavailable or fails
    from odin.ml import fast_tsne
    x_ = fast_tsne(X_test.numpy())
    algo = "tsne"


# ===========================================================================
# Model
# ===========================================================================
def fig2image(fig: plt.Figure, dpi=180) -> tf.Tensor:
    r""" Return an image shape `[1, h, w, 4]` """
    buf = io.BytesIO()
    fig.savefig(buf, format='png', dpi=dpi)
    buf.seek(0)
    image = tf.image.decode_png(buf.getvalue(), channels=4)
    # add batch dimension
    image = tf.expand_dims(image, 0)
    return image
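

# Typical use of fig2image (a hypothetical sketch): render the 2-D projection
# computed above as a scatter plot and log it to TensorBoard; the log directory
# is arbitrary.
fig = plt.figure(figsize=(5, 5))
plt.scatter(x_[:, 0], x_[:, 1], s=4)
plt.title(algo)
writer = tf.summary.create_file_writer('/tmp/tensorboard')
with writer.as_default():
    tf.summary.image('test_projection', fig2image(fig), step=0)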