Пример #1
0
        print('Input:\t\t', ip.data.cpu().numpy()[0])
        enc = encoder(ip)
        print('Encoding:\t', enc.data.cpu().numpy()[0])
        enc = channel_output(enc)
        print('Channel:\t', enc.data.cpu().numpy()[0])
        op = decoder(enc)
        print('Output:\t\t', torch.softmax(op, dim=1).data.cpu().numpy()[0])

if hp.constellation:  # to visualize encodings, etc.
    # Create the output directory; exist_ok makes an already existing
    # directory a no-op, replacing the manual errno.EEXIST check.
    os.makedirs('Constellations', exist_ok=True)

    # Encode every row of the hp.M x hp.M identity matrix, i.e. each
    # one-hot input vector, and move the result to a NumPy array.
    ip = torch.eye(hp.M, device=device)
    enc = encoder(ip).cpu().detach().numpy()

    # Embed the encodings with t-SNE; after the transpose each row of
    # enc_emb holds one embedding axis.
    # NOTE(review): TSNE() runs with its default perplexity; recent
    # scikit-learn releases require perplexity < n_samples (hp.M here) --
    # confirm hp.M is large enough or pass an explicit perplexity.
    enc_emb = TSNE().fit_transform(enc).T
    # Centre each axis on zero, then scale by the overall standard deviation.
    enc_emb -= enc_emb.mean(axis=1, keepdims=True)
    enc_emb /= enc_emb.std()

    # Scatter plot of the normalised embedding, saved and then shown.
    plt.figure(dpi=250)
    plt.grid()
    plt.scatter(enc_emb[0], enc_emb[1])
    plt.title('Constellation for RBF ({0},{1})'.format(hp.n, hp.k))
    plt.savefig(join('Constellations', 'RBF({0},{1}).png'.format(hp.n, hp.k)))
    plt.show()

# Wall-clock time elapsed since `start` was recorded.
print('Total time taken:{0:.2f} seconds'.format(time() - start))
Пример #2
0
def preprocess(data, data_in='../data/raw', data_out='../data/preprocessed'):
    """
    Preprocess a dataset based on its name.

    Reads the raw file for ``data`` from below ``data_in``, builds a feature
    matrix and a label vector, splits the rows into a training and a test
    part, and saves the four resulting arrays with ``np.save`` as ``trn_x``,
    ``trn_y``, ``test_x`` and ``test_y`` under ``{data_out}/{data}`` (the
    directory is created when missing). A tab-separated summary line with the
    dataset name, the number of rows, the number of feature columns and
    ``labels.max() + 1`` is printed.

    Args:
        data: Dataset name. One of 'breast-cancer',
            'breast-cancer-wisconsin', 'heart-disease', 'hepatitis',
            'brain-tumor', 'diabetes' or 'synthetic'.
        data_in: Root directory that holds one sub-directory per raw dataset.
        data_out: Root directory the preprocessed arrays are written to.

    Raises:
        ValueError: If ``data`` is not one of the supported dataset names.
    """
    if data == 'breast-cancer':
        df = pd.read_csv('{}/{}/breast-cancer.data'.format(data_in, data),
                         header=None)
        trn_idx, test_idx = split(df.shape[0])
        # Column 0 is the label; every remaining column is categorical.
        features = to_categorical_features(df.iloc[:, 1:])
        labels = to_labels(df[0])
    elif data == 'breast-cancer-wisconsin':
        df = pd.read_csv(
            '{}/{}/breast-cancer-wisconsin.data'.format(data_in, data),
            header=None)
        trn_idx, test_idx = split(df.shape[0])
        features = to_numerical_features(df, trn_idx, list(range(1, 10)))
        labels = to_labels(df[10])
    elif data == 'heart-disease':
        df = pd.read_csv(
            '{}/{}/processed.cleveland.data'.format(data_in, data),
            header=None)
        trn_idx, test_idx = split(df.shape[0])
        x1 = to_numerical_features(df, trn_idx, [0, 3, 4, 7, 9])
        x2 = to_categorical_features(df, [1, 2, 5, 6, 8, 10, 12])
        features = np.concatenate((x1, x2), axis=1)
        # Binarise the target: any value above 0 becomes class 1.
        labels = to_labels((df[13] > 0).astype(np.int64))
    elif data == 'hepatitis':
        df = pd.read_csv('{}/{}/hepatitis.data'.format(data_in, data),
                         header=None)
        trn_idx, test_idx = split(df.shape[0])
        x1 = to_numerical_features(df, trn_idx, [1, 14, 15, 16, 17, 18])
        x2 = to_categorical_features(df, [2, 4, 5])
        features = np.concatenate((x1, x2), axis=1)
        # NOTE(review): column 19 is used as the label here; in the UCI
        # hepatitis file the class appears to be column 0 -- confirm that
        # column 19 is the intended target.
        labels = to_labels(df[19])
    elif data == 'brain-tumor':
        df = pd.read_csv('{}/{}/Dataset.csv'.format(data_in, data))
        # Drop rows whose 'Area' is 0 before splitting.
        df = df[df['Area'] != 0].reset_index()
        trn_idx, test_idx = split(df.shape[0])
        cols = [
            'Area', 'Perimeter', 'Convex Area', 'Solidity',
            'Equivalent Diameter', 'Major Axis', 'Minor Axis'
        ]
        features = to_numerical_features(df, trn_idx, cols)
        labels = to_labels(df['Class'])
    elif data == 'diabetes':
        df = pd.read_csv(
            '{}/{}/pima-indians-diabetes.csv'.format(data_in, data),
            header=None)
        trn_idx, test_idx = split(df.shape[0])
        # Zeros in the feature columns are replaced by NaN. The replaced copy
        # is passed on directly instead of being assigned back into ``df``:
        # writing NaN into integer columns through ``iloc`` is an
        # incompatible-dtype assignment that pandas deprecates.
        raw_features = df.iloc[:, 1:-1].replace(0, np.nan)
        features = to_numerical_features(raw_features, trn_idx)
        labels = to_labels(df.iloc[:, -1])
    elif data == 'synthetic':
        # Built from the breast-cancer-wisconsin file: its numerical features
        # are embedded with t-SNE and the embedding is standardised per column.
        df = pd.read_csv(
            '{}/breast-cancer-wisconsin/breast-cancer-wisconsin.data'.format(
                data_in),
            header=None)
        trn_idx, test_idx = split(df.shape[0])
        features = to_numerical_features(df, trn_idx, list(range(1, 10)))
        features = TSNE(random_state=0).fit_transform(features)
        features = (features - features.mean(axis=0)) / features.std(axis=0)
        labels = to_labels(df[10])
    else:
        raise ValueError('unknown dataset: {!r}'.format(data))

    print('{}\t{}\t{}\t{}'.format(data, features.shape[0], features.shape[1],
                                  labels.max() + 1))

    trn_x = features[trn_idx]
    trn_y = labels[trn_idx]
    test_x = features[test_idx]
    test_y = labels[test_idx]

    os.makedirs('{}/{}'.format(data_out, data), exist_ok=True)
    np.save('{}/{}/trn_x'.format(data_out, data), trn_x)
    np.save('{}/{}/trn_y'.format(data_out, data), trn_y)
    np.save('{}/{}/test_x'.format(data_out, data), test_x)
    np.save('{}/{}/test_y'.format(data_out, data), test_y)
        print('Output:\t\t',torch.softmax(op,dim=1).data.cpu().numpy()[0])


if hyper.constellation: # to visualize encodings, etc.
    # Create the output directory; exist_ok makes an already existing
    # directory a no-op, replacing the manual errno.EEXIST check.
    os.makedirs('Constellations', exist_ok=True)

    # Encode every row of the hyper.M x hyper.M identity matrix, i.e. each
    # one-hot input vector, and move the result to a NumPy array.
    ip = torch.eye(hyper.M, device=device)
    enc = encoder(ip).cpu().detach().numpy()

    # Embed the encodings with t-SNE; after the transpose each row of
    # enc_emb holds one embedding axis.
    # NOTE(review): TSNE() runs with its default perplexity; recent
    # scikit-learn releases require perplexity < n_samples (hyper.M here) --
    # confirm hyper.M is large enough or pass an explicit perplexity.
    enc_emb = TSNE().fit_transform(enc).T
    # Centre each axis on zero, then scale by the overall standard deviation.
    enc_emb -= enc_emb.mean(axis=1, keepdims=True)
    enc_emb /= enc_emb.std()

    # Scatter plot of the normalised embedding, saved and then shown.
    plt.figure(dpi=250)
    plt.grid()
    plt.scatter(enc_emb[0], enc_emb[1])
    plt.title('Constellation of autoencoder ({0},{1})'.format(hyper.n, hyper.k))
    plt.savefig(join('Constellations', 'constellation_({0},{1}).png'.format(hyper.n, hyper.k)))
    plt.show()


if hyper.boundaries: # to (try to) visualize decision boundaries, etc.
    # Create the output directory; exist_ok makes an already existing
    # directory a no-op, replacing the manual errno.EEXIST check.
    os.makedirs('Decision Boundaries', exist_ok=True)
plt.colorbar()
plt.rcParams['font.size'] = 10
# Annotate every point of the score plot with its index label, centred
# horizontally and placed below the point.
for sample_number, sample_name in enumerate(score.index):
    plt.text(score.iloc[sample_number, 0], score.iloc[sample_number, 1], sample_name,
             horizontalalignment='center', verticalalignment='top')
plt.xlabel('t_1 (PCA)')
plt.ylabel('t_2 (PCA)')
plt.show()

# t-SNE
# Optimise the perplexity with the k3n-error: for each candidate, embed the
# autoscaled data, standardise the embedding, and add up the k3n-error in both
# directions (original -> embedding and embedding -> original).
k3n_errors = []
for index, perplexity in enumerate(candidates_of_perplexity):
    print(index + 1, '/', len(candidates_of_perplexity))
    t = TSNE(perplexity=perplexity, n_components=2, init='pca', random_state=10).fit_transform(autoscaled_x)
    scaled_t = (t - t.mean(axis=0)) / t.std(axis=0, ddof=1)

    k3n_errors.append(
        sample_functions.k3n_error(autoscaled_x, scaled_t, k_in_k3n_error) + sample_functions.k3n_error(
            scaled_t, autoscaled_x, k_in_k3n_error))
plt.rcParams['font.size'] = 18
plt.scatter(candidates_of_perplexity, k3n_errors, c='blue')
plt.xlabel("perplexity")
plt.ylabel("k3n-errors")
plt.show()
# First candidate with the smallest summed k3n-error. np.argmin replaces the
# list-vs-scalar comparison inside np.where, which raised IndexError when no
# element compared equal to the minimum (e.g. a NaN error value).
optimal_perplexity = candidates_of_perplexity[int(np.argmin(k3n_errors))]
print('\nk3n-error による perplexity の最適値 :', optimal_perplexity)
# t-SNE with the selected perplexity; the embedding is stored with the sample
# index of `x` and written to CSV.
t = TSNE(perplexity=optimal_perplexity, n_components=2, init='pca', random_state=10).fit_transform(autoscaled_x)
t = pd.DataFrame(t, index=x.index, columns=['t_1 (t-SNE)', 't_2 (t-SNE)'])
t.to_csv('tsne_t.csv')
Пример #5
0
            z = q['styles'].value.cpu().detach().numpy()
        else:
            q = enc(images)
            z = q['styles'].value.data.detach().numpy()
        zs.append(z)
        ys.append(y.numpy())
# Merge the per-batch arrays collected above into single arrays along axis 0.
ys = np.concatenate(ys,0)
zs = np.concatenate(zs,0)


# run TSNE when number of latent dims exceeds 2
if NUM_STYLE > 2:
    from sklearn.manifold import TSNE
    zs2 = TSNE().fit_transform(zs)
    # Per-column mean and standard deviation of the embedding; neither is
    # used in the visible part of this script.
    zs2_mean = zs2.mean(0)
    zs2_std = zs2.std(0)
else:
    # Otherwise the latent codes are plotted as they are.
    zs2 = zs

# display a 2D plot of the digit classes in the latent space
fig = plt.figure(figsize=(6,6))
ax = plt.gca()

# One scatter series per label value 0..9; the face colour of each series is
# collected in `colors`.
colors = []
for k in range(10):
    # Boolean mask selecting the samples whose label equals k.
    m = (ys == k)
    p = ax.scatter(zs2[m, 0], zs2[m, 1], label='y=%d' % k, alpha=0.5, s=5)
    colors.append(p.get_facecolor())
ax.legend()

#fig.tight_layout()