# Example #1
0
from sklearn.decomposition import PCA
from scipy.spatial import distance
import generate_data
import settings
import logging
import numpy as np

# Emit INFO-level records so the per-seed report logged further down is shown.
logging.basicConfig(level=logging.INFO)

# Raw MNIST matrix from the project loader; axis 0 is used below as the
# sample count.
X_mnist_raw = generate_data.load_x_mnist_raw()

# Plain indexing is used here: a missing key raises KeyError either way,
# because a dict.get() default that is itself looked up with [] is evaluated
# before get() runs. (Assumes settings.parameters is an ordinary mapping.)
num_pca_dimensions = settings.parameters["num_pca_dimensions"]
# Looked up for parity with the project configuration; the seed sweep that
# follows iterates over its own seeds rather than using this value.
pca_random_seed = settings.parameters["pca_random_seed"]

# All-zero stand-in for the "previous projection", with one row per sample
# and one column per retained principal component.
n_samples = X_mnist_raw.shape[0]
X_mnist_old = np.zeros((n_samples, num_pca_dimensions))

# Refit PCA once per seed and log (a) the smallest pairwise distance between
# projected samples and (b) the largest element-wise change relative to the
# projection obtained with the previous seed.
# NOTE(review): scikit-learn documents random_state as applying to the
# 'arpack' and 'randomized' solvers; with svd_solver='full' this sweep
# presumably verifies that the projection is seed-independent -- confirm.
for seed in range(1000):
    X_mnist = PCA(
        n_components=num_pca_dimensions,
        svd_solver='full',
        random_state=seed,
    ).fit_transform(X_mnist_raw)

    # pdist returns the condensed vector of pairwise distances.
    closest_pair = distance.pdist(X_mnist).min()
    # On the first pass X_mnist_old is still the all-zero placeholder, so this
    # value is simply the largest absolute coordinate of the projection.
    shift_from_previous = np.abs(X_mnist_old - X_mnist).max()
    logging.info(
        "After PCA - minimum distance between samples is %f, dist to old %f",
        closest_pair, shift_from_previous)
    X_mnist_old = X_mnist
import generate_data
import settings

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import settings

# One shared configuration object is handed to both project loaders.
parameters = settings.parameters
X_mnist_raw = generate_data.load_x_mnist_raw(parameters=parameters)
letters_A, letters_A_raw = generate_data.load_A_letters(parameters=parameters)

# Quick sanity print: overall array shape plus the value range of the first
# raw letter sample.
first_letter = letters_A_raw[0, :]
print(letters_A_raw.shape, first_letter.max(), first_letter.min())

# Grid layout for the preview figure.
start_index = 0  # index of the first sample placed in the grid
width = 10  # number of images per row
# Number of subplot rows.
# NOTE(review): the drawing loop that follows fills every row with an image,
# while the commented-out label code hints that rows were once split between
# labels and pictures -- confirm the intended layout.
height = 10

f, ax = plt.subplots(nrows=height, ncols=width)
f.set_size_inches(16, 16)
f.subplots_adjust()
# Fill the grid row by row: cell (i, j) shows sample
# start_index + width * i + j, reshaped from a flat row into a 28x28 image
# and drawn with the reversed-gray colour map.
for i in range(height):
    for j in range(width):
        pixels = letters_A_raw[start_index + width * i + j, :]
        ax[i][j].imshow(pixels.reshape(28, 28), cmap='gray_r')
        #ax[2*i+1][j].text(text=str(letters_A_labels[start_index + width*i + j])

        #str(chr(