Example #1
def plot_iris(y, y_classes, maxit=25, *args, **kwargs):
    # np.random.seed(0)
    fig, ax = plot_grid(5)
    # Variational Bayes
    vbpca = VBPCA(y, *args, **kwargs)
    for i in range(maxit):
        vbpca.update()
    plot_scatter(vbpca.transform(), y_classes, ax[0])
    ax[0].set_title('VBPCA')
    # Laplace approximation
    lbpca = LBPCA(y.T)
    lbpca.fit(maxit)
    plot_scatter(lbpca.transform(2).T, y_classes, ax[1])
    ax[1].set_title('LBPCA')
    # Streaming LBPCA
    stream = create_distributed(np.copy(y.T), 10)
    stream.randomized_fit(1)
    plot_scatter(stream.transform(y.T, 2).T, y_classes, ax[2])
    ax[2].set_title('Batch BPCA')
    # Distributed LBPCA
    stream = create_distributed(np.copy(y.T), 10)
    stream.averaged_fit(maxit)
    plot_scatter(stream.transform(y.T, 2).T, y_classes, ax[3])
    ax[3].set_title('Parallel BPCA')
    # PCA
    pca = PCA(y.T)
    plot_scatter(pca.fit_transform().T, y_classes, ax[4])
    ax[4].set_title('PCA')
    plt.show()
Example #2
from numpy import genfromtxt, hstack, savetxt

def generate_pca_embedding_files():
    '''Generate PCA embedding CSV files for the experiments.'''
    raw = genfromtxt('digits-raw.csv', delimiter=',')
    X = raw[:, 2:]
    pca = PCA(10)
    X_new = pca.fit_transform(X)
    raw_new = hstack((raw[:, :2], X_new))
    savetxt('digits-pca-embedding.csv', raw_new, delimiter=',')
Example #3
from numpy import array, genfromtxt
from numpy.random import permutation, rand
from matplotlib.pyplot import scatter, show

def Bonus3():
    '''
    Scatter plot of samples projected onto the first
    two eigenvectors.
    '''
    raw = genfromtxt('digits-raw.csv', delimiter=',')
    X = raw[:, 2:]
    pca = PCA(2)
    X_new = pca.fit_transform(X)
    perm = permutation(X.shape[0])[:1000]
    labels = array(raw[perm, 1], dtype=int)
    colors = rand(10, 3)[labels, :]
    scatter(X_new[perm, 0], X_new[perm, 1], c=colors, alpha=0.9, s=10)
    show()
Example #4
def show_hinton_weights(data):
    np.set_printoptions(precision=3)
    lbpca = LBPCA(data)
    pca = PCA(data)
    # LBPCA
    iterations = 50
    lbpca.fit_transform(iterations)
    weight = lbpca.W
    hinton(weight)
    figure = plt.gcf()
    figure.canvas.manager.set_window_title('BPCA, iterations=' + str(iterations))
    plt.title('BPCA')
    plt.show()
    # PCA
    weight = pca.fit_transform()
    pcs = pca.params
    hinton(pcs[:, :-1])
    figure = plt.gcf()
    figure.canvas.manager.set_window_title('PCA')
    plt.title('PCA')
    plt.show()
    # Streaming LBPCA
    iterations = 50
    coord = create_distributed(data, 10)
    coord.randomized_fit(iterations)
    weight = coord.W
    hinton(weight)
    figure = plt.gcf()
    figure.canvas.manager.set_window_title('Batch BPCA')
    plt.title('Batch BPCA')
    plt.show()
    # Distributed LBPCA
    iterations = 50
    coord = create_distributed(data, 10)
    coord.averaged_fit(iterations)
    weight = coord.W
    hinton(weight)
    figure = plt.gcf()
    figure.canvas.manager.set_window_title('Parallel BPCA, iterations=' + str(iterations))
    plt.title('Parallel BPCA')
    plt.show()
Example #5
def main():
    datafile = "data.txt"
    data = loaddata(datafile)
    k = 2
    pca = PCA(k)
    return pca.fit_transform(data)
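Note: these snippets use a project-local PCA class (see `from pca import PCA` in Example #6), not scikit-learn's, and most call it as `PCA(k)` followed by `fit_transform` (Examples #1 and #4 instead pass the data matrix to the constructor, so their class differs). The actual implementation is not shown here; a minimal sketch consistent with the `PCA(k).fit_transform(X)` usage, assuming the constructor argument is the number of components and fitting is centering plus SVD, might look like:

import numpy as np


class PCA:
    '''Minimal sketch of the PCA(k).fit_transform(X) interface used above.

    Hypothetical stand-in, not the repo's actual implementation.
    '''

    def __init__(self, k=2):
        self.k = k                # number of principal components to keep
        self.mean_ = None         # per-feature mean learned during fitting
        self.components_ = None   # top-k principal axes, shape (k, d)

    def fit_transform(self, X):
        X = np.asarray(X, dtype=float)
        self.mean_ = X.mean(axis=0)
        Xc = X - self.mean_                            # center the data
        _, _, Vt = np.linalg.svd(Xc, full_matrices=False)
        self.components_ = Vt[:self.k]                 # principal axes
        return Xc @ self.components_.T                 # project onto them

    def transform(self, X):
        Xc = np.asarray(X, dtype=float) - self.mean_
        return Xc @ self.components_.T

    def inverse_transform(self, Z):
        return Z @ self.components_ + self.mean_

With this stand-in, `main()` above would return an (n, 2) array projecting the rows of `data` onto its first two principal axes.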
Example #6
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from pca import PCA

# Read the dataset
columns = [
    'sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class'
]
iris = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    header=None,
    names=columns)

# Extract features
features = iris.drop(columns='class')

# Apply PCA to the data
p = PCA(k=3)
p = p.fit_transform(features)

# Create a DataFrame with the transformed data
names = ['pc1', 'pc2', 'pc3', 'pc4']
principalDf = pd.DataFrame(data=p, columns=names[0:p.shape[1]])

# Concatenate with the class column
finalDf = pd.concat([principalDf, iris['class']], axis=1)

# Show the new space
sns.pairplot(data=finalDf, hue='class', diag_kind='kde')
plt.show()
Example #7
data = data.drop(test_data.index)

X_train = data.loc[:, 'Alcohol':]
y_train = data['target']

X_test = test_data.loc[:, 'Alcohol':]
y_test = test_data['target']

target_names = [str(i) for i in np.unique(y_train)]

print()
print('test data classes:', y_test)
"""
scale
"""
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
"""
PCA
"""
pca = PCA()
pca.fit_transform(X_train)
pca.add_data(X_test)
plt = pca.plot(y_train,
               target_names,
               title='PCA of Wine dataset',
               plot_ellipse=True)
plt.savefig('output/pca_wine.png')
plt.show()
Example #8
                labels.append(label)

                abs_path = os.path.join(dir, file)
                print("[INFO] Reading file : " + abs_path)

                img = cv2.imread(abs_path)
                hog_emb, grad_magnitude = hog(img)

                hog_embeddings.append(hog_emb)

    print("-----------------------------------------------------------")
    print("[INFO] Implementing Principal component analysis ... ")
    hog_embeddings = np.array(hog_embeddings)

    labels = np.array(labels)
    hog_embeddings = pca.fit_transform(hog_embeddings)

    pickle.dump(hog_embeddings, open("hog_embeddings.pickle", "wb"))
    pickle.dump(labels, open("labels.pickle", "wb"))
else:
    hog_embeddings = pickle.load(open("hog_embeddings.pickle", "rb"))
    labels = pickle.load(open("labels.pickle", "rb"))

print("-------------------------------------------------------------")
print("[INFO] Reading validation data")

VAL_DIR = 'validation/'
if (not os.path.exists("hog_embeddings_val.pickle")
        or not os.path.exists("labels_val.pickle")):
    for (dir, dirs, files) in os.walk(VAL_DIR):
        if (dir != VAL_DIR):
Example #9
        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        img = cv2.resize(img, (30, 30), interpolation=cv2.INTER_AREA)
        # add to the dataset after flattening into a row vector
        dataset.append(img.flatten())
    return np.array(dataset)


"""First testing on some non image data"""

# Read the data
dataset = pd.read_csv("Wine.csv")

x = dataset.iloc[:, :-1].values

p = PCA(nb_components=2)
x2 = p.fit_transform(x)
print(p.get_variance_score())

# Visualization time
plt.scatter(x2[:, 0], x2[:, 1])
plt.show()
p = None
"""now image compression"""

x = load_imgs()
print("The x matrix contains {} images converted to row vectors of length {}".
      format(x.shape[0], x.shape[1]))

#Now let's convert them to 25x25 size images. So the number of
#principal components becomes 25*25 = 625
class EigenFaceRecognizer:
    def __init__(self):
        self.pca = PCA()
        self.labels = []
        self.trained_imgs = None
        # per-instance mapping (sample index -> label); a class-level
        # defaultdict would be shared across all instances
        self.i_to_label = defaultdict(int)
    def train(self, mat, label):
        # group consecutive sample indices that share the same label
        tmp = []
        for i in range(len(label)):
            self.i_to_label[i] = label[i]
            if i != 0 and label[i] != label[i - 1]:
                self.labels.append(tmp)
                tmp = []
            tmp.append((i, label[i]))
        self.labels.append(tmp)

        to_fit = []
        for i in range(len(mat)):
            to_fit.append(np.ndarray.flatten(mat[i]))
        self.trained_imgs = self.pca.fit_transform(to_fit)

    def predict(self, img):
        input_img = img
        tmp_img = np.ndarray.flatten(img)
        tmp_img = np.array([tmp_img])
        tmp_img = self.pca.transform(tmp_img)
        min_mean = 1e100
        min_label = 0

        re_imgs = self.pca.inverse_transform(self.trained_imgs)
        re_imgs = re_imgs.astype(np.uint8)
        total = 0
        size = 0
        # scan each group of same-labeled samples, tracking the label
        # whose group has the smallest mean distance to the query
        for i in range(len(self.trained_imgs)):
            if i != 0 and self.i_to_label[i] != self.i_to_label[i - 1]:
                mean = total / size
                if mean < min_mean:
                    min_mean = mean
                    min_label = self.i_to_label[i - 1]
                total = 0
                size = 0
            trained_img = self.trained_imgs[i]
            distance = self.dis(tmp_img, trained_img)
            size += 1
            total += distance
            # print(self.i_to_label[i], distance)
        mean = total / size
        if mean < min_mean:
            min_mean = mean
            min_label = self.i_to_label[len(self.trained_imgs) - 1]

        # build a side-by-side view of the input and the best match's
        # PCA reconstruction (kept for inspection)
        for i in self.i_to_label:
            if self.i_to_label[i] == min_label:
                result_img = np.reshape(re_imgs[i], (100, 100))
                tmp = np.concatenate((input_img, result_img))
                break
        return [min_label, min_mean]

    def dis(self, a, b):
        return np.linalg.norm(a - b)
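A hypothetical usage sketch for EigenFaceRecognizer, with invented data and names: it assumes 100x100 grayscale inputs (since `predict` reshapes reconstructions to (100, 100)) and a PCA object exposing `fit_transform`, `transform`, and `inverse_transform`, such as the stand-in sketched after Example #5.

import numpy as np

# Synthetic stand-in data: six 100x100 "faces" from two subjects.
rng = np.random.default_rng(0)
faces = rng.integers(0, 256, size=(6, 100, 100)).astype(np.uint8)
names = ['alice', 'alice', 'alice', 'bob', 'bob', 'bob']

rec = EigenFaceRecognizer()
rec.train(faces, names)                  # flattens each image, fits PCA
label, mean_dist = rec.predict(faces[0])
print(label, mean_dist)                  # label with smallest mean distance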
Example #11
# Read the wine dataset
wine = datasets.load_wine()
wine = pd.DataFrame(data=np.c_[wine['data'], wine['target']],
                    columns=wine['feature_names'] + ['target'])

# Separate into features and target
features = wine.iloc[:, 0:13]
target = wine.iloc[:, 13]

# Standardize the dataset
features = StandardScaler().fit_transform(features)

# Create PCA
pca = PCA(k=5)
newFeatures1 = pca.fit_transform(features)

# Create adaptive PCA
aPCA = AdaptivePCA(13, 5, 100)
newFeatures2 = aPCA.fit_transform(features)

# Set up 10-fold cross-validation
kf = KFold(n_splits=10)

# MLP model
mlp = MLPClassifier(solver='adam', hidden_layer_sizes=(25, ))

score = {'None': [], 'pca': [], 'aPca': []}

# Run for each fold
for train_index, test_index in kf.split(features):