def reduce_data():
    flucs = []
    data = collect_data()
    for element in data:
        flucs.append(element.fluc.flatten())
    flucs = np.vstack(flucs)
    p = Pca(flucs, energy_treshold=0.5)
    # save a compressed vector for every element; the file-existence
    # check is currently disabled, so every vector is recomputed
    for element in data:
        if True:  # not os.path.isfile(element.path + "fluc_comp_1.npy"):
            fluc_comp = p.project_data(element.fluc.flatten()[:, np.newaxis])
            np.save(os.path.join(element.path, "fluc_comp_" + element.name), fluc_comp)
    # reduced = p.project_data(flucs.transpose())
    # np.save("reduced_data", reduced)
    np.save("/home/kayibal/sc-recom/code/data_aq/pca/energies", p.en)
    np.save("/home/kayibal/sc-recom/code/data_aq/pca/mean", p.mean)
    np.save("/home/kayibal/sc-recom/code/data_aq/pca/pcs", p.pcs)
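# The Pca class used by reduce_data() is not shown in this snippet. Below is
# a minimal sketch of what its interface could look like, assuming that
# `energy_treshold` keeps the smallest number of components whose cumulative
# eigenvalue energy reaches the threshold, and that `en`, `mean`, and `pcs`
# are the arrays saved above. This is an assumption, not the original class.
import numpy as np

class Pca:
    def __init__(self, data, energy_treshold=0.5):
        # rows are observations, columns are features
        self.mean = data.mean(axis=0)
        cov = np.cov(data - self.mean, rowvar=False)
        values, vectors = np.linalg.eigh(cov)
        order = np.argsort(values)[::-1]        # largest eigenvalue first
        values, vectors = values[order], vectors[:, order]
        self.en = values / values.sum()         # normalized energies
        k = int(np.searchsorted(np.cumsum(self.en), energy_treshold)) + 1
        self.pcs = vectors[:, :k]               # retained principal components

    def project_data(self, column_vector):
        # project a (features, 1) column vector onto the retained components
        return self.pcs.T @ (column_vector - self.mean[:, np.newaxis])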
def read_file():
    iris = load_iris()
    return iris.data, iris.target

batch_size = 2
num_epochs = 1000
number_final_att = 13

if __name__ == '__main__':
    ############################################## 2d
    inputs, labels = data.open_data('wine.arff', 3)
    inputs = np.array(inputs)
    labels = np.array(labels)
    pca = Pca()  # unused in this path
    scaled = preprocessing.scale(inputs)  # renamed so the `data` module is not shadowed
    pcaAdapt = PcaAdapt(13)
    pcaAdapt.train(scaled)
    result = np.matrix.transpose(pcaAdapt.pca_result(scaled)).reshape(
        len(scaled), number_final_att)
    # train a 3-class MLP on the reduced features
    mlp = MLP(3)
    points = result
    inputs = points
    mlp.create_network(inputs.shape[1:], 0.001)
    mlp.train(inputs, labels, num_epochs, batch_size)
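# PcaAdapt is not defined in this snippet. The train()/pca_result() pattern
# suggests an iterative, adaptive PCA; one classical way to train PCA
# adaptively is Sanger's rule (the generalized Hebbian algorithm). The sketch
# below is a guess at what such a class might implement, not the original code:
import numpy as np

class PcaAdapt:
    def __init__(self, n_components, lr=0.001):
        self.n_components = n_components
        self.lr = lr
        self.w = None

    def train(self, data, epochs=10):
        n_features = data.shape[1]
        rng = np.random.default_rng(0)
        self.w = rng.normal(scale=0.1, size=(self.n_components, n_features))
        for _ in range(epochs):
            for x in data:
                y = self.w @ x
                # Sanger's rule: each row of w converges to one principal component
                self.w += self.lr * (np.outer(y, x) - np.tril(np.outer(y, y)) @ self.w)

    def pca_result(self, data):
        # (n_components, n_samples) projection, matching the transpose above
        return self.w @ data.T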
def pca_async_processing(database_url_input, label_name, pca_filename):
    pca_generator = Pca(database_url_input)
    pca_generator.create_image(label_name, pca_filename)
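# Despite its name, the function above runs synchronously; a caller has to
# dispatch it to get asynchronous behavior. One plausible way, with purely
# illustrative argument values (none of them come from the original source):
from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor(max_workers=2)
future = executor.submit(
    pca_async_processing,
    "sqlite:///samples.db",  # hypothetical database URL
    "species",               # hypothetical label name
    "species_pca.png",       # hypothetical output image name
)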
def read_file():
    iris = load_iris()
    return iris.data, iris.target

colors = {0: 'ro', 1: 'bo', 2: 'go'}

if __name__ == '__main__':
    ############################################## 2d
    data, target = read_file()
    data = preprocessing.scale(data)
    pca = Pca()
    # covariance of the four iris features, then eigendecomposition
    cov = pca.cov_matrix(data[:, 0], data[:, 1], data[:, 2], data[:, 3])
    values, vectors = pca.eigen_values_vectors(cov)
    values, vectors = pca.sort_eigen(values, vectors)
    # keep the components covering 90% of the variance (two, per the reshape below)
    vectors = pca.eigen_strip_vectors(values, vectors, 0.90)
    print(vectors)
    values = values[:len(vectors[0])]
    result = np.matrix.transpose(pca.pca_result(data, vectors)).reshape(
        len(data), len(data[0]) - 2)
    result[:, 1] = -result[:, 1]  # flip the sign of the second component
    points = result
    count = 0
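# The colors mapping and the count variable suggest the snippet continues
# with a per-class scatter plot of the two retained components. A plausible
# continuation, assuming matplotlib (the original loop is not shown):
import matplotlib.pyplot as plt

for point, label in zip(points, target):
    plt.plot(point[0], point[1], colors[label])
plt.show()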
mlp = MLP(3)

# loading data
inputs, labels = data.open_data('wine.arff', 3)
inputs = np.array(inputs)
labels = np.array(labels)

# without pca
batch_size = 2
# print(inputs[0])
# leng = [len(inp) for inp in inputs]
# print(max(leng), min(leng))
# print(inputs.shape[1:])
# mlp.create_network(inputs.shape[1:], 0.001)
# mlp.train(inputs, labels, num_epochs, batch_size)

# with pca
pca = Pca()
scaled = preprocessing.scale(inputs)  # renamed so the `data` module is not shadowed
cov = np.cov(scaled, rowvar=False)
values, vectors = pca.eigen_values_vectors(cov)
values, vectors = pca.sort_eigen(values, vectors)
# keep the components covering 98% of the variance (eight, per the reshape below)
vectors = pca.eigen_strip_vectors(values, vectors, 0.98)
print(vectors.shape)
values = values[:len(vectors[0])]
result = np.matrix.transpose(pca.pca_result(scaled, vectors)).reshape(
    len(scaled), 8)
print(result.shape)
points = result
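# A quick sanity check one could append here, assuming eigen_strip_vectors
# selects components by cumulative energy: the retained eigenvalues should
# cover at least 98% of the total variance, and the total variance of the
# scaled data is the trace of its covariance matrix. (This check is an
# addition, not part of the original script.)
print("explained variance: %.4f" % (values.sum() / np.trace(cov)))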
# kmeans.distribute()
# error = kmeans.check()
# print('separated sets normalize')
# print("error is %f%%" % (error*100))
#
# print('none separated sets')
# points = Points()
# points.init(file_name="sets_connected.xls", start_row=0, dim=5)
# kmeans = Kmeans(points=points.points, centroid_num=5)
# kmeans.distribute()
# error = kmeans.check()
# print('none separated sets')
# print("error is %f%%" % (error*100))
#
# print('none separated sets normalize')
# points = Points()
# points.init(file_name="sets_connected_norma.xls", start_row=0, dim=5)
# kmeans = Kmeans(points=points.points, centroid_num=5)
# kmeans.distribute()
# error = kmeans.check()
# print('none separated sets normalize')
# print("error is %f%%" % (error*100))

points = Points()
points.init(file_name="iris.xls", start_row=0, end_row=2, dim=4)
prc = Pca(points=points.points)
prc.distribute()
error = prc.check()
print("error is %f%%" % (error*100))
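# For an independent cross-check of the iris reduction above, the same data
# can be projected with scikit-learn's PCA (an addition for comparison, not
# part of the original pipeline; it loads iris from sklearn, not iris.xls):
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

iris = load_iris()
reduced = PCA(n_components=2).fit_transform(iris.data)
print(reduced.shape)  # (150, 2)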
def compress(self, compression):
    # compress using PCA: keep a fraction `compression` of the components
    k = int(self.data.shape[1] * compression)
    pca = Pca(self.data)
    compressed_vectors = [pca.project(vector, k) for vector in self.data]
    return CompressionInfo(compressed_vectors, pca, self.data.shape)
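# The compression argument is the ratio of retained components to original
# features. A standalone check of that arithmetic (no Pca required; the
# feature count below is hypothetical):
n_features = 64
for compression in (0.1, 0.25, 0.5):
    print(compression, int(n_features * compression))  # -> 6, 16, 32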