def _pca_legislaturas(self):
    """Runs the principal component analysis by legislatura.

    Returns a dictionary where the keys are the legislatura ids and the
    value of each key is a vector with the n dimensions of the PCA
    analysis."""
    if not self.pca_legislaturas:
        if not self.vetores_votacao:
            self._inicializa_vetores()
        ilnn = self._lista_de_indices_de_legislaturas_nao_nulas()
        matriz = self.vetores_votacao
        # Exclude legislaturas absent from every voting in the period
        matriz = matriz[ilnn, :]
        # Center the data
        matriz = matriz - matriz.mean(axis=0)
        # Run the PCA
        self.pca = pca.PCA(matriz, fraction=1)
        self._preenche_pca_de_legislaturas_nulas(ilnn)
        logger.info("PCA terminada com sucesso. ini=%s, fim=%s" %
                    (str(self.ini), str(self.fim)))
    # Create the dictionary to be returned:
    dicionario = {}
    for legislatura, vetor in zip(self.legislaturas, self.pca.U):
        dicionario[legislatura.id] = vetor
    return dicionario
def train_model(slice_data, k, lr, random=True):
    global test_set
    global holdout_set
    global train_set
    global epochs
    percent_correct = []
    slice_data = bucket(slice_data, 6)
    for i in range(5):
        test_start_i = 2 * i
        test_end_i = test_start_i + 2
        test_set = unbucket(slice_data[test_start_i:test_end_i], 6)
        remaining = slice_data[:test_start_i] + slice_data[test_end_i:]
        rand.shuffle(remaining)
        train_set = unbucket(remaining[:6], 6)
        holdout_set = unbucket(remaining[6:], 6)
        p_c_a = pca.PCA(k)
        p_c_a.fit(np.array([image for image, _ in train_set]))
        train_set = append_one([(p_c_a.transform(np.array(image)), label)
                                for image, label in train_set])
        holdout_set = append_one([(p_c_a.transform(np.array(image)), label)
                                  for image, label in holdout_set])
        test_set = append_one([(p_c_a.transform(np.array(image)), label)
                               for image, label in test_set])
        w = batch_gd_sm(epochs, lr, p_c_a.k + 1)
        visualize(p_c_a, np.array(w))
        percent_correct.append(
            sum([correct_category_sm(x, w) for x in test_set]) / len(test_set))
    return percent_correct
def pca(self, keep=None, center=False, weight=True):
    '''
    Performs principal component analysis on the data field and stores a
    PCA object. Remove climatology, detrend the data, etc. before calling
    this method. If center=True, the PCA object will center the data using
    mean and standard deviation. If weight=True, the data is multiplied by
    area weights.
    '''
    nt, km, jm, im = self.data.shape
    # multiply data by area factor, reshape, return matrix
    if weight:
        factor = sp.cos(sp.deg2rad(self.grid['lat']))
        factor[factor < 0.] = 0.
        factor = sp.sqrt(factor)
    else:
        factor = sp.ones(self.grid['lat'].shape)
    mask = sp.ma.getmaskarray(self.data).copy()
    self.data[mask] = 0.0
    self.data *= factor[sp.newaxis, sp.newaxis]
    X = self.data.reshape((nt, km * jm * im)).view(sp.ndarray)
    self._pc = pca.PCA(X, center=center, keep=keep)
    self.data /= factor[sp.newaxis, sp.newaxis]
    self.data[mask] = self.data.fill_value
    self.data.mask = mask
def fit_transform(self, X, epochs, optimizer):
    '''
    Parameters
    ----------
    X : shape (n_samples, n_features)
        Training data
    epochs : The number of epochs
    optimizer : Optimize algorithm, see also optimizer.py

    Returns
    -------
    s : shape (n_samples, n_features)
        Predicted source per sample.
    '''
    n_samples, n_features = X.shape

    pca_model = pca.PCA(n_features, True)
    X_whiten = pca_model.fit_transform(X)

    self.__W = np.random.rand(n_features, n_features)

    for _ in range(epochs):
        g_W = np.zeros_like(self.__W)
        for x in X_whiten:
            g_W += (1 - 2 * scipy.special.expit(self.__W.dot(x.T))
                    ).dot(x) + np.linalg.inv(self.__W.T)
        g_W /= n_samples

        g_W = optimizer.optimize([g_W])[0]
        self.__W += g_W

    return X_whiten.dot(self.__W)
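# --- Usage sketch for the fit_transform above (assumptions, not from the source repo).
# `ICA` is assumed to be the class that owns fit_transform, and `SimpleSGD` below is a
# hypothetical stand-in for the optimizers in optimizer.py: the method only needs an
# object whose optimize() maps a list of gradients to a list of update steps.
import numpy as np


class SimpleSGD:
    def __init__(self, learning_rate=0.1):
        self.learning_rate = learning_rate

    def optimize(self, gradients):
        # Plain gradient step: scale each gradient by the learning rate.
        return [self.learning_rate * g for g in gradients]


rng = np.random.default_rng(0)
S = rng.uniform(-1.0, 1.0, size=(500, 2))   # two independent sources
A = rng.normal(size=(2, 2))                 # mixing matrix
X_mixed = S.dot(A)                          # observed mixtures

model = ICA()                               # assumed no-argument constructor
S_hat = model.fit_transform(X_mixed, epochs=50, optimizer=SimpleSGD(0.1))
print(S_hat.shape)                          # (500, 2): one recovered source per sample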
def TestLaws(mgnames, NJ=100):
    # create laws filters
    filts = texture.BuildLawsFilters()
    # allocate for jets
    NI = len(mgnames)  # number of images
    jets = np.zeros((NJ * NI, 25))
    # for each image
    for i in xrange(NI):
        # load
        # correlate
        #corrs = BruteCorrelate( data, filts )
        data = mgnames[i] + 0
        corrs = map(lambda x: correlate2d(data, x), filts)
        # smooth each of the 25 correlation images (index was `i` in the
        # original, which reprocessed one image; `j` is the intended index)
        for j in range(25):
            corrs[j] = cspline2d(abs(corrs[j]), 200)
        corrs = np.array(corrs)
        # extract random jets
        V, H = data.shape
        vs = range(V)
        hs = range(H)
        np.random.shuffle(vs)
        np.random.shuffle(hs)
        for j in range(NJ):
            jets[i * NJ + j] = corrs[:, vs[j], hs[j]]
    # k-means clustering
    clust, mmb = kmeans.KMeans(NI, jets)
    #return jets
    cffs, evecs = pca.PCA(clust, 3)
    cffs = pca.Map2PCA(clust, evecs)
    gnu.Save('Laws_results.txt', cffs)
    return clust, cffs
def Train(brodatz_f, brodatz_256_mgs):
    # A container function that can be imported or run with user-created
    # arrays and filenames. Written just for convenience when testing the code.
    qvecs = map(lambda x: basis.GaborCorr(x), brodatz_256_mgs)
    qvecs = np.array(qvecs)
    cffs, evecs = pca.PCA(qvecs, 3)
    cffs = pca.Map2PCA(qvecs, evecs)
    vecs_num = 0
    for name in brodatz_f:
        plot_vecs = cffs[vecs_num:vecs_num + 4]
        vecs_num = vecs_num + 4
        gnu.Save('graph_output\\' + name[0:-4] + '.txt', plot_vecs)
    for i in brodatz_f:
        print i + '\n'
    ss = 'unset key\n'
    ss += 'splot for [i=1:112] \'graph_output\\D\'.i.\'.txt\'\n'
    fp1 = open('plot_brodatz_256_textures.txt', 'w')
    fp1.write(ss)
    fp1.flush()
    return brodatz_f, cffs
def use_pca(dataset):
    dataset_rest = []
    dataset_main = []
    for road in dataset:
        pca_obj = pca.PCA(road, 2)
        dataset_rest.append(pca_obj.rest_x)
        dataset_main.append(pca_obj.main_x)
    return dataset_main, dataset_rest
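# --- Usage sketch for use_pca (assumptions, not taken from the original project).
# Each "road" is assumed to be a samples-by-features NumPy array, and pca.PCA(road, 2)
# is assumed to expose the leading components as `main_x` and the remainder as
# `rest_x`, exactly as the function above relies on.
import numpy as np

roads = [np.random.rand(100, 6) for _ in range(3)]  # three hypothetical road feature matrices
dataset_main, dataset_rest = use_pca(roads)
print(len(dataset_main), len(dataset_rest))         # 3 3: one entry per road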
def train(self):
    train_pca = pca.PCA(self.pcs)
    train_pca.center(self.train_data)
    train_pca.svd()
    self.train_features = np.transpose(train_pca.compress(self.train_data))
    LREG = l_reg.linear_regression(self.train_features)
    self.w_opt = LREG.optimize()
    self.targets = LREG.targets
def train_model(slice_data, k, lr, random=True):
    """
    Performs 5 runs to create 5 different models, using a different
    train/holdout/test split each time

    param: slice_data: data to slice up
    param: k: number of principal components
    param: lr: learning rate
    param: random: boolean that determines if the holdout set should be
        chosen randomly, default is True

    return: percent correct on the test set for each of the 5 runs
    """
    global test_set
    global holdout_set
    global train_set
    percent_correct = []
    for i in range(5):
        holdout_set = []
        test_starting_index = (i * 4)
        test_ending_index = test_starting_index + 4
        test_set = slice_data[test_starting_index:test_ending_index]
        remaining = slice_data[:test_starting_index] + slice_data[
            test_ending_index:]
        for _ in range(2):
            # Either chooses a random index or a set index. The set index is
            # used when the data split itself should not be a factor, e.g.
            # when testing only the effect of changing the learning rate.
            rand_index = rand.randint(0, len(remaining) - 1) if random else i * 2
            holdout_set += [remaining[::2][rand_index // 2]
                            ] + [remaining[1::2][rand_index // 2]]
            del remaining[(rand_index if rand_index % 2 == 0 else
                           rand_index - 1):(rand_index + 2 if rand_index %
                                            2 == 0 else rand_index + 1)]
        train_set = remaining
        p_c_a = pca.PCA(k)
        p_c_a.fit(np.array([image for image, _ in train_set]))
        train_set = append_one([(p_c_a.transform(np.array(image)), label)
                                for image, label in train_set])
        holdout_set = append_one([(p_c_a.transform(np.array(image)), label)
                                  for image, label in holdout_set])
        test_set = append_one([(p_c_a.transform(np.array(image)), label)
                               for image, label in test_set])
        # Only shuffle if the order of images can be a factor; shuffling is
        # turned off when testing only the learning-rate effect.
        if random:
            rand.shuffle(train_set)
        w = batch_gradient_descent(epochs, lr, p_c_a.k + 1)
        percent_correct.append(
            sum([
                correct_category(w, image, 1 if c1 in label else 0)
                for image, label in test_set
            ]) / len(test_set))
    return percent_correct
def low_rank_approximation(O, k):
    """ O dimensions are n X m """
    import time
    a = time.time()
    pca_out = pca.PCA(O)
    b = time.time()
    res = dot(pca_out.P[:, 0:k], pca_out.U[:, 0:k].transpose())
    c = time.time()
    logging.debug("PCA TOOK %s SECONDS AND DOT(MULTI) TOOK %s SECONDS" %
                  (b - a, c - b))
    return res
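# --- Usage sketch for low_rank_approximation (an assumption-laden illustration).
# It presumes the module-level `pca`, `dot`, and `logging` names that the function
# above already relies on, with pca.PCA(O) exposing scores in `P` and loadings in `U`.
import numpy as np

O = np.random.rand(20, 8)
O_k = low_rank_approximation(O, k=3)
print(O_k.shape)                    # (20, 8): same shape as the input
print(np.linalg.matrix_rank(O_k))   # expected to be at most 3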
def _pca_partido(self):
    """Runs the principal component analysis by party.

    Stores the result in self.pca.

    Returns a dictionary where the keys are the party acronyms and the
    value of each key is a vector with the n dimensions of the PCA
    analysis."""
    if not bool(self.pca_partido):
        if self.vetores_votacao == []:
            self._inicializa_vetores()
        matriz = self.vetores_votacao - self.vetores_votacao.mean(axis=0)
        self.pca_partido = pca.PCA(matriz)
    dicionario = {}
    for partido, vetor in zip(self.partidos, self.pca_partido.U):
        dicionario[partido.nome] = vetor
    return dicionario
def _pca_partido(self):
    """Runs the principal component analysis by party.

    Stores the result in self.pca.

    Returns a dictionary where the keys are the party acronyms and the
    value of each key is a vector with the n dimensions of the PCA
    analysis.
    """
    # Run the PCA, if it has not been done yet:
    if not self.pca_partido:
        if self.vetores_votacao == None or len(self.vetores_votacao) == 0:
            self._inicializa_vetores()
        # Parties of size zero must be excluded from the PCA:
        ipnn = []  # list of indices of non-empty parties
        ip = -1
        for p in self.partidos:
            ip += 1
            if self.tamanhos_partidos[p.nome] != 0:
                ipnn.append(ip)
        matriz = self.vetores_votacao
        matriz = matriz[ipnn, :]  # exclude parties of size zero
        # Center the data:
        matriz = matriz - matriz.mean(axis=0)
        # Run the PCA:
        self.pca_partido = pca.PCA(matriz, fraction=1)
        # Restore the empty parties, assigning zero to every dimension
        # in the principal component space:
        U2 = self.pca_partido.U.copy()  # save the PCA result in U2
        self.pca_partido.U = numpy.zeros(
            (len(self.partidos), self.num_votacoes))
        ip = -1
        ipnn2 = -1
        for p in self.partidos:
            ip += 1
            if ip in ipnn:  # if this is a non-empty party
                ipnn2 += 1
                cpmaximo = U2.shape[1]
                # put in this row the values previously saved in U2
                self.pca_partido.U[ip, 0:cpmaximo] = U2[ipnn2, :]
            else:
                self.pca_partido.U[ip, :] = numpy.zeros(
                    (1, self.num_votacoes))
        logger.info("PCA terminada com sucesso. ini=%s, fim=%s" %
                    (str(self.ini), str(self.fim)))
    # Create the dictionary to be returned:
    dicionario = {}
    for partido, vetor in zip(self.partidos, self.pca_partido.U):
        dicionario[partido.nome] = vetor
    return dicionario
def classify(self):
    test_pca = pca.PCA(self.pcs)
    test_pca.center(self.test_data)
    test_pca.svd()
    self.test_features = np.transpose(test_pca.compress(self.test_data))
    self.train_classifications = []
    self.test_classifications = []
    # looping through the features
    for i in range(0, self.test_features.shape[0]):
        self.test_classifications.append(
            np.matmul(self.w_opt, self.test_features[i]))
        self.train_classifications.append(
            np.matmul(self.w_opt, self.train_features[i]))
def _pca_uf(self):
    """Runs the principal component analysis by UF (state).

    Stores the result in self.pca.

    Returns a dictionary where the keys are the UF acronyms and the
    value of each key is a vector with the n dimensions of the PCA
    analysis."""
    if not bool(self.pca_uf):
        if self.vetores_votacao_uf == []:
            self._inicializa_vetores_uf()
        matriz = self.vetores_votacao_uf - self.vetores_votacao_uf.mean(
            axis=0)
        self.pca_uf = pca.PCA(matriz)
    dicionario = {}
    for uf, vetor in zip(self.lista_ufs, self.pca_uf.U):
        dicionario[uf] = vetor
    return dicionario
def getpcanns(word):
    response.content_type = 'application/json'
    nnv = n.nnv(n.vec(word), k=getk(request))
    words = n.words(nnv.indices)
    vecs = n.index_w(nnv.indices)
    pca, eigval, eigvec = p.PCA(vecs)
    pca -= pca.mean(axis=0)
    pca /= np.abs(pca).max(axis=0)
    # TODO why are there NaNs in the word list??
    res = [{
        'w': word if word == word else '',
        'p': pca_vec.tolist(),
        'd': float(val)
    } for word, pca_vec, val in zip(words, pca, nnv.values)]
    return json.dumps(res)
def test_init(self):
    testDataArray = np.array([[1, 4.5, 4, 7], [2, 7.3, 5, 8],
                              [3, 1.2, 9, 9]])
    testK = 2
    testP = pca.PCA(testDataArray, testK)
    #testPB = pca.PCABig("testDataFile.csv", "transformed.bin", testK)

    # Test array equality
    npt.assert_array_max_ulp(testP.data, testDataArray, maxulp=0.)
    #npt.assert_array_max_ulp(testPB.data, "testDataFile.csv", maxulp=0.)

    # Test requested dimension equality
    self.assertTrue(testP.k == testK)
def __init__(self, d, pop, numOfGenerations, a, r, q_min, q_max,
             lower_bound, upper_bound, function, use_pca=True, levy=False,
             seed=0, alpha=1, gamma=1):
    # Number of dimensions
    self.d = d
    # Population size
    self.pop = pop
    # Generations
    self.numOfGenerations = numOfGenerations
    # Loudness and alpha parameter (0 < a < 1)
    self.A = np.array([a] * pop)
    self.alpha = alpha
    # Pulse rate and gamma parameter (y > 0)
    self.R = np.array([r] * pop)
    self.gamma = gamma
    # (Min/Max) frequency
    self.Q = np.zeros(self.pop)
    self.q_min = q_min
    self.q_max = q_max
    # Domain bounds
    self.lower_bound = lower_bound
    self.upper_bound = upper_bound
    self.levy = levy
    self.use_pca = use_pca
    if use_pca:
        self.PCA = pca.PCA()
    # Initialise fitness and solutions
    self.f_min = np.inf
    self.solutions = np.zeros((self.pop, self.d))
    self.pop_fitness = np.zeros(self.pop)  # fitness of population
    self.best = np.zeros(self.d)  # best solution
    # Random number generator
    self.rng = np.random.default_rng(seed)
    # Velocity
    self.V = np.zeros((self.pop, self.d))
    # Optimisation/fitness function
    self.func = function
    # History (for plots)
    self.best_history = []
    self.min_val_history = []
    self.loudness_history = []
    self.pulse_rate_history = []
    self.frequency_history = []
def semelhanca_pca(vetores):
    """Calculates party similarities, generating a two-dimensional plot.

    This is done with Principal Component Analysis (PCA).

    Arguments:
    vetores -- a list of lists, where each inner list is a party's
        voting vector

    Returns:
    A list where the i-th position is the two-dimensional coordinate of
    the party whose voting vector was the i-th list in vetores
    """
    # PCA: rows are samples and columns are variables
    # here rows = parties and columns = votings
    # the values must also be centered
    # since every value is in [0,1], there is no need to rescale
    matriz = numpy.array(vetores)
    matriz -= matriz.mean(axis=0)  # centering
    p = pca.PCA(matriz)
    return p
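# --- Usage sketch for semelhanca_pca (toy data, not from the original project).
# The returned object is whatever this pca module's PCA class produces; reading the
# coordinates through the `U` attribute below is an assumption, mirroring how other
# snippets in this collection access the principal-component scores.
vetores = [
    [1.0, 0.0, 1.0, 0.5],  # party A's voting vector
    [0.0, 1.0, 0.0, 0.5],  # party B's voting vector
    [1.0, 1.0, 1.0, 0.0],  # party C's voting vector
]
p = semelhanca_pca(vetores)
print(p.U)  # one row of principal-component coordinates per party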
def main():
    # prepare sample data and target variable
    wine_data = WineData()
    X = wine_data.X
    y = wine_data.y

    # split sample data into training data and test data and standardize them
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=0, stratify=y)
    sc = StandardScaler().fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)

    pca_transformers = [pca.PCA(n_components=2), PCA(n_components=2)]
    for pca_transformer in pca_transformers:
        # execute PCA
        X_train_pca = pca_transformer.fit_transform(X_train_std)

        # show principal components and explained variance
        print('principal components:\n', pca_transformer.components_)
        print('explained variance:', pca_transformer.explained_variance_)
        plot_features(X_train_pca, y_train, xlabel='PC1', ylabel='PC2')

        # fit classifier and plot decision regions
        classifier = LogisticRegression(C=100.0, random_state=1,
                                        solver='liblinear',
                                        multi_class='ovr').fit(
                                            X_train_pca, y_train)
        X_test_pca = pca_transformer.transform(X_test_std)
        print('score: ', classifier.score(X_test_pca, y_test))
        plot_decision_regions(X_test_pca, y_test, classifier=classifier,
                              xlabel='PC1', ylabel='PC2')
def _pca_legislaturas(self):
    """Runs the principal component analysis by legislatura.

    Returns a dictionary where the keys are the legislatura ids and the
    value of each key is a vector with the n dimensions of the PCA
    analysis.
    """
    if not self.pca_legislaturas:
        if not self.vetores_votacao:
            self._inicializa_vetores()
        ilnn = self._lista_de_indices_de_legislaturas_nao_nulas()
        matriz = self.vetores_votacao
        # exclude legislaturas absent from every voting in the period
        matriz = matriz[ilnn, :]
        matriz = matriz - matriz.mean(axis=0)  # center the data
        self.pca = pca.PCA(matriz, fraction=1)  # run the PCA
        self._preenche_pca_de_legislaturas_nulas(ilnn)
        logger.info("PCA terminada com sucesso. ini=%s, fim=%s" %
                    (str(self.ini), str(self.fim)))
    # Create the dictionary to be returned:
    dicionario = {}
    for legislatura, vetor in zip(self.legislaturas, self.pca.U):
        dicionario[legislatura.id] = vetor
    return dicionario
def compute(self, waveforms, sampling_rate=None, n_pca_comp=8, sign='-'):
    """Return concatenation of PCA and OnlyMax features.

    OnlyMax is first component.
    """
    # shape (N_spikes, n_pca_comp)
    a1 = pca.PCA().compute(waveforms, sampling_rate, output_dim=n_pca_comp)

    # shape (N_spikes, trodness)
    #a2 = onlymax.OnlyMax().compute(waveforms, sampling_rate)
    print sign
    if sign == '-':
        a2 = np.min(waveforms, axis=2)
    elif sign == '+':
        a2 = np.max(waveforms, axis=2)
    elif sign == 'either':
        minny = np.min(waveforms, axis=2)
        maxxy = np.max(waveforms, axis=2)
        a2 = np.where(maxxy > np.abs(minny), maxxy, minny)
    elif sign == 'abs(either)':
        a2 = np.max(np.abs(waveforms), axis=2)

    return np.concatenate([a2, a1], axis=1)
def _pca_partido(self):
    """Runs the principal component analysis by party.

    Stores the result in self.pca.

    Returns a dictionary where the keys are the party acronyms and the
    value of each key is a vector with the n dimensions of the PCA
    analysis.
    """
    if not self.pca_partido:
        if self.vetores_votacao == None or len(self.vetores_votacao) == 0:
            self._inicializa_vetores()
        ipnn = self._lista_de_indices_de_partidos_naos_nulos()
        matriz = self.vetores_votacao
        matriz = matriz[ipnn, :]  # exclude parties of size zero
        matriz = matriz - matriz.mean(axis=0)  # center the data
        self.pca_partido = pca.PCA(matriz, fraction=1)  # run the PCA
        self._preenche_pca_de_partidos_nulos(ipnn)
        logger.info("PCA terminada com sucesso. ini=%s, fim=%s" %
                    (str(self.ini), str(self.fim)))
    # Create the dictionary to be returned:
    dicionario = {}
    for partido, vetor in zip(self.partidos, self.pca_partido.U):
        dicionario[partido.nome] = vetor
    return dicionario
def similarity_pca(vetores):
    """Calculates party similarities, generating a two-dimensional plot.

    This is done with Principal Component Analysis (PCA).

    Arguments:
    vetores -- a list of lists, where each inner list is a party's
        voting vector

    Returns:
    A list where the i-th position is the two-dimensional coordinate of
    the party whose voting vector was the i-th list in vetores."""
    # PCA: rows are samples, columns are variables.
    # Here rows = parties and columns = votings.
    # The values must be centered.
    # Since every value is in [0, 1], there is no need to rescale.
    #
    # Receives a list of lists, each one a party's voting vector
    matriz = numpy.array(vetores)
    # Centering:
    matriz -= matriz.mean(axis=0)
    # PCA on the centered matrix
    p = pca.PCA(matriz)
    return p
def pca_model():
    return pca.PCA(faces.IMAGES).run()
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Does principal component '
                                     'analysis on the penny colors')
    parser.add_argument('image', type=str,
                        help='Image filename used to generate histograms '
                        '(not actually read)')
    args = parser.parse_args(argv)
    histogram_file_in = args.image + '_hist.csv'
    colors = np.array(penny_r.get_colors(histogram_file_in, rounded=False))
    pcao = pca.PCA(colors, fraction=1)
    #import pdb;pdb.set_trace()
    #print p
    # principal component vectors = pcao.Vt
    # RGB triplets in principal components = pcao.pc()
    penny_pc = pcao.pc()

    title_format = ':^10s'
    stat_format = ':10.2f'
    stat_titles = 'min', 'q1', 'median', 'mean', 'q3', 'max'
    stat_funcs = (min, lambda a: sps.scoreatpercentile(a, 25), np.median,
                  np.mean, lambda a: sps.scoreatpercentile(a, 75), max)
    assert len(stat_titles) == len(stat_funcs)
    print ' | '.join(['{' + title_format + '}'] *
                     len(stat_titles)).format(*stat_titles)
    for pc_set in penny_pc.T:
        print ' | '.join(['{' + stat_format + '}'] * len(stat_funcs)).format(
            *[f(pc_set) for f in stat_funcs])

    # find image bounds to synthesize
    im_pc1_bounds = [min(penny_pc.T[0]), max(penny_pc.T[0])]
    im_pc1_range = im_pc1_bounds[1] - im_pc1_bounds[0]
    im_pc1_bounds[0] = im_pc1_bounds[0] - 0.25 * im_pc1_range
    im_pc1_bounds[1] = im_pc1_bounds[1] + 0.25 * im_pc1_range
    im_pc2_bounds = [min(penny_pc.T[1]), max(penny_pc.T[1])]
    im_pc2_range = im_pc2_bounds[1] - im_pc2_bounds[0]
    im_pc2_bounds[0] = im_pc2_bounds[0] - 0.25 * im_pc2_range
    im_pc2_bounds[1] = im_pc2_bounds[1] + 0.25 * im_pc2_range
    im_pc1_ax = np.arange(*[round(x) for x in im_pc1_bounds])
    im_pc2_ax = np.arange(*[round(x) for x in im_pc2_bounds])
    im_pc1_AX, im_pc2_AX = np.meshgrid(im_pc1_ax, im_pc2_ax)
    im = np.empty(im_pc1_AX.shape + (3, ))
    for i, row in enumerate(im):
        for j, pxl in enumerate(row):
            im[i, j] = pcao.Vt[0] * im_pc1_ax[j] + pcao.Vt[1] * im_pc2_ax[i]
            im[i, j] = im[i, j] / 255

    fig, ax = plt.subplots()
    ax.plot(penny_pc.T[0], penny_pc.T[1], linestyle='none', marker='o',
            color='lime', markersize=10, alpha=0.5)
    ax.imshow(im, extent=[min(im_pc1_ax), max(im_pc1_ax),
                          min(im_pc2_ax), max(im_pc2_ax)])
    ax.set_aspect('auto')
    ax.set_title('Penny Color Space')
    ax.set_xlabel('PC1 (1 unit = {} RGB units, of 255)'.format(', '.join(
        '{:0.3f}'.format(v) for v in pcao.Vt[0])))
    ax.set_ylabel('PC2 (1 unit = {} RGB units, of 255)'.format(', '.join(
        '{:0.3f}'.format(v) for v in pcao.Vt[1])))

    f2, ax2 = plt.subplots()
    pc1 = penny_pc.T[0]
    ax2.hist(pc1, bins=10, normed=True)
    norm_mean, norm_sd = sps.norm.fit(pc1)
    x = np.linspace(norm_mean - 4 * norm_sd, norm_mean + 4 * norm_sd, 100)
    y = sps.norm.pdf(x, norm_mean, norm_sd)
    ax2.autoscale(False, axis='x')
    ax2.plot(x, y, color='red', lw=5)
    ax2.set_title('Penny PC1 color distribution')
    ax2.set_xlabel('PC1')
    ax2.set_ylabel('Frequency')
    #print 'norm: ', sps.kstest(pc1, lambda x: sps.norm.cdf(x, *sps.norm.fit(pc1)))
    #spo.leastsq(lambda p, x: skew.skew(x, *p) - pc1, [0.5]*3, ())

    fig.savefig('fig.png')
    f2.savefig('fig2.png')
holes_train, holes_test = utils.get_hole_features(
    digits_train), utils.get_hole_features(digits_test)
pix_train, pix_test = utils.get_pix_features(
    digits_train), utils.get_pix_features(digits_test)
X_train, X_test = np.hstack([pix_train, holes_train]), np.hstack(
    [pix_test, holes_test])

mean_normalizer = utils.normalization(X_train)
X_train = mean_normalizer.transform(X_train)
X_test = mean_normalizer.transform(X_test)

mx_score = 0
best = (-1, -1)
clf = knn.KNN(mode='weighted')
for n_component in range(3, 61, 3):
    for k in range(1, 11):
        _pca = pca.PCA(X_train)
        X_train_reduced = _pca.transform(X_train, n_component)
        X_test_reduced = _pca.transform(X_test, n_component)
        start_time = timeit.default_timer()
        validation_scores = []
        kf = KFold(n_splits=10)
        for t_idx, v_idx in kf.split(X_train_reduced):
            X_train_T, X_train_V = X_train_reduced[t_idx], X_train_reduced[
                v_idx]
            y_train_T, y_train_V = y_train[t_idx], y_train[v_idx]
            clf.fit(X_train_T, y_train_T)
            validation_score = clf.score(X_train_V, y_train_V, k)
            validation_scores.append(validation_score)
        avg_val_score = np.mean(validation_scores)
        if avg_val_score > mx_score:
import numpy as np
import struct
import matplotlib.pyplot as plt
import pylab
import pca
import tools
import linear

train_data = tools.get_train_data()
train_label = tools.get_train_label()

dimension = 50
train_data, eigenVectors = pca.PCA(train_data, dimension)
print(np.array(train_data).shape)

num1 = 1
num2 = 9

# 10 classes
classified_data = []
for i in range(10):
    classified_data.append([])
for i in range(len(train_label)):
    # print(train_label[i][0])
    classified_data[train_label[i][0]].append(train_data[i])

train_data1 = classified_data[num1]
train_data2 = classified_data[num2]
import csv

import matplotlib.pyplot as plt
import numpy as np

import pca

# Load the data
Xy = []
with open("winequality-red.csv") as fp:
    for row in csv.reader(fp, delimiter=";"):
        Xy.append(row)
Xy = np.array(Xy[1:], dtype=np.float64)
X = Xy[:, :-1]

# Fit the model
model = pca.PCA(n_components=2)
model.fit(X)

# Transform
Y = model.transform(X)

# Plot
plt.scatter(Y[:, 0], Y[:, 1], color="k")
plt.show()
# 'i1171n','m1166r','r1192p','r1275q','t1151m','y1278s']
mutation_list = ['a1200v', 'd1270g', 'd1349h', 'g1286r', 'r1231q']
#mutation_list=['v1229m'] #fix this one
directory = '../40ns-dry/'
num_frames = 2000  # want 40 ns worth of trajectory
for mutation in mutation_list:
    print "\nanalyzing {0} system".format(mutation)
    psf = '{0}alk.dry.{1}.psf'.format(directory, mutation)
    dcd = '{0}alk.40ns.dry.{1}.dcd'.format(directory, mutation)
    # psf="{0}alki.{1}.ions.psf".format(directory,mutation)
    # dcd="{0}alki.{1}.nvt-constrain.dcd".format(directory,mutation)
    out_file_name = "{0}_peter".format(mutation)
    PCA = pca.PCA(
        psf,
        dcd,
        align_selection="name CA and not ((resid 1125:1129) or (resid 1157:1174) or (resid 1271:1292))",
        pca_selection="name CA",
        outname=out_file_name,
        last_frame=num_frames)
    PCA.make_covariance()
    PCA.write_data()
    """
    analysis=jma.analyzer(psf,dcd,out_file=out_file_name,last_frame=num_frames)
    analysis.get_rms()
    analysis.get_sasa()
    analysis.get_salts()
    analysis.get_hbonds()
    """
############Program Start#######################
fnames = ReadFiles('data')
mgs = map(lambda x: basis.LoadGray(x), fnames)
nmgs = []
for i in range(len(mgs)):
    nm = ip.Plop(mgs[i], 512, 512)
    nmgs.append(nm)
fids = map(lambda x: basis.MakeGrid(x), nmgs)
fids = np.array(fids)
morph = warp.Morph(nmgs[1], fids[1], nmgs[2], fids[2], NI=10)
Imageset = []
Imageset.append(nmgs[1])
for i in morph:
    Imageset.append(i)
Imageset.append(nmgs[2])
for i in range(len(Imageset)):
    nm = 'output\\morph_' + str(i) + '.png'
    smisc.imsave(nm, Imageset[i])
clust, cffs = TestLaws(Imageset)
qvecs, a = TestHaar(Imageset)
qvecs = qvecs.sum(3).sum(2)
cffs, evecs = pca.PCA(qvecs, 3)
cffs = pca.Map2PCA(qvecs, evecs)
gnu.Save('Haar_results.txt', cffs)