def calculate(ids, matrix, target=None):
    results = PCA(matrix)
    data = []
    for obj_id, row in zip(ids, matrix):
        projected = results.project(row)  # project once, reuse both components
        data.append([round(projected[0], 6), round(projected[1], 6)])
    #target = []
    data = icp.align(data, target)
    #for obj_id, row in zip(ids, data):
    #    row.append(obj_id)
    return data.tolist()
def pca_joints(self, num, data=None):
    """Compress joint data using PCA and return a projection onto num axes.

    Optionally, the PCA weights can be computed from another dataset passed
    via `data`. If "collisions" is requested, the points where drum
    collisions are detected are also highlighted.
    """
    if data is None:
        data = self.joints
    lpca = PCA(data[:, :7])    # left arm: joint positions only
    rpca = PCA(data[:, 7:14])  # right arm: joint positions only
    # extract 'num' components
    lproj = lpca.project(self.joints[:, :7], minfrac=lpca.fracs[num - 1])
    rproj = rpca.project(self.joints[:, 7:14], minfrac=rpca.fracs[num - 1])
    return np.concatenate((lproj, rproj), axis=1)
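# A minimal sketch (not from the original source) of the minfrac trick used in
# pca_joints above: matplotlib.mlab's PCA.project keeps every axis whose
# variance fraction is at least minfrac, and fracs is sorted in descending
# order, so minfrac=pca.fracs[num - 1] retains roughly the top num axes.
# The random joint data here is a stand-in, not real recordings.
import numpy as np
from matplotlib.mlab import PCA

joints = np.random.rand(200, 7)  # hypothetical 7-DOF joint angles, 200 samples
pca = PCA(joints)
num = 3
proj = pca.project(joints, minfrac=pca.fracs[num - 1])
print(proj.shape)  # (200, num), or slightly wider if variance fractions tie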
def doStuff(self):
    self.readWFDEIOutput()
    self.makeArray()
    pca = PCA(self.array)
    print(pca.mu)
    print(pca.fracs)
    out = pca.project(self.array, minfrac=0.1)
    print(out.shape)
    plt.subplot(1, 3, 1)
    plt.plot(out[:, 0], out[:, 1], 'k+')
    plt.subplot(1, 3, 2)
    plt.plot(out[:, 0], out[:, 2], 'k+')
    plt.subplot(1, 3, 3)
    plt.plot(out[:, 1], out[:, 2], 'k+')
    plt.show()
def PCA_on_waveforms(waveforms, minfrac, location):
    """Perform principal component analysis on the extracted spike waveforms
    and return their projection onto the principal component axes.

    Inputs:
        waveforms: Numpy array containing the waveforms, in the form
            (N_events x N_electrodes x N_spike_time_range_steps)
        minfrac: Only principal component axes that account for a fraction
            of the variance greater than this value are retained.
        location: Object whose `experiment` attribute holds the recording
            parameters (spike_samples_before, spike_samples_after).

    Outputs:
        projection: Waveforms projected onto the principal component axes
    """
    # Disabled earlier version that dropped broken or absent electrodes
    # (channels reading 0 V throughout) to avoid contaminating the PCA:
    # peak_of_spike_time_range = (len(params['spike_timerange']) / 2) + 1
    # peaks = waveforms[:, :, peak_of_spike_time_range]
    # true_electrode_inds = np.where(peaks[0] != 0)
    # waveforms_true = waveforms[:, true_electrode_inds]
    # n_dimensions = len(true_electrode_inds[0]) * len(params['spike_timerange'])
    # waveforms_true = waveforms_true.reshape(len(peaks), n_dimensions)
    # results = PCA(waveforms_true)
    experiment = location.experiment
    n_dimensions = len(waveforms[0]) * (experiment.spike_samples_before +
                                        experiment.spike_samples_after)
    waveforms = waveforms.reshape(len(waveforms), n_dimensions)
    results = PCA(waveforms)
    projection = results.project(waveforms, minfrac)
    return projection
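# Hypothetical driver for PCA_on_waveforms (a sketch; the real Experiment and
# Location classes are not part of this snippet, so minimal stubs stand in):
import numpy as np

class _Experiment(object):
    spike_samples_before = 20
    spike_samples_after = 44  # 20 + 44 = 64 samples per waveform

class _Location(object):
    experiment = _Experiment()

waveforms = np.random.randn(500, 4, 64)  # 500 events, 4 electrodes, 64 samples
projection = PCA_on_waveforms(waveforms, minfrac=0.05, location=_Location())
print(projection.shape)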
def main():
    print "Loading Word2Vec model..."
    # 4 GB input file, uses about 20 GB of memory when loaded
    # Uses the model from: http://bio.nlplab.org/
    model = gensim.models.Word2Vec.load_word2vec_format(
        "../../PubMed/BioNLP/wikipedia-pubmed-and-PMC-w2v.bin", binary=True)
    model.init_sims(replace=True)
    vocab = model.index2word
    data_matrix = np.array([model[vocab[i]] for i in range(len(vocab))])
    print "Running PCA..."
    pca_results = PCA(data_matrix)
    seed_word_list = ["dopamine", "GABA", "serotonin", "5HT", "acetylcholine",
                      "glutamate", "electrode", "stimulator", "cognitive",
                      "behavioral", "ethological", "genetic", "biochemical",
                      "channel", "concentration", "dynamics", "receptor",
                      "antibody", "fMRI", "calcium", "nucleus", "axon", "soma",
                      "dendrite", "synapse", "fNIRS", "EEG"]
    # seed_word_list = [s.lower() for s in seed_word_list]
    classes = [[] for s in seed_word_list]
    for i in range(len(seed_word_list)):
        classes[i].append(model[seed_word_list[i]])
        for s in model.most_similar(seed_word_list[i]):
            classes[i].append(model[s[0]])
    classes_projected = [[] for s in seed_word_list]
    for i in range(len(seed_word_list)):
        for f in classes[i]:
            classes_projected[i].append(pca_results.project(f))
    print "Plotting PCA results..."
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.set_title("Principal Components of Word Vectors")
    import itertools
    marker = itertools.cycle(['o', '^', '*', "s", "h", "8"])
    colorList = ["r", "b", "g", "y", "k", "c", "m", "w"]
    colors = itertools.cycle(colorList)
    m = marker.next()
    for i in range(len(seed_word_list)):
        col = colors.next()
        if i % len(colorList) == 0:
            m = marker.next()
        # plot the individual words:
        # ax.scatter([f[0] for f in classes_projected[i]],
        #            [f[1] for f in classes_projected[i]],
        #            [f[2] for f in classes_projected[i]],
        #            marker=m, s=20, c=col)
        # plot the cluster means
        ax.plot([np.mean([f[0] for f in classes_projected[i]])],
                [np.mean([f[1] for f in classes_projected[i]])],
                [np.mean([f[2] for f in classes_projected[i]])],
                marker=m, markersize=21, color=col,
                label=seed_word_list[i], linestyle="none")
    ax.legend(numpoints=1)
    plt.show()
def pca(minfrac):
    matrix = []
    for vector in vects:
        matrix.append(vector[0])
    print "Matrix Built"
    training = numpy.array(matrix)
    print "Training..."
    results = PCA(training)
    ret = []
    print "Projecting..."
    for vector in vects:
        ret.append(results.project(vector[0], minfrac))
    return ret
def pca(ids, matrix):
    print("{}: Calculating PCA...".format(timestamp()))
    results = PCA(matrix)
    pickle.dump(results, open('./pca_pickle.dat', 'wb'))  # pickle needs binary mode
    data = []
    for obj_id, row in zip(ids, matrix):
        projected = results.project(row)
        data.append([round(projected[0], 6), round(projected[1], 6), obj_id])
    print("{}: Done.".format(timestamp()))
    return data
def draw_pcca_memberships(original_data, pcca, discrete_trajectory,
                          colormap_name="jet"):
    """Visualize the result of PCCA+ as a colored plot of the PCA."""
    pca = PCA(original_data)
    cluster_ids = range(0, pcca.shape[1])
    colormap = matplotlib.cm.get_cmap(colormap_name, len(cluster_ids) + 1)
    membership = pcca > 0.5
    pcca_traj = np.where(membership[discrete_trajectory])[1]
    for index, cluster in enumerate(cluster_ids):
        datapoints = original_data[np.where(pcca_traj == cluster)]
        print('points in cluster ', cluster, ': ', len(datapoints))
        datapoints_transformed = pca.project(datapoints)
        plt.scatter(datapoints_transformed[:, 0],
                    datapoints_transformed[:, 1],
                    color=colormap(index), alpha=0.5)
    plt.title('pcca')
def draw_clusters(clusters, plotter=None, colormap_name="jet"):
    """Visualize clustered data and cluster membership in a new plot or with
    an existing axis object."""
    plotter = plotter or plt
    # use PCA to be able to visualize the data in two dimensions
    all_data = clusters.getOriginalData()
    pca = PCA(all_data)
    # for nicer visualization, scale point transparency with dataset size
    data_length = len(all_data)
    alpha = 1.0 / math.sqrt(data_length)
    if alpha < 0.05:
        alpha = 0.05
    elif alpha > 0.75:
        alpha = 0.75
    cluster_ids = clusters.getClusterIDs()
    colormap = matplotlib.cm.get_cmap(colormap_name, len(cluster_ids) + 1)
    for index, cluster in enumerate(cluster_ids):
        datapoints = all_data[clusters._map == cluster, :]
        datapoints_transformed = pca.project(datapoints)
        plotter.scatter(datapoints_transformed[:, 0],
                        datapoints_transformed[:, 1],
                        color=colormap(index), alpha=alpha)
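# Sketch of exercising draw_clusters with a stubbed cluster container (the
# real class is not shown above; only the members the function touches,
# getOriginalData, getClusterIDs, and _map, are stubbed here, and the imports
# draw_clusters relies on (math, matplotlib.cm, plt, mlab's PCA) are assumed
# to be in scope):
import numpy as np

class _StubClusters(object):
    def __init__(self, data, labels):
        self._data = data
        self._map = labels
    def getOriginalData(self):
        return self._data
    def getClusterIDs(self):
        return sorted(set(self._map))

points = np.random.randn(300, 5)
labels = np.random.randint(0, 3, size=300)
draw_clusters(_StubClusters(points, labels))
plt.show()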
def sample_cluster_2Dmap(self, **kwargs):
    defaults = dict(genelist=None, samplenames=None, size=50)
    for key in defaults:
        kwargs.setdefault(key, defaults[key])
    genearray = self.array
    if type(kwargs['genelist']) == list:
        validatedlist = self.validate_genelist(kwargs['genelist'])
        genearray = self.array.take(validatedlist, axis=0)
    elif kwargs['genelist']:
        raise TypeError('genelist should be a list of genes')
    samplenames = [x for x in self.dataindexdic.keys()]
    if kwargs['samplenames']:
        if len(kwargs['samplenames']) != len(samplenames):
            raise ValueError(
                'length of samplenames should be {}'.format(len(samplenames)))
        samplenames = kwargs['samplenames']
    covarray = numpy.cov(genearray.T)         # covariance array
    covPCA = PCA(covarray)                    # matplotlib.mlab.PCA
    convertedcovs = covPCA.project(covarray)  # converted vectors along the PCs
    data = numpy.array([[x[0] for x in convertedcovs],
                        [x[1] for x in convertedcovs]])
    # auto color picking with sample numbers
    color = []
    colorlist = cm.rainbow(numpy.linspace(0, 1, len(samplenames)))
    keys = [x for x in self.dataindexdic.keys()]
    for c, key in zip(colorlist, keys):
        color.extend([c] * len(self.dataindexdic[key]))
    sampleindex = 0
    for i in range(len(samplenames)):
        samplenumber = len(self.dataindexdic[keys[i]])
        subdata = numpy.take(
            data, range(sampleindex, sampleindex + samplenumber), axis=1)
        plt.scatter(subdata[0], subdata[1], color=colorlist[i],
                    s=kwargs['size'], label=samplenames[i])
        sampleindex += samplenumber
    plt.legend(loc='upper left', fontsize=15, scatterpoints=1,
               bbox_to_anchor=(1, 1))
def main():
    print "Loading Word2Vec model..."
    # 4 GB input file, uses about 20 GB of memory when loaded
    # Uses the model from: http://bio.nlplab.org/
    model = gensim.models.Word2Vec.load_word2vec_format(
        "../../PubMed/BioNLP/wikipedia-pubmed-and-PMC-w2v.bin", binary=True)
    #model = gensim.models.Word2Vec.load("../../PubMed/derived_from_neuroscience_abstracts/word2vec_model_1")
    model.init_sims(replace=True)
    vocab = model.index2word
    while True:
        seed_string = raw_input('\nprompt> ')
        # set gets the unique elements here
        seed_word_list = list(set(seed_string.split()))
        print "Seed words:"
        for word in seed_word_list:
            print word
        # choose how many words to find to allow numrows > numcols in PCA
        vector_length = len(model[vocab[0]])
        top_vecs = int(1 + float(vector_length) /
                       float(len([s for s in seed_word_list if s in vocab])))
        if top_vecs < 15:
            top_vecs = 15
        print "Finding a bunch of similar words..."
        derived_word_list = []
        for s in seed_word_list:
            if s in vocab:
                print "\tSearching for similarities for %s" % s
                l = [m[0] for m in model.most_similar(positive=[s], topn=top_vecs)]
                derived_word_list += l
        if len(derived_word_list) == 0:
            continue
        derived_word_list = list(set(derived_word_list))
        print "Derived words:"
        for word in derived_word_list:
            print word
        data_matrix = np.array([model[s] for s in derived_word_list])
        print "Running PCA..."
        pca_results = PCA(data_matrix)
        projected_vectors = []
        word_short_list = []
        for word in seed_word_list:
            if word in vocab:
                f = model[word]
                projected_vectors.append(pca_results.project(f))
                word_short_list.append(word)
        print "Plotting PCA results..."
        fig = plt.figure()
        plt.title("Principal Components of Word Vectors")
        plots = []
        import itertools
        marker = itertools.cycle(['o', '^', '*', "s", "h", "8"])
        colorList = ["r", "b", "g", "y", "k", "c", "m", "w"]
        colors = itertools.cycle(colorList)
        m = marker.next()
        for i in range(len(projected_vectors)):
            col = colors.next()
            if i % len(colorList) == 0:
                m = marker.next()
            p, = plt.plot([projected_vectors[i][0]], [projected_vectors[i][1]],
                          marker=m, markersize=21, color=col, linestyle="none")
            plots.append(p)
        plt.legend(plots, word_short_list, loc="upper left", numpoints=1)
        plt.show()
schTrain = getFold(sch, schUserFoldDict, foldid, lambda x, y: x != y, useNgram)
XTrain, YTrain = randomShuffle(controlTrain + schTrain,
                               [1] * len(controlTrain) + [0] * len(schTrain))
#findCorrelation(XTrain)  # plots graph of feature correlations
#[meanFt, varFt] = normFeatParams(XTrain)  # both meanFt and varFt are of length = number of features
#XTrain = normFeat(XTrain, meanFt, varFt)
PCAObject = PCA(np.asarray(XTrain))
XTrain = PCAObject.center(XTrain)
if doPCA:
    numFeatures = retainPerc(PCAObject.fracs, 0.99)
    XTrain = PCAObject.project(XTrain)[:, 0:numFeatures]
[meanFt, varFt] = normFeatParams(XTrain)  # both meanFt and varFt are of length = number of features
XTrain = np.asarray(normFeat(XTrain, meanFt, varFt))
#print numFeatures, XTrain.shape
#TODO: SHUFFLE UP THE INPUT
clf = svm.SVC(kernel='rbf')
clf.fit(XTrain, YTrain)
XTest = controlTest + schTest
XTest = PCAObject.center(XTest)
if doPCA:
    XTest = PCAObject.project(XTest)[:, 0:numFeatures]
XTest = np.asarray(normFeat(XTest, meanFt, varFt))
for i in matrix_with_id:
    obj_ids.append(i)
    matrix.append(matrix_with_id[i])
    if len(matrix_with_id[i]) != row_length:
        print('row length is not {}'.format(row_length))

# PCA calculating
vec = np.array(matrix)
vec.shape = (len(matrix), row_length)
results = PCA(vec)
data = []
for obj_id, row in zip(obj_ids, matrix):
    obj_type = BandB_sampled[obj_id]["stype"]
    data.append([results.project(row)[0], results.project(row)[1],
                 obj_type, obj_id])
f_out = open(args.path + '/pca_supernova.json', 'w')
f_out.write(json.dumps(data))
f_out.close()
#matrix = []
#j = json.load(open('{}/PLV_LINEAR.json'.format(args.path)))
#metadata = dict((obj["LINEARobjectID"], obj) for obj in j["data"])
#obj_ids = []
#row_length = 50
#with open('{}/object_list.csv'.format(args.path)) as csvfile:
#    objects = csv.reader(csvfile)
def pca(dim):
    pca = PCA(data[:, 0:9])
    return pca.project(data[:, 0:9])[:, 0:dim]
objects = csv.reader(csvfile)
next(objects, None)  # skip the header row
for row in objects:
    obj_id = int(row[0])
    period = float(row[1])
    if period > 0:
        v = loadMagData(args.path + '/' + str(obj_id) + '.fit.json')
        for i in range(row_length - len(v)):
            v.append(v[0])
        matrix.append(v)
        obj_ids.append(obj_id)
vec = np.array(matrix)
vec.shape = (len(matrix), row_length)
results = PCA(vec)
with open('pca_result.dat', 'wb') as f:
    pickle.dump(results, f)
with open('pca_matrix.dat', 'wb') as f:
    pickle.dump(vec, f)
data = []
for obj_id, row in zip(obj_ids, matrix):
    data.append([results.project(row)[0], results.project(row)[1],
                 metadata[obj_id]["LCtype"], obj_id])
f_out = open(args.path + '/pca.json', 'w')
f_out.write(json.dumps(data))
f_out.close()
import numpy as np
from matplotlib.mlab import PCA

data = []
for line in open("emotions.train"):
    data.append([])
    for el in line[2:].strip().split(" "):
        data[-1].append(float(el[el.index(":") + 1:]))
    if len(data[-1]) != 86:
        data.remove(data[-1])
results = PCA(np.array(data))

archive = open("pca_archive_wt.txt", "w")
for v in results.Wt:
    archive.write(",".join([str(float(x)) for x in v]) + "\n")
archive.close()
archive = open("pca_archive_mu.txt", "w")
archive.write(",".join([str(float(x)) for x in results.mu]) + "\n")
archive.close()
archive = open("pca_archive_sigma.txt", "w")
archive.write(",".join([str(float(x)) for x in results.sigma]) + "\n")
archive.close()

fout = open("emotions.train.pca", "w")
for line in open("emotions.train"):
    temp = []
    for el in line[2:].strip().split(" "):
        temp.append(float(el[el.index(":") + 1:]))
    fout.write(line[:2] + " ".join(
        [str(i + 1) + ":" + str(index)
         for i, index in enumerate(results.project(np.array(temp), 0.001))]
    ) + "\n")
fout.close()
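# Sketch: the three archives above are enough to reproduce the projection
# without refitting. mlab's PCA standardizes with mu/sigma and multiplies by
# Wt, so (assuming the file names above and a hypothetical 86-dimensional
# feature vector) a saved model can project new points directly:
Wt = np.loadtxt("pca_archive_wt.txt", delimiter=",")
mu = np.loadtxt("pca_archive_mu.txt", delimiter=",")
sigma = np.loadtxt("pca_archive_sigma.txt", delimiter=",")

new_point = np.zeros(86)  # stand-in feature vector
projected = np.dot(Wt, (new_point - mu) / sigma)
# equivalent to results.project(new_point) with minfrac=0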
def make_plots(chromosomes, groups, group_labels):
    infile = open('../finescale_mut_spectra/derived_each_lineage_chr%i_nosingle.txt'
                  % chromosomes[0])
    lines = infile.readlines()
    s = lines[0].strip('\n').split(' ')
    indices = {}
    for i in range(1, len(s)):
        try:
            indices[s[i]].append(i - 1)
        except KeyError:
            indices[s[i]] = [i - 1]
    mut_counts = np.zeros((2 * (len(s) - 1), len(lines) - 1))
    mut_list = []
    for chrom in chromosomes:
        infile = open('../finescale_mut_spectra/derived_each_lineage_chr'
                      + str(chrom) + '_nosingle.txt')
        lines = infile.readlines()
        infile.close()
        for i in range(len(lines) - 1):
            s = lines[i + 1].strip('\n').split(' ')
            if chrom == 1:
                mut_list.append(s[0])
            for j in range(len(s) - 1):
                mut_counts[j][i] += int(s[j + 1])
    # normalize each lineage's counts to frequencies
    for j in range(len(s) - 1):
        der_count = mut_counts[j].sum()
        for i in range(len(mut_counts[j])):
            mut_counts[j][i] *= 1.0 / der_count
    # average the two haplotypes of each lineage
    averaged_mut_counts = []
    for j in range((len(s) - 1) / 2):
        averaged_mut_counts.append([])
        for i in range(len(mut_counts[0])):
            averaged_mut_counts[-1].append(
                0.5 * (mut_counts[2 * j][i] + mut_counts[2 * j + 1][i]))
    mut_counts = np.array(averaged_mut_counts)
    group_mut_counts = []
    for group in groups:
        for population in group:
            for i in indices[population]:
                group_mut_counts.append(mut_counts[i])
    group_mut_counts = np.array(group_mut_counts)
    myPCA = PCA(group_mut_counts)
    colors = ['blue', 'green', 'red', 'purple', 'black', 'orange']
    for group, group_label, color in zip(groups, group_labels, colors):
        x, y = [], []
        for population in group:
            for ind in indices[population]:
                this_point = myPCA.project(mut_counts[ind])
                x.append(this_point[0])
                y.append(this_point[1])
        plt.scatter(x, y, color=color, label=longname[group_label])
    plt.legend(loc='lower left', ncol=2, prop={'size': 8})
    plt.xticks(())
    plt.yticks(())
    plt.xlabel('PC1 (' + str(int(100 * myPCA.fracs[0])) + '% variance explained)')
    plt.ylabel('PC2 (' + str(int(100 * myPCA.fracs[1])) + '% variance explained)')
    fig = plt.gcf()
    fig.set_size_inches((4.5, 3.5))
    plt.savefig('_'.join(group_labels) + '_mut_PCA_1kg_nosingle_altlegend.pdf')
    plt.clf()
kf = cross_validation.KFold(len(X), k=folds, shuffle=True)
for train_index, test_index in kf:
    # print("TRAIN: %s TEST: %s" % (train_index, test_index))
    X_train, X_test = X[train_index], X[test_index]
    # generate knn analysis
    fits.append(g.fit(X_train))
    scores.append(g.bic(X_test))
print scores
fig = Figure(figsize=(6, 6))
canvas = FigureCanvas(fig)
myPCA = PCA(X)
pcDataPoint = myPCA.project(X)
ax = fig.add_subplot(111)
ax.scatter(pcDataPoint[:, 1], pcDataPoint[:, 2])
canvas.print_figure("PCA12.png", dpi=500)
# print(scores)
# avg = float(sum(scores)/len(scores))
# for k in range(0, len(scores)):
#     diffs.append((scores[k]-avg)*(scores[k]-avg))
# print diffs
# var = float(sum(diffs)/len(scores))
# scoresavg.append(avg)
# scoresvar.append(var)
# print(scoresavg)
# print(scoresvar)
matrix = list(x["splinedata"] for x in j)
# metadata = dict((obj["LINEARobjectID"], obj) for obj in j["data"])
# obj_ids = []
# with open('{}/object_list.csv'.format(args.path)) as csvfile:
#     objects = csv.reader(csvfile)
#     next(objects, None)
#     for row in objects:
#         obj_id = int(row[0])
#         period = float(row[1])
#         if period > 0:
#             v = loadMagData(args.path+'/'+str(obj_id)+'.fit.json')
#             for i in range(50 - len(v)):
#                 v.append(v[0])
#             matrix.append(v)
#             obj_ids.append(obj_id)
vec = np.array(matrix)
vec.shape = (len(matrix), 20)
results = PCA(vec)
data = []
for obj, row in zip(j, matrix):
    data.append([results.project(row)[0], results.project(row)[1], obj])
f_out = open('pca_transients.json', 'w')
f_out.write(json.dumps(data))
f_out.close()
def main():
    print "\nLoading Word2Vec model...\n"
    model = gensim.models.Word2Vec.load(outdir + subdir + "word2vec_model")
    model.init_sims(replace=True)
    vocab = model.index2word
    data_matrix = np.array([model[vocab[i]] for i in range(len(vocab))])
    print "Running PCA..."
    pca_results = PCA(data_matrix)
    seed_word_list = [s.lower() for s in seed_words]
    vectors = [model[s] for s in seed_word_list]
    projected_vectors = [pca_results.project(v) for v in vectors]
    plt.rc('legend', **{'fontsize': 7})
    print "Plotting PCA results in 3D..."
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.set_title("Principal Components of Word Vectors")
    import itertools
    marker = itertools.cycle(['o', '^', '*', "s", "h", "8"])
    colorList = ["r", "b", "g", "y", "k", "c", "m", "w",
                 "HotPink", "Indigo", "Grey"]
    colors = itertools.cycle(colorList)
    m = marker.next()
    for i in range(len(seed_word_list)):
        col = colors.next()
        if i % len(colorList) == 0:
            m = marker.next()
        a = ax.plot([projected_vectors[i][0]], [projected_vectors[i][1]],
                    [projected_vectors[i][2]], marker=m, markersize=10,
                    c=col, label=seed_words[i], linestyle="none")
    ax.legend(numpoints=1, loc=5)
    print "Plotting PCA results in 2D..."
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_title("Principal Components of Word Vectors")
    marker = itertools.cycle(['o', '^', '*', "s", "h", "8"])
    colors = itertools.cycle(colorList)
    m = marker.next()
    for i in range(len(seed_word_list)):
        col = colors.next()
        if i % len(colorList) == 0:
            m = marker.next()
        a = ax.plot([projected_vectors[i][0]], [projected_vectors[i][1]],
                    marker=m, markersize=10, c=col, label=seed_words[i],
                    linestyle="none")
    ax.legend(numpoints=1, loc=5)
    plt.show()
results = PCA(data)
#print results.numcols
row_std = np.std(data, axis=0)
print results.Wt.shape
#print results.sigma
#print row_std
#print results.Wt[0]
#print results.Wt[1]
#print results.fracs  # contribution of each axis
# loadings: component weights rescaled by each metric's standard deviation
loading1 = results.Wt[0] / row_std
loading2 = results.Wt[1] / row_std
if not os.path.exists(os.path.join(os.getcwd(), 'pca')):
    os.makedirs(os.path.join(os.getcwd(), 'pca'))
os.chdir(os.path.join(os.getcwd(), 'pca'))
para_project = results.project(para)
print para_project.shape
fig = plt.figure()
fig.set_size_inches(10, 10)
ax1 = plt.subplot(121, aspect='equal')
plt.plot(results.Y[:, 0], results.Y[:, 1], 'o', color='blue', label='models')
plt.xlabel('PC1')
plt.ylabel('PC2')
#plt.legend()
centre_spines(ax1)
ax2 = plt.subplot(122, aspect='equal')
plt.plot(loading1, loading2, '^', color='red', label='metrics')
# annotate only the extreme loadings to keep the plot readable
for label, x, y in zip(t, loading1, loading2):
    if (x == np.max(loading1) or x == np.min(loading1)
            or y == np.max(loading2) or y == np.min(loading2)):
        plt.annotate(label, xy=(x, y))
    #plt.annotate(label, xy=(x, y))
next(objects, None)  # skip the header row
for row in objects:
    obj_id = int(row[0])
    period = float(row[1])
    if period > 0:
        v = loadMagData(args.path + '/' + str(obj_id) + '.fit.json')
        for i in range(row_length - len(v)):
            v.append(v[0])
        v.append(period)
        matrix.append(v)
        obj_ids.append(obj_id)
vec = np.array(matrix)
vec.shape = (len(matrix), row_length + 1)
results = PCA(vec)
with open('pca_result.dat', 'wb') as f:
    pickle.dump(results, f)
with open('pca_matrix.dat', 'wb') as f:
    pickle.dump(vec, f)
data = []
for obj_id, row in zip(obj_ids, matrix):
    data.append([results.project(row)[0], results.project(row)[1],
                 metadata[obj_id]["LCtype"], obj_id])
f_out = open(args.path + '/pca_with_period.json', 'w')
f_out.write(json.dumps(data))
f_out.close()
ax.set_ylim([-15, 20])
ax.set_zlim([-15, 15])
pl.savefig("3D_" + rat + "_" + date1 + ".png")
pl.close('all')
pl.xlim([-10, 20])
pl.ylim([-15, 15])
pl.scatter(pca.Y[::1, 0], pca.Y[::1, 1])
pl.savefig("2D_" + rat + "_" + date1 + ".png")
for date in file[rat]:
    if date != date1:
        try:
            object = file[rat][date]["valueMatrix"]
            data = np.array(object)
            projectedData = pca.project(data)
            pl.close('all')
            fig1 = pl.figure()
            ax = Axes3D(fig1)
            ax.scatter(projectedData[::1, 0], projectedData[::1, 1],
                       projectedData[::1, 2], 'bo')
            ax.set_xlim([-10, 20])
            ax.set_ylim([-15, 20])
            ax.set_zlim([-15, 15])
            pl.savefig("3D_" + rat + "_" + date + ".png")
            pl.close('all')
            pl.xlim([-10, 20])
            pl.ylim([-15, 15])
            pl.scatter(projectedData[::1, 0], projectedData[::1, 1])
            pl.savefig("2D_" + rat + "_" + date + ".png")