def read_pic(fn):
    """Compute a small colour signature for the image stored at ``fn``.

    The image is resized with ``cv2.resize(..., (500, 400))`` and cut into
    a grid whose cell size is derived from the module-level settings
    ``w_fg`` and ``h_fg``.  For every cell the mean of channel 0, 1 and 2
    (named b, g, r here) is collected.  The resulting ``3 x N`` matrix is
    passed through ``mlpy.PCA`` keeping ``3 // 2 == 1`` component and is
    flattened into a vector with one value per channel.

    :param fn: path of the image file to read.
    :return: 1-D array of length 3 (one PCA score per colour channel).
    :raises IOError: if ``cv2.imread`` cannot load ``fn``.
    """
    fnimg = cv2.imread(fn)
    if fnimg is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside cv2.resize.
        raise IOError("cannot read image: %r" % (fn,))

    img = cv2.resize(fnimg, (500, 400))
    h, w = img.shape[0], img.shape[1]

    # Floor division keeps the grid step an integer on Python 2 and 3.
    w_interval = w // w_fg
    h_interval = h // h_fg

    alltz = []
    for now_h in range(0, h, h_interval):
        for now_w in range(0, w, w_interval):
            cell = img[now_h:now_h + h_interval, now_w:now_w + w_interval]
            # Mean of each of the three colour channels inside this cell.
            alltz.append([np.mean(cell[:, :, channel]) for channel in range(3)])

    # One row per colour channel, one column per grid cell.
    result_alltz = np.array(alltz).T

    pca = mlpy.PCA()
    pca.learn(result_alltz)
    result_alltz = pca.transform(result_alltz, k=len(result_alltz) // 2)
    return result_alltz.reshape(len(result_alltz))
def readpic(fn, w_fg=20, h_fg=10):
    """Return the feature code (colour signature) of the image at ``fn``.

    The image is resized with ``cv2.resize(..., (500, 400))`` and split
    into a ``w_fg`` x ``h_fg`` grid.  The mean of channel 0, 1 and 2
    (named b, g, r here) of every cell is collected into a ``3 x N``
    matrix, which is reduced with ``mlpy.PCA`` to ``3 // 2 == 1``
    component and flattened.  The matrix before PCA and the final vector
    are both printed.

    :param fn: path of the image file to read.
    :param w_fg: number of grid columns (default 20, the original value).
    :param h_fg: number of grid rows (default 10, the original value).
    :return: 1-D array of length 3 (one PCA score per colour channel).
    :raises IOError: if ``cv2.imread`` cannot load ``fn``.
    """
    fnimg = cv2.imread(fn)
    if fnimg is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError("cannot read image: %r" % (fn,))

    img = cv2.resize(fnimg, (500, 400))
    h, w = img.shape[0], img.shape[1]

    # Floor division keeps the grid step an integer on Python 2 and 3.
    w_interval = w // w_fg
    h_interval = h // h_fg

    alltz = []
    for now_h in range(0, h, h_interval):
        for now_w in range(0, w, w_interval):
            cell = img[now_h:now_h + h_interval, now_w:now_w + w_interval]
            # Mean of each of the three colour channels inside this cell.
            alltz.append([np.mean(cell[:, :, channel]) for channel in range(3)])

    # One row per colour channel, one column per grid cell.
    result_alltz = np.array(alltz).T
    print(result_alltz)

    # Run PCA to extract a reduced-dimension representation.
    pca = mlpy.PCA()
    pca.learn(result_alltz)
    result_alltz = pca.transform(result_alltz, k=len(result_alltz) // 2)
    result_alltz = result_alltz.reshape(len(result_alltz))
    print(result_alltz)
    return result_alltz
def get_pca(X, n):
    '''
    Fit an mlpy PCA model on X and return X transformed with k = n,
    i.e. expressed in an n dimensional space.
    '''
    model = mlpy.PCA()
    model.learn(X)
    projected = model.transform(X, k=n)
    return projected
def _rank1_approximation(gray):
    """Fit a PCA on ``gray`` and rebuild it from its first component only."""
    model = mlpy.PCA()
    model.learn(gray)
    return model.transform_inv(model.transform(gray, k=1))


def get_distance(img, findimg):
    """Return the distance between two images after a rank-1 PCA smoothing.

    Both inputs are resized to 21x21, converted from BGR to grayscale and
    replaced by their reconstruction from the first principal component
    (``transform`` with ``k=1`` followed by ``transform_inv``).  The two
    reconstructions are then compared with ``get_EuclideanDistance``.

    The previous implementation transformed ``findimg`` with the PCA model
    learned on ``img`` but inverted it with the model learned on
    ``findimg``; each image now uses its own model for both steps.

    :param img: first image, as accepted by ``cv2.resize`` / ``cv2.cvtColor``
        with ``COLOR_BGR2GRAY``.
    :param findimg: second image, same requirements as ``img``.
    :return: whatever ``get_EuclideanDistance`` returns for the two
        reconstructed 21x21 grayscale images.
    """
    newsize = (21, 21)
    fimg = cv2.resize(findimg, newsize)
    img = cv2.resize(img, newsize)

    my_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    my_fimg = cv2.cvtColor(fimg, cv2.COLOR_BGR2GRAY)

    pca_img = _rank1_approximation(my_img)
    pca_fimg = _rank1_approximation(my_fimg)
    return get_EuclideanDistance(pca_img, pca_fimg)
def metric(self):
    """Time one mlpy PCA run and report the elapsed time.

    Inside the timed section a ``mlpy.PCA`` is built from
    ``self.build_opts``, fitted on ``self.data[0]`` and used to transform
    that same data with ``self.k``.

    Returns a dict with the single key ``"runtime"`` holding
    ``Timer.ElapsedTime()``.
    """
    timer = Timer()
    with timer:
        pca = mlpy.PCA(**self.build_opts)
        pca.learn(self.data[0])
        # The result is not needed; the call is made only to be timed.
        pca.transform(self.data[0], self.k)

    return {"runtime": timer.ElapsedTime()}
def plot_data(X, y):
    """Scatter-plot X on its first two principal components.

    A ``mlpy.PCA`` model is fitted on X, X is transformed with k=2, and
    the two resulting columns are drawn in figure 1 with the points
    coloured by y.  The figure is shown before returning.
    """
    model = mlpy.PCA()
    model.learn(X)
    projected = model.transform(X, k=2)

    first, second = projected[:, 0], projected[:, 1]

    plt.set_cmap(plt.cm.Paired)
    plt.figure(1)
    plt.title("PCA on mushroom dataset")
    plt.scatter(first, second, c=y)
    plt.xlabel("First component")
    plt.ylabel("Second component")
    plt.show()
def reduce_PCA(x):
    '''
        Fit a whitening PCA model on x and return the fitted model
        (not the transformed data).
    '''
    # PCA object configured with whiten=True, fitted on all the features
    model = ml.PCA(whiten=True)
    model.learn(x)

    return model
def pcaDimRed(features, nDims):
    """Project every feature matrix onto the first ``nDims`` PCA axes.

    ``features`` is converted to a single matrix with
    ``listOfFeatures2Matrix``; a covariance-method ``mlpy.PCA`` is fitted
    on it and the first ``nDims`` columns of ``pca.coeff()`` are kept.
    Each element of ``features`` is then multiplied by that coefficient
    matrix with ``numpy.dot`` (a plain projection; no other preprocessing
    is applied here).

    :param features: iterable of arrays accepted by
        ``listOfFeatures2Matrix`` and by ``numpy.dot(f, coeff)``.
    :param nDims: number of leading PCA coefficient columns to keep.
    :return: tuple ``(featuresNew, coeff)`` - the list of projected arrays
        (same order as ``features``) and the truncated coefficient matrix.
    """
    # Only the stacked feature matrix is needed; the second value is unused.
    X, _ = listOfFeatures2Matrix(features)

    pca = mlpy.PCA(method='cov')
    pca.learn(X)
    coeff = pca.coeff()[:, 0:nDims]

    featuresNew = [numpy.dot(f, coeff) for f in features]
    return (featuresNew, coeff)
def RunPCAMlpy(q):
    """Run and time mlpy PCA on ``self.dataset``, reporting through ``q``.

    The target dimensionality comes from the ``"new_dimensionality"``
    entry of ``options`` (default: the number of columns of the data) and
    whitening is enabled when ``"whiten"`` is present.  Both entries are
    popped; anything left over is treated as an error.

    The elapsed time is both put on ``q`` and returned; on any failure
    ``-1`` is put on ``q`` and returned instead.
    """
    totalTimer = Timer()

    # Load input dataset.
    Log.Info("Loading dataset", self.verbose)
    data = np.genfromtxt(self.dataset, delimiter=',')

    try:
        with totalTimer:
            # Find out what dimension we want.
            if "new_dimensionality" not in options:
                k = data.shape[1]
            else:
                k = int(options.pop("new_dimensionality"))
                if k > data.shape[1]:
                    Log.Fatal("New dimensionality (" + str(k) + ") cannot be greater "
                              + "than existing dimensionality (" + str(data.shape[1]) + ")!")
                    q.put(-1)
                    return -1

            build_opts = {}
            if "whiten" in options:
                build_opts["whiten"] = True
                options.pop("whiten")

            # Every recognised option has been consumed by now.
            if len(options) > 0:
                Log.Fatal("Unknown parameters: " + str(options))
                raise Exception("unknown parameters")

            # Perform PCA.
            prep = mlpy.PCA(**build_opts)
            prep.learn(data)
            prep.transform(data, k)
    except Exception as e:
        Log.Fatal("Exception: " + str(e))
        q.put(-1)
        return -1

    elapsed = totalTimer.ElapsedTime()
    q.put(elapsed)
    return elapsed
def textListToColors(names):
    '''
    Generates a list of colors based on a list of names (strings).
    Similar strings correspond to similar colors.

    ARGUMENTS:
        - names:    list of strings
    RETURNS:
        - list with one entry of generateColorMap() per name, in the same
          order as names (empty list for empty input)
    '''
    if len(names) == 0:
        return []

    # STEP A: compute string similarity between all combinations of strings
    nNames = len(names)
    Dnames = np.zeros((nNames, nNames))
    for i in range(nNames):
        for j in range(nNames):
            totalLen = len(names[i]) + len(names[j])
            if totalLen == 0:
                # two empty strings are identical; avoid dividing by zero
                Dnames[i, j] = 1.0
            else:
                Dnames[i, j] = 1 - 2.0 * levenshtein(names[i], names[j]) / float(totalLen)

    # STEP B: pca dimensionality reduction to a single dimension
    # (from the similarity space)
    pca = mlpy.PCA(method='cov')
    pca.learn(Dnames)
    textToColor = np.ravel(pca.transform(Dnames, k=1))

    # STEP C: mapping of the 1-dimensional values to colormap indices 0..255
    lowest, highest = textToColor.min(), textToColor.max()
    if highest > lowest:
        textToColor = 255 * (textToColor - lowest) / (highest - lowest)
    else:
        # all names map to the same value (e.g. a single name): the
        # normalisation would be 0/0, so use the first color for all
        textToColor = np.zeros(textToColor.shape)

    textmaps = generateColorMap()
    return [textmaps[int(c)] for c in textToColor]
def RunPCAMlpy(q):
    """Run and time mlpy PCA on ``self.dataset``, reporting through ``q``.

    ``options`` is a command-line style string: ``-d <int>`` selects the
    new dimensionality (default: the number of columns of the data) and
    the presence of ``-s`` enables whitening.

    The elapsed time is both put on ``q`` and returned; on any failure
    ``-1`` is put on ``q`` and returned instead, and the exception is
    logged so the cause is not lost.
    """
    totalTimer = Timer()

    # Load input dataset.
    Log.Info("Loading dataset", self.verbose)
    data = np.genfromtxt(self.dataset, delimiter=',')

    try:
        with totalTimer:
            # Find out what dimension we want.
            match = re.search(r'-d (\d+)', options)
            if not match:
                k = data.shape[1]
            else:
                k = int(match.group(1))
                if k > data.shape[1]:
                    Log.Fatal("New dimensionality (" + str(k) + ") cannot be greater "
                              + "than existing dimensionality (" + str(data.shape[1]) + ")!")
                    q.put(-1)
                    return -1

            # Get the options for running PCA.
            s = "-s" in options

            # Perform PCA.
            prep = mlpy.PCA(whiten=s)
            prep.learn(data)
            prep.transform(data, k)
    except Exception as e:
        # Report the failure instead of swallowing it silently.
        Log.Fatal("Exception: " + str(e))
        q.put(-1)
        return -1

    time = totalTimer.ElapsedTime()
    q.put(time)
    return time
def pcaSvm(path=r'F:\PY\data\wine.txt'):
    """Plot a 2-component PCA of the wine data and an RBF SVM fitted on it.

    The comma-separated file at ``path`` is loaded; column 0 is used as the
    integer class label and columns 1..3 as features.  The features are
    reduced to two principal components with ``mlpy.PCA`` and shown in
    figure 1.  An ``mlpy.LibSvm`` with an RBF kernel is then trained on the
    projected points and its predictions over a 0.01-step grid are drawn
    in figure 2 together with the points.

    :param path: location of the data file; defaults to the path that was
        previously hard-coded.
    """
    wine = np.loadtxt(path, delimiter=',')
    # Builtin int replaces the np.int alias, which newer NumPy removed.
    x, y = wine[:, 1:4], wine[:, 0].astype(int)
    print("%s %s" % (x.shape, y.shape))

    pca = mlpy.PCA()
    pca.learn(x)
    z = pca.transform(x, k=2)
    print(z.shape)

    plt.figure(1)
    plt.title('PCA on wine dataset')
    plt.scatter(z[:, 0], z[:, 1], c=y, s=90, cmap=cm.Reds)
    plt.xlabel('First component')
    plt.ylabel('Second component')
    plt.show()

    svm = mlpy.LibSvm(kernel_type='rbf', gamma=20)
    svm.learn(z, y)

    # Evaluation grid covering the projected points with a 0.1 margin.
    xmin, xmax = z[:, 0].min() - 0.1, z[:, 0].max() + 0.1
    ymin, ymax = z[:, 1].min() - 0.1, z[:, 1].max() + 0.1
    xx, yy = np.meshgrid(np.arange(xmin, xmax, 0.01),
                         np.arange(ymin, ymax, 0.01))
    grid = np.c_[xx.ravel(), yy.ravel()]
    result = svm.pred(grid)

    plt.figure(2)
    plt.title("SVM (rbf kernel) on PCA")
    plt.pcolormesh(xx, yy, result.reshape(xx.shape), cmap=cm.Greys_r)
    plt.scatter(z[:, 0], z[:, 1], c=y, s=90, cmap=cm.Reds)
    plt.xlabel("First component")
    plt.ylabel("Second component")
    plt.xlim(xmin, xmax)
    plt.ylim(ymin, ymax)
    plt.show()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#code:[email protected]
#7-22.py
# Demo: fit mlpy PCA on 100 correlated 2-D Gaussian samples, draw the two
# principal axes over the data (figure 1), then show the data rebuilt from
# the first component only (figure 2).
import numpy as np
import matplotlib.pyplot as plt
import mlpy

np.random.seed(0)  # fixed seed so the sample is reproducible
mean = [0, 0]
cov = [[1, 1], [1, 1.5]]
n = 100
x = np.random.multivariate_normal(mean, cov, n)

pca = mlpy.PCA()
pca.learn(x)
coeff = pca.coeff()

# Figure 1: the samples plus one line per column of the coefficient matrix.
plt.figure(1)
plt.plot(x[:, 0], x[:, 1], 'o')
for column, colour in enumerate(('r', 'g')):
    plt.plot([0, coeff[0, column]], [0, coeff[1, column]],
             linewidth=4, color=colour)
plt.xlim(-4, 4)
plt.ylim(-4, 4)

# Keep a single component and map it back to the original space.
z = pca.transform(x, k=1)
xnew = pca.transform_inv(z)

# Figure 2: the reconstructed samples on the same axis limits.
plt.figure(2)
plt.plot(xnew[:, 0], xnew[:, 1], 'o')
plt.xlim(-4, 4)
plt.ylim(-4, 4)

plt.show()
def visualizeFeaturesFolder(folder, dimReductionMethod, priorKnowledge="none"):
    '''
    This function generates a chordial visualization for the recordings of
    the provided path. It shows a 2-D scatter plot of the reduced features
    and then calls chordialDiagram three times ("visualization",
    "visualizationInitial" and "visualizationGroup").

    ARGUMENTS:
        - folder:               path of the folder that contains the WAV
                                files to be processed
        - dimReductionMethod:   method used to reduce the dimension of the
                                initial feature space before computing the
                                similarity. "pca" selects plain PCA; any
                                other value selects LDA followed by PCA.
        - priorKnowledge:       only read in the non-"pca" branch: if this
                                is set equal to "artist", the LDA labels are
                                remapped so that files sharing the same
                                category name (the part of the file name
                                before " --- ") share one label.
    RETURNS:
        nothing (the results are plotted / passed to chordialDiagram)
    '''
    if dimReductionMethod == "pca":
        allMtFeatures, wavFilesList = aF.dirWavFeatureExtraction(
            folder, 30.0, 30.0, 0.050, 0.050, computeBEAT=True)
        # category name = file name without ".wav", up to the first " --- "
        namesCategoryToVisualize = [
            ntpath.basename(w).replace('.wav', '').split(" --- ")[0]
            for w in wavFilesList
        ]
        # display name = file name without ".wav"
        namesToVisualize = [
            ntpath.basename(w).replace('.wav', '') for w in wavFilesList
        ]
        (F, MEAN, STD) = aT.normalizeFeatures([allMtFeatures])
        F = np.concatenate(F)
        pca = mlpy.PCA(method='cov')  # pca (eigenvalue decomposition)
        pca.learn(F)
        coeff = pca.coeff()  # NOTE(review): coeff is not used afterwards
        finalDims = pca.transform(F, k=2)    # 2-D coordinates for the scatter plot
        finalDims2 = pca.transform(F, k=10)  # 10-D representation for the similarity matrix
    else:
        allMtFeatures, Ys, wavFilesList = aF.dirWavFeatureExtractionNoAveraging(
            folder, 20.0, 5.0, 0.040, 0.040
        )  # long-term statistics cannot be applied in this context (LDA needs mid-term features)
        namesCategoryToVisualize = [
            ntpath.basename(w).replace('.wav', '').split(" --- ")[0]
            for w in wavFilesList
        ]
        namesToVisualize = [
            ntpath.basename(w).replace('.wav', '') for w in wavFilesList
        ]
        ldaLabels = Ys
        if priorKnowledge == "artist":
            uNamesCategoryToVisualize = list(set(namesCategoryToVisualize))
            YsNew = np.zeros(Ys.shape)
            for i, uname in enumerate(
                    uNamesCategoryToVisualize):  # for each unique artist name:
                # positions (in the file list) of the files of this category
                indicesUCategories = [
                    j for j, x in enumerate(namesCategoryToVisualize)
                    if x == uname
                ]
                # every entry of Ys equal to one of those positions gets label i
                for j in indicesUCategories:
                    indices = np.nonzero(Ys == j)
                    YsNew[indices] = i
            ldaLabels = YsNew
        (F, MEAN, STD) = aT.normalizeFeatures([allMtFeatures])
        F = np.array(F[0])
        clf = LDA(n_components=10)
        clf.fit(F, ldaLabels)
        reducedDims = clf.transform(F)
        pca = mlpy.PCA(method='cov')  # pca (eigenvalue decomposition)
        pca.learn(reducedDims)
        coeff = pca.coeff()  # NOTE(review): coeff is not used afterwards
        reducedDims = pca.transform(reducedDims, k=2)
        # TODO: CHECK THIS ... SHOULD LDA USED IN SEMI-SUPERVISED ONLY????
        uLabels = np.sort(
            np.unique((Ys))
        )  # uLabels must have as many labels as the number of wavFilesList elements
        # NOTE(review): reducedDimsAvg is allocated but never read
        reducedDimsAvg = np.zeros((uLabels.shape[0], reducedDims.shape[1]))
        finalDims = np.zeros((uLabels.shape[0], 2))
        # average the 2-D points of every label into a single point
        for i, u in enumerate(uLabels):
            indices = [j for j, x in enumerate(Ys) if x == u]
            f = reducedDims[indices, :]
            finalDims[i, :] = f.mean(axis=0)
        finalDims2 = reducedDims
    print allMtFeatures.shape
    # scatter plot: one red star and one text label per row of finalDims
    for i in range(finalDims.shape[0]):
        plt.text(finalDims[i, 0],
                 finalDims[i, 1],
                 ntpath.basename(wavFilesList[i].replace('.wav', '')),
                 horizontalalignment='center',
                 verticalalignment='center',
                 fontsize=10)
        plt.plot(finalDims[i, 0], finalDims[i, 1], '*r')
    plt.xlim([1.2 * finalDims[:, 0].min(), 1.2 * finalDims[:, 0].max()])
    plt.ylim([1.2 * finalDims[:, 1].min(), 1.2 * finalDims[:, 1].max()])
    plt.show()
    # similarity = 1 - cosine distance between rows of the reduced features;
    # the diagonal (self-similarity) is zeroed
    SM = 1.0 - distance.squareform(distance.pdist(finalDims2, 'cosine'))
    for i in range(SM.shape[0]):
        SM[i, i] = 0.0
    chordialDiagram("visualization", SM, 0.50, namesToVisualize,
                    namesCategoryToVisualize)
    # same similarity, computed on the normalized features before reduction
    SM = 1.0 - distance.squareform(distance.pdist(F, 'cosine'))
    for i in range(SM.shape[0]):
        SM[i, i] = 0.0
    chordialDiagram("visualizationInitial", SM, 0.50, namesToVisualize,
                    namesCategoryToVisualize)
    # plot super-categories (i.e. artistname)
    # NOTE(review): sort is used unqualified; presumably it comes from a
    # star import elsewhere in the file - confirm.
    uNamesCategoryToVisualize = sort(list(set(namesCategoryToVisualize)))
    finalDimsGroup = np.zeros(
        (len(uNamesCategoryToVisualize), finalDims2.shape[1]))
    # average the reduced features of all files of each category
    for i, uname in enumerate(uNamesCategoryToVisualize):
        indices = [
            j for j, x in enumerate(namesCategoryToVisualize) if x == uname
        ]
        f = finalDims2[indices, :]
        finalDimsGroup[i, :] = f.mean(axis=0)
    SMgroup = 1.0 - distance.squareform(
        distance.pdist(finalDimsGroup, 'cosine'))
    for i in range(SMgroup.shape[0]):
        SMgroup[i, i] = 0.0
    chordialDiagram("visualizationGroup", SMgroup, 0.50,
                    uNamesCategoryToVisualize, uNamesCategoryToVisualize)
########################PCA USING SKLEARN#########################
# Fit a 2-component PCA on vecs_of_nodes, print its explained variance
# ratio and singular values, and scatter the projection in figure 1.
pca = PCA(n_components=2)
pca.fit(vecs_of_nodes)
z = pca.transform(vecs_of_nodes)
print(pca.explained_variance_ratio_)
print(pca.singular_values_)

plt.set_cmap(plt.cm.Paired)
plt.figure(1)
plt.title("PCA on vecs of nodes")
plt.scatter(z[:, 0], z[:, 1])
plt.xlabel("First component")
plt.ylabel("Second component")

########################PCA USING MLPY#############################
# Dimensionality reduction by Principal Component Analysis (PCA)
pca = mlpy.PCA()  # new PCA instance
pca.learn(x)  # learn from data
z = pca.transform(x, k=2)  # embed x into the k=2 dimensional subspace

# Plot the principal components. A separate figure number is used here:
# reusing figure 1 would draw this scatter on top of the sklearn one and
# replace its title.
plt.set_cmap(plt.cm.Paired)
plt.figure(2)
plt.title("PCA on iris dataset")
plt.scatter(z[:, 0], z[:, 1], c=y)
plt.xlabel("First component")
plt.ylabel("Second component")
plt.show()