def pca(d, headers, normalize=True):
    # assign to A the desired data. Use either normalize_columns_separately
    # or get_data, depending on the value of the normalize argument.
    if normalize:
        A = normalize_columns_separately(headers, d)
    else:
        A = d.get_data(headers)

    # assign to m the mean values of the columns of A
    m = np.mean(A, axis=0)[0]
    m = np.array(m)
    M = m * np.ones(A.shape)

    # assign to D the difference matrix A - m
    D = A - M

    # assign to U, S, V the result of running np.linalg.svd on D, with full_matrices=False
    U, S, V = np.linalg.svd(D, full_matrices=False)

    # the eigenvalues of cov(A) are the squares of the singular values (S matrix)
    # divided by the degrees of freedom (N-1). The values are sorted.
    N = d.get_num_rows()
    eValues = (S * S) / (N - 1)

    # project the data onto the eigenvectors. Treat V as a transformation
    # matrix and right-multiply it by D transpose. The eigenvectors of A
    # are the rows of V. The eigenvectors match the order of the eigenvalues.
    pData = np.dot(V, D.T).T

    # create and return a PCA data object with the headers, projected data,
    # eigenvectors, eigenvalues, and mean vector.
    pca_data = PCAData.PCAData(headers, pData, V, eValues, m)
    return pca_data
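# The SVD identities used above (eigenvalues of cov(A) = S^2/(N-1), eigenvectors =
# rows of V) can be sanity-checked on a small synthetic array. This is only a
# minimal sketch, independent of the Data/PCAData classes; the array X is made up
# for illustration.
import numpy as np

X = np.array([[2.0, 1.0], [4.0, 3.0], [6.0, 7.0], [8.0, 9.0]])
D = X - X.mean(axis=0)                      # mean-centered data
U, S, V = np.linalg.svd(D, full_matrices=False)

evals_svd = (S * S) / (X.shape[0] - 1)      # eigenvalues from singular values
evals_cov, evecs_cov = np.linalg.eigh(np.cov(X, rowvar=False))

# eigh returns eigenvalues in ascending order; the SVD gives them descending
print(np.allclose(np.sort(evals_svd), np.sort(evals_cov)))      # True
# each row of V spans the same direction as the matching covariance eigenvector
print(np.allclose(np.abs(V @ evecs_cov[:, ::-1]), np.eye(2)))   # True (up to sign)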
def pca(headers, d, normalize=True):
    if normalize:
        A = clusterNormalizeColSeparate(headers, d)
    else:
        A = d.getDataNum(headers)
    m = np.mean(A, axis=0)[0]
    m = np.array(m)
    M = m*np.ones( A.shape )  # this is so the mean is set up with the same dimensions as A
    D = A - M  # difference between matrix A and m
    U, S, V = np.linalg.svd(D, full_matrices=False)  # V = Evecs, (S*S)/(N-1) = Evals
    pdata = np.dot(V, D.T).T
    return PCAData.PCAData(headers, pdata, (S*S)/(d.getNumRowNum()-1), V, m)
def pca(d, headers, normalize=True):
    if normalize:
        A = normalize_columns_separately(headers, d)
    else:
        A = d.get_LimitedHeaders(headers)
    print(A)
    m = np.mean(A, axis=0)
    D = A - m
    U, S, V = np.linalg.svd(D, full_matrices=False)
    eigenvalues = np.square(S) / (A.shape[0] - 1)
    # use an explicit matrix product here; with plain ndarrays, V * D.T would be
    # an elementwise multiply (and fail on shape), not the intended projection
    projected_data = np.dot(V, D.T).T
    return PCAData.PCAData(projected_data, V, eigenvalues, m, headers)
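# Why np.dot is used for the projection above: with np.matrix operands, * is a
# matrix product, but with plain ndarrays * is an elementwise multiply and raises
# on mismatched shapes. A small illustration with made-up arrays:
import numpy as np

V = np.array([[0.6, 0.8], [-0.8, 0.6]])              # 2x2 "eigenvector" rows
D = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])   # 3x2 centered data

proj = np.dot(V, D.T).T                   # works for ndarray and np.matrix alike
try:
    bad = V * D.T                         # elementwise: shapes (2,2) and (2,3) clash
except ValueError as e:
    print("elementwise * fails:", e)

Vm, Dm = np.matrix(V), np.matrix(D)
print(np.allclose((Vm * Dm.T).T, proj))   # True: matrix * is a matrix product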
def pca( d, headers, normalized = True ):
    #takes in a data object, list of column headers, and optional normalized boolean.
    #returns a PCAData object with the original headers, projected data, eigen values,
    #eigen vectors, and data means.

    #data
    if normalized:
        A = normalize_columns_separately( headers, d )
    else:
        A = d.get_data(headers)

    #means
    m = A.mean(axis=0)

    #difference matrix
    D = A-m

    #transformation matrix
    U,S,V = np.linalg.svd( D, full_matrices=False )

    #eigen values
    evals = np.matrix( (S*S)/(A.shape[0]-1) )

    #eigen vectors
    evecs = V

    #projected data
    pdata = D*V.T

    #PCAData object
    return PCAData.PCAData( headers, pdata, evals, evecs, m )
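# The projection in this version, D * V.T, is algebraically the same as the
# np.dot(V, D.T).T form used in the versions above, since (V D^T)^T = D V^T.
# A quick check with a made-up centered array:
import numpy as np

D = np.array([[-2.0, -4.0], [-1.0, -1.0], [1.0, 1.0], [2.0, 4.0]])  # centered data
_, _, V = np.linalg.svd(D, full_matrices=False)

print(np.allclose(D @ V.T, np.dot(V, D.T).T))   # True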
# means of the original data
means = np.matrix([3., 6.])

# eigenvalues of the original data
evals = np.matrix([16.13395443, 0.03271224])

# eigenvectors of the original data as rows
evecs = np.matrix([[0.4527601, 0.89163238],
                   [-0.89163238, 0.4527601]])

# the original data projected onto the eigenvectors.
# pdata = (evecs * (orgdata - means).T).T
pdata = np.matrix([[-4.4720497, -0.02777563],
                   [-2.23602485, -0.01388782],
                   [4.02623351, -0.19860441],
                   [2.68184104, 0.24026787]])

# create a PCAData object
pcad = PCAData.PCAData(headers, pdata, evecs, evals, means)

# Test all of the various new functions
print "Eigenvalues:"
print pcad.get_eigenvalues()

print "\nEigenvectors:"
print pcad.get_eigenvectors()

print "\nMeans:"
print pcad.get_data_means()

print "\nOriginal Headers:"
print pcad.get_data_headers()

# Test old functions
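# The test above assumes a PCAData class whose constructor takes (headers, pdata,
# evecs, evals, means) and that exposes the accessors called in the test. The real
# project class presumably extends the Data class and carries more state; this is
# only a minimal sketch of the interface the test exercises, not the actual class.
class PCAData(object):
    def __init__(self, headers, pdata, evecs, evals, means):
        self.headers = headers      # original column headers
        self.pdata = pdata          # data projected onto the eigenvectors
        self.evecs = evecs          # eigenvectors, one per row
        self.evals = evals          # eigenvalues, matching the eigenvector order
        self.means = means          # column means of the original data

    def get_eigenvalues(self):
        return self.evals

    def get_eigenvectors(self):
        return self.evecs

    def get_data_means(self):
        return self.means

    def get_data_headers(self):
        return self.headers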