Example #1
0
def pca(d, headers, normalize=True):
    """Perform a principal component analysis on the named columns of d.

    Returns a PCAData object holding the original headers, the projected
    data, the eigenvectors (the rows of V), the eigenvalues, and the
    column means. When normalize is True, the columns are normalized
    separately before the analysis.
    """
    # Select the working matrix, normalized per-column if requested.
    if normalize:
        A = normalize_columns_separately(headers, d)
    else:
        A = d.get_data(headers)

    # Column means of A, broadcast up to the full shape of A.
    m = np.array(np.mean(A, axis=0)[0])
    M = np.ones(A.shape) * m

    # Mean-center the data.
    D = A - M

    # Singular value decomposition of the centered data.
    U, S, V = np.linalg.svd(D, full_matrices=False)

    # The eigenvalues of cov(A) are the squared singular values divided
    # by the degrees of freedom (N - 1); they come out sorted.
    N = d.get_num_rows()
    eValues = np.square(S) / (N - 1)

    # Project the centered data onto the eigenvectors (rows of V).
    pData = np.dot(V, D.T).T

    # Bundle headers, projected data, eigenvectors, eigenvalues, means.
    return PCAData.PCAData(headers, pData, V, eValues, m)
Example #2
0
def pca(headers, d, normalize=True):
    """Run a PCA over the named columns of d and return a PCAData object.

    When normalize is True, each column is normalized separately before
    the analysis.
    """
    # Pick the source matrix.
    A = clusterNormalizeColSeparate(headers, d) if normalize else d.getDataNum(headers)

    # Column means, shaped like A so the subtraction is explicit.
    m = np.array(np.mean(A, axis=0)[0])
    M = m * np.ones(A.shape)

    # Mean-centered difference matrix.
    D = A - M

    # SVD: rows of V are the eigenvectors, S yields the eigenvalues.
    U, S, V = np.linalg.svd(D, full_matrices=False)

    # Project the centered data onto the eigenvectors.
    pdata = np.dot(V, D.T).T

    # Eigenvalues are the squared singular values over N - 1.
    evals = (S * S) / (d.getNumRowNum() - 1)
    return PCAData.PCAData(headers, pdata, evals, V, m)
Example #3
0
def pca(d, headers, normalize=True):
    """Perform a PCA on the named columns of d.

    Returns a PCAData object holding the projected data, the
    eigenvectors (rows of V), the eigenvalues, the column means, and the
    original headers.

    Parameters:
        d -- data object providing get_LimitedHeaders
        headers -- list of column headers to analyze
        normalize -- normalize each column separately first (default True)
    """
    if normalize:
        A = normalize_columns_separately(headers, d)
    else:
        A = d.get_LimitedHeaders(headers)

    # Mean-center the data. (Stray debug print of A removed.)
    m = np.mean(A, axis=0)
    D = A - m

    # SVD of the centered data; rows of V are the eigenvectors.
    U, S, V = np.linalg.svd(D, full_matrices=False)

    # Eigenvalues: squared singular values over the degrees of freedom.
    eigenvalues = np.square(S) / (A.shape[0] - 1)

    # Project onto the eigenvectors. np.dot is a true matrix product for
    # both ndarray and np.matrix inputs; the original `(V) * (D.T)` is an
    # elementwise (broadcast) product for plain ndarrays, which is wrong.
    projected_data = np.dot(V, D.T).T

    return PCAData.PCAData(projected_data, V, eigenvalues, m, headers)
Example #4
0
def pca( d, headers, normalized = True ):
    """Run a principal component analysis on the given columns of d.

    Returns a PCAData object carrying the original headers, the
    projected data, the eigenvalues, the eigenvectors, and the column
    means.

    Parameters:
        d -- data object
        headers -- list of column headers to analyze
        normalized -- if True, normalize each column separately first
    """
    # Choose the working data.
    A = normalize_columns_separately( headers, d ) if normalized else d.get_data(headers)

    # Column means and the mean-centered difference matrix.
    means = A.mean(axis=0)
    diffs = A - means

    # SVD of the centered data; the rows of V are the eigenvectors.
    U, S, V = np.linalg.svd( diffs, full_matrices=False )

    # Eigenvalues: squared singular values over the degrees of freedom.
    evals = np.matrix( (S*S)/(A.shape[0]-1) )

    # Project the centered data onto the eigenvectors.
    # (Matrix product under np.matrix semantics: D * V.T == (V * D.T).T)
    projected = diffs * V.T

    return PCAData.PCAData( headers, projected, evals, V, means )
Example #5
0
# NOTE(review): this is a Python 2 test script (print statements); it
# assumes np, PCAData, and `headers` are defined earlier in the file.

# means of the original data
means = np.matrix([3., 6.])

# eigenvalues of the original data
evals = np.matrix([16.13395443, 0.03271224])

# eigenvectors of the original data as rows
evecs = np.matrix([[0.4527601, 0.89163238], [-0.89163238, 0.4527601]])

# the original data projected onto the eigenvectors.
# pdata = (evecs * (orgdata - means).T).T
pdata = np.matrix([[-4.4720497, -0.02777563], [-2.23602485, -0.01388782],
                   [4.02623351, -0.19860441], [2.68184104, 0.24026787]])

# create a PCAData object from the fixtures above
# (argument order here: headers, pdata, evecs, evals, means)
pcad = PCAData.PCAData(headers, pdata, evecs, evals, means)

# Test all of the various new functions
print "Eigenvalues:"
print pcad.get_eigenvalues()

print "\nEigenvectors:"
print pcad.get_eigenvectors()

print "\nMeans:"
print pcad.get_data_means()

print "\nOriginal Headers:"
print pcad.get_data_headers()

# Test old functions