def main(input_file):
    """Run PCA on the parsed input and write the coordinates to disk.

    The matrix returned by ``parse_file(input_file)`` is passed to ``pca``.
    The four PCA results (column mean, coordinates, principal components,
    eigenvalues) are handed to ``print_top_genes`` as a single tuple, and the
    coordinates are then written to ``pc_coordinates_learning_topology.out``
    in the current working directory: one row per line, values separated by
    tabs.

    Args:
        input_file: Passed unchanged to ``parse_file``.
    """
    matrix = parse_file(input_file)
    columnmean, coordinates, pc, eigenvalues = pca(matrix)
    returns = columnmean, coordinates, pc, eigenvalues
    print_top_genes(returns)

    # A context manager guarantees the file is closed even if converting or
    # writing a row raises part-way through the loop.
    with open("pc_coordinates_learning_topology.out", "w") as f:
        for row in coordinates:
            f.write("\t".join(str(i) for i in row) + "\n")
def test_pca(self):
    # Compares the four values returned by pca() (mean, coordinates,
    # principal components, eigenvalues) against precomputed reference
    # values for two input matrices.
    if TestCluster.module == 'Bio.Cluster':
        from Bio.Cluster import pca
    elif TestCluster.module == 'Pycluster':
        from Pycluster import pca

    def check_vector(actual, expected):
        # Element-wise comparison of a 1-D result, in index order.
        for index, value in enumerate(expected):
            self.assertAlmostEqual(actual[index], value)

    def check_matrix(actual, expected):
        # Element-wise comparison of a 2-D result, row by row; only the
        # entries listed in `expected` are inspected.
        for i, expected_row in enumerate(expected):
            for j, value in enumerate(expected_row):
                self.assertAlmostEqual(actual[i, j], value)

    # --- First dataset: 13 rows, 2 columns ---
    data = numpy.array([
        [3.1, 1.2],
        [1.4, 1.3],
        [1.1, 1.5],
        [2.0, 1.5],
        [1.7, 1.9],
        [1.7, 1.9],
        [5.7, 5.9],
        [5.7, 5.9],
        [3.1, 3.3],
        [5.4, 5.3],
        [5.1, 5.5],
        [5.0, 5.5],
        [5.1, 5.2],
    ])
    mean, coordinates, pc, eigenvalues = pca(data)
    check_vector(mean, [3.5461538461538464, 3.5307692307692311])
    check_matrix(coordinates, [
        [2.0323189722653883, 1.2252420399694917],
        [3.0936985166252251, -0.10647619705157851],
        [3.1453186907749426, -0.46331699855941139],
        [2.5440202962223761, 0.20633980959571077],
        [2.4468278463376221, -0.28412285736824866],
        [2.4468278463376221, -0.28412285736824866],
        [-3.2018619434743254, 0.019692314198662915],
        [-3.2018619434743254, 0.019692314198662915],
        [0.46978641990344067, -0.17778754731982949],
        [-2.5549912731867215, 0.19733897451533403],
        [-2.5033710990370044, -0.15950182699250004],
        [-2.4365601663089413, -0.23390813900973562],
        [-2.2801521629852974, 0.0409309711916888],
    ])
    check_matrix(pc, [
        [-0.66810932728062988, -0.74406312017235743],
        [0.74406312017235743, -0.66810932728062988],
    ])
    check_vector(eigenvalues, [9.3110471246032844, 1.4437456297481428])

    # --- Second dataset: 4 rows, 6 columns ---
    data = numpy.array([
        [2.3, 4.5, 1.2, 6.7, 5.3, 7.1],
        [1.3, 6.5, 2.2, 5.7, 6.2, 9.1],
        [3.2, 7.2, 3.2, 7.4, 7.3, 8.9],
        [4.2, 5.2, 9.2, 4.4, 6.3, 7.2],
    ])
    mean, coordinates, pc, eigenvalues = pca(data)
    check_vector(mean, [2.7500, 5.8500, 3.9500, 6.0500, 6.2750, 8.0750])
    check_matrix(coordinates, [
        [2.6460846688406905, -2.1421701432732418, -0.56620932754145858, 0.0],
        [2.0644120899917544, 0.55542108669180323, 1.4818772348457117, 0.0],
        [1.0686641862092987, 1.9994412069101073, -1.000720598980291, 0.0],
        [-5.77916094504174, -0.41269215032867046, 0.085052691676038017, 0.0],
    ])
    # Only the first three rows of pc are compared, and only the first four
    # entries of each of those rows.
    check_matrix(pc, [
        [-0.26379660005997291, 0.064814972617134495,
         -0.91763310094893846, 0.26145408875373249],
        [0.05073770520434398, 0.68616983388698793,
         0.13819106187213354, 0.19782544121828985],
        [-0.63000893660095947, 0.091155993862151397,
         0.045630391256086845, -0.67456694780914772],
    ])
    # As the last eigenvalue is zero, the corresponding eigenvector is
    # strongly affected by roundoff error, and is not being tested here.
    # For PCA, this doesn't matter since all data have a zero coefficient
    # along this eigenvector.
    check_vector(eigenvalues, [
        6.7678878332578778,
        3.0108911400291856,
        1.8775592718563467,
        0.0,
    ])
from Bio.Cluster import pca
import numpy as np

# Small 4x4 integer matrix used as demo input for pca().
data = np.array(
    [
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [9, 10, 11, 12],
        [0, 1, 2, 3],
    ]
)

# pca() returns four values; unpack them under descriptive names.
columnmean, coordinates, components, eigenvalues = pca(data)

# Print each result prefixed by its label, in the order pca() returned them.
for label, value in (
    ("columnmean:", columnmean),
    ("coordinates:", coordinates),
    ("components:", components),
    ("eigenvalues:", eigenvalues),
):
    print(label, value)