import numpy as np
from matrix_sketching import MatrixSketching
from pyspark import SparkContext, SparkConf

conf = SparkConf().setAppName("PysparkSVDTest")
sc = SparkContext(conf=conf)

# Load the input matrix A (one data point per row).
# np.loadtxt accepts a filename directly, so there is no need for the
# manually-opened (and previously never-closed) file handle.
filename = '/home/hduser/Test_Spark/Spark_Matrix_Sketching/matrix.csv'
A = np.loadtxt(filename, delimiter=",")

# Exact covariance A^T A and Frobenius norm — the error baseline.
approxCovarianceMatrixA = np.dot(A.transpose(), A)
Norm_A = np.linalg.norm(A, ord='fro')

# isvd Test
l = 20  # sketch size (number of rows retained by the sketch)
ms = MatrixSketching(sc=sc, rows=l, columns=500, op=0.2)
# Feed the first 10000 rows into the sketch one at a time.
# NOTE(review): 10000 is assumed to be the row count of matrix.csv — confirm.
for row in A[:10000]:
    ms.add(row)
B = ms.getLocalSketchMatrix()
approxCovarianceMatrixB = np.dot(B.transpose(), B)

# Relative covariance error: ||A^T A - B^T B||_2 / ||A||_F^2
testMatrix = approxCovarianceMatrixA - approxCovarianceMatrixB
Test1 = np.linalg.norm(testMatrix, ord=2) / (Norm_A**2)
print(Test1)
del ms

# BUG FIX: np.savetxt requires a 1-D or 2-D array; passing the scalar
# Test1 raised "Expected 1D or 2D array, got 0D array instead".
np.savetxt("Test_PFD02_20.csv", [Test1], delimiter=",")

# NOTE(review): the SparkContext is stopped here, yet later code in this
# file still passes `sc` to MatrixSketching — confirm whether stop()
# belongs at the very end of the script instead.
sc.stop()

# # FD Test
# BUG FIX: mmread is used below but was never imported anywhere in this
# file, which raises NameError at runtime. It comes from scipy.io.
from scipy.io import mmread

# Load the ConnectUS test matrix (Matrix Market format) and transpose it
# so that rows are data points.
data = mmread('/home/duynguyen/Test_Spark/Spark_Matrix_Sketching/connectus.mtx')
temp = data.toarray()
A = temp.transpose()
# Release the large intermediates before allocating the covariance matrix.
del data
del temp

# Exact covariance A^T A and Frobenius norm — the error baseline.
approxCovarianceMatrixA = np.dot(A.transpose(), A)
Norm_A = np.linalg.norm(A, ord='fro')

# isvd Test: relative covariance error for sketch sizes l = 20, 30, ..., 100.
Test1 = np.zeros(9)
for j in range(9):
    l = 20 + 10 * j
    ms = MatrixSketching(sc=sc, rows=l, columns=512, op='isvd')
    # NOTE(review): 394792 is assumed to equal A.shape[0] — confirm.
    for row in A[:394792]:
        ms.add(row)
    B = ms.getLocalSketchMatrix()
    approxCovarianceMatrixB = np.dot(B.transpose(), B)
    testMatrix = approxCovarianceMatrixA - approxCovarianceMatrixB
    # Relative covariance error: ||A^T A - B^T B||_2 / ||A||_F^2
    Test1[j] = np.linalg.norm(testMatrix, ord=2) / (Norm_A**2)
    del ms
np.savetxt("connectUS_Test1.csv", Test1, delimiter=",")

# FD Test
# NOTE(review): this loop is truncated in the visible source — its body
# continues beyond this chunk, so only the visible statements are kept.
Test2 = np.zeros(9)
for j in range(9):
    l = 20 + 10 * j