lambda x: MatrixEntry(int(x[0]) - 1, int(x[1]) - 1, 1.0)) lower_entries = txt.map( lambda x: MatrixEntry(int(x[1]) - 1, int(x[0]) - 1, 1.0)) degrees = upper_entries.map(lambda entry: (entry.i, entry.value)).reduceByKey( lambda a, b: a + b) W = CoordinateMatrix(upper_entries.union(lower_entries), numCols=N, numRows=N) # XXX: laplacian = sys.argv[1] if laplacian == 'unnormalized': entries = degrees.map(lambda x: MatrixEntry(x[0], x[0], x[1])) D = CoordinateMatrix(entries, numCols=N, numRows=N) L = D.toBlockMatrix().subtract(W.toBlockMatrix()).toCoordinateMatrix() elif laplacian == 'normalized': entries = degrees.map(lambda x: MatrixEntry(x[0], x[0], 1 / x[1])) D_inv = CoordinateMatrix(entries, numCols=N, numRows=N).toBlockMatrix() I = CoordinateMatrix(sc.range(N).map(lambda i: MatrixEntry(i, i, 1.0)), numCols=N, numRows=N).toBlockMatrix() L = I.subtract(D_inv.multiply(W.toBlockMatrix())).toCoordinateMatrix() elif laplacian == 'symmetric': entries = degrees.map(lambda x: MatrixEntry(x[0], x[0], 1 / sqrt(x[1]))) D_invsq = CoordinateMatrix(entries, numCols=N, numRows=N).toBlockMatrix() I = sc.range(N).map(lambda i: MatrixEntry(i, i, 1.0), N, N) tmp = D_invsq.multiply(W.toBlockMatrix()).multiply(D_invsq) L = I.toBlockMatrix().subtract(tmp) else: raise ValueError('Unknown type of Laplacian.')
#sc = SparkContext(conf=conf).getOrCreate()
#use local spark on computer
# findspark.init()
#from pyspark.sql import SparkSession

# Load a sparse matrix stored as whitespace-separated "row col value" lines
# and benchmark the distributed truncated SVD on it.
local_file_location = 'file:///wasp/pdb1HYS.mtx.mtx'

rdd = spark.sparkContext.textFile(local_file_location)
# split() without arguments tolerates tabs and repeated/trailing blanks,
# which split(" ") would turn into empty fields that int() rejects.
rdd = rdd.map(lambda line: line.split())
# NOTE(review): the file name suggests Matrix Market data; this parser assumes
# the '%' header/size lines were already stripped and that indices are
# 0-based -- confirm against the actual file.
rdd = rdd.map(
    lambda line: MatrixEntry(int(line[0]), int(line[1]), float(line[2])))

mat = CoordinateMatrix(rdd)
M = mat.toRowMatrix()
A = mat.toBlockMatrix()
At = mat.transpose().toBlockMatrix()

print("SVD")
print(M.numRows(), M.numCols())

NUM_TIMES = 10  # repeat the factorization to report a mean duration
# perf_counter() is monotonic and high-resolution, unlike time.time(), which
# can jump if the system clock is adjusted.
start_svd = time.perf_counter()
for _ in range(NUM_TIMES):
    svd = M.computeSVD(5, computeU=True)
end_svd = time.perf_counter()
# Mean wall-clock seconds per run (not CPU time).
print("Time elapsed: ", (end_svd - start_svd) / NUM_TIMES)