Пример #1
0
 def test_svd(self):
     """Compare the rank-1 distributed ``svd`` against the dense reference.

     A seeded 1000x10 Gaussian matrix is split into a 10-partition blocked
     RDD, decomposed with ``svd(data, 1)``, and the leading singular
     triplet is checked against ``ln.svd`` on the in-memory matrix.
     """
     rng = np.random.RandomState(42)
     # Array dimensions must be integers: the float literal 1e3 is rejected
     # by NumPy ("'float' object cannot be interpreted as an integer").
     mat = rng.randn(1000, 10)
     data = block_rdd(self.sc.parallelize(list(mat), 10))
     u, s, v = svd(data, 1)
     # ``u`` comes back as an RDD; collect its pieces and flatten them into
     # a single vector before comparing.
     u = np.squeeze(np.concatenate(np.array(u.collect()))).T
     u_true, s_true, v_true = ln.svd(mat)
     # Singular vectors are only defined up to sign; match_sign presumably
     # aligns the reference vector's sign with ours -- confirm in its source.
     assert_array_almost_equal(v[0], match_sign(v[0], v_true[0, :]))
     assert_array_almost_equal(s[0], s_true[0])
     assert_array_almost_equal(u, match_sign(u, u_true[:, 0]))
Пример #2
0
 def test_svd(self):
     """Check the leading singular triplet from the RDD-based ``svd``.

     Builds a reproducible 1000x10 random matrix, runs ``svd(data, 1)`` on
     its blocked-RDD form, and asserts that the first right singular
     vector, singular value and left singular vector agree with
     ``ln.svd`` computed on the plain array.
     """
     rng = np.random.RandomState(42)
     # Use an int row count; NumPy raises TypeError for the float 1e3.
     mat = rng.randn(1000, 10)
     data = block_rdd(self.sc.parallelize(list(mat), 10))
     u, s, v = svd(data, 1)
     # Gather the distributed pieces of ``u`` into one flat local array.
     u = np.squeeze(np.concatenate(np.array(u.collect()))).T
     u_true, s_true, v_true = ln.svd(mat)
     # Sign of a singular vector is arbitrary, so the reference is passed
     # through match_sign (assumed to resolve that ambiguity) first.
     assert_array_almost_equal(v[0], match_sign(v[0], v_true[0, :]))
     assert_array_almost_equal(s[0], s_true[0])
     assert_array_almost_equal(u, match_sign(u, u_true[:, 0]))
Пример #3
0
def train(matrix, featureSize, labels):
    """Reduce ``matrix`` with a distributed SVD, fit an RBF SVM, and report.

    Steps:
      1. Parallelize the rows of ``matrix`` over 10 partitions and run the
         RDD-based ``svd`` for 100 components.
      2. Project ``matrix`` onto the resulting right singular vectors.
      3. Fit a one-vs-rest RBF ``SVC`` on the projected data and print the
         accuracy on that same (training) data.
      4. Map the projected data back to feature space and plot the first
         row as a 32x32 image.

    Args:
        matrix: 2-D array of samples (rows) by features (columns); must
            support ``.dot`` and ``.mean(axis=...)``.
        featureSize: Unused; kept so existing callers keep working.
        labels: Class label for each row of ``matrix``.

    Returns:
        None. Results are printed and plotted.
    """
    # Use spylearn to parallelize the SVD on an RDD.
    distributed_rows = sc.parallelize(list(matrix), 10)
    data = block_rdd(distributed_rows)
    u, s, v = svd(data, 100)
    print(v.shape)

    # v is (components, features), so v.T maps samples into component space.
    transformed = matrix.dot(v.T)
    print(transformed.shape)

    # Train the SVM, then score it on the data it was trained on.
    classifier = OneVsRestClassifier(SVC(kernel="rbf")).fit(transformed, labels)
    predictions = classifier.predict(transformed)
    print(predictions[0])

    # Float start value keeps the ratio below a true division on Python 2.
    correct = sum(
        (1.0 for expected, predicted in zip(labels, predictions)
         if expected == predicted),
        0.0,
    )
    print(correct / len(predictions))

    print('plot reconstructed data')
    # Going back to feature space needs v, not v.T: transformed is
    # (samples, components) and v is (components, features). The previous
    # ``transformed.dot(v.T)`` only had compatible shapes when the number of
    # components happened to equal the number of features.
    # NOTE(review): the per-row mean is added back although the data is
    # never centred before projection -- confirm this offset is intended.
    recData = transformed.dot(v) + matrix.mean(axis=1)[:, None]
    plot(recData[0].reshape((32, 32)))