def func(m=5000000, n=10, k=9):
    """Compare ``TruncatedSVDH2O`` with ``sklearnsvd`` on a seeded random matrix.

    A uniform random ``m`` x ``n`` matrix is generated with a fixed seed, both
    estimators are fitted with ``k`` components, and their fitted attributes,
    shapes and reconstructions are printed side by side (the output labels
    call them "h2o4gpu" and "sklearn").  The function also asserts that
    ``TruncatedSVDH2O.inverse_transform`` agrees with a manual reconstruction
    assembled from ``U``, ``singular_values_`` and ``components_``.

    Args:
        m: Number of rows of the random input matrix.
        n: Number of columns of the random input matrix.
        k: Number of components requested from every decomposition.

    Returns:
        Tuple ``(h2o4gpu_mae, sklearn_mae)``: the mean absolute difference
        between each estimator's reconstruction and the input matrix.

    Raises:
        AssertionError: If ``inverse_transform`` and the manual
            reconstruction are not close according to ``np.allclose``.
    """

    def _show(title, value):
        """Print a heading line followed by the value it describes."""
        print(title)
        print(value)

    np.random.seed(1234)
    X = np.random.rand(m, n)

    # Exact scikit impl
    sk_model = sklearnsvd(algorithm="arpack", n_components=k, random_state=42)

    n_rows, n_cols = X.shape[0], X.shape[1]
    print(f"SVD on {n_rows} by {n_cols} matrix")
    _show("Original X Matrix", X)
    print("\n")

    # The estimator is constructed inside the timed region, so the reported
    # time covers construction as well as fitting.
    print("h2o4gpu tsvd run")
    gpu_started = time.time()
    gpu_model = TruncatedSVDH2O(n_components=k)
    gpu_model.fit(X)
    gpu_elapsed = time.time() - gpu_started
    print(f"Total time for h2o4gpu tsvd is {gpu_elapsed}")
    _show("h2o4gpu tsvd Singular Values", gpu_model.singular_values_)
    _show("h2o4gpu tsvd Components (V^T)", gpu_model.components_)
    _show("h2o4gpu tsvd Explained Variance", gpu_model.explained_variance_)
    _show("h2o4gpu tsvd Explained Variance Ratio",
          gpu_model.explained_variance_ratio_)

    print("\n")
    print("sklearn run")
    sk_started = time.time()
    sk_model.fit(X)
    sk_elapsed = time.time() - sk_started
    print(f"Total time for sklearn is {sk_elapsed}")
    _show("Sklearn Singular Values", sk_model.singular_values_)
    _show("Sklearn Components (V^T)", sk_model.components_)
    _show("Sklearn Explained Variance", sk_model.explained_variance_)
    _show("Sklearn Explained Variance Ratio",
          sk_model.explained_variance_ratio_)

    # Factors of the TruncatedSVDH2O fit and the variance of U * Sigma.
    print("\n")
    _show("h2o4gpu tsvd U matrix", gpu_model.U)
    _show("h2o4gpu tsvd V^T", gpu_model.components_)
    _show("h2o4gpu tsvd Sigma", gpu_model.singular_values_)
    print("h2o4gpu tsvd U * Sigma")
    gpu_scores = gpu_model.U * gpu_model.singular_values_
    print(gpu_scores)
    _show("h2o4gpu tsvd Explained Variance", np.var(gpu_scores, axis=0))

    # Reference factors straight from svds; its output is reversed and
    # passed through svd_flip before being printed under "Sklearn" labels.
    ref_u, ref_sigma, ref_vt = svds(X, k=k, tol=0)
    ref_sigma = ref_sigma[::-1]
    ref_u, ref_vt = svd_flip(ref_u[:, ::-1], ref_vt[::-1])

    print("\n")
    _show("Sklearn U matrix", ref_u)
    _show("Sklearn V^T", ref_vt)
    _show("Sklearn Sigma", ref_sigma)
    print("Sklearn U * Sigma")
    ref_scores = ref_u * ref_sigma
    print(ref_scores)
    _show("sklearn Explained Variance", np.var(ref_scores, axis=0))

    print("U shape")
    print(np.shape(gpu_model.U))
    print(np.shape(ref_u))
    print("Singular Value shape")
    print(np.shape(gpu_model.singular_values_))
    print(np.shape(sk_model.singular_values_))
    print("Components shape")
    print(np.shape(gpu_model.components_))
    print(np.shape(sk_model.components_))

    # Note: fit_transform refits both estimators on X before inverting.
    print("Reconstruction")
    gpu_projection = gpu_model.fit_transform(X)
    gpu_recon = gpu_model.inverse_transform(gpu_projection)
    sk_projection = sk_model.fit_transform(X)
    sk_recon = sk_model.inverse_transform(sk_projection)

    # Manual reconstruction: sum over components of sigma_i * outer(u_i, v_i).
    rank_one_terms = []
    for idx, sigma_i in enumerate(gpu_model.singular_values_):
        outer_i = np.outer(gpu_model.U[:, idx], gpu_model.components_[idx, :])
        rank_one_terms.append(outer_i * sigma_i)
    gpu_recon_manual = np.sum(rank_one_terms, axis=0)

    print("Check inverse_transform() vs manual reconstruction for h2o4gpu")
    assert np.allclose(gpu_recon, gpu_recon_manual)

    _show("original X", X)
    _show("h2o4gpu reconstruction", gpu_recon)
    _show("sklearn reconstruction", sk_recon)

    gpu_err = gpu_recon - X
    sk_err = sk_recon - X
    _show("h2o4gpu diff", gpu_err)
    _show("sklearn diff", sk_err)

    gpu_worst = np.abs(gpu_err).max()
    sk_worst = np.abs(sk_err).max()
    _show("h2o4gpu max diff", gpu_worst)
    _show("sklearn max diff", sk_worst)

    print("h2o4gpu mae")
    h2o4gpu_mae = np.abs(gpu_err).mean()
    print(h2o4gpu_mae)
    print("sklearn mae")
    sklearn_mae = np.abs(sk_err).mean()
    print(sklearn_mae)

    return h2o4gpu_mae, sklearn_mae
def func_bench(m=2000, n=20, k=5, n_runs=5):
    """Benchmark the "cusolver" and "power" algorithms of ``TruncatedSVDH2O``.

    Both estimators are first fitted once on a small random matrix as a
    warm-up, then each is fitted ``n_runs`` times on a seeded random
    ``m`` x ``n`` matrix with ``k`` components.  The average fit time of each
    algorithm is appended as one row to ``power_cusolver_avg_run.csv`` in the
    current working directory.

    Args:
        m: Number of rows of the benchmark matrix.
        n: Number of columns of the benchmark matrix.
        k: Number of components used for the timed fits.
        n_runs: Number of timed repetitions per algorithm.  Defaults to 5,
            the value that was previously hard-coded in the loop and in both
            averages.

    Raises:
        ValueError: If ``n_runs`` is smaller than 1 (the averages would
            otherwise divide by zero).
    """
    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")

    np.random.seed(1234)
    X = np.random.rand(m, n)

    # Warm start
    W = np.random.rand(1000, 5)
    print('Cusolver Warm Start')
    h2o4gpu_tsvd_cusolver = TruncatedSVDH2O(n_components=3,
                                            algorithm="cusolver",
                                            random_state=42)
    h2o4gpu_tsvd_cusolver.fit(W)
    print('Power Warm Start')
    h2o4gpu_tsvd_power = TruncatedSVDH2O(n_components=3,
                                         algorithm="power",
                                         tol=1e-5,
                                         n_iter=100,
                                         random_state=42,
                                         verbose=True)
    h2o4gpu_tsvd_power.fit(W)

    print("SVD on " + str(X.shape[0]) + " by " + str(X.shape[1]) +
          " matrix with k=" + str(k))
    print("\n")

    cusolver_sum_time = 0
    power_sum_time = 0
    for i in range(n_runs):
        print("CUSOLVER Bencmark on iteration " + str(i))
        h2o4gpu_tsvd_cusolver.n_components = k
        # Start the clock right before fit() so console output and the
        # attribute update are not counted as solver time.
        start_time_cusolver = time.time()
        h2o4gpu_tsvd_cusolver.fit(X)
        end_time_cusolver = time.time() - start_time_cusolver
        cusolver_sum_time += end_time_cusolver
        print("Took cusolver " + str(end_time_cusolver) +
              " seconds on iteration " + str(i))

        print("Sleep before Power on iteration " + str(i))
        time.sleep(5)

        print("POWER Bencmark on iteration " + str(i))
        h2o4gpu_tsvd_power.n_components = k
        start_time_power = time.time()
        h2o4gpu_tsvd_power.fit(X)
        end_time_power = time.time() - start_time_power
        power_sum_time += end_time_power
        print("Took power method " + str(end_time_power) +
              " seconds on iteration " + str(i))

    # Benchmarks
    dim = str(m) + "by" + str(n)
    # The with-block closes the file on exit; no explicit close() is needed.
    with open('power_cusolver_avg_run.csv', 'a', newline='') as csvfile:
        csvwriter = csv.writer(csvfile,
                               delimiter=',',
                               quotechar='|',
                               quoting=csv.QUOTE_MINIMAL)
        csvwriter.writerow(
            ['cusolver', str(cusolver_sum_time / n_runs), dim, str(k)])
        csvwriter.writerow(
            ['power', str(power_sum_time / n_runs), dim, str(k)])
def func(m=2000, n=20, k=5):
    """Check that the "cusolver" and "power" ``TruncatedSVDH2O`` fits agree.

    Both algorithms are fitted with ``k`` components on the same seeded
    random ``m`` x ``n`` matrix, their fitted attributes are printed, and the
    singular values, explained variance and explained variance ratio of the
    two fits are asserted to be close.

    Args:
        m: Number of rows of the random input matrix.
        n: Number of columns of the random input matrix.
        k: Number of components requested from both algorithms.

    Raises:
        AssertionError: If any compared attribute differs beyond its
            relative tolerance.
    """

    def _build_and_fit(banner, data, **svd_options):
        """Print ``banner``, build and fit an estimator, return it with the elapsed time.

        The clock starts before the banner is printed and before the
        estimator is constructed, so both are part of the reported time.
        """
        began = time.time()
        print(banner)
        estimator = TruncatedSVDH2O(**svd_options)
        estimator.fit(data)
        return estimator, time.time() - began

    np.random.seed(1234)
    X = np.random.rand(m, n)

    print(f"SVD on {X.shape[0]} by {X.shape[1]} matrix")
    print("\n")

    cusolver_model, cusolver_elapsed = _build_and_fit(
        "CUSOLVER", X,
        n_components=k, algorithm="cusolver", random_state=42)
    print(f"Took cusolver {cusolver_elapsed} seconds")

    power_model, power_elapsed = _build_and_fit(
        "POWER", X,
        n_components=k, algorithm="power", tol=1E-50, n_iter=2000,
        random_state=42, verbose=True)
    print(f"Took power method {power_elapsed} seconds")

    # Dump the fitted attributes of each solver, cusolver first.
    for solver, fitted in (("cusolver", cusolver_model),
                           ("power", power_model)):
        print(f"h2o4gpu {solver} components")
        print(fitted.components_)
        print(f"h2o4gpu {solver} singular values")
        print(fitted.singular_values_)
        print(f"h2o4gpu tsvd {solver} Explained Variance")
        print(fitted.explained_variance_)
        print(f"h2o4gpu tsvd {solver} Explained Variance Ratio")
        print(fitted.explained_variance_ratio_)

    # (description, attribute name, relative tolerance): singular values are
    # held to a tighter tolerance than the two variance-based attributes.
    comparisons = (
        ("singular values", "singular_values_", 1E-5),
        ("explained variance", "explained_variance_", 1E-3),
        ("explained variance ratio", "explained_variance_ratio_", 1E-3),
    )
    for description, attribute, rtol in comparisons:
        print(f"Checking {description}")
        assert np.allclose(getattr(cusolver_model, attribute),
                           getattr(power_model, attribute),
                           rtol=rtol)
def func(m=5000000, n=10, k=9, convert_to_float32=False):
    """Time four truncated-SVD fits on the same seeded random matrix.

    After one warm-up fit of each estimator variant on a small random matrix,
    the function times ``fit`` for ``TruncatedSVDH2O`` with the "cusolver" and
    "power" algorithms and for ``sklearnsvd`` with the "arpack" and
    "randomized" algorithms, printing the fitted attributes after each run
    and sleeping five seconds between runs.

    Args:
        m: Number of rows of the random input matrix.
        n: Number of columns of the random input matrix.
        k: Number of components requested in the timed fits.
        convert_to_float32: When true, the input matrix is cast to
            ``np.float32`` before any fitting.

    Returns:
        Tuple of elapsed ``fit`` times in seconds, in the order
        ``(h2o4gpu cusolver, sklearn arpack, h2o4gpu power,
        sklearn randomized)``.
    """

    def _timed_fit(estimator, data):
        """Fit ``estimator`` on ``data`` and return the elapsed seconds."""
        began = time.time()
        estimator.fit(data)
        return time.time() - began

    def _report(prefix, estimator):
        """Print the four fitted attributes of ``estimator`` under ``prefix``."""
        print(f"{prefix} Singular Values")
        print(estimator.singular_values_)
        print(f"{prefix} Components (V^T)")
        print(estimator.components_)
        print(f"{prefix} Explained Variance")
        print(estimator.explained_variance_)
        print(f"{prefix} Explained Variance Ratio")
        print(estimator.explained_variance_ratio_)

    def _announce_input(method, data):
        """Print the banner and the input matrix shown before each h2o4gpu run."""
        print(f"{method} SVD on {data.shape[0]} by {data.shape[1]} matrix")
        print("Original X Matrix")
        print(data)
        print("\n")

    np.random.seed(1234)
    X = np.random.rand(m, n)
    if convert_to_float32:
        print("Converting input matrix to float32")
        X = X.astype(np.float32)

    # Warm start
    W = np.random.rand(1000, 5)
    warm_up_plan = (
        ('h2o4gpu Cusolver Warm Start', TruncatedSVDH2O,
         dict(n_components=3, algorithm="cusolver", tol=1e-5, n_iter=100,
              random_state=42, verbose=True)),
        ('h2o4gpu Power Warm Start', TruncatedSVDH2O,
         dict(n_components=3, algorithm="power", tol=1e-5, n_iter=100,
              random_state=42, verbose=True)),
        ('sklearn ARPACK Warm Start', sklearnsvd,
         dict(n_components=3, algorithm="arpack", n_iter=5,
              random_state=42)),
        ('sklearn Randomized Warm Start', sklearnsvd,
         dict(n_components=3, algorithm="randomized", tol=1e-5, n_iter=5,
              random_state=42)),
    )
    # The warm-up estimators stay referenced until the function returns.
    warm_estimators = []
    for banner, estimator_cls, options in warm_up_plan:
        print(banner)
        warm = estimator_cls(**options)
        warm.fit(W)
        warm_estimators.append(warm)

    # Exact scikit impl
    sk_arpack = sklearnsvd(algorithm="arpack", n_components=k, tol=1e-5,
                           n_iter=5, random_state=42)
    # Randomized scikit impl
    sk_randomized = sklearnsvd(algorithm="randomized", n_components=k,
                               tol=1e-5, n_iter=5, random_state=42)

    # Cusolver h2o4gpu impl
    _announce_input("Cusolver", X)
    print("h2o4gpu cusolver tsvd run")
    gpu_cusolver = TruncatedSVDH2O(n_components=k, algorithm="cusolver",
                                   tol=1e-5, n_iter=100, random_state=42)
    cusolver_seconds = _timed_fit(gpu_cusolver, X)
    print(f"Total time for h2o4gpu cusolver tsvd is {cusolver_seconds}")
    _report("h2o4gpu tsvd cusolver", gpu_cusolver)

    print("Sleep before Power")
    time.sleep(5)

    # Power h2o4gpu impl
    _announce_input("Power", X)
    print("h2o4gpu tsvd power method run")
    gpu_power = TruncatedSVDH2O(n_components=k, algorithm="power",
                                tol=1e-5, n_iter=100, random_state=42)
    power_seconds = _timed_fit(gpu_power, X)
    print(f"Total time for h2o4gpu tsvd is {power_seconds}")
    _report("h2o4gpu tsvd power", gpu_power)

    print("Sleep before Sklearn ARPACK")
    time.sleep(5)

    # ARPACK sklearn impl
    print("\n")
    print("ARPACK sklearn run")
    arpack_seconds = _timed_fit(sk_arpack, X)
    print(f"Total time for sklearn is {arpack_seconds}")
    _report("Sklearn ARPACK", sk_arpack)

    print("Sleep before Sklearn Randomized")
    time.sleep(5)

    # Randomized sklearn impl
    print("\n")
    print("Randomized sklearn randomized run")
    randomized_seconds = _timed_fit(sk_randomized, X)
    print(f"Total time for sklearn is {randomized_seconds}")
    _report("Sklearn Random", sk_randomized)

    return cusolver_seconds, arpack_seconds, power_seconds, randomized_seconds