# Imports assumed by these tests (the original snippet does not show them).
import numpy as np
from numpy.testing import assert_allclose
from scipy.sparse import csr_matrix

from sklearn.kernel_approximation import PolynomialCountSketch
from sklearn.metrics.pairwise import polynomial_kernel


def test_polynomial_count_sketch_dense_sparse(gamma, degree, coef0):
    """Check that PolynomialCountSketch results are the same for dense
    and sparse input."""
    ps_dense = PolynomialCountSketch(
        n_components=500, gamma=gamma, degree=degree, coef0=coef0, random_state=42
    )
    Xt_dense = ps_dense.fit_transform(X)
    Yt_dense = ps_dense.transform(Y)

    ps_sparse = PolynomialCountSketch(
        n_components=500, gamma=gamma, degree=degree, coef0=coef0, random_state=42
    )
    Xt_sparse = ps_sparse.fit_transform(csr_matrix(X))
    Yt_sparse = ps_sparse.transform(csr_matrix(Y))

    assert_allclose(Xt_dense, Xt_sparse)
    assert_allclose(Yt_dense, Yt_sparse)
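# The dense/sparse test above reads module-level arrays X and Y that the
# snippet never defines. A minimal sketch of plausible fixtures (the shapes,
# the seed, and placing them at module level are assumptions, not taken from
# the original file):
rng = np.random.RandomState(0)
X = rng.random_sample(size=(300, 50))
Y = rng.random_sample(size=(300, 50))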
def test_polynomial_count_sketch(X, Y, gamma, degree, coef0):
    # Test that PolynomialCountSketch approximates the polynomial
    # kernel on random data.

    # Compute the exact kernel.
    kernel = polynomial_kernel(X, Y, gamma=gamma, degree=degree, coef0=coef0)

    # Compute the approximate kernel mapping.
    ps_transform = PolynomialCountSketch(
        n_components=5000, gamma=gamma, coef0=coef0, degree=degree, random_state=42
    )
    X_trans = ps_transform.fit_transform(X)
    Y_trans = ps_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)

    error = kernel - kernel_approx
    assert np.abs(np.mean(error)) <= 0.05  # close to unbiased
    np.abs(error, out=error)
    assert np.max(error) <= 0.1  # nothing too far off
    assert np.mean(error) <= 0.05  # mean is fairly close
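# Quick usage sketch (not part of the original tests): the same comparison
# as above, run directly to show how the approximation tightens as
# n_components grows. The shapes, seed, and kernel parameters here are
# illustrative assumptions.
if __name__ == "__main__":
    rng = np.random.RandomState(42)
    A = rng.random_sample(size=(200, 30))
    exact = polynomial_kernel(A, A, gamma=1.0, degree=2, coef0=0)
    for k in (500, 2000, 8000):
        ps = PolynomialCountSketch(
            n_components=k, gamma=1.0, degree=2, coef0=0, random_state=42
        )
        A_t = ps.fit_transform(A)
        mean_abs_error = np.abs(exact - A_t @ A_t.T).mean()
        print(f"n_components={k}: mean |error| = {mean_abs_error:.4f}")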
# Now let's evaluate the scalability of PolynomialCountSketch vs. Nystroem.
# First we generate some fake data with a lot of samples.

# Imports assumed by this snippet (not shown in the original).
from time import time

import matplotlib.pyplot as plt
import numpy as np
from sklearn.kernel_approximation import Nystroem, PolynomialCountSketch

fakeData = np.random.randn(10000, 100)
fakeDataY = np.random.randint(0, high=10, size=(10000))
out_dims = range(500, 6000, 500)

# Evaluate scalability of PolynomialCountSketch as n_components grows.
ps_svm_times = []
for k in out_dims:
    ps = PolynomialCountSketch(degree=2, n_components=k)
    start = time()
    ps.fit_transform(fakeData, None)
    ps_svm_times.append(time() - start)

# Evaluate scalability of Nystroem as n_components grows.
# This can take a while due to the inefficient training phase.
ny_svm_times = []
for k in out_dims:
    ny = Nystroem(kernel="poly", gamma=1.0, degree=2, coef0=0, n_components=k)
    start = time()
    ny.fit_transform(fakeData, None)
    ny_svm_times.append(time() - start)

# Show results.
fig, ax = plt.subplots(figsize=(6, 4))
ax.set_title("Scalability results")
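# The original snippet stops right after creating the axes. A minimal sketch
# finishing the figure (the axis labels and legend text are assumptions):
ax.plot(out_dims, ps_svm_times, label="PolynomialCountSketch")
ax.plot(out_dims, ny_svm_times, label="Nystroem")
ax.set_xlabel("n_components")
ax.set_ylabel("fit_transform time (s)")
ax.legend()
plt.tight_layout()
plt.show()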
# Imports assumed by this snippet (not shown in the original).
import os

import matplotlib.pyplot as plt
import numpy as np
from sklearn.kernel_approximation import PolynomialCountSketch, RBFSampler
from sklearn.manifold import TSNE
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler, Normalizer
from sklearn.utils import shuffle

# X_array, labels, figuresDestination, symbols, symbolIdx, idx, dateIdx,
# labelName, and fashion_scatter are defined elsewhere in the original script.

# Sanity check on the feature array. The opening condition was truncated in
# the original; this reconstruction assumes an all-zeros check matching the
# printed messages.
if not np.any(X_array):
    print('array is all zeros')
else:
    print('Array is good')

# Keep only the rows whose labels are not NaN.
choice_length = np.count_nonzero(~np.isnan(labels))
X, y = shuffle(X_array, labels)
X = X[:choice_length]
y = y[:choice_length].fillna(0)  # labels is assumed to be a pandas Series

# Scale features to [-1, 1], then unit-normalise each sample.
scaler = MinMaxScaler(feature_range=(-1, 1))
mm = make_pipeline(MinMaxScaler(), Normalizer())
X = mm.fit_transform(X)

# Build two explicit feature maps: an RBF kernel approximation and a
# polynomial kernel approximation.
rbf_feature = RBFSampler(gamma=1.5, random_state=10)
ps = PolynomialCountSketch(degree=11, random_state=1)
X_rbf_features = rbf_feature.fit_transform(X)
X_poly_features = ps.fit_transform(X)

# We want to get a t-SNE embedding with 3 dimensions (plotted in 3-D below).
n_components = 3
tsne = TSNE(n_components)
tsne_result = tsne.fit_transform(X_rbf_features)

locationFileName = os.path.join(
    figuresDestination,
    str(sorted(symbols)[symbolIdx]) + '_idx_' + str(idx) + 'date_'
    + str(dateIdx) + '_' + str(labelName) + '_tsne_rbf_kernelised.png')
fashion_scatter(tsne_result, y, locationFileName)

fig = plt.figure(figsize=(16, 9))
ax = plt.axes(projection='3d')
# ax = Axes3D(fig)
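# A minimal sketch of the 3-D scatter the axes above were set up for
# (the colour map, marker size, and title are assumptions, not from the
# original script):
ax.scatter(
    tsne_result[:, 0], tsne_result[:, 1], tsne_result[:, 2],
    c=y, cmap="viridis", s=5,
)
ax.set_title("t-SNE (3 components) of RBF-kernelised features")
plt.show()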