def test_random_projection_fit_transform(datatype, method):
    """Check that a random projection keeps pairwise distances within eps.

    Fits the selected projection model on generated blob data, transforms
    the same data, and asserts that every pairwise distance of the embedded
    points lies within a factor of ``1 - eps`` to ``1 + eps`` of the
    corresponding distance in the original space.

    The test is skipped when ``has_scipy()`` reports that SciPy is missing,
    because the distances are computed with
    ``scipy.spatial.distance.pdist``.

    Parameters
    ----------
    datatype
        dtype the generated data is cast to before fitting.
    method
        ``'gaussian'`` selects ``GaussianRandomProjection``; any other
        value selects ``SparseRandomProjection``.
    """
    if not has_scipy():
        pytest.skip('Skipping test_random_projection_fit_transform because '
                    'Scipy is missing')
    from scipy.spatial.distance import pdist

    eps = 0.2

    # dataset generation (the second value returned by make_blobs is unused)
    data, _ = make_blobs(n_samples=800, centers=400, n_features=3000)

    # conversion to the requested dtype
    data = data.astype(datatype)

    # creation of model
    if method == 'gaussian':
        model = GaussianRandomProjection(eps=eps)
    else:
        model = SparseRandomProjection(eps=eps)

    # fitting the model, then applying the transformation
    model.fit(data)
    transformed_data = model.transform(data)

    original_pdist = pdist(data)
    embedded_pdist = pdist(transformed_data)

    # check JL lemma; the bounds are asserted separately so that a failure
    # shows which side of the distortion interval was violated
    assert np.all((1.0 - eps) * original_pdist <= embedded_pdist)
    assert np.all(embedded_pdist <= (1.0 + eps) * original_pdist)
def test_random_projection_fit_transform_default(datatype, method):
    """Check distance preservation for models built with default arguments.

    Fits the selected projection model (constructed without arguments) on
    generated blob data, transforms the same data, and asserts that every
    pairwise distance of the embedded points lies within a factor of
    ``1 - eps`` to ``1 + eps`` of the original distance, with ``eps = 0.8``
    used only for this check.

    Parameters
    ----------
    datatype
        dtype the generated data is cast to before fitting.
    method
        ``'gaussian'`` selects ``GaussianRandomProjection``; any other
        value selects ``SparseRandomProjection``.
    """
    # tolerance used by the assertions only; it is not passed to the model
    eps = 0.8

    # dataset generation (the second value returned by make_blobs is unused)
    data, _ = make_blobs(n_samples=30, centers=4, n_features=5000)

    # conversion to the requested dtype
    data = data.astype(datatype)

    # creation of model
    if method == 'gaussian':
        model = GaussianRandomProjection()
    else:
        model = SparseRandomProjection()

    # fitting the model, then applying the transformation
    model.fit(data)
    transformed_data = model.transform(data)

    # NOTE(review): pdist is taken from module scope here -- presumably
    # scipy.spatial.distance.pdist; confirm the file-level import exists.
    original_pdist = pdist(data)
    embedded_pdist = pdist(transformed_data)

    # check JL lemma; the bounds are asserted separately so that a failure
    # shows which side of the distortion interval was violated
    assert np.all((1.0 - eps) * original_pdist <= embedded_pdist)
    assert np.all(embedded_pdist <= (1.0 + eps) * original_pdist)
def test_random_projection_fit(datatype, method):
    """Smoke-test that fitting a random projection model does not raise.

    Parameters
    ----------
    datatype
        dtype the generated data is cast to before fitting.
    method
        ``'gaussian'`` selects ``GaussianRandomProjection``; any other
        value selects ``SparseRandomProjection``.
    """
    # dataset generation (the second value returned by make_blobs is unused)
    data, _ = make_blobs(n_samples=800, centers=400, n_features=3000)

    # conversion to the requested dtype
    data = data.astype(datatype)

    # creation of model
    if method == 'gaussian':
        model = GaussianRandomProjection(eps=0.2)
    else:
        model = SparseRandomProjection(eps=0.2)

    # fitting the model; the test passes as long as this call does not raise
    model.fit(data)
def test_random_projection_fit_transform(datatype, input_type, method):
    """Check that a random projection keeps pairwise distances within eps.

    Fits the selected projection model on generated blob data, transforms
    the same data, and asserts that every pairwise distance of the embedded
    points lies within a factor of ``1 - eps`` to ``1 + eps`` of the
    corresponding distance in the original space.

    Parameters
    ----------
    datatype
        dtype the generated data is cast to before fitting.
    input_type
        ``'dataframe'`` feeds the model a ``cudf.DataFrame`` holding one
        column per feature (named by the feature index as a string); any
        other value feeds the array directly.
    method
        ``'gaussian'`` selects ``GaussianRandomProjection``; any other
        value selects ``SparseRandomProjection``.
    """
    eps = 0.2

    # dataset generation (the second value returned by make_blobs is unused)
    data, _ = make_blobs(n_samples=800, centers=400, n_features=3000)

    # conversion to the requested dtype
    data = data.astype(datatype)

    # creation of model
    if method == 'gaussian':
        model = GaussianRandomProjection(eps=eps)
    else:
        model = SparseRandomProjection(eps=eps)

    # fitting the model and applying the transformation on the chosen input
    if input_type == 'dataframe':
        gdf = cudf.DataFrame()
        for i in range(data.shape[1]):
            gdf[str(i)] = np.asarray(data[:, i], dtype=datatype)
        model.fit(gdf)
        # NOTE(review): as_matrix() is relied on to turn the transformed
        # dataframe into an array pdist accepts -- confirm it is still
        # available in the installed cudf version.
        transformed_data = model.transform(gdf).as_matrix()
    else:
        model.fit(data)
        transformed_data = model.transform(data)

    # NOTE(review): pdist is taken from module scope here -- presumably
    # scipy.spatial.distance.pdist; confirm the file-level import exists.
    original_pdist = pdist(data)
    embedded_pdist = pdist(transformed_data)

    # check JL lemma; the bounds are asserted separately so that a failure
    # shows which side of the distortion interval was violated
    assert np.all((1.0 - eps) * original_pdist <= embedded_pdist)
    assert np.all(embedded_pdist <= (1.0 + eps) * original_pdist)
def test_random_projection_fit(datatype, input_type, method):
    """Smoke-test that fitting a random projection model does not raise.

    Parameters
    ----------
    datatype
        dtype the generated data is cast to before fitting.
    input_type
        ``'dataframe'`` feeds the model a ``cudf.DataFrame`` holding one
        column per feature (named by the feature index as a string); any
        other value feeds the array directly.
    method
        ``'gaussian'`` selects ``GaussianRandomProjection``; any other
        value selects ``SparseRandomProjection``.
    """
    # dataset generation (the second value returned by make_blobs is unused)
    data, _ = make_blobs(n_samples=800, centers=400, n_features=3000)

    # conversion to the requested dtype
    data = data.astype(datatype)

    # creation of model
    if method == 'gaussian':
        model = GaussianRandomProjection(eps=0.2)
    else:
        model = SparseRandomProjection(eps=0.2)

    # fitting the model; the test passes as long as fit() does not raise
    if input_type == 'dataframe':
        gdf = cudf.DataFrame()
        for i in range(data.shape[1]):
            gdf[str(i)] = np.asarray(data[:, i], dtype=datatype)
        model.fit(gdf)
    else:
        model.fit(data)