def test_unknown_shapes(fn, solver):
    """Check PCA fitting on a dask array whose number of rows is unknown.

    ``fn`` is the name of the estimator method to call and ``solver`` is
    forwarded as ``svd_solver``.  With ``solver == "auto"`` the call is
    expected to raise ``ValueError``; otherwise the fitted attributes are
    checked, with the sample count expected to be NaN.
    """
    n_features = 3
    n_components = 2

    random_gen = sk_check_random_state(42)
    dense = rng_data = random_gen.uniform(-1, 1, size=(10, n_features))
    frame = dask.dataframe.from_pandas(pd.DataFrame(rng_data), npartitions=2)

    estimator = dd.PCA(n_components=n_components, svd_solver=solver)
    method = getattr(estimator, fn)

    # ``.values`` on the dask dataframe is expected to lose the row count.
    data = frame.values
    assert np.isnan(data.shape[0])

    if solver == "auto":
        with pytest.raises(
            ValueError, match="Cannot automatically choose PCA solver"
        ):
            method(data)
        return

    result = method(data)
    assert hasattr(estimator, "components_")
    assert estimator.n_components_ == 2
    assert estimator.n_features_ == 3
    assert np.isnan(estimator.n_samples_)

    if fn == "fit_transform":
        # The transformed output keeps the unknown row count.
        assert np.isnan(result.shape[0])
        assert result.shape[1] == 2
def split_samples(X, y, fractions=(0.75, 0.25), random_state=None):
    """Randomly split samples into disjoint subsets of given relative sizes.

    The samples are shuffled once and then cut into ``len(fractions)``
    consecutive, non-overlapping pieces (e.g. training / test /
    cross-validation sets).

    Parameters
    ----------
    X, y : array_like
        leading dimension n_samples
    fractions : array_like
        length n_splits.  Relative size of each subset; integer weights
        such as ``[3, 1]`` are accepted.  If the fractions do not add
        to 1, they will be re-normalized.
    random_state : None, int, or RandomState object
        random seed, or random number generator

    Returns
    -------
    X_divisions, y_divisions : tuple of ndarray
        Each tuple has length n_splits; entry ``i`` of both tuples holds
        the same (shuffled) samples.

    Raises
    ------
    ValueError
        If X and y have different leading dimensions, or if ``fractions``
        is empty, contains negative entries, or does not have a positive,
        finite sum.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    if X.shape[0] != y.shape[0]:
        raise ValueError("X and y should have the same leading dimension")

    n_samples = X.shape[0]

    # Coerce to float so integer weights survive the in-place division
    # below (an integer array cannot hold the result of a true divide).
    fractions = np.asarray(fractions, dtype=float).ravel()
    if fractions.size == 0:
        raise ValueError("fractions must contain at least one entry")
    if np.any(fractions < 0):
        raise ValueError("fractions must be non-negative")

    boundaries = fractions.cumsum()
    total = boundaries[-1]
    if not (np.isfinite(total) and total > 0):
        raise ValueError("fractions must have a positive, finite sum")

    # Normalize, then scale the cumulative fractions to sample counts.
    boundaries /= total
    boundaries *= n_samples

    N = np.concatenate([[0], boundaries.astype(int)])
    N[-1] = n_samples  # in case of roundoff errors

    random_state = sk_check_random_state(random_state)
    indices = np.arange(len(y))
    random_state.shuffle(indices)

    # Consecutive boundary pairs delimit each subset of shuffled indices.
    chunks = [indices[start:stop] for start, stop in zip(N[:-1], N[1:])]
    X_divisions = tuple(X[chunk] for chunk in chunks)
    y_divisions = tuple(y[chunk] for chunk in chunks)

    return X_divisions, y_divisions
def check_random_state(seed):
    """Return the result of ``sk_check_random_state`` for ``seed``.

    Thin pass-through wrapper; the argument is forwarded unchanged.
    """
    state = sk_check_random_state(seed)
    return state