示例#1
0
def test_unknown_shapes(fn, solver):
    """Check PCA fitting on a dask array whose row count is unknown.

    ``fn`` is the name of the estimator method to call (looked up with
    ``getattr``) and ``solver`` is forwarded as ``svd_solver``.  Both are
    presumably supplied by pytest parametrization outside this view.

    With ``solver == "auto"`` the call must raise ``ValueError``; for any
    other solver the fitted attributes are checked, and for
    ``fit_transform`` the shape of the returned array as well.
    """
    n_features, n_components = 3, 2

    random_gen = sk_check_random_state(42)
    dense = random_gen.uniform(-1, 1, size=(10, n_features))
    lazy_frame = dask.dataframe.from_pandas(pd.DataFrame(dense), npartitions=2)

    estimator = dd.PCA(n_components=n_components, svd_solver=solver)
    fit_method = getattr(estimator, fn)

    # The values of a dask dataframe carry an unknown (NaN) number of rows.
    lazy_values = lazy_frame.values
    assert np.isnan(lazy_values.shape[0])

    if solver == "auto":
        with pytest.raises(ValueError, match="Cannot automatically choose PCA solver"):
            fit_method(lazy_values)
        return

    result = fit_method(lazy_values)
    assert hasattr(estimator, "components_")
    assert estimator.n_components_ == n_components
    assert estimator.n_features_ == n_features
    assert np.isnan(estimator.n_samples_)
    if fn == "fit_transform":
        # The transformed output keeps the unknown row count.
        assert np.isnan(result.shape[0])
        assert result.shape[1] == n_components
示例#2
0
def split_samples(X, y, fractions=(0.75, 0.25), random_state=None):
    """Randomly split samples into disjoint subsets of given relative sizes.

    Typical use is producing training, test, and cross-validation sets.

    Parameters
    ----------
    X, y : array_like
        leading dimension n_samples
    fractions : array_like
        length n_splits.  Relative size of each subset.  If the fractions
        do not add to 1, they will be re-normalized, so integer weights
        such as ``[3, 1]`` are accepted as well.
    random_state : None, int, or RandomState object
        random seed, or random number generator

    Returns
    -------
    X_divisions, y_divisions : tuple of ndarray
        Each tuple has one entry per fraction.  Matching entries of the
        two tuples are selected with the same shuffled indices.

    Raises
    ------
    ValueError
        If X and y do not have the same leading dimension.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    if X.shape[0] != y.shape[0]:
        raise ValueError("X and y should have the same leading dimension")

    n_samples = X.shape[0]

    # Coerce to float up front: the in-place true division below cannot
    # write its result back into an integer array, so integer weights
    # would otherwise raise a casting error.
    boundaries = np.asarray(fractions, dtype=float).ravel().cumsum()
    boundaries /= boundaries[-1]
    boundaries *= n_samples

    # N[i]:N[i + 1] is the slice of shuffled indices assigned to split i.
    N = np.concatenate([[0], boundaries.astype(int)])
    N[-1] = n_samples  # in case of roundoff errors

    random_state = sk_check_random_state(random_state)
    indices = np.arange(n_samples)
    random_state.shuffle(indices)

    chunks = [indices[start:stop] for start, stop in zip(N[:-1], N[1:])]
    X_divisions = tuple(X[chunk] for chunk in chunks)
    y_divisions = tuple(y[chunk] for chunk in chunks)

    return X_divisions, y_divisions
示例#3
0
def check_random_state(seed):
    """Return whatever ``sk_check_random_state`` produces for ``seed``.

    This is a thin pass-through: ``seed`` is forwarded unchanged and the
    result is returned as-is.
    """
    rng = sk_check_random_state(seed)
    return rng