Exemplo n.º 1
0
def test_one_batch_fit_transform():

    n_samples = 1000
    n_features = 100
    data = np.random.randn(n_samples, n_features).astype(DTYPE)

    # -- reference outputs
    ref_mean = data.mean(axis=0)
    ref_std = data.std(axis=0)
    ref_out = (data - ref_mean[np.newaxis]) / ref_std[np.newaxis, :]

    # -- calculation with OnlineScaler
    scaler = OnlineScaler(n_features)
    out = scaler.fit_transform(data)

    # -- tests
    assert_allclose(out, ref_out, rtol=RTOL, atol=ATOL)
Exemplo n.º 2
0
def test_multiple_batch_fit_for_mean_and_std():

    n_samples = 1000
    n_features = 100
    n_batches = 5
    data = np.random.randn(n_batches * n_samples, n_features).astype(DTYPE)

    # -- reference outputs
    ref_mean = data.mean(axis=0)
    ref_std = data.std(axis=0)

    # -- calculation with OnlineScaler
    scaler = OnlineScaler(n_features)
    for i in xrange(n_batches):
        scaler.partial_fit(data[(i * n_samples):((i + 1) * n_samples), :])
    out_mean = scaler.r_mn
    out_std = np.sqrt(scaler.r_va)

    # -- tests
    assert_allclose(out_mean, ref_mean, rtol=RTOL, atol=ATOL)
    assert_allclose(out_std, ref_std, rtol=RTOL, atol=ATOL)