Пример #1
0
def test_logistic(nps_app_inst: ArrayApplication):
    """Fit LogisticRegression with several solver configurations.

    For every entry of ``param_set`` a model is fitted on a BimodalGaussian
    dataset, the two predicted class probabilities are asserted to sum to one
    for every sample, and runtime, gradient norm, objective and accuracy are
    printed.
    """
    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "sgd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "block_sgd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
        {"solver": "irls", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        runtime = time.time()
        lr_model: LogisticRegression = LogisticRegression(**kwargs)
        lr_model.fit(X, y)
        runtime = time.time() - runtime
        y_pred = lr_model.predict(X).get()
        y_pred_proba = lr_model.predict_proba(X).get()
        # np.allclose only returns a bool; without the assert the check
        # would be computed and silently thrown away.
        assert np.allclose(
            np.ones(shape=(y.shape[0],)), y_pred_proba[:, 0] + y_pred_proba[:, 1]
        )
        print("opt", kwargs["solver"])
        print("runtime", runtime)
        print("norm", lr_model.grad_norm_sq(X, y).get())
        print("objective", lr_model.objective(X, y).get())
        print("accuracy", np.sum(y.get() == y_pred) / num_samples)
Пример #2
0
def test_lr(nps_app_inst: ArrayApplication):
    """Fit LinearRegression with the "gd" and "newton" solvers.

    The first part fits each solver configuration on a BimodalGaussian
    dataset, checks the shapes of the fitted coefficients and prints
    diagnostics. The second part fits a model on small integer-valued inputs
    and checks that predicting does not overflow and lands close to 1.
    """
    # Keep the sample count in one place so the mean squared error printed
    # below is divided by the number of rows that were actually generated.
    num_samples, num_features = 233, 10
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(
        num_samples, num_features, theta=real_theta
    )
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 100},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        runtime = time.time()
        model: LinearRegression = LinearRegression(**kwargs)
        model.fit(X, y)
        assert model._beta.shape == real_theta.shape and model._beta0.shape == ()
        runtime = time.time() - runtime
        y_pred = model.predict(X).get()
        print("opt", kwargs["solver"])
        print("runtime", runtime)
        print("norm", model.grad_norm_sq(X, y).get())
        print("objective", model.objective(X, y).get())
        print("error", np.sum((y.get() - y_pred) ** 2) / num_samples)
        print("D^2", model.deviance_sqr(X, y).get())

    # Test if integer array arguments will converge properly.
    X = nps_app_inst.array([[1, 2], [3, 5], [1, 5]], block_shape=(2, 2))
    y = nps_app_inst.array([1, 2, 3], block_shape=(2,))
    model = LinearRegression()
    model.fit(X, y)
    try:
        pred = model.predict([1, 2]).get()
    except OverflowError as err:
        # Raise explicitly: a bare `assert False` disappears under `python -O`.
        raise AssertionError(
            "LinearRegression overflows with integer array arguments."
        ) from err
    assert 0.9 < pred < 1.1
Пример #3
0
def test_sklearn_linear_regression(nps_app_inst: ArrayApplication):
    """Compare LinearRegression predictions with scikit-learn's.

    Both models are fitted on the same BimodalGaussian dataset and their
    predictions on the training data are asserted to agree.
    """
    from sklearn.linear_model import LinearRegression as SKLinearRegression

    num_features = 10
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(233,
                                                 num_features,
                                                 theta=real_theta)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100, ))
    param_set = [
        {
            "solver": "newton-cg",
            "tol": 1e-8,
            "max_iter": 10
        },
    ]
    for kwargs in param_set:
        lr_model: LinearRegression = LinearRegression(**kwargs)
        lr_model.fit(X, y)
        y_pred = lr_model.predict(X).get()

        sk_lr_model = SKLinearRegression()
        sk_lr_model.fit(real_X, real_y)
        sk_y_pred = sk_lr_model.predict(real_X)
        # Without the assert the comparison result is discarded and the test
        # can never fail on a prediction mismatch.
        assert np.allclose(sk_y_pred, y_pred)
Пример #4
0
def test_vecdot(app_inst: ArrayApplication):
    """Check ``@`` between vector-like block arrays against NumPy.

    Covers column/column, flat/flat, flat/column (both orders), row/column
    and row/row-transposed products of ``np.arange(9)`` in various layouts.
    """
    n = 9
    blk = 3

    def column():
        # (n, 1) column vector split into (blk, 1) blocks.
        return app_inst.array(np.arange(n).reshape(n, 1),
                              block_shape=(blk, 1))

    def flat():
        # 1-dimensional vector split into blocks of length blk.
        return app_inst.array(np.arange(n).reshape(n), block_shape=(blk, ))

    def row():
        # (1, n) row vector split into (1, blk) blocks.
        return app_inst.array(np.arange(n).reshape(1, n),
                              block_shape=(1, blk))

    # column.T @ column
    lhs, rhs = column(), column()
    assert np.allclose((lhs.T @ rhs).get(), lhs.T.get() @ rhs.get())

    # flat.T @ flat
    lhs, rhs = flat(), flat()
    assert np.allclose((lhs.T @ rhs).get(), lhs.T.get() @ rhs.get())

    # flat against column, in both orders.
    lhs, rhs = flat(), column()
    assert np.allclose((lhs.T @ rhs).get(), lhs.T.get() @ rhs.get())
    assert np.allclose((rhs.T @ lhs).get(), rhs.T.get() @ lhs.get())

    # row @ column
    lhs, rhs = row(), column()
    assert np.allclose((lhs @ rhs).get(), lhs.get() @ rhs.get())

    # row @ row.T
    lhs, rhs = row(), row()
    assert np.allclose((lhs @ rhs.T).get(), lhs.get() @ rhs.T.get())
Пример #5
0
def test_concatenate(app_inst: ArrayApplication):
    """Concatenate block arrays and compare with ``np.concatenate``.

    Two- and three-operand concatenations along axis 1 are compared
    element-wise with NumPy; a 1-dimensional integer concatenation along
    axis 0 is only checked for block integrity.
    """
    concat_axis = 1
    np_data, _ = BimodalGaussian.get_dataset(1000, 9)
    np_ones = np.ones(shape=(1000, 1))
    X = app_inst.array(np_data, block_shape=(100, 9))
    ones = app_inst.ones((1000, 1), (100, 1), dtype=X.dtype)

    # Two operands.
    joined = app_inst.concatenate([X, ones],
                                  axis=concat_axis,
                                  axis_block_size=X.block_shape[concat_axis])
    common.check_block_integrity(joined)
    expected = np.concatenate([np_data, np_ones], axis=concat_axis)
    assert np.allclose(joined.get(), expected)

    # Three operands; the third uses a different block width along the axis.
    np_extra = np.random.random_sample(1000 * 17).reshape(1000, 17)
    extra = app_inst.array(np_extra, block_shape=(X.block_shape[0], 3))
    joined = app_inst.concatenate([X, ones, extra],
                                  axis=concat_axis,
                                  axis_block_size=X.block_shape[concat_axis])
    common.check_block_integrity(joined)
    expected = np.concatenate([np_data, np_ones, np_extra], axis=concat_axis)
    assert np.allclose(joined.get(), expected)

    # 1-dimensional integer arrays along axis 0.
    zeros_1d = app_inst.zeros(shape=(50, ), block_shape=(10, ), dtype=int)
    ones_1d = app_inst.ones(shape=(50, ), block_shape=(10, ), dtype=int)
    joined_1d = app_inst.concatenate([zeros_1d, ones_1d], axis=0)
    common.check_block_integrity(joined_1d)
Пример #6
0
def test_lr(nps_app_inst: ArrayApplication):
    """Fit LinearRegression with the "gd" and "newton" solvers.

    Each solver configuration is fitted on a BimodalGaussian dataset, the
    shapes of the fitted coefficients are checked, and runtime, gradient
    norm, objective, mean squared error and D^2 are printed.
    """
    # Keep the sample count in one place so the mean squared error printed
    # below is divided by the number of rows that were actually generated.
    num_samples, num_features = 233, 10
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(num_samples,
                                                 num_features,
                                                 theta=real_theta)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100, ))
    param_set = [{
        "solver": "gd",
        "lr": 1e-6,
        "tol": 1e-8,
        "max_iter": 100
    }, {
        "solver": "newton",
        "tol": 1e-8,
        "max_iter": 10
    }]
    for kwargs in param_set:
        runtime = time.time()
        model: LinearRegression = LinearRegression(**kwargs)
        model.fit(X, y)
        assert model._beta.shape == real_theta.shape
        assert model._beta0.shape == ()
        runtime = time.time() - runtime
        y_pred = model.predict(X).get()
        print("opt", kwargs["solver"])
        print("runtime", runtime)
        print("norm", model.grad_norm_sq(X, y).get())
        print("objective", model.objective(X, y).get())
        print("error", np.sum((y.get() - y_pred)**2) / num_samples)
        print("D^2", model.deviance_sqr(X, y).get())
Пример #7
0
def test_lr(app_inst: ArrayApplication):
    """Check both linear-regression routines of the application.

    For float32 and float64 inputs, ``linear_regression`` and then
    ``fast_linear_regression`` are run and the summed squared residual is
    asserted to be close to zero (with looser tolerances for float32).
    """
    feature_count = 13
    rng = np.random.RandomState(1337)
    for dtype in (np.float32, np.float64):
        true_theta = rng.random_sample(feature_count).astype(dtype)
        np_X, np_y = BimodalGaussian.get_dataset(233, feature_count, theta=true_theta)
        X = app_inst.array(np_X.astype(dtype), block_shape=(15, 5))
        y = app_inst.array(np_y.astype(dtype), block_shape=(15,))

        if dtype == np.float64:
            tolerances = {}
        else:
            # Need to account for lower precision.
            tolerances = {"rtol": 1.e-4, "atol": 1.e-4}

        # Direct TSQR LR first, then fast LR.
        solvers = (app_inst.linear_regression, app_inst.fast_linear_regression)
        for solve in solvers:
            theta = solve(X, y)
            residual = (X @ theta) - y
            error = app_inst.sum(residual**2).get()
            assert np.allclose(0, error, **tolerances), error
Пример #8
0
def test_subscript(app_inst: ArrayApplication):
    """Check basic indexing of block arrays against NumPy.

    Covers single-column selection, block-aligned column slices on a 2-D
    array, and a range of integer/slice subscripts on a 3-D array.
    """
    shape = 12, 21
    # np.prod replaces np.product, which was removed in NumPy 2.0.
    npX = np.arange(np.prod(shape)).reshape(*shape)
    X = app_inst.array(npX, block_shape=(6, 7))
    for i in range(12):
        assert np.allclose((X[:, i].T @ X[:, i]).get(),
                           npX[:, i].T @ npX[:, i])

    # Aligned tests.
    for i in range(0, 21, 7):
        sel = slice(i, i + 7)
        assert np.allclose((X[:, sel].T @ X[:, sel]).get(),
                           npX[:, sel].T @ npX[:, sel])

    # More basic tests.
    X_shape = 12, 21, 16
    npX = np.arange(np.prod(X_shape)).reshape(*X_shape)
    X = app_inst.array(npX, block_shape=(6, 7, 8))
    for i in range(12):
        assert np.allclose(X[i].get(), npX[i])
        assert np.allclose(X[i, 1].get(), npX[i, 1])
        assert np.allclose(X[i, :, 2].get(), npX[i, :, 2])
        assert np.allclose(X[:, 3, i].get(), npX[:, 3, i])
        assert np.allclose(X[:, :, i].get(), npX[:, :, i])
        assert np.allclose((X[i].T @ X[i]).get(), npX[i].T @ npX[i])
Пример #9
0
def test_rr(app_inst: ArrayApplication):
    """Check that a strongly regularized ridge fit beats an unregularized one.

    Ten randomly rescaled samples are appended to the training data; the
    model fitted with ``lamb=10000.0`` must then have a smaller squared
    error on a fresh test set than the one fitted with ``lamb=0.0``.
    """
    num_features = 13
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(100, num_features, p=0.5, theta=real_theta)
    extra_X, extra_y = BimodalGaussian.get_dataset(10, num_features, p=0.5, theta=real_theta)

    # Perturb some examples.
    # Sampling directly with the target shape avoids the removed np.product
    # and the redundant reshape.
    extra_X = extra_X * rs.random_sample(extra_X.shape)
    extra_y = extra_y * rs.random_sample(extra_y.shape)
    real_X = np.concatenate([real_X, extra_X], axis=0)
    real_y = np.concatenate([real_y, extra_y], axis=0)

    X = app_inst.array(real_X, block_shape=(15, 5))
    y = app_inst.array(real_y, block_shape=(15,))
    theta = app_inst.ridge_regression(X, y, lamb=0.0)
    robust_theta = app_inst.ridge_regression(X, y, lamb=10000.0)

    # Generate a test set to evaluate robustness to outliers.
    test_X, test_y = BimodalGaussian.get_dataset(100, num_features, p=0.5, theta=real_theta)
    test_X = app_inst.array(test_X, block_shape=(15, 5))
    test_y = app_inst.array(test_y, block_shape=(15,))
    theta_error = np.sum((((test_X @ theta) - test_y)**2).get())
    robust_theta_error = np.sum((((test_X @ robust_theta) - test_y)**2).get())
    assert robust_theta_error < theta_error
Пример #10
0
def test_sklearn_logistic_regression(nps_app_inst: ArrayApplication):
    """Fit LogisticRegression next to scikit-learn's on the same data.

    For both models the two predicted class probabilities are asserted to
    sum to one for every sample.
    """
    from sklearn.linear_model import LogisticRegression as SKLogisticRegression

    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "newton-cg", "tol": 1e-8, "max_iter": 10},
    ]
    expected_total = np.ones(shape=(y.shape[0],))
    for kwargs in param_set:
        lr_model: LogisticRegression = LogisticRegression(**kwargs)
        lr_model.fit(X, y)
        y_pred = lr_model.predict(X).get()
        y_pred_proba = lr_model.predict_proba(X).get()
        # Assert the check; a bare np.allclose call verifies nothing.
        assert np.allclose(expected_total, y_pred_proba[:, 0] + y_pred_proba[:, 1])

        sk_lr_model = SKLogisticRegression(**kwargs)
        sk_lr_model.fit(real_X, real_y)
        sk_y_pred = sk_lr_model.predict(real_X)
        sk_y_pred_proba = sk_lr_model.predict_proba(real_X)
        assert np.allclose(
            expected_total, sk_y_pred_proba[:, 0] + sk_y_pred_proba[:, 1]
        )
        # NOTE(review): this comparison is still not asserted. scikit-learn
        # applies an L2 penalty by default while no penalty is passed to the
        # model above, so exact label agreement is not guaranteed - confirm
        # the intended tolerance before turning this into an assert.
        np.allclose(sk_y_pred, y_pred)
Пример #11
0
def test_penalties(nps_app_inst: ArrayApplication):
    """Fit Lasso, Ridge and ElasticNet with several solver configurations.

    Every model is fitted for every configuration; for the "newton" and
    "lbfgs" solvers the squared deviance is additionally required to exceed
    0.9.
    """
    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))

    gradient_settings = {"lr": 1e-6, "tol": 1e-8, "max_iter": 10}
    param_set = [
        {"solver": "gd", **gradient_settings},
        {"solver": "sgd", **gradient_settings},
        {"solver": "block_sgd", **gradient_settings},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
        {"solver": "lbfgs", "tol": 1e-8, "max_iter": 20},
    ]
    # Model class paired with its penalty-specific constructor arguments,
    # in the order they are exercised.
    penalized_models = (
        (Lasso, {"alpha": 0.5}),
        (Ridge, {"alpha": 0.5}),
        (ElasticNet, {"alpha": 0.5, "l1_ratio": 0.5}),
    )
    for kwargs in param_set:
        check_quality = kwargs["solver"] in ("newton", "lbfgs")
        for model_cls, penalty in penalized_models:
            model = model_cls(**penalty, **kwargs)
            model.fit(X, y)
            if check_quality:
                # NOTE(review): other tests in this file call .get() on the
                # deviance_sqr result before using it; confirm this comparison
                # is evaluated eagerly.
                assert model.deviance_sqr(X, y) > 0.9
Пример #12
0
def test_basic_assign(app_inst: ArrayApplication):
    """Mirror slice assignments between ArrayViews and NumPy arrays.

    For every selection produced by ``get_slices`` the same assignment is
    applied to an ArrayView pair and to plain NumPy arrays; afterwards both
    the source and the destination must match their NumPy counterparts.
    """
    src_np: np.ndarray = np.arange(5)
    blocks = (3, )
    selections = list(
        get_slices(size=10, index_multiplier=2, basic_step=True))
    progress = tqdm.tqdm(total=len(selections))
    for sel in selections:
        progress.set_description(str(sel))
        progress.update(1)

        # Fresh source and destination views for every selection.
        src_view = ArrayView.from_block_array(
            app_inst.array(src_np, block_shape=blocks))
        dst_np: np.ndarray = np.zeros(5)
        dst_view = ArrayView.from_block_array(
            app_inst.array(dst_np, block_shape=blocks))

        dst_view[sel] = src_view[sel]
        dst_np[sel] = src_np[sel]

        # The source must be left untouched by the assignment.
        src_pair = (src_np, src_view.create().get())
        assert np.allclose(*src_pair), str(src_pair)

        dst_pair = (dst_np, dst_view.create().get())
        assert np.allclose(*dst_pair), str(dst_pair)
Пример #13
0
def test_inv(app_inst: ArrayApplication):
    """Compare ``app_inst.inv`` and ``app_inst.cholesky`` with NumPy.

    Runs for float32 and float64 on a single-block 5x5 matrix, using
    relative and absolute tolerances of 1e-4.
    """
    dims = (5, 5)
    tol = {"rtol": 1e-4, "atol": 1e-4}
    for dtype in (np.float32, np.float64):
        sampled = sample_sym_pd_mat(shape=dims).astype(dtype)
        mat = app_inst.array(sampled, block_shape=dims)

        # Invert the R factor of a QR decomposition of the matrix.
        _, r_factor = np.linalg.qr(mat.get())
        r_block = app_inst.array(r_factor, block_shape=dims)
        inverted = app_inst.inv(r_block).get()
        assert np.allclose(np.linalg.inv(r_factor), inverted, **tol)

        lower = app_inst.cholesky(mat).get()
        assert np.allclose(np.linalg.cholesky(mat.get()), lower, **tol)
Пример #14
0
def test_stats(app_inst: ArrayApplication):
    """Compare ``app_inst.mean`` (axis 0) and ``app_inst.std`` (axis 1) with NumPy.

    Exercised on a tiny 3x2 dataset and on a 100x9 dataset.
    """
    # (dataset arguments, block shape) for each scenario.
    scenarios = (
        ((3, 2), (2, 1)),
        ((100, 9), (10, 2)),
    )
    for dataset_args, block_shape in scenarios:
        np_data, _ = BimodalGaussian.get_dataset(*dataset_args)
        data = app_inst.array(np_data, block_shape=block_shape)
        column_means = app_inst.mean(data, axis=0).get()
        assert np.allclose(column_means, np.mean(np_data, axis=0))
        row_stds = app_inst.std(data, axis=1).get()
        assert np.allclose(row_stds, np.std(np_data, axis=1))
Пример #15
0
def test_quickselect(app_inst: ArrayApplication):
    """Check ``app_inst.quickselect`` for positive and negative ranks.

    A fixed eight-element array is checked for every rank in [-8, 8);
    random arrays of several sizes and block sizes are checked against
    ``np.partition``.
    """
    # Simple tests
    values = np.array([3, 7, 2, 4, 5, 1, 5, 6])
    ba_values = app_inst.array(values, block_shape=(3, ))
    oids = ba_values.flattened_oids()
    ordered = [1, 2, 3, 4, 5, 5, 6, 7]
    for k in range(-8, 8):
        # Negative ranks count from the end of the sorted order.
        rank = k + 8 if k < 0 else k
        assert app_inst.quickselect(oids, k) == ordered[rank]

    # Randomized tests
    shapes = [(50, ), (437, ), (1000, )]
    block_shapes = [(10, ), (23, ), (50, )]
    ranks = [-50, -42, -25, -13, 0, 8, 25, 36, 49]
    for shape, block_shape, k in itertools.product(shapes, block_shapes, ranks):
        ba_random = app_inst.random.random(shape=shape, block_shape=block_shape)
        selected = app_inst.quickselect(ba_random.flattened_oids(), k)
        rank = k + shape[0] if k < 0 else k
        assert selected == np.partition(ba_random.get(), rank)[rank]
Пример #16
0
def test_quantile_percentile(app_inst: ArrayApplication):
    """Compare ``app_inst.quantile`` / ``app_inst.percentile`` with NumPy.

    Checked at 0, 50 and 100 percent with the "tdigest" method and "linear"
    interpolation, on an all-ones array and on a small array with repeated
    values.
    """
    # see https://github.com/dask/dask/blob/main/dask/array/tests/test_percentiles.py
    settings = list(itertools.product([0, 50, 100], ["tdigest"], ["linear"]))

    def check(np_values, block_values):
        # Exact equality with NumPy is required for every setting.
        for q, method, interpolation in settings:
            quantile = app_inst.quantile(
                block_values, q / 100, method=method, interpolation=interpolation
            ).get()
            assert quantile == np.quantile(np_values, q / 100)
            percentile = app_inst.percentile(
                block_values, q, method=method, interpolation=interpolation
            ).get()
            assert percentile == np.percentile(np_values, q)

    check(np.ones((10,)), app_inst.ones(shape=(10,), block_shape=(2,)))

    repeated = np.array([0, 0, 5, 5, 5, 5, 20, 20])
    check(repeated, app_inst.array(repeated, block_shape=(3,)))
Пример #17
0
def test_sklearn_poisson_regression(nps_app_inst: ArrayApplication):
    """Print D^2 for PoissonRegression and for scikit-learn's PoissonRegressor.

    Both models are fitted on the same five-sample dataset; D^2 is computed
    with the first model's ``deviance`` function for both sets of predictions.
    The test makes no assertions.
    """
    from sklearn.linear_model import PoissonRegressor as SKPoissonRegressor

    def dsqr(dev_func, y, _y_pred):
        # One minus the ratio of the model deviance to the deviance obtained
        # when predicting the mean of y.
        model_deviance = dev_func(y, _y_pred)
        null_deviance = dev_func(y, nps_app_inst.mean(y))
        return 1 - model_deviance / null_deviance

    coef = np.array([0.2, -0.1])
    np_X = np.array([[0, 1, 2, 3, 4]]).T
    np_y = np.exp(np.dot(np_X, coef[0]) + coef[1]).reshape(-1)
    X = nps_app_inst.array(np_X, block_shape=np_X.shape)
    y = nps_app_inst.array(np_y, block_shape=np_y.shape)
    for kwargs in [{"tol": 1e-4, "max_iter": 100}]:
        model: PoissonRegression = PoissonRegression(**kwargs)
        model.fit(X, y)
        predicted = model.predict(X).get()
        print("D^2", dsqr(model.deviance, y, predicted).get())

        sk_model = SKPoissonRegressor(**kwargs)
        sk_model.fit(np_X, np_y)
        sk_predicted = sk_model.predict(np_X)
        print("D^2", dsqr(model.deviance, y, sk_predicted).get())
Пример #18
0
def test_assign_complete_2dim_slices(app_inst: ArrayApplication):
    """Exhaustively test 2-D slice-to-slice assignment between block arrays.

    For every block shape of a 4x6 array (source and destination must share
    it), every pair of start/stop corners is enumerated; whenever the two
    selections are non-empty and have equal extents, the assignment is
    applied both to NumPy arrays and to block arrays and the results are
    compared.
    """
    # All 2-dim slice assignments.
    # i_1:i_2 = k_1:k_2
    A_shape = 4, 6
    B_shape = 4, 6
    A_shape_range = list(map(lambda x: list(range(1, x + 1)), A_shape))
    B_shape_range = list(map(lambda x: list(range(1, x + 1)), B_shape))
    # The same index tuples serve as candidate block shapes and as
    # start/stop corners of the slices below.
    A_block_shapes = list(itertools.product(*A_shape_range))
    B_block_shapes = list(itertools.product(*B_shape_range))

    # np.prod replaces np.product, which was removed in NumPy 2.0.
    pbar = tqdm.tqdm(total=np.prod([
        len(A_block_shapes),
        len(B_block_shapes),
        len(A_block_shapes)**2,
        len(B_block_shapes)**2,
    ]))
    for A_block_shape in A_block_shapes:
        for B_block_shape in B_block_shapes:
            if A_block_shape != B_block_shape:
                # If array shapes are equal
                # then block shapes must be equal.
                pbar.update(len(A_block_shapes)**2 * len(B_block_shapes)**2)
                continue
            npA = np.zeros(np.prod(A_shape)).reshape(*A_shape)
            npB = np.random.random_sample(
                np.prod(B_shape)).reshape(*B_shape)
            A = app_inst.array(npA, block_shape=A_block_shape)
            B = app_inst.array(npB, block_shape=B_block_shape)
            for A_strt in A_block_shapes:
                for A_stp in A_block_shapes:
                    for B_strt in B_block_shapes:
                        for B_stp in B_block_shapes:
                            pbar.update(1)
                            # Skip empty or reversed destination selections.
                            if A_stp[0] <= A_strt[0] or A_stp[1] <= A_strt[1]:
                                continue
                            # Source and destination extents must match.
                            if (A_stp[0] - A_strt[0] != B_stp[0] - B_strt[0]
                                    or A_stp[1] - A_strt[1] !=
                                    B_stp[1] - B_strt[1]):
                                continue
                            desc_A = "(%d, %d)[%d:%d, %d:%d]" % (
                                A.block_shape[0], A.block_shape[1], A_strt[0],
                                A_stp[0], A_strt[1], A_stp[1])
                            desc_B = "(%d, %d)[%d:%d, %d:%d]" % (
                                B.block_shape[0], B.block_shape[1], B_strt[0],
                                B_stp[0], B_strt[1], B_stp[1])
                            desc = "Testing 2dim slices. %s = %s" % (desc_A,
                                                                     desc_B)
                            pbar.set_description(desc=desc)
                            assert np.allclose(
                                B[B_strt[0]:B_stp[0],
                                  B_strt[1]:B_stp[1]].get(),
                                npB[B_strt[0]:B_stp[0], B_strt[1]:B_stp[1]])
                            # Apply the same assignment to the NumPy
                            # reference and to the block arrays.
                            npA[A_strt[0]:A_stp[0],
                                A_strt[1]:A_stp[1]] = npB[B_strt[0]:B_stp[0],
                                                          B_strt[1]:B_stp[1]]
                            A[A_strt[0]:A_stp[0],
                              A_strt[1]:A_stp[1]] = B[B_strt[0]:B_stp[0],
                                                      B_strt[1]:B_stp[1]]
                            assert np.allclose(A.get(), npA)
                            assert np.allclose(B.get(), npB)
Пример #19
0
def test_median(app_inst: ArrayApplication):
    """Compare ``app_inst.median`` with ``np.median``.

    Uses one odd-length and one even-length fixed array, then random arrays
    of several sizes and block sizes; exact equality is required.
    """
    # Simple tests
    fixed_inputs = (
        [7, 2, 4, 5, 1, 5, 6],
        [3, 7, 2, 4, 5, 1, 5, 6],
    )
    for raw in fixed_inputs:
        values = np.array(raw)
        blocked = app_inst.array(values, block_shape=(3,))
        assert app_inst.median(blocked).get() == np.median(values)

    # Randomized tests
    sizes = [(50,), (437,), (1000,)]
    block_sizes = [(10,), (23,), (50,)]
    for shape, block_shape in itertools.product(sizes, block_sizes):
        blocked = app_inst.random.random(shape=shape, block_shape=block_shape)
        assert app_inst.median(blocked).get() == np.median(blocked.get())
Пример #20
0
def test_qr(app_inst: ArrayApplication):
    """Check that ``indirect_tsqr`` and ``direct_tsqr`` reconstruct the input.

    For each routine the product of the returned factors must be close to
    the original data.
    """
    np_data, _ = BimodalGaussian.get_dataset(2345, 9)
    data = app_inst.array(np_data, block_shape=(123, 4))
    for factorize in (app_inst.indirect_tsqr, app_inst.direct_tsqr):
        q_factor, r_factor = factorize(data)
        reconstructed = q_factor.get() @ r_factor.get()
        assert np.allclose(reconstructed, np_data)
Пример #21
0
def test_tensordot_basic(app_inst: ArrayApplication):
    """Compare ``tensordot`` of a 4-D block array's transpose with itself to NumPy."""
    shape = 2, 4, 10, 15
    # np.prod replaces np.product, which was removed in NumPy 2.0.
    npX = np.arange(np.prod(shape)).reshape(*shape)
    rX = app_inst.array(npX, block_shape=(1, 2, 10, 3))
    rResult = rX.T.tensordot(rX, axes=1)
    assert np.allclose(rResult.get(), (np.tensordot(npX.T, npX, axes=1)))
    common.check_block_integrity(rResult)
Пример #22
0
def test_tensordot_all_shapes(app_inst: ArrayApplication):
    """Test ``tensordot`` over all compatible pairs of block shapes.

    For each value of ``axes`` every block shape of ``a`` is paired with
    every block shape of ``b``; pairs whose contracted block dimensions
    agree are multiplied and compared with ``np.tensordot``.
    """
    for axes in [0, 1, 2]:
        if axes == 2:
            a = np.arange(7 * 6 * 4).reshape((7, 6, 4))
            b = np.arange(6 * 4 * 9).reshape((6, 4, 9))
            c = np.tensordot(a, b, axes=axes)
        elif axes in (1, 0):
            a = np.arange(7 * 6 * 4).reshape((7, 6, 4))
            b = np.arange(6 * 4 * 9).reshape((4, 6, 9))
            c = np.tensordot(a, b, axes=axes)
        else:
            raise Exception()
        a_block_shapes = list(
            itertools.product(
                *list(map(lambda x: list(range(1, x + 1)), a.shape))))
        b_block_shapes = list(
            itertools.product(
                *list(map(lambda x: list(range(1, x + 1)), b.shape))))
        # np.prod replaces np.product, which was removed in NumPy 2.0.
        pbar = tqdm.tqdm(total=np.prod(
            [len(a_block_shapes), len(b_block_shapes)]))
        for a_block_shape in a_block_shapes:
            for b_block_shape in b_block_shapes:
                pbar.update(1)
                # NOTE(review): for axes == 0 the slice a_block_shape[-0:] is
                # the whole tuple while b_block_shape[:0] is empty, so this
                # guard skips every pair and no axes=0 product is exercised.
                # Left unchanged on purpose: enabling it would add every pair
                # of block shapes to the run. Decide separately.
                if a_block_shape[-axes:] != b_block_shape[:axes]:
                    continue
                pbar.set_description(
                    "axes=%s %s @ %s" %
                    (str(axes), str(a_block_shape), str(b_block_shape)))
                block_a = app_inst.array(a, block_shape=a_block_shape)
                block_b = app_inst.array(b, block_shape=b_block_shape)
                block_c = block_a.tensordot(block_b, axes=axes)
                assert np.allclose(block_c.get(), c)
                common.check_block_integrity(block_c)
Пример #23
0
def test_bool_reduction(app_inst: ArrayApplication):
    """Summing a boolean block array must match NumPy in value and dtype."""
    flags = np.array([True, False, True, True, False, False], dtype=np.bool_)
    blocked = app_inst.array(flags, block_shape=(2,))
    actual = app_inst.sum(blocked, axis=0).get()
    expected = np.sum(flags)
    # Check the dtype first so a promotion mismatch is reported as such.
    assert actual.dtype == expected.dtype
    assert actual == expected
Пример #24
0
def test_transpose(app_inst: ArrayApplication):
    """Check ``.T`` against NumPy and that transposing twice is the identity."""
    np_data, _ = BimodalGaussian.get_dataset(100, 9)
    data = app_inst.array(np_data, block_shape=(100, 1))

    transposed = data.T.get()
    assert np.allclose(transposed, np_data.T)

    # Identity.
    assert np.allclose(data.T.T.get(), data.get())
    assert np.allclose(data.T.T.get(), np_data)
Пример #25
0
def test_inv_assumptions(app_inst: ArrayApplication):
    """Check ``app_inst.inv`` and the LAPACK/SciPy behaviours it may rely on.

    Besides comparing ``app_inst.inv`` with ``np.linalg.inv``, the test pins
    down how ``lapack.dtrtri`` and ``scipy.linalg.cholesky`` behave with
    respect to memory layout and in-place overwriting, and finally asserts
    that the Cholesky-based inverse runs faster than ``np.linalg.inv`` on a
    matrix sampled with shape (1500, 1500).
    """
    # pylint: disable=no-member, unused-variable
    # presumably a symmetric positive-definite sample, judging by the helper
    # name and the Cholesky calls below - TODO confirm.
    np_Z = sample_sym_pd_mat(shape=(10, 10))

    # Compute the inverse of np_Z using sym_psd routine.
    Z = app_inst.array(np_Z, np_Z.shape)
    Z_inv = app_inst.inv(Z).get()
    Z_true_inv = np.linalg.inv(np_Z)
    assert np.allclose(Z_true_inv, Z_inv)

    # Try Cholesky approach.
    np_L = np.linalg.cholesky(np_Z)
    np_L_inv = np.linalg.inv(np_L)
    # With Z = L L^T, the inverse is L^-T L^-1.
    Z_cho_inv = np_L_inv.T @ np_L_inv
    assert np.allclose(Z_cho_inv, Z_true_inv)

    # Test backsub.
    # dtrtri is the double-precision routine, hence the dtype check first.
    assert np_L.dtype == np.float64
    lp_L_inv, _ = lapack.dtrtri(np_L, lower=1, unitdiag=0, overwrite_c=0)
    assert np.allclose(np_L_inv, lp_L_inv)

    # Test overwrite.
    # With a Fortran-ordered input and overwrite_c=1 the test expects the
    # input buffer itself to end up holding the inverse.
    overwrite_L_inv = np_L.copy(order="F")
    overwrite_L_inv_res, info = lapack.dtrtri(overwrite_L_inv, lower=1, unitdiag=0, overwrite_c=1)
    assert np.allclose(overwrite_L_inv_res, overwrite_L_inv)
    assert np.allclose(np_L_inv, overwrite_L_inv)

    # This should copy.
    # With a C-ordered input the test expects the input to stay different
    # from the returned result, i.e. not to be overwritten.
    overwrite_L_inv = np_L.copy(order="C")
    overwrite_L_inv_res, info = lapack.dtrtri(overwrite_L_inv, lower=1, unitdiag=0, overwrite_c=1)
    assert not np.allclose(overwrite_L_inv_res, overwrite_L_inv)

    # scipy cholesky tests.
    # Cholesky factorisation fed straight into the triangular inverse.
    scipy_L_inv, info = lapack.dtrtri(scipy.linalg.cholesky(np.asfortranarray(np_Z),
                                                            lower=True,
                                                            overwrite_a=True,
                                                            check_finite=False),
                                      lower=1,
                                      unitdiag=0,
                                      overwrite_c=1)
    assert np.allclose(scipy_L_inv, np_L_inv)

    # Benchmark test.
    # NOTE(review): the final assertion compares wall-clock times and may be
    # sensitive to machine load.
    np_Z = sample_sym_pd_mat((1500, 1500))
    scipy_runtime = time.time()
    scipy_L_inv, info = lapack.dtrtri(scipy.linalg.cholesky(np.asfortranarray(np_Z),
                                                            lower=True,
                                                            overwrite_a=True,
                                                            check_finite=False),
                                      lower=1,
                                      unitdiag=0,
                                      overwrite_c=1)
    scipy_Z_inv = scipy_L_inv.T @ scipy_L_inv
    scipy_runtime = time.time() - scipy_runtime

    np_runtime = time.time()
    np_Z_inv = np.linalg.inv(np_Z)
    np_runtime = time.time() - np_runtime
    assert scipy_runtime < np_runtime
Пример #26
0
def test_transposed_block(app_inst_all: ArrayApplication):
    """Inspect block (0, 1) of the transpose of a 2x3 array.

    The block must report size 3, must not be flagged as transposed, and its
    grid entry and grid shape must equal their "true" counterparts.
    """
    source = np.array([[1, 2, 3], [4, 5, 6]])
    arr: BlockArray = app_inst_all.array(source, block_shape=(1, 3))
    blk: Block = arr.T.blocks[0, 1]
    assert blk.size() == 3
    assert not blk.transposed
    assert blk.grid_entry == blk.true_grid_entry()
    assert blk.grid_shape == blk.true_grid_shape()
Пример #27
0
def test_basic_assignment_broadcasting(app_inst: ArrayApplication):
    """Test slice assignment with broadcasting between two 4-D block arrays.

    For every combination of access modes on up to three leading axes of the
    destination and of the source, random bounds are drawn and the assignment
    is tried on NumPy arrays first. Whenever NumPy accepts it, the same
    assignment is applied to the block arrays and both results are compared.
    """
    # Test mixed-length broadcasting.
    def get_sel(num_entries, shape):
        # Draw a random (start, stop) pair for each of the first
        # num_entries axes, with 0 <= start < dim and start <= stop <= dim.
        r = []
        for i in range(num_entries):
            dim = shape[i]
            # randint's upper bound is exclusive; these calls cover the same
            # inclusive ranges as the deprecated random_integers.
            start = rs.randint(0, dim)
            stop = rs.randint(start, dim + 1)
            r.append((start, stop))
        return r

    rs = np.random.RandomState(1337)
    a_shape = (6, 7, 2, 5)
    a_block_shape = (2, 4, 2, 3)
    b_shape = (6, 7, 2, 5)
    b_block_shape = (3, 2, 1, 2)
    num_axes = len(a_shape)
    # Each mode turns a (start, stop) pair into one subscript entry.
    access_modes = [
        lambda a1, a2: a1,
        lambda a1, a2: slice(None, None, None),
        lambda a1, a2: slice(a1, None, None),
        lambda a1, a2: slice(None, a1, None),
        lambda a1, a2: slice(a1, a2, None),
    ]
    for a_len in range(num_axes):
        for b_len in range(num_axes):
            a_mode_iterator = list(
                itertools.product(access_modes, repeat=a_len))
            b_mode_iterator = list(
                itertools.product(access_modes, repeat=b_len))
            pbar = tqdm.tqdm(
                total=len(a_mode_iterator) * len(b_mode_iterator),
                desc="Testing assignment broadcasting %d/%d" %
                (a_len * num_axes + b_len, num_axes**2),
            )
            # Create some valid intervals.
            for a_mode in a_mode_iterator:
                for b_mode in b_mode_iterator:
                    pbar.update(1)
                    a_sel = get_sel(a_len, a_shape)
                    b_sel = get_sel(b_len, b_shape)
                    a_accessor = tuple(a_mode[i](*a_sel[i])
                                       for i in range(a_len))
                    b_accessor = tuple(b_mode[i](*b_sel[i])
                                       for i in range(b_len))
                    # np.prod replaces np.product, which was removed in
                    # NumPy 2.0. The arrays are rebuilt on every iteration
                    # because arr_a is modified by the assignment below.
                    arr_a = np.arange(np.prod(a_shape)).reshape(a_shape)
                    arr_b = np.arange(np.prod(b_shape)).reshape(b_shape)
                    ba_a = app_inst.array(arr_a, a_block_shape)
                    ba_b = app_inst.array(arr_b, b_block_shape)
                    try:
                        arr_a[a_accessor] = arr_b[b_accessor]
                        broadcasted = True
                    except ValueError:
                        # NumPy rejected the shapes; nothing to compare.
                        broadcasted = False
                    if broadcasted:
                        ba_a[a_accessor] = ba_b[b_accessor]
                        assert np.allclose(arr_a, ba_a.get())
                        assert np.allclose(arr_b, ba_b.get())
Пример #28
0
def test_poisson_basic(nps_app_inst: ArrayApplication):
    """Fit PoissonRegression on noise-free data and recover the coefficients.

    The targets are generated as ``exp(0.2 * x - 0.1)``; after fitting with
    the "newton" solver the slope and intercept must match within
    ``rtol=1e-4``.
    """
    coef = np.array([0.2, -0.1])
    np_X = np.array([[0, 1, 2, 3, 4]]).T
    np_y = np.exp(np.dot(np_X, coef[0]) + coef[1]).reshape(-1)
    X = nps_app_inst.array(np_X, block_shape=np_X.shape)
    y = nps_app_inst.array(np_y, block_shape=np_y.shape)

    model: PoissonRegression = PoissonRegression(
        solver="newton", tol=1e-8, max_iter=10
    )
    model.fit(X, y)
    print("norm", model.grad_norm_sq(X, y).get())
    print("objective", model.objective(X, y).get())
    print("D^2", model.deviance_sqr(X, y).get())

    # Slope: every coefficient except the last one.
    expected_beta = nps_app_inst.array(coef[:-1], block_shape=(1,))
    assert nps_app_inst.allclose(model._beta, expected_beta, rtol=1e-4).get()

    # Intercept: the last coefficient.
    expected_beta0 = nps_app_inst.scalar(coef[-1])
    assert nps_app_inst.allclose(model._beta0, expected_beta0, rtol=1e-4).get()
Пример #29
0
def test_tensordot_large_shape(app_inst: ArrayApplication):
    """Compare ``tensordot`` (axes=1) of two larger 4-D block arrays with NumPy."""
    lhs = np.arange(4 * 6 * 10 * 90).reshape((90, 10, 6, 4))
    rhs = np.arange(4 * 6 * 10 * 75).reshape((4, 6, 10, 75))
    expected = np.tensordot(lhs, rhs, axes=1)

    lhs_blocks = app_inst.array(lhs, block_shape=(30, 5, 3, 2))
    rhs_blocks = app_inst.array(rhs, block_shape=(2, 3, 5, 25))
    product = lhs_blocks.tensordot(rhs_blocks, axes=1)

    assert np.allclose(product.get(), expected)
    common.check_block_integrity(product)
Пример #30
0
def test_pca(app_inst: ArrayApplication):
    """Check ``linalg.pca`` against a projection built from the covariance SVD.

    The first and third factors returned by ``linalg.svd`` for the covariance
    matrix must be transposes of each other, and projecting the data onto the
    first factor must match ``linalg.pca``.
    """
    real_X, _ = BimodalGaussian.get_dataset(2345, 9)
    X = app_inst.array(real_X, block_shape=(123, 4))

    # Covariance matrix test.
    C = app_inst.cov(X, rowvar=False)
    V, _, VT = linalg.svd(app_inst, C)
    # Materialize the comparison with .get() so the assertion tests the
    # computed boolean rather than the truthiness of the returned array.
    assert app_inst.allclose(V, VT.T).get()
    pc = X @ V
    assert app_inst.allclose(pc, linalg.pca(app_inst, X)).get()