Пример #1
0
def test_pca(app_inst: ArrayApplication):
    """Compare ``linalg.pca`` with a projection built from an SVD of the covariance.

    The data comes from ``BimodalGaussian.get_dataset(2345, 9)`` and is wrapped
    in a block array with block shape ``(123, 4)``.
    """
    samples, _ = BimodalGaussian.get_dataset(2345, 9)
    X = app_inst.array(samples, block_shape=(123, 4))

    # Decompose the covariance of X (computed with rowvar=False).
    covariance = app_inst.cov(X, rowvar=False)
    left_factor, _, right_factor_t = linalg.svd(app_inst, covariance)
    # The left factor is expected to match the transpose of the right factor.
    assert app_inst.allclose(left_factor, right_factor_t.T)

    # Projecting X onto the left factor should agree with linalg.pca.
    projected = X @ left_factor
    assert app_inst.allclose(projected, linalg.pca(app_inst, X))
Пример #2
0
def test_poisson_basic(nps_app_inst: ArrayApplication):
    """Fit a Newton-solver ``PoissonRegression`` on five points and check its parameters.

    Targets are generated as ``exp(0.2 * x - 0.1)`` for ``x = 0..4``. After
    fitting, ``_beta`` and ``_beta0`` are compared against those two constants
    with ``rtol=1e-4``.
    """
    coef = np.array([0.2, -0.1])
    X_real = np.array([[0, 1, 2, 3, 4]]).T
    # coef[0] acts as the slope and coef[1] as the intercept of the log-mean.
    log_mean = np.dot(X_real, coef[0]) + coef[1]
    y_real = np.exp(log_mean).reshape(-1)

    # Each array is stored as a single block covering its whole shape.
    X = nps_app_inst.array(X_real, block_shape=X_real.shape)
    y = nps_app_inst.array(y_real, block_shape=y_real.shape)

    model: PoissonRegression = PoissonRegression(
        solver="newton", tol=1e-8, max_iter=10
    )
    model.fit(X, y)

    # Diagnostics only; nothing below is asserted on.
    grad_norm_sq = model.grad_norm_sq(X, y).get()
    print("norm", grad_norm_sq)
    objective = model.objective(X, y).get()
    print("objective", objective)
    deviance_sqr = model.deviance_sqr(X, y).get()
    print("D^2", deviance_sqr)

    beta_matches = nps_app_inst.allclose(
        model._beta, nps_app_inst.array(coef[:-1], block_shape=(1,)), rtol=1e-4
    )
    assert beta_matches.get()
    beta0_matches = nps_app_inst.allclose(
        model._beta0, nps_app_inst.scalar(coef[-1]), rtol=1e-4
    )
    assert beta0_matches.get()
Пример #3
0
def test_default_random(app_inst: ArrayApplication):
    """Check unseeded random states differ and the default generator is reseedable.

    First part: two draws from fresh ``random_state()`` objects must end up
    unequal, retrying the second draw up to ten times (with a warning if any
    retry was needed). Second part: reseeding ``app_inst.random`` with the same
    seed must reproduce the same draw.
    """
    first = app_inst.random_state().random()
    second = app_inst.random_state().random()
    retries, retry_limit = 0, 10
    while True:
        # Stop as soon as the two draws differ ...
        if not app_inst.allclose(first, second):
            break
        # ... or once the retry budget is spent.
        if retries >= retry_limit:
            break
        retries += 1
        second = app_inst.random_state().random()
    if retries > 0:
        warnings.warn(
            "More than one iteration required to generate unequal random numbers."
        )
    assert not app_inst.allclose(first, second)

    # Test default random seed.
    seed = 1337
    seeded_draws = []
    for _ in range(2):
        app_inst.random.seed(seed)
        seeded_draws.append(app_inst.random.random())
    assert app_inst.allclose(seeded_draws[0], seeded_draws[1])
Пример #4
0
def test_rwd(app_inst: ArrayApplication):
    """Round-trip a 7x5 block array through ``write_fs``, ``read_fs`` and ``delete_fs``."""
    filename = "/tmp/darrays/read_write_delete_array_test"
    source: np.ndarray = np.random.random(35).reshape(7, 5)
    ba: BlockArray = app_inst.array(source, block_shape=(3, 4))

    # Write: each per-block result must match the corresponding entry of the
    # fully fetched write result.
    written: BlockArray = app_inst.write_fs(ba, filename)
    written_np = written.get()
    for entry in written.grid.get_entry_iterator():
        block_result = written[entry].get()
        assert block_result == written_np[entry]

    # Read: the array loaded back must be close to the one written.
    restored: BlockArray = app_inst.read_fs(filename)
    round_trip_ok = app_inst.allclose(ba, restored)
    assert app_inst.get(round_trip_ok)

    # Delete: the call is expected to return a truthy value.
    deleted: bool = app_inst.delete_fs(filename)
    assert deleted
Пример #5
0
def test_np_random(app_inst: ArrayApplication):
    """Exercise ``random_state(...).random``: scalar draws, seeding and dtype.

    Covers, in order: a single scalar sample, a Kolmogorov-Smirnov uniformity
    check on a seeded block array, equality for equal seeds, inequality for
    different seeds and for a different generation block shape, and the
    ``dtype`` argument.
    """

    # Sample a single value.
    sample = app_inst.random_state(1337).random().get()
    assert sample.shape == ()
    # ``np.float`` was a plain alias for the builtin ``float``; it was
    # deprecated in NumPy 1.20 and removed in 1.24, so check against ``float``.
    assert isinstance(sample.item(), float)

    shape, block_shape = (15, 10), (5, 5)
    # Probably not equal if pvalue falls below this threshold.
    epsilon = 1e-2
    rs1: NumsRandomState = app_inst.random_state(1337)
    ba1: BlockArray = rs1.random(shape, block_shape)
    # The Kolmogorov–Smirnov test for arbitrary distributions.
    # Under the null hypothesis, the distributions are equal,
    # so we say distributions are neq if pvalue < epsilon.
    _, pvalue = stats.kstest(ba1.get().flatten(), stats.uniform.cdf)
    assert pvalue > epsilon

    # Same seed, same shapes: the generated arrays must match.
    rs2: NumsRandomState = app_inst.random_state(1337)
    ba2: BlockArray = rs2.random(shape, block_shape)
    assert app_inst.allclose(ba1, ba2)

    # Different seed: the generated arrays must differ.
    rs3: NumsRandomState = app_inst.random_state(1338)
    ba3: BlockArray = rs3.random(shape, block_shape)
    assert not app_inst.allclose(ba2, ba3)

    # If block shape differs, so does generated arrays.
    # This is a non-issue since we don't expose block shape as a param.
    rs4: NumsRandomState = app_inst.random_state(1337)
    ba4: BlockArray = rs4.random(shape, block_shape=(6, 7)).reshape(
        block_shape=block_shape
    )
    assert not app_inst.allclose(ba2, ba4)

    # dtype tests.
    rs5: NumsRandomState = app_inst.random_state(1337)
    ba5: BlockArray = rs5.random(shape, block_shape, dtype=np.float32)
    assert ba5.dtype is np.float32
    assert str(ba5.get().dtype) == "float32"
Пример #6
0
def test_poisson(nps_app_inst: ArrayApplication):
    """Recover the parameters of a reference ``PoissonRegression`` from its predictions.

    A reference model is given random ``_beta`` / ``_beta0`` values, its
    predictions on random inputs are used as targets, and a freshly fitted
    model (one per entry in ``param_set``) must end up with parameters close
    to the reference ones. Timing and diagnostics are printed but not asserted.
    """
    # TODO (hme): Is there a more appropriate distribution for testing Poisson?
    num_samples, num_features = 1000, 1
    rs = np.random.RandomState(1337)
    real_beta = rs.random_sample(num_features)
    real_model: PoissonRegression = PoissonRegression(solver="newton")
    # NOTE(review): the block shapes (3,) and (100, 3) below are larger than
    # the single feature dimension -- confirm this is intended.
    real_model._beta = nps_app_inst.array(real_beta, block_shape=(3,))
    real_model._beta0 = nps_app_inst.scalar(rs.random_sample())
    real_X = rs.random_sample(size=(num_samples, num_features))
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = real_model.predict(X)
    param_set = [{"solver": "newton", "tol": 1e-8, "max_iter": 10}]
    for kwargs in param_set:
        # perf_counter is monotonic, so the elapsed time cannot be skewed by
        # wall-clock adjustments the way time.time() differences can.
        start = time.perf_counter()
        model: PoissonRegression = PoissonRegression(**kwargs)
        model.fit(X, y)
        runtime = time.perf_counter() - start
        print("opt", kwargs["solver"])
        print("runtime", runtime)
        print("norm", model.grad_norm_sq(X, y).get())
        print("objective", model.objective(X, y).get())
        print("D^2", model.deviance_sqr(X, y).get())
        assert nps_app_inst.allclose(real_model._beta, model._beta).get()
        assert nps_app_inst.allclose(real_model._beta0, model._beta0).get()
Пример #7
0
def test_rwd(app_inst: ArrayApplication):
    """Round-trip a 7x5 block array through ``write_s3``, ``read_s3`` and ``delete_s3``."""
    filename = "darrays/read_write_delete_array_test"
    source: np.ndarray = np.random.random(35).reshape(7, 5)
    ba: BlockArray = app_inst.array(source, block_shape=(3, 4))

    # Write: every per-block response must contain an "ETag" entry.
    put_result: BlockArray = app_inst.write_s3(ba, filename)
    put_responses = app_inst.get(put_result)
    for entry in put_result.grid.get_entry_iterator():
        assert "ETag" in put_responses[entry]

    # Read: the array loaded back must be close to the one written.
    restored: BlockArray = app_inst.read_s3(filename)
    round_trip_ok = app_inst.allclose(ba, restored)
    assert app_inst.get(round_trip_ok)

    # Delete: the first deleted key reported for each block must equal the
    # key StoredArrayS3 computes for that grid entry.
    del_result: BlockArray = app_inst.delete_s3(filename)
    del_responses = app_inst.get(del_result)
    for entry in del_result.grid.get_entry_iterator():
        reported_key = del_responses[entry]["Deleted"][0]["Key"]
        stored = StoredArrayS3(filename, del_result.grid)
        assert reported_key == stored.get_key(entry)
Пример #8
0
def test_rwd(app_inst_s3: ArrayApplication):
    """Create the ``darrays`` bucket, then round-trip a block array through S3.

    The write step checks for an "ETag" in each per-block response, the read
    step checks closeness to the original array, and the delete step expects a
    truthy return value.
    """
    bucket_name = "darrays"

    # The bucket must not exist yet; create it for this test.
    s3 = boto3.resource("s3", region_name="us-east-1")
    assert s3.Bucket(bucket_name) not in s3.buckets.all()
    s3.create_bucket(Bucket=bucket_name)

    filename = "darrays/read_write_delete_array_test"
    source: np.ndarray = np.random.random(35).reshape(7, 5)
    ba: BlockArray = app_inst_s3.array(source, block_shape=(3, 4))

    # Write.
    put_result: BlockArray = app_inst_s3.write_s3(ba, filename)
    put_responses = app_inst_s3.get(put_result)
    for entry in put_result.grid.get_entry_iterator():
        assert "ETag" in put_responses[entry]

    # Read.
    restored: BlockArray = app_inst_s3.read_s3(filename)
    round_trip_ok = app_inst_s3.allclose(ba, restored)
    assert app_inst_s3.get(round_trip_ok)

    # Delete.
    deleted: bool = app_inst_s3.delete_s3(filename)
    assert deleted
Пример #9
0
def test_rwd(serial_app_inst: ArrayApplication):
    """Create the ``darrays`` bucket, then round-trip a block array through S3.

    The write step checks for an "ETag" in each per-block response, the read
    step checks closeness to the original array, and the delete step compares
    each reported deleted key with the key ``StoredArrayS3`` computes.
    """
    bucket_name = "darrays"

    # The bucket must not exist yet; create it for this test.
    s3 = boto3.resource("s3", region_name="us-east-1")
    assert s3.Bucket(bucket_name) not in s3.buckets.all()
    s3.create_bucket(Bucket=bucket_name)

    filename = "darrays/read_write_delete_array_test"
    source: np.ndarray = np.random.random(35).reshape(7, 5)
    ba: BlockArray = serial_app_inst.array(source, block_shape=(3, 4))

    # Write.
    put_result: BlockArray = serial_app_inst.write_s3(ba, filename)
    put_responses = serial_app_inst.get(put_result)
    for entry in put_result.grid.get_entry_iterator():
        assert "ETag" in put_responses[entry]

    # Read.
    restored: BlockArray = serial_app_inst.read_s3(filename)
    round_trip_ok = serial_app_inst.allclose(ba, restored)
    assert serial_app_inst.get(round_trip_ok)

    # Delete.
    del_result: BlockArray = serial_app_inst.delete_s3(filename)
    del_responses = serial_app_inst.get(del_result)
    for entry in del_result.grid.get_entry_iterator():
        reported_key = del_responses[entry]["Deleted"][0]["Key"]
        stored = StoredArrayS3(filename, del_result.grid)
        assert reported_key == stored.get_key(entry)