def test_rwd():
    """Round-trip a BlockArray through S3: write, read back, then delete.

    Checks three things: every written block reports an ETag, the array read
    back matches the one written, and each deleted S3 key equals the key the
    stored-array layout predicts for that block.
    """
    from tests import conftest

    app_inst: ArrayApplication = conftest.get_app("serial")

    # The bucket must not already exist; create it fresh for this test.
    conn = boto3.resource('s3', region_name='us-east-1')
    assert conn.Bucket('darrays') not in conn.buckets.all()
    conn.create_bucket(Bucket='darrays')

    source: np.ndarray = np.random.random(35).reshape(7, 5)
    ba: BlockArray = app_inst.array(source, block_shape=(3, 4))
    filename = "darrays/read_write_delete_array_test"

    # Write phase: each block's response dict should carry an S3 ETag.
    write_result: BlockArray = app_inst.write_s3(ba, filename)
    write_responses = app_inst.get(write_result)
    for entry in write_result.grid.get_entry_iterator():
        assert 'ETag' in write_responses[entry]

    # Read phase: data read back must match what was written.
    ba_read: BlockArray = app_inst.read_s3(filename)
    assert app_inst.get(app_inst.allclose(ba, ba_read))

    # Delete phase: every reported deleted key must match the block's stored key.
    delete_result: BlockArray = app_inst.delete_s3(filename)
    delete_responses = app_inst.get(delete_result)
    for entry in delete_result.grid.get_entry_iterator():
        deleted_key = delete_responses[entry]["Deleted"][0]["Key"]
        assert deleted_key == StoredArrayS3(filename, delete_result.grid).get_key(entry)
# NOTE(review): this chunk starts mid-function — the enclosing `def` (a ridge-
# regression robustness test, judging by the names) is outside this view, so the
# statements are shown flattened; confirm indentation against the full file.
# Perturb the extra rows with uniform noise so they act as outliers in the fit.
# `np.product` was a deprecated alias of `np.prod`, removed in NumPy 2.0.
extra_X = extra_X * rs.random_sample(np.prod(extra_X.shape)).reshape(extra_X.shape)
extra_y = extra_y * rs.random_sample(extra_y.shape).reshape(extra_y.shape)
real_X = np.concatenate([real_X, extra_X], axis=0)
real_y = np.concatenate([real_y, extra_y], axis=0)
X = app_inst.array(real_X, block_shape=(15, 5))
y = app_inst.array(real_y, block_shape=(15,))
# lamb=0.0 reduces ridge regression to ordinary least squares; the large lamb
# is heavily regularized and should be less sensitive to the injected outliers.
theta = app_inst.ridge_regression(X, y, lamb=0.0)
robust_theta = app_inst.ridge_regression(X, y, lamb=10000.0)

# Generate a test set to evaluate robustness to outliers.
test_X, test_y = BimodalGaussian.get_dataset(100, num_features, p=0.5, theta=real_theta)
test_X = app_inst.array(test_X, block_shape=(15, 5))
test_y = app_inst.array(test_y, block_shape=(15,))
# Sum of squared residuals on the clean test set; the regularized fit should win.
theta_error = np.sum((((test_X @ theta) - test_y)**2).get())
robust_theta_error = np.sum((((test_X @ robust_theta) - test_y)**2).get())
assert robust_theta_error < theta_error


if __name__ == "__main__":
    # pylint: disable=import-error
    from tests import conftest

    app_inst = conftest.get_app("serial")
    # test_inv_assumptions(app_inst)
    test_inv(app_inst)
    # test_qr(app_inst)
    # test_svd(app_inst)
    # test_lr(app_inst)
    # test_rr(app_inst)
# NOTE(review): the next three lines are the tail of `test_quickselect`, whose
# `def` and opening statements are outside this chunk — the first line below is
# a parenthesized continuation (`np.partition(..., k + shape[0])[...]`).
k + shape[0])[k + shape[0]]
else:
    # Non-negative k: the k-th smallest element via a direct partition.
    assert value == np.partition(ba_x.get(), k)[k]


def test_median(app_inst: ArrayApplication):
    """Check BlockArray median against np.median on fixed and random inputs."""
    # Simple tests
    np_x = np.array([7, 2, 4, 5, 1, 5, 6])  # odd length: median is a data point
    ba_x = app_inst.array(np_x, block_shape=(3, ))
    assert app_inst.median(ba_x) == np.median(np_x)
    np_x = np.array([3, 7, 2, 4, 5, 1, 5, 6])  # even length: median is a midpoint
    ba_x = app_inst.array(np_x, block_shape=(3, ))
    assert app_inst.median(ba_x) == np.median(np_x)
    # Randomized tests over every (shape, block_shape) combination.
    shapes = [(50, ), (437, ), (1000, )]
    block_shapes = [(10, ), (23, ), (50, )]
    for shape, block_shape in itertools.product(shapes, block_shapes):
        ba_x = app_inst.random.random(shape=shape, block_shape=block_shape)
        assert app_inst.median(ba_x) == np.median(ba_x.get())


if __name__ == "__main__":
    # pylint: disable=import-error
    from tests import conftest
    app_inst: ArrayApplication = conftest.get_app("serial")
    test_quickselect(app_inst)
    test_median(app_inst)
# NOTE(review): this chunk begins mid-function — the statements below are the
# tail of a CSV-reading test whose `def` line is outside this view (the
# `__main__` guard suggests it is `test_read_csv`); shown flattened here.
path = os.path.abspath(__file__)
dir_path = os.path.dirname(path)
filename = os.path.join(dir_path, "test.csv")
# Parallel read must agree with the serial reference implementation.
ba_data: BlockArray = app_inst.read_csv(filename, has_header=True)
np_data = _read_serially(filename, has_header=True)
assert np.allclose(ba_data.get(), np_data)


@pytest.mark.skip
def test_higgs(app_inst: ArrayApplication):
    """Load the HIGGS dataset in parallel and compare against a serial read.

    Skipped by default: requires the large HIGGS.csv file under
    settings.data_dir. Prints timings for both load paths.
    """
    filename = os.path.join(settings.data_dir, "HIGGS.csv")
    t = time.time()
    ba: BlockArray = app_inst.read_csv(filename, num_workers=12)
    ba.touch()  # presumably blocks until the distributed load completes — TODO confirm
    print("HIGGS nums load time", time.time() - t, ba.shape, ba.block_shape)
    t = time.time()
    np_data = _read_serially(filename, has_header=False)
    print("HIGGS serial load time", time.time() - t, np_data.shape)
    assert np.allclose(ba.get(), np_data)


if __name__ == "__main__":
    # pylint: disable=import-error
    from tests import conftest
    app_inst = conftest.get_app("ray-cyclic")
    # test_loadtxt(app_inst)
    # test_rwd(app_inst)
    test_read_csv(app_inst)
    # test_higgs(app_inst)