def test_linear_regression__raise_on_dof_lte_0():
    """Expect a ValueError when there are too few observations.

    With only two rows and five core covariate columns, the sample count
    is too low relative to the core covariates, which leaves no positive
    degrees of freedom for the regression.
    """
    n_rows = 2
    xl = np.ones((n_rows, 10))
    xc = np.ones((n_rows, 5))
    y = np.ones((n_rows, 3))
    expected_message = r"Number of observations \(N\) too small"
    with pytest.raises(ValueError, match=expected_message):
        linear_regression(xl, xc, y)
def check_simulation_result(
    datadir: Path,
    config: Dict[str, Any],
    run: Dict[str, Any],
    xp: Any,
) -> None:
    """Recompute one simulated run and compare every stage to stored results.

    The genotypes, covariates and traits for the run's dataset are loaded
    from ``datadir / "dataset" / <dataset>``, pushed through
    ``regenie_transform`` with the run's parameter set, and the stage 1,
    stage 2 and stage 3 outputs are handed to the ``check_stage_*_results``
    helpers together with ``datadir / "result" / <run name>``.

    Parameters
    ----------
    datadir
        Root directory holding the ``dataset`` and ``result`` subdirectories.
    config
        Mapping with ``"datasets"`` and ``"paramsets"`` entries, each keyed
        by the names referenced from ``run``.
    run
        Mapping providing the ``"dataset"``, ``"paramset"`` and ``"name"``
        of the simulation run to check.
    xp
        Object whose ``asarray`` is used to convert the loaded inputs
        (presumably an array module such as numpy or cupy -- confirm
        against the callers).
    """
    # Look up the configuration entries and directories for this run
    dataset_name = run["dataset"]
    paramset_name = run["paramset"]
    ds_config = config["datasets"][dataset_name]
    ps_config = config["paramsets"][paramset_name]
    dataset_dir = datadir / "dataset" / dataset_name
    result_dir = datadir / "result" / run["name"]

    # The store stays open until the end because ``ds`` is still needed by
    # the final stage 3 check.
    genotype_archive = str(dataset_dir / "genotypes.zarr.zip")
    with zarr.ZipStore(genotype_archive, mode="r") as store:
        # Load simulated data
        ds = xr.open_zarr(store, consolidated=False)
        df_covariate = load_covariates(dataset_dir)
        df_trait = load_traits(dataset_dir)
        contigs = ds["variant_contig"].values
        geno_sum = xp.asarray(ds["call_genotype"].sum(dim="ploidy").values)
        covariates = xp.asarray(df_covariate.values)
        traits = xp.asarray(df_trait.values)
        configured_alphas = ps_config["alphas"]
        alphas = (
            None if configured_alphas is None else xp.asarray(configured_alphas)
        )

        # Define transformed traits
        res = regenie_transform(
            geno_sum.T,
            covariates,
            traits,
            contigs,
            variant_block_size=ps_config["variant_block_size"],
            sample_block_size=ps_config["sample_block_size"],
            normalize=True,
            add_intercept=False,
            alphas=alphas,
            orthogonalize=False,
            # These three flags deliberately reproduce mistakes so that
            # the output matches the Glow results.
            _glow_adj_dof=True,
            _glow_adj_scaling=True,
            _glow_adj_alpha=True,
        )
        base_prediction = res["regenie_base_prediction"].data
        meta_prediction = res["regenie_meta_prediction"].data

        # Check equality of stage 1 and 2 transformations
        check_stage_1_results(base_prediction, ds_config, ps_config, result_dir)
        check_stage_2_results(meta_prediction, df_trait, result_dir)

        # Check equality of GWAS results: subtract the meta prediction from
        # the traits, then remove the component lying in the span of the
        # covariates' Q factor before running the regression.
        covariates_dask = da.from_array(covariates)
        q_factor = da.linalg.qr(covariates_dask)[0]
        trait_residual = traits - meta_prediction
        projected = trait_residual - q_factor @ (q_factor.T @ trait_residual)
        stats = linear_regression(
            _dask_cupy_to_numpy(geno_sum.T),
            _dask_cupy_to_numpy(projected),
            _dask_cupy_to_numpy(q_factor),
        )
        check_stage_3_results(ds, stats, df_trait, result_dir)
def check_simulation_result(
    datadir: Path, config: Dict[str, Any], run: Dict[str, Any]
) -> None:
    """Recompute one simulated run and compare every stage to stored results.

    The genotypes, covariates and traits for the run's dataset are loaded
    from ``datadir / "dataset" / <dataset>``, pushed through
    ``regenie_transform`` with the run's parameter set, and the stage 1,
    stage 2 and stage 3 outputs are handed to the ``check_stage_*_results``
    helpers together with ``datadir / "result" / <run name>``.

    Parameters
    ----------
    datadir
        Root directory holding the ``dataset`` and ``result`` subdirectories.
    config
        Mapping with ``"datasets"`` and ``"paramsets"`` entries, each keyed
        by the names referenced from ``run``.
    run
        Mapping providing the ``"dataset"``, ``"paramset"`` and ``"name"``
        of the simulation run to check.
    """
    # Look up the configuration entries and directories for this run
    dataset_name = run["dataset"]
    paramset_name = run["paramset"]
    ds_config = config["datasets"][dataset_name]
    ps_config = config["paramsets"][paramset_name]
    dataset_dir = datadir / "dataset" / dataset_name
    result_dir = datadir / "result" / run["name"]

    # The store stays open until the end because ``ds`` is still needed by
    # the final stage 3 check.
    genotype_archive = str(dataset_dir / "genotypes.zarr.zip")
    with zarr.ZipStore(genotype_archive, mode="r") as store:
        # Load simulated data
        ds = xr.open_zarr(store)  # type: ignore[no-untyped-call]
        df_covariate = load_covariates(dataset_dir)
        df_trait = load_traits(dataset_dir)
        contigs = ds["variant_contig"].values
        geno_sum = ds["call_genotype"].sum(dim="ploidy").values
        covariates = df_covariate.values
        traits = df_trait.values

        # Define transformed traits
        res = regenie_transform(
            geno_sum.T,
            covariates,
            traits,
            contigs,
            variant_block_size=ps_config["variant_block_size"],
            sample_block_size=ps_config["sample_block_size"],
            normalize=True,
            add_intercept=False,
            alphas=ps_config["alphas"],
            orthogonalize=False,
            # These three flags deliberately reproduce mistakes so that
            # the output matches the Glow results.
            _glow_adj_dof=True,
            _glow_adj_scaling=True,
            _glow_adj_alpha=True,
        )
        base_prediction = res["base_prediction"].data
        meta_prediction = res["meta_prediction"].data

        # Check equality of stage 1 and 2 transformations
        check_stage_1_results(base_prediction, ds_config, ps_config, result_dir)
        check_stage_2_results(meta_prediction, df_trait, result_dir)

        # Check equality of GWAS results, regressing on the traits with the
        # meta prediction subtracted.
        trait_residual = traits - meta_prediction
        stats = linear_regression(geno_sum.T, covariates, trait_residual)
        check_stage_3_results(ds, stats, df_trait, result_dir)
def test_linear_regression__raise_on_non_2D():
    """Expect a ValueError when one of the arguments is not two-dimensional."""
    n_rows, n_cols = 10, 5
    xc = np.ones((n_rows, n_cols))
    y = np.ones((n_rows, 3))
    # Same leading shape as ``xc`` plus a trailing third dimension
    xl = np.ones((n_rows, n_cols, 1))
    with pytest.raises(ValueError, match="All arguments must be 2D"):
        linear_regression(xl, xc, y)