Exemplo n.º 1
0
def test_linear_regression__raise_on_dof_lte_0():
    """Expect a ValueError when too few observations are supplied."""
    # Only two samples are provided while the core covariate matrix has
    # five columns; per the test name this should leave degrees of freedom
    # at or below zero (the exact formula lives in linear_regression).
    n_samples = 2
    loop_covariates = np.ones((n_samples, 10))
    core_covariates = np.ones((n_samples, 5))
    traits = np.ones((n_samples, 3))
    expected_message = r"Number of observations \(N\) too small"
    with pytest.raises(ValueError, match=expected_message):
        linear_regression(loop_covariates, core_covariates, traits)
Exemplo n.º 2
0
def check_simulation_result(
    datadir: Path,
    config: Dict[str, Any],
    run: Dict[str, Any],
    xp: Any,
) -> None:
    """Re-run one simulated scenario and check every stage against stored results.

    Parameters
    ----------
    datadir
        Root directory holding ``dataset/<dataset>`` inputs and
        ``result/<run name>`` reference outputs.
    config
        Configuration with ``"datasets"`` and ``"paramsets"`` mappings.
    run
        Run description providing the ``"dataset"``, ``"paramset"`` and
        ``"name"`` keys.
    xp
        Array namespace whose ``asarray`` converts the loaded data
        (presumably numpy or cupy -- confirm against callers).
    """
    # Resolve configuration entries and directories for this run
    dataset_name = run["dataset"]
    paramset_name = run["paramset"]
    ds_config = config["datasets"][dataset_name]
    ps_config = config["paramsets"][paramset_name]
    dataset_dir = datadir / "dataset" / dataset_name
    result_dir = datadir / "result" / run["name"]
    genotype_archive = dataset_dir / "genotypes.zarr.zip"

    # Everything below stays inside the store context, as in the original
    # layout, so the dataset remains readable while it is being used
    with zarr.ZipStore(str(genotype_archive), mode="r") as store:
        ds = xr.open_zarr(store, consolidated=False)
        covariate_frame = load_covariates(dataset_dir)
        trait_frame = load_traits(dataset_dir)
        contigs = ds["variant_contig"].values
        genotype_sums = ds["call_genotype"].sum(dim="ploidy").values
        G = xp.asarray(genotype_sums)
        X = xp.asarray(covariate_frame.values)
        Y = xp.asarray(trait_frame.values)
        configured_alphas = ps_config["alphas"]
        alphas = (
            None if configured_alphas is None else xp.asarray(configured_alphas)
        )

        # These flags deliberately reproduce known mistakes so that the
        # outputs line up with the Glow results
        glow_compat_flags = {
            "_glow_adj_dof": True,
            "_glow_adj_scaling": True,
            "_glow_adj_alpha": True,
        }

        # Compute the transformed traits
        transformed = regenie_transform(
            G.T,
            X,
            Y,
            contigs,
            variant_block_size=ps_config["variant_block_size"],
            sample_block_size=ps_config["sample_block_size"],
            normalize=True,
            add_intercept=False,
            alphas=alphas,
            orthogonalize=False,
            **glow_compat_flags,
        )
        base_prediction = transformed["regenie_base_prediction"].data
        meta_prediction = transformed["regenie_meta_prediction"].data

        # Stage 1 and stage 2 transformations must match the references
        check_stage_1_results(base_prediction, ds_config, ps_config, result_dir)
        check_stage_2_results(meta_prediction, trait_frame, result_dir)

        # GWAS results: subtract the meta prediction from the traits, then
        # remove the component lying in the column space of the covariates
        covariates_dask = da.from_array(X)
        Q = da.linalg.qr(covariates_dask)[0]
        trait_residual = Y - meta_prediction
        trait_projected = trait_residual - Q @ (Q.T @ trait_residual)
        loop_covariates_np = _dask_cupy_to_numpy(G.T)
        trait_projected_np = _dask_cupy_to_numpy(trait_projected)
        q_np = _dask_cupy_to_numpy(Q)
        stats = linear_regression(loop_covariates_np, trait_projected_np, q_np)
        check_stage_3_results(ds, stats, trait_frame, result_dir)
Exemplo n.º 3
0
def check_simulation_result(
    datadir: Path, config: Dict[str, Any], run: Dict[str, Any]
) -> None:
    """Re-run one simulated scenario and check every stage against stored results.

    Parameters
    ----------
    datadir
        Root directory holding ``dataset/<dataset>`` inputs and
        ``result/<run name>`` reference outputs.
    config
        Configuration with ``"datasets"`` and ``"paramsets"`` mappings.
    run
        Run description providing the ``"dataset"``, ``"paramset"`` and
        ``"name"`` keys.
    """
    # Resolve configuration entries and directories for this run
    dataset_name = run["dataset"]
    paramset_name = run["paramset"]
    ds_config = config["datasets"][dataset_name]
    ps_config = config["paramsets"][paramset_name]
    dataset_dir = datadir / "dataset" / dataset_name
    result_dir = datadir / "result" / run["name"]
    genotype_archive = dataset_dir / "genotypes.zarr.zip"

    # Everything below stays inside the store context, as in the original
    # layout, so the dataset remains readable while it is being used
    with zarr.ZipStore(str(genotype_archive), mode="r") as store:
        ds = xr.open_zarr(store)  # type: ignore[no-untyped-call]
        covariate_frame = load_covariates(dataset_dir)
        trait_frame = load_traits(dataset_dir)
        contigs = ds["variant_contig"].values
        G = ds["call_genotype"].sum(dim="ploidy").values
        X = covariate_frame.values
        Y = trait_frame.values

        # These flags deliberately reproduce known mistakes so that the
        # outputs line up with the Glow results
        glow_compat_flags = {
            "_glow_adj_dof": True,
            "_glow_adj_scaling": True,
            "_glow_adj_alpha": True,
        }

        # Compute the transformed traits
        transformed = regenie_transform(
            G.T,
            X,
            Y,
            contigs,
            variant_block_size=ps_config["variant_block_size"],
            sample_block_size=ps_config["sample_block_size"],
            normalize=True,
            add_intercept=False,
            alphas=ps_config["alphas"],
            orthogonalize=False,
            **glow_compat_flags,
        )
        base_prediction = transformed["base_prediction"].data
        meta_prediction = transformed["meta_prediction"].data

        # Stage 1 and stage 2 transformations must match the references
        check_stage_1_results(base_prediction, ds_config, ps_config, result_dir)
        check_stage_2_results(meta_prediction, trait_frame, result_dir)

        # GWAS results: regress on the traits with the meta prediction removed
        trait_residual = Y - meta_prediction
        stats = linear_regression(G.T, X, trait_residual)
        check_stage_3_results(ds, stats, trait_frame, result_dir)
Exemplo n.º 4
0
def test_linear_regression__raise_on_non_2D():
    """Expect a ValueError when an input array is not two-dimensional."""
    shape_2d = (10, 5)
    # A trailing singleton axis turns the first argument into a 3D array
    loop_covariates = np.ones(shape_2d + (1,))
    core_covariates = np.ones(shape_2d)
    traits = np.ones((10, 3))
    expected_message = "All arguments must be 2D"
    with pytest.raises(ValueError, match=expected_message):
        linear_regression(loop_covariates, core_covariates, traits)