def test_pbs__windowed(sample_size, n_cohorts, cohorts, cohort_indexes, chunks):
    """Check windowed ``pbs`` output against scikit-allel for every cohort triple.

    A dataset is simulated, cohorts are attached, and the variants are split
    into windows of 25 before ``pbs`` is run with the given ``cohorts``
    argument. For each 3-combination of cohort indexes:

    * if ``cohort_indexes`` is given and the triple is not listed in it, the
      corresponding ``stat_pbs`` values must all be NaN;
    * otherwise the values must match ``allel.pbs`` computed from the same
      cohort allele counts with the same window size.
    """
    window_size = 25
    cohort_dims = ("cohorts_0", "cohorts_1", "cohorts_2")

    ts = simulate_ts(sample_size, length=200)
    ds = ts_to_dataset(ts, chunks)  # type: ignore[no-untyped-call]
    ds, _subsets = add_cohorts(
        ds, ts, n_cohorts, cohort_key_names=list(cohort_dims)
    )  # type: ignore[no-untyped-call]
    ds = window(ds, size=window_size)
    ds = pbs(ds, cohorts=cohorts)

    # Reference values come from scikit-allel.
    for triple in itertools.combinations(range(n_cohorts), 3):
        selector = {dim: f"co_{idx}" for dim, idx in zip(cohort_dims, triple)}
        observed = ds["stat_pbs"].sel(**selector).values

        if cohort_indexes is not None and triple not in cohort_indexes:
            # Triples that were not requested are expected to be all-NaN.
            np.testing.assert_array_equal(observed, np.full_like(observed, np.nan))
            continue

        allele_counts = ds.cohort_allele_count.values
        ac_first, ac_second, ac_third = (
            allele_counts[:, cohort, :] for cohort in triple
        )
        expected = allel.pbs(ac_first, ac_second, ac_third, window_size=window_size)

        # scikit-allel has final window missing, so drop ours before comparing.
        np.testing.assert_allclose(observed[:-1], expected)
def test_pbs(sample_size, n_cohorts):
    """Check single-window ``pbs`` output against scikit-allel.

    A dataset is simulated, cohorts are attached, and all variants are placed
    in one window. For every 3-combination of cohort indexes the resulting
    ``stat_pbs`` values must match ``allel.pbs`` computed from the same cohort
    allele counts with ``window_size`` equal to the number of variants.
    """
    ts = simulate_ts(sample_size)
    ds = ts_to_dataset(ts)  # type: ignore[no-untyped-call]
    # The second return value of add_cohorts is not needed by this test.
    ds, _ = add_cohorts(
        ds, ts, n_cohorts, cohort_key_names=["cohorts_0", "cohorts_1", "cohorts_2"]
    )  # type: ignore[no-untyped-call]

    # Use ``sizes`` (dimension name -> length) rather than indexing ``dims``:
    # treating ``Dataset.dims`` as a mapping is deprecated in xarray and
    # raises a FutureWarning, which fails under warnings-as-errors.
    n_variants = ds.sizes["variants"]
    ds = window(ds, size=n_variants)  # single window
    ds = pbs(ds)

    # scikit-allel
    for i, j, k in itertools.combinations(range(n_cohorts), 3):
        stat_pbs = (
            ds["stat_pbs"]
            .sel(cohorts_0=f"co_{i}", cohorts_1=f"co_{j}", cohorts_2=f"co_{k}")
            .values
        )

        ac_i = ds.cohort_allele_count.values[:, i, :]
        ac_j = ds.cohort_allele_count.values[:, j, :]
        ac_k = ds.cohort_allele_count.values[:, k, :]

        ska_pbs_value = allel.pbs(ac_i, ac_j, ac_k, window_size=n_variants)

        np.testing.assert_allclose(stat_pbs, ska_pbs_value)
def meanPBS(ac1, ac2, ac3, window_size, normalise):
    """Compute windowed PBS for three allele-count arrays and summarise it.

    Parameters
    ----------
    ac1, ac2, ac3
        Allele counts, passed straight through to ``allel.pbs``.
    window_size
        Forwarded to ``allel.pbs`` as ``window_size``.
    normalise
        Forwarded to ``allel.pbs`` as ``normed``.

    Returns
    -------
    tuple
        ``(mean, se, values, jackknife_values)`` where ``values`` is the raw
        ``allel.pbs`` output, ``mean`` is its NaN-ignoring mean, and ``se`` /
        ``jackknife_values`` are the second and third items returned by
        ``allel.stats.misc.jackknife`` using ``np.mean`` as the statistic.

    NOTE(review): the point estimate ignores NaNs (``np.nanmean``) while the
    jackknife statistic is plain ``np.mean`` -- confirm whether NaN values are
    meant to propagate into ``se``.
    """
    pbs_values = allel.pbs(
        ac1, ac2, ac3, window_size=window_size, normed=normalise
    )

    # Average over all PBS values (original note: "will be per gene").
    mean_value = np.nanmean(pbs_values)

    _jack_mean, std_err, jack_values = allel.stats.misc.jackknife(
        pbs_values, statistic=np.mean
    )
    return (mean_value, std_err, pbs_values, jack_values)
def meanPBS(ac1, ac2, ac3, window_size, normalise):
    """
    Calculate PBS on three allele-count arrays and take the mean of the
    resulting values.

    ``ac1``, ``ac2`` and ``ac3`` are handed to ``allel.pbs`` together with
    ``window_size`` and ``normed=normalise``. The function returns a 4-tuple
    ``(mean, se, values, jackknife_values)``:

    * ``mean`` -- ``np.nanmean`` of the PBS values;
    * ``se`` -- second item returned by ``allel.stats.misc.jackknife``;
    * ``values`` -- the unmodified ``allel.pbs`` output;
    * ``jackknife_values`` -- third item returned by the jackknife.

    NOTE(review): the mean skips NaNs but the jackknife statistic
    (``np.mean``) does not appear to -- confirm this asymmetry is intended.
    """
    values = allel.pbs(ac1, ac2, ac3, window_size=window_size, normed=normalise)

    # Average of all PBS values (original note: "will be per gene").
    average = np.nanmean(values)

    # The jackknife's own mean estimate (first item) is intentionally unused.
    _unused, std_err, per_iteration = allel.stats.misc.jackknife(
        values,
        statistic=np.mean,
    )

    return (average, std_err, values, per_iteration)
def test_pbs():
    """Sanity and regression check for ``pbs`` on a minimal five-variant input.

    Verifies that the result is a one-dimensional float ndarray with four
    entries, that it matches previously recorded values, and that the last
    entry is NaN.
    """
    # Minimal input data: allele counts for three populations, five variants.
    counts_a = [[2, 0], [0, 2], [1, 1], [2, 0], [0, 2]]
    counts_b = [[1, 1], [2, 0], [0, 2], [2, 0], [0, 2]]
    counts_c = [[0, 2], [1, 1], [2, 0], [2, 0], [0, 2]]

    result = pbs(counts_a, counts_b, counts_c, window_size=2, window_step=1)

    # Sanity check for output existence and type.
    assert isinstance(result, np.ndarray)
    assert result.ndim == 1
    assert result.shape[0] == 4
    assert result.dtype.kind == 'f'

    # Regression check against recorded values.
    expected = [0.52349464, 0., -0.85199356, np.nan]
    assert_array_almost_equal(expected, result)

    # Final value is NaN because variants in the final window are
    # non-segregating.
    assert np.isnan(result[3])
chrom=chrom, ylim=0.5, save=True) #### Population Branch Statistic (PBS) in windows #### if pbs: for pbscomp in pbscomps: pop1, pop2, outpop = pbscomp.split("_") cohortText = f"(({pop1}, {pop2}), {outpop})" print(f"Calculating PBS values in sliding window for {pbscomp}\n") for wname, size, step in zip(windownames, windowsizes, windowsteps): pbsArray = allel.pbs(acsubpops[pop1], acsubpops[pop2], acsubpops[outpop], window_size=size, window_step=step, normed=True) midpoint = allel.moving_statistic(pos, np.median, size=size, step=step) cohortNoSpaceText = pbscomp + "." + wname rnaseqpop.plotWindowed( statName="PBS", cohortText=cohortText, cohortNoSpaceText=cohortNoSpaceText, values=pbsArray, midpoints=midpoint, colour='dodgerblue',