Пример #1
0
    def test_seeding_is_consistent(self):
        """Seeds drawn after resetting the global seed to 0 repeat exactly."""

        def draw_seeds():
            # Reset the global seed before every draw so each run starts
            # from the same state.
            hl.set_global_seed(0)
            return [Env.next_seed() for _ in range(10)]

        first_run = draw_seeds()
        second_run = draw_seeds()

        # All ten seeds within a run must be distinct ...
        self.assertEqual(len(set(first_run)), 10)
        # ... and both runs must yield the same sequence.
        self.assertEqual(first_run, second_run)
Пример #2
0
    def test_seeding_is_consistent(self):
        """Check that re-seeding with 0 reproduces the same ten distinct seeds."""
        runs = []
        for _ in range(2):
            # Each pass starts from a freshly reset global seed.
            hl.set_global_seed(0)
            seeds = []
            for _ in range(10):
                seeds.append(Env.next_seed())
            runs.append(seeds)
        expected, repeated = runs

        distinct_count = len(set(expected))
        self.assertEqual(distinct_count, 10)
        self.assertEqual(expected, repeated)
Пример #3
0
def test_king_homo_estimator():
    """Check a squared-difference ``inner_product`` against fixed values.

    With the global seed fixed at 1, a small Balding-Nichols dataset is
    generated and its alt-allele counts are stored as float entries named
    ``genotype_score``. Those entries are loaded into a distributed array
    (block size 3), and ``da.T.inner_product(da, ...)`` is evaluated with a
    squared-difference function, an addition function, a float zero and
    ``hl.agg.sum``. The checkpointed result must equal a hard-coded 5x5
    matrix exactly.
    """
    hl.set_global_seed(1)

    dataset = hl.balding_nichols_model(2, 5, 5)
    alt_allele_count = hl.float(dataset.GT.n_alt_alleles())
    dataset = dataset.select_entries(genotype_score=alt_allele_count)
    da = hl.experimental.dnd.array(dataset, 'genotype_score', block_size=3)

    def squared(value):
        return value * value

    def squared_difference(lhs, rhs):
        return squared(lhs - rhs)

    def add(lhs, rhs):
        return lhs + rhs

    product = da.T.inner_product(
        da, squared_difference, add, hl.float(0), hl.agg.sum)
    # Materialize the result to a temporary location before collecting it.
    score_difference = product.checkpoint(new_temp_file())

    actual = score_difference.collect()
    expected = np.array([
        [0., 6., 4., 2., 4.],
        [6., 0., 6., 4., 6.],
        [4., 6., 0., 6., 0.],
        [2., 4., 6., 0., 6.],
        [4., 6., 0., 6., 0.],
    ])
    assert np.array_equal(actual, expected)
Пример #4
0
import hail as hl
# Fix the global seed before any randomized Hail call below.
hl.set_global_seed(0)

# Simulate genotypes: 3 populations, 1024 variants, 4 samples.
mt = hl.balding_nichols_model(
    n_populations=3,
    n_variants=1024,
    n_samples=4,
)

# Key the columns by a string id of the form "s<sample_idx>".
sample_id = 's' + hl.str(mt.sample_idx)
mt = mt.key_cols_by(s=sample_id)

# Keep each genotype only where a random draw with parameter 0.99 is true;
# presumably this blanks roughly 1% of calls -- confirm against the Hail docs.
keep_call = hl.rand_bool(0.99)
mt = mt.annotate_entries(GT=hl.or_missing(keep_call, mt.GT))

# Export in PLINK format, using "f<sample id>" as the family id.
family_id = 'f' + mt.s
hl.export_plink(
    mt,
    'balding-nichols-1024-variants-4-samples-3-populations',
    fam_id=family_id,
)
Пример #5
0
        h2 = 0.6
        pi = 0.01
        K = 0.05

        n_pops = 3
        fst = [0.1] * n_pops
        n_vars = int(100e3)

        n_cas_list = [5000] * 3
        n_list = [2 * x for x in n_cas_list]

        n_sim = int(320e3)  # over simulate to be able to have ascertainment
    else:
        raise ValueError(f'sim_name="{sim_name}" does not match any models')

    hl.set_global_seed(seed)

    gt_sim_suffix = f'bn.npops_{n_pops}.nvars_{n_vars}.nsim_{n_sim}' if sim_name[:
                                                                                 3] == 'bn_' else ''  # suffix for genotype simulation (empty string if using ukb data)
    param_suffix = f'{gt_sim_suffix}.h2_{h2}.pi_{pi}.K_{K}.seed_{seed}'
    betas_path = f'{smiles_wd}/betas.{param_suffix}.tsv.gz'
    phens_path = f'{smiles_wd}/phens.{param_suffix}.tsv.gz'

    if sim_name[:3] == 'bn_':
        mt = hl.balding_nichols_model(n_populations=n_pops,
                                      n_samples=n_sim,
                                      n_variants=n_vars,
                                      fst=fst)

        mt = mt.filter_rows(
            (hl.abs(hl.agg.mean(mt.GT.n_alt_alleles()) / 2 - 0.5) <