Пример #1
0
    def test_linear_mixed_regression_low_rank(self):
        """Check a low-rank linear mixed model against stored reference values.

        The model is fit on chromosome 1 rows whose position is below 200 and
        must report ``low_rank``. It is then used to regress standardized
        chromosome 3 genotypes (position below 2005); the resulting betas and
        p-values are compared with the hard-coded expectations below.
        """
        expected_h_sq = 0.10001626
        expected_betas = [0.0073201542, 0.039969148, -0.036727875, 0.29852363, -0.049212500]
        expected_p_values = [0.90685162, 0.54839177, 0.55001054, 9.85247263e-07, 0.42796507]

        # Both side tables are keyed on column f1; column f2 carries the value.
        covariates = hl.import_table(resource('fastlmmCov.txt'), no_header=True, impute=True)
        covariates = covariates.key_by('f1')
        phenotypes = hl.import_table(resource('fastlmmPheno.txt'), no_header=True, impute=True, delimiter=' ')
        phenotypes = phenotypes.key_by('f1')

        dataset = hl.import_plink(bed=resource('fastlmmTest.bed'),
                                  bim=resource('fastlmmTest.bim'),
                                  fam=resource('fastlmmTest.fam'),
                                  reference_genome=None)
        dataset = dataset.annotate_cols(x=covariates[dataset.col_key].f2)
        dataset = dataset.annotate_cols(y=phenotypes[dataset.col_key].f2)
        dataset = dataset.cache()
        projection_path = utils.new_temp_file()

        # Fit the model on the chromosome 1 subset.
        on_chr1 = dataset.locus.contig == '1'
        below_200 = dataset.locus.position < 200
        chr1 = dataset.filter_rows(on_chr1 & below_200)
        model, _ = hl.linear_mixed_model(
            y=chr1.y,
            x=[1, chr1.x],
            z_t=chr1.GT.n_alt_alleles(),
            p_path=projection_path,
        )
        model.fit()
        self.assertTrue(model.low_rank)
        self.assertAlmostEqual(model.h_sq, expected_h_sq)

        # Regress each chromosome 3 row, standardizing by its own mean/stdev.
        on_chr3 = dataset.locus.contig == '3'
        below_2005 = dataset.locus.position < 2005
        chr3 = dataset.filter_rows(on_chr3 & below_2005)
        chr3 = chr3.annotate_rows(stats=hl.agg.stats(chr3.GT.n_alt_alleles()))
        standardized = (chr3.GT.n_alt_alleles() - chr3.stats.mean) / chr3.stats.stdev
        ht = hl.linear_mixed_regression_rows(standardized, model)

        observed_betas = ht.beta.collect()
        assert np.allclose(observed_betas, expected_betas)
        observed_p_values = ht.p_value.collect()
        assert np.allclose(observed_p_values, expected_p_values)
Пример #2
0
    def test_linear_mixed_regression_full_rank(self):
        """Check a linear mixed model fit on all of chromosome 1.

        The fitted ``h_sq`` is compared with a reference value to six decimal
        places. The model is then used to regress standardized chromosome 3
        genotypes (position below 2005); the resulting betas and p-values are
        compared with the hard-coded expectations below.
        """
        # NOTE(review): the original locals were suffixed "_fastlmm", which
        # suggests these references came from FaST-LMM -- confirm.
        reference_h_sq = 0.142761
        h_sq_places = 6
        reference_betas = [0.012202061, 0.037718282, -0.033572693, 0.29171541, -0.045644170]
        reference_p_values = [0.84543084, 0.57596760, 0.58788517, 1.4057279e-06, 0.46578204]

        # Both side tables are keyed on column f1; column f2 carries the value.
        covariates = hl.import_table(resource('fastlmmCov.txt'), no_header=True, impute=True)
        covariates = covariates.key_by('f1')
        phenotypes = hl.import_table(resource('fastlmmPheno.txt'), no_header=True, impute=True, delimiter=' ')
        phenotypes = phenotypes.key_by('f1')

        dataset = hl.import_plink(bed=resource('fastlmmTest.bed'),
                                  bim=resource('fastlmmTest.bim'),
                                  fam=resource('fastlmmTest.fam'),
                                  reference_genome=None)
        dataset = dataset.annotate_cols(x=covariates[dataset.col_key].f2)
        dataset = dataset.annotate_cols(y=phenotypes[dataset.col_key].f2)
        dataset = dataset.cache()
        projection_path = utils.new_temp_file()

        # Fit the model on every chromosome 1 row (no position cut-off here).
        chr1 = dataset.filter_rows(dataset.locus.contig == '1')
        model, _ = hl.linear_mixed_model(
            y=chr1.y,
            x=[1, chr1.x],
            z_t=chr1.GT.n_alt_alleles(),
            p_path=projection_path,
        )
        model.fit()
        self.assertAlmostEqual(model.h_sq, reference_h_sq, places=h_sq_places)

        # Regress each chromosome 3 row, standardizing by its own mean/stdev.
        on_chr3 = dataset.locus.contig == '3'
        below_2005 = dataset.locus.position < 2005
        chr3 = dataset.filter_rows(on_chr3 & below_2005)
        chr3 = chr3.annotate_rows(stats=hl.agg.stats(chr3.GT.n_alt_alleles()))
        standardized = (chr3.GT.n_alt_alleles() - chr3.stats.mean) / chr3.stats.stdev
        ht = hl.linear_mixed_regression_rows(standardized, model)

        observed_betas = ht.beta.collect()
        assert np.allclose(observed_betas, reference_betas)
        observed_p_values = ht.p_value.collect()
        assert np.allclose(observed_p_values, reference_p_values)
Пример #3
0
    def test_linear_mixed_regression_low_rank(self):
        """Low-rank linear mixed regression must reproduce stored results.

        Steps: load covariate/phenotype tables and the PLINK fixture, fit a
        model on chromosome 1 rows with position < 200, assert the model is
        low rank with the expected ``h_sq``, then regress standardized
        chromosome 3 genotypes (position < 2005) and compare beta and p-value
        columns with the reference lists.
        """
        # Side tables: keyed by f1, with f2 used as the per-sample value.
        cov_ht = hl.import_table(resource('fastlmmCov.txt'), no_header=True, impute=True).key_by('f1')
        pheno_ht = hl.import_table(resource('fastlmmPheno.txt'), no_header=True, impute=True, delimiter=' ').key_by('f1')

        plink_mt = hl.import_plink(
            bed=resource('fastlmmTest.bed'),
            bim=resource('fastlmmTest.bim'),
            fam=resource('fastlmmTest.fam'),
            reference_genome=None,
        )
        plink_mt = plink_mt.annotate_cols(x=cov_ht[plink_mt.col_key].f2)
        plink_mt = plink_mt.annotate_cols(y=pheno_ht[plink_mt.col_key].f2).cache()
        tmp_p_path = utils.new_temp_file()

        want_h_sq = 0.10001626
        want_beta = [0.0073201542, 0.039969148, -0.036727875, 0.29852363, -0.049212500]
        want_p_value = [0.90685162, 0.54839177, 0.55001054, 9.85247263e-07, 0.42796507]

        locus = plink_mt.locus
        training_mt = plink_mt.filter_rows((locus.contig == '1') & (locus.position < 200))
        training_alt_count = training_mt.GT.n_alt_alleles()
        model, _ = hl.linear_mixed_model(y=training_mt.y, x=[1, training_mt.x], z_t=training_alt_count, p_path=tmp_p_path)
        model.fit()
        self.assertTrue(model.low_rank)
        self.assertAlmostEqual(model.h_sq, want_h_sq)

        testing_mt = plink_mt.filter_rows((locus.contig == '3') & (locus.position < 2005))
        testing_mt = testing_mt.annotate_rows(stats=hl.agg.stats(testing_mt.GT.n_alt_alleles()))
        # The alt-allele count is rebuilt from the annotated table so that it
        # shares a source with the ``stats`` row field it is combined with.
        centered = testing_mt.GT.n_alt_alleles() - testing_mt.stats.mean
        ht = hl.linear_mixed_regression_rows(centered / testing_mt.stats.stdev, model)

        assert np.allclose(ht.beta.collect(), want_beta)
        assert np.allclose(ht.p_value.collect(), want_p_value)
Пример #4
0
    def test_linear_mixed_regression_full_rank(self):
        """Linear mixed regression fit on all of chromosome 1 must match references.

        Steps: load covariate/phenotype tables and the PLINK fixture, fit a
        model on every chromosome 1 row, compare ``h_sq`` with the reference
        to six places, then regress standardized chromosome 3 genotypes
        (position < 2005) and compare beta and p-value columns with the
        reference lists.
        """
        # Side tables: keyed by f1, with f2 used as the per-sample value.
        cov_ht = hl.import_table(resource('fastlmmCov.txt'), no_header=True, impute=True).key_by('f1')
        pheno_ht = hl.import_table(resource('fastlmmPheno.txt'), no_header=True, impute=True, delimiter=' ').key_by('f1')

        plink_mt = hl.import_plink(
            bed=resource('fastlmmTest.bed'),
            bim=resource('fastlmmTest.bim'),
            fam=resource('fastlmmTest.fam'),
            reference_genome=None,
        )
        plink_mt = plink_mt.annotate_cols(x=cov_ht[plink_mt.col_key].f2)
        plink_mt = plink_mt.annotate_cols(y=pheno_ht[plink_mt.col_key].f2).cache()
        tmp_p_path = utils.new_temp_file()

        want_h_sq = 0.142761
        want_h_sq_places = 6
        want_beta = [0.012202061, 0.037718282, -0.033572693, 0.29171541, -0.045644170]
        want_p_value = [0.84543084, 0.57596760, 0.58788517, 1.4057279e-06, 0.46578204]

        locus = plink_mt.locus
        training_mt = plink_mt.filter_rows(locus.contig == '1')
        training_alt_count = training_mt.GT.n_alt_alleles()
        model, _ = hl.linear_mixed_model(y=training_mt.y, x=[1, training_mt.x], z_t=training_alt_count, p_path=tmp_p_path)
        model.fit()
        self.assertAlmostEqual(model.h_sq, want_h_sq, places=want_h_sq_places)

        testing_mt = plink_mt.filter_rows((locus.contig == '3') & (locus.position < 2005))
        testing_mt = testing_mt.annotate_rows(stats=hl.agg.stats(testing_mt.GT.n_alt_alleles()))
        # The alt-allele count is rebuilt from the annotated table so that it
        # shares a source with the ``stats`` row field it is combined with.
        centered = testing_mt.GT.n_alt_alleles() - testing_mt.stats.mean
        ht = hl.linear_mixed_regression_rows(centered / testing_mt.stats.stdev, model)

        assert np.allclose(ht.beta.collect(), want_beta)
        assert np.allclose(ht.p_value.collect(), want_p_value)
Пример #5
0
    def test_linear_mixed_regression_pass_through(self):
        """Check that ``pass_through`` fields reach the regression result.

        A model is fit (with ``log_gamma=0``) on chromosome 1 rows whose
        position is below 200. Chromosome 3 rows (position below 2005) are
        annotated with ``stats`` and a random ``foo.bar`` value, and the
        regression is asked to pass through ``stats``, ``foo.bar`` and
        ``cm_position``. The test asserts that ``bar`` in the result equals
        ``foo.bar`` in the source for every row.
        """
        # Both side tables are keyed on column f1; column f2 carries the value.
        covariates = hl.import_table(resource('fastlmmCov.txt'), no_header=True, impute=True)
        covariates = covariates.key_by('f1')
        phenotypes = hl.import_table(resource('fastlmmPheno.txt'), no_header=True, impute=True, delimiter=' ')
        phenotypes = phenotypes.key_by('f1')

        dataset = hl.import_plink(bed=resource('fastlmmTest.bed'),
                                  bim=resource('fastlmmTest.bim'),
                                  fam=resource('fastlmmTest.fam'),
                                  reference_genome=None)
        dataset = dataset.annotate_cols(x=covariates[dataset.col_key].f2)
        dataset = dataset.annotate_cols(y=phenotypes[dataset.col_key].f2)
        dataset = dataset.cache()
        projection_path = utils.new_temp_file()

        on_chr1 = dataset.locus.contig == '1'
        below_200 = dataset.locus.position < 200
        chr1 = dataset.filter_rows(on_chr1 & below_200)
        model, _ = hl.linear_mixed_model(
            y=chr1.y,
            x=[1, chr1.x],
            z_t=chr1.GT.n_alt_alleles(),
            p_path=projection_path,
        )
        model.fit(log_gamma=0)

        on_chr3 = dataset.locus.contig == '3'
        below_2005 = dataset.locus.position < 2005
        chr3 = dataset.filter_rows(on_chr3 & below_2005)
        chr3 = chr3.annotate_rows(
            stats=hl.agg.stats(chr3.GT.n_alt_alleles()),
            foo=hl.struct(bar=hl.rand_norm(0, 1)),
        )
        standardized = (chr3.GT.n_alt_alleles() - chr3.stats.mean) / chr3.stats.stdev
        # Mixes a field name with two expressions, one of them nested.
        carried = ['stats', chr3.foo.bar, chr3.cm_position]
        ht = hl.linear_mixed_regression_rows(standardized, model, pass_through=carried)

        bar_matches = chr3.foo.bar == ht[chr3.row_key].bar
        assert chr3.aggregate_rows(hl.agg.all(bar_matches))
Пример #6
0
    def test_linear_mixed_regression_pass_through(self):
        """Row fields listed in ``pass_through`` must appear in the result table.

        Steps: load covariate/phenotype tables and the PLINK fixture, fit a
        model on chromosome 1 rows with position < 200 using ``log_gamma=0``,
        annotate chromosome 3 rows (position < 2005) with ``stats`` and a
        random ``foo.bar``, run the regression with ``stats``, ``foo.bar`` and
        ``cm_position`` passed through, and assert that the result's ``bar``
        equals the source's ``foo.bar`` on every row.
        """
        # Side tables: keyed by f1, with f2 used as the per-sample value.
        cov_ht = hl.import_table(resource('fastlmmCov.txt'), no_header=True, impute=True).key_by('f1')
        pheno_ht = hl.import_table(resource('fastlmmPheno.txt'), no_header=True, impute=True, delimiter=' ').key_by('f1')

        plink_mt = hl.import_plink(
            bed=resource('fastlmmTest.bed'),
            bim=resource('fastlmmTest.bim'),
            fam=resource('fastlmmTest.fam'),
            reference_genome=None,
        )
        plink_mt = plink_mt.annotate_cols(x=cov_ht[plink_mt.col_key].f2)
        plink_mt = plink_mt.annotate_cols(y=pheno_ht[plink_mt.col_key].f2).cache()
        tmp_p_path = utils.new_temp_file()

        locus = plink_mt.locus
        training_mt = plink_mt.filter_rows((locus.contig == '1') & (locus.position < 200))
        training_alt_count = training_mt.GT.n_alt_alleles()
        model, _ = hl.linear_mixed_model(y=training_mt.y, x=[1, training_mt.x], z_t=training_alt_count, p_path=tmp_p_path)
        model.fit(log_gamma=0)

        testing_mt = plink_mt.filter_rows((locus.contig == '3') & (locus.position < 2005))
        row_stats = hl.agg.stats(testing_mt.GT.n_alt_alleles())
        random_struct = hl.struct(bar=hl.rand_norm(0, 1))
        testing_mt = testing_mt.annotate_rows(stats=row_stats, foo=random_struct)

        # The alt-allele count is rebuilt from the annotated table so that it
        # shares a source with the ``stats`` row field it is combined with.
        centered = testing_mt.GT.n_alt_alleles() - testing_mt.stats.mean
        ht = hl.linear_mixed_regression_rows(
            centered / testing_mt.stats.stdev,
            model,
            pass_through=['stats', testing_mt.foo.bar, testing_mt.cm_position],
        )

        result_bar = ht[testing_mt.row_key].bar
        assert testing_mt.aggregate_rows(hl.agg.all(testing_mt.foo.bar == result_bar))