def test_linear_mixed_regression_low_rank(self):
    """Check the low-rank linear mixed model against stored reference values.

    The model is fit on contig '1' rows with position < 200 and must report
    ``low_rank``. It is then applied to standardized alternate-allele counts
    of contig '3' rows with position < 2005, and the resulting betas and
    p-values are compared with the hard-coded expectations.
    """
    covariates = hl.import_table(
        resource('fastlmmCov.txt'), no_header=True, impute=True
    ).key_by('f1')
    phenotypes = hl.import_table(
        resource('fastlmmPheno.txt'), no_header=True, impute=True, delimiter=' '
    ).key_by('f1')

    dataset = hl.import_plink(
        bed=resource('fastlmmTest.bed'),
        bim=resource('fastlmmTest.bim'),
        fam=resource('fastlmmTest.fam'),
        reference_genome=None,
    )
    dataset = dataset.annotate_cols(x=covariates[dataset.col_key].f2)
    dataset = dataset.annotate_cols(y=phenotypes[dataset.col_key].f2).cache()

    p_path = utils.new_temp_file()

    # Reference values the fitted model and the regression must reproduce.
    expected_h2_hail = 0.10001626
    expected_beta_hail = [
        0.0073201542,
        0.039969148,
        -0.036727875,
        0.29852363,
        -0.049212500,
    ]
    expected_pval_hail = [
        0.90685162,
        0.54839177,
        0.55001054,
        9.85247263e-07,
        0.42796507,
    ]

    # Fit on the first 200 positions of contig '1' only.
    chr1_rows = (dataset.locus.contig == '1') & (dataset.locus.position < 200)
    chr1 = dataset.filter_rows(chr1_rows)
    model, _ = hl.linear_mixed_model(
        y=chr1.y,
        x=[1, chr1.x],
        z_t=chr1.GT.n_alt_alleles(),
        p_path=p_path,
    )
    model.fit()

    self.assertTrue(model.low_rank)
    self.assertAlmostEqual(model.h_sq, expected_h2_hail)

    chr3_rows = (dataset.locus.contig == '3') & (dataset.locus.position < 2005)
    chr3 = dataset.filter_rows(chr3_rows)
    chr3 = chr3.annotate_rows(stats=hl.agg.stats(chr3.GT.n_alt_alleles()))

    # Standardize each row's alt-allele count using its own mean and stdev.
    standardized_gt = (chr3.GT.n_alt_alleles() - chr3.stats.mean) / chr3.stats.stdev
    results = hl.linear_mixed_regression_rows(standardized_gt, model)

    assert np.allclose(results.beta.collect(), expected_beta_hail)
    assert np.allclose(results.p_value.collect(), expected_pval_hail)
def test_linear_mixed_regression_full_rank(self):
    """Check the linear mixed model fit on all contig '1' rows.

    The fitted ``h_sq`` is compared to a stored value to 6 decimal places.
    The model is then applied to standardized alternate-allele counts of
    contig '3' rows with position < 2005, and the resulting betas and
    p-values are compared with the hard-coded expectations.
    """
    covariates = hl.import_table(
        resource('fastlmmCov.txt'), no_header=True, impute=True
    ).key_by('f1')
    phenotypes = hl.import_table(
        resource('fastlmmPheno.txt'), no_header=True, impute=True, delimiter=' '
    ).key_by('f1')

    dataset = hl.import_plink(
        bed=resource('fastlmmTest.bed'),
        bim=resource('fastlmmTest.bim'),
        fam=resource('fastlmmTest.fam'),
        reference_genome=None,
    )
    dataset = dataset.annotate_cols(x=covariates[dataset.col_key].f2)
    dataset = dataset.annotate_cols(y=phenotypes[dataset.col_key].f2).cache()

    p_path = utils.new_temp_file()

    # Reference values the fitted model and the regression must reproduce.
    expected_h2_fastlmm = 0.142761
    h2_decimal_places = 6
    expected_beta_fastlmm = [
        0.012202061,
        0.037718282,
        -0.033572693,
        0.29171541,
        -0.045644170,
    ]
    expected_pval_hail = [
        0.84543084,
        0.57596760,
        0.58788517,
        1.4057279e-06,
        0.46578204,
    ]

    # Fit on every row of contig '1' (no position cut-off here).
    chr1 = dataset.filter_rows(dataset.locus.contig == '1')
    model, _ = hl.linear_mixed_model(
        y=chr1.y,
        x=[1, chr1.x],
        z_t=chr1.GT.n_alt_alleles(),
        p_path=p_path,
    )
    model.fit()

    self.assertAlmostEqual(model.h_sq, expected_h2_fastlmm, places=h2_decimal_places)

    chr3_rows = (dataset.locus.contig == '3') & (dataset.locus.position < 2005)
    chr3 = dataset.filter_rows(chr3_rows)
    chr3 = chr3.annotate_rows(stats=hl.agg.stats(chr3.GT.n_alt_alleles()))

    # Standardize each row's alt-allele count using its own mean and stdev.
    standardized_gt = (chr3.GT.n_alt_alleles() - chr3.stats.mean) / chr3.stats.stdev
    results = hl.linear_mixed_regression_rows(standardized_gt, model)

    assert np.allclose(results.beta.collect(), expected_beta_fastlmm)
    assert np.allclose(results.p_value.collect(), expected_pval_hail)
def test_linear_mixed_regression_low_rank(self):
    """Check the low-rank linear mixed model against stored reference values.

    The model is fit on contig '1' rows with position < 200 and must report
    ``low_rank``. It is then applied to standardized alternate-allele counts
    of contig '3' rows with position < 2005, and the resulting betas and
    p-values are compared with the hard-coded expectations.
    """
    covariates = hl.import_table(
        resource('fastlmmCov.txt'), no_header=True, impute=True
    ).key_by('f1')
    phenotypes = hl.import_table(
        resource('fastlmmPheno.txt'), no_header=True, impute=True, delimiter=' '
    ).key_by('f1')

    dataset = hl.import_plink(
        bed=resource('fastlmmTest.bed'),
        bim=resource('fastlmmTest.bim'),
        fam=resource('fastlmmTest.fam'),
        reference_genome=None,
    )
    dataset = dataset.annotate_cols(x=covariates[dataset.col_key].f2)
    dataset = dataset.annotate_cols(y=phenotypes[dataset.col_key].f2).cache()

    p_path = utils.new_temp_file()

    # Reference values the fitted model and the regression must reproduce.
    expected_h2_hail = 0.10001626
    expected_beta_hail = [
        0.0073201542,
        0.039969148,
        -0.036727875,
        0.29852363,
        -0.049212500,
    ]
    expected_pval_hail = [
        0.90685162,
        0.54839177,
        0.55001054,
        9.85247263e-07,
        0.42796507,
    ]

    # Fit on the first 200 positions of contig '1' only.
    chr1_rows = (dataset.locus.contig == '1') & (dataset.locus.position < 200)
    chr1 = dataset.filter_rows(chr1_rows)
    model, _ = hl.linear_mixed_model(
        y=chr1.y,
        x=[1, chr1.x],
        z_t=chr1.GT.n_alt_alleles(),
        p_path=p_path,
    )
    model.fit()

    self.assertTrue(model.low_rank)
    self.assertAlmostEqual(model.h_sq, expected_h2_hail)

    chr3_rows = (dataset.locus.contig == '3') & (dataset.locus.position < 2005)
    chr3 = dataset.filter_rows(chr3_rows)
    chr3 = chr3.annotate_rows(stats=hl.agg.stats(chr3.GT.n_alt_alleles()))

    # Standardize each row's alt-allele count using its own mean and stdev.
    standardized_gt = (chr3.GT.n_alt_alleles() - chr3.stats.mean) / chr3.stats.stdev
    results = hl.linear_mixed_regression_rows(standardized_gt, model)

    assert np.allclose(results.beta.collect(), expected_beta_hail)
    assert np.allclose(results.p_value.collect(), expected_pval_hail)
def test_linear_mixed_regression_full_rank(self):
    """Check the linear mixed model fit on all contig '1' rows.

    The fitted ``h_sq`` is compared to a stored value to 6 decimal places.
    The model is then applied to standardized alternate-allele counts of
    contig '3' rows with position < 2005, and the resulting betas and
    p-values are compared with the hard-coded expectations.
    """
    covariates = hl.import_table(
        resource('fastlmmCov.txt'), no_header=True, impute=True
    ).key_by('f1')
    phenotypes = hl.import_table(
        resource('fastlmmPheno.txt'), no_header=True, impute=True, delimiter=' '
    ).key_by('f1')

    dataset = hl.import_plink(
        bed=resource('fastlmmTest.bed'),
        bim=resource('fastlmmTest.bim'),
        fam=resource('fastlmmTest.fam'),
        reference_genome=None,
    )
    dataset = dataset.annotate_cols(x=covariates[dataset.col_key].f2)
    dataset = dataset.annotate_cols(y=phenotypes[dataset.col_key].f2).cache()

    p_path = utils.new_temp_file()

    # Reference values the fitted model and the regression must reproduce.
    expected_h2_fastlmm = 0.142761
    h2_decimal_places = 6
    expected_beta_fastlmm = [
        0.012202061,
        0.037718282,
        -0.033572693,
        0.29171541,
        -0.045644170,
    ]
    expected_pval_hail = [
        0.84543084,
        0.57596760,
        0.58788517,
        1.4057279e-06,
        0.46578204,
    ]

    # Fit on every row of contig '1' (no position cut-off here).
    chr1 = dataset.filter_rows(dataset.locus.contig == '1')
    model, _ = hl.linear_mixed_model(
        y=chr1.y,
        x=[1, chr1.x],
        z_t=chr1.GT.n_alt_alleles(),
        p_path=p_path,
    )
    model.fit()

    self.assertAlmostEqual(model.h_sq, expected_h2_fastlmm, places=h2_decimal_places)

    chr3_rows = (dataset.locus.contig == '3') & (dataset.locus.position < 2005)
    chr3 = dataset.filter_rows(chr3_rows)
    chr3 = chr3.annotate_rows(stats=hl.agg.stats(chr3.GT.n_alt_alleles()))

    # Standardize each row's alt-allele count using its own mean and stdev.
    standardized_gt = (chr3.GT.n_alt_alleles() - chr3.stats.mean) / chr3.stats.stdev
    results = hl.linear_mixed_regression_rows(standardized_gt, model)

    assert np.allclose(results.beta.collect(), expected_beta_fastlmm)
    assert np.allclose(results.p_value.collect(), expected_pval_hail)
def test_linear_mixed_regression_pass_through(self):
    """Check that ``pass_through`` row fields are carried into the result.

    A model is fit (with ``log_gamma=0``) on contig '1' rows with
    position < 200 and applied to contig '3' rows with position < 2005,
    passing through a field name, a nested field expression and a
    top-level field expression. The test asserts that the nested ``bar``
    value in the result equals the source value for every row.
    """
    covariates = hl.import_table(
        resource('fastlmmCov.txt'), no_header=True, impute=True
    ).key_by('f1')
    phenotypes = hl.import_table(
        resource('fastlmmPheno.txt'), no_header=True, impute=True, delimiter=' '
    ).key_by('f1')

    dataset = hl.import_plink(
        bed=resource('fastlmmTest.bed'),
        bim=resource('fastlmmTest.bim'),
        fam=resource('fastlmmTest.fam'),
        reference_genome=None,
    )
    dataset = dataset.annotate_cols(x=covariates[dataset.col_key].f2)
    dataset = dataset.annotate_cols(y=phenotypes[dataset.col_key].f2).cache()

    p_path = utils.new_temp_file()

    chr1_rows = (dataset.locus.contig == '1') & (dataset.locus.position < 200)
    chr1 = dataset.filter_rows(chr1_rows)
    model, _ = hl.linear_mixed_model(
        y=chr1.y,
        x=[1, chr1.x],
        z_t=chr1.GT.n_alt_alleles(),
        p_path=p_path,
    )
    model.fit(log_gamma=0)

    chr3_rows = (dataset.locus.contig == '3') & (dataset.locus.position < 2005)
    chr3 = dataset.filter_rows(chr3_rows)
    chr3 = chr3.annotate_rows(
        stats=hl.agg.stats(chr3.GT.n_alt_alleles()),
        foo=hl.struct(bar=hl.rand_norm(0, 1)),
    )

    # Standardize each row's alt-allele count using its own mean and stdev.
    standardized_gt = (chr3.GT.n_alt_alleles() - chr3.stats.mean) / chr3.stats.stdev
    # Mix a field name with field expressions to exercise each accepted form.
    carried_fields = ['stats', chr3.foo.bar, chr3.cm_position]
    results = hl.linear_mixed_regression_rows(
        standardized_gt, model, pass_through=carried_fields
    )

    bar_matches = chr3.foo.bar == results[chr3.row_key].bar
    assert chr3.aggregate_rows(hl.agg.all(bar_matches))
def test_linear_mixed_regression_pass_through(self):
    """Check that ``pass_through`` row fields are carried into the result.

    A model is fit (with ``log_gamma=0``) on contig '1' rows with
    position < 200 and applied to contig '3' rows with position < 2005,
    passing through a field name, a nested field expression and a
    top-level field expression. The test asserts that the nested ``bar``
    value in the result equals the source value for every row.
    """
    covariates = hl.import_table(
        resource('fastlmmCov.txt'), no_header=True, impute=True
    ).key_by('f1')
    phenotypes = hl.import_table(
        resource('fastlmmPheno.txt'), no_header=True, impute=True, delimiter=' '
    ).key_by('f1')

    dataset = hl.import_plink(
        bed=resource('fastlmmTest.bed'),
        bim=resource('fastlmmTest.bim'),
        fam=resource('fastlmmTest.fam'),
        reference_genome=None,
    )
    dataset = dataset.annotate_cols(x=covariates[dataset.col_key].f2)
    dataset = dataset.annotate_cols(y=phenotypes[dataset.col_key].f2).cache()

    p_path = utils.new_temp_file()

    chr1_rows = (dataset.locus.contig == '1') & (dataset.locus.position < 200)
    chr1 = dataset.filter_rows(chr1_rows)
    model, _ = hl.linear_mixed_model(
        y=chr1.y,
        x=[1, chr1.x],
        z_t=chr1.GT.n_alt_alleles(),
        p_path=p_path,
    )
    model.fit(log_gamma=0)

    chr3_rows = (dataset.locus.contig == '3') & (dataset.locus.position < 2005)
    chr3 = dataset.filter_rows(chr3_rows)
    chr3 = chr3.annotate_rows(
        stats=hl.agg.stats(chr3.GT.n_alt_alleles()),
        foo=hl.struct(bar=hl.rand_norm(0, 1)),
    )

    # Standardize each row's alt-allele count using its own mean and stdev.
    standardized_gt = (chr3.GT.n_alt_alleles() - chr3.stats.mean) / chr3.stats.stdev
    # Mix a field name with field expressions to exercise each accepted form.
    carried_fields = ['stats', chr3.foo.bar, chr3.cm_position]
    results = hl.linear_mixed_regression_rows(
        standardized_gt, model, pass_through=carried_fields
    )

    bar_matches = chr3.foo.bar == results[chr3.row_key].bar
    assert chr3.aggregate_rows(hl.agg.all(bar_matches))