def test_fit_null_binary(self):
    """Check ``fit_null`` against stored results for the binary phenotype.

    Four fits are exercised on the ``P_BINARY`` / ``M`` fixtures: with and
    without the ``COV`` covariates, each with ``firth=False`` and
    ``firth=True``.  With ``firth=False`` the ``params`` attribute of the
    result is compared to reference coefficients; with ``firth=True`` the
    returned value itself is compared to a reference scalar.  A final case
    feeds perfectly separable data and expects ``None`` back.
    """
    p = np.loadtxt(P_BINARY)
    m = np.loadtxt(M)

    # no covariates
    cov = pd.DataFrame([])
    params = np.array([
        -1.41572498, 0.35847998, -0.03014792, 2.46252819,
        0.96908425, -0.20952455, -0.27988125, 0.36798503,
        -0.03278285, -1.34132024, 0.844149,
    ])
    null_res = fit_null(p, m, cov, False, firth=False)
    # Take the absolute value element-wise *before* reducing with max():
    # abs(diff.max()) would accept arbitrarily large negative deviations
    # as long as the largest signed difference happened to be near zero.
    self.assertLess(np.abs(params - null_res.params).max(), 1E-7)

    # no covariates, firth regression
    null_res = fit_null(p, m, cov, False, firth=True)
    self.assertAlmostEqual(null_res, -57.884527394557985)

    # covariates
    cov = pd.DataFrame(np.loadtxt(COV))
    params = np.array([
        -0.87072948, 0.26456701, 0.03485904, 2.80243184,
        1.086393, -0.3882244, -0.46883396, 0.61387846,
        0.09962477, -1.45376984, 0.93929299, 0.07927743,
        -1.54631396, 0.1098796,
    ])
    null_res = fit_null(p, m, cov, False, firth=False)
    self.assertLess(np.abs(params - null_res.params).max(), 1E-7)

    # covariates, firth regression
    null_res = fit_null(p, m, cov, False, firth=True)
    self.assertAlmostEqual(null_res, -55.60790630835098)

    # perfectly separable data: phenotype and the single predictor column
    # are identical, and the fit is expected to yield None
    p = np.array([1] * 10 + [0] * 90)
    m = np.array([1] * 10 + [0] * 90).reshape(-1, 1)
    cov = pd.DataFrame([])
    self.assertIsNone(fit_null(p, m, cov, False, False))
def test_fit_null_continuous(self):
    """Check ``fit_null`` against stored results for the continuous phenotype.

    Fits the ``P_CONT`` / ``M`` fixtures with ``firth=False`` and the
    fourth positional argument set to ``True`` (it is ``False`` in the
    binary test), once with an empty covariate frame and once with the
    ``COV`` covariates, and compares ``params`` of each result to
    reference coefficients.
    """
    p_cont = np.loadtxt(P_CONT)
    m = np.loadtxt(M)

    # no covariates
    params = np.array([
        0.65572473, -0.16129649, 0.03417796, -0.08011702,
        0.10902641, 0.00599514, -0.09081684, -0.13653787,
        0.17798003, -0.16793408, 0.12959982,
    ])
    null_res = fit_null(p_cont, m, pd.DataFrame([]), True, firth=False)
    # Take the absolute value element-wise *before* reducing with max():
    # abs(diff.max()) would accept arbitrarily large negative deviations
    # as long as the largest signed difference happened to be near zero.
    self.assertLess(np.abs(params - null_res.params).max(), 1E-7)

    # covariates
    cov = pd.DataFrame(np.loadtxt(COV))
    params = np.array([
        0.49070237, -0.17284083, 0.00710691, -0.11784811,
        0.07352861, 0.01219004, -0.04772721, -0.17089199,
        0.18198025, -0.17141095, 0.11330439, 0.08887165,
        0.20304982, 0.13802362,
    ])
    null_res = fit_null(p_cont, m, cov, True, firth=False)
    self.assertLess(np.abs(params - null_res.params).max(), 1E-7)
def test_find_enet_selected_binary(self):
    """Check the variants yielded by ``find_enet_selected`` for the VCF fixture.

    The generator is run twice on the ``binary`` phenotype column: first
    with ``None`` in the sixth positional slot, then with a
    ``(mf, null_res, null_firth)`` tuple built from two ``fit_null`` calls
    (the "with fixed effects" case).  In both runs the first yielded
    variant is inspected field by field and the generator is then drained
    to check the k-mer name of the last variant.  The only expected
    difference between the runs is the p-value of the first variant:
    NaN in the first run, exactly 1 in the second.
    """
    phenotypes = pd.read_csv(P, index_col=0, sep='\t')['binary']
    betas = np.array([
        5.60000000e-01, 2.16450216e-37, -1.81966726e-37,
        1.99034682e-38, -1.34400134e-37, -2.94000294e-38,
        -1.90213827e-37, 8.08080808e-38, 1.89393939e-37,
    ])
    selected = [10, 29, 39, 95, 110, 153, 156, 164]

    # Samples expected to carry / not carry the first selected variant;
    # identical for both runs below.
    carriers = [
        'sample_10', 'sample_11', 'sample_13', 'sample_18', 'sample_19',
        'sample_20', 'sample_23', 'sample_25', 'sample_26', 'sample_31',
        'sample_34', 'sample_36', 'sample_40', 'sample_45',
    ]
    non_carriers = [
        'sample_1', 'sample_12', 'sample_14', 'sample_15', 'sample_16',
        'sample_17', 'sample_2', 'sample_21', 'sample_22', 'sample_24',
        'sample_27', 'sample_28', 'sample_29', 'sample_3', 'sample_30',
        'sample_32', 'sample_33', 'sample_35', 'sample_37', 'sample_38',
        'sample_39', 'sample_4', 'sample_41', 'sample_42', 'sample_43',
        'sample_44', 'sample_46', 'sample_47', 'sample_48', 'sample_49',
        'sample_5', 'sample_50', 'sample_6', 'sample_7', 'sample_8',
        'sample_9',
    ]

    def check_first_variant(variant, check_pvalue):
        # Shared field-by-field checks; ``check_pvalue`` receives the
        # variant's p-value at the point where the runs diverge.
        self.assertEqual(variant.kmer, 'FM211187_83_G_A')
        self.assertEqual(variant.af, 0.28)
        self.assertTrue(abs(variant.prep - 0.17050715825327736) < 1E-7)
        check_pvalue(variant.pvalue)
        self.assertTrue(abs(variant.kbeta - 2.164502164502162e-37) < 1E-7)
        self.assertEqual(variant.max_lineage, None)
        self.assertEqual(variant.kstrains, carriers)
        self.assertEqual(variant.nkstrains, non_carriers)
        self.assertEqual(len(variant.notes), 0)

    # first run: None in the sixth positional slot
    variants = find_enet_selected(
        betas, selected, phenotypes, np.array([[]]), 'vcf',
        None, False, None,
        VariantFile(VCF), set(phenotypes.index),
        None, False, False, None, False,
    )
    current = next(variants)
    check_first_variant(
        current, lambda pvalue: self.assertTrue(np.isnan(pvalue))
    )
    # read to exhaustion; ``current`` ends up bound to the last variant
    for current in variants:
        pass
    self.assertEqual(current.kmer, 'FM211187_3592_G_A')

    # with fixed effects
    pf = np.loadtxt(PFIRTH)
    mf = np.loadtxt(MFIRTH)
    no_cov = pd.DataFrame([])
    null_res = fit_null(pf, mf, no_cov, False, firth=False).llr
    null_firth = fit_null(pf, mf, no_cov, False, firth=True)
    variants = find_enet_selected(
        betas, selected, phenotypes, np.array([[]]), 'vcf',
        (mf, null_res, null_firth), False, None,
        VariantFile(VCF), set(phenotypes.index),
        None, False, False, None, False,
    )
    current = next(variants)
    check_first_variant(
        current, lambda pvalue: self.assertEqual(pvalue, 1)
    )
    # read to exhaustion; ``current`` ends up bound to the last variant
    for current in variants:
        pass
    self.assertEqual(current.kmer, 'FM211187_3592_G_A')