示例#1
0
    def work_sequence(self):
        """
        Yield one zero-argument callable per leave-one-chromosome-out fold.

        The SNP data and phenotype are loaded once via ``load_intersect`` and a
        linear regression (``f_regression``) is run on the full data. The SNPs
        are then split with ``LeaveOneChromosomeOut`` using the first column of
        ``self.snp_reader.pos`` as the chromosome id. For every fold a callable
        is yielded that, when invoked, runs
        ``self.dowork(i, train_snp_idx, test_snp_idx, result, G, y)``.

        Only the first fold (``i == 0``) receives a ``result`` dictionary
        (``p_values`` pre-filled with -1, the linear-regression p-values, the
        SNP names and the SNP positions); every other fold receives ``None``.

        Because this is a generator, nothing (including the chromosome-count
        check) runs until the first item is requested.

        :raises Exception: if the number of folds differs from
            ``self.chrom_count``.
        """
        # TODO(review): is it OK to do the intersect and the linear regression
        # 23 extra times?
        G, y, snp_name, _ = load_intersect(self.snp_reader, self.pheno_fn)

        # Linear-regression p-values; the first return value is not needed.
        _, p_values_lin = f_regression(G, y, center=True)

        # Column 0 of ``pos`` is used as the chromosome id of each SNP.
        chr_ids = self.snp_reader.pos[:, 0]
        loco = LeaveOneChromosomeOut(chr_ids, indices=True)

        # Compare by value: ``is not`` tests object identity, which is
        # unreliable for integers and always true for numpy integer types.
        if len(loco) != self.chrom_count:
            raise Exception("The snp reader has {0} chromosome, not {1} as specified".format(len(loco), self.chrom_count))

        for i, (train_snp_idx, test_snp_idx) in enumerate(loco):
            if i == 0:
                result = {"p_values": -np.ones(len(snp_name)),
                          "p_values_lin": p_values_lin,
                          "rs": snp_name,
                          "pos": self.snp_reader.pos}
            else:
                result = None

            # The default arguments bind the current loop values to the
            # lambda; without them every callable would see the values of the
            # last iteration (late-binding closures).
            yield lambda i=i, train_snp_idx=train_snp_idx, test_snp_idx=test_snp_idx, result=result, G=G, y=y: self.dowork(i, train_snp_idx, test_snp_idx, result, G, y)
示例#2
0
    def test_results_identical_with_fastlmmc(self):
        """
        Make sure the Python GWAS code paths yield the same results as fastlmmC.

        The toy data set is located relative to this file. Its first 5000 SNPs
        (``idx_sim``) are passed as ``G0`` / the first SNP set, and the next
        5000 (``idx_test``) are the SNPs being tested. The following
        comparisons are made:

        * ``GwasPrototype`` (REML=False) against ``GwasTest`` (REML=False),
          on log p-values;
        * ``FastGwas`` against ``GwasTest`` (REML=True), on log p-values;
        * ``single_snp`` with a fixed ``h2`` against ``FastGwas`` and
          ``GwasTest``;
        * ``single_snp`` with ``h2=None`` against ``GwasTest`` with
          ``delta=None``.
        """
        currentFolder = os.path.dirname(os.path.realpath(__file__))
        bed_fn = os.path.join(currentFolder, "../../feature_selection/examples/toydata")
        pheno_fn = os.path.join(currentFolder, "../../feature_selection/examples/toydata.phe")

        snp_reader = Bed(bed_fn)
        G, y, _, _ = load_intersect(snp_reader, pheno_fn)

        # NOTE: despite the name, these are the reader's ``rs`` identifiers.
        snp_pos = snp_reader.rs

        idx_sim = range(0, 5000)
        idx_test = range(5000, 10000)

        snp_pos_sim = snp_pos[idx_sim]
        snp_pos_test = snp_pos[idx_test]

        G_chr1, G_chr2 = G[:, idx_sim], G[:, idx_test]
        delta = 1.0

        ###################################
        # REML IN lmm.py is BROKEN!!

        # we compare REML=False in lmm.py to fastlmmc
        gwas_c_reml = GwasTest(bed_fn, pheno_fn, snp_pos_sim, snp_pos_test, delta, REML=False)
        gwas_c_reml.run_gwas()

        gwas = GwasPrototype(G_chr1, G_chr2, y, delta, REML=False)
        gwas.run_gwas()

        # check p-values in log-space!
        np.testing.assert_array_almost_equal(np.log(gwas.p_values), np.log(gwas_c_reml.p_values), decimal=3)

        # we compare lmm_cov.py to fastlmmc
        # NOTE(review): the original comment said "with REML=False", but the
        # fastlmmc run below is created with REML=True -- confirm which one is
        # intended.
        gwas_c = GwasTest(bed_fn, pheno_fn, snp_pos_sim, snp_pos_test, delta, REML=True)
        gwas_c.run_gwas()
        gwas_f = FastGwas(G_chr1, G_chr2, y, delta, findh2=False)
        gwas_f.run_gwas()
        np.testing.assert_array_almost_equal(np.log(gwas_c.p_values), np.log(gwas_f.p_values_F), decimal=2)

        # additional testing code for the new wrapper functions

        # Fix delta
        from pysnptools.snpreader import Bed as BedSnpReader
        from fastlmm.association.single_snp import single_snp
        snpreader = BedSnpReader(bed_fn, count_A1=False)
        # h2 is derived from the fixed delta as 1 / (delta + 1).
        frame = single_snp(test_snps=snpreader[:, idx_test], pheno=pheno_fn, G0=snpreader[:, idx_sim], h2=1.0/(delta+1.0), leave_out_one_chrom=False, count_A1=False)
        pvalue_list = frame['PValue'].values
        np.testing.assert_allclose(gwas_f.sorted_p_values_F, pvalue_list, rtol=1e-10)

        # Re-order the frame by genomic position before comparing against the
        # unsorted ``p_values`` of the fastlmmc run.
        p_vals_by_genomic_pos = frame.sort_values(["Chr", "ChrPos"])["PValue"].tolist()
        np.testing.assert_allclose(gwas_c_reml.p_values, p_vals_by_genomic_pos, rtol=.1)
        np.testing.assert_allclose(gwas_c_reml.p_values, gwas_f.p_values_F, rtol=.1)
        np.testing.assert_allclose(gwas_f.sorted_p_values_F, gwas_c_reml.sorted_p_values, rtol=.1)

        # Search over delta
        gwas_c_reml_search = GwasTest(bed_fn, pheno_fn, snp_pos_sim, snp_pos_test, delta=None, REML=True)
        gwas_c_reml_search.run_gwas()

        frame_search = single_snp(test_snps=snpreader[:, idx_test], pheno=pheno_fn, G0=snpreader[:, idx_sim], h2=None, leave_out_one_chrom=False, count_A1=False)
        pvalue_list_search = frame_search['PValue'].values

        p_vals_by_genomic_pos = frame_search.sort_values(["Chr", "ChrPos"])["PValue"].tolist()
        np.testing.assert_allclose(gwas_c_reml_search.p_values, p_vals_by_genomic_pos, rtol=.001)
        np.testing.assert_allclose(gwas_c_reml_search.sorted_p_values, pvalue_list_search, rtol=.001)
示例#3
0
    def test_results_identical_with_fastlmmc(self):
        """
        Make sure the Python GWAS code paths yield the same results as fastlmmC.

        The toy data set is located relative to this file. Its first 5000 SNPs
        (``idx_sim``) are passed as ``G0`` / the first SNP set, and the next
        5000 (``idx_test``) are the SNPs being tested. The following
        comparisons are made:

        * ``GwasPrototype`` (REML=False) against ``GwasTest`` (REML=False),
          on log p-values;
        * ``FastGwas`` against ``GwasTest`` (REML=True), on log p-values;
        * ``single_snp`` with a fixed ``h2`` against ``FastGwas`` and
          ``GwasTest``;
        * ``single_snp`` with ``h2=None`` against ``GwasTest`` with
          ``delta=None``.
        """
        currentFolder = os.path.dirname(os.path.realpath(__file__))
        bed_fn = os.path.join(currentFolder, "../../feature_selection/examples/toydata")
        pheno_fn = os.path.join(currentFolder, "../../feature_selection/examples/toydata.phe")

        snp_reader = Bed(bed_fn)
        G, y, _, _ = load_intersect(snp_reader, pheno_fn)

        # NOTE: despite the name, these are the reader's ``rs`` identifiers.
        snp_pos = snp_reader.rs

        idx_sim = range(0, 5000)
        idx_test = range(5000, 10000)

        snp_pos_sim = snp_pos[idx_sim]
        snp_pos_test = snp_pos[idx_test]

        G_chr1, G_chr2 = G[:, idx_sim], G[:, idx_test]
        delta = 1.0

        ###################################
        # REML IN lmm.py is BROKEN!!

        # we compare REML=False in lmm.py to fastlmmc
        gwas_c_reml = GwasTest(bed_fn, pheno_fn, snp_pos_sim, snp_pos_test, delta, REML=False)
        gwas_c_reml.run_gwas()

        gwas = GwasPrototype(G_chr1, G_chr2, y, delta, REML=False)
        gwas.run_gwas()

        # check p-values in log-space!
        np.testing.assert_array_almost_equal(np.log(gwas.p_values), np.log(gwas_c_reml.p_values), decimal=3)

        # we compare lmm_cov.py to fastlmmc
        # NOTE(review): the original comment said "with REML=False", but the
        # fastlmmc run below is created with REML=True -- confirm which one is
        # intended.
        gwas_c = GwasTest(bed_fn, pheno_fn, snp_pos_sim, snp_pos_test, delta, REML=True)
        gwas_c.run_gwas()
        gwas_f = FastGwas(G_chr1, G_chr2, y, delta, findh2=False)
        gwas_f.run_gwas()
        np.testing.assert_array_almost_equal(np.log(gwas_c.p_values), np.log(gwas_f.p_values_F), decimal=2)

        # additional testing code for the new wrapper functions

        # Fix delta
        from pysnptools.snpreader import Bed as BedSnpReader
        from fastlmm.association.single_snp import single_snp
        snpreader = BedSnpReader(bed_fn, count_A1=False)
        # h2 is derived from the fixed delta as 1 / (delta + 1).
        frame = single_snp(test_snps=snpreader[:, idx_test], pheno=pheno_fn, G0=snpreader[:, idx_sim], h2=1.0/(delta+1.0), leave_out_one_chrom=False, count_A1=False)
        pvalue_list = frame['PValue'].values
        np.testing.assert_allclose(gwas_f.sorted_p_values_F, pvalue_list, rtol=1e-10)

        # Re-order the frame by genomic position before comparing against the
        # unsorted ``p_values`` of the fastlmmc run.
        p_vals_by_genomic_pos = frame.sort_values(["Chr", "ChrPos"])["PValue"].tolist()
        np.testing.assert_allclose(gwas_c_reml.p_values, p_vals_by_genomic_pos, rtol=.1)
        np.testing.assert_allclose(gwas_c_reml.p_values, gwas_f.p_values_F, rtol=.1)
        np.testing.assert_allclose(gwas_f.sorted_p_values_F, gwas_c_reml.sorted_p_values, rtol=.1)

        # Search over delta
        gwas_c_reml_search = GwasTest(bed_fn, pheno_fn, snp_pos_sim, snp_pos_test, delta=None, REML=True)
        gwas_c_reml_search.run_gwas()

        frame_search = single_snp(test_snps=snpreader[:, idx_test], pheno=pheno_fn, G0=snpreader[:, idx_sim], h2=None, leave_out_one_chrom=False, count_A1=False)
        pvalue_list_search = frame_search['PValue'].values

        p_vals_by_genomic_pos = frame_search.sort_values(["Chr", "ChrPos"])["PValue"].tolist()
        np.testing.assert_allclose(gwas_c_reml_search.p_values, p_vals_by_genomic_pos, rtol=.001)
        np.testing.assert_allclose(gwas_c_reml_search.sorted_p_values, pvalue_list_search, rtol=.001)