Exemplo n.º 1
0
 def testBoundaryChr10(self):
     """Select the chromosome by name ('chr10') and check a 1-3 mb window."""
     BoundaryCheck.set_chrom('chr10')
     region = BoundaryCheck(mb=[1,3])
     self.assertTrue(region.valid)
     self.assertFalse(region.NoExclusions())

     # Positions from 1,000,000 through 3,000,000 on chromosome 10 pass
     for pos in (1000000, 1200000, 3000000):
         self.assertTrue(region.TestBoundary(10, pos, ""))

     # One base outside either edge, or a different chromosome, is rejected
     for chrom, pos in ((10, 3000001), (10, 999999), (1, 1000500)):
         self.assertFalse(region.TestBoundary(chrom, pos, ""))
Exemplo n.º 2
0
 def testBoundaryInitBPWithInclusions(self):
     """Check a bp window on chromosome 1 after loading SNP ids via LoadSNPs."""
     BoundaryCheck.chrom = 1
     region = BoundaryCheck(bp=[10000, 500000])
     region.LoadSNPs(["rs12345", "rs23456"])
     self.assertFalse(region.NoExclusions())
     self.assertTrue(region.valid)

     # Position-only queries (empty rsid): (expected, chromosome, position)
     positional_cases = (
         (False, 1, 500),
         (True, 1, 10000),
         (True, 1, 500000),
         (True, 1, 250000),
         (False, 2, 250000),
         (False, 10, 10000),
     )
     for expected, chrom, pos in positional_cases:
         self.assertEqual(expected, region.TestBoundary(chrom, pos, ""))

     # The two loaded rsids are expected to pass at positions above 500000...
     for pos, rsid in ((1000000, "rs12345"), (1200000, "rs23456")):
         self.assertTrue(region.TestBoundary(1, pos, rsid))
     # ...while an rsid that was never loaded is rejected there
     self.assertFalse(region.TestBoundary(1, 1200011, "rs345678"))
Exemplo n.º 3
0
    def testTpedAnalysis(self):
        """Run the analysis on the TPED fixture and check the first three results."""
        # The boundary gets no bp/kb/mb limits; only BoundaryCheck.chrom is set
        BoundaryCheck.chrom = 1
        DataParser.boundary = BoundaryCheck()
        pheno = PhenoCovar()
        dataset = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        dataset.load_tfam(pheno)
        dataset.load_genotypes()

        results = list(mv_esteq.RunAnalysis(dataset, pheno))

        # One row per result, in order:
        # (p_mvtest, betas[1], beta_stderr[1], beta_pvalues[1],
        #            betas[3], beta_stderr[3], beta_pvalues[3])
        expected_rows = (
            (0.0034756155, 0.1134684009, 0.0337649965541, 0.0007779211,
             -0.0033479839, 0.0492050029324, 0.9457525716),
            (0.57778118, 0.02798537, 0.033790691857, 0.40755865,
             0.03275892, 0.0475661, 0.49101013),
            (0.44661276, 0.01663975, 0.03443300, 0.62891811,
             0.05712017, 0.04783608, 0.232446188),
        )
        for row_idx, row in enumerate(expected_rows):
            p_mv, beta1, se1, pval1, beta3, se3, pval3 = row
            result = results[row_idx]
            self.assertAlmostEqual(p_mv, result.p_mvtest, places=6)
            self.assertAlmostEqual(beta1, result.betas[1], places=6)
            self.assertAlmostEqual(se1, result.beta_stderr[1], places=6)
            self.assertAlmostEqual(pval1, result.beta_pvalues[1], places=6)
            self.assertAlmostEqual(beta3, result.betas[3], places=6)
            self.assertAlmostEqual(se3, result.beta_stderr[3], places=6)
            self.assertAlmostEqual(pval3, result.beta_pvalues[3], places=6)
Exemplo n.º 4
0
    def setUp(self):
        """Write the fixture files and snapshot class-level settings.

        The snapshots are stored on ``self``; presumably a tearDown (not
        visible here) restores them.
        """
        self.allele_1 = list("AAACCCGGGTCGTGTATACC")
        self.allele_2 = list("CGTGTATACCAAACCCGGGT")

        self.WriteTestFiles()

        # The same six phenotype values repeated for twelve subjects
        self.phenotypes = [0.1, 0.4, 1.0, 0.5, 0.9, 1.0] * 2
        self.sex = [1, 2, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1]

        # mach parser class attributes
        mach_cls = mach_parser.Parser
        self.chrpos_encoding = mach_cls.chrpos_encoding
        self.dosage_ext = mach_cls.dosage_ext
        self.info_ext = mach_cls.info_ext

        # Keep the current boundary before installing a default one
        self.chrom = BoundaryCheck.chrom
        self.boundary = DataParser.boundary
        DataParser.boundary = BoundaryCheck()

        self.min_maf, self.max_maf = DataParser.min_maf, DataParser.max_maf
        self.snp_miss_tol = DataParser.snp_miss_tol
        self.ind_miss_tol = DataParser.ind_miss_tol
        DataParser.ind_exclusions = []
        self.sex_as_covar = PhenoCovar.sex_as_covariate

        # Copy the has_* flags from DataParser onto this test instance
        for flag in ("has_sex", "has_pheno", "has_parents", "has_fid",
                     "has_liability"):
            setattr(self, flag, getattr(DataParser, flag))

        self.encoding = mach_parser.encoding
        self.compression = DataParser.compressed_pedigree
        DataParser.compressed_pedigree = True
        #self.chunk_stride = mach_parser.chunk_stride

        # Remember the active standardizer, then switch to NoStandardization
        self.standardizer = libgwas.standardizer.get_standardizer()
        libgwas.standardizer.set_standardizer(libgwas.standardizer.NoStandardization)
Exemplo n.º 5
0
    def testBoundariedMiddle(self):
        """Iterate impute data limited to a bp window on chromosome 4.

        Fixture positions outside the window are skipped and counted; the
        test expects twelve of them to be dropped.
        """
        lower, upper = 30734, 33528
        BoundaryCheck.chrom = 4
        DataParser.boundary = BoundaryCheck(bp=[lower, upper])
        impute_parser.encoding = impute_parser.Encoding.Recessive
        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        parser = impute_parser.Parser(self.fam_file,
                                      [self.gen_file, self.gen_file2],
                                      chroms=[3, 4])
        parser.load_family_details(pc)
        parser.load_genotypes()

        cursor = 0
        skipped = 0
        for snp in parser:
            # Advance past fixture entries whose position is outside the window
            while not (lower <= self.positions[cursor] <= upper):
                cursor += 1
                skipped += 1
            self.assertEqual(self.positions[cursor], snp.pos)
            for col, expected in enumerate(self.recessive_encoding[cursor]):
                self.assertAlmostEqual(expected,
                                       snp.genotype_data[col],
                                       places=3)
            cursor += 1
        self.assertEqual(12, skipped)
Exemplo n.º 6
0
    def testBedAnalysisCov(self):
        """Run the analysis on the BED fixture with sex enabled as a covariate."""
        PhenoCovar.sex_as_covariate = True
        DataParser.boundary = BoundaryCheck()
        pheno = PhenoCovar()
        ped_parser = bed_parser.Parser(self.nonmissing_fam,
                                       self.nonmissing_bim,
                                       self.nonmissing_bed)
        ped_parser.load_fam(pheno)
        ped_parser.load_bim(map3=False)
        ped_parser.load_genotypes()

        results = list(mv_esteq.RunAnalysis(ped_parser, pheno))

        # (p_mvtest, lmpv, decimal places used for the lmpv comparison)
        expected_rows = (
            (0.0034238, 0.0143949, 6),
            (0.58495059, 0.65786, 5),
            (0.45178985, 0.83956, 5),
            (0.133661, 0.82169, 5),
            (0.541391, 0.83595, 5),
            (0.035665, 0.94900, 5),
            (0.784660, 0.59324, 5),
            (0.2137434, 0.18069, 5),
            (0.8160148, 0.79734, 5),
        )
        for row_idx, (p_mv, lmpv, lmpv_places) in enumerate(expected_rows):
            result = results[row_idx]
            self.assertAlmostEqual(p_mv, result.p_mvtest, places=6)
            self.assertAlmostEqual(lmpv, result.lmpv, places=lmpv_places)
Exemplo n.º 7
0
    def setUp(self):
        """Write the fixture files and snapshot class-level settings.

        The snapshots are stored on ``self``; presumably a tearDown (not
        visible here) restores them.
        """
        self.allele_1 = list("AAACCCGGGTCGTGTATACC")
        self.allele_2 = list("CGTGTATACCAAACCCGGGT")

        self.WriteTestFiles()

        # The same six phenotype values repeated for twelve subjects
        self.phenotypes = [0.1, 0.4, 1.0, 0.5, 0.9, 1.0] * 2
        self.sex = [1, 2, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1]

        self.gen_ext = impute_parser.Parser.gen_ext

        # Keep the current boundary before installing a default one
        self.chrom = BoundaryCheck.chrom
        self.boundary = DataParser.boundary
        DataParser.boundary = BoundaryCheck()

        self.min_maf, self.max_maf = DataParser.min_maf, DataParser.max_maf
        self.snp_miss_tol = DataParser.snp_miss_tol
        self.ind_miss_tol = DataParser.ind_miss_tol
        DataParser.ind_exclusions = []
        self.sex_as_covar = PhenoCovar.sex_as_covariate

        # Copy the has_* flags from DataParser onto this test instance
        for flag in ("has_sex", "has_pheno", "has_parents", "has_fid",
                     "has_liability"):
            setattr(self, flag, getattr(DataParser, flag))

        self.encoding = impute_parser.encoding
        self.compression = DataParser.compressed_pedigree
        DataParser.compressed_pedigree = True

        # Remember the info threshold, then set it to 0.0 for the tests
        self.parser_info_thresh = impute_parser.Parser.info_threshold
        impute_parser.Parser.info_threshold = 0.0
Exemplo n.º 8
0
    def testPedBoundaryTPed(self):
        """Restrict a TPED dataset to chromosome 2 and compare against the raw file.

        The raw TPED rows are read directly from ``self.tped_filename`` and
        compared with what the parser yields, starting at fixture row 4.
        """
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        DataParser.boundary = BoundaryCheck()
        BoundaryCheck.chrom = 2
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Use a context manager so the file handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(self.tped_filename) as tped_file:
            pedigree = [line.split() for line in tped_file]

        # First pass: locus metadata returned by get_loci()
        index = 4
        loci = ped_parser.get_loci()
        for snp in loci:
            self.assertEqual(int(pedigree[index][0]), snp.chr)
            self.assertEqual(int(pedigree[index][3]), snp.pos)
            self.assertAlmostEqual(self.hetero_freq_tped[index],
                                   snp.hetero_freq,
                                   places=4)

            index += 1
        self.assertEqual(3, ped_parser.locus_count)

        # Second pass: iterate the parser itself and check the genotypes
        index = 4
        for snp in ped_parser:
            self.assertEqual(int(pedigree[index][0]), snp.chr)
            self.assertEqual(int(pedigree[index][3]), snp.pos)
            self.assertEqual(pedigree[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))

            index += 1
        # Rows 4, 5 and 6 should have been visited
        self.assertEqual(7, index)
Exemplo n.º 9
0
    def setUp(self):
        """Write the fixture files, load the ped lines and snapshot settings.

        The snapshots are stored on ``self``; presumably a tearDown (not
        visible here) restores them.
        """
        self.WriteTestFiles()

        self.ped = get_lines(self.ped_filename)

        # The same six phenotype values repeated for twelve subjects
        self.phenotypes = [0.1, 0.4, 1.0, 0.5, 0.9, 1.0] * 2
        self.sex = [1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1]

        # Keep the current boundary before installing a default one
        self.chrom = BoundaryCheck.chrom
        self.boundary = DataParser.boundary
        DataParser.boundary = BoundaryCheck()

        self.min_maf, self.max_maf = DataParser.min_maf, DataParser.max_maf
        self.snp_miss_tol = DataParser.snp_miss_tol
        self.ind_miss_tol = DataParser.ind_miss_tol
        DataParser.ind_exclusions = []
        DataParser.ind_inclusions = []
        self.sex_as_covar = PhenoCovar.sex_as_covariate

        # Copy the has_* flags from DataParser onto this test instance
        for flag in ("has_sex", "has_pheno", "has_parents", "has_fid",
                     "has_liability"):
            setattr(self, flag, getattr(DataParser, flag))

        # Same value as sex_as_covar above, stored under a second name
        self.sex_as_covariate = PhenoCovar.sex_as_covariate

        # Remember the active standardizer, then switch to NoStandardization
        self.standardizer = libgwas.standardizer.get_standardizer()
        libgwas.standardizer.set_standardizer(
            libgwas.standardizer.NoStandardization)
Exemplo n.º 10
0
    def setUp(self):
        """Write the fixture files and snapshot class-level settings.

        The snapshots are stored on ``self``; presumably a tearDown (not
        visible here) restores them.
        """
        self.WriteTestFiles()

        # The same six phenotype values repeated for twelve subjects
        self.phenotypes = [0.1, 0.4, 1.0, 0.5, 0.9, 1.0] * 2
        self.sex = [1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1]

        self.chrom = BoundaryCheck.chrom
        self.boundary = DataParser.boundary
        self.min_maf, self.max_maf = DataParser.min_maf, DataParser.max_maf
        self.snp_miss_tol = DataParser.snp_miss_tol
        self.ind_miss_tol = DataParser.ind_miss_tol
        self.sex_as_covar = PhenoCovar.sex_as_covariate

        # Copy the has_* flags from DataParser onto this test instance
        for flag in ("has_sex", "has_pheno", "has_parents", "has_fid",
                     "has_liability"):
            setattr(self, flag, getattr(DataParser, flag))

        # the faked pheno/covariate non-missing mask: twelve True entries
        self.non_missing = numpy.ones(12, dtype=bool)

        # Install a default boundary now that the previous one is saved
        DataParser.boundary = BoundaryCheck()
Exemplo n.º 11
0
    def testBoundary(self):
        """Limit the parser to chromosome 2 and compare loci with the map data."""
        pc = PhenoCovar()
        DataParser.boundary = BoundaryCheck()
        BoundaryCheck.chrom = 2
        parser = Parser(self.nonmissing, data_field='GT')
        parser.init_subjects(pc)
        parser.load_genotypes()

        row = 4
        for snp in parser:
            # An all-True filter with one entry per missing_genotypes element
            keep_all = numpy.ones(snp.missing_genotypes.shape[0], dtype=bool)
            try:
                genodata = snp.get_genotype_data(keep_all)
                map_entry = self.nonmissing_mapdata[row]
                self.assertEqual(int(map_entry[0]), snp.chr)
                self.assertEqual(int(map_entry[1]), snp.pos)
                self.assertEqual(map_entry[2], snp.rsid)
                self.assertEqual(self.genotypes[row],
                                 list(genodata.genotypes))
            except (TooMuchMissing, InvalidFrequency):
                # These two exceptions are ignored; the row is still counted
                pass
            row += 1
        self.assertEqual(7, row)
Exemplo n.º 12
0
    def testBedAnalysis(self):
        """Run the analysis on the BED fixture and check nine results."""
        # The boundary gets no bp/kb/mb limits; only BoundaryCheck.chrom is set
        BoundaryCheck.chrom = 1
        DataParser.boundary = BoundaryCheck()
        pheno = PhenoCovar()
        ped_parser = bed_parser.Parser(self.nonmissing_fam,
                                       self.nonmissing_bim,
                                       self.nonmissing_bed)
        ped_parser.load_fam(pheno)
        ped_parser.load_bim(map3=False)
        ped_parser.load_genotypes()

        results = list(mv_esteq.RunAnalysis(ped_parser, pheno))

        # (p_mvtest, lmpv) expected for each result, in order
        expected_rows = (
            (0.00347562, 0.00085539),
            (0.5777812, 0.42212155),
            (0.44661276, 0.61386344),
            (0.13555597, 0.59682217),
            (0.54029842, 0.60475964),
            (0.03547514, 0.86663730),
            (0.79249216, 0.67678089),
            (0.20973300, 0.14431260),
            (0.81471528, 0.56378497),
        )
        for row_idx, (p_mv, lmpv) in enumerate(expected_rows):
            result = results[row_idx]
            self.assertAlmostEqual(p_mv, result.p_mvtest, places=6)
            self.assertAlmostEqual(lmpv, result.lmpv, places=6)
Exemplo n.º 13
0
    def testTpedBounded(self):
        """Run the TPED analysis restricted to bp 2000-3000 on chromosome 1."""
        BoundaryCheck.chrom = 1
        DataParser.boundary = BoundaryCheck(bp=[2000,3000])
        pheno = PhenoCovar()
        dataset = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        dataset.load_tfam(pheno)
        dataset.load_genotypes()

        results = list(mv_esteq.RunAnalysis(dataset, pheno))

        # The first reported result is expected at chromosome 1, position 2000
        self.assertEqual(1, results[0].chr)
        self.assertEqual(2000, results[0].pos)

        # One row per result, in order:
        # (p_mvtest, betas[1], beta_stderr[1], beta_pvalues[1],
        #            betas[3], beta_stderr[3], beta_pvalues[3])
        expected_rows = (
            (0.57778118, 0.02798537, 0.033790691857, 0.40755865,
             0.03275892, 0.0475661, 0.49101013),
            (0.44661276, 0.01663975, 0.03443300, 0.62891811,
             0.05712017, 0.04783608, 0.232446188),
        )
        for row_idx, row in enumerate(expected_rows):
            p_mv, beta1, se1, pval1, beta3, se3, pval3 = row
            result = results[row_idx]
            self.assertAlmostEqual(p_mv, result.p_mvtest, places=6)
            self.assertAlmostEqual(beta1, result.betas[1], places=6)
            self.assertAlmostEqual(se1, result.beta_stderr[1], places=6)
            self.assertAlmostEqual(pval1, result.beta_pvalues[1], places=6)
            self.assertAlmostEqual(beta3, result.betas[3], places=6)
            self.assertAlmostEqual(se3, result.beta_stderr[3], places=6)
            self.assertAlmostEqual(pval3, result.beta_pvalues[3], places=6)
Exemplo n.º 14
0
 def testBoundaryX(self):
     """set_chrom('X') is expected to map to chromosome 23 named 'X'."""
     BoundaryCheck.set_chrom('X')
     b = BoundaryCheck()
     self.assertEqual(23, BoundaryCheck.chrom)
     self.assertEqual('X', BoundaryCheck.chrom_name)

     # Lower-case, upper-case and numeric spellings are all expected to match
     matching = (('x', 100, "rs100"), ('X', 1000, "rs1000"), (23, 1000, "rs1000"))
     for chrom, pos, rsid in matching:
         self.assertTrue(b.TestBoundary(chrom, pos, rsid))

     # A different chromosome name is rejected
     self.assertFalse(b.TestBoundary('Y', 100, "rs100"))
Exemplo n.º 15
0
 def testBoundaryInitKB(self):
     """Check a window given in kb (20-50) on chromosome 5."""
     BoundaryCheck.chrom = 5
     region = BoundaryCheck(kb=[20, 50])
     self.assertFalse(region.NoExclusions())
     self.assertEqual(True, region.valid)

     # (expected, chromosome, position) -- the test treats 20000..50000 as inside
     cases = (
         (False, 5, 15000),
         (True, 5, 20000),
         (True, 5, 30000),
         (True, 5, 50000),
         (False, 5, 50001),
         (False, 1, 25000),
         (False, 10, 20000),
     )
     for expected, chrom, pos in cases:
         self.assertEqual(expected, region.TestBoundary(chrom, pos, ""))
Exemplo n.º 16
0
 def testBoundaryExceedPos(self):
     """Track beyond_upper_bound across successive TestBoundary calls."""
     BoundaryCheck.chrom = 10
     b = BoundaryCheck(mb=[1,3])
     self.assertTrue(b.valid)
     self.assertFalse(b.NoExclusions())

     # (position, expected TestBoundary result, expected beyond_upper_bound)
     # The order matters: the flag is checked right after each call.
     steps = (
         (100, False, False),
         (1000000, True, False),
         (1200000, True, False),
         (3000000, True, False),
         (3000001, False, True),
         (999999, False, False),
     )
     for pos, inside, beyond in steps:
         check_inside = self.assertTrue if inside else self.assertFalse
         check_inside(b.TestBoundary(10, pos, ""))
         check_beyond = self.assertTrue if beyond else self.assertFalse
         check_beyond(b.beyond_upper_bound)
Exemplo n.º 17
0
 def testMapFileWithRegionAndSnpExclusion(self):
     """Load the map file with a 0-10000 bp window on chromosome 2 and check the mask."""
     BoundaryCheck.chrom = 2
     DataParser.boundary = BoundaryCheck(bp=[0, 10000])
     ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
     ped_parser.load_mapfile()

     self.assertEqual(2, len(ped_parser.markers))
     mask_rows = len(ped_parser.snp_mask[:, 0])
     self.assertEqual(7, mask_rows)
     self.assertEqual(2, ped_parser.locus_count)

     # Masks are filters, so there are 7 entries, of which 5 are set to 1
     masked_total = numpy.sum(ped_parser.snp_mask[:, 0])
     self.assertEqual(5, masked_total)
     self.assertEqual(0, ped_parser.snp_mask[4, 1])
     self.assertEqual(0, ped_parser.snp_mask[5, 0])
Exemplo n.º 18
0
    def setUp(self):
        """Resolve the bed fixture paths, define expected data and snapshot settings.

        The snapshots are stored on ``self``; presumably a tearDown (not
        visible here) restores them.
        """
        bed_suffixes = ("bed", "bim", "fam")

        # Fixture set containing missing genotypes
        self.missing = "tests/bedfiles/ped_missing"
        self.missing_bed, self.missing_bim, self.missing_fam = [
            resource_filename("libgwas", "%s.%s" % (self.missing, suffix))
            for suffix in bed_suffixes
        ]
        self.genotypes = [
            [0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0],
            [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
            [0, 2, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [0, 1, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1, 2, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]

        # Fixture set without missing genotypes
        self.nonmissing = "tests/bedfiles/ped_nomiss"
        self.nonmissing_bed, self.nonmissing_bim, self.nonmissing_fam = [
            resource_filename("libgwas", "%s.%s" % (self.nonmissing, suffix))
            for suffix in bed_suffixes
        ]

        # Expected genotype rows for the missing-data fixture (uneven lengths)
        self.genotypes_w_missing = [
            [0, 1],
            [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
            [0, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [0, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]
        self.nonmissing_mapdata = libgwas.get_lines(self.nonmissing_bim,
                                                    split=True)
        self.missing_mapdata = libgwas.get_lines(self.missing_bim, split=True)

        # The same six phenotype values repeated for twelve subjects
        self.phenotypes = [0.1, 0.4, 1.0, 0.5, 0.9, 1.0] * 2
        self.sex = [1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1]

        self.chrom = BoundaryCheck.chrom
        self.boundary = DataParser.boundary
        self.min_maf, self.max_maf = DataParser.min_maf, DataParser.max_maf
        self.snp_miss_tol = DataParser.snp_miss_tol
        self.ind_miss_tol = DataParser.ind_miss_tol
        self.sex_as_covar = PhenoCovar.sex_as_covariate

        # Copy the has_* flags from DataParser onto this test instance
        for flag in ("has_sex", "has_pheno", "has_parents", "has_fid",
                     "has_liability"):
            setattr(self, flag, getattr(DataParser, flag))

        # Install a default boundary now that the previous one is saved
        DataParser.boundary = BoundaryCheck()
Exemplo n.º 19
0
 def testBoundaryInitBP(self):
     """Check a window given in bp (10000-500000) on chromosome 1."""
     BoundaryCheck.chrom = 1
     region = BoundaryCheck(bp=[10000, 500000])
     self.assertFalse(region.NoExclusions())
     self.assertTrue(region.valid)

     # (expected, chromosome, position) -- both window edges count as inside
     cases = (
         (False, 1, 500),
         (True, 1, 10000),
         (True, 1, 500000),
         (True, 1, 250000),
         (False, 2, 250000),
         (False, 10, 10000),
     )
     for expected, chrom, pos in cases:
         self.assertEqual(expected, region.TestBoundary(chrom, pos, ""))
Exemplo n.º 20
0
 def testBoundaryInitMB(self):
     """Check a window given in mb (1-3) on chromosome 10."""
     BoundaryCheck.chrom = 10
     region = BoundaryCheck(mb=[1,3])
     self.assertTrue(region.valid)
     self.assertFalse(region.NoExclusions())

     # Positions from 1,000,000 through 3,000,000 on chromosome 10 pass
     for pos in (1000000, 1200000, 3000000):
         self.assertTrue(region.TestBoundary(10, pos, ""))

     # One base outside either edge, or a different chromosome, is rejected
     for chrom, pos in ((10, 3000001), (10, 999999), (1, 1000500)):
         self.assertFalse(region.TestBoundary(chrom, pos, ""))
Exemplo n.º 21
0
    def testDefaultBoundaryInitialization(self):
        """A BoundaryCheck built with no arguments is expected to be valid."""
        # With chrom set to -1 and no boundaries supplied, the object is
        # still expected to report itself as valid.
        BoundaryCheck.chrom = -1
        b = BoundaryCheck()
        self.assertEqual(True, b.valid)

        # At this point it should accept these chromosome/position pairs
        accepted = ((1, 100), (10, 1000000), (25, 10000), ('Y', 1400))
        for chrom, pos in accepted:
            self.assertTrue(b.TestBoundary(chrom, pos, ""))

        # ...and reject these chromosome values
        rejected = (('FF', 35), (28, 10000))
        for chrom, pos in rejected:
            self.assertFalse(b.TestBoundary(chrom, pos, ""))

        # We should test that our short circuit functionality works
        self.assertTrue(b.NoExclusions())
Exemplo n.º 22
0
    def testBoundedMiddle(self):
        """Iterate mach data limited to bp 1020-1137 on chromosome 2.

        The SNPs yielded are expected to line up with fixture entries 8..12.
        """
        mach_parser.Parser.chrpos_encoding = True
        BoundaryCheck.chrom = 2
        DataParser.boundary = BoundaryCheck(bp=[1020, 1137])
        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        parser = mach_parser.Parser([self.gen_file, self.gen_file2])
        parser.load_family_details(pc)
        parser.load_genotypes()

        cursor = 8
        for snp in parser:
            self.assertEqual(self.positions[cursor], snp.pos)
            for col, expected in enumerate(self.dosage_encoding[cursor]):
                self.assertAlmostEqual(expected, snp.genotype_data[col], places=3)
            cursor += 1
        self.assertEqual(13, cursor)
Exemplo n.º 23
0
    def testPedBoundary(self):
        """Limit a pedigree dataset to chromosome 2 and compare with the map file."""
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        DataParser.boundary = BoundaryCheck()
        BoundaryCheck.chrom = 2
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        pedigree = get_lines(self.map_filename, split=True)

        # Iteration is expected to line up with map rows 4, 5 and 6
        row = 4
        for snp in ped_parser:
            map_entry = pedigree[row]
            self.assertEqual(int(map_entry[0]), snp.chr)
            self.assertEqual(int(map_entry[3]), snp.pos)
            self.assertEqual(map_entry[1], snp.rsid)
            self.assertEqual(self.genotypes[row], list(snp.genotype_data))

            row += 1
        self.assertEqual(7, row)
Exemplo n.º 24
0
    def setUp(self):
        """Resolve the analysis fixture paths, define expected data and snapshot settings.

        The snapshots are stored on ``self``; presumably a tearDown (not
        visible here) restores them.
        """
        bed_suffixes = ("bed", "bim", "fam")

        # Both the "missing" and "nonmissing" names use the same fixture prefix
        self.missing = "bedfiles/analysis"
        self.missing_bed, self.missing_bim, self.missing_fam = [
            resource_filename("tests", "%s.%s" % (self.missing, suffix))
            for suffix in bed_suffixes
        ]
        self.genotypes = [
            [2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2],
            [1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1],
            [2, 0, 1, 1, 2, 2, 2, 0, 1, 1, 2, 2],
            [2, 1, 0, 1, 1, 2, 2, 1, 0, 1, 1, 2],
            [1, 0, 2, 1, 2, 2, 1, 0, 2, 1, 2, 2],
            [1, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2],
            [2, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2],
        ]

        self.nonmissing = "bedfiles/analysis"
        self.nonmissing_bed, self.nonmissing_bim, self.nonmissing_fam = [
            resource_filename("tests", "%s.%s" % (self.nonmissing, suffix))
            for suffix in bed_suffixes
        ]
        self.genotypes_w_missing = [
            [2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1],
            [1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1],
            [2, -1, 1, 1, 2, 2, 2, 0, 1, 1, 2, 2],
            [2, -1, 0, 1, 1, 2, 2, 1, 0, 1, 1, 2],
            [1, -1, 2, 1, 2, 2, 1, 0, 2, 1, 2, 2],
            [1, -1, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2],
            [2, -1, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2],
        ]

        self.chrom = BoundaryCheck.chrom
        self.boundary = DataParser.boundary
        self.min_maf, self.max_maf = DataParser.min_maf, DataParser.max_maf
        self.snp_miss_tol = DataParser.snp_miss_tol
        self.ind_miss_tol = DataParser.ind_miss_tol
        self.sex_as_covar = PhenoCovar.sex_as_covariate

        # Remember the active standardizer, then switch to NoStandardization
        self.standardizer = libgwas.standardizer.get_standardizer()
        libgwas.standardizer.set_standardizer(
            libgwas.standardizer.NoStandardization)

        # Install a default boundary now that the previous one is saved
        DataParser.boundary = BoundaryCheck()
Exemplo n.º 25
0
    def testBedBounded(self):
        """Run the BED analysis restricted to bp 2000-3000 on chromosome 1."""
        BoundaryCheck.chrom = 1
        DataParser.boundary = BoundaryCheck(bp=[2000, 3000])
        pheno = PhenoCovar()
        ped_parser = bed_parser.Parser(self.nonmissing_fam,
                                       self.nonmissing_bim,
                                       self.nonmissing_bed)
        ped_parser.load_fam(pheno)
        ped_parser.load_bim(map3=False)
        ped_parser.load_genotypes()

        results = list(mv_esteq.RunAnalysis(ped_parser, pheno))

        # The first reported result is expected at chromosome 1, position 2000
        self.assertEqual(1, results[0].chr)
        self.assertEqual(2000, results[0].pos)

        # (p_mvtest, lmpv) expected for each result, in order
        expected_rows = (
            (0.5777811, 0.4221215),
            (0.4466128, 0.6138634),
        )
        for row_idx, (p_mv, lmpv) in enumerate(expected_rows):
            result = results[row_idx]
            self.assertAlmostEqual(p_mv, result.p_mvtest, places=6)
            self.assertAlmostEqual(lmpv, result.lmpv, places=6)
Exemplo n.º 26
0
    def testBoundariedUpper(self):
        """Iterate impute data limited to bp 21000-50000 on chromosome 3.

        The SNPs yielded are compared with the fixture starting at entry 6.
        """
        BoundaryCheck.chrom = 3
        DataParser.boundary = BoundaryCheck(bp=[21000, 50000])
        impute_parser.encoding = impute_parser.Encoding.Recessive
        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        parser = impute_parser.Parser(self.fam_file,
                                      [self.gen_file, self.gen_file2],
                                      chroms=[3, 4])
        parser.load_family_details(pc)
        parser.load_genotypes()

        cursor = 6

        # NOTE(review): nothing asserts how many SNPs were visited, so an
        # empty iteration would pass -- consider asserting the final cursor.
        for snp in parser:
            self.assertEqual(self.positions[cursor], snp.pos)
            for col, expected in enumerate(self.recessive_encoding[cursor]):
                self.assertAlmostEqual(expected,
                                       snp.genotype_data[col],
                                       places=3)
            cursor += 1
Exemplo n.º 27
0
    def testPedBoundaryTPed(self):
        """Limit a TPED dataset to chromosome 2 and compare with the raw file rows."""
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        DataParser.boundary = BoundaryCheck()
        BoundaryCheck.chrom = 2
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        pedigree = get_lines(self.tped_filename, split=True)

        # First pass: chromosome and position of each locus from get_loci()
        row = 4
        for locus in ped_parser.get_loci():
            self.assertEqual(int(pedigree[row][0]), locus.chr)
            self.assertEqual(int(pedigree[row][3]), locus.pos)

            row += 1
        self.assertEqual(3, ped_parser.locus_count)

        # Second pass: iterate the parser and check the genotype data
        row = 4
        for snp in ped_parser:
            # An all-True filter with one entry per missing_genotypes element
            keep_all = numpy.ones(snp.missing_genotypes.shape[0], dtype=bool)
            try:
                genodata = snp.get_genotype_data(keep_all)
                tped_entry = pedigree[row]
                self.assertEqual(int(tped_entry[0]), snp.chr)
                self.assertEqual(int(tped_entry[3]), snp.pos)
                self.assertEqual(tped_entry[1], snp.rsid)
                self.assertAlmostEqual(self.hetero_freq_tped[row],
                                       genodata.hetero_freq,
                                       places=4)
                self.assertEqual(self.genotypes[row],
                                 list(genodata.genotypes))
            except (TooMuchMissing, InvalidFrequency):
                # These two exceptions are ignored; the row is still counted
                pass
            row += 1
        self.assertEqual(7, row)
Exemplo n.º 28
0
    def testTPedAnalysisCov(self):
        """Run the TPED analysis with sex enabled as a covariate."""
        PhenoCovar.sex_as_covariate = True
        DataParser.boundary = BoundaryCheck()
        pheno = PhenoCovar()
        dataset = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        dataset.load_tfam(pheno)
        dataset.load_genotypes()
        #pheno.standardize_variables()

        results = list(mv_esteq.RunAnalysis(dataset, pheno))

        # First result: p-value plus the estimates at indices 1 and 3
        first = results[0]
        self.assertAlmostEqual(0.00342380, first.p_mvtest, places=6)
        first_checks = (
            (1, 0.11362883, 0.0337610, 0.00076356),
            (3, 0.01911090, 0.10143178, 0.8505542),
        )
        for pos, beta, stderr, pvalue in first_checks:
            self.assertAlmostEqual(beta, first.betas[pos], places=6)
            self.assertAlmostEqual(stderr, first.beta_stderr[pos], places=6)
            self.assertAlmostEqual(pvalue, first.beta_pvalues[pos], places=6)

        # Second result: only index 1 is checked
        second = results[1]
        self.assertAlmostEqual(0.584950593047, second.p_mvtest, places=6)
        self.assertAlmostEqual(0.0276543736525, second.betas[1], places=6)
        self.assertAlmostEqual(0.03383588, second.beta_stderr[1], places=6)
        self.assertAlmostEqual(0.413751829881, second.beta_pvalues[1], places=6)
Exemplo n.º 29
0
    def testPedBoundaryBed(self):
        """Limit a BED dataset to chromosome 2 and compare with the bim map data."""
        pc = PhenoCovar()
        DataParser.boundary = BoundaryCheck()
        BoundaryCheck.chrom = 2
        ped_parser = bed_parser.Parser(self.nonmissing_fam,
                                       self.nonmissing_bim,
                                       self.nonmissing_bed)
        ped_parser.load_fam(pc)
        ped_parser.load_bim(map3=False)
        ped_parser.load_genotypes()

        pedigree = self.nonmissing_mapdata

        row = 4
        checked = 0
        for snp in ped_parser:
            for variables in pc:
                # Only the non-missing mask (third item) is used here
                _, _, nonmissing = variables.get_variables(snp.missing_genotypes)

                try:
                    genodata = snp.get_genotype_data(nonmissing)
                    map_entry = pedigree[row]
                    self.assertEqual(int(map_entry[0]), snp.chr)
                    self.assertEqual(int(map_entry[3]), snp.pos)
                    self.assertEqual(map_entry[1], snp.rsid)
                    self.assertEqual(self.genotypes[row],
                                     list(genodata.genotypes))
                    checked += 1
                except (TooMuchMissing, InvalidFrequency):
                    # These two exceptions are ignored and not counted
                    pass

            row += 1
        self.assertEqual(3, checked)
        self.assertEqual(7, row)
Exemplo n.º 30
0
 def testBoundaryInitBPWithExclusions(self):
     """Check a bp window on chromosome 1 after loading rsids via LoadExclusions."""
     BoundaryCheck.chrom = 1
     region = BoundaryCheck(bp=[10000, 500000])
     region.LoadExclusions(["rs12345", "rs234567", "rs345678"])
     self.assertFalse(region.NoExclusions())
     self.assertTrue(region.valid)

     # (expected, chromosome, position, rsid) -- loaded rsids are expected
     # to be rejected even when the position lies inside the window
     cases = (
         (False, 1, 500, ""),
         (True, 1, 10000, ""),
         (False, 1, 10010, "rs12345"),
         (True, 1, 24000, "rs9876"),
         (False, 1, 25000, "rs234567"),
         (True, 1, 250000, ""),
         (True, 1, 500000, ""),
         (False, 2, 250000, ""),
         (False, 10, 10000, ""),
     )
     for expected, chrom, pos, rsid in cases:
         check = self.assertTrue if expected else self.assertFalse
         check(region.TestBoundary(chrom, pos, rsid))