示例#1
0
    def testRegionBoundaryWithExclusions(self):
        """Iterate SNPs with ``BoundaryCheck.chrom`` set to 2 and three ``ind_exclusions`` entries.

        Comparison against the TPED rows starts at row 4; the row cursor only
        advances when genotype data could be extracted for a SNP, and it must
        end at 7.
        """
        DataParser.ind_exclusions = ["1:1", "2:2", "3:3"]
        BoundaryCheck.chrom = 2

        # Expected genotype vectors, indexed by TPED row number.
        expected_genotypes = [
            [0, 1, 0, 0, 1, 0, 0, 1, 0],
            [0, 0, 1, 1, 1, 0, 0, 0, 1],
            [1, 0, 0, 0, 2, 1, 1, 0, 0],
            [1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1, 0, 0, 1, 2, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]

        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = get_lines(self.tped_filename, split=True)

        row_idx = 4
        for locus in parser:
            # All-True mask: keep every individual.
            keep_all = numpy.ones(locus.missing_genotypes.shape[0], dtype=bool)
            try:
                genodata = locus.get_genotype_data(keep_all)
                fields = tped_rows[row_idx]
                self.assertEqual(int(fields[0]), locus.chr)
                self.assertEqual(int(fields[3]), locus.pos)
                self.assertEqual(fields[1], locus.rsid)
                self.assertEqual(expected_genotypes[row_idx],
                                 list(genodata.genotypes))
                row_idx += 1
            except (TooMuchMissing, InvalidFrequency):
                # Skipped SNPs do not advance the row cursor.
                pass
        self.assertEqual(7, row_idx)
    def testTPedNoParentsPheno(self):
        f = open(self.tfam_filename, "w")
        f.write("""1 1 1
2 2 1
3 3 2
4 4 2
5 5 1
6 6 1
7 7 1
8 8 1
9 9 2
10 10 2
11 11 1
12 12 1""")


        f.close()

        DataParser.has_parents = False
        DataParser.has_pheno   = False
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        mapdata = [x.strip().split() for x in open(self.tped_filename).readlines()]

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
示例#3
0
    def testPedNegativePosLocalChromMissSNP(self):
        """Iterate the miss-SNP TPED file with chrom 1 selected and ``rs0004`` excluded.

        Comparison starts at TPED row 2 and the cursor advances for every SNP
        the parser yields, whether or not genotype extraction succeeded; it
        must end at 3.
        """
        BoundaryCheck.chrom = 1
        DataParser.boundary.LoadExclusions(snps=["rs0004"])

        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.misssnp_tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = get_lines(self.misssnp_tped_filename, split=True)

        row_idx = 2
        for locus in parser:
            # All-True mask: keep every individual.
            keep_all = numpy.ones(locus.missing_genotypes.shape[0], dtype=bool)
            try:
                genodata = locus.get_genotype_data(keep_all)
                fields = tped_rows[row_idx]
                self.assertEqual(int(fields[0]), locus.chr)
                self.assertEqual(int(fields[3]), locus.pos)
                self.assertEqual(fields[1], locus.rsid)
                self.assertEqual(self.genotypes[row_idx],
                                 list(genodata.genotypes))
            except (TooMuchMissing, InvalidFrequency):
                pass
            row_idx += 1
        self.assertEqual(3, row_idx)
    def testRegionBoundaryWithExclusions(self):
        """Iterate SNPs with ``BoundaryCheck.chrom`` set to 2 and three ``ind_exclusions`` entries.

        Comparison against the TPED rows starts at row 4 and must end at 7;
        each SNP must match the row's chromosome, position and rsid and the
        expected genotype vector for that row.
        """
        DataParser.ind_exclusions = ["1:1", "2:2", "3:3"]
        # Expected genotype vectors, indexed by TPED row number.
        genotypes = [
            [0, 1, 0, 0, 1, 0, 0, 1, 0],
            [0, 0, 1, 1, 1, 0, 0, 0, 1],
            [1, 0, 0, 0, 2, 1, 1, 0, 0],
            [1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1, 0, 0, 1, 2, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, 1, 1, 0, 0, 0]
        ]

        BoundaryCheck.chrom = 2
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Read through a context manager so the handle is closed promptly.
        with open(self.tped_filename) as tped_file:
            mapdata = [line.strip().split() for line in tped_file]

        index = 4
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
    def testTpedAnalysis(self):
        """Run the analysis with ``BoundaryCheck.chrom`` set to 1 and check the first three results.

        For each result the multivariate p-value plus the beta, standard
        error and p-value at indices 1 and 3 are compared to six places.
        """
        # NOTE(review): an earlier comment here claimed the boundary limits
        # the run to the first SNP, yet three results are verified -- confirm.
        BoundaryCheck.chrom = 1
        DataParser.boundary = BoundaryCheck()
        pheno = PhenoCovar()
        dataset = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        dataset.load_tfam(pheno)
        dataset.load_genotypes()

        results = list(mv_esteq.RunAnalysis(dataset, pheno))

        # One tuple per result:
        # (p_mvtest, betas[1], beta_stderr[1], beta_pvalues[1],
        #  betas[3], beta_stderr[3], beta_pvalues[3])
        expected = [
            (0.0034756155, 0.1134684009, 0.0337649965541, 0.0007779211,
             -0.0033479839, 0.0492050029324, 0.9457525716),
            (0.57778118, 0.02798537, 0.033790691857, 0.40755865,
             0.03275892, 0.0475661, 0.49101013),
            (0.44661276, 0.01663975, 0.03443300, 0.62891811,
             0.05712017, 0.04783608, 0.232446188),
        ]
        for position, values in enumerate(expected):
            outcome = results[position]
            p_mv, beta1, stderr1, pval1, beta3, stderr3, pval3 = values
            self.assertAlmostEqual(p_mv, outcome.p_mvtest, places=6)
            self.assertAlmostEqual(beta1, outcome.betas[1], places=6)
            self.assertAlmostEqual(stderr1, outcome.beta_stderr[1], places=6)
            self.assertAlmostEqual(pval1, outcome.beta_pvalues[1], places=6)
            self.assertAlmostEqual(beta3, outcome.betas[3], places=6)
            self.assertAlmostEqual(stderr3, outcome.beta_stderr[3], places=6)
            self.assertAlmostEqual(pval3, outcome.beta_pvalues[3], places=6)
    def testTpedBounded(self):
        """Run the analysis with a ``bp=[2000,3000]`` boundary on chrom 1.

        The first result must be at chr 1, pos 2000; the first two results
        are then compared to the expected statistics to six places.
        """
        BoundaryCheck.chrom = 1
        DataParser.boundary = BoundaryCheck(bp=[2000,3000])
        pheno = PhenoCovar()
        dataset = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        dataset.load_tfam(pheno)
        dataset.load_genotypes()

        results = list(mv_esteq.RunAnalysis(dataset, pheno))

        self.assertEqual(1, results[0].chr)
        self.assertEqual(2000, results[0].pos)

        # One tuple per result:
        # (p_mvtest, betas[1], beta_stderr[1], beta_pvalues[1],
        #  betas[3], beta_stderr[3], beta_pvalues[3])
        expected = [
            (0.57778118, 0.02798537, 0.033790691857, 0.40755865,
             0.03275892, 0.0475661, 0.49101013),
            (0.44661276, 0.01663975, 0.03443300, 0.62891811,
             0.05712017, 0.04783608, 0.232446188),
        ]
        for position, values in enumerate(expected):
            outcome = results[position]
            p_mv, beta1, stderr1, pval1, beta3, stderr3, pval3 = values
            self.assertAlmostEqual(p_mv, outcome.p_mvtest, places=6)
            self.assertAlmostEqual(beta1, outcome.betas[1], places=6)
            self.assertAlmostEqual(stderr1, outcome.beta_stderr[1], places=6)
            self.assertAlmostEqual(pval1, outcome.beta_pvalues[1], places=6)
            self.assertAlmostEqual(beta3, outcome.betas[3], places=6)
            self.assertAlmostEqual(stderr3, outcome.beta_stderr[3], places=6)
            self.assertAlmostEqual(pval3, outcome.beta_pvalues[3], places=6)
示例#7
0
    def testMissingWithExclusions(self):
        """Iterate the missing-data TPED file with two ``ind_exclusions`` entries.

        Every SNP must match its TPED row (chromosome, position, rsid) and
        the locally defined genotype vectors, where -1 appears for some
        entries; seven SNPs must be seen.
        """
        DataParser.ind_exclusions = ["2:2", "3:3"]
        expected_genotypes = [
            [0, -1, -1, -1, -1, -1, -1, -1, -1, 1],
            [1, 0, 0, 1, 1, 1, 0, 0, 0, 1],
            [0, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [0, 1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]

        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.miss_tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = libgwas.get_lines(self.miss_tped_filename, split=True)

        row_idx = 0
        for locus in parser:
            fields = tped_rows[row_idx]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertEqual(expected_genotypes[row_idx],
                             list(locus.genotype_data))
            row_idx += 1
        self.assertEqual(7, row_idx)
    def testTPedNoFamIDSex(self):
        f = open(self.tfam_filename, "w")
        f.write("""1 0 0 0.1
2 0 0 0.4
3 0 0 1.0
4 0 0 0.5
5 0 0 0.9
6 0 0 1.0
7 0 0 0.1
8 0 0 0.4
9 0 0 1.0
10 0 0 0.5
11 0 0 0.9
12 0 0 1.0""")


        f.close()

        DataParser.has_fid = False
        DataParser.has_sex = False
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        mapdata = [x.strip().split() for x in open(self.tped_filename).readlines()]

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
    def testTPedLiability(self):
        f = open(self.tfam_filename, "w")
        f.write("""1 1 0 0 1 0.1 1
2 2 0 0 1 0.4 1
3 3 0 0 2 1.0 1
4 4 0 0 2 0.5 1
5 5 0 0 1 0.9 1
6 6 0 0 1 1.0 1
7 7 0 0 1 0.1 1
8 8 0 0 1 0.4 1
9 9 0 0 2 1.0 1
10 10 0 0 2 0.5 1
11 11 0 0 1 0.9 1
12 12 0 0 1 1.0 1""")
        f.close()

        DataParser.has_liability = True
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        mapdata = [x.strip().split() for x in open(self.tped_filename).readlines()]

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
示例#10
0
    def testPedWithMissingMxIndExclusionsToo(self):
        """Iterate the missing-data TPED file with exclusions and ``ind_miss_tol`` of 0.5.

        Every SNP must match its TPED row (chromosome, position, rsid) and
        the locally defined genotype vectors; seven SNPs must be seen.
        """
        pc = PhenoCovar()
        DataParser.ind_exclusions = ["2:2", "3:3"]
        DataParser.ind_miss_tol = 0.5       # We should only lose 1
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.miss_tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Read through a context manager so the handle is closed promptly.
        with open(self.miss_tped_filename) as tped_file:
            mapdata = [line.strip().split() for line in tped_file]

        genotypes_w_missing = [
            [0, -1, -1, -1, -1, -1, -1, -1, -1, 1],
            [1, 0, 0, 1, 1, 1, 0, 0, 0, 1],
            [0, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [0, 1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]
        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(genotypes_w_missing[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
示例#11
0
    def testPedWithMissingComplete(self):
        """Iterate the missing-data TPED file and compare extracted genotypes.

        The parser must report seven loci.  For every SNP whose genotype
        data can be extracted, chromosome, position, rsid and genotypes are
        compared with the TPED row and the local expectation table; the row
        cursor advances for every SNP and must end at 7.
        """
        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.miss_tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = get_lines(self.miss_tped_filename, split=True)

        self.assertEqual(7, parser.locus_count)
        expected_genotypes = [
            [0, 1],
            [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
            [0, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [0, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]
        row_idx = 0
        for locus in parser:
            # All-True mask: keep every individual.
            keep_all = numpy.ones(locus.missing_genotypes.shape[0], dtype=bool)
            try:
                genodata = locus.get_genotype_data(keep_all)
                fields = tped_rows[row_idx]
                self.assertEqual(int(fields[0]), locus.chr)
                self.assertEqual(int(fields[3]), locus.pos)
                self.assertEqual(fields[1], locus.rsid)
                self.assertEqual(expected_genotypes[row_idx],
                                 list(genodata.genotypes))
            except (TooMuchMissing, InvalidFrequency):
                pass
            row_idx += 1
        self.assertEqual(7, row_idx)
示例#12
0
    def testTpedAnalysis(self):
        """Run the analysis with ``BoundaryCheck.chrom`` set to 1 and check the first three results.

        For each result the multivariate p-value plus the beta, standard
        error and p-value at indices 1 and 3 are compared to six places.
        """
        # NOTE(review): an earlier comment here claimed the boundary limits
        # the run to the first SNP, yet three results are verified -- confirm.
        BoundaryCheck.chrom = 1
        DataParser.boundary = BoundaryCheck()
        pheno = PhenoCovar()
        dataset = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        dataset.load_tfam(pheno)
        dataset.load_genotypes()

        results = list(mv_esteq.RunAnalysis(dataset, pheno))

        # One tuple per result:
        # (p_mvtest, betas[1], beta_stderr[1], beta_pvalues[1],
        #  betas[3], beta_stderr[3], beta_pvalues[3])
        expected = [
            (0.0034756155, 0.1134684009, 0.0337649965541, 0.0007779211,
             -0.0033479839, 0.0492050029324, 0.9457525716),
            (0.57778118, 0.02798537, 0.033790691857, 0.40755865,
             0.03275892, 0.0475661, 0.49101013),
            (0.44661276, 0.01663975, 0.03443300, 0.62891811,
             0.05712017, 0.04783608, 0.232446188),
        ]
        for position, values in enumerate(expected):
            outcome = results[position]
            p_mv, beta1, stderr1, pval1, beta3, stderr3, pval3 = values
            self.assertAlmostEqual(p_mv, outcome.p_mvtest, places=6)
            self.assertAlmostEqual(beta1, outcome.betas[1], places=6)
            self.assertAlmostEqual(stderr1, outcome.beta_stderr[1], places=6)
            self.assertAlmostEqual(pval1, outcome.beta_pvalues[1], places=6)
            self.assertAlmostEqual(beta3, outcome.betas[3], places=6)
            self.assertAlmostEqual(stderr3, outcome.beta_stderr[3], places=6)
            self.assertAlmostEqual(pval3, outcome.beta_pvalues[3], places=6)
示例#13
0
    def testTpedBounded(self):
        """Run the analysis with a ``bp=[2000,3000]`` boundary on chrom 1.

        The first result must be at chr 1, pos 2000; the first two results
        are then compared to the expected statistics to six places.
        """
        BoundaryCheck.chrom = 1
        DataParser.boundary = BoundaryCheck(bp=[2000,3000])
        pheno = PhenoCovar()
        dataset = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        dataset.load_tfam(pheno)
        dataset.load_genotypes()

        results = list(mv_esteq.RunAnalysis(dataset, pheno))

        self.assertEqual(1, results[0].chr)
        self.assertEqual(2000, results[0].pos)

        # One tuple per result:
        # (p_mvtest, betas[1], beta_stderr[1], beta_pvalues[1],
        #  betas[3], beta_stderr[3], beta_pvalues[3])
        expected = [
            (0.57778118, 0.02798537, 0.033790691857, 0.40755865,
             0.03275892, 0.0475661, 0.49101013),
            (0.44661276, 0.01663975, 0.03443300, 0.62891811,
             0.05712017, 0.04783608, 0.232446188),
        ]
        for position, values in enumerate(expected):
            outcome = results[position]
            p_mv, beta1, stderr1, pval1, beta3, stderr3, pval3 = values
            self.assertAlmostEqual(p_mv, outcome.p_mvtest, places=6)
            self.assertAlmostEqual(beta1, outcome.betas[1], places=6)
            self.assertAlmostEqual(stderr1, outcome.beta_stderr[1], places=6)
            self.assertAlmostEqual(pval1, outcome.beta_pvalues[1], places=6)
            self.assertAlmostEqual(beta3, outcome.betas[3], places=6)
            self.assertAlmostEqual(stderr3, outcome.beta_stderr[3], places=6)
            self.assertAlmostEqual(pval3, outcome.beta_pvalues[3], places=6)
示例#14
0
    def test_tped_standardization2(self):
        """Compare phenotype/covariate values with and without standardization.

        The data is loaded twice with sex as a covariate: first with the
        ``NoStandardization`` standardizer, expecting the raw values, then
        with ``Standardizer`` and ``do_standardize_variables`` enabled,
        expecting the standardized values.
        """
        DataParser.has_sex = True
        DataParser.has_pheno = True
        PhenoCovar.sex_as_covariate = True

        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()
        # Builtin ``bool`` replaces the ``numpy.bool`` alias, which was
        # deprecated in NumPy 1.20 and removed in 1.24.
        nonmissing = numpy.ones(pc.phenotype_data[0].shape, dtype=bool)
        libgwas.standardizer.set_standardizer(
            libgwas.standardizer.NoStandardization)

        raw_pheno = [
            0.1, 0.4, 1.0, 0.5, 0.9, 1.0, 0.1, 0.4, 1.0, 0.5, 0.9, 1.0
        ]
        raw_cov = [1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1]

        for pheno in pc:
            y, c, _ = pheno.get_variables(numpy.invert(nonmissing))

            for i, expected in enumerate(raw_pheno):
                self.assertAlmostEqual(expected, y[i])
                self.assertAlmostEqual(raw_cov[i], c[0][i])

        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()
        pc.do_standardize_variables = True
        libgwas.standardizer.set_standardizer(Standardizer)

        std_pheno = [
            -1.61601695, -0.73455316, 1.02837442, -0.4407319, 0.73455316,
            1.02837442, -1.61601695, -0.73455316, 1.02837442, -0.4407319,
            0.73455316, 1.02837442
        ]
        std_cov = [
            -0.70710678, -0.70710678, 1.41421356, 1.41421356, -0.70710678,
            -0.70710678, -0.70710678, -0.70710678, 1.41421356, 1.41421356,
            -0.70710678, -0.70710678
        ]
        for pheno in pc:
            y, c, _ = pheno.get_variables(numpy.invert(nonmissing))

            for i, expected in enumerate(std_pheno):
                self.assertAlmostEqual(expected, y[i])
                self.assertAlmostEqual(std_cov[i], c[0][i])
示例#15
0
    def test_tped_standardization_w_dbl_missing(self):
        """Check raw and standardized variables with exclusions plus a missing mask.

        Two individuals are listed in ``DataParser.ind_exclusions`` and the
        first two remaining entries are flagged as missing through the mask
        handed to ``get_variables``.  The data is loaded once with
        ``NoStandardization`` and once with ``Standardizer``.
        """
        PhenoCovar.sex_as_covariate = True
        DataParser.ind_exclusions = ["11:11", "12:12"]

        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()
        # Builtin ``bool`` replaces the ``numpy.bool`` alias, which was
        # deprecated in NumPy 1.20 and removed in 1.24.
        nonmissing = numpy.ones(pc.phenotype_data[0].shape, dtype=bool)
        nonmissing[0] = False
        nonmissing[1] = False
        libgwas.standardizer.set_standardizer(
            libgwas.standardizer.NoStandardization)

        raw_pheno = [1.0, 0.5, 0.9, 1.0, 0.1, 0.4, 1.0, 0.5]
        raw_cov = [2, 2, 1, 1, 1, 1, 2, 2]

        for pheno in pc:
            y, c, _ = pheno.get_variables(numpy.invert(nonmissing))
            for i, expected in enumerate(raw_pheno):
                self.assertAlmostEqual(expected, y[i])
                self.assertAlmostEqual(raw_cov[i], c[0][i])

        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        pc.do_standardize_variables = True
        libgwas.standardizer.set_standardizer(Standardizer)

        std_pheno = [
            1.19915853, -0.26322992, 0.90668084, 1.19915853, -1.43314068,
            -0.55570761, 1.19915853, -0.26322992
        ]
        std_cov = [
            1.22474487, 1.22474487, -0.81649658, -0.81649658, -0.81649658,
            -0.81649658, 1.22474487, 1.22474487
        ]
        for pheno in pc:
            y, c, _ = pheno.get_variables(numpy.invert(nonmissing))

            for i, expected in enumerate(std_pheno):
                self.assertAlmostEqual(expected, y[i])
                self.assertAlmostEqual(std_cov[i], c[0][i])
示例#16
0
    def testAllelesIteration(self):
        """Check the minor and major allele reported for each SNP.

        ``self.tped1_alleles[i]`` holds the expected (major, minor) pair for
        the i-th SNP; seven SNPs must be seen.
        """
        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        seen = 0
        for locus in parser:
            expected_pair = self.tped1_alleles[seen]
            self.assertEqual(expected_pair[1], locus.minor_allele)
            self.assertEqual(expected_pair[0], locus.major_allele)
            seen += 1
        self.assertEqual(7, seen)
示例#17
0
    def testPedNegativePositions(self):
        """Iterate the miss-SNP TPED file, comparing from TPED row 2 onward.

        Each SNP must match the chromosome, position and rsid of its TPED
        row and the fixture genotypes; the row cursor must end at 7.
        """
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.misssnp_tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Read through a context manager so the handle is closed promptly.
        with open(self.misssnp_tped_filename) as tped_file:
            mapdata = [line.strip().split() for line in tped_file]

        index = 2
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
示例#18
0
    def testPedNegativePositions(self):
        """Iterate the miss-SNP TPED file, comparing from TPED row 2 onward.

        Each SNP must match the chromosome, position and rsid of its TPED
        row and the fixture genotypes; the row cursor must end at 7.
        """
        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.misssnp_tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = libgwas.get_lines(self.misssnp_tped_filename, split=True)

        row_idx = 2
        for locus in parser:
            fields = tped_rows[row_idx]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertEqual(self.genotypes[row_idx],
                             list(locus.genotype_data))
            row_idx += 1
        self.assertEqual(7, row_idx)
示例#19
0
    def testPedWithMissingComplete(self):
        """Iterate the missing-data TPED file and compare against the fixture.

        The parser must report seven loci, and each SNP must match its TPED
        row (chromosome, position, rsid) and ``self.genotypes_w_missing``.
        """
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.miss_tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Read through a context manager so the handle is closed promptly.
        with open(self.miss_tped_filename) as tped_file:
            mapdata = [line.strip().split() for line in tped_file]

        self.assertEqual(7, ped_parser.locus_count)

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes_w_missing[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
示例#20
0
    def testPedComplete(self):
        """Load the TPED/TFAM pair and verify every SNP against the file.

        Twelve individuals must be counted, and each of the seven SNPs must
        match its TPED row (chromosome, position, rsid) and the fixture
        genotypes.
        """
        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        self.assertEqual(12, parser.ind_count)
        tped_rows = libgwas.get_lines(self.tped_filename, split=True)

        row_idx = 0
        for locus in parser:
            fields = tped_rows[row_idx]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertEqual(self.genotypes[row_idx],
                             list(locus.genotype_data))
            row_idx += 1
        self.assertEqual(7, row_idx)
示例#21
0
    def test_tped_standardization_w_dbl_missing(self):
        """Check raw and standardized variables with exclusions plus a missing mask.

        Two individuals are listed in ``DataParser.ind_exclusions`` and the
        first two remaining entries are flagged as missing through the mask
        handed to ``get_variables``.  The data is loaded once with
        ``NoStandardization`` and once with ``Standardizer``.
        """
        PhenoCovar.sex_as_covariate = True
        DataParser.ind_exclusions = ["11:11", "12:12"]

        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()
        # Builtin ``bool`` replaces the ``numpy.bool`` alias, which was
        # deprecated in NumPy 1.20 and removed in 1.24.
        nonmissing = numpy.ones(pc.phenotype_data[0].shape, dtype=bool)
        nonmissing[0] = False
        nonmissing[1] = False
        libgwas.standardizer.set_standardizer(libgwas.standardizer.NoStandardization)

        raw_pheno = [1.0, 0.5, 0.9, 1.0, 0.1, 0.4, 1.0, 0.5]
        raw_cov = [2, 2, 1, 1, 1, 1, 2, 2]

        for pheno in pc:
            y, c, _ = pheno.get_variables(numpy.invert(nonmissing))
            for i, expected in enumerate(raw_pheno):
                self.assertAlmostEqual(expected, y[i])
                self.assertAlmostEqual(raw_cov[i], c[0][i])

        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        pc.do_standardize_variables = True
        libgwas.standardizer.set_standardizer(Standardizer)

        std_pheno = [1.19915853, -0.26322992, 0.90668084,
                     1.19915853, -1.43314068, -0.55570761, 1.19915853, -0.26322992]
        std_cov = [1.22474487, 1.22474487, -0.81649658,
                   -0.81649658, -0.81649658, -0.81649658, 1.22474487, 1.22474487]
        for pheno in pc:
            y, c, _ = pheno.get_variables(numpy.invert(nonmissing))

            for i, expected in enumerate(std_pheno):
                self.assertAlmostEqual(expected, y[i])
                self.assertAlmostEqual(std_cov[i], c[0][i])
示例#22
0
    def testPedNegativePosLocalChromMissSNP(self):
        """Iterate the miss-SNP TPED file with chrom 1 selected and ``rs0004`` excluded.

        Comparison starts at TPED row 2 and the row cursor must end at 3,
        i.e. exactly one SNP is expected from the parser.
        """
        BoundaryCheck.chrom = 1
        DataParser.boundary.LoadExclusions(snps=["rs0004"])

        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.misssnp_tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Read through a context manager so the handle is closed promptly.
        with open(self.misssnp_tped_filename) as tped_file:
            mapdata = [line.strip().split() for line in tped_file]

        index = 2
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(3, index)
示例#23
0
    def testTPedPhenoMissingPC(self):
        """Load ``self.tfam_missing`` and check phenotypes and genotypes per SNP.

        Twelve covariate and phenotype entries and one phenotype name must
        be loaded.  For every SNP and phenotype, the values returned by
        ``get_variables`` must match the ten expected phenotype values, and
        any genotype data that can be extracted must match the expectation.
        """
        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_missing,
                                              self.tped_filename)

        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        self.assertEqual(12, len(pc.covariate_data[0]))
        self.assertEqual(12, len(pc.phenotype_data[0]))
        self.assertEqual(1, len(pc.phenotype_names))

        mapdata = get_lines(self.tped_filename, split=True)

        self.genotypes = [[1, 0, 0, 1, 0, 0, 1, 0, 0, 1],
                          [1, 0, 0, 0, 1, 1, 1, 0, 0, 0],
                          [2, 1, 1, 0, 0, 0, 2, 1, 1, 0],
                          [1, 2, 1, 1, 0, 0, 1, 2, 1, 1],
                          [2, 0, 1, 0, 0, 1, 2, 0, 1, 0],
                          [0, 1, 0, 0, 0, 0, 0, 1, 0, 0],
                          [1, 1, 0, 0, 0, 0, 1, 1, 0, 0]]
        pheno_values = [0.4, 1.0, 0.5, 0.9, 1.0, 0.1, 0.4, 1.0, 0.5, 0.9]
        index = 0
        self.assertEqual(7, ped_parser.locus_count)
        for snp in ped_parser:
            for y in pc:
                (pheno, covars,
                 nonmissing) = y.get_variables(snp.missing_genotypes)
                # assertAlmostEqual cannot subtract two lists, so ``places``
                # only takes effect when the values are compared one by one.
                observed = list(pheno)
                self.assertEqual(len(pheno_values), len(observed))
                for expected, actual in zip(pheno_values, observed):
                    self.assertAlmostEqual(expected, actual, places=4)
                try:
                    genodata = snp.get_genotype_data(nonmissing)
                    self.assertEqual(int(mapdata[index][0]), snp.chr)
                    self.assertEqual(int(mapdata[index][3]), snp.pos)
                    self.assertEqual(mapdata[index][1], snp.rsid)
                    self.assertEqual(self.genotypes[index],
                                     list(genodata.genotypes))
                except (TooMuchMissing, InvalidFrequency):
                    # SNPs rejected by the parser are skipped, not failed.
                    pass
            index += 1
        self.assertEqual(7, index)
示例#24
0
    def testTPedNoFamID(self):
        with open(self.tfam_filename, "w") as f:
            f.write("""1 0 0 1 0.1
2 0 0 1 0.4
3 0 0 2 1.0
4 0 0 2 0.5
5 0 0 1 0.9
6 0 0 1 1.0
7 0 0 1 0.1
8 0 0 1 0.4
9 0 0 2 1.0
10 0 0 2 0.5
11 0 0 1 0.9
12 0 0 1 1.0""")

        DataParser.has_fid = False
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        pedigree = get_lines(self.tped_filename, split=True)

        index = 0
        for snp in ped_parser:
            for y in pc:
                (pheno, covars,
                 non_missing) = y.get_variables(snp.missing_genotypes)
                try:
                    genodata = snp.get_genotype_data(non_missing)
                    self.assertEqual(int(pedigree[index][0]), snp.chr)
                    self.assertEqual(int(pedigree[index][3]), snp.pos)
                    self.assertEqual(pedigree[index][1], snp.rsid)
                    self.assertEqual(self.genotypes[index],
                                     list(genodata.genotypes))
                except TooMuchMissing as e:
                    pass
                except InvalidFrequency as e:
                    pass
            index += 1
        self.assertEqual(7, index)
示例#25
0
    def testPedNegativePosLocalChromMissSNP(self):
        """Iterate the miss-SNP TPED file with chrom 1 selected and ``rs0004`` excluded.

        Comparison starts at TPED row 2 and the row cursor must end at 3,
        i.e. exactly one SNP is expected from the parser.
        """
        BoundaryCheck.chrom = 1
        DataParser.boundary.LoadExclusions(snps=["rs0004"])

        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.misssnp_tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = libgwas.get_lines(self.misssnp_tped_filename, split=True)

        row_idx = 2
        for locus in parser:
            fields = tped_rows[row_idx]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertEqual(self.genotypes[row_idx],
                             list(locus.genotype_data))
            row_idx += 1
        self.assertEqual(3, row_idx)
示例#26
0
    def testTPedNoParentsPheno(self):
        with open(self.tfam_filename, "w") as f:
            f.write("""1 1 1
2 2 1
3 3 2
4 4 2
5 5 1
6 6 1
7 7 1
8 8 1
9 9 2
10 10 2
11 11 1
12 12 1""")

        DataParser.has_parents = False
        DataParser.has_pheno = False
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        pedigree = get_lines(self.tped_filename, split=True)

        index = 0
        for snp in ped_parser:
            snp_filter = numpy.ones(snp.missing_genotypes.shape[0]) == 1
            try:
                genodata = snp.get_genotype_data(snp_filter)
                self.assertEqual(int(pedigree[index][0]), snp.chr)
                self.assertEqual(int(pedigree[index][3]), snp.pos)
                self.assertEqual(pedigree[index][1], snp.rsid)
                self.assertEqual(self.genotypes[index],
                                 list(genodata.genotypes))
            except TooMuchMissing as e:
                pass
            except InvalidFrequency as e:
                pass
            index += 1
        self.assertEqual(7, index)
示例#27
0
    def testPedNegativePositionsLocalChrom(self):
        """Limit to chromosome 1 and verify the loci yielded by the parser.

        Iteration is compared against the TPED starting at row 2 and is
        expected to cover two loci (rows 2 and 3).
        """
        BoundaryCheck.chrom = 1
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.misssnp_tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Read the reference rows inside a context manager so the handle is
        # closed deterministically instead of being left to the GC.
        with open(self.misssnp_tped_filename) as tped:
            mapdata = [line.strip().split() for line in tped]

        index = 2
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(4, index)
示例#28
0
    def test_tped_standardization2(self):
        """Compare phenotype/covariate values with and without standardization.

        The data is loaded twice: once with ``NoStandardization`` installed
        (values must match the raw expectations) and once with
        ``Standardizer`` installed and ``do_standardize_variables`` enabled
        (values must match the standardized expectations).
        """
        DataParser.has_sex = True
        DataParser.has_pheno = True
        PhenoCovar.sex_as_covariate = True

        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()
        # Builtin ``bool`` replaces the ``numpy.bool`` alias, which was
        # deprecated in NumPy 1.20 and removed in 1.24.
        nonmissing = numpy.ones(pc.phenotype_data[0].shape, dtype=bool)
        libgwas.standardizer.set_standardizer(libgwas.standardizer.NoStandardization)

        raw_pheno = [0.1, 0.4, 1.0, 0.5, 0.9, 1.0, 0.1, 0.4, 1.0, 0.5, 0.9, 1.0]
        raw_cov = [1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1]

        for pheno in pc:
            # The inverted all-True mask is an all-False mask.
            (y, c, _) = pheno.get_variables(numpy.invert(nonmissing))

            # Index explicitly so a too-short result still raises.
            for i, expected_pheno in enumerate(raw_pheno):
                self.assertAlmostEqual(expected_pheno, y[i])
                self.assertAlmostEqual(raw_cov[i], c[0][i])

        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()
        pc.do_standardize_variables = True
        libgwas.standardizer.set_standardizer(Standardizer)

        std_pheno = [-1.61601695, -0.73455316, 1.02837442, -0.4407319, 0.73455316, 1.02837442,
                     -1.61601695, -0.73455316, 1.02837442, -0.4407319, 0.73455316, 1.02837442]
        std_cov = [-0.70710678, -0.70710678, 1.41421356, 1.41421356, -0.70710678, -0.70710678,
                   -0.70710678, -0.70710678, 1.41421356, 1.41421356, -0.70710678, -0.70710678]
        for pheno in pc:
            (y, c, _) = pheno.get_variables(numpy.invert(nonmissing))

            for i, expected_pheno in enumerate(std_pheno):
                self.assertAlmostEqual(expected_pheno, y[i])
                self.assertAlmostEqual(std_cov[i], c[0][i])
示例#29
0
    def testPedSnpBoundaryTPed(self):
        """Apply the SNP boundary ``rs0001-rs0003`` on chromosome 1.

        Checks the loci list (including heterozygote frequency), the
        reported locus count of 3, and then the iterated genotype data.
        """
        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)
        DataParser.boundary = SnpBoundaryCheck(snps=["rs0001-rs0003"])
        BoundaryCheck.chrom = 1
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = libgwas.get_lines(self.tped_filename, split=True)

        # Locus metadata first.
        for row, locus in enumerate(parser.get_loci()):
            fields = tped_rows[row]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertAlmostEqual(self.hetero_freq_tped[row],
                                   locus.hetero_freq,
                                   places=4)
        self.assertEqual(3, parser.locus_count)

        # Then the genotype payload from plain iteration.
        row = 0
        for locus in parser:
            fields = tped_rows[row]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertEqual(self.genotypes[row], list(locus.genotype_data))
            row += 1
        self.assertEqual(3, row)
示例#30
0
    def testPedSnpBoundary2TPed(self):
        """Apply the SNP boundary ``rs0005-rs0006`` on chromosome 2.

        Checks the loci list (including heterozygote frequency), the
        reported locus count of 2, and the iterated genotype data, all
        compared against TPED rows 4 and 5.
        """
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)

        DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0006"])
        BoundaryCheck.chrom = 2
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Context manager closes the reference file; the original left the
        # handle open.
        with open(self.tped_filename) as tped:
            pedigree = [line.split() for line in tped]
        index = 4
        loci = ped_parser.get_loci()
        for snp in loci:
            self.assertEqual(int(pedigree[index][0]), snp.chr)
            self.assertEqual(int(pedigree[index][3]), snp.pos)
            self.assertAlmostEqual(self.hetero_freq_tped[index],
                                   snp.hetero_freq,
                                   places=4)
            index += 1
        self.assertEqual(2, ped_parser.locus_count)
        index = 4
        for snp in ped_parser:
            self.assertEqual(int(pedigree[index][0]), snp.chr)
            self.assertEqual(int(pedigree[index][3]), snp.pos)
            self.assertEqual(pedigree[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))

            index += 1
        self.assertEqual(6, index)
示例#31
0
    def testPedSnpBoundary2TPed(self):
        """Apply the SNP boundary ``rs0005-rs0006`` on chromosome 2.

        Checks the loci list (including heterozygote frequency), the
        reported locus count of 2, and the iterated genotype data, all
        compared against TPED rows 4 and 5.
        """
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)

        DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0006"])
        BoundaryCheck.chrom = 2
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Context manager closes the reference file; the original left the
        # handle open.
        with open(self.tped_filename) as tped:
            pedigree = [line.split() for line in tped]
        index = 4
        loci = ped_parser.get_loci()
        for snp in loci:
            self.assertEqual(int(pedigree[index][0]), snp.chr)
            self.assertEqual(int(pedigree[index][3]), snp.pos)
            self.assertAlmostEqual(self.hetero_freq_tped[index], snp.hetero_freq, places=4)
            index += 1
        self.assertEqual(2, ped_parser.locus_count)
        index = 4
        for snp in ped_parser:
            self.assertEqual(int(pedigree[index][0]), snp.chr)
            self.assertEqual(int(pedigree[index][3]), snp.pos)
            self.assertEqual(pedigree[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))

            index += 1
        self.assertEqual(6, index)
示例#32
0
    def testPedRegionBoundaryWithExclusionsTPed(self):
        """Apply boundary ``rs0005-rs0007`` on chromosome 2 with ``rs0007`` excluded.

        Both the loci list and the iterated loci are compared against the
        TPED starting at row 4; iteration is expected to cover two loci.
        """
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)

        DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0007"])
        DataParser.boundary.LoadExclusions(snps=["rs0007"])
        BoundaryCheck.chrom = 2
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Context manager closes the reference file; the original left the
        # handle open.
        with open(self.tped_filename) as tped:
            pedigree = [line.split() for line in tped]
        index = 4
        loci = ped_parser.get_loci()
        for snp in loci:
            self.assertEqual(int(pedigree[index][0]), snp.chr)
            self.assertEqual(int(pedigree[index][3]), snp.pos)
            index += 1
        index = 4
        for snp in ped_parser:
            self.assertEqual(int(pedigree[index][0]), snp.chr)
            self.assertEqual(int(pedigree[index][3]), snp.pos)
            self.assertEqual(pedigree[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))

            index += 1
        self.assertEqual(6, index)
示例#33
0
    def testPedRegionBoundaryWithExclusionsTPed(self):
        """Apply boundary ``rs0005-rs0007`` on chromosome 2 with ``rs0007`` excluded.

        Both the loci list and the iterated loci are compared against the
        TPED starting at row 4; iteration is expected to cover two loci.
        """
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)

        DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0007"])
        DataParser.boundary.LoadExclusions(snps=["rs0007"])
        BoundaryCheck.chrom = 2
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Context manager closes the reference file; the original left the
        # handle open.
        with open(self.tped_filename) as tped:
            pedigree = [line.split() for line in tped]
        index = 4
        loci = ped_parser.get_loci()
        for snp in loci:
            self.assertEqual(int(pedigree[index][0]), snp.chr)
            self.assertEqual(int(pedigree[index][3]), snp.pos)
            index += 1
        index = 4
        for snp in ped_parser:
            self.assertEqual(int(pedigree[index][0]), snp.chr)
            self.assertEqual(int(pedigree[index][3]), snp.pos)
            self.assertEqual(pedigree[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))

            index += 1
        self.assertEqual(6, index)
示例#34
0
    def testTPedLiability(self):
        with open(self.tfam_filename, "w") as f:
            f.write("""1 1 0 0 1 0.1 1
2 2 0 0 1 0.4 1
3 3 0 0 2 1.0 1
4 4 0 0 2 0.5 1
5 5 0 0 1 0.9 1
6 6 0 0 1 1.0 1
7 7 0 0 1 0.1 1
8 8 0 0 1 0.4 1
9 9 0 0 2 1.0 1
10 10 0 0 2 0.5 1
11 11 0 0 1 0.9 1
12 12 0 0 1 1.0 1""")

        DataParser.has_liability = True
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        pedigree = get_lines(self.tped_filename, split=True)

        index = 0
        for snp in ped_parser:
            snp_filter = numpy.ones(snp.missing_genotypes.shape[0]) == 1
            try:
                genodata = snp.get_genotype_data(snp_filter)
                self.assertEqual(int(pedigree[index][0]), snp.chr)
                self.assertEqual(int(pedigree[index][3]), snp.pos)
                self.assertEqual(pedigree[index][1], snp.rsid)
                self.assertEqual(self.genotypes[index],
                                 list(genodata.genotypes))
            except TooMuchMissing as e:
                pass
            except InvalidFrequency as e:
                pass
            index += 1
        self.assertEqual(7, index)
示例#35
0
    def testPedRegionBoundaryWithExclusionsTPed(self):
        """Apply boundary ``rs0005-rs0007`` on chromosome 2 with ``rs0007`` excluded.

        The loci list and the iterated loci are both compared against the
        TPED starting at row 4; iteration is expected to cover two loci.
        """
        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)

        DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0007"])
        DataParser.boundary.LoadExclusions(snps=["rs0007"])
        BoundaryCheck.chrom = 2
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = get_lines(self.tped_filename, split=True)

        # Locus metadata first.
        for row, locus in enumerate(parser.get_loci(), start=4):
            fields = tped_rows[row]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)

        # Then genotype data from plain iteration.
        row = 4
        for locus in parser:
            keep_all = numpy.ones(locus.missing_genotypes.shape[0], dtype=bool)
            try:
                genodata = locus.get_genotype_data(keep_all)
                fields = tped_rows[row]
                self.assertEqual(int(fields[0]), locus.chr)
                self.assertEqual(int(fields[3]), locus.pos)
                self.assertEqual(fields[1], locus.rsid)
                self.assertEqual(self.genotypes[row],
                                 list(genodata.genotypes))
            except (TooMuchMissing, InvalidFrequency):
                # Tolerated: the locus is simply not verified.
                pass
            row += 1
        self.assertEqual(6, row)
示例#36
0
    def testTPedPhenoComplete(self):
        """Load with sex as a covariate and verify phenotype sizes and loci.

        Expects 12 entries in the first covariate and phenotype rows, one
        phenotype name, and seven loci matching the TPED rows.
        """
        PhenoCovar.sex_as_covariate = True
        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)

        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        self.assertEqual(12, len(pheno_covar.covariate_data[0]))
        self.assertEqual(12, len(pheno_covar.phenotype_data[0]))
        self.assertEqual(1, len(pheno_covar.phenotype_names))

        tped_rows = libgwas.get_lines(self.tped_filename, split=True)
        self.assertEqual(7, parser.locus_count)

        row = 0
        for locus in parser:
            fields = tped_rows[row]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertEqual(self.genotypes[row], list(locus.genotype_data))
            row += 1
        self.assertEqual(7, row)
示例#37
0
    def testTPedPhenoComplete(self):
        """Load with sex as a covariate and verify phenotype sizes and loci.

        Expects 12 entries in the first covariate and phenotype rows, one
        phenotype name, and seven loci matching the TPED rows.
        """
        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)

        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        self.assertEqual(12, len(pc.covariate_data[0]))
        self.assertEqual(12, len(pc.phenotype_data[0]))
        self.assertEqual(1, len(pc.phenotype_names))
        # Context manager closes the reference file; the original left the
        # handle open.
        with open(self.tped_filename) as tped:
            mapdata = [line.strip().split() for line in tped]

        index = 0
        self.assertEqual(7, ped_parser.locus_count)
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
示例#38
0
    def testRegionBoundaryWithExclusions(self):
        """Exclude three individuals and restrict parsing to chromosome 2.

        Iterated loci are compared against TPED rows 4-6 and a local table
        of expected genotypes holding nine values per locus.
        """
        DataParser.ind_exclusions = ["1:1", "2:2", "3:3"]
        expected_genotypes = [
            [0, 1, 0, 0, 1, 0, 0, 1, 0],
            [0, 0, 1, 1, 1, 0, 0, 0, 1],
            [1, 0, 0, 0, 2, 1, 1, 0, 0],
            [1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1, 0, 0, 1, 2, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]

        BoundaryCheck.chrom = 2
        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = libgwas.get_lines(self.tped_filename, split=True)

        row = 4
        for locus in parser:
            fields = tped_rows[row]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            self.assertEqual(fields[1], locus.rsid)
            self.assertEqual(expected_genotypes[row],
                             list(locus.genotype_data))
            row += 1
        self.assertEqual(7, row)
示例#39
0
    def testPedWithMissingComplete(self):
        """Parse the TPED fixture with missing calls and verify all seven loci.

        Genotypes are compared against ``self.genotypes_w_missing``.
        """
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.miss_tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Context manager closes the reference file; the original left the
        # handle open.
        with open(self.miss_tped_filename) as tped:
            mapdata = [line.strip().split() for line in tped]

        self.assertEqual(7, ped_parser.locus_count)

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes_w_missing[index],
                             list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
示例#40
0
    def testPedWithMissingMxSnpComplete(self):
        """Parse the missing-data TPED with ``snp_miss_tol`` set to 0.5.

        All seven loci must be listed; during iteration exactly one
        ``TooMuchMissing`` is expected and six loci must validate.
        """
        pheno_covar = PhenoCovar()
        DataParser.snp_miss_tol = 0.5  # We should only lose 1
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.miss_tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = libgwas.get_lines(self.miss_tped_filename, split=True)

        expected_genotypes = [
            [0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0],
            [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
            [0, -1, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [0, -1, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1, -1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [1, -1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, -1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]
        expected_hetero = [
            0.3636, 0.5, 0.3636, 0.4545, 0.3636, 0.2727, 0.2727
        ]

        self.assertEqual(7, parser.locus_count)

        # Locus metadata first.
        listed = 0
        for locus in parser.get_loci():
            fields = tped_rows[listed]
            self.assertEqual(int(fields[0]), locus.chr)
            self.assertEqual(int(fields[3]), locus.pos)
            listed += 1
        self.assertEqual(7, listed)

        # Then genotype data, counting accepted and rejected loci.
        row = 0
        too_much_missing = 0
        validated = 0
        for locus in parser:
            for phenotype in pheno_covar:
                _, _, nonmissing = phenotype.get_variables(
                    locus.missing_genotypes)
                try:
                    genodata = locus.get_genotype_data(nonmissing)
                    fields = tped_rows[row]
                    self.assertEqual(int(fields[0]), locus.chr)
                    self.assertEqual(int(fields[3]), locus.pos)
                    self.assertEqual(fields[1], locus.rsid)
                    self.assertEqual(expected_genotypes[row],
                                     list(locus.genotype_data))
                    self.assertAlmostEqual(expected_hetero[row],
                                           genodata.hetero_freq,
                                           places=4)

                    validated += 1
                except TooMuchMissing:
                    too_much_missing += 1
                except InvalidFrequency:
                    # Tolerated and not counted.
                    pass
            row += 1
        self.assertEqual(1, too_much_missing)
        self.assertEqual(6, validated)
        self.assertEqual(7, row)
示例#41
0
    def testTPedNoParentsPheno(self):
        f = open(self.tfam_filename, "w")
        f.write("""1 1 1
2 2 1
3 3 2
4 4 2
5 5 1
6 6 1
7 7 1
8 8 1
9 9 2
10 10 2
11 11 1
12 12 1""")

        f.close()

        DataParser.has_parents = False
        DataParser.has_pheno = False
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        mapdata = [
            x.strip().split() for x in open(self.tped_filename).readlines()
        ]

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
示例#42
0
    def testTPedNoFamIDSex(self):
        f = open(self.tfam_filename, "w")
        f.write("""1 0 0 0.1
2 0 0 0.4
3 0 0 1.0
4 0 0 0.5
5 0 0 0.9
6 0 0 1.0
7 0 0 0.1
8 0 0 0.4
9 0 0 1.0
10 0 0 0.5
11 0 0 0.9
12 0 0 1.0""")

        f.close()

        DataParser.has_fid = False
        DataParser.has_sex = False
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        mapdata = [
            x.strip().split() for x in open(self.tped_filename).readlines()
        ]

        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)
示例#43
0
    def testTpedSnpBounded(self):
        """Run the analysis inside SNP boundary ``rs1000-rs3000`` on chromosome 1.

        The first result must sit at chr 1 / pos 1000, and the first three
        results are compared to reference statistics to six decimal places.
        """
        BoundaryCheck.chrom = 1
        DataParser.boundary = SnpBoundaryCheck(snps=["rs1000-rs3000"])
        pheno = PhenoCovar()
        dataset = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        dataset.load_tfam(pheno)
        dataset.load_genotypes()

        results = list(mv_esteq.RunAnalysis(dataset, pheno))
        self.assertEqual(1, results[0].chr)
        self.assertEqual(1000, results[0].pos)

        # One row per result: p_mvtest, then (beta, stderr, p-value) for
        # coefficient index 1 and for coefficient index 3.
        expected = (
            (0.0034756155,
             (0.1134684009, 0.0337649965541, 0.0007779211),
             (-0.0033479839, 0.0492050029324, 0.9457525716)),
            (0.57778118,
             (0.02798537, 0.033790691857, 0.40755865),
             (0.03275892, 0.0475661, 0.49101013)),
            (0.44661276,
             (0.01663975, 0.03443300, 0.62891811),
             (0.05712017, 0.04783608, 0.232446188)),
        )
        for position, (p_mvtest, coef1, coef3) in enumerate(expected):
            result = results[position]
            self.assertAlmostEqual(p_mvtest, result.p_mvtest, places=6)
            for coef_index, (beta, stderr, pvalue) in ((1, coef1), (3, coef3)):
                self.assertAlmostEqual(beta, result.betas[coef_index], places=6)
                self.assertAlmostEqual(stderr, result.beta_stderr[coef_index], places=6)
                self.assertAlmostEqual(pvalue, result.beta_pvalues[coef_index], places=6)
示例#44
0
    def testTpedSnpBounded(self):
        """Run the analysis inside SNP boundary ``rs1000-rs3000`` on chromosome 1.

        The first result must sit at chr 1 / pos 1000, and the first three
        results are compared to reference statistics to six decimal places.
        """
        BoundaryCheck.chrom = 1
        DataParser.boundary = SnpBoundaryCheck(snps=["rs1000-rs3000"])
        pheno = PhenoCovar()
        dataset = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        dataset.load_tfam(pheno)
        dataset.load_genotypes()

        results = list(mv_esteq.RunAnalysis(dataset, pheno))
        self.assertEqual(1, results[0].chr)
        self.assertEqual(1000, results[0].pos)

        # One row per result: p_mvtest, then (beta, stderr, p-value) for
        # coefficient index 1 and for coefficient index 3.
        expected = (
            (0.0034756155,
             (0.1134684009, 0.0337649965541, 0.0007779211),
             (-0.0033479839, 0.0492050029324, 0.9457525716)),
            (0.57778118,
             (0.02798537, 0.033790691857, 0.40755865),
             (0.03275892, 0.0475661, 0.49101013)),
            (0.44661276,
             (0.01663975, 0.03443300, 0.62891811),
             (0.05712017, 0.04783608, 0.232446188)),
        )
        for position, (p_mvtest, coef1, coef3) in enumerate(expected):
            result = results[position]
            self.assertAlmostEqual(p_mvtest, result.p_mvtest, places=6)
            for coef_index, (beta, stderr, pvalue) in ((1, coef1), (3, coef3)):
                self.assertAlmostEqual(beta, result.betas[coef_index], places=6)
                self.assertAlmostEqual(stderr, result.beta_stderr[coef_index], places=6)
                self.assertAlmostEqual(pvalue, result.beta_pvalues[coef_index], places=6)
示例#45
0
    def testTPedAnalysisCov(self):
        """Run the analysis with sex as a covariate and a default boundary.

        The first two results are compared to reference statistics to six
        decimal places.
        """
        PhenoCovar.sex_as_covariate = True
        DataParser.boundary = BoundaryCheck()
        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        results = list(mv_esteq.RunAnalysis(parser, pheno_covar))

        first = results[0]
        self.assertAlmostEqual(0.00342380, first.p_mvtest, places=6)
        self.assertAlmostEqual(0.11362883, first.betas[1], places=6)
        self.assertAlmostEqual(0.0337610, first.beta_stderr[1], places=6)
        self.assertAlmostEqual(0.00076356, first.beta_pvalues[1], places=6)
        self.assertAlmostEqual(0.01911090, first.betas[3], places=6)
        self.assertAlmostEqual(0.10143178, first.beta_stderr[3], places=6)
        self.assertAlmostEqual(0.8505542, first.beta_pvalues[3], places=6)

        # Only the coefficient at index 1 is checked for the second result.
        second = results[1]
        self.assertAlmostEqual(0.584950593047, second.p_mvtest, places=6)
        self.assertAlmostEqual(0.0276543736525, second.betas[1], places=6)
        self.assertAlmostEqual(0.03383588, second.beta_stderr[1], places=6)
        self.assertAlmostEqual(0.413751829881, second.beta_pvalues[1], places=6)
示例#46
0
    def testPedWithMissingMxIndExclusionsToo(self):
        """Combine individual exclusions with ``ind_miss_tol`` set to 0.5.

        Individuals ``2:2`` and ``3:3`` are excluded. The row counter only
        advances when a locus validates, and seven loci must validate.
        """
        pheno_covar = PhenoCovar()
        DataParser.ind_exclusions = ["2:2", "3:3"]
        DataParser.ind_miss_tol = 0.5  # We should only lose 1
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.miss_tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        tped_rows = get_lines(self.miss_tped_filename, split=True)

        expected_genotypes = [
            [0, 1],
            [1, 0, 0, 1, 1, 1, 0, 0, 0, 1],
            [0, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [0, 1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]

        # NOTE(review): this fixture mask is modified in place but is not
        # read again anywhere in this test.
        fixture_mask = self.non_missing
        fixture_mask[1] = False
        fixture_mask[2] = False

        row = 0
        for locus in parser:
            keep_all = numpy.ones(locus.missing_genotypes.shape[0], dtype=bool)
            try:
                genodata = locus.get_genotype_data(keep_all)
                fields = tped_rows[row]
                self.assertEqual(int(fields[0]), locus.chr)
                self.assertEqual(int(fields[3]), locus.pos)
                self.assertEqual(fields[1], locus.rsid)
                self.assertEqual(expected_genotypes[row],
                                 list(genodata.genotypes))
                row += 1
            except (TooMuchMissing, InvalidFrequency):
                # Tolerated: the counter is left unchanged for this locus.
                pass
        self.assertEqual(7, row)
示例#47
0
    def testTPedAnalysisCov(self):
        """Run the analysis with sex as a covariate and a default boundary.

        The first two results are compared to reference statistics to six
        decimal places.
        """
        PhenoCovar.sex_as_covariate = True
        DataParser.boundary = BoundaryCheck()
        pheno_covar = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        results = list(mv_esteq.RunAnalysis(parser, pheno_covar))

        first = results[0]
        self.assertAlmostEqual(0.00342380, first.p_mvtest, places=6)
        self.assertAlmostEqual(0.11362883, first.betas[1], places=6)
        self.assertAlmostEqual(0.0337610, first.beta_stderr[1], places=6)
        self.assertAlmostEqual(0.00076356, first.beta_pvalues[1], places=6)
        self.assertAlmostEqual(0.01911090, first.betas[3], places=6)
        self.assertAlmostEqual(0.10143178, first.beta_stderr[3], places=6)
        self.assertAlmostEqual(0.8505542, first.beta_pvalues[3], places=6)

        # Only the coefficient at index 1 is checked for the second result.
        second = results[1]
        self.assertAlmostEqual(0.584950593047, second.p_mvtest, places=6)
        self.assertAlmostEqual(0.0276543736525, second.betas[1], places=6)
        self.assertAlmostEqual(0.03383588, second.beta_stderr[1], places=6)
        self.assertAlmostEqual(0.413751829881, second.beta_pvalues[1], places=6)
示例#48
0
    def testPedWithMissingMxSnpComplete(self):
        """Parse the missing-data TPED with ``snp_miss_tol`` set to 0.5.

        Six loci are expected; both the loci list and the iterated loci are
        compared against the TPED starting at row 1.
        """
        pc = PhenoCovar()
        DataParser.snp_miss_tol = 0.5       # We should only lose 1
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.miss_tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        # Context manager closes the reference file; the original left the
        # handle open.
        with open(self.miss_tped_filename) as tped:
            mapdata = [line.strip().split() for line in tped]

        genotypes_w_missing = [
            [0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0],
            [1,  1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
            [0, -1, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [0, -1, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1, -1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [1, -1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, -1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0]

        ]

        hetero_freq_tped = [0.3636, 0.5, 0.3636, 0.4545, 0.3636, 0.2727, 0.2727]

        self.assertEqual(6, ped_parser.locus_count)
        index = 1
        loci = ped_parser.get_loci()
        for snp in loci:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertAlmostEqual(hetero_freq_tped[index], snp.hetero_freq, places=4)
            index += 1

        index = 1
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(genotypes_w_missing[index], list(snp.genotype_data))
            index += 1
        self.assertEqual(7, index)