Exemplo n.º 1
0
    def test_pheno_no_header(self):
        """Load a header-less phenotype file and verify the single phenotype.

        With ``sex_as_covariate`` disabled no covariate is expected, and the
        phenotype is expected to be named "Pheno-1".
        """
        # Sex must NOT be used as a covariate for this test
        PhenoCovar.sex_as_covariate = False
        pc = PhenoCovar()
        load_pedigree(pc, self.ped)

        pc.load_phenofile(self.no_header)
        self.assertEqual(1, len(pc.phenotype_data))
        self.assertEqual(0, len(pc.covariate_data))
        self.assertEqual(6, len(pc.phenotype_data[0]))
        self.assertEqual(1, len(pc.phenotype_names))
        self.assertEqual("Pheno-1", pc.phenotype_names[0])

        phenotype_values = [0.9, 1.0, 0.4, 0.8, 1, 0.1]
        # enumerate() replaces the Python 2-only xrange() index loop
        for idx, expected in enumerate(phenotype_values):
            self.assertAlmostEqual(expected, pc.phenotype_data[0][idx])
Exemplo n.º 2
0
    def testPedWithMissingComplete(self):
        """Walk the bed dataset containing missing calls and compare every
        locus (chromosome, position, rsid, genotypes) with the expected data.

        Loci rejected with TooMuchMissing, InvalidFrequency or InvariantVar
        are skipped but still counted.
        """
        pc = PhenoCovar()
        parser = bed_parser.Parser(self.missing_fam, self.missing_bim,
                                   self.missing_bed)
        parser.load_fam(pc)
        parser.load_bim(map3=False)
        parser.load_genotypes()

        expected_map = self.missing_mapdata

        loci_seen = 0
        for locus in parser:
            try:
                for y in pc:
                    _, _, nonmissing = y.get_variables(locus.missing_genotypes)
                    genodata = locus.get_genotype_data(nonmissing)
                    row = expected_map[loci_seen]
                    self.assertEqual(int(row[0]), locus.chr)
                    self.assertEqual(int(row[3]), locus.pos)
                    self.assertEqual(row[1], locus.rsid)
                    self.assertEqual(self.genotypes_w_missing[loci_seen],
                                     list(genodata.genotypes))
            except (TooMuchMissing, InvalidFrequency, InvariantVar):
                # Rejected loci are expected here; they only need counting
                pass
            loci_seen += 1
        self.assertEqual(7, loci_seen)
Exemplo n.º 3
0
    def testTPedPhenoComplete(self):
        """Load the complete (non-missing) bed dataset with sex as a covariate
        and verify the subject counts as well as every locus."""
        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        parser = bed_parser.Parser(self.nonmissing_fam,
                                   self.nonmissing_bim,
                                   self.nonmissing_bed)
        parser.load_fam(pc)
        parser.load_bim(map3=False)
        parser.load_genotypes()

        self.assertEqual(12, len(pc.covariate_data[0]))
        self.assertEqual(12, len(pc.phenotype_data[0]))
        self.assertEqual(1, len(pc.phenotype_names))
        expected_map = self.nonmissing_mapdata

        loci_seen = 0
        for locus in parser:
            for y in pc:
                _, _, nonmissing = y.get_variables(locus.missing_genotypes)

                try:
                    genodata = locus.get_genotype_data(nonmissing)
                    row = expected_map[loci_seen]
                    self.assertEqual(int(row[0]), locus.chr)
                    self.assertEqual(int(row[3]), locus.pos)
                    self.assertEqual(row[1], locus.rsid)
                    self.assertEqual(self.genotypes[loci_seen],
                                     list(genodata.genotypes))
                except (TooMuchMissing, InvalidFrequency):
                    # Filtered loci are tolerated; only the count matters
                    pass
            loci_seen += 1
        self.assertEqual(7, loci_seen)
Exemplo n.º 4
0
    def testEmptyIterator(self):
        """Iterating a freshly constructed PhenoCovar must yield nothing."""
        pc = PhenoCovar()
        yielded = sum(1 for _ in pc)

        self.assertEqual(0, yielded)
Exemplo n.º 5
0
    def testPedRegionBoundaryTPed(self):
        """Restrict parsing to the rs0005-rs0006 SNP range on chromosome 2 and
        verify that exactly those two loci are returned."""
        pc = PhenoCovar()
        DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0006"])
        BoundaryCheck.chrom = 2
        parser = bed_parser.Parser(self.nonmissing_fam,
                                   self.nonmissing_bim,
                                   self.nonmissing_bed)
        parser.load_fam(pc)
        parser.load_bim(map3=False)
        parser.load_genotypes()

        expected_map = self.nonmissing_mapdata

        # Expected rows start at offset 4 of the full map
        position = 4
        self.assertEqual(2, parser.locus_count)
        for locus in parser:
            for y in pc:
                _, _, nonmissing = y.get_variables(locus.missing_genotypes)

                try:
                    genodata = locus.get_genotype_data(nonmissing)
                    row = expected_map[position]
                    self.assertEqual(int(row[0]), locus.chr)
                    self.assertEqual(int(row[3]), locus.pos)
                    self.assertEqual(row[1], locus.rsid)
                    self.assertEqual(self.genotypes[position],
                                     list(genodata.genotypes))
                except (TooMuchMissing, InvalidFrequency):
                    # Filtered loci are tolerated; only the count matters
                    pass

            position += 1
        self.assertEqual(6, position)
Exemplo n.º 6
0
def load_pedigree(pc, ped):
    """Add every pedigree line to *pc* as a subject, then freeze the subjects.

    Each line of *ped* must split on whitespace into exactly four fields:
    family id, individual id, sex and phenotype. Sex is passed on as an int
    and the phenotype as a float.
    """
    for record in ped:
        family_id, individual_id, sex_code, pheno_value = record.split()
        subject_id = PhenoCovar.build_id(
            [family_id, individual_id, sex_code, pheno_value])
        pc.add_subject(subject_id,
                       sex=int(sex_code),
                       phenotype=float(pheno_value))
    pc.freeze_subjects()
Exemplo n.º 7
0
    def testInfoFileUseNoChrPos(self):
        """With chrpos encoding disabled, each locus should report "NA" for
        chr and pos, carry a "chrom:pos" rsid and match the expected dosages."""
        # gen_ext is deliberately bogus so that only the explicitly supplied
        # info files can be the ones in use
        mach_parser.Parser.chrpos_encoding = False
        DataParser.boundary = SnpBoundaryCheck(self.locus_labels)
        mach_parser.Parser.gen_ext = 'asdf'
        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        parser = mach_parser.Parser(
            [self.gen_file, self.gen_file2],
            info_files=[self.info_file1, self.info_file2])
        parser.load_family_details(pc)
        parser.load_genotypes()

        loci_seen = 0

        for locus in parser:
            self.assertEqual("NA", locus.pos)
            self.assertEqual("NA", locus.chr)
            expected_rsid = "%s:%s" % (self.chroms[loci_seen],
                                       self.positions[loci_seen])
            self.assertEqual(expected_rsid, locus.rsid)
            for i, dosage in enumerate(self.dosage_encoding[loci_seen]):
                self.assertAlmostEqual(dosage,
                                       locus.genotype_data[i],
                                       places=3)
            loci_seen += 1
        self.assertEqual(20, loci_seen)
Exemplo n.º 8
0
    def test_tped_standardization_w_missing1(self):
        """Check raw and standardized phenotype/covariate values when two
        individuals are excluded through ``DataParser.ind_exclusions``."""
        PhenoCovar.sex_as_covariate = True
        DataParser.ind_exclusions = ["11:11", "12:12"]

        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()
        # Builtin bool is used because the numpy.bool alias was removed in
        # NumPy 1.24; every remaining subject starts out as non-missing.
        nonmissing = numpy.ones(pc.phenotype_data[0].shape, dtype=bool)
        libgwas.standardizer.set_standardizer(
            libgwas.standardizer.NoStandardization)

        raw_pheno = [0.1, 0.4, 1.0, 0.5, 0.9, 1.0, 0.1, 0.4, 1.0, 0.5]
        raw_cov = [1, 1, 2, 2, 1, 1, 1, 1, 2, 2]

        # First pass: no standardization, values must come back untouched
        for pheno in pc:
            (y, c,
             total_nonmissing) = pheno.get_variables(numpy.invert(nonmissing))
            for i, expected in enumerate(raw_pheno):
                self.assertAlmostEqual(expected, y[i])
                self.assertAlmostEqual(raw_cov[i], c[0][i])

        # Second pass: reload and enable standardization
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                              self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()
        pc.do_standardize_variables = True
        libgwas.standardizer.set_standardizer(Standardizer)

        std_pheno = [
            -1.43314068, -0.55570761, 1.19915853, -0.26322992, 0.90668084,
            1.19915853, -1.43314068, -0.55570761, 1.19915853, -0.26322992
        ]
        std_cov = [
            -0.81649658, -0.81649658, 1.22474487, 1.22474487, -0.81649658,
            -0.81649658, -0.81649658, -0.81649658, 1.22474487, 1.22474487
        ]
        for pheno in pc:
            (y, c,
             total_nonmissing) = pheno.get_variables(numpy.invert(nonmissing))

            for i, expected in enumerate(std_pheno):
                self.assertAlmostEqual(expected, y[i])
                self.assertAlmostEqual(std_cov[i], c[0][i])
Exemplo n.º 9
0
    def testIndIdsDefault(self):
        """Pedigree keys are expected in "Fam:Ind" form when this test does
        not set an id encoding itself."""
        pc = PhenoCovar()
        load_pedigree(pc, self.ped)

        id_list = "Fam1:Ind1,Fam2:Ind2,Fam3:Ind3,Fam4:Ind4,Fam5:Ind5,Fam6:Ind6,Fam4:Ind7,Fam9:Ind1"
        expected = sorted(id_list.split(","))
        actual = sorted(pc.pedigree_data.keys())
        self.assertEqual(expected, actual)
Exemplo n.º 10
0
    def testIndIdsFID(self):
        """With FID id encoding, pedigree keys are the family ids only."""
        PhenoCovar.id_encoding = PhenoIdFormat.FID
        ped_lines = get_lines(self.filenames[2])
        pc = PhenoCovar()
        load_pedigree(pc, ped_lines)

        expected = "Fam1,Fam2,Fam3,Fam4,Fam5,Fam6".split(",")
        actual = sorted(pc.pedigree_data.keys())
        self.assertEqual(expected, actual)
Exemplo n.º 11
0
    def test_basic_population(self):
        """Populate PhenoCovar subject by subject, with and without sex as a
        covariate, and verify the stored indices, phenotypes and covariates."""
        # Indicate that we want to use sex as a covariate
        PhenoCovar.sex_as_covariate = True

        pc = PhenoCovar()

        for line in self.ped:
            fam, ind, sex, ph = line.split()
            pc.add_subject("%s:%s" % (fam, ind),
                           sex=int(sex),
                           phenotype=float(ph))

        self.assertEqual(1, len(pc.covariate_data))
        self.assertEqual(1, len(pc.phenotype_data))
        self.assertEqual(6, len(pc.covariate_data[0]))
        self.assertEqual(6, len(pc.phenotype_data[0]))

        for i, line in enumerate(self.ped):
            fam, ind, sex, ph = line.split()
            iid = "%s:%s" % (fam, ind)
            # pedigree_data maps the subject id to its insertion index
            self.assertEqual(pc.pedigree_data[iid], i)
            self.assertAlmostEqual(float(ph), pc.phenotype_data[0][i])
            self.assertEqual(int(sex), pc.covariate_data[0][i])

        # Indicate that we do not want to use sex as a covariate
        PhenoCovar.sex_as_covariate = False
        newpc = PhenoCovar()

        for line in self.ped:
            fam, ind, sex, ph = line.split()
            newpc.add_subject("%s:%s" % (fam, ind),
                              sex=int(sex),
                              phenotype=float(ph))

        # Test that sex wasn't loaded as a covariate due to the setting of PhenoCovar.sex_as_covariate
        self.assertEqual(0, len(newpc.covariate_data))
        self.assertEqual(1, len(newpc.phenotype_data))
        self.assertEqual(6, len(newpc.pedigree_data))
        self.assertEqual(6, len(newpc.phenotype_data[0]))

        # Verify the contents of newpc itself; the previous version re-checked
        # the first object (pc) here, leaving newpc's data unverified.
        for i, line in enumerate(self.ped):
            fam, ind, sex, ph = line.split()
            iid = "%s:%s" % (fam, ind)
            self.assertEqual(newpc.pedigree_data[iid], i)
            self.assertAlmostEqual(float(ph), newpc.phenotype_data[0][i])
Exemplo n.º 12
0
    def testBasicWithCovar(self):
        """Load two named covariates (BMI, MSA) with the last two individuals
        masked out and verify what iteration over PhenoCovar returns.

        ``PhenoCovar.sex_as_covariate`` is switched off for the duration of
        the test and always switched back on afterwards.
        """
        PhenoCovar.sex_as_covariate = False
        try:
            pc = PhenoCovar()
            load_pedigree(pc, self.ped)
            pc.individual_mask = [0, 0, 0, 0, 0, 0, 1, 1]
            pc.load_covarfile(self.header, names=["BMI", "MSA"])
            pc.freeze_subjects()

            pheno = [0.1, 0.4, 1.0, 0.5, 0.9, 1.0]

            # Test without sex as covariate
            count = 0
            for test in pc:
                test_pheno, covars, nonmissing = test.get_variables()
                self.assertEqual(6, numpy.sum(nonmissing))
                self.assertEqual(2, len(covars))
                # These checks used to sit in a loop whose variable was never
                # used, so they simply ran twice; once is sufficient.
                self.assertEqual(self.phenotypes[0], list(covars[0]))
                self.assertEqual(self.phenotypes[2], list(covars[1]))
                self.assertEqual("BMI", test.get_covariate_name(0))
                self.assertEqual("MSA", test.get_covariate_name(1))
                # NOTE(review): this measures the local expected list, not
                # test_pheno -- confirm whether len(test_pheno) was intended.
                self.assertEqual(6, len(pheno))
                self.assertEqual("Pheno-1", test.get_phenotype_name())

                for i, expected in enumerate(pheno):
                    self.assertAlmostEqual(expected, test_pheno[i])
                count += 1
            self.assertEqual(1, count)
        finally:
            # Restore the class-level flag even when an assertion fails
            PhenoCovar.sex_as_covariate = True
Exemplo n.º 13
0
    def testWithoutSample(self):
        """Family details loaded from the bgen file should register subjects
        ID0001 through ID0011 in the pedigree data."""
        pc = PhenoCovar()
        parser = libgwas.bgen_parser.Parser(self.nomissing)

        parser.load_family_details(pc)
        parser.load_genotypes()
        for number in range(1, 12):
            subject_id = "ID%s" % str(number).zfill(4)
            self.assertTrue(subject_id in pc.pedigree_data)
Exemplo n.º 14
0
    def testIndIdsIID(self):
        """With IID id encoding, pedigree keys are the individual ids only."""
        PhenoCovar.id_encoding = PhenoIdFormat.IID

        ped_lines = get_lines(self.filenames[2])
        pc = PhenoCovar()
        load_pedigree(pc, ped_lines)

        expected = "Ind1,Ind2,Ind3,Ind4,Ind5,Ind6".split(",")
        actual = sorted(pc.pedigree_data.keys())
        self.assertEqual(expected, actual)
Exemplo n.º 15
0
    def testPedWithMissingMxSnpComplete(self):
        """Parse the tped file containing missing calls with a SNP missingness
        tolerance of 0.5 and expect exactly one locus to be rejected."""
        pc = PhenoCovar()
        DataParser.snp_miss_tol = 0.5  # We should only lose 1
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.miss_tped_filename)
        parser.load_tfam(pc)
        parser.load_genotypes()

        expected_map = libgwas.get_lines(self.miss_tped_filename, split=True)

        expected_genotypes = [
            [0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0],
            [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
            [0, -1, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [0, -1, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1, -1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [1, -1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, -1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]

        expected_hetero_freq = [
            0.3636, 0.5, 0.3636, 0.4545, 0.3636, 0.2727, 0.2727
        ]

        self.assertEqual(7, parser.locus_count)

        # Pass 1: the locus list alone must match the map rows
        loci_checked = 0
        for locus in parser.get_loci():
            self.assertEqual(int(expected_map[loci_checked][0]), locus.chr)
            self.assertEqual(int(expected_map[loci_checked][3]), locus.pos)
            loci_checked += 1
        self.assertEqual(7, loci_checked)

        # Pass 2: full iteration, tallying accepted and rejected loci
        snps_seen = 0
        rejected = 0
        accepted = 0
        for snp in parser:
            for y in pc:
                _, _, nonmissing = y.get_variables(snp.missing_genotypes)
                try:
                    genodata = snp.get_genotype_data(nonmissing)
                    row = expected_map[snps_seen]
                    self.assertEqual(int(row[0]), snp.chr)
                    self.assertEqual(int(row[3]), snp.pos)
                    self.assertEqual(row[1], snp.rsid)
                    self.assertEqual(expected_genotypes[snps_seen],
                                     list(snp.genotype_data))
                    self.assertAlmostEqual(expected_hetero_freq[snps_seen],
                                           genodata.hetero_freq,
                                           places=4)

                    accepted += 1
                except TooMuchMissing:
                    rejected += 1
                except InvalidFrequency:
                    pass
            snps_seen += 1
        self.assertEqual(1, rejected)
        self.assertEqual(6, accepted)
        self.assertEqual(7, snps_seen)
Exemplo n.º 16
0
    def test_tped_standardization_w_dbl_missing(self):
        """Check raw and standardized values when two individuals are excluded
        and the first two remaining subjects are additionally flagged missing."""
        PhenoCovar.sex_as_covariate = True
        DataParser.ind_exclusions = ["11:11", "12:12"]

        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()
        # Builtin bool is used because the numpy.bool alias was removed in
        # NumPy 1.24. The first two subjects are flagged as missing.
        nonmissing = numpy.ones(pc.phenotype_data[0].shape, dtype=bool)
        nonmissing[:2] = False
        libgwas.standardizer.set_standardizer(libgwas.standardizer.NoStandardization)

        raw_pheno = [1.0, 0.5, 0.9, 1.0, 0.1, 0.4, 1.0, 0.5]
        raw_cov = [2, 2, 1, 1, 1, 1, 2, 2]

        # First pass: no standardization, values must come back untouched
        for pheno in pc:
            (y, c, total_nonmissing) = pheno.get_variables(numpy.invert(nonmissing))
            for i, expected in enumerate(raw_pheno):
                self.assertAlmostEqual(expected, y[i])
                self.assertAlmostEqual(raw_cov[i], c[0][i])

        # Second pass: reload and enable standardization
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()

        pc.do_standardize_variables = True
        libgwas.standardizer.set_standardizer(Standardizer)

        std_pheno = [1.19915853, -0.26322992, 0.90668084,
                     1.19915853, -1.43314068, -0.55570761, 1.19915853, -0.26322992]
        std_cov = [1.22474487, 1.22474487, -0.81649658,
                   -0.81649658, -0.81649658, -0.81649658, 1.22474487, 1.22474487]
        for pheno in pc:
            (y, c, total_nonmissing) = pheno.get_variables(numpy.invert(nonmissing))

            for i, expected in enumerate(std_pheno):
                self.assertAlmostEqual(expected, y[i])
                self.assertAlmostEqual(std_cov[i], c[0][i])
Exemplo n.º 17
0
    def testPedMultiPheno(self):
        """Load two phenotypes (columns 2 and 3) on top of a ped dataset and
        verify loci, the sex covariate and both phenotypes with one individual
        flagged as missing."""
        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        parser = PedigreeParser(self.map_filename, self.ped_filename)
        parser.load_mapfile()
        parser.load_genotypes(pc)

        with open(self.pheno_file) as pheno_handle:
            pc.load_phenofile(pheno_handle, indices=[2, 3])
        expected_map = get_lines(self.map_filename, split=True)

        expected_sex = [1, 1, 2, 2, 1, 1, 1, 2, 2, 1, 1]
        expected_pheno = [
            [1.0, 0.5, 0.6, 0.5, 1.0, 0.1, 0.5, 0.6, 0.5, 1.0, 0.1],
            [0.5, 1.0, 0.1, 0.5, 1.0, 0.2, 1.0, 0.1, 0.5, 1.0, 0.2],
        ]

        self.assertEqual(2, len(pc.phenotype_data))
        loci_seen = 0
        for locus in parser:
            row = expected_map[loci_seen]
            self.assertEqual(int(row[0]), locus.chr)
            self.assertEqual(int(row[3]), locus.pos)
            self.assertEqual(row[1], locus.rsid)
            self.assertEqual(self.genotypes[loci_seen],
                             list(locus.genotype_data))

            pheno_idx = 0
            for y in pc:
                # Everyone is present except the individual at index 6
                present = numpy.ones(len(locus.genotype_data), dtype=bool)
                present[6] = False
                pheno, covariates, _ = y.get_variables(numpy.invert(present))
                for i in range(0, 11):
                    self.assertEqual(expected_sex[i], covariates[0][i])
                    self.assertAlmostEqual(expected_pheno[pheno_idx][i],
                                           pheno[i])

                pheno_idx += 1

            self.assertEqual(2, pheno_idx)

            loci_seen += 1
        self.assertEqual(7, loci_seen)
Exemplo n.º 18
0
 def test_exceed_column_count(self):
     """Requesting column index 12 must raise InvalidSelection for both the
     header and the header-less phenotype input."""
     pc = PhenoCovar()
     load_pedigree(pc, self.ped)
     with self.assertRaises(InvalidSelection):
         pc.load_phenofile(self.header, indices=[12])
     with self.assertRaises(InvalidSelection):
         pc.load_phenofile(self.no_header, indices=[12])
Exemplo n.º 19
0
    def testCovarMissingAll(self):
        # Indicate that we want to use sex as a covariate
        PhenoCovar.sex_as_covariate = True

        prefix = "__test_pheno"
        filename = "%s_miss.txt" % (prefix)
        f = open(filename, "w")
        f.write("""FID\tIID\tBMI\tIBM\tMSA
F1\tI1\t-9\t1.0\t0.5
F2\tI2\t0.2\t-9\t1.0
F3\tI3\t0.3\t0.6\t-9
F4\tI4\t0.4\t0.5\t0.5
F4\tI5\t0.5\t1.0\t1.0
F4\tI6\t0.6\t0.1\t0.2""")
        f.close()
        file = open(filename)

        with self.assertRaises(NoMatchedPhenoCovars):
            pc = PhenoCovar()
            load_pedigree(pc, self.ped)
            pc.load_covarfile(file, indices=[1])
Exemplo n.º 20
0
    def test_tped_standardization2(self):
        """Check raw and standardized phenotype/covariate values for the tped
        dataset with sex and phenotype columns enabled."""
        DataParser.has_sex = True
        DataParser.has_pheno = True
        PhenoCovar.sex_as_covariate = True

        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()
        # Builtin bool is used because the numpy.bool alias was removed in
        # NumPy 1.24; every subject starts out as non-missing.
        nonmissing = numpy.ones(pc.phenotype_data[0].shape, dtype=bool)
        libgwas.standardizer.set_standardizer(libgwas.standardizer.NoStandardization)

        raw_pheno = [0.1, 0.4, 1.0, 0.5, 0.9, 1.0, 0.1, 0.4, 1.0, 0.5, 0.9, 1.0]
        raw_cov = [1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1]

        # First pass: no standardization, values must come back untouched
        for pheno in pc:
            (y, c, total_nonmissing) = pheno.get_variables(numpy.invert(nonmissing))

            for i, expected in enumerate(raw_pheno):
                self.assertAlmostEqual(expected, y[i])
                self.assertAlmostEqual(raw_cov[i], c[0][i])

        # Second pass: reload and enable standardization
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()
        pc.do_standardize_variables = True
        libgwas.standardizer.set_standardizer(Standardizer)

        std_pheno = [-1.61601695, -0.73455316, 1.02837442, -0.4407319, 0.73455316, 1.02837442,
                     -1.61601695, -0.73455316, 1.02837442, -0.4407319, 0.73455316, 1.02837442]
        std_cov = [-0.70710678, -0.70710678, 1.41421356, 1.41421356, -0.70710678, -0.70710678,
                   -0.70710678, -0.70710678, 1.41421356, 1.41421356, -0.70710678, -0.70710678]
        for pheno in pc:
            (y, c, total_nonmissing) = pheno.get_variables(numpy.invert(nonmissing))

            for i, expected in enumerate(std_pheno):
                self.assertAlmostEqual(expected, y[i])
                self.assertAlmostEqual(std_cov[i], c[0][i])
Exemplo n.º 21
0
    def test_tped_standardization2(self):
        """Check raw and standardized phenotype/covariate values for the tped
        dataset with sex and phenotype columns enabled."""
        DataParser.has_sex = True
        DataParser.has_pheno = True
        PhenoCovar.sex_as_covariate = True

        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()
        # Builtin bool is used because the numpy.bool alias was removed in
        # NumPy 1.24; every subject starts out as non-missing.
        nonmissing = numpy.ones(pc.phenotype_data[0].shape, dtype=bool)
        libgwas.standardizer.set_standardizer(libgwas.standardizer.NoStandardization)

        raw_pheno = [0.1, 0.4, 1.0, 0.5, 0.9, 1.0, 0.1, 0.4, 1.0, 0.5, 0.9, 1.0]
        raw_cov = [1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1]

        # First pass: no standardization, values must come back untouched
        for pheno in pc:
            (y, c, total_nonmissing) = pheno.get_variables(numpy.invert(nonmissing))

            for i, expected in enumerate(raw_pheno):
                self.assertAlmostEqual(expected, y[i])
                self.assertAlmostEqual(raw_cov[i], c[0][i])

        # Second pass: reload and enable standardization
        pc = PhenoCovar()
        ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
        ped_parser.load_tfam(pc)
        ped_parser.load_genotypes()
        pc.do_standardize_variables = True
        libgwas.standardizer.set_standardizer(Standardizer)

        std_pheno = [-1.61601695, -0.73455316, 1.02837442, -0.4407319, 0.73455316, 1.02837442,
                     -1.61601695, -0.73455316, 1.02837442, -0.4407319, 0.73455316, 1.02837442]
        std_cov = [-0.70710678, -0.70710678, 1.41421356, 1.41421356, -0.70710678, -0.70710678,
                   -0.70710678, -0.70710678, 1.41421356, 1.41421356, -0.70710678, -0.70710678]
        for pheno in pc:
            (y, c, total_nonmissing) = pheno.get_variables(numpy.invert(nonmissing))

            for i, expected in enumerate(std_pheno):
                self.assertAlmostEqual(expected, y[i])
                self.assertAlmostEqual(std_cov[i], c[0][i])
Exemplo n.º 22
0
    def test_sample_pheno_with_header(self):
        """Load phenotypes from a sample-format file, first two columns and
        then three, verifying names and values after each load."""
        # Indicate that we want to use sex as a covariate
        PhenoCovar.sex_as_covariate = True

        pc = PhenoCovar()
        load_pedigree(pc, self.ped)
        # "with" closes the handle that was previously leaked
        with open(self.filenames[8]) as sample:
            pc.load_phenofile(sample, indices=[1, 2], sample_file=True)
        self.assertEqual(2, len(pc.phenotype_data))
        self.assertEqual(1, len(pc.covariate_data))
        self.assertEqual(6, len(pc.phenotype_data[0]))
        self.assertEqual(6, len(pc.phenotype_data[1]))
        self.assertEqual(6, len(pc.covariate_data[0]))
        self.assertEqual(["BMI", "IBM"], pc.phenotype_names)

        for index, (p1, p2, _) in enumerate(self.phenotypes):
            self.assertEqual(p1, pc.phenotype_data[0][index])
            self.assertEqual(p2, pc.phenotype_data[1][index])

        # Loading again with three indices yields three phenotypes
        with open(self.filenames[8]) as sample:
            pc.load_phenofile(sample, indices=[1, 2, 3], sample_file=True)
        self.assertEqual(3, len(pc.phenotype_data))
        self.assertEqual(1, len(pc.covariate_data))
        self.assertEqual(6, len(pc.phenotype_data[0]))
        self.assertEqual(6, len(pc.phenotype_data[1]))
        self.assertEqual(6, len(pc.phenotype_data[2]))
        self.assertEqual(["BMI", "IBM", "MSA"], pc.phenotype_names)

        for index, (p1, p2, p3) in enumerate(self.phenotypes):
            self.assertEqual(p1, pc.phenotype_data[0][index])
            self.assertEqual(p2, pc.phenotype_data[1][index])
            self.assertEqual(p3, pc.phenotype_data[2][index])
Exemplo n.º 23
0
    def testPhenoNoHeader(self):
        """Load phenotypes from a header-less file, first columns 2-3 and then
        1-3, verifying the "Pheno-N" names and the values after each load."""
        # Indicate that we want to use sex as a covariate
        PhenoCovar.sex_as_covariate = True

        pc = PhenoCovar()
        load_pedigree(pc, self.ped)
        # "with" closes the handle that was previously leaked
        with open(self.filenames[3]) as pheno_file:
            pc.load_phenofile(pheno_file, indices=[2, 3])
        self.assertEqual(2, len(pc.phenotype_data))
        self.assertEqual(1, len(pc.covariate_data))
        self.assertEqual(6, len(pc.phenotype_data[0]))
        self.assertEqual(6, len(pc.phenotype_data[1]))
        self.assertEqual(6, len(pc.covariate_data[0]))
        self.assertEqual(["Pheno-2", "Pheno-3"], pc.phenotype_names)

        for index, (_, p2, p3) in enumerate(self.phenotypes):
            self.assertEqual(p2, pc.phenotype_data[0][index])
            self.assertEqual(p3, pc.phenotype_data[1][index])

        # Loading again with three indices yields three phenotypes
        with open(self.filenames[3]) as pheno_file:
            pc.load_phenofile(pheno_file, indices=[1, 2, 3])
        self.assertEqual(3, len(pc.phenotype_data))
        self.assertEqual(1, len(pc.covariate_data))
        self.assertEqual(6, len(pc.phenotype_data[0]))
        self.assertEqual(6, len(pc.phenotype_data[1]))
        self.assertEqual(6, len(pc.phenotype_data[2]))
        self.assertEqual(["Pheno-1", "Pheno-2", "Pheno-3"], pc.phenotype_names)

        for index, (p1, p2, p3) in enumerate(self.phenotypes):
            self.assertEqual(p1, pc.phenotype_data[0][index])
            self.assertEqual(p2, pc.phenotype_data[1][index])
            self.assertEqual(p3, pc.phenotype_data[2][index])
Exemplo n.º 24
0
    def testPedMultiPheno(self):
        """Load two phenotypes (columns 2 and 3) on top of a ped dataset and
        verify loci, the sex covariate and both phenotypes with one individual
        flagged as missing."""
        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        # Both files are opened under "with" so their handles are closed
        with open(self.pheno_file) as pheno_file:
            pc.load_phenofile(pheno_file, indices=[2, 3])
        with open(self.map_filename) as map_file:
            mapdata = [x.strip().split() for x in map_file.readlines()]

        sex = [1, 1, 2, 2, 1, 1, 1, 2, 2, 1, 1]
        dual_pheno = [
            [1.0, 0.5, 0.6, 0.5, 1.0, 0.1, 0.5, 0.6, 0.5, 1.0, 0.1],
            [0.5, 1.0, 0.1, 0.5, 1.0, 0.2, 1.0, 0.1, 0.5, 1.0, 0.2],
        ]

        self.assertEqual(2, len(pc.phenotype_data))
        index = 0
        for snp in ped_parser:
            self.assertEqual(int(mapdata[index][0]), snp.chr)
            self.assertEqual(int(mapdata[index][3]), snp.pos)
            self.assertEqual(mapdata[index][1], snp.rsid)
            self.assertEqual(self.genotypes[index], list(snp.genotype_data))

            idx = 0
            for y in pc:
                # Everyone is present except the individual at index 6
                non_missing = numpy.ones(len(snp.genotype_data), dtype=bool)
                non_missing[6] = False
                (pheno, covariates, nm_indata) = y.get_variables(numpy.invert(non_missing))
                for i in range(0, 11):
                    self.assertEqual(sex[i], covariates[0][i])
                    self.assertAlmostEqual(dual_pheno[idx][i], pheno[i])

                idx += 1

            self.assertEqual(2, idx)

            index += 1
        self.assertEqual(7, index)
Exemplo n.º 25
0
    def testBasicWithMask(self):
        """Mask out two individuals and verify the phenotype and sex covariate
        returned for the remaining six."""
        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        load_pedigree(pc, self.ped)
        pc.individual_mask = [0, 0, 1, 0, 0, 0, 1, 0]

        expected_pheno = [0.1, 0.4, 0.5, 0.9, 1.0, 0.9]
        expected_sex = [1, 1, 2, 1, 1, 1]

        tests_run = 0
        for test in pc:
            mask = numpy.array(pc.individual_mask, dtype=bool)
            pheno, covars, nonmissing = test.get_variables(mask)
            self.assertEqual(6, numpy.sum(nonmissing))
            self.assertEqual(1, len(covars))

            # The running test counter doubles as the covariate index
            self.assertEqual(expected_sex, list(covars[tests_run]))
            self.assertEqual("SEX", test.get_covariate_name(tests_run))
            self.assertEqual(6, len(pheno))
            self.assertEqual("Pheno-1", test.get_phenotype_name())
            for position, observed in enumerate(pheno):
                self.assertEqual(expected_pheno[position], observed)
            tests_run += 1
        self.assertEqual(1, tests_run)
Exemplo n.º 26
0
    def testAllelesIteration(self):
        """Each locus of the tped dataset must report the expected major and
        minor alleles."""
        pc = PhenoCovar()
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)
        parser.load_tfam(pc)
        parser.load_genotypes()

        loci_seen = 0
        for locus in parser:
            expected_alleles = self.tped1_alleles[loci_seen]
            self.assertEqual(expected_alleles[1], locus.minor_allele)
            self.assertEqual(expected_alleles[0], locus.major_allele)

            loci_seen += 1
        self.assertEqual(7, loci_seen)
Exemplo n.º 27
0
    def testPedNegativePositions(self):
        """Parse with the alternate map file (map_miss_filename) and expect the
        returned loci to line up with rows 2 onward of the regular map."""
        pc = PhenoCovar()
        parser = PedigreeParser(self.map_miss_filename, self.ped_filename)
        parser.load_mapfile()
        parser.load_genotypes(pc)
        expected_map = get_lines(self.map_filename, split=True)

        # Expected rows start at offset 2 of the regular map
        position = 2
        for locus in parser:
            row = expected_map[position]
            self.assertEqual(int(row[0]), locus.chr)
            self.assertEqual(int(row[3]), locus.pos)
            self.assertEqual(row[1], locus.rsid)
            self.assertEqual(self.genotypes[position],
                             list(locus.genotype_data))
            position += 1
        self.assertEqual(7, position)
Exemplo n.º 28
0
    def testFamilyData(self):
        """Family details loaded by the impute parser must register every
        expected individual with its phenotype and sex covariate."""
        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        parser = impute_parser.Parser(self.fam_file,
                                      [self.gen_file, self.gen_file2],
                                      chroms=["3", "4"])
        parser.load_family_details(pc)
        parser.load_genotypes()

        for position, subject_id in enumerate(self.ind_ids):
            self.assertTrue(subject_id in pc.pedigree_data)
            self.assertEqual(self.phenotypes[position],
                             pc.phenotype_data[0][position])
            self.assertEqual(self.sex[position],
                             pc.covariate_data[0][position])
Exemplo n.º 29
0
    def testForInvariant(self):
        prefix = "__test_pedigree"
        self.pheno_file = "%s_mch.txt" % (prefix)
        f = open(self.pheno_file, "w")
        f.write(
            """FID\tIID\tBMI\tIBM\tMSA
1\t1\t0.1\t1.0\t1.0
2\t2\t0.2\t0.5\t1.0
3\t3\t0.3\t0.6\t1.0
4\t4\t0.4\t0.5\t1.0
5\t5\t0.5\t1.0\t1.0
6\t6\t0.6\t0.1\t1.0
17\t7\t0.1\t1.0\t1.0
8\t8\t0.2\t0.5\t1.0
9\t9\t0.3\t0.6\t1.0
10\t10\t0.4\t0.5\t1.0
11\t11\t0.5\t1.0\t1.0
12\t12\t0.6\t0.1\t1.0"""
        )
        f.close()

        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        pc.load_phenofile(open(self.pheno_file), indices=[3])
        index = 0

        mapdata = [x.strip().split() for x in open(self.map_filename).readlines()]

        with self.assertRaises(InvariantVar):
            for snp in ped_parser:
                for y in pc:
                    non_missing = numpy.ones(len(snp.genotype_data), dtype=bool)
                    (pheno, covariates, nonmissing) = y.get_variables(numpy.invert(non_missing))
Exemplo n.º 30
0
    def testCovarHeader(self):
        """Load a covariate file with a header row and check labels/values.

        After ``load_pedigree`` there must be one phenotype ("Pheno-1") with
        eight values and one covariate (sex). After ``load_covarfile`` the
        covariate labels must be "SEX" then "BMI", the BMI values must match
        the expected list (the last two entries being
        ``PhenoCovar.missing_encoding``), and the previously loaded sex and
        phenotype values must be unchanged.
        """
        # Indicate that we want to use sex as a covariate
        PhenoCovar.sex_as_covariate = True

        pc = PhenoCovar()
        load_pedigree(pc, self.ped)
        self.assertEqual(1, len(pc.phenotype_data))
        self.assertEqual(1, len(pc.covariate_data))
        self.assertEqual(8, len(pc.phenotype_data[0]))
        self.assertEqual(1, len(pc.phenotype_names))
        self.assertEqual("Pheno-1", pc.phenotype_names[0])

        # enumerate() instead of xrange(): same iteration on Python 2, and it
        # also runs on Python 3 where xrange no longer exists.
        for idx, expected_pheno in enumerate(self.pheno):
            self.assertAlmostEqual(expected_pheno, pc.phenotype_data[0][idx])
            self.assertEqual(self.sex[idx], pc.covariate_data[0][idx])

        pc.load_covarfile(self.header)
        self.assertEqual("BMI", pc.covariate_labels[1])
        self.assertEqual("SEX", pc.covariate_labels[0])
        covar = [0.9, 1.0, 0.4, 0.8, 1, 0.1,
                 PhenoCovar.missing_encoding, PhenoCovar.missing_encoding]
        for idx, expected_covar in enumerate(covar):
            self.assertAlmostEqual(self.sex[idx], pc.covariate_data[0][idx])
            self.assertAlmostEqual(expected_covar, pc.covariate_data[1][idx])
            self.assertAlmostEqual(self.pheno[idx], pc.phenotype_data[0][idx])
Exemplo n.º 31
0
    def testForInvariant(self):
        prefix = "__test_pedigree"
        self.pheno_file = "%s_mch.txt" % (prefix)
        f = open(self.pheno_file, "w")
        f.write("""FID\tIID\tBMI\tIBM\tMSA
1\t1\t0.1\t1.0\t1.0
2\t2\t0.2\t0.5\t1.0
3\t3\t0.3\t0.6\t1.0
4\t4\t0.4\t0.5\t1.0
5\t5\t0.5\t1.0\t1.0
6\t6\t0.6\t0.1\t1.0
17\t7\t0.1\t1.0\t1.0
8\t8\t0.2\t0.5\t1.0
9\t9\t0.3\t0.6\t1.0
10\t10\t0.4\t0.5\t1.0
11\t11\t0.5\t1.0\t1.0
12\t12\t0.6\t0.1\t1.0""")
        f.close()


        PhenoCovar.sex_as_covariate = True
        pc = PhenoCovar()
        ped_parser = PedigreeParser(self.map_filename, self.ped_filename)
        ped_parser.load_mapfile()
        ped_parser.load_genotypes(pc)
        pc.load_phenofile(open(self.pheno_file), indices=[3])
        index = 0

        mapdata = [x.strip().split() for x in open(self.map_filename).readlines()]


        with self.assertRaises(InvariantVar):
            for snp in ped_parser:
                for y in pc:
                    non_missing = numpy.ones(len(snp.genotype_data), dtype=bool)
                    (pheno, covariates, nonmissing) = y.get_variables(numpy.invert(non_missing))
Exemplo n.º 32
0
 def testEmptyIterator(self):
     """Iterate a freshly constructed PhenoCovar with nothing loaded.

     The iteration is expected to raise TooMuchMissingpPhenoCovar exactly
     once, never raise InvariantVar, and yield no items.
     """
     pc = PhenoCovar()
     yielded = invariant_errors = missing_errors = 0
     try:
         for _ in pc:
             yielded += 1
     except InvariantVar:
         invariant_errors += 1
     except TooMuchMissingpPhenoCovar:
         missing_errors += 1
     self.assertEqual(0, invariant_errors)
     self.assertEqual(1, missing_errors)
     self.assertEqual(0, yielded)
Exemplo n.º 33
0
    def testMAF(self):
        """Compare each locus' reported ``maf`` with one computed from its data.

        With ``chrpos_encoding`` enabled on the MACH parser, every locus must
        report the position listed in ``self.positions``, and its ``maf``
        must equal the mean of ``genotype_data`` halved, to three decimal
        places. Exactly ten loci are expected.
        """
        mach_parser.Parser.chrpos_encoding = True
        pc = PhenoCovar()
        parser = mach_parser.Parser([self.gen_file])
        parser.load_family_details(pc)
        parser.load_genotypes()

        idx = 0

        for snp in parser:
            self.assertEqual(self.positions[idx], snp.pos)
            # Divide by a float: on Python 2, "/ 2" floor-divides when the
            # array is integer-typed, which would corrupt the reference
            # value. For float data the result is unchanged.
            maf = numpy.mean(snp.genotype_data / 2.0)
            self.assertAlmostEqual(maf, snp.maf, places=3)
            idx += 1
        self.assertEqual(10, idx)
Exemplo n.º 34
0
    def testPedWithMissingMxSnpComplete(self):
        """Walk a transposed pedigree with missing calls at snp_miss_tol=0.5.

        First checks the locus count and the chromosome/position of every
        entry returned by ``get_loci()``. Then iterates the parser itself,
        requesting genotype data with an all-True filter; for loci that
        return data, the map fields, heterozygote frequency and genotypes
        are compared with the expected values. Loci that raise
        TooMuchMissing or InvalidFrequency are skipped but still counted,
        and seven loci must be visited in total.
        """
        pheno_covar = PhenoCovar()
        DataParser.snp_miss_tol = 0.5  # We should only lose 1
        parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.miss_tped_filename)
        parser.load_tfam(pheno_covar)
        parser.load_genotypes()

        expected_map = get_lines(self.miss_tped_filename, split=True)

        expected_genotypes = [
            [0, 0],
            [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
            [0, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
            [0, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
            [1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
            [1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        ]

        expected_hetero_freq = [
            0.3636, 0.5, 0.3636, 0.4545, 0.3636, 0.2727, 0.2727,
        ]

        self.assertEqual(7, parser.locus_count)
        for row, locus in enumerate(parser.get_loci()):
            self.assertEqual(int(expected_map[row][0]), locus.chr)
            self.assertEqual(int(expected_map[row][3]), locus.pos)

        visited = 0
        for locus in parser:
            # Boolean mask that is True for every entry.
            keep_all = numpy.ones(locus.missing_genotypes.shape[0], dtype=bool)
            try:
                genodata = locus.get_genotype_data(keep_all)
                expected = expected_map[visited]
                self.assertEqual(int(expected[0]), locus.chr)
                self.assertEqual(int(expected[3]), locus.pos)
                self.assertEqual(expected[1], locus.rsid)
                self.assertAlmostEqual(expected_hetero_freq[visited],
                                       genodata.hetero_freq,
                                       places=4)
                self.assertEqual(expected_genotypes[visited],
                                 list(genodata.genotypes))
            except (TooMuchMissing, InvalidFrequency):
                # Rejected loci are skipped without checks but still counted.
                pass
            visited += 1
        self.assertEqual(7, visited)
Exemplo n.º 35
0
        DataParser.ind_miss_tol = args.mind

        DataParser.ind_exclusions = ParseIndList(args.remove)

        PhenoCovar.sex_as_covariate = args.sex

        if args.compressed:
            DataParser.compressed_pedigree = True

        DataParser.has_sex = not args.no_sex
        DataParser.has_parents = not args.no_parents
        DataParser.has_fid = not args.no_fid
        DataParser.has_pheno = not args.no_pheno
        DataParser.has_liability = args.liability

        pheno_covar = PhenoCovar()
        self.verbose=False
        if args.verbose:
            self.verbose = True

        if args.file != None or args.ped or args.map:
            if args.ped and not args.map or args.map  and not args.ped:
                print >> sys.stderr, "When analyzing pedigree data, both .map and .ped must be specified"
                sys.exit(1)
            if args.ped:
                dataset = pedigree_parser.Parser(args.map.name, args.ped.name)
            else:
                dataset = pedigree_parser.Parser("%s.map" % (args.file), "%s.ped" % (args.file))

            dataset.load_mapfile(map3=args.map3)
            dataset.load_genotypes(pheno_covar)