def testRegionBoundaryWithExclusions(self):
    """Check loci yielded with BoundaryCheck.chrom = 2 and three excluded individuals.

    "1:1", "2:2" and "3:3" are placed in ``DataParser.ind_exclusions``. Each
    locus whose genotype data can be extracted is compared with the TPED
    rows (starting at row 4) and with the local expectation table. The row
    counter must finish at 7.
    """
    DataParser.ind_exclusions = ["1:1", "2:2", "3:3"]
    expected_genotypes = [
        [0, 1, 0, 0, 1, 0, 0, 1, 0],
        [0, 0, 1, 1, 1, 0, 0, 0, 1],
        [1, 0, 0, 0, 2, 1, 1, 0, 0],
        [1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, 0, 0, 1, 2, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]
    BoundaryCheck.chrom = 2
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()
    tped_rows = get_lines(self.tped_filename, split=True)

    row_idx = 4
    for locus in parser:
        # All-True mask with one entry per element of missing_genotypes.
        all_true = numpy.ones(locus.missing_genotypes.shape[0]) == 1
        try:
            genodata = locus.get_genotype_data(all_true)
            expected = tped_rows[row_idx]
            self.assertEqual(int(expected[0]), locus.chr)
            self.assertEqual(int(expected[3]), locus.pos)
            self.assertEqual(expected[1], locus.rsid)
            self.assertEqual(expected_genotypes[row_idx], list(genodata.genotypes))
            # The counter only advances for loci that were actually compared.
            row_idx += 1
        except (TooMuchMissing, InvalidFrequency):
            pass
    self.assertEqual(7, row_idx)
def testTPedNoParentsPheno(self):
    """Parse a TFAM rewritten without parent and phenotype columns.

    Overwrites ``self.tfam_filename``, sets ``DataParser.has_parents`` and
    ``DataParser.has_pheno`` to False, then compares every locus yielded by
    the parser (chromosome, position, rsid, genotypes) with the raw TPED
    rows. Seven loci are expected.
    """
    # Context managers guarantee both file handles are closed, even when
    # the write or a later assertion raises.
    # NOTE(review): the literal below is a single line in this source; the
    # parser presumably expects one individual per line -- confirm the
    # record separators were not lost in formatting.
    with open(self.tfam_filename, "w") as tfam_file:
        tfam_file.write("""1 1 1 2 2 1 3 3 2 4 4 2 5 5 1 6 6 1 7 7 1 8 8 1 9 9 2 10 10 2 11 11 1 12 12 1""")

    DataParser.has_parents = False
    DataParser.has_pheno = False
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # str.split() with no arguments already ignores surrounding whitespace.
    with open(self.tped_filename) as tped_file:
        mapdata = [line.split() for line in tped_file]

    index = 0
    for snp in ped_parser:
        row = mapdata[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testPedNegativePosLocalChromMissSNP(self):
    """Check the miss-SNP TPED with chrom set to 1 and "rs0004" excluded.

    ``BoundaryCheck.chrom`` is set to 1 and "rs0004" is loaded as an SNP
    exclusion on the current ``DataParser.boundary``. The row counter starts
    at 2 and advances for every yielded locus; loci whose genotype
    extraction raises TooMuchMissing or InvalidFrequency are not compared.
    The counter must end at 3.
    """
    BoundaryCheck.chrom = 1
    DataParser.boundary.LoadExclusions(snps=["rs0004"])
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.misssnp_tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()
    tped_rows = get_lines(self.misssnp_tped_filename, split=True)

    row_idx = 2
    for locus in parser:
        # All-True mask with one entry per element of missing_genotypes.
        all_true = numpy.ones(locus.missing_genotypes.shape[0]) == 1
        try:
            genodata = locus.get_genotype_data(all_true)
            expected = tped_rows[row_idx]
            self.assertEqual(int(expected[0]), locus.chr)
            self.assertEqual(int(expected[3]), locus.pos)
            self.assertEqual(expected[1], locus.rsid)
            self.assertEqual(self.genotypes[row_idx], list(genodata.genotypes))
        except (TooMuchMissing, InvalidFrequency):
            pass
        row_idx += 1
    self.assertEqual(3, row_idx)
def testRegionBoundaryWithExclusions(self):
    """Check loci yielded with BoundaryCheck.chrom = 2 and three excluded individuals.

    "1:1", "2:2" and "3:3" are placed in ``DataParser.ind_exclusions``.
    Every yielded locus is compared with the raw TPED rows (starting at
    row 4) and with the local ``genotypes`` table; the row counter must
    finish at 7.
    """
    DataParser.ind_exclusions = ["1:1", "2:2", "3:3"]
    genotypes = [
        [0, 1, 0, 0, 1, 0, 0, 1, 0],
        [0, 0, 1, 1, 1, 0, 0, 0, 1],
        [1, 0, 0, 0, 2, 1, 1, 0, 0],
        [1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, 0, 0, 1, 2, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]
    BoundaryCheck.chrom = 2
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Read inside a context manager so the handle is closed deterministically;
    # split() with no arguments already ignores surrounding whitespace.
    with open(self.tped_filename) as tped_file:
        mapdata = [line.split() for line in tped_file]

    index = 4
    for snp in ped_parser:
        row = mapdata[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testTpedAnalysis(self):
    """Run ``mv_esteq.RunAnalysis`` on the TPED data and check three results.

    For ``results[0]`` .. ``results[2]`` the test checks ``p_mvtest`` and the
    entries at index 1 and 3 of ``betas``, ``beta_stderr`` and
    ``beta_pvalues`` to six decimal places.
    """
    # A default BoundaryCheck is installed with chrom set to 1 (presumably
    # limiting the analysis to chromosome 1 -- verify against BoundaryCheck).
    BoundaryCheck.chrom = 1
    DataParser.boundary = BoundaryCheck()
    pheno = PhenoCovar()
    dataset = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    dataset.load_tfam(pheno)
    dataset.load_genotypes()

    results = list(mv_esteq.RunAnalysis(dataset, pheno))

    # One tuple per result, in assertion order:
    # (p_mvtest, betas[1], beta_stderr[1], beta_pvalues[1],
    #  betas[3], beta_stderr[3], beta_pvalues[3])
    expectations = [
        (0.0034756155, 0.1134684009, 0.0337649965541, 0.0007779211,
         -0.0033479839, 0.0492050029324, 0.9457525716),
        (0.57778118, 0.02798537, 0.033790691857, 0.40755865,
         0.03275892, 0.0475661, 0.49101013),
        (0.44661276, 0.01663975, 0.03443300, 0.62891811,
         0.05712017, 0.04783608, 0.232446188),
    ]
    for position, (p_mv, b1, se1, pv1, b3, se3, pv3) in enumerate(expectations):
        outcome = results[position]
        self.assertAlmostEqual(p_mv, outcome.p_mvtest, places=6)
        self.assertAlmostEqual(b1, outcome.betas[1], places=6)
        self.assertAlmostEqual(se1, outcome.beta_stderr[1], places=6)
        self.assertAlmostEqual(pv1, outcome.beta_pvalues[1], places=6)
        self.assertAlmostEqual(b3, outcome.betas[3], places=6)
        self.assertAlmostEqual(se3, outcome.beta_stderr[3], places=6)
        self.assertAlmostEqual(pv3, outcome.beta_pvalues[3], places=6)
def testTpedBounded(self):
    """Run ``mv_esteq.RunAnalysis`` with a ``BoundaryCheck(bp=[2000, 3000])``.

    The first result must report chromosome 1 and position 2000. For the
    first two results the test checks ``p_mvtest`` and the entries at index
    1 and 3 of ``betas``, ``beta_stderr`` and ``beta_pvalues`` to six
    decimal places.
    """
    BoundaryCheck.chrom = 1
    DataParser.boundary = BoundaryCheck(bp=[2000, 3000])
    pheno = PhenoCovar()
    dataset = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    dataset.load_tfam(pheno)
    dataset.load_genotypes()

    results = list(mv_esteq.RunAnalysis(dataset, pheno))

    first = results[0]
    self.assertEqual(1, first.chr)
    self.assertEqual(2000, first.pos)

    # One tuple per result, in assertion order:
    # (p_mvtest, betas[1], beta_stderr[1], beta_pvalues[1],
    #  betas[3], beta_stderr[3], beta_pvalues[3])
    expectations = [
        (0.57778118, 0.02798537, 0.033790691857, 0.40755865,
         0.03275892, 0.0475661, 0.49101013),
        (0.44661276, 0.01663975, 0.03443300, 0.62891811,
         0.05712017, 0.04783608, 0.232446188),
    ]
    for position, (p_mv, b1, se1, pv1, b3, se3, pv3) in enumerate(expectations):
        outcome = results[position]
        self.assertAlmostEqual(p_mv, outcome.p_mvtest, places=6)
        self.assertAlmostEqual(b1, outcome.betas[1], places=6)
        self.assertAlmostEqual(se1, outcome.beta_stderr[1], places=6)
        self.assertAlmostEqual(pv1, outcome.beta_pvalues[1], places=6)
        self.assertAlmostEqual(b3, outcome.betas[3], places=6)
        self.assertAlmostEqual(se3, outcome.beta_stderr[3], places=6)
        self.assertAlmostEqual(pv3, outcome.beta_pvalues[3], places=6)
def testMissingWithExclusions(self):
    """Check the missing-data TPED with individuals "2:2" and "3:3" excluded.

    Every locus yielded by the parser is compared (chromosome, position,
    rsid, genotype_data) with the raw TPED rows and the local expectation
    table, where -1 entries appear for the first locus. Seven loci are
    expected.
    """
    DataParser.ind_exclusions = ["2:2", "3:3"]
    expected_genotypes = [
        [0, -1, -1, -1, -1, -1, -1, -1, -1, 1],
        [1, 0, 0, 1, 1, 1, 0, 0, 0, 1],
        [0, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [0, 1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.miss_tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()
    tped_rows = libgwas.get_lines(self.miss_tped_filename, split=True)

    row_idx = 0
    for locus in parser:
        expected = tped_rows[row_idx]
        self.assertEqual(int(expected[0]), locus.chr)
        self.assertEqual(int(expected[3]), locus.pos)
        self.assertEqual(expected[1], locus.rsid)
        self.assertEqual(expected_genotypes[row_idx], list(locus.genotype_data))
        row_idx += 1
    self.assertEqual(7, row_idx)
def testTPedNoFamIDSex(self):
    """Parse a TFAM rewritten without family-ID and sex columns.

    Overwrites ``self.tfam_filename``, sets ``DataParser.has_fid`` and
    ``DataParser.has_sex`` to False, then compares every locus yielded by
    the parser (chromosome, position, rsid, genotypes) with the raw TPED
    rows. Seven loci are expected.
    """
    # Context managers guarantee both file handles are closed, even when
    # the write or a later assertion raises.
    # NOTE(review): the literal below is a single line in this source; the
    # parser presumably expects one individual per line -- confirm the
    # record separators were not lost in formatting.
    with open(self.tfam_filename, "w") as tfam_file:
        tfam_file.write("""1 0 0 0.1 2 0 0 0.4 3 0 0 1.0 4 0 0 0.5 5 0 0 0.9 6 0 0 1.0 7 0 0 0.1 8 0 0 0.4 9 0 0 1.0 10 0 0 0.5 11 0 0 0.9 12 0 0 1.0""")

    DataParser.has_fid = False
    DataParser.has_sex = False
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # str.split() with no arguments already ignores surrounding whitespace.
    with open(self.tped_filename) as tped_file:
        mapdata = [line.split() for line in tped_file]

    index = 0
    for snp in ped_parser:
        row = mapdata[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testTPedLiability(self):
    """Parse a TFAM rewritten with an extra trailing column per individual.

    Overwrites ``self.tfam_filename``, sets ``DataParser.has_liability`` to
    True, then compares every locus yielded by the parser (chromosome,
    position, rsid, genotypes) with the raw TPED rows. Seven loci are
    expected.
    """
    # Context managers guarantee both file handles are closed, even when
    # the write or a later assertion raises.
    # NOTE(review): the literal below is a single line in this source; the
    # parser presumably expects one individual per line -- confirm the
    # record separators were not lost in formatting.
    with open(self.tfam_filename, "w") as tfam_file:
        tfam_file.write("""1 1 0 0 1 0.1 1 2 2 0 0 1 0.4 1 3 3 0 0 2 1.0 1 4 4 0 0 2 0.5 1 5 5 0 0 1 0.9 1 6 6 0 0 1 1.0 1 7 7 0 0 1 0.1 1 8 8 0 0 1 0.4 1 9 9 0 0 2 1.0 1 10 10 0 0 2 0.5 1 11 11 0 0 1 0.9 1 12 12 0 0 1 1.0 1""")

    DataParser.has_liability = True
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # str.split() with no arguments already ignores surrounding whitespace.
    with open(self.tped_filename) as tped_file:
        mapdata = [line.split() for line in tped_file]

    index = 0
    for snp in ped_parser:
        row = mapdata[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testPedWithMissingMxIndExclusionsToo(self):
    """Check the missing-data TPED with exclusions and ``ind_miss_tol`` of 0.5.

    Individuals "2:2" and "3:3" are excluded and
    ``DataParser.ind_miss_tol`` is set to 0.5. Every yielded locus is
    compared (chromosome, position, rsid, genotype_data) with the raw TPED
    rows and the local expectation table. Seven loci are expected.
    """
    pc = PhenoCovar()
    DataParser.ind_exclusions = ["2:2", "3:3"]
    # We should only lose 1
    DataParser.ind_miss_tol = 0.5
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.miss_tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Read inside a context manager so the handle is closed deterministically;
    # split() with no arguments already ignores surrounding whitespace.
    with open(self.miss_tped_filename) as tped_file:
        mapdata = [line.split() for line in tped_file]

    genotypes_w_missing = [
        [0, -1, -1, -1, -1, -1, -1, -1, -1, 1],
        [1, 0, 0, 1, 1, 1, 0, 0, 0, 1],
        [0, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [0, 1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]

    index = 0
    for snp in ped_parser:
        row = mapdata[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(genotypes_w_missing[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testPedWithMissingComplete(self):
    """Check genotype extraction for every locus of the missing-data TPED.

    Asserts ``locus_count`` is 7, then for each locus requests genotype data
    with an all-True mask and compares chromosome, position, rsid and the
    returned genotypes with the local expectation table. Loci that raise
    TooMuchMissing or InvalidFrequency are skipped; the row counter
    advances for every locus and must end at 7.
    """
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.miss_tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()
    tped_rows = get_lines(self.miss_tped_filename, split=True)
    self.assertEqual(7, parser.locus_count)

    expected_genotypes = [
        [0, 1],
        [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
        [0, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [0, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]

    row_idx = 0
    for locus in parser:
        # All-True mask with one entry per element of missing_genotypes.
        all_true = numpy.ones(locus.missing_genotypes.shape[0]) == 1
        try:
            genodata = locus.get_genotype_data(all_true)
            expected = tped_rows[row_idx]
            self.assertEqual(int(expected[0]), locus.chr)
            self.assertEqual(int(expected[3]), locus.pos)
            self.assertEqual(expected[1], locus.rsid)
            self.assertEqual(expected_genotypes[row_idx], list(genodata.genotypes))
        except (TooMuchMissing, InvalidFrequency):
            pass
        row_idx += 1
    self.assertEqual(7, row_idx)
def test_tped_standardization2(self):
    """Compare raw and standardized phenotype/covariate values for all individuals.

    First pass: with ``NoStandardization`` installed, the values returned by
    ``get_variables`` must match the raw phenotype and covariate values.
    Second pass: a fresh ``PhenoCovar`` with ``do_standardize_variables``
    set and ``Standardizer`` installed must return the standardized values.
    """
    DataParser.has_sex = True
    DataParser.has_pheno = True
    PhenoCovar.sex_as_covariate = True
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Use the builtin ``bool`` dtype: the ``numpy.bool`` alias was removed in
    # NumPy 1.24, where the old spelling raises AttributeError.
    nonmissing = numpy.ones(pc.phenotype_data[0].shape, dtype=bool)

    libgwas.standardizer.set_standardizer(libgwas.standardizer.NoStandardization)
    raw_pheno = [0.1, 0.4, 1.0, 0.5, 0.9, 1.0, 0.1, 0.4, 1.0, 0.5, 0.9, 1.0]
    raw_cov = [1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1]
    for pheno in pc:
        # The inverted (all-False) mask is what get_variables receives.
        (y, c, total_nonmissing) = pheno.get_variables(numpy.invert(nonmissing))
        for i, expected_pheno in enumerate(raw_pheno):
            self.assertAlmostEqual(expected_pheno, y[i])
            self.assertAlmostEqual(raw_cov[i], c[0][i])

    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()
    pc.do_standardize_variables = True
    libgwas.standardizer.set_standardizer(Standardizer)
    std_pheno = [
        -1.61601695, -0.73455316, 1.02837442, -0.4407319,
        0.73455316, 1.02837442, -1.61601695, -0.73455316,
        1.02837442, -0.4407319, 0.73455316, 1.02837442,
    ]
    std_cov = [
        -0.70710678, -0.70710678, 1.41421356, 1.41421356,
        -0.70710678, -0.70710678, -0.70710678, -0.70710678,
        1.41421356, 1.41421356, -0.70710678, -0.70710678,
    ]
    for pheno in pc:
        (y, c, total_nonmissing) = pheno.get_variables(numpy.invert(nonmissing))
        for i, expected_pheno in enumerate(std_pheno):
            self.assertAlmostEqual(expected_pheno, y[i])
            self.assertAlmostEqual(std_cov[i], c[0][i])
def test_tped_standardization_w_dbl_missing(self):
    """Compare raw and standardized values with exclusions plus a masked pair.

    Individuals "11:11" and "12:12" are excluded via
    ``DataParser.ind_exclusions`` and the first two mask entries are
    cleared. The raw pass (``NoStandardization``) and the standardized pass
    (``Standardizer`` with ``do_standardize_variables``) are each compared
    with eight expected phenotype and covariate values.
    """
    PhenoCovar.sex_as_covariate = True
    DataParser.ind_exclusions = ["11:11", "12:12"]
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Use the builtin ``bool`` dtype: the ``numpy.bool`` alias was removed in
    # NumPy 1.24, where the old spelling raises AttributeError.
    nonmissing = numpy.ones(pc.phenotype_data[0].shape, dtype=bool)
    nonmissing[0] = False
    nonmissing[1] = False

    libgwas.standardizer.set_standardizer(libgwas.standardizer.NoStandardization)
    raw_pheno = [1.0, 0.5, 0.9, 1.0, 0.1, 0.4, 1.0, 0.5]
    raw_cov = [2, 2, 1, 1, 1, 1, 2, 2]
    for pheno in pc:
        # get_variables receives the inverted mask (True at positions 0 and 1).
        (y, c, total_nonmissing) = pheno.get_variables(numpy.invert(nonmissing))
        for i, expected_pheno in enumerate(raw_pheno):
            self.assertAlmostEqual(expected_pheno, y[i])
            self.assertAlmostEqual(raw_cov[i], c[0][i])

    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()
    pc.do_standardize_variables = True
    libgwas.standardizer.set_standardizer(Standardizer)
    std_pheno = [
        1.19915853, -0.26322992, 0.90668084, 1.19915853,
        -1.43314068, -0.55570761, 1.19915853, -0.26322992,
    ]
    std_cov = [
        1.22474487, 1.22474487, -0.81649658, -0.81649658,
        -0.81649658, -0.81649658, 1.22474487, 1.22474487,
    ]
    for pheno in pc:
        (y, c, total_nonmissing) = pheno.get_variables(numpy.invert(nonmissing))
        for i, expected_pheno in enumerate(std_pheno):
            self.assertAlmostEqual(expected_pheno, y[i])
            self.assertAlmostEqual(std_cov[i], c[0][i])
def testAllelesIteration(self):
    """Check the minor/major allele reported for each locus of the TPED file.

    ``self.tped1_alleles[i][1]`` is compared with ``minor_allele`` and
    ``self.tped1_alleles[i][0]`` with ``major_allele`` for every yielded
    locus. Seven loci are expected.
    """
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()

    seen = 0
    for locus in parser:
        allele_pair = self.tped1_alleles[seen]
        self.assertEqual(allele_pair[1], locus.minor_allele)
        self.assertEqual(allele_pair[0], locus.major_allele)
        seen += 1
    self.assertEqual(7, seen)
def testPedNegativePositions(self):
    """Check the loci yielded for ``self.misssnp_tped_filename``.

    Every yielded locus is compared (chromosome, position, rsid,
    genotype_data) with the raw TPED rows, starting at row 2; the row
    counter must finish at 7.
    """
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.misssnp_tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Read inside a context manager so the handle is closed deterministically;
    # split() with no arguments already ignores surrounding whitespace.
    with open(self.misssnp_tped_filename) as tped_file:
        mapdata = [line.split() for line in tped_file]

    # Comparison starts at row 2 -- presumably the first two rows are not
    # yielded by the parser (negative positions, per the test name); verify
    # against the fixture.
    index = 2
    for snp in ped_parser:
        row = mapdata[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testPedNegativePositions(self):
    """Check the loci yielded for ``self.misssnp_tped_filename``.

    Every yielded locus is compared (chromosome, position, rsid,
    genotype_data) with the rows returned by ``libgwas.get_lines``, starting
    at row 2; the row counter must finish at 7.
    """
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.misssnp_tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()
    tped_rows = libgwas.get_lines(self.misssnp_tped_filename, split=True)

    # Comparison starts at row 2 -- presumably the first two rows are not
    # yielded by the parser; verify against the fixture.
    row_idx = 2
    for locus in parser:
        expected = tped_rows[row_idx]
        self.assertEqual(int(expected[0]), locus.chr)
        self.assertEqual(int(expected[3]), locus.pos)
        self.assertEqual(expected[1], locus.rsid)
        self.assertEqual(self.genotypes[row_idx], list(locus.genotype_data))
        row_idx += 1
    self.assertEqual(7, row_idx)
def testPedWithMissingComplete(self):
    """Check every locus of the missing-data TPED against the fixture table.

    Asserts ``locus_count`` is 7, then compares each yielded locus
    (chromosome, position, rsid, genotype_data) with the raw TPED rows and
    ``self.genotypes_w_missing``. Seven loci are expected.
    """
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.miss_tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Read inside a context manager so the handle is closed deterministically;
    # split() with no arguments already ignores surrounding whitespace.
    with open(self.miss_tped_filename) as tped_file:
        mapdata = [line.split() for line in tped_file]

    self.assertEqual(7, ped_parser.locus_count)

    index = 0
    for snp in ped_parser:
        row = mapdata[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(self.genotypes_w_missing[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testPedComplete(self):
    """Check individual count and every locus of the complete TPED file.

    Asserts ``ind_count`` is 12, then compares each yielded locus
    (chromosome, position, rsid, genotype_data) with the rows returned by
    ``libgwas.get_lines`` and ``self.genotypes``. Seven loci are expected.
    """
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()
    self.assertEqual(12, parser.ind_count)
    tped_rows = libgwas.get_lines(self.tped_filename, split=True)

    row_idx = 0
    for locus in parser:
        expected = tped_rows[row_idx]
        self.assertEqual(int(expected[0]), locus.chr)
        self.assertEqual(int(expected[3]), locus.pos)
        self.assertEqual(expected[1], locus.rsid)
        self.assertEqual(self.genotypes[row_idx], list(locus.genotype_data))
        row_idx += 1
    self.assertEqual(7, row_idx)
def test_tped_standardization_w_dbl_missing(self):
    """Compare raw and standardized values with exclusions plus a masked pair.

    Individuals "11:11" and "12:12" are excluded via
    ``DataParser.ind_exclusions`` and the first two mask entries are
    cleared. The raw pass (``NoStandardization``) and the standardized pass
    (``Standardizer`` with ``do_standardize_variables``) are each compared
    with eight expected phenotype and covariate values.
    """
    PhenoCovar.sex_as_covariate = True
    DataParser.ind_exclusions = ["11:11", "12:12"]
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Use the builtin ``bool`` dtype: the ``numpy.bool`` alias was removed in
    # NumPy 1.24, where the old spelling raises AttributeError.
    nonmissing = numpy.ones(pc.phenotype_data[0].shape, dtype=bool)
    nonmissing[0] = False
    nonmissing[1] = False

    libgwas.standardizer.set_standardizer(libgwas.standardizer.NoStandardization)
    raw_pheno = [1.0, 0.5, 0.9, 1.0, 0.1, 0.4, 1.0, 0.5]
    raw_cov = [2, 2, 1, 1, 1, 1, 2, 2]
    for pheno in pc:
        # get_variables receives the inverted mask (True at positions 0 and 1).
        (y, c, total_nonmissing) = pheno.get_variables(numpy.invert(nonmissing))
        for i, expected_pheno in enumerate(raw_pheno):
            self.assertAlmostEqual(expected_pheno, y[i])
            self.assertAlmostEqual(raw_cov[i], c[0][i])

    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()
    pc.do_standardize_variables = True
    libgwas.standardizer.set_standardizer(Standardizer)
    std_pheno = [
        1.19915853, -0.26322992, 0.90668084, 1.19915853,
        -1.43314068, -0.55570761, 1.19915853, -0.26322992,
    ]
    std_cov = [
        1.22474487, 1.22474487, -0.81649658, -0.81649658,
        -0.81649658, -0.81649658, 1.22474487, 1.22474487,
    ]
    for pheno in pc:
        (y, c, total_nonmissing) = pheno.get_variables(numpy.invert(nonmissing))
        for i, expected_pheno in enumerate(std_pheno):
            self.assertAlmostEqual(expected_pheno, y[i])
            self.assertAlmostEqual(std_cov[i], c[0][i])
def testPedNegativePosLocalChromMissSNP(self):
    """Check the miss-SNP TPED with chrom set to 1 and "rs0004" excluded.

    ``BoundaryCheck.chrom`` is set to 1 and "rs0004" is loaded as an SNP
    exclusion on the current ``DataParser.boundary``. Every yielded locus is
    compared with the raw TPED rows starting at row 2; the row counter must
    end at 3.
    """
    BoundaryCheck.chrom = 1
    DataParser.boundary.LoadExclusions(snps=["rs0004"])
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.misssnp_tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Read inside a context manager so the handle is closed deterministically;
    # split() with no arguments already ignores surrounding whitespace.
    with open(self.misssnp_tped_filename) as tped_file:
        mapdata = [line.split() for line in tped_file]

    index = 2
    for snp in ped_parser:
        row = mapdata[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(3, index)
def testTPedPhenoMissingPC(self):
    """Check phenotypes and genotypes when the TFAM comes from ``self.tfam_missing``.

    After loading, 12 covariate and 12 phenotype values and a single
    phenotype name are expected. For every locus, the phenotype values
    returned by ``get_variables`` must match ``pheno_values`` and the
    extracted genotypes must match the ten-column expectation table.
    Loci that raise TooMuchMissing or InvalidFrequency are skipped; seven
    loci are iterated in total.
    """
    PhenoCovar.sex_as_covariate = True
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_missing, self.tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()
    self.assertEqual(12, len(pc.covariate_data[0]))
    self.assertEqual(12, len(pc.phenotype_data[0]))
    self.assertEqual(1, len(pc.phenotype_names))
    mapdata = get_lines(self.tped_filename, split=True)

    # NOTE: this replaces the instance's ``genotypes`` attribute for the
    # remainder of the test.
    self.genotypes = [
        [1, 0, 0, 1, 0, 0, 1, 0, 0, 1],
        [1, 0, 0, 0, 1, 1, 1, 0, 0, 0],
        [2, 1, 1, 0, 0, 0, 2, 1, 1, 0],
        [1, 2, 1, 1, 0, 0, 1, 2, 1, 1],
        [2, 0, 1, 0, 0, 1, 2, 0, 1, 0],
        [0, 1, 0, 0, 0, 0, 0, 1, 0, 0],
        [1, 1, 0, 0, 0, 0, 1, 1, 0, 0],
    ]
    pheno_values = [0.4, 1.0, 0.5, 0.9, 1.0, 0.1, 0.4, 1.0, 0.5, 0.9]

    index = 0
    self.assertEqual(7, ped_parser.locus_count)
    for snp in ped_parser:
        for y in pc:
            (pheno, covars, nonmissing) = y.get_variables(snp.missing_genotypes)

            # assertAlmostEqual cannot compare two lists with a tolerance: it
            # passes only on exact equality and otherwise raises TypeError
            # when subtracting them. Compare lengths, then element by element.
            observed = list(pheno)
            self.assertEqual(len(pheno_values), len(observed))
            for expected_value, observed_value in zip(pheno_values, observed):
                self.assertAlmostEqual(expected_value, observed_value, places=4)

            try:
                genodata = snp.get_genotype_data(nonmissing)
                row = mapdata[index]
                self.assertEqual(int(row[0]), snp.chr)
                self.assertEqual(int(row[3]), snp.pos)
                self.assertEqual(row[1], snp.rsid)
                self.assertEqual(self.genotypes[index], list(genodata.genotypes))
            except (TooMuchMissing, InvalidFrequency):
                # Loci rejected by the parser are deliberately not compared.
                pass
        # One TPED row per locus, so the counter advances once per locus.
        index += 1
    self.assertEqual(7, index)
def testTPedNoFamID(self):
    """Parse a TFAM rewritten without the family-ID column.

    Overwrites ``self.tfam_filename``, sets ``DataParser.has_fid`` to False,
    then for every locus and phenotype extracts genotype data with the mask
    returned by ``get_variables`` and compares chromosome, position, rsid
    and genotypes with the TPED rows. Loci that raise TooMuchMissing or
    InvalidFrequency are skipped; seven loci are iterated in total.
    """
    # NOTE(review): the literal below is a single line in this source; the
    # parser presumably expects one individual per line -- confirm the
    # record separators were not lost in formatting.
    with open(self.tfam_filename, "w") as tfam_file:
        tfam_file.write("""1 0 0 1 0.1 2 0 0 1 0.4 3 0 0 2 1.0 4 0 0 2 0.5 5 0 0 1 0.9 6 0 0 1 1.0 7 0 0 1 0.1 8 0 0 1 0.4 9 0 0 2 1.0 10 0 0 2 0.5 11 0 0 1 0.9 12 0 0 1 1.0""")

    DataParser.has_fid = False
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()
    tped_rows = get_lines(self.tped_filename, split=True)

    row_idx = 0
    for locus in parser:
        for phenotype in pheno_covar:
            (pheno, covars, non_missing) = phenotype.get_variables(locus.missing_genotypes)
            try:
                genodata = locus.get_genotype_data(non_missing)
                expected = tped_rows[row_idx]
                self.assertEqual(int(expected[0]), locus.chr)
                self.assertEqual(int(expected[3]), locus.pos)
                self.assertEqual(expected[1], locus.rsid)
                self.assertEqual(self.genotypes[row_idx], list(genodata.genotypes))
            except (TooMuchMissing, InvalidFrequency):
                pass
        row_idx += 1
    self.assertEqual(7, row_idx)
def testPedNegativePosLocalChromMissSNP(self):
    """Check the miss-SNP TPED with chrom set to 1 and "rs0004" excluded.

    ``BoundaryCheck.chrom`` is set to 1 and "rs0004" is loaded as an SNP
    exclusion on the current ``DataParser.boundary``. Every yielded locus is
    compared with the rows returned by ``libgwas.get_lines`` starting at
    row 2; the row counter must end at 3.
    """
    BoundaryCheck.chrom = 1
    DataParser.boundary.LoadExclusions(snps=["rs0004"])
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.misssnp_tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()
    tped_rows = libgwas.get_lines(self.misssnp_tped_filename, split=True)

    row_idx = 2
    for locus in parser:
        expected = tped_rows[row_idx]
        self.assertEqual(int(expected[0]), locus.chr)
        self.assertEqual(int(expected[3]), locus.pos)
        self.assertEqual(expected[1], locus.rsid)
        self.assertEqual(self.genotypes[row_idx], list(locus.genotype_data))
        row_idx += 1
    self.assertEqual(3, row_idx)
def testTPedNoParentsPheno(self):
    """Parse a TFAM rewritten without parent and phenotype columns.

    Overwrites ``self.tfam_filename``, sets ``DataParser.has_parents`` and
    ``DataParser.has_pheno`` to False, then extracts genotype data for every
    locus with an all-True mask and compares chromosome, position, rsid and
    genotypes with the TPED rows. Loci that raise TooMuchMissing or
    InvalidFrequency are skipped; seven loci are iterated in total.
    """
    # NOTE(review): the literal below is a single line in this source; the
    # parser presumably expects one individual per line -- confirm the
    # record separators were not lost in formatting.
    with open(self.tfam_filename, "w") as tfam_file:
        tfam_file.write("""1 1 1 2 2 1 3 3 2 4 4 2 5 5 1 6 6 1 7 7 1 8 8 1 9 9 2 10 10 2 11 11 1 12 12 1""")

    DataParser.has_parents = False
    DataParser.has_pheno = False
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()
    tped_rows = get_lines(self.tped_filename, split=True)

    row_idx = 0
    for locus in parser:
        # All-True mask with one entry per element of missing_genotypes.
        all_true = numpy.ones(locus.missing_genotypes.shape[0]) == 1
        try:
            genodata = locus.get_genotype_data(all_true)
            expected = tped_rows[row_idx]
            self.assertEqual(int(expected[0]), locus.chr)
            self.assertEqual(int(expected[3]), locus.pos)
            self.assertEqual(expected[1], locus.rsid)
            self.assertEqual(self.genotypes[row_idx], list(genodata.genotypes))
        except (TooMuchMissing, InvalidFrequency):
            pass
        row_idx += 1
    self.assertEqual(7, row_idx)
def testPedNegativePositionsLocalChrom(self):
    """Check the miss-SNP TPED with ``BoundaryCheck.chrom`` set to 1.

    Every yielded locus is compared (chromosome, position, rsid,
    genotype_data) with the raw TPED rows starting at row 2; the row
    counter must end at 4.
    """
    BoundaryCheck.chrom = 1
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.misssnp_tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Read inside a context manager so the handle is closed deterministically;
    # split() with no arguments already ignores surrounding whitespace.
    with open(self.misssnp_tped_filename) as tped_file:
        mapdata = [line.split() for line in tped_file]

    index = 2
    for snp in ped_parser:
        row = mapdata[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(4, index)
def test_tped_standardization2(self):
    """Compare raw and standardized phenotype/covariate values for all individuals.

    First pass: with ``NoStandardization`` installed, the values returned by
    ``get_variables`` must match the raw phenotype and covariate values.
    Second pass: a fresh ``PhenoCovar`` with ``do_standardize_variables``
    set and ``Standardizer`` installed must return the standardized values.
    """
    DataParser.has_sex = True
    DataParser.has_pheno = True
    PhenoCovar.sex_as_covariate = True
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Use the builtin ``bool`` dtype: the ``numpy.bool`` alias was removed in
    # NumPy 1.24, where the old spelling raises AttributeError.
    nonmissing = numpy.ones(pc.phenotype_data[0].shape, dtype=bool)

    libgwas.standardizer.set_standardizer(libgwas.standardizer.NoStandardization)
    raw_pheno = [0.1, 0.4, 1.0, 0.5, 0.9, 1.0, 0.1, 0.4, 1.0, 0.5, 0.9, 1.0]
    raw_cov = [1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1]
    for pheno in pc:
        # The inverted (all-False) mask is what get_variables receives.
        (y, c, total_nonmissing) = pheno.get_variables(numpy.invert(nonmissing))
        for i, expected_pheno in enumerate(raw_pheno):
            self.assertAlmostEqual(expected_pheno, y[i])
            self.assertAlmostEqual(raw_cov[i], c[0][i])

    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()
    pc.do_standardize_variables = True
    libgwas.standardizer.set_standardizer(Standardizer)
    std_pheno = [
        -1.61601695, -0.73455316, 1.02837442, -0.4407319,
        0.73455316, 1.02837442, -1.61601695, -0.73455316,
        1.02837442, -0.4407319, 0.73455316, 1.02837442,
    ]
    std_cov = [
        -0.70710678, -0.70710678, 1.41421356, 1.41421356,
        -0.70710678, -0.70710678, -0.70710678, -0.70710678,
        1.41421356, 1.41421356, -0.70710678, -0.70710678,
    ]
    for pheno in pc:
        (y, c, total_nonmissing) = pheno.get_variables(numpy.invert(nonmissing))
        for i, expected_pheno in enumerate(std_pheno):
            self.assertAlmostEqual(expected_pheno, y[i])
            self.assertAlmostEqual(std_cov[i], c[0][i])
def testPedSnpBoundaryTPed(self):
    """Check loci with a ``SnpBoundaryCheck`` for "rs0001-rs0003" and chrom 1.

    The entries from ``get_loci()`` are compared (chromosome, position,
    hetero_freq to four places) with the TPED rows from row 0, and
    ``locus_count`` must be 3. The parser is then iterated and each locus
    compared (chromosome, position, rsid, genotype_data); three loci are
    expected.
    """
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    DataParser.boundary = SnpBoundaryCheck(snps=["rs0001-rs0003"])
    BoundaryCheck.chrom = 1
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()
    tped_rows = libgwas.get_lines(self.tped_filename, split=True)

    row_idx = 0
    for locus in parser.get_loci():
        expected = tped_rows[row_idx]
        self.assertEqual(int(expected[0]), locus.chr)
        self.assertEqual(int(expected[3]), locus.pos)
        self.assertAlmostEqual(self.hetero_freq_tped[row_idx], locus.hetero_freq, places=4)
        row_idx += 1
    self.assertEqual(3, parser.locus_count)

    row_idx = 0
    for locus in parser:
        expected = tped_rows[row_idx]
        self.assertEqual(int(expected[0]), locus.chr)
        self.assertEqual(int(expected[3]), locus.pos)
        self.assertEqual(expected[1], locus.rsid)
        self.assertEqual(self.genotypes[row_idx], list(locus.genotype_data))
        row_idx += 1
    self.assertEqual(3, row_idx)
def testPedSnpBoundary2TPed(self):
    """Check loci with a ``SnpBoundaryCheck`` for "rs0005-rs0006" and chrom 2.

    The entries from ``get_loci()`` are compared (chromosome, position,
    hetero_freq to four places) with the raw TPED rows from row 4, and
    ``locus_count`` must be 2. The parser is then iterated and each locus
    compared (chromosome, position, rsid, genotype_data); the row counter
    must end at 6.
    """
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0006"])
    BoundaryCheck.chrom = 2
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Read inside a context manager so the handle is closed deterministically.
    with open(self.tped_filename) as tped_file:
        pedigree = [line.split() for line in tped_file]

    index = 4
    for snp in ped_parser.get_loci():
        row = pedigree[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertAlmostEqual(self.hetero_freq_tped[index], snp.hetero_freq, places=4)
        index += 1
    self.assertEqual(2, ped_parser.locus_count)

    index = 4
    for snp in ped_parser:
        row = pedigree[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(6, index)
def testPedRegionBoundaryWithExclusionsTPed(self):
    """Check loci with boundary "rs0005-rs0007", "rs0007" excluded, chrom 2.

    The entries from ``get_loci()`` are compared (chromosome, position) with
    the raw TPED rows from row 4. The parser is then iterated and each locus
    compared (chromosome, position, rsid, genotype_data); the row counter
    must end at 6.
    """
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0007"])
    DataParser.boundary.LoadExclusions(snps=["rs0007"])
    BoundaryCheck.chrom = 2
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Read inside a context manager so the handle is closed deterministically.
    with open(self.tped_filename) as tped_file:
        pedigree = [line.split() for line in tped_file]

    index = 4
    for snp in ped_parser.get_loci():
        row = pedigree[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        index += 1

    index = 4
    for snp in ped_parser:
        row = pedigree[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(6, index)
def testTPedLiability(self):
    """Parse a TFAM rewritten with an extra trailing column per individual.

    Overwrites ``self.tfam_filename``, sets ``DataParser.has_liability`` to
    True, then extracts genotype data for every locus with an all-True mask
    and compares chromosome, position, rsid and genotypes with the TPED
    rows. Loci that raise TooMuchMissing or InvalidFrequency are skipped;
    seven loci are iterated in total.
    """
    # NOTE(review): the literal below is a single line in this source; the
    # parser presumably expects one individual per line -- confirm the
    # record separators were not lost in formatting.
    with open(self.tfam_filename, "w") as tfam_file:
        tfam_file.write("""1 1 0 0 1 0.1 1 2 2 0 0 1 0.4 1 3 3 0 0 2 1.0 1 4 4 0 0 2 0.5 1 5 5 0 0 1 0.9 1 6 6 0 0 1 1.0 1 7 7 0 0 1 0.1 1 8 8 0 0 1 0.4 1 9 9 0 0 2 1.0 1 10 10 0 0 2 0.5 1 11 11 0 0 1 0.9 1 12 12 0 0 1 1.0 1""")

    DataParser.has_liability = True
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()
    tped_rows = get_lines(self.tped_filename, split=True)

    row_idx = 0
    for locus in parser:
        # All-True mask with one entry per element of missing_genotypes.
        all_true = numpy.ones(locus.missing_genotypes.shape[0]) == 1
        try:
            genodata = locus.get_genotype_data(all_true)
            expected = tped_rows[row_idx]
            self.assertEqual(int(expected[0]), locus.chr)
            self.assertEqual(int(expected[3]), locus.pos)
            self.assertEqual(expected[1], locus.rsid)
            self.assertEqual(self.genotypes[row_idx], list(genodata.genotypes))
        except (TooMuchMissing, InvalidFrequency):
            pass
        row_idx += 1
    self.assertEqual(7, row_idx)
def testPedRegionBoundaryWithExclusionsTPed(self):
    """Check loci with boundary "rs0005-rs0007", "rs0007" excluded, chrom 2.

    The entries from ``get_loci()`` are compared (chromosome, position) with
    the TPED rows from row 4. The parser is then iterated; genotype data is
    extracted with an all-True mask and compared together with chromosome,
    position and rsid. Loci that raise TooMuchMissing or InvalidFrequency
    are skipped; the row counter must end at 6.
    """
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    DataParser.boundary = SnpBoundaryCheck(snps=["rs0005-rs0007"])
    DataParser.boundary.LoadExclusions(snps=["rs0007"])
    BoundaryCheck.chrom = 2
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()
    tped_rows = get_lines(self.tped_filename, split=True)

    row_idx = 4
    for locus in parser.get_loci():
        expected = tped_rows[row_idx]
        self.assertEqual(int(expected[0]), locus.chr)
        self.assertEqual(int(expected[3]), locus.pos)
        row_idx += 1

    row_idx = 4
    for locus in parser:
        # All-True mask with one entry per element of missing_genotypes.
        all_true = numpy.ones(locus.missing_genotypes.shape[0]) == 1
        try:
            genodata = locus.get_genotype_data(all_true)
            expected = tped_rows[row_idx]
            self.assertEqual(int(expected[0]), locus.chr)
            self.assertEqual(int(expected[3]), locus.pos)
            self.assertEqual(expected[1], locus.rsid)
            self.assertEqual(self.genotypes[row_idx], list(genodata.genotypes))
        except (TooMuchMissing, InvalidFrequency):
            pass
        row_idx += 1
    self.assertEqual(6, row_idx)
def testTPedPhenoComplete(self):
    """Check phenotype/covariate sizes and every locus of the complete TPED.

    With ``PhenoCovar.sex_as_covariate`` set, 12 covariate values, 12
    phenotype values and one phenotype name are expected. ``locus_count``
    must be 7 and each yielded locus is compared (chromosome, position,
    rsid, genotype_data) with the rows from ``libgwas.get_lines``.
    """
    PhenoCovar.sex_as_covariate = True
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()

    self.assertEqual(12, len(pheno_covar.covariate_data[0]))
    self.assertEqual(12, len(pheno_covar.phenotype_data[0]))
    self.assertEqual(1, len(pheno_covar.phenotype_names))
    tped_rows = libgwas.get_lines(self.tped_filename, split=True)

    row_idx = 0
    self.assertEqual(7, parser.locus_count)
    for locus in parser:
        expected = tped_rows[row_idx]
        self.assertEqual(int(expected[0]), locus.chr)
        self.assertEqual(int(expected[3]), locus.pos)
        self.assertEqual(expected[1], locus.rsid)
        self.assertEqual(self.genotypes[row_idx], list(locus.genotype_data))
        row_idx += 1
    self.assertEqual(7, row_idx)
def testTPedPhenoComplete(self):
    """Check phenotype/covariate sizes and every locus of the complete TPED.

    With ``PhenoCovar.sex_as_covariate`` set, 12 covariate values, 12
    phenotype values and one phenotype name are expected. ``locus_count``
    must be 7 and each yielded locus is compared (chromosome, position,
    rsid, genotype_data) with the raw TPED rows.
    """
    PhenoCovar.sex_as_covariate = True
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()
    self.assertEqual(12, len(pc.covariate_data[0]))
    self.assertEqual(12, len(pc.phenotype_data[0]))
    self.assertEqual(1, len(pc.phenotype_names))

    # Read inside a context manager so the handle is closed deterministically;
    # split() with no arguments already ignores surrounding whitespace.
    with open(self.tped_filename) as tped_file:
        mapdata = [line.split() for line in tped_file]

    index = 0
    self.assertEqual(7, ped_parser.locus_count)
    for snp in ped_parser:
        row = mapdata[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testRegionBoundaryWithExclusions(self):
    """Check loci yielded with BoundaryCheck.chrom = 2 and three excluded individuals.

    "1:1", "2:2" and "3:3" are placed in ``DataParser.ind_exclusions``.
    Every yielded locus is compared with the rows from
    ``libgwas.get_lines`` (starting at row 4) and with the local
    expectation table; the row counter must finish at 7.
    """
    DataParser.ind_exclusions = ["1:1", "2:2", "3:3"]
    expected_genotypes = [
        [0, 1, 0, 0, 1, 0, 0, 1, 0],
        [0, 0, 1, 1, 1, 0, 0, 0, 1],
        [1, 0, 0, 0, 2, 1, 1, 0, 0],
        [1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, 0, 0, 1, 2, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]
    BoundaryCheck.chrom = 2
    pheno_covar = PhenoCovar()
    parser = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    parser.load_tfam(pheno_covar)
    parser.load_genotypes()
    tped_rows = libgwas.get_lines(self.tped_filename, split=True)

    row_idx = 4
    for locus in parser:
        expected = tped_rows[row_idx]
        self.assertEqual(int(expected[0]), locus.chr)
        self.assertEqual(int(expected[3]), locus.pos)
        self.assertEqual(expected[1], locus.rsid)
        self.assertEqual(expected_genotypes[row_idx], list(locus.genotype_data))
        row_idx += 1
    self.assertEqual(7, row_idx)
def testPedWithMissingComplete(self):
    """Check every locus of the missing-data TPED against the fixture table.

    Asserts ``locus_count`` is 7, then compares each yielded locus
    (chromosome, position, rsid, genotype_data) with the raw TPED rows and
    ``self.genotypes_w_missing``. Seven loci are expected.
    """
    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename, self.miss_tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Read inside a context manager so the handle is closed deterministically;
    # split() with no arguments already ignores surrounding whitespace.
    with open(self.miss_tped_filename) as tped_file:
        mapdata = [line.split() for line in tped_file]

    self.assertEqual(7, ped_parser.locus_count)

    index = 0
    for snp in ped_parser:
        row = mapdata[index]
        self.assertEqual(int(row[0]), snp.chr)
        self.assertEqual(int(row[3]), snp.pos)
        self.assertEqual(row[1], snp.rsid)
        self.assertEqual(self.genotypes_w_missing[index], list(snp.genotype_data))
        index += 1
    self.assertEqual(7, index)
def testPedWithMissingMxSnpComplete(self):
    """Exercise ``DataParser.snp_miss_tol = 0.5`` on the TPED with missing data.

    First walks ``get_loci()`` and checks chromosome/position for 7 loci.
    Then iterates the parser itself, pulling genotype data for each
    phenotype in the PhenoCovar object; exactly one extraction is expected
    to raise ``TooMuchMissing`` and six are expected to succeed.
    """
    pc = PhenoCovar()
    DataParser.snp_miss_tol = 0.5  # We should only lose 1
    ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.miss_tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()
    tped_rows = libgwas.get_lines(self.miss_tped_filename, split=True)

    expected_genotypes = [
        [0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0],
        [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
        [0, -1, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [0, -1, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, -1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [1, -1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, -1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]
    expected_hetero_freq = [
        0.3636, 0.5, 0.3636, 0.4545, 0.3636, 0.2727, 0.2727,
    ]

    self.assertEqual(7, ped_parser.locus_count)

    # Pass 1: locus metadata only.
    locus_row = 0
    for locus in ped_parser.get_loci():
        self.assertEqual(int(tped_rows[locus_row][0]), locus.chr)
        self.assertEqual(int(tped_rows[locus_row][3]), locus.pos)
        locus_row += 1
    self.assertEqual(7, locus_row)

    # Pass 2: genotype extraction per phenotype.
    row = 0
    too_much_missing = 0
    extracted = 0
    for snp in ped_parser:
        for phenotype in pc:
            _, _, nonmissing = phenotype.get_variables(snp.missing_genotypes)
            try:
                genodata = snp.get_genotype_data(nonmissing)
                self.assertEqual(int(tped_rows[row][0]), snp.chr)
                self.assertEqual(int(tped_rows[row][3]), snp.pos)
                self.assertEqual(tped_rows[row][1], snp.rsid)
                self.assertEqual(expected_genotypes[row],
                                 list(snp.genotype_data))
                self.assertAlmostEqual(expected_hetero_freq[row],
                                       genodata.hetero_freq, places=4)
                extracted += 1
            except TooMuchMissing:
                too_much_missing += 1
            except InvalidFrequency:
                pass
            # NOTE(review): the row cursor is advanced once per
            # (locus, phenotype) pair; this presumes a single phenotype
            # so that it tracks TPED rows - confirm against the fixture.
            row += 1

    self.assertEqual(1, too_much_missing)
    self.assertEqual(6, extracted)
    self.assertEqual(7, row)
def testTPedNoParentsPheno(self):
    """Parse a TFAM file written without parent and phenotype columns.

    Rewrites ``self.tfam_filename`` with three-column rows, switches off
    ``DataParser.has_parents`` and ``DataParser.has_pheno``, and verifies
    that all 7 loci still match the TPED rows (chromosome, position, rsid)
    and the expected genotypes in ``self.genotypes``.
    """
    # NOTE(review): the row breaks below are reconstructed as 12 rows of
    # three columns, which is what the two disabled flags imply - confirm
    # against the original fixture text.
    tfam_rows = [
        "1 1 1",
        "2 2 1",
        "3 3 2",
        "4 4 2",
        "5 5 1",
        "6 6 1",
        "7 7 1",
        "8 8 1",
        "9 9 2",
        "10 10 2",
        "11 11 1",
        "12 12 1",
    ]
    # Context manager guarantees the fixture is flushed and closed even
    # if the write raises.
    with open(self.tfam_filename, "w") as tfam_file:
        tfam_file.write("\n".join(tfam_rows))

    DataParser.has_parents = False
    DataParser.has_pheno = False

    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Close the TPED handle deterministically after reading.
    with open(self.tped_filename) as tped_file:
        mapdata = [line.strip().split() for line in tped_file]

    index = 0
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    # Every locus in the file must have been visited.
    self.assertEqual(7, index)
def testTPedNoFamIDSex(self):
    """Parse a TFAM file written without family-ID and sex columns.

    Rewrites ``self.tfam_filename`` with four-column rows, switches off
    ``DataParser.has_fid`` and ``DataParser.has_sex``, and verifies that
    all 7 loci still match the TPED rows (chromosome, position, rsid) and
    the expected genotypes in ``self.genotypes``.
    """
    # NOTE(review): the row breaks below are reconstructed as 12 rows of
    # four columns, which is what the two disabled flags imply - confirm
    # against the original fixture text.
    tfam_rows = [
        "1 0 0 0.1",
        "2 0 0 0.4",
        "3 0 0 1.0",
        "4 0 0 0.5",
        "5 0 0 0.9",
        "6 0 0 1.0",
        "7 0 0 0.1",
        "8 0 0 0.4",
        "9 0 0 1.0",
        "10 0 0 0.5",
        "11 0 0 0.9",
        "12 0 0 1.0",
    ]
    # Context manager guarantees the fixture is flushed and closed even
    # if the write raises.
    with open(self.tfam_filename, "w") as tfam_file:
        tfam_file.write("\n".join(tfam_rows))

    DataParser.has_fid = False
    DataParser.has_sex = False

    pc = PhenoCovar()
    ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Close the TPED handle deterministically after reading.
    with open(self.tped_filename) as tped_file:
        mapdata = [line.strip().split() for line in tped_file]

    index = 0
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(self.genotypes[index], list(snp.genotype_data))
        index += 1
    # Every locus in the file must have been visited.
    self.assertEqual(7, index)
def testTpedSnpBounded(self):
    """Run the analysis with an rsid-range boundary on chromosome 1.

    Installs ``SnpBoundaryCheck(snps=["rs1000-rs3000"])`` as the parser
    boundary, runs ``mv_esteq.RunAnalysis`` and compares the first three
    results against reference values to six decimal places.
    """
    BoundaryCheck.chrom = 1
    DataParser.boundary = SnpBoundaryCheck(snps=["rs1000-rs3000"])

    pheno = PhenoCovar()
    dataset = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    dataset.load_tfam(pheno)
    dataset.load_genotypes()

    results = list(mv_esteq.RunAnalysis(dataset, pheno))

    self.assertEqual(1, results[0].chr)
    self.assertEqual(1000, results[0].pos)

    # (attribute, element index or None for a scalar), in assertion order.
    checked_fields = (
        ("p_mvtest", None),
        ("betas", 1),
        ("beta_stderr", 1),
        ("beta_pvalues", 1),
        ("betas", 3),
        ("beta_stderr", 3),
        ("beta_pvalues", 3),
    )
    # One row of reference values per result, aligned with checked_fields.
    reference_values = (
        (0.0034756155, 0.1134684009, 0.0337649965541, 0.0007779211,
         -0.0033479839, 0.0492050029324, 0.9457525716),
        (0.57778118, 0.02798537, 0.033790691857, 0.40755865,
         0.03275892, 0.0475661, 0.49101013),
        (0.44661276, 0.01663975, 0.03443300, 0.62891811,
         0.05712017, 0.04783608, 0.232446188),
    )

    for result_index, expected_row in enumerate(reference_values):
        for (attribute, element), expected in zip(checked_fields,
                                                  expected_row):
            observed = getattr(results[result_index], attribute)
            if element is not None:
                observed = observed[element]
            self.assertAlmostEqual(expected, observed, places=6)
def testTPedAnalysisCov(self):
    """Run the analysis with sex as a covariate and a default boundary.

    Resets ``DataParser.boundary`` to a fresh ``BoundaryCheck()``, runs
    ``mv_esteq.RunAnalysis`` and compares selected fields of the first two
    results against reference values to six decimal places.
    """
    PhenoCovar.sex_as_covariate = True
    DataParser.boundary = BoundaryCheck()

    pheno = PhenoCovar()
    dataset = TransposedPedigreeParser(self.tfam_filename, self.tped_filename)
    dataset.load_tfam(pheno)
    dataset.load_genotypes()
    # pheno.standardize_variables()  (call left disabled, as in the original)

    results = list(mv_esteq.RunAnalysis(dataset, pheno))

    # (result index, attribute, element index or None, reference value),
    # listed in the order the assertions are evaluated.
    reference_checks = (
        (0, "p_mvtest", None, 0.00342380),
        (0, "betas", 1, 0.11362883),
        (0, "beta_stderr", 1, 0.0337610),
        (0, "beta_pvalues", 1, 0.00076356),
        (0, "betas", 3, 0.01911090),
        (0, "beta_stderr", 3, 0.10143178),
        (0, "beta_pvalues", 3, 0.8505542),
        (1, "p_mvtest", None, 0.584950593047),
        (1, "betas", 1, 0.0276543736525),
        (1, "beta_stderr", 1, 0.03383588),
        (1, "beta_pvalues", 1, 0.413751829881),
    )

    for result_index, attribute, element, expected in reference_checks:
        observed = getattr(results[result_index], attribute)
        if element is not None:
            observed = observed[element]
        self.assertAlmostEqual(expected, observed, places=6)
def testPedWithMissingMxIndExclusionsToo(self):
    """Combine individual exclusions with ``DataParser.ind_miss_tol = 0.5``.

    Excludes individuals "2:2" and "3:3", then pulls genotype data for
    every locus with an all-True filter. Loci whose extraction raises
    ``TooMuchMissing`` or ``InvalidFrequency`` are skipped without
    advancing the row counter, which is expected to end at 7.
    """
    pc = PhenoCovar()
    DataParser.ind_exclusions = ["2:2", "3:3"]
    DataParser.ind_miss_tol = 0.5  # We should only lose 1
    ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.miss_tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()
    tped_rows = get_lines(self.miss_tped_filename, split=True)

    expected_genotypes = [
        [0, 1],
        [1, 0, 0, 1, 1, 1, 0, 0, 0, 1],
        [0, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [0, 1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]

    row = 0

    # Flags entries 1 and 2 of the fixture mask in place; the mask is not
    # read again in this method.
    fixture_mask = self.non_missing
    fixture_mask[1] = False
    fixture_mask[2] = False

    for snp in ped_parser:
        sample_count = snp.missing_genotypes.shape[0]
        keep_everyone = numpy.ones(sample_count) == 1
        try:
            genodata = snp.get_genotype_data(keep_everyone)
            self.assertEqual(int(tped_rows[row][0]), snp.chr)
            self.assertEqual(int(tped_rows[row][3]), snp.pos)
            self.assertEqual(tped_rows[row][1], snp.rsid)
            self.assertEqual(expected_genotypes[row],
                             list(genodata.genotypes))
            row += 1
        except TooMuchMissing:
            pass
        except InvalidFrequency:
            pass

    self.assertEqual(7, row)
def testPedWithMissingMxSnpComplete(self):
    """Exercise ``DataParser.snp_miss_tol = 0.5`` on the TPED with missing data.

    Six loci are expected to remain after loading. Both the ``get_loci()``
    listing and direct iteration over the parser are compared against TPED
    rows 1 onward (row 0 is expected to be absent), including the expected
    heterozygote frequency for each locus and its genotype vector.
    """
    pc = PhenoCovar()
    DataParser.snp_miss_tol = 0.5  # We should only lose 1
    ped_parser = TransposedPedigreeParser(self.tfam_filename,
                                          self.miss_tped_filename)
    ped_parser.load_tfam(pc)
    ped_parser.load_genotypes()

    # Use a context manager so the file handle is always closed.
    with open(self.miss_tped_filename) as tped_file:
        mapdata = [line.strip().split() for line in tped_file]

    genotypes_w_missing = [
        [0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0],
        [1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
        [0, -1, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0],
        [0, -1, 2, 1, 1, 0, 0, 1, 2, 1, 1, 0],
        [1, -1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0],
        [1, -1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, -1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    ]
    hetero_freq_tped = [0.3636, 0.5, 0.3636, 0.4545, 0.3636, 0.2727, 0.2727]

    self.assertEqual(6, ped_parser.locus_count)

    # Row 0 of the file is expected to be missing from the loaded loci,
    # so comparisons start at row 1.
    index = 1
    loci = ped_parser.get_loci()
    for snp in loci:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertAlmostEqual(hetero_freq_tped[index], snp.hetero_freq,
                               places=4)
        index += 1

    index = 1
    for snp in ped_parser:
        self.assertEqual(int(mapdata[index][0]), snp.chr)
        self.assertEqual(int(mapdata[index][3]), snp.pos)
        self.assertEqual(mapdata[index][1], snp.rsid)
        self.assertEqual(genotypes_w_missing[index], list(snp.genotype_data))
        index += 1
    # Rows 1 through 6 must all have been visited.
    self.assertEqual(7, index)