def __assert_genotype_set_stats_correct(self, g):
    '''Assert that the genotype set g has the expected dimensions.

    Checks, in order: the overall size through itu.assert_size_equals, the
    length of g.sample_id, g.num_snps, and the shape of the g.data array.
    The expected values are 8 SNPs and 1415 samples, with a trailing data
    axis of length 2.
    '''
    num_samples, num_snps = 1415, 8
    itu.assert_size_equals(g, num_snps, num_samples)
    assert_equal(len(g.sample_id), num_samples, 'Incorrect sample ID set size')
    assert_equal(g.num_snps, num_snps, 'Incorrect number of SNPS')
    # Compare against a tuple rather than a list: array shapes are tuples, so
    # this holds under strict equality too and matches the other shape checks.
    assert_equal(g.data.shape, (num_snps, num_samples, 2),
                 'Incorrect genotype data array shape')
def test_estimate_genotype_frequencies(self):
    '''Check the per-SNP genotype frequencies obtained by running self.phaser.

    Reads the PLINK genotype sample (8 SNPs, 1415 samples), runs the phaser
    on it and compares problem.info.genotype_frequency with stored reference
    values to 5 decimals. Finally checks that the frequency of allele 2 equals
    one minus the frequency of allele 1.
    '''
    problem = io.read_plink(pedigree=itu.HUTT_PED, prefix=itu.GENOTYPE_SAMPLE,
                            haplotype=None)
    itu.assert_size_equals(problem.genotype, 8, 1415)
    self.phaser.run(problem)

    # Reference frequencies: one row per SNP, four columns per row.
    expected_rows = [
        [3.44876319e-01, 4.74911660e-01, 1.75265014e-01, 4.94699646e-03],
        [1.06007066e-02, 1.86572433e-01, 8.02826881e-01, 0.00000000e+00],
        [1.83745585e-02, 2.93286204e-01, 6.63604259e-01, 2.47349832e-02],
        [2.17667848e-01, 5.17314494e-01, 2.64310956e-01, 7.06713763e-04],
        [2.16961130e-01, 5.16607761e-01, 2.56537110e-01, 9.89399292e-03],
        [6.10600710e-01, 3.33568901e-01, 5.58303893e-02, 0.00000000e+00],
        [7.24381626e-01, 1.59010604e-01, 7.06713763e-04, 1.15901060e-01],
        [4.19081271e-01, 4.62190807e-01, 1.11660779e-01, 7.06713786e-03],
    ]
    expected = np.array(expected_rows)
    assert_almost_equal(problem.info.genotype_frequency, expected, decimal=5,
                        err_msg='Wrong SNP genotype frequency estimation')

    # Disabled checks of each allele frequency against the reference table:
    # assert_almost_equal(problem.info.allele_frequency(1), expected[:, 0] + 0.5 * expected[:, 1],
    #                     decimal=5, err_msg='Wrong SNP genotype allele frequency estimation')
    # assert_almost_equal(problem.info.allele_frequency(2), expected[:, 2] + 0.5 * expected[:, 1],
    #                     decimal=5, err_msg='Wrong SNP genotype allele frequency estimation')

    # The two allele frequencies must be complementary.
    freq_allele_2 = problem.info.allele_frequency(2)
    complement_of_allele_1 = 1.0 - problem.info.allele_frequency(1)
    assert_almost_equal(freq_allele_2, complement_of_allele_1, decimal=5,
                        err_msg='Wrong SNP genotype allele frequency estimation')
def test_phase_family(self):
    '''Run self.phaser on the PLINK genotype sample and check the outcome.

    The data set is expected to hold 8 SNPs, 1415 samples and 869 trios;
    the phaser is run with debugging switched off and the resulting problem
    statistics are compared with fixed reference numbers.
    '''
    problem = io.read_plink(pedigree=itu.HUTT_PED, prefix=itu.GENOTYPE_SAMPLE,
                            haplotype=None)
    itu.assert_size_equals(problem.genotype, 8, 1415)

    num_trios = len(problem.trios())
    assert_equal(num_trios, 869, 'Unexpected # of genotyped trios')

    params = PhaseParam(debug=False)
    self.phaser.run(problem, params)
    # 22640 = 8 SNPs x 1415 samples x 2; the remaining two numbers are
    # presumably the filled and errored entry counts -- confirm against
    # itu.assert_problem_stats.
    itu.assert_problem_stats(problem, 22640, 20225, 144)
def test_phase_trivial_cases(self):
    '''Check phasing trivial cases in a trio (0,1=parents, 2=child).

    For every SNP, the child's phased haplotype and the parents' genotypes
    after running self.phaser are compared with the reference values stored
    in self.solution. Problem statistics are checked both before and after
    the phaser runs.
    '''
    problem = self.problem
    g = problem.genotype
    itu.assert_size_equals(g, 19, 3)
    assert_equal(problem.error.shape, (19, 3), 'Incorrect error array size')

    trio = (0, 1, 2)
    # The sample indices do not change per SNP, so split the trio once.
    parents, child = trio[0:CHILD], trio[CHILD]
    solution = self.solution.genotype.data
    h = problem.haplotype

    # Nothing is phased and no errors are flagged before the phaser runs.
    assert_problem_stats(problem, 6 * g.num_snps, 0, 0)
    self.phaser.run(problem)

    # NOTE: only the child's haplotype and the parents' genotypes are verified;
    # the child's genotype is not compared against the solution.
    for snp in problem.snp_range:
        expected_parent_genotype = solution[snp, parents, :]
        expected_child_haplotype = solution[snp, child, :]
        parent_genotype = g.data[snp, parents, :]
        child_haplotype = h.data[snp, child]
        assert_equal(child_haplotype, expected_child_haplotype,
                     'Wrong child haplotype by trivial phaser at snp %d' % (snp,))
        assert_equal(parent_genotype, expected_parent_genotype,
                     'Wrong parent genotype imputation by trivial phaser at snp %d' % (snp,))

    assert_problem_stats(problem, 6 * g.num_snps, 66, 6, error_rate=0.1)
def test_phase_trivial_cases_all_trios(self):
    '''Run self.phaser on the whole fixture problem and check its statistics.

    The fixture is expected to contain 8 SNPs, 1415 samples and 869 trios.
    '''
    problem = self.problem
    itu.assert_size_equals(problem.genotype, 8, 1415)

    trio_count = len(problem.trios())
    assert_equal(trio_count, 869, 'Unexpected # of genotyped trios')

    self.phaser.run(problem)
    itu.assert_problem_stats(problem, 22640, 18567, 10)
def test_phase_trivial_cases(self):
    '''Check phasing trivial cases in a parent-child duo (0=parent, 1=child).

    For every SNP, the child's phased haplotype and the parent's genotype
    after running self.phaser are compared with the reference values stored
    in self.solution. Problem statistics are checked both before and after
    the phaser runs.
    '''
    problem = self.problem
    g = problem.genotype
    itu.assert_size_equals(g, 1, 2)

    duo = (0, 1)
    parent, child = duo
    solution = self.solution.genotype.data
    h = problem.haplotype

    # Nothing is phased and no errors are flagged before the phaser runs.
    assert_problem_stats(problem, 4 * g.num_snps, 0, 0)
    self.phaser.run(problem)

    # NOTE: only the child's haplotype and the parent's genotype are verified;
    # the child's genotype is not compared against the solution.
    for snp in problem.snp_range:
        expected_parent_genotype = solution[snp, parent, :]
        expected_child_haplotype = solution[snp, child, :]
        parent_genotype = g.data[snp, parent, :]
        child_haplotype = h.data[snp, child]
        assert_equal(child_haplotype, expected_child_haplotype,
                     'Wrong child haplotype by trivial phaser at snp %d' % (snp,))
        assert_equal(parent_genotype, expected_parent_genotype,
                     'Wrong parent genotype imputation by trivial phaser at snp %d' % (snp,))

    assert_problem_stats(problem, 4 * g.num_snps, 4, 0)
def test_create_from_mock_data(self):
    '''Create a simple genotype set from mock SNP and genotype data.

    Builds four SNPs at base-pair positions 12, 34, 56 and 78 with a single
    genotyped sample, then checks the set's size and the SNP index ranges
    returned by segment_intersect() for several base-pair segments.
    '''
    # SNP metadata is built in memory as a structured array (nothing is read
    # from disk here).
    snp = np.array(
        [(0, 'rs1', 0., 12),
         (0, 'rs2', 0., 34),
         (0, 'rs3', 0., 56),
         (0, 'rs4', 0., 78)],
        dtype={
            'names': ('chrom', 'snp', 'dist_cm', 'base_pair'),
            'formats': ('i2', 'S12', 'i8', 'i8'),
        })
    sample_id = [126251, 111161]
    data = np.array([[[1, 2]], [[2, 2]], [[1, 2]], [[1, 1]]])
    g = GenotypeFactory.new_instance('genotype', data, snp, sample_id)

    itu.assert_size_equals(g, 4, 1)
    # Always pass (actual, expected) so a failure report labels the values
    # correctly.
    assert_equal(g.num_snps, 4, 'Incorrect number of SNPS')

    # (base-pair segment, expected SNP index range). The expected values are
    # consistent with a half-open [start, stop) range of SNP indices.
    cases = (
        ([0, 40], [0, 2]),
        ([10, 40], [0, 2]),
        ([10, 60], [0, 3]),
        ([20, 60], [1, 3]),
        ([0, 100], [0, 4]),
        ([20, 100], [1, 4]),
    )
    for segment, expected in cases:
        assert_equal(g.segment_intersect(segment), expected,
                     'Wrong interval intersection')
def test_child_comparison_one_parent(self):
    '''Test applying child comparison to a nuclear family with many genotyped
    kids but only one genotyped parent.

    Loads the stored stage-2 problem (3218 SNPs, 8 samples), verifies its
    statistics, runs the child-comparison phaser with debugging off and
    verifies the statistics again.
    '''
    problem = io.read_npz(itu.FAMILY945_ONE_PARENT_STAGE2)
    itu.assert_size_equals(problem.genotype, 3218, 8)

    # Statistics of the problem as loaded.
    itu.assert_problem_stats(problem, 51488, 44150, 96)

    family_child_comparison_phaser(debug=False).run(problem)

    # Statistics after child comparison.
    itu.assert_problem_stats(problem, 51488, 47343, 101)
def test_phase_trivial_cases_all_trios(self):
    '''Phase the full fixture problem with self.phaser and verify the result.

    Before phasing, the genotype set must hold 8 SNPs and 1415 samples and
    the problem must report 869 trios.
    '''
    itu.assert_size_equals(self.problem.genotype, 8, 1415)

    trios = self.problem.trios()
    assert_equal(len(trios), 869, 'Unexpected # of genotyped trios')

    self.phaser.run(self.problem)

    expected_stats = (22640, 18567, 10)
    itu.assert_problem_stats(self.problem, *expected_stats)

#---------------------------------------------
# Private Methods
#---------------------------------------------
def test_family_12(self):
    '''Test comparing sibs with non-genotyped parents on the family-12 problem.

    The sib-comparison phaser is run with single_member=1; the problem
    statistics are expected to be the same before and after the run.
    '''
    problem = io.read_npz(itu.FAMILY12_STAGE2)
    itu.assert_size_equals(problem.genotype, 3218, 7)

    # The same statistics are asserted before and after phasing.
    stats = (45052, 42162, 237)
    itu.assert_problem_stats(problem, *stats)

    families = list(problem.families(genotyped=False))
    assert_equal(len(families), 1, 'Incorrect number of families')

    phaser = family_sib_comparison_phaser()
    phaser.run(problem, PhaseParam(single_member=1))
    itu.assert_problem_stats(problem, *stats)
def test_family_963(self):
    '''Test comparing sibs with non-genotyped parents (stage 4).

    This was a problematic family. The problem statistics are expected to be
    the same before and after running the sib-comparison phaser.
    '''
    problem = io.read_npz(itu.FAMILY963_STAGE4)
    itu.assert_size_equals(problem.genotype, 3218, 3)

    # The same statistics are asserted before and after phasing.
    stats = (19308, 19286, 23)
    itu.assert_problem_stats(problem, *stats)

    families = list(problem.families(genotyped=False))
    assert_equal(len(families), 1, 'Incorrect number of families')

    family_sib_comparison_phaser().run(problem)
    itu.assert_problem_stats(problem, *stats)
def test_family_dataset(self):
    """Test the size and number of nuclear families in the single nuclear
    family data set.

    Checks the genotype set size (3218 SNPs, 9 samples), the size of the
    union of families with at least 3 children, and the father, mother and
    children indices of the first such family.
    """
    problem = itu.Templates.problem_family(itu.FAMILY7)
    min_children = 3
    itu.assert_size_equals(problem.genotype, 3218, 9)
    assert_equal(len(problem.families_union(min_children=min_children)), 9)

    family = problem.families(min_children)[0]
    # Each failure message names the member actually being checked.
    assert_equal(family.father, 0, "Wrong father ID")
    assert_equal(family.mother, 1, "Wrong mother ID")
    assert_equal(family.children, set([2, 3, 4, 5, 6, 7, 8]), "Wrong children set")
def test_family_dataset(self):
    '''Test the size and number of nuclear families in the single nuclear
    family data set.

    Checks the genotype set size (3218 SNPs, 9 samples), the size of the
    union of families with at least 3 children, and the father, mother and
    children indices of the first such family.
    '''
    min_children = 3
    problem = itu.Templates.problem_family(itu.FAMILY7)
    itu.assert_size_equals(problem.genotype, 3218, 9)

    members = problem.families_union(min_children=min_children)
    assert_equal(len(members), 9)

    family = problem.families(min_children)[0]
    # Each failure message names the member actually being checked.
    assert_equal(family.father, 0, 'Wrong father ID')
    assert_equal(family.mother, 1, 'Wrong mother ID')
    expected_children = set([2, 3, 4, 5, 6, 7, 8])
    assert_equal(family.children, expected_children, 'Wrong children set')
def test_family_2003_need_poo_alignment(self):
    '''Test comparing sibs with non-genotyped parents (stage 4).

    This case highlights the need to align POO-phases, i.e., swap founder
    haps to correctly patch families at individual ID 28412 (our original
    index 386; in this problem, index 10).
    '''
    problem = io.read_npz(itu.FAMILY2003_STAGE3)
    itu.assert_size_equals(problem.genotype, 3218, 9)

    # Statistics of the problem as loaded.
    itu.assert_problem_stats(problem, 57924, 43339, 85)

    families = list(problem.families(genotyped=False))
    assert_equal(len(families), 1, 'Incorrect number of families')

    # Debugging aid: print families[0].member_list and
    # problem.pedigree.sample_id to inspect the family.
    family_sib_comparison_phaser().run(problem)

    # Statistics after sib comparison.
    itu.assert_problem_stats(problem, 57924, 57515, 85)
def test_create_from_mock_data(self):
    '''Create a simple genotype set from mock SNP and genotype data.

    Builds four SNPs at base-pair positions 12, 34, 56 and 78 with a single
    genotyped sample, then checks the set's size and the SNP index ranges
    returned by segment_intersect() for several base-pair segments.
    '''
    # SNP metadata is built in memory as a structured array (nothing is read
    # from disk here).
    snp_dtype = {'names': ('chrom', 'snp', 'dist_cm', 'base_pair'),
                 'formats': ('i2', 'S12', 'i8', 'i8')}
    snp_records = [(0, 'rs1', 0., 12), (0, 'rs2', 0., 34),
                   (0, 'rs3', 0., 56), (0, 'rs4', 0., 78)]
    snp = np.array(snp_records, dtype=snp_dtype)
    sample_id = [126251, 111161]
    data = np.array([[[1, 2]], [[2, 2]], [[1, 2]], [[1, 1]]])
    g = GenotypeFactory.new_instance('genotype', data, snp, sample_id)

    itu.assert_size_equals(g, 4, 1)
    # Every assertion below passes (actual, expected) so that a failure
    # report labels the two values correctly.
    assert_equal(g.num_snps, 4, 'Incorrect number of SNPS')
    message = 'Wrong interval intersection'
    assert_equal(g.segment_intersect([0, 40]), [0, 2], message)
    assert_equal(g.segment_intersect([10, 40]), [0, 2], message)
    assert_equal(g.segment_intersect([10, 60]), [0, 3], message)
    assert_equal(g.segment_intersect([20, 60]), [1, 3], message)
    assert_equal(g.segment_intersect([0, 100]), [0, 4], message)
    assert_equal(g.segment_intersect([20, 100]), [1, 4], message)
def test_outer_duo(self):
    '''Test applying child comparison to a nuclear family with many genotyped
    kids but only one genotyped parent.

    Seems to be fine for now: too many errors are flagged, but we are not
    going to split hair.
    '''
    problem = self.problem

    # SNP 'rs5746679' is expected at index 3 of this problem.
    snp_index = problem.info.snp_by_name('rs5746679')
    assert_equal(snp_index, [3], 'Wrong SNP index')

    itu.assert_size_equals(problem.genotype, 3218, 7)
    # Statistics of the fixture before phasing.
    itu.assert_problem_stats(problem, 45052, 40086, 4)

    # Debugging aid: inspect problem.genotype.data[snp_index, :, :] and
    # problem.haplotype.data[snp_index, :, :] here.
    family_phaser().run(problem)

    # Statistics after running the family phaser.
    itu.assert_problem_stats(problem, 45052, 45023, 25)
def test_estimate_genotype_frequencies(self):
    '''Test estimating genotype frequencies for each SNP.

    After self.phaser has run on the PLINK genotype sample (8 SNPs, 1415
    samples), problem.info.genotype_frequency must match the reference table
    below and the frequencies of alleles 1 and 2 must add up to one, both to
    5 decimals.
    '''
    problem = io.read_plink(pedigree=itu.HUTT_PED, prefix=itu.GENOTYPE_SAMPLE,
                            haplotype=None)
    itu.assert_size_equals(problem.genotype, 8, 1415)
    self.phaser.run(problem)

    # Expected result: one row per SNP.
    frequency = np.array([
        [3.44876319e-01, 4.74911660e-01, 1.75265014e-01, 4.94699646e-03],
        [1.06007066e-02, 1.86572433e-01, 8.02826881e-01, 0.00000000e+00],
        [1.83745585e-02, 2.93286204e-01, 6.63604259e-01, 2.47349832e-02],
        [2.17667848e-01, 5.17314494e-01, 2.64310956e-01, 7.06713763e-04],
        [2.16961130e-01, 5.16607761e-01, 2.56537110e-01, 9.89399292e-03],
        [6.10600710e-01, 3.33568901e-01, 5.58303893e-02, 0.00000000e+00],
        [7.24381626e-01, 1.59010604e-01, 7.06713763e-04, 1.15901060e-01],
        [4.19081271e-01, 4.62190807e-01, 1.11660779e-01, 7.06713786e-03],
    ])
    assert_almost_equal(
        problem.info.genotype_frequency,
        frequency,
        decimal=5,
        err_msg='Wrong SNP genotype frequency estimation',
    )

    # Currently disabled: direct comparison of each allele frequency with the
    # reference table.
    # assert_almost_equal(problem.info.allele_frequency(1), frequency[:, 0] + 0.5 * frequency[:, 1],
    #                     decimal=5, err_msg='Wrong SNP genotype allele frequency estimation')
    # assert_almost_equal(problem.info.allele_frequency(2), frequency[:, 2] + 0.5 * frequency[:, 1],
    #                     decimal=5, err_msg='Wrong SNP genotype allele frequency estimation')

    assert_almost_equal(
        problem.info.allele_frequency(2),
        1.0 - problem.info.allele_frequency(1),
        decimal=5,
        err_msg='Wrong SNP genotype allele frequency estimation',
    )
def test_read_snps_to_impute(self):
    '''Test loading SNPs to be imputed from an NPZ file generated with
    cgi2plink (by way of io_genotype.write()).

    The loaded imputation set's genotype object is expected to have
    size 146 x 98.
    '''
    imputation_set = ImputationSet.load(im.itu.IMPUTE_RARE)
    assert_size_equals(imputation_set.genotype, 146, 98)