示例#1
0
 def __assert_genotype_set_stats_correct(self, g):
     '''Assert that the genotype set g has the expected dimensions.

     Checks the size reported through itu.assert_size_equals, the length of
     g.sample_id, g.num_snps and the shape of g.data against the fixed
     expectation of 8 SNPs and 1415 samples, with a trailing axis of length 2.
     '''
     (n, snps) = (1415, 8)
     itu.assert_size_equals(g, snps, n)
     assert_equal(len(g.sample_id), n, 'Incorrect sample ID set size')
     assert_equal(g.num_snps, snps, 'Incorrect number of SNPS')
     # Compare against a tuple rather than a list: a shape is a tuple
     # (presumably g.data is a NumPy array), and a tuple never equals a list
     # under plain ==, so this stays correct whichever assert_equal is in use.
     assert_equal(g.data.shape, (snps, n, 2),
                  'Incorrect genotype data array shape')
示例#2
0
 def test_estimate_genotype_frequencies(self):
     '''Run self.phaser on the PLINK sample and check the estimated genotype frequencies.

     The problem is read with io.read_plink, its genotype size is verified
     (8 x 1415), and after the phaser run problem.info.genotype_frequency is
     compared to a reference table (8 rows of 4 values) to 5 decimals.
     '''
     num_snps, num_samples = 8, 1415
     problem = io.read_plink(pedigree=itu.HUTT_PED, prefix=itu.GENOTYPE_SAMPLE, haplotype=None)
     itu.assert_size_equals(problem.genotype, num_snps, num_samples)
     self.phaser.run(problem)

     # Reference genotype frequency table.
     reference_rows = [
         [3.44876319e-01, 4.74911660e-01, 1.75265014e-01, 4.94699646e-03],
         [1.06007066e-02, 1.86572433e-01, 8.02826881e-01, 0.00000000e+00],
         [1.83745585e-02, 2.93286204e-01, 6.63604259e-01, 2.47349832e-02],
         [2.17667848e-01, 5.17314494e-01, 2.64310956e-01, 7.06713763e-04],
         [2.16961130e-01, 5.16607761e-01, 2.56537110e-01, 9.89399292e-03],
         [6.10600710e-01, 3.33568901e-01, 5.58303893e-02, 0.00000000e+00],
         [7.24381626e-01, 1.59010604e-01, 7.06713763e-04, 1.15901060e-01],
         [4.19081271e-01, 4.62190807e-01, 1.11660779e-01, 7.06713786e-03],
     ]
     frequency = np.array(reference_rows)
     assert_almost_equal(problem.info.genotype_frequency, frequency, decimal=5,
                         err_msg='Wrong SNP genotype frequency estimation')

     # The allele frequencies are not compared against values derived from
     # the reference table; only their complementarity is verified.
     allele2 = problem.info.allele_frequency(2)
     allele1_complement = 1.0 - problem.info.allele_frequency(1)
     assert_almost_equal(allele2, allele1_complement, decimal=5,
                         err_msg='Wrong SNP genotype allele frequency estimation')
示例#3
0
 def test_phase_family(self):
     '''Run self.phaser on the PLINK sample and check the resulting problem statistics.

     Verifies the genotype size (8 x 1415) and the number of trios (869)
     before the run, then the statistics reported by itu.assert_problem_stats.
     '''
     problem = io.read_plink(
         pedigree=itu.HUTT_PED,
         prefix=itu.GENOTYPE_SAMPLE,
         haplotype=None)
     itu.assert_size_equals(problem.genotype, 8, 1415)

     num_trios = len(problem.trios())
     assert_equal(num_trios, 869, 'Unexpected # of genotyped trios')

     params = PhaseParam(debug=False)
     self.phaser.run(problem, params)
     itu.assert_problem_stats(problem, 22640, 20225, 144)
示例#4
0
    def test_phase_trivial_cases(self):
        '''Check trivial-case phasing on a single trio, SNP by SNP.

        The trio occupies sample indices (0, 1, 2); entries before CHILD are
        treated as the parents and entry CHILD as the child. After running
        self.phaser, the child haplotype and the parent genotypes at every SNP
        are compared with the data held in self.solution.
        '''
        problem = self.problem
        genotype = problem.genotype
        itu.assert_size_equals(problem.genotype, 19, 3)
        assert_equal(problem.error.shape, (19, 3), 'Incorrect error array size')

        trio = (0, 1, 2)
        parents = trio[0:CHILD]
        child = trio[CHILD]
        expected = self.solution.genotype.data
        haplotype = problem.haplotype

        # Statistics before phasing.
        assert_problem_stats(problem, 6 * genotype.num_snps, 0, 0)
        self.phaser.run(problem)

        for snp in problem.snp_range:
            want_parents = expected[snp, parents, :]
            want_child = expected[snp, child, :]
            got_parents = genotype.data[snp, parents, :]
            got_child = haplotype.data[snp, child]
            assert_equal(got_child, want_child,
                         'Wrong child haplotype by trivial phaser at snp %d' % (snp,))
            assert_equal(got_parents, want_parents,
                         'Wrong parent genotype imputation by trivial phaser at snp %d' % (snp,))

        # Statistics after phasing.
        assert_problem_stats(problem, 6 * genotype.num_snps, 66, 6, error_rate=0.1)
示例#5
0
 def test_phase_trivial_cases_all_trios(self):
     '''Run self.phaser on self.problem and check the resulting problem statistics.

     The genotype size (8 x 1415) and the trio count (869) are verified
     before the phaser is run.
     '''
     problem = self.problem
     itu.assert_size_equals(problem.genotype, 8, 1415)

     num_trios = len(problem.trios())
     assert_equal(num_trios, 869, 'Unexpected # of genotyped trios')

     self.phaser.run(problem)
     itu.assert_problem_stats(problem, 22640, 18567, 10)
示例#6
0
    def test_phase_trivial_cases(self):
        '''Check trivial-case phasing on a single duo, SNP by SNP.

        The duo occupies sample indices (0, 1); this test treats index 0 as
        the parent and index 1 as the child. After running self.phaser, the
        child haplotype and the parent genotype at every SNP are compared
        with the data held in self.solution.
        '''
        problem = self.problem
        genotype = problem.genotype
        itu.assert_size_equals(problem.genotype, 1, 2)

        duo = (0, 1)
        parent, child = duo[0], duo[1]
        expected = self.solution.genotype.data
        haplotype = problem.haplotype

        # Statistics before phasing.
        assert_problem_stats(problem, 4 * genotype.num_snps, 0, 0)
        self.phaser.run(problem)

        for snp in problem.snp_range:
            want_parent = expected[snp, parent, :]
            want_child = expected[snp, child, :]
            got_parent = genotype.data[snp, parent, :]
            got_child = haplotype.data[snp, child]
            assert_equal(got_child, want_child,
                         'Wrong child haplotype by trivial phaser at snp %d' % (snp,))
            assert_equal(got_parent, want_parent,
                         'Wrong parent genotype imputation by trivial phaser at snp %d' % (snp,))

        # Statistics after phasing.
        assert_problem_stats(problem, 4 * genotype.num_snps, 4, 0)
示例#7
0
 def test_create_from_mock_data(self):
     '''Build a small genotype set from in-memory mock data and query it.

     Four SNP records (base pairs 12, 34, 56, 78) and a 4 x 1 x 2 data array
     are passed to GenotypeFactory.new_instance; the size, SNP count and
     several segment_intersect results are then checked.
     '''
     # Mock SNP metadata, built directly as a structured array.
     snp_records = [(0, 'rs1', 0., 12), (0, 'rs2', 0., 34),
                    (0, 'rs3', 0., 56), (0, 'rs4', 0., 78)]
     snp_dtype = {'names': ('chrom', 'snp', 'dist_cm', 'base_pair'),
                  'formats': ('i2', 'S12', 'i8', 'i8')}
     snp = np.array(snp_records, dtype=snp_dtype)
     sample_id = [126251, 111161]
     data = np.array([[[1, 2]], [[2, 2]], [[1, 2]], [[1, 1]]])

     g = GenotypeFactory.new_instance('genotype', data, snp, sample_id)
     itu.assert_size_equals(g, 4, 1)
     assert_equal(4, g.num_snps, 'Incorrect number of SNPS')

     message = 'Wrong interval intersection'
     assert_equal(g.segment_intersect([0, 40]), [0, 2], message)
     # (queried interval, expected segment_intersect result)
     cases = (
         ([10, 40], [0, 2]),
         ([10, 60], [0, 3]),
         ([20, 60], [1, 3]),
         ([0, 100], [0, 4]),
         ([20, 100], [1, 4]),
     )
     for interval, expected in cases:
         assert_equal(expected, g.segment_intersect(interval), message)
 def test_child_comparison_one_parent(self):
     '''Run the family child-comparison phaser on the FAMILY945 one-parent problem.

     Per the fixture name, this is a nuclear family with a single genotyped
     parent. The problem statistics are checked before and after the run.
     '''
     problem = io.read_npz(itu.FAMILY945_ONE_PARENT_STAGE2)
     itu.assert_size_equals(problem.genotype, 3218, 8)

     # Statistics as loaded.
     itu.assert_problem_stats(problem, 51488, 44150, 96)

     family_child_comparison_phaser(debug=False).run(problem)

     # Statistics after the phaser run.
     itu.assert_problem_stats(problem, 51488, 47343, 101)
 def test_child_comparison_one_parent(self):
     '''Run the family child-comparison phaser on the FAMILY945 one-parent problem.

     Per the fixture name, this is a nuclear family with a single genotyped
     parent. The problem statistics are checked before and after the run.
     '''
     problem = io.read_npz(itu.FAMILY945_ONE_PARENT_STAGE2)
     itu.assert_size_equals(problem.genotype, 3218, 8)

     # Statistics as loaded.
     itu.assert_problem_stats(problem, 51488, 44150, 96)

     family_child_comparison_phaser(debug=False).run(problem)

     # Statistics after the phaser run.
     itu.assert_problem_stats(problem, 51488, 47343, 101)
示例#10
0
 def test_phase_trivial_cases_all_trios(self):
     '''Run self.phaser on self.problem and check the resulting problem statistics.

     The genotype size (8 x 1415) and the trio count (869) are verified
     before the phaser is run.
     '''
     problem = self.problem
     itu.assert_size_equals(problem.genotype, 8, 1415)

     num_trios = len(problem.trios())
     assert_equal(num_trios, 869, 'Unexpected # of genotyped trios')

     self.phaser.run(problem)
     itu.assert_problem_stats(problem, 22640, 18567, 10)
     
 #---------------------------------------------
 # Private Methods
 #---------------------------------------------
    def test_family_12(self):
        '''Run the sib-comparison phaser on the FAMILY12 problem.

        The fixture must contain exactly one family when queried with
        genotyped=False. The same statistics are asserted before and after
        the run (with PhaseParam(single_member=1)).
        '''
        problem = io.read_npz(itu.FAMILY12_STAGE2)
        itu.assert_size_equals(problem.genotype, 3218, 7)
        itu.assert_problem_stats(problem, 45052, 42162, 237)

        families = list(problem.families(genotyped=False))
        assert_equal(len(families), 1, 'Incorrect number of families')

        params = PhaseParam(single_member=1)
        family_sib_comparison_phaser().run(problem, params)

        # Statistics are expected to be identical to the pre-run values.
        itu.assert_problem_stats(problem, 45052, 42162, 237)
    def test_family_963(self):
        '''Run the sib-comparison phaser on the FAMILY963 problem (a previously problematic family).

        The fixture must contain exactly one family when queried with
        genotyped=False. The same statistics are asserted before and after
        the run.
        '''
        problem = io.read_npz(itu.FAMILY963_STAGE4)
        itu.assert_size_equals(problem.genotype, 3218, 3)
        itu.assert_problem_stats(problem, 19308, 19286, 23)

        families = list(problem.families(genotyped=False))
        assert_equal(len(families), 1, 'Incorrect number of families')

        family_sib_comparison_phaser().run(problem)

        # Statistics are expected to be identical to the pre-run values.
        itu.assert_problem_stats(problem, 19308, 19286, 23)
示例#13
0
 def test_family_dataset(self):
     """Check the size and family structure of the single nuclear family data set.

     Loads the FAMILY7 problem through itu.Templates.problem_family, verifies
     the genotype size (3218 x 9) and the size of families_union, then
     checks the father, mother and children of the first family returned
     for min_children=3.
     """
     problem = itu.Templates.problem_family(itu.FAMILY7)
     min_children = 3
     itu.assert_size_equals(problem.genotype, 3218, 9)
     assert_equal(len(problem.families_union(min_children=min_children)), 9)
     family = problem.families(min_children)[0]
     # Each message names the field it checks, so a failure points at the
     # right parent.
     assert_equal(family.father, 0, "Wrong father ID")
     assert_equal(family.mother, 1, "Wrong mother ID")
     assert_equal(family.children, set([2, 3, 4, 5, 6, 7, 8]), "Wrong children set")
示例#14
0
    def test_family_12(self):
        '''Run the sib-comparison phaser on the FAMILY12 problem.

        The fixture must contain exactly one family when queried with
        genotyped=False. The same statistics are asserted before and after
        the run (with PhaseParam(single_member=1)).
        '''
        problem = io.read_npz(itu.FAMILY12_STAGE2)
        itu.assert_size_equals(problem.genotype, 3218, 7)
        itu.assert_problem_stats(problem, 45052, 42162, 237)

        num_families = len(list(problem.families(genotyped=False)))
        assert_equal(num_families, 1, 'Incorrect number of families')

        sib_phaser = family_sib_comparison_phaser()
        sib_phaser.run(problem, PhaseParam(single_member=1))

        # Statistics are expected to be identical to the pre-run values.
        itu.assert_problem_stats(problem, 45052, 42162, 237)
示例#15
0
    def test_family_963(self):
        '''Run the sib-comparison phaser on the FAMILY963 problem (a previously problematic family).

        The fixture must contain exactly one family when queried with
        genotyped=False. The same statistics are asserted before and after
        the run.
        '''
        problem = io.read_npz(itu.FAMILY963_STAGE4)
        itu.assert_size_equals(problem.genotype, 3218, 3)
        itu.assert_problem_stats(problem, 19308, 19286, 23)

        num_families = len(list(problem.families(genotyped=False)))
        assert_equal(num_families, 1, 'Incorrect number of families')

        sib_phaser = family_sib_comparison_phaser()
        sib_phaser.run(problem)

        # Statistics are expected to be identical to the pre-run values.
        itu.assert_problem_stats(problem, 19308, 19286, 23)
示例#16
0
 def test_family_dataset(self):
     '''Check the size and family structure of the single nuclear family data set.

     Loads the FAMILY7 problem through itu.Templates.problem_family, verifies
     the genotype size (3218 x 9) and the size of families_union, then
     checks the father, mother and children of the first family returned
     for min_children=3.
     '''
     problem = itu.Templates.problem_family(itu.FAMILY7)
     min_children = 3
     itu.assert_size_equals(problem.genotype, 3218, 9)
     assert_equal(len(problem.families_union(min_children=min_children)), 9)
     family = problem.families(min_children)[0]
     # Each message names the field it checks, so a failure points at the
     # right parent.
     assert_equal(family.father, 0, 'Wrong father ID')
     assert_equal(family.mother, 1, 'Wrong mother ID')
     assert_equal(family.children, set([2, 3, 4, 5, 6, 7, 8]),
                  'Wrong children set')
    def test_family_2003_need_poo_alignment(self):
        '''Run the sib-comparison phaser on the FAMILY2003 problem.

        This case was recorded as highlighting the need to align POO-phases,
        i.e., to swap founder haps so that families are patched correctly at
        individual ID 28412 (original index 386; index 10 in this problem).
        The fixture must contain exactly one family when queried with
        genotyped=False; statistics are checked before and after the run.
        '''
        problem = io.read_npz(itu.FAMILY2003_STAGE3)
        itu.assert_size_equals(problem.genotype, 3218, 9)

        # Statistics as loaded.
        itu.assert_problem_stats(problem, 57924, 43339, 85)

        families = list(problem.families(genotyped=False))
        assert_equal(len(families), 1, 'Incorrect number of families')

        family_sib_comparison_phaser().run(problem)

        # Statistics after the phaser run.
        itu.assert_problem_stats(problem, 57924, 57515, 85)
示例#18
0
    def test_family_2003_need_poo_alignment(self):
        '''Run the sib-comparison phaser on the FAMILY2003 problem.

        This case was recorded as highlighting the need to align POO-phases,
        i.e., to swap founder haps so that families are patched correctly at
        individual ID 28412 (original index 386; index 10 in this problem).
        The fixture must contain exactly one family when queried with
        genotyped=False; statistics are checked before and after the run.
        '''
        problem = io.read_npz(itu.FAMILY2003_STAGE3)
        itu.assert_size_equals(problem.genotype, 3218, 9)

        # Statistics as loaded.
        itu.assert_problem_stats(problem, 57924, 43339, 85)

        num_families = len(list(problem.families(genotyped=False)))
        assert_equal(num_families, 1, 'Incorrect number of families')

        sib_phaser = family_sib_comparison_phaser()
        sib_phaser.run(problem)

        # Statistics after the phaser run.
        itu.assert_problem_stats(problem, 57924, 57515, 85)
示例#19
0
 def test_create_from_mock_data(self):
     '''Build a small genotype set from in-memory mock data and query it.

     Four SNP records (base pairs 12, 34, 56, 78) and a 4 x 1 x 2 data array
     are passed to GenotypeFactory.new_instance; the size, SNP count and
     several segment_intersect results are then checked.
     '''
     # Mock SNP metadata, built directly as a structured array.
     field_names = ('chrom', 'snp', 'dist_cm', 'base_pair')
     field_formats = ('i2', 'S12', 'i8', 'i8')
     records = [
         (0, 'rs1', 0., 12),
         (0, 'rs2', 0., 34),
         (0, 'rs3', 0., 56),
         (0, 'rs4', 0., 78),
     ]
     snp = np.array(records, dtype={'names': field_names, 'formats': field_formats})
     sample_id = [126251, 111161]
     data = np.array([[[1, 2]], [[2, 2]], [[1, 2]], [[1, 1]]])

     g = GenotypeFactory.new_instance('genotype', data, snp, sample_id)
     itu.assert_size_equals(g, 4, 1)
     assert_equal(4, g.num_snps, 'Incorrect number of SNPS')

     message = 'Wrong interval intersection'
     assert_equal(g.segment_intersect([0, 40]), [0, 2], message)
     # (expected segment_intersect result, queried interval)
     for expected, interval in (([0, 2], [10, 40]),
                                ([0, 3], [10, 60]),
                                ([1, 3], [20, 60]),
                                ([0, 4], [0, 100]),
                                ([1, 4], [20, 100])):
         assert_equal(expected, g.segment_intersect(interval), message)
    def test_outer_duo(self):
        '''Run the family phaser on self.problem and check the resulting statistics.

        Before the run, SNP 'rs5746679' must resolve to index [3] and the
        genotype size must be 3218 x 7. The original author noted that the
        result seems acceptable for now even though more errors are flagged
        than desired.
        '''
        problem = self.problem
        snp_index = problem.info.snp_by_name('rs5746679')
        assert_equal(snp_index, [3], 'Wrong SNP index')
        itu.assert_size_equals(problem.genotype, 3218, 7)

        # Statistics before phasing.
        itu.assert_problem_stats(problem, 45052, 40086, 4)

        family_phaser().run(problem)

        # Statistics after phasing.
        itu.assert_problem_stats(problem, 45052, 45023, 25)
示例#21
0
    def test_estimate_genotype_frequencies(self):
        '''Run self.phaser on the PLINK sample and check the estimated genotype frequencies.

        The problem is read with io.read_plink, its genotype size is verified
        (8 x 1415), and after the phaser run problem.info.genotype_frequency
        is compared to a reference table (8 rows of 4 values) to 5 decimals.
        '''
        problem = io.read_plink(
            pedigree=itu.HUTT_PED,
            prefix=itu.GENOTYPE_SAMPLE,
            haplotype=None)
        itu.assert_size_equals(problem.genotype, 8, 1415)
        self.phaser.run(problem)

        # Reference genotype frequency table.
        frequency = np.array([
            [3.44876319e-01, 4.74911660e-01, 1.75265014e-01, 4.94699646e-03],
            [1.06007066e-02, 1.86572433e-01, 8.02826881e-01, 0.00000000e+00],
            [1.83745585e-02, 2.93286204e-01, 6.63604259e-01, 2.47349832e-02],
            [2.17667848e-01, 5.17314494e-01, 2.64310956e-01, 7.06713763e-04],
            [2.16961130e-01, 5.16607761e-01, 2.56537110e-01, 9.89399292e-03],
            [6.10600710e-01, 3.33568901e-01, 5.58303893e-02, 0.00000000e+00],
            [7.24381626e-01, 1.59010604e-01, 7.06713763e-04, 1.15901060e-01],
            [4.19081271e-01, 4.62190807e-01, 1.11660779e-01, 7.06713786e-03],
        ])
        assert_almost_equal(
            problem.info.genotype_frequency,
            frequency,
            decimal=5,
            err_msg='Wrong SNP genotype frequency estimation')

        # The allele frequencies are not compared against values derived from
        # the reference table; only their complementarity is verified.
        assert_almost_equal(
            problem.info.allele_frequency(2),
            1.0 - problem.info.allele_frequency(1),
            decimal=5,
            err_msg='Wrong SNP genotype allele frequency estimation')
示例#22
0
 def test_read_snps_to_impute(self):
     '''Load the IMPUTE_RARE imputation set and check the size of its genotype (146 x 98).

     The original note says the NPZ file was generated with cgi2plink by way
     of io_genotype.write().
     '''
     imputation_set = ImputationSet.load(im.itu.IMPUTE_RARE)
     assert_size_equals(imputation_set.genotype, 146, 98)
示例#23
0
 def __assert_genotype_set_stats_correct(self, g):
     '''Assert that the genotype set g has the expected dimensions.

     Checks the size reported through itu.assert_size_equals, the length of
     g.sample_id, g.num_snps and the shape of g.data against the fixed
     expectation of 8 SNPs and 1415 samples, with a trailing axis of length 2.
     '''
     (n, snps) = (1415, 8)
     itu.assert_size_equals(g, snps, n)
     assert_equal(len(g.sample_id), n, 'Incorrect sample ID set size')
     assert_equal(g.num_snps, snps, 'Incorrect number of SNPS')
     # Compare against a tuple rather than a list: a shape is a tuple
     # (presumably g.data is a NumPy array), and a tuple never equals a list
     # under plain ==, so this stays correct whichever assert_equal is in use.
     assert_equal(g.data.shape, (snps, n, 2), 'Incorrect genotype data array shape')