示例#1
0
 def setUp(self):
     '''Create the fixtures used by the tests: a PLINK problem and a phaser chain.'''
     unittest.TestCase.setUp(self)
     # The pedigree is read together with the genotype set; according to the
     # original note, this recodes the sample IDs to consecutive values so
     # that phasers can access them more easily.
     self.problem = io.read_plink(
         pedigree=itu.HUTT_PED,
         prefix=itu.GENOTYPE_SAMPLE,
         haplotype=None)
     # The chain under test contains the trivial phaser only.
     chain_members = [trivial_phaser()]
     self.phaser = new_phaser_chain(chain_members)
示例#2
0
 def test_phase_family(self):
     '''Run self.phaser on the sample data set and verify the problem statistics.'''
     # The problem is loaded locally here rather than taken from a fixture attribute.
     problem = io.read_plink(
         pedigree=itu.HUTT_PED,
         prefix=itu.GENOTYPE_SAMPLE,
         haplotype=None)

     # Check the input before phasing: genotype size and number of trios.
     itu.assert_size_equals(problem.genotype, 8, 1415)
     trio_count = len(problem.trios())
     assert_equal(trio_count, 869, 'Unexpected # of genotyped trios')

     # Phase with debugging switched off, then compare against the expected statistics.
     phase_param = PhaseParam(debug=False)
     self.phaser.run(problem, phase_param)
     itu.assert_problem_stats(problem, 22640, 20225, 144)
示例#3
0
 def problem_family(family, haplotype=False):
     '''Read the PLINK file set of a single nuclear family.

     All file names are derived from the prefix ``family``: the pedigree is
     family + '.tfam' and the frames file is family + '.frm'. When haplotype
     is true, family + '.hap.tped' is passed as the haplotype file; otherwise
     no haplotype file is passed.'''
     pedigree_file = family + '.tfam'
     if haplotype:
         haplotype_file = family + '.hap.tped'
     else:
         haplotype_file = None
     frames_file = family + '.frm'
     return io.read_plink(pedigree=pedigree_file, prefix=family, haplotype=haplotype_file,
                          info=None, idcoef=None, frames=frames_file)
示例#4
0
 def test_estimate_genotype_frequencies(self):
     '''Check the genotype frequencies stored in problem.info after running self.phaser.'''
     problem = io.read_plink(pedigree=itu.HUTT_PED, prefix=itu.GENOTYPE_SAMPLE, haplotype=None)
     itu.assert_size_equals(problem.genotype, 8, 1415)
     self.phaser.run(problem)

     # Reference table: 8 rows of 4 frequencies each.
     expected = np.array([
         [3.44876319e-01, 4.74911660e-01, 1.75265014e-01, 4.94699646e-03],
         [1.06007066e-02, 1.86572433e-01, 8.02826881e-01, 0.00000000e+00],
         [1.83745585e-02, 2.93286204e-01, 6.63604259e-01, 2.47349832e-02],
         [2.17667848e-01, 5.17314494e-01, 2.64310956e-01, 7.06713763e-04],
         [2.16961130e-01, 5.16607761e-01, 2.56537110e-01, 9.89399292e-03],
         [6.10600710e-01, 3.33568901e-01, 5.58303893e-02, 0.00000000e+00],
         [7.24381626e-01, 1.59010604e-01, 7.06713763e-04, 1.15901060e-01],
         [4.19081271e-01, 4.62190807e-01, 1.11660779e-01, 7.06713786e-03],
     ])
     assert_almost_equal(problem.info.genotype_frequency, expected,
                         decimal=5, err_msg='Wrong SNP genotype frequency estimation')

     # NOTE: the original test keeps two further checks disabled, comparing
     # allele_frequency(1) with frequency[:, 0] + 0.5 * frequency[:, 1] and
     # allele_frequency(2) with frequency[:, 2] + 0.5 * frequency[:, 1].
     # Only the complementarity of the two allele frequencies is asserted.
     second_allele = problem.info.allele_frequency(2)
     first_allele = problem.info.allele_frequency(1)
     assert_almost_equal(second_allele, 1.0 - first_allele,
                         decimal=5, err_msg='Wrong SNP genotype allele frequency estimation')
示例#5
0
 def setUp(self):
     '''Create the fixtures: the duo problem, the trivial phaser and the expected solution.'''
     unittest.TestCase.setUp(self)
     # The pedigree is read together with the genotype set; according to the
     # original note, this recodes the sample IDs to consecutive values so
     # that phasers can access them more easily.
     duo_prefix = itu.GENOTYPE_DUO
     self.problem = io.read_plink(prefix=duo_prefix,
                                  haplotype=None,
                                  pedigree=itu.GENOTYPE_DUO + '.tfam')
     self.phaser = trivial_phaser()

     # Expected results: the loaded pedigree combined with the solution genotypes.
     pedigree = self.problem.pedigree
     solution_genotype = io_genotype.read('plink', 'genotype', prefix=itu.GENOTYPE_DUO_SOLUTION)
     self.solution = Problem(pedigree, solution_genotype)
示例#6
0
 def setUp(self):
     '''Prepare a PLINK problem and a one-element phaser chain for the tests.'''
     unittest.TestCase.setUp(self)
     # Pedigree and genotypes are loaded in one call; per the original note,
     # sample IDs are recoded to consecutive values for easier access by phasers.
     self.problem = io.read_plink(pedigree=itu.HUTT_PED, prefix=itu.GENOTYPE_SAMPLE, haplotype=None)
     # Wrap the trivial phaser in a chain.
     only_phaser = trivial_phaser()
     self.phaser = new_phaser_chain([only_phaser])
示例#7
0
 def __npz_to_plink(self, p, file_name):
     '''Round-trip p through an NPZ file and a PLINK file set named after file_name.

     p is written to file_name + '.npz', that file is converted to PLINK
     format under the prefix file_name, and the problem read back from the
     PLINK file set is returned.'''
     npz_path = file_name + '.npz'
     # Step 1: save the problem in NPZ format.
     io.write_npz(p, npz_path)
     # Step 2: convert NPZ -> PLINK.
     io.npz_to_plink(npz_path, file_name)
     # Step 3: load the converted PLINK file set and hand it back.
     return io.read_plink(prefix=file_name)
示例#8
0
 def problem_family(family, haplotype=False):
     '''Load one nuclear family from the PLINK files that share the prefix ``family``.

     The haplotype file family + '.hap.tped' is supplied only when haplotype
     is true; otherwise None is passed for it.'''
     pedigree_path = family + '.tfam'
     hap_path = (family + '.hap.tped') if haplotype else None
     frames_path = family + '.frm'
     return io.read_plink(pedigree=pedigree_path, prefix=family, haplotype=hap_path,
                          info=None, idcoef=None, frames=frames_path)
示例#9
0
 def setUp(self):
     '''Create the fixtures: the trio problem, the trivial phaser and the expected solution.'''
     unittest.TestCase.setUp(self)
     # The pedigree is read together with the genotype set; according to the
     # original note, this recodes the sample IDs to consecutive values so
     # that phasers can access them more easily.
     trio_prefix = itu.GENOTYPE_TRIO
     self.problem = io.read_plink(prefix=trio_prefix,
                                  haplotype=None,
                                  pedigree=itu.GENOTYPE_TRIO + '.tfam')
     self.phaser = trivial_phaser()

     # Expected results: the loaded pedigree combined with the solution genotypes.
     pedigree = self.problem.pedigree
     solution_genotype = io_genotype.read('plink', 'genotype', prefix=itu.GENOTYPE_TRIO_SOLUTION)
     self.solution = Problem(pedigree, solution_genotype)
示例#10
0
 def __save_and_load_problem_plink(self, problem):
     '''Write problem to a temporary PLINK file set, read it back and return the result.

     The temporary files are removed afterwards whether or not the save/load
     succeeds. Cleanup skips files that do not exist, so that a failure in
     write_plink or read_plink is reported as such instead of being replaced
     by an OSError raised from the cleanup loop.'''
     # Reserve a temporary file name *before* entering the try block: if this
     # step fails there is nothing to clean up, and the finally clause must
     # not reference an unbound file_name.
     f = tempfile.NamedTemporaryFile(delete=False)
     file_name = f.name
     f.close()
     try:
         io.write_plink(problem, file_name)
         return io.read_plink(prefix=file_name)
     finally:
         # Delete test files. '' is the placeholder created above; the other
         # extensions are presumably those produced by io.write_plink
         # (TODO confirm against the writer).
         for ext in ['', '.pdg.tfam', '.tfam', '.tped', '.hap.tped', '.info']:
             path = file_name + ext
             if os.path.exists(path):
                 os.remove(path)
示例#11
0
    def test_estimate_genotype_frequencies(self):
        '''Verify problem.info.genotype_frequency against reference values after running self.phaser.'''
        problem = io.read_plink(pedigree=itu.HUTT_PED,
                                prefix=itu.GENOTYPE_SAMPLE,
                                haplotype=None)
        itu.assert_size_equals(problem.genotype, 8, 1415)
        self.phaser.run(problem)

        # Reference table: 8 rows of 4 frequencies each.
        reference_rows = [
            [3.44876319e-01, 4.74911660e-01, 1.75265014e-01, 4.94699646e-03],
            [1.06007066e-02, 1.86572433e-01, 8.02826881e-01, 0.00000000e+00],
            [1.83745585e-02, 2.93286204e-01, 6.63604259e-01, 2.47349832e-02],
            [2.17667848e-01, 5.17314494e-01, 2.64310956e-01, 7.06713763e-04],
            [2.16961130e-01, 5.16607761e-01, 2.56537110e-01, 9.89399292e-03],
            [6.10600710e-01, 3.33568901e-01, 5.58303893e-02, 0.00000000e+00],
            [7.24381626e-01, 1.59010604e-01, 7.06713763e-04, 1.15901060e-01],
            [4.19081271e-01, 4.62190807e-01, 1.11660779e-01, 7.06713786e-03],
        ]
        frequency = np.array(reference_rows)
        assert_almost_equal(problem.info.genotype_frequency,
                            frequency,
                            decimal=5,
                            err_msg='Wrong SNP genotype frequency estimation')

        # NOTE: the original test keeps two further checks disabled, comparing
        # allele_frequency(1) with frequency[:, 0] + 0.5 * frequency[:, 1] and
        # allele_frequency(2) with frequency[:, 2] + 0.5 * frequency[:, 1].
        # Only the complementarity of the two allele frequencies is asserted.
        allele_2 = problem.info.allele_frequency(2)
        allele_1 = problem.info.allele_frequency(1)
        assert_almost_equal(allele_2,
                            1.0 - allele_1,
                            decimal=5,
                            err_msg='Wrong SNP genotype allele frequency estimation')
示例#12
0
文件: map.py 项目: orenlivne/ober
     print usage
     sys.exit(util.EXIT_BAD_INPUT_ARGS)
 
 try:
     # Prepare file names, create directories
     (base_name, pedigree_file, out_base_name) = args
     mkdir_if_not_exists(os.path.dirname(out_base_name))
     if options.out_gxn:
         mkdir_if_not_exists(os.path.dirname(options.out_gxn))
     else:
         options.out_gxn = out_base_name
 
     npz_file = base_name + '.npz'
     
     # Convert plink tped -> npz
     problem = io.read_plink(prefix=base_name, pedigree=pedigree_file, haplotype=None,
                             verbose=options.debug)
 
     # Phase, impute, fill missing
     phaser = phase.build_phasing_pipeline(options)      
     request = phase.run_phasing_chain(phaser, problem)
     stats = request.stats
     print ''
     stats.pprint()
     print ''
 
     # Convert phased npz -> plink tped. Save only genotypes (haplotypes may need to be saved in the
     # stats object as a hash table for 'coloring the pedigree' at a later stage).
     genotype_file = out_base_name + '.tped'
     io.write_plink(problem, out_base_name, verbose=True,
                    save_node_type=False, save_genotype=True, save_haplotype=False, save_error=False)