def setUp(self):
    '''Prepare the fixture: a problem read from PLINK data and a one-stage phaser chain.'''
    unittest.TestCase.setUp(self)
    # Reading the pedigree together with the genotype set recodes the sample IDs
    # to consecutive values, which makes them easier for phasers to access.
    plink_source = dict(pedigree=itu.HUTT_PED, prefix=itu.GENOTYPE_SAMPLE, haplotype=None)
    self.problem = io.read_plink(**plink_source)
    # The chain under test consists of the trivial phaser only.
    stages = [trivial_phaser()]
    self.phaser = new_phaser_chain(stages)
def test_phase_family(self):
    '''Verify the phasing statistics obtained on trivial cases across all genotyped trios.'''
    data = io.read_plink(pedigree=itu.HUTT_PED, prefix=itu.GENOTYPE_SAMPLE, haplotype=None)

    # Sanity-check the loaded data set before phasing it.
    itu.assert_size_equals(data.genotype, 8, 1415)
    num_trios = len(data.trios())
    assert_equal(num_trios, 869, 'Unexpected # of genotyped trios')

    # Run the fixture's phaser with debugging off, then compare the resulting
    # problem statistics against the known-good values.
    params = PhaseParam(debug=False)
    self.phaser.run(data, params)
    itu.assert_problem_stats(data, 22640, 20225, 144)
def problem_family(family, haplotype=False):
    '''Read the PLINK data set of a single nuclear family.

    family is the common path prefix of the family's file set: the pedigree is
    taken from family + '.tfam' and the frames from family + '.frm'.
    If haplotype is true, initial haplotypes are read from family + '.hap.tped';
    otherwise no haplotype file is passed to the reader.'''
    if haplotype:
        haplotype_file = family + '.hap.tped'
    else:
        haplotype_file = None
    return io.read_plink(pedigree=family + '.tfam',
                         prefix=family,
                         haplotype=haplotype_file,
                         info=None,
                         idcoef=None,
                         frames=family + '.frm')
def test_estimate_genotype_frequencies(self):
    '''Check the per-SNP genotype frequencies estimated by running the fixture's phaser
    (the prepare module) against known values.'''
    data = io.read_plink(pedigree=itu.HUTT_PED, prefix=itu.GENOTYPE_SAMPLE, haplotype=None)
    itu.assert_size_equals(data.genotype, 8, 1415)
    self.phaser.run(data)

    # Reference genotype frequencies: 8 rows of 4 values each.
    expected = np.array([
        [3.44876319e-01, 4.74911660e-01, 1.75265014e-01, 4.94699646e-03],
        [1.06007066e-02, 1.86572433e-01, 8.02826881e-01, 0.00000000e+00],
        [1.83745585e-02, 2.93286204e-01, 6.63604259e-01, 2.47349832e-02],
        [2.17667848e-01, 5.17314494e-01, 2.64310956e-01, 7.06713763e-04],
        [2.16961130e-01, 5.16607761e-01, 2.56537110e-01, 9.89399292e-03],
        [6.10600710e-01, 3.33568901e-01, 5.58303893e-02, 0.00000000e+00],
        [7.24381626e-01, 1.59010604e-01, 7.06713763e-04, 1.15901060e-01],
        [4.19081271e-01, 4.62190807e-01, 1.11660779e-01, 7.06713786e-03],
    ])
    assert_almost_equal(data.info.genotype_frequency, expected, decimal=5,
                        err_msg='Wrong SNP genotype frequency estimation')

    # NOTE: comparing allele_frequency(1) with expected[:, 0] + 0.5 * expected[:, 1] and
    # allele_frequency(2) with expected[:, 2] + 0.5 * expected[:, 1] is currently disabled;
    # only the complementarity of the two allele frequencies is verified.
    second_allele = data.info.allele_frequency(2)
    complement_of_first = 1.0 - data.info.allele_frequency(1)
    assert_almost_equal(second_allele, complement_of_first, decimal=5,
                        err_msg='Wrong SNP genotype allele frequency estimation')
def setUp(self):
    '''Set up the duo fixture: the input problem, the phaser and the expected solution.'''
    unittest.TestCase.setUp(self)
    # Reading the pedigree together with the genotype set recodes the sample IDs
    # to consecutive values, which makes them easier for phasers to access.
    duo_prefix = itu.GENOTYPE_DUO
    self.problem = io.read_plink(prefix=duo_prefix, haplotype=None, pedigree=duo_prefix + '.tfam')
    self.phaser = trivial_phaser()

    # Expected outcome: the same pedigree paired with the reference genotype set.
    solution_genotype = io_genotype.read('plink', 'genotype', prefix=itu.GENOTYPE_DUO_SOLUTION)
    self.solution = Problem(self.problem.pedigree, solution_genotype)
def __npz_to_plink(self, p, file_name):
    '''Round-trip the problem p through disk: save it as file_name + '.npz', convert
    that file to a PLINK file set named by file_name, and return the problem read
    back from the PLINK file set.'''
    npz_path = file_name + '.npz'
    # Write the problem in npz format
    io.write_npz(p, npz_path)
    # Convert npz -> plink
    io.npz_to_plink(npz_path, file_name)
    # Read the PLINK file set back; the caller compares it with the original problem
    return io.read_plink(prefix=file_name)
def setUp(self):
    '''Set up the trio fixture: the input problem, the phaser and the expected solution.'''
    unittest.TestCase.setUp(self)
    # Reading the pedigree together with the genotype set recodes the sample IDs
    # to consecutive values, which makes them easier for phasers to access.
    trio_prefix = itu.GENOTYPE_TRIO
    self.problem = io.read_plink(prefix=trio_prefix, haplotype=None, pedigree=trio_prefix + '.tfam')
    self.phaser = trivial_phaser()

    # Expected outcome: the same pedigree paired with the reference genotype set.
    solution_genotype = io_genotype.read('plink', 'genotype', prefix=itu.GENOTYPE_TRIO_SOLUTION)
    self.solution = Problem(self.problem.pedigree, solution_genotype)
def __save_and_load_problem_plink(self, problem):
    '''Save a problem to a temporary PLINK file set and load it back.

    Returns the problem object read back from the file set. The temporary
    files are removed before returning, whether or not the round trip succeeded;
    an error raised while saving or loading propagates to the caller.'''
    import errno

    # Reserve a unique temporary path to use as the PLINK file set prefix. This is
    # done before the try block so that file_name is always bound during cleanup.
    f = tempfile.NamedTemporaryFile(delete=False)
    file_name = f.name
    f.close()
    try:
        io.write_plink(problem, file_name)
        return io.read_plink(prefix=file_name)
    finally:
        # Delete test files. A file that was never written (e.g. because saving
        # failed part-way) is skipped, so that cleanup neither masks the original
        # error nor leaves the remaining files behind. Any other removal error
        # is still raised.
        for ext in ['', '.pdg.tfam', '.tfam', '.tped', '.hap.tped', '.info']:
            try:
                os.remove(file_name + ext)
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise
def test_estimate_genotype_frequencies(self):
    '''Verify the genotype frequencies estimated for each SNP when the fixture's phaser
    (the prepare module) is run on the sample data set.'''
    sample = io.read_plink(pedigree=itu.HUTT_PED, prefix=itu.GENOTYPE_SAMPLE, haplotype=None)
    itu.assert_size_equals(sample.genotype, 8, 1415)
    self.phaser.run(sample)

    # Known-good genotype frequencies (8 rows, 4 columns).
    reference_rows = [
        [3.44876319e-01, 4.74911660e-01, 1.75265014e-01, 4.94699646e-03],
        [1.06007066e-02, 1.86572433e-01, 8.02826881e-01, 0.00000000e+00],
        [1.83745585e-02, 2.93286204e-01, 6.63604259e-01, 2.47349832e-02],
        [2.17667848e-01, 5.17314494e-01, 2.64310956e-01, 7.06713763e-04],
        [2.16961130e-01, 5.16607761e-01, 2.56537110e-01, 9.89399292e-03],
        [6.10600710e-01, 3.33568901e-01, 5.58303893e-02, 0.00000000e+00],
        [7.24381626e-01, 1.59010604e-01, 7.06713763e-04, 1.15901060e-01],
        [4.19081271e-01, 4.62190807e-01, 1.11660779e-01, 7.06713786e-03],
    ]
    reference = np.array(reference_rows)
    assert_almost_equal(sample.info.genotype_frequency, reference,
                        decimal=5, err_msg='Wrong SNP genotype frequency estimation')

    # NOTE: the direct checks of allele_frequency(1) against reference[:, 0] + 0.5 * reference[:, 1]
    # and of allele_frequency(2) against reference[:, 2] + 0.5 * reference[:, 1] are disabled.
    # What remains is the check that the two allele frequencies sum to one.
    freq_allele_2 = sample.info.allele_frequency(2)
    one_minus_freq_allele_1 = 1.0 - sample.info.allele_frequency(1)
    assert_almost_equal(freq_allele_2, one_minus_freq_allele_1,
                        decimal=5, err_msg='Wrong SNP genotype allele frequency estimation')
print usage sys.exit(util.EXIT_BAD_INPUT_ARGS) try: # Prepare file names, create directories (base_name, pedigree_file, out_base_name) = args mkdir_if_not_exists(os.path.dirname(out_base_name)) if options.out_gxn: mkdir_if_not_exists(os.path.dirname(options.out_gxn)) else: options.out_gxn = out_base_name npz_file = base_name + '.npz' # Convert plink tped -> npz problem = io.read_plink(prefix=base_name, pedigree=pedigree_file, haplotype=None, verbose=options.debug) # Phase, impute, fill missing phaser = phase.build_phasing_pipeline(options) request = phase.run_phasing_chain(phaser, problem) stats = request.stats print '' stats.pprint() print '' # Convert phased npz -> plink tped. Save only genotypes (haplotypes may need to be saved in the stats # object as a hash table for 'coloring the pedigree' at a later stage. genotype_file = out_base_name + '.tped' io.write_plink(problem, out_base_name, verbose=True, save_node_type=False, save_genotype=True, save_haplotype=False, save_error=False)