def run_and_get_vcf_rows(self):
    """Run ``intrahost.merge_to_vcf`` on this fixture and return the rows.

    The output is written to a fresh temporary ``.vcf.gz`` path. The
    per-sample inputs (files from ``self.dump_isnv_tmp_file`` and entries
    of ``self.genomeFastas``) are both listed in ``self.sample_order``
    order. The result is read back with ``util.vcf.VcfReader`` and every
    item yielded by its ``get()`` is returned as a list.
    """
    vcf_path = util.file.mkstempfname('.vcf.gz')
    isnv_files = [self.dump_isnv_tmp_file(name) for name in self.sample_order]
    genome_fastas = [self.genomeFastas[name] for name in self.sample_order]
    intrahost.merge_to_vcf(
        self.ref, vcf_path, self.sample_order, isnv_files, genome_fastas)
    with util.vcf.VcfReader(vcf_path) as reader:
        rows = list(reader.get())
    return rows
def test_headers_with_two_samps(self):
    """Check the sample names and contig lengths reported for two samples.

    Both samples use the same empty temporary ``.txt`` file as their
    per-sample input, so only header-level information is asserted:
    ``samples()`` must equal the names passed in, and ``chrlens()`` must
    map each reference id to its sequence length (8 and 5 here).
    """
    ref_fasta = makeTempFasta([('ref1', 'ATCGTTCA'), ('ref2', 'GGCCC')])
    s1_fasta = makeTempFasta([('s1_1', 'ATCGCA'), ('s1_2', 'GGCCC')])
    s2_fasta = makeTempFasta([('s2_1', 'ATCGTTCA'), ('s2_2', 'GGCCC')])
    empty_input = util.file.mkstempfname('.txt')
    vcf_path = util.file.mkstempfname('.vcf.gz')

    sample_names = ['s1', 's2']
    intrahost.merge_to_vcf(
        ref_fasta,
        vcf_path,
        sample_names,
        [empty_input, empty_input],
        [s1_fasta, s2_fasta],
    )

    expected_chrlens = {'ref1': 8, 'ref2': 5}
    with util.vcf.VcfReader(vcf_path) as reader:
        self.assertEqual(reader.samples(), ['s1', 's2'])
        self.assertEqual(reader.chrlens(), expected_chrlens)
def run_and_get_vcf_rows(self, retree=1):
    """Align the fixture's samples, merge to a VCF, and return its rows.

    ``retree`` is forwarded unchanged to ``self.multi_align_samples``.
    The names handed to ``intrahost.merge_to_vcf`` are the values of
    ``self.sequence_order`` flattened into one list, in the mapping's
    iteration order. The output is read back with ``util.vcf.VcfReader``
    and everything yielded by ``get()`` is returned as a list.
    """
    vcf_path = util.file.mkstempfname('.vcf.gz')
    self.multi_align_samples(retree=retree)

    # Flatten the per-key id groups into a single list.
    seq_ids = []
    for id_group in self.sequence_order.values():
        seq_ids.extend(id_group)

    isnv_files = [self.dump_isnv_tmp_file(name) for name in self.sample_order]
    intrahost.merge_to_vcf(
        self.ref, vcf_path, seq_ids, isnv_files, self.alignedFastas)

    with util.vcf.VcfReader(vcf_path) as reader:
        rows = list(reader.get())
    return rows
def run_and_get_vcf_rows(self, retree=1):
    """Return the rows of the VCF produced for this fixture's samples.

    Steps, in order: reserve a temporary ``.vcf.gz`` path, call
    ``self.multi_align_samples(retree=retree)``, flatten the values of
    ``self.sequence_order`` into one id list, call
    ``intrahost.merge_to_vcf``, then collect everything
    ``util.vcf.VcfReader(...).get()`` yields.
    """
    out_path = util.file.mkstempfname('.vcf.gz')
    self.multi_align_samples(retree=retree)

    flattened_ids = [
        seq_id
        for id_group in self.sequence_order.values()
        for seq_id in id_group
    ]
    per_sample_inputs = [
        self.dump_isnv_tmp_file(sample) for sample in self.sample_order
    ]

    intrahost.merge_to_vcf(
        self.ref,
        out_path,
        flattened_ids,
        per_sample_inputs,
        self.alignedFastas,
    )

    with util.vcf.VcfReader(out_path) as vcf_reader:
        rows = list(vcf_reader.get())
    return rows
def test_empty_output(self):
    """Check that an empty input file still gives a non-empty, header-only file.

    The same check runs twice, once for a ``.vcf`` output path and once
    for ``.vcf.gz``: the file must have a size greater than zero, and
    every line read through ``util.file.open_or_gzopen`` must start with
    ``#``.
    """
    ref_fasta = makeTempFasta([('ref1', 'ATCGCA')])
    sample_fasta = makeTempFasta([('s1_1', 'ATCGCA')])
    empty_input = util.file.mkstempfname('.txt')

    for suffix in ('.vcf', '.vcf.gz'):
        vcf_path = util.file.mkstempfname(suffix)
        intrahost.merge_to_vcf(
            ref_fasta, vcf_path, ['s1'], [empty_input], [sample_fasta])
        self.assertGreater(os.path.getsize(vcf_path), 0)
        with util.file.open_or_gzopen(vcf_path, 'rt') as handle:
            for text_line in handle:
                self.assertTrue(text_line.startswith('#'))
def run_and_get_vcf_rows(self, retree=1, omit_samplenames=False):
    """Align the fixture's samples, merge to a VCF, and return its rows.

    Args:
        retree: Forwarded unchanged to ``self.multi_align_samples``.
        omit_samplenames: When truthy, an empty list is passed to
            ``intrahost.merge_to_vcf`` in place of ``self.sample_order``
            as the sample-name argument. The per-sample input files are
            still built from ``self.sample_order`` either way.

    Returns:
        A list of everything yielded by ``util.vcf.VcfReader(...).get()``
        for the temporary ``.vcf.gz`` output.
    """
    vcf_path = util.file.mkstempfname('.vcf.gz')
    self.multi_align_samples(retree=retree)

    # Only the sample-name argument depends on the flag.
    sample_names = [] if omit_samplenames else self.sample_order
    isnv_files = [self.dump_isnv_tmp_file(name) for name in self.sample_order]
    intrahost.merge_to_vcf(
        self.ref, vcf_path, sample_names, isnv_files, self.alignedFastas)

    with util.vcf.VcfReader(vcf_path) as reader:
        rows = list(reader.get())
    return rows
def run_and_get_vcf_rows(self, retree=1, omit_samplenames=False):
    """Return the rows of the VCF produced for this fixture's samples.

    Args:
        retree: Passed through as ``self.multi_align_samples(retree=...)``.
        omit_samplenames: If truthy, ``intrahost.merge_to_vcf`` receives
            ``[]`` as its sample-name argument; otherwise it receives
            ``self.sample_order``.

    Returns:
        The items yielded by ``phylo.vcf.VcfReader(...).get()`` on the
        temporary ``.vcf.gz`` output, collected into a list.
    """
    out_path = util.file.mkstempfname('.vcf.gz')
    self.multi_align_samples(retree=retree)

    if omit_samplenames:
        names_arg = []
    else:
        names_arg = self.sample_order

    per_sample_inputs = [
        self.dump_isnv_tmp_file(sample) for sample in self.sample_order
    ]
    intrahost.merge_to_vcf(
        self.ref,
        out_path,
        names_arg,
        per_sample_inputs,
        self.alignedFastas,
    )

    with phylo.vcf.VcfReader(out_path) as vcf_reader:
        rows = list(vcf_reader.get())
    return rows