def setUpClass(cls):
    """Build the shared alignment fixtures once for the whole test class.

    Sets the following class attributes:

    * ``bam_path`` -- first item returned by ``pbcore.data.getBamAndCmpH5()``
    * ``ds_reader`` -- strict ``AlignmentSet`` opened on ``bam_path`` with the
      lambda FASTA from ``pbcore.data`` as its reference
    * ``bam_readers`` -- the resource readers of ``ds_reader``
    * ``interval_lists`` -- result of
      ``summarize_coverage.build_interval_lists`` on those readers
    """
    cls.bam_path = pbcore.data.getBamAndCmpH5()[0]
    lambda_reference = pbcore.data.getLambdaFasta()
    cls.ds_reader = AlignmentSet(
        cls.bam_path,
        strict=True,
        reference=lambda_reference,
    )
    cls.bam_readers = cls.ds_reader.resourceReaders()
    cls.interval_lists = summarize_coverage.build_interval_lists(
        cls.bam_readers
    )
def setUpClass(cls):
    """Build the shared alignment fixtures once for the whole test class.

    Sets the following class attributes:

    * ``xml_path`` -- the ``"aligned-xml"`` file from ``pbtestdata``
    * ``ds_reader`` -- strict ``AlignmentSet`` opened on ``xml_path`` with the
      ``"lambda-fasta"`` file from ``pbtestdata`` as its reference
    * ``bam_readers`` -- the resource readers of ``ds_reader``
    * ``interval_lists`` -- result of
      ``summarize_coverage.build_interval_lists`` on those readers
    """
    cls.xml_path = pbtestdata.get_file("aligned-xml")
    lambda_reference = pbtestdata.get_file("lambda-fasta")
    cls.ds_reader = AlignmentSet(
        cls.xml_path,
        strict=True,
        reference=lambda_reference,
    )
    cls.bam_readers = cls.ds_reader.resourceReaders()
    cls.interval_lists = summarize_coverage.build_interval_lists(
        cls.bam_readers
    )
def test_coverages(self):
    """Check that bfx summarize_coverage reproduces the pbpy GFF records.

    Every record generated by ``summarize_coverage.generate_gff_records``
    must have a counterpart (same seqid, start and end) in the pbpy GFF at
    ``self.gff_path``, with matching coverage fields.  Afterwards no pbpy
    record may be left unmatched; when ``self.selected_reference`` is set,
    only leftovers on that reference count.
    """
    # Index the pbpy GFF by (first whitespace-delimited seqid token, start, end).
    expected_records = {
        (rec.seqid.split()[0], rec.start, rec.end): rec
        for rec in GffIO.GffReader(self.gff_path)
    }

    # Recapitulate the first few steps of summarize_coverage.main.
    _, readers = self._get_readers()
    interval_lists = summarize_coverage.build_interval_lists(readers)
    region_size_func = functools.partial(
        summarize_coverage.get_region_size,
        num_refs=len(interval_lists),
        region_size=0,
        num_regions=500,
        force_num_regions=False,
    )

    for ref_group_id in sorted(interval_lists):
        generated = summarize_coverage.generate_gff_records(
            interval_lists[ref_group_id],
            readers,
            ref_group_id,
            region_size_func,
            {},
        )
        for actual in generated:
            lookup_key = (actual.seqid, actual.start, actual.end)
            self.assertIn(lookup_key, expected_records)
            # Pop so that whatever remains at the end was never matched.
            expected = expected_records.pop(lookup_key)

            self.assertEqual(expected.cov, actual.cov)
            self.assertEqual(expected.gaps, actual.gaps)

            # cov2 is a comma-separated list of numbers; compare as floats.
            expected_cov2 = list(map(float, expected.cov2.split(',')))
            actual_cov2 = list(map(float, actual.cov2.split(',')))
            for expected_value, actual_value in zip(expected_cov2,
                                                    actual_cov2):
                self.assertAlmostEqual(expected_value, actual_value)

            for field in ("source", "type", "score", "strand", "phase"):
                self.assertEqual(getattr(expected, field),
                                 getattr(actual, field))

    if self.selected_reference is None:
        unmatched = expected_records
    else:
        unmatched = {
            key: record
            for key, record in expected_records.items()
            if key[0] == self.selected_reference
        }
    self.assertEqual(len(unmatched), 0)
def setUpClass(cls):
    """Prepare class-level fixtures shared by every test in the class.

    Resolves the ``"aligned-xml"`` and ``"lambda-fasta"`` files through
    ``pbtestdata``, opens a strict ``AlignmentSet`` on them, and caches the
    set's resource readers together with the interval lists that
    ``summarize_coverage.build_interval_lists`` derives from those readers.
    """
    cls.xml_path = pbtestdata.get_file("aligned-xml")
    reference_fasta = pbtestdata.get_file("lambda-fasta")
    cls.ds_reader = AlignmentSet(
        cls.xml_path, strict=True, reference=reference_fasta)

    readers = cls.ds_reader.resourceReaders()
    cls.bam_readers = readers
    cls.interval_lists = summarize_coverage.build_interval_lists(readers)
def test_coverages(self):
    """Compare bfx summarize_coverage output against the pbpy GFF.

    Each record produced by ``summarize_coverage.generate_gff_records`` is
    looked up in the pbpy GFF (``self.gff_path``) by seqid, start and end,
    and its coverage fields are compared.  The test then asserts that no
    pbpy record is left over, restricted to ``self.selected_reference``
    when that attribute is not ``None``.
    """
    # Load the pbpy GFF into a dict keyed by region for direct lookup.
    pbpy_by_region = {}
    for pbpy_rec in GffIO.GffReader(self.gff_path):
        seq_name = pbpy_rec.seqid.split()[0]
        pbpy_by_region[(seq_name, pbpy_rec.start, pbpy_rec.end)] = pbpy_rec

    # Recapitulate the first few steps of summarize_coverage.main.
    _, readers = self._get_readers()
    interval_lists = summarize_coverage.build_interval_lists(readers)
    frozen_region_size = functools.partial(
        summarize_coverage.get_region_size,
        num_refs=len(interval_lists),
        region_size=0,
        num_regions=500,
        force_num_regions=False)

    compared_fields = ("source", "type", "score", "strand", "phase")
    for ref_group_id in sorted(interval_lists):
        bfx_records = summarize_coverage.generate_gff_records(
            interval_lists[ref_group_id], readers, ref_group_id,
            frozen_region_size, {})
        for bfx_rec in bfx_records:
            region = (bfx_rec.seqid, bfx_rec.start, bfx_rec.end)
            self.assertIn(region, pbpy_by_region)
            # Remove matched records so leftovers can be detected below.
            pbpy_rec = pbpy_by_region.pop(region)

            self.assertEqual(pbpy_rec.cov, bfx_rec.cov)
            self.assertEqual(pbpy_rec.gaps, bfx_rec.gaps)

            # cov2 holds comma-separated numbers; compare them as floats.
            pbpy_values = [float(tok) for tok in pbpy_rec.cov2.split(',')]
            bfx_values = [float(tok) for tok in bfx_rec.cov2.split(',')]
            for pbpy_value, bfx_value in zip(pbpy_values, bfx_values):
                self.assertAlmostEqual(pbpy_value, bfx_value)

            for name in compared_fields:
                self.assertEqual(getattr(pbpy_rec, name),
                                 getattr(bfx_rec, name))

    if self.selected_reference is None:
        leftovers = pbpy_by_region
    else:
        leftovers = {}
        for region in pbpy_by_region:
            if region[0] == self.selected_reference:
                leftovers[region] = pbpy_by_region[region]
    self.assertEqual(len(leftovers), 0)