def setUpClass(cls):
     """Create the alignment fixtures shared by the tests of this class.

     Stores on ``cls``: ``bam_path`` (first item returned by
     ``pbcore.data.getBamAndCmpH5()``), ``ds_reader`` (an ``AlignmentSet``
     opened on that path), ``bam_readers`` (its resource readers) and
     ``interval_lists`` (built from those readers).
     """
     cls.bam_path = pbcore.data.getBamAndCmpH5()[0]
     reference_fasta = pbcore.data.getLambdaFasta()
     cls.ds_reader = AlignmentSet(
         cls.bam_path, strict=True, reference=reference_fasta)

     # The interval lists are derived from the same readers kept on the class.
     resource_readers = cls.ds_reader.resourceReaders()
     cls.bam_readers = resource_readers
     cls.interval_lists = summarize_coverage.build_interval_lists(
         resource_readers)
 def setUpClass(cls):
     """Create the alignment fixtures shared by the tests of this class.

     Stores on ``cls``: ``xml_path`` (the "aligned-xml" test file),
     ``ds_reader`` (an ``AlignmentSet`` opened on that file with the
     "lambda-fasta" test file as reference), ``bam_readers`` (its resource
     readers) and ``interval_lists`` (built from those readers).
     """
     cls.xml_path = pbtestdata.get_file("aligned-xml")
     reference_fasta = pbtestdata.get_file("lambda-fasta")
     cls.ds_reader = AlignmentSet(
         cls.xml_path, strict=True, reference=reference_fasta)

     # The interval lists are derived from the same readers kept on the class.
     resource_readers = cls.ds_reader.resourceReaders()
     cls.bam_readers = resource_readers
     cls.interval_lists = summarize_coverage.build_interval_lists(
         resource_readers)
    def test_coverages(self):
        """Check that bfx and pbpy summarize_coverage agree on every region.

        Each GFF record generated by the bfx ``summarize_coverage`` code must
        have a counterpart in the pbpy GFF at ``self.gff_path`` with the same
        (seqid, start, end) key and matching ``cov``, ``gaps``, ``cov2``,
        ``source``, ``type``, ``score``, ``strand`` and ``phase`` values.
        Afterwards no pbpy record may be left unmatched; when
        ``self.selected_reference`` is set, only leftovers on that reference
        count.
        """
        # Index the pbpy GFF records by (seqid, start, end). Only the first
        # whitespace-delimited token of the pbpy seqid is kept for the key.
        pbpy_gff_records = {}
        for gff_record in GffIO.GffReader(self.gff_path):
            record_key = (gff_record.seqid.split()[0], gff_record.start,
                          gff_record.end)
            pbpy_gff_records[record_key] = gff_record

        # Recapitulate the first few steps of summarize_coverage.main.
        # Only the per-resource readers are needed below.
        _, readers = self._get_readers()
        interval_lists = summarize_coverage.build_interval_lists(readers)
        get_region_size_frozen = functools.partial(
            summarize_coverage.get_region_size,
            num_refs=len(interval_lists),
            region_size=0,
            num_regions=500,
            force_num_regions=False)

        for ref_group_id in sorted(interval_lists):
            gff_generator = summarize_coverage.generate_gff_records(
                interval_lists[ref_group_id], readers, ref_group_id,
                get_region_size_frozen, {})

            for bfx_gff_record in gff_generator:
                bfx_key = (bfx_gff_record.seqid, bfx_gff_record.start,
                           bfx_gff_record.end)
                self.assertIn(bfx_key, pbpy_gff_records)
                # Pop the match so that whatever remains at the end is a
                # pbpy record the bfx code never produced.
                pbpy_gff_record = pbpy_gff_records.pop(bfx_key)

                self.assertEqual(pbpy_gff_record.cov, bfx_gff_record.cov)
                self.assertEqual(pbpy_gff_record.gaps, bfx_gff_record.gaps)

                # cov2 is a comma-separated list of floats; compare it
                # numerically rather than as text.
                pbpy_cov2 = [float(k) for k in pbpy_gff_record.cov2.split(',')]
                bfx_cov2 = [float(k) for k in bfx_gff_record.cov2.split(',')]
                # zip() stops at the shorter list, so check the lengths
                # explicitly or a missing value would go unnoticed.
                self.assertEqual(len(pbpy_cov2), len(bfx_cov2))
                for pbpy_value, bfx_value in zip(pbpy_cov2, bfx_cov2):
                    self.assertAlmostEqual(pbpy_value, bfx_value)

                self.assertEqual(pbpy_gff_record.source, bfx_gff_record.source)
                self.assertEqual(pbpy_gff_record.type, bfx_gff_record.type)
                self.assertEqual(pbpy_gff_record.score, bfx_gff_record.score)
                self.assertEqual(pbpy_gff_record.strand, bfx_gff_record.strand)
                self.assertEqual(pbpy_gff_record.phase, bfx_gff_record.phase)

        if self.selected_reference is not None:
            # Leftovers on other references are ignored when a single
            # reference was selected.
            remaining_pbpy_records = {
                record_key: gff_record
                for record_key, gff_record in pbpy_gff_records.items()
                if record_key[0] == self.selected_reference
            }
        else:
            remaining_pbpy_records = pbpy_gff_records

        self.assertEqual(len(remaining_pbpy_records), 0)
 def setUpClass(cls):
     """Prepare the class-level alignment fixtures used by the tests.

     Sets ``cls.xml_path`` to the "aligned-xml" test file, opens it as an
     ``AlignmentSet`` (``cls.ds_reader``) with the "lambda-fasta" test file
     as reference, and stores the dataset's resource readers
     (``cls.bam_readers``) and the interval lists built from them
     (``cls.interval_lists``).
     """
     cls.xml_path = pbtestdata.get_file("aligned-xml")
     lambda_fasta = pbtestdata.get_file("lambda-fasta")
     cls.ds_reader = AlignmentSet(
         cls.xml_path, strict=True, reference=lambda_fasta)

     # Build the interval lists from the readers that are kept on the class.
     readers = cls.ds_reader.resourceReaders()
     cls.bam_readers = readers
     cls.interval_lists = summarize_coverage.build_interval_lists(readers)
    def test_coverages(self):
        """Check that bfx and pbpy summarize_coverage agree on every region.

        Each GFF record generated by the bfx ``summarize_coverage`` code must
        have a counterpart in the pbpy GFF at ``self.gff_path`` with the same
        (seqid, start, end) key and matching ``cov``, ``gaps``, ``cov2``,
        ``source``, ``type``, ``score``, ``strand`` and ``phase`` values.
        Afterwards no pbpy record may be left unmatched; when
        ``self.selected_reference`` is set, only leftovers on that reference
        count.
        """
        # Index the pbpy GFF records by (seqid, start, end). Only the first
        # whitespace-delimited token of the pbpy seqid is kept for the key.
        pbpy_gff_records = {}
        for gff_record in GffIO.GffReader(self.gff_path):
            record_key = (gff_record.seqid.split()[0], gff_record.start,
                          gff_record.end)
            pbpy_gff_records[record_key] = gff_record

        # Recapitulate the first few steps of summarize_coverage.main.
        # Only the per-resource readers are needed below.
        _, readers = self._get_readers()
        interval_lists = summarize_coverage.build_interval_lists(readers)
        get_region_size_frozen = functools.partial(
            summarize_coverage.get_region_size, num_refs=len(interval_lists),
            region_size=0, num_regions=500, force_num_regions=False)

        for ref_group_id in sorted(interval_lists):
            gff_generator = summarize_coverage.generate_gff_records(
                interval_lists[ref_group_id], readers, ref_group_id,
                get_region_size_frozen, {})

            for bfx_gff_record in gff_generator:
                bfx_key = (bfx_gff_record.seqid, bfx_gff_record.start,
                           bfx_gff_record.end)
                self.assertIn(bfx_key, pbpy_gff_records)
                # Pop the match so that whatever remains at the end is a
                # pbpy record the bfx code never produced.
                pbpy_gff_record = pbpy_gff_records.pop(bfx_key)

                self.assertEqual(pbpy_gff_record.cov, bfx_gff_record.cov)
                self.assertEqual(pbpy_gff_record.gaps, bfx_gff_record.gaps)

                # cov2 is a comma-separated list of floats; compare it
                # numerically rather than as text.
                pbpy_cov2 = [float(k) for k in pbpy_gff_record.cov2.split(',')]
                bfx_cov2 = [float(k) for k in bfx_gff_record.cov2.split(',')]
                # zip() stops at the shorter list, so check the lengths
                # explicitly or a missing value would go unnoticed.
                self.assertEqual(len(pbpy_cov2), len(bfx_cov2))
                for pbpy_value, bfx_value in zip(pbpy_cov2, bfx_cov2):
                    self.assertAlmostEqual(pbpy_value, bfx_value)

                self.assertEqual(pbpy_gff_record.source, bfx_gff_record.source)
                self.assertEqual(pbpy_gff_record.type, bfx_gff_record.type)
                self.assertEqual(pbpy_gff_record.score, bfx_gff_record.score)
                self.assertEqual(pbpy_gff_record.strand, bfx_gff_record.strand)
                self.assertEqual(pbpy_gff_record.phase, bfx_gff_record.phase)

        if self.selected_reference is not None:
            # Leftovers on other references are ignored when a single
            # reference was selected.
            remaining_pbpy_records = {
                record_key: gff_record
                for record_key, gff_record in pbpy_gff_records.items()
                if record_key[0] == self.selected_reference
            }
        else:
            remaining_pbpy_records = pbpy_gff_records

        self.assertEqual(len(remaining_pbpy_records), 0)