示例#1
0
    def test_single_iter_overlapping(self):
        """Check how overlapping records from a single input are grouped.

        Three groups are expected: the first record alone, the second and
        third records together, and the fourth record alone. After that the
        iterator must be exhausted.
        """
        source = TestMafOverlapIterator.RecordsOverlapping
        overlaps = LocatableOverlapIterator(
            [iter(TestMafOverlapIterator.RecordsOverlapping)],
            by_barcodes=False)

        # Each entry lists the indices into ``source`` expected in one group.
        expected_groups = [[0], [1, 2], [3]]
        for indices in expected_groups:
            group = next(overlaps)
            # a single input iterator gives a single list per group
            self.assertEqual(len(group), 1)
            self.assertEqual(len(group[0]), len(indices))
            for position, index in enumerate(indices):
                self.assertEqual(group[0][position], source[index])

        with self.assertRaises(StopIteration):
            next(overlaps)
示例#2
0
    def test_by_barcode_mismatching(self):
        """Compare grouping with and without barcode matching.

        Both inputs are built from the same record list but carry swapped
        barcode values. With ``by_barcodes=True`` every group is expected to
        hold a single record (all of the left input first, then the right);
        with ``by_barcodes=False`` every group is expected to pair the
        records found at the same position in both inputs.
        """
        template_left = TestMafOverlapIterator.RecordsNoOverlap
        template_right = TestMafOverlapIterator.RecordsNoOverlap

        # add different tumor/normal barcodes to left and right
        left = [
            DummyRecord(rec.chromosome, rec.start, rec.end, "A", "B")
            for rec in template_left
        ]
        right = [
            DummyRecord(rec.chromosome, rec.start, rec.end, "B", "A")
            for rec in template_right
        ]

        # iterate by barcode
        by_barcode = LocatableOverlapIterator([iter(left), iter(right)],
                                              by_barcodes=True)

        total = 0
        for total, group in enumerate(by_barcode, start=1):
            position = total - 1
            self.assertEqual(len(group), 2)
            if position >= len(left):
                # past the left records: only the right list is populated
                self.assertEqual(len(group[0]), 0)
                self.assertEqual(len(group[1]), 1)
                self.assertEqual(group[1][0], right[position - len(left)])
            else:
                self.assertEqual(len(group[0]), 1)
                self.assertEqual(len(group[1]), 0)
                self.assertEqual(group[0][0], left[position])
        self.assertEqual(total, len(left) + len(right))

        # ignore barcode
        ignoring_barcode = LocatableOverlapIterator(
            [iter(left), iter(right)], by_barcodes=False)

        total = 0
        for total, group in enumerate(ignoring_barcode, start=1):
            position = total - 1
            self.assertEqual(len(group), 2)
            self.assertEqual(len(group[0]) + len(group[1]), 2)
            self.assertEqual(group[0][0], left[position])
            self.assertEqual(group[1][0], right[position])
        self.assertEqual(total, len(left))
示例#3
0
    def test_single_iter_no_overlap(self):
        """Check that a single input of non-overlapping records yields one
        single-record group per record, in input order."""
        source = TestMafOverlapIterator.RecordsNoOverlap
        overlaps = LocatableOverlapIterator([iter(source)], by_barcodes=False)

        total = 0
        for total, group in enumerate(overlaps, start=1):
            self.assertEqual(len(group), 1)
            only_list = group[0]
            self.assertEqual(len(only_list), 1)
            self.assertEqual(source[total - 1], only_list[0])
        # every record must have been seen exactly once
        self.assertEqual(total, len(source))
示例#4
0
    def test_two_iter_same(self):
        """Check two inputs built from the same record list.

        Every group is expected to hold two records in total, with the
        leading record of each list matching the record at the same position
        in the corresponding input.
        """
        first = TestMafOverlapIterator.RecordsNoOverlap
        second = TestMafOverlapIterator.RecordsNoOverlap
        overlaps = LocatableOverlapIterator(
            [iter(first), iter(second)], by_barcodes=False)

        total = 0
        for total, group in enumerate(overlaps, start=1):
            position = total - 1
            self.assertEqual(len(group), 2)
            self.assertEqual(len(group[0]) + len(group[1]), 2)
            self.assertEqual(group[0][0], first[position])
            self.assertEqual(group[1][0], second[position])
        self.assertEqual(total, len(first))
示例#5
0
    def test_record_overlap(self):
        """Check that two partially overlapping records from separate inputs
        are yielded together in one group."""
        # The first record ends (109) after the second one starts (100), so
        # the two overlap without either one containing the other.
        earlier = DummyRecord("A", 11, 109)
        later = DummyRecord("A", 100, 200)

        overlaps = LocatableOverlapIterator(
            [iter([earlier]), iter([later])], by_barcodes=False)

        # one group holding one record in each of the two lists
        group = next(overlaps)
        self.assertEqual(len(group), 2)
        from_first, from_second = group[0], group[1]
        self.assertEqual(len(from_first), 1)
        self.assertEqual(len(from_second), 1)
        self.assertEqual(from_first[0], earlier)
        self.assertEqual(from_second[0], later)

        # nothing else may follow
        with self.assertRaises(StopIteration):
            next(overlaps)
示例#6
0
    def test_two_iter_no_overlap(self):
        """Check two inputs whose records never overlap each other.

        Every group is expected to hold exactly one record, taken from
        either input, and each input's records must appear in their original
        order.
        """
        first = TestMafOverlapIterator.RecordsNoOverlap
        second = TestMafOverlapIterator.RecordsSecondNoOverlap
        overlaps = LocatableOverlapIterator(
            [iter(first), iter(second)], by_barcodes=False)

        total = 0
        first_seen = 0
        second_seen = 0
        for total, group in enumerate(overlaps, start=1):
            self.assertEqual(len(group), 2)
            self.assertEqual(len(group[0]) + len(group[1]), 1)
            if len(group[0]) == 0:
                # the single record of this group came from the second input
                self.assertEqual(group[1][0], second[second_seen])
                second_seen += 1
            else:
                self.assertEqual(group[0][0], first[first_seen])
                first_seen += 1
        self.assertEqual(total, len(first) + len(second))
        self.assertEqual(first_seen, len(first))
        self.assertEqual(second_seen, len(second))
示例#7
0
    def test_two_iter_second_overlaps_first(self):
        """Check two inputs where the second overlaps records of the first.

        The combined number of records in each yielded group is compared
        against a fixed list of expected sizes, and the leading record of
        every non-empty list must follow the order of its input.
        """
        first = TestMafOverlapIterator.RecordsNoOverlap
        second = TestMafOverlapIterator.RecordsSecondOverlappingFirst
        overlaps = LocatableOverlapIterator(
            [iter(first), iter(second)], by_barcodes=False)
        # combined size of both lists for each successive group
        group_sizes = [1, 1, 2, 1, 1, 1]

        first_seen = 0
        second_seen = 0
        for step, group in enumerate(overlaps):
            self.assertEqual(len(group), 2)
            self.assertEqual(
                len(group[0]) + len(group[1]), group_sizes[step])
            # both lists may be populated in the same group, so check each
            if len(group[0]) > 0:
                self.assertEqual(group[0][0], first[first_seen])
                first_seen += 1
            if len(group[1]) > 0:
                self.assertEqual(group[1][0], second[second_seen])
                second_seen += 1
        self.assertEqual(first_seen + second_seen, len(first) + len(second))
        self.assertEqual(first_seen, len(first))
        self.assertEqual(second_seen, len(second))
示例#8
0
 def test_empty_iter(self):
     """Check that an iterator built from no inputs yields nothing."""
     overlaps = LocatableOverlapIterator([], by_barcodes=False)
     yielded = list(overlaps)
     self.assertEqual(len(yielded), 0)
    def do_work(self):
        """Main wrapper function for running protect MAF merging.

        Loads the readers and the header, walks the readers with a
        ``LocatableOverlapIterator``, merges every overlap set, re-evaluates
        the normal depth filter tag on each merged record, collects the
        records in a ``MafSorter`` and finally writes the sorter's contents
        to the path given in ``self.options['output_maf']``.

        The readers, the sorter and the writer (when one was created) are
        closed in a ``finally`` block once the processing loop has started.
        """

        # Reader
        self.load_readers()

        # Header
        self.setup_maf_header()

        self._scheme = self.maf_header.scheme()
        self._columns = get_columns_from_header(self.maf_header)

        # Sorter
        sorter = MafSorter(max_objects_in_ram=100000,
                           sort_order_name=BarcodesAndCoordinate.name(),
                           scheme=self.maf_header.scheme(),
                           contigs=self.maf_header.contigs())

        # Merger
        self._merger = MafRecordMerger_1_0_0(self._scheme)

        # Overlap iterator
        o_iter = LocatableOverlapIterator(
            self.maf_readers,
            contigs=self.maf_header.contigs(),
            peekable_iterator_class=FilteringPeekableIterator)

        # ndp filter
        ndp_filter = Filters.NormalDepth.setup(self.options['min_n_depth'])
        ndp_tag = ndp_filter.tags[0]

        # Counts
        processed = 0  # overlap sets pulled from the iterator
        merged = 0  # merged records handed to the sorter
        try:
            for record in o_iter:

                if processed > 0 and processed % 1000 == 0:
                    self.logger.info(
                        "Processed {0} overlapping intervals...".format(
                            processed))

                result = OverlapSet(record, self.callers)

                for maf_record in self._merger.merge_records(result):
                    if maf_record is None:
                        continue

                    # Recheck normal depth: the tag must be present exactly
                    # when the filter result is truthy for the merged record.
                    gdc_filters = maf_record['GDC_FILTER'].value
                    has_tag = ndp_tag in gdc_filters
                    ndp = ndp_filter.filter(maf_record)
                    if has_tag != ndp:
                        if ndp:
                            gdc_filters.extend(ndp_filter.tags)
                        else:
                            gdc_filters = [
                                tag for tag in gdc_filters if tag != ndp_tag
                            ]

                        maf_record["GDC_FILTER"] = get_builder(
                            "GDC_FILTER",
                            self._scheme,
                            value=sorted(gdc_filters))

                    # Add to sorter
                    sorter += maf_record
                    merged += 1

                processed += 1

            # Report the number of records queued for writing; the number of
            # overlap intervals can differ because one interval may produce
            # several merged records or none.
            self.logger.info(
                "Writing {0} sorted, merged records...".format(merged))

            # Writer
            self.maf_writer = MafWriter.from_path(
                path=self.options['output_maf'],
                header=self.maf_header,
                validation_stringency=ValidationStringency.Strict)

            counter = 0
            for record in sorter:
                if counter > 0 and counter % 1000 == 0:
                    self.logger.info(
                        "Wrote {0} sorted, merged records...".format(counter))
                self.maf_writer += record
                counter += 1

            self.logger.info(
                "Finished writing {0} sorted, merged records.".format(counter))

        finally:
            for reader in self.maf_readers:
                reader.close()

            sorter.close()

            # This method assigns self.maf_writer only after merging has
            # succeeded, and whether it is pre-initialised elsewhere is not
            # visible here. Look it up defensively so a missing attribute
            # cannot raise AttributeError and mask the original error.
            writer = getattr(self, "maf_writer", None)
            if writer:
                writer.close()