Пример #1
0
 def test_overlapping_intervals(self):
     # Exercises overlapping_intervals() with three coordinate pairs:
     # two that share a stretch, two that only touch, and two that are disjoint.
     from samsum import alignment_utils
     overlaps = alignment_utils.overlapping_intervals
     left, middle, right = (0, 100), (50, 101), (101, 200)
     # (0, 100) and (50, 101) share positions 50 through 100
     self.assertTrue(overlaps(left, middle))
     # (50, 101) and (101, 200) meet only at position 101
     self.assertTrue(overlaps(middle, right))
     # (0, 100) ends before (101, 200) begins
     self.assertFalse(overlaps(left, right))
Пример #2
0
def test_overlapping_intervals():
    """Verify overlapping_intervals() on overlapping, touching and disjoint coordinate pairs."""
    from samsum import alignment_utils

    overlaps = alignment_utils.overlapping_intervals
    first, second, third = (0, 100), (50, 101), (101, 200)

    # The identity checks require real booleans, not merely truthy/falsy values
    assert overlaps(first, second) is True
    assert overlaps(second, third) is True
    assert overlaps(first, third) is False
Пример #3
0
    def merge_tiles(self) -> None:
        """
        Merge the Tile instances in self.tiles whose ranges overlap.

        Tile instances must have a 'start' and 'end' variable. The tiles are ordered by 'start'; every later tile
        that overlaps an earlier one is passed to the earlier tile's merge() and dropped. self.tiles is then
        updated in place so that it holds only the remaining tiles, ordered by 'start'.

        :return: None
        """
        # sorted() returns a new list, so removals made on it must be written back to self.tiles at the end;
        # otherwise tiles that were merged away would remain in self.tiles.
        tiles = sorted(self.tiles, key=lambda x: x.start)
        i = 0
        while i < len(tiles):
            tile_i = tiles[i]  # type: Tile
            j = i + 1
            while j < len(tiles):
                tile_j = tiles[j]  # type: Tile
                if ss_aln_utils.overlapping_intervals((tile_i.start, tile_i.end), (tile_j.start, tile_j.end)):
                    tile_i.merge(tile_j)
                    # j is not advanced: the next candidate has shifted into this position
                    tiles.pop(j)
                else:
                    j += 1
            i += 1
        # Slice assignment keeps the same list object, so other references to self.tiles see the merged result
        self.tiles[:] = tiles
        return
Пример #4
0
    def proportion_covered(self) -> float:
        """
        Compute the fraction of the RefSequence that was covered by mapped reads.

        Steps:
            1. Visit the AlignmentDat instances in self.alignments ordered by start, load each one into a new
               Tile and merge that Tile with the first existing Tile it overlaps
            2. Call merge_tiles() to handle Tiles that still overlap one another
            3. Sum (end - start) over all Tiles and divide the total by self.length

        :return: 0 when no reads were mapped, otherwise the summed Tile lengths divided by self.length
        """
        if self.reads_mapped == 0:
            return 0

        self.tiles.clear()
        ordered_alignments = sorted(self.alignments, key=lambda aln: aln.start)
        for aln_dat in ordered_alignments:  # type: AlignmentDat
            new_tile = Tile()
            new_tile.load_from_alignment_dat(aln_dat)
            for idx, existing in enumerate(self.tiles):  # type: int, Tile
                if ss_aln_utils.overlapping_intervals((existing.start, existing.end), (aln_dat.start, aln_dat.end)):
                    # Only the first overlapping Tile is taken here; any others are left for merge_tiles()
                    new_tile.merge(self.tiles.pop(idx))
                    break
            self.tiles.append(new_tile)

        # Tiles may still overlap each other at this point, so a final merge pass is needed
        self.merge_tiles()

        # Combined length of all Tiles across the reference sequence
        total_tiled = sum(tile.end - tile.start for tile in self.tiles)
        return total_tiled / self.length