Exemplo n.º 1
0
 def _determine_codon_overlap(self, s, e, codon_tuple, variant_type):
     if codon_tuple is None:
         return False
     if variant_type == VariantClassification.VT_INS:
         is_codon_overlap = TranscriptProviderUtils.test_overlap(s, s, codon_tuple[0]+1, codon_tuple[1])
     else:
         is_codon_overlap = TranscriptProviderUtils.test_overlap(s, e, codon_tuple[0]+1, codon_tuple[1])
     return is_codon_overlap
Exemplo n.º 2
0
 def _determine_codon_overlap(self, s, e, codon_tuple, variant_type):
     if codon_tuple is None:
         return False
     if variant_type == VariantClassification.VT_INS:
         is_codon_overlap = TranscriptProviderUtils.test_overlap(
             s, s, codon_tuple[0] + 1, codon_tuple[1])
     else:
         is_codon_overlap = TranscriptProviderUtils.test_overlap(
             s, e, codon_tuple[0] + 1, codon_tuple[1])
     return is_codon_overlap
Exemplo n.º 3
0
    def _determine_if_splice_site_overlap(self,
                                          start_genomic_space,
                                          end_genomic_space,
                                          tx,
                                          variant_type,
                                          dist=2):
        """
        Look for an overlap between the variant and a splice-site window at
        an exon boundary of the transcript.

        For every exon two windows may be tested, each ``2 * dist`` positions
        wide:

          * left:  exon[0] - dist + 1 .. exon[0] + dist
          * right: exon[1] - dist + 1 .. exon[1] + dist

        Both windows are tested for internal exons.  For the first exon in
        the list only the right window ("+" strand) or only the left window
        ("-" strand) is tested; for the last exon it is the other way round.
        A single-exon transcript is both first and last, so both of its
        windows are tested.

        Callers are expected to pass start <= end.  For insertions only the
        start position is tested.

        :param start_genomic_space: int in genomic space
        :param end_genomic_space: int in genomic space
        :param tx: Transcript (provides get_exons() and get_strand())
        :param variant_type: compared against VariantClassification.VT_INS
        :param dist: half-width of each splice-site window
        :return: on a hit, (True, exon_i, is_right_overlap) where
            is_right_overlap is True for the window at exon[1]; without a
            hit, the four-element tuple (False, -1, None, False)
        """
        exons = tx.get_exons()
        strand = tx.get_strand()

        # An insertion only counts when it starts inside a splice-site window.
        if variant_type == VariantClassification.VT_INS:
            end_genomic_space = start_genomic_space

        for exon_i, exon in enumerate(exons):
            at_first = exon_i == 0
            at_last = exon_i == (len(exons) - 1)
            is_internal = not at_first and not at_last

            check_left = is_internal or (strand == "-" and at_first) or (
                strand == "+" and at_last)
            check_right = is_internal or (strand == "+" and at_first) or (
                strand == "-" and at_last)

            if check_left:
                left_lo = exon[0] - dist + 1
                left_hi = exon[0] + (dist - 1) + 1
                if TranscriptProviderUtils.test_overlap(
                        start_genomic_space, end_genomic_space,
                        left_lo, left_hi):
                    return True, exon_i, False

            if check_right:
                right_lo = exon[1] - (dist - 1)
                right_hi = exon[1] + dist
                if TranscriptProviderUtils.test_overlap(
                        start_genomic_space, end_genomic_space,
                        right_lo, right_hi):
                    return True, exon_i, True

        # NOTE(review): this tuple has four elements while the hit returns
        # above have three; kept as-is because callers are not visible here.
        return False, -1, None, False
Exemplo n.º 4
0
    def _is_matching(self, mut, tsv_record):

        chrom = tsv_record[self.tsv_index["chrom"]]
        startPos = tsv_record[self.tsv_index["start"]]
        endPos = tsv_record[self.tsv_index["end"]]
        build = "hg19"

        if self.match_mode == "exact":
            if "ref" in self.tsv_index and "alt" in self.tsv_index:  # ref and alt information is present
                ref = tsv_record[self.tsv_index["ref"]]
                alt = tsv_record[self.tsv_index["alt"]]
                if ref == "-" or alt == "-":  # addresses Mutation Annotation Format based tsv records

                    # TODO: This looks risky to be calling the MutationData constructor directly
                    ds_mut = MutationData(chrom, startPos, endPos, ref, alt, build)
                else:  # addresses tsv records where the input isn't a Mutation Annotation Format file
                    ds_mut = MutUtils.initializeMutFromAttributes(chrom, startPos, endPos, ref, alt, build)

                if mut.chr == ds_mut.chr and mut.ref_allele == ds_mut.ref_allele \
                    and mut.alt_allele == ds_mut.alt_allele and int(mut.start) == int(ds_mut.start) \
                    and int(mut.end) == int(ds_mut.end):
                    return True
            else:  # do not use ref and alt information
                if mut.chr == chrom and int(mut.start) == int(startPos) and int(mut.end) == int(endPos):
                    return True
        else:
           return TranscriptProviderUtils.test_overlap(int(mut.start), int(mut.end), int(startPos), int(endPos))
        return False
    def _is_matching(self, mut, tsv_record):

        chrom = tsv_record[self.tsv_index["chrom"]]
        startPos = tsv_record[self.tsv_index["start"]]
        endPos = tsv_record[self.tsv_index["end"]]
        build = "hg19"

        if self.match_mode == "exact":
            if "ref" in self.tsv_index and "alt" in self.tsv_index:  # ref and alt information is present
                ref = tsv_record[self.tsv_index["ref"]]
                alt = tsv_record[self.tsv_index["alt"]]
                if ref == "-" or alt == "-":  # addresses Mutation Annotation Format based tsv records

                    # TODO: This looks risky to be calling the MutationData constructor directly
                    ds_mut = MutationData(chrom, startPos, endPos, ref, alt,
                                          build)
                else:  # addresses tsv records where the input isn't a Mutation Annotation Format file
                    ds_mut = MutUtils.initializeMutFromAttributes(
                        chrom, startPos, endPos, ref, alt, build)

                if mut.chr == ds_mut.chr and mut.ref_allele == ds_mut.ref_allele \
                    and mut.alt_allele == ds_mut.alt_allele and int(mut.start) == int(ds_mut.start) \
                    and int(mut.end) == int(ds_mut.end):
                    return True
            else:  # do not use ref and alt information
                if mut.chr == chrom and int(
                        mut.start) == int(startPos) and int(
                            mut.end) == int(endPos):
                    return True
        else:
            return TranscriptProviderUtils.test_overlap(
                int(mut.start), int(mut.end), int(startPos), int(endPos))
        return False
Exemplo n.º 6
0
    def _determine_if_splice_site_overlap(self, start_genomic_space, end_genomic_space, tx, variant_type, dist=2):
        """
        Test the variant against the splice-site windows at the exon
        boundaries of a transcript.

        Each exon contributes up to two windows:

          * left window:  exon[0] - dist + 1 .. exon[0] + dist
          * right window: exon[1] - dist + 1 .. exon[1] + dist

        Internal exons have both windows tested.  The first exon in the list
        has only its right window tested on "+" and only its left window on
        "-"; the last exon is the mirror image.  When the transcript has a
        single exon, both windows of that exon are tested.

        start <= end is expected.  INS events are tested on their start
        position only.

        :param start_genomic_space: int in genomic space
        :param end_genomic_space: int in genomic space
        :param tx: Transcript (get_exons() and get_strand() are called on it)
        :param variant_type: compared against VariantClassification.VT_INS
        :param dist: half-width of a splice-site window
        :return: (True, exon_i, is_right_overlap) on a hit, where
            is_right_overlap is True for the window at exon[1]; the
            four-element tuple (False, -1, None, False) when nothing overlaps
        """
        exons = tx.get_exons()
        strand = tx.get_strand()

        # Insertions are only classified by where they start.
        if variant_type == VariantClassification.VT_INS:
            end_genomic_space = start_genomic_space

        for idx, exon in enumerate(exons):
            last_idx = len(exons) - 1
            if 0 < idx < last_idx:
                # Internal exon: both boundaries are splice sites.
                test_left = True
                test_right = True
            else:
                test_left = (strand == "-" and idx == 0) or (strand == "+" and idx == last_idx)
                test_right = (strand == "+" and idx == 0) or (strand == "-" and idx == last_idx)

            if test_left:
                window = (exon[0] - dist + 1, exon[0] + (dist - 1) + 1)
                hit = TranscriptProviderUtils.test_overlap(
                    start_genomic_space, end_genomic_space, window[0], window[1])
                if hit:
                    return True, idx, False

            if test_right:
                window = (exon[1] - (dist - 1), exon[1] + dist)
                hit = TranscriptProviderUtils.test_overlap(
                    start_genomic_space, end_genomic_space, window[0], window[1])
                if hit:
                    return True, idx, True

        # NOTE(review): four elements here versus three on the hit paths;
        # left unchanged because the callers are outside this view.
        return False, -1, None, False
Exemplo n.º 7
0
    def __get_overlapping_records(self, records, start, end, type):
        if type == "gene":
            st_key, en_key = "start", "end"
        elif type == "transcript":
            st_key, en_key = "footprint_start", "footprint_end"

        out_records = list()
        for r in records:
            if TranscriptProviderUtils.test_overlap(start, end, r[st_key], r[en_key]):
                out_records.append(r)

        return out_records
Exemplo n.º 8
0
    def __get_overlapping_records(self, records, start, end, type):
        if type == 'gene':
            st_key, en_key = 'start', 'end'
        elif type == 'transcript':
            st_key, en_key = 'footprint_start', 'footprint_end'

        out_records = list()
        for r in records:
            if TranscriptProviderUtils.test_overlap(start, end, r[st_key], r[en_key]):
                out_records.append(r)

        return out_records
Exemplo n.º 9
0
    def __get_overlapping_records(self, records, start, end, type):
        if type == 'gene':
            st_key, en_key = 'start', 'end'
        elif type == 'transcript':
            st_key, en_key = 'footprint_start', 'footprint_end'

        out_records = list()
        for r in records:
            if TranscriptProviderUtils.test_overlap(start, end, r[st_key],
                                                    r[en_key]):
                out_records.append(r)

        return out_records
 def _get_overlapping_transcript_records(self, records, start, end):
     return [
         r for r in records if TranscriptProviderUtils.test_overlap(
             int(start), int(end), r.get_start(), r.get_end())
     ]
 def _get_overlapping_transcript_records(self, records, start, end):
     return [r for r in records if TranscriptProviderUtils.test_overlap(int(start), int(end), r.get_start(), r.get_end())]