def _determine_codon_overlap(self, s, e, codon_tuple, variant_type):
    """Report whether the variant span overlaps the given codon.

    :param s: variant start position
    :param e: variant end position
    :param codon_tuple: two-element (start, end) of the codon, or None
    :param variant_type: compared against VariantClassification.VT_INS
    :return: False when codon_tuple is None, otherwise the result of
        TranscriptProviderUtils.test_overlap
    """
    if codon_tuple is None:
        return False

    # Insertions are tested as the single position ``s``; every other variant
    # type uses the full ``s``..``e`` span.
    span_end = s if variant_type == VariantClassification.VT_INS else e

    # The codon start is shifted by one before the comparison
    # (presumably a 0-based to 1-based conversion -- TODO confirm).
    return TranscriptProviderUtils.test_overlap(
        s, span_end, codon_tuple[0] + 1, codon_tuple[1])
def _determine_codon_overlap(self, s, e, codon_tuple, variant_type):
    """Check a variant's positions against a codon's coordinates.

    :param s: variant start position
    :param e: variant end position
    :param codon_tuple: two-element (start, end) of the codon, or None
    :param variant_type: compared against VariantClassification.VT_INS
    :return: False if there is no codon, else whatever
        TranscriptProviderUtils.test_overlap reports
    """
    # No codon to compare against.
    if codon_tuple is None:
        return False

    # Codon start is incremented by one before testing
    # (looks like a coordinate-system adjustment -- TODO confirm).
    codon_start = codon_tuple[0] + 1
    codon_end = codon_tuple[1]

    if variant_type == VariantClassification.VT_INS:
        # An insertion is tested at its start position only.
        return TranscriptProviderUtils.test_overlap(
            s, s, codon_start, codon_end)

    return TranscriptProviderUtils.test_overlap(s, e, codon_start, codon_end)
def _determine_if_splice_site_overlap(self, start_genomic_space, end_genomic_space, tx, variant_type, dist=2):
    """Find the first exon boundary whose splice-site window the variant hits.

    Exons are visited in the order returned by ``tx.get_exons()``; for each
    one the left window is tested before the right window, and the first hit
    is returned.

    Which boundaries are tested:
      * internal exons: both left and right
      * exon index 0: right only on "+" strand, left only on "-" strand
      * last exon index: left only on "+" strand, right only on "-" strand
    so the outer edge of the first/last exon is not called a splice site,
    except that a single-exon transcript has both edges tested on "+" and
    neither on "-".

    Insertions are tested at their start position only.

    :param start_genomic_space: int in genomic space
    :param end_genomic_space: int in genomic space (start <= end)
    :param tx: Transcript providing get_exons() and get_strand()
    :param variant_type: compared against VariantClassification.VT_INS
    :param dist: half-width used to build the window around each boundary
    :return: on a hit, (True, exon_i, is_right_overlap) where is_right_overlap
        is True for the higher-genomic-position boundary.  On a miss the code
        returns (False, -1, None, False).
        NOTE(review): the miss tuple has four elements while the hit tuple has
        three -- confirm callers index into the result rather than unpack it.
    """
    exons = tx.get_exons()
    strand = tx.get_strand()

    # An insertion only counts when it *starts* inside the window.
    if variant_type == VariantClassification.VT_INS:
        end_genomic_space = start_genomic_space

    last_index = len(exons) - 1
    for exon_i, exon in enumerate(exons):
        is_first = exon_i == 0
        is_last = exon_i == last_index
        is_internal = (not is_first) and (not is_last)

        check_left = is_internal or (strand == "-" and is_first) or (strand == "+" and is_last)
        check_right = is_internal or (strand == "+" and is_first) or (strand == "-" and is_last)

        if check_left:
            left_lo = exon[0] - dist + 1
            left_hi = exon[0] + (dist - 1) + 1
            if TranscriptProviderUtils.test_overlap(
                    start_genomic_space, end_genomic_space, left_lo, left_hi):
                return True, exon_i, False

        if check_right:
            right_lo = exon[1] - (dist - 1)
            right_hi = exon[1] + dist
            if TranscriptProviderUtils.test_overlap(
                    start_genomic_space, end_genomic_space, right_lo, right_hi):
                return True, exon_i, True

    return False, -1, None, False
def _is_matching(self, mut, tsv_record):
    """Decide whether a tsv record matches the given mutation.

    Column positions are looked up through ``self.tsv_index``.

    * ``self.match_mode == "exact"`` with both "ref" and "alt" columns:
      a mutation is built from the record (build fixed to "hg19") and its
      chr, ref_allele, alt_allele, start and end must all equal ``mut``'s.
    * ``self.match_mode == "exact"`` without those columns: chrom, start and
      end must be equal.
    * any other match mode: returns TranscriptProviderUtils.test_overlap on
      the integer start/end positions.  The chromosome is not compared in this
      branch (NOTE(review): presumably records were already restricted to the
      mutation's chromosome by the caller -- confirm).

    :param mut: mutation exposing chr, start, end, ref_allele, alt_allele
    :param tsv_record: indexable record (one row of the tsv)
    :return: True/False for exact mode, test_overlap's result otherwise
    """
    index = self.tsv_index
    record_chrom = tsv_record[index["chrom"]]
    record_start = tsv_record[index["start"]]
    record_end = tsv_record[index["end"]]

    if self.match_mode != "exact":
        return TranscriptProviderUtils.test_overlap(
            int(mut.start), int(mut.end), int(record_start), int(record_end))

    if not ("ref" in index and "alt" in index):
        # do not use ref and alt information
        if (mut.chr == record_chrom
                and int(mut.start) == int(record_start)
                and int(mut.end) == int(record_end)):
            return True
        return False

    # ref and alt information is present
    genome_build = "hg19"
    ref = tsv_record[index["ref"]]
    alt = tsv_record[index["alt"]]
    if "-" in (ref, alt):
        # addresses Mutation Annotation Format based tsv records
        # TODO: This looks risky to be calling the MutationData constructor directly
        record_mut = MutationData(
            record_chrom, record_start, record_end, ref, alt, genome_build)
    else:
        # addresses tsv records where the input isn't a Mutation Annotation Format file
        record_mut = MutUtils.initializeMutFromAttributes(
            record_chrom, record_start, record_end, ref, alt, genome_build)

    if (mut.chr == record_mut.chr
            and mut.ref_allele == record_mut.ref_allele
            and mut.alt_allele == record_mut.alt_allele
            and int(mut.start) == int(record_mut.start)
            and int(mut.end) == int(record_mut.end)):
        return True
    return False
def _is_matching(self, mut, tsv_record):
    """Tell whether ``tsv_record`` corresponds to ``mut``.

    The record's chrom/start/end (and ref/alt when indexed) are read through
    ``self.tsv_index``.  In "exact" match mode the positions, and the alleles
    when "ref" and "alt" columns are both present, must be equal; the record
    is turned into a mutation with the build fixed to "hg19" for the allele
    comparison.  In every other match mode the integer start/end intervals
    are passed to TranscriptProviderUtils.test_overlap and its result is
    returned; the chromosome is not checked there (NOTE(review): looks like
    the caller is expected to pre-filter by chromosome -- confirm).

    :param mut: mutation exposing chr, start, end, ref_allele, alt_allele
    :param tsv_record: indexable record (one row of the tsv)
    :return: True/False for exact mode, test_overlap's result otherwise
    """
    columns = self.tsv_index
    chrom = tsv_record[columns["chrom"]]
    start_pos = tsv_record[columns["start"]]
    end_pos = tsv_record[columns["end"]]

    if self.match_mode == "exact":
        if "ref" in columns and "alt" in columns:
            # ref and alt information is present
            ref = tsv_record[columns["ref"]]
            alt = tsv_record[columns["alt"]]
            if ref == "-" or alt == "-":
                # addresses Mutation Annotation Format based tsv records
                # TODO: This looks risky to be calling the MutationData constructor directly
                make_mut = MutationData
            else:
                # addresses tsv records where the input isn't a Mutation Annotation Format file
                make_mut = MutUtils.initializeMutFromAttributes
            ds_mut = make_mut(chrom, start_pos, end_pos, ref, alt, "hg19")

            matched = (
                mut.chr == ds_mut.chr
                and mut.ref_allele == ds_mut.ref_allele
                and mut.alt_allele == ds_mut.alt_allele
                and int(mut.start) == int(ds_mut.start)
                and int(mut.end) == int(ds_mut.end)
            )
        else:
            # do not use ref and alt information
            matched = (
                mut.chr == chrom
                and int(mut.start) == int(start_pos)
                and int(mut.end) == int(end_pos)
            )
        return True if matched else False

    return TranscriptProviderUtils.test_overlap(
        int(mut.start), int(mut.end), int(start_pos), int(end_pos))
def _determine_if_splice_site_overlap(self, start_genomic_space, end_genomic_space, tx, variant_type, dist=2):
    """Report whether the variant overlaps a splice-site window of ``tx``.

    A window is built around an exon boundary from ``dist`` and handed to
    TranscriptProviderUtils.test_overlap together with the variant span.
    Internal exons have both boundaries tested.  For exon index 0 and the
    last exon index only one boundary is tested, chosen by strand (index 0:
    right on "+", left on "-"; last: left on "+", right on "-"), so the outer
    edge of the first/last exon is not reported as a splice site.  For a
    single-exon transcript that rule tests both edges on "+" and neither on
    "-".

    Exons are scanned in ``tx.get_exons()`` order, left boundary before right,
    and the first overlap found is returned.

    INS events only call splice site when they start in the splice site.

    :param start_genomic_space: int in genomic space
    :param end_genomic_space: int in genomic space (start <= end)
    :param tx: Transcript providing get_exons() and get_strand()
    :param variant_type: compared against VariantClassification.VT_INS
    :param dist: half-width used to build the window around each boundary
    :return: (True, exon_i, is_right_overlap) on a hit, with is_right_overlap
        True for the higher genomic position; (False, -1, None, False) when
        nothing overlaps.
        NOTE(review): the no-overlap tuple carries a fourth element that the
        hit tuple lacks -- verify how callers consume the result.
    """
    exons = tx.get_exons()
    strand = tx.get_strand()
    on_plus = strand == "+"
    on_minus = strand == "-"

    # If this is an insertion, only its start position is tested.
    if variant_type == VariantClassification.VT_INS:
        end_genomic_space = start_genomic_space

    def hits(window_start, window_end):
        return TranscriptProviderUtils.test_overlap(
            start_genomic_space, end_genomic_space, window_start, window_end)

    final_i = len(exons) - 1
    for i, exon in enumerate(exons):
        at_first = i == 0
        at_last = i == final_i
        internal = not (at_first or at_last)

        test_left = internal or (on_minus and at_first) or (on_plus and at_last)
        test_right = internal or (on_plus and at_first) or (on_minus and at_last)

        if test_left and hits(exon[0] - dist + 1, exon[0] + (dist - 1) + 1):
            return True, i, False

        if test_right and hits(exon[1] - (dist - 1), exon[1] + dist):
            return True, i, True

    return False, -1, None, False
def __get_overlapping_records(self, records, start, end, type):
    """Return the records whose interval overlaps ``start``..``end``.

    The interval of each record is read from a pair of keys selected by
    ``type``: "start"/"end" for "gene", "footprint_start"/"footprint_end"
    for "transcript".  Overlap is decided by
    TranscriptProviderUtils.test_overlap.

    :param records: iterable of mappings holding the keys described above
    :param start: query interval start
    :param end: query interval end
    :param type: "gene" or "transcript" (the name shadows the builtin but is
        kept because it is part of the existing signature)
    :return: list of the overlapping records, in input order
    :raises ValueError: if ``type`` is neither "gene" nor "transcript"
    """
    if type == "gene":
        st_key, en_key = "start", "end"
    elif type == "transcript":
        st_key, en_key = "footprint_start", "footprint_end"
    else:
        # Previously the keys were simply left unbound, which surfaced as an
        # UnboundLocalError on the first record; fail with a clear message.
        raise ValueError(
            "Unsupported record type %r; expected 'gene' or 'transcript'" % (type,))

    return [
        r for r in records
        if TranscriptProviderUtils.test_overlap(start, end, r[st_key], r[en_key])
    ]
def __get_overlapping_records(self, records, start, end, type):
    """Filter ``records`` down to those overlapping ``start``..``end``.

    ``type`` selects which keys hold each record's interval: 'start'/'end'
    for 'gene', 'footprint_start'/'footprint_end' for 'transcript'.  Overlap
    is decided by TranscriptProviderUtils.test_overlap.

    :param records: iterable of mappings holding the keys described above
    :param start: query interval start
    :param end: query interval end
    :param type: 'gene' or 'transcript' (shadows the builtin; kept because it
        is part of the existing signature)
    :return: list of the overlapping records, in input order
    :raises ValueError: if ``type`` is neither 'gene' nor 'transcript'
    """
    if type == 'gene':
        st_key, en_key = 'start', 'end'
    elif type == 'transcript':
        st_key, en_key = 'footprint_start', 'footprint_end'
    else:
        # An unknown type used to leave st_key/en_key unbound and blow up
        # with UnboundLocalError inside the loop; report it explicitly.
        raise ValueError(
            'Unsupported record type %r; expected \'gene\' or \'transcript\'' % (type,))

    out_records = [
        r for r in records
        if TranscriptProviderUtils.test_overlap(start, end, r[st_key], r[en_key])
    ]
    return out_records
def _get_overlapping_transcript_records(self, records, start, end):
    """Collect the records that overlap the ``start``..``end`` interval.

    Each record's get_start()/get_end() is compared with the integer-converted
    query positions via TranscriptProviderUtils.test_overlap.

    :param records: iterable of objects exposing get_start() and get_end()
    :param start: query start, convertible with int()
    :param end: query end, convertible with int()
    :return: list of overlapping records, in input order
    """
    overlapping = []
    for record in records:
        # int() is applied per record, so it is never evaluated when
        # ``records`` is empty.
        if TranscriptProviderUtils.test_overlap(
                int(start), int(end), record.get_start(), record.get_end()):
            overlapping.append(record)
    return overlapping
def _get_overlapping_transcript_records(self, records, start, end):
    """Keep only the records overlapping the ``start``..``end`` interval.

    :param records: iterable of objects exposing get_start() and get_end()
    :param start: query start, convertible with int()
    :param end: query end, convertible with int()
    :return: list of the records for which
        TranscriptProviderUtils.test_overlap is truthy, in input order
    """
    def overlaps_query(record):
        # The int() conversions happen on every call, i.e. only when there
        # is at least one record to test.
        return TranscriptProviderUtils.test_overlap(
            int(start), int(end), record.get_start(), record.get_end())

    return list(filter(overlaps_query, records))