def get_flanking_positions(obj_tt, genomic_range, flank_type):
    """
    Pick the genomic position used to look at the codons around a genomic alteration
        -for SNVs & other events that keep the reading frame, the position of the alteration itself is used
        -for frameshifting events (insertions / deletions), the positions immediately before & after the alteration are used
    Args:
        -obj_tt = not used inside this function
        -genomic_range = genomic range of the genomic alteration, parsed with Isoform.split_genome_pos()
        -flank_type = integer that selects which position is returned
            -0 = stay on the genomic position of the genomic alteration
            -any other value (normally 1) = move 'start' back by 1 & 'end' forward by 1
    Returns:
        a hash (dict). For flank_type 0 this is the hash from Isoform.split_genome_pos() unchanged, otherwise a new hash with the keys 'chrom', 'start' & 'end'
    """
    parsed_range = Isoform.split_genome_pos(genomic_range)

    # frameshifting events: widen the range by one position on each side
    if flank_type != 0:
        return {
            'chrom': parsed_range['chrom'],
            'start': parsed_range['start'] - 1,
            'end': parsed_range['end'] + 1,
        }

    # reading frame is preserved: report the position as parsed
    return parsed_range
def create_obj_tt(isoform_id, genome_pos):
    """
    Creates an instance of TranslateTranscript
    Args:
        -isoform_id = string that is the isoform, usually in the form of an Ensembl ID (e.g. ENST000..)
        -genome_pos = genomic position of the mutation, parsed with Isoform.split_genome_pos()
    Returns:
        the TranslateTranscript instance built from the transcript returned by IsoformSJ.create_canon_transcript()
    """
    db_type = 2     #this means the database is Ensembl
    hash_gp = Isoform.split_genome_pos(genome_pos)     #hash_gp = hash genome pos

    # Add the 'chr' prefix only when it is missing, so a position that already
    # carries it does not become 'chrchr..' (same intent as the script code that
    # builds hash_gp_chrom elsewhere in this file).
    chrom = str(hash_gp['chrom'])
    if not chrom.startswith('chr'):
        chrom = 'chr' + chrom

    hash_pos = {'chrom': chrom, 'pos_oi': hash_gp['start']}
    iso_sj = IsoformSJ(db_type, isoform_id, [], -10, hash_pos, False, 0, True)
    canon_transcript = iso_sj.create_canon_transcript(False, False)
    return TranslateTranscript(canon_transcript, iso_sj, DIR_GENOME, {})
# print i2, " - aa_orig = ", aa_orig # print i2, " - aa_alt = ", aa_alt # for i2 in range( 0, len( obj_iso.list_mRNA ) ): # mrna_orig = obj_iso.list_mRNA[i2] # mrna_alt = obj_iso.list_mRNA_alt[i2] # #retrieve nucleotide # # [subseq_orig, subseq_alt] = obj_iso.retrieve_mRNA_neoep_subseq( mrna_orig, mrna_alt ) # # print "subseq_orig = ", subseq_orig # # print "subseq_alt = ", subseq_alt # #retrieve amino acid sequence # [aa_orig, aa_alt] = SimpleNeoepitopeIsoformV2.retrieve_comparative_neoeps( mrna_orig, mrna_alt, 9 ) # print i2, " - aa_orig = ", aa_orig # print i2, " - aa_alt = ", aa_alt print "mRNA: ********", i, "********\n" #retrieve the isoform IDs (from Ensembl OR RefSeq OR UCSC) and display them hash_gp = Isoform.split_genome_pos( genomic_range ) hash_gp_chrom = "chr" + hash_gp['chrom'] if not "chr" in hash_gp['chrom'] else hash_gp['chrom'] print "hash_gp = ", hash_gp db_type = 1 #1 = uses RefSeq, 2 = uses Ensembl, 3 = uses UCSC all_isoforms = Isoform.get_isoforms_by_pos_db_all( hash_gp_chrom, hash_gp['start'], hash_gp['end'], db_type ) all_iso_id = [x.name for x in all_isoforms] print "all_iso_id = ", all_iso_id print "------------ TDD Completed: 171108_SimpleNeoepV2_NMD.py ------------"