Exemplo n.º 1
0
def get_flanking_positions(obj_tt, genomic_range, flank_type):
    """
    Return the genomic position to inspect for an alteration, either as-is or widened by one position on each side
    Args:
        -obj_tt = not read by this function
        -genomic_range = genomic range of the genomic alteration, handed to Isoform.split_genome_pos() for parsing (presumably a string such as 'chrom:start-end' - confirm against that helper)
        -flank_type = integer that selects which position is returned
            -0 = stay on the genomic position of the genomic alteration
                -intended for SNVs and events where the genomic alteration does not alter the reading frame
            -any other value (1 by convention) = move 'start' back by 1 and 'end' forward by 1
                -intended for frameshifting events (insertions/deletions), where the positions immediately before & after the alteration are needed
    Returns:
        returns a dictionary
            -flank_type == 0: the dictionary from Isoform.split_genome_pos(), unchanged
            -otherwise: a new dictionary with the keys 'chrom', 'start' (original start - 1) & 'end' (original end + 1)
    """
    parsed_pos = Isoform.split_genome_pos(genomic_range)

    # nothing to widen: hand back the parsed position untouched
    if flank_type == 0:
        return parsed_pos

    # step one position outwards on both sides of the alteration
    flanked_pos = {}
    flanked_pos['chrom'] = parsed_pos['chrom']
    flanked_pos['start'] = parsed_pos['start'] - 1
    flanked_pos['end'] = parsed_pos['end'] + 1
    return flanked_pos
Exemplo n.º 2
0
def create_obj_tt(isoform_id, genome_pos):
    """
    Creates an instance of TranslateTranscript for a given isoform
    Args:
        -isoform_id = string that is the isoform, usually in the form of an Ensembl ID (e.g. ENST000..)
        -genome_pos = genomic position of the alteration, parsed by Isoform.split_genome_pos(); only its 'chrom' & 'start' values are used here
    Returns:
        returns the TranslateTranscript instance built from the transcript returned by IsoformSJ.create_canon_transcript()
    """
    db_type = 2  #this means the database is Ensembl
    hash_gp = Isoform.split_genome_pos(genome_pos)  #hash_gp = hash genome pos

    #only add the "chr" prefix when it is missing, so the chromosome name never becomes "chrchr.."
    chrom = str(hash_gp['chrom'])
    if "chr" not in chrom:
        chrom = 'chr' + chrom

    hash_pos = {
        'chrom': chrom,
        'pos_oi': hash_gp['start']
    }
    #NOTE(review): the meaning of the positional arguments ([], -10, False, 0, True) is defined by IsoformSJ - confirm against that class
    iso_sj = IsoformSJ(db_type, isoform_id, [], -10, hash_pos, False, 0, True)
    canon_transcript = iso_sj.create_canon_transcript(False, False)

    return TranslateTranscript(canon_transcript, iso_sj, DIR_GENOME, {})

#retrieve the isoform IDs (from Ensembl OR RefSeq OR UCSC) and display them
hash_gp = Isoform.split_genome_pos( genomic_range )
hash_gp_chrom = "chr" + hash_gp['chrom'] if not "chr" in hash_gp['chrom'] else hash_gp['chrom']
print "hash_gp = ", hash_gp

db_type = 1     #1 = uses RefSeq, 2 = uses Ensembl, 3 = uses UCSC
all_isoforms = Isoform.get_isoforms_by_pos_db_all( hash_gp_chrom, hash_gp['start'], hash_gp['end'], db_type )
all_iso_id = [x.name for x in all_isoforms]
print "all_iso_id = ", all_iso_id


print "------------ TDD Completed: 171108_SimpleNeoepV2_NMD.py ------------"