def test_get_transcript_from_pos():
    """Annotating the chr3:41250936 A>C variant must list ENST00000453024."""
    record = {"chr": "3", "pos": 41250936, "ref": "A", "alt": "C"}
    # Single-row frame holding just this variant.
    variants_df = pd.DataFrame.from_records([record])

    annotated = ensembl.annotate_vcf_transcripts(variants_df)

    found_transcripts = set(annotated["stable_id_transcript"])
    assert "ENST00000453024" in found_transcripts
def test_get_gene_from_pos():
    """Annotating the chr3:41250936 A>C variant must list ENSG00000168036."""
    expected_gene = "ENSG00000168036"
    record = {"chr": "3", "pos": 41250936, "ref": "A", "alt": "C"}
    # Single-row frame holding just this variant.
    variants_df = pd.DataFrame.from_records([record])

    annotated = ensembl.annotate_vcf_transcripts(variants_df)

    found_genes = set(annotated["stable_id_gene"])
    assert expected_gene in found_genes
def test_get_all_transcript_from_pos():
    """Annotating the chr3:41275636 G>A variant must list every expected transcript."""
    record = {"chr": "3", "pos": 41275636, "ref": "G", "alt": "A"}
    variants_df = pd.DataFrame.from_records([record])

    annotated = ensembl.annotate_vcf_transcripts(variants_df)
    found_transcripts = set(annotated["stable_id_transcript"])

    # Checked one at a time, in this order, so a failure names the first
    # missing transcript ID.
    expected_transcripts = (
        "ENST00000405570",
        "ENST00000396183",
        "ENST00000349496",
        "ENST00000453024",
        "ENST00000396185",
    )
    for stable_id in expected_transcripts:
        assert stable_id in found_transcripts
def test_get_transcript_and_mutate_vcf():
    """End-to-end check for the chr10:43617416 T>C variant.

    Verifies that the variant is annotated with the expected transcripts,
    that the position maps onto the reference base in both the cDNA and CDS
    sequences of ENST00000355710, and that mutating the CDS with a padding
    of 10 yields the expected 21-character sequence.
    """
    position, ref_base, alt_base = 43617416, "T", "C"
    variant = {"chr": "10", "pos": position, "ref": ref_base, "alt": alt_base}
    variants_df = pd.DataFrame.from_records([variant])

    # Step 1: transcript annotation of the variant.
    annotated = ensembl.annotate_vcf_transcripts(variants_df)
    found_transcripts = set(annotated["stable_id_transcript"])
    assert "ENST00000355710" in found_transcripts
    assert "ENST00000340058" in found_transcripts

    target_transcript = "ENST00000355710"

    # Step 2: index with untranslated regions included, checked against cDNA.
    cdna_offset = ensembl.get_transcript_index_from_pos(
        variant["pos"],
        target_transcript,
        skip_untranslated_region=False,
    )
    assert cdna_offset is not None
    assert cdna_offset < 5569
    cdna_seq = ref_data.get_cdna(target_transcript)
    assert cdna_seq[cdna_offset] == variant["ref"]

    # Step 3: index with untranslated regions skipped, checked against CDS.
    cds_offset = ensembl.get_transcript_index_from_pos(
        variant["pos"],
        target_transcript,
        skip_untranslated_region=True,
    )
    assert cds_offset is not None
    cds_seq = ref_data.get_cds(target_transcript)
    assert cds_seq[cds_offset] == variant["ref"]

    # Step 4: apply the substitution and inspect the returned region.
    region = mutate_protein_from_transcript(
        cds_seq,
        cds_offset,
        variant["ref"],
        variant["alt"],
        padding=10,
    )
    assert region is not None
    assert len(region.seq) == 21, (region.seq, len(region.seq))
    assert region.seq == "RSQGRIPVKWTAIESLFDHIY"