Пример #1
0
def test_get_transcript_from_pos():
    """A chr3:41250936 A>C variant must be annotated with ENST00000453024."""
    snv = dict(chr='3', pos=41250936, ref='A', alt='C')
    # A one-row frame stands in for a parsed VCF.
    annotated = ensembl.annotate_vcf_transcripts(
        pd.DataFrame.from_records([snv])
    )
    found_transcripts = set(annotated['stable_id_transcript'])
    assert "ENST00000453024" in found_transcripts
Пример #2
0
def test_get_gene_from_pos():
    """A chr3:41250936 A>C variant must be annotated with gene ENSG00000168036."""
    snv = dict(chr='3', pos=41250936, ref='A', alt='C')
    # A one-row frame stands in for a parsed VCF.
    annotated = ensembl.annotate_vcf_transcripts(
        pd.DataFrame.from_records([snv])
    )
    found_genes = set(annotated['stable_id_gene'])
    assert "ENSG00000168036" in found_genes
Пример #3
0
def test_get_all_transcript_from_pos():
    """A chr3:41275636 G>A variant must be annotated with all five expected transcripts."""
    snv = dict(chr='3', pos=41275636, ref='G', alt='A')
    annotated = ensembl.annotate_vcf_transcripts(
        pd.DataFrame.from_records([snv])
    )
    observed = set(annotated['stable_id_transcript'])

    # Checked one by one, in this order, so a failure names the first
    # missing transcript.
    expected_in_order = (
        "ENST00000405570",
        "ENST00000396183",
        "ENST00000349496",
        "ENST00000453024",
        "ENST00000396185",
    )
    for expected_id in expected_in_order:
        assert expected_id in observed
Пример #4
0
def test_get_transcript_and_mutate_vcf():
    """End-to-end check for a chr10:43617416 T>C variant.

    Steps, in order:
      1. Annotation must report both ENST00000355710 and ENST00000340058.
      2. The genomic position must map to an index in the cDNA of
         ENST00000355710 whose base equals the reference allele.
      3. The same must hold for the CDS (untranslated regions skipped).
      4. Mutating the CDS at that index with padding 10 must yield the
         21-residue sequence 'RSQGRIPVKWTAIESLFDHIY'.
    """
    snv = dict(chr='10', pos=43617416, ref='T', alt='C')
    target = "ENST00000355710"

    # 1. Transcript annotation.
    annotated = ensembl.annotate_vcf_transcripts(
        pd.DataFrame.from_records([snv])
    )
    observed = set(annotated['stable_id_transcript'])
    assert target in observed
    assert "ENST00000340058" in observed

    # 2. Position -> cDNA index (UTRs included).
    idx_in_cdna = ensembl.get_transcript_index_from_pos(
        snv['pos'], target, skip_untranslated_region=False
    )
    assert idx_in_cdna is not None
    assert idx_in_cdna < 5569
    cdna_seq = ref_data.get_cdna(target)
    assert cdna_seq[idx_in_cdna] == snv['ref']

    # 3. Position -> CDS index (UTRs skipped).
    idx_in_cds = ensembl.get_transcript_index_from_pos(
        snv['pos'], target, skip_untranslated_region=True
    )
    assert idx_in_cds is not None
    cds_seq = ref_data.get_cds(target)
    assert cds_seq[idx_in_cds] == snv['ref']

    # 4. Apply the substitution and inspect the padded protein region.
    mutated = mutate_protein_from_transcript(
        cds_seq, idx_in_cds, snv['ref'], snv['alt'], padding=10
    )
    assert mutated is not None
    assert len(mutated.seq) == 21, (mutated.seq, len(mutated.seq))
    assert mutated.seq == 'RSQGRIPVKWTAIESLFDHIY'