Пример #1
0
    def test_3(self):
        """
        Check the CDS and protein predicted for the human TMEM87B-MERTK
        fusion (case 3) against a manually generated reference sequence.
        """

        # test the dna and protein coding sequences are correct by comparing
        # with manually generated sequences

        fusion = agfusion.Fusion(
            gene5prime="TMEM87B",
            gene5primejunction=112843681,
            gene3prime="MERTK",
            gene3primejunction=112722771,
            db=db_human,
            pyensembl_data=data_human,
            protein_databases=['pfam', 'tmhmm'],
            noncanonical=False
        )

        fusion.save_transcript_cdna('TMEM87B-MERTK-case3')
        fusion.save_transcript_cds('TMEM87B-MERTK-case3')
        fusion.save_proteins('TMEM87B-MERTK-case3')

        # Read the reference CDS through a context manager so the file handle
        # is closed even if a later step raises.
        with open('./data/test-human-case-3.txt', 'r') as handle:
            test_cds = handle.read()

        test_protein = Seq.Seq(test_cds, alphabet=Alphabet.generic_dna).translate()

        # Trim at the first stop codon. find() returns -1 when there is no
        # stop codon; slicing with -1 would silently drop the last residue,
        # so only trim when a stop codon was actually found.
        stop_index = test_protein.find('*')
        if stop_index != -1:
            test_protein = test_protein[0:stop_index]

        trans = fusion.transcripts['ENST00000283206-ENST00000295408']

        assert test_cds==trans.cds.seq, "cds is wrongly predicted for human fusion (case 3)"
        assert test_protein==trans.protein.seq, "protein is wrongly predicted for human fusion (case 3)"
Пример #2
0
    def test_mouse_4(self):
        """
        Verify the fusion cDNA produced when the junctions lie in UTRs.
        """

        # Expected cDNA, kept in the same pieces as the reference.
        expected_cdna = (
            'GGGGGTGCGGCCGCCGAAGGGGGAGCTCCTCCCCCGTCCCCTCACCCCCTCAGCTGAGCT'
            'CGGGGCGGGGCGGGGTACGTGGAGCGGGGCCGGGCGGGGAAGCTGCTCCGAGTCCGGCCG'
            'GAGCGCACCCGGGGCGCCCGCGTACGCCGCTCGCGGGAACTTTGCGGCGGAGCCGCAGGT'
            'GTGGAGGCCGCGGAGGGGGGTGCATGAGCGGCGCGGAGAGCGGCGGCTGTCCGGTCCGGC'
            'CCCTGCTGGAGTCGCCGCCGGGAGGAGACGAACGAGGAACCAG'
            'GTGTGTGCCGCCTTCCTGATTCTGGAGAAAA'
            'AAAA'
        )

        fusion = agfusion.Fusion(gene5prime="ENSMUSG00000022770",
                                 gene5primejunction=31664851,
                                 gene3prime="ENSMUSG00000022770",
                                 gene3primejunction=31873343,
                                 db=db,
                                 pyensembl_data=data,
                                 protein_databases=['pfam', 'tmhmm'],
                                 noncanonical=True)

        transcript = fusion.transcripts['ENSMUST00000064477-ENSMUST00000064477']
        observed_cdna = str(transcript.cdna.seq)

        assert observed_cdna==expected_cdna, "Test 4: cDNA wrong"
Пример #3
0
def annotate(gene5prime,
             junction5prime,
             gene3prime,
             junction3prime,
             agfusion_db,
             pyensembl_data,
             args,
             outdir=None,
             colors=None,
             rename=None,
             scale=None,
             batch_out_dir=None):
    """
    Build one gene fusion and write its sequences, images and tables.

    If constructing the fusion raises IndexError, a message is printed and
    the function returns without writing anything. When ``batch_out_dir``
    is given, ``outdir`` is replaced by a sub-directory of it named after
    both gene names and junction positions.
    """

    try:
        fusion = agfusion.Fusion(
            gene5prime=gene5prime,
            gene5primejunction=junction5prime,
            gene3prime=gene3prime,
            gene3primejunction=junction3prime,
            db=agfusion_db,
            pyensembl_data=pyensembl_data,
            protein_databases=args.protein_databases,
            noncanonical=args.noncanonical
        )
    except IndexError:
        print('Problem for {} and {} fusion!'.format(gene5prime, gene3prime))
        return

    if batch_out_dir is not None:
        # One "<gene>-<junction>" label per partner, joined by an underscore.
        label5prime = fusion.gene5prime.gene.name + '-' + str(junction5prime)
        label3prime = fusion.gene3prime.gene.name + '-' + str(junction3prime)
        outdir = join(batch_out_dir, label5prime + '_' + label3prime)

    # The three sequence writers take the same arguments.
    for save_sequences in (fusion.save_transcript_cdna,
                           fusion.save_transcript_cds,
                           fusion.save_proteins):
        save_sequences(out_dir=outdir, middlestar=args.middlestar)

    image_options = {
        'out_dir': outdir,
        'file_type': args.type,
        'scale': scale,
        'colors': colors,
        'rename': rename,
        'fontsize': args.fontsize,
        'height': args.height,
        'width': args.width,
        'dpi': args.dpi,
        'no_domain_labels': args.no_domain_labels,
        'plot_WT': args.WT,
        'exclude': args.exclude_domain,
    }
    fusion.save_images(**image_options)

    fusion.save_tables(out_dir=outdir)
Пример #4
0
    def test_mouse_3(self):
        """
        Check the 5' and 3' junction effects reported for one transcript
        pair: '5UTR' on the 5' side and '3UTR (start)' on the 3' side.
        """

        fusion = agfusion.Fusion(gene5prime="ENSMUSG00000022770",
                                 gene5primejunction=31664850,
                                 gene3prime="ENSMUSG00000002413",
                                 gene3primejunction=39610381,
                                 db=db,
                                 pyensembl_data=data,
                                 protein_databases=['pfam', 'tmhmm'],
                                 noncanonical=True)

        transcript_key = 'ENSMUST00000064477-ENSMUST00000002487'

        assert fusion.transcripts[transcript_key].effect_5prime=='5UTR', \
            "Test 7: Not found in 5'UTR"
        assert fusion.transcripts[transcript_key].effect_3prime=='3UTR (start)', \
            "Test 7: Not found in at 3'UTR beginning"
Пример #5
0
    def test_mouse_7(self):
        """
        Check the junction effects reported for one transcript pair:
        'intron (before cds)' on the 5' side and 'intron (cds)' on the 3' side.
        """

        fusion = agfusion.Fusion(
            gene5prime="ENSMUSG00000022770",
            gene5primejunction=31664820,
            gene3prime="ENSMUSG00000002413",
            gene3primejunction=39610405,
            db=db,
            pyensembl_data=data,
            protein_databases=['pfam', 'tmhmm'],
            noncanonical=True
        )
        transcript = fusion.transcripts['ENSMUST00000023454-ENSMUST00000002487']

        assert transcript.effect_5prime == "intron (before cds)", \
            "Test 12: incorrect 5' effect: %s" % transcript.effect_5prime
        assert transcript.effect_3prime == "intron (cds)", \
            "Test 12: incorrect 3' effect: %s" % transcript.effect_3prime
Пример #6
0
    def test_1(self):
        """
        test CDS and cDNA correct for junction that is on exon boundaries and
        produces an in-frame protein.
        """

        # test the dna and protein coding sequences are correct by comparing
        # with manually generated sequences

        fusion = agfusion.Fusion(gene5prime="ENSMUSG00000022770",
                                 gene5primejunction=31684294,
                                 gene3prime="ENSMUSG00000002413",
                                 gene3primejunction=39648486,
                                 db=db,
                                 pyensembl_data=data,
                                 protein_databases=['pfam', 'tmhmm'],
                                 noncanonical=True)

        fusion.save_transcript_cdna('DLG1-BRAF_mouse')
        fusion.save_transcript_cds('DLG1-BRAF_mouse')
        fusion.save_proteins('DLG1-BRAF_mouse')

        # SeqIO.parse is lazy, so the records are materialised while the
        # file is still open; the context manager then closes the handle.
        with open('./data/Dlg1-Braf_cdna_manual.fa', 'r') as handle:
            test_cdna = list(SeqIO.parse(handle, 'fasta'))
        with open('./data/Dlg1-Braf_cds_manual.fa', 'r') as handle:
            test_cds = list(SeqIO.parse(handle, 'fasta'))

        expected_transcript_combinations = [
            'ENSMUST00000100001-ENSMUST00000002487',
            'ENSMUST00000064477-ENSMUST00000002487',
            'ENSMUST00000115205-ENSMUST00000002487',
            'ENSMUST00000023454-ENSMUST00000002487',
            'ENSMUST00000115201-ENSMUST00000002487',
            'ENSMUST00000132176-ENSMUST00000002487'
        ]

        # All six expected transcript pairs must be among the predicted ones.
        assert len(
            set(fusion.transcripts.keys()).intersection(
                set(expected_transcript_combinations))
        ) == 6, "Test 1: unexpected number protein coding transcripts."

        # Each reference record id is used as the key of the predicted
        # transcript pair it is compared against.
        for seq in test_cdna:
            trans = fusion.transcripts[str(seq.id)]
            assert seq.seq == trans.cdna.seq, "cDNA is wrongly predicted: %s" % seq.id

        for seq in test_cds:
            trans = fusion.transcripts[str(seq.id)]
            assert seq.seq == trans.cds.seq, "cds is wrongly predicted: %s" % seq.id
Пример #7
0
    def test_1(self):
        """
        Build a fusion for every record the 'fusioncatcher' parser yields
        and check that its name is one of the expected fusion names.
        """

        all_fusions = [
            'Adamts9-Ano2',
            'Trp53-Sat2',
            '1700112E06Rik-Runx1',
            'Runx1-1700112E06Rik',
            'Rell1-Lhfpl3',
            'Phc1-Smarca2',
            'Lrrc8d-Gbp11',
            'C920009B18Rik-H60b',
        ]

        parse_fusioncatcher = agfusion.parsers['fusioncatcher']
        candidates = parse_fusioncatcher(
            './data/FusionsFindingAlgorithms/FusionCatcher/final-list_candidate-fusion-genes.txt',
            db.logger)

        for candidate in candidates:
            fusion = agfusion.Fusion(gene5prime=candidate['gene5prime'],
                                     gene5primejunction=candidate['gene5prime_junction'],
                                     gene3prime=candidate['gene3prime'],
                                     gene3primejunction=candidate['gene3prime_junction'],
                                     db=db,
                                     pyensembl_data=data,
                                     protein_databases=['pfam'],
                                     noncanonical=False)
            assert fusion.name in all_fusions, '%s not in list!' % fusion.name
Пример #8
0
    def test_mouse_1(self):
        """
        Check the per-transcript junction effects: 'CDS (start)' on the
        5' side and 'CDS' on the 3' side.
        """

        fusion = agfusion.Fusion(gene5prime="ENSMUSG00000022770",
                                 gene5primejunction=31664852,
                                 gene3prime="ENSMUSG00000002413",
                                 gene3primejunction=39651764,
                                 db=db,
                                 pyensembl_data=data,
                                 protein_databases=['pfam', 'tmhmm'],
                                 noncanonical=True)

        transcript_key = 'ENSMUST00000064477-ENSMUST00000002487'

        assert fusion.transcripts[transcript_key].effect_5prime=='CDS (start)', \
            "Test 5: not CDS start"
        assert fusion.transcripts[transcript_key].effect_3prime=='CDS', \
            "Test 5: not CDS"
Пример #9
0
    def test_3(self):
        """
        Test CDS correct for junction within the exon (not on boundary) for
        one gene on the forward and one gene on the reverse strand
        """

        expected_cds = 'ATGCCGGTCCGGAAGCAAGAATTTGCAGCCTTCAAGTAG'

        fusion = agfusion.Fusion(
            gene5prime="ENSMUSG00000022770",
            gene5primejunction=31664869,
            gene3prime="ENSMUSG00000002413",
            gene3primejunction=39610402,
            db=db,
            pyensembl_data=data,
            protein_databases=['pfam', 'tmhmm'],
            noncanonical=True
        )

        transcript = fusion.transcripts['ENSMUST00000064477-ENSMUST00000002487']
        observed_cds = str(transcript.cds.seq)

        assert observed_cds == expected_cds, "Test 3: CDS wrong"
Пример #10
0
def annotate(gene5prime,junction5prime,gene3prime,junction3prime,
             outdir,colors,rename,scale,db,pyensembl_data,args):
    """
    Build one gene fusion and write its cDNA, CDS and protein sequences,
    its images and its tables to ``outdir``.
    """

    fusion = agfusion.Fusion(gene5prime=gene5prime,
                             gene5primejunction=junction5prime,
                             gene3prime=gene3prime,
                             gene3primejunction=junction3prime,
                             db=db,
                             pyensembl_data=pyensembl_data,
                             protein_databases=args.protein_databases,
                             noncanonical=args.noncanonical)

    # Sequence outputs all take the same two options.
    fusion.save_transcript_cdna(out_dir=outdir, middlestar=args.middlestar)
    fusion.save_transcript_cds(out_dir=outdir, middlestar=args.middlestar)
    fusion.save_proteins(out_dir=outdir, middlestar=args.middlestar)

    image_options = {
        'out_dir': outdir,
        'scale': scale,
        'colors': colors,
        'rename': rename,
        'fontsize': args.fontsize,
        'height': args.height,
        'width': args.width,
        'dpi': args.dpi,
        'no_domain_labels': args.no_domain_labels,
        'plot_WT': args.WT,
        'exclude': args.exclude_domain,
    }
    fusion.save_images(**image_options)

    fusion.save_tables(out_dir=outdir)
Пример #11
0
    def test_2(self):
        """
        Test CDS correct for junction within the exon (not on boundary) for
        two genes on reverse strand
        """

        expected_cds = (
            'ATGGCGGCGCTGAGTGGCGGCGGTGGCAGCAGCAGCGGTGGCGGCGGCGGCGGTGGCGGCGGCGG'
            'TGGCGGTGGCGACGGCGGCGGCGGCGCCGAGCAGGGCCAGGCTCTGTTCAATGGCGACATGGAGC'
            'CGGAGGCCGGCGCTGGCGCCGCGGCCTCTTCGGCTGCGGACCCGGCCATTCCTGAAGAATTTGCAGCCTTCAAGTAG'
        )

        fusion = agfusion.Fusion(
            gene5prime="ENSMUSG00000002413",
            gene5primejunction=39725110,
            gene3prime="ENSMUSG00000002413",
            gene3primejunction=39610402,
            db=db,
            pyensembl_data=data,
            protein_databases=['pfam', 'tmhmm'],
            noncanonical=True
        )

        transcript = fusion.transcripts['ENSMUST00000002487-ENSMUST00000002487']
        observed_cds = str(transcript.cds.seq)

        assert observed_cds == expected_cds, "Test 2: CDS wrong"
Пример #12
0
    def test_mouse_5(self):
        """
        Compare the cDNA predicted for one transcript pair with the
        expected sequence.
        """

        expected_cdna = (
            'GGGGGTGCGGCCGCCGAAGGGGGAGCTCCTCCCCCGTCCCCTCACCCCCTCAGCTGAGCT'
            'CGGGGCGGGGCGGGGTACGTGGAGCGGGGCCGGGCGGGGAAGCTGCTCCGAGTCCGGCCG'
            'GAGCGCACCCGGGGCGCCCGCGTACGCCGCTCGCGGGAACTTTGCGGCGGAGCCGCAGGT'
            'GTGGAGGCCGCGGAGGGGGGTGCATGAGCGGCGCGGAGAGCGGCGGCTGTCCGGTCCGGC'
            'CCCTGCTGGAGTCGCCGCCGGGAGGAGACGAACGAGGAACCAG'
            'GTGTGTGCCGCCTTCCTGATTCTGGAGAAA'
            'GAAA'
        )

        fusion = agfusion.Fusion(
            gene5prime="ENSMUSG00000022770",
            gene5primejunction=31664850,
            gene3prime="ENSMUSG00000002413",
            gene3primejunction=39603240,
            db=db,
            pyensembl_data=data,
            protein_databases=['pfam', 'tmhmm'],
            noncanonical=True
        )

        transcript = fusion.transcripts['ENSMUST00000064477-ENSMUST00000002487']
        observed_cdna = str(transcript.cdna.seq)

        assert observed_cdna == expected_cdna, "Test 10: incorrect cDNA"
Пример #13
0
def index():
    """
    Serve the main page and dispatch on which form button was submitted.

    Depending on the keys present in ``request.form`` this either

    * builds a fusion, saves its sequences and renders its plots
      ('submitfusion'),
    * sends a previously saved sequence file ('downloadseq'),
    * renders and sends an image for the stored fusion ('downloadimage'), or
    * renders the page with default parameters.

    Returns the result of ``render_template`` / ``send_file``, or the error
    value produced by ``check_fusion_input`` / ``check_params``.
    """

    # Default values handed to the template.
    params = {
        'gene5prime': "",
        'gene3prime': "",
        'loc5prime': "",
        'loc3prime': "",
        'grcm38_color': "white",
        'grch37_color': "white",
        'grch38_color': "white",
        'fontsize': "12",
        'dpi': "100",
        'imagewidth': "8",
        'imageheight': "2",
        'scale': "0",
        'genome': "GRCm38",
        'plotdisplay': 'none',
        'inputerror': 'hidden',
        'optionalparameterdisplay': 'none',
    }

    # First visit: give the session an id and its own output directory.
    if 'id' not in session:
        session['id'] = str(uuid.uuid4())

        session['userdata'] = os.path.join(os.path.abspath(os.curdir),
                                           'userdata', session['id'])

        if not os.path.exists(session['userdata']):
            os.mkdir(session['userdata'])

    if 'submitfusion' in request.form:

        # Copy the submitted form values over the defaults.
        for key in ('gene5prime', 'gene3prime', 'loc5prime', 'loc3prime',
                    'fontsize', 'dpi', 'imagewidth', 'imageheight',
                    'scale', 'genome'):
            params[key] = str(request.form[key])
        params['inputerror'] = 'hidden'

        params, pyensembl_data = set_genome(params)

        params, error = check_fusion_input(params=params)
        if error is not None:
            return error

        params, error = check_params(params=params)
        if error is not None:
            return error

        # Try to construct the fusion. Every known input problem is
        # reported the same way: show the message on the index page.
        try:
            fusion = agfusion.Fusion(
                gene5prime=params['gene5prime'],
                gene5primejunction=int(params['loc5prime']),
                gene3prime=params['gene3prime'],
                gene3primejunction=int(params['loc3prime']),
                db=db,
                pyensembl_data=pyensembl_data)
        except (agfusion.exceptions.GeneIDException5prime,
                agfusion.exceptions.GeneIDException3prime,
                agfusion.exceptions.JunctionException5prime,
                agfusion.exceptions.JunctionException3prime,
                agfusion.exceptions.TooManyGenesException) as e:
            params['inputerrormsg'] = e
            params['inputerror'] = 'visible'
            return render_template('index.html', params=params)

        # Save the fusion output and visualize.
        middlestar = False

        fusion.save_transcript_cdna(out_dir=session['userdata'],
                                    middlestar=middlestar)
        fusion.save_transcript_cds(out_dir=session['userdata'],
                                   middlestar=middlestar)
        fusion.save_proteins(out_dir=session['userdata'],
                             middlestar=middlestar)

        # Keep the fusion on disk so a later 'downloadimage' request can
        # reload it; the context manager flushes and closes the file.
        with open(session['userdata'] + '/fusion.pk', 'wb') as handle:
            pickle.dump(fusion, handle)

        session['name'] = fusion.name

        dict_of_plots, plot_key = fusion.output_to_html(
            fontsize=params['fontsize'],
            dpi=params['dpi'],
            width=params['imagewidth'],
            height=params['imageheight'],
            scale=params['scale'],
            mpld3=mpld3)

        # Per-transcript effect summary for the template.
        effects = {
            name: {
                'effect': transcript.effect,
                '5prime_effect': transcript.effect_5prime,
                '3prime_effect': transcript.effect_3prime
            }
            for name, transcript in fusion.transcripts.items()
        }

        session['plot_key'] = plot_key

        params['plotdisplay'] = 'visible'
        params['name'] = fusion.name

        params['gene5prime'] = params['gene5prime'].upper()
        params['gene3prime'] = params['gene3prime'].upper()

        return render_template('index.html',
                               dict_of_plots=dict_of_plots,
                               effects=effects,
                               params=params)

    elif 'downloadseq' in request.form:

        # Download sequence data: anything other than 'cdna' or 'cds'
        # falls back to the protein file.
        suffix = {
            'cdna': '_cdna.fa',
            'cds': '_cds.fa',
        }.get(request.form['downloadseq'], '_protein.fa')

        return send_file(session['userdata'] + '/' + session['name'] + suffix,
                         as_attachment=True)

    elif 'downloadimage' in request.form:

        for key in ('fontsize', 'dpi', 'imagewidth', 'imageheight', 'scale'):
            params[key] = str(request.form[key])
        params['inputerror'] = 'hidden'
        params['plotdisplay'] = 'visible'

        params, error = check_params(params=params)
        if error is not None:
            return error

        # Download image data.
        # NOTE(review): unpickling runs arbitrary code if the file can be
        # replaced by an attacker; the path comes from session['userdata'],
        # so confirm the session contents cannot be tampered with.
        with open(session['userdata'] + '/fusion.pk', 'rb') as handle:
            fusion = pickle.load(handle)

        fusion_name = str(request.form['image_key'])

        image_file = fusion.save_image(transcript=fusion_name,
                                       out_dir=session['userdata'],
                                       file_type=str(
                                           request.form['downloadimage']),
                                       fontsize=params['fontsize'],
                                       dpi=params['dpi'],
                                       width=params['imagewidth'],
                                       height=params['imageheight'],
                                       scale=params['scale'])

        return send_file(image_file, as_attachment=True)

    # Default index load.
    return render_template('index.html', params=params)