Exemplo n.º 1
0
    def test_1(self):
        """
        Build a Fusion for every record parsed from the FusionCatcher
        candidate list and check its name against the expected names.
        """

        # NOTE(review): this handle is opened but never used below; the loop
        # relies on the `db` and `data` names from the enclosing scope.
        agfusion_db = agfusion.AGFusionDB("agfusion.homo_sapiens.84.db", debug=False)

        expected_names = [
            'Adamts9-Ano2',
            'Trp53-Sat2',
            '1700112E06Rik-Runx1',
            'Runx1-1700112E06Rik',
            'Rell1-Lhfpl3',
            'Phc1-Smarca2',
            'Lrrc8d-Gbp11',
            'C920009B18Rik-H60b',
        ]

        parse_fusioncatcher = agfusion.parsers['fusioncatcher']
        candidates = parse_fusioncatcher(
            './data/FusionsFindingAlgorithms/FusionCatcher/final-list_candidate-fusion-genes.txt',
            db.logger
        )

        for record in candidates:
            built = agfusion.Fusion(
                gene5prime=record['gene5prime'],
                gene5primejunction=record['gene5prime_junction'],
                gene3prime=record['gene3prime'],
                gene3primejunction=record['gene3prime_junction'],
                db=db,
                pyensembl_data=data,
                protein_databases=['pfam'],
                noncanonical=False
            )
            assert built.name in expected_names, '%s not in list!' % built.name
Exemplo n.º 2
0
def main():
    """
    Main function for processing command line options.

    Defines the ``annotate``, ``batch``, ``download`` and ``build``
    sub-commands, parses the command line and dispatches on the chosen one:

    * ``build`` calls ``builddb(args)`` and exits.
    * ``download`` either lists the available databases (``--available``)
      or calls ``downloaddb(args)``, then exits.
    * ``annotate`` / ``batch`` create the output directory if it is missing,
      open the AGFusion database, load the matching pyensembl release, parse
      the rename / recolor options, validate the image type and finally call
      ``annotate(...)`` or ``batch_mode(...)``.

    Malformed command line input is reported through ``parser.error`` (exit
    status 2); missing pyensembl data or an invalid image type is logged and
    the process exits with status 1.
    """

    parser = argparse.ArgumentParser(
        description='Annotate Gene Fusion (AGFusion)')
    subparsers = parser.add_subparsers(help='AGFusion programs.',
                                       dest="subparser_name")

    # single fusion parser

    annotate_parser = subparsers.add_parser(
        'annotate', help='Annotate and visualize a single fusion.')
    annotate_parser.add_argument('-g5',
                                 '--gene5prime',
                                 type=str,
                                 required=True,
                                 help='5\' gene partner')
    annotate_parser.add_argument('-g3',
                                 '--gene3prime',
                                 type=str,
                                 required=True,
                                 help='3\' gene partner')
    annotate_parser.add_argument(
        '-j5',
        '--junction5prime',
        type=int,
        required=True,
        help='Genomic location of predicted fusion for the 5\' gene ' +
        'partner. The 1-based position that is the last nucleotide ' +
        'included in the fusion before the junction.')
    annotate_parser.add_argument(
        '-j3',
        '--junction3prime',
        type=int,
        required=True,
        help='Genomic location of predicted fusion for the 3\' gene ' +
        'partner. The 1-based position that is the first nucleotide ' +
        'included in the fusion after the junction.')
    add_common_flags(annotate_parser)
    annotate_parser.add_argument(
        '--scale',
        type=int,
        required=False,
        default=-1,
        help='(Optional) Set maximum width (in amino acids) of the ' +
        'figure to rescale the fusion (default: max length of ' +
        'fusion product)')

    # batch file parser

    batch_parser = subparsers.add_parser(
        'batch',
        help='Annotate fusions from an output file from a fusion ' +
        'finding algorithm.')
    batch_parser.add_argument(
        '-f',
        '--file',
        type=str,
        required=True,
        help='Output file from fusion-finding algorithm.')
    batch_parser.add_argument(
        '-a',
        '--algorithm',
        type=str,
        required=True,
        help='The fusion-finding algorithm. Can be one of the following: ' +
        ', '.join(agfusion.parsers.keys()) + '.')
    add_common_flags(batch_parser)

    # download database

    database_parser = subparsers.add_parser(
        'download', help='Download database for a reference genome.')
    database_parser.add_argument(
        '-d',
        '--dir',
        type=str,
        default='',
        help='(Optional) Directory the database will be downloaded ' +
        'to (defaults to current working directory).')
    database_parser.add_argument(
        '-g',
        '--genome',
        type=str,
        default=None,
        help='Specify the genome shortcut (e.g. hg19). To see all ' +
        'available shortcuts run \'agfusion download -a\'. Either ' +
        'specify this or --species and --release.')
    database_parser.add_argument('-s',
                                 '--species',
                                 type=str,
                                 default=None,
                                 help='The species (e.g. homo_sapiens).')
    database_parser.add_argument('-r',
                                 '--release',
                                 type=int,
                                 default=None,
                                 help='The ensembl release (e.g. 87).')
    database_parser.add_argument(
        '-a',
        '--available',
        action='store_true',
        required=False,
        help='List available species and ensembl releases.')

    # build database parser

    build_database_parser = subparsers.add_parser(
        'build', help='Build database for a reference genome.')
    build_database_parser.add_argument(
        '-d',
        '--dir',
        type=str,
        required=True,
        help='Directory to write database file to.')
    build_database_parser.add_argument('-s',
                                       '--species',
                                       type=str,
                                       required=True,
                                       help='The species (e.g. homo_sapiens).')
    build_database_parser.add_argument('-r',
                                       '--release',
                                       type=int,
                                       required=True,
                                       help='The ensembl release (e.g. 87).')
    build_database_parser.add_argument(
        '--pfam',
        type=str,
        required=True,
        help='File containing PFAM ID mappings.')
    build_database_parser.add_argument(
        '--server',
        type=str,
        required=False,
        default='ensembldb.ensembl.org',
        help='(optional) Ensembl server (default ensembldb.ensembl.org)')

    # agfusion version number

    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version=agfusion.__version__)
    args = parser.parse_args()

    # Python 3 argparse does not require a sub-command by default; without
    # this guard the per-command options read below would be missing from
    # ``args`` and the failure would surface as an AttributeError.
    if args.subparser_name is None:
        parser.error('no AGFusion program specified')

    if args.subparser_name == 'build':
        builddb(args)
        raise SystemExit(0)
    elif args.subparser_name == 'download':
        if args.available:
            list_available_databases()
        else:
            downloaddb(args)
        raise SystemExit(0)

    # single or batch mode
    # (args.out, args.database, args.debug, args.rename, args.recolor and
    # args.type are presumably defined by add_common_flags -- confirm there)

    if not exists(args.out):
        mkdir(args.out)

    # the species and the ensembl release are taken from the dot-separated
    # database file name: the second and third fields respectively
    # (e.g. agfusion.homo_sapiens.84.db)

    db_file = split(args.database)[1]
    name_fields = db_file.split('.')

    if len(name_fields) < 3:
        parser.error(
            'cannot determine species and release from database file ' +
            'name: {}'.format(db_file))

    species = name_fields[1]
    release = name_fields[2]

    # explicit check instead of ``assert`` so the validation is not stripped
    # when Python runs with -O
    if species not in AVAILABLE_ENSEMBL_SPECIES:
        parser.error('unsupported species!')

    agfusion_db = agfusion.AGFusionDB(args.database, debug=args.debug)
    agfusion_db.build = species + '_' + str(release)

    # get the pyensembl data

    pyensembl_data = pyensembl.EnsemblRelease(release, species)

    try:
        # accessing ``db`` is only a probe: a ValueError here is treated as
        # "pyensembl data not installed"
        pyensembl_data.db
    except ValueError:
        agfusion_db.logger.error(
            "Missing pyensembl data. Run pyensembl install --release " +
            "{} --species {}".format(release, species))
        raise SystemExit(1)

    # parse the re-coloring and re-naming; each item is 'key;value'

    colors = {}
    rename = {}

    if args.rename is not None:
        for item in args.rename:
            pair = item.split(';')

            if len(pair) != 2:
                parser.error("did not properly specify --rename")

            if pair[0] in rename:
                agfusion_db.logger.warning(
                    "WARNING - you rename {} twice.".format(pair[0]))

            rename[pair[0]] = pair[1]

    if args.recolor is not None:
        for item in args.recolor:
            pair = item.split(';')

            if len(pair) != 2:
                parser.error("did not properly specify --recolor")

            if pair[0] in colors:
                agfusion_db.logger.warning(
                    "You specified colors for {} twice.".format(pair[0]))

            # colors are stored under the new name when the entry is also
            # being renamed
            if pair[0] in rename:
                colors[rename[pair[0]]] = pair[1]
            else:
                colors[pair[0]] = pair[1]

    # check image file type is valid

    if args.type not in ('png', 'pdf', 'jpeg'):
        agfusion_db.logger.error(
            "ERROR - provided an incorrect image file type: {}.".format(
                args.type))
        raise SystemExit(1)

    if args.subparser_name == 'annotate':
        annotate(gene5prime=args.gene5prime,
                 junction5prime=args.junction5prime,
                 gene3prime=args.gene3prime,
                 junction3prime=args.junction3prime,
                 agfusion_db=agfusion_db,
                 pyensembl_data=pyensembl_data,
                 args=args,
                 outdir=args.out,
                 colors=colors,
                 rename=rename,
                 scale=args.scale)
    elif args.subparser_name == 'batch':
        batch_mode(args, agfusion_db, pyensembl_data, rename, colors)
Exemplo n.º 3
0
from os.path import join, expanduser, curdir, abspath
import unittest
import agfusion
from agfusion import utils
import pyensembl
from Bio import SeqIO

# Shared fixtures for the tests below: the pyensembl release 84 'mouse'
# object and the AGFusion database file resolved against the current
# directory.
data = pyensembl.EnsemblRelease(84, 'mouse')
db = agfusion.AGFusionDB(
    abspath(join(curdir, 'agfusion.mus_musculus.84.db'))
)
db.build = 'mus_musculus_84'


class TestSequencePrediction(unittest.TestCase):
    def test_1(self):
        """
        Check the CDS and cDNA for a junction that sits on exon boundaries
        and produces an in-frame protein.
        """

        # The DNA and protein coding sequences are meant to be compared
        # with manually generated sequences.

        fusion_options = {
            'gene5prime': "ENSMUSG00000022770",
            'gene5primejunction': 31684294,
            'gene3prime': "ENSMUSG00000002413",
            'gene3primejunction': 39648486,
            'db': db,
            'pyensembl_data': data,
            'protein_databases': ['pfam', 'tmhmm'],
            'noncanonical': True,
        }
        fusion = agfusion.Fusion(**fusion_options)
Exemplo n.º 4
0
from os.path import join, expanduser, curdir, abspath
import unittest
import agfusion
from agfusion import utils
import pyensembl
from Bio import SeqIO, Seq, Alphabet

# Mouse fixtures: pyensembl release 84 plus the AGFusion database file
# resolved against the current directory.
data = pyensembl.EnsemblRelease(84, 'mouse')
db = agfusion.AGFusionDB(
    abspath(join(curdir, 'agfusion.mus_musculus.84.db'))
)
db.build = 'mus_musculus_84'

# Human fixtures: pyensembl release 75 plus its AGFusion database file.
data_human = pyensembl.EnsemblRelease(75, 'human')
db_human = agfusion.AGFusionDB(
    abspath(join(curdir, 'agfusion.homo_sapiens.75.db'))
)
db_human.build = 'homo_sapiens_75'


class TestSequencePrediction_human(unittest.TestCase):
    def test_1(self):
        """
        test CDS and protein correct for junction that is on exon boundaries and
        produces an out-of-frame protein.
        """

        #test the dna and protein coding sequences are correct by comparing
        #with manually generated sequences

        fusion = agfusion.Fusion(
            gene5prime="TMEM87B",
            gene5primejunction=112843681,
            gene3prime="MERTK",
            gene3primejunction=112722768,
Exemplo n.º 5
0
from flask_errormail import mail_on_500

# Command line options of the web application.
# NOTE(review): the arguments are parsed at import time, so importing this
# module without --database on the command line makes argparse exit.
parser = argparse.ArgumentParser(description='AGFusion web application')
parser.add_argument('--database',
                    type=str,
                    required=True,
                    help='Path to the database file (e.g. agfusion.db)')
args = parser.parse_args()

# Recipients of the error e-mails. The trailing comma is required: without it
# the parentheses are plain grouping and the value is a str, not a one-element
# tuple of addresses.
ADMINISTRATORS = ('*****@*****.**',)

app = Flask(__name__)
# Loads the upper-case names of this module (e.g. ADMINISTRATORS) as config.
app.config.from_object(__name__)
mail_on_500(app, ADMINISTRATORS)

db = agfusion.AGFusionDB(args.database)

# NOTE(review): hard-coded placeholder secret key and credentials; presumably
# meant to be overridden through the FLASKR_SETTINGS file loaded right after
# -- confirm before deploying.
app.config.update(
    dict(DATABASE=None,
         SECRET_KEY='development key',
         USERNAME='******',
         PASSWORD='******'))
app.config.from_envvar('FLASKR_SETTINGS', silent=True)


def check_params(params):

    #check parameters

    if params['dpi'] == '':
        params['dpi'] = '100'