Пример #1
0
def register_user_db_tasks(handler, config, databases):
    '''Register conditional reciprocal best LAST (CRBL) searches of the
    transcriptome against each user-supplied database.

    For every path in ``config['user_databases']`` this registers the tasks
    yielded by ``CRBL(...).tasks()`` and one task converting the CRBL
    results to GFF3, then records the results file in ``handler.files``
    under ``'<db basename>-crbl'``. Does nothing when the
    ``'user_databases'`` entry is missing, ``None`` or empty.

    Args:
        handler: Task handler to register on. Its ``files`` mapping must
            contain ``'transcriptome'``; ``register_task`` is called on it.
        config (dict): Config dictionary. Reads ``'user_databases'``,
            ``'n_threads'`` and ``'evalue'``.
        databases: Not used by this function; kept so the signature stays
            compatible with existing callers.
    '''

    user_databases = config.get('user_databases')
    if not user_databases:
        # Covers a missing key, an explicit None (e.g. an unset optional
        # argument) and an empty list alike: there is nothing to search.
        return

    input_fn = handler.files['transcriptome']

    # Target tuples of the shmlast tasks registered so far by this call, so
    # a task producing the same outputs is registered only once.
    # NOTE(review): presumably tasks that depend only on the transcriptome
    # are yielded again for every database -- confirm against shmlast.
    shmlast_tasks = set()

    for db_path in user_databases:
        db_path = path.abspath(db_path)
        db_basename = path.basename(db_path)

        results_fn = '{0}.x.{1}.crbl.csv'.format(input_fn, db_basename)
        gff3_fn = '{0}.x.{1}.crbl.gff3'.format(input_fn, db_basename)

        crbl = CRBL(input_fn,
                    db_path,
                    results_fn,
                    n_threads=config['n_threads'],
                    cutoff=config['evalue'])

        for task in crbl.tasks():
            # Sorting makes the key independent of the order of the targets.
            target_key = tuple(sorted(task.targets))
            if target_key in shmlast_tasks:
                continue
            shmlast_tasks.add(target_key)

            # Prefix the task name with the database it was generated for.
            task.name = 'user-database:{0}-shmlast-{1}'.format(
                db_basename, task.name)
            handler.register_task(task.name, add_profile_actions(task))

        handler.register_task(
            'gff3:{0}'.format(results_fn),
            get_shmlast_gff3_task(results_fn, gff3_fn, db_basename),
            files={'{0}-crbl-gff3'.format(db_basename): gff3_fn})
        handler.files['{0}-crbl'.format(db_basename)] = results_fn
Пример #2
0
def register_lastal_tasks(handler,
                          config,
                          databases,
                          include_uniref=False,
                          include_nr=False):
    '''Register `lastal` search tasks for the transcriptome.

    OrthoDB and sprot are always searched. uniref90 (which takes
    considerably longer) and nr are added only when their flag is passed
    as the literal ``True``. Three tasks are registered per database: the
    `lastal` alignment (``*.maf``), the best-hits extraction
    (``*.best.csv``) and the GFF3 conversion of the best hits
    (``*.best.gff3``).

    Args:
        handler (handler.TaskHandler): The task handler to register on.
        config (dict): Config dictionary, which contains the command
            line arguments and the entries from the config file.
        databases (dict): The dictionary of files from a database
            TaskHandler.
        include_uniref (bool): If True, add tasks for searching uniref90.
        include_nr (bool): If True, add tasks for searching nr.
    '''

    transcriptome_fn = handler.files['transcriptome']
    lastal_opts = config['last']['lastal']

    # Insertion order fixes the order in which tasks are registered.
    targets = OrderedDict()
    for required in ('OrthoDB', 'sprot'):
        targets[required] = databases[required]
    for requested, optional in ((include_uniref, 'uniref90'),
                                (include_nr, 'nr')):
        # Identity check on purpose: only the literal True enables a search.
        if requested is True:
            targets[optional] = databases[optional]

    for db_name, db_fn in targets.items():
        # All three output files share this stem.
        stem = '{0}.x.{1}'.format(transcriptome_fn, db_name)
        maf_fn = stem + '.maf'

        search_task = LastalTask().task(
            transcriptome_fn,
            db_fn,
            maf_fn,
            translate=True,
            cutoff=config['evalue'],
            n_threads=config['n_threads'],
            frameshift=lastal_opts['frameshift'],
            pbs=config['sshloginfile'],
            params=lastal_opts['params'])
        handler.register_task('lastal:{0}'.format(db_name),
                              add_profile_actions(search_task),
                              files={db_name: maf_fn})

        hits_fn = stem + '.best.csv'
        annot_fn = stem + '.best.gff3'

        handler.register_task('lastal:best-hits:{0}'.format(db_name),
                              get_maf_best_hits_task(maf_fn, hits_fn),
                              files={'{0}-best-hits'.format(db_name): hits_fn})
        handler.register_task('gff3:{0}'.format(db_name),
                              get_maf_gff3_task(hits_fn, annot_fn, db_name),
                              files={'{0}-gff3'.format(db_name): annot_fn})