def register_user_db_tasks(handler, config, databases):
    '''Register conditional reciprocal best LAST (CRBL) searches of the
    transcriptome against every user-supplied database.

    For each database path the CRBL tasks are registered under names of
    the form `user-database:<db>-shmlast-<task>`, followed by one task
    that converts the CRBL results to GFF3.

    Args:
        handler: Task handler to register on. Its `files` mapping must
            hold a 'transcriptome' entry and gains a '<db>-crbl' entry
            per database.
        config (dict): Config dictionary. Nothing is registered when it
            has no 'user_databases' key; otherwise 'user_databases',
            'n_threads' and 'evalue' are read.
        databases: Not referenced by this function.
    '''
    if 'user_databases' not in config:
        return

    transcriptome_fn = handler.files['transcriptome']
    # Sorted target tuples of the CRBL tasks registered so far; a task
    # whose targets were already seen is not registered a second time.
    seen_targets = set()

    for user_db in config['user_databases']:
        user_db = path.abspath(user_db)
        db_label = path.basename(user_db)

        csv_fn = '{0}.x.{1}.crbl.csv'.format(transcriptome_fn, db_label)
        annot_fn = '{0}.x.{1}.crbl.gff3'.format(transcriptome_fn, db_label)

        searcher = CRBL(transcriptome_fn, user_db, csv_fn,
                        n_threads=config['n_threads'],
                        cutoff=config['evalue'])

        for crbl_task in searcher.tasks():
            target_key = tuple(sorted(crbl_task.targets))
            if target_key not in seen_targets:
                seen_targets.add(target_key)
                crbl_task.name = 'user-database:{0}-shmlast-{1}'.format(
                    db_label, crbl_task.name)
                handler.register_task(crbl_task.name,
                                      add_profile_actions(crbl_task))

        gff3_task_name = 'gff3:{0}'.format(csv_fn)
        gff3_task = get_shmlast_gff3_task(csv_fn, annot_fn, db_label)
        handler.register_task(
            gff3_task_name,
            gff3_task,
            files={'{0}-crbl-gff3'.format(db_label): annot_fn})

        # Record the CRBL results file on the handler under '<db>-crbl'.
        handler.files['{0}-crbl'.format(db_label)] = csv_fn
def register_lastal_tasks(handler, config, databases,
                          include_uniref=False, include_nr=False):
    '''Register tasks for `lastal` searches.

    The transcriptome is always aligned against OrthoDB and sprot;
    uniref90 and nr are added only on request (the uniref90 search takes
    considerably longer). For every database three tasks are registered:
    the `lastal` alignment, best-hit extraction from the resulting MAF,
    and conversion of the best hits to GFF3.

    Args:
        handler (handler.TaskHandler): The task handler to register on;
            its `files` mapping must hold a 'transcriptome' entry.
        config (dict): Config dictionary, which contains the command line
            arguments and the entries from the config file. Reads
            'evalue', 'n_threads', 'sshloginfile', and the 'frameshift'
            and 'params' entries under ['last']['lastal'].
        databases (dict): The dictionary of files from a database
            TaskHandler.
        include_uniref (bool): If True, add tasks for searching uniref90.
        include_nr (bool): If True, add tasks for searching nr.
    '''
    transcriptome_fn = handler.files['transcriptome']
    lastal_opts = config['last']['lastal']

    # Insertion order fixes the order in which tasks get registered.
    search_dbs = OrderedDict()
    for required in ('OrthoDB', 'sprot'):
        search_dbs[required] = databases[required]
    # The flags are compared by identity: only the literal True enables
    # an optional database.
    for flag, optional in ((include_uniref, 'uniref90'),
                           (include_nr, 'nr')):
        if flag is True:
            search_dbs[optional] = databases[optional]

    for db_name, db_fn in search_dbs.items():
        maf_fn = '{0}.x.{1}.maf'.format(transcriptome_fn, db_name)
        best_fn = '{0}.x.{1}.best.csv'.format(transcriptome_fn, db_name)
        gff3_fn = '{0}.x.{1}.best.gff3'.format(transcriptome_fn, db_name)

        align_task = LastalTask().task(
            transcriptome_fn,
            db_fn,
            maf_fn,
            translate=True,
            cutoff=config['evalue'],
            n_threads=config['n_threads'],
            frameshift=lastal_opts['frameshift'],
            pbs=config['sshloginfile'],
            params=lastal_opts['params'])
        handler.register_task('lastal:{0}'.format(db_name),
                              add_profile_actions(align_task),
                              files={db_name: maf_fn})

        best_hits_task = get_maf_best_hits_task(maf_fn, best_fn)
        handler.register_task('lastal:best-hits:{0}'.format(db_name),
                              best_hits_task,
                              files={'{0}-best-hits'.format(db_name): best_fn})

        gff3_task = get_maf_gff3_task(best_fn, gff3_fn, db_name)
        handler.register_task('gff3:{0}'.format(db_name),
                              gff3_task,
                              files={'{0}-gff3'.format(db_name): gff3_fn})