def test_burst_post_align(self):
    """Run ``BurstAligner._post_align`` twice on the bundled ``.b6`` results.

    The first call uses the aligner exactly as constructed and its return
    value is not inspected. The second call happens after ``post_align`` is
    set to ``'taxonomy'``; the test passes when ``.any().any()`` on that
    second result is truthy.
    """
    data_dir = pkg_resources.resource_filename(
        'shogun.tests', os.path.join('data'))
    burst = BurstAligner(data_dir, threads=1, shell=False)
    results_path = pkg_resources.resource_filename(
        'shogun.tests',
        os.path.join('data', 'results', 'burst_results.b6'))

    # Default configuration: only checks that the call completes.
    burst._post_align(results_path)

    # NOTE(review): presumably this switches off "capitalist" post-processing
    # -- confirm against BurstAligner.
    burst.post_align = 'taxonomy'
    taxonomy_table = burst._post_align(results_path)

    self.assertTrue(taxonomy_table.any().any())
def test_burst_align(self):
    """Align the bundled ``combined_seqs.fna`` with Burst.

    The test passes when the first element of the value returned by
    ``BurstAligner.align`` equals ``0``. Output is written under the test's
    temporary directory (``self.temp_dir``).
    """
    db_path = pkg_resources.resource_filename(
        'shogun.tests', os.path.join('data'))
    burst = BurstAligner(db_path, threads=1, shell=False)
    query_fasta = pkg_resources.resource_filename(
        'shogun.tests', os.path.join('data', 'combined_seqs.fna'))
    work_dir = os.path.join(self.temp_dir.name)

    align_result = burst.align(query_fasta, work_dir)
    first_item = align_result[0]
    self.assertTrue(first_item == 0)
def assign_taxonomy(ctx, aligner, capitalist, input, database, output):
    """Convert an existing alignment file into a tab-separated taxonomy table.

    Parameters
    ----------
    ctx
        Command context; ``ctx.obj['shell']`` is forwarded to the aligner.
    aligner : str
        Key into ``ALIGNERS``, or ``'auto'`` to pick one from the extension
        of ``input`` (``b6`` -> burst, ``sam`` -> bowtie2, ``tsv``/``txt`` ->
        utree; anything else falls back to burst with a warning).
    capitalist : bool
        When false, the ``'burst'`` entry of ``ALIGNERS`` is replaced by a
        factory that builds ``BurstAligner`` with ``capitalist=False``.
    input : str
        Path of the alignment file handed to the aligner's ``_post_align``.
    database
        Database location passed to the aligner factory.
    output : str
        Path of the table to write. Its parent directory is created when it
        does not exist yet.

    Raises
    ------
    KeyError
        If ``aligner`` is neither ``'auto'`` nor a key of ``ALIGNERS``.
    """
    # dirname() is '' for a bare file name and os.makedirs('') raises, so a
    # directory is only created when the path actually names one.
    output_dir = os.path.dirname(output)
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    shell = ctx.obj['shell']

    if not capitalist:
        # Set to not run Burst post-align in capitalist mode. This mutates the
        # module-level registry, exactly as before.
        def _non_capitalist_burst(database, shell=shell):
            return BurstAligner(database, shell=shell, capitalist=False)

        ALIGNERS['burst'] = _non_capitalist_burst

    # Sniff aligner based on file extension
    if aligner == 'auto':
        file_ending = input.split(".")[-1]
        sniffer_dict = {
            "b6": "burst",
            "sam": "bowtie2",
            "tsv": "utree",
            "txt": "utree",
        }
        if file_ending in sniffer_dict:
            aligner = sniffer_dict[file_ending]
        else:
            logger.warning(
                "File ending %s not found, assuming burst" % file_ending)
            aligner = "burst"

    aligner_cl = ALIGNERS[aligner](database, shell=shell)
    df = aligner_cl._post_align(input)
    df.to_csv(output, sep='\t', float_format="%d", na_rep=0,
              index_label="#OTU ID")
def pipeline(ctx, aligner, input, database, output, level, function,
             capitalist, taxacut, threads, percent_id, ra):
    """Align ``input``, then optionally redistribute and profile functions.

    Parameters
    ----------
    ctx
        Command context; ``ctx.obj['shell']`` is forwarded to the aligners.
    aligner : str
        Key into ``ALIGNERS``, or ``'all'`` to run every registered aligner.
    input
        Query passed to each aligner's ``align``.
    database
        Database location passed to the aligner factories, ``_redistribute``
        and ``_function``.
    output : str
        Output directory; created when it does not exist.
    level : str
        Redistribution level. ``'off'`` skips redistribution and therefore
        the function step as well.
    function : bool
        When truthy (and ``level`` is not ``'off'``) ``_function`` is run on
        the redistributed tables.
    capitalist : bool
        When false, the ``'burst'`` entry of ``ALIGNERS`` is replaced by a
        factory that builds ``BurstAligner`` with ``capitalist=False``.
    taxacut, threads, percent_id
        Forwarded unchanged to the aligner factories.
    ra : bool
        Passed to ``_redistribute`` as ``relative_abundance``; when truthy
        the redistributed files are also handed to
        ``_convert_files_to_relative_abundances``.

    Raises
    ------
    KeyError
        If ``aligner`` is neither ``'all'`` nor a key of ``ALIGNERS``.
    """
    if not os.path.exists(output):
        os.makedirs(output)

    shell = ctx.obj['shell']

    if not capitalist:
        # Set to not run Burst post-align in capitalist mode. The factory has
        # to accept every keyword the call sites below pass (percent_id and
        # taxacut included), otherwise invoking it raises TypeError.
        def _non_capitalist_burst(database, threads=threads, shell=shell,
                                  percent_id=percent_id, taxacut=taxacut):
            return BurstAligner(database, shell=shell, threads=threads,
                                taxacut=taxacut, capitalist=False,
                                percent_id=percent_id)

        ALIGNERS['burst'] = _non_capitalist_burst

    redist_outs = []
    redist_levels = []

    if aligner == 'all':
        for align in ALIGNERS.values():
            aligner_cl = align(database, threads=threads, shell=shell,
                               percent_id=percent_id, taxacut=taxacut)
            aligner_cl.align(input, output)
            # Compare by value: identity against a str literal is unreliable.
            if level != 'off':
                # One table per aligner, so the aligner name goes in the file.
                redist_out = os.path.join(
                    output,
                    "taxatable.%s.%s.txt" % (aligner_cl._name, level))
                _redist_outs, _redist_levels = _redistribute(
                    database, level, redist_out, aligner_cl.outfile,
                    relative_abundance=ra)
                redist_outs.extend(_redist_outs)
                redist_levels.extend(_redist_levels)
    else:
        aligner_cl = ALIGNERS[aligner](database, threads=threads, shell=shell,
                                       percent_id=percent_id, taxacut=taxacut)
        aligner_cl.align(input, output)
        logger.debug(level)
        if level != 'off':
            redist_out = os.path.join(output, "taxatable.%s.txt" % (level))
            redist_outs, redist_levels = _redistribute(
                database, level, redist_out, aligner_cl.outfile,
                relative_abundance=ra)

    if function and level != 'off':
        _function(redist_outs, database, output, redist_levels,
                  save_median_taxatable=True)

    if ra:
        _convert_files_to_relative_abundances(redist_outs)
def test_burst_db(self):
    """Check that ``BurstAligner.check_database`` accepts the bundled data.

    The test passes when the first element of the returned value is truthy.
    """
    data_dir = pkg_resources.resource_filename(
        'shogun.tests', os.path.join('data'))
    check_result = BurstAligner.check_database(data_dir)
    self.assertTrue(check_result[0])