def test_validate_prefix(self):
    httpretty.register_uri(
        httpretty.POST,
        "https://test_server.com/qiita_db/jobs/job-id/step/")
    httpretty.register_uri(
        httpretty.GET,
        "https://test_server.com/qiita_db/prep_template/1/data",
        body='{"data": {"1.S1": {"orig_name": "S1"}, "1.S2": '
             '{"orig_name": "S2"}, "1.S3": {"orig_name": "S3"}}}')

    fd, biom_fp = mkstemp(suffix=".biom")
    close(fd)
    data = np.asarray([[0, 0, 1], [1, 3, 42]])
    table = Table(data, ['O1', 'O2'], ['S1', 'S2', 'S3'])
    with biom_open(biom_fp, 'w') as f:
        table.to_hdf5(f, "Test")
    self._clean_up_files.append(biom_fp)
    self.parameters['files'] = '{"BIOM": ["%s"]}' % biom_fp

    obs_success, obs_ainfo, obs_error = validate(
        self.qclient, 'job-id', self.parameters, self.out_dir)
    exp_biom_fp = join(self.out_dir, basename(biom_fp))
    self._clean_up_files.append(exp_biom_fp)
    self.assertTrue(obs_success)
    self.assertEqual(obs_ainfo, [[None, 'BIOM', [exp_biom_fp, 'biom']]])
    self.assertEqual(obs_error, "")
    obs_t = load_table(exp_biom_fp)
    self.assertItemsEqual(obs_t.ids(), ["1.S1", "1.S2", "1.S3"])

def _create_job_and_biom(self, sample_ids, template=None, analysis=None):
    # Create the BIOM table that needs to be validated
    fd, biom_fp = mkstemp(suffix=".biom")
    close(fd)
    data = np.random.randint(100, size=(2, len(sample_ids)))
    table = Table(data, ['O1', 'O2'], sample_ids)
    with biom_open(biom_fp, 'w') as f:
        table.to_hdf5(f, "Test")
    self._clean_up_files.append(biom_fp)

    # Create a new job
    parameters = {'template': template,
                  'files': dumps({'biom': [biom_fp]}),
                  'artifact_type': 'BIOM',
                  'analysis': analysis}
    data = {'command': dumps(['BIOM type', '2.1.4', 'Validate']),
            'parameters': dumps(parameters),
            'status': 'running'}
    res = self.qclient.post('/apitest/processing_job/', data=data)
    job_id = res['job']

    return biom_fp, job_id, parameters

def test_execute_job_error(self):
    # Create a prep template
    prep_info = {'SKB8.640193': {'col': 'val1'},
                 'SKD8.640184': {'col': 'val2'}}
    data = {'prep_info': dumps(prep_info), 'study': 1, 'data_type': '16S'}
    template = self.qclient.post(
        '/apitest/prep_template/', data=data)['prep']
    # Create a new validate job
    fd, biom_fp = mkstemp(suffix=".biom")
    close(fd)
    data = np.random.randint(100, size=(2, 2))
    table = Table(data, ['O1', 'O2'], ['S1', 'S2'])
    with biom_open(biom_fp, 'w') as f:
        table.to_hdf5(f, "Test")
    data = {'command': dumps(['BIOM type', '2.1.4', 'Validate']),
            'parameters': dumps(
                {'files': dumps({'biom': [biom_fp]}),
                 'template': template,
                 'artifact_type': 'BIOM'}),
            'artifact_type': 'BIOM',
            'status': 'queued'}
    job_id = self.qclient.post(
        '/apitest/processing_job/', data=data)['job']

    plugin("https://localhost:21174", job_id, self.out_dir)
    obs = self._wait_job(job_id)
    self.assertEqual(obs, 'error')

def test_faith_pd_invalid_input(self):
    # Tests are based on skbio tests; checks for duplicate ids and
    # negative counts are not included but should be incorporated

    # tree has duplicated tip ids
    tree = TreeNode.read(
        StringIO('((OTU1:0.1, OTU2:0.2):0.3, (OTU3:0.5, OTU4:0.7):1.1)'
                 'root;'))
    otu_ids = ['OTU%d' % i for i in range(1, 5)]
    u_counts = [1, 1, 0, 0]
    data = np.array([u_counts]).T
    bt = Table(data, otu_ids, ['u'])

    ta = os.path.join(gettempdir(), 'table.biom')
    tr = os.path.join(gettempdir(), 'tree.biom')
    self.files_to_delete.append(ta)
    self.files_to_delete.append(tr)

    with biom_open(ta, 'w') as fhdf5:
        bt.to_hdf5(fhdf5, 'Table for unit testing')
    tree.write(tr)

    self.assertRaises(IOError, faith_pd, 'dne.biom', tr)
    self.assertRaises(IOError, faith_pd, ta, 'dne.tre')

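# A minimal happy-path sketch to complement the error test above, assuming
# the same `unifrac.faith_pd(table_fp, tree_fp)` entry point: write a small
# table plus a matching tree, then compute Faith's PD per sample. The file
# names here are illustrative placeholders.
import os
from io import StringIO
from tempfile import gettempdir

import numpy as np
from biom import Table
from biom.util import biom_open
from skbio import TreeNode
from unifrac import faith_pd

tree = TreeNode.read(StringIO(
    '((OTU1:0.1,OTU2:0.2):0.3,(OTU3:0.5,OTU4:0.7):1.1)root;'))
bt = Table(np.array([[1, 1, 0, 0]]).T,
           ['OTU%d' % i for i in range(1, 5)], ['u'])
ta = os.path.join(gettempdir(), 'example_table.biom')
tr = os.path.join(gettempdir(), 'example_tree.nwk')
with biom_open(ta, 'w') as fhdf5:
    bt.to_hdf5(fhdf5, 'example table')
tree.write(tr)
print(faith_pd(ta, tr))  # one Faith's PD value per sample
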
def noisify(table_file, metadata_file, sigma, output_file):
    metadata = pd.read_table(metadata_file, index_col=0)
    table = load_table(table_file)
    table = pd.DataFrame(np.array(table.matrix_data.todense()).T,
                         index=table.ids(axis='sample'),
                         columns=table.ids(axis='observation'))
    cov = np.eye(table.shape[1] - 1)
    m_noise = compositional_noise(cov, nsamp=table.shape[0])
    table_ = table.values
    table_ = np.vstack([perturb(table_[i, :], m_noise[i, :])
                        for i in range(table_.shape[0])])
    # note that this assumes that the column is named `library_size`
    table_ = pd.DataFrame(
        multinomial_sample(table_, depths=metadata['library_size']))
    table_.index = table.index
    table_.columns = list(table.columns)
    metadata['observed'] = np.sum(table_.sum(axis=0) > 0)
    metadata['unobserved'] = np.sum(table_.sum(axis=0) == 0)
    metadata.to_csv(metadata_file, sep='\t')
    # drop zeros -- they are not informative
    table_ = table_.loc[:, table_.sum(axis=0) > 0]
    t = Table(table_.T.values, table_.columns.values, table_.index.values)
    with biom_open(output_file, 'w') as f:
        t.to_hdf5(f, generated_by='moi')

def deposit(table, groups, truth, output_table, output_groups, output_truth):
    t = Table(table.T.values, table.columns.values, table.index.values)
    with biom_open(output_table, 'w') as f:
        t.to_hdf5(f, generated_by='moi')
    groups.to_csv(output_groups, sep='\t')
    with open(output_truth, 'w') as f:
        f.write(','.join(truth))

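# Hypothetical invocation of the `deposit` helper above: a toy
# samples-by-features DataFrame (samples as rows, matching the `.T` in the
# function), a groups frame, and a list of "true" feature names. The output
# filenames are placeholders.
import pandas as pd

table = pd.DataFrame([[1, 2], [3, 4]],
                     index=['S1', 'S2'], columns=['F1', 'F2'])
groups = pd.DataFrame({'group': [0, 1]}, index=['S1', 'S2'])
deposit(table, groups, ['F1'], 'out.biom', 'groups.tsv', 'truth.txt')
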
def deposit_blocktable(output_dir, abs_table, rel_table, metadata, truth,
                       sample_id):
    # note: the absolute table goes in the abs_table file and the relative
    # table in the rel_table file
    output_abstable = "%s/abs_table.%s.biom" % (output_dir, sample_id)
    output_reltable = "%s/rel_table.%s.biom" % (output_dir, sample_id)
    output_metadata = "%s/metadata.%s.txt" % (output_dir, sample_id)
    output_truth = "%s/truth.%s.txt" % (output_dir, sample_id)

    abs_t = Table(abs_table.T.values,
                  abs_table.columns.values, abs_table.index.values)
    with biom_open(output_abstable, 'w') as f:
        abs_t.to_hdf5(f, generated_by='moi')

    rel_t = Table(rel_table.T.values,
                  rel_table.columns.values, rel_table.index.values)
    with biom_open(output_reltable, 'w') as f:
        rel_t.to_hdf5(f, generated_by='moi')

    metadata.to_csv(output_metadata, sep='\t')
    truth.to_csv(output_truth, sep='\t')

def hashing(unhashed_otu_table_list, unhashed_rep_seqs_list,
            sample_metadata_list):
    otu_df_list = []
    rep_seq_ids = set()
    seqs = []
    # Create OTU table
    for unhashed_otu_table in unhashed_otu_table_list:
        otu_df_list.append(hash_otu_table(unhashed_otu_table))
    otu_df = pd.concat(otu_df_list, join="outer", axis=1)
    otu_df.fillna(0.0, inplace=True)
    otu_table = Table(otu_df.values, list(otu_df.index),
                      list(otu_df.columns))
    # Create rep seqs
    for unhashed_rep_seqs in unhashed_rep_seqs_list:
        seqs.extend(hash_rep_seqs(unhashed_rep_seqs, rep_seq_ids))
    otu_table_ids = set(otu_df.index)
    assert otu_table_ids == rep_seq_ids
    assert len(otu_df.index) == len(rep_seq_ids)
    # Merge sample metadata
    sample_metadata = pd.concat(
        [pd.read_csv(s, sep="\t") for s in sample_metadata_list])
    # Write files
    sample_metadata.to_csv("sample_metadata.tsv", sep="\t", index=False)
    with biom_open("otu_table.biom", "w") as fid:
        otu_table.to_hdf5(fid,
                          "Constructed by micone in dada2/deblur pipeline")
    with open("rep_seqs.fasta", "w") as fid:
        fasta_writer = FastaIO.FastaWriter(fid, wrap=None)
        fasta_writer.write_file(seqs)

def setUp(self):
    # Register the URIs for the QiitaClient
    httpretty.register_uri(
        httpretty.POST,
        "https://test_server.com/qiita_db/authenticate/",
        body='{"access_token": "token", "token_type": "Bearer", '
             '"expires_in": "3600"}')
    self.qclient = QiitaClient('https://test_server.com', 'client_id',
                               'client_secret')
    # Create a biom table
    fd, self.biom_fp = mkstemp(suffix=".biom")
    close(fd)
    data = np.asarray([[0, 0, 1], [1, 3, 42]])
    table = Table(data, ['O1', 'O2'], ['1.S1', '1.S2', '1.S3'])
    with biom_open(self.biom_fp, 'w') as f:
        table.to_hdf5(f, "Test")
    self.out_dir = mkdtemp()
    self.parameters = {'template': 1,
                       'files': '{"BIOM": ["%s"]}' % self.biom_fp,
                       'artifact_type': 'BIOM'}
    self._clean_up_files = [self.biom_fp, self.out_dir]

def hash_otu_table(unhashed_otu_table, seqid_hash_dict, output_file):
    table = load_table(unhashed_otu_table)
    df = table.to_dataframe(dense=True)
    seq_ids = [seqid_hash_dict[i] for i in df.index]
    df.index = seq_ids
    new_table = Table(df.values, list(df.index), list(df.columns))
    with biom_open(output_file, "w") as fid:
        new_table.to_hdf5(fid, "Constructed using qiime1 clustering")

def deposit_biofilm(table1, table2, metadata, U, V, edges, it, rep,
                    output_dir):
    """ Writes down tables, metadata and feature metadata into files.

    Parameters
    ----------
    table1 : pd.DataFrame
        Microbe abundances (samples x microbes)
    table2 : pd.DataFrame
        Metabolite abundances (samples x metabolites)
    metadata : pd.DataFrame
        Dataframe of sample metadata
    U : np.array
        Microbial latent variables
    V : np.array
        Metabolite latent variables
    edges : np.array
        Edge matrix for ground truthing.
    it : int
        iteration number
    rep : int
        repetition number
    output_dir : str
        output directory
    """
    choice = 'abcdefghijklmnopqrstuvwxyz'
    output_microbes = "%s/table_microbes.%d_%s.biom" % (
        output_dir, it, choice[rep])
    output_metabolites = "%s/table_metabolites.%d_%s.biom" % (
        output_dir, it, choice[rep])
    output_md = "%s/metadata.%d_%s.txt" % (output_dir, it, choice[rep])
    output_U = "%s/U.%d_%s.txt" % (output_dir, it, choice[rep])
    output_V = "%s/V.%d_%s.txt" % (output_dir, it, choice[rep])
    output_B = "%s/edges.%d_%s.txt" % (output_dir, it, choice[rep])
    output_ranks = "%s/ranks.%d_%s.txt" % (output_dir, it, choice[rep])

    # drop features that are never observed
    idx1 = table1.sum(axis=0) > 0
    idx2 = table2.sum(axis=0) > 0
    table1 = table1.loc[:, idx1]
    table2 = table2.loc[:, idx2]

    table1 = Table(table1.values.T, table1.columns, table1.index)
    table2 = Table(table2.values.T, table2.columns, table2.index)
    with biom_open(output_microbes, 'w') as f:
        table1.to_hdf5(f, generated_by='moi1')
    with biom_open(output_metabolites, 'w') as f:
        table2.to_hdf5(f, generated_by='moi2')

    ranks = (U @ V)
    ranks = ranks[idx1, :]
    ranks = ranks[:, idx2]
    ranks = pd.DataFrame(ranks,
                         index=table1.ids(axis='observation'),
                         columns=table2.ids(axis='observation'))
    ranks.to_csv(output_ranks, sep='\t')
    metadata.to_csv(output_md, sep='\t', index_label='#SampleID')

    # ground-truth edge matrix filtered to the retained microbes; `edges`
    # is assumed to be an ndarray with one column per microbe
    B = edges[:, idx1]
    np.savetxt(output_U, U)
    np.savetxt(output_V, V)
    np.savetxt(output_B, B)

def hash_otu_table(unhashed_otu_table, output_file):
    table = load_table(unhashed_otu_table)
    df = table.to_dataframe(dense=True)
    seq_ids = list(map(hash_function, df.index))
    df.index = seq_ids
    new_table = Table(df.values, list(df.index), list(df.columns))
    with biom_open(output_file, "w") as fid:
        new_table.to_hdf5(fid, "Constructed by micone in dada2 pipeline")
    return seq_ids

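# `hash_function` is not defined in the snippet above. A plausible stand-in
# (an assumption, not necessarily the pipeline's actual choice) is an MD5
# digest of the index string, which dada2/deblur-style pipelines commonly
# use to turn denoised sequences into stable feature ids:
import hashlib

def hash_function(seq_id):
    """Return a hex MD5 digest of a sequence/feature identifier."""
    return hashlib.md5(str(seq_id).encode('utf-8')).hexdigest()
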
def write_biom(table: biom.Table, fp: str):
    """Write a BIOM table to file.

    Parameters
    ----------
    table : biom.Table
        BIOM table to write.
    fp : str
        Output filepath.
    """
    with biom.util.biom_open(fp, 'w') as f:
        table.to_hdf5(f, table.generated_by)

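# Usage sketch for the `write_biom` helper above. Since it passes
# `table.generated_by` through to `to_hdf5`, the attribute should be set
# first; here it is supplied via the biom.Table constructor. Filenames are
# placeholders.
import numpy as np
import biom

t = biom.Table(np.array([[1, 2], [3, 4]]), ['O1', 'O2'], ['S1', 'S2'],
               generated_by='example')
write_biom(t, 'example.biom')
assert biom.load_table('example.biom').shape == (2, 2)
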
def deposit_biofilms(output_dir, abs_table1, abs_table2,
                     rel_table1, rel_table2, edges, metadata, sample_id):
    """ Writes down tables and edges into files.

    Parameters
    ----------
    output_dir : str
        output directory
    rel_table1 : biom.Table
        Biom table of relative abundances
    rel_table2 : biom.Table
        Biom table of relative abundances
    abs_table1 : biom.Table
        Biom table of absolute abundances
    abs_table2 : biom.Table
        Biom table of absolute abundances
    edges : list
        Edge list for ground truthing.
    metadata : pd.DataFrame
        Dataframe of sample metadata
    sample_id : str
        sample id
    """
    output_abs_microbes = "%s/table.abs.microbes.%s.biom" % (
        output_dir, sample_id)
    output_abs_metabolites = "%s/table.abs.metabolites.%s.biom" % (
        output_dir, sample_id)
    output_rel_microbes = "%s/table.rel.microbes.%s.biom" % (
        output_dir, sample_id)
    output_rel_metabolites = "%s/table.rel.metabolites.%s.biom" % (
        output_dir, sample_id)
    output_md = "%s/metadata.%s.txt" % (output_dir, sample_id)
    output_U = "%s/U.%s.txt" % (output_dir, sample_id)
    output_V = "%s/V.%s.txt" % (output_dir, sample_id)
    output_edges = "%s/edges.%s.txt" % (output_dir, sample_id)
    output_ranks = "%s/ranks.%s.txt" % (output_dir, sample_id)

    # idx1 = table1.sum(axis=0) > 0
    # idx2 = table2.sum(axis=0) > 0
    # table1 = table1.loc[:, idx1]
    # table2 = table2.loc[:, idx2]

    # relative abundances
    table1 = Table(rel_table1.values.T,
                   rel_table1.columns, rel_table1.index)
    table2 = Table(rel_table2.values.T,
                   rel_table2.columns, rel_table2.index)
    with biom_open(output_rel_microbes, 'w') as f:
        table1.to_hdf5(f, generated_by='moi1')
    with biom_open(output_rel_metabolites, 'w') as f:
        table2.to_hdf5(f, generated_by='moi2')

    # absolute abundances
    table1 = Table(abs_table1.values.T,
                   abs_table1.columns, abs_table1.index)
    table2 = Table(abs_table2.values.T,
                   abs_table2.columns, abs_table2.index)
    with biom_open(output_abs_microbes, 'w') as f:
        table1.to_hdf5(f, generated_by='moi1')
    with biom_open(output_abs_metabolites, 'w') as f:
        table2.to_hdf5(f, generated_by='moi2')

    pd.DataFrame(edges).to_csv(output_edges, sep='\t')
    metadata.to_csv(output_md, sep='\t')

def write_table_tree(self, u_counts, otu_ids, sample_ids, tree):
    data = np.array([u_counts]).T
    bt = Table(data, otu_ids, sample_ids)

    ta = os.path.join(gettempdir(), 'table.biom')
    tr = os.path.join(gettempdir(), 'tree.biom')
    self.files_to_delete.append(ta)
    self.files_to_delete.append(tr)

    with biom_open(ta, 'w') as fhdf5:
        bt.to_hdf5(fhdf5, 'Table for unit testing')
    tree.write(tr)

    return ta, tr

def _work(self, u_counts, v_counts, otu_ids, tree, method):
    data = np.array([u_counts, v_counts]).T
    bt = Table(data, otu_ids, ['u', 'v'])

    ta = os.path.join(gettempdir(), 'table.biom')
    tr = os.path.join(gettempdir(), 'tree.biom')
    self.files_to_delete.append(ta)
    self.files_to_delete.append(tr)

    with biom_open(ta, 'w') as fhdf5:
        bt.to_hdf5(fhdf5, 'Table for unit testing')
    tree.write(tr)

    # return value is a distance matrix; get the distance from u->v
    return ssu(ta, tr, method, False, 1.0, False, 1)['u', 'v']

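# For context, a hedged reading of the positional `ssu` call above (from the
# `unifrac` package): table path, tree path, UniFrac method name,
# variance_adjusted, alpha, bypass_tips, and a thread/substep count; the
# result is indexable like a distance matrix keyed by sample id. The file
# paths below assume the files written by `_work` already exist.
from unifrac import ssu

dm = ssu('table.biom', 'tree.biom', 'unweighted', False, 1.0, False, 1)
print(dm['u', 'v'])  # distance between samples 'u' and 'v'
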
def write_biom(table: biom.Table, fp: str):
    """Write a BIOM table to file.

    Parameters
    ----------
    table : biom.Table
        BIOM table to write.
    fp : str
        Output filepath.

    Notes
    -----
    The `generated_by` attribute of the output BIOM table will be like
    "woltka-version".
    """
    with biom.util.biom_open(fp, 'w') as f:
        table.to_hdf5(f, f'{__name__}-{__version__}')

def write_outputs(o_biom_file: str, o_metadata_file: str,
                  biom_updated: biom.Table,
                  metadata_edit_best: pd.DataFrame,
                  dim: bool = False) -> None:
    """
    Write the metadata and the biom table outputs.

    Parameters
    ----------
    o_metadata_file : str
        Path to the output metadata table file.
    o_biom_file : str
        Path to the output biom table file.
    biom_updated : biom.Table
        The biom table without ambiguous samples, with a min number of
        reads per sample, without duplicated samples per host, and with
        samples re-named as per the AGP system.
    metadata_edit_best : pd.DataFrame
        Corresponding metadata table.
    dim : bool
        Whether to add the number of samples in the final biom file name
        before the extension or not.
    """
    if dim:
        o_metadata_file, o_biom_file = get_outputs(
            metadata_edit_best, o_metadata_file, o_biom_file)
    if biom_updated.shape[0]:
        print('Outputs:')
        if o_metadata_file[0] == '/':
            if not isdir(dirname(o_metadata_file)):
                os.makedirs(dirname(o_metadata_file))
        metadata_edit_best.to_csv(o_metadata_file, index=False, sep='\t')
        print(o_metadata_file)
        if not isdir(dirname(o_biom_file)):
            os.makedirs(dirname(o_biom_file))
        with biom_open(o_biom_file, 'w') as f:
            biom_updated.to_hdf5(f, 'Xrbfetch')
        print(o_biom_file)

def test_execute_job_error(self):
    # Create a prep template
    prep_info = {'SKB8.640193': {'col': 'val1'},
                 'SKD8.640184': {'col': 'val2'}}
    data = {'prep_info': dumps(prep_info), 'study': 1, 'data_type': '16S'}
    template = self.qclient.post('/apitest/prep_template/',
                                 data=data)['prep']
    # Create a new validate job
    fd, biom_fp = mkstemp(suffix=".biom")
    close(fd)
    data = np.random.randint(100, size=(2, 2))
    table = Table(data, ['O1', 'O2'], ['S1', 'S2'])
    with biom_open(biom_fp, 'w') as f:
        table.to_hdf5(f, "Test")
    data = {'command': dumps(['BIOM type', '2.1.4 - Qiime2', 'Validate']),
            'parameters': dumps({'files': dumps({'biom': [biom_fp]}),
                                 'template': template,
                                 'artifact_type': 'BIOM'}),
            'artifact_type': 'BIOM',
            'status': 'queued'}
    job_id = self.qclient.post('/apitest/processing_job/',
                               data=data)['job']

    plugin("https://localhost:8383", job_id, self.out_dir)
    obs = self._wait_job(job_id)
    self.assertEqual(obs, 'error')

def main(args):
    os.mkdir(args.output_dir)
    np.random.seed(args.seed)
    sims = multinomial_bioms(k=args.latent_dim, D=args.input_dim,
                             N=args.samples, M=args.depth)
    Y = sims['Y']
    parts = Y.shape[0] // 10
    samp_ids = list(map(str, range(Y.shape[0])))
    obs_ids = list(map(str, range(Y.shape[1])))
    train = Table(Y[:parts * 8].T, obs_ids, samp_ids[:parts * 8])
    test = Table(Y[parts * 8:parts * 9].T, obs_ids,
                 samp_ids[parts * 8:parts * 9])
    valid = Table(Y[parts * 9:].T, obs_ids, samp_ids[parts * 9:])
    output_dir = args.output_dir
    with biom_open(f'{output_dir}/train.biom', 'w') as f:
        train.to_hdf5(f, 'train')
    with biom_open(f'{output_dir}/test.biom', 'w') as f:
        test.to_hdf5(f, 'test')
    with biom_open(f'{output_dir}/valid.biom', 'w') as f:
        valid.to_hdf5(f, 'valid')
    tree = sims['tree']
    tree.write(f'{output_dir}/basis.nwk')
    np.savetxt(f'{output_dir}/eigvals.txt', sims['eigs'])
    np.savetxt(f'{output_dir}/eigvecs.txt', sims['eigvectors'])
    np.savetxt(f'{output_dir}/W.txt', sims['W'])

def setUp(self):
    np.random.seed(0)
    torch.manual_seed(0)
    self.k, self.D, self.N, self.M, self.C = 10, 50, 500, 100000, 3
    self.sims = multinomial_batch_bioms(k=self.k, D=self.D,
                                        N=self.N, M=self.M, C=self.C)
    Y = self.sims['Y']
    parts = Y.shape[0] // 10
    samp_ids = list(map(str, range(Y.shape[0])))
    obs_ids = list(map(str, range(Y.shape[1])))
    train = Table(Y[:parts * 8].T, obs_ids, samp_ids[:parts * 8])
    test = Table(Y[parts * 8:parts * 9].T, obs_ids,
                 samp_ids[parts * 8:parts * 9])
    valid = Table(Y[parts * 9:].T, obs_ids, samp_ids[parts * 9:])
    with biom_open('train.biom', 'w') as f:
        train.to_hdf5(f, 'train')
    with biom_open('test.biom', 'w') as f:
        test.to_hdf5(f, 'test')
    with biom_open('valid.biom', 'w') as f:
        valid.to_hdf5(f, 'valid')
    md = pd.DataFrame({'batch_category': self.sims['batch_idx']},
                      index=samp_ids)
    md.index.name = 'sampleid'
    md.to_csv('metadata.txt', sep='\t')
    batch_priors = pd.Series(self.sims['alphaILR'])
    batch_priors.to_csv('batch_priors.txt', sep='\t')
    self.sims['tree'].write('basis.nwk')

def setUp(self):
    # Register the URIs for the QiitaClient
    httpretty.register_uri(
        httpretty.POST,
        "https://test_server.com/qiita_db/authenticate/",
        body='{"access_token": "token", "token_type": "Bearer", '
             '"expires_in": "3600"}')
    self.qclient = QiitaClient('https://test_server.com', 'client_id',
                               'client_secret')
    # Create a biom table
    fd, self.biom_fp = mkstemp(suffix=".biom")
    close(fd)
    data = np.asarray([[0, 0, 1], [1, 3, 42]])
    table = Table(data, ['O1', 'O2'], ['1.S1', '1.S2', '1.S3'])
    with biom_open(self.biom_fp, 'w') as f:
        table.to_hdf5(f, "Test")
    self.out_dir = mkdtemp()
    self.artifact_id = 4
    self.parameters = {'input_data': self.artifact_id}
    self._clean_up_files = [self.biom_fp, self.out_dir]

def test__qiime2_rclr(self):
    """Tests q2-rclr matches standalone rclr."""
    # make mock table to write
    samps_ids = ['s%i' % i for i in range(self.cdata.shape[0])]
    feats_ids = ['f%i' % i for i in range(self.cdata.shape[1])]
    table_test = Table(self.cdata.T, feats_ids, samps_ids)
    # write table
    in_ = get_data_path('test.biom', subfolder='data')
    out_path = os_path_sep.join(in_.split(os_path_sep)[:-1])
    test_path = os.path.join(out_path, 'rclr-test.biom')
    with biom_open(test_path, 'w') as wf:
        table_test.to_hdf5(wf, "test")
    # run standalone
    runner = CliRunner()
    result = runner.invoke(sdc.commands['rclr'],
                           ['--in-biom', test_path,
                            '--output-dir', out_path])
    out_table = get_data_path('rclr-table.biom', subfolder='data')
    res_table = load_table(out_table)
    standalone_mat = res_table.matrix_data.toarray().T
    # check that exit code was 0 (indicating success)
    try:
        self.assertEqual(0, result.exit_code)
    except AssertionError:
        ex = result.exception
        error = Exception('Command failed with non-zero exit code')
        raise error.with_traceback(ex.__traceback__)
    # run QIIME2
    q2_table_test = Artifact.import_data("FeatureTable[Frequency]",
                                         table_test)
    q2_res = rclr_transformation(q2_table_test).rclr_table.view(Table)
    q2_res_mat = q2_res.matrix_data.toarray().T
    # check same and check both correct
    npt.assert_allclose(standalone_mat, q2_res_mat)
    npt.assert_allclose(standalone_mat, self.true)
    npt.assert_allclose(q2_res_mat, self.true)

def test_standalone_rclr(self):
    """Test the standalone rclr."""
    # make mock table to write
    samps_ids = ['s%i' % i for i in range(self.cdata.shape[0])]
    feats_ids = ['f%i' % i for i in range(self.cdata.shape[1])]
    table_test = Table(self.cdata.T, feats_ids, samps_ids)
    # write table
    in_ = get_data_path('test.biom', subfolder='rpca_data')
    out_path = os_path_sep.join(in_.split(os_path_sep)[:-1])
    test_path = os.path.join(out_path, 'rclr-test.biom')
    with biom_open(test_path, 'w') as wf:
        table_test.to_hdf5(wf, "test")
    runner = CliRunner()
    result = runner.invoke(sdc.commands['rclr'],
                           ['--in-biom', test_path,
                            '--output-dir', out_path])
    out_table = get_data_path('rclr-table.biom', subfolder='rpca_data')
    res_table = load_table(out_table)
    test_cmat = res_table.matrix_data.toarray().T
    npt.assert_allclose(test_cmat, self.true)
    # Lastly, check that exit code was 0 (indicating success)
    CliTestCase().assertExitCode(0, result)

def setUp(self):
    np.random.seed(1)
    torch.manual_seed(1)
    self.k, self.D, self.N, self.M = 10, 50, 500, 100000
    self.sims = multinomial_bioms(k=self.k, D=self.D,
                                  N=self.N, M=self.M)
    Y = self.sims['Y']
    parts = Y.shape[0] // 10
    samp_ids = list(map(str, range(Y.shape[0])))
    obs_ids = list(map(str, range(Y.shape[1])))
    train = Table(Y[:parts * 8].T, obs_ids, samp_ids[:parts * 8])
    test = Table(Y[parts * 8:parts * 9].T, obs_ids,
                 samp_ids[parts * 8:parts * 9])
    valid = Table(Y[parts * 9:].T, obs_ids, samp_ids[parts * 9:])
    with biom_open('train.biom', 'w') as f:
        train.to_hdf5(f, 'train')
    with biom_open('test.biom', 'w') as f:
        test.to_hdf5(f, 'test')
    with biom_open('valid.biom', 'w') as f:
        valid.to_hdf5(f, 'valid')
    self.sims['tree'].write('basis.nwk')

def save_bioms(args, sims):
    output_dir = args.output_dir
    Y = sims['Y']
    parts = Y.shape[0] // 10
    samp_ids = list(map(str, range(Y.shape[0])))
    obs_ids = list(map(str, range(Y.shape[1])))
    train = Table(Y[:parts * 8].T, obs_ids, samp_ids[:parts * 8])
    test = Table(Y[parts * 8:parts * 9].T, obs_ids,
                 samp_ids[parts * 8:parts * 9])
    valid = Table(Y[parts * 9:].T, obs_ids, samp_ids[parts * 9:])
    with biom_open(f'{output_dir}/train.biom', 'w') as f:
        train.to_hdf5(f, 'train')
    with biom_open(f'{output_dir}/test.biom', 'w') as f:
        test.to_hdf5(f, 'test')
    with biom_open(f'{output_dir}/valid.biom', 'w') as f:
        valid.to_hdf5(f, 'valid')
    tree = sims['tree']
    tree.write(f'{output_dir}/basis.nwk')
    np.savetxt(f'{output_dir}/eigvals.txt', sims['eigs'])
    np.savetxt(f'{output_dir}/eigvecs.txt', sims['eigvectors'])
    np.savetxt(f'{output_dir}/W.txt', sims['W'])

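# The 80/10/10 train/test/valid slicing above recurs in several of these
# snippets; a small standalone sketch of the same pattern (names here are
# illustrative, not from the original code):
from biom import Table

def split_counts(Y, obs_ids, samp_ids):
    """Split a samples-by-observations count matrix into 80/10/10 tables."""
    parts = Y.shape[0] // 10
    a, b = parts * 8, parts * 9
    return (Table(Y[:a].T, obs_ids, samp_ids[:a]),
            Table(Y[a:b].T, obs_ids, samp_ids[a:b]),
            Table(Y[b:].T, obs_ids, samp_ids[b:]))
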
def deposit(output_dir, table1, table2, metadata, U, V, B, it, rep):
    """ Writes down tables, metadata and feature metadata into files.

    Parameters
    ----------
    output_dir : str
        output directory
    table1 : biom.Table
        Biom table
    table2 : biom.Table
        Biom table
    metadata : pd.DataFrame
        Dataframe of sample metadata
    U : np.array
        Microbial latent variables
    V : np.array
        Metabolite latent variables
    B : np.array
        Edge matrix for ground truthing.
    it : int
        iteration number
    rep : int
        repetition number
    """
    choice = 'abcdefghijklmnopqrstuvwxyz'
    output_microbes = "%s/table_microbes.%d_%s.biom" % (
        output_dir, it, choice[rep])
    output_metabolites = "%s/table_metabolites.%d_%s.biom" % (
        output_dir, it, choice[rep])
    output_md = "%s/metadata.%d_%s.txt" % (output_dir, it, choice[rep])
    output_U = "%s/U.%d_%s.txt" % (output_dir, it, choice[rep])
    output_V = "%s/V.%d_%s.txt" % (output_dir, it, choice[rep])
    output_B = "%s/B.%d_%s.txt" % (output_dir, it, choice[rep])
    output_ranks = "%s/ranks.%d_%s.txt" % (output_dir, it, choice[rep])

    # drop features that are never observed
    idx1 = table1.sum(axis=0) > 0
    idx2 = table2.sum(axis=0) > 0
    table1 = table1.loc[:, idx1]
    table2 = table2.loc[:, idx2]

    table1 = Table(table1.values.T, table1.columns, table1.index)
    table2 = Table(table2.values.T, table2.columns, table2.index)
    with biom_open(output_microbes, 'w') as f:
        table1.to_hdf5(f, generated_by='moi1')
    with biom_open(output_metabolites, 'w') as f:
        table2.to_hdf5(f, generated_by='moi2')

    ranks = clr(softmax(np.hstack(
        (np.zeros((U.shape[0], 1)), U @ V))))
    ranks = ranks[idx1, :]
    ranks = ranks[:, idx2]
    ranks = pd.DataFrame(ranks,
                         index=table1.ids(axis='observation'),
                         columns=table2.ids(axis='observation'))
    ranks.to_csv(output_ranks, sep='\t')
    metadata.to_csv(output_md, sep='\t', index_label='#SampleID')
    np.savetxt(output_B, B)
    np.savetxt(output_U, U)
    np.savetxt(output_V, V)

def make_biom(seq_table, output_file):
    new_table = Table(seq_table.values, list(seq_table.index),
                      list(seq_table.columns))
    with biom_open(output_file, "w") as fid:
        new_table.to_hdf5(fid, "Constructed by micone in dada2 pipeline")

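# Hypothetical call to `make_biom` above: rows of `seq_table` are features
# (observations) and columns are samples, matching the Table constructor's
# argument order. The data and filename are placeholders.
import pandas as pd

seq_table = pd.DataFrame([[5, 0], [2, 7]],
                         index=['seq1', 'seq2'], columns=['S1', 'S2'])
make_biom(seq_table, 'seq_table.biom')
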
def custom_tree_pipeline(table: biom.Table,
                         tree: skbio.TreeNode,
                         threads: int = 1,
                         hsp_method: str = "mp",
                         max_nsti: float = 2.0
                         ) -> (biom.Table, biom.Table, biom.Table):
    # Run pipeline in temporary directory so that files are not saved
    # locally.
    with TemporaryDirectory() as temp_dir:
        # Need to write out BIOM table and newick tree to be used in
        # pipeline.
        # Write out biom table:
        biom_infile = path.join(temp_dir, "intable.biom")
        with biom.util.biom_open(biom_infile, 'w') as out_biom:
            table.to_hdf5(h5grp=out_biom,
                          generated_by="PICRUSt2 QIIME2 Plugin")

        # Write out newick tree.
        newick_infile = path.join(temp_dir, "placed_seqs.tre")
        tree.write(newick_infile, format="newick")

        picrust2_out = path.join(temp_dir, "picrust2_out")

        print("Running the below commands:", file=sys.stderr)

        # Run hidden-state prediction step (on the 16S, EC, and KO tables
        # separately).
        hsp_out_16S = path.join(picrust2_out, "16S_predicted.tsv.gz")
        system_call_check("hsp.py -i 16S " +
                          " -t " + newick_infile +
                          " -p 1 " +
                          " -n " +
                          "-o " + hsp_out_16S +
                          " -m " + hsp_method,
                          print_out=True)

        hsp_out_EC = path.join(picrust2_out, "EC_predicted.tsv.gz")
        system_call_check("hsp.py -i EC " +
                          " -t " + newick_infile +
                          " -p " + str(threads) +
                          " -o " + hsp_out_EC +
                          " -m " + hsp_method,
                          print_out=True)

        hsp_out_KO = path.join(picrust2_out, "KO_predicted.tsv.gz")
        system_call_check("hsp.py -i KO " +
                          " -t " + newick_infile +
                          " -p " + str(threads) +
                          " -o " + hsp_out_KO +
                          " -m " + hsp_method,
                          print_out=True)

        # Run metagenome pipeline step.
        EC_metagenome_out = path.join(picrust2_out, "EC_metagenome_out")
        system_call_check("metagenome_pipeline.py -i " + biom_infile +
                          " -m " + hsp_out_16S +
                          " -f " + hsp_out_EC +
                          " -o " + EC_metagenome_out +
                          " --max_nsti " + str(max_nsti),
                          print_out=True)

        KO_metagenome_out = path.join(picrust2_out, "KO_metagenome_out")
        system_call_check("metagenome_pipeline.py -i " + biom_infile +
                          " -m " + hsp_out_16S +
                          " -f " + hsp_out_KO +
                          " -o " + KO_metagenome_out +
                          " --max_nsti " + str(max_nsti),
                          print_out=True)

        EC_out = path.join(EC_metagenome_out,
                           "pred_metagenome_unstrat.tsv.gz")
        KO_out = path.join(KO_metagenome_out,
                           "pred_metagenome_unstrat.tsv.gz")

        # Run pathway inference step.
        pathways_out = path.join(picrust2_out, "pathways_out")
        pathabun_out = path.join(pathways_out, "path_abun_unstrat.tsv.gz")
        system_call_check("pathway_pipeline.py -i " + EC_out +
                          " -o " + pathways_out +
                          " -p " + str(threads),
                          print_out=True)

        # Read in output unstratified metagenome tables and return as BIOM
        # objects.
        ko_biom = biom.load_table(KO_out)
        ec_biom = biom.load_table(EC_out)
        pathabun_biom = biom.load_table(pathabun_out)

        return ko_biom, ec_biom, pathabun_biom

def create_non_rarefied_biom_artifact(analysis, biom_data, rarefied_table):
    """Creates the initial non-rarefied BIOM artifact of the analysis

    Parameters
    ----------
    analysis : dict
        Dictionary with the analysis information
    biom_data : dict
        Dictionary with the biom file information
    rarefied_table : biom.Table
        The rarefied BIOM table

    Returns
    -------
    int
        The id of the new artifact
    """
    # The non-rarefied biom artifact is the initial biom table of the
    # analysis. This table does not currently exist anywhere, so we need
    # to actually create the BIOM file. To create this BIOM file we need:
    # (1) the samples and artifacts they come from and (2) whether the
    # samples were renamed or not. (1) is on the database, but we need to
    # infer (2) from the existing rarefied BIOM table. Fun, fun...
    with TRN:
        # Get the samples included in the BIOM table grouped by artifact
        # id. Note that the analysis contains a BIOM table per data type
        # included in it, and the table analysis_sample does not
        # differentiate between datatypes, so we need to check the data
        # type in the artifact table
        sql = """SELECT artifact_id, array_agg(sample_id)
                 FROM qiita.analysis_sample
                    JOIN qiita.artifact USING (artifact_id)
                 WHERE analysis_id = %s AND data_type_id = %s
                 GROUP BY artifact_id"""
        TRN.add(sql, [analysis['analysis_id'], biom_data['data_type_id']])
        samples_by_artifact = TRN.execute_fetchindex()

        # Create an empty BIOM table to be the new master table
        new_table = Table([], [], [])
        ids_map = {}
        for a_id, samples in samples_by_artifact:
            # Get the filepath of the BIOM table from the artifact
            artifact = Artifact(a_id)
            biom_fp = None
            for _, fp, fp_type in artifact.filepaths:
                if fp_type == 'biom':
                    biom_fp = fp
            # Note that we are sure that the biom table exists, so there
            # is no need to check if biom_fp is undefined
            biom_table = load_table(biom_fp)
            samples = set(samples).intersection(biom_table.ids())
            biom_table.filter(samples, axis='sample', inplace=True)
            # we need to check if the table has samples left before merging
            if biom_table.shape[0] != 0 and biom_table.shape[1] != 0:
                new_table = new_table.merge(biom_table)
                ids_map.update({sid: "%d.%s" % (a_id, sid)
                                for sid in biom_table.ids()})

        # Check if we need to rename the sample ids in the biom table
        new_table_ids = set(new_table.ids())
        if not new_table_ids.issuperset(rarefied_table.ids()):
            # We need to rename the sample ids
            new_table.update_ids(ids_map, 'sample', True, True)

        sql = """INSERT INTO qiita.artifact
                    (generated_timestamp, data_type_id, visibility_id,
                     artifact_type_id, submitted_to_vamps)
                 VALUES (%s, %s, %s, %s, %s)
                 RETURNING artifact_id"""
        # Magic number 4 -> visibility sandbox
        # Magic number 7 -> biom artifact type
        TRN.add(sql, [analysis['timestamp'], biom_data['data_type_id'],
                      4, 7, False])
        artifact_id = TRN.execute_fetchlast()

        # Associate the artifact with the analysis
        sql = """INSERT INTO qiita.analysis_artifact
                    (analysis_id, artifact_id)
                 VALUES (%s, %s)"""
        TRN.add(sql, [analysis['analysis_id'], artifact_id])

        # Link the artifact with its file
        dd_id, mp = get_mountpoint('BIOM')[0]
        dir_fp = join(get_db_files_base_dir(), mp, str(artifact_id))
        if not exists(dir_fp):
            makedirs(dir_fp)
        new_table_fp = join(dir_fp, "biom_table.biom")
        with biom_open(new_table_fp, 'w') as f:
            new_table.to_hdf5(f, "Generated by Qiita")

        sql = """INSERT INTO qiita.filepath
                    (filepath, filepath_type_id, checksum,
                     checksum_algorithm_id, data_directory_id)
                 VALUES (%s, %s, %s, %s, %s)
                 RETURNING filepath_id"""
        # Magic number 7 -> filepath_type_id = 'biom'
        # Magic number 1 -> the checksum algorithm id
        TRN.add(sql, [basename(new_table_fp), 7,
                      compute_checksum(new_table_fp), 1, dd_id])
        fp_id = TRN.execute_fetchlast()

        sql = """INSERT INTO qiita.artifact_filepath
                    (artifact_id, filepath_id)
                 VALUES (%s, %s)"""
        TRN.add(sql, [artifact_id, fp_id])
        TRN.execute()

    return artifact_id

def full_pipeline(table: biom.Table,
                  seq: pd.Series,
                  threads: int = 1,
                  hsp_method: str = "mp",
                  max_nsti: float = 2.0
                  ) -> (biom.Table, biom.Table, biom.Table):
    # Write out BIOM table and FASTA to be used in pipeline.
    with TemporaryDirectory() as temp_dir:
        # Write out BIOM table:
        biom_infile = path.join(temp_dir, "intable.biom")
        with biom.util.biom_open(biom_infile, 'w') as out_biom:
            table.to_hdf5(h5grp=out_biom,
                          generated_by="PICRUSt2 QIIME2 Plugin")

        # Write out Pandas series as FASTA:
        seq_outfile = path.join(temp_dir, "seqs.fna")
        with open(seq_outfile, "w") as outfile_fh:
            for seqname, sequence in seq.items():
                print(">" + str(seqname) + "\n" + str(sequence),
                      file=outfile_fh)

        picrust2_out = path.join(temp_dir, "picrust2_out")

        func_outputs, pathway_outputs = picrust2.pipeline.full_pipeline(
            study_fasta=seq_outfile,
            input_table=biom_infile,
            output_folder=picrust2_out,
            processes=threads,
            ref_dir=default_ref_dir,
            in_traits="EC,KO",
            custom_trait_tables=None,
            marker_gene_table=default_tables["16S"],
            pathway_map=default_pathway_map,
            rxn_func="EC",
            no_pathways=False,
            regroup_map=default_regroup_map,
            no_regroup=False,
            stratified=False,
            max_nsti=max_nsti,
            min_reads=1,
            min_samples=1,
            hsp_method=hsp_method,
            skip_nsti=False,
            skip_minpath=False,
            no_gap_fill=False,
            coverage=False,
            per_sequence_contrib=False,
            wide_table=False,
            skip_norm=False,
            remove_intermediate=False,
            verbose=True)

        # Convert the returned unstratified tables to BIOM tables.
        # Note that the 0-index in the func table returned objects
        # corresponds to the path to the unstratified table.
        ko_biom = biom.load_table(func_outputs["KO"][0])
        ec_biom = biom.load_table(func_outputs["EC"][0])
        pathabun_biom = biom.load_table(pathway_outputs["unstrat_abun"])

        return ko_biom, ec_biom, pathabun_biom

def deblur(qclient, job_id, parameters, out_dir):
    """Run deblur with the given parameters

    Parameters
    ----------
    qclient : qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values to run deblur
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    boolean, list, str
        The results of the job

    Notes
    -----
    The code will check if the artifact has a preprocessed_demux element;
    if not, it will use the preprocessed_fastq. We prefer to work with the
    preprocessed_demux as running time will be greatly improved
    """
    out_dir = join(out_dir, 'deblur_out')
    # Step 1 get the rest of the information needed to run deblur
    qclient.update_job_step(job_id, "Step 1 of 4: Collecting information")
    artifact_id = parameters['Demultiplexed sequences']
    # removing input from parameters so it's not part of the final command
    del parameters['Demultiplexed sequences']

    # Get the artifact filepath information
    artifact_info = qclient.get("/qiita_db/artifacts/%s/" % artifact_id)
    fps = artifact_info['files']

    # Step 2 generating command deblur
    if 'preprocessed_demux' in fps:
        qclient.update_job_step(job_id, "Step 2 of 4: Generating per "
                                "sample from demux (1/2)")

        if not exists(out_dir):
            mkdir(out_dir)
        split_out_dir = join(out_dir, 'split')
        if not exists(split_out_dir):
            mkdir(split_out_dir)

        # using the same number of parallel jobs as defined by the command
        n_jobs = int(parameters['Jobs to start'])
        # [0] cause there should be only 1 file
        to_per_sample_files(fps['preprocessed_demux'][0],
                            out_dir=split_out_dir, n_jobs=n_jobs)

        qclient.update_job_step(job_id, "Step 2 of 4: Generating per "
                                "sample from demux (2/2)")
        out_dir = join(out_dir, 'deblured')
        cmd = generate_deblur_workflow_commands([split_out_dir],
                                                out_dir, parameters)
    else:
        qclient.update_job_step(job_id, "Step 2 of 4: Generating deblur "
                                "command")
        cmd = generate_deblur_workflow_commands(fps['preprocessed_fastq'],
                                                out_dir, parameters)

    # Step 3 execute deblur
    qclient.update_job_step(job_id, "Step 3 of 4: Executing deblur job")
    std_out, std_err, return_value = system_call(cmd)
    if return_value != 0:
        error_msg = ("Error running deblur:\nStd out: %s\nStd err: %s"
                     % (std_out, std_err))
        return False, None, error_msg

    # Generating artifact
    pb = partial(join, out_dir)

    # Generate the filepaths
    final_biom = pb('all.biom')
    final_seqs = pb('all.seqs.fa')
    final_biom_hit = pb('reference-hit.biom')
    final_seqs_hit = pb('reference-hit.seqs.fa')

    if not exists(final_biom_hit):
        # Create an empty table. We need to send something to Qiita that
        # is a valid BIOM, so we are going to create an empty table
        t = Table([], [], [])
        with biom_open(final_biom_hit, 'w') as f:
            t.to_hdf5(f, 'qp-deblur generated')

    if not exists(final_seqs_hit):
        # Same as before, create an empty sequence file so we can send it
        with open(final_seqs_hit, 'w') as f:
            f.write("")

    # Step 4, communicate with archive to check and generate placements
    qclient.update_job_step(job_id, "Step 4 of 4 (1/4): Retrieving "
                            "observations information")
    features = list(load_table(final_biom_hit).ids(axis='observation'))

    fp_phylogeny = None
    if features:
        observations = qclient.post(
            "/qiita_db/archive/observations/",
            data={'job_id': job_id, 'features': features})
        novel_fragments = list(set(features) - set(observations.keys()))

        qclient.update_job_step(job_id, "Step 4 of 4 (2/4): Generating "
                                "%d new placements" % len(novel_fragments))

        # Once we support alternative reference phylogenies for SEPP in
        # the future, we need to translate the reference name here into
        # filepaths pointing to the correct reference alignment and
        # reference tree. If left 'None' the Greengenes 13.8 reference
        # shipped with the fragment-insertion conda package will be used.
        fp_reference_alignment = None
        fp_reference_phylogeny = None
        fp_reference_template = None
        fp_reference_rename = None
        if 'Reference phylogeny for SEPP' in parameters:
            if parameters['Reference phylogeny for SEPP'] == 'tiny':
                fp_reference_alignment = qp_deblur.get_data(join(
                    'sepp', 'reference_alignment_tiny.fasta'))
                fp_reference_phylogeny = qp_deblur.get_data(join(
                    'sepp', 'reference_phylogeny_tiny.nwk'))
                fp_reference_template = qp_deblur.get_data(join(
                    'sepp', 'tmpl_tiny_placement.json'))
                fp_reference_rename = qp_deblur.get_data(join(
                    'sepp', 'tmpl_tiny_rename-json.py'))
        try:
            new_placements = generate_sepp_placements(
                novel_fragments, out_dir,
                parameters['Threads per sample'],
                reference_alignment=fp_reference_alignment,
                reference_phylogeny=fp_reference_phylogeny)
        except ValueError as e:
            return False, None, str(e)

        qclient.update_job_step(job_id, "Step 4 of 4 (3/4): Archiving %d "
                                "new placements" % len(novel_fragments))
        # values need to be json strings as well
        for fragment in new_placements.keys():
            new_placements[fragment] = json.dumps(new_placements[fragment])

        # fragments that get rejected by a SEPP run don't show up in the
        # placement file; however, being rejected is valuable information
        # and should be stored in the archive as well. Thus, we avoid
        # re-computation for rejected fragments in the future.
        for fragment in novel_fragments:
            if fragment not in new_placements:
                new_placements[fragment] = ""

        if len(new_placements.keys()) > 0:
            qclient.patch(url="/qiita_db/archive/observations/", op="add",
                          path=job_id, value=json.dumps(new_placements))

        # retrieve all fragments and create the actual tree
        qclient.update_job_step(job_id, "Step 4 of 4 (4/4): Composing "
                                "phylogenetic insertion tree")
        placements = qclient.post(
            "/qiita_db/archive/observations/",
            data={'job_id': job_id, 'features': features})
        # remove fragments that have been rejected by SEPP, i.e. whose
        # placement is the empty string, and convert all other placements
        # from string to json
        placements = {frag: json.loads(placements[frag])
                      for frag, plc in placements.items()
                      if plc != ''}
        try:
            fp_phylogeny = generate_insertion_trees(
                placements, out_dir,
                reference_template=fp_reference_template,
                reference_rename=fp_reference_rename)
        except ValueError as e:
            return False, None, str(e)
    else:
        new_placements = None

    ainfo = [ArtifactInfo('deblur final table', 'BIOM',
                          [(final_biom, 'biom'),
                           (final_seqs, 'preprocessed_fasta')])]
    if fp_phylogeny is not None:
        ainfo.append(ArtifactInfo('deblur reference hit table', 'BIOM',
                                  [(final_biom_hit, 'biom'),
                                   (final_seqs_hit, 'preprocessed_fasta'),
                                   (fp_phylogeny, 'plain_text')],
                                  new_placements))

    return True, ainfo, ""