def main():
    """Relabel a deblur BIOM table by merged read and save it as HDF5.

    The input BIOM path, output BIOM path and merged FASTQ path are taken
    from the parsed command-line arguments.  When no output path is given,
    it defaults to the input path with its extension replaced by
    ``.merged.biom``.  Only observations whose ids are keys of the merge
    dictionary are kept before the table is collapsed and written out.
    """
    args = parser.parse_args()
    biom_in = args.input_biom_fp
    biom_out = args.output_biom_fp
    fastq_path = args.merged_fastq_fp

    table = load_table(biom_in)

    if biom_out is None:
        stem, _ext = os.path.splitext(biom_in)
        biom_out = stem + '.merged.biom'

    # Build the label -> merged read dict while the FASTQ handle is still
    # open, so this works even if readfq yields records lazily.
    with open(fastq_path) as fq:
        merge_dict = get_merged_dict(readfq(fq))

    def _has_merged_read(val, id_, md):
        # Keep only observations that have an entry in the merge dict.
        return id_ in merge_dict

    table = table.filter(_has_merged_read, axis='observation')
    collapsed = collapse_biom_observations(table, merge_dict)

    with biom_open(biom_out, 'w') as out:
        collapsed.to_hdf5(out, 'deblur_relabel_merged.py')
def main():
    """Command-line entry point: collapse deblur observations onto merged reads.

    Loads the BIOM table named by ``input_biom_fp``, builds a dict of
    label:merged read from the FASTQ at ``merged_fastq_fp``, restricts the
    table to observations present in that dict, collapses it, and writes
    the result as HDF5.  The output path falls back to
    ``<input without extension>.merged.biom`` when ``output_biom_fp`` is
    not supplied.
    """
    options = parser.parse_args()

    src_fp = options.input_biom_fp
    dest_fp = options.output_biom_fp
    reads_fp = options.merged_fastq_fp

    deblur_table = load_table(src_fp)

    # Derive a default destination next to the input file.
    if dest_fp is None:
        dest_fp = os.path.splitext(src_fp)[0] + '.merged.biom'

    with open(reads_fp) as reads_handle:
        records = readfq(reads_handle)
        # Read each of the fastqs, make a dict of label:merged read.  This
        # is done inside the ``with`` so the handle is open while consumed.
        label_to_merged = get_merged_dict(records)

    # Filter the table down to just the keys of the dict.
    deblur_table = deblur_table.filter(
        lambda val, id_, md: id_ in label_to_merged,
        axis='observation',
    )

    merged_table = collapse_biom_observations(deblur_table, label_to_merged)

    with biom_open(dest_fp, 'w') as dest:
        merged_table.to_hdf5(dest, 'deblur_relabel_merged.py')
def main():
    """Rename the observations of a deblur BIOM table and save the result.

    All options come from the parsed command-line arguments.  When ``test``
    is set, the unit tests are run and 0 is returned without touching any
    files.  Otherwise the input table is renamed using ``name_stub`` and
    ``metadata_name``, optionally exported as FASTA when
    ``output_fasta_fp`` is given, and written as HDF5.  The output BIOM
    path defaults to the input path with its extension replaced by
    ``.renamed.biom``.
    """
    opts = parser.parse_args()

    biom_in = opts.input_biom_fp
    biom_out = opts.output_biom_fp
    fasta_out = opts.output_fasta_fp
    md_name = opts.metadata_name
    stub = opts.name_stub
    run_tests = opts.test

    # Self-test mode short-circuits everything else.
    if run_tests:
        run_unittests()
        return 0

    table = load_table(biom_in)

    if biom_out is None:
        base, _ext = os.path.splitext(biom_in)
        biom_out = base + '.renamed.biom'

    renamed = rename_deblur_biom(table,
                                 name_stub=stub,
                                 metadata_name=md_name)

    # The FASTA export is optional and is written before the BIOM file.
    if fasta_out is not None:
        fasta_text = format_seqs_from_deblur_biom(renamed,
                                                  metadata_name=md_name)
        with open(fasta_out, 'w') as fasta_handle:
            fasta_handle.write(fasta_text)

    with biom_open(biom_out, 'w') as biom_handle:
        renamed.to_hdf5(biom_handle, 'deblur_relabel_merged.py')
def test_json_to_hdf5_collapsed_samples(self):
    """Correctly converts json to HDF5 changing the sample metadata

    Runs the command on the sample-collapsed JSON fixture with
    ``to_hdf5=True`` and ``collapsed_samples=True``, reloads the file it
    wrote, and compares it with a hand-built expected table.
    """
    # The command's return value is not used; the output is read from disk.
    with biom_open(self.json_collapsed_samples) as handle:
        self.cmd(table=parse_biom_table(handle),
                 output_filepath=self.output_filepath,
                 to_hdf5=True,
                 collapsed_samples=True)
    obs = load_table(self.output_filepath)

    counts = np.array([[0., 1.],
                       [6., 6.],
                       [6., 1.],
                       [1., 4.],
                       [0., 2.]])
    otu_ids = [u'GG_OTU_1', u'GG_OTU_2', u'GG_OTU_3', u'GG_OTU_4',
               u'GG_OTU_5']
    otu_md = [
        {u'taxonomy': [u'k__Bacteria', u'p__Proteobacteria',
                       u'c__Gammaproteobacteria', u'o__Enterobacteriales',
                       u'f__Enterobacteriaceae', u'g__Escherichia',
                       u's__']},
        {u'taxonomy': [u'k__Bacteria', u'p__Cyanobacteria',
                       u'c__Nostocophycideae', u'o__Nostocales',
                       u'f__Nostocaceae', u'g__Dolichospermum', u's__']},
        {u'taxonomy': [u'k__Archaea', u'p__Euryarchaeota',
                       u'c__Methanomicrobia', u'o__Methanosarcinales',
                       u'f__Methanosarcinaceae', u'g__Methanosarcina',
                       u's__']},
        {u'taxonomy': [u'k__Bacteria', u'p__Firmicutes', u'c__Clostridia',
                       u'o__Halanaerobiales', u'f__Halanaerobiaceae',
                       u'g__Halanaerobium',
                       u's__Halanaerobiumsaccharolyticum']},
        {u'taxonomy': [u'k__Bacteria', u'p__Proteobacteria',
                       u'c__Gammaproteobacteria', u'o__Enterobacteriales',
                       u'f__Enterobacteriaceae', u'g__Escherichia',
                       u's__']},
    ]
    # Each collapsed sample records the ids of the samples it was built from.
    group_md = [
        {u'collapsed_ids': [u'Sample4', u'Sample5', u'Sample6']},
        {u'collapsed_ids': [u'Sample1', u'Sample2', u'Sample3']},
    ]

    exp = Table(counts,
                observation_ids=otu_ids,
                sample_ids=[u'skin', u'gut'],
                observation_metadata=otu_md,
                sample_metadata=group_md,
                type=u'OTU table')
    self.assertEqual(obs, exp)
def test_json_to_hdf5_collapsed_metadata(self):
    """Correctly converts json to HDF5 changing the observation metadata

    Runs the command on the observation-collapsed JSON fixture with
    ``to_hdf5=True`` and ``collapsed_observations=True``, reloads the file
    it wrote, and compares it with a hand-built expected table.
    """
    with biom_open(self.json_collapsed_obs) as handle:
        parsed = parse_biom_table(handle)
        # The command's return value is not used; the output is read from
        # disk below.
        self.cmd(table=parsed,
                 output_filepath=self.output_filepath,
                 to_hdf5=True,
                 collapsed_observations=True)
    obs = load_table(self.output_filepath)

    counts = np.array([[2., 1., 1., 0., 0., 1.],
                       [0., 0., 1., 4., 0., 2.],
                       [5., 1., 0., 2., 3., 1.],
                       [0., 1., 2., 0., 0., 0.]])
    phylum_ids = [u'p__Firmicutes', u'p__Euryarchaeota',
                  u'p__Cyanobacteria', u'p__Proteobacteria']
    sample_names = [u'Sample1', u'Sample2', u'Sample3',
                    u'Sample4', u'Sample5', u'Sample6']
    # Each collapsed observation records the OTU ids it was built from.
    phylum_md = [
        {u'collapsed_ids': [u'GG_OTU_4']},
        {u'collapsed_ids': [u'GG_OTU_3']},
        {u'collapsed_ids': [u'GG_OTU_2']},
        {u'collapsed_ids': [u'GG_OTU_1', u'GG_OTU_5']},
    ]
    per_sample_md = [
        {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
         u'BarcodeSequence': u'CGCTTATCGAGA',
         u'Description': u'human gut',
         u'BODY_SITE': u'gut'},
        {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
         u'BarcodeSequence': u'CATACCAGTAGC',
         u'Description': u'human gut',
         u'BODY_SITE': u'gut'},
        {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
         u'BarcodeSequence': u'CTCTCTACCTGT',
         u'Description': u'human gut',
         u'BODY_SITE': u'gut'},
        {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
         u'BarcodeSequence': u'CTCTCGGCCTGT',
         u'Description': u'human skin',
         u'BODY_SITE': u'skin'},
        {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
         u'BarcodeSequence': u'CTCTCTACCAAT',
         u'Description': u'human skin',
         u'BODY_SITE': u'skin'},
        {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
         u'BarcodeSequence': u'CTAACTACCAAT',
         u'Description': u'human skin',
         u'BODY_SITE': u'skin'},
    ]

    exp = Table(counts,
                observation_ids=phylum_ids,
                sample_ids=sample_names,
                observation_metadata=phylum_md,
                sample_metadata=per_sample_md,
                type=u'OTU table')
    self.assertEqual(obs, exp)
def test_json_to_hdf5_collapsed_samples(self):
    """Correctly converts json to HDF5 changing the sample metadata

    The sample-collapsed JSON fixture is converted with ``to_hdf5=True``
    and ``collapsed_samples=True``; the written file is then loaded and
    checked against the expected two-sample table.
    """
    with biom_open(self.json_collapsed_samples) as json_handle:
        json_table = parse_biom_table(json_handle)
        # Only the file written by the command is inspected afterwards.
        self.cmd(table=json_table, output_filepath=self.output_filepath,
                 to_hdf5=True, collapsed_samples=True)
    obs = load_table(self.output_filepath)

    # Lineages in observation order; GG_OTU_1 and GG_OTU_5 share one.
    escherichia = [u'k__Bacteria', u'p__Proteobacteria',
                   u'c__Gammaproteobacteria', u'o__Enterobacteriales',
                   u'f__Enterobacteriaceae', u'g__Escherichia', u's__']
    dolichospermum = [u'k__Bacteria', u'p__Cyanobacteria',
                      u'c__Nostocophycideae', u'o__Nostocales',
                      u'f__Nostocaceae', u'g__Dolichospermum', u's__']
    methanosarcina = [u'k__Archaea', u'p__Euryarchaeota',
                      u'c__Methanomicrobia', u'o__Methanosarcinales',
                      u'f__Methanosarcinaceae', u'g__Methanosarcina',
                      u's__']
    halanaerobium = [u'k__Bacteria', u'p__Firmicutes', u'c__Clostridia',
                     u'o__Halanaerobiales', u'f__Halanaerobiaceae',
                     u'g__Halanaerobium',
                     u's__Halanaerobiumsaccharolyticum']
    lineages = (escherichia, dolichospermum, methanosarcina, halanaerobium,
                escherichia)
    # Copy each lineage so every metadata entry owns its own list.
    expected_obs_md = [{u'taxonomy': list(lineage)} for lineage in lineages]

    expected_sample_md = [
        {u'collapsed_ids': [u'Sample4', u'Sample5', u'Sample6']},
        {u'collapsed_ids': [u'Sample1', u'Sample2', u'Sample3']},
    ]

    expected_data = np.array([[0., 1.], [6., 6.], [6., 1.], [1., 4.],
                              [0., 2.]])

    exp = Table(
        expected_data,
        observation_ids=[u'GG_OTU_1', u'GG_OTU_2', u'GG_OTU_3',
                         u'GG_OTU_4', u'GG_OTU_5'],
        sample_ids=[u'skin', u'gut'],
        observation_metadata=expected_obs_md,
        sample_metadata=expected_sample_md,
        type=u'OTU table',
    )
    self.assertEqual(obs, exp)
def test_json_to_hdf5_collapsed_metadata(self):
    """Correctly converts json to HDF5 changing the observation metadata

    The observation-collapsed JSON fixture is converted with
    ``to_hdf5=True`` and ``collapsed_observations=True``; the written file
    is then loaded and checked against the expected four-observation table.
    """
    with biom_open(self.json_collapsed_obs) as json_handle:
        json_table = parse_biom_table(json_handle)
        # Only the file written by the command is inspected afterwards.
        self.cmd(table=json_table, output_filepath=self.output_filepath,
                 to_hdf5=True, collapsed_observations=True)
    obs = load_table(self.output_filepath)

    # (barcode, description, body site) for Sample1..Sample6, in order.
    sample_rows = (
        (u'CGCTTATCGAGA', u'human gut', u'gut'),
        (u'CATACCAGTAGC', u'human gut', u'gut'),
        (u'CTCTCTACCTGT', u'human gut', u'gut'),
        (u'CTCTCGGCCTGT', u'human skin', u'skin'),
        (u'CTCTCTACCAAT', u'human skin', u'skin'),
        (u'CTAACTACCAAT', u'human skin', u'skin'),
    )
    expected_sample_md = [
        {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
         u'BarcodeSequence': barcode,
         u'Description': description,
         u'BODY_SITE': body_site}
        for barcode, description, body_site in sample_rows
    ]

    # OTU ids folded into each collapsed observation, in observation order.
    collapsed_groups = ([u'GG_OTU_4'], [u'GG_OTU_3'], [u'GG_OTU_2'],
                        [u'GG_OTU_1', u'GG_OTU_5'])
    expected_obs_md = [{u'collapsed_ids': group}
                       for group in collapsed_groups]

    expected_data = np.array([[2., 1., 1., 0., 0., 1.],
                              [0., 0., 1., 4., 0., 2.],
                              [5., 1., 0., 2., 3., 1.],
                              [0., 1., 2., 0., 0., 0.]])

    exp = Table(
        expected_data,
        observation_ids=[u'p__Firmicutes', u'p__Euryarchaeota',
                         u'p__Cyanobacteria', u'p__Proteobacteria'],
        sample_ids=[u'Sample1', u'Sample2', u'Sample3', u'Sample4',
                    u'Sample5', u'Sample6'],
        observation_metadata=expected_obs_md,
        sample_metadata=expected_sample_md,
        type=u'OTU table',
    )
    self.assertEqual(obs, exp)