def test_load_classic(self):
    """Round-trip a table through str() and load_table()."""
    expected = load_table(get_data_path('test.json'))
    with NamedTemporaryFile(mode='w') as tmp:
        tmp.write(str(expected))
        tmp.flush()
        observed = load_table(tmp.name)
        # IDs on both axes and the dense matrix must survive the trip.
        npt.assert_equal(observed.ids(), expected.ids())
        npt.assert_equal(observed.ids(axis='observation'),
                         expected.ids(axis='observation'))
        npt.assert_equal(observed.matrix_data.toarray(),
                         expected.matrix_data.toarray())
def test_biom_to_classic1(self):
    """Correctly converts biom to classic."""
    self.cmd(table=self.biom_table1, to_tsv=True, header_key='taxonomy',
             output_filepath=self.output_filepath)
    converted = load_table(self.output_filepath)
    self.assertEqual(converted, self.classic_biom1)
def test_biom_to_classic2(self):
    """Correctly converts biom to classic with metadata renaming."""
    self.cmd(table=self.biom_table1, to_tsv=True, header_key='taxonomy',
             output_metadata_id='foo',
             output_filepath=self.output_filepath)
    converted = load_table(self.output_filepath)
    # The observation metadata column must carry the renamed key.
    self.assertTrue('foo' in converted.metadata(axis='observation')[0])
def test_classic_to_biom_with_metadata(self):
    """Correctly converts classic to biom with metadata."""
    # First pass: 'naive' leaves the taxonomy value as a plain string.
    self.cmd(table=self.classic_biom1,
             output_filepath=self.output_filepath,
             sample_metadata=self.sample_md1, to_json=True,
             table_type='OTU table', process_obs_metadata='naive')
    result = load_table(self.output_filepath)
    self.assertEqual(type(result), Table)
    self.assertEqual(len(result.ids()), 9)
    self.assertEqual(len(result.ids(axis='observation')), 14)
    self.assertNotEqual(result.metadata(), None)
    self.assertNotEqual(result.metadata(axis='observation'), None)
    sample_md = result.metadata()
    self.assertEqual(sample_md[result.index(u'p2', u'sample')],
                     {'foo': 'c;b;a'})
    self.assertEqual(sample_md[result.index('not16S.1', 'sample')],
                     {'foo': 'b;c;d'})
    obs_md = result.metadata(axis='observation')
    self.assertEqual(obs_md[result.index('None11', 'observation')],
                     {'taxonomy': 'Unclassified'})
    # Second pass: 'sc_separated' splits taxonomy into a list (only
    # observation metadata processing is supported).
    self.cmd(table=self.classic_biom1,
             output_filepath=self.output_filepath,
             sample_metadata=self.sample_md1,
             table_type='OTU table',
             process_obs_metadata='sc_separated', to_json=True)
    result = load_table(self.output_filepath)
    self.assertEqual(type(result), Table)
    self.assertEqual(len(result.ids()), 9)
    self.assertEqual(len(result.ids(axis='observation')), 14)
    self.assertNotEqual(result.metadata(), None)
    self.assertNotEqual(result.metadata(axis='observation'), None)
    sample_md = result.metadata()
    self.assertEqual(sample_md[result.index('p2', 'sample')],
                     {'foo': 'c;b;a'})
    self.assertEqual(sample_md[result.index('not16S.1', 'sample')],
                     {'foo': 'b;c;d'})
    obs_md = result.metadata(axis='observation')
    self.assertEqual(obs_md[result.index('None11', 'observation')],
                     {'taxonomy': ['Unclassified']})
def test_json_to_hdf5_collapsed_samples(self):
    """Correctly converts json to HDF5 changing the sample metadata"""
    with biom_open(self.json_collapsed_samples) as f:
        self.cmd(table=parse_biom_table(f),
                 output_filepath=self.output_filepath,
                 to_hdf5=True, collapsed_samples=True)
    result = load_table(self.output_filepath)

    # One semicolon-joined lineage per expected observation; split back
    # into the list form stored in the table metadata.
    lineages = [
        u'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;'
        u'o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__',
        u'k__Bacteria;p__Cyanobacteria;c__Nostocophycideae;o__Nostocales;'
        u'f__Nostocaceae;g__Dolichospermum;s__',
        u'k__Archaea;p__Euryarchaeota;c__Methanomicrobia;'
        u'o__Methanosarcinales;f__Methanosarcinaceae;g__Methanosarcina;'
        u's__',
        u'k__Bacteria;p__Firmicutes;c__Clostridia;o__Halanaerobiales;'
        u'f__Halanaerobiaceae;g__Halanaerobium;'
        u's__Halanaerobiumsaccharolyticum',
        u'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;'
        u'o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__',
    ]
    expected = Table(
        np.array([[0., 1.], [6., 6.], [6., 1.], [1., 4.], [0., 2.]]),
        observation_ids=[u'GG_OTU_%d' % i for i in range(1, 6)],
        sample_ids=[u'skin', u'gut'],
        observation_metadata=[{u'taxonomy': lineage.split(u';')}
                              for lineage in lineages],
        sample_metadata=[
            {u'collapsed_ids': [u'Sample4', u'Sample5', u'Sample6']},
            {u'collapsed_ids': [u'Sample1', u'Sample2', u'Sample3']},
        ],
        type=u'OTU table')
    self.assertEqual(result, expected)
def test_classic_to_biom_with_metadata(self):
    """Correctly converts classic to biom with metadata."""
    # 'naive' keeps taxonomy as a string; 'sc_separated' splits it into
    # a list (only observation metadata processing is supported).
    cases = [
        ('naive', {'taxonomy': 'Unclassified'}),
        ('sc_separated', {'taxonomy': ['Unclassified']}),
    ]
    for processing, expected_taxonomy in cases:
        self.cmd(table=self.classic_biom1,
                 output_filepath=self.output_filepath,
                 sample_metadata=self.sample_md1, to_json=True,
                 table_type='OTU table',
                 process_obs_metadata=processing)
        loaded = load_table(self.output_filepath)
        self.assertEqual(type(loaded), Table)
        self.assertEqual(len(loaded.ids()), 9)
        self.assertEqual(len(loaded.ids(axis='observation')), 14)
        self.assertNotEqual(loaded.metadata(), None)
        self.assertNotEqual(loaded.metadata(axis='observation'), None)
        self.assertEqual(loaded.metadata()[loaded.index('p2', 'sample')],
                         {'foo': 'c;b;a'})
        self.assertEqual(
            loaded.metadata()[loaded.index('not16S.1', 'sample')],
            {'foo': 'b;c;d'})
        self.assertEqual(
            loaded.metadata(axis='observation')[
                loaded.index('None11', 'observation')],
            expected_taxonomy)
def validate_datasets(self):
    """Validate the dataset file by parsing it as a BIOM table.

    Raises EupathExporter.ValidationException (wrapping the original
    ValueError) when the file cannot be parsed.
    """
    # NOTE: _validate_table is deliberately not used here: it rejects
    # otherwise-loadable tables with errors like "Invalid format
    # 'Biological Observation Matrix 0.9.1-dev', must be '1.0.0'".
    try:
        # Result is discarded - we only care that parsing succeeds.
        load_table(self._dataset_file_path)
    except ValueError as e:  # was Py2-only "except ValueError, e"
        raise EupathExporter.ValidationException(
            "Could not load the file as BIOM - does it conform to the "
            "specification on https://biom-format.org?",
            e)
def test_classic_to_biom(self):
    """Correctly converts classic to biom."""
    self.cmd(table=self.classic_biom1, to_json=True,
             table_type='OTU table',
             output_filepath=self.output_filepath)
    converted = load_table(self.output_filepath)
    self.assertEqual(type(converted), Table)
    # 9 samples by 14 observations, with observation metadata only.
    self.assertEqual(len(converted.ids()), 9)
    self.assertEqual(len(converted.ids(axis='observation')), 14)
    self.assertEqual(converted.metadata(), None)
    self.assertNotEqual(converted.metadata(axis='observation'), None)
def test_json_to_hdf5_collapsed_metadata(self):
    """Correctly converts json to HDF5 changing the observation metadata"""
    with biom_open(self.json_collapsed_obs) as f:
        t = parse_biom_table(f)
        self.cmd(table=t, output_filepath=self.output_filepath,
                 to_hdf5=True, collapsed_observations=True)
    result = load_table(self.output_filepath)

    # All six samples share a primer; description follows the body site.
    barcodes = [u'CGCTTATCGAGA', u'CATACCAGTAGC', u'CTCTCTACCTGT',
                u'CTCTCGGCCTGT', u'CTCTCTACCAAT', u'CTAACTACCAAT']
    sites = [u'gut'] * 3 + [u'skin'] * 3
    sample_md = [{u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
                  u'BarcodeSequence': barcode,
                  u'Description': u'human ' + site,
                  u'BODY_SITE': site}
                 for barcode, site in zip(barcodes, sites)]
    expected = Table(
        np.array([[2., 1., 1., 0., 0., 1.],
                  [0., 0., 1., 4., 0., 2.],
                  [5., 1., 0., 2., 3., 1.],
                  [0., 1., 2., 0., 0., 0.]]),
        observation_ids=[u'p__Firmicutes', u'p__Euryarchaeota',
                         u'p__Cyanobacteria', u'p__Proteobacteria'],
        sample_ids=[u'Sample1', u'Sample2', u'Sample3',
                    u'Sample4', u'Sample5', u'Sample6'],
        observation_metadata=[
            {u'collapsed_ids': [u'GG_OTU_4']},
            {u'collapsed_ids': [u'GG_OTU_3']},
            {u'collapsed_ids': [u'GG_OTU_2']},
            {u'collapsed_ids': [u'GG_OTU_1', u'GG_OTU_5']}],
        sample_metadata=sample_md,
        type=u'OTU table')
    self.assertEqual(result, expected)
def setUp(self):
    """Set up data for use in unit tests."""
    self.cmd = _convert
    self.output_filepath = tempfile.NamedTemporaryFile().name

    def _table_from_text(text):
        # Write the literal to a temp file and parse it back as a Table.
        with tempfile.NamedTemporaryFile('w') as fh:
            fh.write(text)
            fh.flush()
            return load_table(fh.name)

    self.biom_table1 = _table_from_text(biom1)
    self.biom_lines1 = biom1.split('\n')
    self.classic_biom1 = _table_from_text(classic1)
    self.sample_md1 = MetadataMap.from_file(sample_md1.split('\n'))
    test_data_dir = join(dirname(abspath(__file__)), 'test_data')
    self.json_collapsed_obs = join(test_data_dir,
                                   'json_obs_collapsed.biom')
    self.json_collapsed_samples = join(test_data_dir,
                                       'json_sample_collapsed.biom')
def test_json_to_hdf5_collapsed_samples(self):
    """Correctly converts json to HDF5 changing the sample metadata"""
    with biom_open(self.json_collapsed_samples) as src:
        self.cmd(table=parse_biom_table(src),
                 output_filepath=self.output_filepath,
                 to_hdf5=True, collapsed_samples=True)
    written = load_table(self.output_filepath)

    counts = np.array([[0., 1.],
                       [6., 6.],
                       [6., 1.],
                       [1., 4.],
                       [0., 2.]])
    taxonomies = [
        [u'k__Bacteria', u'p__Proteobacteria', u'c__Gammaproteobacteria',
         u'o__Enterobacteriales', u'f__Enterobacteriaceae',
         u'g__Escherichia', u's__'],
        [u'k__Bacteria', u'p__Cyanobacteria', u'c__Nostocophycideae',
         u'o__Nostocales', u'f__Nostocaceae', u'g__Dolichospermum',
         u's__'],
        [u'k__Archaea', u'p__Euryarchaeota', u'c__Methanomicrobia',
         u'o__Methanosarcinales', u'f__Methanosarcinaceae',
         u'g__Methanosarcina', u's__'],
        [u'k__Bacteria', u'p__Firmicutes', u'c__Clostridia',
         u'o__Halanaerobiales', u'f__Halanaerobiaceae',
         u'g__Halanaerobium', u's__Halanaerobiumsaccharolyticum'],
        [u'k__Bacteria', u'p__Proteobacteria', u'c__Gammaproteobacteria',
         u'o__Enterobacteriales', u'f__Enterobacteriaceae',
         u'g__Escherichia', u's__'],
    ]
    # Collapsed samples record which original samples they absorbed.
    collapsed_md = [
        {u'collapsed_ids': [u'Sample4', u'Sample5', u'Sample6']},
        {u'collapsed_ids': [u'Sample1', u'Sample2', u'Sample3']},
    ]
    expected = Table(counts,
                     observation_ids=[u'GG_OTU_1', u'GG_OTU_2',
                                      u'GG_OTU_3', u'GG_OTU_4',
                                      u'GG_OTU_5'],
                     sample_ids=[u'skin', u'gut'],
                     observation_metadata=[{u'taxonomy': lineage}
                                           for lineage in taxonomies],
                     sample_metadata=collapsed_md,
                     type=u'OTU table')
    self.assertEqual(written, expected)
def file_choices(choices, fname):
    # argparse "type" callback: accept only filenames whose extension
    # (without the dot) is in *choices*; otherwise abort via the parser.
    # NOTE(review): relies on a module-level `parser` defined elsewhere.
    ext = os.path.splitext(fname)[1][1:]
    if ext not in choices:
        parser.error('file must be biom format')
    return fname

parser.add_argument('-i', type=lambda s: file_choices(('biom', 'tab'), s),
                    help='Input biom file', action='store', required=True)

# Print usage and exit when invoked without arguments.
if len(sys.argv) <= 1:
    parser.print_help()
    sys.exit(1)

results = parser.parse_args()

# NOTE(review): Python 2 print statements - this script cannot run
# under Python 3 as written.
print '\nInput file:', results.i

# Output name: strip the 5-character ".biom" suffix and tag the file.
out = results.i[:-5] + '_relativized.biom'

# Normalize each sample to relative abundance (non-destructively) and
# write the result as HDF5.
t = load_table(results.i)
normed = t.norm(axis='sample', inplace=False)
with biom_open(out, 'w') as f:
    normed.to_hdf5(f, 'example')

print '\n\tSuccess!\n\tOutput file: ' + out + '\n'
def setUp(self):
    """Parse the biom1 literal into a Table fixture."""
    with tempfile.NamedTemporaryFile(mode="w") as handle:
        handle.write(biom1)
        handle.flush()
        # Parse while the temp file still exists (deleted on close).
        self.biom1 = load_table(handle.name)
def test_load_table_unicode(self):
    """Non-ASCII observation metadata survives loading."""
    table = load_table(get_data_path('bad_table.txt'))
    taxonomy = table.metadata('otu1', 'observation')['taxonomy']
    self.assertEqual(u's__Cortinarius grosmornënsis', taxonomy)
def test_json_to_hdf5_collapsed_metadata(self):
    """Correctly converts json to HDF5 changing the observation metadata"""
    with biom_open(self.json_collapsed_obs) as data_fh:
        source_table = parse_biom_table(data_fh)
        self.cmd(table=source_table,
                 output_filepath=self.output_filepath,
                 to_hdf5=True, collapsed_observations=True)
    written = load_table(self.output_filepath)

    primer = u'CATGCTGCCTCCCGTAGGAGT'

    def _sample(barcode, site):
        # Every sample shares the primer; description tracks the site.
        return {u'LinkerPrimerSequence': primer,
                u'BarcodeSequence': barcode,
                u'Description': u'human ' + site,
                u'BODY_SITE': site}

    expected = Table(
        np.array([[2., 1., 1., 0., 0., 1.],
                  [0., 0., 1., 4., 0., 2.],
                  [5., 1., 0., 2., 3., 1.],
                  [0., 1., 2., 0., 0., 0.]]),
        observation_ids=[u'p__Firmicutes', u'p__Euryarchaeota',
                         u'p__Cyanobacteria', u'p__Proteobacteria'],
        sample_ids=[u'Sample1', u'Sample2', u'Sample3',
                    u'Sample4', u'Sample5', u'Sample6'],
        observation_metadata=[
            {u'collapsed_ids': [u'GG_OTU_4']},
            {u'collapsed_ids': [u'GG_OTU_3']},
            {u'collapsed_ids': [u'GG_OTU_2']},
            {u'collapsed_ids': [u'GG_OTU_1', u'GG_OTU_5']}],
        sample_metadata=[
            _sample(u'CGCTTATCGAGA', u'gut'),
            _sample(u'CATACCAGTAGC', u'gut'),
            _sample(u'CTCTCTACCTGT', u'gut'),
            _sample(u'CTCTCGGCCTGT', u'skin'),
            _sample(u'CTCTCTACCAAT', u'skin'),
            _sample(u'CTAACTACCAAT', u'skin')],
        type=u'OTU table')
    self.assertEqual(written, expected)
# Command-line script: relativize a BIOM table (per-sample relative
# abundance) and write the result as HDF5.
# NOTE(review): the body below also uses sys, load_table and biom_open,
# which are not imported here - presumably imported elsewhere in the
# original file, or a latent bug; confirm.
import argparse
import os.path

parser = argparse.ArgumentParser(
    description='This is a python script to relativize biom tables')

def file_choices(choices, fname):
    # argparse "type" callback: accept only filenames whose extension
    # (without the dot) is in *choices*; otherwise abort via the parser.
    ext = os.path.splitext(fname)[1][1:]
    if ext not in choices:
        parser.error('file must be biom format')
    return fname

parser.add_argument('-i', type=lambda s:file_choices(('biom','tab'),s),
                    help='Input biom file', action='store',
                    required = True)

# Print usage and exit when invoked without arguments.
if len(sys.argv)<=1:
    parser.print_help()
    sys.exit(1)

results = parser.parse_args()

# NOTE(review): Python 2 print statements - this script cannot run
# under Python 3 as written.
print '\nInput file:', results.i

# Output name: strip the 5-character ".biom" suffix and tag the file.
out = results.i[:-5] + '_relativized.biom'

# Normalize each sample to relative abundance (non-destructively) and
# write the result as HDF5.
t = load_table(results.i)
normed = t.norm(axis='sample', inplace=False)
with biom_open(out, 'w') as f:
    normed.to_hdf5(f, 'example')

print '\n\tSuccess!\n\tOutput file: ' + out + '\n'
raise ValidationException("Not a good URL: " + content_with_url) try: downloaded_file = url.read() except Exception, e: raise ValidationException( "Could not read data from URL: " + url.geturl()) with open(self._dataset_file_path + ".downloaded", 'w') as tmp: tmp.write(downloaded_file) content_path = self._dataset_file_path + ".downloaded" else: content_path = self._dataset_file_path if not os.path.exists(content_path): raise ValueError("Does not exist: ".content_path) try: table = load_table(content_path) except TypeError, e: raise ValidationException(e) except ValueError, e: raise ValidationException(e) except Exception, e: raise ValidationException( "Could not load the file as BIOM - does it conform to the specification on https://biom-format.org?" ) give_table_extra_methods(table) generated_by = "MicrobiomeDb exporter" with open(self._dataset_file_path + ".metadata.json", 'w') as f1: table.to_json_but_only_metadata(generated_by, direct_io=f1)
def setUp(self):
    """Parse the biom1 literal into a Table fixture."""
    with tempfile.NamedTemporaryFile(mode='w') as handle:
        handle.write(biom1)
        handle.flush()
        # Parse while the temp file still exists (deleted on close).
        self.biom1 = load_table(handle.name)
def load_data(self):
    """Extract the archived data member to a temp dir and parse it."""
    # The zipfile context also closes the archive when we are done;
    # the temp dir (and the extracted copy) is removed after parsing.
    with self.__zipfile__ as archive:
        with tempfile.TemporaryDirectory() as workdir:
            extracted = archive.extract(self.__data_path__, workdir)
            return load_table(extracted)