示例#1
0
    def test_load_classic(self):
        """Round-trip a table through its ``str()`` text and reload it.

        The table loaded from ``test.json`` is written to a temporary file
        with ``str()``, loaded back by file name, and compared with the
        source table on ids (default axis and observation axis) and on the
        dense data matrix.
        """
        expected = load_table(get_data_path('test.json'))
        with NamedTemporaryFile(mode='w') as tmp:
            tmp.write(str(expected))
            # The file is re-read by name while still open, so push the
            # buffered text out first.
            tmp.flush()

            observed = load_table(tmp.name)

        npt.assert_equal(observed.ids(), expected.ids())
        npt.assert_equal(observed.ids(axis='observation'),
                         expected.ids(axis='observation'))

        observed_dense = observed.matrix_data.toarray()
        expected_dense = expected.matrix_data.toarray()
        npt.assert_equal(observed_dense, expected_dense)
    def test_biom_to_classic1(self):
        """Converting ``biom_table1`` to TSV reloads as ``classic_biom1``."""
        self.cmd(
            table=self.biom_table1,
            output_filepath=self.output_filepath,
            to_tsv=True,
            header_key='taxonomy',
        )

        # The conversion writes to disk; compare what was actually written.
        converted = load_table(self.output_filepath)
        self.assertEqual(converted, self.classic_biom1)
 def test_biom_to_classic2(self):
     """Correctly converts biom to classic with metadata renaming.

     The conversion is run with ``output_metadata_id='foo'`` and the
     reloaded table's first observation metadata entry must contain the
     key ``'foo'``.
     """
     self.cmd(table=self.biom_table1,
              output_filepath=self.output_filepath,
              to_tsv=True,
              header_key='taxonomy',
              output_metadata_id='foo')
     obs = load_table(self.output_filepath)
     first_obs_md = obs.metadata(axis='observation')[0]
     # assertIn reports both the key and the container on failure, unlike
     # assertTrue('foo' in ...), which only says "False is not true".
     self.assertIn('foo', first_obs_md)
示例#4
0
    def test_classic_to_biom_with_metadata(self):
        """Correctly converts classic to biom with metadata.

        Runs the conversion twice, with ``process_obs_metadata`` set to
        ``'naive'`` and then ``'sc_separated'``, and checks the table that
        is reloaded from the output file each time.
        """
        # No processing of metadata.
        # The return value is not needed; the result is read back from
        # output_filepath.
        self.cmd(table=self.classic_biom1,
                 output_filepath=self.output_filepath,
                 sample_metadata=self.sample_md1,
                 to_json=True,
                 table_type='OTU table',
                 process_obs_metadata='naive')

        obs = load_table(self.output_filepath)
        self.assertEqual(type(obs), Table)
        self.assertEqual(len(obs.ids()), 9)
        self.assertEqual(len(obs.ids(axis='observation')), 14)
        sample_md = obs.metadata()
        self.assertNotEqual(sample_md, None)
        observation_md = obs.metadata(axis='observation')
        self.assertNotEqual(observation_md, None)
        self.assertEqual(sample_md[obs.index(u'p2', u'sample')],
                         {'foo': 'c;b;a'})
        self.assertEqual(sample_md[obs.index('not16S.1', 'sample')],
                         {'foo': 'b;c;d'})
        self.assertEqual(
            observation_md[obs.index('None11', 'observation')],
            {'taxonomy': 'Unclassified'})

        # With processing of metadata (currently only supports observation md).
        self.cmd(table=self.classic_biom1,
                 output_filepath=self.output_filepath,
                 sample_metadata=self.sample_md1,
                 table_type='OTU table',
                 process_obs_metadata='sc_separated',
                 to_json=True)

        obs = load_table(self.output_filepath)
        self.assertEqual(type(obs), Table)
        self.assertEqual(len(obs.ids()), 9)
        self.assertEqual(len(obs.ids(axis='observation')), 14)
        sample_md = obs.metadata()
        self.assertNotEqual(sample_md, None)
        observation_md = obs.metadata(axis='observation')
        self.assertNotEqual(observation_md, None)
        self.assertEqual(sample_md[obs.index('p2', 'sample')],
                         {'foo': 'c;b;a'})
        self.assertEqual(sample_md[obs.index('not16S.1', 'sample')],
                         {'foo': 'b;c;d'})
        # Here the taxonomy value is expected as a list rather than a
        # plain string.
        self.assertEqual(
            observation_md[obs.index('None11', 'observation')],
            {'taxonomy': ['Unclassified']})
示例#5
0
    def test_biom_to_classic1(self):
        """Check that a biom -> TSV conversion reloads as the classic table."""
        convert_args = {
            'table': self.biom_table1,
            'output_filepath': self.output_filepath,
            'to_tsv': True,
            'header_key': 'taxonomy',
        }
        self.cmd(**convert_args)

        round_tripped = load_table(self.output_filepath)
        self.assertEqual(round_tripped, self.classic_biom1)
示例#6
0
 def test_biom_to_classic2(self):
     """Correctly converts biom to classic with metadata renaming.

     After converting with ``output_metadata_id='foo'``, the first
     observation metadata entry of the reloaded table must contain the
     key ``'foo'``.
     """
     self.cmd(table=self.biom_table1,
              output_filepath=self.output_filepath,
              to_tsv=True,
              header_key='taxonomy',
              output_metadata_id='foo')
     obs = load_table(self.output_filepath)
     # assertIn gives a failure message naming the missing key and the
     # container; assertTrue on an `in` expression does not.
     self.assertIn('foo', obs.metadata(axis='observation')[0])
示例#7
0
 def test_json_to_hdf5_collapsed_samples(self):
     """Correctly converts json to HDF5 changing the sample metadata"""
     with biom_open(self.json_collapsed_samples) as f:
         # The return value is not needed; the converted table is read
         # back from output_filepath below.
         self.cmd(table=parse_biom_table(f),
                  output_filepath=self.output_filepath,
                  to_hdf5=True,
                  collapsed_samples=True)
     obs = load_table(self.output_filepath)

     counts = np.array([[0., 1.],
                        [6., 6.],
                        [6., 1.],
                        [1., 4.],
                        [0., 2.]])
     # One taxonomy lineage per observation, in observation_ids order.
     lineages = [
         [u'k__Bacteria', u'p__Proteobacteria',
          u'c__Gammaproteobacteria', u'o__Enterobacteriales',
          u'f__Enterobacteriaceae', u'g__Escherichia', u's__'],
         [u'k__Bacteria', u'p__Cyanobacteria',
          u'c__Nostocophycideae', u'o__Nostocales',
          u'f__Nostocaceae', u'g__Dolichospermum', u's__'],
         [u'k__Archaea', u'p__Euryarchaeota',
          u'c__Methanomicrobia', u'o__Methanosarcinales',
          u'f__Methanosarcinaceae', u'g__Methanosarcina', u's__'],
         [u'k__Bacteria', u'p__Firmicutes', u'c__Clostridia',
          u'o__Halanaerobiales', u'f__Halanaerobiaceae',
          u'g__Halanaerobium', u's__Halanaerobiumsaccharolyticum'],
         [u'k__Bacteria', u'p__Proteobacteria',
          u'c__Gammaproteobacteria', u'o__Enterobacteriales',
          u'f__Enterobacteriaceae', u'g__Escherichia', u's__'],
     ]
     # One group of original sample ids per entry in sample_ids.
     collapsed_groups = [
         [u'Sample4', u'Sample5', u'Sample6'],
         [u'Sample1', u'Sample2', u'Sample3'],
     ]
     exp = Table(counts,
                 observation_ids=[u'GG_OTU_1', u'GG_OTU_2', u'GG_OTU_3',
                                  u'GG_OTU_4', u'GG_OTU_5'],
                 sample_ids=[u'skin', u'gut'],
                 observation_metadata=[{u'taxonomy': lineage}
                                       for lineage in lineages],
                 sample_metadata=[{u'collapsed_ids': group}
                                  for group in collapsed_groups],
                 type=u'OTU table')
     self.assertEqual(obs, exp)
    def test_classic_to_biom_with_metadata(self):
        """Correctly converts classic to biom with metadata.

        The conversion is exercised once per ``process_obs_metadata`` mode
        used here (``'naive'``, ``'sc_separated'``); each time the output
        file is reloaded and its size and selected metadata are checked.
        """
        # No processing of metadata.
        self.cmd(table=self.classic_biom1,
                 output_filepath=self.output_filepath,
                 sample_metadata=self.sample_md1,
                 to_json=True,
                 table_type='OTU table',
                 process_obs_metadata='naive')

        obs = load_table(self.output_filepath)
        self.assertEqual(type(obs), Table)
        self.assertEqual(len(obs.ids()), 9)
        self.assertEqual(len(obs.ids(axis='observation')), 14)
        self.assertNotEqual(obs.metadata(), None)
        self.assertNotEqual(obs.metadata(axis='observation'), None)
        p2_md = obs.metadata()[obs.index(u'p2', u'sample')]
        self.assertEqual(p2_md, {'foo': 'c;b;a'})
        not16s_md = obs.metadata()[obs.index('not16S.1', 'sample')]
        self.assertEqual(not16s_md, {'foo': 'b;c;d'})
        none11_md = obs.metadata(axis='observation')[
            obs.index('None11', 'observation')]
        self.assertEqual(none11_md, {'taxonomy': 'Unclassified'})

        # With processing of metadata (currently only supports observation md).
        self.cmd(table=self.classic_biom1,
                 output_filepath=self.output_filepath,
                 sample_metadata=self.sample_md1,
                 table_type='OTU table',
                 process_obs_metadata='sc_separated',
                 to_json=True)

        obs = load_table(self.output_filepath)
        self.assertEqual(type(obs), Table)
        self.assertEqual(len(obs.ids()), 9)
        self.assertEqual(len(obs.ids(axis='observation')), 14)
        self.assertNotEqual(obs.metadata(), None)
        self.assertNotEqual(obs.metadata(axis='observation'), None)
        p2_md = obs.metadata()[obs.index('p2', 'sample')]
        self.assertEqual(p2_md, {'foo': 'c;b;a'})
        not16s_md = obs.metadata()[obs.index('not16S.1', 'sample')]
        self.assertEqual(not16s_md, {'foo': 'b;c;d'})
        none11_md = obs.metadata(axis='observation')[
            obs.index('None11', 'observation')]
        # The taxonomy value is expected as a list in this mode.
        self.assertEqual(none11_md, {'taxonomy': ['Unclassified']})
示例#9
0
 def validate_datasets(self):
     """Check that the dataset file can be loaded as a BIOM table.

     Raises:
         EupathExporter.ValidationException: if ``load_table`` raises
             ``ValueError`` for ``self._dataset_file_path``. The original
             exception is passed along as the second argument.
     """
     # _validate_table is deliberately not used here: per the original
     # author's note it rejects files with unhelpful errors such as
     # "Invalid format 'Biological Observation Matrix 0.9.1-dev', must be
     # '1.0.0'". Simply attempting to load the file is the check instead.
     try:
         table = load_table(self._dataset_file_path)
     # "except X as e" is valid on Python 2.6+ and Python 3; the older
     # "except X, e" form is a SyntaxError on Python 3.
     except ValueError as e:
         raise EupathExporter.ValidationException(
             "Could not load the file as BIOM - does it conform to the specification on https://biom-format.org?",
             e)
    def test_classic_to_biom(self):
        """Converting the classic table to JSON yields the expected Table.

        No sample metadata is passed to the conversion, and the reloaded
        table is expected to report ``None`` for ``metadata()`` while
        ``metadata(axis='observation')`` must not be ``None``.
        """
        self.cmd(
            table=self.classic_biom1,
            output_filepath=self.output_filepath,
            to_json=True,
            table_type='OTU table',
        )

        converted = load_table(self.output_filepath)
        self.assertEqual(type(converted), Table)

        default_axis_ids = converted.ids()
        self.assertEqual(len(default_axis_ids), 9)
        observation_ids = converted.ids(axis='observation')
        self.assertEqual(len(observation_ids), 14)

        self.assertEqual(converted.metadata(), None)
        self.assertNotEqual(converted.metadata(axis='observation'), None)
示例#11
0
    def test_classic_to_biom(self):
        """Check the table produced by a classic -> JSON biom conversion."""
        convert_args = {
            'table': self.classic_biom1,
            'output_filepath': self.output_filepath,
            'to_json': True,
            'table_type': 'OTU table',
        }
        self.cmd(**convert_args)

        result = load_table(self.output_filepath)
        self.assertEqual(type(result), Table)
        self.assertEqual(len(result.ids()), 9)
        self.assertEqual(len(result.ids(axis='observation')), 14)
        # No sample metadata was supplied to the conversion above.
        self.assertEqual(result.metadata(), None)
        self.assertNotEqual(result.metadata(axis='observation'), None)
    def test_json_to_hdf5_collapsed_metadata(self):
        """Correctly converts json to HDF5 changing the observation metadata"""
        with biom_open(self.json_collapsed_obs) as f:
            t = parse_biom_table(f)
            # The return value is not needed; the converted table is read
            # back from output_filepath below.
            self.cmd(table=t,
                     output_filepath=self.output_filepath,
                     to_hdf5=True,
                     collapsed_observations=True)
        obs = load_table(self.output_filepath)

        counts = np.array([[2., 1., 1., 0., 0., 1.],
                           [0., 0., 1., 4., 0., 2.],
                           [5., 1., 0., 2., 3., 1.],
                           [0., 1., 2., 0., 0., 0.]])
        # One group of original observation ids per collapsed observation.
        collapsed_groups = [
            [u'GG_OTU_4'],
            [u'GG_OTU_3'],
            [u'GG_OTU_2'],
            [u'GG_OTU_1', u'GG_OTU_5'],
        ]
        # (BarcodeSequence, Description, BODY_SITE) per sample, in
        # sample_ids order; the linker primer is the same for all six.
        sample_rows = [
            (u'CGCTTATCGAGA', u'human gut', u'gut'),
            (u'CATACCAGTAGC', u'human gut', u'gut'),
            (u'CTCTCTACCTGT', u'human gut', u'gut'),
            (u'CTCTCGGCCTGT', u'human skin', u'skin'),
            (u'CTCTCTACCAAT', u'human skin', u'skin'),
            (u'CTAACTACCAAT', u'human skin', u'skin'),
        ]
        expected_sample_md = [
            {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
             u'BarcodeSequence': barcode,
             u'Description': description,
             u'BODY_SITE': body_site}
            for barcode, description, body_site in sample_rows
        ]
        exp = Table(counts,
                    observation_ids=[u'p__Firmicutes', u'p__Euryarchaeota',
                                     u'p__Cyanobacteria',
                                     u'p__Proteobacteria'],
                    sample_ids=[u'Sample1', u'Sample2', u'Sample3',
                                u'Sample4', u'Sample5', u'Sample6'],
                    observation_metadata=[{u'collapsed_ids': group}
                                          for group in collapsed_groups],
                    sample_metadata=expected_sample_md,
                    type=u'OTU table')

        self.assertEqual(obs, exp)
示例#13
0
    def setUp(self):
        """Prepare the converter, input tables, metadata and fixture paths."""
        def table_from_text(text):
            # load_table is given a file name, so stage the text in a
            # temporary file and load it while that file is still open.
            with tempfile.NamedTemporaryFile('w') as handle:
                handle.write(text)
                handle.flush()
                return load_table(handle.name)

        self.cmd = _convert
        # Only the generated name is kept; the file object is not retained.
        self.output_filepath = tempfile.NamedTemporaryFile().name

        self.biom_table1 = table_from_text(biom1)
        self.biom_lines1 = biom1.split('\n')
        self.classic_biom1 = table_from_text(classic1)

        self.sample_md1 = MetadataMap.from_file(sample_md1.split('\n'))

        # Fixture files live in a 'test_data' directory next to this module.
        test_data_dir = join(dirname(abspath(__file__)), 'test_data')
        self.json_collapsed_obs = join(test_data_dir,
                                       'json_obs_collapsed.biom')
        self.json_collapsed_samples = join(test_data_dir,
                                           'json_sample_collapsed.biom')
    def setUp(self):
        """Build the fixtures shared by the conversion tests."""
        self.cmd = _convert
        # Only the generated name is kept; the file object is not retained.
        self.output_filepath = tempfile.NamedTemporaryFile().name

        # Each text fixture is written to a temporary file and loaded by
        # name while that file is still open.
        with tempfile.NamedTemporaryFile('w') as biom_tmp:
            biom_tmp.write(biom1)
            biom_tmp.flush()
            self.biom_table1 = load_table(biom_tmp.name)

        self.biom_lines1 = biom1.split('\n')

        with tempfile.NamedTemporaryFile('w') as classic_tmp:
            classic_tmp.write(classic1)
            classic_tmp.flush()
            self.classic_biom1 = load_table(classic_tmp.name)

        sample_md_lines = sample_md1.split('\n')
        self.sample_md1 = MetadataMap.from_file(sample_md_lines)

        module_dir = dirname(abspath(__file__))
        test_data_dir = join(module_dir, 'test_data')
        self.json_collapsed_obs = join(
            test_data_dir, 'json_obs_collapsed.biom')
        self.json_collapsed_samples = join(
            test_data_dir, 'json_sample_collapsed.biom')
 def test_json_to_hdf5_collapsed_samples(self):
     """Correctly converts json to HDF5 changing the sample metadata"""
     with biom_open(self.json_collapsed_samples) as f:
         # The return value is not needed; the converted table is read
         # back from output_filepath below.
         self.cmd(table=parse_biom_table(f),
                  output_filepath=self.output_filepath,
                  to_hdf5=True,
                  collapsed_samples=True)
     obs = load_table(self.output_filepath)

     # (observation id, taxonomy lineage) pairs, in table row order.
     otus = [
         (u'GG_OTU_1', [u'k__Bacteria', u'p__Proteobacteria',
                        u'c__Gammaproteobacteria',
                        u'o__Enterobacteriales',
                        u'f__Enterobacteriaceae',
                        u'g__Escherichia', u's__']),
         (u'GG_OTU_2', [u'k__Bacteria', u'p__Cyanobacteria',
                        u'c__Nostocophycideae',
                        u'o__Nostocales', u'f__Nostocaceae',
                        u'g__Dolichospermum', u's__']),
         (u'GG_OTU_3', [u'k__Archaea', u'p__Euryarchaeota',
                        u'c__Methanomicrobia',
                        u'o__Methanosarcinales',
                        u'f__Methanosarcinaceae',
                        u'g__Methanosarcina', u's__']),
         (u'GG_OTU_4', [u'k__Bacteria', u'p__Firmicutes',
                        u'c__Clostridia', u'o__Halanaerobiales',
                        u'f__Halanaerobiaceae',
                        u'g__Halanaerobium',
                        u's__Halanaerobiumsaccharolyticum']),
         (u'GG_OTU_5', [u'k__Bacteria', u'p__Proteobacteria',
                        u'c__Gammaproteobacteria',
                        u'o__Enterobacteriales',
                        u'f__Enterobacteriaceae',
                        u'g__Escherichia', u's__']),
     ]
     # (sample id, original sample ids collapsed into it) pairs.
     samples = [
         (u'skin', [u'Sample4', u'Sample5', u'Sample6']),
         (u'gut', [u'Sample1', u'Sample2', u'Sample3']),
     ]
     exp = Table(np.array([[0., 1.], [6., 6.], [6., 1.],
                           [1., 4.], [0., 2.]]),
                 observation_ids=[otu_id for otu_id, _ in otus],
                 sample_ids=[sample_id for sample_id, _ in samples],
                 observation_metadata=[{u'taxonomy': lineage}
                                       for _, lineage in otus],
                 sample_metadata=[{u'collapsed_ids': members}
                                  for _, members in samples],
                 type=u'OTU table')
     self.assertEqual(obs, exp)
示例#16
0
def file_choices(choices, fname):
    """Validate that *fname* has one of the allowed file extensions.

    Intended for use as an argparse ``type=`` callable.

    Args:
        choices: Collection of accepted extensions, without the leading dot.
        fname: File name or path supplied on the command line.

    Returns:
        *fname* unchanged when its extension is in *choices*. Otherwise
        ``parser.error`` is called on the module-level ``parser``.
    """
    # splitext keeps the leading dot on the extension; [1:] drops it.
    ext = os.path.splitext(fname)[1][1:]
    if ext not in choices:
        # Name every accepted extension: the previous message claimed only
        # biom was valid even when other extensions were allowed.
        parser.error('file extension must be one of: ' + ', '.join(choices))
    return fname


# Required input path; file_choices rejects anything whose extension is not
# 'biom' or 'tab'.
parser.add_argument(
    '-i',
    type=lambda s: file_choices(('biom', 'tab'), s),
    help='Input biom file',
    action='store',
    required=True,
)

# With no arguments at all, print the full help text and exit non-zero.
if len(sys.argv) <= 1:
    parser.print_help()
    sys.exit(1)

results = parser.parse_args()

# Single-argument print(...) behaves the same on Python 2 and Python 3; the
# bare print statement is a SyntaxError on Python 3.
print('\nInput file: ' + results.i)
# Strip the real extension, whatever its length: a fixed [:-5] slice only
# fits '.biom' and would drop a character of the stem for '.tab' inputs.
out = os.path.splitext(results.i)[0] + '_relativized.biom'

t = load_table(results.i)
# inplace=False is passed so the loaded table is left as-is and the
# normalized result is bound to a separate name.
normed = t.norm(axis='sample', inplace=False)

with biom_open(out, 'w') as f:
    # NOTE(review): 'example' is presumably the generated-by label stored in
    # the output file; confirm against the biom Table.to_hdf5 documentation.
    normed.to_hdf5(f, 'example')

print('\n\tSuccess!\n\tOutput file: ' + out + '\n')
 def setUp(self):
     """Load the ``biom1`` text fixture into ``self.biom1`` as a table."""
     with tempfile.NamedTemporaryFile(mode="w") as tmp:
         tmp.write(biom1)
         # The file is loaded by name while still open, so flush first.
         tmp.flush()
         loaded = load_table(tmp.name)
         self.biom1 = loaded
示例#18
0
 def test_load_table_unicode(self):
     """Non-ASCII taxonomy text in ``bad_table.txt`` is loaded intact."""
     table = load_table(get_data_path('bad_table.txt'))
     otu1_md = table.metadata('otu1', 'observation')
     self.assertEqual(u's__Cortinarius grosmornënsis',
                      otu1_md['taxonomy'])
示例#19
0
    def test_json_to_hdf5_collapsed_metadata(self):
        """Correctly converts json to HDF5 changing the observation metadata"""
        def sample_md(barcode, description, body_site):
            # Every expected sample shares the same linker primer sequence.
            return {
                u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
                u'BarcodeSequence': barcode,
                u'Description': description,
                u'BODY_SITE': body_site
            }

        with biom_open(self.json_collapsed_obs) as f:
            t = parse_biom_table(f)
            # The return value is not needed; the converted table is read
            # back from output_filepath below.
            self.cmd(table=t,
                     output_filepath=self.output_filepath,
                     to_hdf5=True,
                     collapsed_observations=True)
        obs = load_table(self.output_filepath)

        expected_counts = np.array([
            [2., 1., 1., 0., 0., 1.],
            [0., 0., 1., 4., 0., 2.],
            [5., 1., 0., 2., 3., 1.],
            [0., 1., 2., 0., 0., 0.],
        ])
        expected_obs_md = [
            {u'collapsed_ids': [u'GG_OTU_4']},
            {u'collapsed_ids': [u'GG_OTU_3']},
            {u'collapsed_ids': [u'GG_OTU_2']},
            {u'collapsed_ids': [u'GG_OTU_1', u'GG_OTU_5']},
        ]
        expected_sample_md = [
            sample_md(u'CGCTTATCGAGA', u'human gut', u'gut'),
            sample_md(u'CATACCAGTAGC', u'human gut', u'gut'),
            sample_md(u'CTCTCTACCTGT', u'human gut', u'gut'),
            sample_md(u'CTCTCGGCCTGT', u'human skin', u'skin'),
            sample_md(u'CTCTCTACCAAT', u'human skin', u'skin'),
            sample_md(u'CTAACTACCAAT', u'human skin', u'skin'),
        ]
        exp = Table(expected_counts,
                    observation_ids=[
                        u'p__Firmicutes', u'p__Euryarchaeota',
                        u'p__Cyanobacteria', u'p__Proteobacteria'
                    ],
                    sample_ids=[
                        u'Sample1', u'Sample2', u'Sample3', u'Sample4',
                        u'Sample5', u'Sample6'
                    ],
                    observation_metadata=expected_obs_md,
                    sample_metadata=expected_sample_md,
                    type=u'OTU table')

        self.assertEqual(obs, exp)
import argparse
import os.path

# Command-line parser shared by file_choices (for error reporting) and the
# argument handling below.
_DESCRIPTION = 'This is a python script to relativize biom tables'
parser = argparse.ArgumentParser(description=_DESCRIPTION)

def file_choices(choices, fname):
    """Validate that *fname* has one of the allowed file extensions.

    Intended for use as an argparse ``type=`` callable.

    Args:
        choices: Collection of accepted extensions, without the leading dot.
        fname: File name or path supplied on the command line.

    Returns:
        *fname* unchanged when its extension is in *choices*. Otherwise
        ``parser.error`` is called on the module-level ``parser``.
    """
    # splitext keeps the leading dot on the extension; [1:] drops it.
    ext = os.path.splitext(fname)[1][1:]
    if ext not in choices:
        # Name every accepted extension: the previous message claimed only
        # biom was valid even when other extensions were allowed.
        parser.error('file extension must be one of: ' + ', '.join(choices))
    return fname

# Required input path; file_choices rejects anything whose extension is not
# 'biom' or 'tab'.
parser.add_argument(
    '-i',
    type=lambda s: file_choices(('biom', 'tab'), s),
    help='Input biom file',
    action='store',
    required=True,
)

# With no arguments at all, print the full help text and exit non-zero.
if len(sys.argv) <= 1:
    parser.print_help()
    sys.exit(1)

results = parser.parse_args()

# Single-argument print(...) behaves the same on Python 2 and Python 3; the
# bare print statement is a SyntaxError on Python 3.
print('\nInput file: ' + results.i)
# Strip the real extension, whatever its length: a fixed [:-5] slice only
# fits '.biom' and would drop a character of the stem for '.tab' inputs.
out = os.path.splitext(results.i)[0] + '_relativized.biom'

t = load_table(results.i)
# inplace=False is passed so the loaded table is left as-is and the
# normalized result is bound to a separate name.
normed = t.norm(axis='sample', inplace=False)

with biom_open(out, 'w') as f:
    # NOTE(review): 'example' is presumably the generated-by label stored in
    # the output file; confirm against the biom Table.to_hdf5 documentation.
    normed.to_hdf5(f, 'example')

print('\n\tSuccess!\n\tOutput file: ' + out + '\n')
                    raise ValidationException("Not a good URL: " +
                                              content_with_url)
                try:
                    downloaded_file = url.read()
                except Exception, e:
                    raise ValidationException(
                        "Could not read data from URL: " + url.geturl())
                with open(self._dataset_file_path + ".downloaded", 'w') as tmp:
                    tmp.write(downloaded_file)
            content_path = self._dataset_file_path + ".downloaded"
        else:
            content_path = self._dataset_file_path
        if not os.path.exists(content_path):
            raise ValueError("Does not exist: ".content_path)
        try:
            table = load_table(content_path)
        except TypeError, e:
            raise ValidationException(e)
        except ValueError, e:
            raise ValidationException(e)
        except Exception, e:
            raise ValidationException(
                "Could not load the file as BIOM - does it conform to the specification on https://biom-format.org?"
            )

        give_table_extra_methods(table)
        generated_by = "MicrobiomeDb exporter"

        with open(self._dataset_file_path + ".metadata.json", 'w') as f1:
            table.to_json_but_only_metadata(generated_by, direct_io=f1)
示例#22
0
 def setUp(self):
     """Stage the ``biom1`` text in a temp file and load it as a table."""
     with tempfile.NamedTemporaryFile(mode='w') as scratch:
         scratch.write(biom1)
         # load_table is given the file name, so the buffered text must be
         # flushed before it is read back.
         scratch.flush()
         scratch_name = scratch.name
         self.biom1 = load_table(scratch_name)
示例#23
0
 def load_data(self):
     """Extract the data member from the archive and load it as a table.

     The member named by ``self.__data_path__`` is extracted from
     ``self.__zipfile__`` into a temporary directory, and the extracted
     file is passed to ``load_table`` before that directory is left.

     Returns:
         Whatever ``load_table`` returns for the extracted file.
     """
     with self.__zipfile__ as archive, \
             tempfile.TemporaryDirectory() as scratch_dir:
         extracted_path = archive.extract(self.__data_path__, scratch_dir)
         # Load while still inside the with-block, before the temporary
         # directory's context is exited.
         return load_table(extracted_path)