Exemplo n.º 1
0
    def test_invalid_header(self):
        """The invalid-header fixture must be rejected on load."""
        path = get_data_path('invalid/invalid-header.tsv')
        expected_msg = ('unrecognized ID column name.*'
                        'invalid_id_header')

        with self.assertRaisesRegex(MetadataFileError, expected_msg):
            Metadata.load(path)
Exemplo n.º 2
0
    def test_empty_file(self):
        """Loading the 'data/empty' resource must raise MetadataFileError."""
        path = pkg_resources.resource_filename('qiime2.metadata.tests',
                                               'data/empty')
        expected_msg = 'locate header.*file may be empty'

        with self.assertRaisesRegex(MetadataFileError, expected_msg):
            Metadata.load(path)
Exemplo n.º 3
0
    def test_column_types_unrecognized_column_name(self):
        """A `column_types` key naming no column in the file is an error."""
        path = get_data_path('valid/simple.tsv')
        bad_types = {'not_a_column': 'numeric'}
        expected_msg = ('not_a_column.*column_types.*not a column '
                        'in the metadata file')

        with self.assertRaisesRegex(MetadataFileError, expected_msg):
            Metadata.load(path, column_types=bad_types)
Exemplo n.º 4
0
    def test_duplicate_column_names_with_whitespace(self):
        """Loading must fail with a 'Column names must be unique' error."""
        fixture = 'invalid/duplicate-column-names-with-whitespace.tsv'
        path = get_data_path(fixture)
        expected_msg = 'Column names must be unique.*col1'

        with self.assertRaisesRegex(MetadataFileError, expected_msg):
            Metadata.load(path)
Exemplo n.º 5
0
    def test_directive_before_header(self):
        """A directive met while searching for the header is an error."""
        path = get_data_path('invalid/directive-before-header.tsv')
        expected_msg = ('directive.*#q2:types.*searching for '
                        'header')

        with self.assertRaisesRegex(MetadataFileError, expected_msg):
            Metadata.load(path)
Exemplo n.º 6
0
    def test_column_types_override_directive_not_convertible_to_numeric(self):
        """Forcing 'col3' to numeric via `column_types` must fail on load."""
        path = get_data_path('valid/simple-with-directive.tsv')
        overrides = {'col3': 'numeric'}
        expected_msg = ("column 'col3' to numeric.*could not be "
                        "interpreted as numeric: 'bar', 'foo'")

        with self.assertRaisesRegex(MetadataFileError, expected_msg):
            Metadata.load(path, column_types=overrides)
Exemplo n.º 7
0
    def test_unrecognized_column_type_in_directive(self):
        """An unrecognized type in the #q2:types directive is reported."""
        path = get_data_path('invalid/unrecognized-column-type.tsv')
        expected_msg = ('col2.*unrecognized column type.*foo.*'
                        '#q2:types directive')

        with self.assertRaisesRegex(MetadataFileError, expected_msg):
            Metadata.load(path)
Exemplo n.º 8
0
    def test_unrecognized_directive(self):
        """A '#q2:foo' directive must be rejected as unrecognized."""
        path = get_data_path('invalid/unrecognized-directive.tsv')
        expected_msg = ('Unrecognized directive.*#q2:foo.*'
                        '#q2:types directive is supported')

        with self.assertRaisesRegex(MetadataFileError, expected_msg):
            Metadata.load(path)
Exemplo n.º 9
0
    def test_data_longer_than_header(self):
        """A row with more cells than the header declares is an error."""
        path = get_data_path('invalid/data-longer-than-header.tsv')
        expected_msg = ('row has 5 cells.*header declares 4 '
                        'cells')

        with self.assertRaisesRegex(MetadataFileError, expected_msg):
            Metadata.load(path)
Exemplo n.º 10
0
    def test_path_is_directory(self):
        """Loading a path that is not a file must raise MetadataFileError."""
        path = get_data_path('valid')
        expected_msg = ("path points to something other than a "
                        "file")

        with self.assertRaisesRegex(MetadataFileError, expected_msg):
            Metadata.load(path)
Exemplo n.º 11
0
    def test_comments_and_empty_rows_only(self):
        """A file with only comments or empty rows has no header to find."""
        path = get_data_path('invalid/comments-and-empty-rows-only.tsv')
        expected_msg = ('locate header.*only of comments or empty '
                        'rows')

        with self.assertRaisesRegex(MetadataFileError, expected_msg):
            Metadata.load(path)
Exemplo n.º 12
0
    def test_qiime1_empty_mapping_file(self):
        """The 'data/qiime1-empty.tsv' resource must fail for lack of IDs."""
        path = pkg_resources.resource_filename('qiime2.metadata.tests',
                                               'data/qiime1-empty.tsv')
        expected_msg = 'at least one ID.*empty'

        with self.assertRaisesRegex(MetadataFileError, expected_msg):
            Metadata.load(path)
Exemplo n.º 13
0
    def test_non_standard_characters(self):
        """Saving then reloading the loaded metadata yields an equal object."""
        source_path = get_data_path('valid/non-standard-characters.tsv')
        original = Metadata.load(source_path)
        original.save(self.filepath)

        reloaded = Metadata.load(self.filepath)

        self.assertEqual(original, reloaded)
Exemplo n.º 14
0
    def test_all_cells_padded(self):
        """Saving then reloading the loaded metadata yields an equal object."""
        source_path = get_data_path('valid/all-cells-padded.tsv')
        original = Metadata.load(source_path)
        original.save(self.filepath)

        reloaded = Metadata.load(self.filepath)

        self.assertEqual(original, reloaded)
Exemplo n.º 15
0
    def test_numeric_column(self):
        """Saving then reloading the loaded metadata yields an equal object."""
        source_path = get_data_path('valid/numeric-column.tsv')
        original = Metadata.load(source_path)
        original.save(self.filepath)

        reloaded = Metadata.load(self.filepath)

        self.assertEqual(original, reloaded)
Exemplo n.º 16
0
    def test_directive_after_directives_section(self):
        """A #q2:types directive outside the directives section is an error."""
        fixture = 'invalid/directive-after-directives-section.tsv'
        path = get_data_path(fixture)
        expected_msg = ('#q2:types.*outside of the directives '
                        'section')

        with self.assertRaisesRegex(MetadataFileError, expected_msg):
            Metadata.load(path)
Exemplo n.º 17
0
    def test_minimal_file(self):
        """Saving then reloading the loaded metadata yields an equal object."""
        source_path = get_data_path('valid/minimal.tsv')
        original = Metadata.load(source_path)
        original.save(self.filepath)

        reloaded = Metadata.load(self.filepath)

        self.assertEqual(original, reloaded)
Exemplo n.º 18
0
    def test_column_types_unrecognized_column_type(self):
        """An unrecognized type value in `column_types` must be rejected."""
        path = get_data_path('valid/simple.tsv')
        requested_types = {'col1': 'numeric', 'col2': 'CATEGORICAL'}
        expected_msg = ('col2.*column_types.*unrecognized column '
                        'type.*CATEGORICAL')

        with self.assertRaisesRegex(MetadataFileError, expected_msg):
            Metadata.load(path, column_types=requested_types)
Exemplo n.º 19
0
    def test_column_name_conflicts_with_id_header(self):
        """A column named 'featureid' must be reported as a conflict."""
        fixture = 'invalid/column-name-conflicts-with-id-header.tsv'
        path = get_data_path(fixture)
        expected_msg = ("column name 'featureid' conflicts.*ID "
                        "column header")

        with self.assertRaisesRegex(MetadataFileError, expected_msg):
            Metadata.load(path)
Exemplo n.º 20
0
    def test_column_types_directive_not_convertible_to_numeric(self):
        """The types-directive-non-numeric fixture must fail on 'col2'."""
        path = get_data_path('invalid/types-directive-non-numeric.tsv')

        # The pattern is deliberately long: the error lists, in sorted order,
        # every value that could not be converted, so one test case can cover
        # many kinds of non-numeric strings that must not parse as numbers.
        expected_msg = (
            r"column 'col2' to numeric.*could not be interpreted as "
            r"numeric: '\$42', '\+inf', '-inf', '0xAF', '1,000', "
            r"'1\.000\.0', '1_000_000', '1e3e4', 'Infinity', 'NA', 'NaN', "
            "'a', 'e3', 'foo', 'inf', 'nan', 'sample-1'")

        with self.assertRaisesRegex(MetadataFileError, expected_msg):
            Metadata.load(path)
Exemplo n.º 21
0
    def test_numeric_metadata_column(self):
        """A numeric column saved to disk reloads as an equal numeric column.

        Loads 'col1' from the simple fixture, saves that column to
        ``self.filepath``, reloads it, and checks both the type and the
        equality of the reloaded column.
        """
        fp = get_data_path('valid/simple.tsv')
        md1 = Metadata.load(fp)
        mdc1 = md1.get_column('col1')

        self.assertIsInstance(mdc1, NumericMetadataColumn)

        mdc1.save(self.filepath)

        md2 = Metadata.load(self.filepath)
        mdc2 = md2.get_column('col1')

        # Assert on the reloaded column (not mdc1 again) so the type that
        # survives the save/load round trip is actually verified.
        self.assertIsInstance(mdc2, NumericMetadataColumn)
        self.assertEqual(mdc1, mdc2)
Exemplo n.º 22
0
    def test_numeric_metadata_column(self):
        """A numeric column saved to disk reloads as an equal numeric column.

        Loads 'col1' from the simple fixture, saves that column to
        ``self.filepath``, reloads it, and checks both the type and the
        equality of the reloaded column.
        """
        fp = get_data_path('valid/simple.tsv')
        md1 = Metadata.load(fp)
        mdc1 = md1.get_column('col1')

        self.assertIsInstance(mdc1, NumericMetadataColumn)

        mdc1.save(self.filepath)

        md2 = Metadata.load(self.filepath)
        mdc2 = md2.get_column('col1')

        # Assert on the reloaded column (not mdc1 again) so the type that
        # survives the save/load round trip is actually verified.
        self.assertIsInstance(mdc2, NumericMetadataColumn)
        self.assertEqual(mdc1, mdc2)
Exemplo n.º 23
0
    def test_column_types_directive_not_convertible_to_numeric(self):
        """The types-directive-non-numeric fixture must fail on 'col2'."""
        path = get_data_path('invalid/types-directive-non-numeric.tsv')

        # The pattern is deliberately long: the error lists, in sorted order,
        # every value that could not be converted, so one test case can cover
        # many kinds of non-numeric strings that must not parse as numbers.
        expected_msg = (
            r"column 'col2' to numeric.*could not be interpreted as "
            r"numeric: '\$42', '\+inf', '-inf', '0xAF', '1,000', "
            r"'1\.000\.0', '1_000_000', '1e3e4', 'Infinity', 'NA', 'NaN', "
            "'a', 'e3', 'foo', 'inf', 'nan', 'sample-1'")

        with self.assertRaisesRegex(MetadataFileError, expected_msg):
            Metadata.load(path)
Exemplo n.º 24
0
    def test_bom_simple_txt(self):
        """The BOM-simple.txt fixture loads equal to `self.simple_md`."""
        # Per the original note: this is the encoding notepad.exe most
        # commonly uses.
        path = get_data_path('valid/BOM-simple.txt')
        loaded = Metadata.load(path)

        self.assertEqual(loaded, self.simple_md)
Exemplo n.º 25
0
    def test_jagged_trailing_columns(self):
        """The jagged-trailing-columns fixture loads as `self.simple_md`."""
        # Regression case derived from
        # https://github.com/qiime2/qiime2/issues/335
        path = get_data_path('valid/jagged-trailing-columns.tsv')
        loaded = Metadata.load(path)

        self.assertEqual(loaded, self.simple_md)
Exemplo n.º 26
0
    def test_artifacts(self):
        """Metadata loaded from a file reports an empty `artifacts` tuple."""
        path = pkg_resources.resource_filename(
            'qiime2.metadata.tests', 'data/simple.tsv')
        loaded = Metadata.load(path)

        self.assertEqual(loaded.artifacts, ())
Exemplo n.º 27
0
    def test_jagged_trailing_columns(self):
        """The jagged-trailing-columns fixture loads as `self.simple_md`."""
        # Regression case derived from
        # https://github.com/qiime2/qiime2/issues/335
        path = get_data_path('valid/jagged-trailing-columns.tsv')
        loaded = Metadata.load(path)

        self.assertEqual(loaded, self.simple_md)
Exemplo n.º 28
0
    def test_bom_simple_txt(self):
        """The BOM-simple.txt fixture loads equal to `self.simple_md`."""
        # Per the original note: this is the encoding notepad.exe most
        # commonly uses.
        path = get_data_path('valid/BOM-simple.txt')
        loaded = Metadata.load(path)

        self.assertEqual(loaded, self.simple_md)
Exemplo n.º 29
0
    def test_artifacts(self):
        """Metadata loaded from a file reports an empty `artifacts` tuple."""
        path = pkg_resources.resource_filename('qiime2.metadata.tests',
                                               'data/simple.tsv')
        loaded = Metadata.load(path)

        self.assertEqual(loaded.artifacts, ())
Exemplo n.º 30
0
    def test_non_standard_characters(self):
        """Unusual characters in IDs, column names and cells load intact."""
        # This is not exhaustive (it does not try every Unicode character,
        # which would be a worthwhile future addition). It acts as an
        # integration test of how robust the reader is to non-standard data.
        # Many of the characters, and where they sit in the data file, come
        # from use-cases/bugs reported on the forum, Slack, etc.; the data
        # file itself carries comments explaining those choices.
        path = get_data_path('valid/non-standard-characters.tsv')
        loaded = Metadata.load(path)

        ids = pd.Index(['©id##1', '((id))2', "'id_3<>'", '"id#4"',
                        'i d\r\t\n5'], name='id')
        column_names = ['↩c@l1™', 'col(#2)', "#col'3", '"<col_4>"',
                        'col\t  \r\n5']
        rows = [
            ['ƒoo', '(foo)', '#f o #o', 'fo\ro', np.nan],
            ["''2''", 'b#r', 'ba\nr', np.nan, np.nan],
            ['b"ar', 'c\td', '4\r\n2', np.nan, np.nan],
            ['b__a_z', '<42>', '>42', np.nan, np.nan],
            ['baz', np.nan, '42']
        ]
        frame = pd.DataFrame(rows, index=ids, columns=column_names)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 31
0
    def test_non_standard_characters(self):
        """Unusual characters in IDs, column names and cells load intact."""
        # This is not exhaustive (it does not try every Unicode character,
        # which would be a worthwhile future addition). It acts as an
        # integration test of how robust the reader is to non-standard data.
        # Many of the characters, and where they sit in the data file, come
        # from use-cases/bugs reported on the forum, Slack, etc.; the data
        # file itself carries comments explaining those choices.
        path = get_data_path('valid/non-standard-characters.tsv')
        loaded = Metadata.load(path)

        ids = pd.Index(
            ['©id##1', '((id))2', "'id_3<>'", '"id#4"', 'i d\r\t\n5'],
            name='id')
        column_names = [
            '↩c@l1™', 'col(#2)', "#col'3", '"<col_4>"', 'col\t  \r\n5'
        ]
        rows = [['ƒoo', '(foo)', '#f o #o', 'fo\ro', np.nan],
                ["''2''", 'b#r', 'ba\nr', np.nan, np.nan],
                ['b"ar', 'c\td', '4\r\n2', np.nan, np.nan],
                ['b__a_z', '<42>', '>42', np.nan, np.nan],
                ['baz', np.nan, '42']]
        frame = pd.DataFrame(rows, index=ids, columns=column_names)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 32
0
    def test_type_mismatch(self):
        """A Metadata object and its 'col1' column must not be equal."""
        path = pkg_resources.resource_filename(
            'qiime2.metadata.tests', 'data/simple.tsv')
        metadata = Metadata.load(path)
        column = metadata.get_column('col1')

        # Confirm the two operands really are of different kinds first.
        self.assertIsInstance(metadata, Metadata)
        self.assertIsInstance(column, NumericMetadataColumn)
        self.assertReallyNotEqual(metadata, column)
Exemplo n.º 33
0
    def test_no_columns(self):
        """`get_ids` on the no-columns resource returns its three IDs."""
        path = pkg_resources.resource_filename('qiime2.metadata.tests',
                                               'data/no-columns.tsv')
        loaded = Metadata.load(path)

        ids = loaded.get_ids()

        self.assertEqual(ids, {'a', 'b', 'my-id'})
Exemplo n.º 34
0
    def test_type_mismatch(self):
        """A Metadata object and its 'col1' column must not be equal."""
        path = pkg_resources.resource_filename('qiime2.metadata.tests',
                                               'data/simple.tsv')
        metadata = Metadata.load(path)
        column = metadata.get_column('col1')

        # Confirm the two operands really are of different kinds first.
        self.assertIsInstance(metadata, Metadata)
        self.assertIsInstance(column, NumericMetadataColumn)
        self.assertReallyNotEqual(metadata, column)
Exemplo n.º 35
0
    def test_no_columns(self):
        """`get_ids` on the no-columns resource returns its three IDs."""
        path = pkg_resources.resource_filename(
            'qiime2.metadata.tests', 'data/no-columns.tsv')
        loaded = Metadata.load(path)

        ids = loaded.get_ids()

        self.assertEqual(ids, {'a', 'b', 'my-id'})
Exemplo n.º 36
0
    def test_no_columns(self):
        """The no-columns fixture loads as three IDs with an empty frame."""
        loaded = Metadata.load(get_data_path('valid/no-columns.tsv'))

        ids = pd.Index(['a', 'b', 'my-id'], name='id')
        frame = pd.DataFrame({}, index=ids)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 37
0
    def test_single_column(self):
        """The single-column fixture loads as one float column, three IDs."""
        loaded = Metadata.load(get_data_path('valid/single-column.tsv'))

        ids = pd.Index(['id1', 'id2', 'id3'], name='id')
        frame = pd.DataFrame({'col1': [1.0, 2.0, 3.0]}, index=ids)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 38
0
    def test_simple(self):
        """The simple fixture loads equal to `self.simple_md`."""
        # A plain metadata file: no comments, empty rows, jaggedness, missing
        # data, odd IDs or column names, directives, etc. It mixes column
        # types (numeric, categorical, and a column of mixed numbers and
        # strings, which must be interpreted as categorical).
        path = get_data_path('valid/simple.tsv')
        loaded = Metadata.load(path)

        self.assertEqual(loaded, self.simple_md)
Exemplo n.º 39
0
    def test_no_columns(self):
        """The no-columns fixture loads as three IDs with an empty frame."""
        loaded = Metadata.load(get_data_path('valid/no-columns.tsv'))

        ids = pd.Index(['a', 'b', 'my-id'], name='id')
        frame = pd.DataFrame({}, index=ids)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 40
0
    def test_single_column(self):
        """The single-column fixture loads as one float column, three IDs."""
        loaded = Metadata.load(get_data_path('valid/single-column.tsv'))

        ids = pd.Index(['id1', 'id2', 'id3'], name='id')
        frame = pd.DataFrame({'col1': [1.0, 2.0, 3.0]}, index=ids)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 41
0
    def test_simple(self):
        """The simple fixture loads equal to `self.simple_md`."""
        # A plain metadata file: no comments, empty rows, jaggedness, missing
        # data, odd IDs or column names, directives, etc. It mixes column
        # types (numeric, categorical, and a column of mixed numbers and
        # strings, which must be interpreted as categorical).
        path = get_data_path('valid/simple.tsv')
        loaded = Metadata.load(path)

        self.assertEqual(loaded, self.simple_md)
Exemplo n.º 42
0
    def test_recommended_ids(self):
        """The recommended-ids fixture loads with its two IDs unchanged."""
        loaded = Metadata.load(get_data_path('valid/recommended-ids.tsv'))

        ids = pd.Index(
            ['c6ca034a-223f-40b4-a0e0-45942912a5ea', 'My.ID'], name='id')
        frame = pd.DataFrame({'col1': ['foo', 'bar']}, index=ids)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 43
0
    def test_no_columns(self):
        """The no-columns resource loads as three IDs with an empty frame."""
        path = pkg_resources.resource_filename(
            'qiime2.metadata.tests', 'data/no-columns.tsv')
        loaded = Metadata.load(path)

        # Both the index and the (empty) frame are built with object dtype.
        ids = pd.Index(['a', 'b', 'my-id'], name='id', dtype=object)
        frame = pd.DataFrame({}, index=ids, dtype=object)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 44
0
    def test_minimal_file(self):
        """The minimal fixture loads as a single ID with no columns."""
        # One ID and zero columns is the simplest possible metadata file.
        loaded = Metadata.load(get_data_path('valid/minimal.tsv'))

        ids = pd.Index(['a'], name='id')
        frame = pd.DataFrame({}, index=ids)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 45
0
    def test_single_id(self):
        """The single-id fixture loads as one row across three columns."""
        loaded = Metadata.load(get_data_path('valid/single-id.tsv'))

        ids = pd.Index(['id1'], name='id')
        cells = {'col1': [1.0], 'col2': ['a'], 'col3': ['foo']}
        frame = pd.DataFrame(cells, index=ids)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 46
0
    def test_with_partial_types_directive(self):
        """Check the frame loaded from the partial-types-directive fixture."""
        path = get_data_path('valid/partial-types-directive.tsv')
        loaded = Metadata.load(path)

        ids = pd.Index(['id1', 'id2', 'id3'], name='id')
        # Every expected column, 'col1' included, holds strings.
        cells = {
            'col1': ['1', '2', '3'],
            'col2': ['a', 'b', 'c'],
            'col3': ['foo', 'bar', '42'],
        }
        frame = pd.DataFrame(cells, index=ids)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 47
0
    def test_numeric_column(self):
        """The numeric-column fixture loads as the expected float values."""
        loaded = Metadata.load(get_data_path('valid/numeric-column.tsv'))

        # IDs run 'id1' .. 'id12'.
        ids = pd.Index(['id%d' % i for i in range(1, 13)], name='id')
        values = [0.0, 2.0, 0.0003, -4.2, 1e-4, 1e4,
                  1.5e2, np.nan, 1.0, 0.5, 1e-8, -0.0]
        frame = pd.DataFrame({'col1': values}, index=ids)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 48
0
    def test_all_cells_padded(self):
        """The all-cells-padded fixture loads with every cell as NaN."""
        loaded = Metadata.load(get_data_path('valid/all-cells-padded.tsv'))

        ids = pd.Index(['id1', 'id2', 'id3'], name='id')
        # Three columns, each holding three missing values.
        cells = {name: [np.nan, np.nan, np.nan]
                 for name in ('col1', 'col2', 'col3')}
        frame = pd.DataFrame(cells, index=ids)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 49
0
    def test_biom_observation_metadata_file(self):
        """The BIOM observation metadata fixture loads with '#OTUID' IDs."""
        path = get_data_path('valid/biom-observation-metadata.tsv')
        loaded = Metadata.load(path)

        ids = pd.Index(['OTU_1', 'OTU_2'], name='#OTUID')
        rows = [['k__Bacteria;p__Firmicutes', 0.890],
                ['k__Bacteria', 0.9999]]
        frame = pd.DataFrame(rows, columns=['taxonomy', 'confidence'],
                             index=ids)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 50
0
    def test_column_types_without_directive(self):
        """`column_types` can force 'col1' of the simple fixture to strings."""
        path = get_data_path('valid/simple.tsv')
        loaded = Metadata.load(path, column_types={'col1': 'categorical'})

        ids = pd.Index(['id1', 'id2', 'id3'], name='id')
        cells = {
            'col1': ['1', '2', '3'],
            'col2': ['a', 'b', 'c'],
            'col3': ['foo', 'bar', '42'],
        }
        frame = pd.DataFrame(cells, index=ids)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 51
0
    def test_qiime1_mapping_file(self):
        """The qiime1 fixture loads with '#SampleID' as the ID header."""
        loaded = Metadata.load(get_data_path('valid/qiime1.tsv'))

        ids = pd.Index(['id1', 'id2', 'id3'], name='#SampleID')
        cells = {
            'col1': [1.0, 2.0, 3.0],
            'col2': ['a', 'b', 'c'],
            'col3': ['foo', 'bar', '42'],
        }
        frame = pd.DataFrame(cells, index=ids)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 52
0
    def test_qiita_sample_information_file(self):
        """The Qiita sample information fixture loads as expected."""
        path = get_data_path('valid/qiita-sample-information.tsv')
        loaded = Metadata.load(path)

        ids = pd.Index(['id.1', 'id.2'], name='sample_name')
        cells = {
            'DESCRIPTION': ['description 1', 'description 2'],
            'TITLE': ['A Title', 'Another Title'],
        }
        frame = pd.DataFrame(cells, index=ids)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 53
0
    def test_padding_rows_shorter_than_header(self):
        """Cells absent from short rows are expected to load as NaN."""
        path = get_data_path('valid/rows-shorter-than-header.tsv')
        loaded = Metadata.load(path)

        ids = pd.Index(['id1', 'id2', 'id3'], name='id')
        cells = {
            'col1': [1.0, 2.0, np.nan],
            'col2': ['a', np.nan, np.nan],
            'col3': [np.nan, np.nan, np.nan],
        }
        frame = pd.DataFrame(cells, index=ids)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 54
0
    def test_with_case_insensitive_types_directive(self):
        """Check the frame loaded from the case-insensitive directive file."""
        path = get_data_path('valid/case-insensitive-types-directive.tsv')
        loaded = Metadata.load(path)

        ids = pd.Index(['id1', 'id2', 'id3'], name='id')
        # Expected: 'col1' as strings, 'col3' as floats.
        cells = {
            'col1': ['1', '2', '3'],
            'col2': ['a', 'b', 'c'],
            'col3': [-5.0, 0.0, 42.0],
        }
        frame = pd.DataFrame(cells, index=ids)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 55
0
    def test_empty_rows(self):
        """The empty-rows fixture loads as the plain three-row table."""
        path = self.get_data_path('valid/empty-rows.tsv')
        loaded = Metadata.load(path)

        ids = pd.Index(['id1', 'id2', 'id3'], name='id', dtype=object)
        cells = {
            'col1': [1.0, 2.0, 3.0],
            'col2': ['a', 'b', 'c'],
            'col3': ['foo', 'bar', '42'],
        }
        frame = pd.DataFrame(cells, index=ids)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 56
0
    def test_does_not_cast_ids_or_column_names(self):
        """Number-like IDs and column names are expected to stay strings."""
        path = get_data_path('valid/no-id-or-column-name-type-cast.tsv')
        loaded = Metadata.load(path)

        ids = pd.Index(['0.000001', '0.004000', '0.000000'],
                       dtype=object, name='id')
        column_names = ['42.0', '1000', '-4.2']
        rows = [
            [2.0, 'b', 2.5],
            [1.0, 'b', 4.2],
            [3.0, 'c', -9.999],
        ]
        frame = pd.DataFrame(rows, index=ids, columns=column_names)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 57
0
    def test_jagged_trailing_columns(self):
        """The jagged-trailing-columns fixture loads as the plain table."""
        # Regression case derived from
        # https://github.com/qiime2/qiime2/issues/335
        path = self.get_data_path('valid/jagged-trailing-columns.tsv')
        loaded = Metadata.load(path)

        ids = pd.Index(['id1', 'id2', 'id3'], name='id', dtype=object)
        cells = {
            'col1': [1.0, 2.0, 3.0],
            'col2': ['a', 'b', 'c'],
            'col3': ['foo', 'bar', '42'],
        }
        frame = pd.DataFrame(cells, index=ids)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 58
0
    def test_numeric_column_as_categorical(self):
        """Forcing 'col1' to categorical is expected to keep the raw text."""
        path = get_data_path('valid/numeric-column.tsv')
        loaded = Metadata.load(path, column_types={'col1': 'categorical'})

        # IDs run 'id1' .. 'id12'.
        ids = pd.Index(['id%d' % i for i in range(1, 13)], name='id')
        values = ['0', '2.0', '0.00030', '-4.2', '1e-4',
                  '1e4', '+1.5E+2', np.nan, '1.', '.5',
                  '1e-08', '-0']
        frame = pd.DataFrame({'col1': values}, index=ids)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 59
0
    def test_qiita_preparation_information_file(self):
        """The Qiita preparation information fixture loads as expected."""
        path = get_data_path('valid/qiita-preparation-information.tsv')
        loaded = Metadata.load(path)

        ids = pd.Index(['id.1', 'id.2'], name='sample_name')
        cells = {
            'BARCODE': ['ACGT', 'TGCA'],
            'EXPERIMENT_DESIGN_DESCRIPTION': ['longitudinal study',
                                              'longitudinal study'],
        }
        frame = pd.DataFrame(cells, index=ids)

        self.assertEqual(loaded, Metadata(frame))
Exemplo n.º 60
0
    def test_does_not_cast_ids(self):
        """Number-like IDs are expected to stay strings after loading."""
        path = self.get_data_path('valid/no-type-cast.tsv')
        loaded = Metadata.load(path)

        ids = pd.Index(['0.000001', '0.004000', '0.000000'],
                       dtype=object,
                       name='id')
        cells = {
            'col1': [2.0, 1.0, 3.0],
            'col2': ['b', 'b', 'c'],
            'col3': [2.5, 4.2, -9.999],
        }
        frame = pd.DataFrame(cells, index=ids)

        self.assertEqual(loaded, Metadata(frame))