Exemplo n.º 1
0
    def test_various_numbers(self):
        # Saves a single numeric column holding zero, negative zero, NaN,
        # whole-valued floats, very small and very large magnitudes, and a
        # value with more than 15 significant digits, then compares the file
        # contents verbatim against the expected TSV text.
        values = [
            0.0, -0.0, np.nan, 1.0, 42.0, -33.0, 1e-10, 1.5e15, 0.0003, -4.234,
            # More than 15 digits of precision: the expected output below
            # shows this value rounded.
            12.34567891234567
        ]
        ids = pd.Index(['id1', 'id2', 'id3', 'id4', 'id5', 'id6', 'id7',
                        'id8', 'id9', 'id10', 'id11'], name='ID')
        frame = pd.DataFrame({'numbers': values}, index=ids)
        metadata = Metadata(frame)

        metadata.save(self.filepath)

        with open(self.filepath, 'r') as handle:
            written = handle.read()

        # NaN (id3) is expected to appear as an empty cell.
        expected = "".join([
            "ID\tnumbers\n",
            "#q2:types\tnumeric\n",
            "id1\t0\n",
            "id2\t-0\n",
            "id3\t\n",
            "id4\t1\n",
            "id5\t42\n",
            "id6\t-33\n",
            "id7\t1e-10\n",
            "id8\t1.5e+15\n",
            "id9\t0.0003\n",
            "id10\t-4.234\n",
            "id11\t12.3456789123457\n",
        ])

        self.assertEqual(written, expected)
Exemplo n.º 2
0
    def test_ids_and_column_names_as_numeric_strings(self):
        # IDs and column names that look like numbers are string labels; the
        # expected file keeps them exactly as given (e.g. '0.004000'), while
        # the numeric cell values are written in compact form.
        column_names = ['42.0', '1000', '-4.2']
        rows = [
            [2.0, 'b', 2.5],
            [1.0, 'b', 4.2],
            [3.0, 'c', -9.999]
        ]
        ids = pd.Index(['0.000001', '0.004000', '0.000000'],
                       dtype=object, name='id')
        frame = pd.DataFrame(rows, index=ids, columns=column_names)
        metadata = Metadata(frame)

        metadata.save(self.filepath)

        with open(self.filepath, 'r') as handle:
            written = handle.read()

        expected = "".join([
            "id\t42.0\t1000\t-4.2\n",
            "#q2:types\tnumeric\tcategorical\tnumeric\n",
            "0.000001\t2\tb\t2.5\n",
            "0.004000\t1\tb\t4.2\n",
            "0.000000\t3\tc\t-9.999\n",
        ])

        self.assertEqual(written, expected)
Exemplo n.º 3
0
    def test_various_numbers(self):
        # Round-trips a numeric column through ``save`` and checks the exact
        # text written for zero, negative zero, NaN, whole-valued floats,
        # tiny/huge magnitudes and a value exceeding 15 digits of precision.
        sample_ids = pd.Index(['id1', 'id2', 'id3', 'id4', 'id5', 'id6', 'id7',
                               'id8', 'id9', 'id10', 'id11'], name='ID')
        column = [
            0.0, -0.0, np.nan, 1.0, 42.0, -33.0, 1e-10, 1.5e15, 0.0003, -4.234,
            # The final value carries more than 15 digits of precision, so
            # the expected output shows it rounded.
            12.34567891234567
        ]
        metadata = Metadata(pd.DataFrame({'numbers': column},
                                         index=sample_ids))

        metadata.save(self.filepath)

        with open(self.filepath, 'r') as saved_file:
            actual_text = saved_file.read()

        # The row for id3 (NaN) is expected to have an empty value cell.
        expected_text = (
            "ID\tnumbers\n" +
            "#q2:types\tnumeric\n" +
            "id1\t0\n" +
            "id2\t-0\n" +
            "id3\t\n" +
            "id4\t1\n" +
            "id5\t42\n" +
            "id6\t-33\n" +
            "id7\t1e-10\n" +
            "id8\t1.5e+15\n" +
            "id9\t0.0003\n" +
            "id10\t-4.234\n" +
            "id11\t12.3456789123457\n"
        )

        self.assertEqual(actual_text, expected_text)
Exemplo n.º 4
0
    def test_ids_and_column_names_as_numeric_strings(self):
        # Number-like IDs and column headers are expected to be written back
        # unchanged, with only the numeric cell values formatted compactly.
        sample_ids = pd.Index(['0.000001', '0.004000', '0.000000'],
                              dtype=object, name='id')
        headers = ['42.0', '1000', '-4.2']
        table = [
            [2.0, 'b', 2.5],
            [1.0, 'b', 4.2],
            [3.0, 'c', -9.999]
        ]
        metadata = Metadata(
            pd.DataFrame(table, index=sample_ids, columns=headers))

        metadata.save(self.filepath)

        with open(self.filepath, 'r') as saved_file:
            actual_text = saved_file.read()

        expected_text = (
            "id\t42.0\t1000\t-4.2\n" +
            "#q2:types\tnumeric\tcategorical\tnumeric\n" +
            "0.000001\t2\tb\t2.5\n" +
            "0.004000\t1\tb\t4.2\n" +
            "0.000000\t3\tc\t-9.999\n"
        )

        self.assertEqual(actual_text, expected_text)
Exemplo n.º 5
0
    def test_minimal(self):
        # Smallest case exercised here: one ID and no columns. The expected
        # file has the header, the types directive and the single ID row.
        only_id = pd.Index(['my-id'], name='id')
        metadata = Metadata(pd.DataFrame({}, index=only_id))

        metadata.save(self.filepath)

        with open(self.filepath, 'r') as handle:
            written = handle.read()

        expected = (
            "id\n"
            "#q2:types\n"
            "my-id\n"
        )

        self.assertEqual(written, expected)
Exemplo n.º 6
0
    def test_no_columns(self):
        # Three IDs and no metadata columns: every expected line holds only
        # the ID field.
        ids = pd.Index(['foo', 'bar', 'baz'], name='id')
        metadata = Metadata(pd.DataFrame({}, index=ids))

        metadata.save(self.filepath)

        with open(self.filepath, 'r') as handle:
            written = handle.read()

        expected = (
            "id\n"
            "#q2:types\n"
            "foo\n"
            "bar\n"
            "baz\n"
        )

        self.assertEqual(written, expected)
Exemplo n.º 7
0
    def test_no_bom(self):
        # Reads the saved file in binary mode and checks that its first two
        # bytes are the start of the 'id' header, i.e. nothing (such as a
        # byte-order mark) precedes the header.
        columns = {
            'col1': [1.0, 2.0, 3.0],
            'col2': ['a', 'b', 'c'],
            'col3': ['foo', 'bar', '42'],
        }
        ids = pd.Index(['id1', 'id2', 'id3'], name='id')
        metadata = Metadata(pd.DataFrame(columns, index=ids))

        metadata.save(self.filepath)

        with open(self.filepath, 'rb') as raw:
            leading_bytes = raw.read(2)

        self.assertEqual(leading_bytes, b'id')
Exemplo n.º 8
0
    def test_minimal(self):
        # A frame with a single ID and no columns should save as exactly
        # three lines: header, types directive, and the ID.
        frame = pd.DataFrame({}, index=pd.Index(['my-id'], name='id'))
        metadata = Metadata(frame)

        metadata.save(self.filepath)

        with open(self.filepath, 'r') as saved_file:
            actual_text = saved_file.read()

        expected_text = "".join([
            "id\n",
            "#q2:types\n",
            "my-id\n",
        ])

        self.assertEqual(actual_text, expected_text)
Exemplo n.º 9
0
    def test_no_bom(self):
        # The saved file must begin directly with the 'id' header bytes; the
        # check is done on raw bytes so any leading marker would be visible.
        sample_ids = pd.Index(['id1', 'id2', 'id3'], name='id')
        frame = pd.DataFrame(
            {'col1': [1.0, 2.0, 3.0],
             'col2': ['a', 'b', 'c'],
             'col3': ['foo', 'bar', '42']},
            index=sample_ids)
        metadata = Metadata(frame)

        metadata.save(self.filepath)

        with open(self.filepath, 'rb') as binary_file:
            first_two = binary_file.read(2)

        self.assertEqual(first_two, b'id')
Exemplo n.º 10
0
    def test_single_id(self):
        # One row with a categorical and a numeric column, using
        # 'featureid' as the ID header.
        only_id = pd.Index(['my-id'], name='featureid')
        frame = pd.DataFrame({'col1': ['foo'], 'col2': [4.002]},
                             index=only_id)
        metadata = Metadata(frame)

        metadata.save(self.filepath)

        with open(self.filepath, 'r') as handle:
            written = handle.read()

        expected = "".join([
            "featureid\tcol1\tcol2\n",
            "#q2:types\tcategorical\tnumeric\n",
            "my-id\tfoo\t4.002\n",
        ])

        self.assertEqual(written, expected)
Exemplo n.º 11
0
    def test_single_column(self):
        # A single column of number-like *strings*: the expected output
        # labels it categorical and keeps each string verbatim (note the
        # trailing zeros in '4.4000').
        ids = pd.Index(['foo', 'bar', 'baz'], name='id')
        frame = pd.DataFrame({'col1': ['42', '4.3', '4.4000']}, index=ids)
        metadata = Metadata(frame)

        metadata.save(self.filepath)

        with open(self.filepath, 'r') as handle:
            written = handle.read()

        expected = (
            "id\tcol1\n"
            "#q2:types\tcategorical\n"
            "foo\t42\n"
            "bar\t4.3\n"
            "baz\t4.4000\n"
        )

        self.assertEqual(written, expected)
Exemplo n.º 12
0
    def test_single_id(self):
        # Saves metadata containing exactly one ID (header 'featureid') and
        # verifies the three resulting lines.
        columns = {'col1': ['foo'], 'col2': [4.002]}
        feature_ids = pd.Index(['my-id'], name='featureid')
        metadata = Metadata(pd.DataFrame(columns, index=feature_ids))

        metadata.save(self.filepath)

        with open(self.filepath, 'r') as saved_file:
            actual_text = saved_file.read()

        expected_text = (
            "featureid\tcol1\tcol2\n" +
            "#q2:types\tcategorical\tnumeric\n" +
            "my-id\tfoo\t4.002\n"
        )

        self.assertEqual(actual_text, expected_text)
Exemplo n.º 13
0
    def test_unsorted_column_order(self):
        # Columns are supplied in non-alphabetical order ('z', 'b', 'y');
        # the expected file lists them in that same order.
        column_names = ['z', 'b', 'y']
        rows = [
            [1.0, 'a', 'foo'],
            [2.0, 'b', 'bar'],
            [3.0, 'c', '42'],
        ]
        ids = pd.Index(['id1', 'id2', 'id3'], name='id')
        frame = pd.DataFrame(rows, index=ids, columns=column_names)
        metadata = Metadata(frame)

        metadata.save(self.filepath)

        with open(self.filepath, 'r') as handle:
            written = handle.read()

        expected = (
            "id\tz\tb\ty\n"
            "#q2:types\tnumeric\tcategorical\tcategorical\n"
            "id1\t1\ta\tfoo\n"
            "id2\t2\tb\tbar\n"
            "id3\t3\tc\t42\n"
        )

        self.assertEqual(written, expected)
Exemplo n.º 14
0
    def test_no_columns(self):
        # Metadata with IDs only: the saved file is expected to contain the
        # header, the types directive, and one line per ID.
        sample_ids = pd.Index(['foo', 'bar', 'baz'], name='id')
        frame = pd.DataFrame({}, index=sample_ids)
        metadata = Metadata(frame)

        metadata.save(self.filepath)

        with open(self.filepath, 'r') as saved_file:
            actual_text = saved_file.read()

        expected_text = "".join([
            "id\n",
            "#q2:types\n",
            "foo\n",
            "bar\n",
            "baz\n",
        ])

        self.assertEqual(actual_text, expected_text)
Exemplo n.º 15
0
    def test_single_column(self):
        # The only column holds strings that look numeric; the expected
        # output types it as categorical and preserves the original text.
        values = {'col1': ['42', '4.3', '4.4000']}
        sample_ids = pd.Index(['foo', 'bar', 'baz'], name='id')
        metadata = Metadata(pd.DataFrame(values, index=sample_ids))

        metadata.save(self.filepath)

        with open(self.filepath, 'r') as saved_file:
            actual_text = saved_file.read()

        expected_text = "".join([
            "id\tcol1\n",
            "#q2:types\tcategorical\n",
            "foo\t42\n",
            "bar\t4.3\n",
            "baz\t4.4000\n",
        ])

        self.assertEqual(actual_text, expected_text)
Exemplo n.º 16
0
    def test_some_missing_data(self):
        # NaN entries in both a numeric and a categorical column are
        # expected to be written as empty cells.
        columns = {
            'col1': [42.0, np.nan, -3.5],
            'col2': ['a', np.nan, np.nan],
        }
        ids = pd.Index(['id1', 'id2', 'id3'], name='id')
        metadata = Metadata(pd.DataFrame(columns, index=ids))

        metadata.save(self.filepath)

        with open(self.filepath, 'r') as handle:
            written = handle.read()

        expected = "".join([
            "id\tcol1\tcol2\n",
            "#q2:types\tnumeric\tcategorical\n",
            "id1\t42\ta\n",
            "id2\t\t\n",
            "id3\t-3.5\t\n",
        ])

        self.assertEqual(written, expected)
Exemplo n.º 17
0
    def test_some_missing_data(self):
        # Mixes present and missing (NaN) values in a numeric and a
        # categorical column; missing values should show up as empty fields.
        sample_ids = pd.Index(['id1', 'id2', 'id3'], name='id')
        frame = pd.DataFrame(
            {'col1': [42.0, np.nan, -3.5],
             'col2': ['a', np.nan, np.nan]},
            index=sample_ids)
        metadata = Metadata(frame)

        metadata.save(self.filepath)

        with open(self.filepath, 'r') as saved_file:
            actual_text = saved_file.read()

        expected_text = (
            "id\tcol1\tcol2\n" +
            "#q2:types\tnumeric\tcategorical\n" +
            "id1\t42\ta\n" +
            "id2\t\t\n" +
            "id3\t-3.5\t\n"
        )

        self.assertEqual(actual_text, expected_text)
Exemplo n.º 18
0
    def test_alternate_id_header(self):
        # Uses '#SampleID' as the index name; the expected file starts with
        # that exact header.
        columns = {
            'col1': [1.0, 2.0, 3.0],
            'col2': ['a', 'b', 'c'],
            'col3': ['foo', 'bar', '42'],
        }
        ids = pd.Index(['id1', 'id2', 'id3'], name='#SampleID')
        metadata = Metadata(pd.DataFrame(columns, index=ids))

        metadata.save(self.filepath)

        with open(self.filepath, 'r') as handle:
            written = handle.read()

        expected = "".join([
            "#SampleID\tcol1\tcol2\tcol3\n",
            "#q2:types\tnumeric\tcategorical\tcategorical\n",
            "id1\t1\ta\tfoo\n",
            "id2\t2\tb\tbar\n",
            "id3\t3\tc\t42\n",
        ])

        self.assertEqual(written, expected)
Exemplo n.º 19
0
    def test_alternate_id_header(self):
        # The index is named '#SampleID' instead of 'id'; saving should emit
        # that name as the first header field.
        sample_ids = pd.Index(['id1', 'id2', 'id3'], name='#SampleID')
        frame = pd.DataFrame(
            {'col1': [1.0, 2.0, 3.0],
             'col2': ['a', 'b', 'c'],
             'col3': ['foo', 'bar', '42']},
            index=sample_ids)
        metadata = Metadata(frame)

        metadata.save(self.filepath)

        with open(self.filepath, 'r') as saved_file:
            actual_text = saved_file.read()

        expected_text = (
            "#SampleID\tcol1\tcol2\tcol3\n" +
            "#q2:types\tnumeric\tcategorical\tcategorical\n" +
            "id1\t1\ta\tfoo\n" +
            "id2\t2\tb\tbar\n" +
            "id3\t3\tc\t42\n"
        )

        self.assertEqual(actual_text, expected_text)
Exemplo n.º 20
0
    def test_different_file_extension(self):
        # Saves to a '.txt' path inside the temp dir and checks that the
        # contents match the expected TSV text.
        columns = {
            'col1': [1.0, 2.0, 3.0],
            'col2': ['a', 'b', 'c'],
            'col3': ['foo', 'bar', '42'],
        }
        ids = pd.Index(['id1', 'id2', 'id3'], name='id')
        metadata = Metadata(pd.DataFrame(columns, index=ids))

        txt_path = os.path.join(self.temp_dir, 'metadata.txt')
        metadata.save(txt_path)

        with open(txt_path, 'r') as handle:
            written = handle.read()

        expected = "".join([
            "id\tcol1\tcol2\tcol3\n",
            "#q2:types\tnumeric\tcategorical\tcategorical\n",
            "id1\t1\ta\tfoo\n",
            "id2\t2\tb\tbar\n",
            "id3\t3\tc\t42\n",
        ])

        self.assertEqual(written, expected)
Exemplo n.º 21
0
    def test_all_missing_data(self):
        # Both columns contain only NaN: 'col1' is a plain float list and
        # 'col2' is an object-dtype array. The expected output types them
        # as numeric and categorical respectively, with all cells empty.
        all_nan_numeric = [np.nan, np.nan, np.nan]
        all_nan_object = np.array([np.nan, np.nan, np.nan], dtype=object)
        ids = pd.Index(['id1', 'id2', 'id3'], name='id')
        frame = pd.DataFrame({'col1': all_nan_numeric,
                              'col2': all_nan_object},
                             index=ids)
        metadata = Metadata(frame)

        metadata.save(self.filepath)

        with open(self.filepath, 'r') as handle:
            written = handle.read()

        expected = "".join([
            "id\tcol1\tcol2\n",
            "#q2:types\tnumeric\tcategorical\n",
            "id1\t\t\n",
            "id2\t\t\n",
            "id3\t\t\n",
        ])

        self.assertEqual(written, expected)
Exemplo n.º 22
0
    def test_different_file_extension(self):
        # The target path ends in '.txt'; the file read back from that same
        # path must contain the expected TSV text.
        sample_ids = pd.Index(['id1', 'id2', 'id3'], name='id')
        frame = pd.DataFrame(
            {'col1': [1.0, 2.0, 3.0],
             'col2': ['a', 'b', 'c'],
             'col3': ['foo', 'bar', '42']},
            index=sample_ids)
        metadata = Metadata(frame)

        target = os.path.join(self.temp_dir, 'metadata.txt')
        metadata.save(target)

        with open(target, 'r') as saved_file:
            actual_text = saved_file.read()

        expected_text = (
            "id\tcol1\tcol2\tcol3\n" +
            "#q2:types\tnumeric\tcategorical\tcategorical\n" +
            "id1\t1\ta\tfoo\n" +
            "id2\t2\tb\tbar\n" +
            "id3\t3\tc\t42\n"
        )

        self.assertEqual(actual_text, expected_text)
Exemplo n.º 23
0
    def test_all_missing_data(self):
        # NaN-only columns, one built from a float list and one from an
        # object-dtype array; the expected file shows them as numeric and
        # categorical with every value cell empty.
        sample_ids = pd.Index(['id1', 'id2', 'id3'], name='id')
        columns = {
            'col1': [np.nan, np.nan, np.nan],
            'col2': np.array([np.nan, np.nan, np.nan], dtype=object),
        }
        metadata = Metadata(pd.DataFrame(columns, index=sample_ids))

        metadata.save(self.filepath)

        with open(self.filepath, 'r') as saved_file:
            actual_text = saved_file.read()

        expected_text = (
            "id\tcol1\tcol2\n"
            "#q2:types\tnumeric\tcategorical\n"
            "id1\t\t\n"
            "id2\t\t\n"
            "id3\t\t\n"
        )

        self.assertEqual(actual_text, expected_text)
Exemplo n.º 24
0
    def test_unsorted_column_order(self):
        # The column order given to the DataFrame ('z', 'b', 'y') is not
        # sorted; the expected header keeps that order.
        sample_ids = pd.Index(['id1', 'id2', 'id3'], name='id')
        headers = ['z', 'b', 'y']
        table = [[1.0, 'a', 'foo'], [2.0, 'b', 'bar'], [3.0, 'c', '42']]
        frame = pd.DataFrame(table, index=sample_ids, columns=headers)
        metadata = Metadata(frame)

        metadata.save(self.filepath)

        with open(self.filepath, 'r') as saved_file:
            actual_text = saved_file.read()

        expected_text = "".join([
            "id\tz\tb\ty\n",
            "#q2:types\tnumeric\tcategorical\tcategorical\n",
            "id1\t1\ta\tfoo\n",
            "id2\t2\tb\tbar\n",
            "id3\t3\tc\t42\n",
        ])

        self.assertEqual(actual_text, expected_text)
Exemplo n.º 25
0
    def test_save_metadata_auto_extension(self):
        # Verifies the basename of the path returned by ``md.save`` for
        # combinations of trailing periods in the filename and leading
        # periods in the requested extension.
        #
        # The cases are table-driven and each runs inside ``self.subTest``,
        # so a failure in one combination is reported with its label and
        # does not prevent the remaining combinations from being checked.
        md = Metadata(
            pd.DataFrame(
                {
                    'col1': [1.0, 2.0, 3.0],
                    'col2': ['a', 'b', 'c'],
                    'col3': ['foo', 'bar', '42']
                },
                index=pd.Index(['id1', 'id2', 'id3'], name='id')))

        # Each entry: (label, filename, extension, expected basename).
        # ``extension`` of None means ``save`` is called with the path only,
        # so the default extension handling is what gets exercised.
        cases = [
            ('filename ends with the extension text but has no period',
             'metadatatsv', '.tsv', 'metadatatsv.tsv'),
            ('no period in filename; no extension included',
             'metadata', None, 'metadata'),
            ('no period in filename; no period in extension',
             'metadata', 'tsv', 'metadata.tsv'),
            ('no period in filename; multiple periods in extension',
             'metadata', '..tsv', 'metadata.tsv'),
            ('single period in filename; no period in extension',
             'metadata.', 'tsv', 'metadata.tsv'),
            ('single period in filename; single period in extension',
             'metadata.', '.tsv', 'metadata.tsv'),
            ('single period in filename; multiple periods in extension',
             'metadata.', '..tsv', 'metadata.tsv'),
            ('multiple periods in filename; single period in extension',
             'metadata..', '.tsv', 'metadata.tsv'),
            ('multiple periods in filename; multiple periods in extension',
             'metadata..', '..tsv', 'metadata.tsv'),
            ('no extension in filename; no extension input',
             'metadata', None, 'metadata'),
            ('no extension in filename; extension input',
             'metadata', '.tsv', 'metadata.tsv'),
            ('extension in filename; no extension input',
             'metadata.tsv', None, 'metadata.tsv'),
            ('extension in filename; extension input (non-matching)',
             'metadata.tsv', '.txt', 'metadata.tsv.txt'),
            ('extension in filename; extension input (matching)',
             'metadata.tsv', '.tsv', 'metadata.tsv'),
        ]

        for label, filename, extension, exp_filename in cases:
            with self.subTest(case=label, filename=filename,
                              extension=extension):
                fp = os.path.join(self.temp_dir, filename)
                # Keep the original call forms: omit the second argument
                # entirely when no extension is being requested.
                if extension is None:
                    obs_md = md.save(fp)
                else:
                    obs_md = md.save(fp, extension)
                obs_filename = os.path.basename(obs_md)

                self.assertEqual(obs_filename, exp_filename)