Пример #1
0
    def test_3_columns(self):
        # Load the '3-column.tsv' fixture twice -- once relying on the default
        # `has_header` and once with `has_header=True` -- and require both
        # results to equal the same two-row Taxon/Confidence frame.
        relpath = os.path.join('taxonomy', '3-column.tsv')
        feature_ids = pd.Index(['seq1', 'seq2'], name='Feature ID',
                               dtype=object)
        rows = [
            ['k__Foo; p__Bar', '-1.0'],
            ['k__Foo; p__Baz', '-42.0'],
        ]
        expected = pd.DataFrame(rows, index=feature_ids,
                                columns=['Taxon', 'Confidence'], dtype=object)

        # Default header handling.
        observed = _taxonomy_formats_to_dataframe(
            self.get_data_path(relpath))
        assert_frame_equal(observed, expected)

        # Header explicitly declared present.
        observed = _taxonomy_formats_to_dataframe(
            self.get_data_path(relpath), has_header=True)
        assert_frame_equal(observed, expected)
Пример #2
0
    def test_2_columns(self):
        # Load the '2-column.tsv' fixture with the default `has_header` and
        # again with `has_header=True`; both results must equal the same
        # single-column ('Taxon') frame indexed by 'Feature ID'.
        relpath = os.path.join('taxonomy', '2-column.tsv')
        feature_ids = pd.Index(['seq1', 'seq2'], name='Feature ID',
                               dtype=object)
        rows = [['k__Bacteria; p__Proteobacteria'], ['k__Bacteria']]
        expected = pd.DataFrame(rows, index=feature_ids, columns=['Taxon'],
                                dtype=object)

        # Default header handling.
        observed = _taxonomy_formats_to_dataframe(
            self.get_data_path(relpath))
        assert_frame_equal(observed, expected)

        # Header explicitly declared present.
        observed = _taxonomy_formats_to_dataframe(
            self.get_data_path(relpath), has_header=True)
        assert_frame_equal(observed, expected)
Пример #3
0
    def test_3_columns(self):
        # The '3-column.tsv' fixture must parse to the same frame whether
        # `has_header` is left at its default or passed as True.
        def parse(**options):
            fp = self.get_data_path(os.path.join('taxonomy', '3-column.tsv'))
            return _taxonomy_formats_to_dataframe(fp, **options)

        expected = pd.DataFrame(
            [['k__Foo; p__Bar', '-1.0'], ['k__Foo; p__Baz', '-42.0']],
            index=pd.Index(['seq1', 'seq2'], name='Feature ID', dtype=object),
            columns=['Taxon', 'Confidence'],
            dtype=object)

        # Default `has_header`, then has_header=True.
        assert_frame_equal(parse(), expected)
        assert_frame_equal(parse(has_header=True), expected)
Пример #4
0
    def test_2_columns(self):
        # The '2-column.tsv' fixture must parse to the same single-column
        # frame whether `has_header` is left at its default or passed as True.
        def parse(**options):
            fp = self.get_data_path(os.path.join('taxonomy', '2-column.tsv'))
            return _taxonomy_formats_to_dataframe(fp, **options)

        expected = pd.DataFrame(
            [['k__Bacteria; p__Proteobacteria'], ['k__Bacteria']],
            index=pd.Index(['seq1', 'seq2'], name='Feature ID', dtype=object),
            columns=['Taxon'],
            dtype=object)

        # Default `has_header`, then has_header=True.
        assert_frame_equal(parse(), expected)
        assert_frame_equal(parse(has_header=True), expected)
Пример #5
0
    def test_headerless(self):
        # Load the 'headerless.tsv' fixture with the default `has_header` and
        # again with `has_header=False`; both results must equal a frame
        # whose columns are 'Taxon', 'Unnamed Column 1', 'Unnamed Column 2'.
        relpath = os.path.join('taxonomy', 'headerless.tsv')
        feature_ids = pd.Index(['seq1', 'seq2'], name='Feature ID',
                               dtype=object)
        rows = [
            ['k__Foo; p__Bar', 'some', 'another'],
            ['k__Foo; p__Baz', 'column', 'column!'],
        ]
        expected = pd.DataFrame(
            rows,
            index=feature_ids,
            columns=['Taxon', 'Unnamed Column 1', 'Unnamed Column 2'],
            dtype=object)

        # Default header handling.
        observed = _taxonomy_formats_to_dataframe(
            self.get_data_path(relpath))
        assert_frame_equal(observed, expected)

        # Header explicitly declared absent.
        observed = _taxonomy_formats_to_dataframe(
            self.get_data_path(relpath), has_header=False)
        assert_frame_equal(observed, expected)
Пример #6
0
    def test_headerless(self):
        # The 'headerless.tsv' fixture must parse to the same frame whether
        # `has_header` is left at its default or passed as False.
        def parse(**options):
            fp = self.get_data_path(
                os.path.join('taxonomy', 'headerless.tsv'))
            return _taxonomy_formats_to_dataframe(fp, **options)

        expected = pd.DataFrame(
            [['k__Foo; p__Bar', 'some', 'another'],
             ['k__Foo; p__Baz', 'column', 'column!']],
            index=pd.Index(['seq1', 'seq2'], name='Feature ID', dtype=object),
            columns=['Taxon', 'Unnamed Column 1', 'Unnamed Column 2'],
            dtype=object)

        # Default `has_header`, then has_header=False.
        assert_frame_equal(parse(), expected)
        assert_frame_equal(parse(has_header=False), expected)
Пример #7
0
    def test_valid_but_messy_file(self):
        # Load the 'valid-but-messy.tsv' fixture with the default
        # `has_header` and again with `has_header=True`; both results must
        # equal the same Taxon/'Extra Column' frame.
        relpath = os.path.join('taxonomy', 'valid-but-messy.tsv')
        feature_ids = pd.Index(['SEQUENCE1', 'seq2'], name='Feature ID',
                               dtype=object)
        rows = [
            ['k__Bar; p__Baz', 'foo'],
            ['some; taxonomy; for; ya', 'bar baz'],
        ]
        expected = pd.DataFrame(rows, index=feature_ids,
                                columns=['Taxon', 'Extra Column'],
                                dtype=object)

        # Default header handling.
        observed = _taxonomy_formats_to_dataframe(
            self.get_data_path(relpath))
        assert_frame_equal(observed, expected)

        # Header explicitly declared present.
        observed = _taxonomy_formats_to_dataframe(
            self.get_data_path(relpath), has_header=True)
        assert_frame_equal(observed, expected)
Пример #8
0
    def test_valid_but_messy_file(self):
        # The 'valid-but-messy.tsv' fixture must parse to the same frame
        # whether `has_header` is left at its default or passed as True.
        def parse(**options):
            fp = self.get_data_path(
                os.path.join('taxonomy', 'valid-but-messy.tsv'))
            return _taxonomy_formats_to_dataframe(fp, **options)

        expected = pd.DataFrame(
            [['k__Bar; p__Baz', 'foo'],
             ['some; taxonomy; for; ya', 'bar baz']],
            index=pd.Index(['SEQUENCE1', 'seq2'], name='Feature ID',
                           dtype=object),
            columns=['Taxon', 'Extra Column'],
            dtype=object)

        # Default `has_header`, then has_header=True.
        assert_frame_equal(parse(), expected)
        assert_frame_equal(parse(has_header=True), expected)
Пример #9
0
 def test_duplicate_columns(self):
     # Parsing the 'duplicate-columns.tsv' fixture must raise a ValueError
     # whose message matches 'duplicated: Column1'.
     relpath = os.path.join('taxonomy', 'duplicate-columns.tsv')
     with self.assertRaisesRegex(ValueError, 'duplicated: Column1'):
         fp = self.get_data_path(relpath)
         _taxonomy_formats_to_dataframe(fp)
Пример #10
0
 def test_has_header_with_headerless(self):
     # Forcing has_header=True on the 'headerless.tsv' fixture must raise a
     # ValueError whose message matches 'requires a header'.
     relpath = os.path.join('taxonomy', 'headerless.tsv')
     expect_failure = self.assertRaisesRegex(ValueError, 'requires a header')
     with expect_failure:
         fp = self.get_data_path(relpath)
         _taxonomy_formats_to_dataframe(fp, has_header=True)
Пример #11
0
 def test_jagged(self):
     # Parsing the 'jagged.tsv' fixture must raise pandas' ParserError.
     relpath = os.path.join('taxonomy', 'jagged.tsv')
     with self.assertRaises(pandas.errors.ParserError):
         fp = self.get_data_path(relpath)
         _taxonomy_formats_to_dataframe(fp)
Пример #12
0
 def test_empty(self):
     # Parsing the 'empty' fixture must raise pandas' EmptyDataError.
     relpath = os.path.join('taxonomy', 'empty')
     with self.assertRaises(pandas.errors.EmptyDataError):
         fp = self.get_data_path(relpath)
         _taxonomy_formats_to_dataframe(fp)
Пример #13
0
 def test_header_only(self):
     # Parsing the 'header-only.tsv' fixture must raise a ValueError whose
     # message matches 'one row of data'.
     relpath = os.path.join('taxonomy', 'header-only.tsv')
     with self.assertRaisesRegex(ValueError, 'one row of data'):
         fp = self.get_data_path(relpath)
         _taxonomy_formats_to_dataframe(fp)
Пример #14
0
 def test_blanks_and_comments(self):
     # Parsing the 'blanks-and-comments' fixture must raise pandas'
     # EmptyDataError. The exception is referenced through `pandas.errors`,
     # its public location; the `pandas.io.common` re-export used
     # previously is not part of the public API and is unavailable in
     # current pandas releases.
     with self.assertRaises(pandas.errors.EmptyDataError):
         _taxonomy_formats_to_dataframe(
             self.get_data_path(
                 os.path.join('taxonomy', 'blanks-and-comments')))
Пример #15
0
 def test_one_column(self):
     # Parsing the '1-column.tsv' fixture must raise a ValueError whose
     # message matches "two columns, found 1".
     relpath = os.path.join('taxonomy', '1-column.tsv')
     with self.assertRaisesRegex(ValueError, "two columns, found 1"):
         fp = self.get_data_path(relpath)
         _taxonomy_formats_to_dataframe(fp)
Пример #16
0
 def test_one_column(self):
     # A ValueError matching "two columns, found 1" is expected when the
     # '1-column.tsv' fixture is parsed.
     path_parts = ('taxonomy', '1-column.tsv')
     expect_failure = self.assertRaisesRegex(
         ValueError, "two columns, found 1")
     with expect_failure:
         fixture = self.get_data_path(os.path.join(*path_parts))
         _taxonomy_formats_to_dataframe(fixture)
Пример #17
0
 def test_blanks_and_comments(self):
     # Parsing the 'blanks-and-comments' fixture must raise pandas'
     # EmptyDataError. The exception is referenced through `pandas.errors`,
     # its public location; the `pandas.io.common` re-export used
     # previously is not part of the public API and is unavailable in
     # current pandas releases.
     with self.assertRaises(pandas.errors.EmptyDataError):
         _taxonomy_formats_to_dataframe(
             self.get_data_path(os.path.join('taxonomy',
                                             'blanks-and-comments')))
Пример #18
0
 def test_empty(self):
     # Parsing the 'empty' fixture must raise pandas' EmptyDataError. The
     # exception is referenced through `pandas.errors`, its public
     # location; the `pandas.io.common` re-export used previously is not
     # part of the public API and is unavailable in current pandas releases.
     with self.assertRaises(pandas.errors.EmptyDataError):
         _taxonomy_formats_to_dataframe(
             self.get_data_path(os.path.join('taxonomy', 'empty')))
Пример #19
0
 def test_header_only(self):
     # A ValueError matching 'one row of data' is expected when the
     # 'header-only.tsv' fixture is parsed.
     path_parts = ('taxonomy', 'header-only.tsv')
     expect_failure = self.assertRaisesRegex(ValueError, 'one row of data')
     with expect_failure:
         fixture = self.get_data_path(os.path.join(*path_parts))
         _taxonomy_formats_to_dataframe(fixture)
Пример #20
0
 def test_duplicate_columns(self):
     # A ValueError matching 'duplicated: Column1' is expected when the
     # 'duplicate-columns.tsv' fixture is parsed.
     path_parts = ('taxonomy', 'duplicate-columns.tsv')
     expect_failure = self.assertRaisesRegex(
         ValueError, 'duplicated: Column1')
     with expect_failure:
         fixture = self.get_data_path(os.path.join(*path_parts))
         _taxonomy_formats_to_dataframe(fixture)
Пример #21
0
 def test_jagged(self):
     # Parsing the 'jagged.tsv' fixture must raise pandas' ParserError. The
     # exception is referenced as `pandas.errors.ParserError`, its public
     # name and location; the `pandas.io.common.CParserError` alias used
     # previously was deprecated and is unavailable in current pandas
     # releases.
     with self.assertRaises(pandas.errors.ParserError):
         _taxonomy_formats_to_dataframe(
             self.get_data_path(os.path.join('taxonomy', 'jagged.tsv')))
Пример #22
0
 def test_has_header_with_headerless(self):
     # A ValueError matching 'requires a header' is expected when the
     # 'headerless.tsv' fixture is parsed with has_header=True.
     path_parts = ('taxonomy', 'headerless.tsv')
     options = {'has_header': True}
     with self.assertRaisesRegex(ValueError, 'requires a header'):
         fixture = self.get_data_path(os.path.join(*path_parts))
         _taxonomy_formats_to_dataframe(fixture, **options)