Пример #1
0
    def test_all_missing_data(self):
        # A column whose values are all missing should come back from
        # to_series() equal to the input, with dtype object.
        def build_series():
            # Fresh objects on every call, so the input handed to the column
            # and the expectation share nothing.
            values = np.array([np.nan, np.nan, np.nan], dtype=object)
            ids = pd.Index(['a', 'b', 'c'], name='id')
            return pd.Series(values, name='col1', index=ids)

        column = CategoricalMetadataColumn(build_series())
        observed = column.to_series()
        expected = build_series()

        pdt.assert_series_equal(observed, expected)
        self.assertEqual(observed.dtype, object)
Пример #2
0
    def test_all_missing_data(self):
        # Round-trip a column in which every value is missing; the result
        # must equal the input and keep dtype object.
        input_values = np.array([np.nan, np.nan, np.nan], dtype=object)
        input_index = pd.Index(['a', 'b', 'c'], name='id')
        input_series = pd.Series(input_values, name='col1', index=input_index)
        column = CategoricalMetadataColumn(input_series)

        observed = column.to_series()

        # The expectation is assembled from its own array and index rather
        # than reusing the objects passed to the column.
        expected_values = np.array([np.nan, np.nan, np.nan], dtype=object)
        expected_index = pd.Index(['a', 'b', 'c'], name='id')
        expected = pd.Series(
            expected_values, name='col1', index=expected_index)

        pdt.assert_series_equal(observed, expected)
        self.assertEqual(observed.dtype, object)
Пример #3
0
    def test_numeric_strings_preserved_as_strings(self):
        # Strings that look like numbers ('1', '2.5', '3.0') are expected to
        # survive the round trip as strings in an object-dtype series.
        id_index = pd.Index(['a', 'b', 'c', 'd'], name='id')
        source = pd.Series(
            ['1', np.nan, '2.5', '3.0'], index=id_index, name='my column')
        column = CategoricalMetadataColumn(source)

        # Column-level attributes reported for this input.
        self.assertEqual(column.id_count, 4)
        self.assertEqual(column.id_header, 'id')
        self.assertEqual(column.ids, ('a', 'b', 'c', 'd'))
        self.assertEqual(column.name, 'my column')

        # The series handed to the constructor doubles as the expectation.
        round_tripped = column.to_series()
        pdt.assert_series_equal(round_tripped, source)
        self.assertEqual(round_tripped.dtype, object)
Пример #4
0
    def test_supported_dtype(self):
        # An object-dtype series of strings (with one missing value) should
        # be accepted and returned unchanged by to_series().
        id_index = pd.Index(['a', 'b', 'c', 'd'], name='id')
        source = pd.Series(
            ['foo', np.nan, 'bar', 'foo'], index=id_index, name='my column')
        column = CategoricalMetadataColumn(source)

        # Check the column attributes one by one, in a fixed order.
        expected_attributes = (
            ('id_count', 4),
            ('id_header', 'id'),
            ('ids', ('a', 'b', 'c', 'd')),
            ('name', 'my column'),
        )
        for attribute, expected_value in expected_attributes:
            self.assertEqual(getattr(column, attribute), expected_value)

        # The series handed to the constructor doubles as the expectation.
        round_tripped = column.to_series()
        pdt.assert_series_equal(round_tripped, source)
        self.assertEqual(round_tripped.dtype, object)
Пример #5
0
    def test_numeric_strings_preserved_as_strings(self):
        # Numeric-looking strings must not be converted: to_series() has to
        # return a series equal to the input, still with dtype object.
        values = ['1', np.nan, '2.5', '3.0']
        ids = pd.Index(['a', 'b', 'c', 'd'], name='id')
        original = pd.Series(values, name='my column', index=ids)
        column = CategoricalMetadataColumn(original)

        # Check the column attributes one by one, in a fixed order.
        expected_attributes = (
            ('id_count', 4),
            ('id_header', 'id'),
            ('ids', ('a', 'b', 'c', 'd')),
            ('name', 'my column'),
        )
        for attribute, expected_value in expected_attributes:
            self.assertEqual(getattr(column, attribute), expected_value)

        # Compare against the very series that was passed to the constructor.
        result = column.to_series()
        pdt.assert_series_equal(result, original)
        self.assertEqual(result.dtype, object)
Пример #6
0
    def test_supported_dtype(self):
        # A string-valued series containing one NaN is a supported input;
        # the column should hand back an equal, object-dtype series.
        values = ['foo', np.nan, 'bar', 'foo']
        ids = pd.Index(['a', 'b', 'c', 'd'], name='id')
        original = pd.Series(values, name='my column', index=ids)
        column = CategoricalMetadataColumn(original)

        # Column-level attributes reported for this input.
        self.assertEqual(column.id_count, 4)
        self.assertEqual(column.id_header, 'id')
        self.assertEqual(column.ids, ('a', 'b', 'c', 'd'))
        self.assertEqual(column.name, 'my column')

        # Compare against the very series that was passed to the constructor.
        result = column.to_series()
        pdt.assert_series_equal(result, original)
        self.assertEqual(result.dtype, object)
Пример #7
0
    def test_missing_data_normalized(self):
        # np.nan, float('nan') and None are three spellings of "missing";
        # all of them are expected to come back from to_series() as np.nan.
        raw_values = [np.nan, 'foo', float('nan'), None]
        raw_index = pd.Index(['a', 'b', 'c', 'd'], name='id')
        column = CategoricalMetadataColumn(
            pd.Series(raw_values, name='col1', index=raw_index))

        observed = column.to_series()

        expected_index = pd.Index(['a', 'b', 'c', 'd'], name='id')
        expected = pd.Series(
            [np.nan, 'foo', np.nan, np.nan], name='col1',
            index=expected_index)

        pdt.assert_series_equal(observed, expected)
        self.assertEqual(observed.dtype, object)

        # Probe each missing entry directly; np.isnan raises TypeError on
        # None, so a leftover None cannot slip through here.
        for missing_id in ('a', 'c', 'd'):
            self.assertTrue(np.isnan(observed[missing_id]))
Пример #8
0
    def test_missing_data_normalized(self):
        # Every representation of a missing value in the input (np.nan,
        # float('nan'), None) should be np.nan in the series returned by
        # to_series().
        def indexed_series(values):
            # Wrap the values in a 'col1' series with a fresh 'id' index.
            return pd.Series(
                values, name='col1',
                index=pd.Index(['a', 'b', 'c', 'd'], name='id'))

        column = CategoricalMetadataColumn(
            indexed_series([np.nan, 'foo', float('nan'), None]))

        observed = column.to_series()

        expected = indexed_series([np.nan, 'foo', np.nan, np.nan])

        pdt.assert_series_equal(observed, expected)
        self.assertEqual(observed.dtype, object)

        # Check the three missing positions individually; np.isnan raises
        # TypeError when given None.
        for label in ('a', 'c', 'd'):
            value = observed[label]
            self.assertTrue(np.isnan(value))