def test_all_missing_data(self):
    # A column holding nothing but missing values must come back from
    # `to_series()` equal to the input and still carry the object dtype.
    all_nan_input = pd.Series(
        data=np.array([np.nan] * 3, dtype=object),
        index=pd.Index(['a', 'b', 'c'], name='id'),
        name='col1')
    column = CategoricalMetadataColumn(all_nan_input)

    observed = column.to_series()

    expected = pd.Series(
        data=np.array([np.nan] * 3, dtype=object),
        index=pd.Index(['a', 'b', 'c'], name='id'),
        name='col1')
    pdt.assert_series_equal(observed, expected)
    self.assertEqual(observed.dtype, object)
def test_all_missing_data(self):
    # Input and expectation are built independently by the same factory so
    # the comparison never involves a shared Series object.
    def make_all_nan_series():
        values = np.array([np.nan, np.nan, np.nan], dtype=object)
        ids = pd.Index(['a', 'b', 'c'], name='id')
        return pd.Series(values, index=ids, name='col1')

    result = CategoricalMetadataColumn(make_all_nan_series()).to_series()

    pdt.assert_series_equal(result, make_all_nan_series())
    self.assertEqual(result.dtype, object)
def test_numeric_strings_preserved_as_strings(self):
    # The values look numeric ('1', '2.5', '3.0') but are supplied as
    # strings; the series returned by `to_series()` must equal the input
    # and keep the object dtype.
    ids = pd.Index(['a', 'b', 'c', 'd'], name='id')
    source = pd.Series(['1', np.nan, '2.5', '3.0'], index=ids,
                       name='my column')

    column = CategoricalMetadataColumn(source)

    # Column-level properties derived from the input series.
    self.assertEqual(column.id_count, 4)
    self.assertEqual(column.id_header, 'id')
    self.assertEqual(column.ids, ('a', 'b', 'c', 'd'))
    self.assertEqual(column.name, 'my column')

    round_tripped = column.to_series()
    pdt.assert_series_equal(round_tripped, source)
    self.assertEqual(round_tripped.dtype, object)
def test_supported_dtype(self):
    # An object-dtype series of strings with one missing value is accepted,
    # and `to_series()` hands back an equal object-dtype series.
    ids = pd.Index(['a', 'b', 'c', 'd'], name='id')
    source = pd.Series(['foo', np.nan, 'bar', 'foo'], index=ids,
                       name='my column')

    column = CategoricalMetadataColumn(source)

    # Column-level properties derived from the input series.
    self.assertEqual(column.id_count, 4)
    self.assertEqual(column.id_header, 'id')
    self.assertEqual(column.ids, ('a', 'b', 'c', 'd'))
    self.assertEqual(column.name, 'my column')

    round_tripped = column.to_series()
    pdt.assert_series_equal(round_tripped, source)
    self.assertEqual(round_tripped.dtype, object)
def test_numeric_strings_preserved_as_strings(self):
    # Numeric-looking strings go in as strings and must come out of
    # `to_series()` as an equal, object-dtype series.
    values = ['1', np.nan, '2.5', '3.0']
    given = pd.Series(
        data=values,
        index=pd.Index(['a', 'b', 'c', 'd'], name='id'),
        name='my column')
    md_col = CategoricalMetadataColumn(given)

    self.assertEqual(md_col.id_count, 4)
    self.assertEqual(md_col.id_header, 'id')
    self.assertEqual(md_col.ids, ('a', 'b', 'c', 'd'))
    self.assertEqual(md_col.name, 'my column')

    returned = md_col.to_series()

    pdt.assert_series_equal(returned, given)
    self.assertEqual(returned.dtype, object)
def test_supported_dtype(self):
    # String values with a missing entry: the column reports the ids, id
    # header and name of the input, and `to_series()` returns an equal
    # object-dtype series.
    values = ['foo', np.nan, 'bar', 'foo']
    given = pd.Series(
        data=values,
        index=pd.Index(['a', 'b', 'c', 'd'], name='id'),
        name='my column')
    md_col = CategoricalMetadataColumn(given)

    self.assertEqual(md_col.id_count, 4)
    self.assertEqual(md_col.id_header, 'id')
    self.assertEqual(md_col.ids, ('a', 'b', 'c', 'd'))
    self.assertEqual(md_col.name, 'my column')

    returned = md_col.to_series()

    pdt.assert_series_equal(returned, given)
    self.assertEqual(returned.dtype, object)
def test_missing_data_normalized(self):
    # Different missing data representations should be normalized to np.nan
    mixed_missing = pd.Series(
        data=[np.nan, 'foo', float('nan'), None],
        index=pd.Index(['a', 'b', 'c', 'd'], name='id'),
        name='col1')
    column = CategoricalMetadataColumn(mixed_missing)

    observed = column.to_series()

    expected = pd.Series(
        data=[np.nan, 'foo', np.nan, np.nan],
        index=pd.Index(['a', 'b', 'c', 'd'], name='id'),
        name='col1')
    pdt.assert_series_equal(observed, expected)
    self.assertEqual(observed.dtype, object)

    # Every id that started out as a missing value must now hold a real NaN.
    for missing_id in ('a', 'c', 'd'):
        self.assertTrue(np.isnan(observed[missing_id]))
def test_missing_data_normalized(self):
    # Different missing data representations should be normalized to np.nan
    def make_series(values):
        # Same ids and column name for both the input and the expectation.
        ids = pd.Index(['a', 'b', 'c', 'd'], name='id')
        return pd.Series(values, index=ids, name='col1')

    md_col = CategoricalMetadataColumn(
        make_series([np.nan, 'foo', float('nan'), None]))
    result = md_col.to_series()

    pdt.assert_series_equal(
        result, make_series([np.nan, 'foo', np.nan, np.nan]))
    self.assertEqual(result.dtype, object)

    # Check the individual cells too: 'a', 'c' and 'd' were the missing ones.
    for missing_id in ('a', 'c', 'd'):
        self.assertTrue(np.isnan(result[missing_id]))