def test_all_missing_data(self):
    # A column holding nothing but NaN must come back from to_series()
    # unchanged and typed as float64.
    # NOTE(review): a second definition named test_all_missing_data follows
    # this one; if both live in the same class, this copy is shadowed.
    source = pd.Series([np.nan] * 3, name='col1',
                       index=pd.Index(['a', 'b', 'c'], name='id'))
    column = NumericMetadataColumn(source)

    observed = column.to_series()

    expected_index = pd.Index(['a', 'b', 'c'], name='id')
    expected = pd.Series([np.nan, np.nan, np.nan], name='col1',
                         index=expected_index)
    pdt.assert_series_equal(observed, expected)
    self.assertEqual(observed.dtype, np.float64)
def test_all_missing_data(self):
    # Round-trip an all-NaN series through NumericMetadataColumn and check
    # that both the values and the float64 dtype survive.
    # NOTE(review): an earlier definition with the same name precedes this
    # one; if both live in the same class, only this copy runs.
    def all_nan_series():
        # Build a fresh series (and index) on every call so the input and
        # the expectation never share objects.
        ids = pd.Index(['a', 'b', 'c'], name='id')
        return pd.Series([np.nan, np.nan, np.nan], name='col1', index=ids)

    mdc = NumericMetadataColumn(all_nan_series())
    result = mdc.to_series()

    pdt.assert_series_equal(result, all_nan_series())
    self.assertEqual(result.dtype, np.float64)
def test_supported_dtype_float(self):
    # Float input (including a NaN) is accepted as-is: the column reports
    # the expected ids/header/name and to_series() returns an equal series.
    # NOTE(review): a second definition named test_supported_dtype_float
    # follows this one; if both live in the same class, this copy is
    # shadowed.
    id_index = pd.Index(['a', 'b', 'c', 'd'], name='id')
    series = pd.Series([1.23, np.nan, 4.56, -7.891], name='my column',
                       index=id_index)

    mdc = NumericMetadataColumn(series)

    # Descriptive properties of the column.
    self.assertEqual(mdc.id_count, 4)
    self.assertEqual(mdc.id_header, 'id')
    self.assertEqual(mdc.ids, ('a', 'b', 'c', 'd'))
    self.assertEqual(mdc.name, 'my column')

    # The data itself, compared against the very series that was passed in.
    round_tripped = mdc.to_series()
    pdt.assert_series_equal(round_tripped, series)
    self.assertEqual(round_tripped.dtype, np.float64)
def test_supported_dtype_float(self):
    # A float64 series is a supported input; verify the column metadata and
    # that to_series() hands back data equal to the original series.
    # NOTE(review): an earlier definition with the same name precedes this
    # one; if both live in the same class, only this copy runs.
    values = [1.23, np.nan, 4.56, -7.891]
    labels = ['a', 'b', 'c', 'd']
    series = pd.Series(values, name='my column',
                       index=pd.Index(labels, name='id'))
    column = NumericMetadataColumn(series)

    self.assertEqual(column.id_count, 4)
    self.assertEqual(column.id_header, 'id')
    self.assertEqual(column.ids, ('a', 'b', 'c', 'd'))
    self.assertEqual(column.name, 'my column')

    obs_series = column.to_series()
    pdt.assert_series_equal(obs_series, series)
    self.assertEqual(obs_series.dtype, np.float64)
def test_missing_data_normalized(self):
    # Different missing data representations should be normalized to np.nan
    # NOTE(review): a second definition named test_missing_data_normalized
    # follows this one; if both live in the same class, this copy is
    # shadowed.
    raw = pd.Series([np.nan, 4.2, float('nan'), -5.678], name='col1',
                    index=pd.Index(['a', 'b', 'c', 'd'], name='id'))
    column = NumericMetadataColumn(raw)

    observed = column.to_series()

    expected = pd.Series([np.nan, 4.2, np.nan, -5.678], name='col1',
                         index=pd.Index(['a', 'b', 'c', 'd'], name='id'))
    pdt.assert_series_equal(observed, expected)
    self.assertEqual(observed.dtype, np.float64)

    # The two ids that were given a missing value must read back as NaN.
    for missing_id in ('a', 'c'):
        self.assertTrue(np.isnan(observed[missing_id]))
def test_missing_data_normalized(self):
    # Different missing data representations should be normalized to np.nan
    # NOTE(review): an earlier definition with the same name precedes this
    # one; if both live in the same class, only this copy runs.
    def col1_series(values):
        # Series named 'col1' over ids a-d; a new index is built per call.
        ids = pd.Index(['a', 'b', 'c', 'd'], name='id')
        return pd.Series(values, name='col1', index=ids)

    mdc = NumericMetadataColumn(
        col1_series([np.nan, 4.2, float('nan'), -5.678]))
    obs = mdc.to_series()
    exp = col1_series([np.nan, 4.2, np.nan, -5.678])

    pdt.assert_series_equal(obs, exp)
    self.assertEqual(obs.dtype, np.float64)

    is_missing_a = np.isnan(obs['a'])
    self.assertTrue(is_missing_a)
    is_missing_c = np.isnan(obs['c'])
    self.assertTrue(is_missing_c)
def test_supported_dtype_int(self):
    # Integer input is accepted, and to_series() is expected to expose the
    # same values as float64.
    # NOTE(review): a second definition named test_supported_dtype_int
    # follows this one; if both live in the same class, this copy is
    # shadowed.
    int_series = pd.Series([0, 1, 42, -2], name='my column',
                           index=pd.Index(['a', 'b', 'c', 'd'], name='id'))
    column = NumericMetadataColumn(int_series)

    self.assertEqual(column.id_count, 4)
    self.assertEqual(column.id_header, 'id')
    self.assertEqual(column.ids, ('a', 'b', 'c', 'd'))
    self.assertEqual(column.name, 'my column')

    observed = column.to_series()

    # Same values as the input, written as floats.
    expected_index = pd.Index(['a', 'b', 'c', 'd'], name='id')
    expected = pd.Series([0.0, 1.0, 42.0, -2.0], name='my column',
                         index=expected_index)
    pdt.assert_series_equal(observed, expected)
    self.assertEqual(observed.dtype, np.float64)
def test_supported_dtype_int(self):
    # An int series is a supported input; the column metadata must match and
    # the series returned by to_series() must hold float64 equivalents.
    # NOTE(review): an earlier definition with the same name precedes this
    # one; if both live in the same class, only this copy runs.
    def my_column(values):
        # Series named 'my column' over ids a-d; new index on each call.
        ids = pd.Index(['a', 'b', 'c', 'd'], name='id')
        return pd.Series(values, name='my column', index=ids)

    series = my_column([0, 1, 42, -2])
    mdc = NumericMetadataColumn(series)

    self.assertEqual(mdc.id_count, 4)
    self.assertEqual(mdc.id_header, 'id')
    self.assertEqual(mdc.ids, ('a', 'b', 'c', 'd'))
    self.assertEqual(mdc.name, 'my column')

    obs_series = mdc.to_series()
    exp_series = my_column([0.0, 1.0, 42.0, -2.0])
    pdt.assert_series_equal(obs_series, exp_series)
    self.assertEqual(obs_series.dtype, np.float64)