def test_all_missing_data(self):
    """A column holding only NaN values comes back as a float64 series."""
    def build_series():
        # Fresh objects on every call so the input handed to the column
        # and the expectation share no state.
        return pd.Series([np.nan, np.nan, np.nan], name='col1',
                         index=pd.Index(['a', 'b', 'c'], name='id'))

    column = NumericMetadataColumn(build_series())
    obs = column.to_series()
    exp = build_series()

    pdt.assert_series_equal(obs, exp)
    self.assertEqual(obs.dtype, np.float64)
def test_all_missing_data(self):
    """Verify an all-NaN numeric column round-trips with float64 dtype."""
    source = pd.Series(
        [np.nan, np.nan, np.nan],
        name='col1',
        index=pd.Index(['a', 'b', 'c'], name='id'),
    )
    obs = NumericMetadataColumn(source).to_series()

    # The expectation is built independently of the input series.
    exp = pd.Series(
        [np.nan, np.nan, np.nan],
        name='col1',
        index=pd.Index(['a', 'b', 'c'], name='id'),
    )

    pdt.assert_series_equal(obs, exp)
    self.assertEqual(obs.dtype, np.float64)
def test_supported_dtype_float(self):
    """Float input is accepted and returned unchanged as float64."""
    sample_ids = pd.Index(['a', 'b', 'c', 'd'], name='id')
    series = pd.Series([1.23, np.nan, 4.56, -7.891], name='my column',
                       index=sample_ids)

    column = NumericMetadataColumn(series)

    # Properties exposed by the column mirror the input series.
    self.assertEqual(column.id_count, 4)
    self.assertEqual(column.id_header, 'id')
    self.assertEqual(column.ids, ('a', 'b', 'c', 'd'))
    self.assertEqual(column.name, 'my column')

    # Round-tripping yields a series equal to the one passed in.
    round_tripped = column.to_series()
    pdt.assert_series_equal(round_tripped, series)
    self.assertEqual(round_tripped.dtype, np.float64)
def test_supported_dtype_float(self):
    """Check the column's properties and round-trip for float data."""
    series = pd.Series(
        [1.23, np.nan, 4.56, -7.891],
        name='my column',
        index=pd.Index(['a', 'b', 'c', 'd'], name='id'),
    )
    mdc = NumericMetadataColumn(series)

    self.assertEqual(mdc.id_count, 4)
    self.assertEqual(mdc.id_header, 'id')
    self.assertEqual(mdc.ids, ('a', 'b', 'c', 'd'))
    self.assertEqual(mdc.name, 'my column')

    result = mdc.to_series()
    # The output must equal the original input and stay float64.
    pdt.assert_series_equal(result, series)
    self.assertEqual(result.dtype, np.float64)
def test_missing_data_normalized(self):
    """Both ``np.nan`` and ``float('nan')`` inputs come back as NaN."""
    # The input mixes two spellings of a missing value.
    raw = pd.Series([np.nan, 4.2, float('nan'), -5.678], name='col1',
                    index=pd.Index(['a', 'b', 'c', 'd'], name='id'))
    column = NumericMetadataColumn(raw)
    obs = column.to_series()

    # The expectation uses np.nan for every missing entry.
    exp = pd.Series([np.nan, 4.2, np.nan, -5.678], name='col1',
                    index=pd.Index(['a', 'b', 'c', 'd'], name='id'))

    pdt.assert_series_equal(obs, exp)
    self.assertEqual(obs.dtype, np.float64)
    for missing_id in ('a', 'c'):
        self.assertTrue(np.isnan(obs[missing_id]))
def test_unsupported_dtype(self):
    """Constructing the column from a bool series raises ``TypeError``."""
    expected_message = ("NumericMetadataColumn 'col1' does not support"
                        ".*Series.*dtype.*bool")

    with self.assertRaisesRegex(TypeError, expected_message):
        NumericMetadataColumn(
            pd.Series(
                [True, False, True],
                name='col1',
                index=pd.Index(['a', 'b', 'c'], name='id'),
            )
        )
def test_missing_data_normalized(self):
    """Missing values given in different forms are all returned as NaN."""
    def make_index():
        return pd.Index(['a', 'b', 'c', 'd'], name='id')

    # Two different representations of a missing value go in ...
    mdc = NumericMetadataColumn(
        pd.Series([np.nan, 4.2, float('nan'), -5.678], name='col1',
                  index=make_index()))
    obs = mdc.to_series()

    # ... and a single np.nan representation is expected to come out.
    exp = pd.Series([np.nan, 4.2, np.nan, -5.678], name='col1',
                    index=make_index())

    pdt.assert_series_equal(obs, exp)
    self.assertEqual(obs.dtype, np.float64)
    self.assertTrue(np.isnan(obs['a']))
    self.assertTrue(np.isnan(obs['c']))
def test_infinity_value(self):
    """A series containing ``+inf`` is rejected with ``ValueError``."""
    expected_message = ("NumericMetadataColumn.*positive or negative "
                        "infinity.*column 'col1'")

    with self.assertRaisesRegex(ValueError, expected_message):
        NumericMetadataColumn(
            pd.Series(
                [42, float('+inf'), 4.3],
                name='col1',
                index=pd.Index(['a', 'b', 'c'], name='id'),
            )
        )
def test_numeric_metadata_column(self):
    """Saving a numeric column writes the expected tab-separated text."""
    values = pd.Series(
        [1e-15, 42.50, -999.0],
        name='numeric-column',
        index=pd.Index(['id1', 'id2', 'id3'], name='#OTU ID'),
    )
    column = NumericMetadataColumn(values)

    column.save(self.filepath)

    with open(self.filepath, 'r') as saved_file:
        obs = saved_file.read()

    # Header row, type directive row, then one row per id.
    exp = (
        "#OTU ID\tnumeric-column\n"
        "#q2:types\tnumeric\n"
        "id1\t1e-15\n"
        "id2\t42.5\n"
        "id3\t-999\n"
    )
    self.assertEqual(obs, exp)
def test_wrong_obj(self):
    """Column constructors raise ``TypeError`` for non-Series input."""
    # A DataFrame is not an acceptable input for the numeric column.
    numeric_message = 'NumericMetadataColumn constructor.*pandas.Series'
    with self.assertRaisesRegex(TypeError, numeric_message):
        NumericMetadataColumn(pd.DataFrame([[1, 2, 3]]))

    # Neither is a plain dict for the categorical column.
    categorical_message = (
        'CategoricalMetadataColumn constructor.*pandas.Series')
    with self.assertRaisesRegex(TypeError, categorical_message):
        CategoricalMetadataColumn({})
def test_supported_dtype_int(self):
    """Integer input is accepted and returned as float64 values."""
    def make_index():
        return pd.Index(['a', 'b', 'c', 'd'], name='id')

    series = pd.Series([0, 1, 42, -2], name='my column', index=make_index())
    column = NumericMetadataColumn(series)

    self.assertEqual(column.id_count, 4)
    self.assertEqual(column.id_header, 'id')
    self.assertEqual(column.ids, ('a', 'b', 'c', 'd'))
    self.assertEqual(column.name, 'my column')

    obs_series = column.to_series()
    # Same values as the input, but expressed as floats.
    exp_series = pd.Series([0.0, 1.0, 42.0, -2.0], name='my column',
                           index=make_index())
    pdt.assert_series_equal(obs_series, exp_series)
    self.assertEqual(obs_series.dtype, np.float64)
def test_numeric_metadata_column(self):
    """Check the exact file contents produced by saving a numeric column."""
    ids = pd.Index(['id1', 'id2', 'id3'], name='#OTU ID')
    mdc = NumericMetadataColumn(
        pd.Series([1e-15, 42.50, -999.0], name='numeric-column', index=ids))
    mdc.save(self.filepath)

    with open(self.filepath, 'r') as fh:
        obs = fh.read()

    # One literal per line of the expected file.
    exp = ''.join([
        "#OTU ID\tnumeric-column\n",
        "#q2:types\tnumeric\n",
        "id1\t1e-15\n",
        "id2\t42.5\n",
        "id3\t-999\n",
    ])
    self.assertEqual(obs, exp)
def test_supported_dtype_int(self):
    """Check properties and float64 round-trip for integer input data."""
    series = pd.Series(
        [0, 1, 42, -2],
        name='my column',
        index=pd.Index(['a', 'b', 'c', 'd'], name='id'),
    )
    mdc = NumericMetadataColumn(series)

    self.assertEqual(mdc.id_count, 4)
    self.assertEqual(mdc.id_header, 'id')
    self.assertEqual(mdc.ids, ('a', 'b', 'c', 'd'))
    self.assertEqual(mdc.name, 'my column')

    result = mdc.to_series()
    # The expectation holds the float equivalents of the integer input.
    expected = pd.Series(
        [0.0, 1.0, 42.0, -2.0],
        name='my column',
        index=pd.Index(['a', 'b', 'c', 'd'], name='id'),
    )
    pdt.assert_series_equal(result, expected)
    self.assertEqual(result.dtype, np.float64)
def test_type_mismatch(self):
    """A dummy column is unequal to numeric and categorical columns.

    All three columns are built with the same name and ids.
    """
    def make_series(values):
        # Every column gets its own series and index objects.
        return pd.Series(values, name='col1',
                         index=pd.Index(['id1', 'id2', 'id3'], name='id'))

    dummy = DummyMetadataColumn(make_series([1.0, 2.0, 3.0]))
    numeric = NumericMetadataColumn(make_series([1.0, 2.0, 3.0]))
    categorical = CategoricalMetadataColumn(make_series(['a', 'b', 'c']))

    self.assertReallyNotEqual(dummy, numeric)
    self.assertReallyNotEqual(dummy, categorical)