def test_index_mismatch(self):
    """Check coalesce on columns whose indexes differ but lengths agree.

    The result is expected to carry the index of the first column passed
    in, while columns of a different length are expected to be rejected.
    """
    frame = pd.DataFrame(data=self.reference_data)

    # Equal-length columns coalesce even when their index labels disagree.
    test_index = ['v', 'w', 'x', 'y', 'z']
    a = frame.loc[:, 'A']
    a.index = test_index
    result = coalesce(a, frame['C'])

    # The first column's index wins; the second column's is not used.
    matches_first = result.index == test_index
    self.assertTrue(matches_first.all())
    differs_from_second = result.index != frame['C'].index
    self.assertTrue(differs_from_second.all())

    # Compare raw values so index labels do not influence the check.
    expected_values = pd.Series(['c', 'aa', 'ccc', 'cccc', 'ccccc']).values
    self.assertTrue((result.values == expected_values).all())

    # A length mismatch in either direction must raise.
    too_short = pd.Series(['foo', 'bar'])
    too_long = pd.Series(['foo', 'bar', 'baz', 'qux', 'quux', 'corge'])
    for wrong_length in (too_short, too_long):
        with self.assertRaises(Exception):
            coalesce(a, wrong_length)
def test_consider_null_cross_type(self):
    """Check that ``consider_null`` raises TypeError for this cross-type call."""
    frame = pd.DataFrame(data=self.reference_data)

    # Cross-type comparison with mixed types is not allowed... for now.
    null_markers = ['cccc']
    with self.assertRaises(TypeError):
        coalesce(frame['two'], frame['C'], consider_null=null_markers)
def test_consider_null(self):
    """Exercise the optional ``consider_null`` keyword argument."""
    frame = pd.DataFrame(data=self.reference_data)

    # Treat zero as if it were null, so later columns fill it in.
    zero_as_null = coalesce(frame['two'], frame['three'], consider_null=[0])
    expected_numbers = pd.Series([2, 2, 2.2, 3, 3])
    self.assertTrue((zero_as_null == expected_numbers).all())

    # Several values may be treated as null at once.
    several_nulls = coalesce(frame['D'], frame['C'], consider_null=['d', ''])
    expected_strings = pd.Series(['c', 'cc', 'ccc', 'cccc', 'ccccc'])
    self.assertTrue((several_nulls == expected_strings).all())
def test_numeric_columns(self):
    """Cover the basic numeric case with two and then three columns."""
    frame = pd.DataFrame(data=self.reference_data)

    # Two columns: a gap remains, so replace it with a sentinel string
    # to let the equality comparison cover every row.
    two_way = coalesce(frame['one'], frame['two']).fillna('nan')
    expected_two_way = pd.Series([1., 2., 2.2, 'nan', 0.])
    self.assertTrue((two_way == expected_two_way).all())

    # Three columns: the third column fills the remaining gap.
    three_way = coalesce(frame['one'], frame['two'], frame['three'])
    expected_three_way = pd.Series([1., 2., 2.2, 3., 0.])
    self.assertTrue((three_way == expected_three_way).all())
def test_value_preservation(self):
    """Verify that nulls in a later column never replace valid values."""
    frame = pd.DataFrame(data=self.reference_data)

    # The expected result is column 'C' itself, unchanged by column 'A'.
    result = coalesce(frame['C'], frame['A'])
    unchanged = result == frame['C']
    self.assertTrue(unchanged.all())
def test_cross_type_comparison(self):
    """Verify that columns of different types coalesce in the plain case."""
    frame = pd.DataFrame(data=self.reference_data)

    # Remaining gaps become a sentinel string so equality covers every row.
    filled = coalesce(frame['A'], frame['one'], frame['E']).fillna('nan')
    expected = pd.Series([1, 'aa', 'nan', 'nan', 7])
    self.assertTrue((filled == expected).all())
def test_one_column(self):
    """Verify that coalescing a single column returns it unchanged."""
    frame = pd.DataFrame(data=self.reference_data)

    for name in frame.columns:
        original = frame.loc[:, name]
        result = coalesce(original)

        # Fill gaps on both sides with the same sentinel so that missing
        # entries take part in the equality comparison.
        same_values = result.fillna('nan') == original.fillna('nan')
        self.assertTrue(same_values.all())

        same_index = result.index == original.index
        self.assertTrue(same_index.all())
def test_string_columns(self):
    """Cover the basic string case with two and three columns."""
    frame = pd.DataFrame(data=self.reference_data)

    # Each case pairs the columns to coalesce, in order, with the values
    # the result is expected to hold.
    cases = (
        # Two columns
        (('A', 'C'), ['c', 'aa', 'ccc', 'cccc', 'ccccc']),
        # Three columns
        (('A', 'D', 'C'), ['d', 'aa', 'ccc', 'cccc', 'ccccc']),
        # None is equivalent to NaN
        (('B', 'C'), ['b', 'bb', 'ccc', 'cccc', 'bbbbb']),
    )

    for column_names, expected_values in cases:
        result = coalesce(*[frame[name] for name in column_names])
        expected = pd.Series(expected_values)
        self.assertTrue((result == expected).all())