def test__analyze_invalid_dtype(self):
    """Expect ``_analyze`` to raise ``ValueError`` for a dtype list that includes 'complex'."""
    # Table with one integer column and one complex-valued column.
    table = pd.DataFrame({
        'int': [1, 2, None],
        'complex': [1.0 + 0j, 2.0 + 1j, None],
    })
    hyper = HyperTransformer(dtypes=['int', 'complex'])

    # Analyzing the table must fail.
    with pytest.raises(ValueError):
        hyper._analyze(table)
def test__analyze_raise_error(self):
    """Expect ``_analyze`` to raise ``ValueError`` for a mocked table and a mocked dtype."""
    # A stand-in table exposing a single column name, and a stand-in
    # instance whose only dtype entry is itself a Mock.
    fake_table = Mock(columns=['foo'])
    fake_instance = Mock(dtypes=[Mock()])

    # Call the method unbound so the Mock plays the role of ``self``.
    with self.assertRaises(ValueError):
        HyperTransformer._analyze(fake_instance, fake_table)
def test__analyze(self):
    """Check the transformer class ``_analyze`` picks for each column of a mixed table.

    The ``HyperTransformer`` is built with ``dtype_transformers`` mapping
    ``'O'`` to ``'one_hot_encoding'``.
    """
    # Transformer class expected for every column of the table below.
    expected_types = {
        'int': NumericalTransformer,
        'float': NumericalTransformer,
        'object': OneHotEncodingTransformer,
        'category': OneHotEncodingTransformer,
        'bool': BooleanTransformer,
        'datetime': DatetimeTransformer,
    }

    table = pd.DataFrame({
        'int': [1, 2, None],
        'float': [1.0, 2.0, None],
        'object': ['foo', 'bar', None],
        'category': [1, 2, None],
        'bool': [True, False, None],
        'datetime': pd.to_datetime(['1965-05-23', None, '1997-10-17']),
    })
    # The 'category' column only becomes categorical after this cast.
    table['category'] = table['category'].astype('category')

    hyper = HyperTransformer(dtype_transformers={'O': 'one_hot_encoding'})
    analyzed = hyper._analyze(table)

    # One entry per column, each holding the expected transformer type.
    assert isinstance(analyzed, dict)
    assert set(analyzed.keys()) == set(expected_types)
    for column, transformer_class in expected_types.items():
        assert isinstance(analyzed[column], transformer_class)
def test__analyze_bool(self):
    """Check that ``_analyze`` yields a ``BooleanTransformer`` for a boolean column."""
    # Boolean column containing a missing value.
    table = pd.DataFrame({'booleans': [True, False, None, False, True]})
    fake_instance = Mock(dtypes=[bool])

    # Call the method unbound so the Mock plays the role of ``self``.
    analyzed = HyperTransformer._analyze(fake_instance, table)

    self.assertIsInstance(analyzed['booleans'], BooleanTransformer)
def test__analyze_object(self):
    """Check that ``_analyze`` yields a ``CategoricalTransformer`` for an object column."""
    # Setup
    data = pd.DataFrame({'objects': ['foo', 'bar', None, 'tar']})
    # Use the builtin ``object``: ``np.object`` was only a deprecated alias
    # for it and has been removed from recent NumPy releases, where accessing
    # it raises AttributeError.
    dtypes = [object]

    # Run: call the method unbound so the Mock plays the role of ``self``.
    transformer = Mock()
    transformer.dtypes = dtypes
    result = HyperTransformer._analyze(transformer, data)

    # Asserts
    self.assertIsInstance(result['objects'], CategoricalTransformer)
def test__analyze_int(self):
    """Check that ``_analyze`` yields a ``NumericalTransformer`` when the dtype is ``int``."""
    # Integer values with one missing entry in the middle.
    table = pd.DataFrame({'integers': [1, 2, 3, 4, 5, None, 6, 7, 8, 9, 0]})
    fake_instance = Mock(dtypes=[int])

    # Call the method unbound so the Mock plays the role of ``self``.
    analyzed = HyperTransformer._analyze(fake_instance, table)

    self.assertIsInstance(analyzed['integers'], NumericalTransformer)
def test__analyze_float(self):
    """Check that ``_analyze`` yields a ``NumericalTransformer`` when the dtype is ``float``."""
    # Float values with one missing entry in the middle.
    values = [1.1, 2.2, 3.3, 4.4, 5.5, None, 6.6, 7.7, 8.8, 9.9, 0.0]
    table = pd.DataFrame({'floats': values})
    fake_instance = Mock(dtypes=[float])

    # Call the method unbound so the Mock plays the role of ``self``.
    analyzed = HyperTransformer._analyze(fake_instance, table)

    self.assertIsInstance(analyzed['floats'], NumericalTransformer)
def test__analyze_datetime64(self):
    """Check that ``_analyze`` yields a ``DatetimeTransformer`` when the dtype is ``np.datetime64``."""
    # Parse the date strings first, then wrap the parsed series in a frame.
    raw_dates = pd.Series(['1965-05-23', None, '1997-10-17'])
    parsed_dates = pd.to_datetime(raw_dates, format='%Y-%m-%d', errors='coerce')
    table = pd.DataFrame({'datetimes': parsed_dates})
    fake_instance = Mock(dtypes=[np.datetime64])

    # Call the method unbound so the Mock plays the role of ``self``.
    analyzed = HyperTransformer._analyze(fake_instance, table)

    self.assertIsInstance(analyzed['datetimes'], DatetimeTransformer)