def test_fit_min_max_auto(self):
    """Test fit min and max parameters with ``'auto'``.

    If the min or max parameters are set to ``'auto'`` the ``fit`` method
    should learn them from the fitted data.

    Input:
    - Array of numbers and a null value
    Side Effect:
    - ``_min_value`` and ``_max_value`` are learned
    """
    # Setup
    data = np.array([-100, -5000, 0, None, 100, 4000])

    # Run
    # Builtin ``float`` is used because the ``np.float`` alias was removed
    # in NumPy 1.24.
    transformer = NumericalTransformer(
        dtype=float,
        nan='nan',
        min_value='auto',
        max_value='auto',
    )
    transformer.fit(data)

    # Asserts
    assert transformer._min_value == -5000
    assert transformer._max_value == 4000
def test_fit_min_max_none(self):
    """Test fit min and max parameters with ``None``.

    If the min and max parameters are set to ``None``, the ``fit`` method
    should not set its ``min`` or ``max`` instance variables.

    Input:
    - Array of floats and a null value
    Side Effect:
    - ``_min_value`` and ``_max_value`` stay ``None``
    """
    # Setup
    data = np.array([1.5, None, 2.5])

    # Run
    # Builtin ``float`` is used because the ``np.float`` alias was removed
    # in NumPy 1.24.
    transformer = NumericalTransformer(
        dtype=float,
        nan='nan',
        min_value=None,
        max_value=None,
    )
    transformer.fit(data)

    # Asserts
    assert transformer._min_value is None
    assert transformer._max_value is None
def test_fit_rounding_auto_max_decimals(self):
    """Test fit rounding parameter with ``'auto'``.

    If the ``rounding`` parameter is set to ``'auto'``, ``fit`` should learn
    the ``_rounding_digits`` to be the max number of decimal places seen in
    the data. The max amount of decimals that floats can be accurately
    compared with is 15. If the input data has values with more than 14
    decimals, we will not be able to accurately learn the number of decimal
    places required, so we do not round.

    Input:
    - Array with a value that has 15 decimals
    Side Effect:
    - ``_rounding_digits`` is set to ``None``
    """
    # Setup
    data = np.array([0.000000000000001])

    # Run
    # Builtin ``float`` is used because the ``np.float`` alias was removed
    # in NumPy 1.24.
    transformer = NumericalTransformer(dtype=float, nan='nan', rounding='auto')
    transformer.fit(data)

    # Asserts
    assert transformer._rounding_digits is None
def test_fit_rounding_auto_max_inf(self):
    """Test fit rounding parameter with ``'auto'`` and infinite values.

    If the ``rounding`` parameter is set to ``'auto'`` and the data contains
    infinite values, ``fit`` should learn ``_rounding_digits`` from the
    remaining finite values only. Every finite value used here is a multiple
    of 1000, so the expected result is ``-3``.

    Input:
    - Array with ``np.inf`` as a value
    Side Effect:
    - ``_rounding_digits`` is set to the value learned from the finite data
    """
    # Setup
    data = np.array([15000, 4000, 60000, np.inf])

    # Run
    # Builtin ``float`` is used because the ``np.float`` alias was removed
    # in NumPy 1.24.
    transformer = NumericalTransformer(dtype=float, nan='nan', rounding='auto')
    transformer.fit(data)

    # Asserts
    assert transformer._rounding_digits == -3
def test_fit_rounding_auto_large_numbers(self):
    """Test fit rounding parameter with ``'auto'`` and very large numbers.

    If the ``rounding`` parameter is set to ``'auto'`` and the data is very
    large, ``fit`` should learn ``_rounding_digits`` to be the biggest number
    of 0s to round to that keeps the data the same.

    Input:
    - Array of ten powers of ten with random exponents from 10 to 19
    Side Effect:
    - ``_rounding_digits`` is set to the negated minimum exponent seen in
      the data
    """
    # Setup
    # The exponents are drawn one at a time and the powers are computed from
    # the list elements rather than from a NumPy integer array.
    exponents = [np.random.randint(10, 20) for _ in range(10)]
    big_numbers = [10 ** exponent for exponent in exponents]
    data = np.array(big_numbers)

    # Run
    # Builtin ``float`` is used because the ``np.float`` alias was removed
    # in NumPy 1.24.
    transformer = NumericalTransformer(dtype=float, nan='nan', rounding='auto')
    transformer.fit(data)

    # Asserts
    assert transformer._rounding_digits == -min(exponents)
def test_fit_rounding_int(self):
    """Test fit rounding parameter with int.

    If the ``rounding`` parameter is set to an int, the ``fit`` method should
    keep that value in ``rounding`` and use it as ``_rounding_digits``.

    Input:
    - An array with floats rounded to one decimal and a None value
    Side Effect:
    - ``rounding`` and ``_rounding_digits`` are the provided int
    """
    # Setup
    data = np.array([1.5, None, 2.5])
    expected_digits = 3

    # Run
    # Builtin ``float`` is used because the ``np.float`` alias was removed
    # in NumPy 1.24.
    transformer = NumericalTransformer(
        dtype=float,
        nan='nan',
        rounding=expected_digits,
    )
    transformer.fit(data)

    # Asserts
    assert transformer.rounding == expected_digits
    assert transformer._rounding_digits == expected_digits
def test_fit(self):
    """Test fit with ``nan='nan'`` on a numpy.array.

    Input:
    - Array of floats and a null value
    Side Effect:
    - ``null_transformer.fill_value`` is ``'nan'`` and ``_dtype`` is the
      dtype passed to the constructor
    """
    # Setup
    data = np.array([1.5, None, 2.5])

    # Run
    # Builtin ``float`` is used because the ``np.float`` alias was removed
    # in NumPy 1.24.
    transformer = NumericalTransformer(dtype=float, nan='nan')
    transformer.fit(data)

    # Asserts
    expect_fill_value = 'nan'
    expect_dtype = float
    assert transformer.null_transformer.fill_value == expect_fill_value
    assert transformer._dtype == expect_dtype
def test_fit_rounding_auto_max_zero(self):
    """Test fit rounding parameter with ``'auto'`` when the max is 0.

    If the ``rounding`` parameter is set to ``'auto'``, and the max in the
    data is 0, ``fit`` should learn the ``_rounding_digits`` to be 0.

    Input:
    - Array with 0 as max value
    Side Effect:
    - ``_rounding_digits`` is set to 0
    """
    # Setup
    data = np.array([0, 0, 0])

    # Run
    # Builtin ``float`` is used because the ``np.float`` alias was removed
    # in NumPy 1.24.
    transformer = NumericalTransformer(dtype=float, nan='nan', rounding='auto')
    transformer.fit(data)

    # Asserts
    assert transformer._rounding_digits == 0
def test_fit_rounding_auto(self):
    """Test fit rounding parameter with ``'auto'``.

    If the ``rounding`` parameter is set to ``'auto'``, ``fit`` should learn
    the ``_rounding_digits`` to be the max number of decimal places seen in
    the data.

    Input:
    - Array of floats with up to 4 decimals
    Side Effect:
    - ``_rounding_digits`` is set to 4
    """
    # Setup
    data = np.array([1, 2.1, 3.12, 4.123, 5.1234, 6.123, 7.12, 8.1, 9])

    # Run
    # Builtin ``float`` is used because the ``np.float`` alias was removed
    # in NumPy 1.24.
    transformer = NumericalTransformer(dtype=float, nan='nan', rounding='auto')
    transformer.fit(data)

    # Asserts
    assert transformer._rounding_digits == 4
def test_fit_min_max_int(self):
    """Test fit min and max parameters with int values.

    If the min and max parameters are set to an int, the ``fit`` method
    should not change them.

    Input:
    - Array of floats and a null value
    Side Effect:
    - ``_min_value`` and ``_max_value`` remain unchanged
    """
    # Setup
    data = np.array([1.5, None, 2.5])

    # Run
    # Builtin ``float`` is used because the ``np.float`` alias was removed
    # in NumPy 1.24.
    transformer = NumericalTransformer(
        dtype=float,
        nan='nan',
        min_value=1,
        max_value=10,
    )
    transformer.fit(data)

    # Asserts
    assert transformer._min_value == 1
    assert transformer._max_value == 10
def test_fit_rounding_auto_max_negative(self):
    """Test fit rounding parameter with ``'auto'`` and a negative max.

    If the ``rounding`` parameter is set to ``'auto'``, and the max in the
    data is negative, the ``fit`` method should still learn
    ``_rounding_digits`` from those negative values. Every value used here
    is a multiple of 10, so the expected result is ``-1``.

    Input:
    - Array with negative max value
    Side Effect:
    - ``_rounding_digits`` is set to ``-1``
    """
    # Setup
    data = np.array([-500, -220, -10])

    # Run
    # Builtin ``float`` is used because the ``np.float`` alias was removed
    # in NumPy 1.24.
    transformer = NumericalTransformer(dtype=float, nan='nan', rounding='auto')
    transformer.fit(data)

    # Asserts
    assert transformer._rounding_digits == -1
def test_fit_rounding_none(self):
    """Test fit rounding parameter with ``None``.

    If the rounding parameter is set to ``None``, the ``fit`` method should
    not set its ``rounding`` or ``_rounding_digits`` instance variables.

    Input:
    - An array with floats rounded to one decimal and a None value
    Side Effect:
    - ``rounding`` and ``_rounding_digits`` continue to be ``None``
    """
    # Setup
    data = np.array([1.5, None, 2.5])

    # Run
    # Builtin ``float`` is used because the ``np.float`` alias was removed
    # in NumPy 1.24.
    transformer = NumericalTransformer(dtype=float, nan='nan', rounding=None)
    transformer.fit(data)

    # Asserts
    assert transformer.rounding is None
    assert transformer._rounding_digits is None