def test_extract_float_from_category(self):
    """Check FLOAT conversion of a category-dtype column.

    The form is built with ``error_means_null=True``. The expected frame
    has NaN for "1" and "-3" (the inputs without a decimal point) and
    2.1, 3.2 and -3 for "2.1", "note: 3.2" and "-3.0".
    """
    form = Form(["A"], True, InputNumberType.FLOAT, error_means_null=True)
    source = pd.DataFrame(
        {"A": ["1", "2.1", "note: 3.2", "-3", "-3.0"]},
        dtype="category",
    )
    actual = form.convert_table(source)
    expected = pd.DataFrame({"A": [np.nan, 2.1, 3.2, np.nan, -3]})
    assert_frame_equal(actual, expected)
def test_extract_float_eu(self):
    """Check FLOAT conversion with ``InputLocale.EU``.

    With ``error_means_null=True`` the test expects "1,234" -> 1.234,
    "2,345.67" -> 2.345 and "3.456" -> NaN.
    """
    form = Form(
        ["A"],
        True,
        InputNumberType.FLOAT,
        InputLocale.EU,
        error_means_null=True,
    )
    source = pd.DataFrame({"A": ["1,234", "2,345.67", "3.456"]})
    actual = form.convert_table(source)
    expected = pd.DataFrame({"A": [1.234, 2.345, np.nan]})
    assert_frame_equal(actual, expected)
def test_error_on_no_match(self):
    """Check the value returned when inputs cannot be converted.

    The form is built without ``error_means_null``; the test expects
    ``convert_table`` to return a message string that names the first
    offending value and reports 2 errors in 1 column.
    """
    expected_message = (
        "'' in row 1 of 'A' cannot be converted. Overall, there are 2 "
        "errors in 1 column. Select 'Convert non-numbers to null' to set "
        "these values to null."
    )
    form = Form(["A"], True, InputNumberType.INTEGER, InputLocale.US)
    source = pd.DataFrame({"A": ["", ".", np.nan, "1", "2.1"]})
    outcome = form.convert_table(source)
    self.assertEqual(outcome, expected_message)
def test_match_eu_thousands_must_be_in_groups_of_3(self):
    """Check that malformed EU-grouped numbers all become NaN.

    The second ``Form`` argument is ``False`` here (presumably the
    non-extracting "match" mode, going by the test name -- confirm
    against ``Form``). With ``error_means_null=True`` the test expects
    "123.4", "2.345.1" and "3.23,123" to all come back as NaN.
    """
    source = pd.DataFrame({"A": ["123.4", "2.345.1", "3.23,123"]})
    form = Form(
        ["A"],
        False,
        InputNumberType.ANY,
        InputLocale.EU,
        error_means_null=True,
    )
    actual = form.convert_table(source)
    expected = pd.DataFrame({"A": [np.nan, np.nan, np.nan]})
    assert_frame_equal(actual, expected)
def test_ignore_numbers(self):
    """Check that an already-numeric column comes back unchanged.

    A form built with only the column list is applied to an integer
    column; the expected frame holds the same values [1, 2].
    """
    form = Form(["A"])
    source = pd.DataFrame({"A": [1, 2]})
    actual = form.convert_table(source)
    expected = pd.DataFrame({"A": [1, 2]})
    assert_frame_equal(actual, expected)
def test_replace_with_null(self):
    """Check that unconvertible values become NaN instead of an error.

    Same input as the no-match error case, but ``Form`` receives a fifth
    positional ``True``. The test expects "", "." and NaN to become NaN,
    and "1" and "2.1" to become 1 and 2.
    """
    form = Form(["A"], True, InputNumberType.INTEGER, InputLocale.US, True)
    source = pd.DataFrame({"A": ["", ".", np.nan, "1", "2.1"]})
    actual = form.convert_table(source)
    expected = pd.DataFrame({"A": [np.nan, np.nan, np.nan, 1, 2]})
    assert_frame_equal(actual, expected)
def test_extract_integer_eu(self):
    """Check INTEGER conversion with ``InputLocale.EU``.

    The test expects "1,234" -> 1, "2,345.67" -> 2 and "3.456" -> 3456.
    """
    form = Form(["A"], True, InputNumberType.INTEGER, InputLocale.EU)
    source = pd.DataFrame({"A": ["1,234", "2,345.67", "3.456"]})
    actual = form.convert_table(source)
    expected = pd.DataFrame({"A": [1, 2, 3456]})
    assert_frame_equal(actual, expected)
def test_extract_integer_no_separator(self):
    """Check INTEGER conversion of digit strings with no separators.

    The test expects "10000" and "20001" to become 10000 and 20001.
    """
    form = Form(["A"], True, InputNumberType.INTEGER)
    source = pd.DataFrame({"A": ["10000", "20001"]})
    actual = form.convert_table(source)
    expected = pd.DataFrame({"A": [10000, 20001]})
    assert_frame_equal(actual, expected)
def test_extract_integer_from_category(self):
    """Check INTEGER conversion of a category-dtype column.

    The test expects "1", "2.1" and "note: 3.2" to become 1, 2 and 3.
    """
    form = Form(["A"], True, InputNumberType.INTEGER)
    source = pd.DataFrame(
        {"A": ["1", "2.1", "note: 3.2"]},
        dtype="category",
    )
    actual = form.convert_table(source)
    expected = pd.DataFrame({"A": [1, 2, 3]})
    assert_frame_equal(actual, expected)
def test_extract_any_eu_thousands_must_be_in_groups_of_3(self):
    """Check ANY conversion of malformed EU-grouped numbers.

    ``Form`` receives ``True`` as both its second and fifth positional
    arguments. The test expects "123.4" -> 123, "2.345.1" -> 2345 and
    "3.23,123" -> 3.
    """
    source = pd.DataFrame({"A": ["123.4", "2.345.1", "3.23,123"]})
    form = Form(["A"], True, InputNumberType.ANY, InputLocale.EU, True)
    actual = form.convert_table(source)
    expected = pd.DataFrame({"A": [123, 2345, 3]})
    assert_frame_equal(actual, expected)
def test_extract_any_many_commas(self):
    """Check ANY conversion of a US number with several comma groups.

    The test expects "1,234,567,890" to become 1234567890.
    """
    form = Form(["A"], True, InputNumberType.ANY, InputLocale.US)
    source = pd.DataFrame({"A": ["1,234,567,890"]})
    actual = form.convert_table(source)
    expected = pd.DataFrame({"A": [1234567890]})
    assert_frame_equal(actual, expected)
def test_extract_any_us(self):
    """Check ANY conversion with ``InputLocale.US``.

    The test expects "1,234" -> 1234, "2,345.67" -> 2345.67 and
    "3.456" -> 3.456.
    """
    form = Form(["A"], True, InputNumberType.ANY, InputLocale.US)
    source = pd.DataFrame({"A": ["1,234", "2,345.67", "3.456"]})
    actual = form.convert_table(source)
    expected = pd.DataFrame({"A": [1234, 2345.67, 3.456]})
    assert_frame_equal(actual, expected)
def test_extract_any_from_category(self):
    """Check ANY conversion of a category-dtype column.

    The test expects "1", "2.1" and "note: 3.2" to become 1.0, 2.1
    and 3.2.
    """
    form = Form(["A"], True, InputNumberType.ANY)
    source = pd.DataFrame(
        {"A": ["1", "2.1", "note: 3.2"]},
        dtype="category",
    )
    actual = form.convert_table(source)
    expected = pd.DataFrame({"A": [1.0, 2.1, 3.2]})
    assert_frame_equal(actual, expected)
def test_extract_any_from_str(self):
    """Check ANY conversion of a plain string column.

    With ``error_means_null=True`` the test expects "1", "2.1",
    "note: 3.2" and "-3.1" to become 1.0, 2.1, 3.2 and -3.1.
    """
    form = Form(["A"], True, InputNumberType.ANY, error_means_null=True)
    source = pd.DataFrame({"A": ["1", "2.1", "note: 3.2", "-3.1"]})
    actual = form.convert_table(source)
    expected = pd.DataFrame({"A": [1.0, 2.1, 3.2, -3.1]})
    assert_frame_equal(actual, expected)
def test_match_unicode_minus(self):
    """Check that ASCII and Unicode minus signs both give negatives.

    The input holds "-1" (ASCII hyphen-minus) and U+2212 MINUS SIGN
    followed by "2"; the test expects -1 and -2 from a form built with
    only the column list.
    """
    form = Form(["A"])
    source = pd.DataFrame({"A": ["-1", "\u22122"]})
    actual = form.convert_table(source)
    expected = pd.DataFrame({"A": [-1, -2]})
    assert_frame_equal(actual, expected)