def test_parses_valid_number_with_custom_characters() -> None:
    """It parses valid numbers with custom group and decimal separators."""
    # Each scenario is (decimalChar, groupChar, [(raw text, expected value), ...]).
    scenarios = [
        # Single-character separators: "," as decimal mark, " " as group mark.
        (
            ",",
            " ",
            [
                ("1 234", 1234.0),
                ("1 234,56", 1234.56),
                ("1 234 567,89", 1234567.89),
                ("1,23", 1.23),
                ("+1,23", 1.23),
                ("-1,23", -1.23),
                (",1", 0.1),
                ("+,1", 0.1),
                ("-,1", -0.1),
                (",1e2", 0.1e2),
            ],
        ),
        # Multi-character separators: ".." as decimal mark, ",," as group mark.
        (
            "..",
            ",,",
            [
                ("1,,234", 1234.0),
                ("1,,234..56", 1234.56),
                ("1,,234,,567..89", 1234567.89),
                ("1..23", 1.23),
                ("+1..23", 1.23),
                ("-1..23", -1.23),
                ("..1", 0.1),
                ("+..1", 0.1),
                ("-..1", -0.1),
                ("..1e2", 0.1e2),
            ],
        ),
    ]
    for decimal_char, group_char, cases in scenarios:
        frame = pd.DataFrame(cases)
        raw, expected = frame[0], frame[1]
        actual = parse_number(raw, decimalChar=decimal_char, groupChar=group_char)
        # Column labels differ (0 vs 1), so series names are not compared.
        pd.testing.assert_series_equal(actual, expected, check_names=False)
def test_parses_valid_number_with_text() -> None:
    """It parses valid numbers with leading and trailing text."""
    nan = float("nan")
    inf = float("inf")
    # (raw text, expected value) pairs; the number is surrounded by
    # currency symbols, units, labels or other non-numeric text.
    cases = [
        # Special values
        ("$nan", nan),
        ("EUR NaN", nan),
        ("inf%", inf),
        ("€INF", inf),
        ("$Inf", inf),
        ("EUR -inf", -inf),
        ("-INF %", -inf),
        ("€-Inf", -inf),
        # Integers and decimals
        ("$1+", 1.0),
        ("$+1", 1.0),
        ("-1 USD ", -1.0),
        ("1.23%", 1.23),
        ("EUR +1.23", 1.23),
        ("$ -1.23 USD", -1.23),
        (".1%", 0.1),
        ("Total: +.1", 0.1),
        ("** -.1 **", -0.1),
        # Exponent notation
        ("$1e2", 1e2),
        ("1E2%", 1e2),
        ("$ +1e2 USD", 1e2),
        ("Total: -1e2", -1e2),
        ("1e2%", 1e2),
        ("EUR 1e+2", 1e2),
        ("E1e-2", 1e-2),
        (".1e2E", 0.1e2),
        ("1e23E", 1e23),
    ]
    frame = pd.DataFrame(cases)
    actual = parse_number(frame[0], bareNumber=False)
    # Column labels differ (0 vs 1), so series names are not compared.
    pd.testing.assert_series_equal(actual, frame[1], check_names=False)
def test_parses_valid_number(raise_first: bool) -> None:
    """It parses valid numbers.

    The test runs with ``OPTIONS.raise_first_invalid_number`` set to
    ``raise_first``. The previous value of that global option is restored
    afterwards, so the setting does not leak into other tests regardless of
    whether the assertion passes or fails.

    Args:
        raise_first: Value assigned to ``OPTIONS.raise_first_invalid_number``
            for the duration of the test.
    """
    previous = OPTIONS.raise_first_invalid_number
    OPTIONS.raise_first_invalid_number = raise_first
    try:
        df = pd.DataFrame([
            ("nan", float("nan")),
            ("+nan", float("nan")),
            ("-nan", float("nan")),
            ("NaN", float("nan")),
            ("inf", float("inf")),
            ("+inf", float("inf")),
            ("-inf", float("-inf")),
            ("INF", float("inf")),
            # NOTE: Python supports 'infinity', Table Schema only supports 'inf'
            # https://docs.python.org/3.8/library/decimal.html
            ("infinity", float("inf")),
            ("1", 1.0),
            ("+1", 1.0),
            ("-1", -1.0),
            ("01", 1.0),
            ("-01", -1.0),
            ("1.", 1.0),
            ("1.000", 1.0),
            ("1.23", 1.23),
            ("+1.23", 1.23),
            ("-1.23", -1.23),
            (".1", 0.1),
            ("+.1", 0.1),
            ("-.1", -0.1),
            ("1e2", 1e2),
            ("1E2", 1e2),
            ("+1e2", 1e2),
            ("-1e2", -1e2),
            ("1e2", 1e2),
            ("1e+2", 1e2),
            ("1e-2", 1e-2),
            (".1e2", 0.1e2),
            ("1.e2", 1e2),
            ("1.2e2", 1.2e2),
            ("0e2", 0e2),
            ("1e23", 1e23),
        ])
        # Column labels differ (0 vs 1), so series names are not compared.
        pd.testing.assert_series_equal(
            parse_number(df[0]), df[1], check_names=False
        )
    finally:
        # Undo the global mutation even when the assertion above fails.
        OPTIONS.raise_first_invalid_number = previous
def test_rejects_ambiguous_number_with_text() -> None:
    """It rejects numbers made ambiguous by leading or trailing text."""
    ambiguous = pd.Series([
        "$nan inf",
        "E 0e2.1",
        "E 0e2-1",
        "E 0e2+1",
        "1e23E2",
    ])
    result = parse_number(ambiguous, bareNumber=False)
    # Every input is expected back under the result's "values" entry.
    rejected = pd.Series(result["values"])
    pd.testing.assert_series_equal(ambiguous, rejected)
def test_rejects_invalid_number() -> None:
    """It rejects invalid numbers."""
    invalid = pd.Series([
        "NA",
        "nan1",
        "++1",
        "--1",
        "1+",
        "1e2+1",
        "e2",
        "1e",
    ])
    result = parse_number(invalid)
    # Every input is expected back under the result's "values" entry.
    rejected = pd.Series(result["values"])
    pd.testing.assert_series_equal(invalid, rejected)