示例#1
0
def test_parses_valid_number_with_custom_characters() -> None:
    """Check parsing when the decimal and group separators are overridden.

    Two separator configurations are exercised in turn: single-character
    separators (``,`` as decimal, a space as group) and multi-character
    separators (``..`` as decimal, ``,,`` as group). Each case pairs the
    raw text with the float that ``parse_number`` is expected to return.
    """
    scenarios = (
        (
            {"decimalChar": ",", "groupChar": " "},
            [
                ("1 234", 1234.0),
                ("1 234,56", 1234.56),
                ("1 234 567,89", 1234567.89),
                ("1,23", 1.23),
                ("+1,23", 1.23),
                ("-1,23", -1.23),
                (",1", 0.1),
                ("+,1", 0.1),
                ("-,1", -0.1),
                (",1e2", 0.1e2),
            ],
        ),
        (
            {"decimalChar": "..", "groupChar": ",,"},
            [
                ("1,,234", 1234.0),
                ("1,,234..56", 1234.56),
                ("1,,234,,567..89", 1234567.89),
                ("1..23", 1.23),
                ("+1..23", 1.23),
                ("-1..23", -1.23),
                ("..1", 0.1),
                ("+..1", 0.1),
                ("-..1", -0.1),
                ("..1e2", 0.1e2),
            ],
        ),
    )
    for separators, cases in scenarios:
        # Column 0 holds the raw strings, column 1 the expected floats.
        frame = pd.DataFrame(cases)
        actual = parse_number(frame[0], **separators)
        pd.testing.assert_series_equal(actual, frame[1], check_names=False)
示例#2
0
def test_parses_valid_number_with_text() -> None:
    """Check that numbers wrapped in extra text parse with bareNumber=False.

    Every case pairs a string carrying leading and/or trailing non-numeric
    text (currency symbols, units, labels) with the float expected from
    ``parse_number``.
    """
    nan = float("nan")
    inf = float("inf")
    cases = [
        # Special values.
        ("$nan", nan),
        ("EUR NaN", nan),
        ("inf%", inf),
        ("€INF", inf),
        ("$Inf", inf),
        ("EUR -inf", -inf),
        ("-INF %", -inf),
        ("€-Inf", -inf),
        # Integers and decimals.
        ("$1+", 1.0),
        ("$+1", 1.0),
        ("-1 USD ", -1.0),
        ("1.23%", 1.23),
        ("EUR +1.23", 1.23),
        ("$ -1.23 USD", -1.23),
        (".1%", 0.1),
        ("Total: +.1", 0.1),
        ("** -.1 **", -0.1),
        # Exponent notation, including text that itself contains 'E'.
        ("$1e2", 1e2),
        ("1E2%", 1e2),
        ("$ +1e2 USD", 1e2),
        ("Total: -1e2", -1e2),
        ("1e2%", 1e2),
        ("EUR 1e+2", 1e2),
        ("E1e-2", 1e-2),
        (".1e2E", 0.1e2),
        ("1e23E", 1e23),
    ]
    frame = pd.DataFrame(cases)
    raw, expected = frame[0], frame[1]
    actual = parse_number(raw, bareNumber=False)
    pd.testing.assert_series_equal(actual, expected, check_names=False)
示例#3
0
def test_parses_valid_number(raise_first: bool) -> None:
    """It parses valid numbers.

    Args:
        raise_first: Value assigned to
            ``OPTIONS.raise_first_invalid_number`` while the parser runs.
            The previous value is restored afterwards so the setting does
            not leak into tests that run later.
    """
    df = pd.DataFrame([
        ("nan", float("nan")),
        ("+nan", float("nan")),
        ("-nan", float("nan")),
        ("NaN", float("nan")),
        ("inf", float("inf")),
        ("+inf", float("inf")),
        ("-inf", float("-inf")),
        ("INF", float("inf")),
        # NOTE: Python supports 'infinity', Table Schema only supports 'inf'
        # https://docs.python.org/3.8/library/decimal.html
        ("infinity", float("inf")),
        ("1", 1.0),
        ("+1", 1.0),
        ("-1", -1.0),
        ("01", 1.0),
        ("-01", -1.0),
        ("1.", 1.0),
        ("1.000", 1.0),
        ("1.23", 1.23),
        ("+1.23", 1.23),
        ("-1.23", -1.23),
        (".1", 0.1),
        ("+.1", 0.1),
        ("-.1", -0.1),
        ("1e2", 1e2),
        ("1E2", 1e2),
        ("+1e2", 1e2),
        ("-1e2", -1e2),
        ("1e+2", 1e2),
        ("1e-2", 1e-2),
        (".1e2", 0.1e2),
        ("1.e2", 1e2),
        ("1.2e2", 1.2e2),
        ("0e2", 0e2),
        ("1e23", 1e23),
    ])
    # OPTIONS is not local to this test, so an attribute assigned here
    # outlives the call. Remember the old value and always put it back,
    # even when the assertion below fails.
    previous = OPTIONS.raise_first_invalid_number
    OPTIONS.raise_first_invalid_number = raise_first
    try:
        pd.testing.assert_series_equal(parse_number(df[0]),
                                       df[1],
                                       check_names=False)
    finally:
        OPTIONS.raise_first_invalid_number = previous
示例#4
0
def test_rejects_ambiguous_number_with_text() -> None:
    """Check that text-wrapped inputs with an ambiguous number are rejected.

    With ``bareNumber=False``, the ``"values"`` entry of the result
    returned by ``parse_number`` must equal the full input series.
    """
    ambiguous = ["$nan inf", "E 0e2.1", "E 0e2-1", "E 0e2+1", "1e23E2"]
    inputs = pd.Series(ambiguous)
    result = parse_number(inputs, bareNumber=False)
    rejected = pd.Series(result["values"])
    pd.testing.assert_series_equal(inputs, rejected)
示例#5
0
def test_rejects_invalid_number() -> None:
    """Check that malformed number strings are all reported as invalid.

    The ``"values"`` entry of the result returned by ``parse_number`` must
    equal the full input series.
    """
    malformed = ["NA", "nan1", "++1", "--1", "1+", "1e2+1", "e2", "1e"]
    inputs = pd.Series(malformed)
    result = parse_number(inputs)
    rejected = pd.Series(result["values"])
    pd.testing.assert_series_equal(inputs, rejected)