Exemplo n.º 1
0
def test_endswith_nullable_string_dtype(nullable_string_dtype, na):
    """Check ``Series.str.endswith`` on nullable string dtypes.

    Missing inputs are expected to come back as ``na`` and the result is
    expected to use the ``"boolean"`` dtype. The ``"rege."`` suffix is expected
    to match only the entry that literally ends in ``"rege."``.
    """
    # Condition handed to tm.maybe_produces_warning for every call below.
    warns = nullable_string_dtype == "string[pyarrow]" and pa_version_under4p0
    values = Series(
        ["om", None, "foo_nom", "nom", "bar_foo", None, "foo", "regex", "rege."],
        dtype=nullable_string_dtype,
    )

    # (suffix, expected flags)
    cases = [
        ("foo", [False, na, False, False, True, na, True, False, False]),
        ("rege.", [False, na, False, False, False, na, False, False, True]),
    ]
    for suffix, flags in cases:
        with tm.maybe_produces_warning(PerformanceWarning, warns):
            result = values.str.endswith(suffix, na=na)
        exp = Series(flags, dtype="boolean")
        tm.assert_series_equal(result, exp)
Exemplo n.º 2
0
def test_nunique_null(null_obj, index_or_series_obj):
    """Check ``nunique`` with and without ``dropna`` once nulls are present.

    The first two underlying values are overwritten with ``null_obj``, every
    value is then repeated a growing number of times, and the unique counts
    are compared against ``len(obj.unique())``.
    """
    obj = index_or_series_obj

    # Guard clauses: inputs that cannot take NA values are skipped.
    if not allow_na_ops(obj):
        pytest.skip("type doesn't allow for NA operations")
    if isinstance(obj, pd.MultiIndex):
        pytest.skip(f"MultiIndex can't hold '{null_obj}'")

    values = obj._values
    values[0:2] = null_obj

    # Rebuild the same container type from the repeated values.
    repeated_values = np.repeat(values, range(1, len(values) + 1))
    obj = type(obj)(repeated_values, dtype=obj.dtype)

    if isinstance(obj, pd.CategoricalIndex):
        n_categories = len(obj.categories)
        assert obj.nunique() == n_categories
        assert obj.nunique(dropna=False) == n_categories + 1
        return

    # Same condition is used for all three calls below.
    warns = (
        pa_version_under2p0
        and str(index_or_series_obj.dtype) == "string[pyarrow]"
    )
    with tm.maybe_produces_warning(PerformanceWarning, warns):
        num_unique_values = len(obj.unique())
    with tm.maybe_produces_warning(PerformanceWarning, warns):
        assert obj.nunique() == max(0, num_unique_values - 1)
    with tm.maybe_produces_warning(PerformanceWarning, warns):
        assert obj.nunique(dropna=False) == max(0, num_unique_values)
Exemplo n.º 3
0
def test_replace_moar(any_string_dtype):
    """Exercise ``Series.str.replace`` with literal, case-insensitive and regex patterns.

    Originates from PR #1179.
    """
    is_pyarrow = any_string_dtype == "string[pyarrow]"
    ser = Series(
        ["A", "B", "C", "Aaba", "Baca", "", np.nan, "CABA", "dog", "cat"],
        dtype=any_string_dtype,
    )

    # (positional args, keyword args, warning condition, expected values)
    scenarios = [
        (
            ("A", "YYY"),
            {},
            is_pyarrow and pa_version_under4p0,
            [
                "YYY", "B", "C", "YYYaba", "Baca",
                "", np.nan, "CYYYBYYY", "dog", "cat",
            ],
        ),
        (
            ("A", "YYY"),
            {"case": False},
            is_pyarrow,
            [
                "YYY", "B", "C", "YYYYYYbYYY", "BYYYcYYY",
                "", np.nan, "CYYYBYYY", "dog", "cYYYt",
            ],
        ),
        (
            ("^.a|dog", "XX-XX "),
            {"case": False, "regex": True},
            is_pyarrow,
            [
                "A", "B", "C", "XX-XX ba", "XX-XX ca",
                "", np.nan, "XX-XX BA", "XX-XX ", "XX-XX t",
            ],
        ),
    ]
    for args, kwargs, warns, values in scenarios:
        with tm.maybe_produces_warning(PerformanceWarning, warns):
            result = ser.str.replace(*args, **kwargs)
        expected = Series(values, dtype=any_string_dtype)
        tm.assert_series_equal(result, expected)
Exemplo n.º 4
0
def test_isin(dtype, fixed_now_ts):
    """Check ``Series.isin`` on string data.

    Covers a plain lookup, a lookup containing ``pd.NA`` (expected to match the
    missing entry), an empty lookup, and a lookup mixing a string with
    ``fixed_now_ts``.
    """
    # The previous condition compared ``dtype`` with the bare word "pyarrow",
    # which is not a dtype spelling used anywhere else in this file and so
    # could never enable the warning check. Use the "string[pyarrow]" spelling
    # that the sibling tests compare against.
    # NOTE(review): assumes ``dtype`` is a dtype string or a pandas string dtype
    # object that compares equal to "string[pyarrow]" -- confirm against the
    # fixture definition.
    warns = dtype == "string[pyarrow]" and pa_version_under2p0
    s = pd.Series(["a", "b", None], dtype=dtype)

    # (values to look up, expected membership flags)
    cases = [
        (["a", "c"], [True, False, False]),
        (["a", pd.NA], [True, False, True]),
        ([], [False, False, False]),
        (["a", fixed_now_ts], [True, False, False]),
    ]
    for lookup, flags in cases:
        with tm.maybe_produces_warning(PerformanceWarning, warns):
            result = s.isin(lookup)
        expected = pd.Series(flags)
        tm.assert_series_equal(result, expected)
Exemplo n.º 5
0
def test_contains_moar(any_string_dtype):
    """Exercise ``Series.str.contains`` with and without ``case=False``.

    Originates from PR #1179. The expected result dtype is ``"object"`` for the
    object dtype and ``"boolean"`` otherwise.
    """
    is_pyarrow = any_string_dtype == "string[pyarrow]"
    old_pyarrow = is_pyarrow and pa_version_under4p0
    expected_dtype = "object" if any_string_dtype == "object" else "boolean"

    s = Series(
        ["A", "B", "C", "Aaba", "Baca", "", np.nan, "CABA", "dog", "cat"],
        dtype=any_string_dtype,
    )

    # (pattern, keyword args, warning condition, expected flags)
    scenarios = [
        (
            "a",
            {},
            old_pyarrow,
            [False, False, False, True, True, False, np.nan, False, False, True],
        ),
        (
            "a",
            {"case": False},
            is_pyarrow,
            [True, False, False, True, True, False, np.nan, True, False, True],
        ),
        (
            "Aa",
            {},
            old_pyarrow,
            [False, False, False, True, False, False, np.nan, False, False, False],
        ),
        (
            "ba",
            {},
            old_pyarrow,
            [False, False, False, True, False, False, np.nan, False, False, False],
        ),
        (
            "ba",
            {"case": False},
            is_pyarrow,
            [False, False, False, True, True, False, np.nan, True, False, False],
        ),
    ]
    for pat, kwargs, warns, flags in scenarios:
        with tm.maybe_produces_warning(PerformanceWarning, warns):
            result = s.str.contains(pat, **kwargs)
        expected = Series(flags, dtype=expected_dtype)
        tm.assert_series_equal(result, expected)
Exemplo n.º 6
0
def test_replace_not_case_sensitive_not_regex(any_string_dtype):
    """Check ``Series.str.replace`` with ``case=False`` and ``regex=False``.

    See https://github.com/pandas-dev/pandas/issues/41602. The second case
    expects ``"a."`` to replace only entries containing a literal dot.
    """
    warns = any_string_dtype == "string[pyarrow]"
    ser = Series(["A.", "a.", "Ab", "ab", np.nan], dtype=any_string_dtype)

    # (pattern, replacement, expected values)
    cases = [
        ("a", "c", ["c.", "c.", "cb", "cb", np.nan]),
        ("a.", "c.", ["c.", "c.", "Ab", "ab", np.nan]),
    ]
    for pat, repl, values in cases:
        with tm.maybe_produces_warning(PerformanceWarning, warns):
            result = ser.str.replace(pat, repl, case=False, regex=False)
        expected = Series(values, dtype=any_string_dtype)
        tm.assert_series_equal(result, expected)
Exemplo n.º 7
0
def test_match(any_string_dtype):
    """Exercise ``Series.str.match`` with several patterns.

    The original comment notes this covers the match behavior introduced in
    0.13. The expected result dtype is ``"object"`` for the object dtype and
    ``"boolean"`` otherwise.
    """
    warns = any_string_dtype == "string[pyarrow]" and pa_version_under4p0
    expected_dtype = "object" if any_string_dtype == "object" else "boolean"

    # Each group is (input data, [(pattern, expected flags), ...]); the input
    # Series is built once per group and reused for all of its patterns.
    groups = [
        (
            ["fooBAD__barBAD", np.nan, "foo"],
            [(".*(BAD[_]+).*(BAD)", [True, np.nan, False])],
        ),
        (
            ["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"],
            [
                (".*BAD[_]+.*BAD", [True, True, np.nan, False]),
                ("BAD[_]+.*BAD", [False, True, np.nan, False]),
            ],
        ),
        (
            ["fooBAD__barBAD", "^BAD_BADleroybrown", np.nan, "foo"],
            [
                ("^BAD[_]+.*BAD", [False, False, np.nan, False]),
                ("\\^BAD[_]+.*BAD", [False, True, np.nan, False]),
            ],
        ),
    ]
    for data, checks in groups:
        values = Series(data, dtype=any_string_dtype)
        for pat, flags in checks:
            with tm.maybe_produces_warning(PerformanceWarning, warns):
                result = values.str.match(pat)
            expected = Series(flags, dtype=expected_dtype)
            tm.assert_series_equal(result, expected)
Exemplo n.º 8
0
def test_unique(index_or_series_obj):
    """Check that ``unique`` returns the distinct values in first-seen order.

    Every element is repeated a growing number of times, and the result of
    ``unique()`` is compared with an order-preserving de-duplication of the
    repeated values.
    """
    repeated = np.repeat(
        index_or_series_obj, range(1, len(index_or_series_obj) + 1)
    )
    warns = (
        pa_version_under2p0
        and str(index_or_series_obj.dtype) == "string[pyarrow]"
    )
    with tm.maybe_produces_warning(PerformanceWarning, warns):
        result = repeated.unique()

    # dict.fromkeys preserves the order
    unique_values = list(dict.fromkeys(repeated.values))

    # Anything that is not an Index is compared as a plain numpy array.
    if not isinstance(repeated, pd.Index):
        expected = np.array(unique_values)
        tm.assert_numpy_array_equal(result, expected)
        return

    if isinstance(repeated, pd.MultiIndex):
        expected = pd.MultiIndex.from_tuples(unique_values)
        expected.names = repeated.names
    elif repeated._is_backward_compat_public_numeric_index:
        expected = NumericIndex(unique_values, dtype=repeated.dtype)
    else:
        expected = pd.Index(unique_values, dtype=repeated.dtype)
        if is_datetime64tz_dtype(repeated.dtype):
            expected = expected.normalize()
    tm.assert_index_equal(result, expected, exact=True)
Exemplo n.º 9
0
def test_replace_regex_single_character(regex, any_string_dtype):
    """Check that a single-character pattern is replaced as a literal string.

    See https://github.com/pandas-dev/pandas/pull/24809. The expected output
    replaces every ``"."`` with ``"a"`` whatever ``regex`` is. When ``regex``
    is ``None`` a ``FutureWarning`` with the message below is asserted.
    """
    pyarrow_warn = any_string_dtype == "string[pyarrow]" and pa_version_under4p0
    s = Series(["a.b", ".", "b", np.nan, ""], dtype=any_string_dtype)

    # Pick the warning context up front, then run the call once.
    if regex is None:
        msg = re.escape(
            "The default value of regex will change from True to False in a future "
            "version. In addition, single character regular expressions will *not* "
            "be treated as literal strings when regex=True."
        )
        warning_ctx = tm.assert_produces_warning(
            FutureWarning,
            match=msg,
            raise_on_extra_warnings=not pyarrow_warn,
        )
    else:
        warning_ctx = tm.maybe_produces_warning(PerformanceWarning, pyarrow_warn)

    with warning_ctx:
        result = s.str.replace(".", "a", regex=regex)

    expected = Series(["aab", "a", "b", np.nan, ""], dtype=any_string_dtype)
    tm.assert_series_equal(result, expected)
Exemplo n.º 10
0
def test_replace_compiled_regex(any_string_dtype):
    """Check ``Series.str.replace`` with a compiled pattern (GH 15446).

    Runs once without a replacement limit and once with ``n=1``.
    """
    warns = any_string_dtype == "string[pyarrow]"
    ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
    pat = re.compile(r"BAD_*")

    # (extra keyword args, expected first element)
    for extra, replaced in (({}, "foobar"), ({"n": 1}, "foobarBAD")):
        with tm.maybe_produces_warning(PerformanceWarning, warns):
            result = ser.str.replace(pat, "", regex=True, **extra)
        expected = Series([replaced, np.nan], dtype=any_string_dtype)
        tm.assert_series_equal(result, expected)
Exemplo n.º 11
0
def test_match_case_kwarg(any_string_dtype):
    """Check that ``Series.str.match`` with ``case=False`` matches every entry."""
    if any_string_dtype == "object":
        expected_dtype = np.bool_
    else:
        expected_dtype = "boolean"

    values = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype)
    uses_pyarrow = any_string_dtype == "string[pyarrow]"
    with tm.maybe_produces_warning(PerformanceWarning, uses_pyarrow):
        result = values.str.match("ab", case=False)

    tm.assert_series_equal(
        result, Series([True, True, True, True], dtype=expected_dtype)
    )
Exemplo n.º 12
0
def test_replace_compiled_regex_unicode(any_string_dtype):
    """Check ``Series.str.replace`` with a compiled ``re.UNICODE`` pattern.

    The comma sits between a word character and a non-ASCII letter, and is
    expected to be replaced by ``", "``.
    """
    source_text = b"abcd,\xc3\xa0".decode("utf-8")
    replaced_text = b"abcd, \xc3\xa0".decode("utf-8")

    ser = Series([source_text], dtype=any_string_dtype)
    expected = Series([replaced_text], dtype=any_string_dtype)
    pat = re.compile(r"(?<=\w),(?=\w)", flags=re.UNICODE)

    uses_pyarrow = any_string_dtype == "string[pyarrow]"
    with tm.maybe_produces_warning(PerformanceWarning, uses_pyarrow):
        result = ser.str.replace(pat, ", ")
    tm.assert_series_equal(result, expected)
Exemplo n.º 13
0
def test_nunique(index_or_series_obj):
    """Check that ``nunique(dropna=False)`` equals ``len(unique())``.

    Every element is first repeated a growing number of times.
    """
    repeat_counts = range(1, len(index_or_series_obj) + 1)
    obj = np.repeat(index_or_series_obj, repeat_counts)

    warns = (
        pa_version_under2p0
        and str(index_or_series_obj.dtype) == "string[pyarrow]"
    )
    with tm.maybe_produces_warning(PerformanceWarning, warns):
        expected = len(obj.unique())
    assert obj.nunique(dropna=False) == expected
Exemplo n.º 14
0
def test_replace_max_replacements(any_string_dtype):
    """Check that ``n=1`` limits ``Series.str.replace`` to one replacement.

    Runs once with a regex pattern and once with a literal pattern.
    """
    warns = any_string_dtype == "string[pyarrow]" and pa_version_under4p0
    ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)

    # (pattern, regex flag, expected first element)
    cases = [
        ("BAD[_]*", True, "foobarBAD"),
        ("BAD", False, "foo__barBAD"),
    ]
    for pat, use_regex, replaced in cases:
        expected = Series([replaced, np.nan], dtype=any_string_dtype)
        with tm.maybe_produces_warning(PerformanceWarning, warns):
            result = ser.str.replace(pat, "", n=1, regex=use_regex)
        tm.assert_series_equal(result, expected)
Exemplo n.º 15
0
 def test_dropna_array(self, data_missing):
     """Check that ``dropna`` keeps only the element at position 1."""
     # Short-circuit order is kept: the storage is only inspected when the
     # version flag is set.
     warns = (
         pa_version_under6p0 and data_missing.dtype.storage == "pyarrow"
     )
     with tm.maybe_produces_warning(PerformanceWarning, warns):
         result = data_missing.dropna()

     self.assert_extension_array_equal(result, data_missing[[1]])
Exemplo n.º 16
0
def test_replace_callable_named_groups(any_string_dtype):
    """Check ``Series.str.replace`` with a callable that reads a named group."""

    def repl(m):
        # Keep only the "middle" group, with its letter case swapped.
        return m.group("middle").swapcase()

    pat = r"(?P<first>\w+) (?P<middle>\w+) (?P<last>\w+)"
    ser = Series(["Foo Bar Baz", np.nan], dtype=any_string_dtype)

    uses_pyarrow = any_string_dtype == "string[pyarrow]"
    with tm.maybe_produces_warning(PerformanceWarning, uses_pyarrow):
        result = ser.str.replace(pat, repl, regex=True)

    expected = Series(["bAR", np.nan], dtype=any_string_dtype)
    tm.assert_series_equal(result, expected)
Exemplo n.º 17
0
def test_strip_lstrip_rstrip_args(any_string_dtype, method, exp):
    """Check the string accessor method named ``method`` when called with ``"x"``.

    ``exp`` holds the expected values for that method.
    """
    warns = any_string_dtype == "string[pyarrow]" and pa_version_under4p0
    ser = Series(["xxABCxx", "xx BNSD", "LDFJH xx"], dtype=any_string_dtype)
    strip_func = getattr(ser.str, method)

    with tm.maybe_produces_warning(PerformanceWarning, warns):
        result = strip_func("x")

    tm.assert_series_equal(result, Series(exp, dtype=any_string_dtype))
Exemplo n.º 18
0
def test_strip_lstrip_rstrip(any_string_dtype, method, exp):
    """Check the string accessor method named ``method`` when called with no arguments.

    ``exp`` holds the expected values for that method.
    """
    warns = any_string_dtype == "string[pyarrow]" and pa_version_under4p0
    ser = Series(["  aa   ", " bb \n", np.nan, "cc  "], dtype=any_string_dtype)
    strip_func = getattr(ser.str, method)

    with tm.maybe_produces_warning(PerformanceWarning, warns):
        result = strip_func()

    tm.assert_series_equal(result, Series(exp, dtype=any_string_dtype))
Exemplo n.º 19
0
def test_replace_literal(regex, expected, any_string_dtype):
    """Check ``Series.str.replace`` of ``"f."`` for the given ``regex`` flag.

    GH16808: literal replace (``regex=False``) versus ``regex=True``.
    ``expected`` holds the expected values for the supplied ``regex``.
    """
    warns = any_string_dtype == "string[pyarrow]" and pa_version_under4p0
    ser = Series(["f.o", "foo", np.nan], dtype=any_string_dtype)
    expected_ser = Series(expected, dtype=any_string_dtype)

    with tm.maybe_produces_warning(PerformanceWarning, warns):
        result = ser.str.replace("f.", "ba", regex=regex)

    tm.assert_series_equal(result, expected_ser)
Exemplo n.º 20
0
def test_replace_compiled_regex_callable(any_string_dtype):
    """Check ``Series.str.replace`` with a compiled pattern, a callable and ``n=2``."""

    def repl(m):
        # Swap the letter case of the whole match.
        return m.group(0).swapcase()

    pat = re.compile("[a-z][A-Z]{2}")
    ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)

    uses_pyarrow = any_string_dtype == "string[pyarrow]"
    with tm.maybe_produces_warning(PerformanceWarning, uses_pyarrow):
        result = ser.str.replace(pat, repl, n=2)

    expected = Series(["foObaD__baRbaD", np.nan], dtype=any_string_dtype)
    tm.assert_series_equal(result, expected)
Exemplo n.º 21
0
def test_replace_callable_raises(any_string_dtype, repl):
    """Check that a replacement callable with a wrong signature raises ``TypeError``.

    GH 15055. The message pattern covers both the "takes ..." and the
    "missing ... required" wordings.
    """
    values = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
    uses_pyarrow = any_string_dtype == "string[pyarrow]"

    # test with wrong number of arguments, raising an error
    msg = (
        r"((takes)|(missing)) (?(2)from \d+ to )?\d+ "
        r"(?(3)required )positional arguments?"
    )
    # The raises-context stays outermost, exactly as in the nested form.
    with pytest.raises(TypeError, match=msg), tm.maybe_produces_warning(
        PerformanceWarning, uses_pyarrow
    ):
        values.str.replace("a", repl)
Exemplo n.º 22
0
def test_fullmatch_na_kwarg(any_string_dtype):
    """Check ``Series.str.fullmatch`` with ``na=False``.

    The missing entry is expected to come back as ``False``.
    """
    warns = any_string_dtype == "string[pyarrow]" and pa_version_under4p0
    ser = Series(
        ["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"],
        dtype=any_string_dtype,
    )

    with tm.maybe_produces_warning(PerformanceWarning, warns):
        result = ser.str.fullmatch(".*BAD[_]+.*BAD", na=False)

    if any_string_dtype == "object":
        expected_dtype = np.bool_
    else:
        expected_dtype = "boolean"
    expected = Series([True, False, False, False], dtype=expected_dtype)
    tm.assert_series_equal(result, expected)
Exemplo n.º 23
0
def test_match_na_kwarg(any_string_dtype):
    """Check ``Series.str.match`` with and without ``na=False`` (GH #6609).

    With ``na=False`` the missing entry is expected to be ``False``; without
    it the missing entry is expected to stay missing.
    """
    is_object = any_string_dtype == "object"
    warns = any_string_dtype == "string[pyarrow]" and pa_version_under4p0
    s = Series(["a", "b", np.nan], dtype=any_string_dtype)

    # (keyword args, expected dtype, expected flags)
    cases = [
        (
            {"na": False},
            np.bool_ if is_object else "boolean",
            [True, False, False],
        ),
        (
            {},
            "object" if is_object else "boolean",
            [True, False, np.nan],
        ),
    ]
    for kwargs, expected_dtype, flags in cases:
        with tm.maybe_produces_warning(PerformanceWarning, warns):
            result = s.str.match("a", **kwargs)
        expected = Series(flags, dtype=expected_dtype)
        tm.assert_series_equal(result, expected)
Exemplo n.º 24
0
def test_contains_nan(any_string_dtype):
    """Check ``Series.str.contains`` on all-missing input for several ``na`` values.

    PR #14171. Covers ``na=False``, ``na=True``, ``na="foo"`` and the default.
    """
    is_object = any_string_dtype == "object"
    warns = any_string_dtype == "string[pyarrow]" and pa_version_under4p0
    s = Series([np.nan, np.nan, np.nan], dtype=any_string_dtype)

    # Boolean fill values share one expected dtype.
    bool_dtype = np.bool_ if is_object else "boolean"
    for fill, flags in ((False, [False, False, False]), (True, [True, True, True])):
        with tm.maybe_produces_warning(PerformanceWarning, warns):
            result = s.str.contains("foo", na=fill)
        expected = Series(flags, dtype=bool_dtype)
        tm.assert_series_equal(result, expected)

    # A string fill value: the object dtype is expected to return the fill
    # value itself, other dtypes are expected to return True.
    with tm.maybe_produces_warning(PerformanceWarning, warns):
        result = s.str.contains("foo", na="foo")
    if is_object:
        expected = Series(["foo", "foo", "foo"], dtype=np.object_)
    else:
        expected = Series([True, True, True], dtype="boolean")
    tm.assert_series_equal(result, expected)

    # Default ``na``: missing values are expected to stay missing.
    with tm.maybe_produces_warning(PerformanceWarning, warns):
        result = s.str.contains("foo")
    expected = Series(
        [np.nan, np.nan, np.nan],
        dtype="object" if is_object else "boolean",
    )
    tm.assert_series_equal(result, expected)
Exemplo n.º 25
0
def test_len(any_string_dtype):
    """Check ``Series.str.len``, including a missing value and a non-ASCII entry.

    The expected dtype is ``"float64"`` for the object dtype and ``"Int64"``
    otherwise.
    """
    warns = any_string_dtype == "string[pyarrow]" and pa_version_under4p0
    strings = ["foo", "fooo", "fooooo", np.nan, "fooooooo", "foo\n", "あ"]
    ser = Series(strings, dtype=any_string_dtype)

    with tm.maybe_produces_warning(PerformanceWarning, warns):
        result = ser.str.len()

    if any_string_dtype == "object":
        expected_dtype = "float64"
    else:
        expected_dtype = "Int64"
    expected = Series([3, 4, 6, np.nan, 8, 4, 1], dtype=expected_dtype)
    tm.assert_series_equal(result, expected)
Exemplo n.º 26
0
def test_contains_na_kwarg_for_nullable_string_dtype(
    nullable_string_dtype, na, expected, regex
):
    """Check the ``na`` argument of ``Series.str.contains`` on nullable string dtypes.

    See https://github.com/pandas-dev/pandas/pull/41025#issuecomment-824062416.
    ``expected`` is the value expected in the position of the missing entry.
    """
    # The warning condition additionally depends on ``regex``.
    warns = (
        nullable_string_dtype == "string[pyarrow]"
        and pa_version_under4p0
        and regex
    )
    values = Series(["a", "b", "c", "a", np.nan], dtype=nullable_string_dtype)

    with tm.maybe_produces_warning(PerformanceWarning, warns):
        result = values.str.contains("a", na=na, regex=regex)

    expected_ser = Series([True, False, False, True, expected], dtype="boolean")
    tm.assert_series_equal(result, expected_ser)
Exemplo n.º 27
0
def test_fullmatch_case_kwarg(any_string_dtype):
    """Check ``Series.str.fullmatch`` with ``case`` and with ``flags=re.IGNORECASE``.

    The two case-insensitive variants are expected to give the same result.
    """
    is_pyarrow = any_string_dtype == "string[pyarrow]"
    expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean"
    ser = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype)

    # (keyword args, warning condition, expected flags)
    scenarios = [
        (
            {"case": True},
            is_pyarrow and pa_version_under4p0,
            [True, False, False, False],
        ),
        ({"case": False}, is_pyarrow, [True, True, False, False]),
        ({"flags": re.IGNORECASE}, is_pyarrow, [True, True, False, False]),
    ]
    for kwargs, warns, flags in scenarios:
        expected = Series(flags, dtype=expected_dtype)
        with tm.maybe_produces_warning(PerformanceWarning, warns):
            result = ser.str.fullmatch("ab", **kwargs)
        tm.assert_series_equal(result, expected)
Exemplo n.º 28
0
def test_pipe_failures(any_string_dtype):
    """Check that ``"|"`` works as a split separator and as a literal pattern (#2119)."""
    ser = Series(["A|B|C"], dtype=any_string_dtype)

    # Splitting on the pipe character yields the three parts as a list.
    parts = ser.str.split("|")
    tm.assert_series_equal(parts, Series([["A", "B", "C"]], dtype=object))

    # Literal (non-regex) replacement of the pipe character.
    warns = any_string_dtype == "string[pyarrow]" and pa_version_under4p0
    with tm.maybe_produces_warning(PerformanceWarning, warns):
        replaced = ser.str.replace("|", " ", regex=False)
    tm.assert_series_equal(replaced, Series(["A B C"], dtype=any_string_dtype))
Exemplo n.º 29
0
def test_flags_kwarg(any_string_dtype):
    """Check that ``flags=re.IGNORECASE`` is honoured by the string accessor methods.

    The pattern uses upper-case character classes only, so the lower-case
    addresses match only when the flag is applied. Covers ``extract``,
    ``match``, ``fullmatch``, ``findall``, ``count`` and ``contains``.
    """
    # The addresses had been masked as "*****@*****.**", which the pattern
    # below cannot match, so every assertion on the first row failed. Dave's
    # address is fixed by the assertions ("dave", "google", "com"); the other
    # two only need to be well-formed addresses.
    data = {
        "Dave": "dave@google.com",
        "Steve": "steve@gmail.com",
        "Rob": "rob@gmail.com",
        "Wes": np.nan,
    }
    data = Series(data, dtype=any_string_dtype)

    pat = r"([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})"

    using_pyarrow = any_string_dtype == "string[pyarrow]"

    result = data.str.extract(pat, flags=re.IGNORECASE, expand=True)
    assert result.iloc[0].tolist() == ["dave", "google", "com"]

    with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow):
        result = data.str.match(pat, flags=re.IGNORECASE)
    assert result[0]

    with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow):
        result = data.str.fullmatch(pat, flags=re.IGNORECASE)
    assert result[0]

    result = data.str.findall(pat, flags=re.IGNORECASE)
    assert result[0][0] == ("dave", "google", "com")

    result = data.str.count(pat, flags=re.IGNORECASE)
    assert result[0] == 1

    # ``contains`` with a pattern that has groups is asserted to emit a
    # UserWarning; extra warnings are tolerated only for pyarrow strings.
    msg = "has match groups"
    with tm.assert_produces_warning(
        UserWarning, match=msg, raise_on_extra_warnings=not using_pyarrow
    ):
        result = data.str.contains(pat, flags=re.IGNORECASE)
    assert result[0]
Exemplo n.º 30
0
def test_ismethods(method, expected, any_string_dtype):
    """Check the string accessor predicate named ``method``.

    The result is compared with ``expected`` and with the same-named ``str``
    method applied to each element.
    """
    warns = (
        any_string_dtype == "string[pyarrow]"
        and pa_version_under2p0
        and method == "isspace"
    )
    ser = Series(
        ["A", "b", "Xy", "4", "3A", "", "TT", "55", "-", "  "],
        dtype=any_string_dtype,
    )
    expected_dtype = "bool" if any_string_dtype == "object" else "boolean"
    expected_ser = Series(expected, dtype=expected_dtype)

    predicate = getattr(ser.str, method)
    with tm.maybe_produces_warning(PerformanceWarning, warns):
        result = predicate()
    tm.assert_series_equal(result, expected_ser)

    # compare with standard library
    stdlib_flags = [getattr(item, method)() for item in ser]
    assert list(result) == stdlib_flags