示例#1
0
def test_series_replace_with_nulls():
    """Exercise ``Series.replace`` when nulls are involved.

    Each replaced series is passed through ``fillna(-10)`` before it is
    converted to an array, so the comparison is made against plain numbers.
    """
    fill = -10
    no_nulls = Series(np.array([0, 1, 2, 3, 4]))

    # Scalar value replaced by a null.
    scalar_out = no_nulls.replace(0, None).fillna(fill)
    np.testing.assert_equal(
        scalar_out.to_array(), np.array([-10, 1, 2, 3, 4])
    )

    # List of values replaced by a list that contains a null.
    list_out = no_nulls.replace([0, 1], [None, 6]).fillna(fill)
    np.testing.assert_equal(
        list_out.to_array(), np.array([-10, 6, 2, 3, 4])
    )

    # From here on the input series itself ends with a null.
    with_null = Series([0, 1, 2, 3, 4, None])
    float_out = with_null.replace([0, 1], [5.5, 6.5]).fillna(fill)
    np.testing.assert_equal(
        float_out.to_array(), np.array([5.5, 6.5, 2, 3, 4, -10])
    )

    # A slice of the series itself used as the values to replace.
    head = with_null[:3]
    series_out = with_null.replace(head, None).fillna(fill)
    np.testing.assert_equal(
        series_out.to_array(), np.array([-10, -10, -10, 3, 4, -10])
    )

    # Replacement list mixing a null with a float.
    mixed_out = with_null.replace([0, 1], [None, 6.5]).fillna(fill)
    np.testing.assert_equal(
        mixed_out.to_array(), np.array([-10, 6.5, 2, 3, 4, -10])
    )
示例#2
0
def test_series_replace():
    """Cover ``Series.replace`` with scalar, categorical, list and array input.

    The final sections check ``null_count`` and the length of ``to_array()``
    after replacing four values in a 401-element series, first with a
    trailing null present and then after that null has been filled.
    """
    base = Series(np.array([0, 1, 2, 3, 4]))

    # Scalar replacement.
    scalar_out = base.replace(0, 5)
    assert_eq(np.array([5, 1, 2, 3, 4]), scalar_out.to_array())

    # Categorical data: replace with an existing category, then a new one.
    pd_cat = pd.Series(["one", "two", "three"], dtype="category")
    pd_existing = pd_cat.replace("one", "two")
    cat = Series.from_pandas(pd_cat)
    existing = cat.replace("one", "two")
    assert_eq(pd_existing, existing)

    pd_new = pd_cat.replace("one", "five")
    new = cat.replace("one", "five")
    assert_eq(pd_new, new)

    # List-to-list replacement.
    list_out = base.replace([0, 1], [5, 6])
    assert_eq(np.array([5, 6, 2, 3, 4]), list_out.to_array())

    # Float replacements on the integer series are expected to be rejected.
    with pytest.raises(TypeError):
        base.replace([0, 1], [5.5, 6.5])

    # Array (taken from the series itself) as the values to replace.
    first_three = base[:3].to_array()
    array_out = base.replace(first_three, 5)
    assert_eq(np.array([5, 5, 5, 3, 4]), array_out.to_array())

    total = 401

    # Large input that already contains one null.
    big = Series([*range(400), None])
    nulled = big.replace([22, 323, 27, 0], None)
    assert nulled.null_count == 5
    assert len(nulled.to_array()) == total - 5

    filled = big.replace([22, 323, 27, 0], -1)
    assert filled.null_count == 1
    assert len(filled.to_array()) == total - 1

    # Same large input once its null has been filled.
    big = big.fillna(-11)
    nulled_dense = big.replace([22, 323, 27, 0], None)
    assert nulled_dense.null_count == 4
    assert len(nulled_dense.to_array()) == total - 4

    filled_dense = big.replace([22, 323, 27, 0], -1)
    assert filled_dense.null_count == 0
    assert len(filled_dense.to_array()) == total
示例#3
0
def test_series_replace_with_nulls():
    """Exercise ``Series.replace`` when nulls are involved.

    Results are passed through ``fillna(-10)`` and converted with
    ``to_array()`` before being compared via ``assert_eq``.
    """
    fill = -10
    ints = Series(np.array([0, 1, 2, 3, 4]))

    # Scalar value replaced by a null.
    scalar_out = ints.replace(0, None).fillna(fill)
    assert_eq(np.array([-10, 1, 2, 3, 4]), scalar_out.to_array())

    # List of values replaced by a list that contains a null.
    list_out = ints.replace([0, 1], [None, 6]).fillna(fill)
    assert_eq(np.array([-10, 6, 2, 3, 4]), list_out.to_array())

    # From here on the input series itself ends with a null.
    nullable = Series([0, 1, 2, 3, 4, None])

    # Float replacements are expected to raise for this series.
    with pytest.raises(TypeError):
        nullable.replace([0, 1], [5.5, 6.5]).fillna(fill)

    # A cudf.Series, indexed by the first three elements, as to_replace.
    to_replace = cudf.Series([-10] * 3, index=nullable[:3])
    series_out = nullable.replace(to_replace, None).fillna(fill)
    assert_eq(np.array([-10, -10, -10, 3, 4, -10]), series_out.to_array())

    # Replacement list mixing a null with an integer.
    mixed_out = nullable.replace([0, 1], [None, 6]).fillna(fill)
    assert_eq(np.array([-10, 6, 2, 3, 4, -10]), mixed_out.to_array())
示例#4
0
def test_series_replace():
    """Cover ``Series.replace`` with scalar, categorical, list and Series input.

    The final sections check ``null_count`` and the length of ``to_array()``
    after replacing four values in a 401-element series, first with a
    trailing null present and then after that null has been filled.
    """
    base = Series(np.array([0, 1, 2, 3, 4]))

    # Scalar replacement.
    scalar_out = base.replace(0, 5)
    np.testing.assert_equal(
        scalar_out.to_array(), np.array([5, 1, 2, 3, 4])
    )

    # Categorical data, compared with pandas after converting back.
    pd_cat = pd.Series(["one", "two", "three"], dtype="category")
    pd_expected = pd_cat.replace("one", "two")
    cat = Series.from_pandas(pd_cat)
    cat_out = cat.replace("one", "two")
    pd.testing.assert_series_equal(cat_out.to_pandas(), pd_expected)

    # List-to-list replacement with integer, then float, replacements.
    int_out = base.replace([0, 1], [5, 6])
    np.testing.assert_equal(int_out.to_array(), np.array([5, 6, 2, 3, 4]))

    float_out = base.replace([0, 1], [5.5, 6.5])
    np.testing.assert_equal(
        float_out.to_array(), np.array([5.5, 6.5, 2, 3, 4])
    )

    # A slice of the series itself as the values to replace.
    head = base[:3]
    series_out = base.replace(head, 5)
    np.testing.assert_equal(
        series_out.to_array(), np.array([5, 5, 5, 3, 4])
    )

    total = 401

    # Large input that already contains one null.
    big = Series([*range(400), None])
    nulled = big.replace([22, 323, 27, 0], None)
    assert nulled.null_count == 5
    assert len(nulled.to_array()) == total - 5

    filled = big.replace([22, 323, 27, 0], -1)
    assert filled.null_count == 1
    assert len(filled.to_array()) == total - 1

    # Same large input once its null has been filled.
    big = big.fillna(-11)
    nulled_dense = big.replace([22, 323, 27, 0], None)
    assert nulled_dense.null_count == 4
    assert len(nulled_dense.to_array()) == total - 4

    filled_dense = big.replace([22, 323, 27, 0], -1)
    assert filled_dense.null_count == 0
    assert len(filled_dense.to_array()) == total
示例#5
0
def test_series_multiple_times_with_nulls():
    """Repeat the same replace-with-null call and expect an all-null result.

    Tied to issue #2695; see the comment inside the loop.
    """
    source = Series([1, 2, 3, None])
    all_null = Series([None, None, None, None], dtype=np.int64)

    for _ in range(3):
        result = source.replace([1, 2, 3], None)
        assert_eq(all_null, result)
        # BUG: #2695
        # The throw-away series below grabs a chunk of memory and writes
        # values into it; those values may linger after the memory is
        # released. A later replace call might be handed that same memory,
        # leaving junk in the region used for the mask. If the mask is not
        # rewritten properly the result would be wrong, so allocating here
        # on every iteration helps verify that scenario.
        Series([1, 1, 1, None])
示例#6
0
def test_replace_inplace():
    """Compare ``replace`` with pandas, with and without ``inplace=True``.

    Sections, in order: numeric Series in place, numeric Series with a
    dict and with no arguments, categorical Series in place, DataFrame in
    place, an empty array as ``to_replace``, dict-per-column DataFrame
    replacement, empty lists, and finally a call expected to raise
    ``TypeError`` on both the pandas and the GPU frame.
    """
    values = np.array([5, 1, 2, 3, 4])

    # Numeric Series, in place. Copies are taken first and compared again
    # after the replace.
    gsr = Series(values)
    ps = pd.Series(values)
    gsr_snapshot = gsr.copy()
    ps_snapshot = ps.copy()

    assert_eq(gsr, ps)
    assert_eq(gsr_snapshot, ps_snapshot)
    gsr.replace(5, 0, inplace=True)
    ps.replace(5, 0, inplace=True)
    assert_eq(gsr, ps)
    assert_eq(gsr_snapshot, ps_snapshot)

    # Numeric Series, dict form without inplace: the returned objects are
    # discarded and the originals are compared.
    gsr = Series(values)
    ps = pd.Series(values)
    gsr_snapshot = gsr.copy()
    ps_snapshot = ps.copy()

    assert_eq(gsr, ps)
    assert_eq(gsr_snapshot, ps_snapshot)
    gsr.replace({5: 0, 3: -5})
    ps.replace({5: 0, 3: -5})
    assert_eq(gsr, ps)
    assert_eq(gsr_snapshot, ps_snapshot)

    # replace() called with no arguments at all.
    g_noargs = gsr.replace()
    p_noargs = ps.replace()
    assert_eq(g_noargs, p_noargs)

    # Categorical Series, in place.
    ps = pd.Series(["one", "two", "three"], dtype="category")
    gsr = Series.from_pandas(ps)
    gsr_snapshot = gsr.copy()
    ps_snapshot = ps.copy()

    assert_eq(gsr, ps)
    assert_eq(gsr_snapshot, ps_snapshot)
    gsr.replace("one", "two", inplace=True)
    ps.replace("one", "two", inplace=True)
    assert_eq(gsr, ps)
    assert_eq(gsr_snapshot, ps_snapshot)

    # DataFrame, in place.
    pdf = pd.DataFrame({"A": [0, 1, 2, 3, 4], "B": [5, 6, 7, 8, 9]})
    gdf = DataFrame.from_pandas(pdf)
    pdf_snapshot = pdf.copy()
    gdf_snapshot = gdf.copy()

    assert_eq(pdf, gdf)
    assert_eq(pdf_snapshot, gdf_snapshot)
    pdf.replace(5, 0, inplace=True)
    gdf.replace(5, 0, inplace=True)
    assert_eq(pdf, gdf)
    assert_eq(pdf_snapshot, gdf_snapshot)

    # Empty integer array as the values to replace.
    pds = pd.Series([1, 2, 3, 45])
    gds = Series.from_pandas(pds)
    vals = np.array([]).astype(int)

    expected = pds.replace(vals, -1)
    got = gds.replace(vals, -1)
    assert_eq(expected, got)

    pds.replace(vals, 77, inplace=True)
    gds.replace(vals, 77, inplace=True)
    assert_eq(pds, gds)

    # DataFrame replacement driven by per-column dicts.
    pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]})
    gdf = DataFrame.from_pandas(pdf)

    expected = pdf.replace({"a": 2}, {"a": -33})
    got = gdf.replace({"a": 2}, {"a": -33})
    assert_eq(expected, got)

    expected = pdf.replace({"a": [2, 5]}, {"a": [9, 10]})
    got = gdf.replace({"a": [2, 5]}, {"a": [9, 10]})
    assert_eq(expected, got)

    # Empty lists for both arguments.
    expected = pdf.replace([], [])
    got = gdf.replace([], [])
    assert_eq(expected, got)

    # Scalar to_replace with an empty list value must raise on both frames.
    for frame in (pdf, gdf):
        with pytest.raises(TypeError):
            frame.replace(-1, [])
示例#7
0
def test_replace_strings():
    """Replacing one string value should give the same result as pandas."""
    host = pd.Series(["a", "b", "c", "d"])
    device = Series(["a", "b", "c", "d"])

    expected = host.replace("a", "e")
    actual = device.replace("a", "e")
    assert_eq(expected, actual)
示例#8
0
def test_replace_inplace():
    """Compare ``replace`` with pandas, with and without ``inplace=True``.

    Sections, in order: numeric Series in place, numeric Series with a
    dict and with no arguments, categorical Series in place, DataFrame in
    place, an empty array as ``to_replace``, dict-per-column DataFrame
    replacement, empty lists, and finally a check through
    ``assert_exceptions_equal`` for a scalar ``to_replace`` paired with an
    empty-list ``value``.
    """
    values = np.array([5, 1, 2, 3, 4])

    # Numeric Series, in place. Copies are taken first and compared again
    # after the replace.
    gsr = Series(values)
    ps = pd.Series(values)
    gsr_snapshot = gsr.copy()
    ps_snapshot = ps.copy()

    assert_eq(gsr, ps)
    assert_eq(gsr_snapshot, ps_snapshot)
    gsr.replace(5, 0, inplace=True)
    ps.replace(5, 0, inplace=True)
    assert_eq(gsr, ps)
    assert_eq(gsr_snapshot, ps_snapshot)

    # Numeric Series, dict form without inplace: the returned objects are
    # discarded and the originals are compared.
    gsr = Series(values)
    ps = pd.Series(values)
    gsr_snapshot = gsr.copy()
    ps_snapshot = ps.copy()

    assert_eq(gsr, ps)
    assert_eq(gsr_snapshot, ps_snapshot)
    gsr.replace({5: 0, 3: -5})
    ps.replace({5: 0, 3: -5})
    assert_eq(gsr, ps)
    assert_eq(gsr_snapshot, ps_snapshot)

    # replace() called with no arguments at all.
    g_noargs = gsr.replace()
    p_noargs = ps.replace()
    assert_eq(g_noargs, p_noargs)

    # Categorical Series, in place.
    ps = pd.Series(["one", "two", "three"], dtype="category")
    gsr = Series.from_pandas(ps)
    gsr_snapshot = gsr.copy()
    ps_snapshot = ps.copy()

    assert_eq(gsr, ps)
    assert_eq(gsr_snapshot, ps_snapshot)
    gsr.replace("one", "two", inplace=True)
    ps.replace("one", "two", inplace=True)
    assert_eq(gsr, ps)
    assert_eq(gsr_snapshot, ps_snapshot)

    # DataFrame, in place.
    pdf = pd.DataFrame({"A": [0, 1, 2, 3, 4], "B": [5, 6, 7, 8, 9]})
    gdf = DataFrame.from_pandas(pdf)
    pdf_snapshot = pdf.copy()
    gdf_snapshot = gdf.copy()

    assert_eq(pdf, gdf)
    assert_eq(pdf_snapshot, gdf_snapshot)
    pdf.replace(5, 0, inplace=True)
    gdf.replace(5, 0, inplace=True)
    assert_eq(pdf, gdf)
    assert_eq(pdf_snapshot, gdf_snapshot)

    # Empty integer array as the values to replace.
    pds = pd.Series([1, 2, 3, 45])
    gds = Series.from_pandas(pds)
    vals = np.array([]).astype(int)

    expected = pds.replace(vals, -1)
    got = gds.replace(vals, -1)
    assert_eq(expected, got)

    pds.replace(vals, 77, inplace=True)
    gds.replace(vals, 77, inplace=True)
    assert_eq(pds, gds)

    # DataFrame replacement driven by per-column dicts.
    pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]})
    gdf = DataFrame.from_pandas(pdf)

    expected = pdf.replace({"a": 2}, {"a": -33})
    got = gdf.replace({"a": 2}, {"a": -33})
    assert_eq(expected, got)

    expected = pdf.replace({"a": [2, 5]}, {"a": [9, 10]})
    got = gdf.replace({"a": [2, 5]}, {"a": [9, 10]})
    assert_eq(expected, got)

    # Empty lists for both arguments.
    expected = pdf.replace([], [])
    got = gdf.replace([], [])
    assert_eq(expected, got)

    # Scalar to_replace with an empty list value: hand the same call to
    # both libraries; the error messages themselves are not compared.
    pandas_call = ([], {"to_replace": -1, "value": []})
    gpu_call = ([], {"to_replace": -1, "value": []})
    assert_exceptions_equal(
        lfunc=pdf.replace,
        rfunc=gdf.replace,
        lfunc_args_and_kwargs=pandas_call,
        rfunc_args_and_kwargs=gpu_call,
        compare_error_message=False,
    )