示例#1
0
def test_concatenate_blocks():
    """Concatenating a StringArray-backed Series with itself repeats its values."""
    string_array = fr.StringArray(TEST_ARRAY)
    series = pd.Series(string_array, index=pd.RangeIndex(3), fastpath=True)

    # ignore_index=True so the result carries a fresh default index, like
    # the expected Series built below.
    combined = pd.concat([series, series], ignore_index=True)

    doubled_values = pa.array(["Test", "string", None, "Test", "string", None])
    expected = pd.Series(fr.StringArray(doubled_values))
    tm.assert_series_equal(combined, expected)
示例#2
0
def test_dataframe_from_series_no_dict():
    """A DataFrame built directly from a Series equals the dict-built one."""
    # Unnamed Series: the comparison frame uses the column label 0.
    unnamed = pd.Series(fr.StringArray(TEST_ARRAY))
    from_unnamed = pd.DataFrame(unnamed)
    tm.assert_frame_equal(from_unnamed, pd.DataFrame({0: unnamed}))

    # Named Series: the comparison frame uses the Series name as the label.
    named = pd.Series(fr.StringArray(TEST_ARRAY), name="A")
    from_named = pd.DataFrame(named)
    tm.assert_frame_equal(from_named, pd.DataFrame({"A": named}))
示例#3
0
def test_series_attributes():
    """Basic Series attributes are available on a StringArray-backed Series."""
    series = pd.Series(fr.StringArray(TEST_ARRAY))

    assert series.ndim == 1
    assert series.size == 3
    assert series.base is not None

    # Compare the transposed Series element-wise with the Series itself.
    transposed_matches = series.T == series
    assert transposed_matches.all()

    assert series.memory_usage() > 8
示例#4
0
def test_nbytes():
    """``nbytes`` reports at least the minimal storage assumed for three entries."""
    values = pa.array(["A", None, "CC"])
    string_array = fr.StringArray(values)

    # Lower bound used by this check:
    #   1 byte for the validity bitmap
    # + 4 bytes for the offset array
    # + 3 bytes of string content ("A" and "CC")
    assert string_array.nbytes >= 8
示例#5
0
 def setup(self):
     """Create two single-column frames holding the same sparse string data.

     ``self.df`` stores the values as a plain pandas column, ``self.df_ext``
     wraps them in a fletcher ``StringArray`` built from a pyarrow array.
     """
     # Every 7th integer becomes its decimal text repeated three times;
     # all other positions are None.
     values = [str(n) * 3 if n % 7 == 0 else None for n in range(2**15)]

     self.df = pd.DataFrame({"str": values})
     arrow_values = pa.array(values)
     self.df_ext = pd.DataFrame({"str": fr.StringArray(arrow_values)})
示例#6
0
def test_groupby():
    """Grouping by a StringArray column sums the integer column per key."""
    keys = fr.StringArray(["a", "a", "b", None])
    frame = pd.DataFrame({"str": keys, "int": [10, 5, 24, 6]})
    summed = frame.groupby("str").sum()

    # The expected frame has groups "a" and "b" only; the row keyed by
    # None does not appear in it.
    expected_index = pd.Index(["a", "b"], name="str")
    expected = pd.DataFrame({"int": [15, 24]}, index=expected_index)
    tm.assert_frame_equal(summed, expected)
示例#7
0
def test_dataframe_constructor():
    """A DataFrame can be built from a StringArray and keeps its dtype."""
    frame = pd.DataFrame({"A": fr.StringArray(TEST_ARRAY)})

    column_dtype = frame.dtypes["A"]
    assert isinstance(column_dtype, fr.StringDtype)
    assert frame.shape == (3, 1)

    # Smoke-test typical DataFrame calls: they only need to run without
    # raising, their output is not inspected.
    str(frame)
    frame.info()
示例#8
0
def test_factorize():
    """``StringArray.factorize`` agrees with ``pd.factorize`` on object data."""
    string_array = fr.StringArray(TEST_ARRAY)
    codes, uniques = string_array.factorize()

    as_objects = string_array.astype(object)
    expected_codes, expected_uniques = pd.factorize(as_objects)

    # The uniques must keep the extension array type.
    assert isinstance(uniques, fr.StringArray)

    # Convert to object dtype before the element-wise comparison.
    uniques_as_objects = uniques.astype(object)
    npt.assert_array_equal(codes, expected_codes)
    npt.assert_array_equal(uniques_as_objects, expected_uniques)
示例#9
0
def test_set_index():
    """``set_index`` runs on a frame that contains a StringArray column."""
    columns = {"index": [3, 2, 1], "A": fr.StringArray(TEST_ARRAY)}
    frame = pd.DataFrame(columns)
    # Smoke test only: the call must not raise; the result is discarded.
    frame.set_index("index")
示例#10
0
# -*- coding: utf-8 -*-

from __future__ import absolute_import, division, print_function, unicode_literals
import operator as op

import pandas as pd
import pandas.util.testing as pdt
import pytest

import fletcher as fr


# Shared sample values: plain strings interleaved with None entries.
data = ["foo", None, "baz", "bar", None, "..bar"]

# The same values stored twice: column "pd" as a regular pandas Series and
# column "fr" as a fletcher StringArray.
df = pd.DataFrame({"pd": pd.Series(data), "fr": fr.StringArray(data)})


# syntactic sugar to make test cases easier to read
class Case:
    """Recorder that turns a method call into a plain test-case description.

    Accessing any attribute that is not found through normal lookup yields a
    callable. Calling it returns a dict with the keys ``label``, ``method``,
    ``args`` and ``kwargs`` describing the call instead of executing anything.
    """

    def __init__(self, label):
        self._label = label

    def __getattr__(self, name):
        def describe(*args, **kwargs):
            # The label is read when the recorder is called, not when the
            # attribute is looked up.
            return {
                "label": self._label,
                "method": name,
                "args": args,
                "kwargs": kwargs,
            }

        return describe


test_cases = [
    Case("startswith").startswith("ba"),
示例#11
0
def test_isnull():
    """``isnull`` on a StringArray column matches the expected boolean mask."""
    frame = pd.DataFrame({"A": fr.StringArray(TEST_ARRAY)})

    null_mask = frame["A"].isnull()
    expected = pd.Series([False, False, True], name="A")
    tm.assert_series_equal(null_mask, expected)
示例#12
0
def test_setitem_scalar():
    """Assigning a scalar by position replaces just that element."""
    series = pd.Series(fr.StringArray(TEST_ARRAY))
    series[1] = "other_string"

    replaced_values = pa.array(["Test", "other_string", None])
    expected = pd.Series(fr.StringArray(replaced_values))
    tm.assert_series_equal(series, expected)
示例#13
0
def test_getitem_slice():
    """Slicing a StringArray-backed Series keeps values and index labels."""
    series = pd.Series(fr.StringArray(TEST_ARRAY))
    tail = series[1:]

    # The slice keeps the original labels 1 and 2 rather than restarting at 0.
    expected_values = fr.StringArray(TEST_ARRAY[1:])
    expected = pd.Series(expected_values, index=range(1, 3))
    tm.assert_series_equal(tail, expected)
示例#14
0
def test_getitem_scalar():
    """Scalar access by position compares equal to the stored string."""
    series = pd.Series(fr.StringArray(TEST_ARRAY))
    assert series[1] == "string"
示例#15
0
def test_dataframe_from_series():
    """The StringDtype survives building a frame next to a categorical column."""
    strings = pd.Series(fr.StringArray(TEST_ARRAY))
    categories = pd.Series(pd.Categorical(["a", "b"]))

    frame = pd.DataFrame({"A": strings, "B": categories})
    dtype_of_a = frame.dtypes["A"]
    assert isinstance(dtype_of_a, fr.StringDtype)
示例#16
0
def test_isna():
    """``isna`` and ``notna`` return complementary masks for a StringArray."""
    series = pd.Series(fr.StringArray(TEST_ARRAY))
    missing = pd.Series([False, False, True])

    tm.assert_series_equal(series.isna(), missing)
    # notna is checked against the element-wise negation of the isna mask.
    present = ~missing
    tm.assert_series_equal(series.notna(), present)
示例#17
0
def test_series_constructor():
    """A Series built from a StringArray keeps its dtype in an ExtensionBlock."""
    string_array = fr.StringArray(TEST_ARRAY)
    series = pd.Series(string_array)

    assert series.dtype == string_array.dtype

    # Inspect the first internal block of the Series.
    first_block = series._data.blocks[0]
    assert isinstance(first_block, ExtensionBlock)
示例#18
0
def test_copy():
    """Copying a StringArray column runs without raising."""
    frame = pd.DataFrame({"A": fr.StringArray(TEST_ARRAY)})
    column = frame["A"]
    # Smoke test only: the copy itself is discarded.
    column.copy()
示例#19
0
def test_argsort(ascending, kind):
    """Check argsort on a StringArray-backed Series against object dtype.

    Parameters
    ----------
    ascending, kind
        Forwarded unchanged to ``argsort`` on both the extension-backed and
        the object-dtype Series. They are presumably supplied by pytest
        parametrization declared outside this view -- TODO confirm.
    """
    # NOTE(review): the documented pandas ``Series.argsort`` signature does
    # not list an ``ascending`` parameter -- confirm this call is accepted by
    # the pandas version under test.
    s = pd.Series(fr.StringArray(TEST_ARRAY))
    result = s.argsort(ascending=ascending, kind=kind)
    expected = s.astype(object).argsort(ascending=ascending, kind=kind)
    # Both sides come from Series.argsort, so they are Series objects;
    # assert_frame_equal rejects inputs that are not DataFrames.
    tm.assert_series_equal(result, expected)
示例#20
0
def test_astype_object():
    """Casting to object dtype yields the plain Python values."""
    series = pd.Series(fr.StringArray(TEST_ARRAY))
    as_objects = series.astype(object)
    tm.assert_series_equal(as_objects, pd.Series(TEST_LIST))
示例#21
0
def test_np_asarray():
    """``np.asarray`` on the Series matches ``np.asarray`` on the plain list."""
    series = pd.Series(fr.StringArray(TEST_ARRAY))
    from_list = np.asarray(TEST_LIST)
    from_series = np.asarray(series)
    npt.assert_array_equal(from_series, from_list)