Python StringArray示例，fletcher.StringArray Python示例

示例#1

0

显示文件

def test_concatenate_blocks():
    """Concatenating a StringArray-backed Series with itself repeats its values."""
    strings = fr.StringArray(TEST_ARRAY)
    series = pd.Series(strings, index=pd.RangeIndex(3), fastpath=True)

    combined = pd.concat([series, series], ignore_index=True)

    doubled = pa.array(["Test", "string", None, "Test", "string", None])
    tm.assert_series_equal(combined, pd.Series(fr.StringArray(doubled)))

示例#2

0

显示文件

def test_dataframe_from_series_no_dict():
    """A DataFrame built directly from a Series matches the dict-built one.

    Checked once for an unnamed Series (expected column label ``0``) and
    once for a Series named ``"A"`` (expected column label ``"A"``).
    """
    # ``name=None`` is what ``pd.Series`` uses when no name is passed.
    for series_name, column_label in ((None, 0), ("A", "A")):
        series = pd.Series(fr.StringArray(TEST_ARRAY), name=series_name)
        from_series = pd.DataFrame(series)
        from_dict = pd.DataFrame({column_label: series})
        tm.assert_frame_equal(from_series, from_dict)

示例#3

0

显示文件

def test_series_attributes():
    """Check basic Series attributes on a StringArray-backed Series."""
    series = pd.Series(fr.StringArray(TEST_ARRAY))

    assert series.ndim == 1
    assert series.size == 3
    assert series.base is not None

    # The transpose of a Series must compare equal to the Series itself.
    transpose_matches = series.T == series
    assert transpose_matches.all()

    assert series.memory_usage() > 8

示例#4

0

显示文件

def test_nbytes():
    """``nbytes`` is at least the minimal storage this test accounts for."""
    strings = fr.StringArray(pa.array(["A", None, "CC"]))
    # Minimal storage usage, as tallied by this test:
    #   1 byte  for the valid bitmap
    #   4 bytes for the offset array
    #   3 bytes for the actual string content
    minimal_nbytes = 1 + 4 + 3
    assert strings.nbytes >= minimal_nbytes

示例#5

0

显示文件

文件： benchmarks.py 项目： xmnlab/fletcher

 def setup(self):
     """Build the two single-column frames stored on ``self``.

     Both frames hold the same values under the column ``"str"``: for
     every multiple of 7 below ``2**15`` the number's decimal text repeated
     three times, and ``None`` for every other position. ``self.df`` is
     built from the plain list, ``self.df_ext`` from a ``fr.StringArray``.
     """
     values = [str(x) * 3 if x % 7 == 0 else None for x in range(2**15)]
     self.df = pd.DataFrame({"str": values})
     self.df_ext = pd.DataFrame({"str": fr.StringArray(pa.array(values))})

示例#6

0

显示文件

def test_groupby():
    """Grouping by a StringArray column sums the ints per string key.

    The expected frame only has the keys ``"a"`` and ``"b"``; the row whose
    key is ``None`` does not appear in it.
    """
    frame = pd.DataFrame({
        "str": fr.StringArray(["a", "a", "b", None]),
        "int": [10, 5, 24, 6],
    })
    summed = frame.groupby("str").sum()

    expected_index = pd.Index(["a", "b"], name="str")
    expected = pd.DataFrame({"int": [15, 24]}, index=expected_index)
    tm.assert_frame_equal(summed, expected)

示例#7

0

显示文件

def test_dataframe_constructor():
    """A DataFrame built from a StringArray keeps the extension dtype."""
    frame = pd.DataFrame({"A": fr.StringArray(TEST_ARRAY)})

    column_dtype = frame.dtypes["A"]
    assert isinstance(column_dtype, fr.StringDtype)
    assert frame.shape == (3, 1)

    # Smoke-test typical DataFrame calls; they only need to run without error.
    str(frame)
    frame.info()

示例#8

0

显示文件

def test_factorize():
    """``StringArray.factorize`` agrees with ``pd.factorize`` on object data."""
    strings = fr.StringArray(TEST_ARRAY)
    codes, unique_values = strings.factorize()
    reference_codes, reference_uniques = pd.factorize(strings.astype(object))

    # The uniques must come back as the extension array type ...
    assert isinstance(unique_values, fr.StringArray)

    # ... and are converted to objects only for the element-wise comparison.
    unique_objects = unique_values.astype(object)
    npt.assert_array_equal(codes, reference_codes)
    npt.assert_array_equal(unique_objects, reference_uniques)

示例#9

0

显示文件

def test_set_index():
    """Smoke test: ``set_index`` runs on a frame holding a StringArray column."""
    frame = pd.DataFrame({"index": [3, 2, 1], "A": fr.StringArray(TEST_ARRAY)})
    frame.set_index("index")

示例#10

0

显示文件

文件： test_reference_impl.py 项目： xmnlab/fletcher

# -*- coding: utf-8 -*-

from __future__ import absolute_import, division, print_function, unicode_literals
import operator as op

import pandas as pd
import pandas.util.testing as pdt
import pytest

import fletcher as fr


# Shared sample values, including missing entries (None).
data = ["foo", None, "baz", "bar", None, "..bar"]

# The same values held twice: as a plain pandas Series ("pd") and as a
# fletcher StringArray ("fr").
df = pd.DataFrame(
    {
        "pd": pd.Series(data),
        "fr": fr.StringArray(data),
    }
)


# Syntactic sugar to make test cases easier to read.
class Case:
    """Record a method call as a plain dict instead of executing it.

    ``Case(label).some_method(*args, **kwargs)`` evaluates to a dict with
    the keys ``label``, ``method``, ``args`` and ``kwargs``.
    """

    def __init__(self, label):
        self._label = label

    def __getattr__(self, name):
        # Only reached for attributes not found normally; every such name
        # is treated as the method to record.
        def record_call(*args, **kwargs):
            return dict(
                label=self._label, method=name, args=args, kwargs=kwargs
            )

        return record_call


test_cases = [
    Case("startswith").startswith("ba"),

示例#11

0

显示文件

def test_isnull():
    """``isnull`` on a StringArray column flags only the last entry."""
    frame = pd.DataFrame({"A": fr.StringArray(TEST_ARRAY)})

    null_mask = frame["A"].isnull()
    expected_mask = pd.Series([False, False, True], name="A")
    tm.assert_series_equal(null_mask, expected_mask)

示例#12

0

显示文件

def test_setitem_scalar():
    """Assigning a scalar by position replaces just that element."""
    series = pd.Series(fr.StringArray(TEST_ARRAY))
    series[1] = "other_string"

    replaced = pa.array(["Test", "other_string", None])
    tm.assert_series_equal(series, pd.Series(fr.StringArray(replaced)))

示例#13

0

显示文件

def test_getitem_slice():
    """Slicing a Series keeps the matching values and the original labels."""
    series = pd.Series(fr.StringArray(TEST_ARRAY))

    tail = series[1:]
    expected_tail = pd.Series(
        fr.StringArray(TEST_ARRAY[1:]),
        index=range(1, 3),
    )
    tm.assert_series_equal(tail, expected_tail)

示例#14

0

显示文件

def test_getitem_scalar():
    """Indexing a single position yields the plain string at that position."""
    series = pd.Series(fr.StringArray(TEST_ARRAY))
    assert series[1] == "string"

示例#15

0

显示文件

def test_dataframe_from_series():
    """A StringArray column keeps its dtype next to a categorical column."""
    strings = pd.Series(fr.StringArray(TEST_ARRAY))
    categories = pd.Series(pd.Categorical(["a", "b"]))

    frame = pd.DataFrame({"A": strings, "B": categories})

    dtype_of_a = frame.dtypes["A"]
    assert isinstance(dtype_of_a, fr.StringDtype)

示例#16

0

显示文件

def test_isna():
    """``isna`` and ``notna`` are complementary masks over the test data."""
    series = pd.Series(fr.StringArray(TEST_ARRAY))
    missing = pd.Series([False, False, True])

    tm.assert_series_equal(series.isna(), missing)
    tm.assert_series_equal(series.notna(), ~missing)

示例#17

0

显示文件

def test_series_constructor():
    """A Series built from a StringArray keeps its dtype in an ExtensionBlock."""
    strings = fr.StringArray(TEST_ARRAY)
    series = pd.Series(strings)

    assert series.dtype == strings.dtype

    first_block = series._data.blocks[0]
    assert isinstance(first_block, ExtensionBlock)

示例#18

0

显示文件

def test_copy():
    """Smoke test: copying a StringArray-backed column runs without error."""
    frame = pd.DataFrame({"A": fr.StringArray(TEST_ARRAY)})
    column = frame["A"]
    column.copy()

示例#19

0

显示文件

def test_argsort(ascending, kind):
    """``argsort`` on a StringArray Series matches the object-dtype result.

    Parameters
    ----------
    ascending, kind
        Forwarded unchanged to both ``argsort`` calls. Presumably supplied by
        pytest parametrization or fixtures that are not shown here -- confirm
        against the full test module.
    """
    s = pd.Series(fr.StringArray(TEST_ARRAY))
    result = s.argsort(ascending=ascending, kind=kind)
    expected = s.astype(object).argsort(ascending=ascending, kind=kind)
    # Both values come from ``Series.argsort``, so they must be compared with
    # the Series assertion; ``assert_frame_equal`` rejects non-DataFrame
    # inputs and would fail regardless of the data.
    tm.assert_series_equal(result, expected)

示例#20

0

显示文件

def test_astype_object():
    """Casting to ``object`` yields the same Series as the plain list does."""
    as_objects = pd.Series(fr.StringArray(TEST_ARRAY)).astype(object)
    tm.assert_series_equal(as_objects, pd.Series(TEST_LIST))

示例#21

0

显示文件

def test_np_asarray():
    """``np.asarray`` on the Series matches ``np.asarray`` on the plain list."""
    series = pd.Series(fr.StringArray(TEST_ARRAY))
    reference = np.asarray(TEST_LIST)

    converted = np.asarray(series)
    npt.assert_array_equal(converted, reference)