Exemplo n.º 1
0
def construct_test_data(df_length, append_mul):
    """Build a raw byte payload from a serialized random DataFrame.

    A frame is obtained from ``get_random_df(df_length, 10)``, serialized
    with ``DataFrameSerializer``, and the first element of the serializer's
    result is dumped to its raw bytes.

    :param df_length: first argument forwarded to ``get_random_df``
        (presumably the number of rows -- confirm against that helper).
    :param append_mul: how many times the payload is repeated back to back;
        values of 1 or less leave the payload as a single copy.
    :return: the (possibly repeated) payload as ``bytes``.
    """
    serializer = DataFrameSerializer()
    tmp_df = get_random_df(df_length, 10)
    recs = serializer.serialize(tmp_df)[0]
    # tobytes() is the supported spelling; tostring() is a deprecated alias.
    payload = recs.tobytes()
    if append_mul > 1:
        # The payload is bytes, so it must be joined with a bytes separator:
        # "".join(...) raises TypeError on Python 3.
        payload = b"".join([payload] * append_mul)
    return payload
Exemplo n.º 2
0
def construct_test_data(df_length, append_mul):
    """Build a raw byte payload from a serialized random DataFrame.

    A frame is obtained from ``get_random_df(df_length, 10)``, serialized
    with ``DataFrameSerializer``, and the first element of the serializer's
    result is dumped to its raw bytes.

    :param df_length: first argument forwarded to ``get_random_df``
        (presumably the number of rows -- confirm against that helper).
    :param append_mul: how many times the payload is repeated back to back;
        values of 1 or less leave the payload as a single copy.
    :return: the (possibly repeated) payload as ``bytes``.
    """
    serializer = DataFrameSerializer()
    tmp_df = get_random_df(df_length, 10)
    recs = serializer.serialize(tmp_df)[0]
    # tobytes() is the supported spelling; tostring() is a deprecated alias.
    payload = recs.tobytes()
    if append_mul > 1:
        # The payload is bytes, so it must be joined with a bytes separator:
        # "".join(...) raises TypeError on Python 3.
        payload = b"".join([payload] * append_mul)
    return payload
Exemplo n.º 3
0
class PandasDataFrameStore(PandasStore):
    """Store for pandas DataFrames built on top of ``PandasStore``.

    Frames are passed through ``SERIALIZER`` before being handed to the
    parent class for writing/appending, and passed back through it after
    reading.
    """
    TYPE = 'pandasdf'
    SERIALIZER = DataFrameSerializer()

    def can_write(self, version, symbol, data):
        """Tell whether ``data`` can be written by this store.

        Anything that is not a ``DataFrame`` is rejected. A frame without
        object-dtype columns is accepted directly; otherwise the answer comes
        from ``SERIALIZER.can_convert_to_records_without_objects``.
        """
        if not isinstance(data, DataFrame):
            return False
        has_object_columns = np.any(data.dtypes.values == 'object')
        if not has_object_columns:
            return True
        return self.SERIALIZER.can_convert_to_records_without_objects(data, symbol)

    def write(self, arctic_lib, version, symbol, item, previous_version):
        """Serialize ``item`` and delegate the write to the parent store."""
        records, dtype = self.SERIALIZER.serialize(item)
        parent = super(PandasDataFrameStore, self)
        parent.write(arctic_lib, version, symbol, records, previous_version,
                     dtype=dtype)

    def append(self, arctic_lib, version, symbol, item, previous_version):
        """Serialize ``item`` and delegate the append to the parent store."""
        records, dtype = self.SERIALIZER.serialize(item)
        parent = super(PandasDataFrameStore, self)
        parent.append(arctic_lib, version, symbol, records, previous_version,
                      dtype=dtype)

    def read(self, arctic_lib, version, symbol, **kwargs):
        """Read through the parent store and deserialize the result."""
        parent = super(PandasDataFrameStore, self)
        stored = parent.read(arctic_lib, version, symbol, **kwargs)
        return self.SERIALIZER.deserialize(stored)
Exemplo n.º 4
0
class PandasDataFrameStore(PandasStore):
    """Store for pandas DataFrames built on top of ``PandasStore``.

    Frames are passed through ``SERIALIZER`` before being handed to the
    parent class for writing/appending, and passed back through it after
    reading.
    """
    TYPE = 'pandasdf'
    SERIALIZER = DataFrameSerializer()

    @staticmethod
    def can_write_type(data):
        """Return True when ``data`` is a ``DataFrame`` instance."""
        return isinstance(data, DataFrame)

    def can_write(self, version, symbol, data):
        """Tell whether ``data`` can be written by this store.

        Non-DataFrame input is rejected. When neither the columns nor the
        index carry ``NP_OBJECT_DTYPE`` the frame is accepted directly;
        otherwise the answer comes from
        ``SERIALIZER.can_convert_to_records_without_objects``.
        """
        if not self.can_write_type(data):
            return False
        has_objects = (NP_OBJECT_DTYPE in data.dtypes.values
                       or data.index.dtype is NP_OBJECT_DTYPE)
        if has_objects:
            return self.SERIALIZER.can_convert_to_records_without_objects(data, symbol)
        return True

    def write(self, arctic_lib, version, symbol, item, previous_version):
        """Serialize ``item`` and delegate the write to the parent store."""
        records, dtype = self.SERIALIZER.serialize(item)
        parent = super(PandasDataFrameStore, self)
        parent.write(arctic_lib, version, symbol, records, previous_version, dtype=dtype)

    def append(self, arctic_lib, version, symbol, item, previous_version, **kwargs):
        """Serialize ``item`` and delegate the append, forwarding ``kwargs``."""
        records, dtype = self.SERIALIZER.serialize(item)
        parent = super(PandasDataFrameStore, self)
        parent.append(arctic_lib, version, symbol, records, previous_version, dtype=dtype, **kwargs)

    def read(self, arctic_lib, version, symbol, **kwargs):
        """Read through the parent store and deserialize the result.

        ``force_bytes_to_unicode`` is taken from ``kwargs`` when present and
        falls back to the ``FORCE_BYTES_TO_UNICODE`` setting otherwise.
        """
        parent = super(PandasDataFrameStore, self)
        stored = parent.read(arctic_lib, version, symbol, **kwargs)
        as_unicode = kwargs.get('force_bytes_to_unicode', FORCE_BYTES_TO_UNICODE)
        return self.SERIALIZER.deserialize(stored, force_bytes_to_unicode=as_unicode)

    def read_options(self):
        """Return the parent store's read options unchanged."""
        parent = super(PandasDataFrameStore, self)
        return parent.read_options()
Exemplo n.º 5
0
class PandasDataFrameStore(PandasStore):
    """Store for pandas DataFrames built on top of ``PandasStore``.

    Frames are passed through ``SERIALIZER`` before being handed to the
    parent class for writing/appending, and passed back through it after
    reading.
    """
    TYPE = 'pandasdf'
    SERIALIZER = DataFrameSerializer()

    @staticmethod
    def can_write_type(data):
        """Return True when ``data`` is a ``DataFrame`` instance."""
        return isinstance(data, DataFrame)

    def can_write(self, version, symbol, data):
        """Tell whether ``data`` can be written by this store.

        Non-DataFrame input is rejected. When neither the columns nor the
        index carry ``NP_OBJECT_DTYPE`` the frame is accepted directly;
        otherwise the answer comes from
        ``SERIALIZER.can_convert_to_records_without_objects``.
        """
        if not self.can_write_type(data):
            return False
        has_objects = (NP_OBJECT_DTYPE in data.dtypes.values
                       or data.index.dtype is NP_OBJECT_DTYPE)
        if has_objects:
            return self.SERIALIZER.can_convert_to_records_without_objects(
                data, symbol)
        return True

    def write(self, arctic_lib, version, symbol, item, previous_version):
        """Serialize ``item`` and delegate the write to the parent store."""
        records, dtype = self.SERIALIZER.serialize(item)
        parent = super(PandasDataFrameStore, self)
        parent.write(arctic_lib, version, symbol, records, previous_version,
                     dtype=dtype)

    def append(self, arctic_lib, version, symbol, item, previous_version,
               **kwargs):
        """Serialize ``item`` and delegate the append, forwarding ``kwargs``."""
        records, dtype = self.SERIALIZER.serialize(item)
        parent = super(PandasDataFrameStore, self)
        parent.append(arctic_lib, version, symbol, records, previous_version,
                      dtype=dtype, **kwargs)

    def read(self, arctic_lib, version, symbol, **kwargs):
        """Read through the parent store and deserialize the result."""
        parent = super(PandasDataFrameStore, self)
        stored = parent.read(arctic_lib, version, symbol, **kwargs)
        return self.SERIALIZER.deserialize(stored)

    def read_options(self):
        """Return the parent store's read options unchanged."""
        parent = super(PandasDataFrameStore, self)
        return parent.read_options()
Exemplo n.º 6
0
import numpy as np
import pandas as pd

from arctic.serialization.numpy_records import DataFrameSerializer
from tests.integration.chunkstore.test_utils import create_test_data
from tests.util import get_large_ts

# Number of leading rows whose first column _mixed_test_data() overwrites
# with None (presumably to force a non-homogeneous/object dtype -- confirm).
NON_HOMOGENEOUS_DTYPE_PATCH_SIZE_ROWS = 50
# Module-level slot that _mixed_test_data() checks for None before building
# its fixtures; presumably populated there as a cache -- confirm.
_TEST_DATA = None

# Module-level DataFrameSerializer instance.
df_serializer = DataFrameSerializer()


def _mixed_test_data():
    global _TEST_DATA
    if _TEST_DATA is None:
        onerow_ts = get_large_ts(1)
        small_ts = get_large_ts(10)
        medium_ts = get_large_ts(600)
        large_ts = get_large_ts(1800)
        empty_ts = pd.DataFrame()
        empty_index = create_test_data(size=0,
                                       cols=10,
                                       index=True,
                                       multiindex=False,
                                       random_data=True,
                                       random_ids=True)

        with_some_objects_ts = medium_ts.copy(deep=True)
        with_some_objects_ts.iloc[0:NON_HOMOGENEOUS_DTYPE_PATCH_SIZE_ROWS,
                                  0] = None