Exemplo n.º 1
0
    def test_store_config_map_a(self) -> None:
        '''A map built with ``from_config`` returns the source config's values for the labels checked.'''
        depth_map = StoreConfigMap.from_config(
                StoreConfig(index_depth=3, columns_depth=3))
        for label in ('a', 'b'):
            self.assertEqual(depth_map[label].index_depth, 3)

        no_index_map = StoreConfigMap.from_config(
                StoreConfig(include_index=False))
        for label in ('a', 'b'):
            self.assertEqual(no_index_map[label].include_index, False)
    def test_store_read_many_single_thread_weak_cache(self) -> None:
        '''
        Check that ``_weak_cache`` holds an entry for a frame read via ``_read_many_single_thread`` only while the caller keeps a reference to that frame.

        NOTE(review): the zero-length assertions presumably rely on the frame being released as soon as its last reference is dropped -- confirm on interpreters without reference counting.
        '''

        f1, f2, f3 = get_test_framesA()

        with temp_file('.zip') as fp:

            st = StoreZipTSV(fp)
            st.write((f.name, f) for f in (f1, f2, f3))

            # keyword arguments shared by both ``_read_many_single_thread`` calls below
            kwargs = dict(config_map=StoreConfigMap.from_initializer(
                StoreConfig(index_depth=1)),
                          constructor=st._container_type_to_constructor(Frame),
                          container_type=Frame)

            # with ``strip_ext=False`` the labels keep their '.txt' extension
            labels = tuple(st.labels(strip_ext=False))
            self.assertEqual(labels, ('foo.txt', 'bar.txt', 'baz.txt'))

            # nothing has been read yet, so the cache is expected to be empty
            self.assertEqual(0, len(list(st._weak_cache)))

            # The result is not bound to a name, so nothing keeps it alive
            next(st._read_many_single_thread(('foo', ), **kwargs))

            self.assertEqual(0, len(list(st._weak_cache)))

            # The result IS bound to ``frame``, so the cache entry is expected to persist
            frame = next(st._read_many_single_thread(('foo', ), **kwargs))

            self.assertEqual(1, len(list(st._weak_cache)))

            # The object in the weak cache is the very same object as ``frame``
            self.assertIs(frame, st._weak_cache['foo'])
            del frame

            # After dropping the only reference, the cache entry is expected to be gone
            self.assertEqual(0, len(list(st._weak_cache)))
Exemplo n.º 3
0
    def write(self,
              items: tp.Iterable[tp.Tuple[tp.Hashable, Frame]],
              *,
              config: StoreConfigMapInitializer = None) -> None:
        '''
        Write every (label, frame) pair of ``items`` as one member of the zip archive at ``self._fp``.

        Args:
            items: iterable of (label, ``Frame``) pairs.
            config: initializer given to ``StoreConfigMap.from_initializer``. The per-label config is passed along with each frame; the default config supplies ``write_max_workers``, ``write_chunksize`` and the label encoder.
        '''
        config_map = StoreConfigMap.from_initializer(config)

        def payloads() -> tp.Iterable[PayloadFrameToBytes]:
            # lazily wrap each incoming pair with its config and the class exporter
            for label, frame in items:
                yield PayloadFrameToBytes(  # pylint: disable=no-value-for-parameter
                    name=label,
                    config=config_map[label].to_store_config_he(),
                    frame=frame,
                    exporter=self.__class__._EXPORTER,
                )

        workers = config_map.default.write_max_workers

        if workers is not None and workers > 1:
            # more than one worker requested: convert payloads in a process pool
            def label_and_bytes() -> tp.Iterator[LabelAndBytes]:
                with ProcessPoolExecutor(max_workers=workers) as executor:
                    yield from executor.map(
                        self._payload_to_bytes,
                        payloads(),
                        chunksize=config_map.default.write_chunksize)
        else:
            # otherwise convert payloads one at a time in this process
            def label_and_bytes() -> tp.Iterator[LabelAndBytes]:
                for payload in payloads():
                    yield self._payload_to_bytes(payload)

        with zipfile.ZipFile(self._fp, 'w', zipfile.ZIP_DEFLATED) as archive:
            for label, payload_bytes in label_and_bytes():
                member = config_map.default.label_encode(label) + self._EXT_CONTAINED
                # the bytes are stored directly under the member name, without a container
                archive.writestr(member, payload_bytes)
Exemplo n.º 4
0
    def __init__(self,
            series: Series,
            *,
            store: tp.Optional[Store] = None,
            config: StoreConfigMapInitializer = None
            ):
        '''
        Args:
            series: ``Series`` of object dtype, with string labels, whose values are each a ``Frame`` or ``FrameDeferred``.
            store: optional ``Store`` retained on the instance.
            config: StoreConfig for handling ``Frame`` construction and exporting from Store.

        Raises:
            ErrorInitBus: if the ``Series`` is not of object dtype, a label is not a string, or a value is neither a ``Frame`` nor ``FrameDeferred``.
        '''
        if series.dtype != DTYPE_OBJECT:
            raise ErrorInitBus(
                    f'Series passed to initializer must have dtype object, not {series.dtype}')

        def is_loaded(label, value) -> bool:
            # validate one (label, value) pair; True means the value is an actual Frame
            if not isinstance(label, str):
                raise ErrorInitBus(f'supplied label {label} is not a string.')
            if isinstance(value, Frame):
                return True
            if value is FrameDeferred:
                return False
            raise ErrorInitBus(f'supplied {value.__class__} is not a Frame or FrameDeferred.')

        # one pass over the series both validates it and builds the Boolean "loaded" array
        self._loaded = np.fromiter(
                (is_loaded(label, value) for label, value in series.items()),
                dtype=DTYPE_BOOL,
                count=len(series),
                )
        self._loaded_all = self._loaded.all()
        self._series = series
        self._store = store

        # None gives the default map; a StoreConfig or StoreConfigMap gives the corresponding map
        self._config = StoreConfigMap.from_initializer(config)
Exemplo n.º 5
0
    def test_bus_init_c(self) -> None:
        '''Frames exported with ``to_zip_csv`` and loaded with ``from_zip_csv`` equal their sources.'''
        foo = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='foo')
        bar = Frame.from_dict(
                {'a': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='bar')
        sources = (foo, bar)

        config = StoreConfigMap.from_config(StoreConfig(index_depth=1))
        bus_src = Bus.from_frames(sources, config=config)

        self.assertEqual(bus_src.keys().values.tolist(), ['foo', 'bar'])

        with temp_file('.zip') as fp:
            bus_src.to_zip_csv(fp)
            bus_loaded = Bus.from_zip_csv(fp, config=config)

            # load both frames before comparing
            loaded = tuple(bus_loaded[f.name] for f in sources)

            for expected, actual in zip(sources, loaded):
                self.assertEqualFrames(expected, actual)
Exemplo n.º 6
0
    def test_bus_to_hdf5_a(self) -> None:
        '''Round trip three frames through ``Bus.to_hdf5`` / ``Bus.from_hdf5`` and compare with the sources.'''
        frames = (
                Frame.from_dict(
                        {'a': (1, 2), 'b': (3, 4)},
                        index=('x', 'y'),
                        name='f1'),
                Frame.from_dict(
                        {'c': (1, 2, 3), 'b': (4, 5, 6)},
                        index=('x', 'y', 'z'),
                        name='f2'),
                Frame.from_dict(
                        {'d': (10, 20), 'b': (50, 60)},
                        index=('p', 'q'),
                        name='f3'),
                )

        config = StoreConfigMap.from_frames(frames)
        bus_src = Bus.from_frames(frames, config=config)

        with temp_file('.h5') as fp:
            bus_src.to_hdf5(fp)
            bus_loaded = Bus.from_hdf5(fp, config=config)
            # force loading all frames before leaving the temp_file context
            tuple(bus_loaded.items())

        for expected in frames:
            self.assertEqualFrames(expected, bus_loaded[expected.name])
Exemplo n.º 7
0
    def test_bus_to_xlsx_a(self) -> None:
        '''Round trip three frames through ``Bus.to_xlsx`` / ``Bus.from_xlsx`` and compare with the sources.'''
        frames = (
                Frame.from_dict(
                        {'a': (1, 2), 'b': (3, 4)},
                        index=('x', 'y'),
                        name='f1'),
                Frame.from_dict(
                        {'c': (1, 2, 3), 'b': (4, 5, 6)},
                        index=('x', 'y', 'z'),
                        name='f2'),
                Frame.from_dict(
                        {'d': (10, 20), 'b': (50, 60)},
                        index=('p', 'q'),
                        name='f3'),
                )

        store_config = StoreConfig(
                index_depth=1,
                columns_depth=1,
                include_columns=True,
                include_index=True,
                )
        config = StoreConfigMap.from_config(store_config)
        bus_src = Bus.from_frames(frames, config=config)

        with temp_file('.xlsx') as fp:
            bus_src.to_xlsx(fp)

            bus_loaded = Bus.from_xlsx(fp, config=config)
            # force loading all frames before leaving the temp_file context
            tuple(bus_loaded.items())

        for expected in frames:
            self.assertEqualFrames(expected, bus_loaded[expected.name])
Exemplo n.º 8
0
    def test_bus_init_a(self) -> None:
        '''A frame written by ``StoreZipTSV.write`` from ``Bus.items()`` reads back with the expected values.'''
        foo = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='foo')
        bar = Frame.from_dict(
                {'a': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='bar')

        config = StoreConfigMap.from_config(StoreConfig(index_depth=1))
        bus_src = Bus.from_frames((foo, bar), config=config)

        self.assertEqual(bus_src.keys().values.tolist(), ['foo', 'bar'])

        with temp_file('.zip') as fp:
            bus_src.to_zip_tsv(fp)
            bus_loaded = Bus.from_zip_tsv(fp)

            # access both frames through the loaded Bus; the results are not used further
            bar_loaded = bus_loaded['bar']
            foo_loaded = bus_loaded['foo']

            store = StoreZipTSV(fp)
            store.write(bus_src.items())

            # how to show that this derived getitem has derived type?
            foo_read = store.read('foo', config=config['foo'])
            expected = (
                    ('a', (('x', 1), ('y', 2))),
                    ('b', (('x', 3), ('y', 4))),
                    )
            self.assertEqual(foo_read.to_pairs(0), expected)
Exemplo n.º 9
0
    def test_store_config_map_c(self) -> None:
        '''``from_initializer`` accepts a ``StoreConfig``, a ``StoreConfigMap`` and a plain dict of configs.'''
        single = StoreConfig(index_depth=3, columns_depth=3)
        maps = {
                'a': StoreConfig(index_depth=2),
                'b': StoreConfig(index_depth=3),
                }
        existing_map = StoreConfigMap(maps)

        # a single config applies to the label looked up
        from_single = StoreConfigMap.from_initializer(single)
        self.assertEqual(from_single['a'].index_depth, 3)

        # an existing map and a plain dict both keep their per-label configs
        for initializer in (existing_map, maps):
            config_map = StoreConfigMap.from_initializer(initializer)
            self.assertEqual(config_map['a'].index_depth, 2)
            self.assertEqual(config_map['b'].index_depth, 3)
Exemplo n.º 10
0
    def test_store_config_map_init_b(self) -> None:
        '''A map whose per-label configs and default all use the same ``label_encoder`` is accepted.'''
        default = StoreConfig(label_encoder=str)
        maps = {
                label: StoreConfig(index_depth=depth, label_encoder=str)
                for label, depth in (('a', 2), ('b', 3))
                }

        config_map = StoreConfigMap(maps, default=default)
        self.assertEqual(config_map.default.label_encoder, str)
Exemplo n.º 11
0
    def test_store_xlsx_write_b(self) -> None:
        '''A frame with hierarchical columns and ``None``/NaN/infinite values survives an XLSX write and read.'''
        records = (
                (None, np.nan, 50, 'a'),
                (None, -np.inf, -50, 'b'),
                (None, 60.4, -50, 'c'),
                )
        f1 = Frame.from_records(
                records,
                index=('p', 'q', 'r'),
                columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
                )

        write_config = StoreConfigMap.from_config(
                StoreConfig(include_index=True, include_columns=True))

        def sample_column(frame):
            # values of the ('II', 'a') column as a plain list
            return frame[HLoc[('II', 'a')]].values.tolist()

        with temp_file('.xlsx') as fp:

            store = StoreXLSX(fp)
            store.write(((STORE_LABEL_DEFAULT, f1),), config=write_config)

            read_config = StoreConfig(
                    index_depth=f1.index.depth,
                    columns_depth=f1.columns.depth,
                    )
            f2 = store.read(STORE_LABEL_DEFAULT, config=read_config)

            # just a sample column for now
            self.assertEqual(sample_column(f1), sample_column(f2))

            self.assertEqualFrames(f1, f2)
Exemplo n.º 12
0
    def write(self,
            items: tp.Iterable[tp.Tuple[tp.Optional[str], Frame]],
            *,
            config: StoreConfigMapInitializer = None,
            # store_filter: tp.Optional[StoreFilter] = STORE_FILTER_DEFAULT,
            ) -> None:
        '''
        Write every (label, frame) pair of ``items`` to the SQLite database at ``self._fp``, one ``_frame_to_table`` call per pair.

        Args:
            items: iterable of (label, ``Frame``) pairs.
            config: initializer given to ``StoreConfigMap.from_initializer``; each label's config supplies ``include_columns`` and ``include_index``.
        '''
        config_map = StoreConfigMap.from_initializer(config)

        # NOTE: register adapters for NP types:
        # numpy types go in as blobs if they are not individually converted to python types
        sqlite3.register_adapter(np.int64, int)
        sqlite3.register_adapter(np.int32, int)
        sqlite3.register_adapter(np.int16, int)
        # common python types
        sqlite3.register_adapter(Fraction, str)
        sqlite3.register_adapter(complex, lambda x: f'{x.real}:{x.imag}')

        # NOTE: the sqlite3 connection context manager only commits or rolls back the transaction; it does not close the connection, so close it explicitly
        # hierarchical columns might be stored as tuples
        conn = sqlite3.connect(self._fp, detect_types=sqlite3.PARSE_DECLTYPES)
        try:
            with conn:
                cursor = conn.cursor()
                for label, frame in items:
                    c = config_map[label]

                    self._frame_to_table(frame=frame,
                            label=label,
                            cursor=cursor,
                            include_columns=c.include_columns,
                            include_index=c.include_index,
                            # store_filter=store_filter
                            )

                conn.commit()
        finally:
            conn.close()
Exemplo n.º 13
0
    def read_many(self,
            labels: tp.Iterable[tp.Hashable],
            *,
            config: StoreConfigMapInitializer = None,
            container_type: tp.Type[Frame] = Frame,
            ) -> tp.Iterator[Frame]:
        '''
        Yield one ``container_type`` instance per label, each built from the table stored at ``/<encoded label>`` in the HDF5 file at ``self._fp``.

        Args:
            labels: labels to read; results are yielded in this order.
            config: initializer given to ``StoreConfigMap.from_initializer``; each label's config supplies the index/columns depths and constructors and ``consolidate_blocks``.
            container_type: class whose ``_from_data_index_arrays_column_labels`` builds each result.

        Raises:
            NotImplementedError: if a label's config has a truthy ``dtypes``.
        '''
        import tables
        config_map = StoreConfigMap.from_initializer(config)

        with tables.open_file(self._fp, mode='r') as file:
            for label in labels:
                c = config_map[label]
                # the label encoder always comes from the default config, not the per-label one
                label_encoded = config_map.default.label_encode(label)

                index_depth = c.index_depth
                index_constructors = c.index_constructors
                columns_depth = c.columns_depth
                columns_constructors = c.columns_constructors
                consolidate_blocks = c.consolidate_blocks
                if c.dtypes:
                    raise NotImplementedError('using config.dtypes on HDF5 not yet supported')

                # both lists are filled as a side effect of iterating ``blocks()`` below
                index_arrays = []
                columns_labels = []

                table = file.get_node(f'/{label_encoded}')
                colnames = table.cols._v_colnames

                def blocks() -> tp.Iterator[np.ndarray]:
                    # yields one immutable array per non-index column of the current table
                    for col_idx, colname in enumerate(colnames):
                        # can also do: table.read(field=colname)
                        array = table.col(colname)
                        if array.dtype.kind in DTYPE_STR_KINDS:
                            array = array.astype(str)
                        array.flags.writeable = False

                        # the first ``index_depth`` columns are collected as index arrays and not yielded
                        if col_idx < index_depth:
                            index_arrays.append(array)
                            continue
                        # only store column labels for those yielded
                        columns_labels.append(colname)
                        yield array

                # NOTE(review): ``TypeBlocks.from_blocks`` presumably exhausts ``blocks()`` here, which is what populates ``index_arrays`` and ``columns_labels`` before they are used below -- confirm
                if consolidate_blocks:
                    data = TypeBlocks.from_blocks(TypeBlocks.consolidate_blocks(blocks()))
                else:
                    data = TypeBlocks.from_blocks(blocks())

                # this will own_data in subsequent constructor call
                yield container_type._from_data_index_arrays_column_labels(
                        data=data,
                        index_depth=index_depth,
                        index_arrays=index_arrays,
                        index_constructors=index_constructors,
                        columns_depth=columns_depth,
                        columns_labels=columns_labels,
                        columns_constructors=columns_constructors,
                        name=label,
                        )
Exemplo n.º 14
0
    def test_store_config_map_init_d(self) -> None:
        '''``read_chunksize`` must agree between every per-label config and the default.'''
        default = StoreConfig(read_chunksize=2)

        conflicting = {
                'a': StoreConfig(read_chunksize=2),
                'b': StoreConfig(read_chunksize=3),
                }
        # the per-label configs carry conflicting information
        self.assertRaises(ErrorInitStoreConfig,
                StoreConfigMap, conflicting, default=default)

        agreeing = {
                'a': StoreConfig(read_chunksize=2),
                'b': StoreConfig(read_chunksize=2),
                }
        # without an explicit default the implicit one applies (noted in the original test as 1)
        self.assertRaises(ErrorInitStoreConfig, StoreConfigMap, agreeing)

        config_map = StoreConfigMap(agreeing, default=default)
        self.assertEqual(config_map.default.read_chunksize, 2)
Exemplo n.º 15
0
    def test_store_config_map_init_e(self) -> None:
        '''``write_max_workers`` must agree between every per-label config and the default.'''
        default = StoreConfig(write_max_workers=2)

        conflicting = {
                'a': StoreConfig(write_max_workers=2),
                'b': StoreConfig(write_max_workers=3),
                }
        # the per-label configs carry conflicting information
        self.assertRaises(ErrorInitStoreConfig,
                StoreConfigMap, conflicting, default=default)

        agreeing = {
                'a': StoreConfig(write_max_workers=2),
                'b': StoreConfig(write_max_workers=2),
                }
        # without an explicit default the implicit one applies (noted in the original test as None)
        self.assertRaises(ErrorInitStoreConfig, StoreConfigMap, agreeing)

        config_map = StoreConfigMap(agreeing, default=default)
        self.assertEqual(config_map.default.write_max_workers, 2)
Exemplo n.º 16
0
    def test_store_config_map_init_a(self) -> None:
        '''A map in which only one config sets ``label_encoder`` is rejected by ``from_initializer``.'''
        maps = {
            'a': StoreConfig(index_depth=2),
            'b': StoreConfig(index_depth=3, label_encoder=str),
        }

        self.assertRaises(ErrorInitStoreConfig,
                StoreConfigMap.from_initializer, maps)
Exemplo n.º 17
0
    def test_store_config_map_b(self) -> None:
        '''Per-label configs are returned for known labels; label 'c' is not in the map and yields ``index_depth`` 0.'''
        config_map = StoreConfigMap({
                'a': StoreConfig(index_depth=2),
                'b': StoreConfig(index_depth=3),
                })

        for label, depth in (('a', 2), ('b', 3), ('c', 0)):
            self.assertEqual(config_map[label].index_depth, depth)
Exemplo n.º 18
0
    def test_store_config_map_get_default_a(self) -> None:
        '''With no explicit default, ``default`` compares equal to ``StoreConfigMap._DEFAULT``.'''
        config_map = StoreConfigMap.from_initializer({
            'a': StoreConfig(index_depth=2),
            'b': StoreConfig(index_depth=3),
        })

        is_class_default = config_map.default == StoreConfigMap._DEFAULT
        self.assertTrue(is_class_default)
Exemplo n.º 19
0
 def _from_store(
     cls,
     store: Store,
     config: StoreConfigMapInitializer = None,
 ) -> 'Batch':
     '''
     Build an instance from a lazy stream of (label, frame) pairs read from ``store``.

     Each frame is read with the config that the map derived from ``config`` holds for its label; the unmodified ``config`` is forwarded to ``cls``.
     '''
     config_map = StoreConfigMap.from_initializer(config)
     labels = store.labels()

     def pairs():
         # nothing is read from the store until the pairs are iterated
         for label in labels:
             yield label, store.read(label, config=config_map[label])

     return cls(pairs(), config=config)
Exemplo n.º 20
0
 def _from_store(
         cls,
         store: Store,
         config: StoreConfigMapInitializer = None,
         max_persist: tp.Optional[int] = None,  # not used
 ) -> 'Batch':
     '''
     Build an instance from a lazy stream of (label, frame) pairs read from ``store``.

     Each frame is read with the config that the map derived from ``config`` holds for its label; the unmodified ``config`` is forwarded to ``cls``. ``max_persist`` is accepted but ignored.
     '''
     config_map = StoreConfigMap.from_initializer(config)
     labels = store.labels()

     def pairs():
         # nothing is read from the store until the pairs are iterated
         for label in labels:
             yield label, store.read(label, config=config_map[label])

     return cls(pairs(), config=config)
Exemplo n.º 21
0
    def test_store_sqlite_read_many_a(self) -> None:
        '''Frames written to a ``StoreSQLite`` come back from ``read_many`` in label order and equal their sources (dtypes not compared).'''
        f1 = Frame.from_dict(
                {'x': (1, 2, -5, 200), 'y': (3, 4, -5, -3000)},
                index=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
                name='f1')
        f2 = Frame.from_dict(
                {'a': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2')
        f3 = Frame.from_records(
                ((10, 20, 50, 60), (50.0, 60.4, -50, -60)),
                index=('p', 'q'),
                columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
                name='f3')

        # each row is a four-value pattern repeated twice; the four rows are repeated twice as well
        row_block = (
                (10, 20, 50, False) * 2,
                (50.0, 60.4, -50, True) * 2,
                (234, 44452, 0, False) * 2,
                (4, -4, 2000, True) * 2,
                )
        f4 = Frame.from_records(
                row_block * 2,
                index=IndexHierarchy.from_product(
                        ('top', 'bottom'), ('far', 'near'), ('left', 'right')),
                columns=IndexHierarchy.from_product(
                        ('I', 'II'), ('a', 'b'), (1, 2)),
                name='f4')

        frames = (f1, f2, f3, f4)
        names = tuple(f.name for f in frames)
        config_map_write = StoreConfigMap.from_config(
                StoreConfig(include_index=True, include_columns=True))

        with temp_file('.sqlite') as fp:

            store = StoreSQLite(fp)
            store.write(((f.name, f) for f in frames), config=config_map_write)

            # labels are read back from the file, not from memory
            labels = tuple(store.labels())
            self.assertEqual(names, labels)

            # one read config per label, with depths taken from the matching source frame
            config_map_read: tp.Dict[tp.Hashable, StoreConfig] = {
                    name: StoreConfig(
                            index_depth=f_src.index.depth,
                            columns_depth=f_src.columns.depth)
                    for name, f_src in zip(labels, frames)
                    }

            loaded = store.read_many(labels, config=config_map_read)
            for i, f_loaded in enumerate(loaded):
                self.assertEqualFrames(frames[i], f_loaded, compare_dtype=False)
Exemplo n.º 22
0
    def __init__(self,
            series: Series,
            *,
            store: tp.Optional[Store] = None,
            config: StoreConfigMapInitializer = None,
            max_persist: tp.Optional[int] = None,
            own_data: bool = False,
            ):
        '''
        Default Bus constructor.

        {args}
        '''
        # NOTE: the docstring above contains a ``{args}`` placeholder and is kept verbatim
        if series.dtype != DTYPE_OBJECT:
            raise ErrorInitBus(
                    f'Series passed to initializer must have dtype object, not {series.dtype}')

        track_access = max_persist is not None
        if track_access:
            # a dictionary (insertion ordered) serves as an ordered set: every key maps to None
            self._last_accessed: tp.Dict[str, None] = {}

        def is_loaded(label, value) -> bool:
            # classify one value; loaded Frames are also recorded when access is tracked
            if isinstance(value, Frame):
                if track_access:
                    self._last_accessed[label] = None
                return True
            if value is FrameDeferred:
                return False
            raise ErrorInitBus(f'supplied {value.__class__} is not a Frame or FrameDeferred.')

        # one pass over the series both validates it and builds the Boolean "loaded" array
        self._loaded = np.fromiter(
                (is_loaded(label, value) for label, value in series.items()),
                dtype=DTYPE_BOOL,
                count=len(series),
                )
        self._loaded_all = self._loaded.all()

        # with own_data the series' array is made writeable and used directly; otherwise a copy is taken
        values = series.values
        if own_data:
            values.flags.writeable = True
        else:
            values = values.copy()
        self._values_mutable = values

        self._index = series._index
        self._name = series._name
        self._store = store

        # Not handling cases of max_persist being greater than the length of the Series (might floor to length)
        if track_access and max_persist < self._loaded.sum():
            raise ErrorInitBus('max_persist cannot be less than the number of already loaded Frames')
        self._max_persist = max_persist

        # None gives the default map; a StoreConfig or StoreConfigMap gives the corresponding map
        self._config = StoreConfigMap.from_initializer(config)
Exemplo n.º 23
0
    def test_store_xlsx_write_a(self) -> None:
        '''Frames written to a ``StoreXLSX`` are listed in write order and read back equal to their sources (dtypes not compared).'''

        f1 = Frame.from_dict(dict(x=(1, 2, -5, 200), y=(3, 4, -5, -3000)),
                             index=IndexHierarchy.from_product(('I', 'II'),
                                                               ('a', 'b')),
                             name='f1')
        f2 = Frame.from_dict(dict(a=(1, 2, 3), b=(4, 5, 6)),
                             index=('x', 'y', 'z'),
                             name='f2')
        f3 = Frame.from_records(((10, 20, 50, 60), (50.0, 60.4, -50, -60)),
                                index=('p', 'q'),
                                columns=IndexHierarchy.from_product(
                                    ('I', 'II'), ('a', 'b')),
                                name='f3')
        f4 = Frame.from_records(
            (
                (10, 20, 50, False, 10, 20, 50, False),
                (50.0, 60.4, -50, True, 50.0, 60.4, -50, True),
                (234, 44452, 0, False, 234, 44452, 0, False),
                (4, -4, 2000, True, 4, -4, 2000, True),
                (10, 20, 50, False, 10, 20, 50, False),
                (50.0, 60.4, -50, True, 50.0, 60.4, -50, True),
                (234, 44452, 0, False, 234, 44452, 0, False),
                (4, -4, 2000, True, 4, -4, 2000, True),
            ),
            index=IndexHierarchy.from_product(
                ('top', 'bottom'), ('far', 'near'), ('left', 'right')),
            columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b'),
                                                (1, 2)),
            name='f4')

        frames = (f1, f2, f3, f4)
        config_map = StoreConfigMap.from_config(
            StoreConfig(include_index=True, include_columns=True))

        with temp_file('.xlsx') as fp:

            st1 = StoreXLSX(fp)
            st1.write(((f.name, f) for f in frames), config=config_map)

            sheet_names = tuple(
                st1.labels())  # this will read from file, not in memory
            self.assertEqual(tuple(f.name for f in frames), sheet_names)

            for i, name in enumerate(sheet_names):
                f_src = frames[i]
                c = StoreConfig(index_depth=f_src.index.depth,
                                columns_depth=f_src.columns.depth)
                f_loaded = st1.read(name, config=c)
                # use the same keyword as the other ``assertEqualFrames`` callers in this code base
                self.assertEqualFrames(f_src, f_loaded, compare_dtype=False)
Exemplo n.º 24
0
    def labels(
        self,
        *,
        config: StoreConfigMapInitializer = None,
        strip_ext: bool = True,
    ) -> tp.Iterator[tp.Hashable]:
        '''
        Yield the decoded label of every member of the zip archive at ``self._fp``.

        Args:
            config: initializer given to ``StoreConfigMap.from_initializer``; only the default config's ``label_decode`` is used.
            strip_ext: if True, remove a trailing ``_EXT_CONTAINED`` from each member name before decoding.
        '''
        config_map = StoreConfigMap.from_initializer(config)
        ext = self._EXT_CONTAINED

        with zipfile.ZipFile(self._fp) as zf:
            for name in zf.namelist():
                # strip only the trailing extension: ``str.replace`` would also remove occurrences of the extension inside the label itself
                if strip_ext and ext and name.endswith(ext):
                    name = name[:-len(ext)]
                # always use default decoder
                yield config_map.default.label_decode(name)
Exemplo n.º 25
0
    def labels(
        self,
        *,
        config: StoreConfigMapInitializer = None,
        strip_ext: bool = True,
    ) -> tp.Iterator[tp.Hashable]:
        '''
        Yield the decoded name of every table listed in ``sqlite_master`` of the database at ``self._fp``.

        Args:
            config: initializer given to ``StoreConfigMap.from_initializer``; only the default config's ``label_decode`` is used.
            strip_ext: not used by this implementation.
        '''
        config_map = StoreConfigMap.from_initializer(config)

        # NOTE: the sqlite3 connection context manager only manages the transaction and leaves the connection open, so close it explicitly when iteration ends
        conn = sqlite3.connect(self._fp)
        try:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table';")
            for row in cursor:
                yield config_map.default.label_decode(row[0])
        finally:
            conn.close()
Exemplo n.º 26
0
    def test_bus_update_series_cache_iloc(self) -> None:
        '''``_update_series_cache_iloc`` raises ``RuntimeError`` for a deferred frame when the Bus has no Store.'''
        frame = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='foo')

        config = StoreConfigMap.from_config(StoreConfig(index_depth=1))

        # simulating a Bus with a FrameDeferred but no Store, just for testing
        series = Series((frame, FrameDeferred), index=('p', 'q'))
        bus = Bus(series, config=config)
        self.assertFalse(bus._loaded_all)

        self.assertRaises(RuntimeError, bus._update_series_cache_iloc, 1)
Exemplo n.º 27
0
    def labels(
        self,
        *,
        config: StoreConfigMapInitializer = None,
        strip_ext: bool = True,
    ) -> tp.Iterator[tp.Hashable]:
        '''
        Yield the decoded name of every sheet of the workbook at ``self._fp``.

        Args:
            config: initializer given to ``StoreConfigMap.from_initializer``; only the default config's ``label_decode`` is used.
            strip_ext: not used by this implementation.
        '''
        config_map = StoreConfigMap.from_initializer(config)

        wb = self._load_workbook(self._fp)
        try:
            # copy the names so the workbook can be closed before anything is yielded
            labels = tuple(wb.sheetnames)
        finally:
            # close the workbook even if reading the sheet names fails
            wb.close()

        for label in labels:
            yield config_map.default.label_decode(label)
Exemplo n.º 28
0
    def __init__(self,
            series: Series,
            *,
            store: tp.Optional[Store] = None,
            config: StoreConfigMapInitializer = None,
            max_persist: tp.Optional[int] = None,
            ):
        '''
        Args:
            series: :obj:`Series` of object dtype, with string labels, whose values are each a :obj:`Frame` or ``FrameDeferred``.
            store: optional :obj:`Store` retained on the instance.
            config: StoreConfig for handling :obj:`Frame` construction and exporting from Store.
            max_persist: When loading :obj:`Frame` from a :obj:`Store`, optionally define the maximum number of :obj:`Frame` to remain in the :obj:`Bus`, regardless of the size of the :obj:`Bus`. If more than ``max_persist`` number of :obj:`Frame` are loaded, least-recently loaded :obj:`Frame` will be replaced by ``FrameDeferred``. A ``max_persist`` of 1, for example, permits reading one :obj:`Frame` at a time without ever holding in memory more than 1 :obj:`Frame`.

        Raises:
            ErrorInitBus: if the :obj:`Series` is not of object dtype, a label is not a string, or a value is neither a :obj:`Frame` nor ``FrameDeferred``.
        '''
        if series.dtype != DTYPE_OBJECT:
            raise ErrorInitBus(
                    f'Series passed to initializer must have dtype object, not {series.dtype}')

        track_access = max_persist is not None
        if track_access:
            # keys record the labels of loaded Frames; every value is None
            self._last_accessed: tp.Dict[str, None] = {}

        def is_loaded(label, value) -> bool:
            # validate one (label, value) pair; loaded Frames are also recorded when access is tracked
            if not isinstance(label, str):
                raise ErrorInitBus(f'supplied label {label} is not a string.')
            if isinstance(value, Frame):
                if track_access:
                    self._last_accessed[label] = None
                return True
            if value is FrameDeferred:
                return False
            raise ErrorInitBus(f'supplied {value.__class__} is not a Frame or FrameDeferred.')

        # one pass over the series both validates it and builds the Boolean "loaded" array
        self._loaded = np.fromiter(
                (is_loaded(label, value) for label, value in series.items()),
                dtype=DTYPE_BOOL,
                count=len(series),
                )
        self._loaded_all = self._loaded.all()
        self._series = series
        self._store = store

        # the effective limit is never below the number of Frames already loaded
        self._max_persist = (
                max(max_persist, self._loaded.sum()) if track_access else None)

        # None gives the default map; a StoreConfig or StoreConfigMap gives the corresponding map
        self._config = StoreConfigMap.from_initializer(config)
Exemplo n.º 29
0
    def test_store_hdf5_write_a(self) -> None:
        '''Frames written to a ``StoreHDF5`` are listed in write order and read back equal to their sources.'''
        f1 = Frame.from_dict(
                {'x': (1, 2, -5, 200), 'y': (3, 4, -5, -3000)},
                index=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
                name='f1')
        f2 = Frame.from_dict(
                {'a': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2')
        f3 = Frame.from_records(
                ((10, 20, 50, 60), (50.0, 60.4, -50, -60)),
                index=('p', 'q'),
                columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
                name='f3')

        # each row is a four-value pattern repeated twice; the four rows are repeated twice as well
        row_block = (
                (10, 20, 50, False) * 2,
                (50.0, 60.4, -50, True) * 2,
                (234, 44452, 0, False) * 2,
                (4, -4, 2000, True) * 2,
                )
        f4 = Frame.from_records(
                row_block * 2,
                index=IndexHierarchy.from_product(
                        ('top', 'bottom'), ('far', 'near'), ('left', 'right')),
                columns=IndexHierarchy.from_product(
                        ('I', 'II'), ('a', 'b'), (1, 2)),
                name='f4')

        frames = (f1, f2, f3, f4)
        names = tuple(f.name for f in frames)
        config = StoreConfigMap.from_frames(frames)

        with temp_file('.hdf5') as fp:

            store = StoreHDF5(fp)
            store.write(((f.name, f) for f in frames), config=config)

            # labels are read back from the file, not from memory
            labels = tuple(store.labels())
            self.assertEqual(names, labels)

            for name, f_src in zip(labels, frames):
                f_loaded = store.read(name, config=config[f_src.name])
                self.assertEqualFrames(f_src, f_loaded)
Exemplo n.º 30
0
    def labels(self, *,
            config: StoreConfigMapInitializer = None,
            strip_ext: bool = True,
            ) -> tp.Iterator[tp.Hashable]:
        '''
        Iterate over the decoded names of the ``Table`` nodes found directly under the root of the HDF5 file at ``self._fp``.

        Args:
            config: initializer given to ``StoreConfigMap.from_initializer``; only the default config's ``label_decode`` is used.
            strip_ext: not used by this implementation.
        '''
        import tables

        config_map = StoreConfigMap.from_initializer(config)
        table_class_name = tables.Table.__name__

        with tables.open_file(self._fp, mode='r') as h5:
            table_nodes = h5.iter_nodes(where='/', classname=table_class_name)
            for table_node in table_nodes:
                # NOTE: ``name`` is not the complete path of the node
                yield config_map.default.label_decode(table_node.name)