Пример #1
0
    def sort_values(self,
            *,
            ascending: bool = True,
            kind: str = DEFAULT_SORT_KIND,
            key: tp.Callable[['Series'], tp.Union[np.ndarray, 'Series']],
            ) -> 'Bus':
        '''
        Return a new Bus with items ordered by sorted values. Because a Bus holds Frames, a `key` function is required to derive a sortable value; that function is given a :obj:`Series` of :obj:`Frame`.

        Args:
            *
            {ascending}
            {kind}
            {key}

        Returns:
            :obj:`Bus`
        '''
        # NOTE: reading `values` handles max_persist, but delivers an array in which every Frame is loaded
        frames = self.values
        ordered = Series(frames,
                index=self._index,
                own_index=True,
                name=self._name,
                ).sort_values(
                ascending=ascending,
                kind=kind,
                key=key,
                )
        return self._derive(ordered, own_data=True)
Пример #2
0
    def __init__(
        self,
        series: tp.Union[Series, tp.Iterable[Bus]],
        *,
        index: tp.Optional[tp.Union[IndexBase, IndexAutoFactoryType]] = None,
        index_constructor: tp.Optional[IndexConstructor] = None,
        deepcopy_from_bus: bool = False,
        hierarchy: tp.Optional[IndexHierarchy] = None,
        own_index: bool = False,
    ) -> None:
        '''
        Args:
            series: An iterable (or :obj:`Series`) of :obj:`Bus`. The length of this container is not the same as ``index``, if provided.
            index: Optionally provide an index for the :obj:`Frame` contained in all :obj:`Bus`.
            index_constructor: Optional explicit constructor used when an index is built from ``index`` or created automatically.
            deepcopy_from_bus: Stored on the instance and forwarded to ``buses_to_hierarchy`` when a hierarchy is derived.
            hierarchy: If provided, used as given instead of deriving a hierarchy from the contained :obj:`Bus`.
            own_index: If True, ``index`` is stored directly, without passing through an index constructor.

        Raises:
            ErrorInitYarn: if a :obj:`Series` is given that is not of object dtype, or if the length of the resulting index differs from the length of the hierarchy.
        '''

        if not isinstance(series, Series):
            # an arbitrary iterable: realize as an object Series, getting a default index
            bus_series = Series(series, dtype=DTYPE_OBJECT)
        elif series.dtype != DTYPE_OBJECT:
            raise ErrorInitYarn(
                f'Series passed to initializer must have dtype object, not {series.dtype}'
            )
        else:
            bus_series = series
        self._series = bus_series  # Bus by Bus label

        self._deepcopy_from_bus = deepcopy_from_bus

        # a hierarchy is needed regardless of how the index is specified; derive one if not given
        if hierarchy is not None:
            self._hierarchy = hierarchy
        else:
            self._hierarchy = buses_to_hierarchy(
                self._series.values,
                self._series.index,
                deepcopy_from_bus=self._deepcopy_from_bus,
                init_exception_cls=ErrorInitYarn,
            )

        if own_index:
            # caller's index is taken as given
            self._index = index  #type: ignore
        elif index is None or index is IndexAutoFactory:
            # no labels supplied: create an automatic index sized to the hierarchy
            self._index = IndexAutoFactory.from_optional_constructor(
                len(self._hierarchy),
                default_constructor=Index,
                explicit_constructor=index_constructor,
            )
        else:
            # an iterable of labels or an Index
            self._index = index_from_optional_constructor(
                index,  #type: ignore
                default_constructor=Index,
                explicit_constructor=index_constructor,
            )

        if len(self._index) != len(self._hierarchy):
            raise ErrorInitYarn(
                f'Length of supplied index ({len(self._index)}) not of sufficient size ({len(self._hierarchy)}).'
            )
Пример #3
0
    def _update_series_cache_iloc(self, key: GetItemKeyType) -> None:
        '''
        Read from the store any deferred Frame selected by ``key`` and replace the cached Series with one that holds them; ``key`` can be any iloc GetItemKeyType.

        Raises:
            RuntimeError: if loading is needed but no store is defined.
        '''
        # nothing to do if all are loaded, or if every requested position is already loaded
        if self._loaded_all or self._loaded[key].all():
            return

        if self._store is None:
            raise RuntimeError('no store defined')

        targets = set(self._iloc_to_labels(key))

        frames = np.empty(shape=len(self._series._index),
                          dtype=object)  # type: ignore
        for pos, (label, frame) in enumerate(self._series.items()):
            if frame is FrameDeferred and label in targets:
                frame = self._store.read(label)
                self._loaded[pos] = True  # update loaded status
            # values not selected for loading are carried over unchanged
            frames[pos] = frame
        frames.flags.writeable = False

        self._series = Series(frames,
                              index=self._series._index,
                              dtype=object)
        self._loaded_all = self._loaded.all()
Пример #4
0
    def test_bus_init_b(self) -> None:
        # a Series of integers, created without an explicit dtype
        self.assertRaises(ErrorInitBus, lambda: Bus(Series([1, 2, 3])))

        # a Series of integers, created with an object dtype
        self.assertRaises(ErrorInitBus, lambda: Bus(Series([3, 4], dtype=object)))
Пример #5
0
    def display(
        self,
        config: tp.Optional[DisplayConfig] = None,
        *,
        style_config: tp.Optional[StyleConfig] = None,
    ) -> Display:
        '''{doc}

        Args:
            {config}
        '''
        # NOTE: the key change over Series is providing this class as the displayed class
        if not config:
            config = DisplayActive.get()

        header = DisplayHeader(self.__class__, self._series._name)
        display_cls = Display.from_values((), header=header, config=config)

        flat = np.empty(shape=len(self._index), dtype=DTYPE_OBJECT)
        # NOTE: do not load FrameDeferred; concatenate the values of each contained object directly
        parts = [b._values_mutable for b in self._series.values]
        np.concatenate(parts, out=flat)
        flat.flags.writeable = False

        return Series(flat, index=self._index, own_index=True)._display(
            config,
            display_cls=display_cls,
            style_config=style_config,
        )
Пример #6
0
    def _extract_loc2d(self,
            row_key: GetItemKeyType = NULL_SLICE,
            column_key: GetItemKeyType = NULL_SLICE,
            ) -> tp.Union['Frame', 'Series']:
        '''
        Select from the container by row and column loc keys, using the stored fill value for labels that are not found.

        NOTE: keys are loc keys; None is interpreted as selector, not a NULL_SLICE
        '''
        from static_frame.core.series import Series
        from static_frame.core.container_util import get_col_fill_value_factory

        fill_value = self._fill_value
        container = self._container # always a Frame

        def fill_element() -> tp.Any:
            # resolve the fill value to a single element; only evaluated when a label is missing
            return get_col_fill_value_factory(fill_value, None)(0, None)

        row_key, row_is_multiple, row_is_null_slice = self._extract_key_attrs(
                row_key,
                container._index,
                )
        column_key, column_is_multiple, column_is_null_slice = self._extract_key_attrs(
                column_key,
                container._columns, #type: ignore
                )

        if row_is_multiple:
            if column_is_multiple:
                # both keys select multiple labels: reindex, passing None for an axis selected with a null slice
                return container.reindex( # type: ignore
                        index=None if row_is_null_slice else row_key,
                        columns=None if column_is_null_slice else column_key,
                        fill_value=fill_value,
                        )
            # columns is an element, return Series indexed by index
            if column_key in container._columns: #type: ignore
                return container[column_key].reindex(row_key, fill_value=fill_value) #type: ignore
            return Series.from_element(fill_element(),
                    index=row_key,
                    name=column_key,
                    )

        if column_is_multiple:
            # row is an element, return Series indexed by columns
            if row_key in container._index: #type: ignore
                return container.loc[row_key].reindex(column_key, fill_value=fill_value) #type: ignore
            return Series.from_element(fill_element(),
                    index=column_key,
                    name=row_key,
                    )

        # selecting an element
        try:
            return container.loc[row_key, column_key]
        except KeyError:
            return fill_element() #type: ignore
Пример #7
0
def normalize_container(post: tp.Any) -> FrameOrSeries:
    '''Return ``post`` as a :obj:`Frame` or :obj:`Series`: arrays of one or two dimensions are wrapped, existing containers pass through, and anything else is promoted to a single-element :obj:`Series` to permit concatenation.
    '''
    if post.__class__ is np.ndarray:
        ndim = post.ndim
        if ndim == 1:
            return Series(post)
        if ndim == 2:
            return Frame(post)
        # any other ndim (such as 0) falls through and is handled as an element
    if isinstance(post, (Frame, Series)):
        return post
    # NOTE: do not set index as (container.name,), as this can lead to diagonal formations; will already be paired with stored labels
    return Series.from_element(post, index=ELEMENT_TUPLE)
Пример #8
0
    def mloc(self) -> Series:
        '''Returns a Series holding, for each label, a tuple of the ``mloc`` values of the loaded Frame, or None where the Frame is not loaded.
        '''
        if not self._loaded.any():
            # nothing is loaded: every label maps to None
            return Series.from_element(None, index=self._series._index)

        pairs: tp.Iterator[tp.Tuple[tp.Hashable, tp.Optional[tp.Tuple[int, ...]]]] = (
            (label, None if f is FrameDeferred else tuple(f.mloc))
            for label, f in zip(self._series._index, self._series.values)
        )
        return Series.from_items(pairs)
Пример #9
0
    def mloc(self) -> Series:
        '''Returns a :obj:`Series` that holds, per label, a tuple of memory locations within the loaded Frame; labels whose Frame is not loaded are given None.
        '''
        if not self._loaded.any():
            # no Frame has been loaded, so there is nothing to report
            return Series.from_element(None, index=self._index)

        def label_mloc_pairs() -> tp.Iterator[tp.Tuple[tp.Hashable, tp.Optional[tp.Tuple[int, ...]]]]:
            for label, f in zip(self._index, self._values_mutable):
                yield label, (None if f is FrameDeferred else tuple(f.mloc))

        return Series.from_items(label_mloc_pairs())
Пример #10
0
    def test_bus_max_persist_3(self) -> None:
        # four small Frames, labelled '0' through '3'
        pairs = ((str(i), Frame(np.arange(i, i+10).reshape(2, 5))) for i in range(4))
        b1 = Bus(Series.from_items(pairs, dtype=object))

        config = StoreConfig(
                index_depth=1,
                columns_depth=1,
                include_columns=True,
                include_index=True
                )

        with temp_file('.zip') as fp:
            b1.to_zip_pickle(fp)
            b2 = Bus.from_zip_pickle(fp, config=config, max_persist=4)

            def access_order() -> tp.List[str]:
                return list(b2._last_accessed.keys())

            # select all four positions, two at a time
            for selection in ([0, 1], [2, 3]):
                _ = b2.iloc[selection]
            self.assertTrue(b2._loaded_all)

            # each further selection is expected to move the selected labels to the end of the access order
            _ = b2.iloc[[1, 0]]
            self.assertEqual(access_order(), ['2', '3', '1', '0'])

            _ = b2.iloc[3]
            self.assertEqual(access_order(), ['2', '1', '0', '3'])

            _ = b2.iloc[:3]
            self.assertEqual(access_order(), ['3', '0', '1', '2'])
Пример #11
0
    def to_series() -> Series:
        '''Return a :obj:`Series`, named 'platform', that reports the platform, the Python version, the static-frame version, and the version of each optional package.
        '''
        # NOTE: see requirements-extras.txt
        packages = (
                'numpy',
                'pandas',
                'xlsxwriter',
                'openpyxl',
                'xarray',
                'tables',
                'pyarrow',
                'msgpack',
                'msgpack_numpy',
                )

        def items() -> tp.Iterator[tp.Tuple[str, tp.Any]]:
            yield 'platform', platform_mod.platform()
            yield 'sys.version', sys.version.replace('\n', '')

            yield 'static-frame', static_frame.__version__

            for package in packages:
                try:
                    mod = importlib.import_module(package)
                except ModuleNotFoundError: #pragma: no cover
                    # a package that is not installed is reported with the exception class as its value
                    yield package, ModuleNotFoundError #pragma: no cover
                    continue #pragma: no cover

                if hasattr(mod, '__version__'):
                    version = mod.__version__ #type: ignore
                elif hasattr(mod, 'version'): # msgpack
                    version = mod.version #type: ignore
                else:
                    version = None
                yield package, version

        return Series.from_items(items(), name='platform')
Пример #12
0
    def test_yarn_init_c(self) -> None:
        # a tuple holding the result of ff.parse, given directly
        self.assertRaises(ErrorInitYarn,
                lambda: Yarn((ff.parse('s(2,2)'), )))

        # the same content, wrapped in an object Series
        self.assertRaises(ErrorInitYarn,
                lambda: Yarn(Series((ff.parse('s(2,2)'), ), dtype=object)))
Пример #13
0
 def _to_series_state(self) -> Series:
     '''Return a :obj:`Series` built from the current values, index, and name.
     '''
     # the mutable array will be copied in the Series construction
     state = Series(
             self._values_mutable,
             index=self._index,
             own_index=True,
             name=self._name,
             )
     return state
Пример #14
0
    def _extract_loc(self, key: GetItemKeyType) -> 'Bus':
        '''Select by loc ``key``: a single-element selection returns the stored value itself, any other selection returns a new instance of this class sharing the store, config, and max_persist.
        '''
        iloc_key = self._series._index.loc_to_iloc(key)

        # NOTE: if we update before slicing, we change the local and the object handed back
        self._update_series_cache_iloc(key=iloc_key)

        values = self._series.values[iloc_key]

        if isinstance(values, np.ndarray):
            selected = Series(values,
                    index=self._series._index.iloc[iloc_key],
                    own_index=True,
                    name=self._series._name)
            return self.__class__(series=selected,
                    store=self._store,
                    config=self._config,
                    max_persist=self._max_persist,
                    )

        # we have a single element
        # NOTE: only support str labels, not IndexHierarchy
        return values #type: ignore
Пример #15
0
    def test_bus_max_persist_a(self) -> None:
        # twenty small Frames, labelled '0' through '19'
        pairs = ((str(i), Frame(np.arange(i, i+10).reshape(2, 5))) for i in range(20))
        b1 = Bus(Series.from_items(pairs, dtype=object))

        config = StoreConfig(
                index_depth=1,
                columns_depth=1,
                include_columns=True,
                include_index=True
                )

        with temp_file('.zip') as fp:
            b1.to_zip_pickle(fp)

            b2 = Bus.from_zip_pickle(fp, config=config, max_persist=3)
            for label in b2.index:
                _ = b2[label]
                # never more than max_persist Frames are loaded at once
                self.assertTrue(b2._loaded.sum() <= 3)

            # after iteration only the last three are loaded
            self.assertEqual(b2._loaded.tolist(),
                    [False] * 17 + [True] * 3)
Пример #16
0
    def from_buses(
        cls,
        buses: tp.Iterable[Bus],
        *,
        name: NameType = None,
        retain_labels: bool,
        deepcopy_from_bus: bool = False,
    ) -> 'Yarn':
        '''Return a :obj:`Yarn` from an iterable of :obj:`Bus`; labels will be drawn from :obj:`Bus.name`.

        Args:
            buses: the :obj:`Bus` to collect.
            name: name given to the :obj:`Series` of :obj:`Bus`.
            retain_labels: if True, the full hierarchy is used as the index; otherwise the result of ``hierarchy.level_drop(1)`` is used.
            deepcopy_from_bus: forwarded to ``buses_to_hierarchy`` and to the constructor.
        '''
        labelled = ((bus.name, bus) for bus in buses)
        series = Series.from_items(labelled, dtype=DTYPE_OBJECT, name=name)

        hierarchy = buses_to_hierarchy(
            series.values,
            series.index,
            deepcopy_from_bus=deepcopy_from_bus,
            init_exception_cls=ErrorInitYarn,
        )

        index = hierarchy if retain_labels else hierarchy.level_drop(1)  #type: ignore

        return cls(
            series,
            hierarchy=hierarchy,
            index=index,
            deepcopy_from_bus=deepcopy_from_bus,
        )
Пример #17
0
    def test_bus_max_persist_b(self) -> None:
        # twenty small Frames, labelled '0' through '19'
        pairs = ((str(i), Frame(np.arange(i, i+10).reshape(2, 5))) for i in range(20))
        b1 = Bus(Series.from_items(pairs, dtype=object))

        config = StoreConfig(
                index_depth=1,
                columns_depth=1,
                include_columns=True,
                include_index=True
                )

        with temp_file('.zip') as fp:
            b1.to_zip_pickle(fp)

            b2 = Bus.from_zip_pickle(fp, config=config, max_persist=1)
            b3 = b2.iloc[10:]
            self.assertEqual(b3._loaded.sum(), 1)
            # only the last one is loaded
            self.assertEqual(b3._loaded.tolist(), [False] * 9 + [True])

            # accessing the first position leaves only that position loaded
            self.assertEqual(b3.iloc[0].sum().sum(), 145)
            self.assertEqual(b3._loaded.tolist(), [True] + [False] * 9)

            # accessing the fifth position leaves only that position loaded
            self.assertEqual(b3.iloc[4].sum().sum(), 185)
            self.assertEqual(b3._loaded.tolist(),
                    [False] * 4 + [True] + [False] * 5)
Пример #18
0
    def to_series_values(
        self,
        values: tp.Iterator[tp.Any],
        *,
        dtype: DtypeSpecifier,
        name: NameType = None,
        index_constructor: tp.Optional[IndexConstructor] = None,
        axis: int = 0,
    ) -> 'Series':
        '''Return a :obj:`Series` of ``values`` labelled by the source container: by its columns when the container is two-dimensional and ``axis`` is 0, otherwise by its index. If ``index_constructor`` is given, it is applied to those labels.
        '''
        from static_frame.core.series import Series

        container = self._container
        # Creating a Series that will have the same index as source container
        if container._NDIM == 2 and axis == 0:
            index, own_index = container._columns, False  #type: ignore
        else:
            index, own_index = container._index, True

        if index_constructor is not None:
            index = index_constructor(index)

        # PERF: passing count here permits faster generator realization
        array, _ = iterable_to_array_1d(
            values,
            count=index.shape[0],
            dtype=dtype,
        )
        return Series(array, name=name, index=index, own_index=own_index)
Пример #19
0
 def to_bus(self) -> 'Bus':
     '''Realize the :obj:`Batch` as an :obj:`Bus`. Note that, as a :obj:`Bus` must have all labels (even if :obj:`Frame` are loaded lazily), all items of this :obj:`Batch` are iterated into an object :obj:`Series`.
     '''
     series = Series.from_items(
             self.items(),
             name=self._name,
             dtype=DTYPE_OBJECT,
             )
     return Bus(series)
 def _checkSelectedIndex(self):
     '''Reset the selection mask to all False when its index is no longer the same object as the frame's index.
     '''
     if self._selectedMask.index is self.frame.index:
         return

     # selection is no longer valid
     count = self._selectedMask.sum()
     vd.status('frame.index updated, clearing {} selected rows'.format(count))
     self._selectedMask = Series.from_element(False, index=self.frame.index)
Пример #21
0
    def _extract_loc(self, key: GetItemKeyType) -> 'Bus':
        '''Select by loc ``key``: a single-element selection returns the stored value itself, unless ``key`` is an ``HLoc`` with a multiple key; every other selection returns a new instance of this class sharing the store and config.
        '''
        iloc_key = self._series._index.loc_to_iloc(key)  #type: ignore

        # NOTE: if we update before slicing, we change the local and the object handed back
        self._update_series_cache_iloc(key=iloc_key)

        values = self._series.values[iloc_key]

        if not isinstance(values, np.ndarray):  # if we have a single element
            if not (isinstance(key, HLoc) and key.has_key_multiple()):
                return values  #type: ignore
            # must return a Series, even though we do not have an array
            values = np.array(values)
            values.flags.writeable = False

        selected = Series(values,
                          index=self._series._index.iloc[iloc_key],
                          own_index=True,
                          name=self._series._name)
        return self.__class__(series=selected, store=self._store, config=self._config)
    def reload(self):
        '''Rebuild this object's columns, rows, and selection mask from ``self.source``.

        Raises:
            NotImplementedError: if ``self.source`` is not a :obj:`Frame`.
        '''
        if not isinstance(self.source, Frame):
            raise NotImplementedError(
                f'no support for loading a Frame from {self.source}')
        frame = self.source

        # If the index is not an IndexAutoFactory, try to move it onto the Frame. If this fails it might mean we are trying to unset an auto index post selection
        index = frame.index
        if index.depth > 1 or index._map:  # if it is not an IndexAutoFactory
            frame = frame.unset_index()

        # VisiData assumes string column names
        if frame.columns.dtype != str:
            frame = frame.relabel(columns=frame.columns.astype(str))

        dtypes = frame.dtypes

        self.columns = []
        for col in frame.columns:
            # labels with the '__vd_' prefix are not exposed as columns
            if col.startswith('__vd_'):
                continue
            column = Column(
                col,
                type=self.dtype_to_type(dtypes[col]),
                getter=self.getValue,
                setter=self.setValue,
                expr=col,
            )
            self.addColumn(column)

        self.rows = StaticFrameAdapter(frame)
        self._selectedMask = Series.from_element(False, index=frame.index)
Пример #23
0
        def gen() -> tp.Iterator[Series]:
            # first, a Boolean Series of the loaded status per label
            yield Series(self._loaded,
                    index=self._series._index,
                    dtype=DTYPE_BOOL,
                    name='loaded')

            # then one Series per attribute, with a dtype-appropriate placeholder for a Frame that is not loaded
            attr_specs = (
                    ('size', DTYPE_FLOAT_DEFAULT, np.nan),
                    ('nbytes', DTYPE_FLOAT_DEFAULT, np.nan),
                    ('shape', DTYPE_OBJECT, None),
                    )
            for attr, dtype, missing in attr_specs:
                values = (missing if f is FrameDeferred else getattr(f, attr)
                        for f in self._series.values)
                yield Series(values,
                        index=self._series._index,
                        dtype=dtype,
                        name=attr)
Пример #24
0
    def shapes(self) -> Series:
        '''A :obj:`Series`, named 'shape', giving the shape of each iterated :obj:`Frame` by label.

        Returns:
            :obj:`Series`
        '''
        def label_shape_pairs() -> tp.Iterator[tp.Tuple[tp.Hashable, tp.Tuple[int, ...]]]:
            for label, frame in self._items:
                yield label, frame.shape

        return Series.from_items(label_shape_pairs(), name='shape', dtype=DTYPE_OBJECT)
Пример #25
0
    def shapes(self) -> Series:
        '''A :obj:`Series`, named 'shape', giving the shape of each loaded :obj:`Frame`; a :obj:`Frame` that is not loaded is given None.

        Returns:
            :obj:`Series`
        '''
        frame_shapes = (
            None if f is FrameDeferred else f.shape
            for f in self._values_mutable
        )
        return Series(frame_shapes, index=self._index, dtype=object, name='shape')
Пример #26
0
    def test_interface_summary_c(self) -> None:
        s = Series(['a', 'b', 'c'])

        def group_counts(interface: tp.Any) -> tp.Any:
            # number of interface entries found in each 'group'
            return interface.iter_group('group').apply(len)

        # the interface of an instance and of its class are expected to have the same counts
        counts = group_counts(s.interface)
        counts_cls = group_counts(s.__class__.interface)

        self.assertTrue((counts == counts_cls).all())
Пример #27
0
    def to_bus(self) -> 'Bus':
        '''Realize the :obj:`Batch` as an :obj:`Bus`, passing along this :obj:`Batch`'s config. Note that, as a :obj:`Bus` must have all labels (even if :obj:`Frame` are loaded lazily), this :obj:`Batch` will be exhausted.
        '''
        return Bus(
            Series.from_items(
                self.items(),
                name=self._name,
                dtype=DTYPE_OBJECT,
            ),
            config=self._config,
        )
Пример #28
0
    def shapes(self) -> Series:
        '''A :obj:`Series`, named 'shape', giving the shape of each loaded :obj:`Frame`; a :obj:`Frame` that is not loaded is given None.

        Returns:
            :obj:`Series`
        '''
        frame_shapes = (
            None if f is FrameDeferred else f.shape
            for f in self._series.values
        )
        return Series(
            frame_shapes,
            index=self._series._index,
            dtype=object,
            name='shape',
        )
Пример #29
0
 def _deferred_series(labels: tp.Iterable[str]) -> Series:
     '''
     Build an object ``Series`` in which every one of the given ``labels`` is paired with ``FrameDeferred``.
     '''
     # make an object dtype
     deferred = Series.from_element(FrameDeferred, index=labels, dtype=object)
     return tp.cast(Series, deferred)
Пример #30
0
    def shapes(self) -> Series:
        '''A :obj:`Series` describing the shape of each loaded :obj:`Frame`, concatenated from the ``shapes`` of every contained object and relabelled with this container's index. Unloaded :obj:`Frame` will have a shape of None.

        Returns:
            :obj:`Series`
        '''
        per_bus = (bus.shapes for bus in self._series.values)
        return Series.from_concat(per_bus, index=self._index)