示例#1
0
    def test_bus_init_b(self) -> None:
        '''Bus initialization raises ErrorInitBus for a Series of integers, with or without an object dtype.
        '''
        series_factories = (
            lambda: Series([1, 2, 3]),
            lambda: Series([3, 4], dtype=object),
        )
        for make_series in series_factories:
            # the Series is created inside the context, exactly as in a direct call
            with self.assertRaises(ErrorInitBus):
                Bus(make_series())
示例#2
0
    def sort_values(self,
            *,
            ascending: bool = True,
            kind: str = DEFAULT_SORT_KIND,
            key: tp.Callable[['Series'], tp.Union[np.ndarray, 'Series']],
            ) -> 'Bus':
        '''
        Return a new Bus ordered by the sorted values. Because a Bus contains Frames, a `key` argument is required to extract a sortable value; the key function receives a :obj:`Series` of :obj:`Frame`.

        Args:
            *
            {ascending}
            {kind}
            {key}

        Returns:
            :obj:`Bus`
        '''
        # NOTE: reading `self.values` handles max_persist, but delivers an array with every Frame loaded
        frames = Series(self.values,
                index=self._index,
                own_index=True,
                name=self._name,
                )
        # sorting is delegated to the Series; the result is wrapped back into a Bus
        ordered = frames.sort_values(ascending=ascending, kind=kind, key=key)
        return self._derive(ordered, own_data=True)
示例#3
0
 def _to_series_state(self) -> Series:
     '''Return a :obj:`Series` built from the mutable values array, the index, and the name of this container.
     '''
     # NOTE: the mutable array will be copied in the Series construction
     state = Series(
             self._values_mutable,
             index=self._index,
             own_index=True,
             name=self._name,
             )
     return state
示例#4
0
    def display(
        self,
        config: tp.Optional[DisplayConfig] = None,
        *,
        style_config: tp.Optional[StyleConfig] = None,
    ) -> Display:
        '''{doc}

        Args:
            {config}
            style_config: Passed through to the display routine of the Series built here.
        '''
        # NOTE: the key change over Series is providing this class as the displayed class in the header
        if not config:
            config = DisplayActive.get()

        header = DisplayHeader(self.__class__, self._series._name)
        display_cls = Display.from_values((), header=header, config=config)

        # NOTE: do not load FrameDeferred; concatenate the values of each contained container directly
        flat = np.empty(shape=len(self._index), dtype=DTYPE_OBJECT)
        np.concatenate(
            [b._values_mutable for b in self._series.values],
            out=flat,
        )
        flat.flags.writeable = False

        return Series(flat, index=self._index, own_index=True)._display(
            config,
            display_cls=display_cls,
            style_config=style_config,
        )
示例#5
0
    def _extract_loc(self, key: GetItemKeyType) -> 'Bus':
        '''Select by loc key.

        Returns:
            A new instance of this class or, if a single element is selected, that element.
        '''
        iloc_key = self._series._index.loc_to_iloc(key)

        # NOTE: the cache is updated before slicing, which changes both the local Series and the object handed back
        self._update_series_cache_iloc(key=iloc_key)

        selected = self._series.values[iloc_key]

        if isinstance(selected, np.ndarray):
            subset = Series(selected,
                    index=self._series._index.iloc[iloc_key],
                    own_index=True,
                    name=self._series._name)
            return self.__class__(series=subset,
                    store=self._store,
                    config=self._config,
                    max_persist=self._max_persist,
                    )

        # a single element was selected
        # NOTE: only str labels are supported, not IndexHierarchy
        return selected #type: ignore
示例#6
0
    def _extract_loc(self, key: GetItemKeyType) -> 'Bus':
        '''Select by loc key.

        Returns:
            A new instance of this class or, if a single element is selected with a key that is not an ``HLoc`` with multiple keys, that element.
        '''
        iloc_key = self._series._index.loc_to_iloc(key)  #type: ignore

        # NOTE: the cache is updated before slicing, which changes both the local Series and the object handed back
        self._update_series_cache_iloc(key=iloc_key)

        selected = self._series.values[iloc_key]

        if not isinstance(selected, np.ndarray):  # a single element was selected
            wrap_element = isinstance(key, HLoc) and key.has_key_multiple()
            if not wrap_element:
                return selected  #type: ignore
            # must return a Series, even though we do not have an array
            selected = np.array(selected)
            selected.flags.writeable = False

        subset = Series(
            selected,
            index=self._series._index.iloc[iloc_key],
            own_index=True,
            name=self._series._name,
        )
        return self.__class__(series=subset,
                              store=self._store,
                              config=self._config)
示例#7
0
    def __init__(
        self,
        series: tp.Union[Series, tp.Iterable[Bus]],
        *,
        index: tp.Optional[tp.Union[IndexBase, IndexAutoFactoryType]] = None,
        index_constructor: tp.Optional[IndexConstructor] = None,
        deepcopy_from_bus: bool = False,
        hierarchy: tp.Optional[IndexHierarchy] = None,
        own_index: bool = False,
    ) -> None:
        '''
        Args:
            series: An iterable (or :obj:`Series`) of :obj:`Bus`. The length of this container is not the same as ``index``, if provided.
            index: Optionally provide an index for the :obj:`Frame` contained in all :obj:`Bus`.
            index_constructor: Optional constructor given as the explicit constructor when an index is created here.
            deepcopy_from_bus: Stored on the instance and forwarded when the hierarchy is derived from the contained :obj:`Bus`.
            hierarchy: Optionally provide an :obj:`IndexHierarchy`; if None, one is derived from ``series``.
            own_index: If True, ``index`` is assigned as given, without passing through an index constructor.

        Raises:
            ErrorInitYarn: if a :obj:`Series` is given without an object dtype, or if the length of the index does not match the length of the hierarchy.
        '''
        if not isinstance(series, Series):
            # an iterable of Bus: create an object Series, getting a default index
            self._series = Series(series, dtype=DTYPE_OBJECT)
        elif series.dtype != DTYPE_OBJECT:
            raise ErrorInitYarn(
                f'Series passed to initializer must have dtype object, not {series.dtype}'
            )
        else:
            self._series = series  # Bus by Bus label

        self._deepcopy_from_bus = deepcopy_from_bus

        # NOTE: a hierarchy is needed even when not supplied, as its length is used to set and check self._index
        if hierarchy is not None:
            self._hierarchy = hierarchy
        else:
            self._hierarchy = buses_to_hierarchy(
                self._series.values,
                self._series.index,
                deepcopy_from_bus=self._deepcopy_from_bus,
                init_exception_cls=ErrorInitYarn,
            )

        if own_index:
            self._index = index  #type: ignore
        else:
            constructors = dict(
                default_constructor=Index,
                explicit_constructor=index_constructor,
            )
            if index is None or index is IndexAutoFactory:
                self._index = IndexAutoFactory.from_optional_constructor(
                    len(self._hierarchy),
                    **constructors,
                )
            else:  # an iterable of labels or an Index
                self._index = index_from_optional_constructor(
                    index,  #type: ignore
                    **constructors,
                )

        if len(self._index) != len(self._hierarchy):
            raise ErrorInitYarn(
                f'Length of supplied index ({len(self._index)}) not of sufficient size ({len(self._hierarchy)}).'
            )
示例#8
0
    def to_series_values(
        self,
        values: tp.Iterator[tp.Any],
        *,
        dtype: DtypeSpecifier,
        name: NameType = None,
        index_constructor: tp.Optional[IndexConstructor] = None,
        axis: int = 0,
    ) -> 'Series':
        '''Realize ``values`` as a 1D array and return it as a :obj:`Series` labelled by an index taken from the source container.

        Args:
            values: Iterator of values, realized into an array with ``dtype``.
            dtype: dtype specifier passed to the array realization.
            name: Name given to the returned :obj:`Series`.
            index_constructor: If provided, called with the selected index; the result is used as the index of the returned :obj:`Series`.
            axis: With a 2D source container and an axis of 0, the columns of the container are used as the index; otherwise the index of the container is used.
        '''
        from static_frame.core.series import Series

        # the returned Series has the same index as the source container
        container = self._container
        use_columns = container._NDIM == 2 and axis == 0
        if use_columns:
            index = container._columns  #type: ignore
        else:
            index = container._index
        own_index = not use_columns

        if index_constructor is not None:
            index = index_constructor(index)

        # PERF: passing count here permits faster generator realization
        array, _ = iterable_to_array_1d(values, count=index.shape[0], dtype=dtype)
        return Series(array, name=name, index=index, own_index=own_index)
示例#9
0
    def _update_series_cache_iloc(self, key: GetItemKeyType) -> None:
        '''
        Update the Series cache with the key specified, where key can be any iloc GetItemKeyType.

        Raises:
            RuntimeError: if loading is required and no store is defined.
        '''
        # nothing to do if all are loaded, or if the requested keys are already loaded
        if self._loaded_all or self._loaded[key].all():
            return

        if self._store is None:
            raise RuntimeError('no store defined')

        targets = set(self._iloc_to_labels(key))

        frames = np.empty(shape=len(self._series._index), dtype=object)  # type: ignore
        for pos, (label, frame) in enumerate(self._series.items()):
            if frame is FrameDeferred and label in targets:
                # read the requested Frame from the store and update loaded status
                frame = self._store.read(label)
                self._loaded[pos] = True
            frames[pos] = frame
        frames.flags.writeable = False

        # replace the stored Series with one holding the newly read Frames
        self._series = Series(frames, index=self._series._index, dtype=object)
        self._loaded_all = self._loaded.all()
示例#10
0
    def test_yarn_init_c(self) -> None:
        '''Yarn initialization raises ErrorInitYarn for a tuple, and for an object-dtype Series, holding the result of ``ff.parse('s(2,2)')``.
        '''
        argument_factories = (
            lambda: (ff.parse('s(2,2)'), ),
            lambda: Series((ff.parse('s(2,2)'), ), dtype=object),
        )
        for make_argument in argument_factories:
            # the argument is created inside the context, exactly as in a direct call
            with self.assertRaises(ErrorInitYarn):
                Yarn(make_argument())
示例#11
0
        def gen() -> tp.Iterator[Series]:
            '''Yield a Boolean "loaded" Series, then one Series for each of size, nbytes, and shape; positions holding FrameDeferred are given a missing value.
            '''
            yield Series(
                    self._loaded,
                    index=self._series._index,
                    dtype=DTYPE_BOOL,
                    name='loaded',
                    )

            # attribute name, dtype of the resulting Series, and the value used for FrameDeferred
            specs = (
                    ('size', DTYPE_FLOAT_DEFAULT, np.nan),
                    ('nbytes', DTYPE_FLOAT_DEFAULT, np.nan),
                    ('shape', DTYPE_OBJECT, None),
                    )
            for attr, dtype, missing in specs:
                values = (missing if f is FrameDeferred else getattr(f, attr)
                        for f in self._series.values)
                yield Series(values,
                        index=self._series._index,
                        dtype=dtype,
                        name=attr,
                        )
示例#12
0
    def test_interface_summary_c(self) -> None:
        '''Per-group lengths of the interface are equal whether accessed from an instance or from its class.
        '''
        s = Series(['a', 'b', 'c'])

        def group_lengths(interface):  # type: ignore
            return interface.iter_group('group').apply(len)

        counts = group_lengths(s.interface)
        counts_cls = group_lengths(s.__class__.interface)

        self.assertTrue((counts == counts_cls).all())
示例#13
0
    def shapes(self) -> Series:
        '''A :obj:`Series` of the shape of each loaded :obj:`Frame`; an unloaded :obj:`Frame` is given a shape of None.

        Returns:
            :obj:`Series`
        '''
        # a generator is given to the Series; the object dtype is set explicitly
        per_frame = (
                None if f is FrameDeferred else f.shape
                for f in self._values_mutable
                )
        return Series(per_frame,
                index=self._index,
                dtype=object,
                name='shape',
                )
示例#14
0
    def shapes(self) -> Series:
        '''A :obj:`Series` of the shape of each loaded :obj:`Frame`; a position holding ``FrameDeferred`` is given None.

        Returns:
            :obj:`Series`
        '''
        # a generator is given to the Series; the object dtype is set explicitly
        per_frame = (
                None if f is FrameDeferred else f.shape
                for f in self._series.values
                )
        return Series(per_frame,
                index=self._series._index,
                dtype=object,
                name='shape',
                )
示例#15
0
 def to_series(self) -> Series:
     '''Return a :obj:`Series` with the :obj:`Frame` contained in this :obj:`Bus`. If the :obj:`Bus` is associated with a :obj:`Store`, all :obj:`Frame` will be loaded into memory and the returned :obj:`Series` will not be associated with the :obj:`Store`.
     '''
     # NOTE: `values` returns an immutable array and will fully realize from the Store
     frames = self.values
     return Series(
             frames,
             index=self._index,
             own_index=True,
             name=self._name,
             )
示例#16
0
def normalize_container(post: tp.Any) -> FrameOrSeries:
    '''Return ``post`` as a :obj:`Frame` or :obj:`Series`, promoting it if necessary to permit concatenation.

    A 1D array becomes a :obj:`Series` and a 2D array becomes a :obj:`Frame`. A :obj:`Frame` or :obj:`Series` is returned unchanged. Anything else, including an array of any other dimensionality, is treated as an element and given to ``Series.from_element`` with ``ELEMENT_TUPLE`` as the index.
    '''
    if post.__class__ is np.ndarray:
        ndim = post.ndim
        if ndim == 1:
            return Series(post)
        if ndim == 2:
            return Frame(post)
        # arrays of any other ndim (such as 0) are handled as elements below

    if isinstance(post, (Frame, Series)):
        return post

    # NOTE: do not set index as (container.name,), as this can lead to diagonal formations; will already be paired with stored labels
    return Series.from_element(post, index=ELEMENT_TUPLE)
示例#17
0
    def index_types(self) -> 'Series':
        '''
        Return a Series of Index classes for each index depth. The name of this index is used to label the result only if it has as many components as there are depths.

        Returns:
            :py:class:`static_frame.Series`
        '''
        from static_frame.core.series import Series

        name = self._name
        labels = name if (name and len(name) == self.depth) else None
        return Series(self._levels.index_types(), index=labels)
示例#18
0
    def _extract_iloc(self, key: GetItemKeyType) -> 'Bus':
        '''Select by iloc key.

        Returns:
            A new instance of this class or, if a single element is selected, that element.
        '''
        # update the cache for the requested positions before reading values
        self._update_series_cache_iloc(key=key)

        # iterable selection should be handled by NP
        selected = self._series.values[key]

        if isinstance(selected, np.ndarray):
            subset = Series(selected,
                            index=self._series._index.iloc[key],  # type: ignore
                            name=self._name)
            return self.__class__(series=subset, store=self._store)

        # a single element was selected
        return selected
示例#19
0
    def _extract_iloc(self, key: GetItemKeyType) -> 'Yarn':
        '''
        Returns:
            Yarn or, if an element is selected, a Frame
        '''
        target_hierarchy = self._hierarchy._extract_iloc(key)
        if isinstance(target_hierarchy, tuple):
            # got a single element: select from the Series with the first label, then from that result with the second
            bus = self._series[target_hierarchy[0]]
            return bus[target_hierarchy[1]]  #type: ignore

        # get the outer-most index of the hierarchical index
        labels_outer = target_hierarchy._get_unique_labels_in_occurence_order(
            depth=0)
        constructor_outer = next(iter(target_hierarchy._index_constructors))
        target_bus_index = constructor_outer(labels_outer)  # type: ignore

        # create a Boolean array equal to the entire realized length
        valid = np.full(len(self._index), False)
        valid[key] = True
        index = self._index.iloc[key]

        buses = np.empty(len(target_bus_index), dtype=DTYPE_OBJECT)

        # walk the depth-0 labels, tracking the [start, stop) span each one covers in `valid`
        stop = 0
        for bus_label, width in self._hierarchy.label_widths_at_depth(0):
            start, stop = stop, stop + width
            if bus_label in target_bus_index:
                pos = target_bus_index.loc_to_iloc(bus_label)  # type: ignore
                # the Boolean selection for this span is applied to the corresponding Bus
                buses[pos] = self._series[bus_label]._extract_iloc(
                    valid[start:stop])

        buses.flags.writeable = False
        target_series = Series(buses,
                               index=target_bus_index,
                               own_index=True,
                               name=self._series._name)

        return self.__class__(target_series,
                              index=index,
                              hierarchy=target_hierarchy,
                              deepcopy_from_bus=self._deepcopy_from_bus,
                              own_index=True)
示例#20
0
    def mloc(self) -> Series:
        '''Returns a Series with, for each loaded Frame, a tuple built from the ``mloc`` of that Frame; positions holding ``FrameDeferred`` are given None.
        '''
        if not self._loaded.any():
            # nothing is loaded: a Series created from None over the same index
            return Series(None, index=self._series._index)

        items = (
            (label, None if f is FrameDeferred else tuple(f.mloc))
            for label, f in zip(self._series._index, self._series.values)
        )
        return Series.from_items(items)
示例#21
0
    def test_bus_update_series_cache_iloc(self) -> None:
        '''Updating the cache at the position of a FrameDeferred raises RuntimeError when the Bus is created without a Store.
        '''
        frame = Frame.from_dict(
            dict(a=(1, 2), b=(3, 4)),
            index=('x', 'y'),
            name='foo',
        )
        config = StoreConfigMap.from_config(StoreConfig(index_depth=1))

        # simulating a Bus with a FrameDeferred but no Store, just for testing
        frames = Series((frame, FrameDeferred), index=('p', 'q'))
        bus = Bus(frames, config=config)
        self.assertFalse(bus._loaded_all)

        with self.assertRaises(RuntimeError):
            bus._update_series_cache_iloc(1)
示例#22
0
    def test_bus_extract_loc_a(self) -> None:
        '''Bus initialization raises ErrorInitBus for a Series of Frame indexed by an IndexHierarchy.
        '''
        # data, index, and name for each Frame
        frame_specs = (
            (dict(a=(1, 2), b=(3, 4)), ('x', 'y'), 'foo'),
            (dict(a=(1, 2, 3), b=(4, 5, 6)), ('x', 'y', 'z'), 'bar'),
            (dict(d=(10, 20), b=(50, 60)), ('p', 'q'), 'f3'),
        )
        frames = tuple(
            Frame.from_dict(data, index=index, name=name)
            for data, index, name in frame_specs
        )

        ih = IndexHierarchy.from_labels((('a', 1), ('b', 2), ('b', 1)))
        s1 = Series(frames, index=ih, dtype=object)

        # IndexHierarchy is not supported, as labels are tuples, not strings
        with self.assertRaises(ErrorInitBus):
            Bus(s1)
示例#23
0
    def from_concat(
        cls,
        containers: tp.Iterable['Yarn'],
        *,
        index: tp.Optional[tp.Union[IndexInitializer,
                                    IndexAutoFactoryType]] = None,
        name: NameType = NAME_DEFAULT,
        deepcopy_from_bus: bool = False,
    ) -> 'Yarn':
        '''
        Concatenate multiple :obj:`Yarn` into a new :obj:`Yarn`. Loaded status of :obj:`Frame` within each :obj:`Bus` will not be altered.

        Args:
            containers: An iterable of :obj:`Yarn`.
            index: Optionally provide new labels for the result of the concatenation. If not provided, the index of each :obj:`Yarn` is collected and combined.
            name: Name given to the :obj:`Series` created to hold the collected :obj:`Bus`.
            deepcopy_from_bus: Forwarded to the constructor of the returned :obj:`Yarn`.

        Raises:
            NotImplementedError: if an element of ``containers`` is not a :obj:`Yarn`.
        '''
        # component indices are only collected when no index is supplied
        index_components: tp.Optional[tp.List[IndexBase]]
        if index is None:
            index_components = []
        else:
            index_components = None

        bus_components = []
        for element in containers:
            if not isinstance(element, Yarn):
                raise NotImplementedError(
                    f'cannot instantiate from {type(element)}')
            bus_components.extend(element._series.values)
            if index_components is not None:
                index_components.append(element.index)

        # populate an object array one Bus at a time, then make it immutable
        array = np.empty(len(bus_components), dtype=DTYPE_OBJECT)
        for pos, bus in enumerate(bus_components):
            array[pos] = bus
        array.flags.writeable = False

        if index_components is not None:
            index = index_many_set(index_components, Index, union=True)

        return cls(
            Series(array, name=name),
            deepcopy_from_bus=deepcopy_from_bus,
            index=index,
        )
示例#24
0
    def _extract_iloc(self, key: GetItemKeyType) -> 'Bus':
        '''
        Returns:
            Bus or, if an element is selected, a Frame
        '''
        # update the cache for the requested positions before reading values
        self._update_series_cache_iloc(key=key)

        # iterable selection should be handled by NP
        selected = self._values_mutable[key]

        # NOTE: Bus only stores Frame and FrameDeferred, so a class identity check on the selection is sufficient
        if selected.__class__ is np.ndarray:
            # values will be copied and made immutable
            subset = Series(selected, index=self._index.iloc[key], name=self._name)
            return self._derive(subset, own_data=True)

        # a single element was selected
        return selected #type: ignore
示例#25
0
 def to_series(self):
     '''Return a Series with values from this Index's labels. No index is passed to the new Series.
     '''
     from static_frame import Series

     # NOTE: not sure if index should be self here
     labels = self.values
     return Series(labels, index=None)
示例#26
0
 def _axis_series(self, axis: int) -> tp.Iterator[Series]:
     '''Generator of Series across an axis. Each Series is named by its label; with an axis of 0 the index is used to label values, otherwise the columns are used.
     '''
     if axis == 0:
         index = self._index
     else:
         index = self._columns

     for label, array in self._axis_array_items(axis):
         yield Series(array, index=index, name=label, own_index=True)
示例#27
0
 def _deferred_series(labels: tp.Iterable[str]) -> Series:
     '''Return an object-dtype :obj:`Series` constructed from ``FrameDeferred`` with ``labels`` as the index.
     '''
     # the object dtype is set explicitly
     return Series(
             FrameDeferred,
             index=labels,
             dtype=object,
             )
示例#28
0
 def index_types(self) -> 'Series':
     '''Return a :obj:`Series` built from ``EMPTY_TUPLE``.
     '''
     # NOTE: this implementation is here due to pydoc.render_doc call that led to calling this base class method
     # Series is imported locally, within the method
     from static_frame.core.series import Series
     return Series(EMPTY_TUPLE)  # pragma: no cover
示例#29
0
 def items() -> tp.Iterator[tp.Tuple[tp.Hashable, Series]]:
     '''Yield pairs of label and Series, one per entry in ``letters``; each Series takes the array at the same position in ``series_arrays`` and the index of ``f1``.
     '''
     for pos, label in enumerate(letters):
         yield label, Series(series_arrays[pos], index=f1.index)
示例#30
0
 def to_series(self) -> Series:
     '''Return a :obj:`Series` with the :obj:`Frame` contained in all contained :obj:`Bus`.
     '''
     # NOTE: reading `values` should load all deferred Frame
     frames = self.values
     return Series(
             frames,
             index=self._index,
             own_index=True,
             )