def test_bus_init_b(self) -> None:
    '''Bus initialization raises ErrorInitBus for a Series of integers, with or without object dtype.
    '''
    cases = (
        ([1, 2, 3], {}),
        ([3, 4], {'dtype': object}),
    )
    for values, kwargs in cases:
        with self.assertRaises(ErrorInitBus):
            Bus(Series(values, **kwargs))
def sort_values(self,
        *,
        ascending: bool = True,
        kind: str = DEFAULT_SORT_KIND,
        key: tp.Callable[['Series'], tp.Union[np.ndarray, 'Series']],
        ) -> 'Bus':
    '''
    Return a new Bus ordered by the sorted values. Note that as a Bus contains Frames, a `key` argument must be provided to extract a sortable value, and this key function will process a :obj:`Series` of :obj:`Frame`.

    Args:
        *
        {ascending}
        {kind}
        {key}

    Returns:
        :obj:`Bus`
    '''
    # reading `values` handles max_persist, but delivers an array with all Frame loaded
    frames = Series(
            self.values,
            index=self._index,
            own_index=True,
            name=self._name,
            )
    ordered = frames.sort_values(ascending=ascending, kind=kind, key=key)
    return self._derive(ordered, own_data=True)
def _to_series_state(self) -> Series:
    '''Return a :obj:`Series` built from the current mutable values array, labelled with this container's index and name.
    '''
    state = self._values_mutable
    # the mutable array will be copied in the Series construction
    return Series(
            state,
            index=self._index,
            own_index=True,
            name=self._name,
            )
def display(self,
        config: tp.Optional[DisplayConfig] = None,
        *,
        style_config: tp.Optional[StyleConfig] = None,
        ) -> Display:
    '''{doc}

    Args:
        {config}
    '''
    # NOTE: the key change over Series is providing this container's class as the displayed class
    if not config:
        config = DisplayActive.get()
    display_cls = Display.from_values(
            (),
            header=DisplayHeader(self.__class__, self._series._name),
            config=config,
            )

    # NOTE: do not load FrameDeferred, so concatenate the contained values directly
    flat = np.empty(shape=len(self._index), dtype=DTYPE_OBJECT)
    np.concatenate(
            [b._values_mutable for b in self._series.values],
            out=flat,
            )
    flat.flags.writeable = False

    proxy = Series(flat, index=self._index, own_index=True)
    return proxy._display(
            config,
            display_cls=display_cls,
            style_config=style_config,
            )
def _extract_loc(self, key: GetItemKeyType) -> 'Bus':
    '''Select by label. A single-element selection returns the stored element; otherwise a new instance of this class is returned, sharing this instance's store, config and max_persist.
    '''
    iloc_key = self._series._index.loc_to_iloc(key)

    # NOTE: if we update before slicing, we change the local and the object handed back
    self._update_series_cache_iloc(key=iloc_key)

    selection = self._series.values[iloc_key]
    if not isinstance(selection, np.ndarray):
        # a single element was selected
        # NOTE: only support str labels, not IndexHierarchy
        return selection #type: ignore

    subset = Series(
            selection,
            index=self._series._index.iloc[iloc_key],
            own_index=True,
            name=self._series._name,
            )
    return self.__class__(
            series=subset,
            store=self._store,
            config=self._config,
            max_persist=self._max_persist,
            )
def _extract_loc(self, key: GetItemKeyType) -> 'Bus':
    '''Select by label. A single-element selection returns the stored element, unless the key is an HLoc with multiple keys, in which case a container is still returned.
    '''
    iloc_key = self._series._index.loc_to_iloc(key) #type: ignore

    # NOTE: if we update before slicing, we change the local and the object handed back
    self._update_series_cache_iloc(key=iloc_key)

    selection = self._series.values[iloc_key]
    if not isinstance(selection, np.ndarray):
        # a single element was selected
        if not (isinstance(key, HLoc) and key.has_key_multiple()):
            return selection #type: ignore
        # must return a container, even though we do not have an array
        selection = np.array(selection)
        selection.flags.writeable = False

    subset = Series(
            selection,
            index=self._series._index.iloc[iloc_key],
            own_index=True,
            name=self._series._name,
            )
    return self.__class__(
            series=subset,
            store=self._store,
            config=self._config,
            )
def __init__(self,
        series: tp.Union[Series, tp.Iterable[Bus]],
        *,
        index: tp.Optional[tp.Union[IndexBase, IndexAutoFactoryType]] = None,
        index_constructor: tp.Optional[IndexConstructor] = None,
        deepcopy_from_bus: bool = False,
        hierarchy: tp.Optional[IndexHierarchy] = None,
        own_index: bool = False,
        ) -> None:
    '''
    Args:
        series: An iterable (or :obj:`Series`) of :obj:`Bus`. The length of this container is not the same as ``index``, if provided.
        index: Optionally provide an index for the :obj:`Frame` contained in all :obj:`Bus`.
        index_constructor: Optional constructor used when building the index from ``index`` or from the auto-generated default.
        deepcopy_from_bus: Stored on the instance and forwarded when deriving the hierarchy from the contained :obj:`Bus`.
        hierarchy: Optionally provide a pre-built :obj:`IndexHierarchy`; if None, one is derived from the contained :obj:`Bus`.
        own_index: If True, ``index`` is assigned directly without passing through an index constructor.

    Raises:
        ErrorInitYarn: if a :obj:`Series` is given without object dtype, or if the length of the index does not match the length of the hierarchy.
    '''
    if not isinstance(series, Series):
        # an iterable of Bus: realize as an object Series, getting a default index
        self._series = Series(series, dtype=DTYPE_OBJECT)
    elif series.dtype != DTYPE_OBJECT:
        raise ErrorInitYarn(
                f'Series passed to initializer must have dtype object, not {series.dtype}'
                )
    else:
        self._series = series # Bus by Bus label

    self._deepcopy_from_bus = deepcopy_from_bus

    # _hierarchy might be None while we still need to set self._index
    if hierarchy is not None:
        self._hierarchy = hierarchy
    else:
        self._hierarchy = buses_to_hierarchy(
                self._series.values,
                self._series.index,
                deepcopy_from_bus=self._deepcopy_from_bus,
                init_exception_cls=ErrorInitYarn,
                )

    if own_index:
        self._index = index #type: ignore
    elif index is None or index is IndexAutoFactory:
        self._index = IndexAutoFactory.from_optional_constructor(
                len(self._hierarchy),
                default_constructor=Index,
                explicit_constructor=index_constructor,
                )
    else:
        # an iterable of labels or an Index
        self._index = index_from_optional_constructor(
                index, #type: ignore
                default_constructor=Index,
                explicit_constructor=index_constructor,
                )

    # the index must label every position described by the hierarchy
    index_size = len(self._index)
    hierarchy_size = len(self._hierarchy)
    if index_size != hierarchy_size:
        raise ErrorInitYarn(
                f'Length of supplied index ({index_size}) not of sufficient size ({hierarchy_size}).'
                )
def to_series_values(self,
        values: tp.Iterator[tp.Any],
        *,
        dtype: DtypeSpecifier,
        name: NameType = None,
        index_constructor: tp.Optional[IndexConstructor] = None,
        axis: int = 0,
        ) -> 'Series':
    '''Realize an iterator of values as a :obj:`Series` labelled by an index taken from the source container.

    Args:
        values: Iterator of values; realized into a 1D array sized by the chosen index.
        dtype: dtype passed to the array realization.
        name: Name given to the returned :obj:`Series`.
        index_constructor: If provided, called with the selected index to produce the index used.
        axis: For a two-dimensional container, an axis of 0 selects the columns as the index; otherwise the container's index is used.
    '''
    from static_frame.core.series import Series

    source = self._container
    # Creating a Series that will have the same index as source container
    if source._NDIM == 2 and axis == 0:
        index, own_index = source._columns, False #type: ignore
    else:
        index, own_index = source._index, True

    if index_constructor is not None:
        index = index_constructor(index)

    # PERF: passing count here permits faster generator realization
    array, _ = iterable_to_array_1d(
            values,
            count=index.shape[0],
            dtype=dtype,
            )
    return Series(
            array,
            name=name,
            index=index,
            own_index=own_index,
            )
def _update_series_cache_iloc(self, key: GetItemKeyType) -> None:
    '''
    Update the Series cache with the key specified, where key can be any iloc GetItemKeyType.

    Raises:
        RuntimeError: if loading is required but no store is defined.
    '''
    # nothing to do if all are loaded, or if the requested positions are already loaded
    if self._loaded_all or self._loaded[key].all():
        return

    if self._store is None:
        raise RuntimeError('no store defined')

    requested = set(self._iloc_to_labels(key))
    frames = np.empty(shape=len(self._series._index), dtype=object) # type: ignore

    for pos, (label, frame) in enumerate(self._series.items()):
        if frame is FrameDeferred and label in requested:
            frame = self._store.read(label)
            self._loaded[pos] = True # update loaded status
        frames[pos] = frame
    frames.flags.writeable = False

    # replace the stored Series with one holding the newly loaded Frame
    self._series = Series(frames, index=self._series._index, dtype=object)
    self._loaded_all = self._loaded.all()
def test_yarn_init_c(self) -> None:
    '''Yarn initialization raises ErrorInitYarn when given a Frame directly, as a tuple or as an object Series.
    '''
    with self.assertRaises(ErrorInitYarn):
        frames = (ff.parse('s(2,2)'),)
        Yarn(frames)

    with self.assertRaises(ErrorInitYarn):
        frames = (ff.parse('s(2,2)'),)
        Yarn(Series(frames, dtype=object))
def gen() -> tp.Iterator[Series]:
    '''Yield a Series of loaded flags, followed by one Series each for size, nbytes and shape, using a placeholder for each FrameDeferred.
    '''
    # (attribute name, dtype of the resulting Series, value used for FrameDeferred)
    specs = (
        ('size', DTYPE_FLOAT_DEFAULT, np.nan),
        ('nbytes', DTYPE_FLOAT_DEFAULT, np.nan),
        ('shape', DTYPE_OBJECT, None),
    )

    yield Series(
            self._loaded,
            index=self._series._index,
            dtype=DTYPE_BOOL,
            name='loaded',
            )

    for attr, dtype, missing in specs:
        values = (
            missing if f is FrameDeferred else getattr(f, attr)
            for f in self._series.values
        )
        yield Series(
                values,
                index=self._series._index,
                dtype=dtype,
                name=attr,
                )
def test_interface_summary_c(self) -> None:
    '''The per-group counts of the interface are the same from an instance and from its class.
    '''
    s = Series(['a', 'b', 'c'])

    def group_counts(interface):
        # number of interface entries in each group
        return interface.iter_group('group').apply(len)

    counts = group_counts(s.interface)
    counts_cls = group_counts(s.__class__.interface)

    self.assertTrue((counts == counts_cls).all())
def shapes(self) -> Series:
    '''A :obj:`Series` describing the shape of each loaded :obj:`Frame`. Unloaded :obj:`Frame` will have a shape of None.

    Returns:
        :obj:`Series`
    '''
    # read from the mutable values directly so that no FrameDeferred is loaded
    values = (
        None if f is FrameDeferred else f.shape
        for f in self._values_mutable
    )
    return Series(
            values,
            index=self._index,
            dtype=object,
            name='shape',
            )
def shapes(self) -> Series:
    '''A :obj:`Series` describing the shape of each loaded :obj:`Frame`. Positions holding FrameDeferred have a shape of None.

    Returns:
        :obj:`Series`
    '''
    values = (
        None if f is FrameDeferred else f.shape
        for f in self._series.values
    )
    return Series(
            values,
            index=self._series._index,
            dtype=object,
            name='shape',
            )
def to_series(self) -> Series:
    '''Return a :obj:`Series` with the :obj:`Frame` contained in this :obj:`Bus`. If the :obj:`Bus` is associated with a :obj:`Store`, all :obj:`Frame` will be loaded into memory and the returned :obj:`Series` will no longer be associated with the :obj:`Store`.
    '''
    # values returns an immutable array and will fully realize from Store
    frames = self.values
    return Series(
            frames,
            index=self._index,
            own_index=True,
            name=self._name,
            )
def normalize_container(post: tp.Any) -> FrameOrSeries:
    '''Promote ``post`` to a Frame or Series to permit concatenation.

    Frame and Series pass through unchanged; 1D and 2D arrays become a Series or Frame respectively; anything else is wrapped as a single-element Series.
    '''
    if isinstance(post, (Frame, Series)):
        return post

    if post.__class__ is np.ndarray:
        if post.ndim == 1:
            return Series(post)
        if post.ndim == 2:
            return Frame(post)
        # let ndim 0 pass, to be handled as an element

    # NOTE: do not set index as (container.name,), as this can lead to diagonal formations; will already be paired with stored labels
    return Series.from_element(post, index=ELEMENT_TUPLE)
def index_types(self) -> 'Series':
    '''
    Return a Series of Index classes for each index depth.

    If this index has a sized name with one entry per depth, that name is used to label the returned Series; otherwise, default labels are used.

    Returns:
        :py:class:`static_frame.Series`
    '''
    from static_frame.core.series import Series

    name = self._name
    labels = None
    # A name is only usable as labels when it is sized and has one entry per depth.
    # A truthy but unsized name (such as an integer) would raise a TypeError from len(), so check for __len__ first.
    if name and hasattr(name, '__len__') and len(name) == self.depth:
        labels = name

    return Series(self._levels.index_types(), index=labels)
def _extract_iloc(self, key: GetItemKeyType) -> 'Bus':
    '''Select by integer position. A single-element selection returns the stored element; otherwise a new instance of this class is returned with the same store.
    '''
    self._update_series_cache_iloc(key=key)

    # iterable selection should be handled by NP
    selection = self._series.values[key]
    if isinstance(selection, np.ndarray):
        subset = Series(
                selection,
                index=self._series._index.iloc[key], # type: ignore
                name=self._name,
                )
        return self.__class__(series=subset, store=self._store)

    # a single element was selected
    return selection
def _extract_iloc(self, key: GetItemKeyType) -> 'Yarn':
    '''
    Select by integer position across all contained Bus.

    The hierarchy is selected first; the outer-depth labels of that selection determine which Bus are retained, and each retained Bus is itself selected with the slice of a Boolean mask that corresponds to its positions.

    Returns:
        Yarn or, if an element is selected, a Frame
    '''
    target_hierarchy = self._hierarchy._extract_iloc(key)
    if isinstance(target_hierarchy, tuple):
        # got a single element, return a Frame
        # the tuple is (Bus label, Frame label)
        return self._series[target_hierarchy[0]][
                target_hierarchy[1]] #type: ignore

    # get the outer-most index of the hierarchical index
    target_bus_index = target_hierarchy._get_unique_labels_in_occurence_order(
            depth=0)
    # rebuild those labels with the first index constructor of the selected hierarchy
    target_bus_index = next(iter(target_hierarchy._index_constructors))(
            target_bus_index) # type: ignore

    # create a Boolean array equal to the entire realized length
    valid = np.full(len(self._index), False)
    valid[key] = True
    index = self._index.iloc[key]

    buses = np.empty(len(target_bus_index), dtype=DTYPE_OBJECT)
    # pos tracks the start of the current Bus within the full-length mask
    pos = 0
    for bus_label, width in self._hierarchy.label_widths_at_depth(0):
        if bus_label not in target_bus_index:
            # Bus not selected: advance past its positions
            pos += width
            continue
        # the portion of the mask that applies to this Bus
        extract_per_bus = valid[pos:pos + width]
        pos += width
        idx = target_bus_index.loc_to_iloc(bus_label) # type: ignore
        buses[idx] = self._series[bus_label]._extract_iloc(extract_per_bus)
    buses.flags.writeable = False

    target_series = Series(
            buses,
            index=target_bus_index,
            own_index=True,
            name=self._series._name,
            )
    # the already-selected index and hierarchy are passed through directly
    return self.__class__(
            target_series,
            index=index,
            hierarchy=target_hierarchy,
            deepcopy_from_bus=self._deepcopy_from_bus,
            own_index=True,
            )
def mloc(self) -> Series:
    '''Returns a Series whose values are tuples built from the ``mloc`` of each loaded Frame; positions holding FrameDeferred have a value of None.
    '''
    if not self._loaded.any():
        # nothing is loaded: every value is None
        return Series(None, index=self._series._index)

    items = (
        (label, None if f is FrameDeferred else tuple(f.mloc))
        for label, f in zip(self._series._index, self._series.values)
    )
    return Series.from_items(items)
def test_bus_update_series_cache_iloc(self) -> None:
    '''Updating the cache for a FrameDeferred raises RuntimeError when the Bus has no Store.
    '''
    f1 = Frame.from_dict(
            {'a': (1, 2), 'b': (3, 4)},
            index=('x', 'y'),
            name='foo',
            )
    config = StoreConfigMap.from_config(StoreConfig(index_depth=1))

    # simulating a Bus with a FrameDeferred but no Store, just for testing
    b1 = Bus(Series((f1, FrameDeferred), index=('p', 'q')), config=config)
    self.assertFalse(b1._loaded_all)

    with self.assertRaises(RuntimeError):
        b1._update_series_cache_iloc(1)
def test_bus_extract_loc_a(self) -> None:
    '''Bus initialization raises ErrorInitBus for a Series of Frame indexed by an IndexHierarchy.
    '''
    f1 = Frame.from_dict(
            {'a': (1, 2), 'b': (3, 4)},
            index=('x', 'y'),
            name='foo',
            )
    f2 = Frame.from_dict(
            {'a': (1, 2, 3), 'b': (4, 5, 6)},
            index=('x', 'y', 'z'),
            name='bar',
            )
    f3 = Frame.from_dict(
            {'d': (10, 20), 'b': (50, 60)},
            index=('p', 'q'),
            name='f3',
            )

    ih = IndexHierarchy.from_labels((('a', 1), ('b', 2), ('b', 1)))
    s1 = Series((f1, f2, f3), index=ih, dtype=object)

    # do not support IndexHierarchy, as labels are tuples, not strings
    with self.assertRaises(ErrorInitBus):
        Bus(s1)
def from_concat(cls,
        containers: tp.Iterable['Yarn'],
        *,
        index: tp.Optional[tp.Union[IndexInitializer, IndexAutoFactoryType]] = None,
        name: NameType = NAME_DEFAULT,
        deepcopy_from_bus: bool = False,
        ) -> 'Yarn':
    '''
    Concatenate multiple :obj:`Yarn` into a new :obj:`Yarn`. Loaded status of :obj:`Frame` within each :obj:`Bus` will not be altered.

    Args:
        containers: Iterable of :obj:`Yarn` to concatenate.
        index: Optionally provide new labels for the result of the concatenation.
        name: Name given to the internal :obj:`Series` of :obj:`Bus`.
        deepcopy_from_bus: Passed through to the initializer.

    Raises:
        NotImplementedError: if an element of ``containers`` is not a :obj:`Yarn`.
    '''
    # only collect and combine the component indices if no index was supplied
    derive_index = index is None

    buses = []
    indices: tp.List[IndexBase] = []
    for element in containers:
        if not isinstance(element, Yarn):
            raise NotImplementedError(
                    f'cannot instantiate from {type(element)}')
        buses.extend(element._series.values)
        if derive_index:
            indices.append(element.index)

    # assign each Bus by position into an object array
    array = np.empty(len(buses), dtype=DTYPE_OBJECT)
    for pos, bus in enumerate(buses):
        array[pos] = bus
    array.flags.writeable = False

    if derive_index:
        index = index_many_set(indices, Index, union=True)

    return cls(
            Series(array, name=name),
            deepcopy_from_bus=deepcopy_from_bus,
            index=index,
            )
def _extract_iloc(self, key: GetItemKeyType) -> 'Bus':
    '''
    Returns:
        Bus or, if an element is selected, a Frame
    '''
    self._update_series_cache_iloc(key=key)

    # iterable selection should be handled by NP
    selection = self._values_mutable[key]

    # NOTE: Bus only stores Frame and FrameDeferred, can rely on check with values
    if selection.__class__ is not np.ndarray:
        # a single element was selected
        return selection #type: ignore

    # values will be copied and made immutable
    subset = Series(
            selection,
            index=self._index.iloc[key],
            name=self._name,
            )
    return self._derive(subset, own_data=True)
def to_series(self):
    '''Return a Series with values from this Index's labels; no index is passed to the new Series.
    '''
    from static_frame import Series

    # not sure if index should be self here
    labels = self.values
    return Series(labels, index=None)
def _axis_series(self, axis: int) -> tp.Iterator[Series]:
    '''Generator of Series across an axis, each named by its label along that axis.
    '''
    if axis == 0:
        index = self._index
    else:
        index = self._columns

    for label, axis_values in self._axis_array_items(axis):
        yield Series(
                axis_values,
                index=index,
                name=label,
                own_index=True,
                )
def _deferred_series(labels: tp.Iterable[str]) -> Series:
    '''Return an object-dtype Series, indexed by ``labels``, with FrameDeferred given as the value.
    '''
    # make an object dtype
    return Series(
            FrameDeferred,
            index=labels,
            dtype=object,
            )
def index_types(self) -> 'Series':
    '''Return a Series created from an empty tuple.
    '''
    # NOTE: this implementation is here due to pydoc.render_doc call that led to calling this base class method
    from static_frame.core.series import Series

    empty = Series(EMPTY_TUPLE)
    return empty # pragma: no cover
def items() -> tp.Iterator[tp.Tuple[tp.Hashable, Series]]:
    '''Yield pairs of label and Series, one per entry in ``letters``; each Series uses the array at the same position in ``series_arrays``, indexed by the index of ``f1``.
    '''
    for position, label in enumerate(letters):
        yield label, Series(series_arrays[position], index=f1.index)
def to_series(self) -> Series:
    '''Return a :obj:`Series` with the :obj:`Frame` contained in all contained :obj:`Bus`.
    '''
    # NOTE: this should load all deferred Frame
    frames = self.values
    return Series(
            frames,
            index=self._index,
            own_index=True,
            )