def sort_values(self,
        *,
        ascending: bool = True,
        kind: str = DEFAULT_SORT_KIND,
        key: tp.Callable[['Series'], tp.Union[np.ndarray, 'Series']],
        ) -> 'Bus':
    '''
    Return a new Bus ordered by the sorted values. Note that as a Bus contains Frames, a `key` argument must be provided to extract a sortable value, and this key function will process a :obj:`Series` of :obj:`Frame`.

    Args:
        *
        {ascending}
        {kind}
        {key}

    Returns:
        :obj:`Bus`
    '''
    # NOTE: reading ``self.values`` will handle max_persist, but will deliver an array with all Frame loaded
    frames = Series(
            self.values,
            index=self._index,
            own_index=True,
            name=self._name,
            )
    # the sort itself is delegated to Series; ``key`` extracts the sortable values
    ordered = frames.sort_values(ascending=ascending, kind=kind, key=key)
    return self._derive(ordered, own_data=True)
def __init__(self,
        series: tp.Union[Series, tp.Iterable[Bus]],
        *,
        index: tp.Optional[tp.Union[IndexBase, IndexAutoFactoryType]] = None,
        index_constructor: tp.Optional[IndexConstructor] = None,
        deepcopy_from_bus: bool = False,
        hierarchy: tp.Optional[IndexHierarchy] = None,
        own_index: bool = False,
        ) -> None:
    '''
    Args:
        series: An iterable (or :obj:`Series`) of :obj:`Bus`. The length of this container is not the same as ``index``, if provided.
        index: Optionally provide an index for the :obj:`Frame` contained in all :obj:`Bus`.
        index_constructor: Optional constructor used when an index is built, either from ``index`` or as the default index.
        deepcopy_from_bus: Stored on the instance and forwarded to ``buses_to_hierarchy`` when the hierarchy is derived.
        hierarchy: Optionally provide an already-built hierarchy; when None, one is derived from the contained :obj:`Bus`.
        own_index: If True, ``index`` is assigned as given, without passing through an index constructor.

    Raises:
        ErrorInitYarn: if a :obj:`Series` is given that does not have an object dtype, or if the length of the resolved index is not equal to the length of the hierarchy.
    '''
    if not isinstance(series, Series):
        # an iterable of Bus: wrap in a Series, which also gets a default index
        self._series = Series(series, dtype=DTYPE_OBJECT)
    elif series.dtype != DTYPE_OBJECT:
        raise ErrorInitYarn(
                f'Series passed to initializer must have dtype object, not {series.dtype}'
                )
    else:
        self._series = series # Bus by Bus label

    self._deepcopy_from_bus = deepcopy_from_bus

    # the hierarchy is resolved before the index, as its length is needed to set self._index
    if hierarchy is not None:
        self._hierarchy = hierarchy
    else:
        self._hierarchy = buses_to_hierarchy(
                self._series.values,
                self._series.index,
                deepcopy_from_bus=self._deepcopy_from_bus,
                init_exception_cls=ErrorInitYarn,
                )

    if own_index:
        self._index = index #type: ignore
    elif index is not None and index is not IndexAutoFactory:
        # an iterable of labels or an Index
        self._index = index_from_optional_constructor(
                index, #type: ignore
                default_constructor=Index,
                explicit_constructor=index_constructor,
                )
    else:
        # no usable index given: create one sized to the hierarchy
        self._index = IndexAutoFactory.from_optional_constructor(
                len(self._hierarchy),
                default_constructor=Index,
                explicit_constructor=index_constructor,
                )

    # the index must have exactly one label per hierarchy entry
    if len(self._index) != len(self._hierarchy):
        raise ErrorInitYarn(
                f'Length of supplied index ({len(self._index)}) not of sufficient size ({len(self._hierarchy)}).'
                )
def _update_series_cache_iloc(self, key: GetItemKeyType) -> None:
    '''
    Update the Series cache with the key specified, where key can be any iloc GetItemKeyType.

    Raises:
        RuntimeError: if loading is required but no store is defined.
    '''
    # do nothing if all loaded, or if the requested keys are already loaded
    if self._loaded_all or self._loaded[key].all():
        return
    if self._store is None:
        raise RuntimeError('no store defined')

    targets = set(self._iloc_to_labels(key))

    # rebuild the full values array, reading from the store only for targeted, still-deferred labels
    frames = np.empty(shape=len(self._series._index), dtype=object) # type: ignore
    for position, (label, frame) in enumerate(self._series.items()):
        if frame is FrameDeferred and label in targets:
            frame = self._store.read(label)
            self._loaded[position] = True # update loaded status
        frames[position] = frame
    frames.flags.writeable = False

    self._series = Series(frames, index=self._series._index, dtype=object)
    self._loaded_all = self._loaded.all()
def test_bus_init_b(self) -> None:
    '''Bus initialization must raise ErrorInitBus for a non-object Series and for an object Series of integers.
    '''
    invalid = (
            Series([1, 2, 3]),
            Series([3, 4], dtype=object),
            )
    for series in invalid:
        with self.assertRaises(ErrorInitBus):
            Bus(series)
def display(self,
        config: tp.Optional[DisplayConfig] = None,
        *,
        style_config: tp.Optional[StyleConfig] = None,
        ) -> Display:
    '''{doc}

    Args:
        {config}
    '''
    # NOTE: the key change over Series is providing this container's class as the displayed class
    if not config:
        config = DisplayActive.get()

    header = DisplayHeader(self.__class__, self._series._name)
    display_cls = Display.from_values((), header=header, config=config)

    # NOTE: do not load FrameDeferred, so concatenate the contained values directly into a pre-allocated object array
    values = np.empty(shape=len(self._index), dtype=DTYPE_OBJECT)
    np.concatenate(
            [bus._values_mutable for bus in self._series.values],
            out=values,
            )
    values.flags.writeable = False

    series = Series(values, index=self._index, own_index=True)
    return series._display(
            config,
            display_cls=display_cls,
            style_config=style_config,
            )
def _extract_loc2d(self,
        row_key: GetItemKeyType = NULL_SLICE,
        column_key: GetItemKeyType = NULL_SLICE,
        ) -> tp.Union['Frame', 'Series']:
    '''
    Select from the contained Frame with loc keys, supplying the fill value for labels that are not found.

    NOTE: keys are loc keys; None is interpreted as selector, not a NULL_SLICE
    '''
    from static_frame.core.series import Series
    from static_frame.core.container_util import get_col_fill_value_factory

    fill_value = self._fill_value
    container = self._container # always a Frame

    def missing() -> tp.Any:
        # resolve the fill value for a single missing element
        return get_col_fill_value_factory(fill_value, None)(0, None)

    row_key, row_is_multiple, row_is_null_slice = self._extract_key_attrs(
            row_key,
            container._index,
            )
    column_key, column_is_multiple, column_is_null_slice = self._extract_key_attrs(
            column_key,
            container._columns, #type: ignore
            )

    if row_is_multiple and column_is_multiple:
        # both keys select multiple labels; cannot reindex if loc keys are elements
        return container.reindex( # type: ignore
                index=None if row_is_null_slice else row_key,
                columns=None if column_is_null_slice else column_key,
                fill_value=fill_value,
                )

    if not row_is_multiple and not column_is_multiple:
        # selecting an element
        try:
            return container.loc[row_key, column_key]
        except KeyError:
            return missing() #type: ignore

    if not row_is_multiple:
        # row is an element, return Series indexed by columns
        if row_key in container._index: #type: ignore
            return container.loc[row_key].reindex(column_key, fill_value=fill_value) #type: ignore
        return Series.from_element(missing(), index=column_key, name=row_key)

    # columns is an element, return Series indexed by index
    if column_key in container._columns: #type: ignore
        return container[column_key].reindex(row_key, fill_value=fill_value) #type: ignore
    return Series.from_element(missing(), index=row_key, name=column_key)
def normalize_container(post: tp.Any) -> FrameOrSeries:
    '''Return ``post`` as a :obj:`Frame` or :obj:`Series`; an element is promoted to a :obj:`Series` to permit concatenation.
    '''
    if post.__class__ is np.ndarray:
        ndim = post.ndim
        if ndim == 1:
            return Series(post)
        if ndim == 2:
            return Frame(post)
        # let ndim 0 pass, to be handled as an element below

    if isinstance(post, (Frame, Series)):
        return post

    # NOTE: do not set index as (container.name,), as this can lead to diagonal formations; will already be paired with stored labels
    return Series.from_element(post, index=ELEMENT_TUPLE)
def mloc(self) -> Series:
    '''Returns a :obj:`Series` holding, for each loaded Frame, a tuple of its ``mloc`` values (memory locations); Frame not yet loaded are represented with None.
    '''
    if not self._loaded.any():
        # nothing loaded: every label maps to None
        return Series.from_element(None, index=self._series._index)

    items = (
            (label, None if frame is FrameDeferred else tuple(frame.mloc))
            for label, frame in zip(self._series._index, self._series.values)
            )
    return Series.from_items(items)
def mloc(self) -> Series:
    '''Returns a :obj:`Series` showing a tuple of memory locations within each loaded Frame; Frame not yet loaded are represented with None.
    '''
    if not self._loaded.any():
        # nothing loaded: every label maps to None
        return Series.from_element(None, index=self._index)

    items = (
            (label, None if frame is FrameDeferred else tuple(frame.mloc))
            for label, frame in zip(self._index, self._values_mutable)
            )
    return Series.from_items(items)
def test_bus_max_persist_3(self) -> None:
    '''With max_persist equal to the number of Frame, repeated access reorders the last-accessed labels.
    '''
    pairs = (
            (str(i), Frame(np.arange(i, i + 10).reshape(2, 5)))
            for i in range(4)
            )
    b1 = Bus(Series.from_items(pairs, dtype=object))

    config = StoreConfig(
            index_depth=1,
            columns_depth=1,
            include_columns=True,
            include_index=True,
            )

    with temp_file('.zip') as fp:
        b1.to_zip_pickle(fp)
        b2 = Bus.from_zip_pickle(fp, config=config, max_persist=4)

        def accessed() -> tp.List[str]:
            # labels in the order currently tracked by the Bus
            return list(b2._last_accessed.keys())

        # load everything in two steps
        _ = b2.iloc[[0, 1]]
        _ = b2.iloc[[2, 3]]
        self.assertTrue(b2._loaded_all)

        # each subsequent access moves the touched labels to the end
        _ = b2.iloc[[1, 0]]
        self.assertEqual(accessed(), ['2', '3', '1', '0'])

        _ = b2.iloc[3]
        self.assertEqual(accessed(), ['2', '1', '0', '3'])

        _ = b2.iloc[:3]
        self.assertEqual(accessed(), ['3', '0', '1', '2'])
def to_series() -> Series:
    '''Return a :obj:`Series` of platform, interpreter, and package version information, labelled by name.
    '''
    # NOTE: see requirements-extras.txt
    packages = (
            'numpy',
            'pandas',
            'xlsxwriter',
            'openpyxl',
            'xarray',
            'tables',
            'pyarrow',
            'msgpack',
            'msgpack_numpy',
            )
    # attributes checked, in order, to find a version; ``version`` is used by msgpack
    version_attrs = ('__version__', 'version')

    def items() -> tp.Iterator[tp.Tuple[str, tp.Any]]:
        yield 'platform', platform_mod.platform()
        yield 'sys.version', sys.version.replace('\n', '')
        yield 'static-frame', static_frame.__version__

        for package in packages:
            try:
                mod = importlib.import_module(package)
            except ModuleNotFoundError: #pragma: no cover
                # a package that cannot be imported is reported with the exception class
                yield package, ModuleNotFoundError #pragma: no cover
                continue #pragma: no cover

            for attr in version_attrs:
                if hasattr(mod, attr):
                    yield package, getattr(mod, attr) #type: ignore
                    break
            else:
                yield package, None

    return Series.from_items(items(), name='platform')
def test_yarn_init_c(self) -> None:
    '''Yarn initialization must raise ErrorInitYarn when given Frame, rather than Bus, in a tuple or in an object Series.
    '''
    as_tuple = (ff.parse('s(2,2)'), )
    as_series = Series((ff.parse('s(2,2)'), ), dtype=object)

    for candidate in (as_tuple, as_series):
        with self.assertRaises(ErrorInitYarn):
            Yarn(candidate)
def _to_series_state(self) -> Series:
    '''Return a :obj:`Series` built from the mutable values array, with this container's index and name.
    '''
    # NOTE: the mutable array will be copied in the Series construction
    state = Series(
            self._values_mutable,
            index=self._index,
            own_index=True,
            name=self._name,
            )
    return state
def _extract_loc(self, key: GetItemKeyType) -> 'Bus':
    '''Select by loc key: a single selected element is returned directly, while multiple selections are returned as a new container of the same class.
    '''
    iloc_key = self._series._index.loc_to_iloc(key)

    # NOTE: if we update before slicing, we change the local and the object handed back
    self._update_series_cache_iloc(key=iloc_key)

    selected = self._series.values[iloc_key]
    if isinstance(selected, np.ndarray):
        series = Series(
                selected,
                index=self._series._index.iloc[iloc_key],
                own_index=True,
                name=self._series._name,
                )
        return self.__class__(
                series=series,
                store=self._store,
                config=self._config,
                max_persist=self._max_persist,
                )

    # a single element was selected
    # NOTE: only support str labels, not IndexHierarchy
    return selected #type: ignore
def test_bus_max_persist_a(self) -> None:
    '''With max_persist of 3, iterating over all labels never leaves more than three Frame loaded.
    '''
    pairs = (
            (str(i), Frame(np.arange(i, i + 10).reshape(2, 5)))
            for i in range(20)
            )
    b1 = Bus(Series.from_items(pairs, dtype=object))

    config = StoreConfig(
            index_depth=1,
            columns_depth=1,
            include_columns=True,
            include_index=True,
            )

    with temp_file('.zip') as fp:
        b1.to_zip_pickle(fp)
        b2 = Bus.from_zip_pickle(fp, config=config, max_persist=3)

        for label in b2.index:
            _ = b2[label]
            self.assertTrue(b2._loaded.sum() <= 3)

        # after iteration only the last three are loaded
        expected = [False] * 17 + [True] * 3
        self.assertEqual(b2._loaded.tolist(), expected)
def from_buses(cls,
        buses: tp.Iterable[Bus],
        *,
        name: NameType = None,
        retain_labels: bool,
        deepcopy_from_bus: bool = False,
        ) -> 'Yarn':
    '''Return a :obj:`Yarn` from an iterable of :obj:`Bus`; labels will be drawn from :obj:`Bus.name`.

    Args:
        buses: the :obj:`Bus` to include.
        name: name given to the :obj:`Series` of :obj:`Bus`.
        retain_labels: if True, the full hierarchy is used as the index; otherwise one level is dropped from it.
        deepcopy_from_bus: forwarded to the hierarchy construction and to the initializer.
    '''
    labelled = ((bus.name, bus) for bus in buses)
    series = Series.from_items(labelled, dtype=DTYPE_OBJECT, name=name)

    hierarchy = buses_to_hierarchy(
            series.values,
            series.index,
            deepcopy_from_bus=deepcopy_from_bus,
            init_exception_cls=ErrorInitYarn,
            )
    index = hierarchy if retain_labels else hierarchy.level_drop(1) #type: ignore

    return cls(
            series,
            hierarchy=hierarchy,
            index=index,
            deepcopy_from_bus=deepcopy_from_bus,
            )
def test_bus_max_persist_b(self) -> None:
    '''With max_persist of 1, a sliced Bus keeps only the most recently accessed Frame loaded.
    '''
    pairs = (
            (str(i), Frame(np.arange(i, i + 10).reshape(2, 5)))
            for i in range(20)
            )
    b1 = Bus(Series.from_items(pairs, dtype=object))

    config = StoreConfig(
            index_depth=1,
            columns_depth=1,
            include_columns=True,
            include_index=True,
            )

    def only_loaded(position: int) -> tp.List[bool]:
        # expected loaded mask of length 10 with a single True at ``position``
        return [i == position for i in range(10)]

    with temp_file('.zip') as fp:
        b1.to_zip_pickle(fp)
        b2 = Bus.from_zip_pickle(fp, config=config, max_persist=1)

        b3 = b2.iloc[10:]
        self.assertEqual(b3._loaded.sum(), 1)
        # only the last one is loaded
        self.assertEqual(b3._loaded.tolist(), only_loaded(9))

        self.assertEqual(b3.iloc[0].sum().sum(), 145)
        self.assertEqual(b3._loaded.tolist(), only_loaded(0))

        self.assertEqual(b3.iloc[4].sum().sum(), 185)
        self.assertEqual(b3._loaded.tolist(), only_loaded(4))
def to_series_values(self,
        values: tp.Iterator[tp.Any],
        *,
        dtype: DtypeSpecifier,
        name: NameType = None,
        index_constructor: tp.Optional[IndexConstructor] = None,
        axis: int = 0,
        ) -> 'Series':
    '''Return a :obj:`Series` of the given values, indexed by the source container's columns (for a two-dimensional container with axis 0) or otherwise by its index.

    Args:
        values: an iterator of values; expected to supply one value per index label.
        dtype: dtype used when realizing the values as an array.
        name: name of the returned :obj:`Series`.
        index_constructor: if given, called with the selected index to produce the index used.
        axis: with a two-dimensional container, 0 selects the columns as the index.
    '''
    from static_frame.core.series import Series

    container = self._container
    # Creating a Series that will have the same index as source container
    by_columns = container._NDIM == 2 and axis == 0
    if by_columns:
        index = container._columns #type: ignore
    else:
        index = container._index
    own_index = not by_columns

    if index_constructor is not None:
        index = index_constructor(index)

    # PERF: passing count here permits faster generator realization
    array, _ = iterable_to_array_1d(
            values,
            count=index.shape[0],
            dtype=dtype,
            )
    return Series(
            array,
            name=name,
            index=index,
            own_index=own_index,
            )
def to_bus(self) -> 'Bus':
    '''Realize the :obj:`Batch` as an :obj:`Bus`. Note that, as a :obj:`Bus` must have all labels (even if :obj:`Frame` are loaded lazily), all items of this :obj:`Batch` are consumed to build it.
    '''
    series = Series.from_items(
            self.items(),
            name=self._name,
            dtype=DTYPE_OBJECT,
            )
    return Bus(series)
def _checkSelectedIndex(self):
    '''Reset the selection mask when its index is no longer the same object as the frame's index.
    '''
    if self._selectedMask.index is self.frame.index:
        # selection is still valid
        return

    # selection is no longer valid
    count = self._selectedMask.sum()
    vd.status('frame.index updated, clearing {} selected rows'.format(count))
    self._selectedMask = Series.from_element(False, index=self.frame.index)
def _extract_loc(self, key: GetItemKeyType) -> 'Bus':
    '''Select by loc key: a single selected element is returned directly, unless the key is an HLoc with a multiple selection, in which case (as with any multiple selection) a new container of the same class is returned.
    '''
    iloc_key = self._series._index.loc_to_iloc(key) #type: ignore

    # NOTE: if we update before slicing, we change the local and the object handed back
    self._update_series_cache_iloc(key=iloc_key)

    selected = self._series.values[iloc_key]
    if not isinstance(selected, np.ndarray):
        # if we have a single element
        if not (isinstance(key, HLoc) and key.has_key_multiple()):
            return selected #type: ignore
        # must return a Series, even though we do not have an array
        selected = np.array(selected)
        selected.flags.writeable = False

    series = Series(
            selected,
            index=self._series._index.iloc[iloc_key],
            own_index=True,
            name=self._series._name,
            )
    return self.__class__(
            series=series,
            store=self._store,
            config=self._config,
            )
def reload(self):
    '''Load the source Frame into this sheet: set the columns, the rows, and an empty selection mask.

    Raises:
        NotImplementedError: if the source is not a Frame.
    '''
    if not isinstance(self.source, Frame):
        raise NotImplementedError(
                f'no support for loading a Frame from {self.source}')
    frame = self.source

    # If the index is not an IndexAutoFactory, try to move it onto the Frame. If this fails it might mean we are trying to unset an auto index post selection
    if frame.index.depth > 1 or frame.index._map: # if it is not an IndexAutoFactory
        frame = frame.unset_index()

    # VisiData assumes string column names
    if frame.columns.dtype != str:
        frame = frame.relabel(columns=frame.columns.astype(str))

    dtypes = frame.dtypes

    self.columns = []
    for col in frame.columns:
        # labels with the ``__vd_`` prefix are not exposed as columns
        if col.startswith('__vd_'):
            continue
        column = Column(
                col,
                type=self.dtype_to_type(dtypes[col]),
                getter=self.getValue,
                setter=self.setValue,
                expr=col,
                )
        self.addColumn(column)

    self.rows = StaticFrameAdapter(frame)
    self._selectedMask = Series.from_element(False, index=frame.index)
def gen() -> tp.Iterator[Series]:
    '''Yield one :obj:`Series` per status field: the loaded flags, then size, nbytes, and shape of each Frame, with a missing value used for Frame that are not loaded.
    '''
    yield Series(
            self._loaded,
            index=self._series._index,
            dtype=DTYPE_BOOL,
            name='loaded',
            )

    # attribute name, dtype of the resulting Series, and the value used for a deferred Frame
    fields = (
            ('size', DTYPE_FLOAT_DEFAULT, np.nan),
            ('nbytes', DTYPE_FLOAT_DEFAULT, np.nan),
            ('shape', DTYPE_OBJECT, None),
            )
    for attr, dtype, missing in fields:
        values = (
                missing if frame is FrameDeferred else getattr(frame, attr)
                for frame in self._series.values
                )
        yield Series(
                values,
                index=self._series._index,
                dtype=dtype,
                name=attr,
                )
def shapes(self) -> Series:
    '''A :obj:`Series` describing the shape of each iterated :obj:`Frame`.

    Returns:
        :obj:`Series`
    '''
    labelled_shapes = (
            (label, frame.shape)
            for label, frame in self._items
            )
    return Series.from_items(
            labelled_shapes,
            name='shape',
            dtype=DTYPE_OBJECT,
            )
def shapes(self) -> Series:
    '''A :obj:`Series` describing the shape of each loaded :obj:`Frame`. Unloaded :obj:`Frame` will have a shape of None.

    Returns:
        :obj:`Series`
    '''
    shape_or_none = (
            None if frame is FrameDeferred else frame.shape
            for frame in self._values_mutable
            )
    return Series(
            shape_or_none,
            index=self._index,
            dtype=object,
            name='shape',
            )
def test_interface_summary_c(self) -> None:
    '''The interface of an instance and of its class must have the same count per group.
    '''
    def group_sizes(interface: tp.Any) -> tp.Any:
        # number of interface rows in each group
        return interface.iter_group('group').apply(len)

    s = Series(['a', 'b', 'c'])
    counts = group_sizes(s.interface)
    counts_cls = group_sizes(s.__class__.interface)

    self.assertTrue((counts == counts_cls).all())
def to_bus(self) -> 'Bus':
    '''Realize the :obj:`Batch` as an :obj:`Bus`. Note that, as a :obj:`Bus` must have all labels (even if :obj:`Frame` are loaded lazily), this :obj:`Batch` will be exhausted.
    '''
    frames = Series.from_items(
            self.items(),
            name=self._name,
            dtype=DTYPE_OBJECT,
            )
    # the config of this Batch is passed on to the Bus
    return Bus(frames, config=self._config)
def shapes(self) -> Series:
    '''A :obj:`Series` describing the shape of each loaded :obj:`Frame`. Unloaded :obj:`Frame` will have a shape of None.

    Returns:
        :obj:`Series`
    '''
    shape_or_none = (
            None if frame is FrameDeferred else frame.shape
            for frame in self._series.values
            )
    return Series(
            shape_or_none,
            index=self._series._index,
            dtype=object,
            name='shape',
            )
def _deferred_series(labels: tp.Iterable[str]) -> Series:
    '''
    Return an object ``Series`` of ``FrameDeferred`` objects, based on the passed in ``labels``.
    '''
    # every label is filled with FrameDeferred; the dtype is explicitly object
    deferred = Series.from_element(
            FrameDeferred,
            index=labels,
            dtype=object,
            )
    return tp.cast(Series, deferred)
def shapes(self) -> Series:
    '''A :obj:`Series` describing the shape of each loaded :obj:`Frame`. Unloaded :obj:`Frame` will have a shape of None.

    Returns:
        :obj:`Series`
    '''
    # concatenate the ``shapes`` of every contained container under this container's index
    per_bus = (bus.shapes for bus in self._series.values)
    return Series.from_concat(per_bus, index=self._index)