示例#1
0
    def _axis_array(self, axis: int) -> tp.Iterator[np.ndarray]:
        '''Yield arrays along one axis, walking the bus components in order.

        Iteration is only available along the axis that the components are
        not stacked on: rows (axis 1) require ``self._axis == 0`` and columns
        (axis 0) require ``self._axis == 1``.

        Args:
            axis: 0 iterates over columns, 1 iterates over rows

        Raises:
            AxisInvalid: if ``axis`` is neither 0 nor 1.
            NotImplementedAxis: if the requested axis is not available for
                the current component alignment.
        '''
        extractor = get_extractor(
                self._deepcopy_from_bus,
                is_array=True,
                memo_active=False,
                )

        if axis not in (1, 0):
            raise AxisInvalid(f'no support for axis {axis}')

        if axis == 1:
            # rows: only when bus components are aligned vertically
            available = self._axis == 0
        else:
            # columns: only when bus components are aligned horizontally
            available = self._axis == 1

        if not available:
            raise NotImplementedAxis()

        for _, member in self._bus.items():
            for values in member._blocks.axis_values(axis):
                yield extractor(values)
示例#2
0
    def _axis_tuple(self, *,
            axis: int,
            constructor: tp.Optional[tp.Type[tp.NamedTuple]] = None,
            ) -> tp.Iterator[tp.NamedTuple]:
        '''Yield one tuple per array produced by ``_axis_array`` for the axis.

        Args:
            axis: 0 iterates over columns (labelled by the index), 1 iterates over rows (labelled by the columns)
            constructor: optional callable applied to each array. If omitted, a constructor is built from the labels of the relevant axis. If a tuple subclass exposing ``_make`` is given, ``_make`` is used so it can be called with a single iterable.

        Raises:
            AxisInvalid: if no constructor is given and ``axis`` is neither 0 nor 1.
        '''
        if constructor is None:
            if axis not in (1, 0):
                raise AxisInvalid(f'no support for axis {axis}')
            labels = self._columns.values if axis == 1 else self._index.values
            # the returned constructor accepts a single iterable
            constructor = get_tuple_constructor(labels) #type: ignore
        else:
            has_make = (isinstance(constructor, type) and
                    issubclass(constructor, tuple) and
                    hasattr(constructor, '_make'))
            if has_make:
                constructor = constructor._make #type: ignore

        assert constructor is not None

        for values in self._axis_array(axis):
            yield constructor(values)
def bus_to_hierarchy(
    bus: tp.Union[Bus, 'Yarn'],
    axis: int,
    deepcopy_from_bus: bool,
    init_exception_cls: tp.Type[Exception],
) -> tp.Tuple[IndexHierarchy, IndexBase]:
    '''
    Build an :obj:`IndexHierarchy` from the labels of a :obj:`Bus` and the labels of each contained component along ``axis``. The opposite-axis index of the first component is returned alongside it, after checking that every later component has an equal opposite-axis index.

    Args:
        bus: container whose ``items()`` yields (label, component) pairs.
        axis: 0 builds the hierarchy from each component's index; 1 builds it from each component's columns.
        deepcopy_from_bus: forwarded to ``get_extractor``.
        init_exception_cls: exception type raised when opposite-axis indices are not equal.

    Raises:
        AxisInvalid: if ``axis`` is neither 0 nor 1 (only detected once a component is visited).
    '''
    # NOTE: need to extract just axis labels, not the full Frame; need new Store/Bus loaders just for label data
    extractor = get_extractor(
        deepcopy_from_bus,
        is_array=False,
        memo_active=False,
    )

    tree: TreeNodeT = {}
    opposite: tp.Optional[IndexBase] = None

    for label, f in bus.items():
        if axis == 0:
            along, across = f.index, f.columns
        elif axis == 1:
            along, across = f.columns, f.index
        else:
            raise AxisInvalid(f'invalid axis {axis}')

        # hierarchical labels are stored as nested trees, flat ones as-is
        extracted = extractor(along)
        if isinstance(extracted, IndexHierarchy):
            tree[label] = extracted.to_tree()
        else:
            tree[label] = extracted

        if opposite is None:
            # the first component defines the reference opposite index
            opposite = extractor(across)
        elif not opposite.equals(across):
            raise init_exception_cls(
                'opposite axis must have equivalent indices')

    # NOTE: we could try to collect index constructors by using the index of the Bus and observing the indices of the contained Frames, but it is not clear that will be better than using IndexAutoConstructorFactory

    hierarchy = IndexHierarchy.from_tree(
        tree, index_constructors=IndexAutoConstructorFactory
    )
    return hierarchy, opposite  # type: ignore
示例#4
0
        def values() -> tp.Iterator[Frame]:
            nonlocal opposite

            for start, end in zip_longest(starts, ends, fillvalue=vector_len):
                if axis == 0: # along rows
                    f = frame.iloc[start:end]
                    label = label_extractor(f.index) #type: ignore
                    axis_map_components[label] = f.index
                    if opposite is None:
                        opposite = f.columns
                elif axis == 1: # along columns
                    f = frame.iloc[:, start:end]
                    label = label_extractor(f.columns) #type: ignore
                    axis_map_components[label] = f.columns
                    if opposite is None:
                        opposite = f.index
                else:
                    raise AxisInvalid(f'invalid axis {axis}')
                yield f.rename(label)
def apex_to_name(
    rows: tp.Sequence[tp.Sequence[tp.Hashable]],
    depth_level: tp.Optional[DepthLevelSpecifier],
    axis: int,  # 0 is by row (for index), 1 is by column (for columns)
    axis_depth: int,
) -> NameType:
    '''
    Utility for translating apex values (the upper left corner created by index/columns) into the appropriate name.

    Args:
        rows: the apex values, as a sequence of rows.
        depth_level: ``None`` (no name is derived), a single integer, or an iterable of integers selecting which apex values to use.
        axis: 0 selects whole rows by ``depth_level``; 1 selects positions within every row.
        axis_depth: when 1 a single label (or the first tuple) is returned; otherwise a tuple is returned.

    Returns:
        ``None`` if ``depth_level`` is ``None``, or if a single selected label is the empty string; otherwise the selected label or tuple.

    Raises:
        AxisInvalid: if ``depth_level`` is given and ``axis`` is neither 0 nor 1.
    '''
    if depth_level is None:
        return None
    if axis not in (0, 1):
        raise AxisInvalid(f'invalid axis: {axis}')

    single = axis_depth == 1

    if isinstance(depth_level, INT_TYPES):
        if axis == 0:
            # depth_level refers to a whole outer row
            row = rows[depth_level]
        else:
            # depth_level refers to position in inner row
            row = [r[depth_level] for r in rows]
        if not single:
            return tuple(row)
        # a single label: the empty string means no name
        return row[0] if row[0] != '' else None

    # otherwise depth_level is a list selection; combine into tuples
    if axis == 0:
        selected = [rows[level] for level in depth_level]
        combined = zip(*selected)
    else:
        combined = (tuple(row[level] for level in depth_level)
                    for row in rows)  #type: ignore

    if single:
        return next(combined)  #type: ignore
    return tuple(combined)
示例#6
0
    def from_frames(
        self,
        frames: tp.Iterable['Frame'],
        *,
        include_index: bool = True,
        include_columns: bool = True,
        axis: int = 0,
        union: bool = True,
        name: NameType = None,
        fill_value: object = np.nan,
    ) -> None:
        '''Given an iterable of Frames, write out an NPZ or NPY directly, without building up an intermediary Frame. If axis 0, the Frames must be block compatible; if axis 1, the Frames must have the same number of rows. For both axis, if included, concatenated indices must be unique or aligned.

        Args:
            frames: Iterable of :obj:`Frame`; any item that is not a :obj:`Frame` is converted by calling its ``to_frame(axis)``.
            *
            include_index: For axis 0, whether the concatenated index is written (if False, no index is passed on). For axis 1 this must be True, as the indices are combined to align rows.
            include_columns: For axis 1, whether the concatenated columns are written (if False, no columns are passed on). For axis 0 this must be True, as the columns are combined to align columns.
            axis: 0 stacks rows (vertical); 1 stacks columns (horizontal).
            union: Forwarded to ``index_many_set`` when combining the labels of the aligned axis.
            name: Forwarded to ``from_arrays`` as the stored name.
            fill_value: Value used when a Frame is reindexed to the combined labels.

        Raises:
            UnsupportedOperation: if this instance is not writeable.
            RuntimeError: if concatenated labels are not unique, or if the labels needed for alignment are excluded.
            AxisInvalid: if ``axis`` is neither 0 nor 1.
        '''
        if not self._writeable:
            raise UnsupportedOperation('Open with mode "w" to write.')

        from static_frame.core.type_blocks import TypeBlocks
        from static_frame.core.frame import Frame

        # materialize, as the frames are iterated more than once below
        frames = [
            f if isinstance(f, Frame) else f.to_frame(axis) for f in frames
        ]  # type: ignore

        # NOTE: based on Frame.from_concat
        if axis == 1:  # stacks columns (extends rows horizontally)
            if include_columns:
                try:
                    columns = index_many_concat(
                        (f._columns for f in frames),
                        Index,
                    )
                except ErrorInitIndexNonUnique as error:
                    # chain explicitly so the original cause is preserved
                    raise RuntimeError(
                        'Column names after horizontal concatenation are not unique; set include_columns to None to ignore.'
                    ) from error
            else:
                columns = None

            if include_index:
                index = index_many_set(
                    (f._index for f in frames),
                    Index,
                    union=union,
                )
            else:
                raise RuntimeError(
                    'Must include index for horizontal alignment.')

            def blocks() -> tp.Iterator[np.ndarray]:
                for f in frames:
                    # reindex only when the labels differ from the combined index
                    if len(f.index) != len(index) or (f.index != index).any():
                        f = f.reindex(index=index, fill_value=fill_value)
                    yield from f._blocks._blocks

        elif axis == 0:  # stacks rows (extends columns vertically)
            if include_index:
                try:
                    index = index_many_concat((f._index for f in frames),
                                              Index)
                except ErrorInitIndexNonUnique as error:
                    # chain explicitly so the original cause is preserved
                    raise RuntimeError(
                        'Index names after vertical concatenation are not unique; set include_index to None to ignore'
                    ) from error
            else:
                index = None

            if include_columns:
                columns = index_many_set(
                    (f._columns for f in frames),
                    Index,
                    union=union,
                )
            else:
                raise RuntimeError(
                    'Must include columns for vertical alignment.')

            def blocks() -> tp.Iterator[np.ndarray]:
                type_blocks = []
                previous_f: tp.Optional[Frame] = None
                block_compatible = True
                reblock_compatible = True

                for f in frames:
                    # reindex only when the labels differ from the combined columns
                    if len(f.columns) != len(columns) or (f.columns !=
                                                          columns).any():
                        f = f.reindex(columns=columns, fill_value=fill_value)

                    type_blocks.append(f._blocks)
                    # column size is all the same by this point
                    if previous_f is not None:  # after the first
                        # once a flag is False it stays False, so stop checking
                        if block_compatible:
                            block_compatible &= f._blocks.block_compatible(
                                previous_f._blocks,
                                axis=1)  # only compare columns
                        if reblock_compatible:
                            reblock_compatible &= f._blocks.reblock_compatible(
                                previous_f._blocks)
                    previous_f = f

                yield from TypeBlocks.vstack_blocks_to_blocks(
                    type_blocks=type_blocks,
                    block_compatible=block_compatible,
                    reblock_compatible=reblock_compatible,
                )
        else:
            raise AxisInvalid(f'no support for {axis}')

        self.from_arrays(
            blocks=blocks(),
            index=index,
            columns=columns,
            name=name,
            axis=1,  # blocks are normalized for horizontal concat
        )
示例#7
0
    def from_arrays(
        self,
        blocks: tp.Iterable[np.ndarray],
        *,
        index: tp.Optional[IndexInitializer] = None,
        columns: tp.Optional[IndexInitializer] = None,
        name: NameType = None,
        axis: int = 0,
    ) -> None:
        '''
        Given an iterable of arrays, write out an NPZ or NPY directly, without building up intermediary :obj:`Frame`. If axis 0, the arrays are vertically stacked; if axis 1, they are horizontally stacked. For both axis, if included, indices must be of appropriate length.

        Args:
            blocks: Iterable of arrays. With axis 1 each array is written as its own block and all must share the same first dimension; with axis 0 they are concatenated into one block.
            *,
            index: An array, :obj:`Index`, or :obj:`IndexHierarchy`.
            columns: An array, :obj:`Index`, or :obj:`IndexHierarchy`.
            name: Stored as the first entry of the names metadata.
            axis: 0 to stack vertically, 1 to stack horizontally.

        Raises:
            UnsupportedOperation: if this instance is not writeable.
            RuntimeError: if ``index`` or ``columns`` is of an unsupported type, or if axis 1 blocks differ in their first dimension.
            AxisInvalid: if ``axis`` is neither 0 nor 1.
        '''
        if not self._writeable:
            raise UnsupportedOperation('Open with mode "w" to write.')

        metadata: tp.Dict[str, tp.Any] = {}

        if isinstance(index, IndexBase):
            depth_index = index.depth
            name_index = index.name
            cls_index = index.__class__
            ArchiveIndexConverter.index_encode(
                metadata=metadata,
                archive=self._archive,
                index=index,
                key_template_values=Label.FILE_TEMPLATE_VALUES_INDEX,
                key_types=Label.KEY_TYPES_INDEX,
                depth=depth_index,
                include=True,
            )
        elif index is not None:
            if index.__class__ is not np.ndarray:
                raise RuntimeError(
                    'index argument must be an Index, IndexHierarchy, or 1D np.ndarray'
                )

            depth_index = 1
            name_index = None
            cls_index = dtype_to_index_cls(True, index.dtype)  #type: ignore
            ArchiveIndexConverter.array_encode(
                metadata=metadata,
                archive=self._archive,
                array=index,
                key_template_values=Label.FILE_TEMPLATE_VALUES_INDEX,
            )
        else:
            # no index given: nothing is written for it, defaults are recorded
            depth_index = 1
            name_index = None
            cls_index = Index

        if isinstance(columns, IndexBase):
            depth_columns = columns.depth
            name_columns = columns.name
            cls_columns = columns.__class__
            ArchiveIndexConverter.index_encode(
                metadata=metadata,
                archive=self._archive,
                index=columns,
                key_template_values=Label.FILE_TEMPLATE_VALUES_COLUMNS,
                key_types=Label.KEY_TYPES_COLUMNS,
                depth=depth_columns,
                include=True,
            )
        elif columns is not None:
            if columns.__class__ is not np.ndarray:
                # name the offending argument correctly (was reported as "index")
                raise RuntimeError(
                    'columns argument must be an Index, IndexHierarchy, or 1D np.ndarray'
                )

            depth_columns = 1  # only support 1D
            name_columns = None
            cls_columns = dtype_to_index_cls(True,
                                             columns.dtype)  #type: ignore
            ArchiveIndexConverter.array_encode(
                metadata=metadata,
                archive=self._archive,
                array=columns,
                key_template_values=Label.FILE_TEMPLATE_VALUES_COLUMNS,
            )
        else:
            # no columns given: nothing is written for them, defaults are recorded
            depth_columns = 1  # only support 1D
            name_columns = None
            cls_columns = Index

        metadata[Label.KEY_NAMES] = [
            name,
            name_index,
            name_columns,
        ]
        # do not store Frame class as caller will determine
        metadata[Label.KEY_TYPES] = [
            cls_index.__name__,
            cls_columns.__name__,
        ]

        # number of blocks written; stays 0 if axis 1 receives no blocks
        block_count = 0

        if axis == 1:
            # None (not 0) marks "not yet seen", so that a first block with
            # zero rows is still compared against all subsequent blocks
            rows: tp.Optional[int] = None
            for i, array in enumerate(blocks):
                if rows is None:
                    rows = array.shape[0]
                elif array.shape[0] != rows:
                    raise RuntimeError('incompatible block shapes')
                self._archive.write_array(Label.FILE_TEMPLATE_BLOCKS.format(i),
                                          array)
                block_count = i + 1
        elif axis == 0:
            # for now, just vertically concat and write, though this has a 2X memory requirement
            resolved = concat_resolved(blocks, axis=0)
            # if this results in an object array, an exception will be raised
            self._archive.write_array(Label.FILE_TEMPLATE_BLOCKS.format(0),
                                      resolved)
            block_count = 1
        else:
            raise AxisInvalid(f'invalid axis {axis}')

        metadata[Label.KEY_DEPTHS] = [
            block_count,
            depth_index,
            depth_columns
        ]
        self._archive.write_metadata(metadata)