Example #1
    def quantile(
        self,
        *,
        qs: Float64Index,
        axis: int = 0,
        transposed: bool = False,
        interpolation="linear",
    ) -> ArrayManager:

        # error: Value of type variable "ArrayLike" of "ensure_block_shape" cannot be
        # "Union[ndarray, ExtensionArray]"
        arrs = [ensure_block_shape(x, 2)
                for x in self.arrays]  # type: ignore[type-var]
        assert axis == 1
        # error: Value of type variable "ArrayLike" of "quantile_compat" cannot be
        # "object"
        new_arrs = [
            quantile_compat(x, qs, interpolation,
                            axis=axis)  # type: ignore[type-var]
            for x in arrs
        ]
        for i, arr in enumerate(new_arrs):
            if arr.ndim == 2:
                assert arr.shape[0] == 1, arr.shape
                new_arrs[i] = arr[0]

        axes = [qs, self._axes[1]]
        return type(self)(new_arrs, axes)
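
All of these examples lean on ensure_block_shape to promote 1D column arrays to the (1, N) layout that block-based code expects. A minimal sketch of that contract for plain NumPy inputs (the real pandas helper also passes 1D-only ExtensionArrays through untouched):

import numpy as np

def ensure_block_shape_sketch(values: np.ndarray, ndim: int = 2) -> np.ndarray:
    # Promote a 1D array to shape (1, N); leave 2D arrays unchanged.
    if values.ndim < ndim:
        return values.reshape(1, -1)
    return values

assert ensure_block_shape_sketch(np.arange(3)).shape == (1, 3)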
Example #2
def _concatenate_join_units(
    join_units: list[JoinUnit], concat_axis: int, copy: bool
) -> ArrayLike:
    """
    Concatenate values from several join units along selected axis.
    """
    if concat_axis == 0 and len(join_units) > 1:
        # Concatenating join units along ax0 is handled in _merge_blocks.
        raise AssertionError("Concatenating join units along axis0")

    empty_dtype = _get_empty_dtype(join_units)

    has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units)
    upcasted_na = _dtype_to_na_value(empty_dtype, has_none_blocks)

    to_concat = [
        ju.get_reindexed_values(empty_dtype=empty_dtype, upcasted_na=upcasted_na)
        for ju in join_units
    ]

    if len(to_concat) == 1:
        # Only one block, nothing to concatenate.
        concat_values = to_concat[0]
        if copy:
            if isinstance(concat_values, np.ndarray):
                # non-reindexed (=not yet copied) arrays are made into a view
                # in JoinUnit.get_reindexed_values
                if concat_values.base is not None:
                    concat_values = concat_values.copy()
            else:
                concat_values = concat_values.copy()

    elif any(is_1d_only_ea_dtype(t.dtype) for t in to_concat):
        # TODO(EA2D): special case not needed if all EAs used HybridBlocks
        # NB: we are still assuming here that Hybrid blocks have shape (1, N)
        # concatting with at least one EA means we are concatting a single column
        # the non-EA values are 2D arrays with shape (1, n)

        # error: No overload variant of "__getitem__" of "ExtensionArray" matches
        # argument type "Tuple[int, slice]"
        to_concat = [
            t
            if is_1d_only_ea_dtype(t.dtype)
            else t[0, :]  # type: ignore[call-overload]
            for t in to_concat
        ]
        concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True)
        concat_values = ensure_block_shape(concat_values, 2)

    else:
        concat_values = concat_compat(to_concat, axis=concat_axis)

    return concat_values
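
When any participant has a 1D-only EA dtype, the 2D ndarrays above are flattened to their single row so everything concatenates as one column. That slicing step in isolation, with a plain array standing in for the ExtensionArray (the real code routes through concat_compat, which also reconciles dtypes):

import numpy as np

ea_like = np.array([1.0, 2.0])      # stand-in for a 1D-only ExtensionArray
block_2d = np.array([[3.0, 4.0]])   # block-shaped ndarray with shape (1, n)

parts = [t if t.ndim == 1 else t[0, :] for t in (ea_like, block_2d)]
flat = np.concatenate(parts, axis=0)
assert flat.shape == (4,)           # ensure_block_shape then restores (1, 4)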
Example #3
    def grouped_reduce(self: T,
                       func: Callable,
                       ignore_failures: bool = False) -> T:
        """
        Apply grouped reduction function columnwise, returning a new ArrayManager.

        Parameters
        ----------
        func : grouped reduction function
        ignore_failures : bool, default False
            Whether to drop columns where func raises TypeError.

        Returns
        -------
        ArrayManager
        """
        result_arrays: list[np.ndarray] = []
        result_indices: list[int] = []

        for i, arr in enumerate(self.arrays):
            # grouped_reduce functions all expect 2D arrays
            arr = ensure_block_shape(arr, ndim=2)
            try:
                res = func(arr)
            except (TypeError, NotImplementedError):
                if not ignore_failures:
                    raise
                continue

            if res.ndim == 2:
                # reverse of ensure_block_shape
                assert res.shape[0] == 1
                res = res[0]

            result_arrays.append(res)
            result_indices.append(i)

        if len(result_arrays) == 0:
            index = Index([None])  # placeholder
        else:
            index = Index(range(result_arrays[0].shape[0]))

        if ignore_failures:
            columns = self.items[np.array(result_indices, dtype="int64")]
        else:
            columns = self.items

        # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]";
        # expected "List[Union[ndarray, ExtensionArray]]"
        return type(self)(result_arrays,
                          [index, columns])  # type: ignore[arg-type]
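
The loop above reshapes each 1D column to (1, N), applies func, then strips the leading axis from a 2D result. The round trip in miniature, with a toy single-group reduction standing in for the real grouped aggregation:

import numpy as np

def toy_func(arr2d: np.ndarray) -> np.ndarray:
    # column-wise sum over one group, keeping the 2D block shape
    return arr2d.sum(axis=1, keepdims=True)

res = toy_func(np.arange(4.0).reshape(1, -1))   # ensure_block_shape(arr, ndim=2)
assert res.ndim == 2 and res.shape[0] == 1
res = res[0]                                    # reverse of ensure_block_shape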
Example #4
def _concatenate_join_units(
    join_units: List[JoinUnit], concat_axis: int, copy: bool
) -> ArrayLike:
    """
    Concatenate values from several join units along selected axis.
    """
    if concat_axis == 0 and len(join_units) > 1:
        # Concatenating join units along ax0 is handled in _merge_blocks.
        raise AssertionError("Concatenating join units along axis0")

    empty_dtype = _get_empty_dtype(join_units)

    has_none_blocks = any(unit.block is None for unit in join_units)
    upcasted_na = _dtype_to_na_value(empty_dtype, has_none_blocks)

    to_concat = [
        ju.get_reindexed_values(empty_dtype=empty_dtype, upcasted_na=upcasted_na)
        for ju in join_units
    ]

    if len(to_concat) == 1:
        # Only one block, nothing to concatenate.
        concat_values = to_concat[0]
        if copy:
            if isinstance(concat_values, np.ndarray):
                # non-reindexed (=not yet copied) arrays are made into a view
                # in JoinUnit.get_reindexed_values
                if concat_values.base is not None:
                    concat_values = concat_values.copy()
            else:
                concat_values = concat_values.copy()
    elif any(isinstance(t, ExtensionArray) and t.ndim == 1 for t in to_concat):
        # concatting with at least one EA means we are concatting a single column
        # the non-EA values are 2D arrays with shape (1, n)
        # error: Invalid index type "Tuple[int, slice]" for
        # "Union[ExtensionArray, ndarray]"; expected type "Union[int, slice, ndarray]"
        to_concat = [
            t
            if (isinstance(t, ExtensionArray) and t.ndim == 1)
            else t[0, :]  # type: ignore[index]
            for t in to_concat
        ]
        concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True)
        concat_values = ensure_block_shape(concat_values, 2)

    else:
        concat_values = concat_compat(to_concat, axis=concat_axis)

    return concat_values
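
In the single-unit branch above, an ndarray is copied only when it is a view (its base is not None); freshly allocated arrays pass through untouched. That check in isolation:

import numpy as np

arr = np.arange(4.0)
view = arr[:]                      # basic slicing returns a view: view.base is arr
out = view.copy() if view.base is not None else view
assert out.base is None and out is not view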
Example #5
def make_block(values,
               placement,
               klass=None,
               ndim=None,
               dtype: Dtype | None = None) -> Block:
    """
    This is a pseudo-public analogue to blocks.new_block.

    We ask that downstream libraries use this rather than any fully-internal
    APIs, including but not limited to:

    - core.internals.blocks.make_block
    - Block.make_block
    - Block.make_block_same_class
    - Block.__init__
    """
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    values, dtype = extract_pandas_array(values, dtype, ndim)

    if klass is ExtensionBlock and is_period_dtype(values.dtype):
        # GH-44681 changed PeriodArray to be stored in the 2D
        # NDArrayBackedExtensionBlock instead of ExtensionBlock
        # -> still allow ExtensionBlock to be passed in this case for back compat
        klass = None

    if klass is None:
        dtype = dtype or values.dtype
        klass = get_block_type(dtype)

    elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values.dtype):
        # pyarrow calls get here
        values = DatetimeArray._simple_new(values, dtype=dtype)

    if not isinstance(placement, BlockPlacement):
        placement = BlockPlacement(placement)

    ndim = maybe_infer_ndim(values, placement, ndim)
    if is_datetime64tz_dtype(values.dtype) or is_period_dtype(values.dtype):
        # GH#41168 ensure we can pass 1D dt64tz values
        # More generally, any EA dtype that isn't is_1d_only_ea_dtype
        values = extract_array(values, extract_numpy=True)
        values = ensure_block_shape(values, ndim)

    check_ndim(values, placement, ndim)
    values = maybe_coerce_values(values)
    return klass(values, ndim=ndim, placement=placement)
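
A minimal downstream-style call, assuming a plain float column laid out as one block row; the exact import path and returned Block subclass vary across pandas versions:

import numpy as np
from pandas.core.internals import make_block

blk = make_block(np.array([[1.0, 2.0, 3.0]]), placement=[0], ndim=2)
print(type(blk).__name__, blk.shape)   # a numeric Block with shape (1, 3)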
Example #6
    def quantile(
        self,
        *,
        qs: Float64Index,
        axis: int = 0,
        transposed: bool = False,
        interpolation="linear",
    ) -> ArrayManager:

        arrs = [ensure_block_shape(x, 2) for x in self.arrays]
        assert axis == 1
        new_arrs = [quantile_compat(x, qs, interpolation, axis=axis) for x in arrs]
        for i, arr in enumerate(new_arrs):
            if arr.ndim == 2:
                assert arr.shape[0] == 1, arr.shape
                new_arrs[i] = arr[0]

        axes = [qs, self._axes[1]]
        return type(self)(new_arrs, axes)
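
For an ndarray column, quantile_compat on a (1, N) block returns a (1, len(qs)) result, which the loop unwraps back to 1D. A NumPy stand-in for that shape round trip:

import numpy as np

qs = np.array([0.25, 0.5, 0.75])
col = np.arange(10.0).reshape(1, -1)     # block-shaped column via ensure_block_shape
res = np.quantile(col, qs, axis=1).T     # (1, len(qs)), like quantile_compat here
assert res.shape == (1, 3)
new_col = res[0]                         # the 1D array the ArrayManager keeps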
Example #7
def make_block(values,
               placement,
               klass=None,
               ndim=None,
               dtype: Dtype | None = None) -> Block:
    """
    This is a pseudo-public analogue to blocks.new_block.

    We ask that downstream libraries use this rather than any fully-internal
    APIs, including but not limited to:

    - core.internals.blocks.make_block
    - Block.make_block
    - Block.make_block_same_class
    - Block.__init__
    """
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    values, dtype = extract_pandas_array(values, dtype, ndim)

    needs_reshape = False
    if klass is None:
        dtype = dtype or values.dtype
        klass = get_block_type(values, dtype)

    elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values.dtype):
        # pyarrow calls get here
        values = DatetimeArray._simple_new(values, dtype=dtype)
        needs_reshape = True

    if not isinstance(placement, BlockPlacement):
        placement = BlockPlacement(placement)

    ndim = maybe_infer_ndim(values, placement, ndim)
    if needs_reshape:
        values = ensure_block_shape(values, ndim)

    check_ndim(values, placement, ndim)
    values = maybe_coerce_values(values)
    return klass(values, ndim=ndim, placement=placement)
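
placement is normalized to a BlockPlacement up front so the rest of the function can rely on a single API. A minimal look at that wrapper, assuming the pandas._libs.internals import path:

from pandas._libs.internals import BlockPlacement

bp = BlockPlacement(slice(0, 3))
assert len(bp) == 3   # maybe_infer_ndim keys off placement length for EA values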
Example #8
    def apply_with_block(self: T,
                         f,
                         align_keys=None,
                         swap_axis=True,
                         **kwargs) -> T:
        # switch axis to follow BlockManager logic
        if swap_axis and "axis" in kwargs and self.ndim == 2:
            kwargs["axis"] = 1 if kwargs["axis"] == 0 else 0

        align_keys = align_keys or []
        aligned_args = {k: kwargs[k] for k in align_keys}

        result_arrays = []

        for i, arr in enumerate(self.arrays):

            if aligned_args:
                for k, obj in aligned_args.items():
                    if isinstance(obj, (ABCSeries, ABCDataFrame)):
                        # The caller is responsible for ensuring that
                        #  obj.axes[-1].equals(self.items)
                        if obj.ndim == 1:
                            if self.ndim == 2:
                                kwargs[k] = obj.iloc[slice(i, i + 1)]._values
                            else:
                                kwargs[k] = obj.iloc[:]._values
                        else:
                            kwargs[k] = obj.iloc[:, [i]]._values
                    else:
                        # otherwise we have an ndarray
                        if obj.ndim == 2:
                            kwargs[k] = obj[[i]]

            # error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no
            # attribute "tz"
            if hasattr(arr,
                       "tz") and arr.tz is None:  # type: ignore[union-attr]
                # DatetimeArray needs to be converted to ndarray for DatetimeLikeBlock

                # error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no
                # attribute "_data"
                arr = arr._data  # type: ignore[union-attr]
            elif arr.dtype.kind == "m" and not isinstance(arr, np.ndarray):
                # TimedeltaArray needs to be converted to ndarray for TimedeltaBlock

                # error: "ExtensionArray" has no attribute "_data"
                arr = arr._data  # type: ignore[attr-defined]

            if self.ndim == 2:
                arr = ensure_block_shape(arr, 2)
                block = new_block(arr, placement=slice(0, 1, 1), ndim=2)
            else:
                block = new_block(arr,
                                  placement=slice(0, len(self), 1),
                                  ndim=1)

            applied = getattr(block, f)(**kwargs)
            if isinstance(applied, list):
                applied = applied[0]
            arr = applied.values
            if self.ndim == 2 and arr.ndim == 2:
                # 2D for np.ndarray or DatetimeArray/TimedeltaArray
                assert len(arr) == 1
                # error: Invalid index type "Tuple[int, slice]" for
                # "Union[ndarray, ExtensionArray]"; expected type
                # "Union[int, slice, ndarray]"
                arr = arr[0, :]  # type: ignore[index]
            result_arrays.append(arr)

        return type(self)(result_arrays, self._axes)
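
Each iteration wraps a 1D column into a single-row block, calls the block method, and unwraps the result back to 1D. The same wrap/unwrap pattern in plain NumPy, with rounding standing in for getattr(block, f)(**kwargs):

import numpy as np

arr = np.array([1.2, 2.7, 3.5])     # ArrayManager stores 1D columns
wrapped = arr.reshape(1, -1)        # ensure_block_shape(arr, 2)
applied = np.round(wrapped)         # stand-in for the block method call
assert len(applied) == 1
arr = applied[0, :]                 # back to the 1D column shape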
Example #9
def ndarray_to_mgr(values, index, columns, dtype: DtypeObj | None, copy: bool,
                   typ: str) -> Manager:
    # used in DataFrame.__init__
    # input must be a ndarray, list, Series, Index, ExtensionArray

    if isinstance(values, ABCSeries):
        if columns is None:
            if values.name is not None:
                columns = Index([values.name])
        if index is None:
            index = values.index
        else:
            values = values.reindex(index)

        # zero len case (GH #2234)
        if not len(values) and columns is not None and len(columns):
            values = np.empty((0, 1), dtype=object)

    # if the array preparation does a copy -> avoid this for ArrayManager,
    # since the copy is done on conversion to 1D arrays
    copy_on_sanitize = False if typ == "array" else copy

    vdtype = getattr(values, "dtype", None)
    if is_1d_only_ea_dtype(vdtype) or is_1d_only_ea_dtype(dtype):
        # GH#19157

        if isinstance(values,
                      (np.ndarray, ExtensionArray)) and values.ndim > 1:
            # GH#12513 an EA dtype passed with a 2D array, split into
            #  multiple EAs that view the values
            # error: No overload variant of "__getitem__" of "ExtensionArray"
            # matches argument type "Tuple[slice, int]"
            values = [
                values[:, n]  # type: ignore[call-overload]
                for n in range(values.shape[1])
            ]
        else:
            values = [values]

        if columns is None:
            columns = Index(range(len(values)))
        else:
            columns = ensure_index(columns)

        return arrays_to_mgr(values, columns, index, dtype=dtype, typ=typ)

    elif is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype):
        # i.e. Datetime64TZ, PeriodDtype
        values = extract_array(values, extract_numpy=True)
        if copy:
            values = values.copy()
        if values.ndim == 1:
            values = values.reshape(-1, 1)

    else:
        # by definition an array here
        # the dtypes will be coerced to a single dtype
        values = _prep_ndarray(values, copy=copy_on_sanitize)

    if dtype is not None and not is_dtype_equal(values.dtype, dtype):
        shape = values.shape
        flat = values.ravel()

        # GH#40110 see similar check inside sanitize_array
        rcf = not (is_integer_dtype(dtype) and values.dtype.kind == "f")

        values = sanitize_array(flat,
                                None,
                                dtype=dtype,
                                copy=copy_on_sanitize,
                                raise_cast_failure=rcf)

        values = values.reshape(shape)

    # _prep_ndarray ensures that values.ndim == 2 at this point
    index, columns = _get_axes(values.shape[0],
                               values.shape[1],
                               index=index,
                               columns=columns)

    _check_values_indices_shape_match(values, index, columns)

    if typ == "array":

        if issubclass(values.dtype.type, str):
            values = np.array(values, dtype=object)

        if dtype is None and is_object_dtype(values.dtype):
            arrays = [
                ensure_wrapped_if_datetimelike(
                    maybe_infer_to_datetimelike(values[:, i]))
                for i in range(values.shape[1])
            ]
        else:
            if is_datetime_or_timedelta_dtype(values.dtype):
                values = ensure_wrapped_if_datetimelike(values)
            arrays = [values[:, i] for i in range(values.shape[1])]

        if copy:
            arrays = [arr.copy() for arr in arrays]

        return ArrayManager(arrays, [index, columns], verify_integrity=False)

    values = values.T

    # if we don't have a dtype specified, then try to convert objects
    # on the entire block; this is to convert if we have datetimelikes
    # embedded in an object type
    if dtype is None and is_object_dtype(values.dtype):
        obj_columns = list(values)
        maybe_datetime = [maybe_infer_to_datetimelike(x) for x in obj_columns]
        # don't convert (and copy) the objects if no type inference occurs
        if any(x is not y for x, y in zip(obj_columns, maybe_datetime)):
            dvals_list = [
                ensure_block_shape(dval, 2) for dval in maybe_datetime
            ]
            block_values = [
                new_block_2d(dvals_list[n], placement=BlockPlacement(n))
                for n in range(len(dvals_list))
            ]
        else:
            bp = BlockPlacement(slice(len(columns)))
            nb = new_block_2d(values, placement=bp)
            block_values = [nb]
    else:
        bp = BlockPlacement(slice(len(columns)))
        nb = new_block_2d(values, placement=bp)
        block_values = [nb]

    if len(columns) == 0:
        block_values = []

    return create_block_manager_from_blocks(block_values, [columns, index],
                                            verify_integrity=False)
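
When a 1D-only EA dtype is paired with a 2D input, the array is split into one view per column before arrays_to_mgr takes over. The slicing pattern on its own:

import numpy as np

values = np.arange(6).reshape(3, 2)              # 3 rows, 2 columns
cols = [values[:, n] for n in range(values.shape[1])]
assert all(c.ndim == 1 for c in cols)
assert all(c.base is not None for c in cols)     # views into the same buffer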
Example #10
def ndarray_to_mgr(values, index, columns, dtype: Optional[DtypeObj],
                   copy: bool, typ: str) -> Manager:
    # used in DataFrame.__init__
    # input must be a ndarray, list, Series, Index, ExtensionArray

    if isinstance(values, ABCSeries):
        if columns is None:
            if values.name is not None:
                columns = Index([values.name])
        if index is None:
            index = values.index
        else:
            values = values.reindex(index)

        # zero len case (GH #2234)
        if not len(values) and columns is not None and len(columns):
            values = np.empty((0, 1), dtype=object)

    if is_extension_array_dtype(values) or is_extension_array_dtype(dtype):
        # GH#19157

        if isinstance(values, np.ndarray) and values.ndim > 1:
            # GH#12513 an EA dtype passed with a 2D array, split into
            #  multiple EAs that view the values
            values = [values[:, n] for n in range(values.shape[1])]
        else:
            values = [values]

        if columns is None:
            columns = Index(range(len(values)))

        return arrays_to_mgr(values,
                             columns,
                             index,
                             columns,
                             dtype=dtype,
                             typ=typ)

    # by definition an array here
    # the dtypes will be coerced to a single dtype
    values = _prep_ndarray(values, copy=copy)

    if dtype is not None and not is_dtype_equal(values.dtype, dtype):
        shape = values.shape
        flat = values.ravel()

        if not is_integer_dtype(dtype):
            # TODO: skipping integer_dtype is needed to keep the tests passing,
            #  not clear it is correct
            # Note: we really only need _try_cast, but keeping to exposed funcs
            values = sanitize_array(flat,
                                    None,
                                    dtype=dtype,
                                    copy=copy,
                                    raise_cast_failure=True)
        else:
            try:
                values = construct_1d_ndarray_preserving_na(flat,
                                                            dtype=dtype,
                                                            copy=False)
            except Exception as err:
                # e.g. ValueError when trying to cast object dtype to float64
                msg = f"failed to cast to '{dtype}' (Exception was: {err})"
                raise ValueError(msg) from err
        values = values.reshape(shape)

    # _prep_ndarray ensures that values.ndim == 2 at this point
    index, columns = _get_axes(values.shape[0],
                               values.shape[1],
                               index=index,
                               columns=columns)
    values = values.T

    # if we don't have a dtype specified, then try to convert objects
    # on the entire block; this is to convert if we have datetimelikes
    # embedded in an object type
    if dtype is None and is_object_dtype(values.dtype):

        if values.ndim == 2 and values.shape[0] != 1:
            # transpose and separate blocks

            dvals_list = [maybe_infer_to_datetimelike(row) for row in values]
            dvals_list = [ensure_block_shape(dval, 2) for dval in dvals_list]

            # TODO: What about re-joining object columns?
            dvals_list = [maybe_squeeze_dt64tz(x) for x in dvals_list]
            block_values = [
                new_block(dvals_list[n], placement=n, ndim=2)
                for n in range(len(dvals_list))
            ]

        else:
            datelike_vals = maybe_infer_to_datetimelike(values)
            datelike_vals = maybe_squeeze_dt64tz(datelike_vals)
            block_values = [datelike_vals]
    else:
        block_values = [maybe_squeeze_dt64tz(values)]

    return create_block_manager_from_blocks(block_values, [columns, index])
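
The integer branch wraps the low-level cast so an impossible conversion surfaces as a ValueError with context. The underlying failure mode it guards against, in plain NumPy:

import numpy as np

flat = np.array([1.0, np.nan], dtype=object)
try:
    flat.astype("int64")            # int(nan) is undefined, so this raises
except ValueError as err:
    print(f"failed to cast to 'int64' (Exception was: {err})")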
Example #11
def create_block(typestr,
                 placement,
                 item_shape=None,
                 num_offset=0,
                 maker=new_block):
    """
    Supported typestr:

        * float, f8, f4, f2
        * int, i8, i4, i2, i1
        * uint, u8, u4, u2, u1
        * complex, c16, c8
        * bool
        * object, string, O
        * datetime, dt, M8[ns], M8[ns, tz]
        * timedelta, td, m8[ns]
        * sparse (SparseArray with fill_value=0.0)
        * sparse_na (SparseArray with fill_value=np.nan)
        * category, category2

    """
    placement = BlockPlacement(placement)
    num_items = len(placement)

    if item_shape is None:
        item_shape = (N, )

    shape = (num_items, ) + item_shape

    mat = get_numeric_mat(shape)

    if typestr in (
            "float",
            "f8",
            "f4",
            "f2",
            "int",
            "i8",
            "i4",
            "i2",
            "i1",
            "uint",
            "u8",
            "u4",
            "u2",
            "u1",
    ):
        values = mat.astype(typestr) + num_offset
    elif typestr in ("complex", "c16", "c8"):
        values = 1.0j * (mat.astype(typestr) + num_offset)
    elif typestr in ("object", "string", "O"):
        values = np.reshape([f"A{i:d}" for i in mat.ravel() + num_offset],
                            shape)
    elif typestr in ("b", "bool"):
        values = np.ones(shape, dtype=np.bool_)
    elif typestr in ("datetime", "dt", "M8[ns]"):
        values = (mat * 1e9).astype("M8[ns]")
    elif typestr.startswith("M8[ns"):
        # datetime with tz
        m = re.search(r"M8\[ns,\s*(\w+\/?\w*)\]", typestr)
        assert m is not None, f"incompatible typestr -> {typestr}"
        tz = m.groups()[0]
        assert num_items == 1, "must have only 1 num items for a tz-aware"
        values = DatetimeIndex(np.arange(N) * 1e9, tz=tz)._data
        values = ensure_block_shape(values, ndim=len(shape))
    elif typestr in ("timedelta", "td", "m8[ns]"):
        values = (mat * 1).astype("m8[ns]")
    elif typestr in ("category", ):
        values = Categorical([1, 1, 2, 2, 3, 3, 3, 3, 4, 4])
    elif typestr in ("category2", ):
        values = Categorical(
            ["a", "a", "a", "a", "b", "b", "c", "c", "c", "d"])
    elif typestr in ("sparse", "sparse_na"):
        # FIXME: doesn't support num_rows != 10
        assert shape[-1] == 10
        assert all(s == 1 for s in shape[:-1])
        if typestr.endswith("_na"):
            fill_value = np.nan
        else:
            fill_value = 0.0
        values = SparseArray(
            [fill_value, fill_value, 1, 2, 3, fill_value, 4, 5, fill_value, 6],
            fill_value=fill_value,
        )
        arr = values.sp_values.view()
        arr += num_offset - 1
    else:
        raise ValueError(f'Unsupported typestr: "{typestr}"')

    return maker(values, placement=placement, ndim=len(shape))
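
In the pandas test module this helper comes from, N and get_numeric_mat are module-level fixtures (N is a small constant, 10 there). The tz-aware branch recovers the zone from the typestr with a regex; that parse in isolation:

import re

m = re.search(r"M8\[ns,\s*(\w+\/?\w*)\]", "M8[ns, US/Eastern]")
assert m is not None and m.groups()[0] == "US/Eastern"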
Example #12
def concatenate_managers(
    mgrs_indexers, axes: list[Index], concat_axis: int, copy: bool
) -> Manager:
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """
    # TODO(ArrayManager) this assumes that all managers are of the same type
    if isinstance(mgrs_indexers[0][0], ArrayManager):
        return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy)

    concat_plans = [
        _get_mgr_concatenation_plan(mgr, indexers) for mgr, indexers in mgrs_indexers
    ]
    concat_plan = _combine_concat_plans(concat_plans, concat_axis)
    blocks = []

    for placement, join_units in concat_plan:
        unit = join_units[0]
        blk = unit.block

        if len(join_units) == 1 and not join_units[0].indexers:
            values = blk.values
            if copy:
                values = values.copy()
            else:
                values = values.view()
            fastpath = True
        elif _is_uniform_join_units(join_units):
            vals = [ju.block.values for ju in join_units]

            if not blk.is_extension:
                # _is_uniform_join_units ensures a single dtype, so
                #  we can use np.concatenate, which is more performant
                #  than concat_compat
                values = np.concatenate(vals, axis=blk.ndim - 1)
            else:
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(vals, axis=1)
                values = ensure_block_shape(values, blk.ndim)

            values = ensure_wrapped_if_datetimelike(values)

            fastpath = blk.values.dtype == values.dtype
        else:
            values = _concatenate_join_units(join_units, concat_axis, copy=copy)
            fastpath = False

        if fastpath:
            b = blk.make_block_same_class(values, placement=placement)
        else:
            b = new_block(values, placement=placement, ndim=len(axes))

        blocks.append(b)

    return BlockManager(tuple(blocks), axes)
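
For uniform non-extension join units, the fast path is a plain np.concatenate along the last block axis. That step on its own for 2D blocks:

import numpy as np

vals = [np.ones((2, 3)), np.zeros((2, 2))]   # same dtype, same row count
values = np.concatenate(vals, axis=1)        # axis = blk.ndim - 1 for 2D blocks
assert values.shape == (2, 5)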
Example #13
def ndarray_to_mgr(values, index, columns, dtype: DtypeObj | None, copy: bool,
                   typ: str) -> Manager:
    # used in DataFrame.__init__
    # input must be a ndarray, list, Series, Index, ExtensionArray

    if isinstance(values, ABCSeries):
        if columns is None:
            if values.name is not None:
                columns = Index([values.name])
        if index is None:
            index = values.index
        else:
            values = values.reindex(index)

        # zero len case (GH #2234)
        if not len(values) and columns is not None and len(columns):
            values = np.empty((0, 1), dtype=object)

    vdtype = getattr(values, "dtype", None)
    if is_1d_only_ea_dtype(vdtype) or isinstance(dtype, ExtensionDtype):
        # GH#19157

        if isinstance(values, np.ndarray) and values.ndim > 1:
            # GH#12513 an EA dtype passed with a 2D array, split into
            #  multiple EAs that view the values
            values = [values[:, n] for n in range(values.shape[1])]
        else:
            values = [values]

        if columns is None:
            columns = Index(range(len(values)))

        return arrays_to_mgr(values,
                             columns,
                             index,
                             columns,
                             dtype=dtype,
                             typ=typ)

    if is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype):
        # i.e. Datetime64TZ
        values = extract_array(values, extract_numpy=True)
        if copy:
            values = values.copy()
        if values.ndim == 1:
            values = values.reshape(-1, 1)

    else:
        # by definition an array here
        # the dtypes will be coerced to a single dtype
        values = _prep_ndarray(values, copy=copy)

    if dtype is not None and not is_dtype_equal(values.dtype, dtype):
        shape = values.shape
        flat = values.ravel()

        if not is_integer_dtype(dtype):
            # TODO: skipping integer_dtype is needed to keep the tests passing,
            #  not clear it is correct
            # Note: we really only need _try_cast, but keeping to exposed funcs
            values = sanitize_array(flat,
                                    None,
                                    dtype=dtype,
                                    copy=copy,
                                    raise_cast_failure=True)
        else:
            try:
                values = construct_1d_ndarray_preserving_na(flat,
                                                            dtype=dtype,
                                                            copy=False)
            except IntCastingNaNError:
                # following Series, we ignore the dtype and retain floating
                # values instead of casting nans to meaningless ints
                pass

        values = values.reshape(shape)

    # _prep_ndarray ensures that values.ndim == 2 at this point
    index, columns = _get_axes(values.shape[0],
                               values.shape[1],
                               index=index,
                               columns=columns)

    _check_values_indices_shape_match(values, index, columns)

    if typ == "array":

        if issubclass(values.dtype.type, str):
            values = np.array(values, dtype=object)

        if dtype is None and is_object_dtype(values.dtype):
            arrays = [
                ensure_wrapped_if_datetimelike(
                    maybe_infer_to_datetimelike(values[:, i].copy()))
                for i in range(values.shape[1])
            ]
        else:
            if is_datetime_or_timedelta_dtype(values.dtype):
                values = ensure_wrapped_if_datetimelike(values)
            arrays = [values[:, i].copy() for i in range(values.shape[1])]

        return ArrayManager(arrays, [index, columns], verify_integrity=False)

    values = values.T

    # if we don't have a dtype specified, then try to convert objects
    # on the entire block; this is to convert if we have datetimelikes
    # embedded in an object type
    if dtype is None and is_object_dtype(values.dtype):

        if values.ndim == 2 and values.shape[0] != 1:
            # transpose and separate blocks

            dtlike_vals = [maybe_infer_to_datetimelike(row) for row in values]
            dvals_list = [ensure_block_shape(dval, 2) for dval in dtlike_vals]

            # TODO: What about re-joining object columns?
            block_values = [
                new_block(dvals_list[n], placement=n, ndim=2)
                for n in range(len(dvals_list))
            ]

        else:
            datelike_vals = maybe_infer_to_datetimelike(values)
            nb = new_block(datelike_vals,
                           placement=slice(len(columns)),
                           ndim=2)
            block_values = [nb]
    else:
        nb = new_block(values, placement=slice(len(columns)), ndim=2)
        block_values = [nb]

    if len(columns) == 0:
        block_values = []

    return create_block_manager_from_blocks(block_values, [columns, index])
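
Note that this variant copies every column slice unconditionally when building the ArrayManager, whereas the later version in Example #9 slices views and copies only when copy=True. The copy-vs-view distinction in plain NumPy:

import numpy as np

values = np.arange(6.0).reshape(3, 2)
arrays = [values[:, i].copy() for i in range(values.shape[1])]
assert all(a.base is None for a in arrays)   # independent copies, not views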
Example #14
def concatenate_managers(mgrs_indexers, axes: list[Index], concat_axis: int,
                         copy: bool) -> Manager:
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """
    # TODO(ArrayManager) this assumes that all managers are of the same type
    if isinstance(mgrs_indexers[0][0], ArrayManager):
        return _concatenate_array_managers(mgrs_indexers, axes, concat_axis,
                                           copy)

    # Assertions disabled for performance
    # for tup in mgrs_indexers:
    #    # caller is responsible for ensuring this
    #    indexers = tup[1]
    #    assert concat_axis not in indexers

    if concat_axis == 0:
        return _concat_managers_axis0(mgrs_indexers, axes, copy)

    mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers)

    # Assertion disabled for performance
    # assert all(not x[1] for x in mgrs_indexers)

    concat_plans = [
        _get_mgr_concatenation_plan(mgr) for mgr, _ in mgrs_indexers
    ]
    concat_plan = _combine_concat_plans(concat_plans)
    blocks = []

    for placement, join_units in concat_plan:
        unit = join_units[0]
        blk = unit.block
        # Assertion disabled for performance
        # assert len(join_units) == len(mgrs_indexers)

        if len(join_units) == 1:
            values = blk.values
            if copy:
                values = values.copy()
            else:
                values = values.view()
            fastpath = True
        elif _is_uniform_join_units(join_units):
            vals = [ju.block.values for ju in join_units]

            if not blk.is_extension:
                # _is_uniform_join_units ensures a single dtype, so
                #  we can use np.concatenate, which is more performant
                #  than concat_compat
                values = np.concatenate(vals, axis=1)
            else:
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(vals, axis=1)
                values = ensure_block_shape(values, ndim=2)

            values = ensure_wrapped_if_datetimelike(values)

            fastpath = blk.values.dtype == values.dtype
        else:
            values = _concatenate_join_units(join_units, copy=copy)
            fastpath = False

        if fastpath:
            b = blk.make_block_same_class(values, placement=placement)
        else:
            b = new_block_2d(values, placement=placement)

        blocks.append(b)

    return BlockManager(tuple(blocks), axes)
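
When copy=False, the single-join-unit path hands out values.view(), so the result shares memory with the source block. What that sharing means in plain NumPy:

import numpy as np

blk_values = np.arange(4.0)
shared = blk_values.view()          # no copy: new array object, same buffer
shared[0] = 99.0
assert blk_values[0] == 99.0        # mutation is visible through both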