def _concatenate_join_units( join_units: list[JoinUnit], concat_axis: int, copy: bool ) -> ArrayLike: """ Concatenate values from several join units along selected axis. """ if concat_axis == 0 and len(join_units) > 1: # Concatenating join units along ax0 is handled in _merge_blocks. raise AssertionError("Concatenating join units along axis0") empty_dtype = _get_empty_dtype(join_units) has_none_blocks = any(unit.block is None for unit in join_units) upcasted_na = _dtype_to_na_value(empty_dtype, has_none_blocks) to_concat = [ ju.get_reindexed_values(empty_dtype=empty_dtype, upcasted_na=upcasted_na) for ju in join_units ] if len(to_concat) == 1: # Only one block, nothing to concatenate. concat_values = to_concat[0] if copy: if isinstance(concat_values, np.ndarray): # non-reindexed (=not yet copied) arrays are made into a view # in JoinUnit.get_reindexed_values if concat_values.base is not None: concat_values = concat_values.copy() else: concat_values = concat_values.copy() elif any(is_1d_only_ea_obj(t) for t in to_concat): # TODO(EA2D): special case not needed if all EAs used HybridBlocks # NB: we are still assuming here that Hybrid blocks have shape (1, N) # concatting with at least one EA means we are concatting a single column # the non-EA values are 2D arrays with shape (1, n) # error: Invalid index type "Tuple[int, slice]" for # "Union[ExtensionArray, ndarray]"; expected type "Union[int, slice, ndarray]" to_concat = [ t if is_1d_only_ea_obj(t) else t[0, :] # type: ignore[index] for t in to_concat ] concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True) concat_values = ensure_block_shape(concat_values, 2) else: concat_values = concat_compat(to_concat, axis=concat_axis) return concat_values
def _concatenate_join_units(join_units: list[JoinUnit], copy: bool) -> ArrayLike: """ Concatenate values from several join units along axis=1. """ empty_dtype = _get_empty_dtype(join_units) to_concat = [ ju.get_reindexed_values(empty_dtype=empty_dtype) for ju in join_units ] if len(to_concat) == 1: # Only one block, nothing to concatenate. concat_values = to_concat[0] if copy: if isinstance(concat_values, np.ndarray): # non-reindexed (=not yet copied) arrays are made into a view # in JoinUnit.get_reindexed_values if concat_values.base is not None: concat_values = concat_values.copy() else: concat_values = concat_values.copy() elif any(is_1d_only_ea_obj(t) for t in to_concat): # TODO(EA2D): special case not needed if all EAs used HybridBlocks # NB: we are still assuming here that Hybrid blocks have shape (1, N) # concatting with at least one EA means we are concatting a single column # the non-EA values are 2D arrays with shape (1, n) # error: No overload variant of "__getitem__" of "ExtensionArray" matches # argument type "Tuple[int, slice]" to_concat = [ t if is_1d_only_ea_obj(t) else t[0, :] # type: ignore[call-overload] for t in to_concat ] concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True) concat_values = ensure_block_shape(concat_values, 2) else: concat_values = concat_compat(to_concat, axis=1) return concat_values
def cython_operation( self, *, values: ArrayLike, axis: int, min_count: int = -1, comp_ids: np.ndarray, ngroups: int, **kwargs, ) -> ArrayLike: """ Call our cython function, with appropriate pre- and post- processing. """ if values.ndim > 2: raise NotImplementedError("number of dimensions is currently limited to 2") elif values.ndim == 2: assert axis == 1, axis elif not is_1d_only_ea_obj(values): # Note: it is *not* the case that axis is always 0 for 1-dim values, # as we can have 1D ExtensionArrays that we need to treat as 2D assert axis == 0 dtype = values.dtype is_numeric = is_numeric_dtype(dtype) # can we do this operation with our cython functions # if not raise NotImplementedError self._disallow_invalid_ops(dtype, is_numeric) if not isinstance(values, np.ndarray): # i.e. ExtensionArray if isinstance(values, BaseMaskedArray) and self.uses_mask(): return self._masked_ea_wrap_cython_operation( values, min_count=min_count, ngroups=ngroups, comp_ids=comp_ids, **kwargs, ) else: return self._ea_wrap_cython_operation( values, min_count=min_count, ngroups=ngroups, comp_ids=comp_ids, **kwargs, ) return self._cython_op_ndim_compat( values, min_count=min_count, ngroups=ngroups, comp_ids=comp_ids, mask=None, **kwargs, )
def take_nd( arr: ArrayLike, indexer, axis: int = 0, fill_value=lib.no_default, allow_fill: bool = True, ) -> ArrayLike: """ Specialized Cython take which sets NaN values in one pass This dispatches to ``take`` defined on ExtensionArrays. It does not currently dispatch to ``SparseArray.take`` for sparse ``arr``. Note: this function assumes that the indexer is a valid(ated) indexer with no out of bound indices. Parameters ---------- arr : np.ndarray or ExtensionArray Input array. indexer : ndarray 1-D array of indices to take, subarrays corresponding to -1 value indices are filed with fill_value axis : int, default 0 Axis to take from fill_value : any, default np.nan Fill value to replace -1 values with allow_fill : bool, default True If False, indexer is assumed to contain no -1 values so no filling will be done. This short-circuits computation of a mask. Result is undefined if allow_fill == False and -1 is present in indexer. Returns ------- subarray : np.ndarray or ExtensionArray May be the same type as the input, or cast to an ndarray. """ if fill_value is lib.no_default: fill_value = na_value_for_dtype(arr.dtype, compat=False) if not isinstance(arr, np.ndarray): # i.e. ExtensionArray, # includes for EA to catch DatetimeArray, TimedeltaArray if not is_1d_only_ea_obj(arr): # i.e. DatetimeArray, TimedeltaArray arr = cast("NDArrayBackedExtensionArray", arr) return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis) return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) arr = np.asarray(arr) return _take_nd_ndarray(arr, indexer, axis, fill_value, allow_fill)