def test_cast_1d_array(self, datum1, datum2): data = [datum1, datum2] result = construct_1d_object_array_from_listlike(data) # Direct comparison fails: https://github.com/numpy/numpy/issues/10218 assert result.dtype == 'object' assert list(result) == data
def na_op(x, y): try: result = op(x, y) except TypeError: if isinstance(y, list): y = construct_1d_object_array_from_listlike(y) if isinstance(y, (np.ndarray, ABCSeries)): if (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)): result = op(x, y) # when would this be hit? else: x = _ensure_object(x) y = _ensure_object(y) result = lib.vec_binop(x, y, op) else: # let null fall thru if not isna(y): y = bool(y) try: result = lib.scalar_binop(x, y, op) except: msg = ("cannot compare a dtyped [{dtype}] array " "with a scalar of type [{type}]" ).format(dtype=x.dtype, type=type(y).__name__) raise TypeError(msg) return result
def test_from_product_datetimeindex(): dt_index = date_range('2000-01-01', periods=2) mi = pd.MultiIndex.from_product([[1, 2], dt_index]) etalon = construct_1d_object_array_from_listlike([(1, pd.Timestamp( '2000-01-01')), (1, pd.Timestamp('2000-01-02')), (2, pd.Timestamp( '2000-01-01')), (2, pd.Timestamp('2000-01-02'))]) tm.assert_numpy_array_equal(mi.values, etalon)
def test_values_boxed(): tuples = [(1, pd.Timestamp('2000-01-01')), (2, pd.NaT), (3, pd.Timestamp('2000-01-03')), (1, pd.Timestamp('2000-01-04')), (2, pd.Timestamp('2000-01-02')), (3, pd.Timestamp('2000-01-03'))] result = pd.MultiIndex.from_tuples(tuples) expected = construct_1d_object_array_from_listlike(tuples) tm.assert_numpy_array_equal(result.values, expected) # Check that code branches for boxed values produce identical results tm.assert_numpy_array_equal(result.values[:4], result[:4].values)
def asarray_tuplesafe(values, dtype=None): if not (isinstance(values, (list, tuple)) or hasattr(values, '__array__')): values = list(values) elif isinstance(values, ABCIndexClass): return values.values if isinstance(values, list) and dtype in [np.object_, object]: return construct_1d_object_array_from_listlike(values) result = np.asarray(values, dtype=dtype) if issubclass(result.dtype.type, str): result = np.asarray(values, dtype=object) if result.ndim == 2: # Avoid building an array of arrays: # TODO: verify whether any path hits this except #18819 (invalid) values = [tuple(x) for x in values] result = construct_1d_object_array_from_listlike(values) return result
def _comp_method_OBJECT_ARRAY(op, x, y): if isinstance(y, list): y = construct_1d_object_array_from_listlike(y) if isinstance(y, (np.ndarray, ABCSeries, ABCIndex)): if not is_object_dtype(y.dtype): y = y.astype(np.object_) if isinstance(y, (ABCSeries, ABCIndex)): y = y.values result = lib.vec_compare(x, y, op) else: result = lib.scalar_compare(x, y, op) return result
def _try_cast(arr, take_fast_path, dtype, copy, raise_cast_failure): # perf shortcut as this is the most common case if take_fast_path: if maybe_castable(arr) and not copy and dtype is None: return arr try: # GH#15832: Check if we are requesting a numeric dype and # that we can convert the data to the requested dtype. if is_integer_dtype(dtype): subarr = maybe_cast_to_integer_array(arr, dtype) subarr = maybe_cast_to_datetime(arr, dtype) # Take care in creating object arrays (but iterators are not # supported): if is_object_dtype(dtype) and (is_list_like(subarr) and not (is_iterator(subarr) or isinstance(subarr, np.ndarray))): subarr = construct_1d_object_array_from_listlike(subarr) elif not is_extension_type(subarr): subarr = construct_1d_ndarray_preserving_na(subarr, dtype, copy=copy) except (ValueError, TypeError): if is_categorical_dtype(dtype): # We *do* allow casting to categorical, since we know # that Categorical is the only array type for 'category'. subarr = Categorical(arr, dtype.categories, ordered=dtype.ordered) elif is_extension_array_dtype(dtype): # create an extension array from its dtype array_type = dtype.construct_array_type()._from_sequence subarr = array_type(arr, dtype=dtype, copy=copy) elif dtype is not None and raise_cast_failure: raise else: subarr = np.array(arr, dtype=object, copy=copy) return subarr
def test_cast_1d_array_invalid_scalar(val): with pytest.raises(TypeError, match="has no len()"): construct_1d_object_array_from_listlike(val)
def _try_cast( arr: Union[list, np.ndarray], dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bool, ) -> ArrayLike: """ Convert input to numpy ndarray and optionally cast to a given dtype. Parameters ---------- arr : ndarray or list Excludes: ExtensionArray, Series, Index. dtype : np.dtype, ExtensionDtype or None copy : bool If False, don't copy the data if not needed. raise_cast_failure : bool If True, and if a dtype is specified, raise errors during casting. Otherwise an object array is returned. Returns ------- np.ndarray or ExtensionArray """ # perf shortcut as this is the most common case if ( isinstance(arr, np.ndarray) and maybe_castable(arr.dtype) and not copy and dtype is None ): return arr if isinstance(dtype, ExtensionDtype) and (dtype.kind != "M" or is_sparse(dtype)): # create an extension array from its dtype # DatetimeTZ case needs to go through maybe_cast_to_datetime but # SparseDtype does not array_type = dtype.construct_array_type()._from_sequence subarr = array_type(arr, dtype=dtype, copy=copy) return subarr if is_object_dtype(dtype) and not isinstance(arr, np.ndarray): subarr = construct_1d_object_array_from_listlike(arr) return subarr try: # GH#15832: Check if we are requesting a numeric dtype and # that we can convert the data to the requested dtype. if is_integer_dtype(dtype): # this will raise if we have e.g. floats maybe_cast_to_integer_array(arr, dtype) subarr = arr else: subarr = maybe_cast_to_datetime(arr, dtype) if not isinstance(subarr, (ABCExtensionArray, ABCIndex)): subarr = construct_1d_ndarray_preserving_na(subarr, dtype, copy=copy) except OutOfBoundsDatetime: # in case of out of bound datetime64 -> always raise raise except (ValueError, TypeError) as err: if dtype is not None and raise_cast_failure: raise elif "Cannot cast" in str(err): # via _disallow_mismatched_datetimelike raise else: subarr = np.array(arr, dtype=object, copy=copy) return subarr
def logical_op(left: ArrayLike, right: Any, op) -> ArrayLike: """ Evaluate a logical operation `|`, `&`, or `^`. Parameters ---------- left : np.ndarray or ExtensionArray right : object Cannot be a DataFrame, Series, or Index. op : {operator.and_, operator.or_, operator.xor} Or one of the reversed variants from roperator. Returns ------- ndarray or ExtensionArray """ fill_int = lambda x: x def fill_bool(x, left=None): # if `left` is specifically not-boolean, we do not cast to bool if x.dtype.kind in ["c", "f", "O"]: # dtypes that can hold NA mask = isna(x) if mask.any(): x = x.astype(object) x[mask] = False if left is None or is_bool_dtype(left.dtype): x = x.astype(bool) return x is_self_int_dtype = is_integer_dtype(left.dtype) right = lib.item_from_zerodim(right) if is_list_like(right) and not hasattr(right, "dtype"): # e.g. list, tuple right = construct_1d_object_array_from_listlike(right) # NB: We assume extract_array has already been called on left and right lvalues = maybe_upcast_datetimelike_array(left) rvalues = right if should_extension_dispatch(lvalues, rvalues): # Call the method on lvalues res_values = op(lvalues, rvalues) else: if isinstance(rvalues, np.ndarray): is_other_int_dtype = is_integer_dtype(rvalues.dtype) rvalues = rvalues if is_other_int_dtype else fill_bool( rvalues, lvalues) else: # i.e. scalar is_other_int_dtype = lib.is_integer(rvalues) # For int vs int `^`, `|`, `&` are bitwise operators and return # integer dtypes. Otherwise these are boolean ops filler = fill_int if is_self_int_dtype and is_other_int_dtype else fill_bool res_values = na_logical_op(lvalues, rvalues, op) # error: Cannot call function of unknown type res_values = filler(res_values) # type: ignore[operator] return res_values
def _try_cast( arr, dtype: Optional[Union[np.dtype, "ExtensionDtype"]], copy: bool, raise_cast_failure: bool, ): """ Convert input to numpy ndarray and optionally cast to a given dtype. Parameters ---------- arr : ndarray, list, tuple, iterator (catchall) Excludes: ExtensionArray, Series, Index. dtype : np.dtype, ExtensionDtype or None copy : bool If False, don't copy the data if not needed. raise_cast_failure : bool If True, and if a dtype is specified, raise errors during casting. Otherwise an object array is returned. """ # perf shortcut as this is the most common case if isinstance(arr, np.ndarray): if maybe_castable(arr) and not copy and dtype is None: return arr try: # GH#15832: Check if we are requesting a numeric dype and # that we can convert the data to the requested dtype. if is_integer_dtype(dtype): subarr = maybe_cast_to_integer_array(arr, dtype) subarr = maybe_cast_to_datetime(arr, dtype) # Take care in creating object arrays (but iterators are not # supported): if is_object_dtype(dtype) and ( is_list_like(subarr) and not (is_iterator(subarr) or isinstance(subarr, np.ndarray))): subarr = construct_1d_object_array_from_listlike(subarr) elif not is_extension_array_dtype(subarr): subarr = construct_1d_ndarray_preserving_na(subarr, dtype, copy=copy) except OutOfBoundsDatetime: # in case of out of bound datetime64 -> always raise raise except (ValueError, TypeError): if is_categorical_dtype(dtype): # We *do* allow casting to categorical, since we know # that Categorical is the only array type for 'category'. dtype = cast(CategoricalDtype, dtype) subarr = dtype.construct_array_type()(arr, dtype.categories, ordered=dtype.ordered) elif is_extension_array_dtype(dtype): # create an extension array from its dtype dtype = cast(ExtensionDtype, dtype) array_type = dtype.construct_array_type()._from_sequence subarr = array_type(arr, dtype=dtype, copy=copy) elif dtype is not None and raise_cast_failure: raise else: subarr = np.array(arr, dtype=object, copy=copy) return subarr
def _try_cast( arr: list | np.ndarray, dtype: DtypeObj | None, copy: bool, raise_cast_failure: bool, ) -> ArrayLike: """ Convert input to numpy ndarray and optionally cast to a given dtype. Parameters ---------- arr : ndarray or list Excludes: ExtensionArray, Series, Index. dtype : np.dtype, ExtensionDtype or None copy : bool If False, don't copy the data if not needed. raise_cast_failure : bool If True, and if a dtype is specified, raise errors during casting. Otherwise an object array is returned. Returns ------- np.ndarray or ExtensionArray """ is_ndarray = isinstance(arr, np.ndarray) if dtype is None: # perf shortcut as this is the most common case if is_ndarray: arr = cast(np.ndarray, arr) if arr.dtype != object: return sanitize_to_nanoseconds(arr, copy=copy) out = maybe_infer_to_datetimelike(arr) if out is arr and copy: out = out.copy() return out else: # i.e. list varr = np.array(arr, copy=False) # filter out cases that we _dont_ want to go through # maybe_infer_to_datetimelike if varr.dtype != object or varr.size == 0: return varr return maybe_infer_to_datetimelike(varr) elif isinstance(dtype, ExtensionDtype): # create an extension array from its dtype if isinstance(dtype, DatetimeTZDtype): # We can't go through _from_sequence because it handles dt64naive # data differently; _from_sequence treats naive as wall times, # while maybe_cast_to_datetime treats it as UTC # see test_maybe_promote_any_numpy_dtype_with_datetimetz return maybe_cast_to_datetime(arr, dtype) # TODO: copy? array_type = dtype.construct_array_type()._from_sequence subarr = array_type(arr, dtype=dtype, copy=copy) return subarr elif is_object_dtype(dtype): if not is_ndarray: subarr = construct_1d_object_array_from_listlike(arr) return subarr return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy) elif dtype.kind == "U": # TODO: test cases with arr.dtype.kind in ["m", "M"] return lib.ensure_string_array(arr, convert_na_value=False, copy=copy) elif dtype.kind in ["m", "M"]: return maybe_cast_to_datetime(arr, dtype) try: # GH#15832: Check if we are requesting a numeric dtype and # that we can convert the data to the requested dtype. if is_integer_dtype(dtype): # this will raise if we have e.g. floats subarr = maybe_cast_to_integer_array(arr, dtype) else: # 4 tests fail if we move this to a try/except/else; see # test_constructor_compound_dtypes, test_constructor_cast_failure # test_constructor_dict_cast2, test_loc_setitem_dtype subarr = np.array(arr, dtype=dtype, copy=copy) except (ValueError, TypeError): if raise_cast_failure: raise else: # we only get here with raise_cast_failure False, which means # called via the DataFrame constructor # GH#24435 warnings.warn( f"Could not cast to {dtype}, falling back to object. This " "behavior is deprecated. In a future version, when a dtype is " "passed to 'DataFrame', either all columns will be cast to that " "dtype, or a TypeError will be raised", FutureWarning, stacklevel=7, ) subarr = np.array(arr, dtype=object, copy=copy) return subarr
def _values_for_argsort(self): # Bypass NumPy's shape inference to get a (N,) array of tuples. frozen = [tuple(x.items()) for x in self] return construct_1d_object_array_from_listlike(frozen)
def _get_join_target(self) -> np.ndarray: # constructing tuples is much faster than constructing Intervals tups = list(zip(self.left, self.right)) target = construct_1d_object_array_from_listlike(tups) return target