def maybe_convert_platform_interval(values):
    """
    Platform-convert ``values`` with the special cases IntervalArray needs.

    This wraps ``maybe_convert_platform`` and changes the resulting dtype in
    cases where the default would be unusable for IntervalArray. An empty
    list or tuple is returned as an empty int64 array, because the default
    for empty input is object dtype, which IntervalArray prohibits.

    Parameters
    ----------
    values : array-like

    Returns
    -------
    array
    """
    is_empty_sequence = isinstance(values, (list, tuple)) and len(values) == 0
    if is_empty_sequence:
        # GH 19016
        # object dtype (the default for empty input) is prohibited for
        # IntervalArray, so hand back an empty integer array instead
        return np.array([], dtype=np.int64)

    if is_categorical_dtype(values):
        # densify categoricals before the generic conversion
        values = np.asarray(values)

    return maybe_convert_platform(values)
def convert(v):
    """
    Platform-convert a list-like value; pass anything else through.

    Values that are not list-like, and DataFrames, are returned unchanged.
    Everything else is unwrapped with ``extract_array`` and then run through
    ``maybe_convert_platform``.
    """
    if is_list_like(v) and not isinstance(v, ABCDataFrame):
        return maybe_convert_platform(extract_array(v, extract_numpy=True))
    return v
def _sanitize_values(arr):
    """
    Return the array form of ``arr`` in a platform independent manner.

    Objects exposing a ``values`` attribute are unwrapped to that attribute.
    Scalars are boxed in a one-element list first. ndarrays are returned
    untouched, non-empty list-likes go through ``maybe_convert_platform``,
    and everything else is passed to ``np.asarray``.
    """
    if hasattr(arr, 'values'):
        return arr.values

    # scalar
    if is_scalar(arr):
        arr = [arr]

    # ndarray
    if isinstance(arr, np.ndarray):
        return arr

    if is_list_like(arr) and len(arr) > 0:
        return maybe_convert_platform(arr)

    return np.asarray(arr)
def maybe_convert_platform_interval(values):
    """
    Platform-convert ``values`` with the special cases IntervalArray needs.

    This wraps ``maybe_convert_platform`` and changes the resulting dtype in
    cases where the default would be unusable for IntervalArray. An empty
    list or tuple is returned as an empty int64 array, because the default
    for empty input is object dtype, which IntervalArray prohibits.

    Parameters
    ----------
    values : array-like

    Returns
    -------
    array
    """
    is_empty_sequence = isinstance(values, (list, tuple)) and len(values) == 0
    if is_empty_sequence:
        # GH 19016
        # object dtype (the default for empty input) is prohibited for
        # IntervalArray, so hand back an empty integer array instead
        return np.array([], dtype=np.int64)

    if is_categorical_dtype(values):
        # densify categoricals before the generic conversion
        values = np.asarray(values)

    return maybe_convert_platform(values)
def __new__(cls, data, closed=None, name=None, copy=False, dtype=None,
            fastpath=False, verify_integrity=True):
    """
    Build an instance from interval-like ``data``.

    With ``fastpath`` the bounds are read straight off ``data`` and handed to
    ``cls._simple_new`` without integrity verification. Otherwise the name is
    inherited from ``data`` when not given. The bounds come from an existing
    IntervalIndex, or are inferred via ``intervals_to_interval_bounds``.

    Raises
    ------
    ValueError
        If ``closed`` is given and conflicts with the value inferred from
        ``data``.
    """
    if fastpath:
        return cls._simple_new(data.left, data.right, closed, name,
                               copy=copy, verify_integrity=False)

    if name is None and hasattr(data, 'name'):
        name = data.name

    if isinstance(data, IntervalIndex):
        # an existing index dictates its own bounds and closedness
        return cls._simple_new(data.left, data.right, data.closed, name,
                               copy=copy,
                               verify_integrity=verify_integrity)

    # don't allow scalars
    if is_scalar(data):
        cls._scalar_data_error(data)

    converted = maybe_convert_platform(data)
    left, right, infer_closed = intervals_to_interval_bounds(converted)

    conflicting = (_all_not_none(closed, infer_closed) and
                   closed != infer_closed)
    if conflicting:
        # GH 18421
        raise ValueError(
            ("conflicting values for closed: constructor got "
             "'{closed}', inferred from data '{infer_closed}'"
             .format(closed=closed, infer_closed=infer_closed)))

    return cls._simple_new(left, right, closed or infer_closed, name,
                           copy=copy, verify_integrity=verify_integrity)
def __new__(cls, data, closed=None, name=None, copy=False, dtype=None,
            fastpath=False, verify_integrity=True):
    """
    Build an instance from interval-like ``data``.

    With ``fastpath`` the bounds are read straight off ``data`` and handed to
    ``cls._simple_new`` without integrity verification. Otherwise the name is
    inherited from ``data`` when not given. The bounds come from an existing
    IntervalIndex, or are inferred via ``intervals_to_interval_bounds``.

    Raises
    ------
    ValueError
        If ``closed`` is given and conflicts with the value inferred from
        ``data``.
    """
    if fastpath:
        return cls._simple_new(data.left, data.right, closed, name,
                               copy=copy, verify_integrity=False)

    if name is None and hasattr(data, 'name'):
        name = data.name

    if isinstance(data, IntervalIndex):
        # an existing index dictates its own bounds and closedness
        return cls._simple_new(data.left, data.right, data.closed, name,
                               copy=copy,
                               verify_integrity=verify_integrity)

    # don't allow scalars
    if is_scalar(data):
        cls._scalar_data_error(data)

    converted = maybe_convert_platform(data)
    left, right, infer_closed = intervals_to_interval_bounds(converted)

    conflicting = (_all_not_none(closed, infer_closed) and
                   closed != infer_closed)
    if conflicting:
        # GH 18421
        raise ValueError(
            ("conflicting values for closed: constructor got "
             "'{closed}', inferred from data '{infer_closed}'"
             .format(closed=closed, infer_closed=infer_closed)))

    return cls._simple_new(left, right, closed or infer_closed, name,
                           copy=copy, verify_integrity=verify_integrity)
def _sanitize_values(arr):
    """
    Return the array form of ``arr`` in a platform independent manner.

    Objects exposing a ``values`` attribute are unwrapped to that attribute.
    Scalars are boxed in a one-element list first. ndarrays are returned
    untouched, non-empty list-likes go through ``maybe_convert_platform``,
    and everything else is passed to ``np.asarray``.
    """
    if hasattr(arr, 'values'):
        return arr.values

    # scalar
    if is_scalar(arr):
        arr = [arr]

    # ndarray
    if isinstance(arr, np.ndarray):
        return arr

    if is_list_like(arr) and len(arr) > 0:
        return maybe_convert_platform(arr)

    return np.asarray(arr)
def from_arrays(cls, left, right, closed='right', name=None, copy=False):
    """
    Construct an IntervalIndex from a left and a right array

    Parameters
    ----------
    left : array-like (1-dimensional)
        Left bounds for each interval.
    right : array-like (1-dimensional)
        Right bounds for each interval.
    closed : {'left', 'right', 'both', 'neither'}, optional
        Whether the intervals are closed on the left-side, right-side, both
        or neither. Defaults to 'right'.
    name : object, optional
        Name to be stored in the index.
    copy : boolean, default False
        copy the data

    Examples
    --------

    >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3])
    IntervalIndex([(0, 1], (1, 2], (2, 3]]
                  closed='right',
                  dtype='interval[int64]')

    See Also
    --------
    interval_range : Function to create a fixed frequency IntervalIndex
    IntervalIndex.from_breaks : Construct an IntervalIndex from an array of
                                splits
    IntervalIndex.from_intervals : Construct an IntervalIndex from an array
                                   of Interval objects
    IntervalIndex.from_tuples : Construct an IntervalIndex from a
                                list/array of tuples
    """
    # convert the left side first, then the right
    left_bounds, right_bounds = [maybe_convert_platform(side)
                                 for side in (left, right)]

    # bounds supplied by the caller are always integrity-checked
    return cls._simple_new(left_bounds, right_bounds, closed, name=name,
                           copy=copy, verify_integrity=True)
def convert(v):
    """
    Platform-convert a recognised list-like value; pass anything else through.

    Returned unchanged: values that are not list-like, DataFrames, and
    list-likes that neither carry a ``dtype`` attribute nor are a list,
    tuple or range. Everything else is unwrapped with ``extract_array`` and
    run through ``maybe_convert_platform``.
    """
    if not is_list_like(v) or isinstance(v, ABCDataFrame):
        return v

    is_known_container = (hasattr(v, "dtype") or
                          isinstance(v, (list, tuple, range)))
    if not is_known_container:
        # TODO: should we cast these to list?
        return v

    unwrapped = extract_array(v, extract_numpy=True)
    return maybe_convert_platform(unwrapped)
def from_arrays(cls, left, right, closed='right', name=None, copy=False):
    """
    Construct an IntervalIndex from a left and a right array

    Parameters
    ----------
    left : array-like (1-dimensional)
        Left bounds for each interval.
    right : array-like (1-dimensional)
        Right bounds for each interval.
    closed : {'left', 'right', 'both', 'neither'}, optional
        Whether the intervals are closed on the left-side, right-side, both
        or neither. Defaults to 'right'.
    name : object, optional
        Name to be stored in the index.
    copy : boolean, default False
        copy the data

    Examples
    --------

    >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3])
    IntervalIndex([(0, 1], (1, 2], (2, 3]]
                  closed='right',
                  dtype='interval[int64]')

    See Also
    --------
    interval_range : Function to create a fixed frequency IntervalIndex
    IntervalIndex.from_breaks : Construct an IntervalIndex from an array of
                                splits
    IntervalIndex.from_intervals : Construct an IntervalIndex from an array
                                   of Interval objects
    IntervalIndex.from_tuples : Construct an IntervalIndex from a
                                list/array of tuples
    """
    # convert the left side first, then the right
    left_bounds, right_bounds = [maybe_convert_platform(side)
                                 for side in (left, right)]

    # bounds supplied by the caller are always integrity-checked
    return cls._simple_new(left_bounds, right_bounds, closed, name=name,
                           copy=copy, verify_integrity=True)
def from_intervals(cls, data, name=None, copy=False):
    """
    Construct an IntervalIndex from a 1d array of Interval objects

    Parameters
    ----------
    data : array-like (1-dimensional)
        Array of Interval objects. All intervals must be closed on the same
        sides.
    name : object, optional
        Name to be stored in the index. When falsy and ``data`` is an
        IntervalIndex, the name of ``data`` is used.
    copy : boolean, default False
        Accepted for compatibility only and ignored; the data is always
        forwarded with ``copy=False``.

    Examples
    --------

    >>> pd.IntervalIndex.from_intervals([pd.Interval(0, 1),
    ...                                  pd.Interval(1, 2)])
    IntervalIndex([(0, 1], (1, 2]]
                  closed='right', dtype='interval[int64]')

    The generic Index constructor works identically when it infers an array
    of all intervals:

    >>> pd.Index([pd.Interval(0, 1), pd.Interval(1, 2)])
    IntervalIndex([(0, 1], (1, 2]]
                  closed='right', dtype='interval[int64]')

    See Also
    --------
    interval_range : Function to create a fixed frequency IntervalIndex
    IntervalIndex.from_arrays : Construct an IntervalIndex from a left and
                                right array
    IntervalIndex.from_breaks : Construct an IntervalIndex from an array of
                                splits
    IntervalIndex.from_tuples : Construct an IntervalIndex from a
                                list/array of tuples
    """
    if isinstance(data, IntervalIndex):
        # reuse the bounds of an existing index, keeping its name unless
        # the caller supplied one
        bounds = (data.left, data.right, data.closed)
        name = name or data.name
    else:
        converted = maybe_convert_platform(data)
        bounds = intervals_to_interval_bounds(converted)

    left, right, closed = bounds
    return cls.from_arrays(left, right, closed, name=name, copy=False)
def from_intervals(cls, data, name=None, copy=False):
    """
    Construct an IntervalIndex from a 1d array of Interval objects

    Parameters
    ----------
    data : array-like (1-dimensional)
        Array of Interval objects. All intervals must be closed on the same
        sides.
    name : object, optional
        Name to be stored in the index. When falsy and ``data`` is an
        IntervalIndex, the name of ``data`` is used.
    copy : boolean, default False
        Accepted for compatibility only and ignored; the data is always
        forwarded with ``copy=False``.

    Examples
    --------

    >>> pd.IntervalIndex.from_intervals([pd.Interval(0, 1),
    ...                                  pd.Interval(1, 2)])
    IntervalIndex([(0, 1], (1, 2]]
                  closed='right', dtype='interval[int64]')

    The generic Index constructor works identically when it infers an array
    of all intervals:

    >>> pd.Index([pd.Interval(0, 1), pd.Interval(1, 2)])
    IntervalIndex([(0, 1], (1, 2]]
                  closed='right', dtype='interval[int64]')

    See Also
    --------
    interval_range : Function to create a fixed frequency IntervalIndex
    IntervalIndex.from_arrays : Construct an IntervalIndex from a left and
                                right array
    IntervalIndex.from_breaks : Construct an IntervalIndex from an array of
                                splits
    IntervalIndex.from_tuples : Construct an IntervalIndex from a
                                list/array of tuples
    """
    if isinstance(data, IntervalIndex):
        # reuse the bounds of an existing index, keeping its name unless
        # the caller supplied one
        bounds = (data.left, data.right, data.closed)
        name = name or data.name
    else:
        converted = maybe_convert_platform(data)
        bounds = intervals_to_interval_bounds(converted)

    left, right, closed = bounds
    return cls.from_arrays(left, right, closed, name=name, copy=False)
def from_breaks(cls, breaks, closed='right', name=None, copy=False):
    """
    Construct an IntervalIndex from an array of splits

    Consecutive break points form the intervals: every element but the last
    is a left bound, every element but the first is a right bound.

    Parameters
    ----------
    breaks : array-like (1-dimensional)
        Left and right bounds for each interval.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.
    name : object, optional
        Name to be stored in the index.
    copy : boolean, default False
        copy the data

    Examples
    --------

    >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3])
    IntervalIndex([(0, 1], (1, 2], (2, 3]]
                  closed='right',
                  dtype='interval[int64]')

    See Also
    --------
    interval_range : Function to create a fixed frequency IntervalIndex
    IntervalIndex.from_arrays : Construct an IntervalIndex from a left and
                                right array
    IntervalIndex.from_intervals : Construct an IntervalIndex from an array
                                   of Interval objects
    IntervalIndex.from_tuples : Construct an IntervalIndex from a
                                list/array of tuples
    """
    edges = maybe_convert_platform(breaks)
    lower_edges = edges[:-1]
    upper_edges = edges[1:]
    return cls.from_arrays(lower_edges, upper_edges, closed, name=name,
                           copy=copy)
def sanitize_array(
    data,
    index: Optional["Index"],
    dtype: Optional[DtypeObj] = None,
    copy: bool = False,
    raise_cast_failure: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
        Masked arrays, ndarrays, ExtensionArrays, non-empty lists/tuples,
        ranges and scalars each take a dedicated branch; anything else is
        handed to ``_try_cast``.
    index : Index or None
        Only its length is used, to broadcast scalar or length-1 results.
    dtype : dtype, optional
    copy : bool, default False
    raise_cast_failure : bool, default False
        Forwarded to ``_try_cast``.

    Returns
    -------
    np.ndarray or ExtensionArray
        A zero-dimensional result with ``index=None`` is returned as a plain
        scalar via ``.item()``.

    Raises
    ------
    TypeError
        If ``data`` is an ``abc.Set``.
    Exception
        If ``data`` is an ndarray whose sanitized result has more than one
        dimension.
    """
    if isinstance(data, ma.MaskedArray):
        # replace masked entries by a fill value on an upcast copy
        mask = ma.getmaskarray(data)
        if mask.any():
            data, fill_value = maybe_upcast(data, copy=True)
            data.soften_mask()  # set hardmask False if it was True
            data[mask] = fill_value
        else:
            data = data.copy()

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except ValueError:
                # cast not possible: keep the float data, ignoring dtype
                if copy:
                    subarr = data.copy()
                else:
                    subarr = np.array(data, copy=False)
        else:
            # we will try to copy be-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()
        # ExtensionArrays skip the ndim / string post-processing below
        return subarr

    elif isinstance(data, (list, tuple)) and len(data) > 0:
        if dtype is not None:
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            subarr = maybe_convert_platform(data)

        subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        arr = np.arange(data.start, data.stop, data.step, dtype="int64")
        subarr = _try_cast(arr, dtype, copy, raise_cast_failure)
    elif isinstance(data, abc.Set):
        raise TypeError("Set type is unordered")
    elif lib.is_scalar(data) and index is not None and dtype is not None:
        data = maybe_cast_to_datetime(data, dtype)
        if not lib.is_scalar(data):
            # the cast returned a non-scalar; take its first element
            data = data[0]
        subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype)
    else:
        subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    # scalar like, GH
    if getattr(subarr, "ndim", 0) == 0:
        if isinstance(data, list):  # pragma: no cover
            subarr = np.array(data, dtype=object)
        elif index is not None:
            value = data

            # figure out the dtype from the value (upcast if necessary)
            if dtype is None:
                dtype, value = infer_dtype_from_scalar(value)
            else:
                # need to possibly convert the value here
                value = maybe_cast_to_datetime(value, dtype)

            subarr = construct_1d_arraylike_from_scalar(value, len(index), dtype)

        else:
            return subarr.item()

    # the result that we want
    elif subarr.ndim == 1:
        if index is not None:

            # a 1-element ndarray
            if len(subarr) != len(index) and len(subarr) == 1:
                subarr = construct_1d_arraylike_from_scalar(
                    subarr[0], len(index), subarr.dtype
                )

    elif subarr.ndim > 1:
        if isinstance(data, np.ndarray):
            raise Exception("Data must be 1-dimensional")
        else:
            subarr = com.asarray_tuplesafe(data, dtype=dtype)

    if not (is_extension_array_dtype(subarr.dtype) or is_extension_array_dtype(dtype)):
        # This is to prevent mixed-type Series getting all casted to
        # NumPy string type, e.g. NaN --> '-1#IND'.
        if issubclass(subarr.dtype.type, str):
            # GH#16605
            # If not empty convert the data to dtype
            # GH#19853: If data is a scalar, subarr has already the result
            if not lib.is_scalar(data):
                if not np.all(isna(data)):
                    data = np.array(data, dtype=dtype, copy=False)
                subarr = np.array(data, dtype=object, copy=copy)

        if is_object_dtype(subarr.dtype) and not is_object_dtype(dtype):
            # object data inferred as interval/period is re-boxed by array()
            inferred = lib.infer_dtype(subarr, skipna=False)
            if inferred in {"interval", "period"}:
                subarr = array(subarr)

    return subarr
def sanitize_array(data, index, dtype=None, copy=False,
                   raise_cast_failure=False):
    """
    Sanitize input data to an ndarray (or ExtensionArray), copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
        Masked arrays, ndarrays, ExtensionArrays, non-empty lists/tuples and
        ranges each take a dedicated branch; anything else is handed to
        ``_try_cast``.
    index : Index or None
        Used to broadcast scalar or length-1 results to ``len(index)``.
    dtype : dtype, optional
        Normalized through ``pandas_dtype`` when given.
    copy : bool, default False
    raise_cast_failure : bool, default False
        Forwarded to ``_try_cast``; for list/tuple data it also decides
        whether a failed cast is re-raised or falls back to object dtype.

    Returns
    -------
    np.ndarray or ExtensionArray
        A zero-dimensional result with ``index=None`` is returned as a plain
        scalar via ``.item()``.

    Raises
    ------
    Exception
        If ``data`` is an ndarray whose sanitized result has more than one
        dimension.
    """
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if isinstance(data, ma.MaskedArray):
        # replace masked entries by a fill value on an upcast copy
        mask = ma.getmaskarray(data)
        if mask.any():
            data, fill_value = maybe_upcast(data, copy=True)
            data.soften_mask()  # set hardmask False if it was True
            data[mask] = fill_value
        else:
            data = data.copy()

    data = extract_array(data, extract_numpy=True)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None:
            # default result if the float -> int cast below fails and no
            # copy was requested
            subarr = np.array(data, copy=False)

            # possibility of nan -> garbage
            if is_float_dtype(data.dtype) and is_integer_dtype(dtype):
                try:
                    subarr = _try_cast(data, True, dtype, copy, True)
                except ValueError:
                    if copy:
                        subarr = data.copy()
            else:
                subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)
        elif isinstance(data, Index):
            # don't coerce Index types
            # e.g. indexes can have different conversions (so don't fast path
            # them)
            # GH#6140
            subarr = sanitize_index(data, index, copy=copy)
        else:

            # we will try to copy be-definition here
            subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)

    elif isinstance(data, ExtensionArray):
        if isinstance(data, ABCPandasArray):
            # We don't want to let people put our PandasArray wrapper
            # (the output of Series/Index.array), into a Series. So
            # we explicitly unwrap it here.
            subarr = data.to_numpy()
        else:
            subarr = data

        # everything else in this block must also handle ndarray's,
        # because we've unwrapped PandasArray into an ndarray.

        if dtype is not None:
            # operate on the unwrapped array; this call already yields a new
            # object, so no separate copy is needed
            subarr = subarr.astype(dtype)
        elif copy:
            # only copy when no cast happened: an unconditional copy of the
            # input here used to discard the cast result
            subarr = subarr.copy()
        return subarr

    elif isinstance(data, (list, tuple)) and len(data) > 0:
        if dtype is not None:
            try:
                subarr = _try_cast(data, False, dtype, copy,
                                   raise_cast_failure)
            except Exception:
                if raise_cast_failure:  # pragma: no cover
                    raise
                # best effort: fall back to object dtype and let
                # maybe_convert_objects pick a better one
                subarr = np.array(data, dtype=object, copy=copy)
                subarr = lib.maybe_convert_objects(subarr)

        else:
            subarr = maybe_convert_platform(data)

        subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        start, stop, step = get_range_parameters(data)
        arr = np.arange(start, stop, step, dtype='int64')
        subarr = _try_cast(arr, False, dtype, copy, raise_cast_failure)
    else:
        subarr = _try_cast(data, False, dtype, copy, raise_cast_failure)

    # scalar like, GH
    if getattr(subarr, 'ndim', 0) == 0:
        if isinstance(data, list):  # pragma: no cover
            subarr = np.array(data, dtype=object)
        elif index is not None:
            value = data

            # figure out the dtype from the value (upcast if necessary)
            if dtype is None:
                dtype, value = infer_dtype_from_scalar(value)
            else:
                # need to possibly convert the value here
                value = maybe_cast_to_datetime(value, dtype)

            subarr = construct_1d_arraylike_from_scalar(
                value, len(index), dtype)

        else:
            return subarr.item()

    # the result that we want
    elif subarr.ndim == 1:
        if index is not None:

            # a 1-element ndarray
            if len(subarr) != len(index) and len(subarr) == 1:
                subarr = construct_1d_arraylike_from_scalar(
                    subarr[0], len(index), subarr.dtype)

    elif subarr.ndim > 1:
        if isinstance(data, np.ndarray):
            raise Exception('Data must be 1-dimensional')
        else:
            subarr = com.asarray_tuplesafe(data, dtype=dtype)

    # This is to prevent mixed-type Series getting all casted to
    # NumPy string type, e.g. NaN --> '-1#IND'.
    if issubclass(subarr.dtype.type, compat.string_types):
        # GH#16605
        # If not empty convert the data to dtype
        # GH#19853: If data is a scalar, subarr has already the result
        if not lib.is_scalar(data):
            if not np.all(isna(data)):
                data = np.array(data, dtype=dtype, copy=False)
            subarr = np.array(data, dtype=object, copy=copy)

    if is_object_dtype(subarr.dtype) and dtype != 'object':
        inferred = lib.infer_dtype(subarr, skipna=False)
        if inferred == 'period':
            try:
                subarr = period_array(subarr)
            except IncompatibleFrequency:
                # mixed frequencies: deliberately keep the object array
                pass

    return subarr
def convert(v):
    """Return ``v`` after running it through ``maybe_convert_platform``."""
    converted = maybe_convert_platform(v)
    return converted
def sanitize_array(
    data,
    index: Index | None,
    dtype: DtypeObj | None = None,
    copy: bool = False,
    raise_cast_failure: bool = True,
    *,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
    index : Index or None, default None
        Required when ``data`` is not list-like; its length is used to
        broadcast the scalar.
    dtype : np.dtype, ExtensionDtype, or None, default None
    copy : bool, default False
    raise_cast_failure : bool, default True
    allow_2d : bool, default False
        If False, raise if we have a 2D Arraylike.

    Returns
    -------
    np.ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If ``data`` is not list-like and ``index`` is None.
    TypeError
        If ``data`` is a ``set`` or ``frozenset``.

    Notes
    -----
    raise_cast_failure=False is only intended to be used when called from the
    DataFrame constructor, as the dtype keyword there may be interpreted as
    only applying to a subset of columns, see GH#24435.
    """
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        # unbox zero-dim arrays to a scalar, remembering their dtype
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)
    elif isinstance(data, range):
        # GH#16804
        data = range_to_ndarray(data)
        # the ndarray was just created here, so no extra copy is needed
        copy = False

    if not is_list_like(data):
        if index is None:
            raise ValueError("index must be specified when data is not list-like")
        data = construct_1d_arraylike_from_scalar(data, len(index), dtype)
        return data

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except IntCastingNaNError:
                # keep the float data, ignoring the requested dtype
                subarr = np.array(data, copy=copy)
            except ValueError:
                if not raise_cast_failure:
                    # i.e. called via DataFrame constructor
                    warnings.warn(
                        "In a future version, passing float-dtype values and an "
                        "integer dtype to DataFrame will retain floating dtype "
                        "if they cannot be cast losslessly (matching Series behavior). "
                        "To retain the old behavior, use DataFrame(data).astype(dtype)",
                        FutureWarning,
                        stacklevel=4,
                    )
                    # GH#40110 until the deprecation is enforced, we _dont_
                    #  ignore the dtype for DataFrame, and _do_ cast even though
                    #  it is lossy.
                    dtype = cast(np.dtype, dtype)
                    return np.array(data, dtype=dtype, copy=copy)
                subarr = np.array(data, copy=copy)
        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()
        # ExtensionArrays skip the ndim / string post-processing below
        return subarr

    else:
        if isinstance(data, (set, frozenset)):
            # Raise only for unordered sets, e.g., not for dict_keys
            raise TypeError(f"'{type(data).__name__}' type is unordered")

        # materialize e.g. generators, convert e.g. tuples, abc.ValueView
        # TODO: non-standard array-likes we can convert to ndarray more efficiently?
        data = list(data)

        if dtype is not None or len(data) == 0:
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            # TODO: copy?
            subarr = maybe_convert_platform(data)
            if subarr.dtype == object:
                subarr = cast(np.ndarray, subarr)
                subarr = maybe_infer_to_datetimelike(subarr)

    subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d)

    if not (
        isinstance(subarr.dtype, ExtensionDtype) or isinstance(dtype, ExtensionDtype)
    ):
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

        is_object_or_str_dtype = is_object_dtype(dtype) or is_string_dtype(dtype)
        if is_object_dtype(subarr.dtype) and not is_object_or_str_dtype:
            # object data inferred as interval/period is re-boxed by array()
            inferred = lib.infer_dtype(subarr, skipna=False)
            if inferred in {"interval", "period"}:
                subarr = array(subarr)
                subarr = extract_array(subarr, extract_numpy=True)

    return subarr
def convert(v):
    """Return ``v`` after running it through ``maybe_convert_platform``."""
    converted = maybe_convert_platform(v)
    return converted
def sanitize_array(data, index, dtype=None, copy=False,
                   raise_cast_failure=False):
    """
    Sanitize input data to an ndarray (or ExtensionArray), copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
        Masked arrays, ndarrays, ExtensionArrays, non-empty lists/tuples and
        ranges each take a dedicated branch; anything else is handed to
        ``_try_cast``.
    index : Index or None
        Used to broadcast scalar or length-1 results to ``len(index)``.
    dtype : dtype, optional
        Normalized through ``pandas_dtype`` when given.
    copy : bool, default False
    raise_cast_failure : bool, default False
        Forwarded to ``_try_cast``; for list/tuple data it also decides
        whether a failed cast is re-raised or falls back to object dtype.

    Returns
    -------
    np.ndarray or ExtensionArray
        A zero-dimensional result with ``index=None`` is returned as a plain
        scalar via ``.item()``.

    Raises
    ------
    Exception
        If ``data`` is an ndarray whose sanitized result has more than one
        dimension.
    """
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if isinstance(data, ma.MaskedArray):
        # replace masked entries by a fill value on an upcast copy
        mask = ma.getmaskarray(data)
        if mask.any():
            data, fill_value = maybe_upcast(data, copy=True)
            data.soften_mask()  # set hardmask False if it was True
            data[mask] = fill_value
        else:
            data = data.copy()

    data = extract_array(data, extract_numpy=True)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None:
            # default result if the float -> int cast below fails and no
            # copy was requested
            subarr = np.array(data, copy=False)

            # possibility of nan -> garbage
            if is_float_dtype(data.dtype) and is_integer_dtype(dtype):
                try:
                    subarr = _try_cast(data, True, dtype, copy, True)
                except ValueError:
                    if copy:
                        subarr = data.copy()
            else:
                subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)
        elif isinstance(data, Index):
            # don't coerce Index types
            # e.g. indexes can have different conversions (so don't fast path
            # them)
            # GH#6140
            subarr = sanitize_index(data, index, copy=copy)
        else:

            # we will try to copy be-definition here
            subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)

    elif isinstance(data, ExtensionArray):
        if isinstance(data, ABCPandasArray):
            # We don't want to let people put our PandasArray wrapper
            # (the output of Series/Index.array), into a Series. So
            # we explicitly unwrap it here.
            subarr = data.to_numpy()
        else:
            subarr = data

        # everything else in this block must also handle ndarray's,
        # because we've unwrapped PandasArray into an ndarray.

        if dtype is not None:
            # operate on the unwrapped array; this call already yields a new
            # object, so no separate copy is needed
            subarr = subarr.astype(dtype)
        elif copy:
            # only copy when no cast happened: an unconditional copy of the
            # input here used to discard the cast result
            subarr = subarr.copy()
        return subarr

    elif isinstance(data, (list, tuple)) and len(data) > 0:
        if dtype is not None:
            try:
                subarr = _try_cast(data, False, dtype, copy,
                                   raise_cast_failure)
            except Exception:
                if raise_cast_failure:  # pragma: no cover
                    raise
                # best effort: fall back to object dtype and let
                # maybe_convert_objects pick a better one
                subarr = np.array(data, dtype=object, copy=copy)
                subarr = lib.maybe_convert_objects(subarr)

        else:
            subarr = maybe_convert_platform(data)

        subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        start, stop, step = get_range_parameters(data)
        arr = np.arange(start, stop, step, dtype='int64')
        subarr = _try_cast(arr, False, dtype, copy, raise_cast_failure)
    else:
        subarr = _try_cast(data, False, dtype, copy, raise_cast_failure)

    # scalar like, GH
    if getattr(subarr, 'ndim', 0) == 0:
        if isinstance(data, list):  # pragma: no cover
            subarr = np.array(data, dtype=object)
        elif index is not None:
            value = data

            # figure out the dtype from the value (upcast if necessary)
            if dtype is None:
                dtype, value = infer_dtype_from_scalar(value)
            else:
                # need to possibly convert the value here
                value = maybe_cast_to_datetime(value, dtype)

            subarr = construct_1d_arraylike_from_scalar(
                value, len(index), dtype)

        else:
            return subarr.item()

    # the result that we want
    elif subarr.ndim == 1:
        if index is not None:

            # a 1-element ndarray
            if len(subarr) != len(index) and len(subarr) == 1:
                subarr = construct_1d_arraylike_from_scalar(
                    subarr[0], len(index), subarr.dtype)

    elif subarr.ndim > 1:
        if isinstance(data, np.ndarray):
            raise Exception('Data must be 1-dimensional')
        else:
            subarr = com.asarray_tuplesafe(data, dtype=dtype)

    # This is to prevent mixed-type Series getting all casted to
    # NumPy string type, e.g. NaN --> '-1#IND'.
    if issubclass(subarr.dtype.type, compat.string_types):
        # GH#16605
        # If not empty convert the data to dtype
        # GH#19853: If data is a scalar, subarr has already the result
        if not lib.is_scalar(data):
            if not np.all(isna(data)):
                data = np.array(data, dtype=dtype, copy=False)
            subarr = np.array(data, dtype=object, copy=copy)

    if is_object_dtype(subarr.dtype) and dtype != 'object':
        inferred = lib.infer_dtype(subarr, skipna=False)
        if inferred == 'period':
            try:
                subarr = period_array(subarr)
            except IncompatibleFrequency:
                # mixed frequencies: deliberately keep the object array
                pass

    return subarr
def sanitize_array(
    data,
    index: Optional[Index],
    dtype: Optional[DtypeObj] = None,
    copy: bool = False,
    raise_cast_failure: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
    index : Index or None
        Required when ``data`` is not list-like; its length is used to
        broadcast the scalar.
    dtype : dtype, optional
    copy : bool, default False
    raise_cast_failure : bool, default False
        Forwarded to ``_try_cast``.

    Returns
    -------
    np.ndarray or ExtensionArray

    Raises
    ------
    TypeError
        If ``data`` is a non-empty ``set``.
    ValueError
        If ``data`` is not list-like and ``index`` is None.
    """
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        # unbox zero-dim arrays to a scalar, remembering their dtype
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except ValueError:
                # cast not possible: keep the float data, ignoring dtype
                subarr = np.array(data, copy=copy)
        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()
        # ExtensionArrays skip the ndim / string post-processing below
        return subarr

    elif isinstance(data, (list, tuple, abc.Set, abc.ValuesView)) and len(data) > 0:
        # TODO: deque, array.array
        # NOTE(review): only ``set`` is rejected here; a ``frozenset`` is also
        # an abc.Set and would be converted via list() - confirm intended.
        if isinstance(data, set):
            # Raise only for unordered sets, e.g., not for dict_keys
            raise TypeError("Set type is unordered")
        data = list(data)

        if dtype is not None:
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            subarr = maybe_convert_platform(data)

        subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        arr = np.arange(data.start, data.stop, data.step, dtype="int64")
        subarr = _try_cast(arr, dtype, copy, raise_cast_failure)

    elif not is_list_like(data):
        if index is None:
            raise ValueError("index must be specified when data is not list-like")
        subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype)

    else:
        subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    subarr = _sanitize_ndim(subarr, data, dtype, index)

    if not (is_extension_array_dtype(subarr.dtype) or is_extension_array_dtype(dtype)):
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

        is_object_or_str_dtype = is_object_dtype(dtype) or is_string_dtype(dtype)
        if is_object_dtype(subarr.dtype) and not is_object_or_str_dtype:
            # object data inferred as interval/period is re-boxed by array()
            inferred = lib.infer_dtype(subarr, skipna=False)
            if inferred in {"interval", "period"}:
                subarr = array(subarr)

    return subarr
def sanitize_array(
    data,
    index: Index | None,
    dtype: DtypeObj | None = None,
    copy: bool = False,
    raise_cast_failure: bool = True,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
    index : Index or None, default None
        Required when ``data`` is not list-like; its length is used to
        broadcast the scalar.
    dtype : np.dtype, ExtensionDtype, or None, default None
    copy : bool, default False
    raise_cast_failure : bool, default True

    Returns
    -------
    np.ndarray or ExtensionArray

    Raises
    ------
    TypeError
        If ``data`` is a non-empty ``set`` or ``frozenset``.
    ValueError
        If ``data`` is not list-like and ``index`` is None.

    Notes
    -----
    raise_cast_failure=False is only intended to be used when called from the
    DataFrame constructor, as the dtype keyword there may be interpreted as
    only applying to a subset of columns, see GH#24435.
    """
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        # unbox zero-dim arrays to a scalar, remembering their dtype
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except ValueError:
                # cast not possible: keep the float data, ignoring dtype
                subarr = np.array(data, copy=copy)
        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()
        # ExtensionArrays skip the ndim / string post-processing below
        return subarr

    elif isinstance(data, (list, tuple, abc.Set, abc.ValuesView)) and len(data) > 0:
        # TODO: deque, array.array
        if isinstance(data, (set, frozenset)):
            # Raise only for unordered sets, e.g., not for dict_keys
            raise TypeError(f"'{type(data).__name__}' type is unordered")
        data = list(data)

        if dtype is not None:
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            subarr = maybe_convert_platform(data)
            # error: Incompatible types in assignment (expression has type
            # "Union[ExtensionArray, ndarray, List[Any]]", variable has type
            # "ExtensionArray")
            subarr = maybe_cast_to_datetime(subarr, dtype)  # type: ignore[assignment]

    elif isinstance(data, range):
        # GH#16804
        arr = np.arange(data.start, data.stop, data.step, dtype="int64")
        subarr = _try_cast(arr, dtype, copy, raise_cast_failure)

    elif not is_list_like(data):
        if index is None:
            raise ValueError("index must be specified when data is not list-like")
        subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype)

    else:
        # realize e.g. generators
        # TODO: non-standard array-likes we can convert to ndarray more efficiently?
        data = list(data)
        subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    subarr = _sanitize_ndim(subarr, data, dtype, index)

    if not (
        isinstance(subarr.dtype, ExtensionDtype) or isinstance(dtype, ExtensionDtype)
    ):
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

        is_object_or_str_dtype = is_object_dtype(dtype) or is_string_dtype(dtype)
        if is_object_dtype(subarr.dtype) and not is_object_or_str_dtype:
            # object data inferred as interval/period is re-boxed by array()
            inferred = lib.infer_dtype(subarr, skipna=False)
            if inferred in {"interval", "period"}:
                subarr = array(subarr)
                subarr = extract_array(subarr, extract_numpy=True)

    return subarr
def sanitize_array(
    data,
    index: Index | None,
    dtype: DtypeObj | None = None,
    copy: bool = False,
    raise_cast_failure: bool = True,
    *,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
    index : Index or None, default None
        Required when ``data`` is not list-like; ``len(index)`` is then passed
        as the length to ``construct_1d_arraylike_from_scalar``.
    dtype : np.dtype, ExtensionDtype, or None, default None
    copy : bool, default False
    raise_cast_failure : bool, default True
    allow_2d : bool, default False
        If False, raise if we have a 2D Arraylike.

    Returns
    -------
    np.ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If ``data`` is not list-like and ``index`` is None.
    TypeError
        If ``data`` is a ``set`` or ``frozenset``.

    Notes
    -----
    raise_cast_failure=False is only intended to be passed when called from
    the DataFrame constructor, as the dtype keyword there may be interpreted
    as only applying to a subset of columns, see GH#24435.
    """
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    if isinstance(dtype, PandasDtype):
        # Avoid ending up with a PandasArray
        dtype = dtype.numpy_dtype

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        # zero-dim array: unbox the item, keeping the array's dtype unless
        # the caller supplied one
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)
    elif isinstance(data, range):
        # GH#16804
        data = range_to_ndarray(data)
        # the ndarray was just created here, so no further copy is requested
        copy = False

    if not is_list_like(data):
        if index is None:
            raise ValueError("index must be specified when data is not list-like")
        data = construct_1d_arraylike_from_scalar(data, len(index), dtype)
        return data

    # GH#846
    if isinstance(data, np.ndarray):
        if isinstance(data, np.matrix):
            data = data.A

        if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except IntCastingNaNError:
                warnings.warn(
                    "In a future version, passing float-dtype values containing NaN "
                    "and an integer dtype will raise IntCastingNaNError "
                    "(subclass of ValueError) instead of silently ignoring the "
                    "passed dtype. To retain the old behavior, call Series(arr) or "
                    "DataFrame(arr) without passing a dtype.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                subarr = np.array(data, copy=copy)
            except ValueError:
                if not raise_cast_failure:
                    # i.e. called via DataFrame constructor
                    warnings.warn(
                        "In a future version, passing float-dtype values and an "
                        "integer dtype to DataFrame will retain floating dtype "
                        "if they cannot be cast losslessly (matching Series behavior). "
                        "To retain the old behavior, use DataFrame(data).astype(dtype)",
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )
                    # GH#40110 until the deprecation is enforced, we _dont_
                    #  ignore the dtype for DataFrame, and _do_ cast even though
                    #  it is lossy.
                    dtype = cast(np.dtype, dtype)
                    # A float -> integer cast always allocates a new array, so
                    #  ``copy`` cannot change the result.  Not forwarding
                    #  copy=False also keeps this working on NumPy >= 2, where
                    #  copy=False means "never copy" and raises ValueError.
                    return np.array(data, dtype=dtype)

                # We ignore the dtype arg and return floating values,
                #  e.g. test_constructor_floating_data_int_dtype
                # TODO: where is the discussion that documents the reason for this?
                subarr = np.array(data, copy=copy)
        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()

    else:
        if isinstance(data, (set, frozenset)):
            # Raise only for unordered sets, e.g., not for dict_keys
            raise TypeError(f"'{type(data).__name__}' type is unordered")

        # materialize e.g. generators, convert e.g. tuples, abc.ValueView
        if hasattr(data, "__array__"):
            # e.g. dask array GH#38645
            data = np.asarray(data)
        else:
            data = list(data)

        if dtype is not None or len(data) == 0:
            try:
                subarr = _try_cast(data, dtype, copy, raise_cast_failure)
            except ValueError:
                if not is_integer_dtype(dtype):
                    raise
                # np.asarray rather than np.array(..., copy=False): converting
                #  a list always needs a copy, which NumPy >= 2 refuses under
                #  copy=False, and that would mask the original error.
                casted = np.asarray(data)
                if casted.dtype.kind != "f":
                    raise
                # GH#40110 match the behavior we have if we passed
                #  a ndarray[float] to begin with
                return sanitize_array(
                    casted,
                    index,
                    dtype,
                    copy=False,
                    raise_cast_failure=raise_cast_failure,
                    allow_2d=allow_2d,
                )
        else:
            subarr = maybe_convert_platform(data)
            if subarr.dtype == object:
                subarr = cast(np.ndarray, subarr)
                subarr = maybe_infer_to_datetimelike(subarr)

    subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d)

    if isinstance(subarr, np.ndarray):
        # at this point we should have dtype be None or subarr.dtype == dtype
        dtype = cast(np.dtype, dtype)
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

    return subarr