def test_map_dictlike(self, mapper, simple_index): idx = simple_index if isinstance(idx, CategoricalIndex): pytest.skip(f"skipping tests for {type(idx)}") identity = mapper(idx.values, idx) # we don't infer to UInt64 for a dict if is_unsigned_integer_dtype(idx.dtype) and isinstance(identity, dict): expected = idx.astype("int64") else: expected = idx result = idx.map(identity) # For RangeIndex we convert to Int64Index tm.assert_index_equal(result, expected, exact="equiv") # empty mappable if idx._is_backward_compat_public_numeric_index: new_index_cls = NumericIndex else: new_index_cls = Float64Index expected = new_index_cls([np.nan] * len(idx)) result = idx.map(mapper(expected, idx)) tm.assert_index_equal(result, expected)
def astype(self, dtype, copy=True): # Some notes on cases we don't have to handle here in the base class: # 1. PeriodArray.astype handles period -> period # 2. DatetimeArray.astype handles conversion between tz. # 3. DatetimeArray.astype handles datetime -> period from pandas import Categorical dtype = pandas_dtype(dtype) if is_object_dtype(dtype): return self._box_values(self.asi8) elif is_string_dtype(dtype) and not is_categorical_dtype(dtype): return self._format_native_types() elif is_integer_dtype(dtype): # we deliberately ignore int32 vs. int64 here. # See https://github.com/pandas-dev/pandas/issues/24381 for more. values = self.asi8 if is_unsigned_integer_dtype(dtype): # Again, we ignore int32 vs. int64 values = values.view("uint64") if copy: values = values.copy() return values elif (is_datetime_or_timedelta_dtype(dtype) and not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype): # disallow conversion between datetime/timedelta, # and conversions for any datetimelike to float msg = 'Cannot cast {name} to dtype {dtype}' raise TypeError(msg.format(name=type(self).__name__, dtype=dtype)) elif is_categorical_dtype(dtype): return Categorical(self, dtype=dtype) else: return np.asarray(self, dtype=dtype)
def test_is_unsigned_integer_dtype(): assert not com.is_unsigned_integer_dtype(str) assert not com.is_unsigned_integer_dtype(int) assert not com.is_unsigned_integer_dtype(float) assert not com.is_unsigned_integer_dtype(pd.Series([1, 2])) assert not com.is_unsigned_integer_dtype(pd.Index([1, 2.])) assert not com.is_unsigned_integer_dtype(np.array(['a', 'b'])) assert com.is_unsigned_integer_dtype(np.uint64) assert com.is_unsigned_integer_dtype(np.array([1, 2], dtype=np.uint32))
def test_is_unsigned_integer_dtype(): assert not com.is_unsigned_integer_dtype(str) assert not com.is_unsigned_integer_dtype(int) assert not com.is_unsigned_integer_dtype(float) assert not com.is_unsigned_integer_dtype(pd.Series([1, 2])) assert not com.is_unsigned_integer_dtype(pd.Index([1, 2.])) assert not com.is_unsigned_integer_dtype(np.array(['a', 'b'])) assert com.is_unsigned_integer_dtype(np.uint64) assert com.is_unsigned_integer_dtype(np.array([1, 2], dtype=np.uint32))
def test_constructor_int_dtype_float(self, dtype): # GH#18400 if is_unsigned_integer_dtype(dtype): index_type = UInt64Index else: index_type = Int64Index expected = index_type([0, 1, 2, 3]) result = Index([0.0, 1.0, 2.0, 3.0], dtype=dtype) tm.assert_index_equal(result, expected)
def _convert_arr_indexer(self, keyarr) -> np.ndarray: if not is_unsigned_integer_dtype(self.dtype): return super()._convert_arr_indexer(keyarr) # Cast the indexer to uint64 if possible so that the values returned # from indexing are also uint64. dtype = None if is_integer_dtype(keyarr) or (lib.infer_dtype(keyarr, skipna=False) == "integer"): dtype = np.dtype(np.uint64) return com.asarray_tuplesafe(keyarr, dtype=dtype)
def makeNumericIndex(k=10, name=None, *, dtype): dtype = pandas_dtype(dtype) assert isinstance(dtype, np.dtype) if is_integer_dtype(dtype): values = np.arange(k, dtype=dtype) if is_unsigned_integer_dtype(dtype): values += 2**(dtype.itemsize * 8 - 1) elif is_float_dtype(dtype): values = np.random.random_sample(k) - np.random.random_sample(1) values.sort() values = values * (10**np.random.randint(0, 9)) else: raise NotImplementedError(f"wrong dtype {dtype}") return NumericIndex(values, dtype=dtype, name=name)
def test_is_not_unsigned_integer_dtype(dtype): assert not com.is_unsigned_integer_dtype(dtype)
def test_is_not_unsigned_integer_dtype(dtype): assert not com.is_unsigned_integer_dtype(dtype)
def maybe_cast_to_integer_array(arr, dtype, copy: bool = False): """ Takes any dtype and returns the casted version, raising for when data is incompatible with integer/unsigned integer dtypes. .. versionadded:: 0.24.0 Parameters ---------- arr : array-like The array to cast. dtype : str, np.dtype The integer dtype to cast the array to. copy: bool, default False Whether to make a copy of the array before returning. Returns ------- int_arr : ndarray An array of integer or unsigned integer dtype Raises ------ OverflowError : the dtype is incompatible with the data ValueError : loss of precision has occurred during casting Examples -------- If you try to coerce negative values to unsigned integers, it raises: >>> Series([-1], dtype="uint64") Traceback (most recent call last): ... OverflowError: Trying to coerce negative values to unsigned integers Also, if you try to coerce float values to integers, it raises: >>> Series([1, 2, 3.5], dtype="int64") Traceback (most recent call last): ... ValueError: Trying to coerce float values to integers """ try: if not hasattr(arr, "astype"): casted = np.array(arr, dtype=dtype, copy=copy) else: casted = arr.astype(dtype, copy=copy) except OverflowError: raise OverflowError("The elements provided in the data cannot all be " f"casted to the dtype {dtype}") if np.array_equal(arr, casted): return casted # We do this casting to allow for proper # data and dtype checking. # # We didn't do this earlier because NumPy # doesn't handle `uint64` correctly. arr = np.asarray(arr) if is_unsigned_integer_dtype(dtype) and (arr < 0).any(): raise OverflowError( "Trying to coerce negative values to unsigned integers") if is_integer_dtype(dtype) and (is_float_dtype(arr) or is_object_dtype(arr)): raise ValueError("Trying to coerce float values to integers")
def to_orc( df: DataFrame, path: FilePath | WriteBuffer[bytes] | None = None, *, engine: Literal["pyarrow"] = "pyarrow", index: bool | None = None, engine_kwargs: dict[str, Any] | None = None, ) -> bytes | None: """ Write a DataFrame to the ORC format. .. versionadded:: 1.5.0 Parameters ---------- df : DataFrame The dataframe to be written to ORC. Raises NotImplementedError if dtype of one or more columns is category, unsigned integers, intervals, periods or sparse. path : str, file-like object or None, default None If a string, it will be used as Root Directory path when writing a partitioned dataset. By file-like object, we refer to objects with a write() method, such as a file handle (e.g. via builtin open function). If path is None, a bytes object is returned. engine : str, default 'pyarrow' ORC library to use. Pyarrow must be >= 7.0.0. index : bool, optional If ``True``, include the dataframe's index(es) in the file output. If ``False``, they will not be written to the file. If ``None``, similar to ``infer`` the dataframe's index(es) will be saved. However, instead of being saved as values, the RangeIndex will be stored as a range in the metadata so it doesn't require much space and is faster. Other indexes will be included as columns in the file output. engine_kwargs : dict[str, Any] or None, default None Additional keyword arguments passed to :func:`pyarrow.orc.write_table`. Returns ------- bytes if no path argument is provided else None Raises ------ NotImplementedError Dtype of one or more columns is category, unsigned integers, interval, period or sparse. ValueError engine is not pyarrow. Notes ----- * Before using this function you should read the :ref:`user guide about ORC <io.orc>` and :ref:`install optional dependencies <install.warn_orc>`. * This function requires `pyarrow <https://arrow.apache.org/docs/python/>`_ library. * For supported dtypes please refer to `supported ORC features in Arrow <https://arrow.apache.org/docs/cpp/orc.html#data-types>`__. * Currently timezones in datetime columns are not preserved when a dataframe is converted into ORC files. """ if index is None: index = df.index.names[0] is not None if engine_kwargs is None: engine_kwargs = {} # If unsupported dtypes are found raise NotImplementedError # In Pyarrow 9.0.0 this check will no longer be needed for dtype in df.dtypes: if (is_categorical_dtype(dtype) or is_interval_dtype(dtype) or is_period_dtype(dtype) or is_unsigned_integer_dtype(dtype)): raise NotImplementedError( "The dtype of one or more columns is not supported yet.") if engine != "pyarrow": raise ValueError("engine must be 'pyarrow'") engine = import_optional_dependency(engine, min_version="7.0.0") orc = import_optional_dependency("pyarrow.orc") was_none = path is None if was_none: path = io.BytesIO() assert path is not None # For mypy with get_handle(path, "wb", is_text=False) as handles: assert isinstance(engine, ModuleType) # For mypy try: orc.write_table( engine.Table.from_pandas(df, preserve_index=index), handles.handle, **engine_kwargs, ) except TypeError as e: raise NotImplementedError( "The dtype of one or more columns is not supported yet." ) from e if was_none: assert isinstance(path, io.BytesIO) # For mypy return path.getvalue() return None