def _get_values(self):
    """
    Wrap the parent data in the matching datetime-like container.

    Returns a DatetimeIndex, TimedeltaIndex, or PeriodArray depending on
    the parent's dtype / inferred contents; raises TypeError otherwise.
    """
    values = self._parent
    dtype = values.dtype

    # tz-naive and tz-aware datetimes both become a DatetimeIndex
    if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
        return DatetimeIndex(values, copy=False, name=self.name)

    if is_timedelta64_dtype(dtype):
        return TimedeltaIndex(values, copy=False, name=self.name)

    # object dtype: fall back to content inspection
    if is_period_arraylike(values):
        # TODO: use to_period_array
        return PeriodArray(values, copy=False)

    if is_datetime_arraylike(values):
        return DatetimeIndex(values, copy=False, name=self.name)

    raise TypeError("cannot convert an object of type {0} to a "
                    "datetimelike index".format(type(values)))
def test_array_interface(self, period_index):
    """np.asarray: object conversions work, numeric dtypes raise, bytes ok."""
    arr = PeriodArray(period_index)

    # the default conversion yields an object ndarray of Period scalars
    as_objects = np.array(list(arr), dtype=object)
    tm.assert_numpy_array_equal(np.asarray(arr), as_objects)

    # requesting object dtype explicitly matches the default
    tm.assert_numpy_array_equal(np.asarray(arr, dtype=object), as_objects)

    # numeric conversions are rejected
    for bad_dtype in ("int64", "float64"):
        with pytest.raises(TypeError):
            np.asarray(arr, dtype=bad_dtype)

    # bytes conversion goes through the object array
    as_bytes = np.asarray(arr, dtype="S20")
    tm.assert_numpy_array_equal(as_bytes, np.asarray(arr).astype("S20"))
def __from_arrow__(
    self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"]) -> "PeriodArray":
    """
    Construct PeriodArray from pyarrow Array/ChunkedArray.

    Each chunk is converted to int64 ordinals plus a validity mask;
    invalid positions are set to NaT before concatenation.
    """
    import pyarrow  # noqa: F811
    from pandas.core.arrays import PeriodArray
    from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask

    # normalize to a list of plain Arrays
    chunks = [array] if isinstance(array, pyarrow.Array) else array.chunks

    pieces = []
    for chunk in chunks:
        ordinals, mask = pyarrow_array_to_numpy_and_mask(chunk, dtype="int64")
        piece = PeriodArray(ordinals.copy(), freq=self.freq, copy=False)
        # mask is True for valid entries; blank out the rest
        piece[~mask] = NaT
        pieces.append(piece)

    return PeriodArray._concat_same_type(pieces)
def test_array_interface(self, period_index):
    """np.asarray: object/int64 conversions work, float raises, bytes ok."""
    arr = PeriodArray(period_index)

    # the default conversion yields an object ndarray of Period scalars
    as_objects = np.array(list(arr), dtype=object)
    tm.assert_numpy_array_equal(np.asarray(arr), as_objects)

    # explicitly requesting object dtype matches the default
    tm.assert_numpy_array_equal(np.asarray(arr, dtype=object), as_objects)

    # int64 returns the underlying ordinals
    tm.assert_numpy_array_equal(np.asarray(arr, dtype="int64"), arr.asi8)

    # float conversion is rejected
    msg = r"float\(\) argument must be a string or a number, not 'Period'"
    with pytest.raises(TypeError, match=msg):
        np.asarray(arr, dtype="float64")

    # bytes conversion goes through the object array
    tm.assert_numpy_array_equal(
        np.asarray(arr, dtype="S20"), np.asarray(arr).astype("S20"))
def data_for_grouping(dtype):
    """Ordinals arranged B B NA NA A A B C, as the groupby fixtures expect."""
    A, B, C = 2017, 2018, 2019
    NA = iNaT
    ordinals = [B, B, NA, NA, A, A, B, C]
    return PeriodArray(ordinals, freq=dtype.freq)
def data_missing_for_sorting(dtype):
    """Highest, missing, lowest -- exercises sorting around an NA value."""
    ordinals = [2018, iNaT, 2017]
    return PeriodArray(ordinals, freq=dtype.freq)
def data_missing(dtype):
    """An NA followed by a single valid period (the standard missing pair)."""
    values = [iNaT, 2017]
    return PeriodArray(values, freq=dtype.freq)
def data_for_sorting(dtype):
    """Middle, highest, lowest ordinals -- an unsorted triple with no NA."""
    ordinals = [2018, 2019, 2017]
    return PeriodArray(ordinals, freq=dtype.freq)
def data(dtype):
    """One hundred consecutive period ordinals starting at 1970."""
    ordinals = np.arange(1970, 2070)
    return PeriodArray(ordinals, freq=dtype.freq)
def test_setitem_raises_type():
    """Assigning a plain int into a PeriodArray must raise TypeError.

    ``tm.assert_raises_regex`` is the long-deprecated pandas wrapper
    (removed in later versions); use ``pytest.raises`` directly, matching
    the style of the sibling setitem tests.
    """
    import pytest  # local import keeps this migration self-contained

    arr = PeriodArray(np.arange(3), freq="D")
    with pytest.raises(TypeError, match="int"):
        arr[0] = 1
def test_setitem_raises_type():
    """Assigning a plain int into a PeriodArray must raise TypeError."""
    periods = PeriodArray(np.arange(3), freq="D")
    bad_value = 1
    with pytest.raises(TypeError, match="int"):
        periods[0] = bad_value
def test_setitem_raises_length():
    """A list value shorter than the indexer must raise ValueError."""
    periods = PeriodArray(np.arange(3), freq="D")
    too_short = [pd.Period("2000", freq="D")]
    with tm.assert_raises_regex(ValueError, "length"):
        periods[[0, 1]] = too_short
def maybe_downcast_to_dtype(result, dtype):
    """
    try to cast to the specified dtype (e.g. convert back to bool/int
    or could be an astype of float64->float32

    Parameters
    ----------
    result : scalar, ndarray-like, or DataFrame
        Values to (maybe) downcast; scalars and DataFrames pass through.
    dtype : str or dtype
        Target dtype. The string "infer" asks this function to pick a
        target from ``lib.infer_dtype`` on the flattened values.

    Returns
    -------
    The converted values, or ``result`` unchanged when no safe
    conversion applies.
    """
    # only inferred-float -> int64 conversions are rounded before casting
    do_round = False

    if is_scalar(result):
        return result
    elif isinstance(result, ABCDataFrame):
        # occurs in pivot_table doctest
        return result

    if isinstance(dtype, str):
        if dtype == "infer":
            # map the inferred kind onto a concrete numpy dtype string
            inferred_type = lib.infer_dtype(ensure_object(result.ravel()),
                                            skipna=False)
            if inferred_type == "boolean":
                dtype = "bool"
            elif inferred_type == "integer":
                dtype = "int64"
            elif inferred_type == "datetime64":
                dtype = "datetime64[ns]"
            elif inferred_type == "timedelta64":
                dtype = "timedelta64[ns]"

            # try to upcast here
            elif inferred_type == "floating":
                dtype = "int64"
                if issubclass(result.dtype.type, np.number):
                    do_round = True

            else:
                dtype = "object"

        dtype = np.dtype(dtype)

    # the numeric path is delegated; only act further if it was a no-op
    converted = maybe_downcast_numeric(result, dtype, do_round)
    if converted is not result:
        return converted

    # a datetimelike
    # GH12821, iNaT is casted to float
    if dtype.kind in ["M", "m"] and result.dtype.kind in ["i", "f"]:
        if hasattr(dtype, "tz"):
            # not a numpy dtype
            if dtype.tz:
                # convert to datetime and change timezone
                from pandas import to_datetime

                result = to_datetime(result).tz_localize("utc")
                result = result.tz_convert(dtype.tz)
        else:
            result = result.astype(dtype)

    elif dtype.type is Period:
        # TODO(DatetimeArray): merge with previous elif
        from pandas.core.arrays import PeriodArray

        try:
            return PeriodArray(result, freq=dtype.freq)
        except TypeError:
            # e.g. TypeError: int() argument must be a string, a
            #  bytes-like object or a number, not 'Period
            pass

    return result
def decode(obj):
    """
    Decoder for deserializing numpy data types.

    Dispatches on the ``typ`` tag written by the matching encoder and
    rebuilds the corresponding pandas/numpy object. Objects without a
    ``typ`` tag are returned unchanged.
    """

    typ = obj.get(u'typ')
    if typ is None:
        return obj
    elif typ == u'timestamp':
        # older payloads used 'offset' instead of 'freq'
        freq = obj[u'freq'] if 'freq' in obj else obj[u'offset']
        return Timestamp(obj[u'value'], tz=obj[u'tz'], freq=freq)
    elif typ == u'nat':
        return NaT
    elif typ == u'period':
        return Period(ordinal=obj[u'ordinal'], freq=obj[u'freq'])
    elif typ == u'index':
        dtype = dtype_for(obj[u'dtype'])
        data = unconvert(obj[u'data'], dtype, obj.get(u'compress'))
        # 'klass' names the Index subclass to reconstruct
        return globals()[obj[u'klass']](data, dtype=dtype, name=obj[u'name'])
    elif typ == u'range_index':
        return globals()[obj[u'klass']](obj[u'start'], obj[u'stop'],
                                        obj[u'step'], name=obj[u'name'])
    elif typ == u'multi_index':
        dtype = dtype_for(obj[u'dtype'])
        data = unconvert(obj[u'data'], dtype, obj.get(u'compress'))
        data = [tuple(x) for x in data]
        return globals()[obj[u'klass']].from_tuples(data, names=obj[u'names'])
    elif typ == u'period_index':
        data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
        d = dict(name=obj[u'name'], freq=obj[u'freq'])
        # freq is consumed by the PeriodArray, not the index constructor
        freq = d.pop('freq', None)
        return globals()[obj[u'klass']](PeriodArray(data, freq), **d)
    elif typ == u'datetime_index':
        data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
        d = dict(name=obj[u'name'], freq=obj[u'freq'],
                 verify_integrity=False)
        result = globals()[obj[u'klass']](data, **d)
        tz = obj[u'tz']

        # reverse tz conversion
        if tz is not None:
            result = result.tz_localize('UTC').tz_convert(tz)
        return result
    elif typ in (u'interval_index', 'interval_array'):
        return globals()[obj[u'klass']].from_arrays(obj[u'left'],
                                                    obj[u'right'],
                                                    obj[u'closed'],
                                                    name=obj[u'name'])
    elif typ == u'category':
        from_codes = globals()[obj[u'klass']].from_codes
        return from_codes(codes=obj[u'codes'],
                          categories=obj[u'categories'],
                          ordered=obj[u'ordered'])
    elif typ == u'interval':
        return Interval(obj[u'left'], obj[u'right'], obj[u'closed'])
    elif typ == u'series':
        dtype = dtype_for(obj[u'dtype'])
        pd_dtype = pandas_dtype(dtype)
        index = obj[u'index']
        result = globals()[obj[u'klass']](unconvert(obj[u'data'], dtype,
                                                    obj[u'compress']),
                                          index=index,
                                          dtype=pd_dtype,
                                          name=obj[u'name'])
        return result
    elif typ == u'block_manager':
        axes = obj[u'axes']

        def create_block(b):
            # rebuild one Block: decompress values, restore shape,
            # then resolve column placement
            values = _safe_reshape(unconvert(
                b[u'values'], dtype_for(b[u'dtype']),
                b[u'compress']), b[u'shape'])

            # locs handles duplicate column names, and should be used instead
            # of items; see GH 9618
            if u'locs' in b:
                placement = b[u'locs']
            else:
                placement = axes[0].get_indexer(b[u'items'])
            return make_block(values=values,
                              klass=getattr(internals, b[u'klass']),
                              placement=placement,
                              dtype=b[u'dtype'])

        blocks = [create_block(b) for b in obj[u'blocks']]
        return globals()[obj[u'klass']](BlockManager(blocks, axes))
    elif typ == u'datetime':
        return parse(obj[u'data'])
    elif typ == u'datetime64':
        return np.datetime64(parse(obj[u'data']))
    elif typ == u'date':
        return parse(obj[u'data']).date()
    elif typ == u'timedelta':
        return timedelta(*obj[u'data'])
    elif typ == u'timedelta64':
        return np.timedelta64(int(obj[u'data']))
    # elif typ == 'sparse_series':
    #    dtype = dtype_for(obj['dtype'])
    #    return globals()[obj['klass']](
    #        unconvert(obj['sp_values'], dtype, obj['compress']),
    #        sparse_index=obj['sp_index'], index=obj['index'],
    #        fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name'])
    # elif typ == 'sparse_dataframe':
    #    return globals()[obj['klass']](
    #        obj['data'], columns=obj['columns'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind']
    #    )
    # elif typ == 'sparse_panel':
    #    return globals()[obj['klass']](
    #        obj['data'], items=obj['items'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind'])
    elif typ == u'block_index':
        return globals()[obj[u'klass']](obj[u'length'], obj[u'blocs'],
                                        obj[u'blengths'])
    elif typ == u'int_index':
        return globals()[obj[u'klass']](obj[u'length'], obj[u'indices'])
    elif typ == u'ndarray':
        return unconvert(obj[u'data'], np.typeDict[obj[u'dtype']],
                         obj.get(u'compress')).reshape(obj[u'shape'])
    elif typ == u'np_scalar':
        if obj.get(u'sub_typ') == u'np_complex':
            return c2f(obj[u'real'], obj[u'imag'], obj[u'dtype'])
        else:
            dtype = dtype_for(obj[u'dtype'])
            try:
                return dtype(obj[u'data'])
            except (ValueError, TypeError):
                # some dtypes are not directly callable; go via .type
                return dtype.type(obj[u'data'])
    elif typ == u'np_complex':
        return complex(obj[u'real'] + u'+' + obj[u'imag'] + u'j')
    elif isinstance(obj, (dict, list, set)):
        return obj
    else:
        return obj
def test_strftime(self, period_index): arr = PeriodArray(period_index) result = arr.strftime("%Y") expected = np.array([per.strftime("%Y") for per in arr], dtype=object) tm.assert_numpy_array_equal(result, expected)
def decode(obj):
    """
    Decoder for deserializing numpy data types.

    Dispatches on the ``typ`` tag written by the matching encoder and
    rebuilds the corresponding pandas/numpy object. Objects without a
    ``typ`` tag are returned unchanged.
    """
    typ = obj.get("typ")
    if typ is None:
        return obj
    elif typ == "timestamp":
        # older payloads used 'offset' instead of 'freq'
        freq = obj["freq"] if "freq" in obj else obj["offset"]
        return Timestamp(obj["value"], tz=obj["tz"], freq=freq)
    elif typ == "nat":
        return NaT
    elif typ == "period":
        return Period(ordinal=obj["ordinal"], freq=obj["freq"])
    elif typ == "index":
        dtype = dtype_for(obj["dtype"])
        data = unconvert(obj["data"], dtype, obj.get("compress"))
        return Index(data, dtype=dtype, name=obj["name"])
    elif typ == "range_index":
        return RangeIndex(obj["start"], obj["stop"], obj["step"], name=obj["name"])
    elif typ == "multi_index":
        dtype = dtype_for(obj["dtype"])
        data = unconvert(obj["data"], dtype, obj.get("compress"))
        data = [tuple(x) for x in data]
        return MultiIndex.from_tuples(data, names=obj["names"])
    elif typ == "period_index":
        data = unconvert(obj["data"], np.int64, obj.get("compress"))
        d = dict(name=obj["name"], freq=obj["freq"])
        # freq is consumed by the PeriodArray, not the index constructor
        freq = d.pop("freq", None)
        return PeriodIndex(PeriodArray(data, freq), **d)
    elif typ == "datetime_index":
        data = unconvert(obj["data"], np.int64, obj.get("compress"))
        d = dict(name=obj["name"], freq=obj["freq"])
        result = DatetimeIndex(data, **d)
        tz = obj["tz"]

        # reverse tz conversion
        if tz is not None:
            result = result.tz_localize("UTC").tz_convert(tz)
        return result
    elif typ in ("interval_index", "interval_array"):
        return globals()[obj["klass"]].from_arrays(
            obj["left"], obj["right"], obj["closed"], name=obj["name"]
        )
    elif typ == "category":
        from_codes = globals()[obj["klass"]].from_codes
        return from_codes(
            codes=obj["codes"], categories=obj["categories"], ordered=obj["ordered"]
        )
    elif typ == "interval":
        return Interval(obj["left"], obj["right"], obj["closed"])
    elif typ == "series":
        dtype = dtype_for(obj["dtype"])
        index = obj["index"]
        data = unconvert(obj["data"], dtype, obj["compress"])
        return Series(data, index=index, dtype=dtype, name=obj["name"])
    elif typ == "block_manager":
        axes = obj["axes"]

        def create_block(b):
            # rebuild one Block: decompress values, restore shape,
            # then resolve column placement
            values = _safe_reshape(
                unconvert(b["values"], dtype_for(b["dtype"]), b["compress"]),
                b["shape"]
            )

            # locs handles duplicate column names, and should be used instead
            # of items; see GH 9618
            if "locs" in b:
                placement = b["locs"]
            else:
                placement = axes[0].get_indexer(b["items"])

            if is_datetime64tz_dtype(b["dtype"]):
                # tz-aware blocks are serialized as naive M8[ns];
                # re-wrap in a DatetimeArray carrying the tz
                assert isinstance(values, np.ndarray), type(values)
                assert values.dtype == "M8[ns]", values.dtype
                values = DatetimeArray(values, dtype=b["dtype"])

            return make_block(
                values=values,
                klass=getattr(internals, b["klass"]),
                placement=placement,
                dtype=b["dtype"],
            )

        blocks = [create_block(b) for b in obj["blocks"]]
        return globals()[obj["klass"]](BlockManager(blocks, axes))
    elif typ == "datetime":
        return parse(obj["data"])
    elif typ == "datetime64":
        return np.datetime64(parse(obj["data"]))
    elif typ == "date":
        return parse(obj["data"]).date()
    elif typ == "timedelta":
        return timedelta(*obj["data"])
    elif typ == "timedelta64":
        return np.timedelta64(int(obj["data"]))
    # elif typ == 'sparse_series':
    #    dtype = dtype_for(obj['dtype'])
    #    return SparseSeries(
    #        unconvert(obj['sp_values'], dtype, obj['compress']),
    #        sparse_index=obj['sp_index'], index=obj['index'],
    #        fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name'])
    # elif typ == 'sparse_dataframe':
    #    return SparseDataFrame(
    #        obj['data'], columns=obj['columns'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind']
    #    )
    elif typ == "block_index":
        return globals()[obj["klass"]](obj["length"], obj["blocs"], obj["blengths"])
    elif typ == "int_index":
        return globals()[obj["klass"]](obj["length"], obj["indices"])
    elif typ == "ndarray":
        return unconvert(
            obj["data"], np.typeDict[obj["dtype"]], obj.get("compress")
        ).reshape(obj["shape"])
    elif typ == "np_scalar":
        if obj.get("sub_typ") == "np_complex":
            return c2f(obj["real"], obj["imag"], obj["dtype"])
        else:
            dtype = dtype_for(obj["dtype"])
            try:
                return dtype(obj["data"])
            except (ValueError, TypeError):
                # some dtypes are not directly callable; go via .type
                return dtype.type(obj["data"])
    elif typ == "np_complex":
        return complex(obj["real"] + "+" + obj["imag"] + "j")
    elif isinstance(obj, (dict, list, set)):
        return obj
    else:
        return obj
def test_strftime(self, period_index): arr = PeriodArray(period_index) result = arr.strftime("%Y") expected = np.array(period_index.strftime("%Y")) tm.assert_numpy_array_equal(result, expected)
def test_setitem(key, value, expected): arr = PeriodArray(np.arange(3), freq="D") expected = PeriodArray(expected, freq="D") arr[key] = value tm.assert_period_array_equal(arr, expected)
def data_for_twos(dtype):
    """One hundred periods, every one with ordinal 2."""
    ordinals = np.ones(100) * 2
    return PeriodArray(ordinals, freq=dtype.freq)
def test_setitem_raises_length():
    """A list value shorter than the indexer must raise ValueError."""
    periods = PeriodArray(np.arange(3), freq="D")
    replacement = [pd.Period("2000", freq="D")]
    with pytest.raises(ValueError, match="length"):
        periods[[0, 1]] = replacement
def maybe_downcast_to_dtype(result, dtype):
    """ try to cast to the specified dtype (e.g. convert back to bool/int
    or could be an astype of float64->float32

    Parameters
    ----------
    result : scalar or ndarray-like
        Values to (maybe) downcast; scalars pass through unchanged.
    dtype : str or dtype
        Target dtype. The string "infer" asks this function to pick a
        target from ``lib.infer_dtype`` on the flattened values.

    Returns
    -------
    The converted values, or ``result`` unchanged when no safe
    conversion applies.
    """
    if is_scalar(result):
        return result

    # identity transform by default; replaced with a rounding transform
    # only for the inferred-float -> int64 path below
    def trans(x):
        return x

    if isinstance(dtype, str):
        if dtype == "infer":
            # map the inferred kind onto a concrete numpy dtype string
            inferred_type = lib.infer_dtype(ensure_object(result.ravel()),
                                            skipna=False)
            if inferred_type == "boolean":
                dtype = "bool"
            elif inferred_type == "integer":
                dtype = "int64"
            elif inferred_type == "datetime64":
                dtype = "datetime64[ns]"
            elif inferred_type == "timedelta64":
                dtype = "timedelta64[ns]"

            # try to upcast here
            elif inferred_type == "floating":
                dtype = "int64"
                if issubclass(result.dtype.type, np.number):

                    def trans(x):  # noqa
                        return x.round()

            else:
                dtype = "object"

    if isinstance(dtype, str):
        dtype = np.dtype(dtype)

    try:

        # don't allow upcasts here (except if empty)
        if dtype.kind == result.dtype.kind:
            if (result.dtype.itemsize <= dtype.itemsize and
                    np.prod(result.shape)):
                return result

        if is_bool_dtype(dtype) or is_integer_dtype(dtype):

            # if we don't have any elements, just astype it
            if not np.prod(result.shape):
                return trans(result).astype(dtype)

            # do a test on the first element, if it fails then we are done
            r = result.ravel()
            arr = np.array([r[0]])

            # if we have any nulls, then we are done
            if (isna(arr).any() or
                    not np.allclose(arr, trans(arr).astype(dtype), rtol=0)):
                return result

            # a comparable, e.g. a Decimal may slip in here
            # NOTE(review): np.bool is deprecated in newer numpy;
            # fine for the numpy version this code was pinned to
            elif not isinstance(r[0], (np.integer, np.floating, np.bool,
                                       int, float, bool)):
                return result

        if (issubclass(result.dtype.type, (np.object_, np.number)) and
                notna(result).all()):
            new_result = trans(result).astype(dtype)
            try:
                if np.allclose(new_result, result, rtol=0):
                    return new_result
            except Exception:

                # comparison of an object dtype with a number type could
                # hit here
                if (new_result == result).all():
                    return new_result
        elif (issubclass(dtype.type, np.floating) and
                not is_bool_dtype(result.dtype)):
            return result.astype(dtype)

        # a datetimelike
        # GH12821, iNaT is casted to float
        elif dtype.kind in ['M', 'm'] and result.dtype.kind in ['i', 'f']:
            try:
                result = result.astype(dtype)
            except Exception:
                if dtype.tz:
                    # convert to datetime and change timezone
                    from pandas import to_datetime
                    result = to_datetime(result).tz_localize('utc')
                    result = result.tz_convert(dtype.tz)
        elif dtype.type == Period:
            # TODO(DatetimeArray): merge with previous elif
            from pandas.core.arrays import PeriodArray
            return PeriodArray(result, freq=dtype.freq)

    except Exception:
        # best-effort downcast: any failure falls through to the
        # original values
        pass

    return result
def test_period_array_non_period_series_raies():
    """Building a PeriodArray from an integer Series must raise TypeError."""
    ints = pd.Series([1, 2, 3])
    with pytest.raises(TypeError, match="dtype"):
        PeriodArray(ints, freq="D")
def test_period_array_non_period_series_raies():
    """Building a PeriodArray from an integer Series must raise TypeError."""
    ints = pd.Series([1, 2, 3])
    with tm.assert_raises_regex(TypeError, 'dtype'):
        PeriodArray(ints, freq='D')