def _from_arraylike(cls, data, freq, tz):
    """
    Convert array-like input into period ordinals plus a frequency.

    Parameters
    ----------
    cls : type
        Not referenced in the body.
    data : iterable, ndarray, PeriodIndex, DatetimeIndex or Int64Index
        Values to convert.
    freq : frequency or None
        Normalised with ``Period._maybe_convert_freq`` when given;
        otherwise extracted from the data where possible.
    tz : object
        Forwarded to ``dt64arr_to_periodarr`` for datetime64 arrays.

    Returns
    -------
    (data, freq) : tuple

    Raises
    ------
    ValueError
        If `data` is a scalar or a Period, or if the frequency is missing
        on the ndarray path and cannot be extracted.
    """
    if freq is not None:
        freq = Period._maybe_convert_freq(freq)

    array_types = (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index)

    # --- generic iterables -------------------------------------------------
    if not isinstance(data, array_types):
        if is_scalar(data) or isinstance(data, Period):
            raise ValueError('PeriodIndex() must be called with a '
                             'collection of some kind, %s was passed'
                             % repr(data))

        # other iterable of some kind
        if not isinstance(data, (list, tuple)):
            data = list(data)

        try:
            data = _ensure_int64(data)
            if freq is None:
                # caught just below, which switches to the object path
                raise ValueError('freq not specified')
            data = np.array([Period(x, freq=freq) for x in data],
                            dtype=np.int64)
        except (TypeError, ValueError):
            data = _ensure_object(data)
            if freq is None:
                freq = period.extract_freq(data)
            data = period.extract_ordinals(data, freq)
        return data, freq

    # --- PeriodIndex: reuse or convert the stored ordinals -----------------
    if isinstance(data, PeriodIndex):
        if freq is None or freq == data.freq:
            freq = data.freq
            return data._values, freq
        source_base, _ = _gfc(data.freq)
        target_base, _ = _gfc(freq)
        converted = period.period_asfreq_arr(data._values,
                                             source_base, target_base, 1)
        return converted, freq

    # --- remaining array / index types --------------------------------------
    if is_object_dtype(data) and infer_dtype(data) == 'integer':
        data = data.astype(np.int64)

    if freq is None and is_object_dtype(data):
        # must contain Period instance and thus extract ordinals
        freq = period.extract_freq(data)
        data = period.extract_ordinals(data, freq)

    if freq is None:
        raise ValueError('freq not specified and cannot be inferred')

    if data.dtype == np.int64:
        return data, freq
    if np.issubdtype(data.dtype, np.datetime64):
        return dt64arr_to_periodarr(data, freq, tz), freq
    return period.extract_ordinals(_ensure_object(data), freq), freq
def _parsed_string_to_bounds(self, reso, parsed):
    """
    Return the first and last periods, at ``self.freq``, spanned by a
    parsed datetime of the given resolution.

    Parameters
    ----------
    reso : str
        One of 'year', 'month', 'quarter', 'day', 'hour', 'minute',
        'second'.
    parsed : object
        Provides the calendar attributes (``year``, ``month``, ...) read
        for the requested resolution.

    Returns
    -------
    (start, end) : tuple of Period

    Raises
    ------
    KeyError
        If `reso` is not one of the supported resolutions.
    """
    attrs = ('year', 'month', 'day', 'hour', 'minute', 'second')
    # (resolution, frequency alias, number of leading `attrs` to read)
    calendar_resolutions = (
        ('year', 'A', 1),
        ('month', 'M', 2),
        ('day', 'D', 3),
        ('hour', 'H', 4),
        ('minute', 'T', 5),
        ('second', 'S', 6),
    )

    if reso == 'quarter':
        quarter = (parsed.month - 1) // 3 + 1
        span = Period(year=parsed.year, quarter=quarter, freq='Q-DEC')
    else:
        for name, alias, depth in calendar_resolutions:
            if reso == name:
                parts = dict((a, getattr(parsed, a)) for a in attrs[:depth])
                span = Period(freq=alias, **parts)
                break
        else:
            raise KeyError(reso)

    lower = span.asfreq(self.freq, how='start')
    upper = span.asfreq(self.freq, how='end')
    return (lower, upper)
def _parsed_string_to_bounds(self, reso, parsed):
    """
    Compute the starting and ending periods, expressed at ``self.freq``,
    that a parsed datetime of resolution `reso` covers.

    Parameters
    ----------
    reso : str
        "year", "month", "quarter", "day", "hour", "minute" or "second".
    parsed : object
        Source of the ``year`` / ``month`` / ``day`` / ``hour`` /
        ``minute`` / ``second`` attributes needed for `reso`.

    Returns
    -------
    tuple of (Period, Period)

    Raises
    ------
    KeyError
        For any other `reso`.
    """
    components = ("year", "month", "day", "hour", "minute", "second")
    # resolution -> (alias, how many leading components are needed);
    # kept as a sequence so matching still relies on ``==``.
    simple = (
        ("year", "A", 1),
        ("month", "M", 2),
        ("day", "D", 3),
        ("hour", "H", 4),
        ("minute", "T", 5),
        ("second", "S", 6),
    )

    t1 = None
    if reso == "quarter":
        q = (parsed.month - 1) // 3 + 1
        t1 = Period(year=parsed.year, quarter=q, freq="Q-DEC")
    else:
        for label, alias, count in simple:
            if reso == label:
                kwargs = dict(
                    (name, getattr(parsed, name)) for name in components[:count]
                )
                t1 = Period(freq=alias, **kwargs)
                break
        else:
            raise KeyError(reso)

    return (t1.asfreq(self.freq, how="start"), t1.asfreq(self.freq, how="end"))
def get_loc(self, key, method=None):
    """
    Get integer location for requested label

    The engine is tried first with the raw key.  On a KeyError, a
    non-integer key is parsed (when possible), converted to a Period at
    ``self.freq`` and its ordinal is looked up via ``Index.get_loc``.

    Returns
    -------
    loc : int

    Raises
    ------
    KeyError
        If an integer key misses the engine, or the ordinal lookup fails.
    """
    engine = self._engine
    try:
        return engine.get_loc(key)
    except KeyError:
        # integers are never reinterpreted as period labels
        if is_integer(key):
            raise

        try:
            key, _, _ = parse_time_string(key, self.freq)
        except TypeError:
            # not parseable as a string; let Period handle the raw key
            pass

        key = Period(key, self.freq)
        try:
            return Index.get_loc(self, key.ordinal, method=method)
        except KeyError:
            raise KeyError(key)
def wrapper(self, other):
    """
    Compare this index's ordinals with `other` using the enclosing
    ``opname`` comparison; NaT positions are set to ``nat_result``.

    `other` may be a Period, a PeriodIndex, or anything ``Period`` can be
    built from at ``self.freq``.

    Raises
    ------
    AssertionError
        If `other` is a Period/PeriodIndex with a different frequency.
    """
    if isinstance(other, Period):
        compare = getattr(self.values, opname)
        if other.freq != self.freq:
            raise AssertionError("Frequencies must be equal")
        result = compare(other.ordinal)

    elif isinstance(other, PeriodIndex):
        if other.freq != self.freq:
            raise AssertionError("Frequencies must be equal")

        result = getattr(self.values, opname)(other.values)

        # a missing value on either side decides the outcome
        own_missing = com.mask_missing(self.values, tslib.iNaT)
        other_missing = com.mask_missing(other.values, tslib.iNaT)
        either_missing = own_missing | other_missing
        if either_missing.any():
            result[either_missing] = nat_result
        return result

    else:
        other = Period(other, freq=self.freq)
        compare = getattr(self.values, opname)
        result = compare(other.ordinal)

    # scalar path: `other` is a Period here
    if other.ordinal == tslib.iNaT:
        result.fill(nat_result)

    own_missing = self.values == tslib.iNaT
    if own_missing.any():
        result[own_missing] = nat_result

    return result
def dt64arr_to_periodarr(data, freq, tz):
    """
    Convert a datetime64[ns] array to period ordinals.

    Parameters
    ----------
    data : ndarray
        Must have dtype ``M8[ns]``.
    freq : frequency
        Normalised with ``Period._maybe_convert_freq``.
    tz : object
        Forwarded unchanged to ``period.dt64arr_to_periodarr``.

    Raises
    ------
    ValueError
        If ``data.dtype`` is not ``M8[ns]``.
    """
    required = np.dtype('M8[ns]')
    if data.dtype != required:
        raise ValueError('Wrong dtype: %s' % data.dtype)

    # only the base code of the frequency is needed for the conversion
    base, _ = _gfc(Period._maybe_convert_freq(freq))
    as_int = data.view('i8')
    return period.dt64arr_to_periodarr(as_int, base, tz)
def wrapper(self, other):
    """
    Compare this index's ordinals with `other` using the enclosing
    ``opname`` comparison; NaT positions are set to ``nat_result``.

    Parameters
    ----------
    other : Period, PeriodIndex or period-like
        Anything else is converted with ``Period(other, freq=self.freq)``.

    Returns
    -------
    result : ndarray
        Result of applying ``opname`` to the ordinals.

    Raises
    ------
    ValueError
        If `other` is a Period/PeriodIndex whose frequency differs from
        ``self.freq``.
    """
    if isinstance(other, Period):
        func = getattr(self.values, opname)
        # NOTE: the frequency-code lookup that used to sit here was
        # assigned to a local that nothing read; it has been dropped.
        if other.freq != self.freq:
            msg = _DIFFERENT_FREQ_ERROR.format(self.freqstr, other.freqstr)
            raise ValueError(msg)

        result = func(other.ordinal)
    elif isinstance(other, PeriodIndex):
        if other.freq != self.freq:
            msg = _DIFFERENT_FREQ_ERROR.format(self.freqstr, other.freqstr)
            raise ValueError(msg)

        result = getattr(self.values, opname)(other.values)

        # a missing value on either side decides the outcome
        mask = (com.mask_missing(self.values, tslib.iNaT) |
                com.mask_missing(other.values, tslib.iNaT))
        if mask.any():
            result[mask] = nat_result

        return result
    else:
        other = Period(other, freq=self.freq)
        func = getattr(self.values, opname)
        result = func(other.ordinal)

    # scalar path: `other` is a Period here
    if other.ordinal == tslib.iNaT:
        result.fill(nat_result)
    mask = self.values == tslib.iNaT
    if mask.any():
        result[mask] = nat_result

    return result
def wrapper(self, other):
    """
    Compare this index's ordinals with `other` using the enclosing
    ``opname`` comparison; NaT positions are set to ``nat_result``.

    Parameters
    ----------
    other : Period, PeriodIndex, NaT or period-like
        Anything else is converted with ``Period(other, freq=self.freq)``.

    Returns
    -------
    result : ndarray

    Raises
    ------
    IncompatibleFrequency
        If `other` is a Period/PeriodIndex whose frequency differs from
        ``self.freq``.
    """
    if isinstance(other, Period):
        func = getattr(self._values, opname)
        # NOTE: the frequency-code lookup that used to sit here was
        # assigned to a local that nothing read; it has been dropped.
        if other.freq != self.freq:
            msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr)
            raise IncompatibleFrequency(msg)

        result = func(other.ordinal)
    elif isinstance(other, PeriodIndex):
        if other.freq != self.freq:
            msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr)
            raise IncompatibleFrequency(msg)

        result = getattr(self._values, opname)(other._values)

        # a missing value on either side decides the outcome
        mask = self._isnan | other._isnan
        if mask.any():
            result[mask] = nat_result

        return result
    elif other is tslib.NaT:
        # comparison against NaT is `nat_result` everywhere
        result = np.empty(len(self._values), dtype=bool)
        result.fill(nat_result)
    else:
        other = Period(other, freq=self.freq)
        func = getattr(self._values, opname)
        result = func(other.ordinal)

    if self.hasnans:
        result[self._isnan] = nat_result

    return result
def get_loc(self, key, method=None, tolerance=None):
    """
    Get integer location for requested label

    The engine is tried first with the raw key.  On a KeyError, a
    non-integer key is parsed (when possible), converted to a Period at
    ``self.freq`` and its ordinal is looked up in ``self._int64index``.

    Returns
    -------
    loc : int

    Raises
    ------
    KeyError
        If an integer key misses the engine, a Period cannot be built
        from the key, or the ordinal lookup fails.
    """
    engine = self._engine
    try:
        return engine.get_loc(key)
    except KeyError:
        # integers are never reinterpreted as period labels
        if is_integer(key):
            raise

        try:
            key, _, _ = parse_time_string(key, self.freq)
        except TypeError:
            pass

        try:
            key = Period(key, freq=self.freq)
        except ValueError:
            # we cannot construct the Period
            # as we have an invalid type
            raise KeyError(key)

        try:
            if key is tslib.NaT:
                ordinal = tslib.iNaT
            else:
                ordinal = key.ordinal
            if tolerance is not None:
                tolerance = self._convert_tolerance(tolerance)
            return self._int64index.get_loc(ordinal, method, tolerance)
        except KeyError:
            raise KeyError(key)
def _from_arraylike(cls, data, freq, tz):
    """
    Turn array-like input into an array of period ordinals and a frequency.

    Parameters
    ----------
    cls : type
        Not referenced in the body.
    data : iterable, ndarray, PeriodIndex, DatetimeIndex or Int64Index
    freq : frequency or None
        Normalised with ``Period._maybe_convert_freq`` when supplied.
    tz : object
        Only used for datetime64 arrays (passed to
        ``dt64arr_to_periodarr``).

    Returns
    -------
    (ordinals, freq) : tuple

    Raises
    ------
    ValueError
        For scalar / Period input, or when no frequency is available on
        the ndarray path.
    """
    if freq is not None:
        freq = Period._maybe_convert_freq(freq)

    is_array_like = isinstance(
        data, (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index))

    if not is_array_like:
        if is_scalar(data) or isinstance(data, Period):
            raise ValueError('PeriodIndex() must be called with a '
                             'collection of some kind, %s was passed'
                             % repr(data))

        # other iterable of some kind
        if not isinstance(data, (list, tuple)):
            data = list(data)

        try:
            data = _ensure_int64(data)
            if freq is None:
                # handled by the except clause below
                raise ValueError('freq not specified')
            data = np.array([Period(item, freq=freq) for item in data],
                            dtype=np.int64)
        except (TypeError, ValueError):
            data = _ensure_object(data)
            if freq is None:
                freq = period.extract_freq(data)
            data = period.extract_ordinals(data, freq)
        return data, freq

    if isinstance(data, PeriodIndex):
        if freq is None or freq == data.freq:
            # same frequency: hand back the stored values untouched
            freq = data.freq
            return data._values, freq
        from_base, _ = _gfc(data.freq)
        to_base, _ = _gfc(freq)
        ordinals = period.period_asfreq_arr(data._values,
                                            from_base, to_base, 1)
        return ordinals, freq

    if is_object_dtype(data) and infer_dtype(data) == 'integer':
        data = data.astype(np.int64)

    if freq is None and is_object_dtype(data):
        # must contain Period instance and thus extract ordinals
        freq = period.extract_freq(data)
        data = period.extract_ordinals(data, freq)

    if freq is None:
        raise ValueError('freq not specified and cannot be inferred')

    if data.dtype == np.int64:
        return data, freq
    if np.issubdtype(data.dtype, np.datetime64):
        return dt64arr_to_periodarr(data, freq, tz), freq
    return period.extract_ordinals(_ensure_object(data), freq), freq
def get_loc(self, key, method=None, tolerance=None):
    """
    Get integer location for requested label

    The engine is tried first with the raw key.  On a KeyError, a
    non-integer key is parsed (when possible), converted to a Period at
    ``self.freq`` and its ordinal is looked up via ``Index.get_loc``.
    When the Period cannot be built, the result of
    ``self._invalid_indexer('label', key)`` is returned.

    Returns
    -------
    loc : int

    Raises
    ------
    KeyError
        If an integer key misses the engine, or the ordinal lookup fails.
    """
    engine = self._engine
    try:
        return engine.get_loc(key)
    except KeyError:
        # integers are never reinterpreted as period labels
        if is_integer(key):
            raise

        try:
            key, _, _ = parse_time_string(key, self.freq)
        except TypeError:
            pass

        try:
            key = Period(key, freq=self.freq)
        except ValueError:
            # we cannot construct the Period
            # as we have an invalid type
            return self._invalid_indexer('label', key)

        try:
            return Index.get_loc(self, key.ordinal, method, tolerance)
        except KeyError:
            raise KeyError(key)
def _maybe_cast_slice_bound(self, label, side, kind):
    """
    Cast a slice bound given as a datetime or string to a Period.

    Parameters
    ----------
    label : object
    side : {'left', 'right'}
    kind : {'ix', 'loc', 'getitem'}

    Returns
    -------
    bound : Period or object
        A Period for datetime / string labels; otherwise `label` itself.

    Raises
    ------
    KeyError
        If a string label cannot be parsed or converted to bounds.

    Notes
    -----
    Value of `side` parameter should be validated in caller.
    """
    assert kind in ['ix', 'loc', 'getitem']

    if isinstance(label, datetime):
        return Period(label, freq=self.freq)

    if isinstance(label, compat.string_types):
        try:
            _, parsed, reso = parse_time_string(label, self.freq)
            bounds = self._parsed_string_to_bounds(reso, parsed)
            # 'left' takes the first bound, anything else the second
            if side == 'left':
                return bounds[0]
            return bounds[1]
        except Exception:
            raise KeyError(label)

    if is_integer(label) or is_float(label):
        self._invalid_indexer('slice', label)

    return label
def __setstate__(self, state):
    """
    Necessary for making this object picklable

    Parameters
    ----------
    state : dict or tuple
        A dict is delegated to the parent ``__setstate__``.  A tuple is
        the pre-0.15 format: either ``(nd_state, own_state)`` or a bare
        ndarray state.

    Raises
    ------
    Exception
        If `state` is neither a dict nor a tuple.
    """
    if isinstance(state, dict):
        super(PeriodIndex, self).__setstate__(state)

    elif isinstance(state, tuple):

        # < 0.15 compat
        if len(state) == 2:
            nd_state, own_state = state
            data = np.empty(nd_state[1], dtype=nd_state[2])
            np.ndarray.__setstate__(data, nd_state)

            # backcompat
            self.freq = Period._maybe_convert_freq(own_state[1])

        else:  # pragma: no cover
            data = np.empty(state)
            # Restore into the freshly allocated buffer, as the
            # two-element branch does.  Restoring into `self` left `data`
            # uninitialised before it was stored in ``self._data``.
            np.ndarray.__setstate__(data, state)

        self._data = data

    else:
        raise Exception("invalid pickle state")
def to_timestamp(self, freq=None, how='start'):
    """
    Cast to DatetimeIndex

    Parameters
    ----------
    freq : string or DateOffset, default 'D' for week or longer, 'S'
           otherwise
        Target frequency
    how : {'s', 'e', 'start', 'end'}

    Returns
    -------
    DatetimeIndex
    """
    how = _validate_end_alias(how)

    if freq is None:
        # derive the default target from this index's own frequency
        own_base, _ = _gfc(self.freq)
        freq = frequencies.get_to_timestamp_base(own_base)
    else:
        freq = Period._maybe_convert_freq(freq)

    target_base, _ = _gfc(freq)

    converted = self.asfreq(freq, how)
    stamps = period.periodarr_to_dt64arr(converted._values, target_base)
    return DatetimeIndex(stamps, freq='infer', name=self.name)
def asfreq(self, freq=None, how='E'):
    """
    Convert the PeriodIndex to the specified frequency `freq`.

    Parameters
    ----------
    freq : str
        a frequency
    how : str {'E', 'S'}
        'E', 'END', or 'FINISH' for end,
        'S', 'START', or 'BEGIN' for start.
        Whether the elements should be aligned to the end
        or start within a period. January 31st ('END') vs.
        January 1st ('START') for example.

    Returns
    -------
    new : PeriodIndex with the new frequency

    Examples
    --------
    >>> pidx = pd.period_range('2010-01-01', '2015-01-01', freq='A')
    >>> pidx
    <class 'pandas.tseries.period.PeriodIndex'>
    [2010, ..., 2015]
    Length: 6, Freq: A-DEC

    >>> pidx.asfreq('M')
    <class 'pandas.tseries.period.PeriodIndex'>
    [2010-12, ..., 2015-12]
    Length: 6, Freq: M

    >>> pidx.asfreq('M', how='S')
    <class 'pandas.tseries.period.PeriodIndex'>
    [2010-01, ..., 2015-01]
    Length: 6, Freq: M
    """
    how = _validate_end_alias(how)
    freq = Period._maybe_convert_freq(freq)

    source_base, source_mult = _gfc(self.freq)
    target_base, _ = _gfc(freq)

    ordinals = self.asi8
    align_end = how == 'E'
    if align_end:
        # source_mult can't be negative or 0
        ordinals = ordinals + source_mult - 1

    converted = period.period_asfreq_arr(ordinals, source_base,
                                         target_base, align_end)

    if self.hasnans:
        # missing entries stay missing after conversion
        converted[self._isnan] = tslib.iNaT

    return self._simple_new(converted, self.name, freq=freq)
def to_timestamp(self, freq=None, how='start'):
    """
    Cast to DatetimeIndex

    Parameters
    ----------
    freq : string or DateOffset, default 'D' for week or longer, 'S'
           otherwise
        Target frequency
    how : {'s', 'e', 'start', 'end'}

    Returns
    -------
    DatetimeIndex
    """
    how = _validate_end_alias(how)

    if freq is None:
        # derive the default target from this index's own frequency
        current_base, _ = _gfc(self.freq)
        freq = frequencies.get_to_timestamp_base(current_base)
    else:
        freq = Period._maybe_convert_freq(freq)

    base, _ = _gfc(freq)

    at_target_freq = self.asfreq(freq, how)
    dt64_values = period.periodarr_to_dt64arr(at_target_freq.values, base)
    return DatetimeIndex(dt64_values, freq='infer', name=self.name)
def _from_arraylike(cls, data, freq, tz):
    """
    Convert array-like input into period ordinals plus a frequency.

    When `freq` is None it is taken from the ``freq`` attribute of the
    first element, if there is one.

    Parameters
    ----------
    cls : type
        Not referenced in the body.
    data : iterable, ndarray, PeriodIndex, DatetimeIndex or Int64Index
    freq : frequency or None
    tz : object
        Forwarded to ``dt64arr_to_periodarr`` for datetime64 arrays.

    Returns
    -------
    (data, freq) : tuple

    Raises
    ------
    ValueError
        For scalar / Period input, or when no frequency is given and
        none can be read from the first element.
    """
    array_types = (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index)

    # --- generic iterables -------------------------------------------------
    if not isinstance(data, array_types):
        if np.isscalar(data) or isinstance(data, Period):
            raise ValueError('PeriodIndex() must be called with a '
                             'collection of some kind, %s was passed'
                             % repr(data))

        # other iterable of some kind
        if not isinstance(data, (list, tuple)):
            data = list(data)

        try:
            data = com._ensure_int64(data)
            if freq is None:
                # caught just below, which switches to the object path
                raise ValueError('freq not specified')
            data = np.array([Period(x, freq=freq).ordinal for x in data],
                            dtype=np.int64)
        except (TypeError, ValueError):
            data = com._ensure_object(data)

            if freq is None and len(data) > 0:
                freq = getattr(data[0], 'freq', None)

            if freq is None:
                raise ValueError('freq not specified and cannot be '
                                 'inferred from first element')

            data = _get_ordinals(data, freq)
        return data, freq

    # --- PeriodIndex: reuse or convert the stored ordinals -----------------
    if isinstance(data, PeriodIndex):
        if freq is None or freq == data.freq:
            freq = data.freq
            return data.values, freq
        source_base, _ = _gfc(data.freq)
        target_base, _ = _gfc(freq)
        converted = period.period_asfreq_arr(data.values,
                                             source_base, target_base, 1)
        return converted, freq

    # --- remaining array / index types --------------------------------------
    if freq is None and len(data) > 0:
        freq = getattr(data[0], 'freq', None)

    if freq is None:
        raise ValueError('freq not specified and cannot be '
                         'inferred from first element')

    if data.dtype == np.int64:
        return data, freq

    if np.issubdtype(data.dtype, np.datetime64):
        return dt64arr_to_periodarr(data, freq, tz), freq

    try:
        data = com._ensure_int64(data)
    except (TypeError, ValueError):
        data = com._ensure_object(data)
        data = _get_ordinals(data, freq)
    return data, freq
def pnow(freq=None):
    """
    Deprecated: emit a FutureWarning and return ``Period.now(freq=freq)``.
    """
    # deprecation, xref #13790
    import warnings

    message = ("pd.pnow() and pandas.tseries.period.pnow() "
               "are deprecated. Please use Period.now()")
    # stacklevel=2 attributes the warning to the caller of pnow()
    warnings.warn(message, FutureWarning, stacklevel=2)
    return Period.now(freq=freq)
def _get_ordinal_range(start, end, periods, freq, mult=1):
    """
    Build the int64 ordinals for a period range.

    Parameters
    ----------
    start, end : period-like or None
        Converted with ``Period(value, freq)`` when not None.
    periods : int or None
        Number of periods; scaled by the frequency multiple.
    freq : frequency or None
        When None, taken from `start` or `end`.
    mult : int, default 1
        Step between ordinals; replaced by the multiple of `freq` when
        `freq` is given.

    Returns
    -------
    (data, freq) : tuple of (ndarray of int64, frequency)

    Raises
    ------
    ValueError
        If fewer than two of `start`, `end`, `periods` are given, the
        endpoints have different frequencies, an endpoint is NaT, or no
        frequency can be determined.
    """
    if com._count_not_none(start, end, periods) < 2:
        raise ValueError('Must specify 2 of start, end, periods')

    if freq is not None:
        _, mult = _gfc(freq)

    if start is not None:
        start = Period(start, freq)
    if end is not None:
        end = Period(end, freq)

    start_is_period = isinstance(start, Period)
    end_is_period = isinstance(end, Period)

    if start_is_period and end_is_period and start.freq != end.freq:
        raise ValueError('Start and end must have same freq')

    start_is_nat = start_is_period and start.ordinal == tslib.iNaT
    if start_is_nat or (end_is_period and end.ordinal == tslib.iNaT):
        raise ValueError('Start and end must not be NaT')

    if freq is None:
        if start_is_period:
            freq = start.freq
        elif end_is_period:
            freq = end.freq
        else:  # pragma: no cover
            raise ValueError('Could not infer freq from start/end')

    # Work out the half-open [first, stop) ordinal interval, then build
    # the range in a single call.
    if periods is None:
        first = start.ordinal
        stop = end.ordinal + 1
    else:
        periods = periods * mult
        if start is None:
            first = end.ordinal - periods + mult
            stop = end.ordinal + 1
        else:
            first = start.ordinal
            stop = start.ordinal + periods

    data = np.arange(first, stop, mult, dtype=np.int64)
    return data, freq
def searchsorted(self, key, side='left'):
    """
    Locate `key` in ``self.values`` with ``searchsorted``.

    A Period key (which must share this index's frequency) or a string
    key is first replaced by its ordinal; other keys are passed through.

    Raises
    ------
    ValueError
        If `key` is a Period with a different frequency.
    """
    if isinstance(key, Period):
        if key.freq != self.freq:
            raise ValueError("Different period frequency: %s" % key.freq)
        target = key.ordinal
    elif isinstance(key, compat.string_types):
        target = Period(key, freq=self.freq).ordinal
    else:
        target = key

    return self.values.searchsorted(target, side=side)
def searchsorted(self, key, side='left'):
    """
    Locate `key` in ``self.values`` with ``searchsorted``.

    A Period key (which must share this index's frequency) or a string
    key is first replaced by its ordinal; other keys are passed through.

    Raises
    ------
    ValueError
        If `key` is a Period with a different frequency.
    """
    if isinstance(key, Period):
        if key.freq != self.freq:
            msg = _DIFFERENT_FREQ_ERROR.format(self.freqstr, key.freqstr)
            raise ValueError(msg)
        target = key.ordinal
    elif isinstance(key, compat.string_types):
        target = Period(key, freq=self.freq).ordinal
    else:
        target = key

    return self.values.searchsorted(target, side=side)
def searchsorted(self, key, side='left', sorter=None):
    """
    Locate `key` in ``self._values`` with ``searchsorted``.

    A Period key (which must share this index's frequency) or a string
    key is first replaced by its ordinal; other keys are passed through.
    `side` and `sorter` are forwarded unchanged.

    Raises
    ------
    IncompatibleFrequency
        If `key` is a Period with a different frequency.
    """
    if isinstance(key, Period):
        if key.freq != self.freq:
            msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, key.freqstr)
            raise IncompatibleFrequency(msg)
        target = key.ordinal
    elif isinstance(key, compat.string_types):
        target = Period(key, freq=self.freq).ordinal
    else:
        target = key

    return self._values.searchsorted(target, side=side, sorter=sorter)
def get_value(self, series, key):
    """
    Fast lookup of value from 1-dimensional ndarray. Only use this if you
    know what you're doing

    The parent lookup is tried first.  On KeyError / IndexError the key is
    parsed as a time string: a coarser resolution than this index yields a
    slice of `series`, an equal resolution yields a single value, and a
    finer one raises KeyError.  Keys that cannot be parsed (TypeError)
    are converted with ``Period(key, self.freq)``.
    """
    s = com._values_from_object(series)
    try:
        found = super(PeriodIndex, self).get_value(s, key)
        return com._maybe_box(self, found, series, key)
    except (KeyError, IndexError):
        try:
            asdt, parsed, reso = parse_time_string(key, self.freq)
            grp = frequencies.Resolution.get_freq_group(reso)
            freqn = frequencies.get_freq_group(self.freq)

            vals = self._values

            # if our data is higher resolution than requested key, slice
            if grp < freqn:
                iv = Period(asdt, freq=(grp, 1))
                first_ord = iv.asfreq(self.freq, how='S').ordinal
                last_ord = iv.asfreq(self.freq, how='E').ordinal

                if last_ord < vals[0] or first_ord > vals[-1]:
                    raise KeyError(key)

                pos = np.searchsorted(self._values, [first_ord, last_ord])
                key = slice(pos[0], pos[1] + 1)
                return series[key]

            if grp == freqn:
                key = Period(asdt, freq=self.freq).ordinal
                hit = self._engine.get_value(s, key)
                return com._maybe_box(self, hit, series, key)

            raise KeyError(key)
        except TypeError:
            pass

        key = Period(key, self.freq).ordinal
        hit = self._engine.get_value(s, key)
        return com._maybe_box(self, hit, series, key)
def get_value(self, series, key):
    """
    Fast lookup of value from 1-dimensional ndarray. Only use this if you
    know what you're doing

    The parent lookup is tried first.  On KeyError / IndexError the key is
    parsed as a time string: a coarser resolution than this index yields a
    slice of `series`, an equal resolution yields a single value, and a
    finer one raises KeyError.  Keys that cannot be parsed (TypeError)
    are converted with ``Period(key, self.freq)``.
    """
    s = com._values_from_object(series)
    try:
        found = super(PeriodIndex, self).get_value(s, key)
        return com._maybe_box(self, found, series, key)
    except (KeyError, IndexError):
        try:
            asdt, parsed, reso = parse_time_string(key, self.freq)
            grp = frequencies.Resolution.get_freq_group(reso)
            freqn = frequencies.get_freq_group(self.freq)

            vals = self.values

            # if our data is higher resolution than requested key, slice
            if grp < freqn:
                iv = Period(asdt, freq=(grp, 1))
                lower = iv.asfreq(self.freq, how='S').ordinal
                upper = iv.asfreq(self.freq, how='E').ordinal

                if upper < vals[0] or lower > vals[-1]:
                    raise KeyError(key)

                pos = np.searchsorted(self.values, [lower, upper])
                key = slice(pos[0], pos[1] + 1)
                return series[key]

            if grp == freqn:
                key = Period(asdt, freq=self.freq).ordinal
                value = self._engine.get_value(s, key)
                return com._maybe_box(self, value, series, key)

            raise KeyError(key)
        except TypeError:
            pass

        key = Period(key, self.freq).ordinal
        value = self._engine.get_value(s, key)
        return com._maybe_box(self, value, series, key)
def _simple_new(cls, values, name=None, freq=None, **kwargs):
    """
    Create an instance directly around `values`, bypassing ``__new__``.

    Input without a truthy ``dtype`` attribute is first wrapped in an
    ndarray; object-dtype input is delegated to ``PeriodIndex(...)``.

    Raises
    ------
    ValueError
        If `freq` is None (for non-object input).
    """
    has_dtype = getattr(values, 'dtype', None)
    if not has_dtype:
        values = np.array(values, copy=False)

    if is_object_dtype(values):
        # needs full construction to extract ordinals
        return PeriodIndex(values, name=name, freq=freq, **kwargs)

    obj = object.__new__(cls)
    obj._data = values
    obj.name = name

    if freq is None:
        raise ValueError('freq is not specified')

    obj.freq = Period._maybe_convert_freq(freq)
    obj._reset_identity()
    return obj
def _from_ordinals(cls, values, name=None, freq=None, **kwargs):
    """
    Create an instance directly from integer ordinals.

    `values` is converted to an int64 ndarray and stored as ``_data``;
    `kwargs` is accepted but not used here.

    Raises
    ------
    ValueError
        If `freq` is None.
    """
    ordinals = np.array(values, dtype='int64', copy=False)

    obj = object.__new__(cls)
    obj._data = ordinals
    obj.name = name

    if freq is None:
        raise ValueError('freq is not specified and cannot be inferred')

    obj.freq = Period._maybe_convert_freq(freq)
    obj._reset_identity()
    return obj
def _generate_range(cls, start, end, periods, freq, fields):
    """
    Produce period ordinals either from endpoints or from field arrays.

    Parameters
    ----------
    start, end, periods
        Endpoint specification, forwarded to ``_get_ordinal_range``.
    freq : frequency or None
        Normalised with ``Period._maybe_convert_freq`` when given.
    fields : dict
        Keyword arguments for ``_range_from_fields``.

    Returns
    -------
    (subarr, freq) : tuple

    Raises
    ------
    ValueError
        If both endpoints and fields are supplied, or neither is.
    """
    if freq is not None:
        freq = Period._maybe_convert_freq(freq)

    has_fields = len(fields) > 0
    has_endpoints = com._count_not_none(start, end) > 0

    if has_endpoints:
        if has_fields:
            raise ValueError('Can either instantiate from fields '
                             'or endpoints, but not both')
        subarr, freq = _get_ordinal_range(start, end, periods, freq)
        return subarr, freq

    if has_fields:
        subarr, freq = _range_from_fields(freq=freq, **fields)
        return subarr, freq

    raise ValueError('Not enough parameters to construct '
                     'Period range')
def __getitem__(self, key):
    """
    Index into the underlying ordinals.

    Parameters
    ----------
    key : scalar, slice, or array-like
        Boolean indexers are converted to an ndarray before use.

    Returns
    -------
    Period
        When `key` is a scalar.
    PeriodIndex
        Otherwise, carrying this index's name and frequency.
    """
    getitem = self._data.__getitem__

    if np.isscalar(key):
        val = getitem(key)
        return Period(ordinal=val, freq=self.freq)

    if com.is_bool_indexer(key):
        key = np.asarray(key)

    result = getitem(key)
    # Multi-dimensional results used to go through a separate branch that
    # returned exactly the same expression; one return covers both cases.
    return PeriodIndex(result, name=self.name, freq=self.freq)
def _simple_new(cls, values, name=None, freq=None, **kwargs):
    """
    Create an instance directly around integer ordinals.

    Non-integer input is wrapped in an ndarray and handed to
    ``PeriodIndex(...)``, except that non-empty float input is rejected.

    Raises
    ------
    TypeError
        If `values` is a non-empty float array.
    ValueError
        If `freq` is None (for integer input).
    """
    if not is_integer_dtype(values):
        values = np.array(values, copy=False)
        is_nonempty_float = len(values) > 0 and is_float_dtype(values)
        if is_nonempty_float:
            raise TypeError("PeriodIndex can't take floats")
        # needs full construction to obtain ordinals
        return PeriodIndex(values, name=name, freq=freq, **kwargs)

    ordinals = np.array(values, dtype='int64', copy=False)

    obj = object.__new__(cls)
    obj._data = ordinals
    obj.name = name

    if freq is None:
        raise ValueError('freq is not specified')

    obj.freq = Period._maybe_convert_freq(freq)
    obj._reset_identity()
    return obj
def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
            periods=None, copy=False, name=None, tz=None, dtype=None,
            **kwargs):
    """
    Build a PeriodIndex from ordinals, a range specification, or
    array-like data.

    Parameters
    ----------
    data : array-like, PeriodIndex or None
        Values to convert.  When None, the index is built from `ordinal`
        or, failing that, from `start` / `end` / `periods` / `kwargs`.
    ordinal : array-like of int, optional
        Cast to an int64 array and used as the ordinals when `data` is
        None.
    freq : frequency, optional
        Converted with ``Period._maybe_convert_freq`` when truthy.
    start, end, periods : optional
        Forwarded to ``cls._generate_range`` when `data` and `ordinal`
        are both None.  A float `periods` is truncated with ``int``.
    copy : bool, default False
        Not referenced in this method.
    name : object, optional
        Defaults to ``data.name`` when `data` has that attribute.
    tz : object, optional
        Forwarded to ``dt64arr_to_periodarr`` for datetime64 input.
    dtype : optional
        Must resolve to a period dtype; supplies `freq` when `freq` is
        None.
    **kwargs
        Passed as the last positional argument to
        ``cls._generate_range``.

    Raises
    ------
    ValueError
        If `periods` is neither float nor integer, or `dtype` is not a
        period dtype.
    IncompatibleFrequency
        If both `freq` and `dtype` are given and disagree.
    TypeError
        If non-empty `data` is inferred to be floating point.
    """

    # normalise `periods`: floats are truncated, non-numbers rejected
    if periods is not None:
        if is_float(periods):
            periods = int(periods)
        elif not is_integer(periods):
            raise ValueError('Periods must be a number, got %s' %
                             str(periods))

    if name is None and hasattr(data, 'name'):
        name = data.name

    # reconcile an explicit dtype with `freq`
    if dtype is not None:
        dtype = pandas_dtype(dtype)
        if not is_period_dtype(dtype):
            raise ValueError('dtype must be PeriodDtype')
        if freq is None:
            freq = dtype.freq
        elif freq != dtype.freq:
            msg = 'specified freq and dtype are different'
            raise IncompatibleFrequency(msg)

    # coerce freq to freq object, otherwise it can be coerced elementwise
    # which is slow
    if freq:
        freq = Period._maybe_convert_freq(freq)

    # no data: use the given ordinals or generate a range
    if data is None:
        if ordinal is not None:
            data = np.asarray(ordinal, dtype=np.int64)
        else:
            data, freq = cls._generate_range(start, end, periods,
                                             freq, kwargs)
        return cls._from_ordinals(data, name=name, freq=freq)

    # existing PeriodIndex: reuse its values or convert their frequency
    if isinstance(data, PeriodIndex):
        if freq is None or freq == data.freq:  # no freq change
            freq = data.freq
            data = data._values
        else:
            base1, _ = _gfc(data.freq)
            base2, _ = _gfc(freq)
            data = period.period_asfreq_arr(data._values,
                                            base1, base2, 1)
        return cls._simple_new(data, name=name, freq=freq)

    # not array / index
    if not isinstance(
            data, (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index)):
        if is_scalar(data) or isinstance(data, Period):
            # NOTE(review): presumably raises; if it returned, execution
            # would continue with the scalar below -- confirm.
            cls._scalar_data_error(data)

        # other iterable of some kind
        if not isinstance(data, (list, tuple)):
            data = list(data)

        data = np.asarray(data)

    # datetime other than period
    if is_datetime64_dtype(data.dtype):
        data = dt64arr_to_periodarr(data, freq, tz)
        return cls._from_ordinals(data, name=name, freq=freq)

    # check not floats
    if infer_dtype(data) == 'floating' and len(data) > 0:
        raise TypeError("PeriodIndex does not allow "
                        "floating point in construction")

    # anything else, likely an array of strings or periods
    data = _ensure_object(data)
    # an explicit (truthy) freq wins; otherwise extract it from the data
    freq = freq or period.extract_freq(data)
    data = period.extract_ordinals(data, freq)
    return cls._from_ordinals(data, name=name, freq=freq)
def _get_object_array(self):
    """
    Return an ndarray holding one Period per entry of ``self.values``,
    each built with ``Period._from_ordinal`` at this index's frequency.
    """
    freq = self.freq
    boxed = []
    for ordinal in self.values:
        boxed.append(Period._from_ordinal(ordinal=ordinal, freq=freq))
    return np.array(boxed, copy=False)
def _get_ordinals(data, freq):
    """
    Return the period ordinals for `data` at frequency `freq`.

    If the first element is a Period the whole array goes through
    ``period.extract_ordinals``; otherwise each element is converted with
    ``Period(x, freq=freq).ordinal`` via ``lib.map_infer``.
    """
    def to_ordinal(x):
        return Period(x, freq=freq).ordinal

    # only the first element is inspected to choose the conversion path
    if isinstance(data[0], Period):
        return period.extract_ordinals(data, freq)
    return lib.map_infer(data, to_ordinal)
def pnow(freq=None):
    """Return a Period for the current local time at frequency `freq`."""
    current = datetime.now()
    return Period(current, freq=freq)
def _get_object_array(self):
    """
    Box every ordinal in ``self.values`` as a Period (via
    ``Period._from_ordinal`` at ``self.freq``) and return them as an
    ndarray.
    """
    freq = self.freq

    def box(ordinal):
        return Period._from_ordinal(ordinal=ordinal, freq=freq)

    periods = [box(value) for value in self.values]
    return np.array(periods, copy=False)
def _box_func(self):
    """
    Return a one-argument callable that turns an ordinal into a Period.

    ``self.freq`` is read each time the callable runs, not when it is
    created.
    """
    def box(x):
        return Period._from_ordinal(ordinal=x, freq=self.freq)

    return box
def read_data_for_period(self, period: Period, varname_internal: str) -> DataArray:
    """
    Read the data for `period` and `varname_internal` into memory and
    return it as an xarray DataArray.

    The reading strategy depends on ``self.data_source_type``: three
    layouts of RPN files (one file per month, a folder of files per month
    filtered by file-name prefix, or a folder per month filtered by
    variable name) and one layout of NetCDF files opened with
    ``xarray.open_mfdataset``.

    :param period: the period to read; months are iterated with
        ``period.range("months")`` for the RPN layouts, and
        ``period.start`` / ``period.end`` bound the time selection for
        the NetCDF layout
    :param varname_internal: key into ``self.varname_mapping``,
        ``self.level_mapping``, ``self.multipliers`` and ``self.offsets``
    :return: ``multipliers[varname_internal] * data + offsets[varname_internal]``
        with dims ("t", "x", "y") and coords "t", "lon", "lat"
    :raises NotImplementedError: for any other ``data_source_type``
    :raises IOError: if no data was found for the period

    Note: this method will read everything into memory, please be easy on
    the period duration for large datasets
    """
    assert isinstance(period, Period)

    # defaults used when no vertical level is registered for the variable
    level, level_kind = -1, -1

    if varname_internal in self.level_mapping:
        lvl = self.level_mapping[varname_internal]
        assert isinstance(lvl, VerticalLevel)
        level, level_kind = lvl.get_value_and_kind()

    # records accumulated across files; the RPN branches sort its keys to
    # obtain the dates -- presumably keyed by date, confirm against RPN
    data = {}
    lons, lats = None, None
    data_list = None
    dates = None

    # for each datasource type the following arrays should be defined:
    #      data(t, x, y), dates(t), lons(x, y), lats(x, y)
    if self.data_source_type == data_source_types.ALL_VARS_IN_A_FOLDER_OF_RPN_FILES:

        # one file per (year, month)
        for month_start in period.range("months"):
            f = self.yearmonth_to_path[(month_start.year, month_start.month)]

            r = RPN(str(f))
            # read the data into memory
            data1 = r.get_all_time_records_for_name_and_level(varname=self.varname_mapping[varname_internal],
                                                              level=level, level_kind=level_kind)

            # coordinates are taken once, from the first file read
            if lons is None:
                lons, lats = r.get_longitudes_and_latitudes_for_the_last_read_rec()

            data.update(data1)

            # NOTE(review): not reached if any call above raises -- the
            # file would then stay open.
            r.close()

        dates = list(sorted(data))[:-1]  # Ignore the last date because it is from the next month
        data_list = [data[d] for d in dates]

    elif self.data_source_type == data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT:

        filename_prefix = self.varname_to_file_prefix[varname_internal]
        for month_start in period.range("months"):

            year, m = month_start.year, month_start.month

            print(year, m)

            # Skip years or months that are not available
            if (year, m) not in self.yearmonth_to_path:
                print("Skipping {}-{}".format(year, m))
                continue

            month_dir = self.yearmonth_to_path[(year, m)]

            for f in month_dir.iterdir():
                # Skip the file for time step 0
                if f.name[-9:-1] == "0" * 8:
                    continue

                # read only files with the specified prefix
                if not f.name.startswith(filename_prefix):
                    continue

                r = RPN(str(f))

                data.update(r.get_all_time_records_for_name_and_level(varname=self.varname_mapping[varname_internal],
                                                                      level=level, level_kind=level_kind))

                # coordinates are taken once, from the first file read
                if lons is None:
                    lons, lats = r.get_longitudes_and_latitudes_for_the_last_read_rec()

                r.close()

        dates = list(sorted(data))[:-1]  # Ignore the last date because it is from the next month
        data_list = [data[d] for d in dates]

    elif self.data_source_type == data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT_VNAME_IN_FNAME:
        for month_start in period.range("months"):

            year, m = month_start.year, month_start.month

            print(year, m)

            # Skip years or months that are not available
            if (year, m) not in self.yearmonth_to_path:
                print("Skipping {}-{}".format(year, m))
                continue

            month_dir = self.yearmonth_to_path[(year, m)]

            for f in month_dir.iterdir():
                # read only files containing the variable name in the name, i.e. *TT*.rpn
                if not self.varname_mapping[varname_internal] in f.name:
                    continue

                r = RPN(str(f))

                data.update(
                    r.get_all_time_records_for_name_and_level(varname=self.varname_mapping[varname_internal],
                                                              level=level, level_kind=level_kind))

                # coordinates are taken once, from the first file read
                if lons is None:
                    lons, lats = r.get_longitudes_and_latitudes_for_the_last_read_rec()

                r.close()

        dates = list(sorted(data))[:-1]  # Ignore the last date because it is from the next month
        data_list = [data[d] for d in dates]

    elif self.data_source_type == data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES:

        base_folder = Path(self.base_folder)
        # every entry of the folder is opened as one dataset; it is not
        # closed explicitly in this method
        ds = xarray.open_mfdataset(str(base_folder.joinpath("*")))

        # select the variable by name and time
        var = ds[self.varname_mapping[varname_internal]].loc[period.start:period.end].squeeze()

        need_to_create_meshgrid = False
        # coordinates are recognised by substring match on their names
        for cname, cvals in var.coords.items():
            if "time" in cname.lower():
                dates = cvals

            if "lon" in cname.lower():
                lons = cvals

                if lons.ndim == 1:
                    need_to_create_meshgrid = True

            if "lat" in cname.lower():
                lats = cvals

        # 1-D coordinate vectors are expanded to 2-D fields
        if need_to_create_meshgrid:
            lats, lons = np.meshgrid(lats.values, lons.values)

        if var.ndim > 3:
            # NOTE(review): indexes with the level_mapping entry itself,
            # while the top of this method asserts such entries are
            # VerticalLevel instances -- confirm what this layout stores.
            var = var[:, self.level_mapping[varname_internal], :, :]

        # swap the last two axes when they do not match the shape of lons
        if var.shape[-2:] == lons.shape:
            data_list = var.values
        else:
            data_list = np.transpose(var.values, axes=(0, 2, 1))

    else:

        raise NotImplementedError("reading of the layout type {} is not implemented yet.".format(self.data_source_type))

    # print(dates[0], dates[1], "...", dates[-1], len(dates))

    # Construct a dictionary for xarray.DataArray ...
    vardict = {
        "coords": {
            "t": {"dims": "t", "data": dates},
            "lon": {"dims": ("x", "y"), "data": lons},
            "lat": {"dims": ("x", "y"), "data": lats},
        },
        "dims": ("t", "x", "y"),
        "data": data_list,
        "name": varname_internal
    }

    if len(data_list) == 0:
        raise IOError("Could not find any data for the period {}..{}".format(period.start, period.end))

    # Convert units based on supplied mappings
    return self.multipliers[varname_internal] * DataArray.from_dict(vardict) + self.offsets[varname_internal]