def _value_counts_arraylike(values, dropna=True): is_datetimetz_type = is_datetimetz(values) is_period_type = (is_period_dtype(values) or is_period_arraylike(values)) orig = values from pandas.core.series import Series values = Series(values).values dtype = values.dtype if needs_i8_conversion(dtype) or is_period_type: from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex if is_period_type: # values may be an object values = PeriodIndex(values) freq = values.freq values = values.view(np.int64) keys, counts = htable.value_count_int64(values, dropna) if dropna: msk = keys != iNaT keys, counts = keys[msk], counts[msk] # convert the keys back to the dtype we came in keys = keys.astype(dtype) # dtype handling if is_datetimetz_type: keys = DatetimeIndex._simple_new(keys, tz=orig.dtype.tz) if is_period_type: keys = PeriodIndex._simple_new(keys, freq=freq) elif is_signed_integer_dtype(dtype): values = _ensure_int64(values) keys, counts = htable.value_count_int64(values, dropna) elif is_unsigned_integer_dtype(dtype): values = _ensure_uint64(values) keys, counts = htable.value_count_uint64(values, dropna) elif is_float_dtype(dtype): values = _ensure_float64(values) keys, counts = htable.value_count_float64(values, dropna) else: values = _ensure_object(values) keys, counts = htable.value_count_object(values, dropna) mask = isnull(values) if not dropna and mask.any(): keys = np.insert(keys, 0, np.NaN) counts = np.insert(counts, 0, mask.sum()) return keys, counts
def _value_counts_arraylike(values, dropna=True): is_datetimetz_type = is_datetimetz(values) is_period = (isinstance(values, ABCPeriodIndex) or is_period_arraylike(values)) orig = values from pandas.core.series import Series values = Series(values).values dtype = values.dtype if is_datetime_or_timedelta_dtype(dtype) or is_period: from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex if is_period: values = PeriodIndex(values) freq = values.freq values = values.view(np.int64) keys, counts = htable.value_count_scalar64(values, dropna) if dropna: msk = keys != iNaT keys, counts = keys[msk], counts[msk] # convert the keys back to the dtype we came in keys = keys.astype(dtype) # dtype handling if is_datetimetz_type: if isinstance(orig, ABCDatetimeIndex): tz = orig.tz else: tz = orig.dt.tz keys = DatetimeIndex._simple_new(keys, tz=tz) if is_period: keys = PeriodIndex._simple_new(keys, freq=freq) elif is_integer_dtype(dtype): values = _ensure_int64(values) keys, counts = htable.value_count_scalar64(values, dropna) elif is_float_dtype(dtype): values = _ensure_float64(values) keys, counts = htable.value_count_scalar64(values, dropna) else: values = _ensure_object(values) mask = isnull(values) keys, counts = htable.value_count_object(values, mask) if not dropna and mask.any(): keys = np.insert(keys, 0, np.NaN) counts = np.insert(counts, 0, mask.sum()) return keys, counts
def maybe_to_datetimelike(data, copy=False): """ return a DelegatedClass of a Series that is datetimelike (e.g. datetime64[ns],timedelta64[ns] dtype or a Series of Periods) raise TypeError if this is not possible. Parameters ---------- data : Series copy : boolean, default False copy the input data Returns ------- DelegatedClass """ from pandas import Series if not isinstance(data, Series): raise TypeError("cannot convert an object of type {0} to a " "datetimelike index".format(type(data))) index = data.index name = data.name orig = data if is_categorical_dtype(data) else None if orig is not None: data = orig.values.categories if is_datetime64_dtype(data.dtype): return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), index, name=name, orig=orig) elif is_datetime64tz_dtype(data.dtype): return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer', ambiguous='infer'), index, data.name, orig=orig) elif is_timedelta64_dtype(data.dtype): return TimedeltaProperties(TimedeltaIndex(data, copy=copy, freq='infer'), index, name=name, orig=orig) else: if is_period_arraylike(data): return PeriodProperties(PeriodIndex(data, copy=copy), index, name=name, orig=orig) if is_datetime_arraylike(data): return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), index, name=name, orig=orig) raise TypeError("cannot convert an object of type {0} to a " "datetimelike index".format(type(data)))
def infer_freq(index, warn=True): """ Infer the most likely frequency given the input index. If the frequency is uncertain, a warning will be printed. Parameters ---------- index : DatetimeIndex or TimedeltaIndex if passed a Series will use the values of the series (NOT THE INDEX) warn : boolean, default True Returns ------- freq : string or None None if no discernible frequency TypeError if the index is not datetime-like ValueError if there are less than three values. """ import pandas as pd if isinstance(index, ABCSeries): values = index._values if not (is_datetime64_dtype(values) or is_timedelta64_dtype(values) or values.dtype == object): raise TypeError("cannot infer freq from a non-convertible " "dtype on a Series of {0}".format(index.dtype)) index = values if is_period_arraylike(index): raise TypeError("PeriodIndex given. Check the `freq` attribute " "instead of using infer_freq.") elif isinstance(index, pd.TimedeltaIndex): inferer = _TimedeltaFrequencyInferer(index, warn=warn) return inferer.get_freq() if isinstance(index, pd.Index) and not isinstance(index, pd.DatetimeIndex): if isinstance(index, (pd.Int64Index, pd.Float64Index)): raise TypeError("cannot infer freq from a non-convertible index " "type {0}".format(type(index))) index = index.values if not isinstance(index, pd.DatetimeIndex): try: index = pd.DatetimeIndex(index) except AmbiguousTimeError: index = pd.DatetimeIndex(index.asi8) inferer = _FrequencyInferer(index, warn=warn) return inferer.get_freq()
def duplicated(values, keep='first'): """ Return boolean ndarray denoting duplicate values. .. versionadded:: 0.19.0 Parameters ---------- values : ndarray-like Array over which to check for duplicate values. keep : {'first', 'last', False}, default 'first' - ``first`` : Mark duplicates as ``True`` except for the first occurrence. - ``last`` : Mark duplicates as ``True`` except for the last occurrence. - False : Mark all duplicates as ``True``. Returns ------- duplicated : ndarray """ dtype = values.dtype # no need to revert to original type if needs_i8_conversion(dtype): values = values.view(np.int64) elif is_period_arraylike(values): from pandas.tseries.period import PeriodIndex values = PeriodIndex(values).asi8 elif is_categorical_dtype(dtype): values = values.values.codes elif isinstance(values, (ABCSeries, ABCIndex)): values = values.values if is_signed_integer_dtype(dtype): values = _ensure_int64(values) duplicated = htable.duplicated_int64(values, keep=keep) elif is_unsigned_integer_dtype(dtype): values = _ensure_uint64(values) duplicated = htable.duplicated_uint64(values, keep=keep) elif is_float_dtype(dtype): values = _ensure_float64(values) duplicated = htable.duplicated_float64(values, keep=keep) else: values = _ensure_object(values) duplicated = htable.duplicated_object(values, keep=keep) return duplicated
def convert(values, units, axis): if not hasattr(axis, 'freq'): raise TypeError('Axis must have `freq` set to convert to Periods') valid_types = (compat.string_types, datetime, Period, pydt.date, pydt.time) if (isinstance(values, valid_types) or is_integer(values) or is_float(values)): return get_datevalue(values, axis.freq) if isinstance(values, PeriodIndex): return values.asfreq(axis.freq)._values if isinstance(values, Index): return values.map(lambda x: get_datevalue(x, axis.freq)) if is_period_arraylike(values): return PeriodIndex(values, freq=axis.freq)._values if isinstance(values, (list, tuple, np.ndarray, Index)): return [get_datevalue(x, axis.freq) for x in values] return values