示例#1
0
文件: common.py 项目: BrenBarn/pandas
def maybe_to_datetimelike(data, copy=False):
    """
    Wrap a datetimelike Series in the matching properties delegate.

    Handles datetime64 (naive or tz-aware) and timedelta64 dtypes first,
    then falls back to probing the values for Period-like or datetime-like
    content.

    Parameters
    ----------
    data : Series
    copy : boolean, default False
           forwarded as ``copy`` to the index constructor

    Returns
    -------
    DelegatedClass
        DatetimeProperties, TimedeltaProperties or PeriodProperties

    Raises
    ------
    TypeError
        if ``data`` is not a Series or is not datetimelike

    """
    from pandas import Series

    message = "cannot convert an object of type {0} to a datetimelike index".format(type(data))
    if not isinstance(data, Series):
        raise TypeError(message)

    index = data.index
    dtype = data.dtype

    # Fast paths: the dtype alone tells us which delegate to build.
    if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
        values = DatetimeIndex(data, copy=copy, freq='infer')
        return DatetimeProperties(values, index, name=data.name)

    if is_timedelta64_dtype(dtype):
        values = TimedeltaIndex(data, copy=copy, freq='infer')
        return TimedeltaProperties(values, index, name=data.name)

    # Slow paths: inspect the values themselves.
    if is_period_arraylike(data):
        values = PeriodIndex(data, copy=copy)
        return PeriodProperties(values, index, name=data.name)

    if is_datetime_arraylike(data):
        values = DatetimeIndex(data, copy=copy, freq='infer')
        return DatetimeProperties(values, index, name=data.name)

    raise TypeError(message)
示例#2
0
def maybe_to_datetimelike(data, copy=False):
    """
    Wrap a datetimelike Series in the matching properties delegate.

    The dtype's scalar type is checked against ``np.datetime64`` and
    ``np.timedelta64`` first; otherwise the values are probed for
    Period-like or datetime-like content.

    Parameters
    ----------
    data : Series
    copy : boolean, default False
           forwarded as ``copy`` to the index constructor

    Returns
    -------
    DelegatedClass
        DatetimeProperties, TimedeltaProperties or PeriodProperties

    Raises
    ------
    TypeError
        if ``data`` is not a Series or is not datetimelike

    """
    from pandas import Series

    message = "cannot convert an object of type {0} to a datetimelike index".format(type(data))
    if not isinstance(data, Series):
        raise TypeError(message)

    index = data.index
    scalar_type = data.dtype.type

    # Fast paths: the dtype's scalar type decides the delegate.
    if issubclass(scalar_type, np.datetime64):
        values = DatetimeIndex(data, copy=copy, freq='infer')
        return DatetimeProperties(values, index)

    if issubclass(scalar_type, np.timedelta64):
        values = TimedeltaIndex(data, copy=copy, freq='infer')
        return TimedeltaProperties(values, index)

    # Slow paths: inspect the values themselves.
    if is_period_arraylike(data):
        values = PeriodIndex(data, copy=copy)
        return PeriodProperties(values, index)

    if is_datetime_arraylike(data):
        values = DatetimeIndex(data, copy=copy, freq='infer')
        return DatetimeProperties(values, index)

    raise TypeError(message)
示例#3
0
文件: common.py 项目: yaduart/pandas
def maybe_to_datetimelike(data, copy=False):
    """
    return a DelegatedClass of a Series that is datetimelike
      (e.g. datetime64[ns],timedelta64[ns] dtype or a Series of Periods)
    raise TypeError if this is not possible.

    If ``data`` is categorical, the conversion is applied to its categories
    and the original Series is handed to the delegate as ``orig``.

    Parameters
    ----------
    data : Series
    copy : boolean, default False
           copy the input data

    Returns
    -------
    DelegatedClass

    Raises
    ------
    TypeError
        if ``data`` is not a Series or is not datetimelike

    """
    from pandas import Series

    if not isinstance(data, Series):
        raise TypeError("cannot convert an object of type {0} to a "
                        "datetimelike index".format(type(data)))

    # Capture index and name up front: ``data`` may be rebound to the
    # categories below, which carry their own (different) name.
    index = data.index
    name = data.name
    orig = data if is_categorical_dtype(data) else None
    if orig is not None:
        data = orig.values.categories

    if is_datetime64_dtype(data.dtype):
        return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'),
                                  index, name=name, orig=orig)
    elif is_datetime64tz_dtype(data.dtype):
        # Use the Series name captured above, like every other branch;
        # ``data.name`` would be the categories' name for categorical input.
        return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer',
                                                ambiguous='infer'),
                                  index, name=name, orig=orig)
    elif is_timedelta64_dtype(data.dtype):
        return TimedeltaProperties(TimedeltaIndex(data, copy=copy,
                                                  freq='infer'), index,
                                   name=name, orig=orig)
    else:
        if is_period_arraylike(data):
            return PeriodProperties(PeriodIndex(data, copy=copy), index,
                                    name=name, orig=orig)
        if is_datetime_arraylike(data):
            return DatetimeProperties(DatetimeIndex(data, copy=copy,
                                                    freq='infer'), index,
                                      name=name, orig=orig)

    raise TypeError("cannot convert an object of type {0} to a "
                    "datetimelike index".format(type(data)))
示例#4
0
文件: fmt.py 项目: georgebdavis/tia
def guess_formatter(values, precision=1, commas=True, parens=True, nan='nan', prefix=None, pcts=0,
                    trunc_dot_zeros=0):
    """Based on the values, return the most suitable formatter.

    The choice is driven by the order of magnitude of the smallest absolute
    value (thousands, millions, billions, trillions), with percent, int and
    float formatters as fall-backs.

    Parameters
    ----------
    values : Series, DataFrame, scalar, list, tuple, or ndarray
        Values used to determine which formatter is the best fit.
    precision, commas, parens, nan, prefix, trunc_dot_zeros
        Forwarded to the selected ``new_*_formatter`` factory (``precision``
        is dropped when the int formatter is selected).
    pcts : bool or int, default 0
        When truthy, the percent formatter is selected if the smallest
        magnitude is non-zero and below 1 and the largest magnitude is
        below 1.

    Returns
    -------
    callable
        The selected formatter. If the values cannot be analysed (any
        ``Exception`` is raised along the way), the identity function is
        returned as a best-effort fallback.
    """
    formatter_args = dict(precision=precision, commas=commas, parens=parens, nan=nan, prefix=prefix,
                          trunc_dot_zeros=trunc_dot_zeros)

    try:
        if isinstance(values, pd.Series):
            # added a helper method for date time specific arrays as timestamps can be annoying when printed
            if is_datetime_arraylike(values):
                # basic date formatter if no hours or minutes
                if (values.dt.hour == 0).all() and (values.dt.minute == 0).all():
                    return new_datetime_formatter()

            aval = values.abs()
            vmax, vmin = aval.max(), aval.min()
        elif isinstance(values, np.ndarray):
            if values.ndim == 2:
                avalues = pd.DataFrame(values).abs()
                vmax = avalues.max().max()
                vmin = avalues.min().min()
            elif values.ndim == 1:
                aval = pd.Series(values).abs()
                vmax, vmin = aval.max(), aval.min()
            else:
                raise ValueError('cannot accept frame with more than 2-dimensions')
        elif isinstance(values, pd.DataFrame):
            avalues = values.abs()
            vmax = avalues.max().max()
            vmin = avalues.min().min()
        elif isinstance(values, (list, tuple)):
            # Compare magnitudes, as every other branch does; otherwise a
            # negative entry makes log10 fail and the guess is thrown away.
            magnitudes = [abs(v) for v in values]
            vmax = max(magnitudes)
            vmin = min(magnitudes)
        else:
            vmax = vmin = abs(values)

        if np.isnan(vmin):
            return new_float_formatter(**formatter_args)

        min_digits = 0 if vmin == 0 else math.floor(math.log10(vmin))
        if min_digits >= 12:
            return new_trillions_formatter(**formatter_args)
        elif min_digits >= 9:
            return new_billions_formatter(**formatter_args)
        elif min_digits >= 6:
            return new_millions_formatter(**formatter_args)
        elif min_digits >= 3:
            return new_thousands_formatter(**formatter_args)
        elif pcts and min_digits < 0 and vmax < 1:
            return new_percent_formatter(**formatter_args)
        elif isinstance(vmax, int):
            # The int formatter takes no precision argument.
            formatter_args.pop('precision')
            return new_int_formatter(**formatter_args)
        else:
            return new_float_formatter(**formatter_args)
    except Exception:
        # Deliberate best-effort: unformattable input gets the identity
        # formatter. KeyboardInterrupt/SystemExit are no longer swallowed.
        return lambda x: x
def censor(x, range=(0, 1), only_finite=True):
    """
    Replace values lying outside ``range`` with a **NULL** type object.

    Parameters
    ----------
    x : array_like
        Values to manipulate
    range : tuple
        (min, max) giving desired output range
    only_finite : bool
        If True (the default), only finite values are replaced.

    Returns
    -------
    x : array_like
        Censored values. Empty input is returned as is.

    Raises
    ------
    ValueError
        If no **NULL** object is known for the type of the values.

    Examples
    --------
    >>> a = [1, 2, np.inf, 3, 4, -np.inf, 5]
    >>> censor(a, (0, 10))
    [1, 2, inf, 3, 4, -inf, 5]
    >>> censor(a, (0, 10), False)
    [1, 2, nan, 3, 4, nan, 5]
    >>> censor(a, (2, 4))
    [nan, 2, inf, 3, 4, -inf, nan]

    Notes
    -----
    All values in ``x`` should be of the same type. ``only_finite`` parameter
    is not considered for Datetime and Timedelta types.

    The **NULL** type object depends on the type of values in **x**.

    - :class:`float` - :py:`float('nan')`
    - :class:`int` - :py:`float('nan')`
    - :class:`datetime.datetime` : :py:`np.datetime64(NaT)`
    - :class:`datetime.timedelta` : :py:`np.timedelta64(NaT)`

    """
    if len(x) == 0:
        return x

    stdlib_time_types = (datetime.datetime, datetime.timedelta)
    numpy_pandas_time_types = (pd.Timestamp, pd.Timedelta, np.datetime64,
                               np.timedelta64)
    head = first_element(x)

    # Exact type match on purpose (not isinstance), so subclasses of the
    # stdlib time types do not take this path.
    if type(head) in stdlib_time_types:
        return _censor_with(x, range, 'NaT')

    # Plain sequences (no dtype) holding numpy/pandas time scalars.
    if isinstance(head, numpy_pandas_time_types) and not hasattr(x, 'dtype'):
        return _censor_with(x, range, type(head)('NaT'))

    # Pick the NULL object that matches the kind of values.
    as_array = np.asarray(x)
    if pdtypes.is_number(head) and not isinstance(head, np.timedelta64):
        null = float('nan')
    elif com.is_datetime_arraylike(as_array):
        null = pd.Timestamp('NaT')
    elif pdtypes.is_datetime64_dtype(as_array):
        null = np.datetime64('NaT')
    elif isinstance(head, pd.Timedelta):
        null = pd.Timedelta('NaT')
    elif pdtypes.is_timedelta64_dtype(as_array):
        null = np.timedelta64('NaT')
    else:
        raise ValueError("Do not know how to censor values of type "
                         "{}".format(type(head)))

    # Every element is eligible unless finiteness is requested and can
    # actually be computed for these values.
    finite = np.repeat(True, len(x))
    if only_finite:
        try:
            finite = np.isfinite(x)
        except TypeError:
            pass

    if hasattr(x, 'dtype'):
        mask = finite & ((x < range[0]) | (x > range[1]))
        result = x.copy()
        result[mask] = null
        return result

    result = []
    for val, is_finite in zip(x, finite):
        out_of_bounds = not range[0] <= val <= range[1]
        result.append(null if out_of_bounds and is_finite else val)
    return result