示例#1
0
    def __new__(
        cls,
        data,
        inclusive=None,
        closed: None | lib.NoDefault = lib.no_default,
        dtype: Dtype | None = None,
        copy: bool = False,
        name: Hashable = None,
        verify_integrity: bool = True,
    ) -> IntervalIndex:

        inclusive, closed = _warning_interval(inclusive, closed)

        name = maybe_extract_name(name, data, cls)

        with rewrite_exception("IntervalArray", cls.__name__):
            array = IntervalArray(
                data,
                inclusive=inclusive,
                copy=copy,
                dtype=dtype,
                verify_integrity=verify_integrity,
            )

        return cls._simple_new(array, name)
示例#2
0
    def __init__(
        self,
        subtype,
        inclusive: str | None = None,
        closed: None | lib.NoDefault = lib.no_default,
    ) -> None:
        # attributes need to be set first before calling
        # super init (as that calls serialize)
        inclusive, closed = _warning_interval(inclusive, closed)
        assert inclusive in VALID_CLOSED
        self._closed = inclusive
        if not isinstance(subtype, pyarrow.DataType):
            subtype = pyarrow.type_for_alias(str(subtype))
        self._subtype = subtype

        storage_type = pyarrow.struct([("left", subtype), ("right", subtype)])
        pyarrow.ExtensionType.__init__(self, storage_type, "pandas.interval")
示例#3
0
    def from_tuples(
        cls,
        data,
        inclusive=None,
        closed: None | lib.NoDefault = lib.no_default,
        name: Hashable = None,
        copy: bool = False,
        dtype: Dtype | None = None,
    ) -> IntervalIndex:

        inclusive, closed = _warning_interval(inclusive, closed)
        if inclusive is None:
            inclusive = "both"

        with rewrite_exception("IntervalArray", cls.__name__):
            arr = IntervalArray.from_tuples(
                data, inclusive=inclusive, copy=copy, dtype=dtype
            )
        return cls._simple_new(arr, name=name)
示例#4
0
def interval_range(
    start=None,
    end=None,
    periods=None,
    freq=None,
    name: Hashable = None,
    closed: IntervalClosedType | lib.NoDefault = lib.no_default,
    inclusive: IntervalClosedType | None = None,
) -> IntervalIndex:
    """
    Return a fixed frequency IntervalIndex.

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals.
    end : numeric or datetime-like, default None
        Right bound for generating intervals.
    periods : int, default None
        Number of periods to generate.
    freq : numeric, str, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like.  Default is 1
        for numeric and 'D' for datetime-like.
    name : str, default None
        Name of the resulting IntervalIndex.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

        .. deprecated:: 1.5.0
           Argument `closed` has been deprecated to standardize boundary inputs.
           Use `inclusive` instead, to set each bound as closed or open.
    inclusive : {"both", "neither", "left", "right"}, default "both"
        Include boundaries; Whether to set each bound as closed or open.

        .. versionadded:: 1.5.0

    Returns
    -------
    IntervalIndex

    See Also
    --------
    IntervalIndex : An Index of intervals that are all closed on the same side.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Numeric ``start`` and  ``end`` is supported.

    >>> pd.interval_range(start=0, end=5, inclusive="right")
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                  dtype='interval[int64, right]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'), inclusive="right")
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]],
                  dtype='interval[datetime64[ns], right]')

    The ``freq`` parameter specifies the frequency between the left and right.
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5, inclusive="right")
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
                  dtype='interval[float64, right]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS', inclusive="right")
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]],
                  dtype='interval[datetime64[ns], right]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4, inclusive="right")
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
              dtype='interval[float64, right]')

    The ``inclusive`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, inclusive='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
                  dtype='interval[int64, both]')
    """
    inclusive, closed = _warning_interval(inclusive, closed)
    if inclusive is None:
        inclusive = "both"

    start = maybe_box_datetimelike(start)
    end = maybe_box_datetimelike(end)
    endpoint = start if start is not None else end

    if freq is None and com.any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else "D"

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError(
            "Of the four parameters: start, end, periods, and "
            "freq, exactly three must be specified"
        )

    if not _is_valid_endpoint(start):
        raise ValueError(f"start must be numeric or datetime-like, got {start}")
    elif not _is_valid_endpoint(end):
        raise ValueError(f"end must be numeric or datetime-like, got {end}")

    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods) and periods is not None:
        raise TypeError(f"periods must be a number, got {periods}")

    if freq is not None and not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError as err:
            raise ValueError(
                f"freq must be numeric or convertible to DateOffset, got {freq}"
            ) from err

    # verify type compatibility
    if not all(
        [
            _is_type_compatible(start, end),
            _is_type_compatible(start, freq),
            _is_type_compatible(end, freq),
        ]
    ):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    if periods is not None:
        periods += 1

    breaks: np.ndarray | TimedeltaIndex | DatetimeIndex

    if is_number(endpoint):
        # force consistency between start/end/freq (lower end if freq skips it)
        if com.all_not_none(start, end, freq):
            end -= (end - start) % freq

        # compute the period/start/end if unspecified (at most one)
        if periods is None:
            periods = int((end - start) // freq) + 1
        elif start is None:
            start = end - (periods - 1) * freq
        elif end is None:
            end = start + (periods - 1) * freq

        breaks = np.linspace(start, end, periods)
        if all(is_integer(x) for x in com.not_none(start, end, freq)):
            # np.linspace always produces float output

            breaks = maybe_downcast_numeric(breaks, np.dtype("int64"))
    else:
        # delegate to the appropriate range function
        if isinstance(endpoint, Timestamp):
            breaks = date_range(start=start, end=end, periods=periods, freq=freq)
        else:
            breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, inclusive=inclusive)
示例#5
0
    def __new__(
        cls,
        subtype=None,
        inclusive: str_type | None = None,
        closed: None | lib.NoDefault = lib.no_default,
    ):
        from pandas.core.dtypes.common import (
            is_string_dtype,
            pandas_dtype,
        )

        inclusive, closed = _warning_interval(inclusive, closed)

        if inclusive is not None and inclusive not in {
                "right",
                "left",
                "both",
                "neither",
        }:
            raise ValueError(
                "inclusive must be one of 'right', 'left', 'both', 'neither'")

        if isinstance(subtype, IntervalDtype):
            if inclusive is not None and inclusive != subtype.inclusive:
                raise ValueError(
                    "dtype.inclusive and 'inclusive' do not match. "
                    "Try IntervalDtype(dtype.subtype, inclusive) instead.")
            return subtype
        elif subtype is None:
            # we are called as an empty constructor
            # generally for pickle compat
            u = object.__new__(cls)
            u._subtype = None
            u._closed = inclusive
            return u
        elif isinstance(subtype, str) and subtype.lower() == "interval":
            subtype = None
        else:
            if isinstance(subtype, str):
                m = cls._match.search(subtype)
                if m is not None:
                    gd = m.groupdict()
                    subtype = gd["subtype"]
                    if gd.get("inclusive", None) is not None:
                        if inclusive is not None:
                            if inclusive != gd["inclusive"]:
                                raise ValueError(
                                    "'inclusive' keyword does not match value "
                                    "specified in dtype string")
                        inclusive = gd["inclusive"]

            try:
                subtype = pandas_dtype(subtype)
            except TypeError as err:
                raise TypeError("could not construct IntervalDtype") from err

        if CategoricalDtype.is_dtype(subtype) or is_string_dtype(subtype):
            # GH 19016
            msg = ("category, object, and string subtypes are not supported "
                   "for IntervalDtype")
            raise TypeError(msg)

        key = str(subtype) + str(inclusive)
        try:
            return cls._cache_dtypes[key]
        except KeyError:
            u = object.__new__(cls)
            u._subtype = subtype
            u._closed = inclusive
            cls._cache_dtypes[key] = u
            return u