def data(self):
    """Return the wrapped frame as a list of dicts, one per row.

    Each dict maps a column label to the corresponding cell after it has
    been passed through ``_maybe_box_datetimelike``.
    """
    # work around for https://github.com/pandas-dev/pandas/issues/18372
    frame = self.__df
    records = []
    for row in frame.values:
        record = {}
        # np.atleast_1d guarantees the row can be zipped with the columns
        # even if it arrives as a 0-d value.
        for column, cell in zip(frame.columns, np.atleast_1d(row)):
            record[column] = _maybe_box_datetimelike(cell)
        records.append(record)
    return records
def wind(in_testing, params):
    """Summarise recent uploads of the wind sensor (``id`` 2001).

    The ``sensors`` collection is aggregated per timestamp (last ``t1``,
    ``t2`` and ``h`` value of each ``ts``), newest first.  The newest row is
    skipped and the following ``params["size"]`` rows form the sample.

    Parameters
    ----------
    in_testing : value accepted by ``int()``
        Only validated via ``int()``; the converted value is not used.
    params : mapping
        Must provide ``"size"`` (sample size) and ``"tScore"``.

    Returns
    -------
    dict
        ``"uploads"`` holds one dict per sampled row (``None``/NaN cells
        omitted).  For each band ``low``/``med``/``high`` there is a
        ``<band>_avg`` (mean, 1 decimal) and a ``<band>_MOE`` entry, computed
        as ``std / sqrt(size) * tScore / mean * 100`` rounded to 1 decimal.
    """
    # Validated but otherwise unused; kept so bad input still raises.
    deployed = int(in_testing)

    pipeline = [
        {"$match": {"id": 2001}},
        {"$group": {"_id": "$ts",
                    "low": {"$last": "$t1"},
                    "med": {"$last": "$t2"},
                    "high": {"$last": "$h"}}},
        {"$sort": SON([("_id", -1)])},
    ]
    data = list(sensors.aggregate(pipeline, allowDiskUse=True))

    ### Set Sample size
    sample = params["size"]
    tScore = params["tScore"]

    frame = pd.DataFrame(data).rename(columns={'_id': 'ts'})
    # Offset by one, as the first is for subtracting the amount of time
    df = frame.iloc[1:sample + 1]

    result = {
        "uploads": [
            dict((k, _maybe_box_datetimelike(v))
                 for k, v in zip(df.columns, row)
                 # drop missing cells: None, and NaN (the only v != v)
                 if v is not None and v == v)
            for row in df.values
        ],
    }
    # Every band is summarised from its OWN readings.  The previous
    # hand-copied version computed "high_avg" from the "low" readings.
    for band in ("low", "med", "high"):
        readings = list(df[band])
        mean = np.mean(readings)
        result[band + "_avg"] = round(mean, 1)
        result[band + "_MOE"] = round(
            np.std(readings) / math.sqrt(sample) * tScore / mean * 100, 1)
    return result
def updateTS(_ID):
    """Rewrite the ``ts`` field of every ``sensors`` document with ``id == _ID``.

    All matching documents are loaded (sorted by ``ts`` ascending), the
    ``ts`` column is converted with ``pd.to_datetime`` and the converted
    values are written back through one unordered bulk operation, matching
    documents on ``_id``.  Progress and the bulk result are printed.

    Parameters
    ----------
    _ID : value stored in the documents' ``id`` field
        Sensor id to process.

    Returns
    -------
    None
        Any error is reported on stdout instead of being raised.
    """
    step0 = time.time()
    pipeline = [
        {"$match": {"id": _ID}},
        {"$sort": SON([("ts", 1)])},
    ]
    try:
        _TEST = pd.DataFrame(
            list(sensors.aggregate(pipeline, allowDiskUse=True)))
        # Single-argument print calls give the same output on Python 2 and 3.
        print("Total rows: %s" % len(_TEST))
        print("Data loaded in %ss" % (round((time.time() - step0), 1)))
        _TEST.ts = pd.to_datetime(_TEST.ts)
        # https://stackoverflow.com/questions/20167194/insert-a-pandas-dataframe-into-mongodb-using-pymongo/49127811
        my_list = [
            dict((k, _maybe_box_datetimelike(v))
                 for k, v in zip(_TEST.columns, row)
                 # drop missing cells: None, and NaN/NaT (v != v)
                 if v is not None and v == v)
            for row in _TEST.values
        ]
        bulk = sensors.initialize_unordered_bulk_op()
        for doc in my_list:
            bulk.find({'_id': doc["_id"]}).update(
                {'$set': {"ts": doc["ts"]}})
        # https://stackoverflow.com/questions/46458618/how-can-i-update-a-whole-collection-in-mongodb-and-not-document-by-document
        print(bulk.execute())
        print("Completed ID %s in %ss"
              % (_ID, (round((time.time() - step0), 1))))
    except Exception as exc:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit.  The original message is kept, and the real cause is
        # printed too because not every failure means the id is missing.
        print("ID %s does not exist." % _ID)
        print("Reason: %r" % (exc,))
def rain(in_testing, params):
    """Summarise recent uploads of the three rain gauges (sensor ``id`` 2000).

    The ``sensors`` collection is aggregated per timestamp (last ``t1``,
    ``h`` and ``r`` value of each ``ts``, exposed as ``r1``/``r2``/``r3``),
    newest first.  The newest row is skipped and the following
    ``params["size"]`` rows form the sample.

    Parameters
    ----------
    in_testing : str
        Comma-separated integer ids; the first three label ``r1``, ``r2``
        and ``r3`` respectively.
    params : mapping
        Must provide ``"size"`` (sample size) and ``"tScore"``.

    Returns
    -------
    list of dict
        One dict per gauge with ``"id"``, ``"uploads"`` (one ``ts``/reading
        dict per sampled row, ``None``/NaN cells omitted), ``"avg"`` (mean,
        1 decimal) and ``"MOE"`` (``std / sqrt(size) * tScore / mean * 100``,
        1 decimal).

    Raises
    ------
    ValueError
        If a part of ``in_testing`` is not an integer.
    IndexError
        If ``in_testing`` holds fewer than three ids.
    """
    # A real list: ``map(...)`` cannot be indexed on Python 3.
    deployed = [int(part) for part in in_testing.split(',')]

    pipeline = [
        {"$match": {"id": 2000}},
        {"$group": {"_id": "$ts",
                    "r1": {"$last": "$t1"},
                    "r2": {"$last": "$h"},
                    "r3": {"$last": "$r"}}},
        {"$sort": SON([("_id", -1)])},
    ]
    data = list(sensors.aggregate(pipeline, allowDiskUse=True))

    ### Set Sample size
    sample = params["size"]
    tScore = params["tScore"]

    df = pd.DataFrame(data).rename(columns={'_id': 'ts'})
    # Offset by one, as the first is for subtracting the amount of time
    window = df.iloc[1:sample + 1]

    results = []
    for position, column in enumerate(("r1", "r2", "r3")):
        # ``.loc`` replaces the deprecated ``.ix`` (removed in pandas 1.0).
        gauge = window.loc[:, ['ts', column]]
        readings = list(gauge[column])
        mean = np.mean(readings)
        results.append({
            "id": deployed[position],
            "uploads": [
                dict((k, _maybe_box_datetimelike(v))
                     for k, v in zip(gauge.columns, row)
                     # drop missing cells: None, and NaN (v != v)
                     if v is not None and v == v)
                for row in gauge.values
            ],
            "avg": round(mean, 1),
            "MOE": round(
                np.std(readings) / math.sqrt(sample) * tScore / mean * 100,
                1),
        })
    return results
def data(self):
    """Return ``self.df`` as a list of dicts, one per row.

    Cells are passed through ``_maybe_box_datetimelike``.  Any resulting
    ``int`` whose magnitude exceeds ``JS_MAX_INTEGER`` is replaced by its
    string form.
    """
    # work around for https://github.com/pandas-dev/pandas/issues/18372
    frame = self.df
    records = []
    for row in frame.values:
        record = {}
        for column, cell in zip(frame.columns, np.atleast_1d(row)):
            boxed = _maybe_box_datetimelike(cell)
            # if an int is too big for Java Script to handle
            # convert it to a string
            if isinstance(boxed, int) and abs(boxed) > JS_MAX_INTEGER:
                boxed = str(boxed)
            record[column] = boxed
        records.append(record)
    return records
def data(self):
    """Return ``self.df`` as a list of dicts, one per row.

    Cells are passed through ``_maybe_box_datetimelike``.  Any resulting
    ``int`` whose magnitude exceeds ``JS_MAX_INTEGER`` is replaced by its
    string form.
    """
    # work around for https://github.com/pandas-dev/pandas/issues/18372
    frame = self.df
    records = []
    for row in frame.values:
        record = {}
        for column, cell in zip(frame.columns, np.atleast_1d(row)):
            boxed = _maybe_box_datetimelike(cell)
            # if an int is too big for Java Script to handle
            # convert it to a string
            if isinstance(boxed, int) and abs(boxed) > JS_MAX_INTEGER:
                boxed = str(boxed)
            record[column] = boxed
        records.append(record)
    return records
def tolist(self):
    """
    Return the values as a list of scalars.

    Each element is a scalar type: a Python scalar (for str, int, float)
    or a pandas scalar (for Timestamp/Timedelta/Interval/Period).
    Datetime-like data is boxed element by element; everything else is
    delegated to the ``tolist`` method of ``self._values``.

    See Also
    --------
    numpy.ndarray.tolist
    """
    if not is_datetimelike(self):
        return self._values.tolist()
    return list(map(com._maybe_box_datetimelike, self._values))
def tolist(self):
    """
    Return the values as a list of scalars.

    Each element is a scalar type: a Python scalar (for str, int, float)
    or a pandas scalar (for Timestamp/Timedelta/Interval/Period).
    Datetime-like data is boxed element by element; everything else is
    delegated to the ``tolist`` method of ``self._values``.

    See Also
    --------
    numpy.ndarray.tolist
    """
    if not is_datetimelike(self):
        return self._values.tolist()
    return list(map(_maybe_box_datetimelike, self._values))
def df_to_mongo(df, collection):
    """Insert the rows of ``df`` into ``collection`` via ``insert_many``.

    Each row becomes one document.  Cells that are ``None``, NaN, or whose
    string form contains ``'#'`` are left out of their document.  Any
    exception is printed rather than raised.
    """
    try:
        # Ensure columns are unique:
        # NOTE(review): duplicated() on the transposed frame compares column
        # contents, not column labels -- confirm this is intended.
        keep_mask = ~df.T.duplicated(keep='first')
        df = df.loc[:, keep_mask]

        # Omit null/ nan values and convert to rows:
        documents = []
        for row in df.values:
            document = {}
            for column, cell in zip(df.columns, row):
                if cell != None and cell == cell and '#' not in str(cell):
                    document[column] = _maybe_box_datetimelike(cell)
            documents.append(document)

        # Import to collection
        collection.insert_many(documents)
    except Exception as e:
        print(e)
def interval_range(start=None, end=None, periods=None, freq=None,
                   name=None, closed='right'):
    """
    Return a fixed frequency IntervalIndex

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals
    end : numeric or datetime-like, default None
        Right bound for generating intervals
    periods : integer, default None
        Number of periods to generate
    freq : numeric, string, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like.  Default is 1
        for numeric and 'D' (calendar daily) for datetime-like.
    name : string, default None
        Name of the resulting IntervalIndex
    closed : string, default 'right'
        options are: 'left', 'right', 'both', 'neither'

    Notes
    -----
    Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
    must be specified.

    Returns
    -------
    rng : IntervalIndex

    Raises
    ------
    ValueError
        If the number of specified ``start``/``end``/``periods`` is not two,
        if ``start`` or ``end`` is not a valid endpoint, or if ``freq`` is
        neither numeric nor convertible to a DateOffset.
    TypeError
        If ``periods`` is not a number, or if ``start``, ``end`` and ``freq``
        are not type compatible.

    Examples
    --------
    Numeric ``start`` and  ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]]
                  closed='right', dtype='interval[int64]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]]
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``freq`` parameter specifies the frequency between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
                  closed='right', dtype='interval[float64]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]]
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]]
                  closed='both', dtype='interval[int64]')

    See Also
    --------
    IntervalIndex : an Index of intervals that are all closed on the same side.
    """
    # The argument-count check runs before any per-argument validation.
    if _count_not_none(start, end, periods) != 2:
        raise ValueError('Of the three parameters: start, end, and periods, '
                         'exactly two must be specified')

    start = _maybe_box_datetimelike(start)
    end = _maybe_box_datetimelike(end)
    # presumably the first of start/end that is not None (helper defined
    # elsewhere); its type selects the numeric vs datetime-like path below
    endpoint = next(_not_none(start, end))

    if not _is_valid_endpoint(start):
        msg = 'start must be numeric or datetime-like, got {start}'
        raise ValueError(msg.format(start=start))

    if not _is_valid_endpoint(end):
        msg = 'end must be numeric or datetime-like, got {end}'
        raise ValueError(msg.format(end=end))

    # float periods are truncated to int; anything else non-integer
    # (except None) is rejected
    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods) and periods is not None:
        msg = 'periods must be a number, got {periods}'
        raise TypeError(msg.format(periods=periods))

    # NOTE(review): ``freq or ...`` also replaces a falsy freq such as 0
    # with the default -- confirm that is intended.
    freq = freq or (1 if is_number(endpoint) else 'D')
    if not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError:
            raise ValueError('freq must be numeric or convertible to '
                             'DateOffset, got {freq}'.format(freq=freq))

    # verify type compatibility
    if not all([_is_type_compatible(start, end),
                _is_type_compatible(start, freq),
                _is_type_compatible(end, freq)]):
        raise TypeError("start, end, freq need to be type compatible")

    if is_number(endpoint):
        # exactly one of periods/start/end is None here; derive it
        if periods is None:
            periods = int((end - start) // freq)

        if start is None:
            start = end - periods * freq

        # force end to be consistent with freq (lower if freq skips over end)
        end = start + periods * freq

        # end + freq for inclusive endpoint
        # NOTE(review): numpy documents that np.arange with a non-integer
        # step can give inconsistent lengths; verify for float freq.
        breaks = np.arange(start, end + freq, freq)
    elif isinstance(endpoint, Timestamp):
        # add one to account for interval endpoints (n breaks = n-1 intervals)
        if periods is not None:
            periods += 1
        breaks = date_range(start=start, end=end, periods=periods, freq=freq)
    else:
        # add one to account for interval endpoints (n breaks = n-1 intervals)
        if periods is not None:
            periods += 1
        breaks = timedelta_range(start=start, end=end, periods=periods,
                                 freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
def interval_range(start=None, end=None, periods=None, freq=None,
                   name=None, closed='right'):
    """
    Return a fixed frequency IntervalIndex

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals
    end : numeric or datetime-like, default None
        Right bound for generating intervals
    periods : integer, default None
        Number of periods to generate
    freq : numeric, string, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like.  Default is 1
        for numeric and 'D' (calendar daily) for datetime-like.
    name : string, default None
        Name of the resulting IntervalIndex
    closed : string, default 'right'
        options are: 'left', 'right', 'both', 'neither'

    Notes
    -----
    Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
    must be specified.

    Returns
    -------
    rng : IntervalIndex

    Raises
    ------
    ValueError
        If the number of specified ``start``/``end``/``periods`` is not two,
        if ``start`` or ``end`` is not a valid endpoint, or if ``freq`` is
        neither numeric nor convertible to a DateOffset.
    TypeError
        If ``periods`` is not a number, or if ``start``, ``end`` and ``freq``
        are not type compatible.

    Examples
    --------
    Numeric ``start`` and  ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]]
                  closed='right', dtype='interval[int64]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]]
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``freq`` parameter specifies the frequency between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
                  closed='right', dtype='interval[float64]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]]
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]]
                  closed='both', dtype='interval[int64]')

    See Also
    --------
    IntervalIndex : an Index of intervals that are all closed on the same side.
    """
    # The argument-count check runs before any per-argument validation.
    if com._count_not_none(start, end, periods) != 2:
        raise ValueError('Of the three parameters: start, end, and periods, '
                         'exactly two must be specified')

    start = com._maybe_box_datetimelike(start)
    end = com._maybe_box_datetimelike(end)
    # presumably the first of start/end that is not None (helper defined
    # elsewhere); its type selects the numeric vs datetime-like path below
    endpoint = next(com._not_none(start, end))

    if not _is_valid_endpoint(start):
        msg = 'start must be numeric or datetime-like, got {start}'
        raise ValueError(msg.format(start=start))

    if not _is_valid_endpoint(end):
        msg = 'end must be numeric or datetime-like, got {end}'
        raise ValueError(msg.format(end=end))

    # float periods are truncated to int; anything else non-integer
    # (except None) is rejected
    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods) and periods is not None:
        msg = 'periods must be a number, got {periods}'
        raise TypeError(msg.format(periods=periods))

    # NOTE(review): ``freq or ...`` also replaces a falsy freq such as 0
    # with the default -- confirm that is intended.
    freq = freq or (1 if is_number(endpoint) else 'D')
    if not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError:
            raise ValueError('freq must be numeric or convertible to '
                             'DateOffset, got {freq}'.format(freq=freq))

    # verify type compatibility
    if not all([_is_type_compatible(start, end),
                _is_type_compatible(start, freq),
                _is_type_compatible(end, freq)]):
        raise TypeError("start, end, freq need to be type compatible")

    if is_number(endpoint):
        # exactly one of periods/start/end is None here; derive it
        if periods is None:
            periods = int((end - start) // freq)

        if start is None:
            start = end - periods * freq

        # force end to be consistent with freq (lower if freq skips over end)
        end = start + periods * freq

        # end + freq for inclusive endpoint
        # NOTE(review): numpy documents that np.arange with a non-integer
        # step can give inconsistent lengths; verify for float freq.
        breaks = np.arange(start, end + freq, freq)
    elif isinstance(endpoint, Timestamp):
        # add one to account for interval endpoints (n breaks = n-1 intervals)
        if periods is not None:
            periods += 1
        breaks = date_range(start=start, end=end, periods=periods, freq=freq)
    else:
        # add one to account for interval endpoints (n breaks = n-1 intervals)
        if periods is not None:
            periods += 1
        breaks = timedelta_range(start=start, end=end, periods=periods,
                                 freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
def data(self):
    """Return the wrapped frame as a list of dicts, one per row.

    Each dict maps a column label to the corresponding cell after it has
    been passed through ``_maybe_box_datetimelike``.
    """
    # work around for https://github.com/pandas-dev/pandas/issues/18372
    frame = self.__df
    records = []
    for row in frame.values:
        record = {}
        # np.atleast_1d guarantees the row can be zipped with the columns
        # even if it arrives as a 0-d value.
        for column, cell in zip(frame.columns, np.atleast_1d(row)):
            record[column] = _maybe_box_datetimelike(cell)
        records.append(record)
    return records
def interval_range(start=None, end=None, periods=None, freq=None,
                   name=None, closed='right'):
    """
    Return a fixed frequency IntervalIndex

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals
    end : numeric or datetime-like, default None
        Right bound for generating intervals
    periods : integer, default None
        Number of periods to generate
    freq : numeric, string, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like.  Default is 1
        for numeric and 'D' (calendar daily) for datetime-like.
    name : string, default None
        Name of the resulting IntervalIndex
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.

    Returns
    -------
    rng : IntervalIndex

    Raises
    ------
    ValueError
        If the number of specified ``start``/``end``/``periods``/``freq``
        (after defaulting ``freq``) is not three, if ``start`` or ``end`` is
        not a valid endpoint, or if ``freq`` is neither numeric nor
        convertible to a DateOffset.
    TypeError
        If ``periods`` is not a number, or if ``start``, ``end`` and ``freq``
        are not type compatible.

    Examples
    --------
    Numeric ``start`` and  ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]]
                  closed='right', dtype='interval[int64]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]]
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``freq`` parameter specifies the frequency between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
                  closed='right', dtype='interval[float64]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]]
                  closed='right', dtype='interval[datetime64[ns]]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
                  closed='right', dtype='interval[float64]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]]
                  closed='both', dtype='interval[int64]')

    See Also
    --------
    IntervalIndex : an Index of intervals that are all closed on the same side.
    """
    start = com._maybe_box_datetimelike(start)
    end = com._maybe_box_datetimelike(end)
    # the type of this value selects the numeric vs datetime-like path below
    endpoint = start if start is not None else end

    # freq is only defaulted when one of periods/start/end is missing; with
    # all three given it stays None and the spacing is derived from them
    if freq is None and com._any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else 'D'

    # counted after the defaulting above, so an omitted freq may already
    # have been filled in
    if com._count_not_none(start, end, periods, freq) != 3:
        raise ValueError('Of the four parameters: start, end, periods, and '
                         'freq, exactly three must be specified')

    if not _is_valid_endpoint(start):
        msg = 'start must be numeric or datetime-like, got {start}'
        raise ValueError(msg.format(start=start))
    elif not _is_valid_endpoint(end):
        msg = 'end must be numeric or datetime-like, got {end}'
        raise ValueError(msg.format(end=end))

    # float periods are truncated to int; anything else non-integer
    # (except None) is rejected
    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods) and periods is not None:
        msg = 'periods must be a number, got {periods}'
        raise TypeError(msg.format(periods=periods))

    if freq is not None and not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError:
            raise ValueError('freq must be numeric or convertible to '
                             'DateOffset, got {freq}'.format(freq=freq))

    # verify type compatibility
    if not all([_is_type_compatible(start, end),
                _is_type_compatible(start, freq),
                _is_type_compatible(end, freq)]):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    # from here on ``periods`` is the number of breaks, not of intervals
    if periods is not None:
        periods += 1

    if is_number(endpoint):
        # force consistency between start/end/freq (lower end if freq skips it)
        if com._all_not_none(start, end, freq):
            end -= (end - start) % freq

        # compute the period/start/end if unspecified (at most one)
        if periods is None:
            periods = int((end - start) // freq) + 1
        elif start is None:
            start = end - (periods - 1) * freq
        elif end is None:
            end = start + (periods - 1) * freq

        breaks = np.linspace(start, end, periods)
        if all(is_integer(x) for x in com._not_none(start, end, freq)):
            # np.linspace always produces float output
            breaks = maybe_downcast_to_dtype(breaks, 'int64')
    else:
        # delegate to the appropriate range function
        if isinstance(endpoint, Timestamp):
            range_func = date_range
        else:
            range_func = timedelta_range

        breaks = range_func(start=start, end=end, periods=periods, freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
def rain(in_testing, params):
    """Summarise recent uploads of the three rain gauges (sensor ``id`` 2000).

    The ``sensors`` collection is aggregated per timestamp (last ``t1``,
    ``h`` and ``r`` value of each ``ts``, exposed as ``r1``/``r2``/``r3``),
    newest first.  The newest row is skipped and the following
    ``params["size"]`` rows form the sample.

    Parameters
    ----------
    in_testing : str
        Comma-separated integer ids; the first three label ``r1``, ``r2``
        and ``r3`` respectively.
    params : mapping
        Must provide ``"size"`` (sample size) and ``"tScore"``.

    Returns
    -------
    list of dict
        One dict per gauge with ``"id"``, ``"uploads"`` (one ``ts``/reading
        dict per sampled row, ``None``/NaN cells omitted), ``"avg"`` (mean,
        1 decimal) and ``"MOE"`` (``std / sqrt(size) * tScore / mean * 100``,
        1 decimal).

    Raises
    ------
    ValueError
        If a part of ``in_testing`` is not an integer.
    IndexError
        If ``in_testing`` holds fewer than three ids.
    """
    # A real list: ``map(...)`` cannot be indexed on Python 3.
    deployed = [int(part) for part in in_testing.split(',')]

    pipeline = [{
        "$match": {"id": 2000}
    }, {
        "$group": {
            "_id": "$ts",
            "r1": {"$last": "$t1"},
            "r2": {"$last": "$h"},
            "r3": {"$last": "$r"}
        }
    }, {
        "$sort": SON([("_id", -1)])
    }]
    data = list(sensors.aggregate(pipeline, allowDiskUse=True))

    ### Set Sample size
    sample = params["size"]
    tScore = params["tScore"]

    df = pd.DataFrame(data).rename(columns={'_id': 'ts'})
    # Offset by one, as the first is for subtracting the amount of time
    window = df.iloc[1:sample + 1]

    results = []
    for position, column in enumerate(("r1", "r2", "r3")):
        # ``.loc`` replaces the deprecated ``.ix`` (removed in pandas 1.0).
        gauge = window.loc[:, ['ts', column]]
        readings = list(gauge[column])
        mean = np.mean(readings)
        results.append({
            "id": deployed[position],
            "uploads": [
                dict((k, _maybe_box_datetimelike(v))
                     for k, v in zip(gauge.columns, row)
                     # drop missing cells: None, and NaN (v != v)
                     if v is not None and v == v)
                for row in gauge.values
            ],
            "avg": round(mean, 1),
            "MOE": round(
                np.std(readings) / math.sqrt(sample) * tScore / mean * 100,
                1),
        })
    return results
def wind(in_testing, params):
    """Summarise recent uploads of the wind sensor (``id`` 2001).

    The ``sensors`` collection is aggregated per timestamp (last ``t1``,
    ``t2`` and ``h`` value of each ``ts``), newest first.  The newest row is
    skipped and the following ``params["size"]`` rows form the sample.

    Parameters
    ----------
    in_testing : value accepted by ``int()``
        Only validated via ``int()``; the converted value is not used.
    params : mapping
        Must provide ``"size"`` (sample size) and ``"tScore"``.

    Returns
    -------
    dict
        ``"uploads"`` holds one dict per sampled row (``None``/NaN cells
        omitted).  For each band ``low``/``med``/``high`` there is a
        ``<band>_avg`` (mean, 1 decimal) and a ``<band>_MOE`` entry, computed
        as ``std / sqrt(size) * tScore / mean * 100`` rounded to 1 decimal.
    """
    # Validated but otherwise unused; kept so bad input still raises.
    deployed = int(in_testing)

    pipeline = [{
        "$match": {"id": 2001}
    }, {
        "$group": {
            "_id": "$ts",
            "low": {"$last": "$t1"},
            "med": {"$last": "$t2"},
            "high": {"$last": "$h"}
        }
    }, {
        "$sort": SON([("_id", -1)])
    }]
    data = list(sensors.aggregate(pipeline, allowDiskUse=True))

    ### Set Sample size
    sample = params["size"]
    tScore = params["tScore"]

    frame = pd.DataFrame(data).rename(columns={'_id': 'ts'})
    # Offset by one, as the first is for subtracting the amount of time
    df = frame.iloc[1:sample + 1]

    result = {
        "uploads": [
            dict((k, _maybe_box_datetimelike(v))
                 for k, v in zip(df.columns, row)
                 # drop missing cells: None, and NaN (the only v != v)
                 if v is not None and v == v)
            for row in df.values
        ],
    }
    # Every band is summarised from its OWN readings.  The previous
    # hand-copied version computed "high_avg" from the "low" readings.
    for band in ("low", "med", "high"):
        readings = list(df[band])
        mean = np.mean(readings)
        result[band + "_avg"] = round(mean, 1)
        result[band + "_MOE"] = round(
            np.std(readings) / math.sqrt(sample) * tScore / mean * 100, 1)
    return result
def interval_range(start=None, end=None, periods=None, freq=None,
                   name=None, closed='right'):
    """
    Return a fixed frequency IntervalIndex

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals
    end : numeric or datetime-like, default None
        Right bound for generating intervals
    periods : integer, default None
        Number of periods to generate
    freq : numeric, string, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like.  Default is 1
        for numeric and 'D' (calendar daily) for datetime-like.
    name : string, default None
        Name of the resulting IntervalIndex
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.

    Returns
    -------
    rng : IntervalIndex

    Raises
    ------
    ValueError
        If the number of specified ``start``/``end``/``periods``/``freq``
        (after defaulting ``freq``) is not three, if ``start`` or ``end`` is
        not a valid endpoint, or if ``freq`` is neither numeric nor
        convertible to a DateOffset.
    TypeError
        If ``periods`` is not a number, or if ``start``, ``end`` and ``freq``
        are not type compatible.

    Examples
    --------
    Numeric ``start`` and  ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]]
                  closed='right', dtype='interval[int64]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]]
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``freq`` parameter specifies the frequency between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
                  closed='right', dtype='interval[float64]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]]
                  closed='right', dtype='interval[datetime64[ns]]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
                  closed='right', dtype='interval[float64]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]]
                  closed='both', dtype='interval[int64]')

    See Also
    --------
    IntervalIndex : an Index of intervals that are all closed on the same side.
    """
    start = com._maybe_box_datetimelike(start)
    end = com._maybe_box_datetimelike(end)
    # the type of this value selects the numeric vs datetime-like path below
    endpoint = start if start is not None else end

    # freq is only defaulted when one of periods/start/end is missing; with
    # all three given it stays None and the spacing is derived from them
    if freq is None and com._any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else 'D'

    # counted after the defaulting above, so an omitted freq may already
    # have been filled in
    if com._count_not_none(start, end, periods, freq) != 3:
        raise ValueError('Of the four parameters: start, end, periods, and '
                         'freq, exactly three must be specified')

    if not _is_valid_endpoint(start):
        msg = 'start must be numeric or datetime-like, got {start}'
        raise ValueError(msg.format(start=start))
    elif not _is_valid_endpoint(end):
        msg = 'end must be numeric or datetime-like, got {end}'
        raise ValueError(msg.format(end=end))

    # float periods are truncated to int; anything else non-integer
    # (except None) is rejected
    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods) and periods is not None:
        msg = 'periods must be a number, got {periods}'
        raise TypeError(msg.format(periods=periods))

    if freq is not None and not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError:
            raise ValueError('freq must be numeric or convertible to '
                             'DateOffset, got {freq}'.format(freq=freq))

    # verify type compatibility
    if not all([
            _is_type_compatible(start, end),
            _is_type_compatible(start, freq),
            _is_type_compatible(end, freq)
    ]):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    # from here on ``periods`` is the number of breaks, not of intervals
    if periods is not None:
        periods += 1

    if is_number(endpoint):
        # force consistency between start/end/freq (lower end if freq skips it)
        if com._all_not_none(start, end, freq):
            end -= (end - start) % freq

        # compute the period/start/end if unspecified (at most one)
        if periods is None:
            periods = int((end - start) // freq) + 1
        elif start is None:
            start = end - (periods - 1) * freq
        elif end is None:
            end = start + (periods - 1) * freq

        breaks = np.linspace(start, end, periods)
        if all(is_integer(x) for x in com._not_none(start, end, freq)):
            # np.linspace always produces float output
            breaks = maybe_downcast_to_dtype(breaks, 'int64')
    else:
        # delegate to the appropriate range function
        if isinstance(endpoint, Timestamp):
            range_func = date_range
        else:
            range_func = timedelta_range

        breaks = range_func(start=start, end=end, periods=periods, freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)