Exemplo n.º 1
0
 def __init__(self, name, aggregates=None, granularity=None, start=None, end=None, limit=None):
     """Build a datapoints query for a single time series.

     Args:
         name (str): Name of the time series to query.
         aggregates (list, optional): Aggregate function names; stored as a
             comma-separated string (an empty list becomes ``""``).
         granularity (str, optional): Granularity of the aggregate values.
         start (Union[str, int, datetime], optional): Interval start; converted
             to ms since epoch.
         end (Union[str, int, datetime], optional): Interval end; converted to
             ms since epoch.
         limit (int, optional): Maximum number of datapoints to return.
     """
     self.name = name
     self.aggregates = None if aggregates is None else ",".join(aggregates)
     self.granularity = granularity
     start_ms, end_ms = _utils.interval_to_ms(start, end)
     # Preserve "not provided" semantics: a falsy start/end stays None instead
     # of keeping the defaults interval_to_ms fills in.
     self.start = start_ms if start else None
     self.end = end_ms if end else None
     self.limit = limit
Exemplo n.º 2
0
 def test_interval_to_ms(self):
     """interval_to_ms must return an integer (start, end) pair for every
     supported input form: None, '-ago' shorthand strings, and datetimes."""
     interval_cases = [
         (None, None),
         ("1w-ago", "1d-ago"),
         (datetime(2018, 2, 1), datetime(2018, 3, 1)),
     ]
     for begin, finish in interval_cases:
         assert isinstance(utils.interval_to_ms(begin, finish)[0], int)
         assert isinstance(utils.interval_to_ms(begin, finish)[1], int)
Exemplo n.º 3
0
    def get_datapoints_frame(self, time_series, aggregates, granularity, start, end=None, **kwargs) -> pd.DataFrame:
        """Returns a pandas dataframe of datapoints for the given timeseries all on the same timestamps.

        This method will automate paging for the user and return all data for the given time period.

        Args:
            time_series (list):  The list of timeseries names to retrieve data for. Each timeseries can be either a string
                                containing the timeseries or a dictionary containing the names of the timeseries and a
                                list of specific aggregate functions.

            aggregates (list):  The list of aggregate functions you wish to apply to the data for which you have not
                                specified an aggregate function. Valid aggregate functions are: 'average/avg, max, min,
                                count, sum, interpolation/int, stepinterpolation/step'.

            granularity (str):  The granularity of the aggregate values. Valid entries are : 'day/d, hour/h, minute/m,
                                second/s', or a multiple of these indicated by a number as a prefix e.g. '12hour'.

            start (Union[str, int, datetime]):    Get datapoints after this time. Format is N[timeunit]-ago where timeunit is w,d,h,m,s.
                                        E.g. '2d-ago' will get everything that is up to 2 days old. Can also send time in ms since
                                        epoch or a datetime object which will be converted to ms since epoch UTC.

            end (Union[str, int, datetime]):      Get datapoints up to this time. Same format as for start.

        Keyword Arguments:
            limit (str): Max number of rows to return. If limit is specified, this method will not automate
                            paging and will return a maximum of 100,000 rows.

            workers (int):    Number of download workers to run in parallel. Defaults to 10.

        Returns:
            pandas.DataFrame: A pandas dataframe containing the datapoints for the given timeseries. The datapoints for all the
            timeseries will all be on the same timestamps.

        Raises:
            ValueError: If ``time_series`` is not a list.

        Examples:
            Get a dataframe of aggregated time series data::

                client = CogniteClient()

                res = client.datapoints.get_datapoints_frame(time_series=["ts1", "ts2"],
                                aggregates=["avg"], granularity="30s", start="1w-ago")

                print(res)

            The ``timeseries`` parameter can take a list of strings and/or dicts on the following formats.
            This is useful for specifying aggregate functions on a per time series level::

                Using strings:
                    ['<timeseries1>', '<timeseries2>']

                Using dicts:
                    [{'name': '<timeseries1>', 'aggregates': ['<aggfunc1>', '<aggfunc2>']},
                    {'name': '<timeseries2>', 'aggregates': []}]

                Using both:
                    ['<timeseries1>', {'name': '<timeseries2>', 'aggregates': ['<aggfunc1>', '<aggfunc2>']}]
        """
        # Fail fast on the one input shape we cannot recover from.
        if not isinstance(time_series, list):
            raise ValueError("time_series should be a list")
        # Normalize start/end (ago-strings, datetimes, ms) to ms since epoch.
        start, end = _utils.interval_to_ms(start, end)

        # A user-supplied limit disables paging and parallel download entirely.
        if kwargs.get("limit"):
            return self._get_datapoints_frame_user_defined_limit(
                time_series, aggregates, granularity, start, end, limit=kwargs.get("limit")
            )

        # NOTE(review): `or` (rather than a .get default) means an explicit
        # workers=0 or workers=None also falls back to self._num_of_workers —
        # confirm that is intended.
        num_of_workers = kwargs.get("workers") or self._num_of_workers

        # Split [start, end) into windows so workers can download in parallel.
        windows = _utils.get_datapoints_windows(start, end, granularity, num_of_workers)

        partial_get_dpsf = partial(
            self._get_datapoints_frame_helper_wrapper,
            time_series=time_series,
            aggregates=aggregates,
            granularity=granularity,
        )

        with Pool(len(windows)) as p:
            dataframes = p.map(partial_get_dpsf, windows)

        # Adjacent windows may repeat edge rows; keep one row per timestamp.
        df = pd.concat(dataframes).drop_duplicates(subset="timestamp").reset_index(drop=True)

        return df
Exemplo n.º 4
0
    def get_multi_time_series_datapoints(
        self, datapoints_queries: List[DatapointsQuery], start, end=None, aggregates=None, granularity=None, **kwargs
    ) -> DatapointsResponseIterator:
        """Returns a list of DatapointsObjects each of which contains a list of datapoints for the given timeseries.

        This method will automate paging for the user and return all data for the given time period(s).

        Args:
            datapoints_queries (list[stable.datapoints.DatapointsQuery]): The list of DatapointsQuery objects specifying which
                                                                        timeseries to retrieve data for.

            start (Union[str, int, datetime]):    Get datapoints after this time. Format is N[timeunit]-ago where timeunit is w,d,h,m,s.
                                        E.g. '2d-ago' will get everything that is up to 2 days old. Can also send time in ms since
                                        epoch or a datetime object which will be converted to ms since epoch UTC.

            end (Union[str, int, datetime]):      Get datapoints up to this time. Same format as for start.

            aggregates (list, optional):    The list of aggregate functions you wish to apply to the data. Valid aggregate
                                            functions are: 'average/avg, max, min, count, sum, interpolation/int,
                                            stepinterpolation/step'.

            granularity (str):              The granularity of the aggregate values. Valid entries are : 'day/d, hour/h,
                                            minute/m, second/s', or a multiple of these indicated by a number as a prefix
                                            e.g. '12hour'.

        Keyword Arguments:
            include_outside_points (bool):  Whether to include the first datapoint outside each interval boundary.

        Returns:
            stable.datapoints.DatapointsResponseIterator: An iterator which iterates over stable.datapoints.DatapointsResponse objects.
        """
        url = "/timeseries/dataquery"
        start, end = _utils.interval_to_ms(start, end)

        # Work on copies: per-query limit/start are mutated while paging.
        datapoints_queries = [copy(dpq) for dpq in datapoints_queries]

        def _is_raw(dpq):
            # A query is "raw" (unaggregated) when neither it nor the call
            # specifies aggregates, or when it explicitly opts out with ""
            # (produced by passing aggregates=[] to DatapointsQuery).
            return (dpq.aggregates is None and aggregates is None) or dpq.aggregates == ""

        # BUGFIX: the raw/aggregated split previously used a narrower
        # predicate when assigning limits than when counting, so a query with
        # explicit empty aggregates plus call-level aggregates was counted as
        # raw but limited as aggregated (ZeroDivisionError when no other
        # aggregated query existed). Use one predicate for both.
        num_of_dpqs_raw = sum(1 for dpq in datapoints_queries if _is_raw(dpq))
        num_of_dpqs_with_agg = len(datapoints_queries) - num_of_dpqs_raw

        # Budget the per-request limit evenly across queries of each kind.
        items = []
        for dpq in datapoints_queries:
            if _is_raw(dpq):
                dpq.limit = int(self._LIMIT / num_of_dpqs_raw)
            else:
                dpq.limit = int(self._LIMIT_AGG / num_of_dpqs_with_agg)
            items.append(dpq.__dict__)
        body = {
            "items": items,
            "aggregates": ",".join(aggregates) if aggregates is not None else None,
            "granularity": granularity,
            "start": start,
            "includeOutsidePoints": kwargs.get("include_outside_points", False),
            "end": end,
        }
        # Page until every query returns fewer datapoints than its limit.
        datapoints_responses = []
        has_incomplete_requests = True
        while has_incomplete_requests:
            res = self._post(url=url, body=body).json()["data"]["items"]
            datapoints_responses.append(res)
            has_incomplete_requests = False
            for i, dpr in enumerate(res):
                dpq = datapoints_queries[i]
                if len(dpr["datapoints"]) == dpq.limit:
                    # A full page means there may be more data: advance start
                    # past the last timestamp (by one granularity step, or 1 ms
                    # for raw data) and request again.
                    has_incomplete_requests = True
                    latest_timestamp = dpr["datapoints"][-1]["timestamp"]
                    ts_granularity = granularity if dpq.granularity is None else dpq.granularity
                    next_start = latest_timestamp + (_utils.granularity_to_ms(ts_granularity) if ts_granularity else 1)
                else:
                    # Exhausted: park start past the end so later pages are empty.
                    next_start = end - 1
                    if datapoints_queries[i].end:
                        next_start = datapoints_queries[i].end - 1
                datapoints_queries[i].start = next_start

        # Stitch the pages back together, one result object per query.
        results = [{"data": {"items": [{"name": dpq.name, "datapoints": []}]}} for dpq in datapoints_queries]
        for res in datapoints_responses:
            for i, ts in enumerate(res):
                results[i]["data"]["items"][0]["datapoints"].extend(ts["datapoints"])
        return DatapointsResponseIterator([DatapointsResponse(result) for result in results])
Exemplo n.º 5
0
    def get_datapoints(self, name, start, end=None, aggregates=None, granularity=None, **kwargs) -> DatapointsResponse:
        """Returns a DatapointsObject containing a list of datapoints for the given query.

        This method will automate paging for the user and return all data for the given time period.

        Args:
            name (str):             The name of the timeseries to retrieve data for.

            start (Union[str, int, datetime]):    Get datapoints after this time. Format is N[timeunit]-ago where timeunit is w,d,h,m,s.
                                        E.g. '2d-ago' will get everything that is up to 2 days old. Can also send time in ms since
                                        epoch or a datetime object which will be converted to ms since epoch UTC.

            end (Union[str, int, datetime]):      Get datapoints up to this time. Same format as for start.

            aggregates (list):      The list of aggregate functions you wish to apply to the data. Valid aggregate functions
                                    are: 'average/avg, max, min, count, sum, interpolation/int, stepinterpolation/step'.

            granularity (str):      The granularity of the aggregate values. Valid entries are : 'day/d, hour/h, minute/m,
                                    second/s', or a multiple of these indicated by a number as a prefix e.g. '12hour'.

        Keyword Arguments:
            workers (int):    Number of download workers to run in parallel. Defaults to 10.

            include_outside_points (bool):      Whether to include the first datapoint outside the interval boundaries.

            protobuf (bool):        Download the data using the binary protobuf format. Only applicable when getting raw data.
                                    Defaults to True.

            limit (str):            Max number of datapoints to return. If limit is specified, this method will not automate
                                    paging and will return a maximum of 100,000 dps.

        Returns:
            stable.datapoints.DatapointsResponse: A data object containing the requested data with several getter methods with different
            output formats.

        Examples:
            Getting the last 3 days of raw datapoints for a given time series::

                client = CogniteClient()
                res = client.datapoints.get_datapoints(name="my_ts", start="3d-ago")
                print(res.to_pandas())
        """
        # Normalize start/end (ago-strings, datetimes, ms) to ms since epoch.
        start, end = _utils.interval_to_ms(start, end)

        # The API expects aggregates as a comma-separated string.
        if aggregates:
            aggregates = ",".join(aggregates)

        # A user-supplied limit disables paging and parallel download entirely.
        if kwargs.get("limit"):
            return self._get_datapoints_user_defined_limit(
                name,
                aggregates,
                granularity,
                start,
                end,
                limit=kwargs.get("limit"),
                # BUGFIX: default protobuf to True here as well, matching the
                # documented default and the paged path below (previously None
                # was passed when the kwarg was absent).
                protobuf=kwargs.get("protobuf", True),
                include_outside_points=kwargs.get("include_outside_points", False),
            )

        num_of_workers = kwargs.get("workers", self._num_of_workers)
        # Outside points would be duplicated at window borders; force one worker.
        if kwargs.get("include_outside_points") is True:
            num_of_workers = 1

        # Split [start, end) into windows so workers can download in parallel.
        windows = _utils.get_datapoints_windows(start, end, granularity, num_of_workers)

        partial_get_dps = partial(
            self._get_datapoints_helper_wrapper,
            name=name,
            aggregates=aggregates,
            granularity=granularity,
            protobuf=kwargs.get("protobuf", True),
            include_outside_points=kwargs.get("include_outside_points", False),
        )

        with Pool(len(windows)) as p:
            datapoints = p.map(partial_get_dps, windows)

        # Flatten the per-window datapoint lists in window order.
        concat_dps = []
        for window_dps in datapoints:
            concat_dps.extend(window_dps)

        return DatapointsResponse({"data": {"items": [{"name": name, "datapoints": concat_dps}]}})
Exemplo n.º 6
0
    def get_datapoints(self,
                       id,
                       start,
                       end=None,
                       aggregates=None,
                       granularity=None,
                       **kwargs) -> DatapointsResponse:
        """Returns a DatapointsObject containing a list of datapoints for the given query.

        This method will automate paging for the user and return all data for the given time period.

        Args:
            id (int):             The unique id of the timeseries to retrieve data for.

            start (Union[str, int, datetime]):    Get datapoints after this time. Format is N[timeunit]-ago where timeunit is w,d,h,m,s.
                                        E.g. '2d-ago' will get everything that is up to 2 days old. Can also send time in ms since
                                        epoch or a datetime object which will be converted to ms since epoch UTC.

            end (Union[str, int, datetime]):      Get datapoints up to this time. Same format as for start.

            aggregates (list):      The list of aggregate functions you wish to apply to the data. Valid aggregate functions
                                    are: 'average/avg, max, min, count, sum, interpolation/int, stepinterpolation/step'.

            granularity (str):      The granularity of the aggregate values. Valid entries are : 'day/d, hour/h, minute/m,
                                    second/s', or a multiple of these indicated by a number as a prefix e.g. '12hour'.

        Keyword Arguments:
            include_outside_points (bool):      Whether to include the first datapoint outside the interval boundaries.

            workers (int):        Number of download processes to run in parallel. Defaults to number returned by cpu_count().
                                  ``processes`` is accepted as a deprecated alias.

            limit (str):            Max number of datapoints to return. If limit is specified, this method will not automate
                                    paging and will return a maximum of 100,000 dps.

        Returns:
            client.test_experimental.datapoints.DatapointsResponse: A data object containing the requested data with several getter methods with different
            output formats.
        """
        # Normalize start/end (ago-strings, datetimes, ms) to ms since epoch.
        start, end = _utils.interval_to_ms(start, end)

        # A user-supplied limit disables paging and parallel download entirely.
        if kwargs.get("limit"):
            return self._get_datapoints_user_defined_limit(
                id,
                aggregates,
                granularity,
                start,
                end,
                limit=kwargs.get("limit"),
                include_outside_points=kwargs.get("include_outside_points",
                                                  False),
            )

        diff = end - start
        # BUGFIX: the docstring advertises the "workers" kwarg but the code
        # only read "processes", silently ignoring workers=. Honor "workers"
        # and keep "processes" as a backwards-compatible fallback.
        num_of_workers = kwargs.get("workers",
                                    kwargs.get("processes",
                                               self._num_of_workers))
        # Outside points would be duplicated at window borders; force one worker.
        if kwargs.get("include_outside_points") is True:
            num_of_workers = 1

        granularity_ms = 1
        if granularity:
            granularity_ms = _utils.granularity_to_ms(granularity)

        # Ensure that number of steps is not greater than the number data points that will be returned
        steps = min(num_of_workers, max(1, int(diff / granularity_ms)))
        # Make step size a multiple of the granularity requested in order to ensure evenly spaced results
        step_size = _utils.round_to_nearest(int(diff / steps),
                                            base=granularity_ms)
        # Create list of where each of the parallelized intervals will begin.
        # (Renamed loop variable: it previously shadowed the outer `start`.)
        window_starts = [start + (i * step_size) for i in range(steps)]
        args = [{
            "start": window_start,
            "end": window_start + step_size
        } for window_start in window_starts]

        partial_get_dps = partial(
            self._get_datapoints_helper_wrapper,
            id=id,
            aggregates=aggregates,
            granularity=granularity,
            include_outside_points=kwargs.get("include_outside_points", False),
        )

        with Pool(steps) as p:
            datapoints = p.map(partial_get_dps, args)

        # Flatten the per-window datapoint lists in window order.
        concat_dps = []
        for window_dps in datapoints:
            concat_dps.extend(window_dps)

        return DatapointsResponse(
            {"data": {
                "items": [{
                    "id": id,
                    "datapoints": concat_dps
                }]
            }})