Example #1
def test_get_range():
    # The source's latest datapoint is newer than the destination's.
    src_latest = Datapoints(timestamp=[20000000])
    dst_latest = Datapoints(timestamp=[10000000])

    # Replication resumes just after the destination's latest point and
    # stops just after the source's latest point (end is exclusive).
    start, end = datapoints._get_time_range(src_latest, dst_latest)
    assert (start, end) == (dst_latest[0].timestamp + 1,
                            src_latest[0].timestamp + 1)

    # With no datapoints on either side there is nothing to replicate.
    start, end = datapoints._get_time_range(Datapoints(), Datapoints())
    assert (start, end) == (0, 0)
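For context, a minimal sketch of what `_get_time_range` could look like to satisfy this test (a hypothetical reconstruction, not necessarily the verbatim cognite-replicator implementation):

from cognite.client.data_classes import Datapoints

# Hypothetical sketch consistent with the assertions above.
def _get_time_range(src_datapoint, dst_datapoint):
    # Nothing in the source means there is nothing to copy.
    if not src_datapoint:
        return 0, 0
    # Resume just after the destination's latest point (or from 0 if the
    # destination is empty); end is exclusive, hence the +1 on the source.
    start = dst_datapoint[0].timestamp + 1 if dst_datapoint else 0
    end = src_datapoint[0].timestamp + 1
    return start, end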
Example #2
    def __init__(self, client, start, end, ts_item, aggregates, granularity, include_outside_points, limit):
        self.start = cognite.client.utils._time.timestamp_to_ms(start)
        self.end = cognite.client.utils._time.timestamp_to_ms(end)
        # Per-item aggregates take precedence over the top-level default.
        self.aggregates = ts_item.get("aggregates") or aggregates
        self.ts_item = {k: v for k, v in ts_item.items() if k in ["id", "externalId"]}
        self.granularity = granularity
        self.include_outside_points = include_outside_points
        # A falsy limit (None or 0) means "fetch everything".
        self.limit = limit or float("inf")

        self.client = client
        # Aggregate queries have a different per-request cap than raw datapoints.
        self.request_limit = client._DPS_LIMIT_AGG if self.aggregates else client._DPS_LIMIT

        self.results = []
        # Placeholders for outside points, filled when include_outside_points is True.
        self.point_before = Datapoints()
        self.point_after = Datapoints()
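A hypothetical construction of such a task object, to show how the arguments interact; `DatapointsTask` is a stand-in name for whichever class this `__init__` belongs to, and `client` is assumed to be a CogniteClient-like object:

# Stand-in names and values for illustration only.
task = DatapointsTask(
    client=client,                      # exposes _DPS_LIMIT / _DPS_LIMIT_AGG
    start="30d-ago",                    # parsed by timestamp_to_ms
    end="now",
    ts_item={"id": 123, "aggregates": ["average"]},  # per-item override wins
    aggregates=["max"],                 # ignored: the ts_item has its own
    granularity="1h",
    include_outside_points=False,
    limit=None,                         # falsy -> float("inf"), no limit
)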
Example #3
    def _get_dps_results(self) -> List[DatapointsList]:
        def custom_sort_key(x):
            # Order fetched chunks by their first timestamp; empty chunks sort first.
            if x.timestamp:
                return x.timestamp[0]
            return 0

        # One result slot per query id; build independent empty lists (a
        # multiplied list literal would alias a single shared object).
        dps_lists = [
            DatapointsList([], cognite_client=self.client._cognite_client) for _ in self.query_ids
        ]
        for q_id, dps_objects in self.query_id_to_datapoints_objects.items():
            ts_id_to_dps_objects = defaultdict(list)
            for dps_object in dps_objects:
                ts_id_to_dps_objects[dps_object.id].append(dps_object)

            dps_list = DatapointsList([], cognite_client=self.client._cognite_client)
            for ts_id, dps_objects in ts_id_to_dps_objects.items():
                dps = Datapoints()
                for dps_object in sorted(dps_objects, key=custom_sort_key):
                    dps._extend(dps_object)
                if self.query_id_to_include_outside_points[q_id]:
                    dps = self._remove_duplicates(dps)
                query_limit = self.query_id_to_limit[q_id]
                if query_limit and len(dps) > query_limit:
                    dps = dps[:query_limit]
                dps_list.append(dps)
            dps_list = self._sort_dps_list_by_task_order(dps_list, self.query_id_to_tasks[q_id])
            dps_lists[self.query_ids.index(q_id)] = dps_list
        return dps_lists
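A standalone toy run of the merge step above, assuming the v1-era `Datapoints` constructor and the same private `_extend` helper used throughout this page:

from cognite.client.data_classes import Datapoints

# Chunks fetched in parallel arrive out of order; concatenate them in
# order of their first timestamp, exactly as custom_sort_key does above.
chunks = [
    Datapoints(id=1, timestamp=[3000, 4000], value=[3.0, 4.0]),
    Datapoints(id=1, timestamp=[1000, 2000], value=[1.0, 2.0]),
]
merged = Datapoints()
for chunk in sorted(chunks, key=lambda d: d.timestamp[0] if d.timestamp else 0):
    merged._extend(chunk)
assert merged.timestamp == [1000, 2000, 3000, 4000]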
Example #4
    def _get_datapoints_with_paging(
        self,
        start: int,
        end: int,
        ts_item: Dict[str, Any],
        aggregates: List[str],
        granularity: str,
        include_outside_points: bool,
        limit: int,
    ) -> Datapoints:
        is_aggregated = aggregates or "aggregates" in ts_item
        per_request_limit = self.client._DPS_LIMIT_AGG if is_aggregated else self.client._DPS_LIMIT
        limit_next_request = per_request_limit
        next_start = start
        datapoints = Datapoints()
        all_datapoints = Datapoints()
        while (
            (len(all_datapoints) == 0 or len(datapoints) == per_request_limit)
            and end > next_start
            and len(all_datapoints) < (limit or float("inf"))
        ):
            datapoints = self._get_datapoints(
                next_start, end, ts_item, aggregates, granularity, include_outside_points, limit_next_request
            )
            if len(datapoints) == 0:
                break

            if limit:
                # Cap the next request so the running total (points already
                # accumulated plus this batch) never exceeds the overall limit.
                remaining_datapoints = limit - len(all_datapoints) - len(datapoints)
                if remaining_datapoints < per_request_limit:
                    limit_next_request = remaining_datapoints
            latest_timestamp = int(datapoints.timestamp[-1])
            next_start = latest_timestamp + (
                cognite.client.utils._time.granularity_to_ms(granularity) if granularity else 1
            )
            all_datapoints._extend(datapoints)
        return all_datapoints
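The step size of `next_start` comes from `granularity_to_ms`, a private SDK helper (its import path may vary between SDK versions). A small sketch of the assumed behavior:

# granularity_to_ms converts a granularity string to milliseconds.
from cognite.client.utils._time import granularity_to_ms  # private util

assert granularity_to_ms("1h") == 3_600_000        # one hour
assert granularity_to_ms("2d") == 2 * 86_400_000   # two days
# For raw (non-aggregate) queries the code above advances by 1 ms instead,
# since consecutive raw datapoints can be arbitrarily close together.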
Example #5
    @staticmethod
    def _remove_duplicates(dps_object: Datapoints) -> Datapoints:
        # Count occurrences of each timestamp and record their indices.
        frequencies = defaultdict(lambda: [0, []])
        for i, timestamp in enumerate(dps_object.timestamp):
            frequencies[timestamp][0] += 1
            frequencies[timestamp][1].append(i)

        # For duplicated timestamps, drop every occurrence after the first.
        indices_to_remove = set()
        for timestamp, freq in frequencies.items():
            if freq[0] > 1:
                indices_to_remove.update(freq[1][1:])

        dps_object_without_duplicates = Datapoints(id=dps_object.id, external_id=dps_object.external_id)
        for attr, values in dps_object._get_non_empty_data_fields():
            filtered_values = [elem for i, elem in enumerate(values) if i not in indices_to_remove]
            setattr(dps_object_without_duplicates, attr, filtered_values)

        return dps_object_without_duplicates
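A standalone re-check of the bookkeeping above, using only the standard library: for each duplicated timestamp, every occurrence after the first is dropped.

from collections import defaultdict

timestamps = [1000, 1000, 2000, 3000, 3000, 3000]
frequencies = defaultdict(lambda: [0, []])
for i, ts in enumerate(timestamps):
    frequencies[ts][0] += 1
    frequencies[ts][1].append(i)

indices_to_remove = set()
for count, idxs in frequencies.values():
    if count > 1:
        indices_to_remove.update(idxs[1:])

assert indices_to_remove == {1, 4, 5}
assert [t for i, t in enumerate(timestamps)
        if i not in indices_to_remove] == [1000, 2000, 3000]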
Example #6
    def retrieve(
        self, expression: str, start: Union[int, str, datetime], end: Union[int, str, datetime], limit: int = None
    ) -> Datapoints:
        """Calculate the result of a function on time series.

        Args:
            expression (str): Function to be calculated.
            start (Union[int, str, datetime]): Inclusive start.
            end (Union[int, str, datetime]): Exclusive end.
            limit (int): Number of datapoints to retrieve.

        Returns:
            Datapoints: A Datapoints object containing the calculated data.

        Examples:

                >>> from cognite.client.experimental import CogniteClient
                >>> c = CogniteClient()
                >>> dps = c.datapoints.synthetic.retrieve(expression="TS{id:123} + TS{externalId:'abc'}", start="2w-ago", end="now")
            """
        if limit is None or limit == -1:
            limit = float("inf")
        query = {
            "expression": expression,
            "start": cognite.client.utils._time.timestamp_to_ms(start),
            "end": cognite.client.utils._time.timestamp_to_ms(end),
        }
        datapoints = Datapoints()
        while True:
            query["limit"] = min(limit, self._DPS_LIMIT)
            resp = self._post(url_path=self._SYNTHETIC_RESOURCE_PATH + "/query", json={"items": [query]})
            data = resp.json()["items"][0]
            datapoints._extend(Datapoints._load(data, expected_fields=["value"]))
            limit -= len(data["datapoints"])
            if len(data["datapoints"]) < self._DPS_LIMIT or limit <= 0:
                break
            query["start"] = data["datapoints"][-1]["timestamp"] + 1
        return datapoints
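Note from the code above that `limit=None` and `limit=-1` both mean "no limit". A hypothetical call, with `c` constructed as in the docstring example:

# limit=-1 is normalized to float("inf") before the paging loop, so this
# fetches the full two-week range in _DPS_LIMIT-sized pages.
dps = c.datapoints.synthetic.retrieve(
    expression="TS{id:123} / 2", start="2w-ago", end="now", limit=-1
)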
Example #7
def replicate_datapoints(
    client_src: CogniteClient,
    client_dst: CogniteClient,
    ts_external_id: str,
    limit: Optional[int] = None,
    partition_size: int = 100000,
    mock_run: bool = False,
    job_id: int = 1,
    src_datapoint_transform: Optional[Callable[[Datapoint], Datapoint]] = None,
    timerange_transform: Optional[Callable[[int, int], Tuple[int, int]]] = None,
    start: Optional[Union[int, str]] = None,
    end: Optional[Union[int, str]] = None,
    value_manipulation_lambda_fnc: Optional[str] = None,
) -> Tuple[bool, int]:
    """
    Copies data points from the source tenant into the destination project, for the given time series.

    If data points already exist in the destination for the time series, only the newer data points in the source are
    copied over.

    Args:
        client_src: The client corresponding to the source project.
        client_dst: The client corresponding to the destination project.
        ts_external_id: The external id of the time series to replicate datapoints for.
        limit: The maximum number of data points to copy.
        partition_size: The maximum number of datapoints to retrieve per request.
        mock_run: If true, only retrieves data points from the source and does not insert into the destination.
        job_id: The batch number being processed.
        src_datapoint_transform: Function to apply to all source datapoints before inserting into the destination.
        timerange_transform: Function to set the time range boundaries (start, end) arbitrarily.
        start: Timestamp to start replication onwards from; if not specified, starts at the most recent datapoint.
        end: If specified, limits replication to datapoints earlier than the end time.
        value_manipulation_lambda_fnc: A lambda function, passed as a string, used to manipulate datapoint values.

    Returns:
        A tuple of the success status (True if no failures) and the number of datapoints successfully replicated
    """
    try:
        latest_dst_dp = client_dst.datapoints.retrieve_latest(
            external_id=ts_external_id)
        latest_src_dp = client_src.datapoints.retrieve_latest(
            external_id=ts_external_id)
    except CogniteAPIError as exc:
        logging.error(
            f"Job {job_id}: Failed for external id {ts_external_id}. {exc}")
        return False, 0

    if not latest_src_dp:
        return True, 0

    if src_datapoint_transform:
        latest_src_dp = Datapoints(
            timestamp=[src_datapoint_transform(latest_src_dp[0]).timestamp])

    _start, _end = _get_time_range(latest_src_dp, latest_dst_dp)

    start = _start if start is None else timestamp_to_ms(start)
    end = _end if end is None else timestamp_to_ms(end)

    if timerange_transform:
        start, end = timerange_transform(start, end)

    # API restriction: CDF accepts no timestamps before 1971-01-01
    # (31536000000 ms since epoch).
    start = max(start, 31536000000)

    logging.debug(
        f"Job {job_id}: Ext_id: {ts_external_id} Retrieving datapoints between {start} and {end}"
    )
    datapoints_count = 0
    while start < end:
        num_to_fetch = partition_size if limit is None else min(
            partition_size, limit - datapoints_count)
        if num_to_fetch == 0:
            break

        try:
            datapoints = client_src.datapoints.retrieve(
                external_id=ts_external_id,
                start=start,
                end=end,
                limit=num_to_fetch)
            if not datapoints:
                break

            if src_datapoint_transform:
                transformed_values = []
                transformed_timestamps = []
                for src_datapoint in datapoints:
                    transformed_datapoint = src_datapoint_transform(
                        src_datapoint)
                    transformed_timestamps.append(
                        transformed_datapoint.timestamp)
                    transformed_values.append(transformed_datapoint.value)
                datapoints = Datapoints(timestamp=transformed_timestamps,
                                        value=transformed_values)

            if value_manipulation_lambda_fnc:
                transformed_values = []
                transformed_timestamps = []
                lambda_fnc = evaluate_lambda_function(
                    value_manipulation_lambda_fnc)
                if lambda_fnc:
                    for src_datapoint in datapoints:
                        try:
                            transformed_timestamps.append(
                                src_datapoint.timestamp)
                            transformed_values.append(
                                lambda_fnc(src_datapoint.value))
                        except Exception as e:
                            logging.error(
                                f"Could not manipulate the datapoint (value={src_datapoint.value},"
                                +
                                f" timestamp={src_datapoint.timestamp}). Error: {e}"
                            )
                    datapoints = Datapoints(timestamp=transformed_timestamps,
                                            value=transformed_values)

            if not mock_run:
                client_dst.datapoints.insert(datapoints,
                                             external_id=ts_external_id)
        except CogniteAPIError as exc:
            logging.error(
                f"Job {job_id}: Failed for external id {ts_external_id}. {exc}"
            )
            return False, datapoints_count
        else:
            datapoints_count += len(datapoints)
            start = datapoints[-1].timestamp + 1

    logging.debug(
        f"Job {job_id}: Ext_id: {ts_external_id} Number of datapoints: {datapoints_count}"
    )
    return True, datapoints_count
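A hypothetical invocation, with client construction elided; the keyword names follow the signature above, and the lambda string is only an illustration of `value_manipulation_lambda_fnc`:

success, n_copied = replicate_datapoints(
    client_src=client_src,
    client_dst=client_dst,
    ts_external_id="pump_42/temperature",
    partition_size=100_000,
    mock_run=True,  # dry run: read from the source, skip the insert
    value_manipulation_lambda_fnc="lambda value: value * 9 / 5 + 32",  # C -> F
)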
Example #8
    def retrieve(
        self,
        expression: Union[str, "sympy.Expr"],
        start: Union[int, str, datetime],
        end: Union[int, str, datetime],
        limit: int = None,
        variables: Dict[str, Union[str, TimeSeries]] = None,
        aggregate: str = None,
        granularity: str = None,
    ) -> Datapoints:
        """Calculate the result of a function on time series.

        Args:
            expression (Union[str,sympy.Expr]): Function to be calculated. Supports both strings and sympy expressions. Strings can have either the API `ts{}` syntax, or contain variable names to be replaced using the `variables` parameter.
            start (Union[int, str, datetime]): Inclusive start.
            end (Union[int, str, datetime]): Exclusive end.
            limit (int): Number of datapoints to retrieve.
            variables (Dict[str,Union[str,TimeSeries]]): An optional map of symbol replacements.
            aggregate (str): Use this aggregate when replacing entries from `variables`; does not affect time series given in the `ts{}` syntax.
            granularity (str): Use this granularity with the aggregate.

        Returns:
            Datapoints: A Datapoints object containing the calculated data.

        Examples:

            Request a synthetic time series query with direct syntax

                >>> from cognite.client.experimental import CogniteClient
                >>> c = CogniteClient()
                >>> dps = c.datapoints.synthetic.retrieve(expression="TS{id:123} + TS{externalId:'abc'}", start="2w-ago", end="now")

            Use variables to re-use an expression:

                >>> from cognite.client.experimental import CogniteClient
                >>> c = CogniteClient()
                >>> vars = {"A": "my_ts_external_id", "B": c.time_series.retrieve(id=1)}
                >>> dps = c.datapoints.synthetic.retrieve(expression="A+B", start="2w-ago", end="now", variables=vars)

            Use sympy to build complex expressions:

                >>> from cognite.client.experimental import CogniteClient
                >>> c = CogniteClient()
                >>> from sympy import symbols, cos, pi
                >>> a = symbols('a')
                >>> dps = c.datapoints.synthetic.retrieve(pi * cos(a), start="2w-ago", end="now", variables={"a": "my_ts_external_id"}, aggregate='interpolation', granularity='1m')
            """
        if limit is None or limit == -1:
            limit = float("inf")
        expression, short_expression = SyntheticDatapointsAPI._build_expression(
            expression, variables, aggregate, granularity)
        query = {
            "expression": expression,
            "start": cognite.client.utils._time.timestamp_to_ms(start),
            "end": cognite.client.utils._time.timestamp_to_ms(end),
        }
        datapoints = Datapoints(value=[], error=[])
        datapoints.external_id = short_expression  # for dataframe readability
        while True:
            query["limit"] = min(limit, self._DPS_LIMIT)
            resp = self._post(url_path=self._SYNTHETIC_RESOURCE_PATH +
                              "/query",
                              json={"items": [query]})
            data = resp.json()["items"][0]
            datapoints._extend(
                Datapoints._load(data, expected_fields=["value", "error"]))
            limit -= len(data["datapoints"])
            if len(data["datapoints"]) < self._DPS_LIMIT or limit <= 0:
                break
            query["start"] = data["datapoints"][-1]["timestamp"] + 1
        return datapoints
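Since `datapoints.external_id` is set to the shortened expression above, converting the result to a dataframe gives a readable column name. A hypothetical continuation of the docstring examples (`to_pandas` is the standard `Datapoints` conversion method):

dps = c.datapoints.synthetic.retrieve(
    "A + B", start="2w-ago", end="now",
    variables={"A": "ts_a_external_id", "B": "ts_b_external_id"},
)
df = dps.to_pandas()  # single column named after the shortened expression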
Example #9
@pytest.fixture
def mock_cogcli_datapoints_retrieve_single():
    # Yield-based fixture: the patched CogniteClient stays active for the
    # duration of the consuming test.
    with monkeypatch_cognite_client() as cogmock:
        cogmock.datapoints.retrieve.return_value = Datapoints(
            id=1, external_id="1", value=[1, 2, 3], timestamp=[1000, 2000, 3000]
        )
        yield
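Assuming the generator is registered as a pytest fixture (as above), a test could consume it like this; `monkeypatch_cognite_client` patches `CogniteClient` so any instance created inside the block is a mock:

def test_retrieve_single(mock_cogcli_datapoints_retrieve_single):
    from cognite.client import CogniteClient

    # The patched client returns the canned Datapoints regardless of args.
    dps = CogniteClient().datapoints.retrieve(id=1, start=0, end=4000)
    assert dps.value == [1, 2, 3]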
Example #10
@pytest.fixture
def mock_cogcli_datapoints_query():
    with monkeypatch_cognite_client() as cogmock:
        cogmock.datapoints.query.return_value = [
            DatapointsList([Datapoints(id=1, external_id="1", value=[1, 2, 3], timestamp=[1000, 2000, 3000])])
        ]
        yield
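A matching sketch for the query fixture; the mocked client returns the canned list regardless of the arguments passed:

def test_query(mock_cogcli_datapoints_query):
    from cognite.client import CogniteClient

    result = CogniteClient().datapoints.query([])  # args are ignored by the mock
    assert result[0][0].value == [1, 2, 3]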