def validate_type_and_ref_id(
    wf_in_out_name_to_filtered_source_or_sink_mapping: Mapping[
        str, Union[FilteredSource, FilteredSink]
    ]
) -> Tuple[List[str], List[str], List[ExternalType]]:
    """Validate generic rest adapter specific requirements of wirings

    * ref_ids can't be None
    * types must be provided and be a known rest adapter type

    Raises AdapterClientWiringInvalidError if requirements are not fulfilled.

    Returns a triple of compatibly ordered lists of
    * workflow input / output names
    * ref ids
    * types (ExternalType)
    """
    wf_in_out_names = list(wf_in_out_name_to_filtered_source_or_sink_mapping.keys())

    ref_ids: List[str] = [
        filtered_source.ref_id  # type: ignore
        for wf_input_name in wf_in_out_names
        if (
            filtered_source := wf_in_out_name_to_filtered_source_or_sink_mapping[
                wf_input_name
            ]
        ).ref_id
        is not None
    ]

    if len(ref_ids) < len(wf_in_out_name_to_filtered_source_or_sink_mapping):
        raise AdapterClientWiringInvalidError(
            "Unset ref id in a wiring using generic rest adapter."
        )

    try:
        corresponding_types: List[ExternalType] = [
            ExternalType(fs.type)
            for wf_input_name in wf_in_out_names
            if (
                fs := wf_in_out_name_to_filtered_source_or_sink_mapping[wf_input_name]
            ).type
            is not None
        ]
    except ValueError as e:
        raise AdapterClientWiringInvalidError(
            "Unknown type in a wiring using generic rest adapter."
        ) from e

    if len(corresponding_types) < len(
        wf_in_out_name_to_filtered_source_or_sink_mapping
    ):
        raise AdapterClientWiringInvalidError(
            "Unset type in a wiring using generic rest adapter."
        )

    if not all(
        isinstance(rest_adapter_data_type, ExternalType)
        for rest_adapter_data_type in corresponding_types
    ):
        raise AdapterClientWiringInvalidError(
            "Got unknown type in wiring for generic rest adapter"
        )

    return wf_in_out_names, ref_ids, corresponding_types
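
# A minimal usage sketch of the validation above (hypothetical values; assumes
# the FilteredSource pydantic model accepts these keyword arguments and that
# "timeseries(float)" is a valid ExternalType value):
def _example_validate_wiring() -> None:  # pragma: no cover
    mapping = {
        "wf_input_a": FilteredSource(ref_id="ts-id-1", type="timeseries(float)"),
        "wf_input_b": FilteredSource(ref_id="ts-id-2", type="timeseries(float)"),
    }
    names, ref_ids, types = validate_type_and_ref_id(mapping)
    # names == ["wf_input_a", "wf_input_b"], ref_ids == ["ts-id-1", "ts-id-2"],
    # types == [ExternalType("timeseries(float)"), ExternalType("timeseries(float)")]
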
async def send_multiple_timeseries_to_adapter(
    filtered_sinks: Dict[str, FilteredSink],
    data_to_send: Dict[str, pd.Series],
    adapter_key: str,
) -> None:
    keys = filtered_sinks.keys()
    ref_ids = [str(filtered_sinks[key].ref_id) for key in keys]
    sink_types = [ExternalType(filtered_sinks[key].type) for key in keys]
    series_list = [data_to_send[key] for key in keys]
    await post_multiple_timeseries(
        series_list, ref_ids, sink_types, adapter_key=adapter_key
    )
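
# Sketch of the alignment guarantee above (hypothetical values; assumes the
# FilteredSink pydantic model accepts these keyword arguments): all three lists
# are built by iterating the same `keys` view, so index i of ref_ids,
# sink_types and series_list always refers to the same workflow output.
async def _example_send_two_series() -> None:  # pragma: no cover
    filtered_sinks = {
        "out_a": FilteredSink(ref_id="ts-id-1", type="timeseries(float)"),
        "out_b": FilteredSink(ref_id="ts-id-2", type="timeseries(float)"),
    }
    data_to_send = {
        "out_a": pd.Series([1.2, 3.4]),
        "out_b": pd.Series([5.6, 7.8]),
    }
    await send_multiple_timeseries_to_adapter(
        filtered_sinks, data_to_send, adapter_key="my-adapter"  # hypothetical key
    )
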
async def send_single_metadatum_to_adapter(
    filtered_sink: FilteredSink,
    metadatum_value: Any,
    adapter_key: str,
    client: httpx.AsyncClient,
) -> None:
    if filtered_sink.ref_id_type == RefIdType.SOURCE:
        endpoint = "sources"
    elif filtered_sink.ref_id_type == RefIdType.SINK:
        endpoint = "sinks"
    else:
        endpoint = "thingNodes"

    url = posix_urljoin(
        await get_generic_rest_adapter_base_url(adapter_key),
        endpoint,
        urllib.parse.quote(str(filtered_sink.ref_id)),
        "metadata",
        urllib.parse.quote(str(filtered_sink.ref_key)),
    )

    value_datatype = ExternalType(filtered_sink.type).value_datatype
    assert value_datatype is not None  # for mypy

    try:
        resp = await client.post(
            url,
            json={
                "key": filtered_sink.ref_key,
                "value": metadatum_value,
                "dataType": value_datatype.value,
            },
        )
    except httpx.HTTPError as e:
        msg = (
            f"Posting metadata to generic rest adapter endpoint {url}"
            f" failed with Exception {str(e)}"
        )
        logger.info(msg)
        raise AdapterConnectionError(
            f"Posting metadata to generic rest adapter endpoint {url} failed."
        ) from e

    if resp.status_code not in (200, 201):
        msg = (
            f"Posting metadata to generic rest adapter endpoint {url} failed."
            f" Status code: {resp.status_code}. Text: {resp.text}"
        )
        logger.info(msg)
        raise AdapterConnectionError(msg)

    logger.debug(
        "Received Response for metadata posting from url %s:\n%s", url, str(resp.text)
    )
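
# For illustration (values hypothetical): posting the metadatum 42 with
# ref_key "maxPower" for a thing node "plant1.unit1" of type "metadata(int)"
# results in a request roughly like
#
#     POST <base_url>/thingNodes/plant1.unit1/metadata/maxPower
#     {"key": "maxPower", "value": 42, "dataType": "int"}
#
# where "int" is assumed to be the value_datatype of ExternalType("metadata(int)").
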
def validate_series_dtype(series: pd.Series, sink_type: ExternalType) -> None:
    """Raise appropriate exceptions if sink_type and series value dtype do not match"""
    if sink_type.endswith("(float)") and not pd.api.types.is_float_dtype(series):
        raise AdapterOutputDataError(
            f"Expected float value dtype for series but got {str(series.dtype)}."
        )
    if sink_type.endswith("(int)") and not pd.api.types.is_integer_dtype(series):
        raise AdapterOutputDataError(
            f"Expected int value dtype for series but got {str(series.dtype)}."
        )
    if (
        sink_type.endswith("(boolean)") or sink_type.endswith("(bool)")
    ) and not pd.api.types.is_bool_dtype(series):
        raise AdapterOutputDataError(
            f"Expected bool value dtype for series but got {str(series.dtype)}."
        )
    if (
        sink_type.endswith("(str)") or sink_type.endswith("(string)")
    ) and not pd.api.types.is_string_dtype(series):
        raise AdapterOutputDataError(
            f"Expected string value dtype for series but got {str(series.dtype)}."
        )
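
# A minimal sketch of the dtype check above (assumes "timeseries(float)" is a
# valid ExternalType member; ExternalType subclasses str, so .endswith works):
def _example_validate_series_dtype() -> None:  # pragma: no cover
    validate_series_dtype(
        pd.Series([1.0, 2.5]), ExternalType("timeseries(float)")
    )  # passes silently
    try:
        validate_series_dtype(pd.Series(["a", "b"]), ExternalType("timeseries(float)"))
    except AdapterOutputDataError:
        pass  # object/string dtype does not match the declared float sink type
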
async def load_grouped_timeseries_data_together(
    data_to_load: Dict[str, FilteredSource], adapter_key: str
) -> Dict[str, pd.Series]:
    """Reorganize query information by timestamp pairs and load timeseries data

    The generic rest adapter allows querying multiple timeseries in one request,
    but only with one timestamp filter pair and the same (requested) value type
    for all of those timeseries.

    This function expects data refs of the timeseries type, groups them together
    if they have the same filter timestamp pair and the same value type, loads
    each such group in one request and returns all results gathered.
    """
    loaded_data = {}

    # group by occurring timestamp pairs
    group_by_timestamp_pair: Dict[
        Tuple[str, str, ExternalType],
        Dict[str, FilteredSource],
    ] = defaultdict(dict)

    for filtered_source in data_to_load.values():
        if (
            not isinstance(filtered_source.filters.get("timestampFrom", None), str)
        ) or (not isinstance(filtered_source.filters.get("timestampTo", None), str)):
            raise AdapterClientWiringInvalidError(
                "Timeseries data with no to/from filters."
            )

    for key, filtered_source in data_to_load.items():
        group_by_timestamp_pair[
            (
                filtered_source.filters["timestampFrom"],
                filtered_source.filters["timestampTo"],
                ExternalType(filtered_source.type),
            )
        ][key] = filtered_source

    # load each group together:
    for group_tuple, grouped_source_dict in group_by_timestamp_pair.items():
        loaded_ts_data_from_adapter = await load_ts_data_from_adapter(
            list(grouped_source_dict.values()),
            group_tuple[0],
            group_tuple[1],
            adapter_key=adapter_key,
        )

        loaded_data.update(
            {
                key: extract_one_channel_series_from_loaded_data(
                    loaded_ts_data_from_adapter,
                    filtered_source.ref_id,  # type: ignore
                )
                for key, filtered_source in grouped_source_dict.items()
            }
        )

        try:
            received_ids = loaded_ts_data_from_adapter["timeseriesId"].unique()
        except KeyError as e:
            msg = (
                "Missing keys in received timeseries records."
                f" Got columns {str(loaded_ts_data_from_adapter.columns)}"
                f" with dataframe of shape {str(loaded_ts_data_from_adapter.shape)}:\n"
                f"{str(loaded_ts_data_from_adapter)}"
            )
            logger.info(msg)
            raise AdapterHandlingException(msg) from e

        queried_ids = [fs.ref_id for fs in grouped_source_dict.values()]
        if not np.isin(received_ids, np.array(queried_ids)).all():
            msg = (
                "Found timeseries ids in received data that were not queried."
                f" Received timeseriesId unique values were:\n{str(received_ids.tolist())}"
                f"\nQueried ids were:\n{str(queried_ids)}."
                "\nThis unassignable data will be discarded. This indicates an error"
                f" in the adapter implementation of the adapter {str(adapter_key)}!"
            )
            logger.warning(msg)

    return loaded_data
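
# A minimal sketch of the grouping step above (hypothetical values): two
# sources that share the same (timestampFrom, timestampTo, type) triple end up
# in one group and are fetched in a single request.
def _example_grouping() -> None:  # pragma: no cover
    groups: Dict[Tuple[str, str, str], Dict[str, str]] = defaultdict(dict)
    sources = {
        "in_a": ("2023-01-01T00:00:00Z", "2023-01-02T00:00:00Z", "timeseries(float)"),
        "in_b": ("2023-01-01T00:00:00Z", "2023-01-02T00:00:00Z", "timeseries(float)"),
        "in_c": ("2023-02-01T00:00:00Z", "2023-02-02T00:00:00Z", "timeseries(float)"),
    }
    for key, triple in sources.items():
        groups[triple][key] = key
    assert len(groups) == 2  # in_a and in_b share a group; in_c gets its own
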
async def load_framelike_data(
    filtered_sources: List[FilteredSource],
    additional_params: List[
        Tuple[str, str]
    ],  # for timeseries: [("from", from_timestamp), ("to", to_timestamp)]
    adapter_key: str,
    endpoint: Literal["timeseries", "dataframe"],
) -> pd.DataFrame:
    """Load framelike data from REST endpoint"""

    url = posix_urljoin(await get_generic_rest_adapter_base_url(adapter_key), endpoint)

    if len({fs.type for fs in filtered_sources}) > 1:
        raise AdapterHandlingException(
            "Got more than one datatype in same grouped data"
        )

    if len(filtered_sources) == 0:
        raise AdapterHandlingException("Requested fetching 0 sources")

    common_data_type = filtered_sources[0].type

    if (common_data_type == ExternalType.DATAFRAME) and len(filtered_sources) > 1:
        raise AdapterHandlingException(
            "Cannot request more than one dataframe together"
        )

    logger.info(
        (
            "Requesting framelike data from generic rest adapter %s from endpoint %s:"
            " ids %s with additional params %s with common datatype %s"
        ),
        adapter_key,
        url,
        str([filtered_source.ref_id for filtered_source in filtered_sources]),
        str(additional_params),
        str(common_data_type),
    )

    headers = get_generic_rest_adapter_auth_headers()

    with requests.Session() as session:
        try:
            start_time = datetime.datetime.now(datetime.timezone.utc)
            logger.info(
                "Start receiving generic rest adapter %s framelike data at %s",
                adapter_key,
                start_time.isoformat(),
            )
            resp = session.get(
                url,
                params=[
                    ("id", quote(str(filtered_source.ref_id)))
                    for filtered_source in filtered_sources
                ]
                + additional_params,
                stream=True,
                headers=headers,
                verify=runtime_config.hd_adapters_verify_certs,
            )
            if (
                resp.status_code == 404
                and "errorCode" in resp.text
                and resp.json()["errorCode"] == "RESULT_EMPTY"
            ):
                logger.info(
                    (
                        "Received RESULT_EMPTY error_code from generic rest adapter %s"
                        " framelike endpoint %s, therefore returning empty DataFrame"
                    ),
                    adapter_key,
                    url,
                )
                if endpoint == "timeseries":
                    return create_empty_ts_df(ExternalType(common_data_type))
                # must be "dataframe":
                return df_empty({})

            if resp.status_code != 200:
                msg = (
                    f"Requesting framelike data from generic rest adapter endpoint"
                    f" {url} failed."
                    f" Status code: {resp.status_code}. Text: {resp.text}"
                )
                logger.info(msg)
                raise AdapterConnectionError(msg)

            logger.info("Start reading in and parsing framelike data")

            df = pd.read_json(resp.raw, lines=True)

            end_time = datetime.datetime.now(datetime.timezone.utc)
            logger.info(
                (
                    "Finished receiving generic rest framelike data (including"
                    " dataframe parsing) at %s. DataFrame shape is %s with columns %s"
                ),
                end_time.isoformat(),
                str(df.shape),
                str(df.columns),
            )
            logger.info(
                (
                    "Receiving generic rest adapter framelike data took"
                    " (including dataframe parsing) %s"
                ),
                str(end_time - start_time),
            )
            logger.debug(
                "Received dataframe of form %s:\n%s",
                str(df.shape) if len(df) > 0 else "EMPTY RESULT",
                str(df) if len(df) > 0 else "EMPTY RESULT",
            )
        except requests.HTTPError as e:
            msg = (
                f"Requesting framelike data from generic rest adapter endpoint {url}"
                f" failed with Exception {str(e)}"
            )
            logger.info(msg)
            raise AdapterConnectionError(
                f"Requesting framelike data from generic rest adapter endpoint"
                f" {url} failed."
            ) from e

    logger.info("Complete generic rest adapter %s framelike request", adapter_key)

    if len(df) == 0:
        if endpoint == "timeseries":
            return create_empty_ts_df(ExternalType(common_data_type))
        # must be "dataframe":
        return df_empty({})

    if "timestamp" in df.columns and endpoint == "dataframe":
        try:
            parsed_timestamps = pd.to_datetime(df["timestamp"])
        except ValueError as e:
            logger.info(
                "Column 'timestamp' of dataframe from %s could not be parsed and"
                " therefore not be set to index. Proceeding with default index."
                " Error was: %s",
                url,
                str(e),
            )
        else:
            df.index = parsed_timestamps
            df = df.sort_index()

    return df
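
# For illustration: the framelike endpoints are expected to stream
# newline-delimited JSON records, which pd.read_json(..., lines=True) parses.
# The "timeseriesId" and "timestamp" column names appear in the code above;
# the "value" field and the payload itself are hypothetical.
def _example_parse_framelike_lines() -> None:  # pragma: no cover
    import io

    raw = (
        '{"timeseriesId": "ts-id-1", "timestamp": "2023-01-01T00:00:00Z", "value": 1.2}\n'
        '{"timeseriesId": "ts-id-1", "timestamp": "2023-01-01T00:01:00Z", "value": 1.3}\n'
    )
    df = pd.read_json(io.StringIO(raw), lines=True)
    assert set(df.columns) == {"timeseriesId", "timestamp", "value"}
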
async def load_single_metadatum_from_adapter(
    filtered_source: FilteredSource, adapter_key: str, client: httpx.AsyncClient
) -> Any:
    if filtered_source.ref_id_type == RefIdType.SOURCE:
        endpoint = "sources"
    elif filtered_source.ref_id_type == RefIdType.SINK:
        endpoint = "sinks"
    else:
        endpoint = "thingNodes"

    url = posix_urljoin(
        await get_generic_rest_adapter_base_url(adapter_key),
        endpoint,
        urllib.parse.quote(str(filtered_source.ref_id)),
        "metadata",
        urllib.parse.quote(str(filtered_source.ref_key)),
    )

    try:
        resp = await client.get(url)
    except httpx.HTTPError as e:
        msg = (
            f"Requesting metadata from generic rest adapter endpoint {url}"
            f" failed with Exception {str(e)}"
        )
        logger.info(msg)
        raise AdapterConnectionError(
            f"Requesting metadata from generic rest adapter endpoint {url} failed."
        ) from e

    if resp.status_code != 200:
        msg = (
            f"Requesting metadata from generic rest adapter endpoint {url} failed."
            f" Status code: {resp.status_code}. Text: {resp.text}"
        )
        logger.info(msg)
        raise AdapterConnectionError(msg)

    try:
        metadatum = Metadatum.parse_obj(resp.json())
    except ValidationError as e:
        msg = (
            f"Validation failure trying to parse received metadata from adapter"
            f" url {url}: {str(resp.json())}\nError is: " + str(e)
        )
        logger.info(msg)
        raise AdapterHandlingException(msg) from e

    logger.debug("Received metadata json from url %s:\n%s", url, str(resp.json()))

    if metadatum.key != str(filtered_source.ref_key):
        msg = (
            f"Received metadata has wrong key"
            f" (not the requested one {str(filtered_source.ref_key)})."
            f" Received metadatum is {str(metadatum)}"
        )
        logger.info(msg)
        raise AdapterConnectionError(msg)

    value_datatype = ExternalType(filtered_source.type).value_datatype
    assert value_datatype is not None  # for mypy

    if metadatum.dataType is not None and metadatum.dataType != value_datatype:
        msg = (
            f"Received metadata has wrong value dataType"
            f" (not the requested one inside {str(filtered_source.type)})."
            f" Received metadatum is {str(metadatum)}"
        )
        logger.info(msg)
        raise AdapterConnectionError(msg)

    try:
        parsed_value = value_datatype.parse_object(metadatum.value)
    except ValidationError as e:
        msg = (
            f"Validation failure trying to parse received metadata value from adapter"
            f" url {url}: {str(metadatum)}\nError is: " + str(e)
        )
        logger.info(msg)
        raise AdapterHandlingException(msg) from e

    return parsed_value  # type: ignore
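
# A minimal sketch of the metadata response handling (hypothetical payload;
# assumes Metadatum is a pydantic model with the key / value / dataType fields
# used above and that "int" is a valid dataType value):
def _example_parse_metadatum() -> None:  # pragma: no cover
    metadatum = Metadatum.parse_obj({"key": "maxPower", "value": 42, "dataType": "int"})
    assert metadatum.key == "maxPower"
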