Example #1
def load_file_from_id(source_id: str) -> pd.DataFrame:
    possible_local_file = get_local_file_by_id(source_id)

    if possible_local_file is None:
        raise AdapterHandlingException(
            f"Local file {from_url_representation(source_id)} could not be located or"
            " does not lie in a configured local dir or does not exist")

    read_kwargs = {}
    if possible_local_file.parsed_settings_file is not None:
        if possible_local_file.parsed_settings_file.loadable:
            if possible_local_file.parsed_settings_file.load_settings is not None:
                read_kwargs = possible_local_file.parsed_settings_file.load_settings
        else:
            raise AdapterHandlingException(
                f"Local file {possible_local_file.path} settings file does not allow loading!"
            )

    file_support_handler = possible_local_file.file_support_handler()

    if file_support_handler is None:
        raise AdapterHandlingException(
            f"Local file {possible_local_file.path} has unknown/unregistered file extension."
        )

    if file_support_handler.read_handler_func is None:
        raise AdapterHandlingException(
            f"Registered FileSupportHandler for file {possible_local_file.path} has no "
            "read_handler_func.")

    # Actual loading
    try:
        loaded_df = file_support_handler.read_handler_func(
            possible_local_file.path, **read_kwargs)
    except Exception as e:
        msg = (
            f"Failed to retrieve local file \n{str(possible_local_file)}\n with "
            f"file_support_handler \n{str(file_support_handler)}\nException was:\n{str(e)}."
        )
        logger.info(msg)
        raise AdapterHandlingException(msg) from e

    logger.info(
        "Finished retrieving local file \n%s\n with file_support_handler \n%s\n"
        " Resulting DataFrame of shape %s:\n%s",
        str(possible_local_file),
        str(file_support_handler),
        str(loaded_df.shape),
        str(loaded_df),
    )
    return loaded_df
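
A minimal standalone sketch of the read dispatch above, assuming nothing about the real registry: FileSupportHandler here is a stand-in dataclass (not the adapter's actual class), pd.read_csv plays the registered read_handler_func, and read_kwargs mimics a settings file's load_settings.

import tempfile
from dataclasses import dataclass
from typing import Any, Callable, Dict

import pandas as pd


@dataclass
class FileSupportHandler:  # stand-in, not the adapter's actual class
    associated_extensions: tuple
    read_handler_func: Callable[..., pd.DataFrame]


csv_handler = FileSupportHandler((".csv",), pd.read_csv)

with tempfile.NamedTemporaryFile("w", suffix=".csv", delete=False) as f:
    f.write("a;b\n1;2\n3;4\n")
    tmp_path = f.name

# kwargs as they might come from a parsed settings file's load_settings
read_kwargs: Dict[str, Any] = {"sep": ";"}
loaded_df = csv_handler.read_handler_func(tmp_path, **read_kwargs)
print(loaded_df.shape)  # (2, 2)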
Example #2
def write_to_file(df: pd.DataFrame, sink_id: str) -> None:
    possible_local_file = get_local_file_by_id(sink_id)

    if possible_local_file is None:
        raise AdapterHandlingException(
            f"Local file {from_url_representation(sink_id)} target could not be located or"
            "does not lie in a configured local dir or does not exist / is not writable"
        )

    write_kwargs = {}
    if possible_local_file.parsed_settings_file is not None:
        if possible_local_file.parsed_settings_file.writable:
            if possible_local_file.parsed_settings_file.write_settings is not None:
                write_kwargs = possible_local_file.parsed_settings_file.write_settings
        else:
            raise AdapterHandlingException(
                f"Local file {possible_local_file.path} settings file does not allow writing!"
            )

    file_support_handler = possible_local_file.file_support_handler()

    if file_support_handler is None:
        raise AdapterHandlingException(
            f"Local file {possible_local_file.path} has unknown/unregistered file extension."
        )
    if file_support_handler.write_handler_func is None:
        raise AdapterHandlingException(
            f"Registered FileSupportHandler for file {possible_local_file.path} has no "
            "write_handler_func.")

    try:
        file_support_handler.write_handler_func(df, possible_local_file.path,
                                                **write_kwargs)
    except Exception as e:
        msg = (
            f"Failed to write local file \n{str(possible_local_file)}\n with "
            f"file_support_handler \n{str(file_support_handler)}\nException was:\n{str(e)}."
        )
        logger.info(msg)
        raise AdapterHandlingException(msg) from e
    logger.info(
        "Finished writing local file \n%s\n with file_support_handler \n%s\n"
        " Written DataFrame of shape %s:\n%s",
        str(possible_local_file),
        str(file_support_handler),
        str(df.shape),
        str(df),
    )
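
The write path mirrors the read path with a (df, path, **write_kwargs) signature. A hypothetical CSV write handler, sketched under that assumption:

import os
import tempfile

import pandas as pd


def csv_write_handler(df: pd.DataFrame, path: str, **write_kwargs) -> None:
    # write_kwargs would come from a settings file's write_settings
    df.to_csv(path, index=False, **write_kwargs)


out_path = os.path.join(tempfile.gettempdir(), "example_out.csv")
csv_write_handler(pd.DataFrame({"a": [1, 2]}), out_path, sep=";")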
Example #3
def extract_one_channel_series_from_loaded_data(df: pd.DataFrame,
                                                ts_id: str) -> pd.Series:
    try:
        extracted_df = df[df["timeseriesId"] == ts_id].copy()
        extracted_df.index = extracted_df["timestamp"]
        extracted_series = extracted_df["value"].sort_index()
    except KeyError as e:
        msg = (
            f"Missing keys in received timeseries records. Got columns {str(df.columns)}"
            f" with dataframe of shape {str(df.shape)}:\n{str(df)}")
        logger.info(msg)
        raise AdapterHandlingException(msg) from e

    extracted_series.name = ts_id

    return extracted_series
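
The records layout this extraction assumes is long format: one row per observation with "timeseriesId", "timestamp" and "value" columns. A quick standalone run of the same selection logic:

import pandas as pd

records_df = pd.DataFrame(
    {
        "timeseriesId": ["id_a", "id_b", "id_a"],
        "timestamp": pd.to_datetime(
            ["2020-01-01T00:00:10Z", "2020-01-01T00:00:00Z", "2020-01-01T00:00:00Z"]
        ),
        "value": [1.2, 7.0, 0.5],
    }
)

extracted = (
    records_df[records_df["timeseriesId"] == "id_a"]
    .set_index("timestamp")["value"]
    .sort_index()
    .rename("id_a")
)
print(extracted)  # the two id_a values, ascending by timestamp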
Example #4
async def load_generic_adapter_base_urls() -> List[BackendRegisteredGenericRestAdapter]:
    """Loads generic REST adapter infos from the corresponding designer backend endpoint"""

    headers = get_generic_rest_adapter_auth_headers()

    url = posix_urljoin(runtime_config.hd_backend_api_url, "adapters/")
    logger.info("Start getting Generic REST Adapter URLS from HD Backend url %s", url)

    async with httpx.AsyncClient(
        verify=runtime_config.hd_backend_verify_certs
    ) as client:
        try:
            resp = await client.get(url, headers=headers)
        except httpx.HTTPError as e:
            msg = f"Failure connecting to hd backend adapters endpoint ({url}): " + str(
                e
            )
            logger.info(msg)
            raise AdapterConnectionError(msg) from e

    if resp.status_code != 200:
        msg = (
            f"HTTP failure trying to receive generic adapter infos from hd backend ({url}):"
            f" Status code {str(resp.status_code)}. Response: {resp.text}"
        )

        logger.info(msg)
        raise AdapterConnectionError(msg)

    try:
        loaded_generic_rest_adapters: List[
            BackendRegisteredGenericRestAdapter
        ] = BackendRegisteredGenericRestAdapters.parse_obj(resp.json()).__root__
    except ValidationError as e:
        msg = "Failure trying to parse received generic adapter infos: " + str(e)

        logger.info(msg)
        raise AdapterHandlingException(msg) from e

    logger.info(
        "Finished getting Generic REST Adapter URLs from HD Backend url %s", url
    )

    return loaded_generic_rest_adapters
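
The parse step relies on pydantic v1's custom-root-type pattern. A self-contained sketch of that pattern; the field names here are assumptions about the payload shape, not the backend's authoritative schema:

from typing import List

from pydantic import BaseModel  # pydantic v1 API


class BackendRegisteredGenericRestAdapter(BaseModel):
    id: str
    name: str
    url: str
    internalUrl: str


class BackendRegisteredGenericRestAdapters(BaseModel):
    __root__: List[BackendRegisteredGenericRestAdapter]


payload = [
    {
        "id": "demo",
        "name": "Demo Adapter",
        "url": "http://localhost:8090",
        "internalUrl": "http://demo-adapter:8090",
    }
]
adapters = BackendRegisteredGenericRestAdapters.parse_obj(payload).__root__
print(adapters[0].name)  # Demo Adapter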
Example #5
async def get_generic_rest_adapter_base_url(
    adapter_key: str, retry: bool = True
) -> str:
    """Load url from cache and update url cache if necessary"""
    try:
        return generic_rest_adapter_urls[adapter_key]
    except KeyError as e:
        if retry:
            logger.info(
                "Try updating cached generic REST adapter urls since key %s was not found",
                adapter_key,
            )
            await update_generic_adapter_base_urls_cache()
            return await get_generic_rest_adapter_base_url(adapter_key, retry=False)
        # no retry:
        msg = (
            f"Could not find / load necessary generic REST adapter url "
            f"from designer backend for adapter key {adapter_key}. "
            "Make sure the adapter with that key is properly registered"
            " in the designer backend."
        )
        logger.info(msg)
        raise AdapterHandlingException(msg) from e
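
The cache-miss pattern in isolation: look up, refresh once on a miss, then fail for good. A toy version with stand-in names (the real cache is module-level state filled from the backend):

import asyncio
from typing import Dict

url_cache: Dict[str, str] = {}


async def refresh_cache() -> None:
    # stands in for update_generic_adapter_base_urls_cache()
    url_cache["demo"] = "http://localhost:8090"


async def get_base_url(key: str, retry: bool = True) -> str:
    try:
        return url_cache[key]
    except KeyError:
        if retry:
            await refresh_cache()
            return await get_base_url(key, retry=False)
        raise


print(asyncio.run(get_base_url("demo")))  # http://localhost:8090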
Example #6
async def load_grouped_timeseries_data_together(
        data_to_load: Dict[str, FilteredSource],
        adapter_key: str) -> Dict[str, pd.Series]:
    """Reorganize query information by timestamp pairs and load timeseries data

    The generic REST adapter allows querying multiple timeseries in one request, but
    only with a single timestamp filter pair and the same (requested) value type for
    all of those timeseries.

    This function expects data refs of the timeseries type, groups them together if
    they share the same timestamp filter pair and the same value type, loads each
    such group in one request, and returns all results gathered.
    """
    loaded_data = {}

    # group by occurring timestamp pairs
    group_by_timestamp_pair: Dict[
        Tuple[str, str, ExternalType], Dict[str, FilteredSource]
    ] = defaultdict(dict)

    for filtered_source in data_to_load.values():
        if not isinstance(
                filtered_source.filters.get("timestampFrom", None), str
        ) or not isinstance(
                filtered_source.filters.get("timestampTo", None), str):
            raise AdapterClientWiringInvalidError(
                "Timeseries data with no to/from filters.")

    for key, filtered_source in data_to_load.items():
        group_by_timestamp_pair[(
            filtered_source.filters["timestampFrom"],
            filtered_source.filters["timestampTo"],
            ExternalType(filtered_source.type),
        )][key] = filtered_source

    # load each group together:
    for (group_tuple, grouped_source_dict) in group_by_timestamp_pair.items():
        loaded_ts_data_from_adapter = await load_ts_data_from_adapter(
            list(grouped_source_dict.values()),
            group_tuple[0],
            group_tuple[1],
            adapter_key=adapter_key,
        )

        loaded_data.update({
            key: extract_one_channel_series_from_loaded_data(
                loaded_ts_data_from_adapter,
                filtered_source.ref_id,  # type: ignore
            )
            for key, filtered_source in grouped_source_dict.items()
        })

        try:
            received_ids = loaded_ts_data_from_adapter["timeseriesId"].unique()
        except KeyError as e:
            msg = (
                f"Missing keys in received timeseries records."
                f" Got columns {str(loaded_ts_data_from_adapter.columns)}"
                f" with dataframe of shape {str(loaded_ts_data_from_adapter.shape)}:\n"
                f"{str(loaded_ts_data_from_adapter)}")
            logger.info(msg)
            raise AdapterHandlingException(msg) from e

        queried_ids = [fs.ref_id for fs in grouped_source_dict.values()]

        if not np.isin(received_ids, np.array(queried_ids)).all():
            msg = (
                f"Found timeseries ids in received data that were not queried."
                f" Received timeseriesId unique values were:\n{str(received_ids.tolist())}"
                f" \nQueried ids were:\n{str(queried_ids)}."
                "\nThis unassignable data will be discarded. This indicates an error in the adapter"
                f" implementation of the adapter {str(adapter_key)}!")
            logger.warning(msg)

    return loaded_data
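
The grouping step from the docstring above, in isolation: sources sharing (timestampFrom, timestampTo, type) land in one bucket and hence one request. FilteredSource is replaced by a minimal stand-in dataclass:

from collections import defaultdict
from dataclasses import dataclass, field
from typing import Dict, Tuple


@dataclass
class FilteredSource:  # stand-in for the real model
    ref_id: str
    type: str
    filters: Dict[str, str] = field(default_factory=dict)


ts_filters = {
    "timestampFrom": "2020-01-01T00:00:00Z",
    "timestampTo": "2020-01-02T00:00:00Z",
}
data_to_load = {
    "inp_a": FilteredSource("id_a", "timeseries(float)", dict(ts_filters)),
    "inp_b": FilteredSource("id_b", "timeseries(float)", dict(ts_filters)),
}

groups: Dict[Tuple[str, str, str], Dict[str, FilteredSource]] = defaultdict(dict)
for key, fs in data_to_load.items():
    groups[(fs.filters["timestampFrom"], fs.filters["timestampTo"], fs.type)][key] = fs

print(len(groups))  # 1 -> both series can be fetched in a single request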
Example #7
async def load_framelike_data(
    filtered_sources: List[FilteredSource],
    # for timeseries: [("from", from_timestamp), ("to", to_timestamp)]
    additional_params: List[Tuple[str, str]],
    adapter_key: str,
    endpoint: Literal["timeseries", "dataframe"],
) -> pd.DataFrame:
    """Load framelike data from REST endpoint"""

    url = posix_urljoin(await get_generic_rest_adapter_base_url(adapter_key),
                        endpoint)

    if len({fs.type for fs in filtered_sources}) > 1:
        raise AdapterHandlingException(
            "Got more than one datatype in same grouped data")

    if len(filtered_sources) == 0:
        raise AdapterHandlingException("Requested fetching 0 sources")

    common_data_type = filtered_sources[0].type

    if common_data_type == ExternalType.DATAFRAME and len(filtered_sources) > 1:
        raise AdapterHandlingException(
            "Cannot request more than one dataframe together")

    logger.info(
        ("Requesting framelike data from generic rest adapter %s from endpoint %s:"
         " ids %s with additional params %s with common datatype %s"),
        adapter_key,
        url,
        str([filtered_source.ref_id for filtered_source in filtered_sources]),
        str(additional_params),
        str(common_data_type),
    )

    headers = get_generic_rest_adapter_auth_headers()

    with requests.Session() as session:
        try:
            start_time = datetime.datetime.now(datetime.timezone.utc)
            logger.info(
                "Start receiving generic rest adapter %s framelike data at %s",
                adapter_key,
                start_time.isoformat(),
            )
            resp = session.get(
                url,
                params=[("id", quote(str(filtered_source.ref_id)))
                        for filtered_source in filtered_sources] +
                additional_params,
                stream=True,
                headers=headers,
                verify=runtime_config.hd_adapters_verify_certs,
            )
            if (resp.status_code == 404 and "errorCode" in resp.text
                    and resp.json()["errorCode"] == "RESULT_EMPTY"):
                logger.info(
                    ("Received RESULT_EMPTY error_code from generic rest adapter %s"
                     " framelike endpoint %s, therefore returning empty DataFrame"
                     ),
                    adapter_key,
                    url,
                )
                if endpoint == "timeseries":
                    return create_empty_ts_df(ExternalType(common_data_type))
                # must be "dataframe":
                return df_empty({})

            if resp.status_code != 200:
                msg = (
                    f"Requesting framelike data from generic rest adapter endpoint {url} failed."
                    f" Status code: {resp.status_code}. Text: {resp.text}")
                logger.info(msg)
                raise AdapterConnectionError(msg)
            logger.info("Start reading in and parsing framelike data")

            df = pd.read_json(resp.raw, lines=True)
            end_time = datetime.datetime.now(datetime.timezone.utc)
            logger.info(
                ("Finished receiving generic rest framelike data (including dataframe parsing)"
                 " at %s. DataFrame shape is %s with columns %s"),
                end_time.isoformat(),
                str(df.shape),
                str(df.columns),
            )
            logger.info(
                ("Receiving generic rest adapter framelike data took"
                 " (including dataframe parsing)"
                 " %s"),
                str(end_time - start_time),
            )
            logger.debug(
                "Received dataframe of form %s:\n%s",
                str(df.shape) if len(df) > 0 else "EMPTY RESULT",
                str(df) if len(df) > 0 else "EMPTY RESULT",
            )
        except requests.HTTPError as e:
            msg = (
                f"Requesting framelike data from generic rest adapter endpoint {url}"
                f" failed with Exception {str(e)}")

            logger.info(msg)
            raise AdapterConnectionError(
                f"Requesting framelike data from generic rest adapter endpoint {url} failed."
            ) from e
    logger.info("Complete generic rest adapter %s framelike request",
                adapter_key)
    if len(df) == 0:
        if endpoint == "timeseries":
            return create_empty_ts_df(ExternalType(common_data_type))
        # must be dataframe:
        return df_empty({})

    if "timestamp" in df.columns and endpoint == "dataframe":
        try:
            parsed_timestamps = pd.to_datetime(df["timestamp"])
        except ValueError as e:
            logger.info(
                "Column 'timestamp' of dataframe from %s could not be parsed and therefore"
                " not be set to index. Proceeding with default index. Error was: %s",
                url,
                str(e),
            )
        else:
            df.index = parsed_timestamps
            df = df.sort_index()

    return df
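
The wire format read in above is newline-delimited JSON, one record per line, parsed with pd.read_json(..., lines=True). A standalone taste of that parsing step:

import io

import pandas as pd

ndjson = (
    '{"timeseriesId": "id_a", "timestamp": "2020-01-01T00:00:00Z", "value": 1.2}\n'
    '{"timeseriesId": "id_a", "timestamp": "2020-01-01T00:00:10Z", "value": 1.3}\n'
)
frame = pd.read_json(io.StringIO(ndjson), lines=True)
print(frame.shape)  # (2, 3)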
Example #8
async def load_single_metadatum_from_adapter(filtered_source: FilteredSource,
                                             adapter_key: str,
                                             client: httpx.AsyncClient) -> Any:

    if filtered_source.ref_id_type == RefIdType.SOURCE:
        endpoint = "sources"
    elif filtered_source.ref_id_type == RefIdType.SINK:
        endpoint = "sinks"
    else:
        endpoint = "thingNodes"

    url = posix_urljoin(
        await get_generic_rest_adapter_base_url(adapter_key),
        endpoint,
        urllib.parse.quote(str(filtered_source.ref_id)),
        "metadata",
        urllib.parse.quote(str(filtered_source.ref_key)),
    )
    try:
        resp = await client.get(url)
    except httpx.HTTPError as e:
        msg = (
            f"Requesting metadata data from generic rest adapter endpoint {url}"
            f" failed with Exception {str(e)}")

        logger.info(msg)
        raise AdapterConnectionError(
            f"Requesting metadata from generic rest adapter endpoint {url} failed."
        ) from e

    if resp.status_code != 200:
        msg = (
            f"Requesting metadata data from generic rest adapter endpoint {url} failed."
            f" Status code: {resp.status_code}. Text: {resp.text}")
        logger.info(msg)
        raise AdapterConnectionError(msg)

    try:
        metadatum = Metadatum.parse_obj(resp.json())
    except ValidationError as e:
        msg = (
            f"Validation failure trying to parse received metadata from adapter"
            f"url {url}: {str(resp.json())}\nError is: " + str(e))

        logger.info(msg)
        raise AdapterHandlingException(msg) from e

    logger.debug("Received metadata json from url %s:\n%s", url,
                 str(resp.json()))

    if metadatum.key != str(filtered_source.ref_key):
        msg = (f"received metadata has wrong key "
               f"(not the requested one {str(filtered_source.ref_key)})"
               f". Received metdatum is {str(metadatum)}")
        logger.info(msg)
        raise AdapterConnectionError(msg)

    value_datatype = ExternalType(filtered_source.type).value_datatype
    assert value_datatype is not None  # for mypy

    if metadatum.dataType is not None and metadatum.dataType != value_datatype:
        msg = (f"received metadata has wrong value dataType "
               f"(not the requested one inside {str(filtered_source.type)})"
               f". Received metdatum is {str(metadatum)}")
        logger.info(msg)
        raise AdapterConnectionError(msg)

    try:
        parsed_value = value_datatype.parse_object(metadatum.value)
    except ValidationError as e:
        msg = (
            f"Validation failure trying to parse received metadata from adapter"
            f"url {url}: {str(metadatum)}\nError is: " + str(e))

        logger.info(msg)
        raise AdapterHandlingException(msg) from e
    return parsed_value  # type: ignore
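
How the metadata URL above gets assembled, assuming posix_urljoin behaves like posixpath.join on URL parts:

import posixpath
from urllib.parse import quote

base_url = "http://localhost:8090"
ref_id = "temperature sensor 1"
ref_key = "max value"

url = posixpath.join(base_url, "sources", quote(ref_id), "metadata", quote(ref_key))
print(url)  # http://localhost:8090/sources/temperature%20sensor%201/metadata/max%20value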
Example #9
def get_structure(parent_id: Optional[str] = None) -> StructureResponse:
    """Obtain structure for corresponding adapter web service endpoint

    parent_id is a local path encoded via to_url_representation from the utils module of this
    adapter, or None.
    """

    local_root_dirs = local_file_adapter_config.local_dirs

    if parent_id is None:  # get root Nodes

        return StructureResponse(
            id="local-file-adapter",
            name="Local File Adapter",
            thingNodes=[
                StructureThingNode(
                    id=to_url_representation(dir_path),
                    name=os.path.basename(dir_path),
                    parentId=None,
                    description="Root local file directory at " +
                    os.path.abspath(os.path.realpath(dir_path)),
                ) for dir_path in local_root_dirs
            ],
            sources=[],
            sinks=[],
        )

    # One level in file hierarchy
    current_dir = from_url_representation(parent_id)

    if not any(current_dir.startswith(root_dir)
               for root_dir in local_root_dirs):
        raise AdapterHandlingException((
            f"Requested local file dir {current_dir} not contained in configured "
            f"root directories {str(local_root_dirs)}"))

    local_files, dirs = get_local_files_and_dirs(current_dir,
                                                 walk_sub_dirs=False)

    return StructureResponse(
        id="local-file-adapter",
        name="Local File Adapter",
        thingNodes=[
            StructureThingNode(
                id=to_url_representation(dir_path),
                name=os.path.basename(dir_path),
                parentId=parent_id,
                description="Local file directory at " +
                os.path.abspath(os.path.realpath(dir_path)),
            ) for dir_path in dirs
        ],
        sources=[
            source_from_local_file(local_file) for local_file in local_files
            if local_file_loadable(local_file)
        ],
        sinks=[
            sink_from_local_file(local_file) for local_file in local_files
            if local_file_writable(local_file)
        ],
    )
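
get_structure walks one hierarchy level per call: None yields the configured root directories, a thing node id yields that directory's children. The id is a URL-safe, reversible encoding of the directory path; a hypothetical encoding (not necessarily the actual utils implementation) could look like:

def to_url_representation(path: str) -> str:
    # escape literal underscores first, then encode path separators
    return path.replace("_", "-_-").replace("/", "_")


def from_url_representation(url_rep: str) -> str:
    return url_rep.replace("_", "/").replace("-/-", "_")


p = "/mnt/data/plant_a"
assert from_url_representation(to_url_representation(p)) == p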
Example #10
async def load_generic_adapter_base_urls() -> List[BackendRegisteredGenericRestAdapter]:
    """Loads generic REST adapter infos from the corresponding designer backend endpoint"""

    headers = get_generic_rest_adapter_auth_headers()

    url = posix_urljoin(get_config().hd_backend_api_url, "adapters/")
    logger.info("Start getting Generic REST Adapter URLS from HD Backend url %s", url)

    if get_config().is_backend_service:
        # call function directly
        adapter_list = await get_all_adapters()

        try:
            loaded_generic_rest_adapters: List[BackendRegisteredGenericRestAdapter] = [
                BackendRegisteredGenericRestAdapter(
                    id=adapter_dto.id,
                    name=adapter_dto.name,
                    url=adapter_dto.url,
                    internalUrl=adapter_dto.internal_url,
                )
                for adapter_dto in adapter_list
            ]
        except ValidationError as e:
            msg = "Failure trying to parse received generic adapter infos: " + str(e)

            logger.info(msg)
            raise AdapterHandlingException(msg) from e
    else:
        # call backend service "adapters" endpoint
        async with httpx.AsyncClient(
            verify=get_config().hd_backend_verify_certs
        ) as client:
            try:
                resp = await client.get(url, headers=headers)
            except httpx.HTTPError as e:
                msg = (
                    f"Failure connecting to hd backend adapters endpoint ({url}): "
                    + str(e)
                )
                logger.info(msg)
                raise AdapterConnectionError(msg) from e

        if resp.status_code != 200:
            msg = (
                f"HTTP failure trying to receive generic adapter infos from hd backend ({url}):"
                f" Status code {str(resp.status_code)}. Response: {resp.text}"
            )

            logger.info(msg)
            raise AdapterConnectionError(msg)

        try:
            loaded_generic_rest_adapters = [
                BackendRegisteredGenericRestAdapter(
                    id=adapter_dto.id,
                    name=adapter_dto.name,
                    url=adapter_dto.url,
                    internalUrl=adapter_dto.internal_url,
                )
                for adapter_dto in AdapterFrontendDtoRegisteredGenericRestAdapters.parse_obj(
                    resp.json()
                ).__root__
            ]
        except ValidationError as e:
            msg = "Failure trying to parse received generic adapter infos: " + str(e)

            logger.info(msg)
            raise AdapterHandlingException(msg) from e

    logger.info(
        "Finished getting Generic REST Adapter URLs from HD Backend url %s", url
    )

    return loaded_generic_rest_adapters
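
The branch at the top of this example is a dispatch between an in-process call and an HTTP round-trip. A toy version of that pattern; get_all_adapters and the URL here are stand-ins:

import asyncio

import httpx


async def get_all_adapters() -> list:  # stands in for the backend handler
    return [{"id": "demo", "name": "Demo Adapter"}]


async def fetch_adapters(is_backend_service: bool) -> list:
    if is_backend_service:
        # same process as the backend: call the handler directly,
        # skipping serialization and the network round-trip
        return await get_all_adapters()
    async with httpx.AsyncClient() as client:
        resp = await client.get("http://localhost:8080/api/adapters/")
        resp.raise_for_status()
        return resp.json()


print(asyncio.run(fetch_adapters(True)))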