async def test_end_to_end_load_only_metadata():
    """Two metadata inputs are resolved end to end via the mocked metadata loader."""
    wired_inputs = {
        "wf_inp_1": FilteredSource(
            ref_id="id_1",
            ref_id_type="SOURCE",
            type="metadata(int)",
            ref_key="number",
        ),
        "wf_inp_2": FilteredSource(
            ref_id="id_2",
            ref_id_type="THINGNODE",
            type="metadata(string)",
            ref_key="description",
        ),
    }
    with mock.patch(
        "hetdesrun.adapters.generic_rest.load_multiple_metadata",
        new=mock_load_multiple_metadata,
    ):
        result = await load_data(
            wired_inputs,
            adapter_key="end_to_end_only_dataframe_data",
        )
        assert result["wf_inp_1"] == 42
        assert result["wf_inp_2"] == "some description"
async def test_end_to_end_load_only_dataframe_data():
    """One and then several dataframe inputs are loaded via the mocked loader."""
    with mock.patch(
        "hetdesrun.adapters.generic_rest.load_dataframe.load_single_dataframe_from_adapter",
        new=mock_load_generic_rest_dataframe_data,
    ):
        # a single wired input
        single_result = await load_data(
            {"inp_1": FilteredSource(ref_id="id_1", type="dataframe")},
            adapter_key="end_to_end_only_dataframe_data",
        )
        assert len(single_result) == 1
        assert isinstance(single_result["inp_1"], pd.DataFrame)
        assert single_result["inp_1"].shape == (4, 2)

        # several wired inputs at once
        multi_result = await load_data(
            {
                "inp_1": FilteredSource(ref_id="id_1", type=ExternalType.DATAFRAME),
                "inp_2": FilteredSource(ref_id="id_1", type=ExternalType.DATAFRAME),
            },
            adapter_key="end_to_end_only_dataframe_data",
        )
        assert len(multi_result) == 2
        assert isinstance(multi_result["inp_2"], pd.DataFrame)
        assert multi_result["inp_2"].shape == (4, 2)
async def resolve_and_load_data_from_wiring(
    workflow_wiring: WorkflowWiring,
) -> Dict[str, Any]:
    """Loads data from sources and provides it as a dict with the workflow input
    names as keys

    Data is loaded in batches per adapter: input wirings are grouped by their
    adapter id and each adapter is asked once for all of its sources.
    """
    # group the input wirings by the adapter that serves them
    wirings_per_adapter = defaultdict(list)
    for wiring in workflow_wiring.input_wirings:
        wirings_per_adapter[wiring.adapter_id].append(wiring)

    gathered: Dict[str, Any] = {}
    for adapter_key, adapter_wirings in wirings_per_adapter.items():
        # translate this adapter's wirings into FilteredSource objects and
        # fetch all of them in one adapter call
        filtered_sources = {
            wiring.workflow_input_name: FilteredSource(
                ref_id=wiring.ref_id,
                ref_id_type=wiring.ref_id_type,
                ref_key=wiring.ref_key,
                type=wiring.type,
                filters=wiring.filters,
            )
            for wiring in adapter_wirings
        }
        adapter_result: dict = await load_data_from_adapter(
            adapter_key,
            filtered_sources,
        )
        gathered.update(adapter_result)
    return gathered
async def test_end_to_end_load_empty_ts_data_with_attrs():
    """An empty timeseries result still arrives as a Series and keeps its attrs."""
    with mock.patch(
        "hetdesrun.adapters.generic_rest.load_ts_data.load_ts_data_from_adapter",
        new=mock_load_generic_rest_ts_data,
    ):
        result = await load_data(
            {
                "inp_1": FilteredSource(
                    ref_id="id_3",
                    type="timeseries(float)",
                    filters={
                        "timestampFrom": "2018-09-01T00:00:00Z",
                        "timestampTo": "2020-01-01T00:00:00Z",
                    },
                )
            },
            adapter_key="end_to_end_only_ts_data",
        )
        assert len(result) == 1
        empty_series = result["inp_1"]
        assert isinstance(empty_series, pd.Series)
        assert len(empty_series) == 0
        # attrs survive even when no data points were returned
        assert len(empty_series.attrs) == 1
        assert empty_series.attrs["c"] == 5
async def test_end_to_end_load_dataframe_data_with_timestamp_column():
    """A dataframe response with a timestamp column yields a tz-aware datetime index."""
    resp_mock = mock.Mock()
    resp_mock.status_code = 200
    resp_mock.raw = """\n
{"timestamp": "2020-03-11T13:45:18.194000000Z", "a": 42.3}
{"timestamp": "2020-03-11T14:45:18.194000000Z", "a": 41.7}
{"timestamp": "2020-03-11T15:45:18.194000000Z", "a": 15.89922333}
"""
    with mock.patch(
        "hetdesrun.adapters.generic_rest.load_framelike.get_generic_rest_adapter_base_url",
        return_value="https://hetida.de",
    ), mock.patch(
        "hetdesrun.adapters.generic_rest.load_framelike.requests.Session.get",
        return_value=resp_mock,
    ):
        result = await load_data(
            {
                "inp_1": FilteredSource(ref_id="id_1", type=ExternalType.DATAFRAME),
            },
            adapter_key="end_to_end_only_dataframe_data",
        )
        assert len(result) == 1
        frame = result["inp_1"]
        assert isinstance(frame, pd.DataFrame)
        assert frame.shape == (3, 2)
        # the "timestamp" column becomes a timezone-aware index
        assert pd.api.types.is_datetime64tz_dtype(frame.index)
async def test_load_ts_adapter_request():
    """Exercise load_ts_data_from_adapter against a mocked generic REST adapter:
    success, connection error, RESULT_EMPTY 404, and an empty payload."""
    with mock.patch(
        "hetdesrun.adapters.generic_rest.load_framelike.get_generic_rest_adapter_base_url",
        return_value="https://hetida.de",
    ):
        resp_mock = mock.Mock()
        resp_mock.status_code = 200
        resp_mock.raw = """\n
{"timeseriesId": "id_1", "timestamp": "2020-03-11T13:45:18.194000000Z", "value": 42.3}
{"timeseriesId": "id_1", "timestamp": "2020-03-11T14:45:18.194000000Z", "value": 41.7}
{"timeseriesId": "id_1", "timestamp": "2020-03-11T15:45:18.194000000Z", "value": 15.89922333}
"""
        resp_mock.headers = {}
        filtered_sources = [
            FilteredSource(
                ref_id="id_1",
                type="timeseries(float)",
                filters={
                    "timestampFrom": "2018-09-01T00:00:00Z",
                    "timestampTo": "2020-01-01T00:00:00Z",
                },
            ),
        ]
        with mock.patch(
            "hetdesrun.adapters.generic_rest.load_framelike.requests.Session.get",
            return_value=resp_mock,
        ):

            async def _load():
                # every scenario below uses the same sources and time range
                return await load_ts_data_from_adapter(
                    filtered_sources,
                    from_timestamp="2018-09-01T00:00:00Z",
                    to_timestamp="2020-01-01T00:00:00Z",
                    adapter_key="test_load_ts_generic_adapter_key",
                )

            # successful response with three records
            df = await _load()
            assert df.shape == (3, 3)

            # generic client error raises
            resp_mock.status_code = 400
            with pytest.raises(AdapterConnectionError):
                await _load()

            # 404 with RESULT_EMPTY error code yields an empty frame
            resp_mock.status_code = 404
            resp_mock.text = "errorCode"
            resp_mock.json = mock.Mock(return_value={"errorCode": "RESULT_EMPTY"})
            df = await _load()
            assert df.shape == (0, 3)

            # an empty payload also yields an empty frame
            resp_mock.status_code = 200
            resp_mock.raw = ""
            df = await _load()
            assert df.shape == (0, 3)
async def test_load_ts_data():
    """Timeseries sources are grouped by filter timestamps and split back per input."""

    def _ts_source(ref_id, ts_from, ts_to):
        # shorthand for a float timeseries source with a time-range filter
        return FilteredSource(
            ref_id=ref_id,
            type=ExternalType.TIMESERIES_FLOAT,
            filters={"timestampFrom": ts_from, "timestampTo": ts_to},
        )

    ts_data_to_load = {
        "inp_1": _ts_source("id_1", "2018-09-01T00:00:00Z", "2020-01-01T00:00:00Z"),
        # same filter timestamps as above
        "inp_2": _ts_source("id_2", "2018-09-01T00:00:00Z", "2020-01-01T00:00:00Z"),
        # same filter timestamps as above
        "inp_3": _ts_source("id_3", "2018-09-01T00:00:00Z", "2020-01-01T00:00:00Z"),
        # different from-timestamp
        "inp_4": _ts_source("id_1", "2017-09-01T00:00:00Z", "2020-01-01T00:00:00Z"),
    }
    with mock.patch(
        "hetdesrun.adapters.generic_rest.load_ts_data.load_ts_data_from_adapter",
        new=mock_load_generic_rest_ts_data,
    ):
        loaded_data = await load_grouped_timeseries_data_together(
            ts_data_to_load,
            adapter_key="test",
        )
        # returns Series for input:
        assert isinstance(loaded_data["inp_1"], pd.Series)
        assert isinstance(loaded_data["inp_3"], pd.Series)  # even if no data available!

        # proper separation into series:
        assert len(loaded_data["inp_1"]) == 2
        assert len(loaded_data["inp_2"]) == 2
        assert len(loaded_data["inp_3"]) == 0
        assert len(loaded_data["inp_4"]) == 2

        # a missing from-timestamp makes the wiring invalid
        ts_data_to_load["inp_1"].filters["timestampFrom"] = None
        with pytest.raises(AdapterClientWiringInvalidError):
            loaded_data = await load_grouped_timeseries_data_together(
                ts_data_to_load,
                adapter_key="test",
            )
async def test_resources_offered_from_structure_hierarchy(async_test_client):
    """Walks through the hierarchy provided by structure endpoint
    and gets/posts offered resources.

    Verifies that every thing node, source and sink reachable from the
    structure root is also obtainable via its individual endpoint, that
    attached metadata can be read (and posted back where writable), and
    that dataframe sources/sinks can actually be loaded/written.
    """
    async with async_test_client as client:
        response_obj = (await client.get("/adapters/localfile/structure")).json()
        assert len(response_obj["sources"]) == 0
        assert len(response_obj["sinks"]) == 0
        roots = response_obj["thingNodes"]
        assert len(roots) == 1
        root = roots[0]

        all_tns = []
        all_srcs = []
        all_snks = []
        tn_attached_metadata_dict = {}
        src_attached_metadata_dict = {}
        snk_attached_metadata_dict = {}

        await walk_thing_nodes(
            root["id"],
            tn_append_list=all_tns,
            src_append_list=all_srcs,
            snk_append_list=all_snks,
            src_attached_metadata_dict=src_attached_metadata_dict,
            snk_attached_metadata_dict=snk_attached_metadata_dict,
            tn_attached_metadata_dict=tn_attached_metadata_dict,
            open_async_test_client=client,
        )

        # expected counts for the shipped local file adapter test data
        assert len(all_tns) == 4
        assert len(all_srcs) == 7
        assert len(all_snks) == 2
        assert len(src_attached_metadata_dict) == 0
        assert len(snk_attached_metadata_dict) == 0
        assert len(tn_attached_metadata_dict) == 0

        # every walked resource is also obtainable via its own endpoint:
        for src in all_srcs:
            response_obj = (
                await client.get(f'/adapters/localfile/sources/{src["id"]}')
            ).json()
            for key in src.keys():
                assert response_obj[key] == src[key]

        for snk in all_snks:
            response_obj = (
                await client.get(f'/adapters/localfile/sinks/{snk["id"]}')
            ).json()
            for key in snk.keys():
                print(response_obj)
                assert response_obj[key] == snk[key]

        for tn in all_tns:
            response_obj = (
                await client.get(f'/adapters/localfile/thingNodes/{tn["id"]}')
            ).json()
            for key in tn.keys():
                print(response_obj)
                assert response_obj[key] == tn[key]

        # we actually get all metadata that is available as attached to something:
        for (src_id, key), md in src_attached_metadata_dict.items():
            response_obj = (
                await client.get(f"/adapters/localfile/sources/{src_id}/metadata/{key}")
            ).json()
            print(response_obj, "versus", md)
            assert response_obj["key"] == key
            assert response_obj["value"] == md["value"]
            assert response_obj["dataType"] == md["dataType"]
            if md.get("isSink", False):
                assert response_obj["isSink"]
                resp = await client.post(
                    f"/adapters/localfile/sources/{src_id}/metadata/{key}", json=md
                )
                assert resp.status_code == 200

        for (snk_id, key), md in snk_attached_metadata_dict.items():
            response_obj = (
                await client.get(f"/adapters/localfile/sinks/{snk_id}/metadata/{key}")
            ).json()
            print(response_obj, "versus", md)
            assert response_obj["key"] == key
            assert response_obj["value"] == md["value"]
            assert response_obj["dataType"] == md["dataType"]
            if md.get("isSink", False):
                assert response_obj["isSink"]
                resp = await client.post(
                    f"/adapters/localfile/sinks/{snk_id}/metadata/{key}", json=md
                )
                assert resp.status_code == 200

        for (tn_id, key), md in tn_attached_metadata_dict.items():
            response_obj = (
                await client.get(
                    f"/adapters/localfile/thingNodes/{tn_id}/metadata/{key}"
                )
            ).json()
            print(response_obj, "versus", md)
            assert response_obj["key"] == key
            assert response_obj["value"] == md["value"]
            assert response_obj["dataType"] == md["dataType"]
            if md.get("isSink", False):
                assert response_obj["isSink"]
                # BUGFIX: this POST previously used snk_id (leaked from the sink
                # loop above), i.e. a stale — or, if that loop never ran, an
                # undefined — id. It must target the current thing node's id.
                resp = await client.post(
                    f"/adapters/localfile/thingNodes/{tn_id}/metadata/{key}", json=md
                )
                assert resp.status_code == 200

        # all metadata that is a source in the tree is also found
        for src in all_srcs:
            if src["type"].startswith("metadata"):
                response_obj = (
                    await client.get(
                        f'/adapters/localfile/thingNodes/{src["thingNodeId"]}/metadata/{src["metadataKey"]}'
                    )
                ).json()
                print(response_obj, "versus", src)
                assert response_obj["key"] == src["metadataKey"]
                assert response_obj["dataType"] == (
                    ExternalType(src["type"]).value_datatype.value
                )
            if src["type"].startswith("dataframe"):
                loaded_df = (
                    await load_data(
                        {
                            "wf_input": FilteredSource(
                                ref_id=src["id"],
                                ref_id_type="SOURCE",
                                ref_key=None,
                                type="dataframe",
                            ),
                        },
                        adapter_key="local-file-adapter",
                    )
                )["wf_input"]
                assert isinstance(loaded_df, pd.DataFrame)
            if src["type"].startswith("timeseries"):
                raise AssertionError(
                    "No timeseries type expected in local file adapter sources"
                )

        # metadata that is a sink in the tree is also always obtainable
        for snk in all_snks:
            if snk["type"].startswith("metadata"):
                response_obj = (
                    await client.get(
                        f'/adapters/localfile/thingNodes/{snk["thingNodeId"]}/metadata/{snk["metadataKey"]}'
                    )
                ).json()
                print(response_obj, "versus", snk)
                assert response_obj["key"] == snk["metadataKey"]
                assert response_obj["dataType"] == (
                    ExternalType(snk["type"]).value_datatype.value
                )
                resp = await client.post(
                    f'/adapters/localfile/thingNodes/{snk["thingNodeId"]}/metadata/{snk["metadataKey"]}',
                    json=response_obj,
                )
                assert resp.status_code == 200
            if snk["type"].startswith("dataframe"):
                # patch both writers so no file is actually written
                with mock.patch(
                    "hetdesrun.adapters.local_file.write_file.pd.DataFrame.to_csv"
                ) as to_csv_mock:
                    with mock.patch(
                        "hetdesrun.adapters.local_file.write_file.pd.DataFrame.to_excel"
                    ) as to_excel_mock:
                        await send_data(
                            {
                                "wf_output": FilteredSink(
                                    ref_id=snk["id"],
                                    ref_id_type="SINK",
                                    ref_key=None,
                                    type="dataframe",
                                ),
                            },
                            {
                                "wf_output": pd.DataFrame(
                                    {"a": [1, 2, 3], "b": [12.2, 13.3, 14.4]}
                                )
                            },
                            adapter_key="local-file-adapter",
                        )
            if snk["type"].startswith("timeseries"):
                raise AssertionError(
                    "No timeseries type expected in local file adapter sinks"
                )

            # NOTE(review): `called_once` is not part of the Mock API — Mock
            # auto-creates the attribute, so these two asserts are always
            # truthy. Replace with `assert_called_once()` once it is confirmed
            # which writer each test sink actually triggers — TODO confirm.
            assert to_csv_mock.called_once
            func_name, args, kwargs = to_csv_mock.mock_calls[0]
            if snk["id"].endswith(".csv"):
                assert (
                    kwargs["sep"] == ";"
                )  # option from the settings file of the only test sink
            assert to_excel_mock.called_once
async def test_load_metadata_request():
    """Metadata loading: request URL, error handling, and multi-value loading."""
    with mock.patch(
        "hetdesrun.adapters.generic_rest.load_metadata.get_generic_rest_adapter_base_url",
        return_value="https://hetida.de",
    ):
        resp_mock = mock.Mock()
        resp_mock.status_code = 200
        resp_mock.json = mock.Mock(
            return_value={"key": "serial", "value": 24567, "dataType": "int"}
        )
        data_to_load = {
            "wf_input_1": FilteredSource(
                ref_id="id_1",
                ref_id_type="SOURCE",
                ref_key="serial",
                type="metadata(int)",
                filters={},
            ),
        }
        with mock.patch(
            "hetdesrun.adapters.generic_rest.load_metadata.httpx.AsyncClient.get",
            return_value=resp_mock,
        ) as mocked_async_client_get:
            # happy path: value is extracted and the expected URL is requested
            loaded_metadata = await load_multiple_metadata(
                data_to_load,
                adapter_key="test_load_metadata_adapter_key",
            )
            assert loaded_metadata["wf_input_1"] == 24567
            func_name, args, kwargs = mocked_async_client_get.mock_calls[0]
            assert args[0] == "https://hetida.de/sources/id_1/metadata/serial"

            # an http error status raises a connection error
            resp_mock.status_code = 400
            with pytest.raises(AdapterConnectionError):
                loaded_metadata = await load_multiple_metadata(
                    data_to_load,
                    adapter_key="test_load_metadata_adapter_key",
                )

            # a response without the expected "key" entry is rejected
            resp_mock.status_code = 200
            resp_mock.json = mock.Mock(
                return_value={"keyyyy": "serial", "value": 24567, "dataType": "int"}
            )
            with pytest.raises(AdapterHandlingException):
                loaded_metadata = await load_multiple_metadata(
                    data_to_load,
                    adapter_key="test_load_metadata_adapter_key",
                )

            # a list instead of a single metadatum object is rejected
            resp_mock.json = mock.Mock(
                return_value=[
                    {"key": "serial", "value": 24567, "dataType": "int"},
                    {
                        "key": "desc",
                        "value": "some description",
                        "dataType": "string",
                    },
                ]
            )
            with pytest.raises(AdapterHandlingException):
                loaded_metadata = await load_multiple_metadata(
                    data_to_load,
                    adapter_key="test_load_metadata_adapter_key",
                )

            # an empty list is rejected as well
            resp_mock.json = mock.Mock(return_value=[])
            with pytest.raises(AdapterHandlingException):
                loaded_metadata = await load_multiple_metadata(
                    data_to_load,
                    adapter_key="test_load_metadata_adapter_key",
                )

        # multiple metadata values:
        with mock.patch(
            "hetdesrun.adapters.generic_rest.load_metadata.httpx.AsyncClient.get",
            new=detailed_mocked_async_client_get,
        ):
            loaded_metadata = await load_multiple_metadata(
                {
                    "wf_input_1": FilteredSource(
                        ref_id="id_1",
                        ref_id_type="THINGNODE",
                        ref_key="description",
                        type="metadata(string)",
                        filters={},
                    ),
                    "wf_input_2": FilteredSource(
                        ref_id="id_2",
                        ref_id_type="SINK",
                        ref_key="max_val",
                        type="metadata(float)",
                        filters={},
                    ),
                },
                adapter_key="test_load_metadata_adapter_key_2",
            )
            assert loaded_metadata["wf_input_1"] == "some description"
            assert loaded_metadata["wf_input_2"] == 25.9
async def test_load_metadata_any_from_string_response():
    """metadata(any) values: JSON strings are parsed, objects pass through,
    and non-JSON strings stay plain strings."""
    with mock.patch(
        "hetdesrun.adapters.generic_rest.load_metadata.get_generic_rest_adapter_base_url",
        return_value="https://hetida.de",
    ):
        resp_mock = mock.Mock()
        resp_mock.status_code = 200
        resp_mock.json = mock.Mock(
            return_value={"key": "serial", "value": '{"a": 42.3}', "dataType": "any"}
        )
        data_to_load = {
            "wf_input_1": FilteredSource(
                ref_id="id_1",
                ref_id_type="SOURCE",
                ref_key="serial",
                type="metadata(any)",
                filters={},
            ),
        }

        # works if any object is provided as json string
        with mock.patch(
            "hetdesrun.adapters.generic_rest.load_metadata.httpx.AsyncClient.get",
            return_value=resp_mock,
        ):
            loaded_metadata = await load_multiple_metadata(
                data_to_load,
                adapter_key="test_load_metadata_adapter_key",
            )
            assert loaded_metadata["wf_input_1"] == {"a": 42.3}

        # works if any object is provided as json object directly
        resp_mock.json = mock.Mock(
            return_value={"key": "serial", "value": {"a": 42.3}, "dataType": "any"}
        )
        with mock.patch(
            "hetdesrun.adapters.generic_rest.load_metadata.httpx.AsyncClient.get",
            return_value=resp_mock,
        ):
            loaded_metadata = await load_multiple_metadata(
                data_to_load,
                adapter_key="test_load_metadata_adapter_key",
            )
            assert loaded_metadata["wf_input_1"] == {"a": 42.3}

        # a string that is not valid json stays a plain string
        resp_mock.json = mock.Mock(
            # string but not a valid json string!
            return_value={"key": "serial", "value": '{"a" "b"}', "dataType": "any"}
        )
        with mock.patch(
            "hetdesrun.adapters.generic_rest.load_metadata.httpx.AsyncClient.get",
            return_value=resp_mock,
        ):
            loaded_metadata = await load_multiple_metadata(
                data_to_load,
                adapter_key="test_load_metadata_adapter_key",
            )
            assert isinstance(loaded_metadata["wf_input_1"], str)
            assert loaded_metadata["wf_input_1"] == '{"a" "b"}'