def test_json_dumps_after_used_by_dts(self, ts_data_spec_dtos, files_data_spec_dto):
    """A DataSpec's JSON form must be unchanged after a DataTransferService run uses it."""
    spec = DataSpec(time_series_data_specs=ts_data_spec_dtos, files_data_spec=files_data_spec_dto)
    json_before = spec.to_JSON()
    service = DataTransferService(spec)
    service.get_dataframes()
    # The service must not mutate the spec it was handed.
    assert json_before == spec.to_JSON()
def test_dict_dto_equal(self, ts_data_spec_dicts, ts_data_spec_dtos):
    """Dict-based and DTO-based time series specs must yield identical dataframes."""
    spec_from_dicts = DataSpec(time_series_data_specs=ts_data_spec_dicts)
    spec_from_dtos = DataSpec(time_series_data_specs=ts_data_spec_dtos)
    frames_from_dicts = DataTransferService(spec_from_dicts).get_dataframes()
    frames_from_dtos = DataTransferService(spec_from_dtos).get_dataframes()
    for dto_frame, dict_frame in zip(frames_from_dtos.values(), frames_from_dicts.values()):
        pd.testing.assert_frame_equal(dto_frame, dict_frame)
def test_get_dataframes(self, ts_data_spec_dtos):
    """get_dataframes returns one pandas DataFrame per labelled spec."""
    service = DataTransferService(DataSpec(time_series_data_specs=ts_data_spec_dtos))
    frames = service.get_dataframes()
    for label in ("ds1", "ds2"):
        assert isinstance(frames.get(label), pd.DataFrame)
def test_instantiate_ts_data_spec_time_series_not_list(self):
    """Passing a bare TimeSeries (not a list) as time_series must be rejected."""
    # Construction stays inside the raises-block: the validation error may be
    # raised by either constructor.
    with pytest.raises(DataSpecValidationError):
        ts_spec = TimeSeriesDataSpec(time_series=TimeSeries(id=1234), aggregates=["avg"], granularity="1s")
        DataSpec(time_series_data_specs=[ts_spec])
def test_instantiate_ts_data_spec_invalid_time_series_types(self):
    """Plain dict entries in time_series must raise DataSpecValidationError."""
    # Construction stays inside the raises-block: the validation error may be
    # raised by either constructor.
    with pytest.raises(DataSpecValidationError):
        ts_spec = TimeSeriesDataSpec(time_series=[{"id": 1234}], aggregates=["avg"], granularity="1s")
        DataSpec(time_series_data_specs=[ts_spec])
def _time_series_for_tags(tags):
    """Map each tag name to a TimeSeries.

    Valve tags (containing "ESV" or "18HV") use step aggregation with
    forward-fill; all other tags use average aggregation with linear
    interpolation for missing data.
    """
    series = []
    for tag in tags:
        is_valve = "ESV" in tag or "18HV" in tag
        series.append(
            TimeSeries(
                name=tag,
                missing_data_strategy="ffill" if is_valve else "linearInterpolation",
                aggregates=["step" if is_valve else "avg"],
            )
        )
    return series


def generate_data_spec(last_processed_timestamp, granularity="10s"):
    """Build a DataSpec covering wells D02 and D03 from the tag CSVs under ../tags.

    Parameters:
        last_processed_timestamp: start timestamp (ms) for both specs.
        granularity: aggregation granularity string, e.g. "10s".

    Returns:
        DataSpec with two TimeSeriesDataSpecs labelled "d2" and "d3".
    """
    tags_d03 = []
    tags_d02 = []
    for root, subdirs, files in os.walk("../tags"):
        for file in files:
            if file in ("well_tags.csv", "routing.csv", "riser_tags.csv", "output.csv", "template_tags.csv"):
                with open(os.path.join(root, file)) as f:
                    df = pd.read_csv(f)
                placements = ["T3 WGM", "Template", "Riser"]
                df = df[~df["tag"].isin(EXCLUDE_TAGS)]
                tags_d03.append(df[df["placement"].isin(["WellD03"] + placements)])
                tags_d02.append(df[df["placement"].isin(["WellD02"] + placements)])
    tags_d02_concat = pd.concat(tags_d02, ignore_index=True).drop_duplicates(subset="tag")
    tags_d03_concat = pd.concat(tags_d03, ignore_index=True).drop_duplicates(subset="tag")
    # Single timestamp so both specs share exactly the same end; the original
    # called datetime.now() twice, giving the two specs slightly different ends.
    end = int(datetime.now().timestamp() * 1e3)
    d02_tsds = TimeSeriesDataSpec(
        time_series=_time_series_for_tags(tags_d02_concat["tag"]),
        aggregates=["avg"],
        granularity=granularity,
        start=last_processed_timestamp,
        end=end,
        label="d2",
        missing_data_strategy="ffill",
    )
    d03_tsds = TimeSeriesDataSpec(
        time_series=_time_series_for_tags(tags_d03_concat["tag"]),
        aggregates=["avg"],
        granularity=granularity,
        start=last_processed_timestamp,
        end=end,
        label="d3",
        missing_data_strategy="ffill",
    )
    return DataSpec(time_series_data_specs=[d02_tsds, d03_tsds])
def test_instantiate_ts_data_spec_duplicate_labels(self):
    """Two specs resolving to the same label must raise DataSpecValidationError."""
    with pytest.raises(DataSpecValidationError):
        unlabelled = TimeSeriesDataSpec(time_series=[TimeSeries("ts1")], aggregates=["avg"], granularity="1s")
        labelled_default = TimeSeriesDataSpec(
            time_series=[TimeSeries("ts1")], aggregates=["avg"], granularity="1s", label="default"
        )
        DataSpec(time_series_data_specs=[unlabelled, labelled_default])
def test_get_files(self):
    """get_file returns a BytesIO holding exactly the bytes stored for the file id."""
    expected_bytes = b'import os\n\nfrom cognite.config import configure_session\nfrom cognite.v05 import files\n\nconfigure_session(os.getenv("COGNITE_TEST_API_KEY"), "mltest")\n\n\nres = files.upload_file("test.py", "./test.py")\n\nprint(res)\n'
    spec = DataSpec(files_data_spec=FilesDataSpec(file_ids={"test": 7725800487412823}))
    service = DataTransferService(spec)
    data = service.get_file("test")
    assert isinstance(data, BytesIO)
    assert data.getvalue() == expected_bytes
def data_spec(self, time_series_in_cdp):
    """Fixture: a DataSpec with four labelled views of the first CDP time series."""
    ts_id = time_series_in_cdp[0]
    series = [
        TimeSeries(id=ts_id, aggregates=["avg", "min"], label="ts1"),
        TimeSeries(id=ts_id, aggregates=["cv"], label="ts2"),
        TimeSeries(id=ts_id, aggregates=["max", "count"], label="ts3"),
        TimeSeries(id=ts_id, aggregates=["step"], label="ts4"),
    ]
    tsds = TimeSeriesDataSpec(time_series=series, aggregates=["avg"], granularity="1h", start="300d-ago")
    yield DataSpec(time_series_data_specs=[tsds])
def test_get_dataframes_w_column_mapping(self, time_series_in_cdp):
    """Dataframe column names must follow the TimeSeries labels, in order."""
    series = [
        TimeSeries(id=time_series_in_cdp[0], aggregates=["avg"], label="cavg"),
        TimeSeries(id=time_series_in_cdp[0], aggregates=["cv"], label="ccv"),
        TimeSeries(id=time_series_in_cdp[1], aggregates=["avg"], label="sinavg"),
    ]
    tsds = TimeSeriesDataSpec(time_series=series, aggregates=["avg"], granularity="1h", start="300d-ago")
    frames = DataTransferService(DataSpec([tsds])).get_dataframes()
    assert list(frames["default"].columns.values) == ["timestamp", "cavg", "ccv", "sinavg"]
def main():
    """Download D02/D03 dataframes from CDP and write them to ../data/<label>.csv."""
    configure_session(api_key=os.getenv("COGNITE_API_KEY"), project="akerbp", debug=True)

    def build_series(tags):
        # Valve tags (ESV/18HV) are stepped and forward-filled; everything
        # else is averaged with linear interpolation for missing data.
        # Nested helper removes the copy-pasted D02/D03 loop from the original.
        result = []
        for tag in tags:
            is_valve = "ESV" in tag or "18HV" in tag
            result.append(
                TimeSeries(
                    name=tag,
                    missing_data_strategy="ffill" if is_valve else "linearInterpolation",
                    aggregates=["step" if is_valve else "avg"],
                )
            )
        return result

    tags_d03 = []
    tags_d02 = []
    for root, subdirs, files in os.walk("../tags"):
        for file in files:
            if file in ("well_tags.csv", "routing.csv", "output.csv", "riser_tags.csv", "template_tags.csv"):
                with open(os.path.join(root, file)) as f:
                    df = pd.read_csv(f)
                placements = ["T3 WGM", "Template", "Riser"]
                df = df[~df["tag"].isin(EXCLUDE_TAGS)]
                tags_d03.append(df[df["placement"].isin(["WellD03"] + placements)])
                tags_d02.append(df[df["placement"].isin(["WellD02"] + placements)])
    tags_d02_concat = pd.concat(tags_d02, ignore_index=True).drop_duplicates(subset="tag")
    tags_d03_concat = pd.concat(tags_d03, ignore_index=True).drop_duplicates(subset="tag")
    # Shared constant start: 2017-03-01 in epoch milliseconds.
    start = int(datetime(2017, 3, 1).timestamp() * 1e3)
    d02_tsds = TimeSeriesDataSpec(
        time_series=build_series(tags_d02_concat["tag"]),
        aggregates=["avg"],
        granularity="10s",
        start=start,
        label="d2",
    )
    d03_tsds = TimeSeriesDataSpec(
        time_series=build_series(tags_d03_concat["tag"]),
        aggregates=["avg"],
        granularity="10s",
        start=start,
        label="d3",
    )
    data_spec = DataSpec(time_series_data_specs=[d02_tsds, d03_tsds])
    dts = DataTransferService(data_spec, num_of_processes=10)
    print(data_spec.to_JSON())
    for label, df in dts.get_dataframes().items():
        df.to_csv(f"../data/{label}.csv")
        print(df.shape)
def test_instantiate_files_data_spec_file_id_invalid_type(self):
    """A non-integer file id value ("456") must raise DataSpecValidationError."""
    with pytest.raises(DataSpecValidationError):
        DataSpec(files_data_spec=FilesDataSpec(file_ids={"f1": 123, "f2": "456"}))
def test_json_dumps_loads(self, ts_data_spec_dtos, files_data_spec_dto):
    """A DataSpec must round-trip unchanged through to_JSON / from_JSON."""
    data_spec = DataSpec(time_series_data_specs=ts_data_spec_dtos, files_data_spec=files_data_spec_dto)
    json_repr = data_spec.to_JSON()
    ds = DataSpec.from_JSON(json_repr)
    # Use == rather than calling __eq__ directly: __eq__ may return
    # NotImplemented, which is truthy, so the original assert could pass
    # even when the objects are not equal.
    assert ds == data_spec
def test_from_JSON_str(self):
    """A JSON string that does not describe a DataSpec must be rejected."""
    bogus_json = json.dumps({"blabla": "dada"})
    with pytest.raises(DataSpecValidationError):
        DataSpec.from_JSON(bogus_json)
def test_from_JSON_invalid(self):
    """Non-string input (a raw dict) to from_JSON must be rejected."""
    with pytest.raises(DataSpecValidationError):
        DataSpec.from_JSON({"blabla": "dada"})
def test_instantiate_ts_data_spec_invalid_type(self):
    """Entries that are not TimeSeriesDataSpec objects must be rejected."""
    with pytest.raises(DataSpecValidationError):
        DataSpec(time_series_data_specs=["str"])
def test_instantiate_data_spec(self, ts_data_spec_dtos):
    """A DataSpec with both time series specs and a files spec constructs cleanly."""
    files_spec = FilesDataSpec(file_ids={"name": 123})
    DataSpec(ts_data_spec_dtos, files_data_spec=files_spec)
def test_instantiate_files_data_spec_file_ids_invalid_type(self):
    """file_ids must be a dict; passing a list must raise DataSpecValidationError."""
    with pytest.raises(DataSpecValidationError):
        DataSpec(files_data_spec=FilesDataSpec(file_ids=[1, 2, 3]))