def create_temperature_ts_plot():
    """Create the temperature time-series plot used as the README sketch.

    The station listing is sorted by ``FROM_DATE`` and its first row is used
    as the station. For that station the daily ``TEMPERATURE_AIR_200`` values
    are requested and three series are drawn into one figure: the daily
    values, their per-year means, and a horizontal line at the overall mean.
    The figure is written to ``temperature_ts.png`` in the working directory.
    """
    stations = DwdObservationRequest(
        DwdObservationParameterSet.CLIMATE_SUMMARY,
        DwdObservationResolution.DAILY,
        DwdObservationPeriod.HISTORICAL,
    )
    df = stations.all()

    # Positional unpacking of the first row; fields bound to ``_`` are unused.
    station_id, _, _, height, lat, lon, name, _ = (
        df.sort_values("FROM_DATE").iloc[0].values
    )
    name = name.replace("ß", "ss")

    data = DwdObservationValues(
        [station_id],
        DwdObservationParameter.DAILY.TEMPERATURE_AIR_200,
        DwdObservationResolution.DAILY,
        period=[DwdObservationPeriod.HISTORICAL],
    )
    df = data.all()

    # Per-year mean; the grouped year is converted back to a datetime so both
    # series can be plotted against the same "DATE" axis.
    df_annual = df.groupby(df.DATE.dt.year)["VALUE"].mean().reset_index()
    df_annual["DATE"] = pd.to_datetime(df_annual["DATE"], format="%Y")
    temp_mean = df["VALUE"].mean()

    _, ax = plt.subplots(tight_layout=True)
    df.plot(
        "DATE", "VALUE", ax=ax, color="blue", label="Tmean,daily", legend=False
    )
    df_annual.plot(
        "DATE", "VALUE", ax=ax, color="orange", label="Tmean,annual", legend=False
    )
    ax.axhline(y=temp_mean, color="red", label="mean(Tmean,daily)")
    ax.text(
        0.2,
        0.05,
        "Source: Deutscher Wetterdienst",
        ha="center",
        va="center",
        transform=ax.transAxes,
    )
    ax.set_xlabel("Date")
    title = (
        f"temperature (°C) at {name} (GER)\n"
        f"ID {station_id}\n"
        f"{lat}N {lon}E {height}m"
    )
    ax.set_title(title)
    ax.legend(facecolor="white")
    ax.margins(x=0)

    plt.savefig("temperature_ts.png")
def station_example():
    """Print the DWD hourly air-temperature stations around a fixed coordinate."""
    request = DwdObservationRequest(
        parameter=DwdObservationDataset.TEMPERATURE_AIR,
        resolution=DwdObservationResolution.HOURLY,
        period=DwdObservationPeriod.RECENT,
        start_date=datetime(2020, 1, 1),
        end_date=datetime(2020, 1, 20),
    )

    # Keep stations within a distance of 30 around latitude 50.0 / longitude 8.9
    # (the distance unit is whatever filter_by_distance defaults to).
    nearby = request.filter_by_distance(latitude=50.0, longitude=8.9, distance=30)

    print(nearby.df)
def station_example():
    """Print the DWD hourly air-temperature stations around a fixed coordinate.

    The request is created with ``tidy=True`` and ``humanize=True``.
    """
    request = DwdObservationRequest(
        parameter=DwdObservationDataset.TEMPERATURE_AIR,
        resolution=DwdObservationResolution.HOURLY,
        period=DwdObservationPeriod.RECENT,
        start_date=datetime(2020, 1, 1),
        end_date=datetime(2020, 1, 20),
        tidy=True,
        humanize=True,
    )

    # Keep stations within a distance of 30 around latitude 50.0 / longitude 8.9
    # (the distance unit is whatever filter_by_distance defaults to).
    nearby = request.filter_by_distance(latitude=50.0, longitude=8.9, distance=30)

    print(nearby.df)
def create_weather_stations_map():
    """Create a map of DWD weather stations in Germany.

    Requests the station listing for the daily historical climate summary,
    draws a longitude/latitude scatter plot coloured by station height, and
    saves it as ``german_weather_stations.png`` in the working directory.
    """
    stations = DwdObservationRequest(
        DwdObservationDataset.CLIMATE_SUMMARY,
        DwdObservationResolution.DAILY,
        DwdObservationPeriod.HISTORICAL,
    )
    stations_df = stations.all().df

    _, ax = plt.subplots()

    # Rainbow colormap
    cmap = colors.LinearSegmentedColormap.from_list(
        "",
        [
            "#86007D",
            "#0000F9",
            "#008018",
            "#FFFF41",
            "#FFA52C",
            "#FF0018",
        ],
    )

    # Colour boundaries are the height quantiles at (roughly) every sixth,
    # i.e. six bins bounded by seven quantile values.
    bounds = stations_df.height.quantile(
        [0, 0.16666667, 0.33333333, 0.5, 0.66666667, 0.83333333, 1]
    ).values
    norm = colors.BoundaryNorm(bounds, cmap.N)

    # The renamed column is passed as ``c`` below, so the new name carries the
    # unit wherever pandas labels that column.
    stations_df = stations_df.rename(columns={"height": "Height [m]"})
    stations_df.plot.scatter(
        x="longitude", y="latitude", c="Height [m]", cmap=cmap, norm=norm, ax=ax
    )

    ax.set_xlabel("Longitude [°]")
    ax.set_ylabel("Latitude [°]")
    # The previous title read "German weather stations_result", which looks
    # like an identifier-rename artefact that leaked into user-visible text.
    ax.set_title("German weather stations")
    ax.text(
        0.3,
        0.05,
        "Source: Deutscher Wetterdienst",
        ha="center",
        va="center",
        transform=ax.transAxes,
    )

    plt.savefig("german_weather_stations.png")
def test_dwd_observation_stations_bbox():
    """Stations inside the bounding box must equal the expected frame."""
    first_day, last_day = datetime(2020, 1, 1), datetime(2020, 1, 20)
    request = DwdObservationRequest(
        DwdObservationDataset.TEMPERATURE_AIR,
        DwdObservationResolution.HOURLY,
        DwdObservationPeriod.HISTORICAL,
        first_day,
        last_day,
    )

    box = {"left": 8.7862, "bottom": 49.9195, "right": 8.993, "top": 50.0900}
    # "to_date" is dropped before comparing.
    actual = request.filter_by_bbox(**box).df.drop("to_date", axis="columns")

    # The expected frame carries a "distance" column, which is removed before
    # the comparison.
    expected = EXPECTED_STATIONS_DF.drop(columns=["distance"])

    assert_frame_equal(actual, expected)
def test_coerce_field_types_with_nans():
    """Test field coercion with NaNs"""
    # Global settings consumed by the request below.
    Settings.tidy = False
    Settings.humanize = False
    Settings.si_units = True

    stations_result = DwdObservationRequest(
        parameter=DwdObservationDataset.SOLAR,  # RS_IND_01
        resolution=DwdObservationResolution.HOURLY,
        period=DwdObservationPeriod.RECENT,
    ).all()

    # Raw input: every column starts with two missing markers followed by a
    # string value.
    raw_columns = {
        "qn": [pd.NA, np.nan, "1"],
        "rs_ind_01": [pd.NA, np.nan, "1"],
        "v_vv_i": [pd.NA, np.nan, "p"],
    }
    raw_df = pd.DataFrame(raw_columns)

    expected_columns = {
        "qn": pd.to_numeric([pd.NA, np.nan, 1], errors="coerce"),
        "rs_ind_01": pd.to_numeric([pd.NA, np.nan, 1], errors="coerce"),
        "v_vv_i": pd.Series([pd.NA, np.nan, "p"], dtype=pd.StringDtype()),
    }
    expected_df = pd.DataFrame(expected_columns)

    coerced_df = stations_result.values._coerce_parameter_types(raw_df)

    assert_frame_equal(coerced_df, expected_df, check_categorical=False)
def test_dwd_observation_stations_empty():
    """A bounding box with no matching stations must yield an empty frame."""
    request = DwdObservationRequest(
        DwdObservationDataset.TEMPERATURE_AIR,
        DwdObservationResolution.HOURLY,
        DwdObservationPeriod.HISTORICAL,
        datetime(2020, 1, 1),
        datetime(2020, 1, 20),
    )

    # Bbox
    filtered = request.filter_by_bbox(left=-100, bottom=-20, right=-90, top=-10)

    assert filtered.df.empty
def test_coerce_field_types_with_nans():
    """Test field coercion with NaNs"""
    stations_result = DwdObservationRequest(
        parameter=DwdObservationDataset.SOLAR,  # RS_IND_01
        resolution=DwdObservationResolution.HOURLY,
        period=DwdObservationPeriod.RECENT,
        humanize=False,
        tidy=False,
    ).all()

    # Raw input: every column starts with two missing markers followed by a
    # string value.
    raw_columns = {
        "qn": [pd.NA, np.nan, "1"],
        "rs_ind_01": [pd.NA, np.nan, "1"],
        "v_vv_i": [pd.NA, np.nan, "p"],
    }
    raw_df = pd.DataFrame(raw_columns)

    expected_columns = {
        "qn": pd.Series([pd.NA, np.nan, 1], dtype=pd.Int64Dtype()),
        "rs_ind_01": pd.Series([pd.NA, np.nan, 1], dtype=pd.Int64Dtype()),
        "v_vv_i": pd.Series([pd.NA, np.nan, "p"], dtype=pd.StringDtype()),
    }
    expected_df = pd.DataFrame(expected_columns)

    coerced_df = stations_result.values._coerce_parameter_types(raw_df)

    assert_frame_equal(coerced_df, expected_df)
def test_export_cratedb():
    """Test export of DataFrame to cratedb"""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = False

    all_stations = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
    )
    request = all_stations.filter_by_station_id(station_id=[1048])

    target = "crate://localhost/?database=test&table=testdrive"

    # DataFrame.to_sql is replaced by a mock, so the test only inspects the
    # arguments the exporter passes to it.
    with mock.patch("pandas.DataFrame.to_sql") as mock_to_sql:
        values_df = request.values.all().df
        exporter = ExportMixin(df=values_df)
        exporter.to_target(target)

        expected_kwargs = {
            "name": "testdrive",
            "con": "crate://localhost",
            "schema": "test",
            "if_exists": "replace",
            "index": False,
            "chunksize": 5000,
        }
        mock_to_sql.assert_called_once_with(**expected_kwargs)
def fetch_stations(parameter: str, resolution: str, period: str):
    """
    Fetch "stations" data.

    This will be used to populate the navigation chooser and to render the map.

    The data will be stored hidden within the browser DOM.

    Returns ``empty_frame`` when the request raises a connection error or an
    ``InvalidParameterCombination``; otherwise the stations data frame
    serialized as JSON (``orient="split"``, ISO date format).
    """
    message = (
        f"Requesting stations for parameter={parameter}, "
        f"resolution={resolution}, period={period}"
    )
    log.info(message)

    handled_errors = (requests.exceptions.ConnectionError, InvalidParameterCombination)
    try:
        request = DwdObservationRequest(
            parameter=DwdObservationDataset(parameter),
            resolution=DwdObservationResolution(resolution),
            period=DwdObservationPeriod(period),
        )
        stations = request.all()
    except handled_errors as ex:
        log.warning(ex)
        # raise PreventUpdate
        log.error("Unable to connect to data source")
        return empty_frame
    else:
        stations_df = stations.df
        log.info(f"Propagating stations data frame with {frame_summary(stations_df)}")
        return stations_df.to_json(date_format="iso", orient="split")
def test_export_influxdb2_tidy():
    """Test export of DataFrame to influxdb v2"""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = False

    all_stations = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
    )
    request = all_stations.filter_by_station_id(station_id=[1048])

    mock_client = mock.MagicMock()

    # Both names are patched with create=True, so they are created on the
    # target module if they do not already exist there.
    client_patch = mock.patch(
        "influxdb_client.InfluxDBClient",
        side_effect=[mock_client],
        create=True,
    )
    point_patch = mock.patch("influxdb_client.Point", create=True)

    with client_patch as mock_connect, point_patch:
        values_df = request.values.all().df
        exporter = ExportMixin(df=values_df)
        exporter.to_target("influxdb2://orga:token@localhost/?database=dwd&table=weather")

        mock_connect.assert_called_once_with(
            url="http://localhost:8086",
            org="orga",
            token="token",
        )
def test_dwd_observation_metadata_discover_parameters():
    """The flattened ``minute_1`` discovery output must contain the expected units."""
    discovered = DwdObservationRequest.discover(filter_="minute_1", flatten=True)

    # Key order matters: the comparison below is a substring check on the
    # serialized JSON, not a dictionary comparison.
    millimetres = {"origin": "mm", "si": "kg / m ** 2"}
    dimensionless = {"origin": "-", "si": "-"}
    expected = {
        "minute_1": {
            "precipitation_height": millimetres,
            "precipitation_height_droplet": millimetres,
            "precipitation_height_rocker": millimetres,
            "precipitation_form": dimensionless,
        }
    }

    expected_json = json.dumps(expected)
    discovered_json = json.dumps(discovered)
    assert expected_json in discovered_json
def test_export_influxdb_tidy():
    """Check the InfluxDB DataFrameClient calls made when exporting a tidy frame."""
    all_stations = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
        tidy=True,
        si_units=False,
    )
    request = all_stations.filter_by_station_id(station_id=[1048])

    mock_client = mock.MagicMock()
    client_patch = mock.patch(
        "influxdb.dataframe_client.DataFrameClient",
        side_effect=[mock_client],
        create=True,
    )

    with client_patch as mock_connect:
        values_df = request.values.all().df
        exporter = ExportMixin(df=values_df)
        exporter.to_target("influxdb://localhost/?database=dwd&table=weather")

        mock_connect.assert_called_once_with(database="dwd")
        mock_client.create_database.assert_called_once_with("dwd")

        write_points = mock_client.write_points
        write_points.assert_called_once()
        write_points.assert_called_with(
            dataframe=mock.ANY,
            measurement="weather",
            tag_columns=["station_id", "quality", "dataset", "parameter"],
            batch_size=50000,
        )
def test_dwd_observation_metadata_describe_fields_kl_daily_english():
    """Check the sections and parameter keys returned by ``describe_fields``."""
    metadata = DwdObservationRequest.describe_fields(
        dataset=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
    )

    # Both comparisons are order-sensitive list comparisons.
    expected_sections = ["parameters", "quality_information"]
    assert list(metadata.keys()) == expected_sections

    expected_parameters = [
        "stations_id",
        "mess_datum",
        "qn_3",
        "fx",
        "fm",
        "qn_4",
        "rsk",
        "rskf",
        "sdk",
        "shk_tag",
        "nm",
        "vpm",
        "pm",
        "tmk",
        "upm",
        "txk",
        "tnk",
        "tgk",
    ]
    assert list(metadata["parameters"].keys()) == expected_parameters
def test_dwd_observation_stations_nearby_number_multiple():
    """Ranking three stations around 50.0 / 8.9 must give the expected frame."""
    request = DwdObservationRequest(
        DwdObservationDataset.TEMPERATURE_AIR,
        DwdObservationResolution.HOURLY,
        DwdObservationPeriod.HISTORICAL,
        datetime(2020, 1, 1),
        datetime(2020, 1, 20),
    )

    ranked = request.filter_by_rank(50.0, 8.9, 3)
    # "to_date" is dropped before comparing.
    actual = ranked.df.drop("to_date", axis="columns")

    assert_frame_equal(actual, EXPECTED_STATIONS_DF)
def test_api_no_valid_parameters():
    """Requesting TEMPERATURE_AIR at daily resolution must raise NoParametersFound."""
    request_kwargs = {
        "parameter": [DwdObservationDataset.TEMPERATURE_AIR],
        "resolution": DwdObservationResolution.DAILY,
    }

    with pytest.raises(NoParametersFound):
        DwdObservationRequest(**request_kwargs)
def test_dwd_observation_stations_bbox():
    """Stations inside the bounding box must equal the expected frame."""
    request = DwdObservationRequest(
        DwdObservationDataset.TEMPERATURE_AIR,
        DwdObservationResolution.HOURLY,
        DwdObservationPeriod.RECENT,
        datetime(2020, 1, 1),
        datetime(2020, 1, 20),
    )

    box = {"left": 8.7862, "bottom": 49.9195, "right": 8.993, "top": 50.0899}
    actual = request.filter_by_bbox(**box).df.drop("to_date", axis="columns")

    # The expected frame is prepared for comparison: "distance" is removed and
    # rows are ordered by the integer value of station_id with a fresh index.
    expected = EXPECTED_DF_MULTIPLE.drop(columns=["distance"])
    expected = expected.sort_values(
        ["station_id"], key=lambda column: column.astype(int)
    )
    expected = expected.reset_index(drop=True)

    pd.testing.assert_frame_equal(actual, expected)
def test_dwd_observation_stations_nearby_distance():
    """Distance filtering in kilometers and in miles must give the same stations."""
    request = DwdObservationRequest(
        DwdObservationDataset.TEMPERATURE_AIR,
        DwdObservationResolution.HOURLY,
        DwdObservationPeriod.HISTORICAL,
        datetime(2020, 1, 1),
        datetime(2020, 1, 20),
    )

    # Kilometers first, then miles; both are compared to the same expected frame.
    cases = ((16.13, "km"), (10.03, "mi"))
    for distance, unit in cases:
        nearby = request.filter_by_distance(50.0, 8.9, distance, unit)
        actual = nearby.df.drop(columns="to_date")
        assert_frame_equal(actual, EXPECTED_STATIONS_DF)
def test_dwd_observation_stations_nearby_number_single():
    """Ranking a single station around 50.0 / 8.9 must give the expected frame."""
    # Test for one nearest station
    request = DwdObservationRequest(
        DwdObservationDataset.TEMPERATURE_AIR,
        DwdObservationResolution.HOURLY,
        DwdObservationPeriod.RECENT,
        datetime(2020, 1, 1),
        datetime(2020, 1, 20),
    )

    ranked = request.filter_by_rank(50.0, 8.9, 1)
    # "to_date" is dropped before comparing.
    actual = ranked.df.drop("to_date", axis="columns")

    assert_frame_equal(actual, EXPECTED_DF_SINGLE)
def test_request():
    """Values requested for station 1048 must not be empty."""
    all_stations = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
    )
    request = all_stations.filter_by_station_id(station_id=[1048])

    values_df = request.values.all().df

    assert not values_df.empty
def sql_example():
    """Request hourly air-temperature values for station 1048 and apply an SQL filter.

    The request is created with ``tidy=True`` and ``humanize=True``; the
    resulting data frame is printed.
    """
    request = DwdObservationRequest(
        parameter=[DwdObservationDataset.TEMPERATURE_AIR],
        resolution=DwdObservationResolution.HOURLY,
        start_date="2019-01-01",
        end_date="2020-01-01",
        tidy=True,
        humanize=True,
    )
    stations = request.filter_by_station_id(station_id=(1048,))

    sql = "SELECT * FROM data WHERE parameter='temperature_air_200' AND value < -7.0;"
    log.info(f"Invoking SQL query '{sql}'")

    # Acquire observation values and filter with SQL.
    # NOTE(review): the return value of filter_by_sql is discarded; confirm
    # that it filters the result in place.
    values = stations.values.all()
    values.filter_by_sql(sql)

    print(values.df)
def test_dwd_observation_parameters_strings_uppercase():
    """Upper-case parameter strings must resolve to the reference parameters."""
    uppercase_names = ["TMK", "TXK", "TNK", "RSK", "RSKF"]

    request = DwdObservationRequest(
        parameter=uppercase_names,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.HISTORICAL,
    )

    assert request.parameter == parameters_reference
def test_export_unknown():
    """Exporting to an unknown file type must raise a KeyError with a clear message."""
    all_stations = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
    )
    request = all_stations.filter_by_station_id(station_id=[1048])

    values_df = request.values.all().df
    exporter = ExportMixin(df=values_df)

    with pytest.raises(KeyError) as ex:
        exporter.to_target("file:///test.foobar")

    # Checked after the context manager so the line is actually reached.
    ex.match("Unknown export file type")
def sql_example():
    """Retrieve temperature data by DWD and filter by sql statement."""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = False

    request = DwdObservationRequest(
        parameter=[DwdObservationDataset.TEMPERATURE_AIR],
        resolution=DwdObservationResolution.HOURLY,
        start_date="2019-01-01",
        end_date="2020-01-01",
    )
    stations = request.filter_by_station_id(station_id=(1048,))

    sql = "SELECT * FROM data WHERE parameter='temperature_air_mean_200' AND value < -7.0;"
    log.info(f"Invoking SQL query '{sql}'")

    # Acquire observation values and filter with SQL.
    # NOTE(review): the return value of filter_by_sql is discarded; confirm
    # that it filters the result in place.
    values = stations.values.all()
    values.filter_by_sql(sql)

    print(values.df)
def fields_example():
    """Pretty-print the field descriptions of the daily climate summary.

    The descriptions are printed twice, first with ``language="en"`` and then
    with ``language="de"``.
    """
    # Alternative output formats (not enabled here):
    #   JSON: json.dumps(metadata.describe_fields(), indent=4)
    #   YAML: yaml.dump(dict(metadata.describe_fields()), default_style="|")

    # Output in pretty-print format.
    for language in ("en", "de"):
        description = DwdObservationRequest.describe_fields(
            dataset=DwdObservationDataset.CLIMATE_SUMMARY,
            resolution=DwdObservationResolution.DAILY,
            period=DwdObservationPeriod.RECENT,
            language=language,
        )
        pprint(description)
def test_dwd_observation_parameters_strings_lowercase():
    """Lower-case parameter strings must resolve to the reference parameters."""
    lowercase_names = ["tmk", "txk", "tnk", "rsk", "rskf"]

    request = DwdObservationRequest(
        parameter=lowercase_names,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.HISTORICAL,
    )

    assert request.parameter == parameters_reference
def test_dwd_observation_parameters_constants():
    """Parameter enum constants must resolve to the reference parameters."""
    daily = DwdObservationParameter.DAILY
    constants = [
        daily.TEMPERATURE_AIR_200,  # tmk
        daily.TEMPERATURE_AIR_MAX_200,  # txk
        daily.TEMPERATURE_AIR_MIN_200,  # tnk
        daily.PRECIPITATION_HEIGHT,  # rsk
        daily.PRECIPITATION_FORM,  # rskf
    ]

    request = DwdObservationRequest(
        parameter=constants,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.HISTORICAL,
    )

    assert request.parameter == parameters_reference
def test_export_influxdb1_tidy():
    """Test export of DataFrame to influxdb v1"""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = False

    all_stations = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
    )
    request = all_stations.filter_by_station_id(station_id=[1048])

    mock_client = mock.MagicMock()
    client_patch = mock.patch(
        "influxdb.InfluxDBClient",
        side_effect=[mock_client],
        create=True,
    )

    with client_patch as mock_connect:
        values_df = request.values.all().df
        exporter = ExportMixin(df=values_df)
        exporter.to_target("influxdb://localhost/?database=dwd&table=weather")

        # Connection parameters passed to the client constructor.
        mock_connect.assert_called_once_with(
            host="localhost",
            port=8086,
            username=None,
            password=None,
            database="dwd",
            ssl=False,
        )
        mock_client.create_database.assert_called_once_with("dwd")

        write_points = mock_client.write_points
        write_points.assert_called_once()
        write_points.assert_called_with(points=mock.ANY, batch_size=50000)

        # Inspect the first point that was handed to write_points.
        first_point = write_points.call_args.kwargs["points"][0]
        assert first_point["measurement"] == "weather"

        expected_tags = ["station_id", "quality", "dataset", "parameter"]
        assert list(first_point["tags"].keys()) == expected_tags

        expected_fields = ["value"]
        assert list(first_point["fields"].keys()) == expected_fields
def test_request():
    """Test general data request"""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = True

    all_stations = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
    )
    request = all_stations.filter_by_station_id(station_id=[1048])

    values_df = request.values.all().df

    assert not values_df.empty
def __init__(self, station_id) -> None:
    """Build the 10-minute observation request for one station.

    The filtered request is stored on ``self.request``.
    """
    minute_10 = DwdObservationParameter.MINUTE_10
    wanted_parameters = [
        minute_10.TEMPERATURE_AIR_200,
        minute_10.RADIATION_GLOBAL,
        minute_10.RADIATION_SKY_DIFFUSE,
        minute_10.TEMPERATURE_DEW_POINT_200,
        minute_10.PRESSURE_AIR_STATION_HEIGHT,
        minute_10.WIND_SPEED,
    ]

    # create request
    unfiltered = DwdObservationRequest(
        parameter=wanted_parameters,
        resolution=DwdObservationResolution.MINUTE_10,
        period=DwdObservationPeriod.NOW,
    )
    # 1078 = Duesseldorf Flughafen
    self.request = unfiltered.filter(station_id=(station_id,))