def test_dwd_observations_stations_geojson():
    """Check the OGC feature collection generated for station "00001"."""
    # Existing combination of parameters
    stations = DwdObservationRequest(
        DwdObservationDataset.CLIMATE_SUMMARY,
        DwdObservationResolution.DAILY,
        DwdObservationPeriod.HISTORICAL,
    ).filter_by_station_id(station_id=("00001",))

    assert not stations.df.empty

    # Only the first feature of the collection is inspected.
    first_feature = stations.to_ogc_feature_collection()["features"][0]
    station_properties = first_feature["properties"]
    station_geometry = first_feature["geometry"]

    assert station_properties["name"] == "Aach"
    assert station_properties["state"] == "Baden-Württemberg"

    expected_geometry = {
        "type": "Point",
        "coordinates": [8.8493, 47.8413, 478.0],
    }
    assert station_geometry == expected_geometry
def test_dwd_observations_stations_filter_name_empty():
    """Filtering stations by the name "FizzBuzz" must give an empty frame."""
    # Existing combination of parameters
    stations = DwdObservationRequest(
        DwdObservationDataset.CLIMATE_SUMMARY,
        DwdObservationResolution.DAILY,
        DwdObservationPeriod.HISTORICAL,
    )

    matches = stations.filter_by_name(name="FizzBuzz")

    assert matches.df.empty
def test_dwd_observations_stations_filter_name():
    """Filtering stations by the name "Aach" must reproduce ``EXPECTED_DF``."""
    # Existing combination of parameters
    stations = DwdObservationRequest(
        DwdObservationDataset.CLIMATE_SUMMARY,
        DwdObservationResolution.DAILY,
        DwdObservationPeriod.HISTORICAL,
    )

    matches = stations.filter_by_name(name="Aach").df

    assert not matches.empty
    assert_frame_equal(matches, EXPECTED_DF)
def about(options: Munch):
    """
    Output possible arguments for command line options
    "--parameter", "--resolution" and "--period".

    Depending on the selected subcommand this prints the members of the
    matching enumeration, the dataset coverage as JSON, or a pretty-printed
    description of the dataset fields. When no subcommand is selected, an
    error is logged, the known subcommands are listed and the process exits
    with status 1.

    :param options: Normalized docopt command line options.
    """

    def print_items(items):
        # Emit one "- <value>" line per truthy item, preferring ``item.value``
        # (enumeration members) over the item itself (plain strings).
        for item in items:
            if item:
                print("-", getattr(item, "value", item))

    if options.parameters:
        print_items(DwdObservationDataset)

    elif options.resolutions:
        print_items(DwdObservationResolution)

    elif options.periods:
        print_items(DwdObservationPeriod)

    elif options.coverage:
        metadata = DwdObservationRequest.discover(
            filter_=options.resolution,
            dataset=read_list(options.parameter),
            flatten=False,
        )
        print(json.dumps(metadata, indent=4))

    elif options.fields:
        metadata = DwdObservationRequest.describe_fields(
            dataset=read_list(options.parameter),
            resolution=options.resolution,
            period=read_list(options.period),
            language=options.language,
        )
        print(pformat(dict(metadata)))

    else:
        log.error(
            'Please invoke "wetterdienst dwd about" with one of these subcommands:'
        )
        # "fields" is handled above, so it belongs in the list of subcommands.
        print_items(["parameters", "resolutions", "periods", "coverage", "fields"])
        sys.exit(1)
def test_dwd_observation_data_result_missing_data():
    """Dates without station data must still appear, carrying empty values."""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = True

    daily_stations = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        start_date="1933-12-27",  # few days before official start
        end_date="1934-01-04",  # few days after official start,
    ).filter_by_station_id(station_id=[1048])

    # Leave only one column to potentially contain NaN which is VALUE
    daily_values = daily_stations.values.all().df.drop(columns="quality")

    years = daily_values["date"].dt.year
    values_1933 = daily_values[years == 1933]
    values_1934 = daily_values[years == 1934]

    # 1933: rows exist, but none of them holds a value.
    assert not values_1933.empty
    assert values_1933.dropna().empty

    # 1934: rows exist and at least one of them holds a value.
    assert not values_1934.empty
    assert not values_1934.dropna().empty

    hourly_stations = DwdObservationRequest(
        parameter=DwdObservationParameter.HOURLY.TEMPERATURE_AIR_MEAN_200,
        resolution=DwdObservationResolution.HOURLY,
        start_date="2020-06-09 12:00:00",  # no data at this time (reason unknown)
        end_date="2020-06-09 12:00:00",
    ).filter_by_station_id(station_id=["03348"])

    hourly_values = hourly_stations.values.all().df

    missing = pd.Series([pd.NA], dtype=pd.Float64Dtype()).astype(float)
    expected_values = pd.DataFrame(
        {
            "station_id": pd.Categorical(["03348"]),
            "dataset": pd.Categorical(["temperature_air"]),
            "parameter": pd.Categorical(["temperature_air_mean_200"]),
            "date": [datetime(2020, 6, 9, 12, 0, 0, tzinfo=pytz.UTC)],
            "value": missing,
            "quality": pd.Series([pd.NA], dtype=pd.Float64Dtype()).astype(float),
        }
    )

    assert_frame_equal(hourly_values, expected_values, check_categorical=False)
def test_dwd_observations_stations_fail():
    """Both invalid filter invocations below are expected to raise ``TypeError``."""
    request_args = (
        DwdObservationDataset.CLIMATE_SUMMARY,
        DwdObservationResolution.DAILY,
        DwdObservationPeriod.HISTORICAL,
    )

    # ``filter_by_station_id`` invoked with a ``name`` keyword.
    with pytest.raises(TypeError):
        stations = DwdObservationRequest(*request_args)
        stations.filter_by_station_id(name="FizzBuzz")

    # ``filter_by_name`` invoked with an integer as ``name``.
    with pytest.raises(TypeError):
        stations = DwdObservationRequest(*request_args)
        stations.filter_by_name(name=123)
def test_dwd_observation_data_fails():
    """Cover an unknown station id and a start date lying after the end date."""
    # station id
    stations = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        period=[DwdObservationPeriod.HISTORICAL],
        resolution=DwdObservationResolution.DAILY,
    ).filter_by_station_id(station_id=["test"])

    assert stations.df.empty

    # start date after end date
    invalid_request_args = {
        "parameter": ["abc"],
        "resolution": DwdObservationResolution.DAILY,
        "start_date": "1971-01-01",
        "end_date": "1951-01-01",
    }
    with pytest.raises(StartDateEndDateError):
        DwdObservationRequest(**invalid_request_args)
def test_create_humanized_column_names_mapping():
    """The humanized parameter mapping must contain all daily "kl" entries."""
    expected_pairs = {
        ("fx", "wind_gust_max"),
        ("fm", "wind_speed"),
        ("rsk", "precipitation_height"),
        ("rskf", "precipitation_form"),
        ("sdk", "sunshine_duration"),
        ("shk_tag", "snow_depth"),
        ("nm", "cloud_cover_total"),
        ("vpm", "pressure_vapor"),
        ("pm", "pressure_air_site"),
        ("tmk", "temperature_air_mean_200"),
        ("upm", "humidity"),
        ("txk", "temperature_air_max_200"),
        ("tnk", "temperature_air_min_200"),
        ("tgk", "temperature_air_min_005"),
    }

    stations = DwdObservationRequest(
        [DwdObservationDataset.CLIMATE_SUMMARY],
        DwdObservationResolution.DAILY,
        [DwdObservationPeriod.RECENT],
    ).filter_by_station_id((0,))

    mapping = stations.values._create_humanized_parameters_mapping()

    # Only a subset relation is required; the mapping may hold more entries.
    assert expected_pairs.issubset(set(mapping.items()))
def test_dwd_observation_weather_phenomena():
    """
    Test for DWD weather phenomena data.

    Thanks to saschnet (https://github.com/saschnet) for providing the sample,
    see also https://github.com/earthobservations/wetterdienst/issues/647
    """
    Settings.tidy = True
    Settings.humanize = False
    Settings.si_units = False

    march_start = datetime(year=2022, month=3, day=1, tzinfo=timezone.utc)
    march_end = datetime(year=2022, month=3, day=31, tzinfo=timezone.utc)

    request = DwdObservationRequest(
        resolution=DwdObservationResolution.HOURLY,
        parameter=[DwdObservationParameter.HOURLY.WEATHER_PHENOMENA.WEATHER],
        start_date=march_start,
        end_date=march_end,
    )

    complete_rows = request.all().df.dropna()

    assert len(complete_rows) > 0
def test_request_period_recent_now():
    """A start date two hours in the past must resolve to RECENT and NOW."""
    two_hours_ago = pd.Timestamp(datetime.utcnow()) - pd.Timedelta(hours=2)

    request = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        start_date=two_hours_ago,
    )

    expected_periods = [Period.RECENT, Period.NOW]
    assert request.period == expected_periods
def test_request_period_now_fixeddate():
    """A start date two hours in the past must include the NOW period."""
    # Now period
    two_hours_ago = pd.Timestamp(datetime.utcnow()) - pd.Timedelta(hours=2)

    request = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        start_date=two_hours_ago,
    )

    resolved_periods = request.period
    assert Period.NOW in resolved_periods
def test_request_period_empty():
    """A start date 720 days in the future must resolve to no period at all."""
    # No period (for example in future)
    far_future = pd.Timestamp(datetime.utcnow()) + pd.Timedelta(days=720)

    request = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        start_date=far_future,
    )

    resolved_periods = request.period
    assert resolved_periods == []
def test_dwd_observation_data_10_minutes_result_tidy():
    """Compare tidy 10-minute air pressure values against a fixed expectation."""
    Settings.tidy = True
    Settings.humanize = False
    Settings.si_units = False

    stations = DwdObservationRequest(
        parameter=[
            DwdObservationParameter.MINUTE_10.TEMPERATURE_AIR.PRESSURE_AIR_SITE
        ],
        resolution=DwdObservationResolution.MINUTE_10,
        start_date="1999-12-31 22:00",
        end_date="1999-12-31 23:00",
    ).filter_by_station_id(station_id=(1048,))

    actual = stations.values.all().df

    # Seven timestamps: 22:00 up to 22:50 in ten minute steps, plus 23:00.
    timestamps = [
        datetime(1999, 12, 31, 22, minute, tzinfo=pytz.UTC)
        for minute in range(0, 60, 10)
    ]
    timestamps.append(datetime(1999, 12, 31, 23, 00, tzinfo=pytz.UTC))

    pressures = [996.1, 996.2, 996.2, 996.2, 996.3, 996.4, pd.NA]
    qualities = [1, 1, 1, 1, 1, 1, pd.NA]

    expected = pd.DataFrame(
        {
            "station_id": pd.Categorical(["01048"] * 7),
            "dataset": pd.Categorical(["temperature_air"] * 7),
            "parameter": pd.Categorical(["pp_10"] * 7),
            "date": timestamps,
            "value": pd.to_numeric(pressures, errors="coerce").astype(float),
            "quality": pd.to_numeric(qualities, errors="coerce").astype(float),
        },
    )

    # Needed since pandas 1.2?
    assert_frame_equal(actual, expected, check_categorical=False)
def test_dwd_observation_data_api():
    """String arguments must yield the same request as their enum equivalents."""
    request = DwdObservationRequest(
        parameter=["precipitation_height"],
        resolution="daily",
        period=["recent", "historical"],
    )

    enum_based_request = DwdObservationRequest(
        parameter=[DwdObservationParameter.DAILY.PRECIPITATION_HEIGHT],
        resolution=Resolution.DAILY,
        period=[Period.HISTORICAL, Period.RECENT],
        start_date=None,
        end_date=None,
    )
    assert request == enum_based_request

    expected_pair = (
        DwdObservationParameter.DAILY.CLIMATE_SUMMARY.PRECIPITATION_HEIGHT,
        DwdObservationDataset.CLIMATE_SUMMARY,
    )
    assert request.parameter == [expected_pair]
def test_dwd_observation_data_dates():
    """Check start/end date handling of the observation request."""
    # time input
    # Only a start date is given.
    start_only = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        start_date="1971-01-01",
    ).filter_by_station_id(station_id=[1])

    expected_for_start_only = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        period=[DwdObservationPeriod.HISTORICAL],
        start_date=datetime(1971, 1, 1),
        end_date=datetime(1971, 1, 1),
    ).filter_by_station_id(station_id=[1])

    assert start_only == expected_for_start_only

    # Only an end date is given.
    end_only = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        period=[DwdObservationPeriod.HISTORICAL],
        end_date="1971-01-01",
    ).filter_by_station_id(station_id=[1])

    expected_for_end_only = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        period=[DwdObservationPeriod.HISTORICAL],
        start_date=datetime(1971, 1, 1),
        end_date=datetime(1971, 1, 1),
    ).filter_by_station_id(station_id=[1])

    assert end_only == expected_for_end_only

    # The start date lies after the end date.
    with pytest.raises(StartDateEndDateError):
        DwdObservationRequest(
            parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
            resolution=DwdObservationResolution.DAILY,
            start_date="1971-01-01",
            end_date="1951-01-01",
        )
def test_dwd_observation_data_parameter():
    """A parameter given as string must equal its enum-based counterpart."""
    # NOTE(review): a second function named ``test_dwd_observation_data_parameter``
    # is defined further down; if both live in the same module, the later
    # definition replaces this one and this test is never collected.
    request = DwdObservationRequest(
        parameter=["precipitation_height"],
        resolution="daily",
        period=["recent", "historical"],
    ).filter_by_station_id(station_id=[1])

    enum_based_request = DwdObservationRequest(
        parameter=[DwdObservationParameter.DAILY.PRECIPITATION_HEIGHT],
        resolution=Resolution.DAILY,
        period=[Period.HISTORICAL, Period.RECENT],
        start_date=None,
        end_date=None,
    ).filter_by_station_id(station_id=[1])

    assert request == enum_based_request

    expected_pair = (
        DwdObservationDatasetTree.DAILY.PRECIPITATION_MORE.PRECIPITATION_HEIGHT,  # Noqa: E501, B950
        DwdObservationDataset.PRECIPITATION_MORE,
    )
    assert request.parameter == [expected_pair]
def test_request_period_historical():
    """A start date of 1971-01-01 must resolve to the HISTORICAL period only."""
    # Historical period expected
    request = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        start_date="1971-01-01",
    )

    expected_periods = [Period.HISTORICAL]
    assert request.period == expected_periods
def test_dwd_observation_data_parameter_dataset_pairs():
    """Parameters may be passed as explicit (parameter, dataset) tuples."""
    # A dataset paired with itself.
    dataset_request = DwdObservationRequest(
        parameter=[("climate_summary", "climate_summary")],
        resolution="daily",
        period=["recent", "historical"],
    )
    expected_dataset_pair = (
        DwdObservationDataset.CLIMATE_SUMMARY,
        DwdObservationDataset.CLIMATE_SUMMARY,
    )
    assert dataset_request.parameter == [expected_dataset_pair]

    # A single parameter taken from a specific dataset.
    parameter_request = DwdObservationRequest(
        parameter=[("precipitation_height", "precipitation_more")],
        resolution="daily",
        period=["recent", "historical"],
    )
    expected_parameter_pair = (
        DwdObservationParameter.DAILY.PRECIPITATION_MORE.PRECIPITATION_HEIGHT,
        DwdObservationDataset.PRECIPITATION_MORE,
    )
    assert parameter_request.parameter == [expected_parameter_pair]
def test_dwd_observation_data_parameter():
    """Parameters passed as bare names, i.e. without an explicit dataset."""
    # A parameter name on its own.
    parameter_request = DwdObservationRequest(
        parameter=["precipitation_height"],
        resolution="daily",
        period=["recent", "historical"],
    )
    expected_parameter_pair = (
        DwdObservationParameter.DAILY.CLIMATE_SUMMARY.PRECIPITATION_HEIGHT,
        DwdObservationDataset.CLIMATE_SUMMARY,
    )
    assert parameter_request.parameter == [expected_parameter_pair]

    # A dataset name on its own.
    dataset_request = DwdObservationRequest(
        parameter=["climate_summary"],
        resolution="daily",
        period=["recent", "historical"],
    )
    expected_dataset_pair = (
        DwdObservationDataset.CLIMATE_SUMMARY,
        DwdObservationDataset.CLIMATE_SUMMARY,
    )
    assert dataset_request.parameter == [expected_dataset_pair]
def test_request_period_historical_recent():
    """A range from 1971 up to 400 days ago must give HISTORICAL and RECENT."""
    # Historical and recent period expected
    four_hundred_days_ago = pd.Timestamp(datetime.utcnow()) - pd.Timedelta(days=400)

    request = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        start_date="1971-01-01",
        end_date=four_hundred_days_ago,
    )

    expected_periods = [Period.HISTORICAL, Period.RECENT]
    assert request.period == expected_periods
def test_dwd_observation_data_dataset():
    """Request a whole dataset, given as string and as enumeration member."""
    both_periods = [DwdObservationPeriod.HISTORICAL, DwdObservationPeriod.RECENT]

    # Plain strings versus enumeration members with explicit ``None`` dates.
    from_strings = DwdObservationRequest(
        parameter=["kl"],
        resolution="daily",
        period=["recent", "historical"],
    ).filter_by_station_id(station_id=(1,))

    from_enums = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        period=list(both_periods),
        start_date=None,
        end_date=None,
    ).filter_by_station_id(station_id=(1,))

    assert from_enums == from_strings

    # Omitted dates versus explicit ``None`` dates.
    without_dates = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        period=list(both_periods),
    ).filter_by_station_id(station_id=(1,))

    with_none_dates = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        period=list(both_periods),
        start_date=None,
        end_date=None,
    ).filter_by_station_id(station_id=(1,))

    assert without_dates == with_none_dates

    expected_pair = (
        DwdObservationDataset.CLIMATE_SUMMARY,
        DwdObservationDataset.CLIMATE_SUMMARY,
    )
    assert without_dates.parameter == [expected_pair]
def test_dwd_observation_data_result_all_missing_data():
    """Every value returned for station "01851" in the range must be missing."""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = True

    request = DwdObservationRequest(
        parameter=Parameter.PRECIPITATION_HEIGHT.name,
        resolution=DwdObservationResolution.MINUTE_10,
        start_date=datetime(2021, 10, 1),
        end_date=datetime(2021, 10, 5),
    )
    stations = request.filter_by_station_id(["01851"])

    readings = stations.values.all().df

    missing_flags = readings.value.isna()
    assert all(missing_flags)
def test_dwd_observation_data_monthly_tidy():
    """Compare tidy monthly precipitation of station "00433" for 2020."""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = True

    stations = DwdObservationRequest(
        parameter=[DwdObservationParameter.MONTHLY.PRECIPITATION_HEIGHT],
        resolution=DwdObservationResolution.MONTHLY,
        start_date="2020-01-01",
        end_date="2020-12-31",
    ).filter_by_station_id("00433")

    actual = stations.values.all().df

    # First day of each month of 2020, in UTC.
    month_starts = [
        Timestamp(f"2020-{month:02d}-01 00:00:00+0000", tz="UTC")
        for month in range(1, 13)
    ]
    precipitation = [
        34.0,
        83.2,
        30.3,
        22.7,
        33.3,
        35.8,
        46.8,
        43.2,
        52.8,
        58.2,
        16.4,
        22.1,
    ]

    expected = pd.DataFrame(
        {
            "station_id": pd.Categorical(["00433"] * 12),
            "dataset": pd.Categorical(["climate_summary"] * 12),
            "parameter": pd.Categorical(["precipitation_height"] * 12),
            "date": month_starts,
            "value": pd.to_numeric(precipitation, errors="coerce"),
            "quality": pd.to_numeric([9.0] * 12, errors="coerce"),
        },
    )

    assert_frame_equal(actual, expected, check_categorical=False)
def dwd_stations(
    kind: str,
    parameter: str = Query(default=None),
    resolution: str = Query(default=None),
    period: str = Query(default=None),
    mosmix_type: str = Query(default=None),
    longitude: float = Query(default=None),
    latitude: float = Query(default=None),
    rank: int = Query(default=None),
    distance: int = Query(default=None),
    sql: str = Query(default=None),
):
    """
    Acquire a list of DWD stations.

    :param kind:        Product kind, either "observation" or "forecast".
    :param parameter:   Passed to the request as ``parameter``.
                        Required for kind "observation".
    :param resolution:  Passed to the observation request as ``resolution``.
                        Required for kind "observation".
    :param period:      Passed to the observation request as ``period``.
                        Required for kind "observation".
    :param mosmix_type: Passed to the forecast request as ``mosmix_type``.
    :param longitude:   Longitude for filtering by geoposition.
    :param latitude:    Latitude for filtering by geoposition.
    :param rank:        Passed to ``filter_by_rank``; takes precedence
                        over ``distance``.
    :param distance:    Passed to ``filter_by_distance`` with unit "km".
    :param sql:         SQL expression applied to the result.
    :return:            JSON response built from ``results.to_dict()``.
    :raises HTTPException: 404 for an unknown ``kind``, 400 when a required
                           observation argument is missing.
    """
    if kind not in ["observation", "forecast"]:
        # The exception has to be raised; a returned HTTPException would be
        # handled like a regular response payload instead of a 404 error.
        raise HTTPException(status_code=404, detail=f"product {kind} not found")

    # Data acquisition.
    if kind == "observation":
        if parameter is None or resolution is None or period is None:
            raise HTTPException(
                status_code=400,
                detail="Query arguments 'parameter', 'resolution' "
                "and 'period' are required",
            )

        stations = DwdObservationRequest(
            parameter=parameter, resolution=resolution, period=period, si_units=False
        )
    else:
        stations = DwdMosmixRequest(
            parameter=parameter, mosmix_type=mosmix_type, si_units=False
        )

    # Compare against None, so that a coordinate of exactly 0.0 is honoured.
    has_position = longitude is not None and latitude is not None

    if has_position and (rank or distance):
        if rank:
            results = stations.filter_by_rank(
                latitude=latitude, longitude=longitude, rank=rank
            )
        else:
            results = stations.filter_by_distance(
                latitude=latitude, longitude=longitude, distance=distance, unit="km"
            )
    else:
        results = stations.all()

    # Postprocessing.
    if sql is not None:
        results.filter_by_sql(sql)
    results.fill_gaps()

    return make_json_response(results.to_dict())
def run():
    """
    Usage:
      wetterdienst dwd observation stations --parameter=<parameter> --resolution=<resolution> --period=<period> [--station=<station>] [--latitude=<latitude>] [--longitude=<longitude>] [--rank=<rank>] [--distance=<distance>] [--sql=<sql>] [--format=<format>] [--target=<target>]
      wetterdienst dwd observation values --parameter=<parameter> --resolution=<resolution> [--station=<station>] [--period=<period>] [--date=<date>] [--tidy] [--sql=<sql>] [--format=<format>] [--target=<target>]
      wetterdienst dwd observation values --parameter=<parameter> --resolution=<resolution> --latitude=<latitude> --longitude=<longitude> [--period=<period>] [--rank=<rank>] [--distance=<distance>] [--tidy] [--date=<date>] [--sql=<sql>] [--format=<format>] [--target=<target>]
      wetterdienst dwd forecast stations [--parameter=<parameter>] [--mosmix-type=<mosmix-type>] [--date=<date>] [--station=<station>] [--latitude=<latitude>] [--longitude=<longitude>] [--rank=<rank>] [--distance=<distance>] [--sql=<sql>] [--format=<format>] [--target=<target>]
      wetterdienst dwd forecast values --parameter=<parameter> [--mosmix-type=<mosmix-type>] --station=<station> [--date=<date>] [--tidy] [--sql=<sql>] [--format=<format>] [--target=<target>]
      wetterdienst dwd about [parameters] [resolutions] [periods]
      wetterdienst dwd about coverage [--parameter=<parameter>] [--resolution=<resolution>] [--period=<period>]
      wetterdienst dwd about fields --parameter=<parameter> --resolution=<resolution> --period=<period> [--language=<language>]
      wetterdienst radar stations [--odim-code=<odim-code>] [--wmo-code=<wmo-code>] [--country-name=<country-name>]
      wetterdienst dwd radar stations
      wetterdienst restapi [--listen=<listen>] [--reload]
      wetterdienst explorer [--listen=<listen>] [--reload]
      wetterdienst --version
      wetterdienst (-h | --help)

    Options:
      --parameter=<parameter>       Parameter Set/Parameter, e.g. "kl" or "precipitation_height", etc.
      --resolution=<resolution>     Dataset resolution: "annual", "monthly", "daily", "hourly", "minute_10", "minute_1"
      --period=<period>             Dataset period: "historical", "recent", "now"
      --station=<station>           Comma-separated list of station identifiers
      --latitude=<latitude>         Latitude for filtering by geoposition.
      --longitude=<longitude>       Longitude for filtering by geoposition.
      --rank=<rank>                 Rank of nearby stations when filtering by geoposition.
      --distance=<distance>         Maximum distance in km when filtering by geoposition.
      --date=<date>                 Date for filtering data. Can be either a single date(time) or
                                    an ISO-8601 time interval, see https://en.wikipedia.org/wiki/ISO_8601#Time_intervals.
      --mosmix-type=<mosmix-type>   type of mosmix, either 'small' or 'large'
      --sql=<sql>                   SQL query to apply to DataFrame.
      --format=<format>             Output format. [Default: json]
      --target=<target>             Output target for storing data into different data sinks.
      --language=<language>         Output language. [Default: en]
      --version                     Show version information
      --debug                       Enable debug messages
      --listen=<listen>             HTTP server listen address.
      --reload                      Run service and dynamically reload changed files
      -h --help                     Show this screen

    Examples requesting observation stations:

      # Get list of all stations for daily climate summary data in JSON format
      wetterdienst dwd observation stations --parameter=kl --resolution=daily --period=recent

      # Get list of all stations in CSV format
      wetterdienst dwd observation stations --parameter=kl --resolution=daily --period=recent --format=csv

      # Get list of specific stations
      wetterdienst dwd observation stations --resolution=daily --parameter=kl --period=recent --station=1,1048,4411

      # Get list of specific stations in GeoJSON format
      wetterdienst dwd observation stations --resolution=daily --parameter=kl --period=recent --station=1,1048,4411 --format=geojson

    Examples requesting observation values:

      # Get daily climate summary data for specific stations
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=daily --period=recent

      # Get daily climate summary data for specific stations in CSV format
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=daily --period=recent --format=csv

      # Get daily climate summary data for specific stations in tidy format
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=daily --period=recent --tidy

      # Limit output to specific date
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=daily --period=recent --date=2020-05-01

      # Limit output to specified date range in ISO-8601 time interval format
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=daily --period=recent --date=2020-05-01/2020-05-05

      # The real power horse: Acquire data across historical+recent data sets
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=daily --date=1969-01-01/2020-06-11

      # Acquire monthly data for 2020-05
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=monthly --date=2020-05

      # Acquire monthly data from 2017-01 to 2019-12
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=monthly --date=2017-01/2019-12

      # Acquire annual data for 2019
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=annual --date=2019

      # Acquire annual data from 2010 to 2020
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=annual --date=2010/2020

      # Acquire hourly data
      wetterdienst dwd observation values --station=1048,4411 --parameter=air_temperature --resolution=hourly --period=recent --date=2020-06-15T12

    Examples requesting forecast stations:

      wetterdienst dwd forecast stations

    Examples requesting forecast values:

      wetterdienst dwd forecast values --parameter=ttt,ff --station=65510

    Examples using geospatial features:

      # Acquire stations and readings by geoposition, request specific number of nearby stations.
      wetterdienst dwd observation stations --resolution=daily --parameter=kl --period=recent --lat=49.9195 --lon=8.9671 --rank=5
      wetterdienst dwd observation values --resolution=daily --parameter=kl --period=recent --lat=49.9195 --lon=8.9671 --rank=5 --date=2020-06-30

      # Acquire stations and readings by geoposition, request stations within specific distance.
      wetterdienst dwd observation stations --resolution=daily --parameter=kl --period=recent --lat=49.9195 --lon=8.9671 --distance=25
      wetterdienst dwd observation values --resolution=daily --parameter=kl --period=recent --lat=49.9195 --lon=8.9671 --distance=25 --date=2020-06-30

    Examples using SQL filtering:

      # Find stations by state.
      wetterdienst dwd observation stations --parameter=kl --resolution=daily --period=recent --sql="SELECT * FROM data WHERE state='Sachsen'"

      # Find stations by name (LIKE query).
      wetterdienst dwd observation stations --parameter=kl --resolution=daily --period=recent --sql="SELECT * FROM data WHERE lower(station_name) LIKE lower('%dresden%')"

      # Find stations by name (regexp query).
      wetterdienst dwd observation stations --parameter=kl --resolution=daily --period=recent --sql="SELECT * FROM data WHERE regexp_matches(lower(station_name), lower('.*dresden.*'))"

      # Filter measurements: Display daily climate observation readings where the maximum temperature is below two degrees celsius.
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=daily --period=recent --sql="SELECT * FROM data WHERE temperature_air_max_200 < 2.0;"

      # Filter measurements: Same as above, but use tidy format.
      # FIXME: Currently, this does not work, see https://github.com/earthobservations/wetterdienst/issues/377.
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=daily --period=recent --sql="SELECT * FROM data WHERE parameter='temperature_air_max_200' AND value < 2.0;" --tidy

    Examples for inquiring metadata:

      # Display list of available parameters (air_temperature, precipitation, pressure, ...)
      wetterdienst dwd about parameters

      # Display list of available resolutions (10_minutes, hourly, daily, ...)
      wetterdienst dwd about resolutions

      # Display list of available periods (historical, recent, now)
      wetterdienst dwd about periods

      # Display coverage/correlation between parameters, resolutions and periods.
      # This can answer questions like ...
      wetterdienst dwd about coverage

      # Tell me all periods and resolutions available for 'air_temperature'.
      wetterdienst dwd about coverage --parameter=air_temperature

      # Tell me all parameters available for 'daily' resolution.
      wetterdienst dwd about coverage --resolution=daily

    Examples for exporting data to files:

      # Export list of stations into spreadsheet
      wetterdienst dwd observation stations --parameter=kl --resolution=daily --period=recent --target=file://stations.xlsx

      # Shortcut command for fetching readings
      alias fetch="wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=daily --period=recent"

      # Export readings into spreadsheet (Excel-compatible)
      fetch --target="file://observations.xlsx"

      # Export readings into Parquet format and display head of Parquet file
      fetch --target="file://observations.parquet"

      # Check Parquet file
      parquet-tools schema observations.parquet
      parquet-tools head observations.parquet

      # Export readings into Zarr format
      fetch --target="file://observations.zarr"

    Examples for exporting data to databases:

      # Shortcut command for fetching readings
      alias fetch="wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=daily --period=recent"

      # Store readings to DuckDB
      fetch --target="duckdb:///dwd.duckdb?table=weather"

      # Store readings to InfluxDB
      fetch --target="influxdb://localhost/?database=dwd&table=weather"

      # Store readings to CrateDB
      fetch --target="crate://localhost/?database=dwd&table=weather"

    Invoke the HTTP REST API service:

      # Start service on standard port, listening on http://localhost:7890.
      wetterdienst restapi

      # Start service on standard port and watch filesystem changes.
      # This is suitable for development.
      wetterdienst restapi --reload

      # Start service on public interface and specific port.
      wetterdienst restapi --listen=0.0.0.0:8890

    Invoke the Wetterdienst Explorer UI service:

      # Start service on standard port, listening on http://localhost:7891.
      wetterdienst explorer

      # Start service on standard port and watch filesystem changes.
      # This is suitable for development.
      wetterdienst explorer --reload

      # Start service on public interface and specific port.
      wetterdienst explorer --listen=0.0.0.0:8891

    """
    # NOTE: The docstring above is not only documentation. It is handed to
    # docopt below and thus defines the command line grammar.

    appname = f"{__appname__} {__version__}"

    # Read command line options.
    options = normalize_options(docopt(run.__doc__, version=appname))

    # Setup logging.
    debug = options.get("debug")

    log_level = logging.INFO

    if debug:  # pragma: no cover
        log_level = logging.DEBUG

    setup_logging(log_level)

    # Run HTTP service.
    if options.restapi:  # pragma: no cover
        # NOTE(review): without --listen this is presumably None, leaving the
        # choice of a default address to ``start_service`` - confirm.
        listen_address = options.listen
        log.info(f"Starting {appname}")
        log.info(f"Starting HTTP web service on http://{listen_address}")
        from wetterdienst.ui.restapi import start_service

        start_service(listen_address, reload=options.reload)
        return

    # Run UI service.
    if options.explorer:  # pragma: no cover
        listen_address = options.listen
        log.info(f"Starting {appname}")
        log.info(f"Starting UI web service on http://{listen_address}")
        from wetterdienst.ui.explorer.app import start_service

        start_service(listen_address, reload=options.reload)
        return

    # Handle radar data inquiry. Currently, "stations only".
    if options.radar:
        if options.dwd:
            data = DwdRadarSites().all()
        else:
            if options.odim_code:
                data = OperaRadarSites().by_odimcode(options.odim_code)
            elif options.wmo_code:
                data = OperaRadarSites().by_wmocode(options.wmo_code)
            elif options.country_name:
                data = OperaRadarSites().by_countryname(options.country_name)
            else:
                data = OperaRadarSites().all()

        output = json.dumps(data, indent=4)
        print(output)
        return

    # Output domain information.
    if options.about:
        about(options)
        return

    # Sanity checks.
    # ``options["values"]`` uses item access, as in the original code;
    # ``.values`` would presumably resolve to the mapping method instead.
    if (options["values"] or options.forecast) and options.format == "geojson":
        raise KeyError("GeoJSON format only available for stations output")

    # Acquire station list, also used for readings if required.
    # Filtering applied for distance (a.k.a. nearby) and pre-selected stations
    stations = None
    if options.observation:
        stations = DwdObservationRequest(
            parameter=read_list(options.parameter),
            resolution=options.resolution,
            period=options.period,
            tidy=options.tidy,
            si_units=False,
        )
    elif options.forecast:
        stations = DwdMosmixRequest(
            parameter=read_list(options.parameter),
            # Honour --mosmix-type when given, fall back to the former
            # hard-coded default otherwise.
            mosmix_type=options.mosmix_type or DwdMosmixType.LARGE,
            tidy=options.tidy,
            si_units=False,
        )

    if options.latitude and options.longitude:
        if options.rank:
            stations = stations.filter_by_rank(
                latitude=float(options.latitude),
                longitude=float(options.longitude),
                rank=int(options.rank),
            )
        elif options.distance:
            stations = stations.filter_by_distance(
                latitude=float(options.latitude),
                longitude=float(options.longitude),
                distance=int(options.distance),
            )
        else:
            raise DocoptExit("Geospatial queries need either --rank or --distance")
        results = stations

    elif options.station:
        results = stations.filter_by_station_id(read_list(options.station))

    else:
        results = stations.all()

    # For "stations" the station result is used as is, for "values" the
    # readings of the selected stations are acquired.
    if options["values"]:
        try:
            # TODO: Add stream-based processing here.
            results = results.values.all()
        except ValueError as ex:
            log.exception(ex)
            sys.exit(1)

    if results.df.empty:
        log.error("No data available for given constraints")
        sys.exit(1)

    # Filter readings by datetime expression.
    if options["values"] and options.date:
        results.filter_by_date(options.date)

    # Apply filtering by SQL.
    if options.sql:
        if options.tidy:
            log.error("Combining SQL filtering with tidy format not possible")
            sys.exit(1)

        log.info(f"Filtering with SQL: {options.sql}")
        results.filter_by_sql(options.sql)

    # Emit to data sink, e.g. write to database.
    if options.target:
        results.to_target(options.target)
        return

    # Render to output format.
    try:
        if options.format == "json":
            output = results.to_json()
        elif options.format == "csv":
            output = results.to_csv()
        elif options.format == "geojson":
            output = results.to_geojson()
        else:
            raise KeyError("Unknown output format")

    except KeyError as ex:
        log.error(f'{ex}. Output format must be one of "json", "geojson", "csv".')
        sys.exit(1)

    print(output)
def dwd_values(
    kind: str,
    stations: str = Query(default=None),
    parameter: str = Query(default=None),
    resolution: str = Query(default=None),
    period: str = Query(default=None),
    mosmix_type: str = Query(default=None),
    date: str = Query(default=None),
    sql: str = Query(default=None),
    tidy: bool = Query(default=True),
):
    """
    Acquire data from DWD.

    # TODO: Obtain lat/lon distance/number information.

    :param kind:        Product kind, either "observation" or "mosmix".
    :param stations:    Comma-separated list of station identifiers.
    :param parameter:   Observation measure
    :param resolution:  Frequency/granularity of measurement interval
                        (required for kind "observation").
    :param period:      Recent or historical files
                        (required for kind "observation").
    :param mosmix_type: MOSMIX type. Either "small" or "large"
                        (required for kind "mosmix").
    :param date:        Date or date range
    :param sql:         SQL expression
    :param tidy:        Whether to return data in tidy format. Default: True.
                        Only forwarded to observation requests.
    :raises HTTPException: Status 404 for an unknown ``kind``, status 400
                        when a required query argument is missing.
    :return:            Result of ``make_json_response`` for the acquired values.
    """
    if kind not in ["observation", "mosmix"]:
        # Raise rather than return the exception object, so this case is
        # signalled the same way as the 400 errors below.
        raise HTTPException(
            status_code=404,
            detail=f"Unknown value for query argument 'kind={kind}' {kind}",
        )

    if stations is None:
        raise HTTPException(
            status_code=400, detail="Query argument 'stations' is required"
        )

    # Materialize the identifiers: a bare `map` object can be consumed only once.
    station_ids = [str(station) for station in read_list(stations)]

    if kind == "observation":
        if parameter is None or resolution is None or period is None:
            raise HTTPException(
                status_code=400,
                detail="Query arguments 'parameter', 'resolution' "
                "and 'period' are required",
            )

        # Data acquisition.
        request = DwdObservationRequest(
            parameter=parameter,
            resolution=resolution,
            period=period,
            tidy=tidy,
            si_units=False,
        )
    else:
        if parameter is None or mosmix_type is None:
            # Both arguments are checked, so both are named in the message.
            raise HTTPException(
                status_code=400,
                detail="Query arguments 'parameter' and 'mosmix_type' "
                "are required",
            )

        request = DwdMosmixRequest(
            parameter=parameter, mosmix_type=mosmix_type, si_units=False
        )

    # Postprocessing.
    results = request.filter_by_station_id(station_id=station_ids).values.all()

    if date is not None:
        results.filter_by_date(date)

    if sql is not None:
        results.filter_by_sql(sql)

    # Round-trip through JSON text to obtain plain Python data structures.
    data = json.loads(results.to_json())

    return make_json_response(data)
def test_dwd_observation_data_result_tidy_si():
    """Test for actual values (tidy) in metric units"""
    Settings.tidy = True
    Settings.humanize = False
    Settings.si_units = True

    request = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        start_date="1933-12-31",  # few days before official start
        end_date="1934-01-01",  # few days after official start,
    ).filter_by_station_id(
        station_id=(1048,),
    )

    df = request.values.all().df

    assert list(df.columns.values) == [
        "station_id",
        "dataset",
        "parameter",
        "date",
        "value",
        "quality",
    ]

    dates = [
        datetime(1933, 12, 31, tzinfo=pytz.UTC),
        datetime(1934, 1, 1, tzinfo=pytz.UTC),
    ]

    # Expected (value, quality) per parameter on 1934-01-01, in output order.
    # Every parameter is expected to be missing on 1933-12-31.
    # `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
    second_day = {
        "fx": (pd.NA, np.nan),
        "fm": (pd.NA, np.nan),
        "rsk": (0.2, 1),
        "rskf": (8, 1),
        "sdk": (pd.NA, np.nan),
        "shk_tag": (0, 1),
        "nm": (100.0, 1),
        "vpm": (640.0, 1),
        "pm": (100860.0, 1),
        "tmk": (273.65, 1),
        "upm": (97.00, 1),
        "txk": (273.84999999999997, 1),
        "tnk": (273.34999999999997, 1),
        "tgk": (pd.NA, np.nan),
    }

    # Two consecutive rows (one per date) for each parameter.
    parameters, values, qualities = [], [], []
    for name, (value, quality) in second_day.items():
        parameters += [name, name]
        values += [pd.NA, value]
        qualities += [np.nan, quality]

    n_rows = len(parameters)

    assert_frame_equal(
        df,
        pd.DataFrame(
            {
                "station_id": pd.Categorical(["01048"] * n_rows),
                "dataset": pd.Categorical(["climate_summary"] * n_rows),
                "parameter": pd.Categorical(parameters),
                "date": dates * len(second_day),
                "value": pd.to_numeric(values, errors="coerce").astype(float),
                "quality": pd.Series(qualities, dtype=float),
            },
        ),
        # Needed since pandas 1.2?
        check_categorical=False,
    )
def test_tidy_up_data():
    """Verify that a single wide observation row is reshaped into tidy rows."""
    Settings.tidy = True
    Settings.humanize = False
    Settings.si_units = True

    station_id = "01048"
    timestamp = pd.Timestamp("2019-01-23 00:00:00")

    request = DwdObservationRequest(
        "kl",
        "daily",
        "historical",
        start_date="2019-01-23 00:00:00",
    ).filter_by_station_id((station_id,))

    # One wide-format record; key order defines the column order of the frame.
    wide_row = {
        "station_id": "01048",
        "date": timestamp,
        "qn_3": 10,
        "fx": 11.8,
        "fm": 5.8,
        "qn_4": 3,
        "rsk": 0.0,
        "rskf": 0.0,
        "sdk": 7.1,
        "shk_tag": 0.0,
        "nm": 2.3,
        "vpm": 3.2,
        "pm": 975.4,
        "tmk": -5.5,
        "upm": 79.17,
        "txk": -1.7,
        "tnk": -7.9,
        "tgk": -11.4,
    }
    df = pd.DataFrame({column: [cell] for column, cell in wide_row.items()})

    values = request.values
    df_tidied = values.tidy_up_df(df, request.parameter[0][1])
    df_tidied_organized = values._organize_df_columns(
        df_tidied, station_id, DwdObservationDataset.CLIMATE_SUMMARY
    )

    # Parameters in expected output order: the first two carry the "qn_3"
    # quality (10), the remaining twelve the "qn_4" quality (3).
    parameters = [
        "fx",
        "fm",
        "rsk",
        "rskf",
        "sdk",
        "shk_tag",
        "nm",
        "vpm",
        "pm",
        "tmk",
        "upm",
        "txk",
        "tnk",
        "tgk",
    ]
    n_rows = len(parameters)

    df_tidy = pd.DataFrame(
        {
            "station_id": ["01048"] * n_rows,
            "dataset": ["climate_summary"] * n_rows,
            "parameter": parameters,
            "date": [timestamp] * n_rows,
            "value": [wide_row[name] for name in parameters],
            "quality": pd.Series([10] * 2 + [3] * 12, dtype=float),
        }
    )

    assert_frame_equal(df_tidied_organized, df_tidy)
def test_dwd_observation_data_result_tabular_metric():
    """Check tabular (non-tidy) climate summary values with SI units enabled."""
    Settings.tidy = False
    Settings.humanize = False
    Settings.si_units = True

    stations = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        start_date="1933-12-31",  # few days before official start
        end_date="1934-01-01",  # few days after official start,
    ).filter_by_station_id(station_id=[1048])

    df = stations.values.all().df

    # Expected reading per column on 1934-01-01, in column order; every
    # column is expected to be missing on 1933-12-31.
    second_day = {
        "qn_3": pd.NA,
        "fx": pd.NA,
        "fm": pd.NA,
        "qn_4": 1,
        "rsk": 0.2,
        "rskf": 8,
        "sdk": pd.NA,
        "shk_tag": 0,
        "nm": 100.0,
        "vpm": 640.0,
        "pm": 100860.0,
        "tmk": 273.65,
        "upm": 97.00,
        "txk": 273.84999999999997,
        "tnk": 273.34999999999997,
        "tgk": pd.NA,
    }

    leading_columns = ["station_id", "dataset", "date"]
    assert list(df.columns.values) == leading_columns + list(second_day)

    expected = pd.DataFrame(
        {
            "station_id": pd.Categorical(["01048"] * 2),
            "dataset": pd.Categorical(["climate_summary"] * 2),
            "date": [
                datetime(1933, 12, 31, tzinfo=pytz.UTC),
                datetime(1934, 1, 1, tzinfo=pytz.UTC),
            ],
            **{
                column: pd.to_numeric([pd.NA, reading], errors="coerce")
                for column, reading in second_day.items()
            },
        }
    )

    assert_frame_equal(df, expected, check_categorical=False)
def test_dwd_observation_data_result_tabular():
    """
    Check tabular (non-tidy) climate summary values requested with
    ``humanize=False`` and ``si_units=False``.
    """

    def nullable_int(reading):
        # Column holding a missing first day and `reading` on the second day,
        # stored with pandas' nullable integer dtype.
        return pd.Series([pd.NA, reading], dtype=pd.Int64Dtype())

    def numeric(reading):
        # Same two-day layout, coerced to a plain numeric array.
        return pd.to_numeric([pd.NA, reading], errors="coerce")

    stations = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        start_date="1933-12-31",  # few days before official start
        end_date="1934-01-01",  # few days after official start,
        tidy=False,
        humanize=False,
        si_units=False,
    ).filter_by_station_id(station_id=[1048])

    df = stations.values.all().df

    expected = pd.DataFrame(
        {
            "date": [
                datetime(1933, 12, 31, tzinfo=pytz.UTC),
                datetime(1934, 1, 1, tzinfo=pytz.UTC),
            ],
            "station_id": pd.Categorical(["01048", "01048"]),
            "qn_3": nullable_int(pd.NA),
            "fx": numeric(pd.NA),
            "fm": numeric(pd.NA),
            "qn_4": nullable_int(1),
            "rsk": numeric(0.2),
            "rskf": numeric(8),
            "sdk": numeric(pd.NA),
            "shk_tag": nullable_int(0),
            "nm": numeric(8.0),
            "vpm": numeric(6.4),
            "pm": numeric(1008.60),
            "tmk": numeric(0.5),
            "upm": numeric(97.00),
            "txk": numeric(0.7),
            "tnk": numeric(0.2),
            "tgk": numeric(pd.NA),
        }
    )

    assert list(df.columns.values) == [
        "date",
        "station_id",
        "qn_3",
        "fx",
        "fm",
        "qn_4",
        "rsk",
        "rskf",
        "sdk",
        "shk_tag",
        "nm",
        "vpm",
        "pm",
        "tmk",
        "upm",
        "txk",
        "tnk",
        "tgk",
    ]

    assert_frame_equal(df, expected)