def test_create_humanized_column_names_mapping():
    """Check that the humanized parameter mapping contains the daily "kl" columns."""
    # Original column name -> humanized column name.
    # The quality columns ("QN_3" -> "QUALITY_WIND", "QN_4" -> "QUALITY_GENERAL")
    # are not part of the checked expectation.
    expected_pairs = {
        ("FX", "WIND_GUST_MAX"),
        ("FM", "WIND_SPEED"),
        ("RSK", "PRECIPITATION_HEIGHT"),
        ("RSKF", "PRECIPITATION_FORM"),
        ("SDK", "SUNSHINE_DURATION"),
        ("SHK_TAG", "SNOW_DEPTH"),
        ("NM", "CLOUD_COVER_TOTAL"),
        ("VPM", "PRESSURE_VAPOR"),
        ("PM", "PRESSURE_AIR"),
        ("TMK", "TEMPERATURE_AIR_200"),
        ("UPM", "HUMIDITY"),
        ("TXK", "TEMPERATURE_AIR_MAX_200"),
        ("TNK", "TEMPERATURE_AIR_MIN_200"),
        ("TGK", "TEMPERATURE_AIR_MIN_005"),
    }

    request = DWDObservationData(
        [0],
        [DWDObservationParameterSet.CLIMATE_SUMMARY],
        DWDObservationResolution.DAILY,
        [DWDObservationPeriod.RECENT],
    )
    humanized_mapping = request._create_humanized_parameters_mapping()

    # Every expected (original, humanized) pair has to occur in the mapping;
    # additional entries in the mapping are fine.
    missing_pairs = expected_pairs - set(humanized_mapping.items())
    assert not missing_pairs
def test_dwd_observation_data_parameter():
    """A single parameter given as string equals the request built from enumerations."""
    from_strings = DWDObservationData(
        station_ids=[1],
        parameters=["precipitation_height"],
        resolution="daily",
        periods=["recent", "historical"],
    )
    from_enums = DWDObservationData(
        station_ids=[1],
        parameters=[DWDObservationParameter.DAILY.PRECIPITATION_HEIGHT],
        resolution=DWDObservationResolution.DAILY,
        periods=[DWDObservationPeriod.HISTORICAL, DWDObservationPeriod.RECENT],
        start_date=None,
        end_date=None,
    )
    assert from_strings == from_enums

    # The request stores each parameter as a (parameter, parameter set) pair.
    daily_climate_summary = DWDObservationParameterSetStructure.DAILY.CLIMATE_SUMMARY
    expected_parameters = [
        (
            daily_climate_summary.PRECIPITATION_HEIGHT,
            DWDObservationParameterSet.CLIMATE_SUMMARY,
        )
    ]
    assert from_strings.parameters == expected_parameters

    # An unknown parameter name must be rejected.
    # NOTE(review): start_date lies after end_date here; presumably irrelevant
    # because the parameter check fails first - confirm against the constructor.
    unknown_parameter_arguments = {
        "station_ids": [1],
        "parameters": ["abc"],
        "resolution": DWDObservationResolution.DAILY,
        "start_date": "1971-01-01",
        "end_date": "1951-01-01",
    }
    with pytest.raises(NoParametersFound):
        DWDObservationData(**unknown_parameter_arguments)
def test_observation_data_storing():
    """
    Collect the same request twice with persistence enabled and compare results.

    The scenario makes sure fresh data is taken and written to the configured
    storage. The storage is invalidated before the run so no old data is used,
    and again afterwards so that subsequent tests do not see the data written
    here.
    """
    storage = StorageAdapter(persist=True).hdf5(
        DWDObservationParameterSet.CLIMATE_SUMMARY,
        DWDObservationResolution.DAILY,
        DWDObservationPeriod.HISTORICAL,
    )

    # Drop whatever an earlier run may have left behind.
    storage.invalidate()

    try:
        dwd_obs_data = DWDObservationData(
            station_ids=[1],
            parameters=[DWDObservationParameterSet.CLIMATE_SUMMARY],
            resolution=DWDObservationResolution.DAILY,
            periods=[DWDObservationPeriod.HISTORICAL],
            storage=StorageAdapter(persist=True),
        )

        df = dwd_obs_data.collect_safe()
        # Second collection of the identical request; presumably answered
        # from the persisted store - confirm against `collect_safe`.
        df_stored = dwd_obs_data.collect_safe()

        assert_frame_equal(df, df_stored, check_column_type=False)
    finally:
        # Clean up even when collecting or comparing fails, otherwise stale
        # data would leak into later tests.
        storage.invalidate()
def test_dwd_observation_data_time_input():
    """Check how start_date / end_date inputs are completed and validated."""

    def climate_summary_request(**extra):
        # Daily climate summary request for station 1; fresh argument lists
        # are built on every call.
        return DWDObservationData(
            station_ids=[1],
            parameters=[DWDObservationParameterSet.CLIMATE_SUMMARY],
            resolution=DWDObservationResolution.DAILY,
            **extra,
        )

    def single_day_historical_request():
        # Reference request: historical period, start and end on 1971-01-01.
        return climate_summary_request(
            periods=[DWDObservationPeriod.HISTORICAL],
            start_date=pd.Timestamp("1971-01-01"),
            end_date=pd.Timestamp("1971-01-01"),
        )

    # Only a start date given.
    only_start = climate_summary_request(start_date="1971-01-01")
    assert only_start == single_day_historical_request()

    # Only an end date given, together with an explicit period.
    only_end = climate_summary_request(
        periods=[DWDObservationPeriod.HISTORICAL],
        end_date="1971-01-01",
    )
    assert only_end == single_day_historical_request()

    # A start date after the end date is an error.
    with pytest.raises(StartDateEndDateError):
        climate_summary_request(start_date="1971-01-01", end_date="1951-01-01")
def test_dwd_observation_data_parameter_set():
    """A parameter set given as string or enumeration yields the same request."""
    climate_summary = DWDObservationParameterSet.CLIMATE_SUMMARY

    def reference_request():
        # Fully spelled-out enumeration form used as comparison target.
        return DWDObservationData(
            station_ids=[1],
            parameters=[DWDObservationParameterSet.CLIMATE_SUMMARY],
            resolution=DWDObservationResolution.DAILY,
            periods=[DWDObservationPeriod.HISTORICAL, DWDObservationPeriod.RECENT],
            start_date=None,
            end_date=None,
        )

    # Parameter set, resolution and periods given as plain strings.
    from_strings = DWDObservationData(
        station_ids=[1],
        parameters=["kl"],
        resolution="daily",
        periods=["recent", "historical"],
    )
    assert from_strings == reference_request()
    # For a whole parameter set both tuple members are the parameter set.
    assert from_strings.parameters == [(climate_summary, climate_summary)]

    # Same request given as enumerations, without explicit dates.
    from_enums = DWDObservationData(
        station_ids=[1],
        parameters=[DWDObservationParameterSet.CLIMATE_SUMMARY],
        resolution=DWDObservationResolution.DAILY,
        periods=[DWDObservationPeriod.HISTORICAL, DWDObservationPeriod.RECENT],
    )
    assert from_enums == reference_request()

    # A station id that is not numeric is rejected.
    invalid_station_arguments = {
        "station_ids": ["test"],
        "parameters": [DWDObservationParameterSet.CLIMATE_SUMMARY],
        "periods": [DWDObservationPeriod.HISTORICAL],
        "resolution": DWDObservationResolution.DAILY,
    }
    with pytest.raises(ValueError):
        DWDObservationData(**invalid_station_arguments)
def test_dwd_observation_data_dynamic_period():
    """Check which periods a request derives from its start and end date."""
    historical = DWDObservationPeriod.HISTORICAL
    recent = DWDObservationPeriod.RECENT
    now = DWDObservationPeriod.NOW

    def current_utc_timestamp():
        # Evaluated at every use, so each request sees its own "now".
        return pd.Timestamp(datetime.utcnow())

    def request_with(**date_range):
        # Daily climate summary request for station 1 without explicit periods.
        return DWDObservationData(
            station_ids=[1],
            parameters=[DWDObservationParameterSet.CLIMATE_SUMMARY],
            resolution=DWDObservationResolution.DAILY,
            **date_range,
        )

    # Historical period expected
    only_historical = request_with(start_date="1971-01-01")
    assert only_historical.periods == [historical]

    # Historical and recent period expected
    up_to_400_days_ago = request_with(
        start_date="1971-01-01",
        end_date=current_utc_timestamp() - pd.Timedelta(days=400),
    )
    assert up_to_400_days_ago.periods == [historical, recent]

    # Historical, recent and now period expected
    up_to_today = request_with(
        start_date="1971-01-01",
        end_date=current_utc_timestamp(),
    )
    assert up_to_today.periods == [historical, recent, now]

    # !!!Recent and now period can't be tested dynamically
    # TODO: add test with mocked datetime here

    # Now period
    last_two_hours = request_with(
        start_date=current_utc_timestamp() - pd.Timedelta(hours=2),
    )
    assert now in last_two_hours.periods

    # No period (for example in future)
    in_the_future = request_with(
        start_date=current_utc_timestamp() + pd.Timedelta(days=720),
    )
    assert in_the_future.periods == []
def run():
    """
    Usage:
      wetterdienst dwd observations stations --parameter=<parameter> --resolution=<resolution> --period=<period> [--station=<station>] [--latitude=<latitude>] [--longitude=<longitude>] [--number=<number>] [--distance=<distance>] [--sql=<sql>] [--format=<format>]
      wetterdienst dwd observations readings --parameter=<parameter> --resolution=<resolution> --station=<station> [--period=<period>] [--date=<date>] [--tidy] [--sql=<sql>] [--format=<format>] [--target=<target>]
      wetterdienst dwd observations readings --parameter=<parameter> --resolution=<resolution> --latitude=<latitude> --longitude=<longitude> [--period=<period>] [--number=<number>] [--distance=<distance>] [--tidy] [--date=<date>] [--sql=<sql>] [--format=<format>] [--target=<target>]
      wetterdienst dwd forecasts stations [--date=<date>] [--station=<station>] [--latitude=<latitude>] [--longitude=<longitude>] [--number=<number>] [--distance=<distance>] [--sql=<sql>] [--format=<format>]
      wetterdienst dwd forecasts readings --mosmix-type=<mosmix-type> --station=<station> [--parameter=<parameter>] [--date=<date>] [--tidy] [--sql=<sql>] [--format=<format>] [--target=<target>]
      wetterdienst dwd about [parameters] [resolutions] [periods]
      wetterdienst dwd about coverage [--parameter=<parameter>] [--resolution=<resolution>] [--period=<period>]
      wetterdienst dwd about fields --parameter=<parameter> --resolution=<resolution> --period=<period> [--language=<language>]
      wetterdienst service [--listen=<listen>]
      wetterdienst --version
      wetterdienst (-h | --help)

    Options:
      --parameter=<parameter>       Parameter Set/Parameter, e.g. "kl" or "precipitation_height", etc.
      --resolution=<resolution>     Dataset resolution: "annual", "monthly", "daily", "hourly", "minute_10", "minute_1"
      --period=<period>             Dataset period: "historical", "recent", "now"
      --station=<station>           Comma-separated list of station identifiers
      --latitude=<latitude>         Latitude for filtering by geoposition.
      --longitude=<longitude>       Longitude for filtering by geoposition.
      --number=<number>             Number of nearby stations when filtering by geoposition.
      --distance=<distance>         Maximum distance in km when filtering by geoposition.
      --date=<date>                 Date for filtering data. Can be either a single date(time) or
                                    an ISO-8601 time interval, see https://en.wikipedia.org/wiki/ISO_8601#Time_intervals.
      --mosmix-type=<mosmix-type>   type of mosmix, either 'small' or 'large'
      --sql=<sql>                   SQL query to apply to DataFrame.
      --format=<format>             Output format. [Default: json]
      --target=<target>             Output target for storing data into different data sinks.
      --language=<language>         Output language. [Default: en]
      --version                     Show version information
      --debug                       Enable debug messages
      --listen=<listen>             HTTP server listen address. [Default: localhost:7890]
      -h --help                     Show this screen


    Examples requesting stations:

      # Get list of all stations for daily climate summary data in JSON format
      wetterdienst dwd observations stations --parameter=kl --resolution=daily --period=recent

      # Get list of all stations in CSV format
      wetterdienst dwd observations stations --parameter=kl --resolution=daily --period=recent --format=csv

      # Get list of specific stations
      wetterdienst dwd observations stations --resolution=daily --parameter=kl --period=recent --station=1,1048,4411

      # Get list of specific stations in GeoJSON format
      wetterdienst dwd observations stations --resolution=daily --parameter=kl --period=recent --station=1,1048,4411 --format=geojson

    Examples requesting readings:

      # Get daily climate summary data for specific stations
      wetterdienst dwd observations readings --station=1048,4411 --parameter=kl --resolution=daily --period=recent

      # Optionally save/restore to/from disk in order to avoid asking upstream servers each time
      wetterdienst dwd observations readings --station=1048,4411 --parameter=kl --resolution=daily --period=recent

      # Limit output to specific date
      wetterdienst dwd observations readings --station=1048,4411 --parameter=kl --resolution=daily --period=recent --date=2020-05-01

      # Limit output to specified date range in ISO-8601 time interval format
      wetterdienst dwd observations readings --station=1048,4411 --parameter=kl --resolution=daily --period=recent --date=2020-05-01/2020-05-05

      # The real power horse: Acquire data across historical+recent data sets
      wetterdienst dwd observations readings --station=1048,4411 --parameter=kl --resolution=daily --period=historical,recent --date=1969-01-01/2020-06-11

      # Acquire monthly data for 2020-05
      wetterdienst dwd observations readings --station=1048,4411 --parameter=kl --resolution=monthly --period=recent,historical --date=2020-05

      # Acquire monthly data from 2017-01 to 2019-12
      wetterdienst dwd observations readings --station=1048,4411 --parameter=kl --resolution=monthly --period=recent,historical --date=2017-01/2019-12

      # Acquire annual data for 2019
      wetterdienst dwd observations readings --station=1048,4411 --parameter=kl --resolution=annual --period=recent,historical --date=2019

      # Acquire annual data from 2010 to 2020
      wetterdienst dwd observations readings --station=1048,4411 --parameter=kl --resolution=annual --period=recent,historical --date=2010/2020

      # Acquire hourly data
      wetterdienst dwd observations readings --station=1048,4411 --parameter=air_temperature --resolution=hourly --period=recent --date=2020-06-15T12

    Examples using geospatial features:

      # Acquire stations and readings by geoposition, request specific number of nearby stations.
      wetterdienst dwd observations stations --resolution=daily --parameter=kl --period=recent --lat=49.9195 --lon=8.9671 --num=5
      wetterdienst dwd observations readings --resolution=daily --parameter=kl --period=recent --lat=49.9195 --lon=8.9671 --num=5 --date=2020-06-30

      # Acquire stations and readings by geoposition, request stations within specific radius.
      wetterdienst dwd observations stations --resolution=daily --parameter=kl --period=recent --lat=49.9195 --lon=8.9671 --distance=25
      wetterdienst dwd observations readings --resolution=daily --parameter=kl --period=recent --lat=49.9195 --lon=8.9671 --distance=25 --date=2020-06-30

    Examples using SQL filtering:

      # Find stations by state.
      wetterdienst dwd observations stations --parameter=kl --resolution=daily --period=recent --sql="SELECT * FROM data WHERE state='Sachsen'"

      # Find stations by name (LIKE query).
      wetterdienst dwd observations stations --parameter=kl --resolution=daily --period=recent --sql="SELECT * FROM data WHERE lower(station_name) LIKE lower('%dresden%')"

      # Find stations by name (regexp query).
      wetterdienst dwd observations stations --parameter=kl --resolution=daily --period=recent --sql="SELECT * FROM data WHERE regexp_matches(lower(station_name), lower('.*dresden.*'))"

      # Filter measurements: Display daily climate observation readings where the maximum temperature is below two degrees.
      wetterdienst dwd observations readings --station=1048,4411 --parameter=kl --resolution=daily --period=recent --sql="SELECT * FROM data WHERE element='temperature_air_max_200' AND value < 2.0;"

    Examples for inquiring metadata:

      # Display list of available parameters (air_temperature, precipitation, pressure, ...)
      wetterdienst dwd about parameters

      # Display list of available resolutions (minute_10, hourly, daily, ...)
      wetterdienst dwd about resolutions

      # Display list of available periods (historical, recent, now)
      wetterdienst dwd about periods

      # Display coverage/correlation between parameters, resolutions and periods.
      # This can answer questions like ...
      wetterdienst dwd about coverage

      # Tell me all periods and resolutions available for 'air_temperature'.
      wetterdienst dwd about coverage --parameter=air_temperature

      # Tell me all parameters available for 'daily' resolution.
      wetterdienst dwd about coverage --resolution=daily

    Examples for exporting data to databases:

      # Shortcut command for fetching readings from DWD
      alias fetch="wetterdienst dwd observations readings --station=1048,4411 --parameter=kl --resolution=daily --period=recent"

      # Store readings to DuckDB
      fetch --target="duckdb://database=dwd.duckdb&table=weather"

      # Store readings to InfluxDB
      fetch --target="influxdb://localhost/?database=dwd&table=weather"

      # Store readings to CrateDB
      fetch --target="crate://localhost/?database=dwd&table=weather"

    Run as HTTP service:

      wetterdienst service
      wetterdienst service --listen=0.0.0.0:9999

    """
    # The docstring above is the docopt grammar and the help text of the
    # command line interface. Its "Usage:" and "Options:" sections are parsed
    # at runtime, so their layout (indentation, blank lines, two or more
    # spaces between an option and its description) is significant.

    appname = f"{__appname__} {__version__}"

    # Read command line options.
    options = normalize_options(docopt(run.__doc__, version=appname))

    # Setup logging.
    # NOTE(review): `--debug` is described under "Options" but occurs in no
    # usage pattern, so it is presumably never accepted on the command line
    # and this lookup never yields a truthy value - confirm.
    debug = options.get("debug")
    log_level = logging.INFO
    if debug:  # pragma: no cover
        log_level = logging.DEBUG
    setup_logging(log_level)

    # Run service.
    if options.service:  # pragma: no cover
        listen_address = options.listen
        log.info(f"Starting {appname}")
        log.info(f"Starting web service on {listen_address}")
        # Imported on demand, only when the service is actually started.
        from wetterdienst.service import start_service

        start_service(listen_address)
        return

    # Output domain information.
    if options.about:
        about(options)
        return

    # Sanity checks.
    # NOTE(review): `options.forecasts` is also set for `forecasts stations`,
    # so GeoJSON is refused there as well although the message only mentions
    # readings implicitly - confirm that this is intended.
    if (options.readings or options.forecasts) and options.format == "geojson":
        raise KeyError("GeoJSON format only available for stations output")

    # Acquire station list, also used for readings if required.
    # Filtering applied for distance (a.k.a. nearby) and pre-selected stations
    df = get_stations(options)

    if options.stations and df.empty:
        log.error("No data available for given constraints")
        sys.exit(1)

    # Acquire observations.
    if options.readings:

        # Use list of station identifiers.
        if options.station:
            station_ids = read_list(options.station)
        elif options.latitude and options.longitude:
            # The station list carries either a STATION_ID or a WMO_ID
            # column; fall back to the latter when the former is missing.
            try:
                station_ids = df.STATION_ID.unique()
            except AttributeError:
                station_ids = df.WMO_ID.unique()
        else:
            raise KeyError("Either --station or --latitude, --longitude required")

        # Funnel all parameters to the workhorse.
        if options.observations:
            readings = DWDObservationData(
                station_ids=station_ids,
                parameters=read_list(options.parameter),
                resolution=options.resolution,
                periods=read_list(options.period),
                humanize_parameters=True,
                tidy_data=options.tidy,
            )

        elif options.forecasts:
            readings = DWDMosmixData(
                station_ids=station_ids,
                parameters=read_list(options.parameter),
                mosmix_type=options.mosmix_type,
                humanize_parameters=True,
                tidy_data=options.tidy,
            )

        # Collect data and merge together.
        try:
            df = readings.all()
        except ValueError as ex:
            log.exception(ex)
            sys.exit(1)

    # Sanity checks.
    if df.empty:
        log.error("No data available")
        sys.exit(1)

    # Filter readings by datetime expression.
    if options.readings and options.date:
        # Only observation requests expose a resolution; forecasts are
        # filtered without one.
        resolution = None
        if options.observations:
            resolution = readings.resolution

        df = df.dwd.filter_by_date(options.date, resolution)

    # Make column names lowercase.
    df = df.dwd.lower()

    # Apply filtering by SQL.
    if options.sql:
        log.info(f"Filtering with SQL: {options.sql}")
        df = df.io.sql(options.sql)

    # Emit to data sink, e.g. write to database.
    if options.target:
        log.info(f"Writing data to target {options.target}")
        df.io.export(options.target)
        return

    # Render to output format.
    try:
        output = df.dwd.format(options.format)
    except KeyError as ex:
        log.error(
            f'{ex}. Output format must be one of "json", "geojson", "csv", "excel".'
        )
        sys.exit(1)

    print(output)