result = jsp.query( """ SET STATEMENT max_statement_time=60 FOR SELECT `date`, SUBSTRING_INDEX(GROUP_CONCAT(`tavg` ORDER BY `priority` ASC), ",", 1) AS `tavg`, SUBSTRING_INDEX(GROUP_CONCAT(`tmin` ORDER BY `priority` ASC), ",", 1) AS `tmin`, SUBSTRING_INDEX(GROUP_CONCAT(`tmax` ORDER BY `priority` ASC), ",", 1) AS `tmax`, SUBSTRING_INDEX(GROUP_CONCAT(`prcp` ORDER BY `priority` ASC), ",", 1) AS `prcp`, SUBSTRING_INDEX(GROUP_CONCAT(`snow` ORDER BY `priority` ASC), ",", 1) AS `snow`, SUBSTRING_INDEX(GROUP_CONCAT(`wdir` ORDER BY `priority` ASC), ",", 1) AS `wdir`, SUBSTRING_INDEX(GROUP_CONCAT(`wspd` ORDER BY `priority` ASC), ",", 1) AS `wspd`, SUBSTRING_INDEX(GROUP_CONCAT(`wpgt` ORDER BY `priority` ASC), ",", 1) AS `wpgt`, SUBSTRING_INDEX(GROUP_CONCAT(`pres` ORDER BY `priority` ASC), ",", 1) AS `pres`, SUBSTRING_INDEX(GROUP_CONCAT(`tsun` ORDER BY `priority` ASC), ",", 1) AS `tsun` FROM ( (SELECT `date`, `tavg`, `tmin`, `tmax`, `prcp`, `snow`, NULL AS `wdir`, `wspd`, `wpgt`, `pres`, `tsun`, "A" AS `priority` FROM `daily_national` WHERE `station` = :station ) UNION ALL (SELECT `date`, `tavg`, `tmin`, `tmax`, `prcp`, `snow`, `wdir`, `wspd`, `wpgt`, NULL AS `pres`, `tsun`, "B" AS `priority` FROM `daily_ghcn` WHERE `station` = :station ) UNION ALL (SELECT DATE(CONVERT_TZ(`hourly_national`.`time`, "UTC", :timezone)) AS `date`, IF(count(`hourly_national`.`temp`)<24, NULL, ROUND(AVG(`hourly_national`.`temp`), 1)) AS `tavg`, IF(count(`hourly_national`.`temp`)<24, NULL, MIN(`hourly_national`.`temp`)) AS `tmin`, IF(count(`hourly_national`.`temp`)<24, NULL, MAX(`hourly_national`.`temp`)) AS `tmax`, IF(count(`hourly_national`.`prcp`)<24, NULL, SUM(`hourly_national`.`prcp`)) AS `prcp`, NULL AS `snow`, IF(count(`hourly_national`.`wdir`)<24, NULL, ROUND(DEGAVG(SUM(SIN(RADIANS(`hourly_national`.`wdir`))), SUM(COS(RADIANS(`hourly_national`.`wdir`)))), 1)) AS `wdir`, IF(count(`hourly_national`.`wspd`)<24, NULL, ROUND(AVG(`hourly_national`.`wspd`), 1)) AS `wspd`, NULL AS `wpgt`, IF(count(`hourly_national`.`pres`)<24, NULL, ROUND(AVG(`hourly_national`.`pres`), 1)) AS `pres`, NULL AS `tsun`, "C" AS `priority` FROM `hourly_national` WHERE `hourly_national`.`station` = :station GROUP BY `station`, `date` ) UNION ALL (SELECT DATE(CONVERT_TZ(`hourly_isd`.`time`, "UTC", :timezone)) AS `date`, IF(count(`hourly_isd`.`temp`)<24, NULL, ROUND(AVG(`hourly_isd`.`temp`),1)) AS `tavg`, IF(count(`hourly_isd`.`temp`)<24, NULL, MIN(`hourly_isd`.`temp`)) AS `tmin`, IF(count(`hourly_isd`.`temp`)<24, NULL, MAX(`hourly_isd`.`temp`)) AS `tmax`, IF(count(`hourly_isd`.`prcp`)<24, NULL, SUM(`hourly_isd`.`prcp`)) AS `prcp`, NULL AS `snow`, IF(count(`hourly_isd`.`wdir`)<24, NULL, ROUND(DEGAVG(SUM(SIN(RADIANS(`hourly_isd`.`wdir`))), SUM(COS(RADIANS(`hourly_isd`.`wdir`)))), 1)) AS `wdir`, IF(count(`hourly_isd`.`wspd`)<24, NULL, ROUND(AVG(`hourly_isd`.`wspd`),1)) AS `wspd`, NULL AS `wpgt`, IF(count(`hourly_isd`.`pres`)<24, NULL, ROUND(AVG(`hourly_isd`.`pres`),1)) AS `pres`, NULL AS `tsun`, "D" AS `priority` FROM `hourly_isd` WHERE `hourly_isd`.`station` = :station GROUP BY `station`, `date` ) UNION ALL (SELECT DATE(CONVERT_TZ(`hourly_synop`.`time`, "UTC", :timezone)) AS `date`, IF(count(`hourly_synop`.`temp`)<24, NULL, ROUND(AVG(`hourly_synop`.`temp`),1)) AS `tavg`, IF(count(`hourly_synop`.`temp`)<24, NULL, MIN(`hourly_synop`.`temp`)) AS `tmin`, IF(count(`hourly_synop`.`temp`)<24, NULL, MAX(`hourly_synop`.`temp`)) AS `tmax`, IF(count(`hourly_synop`.`prcp`)<24, NULL, SUM(`hourly_synop`.`prcp`)) AS `prcp`, IF(count(`hourly_synop`.`snow`)<24, NULL, 
MAX(`hourly_synop`.`snow`)) AS `snow`, IF(count(`hourly_synop`.`wdir`)<24, NULL, ROUND(DEGAVG(SUM(SIN(RADIANS(`hourly_synop`.`wdir`))), SUM(COS(RADIANS(`hourly_synop`.`wdir`)))), 1)) AS `wdir`, IF(count(`hourly_synop`.`wspd`)<24, NULL, ROUND(AVG(`hourly_synop`.`wspd`),1)) AS `wspd`, IF(count(`hourly_synop`.`wpgt`)<24, NULL, MAX(`wpgt`)) AS `wpgt`, IF(count(`hourly_synop`.`pres`)<24, NULL, ROUND(AVG(`hourly_synop`.`pres`),1)) AS `pres`, NULL AS `tsun`, "E" AS `priority` FROM `hourly_synop` WHERE `hourly_synop`.`station` = :station GROUP BY `station`, `date` ) UNION ALL (SELECT DATE(CONVERT_TZ(`hourly_metar`.`time`, "UTC", :timezone)) AS `date`, IF(count(`hourly_metar`.`temp`)<24, NULL, ROUND(AVG(`hourly_metar`.`temp`),1)) AS `tavg`, IF(count(`hourly_metar`.`temp`)<24, NULL, MIN(`hourly_metar`.`temp`)) AS `tmin`, IF(count(`hourly_metar`.`temp`)<24, NULL, MAX(`hourly_metar`.`temp`)) AS `tmax`, NULL AS `prcp`, NULL AS `snow`, IF(count(`hourly_metar`.`wdir`)<24, NULL, ROUND(DEGAVG(SUM(SIN(RADIANS(`hourly_metar`.`wdir`))), SUM(COS(RADIANS(`hourly_metar`.`wdir`)))), 1)) AS `wdir`, IF(count(`hourly_metar`.`wspd`)<24, NULL, ROUND(AVG(`hourly_metar`.`wspd`),1)) AS `wspd`, NULL AS `wpgt`, IF(count(`hourly_metar`.`pres`)<24, NULL, ROUND(AVG(`hourly_metar`.`pres`),1)) AS `pres`, NULL AS `tsun`, "F" AS `priority` FROM `hourly_metar` WHERE `hourly_metar`.`station` = :station GROUP BY `station`, `date` ) ) AS `daily_derived` WHERE ( `tavg` IS NOT NULL OR `tmin` IS NOT NULL OR `tmax` IS NOT NULL OR `prcp` IS NOT NULL ) AND `date` <= DATE_ADD(CURRENT_DATE(), INTERVAL 10 DAY) GROUP BY `date` ORDER BY `date` """, { "station": station[0], "timezone": station[1] }, )
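
# The `wdir` aggregation above averages wind directions as vectors: hourly
# directions are split into sine and cosine components, summed, and passed to
# DEGAVG, which appears to be a user-defined SQL function that converts the
# component sums back into a bearing. Purely as an illustration (the helper
# name below is hypothetical and not part of the codebase), the same circular
# mean in Python could look like this:
from math import atan2, cos, degrees, radians, sin


def circular_mean(directions):
    """Vector (circular) mean of wind directions given in degrees."""
    sin_sum = sum(sin(radians(d)) for d in directions)
    cos_sum = sum(cos(radians(d)) for d in directions)
    return round(degrees(atan2(sin_sum, cos_sum)) % 360, 1)


# Example: averaging 350° and 10° yields 0° rather than the arithmetic mean 180°
# print(circular_mean([350, 10]))  # -> 0.0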
# Get start & end dates of time series start = data.index.get_level_values("start").min() end = data.index.get_level_values("end").max() start = f"{start}-01-01" end = f"{end}-12-31" if len(start) == 10 and len(end) == 10: jsp.query(f""" INSERT INTO `inventory`( `station`, `mode`, `start`, `end` ) VALUES ( "{station[0]}", "N", "{start}", "{end}" ) ON DUPLICATE KEY UPDATE `start` = VALUES(`start`), `end` = VALUES(`end`) """) except BaseException: pass # Close Jasper instance jsp.close()
jsp.query(""" INSERT INTO `inventory`(`station`, `mode`, `start`) SELECT `station`, 'H' AS `mode`, MIN(`mindate`) AS `start` FROM ( (SELECT `station`, DATE(MIN(`time`)) as `mindate` FROM `hourly_synop` GROUP BY `station`) UNION ALL (SELECT `station`, DATE(MIN(`time`)) as `mindate` FROM `hourly_metar` GROUP BY `station`) UNION ALL (SELECT `station`, DATE(MIN(`time`)) as `mindate` FROM `hourly_national` GROUP BY `station`) UNION ALL (SELECT `station`, DATE(MIN(`time`)) as `mindate` FROM `hourly_isd` GROUP BY `station`) ) AS `hourly_inventory` GROUP BY `station` ON DUPLICATE KEY UPDATE `start` = VALUES(`start`) """)
STATIONS_PER_CYCLE = 11

# Create Jasper instance
jsp = Jasper("export.bulk.monthly")

# Get weather station(s)
stations = get_stations(
    jsp,
    read_file("monthly_stations.sql"),
    STATIONS_PER_CYCLE,
)

# Export data for each weather station
for station in stations:
    result = jsp.query(
        read_file("monthly.sql"), {"station": station[0], "timezone": station[1]}
    )

    if result.rowcount > 0:
        # Fetch data
        data = result.fetchall()

        # Export data dump
        export_csv(
            jsp, list(map(lambda d: d[:9], data)), f"/monthly/{station[0]}.csv.gz"
        )

        # Export source map
        # pylint: disable=consider-using-generator
        export_csv(
            jsp,
Export meta data for weather stations

The code is licensed under the MIT license.
"""

import json
from jasper import Jasper
from jasper.helpers import read_file
from jasper.actions import export_csv, export_json

# Create Jasper instance
jsp = Jasper("export.bulk.stations.meta")

# Export data for all weather stations
result = jsp.query(read_file("meta.sql"))

if result.rowcount > 0:
    # Fetch data
    data = result.fetchall()

    # Data lists
    full = []
    lite = []
    slim = []

    for record in data:
        # Create dict of names
        try:
            names = json.loads(record[2])
        except BaseException:
    data = Daily(station[0], model=False).fetch()

    # Get start & end dates of time series
    start = data.index.min().strftime("%Y-%m-%d")
    end = data.index.max().strftime("%Y-%m-%d")

    if len(start) == 10 and len(end) == 10:
        jsp.query(
            f"""
            INSERT INTO `inventory`(
                `station`,
                `mode`,
                `start`,
                `end`
            ) VALUES (
                "{station[0]}",
                "D",
                "{start}",
                "{end}"
            )
            ON DUPLICATE KEY UPDATE
                `start` = VALUES(`start`),
                `end` = VALUES(`end`)
            """
        )

except BaseException:
    pass

# Get current time
now = datetime.now()

# Run daily
result = jsp.query( f""" SET STATEMENT max_statement_time=90 FOR SELECT DATE(MIN(`time`)) AS `date`, DATE_FORMAT(MIN(`time`), '%H') AS `hour`, SUBSTRING_INDEX(GROUP_CONCAT(`temp` ORDER BY `priority` ASC), ',', 1) AS `temp`, SUBSTRING_INDEX(GROUP_CONCAT(`dwpt` ORDER BY `priority` ASC), ',', 1) AS `dwpt`, SUBSTRING_INDEX(GROUP_CONCAT(`rhum` ORDER BY `priority` ASC), ',', 1) AS `rhum`, SUBSTRING_INDEX(GROUP_CONCAT(`prcp` ORDER BY `priority` ASC), ',', 1) AS `prcp`, SUBSTRING_INDEX(GROUP_CONCAT(`snow` ORDER BY `priority` ASC), ',', 1) AS `snow`, SUBSTRING_INDEX(GROUP_CONCAT(`wdir` ORDER BY `priority` ASC), ',', 1) AS `wdir`, SUBSTRING_INDEX(GROUP_CONCAT(`wspd` ORDER BY `priority` ASC), ',', 1) AS `wspd`, SUBSTRING_INDEX(GROUP_CONCAT(`wpgt` ORDER BY `priority` ASC), ',', 1) AS `wpgt`, SUBSTRING_INDEX(GROUP_CONCAT(`pres` ORDER BY `priority` ASC), ',', 1) AS `pres`, SUBSTRING_INDEX(GROUP_CONCAT(`tsun` ORDER BY `priority` ASC), ',', 1) AS `tsun`, SUBSTRING_INDEX(GROUP_CONCAT(`coco` ORDER BY `priority` ASC), ',', 1) AS `coco` FROM ( (SELECT `time`, `temp`, ROUND((243.04*(LN(`rhum`/100)+((17.625*`temp`)/(243.04+`temp`)))/(17.625-LN(`rhum`/100)-((17.625*`temp`)/(243.04+`temp`)))),1) AS `dwpt`, `rhum`, `prcp`, NULL AS `snow`, `wdir`, `wspd`, NULL AS `wpgt`, `pres`, `tsun`, NULL AS `coco`, 'A' AS `priority` FROM `hourly_national` WHERE `station` = :station {f'AND `time` BETWEEN "{start_year}-01-01 00:00:00" AND "{end_year}-12-31 23:59:59"' if MODE == 'recent' else ''} ) UNION ALL (SELECT `time`, `temp`, ROUND((243.04*(LN(`rhum`/100)+((17.625*`temp`)/(243.04+`temp`)))/(17.625-LN(`rhum`/100)-((17.625*`temp`)/(243.04+`temp`)))),1) AS `dwpt`, `rhum`, `prcp`, NULL AS `snow`, `wdir`, `wspd`, NULL AS `wpgt`, `pres`, NULL AS `tsun`, NULL AS `coco`, 'B' AS `priority` FROM `hourly_isd` WHERE `station` = :station {f'AND `time` BETWEEN "{start_year}-01-01 00:00:00" AND "{end_year}-12-31 23:59:59"' if MODE == 'recent' else ''} ) UNION ALL (SELECT `time`, `temp`, ROUND((243.04*(LN(`rhum`/100)+((17.625*`temp`)/(243.04+`temp`)))/(17.625-LN(`rhum`/100)-((17.625*`temp`)/(243.04+`temp`)))),1) AS `dwpt`, `rhum`, `prcp`, `snow`, `wdir`, `wspd`, `wpgt`, `pres`, `tsun`, `coco`, 'C' AS `priority` FROM `hourly_synop` WHERE `station` = :station {f'AND `time` BETWEEN "{start_year}-01-01 00:00:00" AND "{end_year}-12-31 23:59:59"' if MODE == 'recent' else ''} ) UNION ALL (SELECT `time`, `temp`, ROUND((243.04*(LN(`rhum`/100)+((17.625*`temp`)/(243.04+`temp`)))/(17.625-LN(`rhum`/100)-((17.625*`temp`)/(243.04+`temp`)))),1) AS `dwpt`, `rhum`, NULL AS `prcp`, NULL AS `snow`, `wdir`, `wspd`, NULL AS `wpgt`, `pres`, NULL AS `tsun`, `coco`, 'D' AS `priority` FROM `hourly_metar` WHERE `station` = :station {f'AND `time` BETWEEN "{start_year}-01-01 00:00:00" AND "{end_year}-12-31 23:59:59"' if MODE == 'recent' else ''} ) ) AS `hourly_derived` WHERE `time` <= DATE_ADD(NOW(), INTERVAL 10 DAY) GROUP BY DATE_FORMAT(`time`, '%Y %m %d %H') ORDER BY `time` """, {"station": station[0]}, )
data["location"]["latitude"], "lon": data["location"]["longitude"], "elevation": data["location"]["elevation"], "tz": data["timezone"], }, ) except BaseException: pass # Create copy of stations table jsp.query("CREATE TABLE `stations_temp` LIKE `stations`") try: # Load station repository handle, _ = request.urlretrieve( "https://github.com/meteostat/weather-stations/archive/refs/heads/master.zip" ) zip_obj = zipfile.ZipFile(handle, "r") # Write all stations for index, name in enumerate(zip_obj.namelist()): if re.search("/stations/([A-Z0-9]{5}).json$", name): file = zip_obj.namelist()[index] raw = zip_obj.open(file) data = json.loads(raw.read().decode("UTF-8")) write_station(data)
jsp, read_file(f"hourly_stations_{MODE}.sql"), STATIONS_PER_CYCLE, ) # Start & end year now = datetime.now() start_year = now.year - 1 if MODE in ("recent", "live") else 1890 end_year = now.year + 1 # Export data for each weather station for station in stations: result = jsp.query( read_file("hourly.sql"), { "station": station[0], "start_datetime": f"{start_year}-01-01 00:00:00", "end_datetime": f"{end_year}-12-31 23:59:59", }, ) if result.rowcount > 0: # Fetch data data = result.fetchall() # Write all data if MODE == "all": write_dump(data, station[0]) # Write annually first_year = int(data[0][0].year) last_year = int(data[-1][0].year)