def get_property_id(self, cursor, key):
    """Return the numeric id of the station property named *key*.

    Looks up ``tblstationpropertylist`` by unique name.

    Raises:
        ProviderException: when no property matches *key*.
    """
    cursor.execute(
        'SELECT tblstationpropertylistno FROM tblstationpropertylist WHERE uniquename=%s',
        (key, ))
    record = cursor.fetchone()
    try:
        # fetchone() returns None when there is no match; indexing None
        # raises TypeError, which we translate into a domain error.
        return record[0]
    except TypeError:
        raise ProviderException(f"No property '{key}'")
def get_property_value(self, cursor, station_no, property_id):
    """Return the value of *property_id* for station *station_no*.

    Raises:
        ProviderException: when the station has no value for the property.
    """
    cursor.execute(
        'SELECT value FROM tblstationproperty WHERE tblstationno=%s AND tblstationpropertylistno=%s',
        (station_no, property_id))
    record = cursor.fetchone()
    try:
        # fetchone() returns None when there is no row; indexing None
        # raises TypeError, which we translate into a domain error.
        return record[0]
    except TypeError:
        raise ProviderException(
            f"No property value for property '{property_id}'")
def process_data(self):
    """Fetch the two most recent METAR cycle files from NOAA, resolve
    station metadata via api.checkwx.com (cached in redis), then save
    stations and any new wind measures.

    Per-station and network errors are logged and do not abort the run.
    """
    try:
        self.log.info('Processing Metar data...')
        # Local ICAO database used as a fallback for lat/lon/name.
        with open(os.path.join(os.path.dirname(__file__), 'metar/stations.json')) as in_file:
            icao = json.load(in_file)
        now = arrow.utcnow()
        hour = now.hour
        minute = now.minute
        if minute < 45:
            current_cycle = hour
        else:
            # BUG FIX: '%' binds tighter than '+', so the original
            # 'hour + 1 % 24' evaluated to 'hour + 1' and produced the
            # invalid cycle 24 between 23:45 and 23:59.
            current_cycle = (hour + 1) % 24
        stations = {}
        # BUG FIX: wrap the previous cycle as well, otherwise cycle -1 is
        # requested when current_cycle is 0.
        for cycle in ((current_cycle - 1) % 24, current_cycle):
            file = f'http://tgftp.nws.noaa.gov/data/observations/metar/cycles/{cycle:02d}Z.TXT'
            self.log.info(f"Processing '{file}' ...")
            request = requests.get(file, stream=True,
                                   timeout=(self.connect_timeout, self.read_timeout))
            for line in request.iter_lines():
                if line:
                    data = line.decode('iso-8859-1')
                    try:
                        # Is this line a date with format "2017/05/12 23:55" ?
                        arrow.get(data, 'YYYY/MM/DD HH:mm')
                        continue
                    # Catch also ValueError because https://github.com/crsmithdev/arrow/issues/535
                    except (arrow.parser.ParserError, ValueError):
                        try:
                            metar = Metar(data, strict=False)
                            # wind_dir could be NONE if 'dir' is 'VRB'
                            if metar.wind_speed:
                                if metar.station_id not in stations:
                                    stations[metar.station_id] = {}
                                key = arrow.get(metar.time).timestamp
                                stations[metar.station_id][key] = metar
                        except Exception as e:
                            self.log.warn(f'Error while parsing METAR data: {e}')
                            continue
        for metar_id in stations:
            metar = next(iter(stations[metar_id].values()))
            try:
                name, short_name, default_name, lat, lon, altitude, tz = \
                    None, None, None, None, None, None, None
                checkwx_key = f'metar/checkwx/{metar.station_id}'
                if not self.redis.exists(checkwx_key):
                    try:
                        self.log.info('Calling api.checkwx.com...')
                        request = requests.get(
                            f'https://api.checkwx.com/station/{metar.station_id}',
                            headers={'Accept': 'application/json',
                                     'X-API-Key': self.checkwx_api_key},
                            timeout=(self.connect_timeout, self.read_timeout)
                        )
                        if request.status_code == 401:
                            raise UsageLimitException(request.json()['errors'][0]['message'])
                        elif request.status_code == 429:
                            raise UsageLimitException('api.checkwx.com rate limit exceeded')
                        try:
                            checkwx_data = request.json()['data'][0]
                            if 'icao' not in checkwx_data:
                                raise ProviderException('Invalid CheckWX data')
                        except (ValueError, KeyError):
                            # Response is an error document; surface its messages.
                            checkwx_json = request.json()
                            messages = []
                            if type(checkwx_json['data']) is list:
                                messages.extend(checkwx_json['data'])
                            else:
                                messages.append(checkwx_json['data'])
                            raise ProviderException(f'CheckWX API error: {",".join(messages)}')
                        self.add_redis_key(checkwx_key, {
                            'data': json.dumps(checkwx_data),
                            'date': arrow.now().format('YYYY-MM-DD HH:mm:ssZZ'),
                        }, self.checkwx_cache_duration)
                    except TimeoutError as e:
                        raise e
                    except UsageLimitException as e:
                        # Cache the failure so we stop hammering the API.
                        self.add_redis_key(checkwx_key, {
                            'error': repr(e),
                            'date': arrow.now().format('YYYY-MM-DD HH:mm:ssZZ'),
                        }, self.usage_limit_cache_duration)
                    except Exception as e:
                        if not isinstance(e, ProviderException):
                            self.log.exception('Error while getting CheckWX data')
                        else:
                            self.log.warn(f'Error while getting CheckWX data: {e}')
                        self.add_redis_key(checkwx_key, {
                            'error': repr(e),
                            'date': arrow.now().format('YYYY-MM-DD HH:mm:ssZZ'),
                        }, self.checkwx_cache_duration)
                if not self.redis.hexists(checkwx_key, 'error'):
                    checkwx_data = json.loads(self.redis.hget(checkwx_key, 'data'))
                    station_type = checkwx_data.get('type', None)
                    if station_type:
                        name = f'{checkwx_data["name"]} {station_type}'
                    else:
                        name = checkwx_data['name']
                    city = checkwx_data.get('city', None)
                    if city:
                        if station_type:
                            short_name = f'{city} {station_type}'
                        else:
                            short_name = city
                    else:
                        default_name = checkwx_data['name']
                    lat = checkwx_data['latitude']['decimal']
                    lon = checkwx_data['longitude']['decimal']
                    tz = checkwx_data['timezone']['tzid']
                    elevation = checkwx_data.get('elevation', None)
                    altitude = None
                    if elevation:
                        if 'meters' in elevation:
                            altitude = Q_(elevation['meters'], ureg.meters)
                        elif 'feet' in elevation:
                            altitude = Q_(elevation['feet'], ureg.feet)
                if metar.station_id in icao:
                    # Fall back to the local ICAO database for missing fields.
                    lat = lat or icao[metar.station_id]['lat']
                    lon = lon or icao[metar.station_id]['lon']
                    default_name = default_name or icao[metar.station_id]['name']
                station = self.save_station(
                    metar.station_id, short_name, name, lat, lon, Status.GREEN,
                    altitude=altitude, tz=tz,
                    url=os.path.join(self.provider_url, f'site?id={metar.station_id}&db=metar'),
                    default_name=default_name or f'{metar.station_id} Airport',
                    lookup_name=f'{metar.station_id} Airport ICAO')
                station_id = station['_id']
                if metar.station_id not in icao:
                    self.log.warn(f"Missing '{metar.station_id}' ICAO in database. Is it '{station['name']}'?")
                measures_collection = self.measures_collection(station_id)
                new_measures = []
                for key in stations[metar_id]:
                    metar = stations[metar_id][key]
                    if not self.has_measure(measures_collection, key):
                        temp = self.get_quantity(metar.temp, self.temperature_units)
                        dew_point = self.get_quantity(metar.dewpt, self.temperature_units)
                        humidity = self.compute_humidity(dew_point, temp)
                        measure = self.create_measure(
                            station, key,
                            self.get_direction(metar.wind_dir),
                            self.get_quantity(metar.wind_speed, self.speed_units),
                            self.get_quantity(metar.wind_gust or metar.wind_speed, self.speed_units),
                            temperature=temp,
                            humidity=humidity,
                            pressure=Pressure(
                                qfe=None,
                                qnh=self.get_quantity(metar.press, self.pressure_units),
                                qff=self.get_quantity(metar.press_sea_level, self.pressure_units))
                        )
                        new_measures.append(measure)
                self.insert_new_measures(measures_collection, station, new_measures)
            except ProviderException as e:
                self.log.warn(f"Error while processing station '{metar_id}': {e}")
            except Exception as e:
                self.log.exception(f"Error while processing station '{metar_id}': {e}")
    except Exception as e:
        self.log.exception(f'Error while processing Metar: {e}')
    self.log.info('Done !')
def process_data(self):
    """Scrape the iWeathar map page for stations, then each station page
    for its latest wind/temperature/humidity values, and store new
    measures. Per-station errors are logged and do not abort the run."""
    try:
        self.log.info('Processing iWeathar data...')
        list_pattern = re.compile(
            r'marker = new PdMarker\(new GLatLng\((?P<lat>[+-]?([0-9]*[.])?[0-9]+), '
            r'(?P<lon>[+-]?([0-9]*[.])?[0-9]+)\), icon(.*?)'
            r"var html = \"(?P<name>.*?)<br><a href=\\'display\.php\?s_id=(?P<id>[0-9]+)\\'",
            flags=re.DOTALL)
        speed_pattern = re.compile(
            r'In the last few minutes the wind was (.*?) at an '
            r'average speed of (?P<avg>[0-9]+) kmh, reaching up to (?P<max>[0-9]+) kmh'
        )
        dir_pattern = re.compile(r'.{1,3} (?P<dir>[0-9]+)')
        temp_pattern = re.compile(r'(?P<temp>-?([0-9]+[.])?[0-9]?)')
        hum_pattern = re.compile(r'(?P<hum>[0-9]{1,2})')

        session = requests.Session()
        session.headers.update(user_agents.chrome)
        page = session.get(self.provider_url + '/google_maps.php',
                           timeout=(self.connect_timeout, self.read_timeout),
                           verify=False).text
        station_dicts = [m.groupdict() for m in list_pattern.finditer(page)]
        iweathar_tz = tz.gettz('Africa/Johannesburg')
        for item in station_dicts:
            station_id = None
            try:
                url = self.provider_url + '/display?s_id=' + item['id']
                station = self.save_station(
                    item['id'], item['name'], item['name'],
                    float(item['lat']), float(item['lon']),
                    Status.GREEN, url=url)
                station_id = station['_id']
                tree = html.fromstring(
                    session.get(url,
                                timeout=(self.connect_timeout, self.read_timeout),
                                verify=False).text)
                try:
                    date = tree.xpath(
                        '//*[text()="Last Update:"]/../following::td'
                    )[0].text.strip()
                except Exception:
                    raise ProviderException(
                        'Unable to parse the date: is the html page well rendered?'
                    )
                key = arrow.get(date, 'YYYY-MM-DD HH:mm:ss').replace(
                    tzinfo=iweathar_tz).timestamp
                # Wind values are mandatory; missing ones are a station error.
                try:
                    dir_text = tree.xpath(
                        '//*[text()="Wind Direction:"]/../following::td/a'
                    )[0].text.strip()
                    wind_dir = dir_pattern.match(dir_text).groupdict()['dir']
                    speed_text = tree.xpath(
                        '//*[text()="Weather Summary:"]/../following::td'
                    )[0].text.strip()
                    speed_match = speed_pattern.match(speed_text).groupdict()
                    wind_avg = speed_match['avg']
                    wind_max = speed_match['max']
                except Exception:
                    raise ProviderException('Unable to get wind measures')
                # Temperature and humidity are optional.
                try:
                    temp_text = tree.xpath(
                        '//*[text()="Temperature:"]/../following::td/a'
                    )[0].text.strip()
                    temp = temp_pattern.match(temp_text).groupdict()['temp']
                except Exception:
                    temp = None
                try:
                    hum_text = tree.xpath(
                        '//*[text()="Humidity:"]/../following::td/a'
                    )[0].text.strip()
                    hum = hum_pattern.match(hum_text).groupdict()['hum']
                except Exception:
                    hum = None
                measures_collection = self.measures_collection(station_id)
                new_measures = []
                if not self.has_measure(measures_collection, key):
                    measure = self.create_measure(
                        station, key, wind_dir, wind_avg, wind_max,
                        temperature=temp, humidity=hum,
                    )
                    new_measures.append(measure)
                self.insert_new_measures(measures_collection, station, new_measures)
            except ProviderException as e:
                self.log.warn(f"Error while processing station '{station_id}': {e}")
            except Exception as e:
                self.log.exception(f"Error while processing station '{station_id}': {e}")
    except Exception as e:
        self.log.exception(f'Error while processing iWeathar: {e}')
    self.log.info('...Done!')
def process_data(self):
    """Fetch the jdc.ch station list and two days of measurements per
    station, then store any new measures.

    The station-list request is retried with exponential backoff because
    jdc.ch randomly times out early in the morning.
    """
    try:
        self.log.info('Processing JDC data...')

        @retry(wait=wait_random_exponential(multiplier=2, min=2),
               stop=stop_after_delay(60),
               after=after_log(self.log, logging.WARNING))
        def request_data():
            # jdc.ch randomly timeout early in the morning... backup tasks?
            return requests.get('http://meteo.jdc.ch/API/?Action=StationView&flags=all',
                                timeout=(self.connect_timeout, self.read_timeout))

        result = request_data()
        try:
            jdc_stations = result.json()['Stations']
        except Exception:
            raise Exception('Action=StationView returns invalid json response')
        for jdc_station in jdc_stations:
            station_id = None
            try:
                jdc_id = jdc_station['serial']
                station = self.save_station(
                    jdc_id,
                    jdc_station['short-name'],
                    jdc_station['name'],
                    jdc_station['latitude'],
                    jdc_station['longitude'],
                    self.get_status(jdc_station['status']),
                    altitude=jdc_station['altitude'],
                    url=urllib.parse.urljoin(self.provider_url,
                                             '/station/' + str(jdc_station['serial'])))
                station_id = station['_id']
                try:
                    # Asking 2 days of data
                    result = requests.get(
                        f'http://meteo.jdc.ch/API/?Action=DataView&serial={jdc_id}&duration=172800',
                        timeout=(self.connect_timeout, self.read_timeout))
                    try:
                        # FIX: was named 'json', shadowing the json module
                        # for the whole function scope.
                        jdc_json = result.json()
                    except ValueError:
                        raise Exception('Action=DataView returns invalid json response')
                    if jdc_json['ERROR'] == 'OK':
                        measures_collection = self.measures_collection(station_id)
                        measures = jdc_json['data']['measurements']
                        new_measures = []
                        for jdc_measure in measures:
                            key = jdc_measure['unix-time']
                            if not self.has_measure(measures_collection, key):
                                try:
                                    measure = self.create_measure(
                                        station, key,
                                        jdc_measure.get('wind-direction'),
                                        jdc_measure.get('wind-average'),
                                        jdc_measure.get('wind-maximum'),
                                        temperature=jdc_measure.get('temperature'),
                                        humidity=jdc_measure.get('humidity'),
                                        pressure=Pressure(
                                            qfe=jdc_measure.get('pressure', None),
                                            qnh=None, qff=None),
                                        rain=jdc_measure.get('rain', None),
                                    )
                                    new_measures.append(measure)
                                except ProviderException as e:
                                    self.log.warning(
                                        f"Error while processing measure '{key}' for station '{station_id}': {e}")
                                except Exception as e:
                                    self.log.exception(
                                        f"Error while processing measure '{key}' for station '{station_id}': {e}")
                        self.insert_new_measures(measures_collection, station, new_measures)
                    else:
                        raise ProviderException(f"Action=Data returns an error: '{jdc_json['ERROR']}'")
                except ProviderException as e:
                    self.log.warning(f"Error while processing measures for station '{station_id}': {e}")
                except Exception as e:
                    self.log.exception(f"Error while processing measures for station '{station_id}': {e}")
            except ProviderException as e:
                self.log.warning(f"Error while processing station '{station_id}': {e}")
            except Exception as e:
                self.log.exception(f"Error while processing station '{station_id}': {e}")
    except Exception as e:
        self.log.exception(f'Error while processing JDC: {e}')
    self.log.info('Done !')
def process_data(self):
    """Read stations and their last two days of measures from the WINDLINE
    MySQL database and store any new measures.

    The wind-average measure is used as the time reference; other values
    are matched to it within a small time window.
    """
    try:
        self.log.info('Processing WINDLINE data...')
        connection_info = urlparse(self.windline_sql_url)
        connection = MySQLdb.connect(
            connection_info.hostname, connection_info.username,
            connection_info.password, connection_info.path[1:],
            charset='utf8')
        # The connection is buffered by default so we can use the same
        # cursor with fetchall.
        cursor = connection.cursor()
        start_date = datetime.utcnow() - timedelta(days=2)
        # Fetch only stations that have a status (property_id=13)
        cursor.execute(
            'SELECT tblstation.tblstationno, stationid, stationname, shortdescription, value '
            'FROM tblstation '
            'INNER JOIN tblstationproperty '
            'ON tblstation.tblstationno = tblstationproperty.tblstationno '
            'WHERE tblstationpropertylistno=%s',
            (self.status_property_id, ))
        for station_no, windline_id, short_name, name, status in cursor.fetchall():
            station_id = None
            try:
                try:
                    if 6000 <= int(windline_id) < 7000:
                        # Windline integrate holfuy stations with 6xxx ids
                        raise ProviderException(
                            f'{windline_id} is an holfuy station, discarding'
                        )
                except ValueError:
                    pass
                station = self.save_station(
                    windline_id, short_name, name,
                    wgs84.parse_dms(self.get_property_value(
                        cursor, station_no, self.latitude_property_id)),
                    wgs84.parse_dms(self.get_property_value(
                        cursor, station_no, self.longitude_property_id)),
                    self.get_status(status),
                    altitude=self.get_property_value(
                        cursor, station_no, self.altitude_property_id))
                station_id = station['_id']
                try:
                    measures_collection = self.measures_collection(station_id)
                    new_measures = []
                    wind_average_rows = self.get_measures(
                        cursor, windline_id, self.wind_average_type, start_date)
                    wind_maximum_rows = self.get_measures(
                        cursor, windline_id, self.wind_maximum_type, start_date)
                    wind_direction_rows = self.get_measures(
                        cursor, windline_id, self.wind_direction_type, start_date)
                    temperature_rows = self.get_measures(
                        cursor, windline_id, self.temperature_type, start_date)
                    humidity_rows = self.get_measures(
                        cursor, windline_id, self.humidity_type, start_date)
                    # The wind average measure is the time reference for a measure
                    for wind_average_row in wind_average_rows:
                        try:
                            key = arrow.get(wind_average_row[0]).timestamp
                            if key not in [measure['_id'] for measure in new_measures] and \
                                    not self.has_measure(measures_collection, key):
                                wind_average = Q_(float(wind_average_row[1]),
                                                  ureg.meter / ureg.second)
                                measure_date = wind_average_row[0]
                                wind_maximum = Q_(
                                    float(self.get_measure_value(
                                        wind_maximum_rows,
                                        measure_date - timedelta(seconds=10),
                                        measure_date + timedelta(seconds=10))),
                                    ureg.meter / ureg.second)
                                wind_direction = self.get_measure_value(
                                    wind_direction_rows,
                                    measure_date - timedelta(seconds=10),
                                    measure_date + timedelta(seconds=10))
                                wind_direction = self.get_corrected_value(
                                    cursor, wind_direction, station_no,
                                    self.wind_direction_type)
                                temperature = self.get_last_measure_value(
                                    temperature_rows,
                                    measure_date + timedelta(seconds=10))
                                humidity = self.get_last_measure_value(
                                    humidity_rows,
                                    measure_date + timedelta(seconds=10))
                                measure = self.create_measure(
                                    station, key, wind_direction, wind_average,
                                    wind_maximum, temperature=temperature,
                                    humidity=humidity,
                                )
                                new_measures.append(measure)
                        except NoMeasure:
                            pass
                    self.insert_new_measures(measures_collection, station, new_measures)
                except ProviderException as e:
                    self.log.warning(
                        f"Error while processing measures for station '{station_id}': {e}"
                    )
                except Exception as e:
                    self.log.exception(
                        f"Error while processing measures for station '{station_id}': {e}"
                    )
            except ProviderException as e:
                self.log.warning(
                    f"Error while processing station '{station_id}': {e}")
            except Exception as e:
                self.log.exception(
                    f"Error while processing station '{station_id}': {e}")
    except Exception as e:
        self.log.exception(f'Error while processing Windline: {e}')
    finally:
        try:
            cursor.close()
            connection.close()
        except Exception:
            pass
    self.log.info('Done !')
def process_data(self):
    """Fetch FFVL stations, then their latest measures, and store any new
    measures. Station and measure phases fail independently."""
    stations = {}
    try:
        self.log.info('Processing FFVL data...')
        result = requests.get(
            'http://data.ffvl.fr/json/balises.json',
            timeout=(self.connect_timeout, self.read_timeout))
        for ffvl_station in result.json():
            ffvl_id = None
            try:
                ffvl_id = ffvl_station['idBalise']
                station = self.save_station(
                    ffvl_id, ffvl_station['nom'], ffvl_station['nom'],
                    ffvl_station['latitude'], ffvl_station['longitude'],
                    Status.GREEN,
                    altitude=ffvl_station['altitude'],
                    url=ffvl_station['url'])
                stations[station['_id']] = station
            except ProviderException as e:
                self.log.warning(f"Error while processing station '{ffvl_id}': {e}")
            except Exception as e:
                self.log.exception(f"Error while processing station '{ffvl_id}': {e}")
    except ProviderException as e:
        self.log.warning(f'Error while processing stations: {e}')
    except Exception as e:
        self.log.exception(f'Error while processing stations: {e}')

    try:
        @retry(wait=wait_random_exponential(multiplier=2, min=2),
               stop=stop_after_delay(60),
               after=after_log(self.log, logging.WARNING))
        def request_data():
            # data.ffvl.fr randomly returns an empty file instead the json doc
            result = requests.get(
                'http://data.ffvl.fr/json/relevesmeteo.json',
                timeout=(self.connect_timeout, self.read_timeout))
            return result.json()

        ffvl_measures = request_data()
        ffvl_tz = tz.gettz('Europe/Paris')
        for ffvl_measure in ffvl_measures:
            station_id = None
            try:
                ffvl_id = ffvl_measure['idbalise']
                station_id = self.get_station_id(ffvl_id)
                if station_id not in stations:
                    raise ProviderException(f"Unknown station '{station_id}'")
                station = stations[station_id]
                measures_collection = self.measures_collection(station_id)
                new_measures = []
                key = arrow.get(ffvl_measure['date'],
                                'YYYY-MM-DD HH:mm:ss').replace(tzinfo=ffvl_tz).timestamp
                if not self.has_measure(measures_collection, key):
                    measure = self.create_measure(
                        station, key,
                        ffvl_measure['directVentMoy'],
                        ffvl_measure['vitesseVentMoy'],
                        ffvl_measure['vitesseVentMax'],
                        temperature=ffvl_measure['temperature'],
                        humidity=ffvl_measure['hydrometrie'],
                        pressure=Pressure(qfe=ffvl_measure['pression'],
                                          qnh=None, qff=None)
                    )
                    new_measures.append(measure)
                self.insert_new_measures(measures_collection, station, new_measures)
            except ProviderException as e:
                self.log.warning(f"Error while processing measures for station '{station_id}': {e}")
            except Exception as e:
                self.log.exception(f"Error while processing measures for station '{station_id}': {e}")
    except ProviderException as e:
        self.log.warning(f'Error while processing FFVL: {e}')
    except Exception as e:
        self.log.exception(f'Error while processing FFVL: {e}')
    self.log.info('...Done!')
def process_data(self):
    """Fetch the Holfuy station list plus the latest live measures and
    store one new measure per station when available."""
    try:
        self.log.info('Processing Holfuy data...')
        holfuy_stations = requests.get(
            'https://api.holfuy.com/stations/stations.json',
            timeout=(self.connect_timeout, self.read_timeout)).json()
        holfuy_data = requests.get(
            'https://api.holfuy.com/live/?s=all&m=JSON&tu=C&su=km/h&utc',
            timeout=(self.connect_timeout, self.read_timeout)).json()
        # Index the live measures by station id for direct lookup.
        holfuy_measures = {m['stationId']: m for m in holfuy_data['measurements']}
        for holfuy_station in holfuy_stations['holfuyStationsList']:
            holfuy_id = None
            station_id = None
            try:
                holfuy_id = holfuy_station['id']
                name = holfuy_station['name']
                location = holfuy_station['location']
                latitude = location.get('latitude')
                longitude = location.get('longitude')
                if (latitude is None or longitude is None) or (latitude == 0 and longitude == 0):
                    raise ProviderException('No geolocation found')
                altitude = location.get('altitude')
                urls = {
                    lang: url.format(id=holfuy_id)
                    for lang, url in self.provider_urls.items()
                }
                station = self.save_station(
                    holfuy_id, name, name, latitude, longitude,
                    Status.GREEN, altitude=altitude, url=urls)
                station_id = station['_id']
                measures_collection = self.measures_collection(station_id)
                new_measures = []
                if holfuy_id not in holfuy_measures:
                    raise ProviderException(
                        f"Station '{name}' not found in 'api.holfuy.com/live/'"
                    )
                holfuy_measure = holfuy_measures[holfuy_id]
                last_measure_date = arrow.get(holfuy_measure['dateTime'])
                key = last_measure_date.timestamp
                if not self.has_measure(measures_collection, key):
                    measure = self.create_measure(
                        station, key,
                        holfuy_measure['wind']['direction'],
                        Q_(holfuy_measure['wind']['speed'], ureg.kilometer / ureg.hour),
                        Q_(holfuy_measure['wind']['gust'], ureg.kilometer / ureg.hour),
                        temperature=Q_(holfuy_measure['temperature'], ureg.degC)
                        if 'temperature' in holfuy_measure else None,
                        pressure=Pressure(
                            qfe=None,
                            qnh=Q_(holfuy_measure['pressure'], ureg.hPa)
                            if 'pressure' in holfuy_measure else None,
                            qff=None))
                    new_measures.append(measure)
                self.insert_new_measures(measures_collection, station, new_measures)
            except ProviderException as e:
                self.log.warning(
                    f"Error while processing station '{station_id or holfuy_id}': {e}"
                )
            except Exception as e:
                self.log.exception(
                    f"Error while processing station '{station_id or holfuy_id}': {e}"
                )
    except Exception as e:
        self.log.exception(f'Error while processing Holfuy: {e}')
    self.log.info('Done !')
def process_data(self):
    """Scrape the zermatt.net wind tables and store new measures.

    Each table groups rows per station: a 'station' header row followed
    by data rows — 4 rows when data is present, 2 when it is not (the
    page then shows 'kein ...'), hence the manual index stepping.
    """
    station_id = None
    try:
        self.log.info('Processing Zermatt data...')
        stations_metadata = self.get_stations_metadata()
        session = requests.Session()
        session.headers.update(user_agents.chrome)
        wind_tree = html.fromstring(
            session.get(self.provider_url,
                        timeout=(self.connect_timeout, self.read_timeout)).text)
        # Groups
        for group in wind_tree.xpath("//table[@class='w-all']"):
            rows = group.xpath('tbody/tr')
            i = 0
            while i < len(rows):
                if len(rows[i].xpath("td[@class='station']")) == 1:
                    has_data = 'kein' not in rows[i + 1].xpath('td')[0].text.lower()
                    try:
                        id_main = self.cleanup_id(rows[i].xpath('td')[0].text)
                        id_subs = []
                        sub_path = rows[i].xpath('td/span')
                        if len(sub_path) > 0:
                            sub_texts = sub_path[0].text.replace(
                                '(', '').replace(')', '').split(',')
                            for sub_text in sub_texts:
                                sub_text = sub_text.strip()
                                match = self.pylon_pattern.search(sub_text)
                                if match:
                                    id_subs.append(match['pylon'])
                                else:
                                    id_subs.append(sub_text)
                        id_sub = '-'.join(id_subs)
                        zermatt_id = self.cleanup_id(
                            f'{id_main}-{id_sub}' if id_sub else id_main)
                        try:
                            # Fetch metadata from admin
                            zermatt_station = [
                                d for d in stations_metadata
                                if str(d['id']) == zermatt_id][0]
                        except Exception:
                            # Unknown station: skip it (finally still steps i).
                            continue
                        station = self.save_station(
                            zermatt_id,
                            zermatt_station['name'],
                            zermatt_station['short_name'],
                            zermatt_station['latitude'],
                            zermatt_station['longitude'],
                            Status.GREEN if has_data else Status.RED,
                            altitude=zermatt_station['altitude']
                            if 'altitude' in zermatt_station else None,
                            url=self.provider_url)
                        station_id = station['_id']
                        if has_data:
                            key_text = rows[i + 1].xpath("td[@class='c5']")[0].text
                            key = arrow.get(key_text.strip(), 'DD.MM.YYYY H:mm').replace(
                                tzinfo=self.default_tz).timestamp
                            measures_collection = self.measures_collection(station_id)
                            if not self.has_measure(measures_collection, key):
                                wind_dir_text = rows[i + 1].xpath("td[@class='c4']")[0].text
                                wind_dir = self.wind_directions[wind_dir_text.strip()]
                                wind_avg_text = rows[i + 1].xpath("td[@class='c3']")[0].text
                                wind_avg = self.wind_pattern.match(wind_avg_text.strip())['wind']
                                # zermatt.net is providing wind_max for the last 3 hours. Using wind_avg instead.
                                # wind_max_text = stations[i+2].xpath("td[@class='c3']")[0].text
                                # wind_max = self.wind_pattern.match(wind_max_text.strip())['wind']
                                temp_text = rows[i + 1].xpath("td[@class='c2']")[0].text
                                temp = self.temp_pattern.match(
                                    temp_text.strip())['temp'] if temp_text else None
                                measure = self.create_measure(
                                    station, key, wind_dir, wind_avg, wind_avg,
                                    temperature=temp)
                                self.insert_new_measures(
                                    measures_collection, station, [measure])
                        else:
                            self.log.warning(f"No data for station '{station_id}'")
                    except ProviderException as e:
                        self.log.warning(
                            f"Error while processing station '{station_id}': {e}"
                        )
                    except Exception as e:
                        self.log.exception(
                            f"Error while processing station '{station_id}': {e}"
                        )
                    finally:
                        if has_data:
                            i += 4
                        else:
                            i += 2
                else:
                    raise ProviderException('Invalid html table order')
    except Exception as e:
        self.log.exception(f'Error while processing Zermatt: {e}')
def process_data(self):
    """Fetch Windspots stations and each station's latest measure and
    store it if not already present.

    Also logs a warning when the wind-direction sample time disagrees
    with the measure time reported by the API.
    """
    try:
        self.log.info('Processing WindsSpots data...')
        result = requests.get(
            'https://api.windspots.com/windmobile/stationinfos?allStation=true',
            timeout=(self.connect_timeout, self.read_timeout),
            verify=False)
        for windspots_station in result.json()['stationInfo']:
            station_id = None
            try:
                windspots_id = windspots_station['@id'][10:]
                station = self.save_station(
                    windspots_id,
                    windspots_station['@shortName'],
                    windspots_station['@name'],
                    windspots_station['@wgs84Latitude'],
                    windspots_station['@wgs84Longitude'],
                    windspots_station['@maintenanceStatus'],
                    altitude=windspots_station['@altitude'])
                station_id = station['_id']
                try:
                    # Asking 2 days of data
                    result = requests.get(
                        f'https://api.windspots.com/windmobile/stationdatas/windspots:{windspots_id}',
                        timeout=(self.connect_timeout, self.read_timeout),
                        verify=False)
                    try:
                        windspots_measure = result.json()
                    except ValueError:
                        raise ProviderException(
                            'Action=Data return invalid json response')
                    measures_collection = self.measures_collection(station_id)
                    new_measures = []
                    try:
                        key = arrow.get(windspots_measure['@lastUpdate'],
                                        'YYYY-M-DTHH:mm:ssZZ').timestamp
                    except arrow.parser.ParserError:
                        # FIX: added the missing closing quote around the date
                        # in the error message.
                        raise ProviderException(
                            f"Unable to parse measure date: '{windspots_measure['@lastUpdate']}'"
                        )
                    wind_direction_last = windspots_measure[
                        'windDirectionChart']['serie']['points'][0]
                    wind_direction_key = int(wind_direction_last['date']) // 1000
                    if arrow.get(key).minute != arrow.get(wind_direction_key).minute:
                        key_time = arrow.get(key).to('local').format(
                            'YY-MM-DD HH:mm:ssZZ')
                        direction_time = arrow.get(wind_direction_key).to(
                            'local').format('YY-MM-DD HH:mm:ssZZ')
                        self.log.warning(
                            f"{station['short']} ({station_id}): wind direction time '{direction_time}' is "
                            f"inconsistent with measure time '{key_time}'")
                    if not self.has_measure(measures_collection, key):
                        try:
                            measure = self.create_measure(
                                station, key,
                                wind_direction_last['value'],
                                windspots_measure.get('windAverage'),
                                windspots_measure.get('windMax'),
                                temperature=windspots_measure.get('airTemperature'),
                                humidity=windspots_measure.get('airHumidity'),
                            )
                            new_measures.append(measure)
                        except ProviderException as e:
                            self.log.warning(
                                f"Error while processing measure '{key}' for station '{station_id}': {e}"
                            )
                        except Exception as e:
                            self.log.exception(
                                f"Error while processing measure '{key}' for station '{station_id}': {e}"
                            )
                    self.insert_new_measures(measures_collection, station, new_measures)
                except ProviderException as e:
                    self.log.warning(
                        f"Error while processing measure for station '{station_id}': {e}"
                    )
                except Exception as e:
                    self.log.exception(
                        f"Error while processing measure for station '{station_id}': {e}"
                    )
            except ProviderException as e:
                self.log.warning(
                    f"Error while processing station '{station_id}': {e}")
            except Exception as e:
                self.log.exception(
                    f"Error while processing station '{station_id}': {e}")
    except ProviderException as e:
        self.log.warning(f'Error while processing Windspots: {e}')
    except Exception as e:
        self.log.exception(f'Error while processing Windspots: {e}')
    self.log.info('Done !')
def process_data(self):
    """Scrape the thunerwetter.ch wind and air pages for the single
    'Thun Westquartier' station and store one new measure when the wind
    and air page timestamps match."""
    station_id = None
    try:
        self.log.info('Processing Thunerwetter data...')
        date_pattern = re.compile(r'am (?P<date>.*?) um (?P<time>.*?) Uhr')
        wind_pattern = re.compile(r'(?P<wind_speed>[0-9]{1,3}\.[0-9]) km/h / '
                                  r'(?P<wind_dir>[A-Z]{1,2}(-[A-Z]{1,2})?)')
        # German 16-point compass labels mapped to degrees (22.5° steps).
        wind_directions = {
            label: index * (360 / 16)
            for index, label in enumerate((
                'N', 'N-NO', 'NO', 'O-NO', 'O', 'O-SO', 'SO', 'S-SO',
                'S', 'S-SW', 'SW', 'W-SW', 'W', 'W-NW', 'NW', 'N-NW'))
        }
        temp_pattern = re.compile(r'(?P<temp>[-+]?[0-9]{1,3}\.[0-9]) °C')
        humidity_pattern = re.compile(r'(?P<humidity>[0-9]{1,3}) %')
        thun_tz = tz.gettz('Europe/Zurich')
        session = requests.Session()
        session.headers.update(user_agents.chrome)
        wind_tree = html.fromstring(
            session.get(self.provider_url,
                        timeout=(self.connect_timeout, self.read_timeout)).text)
        # Date
        date_text = wind_tree.xpath(
            '//td[text()[contains(.,"Messwerte von Thun")]]')[0].text.strip()
        date = date_pattern.search(date_text).groupdict()
        station = self.save_station(
            'westquartier', 'Thun Westquartier', 'Thun Westquartier',
            float(46.7536663), float(7.6211841), Status.GREEN,
            url=self.provider_url
        )
        station_id = station['_id']
        key = arrow.get(f'{date["date"]} {date["time"]}', 'DD.MM.YYYY HH:mm').replace(
            tzinfo=thun_tz).timestamp
        measures_collection = self.measures_collection(station_id)
        new_measures = []
        if not self.has_measure(measures_collection, key):
            wind_elements = wind_tree.xpath('//td[text()="Ø 10 Minuten"]')
            # Wind average
            wind_avg_text = wind_elements[0].xpath('following-sibling::td')[0].text.strip()
            wind_avg = wind_pattern.search(wind_avg_text).groupdict()
            # Wind max
            wind_max_text = wind_elements[1].xpath('following-sibling::td')[0].text.strip()
            wind_max = wind_pattern.search(wind_max_text).groupdict()
            air_tree = html.fromstring(
                session.get(self.provider_url_temp,
                            timeout=(self.connect_timeout, self.read_timeout)).text)
            # Date
            date_text = air_tree.xpath(
                '//td[text()[contains(.,"Messwerte von Thun")]]')[0].text.strip()
            date = date_pattern.search(date_text).groupdict()
            air_date = arrow.get(f'{date["date"]} {date["time"]}', 'DD.MM.YYYY HH:mm').replace(
                tzinfo=thun_tz).timestamp
            if air_date != key:
                raise ProviderException('Wind and air dates are not matching')
            air_elements = air_tree.xpath('//td[text()="aktuell"]')
            # Temperature
            temp_text = air_elements[0].xpath('following-sibling::td')[0].text.strip()
            temp = temp_pattern.search(temp_text).groupdict()
            # Humidity
            humidity_text = air_elements[1].xpath('following-sibling::td')[0].text.strip()
            humidity = humidity_pattern.search(humidity_text).groupdict()
            measure = self.create_measure(
                station, key,
                wind_directions[wind_avg['wind_dir']],
                wind_avg['wind_speed'],
                wind_max['wind_speed'],
                temperature=temp['temp'],
                humidity=humidity['humidity']
            )
            new_measures.append(measure)
        self.insert_new_measures(measures_collection, station, new_measures)
    except ProviderException as e:
        self.log.warning(f"Error while processing station '{station_id}': {e}")
    except Exception as e:
        self.log.exception(f"Error while processing station '{station_id}': {e}")
    self.log.info('...Done!')