def get_darksky_forecast(stid, lat, lon, api_key, forecast_date):
    """
    Retrieve the hourly Dark Sky forecast for a point location.

    :param stid: str: station ID, passed through to the Forecast object
    :param lat: float: latitude (sent to the API with 3 decimals)
    :param lon: float: longitude (sent to the API with 3 decimals)
    :param api_key: str: Dark Sky API key, inserted into the request URL
    :param forecast_date: datetime: day to forecast; daily values are taken from hour 6 of this date through
        hour 6 of the following day
    :return: Forecast object with daily values and the hourly time series
    :raises requests.exceptions.HTTPError: if the API answers with an error status
    """
    # Retrieve data
    api_url = 'https://api.darksky.net/forecast/%s/%s'
    point = '%0.3f,%0.3f' % (lat, lon)
    api_options = {'exclude': 'currently,minutely,daily,alerts,flags'}
    json_url = api_url % (api_key, point)
    response = requests.get(json_url, params=api_options)

    # Raise error for invalid HTTP response. This is checked BEFORE decoding the body so that an error page
    # which is not valid JSON surfaces as the HTTP error rather than as a JSON decoding error.
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print('darksky: got HTTP error when querying API')
        raise
    darksky_data = response.json()

    # Convert to pandas DataFrame and fix time, units, and columns
    darksky_df = pd.DataFrame(darksky_data['hourly']['data'])
    # Build the whole time column at once instead of filling a NaN (float) column row by row
    darksky_df['DateTime'] = [
        epoch_time_to_datetime(epoch) for epoch in darksky_df['time']
    ]  # already UTC
    darksky_df.set_index('DateTime', inplace=True)
    column_names_dict = {
        'cloudCover': 'cloud',
        'dewPoint': 'dewpoint',
        'precipIntensity': 'rain',
        'windBearing': 'windDirection',
        'summary': 'condition',
    }
    darksky_df = darksky_df.rename(columns=column_names_dict)
    darksky_df['cloud'] = 100. * darksky_df['cloud']  # scale cloud cover by 100
    darksky_df['windSpeed'] = mph_to_kt(darksky_df['windSpeed'])
    darksky_df['windGust'] = mph_to_kt(darksky_df['windGust'])

    # Calculate daily values
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    daily_high = darksky_df.loc[forecast_start:forecast_end, 'temperature'].max()
    daily_low = darksky_df.loc[forecast_start:forecast_end, 'temperature'].min()
    daily_wind = darksky_df.loc[forecast_start:forecast_end, 'windSpeed'].max()
    # The rain total stops one hour before the end of the window
    daily_rain = darksky_df.loc[forecast_start:forecast_end - timedelta(hours=1), 'rain'].sum()

    # Create Forecast object
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(daily_high, daily_low, daily_wind, daily_rain)
    forecast.timeseries.data = darksky_df.reset_index()

    return forecast
def get_nws_forecast(config, stid, lat, lon, forecast_date):
    """
    Retrieve current NWS forecast for a point location.

    The hourly digital forecast (DWML XML) provides the time series and a first set of daily values. The
    api.weather.gov period forecast is then combined with those daily values when it can be retrieved; if it
    cannot, the hourly-only forecast is returned.

    :param config: dict-like: only config['debug'] is read here (a warning is printed when it is > 0)
    :param stid: str: station ID
    :param lat: float: latitude
    :param lon: float: longitude
    :param forecast_date: datetime: day to forecast; hourly aggregates cover hour 6 of this date through hour 6
        of the following day
    :return: Forecast object with daily values and the hourly time series
    :raises requests.exceptions.HTTPError: if the hourly XML request answers with an error status
    """
    hourly_url = 'http://forecast.weather.gov/MapClick.php?lat=%f&lon=%f&FcstType=digitalDWML'
    request_url = hourly_url % (lat, lon)
    response = requests.get(request_url)
    # Raise error for invalid HTTP response
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print('nws: got HTTP error when querying for XML file from %s' % request_url)
        raise

    hourly_xml = eTree.fromstring(response.text)
    hourly_dict = etree_to_dict(hourly_xml)

    # Create a DataFrame for hourly data
    hourly = pd.DataFrame()
    hourly['DateTime'] = hourly_dict['dwml']['data']['time-layout']['start-valid-time']
    # De-localize the starting time so we can do an explicit datetime comparison
    hourly['DateTime'] = [
        localized_date_to_utc(parse_iso(stamp)) for stamp in hourly['DateTime']
    ]
    hourly['DateTime'] = [
        stamp.to_pydatetime().replace(tzinfo=None) for stamp in hourly['DateTime']
    ]
    # Keep 'DateTime' as a regular column and index on a copy of it
    hourly['datetime_index'] = hourly['DateTime']
    hourly.set_index('datetime_index', inplace=True)
    parameters = hourly_dict['dwml']['data']['parameters']

    # Get the temperatures
    for element in parameters['temperature']:
        if element['@type'] == 'hourly':
            hourly['temperature'] = xml_to_values(element['value'])
        elif element['@type'] == 'dew point':
            hourly['dewPoint'] = xml_to_values(element['value'])

    # Get the winds
    for element in parameters['wind-speed']:
        if element['@type'] == 'sustained':
            hourly['windSpeed'] = mph_to_kt(xml_to_values(element['value']))
        elif element['@type'] == 'gust':
            hourly['windGust'] = mph_to_kt(xml_to_values(element['value']))

    # Get other parameters
    hourly['cloud'] = xml_to_values(parameters['cloud-amount']['value'])
    hourly['windDirection'] = xml_to_values(parameters['direction']['value'])
    hourly['rain'] = xml_to_values(parameters['hourly-qpf']['value'])
    # The condition text is best-effort: skip it if it cannot be extracted, but do not swallow
    # KeyboardInterrupt/SystemExit the way a bare 'except' would.
    try:
        hourly['condition'] = xml_to_condition(parameters['weather']['weather-conditions'])
    except Exception:
        pass

    # Aggregate daily values from hourly series
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    hourly_high = hourly.loc[forecast_start:forecast_end, 'temperature'].max()
    hourly_low = hourly.loc[forecast_start:forecast_end, 'temperature'].min()
    hourly_wind = hourly.loc[forecast_start:forecast_end, 'windSpeed'].max()
    # The rain total stops one hour before the end of the window
    hourly_rain = hourly.loc[forecast_start:forecast_end - timedelta(hours=1), 'rain'].sum()

    # Create the Forecast object
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(hourly_high, hourly_low, hourly_wind, hourly_rain)
    forecast.timeseries.data = hourly

    # Now do the daily data from the Forecast API
    api_url = 'https://api.weather.gov/points'
    point = '%0.3f,%0.3f' % (lat, lon)
    daily_url = '%s/%s/forecast' % (api_url, point)

    # Retrieve daily forecast. The daily part is optional: on a connection failure, an error HTTP response or
    # an undecodable body, return the hourly-based forecast built above. The request itself is inside the
    # try so that a network error does not discard the hourly result.
    try:
        response = requests.get(daily_url)
        response.raise_for_status()
        daily_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        if config['debug'] > 0:
            print("nws: warning: no daily values used for %s ('%s')" % (stid, str(e)))
        return forecast

    # Daily values: convert to DataFrame
    daily = pd.DataFrame.from_dict(daily_data['properties']['periods'])
    # Change the wind to its max value
    daily['windSpeed'] = daily['windSpeed'].apply(wind_speed_interpreter)
    # De-localize the starting time so we can do an explicit datetime comparison.
    # Only the tzinfo is dropped here; unlike the hourly data there is no conversion to UTC.
    daily['startTime'] = [parse_iso(stamp) for stamp in daily['startTime']]
    daily['startTime'] = [stamp.replace(tzinfo=None) for stamp in daily['startTime']]
    daily.set_index('startTime', inplace=True)

    # The high is read from the period starting at hour 6 of the forecast date and the low from the period
    # starting 6 hours before the forecast date; a missing period yields NaN.
    try:
        daily_high = daily.loc[forecast_date + timedelta(hours=6), 'temperature']
    except KeyError:
        daily_high = np.nan
    try:
        daily_low = daily.loc[forecast_date - timedelta(hours=6), 'temperature']
    except KeyError:
        daily_low = np.nan
    daily_wind = mph_to_kt(daily.loc[forecast_start:forecast_end, 'windSpeed'].max())

    # Update the Forecast object, ignoring NaN on either side of each comparison
    forecast.daily.set_values(np.nanmax([hourly_high, daily_high]),
                              np.nanmin([hourly_low, daily_low]),
                              np.nanmax([hourly_wind, daily_wind]),
                              hourly_rain)

    return forecast
def get_ukmet_forecast(stid, ukmet_code, forecast_date):
    """
    Retrieve UKMET data.

    Daily highs and lows are scraped from the static forecast page; hourly values are scraped from the same
    page after it has been rendered with PhantomJS, and are then used to extend the daily highs/lows and to
    compute the maximum wind.

    :param stid: station ID
    :param ukmet_code: site-specific URL code from ukmet.codes
    :param forecast_date: datetime of day to forecast
    :return: Forecast object for high, low, max wind for next 6Z--6Z. No precip.
    """
    # Retrieve the model data
    url = 'https://www.metoffice.gov.uk/public/weather/forecast/%s' % ukmet_code
    req = Request(url, headers=hdr)
    response = urlopen(req)
    try:
        page = response.read().decode('utf-8', 'ignore')
    finally:
        # Always release the HTTP connection
        response.close()
    soup = BeautifulSoup(page, 'lxml')

    # Find UTC offset and current time in HTML
    utcoffset = int(soup.find(id='country').text.split('-')[1][0:2])
    epoch = float(soup.find("td", {"id": "firstTimeStep"})['data-epoch'])
    utcnow = datetime.utcfromtimestamp(epoch)

    # Store daily variables
    days = []
    highs = []  # this can be overwritten by hourly
    lows = []  # this can be overwritten by hourly
    winds = []  # this comes from hourly

    # Pull in daily data using li tabs
    for tab_id in ('tabDay1', 'tabDay2', 'tabDay3'):
        pars = soup.find(id=tab_id)
        days.append(datetime.strptime(pars['data-date'], '%Y-%m-%d'))
        highs.append(c_to_f(
            pars.find_all("span", {"title": "Maximum daytime temperature"})[0]['data-value-raw']))
        lows.append(c_to_f(
            pars.find_all("span", {"title": "Minimum nighttime temperature"})[0]['data-value-raw']))

    # Pull in hourly data
    # This requires PhantomJS to pull out additional HTML code
    driver = webdriver.PhantomJS(executable_path='/home/disk/p/wxchallenge/bin/phantomjs')
    try:
        # NOTE(review): the date in the URL fragment is hard-coded; left unchanged because its effect on the
        # rendered page cannot be determined from this function -- confirm whether it should track the
        # forecast date.
        driver.get(url + '#?date=2017-09-21')
        source = driver.page_source
    finally:
        # Shut the browser process down even if loading the page fails
        driver.quit()
    soup = BeautifulSoup(source, 'html.parser')

    dateTime = []
    temperature = []
    temperature_c = []
    dewpoint = []
    windSpeed = []
    windGust = []
    windDirection = []
    humidity = []  # this is temporary--converted to dew point below

    for div_id in ('divDayModule0', 'divDayModule1', 'divDayModule2', 'divDayModule3'):
        pars = soup.find(id=div_id)
        divdate = datetime.strptime(pars['data-content-id'], '%Y-%m-%d').date()

        # Time stamps: the 'Now' cell uses the page's epoch; other cells are HH:MM on this module's date,
        # shifted by the UTC offset parsed above
        hourels = pars.find_all("tr", {"class": "weatherTime"})[0].find_all('td')
        for ele in hourels:
            if ele.text == 'Now':
                dateTime.append(utcnow)
            else:
                hhmm = ele.text.split(':')
                dtmp = datetime(divdate.year, divdate.month, divdate.day, int(hhmm[0]), int(hhmm[1]))
                dateTime.append(dtmp + timedelta(hours=utcoffset))

        tempels = pars.find_all("tr", {"class": "weatherTemp"})[0].find_all(
            "i", {"class": "icon icon-animated"})
        for ele in tempels:
            temperature_c.append(float(ele['data-value-raw']))
            temperature.append(c_to_f(ele['data-value-raw']))

        # relative humidity for conversion to dew point
        humels = pars.find_all("tr", {"class": "weatherHumidity"})[0].text.split()
        for ele in humels:
            humidity.append(float(ele.split('%')[0]))

        # add wind
        for ele in pars.find_all("i", {"data-type": "windSpeed"}):
            windSpeed.append(np.round(mph_to_kt(ele['data-value-raw']), 2))
        for ele in pars.find_all("span", {"class": "gust"}):
            windGust.append(mph_to_kt(ele['data-value-raw']))
        for ele in pars.find_all("span", {"class": "direction"}):
            windDirection.append(wind_dir_to_deg(ele.text))

    # Convert T and humidity to dewpt (computed from the Celsius temperature, then converted with c_to_f)
    for temp_c, rh in zip(temperature_c, humidity):
        dewpoint.append(c_to_f(dewpoint_from_t_rh(temp_c, rh)))

    # Make into dataframe
    df = pd.DataFrame(
        {
            'temperature': temperature,
            'dewpoint': dewpoint,
            'windSpeed': windSpeed,
            'windGust': windGust,
            'windDirection': windDirection,
            'dateTime': dateTime,
        },
        index=dateTime)

    # Correct the highs and lows with the hourly data, find max wind speed
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    for d in range(len(days)):
        try:
            # unlike the mos code, we always use the 'include'
            iloc_start_include = df.index.get_loc(forecast_start)
        except KeyError:
            print('ukmet: error getting start time index in db; check data.')
            break
        try:
            iloc_end = df.index.get_loc(forecast_end)
        except KeyError:
            print('ukmet: error getting end time index in db; check data.')
            break
        window = df.iloc[iloc_start_include:iloc_end]
        raw_high = window['temperature'].max()
        raw_low = window['temperature'].min()
        winds.append(int(np.round(window['windSpeed'].max())))
        # Hourly values only ever widen the daily range
        if raw_high > highs[d]:
            highs[d] = raw_high
        if raw_low < lows[d]:
            lows[d] = raw_low
        forecast_start = forecast_start + timedelta(days=1)
        forecast_end = forecast_end + timedelta(days=1)

    # Only the first day is returned; a list of Forecast objects for the later days is currently not
    # implemented.
    # NOTE(review): if the loop above breaks on its first pass, 'winds' is empty and winds[0] raises
    # IndexError -- confirm how callers expect that case to be reported.
    forecast = Forecast(stid, default_model_name, days[0])
    forecast.timeseries.data = df
    forecast.daily.set_values(highs[0], lows[0], winds[0], None)

    return forecast
def get_wunderground_forecast(stid, api_key, forecast_date):
    """
    Retrieve the hourly Weather Underground forecast for a station.

    :param stid: str: station ID, used both in the request URL and for the Forecast object
    :param api_key: str: Weather Underground API key, inserted into the request URL
    :param forecast_date: datetime: day to forecast; daily values are taken from hour 6 of this date through
        hour 6 of the following day
    :return: Forecast object with daily values and the hourly time series
    :raises requests.exceptions.HTTPError: if the API answers with an error status
    """
    # retrieve api json data
    api_url = 'https://api.wunderground.com/api/%s/hourly/forecast/q/%s.json'
    api_options = {'features': 'hourly,forecast'}
    json_url = api_url % (api_key, stid)
    response = requests.get(json_url, params=api_options)

    # Raise error for invalid HTTP response. This is checked BEFORE decoding the body so that an error page
    # which is not valid JSON surfaces as the HTTP error rather than as a JSON decoding error.
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print('wunderground: got HTTP error when querying API')
        raise
    wunderground_data = response.json()

    # Convert to DataFrame, fix time
    wunderground_df = pd.DataFrame(wunderground_data['hourly_forecast'])
    timezone_df = pd.DataFrame(wunderground_data['forecast']['simpleforecast'])
    # NOTE(review): the result of get_timezone is never used below; the call is kept because the helper is
    # defined elsewhere -- confirm it can be dropped.
    timezone = get_timezone(timezone_df['forecastday'])
    # The time series must be extracted before get_english_units is applied to every column below
    time_series = convert_fcttime(wunderground_df['FCTTIME'])  # already UTC

    for column in wunderground_df.columns:
        wunderground_df[column] = wunderground_df[column].apply(get_english_units)
    wunderground_df['mslp'] = inhg_to_mb(wunderground_df['mslp'])
    wunderground_df['wspd'] = mph_to_kt(wunderground_df['wspd'])
    wunderground_df['wdir'] = wunderground_df['wdir'].apply(get_wind_degrees)

    column_names_dict = {
        'FCTTIME': 'DateTime',
        'temp': 'temperature',
        'wspd': 'windSpeed',
        'mslp': 'pressure',
        'sky': 'cloud',
        'dewpoint': 'dewpoint',
        'qpf': 'rain',
        'wdir': 'windDirection',
        'wx': 'condition',
    }
    # Drop the existing 'condition' column first so that renaming 'wx' does not create a duplicate
    wunderground_df.drop('condition', axis=1, inplace=True)
    wunderground_df = wunderground_df.rename(columns=column_names_dict)
    wunderground_df['DateTime'] = time_series
    wunderground_df.set_index('DateTime', inplace=True)

    # calculate daily values
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    daily_high = wunderground_df.loc[forecast_start:forecast_end, 'temperature'].max()
    daily_low = wunderground_df.loc[forecast_start:forecast_end, 'temperature'].min()
    daily_wind = wunderground_df.loc[forecast_start:forecast_end, 'windSpeed'].max()
    # The rain total stops one hour before the end of the window
    daily_rain = wunderground_df.loc[forecast_start:forecast_end - timedelta(hours=1), 'rain'].sum()

    # create Forecast object
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(daily_high, daily_low, daily_wind, daily_rain)
    forecast.timeseries.data = wunderground_df.reset_index()

    return forecast
def get_owm_forecast(stid, lat, lon, api_key, forecast_date):
    """
    Retrieve the OpenWeatherMap forecast for a point location.

    :param stid: str: station ID, passed through to the Forecast object
    :param lat: float: latitude
    :param lon: float: longitude
    :param api_key: str: OpenWeatherMap API key (sent as the 'APPID' parameter)
    :param forecast_date: datetime: day to forecast; daily values are taken from hour 6 of this date through
        hour 6 of the following day
    :return: Forecast object with daily values and the forecast time series
    :raises requests.exceptions.HTTPError: if the API answers with an error status
    """
    # Retrieve data
    api_url = 'http://api.openweathermap.org/data/2.5/forecast'
    api_options = {
        'APPID': api_key,
        'lat': lat,
        'lon': lon,
        'units': 'imperial',
    }
    response = requests.get(api_url, params=api_options)

    # Raise error for invalid HTTP response. This is checked BEFORE decoding the body so that an error page
    # which is not valid JSON surfaces as the HTTP error rather than as a JSON decoding error.
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print('openweathermap: got HTTP error when querying API')
        raise
    owm_data = response.json()

    # Convert to pandas DataFrame and fix time
    owm_df = pd.DataFrame(owm_data['list'])
    # Build the whole time column at once instead of filling a NaN (float) column row by row
    owm_df['DateTime'] = [date_to_datetime(txt) for txt in owm_df['dt_txt']]
    owm_df.set_index('DateTime', inplace=True)

    # OWM has a column 'main' which contains some parameters at all times. Get all of those.
    for parameter in owm_df['main'].iloc[0]:
        owm_df[parameter] = owm_df['main'].apply(get_parameter, args=(parameter,))

    # Get some other special parameters
    # Make sure the 'rain' parameter exists (if no rain in forecast, the column is missing)
    if 'rain' not in owm_df:
        owm_df['rain'] = 0.0
    else:
        owm_df['rain'] = mm_to_in(owm_df['rain'].apply(get_parameter, args=('3h',)))
    owm_df['condition'] = owm_df['weather'].apply(get_parameter, args=('description',), is_list=True)
    owm_df['windSpeed'] = mph_to_kt(owm_df['wind'].apply(get_parameter, args=('speed',)))
    owm_df['windDirection'] = owm_df['wind'].apply(get_parameter, args=('deg',))
    owm_df['cloud'] = owm_df['clouds'].apply(get_parameter, args=('all',))

    # Dew point. Temperatures are requested in imperial units above, whereas get_ukmet_forecast in this file
    # calls dewpoint_from_t_rh with a Celsius temperature and converts the result with c_to_f. Do the same
    # here: convert to Celsius for the helper and back to Fahrenheit afterwards.
    # NOTE(review): this relies on dewpoint_from_t_rh taking and returning Celsius -- confirm against the
    # helper's definition.
    owm_df['dewpoint'] = [
        c_to_f(dewpoint_from_t_rh((temp_f - 32.) * 5. / 9., rh))
        for temp_f, rh in zip(owm_df['temp'], owm_df['humidity'])
    ]

    # Rename remaining columns for default schema
    column_names_dict = {
        'temp': 'temperature',
    }
    owm_df = owm_df.rename(columns=column_names_dict)

    # Calculate daily values. OWM includes period maxima and minima. Note that rain in OWM is cumulative for
    # the LAST 3 hours, which is why the rain window starts 3 hours after forecast_start.
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    try:
        daily_high = owm_df.loc[forecast_start:forecast_end, 'temp_max'].max()
    except KeyError:
        daily_high = owm_df.loc[forecast_start:forecast_end, 'temperature'].max()
    try:
        daily_low = owm_df.loc[forecast_start:forecast_end, 'temp_min'].min()
    except KeyError:
        daily_low = owm_df.loc[forecast_start:forecast_end, 'temperature'].min()
    daily_wind = owm_df.loc[forecast_start:forecast_end, 'windSpeed'].max()
    # nanmax against 0.0 keeps the total non-negative and turns a NaN sum into 0.0
    daily_rain = np.nanmax([
        owm_df.loc[forecast_start + timedelta(hours=3):forecast_end, 'rain'].sum(),
        0.0,
    ])

    # Create Forecast object
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(daily_high, daily_low, daily_wind, daily_rain)
    forecast.timeseries.data = owm_df.reset_index()

    return forecast