Пример #1
0
def get_darksky_forecast(stid, lat, lon, api_key, forecast_date):
    """
    Retrieve the hourly Dark Sky forecast for a point and build a Forecast.

    :param stid: station ID, passed through to the Forecast object
    :param lat: float: latitude (formatted to 3 decimals in the request URL)
    :param lon: float: longitude (formatted to 3 decimals in the request URL)
    :param api_key: Dark Sky API key, inserted into the request URL
    :param forecast_date: datetime: day to forecast; the daily window starts
        at this date with the hour set to 6 and ends one day later
    :return: Forecast with daily high, low, max wind and summed rain set, and
        the hourly DataFrame stored in forecast.timeseries.data
    :raises requests.exceptions.HTTPError: if the API returns an error status
    """
    # Retrieve data
    api_url = 'https://api.darksky.net/forecast/%s/%s'
    point = '%0.3f,%0.3f' % (lat, lon)
    api_options = {'exclude': 'currently,minutely,daily,alerts,flags'}
    json_url = api_url % (api_key, point)
    response = requests.get(json_url, params=api_options)
    # Check the HTTP status BEFORE decoding the body: an error response may
    # not contain JSON, and decoding it first would hide the HTTP error behind
    # a decode error.
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print('darksky: got HTTP error when querying API')
        raise
    darksky_data = response.json()

    # Convert to pandas DataFrame and fix time, units, and columns
    darksky_df = pd.DataFrame(darksky_data['hourly']['data'])
    # Build the whole column at once instead of filling a float NaN column
    # cell by cell, which relied on pandas upcasting the column dtype.
    darksky_df['DateTime'] = [
        epoch_time_to_datetime(epoch) for epoch in darksky_df['time']
    ]  # already UTC
    darksky_df.set_index('DateTime', inplace=True)
    column_names_dict = {
        'cloudCover': 'cloud',
        'dewPoint': 'dewpoint',
        'precipIntensity': 'rain',
        'windBearing': 'windDirection',
        'summary': 'condition'
    }
    darksky_df = darksky_df.rename(columns=column_names_dict)
    # Scale cloud cover by 100 (presumably fraction -> percent; confirm
    # against the API documentation).
    darksky_df['cloud'] = 100. * darksky_df['cloud']
    darksky_df['windSpeed'] = mph_to_kt(darksky_df['windSpeed'])
    darksky_df['windGust'] = mph_to_kt(darksky_df['windGust'])

    # Calculate daily values
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    # Label slicing includes both endpoints
    window = darksky_df.loc[forecast_start:forecast_end]
    daily_high = window['temperature'].max()
    daily_low = window['temperature'].min()
    daily_wind = window['windSpeed'].max()
    # Rain is summed over a window ending one hour earlier, so the row at
    # forecast_end is not included in the total.
    daily_rain = darksky_df.loc[forecast_start:forecast_end -
                                timedelta(hours=1), 'rain'].sum()

    # Create Forecast object
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(daily_high, daily_low, daily_wind, daily_rain)
    forecast.timeseries.data = darksky_df.reset_index()

    return forecast
Пример #2
0
def get_nws_forecast(config, stid, lat, lon, forecast_date):
    """
    Retrieve current NWS forecast for a point location.

    Hourly values are read from the digitalDWML XML product and aggregated
    into daily values. The daily forecast from api.weather.gov is then used,
    when available, to widen the high/low/wind values; if that request fails,
    the Forecast built from the hourly data alone is returned.

    :param config: mapping; only config['debug'] is read here, to decide
        whether warnings are printed
    :param stid: str: station ID
    :param lat: float: latitude
    :param lon: float: longitude
    :param forecast_date: datetime: day to forecast; the hourly window starts
        at this date with the hour set to 6 and ends one day later
    :return: Forecast with daily values set and the hourly DataFrame stored in
        forecast.timeseries.data
    :raises requests.exceptions.HTTPError: if the hourly XML request returns
        an error status
    """
    hourly_url = 'http://forecast.weather.gov/MapClick.php?lat=%f&lon=%f&FcstType=digitalDWML'
    hourly_request_url = hourly_url % (lat, lon)
    response = requests.get(hourly_request_url)
    # Raise error for invalid HTTP response
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print('nws: got HTTP error when querying for XML file from %s' %
              hourly_request_url)
        raise

    hourly_xml = eTree.fromstring(response.text)
    hourly_dict = etree_to_dict(hourly_xml)

    # Create a DataFrame for hourly data
    hourly = pd.DataFrame()
    hourly['DateTime'] = hourly_dict['dwml']['data']['time-layout'][
        'start-valid-time']
    # De-localize the starting time so we can do an explicit datetime comparison
    hourly['DateTime'] = [
        localized_date_to_utc(parse_iso(time_str))
        for time_str in hourly['DateTime']
    ]
    # Kept as a separate pass over the stored column: the values read back
    # from the DataFrame are what .to_pydatetime() is called on.
    hourly['DateTime'] = [
        stamp.to_pydatetime().replace(tzinfo=None)
        for stamp in hourly['DateTime']
    ]
    hourly['datetime_index'] = hourly['DateTime']
    hourly.set_index('datetime_index', inplace=True)
    parameters = hourly_dict['dwml']['data']['parameters']

    # Get the temperatures
    for element in parameters['temperature']:
        if element['@type'] == 'hourly':
            hourly['temperature'] = xml_to_values(element['value'])
        elif element['@type'] == 'dew point':
            hourly['dewPoint'] = xml_to_values(element['value'])
    # Get the winds
    for element in parameters['wind-speed']:
        if element['@type'] == 'sustained':
            hourly['windSpeed'] = mph_to_kt(xml_to_values(element['value']))
        elif element['@type'] == 'gust':
            hourly['windGust'] = mph_to_kt(xml_to_values(element['value']))
    # Get other parameters
    hourly['cloud'] = xml_to_values(parameters['cloud-amount']['value'])
    hourly['windDirection'] = xml_to_values(parameters['direction']['value'])
    hourly['rain'] = xml_to_values(parameters['hourly-qpf']['value'])
    # The condition column is best-effort: skip it if it cannot be parsed, but
    # do not swallow KeyboardInterrupt/SystemExit as a bare except would.
    try:
        hourly['condition'] = xml_to_condition(
            parameters['weather']['weather-conditions'])
    except Exception as e:
        if config['debug'] > 0:
            print("nws: warning: no weather conditions used for %s ('%s')" %
                  (stid, str(e)))

    # Aggregate daily values from hourly series
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    hourly_high = hourly.loc[forecast_start:forecast_end, 'temperature'].max()
    hourly_low = hourly.loc[forecast_start:forecast_end, 'temperature'].min()
    hourly_wind = hourly.loc[forecast_start:forecast_end, 'windSpeed'].max()
    # Rain is summed over a window ending one hour earlier, so the row at
    # forecast_end is not included in the total.
    hourly_rain = hourly.loc[forecast_start:forecast_end - timedelta(hours=1),
                             'rain'].sum()

    # Create the Forecast object
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(hourly_high, hourly_low, hourly_wind,
                              hourly_rain)
    forecast.timeseries.data = hourly

    # Now do the daily data from the Forecast API
    api_url = 'https://api.weather.gov/points'
    point = '%0.3f,%0.3f' % (lat, lon)
    # Retrieve daily forecast
    daily_url = '%s/%s/forecast' % (api_url, point)
    response = requests.get(daily_url)
    # Test for an error HTTP response. If there is an error response (or the
    # body is not valid JSON), omit the daily part.
    try:
        response.raise_for_status()
        daily_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        if config['debug'] > 0:
            print("nws: warning: no daily values used for %s ('%s')" %
                  (stid, str(e)))
        return forecast

    # Daily values: convert to DataFrame
    daily = pd.DataFrame.from_dict(daily_data['properties']['periods'])
    # Change the wind to its max value
    daily['windSpeed'] = daily['windSpeed'].apply(wind_speed_interpreter)
    # De-localize the starting time so we can do an explicit datetime comparison
    # NOTE(review): unlike the hourly times, these are not passed through
    # localized_date_to_utc before the tzinfo is dropped -- confirm intended.
    daily['startTime'] = [
        parse_iso(time_str) for time_str in daily['startTime']
    ]
    daily['startTime'] = [
        start.replace(tzinfo=None) for start in daily['startTime']
    ]
    daily.set_index('startTime', inplace=True)
    # The high is looked up at the period starting 6 hours after forecast_date,
    # the low at the period starting 6 hours before; a missing period gives NaN.
    try:
        daily_high = daily.loc[forecast_date + timedelta(hours=6),
                               'temperature']
    except KeyError:
        daily_high = np.nan
    try:
        daily_low = daily.loc[forecast_date - timedelta(hours=6),
                              'temperature']
    except KeyError:
        daily_low = np.nan
    daily_wind = mph_to_kt(
        np.max(daily.loc[forecast_start:forecast_end]['windSpeed']))

    # Update the Forecast object, taking the more extreme of the hourly and
    # daily values while ignoring NaNs
    forecast.daily.set_values(np.nanmax([hourly_high, daily_high]),
                              np.nanmin([hourly_low, daily_low]),
                              np.nanmax([hourly_wind, daily_wind]),
                              hourly_rain)

    return forecast
Пример #3
0
def get_ukmet_forecast(stid, ukmet_code, forecast_date):
    """
    Retrieve UKMET data.

    The static page is scraped for the daily highs and lows; the page is then
    rendered with PhantomJS to obtain the hourly table, which is used to
    correct the daily highs/lows and to find the maximum wind speed.

    :param stid: station ID
    :param ukmet_code: site-specific URL code from ukmet.codes
    :param forecast_date: datetime of day to forecast
    :return: Forecast object for high, low, max wind for next 6Z--6Z. No precip.
    :raises IndexError: if no hourly window could be located, so no max wind
        is available
    """
    # Retrieve the model data
    url = 'https://www.metoffice.gov.uk/public/weather/forecast/%s' % ukmet_code
    req = Request(url, headers=hdr)
    response = urlopen(req)
    try:
        page = response.read().decode('utf-8', 'ignore')
    finally:
        # Always release the HTTP connection
        response.close()
    soup = BeautifulSoup(page, 'lxml')

    # Find UTC offset and current time in HTML
    # NOTE(review): takes the two characters after the first '-' in the
    # 'country' element, so this looks like it assumes a negative offset.
    utcoffset = int(soup.find(id='country').text.split('-')[1][0:2])
    epoch = float(soup.find("td", {"id": "firstTimeStep"})['data-epoch'])
    utcnow = datetime.utcfromtimestamp(epoch)

    # Store daily variables
    days = []
    highs = []  # this can be overwritten by hourly
    lows = []  # this can be overwritten by hourly
    winds = []  # this comes from hourly

    # Pull in daily data using li tabs
    tabids = ['tabDay1', 'tabDay2', 'tabDay3']
    for ids in tabids:
        pars = soup.find(id=ids)
        days.append(datetime.strptime(pars['data-date'], '%Y-%m-%d'))
        highs.append(
            c_to_f(
                pars.findAll("span", {"title": "Maximum daytime temperature"
                                      })[0]['data-value-raw']))
        lows.append(
            c_to_f(
                pars.findAll("span",
                             {"title": "Minimum nighttime temperature"
                              })[0]['data-value-raw']))

    # Pull in hourly data
    # This requires PhantomJS to pull out additional HTML code
    driver = webdriver.PhantomJS(
        executable_path='/home/disk/p/wxchallenge/bin/phantomjs')
    try:
        # NOTE(review): the date in the URL fragment is hard-coded -- confirm
        # whether the page ignores it.
        driver.get(url + '#?date=2017-09-21')
        source = driver.page_source
    finally:
        # Shut the browser process down even if the page load fails;
        # otherwise every call leaks a PhantomJS process.
        driver.quit()
    soup = BeautifulSoup(source, 'html.parser')

    dateTime = []
    temperature = []
    temperature_c = []
    dewpoint = []
    windSpeed = []
    windGust = []
    windDirection = []
    humidity = []  # this is temporary--converted to dew point below

    divids = [
        'divDayModule0', 'divDayModule1', 'divDayModule2', 'divDayModule3'
    ]
    for divs in divids:
        pars = soup.find(id=divs)
        divdate = datetime.strptime(pars['data-content-id'], '%Y-%m-%d').date()
        hourels = pars.findAll("tr",
                               {"class": "weatherTime"})[0].find_all('td')
        for ele in hourels:
            if ele.text == 'Now':
                dateTime.append(utcnow)
            else:
                time_parts = ele.text.split(':')
                dtmp = datetime(divdate.year, divdate.month, divdate.day,
                                int(time_parts[0]), int(time_parts[1]))
                # Shift the page's clock time by the offset parsed above
                dateTime.append(dtmp + timedelta(hours=utcoffset))
        tempels = pars.findAll("tr", {"class": "weatherTemp"})[0].findAll(
            "i", {"class": "icon icon-animated"})
        for ele in tempels:
            temperature_c.append(float(ele['data-value-raw']))
            temperature.append(c_to_f(ele['data-value-raw']))
        # relative humidity for conversion to dew point
        humels = pars.findAll("tr",
                              {"class": "weatherHumidity"})[0].text.split()
        for ele in humels:
            humidity.append(float(ele.split('%')[0]))
        # add wind
        speedels = pars.findAll("i", {"data-type": "windSpeed"})
        for ele in speedels:
            windSpeed.append(np.round(mph_to_kt(ele['data-value-raw']), 2))
        gustels = pars.findAll("span", {"class": "gust"})
        for ele in gustels:
            windGust.append(mph_to_kt(ele['data-value-raw']))
        direls = pars.findAll("span", {"class": "direction"})
        for ele in direls:
            windDirection.append(wind_dir_to_deg(ele.text))

    # Convert T and humidity to dewpt
    for ii, rh in enumerate(humidity):
        td_tmp = dewpoint_from_t_rh(temperature_c[ii], rh)
        dewpoint.append(c_to_f(td_tmp))

    # Make into dataframe
    df = pd.DataFrame(
        {
            'temperature': temperature,
            'dewpoint': dewpoint,
            'windSpeed': windSpeed,
            'windGust': windGust,
            'windDirection': windDirection,
            'dateTime': dateTime
        },
        index=dateTime)

    # Correct the highs and lows with the hourly data, find max wind speed
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    for d in range(len(days)):
        try:
            # unlike the mos code, we always use the 'include'
            iloc_start_include = df.index.get_loc(forecast_start)
        except (KeyError, TypeError):
            print('ukmet: error getting start time index in db; check data.')
            break
        try:
            iloc_end = df.index.get_loc(forecast_end)
        except (KeyError, TypeError):
            print('ukmet: error getting end time index in db; check data.')
            break
        # Positional slice: the row at forecast_end itself is excluded
        period = df.iloc[iloc_start_include:iloc_end]
        raw_high = period['temperature'].max()
        raw_low = period['temperature'].min()
        winds.append(int(np.round(period['windSpeed'].max())))
        if raw_high > highs[d]:
            highs[d] = raw_high
        if raw_low < lows[d]:
            lows[d] = raw_low
        forecast_start = forecast_start + timedelta(days=1)
        forecast_end = forecast_end + timedelta(days=1)

    if not winds:
        # The first window could not be located in the hourly data. Fail with
        # an explicit message instead of an opaque "list index out of range".
        raise IndexError('ukmet: no hourly data found for the forecast '
                         'period; cannot determine max wind')

    # Only the first day is returned; a per-day list of Forecast objects for
    # the later days is currently not implemented.
    forecast = Forecast(stid, default_model_name, days[0])
    forecast.timeseries.data = df
    forecast.daily.set_values(highs[0], lows[0], winds[0], None)

    return forecast
Пример #4
0
def get_wunderground_forecast(stid, api_key, forecast_date):
    """
    Retrieve the hourly Weather Underground forecast and build a Forecast.

    :param stid: station ID; used in the request URL and passed to Forecast
    :param api_key: Weather Underground API key, inserted into the request URL
    :param forecast_date: datetime: day to forecast; the daily window starts
        at this date with the hour set to 6 and ends one day later
    :return: Forecast with daily high, low, max wind and summed rain set, and
        the hourly DataFrame stored in forecast.timeseries.data
    :raises requests.exceptions.HTTPError: if the API returns an error status
    """
    # retrieve api json data
    api_url = 'https://api.wunderground.com/api/%s/hourly/forecast/q/%s.json'
    api_options = {'features': 'hourly,forecast'}
    json_url = api_url % (api_key, stid)
    response = requests.get(json_url, params=api_options)
    # Check the HTTP status BEFORE decoding the body: an error response may
    # not contain JSON, and decoding it first would hide the HTTP error behind
    # a decode error.
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print('wunderground: got HTTP error when querying API')
        raise
    wunderground_data = response.json()

    # Convert to DataFrame, fix time
    wunderground_df = pd.DataFrame(wunderground_data['hourly_forecast'])
    time_series = convert_fcttime(wunderground_df['FCTTIME'])  # already UTC

    # Reduce every cell to its english-unit value before unit conversion
    for column in wunderground_df.columns:
        wunderground_df[column] = wunderground_df[column].apply(
            get_english_units)
    wunderground_df['mslp'] = inhg_to_mb(wunderground_df['mslp'])
    wunderground_df['wspd'] = mph_to_kt(wunderground_df['wspd'])
    wunderground_df['wdir'] = wunderground_df['wdir'].apply(get_wind_degrees)

    column_names_dict = {
        'FCTTIME': 'DateTime',
        'temp': 'temperature',
        'wspd': 'windSpeed',
        'mslp': 'pressure',
        'sky': 'cloud',
        'dewpoint': 'dewpoint',
        'qpf': 'rain',
        'wdir': 'windDirection',
        'wx': 'condition'
    }
    # Drop the existing 'condition' column first so that renaming 'wx' to
    # 'condition' does not produce a duplicate column name.
    wunderground_df.drop('condition', axis=1, inplace=True)
    wunderground_df = wunderground_df.rename(columns=column_names_dict)
    wunderground_df['DateTime'] = time_series
    wunderground_df.set_index('DateTime', inplace=True)

    # calculate daily values
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    # Label slicing includes both endpoints
    window = wunderground_df.loc[forecast_start:forecast_end]
    daily_high = window['temperature'].max()
    daily_low = window['temperature'].min()
    daily_wind = window['windSpeed'].max()
    # Rain is summed over a window ending one hour earlier, so the row at
    # forecast_end is not included in the total.
    daily_rain = wunderground_df.loc[forecast_start:forecast_end -
                                     timedelta(hours=1), 'rain'].sum()

    # create Forecast object
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(daily_high, daily_low, daily_wind, daily_rain)
    forecast.timeseries.data = wunderground_df.reset_index()

    return forecast
Пример #5
0
def get_owm_forecast(stid, lat, lon, api_key, forecast_date):
    """
    Retrieve the OpenWeatherMap forecast for a point and build a Forecast.

    :param stid: station ID, passed through to the Forecast object
    :param lat: float: latitude, sent as the 'lat' query parameter
    :param lon: float: longitude, sent as the 'lon' query parameter
    :param api_key: OpenWeatherMap API key, sent as the 'APPID' parameter
    :param forecast_date: datetime: day to forecast; the daily window starts
        at this date with the hour set to 6 and ends one day later
    :return: Forecast with daily high, low, max wind and summed rain set, and
        the forecast DataFrame stored in forecast.timeseries.data
    :raises requests.exceptions.HTTPError: if the API returns an error status
    """
    # Retrieve data
    api_url = 'http://api.openweathermap.org/data/2.5/forecast'
    api_options = {
        'APPID': api_key,
        'lat': lat,
        'lon': lon,
        'units': 'imperial',
    }
    response = requests.get(api_url, params=api_options)
    # Check the HTTP status BEFORE decoding the body: an error response may
    # not contain JSON, and decoding it first would hide the HTTP error behind
    # a decode error.
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print('openweathermap: got HTTP error when querying API')
        raise
    owm_data = response.json()

    # Convert to pandas DataFrame and fix time
    owm_df = pd.DataFrame(owm_data['list'])
    # Build the whole column at once instead of filling a float NaN column
    # cell by cell, which relied on pandas upcasting the column dtype.
    owm_df['DateTime'] = [
        date_to_datetime(dt_txt) for dt_txt in owm_df['dt_txt']
    ]
    owm_df.set_index('DateTime', inplace=True)

    # OWM has a column 'main' which contains some parameters at all times. Get all of those.
    # The parameter names are taken from the first row.
    for parameter in owm_df['main'].iloc[0].keys():
        owm_df[parameter] = owm_df['main'].apply(get_parameter,
                                                 args=(parameter, ))

    # Get some other special parameters
    # Make sure the 'rain' parameter exists (if no rain in forecast, the column is missing)
    if 'rain' not in owm_df:
        owm_df = owm_df.assign(**{'rain': 0.0})
    else:
        owm_df['rain'] = mm_to_in(owm_df['rain'].apply(get_parameter,
                                                       args=('3h', )))
    owm_df['condition'] = owm_df['weather'].apply(get_parameter,
                                                  args=('description', ),
                                                  is_list=True)
    owm_df['windSpeed'] = mph_to_kt(owm_df['wind'].apply(get_parameter,
                                                         args=('speed', )))
    owm_df['windDirection'] = owm_df['wind'].apply(get_parameter,
                                                   args=('deg', ))
    owm_df['cloud'] = owm_df['clouds'].apply(get_parameter, args=('all', ))
    # Row-wise dew point, computed positionally so that repeated index labels
    # cannot turn a scalar lookup into a Series.
    # NOTE(review): the request asks for 'imperial' units; confirm that
    # dewpoint_from_t_rh accepts and returns temperatures in those units.
    owm_df['dewpoint'] = [
        dewpoint_from_t_rh(temp, rh)
        for temp, rh in zip(owm_df['temp'], owm_df['humidity'])
    ]

    # Rename remaining columns for default schema
    column_names_dict = {
        'temp': 'temperature',
    }
    owm_df = owm_df.rename(columns=column_names_dict)

    # Calculate daily values. OWM includes period maxima and minima. Note that rain in OWM is cumulative for the LAST
    # 3 hours.
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    # Prefer the period max/min columns; fall back to the plain temperature
    # when they are absent.
    try:
        daily_high = owm_df.loc[forecast_start:forecast_end, 'temp_max'].max()
    except KeyError:
        daily_high = owm_df.loc[forecast_start:forecast_end,
                                'temperature'].max()
    try:
        daily_low = owm_df.loc[forecast_start:forecast_end, 'temp_min'].min()
    except KeyError:
        daily_low = owm_df.loc[forecast_start:forecast_end,
                               'temperature'].min()
    daily_wind = owm_df.loc[forecast_start:forecast_end, 'windSpeed'].max()
    # The rain window starts 3 hours after forecast_start because each value
    # covers the preceding 3 hours; nanmax with 0.0 turns a NaN total into 0.
    daily_rain = np.nanmax([
        owm_df.loc[forecast_start + timedelta(hours=3):forecast_end,
                   'rain'].sum(), 0.0
    ])

    # Create Forecast object
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(daily_high, daily_low, daily_wind, daily_rain)
    forecast.timeseries.data = owm_df.reset_index()

    return forecast