def get_latest_date(config, data_binding, stid, table_type='OBS'):
    """
    Retrieve the latest datetime in a table for a station.

    :param config: dict: program configuration
    :param data_binding: str: name of the data binding to use
    :param stid: str: station ID
    :param table_type: str: type of table
    :return: datetime: last available observation date, or None if the table
        is missing or empty
    """
    # Get the database and the names of columns in the schema
    database = config['DataBinding'][data_binding]['database']
    schema_name = config['DataBinding'][data_binding]['schema']
    schema = get_object(schema_name).schema
    # Schema convention: the first column of each table is the datetime key
    date_key = schema[table_type][0][0]
    table = '%s_%s' % (stid.upper(), table_type.upper())
    conn = connection(config, database)
    try:
        cursor = conn.cursor()
        try:
            # Table/column names cannot be bound parameters; they come from
            # the trusted schema, not user input
            cursor.execute("SELECT %s FROM %s ORDER BY %s DESC LIMIT 1;" % (date_key, table, date_key))
            last_dt = date_to_datetime(cursor.fetchone()[0])
        except Exception:
            # Fix: narrowed from a bare `except:`. A missing table raises an
            # operational error and an empty table makes fetchone() return
            # None (TypeError on subscript); both mean "no data available".
            last_dt = None
    finally:
        # Fix: the original leaked the connection; always close it
        conn.close()
    return last_dt
def get_forecast_stats(forecasts, verifs, day_list=None):
    """
    Return the statistics of a forecast relative to a verification.

    :param forecasts: dict: forecast objects keyed by day
    :param verifs: dict: verification objects keyed by day
    :param day_list: list: if given, compute stats only for these days;
        otherwise use the days common to forecasts and verifs
    :return: OrderedDict: bias, RMSE, and bias-corrected RMSE for each of
        'high', 'low', 'wind', and 'rain'
    """
    if day_list is not None:
        days = day_list
    else:
        days = list(forecasts.keys() & verifs.keys())
    num_days = len(days)
    stats_dict = OrderedDict()
    stats_dict['attrs'] = OrderedDict()
    stats_dict['attrs']['numDays'] = num_days
    stats_dict['attrs']['verifyingDays'] = [date_to_datetime(d).isoformat() + 'Z' for d in days]
    stats_dict['stats'] = OrderedDict()
    for var in ['high', 'low', 'wind', 'rain']:
        stats_dict['stats'][var] = OrderedDict()
    if num_days < 1:
        return stats_dict
    for var in ['high', 'low', 'wind', 'rain']:
        # Fix: np.float was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin float is the documented replacement
        forecast_values = np.array([getattr(forecasts[day], var) for day in days], dtype=float)
        verif_values = np.array([getattr(verifs[day], var) for day in days], dtype=float)
        error = forecast_values - verif_values
        bias = np.nanmean(error)
        # Fix: RMSE is sqrt(mean(err**2)). The original computed
        # nanmean(sqrt(err**2)), which is the mean ABSOLUTE error.
        rmse = np.sqrt(np.nanmean(error ** 2.))
        rmse_no_bias = np.sqrt(np.nanmean((error - bias) ** 2.))
        stats_dict['stats'][var]['bias'] = bias
        stats_dict['stats'][var]['rmse'] = rmse
        stats_dict['stats'][var]['rmseNoBias'] = rmse_no_bias
    return stats_dict
def readDaily(config, stid, data_binding, table_type, model=None, start_date=None, end_date=None, force_list=False):
    """
    Retrieve one Daily (or a list of Dailys) from a data binding for a given station and table type.

    table_type must be 'verif', 'climo', 'daily_forecast', or anything defined in the schema of
    data_binding as %(stid)_%(table_type).upper(). A model should be given unless reading from
    verif or climo tables. Date window defaults: with both dates None, the window runs from now to
    24 hours ahead; with only start_date None, it starts 24 hours before end_date; with only
    end_date None, it ends 24 hours after start_date.

    :param config:
    :param stid: str: station ID
    :param data_binding: str: name of database binding to write to
    :param table_type: str: type of table
    :param model: str: model name
    :param start_date: datetime or str: starting date
    :param end_date: datetime or str: ending date
    :param force_list: bool: if True, returns a list even if there is only one Daily object
    :return: Daily or list of Dailys of requested data
    """
    # Resolve database and table names from the binding
    database = config['DataBinding'][data_binding]['database']
    table = '%s_%s' % (stid.upper(), table_type.upper())
    # Pull the raw rows
    data = _read(config, database, table, start_date=start_date, end_date=end_date, model=model)
    if data is None:
        raise ValueError('db.readDaily error: no data retrieved.')
    # Build a Daily for every retrieved row
    dailys = []
    for _, row in data.iterrows():
        daily = Daily(stid, date_to_datetime(row['DATETIME']))
        daily.set_values(row['HIGH'], row['LOW'], row['WIND'], row['RAIN'])
        daily.model = model
        dailys.append(daily)
    if len(data.index) == 0:
        raise ValueError('db.readDaily error: no data found.')
    # A single row collapses to a bare Daily unless the caller wants a list
    if len(data.index) == 1 and not force_list:
        return dailys[0]
    if config['debug'] > 9:
        print('db.readDaily: returning list of daily objects')
    return dailys
def readForecast(config, stid, model, date, hour_start=6, hour_padding=6, no_hourly_ok=False):
    """
    Build a Forecast object from the main theta-e database for one model and one day.

    hour_start is the UTC starting hour of the 24-hour forecast period; hour_padding is the number
    of hours added on either side of that period when fetching the hourly timeseries.

    :param config:
    :param stid: str: station ID
    :param model: str: model name
    :param date: datetime or str: date to retrieve
    :param hour_start: int: starting hour of the day in UTC
    :param hour_padding: int: added hours around the 24-hour TimeSeries
    :param no_hourly_ok: bool: if True, does not raise an error if the hourly timeseries is empty
    :return: Forecast
    """
    # Validate the hour parameters up front
    if not 0 <= hour_start <= 23:
        raise ValueError('db.readForecast error: hour_start must be between 0 and 23.')
    if not 0 <= hour_padding <= 24:
        raise ValueError('db.readForecast error: hour_padding must be between 0 and 24.')
    # Forecasts live in the 'forecast' data binding
    data_binding = 'forecast'
    if config['debug'] > 9:
        print("db.readForecast: reading forecast from '%s' data binding" % data_binding)
    forecast = Forecast(stid, model, date)
    # Daily portion: a single-day window
    daily = readDaily(config, stid, data_binding, 'DAILY_FORECAST', model, start_date=date, end_date=date)
    # Hourly portion: the 24-hour period plus padding on both sides
    date = date_to_datetime(date)
    window_start = date + timedelta(hours=hour_start - hour_padding)
    window_end = date + timedelta(hours=hour_start + 24 + hour_padding)
    try:
        timeseries = readTimeSeries(config, stid, data_binding, 'HOURLY_FORECAST', model, window_start, window_end)
    except MissingDataError:
        if not no_hourly_ok:
            raise
        # Caller allows a missing hourly record; substitute an empty series
        timeseries = TimeSeries(stid)
    forecast.timeseries = timeseries
    forecast.daily = daily
    return forecast
def _create_table(cursor, table, structure):
    """Create table from its schema structure (a list of (column, type) pairs)."""
    sqltypestr = ', '.join(["%s %s" % _type for _type in structure])
    cursor.execute("CREATE TABLE %s (%s);" % (table, sqltypestr,))


def init(config, reset_old=False, no_climo=False):
    """
    Initializes new station IDs in the databases. Returns a list of all sites included in config
    that require historical data to be retrieved. Also creates a database if it does not exist.

    :param config:
    :param reset_old: if True, erases tables if they are too old
    :param no_climo: if True, does not check "CLIMO" tables
    :return: list: station IDs needing historical data
    """
    add_sites = []
    for data_binding in config['DataBinding'].keys():
        # Open the database and schema
        schema_name = config['DataBinding'][data_binding]['schema']
        database = config['DataBinding'][data_binding]['database']
        schema = get_object(schema_name).schema
        conn = connection(config, database)
        if conn is None:
            raise IOError('Error: db.init cannot connect to database %s' % database)
        cursor = conn.cursor()
        # Iterate through stations in the config
        for stid in config['Stations'].keys():
            add_site = False
            # Find the tables in the db and requested by the schema
            schema_table_names = ['%s_%s' % (stid.upper(), key) for key in schema.keys()]
            schema_table_structures = list(schema.values())
            # Schema must have primary (datetime) key listed first
            date_keys = [schema[key][0][0] for key in schema.keys()]
            if config['debug'] > 50:
                print('db.init: found the following tables in schema:')
                print(schema_table_names)
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            sql_table_names = [table[0] for table in cursor.fetchall()]
            if config['debug'] > 50:
                print('db.init: found the following tables in sql db:')
                print(sql_table_names)
            # For each requested table, create it if it doesn't exist
            for t, table in enumerate(schema_table_names):
                if no_climo and 'CLIMO' in table.upper():
                    if config['debug'] > 9:
                        print('db.init: ignoring table %s' % table)
                    continue
                if not (table in sql_table_names):
                    # Something was missing, so we need to add the site to the output list
                    add_site = True
                    if config['debug'] > 0:
                        print('db.init: need to create table %s' % table)
                    _create_table(cursor, table, schema_table_structures[t])
                else:
                    # Check if data in table are recent
                    time_now = datetime.utcnow()
                    if table != stid.upper() + '_CLIMO':
                        recent = timedelta(days=30)
                    else:
                        # Climo tables only need updating after each leap year cycle
                        recent = time_now - datetime(last_leap_year(time_now), 12, 31)
                    key = date_keys[t]
                    try:
                        cursor.execute("SELECT %s FROM %s ORDER BY %s DESC LIMIT 1;" % (key, table, key))
                        last_dt = date_to_datetime(cursor.fetchone()[0])
                    except Exception:
                        # Fix: narrowed from a bare `except:`; an empty table
                        # (fetchone() is None) or a query error both mean the
                        # table has no usable latest date
                        last_dt = None
                    if last_dt is None or (time_now - last_dt > recent):
                        # Old or missing data, drop table and recreate it
                        add_site = True
                        if reset_old:
                            if config['debug'] > 0:
                                print('db.init: %s table too old, resetting it' % table)
                            cursor.execute("DROP TABLE %s;" % table)
                            _create_table(cursor, table, schema_table_structures[t])
                        else:
                            if config['debug'] > 0:
                                print('db.init: %s table is old, adding to historical' % table)
            # Lastly, add the site if we need to rerun historical data
            if add_site and stid not in add_sites:
                add_sites.append(stid)
            elif config['debug'] > 0:
                print('db.init: nothing to do for station %s' % stid)
        conn.close()
    return add_sites
def _read(config, database, table, model=None, start_date=None, end_date=None):
    """
    Return a pandas DataFrame from table in database. If start_date and end_date are None, then
    the start is set to now and the end to 24 hours in the future. If start_date only is None,
    then it is set to 24 hours before end_date. If end_date only is None, then it is set to 24
    hours after start_date.

    :param config:
    :param database: str: name of database
    :param table: str: name of table to read from
    :param model: str: specific model to read data from
    :param start_date: datetime or str: starting date
    :param end_date: datetime or str: ending date
    :return: pandas DataFrame of requested data, or None if no rows matched
    """
    # Find the dates and make strings
    start_date = date_to_datetime(start_date)
    end_date = date_to_datetime(end_date)
    if start_date is None and end_date is not None:
        start_date = end_date - timedelta(hours=24)
    elif start_date is not None and end_date is None:
        end_date = start_date + timedelta(hours=24)
    elif start_date is None and end_date is None:
        start_date = datetime.utcnow()
        end_date = start_date + timedelta(hours=24)
    start = date_to_string(start_date)
    end = date_to_string(end_date)
    if config['debug'] > 9:
        print('db._read: getting data from %s for %s to %s' % (table, start, end))
    # Open a database connection
    conn = connection(config, database)
    cursor = conn.cursor()
    # Fetch the data. Date/model values are bound parameters; the table name
    # comes from the trusted schema, not user input.
    if model is None:
        sql_line = """SELECT * FROM %s WHERE DATETIME>=? AND DATETIME<=? ORDER BY DATETIME ASC;""" % table
        cursor.execute(sql_line, (start, end))
    else:
        sql_line = """SELECT * FROM %s WHERE DATETIME>=? AND DATETIME<=? AND MODEL=?
ORDER BY DATETIME ASC""" % table
        cursor.execute(sql_line, (start, end, model.upper()))
    values = cursor.fetchall()
    if config['debug'] > 50:
        print('db._read: fetched the following values')
        print(values)
    # Check that we have data
    if len(values) == 0:
        if config['debug'] > 9:
            print('db._read: warning: no valid data found!')
        # Fix: the original returned here WITHOUT closing the connection,
        # leaking it on every empty-result query
        conn.close()
        return
    # Get column names
    cursor.execute("PRAGMA table_info(%s);" % table)
    columns = [c[1].upper() for c in cursor.fetchall()]
    if config['debug'] > 50:
        print('db._read: fetched the following column names')
        print(columns)
    conn.close()  # Done with db
    # Convert to DataFrame and create TimeSeries
    data = pd.DataFrame(values)
    data.columns = columns
    # If model was given, then take it out
    if model is not None:
        data = data.drop('MODEL', axis=1)
    return data
def get_owm_forecast(stid, lat, lon, api_key, forecast_date):
    """
    Retrieve an OpenWeatherMap 5-day/3-hour forecast and build a Forecast object.

    :param stid: str: station ID
    :param lat: float: latitude
    :param lon: float: longitude
    :param api_key: str: OpenWeatherMap API key
    :param forecast_date: datetime: date of the forecast
    :return: Forecast
    :raises requests.exceptions.HTTPError: if the API returns an error status
    """
    # Retrieve data
    api_url = 'http://api.openweathermap.org/data/2.5/forecast'
    api_options = {
        'APPID': api_key,
        'lat': lat,
        'lon': lon,
        'units': 'imperial',
    }
    response = requests.get(api_url, params=api_options)
    # Raise error for invalid HTTP response. Fix: check the status BEFORE
    # parsing the body — an error response may not be valid JSON, and the
    # original's response.json() call would mask the HTTP error.
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print('openweathermap: got HTTP error when querying API')
        raise
    owm_data = response.json()
    # Convert to pandas DataFrame and fix time. Fix: build the column
    # directly instead of filling a float (np.nan) column in a loop, which
    # triggers pandas dtype warnings.
    owm_df = pd.DataFrame(owm_data['list'])
    owm_df['DateTime'] = [date_to_datetime(t) for t in owm_df['dt_txt']]
    owm_df.set_index('DateTime', inplace=True)
    # OWM has a column 'main' which contains some parameters at all times. Get all of those.
    for parameter in owm_df.loc[owm_df.index[0], 'main'].keys():
        owm_df[parameter] = owm_df['main'].apply(get_parameter, args=(parameter,))
    # Get some other special parameters
    # Make sure the 'rain' parameter exists (if no rain in forecast, the column is missing)
    if 'rain' not in owm_df:
        owm_df = owm_df.assign(**{'rain': 0.0})
    else:
        owm_df.loc[:, 'rain'] = mm_to_in(owm_df['rain'].apply(get_parameter, args=('3h',)))
    owm_df['condition'] = owm_df['weather'].apply(get_parameter, args=('description',), is_list=True)
    owm_df['windSpeed'] = mph_to_kt(owm_df['wind'].apply(get_parameter, args=('speed',)))
    owm_df['windDirection'] = owm_df['wind'].apply(get_parameter, args=('deg',))
    owm_df['cloud'] = owm_df['clouds'].apply(get_parameter, args=('all',))
    # Dewpoint from temperature and relative humidity (must use 'temp' before the rename below)
    owm_df['dewpoint'] = [dewpoint_from_t_rh(t, rh) for t, rh in zip(owm_df['temp'], owm_df['humidity'])]
    # Rename remaining columns for default schema
    column_names_dict = {
        'temp': 'temperature',
    }
    owm_df = owm_df.rename(columns=column_names_dict)
    # Calculate daily values. OWM includes period maxima and minima. Note that rain in OWM is
    # cumulative for the LAST 3 hours.
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    try:
        daily_high = owm_df.loc[forecast_start:forecast_end, 'temp_max'].max()
    except KeyError:
        daily_high = owm_df.loc[forecast_start:forecast_end, 'temperature'].max()
    try:
        daily_low = owm_df.loc[forecast_start:forecast_end, 'temp_min'].min()
    except KeyError:
        daily_low = owm_df.loc[forecast_start:forecast_end, 'temperature'].min()
    daily_wind = owm_df.loc[forecast_start:forecast_end, 'windSpeed'].max()
    # Skip the first 3-hour bin: its accumulation belongs to before the period start
    daily_rain = np.nanmax([
        owm_df.loc[forecast_start + timedelta(hours=3):forecast_end, 'rain'].sum(),
        0.0
    ])
    # Create Forecast object
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(daily_high, daily_low, daily_wind, daily_rain)
    forecast.timeseries.data = owm_df.reset_index()
    return forecast