# pts0.index.name = 'index'
# pts1 = vector.xy_to_gpd(pts0.index, 'x', 'y', pts0)

## Aggregate the VCSN points to the catchment delineation polygons
catch_del = gpd.read_file(catch_del_shp_path)
catch_del.rename(columns={'SITENUMBER': 'site'}, inplace=True)

pts2, poly1 = vector.pts_poly_join(both5, catch_del, 'site')
catch_agg1 = pts2.groupby(['site', 'time'])[['rain', 'pe']].mean()

## Adjust the vcsn according to the precip gauge 404810
ts1 = mssql.rd_sql_ts(server, database, ts_table, 'ExtSiteID', 'DateTime', 'Value',
                      where_in={'ExtSiteID': ['404810'], 'DatasetTypeID': [15]},
                      from_date='2005-07-01', to_date=to_date)
ts1 = ts1.droplevel(0)

rain3 = catch_agg1.loc[70103, 'rain']
gauge1 = ts1.resample('A-JUN').sum().Value * 1.07  # July-June annual totals, scaled by 1.07
gauge1.name = '404810'

ols1 = LM(rain3.to_frame(), gauge1.to_frame()).predict()
ols_summ = ols1.summary_df.copy()
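## Sketch (assumption, not from the source): apply the fitted OLS coefficients to
## predict the gauge totals from the catchment-averaged VCSN rain. The 'x slopes'
## and 'y intercept' column names follow the LM summary_df convention used
## elsewhere in this repo; treat this as an illustration only.
slope1 = float(ols_summ['x slopes'].iloc[0])
intercept1 = float(ols_summ['y intercept'].iloc[0])
gauge_est1 = rain3 * slope1 + intercept1  # predicted annual totals at gauge 404810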
### Parameters
from_date = '2010-07-01'
to_date = '2018-06-30'

py_path = os.path.realpath(os.path.dirname(__file__))
data_dir = 'data'
export_flow = 'flow.csv'

############################################
### Get data

## Pull out recorder data
tsdata = mssql.rd_sql_ts(server, database, ts_daily_table, 'ExtSiteID', 'DateTime', 'Value',
                         where_in={'ExtSiteID': sites, 'DatasetTypeID': dataset},
                         from_date=from_date, to_date=to_date)

## Reformat to wide format (one column per site)
tsdata1 = tsdata.unstack(0)
tsdata1.columns = tsdata1.columns.droplevel(0)

## Save the wide-format data
tsdata1.to_csv(os.path.join(py_path, data_dir, export_flow))
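## Sketch (assumption, not in the source): read the exported CSV back to verify
## the round trip, assuming pandas is imported as pd at the top of the script.
flow_check = pd.read_csv(os.path.join(py_path, data_dir, export_flow), index_col=0, parse_dates=True)
print(flow_check.head())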
### Combine
# zones = concat([sw_zones, precip_zones, gw_zones]).reset_index(drop=True)
zones = concat([sw_zones, precip_zones]).reset_index(drop=True)

#################################################
#### Select sites

### SW
sites1 = sw_list[sw_list.Notes.isnull()].drop('Notes', axis=1)

flow1 = mssql.rd_sql_ts(param.hydro_server, param.hydro_database, param.ts_table,
                        'ExtSiteID', 'DateTime', 'Value',
                        where_in={'ExtSiteID': sites1.site.tolist(), 'DatasetTypeID': [5]}).reset_index()
flow1.rename(columns={'ExtSiteID': 'site', 'DateTime': 'time', 'Value': 'data'}, inplace=True)

### precip
# Assumed continuation mirroring the SW query above; the site list and
# DatasetTypeID 15 (precip, as used elsewhere in this repo) are assumptions.
precip1 = mssql.rd_sql_ts(param.hydro_server, param.hydro_database, param.ts_table,
                          'ExtSiteID', 'DateTime', 'Value',
                          where_in={'ExtSiteID': sites1.site.tolist(), 'DatasetTypeID': [15]}).reset_index()
precip1.rename(columns={'ExtSiteID': 'site', 'DateTime': 'time', 'Value': 'data'}, inplace=True)
def flow_est(self, buffer_dis=50000):
    """
    Function to query and/or estimate flow at the input_sites.

    Parameters
    ----------
    buffer_dis : int
        The search radius for the regressions in meters.

    Returns
    -------
    DataFrame of Flow
    """
    ### Recorder sites: pull out the measured data directly
    if self.input_summ.CollectionType.isin(['Recorder']).any():
        rec_summ1 = self.input_summ[self.input_summ.CollectionType.isin(['Recorder'])].copy()
        rec_ts_data1 = mssql.rd_sql_ts(param['input']['ts_server'], param['input']['ts_database'], param['input']['ts_table'],
                                       ['ExtSiteID', 'DatasetTypeID'], 'DateTime', 'Value',
                                       from_date=self.from_date, to_date=self.to_date,
                                       where_in={'ExtSiteID': rec_summ1.ExtSiteID.tolist(),
                                                 'DatasetTypeID': rec_summ1.DatasetTypeID.unique().tolist()}).reset_index()
        rec_ts_data1 = pd.merge(rec_summ1[['ExtSiteID', 'DatasetTypeID']], rec_ts_data1,
                                on=['ExtSiteID', 'DatasetTypeID']).drop('DatasetTypeID', axis=1).set_index(['ExtSiteID', 'DateTime'])
        rec_ts_data2 = rec_ts_data1.Value.unstack(0)
    else:
        rec_ts_data2 = pd.DataFrame()

    ### Manually gauged sites: estimate flow by regression against nearby recorders
    if self.input_summ.CollectionType.isin(['Manual Field']).any():
        man_summ1 = self.input_summ[self.input_summ.CollectionType.isin(['Manual Field'])].copy()
        man_sites1 = self.sites_gdf[self.sites_gdf.ExtSiteID.isin(man_summ1.ExtSiteID)].copy()

        ## Determine which recorder sites are within the buffer of each manual site
        buff_sites_dict = {}
        man_buff1 = man_sites1.set_index(['ExtSiteID']).copy()
        man_buff1['geometry'] = man_buff1.buffer(buffer_dis)

        rec_sites_gdf = self.sites_gdf[self.sites_gdf.CollectionType == 'Recorder'].copy()

        for index in man_buff1.index:
            buff_sites1 = vector.sel_sites_poly(rec_sites_gdf, man_buff1.loc[[index]])
            buff_sites_dict[index] = buff_sites1.ExtSiteID.tolist()

        buff_sites_list = [item for sublist in buff_sites_dict.values() for item in sublist]
        buff_sites = set(buff_sites_list)

        ## Pull out the gauging data for the manual sites
        man_ts_data1 = mssql.rd_sql_ts(param['input']['ts_server'], param['input']['ts_database'], param['input']['ts_table'],
                                       ['ExtSiteID', 'DatasetTypeID'], 'DateTime', 'Value',
                                       from_date=self.from_date, to_date=self.to_date,
                                       where_in={'ExtSiteID': man_summ1.ExtSiteID.tolist(),
                                                 'DatasetTypeID': man_summ1.DatasetTypeID.unique().tolist()}).reset_index()
        man_ts_data1 = pd.merge(man_summ1[['ExtSiteID', 'DatasetTypeID']], man_ts_data1,
                                on=['ExtSiteID', 'DatasetTypeID']).drop('DatasetTypeID', axis=1).set_index(['ExtSiteID', 'DateTime'])
        man_ts_data2 = man_ts_data1.Value.unstack(0)

        ## Pull out the recorder data needed for all manual sites
        man_rec_summ1 = self.summ[self.summ.ExtSiteID.isin(buff_sites)].copy()
        man_rec_ts_data1 = mssql.rd_sql_ts(param['input']['ts_server'], param['input']['ts_database'], param['input']['ts_table'],
                                           ['ExtSiteID', 'DatasetTypeID'], 'DateTime', 'Value',
                                           from_date=self.from_date, to_date=self.to_date,
                                           where_in={'ExtSiteID': man_rec_summ1.ExtSiteID.tolist(),
                                                     'DatasetTypeID': man_rec_summ1.DatasetTypeID.unique().tolist()}).reset_index()
        man_rec_ts_data1 = pd.merge(man_rec_summ1[['ExtSiteID', 'DatasetTypeID']], man_rec_ts_data1,
                                    on=['ExtSiteID', 'DatasetTypeID']).drop('DatasetTypeID', axis=1).set_index(['ExtSiteID', 'DateTime'])
        man_rec_ts_data2 = man_rec_ts_data1.Value.unstack(0).interpolate('time', limit=10)

        ## Run through the regressions, keeping whichever of the one- and
        ## two-predictor log-log models has the higher F value
        reg_lst = []
        new_lst = []

        for key, lst in buff_sites_dict.items():
            man_rec_ts_data3 = man_rec_ts_data2.loc[:, lst].copy()
            man_rec_ts_data3[man_rec_ts_data3 <= 0] = np.nan

            man_ts_data3 = man_ts_data2.loc[:, [key]].copy()
            man_ts_data3[man_ts_data3 <= 0] = np.nan

            lm1 = LM(man_rec_ts_data3, man_ts_data3)
            res1 = lm1.predict(n_ind=1, x_transform='log', y_transform='log', min_obs=self.min_gaugings)
            res2 = lm1.predict(n_ind=2, x_transform='log', y_transform='log', min_obs=self.min_gaugings)

            f = [res1.summary_df['f value'].iloc[0], res2.summary_df['f value'].iloc[0]]
            val = f.index(max(f))

            if val == 0:
                reg_lst.append(res1.summary_df)

                s1 = res1.summary_df.iloc[0]
                d1 = man_rec_ts_data3[s1['x sites']].copy()
                d1[d1 <= 0] = 0.001
                new_data1 = np.exp(np.log(d1) * float(s1['x slopes']) + float(s1['y intercept']))
                new_data1.name = key
                new_data1[new_data1 <= 0] = 0
            else:
                reg_lst.append(res2.summary_df)

                s1 = res2.summary_df.iloc[0]
                x_sites = s1['x sites'].split(', ')
                x_slopes = [float(s) for s in s1['x slopes'].split(', ')]
                intercept = float(s1['y intercept'])

                d1 = man_rec_ts_data3[x_sites[0]].copy()
                d1[d1 <= 0] = 0.001
                d2 = man_rec_ts_data3[x_sites[1]].copy()
                d2[d2 <= 0] = 0.001

                new_data1 = np.exp((np.log(d1) * x_slopes[0]) + (np.log(d2) * x_slopes[1]) + intercept)
                new_data1.name = key
                new_data1[new_data1 <= 0] = 0

            new_lst.append(new_data1)

        new_data2 = pd.concat(new_lst, axis=1)
        reg_df = pd.concat(reg_lst).reset_index()
    else:
        new_data2 = pd.DataFrame()
        reg_df = pd.DataFrame()

    flow = pd.concat([rec_ts_data2, new_data2], axis=1).round(3)

    ## Save if required
    if hasattr(self, 'output_path'):
        run_time = pd.Timestamp.today().strftime('%Y-%m-%dT%H%M')
        if not reg_df.empty:
            reg_flow_csv = param['output']['reg_flow_csv'].format(run_date=run_time)
            reg_df.to_csv(os.path.join(self.output_path, reg_flow_csv), index=False)
        flow_csv = param['output']['flow_csv'].format(run_date=run_time)
        flow.to_csv(os.path.join(self.output_path, flow_csv))

    setattr(self, 'flow', flow)
    setattr(self, 'reg_flow', reg_df)

    return flow
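## Note on the back-transform in flow_est: fitting log(y) = m*log(x) + b and
## exponentiating gives y = exp(b) * x**m, i.e. a power-law rating between the
## manual site and its neighbouring recorder. A standalone check with assumed
## values (not from the source):
# import numpy as np
# m, b = 0.9, 0.5
# x = np.array([1.0, 10.0])
# assert np.allclose(np.exp(np.log(x) * m + b), np.exp(b) * x**m)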
def get_ts_data(server, database, mtypes, sites, from_date=None, to_date=None, resample_code='D', period=1, val_round=3, min_count=None, pivot=False):
    """
    Function to extract time series data from the Hydrotel database.

    Parameters
    ----------
    server : str
        The server where the Hydrotel database lies.
    database : str
        The name of the Hydrotel database.
    mtypes : str or list of str
        The measurement type(s) of the sites that should be returned. Possible options include swl, flow, gwl, and precip.
    sites : list of str
        The list of sites that should be returned.
    from_date : str or None
        The start date in the format '2000-01-01'.
    to_date : str or None
        The end date in the format '2000-01-01'.
    resample_code : str
        The Pandas time series resampling code, e.g. 'D' for day, 'W' for week, 'M' for month.
    period : int
        The number of resampling periods, e.g. period=2 with resample_code='D' resamples the values over a 2-day period.
    val_round : int
        The number of decimals to round the values to.
    min_count : int or None
        The minimum number of values required within a resampling period for a value to be returned.
    pivot : bool
        Should the output be pivoted into wide format?

    Returns
    -------
    Series or DataFrame
        A MultiIndex pandas Series if pivot is False, and a DataFrame if True.
    """
    ### Import site metadata and select the correct sites
    site_point = get_sites_mtypes(server, database, mtypes, sites).reset_index()

    ### Select rows within the requested time period
    if isinstance(from_date, str):
        site_point = site_point[site_point.ToDate > from_date]
    if isinstance(to_date, str):
        site_point = site_point[site_point.FromDate < to_date]
    if site_point.empty:
        return pd.DataFrame()

    ### Pull out the ts data, one mtype at a time
    site_point1 = site_point[['ExtSiteID', 'MType', 'Point']].copy()

    tsdata_list = []
    mtypes1 = site_point.MType.unique()

    for m in mtypes1:
        # Use the mtype-specific resampling function if one is defined, otherwise the mean
        if m in resample_dict:
            res_val = resample_dict[m]
        else:
            res_val = 'mean'
        sel = site_point1[site_point1.MType == m]
        points = sel.Point.astype(int).tolist()
        data1 = rd_sql_ts(server, database, data_tab, 'Point', 'DT', 'SampleValue',
                          resample_code, period, res_val, val_round, {'Point': points},
                          from_date=from_date, to_date=to_date, min_count=min_count).reset_index()
        data1.rename(columns={'DT': 'DateTime', 'SampleValue': 'Value'}, inplace=True)
        data2 = pd.merge(sel, data1, on='Point').drop('Point', axis=1).set_index(['ExtSiteID', 'MType', 'DateTime']).Value
        tsdata_list.append(data2)

    tsdata = pd.concat(tsdata_list)

    if pivot:
        tsdata = tsdata.unstack([0, 1])

    return tsdata
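## Hypothetical usage sketch (the server and database names are assumptions):
# precip = get_ts_data('hydrotel-server', 'hydrotel', 'precip', ['404810'],
#                      from_date='2010-07-01', to_date='2018-06-30',
#                      resample_code='D', period=1, pivot=True)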
sites_rec1 = flow_sites_gdf[sites_rec_bool].copy()
sites_man1 = flow_sites_gdf[~sites_rec_bool].copy()

flow_rec_sites1 = mssql.rd_sql(server, database, site_table, ['ExtSiteID', 'NZTMX', 'NZTMY'],
                               where_in={'ExtSiteID': rec_summ1.ExtSiteID.unique().tolist()})
flow_rec_sites2 = vector.xy_to_gpd('ExtSiteID', 'NZTMX', 'NZTMY', flow_rec_sites1)

## Estimate flow where recorder doesn't exist
sites_man2 = sites_man1.copy()
sites_man2['geometry'] = sites_man1.buffer(buffer_dis)

rec_sites2 = vector.sel_sites_poly(flow_rec_sites2, sites_man2)

rec_ts_data1 = mssql.rd_sql_ts(server, database, ts_table, 'ExtSiteID', 'DateTime', 'Value',
                               from_date=param['from_date'], to_date=param['to_date'],
                               where_in={'ExtSiteID': rec_sites2.ExtSiteID.tolist(),
                                         'DatasetTypeID': rec_summ1.DatasetTypeID.unique().tolist()})
rec_ts_data2 = rec_ts_data1.Value.unstack(0).interpolate('time', limit=10).dropna(axis=1)
rec_flow1 = rec_ts_data2.loc[:, rec_ts_data2.columns.isin(sites_rec1.FlowSite)].copy()

man_ts_data1 = mssql.rd_sql_ts(server, database, ts_table, 'ExtSiteID', 'DateTime', 'Value',
                               from_date=param['from_date'], to_date=param['to_date'],
                               where_in={'ExtSiteID': sites_man1.FlowSite.tolist(),
                                         'DatasetTypeID': man_datasets})
man_ts_data2 = man_ts_data1.Value.unstack(0)

reg_lst = []
new_lst = []

for col in man_ts_data2:
    site0 = sites_man1[sites_man1.FlowSite == col]