#pts0.index.name = 'index'
#pts1 = vector.xy_to_gpd(pts0.index, 'x', 'y', pts0)
catch_del = gpd.read_file(catch_del_shp_path)
catch_del.rename(columns={'SITENUMBER': 'site'}, inplace=True)

pts2, poly1 = vector.pts_poly_join(both5, catch_del, 'site')

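## Areal mean rain and PE per catchment and time step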
catch_agg1 = pts2.groupby(['site', 'time'])[['rain', 'pe']].mean()

## Adjust the VCSN data using the precip gauge 404810
ts1 = mssql.rd_sql_ts(server,
                      database,
                      ts_table,
                      'ExtSiteID',
                      'DateTime',
                      'Value',
                      where_in={
                          'ExtSiteID': ['404810'],
                          'DatasetTypeID': [15]
                      },
                      from_date='2005-07-01',
                      to_date=to_date)
ts1 = ts1.droplevel(0)

rain3 = catch_agg1.loc[70103, 'rain']

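## Water-year (July-June) gauge totals with a 1.07 adjustment factor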
gauge1 = ts1.resample('A-JUN').sum().Value * 1.07
gauge1.name = '404810'

ols1 = LM(rain3.to_frame(), gauge1.to_frame()).predict()

ols_summ = ols1.summary_df.copy()
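The adjustment step above reduces to a simple pattern: take water-year totals from both sources, then fit an ordinary least-squares line. A self-contained sketch with synthetic data, using plain numpy in place of the LM class:

import numpy as np
import pandas as pd

# Synthetic daily series over three water years (illustrative only)
idx = pd.date_range('2005-07-01', '2008-06-30', freq='D')
rng = np.random.default_rng(1)
vcsn = pd.Series(rng.gamma(0.5, 4, len(idx)), index=idx, name='vcsn')
gauge = vcsn * 1.1 + rng.normal(0, 0.5, len(idx))

# Water-year (July-June) totals, as above
vcsn_a = vcsn.resample('A-JUN').sum()
gauge_a = gauge.resample('A-JUN').sum()

# Ordinary least squares: gauge_a ~ slope * vcsn_a + intercept
slope, intercept = np.polyfit(vcsn_a, gauge_a, 1)
adjusted = vcsn_a * slope + intercept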
Example #2
from_date = '2010-07-01'
to_date = '2018-06-30'

py_path = os.path.realpath(os.path.dirname(__file__))
data_dir = 'data'
export_flow = 'flow.csv'

############################################
### Get data

## Pull out recorder data
tsdata = mssql.rd_sql_ts(server,
                         database,
                         ts_daily_table,
                         'ExtSiteID',
                         'DateTime',
                         'Value',
                         where_in={
                             'ExtSiteID': sites,
                             'DatasetTypeID': dataset
                         },
                         from_date=from_date,
                         to_date=to_date)

## Reformat
tsdata1 = tsdata.unstack(0)
tsdata1.columns = tsdata1.columns.droplevel(0)

## Save data
tsdata1.to_csv(os.path.join(py_path, data_dir, export_flow))
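The unstack/droplevel reshape above turns the long (site, time) output of rd_sql_ts into one column per site. A toy equivalent with made-up site IDs:

import pandas as pd

# Toy long-format series indexed by (site, time), mimicking rd_sql_ts output
midx = pd.MultiIndex.from_product(
    [['66401', '66402'], pd.date_range('2018-01-01', periods=3)],
    names=['ExtSiteID', 'DateTime'])
long_ts = pd.DataFrame({'Value': range(6)}, index=midx)

wide = long_ts.unstack(0)                 # columns become (Value, ExtSiteID) pairs
wide.columns = wide.columns.droplevel(0)  # keep one column per site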
Example #3

### Combine

#zones = concat([sw_zones, precip_zones, gw_zones]).reset_index(drop=True)
zones = concat([sw_zones, precip_zones]).reset_index(drop=True)

#################################################
#### Select sites

### SW
sites1 = sw_list[sw_list.Notes.isnull()].drop('Notes', axis=1)

flow1 = mssql.rd_sql_ts(param.hydro_server,
                        param.hydro_database,
                        param.ts_table,
                        'ExtSiteID',
                        'DateTime',
                        'Value',
where_in={
                            'ExtSiteID': sites1.site.tolist(),
                            'DatasetTypeID': [5]
                        }).reset_index()
flow1.rename(columns={
    'ExtSiteID': 'site',
    'DateTime': 'time',
    'Value': 'data'
},
             inplace=True)

### precip
precip1 = mssql.rd_sql_ts(param.hydro_server,
                          param.hydro_database,
                          param.ts_table,
                          'ExtSiteID',
                          'DateTime',
                          'Value',
                          where_in={
                              'ExtSiteID': precip_sites,  # hypothetical name; the original call is truncated in the source
                              'DatasetTypeID': precip_datasets  # hypothetical name; the flow call above uses [5]
                          }).reset_index()

Example #4
    def flow_est(self, buffer_dis=50000):
        """
        Function to query and/or estimate flow at the input_sites.

        Parameters
        ----------
        buffer_dis : int
            The search radius for the regressions in meters.

        Returns
        -------
        DataFrame
            Flow time series with one column per site.
        """

        if self.input_summ.CollectionType.isin(['Recorder']).any():
            rec_summ1 = self.input_summ[self.input_summ.CollectionType.isin(['Recorder'])].copy()
            rec_ts_data1 = mssql.rd_sql_ts(param['input']['ts_server'],
                                           param['input']['ts_database'],
                                           param['input']['ts_table'],
                                           ['ExtSiteID', 'DatasetTypeID'],
                                           'DateTime',
                                           'Value',
                                           from_date=self.from_date,
                                           to_date=self.to_date,
                                           where_in={
                                               'ExtSiteID': rec_summ1.ExtSiteID.tolist(),
                                               'DatasetTypeID': rec_summ1.DatasetTypeID.unique().tolist()
                                           }).reset_index()
            rec_ts_data1 = pd.merge(rec_summ1[['ExtSiteID', 'DatasetTypeID']], rec_ts_data1, on=['ExtSiteID', 'DatasetTypeID']).drop('DatasetTypeID', axis=1).set_index(['ExtSiteID', 'DateTime'])
            rec_ts_data2 = rec_ts_data1.Value.unstack(0)

        else:
            rec_ts_data2 = pd.DataFrame()

        if self.input_summ.CollectionType.isin(['Manual Field']).any():
            man_summ1 = self.input_summ[self.input_summ.CollectionType.isin(['Manual Field'])].copy()
            man_sites1 = self.sites_gdf[self.sites_gdf.ExtSiteID.isin(man_summ1.ExtSiteID)].copy()

            ## Determine which sites are within the buffer of the manual sites

            buff_sites_dict = {}
            man_buff1 = man_sites1.set_index(['ExtSiteID']).copy()
            man_buff1['geometry'] = man_buff1.buffer(buffer_dis)

            rec_sites_gdf = self.sites_gdf[self.sites_gdf.CollectionType == 'Recorder'].copy()

            for index in man_buff1.index:
                buff_sites1 = vector.sel_sites_poly(rec_sites_gdf, man_buff1.loc[[index]])
                buff_sites_dict[index] = buff_sites1.ExtSiteID.tolist()

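            # Flatten and dedupe: every recorder site within the buffer of any manual site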
            buff_sites_list = [item for sublist in buff_sites_dict.values() for item in sublist]
            buff_sites = set(buff_sites_list)

            ## Pull out recorder data needed for all manual sites
            man_ts_data1 = mssql.rd_sql_ts(param['input']['ts_server'],
                                           param['input']['ts_database'],
                                           param['input']['ts_table'],
                                           ['ExtSiteID', 'DatasetTypeID'],
                                           'DateTime',
                                           'Value',
                                           from_date=self.from_date,
                                           to_date=self.to_date,
                                           where_in={
                                               'ExtSiteID': man_summ1.ExtSiteID.tolist(),
                                               'DatasetTypeID': man_summ1.DatasetTypeID.unique().tolist()
                                           }).reset_index()
            man_ts_data1 = pd.merge(man_summ1[['ExtSiteID', 'DatasetTypeID']], man_ts_data1, on=['ExtSiteID', 'DatasetTypeID']).drop('DatasetTypeID', axis=1).set_index(['ExtSiteID', 'DateTime'])
            man_ts_data2 = man_ts_data1.Value.unstack(0)

            man_rec_summ1 = self.summ[self.summ.ExtSiteID.isin(buff_sites)].copy()
            man_rec_ts_data1 = mssql.rd_sql_ts(param['input']['ts_server'],
                                               param['input']['ts_database'],
                                               param['input']['ts_table'],
                                               ['ExtSiteID', 'DatasetTypeID'],
                                               'DateTime',
                                               'Value',
                                               from_date=self.from_date,
                                               to_date=self.to_date,
                                               where_in={
                                                   'ExtSiteID': man_rec_summ1.ExtSiteID.tolist(),
                                                   'DatasetTypeID': man_rec_summ1.DatasetTypeID.unique().tolist()
                                               }).reset_index()
            man_rec_ts_data1 = pd.merge(man_rec_summ1[['ExtSiteID', 'DatasetTypeID']], man_rec_ts_data1, on=['ExtSiteID', 'DatasetTypeID']).drop('DatasetTypeID', axis=1).set_index(['ExtSiteID', 'DateTime'])
            man_rec_ts_data2 = man_rec_ts_data1.Value.unstack(0).interpolate('time', limit=10)

            ## Run through regressions
            reg_lst = []
            new_lst = []

            for key, lst in buff_sites_dict.items():
                man_rec_ts_data3 = man_rec_ts_data2.loc[:, lst].copy()
                man_rec_ts_data3[man_rec_ts_data3 <= 0] = np.nan

                man_ts_data3 = man_ts_data2.loc[:, [key]].copy()
                man_ts_data3[man_ts_data3 <= 0] = np.nan

                lm1 = LM(man_rec_ts_data3, man_ts_data3)
                res1 = lm1.predict(n_ind=1, x_transform='log', y_transform='log', min_obs=self.min_gaugings)
                res2 = lm1.predict(n_ind=2, x_transform='log', y_transform='log', min_obs=self.min_gaugings)

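                # Keep whichever regression (one or two predictors) has the higher F value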
                f = [res1.summary_df['f value'].iloc[0], res2.summary_df['f value'].iloc[0]]

                val = f.index(max(f))

                if val == 0:
                    reg_lst.append(res1.summary_df)

                    s1 = res1.summary_df.iloc[0]

                    d1 = man_rec_ts_data3[s1['x sites']].copy()
                    d1[d1 <= 0] = 0.001

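                    # Back-transform the log-log fit: y = exp(a*log(x) + b), a power law in x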
                    new_data1 = np.exp(np.log(d1) * float(s1['x slopes']) + float(s1['y intercept']))
                    new_data1.name = key
                    new_data1[new_data1 <= 0] = 0
                else:
                    reg_lst.append(res2.summary_df)

                    s1 = res2.summary_df.iloc[0]
                    x_sites = s1['x sites'].split(', ')
                    x_slopes = [float(s) for s in s1['x slopes'].split(', ')]
                    intercept = float(s1['y intercept'])

                    d1 = man_rec_ts_data3[x_sites[0]].copy()
                    d1[d1 <= 0] = 0.001
                    d2 = man_rec_ts_data3[x_sites[1]].copy()
                    d2[d2 <= 0] = 0.001

                    new_data1 = np.exp((np.log(d1) * float(x_slopes[0])) + (np.log(d2) * float(x_slopes[1])) + intercept)
                    new_data1.name = key
                    new_data1[new_data1 <= 0] = 0

                new_lst.append(new_data1)

            new_data2 = pd.concat(new_lst, axis=1)
            reg_df = pd.concat(reg_lst).reset_index()
        else:
            new_data2 = pd.DataFrame()
            reg_df = pd.DataFrame()

        flow = pd.concat([rec_ts_data2, new_data2], axis=1).round(3)

        ## Save if required
        if hasattr(self, 'output_path'):
            run_time = pd.Timestamp.today().strftime('%Y-%m-%dT%H%M')

            if not reg_df.empty:
                reg_flow_csv = param['output']['reg_flow_csv'].format(run_date=run_time)
                reg_df.to_csv(os.path.join(self.output_path, reg_flow_csv), index=False)

            flow_csv = param['output']['flow_csv'].format(run_date=run_time)
            flow.to_csv(os.path.join(self.output_path, flow_csv))

        setattr(self, 'flow', flow)
        setattr(self, 'reg_flow', reg_df)
        return flow
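The regression step above fits in log space and back-transforms, so log(y) = a*log(x) + b becomes y = exp(b) * x**a, a power-law rating between sites. A minimal numpy sketch of that step with synthetic data:

import numpy as np

# Synthetic power-law data: y = 0.5 * x**1.3
x = np.array([1.0, 2.0, 5.0, 10.0, 20.0])
y = 0.5 * x ** 1.3

# Fit log(y) = a * log(x) + b
a, b = np.polyfit(np.log(x), np.log(y), 1)

# Back-transform, as in flow_est: exp(a*log(x) + b) == exp(b) * x**a
y_hat = np.exp(a * np.log(x) + b)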
Example #5
def get_ts_data(server, database, mtypes, sites, from_date=None, to_date=None, resample_code='D', period=1, val_round=3, min_count=None, pivot=False):
    """
    Function to extract time series data from the Hydrotel database.

    Parameters
    ----------
    server : str
        The server where the Hydrotel database lies.
    database : str
        The name of the Hydrotel database.
    mtypes : str or list of str
        The measurement type(s) of the sites that should be returned. Possible options include swl, flow, gwl, and precip.
    sites : list of str
        The list of sites that should be returned.
    from_date : str or None
        The start date in the format '2000-01-01'.
    to_date : str or None
        The end date in the format '2000-01-01'.
    resample_code : str
        The Pandas time series resampling code. e.g. 'D' for day, 'W' for week, 'M' for month, etc.
    period : int
        The number of resampling periods. e.g. period=2 with resample_code='D' resamples the values over a 2 day period.
    val_round : int
        The number of decimals to round the values to.
    min_count : int or None
        The minimum number of values required per resampling period (passed through to rd_sql_ts).
    pivot : bool
        Should the output be pivoted into wide format?

    Returns
    -------
    Series or DataFrame
        A MultiIndex Pandas Series if pivot is False and a DataFrame if True
    """
    ### Import data and select the correct sites
    site_point = get_sites_mtypes(server, database, mtypes, sites).reset_index()

    ### Select rows within time period
    if isinstance(from_date, str):
        site_point = site_point[site_point.ToDate > from_date]
    if isinstance(to_date, str):
        site_point = site_point[site_point.FromDate < to_date]

    if site_point.empty:
        return pd.DataFrame()

    ### Pull out the ts data
    site_point1 = site_point[['ExtSiteID', 'MType', 'Point']].copy()

    tsdata_list = []

    mtypes1 = site_point.MType.unique()

    for m in mtypes1:
        if m in resample_dict:
            res_val = resample_dict[m]
        else:
            res_val = 'mean'
        sel = site_point1[site_point1.MType == m]
        points = sel.Point.astype(int).tolist()

        data1 = rd_sql_ts(server, database, data_tab, 'Point', 'DT', 'SampleValue', resample_code, period, res_val, val_round, {'Point': points}, from_date=from_date, to_date=to_date, min_count=min_count).reset_index()

        data1.rename(columns={'DT': 'DateTime', 'SampleValue': 'Value'}, inplace=True)
        data2 = pd.merge(sel, data1, on='Point').drop('Point', axis=1).set_index(['ExtSiteID', 'MType', 'DateTime']).Value
        tsdata_list.append(data2)

    tsdata = pd.concat(tsdata_list)

    if pivot:
        tsdata = tsdata.unstack([0, 1])

    return tsdata
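A hypothetical call for illustration; the server name, database name, and site IDs are placeholders rather than values from the source:

# Hypothetical usage; server, database, and site IDs are placeholders
flow_wide = get_ts_data('sql01', 'hydrotel', mtypes='flow',
                        sites=['70103', '69607'],
                        from_date='2015-07-01', to_date='2018-06-30',
                        resample_code='D', pivot=True)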
Example #6

    sites_rec1 = flow_sites_gdf[sites_rec_bool].copy()
    sites_man1 = flow_sites_gdf[~sites_rec_bool].copy()

    flow_rec_sites1 = mssql.rd_sql(server, database, site_table, ['ExtSiteID', 'NZTMX', 'NZTMY'], where_in={'ExtSiteID': rec_summ1.ExtSiteID.unique().tolist()})

    flow_rec_sites2 = vector.xy_to_gpd('ExtSiteID', 'NZTMX', 'NZTMY', flow_rec_sites1)

    ## Estimate flow where recorder doesn't exist

    sites_man2 = sites_man1.copy()
    sites_man2['geometry'] = sites_man1.buffer(buffer_dis)

    rec_sites2 = vector.sel_sites_poly(flow_rec_sites2, sites_man2)

    rec_ts_data1 = mssql.rd_sql_ts(server,
                                   database,
                                   ts_table,
                                   'ExtSiteID',
                                   'DateTime',
                                   'Value',
                                   from_date=param['from_date'],
                                   to_date=param['to_date'],
                                   where_in={
                                       'ExtSiteID': rec_sites2.ExtSiteID.tolist(),
                                       'DatasetTypeID': rec_summ1.DatasetTypeID.unique().tolist()
                                   })

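    # Time-interpolate gaps up to 10 steps, then drop any site column still containing NaNs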
    rec_ts_data2 = rec_ts_data1.Value.unstack(0).interpolate('time', limit=10).dropna(axis=1)

    rec_flow1 = rec_ts_data2.loc[:, rec_ts_data2.columns.isin(sites_rec1.FlowSite)].copy()

    man_ts_data1 = mssql.rd_sql_ts(server,
                                   database,
                                   ts_table,
                                   'ExtSiteID',
                                   'DateTime',
                                   'Value',
                                   from_date=param['from_date'],
                                   to_date=param['to_date'],
                                   where_in={
                                       'ExtSiteID': sites_man1.FlowSite.tolist(),
                                       'DatasetTypeID': man_datasets
                                   })

    man_ts_data2 = man_ts_data1.Value.unstack(0)

    reg_lst = []
    new_lst = []

    for col in man_ts_data2:
        site0 = sites_man1[sites_man1.FlowSite == col]