Example #1
def _proc_hydro_sql(self, sites_sql_fun, mtype_dict, mtype, sites=None, from_date=None, to_date=None, qual_codes=None, min_count=None, buffer_dis=0, resample_code=None, period=1, fun='mean'):
    """
    Convenience function for reading in mssql data from standardized hydro tables.
    """

    if isinstance(sites, GeoDataFrame):
        loc1 = sites_sql_fun()
        sites1 = sel_sites_poly(loc1, sites, buffer_dis).index.astype(str)
    else:
        sites1 = Series(sites).astype(str)

    h1 = self.copy()
    if isinstance(mtype_dict, (list, tuple)):
        for i in range(len(mtype_dict)):
            site1 = mtype_dict[i]['site_col']

            sites_stmt = 'select distinct ' + site1 + ' from ' + mtype_dict[i]['table']
            sites2 = rd_sql(mtype_dict[i]['server'], mtype_dict[i]['database'], stmt=sites_stmt).astype(str)[site1]
            sites3 = sites2[sites2.isin(sites1)].astype(str).tolist()
            if not sites3:
                raise ValueError('No sites in database')
            if mtype_dict[i]['qual_col'] is None:
                qual_codes = None
            h1 = h1._rd_hydro_mssql(sites=sites3, mtype=mtype, from_date=from_date, to_date=to_date, qual_codes=qual_codes, min_count=min_count, resample_code=resample_code, period=period, fun=fun, **mtype_dict[i])
    elif isinstance(mtype_dict, dict):
        site1 = mtype_dict['site_col']

        sites_stmt = 'select distinct ' + site1 + ' from ' + mtype_dict['table']
        sites2 = rd_sql(mtype_dict['server'], mtype_dict['database'], stmt=sites_stmt).astype(str)[site1]
        sites3 = sites2[sites2.isin(sites1)].astype(str).tolist()
        if not sites3:
            raise ValueError('No sites in database')
        if mtype_dict['qual_col'] is None:
            qual_codes = None
        h1 = h1._rd_hydro_mssql(sites=sites3, mtype=mtype, from_date=from_date, to_date=to_date, qual_codes=qual_codes, min_count=min_count, resample_code=resample_code, period=period, fun=fun, **mtype_dict)
    elif callable(mtype_dict):
        h1 = mtype_dict(h1, sites=sites1, mtype=mtype, from_date=from_date, to_date=to_date, min_count=min_count)

    return h1
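A minimal, self-contained sketch of the site-filtering step above. The site IDs are made up and stand in for the result of the rd_sql query, which needs a live MSSQL connection; only the filtering logic is reproduced.

import pandas as pd

# Made-up site lists standing in for the requested sites and the distinct
# sites found in the database table.
requested_sites = pd.Series(['69618', '70105', '71106']).astype(str)
sites_in_table = pd.Series(['69618', '71106', '66401']).astype(str)

available = sites_in_table[sites_in_table.isin(requested_sites)].astype(str).tolist()
if not available:
    raise ValueError('No sites in database')
print(available)  # ['69618', '71106']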
Example #2
def rd_squalarc(sites,
                mtypes=None,
                from_date=None,
                to_date=None,
                convert_dtl=False,
                dtl_method=None,
                export=None):
    """
    Function to read in "squalarc" data, which is actually stored in the mssql db.

    Parameters
    ----------
    sites: ndarray, list, or str
        The site names as a list, array, csv with the first column as the site names, or a polygon shapefile of the area of interest.
    mtypes: list or None
        A list of measurement type names to be in the output. Leaving it empty returns all mtypes.
    from_date: str
        A start date string in the form '2010-01-01'.
    to_date: str
        An end date string in the form '2011-01-01'.
    convert_dtl: bool
        Should values under the detection limit be converted to numeric?
    dtl_method: str
        The method to use to convert values under a detection limit to numeric. None or 'standard' takes half of the detection limit. 'trend' is meant as an output for trend analysis which includes an additional column dtl_ratio referring to the ratio of values under the detection limit.
    export: str or None
        Either None or a string path to a csv file.
    """

    #### Read in sites
    sites1 = select_sites(sites)

    #### Extract by polygon
    if isinstance(sites1, gpd.GeoDataFrame):
        ## Surface water sites
        sw_sites_tab = rd_sql('SQL2012PROD05',
                              'Squalarc',
                              'SITES',
                              col_names=['SITE_ID', 'NZTMX', 'NZTMY'])
        sw_sites_tab.columns = ['site', 'NZTMX', 'NZTMY']
        gdf_sw_sites = xy_to_gpd('site', 'NZTMX', 'NZTMY', sw_sites_tab)
        sites1a = sites1.to_crs(gdf_sw_sites.crs)
        sw_sites2 = sel_sites_poly(gdf_sw_sites, sites1a).drop('geometry',
                                                               axis=1)

        ## Groundwater sites
        gw_sites_tab = rd_sql('SQL2012PROD05',
                              'Wells',
                              'WELL_DETAILS',
                              col_names=['WELL_NO', 'NZTMX', 'NZTMY'])
        gw_sites_tab.columns = ['site', 'NZTMX', 'NZTMY']
        gdf_gw_sites = xy_to_gpd('site', 'NZTMX', 'NZTMY', gw_sites_tab)
        gw_sites2 = sel_sites_poly(gdf_gw_sites, sites1a).drop('geometry',
                                                               axis=1)

        sites2 = sw_sites2.site.append(gw_sites2.site).astype(str).tolist()
    else:
        sites2 = pd.Series(sites1, name='site').astype(str).tolist()

    #### Extract the rest of the data
    if len(sites2) > 10000:
        n_chunks = int(np.ceil(len(sites2) * 0.0001))
        sites3 = [sites2[i::n_chunks] for i in range(n_chunks)]
        samples_tab = pd.DataFrame()
        for i in sites3:
            samples_tab1 = rd_sql('SQL2012PROD05',
                                  'Squalarc',
                                  '"SQL_SAMPLE_METHODS+"',
                                  col_names=[
                                      'Site_ID', 'SAMPLE_NO', 'ME_TYP',
                                      'Collect_Date', 'Collect_Time',
                                      'PA_NAME', 'PARAM_UNITS', 'SRESULT'
                                  ],
                                  where_col='Site_ID',
                                  where_val=i)
            samples_tab1.columns = [
                'site', 'sample_id', 'source', 'date', 'time', 'parameter',
                'units', 'val'
            ]
            samples_tab1.loc[:, 'source'] = samples_tab1.loc[:, 'source'].str.lower()
            samples_tab = pd.concat([samples_tab, samples_tab1])
    else:
        samples_tab = rd_sql('SQL2012PROD05',
                             'Squalarc',
                             '"SQL_SAMPLE_METHODS+"',
                             col_names=[
                                 'Site_ID', 'SAMPLE_NO', 'ME_TYP',
                                 'Collect_Date', 'Collect_Time', 'PA_NAME',
                                 'PARAM_UNITS', 'SRESULT'
                             ],
                             where_col='Site_ID',
                             where_val=sites2)
        samples_tab.columns = [
            'site', 'sample_id', 'source', 'date', 'time', 'parameter',
            'units', 'val'
        ]
        samples_tab.loc[:, 'source'] = samples_tab.loc[:, 'source'].str.lower()

    samples_tab2 = samples_tab.copy()
    num_test = pd.to_numeric(samples_tab2.loc[:, 'time'], errors='coerce')
    samples_tab2.loc[num_test.isnull(), 'time'] = '0000'
    # strip literal decimal points from the time strings
    samples_tab2.loc[:, 'time'] = samples_tab2.loc[:, 'time'].str.replace('.', '', regex=False)
    samples_tab2 = samples_tab2[samples_tab2.date.notnull()]
    #    samples_tab2.loc[:, 'time'] = samples_tab2.loc[:, 'time'].str.replace('9999', '0000')
    time1 = pd.to_datetime(samples_tab2.time, format='%H%M', errors='coerce')
    time1[time1.isnull()] = pd.Timestamp('2000-01-01 00:00:00')
    datetime1 = pd.to_datetime(
        samples_tab2.date.dt.date.astype(str) + ' ' +
        time1.dt.time.astype(str))
    samples_tab2.loc[:, 'date'] = datetime1
    samples_tab2 = samples_tab2.drop('time', axis=1)
    samples_tab2.loc[samples_tab2.val.isnull(), 'val'] = np.nan
    samples_tab2.loc[samples_tab2.val == 'N/A', 'val'] = np.nan

    #### Select within time range
    if isinstance(from_date, str):
        samples_tab2 = samples_tab2[samples_tab2['date'] >= from_date]
    if isinstance(to_date, str):
        samples_tab2 = samples_tab2[samples_tab2['date'] <= to_date]

    if mtypes is not None:
        mtypes1 = select_sites(mtypes)
        data = samples_tab2[samples_tab2.parameter.isin(mtypes1)].reset_index(
            drop=True)
    else:
        data = samples_tab2.reset_index(drop=True)

    #### Correct poorly typed in site names
    data.loc[:, 'site'] = data.loc[:, 'site'].str.upper().str.replace(' ', '')

    #### Convert detection limit values
    if convert_dtl:
        less1 = data['val'].str.match('<')
        if less1.sum() > 0:
            less1.loc[less1.isnull()] = False
            data2 = data.copy()
            data2.loc[less1, 'val'] = pd.to_numeric(
                data.loc[less1, 'val'].str.replace('<', ''),
                errors='coerce') * 0.5
            if dtl_method in (None, 'standard'):
                data3 = data2
            if dtl_method == 'trend':
                df1 = data2.loc[less1]
                count1 = data.groupby('parameter')['val'].count()
                count1.name = 'tot_count'
                count_dtl = df1.groupby('parameter')['val'].count()
                count_dtl.name = 'dtl_count'
                count_dtl_val = df1.groupby('parameter')['val'].nunique()
                count_dtl_val.name = 'dtl_val_count'
                combo1 = pd.concat([count1, count_dtl, count_dtl_val],
                                   axis=1,
                                   join='inner')
                combo1['dtl_ratio'] = (combo1['dtl_count'] /
                                       combo1['tot_count']).round(2)

                ## conditionals
                #            param1 = combo1[(combo1['dtl_ratio'] <= 0.4) | (combo1['dtl_ratio'] == 1)]
                #            under_40 = data['parameter'].isin(param1.index)
                param2 = combo1[(combo1['dtl_ratio'] > 0.4)
                                & (combo1['dtl_val_count'] != 1)]
                over_40 = data['parameter'].isin(param2.index)

                ## Calc detection limit values
                data3 = pd.merge(data,
                                 combo1['dtl_ratio'].reset_index(),
                                 on='parameter',
                                 how='left')
                data3.loc[:, 'val_dtl'] = data2['val']

                max_dtl_val = data2[over_40 & less1].groupby(
                    'parameter')['val'].transform('max')
                max_dtl_val.name = 'dtl_val_max'
                data3.loc[over_40 & less1, 'val_dtl'] = max_dtl_val
        else:
            data3 = data
    else:
        data3 = data

    #### Return and export
    if isinstance(export, str):
        data3.to_csv(export, encoding='utf-8', index=False)
    return data3
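The detection-limit handling above reduces to the following stand-alone pandas sketch for the default ('standard') method; the values below are made up.

import pandas as pd

# Values reported below the detection limit (prefixed with '<') are converted
# to half of the detection limit; everything else is left as-is.
vals = pd.Series(['<0.01', '0.25', '<0.05', '1.3'])
less1 = vals.str.match('<').fillna(False)
num_vals = pd.to_numeric(vals.str.replace('<', '', regex=False), errors='coerce')
num_vals[less1] = num_vals[less1] * 0.5
print(num_vals.tolist())  # [0.005, 0.25, 0.025, 1.3]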
Example #3
def sel_sites_by_poly(self, poly, buffer_dis=0):
    """
    Select the sites within a polygon (with an optional buffer distance) from the object's point geometries.
    """
    from core.spatial.vector import sel_sites_poly

    pts = self.geo_point
    sites_sel = sel_sites_poly(pts, poly, buffer_dis).index.tolist()
    return sites_sel
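sel_sites_poly itself is not shown here, but the selection it performs can be approximated with plain geopandas; the points, polygon, and buffer distance below are made up.

import geopandas as gpd
from shapely.geometry import Point, Polygon

# Keep the point sites that fall within the polygon after buffering it.
pts = gpd.GeoDataFrame(geometry=[Point(0, 0), Point(5, 5), Point(20, 20)],
                       index=['site_a', 'site_b', 'site_c'])
poly = gpd.GeoDataFrame(geometry=[Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])])
buffered = poly.buffer(2).unary_union
sites_sel = pts[pts.within(buffered)].index.tolist()
print(sites_sel)  # ['site_a', 'site_b']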
Example #4
def rd_ht_wq_data(hts,
                  sites=None,
                  mtypes=None,
                  start=None,
                  end=None,
                  dtl_method=None,
                  output_site_data=False,
                  mtype_params=None,
                  sample_params=None):
    """
    Function to read data from an hts file and optionally select specific sites and aggregate the data.

    Parameters
    ----------
    hts : str
        Path to the hts file.
    sites : list
        A list of site names within the hts file.
    mtypes : list
        A list of measurement types that should be returned.
    start : str
        The start date to retrieve from the data in ISO format (e.g. '2011-11-30 00:00').
    end : str
        The end date to retrieve from the data in ISO format (e.g. '2011-11-30 00:00').
    dtl_method : None, 'standard', 'trend'
        The method to use to convert values under a detection limit to numeric. None does no conversion. 'standard' takes half of the detection limit. 'trend' is meant as an output for trend analysis which includes an additional column dtl_ratio referring to the ratio of values under the detection limit.
    output_site_data : bool
        Should the site data be output?

    Returns
    -------
    DataFrame
    """

    #    agg_unit_dict = {'l/s': 1, 'm3/s': 1, 'm3/hour': 1, 'mm': 1, 'm3': 4}
    #    unit_convert = {'l/s': 0.001, 'm3/s': 1, 'm3/hour': 1, 'mm': 1, 'm3': 4}

    sites1 = select_sites(sites)

    #### Extract by polygon
    if isinstance(sites1, GeoDataFrame):
        ## Surface water sites
        sw_sites_tab = rd_sql('SQL2012PROD05',
                              'Squalarc',
                              'SITES',
                              col_names=['SITE_ID', 'NZTMX', 'NZTMY'])
        sw_sites_tab.columns = ['site', 'NZTMX', 'NZTMY']
        gdf_sw_sites = xy_to_gpd('site', 'NZTMX', 'NZTMY', sw_sites_tab)
        sites1a = sites1.to_crs(gdf_sw_sites.crs)
        sw_sites2 = sel_sites_poly(gdf_sw_sites, sites1a).drop('geometry',
                                                               axis=1)

        ## Groundwater sites
        gw_sites_tab = rd_sql('SQL2012PROD05',
                              'Wells',
                              'WELL_DETAILS',
                              col_names=['WELL_NO', 'NZTMX', 'NZTMY'])
        gw_sites_tab.columns = ['site', 'NZTMX', 'NZTMY']
        gdf_gw_sites = xy_to_gpd('site', 'NZTMX', 'NZTMY', gw_sites_tab)
        gw_sites2 = sel_sites_poly(gdf_gw_sites, sites1a).drop('geometry',
                                                               axis=1)

        sites2 = sw_sites2.site.append(gw_sites2.site).astype(str).tolist()
    else:
        sites2 = sites1

    ### First read all of the sites in the hts file and select the ones to be read
    sites_df = rd_hilltop_sites(hts,
                                sites=sites2,
                                mtypes=mtypes,
                                rem_wq_sample=False)

    ### Open the hts file
    wqr = Dispatch("Hilltop.WQRetrieval")
    dfile = Dispatch("Hilltop.DataFile")
    try:
        dfile.Open(hts)
    except ValueError:
        print(dfile.errmsg)

    ### Iterate through the hts file
    df_lst = []
    for i in sites_df.index:
        site = sites_df.loc[i, 'site']
        mtype = sites_df.loc[i, 'mtype']
        if mtype == 'WQ Sample':
            continue
        wqr = dfile.FromWQSite(site, mtype)

        ## Set up start and end times and aggregation initiation
        if (start is None):
            start1 = wqr.DataStartTime
        else:
            start1 = start
        if end is None:
            end1 = wqr.DataEndTime
        else:
            end1 = end

        wqr.FromTimeRange(start1, end1)

        ## Extract data
        data = []
        time = []
        # initialise here so the `if sample_p:` check below is safe even when
        # no extra sample/mtype parameters are requested for this site
        sample_p = []
        mtype_p = []

        test_params = sites_df[sites_df.site == site].mtype.unique()
        if ('WQ Sample' in test_params) & (isinstance(mtype_params, list)
                                           | isinstance(sample_params, list)):
            sample_p = []
            mtype_p = []
            while wqr.GetNext:
                data.append(wqr.value)
                time.append(str(pytime_to_datetime(wqr.time)))
                sample_p.append({
                    sp: wqr.params(sp).encode('ascii', 'ignore')
                    for sp in sample_params
                })
                mtype_p.append({
                    mp: wqr.params(mp).encode('ascii', 'ignore')
                    for mp in mtype_params
                })
        else:
            while wqr.GetNext:
                data.append(wqr.value)
                time.append(str(pytime_to_datetime(wqr.time)))

        if data:
            df_temp = DataFrame({
                'time': time,
                'data': data,
                'site': site,
                'mtype': mtype
            })
            if sample_p:
                df_temp = concat(
                    [df_temp, DataFrame(sample_p),
                     DataFrame(mtype_p)], axis=1)
            df_lst.append(df_temp)

    dfile.Close()
    wqr.close()
    if df_lst:
        data = concat(df_lst)
        data.loc[:, 'time'] = to_datetime(data.loc[:, 'time'])
        data1 = to_numeric(data.loc[:, 'data'], errors='coerce')
        data.loc[data1.notnull(), 'data'] = data1[data1.notnull()]
        #        data.loc[:, 'data'].str.replace('*', '')
        data = data.reset_index(drop=True)

        #### Convert detection limit values
        if dtl_method is not None:
            less1 = data['data'].str.match('<')
            if less1.sum() > 0:
                less1.loc[less1.isnull()] = False
                data2 = data.copy()
                data2.loc[less1, 'data'] = to_numeric(
                    data.loc[less1, 'data'].str.replace('<', ''),
                    errors='coerce') * 0.5
                if dtl_method == 'standard':
                    data3 = data2
                if dtl_method == 'trend':
                    df1 = data2.loc[less1]
                    count1 = data.groupby('mtype')['data'].count()
                    count1.name = 'tot_count'
                    count_dtl = df1.groupby('mtype')['data'].count()
                    count_dtl.name = 'dtl_count'
                    count_dtl_val = df1.groupby('mtype')['data'].nunique()
                    count_dtl_val.name = 'dtl_val_count'
                    combo1 = concat([count1, count_dtl, count_dtl_val],
                                    axis=1,
                                    join='inner')
                    combo1['dtl_ratio'] = (combo1['dtl_count'] /
                                           combo1['tot_count']).round(2)

                    ## conditionals
                    param2 = combo1[(combo1['dtl_ratio'] > 0.4)
                                    & (combo1['dtl_val_count'] != 1)]
                    over_40 = data['mtype'].isin(param2.index)

                    ## Calc detection limit values
                    data3 = merge(data,
                                  combo1['dtl_ratio'].reset_index(),
                                  on='mtype',
                                  how='left')
                    data3.loc[:, 'data_dtl'] = data2['data']

                    max_dtl_val = data2[over_40 & less1].groupby(
                        'mtype')['data'].transform('max')
                    max_dtl_val.name = 'dtl_data_max'
                    data3.loc[over_40 & less1, 'data_dtl'] = max_dtl_val
            else:
                data3 = data
        else:
            data3 = data

        if output_site_data:
            sites_df = sites_df[~(sites_df.mtype == 'WQ Sample')]
            return (data3, sites_df)
        else:
        return data3
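A hypothetical call, assuming the Hilltop COM libraries are installed and a local hts file is available; the file path, site name, measurement type, and dates are placeholders.

wq_data, wq_sites = rd_ht_wq_data(r'C:\data\WQData.hts',
                                  sites=['SQ30025'],
                                  mtypes=['Nitrate Nitrogen'],
                                  start='2012-01-01 00:00',
                                  end='2015-12-31 00:00',
                                  dtl_method='standard',
                                  output_site_data=True)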
Example #5
def allo_query(shp=None, grp_by=['date', 'take_type', 'use_type'], allo_col=['allo'], use_col=['usage'], agg_yr=True, crc='all', crc_rem='none', wap_rem='none', wap='all', take_type='all', use_type='all', catch_num='all', swaz='all', cwms_zone='all', swaz_grp='all', years='all', gr_than='all', gwaz='all', catch_name='all', catch_grp_name='all', from_date=None, to_date=None, sd_only=False, allo_use_file='S:/Surface Water/shared/base_data/usage/allo_est_use_mon.h5', allo_gis_file=r'S:\Surface Water\shared\GIS_base\vector\allocations\allo_gis.shp', allo_restr_file='S:/Surface Water/shared/base_data/usage/allo_use_ros_mon.csv', export=True, export_path='summary1.csv', debug=False):
    """
    Function to query the water use and allocation results data. Allows for the selection/filtering and aggregation of many embedded fields. Pass a list only when you want to filter the data; otherwise, leave the arguments at their defaults.
    """
    from pandas import concat, read_csv, read_hdf, merge, to_numeric, to_datetime
    from numpy import floor
    from core.ts.ts import grp_ts_agg
    from geopandas import read_file
    from core.spatial.vector import sel_sites_poly

    ### Read in the data
#    data = read_hdf(allo_use_file)[['crc', 'dates', 'take_type', 'use_type', 'mon_vol', 'up_allo_m3', 'usage', 'usage_est']]
    data = read_hdf(allo_use_file).drop(['ann_allo_m3', 'ann_usage_m3', 'band', 'band_restr', 'gauge_num', 'ann_restr_allo_m3', 'usage_ratio_est'], axis=1)
    data.rename(columns={'mon_restr_allo_m3': 'allo_restr', 'mon_usage_m3': 'usage', 'mon_allo_m3': 'allo'}, inplace=True)
    allo_gis = read_file(allo_gis_file)

    if shp is not None:
        poly = read_file(shp)
        allo_gis = sel_sites_poly(poly, allo_gis)

    ### Aggregate if needed
    if agg_yr:
        data = grp_ts_agg(data, ['crc', 'take_type', 'allo_block', 'wap'], 'date', 'A-JUN').sum().reset_index()
    df = merge(data, allo_gis.drop(['geometry'], axis=1), on=['crc', 'take_type', 'allo_block', 'wap'])
    df.loc[:, 'catch_grp'] = to_numeric(df.loc[:, 'catch_grp'], errors='coerce')

    ### Run through filters
    if type(crc) == list:
        df = df[df.crc.isin(crc)]

    if type(crc_rem) == list:
        df = df[~df.crc.isin(crc_rem)]

    if type(wap) == list:
        df = df[df.wap.isin(wap)]

    if type(wap_rem) == list:
        df = df[~df.wap.isin(wap_rem)]

    if type(take_type) == list:
        df = df[df.take_type.isin(take_type)]

    if type(use_type) == list:
        df = df[df.use_type.isin(use_type)]

    if type(catch_num) == list:
        df = df[df.catch_grp.isin(catch_num)]

    if type(catch_name) == list:
        df = df[df.catch_name.isin(catch_name)]

    if type(catch_grp_name) == list:
        df = df[df.catch_grp_.isin(catch_grp_name)]

    if type(swaz) == list:
        df = df[df.swaz.isin(swaz)]

    if type(gwaz) == list:
        df = df[df.gwaz.isin(gwaz)]

    if type(swaz_grp) == list:
        df = df[df.swaz_grp.isin(swaz_grp)]

    if type(cwms_zone) == list:
        df = df[df.cwms.isin(cwms_zone)]

    if type(gr_than) == list:
        df = df[df[allo_col[0]] > (gr_than[0] * 60*60*24*212/1000)]

    if type(years) == list:
        yrs_index = df.date.astype('str').str[0:4].astype('int')
        df = df[yrs_index.isin(years)]

    if isinstance(from_date, str):
        df = df[df.date >= from_date]

    if isinstance(to_date, str):
        df = df[df.date <= to_date]

    if sd_only:
        df = df[((df.sd1_150 > 0) & (df.take_type == 'Take Groundwater')) | (df.take_type == 'Take Surface Water')]

    df_grp = df.groupby(grp_by)
    df1 = df[df[use_col].notnull().values]
    df1_grp = df1.groupby(grp_by)

    df_sum = df_grp[allo_col].sum()
    df_count = df_grp.crc.nunique()
    df1_count = df1_grp.crc.nunique()

    df_col = ['tot_' + x for x in allo_col]
    df_sum.columns = df_col
    df1_sum = df1_grp[use_col + allo_col].sum()

    for i in range(len(allo_col)):
        df1_sum['usage/' + allo_col[i]] = df1_sum[use_col].div(df1_sum[allo_col[i]], axis=0).round(2)

    df2 = concat([df1_sum, df_sum], axis=1)
#    df12.columns = ['usage_m3', 'usage_allo_m3', 'usage_ratio', 'tot_allo_m3']
    for i in range(len(allo_col)):
        df2['allo/' + df_col[i]] = df2[allo_col[i]].div(df2[df_col[i]], axis=0).round(2)

    df2['crc_with_use_count'] = df1_count
    df2['tot_crc_count'] = df_count

    if debug:
        if export:
            df.to_csv(export_path, index=False)
        return df
    else:
        if export:
            df2.to_csv(export_path)
        return df2
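grp_ts_agg is not reproduced here, but the June-ending water-year aggregation it performs can be sketched with plain pandas; the consent number and volumes below are made up.

import pandas as pd

# Sum monthly allocation to water years ending 30 June, per consent (crc).
df = pd.DataFrame({'crc': ['CRC001'] * 4,
                   'date': pd.to_datetime(['2015-05-31', '2015-06-30',
                                           '2015-07-31', '2016-06-30']),
                   'allo': [100.0, 120.0, 80.0, 90.0]})
ann = (df.groupby(['crc', pd.Grouper(key='date', freq='A-JUN')])['allo']
         .sum()
         .reset_index())
print(ann)  # one row per crc and June-ending year: 220.0 and 170.0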
Example #6
def poly_interp_agg(precip,
                    precip_crs,
                    poly,
                    data_col,
                    time_col,
                    x_col,
                    y_col,
                    interp_buffer_dis=10000,
                    poly_buffer_dis=0,
                    grid_res=None,
                    interp_fun='cubic',
                    agg_ts_fun=None,
                    period=None,
                    digits=2,
                    agg_xy=False,
                    nfiles='many',
                    output_path=None):
    """
    Function to select the precip sites within a polygon with a certain buffer distance, then interpolate/resample the data at a specific resolution, then output the results.
    precip -- dataframe of time, x, y, and precip.\n
    precip_crs -- The crs of the x and y coordinates of the precip dataframe.\n
    poly -- str path of a shapefile polygon or a polygon GeoDataFrame.\n
    data_col -- The column name of the precip values in the precip dataframe.\n
    time_col -- The column name of the times in the precip dataframe.\n
    x_col -- The column name of the x coordinates in the precip dataframe.\n
    y_col -- The column name of the y coordinates in the precip dataframe.\n
    interp_buffer_dis -- Buffer distance of the polygon selection when performing the interpolation.\n
    poly_buffer_dis -- Buffer distance of the polygon selection when outputting the results.\n
    grid_res -- The resulting grid resolution in meters (or the unit of the final projection).\n
    interp_fun -- The scipy griddata interpolation function to be applied (see https://docs.scipy.org/doc/scipy-0.19.0/reference/generated/scipy.interpolate.griddata.html).\n
    agg_ts_fun -- The pandas time series resampling function to resample the data in time (either 'mean' or 'sum'). If None, then no time resampling.\n
    period -- The pandas time series code to resample the data in time (i.e. '2H' for two hours).\n
    digits -- the number of digits to round to (int).\n
    agg_xy -- Should all of the interpolated points within the polygon area be aggregated (mean) to a single time series?\n
    nfiles -- If output_path is a geotiff, then 'one' or 'many' geotiffs to be created.\n
    output_path -- Full path string where the output should be stored. The file extension should be one of '.tif' for geotiff, '.nc' for netcdf, or '.csv' for csv.
    """

    ### Convert x and y of precip to geodataframe
    sites0 = precip[[x_col, y_col]].drop_duplicates().reset_index(drop=True)
    sites = xy_to_gpd(sites0.index,
                      sites0[x_col],
                      sites0[y_col],
                      crs=precip_crs)
    sites.columns = ['site', 'geometry']

    ### Select the locations within the polygon
    if isinstance(poly, (GeoDataFrame, GeoSeries)):
        poly1 = poly.copy()
    elif isinstance(poly, str):
        poly1 = read_file(poly)
    sites1 = sites.to_crs(poly1.crs)
    sites_sel = sel_sites_poly(sites1, poly1, interp_buffer_dis)
    sites2 = sites0.loc[sites_sel['site']]

    ### Determine the grid resolution if not set
    if not isinstance(grid_res, (int, float)):
        bounds = poly1.unary_union.bounds
        x_range = bounds[2] - bounds[0]
        y_range = bounds[3] - bounds[1]
        min1 = min([x_range, y_range])
        grid_res = int(ceil(min1 / 20))

    ### Select the precip data from the sites
    precip2 = merge(precip, sites2, on=[x_col, y_col]).dropna()

    ### Interpolate grid
    poly_crs = ['+' + str(i) + '=' + str(poly1.crs[i]) for i in poly1.crs]
    poly_crs1 = ' '.join(poly_crs)
    new_precip = grid_interp_ts(precip2,
                                time_col,
                                x_col,
                                y_col,
                                data_col,
                                grid_res,
                                sites.crs,
                                poly_crs1,
                                interp_fun=interp_fun,
                                agg_ts_fun=agg_ts_fun,
                                period=period,
                                digits=digits)

    ### Create new sites list
    time = new_precip[time_col].sort_values().unique()
    sites_new_df = new_precip.loc[new_precip[time_col] == time[0],
                                  [x_col, y_col, data_col]]
    sites_new = xy_to_gpd(sites_new_df.index.values, x_col, y_col,
                          sites_new_df, poly_crs1)
    sites_new.columns = ['site', 'geometry']
    new_precip['site'] = tile(sites_new_df.index.values, len(time))

    ### Select sites from polygon
    sites_sel2 = sel_sites_poly(sites_new, poly1, poly_buffer_dis)
    new_precip2 = new_precip.loc[new_precip.site.isin(sites_sel2.site),
                                 [time_col, x_col, y_col, data_col]]

    ### Agg to polygon if required
    if agg_xy:
        new_precip3 = new_precip2.groupby(time_col)[data_col].mean().round(
            digits)
        time_col = None
    else:
        new_precip3 = new_precip2.set_index([time_col, x_col, y_col])[data_col]

    ### Save results
    if isinstance(output_path, str):
        path1 = path.splitext(output_path)[0]
        if '.csv' in output_path:
            new_precip3.to_csv(path1 + '.csv', header=True)

        if '.tif' in output_path:
            df = new_precip3.reset_index()
            save_geotiff(df=df,
                         data_col=data_col,
                         crs=poly_crs1,
                         x_col=x_col,
                         y_col=y_col,
                         time_col=time_col,
                         nfiles=nfiles,
                         export_path=path1 + '.tif')

        if '.nc' in output_path:
            ds1 = new_precip3.to_xarray().to_dataset()
            ds1.attrs['spatial_ref'] = poly_crs1
            ds1.to_netcdf(path1 + '.nc')

    return new_precip3
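grid_interp_ts is not reproduced here; for a single time step, the spatial interpolation it wraps behaves roughly like scipy's griddata, sketched below with made-up coordinates and rainfall values.

import numpy as np
from scipy.interpolate import griddata

# Scattered gauge coordinates (metres) and precip values for one time step.
x = np.array([0.0, 0.0, 1000.0, 1000.0, 500.0])
y = np.array([0.0, 1000.0, 0.0, 1000.0, 500.0])
vals = np.array([2.0, 4.0, 6.0, 8.0, 5.0])

# Interpolate onto a regular grid at a 250 m resolution.
grid_x, grid_y = np.meshgrid(np.arange(0, 1001, 250), np.arange(0, 1001, 250))
grid_vals = np.round(griddata((x, y), vals, (grid_x, grid_y), method='cubic'), 2)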