def metconnect_id_loc(sites=None, mc_server='SQL2012PROD03', mc_db='MetConnect', mc_site_table='RainFallPredictionSites', mc_cols=['MetConnectID', 'SiteString', 'TidedaID'], gis_server='SQL2012PROD05'):
    """
    Function to extract the MetConnect ID table with geometry locations.

    Parameters
    ----------
    sites : list of int or None
        The site numbers to extract from the table, or None for all.
    mc_server : str
        The server hosting the MetConnect database.
    mc_db : str
        The MetConnect database name.
    mc_site_table : str
        The rainfall prediction sites table.
    mc_cols : list of str
        The columns to read from mc_site_table.
    gis_server : str
        The server hosting the Bgauging database with the site locations.

    Returns
    -------
    GeoDataFrame
    """
    ### Input parameters
    # hy_server = 'SQL2012PROD05'
    # hy_db = 'Hydrotel'
    # pts_table = 'Points'
    # objs_table = 'Objects'
    # sites_table = 'Sites'
    #
    # pts_cols = ['Point', 'Object']
    # objs_cols = ['Object', 'Site']
    # sites_cols = ['Site', 'ExtSysId']

    loc_db = 'Bgauging'
    loc_table = 'RSITES'
    loc_cols = ['SiteNumber', 'NZTMX', 'NZTMY']

    ### Import tables
    mc1 = rd_sql(mc_server, mc_db, mc_site_table, mc_cols)
    mc2 = mc1[~mc1.SiteString.str.startswith('M')].copy()
    mc2.columns = ['MetConnectID', 'site_name', 'ExtSysId']
    mc2 = mc2[(mc2.MetConnectID != 7) & mc2.ExtSysId.notnull()]
    mc2.loc[:, 'ExtSysId'] = mc2.loc[:, 'ExtSysId'].astype(int)

    # hy_pts = rd_sql(hy_server, hy_db, pts_table, pts_cols, 'Point', mc2.Point.tolist())
    # hy_objs = rd_sql(hy_server, hy_db, objs_table, objs_cols, 'Object', hy_pts.Object.tolist())
    # hy_sites = rd_sql(hy_server, hy_db, sites_table, sites_cols, 'Site', hy_objs.Site.tolist())
    # hy_sites['ExtSysId'] = to_numeric(hy_sites['ExtSysId'])

    hy_loc = rd_sql(gis_server, loc_db, loc_table, loc_cols, 'SiteNumber', mc2.ExtSysId.tolist())
    hy_loc.columns = ['ExtSysId', 'x', 'y']

    # t1 = merge(mc2, hy_pts, on='Point')
    # t2 = merge(t1, hy_objs, on='Object')
    # t3 = merge(t2, hy_sites, on='Site')
    t4 = pd.merge(mc2, hy_loc, on='ExtSysId')

    hy_xy = xy_to_gpd('MetConnectID', 'x', 'y', t4)

    return hy_xy

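
# Example usage (a hedged sketch: the MetConnect and Bgauging servers above are
# internal and assumed reachable from the machine running this; the output
# shapefile path is hypothetical):
#
# mc_sites = metconnect_id_loc()
# print(mc_sites.head())
# mc_sites.to_file('metconnect_sites.shp')
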
def sel_xy_nc(bound_shp, nc_path, x_col='longitude', y_col='latitude', time_col='time', nc_vars=None, buffer_dis=0, from_date=None, to_date=None, nc_crs=4326, out_crs=None, out_type='pandas'):
    """
    Function to select space and time data from a netcdf file using a polygon shapefile.

    Returns a pandas DataFrame when out_crs is given or out_type is 'pandas', otherwise an xarray Dataset when out_type is 'xarray'.
    """
    ### Process the boundary layer
    bound = gpd.read_file(bound_shp).buffer(buffer_dis).to_crs(convert_crs(nc_crs))
    x_min, y_min, x_max, y_max = bound.unary_union.bounds

    ### Read and extract data from the netcdf file
    ds1 = xr.open_dataset(nc_path)
    time1 = pd.to_datetime(ds1[time_col].values)
    if isinstance(from_date, str):
        time1 = time1[time1 >= from_date]
    if isinstance(to_date, str):
        time1 = time1[time1 <= to_date]
    lat1 = ds1[y_col].values
    lon1 = ds1[x_col].values
    lat2 = lat1[(lat1 >= y_min) & (lat1 <= y_max)]
    lon2 = lon1[(lon1 >= x_min) & (lon1 <= x_max)]
    ds2 = ds1.loc[{x_col: lon2, time_col: time1.values, y_col: lat2}]

    ### Select the variables
    if isinstance(nc_vars, str):
        ds3 = ds2[[nc_vars]]
    elif isinstance(nc_vars, (list, np.ndarray, pd.Series)):
        ds3 = ds2[nc_vars]
    elif nc_vars is None:
        ds3 = ds2

    ### Convert to a different crs if needed
    if out_crs is not None:
        df1 = ds3.to_dataframe().reset_index()
        xy1 = ds3[[x_col, y_col]].copy()
        xy2 = xy1.to_dataframe().reset_index()
        crs1 = convert_crs(out_crs)
        new_gpd1 = xy_to_gpd(xy2.index, x_col, y_col, xy2, nc_crs)
        new_gpd2 = new_gpd1.to_crs(crs1)
        site_loc2 = xy2.copy()
        site_loc2['x_new'] = new_gpd2.geometry.apply(lambda j: j.x)
        site_loc2['y_new'] = new_gpd2.geometry.apply(lambda j: j.y)

        df2 = pd.merge(df1, site_loc2[[x_col, y_col, 'x_new', 'y_new']], on=[x_col, y_col], how='left')
        df3 = df2.drop([x_col, y_col], axis=1).rename(columns={'x_new': x_col, 'y_new': y_col})
        ds1.close()
        return df3
    elif out_type == 'pandas':
        df1 = ds3.to_dataframe().reset_index()
        ds1.close()
        return df1
    elif out_type == 'xarray':
        return ds3

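
# Example usage (a hedged sketch: 'catchment.shp' and 'precip.nc' are
# hypothetical inputs, and 'rain' is an assumed netcdf variable name):
#
# df = sel_xy_nc('catchment.shp', 'precip.nc', nc_vars='rain',
#                from_date='2000-01-01', to_date='2010-12-31',
#                buffer_dis=1000, out_crs=2193)
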
def rd_nc(poly_shp, nc_path, poly_epsg=4326, poly_id='Station_ID', x_col='longitude', y_col='latitude', data_col='rain', as_ts=True, export=True, export_path='nc_data.csv'):
    """
    Function to read in netCDF files, select locations based on a polygon, and export the results.
    """
    ### Read in all of the data
    poly = gpd.read_file(poly_shp)[[poly_id, 'geometry']].to_crs(epsg=poly_epsg)
    nc = xr.open_dataset(nc_path)

    ### Filter the nc data
    df1 = nc.to_dataframe().drop('time_bnds', axis=1).reset_index()
    df1 = df1[df1.nb2 == 0].drop('nb2', axis=1)

    ### Convert x and y to geopandas
    df1_xy = df1[[y_col, x_col]].drop_duplicates()
    df1_xy['id'] = range(len(df1_xy))
    pts = xy_to_gpd('id', x_col, y_col, df1_xy, poly_epsg)

    ### Mask the points with the polygon
    join1, poly2 = pts_poly_join(pts, poly, poly_id)
    join2 = join1[['id', poly_id]]

    ### Select the associated data
    sel_xy = pd.merge(df1_xy, join2, on='id').drop('id', axis=1)
    df2 = pd.merge(df1, sel_xy, on=[y_col, x_col])

    ### Convert to a time series if requested
    if as_ts:
        df3 = df2[[poly_id, 'time', data_col]].groupby([poly_id, 'time']).first().reset_index()
        df4 = df3.pivot(index='time', columns=poly_id, values=data_col).round(2)
    else:
        df4 = df2

    if export:
        df4.to_csv(export_path)

    return df4

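
# Example usage (a hedged sketch: 'zones.shp' and 'vcsn_rain.nc' are hypothetical
# inputs; the polygon layer is assumed to carry a 'Station_ID' attribute and the
# netcdf a 'rain' variable):
#
# ts = rd_nc('zones.shp', 'vcsn_rain.nc', data_col='rain', as_ts=True,
#            export=True, export_path='rain_by_station.csv')
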
def rd_niwa_rcp(base_path, mtypes, poly, vcsn_sites_csv=r'\\fileservices02\ManagedShares\Data\VirtualClimate\GIS\niwa_vcsn_wgs84.csv', id_col='Network', x_col='deg_x', y_col='deg_y', output_fun=None, export_path='output'):
    """
    Function to read in the NIWA RCP netcdf files and output the data in a specified format.

    output_fun must be a callable accepting (ds, export_path, file_name); a ValueError is raised otherwise.
    """
    mtype_name = {'precip': 'TotalPrecipCorr', 'T_max': 'MaxTempCorr', 'T_min': 'MinTempCorr', 'P_atmos': 'MSLP', 'PET': 'PE', 'RH_mean': 'RelHum', 'R_s': 'SurfRad', 'U_z': 'WindSpeed'}

    ### Import and reorganize data
    vcsn_sites = pd.read_csv(vcsn_sites_csv)[[id_col, x_col, y_col]]

    sites_gpd = xy_to_gpd(id_col, x_col, y_col, vcsn_sites, 4326)
    poly1 = gpd.read_file(poly)
    sites_gpd2 = sites_gpd.to_crs(poly1.crs)

    mtypes1 = [mtype_name[i] for i in mtypes]

    ### Select sites
    sites_gpd3 = sel_sites_poly(sites_gpd2, poly1)[id_col]
    site_loc1 = vcsn_sites[vcsn_sites[id_col].isin(sites_gpd3)].copy()
    site_loc1.columns = ['id', 'x', 'y']

    ### Read and extract data from the netcdf files
    for root, dirs, files in os.walk(base_path):
        files2 = [i for i in files if i.endswith('.nc')]
        files3 = [j for j in files2 if any(j.startswith(i) for i in mtypes1)]
        file_paths1 = [os.path.join(root, i) for i in files3]
        if len(file_paths1) > 0:
            ds = rd_niwa_rcp_dir(file_paths1, site_loc1, mtypes)
            if callable(output_fun):
                new_base_path = root.replace(base_path, export_path)
                base_file_name = file_paths1[0].split('VCSN_')[1]
                if not os.path.exists(new_base_path):
                    os.makedirs(new_base_path)
                output_fun(ds, new_base_path, base_file_name)
                print(base_file_name)
            else:
                raise ValueError('Must have an output function.')

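
# Example usage (a hedged sketch: the directories and shapefile are hypothetical,
# and the output function assumes rd_niwa_rcp_dir returns an object with a
# to_netcdf method, e.g. an xarray Dataset):
#
# def save_nc(ds, export_dir, file_name):
#     ds.to_netcdf(os.path.join(export_dir, file_name))
#
# rd_niwa_rcp('rcp_nc_files', ['precip', 'PET'], 'catchment.shp',
#             output_fun=save_nc, export_path='rcp_output')
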
def poly_interp_agg(precip, precip_crs, poly, data_col, time_col, x_col, y_col, interp_buffer_dis=10000, poly_buffer_dis=0, grid_res=None, interp_fun='cubic', agg_ts_fun=None, period=None, digits=2, agg_xy=False, nfiles='many', output_path=None):
    """
    Function to select the precip sites within a polygon with a certain buffer distance, then interpolate/resample the data at a specific resolution, then output the results.

    Parameters
    ----------
    precip : DataFrame
        DataFrame of time, x, y, and precip.
    precip_crs : int
        The crs of the x and y coordinates of the precip dataframe.
    poly : GeoDataFrame or str
        A str path to a shapefile polygon or a polygon GeoDataFrame.
    data_col : str
        The column name of the data to be interpolated.
    time_col : str
        The column name of the time values.
    x_col : str
        The column name of the x coordinates.
    y_col : str
        The column name of the y coordinates.
    interp_buffer_dis : int
        Buffer distance of the polygon selection when performing the interpolation.
    poly_buffer_dis : int
        Buffer distance of the polygon selection when outputting the results.
    grid_res : int
        The resulting grid resolution in meters (or the unit of the final projection).
    interp_fun : str
        The scipy griddata interpolation function to be applied (see https://docs.scipy.org/doc/scipy-0.19.0/reference/generated/scipy.interpolate.griddata.html).
    agg_ts_fun : str or None
        The pandas time series resampling function to resample the data in time (either 'mean' or 'sum'). If None, then no time resampling.
    period : str or None
        The pandas time series code to resample the data in time (e.g. '2H' for two hours).
    digits : int
        The number of digits to round to.
    agg_xy : bool
        Should all of the interpolated points within the polygon area be aggregated (mean) to a single time series?
    nfiles : str
        If output_path is a geotiff, then 'one' or 'many' geotiffs to be created.
    output_path : str or None
        Full path string where the output should be stored. The file extension should be one of '.tif' for geotiff, '.nc' for netcdf, or '.csv' for csv.

    Returns
    -------
    Series
        Indexed by time, x, and y (or time only when agg_xy=True).
    """
    ### Convert the x and y of precip to a GeoDataFrame
    sites0 = precip[[x_col, y_col]].drop_duplicates().reset_index(drop=True)
    sites = xy_to_gpd(sites0.index, sites0[x_col], sites0[y_col], crs=precip_crs)
    sites.columns = ['site', 'geometry']

    ### Select the locations within the polygon
    if isinstance(poly, (gpd.GeoDataFrame, gpd.GeoSeries)):
        poly1 = poly.copy()
    elif isinstance(poly, str):
        poly1 = gpd.read_file(poly)
    sites1 = sites.to_crs(poly1.crs)
    sites_sel = sel_sites_poly(sites1, poly1, interp_buffer_dis)
    sites2 = sites0.loc[sites_sel['site']]

    ### Determine the grid resolution if not set
    if not isinstance(grid_res, (int, float)):
        bounds = poly1.unary_union.bounds
        x_range = bounds[2] - bounds[0]
        y_range = bounds[3] - bounds[1]
        min1 = min([x_range, y_range])
        grid_res = int(np.ceil(min1 / 20))

    ### Select the precip data from the sites
    precip2 = pd.merge(precip, sites2, on=[x_col, y_col]).dropna()

    ### Interpolate the grid
    poly_crs = ['+' + str(i) + '=' + str(poly1.crs[i]) for i in poly1.crs]
    poly_crs1 = ' '.join(poly_crs)

    new_precip = grid_interp_ts(precip2, time_col, x_col, y_col, data_col, grid_res, sites.crs, poly_crs1, interp_fun=interp_fun, agg_ts_fun=agg_ts_fun, period=period, digits=digits)

    ### Create the new sites list
    time = new_precip[time_col].sort_values().unique()
    sites_new_df = new_precip.loc[new_precip[time_col] == time[0], [x_col, y_col, data_col]]
    sites_new = xy_to_gpd(sites_new_df.index.values, x_col, y_col, sites_new_df, poly_crs1)
    sites_new.columns = ['site', 'geometry']
    new_precip['site'] = np.tile(sites_new_df.index.values, len(time))

    ### Select the sites within the output polygon
    sites_sel2 = sel_sites_poly(sites_new, poly1, poly_buffer_dis)
    new_precip2 = new_precip.loc[new_precip.site.isin(sites_sel2.site), [time_col, x_col, y_col, data_col]]

    ### Aggregate to the polygon if required
    if agg_xy:
        new_precip3 = new_precip2.groupby(time_col)[data_col].mean().round(digits)
        time_col = None
    else:
        new_precip3 = new_precip2.set_index([time_col, x_col, y_col])[data_col]

    ### Save the results
    if isinstance(output_path, str):
        path1 = os.path.splitext(output_path)[0]
        if '.csv' in output_path:
            new_precip3.to_csv(path1 + '.csv', header=True)

        if '.tif' in output_path:
            df = new_precip3.reset_index()
            save_geotiff(df=df, data_col=data_col, crs=poly_crs1, x_col=x_col, y_col=y_col, time_col=time_col, nfiles=nfiles, export_path=path1 + '.tif')

        if '.nc' in output_path:
            ds1 = new_precip3.to_xarray().to_dataset()
            ds1.attrs['spatial_ref'] = poly_crs1
            ds1.to_netcdf(path1 + '.nc')

    return new_precip3

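
# Example usage (a hedged sketch: 'precip_df' is assumed to be a DataFrame with
# 'time', 'x', 'y', and 'precip' columns in NZTM (EPSG 2193), and the polygon
# path and output path are hypothetical):
#
# daily_grid = poly_interp_agg(precip_df, 2193, 'catchment.shp', 'precip',
#                              'time', 'x', 'y', grid_res=1000,
#                              agg_ts_fun='sum', period='D',
#                              output_path='precip_grid.nc')
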
def input_processing(precip_et, crs, irr1, paw1, bound_shp, rain_name, pet_name, grid_res, buffer_dis, interp_fun, agg_ts_fun, time_agg, irr_eff_dict, irr_trig_dict, min_irr_area_ratio=0.01, irr_mons=[10, 11, 12, 1, 2, 3, 4], precip_correction=1.1):
    """
    Function to process the input data for the lsrm. Returns a DataFrame of the lsrm input variables and a GeoDataFrame of the site polygons.
    """
    np.seterr(invalid='ignore')

    ## Load and resample the precip and et
    bound = gpd.read_file(bound_shp)

    new_rain = poly_interp_agg(precip_et, crs, bound_shp, rain_name, 'time', 'x', 'y', buffer_dis, grid_res, grid_res, interp_fun=interp_fun, agg_ts_fun=agg_ts_fun, period=time_agg) * precip_correction
    new_rain.name = 'precip'

    new_et = poly_interp_agg(precip_et, crs, bound_shp, pet_name, 'time', 'x', 'y', buffer_dis, grid_res, grid_res, interp_fun=interp_fun, agg_ts_fun=agg_ts_fun, period=time_agg)
    new_et.name = 'pet'

    new_rain_et = pd.concat([new_rain, new_et], axis=1)

    ## Convert the new point locations to geopandas
    time1 = new_rain_et.index.levels[0][0]
    grid1 = new_rain_et.loc[time1].reset_index()[['x', 'y']]
    grid2 = xy_to_gpd(grid1.index, 'x', 'y', grid1, bound.crs)
    grid2.columns = ['site', 'geometry']

    all_times = new_rain_et.index.levels[0]
    new_rain_et.loc[:, 'site'] = np.tile(grid1.index, len(all_times))

    ## Convert the points to polygons
    sites_poly = points_grid_to_poly(grid2, 'site')

    ## Process the polygon data
    # Select the polygons within the boundary
    sites_poly_union = sites_poly.unary_union
    irr2 = irr1[irr1.intersects(sites_poly_union)]
    irr3 = irr2[irr2.irr_type.notnull()]
    paw2 = paw1[paw1.intersects(sites_poly_union)]
    paw3 = paw2[paw2.paw.notnull()]

    # Overlay intersection
    sites_poly1 = spatial_overlays(sites_poly, bound, how='intersection')[['site', 'geometry']]
    sites_poly2 = sites_poly1.dissolve('site')
    sites_poly2.crs = sites_poly.crs
    sites_poly_area = sites_poly2.area.round(2)
    sites_poly3 = sites_poly2.reset_index()

    irr4 = spatial_overlays(irr3, sites_poly3, how='intersection')
    paw4 = spatial_overlays(paw3, sites_poly3, how='intersection')

    irr4['area'] = irr4.geometry.area.round()
    irr5 = irr4[irr4.area >= 1].drop(['idx1', 'idx2'], axis=1).copy()
    paw4['area'] = paw4.geometry.area.round()
    paw5 = paw4.loc[paw4.area >= 1].drop(['idx1', 'idx2'], axis=1).copy()
    paw5.loc[paw5.paw <= 0, 'paw'] = 1

    # Add in missing PAW values - change later to something more useful if needed
    mis_sites_index = ~sites_poly3.site.isin(paw5.site)
    sites_poly3['area'] = sites_poly3.area.round()
    paw6 = pd.concat([paw5, sites_poly3[mis_sites_index]])
    paw6.loc[paw6.paw.isnull(), 'paw'] = 1

    # Aggregate by site weighted by area to estimate a volume
    paw_area1 = paw6[['paw', 'site', 'area']].copy()
    paw_area1.loc[:, 'paw_vol'] = paw_area1['paw'] * paw_area1['area']
    paw7 = ((paw_area1.groupby('site')['paw_vol'].sum() / paw_area1.groupby('site')['area'].sum()) * sites_poly_area * 0.001).round(2)

    site_irr_area = irr5.groupby('site')['area'].sum()

    irr_eff1 = irr5.replace({'irr_type': irr_eff_dict})
    irr_eff1.loc[:, 'irr_eff'] = irr_eff1['irr_type'] * irr_eff1['area']
    irr_eff2 = (irr_eff1.groupby('site')['irr_eff'].sum() / site_irr_area).round(3)

    irr_trig1 = irr5.replace({'irr_type': irr_trig_dict})
    irr_trig1.loc[:, 'irr_trig'] = irr_trig1['irr_type'] * irr_trig1['area']
    irr_trig2 = (irr_trig1.groupby('site')['irr_trig'].sum() / site_irr_area).round(3)

    irr_area_ratio1 = (site_irr_area / sites_poly_area).round(3)

    poly_data1 = pd.concat([paw7, sites_poly_area, irr_eff2, irr_trig2, irr_area_ratio1], axis=1)
    poly_data1.columns = ['paw', 'site_area', 'irr_eff', 'irr_trig', 'irr_area_ratio']
    poly_data1.loc[poly_data1['irr_area_ratio'] < min_irr_area_ratio, ['irr_eff', 'irr_trig', 'irr_area_ratio']] = np.nan

    ## Combine the time series with the polygon data
    new_rain_et1 = new_rain_et[new_rain_et['site'].isin(sites_poly2.index)]
    input1 = pd.merge(new_rain_et1.reset_index(), poly_data1.reset_index(), on='site', how='left')

    ## Convert the precip and et to volumes
    input1.loc[:, ['precip', 'pet']] = (input1.loc[:, ['precip', 'pet']].mul(input1.loc[:, 'site_area'], axis=0) * 0.001).round(2)

    ## Remove the irrigation parameters during non-irrigation times
    input1.loc[~input1.time.dt.month.isin(irr_mons), ['irr_eff', 'irr_trig']] = np.nan

    ## Run checks on the input data
    # print('Running checks on the prepared input data')
    null_time = input1.loc[input1.time.isnull(), 'time']
    null_x = input1.loc[input1.x.isnull(), 'x']
    null_y = input1.loc[input1.y.isnull(), 'y']
    null_pet = input1.loc[input1['pet'].isnull(), 'pet']
    null_rain = input1.loc[input1['precip'].isnull(), 'precip']
    null_paw = input1.loc[input1.paw.isnull(), 'paw']
    not_null_irr_eff = input1.loc[input1.irr_eff.notnull(), 'irr_eff']

    if not null_time.empty:
        raise ValueError('Null values in the time variable')
    if not null_x.empty:
        raise ValueError('Null values in the x variable')
    if not null_y.empty:
        raise ValueError('Null values in the y variable')
    if not null_pet.empty:
        raise ValueError('Null values in the pet variable')
    if not null_rain.empty:
        raise ValueError('Null values in the rain variable')
    if not null_paw.empty:
        raise ValueError('Null values in the paw variable')
    if not_null_irr_eff.empty:
        raise ValueError('No values for irrigation variables')

    if input1['time'].dtype.name != 'datetime64[ns]':
        raise ValueError('time variable must be a datetime64[ns] dtype')
    if input1['x'].dtype != float:
        raise ValueError('x variable must be a float dtype')
    if input1['y'].dtype != float:
        raise ValueError('y variable must be a float dtype')
    if input1['pet'].dtype != float:
        raise ValueError('pet variable must be a float dtype')
    if input1['precip'].dtype != float:
        raise ValueError('precip variable must be a float dtype')
    if input1['paw'].dtype != float:
        raise ValueError('paw variable must be a float dtype')
    if input1['irr_eff'].dtype != float:
        raise ValueError('irr_eff variable must be a float dtype')
    if input1['irr_trig'].dtype != float:
        raise ValueError('irr_trig variable must be a float dtype')
    if input1['irr_area_ratio'].dtype != float:
        raise ValueError('irr_area_ratio variable must be a float dtype')

    ## Return
    return input1, sites_poly2

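
# Example usage (a hedged sketch: all inputs are hypothetical; 'precip_et_df'
# holds time/x/y/rain/pe columns in NZTM, 'irr_gdf' and 'paw_gdf' are
# GeoDataFrames with 'irr_type' and 'paw' attributes, and the irrigation dicts
# map irrigation types to efficiencies and trigger levels):
#
# lsrm_input, site_polys = input_processing(
#     precip_et_df, 2193, irr_gdf, paw_gdf, 'model_bound.shp', 'rain', 'pe',
#     grid_res=1000, buffer_dis=10000, interp_fun='cubic', agg_ts_fun='sum',
#     time_agg='D', irr_eff_dict={'Spray': 0.8}, irr_trig_dict={'Spray': 0.5})
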
def rd_hdf(self, h5_path):
    """
    Function to read an HDF5 file (.h5) that was an export from a Hydro class.
    """
    ### Read in the base tsdata and attributes
    ## Read in tsdata
    tsdata = pd.read_hdf(h5_path, 'tsdata')
    if 'qual_codes' in tsdata.columns:
        qual_codes = 'qual_codes'
    else:
        qual_codes = None

    ## Read in mfreq
    mfreq = pd.read_hdf(h5_path, 'mfreq').to_dict()

    ## Read in units
    units = pd.read_hdf(h5_path, 'units').to_dict()

    ### Make the new Hydro class
    new1 = self.add_tsdata(tsdata.reset_index(), dformat='long', hydro_id='hydro_id', freq_type=mfreq, times='time', sites='site', values='value', units=units, qual_codes=qual_codes)

    ### Read in the site attributes
    try:
        site_attr = pd.read_hdf(h5_path, 'site_attr')
        setattr(new1, 'site_attr', site_attr)
    except Exception:
        print('No site attributes.')

    ### Read in the geo points
    try:
        geo_point1 = pd.read_hdf(h5_path, 'geo_point')
        geo_point_crs = pd.to_numeric(pd.read_hdf(h5_path, 'geo_point_crs'), 'ignore').to_dict()
        geo_point = xy_to_gpd('site', 'x', 'y', geo_point1, geo_point_crs).set_index('site')
        new1.add_geo_point(geo_point, check=False)
    except Exception:
        print('No geo points.')

    ### Read in the geo catchments
    try:
        geo_catch1 = pd.read_hdf(h5_path, 'geo_catch')
        geo1 = [loads(x) for x in geo_catch1.wkt.values]
        geo_catch_crs = pd.to_numeric(pd.read_hdf(h5_path, 'geo_catch_crs'), 'ignore').to_dict()
        gdf_catch = gpd.GeoDataFrame(geo_catch1.drop('wkt', axis=1), geometry=geo1, crs=geo_catch_crs).set_index('site')
        new1.add_geo_point(gdf_catch, check=False)
    except Exception:
        print('No geo catch.')

    return new1

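
# Example usage (a hedged sketch: rd_hdf is assumed to be bound to the Hydro
# class elsewhere in the package, and 'flow_data.h5' is a hypothetical export
# produced by that class):
#
# h1 = Hydro()
# h2 = h1.rd_hdf('flow_data.h5')
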
def rd_niwa_vcsn(mtypes, sites, nc_path=r'\\fileservices02\ManagedShares\Data\VirtualClimate\vcsn_precip_et_2016-06-06.nc', vcsn_sites_csv=r'\\fileservices02\ManagedShares\Data\VirtualClimate\GIS\niwa_vcsn_wgs84.csv', id_col='Network', x_col='deg_x', y_col='deg_y', buffer_dis=0, include_sites=False, from_date=None, to_date=None, out_crs=None, netcdf_out=None):
    """
    Function to read in the NIWA vcsn netcdf file and output the data as a DataFrame.

    Parameters
    ----------
    mtypes : str or list of str
        The measurement types (either 'precip' or 'PET').
    sites : list or str
        Either a list of vcsn site names or a path to a polygon shapefile of the area of interest.
    nc_path : str
        The path to the vcsn nc file.
    vcsn_sites_csv : str
        The csv file that relates the site name to coordinates.
    id_col : str
        The site name column in vcsn_sites_csv.
    x_col : str
        The x column name in vcsn_sites_csv.
    y_col : str
        The y column name in vcsn_sites_csv.
    buffer_dis : int
        The buffer distance around the polygon for the site selection.
    include_sites : bool
        Should the site names be added to the output?
    from_date : str or None
        The start date of the selection.
    to_date : str or None
        The end date of the selection.
    out_crs : int or None
        The crs epsg number for the output coordinates if different than the default WGS84 (e.g. 2193 for NZTM).
    netcdf_out : str or None
        Optional path to also export the selected data as a netcdf file.

    Returns
    -------
    DataFrame
    """
    mtype_name = {'precip': 'rain', 'PET': 'pe'}

    ### Import and reorganize data
    vcsn_sites = pd.read_csv(vcsn_sites_csv)[[id_col, x_col, y_col]]

    if isinstance(sites, str):
        if sites.endswith('.shp'):
            sites_gpd = xy_to_gpd(id_col, x_col, y_col, vcsn_sites, 4326)
            poly1 = gpd.read_file(sites)
            sites_gpd2 = sites_gpd.to_crs(poly1.crs)

            ### Select sites
            sites2 = sel_sites_poly(sites_gpd2, poly1, buffer_dis)[id_col]
    elif isinstance(sites, (list, pd.Series, np.ndarray)):
        sites2 = sites

    ### Select locations
    site_loc1 = vcsn_sites[vcsn_sites[id_col].isin(sites2)].copy()
    site_loc1.columns = ['id', 'x', 'y']

    ### Select mtypes
    if isinstance(mtypes, str):
        mtypes1 = [mtype_name[mtypes]]
    else:
        mtypes1 = [mtype_name[i] for i in mtypes]

    if include_sites:
        mtypes1.extend(['site'])

    ### Read and extract data from the netcdf file
    ds1 = xr.open_dataset(nc_path)
    time1 = pd.to_datetime(ds1.time.values)
    if isinstance(from_date, str):
        time1 = time1[time1 >= from_date]
    if isinstance(to_date, str):
        time1 = time1[time1 <= to_date]
    lat1 = ds1.latitude.values
    lon1 = ds1.longitude.values
    lat2 = lat1[np.in1d(lat1, site_loc1.y.unique())]
    lon2 = lon1[np.in1d(lon1, site_loc1.x.unique())]
    ds2 = ds1.loc[{'longitude': lon2, 'time': time1.values, 'latitude': lat2}]
    ds3 = ds2[mtypes1]

    ### Convert to a DataFrame
    df1 = ds3.to_dataframe().reset_index()
    df1.rename(columns={'latitude': 'y', 'longitude': 'x'}, inplace=True)
    df1 = df1.dropna()

    ### Convert to a different crs if needed
    if out_crs is not None:
        crs1 = convert_crs(out_crs)
        new_gpd1 = xy_to_gpd('id', 'x', 'y', site_loc1, 4326)
        new_gpd2 = new_gpd1.to_crs(crs1)
        site_loc2 = site_loc1.copy()
        site_loc2['x_new'] = new_gpd2.geometry.apply(lambda j: j.x)
        site_loc2['y_new'] = new_gpd2.geometry.apply(lambda j: j.y)

        df2 = pd.merge(df1, site_loc2[['x', 'y', 'x_new', 'y_new']], on=['x', 'y'])
        df3 = df2.drop(['x', 'y'], axis=1).rename(columns={'x_new': 'x', 'y_new': 'y'})
        col_order = ['y', 'x', 'time']
        col_order.extend(mtypes1)
        df4 = df3[col_order]
    else:
        df4 = df1

    ### Export a netcdf file if requested (before the datasets are closed)
    if isinstance(netcdf_out, str):
        ds3.to_netcdf(netcdf_out)

    ds1.close()
    ds3.close()

    ### Return
    return df4
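
# Example usage (a hedged sketch: the default network paths above are assumed
# reachable, and 'catchment.shp' is a hypothetical polygon shapefile):
#
# vcsn_df = rd_niwa_vcsn(['precip', 'PET'], 'catchment.shp', buffer_dis=5000,
#                        from_date='2000-07-01', to_date='2015-06-30',
#                        out_crs=2193)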