def read_json(filename, date_format="%Y-%m-%dT%H:%M:%SZ"):
    """Read Spectra from json.

    The wavespectra json format is produced from `SpecDataset.to_json` by
    running `Dataset.to_dict` and converting times into iso8601 strings.

    Args:
        - filename (str): filename of json to read.
        - date_format (str): strptime format for de-serializing datetimes.

    Returns:
        - dset (SpecDataset): spectra dataset object read from json file.

    """
    with open(filename) as stream:
        content = json.load(stream)

    # Times are stored as strings; turn them back into datetime objects
    # wherever a "time" entry appears among coordinates or data variables.
    for section in ("coords", "data_vars"):
        entries = content[section]
        if "time" not in entries:
            continue
        time_entry = entries["time"]
        time_entry["data"] = [
            datetime.datetime.strptime(stamp, date_format)
            for stamp in time_entry["data"]
        ]

    dset = xr.Dataset.from_dict(content)
    set_spec_attributes(dset)
    return dset
def from_ww3(dset):
    """Format WW3 dataset to receive wavespectra accessor.

    Args:
        dset (xr.Dataset): Dataset created from a WW3 file.

    Returns:
        Formated dataset with the SpecDataset accessor in the `spec` namespace.

    """
    # Units of the spectra variable as found in the file, kept for reference
    source_units = dset.efth.attrs.get("units", "")
    dset = dset.rename(MAPPING)

    # Ensuring lon,lat are not function of time (first time step is kept)
    if attrs.TIMENAME in dset[attrs.LONNAME].dims:
        for name in (attrs.LONNAME, attrs.LATNAME):
            dset[name] = dset[name].isel(drop=True, **{attrs.TIMENAME: 0})

    # Data variables outside `to_keep` are dropped on return
    unwanted = list(set(dset.data_vars.keys()) - to_keep)

    # Converting from radians
    dset[attrs.SPECNAME] *= D2R

    # Setting standard names and storing original file attributes
    set_spec_attributes(dset)
    original_info = {"_units": source_units, "_variable_name": attrs.SPECNAME}
    dset[attrs.SPECNAME].attrs.update(original_info)

    # Adjusting attributes if 1D
    is_1d = attrs.DIRNAME not in dset or len(dset.dir) == 1
    if is_1d:
        dset[attrs.SPECNAME].attrs.update({"units": "m^{2}.s"})

    # Rotate directions by 180 degrees, wrapped into [0, 360)
    dset[attrs.DIRNAME] = (dset[attrs.DIRNAME] + 180) % 360

    trimmed = dset.drop_vars(unwanted)
    return trimmed.drop_dims("string16", errors="ignore")
def read_dictionary(spcdict):
    """Read spectra from generic dictionary.

    Entries whose ``data`` has zero length are ignored before the dataset is
    built with `xr.Dataset.from_dict`.

    Args:
        - spcdict (dict): information for defining SpecDataset. Keys define
          spectral coordinates and variables, and should be named using the
          attributes from :py:mod:`wavespectra.core.attributes.attrs`.

    Example:
        .. code:: python

            from wavespectra.core.attributes import attrs
            spcdict = {
                attrs.TIMENAME: {'dims': (attrs.TIMENAME), 'data': time},
                attrs.FREQNAME: {'dims': (attrs.FREQNAME), 'data': freq},
                attrs.DIRNAME: {'dims': (attrs.DIRNAME), 'data': dirs},
                attrs.SITENAME: {'dims': (attrs.SITENAME), 'data': site},
                attrs.SPECNAME: {
                    'dims': (attrs.TIMENAME, attrs.DIRNAME, attrs.FREQNAME),
                    'data': efth
                },
                attrs.LONNAME: {'dims': (attrs.SITENAME), 'data': lon},
                attrs.LATNAME: {'dims': (attrs.SITENAME), 'data': lat},
                attrs.DEPNAME: {'dims': (attrs.SITENAME, attrs.TIMENAME), 'data': dpt},
                attrs.WDIRNAME: {'dims': (attrs.TIMENAME), 'data': wdir},
                attrs.WSPDNAME: {'dims': (attrs.TIMENAME), 'data': wnd},
            }

    """
    populated = {}
    for name, entry in spcdict.items():
        # Skip entries that carry no data
        if len(entry["data"]):
            populated[name] = entry

    dset = xr.Dataset.from_dict(populated)
    set_spec_attributes(dset)
    return dset
def read_ww3_msl(filename_or_fileglob, chunks={}):
    """Read Spectra from WAVEWATCHIII MetOcean Solutions netCDF format.

    Args:
        - filename_or_fileglob (str): filename or fileglob specifying multiple
          files to read.
        - chunks (dict): chunk sizes for dimensions in dataset. By default
          dataset is loaded using single chunk for all dimensions (see
          xr.open_mfdataset documentation).

    Returns:
        - dset (SpecDataset): spectra dataset object read from ww3 file.

    Note:
        - If file is large to fit in memory, consider specifying chunks for
          'time' and/or 'site' dims

    """
    dset = xr.open_mfdataset(filename_or_fileglob, chunks=chunks)
    # Units of the packed variable, read before it is dropped
    _units = dset.specden.attrs.get("units", "")
    dset = dset.rename(
        {"freq": attrs.FREQNAME, "dir": attrs.DIRNAME, "wsp": attrs.WSPDNAME}
    )

    # Unpack the spectra: shift the stored values by 127 and apply "factor"
    dset[attrs.SPECNAME] = (dset["specden"].astype("float32") + 127.0) * dset["factor"]
    # `drop_vars` replaces the deprecated `Dataset.drop`, as in `from_ww3_msl`
    dset = dset.drop_vars(["specden", "factor", "df"])

    # Assign site coordinate so they will look like those read from native ww3 files
    dset[attrs.SITENAME] = np.arange(1.0, dset.site.size + 1)

    set_spec_attributes(dset)
    dset[attrs.SPECNAME].attrs.update({"_units": _units, "_variable_name": "specden"})
    # 1D spectra (no direction, or a single one) get 1D units
    if attrs.DIRNAME not in dset or len(dset.dir) == 1:
        dset[attrs.SPECNAME].attrs.update({"units": "m^{2}.s"})
    return dset
def from_ww3_msl(dset):
    """Format WW3-MSL netcdf dataset to receive wavespectra accessor.

    Args:
        dset (xr.Dataset): Dataset created from a WW3-MSL netcdf file.

    Returns:
        Formated dataset with the SpecDataset accessor in the `spec` namespace.

    """
    source_units = dset.specden.attrs.get("units", "")

    name_map = {
        "freq": attrs.FREQNAME,
        "dir": attrs.DIRNAME,
        "wsp": attrs.WSPDNAME,
    }
    dset = dset.rename(name_map)

    # Unpack the spectra: shift the stored values by 127 and apply "factor"
    packed = dset["specden"].astype("float32")
    dset[attrs.SPECNAME] = (packed + 127.0) * dset["factor"]
    dset = dset.drop_vars(["specden", "factor", "df"])

    # Assign site coordinate so they will look like those read from native ww3 files
    nsites = dset.site.size
    dset[attrs.SITENAME] = np.arange(1.0, nsites + 1)

    set_spec_attributes(dset)
    dset[attrs.SPECNAME].attrs.update(
        {"_units": source_units, "_variable_name": "specden"}
    )
    is_1d = attrs.DIRNAME not in dset or len(dset.dir) == 1
    if is_1d:
        dset[attrs.SPECNAME].attrs.update({"units": "m^{2}.s"})

    # Only selected variables to be returned
    unwanted = list(set(dset.data_vars.keys()) - to_keep)
    return dset.drop_vars(unwanted)
def from_ww3(dset):
    """Format WW3 dataset to receive wavespectra accessor.

    Args:
        dset (xr.Dataset): Dataset created from a WW3 file.

    Returns:
        Formated dataset with the SpecDataset accessor in the `spec` namespace.

    """
    dset = dset.rename(MAPPING)

    # Ensuring lon,lat are not function of time (first time step is kept)
    if attrs.TIMENAME in dset[attrs.LONNAME].dims:
        for name in (attrs.LONNAME, attrs.LATNAME):
            dset[name] = dset[name].isel(drop=True, **{attrs.TIMENAME: 0})

    # Only selected variables to be returned
    unwanted = list(set(dset.data_vars.keys()) - to_keep)

    # Converting from radians
    dset[attrs.SPECNAME] *= D2R

    # Convert to coming-from
    flipped_dirs = (dset[attrs.DIRNAME] + 180) % 360
    dset = dset.assign_coords({attrs.DIRNAME: flipped_dirs})

    # Setting standard attributes
    set_spec_attributes(dset)

    trimmed = dset.drop_vars(unwanted)
    return trimmed.drop_dims("string16", errors="ignore")
def from_wwm(dset):
    """Format WWM netcdf dataset to receive wavespectra accessor.

    Args:
        dset (xr.Dataset): Dataset created from a WWM netcdf file.

    Returns:
        Formated dataset with the SpecDataset accessor in the `spec` namespace.

    """
    source_units = dset.AC.attrs.get("units", "")
    dset = dset.rename(MAPPING)

    # Calculating wind speeds and directions
    if "Uwind" in dset and "Vwind" in dset:
        wspd, wdir = uv_to_spddir(dset["Uwind"], dset["Vwind"], coming_from=True)
        dset[attrs.WSPDNAME] = wspd
        dset[attrs.WDIRNAME] = wdir

    # Setting standard names and storing original file attributes
    set_spec_attributes(dset)
    original_info = {"_units": source_units, "_variable_name": attrs.SPECNAME}
    dset[attrs.SPECNAME].attrs.update(original_info)

    # Assigning spectral coordinates
    two_pi = 2 * np.pi
    dset[attrs.FREQNAME] = dset.SPSIG / two_pi  # convert rad to Hz
    dset[attrs.DIRNAME] = dset.SPDIR

    # converting Action to Energy density and adjust density to Hz
    dset[attrs.SPECNAME] = dset[attrs.SPECNAME] * dset.SPSIG * two_pi

    # Converting from radians
    dset[attrs.DIRNAME] *= R2D
    dset[attrs.SPECNAME] /= R2D

    # Returns only selected variables, transposed
    unwanted = list(set(dset.data_vars.keys()) - to_keep)
    ordered_dims = [
        dim for dim in ["time", "site", "freq", "dir"] if dim in dset.efth.dims
    ]
    return dset.drop_vars(unwanted).transpose(*ordered_dims)
def read_wavespectra(filename_or_fileglob, file_format="netcdf", chunks={}):
    """Read Spectra from from netCDF or ZARR format in Wavespectra convention.

    Args:
        - filename_or_fileglob (str): filename or fileglob specifying multiple
          files to read.
        - file_format (str): format of file to open, one of `netcdf` or `zarr`.
        - chunks (dict): chunk sizes for dimensions in dataset. By default
          dataset is loaded using single chunk for all dimensions (see
          xr.open_mfdataset documentation).

    Returns:
        - dset (SpecDataset): spectra dataset object read from netcdf file

    Note:
        - Assumes frequency in :math:`Hz`, direction in :math:`degree` and
          spectral energy in :math:`m^{2}degree^{-1}{s}`.
        - If file is large to fit in memory, consider specifying chunks for
          'time' and/or 'station' dims.

    """
    # Opening is delegated to the shared helper; only attributes are set here
    open_options = {
        "filename_or_fileglob": filename_or_fileglob,
        "file_format": file_format,
        "chunks": chunks,
    }
    dset = open_netcdf_or_zarr(**open_options)
    set_spec_attributes(dset)
    return dset
def make_dataset(spec, freqs, dirs, coordinates=[]):
    """Package spectral matrix to xarray.

    Args:
        spec: spectral data passed as the `data` of the DataArray.
        freqs: values for the frequency coordinate.
        dirs: values for the direction coordinate.
        coordinates: sequence of ``(name, values)`` pairs placed before the
            frequency and direction coordinates.

    Returns:
        dset: SpecDset object

    """
    spectral_axes = (
        (attrs.FREQNAME, freqs),
        (attrs.DIRNAME, dirs),
    )
    coords = tuple(coordinates) + spectral_axes
    # Dimension names are the first element of every coordinate pair
    dimensions = tuple(coord[0] for coord in coords)

    darr = xr.DataArray(
        data=spec,
        coords=coords,
        dims=dimensions,
        name=attrs.SPECNAME,
    )
    dset = darr.to_dataset()
    set_spec_attributes(dset)
    return dset
def read_wwm(filename_or_fileglob, chunks={}, convert_wind_vectors=True):
    """Read Spectra from WWM netCDF format.

    Args:
        - filename_or_fileglob (str): filename or fileglob specifying multiple
          files to read.
        - chunks (dict): chunk sizes for dimensions in dataset. By default
          dataset is loaded using single chunk for all dimensions (see
          xr.open_mfdataset documentation).
        - convert_wind_vectors (bool): choose it to convert wind vectors into
          speed / direction data arrays.

    Returns:
        - dset (SpecDataset): spectra dataset object read from file.

    Note:
        - If file is large to fit in memory, consider specifying chunks for
          'time' and/or 'station' dims.

    """
    dset = xr.open_mfdataset(filename_or_fileglob, chunks=chunks)
    _units = dset.AC.attrs.get('units', '')
    dset = dset.rename({
        'nfreq': attrs.FREQNAME,
        'ndir': attrs.DIRNAME,
        'nbstation': attrs.SITENAME,
        'AC': attrs.SPECNAME,
        'lon': attrs.LONNAME,
        'lat': attrs.LATNAME,
        'DEP': attrs.DEPNAME,
        'ocean_time': attrs.TIMENAME
    })

    # Calculating wind speeds and directions
    if convert_wind_vectors and 'Uwind' in dset and 'Vwind' in dset:
        dset[attrs.WSPDNAME], dset[attrs.WDIRNAME] = uv_to_spddir(
            dset['Uwind'], dset['Vwind'], coming_from=True)

    # Setting standard names and storing original file attributes
    set_spec_attributes(dset)
    dset[attrs.SPECNAME].attrs.update({
        '_units': _units,
        '_variable_name': attrs.SPECNAME
    })

    # Assigning spectral coordinates
    dset[attrs.FREQNAME] = dset.SPSIG / (2 * np.pi)  # convert rad to Hz
    dset[attrs.DIRNAME] = dset.SPDIR

    # converting Action to Energy density and adjust density to Hz
    dset[attrs.SPECNAME] = dset[attrs.SPECNAME] * dset.SPSIG * (2 * np.pi)

    # Converting from radians
    dset[attrs.DIRNAME] *= R2D
    dset[attrs.SPECNAME] /= R2D

    # Returns only selected variables, transposed
    to_drop = [
        dvar for dvar in dset.data_vars if dvar not in [
            attrs.SPECNAME, attrs.WSPDNAME, attrs.WDIRNAME, attrs.DEPNAME,
            attrs.LONNAME, attrs.LATNAME
        ]
    ]
    dims = [d for d in ['time', 'site', 'freq', 'dir'] if d in dset.efth.dims]
    # `drop_vars` replaces the deprecated `Dataset.drop`, as in `from_wwm`
    return dset.drop_vars(to_drop).transpose(*dims)
def construct_dataset(self):
    """Build `self.dset` from the spectra, coords and dims held on the instance.

    When `self.is_dir` is false the direction dimension is removed by
    selecting its first index, and the spectra units are set to 1D units.
    """
    darr = xr.DataArray(
        data=self.spec_list,
        coords=self.coords,
        dims=self.dims,
        name=attrs.SPECNAME,
    )
    self.dset = darr.to_dataset()
    set_spec_attributes(self.dset)

    if self.is_dir:
        return

    # No directional information: collapse the direction dimension
    self.dset = self.dset.isel(drop=True, **{attrs.DIRNAME: 0})
    self.dset[attrs.SPECNAME].attrs.update(units="m^{2}.s")
def read_hotswan(fileglob, dirorder=True):
    """Read multiple swan hotfiles into single gridded Dataset.

    Args:
        - fileglob (str, list): glob pattern specifying hotfiles to read and merge.
        - dirorder (bool): if True ensures directions are sorted. Forwarded to
          `read_swan` for every hotfile.

    Returns:
        - dset (SpecDataset): spectra dataset object with different grid parts merged.

    Note:
        - SWAN hotfiles from mpi runs are split by the number of cores over the
          largest dim of (lat, lon) with overlapping rows or columns that are
          computed in only one of the split hotfiles. Here overlappings are
          merged so that those with higher values are kept which assumes
          non-computed overlapping rows or columns are filled with zeros.

    """
    hotfiles = sorted(fileglob) if isinstance(fileglob, list) else sorted(
        glob.glob(fileglob))
    assert hotfiles, 'No SWAN file identified with fileglob %s' % (fileglob)

    # `dirorder` used to be accepted but ignored; it is now honoured
    dsets = [read_swan(hotfiles[0], dirorder=dirorder)]
    for hotfile in hotfiles[1:]:
        dset = read_swan(hotfile, dirorder=dirorder)
        previous = dsets[-1]
        # Ensure we keep non-zeros in overlapping rows or columns
        overlap = {
            attrs.LONNAME: set(previous.lon.values).intersection(dset.lon.values),
            attrs.LATNAME: set(previous.lat.values).intersection(dset.lat.values)
        }
        # The dimension sharing the fewest coordinate values is the one the
        # files are split along
        concat_dim = min(overlap, key=lambda x: len(overlap[x]))
        for concat_val in overlap[concat_dim]:
            slc = {concat_dim: [concat_val]}
            # Copy the overlapping slice with the larger total over the other
            if previous.efth.loc[slc].sum() > dset.efth.loc[slc].sum():
                dset.efth.loc[slc] = previous.efth.loc[slc]
            else:
                previous.efth.loc[slc] = dset.efth.loc[slc]
        dsets.append(dset)

    # NOTE(review): `xr.auto_combine` is deprecated/absent in newer xarray
    # releases - confirm the xarray version this module targets.
    dset = xr.auto_combine(dsets)
    set_spec_attributes(dset)
    if attrs.DIRNAME in dset and len(dset.dir) > 1:
        dset[attrs.SPECNAME].attrs.update({
            '_units': 'm^{2}.s.degree^{-1}',
            '_variable_name': 'VaDens'
        })
    else:
        dset[attrs.SPECNAME].attrs.update({
            'units': 'm^{2}.s',
            '_units': 'm^{2}.s',
            '_variable_name': 'VaDens'
        })
    return dset
def read_funwave(filename):
    """Read Spectra in Funwave format.

    Args:
        - filename (str): Funwave file to read.

    Returns:
        - dset (SpecDataset): spectra dataset object read from funwave file.

    Note:
        - Format description: https://fengyanshi.github.io/build/html/wavemaker_para.html.
        - Both 2D E(f,d) and 1d E(f) spectra are supported.
        - Directions converted from Cartesian (0E, CCW, to) to wavespectra (0N, CW, from).
        - Phases are ignored if present.
        - Blank and whitespace-only rows are skipped.

    """
    with open(filename, "r") as stream:
        # Remove any empty rows, including rows holding only whitespace, which
        # would otherwise break the positional parsing below
        data = [row for row in stream if row.strip()]

    # Shape
    nf, nd = [int(val) for val in data.pop(0).split()[:2]]

    # Tp
    tp = float(data.pop(0).split()[0])

    # Spectral coordinates (convert dir from cartesian to wavespectra convention)
    freq = np.array([float(data.pop(0).split()[0]) for _ in range(nf)])
    dirs = np.array([float(data.pop(0).split()[0]) for _ in range(nd)])
    dirs = (270 - dirs) % 360

    # Amplitude spectrum
    if nd == 1:
        amp = np.genfromtxt(data)
        coords = {attrs.FREQNAME: freq}
        dims = (attrs.FREQNAME)
    else:
        # Only the first nd rows are amplitudes; anything after is ignored
        amp = np.genfromtxt(data[:nd])
        coords = {attrs.FREQNAME: freq, attrs.DIRNAME: dirs}
        dims = (attrs.FREQNAME, attrs.DIRNAME)
    darr = xr.DataArray(data=amp.transpose(), coords=coords, dims=dims)

    # Energy density spectrum
    darr = darr**2 / (darr.spec.dfarr * darr.spec.dd * 2)

    # Define output dataset
    dset = darr.to_dataset(name=attrs.SPECNAME)
    if nd > 1:
        dset = dset.sortby(attrs.DIRNAME)
    dset["tp"] = tp

    set_spec_attributes(dset)
    return dset
def read_netcdf(filename_or_fileglob,
                chunks={},
                freqname=attrs.FREQNAME,
                dirname=attrs.DIRNAME,
                sitename=attrs.SITENAME,
                specname=attrs.SPECNAME,
                lonname=attrs.LONNAME,
                latname=attrs.LATNAME,
                timename=attrs.TIMENAME):
    """Read Spectra from generic netCDF format.

    Args:
        - filename_or_fileglob (str): filename or fileglob specifying multiple
          files to read.
        - chunks (dict): chunk sizes for dimensions in dataset. By default
          dataset is loaded using single chunk for all dimensions (see
          xr.open_mfdataset documentation).
        - <coord>name :: coordinate name in netcdf, used for standarising
          dataset.

    Returns:
        - dset (SpecDataset): spectra dataset object read from netcdf file

    Note:
        - Assumes frequency in :math:`Hz`, direction in :math:`degree` and
          spectral energy in :math:`m^{2}degree^{-1}{s}`.
        - If file is large to fit in memory, consider specifying chunks for
          'time' and/or 'station' dims.

    """
    dset = xr.open_mfdataset(filename_or_fileglob, chunks=chunks)
    # Remember how the spectra variable was described in the file
    _units = dset[specname].attrs.get('units', '')
    _variable_name = specname
    coord_map = {
        freqname: attrs.FREQNAME,
        dirname: attrs.DIRNAME,
        lonname: attrs.LONNAME,
        latname: attrs.LATNAME,
        sitename: attrs.SITENAME,
        specname: attrs.SPECNAME,
        timename: attrs.TIMENAME
    }
    # `rename` returns a new dataset; the `inplace` keyword is not accepted by
    # current xarray, so the result is reassigned. Only names actually present
    # in the file are renamed.
    dset = dset.rename({k: v for k, v in coord_map.items() if k in dset})
    dset[attrs.SPECNAME].attrs.update({
        '_units': _units,
        '_variable_name': _variable_name
    })
    # 1D spectra (no direction, or a single one) get 1D units
    if attrs.DIRNAME not in dset or len(dset.dir) == 1:
        dset[attrs.SPECNAME].attrs.update({'units': 'm^{2}.s'})
    set_spec_attributes(dset)
    return dset
def read_ww3(filename_or_fileglob, chunks={}):
    """Read Spectra from WAVEWATCHIII native netCDF format.

    Args:
        - filename_or_fileglob (str): filename or fileglob specifying multiple
          files to read.
        - chunks (dict): chunk sizes for dimensions in dataset. By default
          dataset is loaded using single chunk for all dimensions (see
          xr.open_mfdataset documentation).

    Returns:
        - dset (SpecDataset): spectra dataset object read from ww3 file.

    Note:
        - If file is large to fit in memory, consider specifying chunks for
          'time' and/or 'station' dims.
        - A warning is issued when the wind variables cannot be renamed.

    """
    dset = xr.open_mfdataset(filename_or_fileglob, chunks=chunks)
    _units = dset.efth.attrs.get("units", "")
    dset = dset.rename({
        "frequency": attrs.FREQNAME,
        "direction": attrs.DIRNAME,
        "station": attrs.SITENAME,
        "efth": attrs.SPECNAME,
        "longitude": attrs.LONNAME,
        "latitude": attrs.LATNAME,
    })

    # Deal with wind variables separately as those are optional in some cases.
    # Only the rename failure is caught, so interrupts and unrelated errors
    # still propagate (a bare `except:` would swallow them).
    try:
        dset = dset.rename({
            "wnddir": attrs.WDIRNAME,
            "wnd": attrs.WSPDNAME,
        })
    except (KeyError, ValueError):
        warnings.warn(
            "read_ww3 - Failed to rename wind variables wnddir and wnd. "
            "Those might not be available in the file")

    # Ensuring lon,lat are not function of time (first time step is kept)
    if attrs.TIMENAME in dset[attrs.LONNAME].dims:
        dset[attrs.LONNAME] = dset[attrs.LONNAME].isel(drop=True, **{attrs.TIMENAME: 0})
        dset[attrs.LATNAME] = dset[attrs.LATNAME].isel(drop=True, **{attrs.TIMENAME: 0})

    dset[attrs.SPECNAME] *= D2R

    set_spec_attributes(dset)
    dset[attrs.SPECNAME].attrs.update({
        "_units": _units,
        "_variable_name": attrs.SPECNAME
    })
    # 1D spectra (no direction, or a single one) get 1D units
    if attrs.DIRNAME not in dset or len(dset.dir) == 1:
        dset[attrs.SPECNAME].attrs.update({"units": "m^{2}.s"})

    # Rotate directions by 180 degrees, wrapped into [0, 360)
    dset[attrs.DIRNAME] = (dset[attrs.DIRNAME] + 180) % 360
    return dset
def _construct_dataset(self):
    """Construct wavespectra dataset.

    The spectra held in `self.efth` become the dataset variable, and
    `self.latitude` / `self.longitude` are attached as variables along time.

    Returns:
        The dataset, which is also stored on `self.dset`.
    """
    spectra = xr.DataArray(
        data=self.efth,
        coords=self.coords,
        dims=self.dims,
        name=attrs.SPECNAME,
    )
    self.dset = spectra.to_dataset()

    # Latitude first, then longitude, both indexed by the dataset time
    positions = (
        (attrs.LATNAME, self.latitude),
        (attrs.LONNAME, self.longitude),
    )
    for name, values in positions:
        self.dset[name] = xr.DataArray(
            data=values, coords={"time": self.dset.time}, dims=("time"))

    set_spec_attributes(self.dset)
    return self.dset
def read_ww3(filename_or_fileglob, chunks={}):
    """Read Spectra from WAVEWATCHIII native netCDF format.

    Args:
        - filename_or_fileglob (str): filename or fileglob specifying multiple
          files to read.
        - chunks (dict): chunk sizes for dimensions in dataset. By default
          dataset is loaded using single chunk for all dimensions (see
          xr.open_mfdataset documentation).

    Returns:
        - dset (SpecDataset): spectra dataset object read from ww3 file

    Note:
        - If file is large to fit in memory, consider specifying chunks for
          'time' and/or 'station' dims.

    """
    dset = xr.open_mfdataset(filename_or_fileglob, chunks=chunks)
    _units = dset.efth.attrs.get('units', '')
    # `rename` returns a new dataset; the `inplace` keyword is not accepted by
    # current xarray, so the result is reassigned.
    dset = dset.rename({
        'frequency': attrs.FREQNAME,
        'direction': attrs.DIRNAME,
        'station': attrs.SITENAME,
        'efth': attrs.SPECNAME,
        'longitude': attrs.LONNAME,
        'latitude': attrs.LATNAME,
        'wnddir': attrs.WDIRNAME,
        'wnd': attrs.WSPDNAME
    })

    # Ensuring lon,lat are not function of time (first time step is kept)
    if attrs.TIMENAME in dset[attrs.LONNAME].dims:
        dset[attrs.LONNAME] = dset[attrs.LONNAME].isel(drop=True, **{attrs.TIMENAME: 0})
        dset[attrs.LATNAME] = dset[attrs.LATNAME].isel(drop=True, **{attrs.TIMENAME: 0})

    # NOTE: going through `.values` evaluates the whole array
    dset[attrs.SPECNAME].values = np.radians(dset[attrs.SPECNAME].values)

    set_spec_attributes(dset)
    dset[attrs.SPECNAME].attrs.update({
        '_units': _units,
        '_variable_name': attrs.SPECNAME
    })
    # 1D spectra (no direction, or a single one) get 1D units
    if attrs.DIRNAME not in dset or len(dset.dir) == 1:
        dset[attrs.SPECNAME].attrs.update({'units': 'm^{2}.s'})

    # Rotate directions by 180 degrees, wrapped into [0, 360)
    dset[attrs.DIRNAME] = (dset[attrs.DIRNAME] + 180) % 360
    return dset
def to_dset(self, spec_info={}, **kwargs):
    """Create wavespectra dataset.

    Arguments:
        spec_info: dictionary for updating reconstruction defaults. When
            empty, a copy of the wrapped object is used as is.
        **kwargs: forwarded to `construct.efth`.

    Returns:
        ds: wavespectra dataset with reconstructed frequency-direction spectra

    """
    # TODO: Ensure that all arrays have wavespectra compatible names
    ds = (
        prepare_reconstruction(spec_info, base_dset=self._obj)
        if spec_info
        else self._obj.copy()
    )

    reconstructed = ds.construct.efth(**kwargs)
    ds[attrs.SPECNAME] = reconstructed
    set_spec_attributes(ds)
    return ds
def read_era5(filename_or_fileglob, chunks={}, freqs=None, dirs=None):
    """Read Spectra from ECMWF ERA5 netCDF format.

    Args:
        - filename_or_fileglob (str): filename or fileglob specifying multiple
          files to read.
        - chunks (dict): chunk sizes for dimensions in dataset. By default
          dataset is loaded using single chunk for all dimensions (see
          xr.open_mfdataset documentation).
        - freqs (list, ndarray): frequencies. By default (None or empty) use
          all 30 ERA5 frequencies.
        - dirs (list, ndarray): directions. By default (None or empty) use all
          24 ERA5 directions.

    Returns:
        - dset (SpecDataset): spectra dataset object read from netcdf file.

    Note:
        - If file is large to fit in memory, consider specifying chunks for
          'time' and/or 'station' dims.

    """
    default_freqs = np.full(30, 0.03453) * (1.1**np.arange(0, 30))
    # Default directions are shifted by 180 degrees and wrapped into [0, 360)
    default_dirs = (np.arange(7.5, 352.5 + 15, 15) + 180) % 360

    dset = read_netcdf(
        filename_or_fileglob,
        specname="d2fd",
        freqname="frequency",
        dirname="direction",
        lonname="longitude",
        latname="latitude",
        timename="time",
        chunks=chunks,
    )

    # Convert ERA5 format to wavespectra format
    dset = 10**dset * np.pi / 180
    dset = dset.fillna(0)

    # Testing `None`/size rather than truthiness keeps the "empty means
    # default" behaviour and also accepts numpy arrays, whose truth value is
    # ambiguous.
    use_default_freqs = freqs is None or np.size(freqs) == 0
    use_default_dirs = dirs is None or np.size(dirs) == 0
    dset[attrs.FREQNAME] = default_freqs if use_default_freqs else freqs
    dset[attrs.DIRNAME] = default_dirs if use_default_dirs else dirs

    # Setting standard attributes
    set_spec_attributes(dset)
    return dset
def s1_compute_efth(ds,
                    ekth_name='EKTH',
                    ekth_wavnum='WAVNUM',
                    ekth_dir='DIRECTION',
                    ekth_time='TIME',
                    efth_name='efth',
                    efth_freq='freq',
                    efth_dir='dir',
                    efth_time='time'):
    """Compute efth from Sentinel-1 wavenumber spectra.

    The result uses wavespectra library naming conventions so that
    wavespectra.SpecArray methods and plotting can be used.

    Args:
        - ds (xr.Dataset): Sentinel-1 dataset
        - ekth_name (str): name of wavenumber spectra variable
        - ekth_wavnum (str): name of wavenumber coord dim
        - ekth_dir (str): name of direction coord dim
        - ekth_time (str): name of time coord dim
        - efth_name (str): name of efth DataArray
        - efth_freq (str): name of freq coord dim
        - efth_dir (str): name of dir coord dim
        - efth_time (str): name of time coord dim

    Returns:
        - efth DataArray in wavespectra library conventions.

    Note:
        - Any exception raised while computing terminates the interpreter
          through `sys.exit`.

    """
    try:
        wavenumber_spectra = ds.data_vars[ekth_name]
        wavenumber = ds.coords[ekth_wavnum]
        converted = ek2f(ek=wavenumber_spectra, k=wavenumber, units='m4.rad-1')

        # Output is laid out as (freq, time, dir)
        out_coords = [
            k2f(wavenumber),
            converted.coords[ekth_time],
            converted.coords[ekth_dir],
        ]
        out_dims = [efth_freq, efth_time, efth_dir]
        efth = xr.DataArray(
            data=converted, coords=out_coords, dims=out_dims, name=efth_name)

        set_spec_attributes(efth)
        # NOTE(review): the key is 'unit' (singular) - confirm that is intended
        efth[efth_dir].attrs = {
            'unit': 'degree',
            'standard_name': 'sea_surface_wave_to_direction'
        }
        return efth
    except Exception as e:
        sys.exit(e)
def from_ncswan(dset):
    """Format SWAN netcdf dataset to receive wavespectra accessor.

    Args:
        dset (xr.Dataset): Dataset created from a SWAN netcdf file.

    Returns:
        Formated dataset with the SpecDataset accessor in the `spec` namespace.

    """
    source_units = dset.density.attrs.get("units", "")
    dset = dset.rename(MAPPING)

    # Ensuring lon,lat are not function of time (first time step is kept)
    if attrs.TIMENAME in dset[attrs.LONNAME].dims:
        for name in (attrs.LONNAME, attrs.LATNAME):
            dset[name] = dset[name].isel(drop=True, **{attrs.TIMENAME: 0})

    # Calculating wind speeds and directions
    if "xwnd" in dset and "ywnd" in dset:
        wspd, wdir = uv_to_spddir(dset["xwnd"], dset["ywnd"], coming_from=True)
        dset[attrs.WSPDNAME] = wspd
        dset[attrs.WDIRNAME] = wdir

    # Only selected variables to be returned
    unwanted = list(set(dset.data_vars.keys()) - to_keep)

    # Setting standard names and storing original file attributes
    set_spec_attributes(dset)
    original_info = {"_units": source_units, "_variable_name": attrs.SPECNAME}
    dset[attrs.SPECNAME].attrs.update(original_info)

    # Converting from radians
    dset[attrs.SPECNAME] /= R2D
    if attrs.DIRNAME in dset:
        dset[attrs.DIRNAME] *= R2D
        dset[attrs.DIRNAME] %= 360

    # Adjusting attributes if 1D
    is_1d = attrs.DIRNAME not in dset or len(dset.dir) == 1
    if is_1d:
        dset[attrs.SPECNAME].attrs.update({"units": "m^{2}.s"})

    # Ensure site is a coordinate
    site_is_bare_dim = (
        attrs.SITENAME in dset.dims and attrs.SITENAME not in dset.coords
    )
    if site_is_bare_dim:
        dset[attrs.SITENAME] = np.arange(1, len(dset[attrs.SITENAME]) + 1)

    return dset.drop_vars(unwanted)
def read_ww3_msl(filename_or_fileglob, chunks={}):
    """Read Spectra from WAVEWATCHIII MetOcean Solutions netCDF format.

    Args:
        - filename_or_fileglob (str): filename or fileglob specifying multiple
          files to read.
        - chunks (dict): chunk sizes for dimensions in dataset. By default
          dataset is loaded using single chunk for all dimensions (see
          xr.open_mfdataset documentation).

    Returns:
        - dset (SpecDataset): spectra dataset object read from ww3 file.

    Note:
        - If file is large to fit in memory, consider specifying chunks for
          'time' and/or 'site' dims

    """
    dset = xr.open_mfdataset(filename_or_fileglob, chunks=chunks)
    # Units of the packed variable, read before it is dropped
    _units = dset.specden.attrs.get('units', '')
    # `rename` returns a new dataset; the `inplace` keyword is not accepted by
    # current xarray, so the result is reassigned.
    dset = dset.rename({
        'freq': attrs.FREQNAME,
        'dir': attrs.DIRNAME,
        'wsp': attrs.WSPDNAME
    })

    # Unpack the spectra: shift the stored values by 127 and apply "factor"
    dset[attrs.SPECNAME] = (dset['specden'].astype('float32') + 127.) * dset['factor']
    # `drop_vars` replaces the deprecated `Dataset.drop`
    dset = dset.drop_vars(['specden', 'factor', 'df'])

    set_spec_attributes(dset)
    dset[attrs.SPECNAME].attrs.update({
        '_units': _units,
        '_variable_name': 'specden'
    })
    # 1D spectra (no direction, or a single one) get 1D units
    if attrs.DIRNAME not in dset or len(dset.dir) == 1:
        dset[attrs.SPECNAME].attrs.update({'units': 'm^{2}.s'})
    return dset
def from_ncswan(dset):
    """Format SWAN netcdf dataset to receive wavespectra accessor.

    Args:
        dset (xr.Dataset): Dataset created from a SWAN netcdf file.

    Returns:
        Formated dataset with the SpecDataset accessor in the `spec` namespace.

    """
    dset = dset.rename(MAPPING)

    # Ensuring lon,lat are not function of time (first time step is kept)
    if attrs.TIMENAME in dset[attrs.LONNAME].dims:
        static_positions = {
            attrs.LONNAME: dset[attrs.LONNAME].isel(drop=True, **{attrs.TIMENAME: 0}),
            attrs.LATNAME: dset[attrs.LATNAME].isel(drop=True, **{attrs.TIMENAME: 0}),
        }
        dset = dset.assign(static_positions)

    # Calculating wind speeds and directions
    if "xwnd" in dset and "ywnd" in dset:
        wspd, wdir = uv_to_spddir(dset["xwnd"], dset["ywnd"], coming_from=True)
        dset[attrs.WSPDNAME] = wspd
        dset[attrs.WDIRNAME] = wdir

    # Only selected variables to be returned
    unwanted = list(set(dset.data_vars.keys()) - to_keep)

    # Converting from radians
    dset[attrs.SPECNAME] /= R2D
    if attrs.DIRNAME in dset:
        dirs_in_degrees = (dset[attrs.DIRNAME] * R2D) % 360
        dset = dset.assign_coords({attrs.DIRNAME: dirs_in_degrees})

    # Ensure site is a coordinate
    site_is_bare_dim = (
        attrs.SITENAME in dset.dims and attrs.SITENAME not in dset.coords
    )
    if site_is_bare_dim:
        dset[attrs.SITENAME] = np.arange(1, len(dset[attrs.SITENAME]) + 1)

    # Setting standard attributes
    set_spec_attributes(dset)

    return dset.drop_vars(unwanted)
def read_swan(filename, dirorder=True, as_site=None):
    """Read Spectra from SWAN ASCII file.

    Args:
        - filename (str): name of the SWAN spectra file to read.
        - dirorder (bool): If True reorder spectra so that directions are sorted.
        - as_site (bool): If True locations are defined by 1D site dimension.

    Returns:
        - dset (SpecDataset): spectra dataset object read from file.

    Note:
        - When the file has an associated tab file, winds and depth are
          appended from it; failures while parsing it only raise a warning.

    """
    swanfile = SwanSpecFile(filename, dirorder=dirorder)
    times = swanfile.times
    lons = swanfile.x
    lats = swanfile.y
    # A single location is labelled after the file name, several are numbered
    if len(lons) == 1:
        sites = [os.path.splitext(os.path.basename(filename))[0]]
    else:
        sites = np.arange(len(lons)) + 1
    freqs = swanfile.freqs
    dirs = swanfile.dirs

    if as_site:
        swanfile.is_grid = False

    spec_list = list(swanfile.readall())

    # Create fake time if no timestamp
    if not times:
        times = [datetime.datetime.now().replace(second=0, microsecond=0)]

    # Winds and depth from the tab file, when available and consistent
    tab = None
    if swanfile.is_tab:
        try:
            tab = read_tab(swanfile.tabfile)
            if len(swanfile.times) != tab.index.size:
                warnings.warn(
                    "Times in {} and {} not consistent, not appending winds and depth"
                    .format(swanfile.filename, swanfile.tabfile)
                )
                tab = None
            elif 'X-wsp' in tab and 'Y-wsp' in tab:
                tab[attrs.WSPDNAME], tab[attrs.WDIRNAME] = uv_to_spddir(
                    tab['X-wsp'], tab['Y-wsp'], coming_from=True)
        except Exception as exc:
            warnings.warn(
                "Cannot parse depth and winds from {}:\n{}".format(swanfile.tabfile, exc)
            )

    is_grid = swanfile.is_grid
    if is_grid:
        lons = sorted(np.unique(lons))
        lats = sorted(np.unique(lats))
        arr = np.array(spec_list).reshape(
            len(times), len(lons), len(lats), len(freqs), len(dirs))
        # Spectra are reshaped as (time, lon, lat, ...) and swapped to (time, lat, lon, ...)
        spec_data = np.swapaxes(arr, 1, 2)
        spec_coords = OrderedDict((
            (attrs.TIMENAME, times),
            (attrs.LATNAME, lats),
            (attrs.LONNAME, lons),
            (attrs.FREQNAME, freqs),
            (attrs.DIRNAME, dirs),
        ))
        spec_dims = (attrs.TIMENAME, attrs.LATNAME, attrs.LONNAME,
                     attrs.FREQNAME, attrs.DIRNAME)
        tab_shape = (-1, 1, 1)
        tab_dims = (attrs.TIMENAME, attrs.LATNAME, attrs.LONNAME)
    else:
        spec_data = np.array(spec_list).reshape(
            len(times), len(sites), len(freqs), len(dirs))
        spec_coords = OrderedDict((
            (attrs.TIMENAME, times),
            (attrs.SITENAME, sites),
            (attrs.FREQNAME, freqs),
            (attrs.DIRNAME, dirs),
        ))
        spec_dims = (attrs.TIMENAME, attrs.SITENAME, attrs.FREQNAME, attrs.DIRNAME)
        tab_shape = (-1, 1)
        tab_dims = (attrs.TIMENAME, attrs.SITENAME)

    dset = xr.DataArray(
        data=spec_data,
        coords=spec_coords,
        dims=spec_dims,
        name=attrs.SPECNAME,
    ).to_dataset()

    # Tab variables are added before the site positions to keep variable order
    if tab is not None and attrs.WSPDNAME in tab:
        for name in (attrs.WSPDNAME, attrs.WDIRNAME):
            dset[name] = xr.DataArray(
                data=tab[name].values.reshape(tab_shape), dims=list(tab_dims))
    if tab is not None and 'dep' in tab:
        dset[attrs.DEPNAME] = xr.DataArray(
            data=tab['dep'].values.reshape(tab_shape), dims=list(tab_dims))

    if not is_grid:
        dset[attrs.LATNAME] = xr.DataArray(
            data=lats, coords={attrs.SITENAME: sites}, dims=[attrs.SITENAME])
        dset[attrs.LONNAME] = xr.DataArray(
            data=lons, coords={attrs.SITENAME: sites}, dims=[attrs.SITENAME])

    set_spec_attributes(dset)
    if 'dir' in dset and len(dset.dir) > 1:
        extra_attrs = {'_units': 'm^{2}.s.degree^{-1}', '_variable_name': 'VaDens'}
    else:
        extra_attrs = {'units': 'm^{2}.s', '_units': 'm^{2}.s', '_variable_name': 'VaDens'}
    dset[attrs.SPECNAME].attrs.update(extra_attrs)

    return dset
def read_swans(fileglob, ndays=None, int_freq=True, int_dir=False, dirorder=True, ntimes=None):
    """Read multiple swan files into single Dataset.

    Args:
        - fileglob (str, list): glob pattern specifying files to read.
        - ndays (float): number of days to keep from each file, choose None to
          keep entire period.
        - int_freq (ndarray, bool): frequency array for interpolating onto:
            - ndarray: 1d array specifying frequencies to interpolate onto.
            - True: logarithm array is constructed such that fmin=0.04118 Hz,
              fmax=0.71856 Hz, df=0.1f.
            - False: No interpolation performed in frequency space.
        - int_dir (ndarray, bool): direction array for interpolating onto:
            - ndarray: 1d array specifying directions to interpolate onto.
            - True: circular array is constructed such that dd=10 degrees.
            - False: No interpolation performed in direction space.
        - dirorder (bool): if True ensures directions are sorted.
        - ntimes (int): use it to read only specific number of times, useful
          for checking headers only.

    Returns:
        - dset (SpecDataset): spectra dataset object read from file with
          different sites and cycles concatenated along the 'site' and 'time'
          dimensions.

    Raises:
        - AssertionError: if no file matches `fileglob`.
        - IOError: if a file does not extend for `ndays`, if spectra cannot be
          arranged into the expected shape, if files of the same cycle have
          different time lengths, if sites differ across cycles, or if tab
          files are not consistent across spectra files.

    Note:
        - If multiple cycles are provided, 'time' coordinate is replaced by
          'cycletime' multi-index coordinate.
        - If more than one cycle is prescribed from fileglob, each cycle must
          have same number of sites.
        - Either all or none of the spectra in fileglob must have tabfile
          associated to provide wind/depth data.
        - Concatenation is done with numpy arrays for efficiency.

    """
    swans = sorted(fileglob) if isinstance(fileglob, list) else sorted(glob.glob(fileglob))
    assert swans, 'No SWAN file identified with fileglob %s' % (fileglob)

    # Default spectral basis for interpolating. Only booleans select a default:
    # comparing an ndarray with `== True` is elementwise and cannot be used in `if`
    if isinstance(int_freq, (bool, np.bool_)):
        int_freq = [0.04118 * 1.1**n for n in range(31)] if int_freq else None
    if isinstance(int_dir, (bool, np.bool_)):
        int_dir = np.arange(0, 360, 10) if int_dir else None

    tab_columns = (attrs.DEPNAME, attrs.WSPDNAME, attrs.WDIRNAME)

    # Everything is keyed by cycle (first time of each file) and kept sorted
    dsets = SortedDict()
    tabs = SortedDict()
    all_times = SortedDict()
    all_sites = SortedDict()
    all_lons = SortedDict()
    all_lats = SortedDict()
    deps = SortedDict()
    wspds = SortedDict()
    wdirs = SortedDict()

    for filename in swans:
        swanfile = SwanSpecFile(filename, dirorder=dirorder)
        try:
            times = swanfile.times
            lons = list(swanfile.x)
            lats = list(swanfile.y)
            if len(lons) == 1:
                # Single location: name the site after the file
                sites = [os.path.splitext(os.path.basename(filename))[0]]
            else:
                sites = list(np.arange(len(lons)) + 1)
            freqs = swanfile.freqs
            dirs = swanfile.dirs

            if ntimes is None:
                spec_list = list(swanfile.readall())
            else:
                spec_list = [swanfile.read() for _ in range(ntimes)]

            # Read tab files for winds / depth, an empty table means "no tab data"
            tab = pd.DataFrame()
            if swanfile.is_tab:
                try:
                    tab = read_tab(swanfile.tabfile).rename(columns={'dep': attrs.DEPNAME})
                    if len(swanfile.times) == tab.index.size:
                        if 'X-wsp' in tab and 'Y-wsp' in tab:
                            tab[attrs.WSPDNAME], tab[attrs.WDIRNAME] = uv_to_spddir(
                                tab['X-wsp'], tab['Y-wsp'], coming_from=True
                            )
                    else:
                        warnings.warn(
                            "Times in {} and {} not consistent, not appending winds and depth"
                            .format(swanfile.filename, swanfile.tabfile)
                        )
                        tab = pd.DataFrame()
                    tab = tab[[col for col in tab.columns if col in tab_columns]]
                except Exception as exc:
                    warnings.warn(
                        "Cannot parse depth and winds from {}:\n{}".format(swanfile.tabfile, exc)
                    )
                    # Never keep a half-parsed table or one left over from a previous file
                    tab = pd.DataFrame()
        finally:
            # Release the file handle even if reading fails
            swanfile.close()

        # Shrinking times
        if ndays is not None:
            tend = times[0] + datetime.timedelta(days=ndays)
            if tend > times[-1]:
                raise IOError('Times in %s does not extend for %0.2f days' % (filename, ndays))
            iend = times.index(min(times, key=lambda d: abs(d - tend)))
            times = times[0:iend + 1]
            spec_list = spec_list[0:iend + 1]
            if not tab.empty:
                # Positional slice keeps the table rows aligned with the kept times
                tab = tab.iloc[0:iend + 1]

        spec_list = flatten_list(spec_list, [])

        # Interpolate spectra
        if int_freq is not None or int_dir is not None:
            spec_list = [interp_spec(spec, freqs, dirs, int_freq, int_dir) for spec in spec_list]
            freqs = int_freq if int_freq is not None else freqs
            dirs = int_dir if int_dir is not None else dirs

        # Appending
        try:
            arr = np.array(spec_list).reshape(len(times), len(sites), len(freqs), len(dirs))
        except ValueError as exc:
            raise IOError(
                'Cannot arrange the %i spectra read from %s as '
                '(time=%i, site=%i, freq=%i, dir=%i)'
                % (len(spec_list), filename, len(times), len(sites), len(freqs), len(dirs))
            ) from exc

        cycle = times[0]
        if cycle not in dsets:
            dsets[cycle] = [arr]
            tabs[cycle] = [tab]
            all_sites[cycle] = list(sites)
            all_lons[cycle] = lons
            all_lats[cycle] = lats
            all_times[cycle] = times
        else:
            expected = dsets[cycle][0].shape[0]
            if arr.shape[0] != expected:
                raise IOError(
                    'Time length in %s (%i) does not match previous files (%i), '
                    'cannot concatenate' % (filename, arr.shape[0], expected)
                )
            dsets[cycle].append(arr)
            tabs[cycle].append(tab)
            all_sites[cycle].extend(sites)
            all_lons[cycle].extend(lons)
            all_lats[cycle].extend(lats)

    # Snapshot of the sorted cycles, `dsets` is consumed below
    cycles = list(dsets.keys())

    # Ensuring sites are consistent across cycles
    sites = all_sites[cycle]
    lons = all_lons[cycle]
    lats = all_lats[cycle]
    for site, lon, lat in zip(all_sites.values(), all_lons.values(), all_lats.values()):
        if (list(site) != list(sites)) or (list(lon) != list(lons)) or (list(lat) != list(lats)):
            raise IOError('Inconsistent sites across cycles in glob pattern provided')

    # Ensuring consistent tabs
    cols = {frozenset(tab.columns) for cycle_tabs in tabs.values() for tab in cycle_tabs}
    if len(cols) > 1:
        raise IOError('Inconsistent tab files, ensure either all or none of the spectra have associated tabfiles and columns are consistent')

    # Concat sites
    for cycle in cycles:
        cycle_tabs = tabs[cycle]
        dsets[cycle] = np.concatenate(dsets[cycle], axis=1)
        deps[cycle] = (
            np.vstack([tab[attrs.DEPNAME].values for tab in cycle_tabs]).T
            if attrs.DEPNAME in cycle_tabs[0] else None
        )
        wspds[cycle] = (
            np.vstack([tab[attrs.WSPDNAME].values for tab in cycle_tabs]).T
            if attrs.WSPDNAME in cycle_tabs[0] else None
        )
        wdirs[cycle] = (
            np.vstack([tab[attrs.WDIRNAME].values for tab in cycle_tabs]).T
            if attrs.WDIRNAME in cycle_tabs[0] else None
        )

    time_sizes = [dsets[cycle].shape[0] for cycle in cycles]

    # Tab columns were verified to be the same everywhere, any table tells what is available
    ref_tab = tabs[cycles[-1]][0]
    has_dep = attrs.DEPNAME in ref_tab
    has_wspd = attrs.WSPDNAME in ref_tab
    has_wdir = attrs.WDIRNAME in ref_tab

    # Concat cycles
    if len(cycles) > 1:
        # numpy requires a real sequence of arrays, not a dict view
        specs = np.concatenate(list(dsets.values()), axis=0)
        deps = np.concatenate(list(deps.values()), axis=0) if has_dep else None
        wspds = np.concatenate(list(wspds.values()), axis=0) if has_wspd else None
        wdirs = np.concatenate(list(wdirs.values()), axis=0) if has_wdir else None
    else:
        only = cycles[0]
        specs = dsets[only]
        deps = deps[only] if has_dep else None
        wspds = wspds[only] if has_wspd else None
        wdirs = wdirs[only] if has_wdir else None

    # Creating dataset, times follow the same sorted-cycle order as the data
    times = flatten_list(list(all_times.values()), [])
    dset = xr.DataArray(
        data=specs,
        coords=OrderedDict((
            (attrs.TIMENAME, times),
            (attrs.SITENAME, sites),
            (attrs.FREQNAME, freqs),
            (attrs.DIRNAME, dirs),
        )),
        dims=(attrs.TIMENAME, attrs.SITENAME, attrs.FREQNAME, attrs.DIRNAME),
        name=attrs.SPECNAME,
    ).to_dataset()

    dset[attrs.LATNAME] = xr.DataArray(
        data=lats, coords={attrs.SITENAME: sites}, dims=[attrs.SITENAME]
    )
    dset[attrs.LONNAME] = xr.DataArray(
        data=lons, coords={attrs.SITENAME: sites}, dims=[attrs.SITENAME]
    )

    time_site_dims = [attrs.TIMENAME, attrs.SITENAME]
    if wspds is not None:
        dset[attrs.WSPDNAME] = xr.DataArray(
            data=wspds,
            dims=time_site_dims,
            coords=OrderedDict(((attrs.TIMENAME, times), (attrs.SITENAME, sites))),
        )
        dset[attrs.WDIRNAME] = xr.DataArray(
            data=wdirs,
            dims=time_site_dims,
            coords=OrderedDict(((attrs.TIMENAME, times), (attrs.SITENAME, sites))),
        )
    if deps is not None:
        dset[attrs.DEPNAME] = xr.DataArray(
            data=deps,
            dims=time_site_dims,
            coords=OrderedDict(((attrs.TIMENAME, times), (attrs.SITENAME, sites))),
        )

    # Setting multi-index
    if len(cycles) > 1:
        dset = dset.rename({attrs.TIMENAME: 'cycletime'})
        # Repeat each cycle once per time step it contributes
        cycle_labels = [c for c, size in zip(cycles, time_sizes) for _ in range(size)]
        cycletime = list(zip(cycle_labels, dset.cycletime.values))
        dset['cycletime'] = pd.MultiIndex.from_tuples(
            cycletime, names=[attrs.CYCLENAME, attrs.TIMENAME]
        )
        dset['cycletime'].attrs = attrs.ATTRS[attrs.TIMENAME]

    set_spec_attributes(dset)
    if 'dir' in dset and len(dset.dir) > 1:
        dset[attrs.SPECNAME].attrs.update(
            {'_units': 'm^{2}.s.degree^{-1}', '_variable_name': 'VaDens'}
        )
    else:
        # Single or missing direction: spectra are frequency-only
        dset[attrs.SPECNAME].attrs.update(
            {'units': 'm^{2}.s', '_units': 'm^{2}.s', '_variable_name': 'VaDens'}
        )

    return dset
def read_ncswan(filename_or_fileglob, chunks={}, convert_wind_vectors=True, sort_dirs=True):
    """Read Spectra from SWAN native netCDF format.

    Args:
        - filename_or_fileglob (str): filename or fileglob specifying multiple
          files to read.
        - chunks (dict): chunk sizes for dimensions in dataset. By default
          dataset is loaded using single chunk for all dimensions (see
          xr.open_mfdataset documentation). The dict is passed through
          untouched, never modified here.
        - convert_wind_vectors (bool): choose it to convert wind vectors into
          speed / direction data arrays.
        - sort_dirs (bool): choose it to sort spectra by directions.

    Returns:
        - dset (SpecDataset): spectra dataset object read from SWAN netCDF file.

    Note:
        - If file is too large to fit in memory, consider specifying chunks for
          'time' and/or 'station' dims.

    """
    dset = xr.open_mfdataset(filename_or_fileglob, chunks=chunks)
    dset = dset.rename({
        "frequency": attrs.FREQNAME,
        "direction": attrs.DIRNAME,
        "points": attrs.SITENAME,
        "density": attrs.SPECNAME,
        "longitude": attrs.LONNAME,
        "latitude": attrs.LATNAME,
        "depth": attrs.DEPNAME,
    })

    # Ensuring lon,lat are not function of time
    if attrs.TIMENAME in dset[attrs.LONNAME].dims:
        first_time = {attrs.TIMENAME: 0}
        dset[attrs.LONNAME] = dset[attrs.LONNAME].isel(drop=True, **first_time)
        dset[attrs.LATNAME] = dset[attrs.LATNAME].isel(drop=True, **first_time)

    # Calculating wind speeds and directions
    if convert_wind_vectors and "xwnd" in dset and "ywnd" in dset:
        dset[attrs.WSPDNAME], dset[attrs.WDIRNAME] = uv_to_spddir(
            dset["xwnd"], dset["ywnd"], coming_from=True
        )

    # Setting standard names and storing original file attributes
    set_spec_attributes(dset)

    # Converting from radians
    dset[attrs.SPECNAME] /= R2D
    if attrs.DIRNAME in dset:
        dir_var = dset[attrs.DIRNAME]
        dir_var.attrs.update({'units': 'degree'})
        # Directions are scaled by R2D and wrapped into [0, 360)
        dset = dset.assign_coords(
            dir=xr.DataArray(
                data=(dir_var.data * R2D) % 360,
                coords=dir_var.coords,
                dims=dir_var.dims,
                name=dir_var.name,
                attrs=dir_var.attrs,
            )
        )
        if sort_dirs:
            dset = dset.sortby(attrs.DIRNAME)

    # Adjusting attributes if 1D
    if attrs.DIRNAME not in dset or len(dset.dir) == 1:
        dset[attrs.SPECNAME].attrs.update({"units": "m^{2}.s"})

    # Returns only selected variables
    to_keep = [
        attrs.SPECNAME,
        attrs.WSPDNAME,
        attrs.WDIRNAME,
        attrs.DEPNAME,
        attrs.LONNAME,
        attrs.LATNAME,
    ]
    to_drop = [dvar for dvar in dset.data_vars if dvar not in to_keep]

    # Ensure site is a coordinate
    if attrs.SITENAME in dset.dims and attrs.SITENAME not in dset.coords:
        dset[attrs.SITENAME] = np.arange(1, len(dset[attrs.SITENAME]) + 1)

    # `Dataset.drop` is deprecated for dropping variables, `drop_vars` replaces it
    return dset.drop_vars(to_drop)
def sel_idw(dset, lons, lats, tolerance=2.0, max_sites=4, dset_lons=None, dset_lats=None):
    """Select sites from inverse distance weighting.

    Args:
        dset (Dataset): Stations SpecDataset to interpolate from.
        lons (array): Longitude of sites to interpolate spectra at.
        lats (array): Latitude of sites to interpolate spectra at.
        tolerance (float): Maximum distance to use site for interpolation.
        max_sites (int): Maximum number of neighbour sites to use for interpolation.
        dset_lons (array): Longitude of stations in dset.
        dset_lats (array): Latitude of stations in dset.

    Returns:
        Selected SpecDataset at locations defined by (lons, lats).

    Raises:
        AssertionError: if `lons` and `lats` have different sizes.
        NotImplementedError: if `dset` is not a stations dataset.

    Note:
        Args `dset_lons`, `dset_lats` are not required but can improve performance when
            `dset` is chunked with site=1 (expensive to access station coordinates) and
            improve precision if projected coordinates are provided at high latitudes.
        Each output site is the average of its neighbours weighted by 1/distance, with
            weights normalised to sum to one. A station at zero distance is used alone.
            Sites with no neighbour within `tolerance` are filled with NaN.

    """
    assert len(lons) == len(lats), "`lons` and `lats` must be the same size."
    if (attrs.LONNAME in dset.dims or attrs.LATNAME in dset.dims
            or attrs.SITENAME not in dset.dims):
        raise NotImplementedError(
            "sel_idw only implemented for stations dataset.")

    # Providing station coordinates could be a lot more efficient for chunked datasets
    if dset_lons is None:
        dset_lons = dset[attrs.LONNAME].values
    if dset_lats is None:
        dset_lats = dset[attrs.LATNAME].values

    # All-NaN template with the site dimension dropped, used for unmatched sites
    mask = dset.isel(site=0, drop=True) * np.nan
    dsout = []
    for lon, lat in zip(lons, lats):
        closest_ids, closest_dist = nearer(dset_lons, dset_lats, lon, lat,
                                           tolerance, max_sites)
        if len(closest_ids) == 0:
            logger.debug("No stations within {} deg of site (lat={}, lon={}), "
                         "this site will be masked.".format(
                             tolerance, lat, lon))

        # Collect ids and factors of neighbours
        indices = []
        factors = []
        for ind, dist in zip(closest_ids, closest_dist):
            if dist == 0:
                # Exact match: use this station only (also avoids dividing by zero)
                indices = [ind]
                factors = [1.0]
                break
            indices.append(ind)
            factors.append(1.0 / dist)

        # Mask it if no neighbour is found
        if len(indices) == 0:
            dsout.append(mask)
            continue

        # Inverse distance weighting
        sumfac = float(1.0 / sum(factors))
        weighted = float(factors[0]) * dset.isel(site=indices[0], drop=True)
        for ind, fac in zip(indices[1:], factors[1:]):
            weighted += float(fac) * dset.isel(site=ind, drop=True)
        # Always normalise, otherwise a single neighbour would be scaled by 1/dist
        weighted *= sumfac
        dsout.append(weighted)

    # Concat sites into dataset
    dsout = xr.concat(dsout, dim=attrs.SITENAME).transpose(*dset[attrs.SPECNAME].dims)

    # Redefining coordinates and variables
    dsout[attrs.SITENAME] = np.arange(len(lons))
    dsout[attrs.LONNAME] = ((attrs.SITENAME), lons)
    dsout[attrs.LATNAME] = ((attrs.SITENAME), lats)
    dsout.attrs = dset.attrs
    set_spec_attributes(dsout)

    return dsout
def sel_idw(dset, lons, lats, tolerance=2.0, max_sites=4, dset_lons=None, dset_lats=None):
    """Select sites from inverse distance weighting.

    Args:
        dset (Dataset): Stations SpecDataset to interpolate from.
        lons (array): Longitude of sites to interpolate spectra at.
        lats (array): Latitude of sites to interpolate spectra at.
        tolerance (float): Maximum distance to use site for interpolation.
        max_sites (int): Maximum number of neighbour sites to use for interpolation.
        dset_lons (array): Longitude of stations in dset.
        dset_lats (array): Latitude of stations in dset.

    Returns:
        Selected SpecDataset at locations defined by (lons, lats).

    Note:
        Args `dset_lons`, `dset_lats` are not required but can improve performance when
            `dset` is chunked with site=1 (expensive to access station coordinates) and
            improve precision if projected coordinates are provided at high latitudes.
        Each output site is the average of its neighbours weighted by 1/distance, with
            weights normalised to sum to one. A station at zero distance is used alone.
            Sites with no neighbour within `tolerance` are filled with NaN.

    """
    coords = Coordinates(dset, lons=lons, lats=lats, dset_lons=dset_lons, dset_lats=dset_lats)

    # All-NaN template with the site dimension dropped, used for unmatched sites
    mask = dset.isel(site=0, drop=True) * np.nan
    dsout = []
    for lon, lat in zip(coords.lons, coords.lats):
        closest_ids, closest_dist = coords.nearer(lon, lat, tolerance, max_sites)
        if len(closest_ids) == 0:
            logger.debug(
                f"No stations within {tolerance} deg of site (lat={lat}, lon={lon}), "
                "this site will be masked.")

        # Collect ids and factors of neighbours
        indices = []
        factors = []
        for ind, dist in zip(closest_ids, closest_dist):
            if dist == 0:
                # Exact match: use this station only (also avoids dividing by zero)
                indices = [ind]
                factors = [1.0]
                break
            indices.append(ind)
            factors.append(1.0 / dist)

        # Mask it if no neighbour is found
        if len(indices) == 0:
            dsout.append(mask)
            continue

        # Inverse distance weighting
        sumfac = float(1.0 / sum(factors))
        weighted = float(factors[0]) * dset.isel(site=indices[0], drop=True)
        for ind, fac in zip(indices[1:], factors[1:]):
            weighted += float(fac) * dset.isel(site=ind, drop=True)
        # Always normalise, otherwise a single neighbour would be scaled by 1/dist
        weighted *= sumfac
        dsout.append(weighted)

    # Concat sites into dataset
    dsout = xr.concat(dsout, dim=attrs.SITENAME)
    # Restore the original dimension order of variables that kept all their dims
    for dvar in dsout.data_vars:
        if set(dsout[dvar].dims) == set(dset[dvar].dims):
            dsout[dvar] = dsout[dvar].transpose(*dset[dvar].dims)

    # Redefining coordinates and variables
    dsout[attrs.SITENAME] = np.arange(len(coords.lons))
    dsout[attrs.LONNAME] = ((attrs.SITENAME), coords.lons)
    dsout[attrs.LATNAME] = ((attrs.SITENAME), coords.lats)

    # Return longitudes in the convention provided
    if coords.consistent is False:
        dsout.lon.values = coords._swap_longitude_convention(dsout.lon.values)

    dsout.attrs = dset.attrs
    set_spec_attributes(dsout)

    return dsout