def get_results(flight, config="", group="All quantities"):
    """
    Load the retrieval results from a given flight. Results are
    automatically augmented with the retrieved IWC.

    Args:
        flight: The flight name, i.e. b984, c159 or c161.
        config: Name of the retrieval configuration that defines from which
            sub-folder (if any) the data is loaded.
        group: The NetCDF4 group containing the results.
    """
    flight = flight.lower()
    path = os.path.join(joint_flight.PATH, "data", "old")
    if config != "":
        path = os.path.join(path, config)

    # Raw string and escaped dot so the pattern matches a literal ".nc".
    pattern = re.compile(rf"output_{flight}_([\w-]*)\.nc")
    results = {}
    psd = D14NDmIce()
    for f in glob.glob(os.path.join(path, "*")):
        match = re.match(pattern, os.path.basename(f))
        if match is None:
            continue
        shape = match.group(1)
        # f is already a full path returned by glob, so no need to re-join.
        data = xr.load_dataset(f, group=group)

        dm = data["ice_dm"]
        n0 = data["ice_n0"]
        psd.mass_weighted_diameter = dm
        psd.intercept_parameter = n0
        wc = psd.get_mass_density()
        k = np.ones((5, 1)) / 5.0
        wc_s = convolve(wc, k, mode="same")
        nd = psd.get_moment(0)
        nd.data[wc < 5e-6] = 0.0

        data["ice_water_content"] = (dm.dims, wc.data)
        data["ice_water_content_smooth"] = (dm.dims, wc_s.data)
        data["number_density"] = (dm.dims, nd.data)
        results[shape] = data
    return results
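# A minimal usage sketch for get_results, assuming the joint_flight package and
# its data layout are available; the flight name and printed quantity are
# hypothetical.
if __name__ == "__main__":
    results = get_results("c159")
    for shape, data in results.items():
        print(shape, float(data["ice_water_content"].mean()))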
def load_nc(path, time):
    """Load the netCDF dataset corresponding to the given time.

    This function finds files matching the AERIS :data:`nc_filename`
    formatted for the given time.

    Args:
        path (str): The directory containing GOES netCDF files.
        time (datetime.datetime): The time of the file to load.

    Returns:
        xarray.Dataset: The loaded netCDF file with values filtered
            where the coordinates are NaN.

    Raises:
        FileNotFoundError: If the netCDF is not present.
        FileExistsError: If multiple matching netCDF files are found
            with the same name.
    """
    filename = nc_filename.format(
        year=time.year,
        day=time.timetuple().tm_yday,
        hour=time.hour,
        minute=time.minute,
        something="*",
    )
    # Find matching filenames in the GOES folder
    file_path = list(pathlib.Path(path).rglob(filename))
    if len(file_path) == 0:
        raise FileNotFoundError("No GOES data found in {} for {}".format(
            path, time))
    elif len(file_path) > 1:
        raise FileExistsError("More than one file found in {} for {}".format(
            path, time))

    dataset = xr.load_dataset(str(file_path[0]))
    # Remove values where the coordinates are NaNs
    dataset = dataset.where(~dataset.longitude.isnull(), drop=True)
    return dataset
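# Example call, assuming GOES files live under a hypothetical "/data/goes"
# directory and that the module-level nc_filename template is defined.
import datetime
goes = load_nc("/data/goes", datetime.datetime(2020, 1, 15, 12, 0))
print(goes)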
def load_workflow_result(datasets, loader=xarray.load_dataset, sortby='location'):
    if isinstance(datasets, str):
        if isdir(datasets):
            datasets = glob(join(datasets, "*.nc"))
        else:
            datasets = glob(datasets)
    if len(datasets) == 1:
        # Use the supplied loader here too; sorting is handled below so the
        # single-file path sorts by the same key ('location', not 'locations').
        ds = loader(datasets[0])
    else:
        ds = xarray.concat(map(loader, datasets), dim="location")
    if sortby is not None:
        ds = ds.sortby(sortby)
    return ds
def read_and_concat_smoothFinals(rinexpath, solution='Final'):
    import xarray as xr
    from aux_gps import save_ncfile
    from aux_gps import path_glob
    years = [x.as_posix().split('/')[-1] for x in path_glob(rinexpath, '*/')]
    years = [x for x in years if x.isnumeric()]
    for year in years:
        dsl = []
        # doys = [x.as_posix().split('/')[-1] for x in path_glob(rinexpath/year, '*/')]
        for doypath in path_glob(rinexpath / year, '*/'):
            file = doypath / 'dr' / solution / 'smoothFinal.nc'
            if file.is_file():
                dsl.append(xr.load_dataset(file))
                print('found smoothFinal.nc in {}'.format(doypath))
        if dsl:
            ds = xr.concat(dsl, 'time')
            ds = ds.sortby('time')
            save_ncfile(ds, rinexpath, 'smoothFinal_{}.nc'.format(year))
    return ds
def process_month(self, year, month, output_path):
    files = {
        gn: fs for gn, fs in self.validation_files.items()
        if fs["date"].year == year and fs["date"].month == month
    }
    parts = []
    for gn, fs in files.items():
        match_up_file = fs["match_up_file"]
        gprof_file = fs["gprof_file"]
        qprof_file = fs["qprof_file"]

        match_up_data = xr.load_dataset(match_up_file)
        gprof_data = RetrievalFile(
            gprof_file, has_sensitivity=True).to_xarray_dataset()
        qprof_data = RetrievalFile(qprof_file).to_xarray_dataset()

        match_up_data = match_up_data.stack(samples=("scans", "pixels"))
        gprof_data = gprof_data.stack(samples=("scans", "pixels"))
        qprof_data = qprof_data.stack(samples=("scans", "pixels"))

        valid = np.isfinite(match_up_data["surface_precipitation"])
        valid *= gprof_data["surface_precip"] >= 0.0

        match_up_data = match_up_data.isel(samples=valid).rename(
            {"surface_precipitation": "surface_precip_mrms"})
        gprof_data = gprof_data.isel(samples=valid)
        gprof_data = gprof_data[["surface_precip"]].rename(
            {"surface_precip": "surface_precip_gprof"})
        qprof_data = qprof_data.isel(samples=valid)
        qprof_data = qprof_data[["surface_precip"]].rename(
            {"surface_precip": "surface_precip_qprof"})

        merged = xr.merge([match_up_data, gprof_data, qprof_data])
        parts.append(merged.reset_index("samples"))

    results = xr.concat(parts, dim="samples")
    output_path = Path(output_path)
    if output_path.is_dir():
        # Append a default filename when a directory is given.
        output_path = output_path / f"validation_results_{year}_{month:02}.nc"
    results.to_netcdf(output_path)
def test_goes_mag():
    fname = 'dn_magn-l2-hires_g17_d20211219_v1-0-1.nc'
    url = (
        "https://lasp.colorado.edu/maven/sdc/public/data/sdc/web/cdflib_testing/dn_magn-l2-hires_g17_d20211219_v1-0-1.nc")
    if not os.path.exists(fname):
        urllib.request.urlretrieve(url, fname)

    c = xr.load_dataset(fname)
    for var in c:
        c[var].attrs['VAR_TYPE'] = 'data'
    c['coordinate'].attrs['VAR_TYPE'] = 'support_data'
    c['time'].attrs['VAR_TYPE'] = 'support_data'
    c['time_orbit'].attrs['VAR_TYPE'] = 'support_data'

    cdflib.xarray_to_cdf(
        c, 'dn_magn-l2-hires_g17_d20211219_v1-0-1-created-from-netcdf-input.cdf')
    d = cdflib.cdf_to_xarray(
        'dn_magn-l2-hires_g17_d20211219_v1-0-1-created-from-netcdf-input.cdf',
        to_unixtime=True, fillval_to_nan=True)

    os.remove('dn_magn-l2-hires_g17_d20211219_v1-0-1-created-from-netcdf-input.cdf')
    os.remove('dn_magn-l2-hires_g17_d20211219_v1-0-1.nc')
def read_evo(filename):
    """Load an `evo` post-processed Enlil file into an Evolution object.

    Parameters
    ----------
    filename : str
        netcdf Enlil post-processed output file
        Example: evo.earth.nc

    Returns
    -------
    enlil.Evolution
        An Evolution class representing the loaded file.
    """
    ds = xr.load_dataset(filename)

    # Change the dimension to time
    # Depending on which version, the key could be rundate or refdate
    try:
        t0 = np.datetime64(ds.attrs['rundate_cal'], 's')
    except KeyError:
        t0 = np.datetime64(ds.attrs['refdate_cal'], 's')
    time = t0 + np.array(ds['TIME'], np.timedelta64)
    ds = ds.rename({'nevo': 'earth_t'}).assign_coords({'earth_t': time})
    ds = ds.drop(['TIME', 'DT', 'NSTEP'])

    for var in _variables_evo:
        if var not in ds:
            continue
        da = ds[var]
        # Update the name to be consistent across data sets
        name = _variables_evo[var]
        da.name = name
        # Unit conversions
        da = _transform_variable(da)
        ds[name] = da
        ds = ds.drop(var)

    ds.attrs['name'] = ds.label
    return Evolution(ds)
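# Hypothetical usage: load the Earth evolution file named in the docstring
# example and inspect the resulting Evolution object.
evo = read_evo("evo.earth.nc")
print(evo)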
def get_infos(subd):
    files = glob.glob(f"{dirgrid}/{subd:02}/*nc")
    tiles = sorted([int(f.split(".")[1]) for f in files])
    ntiles = len(files)
    grdfile = files[0]
    ds = xr.load_dataset(grdfile)

    headersize = 29400
    stripes = {"size": 177848320, "roundingsize": 1024}
    dims = {"chunk": ntiles // 100 + 1, "tile": 100}
    attrs = ds.attrs
    varlist = list(ds.variables)
    attrs.pop("_NCProperties")
    attrs["partition_ucla"] = [0, 10000, 1, 1]
    varlist.remove("spherical")

    variables = {}
    for name in varlist:
        var = ds.variables[name]
        variables[name] = {"shape": list(var.shape), "dtype": var.dtype.name}
        vattrs = var.attrs
        # NetCDF attribute values come back as numpy scalars; cast them to
        # plain Python types so the info dict stays serializable.
        for key, value in var.attrs.items():
            if isinstance(value, np.float64):
                vattrs[key] = float(value)
            if isinstance(value, np.int32):
                vattrs[key] = int(value)
        variables[name].update(vattrs)

    infos = {}
    infos["headersize"] = headersize
    infos["stripes"] = stripes
    infos["dimensions"] = dims
    infos["variables"] = variables
    infos["attrs"] = attrs
    infos["tiles"] = tiles
    return infos
def test_saber():
    fname = 'SABER_L2B_2021020_103692_02.07.nc'
    url = (
        "https://lasp.colorado.edu/maven/sdc/public/data/sdc/web/cdflib_testing/SABER_L2B_2021020_103692_02.07.nc")
    if not os.path.exists(fname):
        urllib.request.urlretrieve(url, fname)

    c = xr.load_dataset(fname)
    for var in c:
        c[var].attrs['VAR_TYPE'] = 'data'
    c['event'].attrs['VAR_TYPE'] = 'support_data'
    c['sclatitude'].attrs['VAR_TYPE'] = 'support_data'
    c['sclongitude'].attrs['VAR_TYPE'] = 'support_data'
    c['scaltitude'].attrs['VAR_TYPE'] = 'support_data'

    cdflib.xarray_to_cdf(
        c, 'SABER_L2B_2021020_103692_02.07-created-from-netcdf-input.cdf')
    d = cdflib.cdf_to_xarray(
        'SABER_L2B_2021020_103692_02.07-created-from-netcdf-input.cdf',
        to_unixtime=True, fillval_to_nan=True)

    os.remove('SABER_L2B_2021020_103692_02.07-created-from-netcdf-input.cdf')
    os.remove('SABER_L2B_2021020_103692_02.07.nc')
def convert_to_rcmip_nc(fname):
    ds = xr.load_dataset(fname)

    # convert years to datetimes; .values yields plain integers rather than
    # 0-d DataArrays, which dt.datetime would reject
    ds["time"] = np.array(
        [dt.datetime(int(y), 1, 1) for y in ds["time"].values],
        dtype="datetime64[s]",
    ).astype("float")

    for v in ds.variables:
        ds_variable = ds.variables[v]
        if v not in ["time", "ensemble_member"]:
            # rename units
            ds_variable.attrs["unit"] = ds_variable.attrs["units"]
            del ds_variable.attrs["units"]
            # Add scenario and model info
            ds_variable.attrs["scenario"] = ds.attrs["scenario"]
            ds_variable.attrs["model"] = ds.attrs["model"]

    # Write out as rcmipII-{sce}-converted.nc
    ds.to_netcdf(os.path.join(root_dir, "{}-converted.nc".format(fname[:-3])))
def load_data(self, **kwargs):
    """Load data to xarray dataset

    :returns: (xarray.Dataset) simulation output data

    """
    # load z and zi
    with netcdf.netcdf_file(self.data, 'r', mmap=False) as ncfile:
        nc_z = ncfile.variables['z']
        nc_zi = ncfile.variables['zi']
        z = xr.DataArray(
            nc_z[0, :, 0, 0],
            dims=('z'),
            coords={'z': nc_z[0, :, 0, 0]},
            attrs={
                'long_name': nc_z.long_name.decode(),
                'units': nc_z.units.decode()
            })
        zi = xr.DataArray(
            nc_zi[0, :, 0, 0],
            dims=('zi'),
            coords={'zi': nc_zi[0, :, 0, 0]},
            attrs={
                'long_name': nc_zi.long_name.decode(),
                'units': nc_zi.units.decode()
            })
    # load other variables
    out = xr.load_dataset(
        self.data,
        drop_variables=['z', 'zi'],
        **kwargs,
    )
    out = out.assign_coords({
        'z': z,
        'zi': zi,
    })
    # assign_coords returns a new object, so the result must be assigned
    # back for the per-variable coordinates to stick
    for var in out.data_vars:
        if 'z' in out.data_vars[var].dims:
            out[var] = out.data_vars[var].assign_coords({'z': z})
        elif 'zi' in out.data_vars[var].dims:
            out[var] = out.data_vars[var].assign_coords({'zi': zi})
    # return a reordered view
    return out.transpose('z', 'zi', 'time', 'lon', 'lat')
def read_input_data(metadata: list, dim: str = 'dataset'):
    """Load data from metadata.

    Read the input data from the list of given data sets. `metadata` is a
    list of metadata containing the filenames to load. The datasets are
    stacked along the `dim` dimension. Returns an xarray.Dataset.
    """
    identifiers = []
    datasets = []
    for info in metadata:
        filename = info['filename']
        dataset = xr.load_dataset(filename)
        datasets.append(dataset)
        identifier = info[dim]
        identifiers.append(identifier)
    stacked_datasets = xr.concat(datasets, dim=dim)
    stacked_datasets[dim] = identifiers
    return stacked_datasets
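# Sketch of how read_input_data consumes its metadata list; the filenames and
# the 'dataset' identifiers below are hypothetical.
metadata = [
    {"filename": "model_a.nc", "dataset": "model_a"},
    {"filename": "model_b.nc", "dataset": "model_b"},
]
stacked = read_input_data(metadata, dim="dataset")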
def deserialize_dataset(data):
    """
    Read an xarray dataset from a byte stream containing the dataset in
    NetCDF format.

    Args:
        data: The bytes object containing the binary data of the NetCDF
            file.

    Returns:
        The deserialized xarray dataset.
    """
    fd, filename = mkstemp()
    try:
        # Write through fdopen so the descriptor from mkstemp is closed
        # rather than leaked.
        with os.fdopen(fd, "wb") as file:
            file.write(data)
        dataset = xr.load_dataset(filename)
    finally:
        Path(filename).unlink()
    return dataset
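# Round-trip sketch: Dataset.to_netcdf returns bytes when no path is given,
# which is the format deserialize_dataset expects. The toy dataset is
# hypothetical.
example = xr.Dataset({"x": ("t", [1.0, 2.0, 3.0])})
payload = example.to_netcdf()
restored = deserialize_dataset(payload)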
def test_MGITM_model():
    fname = 'MGITM_LS180_F130_150615.nc'
    url = (
        "https://lasp.colorado.edu/maven/sdc/public/data/sdc/web/cdflib_testing/MGITM_LS180_F130_150615.nc")
    if not os.path.exists(fname):
        urllib.request.urlretrieve(url, fname)

    c = xr.load_dataset(fname)
    for var in c:
        c[var].attrs['VAR_TYPE'] = 'data'
    c = c.rename({'Latitude': 'latitude', 'Longitude': 'longitude'})
    c['longitude'].attrs['VAR_TYPE'] = 'support_data'
    c['latitude'].attrs['VAR_TYPE'] = 'support_data'
    c['altitude'].attrs['VAR_TYPE'] = 'support_data'

    cdflib.xarray_to_cdf(
        c, 'MGITM_LS180_F130_150615-created-from-netcdf-input.cdf')
    d = cdflib.cdf_to_xarray(
        'MGITM_LS180_F130_150615-created-from-netcdf-input.cdf',
        to_unixtime=True, fillval_to_nan=True)

    os.remove('MGITM_LS180_F130_150615-created-from-netcdf-input.cdf')
    os.remove('MGITM_LS180_F130_150615.nc')
def interpolate_vertical(ml_file, inter_file, new_vertical_axis):
    """
    Linearly interpolate all 4D variables of ml_file to the levels of
    new_vertical_axis and save them in inter_file.
    """
    with xr.load_dataset(inter_file) as interpolated:
        reference = [variable for variable in interpolated.variables
                     if len(interpolated[variable].shape) == 4][0]
        with xr.open_dataset(ml_file) as ml:
            for variable in [variable for variable in ml.variables
                             if variable not in interpolated.variables
                             and len(ml[variable].dims) == 4
                             and "lev_2" in ml[variable].dims]:
                try:
                    x = np.array(ml[new_vertical_axis].data)
                    y = np.array(ml[variable].data)
                    interpolated_data = interpolate_1d(
                        interpolated["lev"].data, x, y, axis=1)
                    interpolated[variable] = interpolated[reference].copy(
                        data=interpolated_data)
                    interpolated[variable].attrs = ml[variable].attrs
                except Exception as e:
                    print(variable, e)
        interpolated.to_netcdf(inter_file)
def download(self):
    outfile = self.datasets_root / 'MOS_ANMN-WA_AETVZ_WATR20_FV01_WATR20-1909-Continental-194_currents.nc'
    get_current_timeseries(outfile=outfile)  # made in previous notebook

    xd = xr.load_dataset(outfile)
    df = xd.to_dataframe()
    df = df[[
        'VCUR', 'UCUR', 'WCUR', 'TEMP', 'DEPTH',
        'M2', 'S2', 'N2', 'K2', 'K1', 'O1', 'P1', 'Q1',
        'M4', 'M6', 'S4', 'MK3', 'MM', 'SSA', 'SA', 'SPD'
    ]]
    df.dropna(subset=self.columns_target, inplace=True)

    # Only keep parts with at most 5 NaNs in the last 48 periods
    has_past = df.SPD.isna().rolling(48).sum() < 5
    df = df[has_past]
    df = df.resample('30T').mean()
    return df
def sample_function(sample, baseFolder=baseFolder, destination=destination,
                    overWrite=False):
    filename = ''.join(['pore_affiliation_', sample, '.nc'])
    path = os.path.join(destination, filename)
    if not os.path.exists(path) or overWrite:
        try:
            # metadata = xr.load_dataset(os.path.join(destination, ''.join(['pore_props_', sample, '.nc'])))
            metadata = xr.load_dataset(
                os.path.join(destination, ''.join(['dyn_data_', sample, '.nc'])))
            relevant_pores = metadata['label'].data
            pore_affiliation, labels = track_pore_affiliation(
                sample, relevant_pores, baseFolder,
                label_im=metadata['label_matrix'].data)
            data = xr.Dataset(
                {'pore_affiliation': ('label', pore_affiliation)},
                coords={'label': labels})
            data.attrs = metadata.attrs
            data.attrs['explanation'] = (
                '1 - top yarn, 2 - bottom yarn, 3 - interlace, 0 - not in contact')
            filename = ''.join(
                ['pore_affiliation_small_interface', sample, '.nc'])
            path = os.path.join(destination, filename)
            data.to_netcdf(path)
            return 'completed'
        except Exception:
            # report the failure without aborting a batch of samples
            return traceback.print_exc()
    else:
        return 'already done'
def fudge_intervals_cbound(file):
    """make data consistent for given bound"""
    values_changed = 0
    ds = xr.load_dataset(file)
    for cbound in ["pf_lower", "pf_upper"]:
        for i in range(1, len(ds.interval)):
            higher_arr = ds[cbound][i, ...].values
            lower_arr = ds[cbound][i - 1, ...].values
            diff = higher_arr - lower_arr
            mask = diff <= 0
            num = np.count_nonzero(mask)
            if num > 0:
                print(f" {num} value(s) changed", flush=True)
                values_changed += num
                higher_arr[mask] = lower_arr[mask] * 1.001
                ds[cbound][i, ...] = higher_arr
    ds.close()
    ds.to_netcdf(file)
    return values_changed
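# Hypothetical driver for fudge_intervals_cbound: sweep a directory of output
# files and tally the adjustments. The glob pattern and directory are
# assumptions, not part of the original code.
from glob import glob
total = sum(fudge_intervals_cbound(f) for f in glob("out/*.nc"))
print(f"{total} bound value(s) adjusted in total")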
def test_save_netcdf(self):
    """
    Testing for saver done here since we have a model to integrate so
    it's easy.
    """
    system = BackendTestingHelper()
    results = system.run(
        self.DURATION, self.DT,
        ZeroInput().as_cubic_splines(self.DURATION, self.DT))
    results.attrs = self.EXTRA_ATTRS
    # save to netCDF
    nc_name = os.path.join(self.TEST_DIR, "netcdf_test")
    save_to_netcdf(results, nc_name)
    # actual data
    self.assertTrue(os.path.exists(nc_name + ".nc"))
    # metadata
    self.assertTrue(os.path.exists(nc_name + ".json"))
    # load and check
    loaded = xr.load_dataset(nc_name + ".nc")
    with open(nc_name + ".json", "r") as f:
        attrs = json.load(f)
    loaded.attrs = attrs
    xr.testing.assert_equal(results, loaded)
    self.assertDictEqual(loaded.attrs, self.EXTRA_ATTRS)
def process_IMS_data_at_station_on_dsea_foehn_dates(
        ims_path=ims_path, station='SEDOM', path=des_path,
        times=['2014-08-01', '2014-08-31']):
    import xarray as xr
    import pandas as pd
    import numpy as np
    # import matplotlib.pyplot as plt
    ws = xr.open_dataset(
        ims_path / 'IMS_WS_israeli_10mins.nc')[station].sel(time=slice(*times))
    wd = xr.open_dataset(
        ims_path / 'IMS_WD_israeli_10mins.nc')[station].sel(time=slice(*times))
    rh = xr.open_dataset(
        ims_path / 'IMS_RH_israeli_10mins.nc')[station].sel(time=slice(*times))
    ts = xr.open_dataset(
        ims_path / 'IMS_TD_israeli_10mins.nc')[station].sel(time=slice(*times))
    ds = xr.Dataset()
    ds['WS'] = ws
    ds['WD'] = wd
    ds['T'] = ts
    ds['RH'] = rh
    # convert to UTC, since IMS is always UTC+2
    new_time = ds['time'] - pd.Timedelta(2, unit='h')
    new_time = new_time.dt.round('s')
    ds['time'] = new_time
    ds['PWV'] = xr.load_dataset(path / 'DSEA_PWV_GNSS_2014-08.nc')['pwv-soi']
    da8 = ds.sel(time=slice('2014-08-08T13:00:00',
                            '2014-08-08T19:00:00')).to_array('var')
    da8['time'] = np.linspace(13, 19, len(da8['time']))
    # da8['time'] = da8['time'].dt.time
    da16 = ds.sel(time=slice('2014-08-16T13:00:00',
                             '2014-08-16T19:00:00')).to_array('var')
    da16['time'] = np.linspace(13, 19, len(da16['time']))
    # da16['time'] = da16['time'].dt.time
    dss = xr.concat([da8, da16], 'date')
    # dss['date'] = [pd.to_datetime(x).date() for x in ['2014-08-08', '2014-08-16']]
    dss['date'] = ['2014-08-08', '2014-08-16']
    return dss
def prepare_for_go(self):
    logger.debug('prepare_for_go...')
    try:
        # input is either going to be a netcdf file or a list of rmc6f files
        data = xr.load_dataset(self.input)
        assert 'rmcalyse_version' in data.attrs.keys()
        logger.info('successfully loaded data from netcdf file {}'.format(
            self.input))
        self.data = data
    except FileNotFoundError as e:
        logger.debug('File {} not found: {}'.format(self.input, e))
        raise
    except (OSError, AssertionError, AttributeError):
        logger.debug(
            'File {} is not a valid rmcalyse/netcdf4 file. trying rmc6f loader...'
            .format(self.input))
        try:
            self._load_rmc6f_files()
        except Exception as e:
            logger.error(
                'there was an unhandled error in loading {}: {}'.format(
                    self.input, e))
def saveDataToFile(self, source):
    """
      Saves the given data as database to file.
      @ In, source, DataObjects.DataObject, object to write to file
      @ Out, None
    """
    ds, meta = source.getData()
    # we actually just tell the DataSet to write out as netCDF
    path = self.get_fullpath()
    # TODO set up to use dask for on-disk operations
    # convert metadata into writeable
    for key, xml in meta.items():
        ds.attrs[key] = xmlUtils.prettify(xml.getRoot())
    # get rid of "object" types
    for var in ds:
        if ds[var].dtype == np.dtype(object):
            # is it a string?
            if mathUtils.isAString(ds[var].values[0]):
                ds[var] = ds[var].astype(str)
    # is there existing data? Read it in and merge it, if so
    # -> we've already wiped the file in initializeDatabase if it's in write mode
    if os.path.isfile(path):
        exists = xr.load_dataset(path)
        if 'RAVEN_sample_ID' in exists:
            floor = int(exists['RAVEN_sample_ID'].values[-1]) + 1
            new = ds['RAVEN_sample_ID'].values + floor
            ds = ds.assign_coords(RAVEN_sample_ID=new)
        # NOTE order matters! This preserves the sampling order in which data
        # was inserted into this database
        ds = xr.concat((exists, ds), 'RAVEN_sample_ID')
    # if this is open somewhere else, we can't write to it
    # TODO is there a way to check if it's writable? I can't find one ...
    try:
        ds.to_netcdf(path, engine=self._format)
    except PermissionError:
        self.raiseAnError(
            PermissionError,
            f'NetCDF file "{path}" denied RAVEN permission to write! Is it open in another program?')
def read_multiple_strain_netcdfs(MyParams, plot_type):
    """
    Get all the models (e.g. gpsgridder, geostats, huang, etc.) that have
    computed plot_type of strain rate and return them as a single xarray
    Dataset.

    Parameters
    ----------
    MyParams: object - parameter object whose strain_dict attribute maps
        strain rate methods to their directories
    plot_type: str - the type of strain rate quantity to return. Can be
        max_shear, dilatation, etc.

    Returns
    -------
    ds_new: xarray Dataset - a dataset containing the plot_type variable
        from each type of model
    """
    building_dict = {}
    for method in MyParams.strain_dict.keys():
        specific_filename = os.path.join(MyParams.strain_dict[method],
                                         "{}_strain.nc".format(method))
        ds = xr.load_dataset(specific_filename)
        building_dict[method] = ds[plot_type]
    ds_new = xr.Dataset(building_dict, coords=ds.coords)
    return ds_new
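# Hypothetical call: a lightweight stand-in for MyParams carrying the
# strain_dict attribute the function expects; the method name and directory
# are assumptions.
from types import SimpleNamespace
params = SimpleNamespace(strain_dict={"gpsgridder": "Results/gpsgridder"})
max_shear_ds = read_multiple_strain_netcdfs(params, "max_shear")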
def iterate_intervals():
    print(" Iterating over intervals...", flush=True)
    values_changed = 0
    # Since intervals are stored within each file, this process is
    # more straightforward than iterating over durations since all the info
    # we need to compare interval values is contained within a single file.
    # We just have to work on each file in turn.
    # Since the first iteration over intervals occurs after the iteration over
    # durations, we can also just load from the output directory since the
    # durations step would have moved everything into there.
    files = glob(os.path.join(out_path, f"*_{data_group}_*.nc"))
    for file in files:
        print(f" {os.path.basename(file)}", flush=True)
        ds = xr.load_dataset(file)
        for i in range(1, len(ds.interval)):
            # an index may contain only a single ellipsis, so [i, ...]
            # rather than [i, ..., ...]
            higher_arr = ds['pf'][i, ...].values
            lower_arr = ds['pf'][i - 1, ...].values
            diff = higher_arr - lower_arr
            mask = diff <= 0
            num = np.count_nonzero(mask)
            if num > 0:
                print(f" {num} value(s) changed", flush=True)
                values_changed += num
                higher_arr[mask] = lower_arr[mask] * 1.01
                ds['pf'][i, ...] = higher_arr
        ds.close()
        ds.to_netcdf(file)
    return values_changed
def get_weights():
    ds = xr.load_dataset("weights.nc")
    n_s = ds.dims['n_s']
    # row/col indices are stored 1-based; shift to 0-based for scipy
    col = ds['col'].values - 1
    row = ds['row'].values - 1
    S = ds['S'].values
    A = coo_matrix((S, (row, col)))
    if args.type == 'csc':
        A = A.tocsc()
    elif args.type == 'csr':
        A = A.tocsr()
    elif args.type == 'coo':
        pass
    else:
        raise ValueError(f"unknown sparse matrix type: {args.type}")
    return A
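# Sketch of applying the weights: multiplying the sparse matrix by a flattened
# source field yields the flattened destination field. The random input is
# purely illustrative.
import numpy as np
A = get_weights()
src = np.random.rand(A.shape[1])
dst = A @ src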
def _set_cache_from_netcdf(cls, ds: DataSetInMem, xr_path: Optional[str]) -> bool:
    import xarray as xr

    success = True
    if xr_path is not None:
        try:
            loaded_data = xr.load_dataset(xr_path, engine="h5netcdf")
            ds._cache = DataSetCacheInMem(ds)
            ds._cache._data = cls._from_xarray_dataset_to_qcodes_raw_data(
                loaded_data)
        except (
            FileNotFoundError,
            OSError,
        ):  # older versions of h5py may throw a OSError here
            success = False
            warnings.warn("Could not load raw data for dataset with guid :"
                          f"{ds.guid} from location {xr_path}")
    else:
        warnings.warn(
            f"No raw data stored for dataset with guid : {ds.guid}")
        success = False
    return success
def produce_pwv_from_dsea_axis_station(path=axis_path, ims_path=ims_path):
    """use axis_path = work_yuval/dsea_gispyx for original soi-apn dsea
    station"""
    import xarray as xr
    from aux_gps import transform_ds_to_lat_lon_alt
    from aux_gps import get_unique_index
    ds = xr.load_dataset(path / 'smoothFinal_2014.nc').squeeze()
    ds = get_unique_index(ds)
    # for now cut:
    if 'axis' in path.as_posix():
        ds = ds.sel(time=slice(None, '2014-08-12'))
    ds = transform_ds_to_lat_lon_alt(ds)
    axis_zwd = ds['WetZ']
    ts = xr.open_dataset(ims_path / 'IMS_TD_israeli_10mins.nc')['SEDOM']
    axis_pwv = produce_pwv_from_zwd_with_ts_tm_from_deserve(ts=ts, zwd=axis_zwd)
    if 'axis' in path.as_posix():
        axis_pwv.name = 'AXIS-DSEA'
    else:
        axis_pwv.name = 'SOI-DSEA'
    axis_pwv.attrs['lat'] = ds['lat'].values[0]
    axis_pwv.attrs['lon'] = ds['lon'].values[0]
    axis_pwv.attrs['alt'] = ds['alt'].values[0]
    return axis_pwv
def __init__(self, path):
    # waiting time statistics
    self.delta_t_025 = np.array([])
    self.delta_t_100 = np.array([])
    self.delta_t_300 = np.array([])
    self.delta_t_all = np.array([])

    print('Reading the statistics dataset at {}'.format(path))
    stats_dataset = xr.load_dataset(path)

    for key in list(stats_dataset.coords):
        if key[-2:] != '_t':
            continue
        if key[3:6] == '025':
            self.delta_t_025 = np.concatenate(
                [self.delta_t_025, stats_dataset[key].data])
        if key[3:6] == '100':
            self.delta_t_100 = np.concatenate(
                [self.delta_t_100, stats_dataset[key].data])
        if key[3:6] == '300':
            self.delta_t_300 = np.concatenate(
                [self.delta_t_300, stats_dataset[key].data])
    self.delta_t_all = stats_dataset['deltatall'].data
def get_pressure_lapse_rate(path=ims_path, model='LR', plot=False):
    from aux_gps import linear_fit_using_scipy_da_ts
    import matplotlib.pyplot as plt
    import xarray as xr
    from aux_gps import keep_iqr
    bp = xr.load_dataset(ims_path / 'IMS_BP_israeli_10mins.nc')
    bps = [keep_iqr(bp[x]) for x in bp]
    bp = xr.merge(bps)
    mean_p = bp.mean('time').to_array('alt')
    mean_p.name = 'mean_pressure'
    alts = [bp[x].attrs['station_alt'] for x in bp.data_vars]
    mean_p['alt'] = alts
    _, results = linear_fit_using_scipy_da_ts(
        mean_p, model=model, slope_factor=1, not_time=True)
    slope = results['slope']
    inter = results['intercept']
    modeled_var = slope * mean_p['alt'] + inter
    if plot:
        fig, ax = plt.subplots()
        modeled_var.plot(ax=ax, color='r')
        mean_p.plot.line(linewidth=0., marker='o', ax=ax, color='b')
        # lr = 1000 * abs(slope)
        textstr = 'Pressure lapse rate: {:.1f} hPa/km'.format(1000 * slope)
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        # place a text box in upper left in axes coords
        ax.text(0.5, 0.95, textstr, transform=ax.transAxes, fontsize=12,
                verticalalignment='top', bbox=props)
        ax.set_xlabel('Height a.s.l [m]')
        ax.set_ylabel('Mean Pressure [hPa]')
    return results
def perform_pwv_filling_last_decade(path=work_yuval, fyear='2009',
                                    lyear='2019', drop=['slom', 'elro']):
    import xarray as xr
    from aux_gps import save_ncfile
    pw = xr.load_dataset(path / 'GNSS_PW_monthly_thresh_50.nc')
    pw = pw.sel(time=slice(fyear, lyear))
    pw = pw.drop_vars(drop)
    prepare_pwv_for_climatol(freq='monthly', first_year=fyear,
                             last_year=lyear, pwv_ds=pw)
    # then run these two lines in R:
    # homogen('PWV',2009,2019, na.strings="-999.9",dz.max=7,std=2)
    # dahstat('PWV',2009,2019,stat='series',long=TRUE)
    ds, ds_flag = read_climatol_results(first_year=fyear, last_year=lyear)
    filename = 'GNSS_PW_monthly_homogenized_filled_{}-{}.nc'.format(
        fyear, lyear)
    save_ncfile(ds, path, filename)
    filename = 'GNSS_PW_monthly_homogenized_filled_flags_{}-{}.nc'.format(
        fyear, lyear)
    save_ncfile(ds_flag, path, filename)
    return