def setup(self):
    requires_dask()
    self.make_ds()
    self.format = "NETCDF3_64BIT"
    xr.save_mfdataset(self.ds_list, self.filenames_list, format=self.format)
def save_segments_to_netcdf(segments, output_path, mission_name=''):
    full_output_path = "%s/%s/" % (output_path, mission_name)
    if not os.path.isdir(full_output_path):
        os.makedirs(full_output_path)
    output_filenames = [full_output_path + mission_name + '_segment_%02d.nc' % (ns + 1)
                        for ns in range(len(segments))]
    xr.save_mfdataset(segments, output_filenames)
def writefile(ds, fs, io_format, root, fname):
    filename = f'sst.{fname}'
    # if isinstance(fs, fsspec.AbstractFileSystem):
    if io_format == 'zarr':
        if isinstance(fs, fsspec.AbstractFileSystem):
            store = fs.get_mapper(root=f'{root}/{filename}.zarr', check=False, create=True)
        else:
            store = f'{root}/test1/{filename}.zarr'
        ds = ds.to_zarr(
            store,
            encoding={'sst': {'compressor': None}},
            consolidated=True,
            compute=False,
            mode='w',
        )
        ds.compute()
    elif io_format == 'netcdf':
        ds_list = list(split_by_chunks(ds))
        dss = [item[1] for item in ds_list]
        paths = [create_filepath(ds, prefix=filename, root_path=f'{root}/test1') for ds in dss]
        xr.save_mfdataset(datasets=dss, paths=paths, engine='h5netcdf', parallel=True)
        if isinstance(fs, fsspec.AbstractFileSystem):
            fs.upload(lpath=f'{root}/test1', rpath=f'{root}/', recursive=True)
    return filename
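# The helpers split_by_chunks and create_filepath are not defined in the snippet above.
# A minimal sketch of what they might look like (hypothetical implementations, assuming a
# dask-chunked dataset with a one-dimensional 'time' coordinate):
import itertools

import pandas as pd


def split_by_chunks(dataset):
    """Yield (selection, sub-dataset) pairs, one per dask chunk of `dataset`."""
    chunk_slices = {}
    for dim, chunks in dataset.chunks.items():
        slices = []
        start = 0
        for chunk in chunks:
            slices.append(slice(start, start + chunk))
            start += chunk
        chunk_slices[dim] = slices
    for slices in itertools.product(*chunk_slices.values()):
        selection = dict(zip(chunk_slices.keys(), slices))
        # dict indexing on a Dataset is positional (isel-style) selection
        yield (selection, dataset[selection])


def create_filepath(ds, prefix='filename', root_path='.'):
    """Build a path that encodes the time span covered by `ds`."""
    start = pd.to_datetime(ds.time.data[0]).strftime('%Y-%m-%d')
    end = pd.to_datetime(ds.time.data[-1]).strftime('%Y-%m-%d')
    return f'{root_path}/{prefix}_{start}_{end}.nc'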
def to_restart(self, savepath='.', nxpe=None, nype=None, original_splitting=False):
    """
    Write out final timestep as a set of netCDF BOUT.restart files.

    If processor decomposition is not specified then data will be saved
    using the decomposition it had when loaded.

    Parameters
    ----------
    savepath : str
    nxpe : int
    nype : int
    """

    # Set processor decomposition if not given
    if original_splitting:
        if any([nxpe, nype]):
            raise ValueError('Inconsistent choices for domain decomposition.')
        else:
            nxpe, nype = self.metadata['NXPE'], self.metadata['NYPE']

    # Is this even possible without saving the guard cells?
    # Can they be recreated?
    restart_datasets, paths = _split_into_restarts(self.data, savepath, nxpe, nype)
    with ProgressBar():
        save_mfdataset(restart_datasets, paths, compute=True)
    return
def setup(self):
    requires_dask()
    self.make_ds()
    self.format = 'NETCDF3_64BIT'
    xr.save_mfdataset(self.ds_list, self.filenames_list, format=self.format)
def save_density_netcdf(rho_da: xr.DataArray) -> None:
    """Passes in the density DataArray and saves it in a reasonable way.

    Args:
        rho_da (xr.DataArray): [description]
    """
    # save_mfdataset only accepts Dataset objects, so promote the (named) DataArray first
    xr.save_mfdataset([rho_da.to_dataset()], ["nc/Density.nc"], format="NETCDF4")
def test_save_mfdataset_roundtrip(self):
    original = Dataset({'foo': ('x', np.random.randn(10))})
    datasets = [original.isel(x=slice(5)), original.isel(x=slice(5, 10))]
    with create_tmp_file() as tmp1:
        with create_tmp_file() as tmp2:
            save_mfdataset(datasets, [tmp1, tmp2])
            with open_mfdataset([tmp1, tmp2]) as actual:
                self.assertDatasetIdentical(actual, original)
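# The same round-trip outside the test harness, as a minimal self-contained sketch
# (file names are arbitrary): split a dataset along 'x', write each piece with
# save_mfdataset, then reassemble it lazily with open_mfdataset.
import numpy as np
import xarray as xr

original = xr.Dataset({'foo': ('x', np.random.randn(10))})
pieces = [original.isel(x=slice(5)), original.isel(x=slice(5, 10))]
xr.save_mfdataset(pieces, ['part1.nc', 'part2.nc'])
# 'x' has no coordinate here, so the files must be combined in list order
with xr.open_mfdataset(['part1.nc', 'part2.nc'], combine='nested', concat_dim='x') as roundtrip:
    assert roundtrip.identical(original)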
def netcdf_local_paths(daily_xarray_dataset, tmpdir_factory, request):
    """Return a list of paths pointing to netcdf files."""
    tmp_path = tmpdir_factory.mktemp("netcdf_data")
    # copy needed to avoid polluting metadata across multiple tests
    datasets, fnames = _split_up_files_by_day(daily_xarray_dataset.copy(), request.param)
    full_paths = [tmp_path.join(fname) for fname in fnames]
    xr.save_mfdataset(datasets, [str(path) for path in full_paths])
    items_per_file = {"D": 1, "2D": 2}[request.param]
    return full_paths, items_per_file
def x_grad() -> None:
    """Save x grad."""
    density_da = xr.open_mfdataset("nc/density.nc", decode_cf=False).astype("float32")
    grad_da = density_da.Density.differentiate(cst.X_COORD).astype("float32")
    density_da["x_grad"] = grad_da
    grad_ds = density_da.drop("Density").astype("float32")
    xr.save_mfdataset([grad_ds], ["nc/density_grad_x.nc"], format="NETCDF4")
def write_netcdf(ds, netcdf_dir, netcdf_prefix):
    ds.attrs['history'] = (ds.attrs['history'] +
                           ', written to NetCDF files using write_netcdf:' +
                           datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    prefix = os.path.join(netcdf_dir, netcdf_prefix)
    years, datasets = zip(*ds.groupby('time.year'))
    paths = [prefix + '%s.nc' % y for y in years]
    xr.save_mfdataset(datasets, paths)
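# A minimal, self-contained sketch of the same one-file-per-year idiom used above
# (synthetic data and file names are made up for illustration):
import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range('2000-01-01', '2001-12-31', freq='D')
ds = xr.Dataset({'tas': ('time', np.random.rand(time.size))}, coords={'time': time})
years, datasets = zip(*ds.groupby('time.year'))
paths = ['demo_%s.nc' % y for y in years]
xr.save_mfdataset(datasets, paths)  # writes demo_2000.nc and demo_2001.nc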
def netcdf_local_paths(daily_xarray_dataset, tmpdir_factory, request):
    """Return a list of paths pointing to netcdf files."""
    tmp_path = tmpdir_factory.mktemp("netcdf_data")
    gb = daily_xarray_dataset.resample(time=request.param)
    _, datasets = zip(*gb)
    fnames = [f"{n:03d}.nc" for n in range(len(datasets))]
    paths = [tmp_path.join(fname) for fname in fnames]
    print(len(paths))
    xr.save_mfdataset(datasets, [str(path) for path in paths])
    return paths
def netcdf_local_paths_by_variable(daily_xarray_dataset, tmpdir_factory, request):
    """Return a list of paths pointing to netcdf files."""
    tmp_path = tmpdir_factory.mktemp("netcdf_data")
    datasets, fnames, fnames_by_variable = _split_up_files_by_variable_and_day(
        daily_xarray_dataset.copy(), request.param)
    full_paths = [tmp_path.join(fname) for fname in fnames]
    xr.save_mfdataset(datasets, [str(path) for path in full_paths])
    items_per_file = {"D": 1, "2D": 2}[request.param]
    path_format = str(tmp_path) + "/{variable}_{n:03d}.nc"
    return full_paths, items_per_file, fnames_by_variable, path_format
def metrics_save(metrics, odir, fname, mf_save=False, **kwargs):
    for kk in metrics.keys():
        if mf_save:
            years, datasets = zip(*metrics[kk].groupby('time.year'))
            paths = [pjoin(odir, '%04i_%s_%s.nc' % (y, fname, kk)) for y in years]
            xr.save_mfdataset(datasets, paths)
        else:
            metrics[kk].to_netcdf(pjoin(odir, '%s_%s.nc' % (fname, kk)), **kwargs)
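# A hypothetical usage sketch for metrics_save (names invented; `odir` must already exist
# and pjoin is assumed to be os.path.join):
import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range('2000-01-01', periods=24, freq='MS')
metrics = {'sst_mean': xr.Dataset({'sst': ('time', np.random.rand(24))}, coords={'time': time})}
metrics_save(metrics, odir='.', fname='demo', mf_save=True)
# -> ./2000_demo_sst_mean.nc and ./2001_demo_sst_mean.nc, one file per year per metric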
def split_netcdf(file2split):
    '''Splits a concatenated file (SURF.nc, PLEV.nc) into monthly files as
    originally downloaded from CDS, for use in tscale_fast.'''
    basename = file2split.split(".nc")[0]
    ds = xr.open_dataset(file2split)
    dates, datasets = zip(*ds.resample(time='1M').mean('time').groupby('time'))
    filenames = [basename + "_" + pd.to_datetime(date).strftime('%Y%m') + '.nc'
                 for date in dates]
    xr.save_mfdataset(datasets, filenames)
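# Hypothetical usage: given a concatenated CDS download SURF.nc, this writes
# SURF_YYYYMM.nc files next to the input, each holding a single monthly-mean timestep:
# split_netcdf("SURF.nc")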
def save_years_wrapper(ds, odir, name, start_year, timesteps_per_yr=1, timedim='time', **kwargs):
    if not os.path.isdir(odir):
        os.mkdir(odir)
    years = list(range(start_year, start_year + len(ds[timedim])))
    datasets = [ds[{timedim: a}] for a in range(len(ds[timedim]))]
    paths = [pjoin(odir, '%04i.' + name) % y for y in years]
    xr.save_mfdataset(datasets, paths, **kwargs)
def merge_and_save(k_clusters: int = 5, pca: int = 3) -> None:
    """Merge and save joint."""
    pca_ds = xr.open_mfdataset(
        io.return_folder(k_clusters, pca) + "*.nc",
        concat_dim=cst.T_COORD,
        combine="by_coords",
        chunks={cst.T_COORD: 1},
        data_vars="minimal",
        coords="minimal",
        compat="override",
    )
    xr.save_mfdataset(
        [pca_ds], [io.return_name(k_clusters, pca) + ".nc"], format="NETCDF4"
    )
def save_big_ERA5_dataset_as_yearly_files(file, time_dim='time', verbose=True):
    import xarray as xr
    ds = xr.open_dataset(file)
    years, datasets = zip(*ds.groupby("{}.year".format(time_dim)))
    savepath, filename = return_savepath_and_filename_from_filepath(file)
    paths = [savepath / (filename + '_{}.nc'.format(y)) for y in years]
    # paths = ["%s.nc" % y for y in years]
    if verbose:
        yrmin = min(years)
        yrmax = max(years)
        filemin = filename + '_{}.nc'.format(yrmin)
        filemax = filename + '_{}.nc'.format(yrmax)
        print('saving {} to {}.'.format(filemin, filemax))
    xr.save_mfdataset(datasets, paths)
    return
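# Hypothetical usage (return_savepath_and_filename_from_filepath is defined elsewhere in the
# source module; it is assumed to return a pathlib.Path and the file stem):
# save_big_ERA5_dataset_as_yearly_files('era5_hourly_t2m_1979-2020.nc')
# -> era5_hourly_t2m_1979-2020_1979.nc ... era5_hourly_t2m_1979-2020_2020.nc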
def save_mfdataset(self):
    """Use xarray.save_mfdataset to write multiple netcdf files."""
    dar1 = xarray.DataArray(np.random.randn(2, 3))
    dar2 = xarray.DataArray(np.random.randn(2, 3))
    data1 = xarray.Dataset({'foo': dar1, 'bar': ('x', [1, 2])})
    data2 = xarray.Dataset({'foo': dar2, 'bar': ('x', [1, 2])})
    file_names = [os.path.join(self.data_dir, f) for f in ['data1.nc', 'data2.nc']]
    xarray.save_mfdataset([data1, data2], file_names)
    for f in file_names:
        os.remove(f)
def make_netcdf(station_dir, netcdf_dir, netcdf_prefix,
                station, download=False, overwrite=False):
    """
    Create a netcdf file containing MLML historical seawater or weather data.
    The file will be created from csv and readme files already on disk,
    or they can be downloaded.

    INPUT:
    station_dir   - string specifying the location of csv files
                    (e.g. '/home/username/data/')
    netcdf_dir    - string specifying the location of netcdf files to be created
                    (e.g. '/home/username/data/')
    netcdf_prefix - string specifying the filename pattern for netcdf files;
                    the year will be appended to this prefix
                    (e.g. 'moss_landing_' for moss_landing_2015.nc,
                    moss_landing_2016.nc, etc.)
    station       - either 'seawater' or 'weather' (default: 'seawater')
    download      - boolean specifying whether to download new files
                    (default: False)
    overwrite     - boolean specifying whether to overwrite the existing files,
                    only used if downloading new data (default: False)
    """

    # download new data, if specified
    if download == True:
        download_station_data(station_dir, station, overwrite)

    # read data in csv files to xarray dataset
    d = read_csv_data(station_dir, format='dataset')

    # specify location of readme file and add metadata to dataset
    readme_file = station_dir + '1_README.TXT'
    d = add_metadata(d, station, readme_file)

    # Additional processing
    d = cleanup_raw(d)
    d = add_flags(d)
    d.attrs['history'] = (d.attrs['history'] +
                          'netcdf file created using mlml.make_netcdf(station_dir=' + station_dir +
                          ',netcdf_dir=' + netcdf_dir +
                          ',netcdf_prefix=' + netcdf_prefix +
                          ',station=' + station +
                          ',download=' + str(download) +
                          ',overwrite=' + str(overwrite) + '): ' +
                          datetime.now().strftime("%Y-%m-%d %H:%M:%S") + ')')

    # create netcdf files
    prefix = os.path.join(netcdf_dir, netcdf_prefix)
    years, datasets = zip(*d.groupby('time.year'))
    paths = [prefix + '%s.nc' % y for y in years]
    xr.save_mfdataset(datasets, paths)
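# Hypothetical usage, following the docstring's own examples:
# make_netcdf('/home/username/data/', '/home/username/data/', 'moss_landing_',
#             station='seawater', download=False)
# -> /home/username/data/moss_landing_2015.nc, moss_landing_2016.nc, ...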
def writefile(ds, fs, io_format, root, fname):
    filename = f'sst.{fname}'
    if io_format == 'zarr':
        store = fs.get_mapper(root=f'{root}/{filename}.zarr', check=False, create=True)
        ds = ds.to_zarr(
            store,
            encoding={'sst': {'compressor': None}},
            consolidated=True,
            compute=False,
            mode='w',
        )
        ds.compute()
    elif io_format == 'netcdf':
        ds_list = list(split_by_chunks(ds))
        dss = [item[1] for item in ds_list]
        paths = [create_filepath(ds, prefix=filename, root_path=f'{root}') for ds in dss]
        xr.save_mfdataset(datasets=dss, paths=paths)
        if fs.protocol[0] == 's3':
            fs.upload(lpath=f'{root}', rpath=f'{root}/', recursive=True)
    return filename
def to_netcdf(self, savedir):
    """Save to netCDF4 files

    Args:
        savedir (str): Path to save files to
    """
    # Add dates
    self._subset_ds['date'] = self._subset_ds.time.to_pandas().dt.strftime('%Y%m%d')

    # Write as multi-file dataset
    dates, datasets = zip(*self._subset_ds.groupby('date'))
    basename = self._ds.attrs['title'].split(':')[0].replace(' ', '.')
    filepaths = [
        os.path.abspath(os.path.join(savedir, '{}.{}.SUB.nc4'.format(basename, date)))
        for date in dates
    ]
    logger.debug("Writing to %s", os.path.abspath(savedir))
    xa.save_mfdataset(datasets, filepaths)
def take_derivative_density(
    dimension: str = cst.Y_COORD, typ: str = "float32", engine: str = "h5netcdf"
) -> None:
    """
    Take derivative of density.

    Args:
        dimension (str, optional): [description]. Defaults to cst.Y_COORD.
        typ (str, optional): [description]. Defaults to "float32".
        engine (str, optional): [description]. Defaults to "h5netcdf".
    """
    chunk_d = {cst.T_COORD: 1, cst.Z_COORD: 52, cst.Y_COORD: 588, cst.X_COORD: 2160}
    density_ds = xr.open_mfdataset(
        "nc/density.nc",
        # decode_cf=False,
        chunks=chunk_d,
        combine="by_coords",
        data_vars="minimal",
        coords="minimal",
        compat="override",
        engine=engine,
        parallel=True,
    ).astype(typ)
    grad_da = density_ds.Density.differentiate(dimension)  # .astype(typ).chunk(chunks=chunk_d)
    name = "Density_Gradient_" + dimension
    grad_ds = grad_da.to_dataset().rename_vars({"Density": name})
    grad_ds[name].attrs["long_name"] = "Density Gradient " + dimension
    grad_ds[name].attrs["units"] = "kg m-3 box-1"
    xr.save_mfdataset(
        [grad_ds], ["nc/density_grad_" + dimension + ".nc"], format="NETCDF4"
    )
def y_grad(set_ok: bool = False) -> None:
    """Take y grad.

    Args:
        set_ok (bool, optional): take y gradient of density. Defaults to False.
    """
    density_da = xr.open_mfdataset(
        "nc/density.nc", decode_cf=False, parallel=True
    ).astype("float32")
    grad_da = (
        density_da.Density.astype("float32")
        .differentiate(cst.Y_COORD)
        .astype("float32")
    )
    del density_da
    if not set_ok:
        grad_da.to_netcdf("nc/density_grad_y_da.nc", engine="netcdf4")
    else:
        grad_ds = grad_da.to_dataset().astype("float32")
        # density_da['y_grad'] = grad_da
        # grad_ds = density_da.drop('Density')
        xr.save_mfdataset([grad_ds], ["nc/density_grad_y.nc"], format="NETCDF4")
def test_save_mfdataset_invalid(self):
    ds = Dataset()
    with self.assertRaisesRegexp(ValueError, 'cannot use mode'):
        save_mfdataset([ds, ds], ['same', 'same'])
    with self.assertRaisesRegexp(ValueError, 'same length'):
        save_mfdataset([ds, ds], ['only one path'])
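# A brief sketch of why these calls fail: save_mfdataset refuses duplicate paths under the
# default mode='w' (the second write would clobber the first), and it requires the datasets
# and paths lists to have the same length.
import xarray as xr

ds = xr.Dataset()
try:
    xr.save_mfdataset([ds, ds], ['same.nc', 'same.nc'])
except ValueError as err:
    print(err)  # complains about mode='w' with multiple datasets written to the same path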
def time_write_dataset_netcdf4(self):
    xr.save_mfdataset(self.ds_list, self.filenames_list,
                      engine='netcdf4', format=self.format)
# add some attributes for convenience to the stats
conv_intensity.attrs['units'] = 'mm/hour'
conv_mean.attrs['units'] = 'mm/hour'
conv_area.attrs['units'] = '% of radar area'
stra_intensity.attrs['units'] = 'mm/hour'
stra_mean.attrs['units'] = 'mm/hour'
stra_area.attrs['units'] = '% of radar area'

# save as netcdf-files
path = '/Users/mret0001/Desktop/'
xr.save_mfdataset(
    [
        xr.Dataset({'conv_intensity': conv_intensity}),
        xr.Dataset({'conv_rr_mean': conv_mean}),
        xr.Dataset({'conv_area': conv_area}),
        xr.Dataset({'stra_intensity': stra_intensity}),
        xr.Dataset({'stra_rr_mean': stra_mean}),
        xr.Dataset({'stra_area': stra_area}),
    ],
    [
        path + 'conv_intensity.nc',
        path + 'conv_rr_mean.nc',
        path + 'conv_area.nc',
        path + 'stra_intensity.nc',
        path + 'stra_rr_mean.nc',
        path + 'stra_area.nc',
    ],
)

# sanity check
check = False
if check:
    r = ds_rr.radar_estimated_rain_rate / 6.
    cr = r.where(ds_st.steiner_echo_classification == 2)
    cr = cr.where(cr != 0.)
    cr_1h = cr[9774:9780, :, :].load()  # the most precip hour in the 09/10-season
    # cr_1h = cr[9774, :, :].load()  # '2010-02-25T21:00:00'
def time_write_dataset_scipy(self):
    xr.save_mfdataset(self.ds_list, self.filenames_list,
                      engine='scipy', format=self.format)
def main(argv=None):
    # Creating an argparse object
    parser = argparse.ArgumentParser(description='ZPLSC/G echogram generator')

    # Creating input arguments
    parser.add_argument('-s', '--site', dest='site', type=str, required=True,
                        help='The OOI 8-letter site name for where the ZPLSC/G is located.')
    parser.add_argument('-d', '--data_directory', dest='data_directory', type=str, required=True,
                        help='The path to the root directory below which the .01A or .raw files may be found.')
    parser.add_argument('-o', '--output_directory', dest='output_directory', type=str, required=True,
                        help='The path to the root directory below which the .nc file(s) and .png plot will be saved.')
    parser.add_argument('-dr', '--date_range', dest='dates', type=str, nargs='+', required=True,
                        help=('Date range to plot as either YYYYMM or YYYYMMDD. Specifying an end date is optional, '
                              'it will be assumed to be 1 month or 1 day depending on input.'))
    parser.add_argument('-zm', '--zpls_model', dest='zpls_model', type=str, required=True,
                        help='Specifies the ZPLS instrument model, either AZFP or EK60.')
    parser.add_argument('-xf', '--xml_file', dest='xml_file', type=str, required=False,
                        help='The path to .XML file used to process the AZFP data in the .01A files')
    parser.add_argument('-tc', '--tilt_correction', dest='tilt_correction', type=int, required=False,
                        help='Apply tilt correction in degree(s)')
    parser.add_argument('-dd', '--deployed_depth', dest='deployed_depth', type=int, required=False,
                        help='The depth where the ZPLSC/G is located at')
    parser.add_argument('-cr', '--colorbar_range', dest='colorbar_range', type=int, nargs=2, required=False,
                        help='Set colorbar range. Usage: "min" "max"')
    parser.add_argument('-vr', '--vertical_range', dest='vertical_range', type=int, nargs=2, required=False,
                        help='Set the range for the y-axis. Usage: "min" "max"')

    # parse the input arguments
    args = parser.parse_args(argv)
    site = args.site.upper()
    data_directory = os.path.abspath(args.data_directory)
    output_directory = os.path.abspath(args.output_directory)
    dates = args.dates
    zpls_model = args.zpls_model.upper()
    tilt_correction = args.tilt_correction
    deployed_depth = args.deployed_depth
    colorbar_range = args.colorbar_range
    vertical_range = args.vertical_range
    xml_file = args.xml_file
    if xml_file:
        xml_file = os.path.abspath(xml_file)

    # assign per site variables
    if site in site_config:
        # if tilt_correction flag is not set, set the tilt correction from the site configuration
        if tilt_correction is None:
            tilt_correction = site_config[site]['tilt_correction']
        # if deployed_depth flag is not set, set the deployed_depth from the site configuration
        if deployed_depth is None:
            deployed_depth = site_config[site]['deployed_depth']
        # if colorbar_range flag is not set, set the colorbar_range from the site configuration
        if colorbar_range is None:
            colorbar_range = site_config[site]['colorbar_range']
        # if vertical_range flag is not set, set the vertical_range from the site configuration
        if vertical_range is None:
            vertical_range = site_config[site]['vertical_range']
    elif site is not None:
        raise parser.error('The site name was not found in the configuration dictionary.')

    # make sure the root output directory exists
    if not os.path.isdir(output_directory):
        os.mkdir(output_directory)

    # use the ZPLS model to determine how to process the data
    data = None
    if zpls_model not in ['AZFP', 'EK60']:
        raise ValueError('The ZPLS model must be set as either AZFP or EK60 (case insensitive)')
    else:
        if zpls_model == 'AZFP':
            data = process_azfp(site, data_directory, xml_file, output_directory, dates, tilt_correction)
        if zpls_model == 'EK60':
            data = process_ek60(site, data_directory, output_directory, dates, tilt_correction)

    # test to see if we have any data from the processing
    if not data:
        return None

    # save the full resolution data to daily NetCDF files
    file_name = set_file_name(site, dates)
    output_directory = os.path.join(output_directory, dates[0] + '-' + dates[1])

    # reset a couple data types (helps to control size of NetCDF files)
    data['range'] = data['range'].astype(np.float32)
    data['Sv'] = data['Sv'].astype(np.float32)

    # split the data into daily records
    days, datasets = zip(*data.groupby("ping_time.day"))

    # create a list of file names based on the day of the record
    start = datetime.strptime(dates[0], '%Y%m%d')
    stop = datetime.strptime(dates[1], '%Y%m%d')
    date_list = [start + timedelta(days=x) for x in range(0, (stop - start).days)]
    nc_file = os.path.join(output_directory, file_name)
    nc_files = []
    for day in days:
        for dt in date_list:
            if dt.day == day:
                nc_files.append(nc_file + "_Full_%s.nc" % dt.strftime('%Y%m%d'))

    # convert ping_time from a datetime64[ns] object to a float (seconds since 1970) and update the attributes
    for dataset in datasets:
        dataset['ping_time'] = dataset['ping_time'].values.astype(np.float64) / 10.0 ** 9
        dataset.attrs = attributes['global']
        dataset.attrs['instrument_orientation'] = site_config[site]['instrument_orientation']
        for v in dataset.variables:
            dataset[v].attrs = attributes[v]

    # save the daily files
    xr.save_mfdataset(datasets, nc_files, mode='w', format='NETCDF4', engine='h5netcdf')

    # if a global mooring, create hourly averaged data records, otherwise create 15 minute records
    if 'HYPM' in site:
        # resample the data into a 60 minute, median averaged record, filling gaps less than 180 minutes
        avg = data.resample(ping_time='60Min').mean()
        avg = avg.interpolate_na(dim='ping_time', max_gap='180Min')
    else:
        # resample the data into a 15 minute, median averaged record, filling gaps less than 45 minutes
        avg = data.resample(ping_time='15Min').median()
        avg = avg.interpolate_na(dim='ping_time', max_gap='45Min')

    # generate the echogram
    long_name = site_config[site]['long_name']
    generate_echogram(avg, site, long_name, deployed_depth, output_directory, file_name, dates,
                      vertical_range=vertical_range, colorbar_range=colorbar_range)

    # add the OOI logo as a watermark
    echogram = os.path.join(output_directory, file_name + '.png')
    echo_image = Image.open(echogram)
    ooi_image = Image.open('ooi-logo.png')
    width, height = echo_image.size
    transparent = Image.new('RGBA', (width, height), (0, 0, 0, 0))
    transparent.paste(echo_image, (0, 0))
    if max(vertical_range) > 99:
        transparent.paste(ooi_image, (96, 15), mask=ooi_image)
    else:
        transparent.paste(ooi_image, (80, 15), mask=ooi_image)

    # re-save the echogram with the added logo
    transparent.save(echogram)

    # save the averaged data
    avg['ping_time'] = avg['ping_time'].values.astype(np.float64) / 10.0 ** 9
    avg.attrs = attributes['global']
    avg.attrs['instrument_orientation'] = site_config[site]['instrument_orientation']
    for v in avg.variables:
        avg[v].attrs = attributes[v]
    avg_file = nc_file + '_Averaged.nc'
    avg.to_netcdf(avg_file, mode='w', format='NETCDF4', engine='h5netcdf')
da_mod_avg_mon = da_mod_avg  # Already monthly means

# In[ ]:

# Trim to common time periods
(ds_obs_trim, ds_mod_trim) = esio.trim_common_times(da_obs_avg_mon, da_mod_avg_mon)

# In[ ]:

# Temp dump to netcdf then load
os.chdir(temp_dir)
c_e, datasets = zip(*ds_mod_trim.to_dataset(name='sic').groupby('init_time.year'))
paths = ['GFDL_extent_esns_%s.nc' % e for e in c_e]
xr.save_mfdataset(datasets, paths)

# In[ ]:

print("Done!")

# In[ ]:

# ds_mod_trim = None  # Flush memory

# In[ ]:

# ds_mod_trim = xr.open_mfdataset(os.path.join(temp_dir, 'GFDL_extent_esns_*.nc'), concat_dim='ensemble')
# ds_mod_trim = ds_mod_trim.reindex(ensemble=sorted(ds_mod_trim.ensemble.values))