def open_files(ncfiles, return_dsvar=False):
    """Open netCDF files, either with xray or netCDF4"""
    ds, dsvar = None, None
    try:
        if _ncmodule == 'xray':
            # open files with xray
            try:
                ds = xray.open_mfdataset(ncfiles)
            except ValueError:
                ds = xray.open_mfdataset(ncfiles, decode_times=False)
                print('Warning: Using decode_times=False')
            dsvar = ds
        else:
            # open files with netCDF4
            if len(ncfiles) > 1:
                ds = netCDF4.MFDataset(ncfiles)
            else:
                ds = netCDF4.Dataset(ncfiles[0])
            dsvar = ds.variables
    except RuntimeError:
        # print_exc() reports the active exception; it takes no exception argument
        traceback.print_exc()
        print('Warning: File(s) could not be opened: {}'.format(ncfiles))
    if return_dsvar:
        return ds, dsvar
    else:
        return ds
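
# A minimal usage sketch for open_files (the file names are hypothetical;
# assumes the module-level names used above -- _ncmodule, xray, netCDF4,
# traceback -- are defined in this module):
#
#     ds, dsvar = open_files(['ocn.0001.nc', 'ocn.0002.nc'], return_dsvar=True)
#     if dsvar is not None:
#         print(list(dsvar))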
def test_deterministic_names(self):
    with create_tmp_file() as tmp:
        data = create_test_data()
        data.to_netcdf(tmp)
        with open_mfdataset(tmp) as ds:
            original_names = dict((k, v.data.name) for k, v in ds.items())
        with open_mfdataset(tmp) as ds:
            repeat_names = dict((k, v.data.name) for k, v in ds.items())
        for var_name, dask_name in original_names.items():
            self.assertIn(var_name, dask_name)
            self.assertIn(tmp, dask_name)
        self.assertEqual(original_names, repeat_names)
def test_lock(self):
    original = Dataset({'foo': ('x', np.random.randn(10))})
    with create_tmp_file() as tmp:
        original.to_netcdf(tmp, format='NETCDF3_CLASSIC')
        with open_dataset(tmp, chunks=10) as ds:
            task = ds.foo.data.dask[ds.foo.data.name, 0]
            self.assertIsInstance(task[-1], type(Lock()))
        with open_mfdataset(tmp) as ds:
            task = ds.foo.data.dask[ds.foo.data.name, 0]
            self.assertIsInstance(task[-1], type(Lock()))
        with open_mfdataset(tmp, engine='scipy') as ds:
            task = ds.foo.data.dask[ds.foo.data.name, 0]
            self.assertNotIsInstance(task[-1], type(Lock()))
def test_open_and_do_math(self):
    original = Dataset({'foo': ('x', np.random.randn(10))})
    with create_tmp_file() as tmp:
        original.to_netcdf(tmp)
        with open_mfdataset(tmp) as ds:
            actual = 1.0 * ds
            self.assertDatasetAllClose(original, actual)
def test_preprocess_mfdataset(self):
    original = Dataset({'foo': ('x', np.random.randn(10))})
    with create_tmp_file() as tmp:
        original.to_netcdf(tmp)
        preprocess = lambda ds: ds.assign_coords(z=0)
        expected = preprocess(original)
        with open_mfdataset(tmp, preprocess=preprocess) as actual:
            self.assertDatasetIdentical(expected, actual)
def test_open_mfdataset(self):
    original = Dataset({'foo': ('x', np.random.randn(10))})
    with create_tmp_file() as tmp1:
        with create_tmp_file() as tmp2:
            original.isel(x=slice(5)).to_netcdf(tmp1)
            original.isel(x=slice(5, 10)).to_netcdf(tmp2)
            with open_mfdataset([tmp1, tmp2]) as actual:
                self.assertIsInstance(actual.foo.variable.data, da.Array)
                self.assertEqual(actual.foo.variable.data.chunks, ((5, 5),))
                self.assertDatasetAllClose(original, actual)
            with open_mfdataset([tmp1, tmp2], chunks={'x': 3}) as actual:
                self.assertEqual(actual.foo.variable.data.chunks,
                                 ((3, 2, 3, 2),))
    with self.assertRaisesRegexp(IOError, 'no files to open'):
        open_mfdataset('foo-bar-baz-*.nc')
def test_save_mfdataset_roundtrip(self):
    original = Dataset({'foo': ('x', np.random.randn(10))})
    datasets = [original.isel(x=slice(5)),
                original.isel(x=slice(5, 10))]
    with create_tmp_file() as tmp1:
        with create_tmp_file() as tmp2:
            save_mfdataset(datasets, [tmp1, tmp2])
            with open_mfdataset([tmp1, tmp2]) as actual:
                self.assertDatasetIdentical(actual, original)
def _get_grid_files(self):
    """Get the files holding grid data for an aospy object."""
    datasets = []
    for path in self.grid_file_paths:
        try:
            ds = xray.open_dataset(path, decode_times=False)
        except TypeError:
            ds = xray.open_mfdataset(path, decode_times=False)
        datasets.append(ds)
    return tuple(datasets)
def load_experiment(exp_dir, name="single_timestep", format='csv'):
    """Load the results from a complete experiment into a DataFrame.

    Parameters
    ----------
    exp_dir : str
        The path to the directory containing the output files (CSV or
        netCDF) from the simulations.
    name : str
        The name of the experiment files; default is "single_timestep".
    format : str
        Either "csv" or "nc" for loading the correct input.
    """
    fns = sorted(glob.glob(os.path.join(exp_dir, "%s*.%s" % (name, format))))
    print("Found %d files" % len(fns))
    if format == 'csv':
        dfs = []
        print("Reading...")
        for fn in fns:
            print("  ", fn)
            dfs.append(pd.read_csv(fn, index_col=0))
        df = pd.concat(dfs, ignore_index=True)
        return df
    elif format == 'nc':
        print("Reading...")
        ds = xray.open_mfdataset(fns)
        ds.set_coords(["lat", "lon", "lev"], inplace=True)
        return ds
    else:
        raise ValueError("Format should either be 'nc' or 'csv'.")
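
# Example calls, as a sketch (the experiment directory is hypothetical;
# assumes glob, os, pandas as pd and xray are imported in this module):
#
#     df = load_experiment('output/exp01')               # concatenated CSVs
#     ds = load_experiment('output/exp01', format='nc')  # lazy xray Dataset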
    out_tuple = In_tup(**the_dict)
    return out_tuple


if __name__ == "__main__":
    the_files = glob.glob('mar*nc')
    the_files.sort(key=sort_name)
    #
    # put the 10 ensembles together along a new "ens" dimension
    # checkpoint the output fields for future runs
    #
    firstrun = False
    if firstrun:
        ds = xray.open_mfdataset(the_files, engine='netcdf4',
                                 concat_dim='ens')
        # dump the structure
        print(ds)
        #
        # 3-d ensemble average for temp
        #
        x = ds['x']
        y = ds['y']
        z = ds['z']
        temp = ds['TABS']
        mean_temp = temp[:, 0, :, :, :].mean(dim='ens')
        #
        # same for velocity
        #
        wvel = ds['W']
def get_timeseries(ncfiles, varn, grid, reducefunc=np.nanmean,
                   latlim=None, lonlim=None, k=0):
    """Get time series of any 2D POP field reduced by a numpy function

    Parameters
    ----------
    ncfiles : list of str
        paths to input files
    varn : str
        variable name
    grid : str ('T' or 'U')
        which grid the variable is on
    reducefunc : function
        function to reduce the selected region
        NOTE: must be NaN-aware
    latlim : tup
        latitude limits of the region to reduce
    lonlim : tup
        longitude limits of the region to reduce
    k : int
        layer
    """
    n = len(ncfiles)
    _nfiles_diag(n)
    maxn = get_ulimitn()

    # get mask
    with xray.open_dataset(ncfiles[0], decode_times=False) as ds:
        if latlim is None and lonlim is None:
            mask = None
        else:
            mask = poppygrid.get_grid_mask(
                lon=ds[grid + 'LONG'], lat=ds[grid + 'LAT'],
                lonlim=lonlim, latlim=latlim)
            mask &= ds.variables['KM' + grid][:] > 0

    # read data
    if n <= maxn:
        # few enough files: open them all at once
        with xray.open_mfdataset(ncfiles, decode_times=False) as ds:
            # select variable
            ds = ds[varn]
            # select level
            try:
                ds = ds.isel(z_t=k)
            except ValueError:
                pass
            # apply mask
            if mask is not None:
                ds = ds.where(mask)
            tseries = ds.reduce(reducefunc, ['nlon', 'nlat']).values
            timevar = ds['time']
            timeax = utils.get_time_decimal_year(timevar)
    else:
        # too many files for the open-file limit: read them one by one
        timeax = np.zeros(n)
        tseries = np.zeros(n)
        for i, fname in enumerate(ncfiles):
            with xray.open_dataset(fname, decode_times=False) as ds:
                # select variable
                ds = ds[varn]
                # select level
                try:
                    ds = ds.isel(z_t=k)
                except ValueError:
                    pass
                # apply mask
                if mask is not None:
                    ds = ds.where(mask)
                tseries[i] = ds.reduce(reducefunc, ['nlon', 'nlat']).values
                timevar = ds['time']
                timeax[i] = utils.get_time_decimal_year(timevar)

    # output
    if use_pandas:
        index = pd.Index(timeax, name='ModelYear')
        ts = pd.Series(tseries, index=index, name=varn)
        _pandas_add_meta_data(ts, meta=dict(
            latlim=latlim,
            lonlim=lonlim,
            varn=varn,
            reducefunc=str(reducefunc),
            k=k,
            grid=grid,
        ))
        return ts
    else:
        return tseries, timeax
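
# Sketch of a typical call (file list and region limits are hypothetical;
# assumes numpy as np and glob are imported, along with the module-level
# helpers used above: _nfiles_diag, get_ulimitn, poppygrid, utils, and the
# use_pandas flag):
#
#     files = sorted(glob.glob('run/tavg.*.nc'))
#     ts = get_timeseries(files, 'TEMP', 'T', reducefunc=np.nanmean,
#                         latlim=(-5, 5), lonlim=(190, 240), k=0)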
import numpy as np
import os, sys, re, xray
from rasterio import Affine as A
from rasterio.warp import reproject, RESAMPLING
from osgeo import gdal
from mpl_toolkits.basemap import Basemap, addcyclic, shiftgrid

# some setup pathing
input_dir = '~/Documents/hur'
os.chdir(os.path.expanduser(input_dir))  # expanduser so '~' resolves

# the level of the atmosphere we want to use
atmos_level = 11

# open multiple datasets as a single file
xds = xray.open_mfdataset('hur_Amon_GFDL-CM3_historical_r1i1p1_*.nc')

# slice the dataset using the time variable in the xray object
xds_hur = xds.hur.loc['1900-01-01':'2005-12-12']
hur_lev = xds_hur[:, atmos_level, ...]

# calculate climatology and anomalies
climatology = hur_lev.loc['1961-01-01':'1990-12-31'].groupby('time.month').mean('time')
anomalies = hur_lev.groupby('time.month') - climatology

# # # REPROJECT AND CROP EXTENT
# what do we need to do to properly resample the data?
time_len, rows, cols = hur_lev.shape
# NOTE: geotransform = [left, res, 0.0, top, 0.0, res]
height = rows
width = cols
crs = 'epsg:4326'
affine = A(*[np.diff(xds.lon)[0], 0.0, -180.0,
             0.0, -np.diff(xds.lat)[0], 90.0])
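
# A sketch of the resampling step this sets up, using the reproject and
# RESAMPLING names imported above. The destination grid here simply reuses
# the source affine/CRS and a single time step, as an illustration -- the
# original's actual target grid is not shown in this excerpt:
out = np.empty((height, width), dtype=np.float32)
reproject(
    source=np.asarray(hur_lev[0], dtype=np.float32),
    destination=out,
    src_transform=affine,
    src_crs={'init': crs},
    dst_transform=affine,
    dst_crs={'init': crs},
    resampling=RESAMPLING.nearest)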
#import Ngl
#import math
from scipy import stats
from rhwhitepackages.readwrite import shiftlons
from rhwhitepackages.readwrite import xrayOpen
from rhwhitepackages.stats import regressmaps
from rhwhitepackages.readwrite import getdenfilename
# plotting
import xray.plot as xplt
import xray as xr  # alias assumed for the xr.* calls below

uvindir = '/home/disk/eos4/rachel/Obs/ERAI/uv'
startyr = 1998
endyr = 2015

for iyear in range(startyr, endyr):
    uvfile = xr.open_mfdataset(uvindir + '/interim_daily_' +
                               str(iyear) + '*.grb')
    ulev, vlev = uvfile['u'], uvfile['v']
    # deviations from the zonal mean (computed here but not used below)
    udash = ulev - ulev.mean(dim='longitude')
    vdash = vlev - vlev.mean(dim='longitude')
    # NOTE: this uses the full winds, i.e. total kinetic energy,
    # not the eddy components udash/vdash
    EKEall = 0.5 * ((ulev * ulev) + (vlev * vlev))
    EKEyears = EKEall.groupby('time.month').sum(dim='time')
    EKEyears = EKEyears.rename({'month': 'time'})
    EKEyears = EKEyears.rename({'latitude': 'lat'})
    EKEyears = EKEyears.rename({'longitude': 'lon'})
    EKEds = xr.Dataset({'EKE': EKEyears})
    EKEds.to_netcdf(uvindir + '/EKE_' + str(iyear) + '.nc', mode='w')
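
# The per-year files written above could later be recombined lazily into a
# single dataset (a sketch; the glob pattern matches the names built above):
#
#     EKE_all = xr.open_mfdataset(uvindir + '/EKE_*.nc')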