def nc_object(ncfile, tname='time'):
    if isinstance(ncfile, str):
        try:
            return netCDF4.Dataset(ncfile)
        except (IOError, RuntimeError, IndexError):
            # Are we a set of files?
            try:
                return netCDF4.MFDataset(ncfile)
            except (IOError, RuntimeError, IndexError):
                try:
                    return netCDF4.MFDataset(ncfile, aggdim=tname)
                except (IOError, RuntimeError, IndexError):
                    try:
                        # Unicode isn't working sometimes?
                        return netCDF4.MFDataset(str(ncfile), aggdim=tname)
                    except Exception:
                        logger.exception("Cannot open %s" % ncfile)
                        raise
        except ValueError:
            # Probably a DAP endpoint
            logger.exception("Cannot open %s" % ncfile)
            raise
        except Exception:
            logger.exception("Cannot open %s" % ncfile)
            raise
    elif isinstance(ncfile, Dataset):
        # Passed in a paegan Dataset object
        return ncfile.nc
    elif isinstance(ncfile, (netCDF4.Dataset, netCDF4.MFDataset)):
        # Passed in a netCDF4 Dataset or MFDataset object
        return ncfile
def get_measurements(data_dir, quantity, lat, lon, start_time, end_time):
    """Return data for a given location."""
    data_dir = pathlib.Path(data_dir)
    v_urls = list(sorted(data_dir.glob('vwnd.10m.gauss.*.nc')))
    u_urls = list(sorted(data_dir.glob('uwnd.10m.gauss.*.nc')))
    logger.info("reading data in %s, found urls: %s and %s",
                data_dir, u_urls, v_urls)

    # get all data required to find the correct dataset
    data = {}
    with netCDF4.MFDataset(u_urls, aggdim='time') as ds_u:
        # look up coordinate variables
        t_u = ds_u.variables['time'][:]
        lon_u = ds_u.variables['lon'][:]
        lat_u = ds_u.variables['lat'][:]

    # don't use num2date from netCDF4, too slow
    t0 = np.datetime64('1800-01-01', 'm')
    # use coordinate variables from the u files
    data['t'] = t0 + t_u.astype('timedelta64[h]')
    data['lon'] = lon_u
    data['lat'] = lat_u

    # nearest grid cell and requested time range
    lat_idx = np.argmin(np.abs(data['lat'] - lat))
    lon_idx = np.argmin(np.abs(data['lon'] - lon))
    t_range = np.asarray([start_time, end_time], 'datetime64[m]')
    t_start_idx, t_end_idx = np.searchsorted(data['t'], t_range)

    # slice
    s = np.s_[t_start_idx:t_end_idx, lat_idx, lon_idx]
    names = {}
    units = {}
    with netCDF4.MFDataset(u_urls, aggdim='time') as ds_u:
        data['u'] = ds_u.variables['uwnd'][s]
        names['u'] = ds_u.variables['uwnd'].long_name
        units['u'] = ds_u.variables['uwnd'].units
    with netCDF4.MFDataset(v_urls, aggdim='time') as ds_v:
        data['v'] = ds_v.variables['vwnd'][s]
        names['v'] = ds_v.variables['vwnd'].long_name
        units['v'] = ds_v.variables['vwnd'].units

    series = pd.DataFrame(data=dict(
        dateTime=data['t'][t_start_idx:t_end_idx],
        u=data['u'],
        v=data['v']))
    # make sure we serialize to json
    series = json.loads(json.dumps(series, cls=CustomEncoder))
    response = {"series": series}
    return response
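# Hedged usage sketch (added): a minimal call of get_measurements() above.
# The directory, coordinates, and time window are illustrative assumptions;
# the function expects NCEP/NCAR-style uwnd/vwnd gaussian-grid files.
def _demo_get_measurements():
    response = get_measurements(
        data_dir='/data/noaa', quantity='wind',  # hypothetical location
        lat=52.0, lon=4.0,
        start_time='2000-01-01', end_time='2000-02-01')
    print(type(response['series']))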
def MFDataset(ncfile):
    """Return an MFDataset object given a string or list. A string is
    expanded with wildcards using glob. A netCDF4 Dataset or MFDataset
    object returns itself."""
    if isinstance(ncfile, str):
        ncfiles = glob(ncfile)
        return netCDF.MFDataset(sorted(ncfiles))
    elif isinstance(ncfile, (list, tuple)):
        return netCDF.MFDataset(sorted(ncfile))
    elif hasattr(ncfile, 'variables'):
        # accept any object with a variables attribute
        assert isinstance(ncfile.variables, dict), \
            'variables attribute must be a dictionary'
        return ncfile
    else:
        raise TypeError('type %s not supported' % type(ncfile))
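# Hedged usage sketch (added): the wildcard pattern below is an illustrative
# assumption. Strings are glob-expanded and sorted before aggregation,
# lists/tuples are sorted, and open datasets pass straight through.
def _demo_mfdataset_wrapper():
    nc = MFDataset('ocean_his_*.nc')
    print(sorted(nc.variables))
    nc.close()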
def get_grid_info(data_dir):
    info = {}
    data_dir = pathlib.Path(data_dir)
    v_urls = list(sorted(data_dir.glob('vwnd.10m.gauss.*.nc')))
    u_urls = list(sorted(data_dir.glob('uwnd.10m.gauss.*.nc')))
    info['urls'] = u_urls + v_urls
    with netCDF4.MFDataset(u_urls, aggdim='time') as ds_u:
        for attr in ds_u.ncattrs():
            info[attr] = getattr(ds_u, attr)
    with netCDF4.MFDataset(v_urls, aggdim='time') as ds_v:
        for attr in ds_v.ncattrs():
            info[attr] = getattr(ds_v, attr)
    return info
def _get_dataset(filename):
    if isinstance(filename, basestring):
        df = nc4.Dataset(filename)
    else:
        df = nc4.MFDataset(filename)
    return df
def __init__(self, path_to_files, analysis_file="lfff00000000c.nc"):
    self._date_time_regex = re.compile(
        r"(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})")
    self._variables = None
    self._init_time = None
    self._history_interval = None
    self._timesteps = None
    self._lats = None
    self._lons = None
    self._rlats = None
    self._rlons = None
    self._xshape = None
    self._yshape = None
    self._last_time = None
    self._grid_north_pole_lat = None
    self._grid_north_pole_lon = None
    self._rotated_grid = None
    self._analysis_file = analysis_file
    self._cosmo_file_path = os.path.join(path_to_files, '')
    self._files_in_path = list(
        set(glob.glob(self._cosmo_file_path + "lfff*.nc")) -
        set(glob.glob(self._cosmo_file_path + self._analysis_file)))
    self._files_in_path.sort()
    self._num_of_files = len(self._files_in_path)
    if self._num_of_files < 1:
        raise ValueError(
            "COSMOPythonLib: No COSMO netCDF dataset found. Check the path")
    try:
        self._cosmo_multifile = netCDF4.MFDataset(self._files_in_path)
    except Exception:
        raise ValueError(
            "COSMOPythonLib: netCDF file(s) could not be opened. Corrupt file(s)?")
    self.__create_meta_data()
def land_sea_mask():
    """Builds the boolean land-sea mask."""
    with netCDF4.MFDataset("%s/*.sp.nc" % wam2layers_config.data_dir) as dataset:
        latitude = dataset.variables['latitude'][:]
        longitude = wrap_lon180(dataset.variables['longitude'][:])
    gridsize = latitude[0] - latitude[1]
    xgrd, ygrd = np.meshgrid(longitude, latitude)
    filename = '%s/lsm.%s.npy' % (wam2layers_config.data_dir, gridsize)
    if os.path.isfile(filename):
        mask = np.unpackbits(np.load(filename)).astype('bool')[:xgrd.size]
    else:
        shp = cartopy.io.shapereader.natural_earth(resolution='110m',
                                                   category='physical',
                                                   name='land')
        # load the shapefile to use as mask and build the polygons
        shp = cartopy.io.shapereader.Reader(shp)
        geoms = shp.geometries()
        polygon = shapely.ops.cascaded_union(list(geoms))
        mask = inpolygon(polygon, zip(xgrd.ravel(), ygrd.ravel()))
        np.save(filename, np.packbits(mask))
    return xgrd, ygrd, mask.reshape(xgrd.shape)
def split_to_monthly_vars(src_root: str, dst_root: str, year: int,
                          domain_num: int, filetype: str, month: int,
                          included_vars=None, included_plev_nums=None):
    """Split and convert a netCDF Classic (3/4) file to CMOR specs"""
    global current_file
    infiles = os.path.join(src_root,
                           tabs.src_file_pattern(filetype, domain_num, year, month))
    with netCDF4.MFDataset(infiles) as src:
        print('CMORize split:', src_root, '-->', dst_root)
        print('inputs:', os.path.basename(infiles))
        time_values = None
        # loop over source variables
        for src_vname, src_var in src.variables.items():
            if src_vname not in tabs.varmap:
                continue
            vname = tabs.varmap[src_vname]
            if included_vars and vname not in included_vars:
                continue
            print('', src_vname, src_var.dimensions, end='', flush=True)
            values = None
            # loop through possible pressure levels
            start, finish = get_plevels_range(vname)
            if start > 0:
                print('')
            for plev_num in range(start, finish):
                if plev_num and included_plev_nums and (plev_num not in included_plev_nums):
                    continue
                outfile = os.path.join(
                    dst_root, tabs.dst_file_month(vname, domain_num, year, month, plev_num))
                outfinal = os.path.join(
                    dst_root, tabs.dst_file_year(vname, domain_num, year, plev_num))
                if os.path.isfile(outfile):
                    print('')
                    print('skipping existing:', outfile)
                    continue
                if os.path.isfile(outfinal):
                    print('')
                    print('skipping existing:', outfinal)
                    continue
                if values is None:
                    values = src_var[:]
                current_file = outfile
                with netCDF4.Dataset(outfile, 'w', format='NETCDF4_CLASSIC') as dst:
                    # Create missing dimensions required by the variable and
                    # return the destination dimensions
                    dst_dims = process_dimensions(src_var, src, dst)
                    # Filter the main variable values
                    dst_values = process_values(vname, plev_num, values, dst)
                    vname_full = tabs.full_vname(vname, plev_num)
                    print(' -->', vname_full, tabs.constants['domains'][domain_num],
                          year, month, dst_dims, dst_values.dtype, end='', flush=True)
                    # Add the main variable
                    var_out = dst.createVariable(
                        vname_full, dst_values.dtype, dst_dims,
                        fill_value=tabs.constants['missing_value'],
                        zlib=(tabs.compress > 0), complevel=tabs.compress)
                    var_out[:] = dst_values
                    # Add the time variable
                    if time_values is None:
                        time_values = process_values(
                            tabs.constants['dst_timevar'], 0,
                            src.variables[tabs.constants['src_timevar']][:], None)
                    var_out = dst.createVariable(
                        tabs.constants['dst_timevar'], time_values.dtype,
                        (tabs.constants['dst_timedim'],),
                        zlib=(tabs.compress > 0), complevel=tabs.compress)
                    var_out[:] = time_values
                print('')
    current_file = None
def extract_season(inroot, infiles, outroot, outfile, month, y_min, y_max):
    season = {0: 'full', 3: 's1', 6: 's2', 9: 's3', 12: 's4'}
    opr = MERGED
    basefile = outfile + '_%s_%s.nc' % (season[month], opr)
    outfile_full = os.path.join(outroot, season[month], opr, basefile)
    print('Output:')
    print(os.path.dirname(outfile_full))
    print(' ', os.path.basename(outfile_full))
    if os.path.isfile(outfile_full):
        print(' ...exists')
        return outfile_full
    tmp = os.path.join(outroot, str(uuid.uuid4()) + '.nc')
    infiles_full = [os.path.join(inroot, f) for f in infiles]
    with nc4.Dataset(infiles_full[0]) as src1, \
         nc4.MFDataset(infiles_full) as src, \
         nc4.Dataset(tmp, "w") as dst:
        write_nc4_season(src1, src, dst, y_min, y_max, month)
    odir = os.path.dirname(outfile_full)
    if not os.path.isdir(odir):
        os.makedirs(odir)
        print('Created dir:', odir)
    os.rename(tmp, outfile_full)
    return outfile_full
def _open_(uri, mode='r', **kwargs):
    """
    :rtype: object
    """
    kwargs = kwargs.copy()
    group_indexing = kwargs.pop('group_indexing', None)
    lvm = kwargs.pop('vm', vm)
    if isinstance(uri, six.string_types):
        # Open the dataset in parallel if we want to use the netCDF MPI
        # capability. It may not be available even in parallel.
        if mode == 'w' and lvm.size > 1:
            if kwargs.get('format', 'NETCDF4') == 'NETCDF4':
                if kwargs.get('parallel') is None and env.USE_NETCDF4_MPI:
                    kwargs['parallel'] = True
                if kwargs.get('parallel') and kwargs.get('comm') is None:
                    kwargs['comm'] = lvm.comm
        ret = nc.Dataset(uri, mode=mode, **kwargs)
        # tdk:FIX: this should be enabled for MFDataset as well. See
        # https://github.com/Unidata/netcdf4-python/issues/809#issuecomment-435144221
        # netcdf4 >= 1.4.0 always returns masked arrays. This is inefficient
        # and is turned off by default by ocgis.
        if hasattr(ret, 'set_always_mask'):
            ret.set_always_mask(False)
    else:
        ret = nc.MFDataset(uri, **kwargs)
    if group_indexing is not None:
        for group_name in get_iter(group_indexing):
            ret = ret.groups[group_name]
    return ret
def load_model(model_path, start_date, end_date, field, nowcast_flag=False):
    """Loads model grid_T data in the date range defined by start_date and
    end_date. Only considers daily averaged model fields.

    Returns model depths, the variable defined by field, and the dates
    associated with the variable.
    """
    files = analyze.get_filenames(start_date, end_date, '1d', 'grid_T',
                                  model_path)
    if nowcast_flag:
        var, dates = analyze.combine_files(files, field, np.arange(0, 40),
                                           np.arange(0, 898),
                                           np.arange(0, 398))
        tmp = nc.Dataset(files[0])
        depth = tmp.variables['deptht'][:]
    else:
        tracers = nc.MFDataset(files)
        time = tracers.variables['time_counter']
        # convert dates
        dates = []
        start = datetime.datetime.strptime(time.time_origin,
                                           ' %Y-%b-%d %H:%M:%S')
        for t in time[:]:
            d = start + datetime.timedelta(seconds=t)
            dates.append(d)
        depth = tracers.variables['deptht'][:]
        var = tracers.variables[field][:]
    return depth, var, dates
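# Hedged usage sketch (added): a minimal call of load_model() above. The
# results path, date range, and the NEMO tracer name 'vosaline' are
# illustrative assumptions, not values from the source.
def _demo_load_model():
    import datetime
    depth, sal, dates = load_model('/results/SalishSea/',
                                   datetime.datetime(2015, 1, 1),
                                   datetime.datetime(2015, 1, 7),
                                   'vosaline')
    print(sal.shape, dates[0], dates[-1])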
def netcdf(file, aggdim=None):
    """
    Wrapper around netCDF4 to open a file as either a Dataset or an MFDataset.

    Parameters
    ----------
    file : string or list
        Filename(s) to open. If the string has wildcards or is a list,
        this attempts to open an MFDataset.
    aggdim : string
        Name of dimension to concatenate along if loading a set of files.
        A value of None (default) uses the unlimited dimension.

    Returns
    -------
    netCDF4 Dataset or MFDataset
    """
    import netCDF4
    try:
        nc = netCDF4.Dataset(file)
    except (OSError, RuntimeError):
        try:
            nc = netCDF4.MFDataset(file, aggdim=aggdim)
        except IndexError:
            raise FileNotFoundError("{:s} cannot be found.".format(file))
    return nc
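# Hedged usage sketch (added): the wildcard pattern and the 'ocean_time'
# aggregation dimension are illustrative assumptions. A plain filename opens
# a Dataset; wildcards or lists fall through to MFDataset.
def _demo_netcdf_wrapper():
    nc = netcdf('ocean_his_*.nc', aggdim='ocean_time')
    print(sorted(nc.variables))
    nc.close()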
def read_ncdf(name, path='/work/gg0877/KST/tide_gaughes/ncdf',
              origin='2012-01-01 00:00:00'):
    ncname = '_'.join(word[0].upper() + word[1:] for word in name.split('_'))
    if path[-6:] == 'hawaii':
        nc = netCDF4.MFDataset(path + '/' + ncname + '*.nc')
    else:
        nc = netCDF4.Dataset(path + '/' + ncname + '.nc')
    ncv = nc.variables
    ot = utime('seconds since ' + origin)
    ut = utime(ncv['time'].units)
    time = ot.date2num(ut.num2date(ncv['time'][:]))
    print(ncv.keys())
    scale = {'cm': 100., 'm': 1.0, 'millimeters': 1000., 'mm': 1000.}
    if 'elev' in ncv:
        if 'units' in ncv['elev'].ncattrs():
            scale_factor = scale[ncv['elev'].units]
        else:
            if ncv['elev'][:].std() > 100.:
                scale_factor = 1000.
            elif ncv['elev'][:].std() > 10.:
                scale_factor = 100.
            else:
                scale_factor = 1.0
        elev = ncv['elev'][:].squeeze() / scale_factor
    elif 'sea_surface_height_above_reference_level' in ncv:
        if 'units' in ncv['sea_surface_height_above_reference_level'].ncattrs():
            scale_factor = scale[ncv['sea_surface_height_above_reference_level'].units]
        else:
            scale_factor = 1000.
        elev = ncv['sea_surface_height_above_reference_level'][:].squeeze() / scale_factor
    nc.close()
    return time, elev
def ncopen(f, mode='r'):
    # a non-string (e.g. a list of files) or a wildcard string opens an MFDataset
    if not isstr(f) or any([i in f for i in '*?']):
        nc = netCDF4.MFDataset(f)
    else:
        nc = netCDF4.Dataset(f, mode)
    return nc
def open_files(ncfiles, return_dsvar=False):
    """Open netCDF files, either with xray or netCDF4"""
    try:
        if _ncmodule == 'xray':
            # open files with xray
            try:
                ds = xray.open_mfdataset(ncfiles)
            except ValueError:
                ds = xray.open_mfdataset(ncfiles, decode_times=False)
                print('Warning: Using decode_times=False')
            dsvar = ds
        else:
            # open files with netCDF4
            if len(ncfiles) > 1:
                ds = netCDF4.MFDataset(ncfiles)
            else:
                ds = netCDF4.Dataset(ncfiles[0])
            dsvar = ds.variables
    except RuntimeError:
        traceback.print_exc()
        print('Warning: File(s) could not be opened: {}'.format(ncfiles))
        ds = None
        dsvar = None
    if return_dsvar:
        return ds, dsvar
    else:
        return ds
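# Hedged usage sketch (added): the file names are illustrative assumptions.
# dsvar is ds.variables under netCDF4 and the dataset itself under xray.
def _demo_open_files():
    ds, dsvar = open_files(['a_2000.nc', 'a_2001.nc'], return_dsvar=True)
    if ds is not None:
        print(sorted(dsvar))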
def lsm_demo():
    """Plots a masked layer of time-averaged surface pressure."""
    import netCDF4
    import cartopy.crs
    import matplotlib.pyplot as plt
    import wam2layers_config
    import numpy.ma as ma

    xgrd, ygrd, mask = wam2layers.land_sea_mask()
    with netCDF4.MFDataset("%s/*.sp.nc" % wam2layers_config.data_dir) as dataset:
        pres = np.average(dataset.variables['sp'][:], axis=0)
    projection = cartopy.crs.PlateCarree()
    # plot mask
    fig, axis = plt.subplots(subplot_kw=dict(projection=projection))
    axis.plot(xgrd[mask], ygrd[mask], 'k.', alpha=0.25)
    # plot masked data
    fig, axis = plt.subplots(subplot_kw=dict(projection=projection))
    # cs = ax.pcolormesh(longitude, latitude, ma.masked_array(pressure, ~m))
    plt.contourf(ma.masked_array(xgrd, mask), ma.masked_array(ygrd, mask),
                 ma.masked_array(pres, ~mask))
    axis.coastlines(resolution='50m')
    axis.set_extent([-90, -30, -40, +20])
def _ds(self):
    if self.__ds is None:
        try:
            self.__ds = nc.Dataset(self.request_dataset.uri, 'r')
        except TypeError:
            # likely multiple uris
            self.__ds = nc.MFDataset(self.request_dataset.uri)
    return self.__ds
def nc_cherche_calendrier(l_f):
    """Find the calendar of a list of netCDF files."""
    ds = nc.MFDataset(l_f)
    calendrier = ds.variables['time'].calendar
    ds.close()
    return calendrier
def main(cmdLineArgs, stream=False):
    numpy.seterr(divide='ignore', invalid='ignore', over='ignore')  # To avoid warnings
    if not os.path.exists(cmdLineArgs.gridspec):
        raise ValueError('Specified gridspec directory/tar file does not exist.')
    if os.path.isdir(cmdLineArgs.gridspec):
        x = netCDF4.Dataset(cmdLineArgs.gridspec + '/ocean_hgrid.nc').variables['x'][::2, ::2]
        xcenter = netCDF4.Dataset(cmdLineArgs.gridspec + '/ocean_hgrid.nc').variables['x'][1::2, 1::2]
        y = netCDF4.Dataset(cmdLineArgs.gridspec + '/ocean_hgrid.nc').variables['y'][::2, ::2]
        ycenter = netCDF4.Dataset(cmdLineArgs.gridspec + '/ocean_hgrid.nc').variables['y'][1::2, 1::2]
        msk = netCDF4.Dataset(cmdLineArgs.gridspec + '/ocean_mask.nc').variables['mask'][:]
        area = msk * netCDF4.Dataset(cmdLineArgs.gridspec + '/ocean_hgrid.nc').variables['area'][:, :].reshape(
            [msk.shape[0], 2, msk.shape[1], 2]).sum(axis=-3).sum(axis=-1)
        depth = netCDF4.Dataset(cmdLineArgs.gridspec + '/ocean_topog.nc').variables['depth'][:]
    elif os.path.isfile(cmdLineArgs.gridspec):
        x = m6toolbox.readNCFromTar(cmdLineArgs.gridspec, 'ocean_hgrid.nc', 'x')[::2, ::2]
        xcenter = m6toolbox.readNCFromTar(cmdLineArgs.gridspec, 'ocean_hgrid.nc', 'x')[1::2, 1::2]
        y = m6toolbox.readNCFromTar(cmdLineArgs.gridspec, 'ocean_hgrid.nc', 'y')[::2, ::2]
        ycenter = m6toolbox.readNCFromTar(cmdLineArgs.gridspec, 'ocean_hgrid.nc', 'y')[1::2, 1::2]
        msk = m6toolbox.readNCFromTar(cmdLineArgs.gridspec, 'ocean_mask.nc', 'mask')[:]
        area = msk * m6toolbox.readNCFromTar(cmdLineArgs.gridspec, 'ocean_hgrid.nc', 'area')[:, :].reshape(
            [msk.shape[0], 2, msk.shape[1], 2]).sum(axis=-3).sum(axis=-1)
        depth = m6toolbox.readNCFromTar(cmdLineArgs.gridspec, 'ocean_topog.nc', 'depth')[:]
    else:
        raise ValueError('Unable to extract grid information from gridspec directory/tar file.')

    Sobs = netCDF4.Dataset(cmdLineArgs.woa).variables['salt']
    if len(Sobs.shape) == 3:
        Sobs = Sobs[0]
    else:
        Sobs = Sobs[:, 0].mean(axis=0)

    rootGroup = netCDF4.MFDataset(cmdLineArgs.infile)
    if 'salt' in rootGroup.variables:
        varName = 'salt'
    elif 'so' in rootGroup.variables:
        varName = 'so'
    else:
        raise Exception('Could not find "salt" or "so" in file "%s"' % (cmdLineArgs.infile))
    if rootGroup.variables[varName].shape[0] > 1:
        Smod = rootGroup.variables[varName][:, 0].mean(axis=0)
    else:
        Smod = rootGroup.variables[varName][0, 0]

    if cmdLineArgs.suptitle != '':
        suptitle = cmdLineArgs.suptitle + ' ' + cmdLineArgs.label
    else:
        suptitle = rootGroup.title + ' ' + cmdLineArgs.label

    imgbufs = []
    ci = m6plot.pmCI(0.125, 2.25, .25)
    if stream is True:
        img = io.BytesIO()
    else:
        img = cmdLineArgs.outdir + '/SSS_bias_WOA05.png'
    m6plot.xyplot(Smod - Sobs, x, y, area=area,
                  suptitle=suptitle, title='SSS bias (w.r.t. WOA\'05) [ppt]',
                  clim=ci, colormap='dunnePM', centerlabels=True,
                  extend='both', save=img)
    if stream is True:
        imgbufs.append(img)
    m6plot.xycompare(Smod, Sobs, x, y, area=area,
                     suptitle=suptitle,
                     title1='SSS [ppt]', title2='WOA\'05 SSS [ppt]',
                     clim=m6plot.linCI(20, 30, 10, 31, 39, .5),
                     colormap='dunneRainbow', extend='both',
                     dlim=ci, dcolormap='dunnePM', dextend='both',
                     centerdlabels=True,
                     save=cmdLineArgs.outdir + '/SSS_bias_WOA05.3_panel.png')
    if stream is True:
        return imgbufs
def read_list(files_list, var):
    from joblib import Parallel, delayed
    ctime_read = checkpoint(0)
    print ' --> READING FILES '
    wrfvar = (getwrfname(var)[0]).split('-')
    if len(files_list) <= 15:
        method = 'MFDataset'
    else:
        method = 'Dataset'
    # ---------------------
    if method == 'MFDataset':
        print files_list
        fin = nc.MFDataset(files_list)  # Read all files
        print ' --> EXTRACTING VARIABLE Time'
        time = fin.variables['Times'][:]  # Get time variable
        print ' --> EXTRACTING VARIABLE ', var
        varvals = get_wrfvars(wrfvar, fin)
        fin.close()
    # ---------------------
    if method == 'Dataset':
        varvals = {}
        njobs = 10
        nlen = len(files_list) / njobs  # time step block length
        a = len(files_list) - njobs * nlen
        nt_v = np.zeros(njobs)
        nt_v[:] = nlen
        nt_v[njobs - 1] = nlen + a  # block length for each job
        nt_v = nt_v.cumsum()
        files_in = [(files_list[0:int(nt_v[0])], wrfvar)]
        for tt in np.arange(1, njobs):
            files_in.append(
                (files_list[int(nt_v[tt - 1]):int(nt_v[tt])], wrfvar))
        var_v = Parallel(n_jobs=njobs)(delayed(read_block)(*files_in[i])
                                       for i in xrange(len(files_in)))
        for i in np.arange(0, njobs):
            if i == 0:
                time = var_v[i][0]
                for ii, wrfv in enumerate(wrfvar):
                    varvals[wrfv] = var_v[i][1][wrfv]
            else:
                time = np.concatenate((time, var_v[i][0]))
                for ii, wrfv in enumerate(wrfvar):
                    varvals[wrfv] = np.concatenate(
                        (varvals[wrfv], var_v[i][1][wrfv]))
    # ---------------------
    ctime = checkpoint(ctime_read)
    return np.asarray(time), varvals
def main(cmdLineArgs, stream=None):
    rootGroupT = netCDF4.MFDataset(cmdLineArgs.annual_directory + '/*.thetao_xyave.nc')
    rootGroupS = netCDF4.MFDataset(cmdLineArgs.annual_directory + '/*.so_xyave.nc')
    if 'thetao_xyave' not in rootGroupT.variables:
        raise Exception('Could not find "thetao_xyave" files "%s"' % (cmdLineArgs.annual_directory))
    if 'so_xyave' not in rootGroupS.variables:
        raise Exception('Could not find "so_xyave" files "%s"' % (cmdLineArgs.annual_directory))
    zt = rootGroupT.variables['zt'][::-1] * -1
    timeT = rootGroupT.variables['time']
    timeS = rootGroupS.variables['time']
    timeT = numpy.array([int(x.year) for x in netCDF4.num2date(timeT[:], timeT.units, calendar=timeT.calendar)])
    timeS = numpy.array([int(x.year) for x in netCDF4.num2date(timeS[:], timeS.units, calendar=timeS.calendar)])
    if cmdLineArgs.trange is not None:
        start = list(timeT).index(cmdLineArgs.trange[0])
        end = list(timeT).index(cmdLineArgs.trange[1])
    else:
        start = 0
        end = -1
    variable = rootGroupT.variables['thetao_xyave']
    T = variable[start:end] - variable[start]
    T = T[:, ::-1]
    timeT = timeT[start:end]
    variable = rootGroupS.variables['so_xyave']
    S = variable[start:end] - variable[start]
    S = S[:, ::-1]
    timeS = timeS[start:end]
    if cmdLineArgs.suptitle != '':
        suptitle = cmdLineArgs.suptitle + ' ' + cmdLineArgs.label
    else:
        suptitle = rootGroupT.title + ' ' + cmdLineArgs.label
    if stream is not None:
        objOut = stream[0]
    else:
        objOut = cmdLineArgs.outdir + '/T_drift.png'
    m6plot.ztplot(T, timeT, zt, splitscale=[0., -1000., -6500.],
                  suptitle=suptitle, title='Potential Temperature [C]',
                  extend='both', colormap='dunnePM', autocenter=True,
                  clim=cmdLineArgs.climT, save=objOut)
    if stream is not None:
        objOut = stream[1]
    else:
        objOut = cmdLineArgs.outdir + '/S_drift.png'
    m6plot.ztplot(S, timeS, zt, splitscale=[0., -1000., -6500.],
                  suptitle=suptitle, title='Salinity [psu]',
                  extend='both', colormap='dunnePM', autocenter=True,
                  clim=cmdLineArgs.climS, save=objOut)
def _get_dataset(filename, dataset=None):
    if dataset is not None:
        return dataset
    if isinstance(filename, basestring):
        df = nc4.Dataset(filename)
    else:
        df = nc4.MFDataset(filename)
    return df
def __init__(self, cmdLineArgs, section, var, label=None, ylim=None,
             mks2Sv=True):
    if not isinstance(section, list):
        section = [section]
    if not isinstance(var, list):
        var = [var]
    self.section = section[0]
    self.var = var
    if label is not None:
        self.label = label
    else:
        self.label = section[0]
    self.ylim = ylim
    for k in range(0, len(section)):
        try:
            rootGroup = netCDF4.MFDataset(cmdLineArgs.ts_directory +
                                          section[k] + '/ts/120hr/20yr/*.' +
                                          var[k] + '.nc')
        except Exception:
            rootGroup = netCDF4.MFDataset(cmdLineArgs.ts_directory +
                                          section[k] + '/ts/120hr/5yr/*.' +
                                          var[k] + '.nc')
        if k == 0:
            total = numpy.zeros(rootGroup.variables[var[k]][:].shape[0])
        trans = rootGroup.variables[var[k]][:].sum(axis=1)  # Depth summation
        if var[k] == 'umo':
            total = total + trans.sum(axis=1).squeeze()
        elif var[k] == 'vmo':
            total = total + trans.sum(axis=2).squeeze()
        else:
            raise ValueError('Unknown variable name')
    if mks2Sv:
        total = total * 1.e-9
    self.data = total
    self.time = rootGroup.variables['time'][:] * (1 / 365.0)
    if cmdLineArgs.suptitle != '':
        self.suptitle = cmdLineArgs.suptitle + ' ' + cmdLineArgs.label
    else:
        self.suptitle = rootGroup.title + ' ' + cmdLineArgs.label
def inventory(self):
    """
    Report on data available in the directory: time slice, variables, area.
    """
    print("\n\n\n")
    print("=== INVENTORY FOR GLOBSIM ERA-INTERIM DATA === \n")
    print("Download parameter file: \n" + self.pfile + "\n")
    # loop over filetypes, read, report
    file_type = ['erai_pl_*.nc', 'erai_sa_*.nc', 'erai_sf_*.nc', 'erai_t*.nc']
    for ft in file_type:
        infile = path.join(self.directory, ft)
        nf = len(filter(listdir(self.directory), ft))
        print(str(nf) + " FILE(S): " + infile)
        if nf > 0:
            # open dataset
            ncf = nc.MFDataset(infile, 'r')
            # list variables
            keylist = [str_encode(x) for x in ncf.variables.keys()]
            print("    VARIABLES:")
            print("        " + str(len(keylist)) + " variables, including dimensions")
            for key in keylist:
                print("        " + ncf.variables[key].long_name)
            # time slice
            time = ncf.variables['time']
            tmin = nc.num2date(min(time[:]), time.units,
                               calendar=time.calendar).strftime('%Y/%m/%d')
            tmax = nc.num2date(max(time[:]), time.units,
                               calendar=time.calendar).strftime('%Y/%m/%d')
            print("    TIME SLICE")
            print("        " + str(len(time[:])) + " time steps")
            print("        " + tmin + " to " + tmax)
            # area
            lon = ncf.variables['longitude']
            lat = ncf.variables['latitude']
            nlat = str(len(lat))
            nlon = str(len(lon))
            ncel = str(len(lat) * len(lon))
            print("    BOUNDING BOX / AREA")
            print("        " + ncel + " cells, " + nlon + " W-E and " + nlat + " S-N")
            print("        N: " + str(max(lat)))
            print("        S: " + str(min(lat)))
            print("        W: " + str(min(lon)))
            print("        E: " + str(max(lon)))
            ncf.close()
def setupROMSfiles(loc, date, ff, tout, time_units, tstride=1):
    """
    setupROMSfiles()
    Kristen Thyng, March 2013

    Figures out necessary files to read in for track times and what model
    output indices within those files to use.

    Args:
        loc: File location. loc can be a thredds server web address, a
            single string of a file location, or a list of strings of
            multiple file locations to be searched through.
        date: datetime format start date
        ff: Time direction. ff=1 forward, ff=-1 backward
        tout: Number of model outputs to use
        time_units: To convert to datetime
        tstride: Stride in time, in case we want to use less model output
            than is available. Default is 1, using all output.

    Returns:
        * nc - NetCDF object for relevant files
        * tinds - Indices of outputs to use from fname files
    """
    # For a thredds server where all information is available in one place,
    # or for a single file
    if 'http' in loc or type(loc) == str:
        nc = netCDF.Dataset(loc)
    # This is for the case when we have a bunch of files to sort through
    else:
        # the globbing should happen ahead of time so this case looks
        # different than the single file case;
        # files in fname are in chronological order
        nc = netCDF.MFDataset(loc)

    # Convert date to number
    # dates = netCDF.num2date(nc.variables['ocean_time'][:], time_units)
    # The calendar definition extends dates to before the year 1582 for use
    # with idealized simulations without meaningful dates.
    dates = netCDF.num2date(nc.variables['ocean_time'][:], time_units,
                            calendar='proleptic_gregorian')
    # time index with time value just below date (relative to file ifile)
    istart = find(dates <= date)[-1]

    # Select indices
    if ff == 1:
        # indices of model outputs desired
        tinds = range(istart, istart + tout, tstride)
    else:  # backward in time
        # have to shift istart since there are now new indices behind since
        # going backward
        tinds = range(istart, istart - tout, -tstride)
    return nc, tinds
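# Hedged usage sketch (added): the OPeNDAP endpoint, the time-units string,
# and the start date are illustrative assumptions, not values from the source.
def _demo_setupROMSfiles():
    import datetime
    loc = 'http://example.com/thredds/dodsC/roms_his'  # hypothetical endpoint
    date = datetime.datetime(2010, 6, 1)
    nc, tinds = setupROMSfiles(loc, date, ff=1, tout=10,
                               time_units='seconds since 1970-01-01')
    print(list(tinds))
    nc.close()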
def extract_data(file):
    """
    Purpose: Extract all of the data in a file(s), even the errors.

    Inputs:
        file - the name of the file(s) to be read

    Outputs:
        data - an array of MCE data including raw data, header data, and
               on/off data:
            0 - a list of all times without errors [100 * number of files, 2]
            1 - mce0 raw data without errors [100 * number of files, 33, 32, 100]
            2 - mce1 raw data without errors [100 * number of files, 33, 32, 100]
            3 - mce0 on/off data without errors [100 * number of files, 33, 32]
            4 - mce1 on/off data without errors [100 * number of files, 33, 32]
            5 - head0 - mce0 header data without errors [100 * number of files, 1700, 1]
            6 - head1 - mce1 header data without errors [100 * number of files, 1700, 1]
            7 - status flags data without errors [100 * number of files, 1, 5]
        tel_data - an array of telescope data [100 * number of files, 20, 21]
        k_data - K-mirror data
    """
    f = nc.MFDataset(file)
    # flags to track whether we have returned data for mce and tel or not;
    # will have to add a kms flag
    mce_flag = False
    tel_flag = False
    k_flag = False
    unix = f.variables['time'][:, 0]
    data = []
    counter = 0
    print('collecting K-mirror data')
    try:
        k_data = f.variables['kms'][:]
    except KeyError:
        k_flag = True
        print('WARNING missing k-mirror data')
    try:
        for var in f.variables:
            if 'mce' in var or var == 'status' or var == 'time':
                print('collecting %s data' % (var))
                data.append([])
                data[counter].append(f.variables[var][:])
                counter += 1
    except KeyError:
        mce_flag = True
        print('WARNING missing some or all MCE data')
    print('collecting tel_data')
    try:
        tel_data = f.variables['tel'][:]
    except KeyError:
        tel_flag = True
        print('WARNING missing telescope data')
    if tel_flag:
        tel_data = None
    if mce_flag:
        data = None
    if k_flag:
        k_data = None
    return data, tel_data, k_data
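# Hedged usage sketch (added): the file pattern is an illustrative assumption.
# Subsystems missing from the file come back as None.
def _demo_extract_data():
    data, tel_data, k_data = extract_data('scan_*.nc')
    print(tel_data is None, k_data is None)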
def avg2csv(ncfile, csvname):
    print ncfile, csvname
    nc = netCDF4.MFDataset(ncfile, 'r')
    names = nc.variables.keys()
    flux = {}
    for name in names:
        if nc.variables[name].ndim == 3:
            flux[name] = nc.variables[name][:, :, :].flatten()
    nt = len(nc.dimensions['ocean_time'])
    nx = len(nc.dimensions['xi_rho'])
    ny = len(nc.dimensions['eta_rho'])
    tunit = nc.variables['ocean_time'].units

    time = np.zeros_like(nc.variables['zeta'][:, :, :])
    lon = np.zeros_like(nc.variables['zeta'][:, :, :])
    lat = np.zeros_like(nc.variables['zeta'][:, :, :])
    h = np.zeros_like(nc.variables['zeta'][:, :, :])
    for t in xrange(nt):
        time[t, :, :] = nc.variables['ocean_time'][t]
        h[t, :, :] = nc.variables['h'][:, :]
    for x in xrange(nx):
        lon[:, :, x] = nc.variables['lon_rho'][0, x]
    for y in xrange(ny):
        lat[:, y, :] = nc.variables['lat_rho'][y, 0]
    flux['time'] = time.flatten()
    flux['lon'] = lon.flatten()
    flux['lat'] = lat.flatten()
    flux['h'] = h.flatten()

    df = pd.DataFrame(flux)
    num2date = lambda num: netCDF4.num2date(num, tunit)
    df.time = df.time.apply(num2date)
    df = df.set_index(['time', 'lon', 'lat', 'h'])
    df = df.dropna()
    mean = df.mean()
    mean.to_csv('{}_mean.csv'.format(csvname))
    df.describe().to_csv('{}_describe.csv'.format(csvname))
    df.sum().to_csv('{}_sum.csv'.format(csvname))
    df.to_csv('{}_all.csv'.format(csvname))
def parsing_nc_file(hk_file):
    """
    Purpose : All this function does is take data out of the hk file
    Inputs  : hk_file - the name of the file
    Outputs : data - the raw data from the hk file
    """
    f = nc.MFDataset(hk_file)
    data = f.variables['hk_data'][:]
    f.close()
    return data
def check(u_urls, v_urls):
    """check files for consistency and assumptions"""
    with netCDF4.MFDataset(u_urls, aggdim='time') as ds_u:
        # lookup coordinate variables in the u files
        t_u = ds_u.variables['time'][:]
        lon_u = ds_u.variables['lon'][:]
        lat_u = ds_u.variables['lat'][:]
        with netCDF4.MFDataset(v_urls, aggdim='time') as ds_v:
            # assert equality
            t_v = ds_v.variables['time'][:]
            lon_v = ds_v.variables['lon'][:]
            lat_v = ds_v.variables['lat'][:]
            assert (t_u == t_v).all()
            assert (lat_u == lat_v).all()
            assert (lon_u == lon_v).all()
            # assert assumed units
            assert ds_u.variables['time'].units == 'hours since 1800-01-01 00:00:0.0'
            assert ds_v.variables['time'].units == 'hours since 1800-01-01 00:00:0.0'