def get_sf_data(osse_path, flux_var):
    """
    Gets last gctm.sf.NN file from given path

    Parameters:
        osse_path (str) : directory path where sf files are located
        flux_var (str) : name of flux variable to consider

    Returns:
        tuple of the following numpy arrays
        - latitude
        - longitude
        - scale factor array with indices [month, lat, lon]
    """
    # find the last scale factor iteration file
    sf_fp = sorted(glob(osse_path + '/gctm.sf*'), key=lambda x: int(x[-2:]))[-1]

    # acquire scale factor pseudo netcdf file
    sf = pnc.pncopen(sf_fp)

    # get latitude and longitude
    lat = sf.variables['latitude'].array()
    lon = sf.variables['longitude'].array()

    # get the scale factors
    sf_arr = sf.variables[flux_var].array()[0, :, :, :]

    return lat, lon, sf_arr
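# Usage sketch for get_sf_data; the directory and flux variable name below
# are hypothetical placeholders, not values taken from this repository.
if __name__ == '__main__':
    lat, lon, sf_arr = get_sf_data('/path/to/osse_run', 'CO2_SF')
    # sf_arr is indexed [month, lat, lon]
    print(lat.shape, lon.shape, sf_arr.shape)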
def boundingbox(self, path, keys=['time']):
    tmpf = pnc.pncopen(path, format='netcdf')
    out = {}
    if 'time' in keys:
        rtf = pnc.PseudoNetCDFFile()
        rtf.createDimension('time', 1)
        rtf.copyVariable(tmpf['PRODUCT/time'], key='time')
        refdate = rtf.getTimes()[0]
        tunit = refdate.strftime('milliseconds since %F %H:%M:%S+0000')
        tf = pnc.PseudoNetCDFFile()
        tf.createDimension('time', 1)
        tf.copyDimension(tmpf['PRODUCT'].dimensions['scanline'])
        tf.copyVariable(tmpf['PRODUCT/delta_time'], key='time')
        tf.variables['time'].units = tunit
        tf = tf.removeSingleton()
        times = tf.getTimes()
        out['time'] = times.min(), times.max()
    if 'longitude' in keys:
        longitude = tmpf['PRODUCT/longitude'][:]
        out['longitude'] = longitude.min(), longitude.max()
    if 'latitude' in keys:
        latitude = tmpf['PRODUCT/latitude'][:]
        out['latitude'] = latitude.min(), latitude.max()
    # release the source file only after all requested keys have been read
    del tmpf
    return out
def opendappaths(inpaths, opts, verbose):
    omfs = []
    dapdims = opts.get('opendapdims', None)
    for inpath in inpaths:
        if verbose > 1:
            print('Opening', inpath, flush=True)
        tmpf = pnc.pncopen(inpath, format='netcdf')
        omfi = pnc.PseudoNetCDFFile()
        for varkey in opts['datakeys'] + opts['geokeys']:
            if verbose > 2:
                print('Processing', varkey, flush=True)
            tmpv = tmpf.variables[varkey]
            for dim, dimlen in zip(tmpv.dimensions, tmpv.shape):
                if dim not in omfi.dimensions:
                    omfi.createDimension(dim, dimlen)
            dtype = tmpv.dtype
            # Aura OMI data is occasionally stored as an int16
            # and scaled to a float32
            for propkey in ['scale_factor', 'add_offset']:
                if hasattr(tmpv, propkey):
                    stype = getattr(tmpv, propkey).dtype
                    if (
                        dtype.char in ('i', 'h')
                        and stype.char not in ('i', 'h')
                    ):
                        dtype = stype
            omfi.copyVariable(tmpv, key=varkey, dtype=dtype)
        if dapdims is not None:
            omfi.renameDimensions(**dapdims, inplace=True)
        omfs.append(omfi)
    return omfs
def writeconfig(sectors, outpath):
    hcpaths = []
    for sector in sectors:
        hemco2dpath = hemcotmpl(sector=sector, month=month)
        hemco3dpath = hemco2dpath.replace('0pt1degree', '0pt1degree_3D')
        if os.path.exists(hemco3dpath):
            hcpaths.append(hemco3dpath)
        elif os.path.exists(hemco2dpath):
            hcpaths.append(hemco2dpath)
        else:
            raise KeyError('Could not find regridded: ' + hemco2dpath)

    defaults = set()
    ignores = set()
    with open(outpath, 'w') as hcf:
        hcf.write('(((EPA2016_MONMEAN\n')
        for hcpath in hcpaths:
            hcpatt = changepathtopattern(hcpath)
            sector = getsector(hcpath)
            print(sector, hcpatt, end='', flush=True)
            hcfile = pnc.pncopen(hcpath, format='netcdf')
            for cqkey, v in hcfile.variables.items():
                if cqkey in hcfile.dimensions or cqkey in ('hyai', 'hybi'):
                    continue
                elif cqkey in ('TOLU', ):
                    warn('TOLU mass is duplicated by TOL')
                if cqkey in cq2gc:
                    gctrans = cq2gc.get(cqkey)
                    if len(gctrans) == 0:
                        ignores.add(cqkey)
                else:
                    defaults.add(cqkey)
                    gctrans = [[cqkey, '1007']]
                for gckey, scale in gctrans:
                    if gckey in [
                        'ACET', 'MEK', 'ALD2', 'PRPE', 'PRPA', 'BENZ',
                        'TOLU', 'XYLE', 'EOH', 'ALK4', 'ISOP'
                    ]:
                        units = 'kgC/m2/s'
                    else:
                        units = v.units.strip()
                    opts = dict(
                        unit=units, gckey=gckey, cqkey=cqkey, sector=sector,
                        path=hcpatt, scale=scale, cat='1/2', hier=50
                    )
                    hcf.write(
                        '0 EPA16_{gckey}__{sector}{cqkey} {path} {cqkey} 2016-2016/1-12/1/0 C xyz {unit} {gckey} {scale} {cat} {hier}\n'
                        .format(**opts))
                # If I use - to repeat the file, the mass is from the
                # previous cqkey too.
                # hcpatt = '-'
            print()
        hcf.write(')))EPA2016_MONMEAN\n')
    print('Ignored', sorted(ignores))
    print('Defaults', sorted(defaults))
def create_sf_dict(dir_path, variable, output_path=None, year=2010):
    """
    Creates a dictionary of data from a collection of gctm.sf.** files.
    The output file is saved as a pickle.

    Parameters:
        dir_path (str) : directory where code can find the scaling factors
        variable (str) : CO2 scaling variable to use in the output
        output_path (str) : save location of output pickle file (if not None)
        year (int) : starting year for inversion

    Returns:
        dictionary with the following key values
        - time : numpy array
        - latitude : numpy array
        - longitude : numpy array
        - sf_array : numpy array

    NOTE:
        - PseudoNetCDF assumes that the tracerinfo.dat and diaginfo.dat files
          are included in the directory path given.

    TODO:
        - the time dimensions in the sf files appear to all point to the
          same date.
    """
    # create the list of files
    sf_filepaths = sorted(glob.glob(dir_path + 'gctm.sf*'))

    # read in the above
    sf_files = [pnc.pncopen(path) for path in sf_filepaths]

    # get latitude/longitude/time information
    sample_file = sf_files[0]
    latitude = sample_file.variables['latitude'].array()
    longitude = sample_file.variables['longitude'].array()
    time_vals_raw = sample_file.variables['layer9'].array()
    time = [datetime(year, month, day=1) for month in time_vals_raw]

    # concatenate the scaling factors over time
    sf_concat = np.concatenate(
        [i.variables[variable].array() for i in sf_files]
    )

    output_dict = {
        'time': time,
        'latitude': latitude,
        'longitude': longitude,
        'sf_array': sf_concat
    }

    if output_path:
        # pickle requires a binary file handle
        with open(output_path, 'wb') as f:
            pickle.dump(output_dict, f)

    return output_dict
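# Usage sketch for create_sf_dict; the directory (note the trailing slash),
# variable name, and output pickle path are hypothetical placeholders.
if __name__ == '__main__':
    sf_dict = create_sf_dict(
        '/path/to/inversion_run/', 'CO2_SF',
        output_path='/tmp/sf_dict.pkl', year=2010
    )
    # sf_array stacks one grid of scale factors per gctm.sf file
    print(sf_dict['sf_array'].shape, len(sf_dict['time']))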
def openfile(path):
    """
    Clean out duplicated days
    """
    mo = int(path[-2:])
    f = pnc.pncopen(path, format='netcdf').subsetVariables(['O3'])
    times = f.getTimes()
    tidx = np.array([ti for ti, t in enumerate(times) if t.month == mo])
    return f.sliceDimensions(TSTEP=tidx)
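# Usage sketch for openfile; it assumes the path ends in a two-digit month
# (here '07' -> July). The path below is a hypothetical placeholder.
if __name__ == '__main__':
    julf = openfile('/path/to/hourly_o3_2016.07')
    print(len(julf.dimensions['TSTEP']), 'time steps kept for month 7')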
def get_timezonefile(self):
    if self.timezonefile is not None:
        return self.timezonefile
    elif os.path.exists(self.timezonepath):
        self.timezonefile = pnc.pncopen(self.timezonepath, format='ioapi')
        return self.get_timezonefile()

    print(
        f'{self.timezonepath} not available;'
        ' calculating UTCOFFSET in hours from longitude...',
        end=''
    )
    gf = pnc.pncopen(
        self.griddescpath, format='griddesc', GDNAM=self.gdnam
    )
    del gf.variables['TFLAG']
    gf.SDATE = 1970001
    I, J = np.meshgrid(np.arange(gf.NCOLS), np.arange(gf.NROWS))
    lon, lat = gf.ij2ll(I, J)
    utcoffset = (lon / 15)
    tzf = gf.subset([])
    tzvar = tzf.createVariable(
        'UTCOFFSET', 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
        long_name='UTCOFFSET', var_desc='UTCOFFSET', units='hours'
    )
    tzvar[:] = utcoffset
    mthdvar = tzf.createVariable(
        'METHOD', 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
        long_name='METHOD', units='none',
        var_desc='METHOD: 0=tz_world.geojson; 1=lon/15'
    )
    mthdvar[:] = 1
    tzf.updatetflag(overwrite=True)
    tzf.updatemeta()
    tzf.FILEDESC = """Calculated TZ from longitude"""
    tzf.HISTORY = """Calculated TZ from longitude"""
    tzf.save(
        self.timezonepath, format='NETCDF4_CLASSIC', complevel=1, verbose=0
    ).close()
    print('done')
    return self.get_timezonefile()
def process(args):
    if args.verbose > 1:
        print(f'Opening {args.GRIDDESC} GDNAM={args.GDNAM}', flush=True)
    gf = pnc.pncopen(args.GRIDDESC, format='griddesc', GDNAM=args.GDNAM)
    outpath = args.outpath
    if os.path.exists(outpath):
        print('Using cached', outpath, flush=True)
        return
    opts = eval(open(args.optpath, 'r').read())
    omf = subset(args, gf, opts)
    outf = grid(args, gf, opts, omf)
    outf.save(outpath, verbose=1, complevel=1)
def __init__(self, path):
    tmpf = pnc.pncopen(path, format='netcdf')
    geogrpk = 'PRODUCT/SUPPORT_DATA/GEOLOCATIONS/'
    outkeys = dict(
        time='PRODUCT/delta_time',
        qa_value='PRODUCT/qa_value',
        latitude='PRODUCT/latitude',
        longitude='PRODUCT/longitude',
        level='PRODUCT/layer',
        hyai='PRODUCT/tm5_constant_a',
        hybi='PRODUCT/tm5_constant_b',
        tropopause_level_index='PRODUCT/tm5_tropopause_layer_index',
        averaging_kernel='PRODUCT/averaging_kernel',
        nitrogendioxide_tropospheric_column=(
            'PRODUCT/nitrogendioxide_tropospheric_column'),
        air_mass_factor_troposphere='PRODUCT/air_mass_factor_troposphere',
        air_mass_factor_total='PRODUCT/air_mass_factor_total',
        surface_pressure='PRODUCT/SUPPORT_DATA/INPUT_DATA/surface_pressure',
        longitude_bounds=geogrpk + 'longitude_bounds',
        latitude_bounds=geogrpk + 'latitude_bounds',
        viewing_zenith_angle=geogrpk + 'viewing_zenith_angle',
        solar_zenith_angle=geogrpk + 'solar_zenith_angle'
    )
    f = pnc.PseudoNetCDFFile()
    for ok, ik in outkeys.items():
        iv = tmpf[ik]
        for dk, dl in zip(iv.dimensions, iv.shape):
            if dk not in f.dimensions:
                f.createDimension(dk, dl)
        f.copyVariable(iv, key=ok)

    tf = pnc.PseudoNetCDFFile()
    tf.createDimension('time', 1)
    tf.copyVariable(tmpf['PRODUCT/time'], key='time')
    refdate = tf.getTimes()[0]

    x = np.arange(len(f.dimensions['scanline']))
    y = np.arange(len(f.dimensions['ground_pixel']))
    X, Y = np.meshgrid(x, y)
    outf = f.removeSingleton().slice(
        scanline=X.ravel(), ground_pixel=Y.ravel(), newdims=('retrieval', )
    ).slice(
        scanline=X.ravel(), newdims=('retrieval', )
    ).slice(
        ground_pixel=Y.ravel(), newdims=('retrieval', )
    )
    outf.renameDimensions(scanline='retrieval', inplace=True)
    outf.renameDimensions(ground_pixel='retrieval', inplace=True)
    tunit = refdate.strftime('milliseconds since %F %H:%M:%S+0000')
    outf.variables['time'].units = tunit
    self.variables = outf.variables
    self.dimensions = outf.dimensions
    self.setncatts(outf.getncatts())
    del tmpf
def bpch_to_netCDF_via_PNC(format='bpch2', filename='ctm.nc',
                           output_file=None, bpch_file=None, folder=None):
    """ Convert bpch to NetCDF using PNC as backend """
    import PseudoNetCDF as pnc
    # Load the file into memory
    infile = pnc.pncopen(bpch_file, format=format)
    # Kludge - reduce DXYP_DXYP dims online
    dxyp = infile.variables['DXYP_DXYP']
    # Surface area should not have a time dim; if it does, remove it.
    if len(dxyp.shape) == 4:
        dxyp.dimensions = dxyp.dimensions[1:]
        infile.variables['DXYP_DXYP'] = dxyp
    # Now write file to disc
    # pnc.pncwrite(infile, folder+filename)
    pnc.pncwrite(infile, output_file)
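# Usage sketch for bpch_to_netCDF_via_PNC; the bpch and output paths are
# hypothetical placeholders. tracerinfo.dat/diaginfo.dat are expected
# alongside the bpch file so PseudoNetCDF can interpret it.
if __name__ == '__main__':
    bpch_to_netCDF_via_PNC(
        format='bpch2',
        bpch_file='/path/to/ctm.bpch',
        output_file='/path/to/ctm.nc'
    )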
def read_sf_objs(base_df_dir):
    """
    Reads in all objects present in the ./scale_factors directory

    Parameters:
        base_df_dir (str) : base directory where all scale factors can be
                            found

    Returns:
        list of sf objects

    NOTE:
        - tracerinfo and diaginfo files must be present in the given
          directory
    """
    # obtain the scale factor file names (NOTE: file order doesn't matter)
    file_names = glob(base_df_dir + '/data/scale_factors/sf*')

    return [pnc.pncopen(fn, format='bpch') for fn in file_names]
def getsites(path):
    """
    path : path to ioapi file
    returns i, j locations
    """
    keepvars = [
        'PM25_TOT', 'PM25_SO4', 'PM25_NO3', 'PM25_OC', 'PM25_OM', 'PM25_CL',
        'PMC_CL', 'PM25_EC', 'PM25_SOIL', 'PMC_TOT'
    ]
    print(path, flush=True)
    inf = pnc.pncopen(path, format='ioapi')
    varf = inf.subsetVariables(keepvars)
    # slice the variable subset (not the full file) at the site locations
    sitef = varf.slice(ROW=jc, COL=ic, newdims=('site', ))
    ntimes = len(sitef.dimensions['TSTEP'])
    dims = sitef.variables[keepvars[0]].dimensions
    mymask = mask[None, None, :].repeat(ntimes, 0)
    outf = sitef.mask(mymask, dims=dims)
    return outf
def combine(inpath, outpath, exprpath, clobber=False):
    """
    Arguments
    ---------
    inpath : path to netcdf input file
    outpath : path to output file
    exprpath : path to text file with expressions

    Returns
    -------
    None
    """
    if os.path.exists(outpath) and not clobber:
        print('Using cached:', outpath)
        return
    spcexpr = open(exprpath, 'r').read()
    infile = pnc.pncopen(inpath, format='ioapi')
    if len(infile.dimensions['TSTEP']) > 1:
        infile = infile.sliceDimensions(TSTEP=slice(None, -1))
    spcfile = infile.copy().eval(spcexpr, inplace=False)
    spcfile.save(outpath, format='NETCDF4_CLASSIC')
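# Usage sketch for combine; the expression file is a plain-text set of
# species definitions evaluated by PseudoNetCDF's eval. All paths below are
# hypothetical placeholders.
if __name__ == '__main__':
    combine(
        inpath='CCTM_ACONC_example.nc',
        outpath='COMBINE_ACONC_example.nc',
        exprpath='species_defn.expr',
        clobber=False
    )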
def collapse(inpath, outpath, clobber=False):
    """
    Arguments
    ---------
    inpath : path to netcdf input file
    outpath : path to output file

    Returns
    -------
    None
    """
    if os.path.exists(outpath) and not clobber:
        print('Using cached:', outpath)
        return
    infile = pnc.pncopen(inpath, format='ioapi')
    outfile = infile.interpSigma(
        vglvls=outvglvls, vgtop=infile.VGTOP, interptype='conserve'
    )
    outfile.save(outpath, format='NETCDF4_CLASSIC')
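# Usage sketch for collapse; it interpolates to the vertical grid defined by
# the module-level `outvglvls` (not shown in this excerpt). Paths are
# hypothetical placeholders.
if __name__ == '__main__':
    collapse('COMBINE_ACONC_example.nc', 'COMBINE_ACONC_example.4LAY.nc')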
def read_sf_objs(base_df_dir, sf_prefix):
    """
    Reads in all files in directory with provided scale factor prefix.
    E.g. ./scale_factors/sf_* where base_df_dir == 'scale_factors'
    and sf_prefix == 'sf_'

    Parameters:
        base_df_dir (str) : base directory where all scale factors can be
                            found
        sf_prefix (str) : prefix for each scale factor file

    Returns:
        list of sf objects

    NOTE:
        - tracerinfo and diaginfo files must be present in the given
          directory
        - all scale factor files are assumed to have the same prefix form
    """
    # obtain the scale factor file names (NOTE: file order doesn't matter)
    file_names = glob(base_df_dir + '/' + sf_prefix + '*')

    return [pnc.pncopen(fn, format='bpch') for fn in file_names]
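# Usage sketch for read_sf_objs; directory and prefix are hypothetical, and
# tracerinfo/diaginfo files must sit alongside the scale factor files.
if __name__ == '__main__':
    sf_objs = read_sf_objs('scale_factors', 'sf_')
    print(len(sf_objs), 'bpch scale-factor files opened')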
def __init__(
    self, gridpath, nominaldate='1970-01-01', format='griddesc', **kwds
):
    """
    Arguments
    ---------
    gridpath : str
        path to a GRIDDESC file
    nominaldate : str
        Date for spatial and regional files (default: '1970-01-01')
    format : str
        griddesc, by default, but can be any ioapi_base class
    kwds : mappable
        Keywords for opening GRIDDESC. For example, GDNAM if there are
        multiple domains.

    Returns
    -------
    """
    nominaldate = pd.to_datetime(nominaldate)
    gf = pnc.pncopen(gridpath, format=format, **kwds)
    gf.SDATE = int(nominaldate.strftime('%Y%j'))
    gf.STIME = int(nominaldate.strftime('%H%M%S'))
    gf.TSTEP = 10000
    self.spatialfile = gf.subset([])
    uv = self.spatialfile.createVariable(
        'UNIFORM', 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
        long_name='UNIFORM', var_desc='UNIFORM', units='none'
    )
    uv[:] = 1 / uv.size
    self.regionfile = gf.subset([])
    dw = self.regionfile.createVariable(
        'DOMAINWIDE', 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
        long_name='DOMAINWIDE', var_desc='DOMAINWIDE', units='fraction'
    )
    dw[:] = 1.
    self.regions = ['DOMAINWIDE']
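# Usage sketch for the constructor above; `SpatialAllocator` is a
# hypothetical name for the enclosing class, and the GRIDDESC path and GDNAM
# value are placeholders.
# allocator = SpatialAllocator('GRIDDESC', GDNAM='12US1')
# print(allocator.regions)                                   # ['DOMAINWIDE']
# print(allocator.spatialfile.variables['UNIFORM'][:].sum())  # ~1.0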
import PseudoNetCDF as pnc
from datetime import datetime
import sys
import os
import gc

exists = os.path.exists

# dummy assignment
f = pandoras

panpath = sys.argv[1]
concpat = sys.argv[2]
metpat = sys.argv[3]
outpat = sys.argv[4]

allf = pnc.pncopen(panpath, format='pandoraslb3')
allptimes = allf.getTimes()
moddates = sorted([
    datetime.strptime(p, metpat)
    for p in list(set([t.strftime(metpat) for t in allptimes]))
])

for moddate in moddates:
    metpath = moddate.strftime(metpat)
    concpath = moddate.strftime(concpat)
    outpath = moddate.strftime(outpat)
    if not exists(metpath):
        print('Skipping missing met date', metpath, flush=True)
        continue
    elif not exists(concpath):
        print('Skipping missing model date', concpath, flush=True)
        continue
os.system(
    f"wget --continue -q ftp://newftp.epa.gov/aqmg/global/gadm/gadm36_12US1.IOAPI.nc"
)

# # Opening Files For Reading And Plotting

# In[ ]:

smokepaths = {
    sector: smoketmpl(sector=sector, month=month)
    for sector in include_sectors + natural_sectors
}
smokefiles = {
    sector: pnc.pncopen(path, format='ioapi', mode='r')
    for sector, path in smokepaths.items()
}
reffile = smokefiles[include_sectors[0]]

# # Store Grid Parameters for Later Use
#
# * Regridding requires knowing about the grid structure
# * We are pulling all the metadata, so that we can use what we need.

# In[ ]:

gridproperties = reffile.getncatts()
exec('nominalarea = XCELL * YCELL', None, gridproperties)
def allocate(self, infile, alloc_keys, outpath=None, **save_kwds):
    """
    Arguments
    ---------
    infile : str or PseudoNetCDF File
        path to netcdf file (or file) to use as input (format keyword
        used as a modifier)
    alloc_keys : mappable or str
        alloc_keys key/value pairs map region and spatial allocation
        variables (e.g., DOMAINWIDE and POP) to variables in infile to
        allocate spatially. Each key should be a tuple of region and
        spatial variable (e.g., ('DOMAINWIDE', 'POPULATION')). The region
        key must exist as a variable in self.regionfile and the spatial
        variable must exist in self.spatialfile. Each value should be a
        list of variables in infile to pair with the region/spatial pair.
        One allocation variable can be assigned None instead of a list,
        which results in all unassigned variables being used. If
        alloc_keys is a str, this is equivalent to
        `alloc_keys={alloc_keys: None}`
    outpath : str or None
        path for output to be saved. If None, outf will be returned and
        not saved

    Returns
    -------
    outf : PseudoNetCDFFile
        file with spatial variation

    Notes
    -----
    """
    if isinstance(infile, str):
        infile = pnc.pncopen(infile, format=format)
    if isinstance(alloc_keys, str):
        alloc_keys = {alloc_keys: None}

    all_keys = []
    for k, v in infile.variables.items():
        if 'LAY' in v.dimensions:
            all_keys.append(k)

    assigned_keys = []
    isnone = []
    for (region, srgkey), varkeys in alloc_keys.items():
        if varkeys is None:
            isnone.append((region, srgkey))
        else:
            assigned_keys.extend(varkeys)

    unassigned_keys = list(set(all_keys).difference(assigned_keys))
    if len(isnone) > 1:
        raise ValueError(f'Can only have 1 None sector; got {isnone}')
    if len(isnone) == 1:
        alloc_keys[isnone[0]] = unassigned_keys

    outf = self.spatialfile.subset([])
    for (regionkey, allockey), varkeys in alloc_keys.items():
        regionvar = self.regionfile.variables[regionkey]
        allocvar = self.spatialfile.variables[allockey]
        factor = regionvar[:] * allocvar[:]
        factor /= factor.sum()
        for varkey in varkeys:
            invar = infile.variables[varkey]
            outvar = outf.createVariable(
                varkey, 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
                long_name=varkey, var_desc=varkey,
                units=getattr(invar, 'units', 'unknown')
            )
            outvar[:] = invar[:] * factor

    if outpath is None:
        return outf
    else:
        return outf.save(outpath, **save_kwds)
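# Usage sketch for the spatial allocate method; `allocator` is an instance
# of the (unnamed here) class, and the input/output file names are
# hypothetical. ('DOMAINWIDE', 'UNIFORM') uses the two variables created in
# the constructor above.
# outf = allocator.allocate(
#     'national_totals.nc',
#     alloc_keys={('DOMAINWIDE', 'UNIFORM'): None},
#     outpath='gridded_totals.nc', format='NETCDF4_CLASSIC'
# )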
from glob import glob
import os
from collections import OrderedDict
from functools import partial
import matplotlib.pyplot as plt
import PseudoNetCDF as pnc
from perim import perimslices

np = plt.np

inpaths = sorted(glob('../combine/*BCON.combine.4LAY.nc'))
infile = pnc.sci_var.stack_files(
    [pnc.pncopen(path, format='ioapi') for path in inpaths], 'TSTEP')
lays = np.arange(-.5, infile.NLAYS + 1)
pcolors = dict(S='#2ca02c', N='#d62728', W='#1f77b4', E='#ff7f0e', L='k')


def sigmabyt(plotfile, vark, title, pslices, yscale, outpath):
    pf = plotfile.subsetVariables([vark])
    var = pf.variables[vark]
    plt.close()
    units = var.units.strip()
    lname = var.long_name.strip()
    vglvls = plotfile.VGLVLS
    fig, axarr = plt.subplots(
        4, 1, sharex=True,
        gridspec_kw=dict(hspace=0.1, bottom=0.15), figsize=(6, 8))
def get_monthlyfile(self, propath=None, read_kwds=None):
    """
    Arguments
    ---------
    propath : str
        path to tpro file ATPRO_MONTHLY file
    read_kwds : dict or None
        If None, default read_kwds are dict(comment='#', index_col=0,
        names=['Jan', ..., 'Dec', 'comment'])

    Returns:
        df : PseudoNetCDFFile
            IOAPI-like file with month of year allocations for sectors (as
            variables) with shape TSTEP=12, LAY=1, ROW=NROWS, COL=NCOLS
    """
    if self.monthlyfile is not None:
        return self.monthlyfile
    elif os.path.exists(self.monthlypath):
        self.monthlyfile = pnc.pncopen(self.monthlypath, format='ioapi')
        return self.get_monthlyfile()

    if propath is None:
        raise KeyError(
            f'propath required because {self.monthlypath} not found'
        )
    print(
        f'{self.monthlypath} not available; calculating from {propath}'
    )
    names = _monnames + ['comment']
    if read_kwds is None:
        read_kwds = dict(comment='#', index_col=0, names=names)
    mondf = pd.read_csv(propath, **read_kwds)
    tzf = self.get_timezonefile()
    mon_f = tzf.subset([])
    mon_f.createDimension('TSTEP', 12).setunlimited(True)
    mon_f.SDATE = 2020001
    mon_f.STIME = 0
    mon_f.TSTEP = 24 * 30.5 * 10000
    for monidx, monrow in mondf.iterrows():
        cmt = monrow['comment']
        label = getlabel(cmt)
        print(label, cmt)
        monvals = monfactor(mondf, monidx, tzf)
        monvar = mon_f.createVariable(
            label, 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
            long_name=label, var_desc=label, units='s/s'
        )
        monvar[:] = monvals[:, None]

    mon_f.updatemeta()
    mon_f.updatetflag(overwrite=True)
    mon_f.FILEDESC = """ ## NASA-like metadata
1, 2310
Henderson, Barron
US EPA/Office of Air Quality Planning and Standards
EPA sector-based hourly profiles
Not Applicable
1, 1
2021, 01, 13, 2021, 01, 13
0
...
PI_CONTACT_INFO: [email protected]
PLATFORM: CMAQ Emission processing input
DATA_INFO: All data in monthly average per second rates
UNCERTAINTY: large, preliminary data based on US averages.
DM_CONTACT_INFO: Henderson, Barron, US EPA, [email protected]
PROJECT_INFO: For easy processing of emissions.
STIPULATIONS_ON_USE: Use of these data requires PI notification
OTHER_COMMENTS: None.
REVISION: R0
R0: Preliminary data
"""
    mon_f.save(
        self.monthlypath, format='NETCDF4_CLASSIC', complevel=1, verbose=0
    ).close()
    return self.get_monthlyfile()
import PseudoNetCDF as pnc
from readobs import obsdf, locs
import numpy as np
import pandas as pd
import os

dailytmpl = os.environ['DAILYTMPL']
shortname = os.environ['SHORTNAME']
monobskeys = ['FRH', 'FLRH', 'FSRH', 'FSSRH', 'SS_RAYLEIGH']
dayobskeys = ['GROUP']
cmaqinpath = 'derived/' + dailytmpl + '.ncf'
cmaqf = pnc.pncopen(cmaqinpath, format='ioapi').copy()
dims = cmaqf.variables['GROUP'].dimensions
cmaqg = cmaqf.variables['GROUP'][:]
q90 = (cmaqg != 90).filled(True)
q10 = (cmaqg != 10).filled(True)
qother = (~(q90 | q10)) | cmaqg.mask
cmaqif = cmaqf.mask(q90, dims=dims).apply(TSTEP='mean')
cmaqcf = cmaqf.mask(q10, dims=dims).apply(TSTEP='mean')
cmaqof = cmaqf.mask(qother, dims=dims).apply(TSTEP='mean')
impdf = obsdf.query('GROUP == 90').groupby(['site_id']).mean()
clrdf = obsdf.query('GROUP == 10').groupby(['site_id']).mean()
othdf = obsdf.query('(GROUP != 10) & (GROUP != 90)').groupby(
    ['site_id']).mean()
compkeys = 'SEA_SALT AMM_SO4 AMM_NO3 OMC EC CRUSTAL CM'.split()
extkeys = ('SS_RAYLEIGH E_SEA_SALT E_AMM_SO4 E_AMM_NO3 E_OMC E_EC '
           'E_CRUSTAL E_CM').split()
keys = ['longitude', 'latitude'] + compkeys + extkeys
siteids = np.char.decode(
    cmaqif.variables['site_id'][:].view('S16')).astype(str)[:, 0]
from mpl_toolkits.basemap import Basemap
import argparse
import os
# imports required by the calls below
import matplotlib.pyplot as plt
import numpy as np
import PseudoNetCDF as pnc

parser = argparse.ArgumentParser()
parser.add_argument('obspath')
parser.add_argument('modpath')
args = parser.parse_args([
    '../obs/CASTNET2016.nc',
    '../mod/combine_aconc_v521_intel17.0_HEMIS_cb6_2016.nc',
])

plt.rcParams['axes.labelsize'] = 18
plt.rcParams['xtick.labelsize'] = 16
plt.rcParams['ytick.labelsize'] = 16

modf = pnc.pncopen(args.modpath).copy()
obsf = pnc.pncopen(args.obspath).copy()
lat = obsf.variables['latitude']
lon = obsf.variables['longitude']
tzoff = -obsf.variables['TIME_OFFSET'][:].astype('i')
tidx = np.arange(24, len(modf.dimensions['time']) - 24)[:, None] + tzoff
sidx = np.arange(len(obsf.dimensions['site']))[None, :].repeat(
    tidx.shape[0], 0)
times = obsf.getTimes()
oto3 = obsf.variables['O3'][tidx, sidx].T
mto3 = modf.variables['O3'][tidx, 0, sidx].T
bto3 = mto3 - oto3
    # (tail of a function whose signature precedes this excerpt)
    del tmpf.variables['TFLAG']
    tmpf = fracf.subsetVariables([vark])
    tmpf = tmpf.slice(TSTEP=include)
    return np.ma.filled(tmpf.variables[vark], 0).sum(0, keepdims=True)


def getmask(idf, vark, namelist):
    var = idf.variables[vark]
    i2k = eval(var.description)
    k2i = {k: i for i, k in i2k.items()}
    idlist = [k2i[k] for k in namelist]
    outvar = np.in1d(var[:], idlist).reshape(var.shape)
    return outvar


gadmf = pnc.pncopen(args.inpath, format='ioapi')
if args.variable is None:
    for vark in ['ID_0', 'ID_1', 'ID_2']:
        if vark in gadmf.variables:
            args.variable = vark
            break
    else:
        print('Could not find variable ID_0, ID_1, ID_2')
        exit()

outf = gadmf.slice(TSTEP=0).subsetVariables([args.variable])
configd = json.load(open(args.definitions, mode='r', encoding='utf-8'))
for outvark, namelist in configd.items():
    outv = outf.createVariable(outvark, 'f', ('TSTEP', 'LAY', 'ROW', 'COL'))
    outv.units = '1'
    outv.long_name = outvark.ljust(16)
import sys
import os
import PseudoNetCDF as pnc

# inpath = 'GRIDCRO2D.12US2.35L.160101'
# outpath = 'GRIDCRO2D.12US2.35L.160101.CF.nc'
try:
    inpath, outpath = sys.argv[1:]
except Exception:
    print('Usage: python {} <INPATH> <OUTPATH>'.format(sys.argv[0]))
    sys.exit(1)

if not os.path.exists(inpath):
    raise IOError(f'{inpath} does not exist.')

if os.path.exists(outpath):
    raise IOError(f'{outpath} exists. Will not overwrite.')

infile = pnc.pncopen(inpath, format='ioapi').copy()
pnc.conventions.ioapi.add_cf_from_ioapi(infile)
infile.save(outpath, verbose=0)
def allocate(
    self, infile, outdate, alloc_keys, outpath=None, monthly=True,
    dayofweek=True, diurnal=True, time=None, format=None,
    overwrite=False, verbose=0
):
    """
    Arguments
    ---------
    infile : str or PseudoNetCDF File
        path to netcdf file (or file) to use as input (format keyword
        used as a modifier)
    outdate : datetime
        date for the destination (output) file
    outpath : str or None
        path for output to be saved. If None, outf will be returned and
        not saved
    alloc_keys : mappable or str
        alloc_keys key/value pairs map allocation variables (e.g., ENERGY)
        to variables in infile to allocate temporally. Each key should be
        in monthlyfile/dayofweekfile/diurnalfile variables. And each value
        is a list of variables in infile. One allocation variable can be
        assigned None instead of a list, which results in all unassigned
        variables being used. If alloc_keys is a str, this is equivalent
        to `alloc_keys={alloc_keys: None}`
    monthly : bool
        apply monthly scaling. If file already has months, use month=False
        and time=m to apply other scaling to time m.
    dayofweek : bool
        apply day of week scaling. If file already has day of week, use
        dayofweek=False and time=d to apply other scaling to time d.
    diurnal : bool
        apply hour of day scaling. If file already has hour of day, use
        diurnal=False and time=h to apply other scaling to time h.
    time : int or None
        if None, checks to ensure that file has only 1 time and uses
        first (i.e., 0)
    format : str
        format of file or meta data (e.g., netcdf or ioapi; see
        PseudoNetCDF pncopen)

    Returns
    -------
    outf : PseudoNetCDFFile
        file with temporal variation

    Notes
    -----
    1. month, dayofweek, and diurnal can be combined to exclude one or
       many scalings
    """
    remove = False
    if outpath is not None and os.path.exists(outpath):
        if not overwrite:
            raise IOError(f'{outpath} exists')
        else:
            remove = True
    refdate = outdate
    if verbose > 0:
        print('Opening input', flush=True)
    if isinstance(infile, str):
        ef = pnc.pncopen(infile, format=format)
    else:
        ef = infile
    if isinstance(alloc_keys, str):
        alloc_keys = {alloc_keys: None}

    all_keys = []
    for k, v in ef.variables.items():
        if 'LAY' in v.dimensions:
            all_keys.append(k)

    assigned_keys = []
    isnone = []
    for sector, varkeys in alloc_keys.items():
        if varkeys is None:
            isnone.append(sector)
        else:
            assigned_keys.extend(varkeys)

    unassigned_keys = list(set(all_keys).difference(assigned_keys))
    if len(isnone) > 1:
        raise ValueError(f'Can only have 1 None sector; got {isnone}')
    if len(isnone) == 1:
        alloc_keys[isnone[0]] = unassigned_keys

    if time is None:
        if len(ef.dimensions['TSTEP']) > 1:
            print('Time dimension is not 1, so you must choose a time')
        else:
            time = 0

    if format == 'ioapi':
        if verbose > 0:
            print('Appending TFLAG to exclude', flush=True)

    if verbose > 0:
        print('Creating output template', flush=True)
    outf = ef.subset([])
    if 'TFLAG' in outf.variables:
        del outf.variables['TFLAG']

    nsteps = 1
    if monthly:
        nsteps = nsteps * 1
        tstep = 30 * 240000
    if dayofweek:
        nsteps = nsteps * 1
        tstep = 240000
    if diurnal:
        nsteps = nsteps * 25
        tstep = 10000

    outf.createDimension('TSTEP', nsteps).setunlimited(True)
    if verbose > 0:
        print('Calculating composite factor', flush=True)
    for sectorkey, varkeys in alloc_keys.items():
        factor = self.get_factor(
            sectorkey, refdate, diurnal=diurnal, dayofweek=dayofweek,
            monthly=monthly
        )
        for varkey in varkeys:
            invar = ef.variables[varkey]
            if verbose > 0:
                print(f'Scaling {varkey}...', flush=True)
            outvar = outf.copyVariable(invar, key=varkey, withdata=False)
            outvar.setncatts(
                {pk: getattr(invar, pk) for pk in invar.ncattrs()}
            )
            outvar[:] = invar[time] * factor

    outf.SDATE = int(refdate.strftime('%Y%j'))
    outf.STIME = int(refdate.strftime('%H%M%S'))
    outf.TSTEP = tstep
    if format == 'ioapi':
        outf.updatemeta()
        outf.updatetflag(overwrite=True)
    history = getattr(outf, 'HISTORY')
    history += f'apply_temporal({locals()})'
    setattr(outf, 'HISTORY', history)
    if outpath is not None and remove:
        os.remove(outpath)
    if outpath is None:
        return outf
    else:
        outf.save(outpath, verbose=0).close()
        return pnc.pncopen(outpath, format='ioapi')
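# Usage sketch for the temporal allocate method; `tallocator` is an instance
# of the (unnamed here) class, 'ENERGY' stands in for a profile variable in
# its monthly/day-of-week/diurnal files, and paths are hypothetical.
# import datetime
# outf = tallocator.allocate(
#     'annual_emissions.nc', datetime.datetime(2016, 7, 1),
#     alloc_keys={'ENERGY': None}, format='ioapi',
#     outpath='hourly_emissions_20160701.nc', overwrite=True, verbose=1
# )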
# imports required by the calls below
from glob import glob
from warnings import warn
import matplotlib.pyplot as plt
from matplotlib.ticker import StrMethodFormatter
import PseudoNetCDF as pnc
from perim import perimslices

np = plt.np

inpaths = sorted(glob('../combine/*201?????.BCON.combine.nc'))
tslice = slice(None, None, 6)
varks = ['O3PPB', 'ASO4IJ', 'ANO3IJ', 'NOx', 'ANAIJ', 'PMIJ']
infiles = [
    pnc.pncopen(inpath, format='ioapi')
    .subsetVariables(varks).sliceDimensions(TSTEP=tslice)
    for inpath in inpaths
]
infile = infiles[0].stack(infiles[1:], 'TSTEP')
del infiles
infile.TSTEP = tslice.step * infile.TSTEP
time = infile.getTimes()
warn('Debug using {}h'.format(tslice.step))
lays = np.arange(0, infile.NLAYS + 1)


def sigmabyt(plotfile, vark, title, norm, ticks, formatter, outpath):
    plt.close()
    ax = plotfile.plot(vark, plottype='TSTEP-LAY',
def openhe5(inpaths, opts, verbose):
    tdim = opts.get('time_dim', 'nTimes')
    xdim = opts.get('xtrack_dim', 'nXtrack')
    lcenterdim = opts.get('level_center_dim', 'nLevels')
    omfs = []
    for inpath in inpaths:
        if verbose > 1:
            print('Opening', inpath, flush=True)
        tmpf = pnc.pncopen(inpath, format='netcdf')
        omfi = pnc.PseudoNetCDFFile.from_ncvs(**{
            varkey: tmpf[opts['datagrp']].variables[varkey]
            for varkey in opts['datakeys']
        })
        _applyscale(omfi)
        omgfi = pnc.PseudoNetCDFFile.from_ncvs(**{
            varkey: tmpf[opts['geogrp']].variables[varkey]
            for varkey in opts['geokeys']
        })
        _applyscale(omgfi)
        datadims = opts.get('datadims', None)
        geodims = opts.get('geodims', None)
        if datadims is None:
            ddims = list(omfi.dimensions)
            datadims = dict(zip(ddims, [tdim, xdim, lcenterdim]))
            print('Dimension mapping heuristically', flush=True)
            print({dk: len(dv) for dk, dv in omfi.dimensions.items()})
            print('Selected dimension mapping:', datadims, flush=True)
        if geodims is None:
            gdims = list(omgfi.dimensions)
            geodims = dict(zip(gdims, [tdim, xdim, lcenterdim]))
            print('Dimension mapping heuristically', flush=True)
            print({dk: len(dv) for dk, dv in omgfi.dimensions.items()})
            print('Selected dimension mapping:', geodims, flush=True)
        for inkey, outkey in datadims.items():
            if inkey not in omfi.dimensions:
                print(
                    '** Error renaming data dimension:\n'
                    + f'Key {inkey} ({outkey}) not found:\n{omfi.dimensions}'
                    + '\n\n** Try increasing or decreasing phony numbered'
                    + ' dimensions by 1 in the configuration.'
                    + '\n** Different netcdf versions give them'
                    + ' different names for repeated length dimensions.')
                sys.exit()
        omfi.renameDimensions(**datadims, inplace=True)
        for inkey, outkey in geodims.items():
            if inkey not in omgfi.dimensions:
                print(
                    'Error renaming geo dimension:'
                    + f'Key {inkey} ({outkey}) not found:\n{omgfi.dimensions}'
                    + '\n\n** Try increasing or decreasing phony numbered'
                    + ' dimensions by 1 in the configuration.'
                    + '\n** Different netcdf versions give them'
                    + ' different names for repeated length dimensions.')
                sys.exit()
        omgfi.renameDimensions(**geodims, inplace=True)
        for geokey in opts['geokeys']:
            omfi.copyVariable(omgfi.variables[geokey], key=geokey)
        flipdimkeys = opts.get('flipdims', [])
        if len(flipdimkeys) > 0:
            flipslices = {
                k: slice(None, None, -1)
                for k in flipdimkeys if k in omfi.dimensions
            }
            omfi = omfi.sliceDimensions(**flipslices)
        omfs.append(omfi)
    return omfs
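# Minimal sketch of the `opts` dictionary that openhe5 reads; the group
# names, variable keys, and file name below are hypothetical examples for an
# OMI-style HE5 file, not values taken from a configuration in this
# repository.
example_opts = {
    'datagrp': '/HDFEOS/SWATHS/ColumnAmountNO2/Data Fields',
    'geogrp': '/HDFEOS/SWATHS/ColumnAmountNO2/Geolocation Fields',
    'datakeys': ['ColumnAmountNO2Trop'],
    'geokeys': ['Latitude', 'Longitude', 'Time'],
    # optional: explicit phony-dimension to named-dimension mappings
    # 'datadims': {'phony_dim_0': 'nTimes', 'phony_dim_1': 'nXtrack'},
    # 'geodims': {'phony_dim_0': 'nTimes', 'phony_dim_1': 'nXtrack'},
}
# omfs = openhe5(['OMI-Aura_L2-OMNO2_example.he5'], example_opts, verbose=2)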
test = """ gcpath = 'CONC/GEOSChem.SpeciesConc.20160701_0000z.nc4' #gcpath = '/work/ROMO/global/GCv12.0.1/GC/rundirs/geosfp_2x25_standard/Output/GEOSChem.SpeciesConc.20160101_0000z.nc4' gcexprpath = 'definitions/gc/gc12_to_cb6r3.expr' aeexprpath = 'definitions/gc/gc12_to_ae6_nvPOA.expr' args = parser.parse_args([ '--spcprefix', 'SpeciesConc_', gcpath, 'GEOS-Chem_Species_Database.json', 'CMAQ.json', gcexprpath, aeexprpath ]) """ args = parser.parse_args() f = pnc.pncopen(args.inpath) fromspcs = json.load(open(args.fromjson, 'r')) tospcs = json.load(open(args.tojson, 'r')) exprstr = '\n'.join( [open(exprpath, 'r').read() for exprpath in args.exprpaths]) noadvspc = [k for k, v in fromspcs.items() if not v['Is_Advected']] gcspc = [ k for k, v in fromspcs.items() if not v['Is_Aero'] and v['Is_Advected'] ] aespc = [k for k, v in fromspcs.items() if v['Is_Aero'] and v['Is_Advected']] spc = gcspc prefix = args.spcprefix symtbl = symtable(exprstr, '<pncexpr>', 'exec')
def get_dayofweekfile(self, propath=None, read_kwds=None):
    """
    Arguments
    ---------
    propath : str
        path to tpro file ATPRO_WEEKLY file
    read_kwds : dict or None
        If None, default read_kwds are dict(comment='#', index_col=0,
        names=['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun', 'comment'])

    Returns:
        df : PseudoNetCDFFile
            IOAPI-like file with day of week allocations for sectors (as
            variables) with shape TSTEP=7, LAY=1, ROW=NROWS, COL=NCOLS
    """
    if self.dayofweekfile is not None:
        return self.dayofweekfile
    elif os.path.exists(self.dayofweekpath):
        self.dayofweekfile = pnc.pncopen(
            self.dayofweekpath, format='ioapi'
        )
        return self.get_dayofweekfile()

    if propath is None:
        raise KeyError(
            f'propath required because {self.dayofweekpath} not found'
        )
    print(
        f'{self.dayofweekpath} not available; calculating from {propath}'
    )
    if read_kwds is None:
        read_kwds = dict(
            comment='#', index_col=0,
            names='Mon Tue Wed Thu Fri Sat Sun comment'.split()
        )
    wkdf = pd.read_csv(propath, **read_kwds)
    wkdf.index.name = 'profile_id'
    tzf = self.get_timezonefile()
    day_f = tzf.subset([])
    day_f.createDimension('TSTEP', 25).setunlimited(True)
    day_f.createDimension('LAY', 7)
    day_f.VGLVLS = np.arange(8)
    day_f.VGTYP = 6
    day_f.SDATE = 2020001
    day_f.STIME = 0
    day_f.TSTEP = 10000
    for wkidx, wkrow in wkdf.iterrows():
        cmt = wkrow['comment']
        label = getlabel(cmt)
        print(label, cmt)
        wkvals = weekdayfactor(wkdf, wkidx, tzf)
        wkvar = day_f.createVariable(
            label, 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
            long_name=label, var_desc=label, units='s/s'
        )
        wkvar[:] = wkvals

    day_f.updatemeta()
    day_f.updatetflag(overwrite=True)
    day_f.FILEDESC = (
        """ ## NASA-like metadata
1, 2310
Henderson, Barron
US EPA/Office of Air Quality Planning and Standards
EPA sector-based hourly profiles
Not Applicable
1, 1
2021, 01, 13, 2021, 01, 13
0
...
PI_CONTACT_INFO: [email protected]
PLATFORM: CMAQ Emission processing input
DATA_INFO: All data in daily average per second rates
UNCERTAINTY: large, preliminary data based on US averages.
DM_CONTACT_INFO: Henderson, Barron, US EPA, [email protected]
PROJECT_INFO: For easy processing of emissions.
STIPULATIONS_ON_USE: Use of these data requires PI notification
OTHER_COMMENTS: The LAY dimension is day of the week (Mon, Tue, ..., Sun). """
        + "Time is UTC, but the profiles are based on LST days. So, "
        + "UTC_Mon will include hours from Sun and Tue as appropriate "
        + """given the hour offset.
REVISION: R0
R0: Preliminary data
"""
    )
    day_f.save(
        self.dayofweekpath, format='NETCDF4_CLASSIC', complevel=1, verbose=0
    ).close()
    return self.get_dayofweekfile()