def boundingbox(self, path, keys=('time',)):
    """
    Return the (minimum, maximum) range of selected coordinates in a file.

    Arguments
    ---------
    path : str
        path of the file; opened with pnc.pncopen(path, format='netcdf')
    keys : iterable of str
        any of 'time', 'longitude' and 'latitude'; default is 'time' only

    Returns
    -------
    out : dict
        one (minimum, maximum) tuple for each requested key
    """
    tmpf = pnc.pncopen(path, format='netcdf')
    out = {}
    if 'time' in keys:
        # PRODUCT/time is decoded on its own to obtain the reference date
        # that PRODUCT/delta_time is measured from.
        rtf = pnc.PseudoNetCDFFile()
        rtf.createDimension('time', 1)
        rtf.copyVariable(tmpf['PRODUCT/time'], key='time')
        refdate = rtf.getTimes()[0]
        tunit = refdate.strftime('milliseconds since %F %H:%M:%S+0000')

        # delta_time is copied as 'time' and given units relative to the
        # reference date so that getTimes can decode it.
        tf = pnc.PseudoNetCDFFile()
        tf.createDimension('time', 1)
        tf.copyDimension(tmpf['PRODUCT'].dimensions['scanline'])
        tf.copyVariable(tmpf['PRODUCT/delta_time'], key='time')
        tf.variables['time'].units = tunit
        tf = tf.removeSingleton()
        times = tf.getTimes()
        out['time'] = times.min(), times.max()
    # The input file must remain available here: it used to be deleted in
    # the time branch, which broke any request that combined time with
    # longitude or latitude.
    if 'longitude' in keys:
        longitude = tmpf['PRODUCT/longitude'][:]
        out['longitude'] = longitude.min(), longitude.max()
    # Latitude is keyed on 'latitude' (it was previously tied to the
    # 'longitude' key by a duplicated condition).
    if 'latitude' in keys:
        latitude = tmpf['PRODUCT/latitude'][:]
        out['latitude'] = latitude.min(), latitude.max()
    return out
def __init__(self, path):
    """
    Load selected PRODUCT variables from path onto a 'retrieval' dimension.

    Arguments
    ---------
    path : str
        path of the file; opened with pnc.pncopen(path, format='netcdf')

    Notes
    -----
    The variables, dimensions and attributes of the reshaped file are
    attached to this object; nothing is returned.
    """
    infile = pnc.pncopen(path, format='netcdf')
    geoprefix = 'PRODUCT/SUPPORT_DATA/GEOLOCATIONS/'
    # output variable name -> path of the variable within the input file
    # (insertion order is the order in which variables are copied)
    sources = {
        'time': 'PRODUCT/delta_time',
        'qa_value': 'PRODUCT/qa_value',
        'latitude': 'PRODUCT/latitude',
        'longitude': 'PRODUCT/longitude',
        'level': 'PRODUCT/layer',
        'hyai': 'PRODUCT/tm5_constant_a',
        'hybi': 'PRODUCT/tm5_constant_b',
        'tropopause_level_index': 'PRODUCT/tm5_tropopause_layer_index',
        'averaging_kernel': 'PRODUCT/averaging_kernel',
        'nitrogendioxide_tropospheric_column':
            'PRODUCT/nitrogendioxide_tropospheric_column',
        'air_mass_factor_troposphere': 'PRODUCT/air_mass_factor_troposphere',
        'air_mass_factor_total': 'PRODUCT/air_mass_factor_total',
        'surface_pressure': 'PRODUCT/SUPPORT_DATA/INPUT_DATA/surface_pressure',
        'longitude_bounds': geoprefix + 'longitude_bounds',
        'latitude_bounds': geoprefix + 'latitude_bounds',
        'viewing_zenith_angle': geoprefix + 'viewing_zenith_angle',
        'solar_zenith_angle': geoprefix + 'solar_zenith_angle',
    }

    # Copy each variable into one flat file, creating dimensions on first
    # use with the length seen on the variable that introduces them.
    flatf = pnc.PseudoNetCDFFile()
    for outkey, inkey in sources.items():
        invar = infile[inkey]
        for dimkey, dimlen in zip(invar.dimensions, invar.shape):
            if dimkey not in flatf.dimensions:
                flatf.createDimension(dimkey, dimlen)
        flatf.copyVariable(invar, key=outkey)

    # Decode PRODUCT/time by itself to get the reference date.
    reff = pnc.PseudoNetCDFFile()
    reff.createDimension('time', 1)
    reff.copyVariable(infile['PRODUCT/time'], key='time')
    refdate = reff.getTimes()[0]

    # Index pairs covering every (scanline, ground_pixel) combination.
    scan_idx = np.arange(len(flatf.dimensions['scanline']))
    pixel_idx = np.arange(len(flatf.dimensions['ground_pixel']))
    scan_grid, pixel_grid = np.meshgrid(scan_idx, pixel_idx)
    scan_flat = scan_grid.ravel()
    pixel_flat = pixel_grid.ravel()

    # Same three slice calls, in the same order, as a single chain would
    # make; presumably the later calls handle variables that carry only one
    # of the two dimensions (TODO confirm against PseudoNetCDF).
    outf = flatf.removeSingleton()
    outf = outf.slice(scanline=scan_flat, ground_pixel=pixel_flat,
                      newdims=('retrieval', ))
    outf = outf.slice(scanline=scan_flat, newdims=('retrieval', ))
    outf = outf.slice(ground_pixel=pixel_flat, newdims=('retrieval', ))
    outf.renameDimensions(scanline='retrieval', inplace=True)
    outf.renameDimensions(ground_pixel='retrieval', inplace=True)

    # 'time' came from delta_time, so its units are set relative to the
    # reference date.
    outf.variables['time'].units = refdate.strftime(
        'milliseconds since %F %H:%M:%S+0000')

    self.variables = outf.variables
    self.dimensions = outf.dimensions
    self.setncatts(outf.getncatts())
    del infile
def opendappaths(inpaths, opts, verbose):
    """
    Open each path and copy the requested variables into in-memory files.

    Arguments
    ---------
    inpaths : iterable of str
        paths opened with pnc.pncopen(inpath, format='netcdf')
    opts : mappable
        must have 'datakeys' and 'geokeys' (lists of variable names) when
        inpaths is not empty; may have 'opendapdims', a mapping passed to
        renameDimensions as keywords
    verbose : int
        paths are printed when above 1 and variable names when above 2

    Returns
    -------
    omfs : list
        one pnc.PseudoNetCDFFile for each input path
    """
    integer_chars = ('i', 'h')

    def _copy_dtype(srcvar):
        # Aura OMI data is occasionally stored as an int16 and scaled to a
        # float32; in that case the type of the scaling attribute is used
        # for the copy instead of the stored integer type.
        chosen = srcvar.dtype
        for attrkey in ['scale_factor', 'add_offset']:
            if not hasattr(srcvar, attrkey):
                continue
            attrtype = getattr(srcvar, attrkey).dtype
            if (chosen.char in integer_chars
                    and attrtype.char not in integer_chars):
                chosen = attrtype
        return chosen

    rename_map = opts.get('opendapdims')
    results = []
    for inpath in inpaths:
        if verbose > 1:
            print('Opening', inpath, flush=True)
        srcfile = pnc.pncopen(inpath, format='netcdf')
        memfile = pnc.PseudoNetCDFFile()
        for varkey in opts['datakeys'] + opts['geokeys']:
            if verbose > 2:
                print('Processing', varkey, flush=True)
            srcvar = srcfile.variables[varkey]
            # Dimensions are created on first use with the length seen on
            # the variable that introduces them.
            for dimkey, dimlen in zip(srcvar.dimensions, srcvar.shape):
                if dimkey not in memfile.dimensions:
                    memfile.createDimension(dimkey, dimlen)
            memfile.copyVariable(srcvar, key=varkey, dtype=_copy_dtype(srcvar))
        if rename_map is not None:
            memfile.renameDimensions(**rename_map, inplace=True)
        results.append(memfile)
    return results
NO2.var_desc = 'NO2 columns' """, inplace=True) modts = cf.getTimes() stime = modts[0] etime = modts[-1] f = allf.slice(time=(allptimes >= stime) & (allptimes < etime)) t, k, j, i = f.findcells(cf) ts = f.getTimes() mod = cf.variables['NO2'][t, k, j, i].sum(1) mod.dimensions = ('time', ) sza = f.variables['SolarZenithAngle'] saa = f.variables['SolarAzimuthAngle'] obs = f.variables['SpeciesColumnAmount'] obsu = f.variables['SpeciesColumnUncertainty'] outf = pnc.PseudoNetCDFFile() outf.createDimension('time', len(ts)) outf.createDimension('layer', len(cf.dimensions['LAY'])) outf.copyVariable(f.variables['time'], key='time') vari = outf.createVariable('i', 'f', ('time', 'layer')) vari.long_name = vari.var_desc = 'i' vari.units = '0-based index' vari[:] = i varj = outf.copyVariable(vari[:] * 0 + j, key='j') varj.long_name = varj.var_desc = 'j' vark = outf.copyVariable(vari[:] * 0 + k, key='k') vark.long_name = vark.var_desc = 'k' outf.copyVariable(saa, key='SAA') outf.copyVariable(sza, key='SZA') outf.copyVariable(mod, key='MOD') outf.copyVariable(obs, key='OBS')
def grid(args, gf, opts, omf):
    """
    Bin masked satellite retrievals onto the grid described by gf.

    Arguments
    ---------
    args : namespace
        must have outpath and verbose; str(args) is stored in HISTORY
    gf : pnc.PseudoNetCDFFile
        griddesc file that implements IOAPI; must provide ll2ij, NROWS,
        NCOLS and a DUMMY variable. SDATE, STIME and TSTEP are set on it.
    opts : mappable
        Product specific options. 'datakeys' and 'pressurekey' are
        required; 'outkeys', 'renamevars', 'Latitude', 'Longitude', 'Time',
        'time_dim', 'xtrack_dim', 'level_center_dim' and 'level_edge_dim'
        are optional.
    omf : pnc.PseudoNetCDFFile
        subset of data with masks applied; must have a BADDATA variable
        that equals 1 where data should be excluded

    Returns
    -------
    outf : PseudoNetCDFFile or None
        For each gridded key there is a mean variable and an 'N'-prefixed
        count variable with dimensions TSTEP, LAY, ROW, COL (or TSTEP, LAY,
        LAY, ROW, COL for inputs with two trailing level dimensions).
        None is returned when every BADDATA value is 1.

    Raises
    ------
    ValueError
        if a variable mask cannot be aligned with the BADDATA mask, or the
        pressure variable has neither the level center nor the level edge
        dimension
    """
    outpath = args.outpath
    datakeys = opts['datakeys']
    outkeys = opts.get('outkeys', datakeys)
    latkey = opts.get('Latitude', 'Latitude')
    lonkey = opts.get('Longitude', 'Longitude')
    timekey = opts.get('Time', 'Time')
    tdim = opts.get('time_dim', 'nTimes')
    xdim = opts.get('xtrack_dim', 'nXtrack')
    lcenterdim = opts.get('level_center_dim', 'nLevels')
    ledgedim = opts.get('level_edge_dim', 'nLevelEdges')

    if args.verbose > 1:
        print('Calculating time', flush=True)
    # Use the first time variable that exists; if none does, fall back to a
    # single-element placeholder time variable.
    for tkey in [timekey, 'Time', 'time', 'TIME']:
        if tkey in omf.variables:
            tf = omf.subsetVariables([tkey]).renameVariable(tkey, 'time')
            tf.variables['time'].units = (
                "seconds since 1993-01-01 00:00:00+0000")
            break
    else:
        tf = pnc.PseudoNetCDFFile()
        tf.createDimension('time', 1)
        t = tf.createVariable('time', 'd', ('time', ))
        t.units = "seconds since 1993-01-01 00:00:00+0000"

    # The output date is the date of the first time; the time step is fixed
    # at 240000 (HHMMSS).
    date = tf.getTimes()[0]
    gf.SDATE = int(date.strftime('%Y%j'))
    gf.STIME = 0
    gf.TSTEP = 240000

    LAT = omf.variables[latkey][:]
    LON = omf.variables[lonkey][:]
    i, j = gf.ll2ij(LON, LAT, clean='mask')
    mask2d = omf.variables['BADDATA'][:] == 1

    if mask2d.all():
        print('No data; skipping', outpath, flush=True)
        return
    else:
        print('Making', outpath, flush=True)

    if args.verbose > 0:
        # Diagnostic only: distribution of approximate local hour
        # (UTC hour + longitude / 15) over the unmasked pixels.
        utchour = np.array([t.hour for t in tf.getTimes()])
        localhour = np.ma.masked_where(mask2d, utchour[:, None] + LON / 15)
        ptiles = [0, 10, 25, 75, 90, 100]
        localhourpct = np.percentile(localhour.compressed(), ptiles)
        localhourpctstr = ' '.join(['{:5.2f}'.format(h) for h in localhourpct])
        ptilestr = ' '.join(['{:5d}'.format(p) for p in ptiles])
        print('Percentiles:', ptilestr)
        print('Local Time :', localhourpctstr)

    outf = gf.copy().subsetVariables(['DUMMY'])
    if lcenterdim in omf.dimensions:
        nk = len(omf.dimensions[lcenterdim])
    else:
        nk = 1
    outf.createDimension('LAY', nk)

    # Output names of variables without a level dimension; their data sits
    # in layer 0 and needs special handling if layers are inverted later.
    twodkeys = []
    renamevars = opts.get('renamevars', {})
    for varkey in outkeys:
        outvarkey = renamevars.get(varkey, varkey)
        if args.verbose > 1:
            print(f'Masking and gridding {varkey} as {outvarkey}', flush=True)
        varv = omf.variables[varkey]
        if tdim not in varv.dimensions:
            continue
        varo = np.ma.masked_invalid(
            reorderVarDims(varv, (tdim, xdim), key=varkey)[:])
        # getmaskarray always yields a full-shape boolean array, so the
        # shape comparisons below are well defined.
        varmask = np.ma.getmaskarray(varo)
        # NOTE(review): this result is always replaced below; the call is
        # kept in case broadcastVar validates its inputs -- confirm and
        # remove if it does not.
        mask = broadcastVar(mask2d, varo)
        if mask2d.shape == varmask.shape[:mask2d.ndim]:
            # mask2d matches the leading dimensions; transposing lets
            # numpy broadcast it across the trailing ones.
            mask = (mask2d.T | varmask.T).T
        elif mask2d.shape == varmask.shape[-mask2d.ndim:]:
            mask = (mask2d | varmask)
        else:
            raise ValueError(
                f'Masks not aligned {mask2d.shape} and {varmask.shape}')

        # Expand the grid indices to the variable shape, drop masked
        # points, and shift by 0.5 so each point falls inside its
        # unit-width bin.
        ol = np.ones(mask.shape)
        myi = np.ma.masked_where(mask, (i.T * ol.T).T).compressed() + 0.5
        myj = np.ma.masked_where(mask, (j.T * ol.T).T).compressed() + 0.5
        if varo.ndim <= 2:
            myk = myj * 0 + .5
            twodkeys.append(outvarkey)
        else:
            myk = np.ma.masked_where(
                mask, np.indices(mask.shape)[-1]).compressed() + 0.5

        if varo.ndim <= 3:
            loc = [myk, myj, myi]
            outdims = ('TSTEP', 'LAY', 'ROW', 'COL')
            bins = (np.arange(nk + 1), np.arange(gf.NROWS + 1),
                    np.arange(gf.NCOLS + 1))
        else:
            # Two trailing level-like dimensions are both binned by index.
            myk1, myk2 = np.indices(mask.shape)[-2:]
            myk1 = np.ma.masked_where(mask, myk1).compressed() + 0.5
            myk2 = np.ma.masked_where(mask, myk2).compressed() + 0.5
            loc = [myk1, myk2, myj, myi]
            outdims = ('TSTEP', 'LAY', 'LAY', 'ROW', 'COL')
            bins = (np.arange(nk + 1), np.arange(nk + 1),
                    np.arange(gf.NROWS + 1), np.arange(gf.NCOLS + 1))

        myvcd = np.ma.masked_where(mask, varo[:]).compressed()
        r = binned_statistic_dd(loc, myvcd, 'mean', bins=bins)
        c = binned_statistic_dd(loc, myvcd, 'count', bins=bins)

        var = outf.createVariable(outvarkey, 'f', outdims,
                                  missing_value=-9.000E36)
        var.var_desc = varkey.ljust(80)
        var.long_name = outvarkey.ljust(16)
        var.units = getunit(varv)
        # Empty bins have a NaN mean and are stored as masked.
        var[:] = np.ma.masked_invalid(r[0])

        nvar = outf.createVariable('N' + outvarkey, 'f', outdims,
                                   missing_value=-9.000E36)
        nvar.var_desc = ('Count ' + varkey).ljust(80)
        nvar.long_name = ('N' + outvarkey).ljust(16)
        nvar.units = 'none'
        nvar[:] = c[0]

    # Guarded in the same way as the removal at the end of this function.
    if hasattr(outf, 'VAR-LIST'):
        delattr(outf, 'VAR-LIST')

    if args.verbose > 1:
        print('Calculating pressure for sigma approximation', flush=True)
    if opts['pressurekey'] is None:
        # No pressure variable: a single layer from 101325 Pa to 0 Pa.
        dims = [lcenterdim]
        pedges1d = np.array([101325, 0], dtype='f')
    else:
        pkey = opts['pressurekey']
        pvf = omf.subset([pkey])
        pv = pvf.variables[pkey]
        pu = getunit(pv).lower()
        dims = list(pv.dimensions)
        # Average over every dimension except the level dimension.
        afuncs = {}
        ldim = None
        for dk in dims:
            if dk in (lcenterdim, ledgedim):
                ldim = dk
            else:
                afuncs[dk] = 'mean'
        if ldim is None:
            raise ValueError(
                f'{pkey} has neither {lcenterdim} nor {ledgedim} dimension')
        pvmf = pvf.apply(**afuncs)
        pvdf = pvmf.apply(**{ldim: np.diff})
        # If pressure increases with level index, invert the level order.
        if pvdf.variables[pkey].mean() > 0:
            # The dimension name is passed by value; a literal ldim=
            # keyword would refer to a dimension called "ldim".
            pvmf = pvmf.slice(**{ldim: slice(None, None, -1)})
            pvdf = pvmf.apply(**{ldim: np.diff})
            # 2-D variables have data in layer 0
            # after inverting, it is in layerN
            # it must be inverted again
            outf = outf.slice(LAY=slice(None, None, -1))
            for varkey in twodkeys:
                tmpv = outf.variables[varkey]
                tmpv[:] = tmpv[:, ::-1]
            # NOTE(review): the matching 'N' count variables of 2-D keys
            # are not flipped back here -- confirm whether that is
            # intended.

        if pu in ("hpa", "mb"):
            pfactor = 100.
        elif pu in ("pa", "pascal"):
            pfactor = 1.
        else:
            warn('Unknown unit {}; scale factor = 1'.format(pu))
            pfactor = 1.

        # all other dimensions have been averaged
        # so, they have a unity dimension (ROW=1, COL=1)
        p = pvmf.variables[pkey][:].squeeze()
        dp = pvdf.variables[pkey][:].squeeze()
        if ledgedim in dims:
            pedges1d = p
        else:
            # Centers only: estimate edges half a layer difference away
            # from each center.
            hdp = dp / 2
            pedges1d = np.append(
                np.append(p[:-1] - hdp, p[-1] - hdp[-1]), p[-1] + hdp[-1])
        # Ensure pedges is never negative
        # heuristic top identification could cause that problem.
        pedges1d = np.maximum(0, pedges1d) * pfactor

    ptop = outf.VGTOP = pedges1d[-1]
    psrf = pedges1d[0]
    if len(dims) == 1:
        # OMI ScatteringWtPressure is on a pressure grid that is not
        # changing in space or time, so there is only one dimension
        outf.VGTYP = 4
        outf.VGLVLS = pedges1d.astype('f')
    else:
        # Other products will be converted to an approximate sigma
        # coordinate. This is not strictly true. The OMPROFOZ readme[1]
        # describes the vertical coordinate as follows.
        #
        # The 25-level vertical pressure grid is set initially at
        # Pi = 2^(-i/2) atm for i = 0, 23 and P24 = 0. This pressure grid
        # is then modified: The daily NCEP thermal tropopause pressure is
        # used to replace the level closest to it, and layers between
        # surface and tropopause are distributed equally in logarithmic
        # pressure.
        #
        # It is on a hybrid sigma/eta coordinate; a sigma approximation is
        # being used.
        #
        # [1] https://avdc.gsfc.nasa.gov/pub/data/satellite/Aura/OMI/V03/L2/
        #     OMPROFOZ/OMPROFOZ_readme-v3.pdf
        sigma = (pedges1d[:] - ptop) / (psrf - ptop)
        outf.VGTYP = 7
        outf.VGLVLS = sigma[:].astype('f')

    del outf.variables['DUMMY']
    # Report (but do not change) variable names longer than 15 characters.
    for k in list(outf.variables):
        klen = len(k)
        if klen > 15:
            print(k, 'too long', klen)
    # Remove any VAR-LIST attribute before updatemeta is called.
    if hasattr(outf, 'VAR-LIST'):
        delattr(outf, 'VAR-LIST')
    outf.updatemeta()
    outf.FILEDESC = "cmaqsatproc output"
    outf.HISTORY = sys.argv[0] + ': ' + str(args)
    gc.collect()
    return outf