Example #1
def get_sf_data(osse_path, flux_var):
    """
    Gets last gctm.sf.NN file from given path

    Parameters:
        osse_path (str) : directory path where sf files are located
        flux_var  (str) : name of flux variable to consider

    Returns:
        tuple of the following numpy arrays
            - latitude
            - longitude
            - scale factor array with indices [month, lat, lon]

    """
    # find the last scale factor iteration file
    sf_fp = sorted(glob(osse_path + '/gctm.sf*'),
                   key=lambda x: int(x[-2:]))[-1]

    # acquire scale factor pseudo netcdf file
    sf = pnc.pncopen(sf_fp)

    # get latitude and longitude
    lat = sf.variables['latitude'].array()
    lon = sf.variables['longitude'].array()

    # get the scale factors
    sf_arr = sf.variables[flux_var].array()[0, :, :, :]

    return lat, lon, sf_arr
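
A minimal usage sketch; the OSSE directory and flux-variable name are hypothetical, and glob and PseudoNetCDF (pnc) are assumed to be imported as in the surrounding module:

# Hypothetical paths/names: read the final scale-factor iteration from an OSSE run
lat, lon, sf_arr = get_sf_data('/data/osse_run01', 'CO2_SF')
print(sf_arr.shape)  # (month, lat, lon), per the docstring above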
Example #2
    def boundingbox(self, path, keys=['time']):
        tmpf = pnc.pncopen(path, format='netcdf')
        out = {}
        if 'time' in keys:
            rtf = pnc.PseudoNetCDFFile()
            rtf.createDimension('time', 1)
            rtf.copyVariable(tmpf['PRODUCT/time'], key='time')
            refdate = rtf.getTimes()[0]
            tunit = refdate.strftime('milliseconds since %F %H:%M:%S+0000')

            tf = pnc.PseudoNetCDFFile()
            tf.createDimension('time', 1)
            tf.copyDimension(tmpf['PRODUCT'].dimensions['scanline'])
            tf.copyVariable(tmpf['PRODUCT/delta_time'], key='time')
            tf.variables['time'].units = tunit
            tf = tf.removeSingleton()
            times = tf.getTimes()
            out['time'] = times.min(), times.max()

        if 'longitude' in keys:
            longitude = tmpf['PRODUCT/longitude'][:]
            out['longitude'] = longitude.min(), longitude.max()

        if 'latitude' in keys:
            latitude = tmpf['PRODUCT/latitude'][:]
            out['latitude'] = latitude.min(), latitude.max()

        return out
Example #3
def opendappaths(inpaths, opts, verbose):
    omfs = []
    dapdims = opts.get('opendapdims', None)
    for inpath in inpaths:
        if verbose > 1:
            print('Opening', inpath, flush=True)
        tmpf = pnc.pncopen(inpath, format='netcdf')
        omfi = pnc.PseudoNetCDFFile()
        for varkey in opts['datakeys'] + opts['geokeys']:
            if verbose > 2:
                print('Processing', varkey, flush=True)
            tmpv = tmpf.variables[varkey]
            for dim, dimlen in zip(tmpv.dimensions, tmpv.shape):
                if dim not in omfi.dimensions:
                    omfi.createDimension(dim, dimlen)
            dtype = tmpv.dtype
            # Aura OMI data is occasionally stored as an int16
            # and scaled to a float32
            for propkey in ['scale_factor', 'add_offset']:
                if hasattr(tmpv, propkey):
                    stype = getattr(tmpv, propkey).dtype
                    if (dtype.char in ('i', 'h')
                            and stype.char not in ('i', 'h')):
                        dtype = stype

            omfi.copyVariable(tmpv, key=varkey, dtype=dtype)

        if dapdims is not None:
            omfi.renameDimensions(**dapdims, inplace=True)

        omfs.append(omfi)

    return omfs
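
A sketch of the opts mapping opendappaths reads; the OPeNDAP URL and variable names are hypothetical, but the keys (datakeys, geokeys, opendapdims) follow the lookups in the function above:

opts = {
    'datakeys': ['ColumnAmountNO2Trop'],        # data variables to copy
    'geokeys': ['Latitude', 'Longitude'],       # geolocation variables to copy
    'opendapdims': {'phony_dim_0': 'nTimes'},   # optional dimension renames
}
omfs = opendappaths(['https://example.gov/opendap/OMI_L2.nc'], opts, verbose=2)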
Example #4
def writeconfig(sectors, outpath):
    hcpaths = []
    for sector in sectors:
        hemco2dpath = hemcotmpl(sector=sector, month=month)
        hemco3dpath = hemco2dpath.replace('0pt1degree', '0pt1degree_3D')
        if os.path.exists(hemco3dpath):
            hcpaths.append(hemco3dpath)
        elif os.path.exists(hemco2dpath):
            hcpaths.append(hemco2dpath)
        else:
            raise KeyError('Could not find regridded: ' + hemco2dpath)

    defaults = set()
    ignores = set()
    with open(outpath, 'w') as hcf:
        hcf.write('(((EPA2016_MONMEAN\n')
        for hcpath in hcpaths:
            hcpatt = changepathtopattern(hcpath)
            sector = getsector(hcpath)
            print(sector, hcpatt, end='', flush=True)
            hcfile = pnc.pncopen(hcpath, format='netcdf')
            for cqkey, v in hcfile.variables.items():
                if cqkey in hcfile.dimensions or cqkey in ('hyai', 'hybi'):
                    continue
                elif cqkey in ('TOLU', ):
                    warn('TOLU mass is duplicated by TOL')
                if cqkey in cq2gc:
                    gctrans = cq2gc.get(cqkey)
                    if len(gctrans) == 0:
                        ignores.add(cqkey)
                else:
                    defaults.add(cqkey)
                    gctrans = [[cqkey, '1007']]
                for gckey, scale in gctrans:
                    if gckey in [
                            'ACET', 'MEK', 'ALD2', 'PRPE', 'PRPA', 'BENZ',
                            'TOLU', 'XYLE', 'EOH', 'ALK4', 'ISOP'
                    ]:
                        units = 'kgC/m2/s'
                    else:
                        units = v.units.strip()
                    opts = dict(unit=units,
                                gckey=gckey,
                                cqkey=cqkey,
                                sector=sector,
                                path=hcpatt,
                                scale=scale,
                                cat='1/2',
                                hier=50)
                    hcf.write(
                        '0 EPA16_{gckey}__{sector}{cqkey} {path}  {cqkey}       2016-2016/1-12/1/0 C xyz  {unit}  {gckey}   {scale}     {cat} {hier}\n'
                        .format(**opts))
                    # If I use - to repeat the file, the mass is from the previous cqkey too.
                    # hcpatt = '-'
            print()
        hcf.write(')))EPA2016_MONMEAN\n')
    print('Ignored', sorted(ignores))
    print('Defaults', sorted(defaults))
Example #5
def create_sf_dict(dir_path, variable, output_path=None, year=2010):
    """
    Creates a dictionary of data from a collection of gctm.sf.** files. The
    output file is saved as a pickle.

    Parameters:
        dir_path    (str) : directory where code can find the scaling
        variable    (str) : CO2 scaling variable to use in the output
        output_path (str) : save location of output pickle file (if not None)
        year        (int) : starting year for inversion

    Returns:
        dictionary with the following key values
            - time      : numpy array
            - latitude  : numpy array
            - longitude : numpy array
            - sf_array  : numpy array

    NOTE:
    - PseudoNetCDF assumes that the tracerinfo.dat and diaginfo.dat files are
      included in the directory path given.

    TODO:
    - the time dimension in the sf files appears to all point to the same date.
    """
    # create the list of files
    sf_filepaths = sorted(glob.glob(dir_path + 'gctm.sf*'))

    # read in the above
    sf_files = [pnc.pncopen(path) for path in sf_filepaths]

    # get latitude/longitude/time information
    sample_file = sf_files[0]

    latitude = sample_file.variables['latitude'].array()
    longitude = sample_file.variables['longitude'].array()

    time_vals_raw = sample_file.variables['layer9'].array()
    time = [datetime(year, month, day=1) for month in time_vals_raw]

    # concatenate the scaling factors over time
    sf_concat = np.concatenate(
            [i.variables[variable].array() for i in sf_files]
    )

    output_dict = {
        'time': time,
        'latitude': latitude,
        'longitude': longitude,
        'sf_array': sf_concat
    }

    if output_path:
        with open(output_path, 'wb') as f:
            pickle.dump(output_dict, f)

    return output_dict
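
A minimal usage sketch; the run directory, variable name, and pickle path are hypothetical, and tracerinfo.dat/diaginfo.dat are assumed to be in the run directory as noted above:

sf_dict = create_sf_dict('/data/inversion/run01/', 'CO2_SF',
                         output_path='sf_run01.pkl', year=2010)
print(len(sf_dict['time']), sf_dict['sf_array'].shape)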
Example #6
def openfile(path):
    """
    Clean out duplicated days
    """
    mo = int(path[-2:])
    f = pnc.pncopen(path, format='netcdf').subsetVariables(['O3'])
    times = f.getTimes()
    tidx = np.array([ti for ti, t in enumerate(times) if t.month == mo])
    return f.sliceDimensions(TSTEP=tidx)
Example #7
    def get_timezonefile(self):
        if self.timezonefile is not None:
            return self.timezonefile
        elif os.path.exists(self.timezonepath):
            self.timezonefile = pnc.pncopen(self.timezonepath, format='ioapi')
            return self.get_timezonefile()
        print(
            f'{self.timezonepath} not available;'
            ' calculating UTCOFFSET in hours from longitude...', end=''
        )
        gf = pnc.pncopen(
            self.griddescpath, format='griddesc', GDNAM=self.gdnam
        )
        del gf.variables['TFLAG']
        gf.SDATE = 1970001
        I, J = np.meshgrid(np.arange(gf.NCOLS), np.arange(gf.NROWS))
        lon, lat = gf.ij2ll(I, J)
        utcoffset = (lon / 15)
        tzf = gf.subset([])
        tzvar = tzf.createVariable(
            'UTCOFFSET', 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
            long_name='UTCOFFSET', var_desc='UTCOFFSET', units='hours'
        )
        tzvar[:] = utcoffset
        mthdvar = tzf.createVariable(
            'METHOD', 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
            long_name='METHOD', units='none',
            var_desc='METHOD: 0=tz_world.geojson; 1=lon/15'
        )
        mthdvar[:] = 1
        tzf.updatetflag(overwrite=True)
        tzf.updatemeta()
        tzf.FILEDESC = """Calculated TZ from longitude"""
        tzf.HISTORY = """Calculated TZ from longitude"""
        tzf.save(
            self.timezonepath, format='NETCDF4_CLASSIC',
            complevel=1, verbose=0
        ).close()
        print('done')

        return self.get_timezonefile()
Example #8
def process(args):
    if args.verbose > 1:
        print(f'Opening {args.GRIDDESC} GDNAM={args.GDNAM}', flush=True)
    gf = pnc.pncopen(args.GRIDDESC, format='griddesc', GDNAM=args.GDNAM)
    outpath = args.outpath
    if os.path.exists(outpath):
        print('Using cached', outpath, flush=True)
        return

    opts = eval(open(args.optpath, 'r').read())
    omf = subset(args, gf, opts)
    outf = grid(args, gf, opts, omf)
    outf.save(outpath, verbose=1, complevel=1)
Example #9
 def __init__(self, path):
     tmpf = pnc.pncopen(path, format='netcdf')
     geogrpk = 'PRODUCT/SUPPORT_DATA/GEOLOCATIONS/'
     outkeys = dict(
         time='PRODUCT/delta_time',
         qa_value='PRODUCT/qa_value',
         latitude='PRODUCT/latitude',
         longitude='PRODUCT/longitude',
         level='PRODUCT/layer',
         hyai='PRODUCT/tm5_constant_a',
         hybi='PRODUCT/tm5_constant_b',
         tropopause_level_index='PRODUCT/tm5_tropopause_layer_index',
         averaging_kernel='PRODUCT/averaging_kernel',
         nitrogendioxide_tropospheric_column=
         'PRODUCT/nitrogendioxide_tropospheric_column',
         air_mass_factor_troposphere='PRODUCT/air_mass_factor_troposphere',
         air_mass_factor_total='PRODUCT/air_mass_factor_total',
         surface_pressure='PRODUCT/SUPPORT_DATA/INPUT_DATA/surface_pressure',
         longitude_bounds=geogrpk + 'longitude_bounds',
         latitude_bounds=geogrpk + 'latitude_bounds',
         viewing_zenith_angle=geogrpk + 'viewing_zenith_angle',
         solar_zenith_angle=geogrpk + 'solar_zenith_angle')
     f = pnc.PseudoNetCDFFile()
     for ok, ik in outkeys.items():
         iv = tmpf[ik]
         for dk, dl in zip(iv.dimensions, iv.shape):
             if dk not in f.dimensions:
                 f.createDimension(dk, dl)
         f.copyVariable(iv, key=ok)
     tf = pnc.PseudoNetCDFFile()
     tf.createDimension('time', 1)
     tf.copyVariable(tmpf['PRODUCT/time'], key='time')
     refdate = tf.getTimes()[0]
     x = np.arange(len(f.dimensions['scanline']))
     y = np.arange(len(f.dimensions['ground_pixel']))
     X, Y = np.meshgrid(x, y)
     outf = f.removeSingleton().slice(scanline=X.ravel(),
                                      ground_pixel=Y.ravel(),
                                      newdims=('retrieval', )).slice(
                                          scanline=X.ravel(),
                                          newdims=('retrieval', )).slice(
                                              ground_pixel=Y.ravel(),
                                              newdims=('retrieval', ))
     outf.renameDimensions(scanline='retrieval', inplace=True)
     outf.renameDimensions(ground_pixel='retrieval', inplace=True)
     tunit = refdate.strftime('milliseconds since %F %H:%M:%S+0000')
     outf.variables['time'].units = tunit
     self.variables = outf.variables
     self.dimensions = outf.dimensions
     self.setncatts(outf.getncatts())
     del tmpf
Example #10
def bpch_to_netCDF_via_PNC(format='bpch2', filename='ctm.nc',
                           output_file=None, bpch_file=None, folder=None):
    """ Convert bpch to NetCDF using PNC as backend """
    import PseudoNetCDF as pnc
    # Load the file into memory
    infile = pnc.pncopen(bpch_file, format=format)
    # Kludge - reduce DXYP_DXYP dims online
    dxyp = infile.variables['DXYP_DXYP']
    # Surface area should not have a time dim; if it does, remove it.
    if len(dxyp.shape) == 4:
        dxyp.dimensions = dxyp.dimensions[1:]
        infile.variables['DXYP_DXYP'] = dxyp
    # Now write file to disc
#    pnc.pncwrite(infile, folder+filename)
    pnc.pncwrite(infile, output_file)
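
A usage sketch with hypothetical file names; tracerinfo.dat and diaginfo.dat are assumed to sit next to the bpch file so PseudoNetCDF can interpret it:

bpch_to_netCDF_via_PNC(format='bpch2',
                       bpch_file='run01/ctm.bpch',
                       output_file='run01/ctm.nc')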
Example #11
def read_sf_objs(base_df_dir):
    """
    Reads in all objects present in the ./scale_factors directory

    Parameters:
        base_df_dir (str) : base directory where all scale factors can be found

    Returns:
        list of sf objects

    NOTE:
    - tracerinfo and diaginfo files must be present in the given directory
    """
    # obtain the scale factor file names (NOTE: file order doesn't matter)
    file_names = glob(base_df_dir + '/data/scale_factors/sf*')

    return [pnc.pncopen(fn, format='bpch') for fn in file_names]
Example #12
def getsites(path):
    """
    path : path to ioapi file
    returns i, j locations
    """
    keepvars = [
        'PM25_TOT', 'PM25_SO4', 'PM25_NO3', 'PM25_OC', 'PM25_OM', 'PM25_CL',
        'PMC_CL', 'PM25_EC', 'PM25_SOIL', 'PMC_TOT'
    ]
    print(path, flush=True)
    inf = pnc.pncopen(path, format='ioapi')
    varf = inf.subsetVariables(keepvars)
    # slice the subsetted variables (not the full file) at the site locations
    sitef = varf.slice(ROW=jc, COL=ic, newdims=('site', ))
    ntimes = len(sitef.dimensions['TSTEP'])
    dims = sitef.variables[keepvars[0]].dimensions
    mymask = mask[None, None, :].repeat(ntimes, 0)

    outf = sitef.mask(mymask, dims=dims)
    return outf
Example #13
File: combine.py  Project: barronh/aqmbc
def combine(inpath, outpath, exprpath, clobber=False):
    """
    Arguments
    ---------
    inpath : path to netcdf input file
    outpath : path to output file
    exprpath : path to text file with expressions

    Returns
    -------
    None
    """
    if os.path.exists(outpath) and not clobber:
        print('Using cached:', outpath)
        return
    spcexpr = open(exprpath, 'r').read()
    infile = pnc.pncopen(inpath, format='ioapi')
    if len(infile.dimensions['TSTEP']) > 1:
        infile = infile.sliceDimensions(TSTEP=slice(None, -1))
    spcfile = infile.copy().eval(spcexpr, inplace=False)
    spcfile.save(outpath, format='NETCDF4_CLASSIC')
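
A minimal usage sketch; the paths are hypothetical, and the expression file is expected to contain PseudoNetCDF eval expressions defining the output species:

combine('BCON.raw.nc', 'BCON.species.nc', 'species.expr', clobber=True)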
Example #14
File: collapse.py  Project: barronh/aqmbc
def collapse(inpath, outpath, clobber=False):
    """
    Arguments
    ---------
    inpath : path to netcdf input file
    outpath : path to output file

    Returns
    -------
    None
    """
    if os.path.exists(outpath) and not clobber:
        print('Using cached:', outpath)
        return
    infile = pnc.pncopen(inpath, format='ioapi')
    outfile = infile.interpSigma(
        vglvls=outvglvls,
        vgtop=infile.VGTOP,
        interptype='conserve'
    )
    outfile.save(outpath, format='NETCDF4_CLASSIC')
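
A minimal usage sketch with hypothetical paths; outvglvls is assumed to be defined at module level in collapse.py, since the function reads it as a global:

collapse('BCON.species.nc', 'BCON.species.4LAY.nc', clobber=False)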
Example #15
def read_sf_objs(base_df_dir, sf_prefix):
    """
    Reads in all files in directory with provided scale factor prefix.

    E.g. ./scale_factors/sf_*

    where base_df_dir == 'scale_factors' and sf_prefix == 'sf_'

    Parameters:
        base_df_dir (str) : base directory where all scale factors can be found
        sf_prefix   (str) : prefix for each scale factor file

    Returns:
        list of sf objects

    NOTE:
    - tracerinfo and diaginfo files must be present in the given directory
    - all scale factor files are assumed to have the same prefix form
    """
    # obtain the scale factor file names (NOTE: file order doesn't matter)
    file_names = glob(base_df_dir + '/' + sf_prefix + '*')

    return [pnc.pncopen(fn, format='bpch') for fn in file_names]
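
A usage sketch with hypothetical paths; tracerinfo and diaginfo files are assumed to be present in the directory, as noted in the docstring:

sf_objs = read_sf_objs('./scale_factors', 'sf_')
print(len(sf_objs), 'scale factor files opened')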
Example #16
    def __init__(
        self, gridpath, nominaldate='1970-01-01', format='griddesc', **kwds
    ):
        """
        Arguments
        ---------
        gridpath : str
            path to a GRIDDESC file
        nominaldate : str
            Date for spatial and regional files (default: '1970-01-01')
        format : str
            griddesc, by default, but can be any ioapi_base class
        kwds : mappable
            Keywords for opening GRIDDESC. For example, GDNAM if there are
            multiple domains.

        Returns
        -------
        """
        nominaldate = pd.to_datetime(nominaldate)
        gf = pnc.pncopen(gridpath, format=format, **kwds)
        gf.SDATE = int(nominaldate.strftime('%Y%j'))
        gf.STIME = int(nominaldate.strftime('%H%M%S'))
        gf.TSTEP = 10000
        self.spatialfile = gf.subset([])
        uv = self.spatialfile.createVariable(
            'UNIFORM', 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
            long_name='UNIFORM', var_desc='UNIFORM', units='none'
        )
        uv[:] = 1 / uv.size
        self.regionfile = gf.subset([])
        dw = self.regionfile.createVariable(
            'DOMAINWIDE', 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
            long_name='DOMAINWIDE', var_desc='DOMAINWIDE', units='fraction'
        )
        dw[:] = 1.
        self.regions = ['DOMAINWIDE']
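
A construction sketch; the class name and GRIDDESC arguments are hypothetical, since the snippet only shows __init__:

allocator = SpatialAllocator('GRIDDESC', nominaldate='1970-01-01', GDNAM='12US1')
print(allocator.regions)  # ['DOMAINWIDE']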
Example #17
import PseudoNetCDF as pnc
from datetime import datetime
import sys
import os
import gc

exists = os.path.exists

# dummy assignment
f = pandoras
panpath = sys.argv[1]
concpat = sys.argv[2]
metpat = sys.argv[3]
outpat = sys.argv[4]

allf = pnc.pncopen(panpath, format='pandoraslb3')
allptimes = allf.getTimes()
moddates = sorted([
    datetime.strptime(p, metpat)
    for p in list(set([t.strftime(metpat) for t in allptimes]))
])
for moddate in moddates:
    metpath = moddate.strftime(metpat)
    concpath = moddate.strftime(concpat)
    outpath = moddate.strftime(outpat)
    if not exists(metpath):
        print('Skipping missing met date', metpath, flush=True)
        continue
    elif not exists(concpath):
        print('Skipping missing model date', concpath, flush=True)
        continue
Example #18
os.system(
    f"wget --continue -q ftp://newftp.epa.gov/aqmg/global/gadm/gadm36_12US1.IOAPI.nc"
)

# # Opening Files For Reading And Plotting
#

# In[ ]:

smokepaths = {
    sector: smoketmpl(sector=sector, month=month)
    for sector in include_sectors + natural_sectors
}
smokefiles = {
    sector: pnc.pncopen(path, format='ioapi', mode='r')
    for sector, path in smokepaths.items()
}

reffile = smokefiles[include_sectors[0]]

# # Store Grid Parameters for Later Use
#
# * Regridding requires knowing about the grid structure
# * We are pulling all the metadata, so that we can use what we need.
#

# In[ ]:

gridproperties = reffile.getncatts()
exec('nominalarea = XCELL * YCELL', None, gridproperties)
Example #19
    def allocate(self, infile, alloc_keys, outpath=None, **save_kwds):
        """
        Arguments
        ---------
        infile : str or PseudoNetCDF File
            path to netcdf file (or file) to use as input (format keyword used
            as a modifier)
        alloc_keys : mappable  or str
            alloc_keys key/value pairs map region and spatial allocation
            variables (e.g., DOMAINWIDE and POP) to variables in infile to
            allocate spatially. Each key should be a tuple of region and
            spatial variable (e.g., ('DOMAINWIDE', 'POPULATION')). The region
            key must exist as a variable in self.regionfile and the spatial
            variable must exist in self.spatialfile. Each value should be a
            list of variables in infile to pair with the region/spatial pair.
            One allocation variable can be assigned None instead of a list,
            which results in all unassigned variables being used. If alloc_keys
            is a str, this is equivalent to `alloc_keys={alloc_keys: None}`
        outpath : str or None
            path for output to be saved. If None, outf will be returned and not
            saved

        Returns
        -------
        outf : PseudoNetCDFFile
            file with spatial variation

        Notes
        -----

        """
        if isinstance(infile, str):
            infile = pnc.pncopen(infile, format=format)

        if isinstance(alloc_keys, str):
            alloc_keys = {alloc_keys: None}

        all_keys = []
        for k, v in infile.variables.items():
            if 'LAY' in v.dimensions:
                all_keys.append(k)

        assigned_keys = []

        isnone = []
        for (region, srgkey), varkeys in alloc_keys.items():
            if varkeys is None:
                isnone.append((region, srgkey))
            else:
                assigned_keys.extend(varkeys)

        unassigned_keys = list(set(all_keys).difference(assigned_keys))
        if len(isnone) > 1:
            raise ValueError(f'Can only have 1 None sector; got {isnone}')
        if len(isnone) == 1:
            alloc_keys[isnone[0]] = unassigned_keys

        outf = self.spatialfile.subset([])
        for (regionkey, allockey), varkeys in alloc_keys.items():
            regionvar = self.regionfile.variables[regionkey]
            allocvar = self.spatialfile.variables[allockey]
            factor = regionvar[:] * allocvar[:]
            factor /= factor.sum()
            for varkey in varkeys:
                invar = infile.variables[varkey]
                outvar = outf.createVariable(
                    varkey, 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
                    long_name=varkey, var_desc=varkey,
                    units=getattr(invar, 'units', 'unknown')
                )
                outvar[:] = invar[:] * factor

        if outpath is None:
            return outf
        else:
            return outf.save(outpath, **save_kwds)
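
A usage sketch, assuming allocator is an instance of the class this method belongs to (e.g., built as in Example #16); the input path is hypothetical:

import PseudoNetCDF as pnc

ef = pnc.pncopen('national_totals.nc', format='ioapi')
alloc_keys = {('DOMAINWIDE', 'UNIFORM'): None}  # None = all unassigned LAY variables
outf = allocator.allocate(ef, alloc_keys, outpath='gridded.nc')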
Example #20
from glob import glob
import os
from collections import OrderedDict
from functools import partial

import matplotlib.pyplot as plt

import PseudoNetCDF as pnc
from perim import perimslices

np = plt.np

inpaths = sorted(glob('../combine/*BCON.combine.4LAY.nc'))
infile = pnc.sci_var.stack_files(
    [pnc.pncopen(path, format='ioapi') for path in inpaths], 'TSTEP')
lays = np.arange(-.5, infile.NLAYS + 1)
pcolors = dict(S='#2ca02c', N='#d62728', W='#1f77b4', E='#ff7f0e', L='k')


def sigmabyt(plotfile, vark, title, pslices, yscale, outpath):
    pf = plotfile.subsetVariables([vark])
    var = pf.variables[vark]
    plt.close()
    units = var.units.strip()
    lname = var.long_name.strip()
    vglvls = plotfile.VGLVLS
    fig, axarr = plt.subplots(4,
                              1,
                              sharex=True,
                              gridspec_kw=dict(hspace=0.1, bottom=0.15),
                              figsize=(6, 8))
Example #21
    def get_monthlyfile(self, propath=None, read_kwds=None):
        """
        Arguments
        ---------
        propath : str
            path to tpro file ATPRO_MONTHLY file
        read_kwds : dict or None
            If None, default read_kwds are dict(comment='#', index_col=0,
            names=['Jan', ..., 'Dec', 'comment'])

        Returns:
            df : PseudoNetCDFFile
                IOAPI-like file with month of year allocations for sectors (as
                variables) with shape TSTEP=12, LAY=1, ROW=NROWS, COL=NCOLS
        """

        if self.monthlyfile is not None:
            return self.monthlyfile
        elif os.path.exists(self.monthlypath):
            self.monthlyfile = pnc.pncopen(self.monthlypath, format='ioapi')
            return self.get_monthlyfile()
        if propath is None:
            raise KeyError(
                f'propath required because {self.monthlypath} not found'
            )

        print(
            f'{self.monthlypath} not available; calculating from {propath}'
        )
        names = _monnames + ['comment']

        if read_kwds is None:
            read_kwds = dict(comment='#', index_col=0, names=names)

        mondf = pd.read_csv(propath, **read_kwds)
        tzf = self.get_timezonefile()

        mon_f = tzf.subset([])
        mon_f.createDimension('TSTEP', 12).setunlimited(True)
        mon_f.SDATE = 2020001
        mon_f.STIME = 0
        mon_f.TSTEP = 24 * 30.5 * 10000

        for monidx, monrow in mondf.iterrows():
            cmt = monrow['comment']
            label = getlabel(cmt)
            print(label, cmt)
            monvals = monfactor(mondf, monidx, tzf)
            monvar = mon_f.createVariable(
                label, 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
                long_name=label, var_desc=label, units='s/s'
            )
            monvar[:] = monvals[:, None]

        mon_f.updatemeta()
        mon_f.updatetflag(overwrite=True)
        mon_f.FILEDESC = """
## NASA-like metadata
1, 2310
Henderson, Barron
US EPA/Office of Air Quality Planning and Standards
EPA sector-based hourly profiles
Not Applicable
1, 1
2021, 01, 13, 2021, 01, 13
0
...
PI_CONTACT_INFO: [email protected]
PLATFORM: CMAQ Emission processing input
DATA_INFO:  All data in monthly average per second rates
UNCERTAINTY:  large, preliminary data based on US averages.
DM_CONTACT_INFO: Henderson, Barron, US EPA, [email protected]
PROJECT_INFO: For easy processing processing of emissions.
STIPULATIONS_ON_USE: Use of these data requires PI notification
OTHER_COMMENTS: None.
REVISION: R0
R0: Preliminary data
"""
        mon_f.save(
            self.monthlypath, format='NETCDF4_CLASSIC', complevel=1, verbose=0
        ).close()
        return self.get_monthlyfile()
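
A usage sketch, assuming alloc is an instance of the class this method belongs to; the ATPRO_MONTHLY path is hypothetical:

monf = alloc.get_monthlyfile(propath='ATPRO_MONTHLY.txt')
print(len(monf.dimensions['TSTEP']))  # 12 months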
Example #22
import PseudoNetCDF as pnc
from readobs import obsdf, locs
import numpy as np
import pandas as pd
import os

dailytmpl = os.environ['DAILYTMPL']
shortname = os.environ['SHORTNAME']
monobskeys = ['FRH', 'FLRH', 'FSRH', 'FSSRH', 'SS_RAYLEIGH']
dayobskeys = ['GROUP']

cmaqinpath = 'derived/' + dailytmpl + '.ncf'
cmaqf = pnc.pncopen(cmaqinpath, format='ioapi').copy()
dims = cmaqf.variables['GROUP'].dimensions
cmaqg = cmaqf.variables['GROUP'][:]
q90 = (cmaqg != 90).filled(True)
q10 = (cmaqg != 10).filled(True)
qother = (~(q90 | q10)) | cmaqg.mask
cmaqif = cmaqf.mask(q90, dims=dims).apply(TSTEP='mean')
cmaqcf = cmaqf.mask(q10, dims=dims).apply(TSTEP='mean')
cmaqof = cmaqf.mask(qother, dims=dims).apply(TSTEP='mean')
impdf = obsdf.query('GROUP == 90').groupby(['site_id']).mean()
clrdf = obsdf.query('GROUP == 10').groupby(['site_id']).mean()
othdf = obsdf.query('(GROUP != 10) & (GROUP != 90)').groupby(['site_id']).mean()
compkeys = 'SEA_SALT AMM_SO4 AMM_NO3 OMC EC CRUSTAL CM'.split()
extkeys = ('SS_RAYLEIGH E_SEA_SALT E_AMM_SO4 E_AMM_NO3'
           ' E_OMC E_EC E_CRUSTAL E_CM').split()
keys = ['longitude', 'latitude'] + compkeys + extkeys
siteids = np.char.decode(
    cmaqif.variables['site_id'][:].view('S16')).astype(str)[:, 0]
Example #23
from mpl_toolkits.basemap import Basemap
import argparse
import os

import matplotlib.pyplot as plt
import numpy as np
import PseudoNetCDF as pnc

parser = argparse.ArgumentParser()
parser.add_argument('obspath')
parser.add_argument('modpath')
args = parser.parse_args([
    '../obs/CASTNET2016.nc',
    '../mod/combine_aconc_v521_intel17.0_HEMIS_cb6_2016.nc',
])

plt.rcParams['axes.labelsize'] = 18
plt.rcParams['xtick.labelsize'] = 16
plt.rcParams['ytick.labelsize'] = 16
modf = pnc.pncopen(args.modpath).copy()
obsf = pnc.pncopen(args.obspath).copy()

lat = obsf.variables['latitude']
lon = obsf.variables['longitude']
tzoff = -obsf.variables['TIME_OFFSET'][:].astype('i')

tidx = np.arange(24, len(modf.dimensions['time']) - 24)[:, None] + tzoff
sidx = np.arange(len(obsf.dimensions['site']))[None, :].repeat(
    tidx.shape[0], 0)

times = obsf.getTimes()

oto3 = obsf.variables['O3'][tidx, sidx].T
mto3 = modf.variables['O3'][tidx, 0, sidx].T
bto3 = mto3 - oto3
Example #24
    del tmpf.variables['TFLAG']
    tmpf = fracf.subsetVariables([vark])
    tmpf = tmpf.slice(TSTEP=include)
    return np.ma.filled(tmpf.variables[vark], 0).sum(0, keepdims=True)


def getmask(idf, vark, namelist):
    var = idf.variables[vark]
    i2k = eval(var.description)
    k2i = {k: i for i, k in i2k.items()}
    idlist = [k2i[k] for k in namelist]
    outvar = np.in1d(var[:], idlist).reshape(var.shape)
    return outvar


gadmf = pnc.pncopen(args.inpath, format='ioapi')
if args.variable is None:
    for vark in ['ID_0', 'ID_1', 'ID_2']:
        if vark in gadmf.variables:
            args.variable = vark
            break
    else:
        print('Could not find variable ID_0, ID_1, ID_2')
        exit()

outf = gadmf.slice(TSTEP=0).subsetVariables([args.variable])
configd = json.load(open(args.definitions, mode='r', encoding='utf-8'))
for outvark, namelist in configd.items():
    outv = outf.createVariable(outvark, 'f', ('TSTEP', 'LAY', 'ROW', 'COL'))
    outv.units = '1'
    outv.long_name = outvark.ljust(16)
Example #25
import sys
import os
import PseudoNetCDF as pnc

# inpath = 'GRIDCRO2D.12US2.35L.160101'
# outpath = 'GRIDCRO2D.12US2.35L.160101.CF.nc'
try:
    inpath, outpath = sys.argv[1:]
except Exception:
    print('Usage: python {} <INPATH> <OUTPATH>'.format(sys.argv[0]))
    sys.exit(1)

if not os.path.exists(inpath):
    raise IOError(f'{inpath} does not exist.')

if os.path.exists(outpath):
    raise IOError(f'{outpath} exists. Will not overwrite.')

infile = pnc.pncopen(inpath, format='ioapi').copy()
pnc.conventions.ioapi.add_cf_from_ioapi(infile)
infile.save(outpath, verbose=0)
Example #26
    def allocate(
        self, infile, outdate, alloc_keys, outpath=None,
        monthly=True, dayofweek=True, diurnal=True,
        time=None, format=None,
        overwrite=False, verbose=0
    ):
        """
        Arguments
        ---------
        infile : str or PseudoNetCDF File
            path to netcdf file (or file) to use as input (format keyword used
            as a modifier)
        outdate : datetime
            date to destination
        outpath : str or None
            path for output to be saved. If None, outf will be returned and not
            saved
        alloc_keys : mappable  or str
            alloc_keys key/value pairs map allocation variables (e.g., ENERGY)
            to variables in infile to allocate temporally. Each key should
            be in monthlyfile/dayofweekfile/diurnalfile variables. And each
            value is a list of variables in infile. One allocation variable can
            be assigned None instead of a list, which results in all unassigned
            variables being used. If alloc_keys is a str, this is equivalent to
            `alloc_keys={alloc_keys: None}`
        monthly : bool
            apply monthly scaling. If file already has months, use monthly=False
            and time=m to apply other scaling to time m.
        dayofweek : bool
            apply day of week  scaling. If file already has day of week, use
            dayofweek=False and time=d to apply other scaling to time d.
        diurnal : bool
            apply hour of day  scaling. If file already has hour of day, use
            diurnal=False and time=h to apply other scaling to time h.
        time : int or None
            if None, checks to ensure that file has only 1 time and uses first
            (i.e., 0)
        format : str
            format of file or meta data (e.g., netcdf or ioapi; see
            PseudoNetCDF pncopen)

        Returns
        -------
        outf : PseudoNetCDFFile
            file with temporal variation

        Notes
        -----

        1. monthly, dayofweek, and diurnal can be combined to exclude one or more
           scalings

        """
        remove = False

        if outpath is not None and os.path.exists(outpath):
            if not overwrite:
                raise IOError(f'{outpath} exists')
            else:
                remove = True

        refdate = outdate

        if verbose > 0:
            print('Opening input', flush=True)

        if isinstance(infile, str):
            ef = pnc.pncopen(infile, format=format)
        else:
            ef = infile

        if isinstance(alloc_keys, str):
            alloc_keys = {alloc_keys: None}

        all_keys = []
        for k, v in ef.variables.items():
            if 'LAY' in v.dimensions:
                all_keys.append(k)

        assigned_keys = []

        isnone = []
        for sector, varkeys in alloc_keys.items():
            if varkeys is None:
                isnone.append(sector)
            else:
                assigned_keys.extend(varkeys)

        unassigned_keys = list(set(all_keys).difference(assigned_keys))
        if len(isnone) > 1:
            raise ValueError(f'Can only have 1 None sector; got {isnone}')
        if len(isnone) == 1:
            alloc_keys[isnone[0]] = unassigned_keys

        if time is None:
            if len(ef.dimensions['TSTEP']) > 1:
                raise ValueError(
                    'Time dimension is not 1, so you must choose a time'
                )
            else:
                time = 0

        if format == 'ioapi':
            if verbose > 0:
                print('Appending TFLAG to exclude', flush=True)

        if verbose > 0:
            print('Creating output template', flush=True)

        outf = ef.subset([])
        if 'TFLAG' in outf.variables:
            del outf.variables['TFLAG']

        nsteps = 1
        if monthly:
            nsteps = nsteps * 1
            tstep = 30*240000
        if dayofweek:
            nsteps = nsteps * 1
            tstep = 240000
        if diurnal:
            nsteps = nsteps * 25
            tstep = 10000

        outf.createDimension('TSTEP', nsteps).setunlimited(True)

        if verbose > 0:
            print('Calculating composite factor', flush=True)

        for sectorkey, varkeys in alloc_keys.items():
            factor = self.get_factor(
                sectorkey, refdate,
                diurnal=diurnal, dayofweek=dayofweek, monthly=monthly
            )
            for varkey in varkeys:
                invar = ef.variables[varkey]
                if verbose > 0:
                    print(f'Scaling {varkey}...', flush=True)
                outvar = outf.copyVariable(invar, key=varkey, withdata=False)
                outvar.setncatts(
                    {pk: getattr(invar, pk) for pk in invar.ncattrs()}
                )
                outvar[:] = invar[time] * factor

        outf.SDATE = int(refdate.strftime('%Y%j'))
        outf.STIME = int(refdate.strftime('%H%M%S'))
        outf.TSTEP = tstep
        if format == 'ioapi':
            outf.updatemeta()
            outf.updatetflag(overwrite=True)

        history = getattr(outf, 'HISTORY')
        history += f'apply_temporal({locals})'
        setattr(outf, 'HISTORY', history)
        if outpath is not None and remove:
            os.remove(outpath)

        if outpath is None:
            return outf
        else:
            outf.save(outpath, verbose=0).close()
            return pnc.pncopen(outpath, format='ioapi')
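
A usage sketch, assuming alloc is an instance of the class this method belongs to; the input path and the 'EGU' profile name are hypothetical (the profile must exist in the monthly, day-of-week, and diurnal files):

from datetime import datetime

outf = alloc.allocate(
    'gridded.nc', datetime(2016, 7, 1), {'EGU': None},
    outpath='gridded.20160701.nc', format='ioapi', overwrite=True, verbose=1
)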
Example #27
from glob import glob
from warnings import warn

import matplotlib.pyplot as plt
from matplotlib.ticker import StrMethodFormatter

import PseudoNetCDF as pnc

from perim import perimslices

np = plt.np

inpaths = sorted(glob('../combine/*201?????.BCON.combine.nc'))
tslice = slice(None, None, 6)
varks = ['O3PPB', 'ASO4IJ', 'ANO3IJ', 'NOx', 'ANAIJ', 'PMIJ']
infiles = [
    pnc.pncopen(
        inpath,
        format='ioapi').subsetVariables(varks).sliceDimensions(TSTEP=tslice)
    for inpath in inpaths
]
infile = infiles[0].stack(infiles[1:], 'TSTEP')
del infiles
infile.TSTEP = tslice.step * infile.TSTEP
time = infile.getTimes()
warn('Debug using {}h'.format(tslice.step))
lays = np.arange(0, infile.NLAYS + 1)


def sigmabyt(plotfile, vark, title, norm, ticks, formatter, outpath):
    plt.close()
    ax = plotfile.plot(vark,
                       plottype='TSTEP-LAY',
Example #28
def openhe5(inpaths, opts, verbose):
    tdim = opts.get('time_dim', 'nTimes')
    xdim = opts.get('xtrack_dim', 'nXtrack')
    lcenterdim = opts.get('level_center_dim', 'nLevels')
    omfs = []
    for inpath in inpaths:
        if verbose > 1:
            print('Opening', inpath, flush=True)
        tmpf = pnc.pncopen(inpath, format='netcdf')
        omfi = pnc.PseudoNetCDFFile.from_ncvs(
            **{
                varkey: tmpf[opts['datagrp']].variables[varkey]
                for varkey in opts['datakeys']
            })
        _applyscale(omfi)

        omgfi = pnc.PseudoNetCDFFile.from_ncvs(
            **{
                varkey: tmpf[opts['geogrp']].variables[varkey]
                for varkey in opts['geokeys']
            })
        _applyscale(omgfi)

        datadims = opts.get('datadims', None)
        geodims = opts.get('geodims', None)
        if datadims is None:
            ddims = list(omfi.dimensions)
            datadims = dict(zip(ddims, [tdim, xdim, lcenterdim]))
            print('Dimension mapping heuristically', flush=True)
            print({dk: len(dv) for dk, dv in omfi.dimensions.items()})
            print('Selected dimension mapping:', datadims, flush=True)

        if geodims is None:
            gdims = list(omgfi.dimensions)
            geodims = dict(zip(gdims, [tdim, xdim, lcenterdim]))
            print('Dimension mapping heuristically', flush=True)
            print({dk: len(dv) for dk, dv in omgfi.dimensions.items()})
            print('Selected dimension mapping:', geodims, flush=True)

        for inkey, outkey in datadims.items():
            if inkey not in omfi.dimensions:
                print('** Error renaming data dimension:\n' +
                      f'Key {inkey} ({outkey}) not found:\n{omfi.dimensions}' +
                      '\n\n** Try increasing or decreasing phony numbered' +
                      ' dimensions by 1 in the configuration.' +
                      '\n** Different netcdf versions give them' +
                      ' different names for repeated length dimensions.')
                sys.exit()

        omfi.renameDimensions(**datadims, inplace=True)

        for inkey, outkey in geodims.items():
            if inkey not in omgfi.dimensions:
                print(
                    'Error renaming geo dimension:' +
                    f'Key {inkey} ({outkey}) not found:\n{omgfi.dimensions}' +
                    '\n\n** Try increasing or decreasing phony numbered' +
                    ' dimensions by 1 in the configuration.' +
                    '\n** Different netcdf versions give them' +
                    ' different names for repeated length dimensions.')
                sys.exit()

        omgfi.renameDimensions(**geodims, inplace=True)

        for geokey in opts['geokeys']:
            omfi.copyVariable(omgfi.variables[geokey], key=geokey)

        flipdimkeys = opts.get('flipdims', [])
        if len(flipdimkeys) > 0:
            flipslices = {
                k: slice(None, None, -1)
                for k in flipdimkeys if k in omfi.dimensions
            }
            omfi = omfi.sliceDimensions(**flipslices)
        omfs.append(omfi)

    return omfs
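
A sketch of the opts mapping openhe5 expects; the group names, variable names, and file path are placeholders in the style of Aura OMI HDF-EOS5 products:

opts = {
    'datagrp': 'HDFEOS/SWATHS/ColumnAmountNO2/Data Fields',
    'geogrp': 'HDFEOS/SWATHS/ColumnAmountNO2/Geolocation Fields',
    'datakeys': ['ColumnAmountNO2Trop'],
    'geokeys': ['Latitude', 'Longitude', 'Time'],
    # datadims/geodims may be omitted to let the heuristic mapping run
    'datadims': {'phony_dim_0': 'nTimes', 'phony_dim_1': 'nXtrack'},
    'flipdims': [],
}
omfs = openhe5(['OMI-Aura_L2-OMNO2_2016m0701.he5'], opts, verbose=2)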
Example #29
test = """
gcpath = 'CONC/GEOSChem.SpeciesConc.20160701_0000z.nc4'
#gcpath = '/work/ROMO/global/GCv12.0.1/GC/rundirs/geosfp_2x25_standard/Output/GEOSChem.SpeciesConc.20160101_0000z.nc4'
gcexprpath = 'definitions/gc/gc12_to_cb6r3.expr'
aeexprpath = 'definitions/gc/gc12_to_ae6_nvPOA.expr'

args = parser.parse_args([
    '--spcprefix', 'SpeciesConc_', gcpath,
    'GEOS-Chem_Species_Database.json', 'CMAQ.json',
    gcexprpath, aeexprpath
])
"""
args = parser.parse_args()

f = pnc.pncopen(args.inpath)

fromspcs = json.load(open(args.fromjson, 'r'))
tospcs = json.load(open(args.tojson, 'r'))
exprstr = '\n'.join(
    [open(exprpath, 'r').read() for exprpath in args.exprpaths])

noadvspc = [k for k, v in fromspcs.items() if not v['Is_Advected']]
gcspc = [
    k for k, v in fromspcs.items() if not v['Is_Aero'] and v['Is_Advected']
]
aespc = [k for k, v in fromspcs.items() if v['Is_Aero'] and v['Is_Advected']]

spc = gcspc
prefix = args.spcprefix
symtbl = symtable(exprstr, '<pncexpr>', 'exec')
Example #30
    def get_dayofweekfile(self, propath=None, read_kwds=None):
        """
        Arguments
        ---------
        propath : str
            path to tpro file ATPRO_WEEKLY file
        read_kwds : dict or None
            If None, default read_kwds are dict(comment='#', index_col=0,
            names=['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun', 'comment'])

        Returns:
            df : PseudoNetCDFFile
                IOAPI-like file with day of week allocations for sectors (as
                variables) with shape TSTEP=25, LAY=7, ROW=NROWS, COL=NCOLS
        """

        if self.dayofweekfile is not None:
            return self.dayofweekfile
        elif os.path.exists(self.dayofweekpath):
            self.dayofweekfile = pnc.pncopen(
                self.dayofweekpath, format='ioapi'
            )
            return self.get_dayofweekfile()

        if propath is None:
            raise KeyError(
                f'propath required because {self.dayofweekpath} not found'
            )

        print(
            f'{self.dayofweekpath} not available; calculating from {propath}'
        )
        if read_kwds is None:
            read_kwds = dict(
                comment='#', index_col=0,
                names='Mon Tue Wed Thu Fri Sat Sun comment'.split()
            )
        wkdf = pd.read_csv(propath, **read_kwds)

        wkdf.index.name = 'profile_id'
        tzf = self.get_timezonefile()

        day_f = tzf.subset([])
        day_f.createDimension('TSTEP', 25).setunlimited(True)
        day_f.createDimension('LAY', 7)
        day_f.VGLVLS = np.arange(8)
        day_f.VGTYP = 6
        day_f.SDATE = 2020001
        day_f.STIME = 0
        day_f.TSTEP = 10000

        for wkidx, wkrow in wkdf.iterrows():
            cmt = wkrow['comment']
            label = getlabel(cmt)
            print(label, cmt)
            wkvals = weekdayfactor(wkdf, wkidx, tzf)
            wkvar = day_f.createVariable(
                label, 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
                long_name=label, var_desc=label, units='s/s'
            )
            wkvar[:] = wkvals

        day_f.updatemeta()
        day_f.updatetflag(overwrite=True)
        day_f.FILEDESC = (
            """
## NASA-like metadata
1, 2310
Henderson, Barron
US EPA/Office of Air Quality Planning and Standards
EPA sector-based hourly profiles
Not Applicable
1, 1
2021, 01, 13, 2021, 01, 13
0
...
PI_CONTACT_INFO: [email protected]
PLATFORM: CMAQ Emission processing input
DATA_INFO:  All data in daily average per second rates
UNCERTAINTY:  large, preliminary data based on US averages.
DM_CONTACT_INFO: Henderson, Barron, US EPA, [email protected]
PROJECT_INFO: For easy processing processing of emissions.
STIPULATIONS_ON_USE: Use of these data requires PI notification
OTHER_COMMENTS: The LAY dimension is day of the week (Mon, Tue, ..., Sun)."""
            + "Time is UTC, but the profiles are based on LST days. So, "
            + "UTC_Mon will include hours from Sun and Tue as appropriate "
            + """given the hour offset.
REVISION: R0
R0: Preliminary data
"""
        )
        day_f.save(
            self.dayofweekpath, format='NETCDF4_CLASSIC', complevel=1,
            verbose=0
        ).close()
        return self.get_dayofweekfile()
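
A usage sketch, assuming alloc is an instance of the class this method belongs to; the ATPRO_WEEKLY path is hypothetical:

dayf = alloc.get_dayofweekfile(propath='ATPRO_WEEKLY.txt')
print(len(dayf.dimensions['TSTEP']), len(dayf.dimensions['LAY']))  # 25 UTC hours, 7 days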