def getCommonGrid(grid, res=None, lfilepath=False):
    ''' return definitions of commonly used grids (either from datasets or pickles) '''
    # try pickle first
    griddef = loadPickledGridDef(grid=grid, res=res, folder=grid_folder, check=False, lfilepath=lfilepath)
    # alternatively look in known datasets
    if griddef is None:
        try:
            dataset = import_module(grid)
            if res is None:
                griddef = dataset.default_grid
            else:
                griddef = dataset.grid_def[res]
            if lfilepath: griddef.filepath = None # monkey-patch... (dataset grids have no pickle file)
        except ImportError:
            griddef = None
    # assert (elon-slon) % dlon == 0
    # lon = np.linspace(slon+dlon/2,elon-dlon/2,(elon-slon)/dlon)
    # assert (elat-slat) % dlat == 0
    # lat = np.linspace(slat+dlat/2,elat-dlat/2,(elat-slat)/dlat)
    # # add new geographic coordinate axes for projected map
    # xlon = Axis(coord=lon, atts=dict(grid='lon', long_name='longitude', units='deg E'))
    # ylat = Axis(coord=lat, atts=dict(grid='lat', long_name='latitude', units='deg N'))
    # gridstr = '{0:s}_{1:s}'.format(grid,res) if res is not None else grid
    # return grid definition object (or None, if none found)
    return griddef
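# For illustration only: a minimal usage sketch (not part of the module). getCommonGrid
# first checks the pickle folder, then falls back to importing a dataset module of the
# same name; the grid/resolution values below are hypothetical examples.
# griddef = getCommonGrid('NARR')           # default grid from the datasets.NARR module
# griddef = getCommonGrid('GPCC', res='25') # a specific resolution from datasets.GPCC
# if griddef is None:
#     print('No grid definition found - create and pickle it first.')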
def loadObservations(name=None, folder=None, period=None, grid=None, station=None, shape=None, lencl=False,
                     varlist=None, varatts=None, filepattern=None, filelist=None, resolution=None,
                     projection=None, geotransform=None, axes=None, lautoregrid=None, mode='climatology'):
    ''' A function to load standardized observational datasets. '''
    # prepare input
    if mode.lower() == 'climatology': # post-processed climatology files
        # transform period
        if period is None or period == '':
            if name not in ('PCIC','PRISM','GPCC','NARR'):
                raise ValueError("A period is required to load observational climatologies.")
        elif isinstance(period, basestring):
            period = tuple([int(prd) for prd in period.split('-')])
        elif not isinstance(period, (int, np.integer)) and not (isinstance(period, tuple) and len(period) == 2):
            raise TypeError(period)
    elif mode.lower() in ('time-series','timeseries'): # concatenated time-series files
        period = None # to indicate time-series (but for safety, the input must be more explicit)
        if lautoregrid is None: lautoregrid = False # this can take very long!
    # cast/copy varlist
    if isinstance(varlist, basestring): varlist = [varlist] # cast as list
    elif varlist is not None: varlist = list(varlist) # make copy to avoid interference
    # figure out station and shape options
    if station and shape:
        raise ArgumentError()
    elif station or shape:
        if grid is not None:
            raise NotImplementedError('Currently observational station data can only be loaded from the native grid.')
        if lautoregrid:
            raise GDALError('Station data can not be regridded, since it is not map data.')
        lstation = bool(station); lshape = bool(shape)
        grid = station if lstation else shape
        # add station/shape parameters
        if varlist:
            params = stn_params if lstation else shp_params
            for param in params:
                if param not in varlist: varlist.append(param)
    else:
        lstation = False; lshape = False
    # varlist (varlist = None means all variables)
    if varatts is None: varatts = default_varatts.copy()
    if varlist is not None: varlist = translateVarNames(varlist, varatts)
    # filelist
    if filelist is None:
        filename = getFileName(name=name, resolution=resolution, period=period, grid=grid, filepattern=filepattern)
        # check existence
        filepath = '{:s}/{:s}'.format(folder, filename)
        if not os.path.exists(filepath):
            nativename = getFileName(name=name, resolution=resolution, period=period, grid=None, filepattern=filepattern)
            nativepath = '{:s}/{:s}'.format(folder, nativename)
            if os.path.exists(nativepath):
                if lautoregrid:
                    from processing.regrid import performRegridding # causes circular reference if imported earlier
                    griddef = loadPickledGridDef(grid=grid, res=None, folder=grid_folder)
                    dataargs = dict(period=period, resolution=resolution)
                    performRegridding(name, 'climatology', griddef, dataargs) # default kwargs
                else:
                    raise IOError("The dataset '{:s}' for the selected grid ('{:s}') is not available - use the regrid module to generate it.".format(filename, grid))
            else:
                raise IOError("The dataset file '{:s}' does not exist!\n('{:s}')".format(filename, filepath))
    # load dataset
    dataset = DatasetNetCDF(name=name, folder=folder, filelist=[filename], varlist=varlist, varatts=varatts,
                            axes=axes, multifile=False, ncformat='NETCDF4')
    # mask all shapes that are incomplete in dataset
    if shape and lencl and 'shp_encl' in dataset:
        dataset.load() # need to load data before masking; is cheap for shape averages, anyway
        dataset.mask(mask='shp_encl', invert=True, skiplist=shp_params)
    # correct ordinal number of shape (should start at 1, not 0)
    if lshape:
        if dataset.hasAxis('shapes'):
            raise AxisError("Axis 'shapes' should be renamed to 'shape'!")
        if not dataset.hasAxis('shape'):
            raise AxisError()
        if dataset.shape.coord[0] == 0:
            dataset.shape.coord += 1
    # figure out grid
    if not lstation and not lshape:
        if grid is None or grid == name:
            dataset = addGDALtoDataset(dataset, projection=projection, geotransform=geotransform, gridfolder=grid_folder)
        elif isinstance(grid, basestring):
            # load GridDefinition from pickle file and add GDAL functionality to dataset
            # griddef = loadPickledGridDef(grid=grid, res=None, filename=None, folder=grid_folder)
            dataset = addGDALtoDataset(dataset, griddef=grid, gridfolder=grid_folder)
        else:
            raise TypeError(grid)
        # N.B.: projection should be auto-detected, if geographic (lat/lon)
    return dataset
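# For illustration only: a hedged usage sketch of loadObservations; the dataset name,
# folder, resolution, and variable names below are hypothetical placeholders and
# depend on the locally installed data.
# clim = loadObservations(name='GPCC', folder=avgfolder, period=(1979,2009), grid=None,
#                         varlist=['precip'], resolution='05', mode='climatology')
# print(clim) # a DatasetNetCDF instance with GDAL georeferencing attached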
        if griddef is None:
            print('GridDefinition object for {0:s} not found!'.format(gridstr))
        else:
            # save pickle
            filename = '{0:s}/{1:s}'.format(grid_folder, griddef_pickle.format(gridstr))
            filehandle = open(filename, 'wb') # pickle requires binary mode
            pickle.dump(griddef, filehandle)
            filehandle.close()
            print(' Saving Pickle to \'{0:s}\''.format(filename))
            print('')
            # load pickle to make sure it is right
            del griddef
            griddef = loadPickledGridDef(grid, res=res, folder=grid_folder)
            print(griddef)
            print('')

    ## create a new grid
    elif mode == 'create_grid':

        # parameters for UTM 17
        name = 'grw1' # 1km resolution
        geotransform = [500.e3, 1.e3, 0, 4740.e3, 0, 1.e3]; size = (132, 162)
        # name = 'grw2' # 5km resolution
        # geotransform = [500.e3, 5.e3, 0, 4740.e3, 0, 5.e3]; size = (27, 33)
        projection = "+proj=utm +zone=17 +north +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
        # N.B.: geotransform = [x_0, dx, 0, y_0, 0, dy]
        #       GT(0) & GT(3) are the coordinates of the bottom left corner
        #       GT(1) & GT(5) are pixel width and height
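# For illustration only: a minimal sketch of how the geotransform convention documented
# above maps pixel indices to projected coordinates (the helper name is hypothetical,
# not part of the module).
def pixelToCoords(geotransform, i, j):
    ''' return projected (x,y) of the center of pixel (i,j), assuming
        geotransform = [x_0, dx, 0, y_0, 0, dy] as documented above '''
    x_0, dx, _, y_0, _, dy = geotransform
    x = x_0 + (i + 0.5)*dx # center of pixel column i
    y = y_0 + (j + 0.5)*dy # center of pixel row j
    return x, y

# e.g. the center of the first cell of the 'grw1' grid defined above:
# pixelToCoords([500.e3, 1.e3, 0, 4740.e3, 0, 1.e3], 0, 0) -> (500500.0, 4740500.0)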
sink = DatasetNetCDF(name='GPCC Climatology', folder=avgfolder, filelist=[filename], atts=source.atts, mode='w')
# sink = addGDALtoDataset(sink, griddef=source.griddef)

# initialize processing
CPU = CentralProcessingUnit(source, sink, tmp=True)

if period is not None:
    # determine averaging interval
    offset = source.time.getIndex(period[0]-1979)//12 # origin of monthly time-series is at January 1979
    # N.B.: integer division converts the month index into years
    # start processing climatology
    CPU.Climatology(period=period[1]-period[0], offset=offset, flush=False)
    # CPU.sync(flush=True)

# get NARR coordinates
if grid != 'GPCC': # N.B.: 'is not' compares identity, not string equality
    griddef = loadPickledGridDef(grid=grid, res=None, folder=grid_folder)
    # new_grid = import_module(grid[0:4]).__dict__[grid+'_grid']
    # if grid == 'NARR':
    #     from datasets.NARR import NARR_grid
    # reproject and resample (regrid) dataset
    CPU.Regrid(griddef=griddef, flush=False)

# # shift longitude axis by 180 degrees left (i.e. -180 - 180 -> 0 - 360)
# print('')
# print(' +++ processing shift longitude +++ ')
# CPU.Shift(lon=-180, flush=True)
# print('\n')
#
# # shift longitude axis by 180 degrees left (i.e. -180 - 180 -> 0 - 360)
# print('')
# print(' +++ processing shift/roll +++ ')
## operational config for SON2
# project = 'SON'
# start_date = '2011-01-01'; end_date = None
# grid_name = 'son2'
## operational config for ASB2
# project = 'ASB'
# start_date = '2010-01-01'; end_date = None
# grid_name = 'asb2'

## define target grid/projection
# projection/UTM zone
tgt_size = None; tgt_geotrans = None # valid for native grid
if project == 'WRF':
    # load pickled GridDef
    from geodata.gdal import loadPickledGridDef
    griddef = loadPickledGridDef(grid=grid_name, encoding='latin1')
    print(griddef)
    tgt_crs = genProj(griddef.projection.ExportToProj4(), name=grid_name)
    tgt_geotrans = griddef.geotransform; tgt_size = griddef.size
elif project == 'SnoDAS':
    tgt_crs = None # native grid
elif project.upper() in ('SON','GRW'):
    # southern Ontario projection (UTM 17)
    tgt_crs = genProj("+proj=utm +zone=17 +north +ellps=WGS84 +datum=WGS84 +units=m +no_defs", name=grid_name)
elif project.upper() == 'CMB':
    # UTM zone 11 projection
    tgt_crs = genProj("+proj=utm +zone=11 +north +ellps=WGS84 +datum=WGS84 +units=m +no_defs", name=grid_name)
elif project.upper() == 'ASB':
    # Assiniboin projection (UTM 14)
    tgt_crs = genProj("+proj=utm +zone=14 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs", name=grid_name)

# grid definition (mostly UTM grids for HGS)
print('')
if res is None:
    gridstr = grid
    print(' *** Pickling Grid Definition for {0:s} *** '.format(grid))
else:
    gridstr = '{0:s}_{1:s}'.format(grid, res)
    print(' *** Pickling Grid Definition for {0:s} Resolution {1:s} *** '.format(grid, res))
print('')

# load GridDefinition
griddef = getCommonGrid(grid, res)

if griddef is None:
    print('GridDefinition object for {0:s} not found!'.format(gridstr))
else:
    # save pickle
    filename = '{0:s}/{1:s}'.format(grid_folder, griddef_pickle.format(gridstr))
    filehandle = open(filename, 'wb') # pickle requires binary mode
    pickle.dump(griddef, filehandle)
    filehandle.close()
    print(' Saving Pickle to \'{0:s}\''.format(filename))
    print('')
    # load pickle to make sure it is right
    del griddef
    griddef = loadPickledGridDef(grid, res=res, folder=grid_folder)
    print(griddef)
    print('')
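# For illustration only: grid pickles written under Python 2 contain byte strings that
# Python 3 cannot decode by default; the 'encoding' argument seen in calls like
# loadPickledGridDef(grid=grid_name, encoding='latin1') handles this. A minimal
# stand-alone sketch of the same load (the file path here is hypothetical):
# import pickle
# with open('grids/grw1_griddef.pickle', 'rb') as filehandle: # read in binary mode
#     griddef = pickle.load(filehandle, encoding='latin1') # decode Python-2 byte strings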
                  'T2', 'Tmin', 'Tmax', 'Q2', 'pet', 'cldfrc', 'wetfrq', 'frzfrq'], lautoregrid=True)
cruclim = loadCRU(period=(1979, 2009), grid=grid,
                  varlist=['T2', 'Tmin', 'Tmax', 'Q2', 'pet', 'cldfrc', 'wetfrq', 'frzfrq'],
                  lautoregrid=True)

# grid definition
try:
    griddef = loadPickledGridDef(grid=grid, res=None, folder=grid_folder)
    grid_name = griddef.name
except IOError:
    griddef = None
    grid_name = grid
periodstr = '{0:4d}-{1:4d}'.format(*period)
print('\n *** Merging Climatology from {0:s} on {1:s} Grid *** \n'.format(periodstr, grid))

## prepare target dataset
filename = getFileName(grid=grid_name,
    def __init__(self):
        self.name = 'const'
        self.atts = dict(orog = dict(name='zs', units='m')) # surface altitude
        # ... (additional constant fields omitted)

# axes (don't have their own file)
class Axes(FileType):
    ''' A mock-filetype for axes. '''
    def __init__(self):
        self.atts = dict(time = dict(name='time', units='days', offset=-47116,
                                     atts=dict(long_name='Month since 1979')), # time coordinate (days since 1979-01-01)
                         # NOTE THAT THE CMIP5 DATASETS HAVE DIFFERENT TIME OFFSETS BETWEEN MEMBERS !!!
                         # N.B.: the time coordinate is only used for the monthly time-series data, not the LTM;
                         #       the time offset is chosen such that 1979 begins with the origin (time=0)
                         lon  = dict(name='lon', units='deg E'), # west-east coordinate
                         lat  = dict(name='lat', units='deg N'), # south-north coordinate
                         plev = dict(name='lev', units='')) # hybrid pressure coordinate
        self.vars = list(self.atts.keys())

# Time-Series (monthly)
def loadCMIP5_TS(experiment=None, name=None, grid=None, filetypes=None, varlist=None, varatts=None,
                 translateVars=None, lautoregrid=None, load3D=False, ignore_list=None, lcheckExp=True,
                 lreplaceTime=True, lwrite=False, exps=None):
    ''' Get a properly formatted CMIP5 dataset with a monthly time-series. (wrapper for loadCMIP5_All) '''
    return loadCMIP5_All(experiment=experiment, name=name, grid=grid, period=None, station=None,
                         filetypes=filetypes, varlist=varlist, varatts=varatts, translateVars=translateVars,
                         lautoregrid=lautoregrid, load3D=load3D, ignore_list=ignore_list, mode='time-series',
                         lcheckExp=lcheckExp, lreplaceTime=lreplaceTime, lwrite=lwrite, exps=exps)

# load minimally pre-processed CMIP5 climatology files
def loadCMIP5(experiment=None, name=None, grid=None, period=None, filetypes=None, varlist=None, varatts=None,
              translateVars=None, lautoregrid=None, load3D=False, ignore_list=None, lcheckExp=True,
              lreplaceTime=True, lencl=False, lwrite=False, exps=None):
    ''' Get a properly formatted monthly CMIP5 climatology as NetCDFDataset. '''
    return loadCMIP5_All(experiment=experiment, name=name, grid=grid, period=period, station=None,
                         filetypes=filetypes, varlist=varlist, varatts=varatts, translateVars=translateVars,
                         lautoregrid=lautoregrid, load3D=load3D, ignore_list=ignore_list, exps=exps,
                         mode='climatology', lcheckExp=lcheckExp, lreplaceTime=lreplaceTime, lwrite=lwrite)

# load any of the various pre-processed CMIP5 climatology and time-series files
def loadCMIP5_All(experiment=None, name=None, grid=None, station=None, shape=None, period=None,
                  varlist=None, varatts=None, translateVars=None, lautoregrid=None, load3D=False,
                  ignore_list=None, mode='climatology', cvdp_mode=None, lcheckExp=True, exps=None,
                  lreplaceTime=True, filetypes=None, lencl=False, lwrite=False, check_vars=None):
    ''' Get any of the monthly CMIP5 files as a properly formatted NetCDFDataset. '''
    # period
    if isinstance(period, (tuple, list)):
        if not all(isNumber(period)): raise ValueError(period)
    elif isinstance(period, basestring):
        period = [int(prd) for prd in period.split('-')]
    elif isinstance(period, (int, np.integer)) or period is None:
        pass # handled later
    else:
        raise DateError("Illegal period definition: {:s}".format(str(period)))
    # prepare input
    lclim = False; lts = False; lcvdp = False; ldiag = False # mode switches
    if mode.lower() == 'climatology': # post-processed climatology files
        lclim = True
        folder, experiment, name = getFolderName(name=name, experiment=experiment, folder=None, mode='avg', exps=exps)
        if period is None:
            raise DateError('Currently CMIP5 climatologies have to be loaded with the period explicitly specified.')
    elif mode.lower() in ('time-series','timeseries'): # concatenated time-series files
        lts = True
        folder, experiment, name = getFolderName(name=name, experiment=experiment, folder=None, mode='avg', exps=exps)
        lclim = False; period = None; periodstr = None # to indicate time-series (but for safety, the input must be more explicit)
        if lautoregrid is None: lautoregrid = False # this can take very long!
    elif mode.lower() == 'cvdp': # CVDP files
        lcvdp = True
        folder, experiment, name = getFolderName(name=name, experiment=experiment, folder=None,
                                                 mode='cvdp', cvdp_mode=cvdp_mode, exps=exps)
        if period is None:
            if not isinstance(experiment, Exp):
                raise DatasetError('Periods can only be inferred for registered datasets.')
            period = (experiment.beginyear, experiment.endyear)
    elif mode.lower() == 'diag': # AMWG diagnostic files
        ldiag = True
        folder, experiment, name = getFolderName(name=name, experiment=experiment, folder=None, mode='diag', exps=exps)
        raise NotImplementedError("Loading AMWG diagnostic files is not supported yet.")
    else:
        raise NotImplementedError("Unsupported mode: '{:s}'".format(mode))
    # cast/copy varlist
    if isinstance(varlist, basestring): varlist = [varlist] # cast as list
    elif varlist is not None: varlist = list(varlist) # make copy to avoid interference
    # handle stations and shapes
    if station and shape:
        raise ArgumentError()
    elif station or shape:
        if grid is not None:
            raise NotImplementedError('Currently CMIP5 station data can only be loaded from the native grid.')
        if lcvdp:
            raise NotImplementedError('CVDP data is not available as station data.')
        if lautoregrid:
            raise GDALError('Station data can not be regridded, since it is not map data.')
        lstation = bool(station); lshape = bool(shape)
        # add station/shape parameters
        if varlist:
            params = stn_params if lstation else shp_params
            for param in params:
                if param not in varlist: varlist.append(param)
    else:
        lstation = False; lshape = False
    # period
    if isinstance(period, (int, np.integer)):
        if not isinstance(experiment, Exp):
            raise DatasetError('Integer periods are only supported for registered datasets.')
        period = (experiment.beginyear, experiment.beginyear+period)
    if lclim:
        periodstr = '_{0:4d}-{1:4d}'.format(*period)
    elif lcvdp:
        periodstr = '{0:4d}-{1:4d}'.format(period[0], period[1]-1)
        # N.B.: the period convention in CVDP is that the end year is included
    else:
        periodstr = ''
    # generate filelist and attributes based on filetypes and domain
    if filetypes is None:
        filetypes = ['atm','lnd']
    elif isinstance(filetypes, (list, tuple, set, basestring)):
        if isinstance(filetypes, basestring): filetypes = [filetypes]
        else: filetypes = list(filetypes)
        # interpret/replace WRF filetypes (for convenience)
        tmp = []
        for ft in filetypes:
            if ft in ('const','drydyn3d','moist3d','rad','plev3d','srfc','xtrm','hydro'):
                if 'atm' not in tmp: tmp.append('atm')
            elif ft in ('lsm','snow'):
                if 'lnd' not in tmp: tmp.append('lnd')
            elif ft in ('aux',): # N.B.: must be a tuple - ('aux') is just a string
                pass # currently not supported
            # elif ft in (,):
            #     if 'atm' not in tmp: tmp.append('atm')
            #     if 'lnd' not in tmp: tmp.append('lnd')
            else:
                tmp.append(ft)
        filetypes = tmp; del tmp
        if 'axes' not in filetypes: filetypes.append('axes')
    else:
        raise TypeError(filetypes)
    atts = dict(); filelist = []; typelist = []
    for filetype in filetypes:
        fileclass = fileclasses[filetype]
        if lclim and fileclass.climfile is not None: filelist.append(fileclass.climfile)
        elif lts and fileclass.tsfile is not None: filelist.append(fileclass.tsfile)
        elif lcvdp and fileclass.cvdpfile is not None: filelist.append(fileclass.cvdpfile)
        elif ldiag and fileclass.diagfile is not None: filelist.append(fileclass.diagfile)
        typelist.append(filetype)
        atts.update(fileclass.atts)
    # figure out ignore list
    if ignore_list is None: ignore_list = set(ignore_list_2D)
    elif isinstance(ignore_list, (list, tuple)): ignore_list = set(ignore_list)
    elif not isinstance(ignore_list, set): raise TypeError(ignore_list)
    if not load3D: ignore_list.update(ignore_list_3D)
    if lautoregrid is None: lautoregrid = not load3D # don't auto-regrid 3D variables - takes too long!
    # translate varlist
    if varatts is not None: atts.update(varatts)
    lSST = False
    if varlist is not None:
        varlist = list(varlist)
        if 'SST' in varlist: # special handling of the SST variable, as it is part of Ts
            varlist.remove('SST')
            if 'Ts' not in varlist: varlist.append('Ts')
            lSST = True # Ts is renamed to SST below
        if translateVars is None:
            varlist = list(varlist) + translateVarNames(varlist, atts) # also add translations, just in case
        elif translateVars is True:
            varlist = translateVarNames(varlist, atts)
        # N.B.: DatasetNetCDF never applies translation itself!
    # NetCDF file mode
    ncmode = 'rw' if lwrite else 'r'
    # get grid or station-set name
    if lstation:
        # the station name can be inserted as the grid name
        gridstr = '_'+station.lower() # only use lower case for filenames
        griddef = None
    elif lshape:
        # the shape-set name can be inserted as the grid name
        gridstr = '_'+shape.lower() # only use lower case for filenames
        griddef = None
    else:
        if grid is None or grid == experiment.grid:
            gridstr = ''; griddef = None
        else:
            gridstr = '_'+grid.lower() # only use lower case for filenames
            griddef = loadPickledGridDef(grid=grid, res=None, filename=None, folder=grid_folder, check=True)
    # insert grid name and period
    filenames = []
    for filetype, fileformat in zip(typelist, filelist):
        if lclim: filename = fileformat.format(gridstr, periodstr) # put together specific filename for climatology
        elif lts: filename = fileformat.format(gridstr) # or for time-series
        elif lcvdp: filename = fileformat.format(experiment.name if experiment else name, periodstr) # not implemented: gridstr
        elif ldiag: raise NotImplementedError(filetype)
        else: raise DatasetError(mode)
        filenames.append(filename) # append to list (passed to DatasetNetCDF later)
        # check existence
        filepath = '{:s}/{:s}'.format(folder, filename)
        if not os.path.exists(filepath):
            nativename = fileformat.format('', periodstr) # original filename (before regridding)
            nativepath = '{:s}/{:s}'.format(folder, nativename)
            if os.path.exists(nativepath):
                if lautoregrid:
                    from processing.regrid import performRegridding # causes circular reference if imported earlier
                    griddef = loadPickledGridDef(grid=grid, res=None, folder=grid_folder)
                    dataargs = dict(experiment=experiment, filetypes=[filetype], period=period)
                    print("The '{:s}' (CESM) dataset '{:s}' for the grid ('{:s}') is not available:\n Attempting regridding on-the-fly.".format(name, filename, grid))
                    if performRegridding('CESM', 'climatology' if lclim else 'time-series', griddef, dataargs): # default kwargs
                        raise IOError("Automatic regridding failed!")
                    print("Output: '{:s}'".format(filepath))
                else:
                    raise IOError("The '{:s}' (CESM) dataset '{:s}' for the selected grid ('{:s}') is not available - use the regrid module to generate it.".format(name, filename, grid))
            else:
                raise IOError("The '{:s}' (CESM) dataset file '{:s}' does not exist!\n({:s})".format(name, filename, folder))
    # load dataset
    # print(varlist, filenames)
    if experiment: title = experiment.title
    else: title = name
    dataset = DatasetNetCDF(name=name, folder=folder, filelist=filenames, varlist=varlist, axes=None,
                            varatts=atts, title=title, multifile=False, ignore_list=ignore_list,
                            ncformat='NETCDF4', squeeze=True, mode=ncmode, check_vars=check_vars)
    # replace time axis
    if lreplaceTime:
        if lts or lcvdp:
            # check time axis and center at 1979-01 (zero-based)
            if experiment is None:
                ys = period[0]; ms = 1
            else:
                ys, ms, ds = [int(t) for t in experiment.begindate.split('-')]
                assert ds == 1
            if dataset.hasAxis('time'):
                ts = (ys-1979)*12 + (ms-1); te = ts + len(dataset.time) # month since 1979 (Jan 1979 = 0)
                atts = dict(long_name='Month since 1979-01')
                timeAxis = Axis(name='time', units='month', coord=np.arange(ts, te, 1, dtype='int16'), atts=atts)
                dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
            if dataset.hasAxis('year'):
                ts = ys-1979; te = ts + len(dataset.year) # years since 1979 (1979 = 0)
                atts = dict(long_name='Years since 1979-01')
                yearAxis = Axis(name='year', units='year', coord=np.arange(ts, te, 1, dtype='int16'), atts=atts)
                dataset.replaceAxis(dataset.year, yearAxis, asNC=False, deepcopy=False)
        elif lclim:
            if dataset.hasAxis('time') and dataset.time.units.lower() not in monthlyUnitsList:
                atts = dict(long_name='Month of the Year')
                timeAxis = Axis(name='time', units='month', coord=np.arange(1, 13, dtype='int16'), atts=atts)
                assert len(dataset.time) == len(timeAxis), dataset.time
                dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
            elif dataset.hasAxis('year'):
                raise NotImplementedError(dataset)
    # rename SST
    if lSST: dataset['SST'] = dataset.Ts
    # correct ordinal number of shape (should start at 1, not 0)
    if lshape:
        # mask all shapes that are incomplete in dataset
        if lencl and 'shp_encl' in dataset:
            dataset.mask(mask='shp_encl', invert=True)
        if dataset.hasAxis('shapes'):
            raise AxisError("Axis 'shapes' should be renamed to 'shape'!")
        if not dataset.hasAxis('shape'):
            raise AxisError()
        if dataset.shape.coord[0] == 0:
            dataset.shape.coord += 1
    # check
    if len(dataset) == 0:
        raise DatasetError('Dataset is empty - check source file or variable list!')
    # add projection, if applicable
    if not (lstation or lshape):
        dataset = addGDALtoDataset(dataset, griddef=griddef, gridfolder=grid_folder, lwrap360=True, geolocator=True)
    # return formatted dataset
    return dataset


## Dataset API

dataset_name = 'CMIP5' # dataset name
root_folder # root folder of the dataset
avgfolder # root folder for monthly averages
outfolder # root folder for direct WRF output
ts_file_pattern = 'cmip5{0:s}{1:s}_monthly.nc' # filename pattern: filetype, grid
clim_file_pattern = 'cmip5{0:s}{1:s}_clim{2:s}.nc' # filename pattern: filetype, grid, period
data_folder = root_folder # folder for user data
grid_def = {'':None} # there are too many...
grid_res = {'':1.} # approximate grid resolution at 45 degrees latitude
default_grid = None

# functions to access specific datasets
loadLongTermMean = None # WRF doesn't have that...
loadClimatology = loadCMIP5 # pre-processed, standardized climatology
loadTimeSeries = loadCMIP5_TS # time-series data
# loadStationClimatology = loadCMIP5_Stn # pre-processed, standardized climatology at stations
# loadStationTimeSeries = loadCMIP5_StnTS # time-series data at stations
# loadShapeClimatology = loadCMIP5_Shp # climatologies without associated grid (e.g. provinces or basins)
# loadShapeTimeSeries = loadCMIP5_ShpTS # time-series without associated grid (e.g. provinces or basins)


## (ab)use main execution for quick test
if __name__ == '__main__':

    # set mode/parameters
    # mode = 'test_climatology'
    # mode = 'test_timeseries'
    # mode = 'test_ensemble'
    # mode = 'test_point_climatology'
    # mode = 'test_point_timeseries'
    # mode = 'test_point_ensemble'
    # mode = 'test_cvdp'
    mode = 'pickle_grid'
    # mode = 'shift_lon'
    # experiments = ['Ctrl-1', 'Ctrl-A', 'Ctrl-B', 'Ctrl-C']
    # experiments += ['Ctrl-2050', 'Ctrl-A-2050', 'Ctrl-B-2050', 'Ctrl-C-2050']
    experiments = ('Ctrl-1',)
    periods = (15,)
    filetypes = ('atm',) # ['atm','lnd','ice']
    grids = ('cesm1x1',)*len(experiments) # grb1_d01
    # pntset = 'shpavg'
    pntset = 'ecprecip'

    from projects.CESM_experiments import Exp, CESM_exps, ensembles
    # N.B.: importing Exp through CESM_experiments is necessary, otherwise some isinstance() calls fail

    # pickle grid definition
    if mode == 'pickle_grid':

        for grid, experiment in zip(grids, experiments):

            print('')
            print(' *** Pickling Grid Definition for {0:s} *** '.format(grid))
            print('')

            # load GridDefinition
            dataset = loadCMIP5(experiment=CESM_exps[experiment], grid=None, filetypes=['lnd'], period=(1979,1989))
            griddef = dataset.griddef
            # del griddef.xlon, griddef.ylat
            print(griddef)
            griddef.name = grid
            print(' Loading Definition from \'{0:s}\''.format(dataset.name))
            # save pickle
            filename = '{0:s}/{1:s}'.format(grid_folder, griddef_pickle.format(grid))
            if os.path.exists(filename): os.remove(filename) # overwrite
            filehandle = open(filename, 'wb') # pickle requires binary mode
            pickle.dump(griddef, filehandle)
            filehandle.close()
            print(' Saving Pickle to \'{0:s}\''.format(filename))
            print('')
            # load pickle to make sure it is right
            del griddef
            griddef = loadPickledGridDef(grid, res=None, folder=grid_folder)
            print(griddef)
            print('')
            print(griddef.wrap360)
grid_name = 'son2'
##
# project = 'SNW'
# grid_name = 'snw2'
## operational config for ASB2
# project = 'ASB'
# grid_name = 'asb2'

## define target grid/projection
# projection/UTM zone
tgt_size = None
tgt_geotrans = None # valid for native grid
if project == 'WRF':
    # load pickled GridDef
    from geodata.gdal import loadPickledGridDef
    griddef = loadPickledGridDef(grid=grid_name, encoding='latin1')
    print(griddef)
    tgt_crs = genCRS(griddef.projection.ExportToProj4(), name=grid_name)
    tgt_geotrans = griddef.geotransform
    tgt_size = griddef.size
elif project == 'Geo':
    # generic geographic lat/lon
    tgt_crs = genCRS(name=grid_name)
elif project == 'ARB':
    # projection for ARB model
    tgt_crs = genCRS('+proj=laea +lat_0=45 +lon_0=-100 +x_0=0 +y_0=0 +ellps=sphere +units=m +no_defs',
                     name=grid_name)
elif project == 'Hugo':
    # Hugo's projection for Quebec
    tgt_crs = genCRS(
def loadXArray(varname=None, varlist=None, folder=None, grid=None, bias_correction=None, resolution=None,
               varatts=None, filename_pattern=None, default_varlist=None, resampling=None,
               mask_and_scale=True, varmap=None, lgeoref=True, geoargs=None, chunks=None,
               lautoChunk=False, lskip=False, **kwargs):
    ''' function to open a dataset where variables are stored in separate files and non-native grids are
        stored in subfolders; this mainly applies to high-resolution, high-frequency (daily) observations
        (e.g. SnoDAS); datasets are opened using xarray '''
    if grid:
        folder = '{}/{}'.format(folder, grid) # non-native grids are stored in sub-folders
    # auto-detect resampling folders
    if resampling is None:
        old_folder = os.getcwd()
        os.chdir(folder)
        # inspect folder
        nc_file = False; default_folder = False; folder_list = []
        for item in os.listdir():
            if os.path.isfile(item):
                if item.endswith('.nc'): nc_file = True
            elif os.path.isdir(item):
                if item.lower() == 'default': default_folder = item
                folder_list.append(item)
            else:
                raise IOError(item)
        os.chdir(old_folder)
        # evaluate findings
        if nc_file: resampling = None
        elif default_folder: resampling = default_folder
        elif len(folder_list) == 1: resampling = folder_list[0]
    if resampling:
        folder = '{}/{}'.format(folder, resampling) # different resampling options are stored in subfolders
    # load variables
    if bias_correction is None and 'resolution' in kwargs:
        bias_correction = kwargs['resolution'] # allow backdoor
    if varname and varlist: raise ValueError(varname, varlist)
    elif varname: varlist = [varname] # load a single variable
    elif varlist is None: varlist = default_varlist
    # apply varmap in reverse to varlist
    if varmap is None and varatts is not None:
        varmap = {name: atts.get('name', name) for name, atts in varatts.items()}
    if varmap is not None:
        ravmap = {value: key for key, value in varmap.items()}
        varlist = [ravmap.get(varname, varname) for varname in varlist]
    # construct dataset
    xds = None
    for varname in varlist:
        if grid: varname = '{}_{}'.format(varname, grid) # also append non-native grid name to varname
        if bias_correction: varname = '{}_{}'.format(bias_correction, varname) # prepend bias correction method
        filename = filename_pattern.format(VAR=varname, RES=resolution).lower()
        filepath = '{}/{}'.format(folder, filename)
        if os.path.exists(filepath):
            # load dataset
            ds = xr.open_dataset(filepath, chunks=chunks, mask_and_scale=mask_and_scale, **kwargs)
            # N.B.: the use of open_mfdataset is problematic, because it does not play nicely with chunking -
            #       by default it loads everything as one chunk, and it only respects chunking, if chunks are
            #       specified explicitly at the initial load time (later chunking seems to have no effect!)
            # merge into new dataset
            if xds is None: xds = ds
            else: xds.update(ds)
        elif not lskip:
            raise IOError("The dataset file '{}' was not found in folder:\n '{}'".format(filename, folder))
    # rewrite chunking, if desired (this happens here, so we can infer chunking from dimension sizes)
    if lautoChunk:
        xds = autoChunkXArray(xds, chunks=chunks)
    # rename and apply attributes
    if varatts or varmap:
        xds = updateVariableAttrs(xds, varatts=varatts, varmap=varmap)
    # add projection info
    if lgeoref:
        if geoargs is not None:
            # check consistency
            if 'proj4' in xds.attrs and 'proj4' in geoargs:
                if xds.attrs['proj4'] != geoargs['proj4']:
                    raise ValueError(xds.attrs['proj4'])
            xds = addGeoReference(xds, **geoargs) # custom options
        elif 'proj4' in xds.attrs:
            # read projection string
            xds = addGeoReference(xds, proj4_string=xds.attrs['proj4'])
        elif grid:
            # load griddef from pickle
            from geodata.gdal import loadPickledGridDef
            griddef = loadPickledGridDef(grid=grid)
            xds = addGeoReference(xds, proj4_string=griddef.projection.ExportToProj4())
        else:
            # use default lat/lon, if it works...
            xds = addGeoReference(xds)
    return xds
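# For illustration only: a hedged usage sketch of loadXArray; the folder, filename
# pattern, grid, and variable names below are hypothetical placeholders for an
# actual per-variable dataset layout.
# xds = loadXArray(varlist=['snow','liqwatflx'], folder='/data/SnoDAS/avg',
#                  grid='son2', filename_pattern='{VAR:s}_daily.nc',
#                  chunks=dict(time=92), lgeoref=True)
# print(xds) # an xarray.Dataset with all requested variables merged and georeferenced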