def loadGPCC_TS(name=dataset_name, grid=None, varlist=None, resolution='25', varatts=None, filelist=None, folder=None, lautoregrid=None):
  ''' Get a properly formatted dataset with the monthly GPCC time-series.

      If 'grid' is None, data are read from the original GPCC files at the given
      resolution ('05', '10' or '25'); otherwise a pre-regridded time-series is
      loaded via loadObservations. Raises DatasetError for unknown resolutions. '''
  if grid is None: # load from original time-series files
    if folder is None: folder = orig_ts_folder
    # prepare input
    if resolution not in ('05', '10', '25'):
      # instantiate the exception (works in Python 2 and 3, unlike the old comma syntax)
      raise DatasetError("Selected resolution '%s' is not available!"%resolution)
    # translate varlist
    if varatts is None: varatts = tsvaratts.copy()
    if varlist is None: varlist = varatts.keys()
    if varlist and varatts: varlist = translateVarNames(varlist, varatts)
    if filelist is None: # generate default filelist
      filelist = []
      if 'p' in varlist: filelist.append(orig_ts_file.format('precip',resolution))
      if 's' in varlist: filelist.append(orig_ts_file.format('statio',resolution))
    # load dataset
    dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist,
                            varatts=varatts, multifile=False, ncformat='NETCDF4_CLASSIC')
    # replace time axis with number of month since Jan 1979 (GPCC records start in Jan 1901,
    # hence the negative offset)
    data = np.arange(0,len(dataset.time),1, dtype='int16') + (1901-1979)*12 # month since 1979 (Jan 1979 = 0)
    timeAxis = Axis(name='time', units='month', coord=data, atts=dict(long_name='Month since 1979-01'))
    dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
    # add GDAL info
    dataset = addGDALtoDataset(dataset, projection=None, geotransform=None)
    # N.B.: projection should be auto-detected as geographic
  else: # load from neatly formatted and regridded time-series files
    if folder is None: folder = avgfolder
    grid, resolution = checkGridRes(grid, resolution, period=None, lclim=False)
    dataset = loadObservations(name=name, folder=folder, projection=None, resolution=resolution,
                               grid=grid, period=None, varlist=varlist, varatts=varatts,
                               filepattern=tsfile, filelist=filelist, lautoregrid=lautoregrid,
                               mode='time-series')
  # return formatted dataset
  return dataset
def loadGPCC_LTM( name=dataset_name, varlist=None, resolution="025", varatts=ltmvaratts, filelist=None, folder=ltmfolder ):
  """ Get a properly formatted dataset of the monthly accumulated GPCC precipitation climatology.

      Precipitation ('p') is read from the normals file; station counts ('s') are
      added from the gauges file, sharing the precipitation lat/lon axes. """
  # prepare input
  if resolution not in ("025", "05", "10", "25"):
    # instantiate the exception (works in Python 2 and 3, unlike the old comma syntax)
    raise DatasetError("Selected resolution '%s' is not available!" % resolution)
  # translate varlist
  if varlist is None: varlist = varatts.keys()
  if varlist and varatts: varlist = translateVarNames(varlist, varatts)
  # load variables separately
  # NOTE(review): 'dataset' is only defined when 'p' is loaded; requesting 's' alone
  # would fail below -- confirm whether that call pattern is ever used
  if "p" in varlist:
    dataset = DatasetNetCDF(name=name, folder=folder, filelist=["normals_v2011_%s.nc" % resolution],
                            varlist=["p"], varatts=varatts, ncformat="NETCDF4_CLASSIC")
  if "s" in varlist:
    gauges = nc.Dataset(folder + "normals_gauges_v2011_%s.nc" % resolution, mode="r", format="NETCDF4_CLASSIC")
    stations = Variable(data=gauges.variables["p"][0, :, :], axes=(dataset.lat, dataset.lon), **varatts["s"])
    # consolidate dataset
    dataset.addVariable(stations, asNC=False, copy=True)
  dataset = addGDALtoDataset(dataset, projection=None, geotransform=None, gridfolder=grid_folder)
  # N.B.: projection should be auto-detected as geographic
  # return formatted dataset
  return dataset
def loadNARR_TS(name=dataset_name, grid=None, varlist=None, resolution=None, varatts=None, filelist=None, folder=None, lautoregrid=None):
  ''' Get a properly formatted NARR dataset with monthly mean time-series.

      Without a 'grid', the original NARR time-series files are read; with a
      'grid', a pre-regridded time-series is loaded via loadObservations. '''
  if grid is not None:
    # pre-regridded, neatly formatted time-series files
    if folder is None: folder = avgfolder
    dataset = loadObservations(name=name, folder=folder, projection=None, resolution=None,
                               grid=grid, period=None, varlist=varlist, varatts=varatts,
                               filepattern=tsfile, filelist=filelist, lautoregrid=lautoregrid,
                               mode='time-series')
    return dataset
  # otherwise: original time-series files
  if folder is None: folder = orig_ts_folder
  if varatts is None: varatts = tsvaratts.copy()
  if varlist is None: varlist = tsvarlist
  if varlist and varatts: varlist = translateVarNames(varlist, varatts)
  if filelist is None:
    # build the default file list; some variables have special file stems
    filelist = []
    for vn in varlist:
      if vn in nofile or vn not in varatts: continue
      stem = special[vn] if vn in special else vn
      filelist.append(orig_ts_file.format(stem))
  dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist,
                          varatts=varatts, atts=projdict, multifile=False,
                          ncformat='NETCDF4_CLASSIC')
  # re-index the time axis as months since Jan 1979 (Jan 1979 = 0)
  months = np.arange(0, len(dataset.time), 1, dtype='int16')
  tax = Axis(name='time', units='month', coord=months, atts=dict(long_name='Month since 1979-01'))
  dataset.replaceAxis(dataset.time, tax, asNC=False, deepcopy=False)
  # attach the projection derived from projdict
  proj = getProjFromDict(projdict, name='{0:s} Coordinate System'.format(name))
  dataset = addGDALtoDataset(dataset, projection=proj, geotransform=None, gridfolder=grid_folder)
  return dataset
def loadCRU_TS(name=dataset_name, grid=None, varlist=None, resolution=None, varatts=None, filelist=None, folder=None, lautoregrid=None):
  ''' Get a properly formatted CRU dataset with monthly mean time-series.

      Without a 'grid', the original CRU time-series files are read; with a
      'grid', a pre-regridded time-series is loaded via loadObservations. '''
  if grid is not None:
    # pre-regridded, neatly formatted time-series files
    srcfolder = avgfolder if folder is None else folder
    dataset = loadObservations(name=name, folder=srcfolder, projection=None, resolution=None,
                               grid=grid, period=None, varlist=varlist, varatts=varatts,
                               filepattern=tsfile, filelist=filelist, lautoregrid=lautoregrid,
                               mode='time-series')
    return dataset
  # otherwise: original time-series files
  srcfolder = orig_ts_folder if folder is None else folder
  attmap = tsvaratts.copy() if varatts is None else varatts
  wanted = attmap.keys() if varlist is None else varlist
  if wanted and attmap: wanted = translateVarNames(wanted, attmap)
  # assemble the file list (one file per variable, unless the variable has no file)
  ncfiles = filelist
  if ncfiles is None:
    ncfiles = []
    for vn in wanted:
      if vn not in nofile: ncfiles.append(orig_ts_file.format(vn))
  dataset = DatasetNetCDF(name=name, folder=srcfolder, filelist=ncfiles, varlist=wanted,
                          varatts=attmap, multifile=False, ncformat='NETCDF4_CLASSIC')
  # re-index the time axis as months since Jan 1979 (CRU records start in Jan 1901)
  months = np.arange(0, len(dataset.time), 1, dtype='int16') + (1901-1979)*12
  tax = Axis(name='time', units='month', coord=months, atts=dict(long_name='Month since 1979-01'))
  dataset.replaceAxis(dataset.time, tax, asNC=False, deepcopy=False)
  # projection should be auto-detected as geographic
  dataset = addGDALtoDataset(dataset, projection=None, geotransform=None, gridfolder=grid_folder)
  return dataset
def loadNARR_LTM(name=dataset_name, varlist=None, grid=None, interval='monthly', varatts=None, filelist=None, folder=ltmfolder):
  ''' Get a properly formatted dataset of daily or monthly NARR climatologies (LTM).

      'interval' selects the climatology: 'monthly' (12 time steps) or 'daily'
      (365 time steps); anything else raises DatasetError. Loading regridded
      climatologies (grid is not None) is not implemented. '''
  if grid is None: # load from original time-series files
    if folder is None: folder = orig_ts_folder
    # prepare input
    if varatts is None: varatts = ltmvaratts.copy()
    if varlist is None: varlist = ltmvarlist
    if interval == 'monthly':
      pfx = '.mon.ltm.nc'; tlen = 12
    elif interval == 'daily':
      pfx = '.day.ltm.nc'; tlen = 365
    else:
      # instantiate the exception (works in Python 2 and 3, unlike the old comma syntax)
      raise DatasetError("Selected interval '%s' is not supported!"%interval)
    # translate varlist
    if varlist and varatts: varlist = translateVarNames(varlist, varatts)
    # axes dictionary, primarily to override time axis
    axes = dict(time=Axis(name='time',units='day',coord=(1,tlen,tlen)),load=True)
    if filelist is None: # generate default filelist
      filelist = [special[var]+pfx if var in special else var+pfx for var in varlist if var not in nofile]
    # load dataset
    dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist,
                            varatts=varatts, axes=axes, atts=projdict, multifile=False,
                            ncformat='NETCDF4_CLASSIC')
    # add projection
    projection = getProjFromDict(projdict, name='{0:s} Coordinate System'.format(name))
    # NOTE(review): sibling loaders pass 'gridfolder=grid_folder' here; confirm that
    # addGDALtoDataset actually accepts a 'folder' keyword
    dataset = addGDALtoDataset(dataset, projection=projection, geotransform=None, folder=grid_folder)
  else: # load from neatly formatted and regridded time-series files
    if folder is None: folder = avgfolder
    raise NotImplementedError("Need to implement loading neatly formatted and regridded time-series!")
  # return formatted dataset
  return dataset
def loadCFSR_TS(name=dataset_name, grid=None, varlist=None, varatts=None, resolution='hires', filelist=None, folder=None, lautoregrid=None):
  ''' Get a properly formatted CFSR dataset with monthly mean time-series.

      Data are available in high ('hires'/'03'/'031') and low ('lowres'/'05')
      resolution; other values raise DatasetError. Static (time-invariant)
      fields are merged in with their singleton time axis removed. '''
  if grid is None: # load from original time-series files
    if folder is None: folder = orig_ts_folder
    # validate resolution up front, so we fail with a clear error instead of an
    # UnboundLocalError further down
    lhires = resolution in ('hires', '03', '031')
    llowres = resolution in ('lowres', '05')
    if not lhires and not llowres:
      raise DatasetError("Selected resolution '%s' is not available!"%resolution)
    # translate varlist
    if varatts is None: varatts = tsvaratts.copy()
    if varlist is None:
      varlist = varlist_hires if lhires else varlist_lowres
    if varlist and varatts: varlist = translateVarNames(varlist, varatts)
    if filelist is None: # generate default filelist
      if lhires: files = [hiresfiles[var] for var in varlist if var in hiresfiles]
      else: files = [lowresfiles[var] for var in varlist if var in lowresfiles]
    else:
      # BUG FIX: 'files' used to be left undefined (NameError) when an explicit
      # filelist was passed; now the caller's filelist is used directly
      files = filelist
    # load dataset
    dataset = DatasetNetCDF(name=name, folder=folder, filelist=files, varlist=varlist,
                            varatts=varatts, check_override=['time'], multifile=False,
                            ncformat='NETCDF4_CLASSIC')
    # load static data
    if filelist is None: # generate default filelist
      if lhires: files = [hiresstatic[var] for var in varlist if var in hiresstatic]
      else: files = [lowresstatic[var] for var in varlist if var in lowresstatic]
    else:
      # with an explicit filelist we cannot tell which files are static, so skip them
      files = []
    # load constants, if any (and with singleton time axis)
    if len(files) > 0:
      staticdata = DatasetNetCDF(name=name, folder=folder, filelist=files, varlist=varlist,
                                 varatts=varatts, axes=dict(lon=dataset.lon, lat=dataset.lat),
                                 multifile=False, check_override=['time'], ncformat='NETCDF4_CLASSIC')
      # N.B.: need to override the axes, so that the datasets are consistent
      if len(staticdata.variables) > 0:
        for var in staticdata.variables.values():
          if not dataset.hasVariable(var.name):
            var.squeeze() # remove time dimension
            dataset.addVariable(var, copy=False) # no need to copy... but we can't write to the netcdf file!
    # replace time axis with number of month since Jan 1979
    data = np.arange(0,len(dataset.time),1, dtype='int16') # month since 1979 (Jan 1979 = 0)
    timeAxis = Axis(name='time', units='month', coord=data, atts=dict(long_name='Month since 1979-01'))
    dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
    # add projection
    dataset = addGDALtoDataset(dataset, projection=None, geotransform=None, gridfolder=grid_folder)
    # N.B.: projection should be auto-detected as geographic
  else: # load from neatly formatted and regridded time-series files
    if folder is None: folder = avgfolder
    grid, resolution = checkGridRes(grid, resolution)
    dataset = loadObservations(name=name, folder=folder, projection=None, resolution=resolution,
                               grid=grid, period=None, varlist=varlist, varatts=varatts,
                               filepattern=tsfile, filelist=filelist, lautoregrid=lautoregrid,
                               mode='time-series')
  # return formatted dataset
  return dataset
def loadPCIC_LTM(name=dataset_name, varlist=None, varatts=ltmvaratts, filelist=None, folder=ltmfolder):
  ''' Get a properly formatted dataset of the monthly PCIC PRISM climatology. '''
  # translate varlist
  if varlist is None: varlist = varatts.keys()
  if varlist and varatts: varlist = translateVarNames(varlist, varatts)
  # generate default file list only if none was supplied
  # BUG FIX: 'filelist' used to be overwritten unconditionally, so the argument was silently ignored
  if filelist is None:
    filelist = [ltmfile.format(var) for var in varlist if var not in ('time','lat','lon')]
  # load variables separately
  dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist,
                          varatts=varatts, ncformat='NETCDF4')
  dataset = addGDALtoDataset(dataset, projection=None, geotransform=None, gridfolder=grid_folder)
  # N.B.: projection should be auto-detected as geographic
  # return formatted dataset
  return dataset
def sync(self, varlist=None, flush=False, gdal=True, copydata=True):
  ''' Transfer contents of temporary storage to output/target dataset.

      varlist:  names to transfer (default: all variables in temporary storage)
      flush:    unload each variable after transfer to free memory
      gdal:     also transfer GDAL projection/geotransform settings, if present
      copydata: deep-copy the data (otherwise only variable headers are created) '''
  if not isinstance(self.output,Dataset):
    # instantiate the exception (works in Python 2 and 3, unlike the old comma syntax)
    raise DatasetError("Cannot sync without target Dataset!")
  if self.tmp:
    if varlist is None: varlist = self.tmpput.variables.keys()
    for varname in varlist:
      if varname in self.tmpput.variables:
        var = self.tmpput.variables[varname]
        self.output.addVariable(var, loverwrite=True, deepcopy=copydata)
        # N.B.: without copydata/deepcopy, only the variable header is created but no data is written
        if flush: var.unload() # remove unnecessary references (unlink data)
    if gdal and 'gdal' in self.tmpput.__dict__:
      if self.tmpput.gdal:
        projection = self.tmpput.projection; geotransform = self.tmpput.geotransform
      else:
        projection = None; geotransform = None
      self.output = addGDALtoDataset(self.output, projection=projection, geotransform=geotransform)
def Climatology(self, timeAxis='time', climAxis=None, period=None, offset=0, shift=0, timeSlice=None, **kwargs):
  ''' Setup climatology and start computation; calls processClimatology.

      period:    length of the averaging period in years
      offset:    offset in years from the start of the record
      shift:     shift in months (if the first month is not January)
      timeSlice: explicit time slice (only used when period is None) '''
  if period is not None and not isinstance(period,(np.integer,int)): raise TypeError # period in years
  if not isinstance(offset,(np.integer,int)): raise TypeError # offset in years (from start of record)
  if not isinstance(shift,(np.integer,int)): raise TypeError # shift in month (if first month is not January)
  # construct new time axis for climatology
  if climAxis is None:
    climAxis = Axis(name=timeAxis, units='month', length=12, coord=np.arange(1,13,1), dtype=dtype_int) # monthly climatology
  else:
    if not isinstance(climAxis,Axis): raise TypeError
  # add axis to output dataset
  if self.target.hasAxis(climAxis.name):
    # BUG FIX: was 'repalceAxis', which would raise AttributeError at runtime
    self.target.replaceAxis(climAxis, check=False) # will have different shape
  else:
    self.target.addAxis(climAxis, copy=True) # copy=True allows recasting as, e.g., a NC variable
  climAxis = self.target.axes[timeAxis] # make sure we have exactly that instance
  # figure out time slice
  if period is not None:
    start = offset * len(climAxis); end = start + period * len(climAxis)
    timeSlice = slice(start,end,None)
  else:
    if not isinstance(timeSlice,slice): raise TypeError(timeSlice)
  # add variables that will cause errors to ignorelist (e.g. strings)
  for varname,var in self.source.variables.iteritems():
    if var.hasAxis(timeAxis) and var.dtype.kind == 'S': self.ignorelist.append(varname)
  # prepare function call
  function = functools.partial(self.processClimatology, # already set parameters
                               timeAxis=timeAxis, climAxis=climAxis, timeSlice=timeSlice, shift=shift)
  # start process
  if self.feedback: print('\n +++ processing climatology +++ ')
  if self.source.gdal: griddef = self.source.griddef
  else: griddef = None
  self.process(function, **kwargs) # currently 'flush' is the only kwarg
  # add GDAL to target
  if griddef is not None:
    self.target = addGDALtoDataset(self.target, griddef=griddef)
  # N.B.: if the dataset is empty, it wont do anything, hence we do it now
  if self.feedback: print('\n')
def process(self, function, flush=False):
  ''' This method applies the desired operation/function to each variable in varlist.

      function: callable taking a Variable and returning the processed Variable
      flush:    write results to the NetCDF output immediately to save RAM '''
  if flush: # this function is to save RAM by flushing results to disk immediately
    if not isinstance(self.output,DatasetNetCDF):
      # instantiate the exception (works in Python 2 and 3, unlike the old comma syntax)
      raise ProcessError("Flush can only be used with NetCDF Datasets (and not with temporary storage).")
    if self.tmp: # flush requires output to be target
      if self.source.gdal and not self.tmpput.gdal:
        self.tmpput = addGDALtoDataset(self.tmpput, projection=self.source.projection,
                                       geotransform=self.source.geotransform)
      self.source = self.tmpput
      self.target = self.output
      self.tmp = False # not using temporary storage anymore
  # loop over input variables
  for varname in self.varlist:
    # check against ignore list
    if varname not in self.ignorelist:
      # check if variable already exists
      if self.target.hasVariable(varname):
        # "in-place" operations
        var = self.target.variables[varname]
        newvar = function(var) # perform actual processing
        if newvar.ndim != var.ndim or newvar.shape != var.shape: raise VariableError
        if newvar is not var: self.target.replaceVariable(var,newvar)
      elif self.source.hasVariable(varname):
        var = self.source.variables[varname]
        ldata = var.data # whether data was pre-loaded
        # perform operation from source and copy results to target
        newvar = function(var) # perform actual processing
        if not ldata: var.unload() # if it was already loaded, don't unload
        self.target.addVariable(newvar, copy=True) # copy=True allows recasting as, e.g., a NC variable
      else:
        raise DatasetError("Variable '%s' not found in input dataset."%varname)
      assert varname == newvar.name
      # flush data to disk immediately
      if flush:
        self.output.variables[varname].unload() # again, free memory
        newvar.unload(); del var, newvar # free space; already added to new dataset
  # after everything is said and done:
  self.source = self.target # set target to source for next time
def loadGPCC_LTM(name=dataset_name, varlist=None, resolution='025', varatts=ltmvaratts, filelist=None, folder=ltmfolder):
  ''' Get a properly formatted dataset of the monthly accumulated GPCC precipitation climatology.

      Precipitation ('p') is read from the normals file; station counts ('s') are
      added from the gauges file, sharing the precipitation lat/lon axes. '''
  # prepare input
  if resolution not in ('025', '05', '10', '25'):
    # instantiate the exception (works in Python 2 and 3, unlike the old comma syntax)
    raise DatasetError("Selected resolution '%s' is not available!" % resolution)
  # translate varlist
  if varlist is None: varlist = varatts.keys()
  if varlist and varatts: varlist = translateVarNames(varlist, varatts)
  # load variables separately
  # NOTE(review): 'dataset' is only defined when 'p' is loaded; requesting 's' alone
  # would fail below -- confirm whether that call pattern is ever used
  if 'p' in varlist:
    dataset = DatasetNetCDF(name=name, folder=folder, filelist=['normals_v2011_%s.nc' % resolution],
                            varlist=['p'], varatts=varatts, ncformat='NETCDF4_CLASSIC')
  if 's' in varlist:
    gauges = nc.Dataset(folder + 'normals_gauges_v2011_%s.nc' % resolution, mode='r', format='NETCDF4_CLASSIC')
    stations = Variable(data=gauges.variables['p'][0, :, :], axes=(dataset.lat, dataset.lon), **varatts['s'])
    # consolidate dataset
    dataset.addVariable(stations, asNC=False, copy=True)
  dataset = addGDALtoDataset(dataset, projection=None, geotransform=None, gridfolder=grid_folder)
  # N.B.: projection should be auto-detected as geographic
  # return formatted dataset
  return dataset
def Regrid(self, griddef=None, projection=None, geotransform=None, size=None, xlon=None, ylat=None,
           lmask=True, int_interp=None, float_interp=None, **kwargs):
  ''' Setup regridding and start computation; calls processRegrid.

      The target grid is taken from 'griddef' or assembled from projection/
      geotransform/size/xlon/ylat; integer and float variables can use
      different GDAL interpolation methods (int_interp / float_interp). '''
  # make temporary gdal dataset
  if self.source is self.target:
    if self.tmp: assert self.source == self.tmpput and self.target == self.tmpput
    # the operation can not be performed "in-place"!
    self.target = Dataset(name='tmptoo', title='Temporary target dataset for non-in-place operations',
                          varlist=[], atts={})
    ltmptoo = True
  else: ltmptoo = False
  # make sure the target dataset is a GDAL-enabled dataset
  if 'gdal' in self.target.__dict__:
    # gdal info already present
    if griddef is not None or projection is not None or geotransform is not None:
      # instantiate the exception (works in Python 2 and 3, unlike the old comma syntax)
      raise AttributeError("Target Dataset '%s' is already GDAL enabled - cannot overwrite settings!"%self.target.name)
    if self.target.xlon is None: raise GDALError("Map axis 'xlon' not found!")
    if self.target.ylat is None: raise GDALError("Map axis 'ylat' not found!")
    xlon = self.target.xlon; ylat = self.target.ylat
  else:
    # need to set GDAL parameters
    if self.tmp and 'gdal' in self.output.__dict__:
      # transfer gdal settings from output to temporary dataset
      assert self.target is not self.output
      projection = self.output.projection; geotransform = self.output.geotransform
      xlon = self.output.xlon; ylat = self.output.ylat
    else:
      # figure out grid definition from input
      if griddef is None:
        griddef = GridDefinition(projection=projection, geotransform=geotransform, size=size, xlon=xlon, ylat=ylat)
      # pass arguments through GridDefinition, if not provided
      projection = griddef.projection; geotransform = griddef.geotransform
      xlon = griddef.xlon; ylat = griddef.ylat
    # apply GDAL settings to target dataset
    for ax in (xlon,ylat):
      self.target.addAxis(ax, loverwrite=True) # i.e. replace if already present
    self.target = addGDALtoDataset(self.target, projection=projection, geotransform=geotransform)
  # use these map axes
  xlon = self.target.xlon; ylat = self.target.ylat
  assert isinstance(xlon,Axis) and isinstance(ylat,Axis)
  # determine source dataset grid definition
  if self.source.griddef is None:
    srcgrd = GridDefinition(projection=self.source.projection, geotransform=self.source.geotransform,
                            size=self.source.mapSize, xlon=self.source.xlon, ylat=self.source.ylat)
  else: srcgrd = self.source.griddef
  # NOTE(review): if the target was already GDAL-enabled, 'griddef' can still be None
  # here, which would make 'griddef.scale' fail -- confirm against callers
  srcres = srcgrd.scale; tgtres = griddef.scale
  # determine if shift is necessary to insure correct wrapping
  if not srcgrd.isProjected and not griddef.isProjected:
    lwrapSrc = srcgrd.wrap360
    lwrapTgt = griddef.wrap360
    # check grids
    for grd in (srcgrd,griddef):
      if grd.wrap360:
        assert grd.geotransform[0] + grd.geotransform[1]*(len(grd.xlon)-1) > 180
        assert np.round(grd.geotransform[1]*len(grd.xlon), decimals=2) == 360 # require 360 deg. to some accuracy...
        assert any( grd.xlon.getArray() > 180 ) # need to wrap around
        assert all( grd.xlon.getArray() >= 0 )
        assert all( grd.xlon.getArray() <= 360 )
      else:
        assert grd.geotransform[0] + grd.geotransform[1]*(len(grd.xlon)-1) < 180
        assert all( grd.xlon.getArray() >= -180 )
        assert all( grd.xlon.getArray() <= 180 )
  else:
    lwrapSrc = False # no need to shift, if a projected grid is involved!
    lwrapTgt = False # no need to shift, if a projected grid is involved!
  # determine GDAL interpolation
  if int_interp is None: int_interp = gdalInterp('nearest')
  else: int_interp = gdalInterp(int_interp)
  if float_interp is None:
    if srcres < tgtres: float_interp = gdalInterp('convolution') # down-sampling: 'convolution'
    else: float_interp = gdalInterp('cubicspline') # up-sampling
  else: float_interp = gdalInterp(float_interp)
  # prepare function call
  function = functools.partial(self.processRegrid, ylat=ylat, xlon=xlon, lwrapSrc=lwrapSrc, lwrapTgt=lwrapTgt, # already set parameters
                               lmask=lmask, int_interp=int_interp, float_interp=float_interp)
  # start process
  if self.feedback: print('\n +++ processing regridding +++ ')
  self.process(function, **kwargs) # currently 'flush' is the only kwarg
  # now make sure we have a GDAL dataset!
  self.target = addGDALtoDataset(self.target, griddef=griddef)
  if self.feedback: print('\n')
  if self.tmp: self.tmpput = self.target
  if ltmptoo: assert self.tmpput.name == 'tmptoo' # set above, when temp. dataset is created
def loadGPCC_TS(name=dataset_name, grid=None, varlist=None, resolution='25', varatts=None, filelist=None, folder=None, lautoregrid=None):
  ''' Get a properly formatted dataset with the monthly GPCC time-series.

      If 'grid' is None, data are read from the original GPCC files at the given
      resolution ('05', '10' or '25'); otherwise a pre-regridded time-series is
      loaded via loadObservations. Raises DatasetError for unknown resolutions. '''
  if grid is None: # load from original time-series files
    if folder is None: folder = orig_ts_folder
    # prepare input
    if resolution not in ('05', '10', '25'):
      # instantiate the exception (works in Python 2 and 3, unlike the old comma syntax)
      raise DatasetError("Selected resolution '%s' is not available!" % resolution)
    # translate varlist
    if varatts is None: varatts = tsvaratts.copy()
    if varlist is None: varlist = varatts.keys()
    if varlist and varatts: varlist = translateVarNames(varlist, varatts)
    if filelist is None: # generate default filelist
      filelist = []
      if 'p' in varlist: filelist.append(orig_ts_file.format('precip', resolution))
      if 's' in varlist: filelist.append(orig_ts_file.format('statio', resolution))
    # load dataset
    dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist,
                            varatts=varatts, multifile=False, ncformat='NETCDF4_CLASSIC')
    # replace time axis with number of month since Jan 1979 (GPCC records start in Jan 1901,
    # hence the negative offset)
    data = np.arange(0, len(dataset.time), 1, dtype='int16') + (1901 - 1979) * 12 # month since 1979 (Jan 1979 = 0)
    timeAxis = Axis(name='time', units='month', coord=data, atts=dict(long_name='Month since 1979-01'))
    dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
    # add GDAL info
    dataset = addGDALtoDataset(dataset, projection=None, geotransform=None)
    # N.B.: projection should be auto-detected as geographic
  else: # load from neatly formatted and regridded time-series files
    if folder is None: folder = avgfolder
    grid, resolution = checkGridRes(grid, resolution, period=None, lclim=False)
    dataset = loadObservations(name=name, folder=folder, projection=None, resolution=resolution,
                               grid=grid, period=None, varlist=varlist, varatts=varatts,
                               filepattern=tsfile, filelist=filelist, lautoregrid=lautoregrid,
                               mode='time-series')
  # return formatted dataset
  return dataset
def performExport(dataset, mode, dataargs, expargs, bcargs, loverwrite=False, ldebug=False, lparallel=False, pidstr='', logger=None):
  ''' Worker function to export ASCII rasters for a given dataset.

      dataset/mode/dataargs identify the source data; expargs control the
      export format; bcargs (optional) configure bias correction from a
      pickled BiasCorrection object. Returns 0 on success or skip. '''
  # input checking
  if not isinstance(dataset,basestring): raise TypeError
  if not isinstance(dataargs,dict): raise TypeError # all dataset arguments are kwargs
  # logging
  if logger is None: # make new logger
    logger = logging.getLogger() # new logger
    logger.addHandler(logging.StreamHandler())
  else:
    if isinstance(logger,basestring):
      logger = logging.getLogger(name=logger) # connect to existing one
    elif not isinstance(logger,logging.Logger):
      # instantiate the exception (works in Python 2 and 3, unlike the old comma syntax)
      raise TypeError('Expected logger ID/handle in logger KW; got {}'.format(str(logger)))
  ## extract meta data from arguments
  dataargs, loadfct, srcage, datamsgstr = getMetaData(dataset, mode, dataargs, lone=False)
  dataset_name = dataargs.dataset_name; periodstr = dataargs.periodstr; domain = dataargs.domain
  # figure out bias correction parameters
  if bcargs:
    bcargs = bcargs.copy() # first copy, then modify...
    bc_method = bcargs.pop('method',None)
    if bc_method is None: raise ArgumentError("Need to specify bias-correction method to use bias correction!")
    bc_obs = bcargs.pop('obs_dataset',None)
    if bc_obs is None: raise ArgumentError("Need to specify observational dataset to use bias correction!")
    bc_reference = bcargs.pop('reference',None)
    if bc_reference is None: # infer from experiment name
      if dataset_name[-5:] in ('-2050','-2100'): bc_reference = dataset_name[:-5] # cut of period indicator and hope for the best
      else: bc_reference = dataset_name
    bc_grid = bcargs.pop('grid',None)
    if bc_grid is None: bc_grid = dataargs.grid
    bc_domain = bcargs.pop('domain',None)
    if bc_domain is None: bc_domain = domain
    bc_varlist = bcargs.pop('varlist',None)
    bc_varmap = bcargs.pop('varmap',None)
    bc_tag = bcargs.pop('tag',None) # an optional name extension/tag
    bc_pattern = bcargs.pop('file_pattern',None) # usually default in getPickleFile
    lgzip = bcargs.pop('lgzip',None) # if pickle is gzipped (None: auto-detect based on file name extension)
    # get name of pickle file (and folder)
    picklefolder = dataargs.avgfolder.replace(dataset_name,bc_reference)
    picklefile = getPickleFileName(method=bc_method, obs_name=bc_obs, gridstr=bc_grid,
                                   domain=bc_domain, tag=bc_tag, pattern=bc_pattern)
    picklepath = '{:s}/{:s}'.format(picklefolder,picklefile)
    if lgzip:
      picklepath += '.gz' # add extension
      if not os.path.exists(picklepath): raise IOError(picklepath)
    elif lgzip is None:
      lgzip = False
      if not os.path.exists(picklepath):
        lgzip = True # assume gzipped file
        picklepath += '.gz' # try with extension...
        if not os.path.exists(picklepath): raise IOError(picklepath)
    elif not os.path.exists(picklepath): raise IOError(picklepath)
    # determine age of pickle file and compare against source age
    pickleage = datetime.fromtimestamp(os.path.getmtime(picklepath))
  else:
    bc_method = False
    pickleage = srcage
  # parse export options
  expargs = expargs.copy() # first copy, then modify...
  lm3 = expargs.pop('lm3') # convert kg/m^2/s to m^3/m^2/s (water flux)
  expformat = expargs.pop('format') # needed to get FileFormat object
  exp_list = expargs.pop('exp_list') # this handled outside of export
  compute_list = expargs.pop('compute_list', []) # variables to be (re-)computed - by default all
  # initialize FileFormat class instance
  fileFormat = getFileFormat(expformat, bc_method=bc_method, **expargs)
  # get folder for target dataset and do some checks
  expname = '{:s}_d{:02d}'.format(dataset_name,domain) if domain else dataset_name
  expfolder = fileFormat.defineDataset(dataset=dataset, mode=mode, dataargs=dataargs, lwrite=True, ldebug=ldebug)
  # prepare destination for new dataset
  lskip = fileFormat.prepareDestination(srcage=max(srcage,pickleage), loverwrite=loverwrite)
  # depending on last modification time of file or overwrite setting, start computation, or skip
  if lskip:
    # print message
    skipmsg = "\n{:s} >>> Skipping: Format '{:s} for dataset '{:s}' already exists and is newer than source file.".format(pidstr,expformat,dataset_name)
    skipmsg += "\n{:s} >>> ('{:s}')\n".format(pidstr,expfolder)
    logger.info(skipmsg)
  else:
    ## actually load datasets
    source = loadfct() # load source data
    # check period
    if 'period' in source.atts and dataargs.periodstr != source.atts.period: # a NetCDF attribute
      raise DateError("Specifed period is inconsistent with netcdf records: '{:s}' != '{:s}'".format(periodstr,source.atts.period))
    # load BiasCorrection object from pickle
    if bc_method:
      op = gzip.open if lgzip else open
      with op(picklepath, 'r') as filehandle:
        BC = pickle.load(filehandle)
      # assemble logger entry
      bcmsgstr = "(performing bias-correction using {:s} from {:s} towards {:s})".format(BC.long_name,bc_reference,bc_obs)
    # print message
    if mode == 'climatology': opmsgstr = 'Exporting Climatology ({:s}) to {:s} Format'.format(periodstr, expformat)
    elif mode == 'time-series': opmsgstr = 'Exporting Time-series to {:s} Format'.format(expformat)
    elif mode[-5:] == '-mean': opmsgstr = 'Exporting {:s}-Mean ({:s}) to {:s} Format'.format(mode[:-5], periodstr, expformat)
    else:
      raise NotImplementedError("Unrecognized Mode: '{:s}'".format(mode))
    # print feedback to logger
    logmsg = '\n{0:s} *** {1:^65s} *** \n{0:s} *** {2:^65s} *** \n'.format(pidstr,datamsgstr,opmsgstr)
    if bc_method: logmsg += "{0:s} *** {1:^65s} *** \n".format(pidstr,bcmsgstr)
    logger.info(logmsg)
    if not lparallel and ldebug: logger.info('\n'+str(source)+'\n')
    # create GDAL-enabled target dataset
    sink = Dataset(axes=(source.xlon,source.ylat), name=expname, title=source.title, atts=source.atts.copy())
    addGDALtoDataset(dataset=sink, griddef=source.griddef)
    assert sink.gdal, sink
    # apply bias-correction
    if bc_method:
      source = BC.correct(source, asNC=False, varlist=bc_varlist, varmap=bc_varmap) # load bias-corrected variables into memory
      # N.B.: for variables that are not bias-corrected, data are not loaded immediately but on demand; this way
      #       I/O and computing can be further disentangled and not all variables are always needed
    # compute intermediate variables, if necessary
    for varname in exp_list:
      variables = None # variable list
      var = None
      # (re-)compute variable, if desired...
      if varname in compute_list:
        if varname == 'precip': var = newvars.computeTotalPrecip(source)
        elif varname == 'waterflx': var = newvars.computeWaterFlux(source)
        elif varname == 'liqwatflx': var = newvars.computeLiquidWaterFlux(source)
        elif varname == 'netrad': var = newvars.computeNetRadiation(source, asVar=True)
        elif varname == 'netrad_bb': var = newvars.computeNetRadiation(source, asVar=True, lrad=False, name='netrad_bb')
        elif varname == 'netrad_bb0': var = newvars.computeNetRadiation(source, asVar=True, lrad=False, lA=False, name='netrad_bb0')
        elif varname == 'vapdef': var = newvars.computeVaporDeficit(source)
        elif varname in ('pet','pet_pm','petrad','petwnd') and 'pet' not in sink:
          if 'petrad' in exp_list or 'petwnd' in exp_list:
            variables = newvars.computePotEvapPM(source, lterms=True) # default; returns mutliple PET terms
          else:
            var = newvars.computePotEvapPM(source, lterms=False) # returns only PET
        elif varname == 'pet_th': var = None # skip for now
        #var = computePotEvapTh(source) # simplified formula (less prerequisites)
      # ... otherwise load from source file
      if var is None and variables is None and varname in source:
        var = source[varname].load() # load data (may not have to load all)
      #else: raise VariableError, "Unsupported Variable '{:s}'.".format(varname)
      # for now, skip variables that are None
      if var or variables:
        # handle lists as well
        if var and variables: raise VariableError((var,variables))
        elif var: variables = (var,)
        for var in variables:
          addGDALtoVar(var=var, griddef=sink.griddef)
          if not var.gdal and isinstance(fileFormat,ASCII_raster):
            raise GDALError("Exporting to ASCII_raster format requires GDAL-enabled variables.")
          # add to new dataset
          sink += var
    # convert units
    if lm3:
      for var in sink:
        if var.units == 'kg/m^2/s':
          var /= 1000. # divide to get m^3/m^2/s
          var.units = 'm^3/m^2/s' # update units
    # compute seasonal mean if we are in mean-mode
    if mode[-5:] == '-mean':
      sink = sink.seasonalMean(season=mode[:-5], lclim=True)
      # N.B.: to remain consistent with other output modes,
      #       we need to prevent renaming of the time axis
      sink = concatDatasets([sink,sink], axis='time', lensembleAxis=True)
      sink.squeeze() # we need the year-axis until now to distinguish constant fields; now remove
    # print dataset
    if not lparallel and ldebug: logger.info('\n'+str(sink)+'\n')
    # export new dataset to selected format
    fileFormat.exportDataset(sink) # write results to file
    writemsg = "\n{:s} >>> Export of Dataset '{:s}' to Format '{:s}' complete.".format(pidstr,expname, expformat)
    writemsg += "\n{:s} >>> ('{:s}')\n".format(pidstr,expfolder)
    logger.info(writemsg)
    # clean up and return
    source.unload(); #del source
  return 0 # "exit code"
def loadCRU_TS(name=dataset_name, grid=None, varlist=None, resolution=None, varatts=None, filelist=None, folder=None, lautoregrid=None):
  ''' Load the monthly mean CRU time-series as a properly formatted dataset.
      If a grid is given, a pre-regridded time-series file is loaded; otherwise
      the original time-series files are read and the time axis is normalized. '''
  if grid is not None:
    # regridded time-series: delegate to the generic observation loader
    src_folder = avgfolder if folder is None else folder
    return loadObservations(name=name, folder=src_folder, projection=None, resolution=None, grid=grid,
                            period=None, varlist=varlist, varatts=varatts, filepattern=tsfile,
                            filelist=filelist, lautoregrid=lautoregrid, mode='time-series')
  # otherwise load from the original time-series files
  if folder is None: folder = orig_ts_folder
  # resolve variable list and attribute translations
  if varatts is None: varatts = tsvaratts.copy()
  if varlist is None: varlist = varatts.keys()
  if varlist and varatts: varlist = translateVarNames(varlist, varatts)
  # assemble default file list (one file per variable, except variables without files)
  if filelist is None:
    filelist = [orig_ts_file.format(var) for var in varlist if var not in nofile]
  dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist,
                          varatts=varatts, multifile=False, ncformat='NETCDF4_CLASSIC')
  # replace the time axis by months since 1979-01 (Jan 1979 == 0; record starts 1901)
  month_coord = (1901-1979)*12 + np.arange(0, len(dataset.time), 1, dtype='int16')
  new_time = Axis(name='time', units='month', coord=month_coord,
                  atts=dict(long_name='Month since 1979-01'))
  dataset.replaceAxis(dataset.time, new_time, asNC=False, deepcopy=False)
  # attach GDAL/projection info (should be auto-detected as geographic lat/lon)
  dataset = addGDALtoDataset(dataset, projection=None, geotransform=None, gridfolder=grid_folder)
  return dataset
def performExport(dataset, mode, dataargs, expargs, loverwrite=False, ldebug=False, lparallel=False, pidstr='', logger=None): ''' worker function to perform regridding for a given dataset and target grid ''' # input checking if not isinstance(dataset,basestring): raise TypeError if not isinstance(dataargs,dict): raise TypeError # all dataset arguments are kwargs # logging if logger is None: # make new logger logger = logging.getLogger() # new logger logger.addHandler(logging.StreamHandler()) else: if isinstance(logger,basestring): logger = logging.getLogger(name=logger) # connect to existing one elif not isinstance(logger,logging.Logger): raise TypeError, 'Expected logger ID/handle in logger KW; got {}'.format(str(logger)) ## extract meta data from arguments dataargs, loadfct, srcage, datamsgstr = getMetaData(dataset, mode, dataargs, lone=False) dataset_name = dataargs.dataset_name; periodstr = dataargs.periodstr; domain = dataargs.domain # parse export options expargs = expargs.copy() # first copy, then modify... 
lm3 = expargs.pop('lm3') # convert kg/m^2/s to m^3/m^2/s (water flux) expformat = expargs.pop('format') # needed to get FileFormat object varlist = expargs.pop('varlist') # this handled outside of export # initialize FileFormat class instance fileFormat = getFileFormat(expformat, **expargs) # get folder for target dataset and do some checks expname = '{:s}_d{:02d}'.format(dataset_name,domain) if domain else dataset_name expfolder = fileFormat.defineDataset(name=dataset_name, dataset=dataset, mode=mode, dataargs=dataargs, lwrite=True, ldebug=ldebug) # prepare destination for new dataset lskip = fileFormat.prepareDestination(srcage=srcage, loverwrite=loverwrite) # depending on last modification time of file or overwrite setting, start computation, or skip if lskip: # print message skipmsg = "\n{:s} >>> Skipping: Format '{:s} for dataset '{:s}' already exists and is newer than source file.".format(pidstr,expformat,dataset_name) skipmsg += "\n{:s} >>> ('{:s}')\n".format(pidstr,expfolder) logger.info(skipmsg) else: ## actually load datasets source = loadfct() # load source data # check period if 'period' in source.atts and dataargs.periodstr != source.atts.period: # a NetCDF attribute raise DateError, "Specifed period is inconsistent with netcdf records: '{:s}' != '{:s}'".format(periodstr,source.atts.period) # print message if mode == 'climatology': opmsgstr = 'Exporting Climatology ({:s}) to {:s} Format'.format(periodstr, expformat) elif mode == 'time-series': opmsgstr = 'Exporting Time-series to {:s} Format'.format(expformat) else: raise NotImplementedError, "Unrecognized Mode: '{:s}'".format(mode) # print feedback to logger logger.info('\n{0:s} *** {1:^65s} *** \n{0:s} *** {2:^65s} *** \n'.format(pidstr,datamsgstr,opmsgstr)) if not lparallel and ldebug: logger.info('\n'+str(source)+'\n') # create GDAL-enabled target dataset sink = Dataset(axes=(source.xlon,source.ylat), name=expname, title=source.title) addGDALtoDataset(dataset=sink, griddef=source.griddef) assert 
sink.gdal, sink # N.B.: data are not loaded immediately but on demand; this way I/O and computing are further # disentangled and not all variables are always needed # Compute intermediate variables, if necessary for varname in varlist: vars = None # variable list if varname in source: var = source[varname].load() # load data (may not have to load all) else: var = None if varname == 'waterflx': var = newvars.computeWaterFlux(source) elif varname == 'liqwatflx': var = newvars.computeLiquidWaterFlux(source) elif varname == 'netrad': var = newvars.computeNetRadiation(source, asVar=True) elif varname == 'netrad_0': var = newvars.computeNetRadiation(source, asVar=True, lA=False, name='netrad_0') elif varname == 'netrad_bb': var = newvars.computeNetRadiation(source, asVar=True, lrad=False, name='netrad_bb') elif varname == 'vapdef': var = newvars.computeVaporDeficit(source) elif varname == 'pet' or varname == 'pet_pm': vars = newvars.computePotEvapPM(source, lterms=True) # default; returns mutliple PET terms #var = newvars.computePotEvapPM(source, lterms=False) # returns only PET elif varname == 'pet_th': var = None # skip for now #var = computePotEvapTh(source) # simplified formula (less prerequisites) else: raise VariableError, "Unsupported Variable '{:s}'.".format(varname) # for now, skip variables that are None if var or vars: # handle lists as well if var and vars: raise VariableError, (var,vars) if var: vars = (var,) for var in vars: addGDALtoVar(var=var, griddef=sink.griddef) if not var.gdal and isinstance(fileFormat,ASCII_raster): raise GDALError, "Exporting to ASCII_raster format requires GDAL-enabled variables." # add to new dataset sink += var # convert units if lm3: for var in sink: if var.units == 'kg/m^2/s': var /= 1000. 
# divide to get m^3/m^2/s var.units = 'm^3/m^2/s' # update units # print dataset if not lparallel and ldebug: logger.info('\n'+str(sink)+'\n') # export new dataset to selected format fileFormat.exportDataset(sink) # write results to file writemsg = "\n{:s} >>> Export of Dataset '{:s}' to Format '{:s}' complete.".format(pidstr,expname, expformat) writemsg += "\n{:s} >>> ('{:s}')\n".format(pidstr,expfolder) logger.info(writemsg) # clean up and return source.unload(); #del source return 0 # "exit code"
def rasterDataset(name=None, title=None, vardefs=None, axdefs=None, atts=None, projection=None, griddef=None,
                  lgzip=None, lgdal=True, lmask=True, fillValue=None, lskipMissing=True, lgeolocator=True,
                  file_pattern=None, lfeedback=True, **kwargs):
  ''' function to load a set of variables that are stored in raster format in a systematic directory tree into a Dataset
      Variables and Axis are defined as follows:
        vardefs[varname] = dict(name=string, units=string, axes=tuple of strings, atts=dict, plot=dict, dtype=np.dtype, fillValue=value)
        axdefs[axname]   = dict(name=string, units=string, atts=dict, coord=array or list) or None
      The path to raster files is constructed as variable_pattern+axes_pattern, where axes_pattern is defined through the axes,
      (as in rasterVarialbe) and variable_pattern takes the special keywords VAR, which is the variable key in vardefs. '''
  ## prepare input data and axes
  if griddef:
    xlon,ylat = griddef.xlon,griddef.ylat
    if projection is None:
      projection = griddef.projection
    elif projection != griddef.projection:
      raise ArgumentError("Conflicting projection and GridDef!")
    geotransform = griddef.geotransform
    isProjected = griddef.isProjected
  else:
    xlon = ylat = geotransform = None
    isProjected = False if projection is None else True
  # construct axes dict
  axes = dict()
  for axname,axdef in axdefs.items():
    if axdef is None:
      # a None entry means the axis is inferred from the raster files later
      axes[axname] = None
    else:
      # BUGFIX: validate only actual definitions; previously these asserts ran before the
      #         None-check, so a (documented) None entry raised TypeError on "'coord' in None"
      assert 'coord' in axdef, axdef
      assert ( 'name' in axdef and 'units' in axdef ) or 'atts' in axdef, axdef
      ax = Axis(**axdef)
      axes[ax.name] = ax
  # check for map Axis
  if isProjected:
    if 'x' not in axes: axes['x'] = xlon
    if 'y' not in axes: axes['y'] = ylat
  else:
    if 'lon' not in axes: axes['lon'] = xlon
    if 'lat' not in axes: axes['lat'] = ylat
  ## load raster data into Variable objects
  varlist = []
  for varname,vardef in vardefs.items():
    # check definitions
    assert 'axes' in vardef and 'dtype' in vardef, vardef
    assert ( 'name' in vardef and 'units' in vardef ) or 'atts' in vardef, vardef
    # determine relevant axes
    vardef = vardef.copy()
    axes_list = [None if ax is None else axes[ax] for ax in vardef.pop('axes')]
    # define path parameters (with varname)
    path_params = vardef.pop('path_params',None)
    path_params = dict() if path_params is None else path_params.copy()
    if 'VAR' not in path_params: path_params['VAR'] = varname # a special key
    # add kwargs and relevant axis indices
    relaxes = [ax.name for ax in axes_list if ax is not None] # relevant axes
    for key,value in kwargs.items():
      if key not in axes or key in relaxes:
        vardef[key] = value
    # create Variable object
    var = rasterVariable(projection=projection, griddef=griddef, file_pattern=file_pattern, lgzip=lgzip,
                         lgdal=lgdal, lmask=lmask, lskipMissing=lskipMissing, axes=axes_list,
                         path_params=path_params, lfeedback=lfeedback, **vardef)
    # vardef components: name, units, atts, plot, dtype, fillValue
    varlist.append(var)
    # check that map axes are correct
    for ax in var.xlon,var.ylat:
      if axes[ax.name] is None: axes[ax.name] = ax
      elif axes[ax.name] != ax: raise AxisError("{} axes are incompatible.".format(ax.name))
    if griddef is None: griddef = var.griddef
    elif griddef != var.griddef: raise AxisError("GridDefs are inconsistent.")
    if geotransform is None: geotransform = var.geotransform
    elif geotransform != var.geotransform:
      raise AxisError("Conflicting geotransform (from Variable) and GridDef!\n {} != {}".format(var.geotransform,geotransform))
  ## create Dataset
  # create dataset
  dataset = Dataset(name=name, title=title, varlist=varlist, axes=axes, atts=atts)
  # add GDAL functionality
  dataset = addGDALtoDataset(dataset, griddef=griddef, projection=projection, geotransform=geotransform,
                             gridfolder=None, lwrap360=None, geolocator=lgeolocator, lforce=False)
  # N.B.: for some reason we also need to pass the geotransform, otherwise it is recomputed internally and some consistency
  #       checks fail due to machine-precision differences
  # return GDAL-enabled Dataset
  return dataset
def __init__(self): self.name = 'const' self.atts = dict(orog = dict(name='zs', units='m'), # surface altitude # axes (don't have their own file) class Axes(FileType): ''' A mock-filetype for axes. ''' def __init__(self): self.atts = dict(time = dict(name='time', units='days', offset=-47116, atts=dict(long_name='Month since 1979')), # time coordinate (days since 1979-01-01) # NOTE THAT THE CMIP5 DATASET HAVE DIFFERENT TIME OFFSETS BETWEEN MEMBERS !!! # N.B.: the time coordinate is only used for the monthly time-series data, not the LTM # the time offset is chose such that 1979 begins with the origin (time=0) lon = dict(name='lon', units='deg E'), # west-east coordinate lat = dict(name='lat', units='deg N'), # south-north coordinate plev = dict(name='lev', units='')) # hybrid pressure coordinate self.vars = self.atts.keys() # Time-Series (monthly) def loadCMIP5_TS(experiment=None, name=None, grid=None, filetypes=None, varlist=None, varatts=None, translateVars=None, lautoregrid=None, load3D=False, ignore_list=None, lcheckExp=True, lreplaceTime=True, lwrite=False, exps=None): ''' Get a properly formatted CESM dataset with a monthly time-series. (wrapper for loadCESM)''' return loadCMIP5_All(experiment=experiment, name=name, grid=grid, period=None, station=None, filetypes=filetypes, varlist=varlist, varatts=varatts, translateVars=translateVars, lautoregrid=lautoregrid, load3D=load3D, ignore_list=ignore_list, mode='time-series', lcheckExp=lcheckExp, lreplaceTime=lreplaceTime, lwrite=lwrite, exps=exps) # load minimally pre-processed CESM climatology files def loadCMIP5(experiment=None, name=None, grid=None, period=None, filetypes=None, varlist=None, varatts=None, translateVars=None, lautoregrid=None, load3D=False, ignore_list=None, lcheckExp=True, lreplaceTime=True, lencl=False, lwrite=False, exps=None): ''' Get a properly formatted monthly CESM climatology as NetCDFDataset. 
'''
  return loadCMIP5_All(experiment=experiment, name=name, grid=grid, period=period, station=None,
                       filetypes=filetypes, varlist=varlist, varatts=varatts, translateVars=translateVars,
                       lautoregrid=lautoregrid, load3D=load3D, ignore_list=ignore_list, exps=exps,
                       mode='climatology', lcheckExp=lcheckExp, lreplaceTime=lreplaceTime, lwrite=lwrite)

# load any of the various pre-processed CESM climatology and time-series files
def loadCMIP5_All(experiment=None, name=None, grid=None, station=None, shape=None, period=None,
                  varlist=None, varatts=None, translateVars=None, lautoregrid=None, load3D=False,
                  ignore_list=None, mode='climatology', cvdp_mode=None, lcheckExp=True, exps=None,
                  lreplaceTime=True, filetypes=None, lencl=False, lwrite=False, check_vars=None):
  ''' Get any of the monthly CESM files as a properly formatted NetCDFDataset. '''
  # period: accept tuple/list of numbers, 'YYYY-YYYY' string, single integer, or None
  if isinstance(period,(tuple,list)):
    if not all(isNumber(period)): raise ValueError
  elif isinstance(period,basestring): period = [int(prd) for prd in period.split('-')]
  elif isinstance(period,(int,np.integer)) or period is None : pass # handled later
  else: raise DateError, "Illegal period definition: {:s}".format(str(period))
  # prepare input
  lclim = False; lts = False; lcvdp = False; ldiag = False # mode switches
  if mode.lower() == 'climatology': # post-processed climatology files
    lclim = True
    folder,experiment,name = getFolderName(name=name, experiment=experiment, folder=None, mode='avg', exps=exps)
    if period is None: raise DateError, 'Currently CESM Climatologies have to be loaded with the period explicitly specified.'
  elif mode.lower() in ('time-series','timeseries'): # concatenated time-series files
    lts = True
    folder,experiment,name = getFolderName(name=name, experiment=experiment, folder=None, mode='avg', exps=exps)
    lclim = False; period = None; periodstr = None # to indicate time-series (but for safety, the input must be more explicit)
    if lautoregrid is None: lautoregrid = False # this can take very long!
  elif mode.lower() == 'cvdp': # concatenated time-series files
    lcvdp = True
    folder,experiment,name = getFolderName(name=name, experiment=experiment, folder=None, mode='cvdp',
                                           cvdp_mode=cvdp_mode, exps=exps)
    if period is None:
      if not isinstance(experiment,Exp): raise DatasetError, 'Periods can only be inferred for registered datasets.'
      period = (experiment.beginyear, experiment.endyear)
  elif mode.lower() == 'diag': # concatenated time-series files
    ldiag = True
    folder,experiment,name = getFolderName(name=name, experiment=experiment, folder=None, mode='diag', exps=exps)
    raise NotImplementedError, "Loading AMWG diagnostic files is not supported yet."
  else: raise NotImplementedError,"Unsupported mode: '{:s}'".format(mode)
  # cast/copy varlist
  if isinstance(varlist,basestring): varlist = [varlist] # cast as list
  elif varlist is not None: varlist = list(varlist) # make copy to avoid interference
  # handle stations and shapes
  if station and shape: raise ArgumentError
  elif station or shape:
    if grid is not None: raise NotImplementedError, 'Currently CESM station data can only be loaded from the native grid.'
    if lcvdp: raise NotImplementedError, 'CVDP data is not available as station data.'
    if lautoregrid: raise GDALError, 'Station data can not be regridded, since it is not map data.'
    lstation = bool(station); lshape = bool(shape)
    # add station/shape parameters
    if varlist:
      params = stn_params if lstation else shp_params
      for param in params:
        if param not in varlist: varlist.append(param)
  else:
    lstation = False; lshape = False
  # period: expand a single integer to (beginyear, beginyear+length)
  if isinstance(period,(int,np.integer)):
    if not isinstance(experiment,Exp): raise DatasetError, 'Integer periods are only supported for registered datasets.'
    period = (experiment.beginyear, experiment.beginyear+period)
  if lclim: periodstr = '_{0:4d}-{1:4d}'.format(*period)
  elif lcvdp: periodstr = '{0:4d}-{1:4d}'.format(period[0],period[1]-1)
  else: periodstr = ''
  # N.B.: the period convention in CVDP is that the end year is included
  # generate filelist and attributes based on filetypes and domain
  if filetypes is None: filetypes = ['atm','lnd']
  elif isinstance(filetypes,(list,tuple,set,basestring)):
    if isinstance(filetypes,basestring): filetypes = [filetypes]
    else: filetypes = list(filetypes)
    # interprete/replace WRF filetypes (for convenience)
    tmp = []
    for ft in filetypes:
      if ft in ('const','drydyn3d','moist3d','rad','plev3d','srfc','xtrm','hydro'):
        if 'atm' not in tmp: tmp.append('atm')
      elif ft in ('lsm','snow'):
        if 'lnd' not in tmp: tmp.append('lnd')
      # NOTE(review): ('aux') is a string, not a tuple — 'in' does substring matching here
      #               (e.g. ft='a' would match); probably meant ('aux',)
      elif ft in ('aux'): pass # currently not supported
      # elif ft in (,):
      #   if 'atm' not in tmp: tmp.append('atm')
      #   if 'lnd' not in tmp: tmp.append('lnd')
      else: tmp.append(ft)
    filetypes = tmp; del tmp
    if 'axes' not in filetypes: filetypes.append('axes')
  else: raise TypeError
  atts = dict(); filelist = []; typelist = []
  for filetype in filetypes:
    fileclass = fileclasses[filetype]
    if lclim and fileclass.climfile is not None: filelist.append(fileclass.climfile)
    elif lts and fileclass.tsfile is not None: filelist.append(fileclass.tsfile)
    elif lcvdp and fileclass.cvdpfile is not None: filelist.append(fileclass.cvdpfile)
    elif ldiag and fileclass.diagfile is not None: filelist.append(fileclass.diagfile)
    typelist.append(filetype)
    atts.update(fileclass.atts)
  # figure out ignore list
  if ignore_list is None: ignore_list = set(ignore_list_2D)
  elif isinstance(ignore_list,(list,tuple)): ignore_list = set(ignore_list)
  elif not isinstance(ignore_list,set): raise TypeError
  if not load3D: ignore_list.update(ignore_list_3D)
  if lautoregrid is None: lautoregrid = not load3D # don't auto-regrid 3D variables - takes too long!
  # translate varlist
  if varatts is not None: atts.update(varatts)
  lSST = False
  if varlist is not None:
    varlist = list(varlist)
    if 'SST' in varlist: # special handling of name SST variable, as it is part of Ts
      varlist.remove('SST')
      if not 'Ts' in varlist: varlist.append('Ts')
      lSST = True # Ts is renamed to SST below
    if translateVars is None: varlist = list(varlist) + translateVarNames(varlist, atts) # also aff translations, just in case
    elif translateVars is True: varlist = translateVarNames(varlist, atts)
    # N.B.: DatasetNetCDF does never apply translation!
  # NetCDF file mode
  ncmode = 'rw' if lwrite else 'r'
  # get grid or station-set name
  if lstation:
    # the station name can be inserted as the grid name
    gridstr = '_'+station.lower(); # only use lower case for filenames
    griddef = None
  elif lshape:
    # the station name can be inserted as the grid name
    gridstr = '_'+shape.lower(); # only use lower case for filenames
    griddef = None
  else:
    if grid is None or grid == experiment.grid:
      gridstr = ''; griddef = None
    else:
      gridstr = '_'+grid.lower() # only use lower case for filenames
      griddef = loadPickledGridDef(grid=grid, res=None, filename=None, folder=grid_folder, check=True)
  # insert grid name and period
  filenames = []
  for filetype,fileformat in zip(typelist,filelist):
    if lclim: filename = fileformat.format(gridstr,periodstr) # put together specfic filename for climatology
    elif lts: filename = fileformat.format(gridstr) # or for time-series
    elif lcvdp: filename = fileformat.format(experiment.name if experiment else name,periodstr) # not implemented: gridstr
    elif ldiag: raise NotImplementedError
    else: raise DatasetError
    filenames.append(filename) # append to list (passed to DatasetNetCDF later)
    # check existance
    filepath = '{:s}/{:s}'.format(folder,filename)
    if not os.path.exists(filepath):
      nativename = fileformat.format('',periodstr) # original filename (before regridding)
      nativepath = '{:s}/{:s}'.format(folder,nativename)
      if os.path.exists(nativepath):
        if lautoregrid:
          from processing.regrid import performRegridding # causes circular reference if imported earlier
          griddef = loadPickledGridDef(grid=grid, res=None, folder=grid_folder)
          dataargs = dict(experiment=experiment, filetypes=[filetype], period=period)
          # NOTE(review): the next two format strings receive more arguments than placeholders;
          #               the second prints 'name' where 'filepath' was probably intended
          print("The '{:s}' (CESM) dataset for the grid ('{:s}') is not available:\n Attempting regridding on-the-fly.".format(name,filename,grid))
          if performRegridding('CESM','climatology' if lclim else 'time-series', griddef, dataargs): # default kwargs
            raise IOError, "Automatic regridding failed!"
          print("Output: '{:s}'".format(name,filename,grid,filepath))
        else: raise IOError, "The '{:s}' (CESM) dataset '{:s}' for the selected grid ('{:s}') is not available - use the regrid module to generate it.".format(name,filename,grid)
      else: raise IOError, "The '{:s}' (CESM) dataset file '{:s}' does not exits!\n({:s})".format(name,filename,folder)
  # load dataset
  #print varlist, filenames
  if experiment: title = experiment.title
  else: title = name
  dataset = DatasetNetCDF(name=name, folder=folder, filelist=filenames, varlist=varlist, axes=None,
                          varatts=atts, title=title, multifile=False, ignore_list=ignore_list,
                          ncformat='NETCDF4', squeeze=True, mode=ncmode, check_vars=check_vars)
  # replace time axis
  if lreplaceTime:
    if lts or lcvdp:
      # check time axis and center at 1979-01 (zero-based)
      if experiment is None: ys = period[0]; ms = 1
      else: ys,ms,ds = [int(t) for t in experiment.begindate.split('-')]; assert ds == 1
      if dataset.hasAxis('time'):
        ts = (ys-1979)*12 + (ms-1); te = ts+len(dataset.time) # month since 1979 (Jan 1979 = 0)
        atts = dict(long_name='Month since 1979-01')
        timeAxis = Axis(name='time', units='month', coord=np.arange(ts,te,1, dtype='int16'), atts=atts)
        dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
      if dataset.hasAxis('year'):
        ts = ys-1979; te = ts+len(dataset.year) # month since 1979 (Jan 1979 = 0)
        atts = dict(long_name='Years since 1979-01')
        yearAxis = Axis(name='year', units='year', coord=np.arange(ts,te,1, dtype='int16'), atts=atts)
        dataset.replaceAxis(dataset.year, yearAxis, asNC=False, deepcopy=False)
    elif lclim:
      # climatologies get a simple month-of-year axis (1..12)
      if dataset.hasAxis('time') and not dataset.time.units.lower() in monthlyUnitsList:
        atts = dict(long_name='Month of the Year')
        timeAxis = Axis(name='time', units='month', coord=np.arange(1,13, dtype='int16'), atts=atts)
        assert len(dataset.time) == len(timeAxis), dataset.time
        dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
      elif dataset.hasAxis('year'): raise NotImplementedError, dataset
  # rename SST
  if lSST: dataset['SST'] = dataset.Ts
  # correct ordinal number of shape (should start at 1, not 0)
  if lshape:
    # mask all shapes that are incomplete in dataset
    if lencl and 'shp_encl' in dataset: dataset.mask(mask='shp_encl', invert=True)
    if dataset.hasAxis('shapes'): raise AxisError, "Axis 'shapes' should be renamed to 'shape'!"
    if not dataset.hasAxis('shape'): raise AxisError
    if dataset.shape.coord[0] == 0: dataset.shape.coord += 1
  # check
  if len(dataset) == 0: raise DatasetError, 'Dataset is empty - check source file or variable list!'
  # add projection, if applicable
  if not ( lstation or lshape ):
    dataset = addGDALtoDataset(dataset, griddef=griddef, gridfolder=grid_folder, lwrap360=True, geolocator=True)
  # return formatted dataset
  return dataset

## Dataset API

dataset_name = 'CMIP5' # dataset name
# NOTE(review): the next three lines are bare expression statements — the assignments appear to
#               have been lost; root_folder/avgfolder/outfolder must be defined for this module
#               to import (data_folder below depends on root_folder) — TODO restore values
root_folder # root folder of the dataset
avgfolder # root folder for monthly averages
outfolder # root folder for direct WRF output
ts_file_pattern = 'cmip5{0:s}{1:s}_monthly.nc' # filename pattern: filetype, grid
clim_file_pattern = 'cmip5{0:s}{1:s}_clim{2:s}.nc' # filename pattern: filetype, grid, period
data_folder = root_folder # folder for user data
grid_def = {'':None} # there are too many...
grid_res = {'':1.} # approximate grid resolution at 45 degrees latitude
default_grid = None
# functions to access specific datasets
loadLongTermMean = None # WRF doesn't have that...
# BUGFIX: these previously pointed to loadCESM/loadCESM_TS, which are not defined in this
#         (CMIP5) module and would raise NameError at import time
loadClimatology = loadCMIP5 # pre-processed, standardized climatology
loadTimeSeries = loadCMIP5_TS # time-series data
#loadStationClimatology = loadCESM_Stn # pre-processed, standardized climatology at stations
#loadStationTimeSeries = loadCESM_StnTS # time-series data at stations
#loadShapeClimatology = loadCESM_Shp # climatologies without associated grid (e.g. provinces or basins)
#loadShapeTimeSeries = loadCESM_ShpTS # time-series without associated grid (e.g. provinces or basins)

## (ab)use main execution for quick test
if __name__ == '__main__':

  # set mode/parameters
#   mode = 'test_climatology'
#   mode = 'test_timeseries'
#   mode = 'test_ensemble'
#   mode = 'test_point_climatology'
#   mode = 'test_point_timeseries'
#   mode = 'test_point_ensemble'
#   mode = 'test_cvdp'
  mode = 'pickle_grid'
#   mode = 'shift_lon'
#   experiments = ['Ctrl-1', 'Ctrl-A', 'Ctrl-B', 'Ctrl-C']
#   experiments += ['Ctrl-2050', 'Ctrl-A-2050', 'Ctrl-B-2050', 'Ctrl-C-2050']
  experiments = ('Ctrl-1',)
  periods = (15,)
  filetypes = ('atm',) # ['atm','lnd','ice']
  grids = ('cesm1x1',)*len(experiments) # grb1_d01
#   pntset = 'shpavg'
  pntset = 'ecprecip'

  from projects.CESM_experiments import Exp, CESM_exps, ensembles
  # N.B.: importing Exp through CESM_experiments is necessary, otherwise some isinstance() calls fail

  # pickle grid definition
  if mode == 'pickle_grid':

    for grid,experiment in zip(grids,experiments):

      print('')
      print(' *** Pickling Grid Definition for {0:s} *** '.format(grid))
      print('')
      # load GridDefinition from the native grid of the experiment
      # (BUGFIX: was loadCESM, which is not defined in this module)
      dataset = loadCMIP5(experiment=CESM_exps[experiment], grid=None, filetypes=['lnd'], period=(1979,1989))
      griddef = dataset.griddef
      #del griddef.xlon, griddef.ylat
      print(griddef)
      griddef.name = grid
      print(' Loading Definition from \'{0:s}\''.format(dataset.name))
      # save pickle (remove any existing file first)
      filename = '{0:s}/{1:s}'.format(grid_folder,griddef_pickle.format(grid))
      if os.path.exists(filename): os.remove(filename) # overwrite
      # BUGFIX: pickles must be written in binary mode ('wb', not 'w')
      filehandle = open(filename, 'wb')
      pickle.dump(griddef, filehandle)
      filehandle.close()
      print(' Saving Pickle to \'{0:s}\''.format(filename))
      print('')
      # load pickle to make sure it is right
      del griddef
      griddef = loadPickledGridDef(grid, res=None, folder=grid_folder)
      print(griddef)
      print('')
      print(griddef.wrap360)
def loadObservations(name=None, folder=None, period=None, grid=None, station=None, shape=None, lencl=False,
                     varlist=None, varatts=None, filepattern=None, filelist=None, resolution=None,
                     projection=None, geotransform=None, axes=None, lautoregrid=None, mode='climatology'):
  ''' A function to load standardized observational datasets.
      mode: 'climatology' or 'time-series'; period: None, int, 'YYYY-YYYY' string, or 2-tuple;
      station/shape select point/shape data (mutually exclusive); returns a formatted dataset. '''
  # prepare input
  if mode.lower() == 'climatology': # post-processed climatology files
    # transform period
    if period is None or period == '':
      if name not in ('PCIC','PRISM','GPCC','NARR'):
        raise ValueError("A period is required to load observational climatologies.")
    elif isinstance(period,basestring):
      period = tuple([int(prd) for prd in period.split('-')])
    # BUGFIX: was "and ( not isinstance(period,tuple) and len(period) == 2 )", which raised for
    #         valid 2-element lists and silently accepted tuples of any other length;
    #         the intent is: anything that is neither an int nor a 2-tuple is a TypeError
    elif not isinstance(period,(int,np.integer)) and not ( isinstance(period,tuple) and len(period) == 2 ):
      raise TypeError(period)
  elif mode.lower() in ('time-series','timeseries'): # concatenated time-series files
    period = None # to indicate time-series (but for safety, the input must be more explicit)
    if lautoregrid is None: lautoregrid = False # this can take very long!
  # cast/copy varlist
  if isinstance(varlist,basestring): varlist = [varlist] # cast as list
  elif varlist is not None: varlist = list(varlist) # make copy to avoid interference
  # figure out station and shape options
  if station and shape: raise ArgumentError()
  elif station or shape:
    if grid is not None: raise NotImplementedError('Currently observational station data can only be loaded from the native grid.')
    if lautoregrid: raise GDALError('Station data can not be regridded, since it is not map data.')
    lstation = bool(station); lshape = bool(shape)
    grid = station if lstation else shape # the station/shape set name doubles as the grid name
    # add station/shape parameters
    if varlist:
      params = stn_params if lstation else shp_params
      for param in params:
        if param not in varlist: varlist.append(param)
  else:
    lstation = False; lshape = False
  # varlist (varlist = None means all variables)
  if varatts is None: varatts = default_varatts.copy()
  if varlist is not None: varlist = translateVarNames(varlist, varatts)
  # filelist
  if filelist is None:
    filename = getFileName(name=name, resolution=resolution, period=period, grid=grid, filepattern=filepattern)
    # check existance
    filepath = '{:s}/{:s}'.format(folder,filename)
    if not os.path.exists(filepath):
      nativename = getFileName(name=name, resolution=resolution, period=period, grid=None, filepattern=filepattern)
      nativepath = '{:s}/{:s}'.format(folder,nativename)
      if os.path.exists(nativepath):
        if lautoregrid:
          from processing.regrid import performRegridding # causes circular reference if imported earlier
          griddef = loadPickledGridDef(grid=grid, res=None, folder=grid_folder)
          dataargs = dict(period=period, resolution=resolution)
          # NOTE(review): the return value of performRegridding is not checked here — failures pass silently
          performRegridding(name, 'climatology',griddef, dataargs) # default kwargs
        else:
          raise IOError("The dataset '{:s}' for the selected grid ('{:s}') is not available - use the regrid module to generate it.".format(filename,grid) )
      else:
        raise IOError("The dataset file '{:s}' does not exits!\n('{:s}')".format(filename,filepath))
    filelist = [filename]
  # load dataset
  # BUGFIX: previously filelist=[filename] was passed unconditionally, so a caller-supplied
  #         filelist was ignored and 'filename' was unbound (NameError); now the caller's
  #         filelist is used when given, and the generated one otherwise
  dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist, varatts=varatts,
                          axes=axes, multifile=False, ncformat='NETCDF4')
  # mask all shapes that are incomplete in dataset
  if shape and lencl and 'shp_encl' in dataset:
    dataset.load() # need to load data before masking; is cheap for shape averages, anyway
    dataset.mask(mask='shp_encl', invert=True, skiplist=shp_params)
  # correct ordinal number of shape (should start at 1, not 0)
  if lshape:
    if dataset.hasAxis('shapes'): raise AxisError("Axis 'shapes' should be renamed to 'shape'!")
    if not dataset.hasAxis('shape'): raise AxisError()
    if dataset.shape.coord[0] == 0: dataset.shape.coord += 1
  # figure out grid
  if not lstation and not lshape:
    if grid is None or grid == name:
      dataset = addGDALtoDataset(dataset, projection=projection, geotransform=geotransform, gridfolder=grid_folder)
    elif isinstance(grid,basestring): # load from pickle file
#       griddef = loadPickledGridDef(grid=grid, res=None, filename=None, folder=grid_folder)
      # add GDAL functionality to dataset (griddef accepts the grid name and resolves it internally)
      dataset = addGDALtoDataset(dataset, griddef=grid, gridfolder=grid_folder)
    else: raise TypeError(dataset)
    # N.B.: projection should be auto-detected, if geographic (lat/lon)
  return dataset
def rasterDataset(name=None, title=None, vardefs=None, axdefs=None, atts=None, projection=None, griddef=None,
                  lgzip=None, lgdal=True, lmask=True, fillValue=None, lskipMissing=True, lgeolocator=True,
                  file_pattern=None, lfeedback=True, **kwargs):
    ''' Load a set of variables that are stored in raster format in a systematic directory tree into a Dataset.

        Variables and Axes are defined as follows:
          vardefs[varname] = dict(name=string, units=string, axes=tuple of strings, atts=dict, plot=dict,
                                  dtype=np.dtype, fillValue=value)
          axdefs[axname]   = dict(name=string, units=string, atts=dict, coord=array or list) or None
        An axis definition of None is a placeholder: that Axis is adopted from the raster data itself
        (typically the map axes x/y or lon/lat).

        The path to raster files is constructed as variable_pattern+axes_pattern, where axes_pattern is
        defined through the axes (as in rasterVariable) and variable_pattern takes the special keyword VAR,
        which is the variable key in vardefs. Extra **kwargs that do not name an axis are passed on to
        rasterVariable. Returns a GDAL-enabled Dataset; raises ArgumentError on conflicting projections and
        AxisError on inconsistent axes, GridDefs, or geotransforms.
    '''
    ## prepare input data and axes
    if griddef:
        xlon, ylat = griddef.xlon, griddef.ylat
        if projection is None:
            projection = griddef.projection
        elif projection != griddef.projection:
            raise ArgumentError("Conflicting projection and GridDef!")
        geotransform = griddef.geotransform
        isProjected = griddef.isProjected
    else:
        xlon = ylat = geotransform = None
        isProjected = projection is not None  # an explicit projection implies projected coordinates
    # construct axes dict
    axes = dict()
    for axname, axdef in axdefs.items():
        if axdef is None:
            # placeholder: the Axis will be supplied by the raster data (see map-axis check below)
            axes[axname] = None
        else:
            # N.B.: validate only actual definitions; previously these asserts ran before the None test,
            #       so the documented axdefs[axname]=None case crashed with a TypeError ('in' on None)
            assert 'coord' in axdef, axdef
            assert ('name' in axdef and 'units' in axdef) or 'atts' in axdef, axdef
            ax = Axis(**axdef)
            axes[ax.name] = ax
    # check for map Axis
    if isProjected:
        if 'x' not in axes: axes['x'] = xlon
        if 'y' not in axes: axes['y'] = ylat
    else:
        if 'lon' not in axes: axes['lon'] = xlon
        if 'lat' not in axes: axes['lat'] = ylat
    ## load raster data into Variable objects
    varlist = []
    for varname, vardef in vardefs.items():
        # check definitions
        assert 'axes' in vardef and 'dtype' in vardef, vardef
        assert ('name' in vardef and 'units' in vardef) or 'atts' in vardef, vardef
        # determine relevant axes (work on a copy, so the caller's definition is not consumed)
        vardef = vardef.copy()
        axes_list = [ None if ax is None else axes[ax] for ax in vardef.pop('axes') ]
        # define path parameters (with varname)
        path_params = vardef.pop('path_params', None)
        path_params = dict() if path_params is None else path_params.copy()
        if 'VAR' not in path_params: path_params['VAR'] = varname # a special key
        # add kwargs, but only those that are not axis names of *other* variables
        relaxes = [ ax.name for ax in axes_list if ax is not None ] # relevant axes
        for key, value in kwargs.items():
            if key not in axes or key in relaxes:
                vardef[key] = value
        # create Variable object
        # NOTE(review): the fillValue parameter of this function is currently unused; a per-variable
        #               fillValue has to be supplied via vardefs — confirm whether that is intended
        var = rasterVariable(projection=projection, griddef=griddef, file_pattern=file_pattern, lgzip=lgzip,
                             lgdal=lgdal, lmask=lmask, lskipMissing=lskipMissing, axes=axes_list,
                             path_params=path_params, lfeedback=lfeedback, **vardef)
        # vardef components: name, units, atts, plot, dtype, fillValue
        varlist.append(var)
        # check that map axes are correct
        for ax in var.xlon, var.ylat:
            if axes[ax.name] is None:
                axes[ax.name] = ax # adopt the axis inferred from the raster data
            elif axes[ax.name] != ax:
                raise AxisError("{} axes are incompatible.".format(ax.name))
        if griddef is None: griddef = var.griddef
        elif griddef != var.griddef:
            raise AxisError("GridDefs are inconsistent.")
        if geotransform is None: geotransform = var.geotransform
        elif geotransform != var.geotransform:
            raise AxisError("Conflicting geotransform (from Variable) and GridDef!\n {} != {}".format(
                            var.geotransform, geotransform))
    ## create Dataset
    # create dataset
    dataset = Dataset(name=name, title=title, varlist=varlist, axes=axes, atts=atts)
    # add GDAL functionality
    dataset = addGDALtoDataset(dataset, griddef=griddef, projection=projection, geotransform=geotransform,
                               gridfolder=None, lwrap360=None, geolocator=lgeolocator, lforce=False)
    # N.B.: for some reason we also need to pass the geotransform, otherwise it is recomputed internally
    #       and some consistency checks fail due to machine-precision differences
    # return GDAL-enabled Dataset
    return dataset