def getTargetFile(dataset=None, mode=None, dataargs=None, grid=None, shape=None, station=None,
                  period=None, filetype=None, lwrite=True):
    ''' Generate the filename for a target dataset (CESM & WRF or observational).

        dataset : 'WRF', 'CESM', or an observational dataset name
        mode    : 'climatology' or 'time-series' (also passed through as obs filetype)
        dataargs: meta-data record; provides gridstr, periodstr, domain, filetype,
                  obs_res and avgfolder defaults
        grid, shape, station, period, filetype: explicit overrides for dataargs values;
                  'shape' and 'station' are mutually exclusive prefixes for the grid tag
        lwrite  : if False, raise DatasetError instead of generating a name

        Returns the filename (str); raises IOError if dataargs.avgfolder is missing. '''
    # defaults for grid/period come from the meta-data record
    if grid is None: grid = dataargs.gridstr # also use grid for station/shape type
    if period is None: period = dataargs.periodstr
    if dataset in ('WRF','CESM') and lwrite:
        # prepare some variables
        domain = dataargs.domain
        if filetype is None: filetype = dataargs.filetype
        gstr = '_{}'.format(grid) if grid else ''
        # prepend shape or station type before grid (mutually exclusive)
        if shape and station: raise ArgumentError
        elif shape: gstr = '_{}{}'.format(shape,gstr)
        elif station: gstr = '_{}{}'.format(station,gstr)
        pstr = '_{}'.format(period) if period else ''
        if dataset == 'WRF':
            import datasets.WRF as WRF
            # fall back to a generic FileType for filetypes not in the registry
            fileclass = WRF.fileclasses[filetype] if filetype in WRF.fileclasses else WRF.FileType(filetype)
            if mode == 'climatology': filename = fileclass.climfile.format(domain,gstr,pstr)
            elif mode == 'time-series': filename = fileclass.tsfile.format(domain,gstr)
            # bug fix: unsupported modes previously fell through and left 'filename' unbound (NameError)
            else: raise NotImplementedError("Unsupported Mode: '{:s}'".format(mode))
        elif dataset == 'CESM':
            import datasets.CESM as CESM
            fileclass = CESM.fileclasses[filetype] if filetype in CESM.fileclasses else CESM.FileType(filetype)
            if mode == 'climatology': filename = fileclass.climfile.format(gstr,pstr)
            elif mode == 'time-series': filename = fileclass.tsfile.format(gstr)
            # N.B.: call-form raise is valid in both Python 2 and 3 (the comma form is Py2-only)
            else: raise NotImplementedError("Unsupported Mode: '{:s}'".format(mode))
    elif lwrite:
        # assume observational datasets
        filename = getFileName(grid=grid, shape=shape, station=station, period=period,
                               name=dataargs.obs_res, filetype=mode)
    else: raise DatasetError(dataset)
    if not os.path.exists(dataargs.avgfolder):
        raise IOError("Dataset folder '{:s}' does not exist!".format(dataargs.avgfolder))
    # return filename
    return filename
def getTargetFile(dataset=None, mode=None, dataargs=None, lwrite=True, grid=None, period=None, filetype=None):
    ''' Generate the filename for a target dataset.

        dataset : 'WRF', 'CESM', an all-uppercase observational dataset name, or 'Unity'
        mode    : 'climatology' or 'time-series' (also passed through as obs filetype)
        dataargs: meta-data record; provides domain, filetype, gridstr, periodstr,
                  period, obs_res and avgfolder
        lwrite  : if False, raise DatasetError instead of generating a name
        grid, period, filetype: explicit overrides for the dataargs values

        Returns the filename (str); raises IOError if dataargs.avgfolder is missing. '''
    # prepare some variables
    domain = dataargs.domain
    if filetype is None: filetype = dataargs.filetype
    if grid is None: grid = dataargs.gridstr # also use grid for station type
    if period is None: period = dataargs.periodstr
    gstr = '_{}'.format(grid) if grid else ''
    pstr = '_{}'.format(period) if period else ''
    # figure out filename
    if dataset == 'WRF' and lwrite:
        if mode == 'climatology': filename = WRF.clim_file_pattern.format(filetype,domain,gstr,pstr)
        elif mode == 'time-series': filename = WRF.ts_file_pattern.format(filetype,domain,gstr)
        else: raise NotImplementedError("Unsupported Mode: '{:s}'".format(mode))
    elif dataset == 'CESM' and lwrite:
        if mode == 'climatology': filename = CESM.clim_file_pattern.format(filetype,gstr,pstr)
        elif mode == 'time-series': filename = CESM.ts_file_pattern.format(filetype,gstr)
        else: raise NotImplementedError("Unsupported Mode: '{:s}'".format(mode))
    elif ( dataset == dataset.upper() or dataset == 'Unity' ) and lwrite:
        # observational datasets
        filename = getFileName(grid=grid, period=dataargs.period, name=dataargs.obs_res, filetype=mode)
    elif not lwrite: raise DatasetError
    # bug fix: an unrecognized dataset with lwrite=True previously fell through,
    # leaving 'filename' unbound and raising an opaque NameError below
    else: raise DatasetError("Unsupported dataset: '{:s}'".format(dataset))
    if not os.path.exists(dataargs.avgfolder):
        raise IOError("Dataset folder '{:s}' does not exist!".format(dataargs.avgfolder))
    # return filename
    return filename
def getTargetFile(dataset=None, mode=None, dataargs=None, grid=None, shape=None, station=None,
                  period=None, filetype=None, lwrite=True):
    ''' Generate the filename for a target dataset (CESM & WRF or observational).

        dataset : 'WRF', 'CESM', or an observational dataset name
        mode    : 'climatology' or 'time-series' (also passed through as obs filetype)
        dataargs: meta-data record; provides gridstr, periodstr, domain, filetype,
                  obs_res and avgfolder defaults
        grid, shape, station, period, filetype: explicit overrides for dataargs values;
                  'shape' and 'station' are mutually exclusive prefixes for the grid tag
        lwrite  : if False, raise DatasetError instead of generating a name

        Returns the filename (str); raises IOError if dataargs.avgfolder is missing. '''
    # defaults for grid/period come from the meta-data record
    if grid is None: grid = dataargs.gridstr # also use grid for station/shape type
    if period is None: period = dataargs.periodstr
    if dataset in ('WRF', 'CESM') and lwrite:
        # prepare some variables
        domain = dataargs.domain
        if filetype is None: filetype = dataargs.filetype
        gstr = '_{}'.format(grid) if grid else ''
        # prepend shape or station type before grid (mutually exclusive)
        if shape and station: raise ArgumentError
        elif shape: gstr = '_{}{}'.format(shape, gstr)
        elif station: gstr = '_{}{}'.format(station, gstr)
        pstr = '_{}'.format(period) if period else ''
        if dataset == 'WRF':
            import datasets.WRF as WRF
            # fall back to a generic FileType for filetypes not in the registry
            fileclass = WRF.fileclasses[filetype] if filetype in WRF.fileclasses else WRF.FileType(filetype)
            if mode == 'climatology': filename = fileclass.climfile.format(domain, gstr, pstr)
            elif mode == 'time-series': filename = fileclass.tsfile.format(domain, gstr)
            # bug fix: unsupported modes previously fell through and left 'filename' unbound (NameError)
            else: raise NotImplementedError("Unsupported Mode: '{:s}'".format(mode))
        elif dataset == 'CESM':
            import datasets.CESM as CESM
            fileclass = CESM.fileclasses[filetype] if filetype in CESM.fileclasses else CESM.FileType(filetype)
            if mode == 'climatology': filename = fileclass.climfile.format(gstr, pstr)
            elif mode == 'time-series': filename = fileclass.tsfile.format(gstr)
            # N.B.: call-form raise is valid in both Python 2 and 3 (the comma form is Py2-only)
            else: raise NotImplementedError("Unsupported Mode: '{:s}'".format(mode))
    elif lwrite:
        # assume observational datasets
        filename = getFileName(grid=grid, shape=shape, station=station, period=period,
                               name=dataargs.obs_res, filetype=mode)
    else: raise DatasetError(dataset)
    if not os.path.exists(dataargs.avgfolder):
        raise IOError("Dataset folder '{:s}' does not exist!".format(dataargs.avgfolder))
    # return filename
    return filename
def getTargetFile(name, dataset, mode, module, dataargs, lwrite):
    ''' Generate the filename for a target (station) dataset.

        name    : station/point-set name, used as the "grid" designation
        dataset : 'WRF', 'CESM', an all-uppercase observational dataset name, or 'Unity'
        mode    : 'climatology' or 'time-series' (also passed through as obs filetype)
        module  : dataset module providing clim_file_pattern / ts_file_pattern
        dataargs: meta-data record (periodstr, filetype, domain, period, obs_res, avgfolder)
        lwrite  : if False, raise DatasetError instead of generating a name

        Returns the filename (str); raises IOError if dataargs.avgfolder does not exist. '''
    # extract some variables
    periodstr = dataargs.periodstr; filetype = dataargs.filetype; domain = dataargs.domain
    sstr = '_{}'.format(name) # use name as "grid" designation for station data
    pstr = '_{}'.format(periodstr) if periodstr else ''
    # figure out filename
    if dataset == 'WRF' and lwrite:
        if mode == 'climatology': filename = module.clim_file_pattern.format(filetype,domain,sstr,pstr)
        elif mode == 'time-series': filename = module.ts_file_pattern.format(filetype,domain,sstr)
        else: raise NotImplementedError("Unsupported Mode: '{:s}'".format(mode))
    elif dataset == 'CESM' and lwrite:
        if mode == 'climatology': filename = module.clim_file_pattern.format(filetype,sstr,pstr)
        elif mode == 'time-series': filename = module.ts_file_pattern.format(filetype,sstr)
        else: raise NotImplementedError("Unsupported Mode: '{:s}'".format(mode))
    elif ( dataset == dataset.upper() or dataset == 'Unity' ) and lwrite:
        # observational datasets
        filename = getFileName(grid=name, period=dataargs.period, name=dataargs.obs_res, filetype=mode)
    elif not lwrite: raise DatasetError
    # bug fix: an unrecognized dataset with lwrite=True previously fell through,
    # leaving 'filename' unbound and raising an opaque NameError below
    else: raise DatasetError("Unsupported dataset: '{:s}'".format(dataset))
    if not os.path.exists(dataargs.avgfolder):
        raise IOError("Dataset folder '{:s}' does not exist!".format(dataargs.avgfolder))
    # return filename
    return filename
def getTargetFile(name, dataset, mode, module, dataargs, lwrite):
    ''' Generate the filename for a target (station) dataset.

        name    : station/point-set name, used as the "grid" designation
        dataset : 'WRF', 'CESM', an all-uppercase observational dataset name, or 'Unity'
        mode    : 'climatology' or 'time-series' (also passed through as obs filetype)
        module  : dataset module providing clim_file_pattern / ts_file_pattern
        dataargs: meta-data record (periodstr, filetype, domain, period, obs_res, avgfolder)
        lwrite  : if False, raise DatasetError instead of generating a name

        Returns the filename (str); raises IOError if dataargs.avgfolder does not exist. '''
    # extract some variables
    periodstr = dataargs.periodstr
    filetype = dataargs.filetype
    domain = dataargs.domain
    sstr = '_{}'.format(name) # use name as "grid" designation for station data
    pstr = '_{}'.format(periodstr) if periodstr else ''
    # figure out filename
    if dataset == 'WRF' and lwrite:
        if mode == 'climatology': filename = module.clim_file_pattern.format(filetype, domain, sstr, pstr)
        elif mode == 'time-series': filename = module.ts_file_pattern.format(filetype, domain, sstr)
        else: raise NotImplementedError("Unsupported Mode: '{:s}'".format(mode))
    elif dataset == 'CESM' and lwrite:
        if mode == 'climatology': filename = module.clim_file_pattern.format(filetype, sstr, pstr)
        elif mode == 'time-series': filename = module.ts_file_pattern.format(filetype, sstr)
        else: raise NotImplementedError("Unsupported Mode: '{:s}'".format(mode))
    elif (dataset == dataset.upper() or dataset == 'Unity') and lwrite:
        # observational datasets
        filename = getFileName(grid=name, period=dataargs.period, name=dataargs.obs_res, filetype=mode)
    elif not lwrite: raise DatasetError
    # bug fix: an unrecognized dataset with lwrite=True previously fell through,
    # leaving 'filename' unbound and raising an opaque NameError below
    else: raise DatasetError("Unsupported dataset: '{:s}'".format(dataset))
    if not os.path.exists(dataargs.avgfolder):
        raise IOError("Dataset folder '{:s}' does not exist!".format(dataargs.avgfolder))
    # return filename
    return filename
dataset = loadGPCC_LTM(varlist=['stations','precip'],resolution=res) # change meta-data dataset.name = 'GPCC' dataset.title = 'GPCC Long-term Climatology' dataset.atts.resolution = res # load data into memory dataset.load() # add landmask addLandMask(dataset) # create landmask from precip mask dataset.mask(dataset.landmask) # mask all fields using the new landmask # add length and names of month addLengthAndNamesOfMonth(dataset, noleap=False) # figure out a different filename filename = getFileName(grid=res, period=None, name='GPCC', filepattern=avgfile) print('\n'+filename+'\n') if os.path.exists(avgfolder+filename): os.remove(avgfolder+filename) # write data and some annotation ncset = writeNetCDF(dataset, avgfolder+filename, close=False) # add_var(ncset,'name_of_month', name_of_month, 'time', # add names of month # atts=dict(name='name_of_month', units='', long_name='Name of the Month')) # close... ncset.close() dataset.close() # print dataset before print(dataset) print('')
def getMetaData(dataset, mode, dataargs, lone=True):
    ''' Determine dataset type and meta data, as well as path to the main source file.

        dataset : 'WRF', 'CESM', or the name of an observational dataset module
                  (imported from the 'datasets' package)
        mode    : 'climatology', 'time-series', or '*-mean' (treated as climatology)
        dataargs: dict of dataset-specific arguments (read via get(); 'load3D' is
                  pop()'ed, i.e. the caller's dict is mutated)
        lone    : if True, only a single filetype may be processed at a time

        Returns a tuple (dataargs namedTuple, loadfct, srcage, datamsgstr). '''
    # determine dataset mode
    lclim = False
    lts = False
    if mode == 'climatology': lclim = True
    elif mode == 'time-series': lts = True
    elif mode[-5:] == '-mean':
        # seasonal-mean export loads the entire monthly climatology
        lclim = True
        mode = 'climatology' # only for export to seasonal means (load entire monthly climatology)
    else: raise NotImplementedError, "Unrecognized Mode: '{:s}'".format(mode)
    # general arguments (dataset independent)
    varlist = dataargs.get('varlist', None)
    resolution = dataargs.get('resolution', None)
    grid = dataargs.get('grid', None) # get grid
    period = dataargs.get('period', None)
    # determine meta data based on dataset type
    if dataset == 'WRF':
        import datasets.WRF as WRF
        # WRF datasets
        obs_res = None # only for datasets (not used here)
        exp = dataargs['experiment'] # need that one
        dataset_name = exp.name
        avgfolder = exp.avgfolder
        filetypes = dataargs['filetypes']
        # extend the filetype registry with generic FileType entries for unknown types
        fileclasses = WRF.fileclasses.copy()
        for filetype in filetypes:
            if filetype not in fileclasses: fileclasses[filetype] = WRF.FileType(filetype)
        domain = dataargs.get('domain', None)
        periodstr, gridstr = getPeriodGridString(period, grid, exp=exp)
        # check arguments
        if period is None and lclim: raise DatasetError, "A 'period' argument is required to load climatologies!"
        if lone and len(filetypes) > 1: raise DatasetError # process only one file at a time
        if not isinstance(domain, (np.integer, int)): raise DatasetError
        # construct dataset message
        if lone: datamsgstr = "Processing WRF '{:s}'-file from Experiment '{:s}' (d{:02d})".format(filetypes[0], dataset_name, domain)
        else: datamsgstr = "Processing WRF dataset from Experiment '{:s}' (d{:02d})".format(dataset_name, domain)
        # figure out age of source file(s)
        srcage = getSourceAge(fileclasses=fileclasses, filetypes=filetypes, exp=exp, domain=domain,
                              periodstr=periodstr, gridstr=gridstr, lclim=lclim, lts=lts)
        # load source data (deferred via partial; called later by the caller)
        if lclim:
            loadfct = partial(WRF.loadWRF, experiment=exp, name=None, domains=domain, grid=grid,
                              varlist=varlist, period=period, filetypes=filetypes, varatts=None,
                              lconst=True, ltrimT=False) # still want topography...
        elif lts:
            loadfct = partial(WRF.loadWRF_TS, experiment=exp, name=None, domains=domain, grid=grid,
                              varlist=varlist, filetypes=filetypes, varatts=None,
                              lconst=True, ltrimT=False) # still want topography...
    elif dataset == 'CESM':
        import datasets.CESM as CESM
        # CESM datasets
        obs_res = None # only for datasets (not used here)
        domain = None # only for WRF
        exp = dataargs['experiment']
        avgfolder = exp.avgfolder
        dataset_name = exp.name
        periodstr, gridstr = getPeriodGridString(period, grid, exp=exp)
        filetypes = dataargs['filetypes']
        # extend the filetype registry with generic FileType entries for unknown types
        fileclasses = CESM.fileclasses.copy()
        for filetype in filetypes:
            if filetype not in fileclasses: fileclasses[filetype] = CESM.FileType(filetype)
        # check arguments
        if period is None and lclim: raise DatasetError, "A 'period' argument is required to load climatologies!"
        if lone and len(filetypes) > 1: raise DatasetError # process only one file at a time
        # construct dataset message
        if lone: datamsgstr = "Processing CESM '{:s}'-file from Experiment '{:s}'".format(filetypes[0], dataset_name)
        else: datamsgstr = "Processing CESM dataset from Experiment '{:s}'".format(dataset_name)
        # figure out age of source file(s)
        srcage = getSourceAge(fileclasses=fileclasses, filetypes=filetypes, exp=exp, domain=None,
                              periodstr=periodstr, gridstr=gridstr, lclim=lclim, lts=lts)
        # load source data
        load3D = dataargs.pop('load3D', None) # if 3D fields should be loaded (default: False)
        # N.B.: pop() mutates the caller's dataargs dict
        if lclim:
            loadfct = partial(CESM.loadCESM, experiment=exp, name=None, grid=grid, period=period,
                              varlist=varlist, filetypes=filetypes, varatts=None, load3D=load3D,
                              translateVars=None)
        elif lts:
            loadfct = partial(CESM.loadCESM_TS, experiment=exp, name=None, grid=grid, varlist=varlist,
                              filetypes=filetypes, varatts=None, load3D=load3D, translateVars=None)
    else:
        # assume observational datasets
        filetypes = [None] # only for CESM & WRF
        domain = None # only for WRF
        try:
            module = import_module('datasets.{0:s}'.format(dataset))
        except ImportError:
            raise DatasetError("Error loading dataset module '{:s}' from 'datasets' package!".format(dataset))
        dataset_name = module.dataset_name
        resolution = dataargs['resolution']
        if resolution: obs_res = '{0:s}_{1:s}'.format(dataset_name, resolution)
        else: obs_res = dataset_name
        # figure out period
        periodstr, gridstr = getPeriodGridString(period, grid, beginyear=1979)
        if period is None and lclim: periodstr = 'LTM' # long-term mean
        datamsgstr = "Processing Dataset '{:s}'".format(dataset_name)
        # assemble filename to check modification dates (should be only one file)
        filename = getFileName(grid=grid, period=period, name=obs_res, filetype=mode)
        avgfolder = module.avgfolder
        filepath = '{:s}/{:s}'.format(avgfolder, filename)
        # load pre-processed climatology
        kwargs = dict(name=dataset_name, grid=grid, varlist=varlist, resolution=resolution, varatts=None)
        if dataset == 'Unity': kwargs['unity_grid'] = dataargs['unity_grid']
        if lclim and module.loadClimatology is not None:
            loadfct = partial(module.loadClimatology, period=period, **kwargs)
        elif lts and module.loadTimeSeries is not None:
            loadfct = partial(module.loadTimeSeries, **kwargs)
        else:
            raise DatasetError("Unable to identify time aggregation mode; the dataset " +
                               "'{}' may not support selected mode '{}'.".format(dataset, mode))
        # check if the source file is actually correct
        if os.path.exists(filepath): filelist = [filepath]
        else:
            source = loadfct() # don't load dataset, just construct the file list
            filelist = source.filelist
        # figure out age of source file(s)
        srcage = getSourceAge(filelist=filelist, lclim=lclim, lts=lts)
        # N.B.: it would be nice to print a message, but then we would have to make the logger available,
        #       which would be too much trouble
    ## assemble and return meta data
    dataargs = namedTuple(dataset_name=dataset_name, period=period, periodstr=periodstr, avgfolder=avgfolder,
                          filetypes=filetypes, filetype=filetypes[0], domain=domain, obs_res=obs_res,
                          varlist=varlist, grid=grid, gridstr=gridstr, resolution=resolution)
    # return meta data
    return dataargs, loadfct, srcage, datamsgstr
# load dataset dataset = loadNARR_LTM() # change meta-data dataset.name = 'NARR' dataset.title = 'NARR Long-term Climatology' # load data into memory dataset.load() # # add landmask # addLandMask(dataset) # create landmask from precip mask # dataset.mask(dataset.landmask) # mask all fields using the new landmask # add length and names of month addLengthAndNamesOfMonth(dataset, noleap=False) # figure out a different filename filename = getFileName(grid='NARR', period=None, name='NARR', filepattern=avgfile) print('\n'+filename+'\n') if os.path.exists(avgfolder+filename): os.remove(avgfolder+filename) # write data and some annotation ncset = writeNetCDF(dataset, avgfolder+filename, close=False) add_strvar(ncset,'name_of_month', name_of_month, 'time', # add names of month atts=dict(name='name_of_month', units='', long_name='Name of the Month')) # close... ncset.close() dataset.close() # print dataset before print(dataset) print('')
# Rewrite the time axis as months 1..12 and save the PCIC climatology to NetCDF.
# NOTE(review): runs at module level as a script step; 'dataset', 'avgfile' and
# 'avgfolder' are presumably defined earlier in the file — confirm.
time = dataset.time
time.load(data=np.arange(1, 13, dtype=time.dtype)) # 1 to 12 (incl.) for climatology
time.units = 'month'
time.atts.long_name = 'Month of the Year'
print(time)
# print diagnostic
print(dataset)
print('')
for var in dataset:
    #print(var)
    # skip string variables: they have no numerical mean
    if not var.strvar:
        print('Mean {0:s}: {1:s} {2:s}'.format(var.atts.long_name, str(var.mean()), var.units))
    #print('')
print('')
## create new NetCDF file
# figure out a different filename
filename = getFileName(name='PCIC', filepattern=avgfile)
# remove any stale output file before writing
if os.path.exists(avgfolder + filename): os.remove(avgfolder + filename)
# write data and some annotation
sink = writeNetCDF(dataset, avgfolder + filename, close=False)
# add_strvar(sink,'name_of_month', name_of_month, 'time', # add names of month
#            atts=dict(name='name_of_month', units='', long_name='Name of the Month'))
sink.close()
# close...
print('Saving Climatology to: ' + filename)
print(avgfolder)
def getMetaData(dataset, mode, dataargs, lone=True):
    ''' Determine dataset type and meta data, as well as path to the main source file.

        dataset : 'WRF', 'CESM', an all-uppercase observational dataset name, or 'Unity'
        mode    : 'climatology' or 'time-series'
        dataargs: dict of dataset-specific arguments (read via get(); 'load3D' is
                  pop()'ed, i.e. the caller's dict is mutated)
        lone    : if True, only a single filetype may be processed at a time

        Returns a tuple (dataargs namedTuple, loadfct, srcage, datamsgstr). '''
    # determine dataset mode
    lclim = False; lts = False
    if mode == 'climatology': lclim = True
    elif mode == 'time-series': lts = True
    else: raise NotImplementedError, "Unrecognized Mode: '{:s}'".format(mode)
    # general arguments (dataset independent)
    varlist = dataargs.get('varlist',None)
    grid = dataargs.get('grid',None) # get grid
    period = dataargs.get('period',None)
    # determine meta data based on dataset type
    if dataset == 'WRF':
        # WRF datasets
        obs_res = None # only for datasets (not used here)
        exp = dataargs['experiment'] # need that one
        dataset_name = exp.name
        avgfolder = exp.avgfolder
        filetypes = dataargs['filetypes']
        domain = dataargs.get('domain',None)
        periodstr, gridstr = getPeriodGridString(period, grid, exp=exp)
        # check arguments
        if period is None and lclim: raise DatasetError, "A 'period' argument is required to load climatologies!"
        if lone and len(filetypes) > 1: raise DatasetError # process only one file at a time
        if not isinstance(domain, (np.integer,int)): raise DatasetError
        # construct dataset message
        if lone: datamsgstr = "Processing WRF '{:s}'-file from Experiment '{:s}' (d{:02d})".format(filetypes[0], dataset_name, domain)
        else: datamsgstr = "Processing WRF dataset from Experiment '{:s}' (d{:02d})".format(dataset_name, domain)
        # figure out age of source file(s)
        srcage = getSourceAge(fileclasses=WRF.fileclasses, filetypes=filetypes, exp=exp, domain=domain,
                              periodstr=periodstr, gridstr=gridstr, lclim=lclim, lts=lts)
        # load source data (deferred via partial; called later by the caller)
        if lclim:
            loadfct = partial(WRF.loadWRF, experiment=exp, name=None, domains=domain, grid=grid,
                              varlist=varlist, period=period, filetypes=filetypes, varatts=None,
                              lconst=True) # still want topography...
        elif lts:
            loadfct = partial(WRF.loadWRF_TS, experiment=exp, name=None, domains=domain, grid=grid,
                              varlist=varlist, filetypes=filetypes, varatts=None,
                              lconst=True) # still want topography...
    elif dataset == 'CESM':
        # CESM datasets
        obs_res = None # only for datasets (not used here)
        domain = None # only for WRF
        exp = dataargs['experiment']
        avgfolder = exp.avgfolder
        dataset_name = exp.name
        periodstr, gridstr = getPeriodGridString(period, grid, exp=exp)
        filetypes = dataargs['filetypes']
        # check arguments
        if period is None and lclim: raise DatasetError, "A 'period' argument is required to load climatologies!"
        if lone and len(filetypes) > 1: raise DatasetError # process only one file at a time
        # construct dataset message
        if lone: datamsgstr = "Processing CESM '{:s}'-file from Experiment '{:s}'".format(filetypes[0], dataset_name)
        else: datamsgstr = "Processing CESM dataset from Experiment '{:s}'".format(dataset_name)
        # figure out age of source file(s)
        srcage = getSourceAge(fileclasses=CESM.fileclasses, filetypes=filetypes, exp=exp, domain=None,
                              periodstr=periodstr, gridstr=gridstr, lclim=lclim, lts=lts)
        # load source data
        load3D = dataargs.pop('load3D',None) # if 3D fields should be loaded (default: False)
        # N.B.: pop() mutates the caller's dataargs dict
        if lclim:
            loadfct = partial(CESM.loadCESM, experiment=exp, name=None, grid=grid, period=period,
                              varlist=varlist, filetypes=filetypes, varatts=None, load3D=load3D,
                              translateVars=None)
        elif lts:
            loadfct = partial(CESM.loadCESM_TS, experiment=exp, name=None, grid=grid, varlist=varlist,
                              filetypes=filetypes, varatts=None, load3D=load3D, translateVars=None)
    elif dataset == dataset.upper() or dataset == 'Unity':
        # observational datasets (all-caps names by convention)
        filetypes = [None] # only for CESM & WRF
        domain = None # only for WRF
        module = import_module('datasets.{0:s}'.format(dataset))
        dataset_name = module.dataset_name
        resolution = dataargs['resolution']
        if resolution: obs_res = '{0:s}_{1:s}'.format(dataset_name,resolution)
        else: obs_res = dataset_name
        # figure out period
        periodstr, gridstr = getPeriodGridString(period, grid, beginyear=1979)
        if period is None and lclim: periodstr = 'LTM' # long-term mean
        datamsgstr = "Processing Dataset '{:s}'".format(dataset_name)
        # assemble filename to check modification dates (should be only one file)
        filename = getFileName(grid=grid, period=period, name=obs_res, filetype=mode)
        avgfolder = module.avgfolder
        filepath = '{:s}/{:s}'.format(avgfolder,filename)
        # load pre-processed climatology
        if lclim:
            loadfct = partial(module.loadClimatology, name=dataset_name, period=period, grid=grid,
                              varlist=varlist, resolution=resolution, varatts=None)
        elif lts:
            loadfct = partial(module.loadTimeSeries, name=dataset_name, grid=grid, varlist=varlist,
                              resolution=resolution, varatts=None)
        # check if the source file is actually correct
        if os.path.exists(filepath): filelist = [filepath]
        else:
            source = loadfct() # don't load dataset, just construct the file list
            filelist = source.filelist
        # figure out age of source file(s)
        srcage = getSourceAge(filelist=filelist, lclim=lclim, lts=lts)
        # N.B.: it would be nice to print a message, but then we would have to make the logger available,
        #       which would be too much trouble
    else: raise DatasetError, "Dataset '{:s}' not found!".format(dataset)
    ## assemble and return meta data
    dataargs = namedTuple(dataset_name=dataset_name, period=period, periodstr=periodstr, avgfolder=avgfolder,
                          filetypes=filetypes, filetype=filetypes[0], domain=domain, obs_res=obs_res,
                          varlist=varlist, grid=grid, gridstr=gridstr)
    # return meta data
    return dataargs, loadfct, srcage, datamsgstr
def getMetaData(dataset, mode, dataargs):
    ''' Determine dataset type and meta data, as well as path to the main source file.

        dataset : 'WRF', 'CESM', an all-uppercase observational dataset name, or 'Unity'
        mode    : 'climatology' or 'time-series'
        dataargs: dict of dataset-specific arguments (read via get()/[]; 'load3D' is
                  pop()'ed, i.e. the caller's dict is mutated)

        Returns a tuple (module, dataargs namedTuple, loadfct, filepath, datamsgstr);
        raises IOError if the resolved source file does not exist. '''
    # determine dataset mode
    lclim = False
    lts = False
    if mode == 'climatology': lclim = True
    elif mode == 'time-series': lts = True
    else: raise NotImplementedError, "Unrecognized Mode: '{:s}'".format(mode)
    # defaults for specific variables
    obs_res = None
    domain = None
    filetype = None
    varlist = dataargs.get('varlist', None)
    # determine meta data based on dataset type
    if dataset == 'WRF':
        # WRF datasets
        module = import_module('datasets.WRF')
        exp = dataargs['experiment']
        dataset_name = exp.name
        domain = dataargs['domain']
        grid = dataargs.get('grid', None)
        # figure out period: an int is interpreted as a duration from the experiment start
        period = dataargs['period']
        if period is None: pass
        elif isinstance(period, (int, np.integer)):
            beginyear = int(exp.begindate[0:4])
            period = (beginyear, beginyear + period)
        elif len(period) != 2 and all(isInt(period)): raise DateError
        if period is None: periodstr = ''
        else: periodstr = '{0:4d}-{1:4d}'.format(*period)
        gridstr = grid if grid is not None else ''
        # identify file and domain
        if len(dataargs['filetypes']) > 1: raise DatasetError # process only one file at a time
        filetype = dataargs['filetypes'][0]
        if isinstance(domain, (list, tuple)): domain = domain[0]
        if not isinstance(domain, (np.integer, int)): raise DatasetError
        datamsgstr = "Processing WRF '{:s}'-file from Experiment '{:s}' (d{:02d})".format(filetype, dataset_name, domain)
        # assemble filename to check modification dates (should be only one file)
        fileclass = module.fileclasses[filetype] # avoid WRF & CESM name collision
        pstr = '_' + periodstr if periodstr else ''
        gstr = '_' + gridstr if gridstr else ''
        if lclim: filename = fileclass.climfile.format(domain, gstr, pstr) # insert domain number, grid, and period
        elif lts: filename = fileclass.tsfile.format(domain, gstr) # insert domain number, and grid
        avgfolder = exp.avgfolder
        # load source data (deferred via partial; called later by the caller)
        if lclim:
            loadfct = functools.partial(loadWRF, experiment=exp, name=None, domains=domain, grid=None,
                                        varlist=varlist, period=period, filetypes=[filetype], varatts=None,
                                        lconst=True) # still want topography...
        elif lts:
            loadfct = functools.partial(loadWRF_TS, experiment=exp, name=None, domains=domain, grid=None,
                                        varlist=varlist, filetypes=[filetype], varatts=None,
                                        lconst=True) # still want topography...
        filepath = '{:s}/{:s}'.format(avgfolder, filename)
    elif dataset == 'CESM':
        # CESM datasets
        module = import_module('datasets.CESM')
        exp = dataargs['experiment']
        dataset_name = exp.name
        # figure out period: an int is interpreted as a duration from the experiment start
        period = dataargs['period']
        if period is None: pass
        elif isinstance(period, (int, np.integer)):
            beginyear = int(exp.begindate[0:4])
            period = (beginyear, beginyear + period)
        elif len(period) != 2 and all(isInt(period)): raise DateError
        # identify file
        if len(dataargs['filetypes']) > 1: raise DatasetError # process only one file at a time
        filetype = dataargs['filetypes'][0]
        # check period
        if period is None: periodstr = ''
        else: periodstr = '{0:4d}-{1:4d}'.format(*period)
        datamsgstr = "Processing CESM '{:s}'-file from Experiment '{:s}'".format(filetype, dataset_name)
        # assemble filename to check modification dates (should be only one file)
        fileclass = module.fileclasses[filetype] # avoid WRF & CESM name collision
        pstr = '_' + periodstr if periodstr else ''
        if lclim: filename = fileclass.climfile.format('', pstr) # insert domain number, grid, and period
        elif lts: filename = fileclass.tsfile.format('') # insert domain number, and grid
        avgfolder = exp.avgfolder
        # load source data
        load3D = dataargs.pop('load3D', None) # if 3D fields should be loaded (default: False)
        # N.B.: pop() mutates the caller's dataargs dict
        if lclim:
            loadfct = functools.partial(loadCESM, experiment=exp, name=None, grid=None, period=period,
                                        varlist=varlist, filetypes=[filetype], varatts=None, load3D=load3D,
                                        translateVars=None)
        elif lts:
            loadfct = functools.partial(loadCESM_TS, experiment=exp, name=None, grid=None, varlist=varlist,
                                        filetypes=[filetype], varatts=None, load3D=load3D,
                                        translateVars=None)
        filepath = '{:s}/{:s}'.format(avgfolder, filename)
    elif dataset == dataset.upper() or dataset == 'Unity':
        # observational datasets (all-caps names by convention)
        module = import_module('datasets.{0:s}'.format(dataset))
        dataset_name = module.dataset_name
        resolution = dataargs['resolution']
        if resolution: obs_res = '{0:s}_{1:s}'.format(dataset_name, resolution)
        else: obs_res = dataset_name
        # figure out period
        period = dataargs['period']
        if period is None: pass
        elif isinstance(period, (int, np.integer)):
            period = (1979, 1979 + period) # they all begin in 1979
        elif len(period) != 2 and not all(isInt(period)): raise DateError
        datamsgstr = "Processing Dataset '{:s}'".format(dataset_name)
        # check period
        if period is None:
            if mode == 'climatology': periodstr = 'Long-Term Mean'
            else: periodstr = ''
        else: periodstr = '{0:4d}-{1:4d}'.format(*period)
        # assemble filename to check modification dates (should be only one file)
        filename = getFileName(grid=None, period=period, name=obs_res, filetype=mode)
        avgfolder = module.avgfolder
        # load pre-processed climatology (deferred via partial)
        if lclim:
            loadfct = functools.partial(module.loadClimatology, name=dataset_name, period=period, grid=None,
                                        varlist=varlist, resolution=resolution, varatts=None,
                                        folder=module.avgfolder, filelist=None)
        elif lts:
            loadfct = functools.partial(module.loadTimeSeries, name=dataset_name, grid=None, varlist=varlist,
                                        resolution=resolution, varatts=None, folder=None, filelist=None)
        # check if the source file is actually correct
        filepath = '{:s}/{:s}'.format(avgfolder, filename)
        if not os.path.exists(filepath):
            source = loadfct() # no varlist - obs don't have many variables anyways
            filepath = source.filelist[0]
        # N.B.: it would be nice to print a message, but then we would have to make the logger available,
        #       which would be too much trouble
    else: raise DatasetError, "Dataset '{:s}' not found!".format(dataset)
    ## assemble and return meta data
    if not os.path.exists(filepath): raise IOError, "Source file '{:s}' does not exist!".format(filepath)
    dataargs = namedTuple(dataset_name=dataset_name, period=period, periodstr=periodstr, avgfolder=avgfolder,
                          filetype=filetype, domain=domain, obs_res=obs_res, varlist=varlist)
    # return meta data
    return module, dataargs, loadfct, filepath, datamsgstr
# Load shape-averaged source datasets and prepare the merged target NetCDF file.
# NOTE(review): runs at module level as a script step; 'grid', 'pntset', 'period',
# 'tsfile' and 'avgfolder' are presumably defined earlier in the file — confirm.
if grid in ('shpavg', ):
    # regional averages: shape index as grid
    uclim = loadUnity_Shp(shape=pntset, period=period)
    cruclim = loadCRU_Shp(shape=grid, period=period)
    cruts = loadCRU_ShpTS(shape=grid)
else: raise NotImplementedError
grid_name = grid
periodstr = '{0:4d}-{1:4d}'.format(*period)
print('\n *** Merging Shape-Averaged Time-Series on {:s} Grid *** \n'.format(grid, ))
## prepare target dataset
filename = getFileName(grid=grid_name, period=None, name=None, filepattern=tsfile)
filepath = avgfolder + filename
print(' Saving data to: \'{0:s}\'\n'.format(filepath))
assert os.path.exists(avgfolder)
if os.path.exists(filepath): os.remove(filepath) # remove old file
# set attributes
atts = dict() # collect attributes, but add prefixes
atts = uclim.atts.copy() # N.B.: this overwrites the empty dict created above
# NOTE(review): 'Time-sries' typo below is in a runtime string; left unchanged here
atts['title'] = 'Corrected Time-sries on {:s} Grid'.format(grid_name)
# make new dataset
sink = DatasetNetCDF(folder=avgfolder, filelist=[filename], atts=atts, mode='w')
## load source datasets # period = (1979,2009) period = (1979,1994) if grid in ('shpavg',): # regional averages: shape index as grid uclim = loadUnity_Shp(shape=pntset, period=period) cruclim = loadCRU_Shp(shape=grid, period=period) cruts = loadCRU_ShpTS(shape=grid) else: raise NotImplementedError grid_name = grid periodstr = '{0:4d}-{1:4d}'.format(*period) print('\n *** Merging Shape-Averaged Time-Series on {:s} Grid *** \n'.format(grid,)) ## prepare target dataset filename = getFileName(grid=grid_name, period=None, name=None, filepattern=tsfile) filepath = avgfolder + filename print(' Saving data to: \'{0:s}\'\n'.format(filepath)) assert os.path.exists(avgfolder) if os.path.exists(filepath): os.remove(filepath) # remove old file # set attributes atts=dict() # collect attributes, but add prefixes atts = uclim.atts.copy() atts['title'] = 'Corrected Time-sries on {:s} Grid'.format(grid_name) # make new dataset sink = DatasetNetCDF(folder=avgfolder, filelist=[filename], atts=atts, mode='w') # sync and write data so far sink.sync() ## correct data (create variables) for varname,var in uclim.variables.iteritems():
T2 = dataset.Tmin + dataset.Tmax # average temperature is just the average between min and max T2 /= 2. T2.name = 'T2'; T2.atts.long_name='Average 2m Temperature' print(T2) dataset += T2 # add to dataset # rewrite time axis time = dataset.time time.load(data=np.arange(1,13)) time.units = 'month'; time.atts.long_name='Month of the Year' print(time) # print diagnostic print(dataset) print('') for var in dataset: #print(var) print('Mean {0:s}: {1:s} {2:s}'.format(var.atts.long_name, str(var.mean()), var.units)) #print('') print('') ## create new NetCDF file # figure out a different filename filename = getFileName(name='PCIC', filepattern=avgfile) if os.path.exists(avgfolder+filename): os.remove(avgfolder+filename) # write data and some annotation sink = writeNetCDF(dataset, avgfolder+filename, close=False) add_strvar(sink,'name_of_month', name_of_month, 'time', # add names of month atts=dict(name='name_of_month', units='', long_name='Name of the Month')) sink.close() # close... print('Saving Climatology to: '+filename) print(avgfolder)
# Post-process the GPCC long-term-mean climatology and write it to a NetCDF file.
# NOTE(review): runs at module level as a script step; 'dataset', 'res', 'avgfile'
# and 'avgfolder' are presumably defined earlier in the file — confirm.
dataset.name = 'GPCC'
dataset.title = 'GPCC Long-term Climatology'
dataset.atts.resolution = res
# load data into memory
dataset.load()
# add landmask
addLandMask(dataset) # create landmask from precip mask
dataset.mask(dataset.landmask) # mask all fields using the new landmask
# add length and names of month
addLengthAndNamesOfMonth(dataset, noleap=False)
# figure out a different filename
filename = getFileName(grid=res, period=None, name='GPCC', filepattern=avgfile)
print('\n' + filename + '\n')
# remove any stale output file before writing
if os.path.exists(avgfolder + filename): os.remove(avgfolder + filename)
# write data and some annotation
ncset = writeNetCDF(dataset, avgfolder + filename, close=False)
# add_var(ncset,'name_of_month', name_of_month, 'time', # add names of month
#         atts=dict(name='name_of_month', units='', long_name='Name of the Month'))
# close...
ncset.close()
dataset.close()
# print dataset before
print(dataset)
print('')
def getMetaData(dataset, mode, dataargs):
  ''' Determine dataset type and meta data, as well as the path to the main source file.
      Arguments:
        dataset:  dataset identifier ('WRF', 'CESM', an all-caps observational dataset
                  name, or 'Unity')
        mode:     'climatology' or 'time-series'
        dataargs: dict of dataset-specific arguments (experiment, period, filetypes,
                  domain, grid, resolution, varlist, load3D, ...)
      Returns a 5-tuple: (dataset module, meta-data record, load function,
                          source file path, progress message string).
      Raises: NotImplementedError for an unknown mode, DateError for a malformed period,
              DatasetError for invalid dataset arguments, IOError for a missing source file. '''
  # determine dataset mode
  lclim = False; lts = False
  if mode == 'climatology': lclim = True
  elif mode == 'time-series': lts = True
  else: raise NotImplementedError("Unrecognized Mode: '{:s}'".format(mode))
  # defaults for specific variables
  obs_res = None; domain = None; filetype = None
  varlist = dataargs.get('varlist',None)
  # determine meta data based on dataset type
  if dataset == 'WRF':
    # WRF datasets
    module = import_module('datasets.WRF')
    exp = dataargs['experiment']
    dataset_name = exp.name
    domain = dataargs['domain']
    grid = dataargs.get('grid',None)
    # figure out period
    period = dataargs['period']
    if period is None: pass
    elif isinstance(period,(int,np.integer)):
      # an integer period is interpreted as a number of years from the experiment start
      beginyear = int(exp.begindate[0:4])
      period = (beginyear, beginyear+period)
    elif len(period) != 2 or not all(isInt(period)):
      # a valid period has to be a pair of integer years
      # (the old check 'len(period) != 2 and all(isInt(period))' let malformed periods through)
      raise DateError
    if period is None: periodstr = ''
    else: periodstr = '{0:4d}-{1:4d}'.format(*period)
    gridstr = grid if grid is not None else ''
    # identify file and domain
    if len(dataargs['filetypes']) > 1: raise DatasetError # process only one file at a time
    filetype = dataargs['filetypes'][0]
    if isinstance(domain,(list,tuple)): domain = domain[0] # only process the first domain
    if not isinstance(domain, (np.integer,int)): raise DatasetError
    datamsgstr = "Processing WRF '{:s}'-file from Experiment '{:s}' (d{:02d})".format(filetype, dataset_name, domain)
    # assemble filename to check modification dates (should be only one file)
    fileclass = module.fileclasses[filetype] # avoid WRF & CESM name collision
    pstr = '_'+periodstr if periodstr else ''
    gstr = '_'+gridstr if gridstr else ''
    if lclim: filename = fileclass.climfile.format(domain,gstr,pstr) # insert domain number, grid, and period
    elif lts: filename = fileclass.tsfile.format(domain,gstr) # insert domain number, and grid
    avgfolder = exp.avgfolder
    # load source data
    if lclim:
      loadfct = functools.partial(loadWRF, experiment=exp, name=None, domains=domain, grid=None,
                                  varlist=varlist, period=period, filetypes=[filetype],
                                  varatts=None, lconst=True) # still want topography...
    elif lts:
      loadfct = functools.partial(loadWRF_TS, experiment=exp, name=None, domains=domain, grid=None,
                                  varlist=varlist, filetypes=[filetype],
                                  varatts=None, lconst=True) # still want topography...
    filepath = '{:s}/{:s}'.format(avgfolder,filename)
  elif dataset == 'CESM':
    # CESM datasets
    module = import_module('datasets.CESM')
    exp = dataargs['experiment']
    dataset_name = exp.name
    # figure out period
    period = dataargs['period']
    if period is None: pass
    elif isinstance(period,(int,np.integer)):
      # an integer period is interpreted as a number of years from the experiment start
      beginyear = int(exp.begindate[0:4])
      period = (beginyear, beginyear+period)
    elif len(period) != 2 or not all(isInt(period)):
      raise DateError # a valid period has to be a pair of integer years (see WRF branch)
    # identify file
    if len(dataargs['filetypes']) > 1: raise DatasetError # process only one file at a time
    filetype = dataargs['filetypes'][0]
    # check period
    if period is None: periodstr = ''
    else: periodstr = '{0:4d}-{1:4d}'.format(*period)
    datamsgstr = "Processing CESM '{:s}'-file from Experiment '{:s}'".format(filetype, dataset_name)
    # assemble filename to check modification dates (should be only one file)
    fileclass = module.fileclasses[filetype] # avoid WRF & CESM name collision
    pstr = '_'+periodstr if periodstr else ''
    if lclim: filename = fileclass.climfile.format('',pstr) # insert (empty) grid and period
    elif lts: filename = fileclass.tsfile.format('') # insert (empty) grid
    avgfolder = exp.avgfolder
    # load source data
    load3D = dataargs.pop('load3D',None) # if 3D fields should be loaded (default: False)
    if lclim:
      loadfct = functools.partial(loadCESM, experiment=exp, name=None, grid=None, period=period,
                                  varlist=varlist, filetypes=[filetype], varatts=None,
                                  load3D=load3D, translateVars=None)
    elif lts:
      loadfct = functools.partial(loadCESM_TS, experiment=exp, name=None, grid=None,
                                  varlist=varlist, filetypes=[filetype], varatts=None,
                                  load3D=load3D, translateVars=None)
    filepath = '{:s}/{:s}'.format(avgfolder,filename)
  elif dataset == dataset.upper() or dataset == 'Unity':
    # observational datasets (all-caps names) and the merged 'Unity' dataset
    module = import_module('datasets.{0:s}'.format(dataset))
    dataset_name = module.dataset_name
    resolution = dataargs['resolution']
    if resolution: obs_res = '{0:s}_{1:s}'.format(dataset_name,resolution)
    else: obs_res = dataset_name
    # figure out period
    period = dataargs['period']
    if period is None: pass
    elif isinstance(period,(int,np.integer)):
      period = (1979, 1979+period) # they all begin in 1979
    elif len(period) != 2 or not all(isInt(period)):
      raise DateError # a valid period has to be a pair of integer years (see WRF branch)
    datamsgstr = "Processing Dataset '{:s}'".format(dataset_name)
    # check period
    if period is None:
      if mode == 'climatology': periodstr = 'Long-Term Mean'
      else: periodstr = ''
    else: periodstr = '{0:4d}-{1:4d}'.format(*period)
    # assemble filename to check modification dates (should be only one file)
    filename = getFileName(grid=None, period=period, name=obs_res, filetype=mode)
    avgfolder = module.avgfolder
    # load pre-processed climatology
    if lclim:
      loadfct = functools.partial(module.loadClimatology, name=dataset_name, period=period, grid=None,
                                  varlist=varlist, resolution=resolution, varatts=None,
                                  folder=module.avgfolder, filelist=None)
    elif lts:
      loadfct = functools.partial(module.loadTimeSeries, name=dataset_name, grid=None,
                                  varlist=varlist, resolution=resolution, varatts=None,
                                  folder=None, filelist=None)
    # check if the source file is actually correct
    filepath = '{:s}/{:s}'.format(avgfolder,filename)
    if not os.path.exists(filepath):
      source = loadfct() # no varlist - obs don't have many variables anyways
      filepath = source.filelist[0]
      # N.B.: it would be nice to print a message, but then we would have to make the logger
      #       available, which would be too much trouble
  else:
    raise DatasetError("Dataset '{:s}' not found!".format(dataset))
  ## assemble and return meta data
  if not os.path.exists(filepath):
    raise IOError("Source file '{:s}' does not exist!".format(filepath))
  dataargs = namedTuple(dataset_name=dataset_name, period=period, periodstr=periodstr,
                        avgfolder=avgfolder, filetype=filetype, domain=domain,
                        obs_res=obs_res, varlist=varlist)
  # return meta data
  return module, dataargs, loadfct, filepath, datamsgstr