def main(options, scomm, rank, size):
    """Drive the slice-to-series conversion of history files with PyReshaper.

    The case XML files are read on every task, rank 0 builds the list of
    PyReshaper specifiers, the list is duplicated to all tasks and a single
    reshaper is created and run over it.

    Arguments:
    options (object) - command line options; caseroot, debug and standalone are read
    scomm (object) - simple communicator used to sync and duplicate the specifiers
    rank (int) - rank of the calling task; only rank 0 loads the specifiers
    size (int) - number of tasks (not used inside this function)

    Returns 0 after the conversion and its timing diagnostics have run.
    """
    # CASEROOT is given on the command line as required option --caseroot
    case_dir = options.caseroot[0]

    # unless running standalone, the XML files live one directory above the given path
    if not options.standalone:
        case_dir = os.path.dirname(case_dir)
    if rank == 0:
        print('cesm_tseries_generator: caseroot = {0}'.format(case_dir))

    # set the debug level
    verbosity = options.debug[0]

    # pick the XML files that hold the settings: the standalone case only has env_postprocess.xml
    if options.standalone:
        xml_files = ['env_postprocess.xml']
    else:
        xml_files = [
            'env_case.xml',
            'env_run.xml',
            'env_build.xml',
            'env_mach_pes.xml',
        ]
    case_env = cesmEnvLib.readXML(case_dir, xml_files)

    # loading the specifiers from the env_timeseries.xml only needs to run on
    # the master task (rank=0); every other task starts with an empty list
    spec_list = []
    if rank == 0:
        spec_list = readArchiveXML(case_dir, case_env['DOUT_S_ROOT'], case_env['CASE'],
                                   options.standalone, verbosity)
    scomm.sync()

    # hand every task its own copy of the specifier list built on rank 0
    spec_list = scomm.partition(spec_list, func=partition.Duplicate(), involved=True)

    # create the PyReshaper object and run the conversion (slice-to-series) process
    converter = reshaper.create_reshaper(spec_list, serial=False, verbosity=verbosity)
    converter.convert()

    # Print timing diagnostics
    converter.print_diagnostics()

    # TO-DO check if DOUT_S_SAVE_HISTORY_FILES is true or false and
    # delete history files accordingly

    return 0
def testDuplicate(self):
    # Duplicate is expected to hand back the first element of each input
    # tuple unchanged; the info message is printed and reused on failure.
    for args in self.inputs:
        wanted = args[0]
        got = partition.Duplicate()(*args)
        report = test_info_msg('Duplicate', args[0], args[1], args[2], got, wanted)
        print(report)
        testing.assert_array_equal(got, wanted, report)
def main(options, scomm, rank, size, debug, debugMsg):
    """Convert history files to time series, one specifier per sub-communicator.

    Rank 0 reads the time-series settings and builds the PyReshaper
    specifiers, which are duplicated to all tasks.  The communicator is then
    divided: rank 0 hands out specifier indices, each sub-communicator root
    relays the index to its local ranks, and every rank of the
    sub-communicator runs the reshaper on that specifier.  The value -99
    tells a sub-communicator that no work is left.

    Arguments:
    options (object) - command line options; caseroot and standalone are read
    scomm (object) - global simple communicator
    rank (int) - rank of the calling task in scomm
    size (int) - number of tasks in scomm (not used inside this function)
    debug - verbosity passed to readArchiveXML and to the reshaper
    debugMsg (object) - vprinter object for printing debugging messages

    Returns 0.
    """
    ALL_DONE = -99  # index value that signals "no more specifiers"

    # CASEROOT is given on the command line as required option --caseroot
    caseroot = options.caseroot[0]

    # get the XML variables loaded into a hash
    cesmEnv = cesmEnvLib.readXML(caseroot, ['env_postprocess.xml'])

    # loading the specifiers from the env_timeseries.xml only needs to run on
    # the master task (rank=0); the other tasks start with an empty list
    specifiers = []
    if rank == 0:
        in_root = cesmEnv['TIMESERIES_INPUT_ROOTDIR']
        out_root = cesmEnv['TIMESERIES_OUTPUT_ROOTDIR']
        case = cesmEnv['CASE']
        # the XML value is text; readArchiveXML is given it as 1 or 0
        chunk_flag = cesmEnv['TIMESERIES_COMPLETECHUNK']
        completechunk = 1 if chunk_flag.upper() in ['T', 'TRUE'] else 0
        specifiers, log = readArchiveXML(caseroot, in_root, out_root, case,
                                         options.standalone, completechunk, debug, debugMsg)
    scomm.sync()

    # specifiers is a list of pyreshaper specification objects ready to pass to the reshaper
    specifiers = scomm.partition(specifiers, func=partition.Duplicate(), involved=True)
    if rank == 0:
        debugMsg("# of Specifiers: " + str(len(specifiers)), header=True, verbosity=1)

    if len(specifiers) > 0:
        # setup subcommunicators to do streams and chunks in parallel
        # everyone participates except for root
        inter_comm, lsubcomms = divide_comm(scomm, len(specifiers))
        color = inter_comm.get_color()
        lsize = inter_comm.get_size()
        lrank = inter_comm.get_rank()

        GWORK_TAG = 10  # global comm mpi tag
        LWORK_TAG = 20  # local comm mpi tag

        def run_specifier(index):
            # create the PyReshaper object for one specifier and run the
            # conversion (slice-to-series) process on this sub-communicator
            worker = reshaper.create_reshaper(specifiers[index], serial=False,
                                              verbosity=debug, simplecomm=inter_comm)
            worker.convert()

        if rank == 0:
            # global root - hands out specifiers to work on. When complete, it
            # must tell each subcomm all work is done.
            for index, _ in enumerate(specifiers):
                scomm.ration(data=index, tag=GWORK_TAG)
            for _ in range(lsubcomms):
                scomm.ration(data=ALL_DONE, tag=GWORK_TAG)
        elif lrank == 0:
            # subcomm root - performs the same tasks as other subcomm ranks, but
            # also gets the specifier to work on and sends this information to
            # all ranks within subcomm
            while True:
                index = scomm.ration(tag=GWORK_TAG)  # recv from global
                for _ in range(1, lsize):
                    inter_comm.ration(index, LWORK_TAG)  # send to local ranks
                if index != ALL_DONE:
                    run_specifier(index)
                inter_comm.sync()
                if index == ALL_DONE:
                    break
        else:
            # all subcomm ranks - recv the specifier to work on and call the reshaper
            while True:
                index = inter_comm.ration(tag=LWORK_TAG)  # recv from local root
                if index != ALL_DONE:
                    run_specifier(index)
                inter_comm.sync()
                if index == ALL_DONE:
                    break

    if rank == 0:
        # Update system log with the dates that were just converted
        debugMsg('before chunking.write_log', header=True, verbosity=1)
        chunking.write_log('{0}/logs/ts_status.log'.format(caseroot), log)
        debugMsg('after chunking.write_log', header=True, verbosity=1)

    scomm.sync()

    return 0
def main(options, main_comm, debugMsg):
    """Set up the environment and run the ocean diagnostics in parallel.

    Calls 6 different diagnostics generation types:
    model vs. observation (optional BGC - ecosystem)
    model vs. control (optional BGC - ecosystem)
    model time-series (optional BGC - ecosystem)

    The manager task reads the settings, renders the main index.html page,
    copies the style sheet and logo files into WORKDIR and writes the web
    directory file.  When there is more than one task and more than one
    diagnostic, the communicator is divided and the requested diagnostics
    are spread over the sub-communicators; each one is then run in turn.

    Arguments:
    options (object) - command line options
    main_comm (object) - MPI simple communicator object
    debugMsg (object) - vprinter object for printing debugging messages

    Returns:
    1 when a diagnostic raises RuntimeError; otherwise the function reaches
    its end and returns None.  The manager calls sys.exit(1) when no
    diagnostics were requested.

    The env_diags_ocn.xml configuration file defines the way the diagnostics are generated.
    See (website URL here...) for a complete description of the env_diags_ocn XML options.
    """
    # initialize the environment dictionary
    envDict = dict()

    # CASEROOT is given on the command line as required option --caseroot
    if main_comm.is_manager():
        caseroot = options.caseroot[0]
        debugMsg('caseroot = {0}'.format(caseroot), header=True, verbosity=2)

        debugMsg('calling initialize_main', header=True, verbosity=2)
        envDict = initialize_main(envDict, caseroot, debugMsg, options.standalone)

        debugMsg('calling check_ncl_nco', header=True, verbosity=2)
        diagUtilsLib.check_ncl_nco(envDict)

    # broadcast envDict to all tasks
    envDict = main_comm.partition(data=envDict, func=partition.Duplicate(), involved=True)
    sys.path.append(envDict['PATH'])
    main_comm.sync()

    # get list of diagnostics types to be created
    diag_list = list()
    num_of_diags = 0

    if main_comm.is_manager():
        diag_list, diag_dict = setup_diags(envDict)

        num_of_diags = len(diag_list)
        if num_of_diags == 0:
            print('No ocean diagnostics specified. Please check the {0}/env_diags_ocn.xml settings.'.format(envDict['PP_CASE_PATH']))
            sys.exit(1)

        print('User requested diagnostics:')
        for diag in diag_list:
            print(' {0}'.format(diag))

        # an already existing working directory is fine, anything else is an error
        try:
            os.makedirs(envDict['WORKDIR'])
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                err_msg = 'ERROR: ocn_diags_generator.py problem accessing the working directory {0}'.format(envDict['WORKDIR'])
                raise OSError(err_msg)

        debugMsg('Ocean diagnostics - Creating main index.html page', header=True, verbosity=2)

        # define the templatePath
        templatePath = '{0}/diagnostics/diagnostics/ocn/Templates'.format(envDict['POSTPROCESS_PATH'])

        templateLoader = jinja2.FileSystemLoader(searchpath=templatePath)
        templateEnv = jinja2.Environment(loader=templateLoader)

        template_file = 'ocean_diagnostics.tmpl'
        template = templateEnv.get_template(template_file)

        # get the current datetime string for the template
        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        # set the template variables
        templateVars = {
            'casename': envDict['CASE'],
            'tagname': envDict['CESM_TAG'],
            'diag_dict': diag_dict,
            'control_casename': envDict['CNTRLCASE'],
            'start_year': envDict['YEAR0'],
            'stop_year': envDict['YEAR1'],
            'control_start_year': envDict['CNTRLYEAR0'],
            'control_stop_year': envDict['CNTRLYEAR1'],
            'today': now,
            'tseries_start_year': envDict['TSERIES_YEAR0'],
            'tseries_stop_year': envDict['TSERIES_YEAR1'],
        }

        # write the main index.html page to the top working directory
        main_html = template.render(templateVars)
        with open('{0}/index.html'.format(envDict['WORKDIR']), 'w') as index:
            index.write(main_html)

        debugMsg('Ocean diagnostics - Copying stylesheet', header=True, verbosity=2)
        shutil.copy2('{0}/Templates/diag_style.css'.format(envDict['POSTPROCESS_PATH']),
                     '{0}/diag_style.css'.format(envDict['WORKDIR']))

        debugMsg('Ocean diagnostics - Copying logo files', header=True, verbosity=2)
        if not os.path.exists('{0}/logos'.format(envDict['WORKDIR'])):
            os.mkdir('{0}/logos'.format(envDict['WORKDIR']))

        for filename in glob.glob(os.path.join('{0}/Templates/logos'.format(envDict['POSTPROCESS_PATH']), '*.*')):
            shutil.copy(filename, '{0}/logos'.format(envDict['WORKDIR']))

        # setup the unique OCNDIAG_WEBDIR output file; the time stamp in the
        # file name keeps separate runs from overwriting each other
        key = 'OCNDIAG_WEBDIR'
        value = envDict['WORKDIR']
        web_file = '{0}/web_dirs/{1}.{2}'.format(envDict['PP_CASE_PATH'], key,
                                                 datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S'))
        # best effort: a failure here only produces a warning.  Exception (not a
        # bare except) so that Ctrl-C and sys.exit are not swallowed.
        try:
            diagUtilsLib.write_web_file(web_file, 'ocn', key, value)
        except Exception:
            print('WARNING ocn_diags_generator unable to write {0}={1} to {2}'.format(key, value, web_file))

    main_comm.sync()

    # broadcast the diag_list to all tasks
    num_of_diags = main_comm.partition(num_of_diags, func=partition.Duplicate(), involved=True)
    diag_list = main_comm.partition(data=diag_list, func=partition.Duplicate(), involved=True)
    main_comm.sync()

    # initialize some variables for distributing diagnostics across the communicators
    diags_send = diag_list
    gsize = main_comm.get_size()
    grank = main_comm.get_rank()
    local_diag_list = list()

    # divide the main communicator into sub_communicators to be passed to each diag class
    # split mpi comm world if the size of the communicator > 1 and the num_of_diags > 1
    if gsize > 1 and num_of_diags > 1:
        # grank % num_of_diags is always smaller than num_of_diags, so it can
        # be used directly as an index into groups
        temp_color = grank % num_of_diags
        groups = list(range(num_of_diags))
        debugMsg('global_rank {0}, temp_color {1}, #of groups(diag types) {2}, groups {3}, diag_list {4}'.format(grank, temp_color, num_of_diags, groups, diag_list), header=True, verbosity=2)
        group = groups[temp_color]
        inter_comm, multi_comm = main_comm.divide(group)
        color = inter_comm.get_color()
        lsize = inter_comm.get_size()
        lrank = inter_comm.get_rank()
        lmaster = inter_comm.is_manager()
        debugMsg('color {0}, lsize {1}, lrank {2}, lmaster {3}'.format(color, lsize, lrank, lmaster), header=True, verbosity=2)

        # partition the diag_list between communicators
        DIAG_LIST_TAG = 10
        if lmaster:
            local_diag_list = multi_comm.partition(diag_list, func=partition.EqualStride(), involved=True)
            debugMsg('lrank = {0} local_diag_list = {1}'.format(lrank, local_diag_list), header=True, verbosity=2)
            # one send per non-master rank of this sub-communicator
            for b in range(1, lsize):
                diags_send = inter_comm.ration(data=local_diag_list, tag=DIAG_LIST_TAG)
                debugMsg('b = {0} diags_send = {1} lsize = {2}'.format(b, diags_send, lsize), header=True, verbosity=2)
        else:
            local_diag_list = inter_comm.ration(tag=DIAG_LIST_TAG)
        # .format (the original had a comma here, which left '{0}' unfilled)
        debugMsg('local_diag_list {0}'.format(local_diag_list), header=True, verbosity=2)
    else:
        inter_comm = main_comm
        lmaster = main_comm.is_manager()
        lsize = main_comm.get_size()
        lrank = main_comm.get_rank()
        local_diag_list = diag_list

    inter_comm.sync()
    main_comm.sync()

    # loop through the local_diag_list
    for requested_diag in local_diag_list:
        try:
            debugMsg('requested_diag {0}, lrank {1}, lsize {2}, lmaster {3}'.format(requested_diag, lrank, lsize, lmaster), header=True, verbosity=2)
            diag = ocn_diags_factory.oceanDiagnosticsFactory(requested_diag)

            # check the prerequisites for the diagnostics types
            debugMsg('Checking prerequisites for {0}'.format(diag.__class__.__name__), header=True, verbosity=2)

            # the sub-communicator master records failures as keys in envDict so
            # that the broadcast below makes every rank take the same decision
            skip_key = '{0}_SKIP'.format(requested_diag)
            if lmaster:
                try:
                    envDict = diag.check_prerequisites(envDict)
                except ocn_diags_bc.PrerequisitesError:
                    print("Problem with check_prerequisites for '{0}' skipping!".format(requested_diag))
                    envDict[skip_key] = True
                except RuntimeError as e:
                    # unrecoverable error, bail!
                    print(e)
                    envDict['unrecoverableErrorOnMaster'] = True

            inter_comm.sync()

            # broadcast the envDict
            envDict = inter_comm.partition(data=envDict, func=partition.Duplicate(), involved=True)

            if 'unrecoverableErrorOnMaster' in envDict:
                raise RuntimeError

            # run the diagnostics type on each inter_comm
            if skip_key not in envDict:
                # set the shell env using the values set in the XML and read into the envDict across all tasks
                cesmEnvLib.setXmlEnv(envDict)
                # run the diagnostics
                envDict = diag.run_diagnostics(envDict, inter_comm)

            inter_comm.sync()

        except ocn_diags_bc.RecoverableError as e:
            # catch all recoverable errors, print a message and continue.
            print(e)
            print("Skipped '{0}' and continuing!".format(requested_diag))
        except RuntimeError as e:
            # unrecoverable error, bail!
            print(e)
            return 1

    main_comm.sync()
def main(options, main_comm, debugMsg):
    """Set up the environment and run the pyAverager for the ice diagnostics.

    Climatology files are created for the control ("CONT") case when
    COMPUTE_CLIMO_CONT is true, and for the difference ("DIFF") case when
    both MODEL_VS_MODEL and COMPUTE_CLIMO_DIFF are true.  Any exception
    raised while creating them is printed with its traceback and the
    process exits with status 1.

    Arguments:
    options (object) - command line options
    main_comm (object) - MPI simple communicator object
    debugMsg (object) - vprinter object for printing debugging messages

    The env_diags_ice.xml configuration file defines the way the diagnostics are generated.
    See (website URL here...) for a complete description of the env_diags_ice XML options.
    """
    # initialize the environment dictionary
    envDict = dict()

    # CASEROOT is given on the command line as required option --caseroot
    caseroot = options.caseroot[0]
    if main_comm.is_manager():
        debugMsg('caseroot = {0}'.format(caseroot), header=True)
        debugMsg('calling initialize_envDict', header=True)

    envDict = initialize_envDict(envDict, caseroot, debugMsg, options.standalone)

    # broadcast envDict to all tasks
    envDict = main_comm.partition(data=envDict, func=partition.Duplicate(), involved=True)
    main_comm.sync()

    # specify variables to include in the averages, empty list implies get them all
    varList = []

    # history file glob and matching regular expression, shared by both cases.
    # Raw string: '\.' and '\d' are not valid escapes in a normal literal.
    # NOTE(review): the dots in 'cice.h.' are not escaped and so match any
    # character - confirm whether that is intended.
    suffix = 'cice.h.*.nc'
    filep = r'.*\.cice.h.\d{4,4}-\d{2,2}\.nc'

    # generate the climatology files used for all plotting types using the pyAverager
    if main_comm.is_manager():
        debugMsg('calling createClimFiles', header=True)
        debugMsg('calling checkHistoryFiles for control run', header=True)

    main_comm.sync()
    cont_time_series = envDict['CONT_TIMESERIES']
    main_comm.sync()

    # get model history file information from the DOUT_S_ROOT archive location
    start_year, stop_year, in_dir, envDict['cont_htype'], envDict['cont_key_infile'] = \
        diagUtilsLib.checkHistoryFiles(
            cont_time_series, envDict['PATH_CONT'], envDict['CASE_TO_CONT'],
            envDict['BEGYR_CONT'], envDict['ENDYR_CONT'], 'ice', suffix, filep)

    if envDict['COMPUTE_CLIMO_CONT'].lower() == 'true':
        try:
            split_size = None
            if cont_time_series.lower() == 'true':
                if main_comm.is_manager():
                    debugMsg('Computing averages for model vs. obs', header=True)
                h_path = envDict['PATH_CONT'] + '/ice/proc/tseries/monthly/'
                # Check to see if tseries is split into hemispheres
                split = checkIceSplit(envDict['ICE_NY_CONT'], envDict['cont_key_infile'])
                if split:
                    split_size = 'nj=' + envDict['ICE_NY_CONT'] + ',ni=' + envDict['ICE_NX_CONT']
            else:
                h_path = envDict['PATH_CONT'] + '/ice/hist/'
                split = False

            # average over the last YRS_TO_AVG years ending at ENDYR_CONT
            avg_BEGYR = (int(envDict['ENDYR_CONT']) - int(envDict['YRS_TO_AVG'])) + 1
            createClimFiles(avg_BEGYR, envDict['ENDYR_CONT'], h_path, split, split_size,
                            envDict['cont_htype'], envDict['cont_key_infile'],
                            envDict['PATH_CLIMO_CONT'], envDict['CASE_TO_CONT'],
                            'cice.h', varList, envDict, envDict['GRIDFILECONT'],
                            envDict['BEGYR_CONT'], envDict['ENDYR_CONT'],
                            main_comm, debugMsg)
        except Exception as error:
            print(str(error))
            traceback.print_exc()
            sys.exit(1)

    if envDict['MODEL_VS_MODEL'].lower() == 'true' and envDict['COMPUTE_CLIMO_DIFF'].lower() == 'true':
        try:
            diff_time_series = envDict['DIFF_TIMESERIES']
            split_size = None
            start_year, stop_year, in_dir, envDict['diff_htype'], envDict['diff_key_infile'] = \
                diagUtilsLib.checkHistoryFiles(
                    diff_time_series, envDict['PATH_DIFF'], envDict['CASE_TO_DIFF'],
                    envDict['BEGYR_DIFF'], envDict['ENDYR_DIFF'], 'ice', suffix, filep)

            if diff_time_series.lower() == 'true':
                if main_comm.is_manager():
                    debugMsg('Computing averages for model vs. model', header=True)
                h_path = envDict['PATH_DIFF'] + '/ice/proc/tseries/monthly/'
                # Check to see if tseries is split into hemispheres
                split = checkIceSplit(envDict['ICE_NY_DIFF'], envDict['diff_key_infile'])
                if split:
                    split_size = 'nj=' + envDict['ICE_NY_DIFF'] + ',ni=' + envDict['ICE_NX_DIFF']
            else:
                h_path = envDict['PATH_DIFF'] + '/ice/hist/'
                split = False

            # average over the last YRS_TO_AVG years ending at ENDYR_DIFF
            avg_BEGYR_DIFF = (int(envDict['ENDYR_DIFF']) - int(envDict['YRS_TO_AVG'])) + 1
            createClimFiles(avg_BEGYR_DIFF, envDict['ENDYR_DIFF'], h_path, split, split_size,
                            envDict['diff_htype'], envDict['diff_key_infile'],
                            envDict['PATH_CLIMO_DIFF'], envDict['CASE_TO_DIFF'],
                            'cice.h', varList, envDict, envDict['GRIDFILEDIFF'],
                            envDict['BEGYR_DIFF'], envDict['ENDYR_DIFF'],
                            main_comm, debugMsg)
        except Exception as error:
            print(str(error))
            traceback.print_exc()
            sys.exit(1)
def _emit(*items):
    """Print items separated by single spaces (same text as a Python 2 print statement)."""
    print(' '.join(str(item) for item in items))


def main(options, scomm, rank, size):
    """Map the case output to the PyConform json tables and run PyConform on each.

    All tasks read the XML settings, load the extra pyconform modules and
    take part in building the variable list.  Rank 0 matches the table
    specs to the streams and the resulting mappings are duplicated to every
    task.  The communicator is then divided: rank 0 hands out mapping keys,
    each sub-communicator root relays the key to its local ranks, and every
    rank of the sub-communicator calls run_PyConform for it.  The value -99
    tells a sub-communicator that no work is left.

    Arguments:
    options (object) - command line options; only caseroot is read
    scomm (object) - global simple communicator
    rank (int) - rank of the calling task in scomm
    size (int) - number of tasks in scomm

    The per-task failure count is only printed; nothing is returned.
    """
    # CASEROOT is given on the command line as required option --caseroot
    caseroot = options.caseroot[0]

    # is there only one mip definition in each file?
    ind = "True"

    # get the XML variables loaded into a hash
    env_file_list = ['env_postprocess.xml', 'env_conform.xml']
    cesmEnv = cesmEnvLib.readXML(caseroot, env_file_list)

    # We want to have warnings and not errors (at least for the first sets of cmip simulations)
    simplefilter("default", ValidationWarning)

    # Get the extra modules pyconform needs
    pp_path = cesmEnv["POSTPROCESS_PATH"]
    conform_module_path = pp_path+'/conformer/conformer/source/pyconform/modules/'
    for i, m in enumerate(external_mods):
        print("Loading: "+conform_module_path+"/"+m)
        load_source('user{}'.format(i), conform_module_path+"/"+m)

    # create the cesm stream to table mapping
    dout_s_root = cesmEnv['DOUT_S_ROOT']
    pc_inpur_dir = cesmEnv['CONFORM_JSON_DIRECTORY']+'/PyConform_input/'
    nc_files = find_nc_files(dout_s_root)
    variable_list = fill_list(nc_files, pc_inpur_dir, cesmEnv["CONFORM_EXTRA_FIELD_NETCDF_DIR"], scomm, rank, size)

    mappings = {}
    if rank == 0:
        mappings = match_tableSpec_to_stream(pc_inpur_dir, variable_list)
        for k, v in sorted(mappings.items()):
            _emit(k)
            for f in sorted(v):
                _emit(f)
            _emit(len(v), '\n\n')
    scomm.sync()

    # Pass the stream and mapping information to the other procs
    mappings = scomm.partition(mappings, func=partition.Duplicate(), involved=True)
    # formatted through _emit: the original parenthesised print showed a
    # tuple under Python 2
    _emit("I CAN RUN ", len(mappings), " json files")
    failures = 0

    if len(mappings) > 0:
        # setup subcommunicators to do streams and chunks in parallel
        # everyone participates except for root
        inter_comm, lsubcomms = divide_comm(scomm, len(mappings), ind)
        color = inter_comm.get_color()
        lsize = inter_comm.get_size()
        lrank = inter_comm.get_rank()
        _emit("MPI INFO: ", color, " ", lrank, "/", lsize, " ", rank, "/", size)

        GWORK_TAG = 10  # global comm mpi tag
        LWORK_TAG = 20  # local comm mpi tag
        # global root - hands out mappings to work on. When complete, it must tell each subcomm all work is done.
        if rank == 0:
            for i in list(mappings):  # hand out all mappings
                scomm.ration(data=i, tag=GWORK_TAG)
            # NOTE(review): this sends lsubcomms-1 completion signals; confirm
            # against what divide_comm returns as lsubcomms.
            for i in range(1, lsubcomms):  # complete, signal this to all subcomms
                scomm.ration(data=-99, tag=GWORK_TAG)

        # subcomm root - performs the same tasks as other subcomm ranks, but also gets the specifier to work on and sends
        # this information to all ranks within subcomm
        elif lrank == 0:
            i = -999
            while i != -99:
                i = scomm.ration(tag=GWORK_TAG)  # recv from global
                for x in range(1, lsize):
                    inter_comm.ration(i, LWORK_TAG)  # send to local ranks
                if i != -99:
                    _emit("(", rank, "/", lrank, ")", " start running ", i)
                    failures += run_PyConform(i, mappings[i], inter_comm)
                    _emit("(", rank, "/", lrank, ")", " finished running ", i)
                    _emit("(", rank, "/", lrank, ")", "FAILURES: ", failures)
                inter_comm.sync()

        # all subcomm ranks - recv the specifier to work on and call the reshaper
        else:
            i = -999
            while i != -99:
                i = inter_comm.ration(tag=LWORK_TAG)  # recv from local root
                if i != -99:
                    _emit("(", rank, "/", lrank, ")", " start running ", i)
                    failures += run_PyConform(i, mappings[i], inter_comm)
                    _emit("(", rank, "/", lrank, ")", " finished running ", i)
                    _emit("(", rank, "/", lrank, ")", "FAILURES: ", failures)
                inter_comm.sync()
        _emit("(", rank, "/", lrank, ")", " FINISHED")
    scomm.sync()
def _record_file(f, fn, root_dir, extra_dir, variablelist):
    """Add every variable of the open dataset f (file name fn) to variablelist."""
    # the component name is the fifth path element from the end of the file name
    mt = fn.replace(root_dir, "").split("/")[-5]
    model_type = mt
    if "lnd" in model_type or "rof" in model_type:
        model_type = 'lnd,rof'
    if "glc" in model_type:
        model_type = 'glc,lnd'

    # only time series files that have a time variable are catalogued
    if "time" not in f.variables or "tseries" not in fn:
        variablelist["skip"] = {}
        return

    lt = "none"
    ln = "none"
    lat_name = None
    lon_name = None
    lev_name = None
    time_name = None

    # Find which dim variables to use; the variable is looked up by the third
    # dot-separated field from the end of the file name
    series_var = f.variables[fn.split('.')[-3]]
    v_dims = series_var.dimensions
    for i in grids[mt]['lat']:
        if i in v_dims:
            if 'nlat' in i:
                # index dimension: take the name from the coordinates attribute
                lat_name = str(series_var.coordinates.split()[1])
            else:
                lat_name = i
            lt = len(f.dimensions[i])
    for i in grids[mt]['lon']:
        if i in v_dims:
            if 'nlon' in i:
                lon_name = str(series_var.coordinates.split()[0])
                if 'ULONG' in lon_name:
                    ln = str(len(f.dimensions[i]))+"_UGRID"
                else:
                    ln = str(len(f.dimensions[i]))+"_TGRID"
            else:
                lon_name = i
                ln = len(f.dimensions[i])
    for i in grids[mt]['lev']:
        if i in v_dims:
            lev_name = i
    for i in grids[mt]['time']:
        if i in v_dims:
            time_name = i
    gridfile = '{0}/{1}x{2}x{3}.nc'.format(extra_dir, mt, lt, ln)

    # files without the time_period_freq attribute are filed under
    # "unknown"/"unknown" (the original read the missing attribute here and
    # raised AttributeError)
    if hasattr(f, "time_period_freq"):
        freq = f.time_period_freq
        date = fn.split('.')[-2]
    else:
        freq = "unknown"
        date = "unknown"

    per_model = variablelist.setdefault(model_type, {})
    for vn in f.variables:
        entry = per_model.setdefault(vn, {}).setdefault(freq, {}).setdefault(date, {})
        # the first file seen for a (variable, frequency, date) wins
        if 'files' not in entry:
            entry['files'] = [fn, gridfile]
            entry['lat'] = lat_name
            entry['lon'] = lon_name
            entry['lev'] = lev_name
            entry['time'] = time_name


def _merge_variable_lists(target, incoming):
    """Merge the nested variable dictionary of another task into target."""
    for model_type, by_var in incoming.items():
        tgt_model = target.setdefault(model_type, {})
        for vn, by_freq in by_var.items():
            tgt_var = tgt_model.setdefault(vn, {})
            for tp, by_date in by_freq.items():
                tgt_freq = tgt_var.setdefault(tp, {})
                for date, info in by_date.items():
                    existing = tgt_freq.setdefault(date, {})
                    if 'files' in existing:
                        # already known: only add the other task's first file
                        if len(info['files']) > 0:
                            existing['files'].append(info['files'][0])
                    else:
                        tgt_freq[date] = info


def fill_list(nc_files, root_dir, extra_dir, comm, rank, size):
    """Catalogue the variables found in nc_files, in parallel.

    The file list is split over the tasks of comm.  Each task opens its
    files and records, per model type, variable, time_period_freq and date,
    the file name, the grid file name and the lat/lon/lev/time dimension
    names.  Rank 0 collects and merges the dictionaries of all other tasks
    and duplicates the result to every task.  With a single task the local
    dictionary is returned directly.

    Arguments:
    nc_files (list) - netCDF file names to examine
    root_dir (string) - text removed from each file name before it is split on "/"
    extra_dir (string) - directory used to build the grid file name
    comm (object) - simple communicator
    rank (int) - rank of the calling task in comm
    size (int) - number of tasks in comm

    Returns:
    variable_list (dictionary) - model type -> variable -> frequency -> date ->
    dictionary with the keys files, lat, lon, lev and time; a "skip" key
    with an empty dictionary is present when a file was not catalogued
    """
    variablelist = {}
    nc_files_l = comm.partition(nc_files, func=partition.EqualLength(), involved=True)
    for fn in nc_files_l:
        f = nc.Dataset(fn, "r")
        # close the dataset even when reading it fails
        try:
            _record_file(f, fn, root_dir, extra_dir, variablelist)
        finally:
            f.close()

    VL_TAG = 30
    if size > 1:
        if rank == 0:
            variable_list = variablelist
            # one message is expected from each of the other tasks
            for _ in range(size - 1):
                r, lvarList = comm.collect(data=None, tag=VL_TAG)
                _merge_variable_lists(variable_list, lvarList)
            comm.partition(variable_list, func=partition.Duplicate(), involved=True)
        else:
            comm.collect(data=variablelist, tag=VL_TAG)
            variable_list = comm.partition(func=partition.Duplicate(), involved=True)
    else:
        # single task: nothing to gather (the original returned an empty
        # dictionary here and lost the scan results)
        variable_list = variablelist
    comm.sync()
    return variable_list
def get_input_dates(glob_str, comm, rank, size):
    '''
    Open up all of the files that match the search string and get
    the dates within the files. Also get the number of slices
    within each file, what calendar it uses and the time unit.

    The matching files are split over the tasks of comm. When there is
    more than one task, rank 0 collects the results of the other tasks
    and duplicates the merged dictionaries to everyone; with a single
    task the local results are used directly. If no file matches, the
    function returns right away with empty dictionaries and None for
    the calendar, units and time_period_freq.

    Input:
    glob_str(string) - the search path to get files
    comm(object) - simple communicator
    rank(int) - rank of the calling task in comm
    size(int) - number of tasks in comm

    Output:
    stream_dates(dictionary) - keys->date, values->the file where this slice is located
    file_slices(dictionary) - keys->filename, values->the number of slices found in the file
    calendar(string) - the name of the calendar type (ie, noleap, ...), lower case;
                       "noleap" when the time variable has no calendar attribute
    units(string) - the calendar unit (possibly in the form 'days since....');
                    "days since 0000-01-01 00:00:00" when there is no units attribute
    time_period_freq(string) - time_period_freq global attribute from the first file
                               opened by this task, None if it was not found
    '''
    stream_files = glob.glob(glob_str)

    stream_dates = {}
    file_slices = {}
    att = {}

    if len(stream_files) < 1:
        return stream_dates, file_slices, None, None, None

    time_period_freq = None
    first = True
    stream_files_l = comm.partition(stream_files, func=partition.EqualLength(), involved=True)
    for fn in sorted(stream_files_l):
        print(' '.join(str(part) for part in (rank, '/', size, ' opening ', fn)))
        # open file and get time dimension
        f = nc.Dataset(fn, "r")
        # close the dataset even when reading it fails
        try:
            all_t = f.variables['time']

            # add the file name and how many slices it contains
            file_slices[fn] = len(all_t)

            # add all dates and which file they are located in
            for t in all_t[:]:
                stream_dates[t] = fn

            # get all attributes of time in order to get cal and units
            for a in all_t.ncattrs():
                att[a] = all_t.__getattribute__(a)

            # get the time_period_freq global attribute from the first file
            if first:
                try:
                    time_period_freq = f.getncattr('time_period_freq')
                    print(' '.join(str(part) for part in ('time_period_freq = ', time_period_freq)))
                except Exception:
                    # best effort: the attribute is optional
                    print('Global attribute time_period_freq not found - set to XML tseries_tper element')
                first = False
        finally:
            f.close()

    if size > 1:
        T1 = 31
        T2 = 32
        T3 = 33
        if rank == 0:
            g_stream_dates = stream_dates
            g_file_slices = file_slices
            g_att = att
            # every other task sends its three dictionaries, in this order
            for _ in range(size - 1):
                r, l_stream_dates = comm.collect(data=None, tag=T1)
                g_stream_dates.update(l_stream_dates)
                r, l_file_slices = comm.collect(data=None, tag=T2)
                g_file_slices.update(l_file_slices)
                r, l_att = comm.collect(data=None, tag=T3)
                g_att.update(l_att)
            comm.partition(g_stream_dates, func=partition.Duplicate(), involved=True)
            comm.partition(g_file_slices, func=partition.Duplicate(), involved=True)
            comm.partition(g_att, func=partition.Duplicate(), involved=True)
        else:
            comm.collect(data=stream_dates, tag=T1)
            comm.collect(data=file_slices, tag=T2)
            comm.collect(data=att, tag=T3)
            g_stream_dates = comm.partition(func=partition.Duplicate(), involved=True)
            g_file_slices = comm.partition(func=partition.Duplicate(), involved=True)
            g_att = comm.partition(func=partition.Duplicate(), involved=True)
    else:
        # single task: the local results are the global results (the original
        # left g_att undefined here and returned empty dictionaries)
        g_stream_dates = stream_dates
        g_file_slices = file_slices
        g_att = att

    calendar = g_att['calendar'] if 'calendar' in g_att else "noleap"
    units = g_att['units'] if 'units' in g_att else "days since 0000-01-01 00:00:00"

    comm.sync()
    return g_stream_dates, g_file_slices, calendar.lower(), units, time_period_freq
def testDuplicate(self):
    # Duplicate is expected to hand back the first element of each input
    # tuple unchanged.
    for args in self.inputs:
        wanted = args[0]
        got = partition.Duplicate()(*args)
        self.assertEqual(got, wanted)
def main(options, main_comm, debugMsg):
    """Set up the environment and run the pyAverager for the ocean diagnostics.

    The manager task reads the settings and checks the history files; the
    updated envDict is duplicated to all tasks after each of those steps.
    Climatology files are created for the model case and, when
    MODEL_VS_CONTROL is TRUE, for the control case.  Any exception raised
    while creating them is printed with its traceback and the process
    exits with status 1.

    Arguments:
    options (object) - command line options
    main_comm (object) - MPI simple communicator object
    debugMsg (object) - vprinter object for printing debugging messages

    The env_diags_ocn.xml configuration file defines the way the diagnostics are generated.
    See (website URL here...) for a complete description of the env_diags_ocn XML options.
    """
    # history file glob and matching regular expression used by every
    # checkHistoryFiles call below.  Raw string: '\.' and '\d' are not valid
    # escapes in a normal literal.
    suffix = 'pop.h.*.nc'
    file_pattern = r'.*\.pop\.h\.\d{4,4}-\d{2,2}\.nc'

    # initialize the environment dictionary
    envDict = dict()

    # CASEROOT is given on the command line as required option --caseroot
    if main_comm.is_manager():
        caseroot = options.caseroot[0]
        debugMsg('caseroot = {0}'.format(caseroot), header=True)

        debugMsg('calling initialize_envDict', header=True)
        envDict = initialize_envDict(envDict, caseroot, debugMsg, options.standalone)

    # broadcast envDict to all tasks
    envDict = main_comm.partition(data=envDict, func=partition.Duplicate(), involved=True)
    sys.path.append(envDict['PATH'])
    main_comm.sync()

    # generate the climatology files used for all plotting types using the pyAverager
    if main_comm.is_manager():
        debugMsg('calling checkHistoryFiles for model case', header=True)
        start_year, stop_year, in_dir, htype, firstHistoryFile = diagUtilsLib.checkHistoryFiles(
            envDict['MODELCASE_INPUT_TSERIES'], envDict['DOUT_S_ROOT'], envDict['CASE'],
            envDict['YEAR0'], envDict['YEAR1'], 'ocn', suffix, file_pattern,
            envDict['MODELCASE_SUBDIR'])
        envDict['YEAR0'] = start_year
        envDict['YEAR1'] = stop_year
        envDict['in_dir'] = in_dir
        envDict['htype'] = htype

    main_comm.sync()

    envDict = main_comm.partition(data=envDict, func=partition.Duplicate(), involved=True)
    main_comm.sync()

    # MODEL_TIMESERIES denotes the plotting diagnostic type requested and whether or
    # not to generate the necessary climo files for those plot sets
    tseries = False
    if envDict['MODEL_TIMESERIES'].lower() in ['t', 'true']:
        if main_comm.is_manager():
            debugMsg('timeseries years before checkHistoryFiles {0} - {1}'.format(envDict['TSERIES_YEAR0'], envDict['TSERIES_YEAR1']), header=True)
            tseries_start_year, tseries_stop_year, in_dir, htype, firstHistoryFile = \
                diagUtilsLib.checkHistoryFiles(
                    envDict['MODELCASE_INPUT_TSERIES'], envDict['DOUT_S_ROOT'], envDict['CASE'],
                    envDict['TSERIES_YEAR0'], envDict['TSERIES_YEAR1'], 'ocn', suffix, file_pattern,
                    envDict['MODELCASE_SUBDIR'])
            # store the checked years first so that the "after" message shows
            # them (the original printed the unchanged values)
            envDict['TSERIES_YEAR0'] = tseries_start_year
            envDict['TSERIES_YEAR1'] = tseries_stop_year
            debugMsg('timeseries years after checkHistoryFiles {0} - {1}'.format(envDict['TSERIES_YEAR0'], envDict['TSERIES_YEAR1']), header=True)

        main_comm.sync()
        tseries = True
        envDict = main_comm.partition(data=envDict, func=partition.Duplicate(), involved=True)
        main_comm.sync()

    try:
        if main_comm.is_manager():
            debugMsg('calling createClimFiles for model and timeseries', header=True)

        createClimFiles(envDict['YEAR0'], envDict['YEAR1'], envDict['in_dir'],
                        envDict['htype'], envDict['TAVGDIR'], envDict['CASE'],
                        tseries, envDict['MODEL_VARLIST'],
                        envDict['TSERIES_YEAR0'], envDict['TSERIES_YEAR1'],
                        envDict['DIAGOBSROOT'], envDict['netcdf_format'],
                        int(envDict['VERTICAL']), envDict['TIMESERIES_OBSPATH'],
                        main_comm, debugMsg)
    except Exception as error:
        print(str(error))
        traceback.print_exc()
        sys.exit(1)

    main_comm.sync()

    # check that the necessary control climatology files exist
    if envDict['MODEL_VS_CONTROL'].upper() == 'TRUE':
        if main_comm.is_manager():
            debugMsg('calling checkHistoryFiles for control case', header=True)
            start_year, stop_year, in_dir, htype, firstHistoryFile = diagUtilsLib.checkHistoryFiles(
                envDict['CNTRLCASE_INPUT_TSERIES'], envDict['CNTRLCASEDIR'], envDict['CNTRLCASE'],
                envDict['CNTRLYEAR0'], envDict['CNTRLYEAR1'], 'ocn', suffix, file_pattern,
                envDict['CNTRLCASE_SUBDIR'])
            envDict['CNTRLYEAR0'] = start_year
            envDict['CNTRLYEAR1'] = stop_year
            envDict['cntrl_in_dir'] = in_dir
            envDict['cntrl_htype'] = htype

        main_comm.sync()
        envDict = main_comm.partition(data=envDict, func=partition.Duplicate(), involved=True)
        main_comm.sync()

        if main_comm.is_manager():
            debugMsg('before createClimFiles call for control', header=True)
            debugMsg('...CNTRLYEAR0 = {0}'.format(envDict['CNTRLYEAR0']), header=True)
            debugMsg('...CNTRLYEAR1 = {0}'.format(envDict['CNTRLYEAR1']), header=True)
            debugMsg('...cntrl_in_dir = {0}'.format(envDict['cntrl_in_dir']), header=True)
            debugMsg('...cntrl_htype = {0}'.format(envDict['cntrl_htype']), header=True)
            debugMsg('...CNTRLTAVGDIR = {0}'.format(envDict['CNTRLTAVGDIR']), header=True)
            debugMsg('...CNTRLCASE = {0}'.format(envDict['CNTRLCASE']), header=True)
            debugMsg('...CNTRLCASE_INPUT_TSERIES = {0}'.format(envDict['CNTRLCASE_INPUT_TSERIES']), header=True)
            debugMsg('...varlist = {0}'.format(envDict['CNTRL_VARLIST']), header=True)

            debugMsg('calling createClimFiles for control', header=True)

        # don't create timeseries averages for the control case so set to False and set the
        # tseries_start_year and tseries_stop_year to 0
        try:
            createClimFiles(envDict['CNTRLYEAR0'], envDict['CNTRLYEAR1'], envDict['cntrl_in_dir'],
                            envDict['cntrl_htype'], envDict['CNTRLTAVGDIR'], envDict['CNTRLCASE'],
                            False, envDict['CNTRL_VARLIST'], 0, 0,
                            envDict['DIAGOBSROOT'], envDict['netcdf_format'],
                            int(envDict['VERTICAL']), envDict['TIMESERIES_OBSPATH'],
                            main_comm, debugMsg)
        except Exception as error:
            print(str(error))
            traceback.print_exc()
            sys.exit(1)
def main(options, main_comm, debugMsg):
    """Set up the environment and run the atmosphere diagnostics in parallel.

    Calls 2 different diagnostics generation types:
    model vs. observation
    model vs. model

    Arguments:
    options (object) - command line options
    main_comm (object) - MPI simple communicator object
    debugMsg (object) - vprinter object for printing debugging messages

    Returns:
    1 when a diagnostic raises RuntimeError (treated as unrecoverable);
    otherwise the function falls off the end and returns None.

    The env_diags_atm.xml configuration file defines the way the diagnostics are generated.
    See (website URL here...) for a complete description of the env_diags_atm XML options.
    """
    # initialize the environment dictionary
    envDict = dict()

    # CASEROOT is given on the command line as required option --caseroot.
    # Only the manager reads the configuration; it is broadcast just below.
    if main_comm.is_manager():
        caseroot = options.caseroot[0]
        debugMsg('caseroot = {0}'.format(caseroot), header=True)

        debugMsg('calling initialize_main', header=True)
        envDict = initialize_main(envDict, caseroot, debugMsg, options.standalone)

        debugMsg('calling check_ncl_nco', header=True)
        diagUtilsLib.check_ncl_nco(envDict)

    # broadcast envDict to all tasks
    envDict = main_comm.partition(data=envDict, func=partition.Duplicate(), involved=True)
    sys.path.append(envDict['PATH'])
    main_comm.sync()

    # check to see if the climos need to be regridded into a lat/lon grid
    if envDict['test_regrid'] == 'True' or envDict['cntl_regrid'] == 'True':
        regrid_climos(envDict, main_comm)
    main_comm.sync()

    # get list of diagnostics types to be created
    diag_list = setup_diags(envDict)
    if main_comm.is_manager():
        print('User requested diagnostics:')
        for diag in diag_list:
            print(' {0}'.format(diag))

    main_comm.sync()

    # broadcast the diag_list to all tasks
    num_of_diags = len(diag_list)
    num_of_diags = main_comm.partition(num_of_diags, func=partition.Duplicate(), involved=True)
    diag_list = main_comm.partition(data=diag_list, func=partition.Duplicate(), involved=True)
    main_comm.sync()

    # initialize some variables for distributing diagnostics across the communicators
    gsize = main_comm.get_size()
    grank = main_comm.get_rank()
    local_diag_list = list()

    # divide the main communicator into sub_communicators to be passed to each diag class
    # split mpi comm world if the size of the communicator > 1 and the num_of_diags > 1
    if gsize > 1 and num_of_diags > 1:
        # the modulo already keeps the color inside [0, num_of_diags)
        temp_color = grank % num_of_diags
        groups = list(range(num_of_diags))
        debugMsg(
            'global_rank {0}, temp_color {1}, #of groups(diag types) {2}, groups {3}, diag_list {4}'
            .format(grank, temp_color, num_of_diags, groups, diag_list))
        group = groups[temp_color]
        inter_comm, multi_comm = main_comm.divide(group)
        color = inter_comm.get_color()
        lsize = inter_comm.get_size()
        lrank = inter_comm.get_rank()
        lmaster = inter_comm.is_manager()
        debugMsg('color {0}, lsize {1}, lrank {2}, lmaster {3}'.format(
            color, lsize, lrank, lmaster))

        # partition the diag_list between communicators
        DIAG_LIST_TAG = 10
        if lmaster:
            local_diag_list = multi_comm.partition(
                diag_list, func=partition.EqualStride(), involved=True)
            # hand the local list to every other task of this sub-communicator
            for _ in range(1, lsize):
                inter_comm.ration(data=local_diag_list, tag=DIAG_LIST_TAG)
        else:
            local_diag_list = inter_comm.ration(tag=DIAG_LIST_TAG)
        # was "'...{0}', format(...)": the comma left the placeholder unfilled
        debugMsg('local_diag_list {0}'.format(local_diag_list))
    else:
        inter_comm = main_comm
        lmaster = main_comm.is_manager()
        lsize = main_comm.get_size()
        lrank = main_comm.get_rank()
        local_diag_list = diag_list

    inter_comm.sync()
    main_comm.sync()

    debugMsg('lsize = {0}, lrank = {1}'.format(lsize, lrank))
    inter_comm.sync()

    # loop through the local_diag_list list
    for requested_diag in local_diag_list:
        try:
            diag = atm_diags_factory.atmosphereDiagnosticsFactory(
                requested_diag, envDict)

            # check the prerequisites for the diagnostics types
            debugMsg('Checking prerequisites for {0}'.format(
                diag.__class__.__name__), header=True)

            # called on every task of the sub-communicator, not only its manager
            envDict = diag.check_prerequisites(envDict, inter_comm)

            inter_comm.sync()

            debugMsg('inter_comm = {0}'.format(inter_comm))
            diag.run_diagnostics(envDict, inter_comm)

        except atm_diags_bc.RecoverableError as e:
            # catch all recoverable errors, print a message and continue.
            print(e)
            # was "request_diag": an undefined name that raised NameError here
            print("Skipped '{0}' and continuing!".format(requested_diag))
        except RuntimeError as e:
            # unrecoverable error, bail!
            print(e)
            return 1

    main_comm.sync()
def _remap_field(field_name, fptr_in, fptr_out, matrix_2d, matrix_3d, dim_names, chunk):
    """Define field_name in fptr_out and fill it by remapping fptr_in in time chunks."""
    varid_out = remap.def_var(field_name, fptr_in, fptr_out, dim_names)

    # number of time records remapped per call; defaults to one
    step = 1 if chunk is None else int(chunk)

    try:
        var_in = fptr_in.variables[field_name]
        # use appropriate matrix for regridding
        if dim_names['depth'] in varid_out.dimensions:
            ntime = fptr_in.dimensions['time'].size
            for b in range(0, ntime, step):
                e = min(b + step, ntime)  # the last chunk may be short
                varid_out[b:e, :, :, :] = matrix_3d.remap_var(var_in[b:e, :, :, :])
        else:
            ntime = fptr_in.dimensions['time'].size
            for b in range(0, ntime, step):
                e = min(b + step, ntime)  # the last chunk may be short
                varid_out[b:e, :, :] = matrix_2d.remap_var(var_in[b:e, :, :])
    except TypeError:
        # best effort: report the field and carry on with the next one
        print('Type Error for variable {0} '.format(field_name))


def _remap_file(in_fname, matrix_2d, matrix_3d, dim_names, chunk):
    """Remap one input file into its 'gr' counterpart unless that already exists.

    The output is written to '<name>.tmp' and renamed once complete, so an
    interrupted run does not leave a file that looks finished.
    """
    path_parts = in_fname.split('/')
    out_fname = in_fname.replace(path_parts[-3], 'gr')
    if os.path.exists(out_fname):
        return

    out_dir = os.path.dirname(out_fname)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # variables that describe the grid/time axes and are not remapped
    coord_names = [
        'lat', 'lat_bnds', 'lon', 'lon_bnds', 'lev', 'lev_bnds', 'time',
        'time_bnds', 'nlat', 'nlon'
    ]

    fptr_in = nc.Dataset(in_fname, 'r')  # pylint: disable=E1101
    try:
        # the 4th-from-last path component is used as the variable name
        remappable = len(fptr_in[path_parts[-4]].dimensions) in (3, 4)
        if remappable:
            fptr_out = nc.Dataset(out_fname + '.tmp', 'w')  # pylint: disable=E1101
            try:
                remap.copy_time(fptr_in, fptr_out)
                remap.copy_gAttr(fptr_in, fptr_out)

                if dim_names['depth'] in fptr_in.dimensions:
                    matrix_3d.dst_grid.def_dims_common(fptr_out, dim_names)
                    matrix_3d.dst_grid.write_vars_common(fptr_out, dim_names)
                else:
                    matrix_2d.dst_grid.def_dims_common(fptr_out, dim_names)
                    matrix_2d.dst_grid.write_vars_common(fptr_out, dim_names)

                field_names = [v for v in fptr_in.variables if v not in coord_names]
                for field_name in field_names:
                    _remap_field(field_name, fptr_in, fptr_out, matrix_2d,
                                 matrix_3d, dim_names, chunk)
            finally:
                fptr_out.close()
    finally:
        # close the input even when the variable is skipped or an error occurs
        fptr_in.close()

    if remappable:
        try:
            os.rename(out_fname + '.tmp', out_fname)
        except OSError:
            print('Could not create {0}'.format(out_fname))
    else:
        # NOTE(review): message attached to the 3D/4D check; confirm it was not
        # meant for the "output already exists" case instead.
        print("Not creating {0}".format(out_fname))


def main(options, main_comm, debugMsg):
    """Read the env_ocn_remap.xml settings and remap ocean files with ocean_remap.

    Rank 0 reads the configuration, builds the list of input files and hands
    them out one at a time; every other rank remaps the files it receives.

    Arguments:
    options (object) - command line options
    main_comm (object) - MPI simple communicator object
    debugMsg (object) - vprinter object for printing debugging messages
    """
    # initialize the environment dictionary
    envDict = dict()

    # Get rank and size
    rank = main_comm.get_rank()
    size = main_comm.get_size()

    # CASEROOT is given on the command line as required option --caseroot
    if rank == 0:
        caseroot = options.caseroot[0]
        envDict['CASEROOT'] = options.caseroot[0]
        debugMsg('caseroot = {0}'.format(envDict['CASEROOT']), header=True, verbosity=2)

        env_file_list = ['./env_postprocess.xml', './env_ocn_remap.xml']
        envDict = cesmEnvLib.readXML(caseroot, env_file_list)

        # strip the OCNREMAP_ prefix from the envDict entries before setting the
        # environment to allow for compatibility with all the diag routine calls
        envDict = diagUtilsLib.strip_prefix(envDict, 'OCNREMAP_')

        print("cmip6: {0}".format(envDict['cmip6']))
        print("filelist: {0}".format(envDict['filelist']))
        print("matrix_2d_fname: {0}".format(envDict['matrix_2d_fname']))
        print("matrix_3d_fname: {0}".format(envDict['matrix_3d_fname']))
        print("indir: {0}".format(envDict['indir']))
        print("outdir: {0}".format(envDict['outdir']))
        print("chunk size: {0}".format(envDict['chunk']))

    # broadcast envDict to all tasks
    envDict = main_comm.partition(data=envDict, func=partition.Duplicate(), involved=True)
    main_comm.sync()

    files = []
    if rank == 0:
        # Find files to regrid
        # Do we have a cmip6 variable list?
        if envDict['cmip6'] is not None:
            if envDict['indir'] is not None:
                with open(envDict['cmip6']) as f:
                    for l in f:
                        fields = l.strip().split(':')
                        if len(fields) < 2:
                            # blank or malformed line: nothing to look for
                            continue
                        t = fields[0]
                        v = fields[1]
                        print("Trying to find: {0}_{1}*.nc".format(v, t))
                        for root, _dirs, fns in os.walk(envDict['indir']):
                            for fn in fnmatch.filter(fns, v + '_' + t + "*.nc"):
                                # skip leftovers and already-regridded ('gr') files
                                if 'tmp.nc' not in fn and 'gr' not in fn.split('_'):
                                    print("Found: {0}".format(fn.split('/')))
                                    files.append(os.path.join(root, fn))
            else:
                print(
                    "You need to specify an indir argument with the cmip6 argument"
                )
                # was "file = None": the typo left files == [] so no exit happened
                files = None
        elif envDict['filelist'] is not None:
            with open(envDict['filelist']) as f:
                for l in f:
                    files.append(l.strip())
        elif envDict['indir'] is not None:
            for root, _dirs, fns in os.walk(envDict['indir']):
                for fn in fnmatch.filter(fns, "*.nc"):
                    files.append(os.path.join(root, fn))
        else:
            print('Exiting because no input path or files where given')
            files = None

    # All call this
    main_comm.sync()
    files = main_comm.partition(files, func=partition.Duplicate(), involved=True)
    if files is None:
        sys.exit()

    #matrix_2d_fname = 'POP_gx1v7_to_latlon_1x1_0E_mask_conserve_20181015.nc'
    matrix_2d = remap.ocean_remap(envDict['matrix_2d_fname'])

    #matrix_3d_fname = 'POP_gx1v7_to_latlon_1x1_0E_fulldepth_conserve_20181015.nc'
    matrix_3d = remap.ocean_remap(envDict['matrix_3d_fname'])

    # names of coordinate dimensions in output files
    # (alternative naming: {'depth': 'olevel', 'lat': 'latitude', 'lon': 'longitude'})
    dim_names = {'depth': 'lev', 'lat': 'lat', 'lon': 'lon'}

    main_comm.sync()
    # Have only root create these files
    if rank == 0:
        if len(files) > 0 and envDict['cmip6'] is not None:
            temp = files[0]
            parts = temp.split('/')
            # create CMIP Ofx files
            for var_name in ('areacello', 'deptho', 'thkcello', 'volcello'):
                new_outdir = temp.replace(parts[-4], var_name).replace(
                    parts[-5], 'Ofx').replace(parts[-3], 'gr').replace(
                        '_' + temp.split('_')[-1], '') + '.nc'
                d = os.path.dirname(new_outdir)
                if not os.path.exists(d):
                    os.makedirs(d)
                fptr_out = nc.Dataset(new_outdir, 'w')  # pylint: disable=E1101
                try:
                    matrix_3d.dst_grid.def_dims_common(fptr_out, dim_names)
                    matrix_3d.dst_grid.write_vars_common(fptr_out, dim_names)
                    matrix_3d.dst_grid.write_var_CMIP_Ofx(fptr_out, dim_names, var_name)
                finally:
                    # close explicitly so the data is flushed to disk
                    fptr_out.close()

    # Create a master slave parallel protocol
    GWORK_TAG = 10  # global comm mpi tag
    if rank == 0:
        for i in files:
            main_comm.ration(data=i, tag=GWORK_TAG)
        # -99 tells each worker that no work is left
        for i in range(1, size):
            main_comm.ration(data=-99, tag=GWORK_TAG)
    else:
        f = -999
        while f != -99:
            f = main_comm.ration(tag=GWORK_TAG)
            if f != -99:
                print("working on: {0}".format(f))
                _remap_file(f, matrix_2d, matrix_3d, dim_names, envDict['chunk'])
    main_comm.sync()
def main(options, main_comm, debugMsg, timer):
    """Regrid the land climatology files, spreading the files over the MPI tasks.

    Up to two sets of climatology files are collected:
    model1 only
    model1 and model2

    Arguments:
    options (object) - command line options
    main_comm (object) - MPI simple communicator object
    debugMsg (object) - vprinter object for printing debugging messages
    timer (object) - timer object for keeping times

    The env_diags_lnd.xml configuration file defines whether or not to regrid
    the climatology files.
    """
    # values shared by all tasks
    ncl_script = 'se2fv_esmf.regrid2file.ncl'
    model_dir = 'lnd'

    def climo_workdir(env, t):
        """Return the climo working directory of model number t ('1' or '2')."""
        last_yr = (int(env['clim_first_yr_' + t]) +
                   int(env['clim_num_yrs_' + t])) - 1
        case_span = '{0}.{1}-{2}'.format(env['caseid_' + t],
                                         env['clim_first_yr_' + t], last_yr)
        return '{0}/climo/{1}/{2}/{3}/'.format(env['PTMPDIR_' + t],
                                               env['caseid_' + t], case_span,
                                               model_dir)

    regrid_list = list()

    # CASEROOT is given on the command line as required option --caseroot
    caseroot = options.caseroot[0]
    debugMsg('caseroot = {0}'.format(caseroot), header=True, verbosity=1)

    debugMsg('calling initialize_main', header=True, verbosity=1)
    envDict = initialize_main(dict(), caseroot, debugMsg, options.standalone)

    if main_comm.is_manager():
        debugMsg('calling check_ncl_nco', header=True, verbosity=1)
        diagUtilsLib.check_ncl_nco(envDict)

        wkdir_missing = not os.path.exists(envDict['WKDIR'])
        if wkdir_missing:
            os.makedirs(envDict['WKDIR'])

        # collect the climo files of model 1 that need regridding
        if envDict['regrid_1'] == 'True':
            regrid_list = get_climo_files_to_regrid(
                climo_workdir(envDict, '1'), envDict['lnd_modelstream_1'],
                '1', envDict, debugMsg)
            debugMsg('t = 1 regrid_list = {0}'.format(regrid_list),
                     header=True, verbosity=1)

        # ... and those of model 2 for a model vs. model comparison
        if envDict['MODEL_VS_MODEL'] == 'True' and envDict['regrid_2'] == 'True':
            second_set = get_climo_files_to_regrid(
                climo_workdir(envDict, '2'), envDict['lnd_modelstream_2'],
                '2', envDict, debugMsg)
            regrid_list = regrid_list + second_set
            debugMsg('t = 2 regrid_list = {0}'.format(regrid_list),
                     header=True, verbosity=1)

    main_comm.sync()

    # every task receives the manager's envDict and regrid_list
    envDict['NCLPATH'] = envDict['POSTPROCESS_PATH'] + '/lnd_diag/shared/'
    envDict = main_comm.partition(data=envDict, func=partition.Duplicate(),
                                  involved=True)
    regrid_list = main_comm.partition(data=regrid_list,
                                      func=partition.Duplicate(),
                                      involved=True)
    main_comm.sync()

    size = main_comm.get_size()
    rank = main_comm.get_rank()
    main_comm.sync()

    if main_comm.is_manager():
        # hand out one list index per request, then one None per worker
        num_regrids = len(regrid_list)
        debugMsg('{0} num_regrids'.format(num_regrids), header=True,
                 verbosity=1)
        for i in range(num_regrids):
            debugMsg('Sent out index {2!r}'.format(rank, size, i),
                     header=True, verbosity=1)
            main_comm.ration(i)
        for _ in range(size - 1):
            debugMsg('Sent None', header=True, verbosity=1)
            main_comm.ration(None)
        return

    # worker: keep asking for indices until None arrives
    index = -1
    while True:
        debugMsg('Recvd index {2!r}'.format(rank, size, index), header=True,
                 verbosity=1)
        index = main_comm.ration()
        if index is None:
            break

        # each entry holds the model number, a sub-directory and the climo file
        entry = regrid_list[index]
        t = entry[0]
        ext_dir = entry[1]
        climo_file = entry[2]

        workdir = climo_workdir(envDict, t)

        timer_tag = '{0}_{1}'.format(t, climo_file)
        timer.start(timer_tag)
        debugMsg('Before call to lnd_regrid using workdir = {0}/{1}'.format(
            workdir, ext_dir), header=True, verbosity=1)
        diagUtilsLib.lnd_regrid(climo_file, ncl_script, t, workdir, ext_dir,
                                envDict)
        timer.stop(timer_tag)

        debugMsg("Total time to regrid file {0} = {1}".format(
            climo_file, timer.get_time(timer_tag)), header=True, verbosity=1)
def fill_list(nc_files, root_dir, extra_dir, comm, rank, size):
    """Build a catalogue of the variables found in a set of netCDF files.

    The files are partitioned over the tasks of comm, every task scans its
    share, rank 0 merges the per-task results and the merged catalogue is
    sent back to every task.

    Arguments:
    nc_files (list) - netCDF file paths; '<extra_dir>/ocn_constants.nc' is
                      appended to this list in place
    root_dir (str) - prefix removed from each path before the component name
                     is taken from its 5th-from-last path element
    extra_dir (str) - directory holding ocn_constants.nc and the grid files
    comm (object) - MPI simple communicator object
    rank (int) - rank of this task
    size (int) - number of tasks in comm

    Returns:
    dict nested as [model_type][variable][time_period_freq][date], each leaf
    holding 'files' ([file, gridfile, ...]), 'lat', 'lon', 'lev' and 'time'.
    Files without a time_period_freq attribute are stored under
    ['unknown']['unknown']. Files without a 'time' variable, or without
    'tseries' in their path, only add an empty 'skip' entry.
    """
    # fallback grid size per component, used when no lat/lon dimension is found
    grds = {
        'atm': '192x288',
        'lnd': '192x288',
        'glc': '192x288',
        'rof': '192x288',
        'ice': '384x320',
        'ocn': '384x320'
    }

    variablelist = {}
    gridfile = None
    nc_files.append(extra_dir + "/ocn_constants.nc")
    nc_files_l = comm.partition(nc_files, func=partition.EqualLength(), involved=True)
    for fn in nc_files_l:
        f = nc.Dataset(fn, "r")
        try:
            stri = fn
            if "ocn_constants" in fn:
                # the constants file does not follow the component directory layout
                mt = "ocn"
            else:
                mt = fn.replace(root_dir, "").split("/")[-5]
            model_type = mt
            if "lnd" in model_type or "rof" in model_type:
                model_type = 'lnd,rof'
            if "glc" in model_type:
                model_type = 'glc,lnd'

            if "time" not in f.variables or "tseries" not in fn:
                variablelist["skip"] = {}
                continue

            lt = "none"
            ln = "none"
            lv = "none"
            lat_name = None
            lon_name = None
            lev_name = None
            time_name = None  # the time dimension lookup is currently disabled

            # Find which dim variables to use; the 3rd-from-last dot-separated
            # token of the file name is used as the variable name
            main_var = f.variables[fn.split('.')[-3]]
            v_dims = main_var.dimensions
            for i in grids[mt]['lat']:
                if i in v_dims:
                    if 'nlat' in i or 'nj' in i:
                        # index dimension: take the name from 'coordinates'
                        lat_name = str(main_var.coordinates.split()[1])
                    else:
                        lat_name = i
                    lt = len(f.dimensions[i])
            for i in grids[mt]['lon']:
                if i in v_dims:
                    if 'nlon' in i or 'ni' in i:
                        lon_name = str(main_var.coordinates.split()[0])
                        if 'ULON' in lon_name:
                            ln = str(len(f.dimensions[i])) + "_UGRID"
                        else:
                            ln = str(len(f.dimensions[i])) + "_TGRID"
                    else:
                        lon_name = i
                        ln = len(f.dimensions[i])
            for i in grids[mt]['lev']:
                if i in v_dims:
                    lev_name = i
                    lv = len(f.dimensions[i])

            if 'none' == lt or 'none' == ln:
                gridfile = '{0}/{1}x{2}.nc'.format(extra_dir, mt, grds[mt])
            elif 'atm' in mt:
                gridfile = '{0}/{1}x{2}x{3}x{4}.nc'.format(extra_dir, mt, lt, ln, lv)
            else:
                gridfile = '{0}/{1}x{2}x{3}.nc'.format(extra_dir, mt, lt, ln)
            if not os.path.isfile(gridfile):
                gridfile = None

            # frequency/date keys are properties of the file, not of a variable
            has_freq = hasattr(f, "time_period_freq")
            if has_freq:
                if 'day_365' in f.time_period_freq:
                    time_period_freq = 'year_1'
                else:
                    time_period_freq = f.time_period_freq
                if 'ocn_constants' in stri:
                    date = "0000"
                else:
                    date = stri.split('.')[-2]

            # NOTE(review): skipped files contribute no variables here; confirm
            # that is intended for ocn_constants.nc as well.
            for vn in f.variables:
                per_var = variablelist.setdefault(model_type, {}).setdefault(vn, {})
                # a fresh dict and list per variable: the merge step appends to 'files'
                info = {
                    'files': [stri, gridfile],
                    'lat': lat_name,
                    'lon': lon_name,
                    'lev': lev_name,
                    'time': time_name,
                }
                if has_freq:
                    entry = per_var.setdefault(time_period_freq, {}).setdefault(date, {})
                    if 'files' not in entry:
                        # the first file seen for this variable/frequency/date wins
                        entry.update(info)
                else:
                    # was indexed with time_period_freq/date, which are not
                    # defined for a file without the attribute
                    per_var.setdefault("unknown", {})["unknown"] = info
        finally:
            f.close()

    VL_TAG = 30
    # a serial run has nothing to merge: the local result is the answer
    variable_list = variablelist
    if size > 1:
        if rank == 0:
            for _ in range(size - 1):
                _sender, lvarList = comm.collect(data=None, tag=VL_TAG)
                for model_type, d1 in lvarList.items():
                    merged_mt = variable_list.setdefault(model_type, {})
                    for vn, d2 in d1.items():
                        merged_vn = merged_mt.setdefault(vn, {})
                        for tp, d3 in d2.items():
                            merged_tp = merged_vn.setdefault(tp, {})
                            for date, incoming in d3.items():
                                existing = merged_tp.setdefault(date, {})
                                if 'files' in existing:
                                    # already known: only record the extra file
                                    if (len(incoming['files']) > 0
                                            and incoming['files'][0] is not None):
                                        existing['files'].append(incoming['files'][0])
                                else:
                                    merged_tp[date] = incoming
            comm.partition(variable_list, func=partition.Duplicate(), involved=True)
        else:
            comm.collect(data=variablelist, tag=VL_TAG)
            variable_list = comm.partition(func=partition.Duplicate(), involved=True)
        comm.sync()
    return variable_list