def main(options, scomm, rank, size):
    """
    """
    # initialize the CASEROOT environment dictionary
    cesmEnv = dict()

    # CASEROOT is given on the command line as required option --caseroot
    caseroot = options.caseroot[0]

    # set the caseroot based on standalone or not
    pp_caseroot = caseroot
    if not options.standalone:
        caseroot, pp_subdir = os.path.split(caseroot)
    if rank == 0:
        print('cesm_tseries_generator: caseroot = {0}'.format(caseroot))

    # set the debug level
    debug = options.debug[0]

    # cesmEnv["id"] = "value" parsed from the CASEROOT/env_*.xml files
    env_file_list = [
        'env_case.xml', 'env_run.xml', 'env_build.xml', 'env_mach_pes.xml'
    ]

    # check if the standalone option is set
    if options.standalone:
        env_file_list = ['env_postprocess.xml']
    cesmEnv = cesmEnvLib.readXML(caseroot, env_file_list)

    # initialize the specifiers list to contain the list of specifier classes
    specifiers = list()

    # loading the specifiers from the env_timeseries.xml  only needs to run on the master task (rank=0)
    if rank == 0:
        specifiers = readArchiveXML(caseroot, cesmEnv['DOUT_S_ROOT'],
                                    cesmEnv['CASE'], options.standalone, debug)
    scomm.sync()

    # specifiers is a list of pyreshaper specification objects ready to pass to the reshaper
    specifiers = scomm.partition(specifiers,
                                 func=partition.Duplicate(),
                                 involved=True)

    # create the PyReshaper object - uncomment when multiple specifiers are allowed
    reshpr = reshaper.create_reshaper(specifiers,
                                      serial=False,
                                      verbosity=debug)

    # Run the conversion (slice-to-series) process
    reshpr.convert()

    # Print timing diagnostics
    reshpr.print_diagnostics()

    # TO-DO check if DOUT_S_SAVE_HISTORY_FILES is true or false and
    # delete history files accordingly

    return 0
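# A minimal sketch of the Duplicate() broadcast pattern used above, assuming the
# simplecomm/partition objects come from NCAR's ASAPTools package (the import below
# is an assumption; the original snippets do not show their imports).
from asaptools import partition, simplecomm


def broadcast_sketch():
    scomm = simplecomm.create_comm(serial=False)
    # only the manager rank builds the data ...
    data = ['spec_a', 'spec_b'] if scomm.is_manager() else []
    # ... and partition.Duplicate() hands every involved rank the complete copy
    data = scomm.partition(data, func=partition.Duplicate(), involved=True)
    return data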
def testDuplicate(self):
    for inp in self.inputs:
        pfunc = partition.Duplicate()
        actual = pfunc(*inp)
        expected = inp[0]
        msg = test_info_msg(
            'Duplicate', inp[0], inp[1], inp[2], actual, expected)
        print(msg)
        testing.assert_array_equal(actual, expected, msg)
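# A tiny illustration of what the test above checks: a Duplicate partition function
# called as pfunc(data, index, size) returns the full data for every rank index.
# Assumes ASAPTools' partition module and that (data, index, size) call signature.
from asaptools import partition

pfunc = partition.Duplicate()
assert pfunc([1, 2, 3], 0, 4) == [1, 2, 3]
assert pfunc([1, 2, 3], 3, 4) == [1, 2, 3]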
def main(options, scomm, rank, size, debug, debugMsg):
    """
    """
    # initialize the CASEROOT environment dictionary
    cesmEnv = dict()

    # CASEROOT is given on the command line as required option --caseroot
    caseroot = options.caseroot[0]

    # get the XML variables loaded into a hash
    env_file_list = ['env_postprocess.xml']
    cesmEnv = cesmEnvLib.readXML(caseroot, env_file_list)

    # initialize the specifiers list to contain the list of specifier classes
    specifiers = list()

    # loading the specifiers from the env_timeseries.xml  only needs to run on the master task (rank=0)
    if rank == 0:
        tseries_input_rootdir = cesmEnv['TIMESERIES_INPUT_ROOTDIR']
        tseries_output_rootdir = cesmEnv['TIMESERIES_OUTPUT_ROOTDIR']
        case = cesmEnv['CASE']
        completechunk = cesmEnv['TIMESERIES_COMPLETECHUNK']
        if completechunk.upper() in ['T', 'TRUE']:
            completechunk = 1
        else:
            completechunk = 0
        specifiers, log = readArchiveXML(caseroot, tseries_input_rootdir,
                                         tseries_output_rootdir, case,
                                         options.standalone, completechunk,
                                         debug, debugMsg)
    scomm.sync()

    # specifiers is a list of pyreshaper specification objects ready to pass to the reshaper
    specifiers = scomm.partition(specifiers,
                                 func=partition.Duplicate(),
                                 involved=True)
    if rank == 0:
        debugMsg("# of Specifiers: " + str(len(specifiers)),
                 header=True,
                 verbosity=1)

    if len(specifiers) > 0:
        # setup subcommunicators to do streams and chunks in parallel
        # everyone participates except for root
        inter_comm, lsubcomms = divide_comm(scomm, len(specifiers))
        color = inter_comm.get_color()
        lsize = inter_comm.get_size()
        lrank = inter_comm.get_rank()

        GWORK_TAG = 10  # global comm mpi tag
        LWORK_TAG = 20  # local comm mpi tag
        # global root - hands out specifiers to work on.  When complete, it must tell each subcomm all work is done.
        if (rank == 0):
            for i in range(0, len(specifiers)):  # hand out all specifiers
                scomm.ration(data=i, tag=GWORK_TAG)
            for i in range(0, lsubcomms):  # complete, signal this to all subcomms
                scomm.ration(data=-99, tag=GWORK_TAG)

        # subcomm root - performs the same tasks as other subcomm ranks, but also gets the specifier to work on and sends
        # this information to all ranks within subcomm
        elif (lrank == 0):
            i = -999
            while i != -99:
                i = scomm.ration(tag=GWORK_TAG)  # recv from global
                for x in range(1, lsize):
                    inter_comm.ration(i, LWORK_TAG)  # send to local ranks
                if i != -99:
                    # create the PyReshaper object - uncomment when multiple specifiers are allowed
                    reshpr = reshaper.create_reshaper(specifiers[i],
                                                      serial=False,
                                                      verbosity=debug,
                                                      simplecomm=inter_comm)
                    # Run the conversion (slice-to-series) process
                    reshpr.convert()
                inter_comm.sync()

        # all subcomm ranks - recv the specifier to work on and call the reshaper
        else:
            i = -999
            while i != -99:
                i = inter_comm.ration(tag=LWORK_TAG)  # recv from local root
                if i != -99:
                    # create the PyReshaper object - uncomment when multiple specifiers are allowed
                    reshpr = reshaper.create_reshaper(specifiers[i],
                                                      serial=False,
                                                      verbosity=debug,
                                                      simplecomm=inter_comm)
                    # Run the conversion (slice-to-series) process
                    reshpr.convert()
                inter_comm.sync()

    if rank == 0:
        # Update system log with the dates that were just converted
        debugMsg('before chunking.write_log', header=True, verbosity=1)
        chunking.write_log('{0}/logs/ts_status.log'.format(caseroot), log)
        debugMsg('after chunking.write_log', header=True, verbosity=1)

    scomm.sync()

    return 0
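# A condensed sketch of the manager/worker ration protocol used above, assuming
# ASAPTools' SimpleComm.ration(); process_item is a hypothetical worker callable.
# The manager hands out one work index at a time and finishes by sending a -99
# sentinel to each worker so the receive loops terminate.
GWORK_TAG = 10
SENTINEL = -99


def ration_sketch(scomm, work_items, process_item):
    if scomm.is_manager():
        for i in range(len(work_items)):
            scomm.ration(data=i, tag=GWORK_TAG)         # hand out the next index
        for _ in range(scomm.get_size() - 1):
            scomm.ration(data=SENTINEL, tag=GWORK_TAG)  # one stop signal per worker
    else:
        i = None
        while i != SENTINEL:
            i = scomm.ration(tag=GWORK_TAG)             # receive an index or the sentinel
            if i != SENTINEL:
                process_item(work_items[i])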
def main(options, main_comm, debugMsg):
    """setup the environment for running the diagnostics in parallel. 

    Calls 6 different diagnostics generation types:
    model vs. observation (optional BGC - ecosystem)
    model vs. control (optional BGC - ecosystem)
    model time-series (optional BGC - ecosystem)

    Arguments:
    options (object) - command line options
    main_comm (object) - MPI simple communicator object
    debugMsg (object) - vprinter object for printing debugging messages

    The env_diags_ocn.xml configuration file defines the way the diagnostics are generated. 
    See (website URL here...) for a complete desciption of the env_diags_ocn XML options.
    """

    # initialize the environment dictionary
    envDict = dict()

    # CASEROOT is given on the command line as required option --caseroot
    if main_comm.is_manager():
        caseroot = options.caseroot[0]
        debugMsg('caseroot = {0}'.format(caseroot), header=True, verbosity=2)

        debugMsg('calling initialize_main', header=True, verbosity=2)
        envDict = initialize_main(envDict, caseroot, debugMsg, options.standalone)

        debugMsg('calling check_ncl_nco', header=True, verbosity=2)
        diagUtilsLib.check_ncl_nco(envDict)

    # broadcast envDict to all tasks
    envDict = main_comm.partition(data=envDict, func=partition.Duplicate(), involved=True)
    sys.path.append(envDict['PATH'])
    main_comm.sync()

    # get list of diagnostics types to be created
    diag_list = list()
    num_of_diags = 0

    if main_comm.is_manager():
        diag_list, diag_dict = setup_diags(envDict)

        num_of_diags = len(diag_list)
        if num_of_diags == 0:
            print('No ocean diagnostics specified. Please check the {0}/env_diags_ocn.xml settings.'.format(envDict['PP_CASE_PATH']))
            sys.exit(1)

        print('User requested diagnostics:')
        for diag in diag_list:
            print('  {0}'.format(diag))

        try:
            os.makedirs(envDict['WORKDIR'])
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                err_msg = 'ERROR: ocn_diags_generator.py problem accessing the working directory {0}'.format(envDict['WORKDIR'])
                raise OSError(err_msg)

        debugMsg('Ocean diagnostics - Creating main index.html page', header=True, verbosity=2)

        # define the templatePath
        templatePath = '{0}/diagnostics/diagnostics/ocn/Templates'.format(envDict['POSTPROCESS_PATH']) 

        templateLoader = jinja2.FileSystemLoader( searchpath=templatePath )
        templateEnv = jinja2.Environment( loader=templateLoader )
            
        template_file = 'ocean_diagnostics.tmpl'
        template = templateEnv.get_template( template_file )
            
        # get the current datetime string for the template
        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        # set the template variables
        templateVars = { 'casename' : envDict['CASE'],
                         'tagname' : envDict['CESM_TAG'],
                         'diag_dict' : diag_dict,
                         'control_casename' : envDict['CNTRLCASE'],
                         'start_year' : envDict['YEAR0'],
                         'stop_year' : envDict['YEAR1'],
                         'control_start_year' : envDict['CNTRLYEAR0'],
                         'control_stop_year' : envDict['CNTRLYEAR1'],
                         'today': now,
                         'tseries_start_year' : envDict['TSERIES_YEAR0'],
                         'tseries_stop_year' : envDict['TSERIES_YEAR1']
                         }

        # write the main index.html page to the top working directory
        main_html = template.render( templateVars )
        with open( '{0}/index.html'.format(envDict['WORKDIR']), 'w') as index:
            index.write(main_html)

        debugMsg('Ocean diagnostics - Copying stylesheet', header=True, verbosity=2)
        shutil.copy2('{0}/Templates/diag_style.css'.format(envDict['POSTPROCESS_PATH']), '{0}/diag_style.css'.format(envDict['WORKDIR']))

        debugMsg('Ocean diagnostics - Copying logo files', header=True, verbosity=2)
        if not os.path.exists('{0}/logos'.format(envDict['WORKDIR'])):
            os.mkdir('{0}/logos'.format(envDict['WORKDIR']))

        for filename in glob.glob(os.path.join('{0}/Templates/logos'.format(envDict['POSTPROCESS_PATH']), '*.*')):
            shutil.copy(filename, '{0}/logos'.format(envDict['WORKDIR']))
 
        # setup the unique OCNDIAG_WEBDIR output file
        env_file = '{0}/env_diags_ocn.xml'.format(envDict['PP_CASE_PATH'])
        key = 'OCNDIAG_WEBDIR'
        value = envDict['WORKDIR']
        ##web_file = '{0}/web_dirs/{1}.{2}-{3}'.format(envDict['PP_CASE_PATH'], key, main_comm.get_size(), main_comm.get_rank() )
        web_file = '{0}/web_dirs/{1}.{2}'.format(envDict['PP_CASE_PATH'], key, datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S'))
        try:
            diagUtilsLib.write_web_file(web_file, 'ocn', key, value)
        except:
            print('WARNING ocn_diags_generator unable to write {0}={1} to {2}'.format(key, value, web_file))

    main_comm.sync()

    # broadcast the diag_list to all tasks
    num_of_diags = main_comm.partition(num_of_diags, func=partition.Duplicate(), involved=True)
    diag_list = main_comm.partition(data=diag_list, func=partition.Duplicate(), involved=True)
    main_comm.sync()

    # initialize some variables for distributing diagnostics across the communicators
    diags_send = diag_list
    gmaster = main_comm.is_manager()
    gsize = main_comm.get_size()
    grank = main_comm.get_rank()
    local_diag_list = list()

    # divide the main communicator into sub_communicators to be passed to each diag class
    # split mpi comm world if the size of the communicator > 1 and the num_of_diags > 1
    if gsize > 1 and num_of_diags > 1:
        temp_color = (grank % num_of_diags)
        if (temp_color == num_of_diags):
            temp_color = temp_color - 1
        groups = list()
        for g in range(0,num_of_diags):
            groups.append(g)
        debugMsg('global_rank {0}, temp_color {1}, #of groups(diag types) {2}, groups {3}, diag_list {4}'.format(grank, temp_color, num_of_diags, groups, diag_list), header=True, verbosity=2)
        group = groups[temp_color]
        inter_comm, multi_comm = main_comm.divide(group)
        color = inter_comm.get_color()
        lsize = inter_comm.get_size()
        lrank = inter_comm.get_rank()
        lmaster = inter_comm.is_manager()
        debugMsg('color {0}, lsize {1}, lrank {2}, lmaster {3}'.format(color, lsize, lrank, lmaster), header=True, verbosity=2)

        # partition the diag_list between communicators
        DIAG_LIST_TAG = 10
        if lmaster:
            local_diag_list = multi_comm.partition(diag_list,func=partition.EqualStride(),involved=True)
            debugMsg('lrank = {0} local_diag_list = {1}'.format(lrank, local_diag_list), header=True, verbosity=2)
            for b in range(1, lsize):
                diags_send = inter_comm.ration(data=local_diag_list, tag=DIAG_LIST_TAG) 
                debugMsg('b = {0} diags_send = {1} lsize = {2}'.format(b, diags_send, lsize), header=True, verbosity=2)
        else:
            local_diag_list = inter_comm.ration(tag=DIAG_LIST_TAG)
        debugMsg('local_diag_list {0}'.format(local_diag_list), header=True, verbosity=2)
    else:
        inter_comm = main_comm
        lmaster = main_comm.is_manager()
        lsize = main_comm.get_size()
        lrank = main_comm.get_rank()
        local_diag_list = diag_list

    inter_comm.sync()
    main_comm.sync()

    # loop through the local_diag_list 
    for requested_diag in local_diag_list:
        try:
            debugMsg('requested_diag {0}, lrank {1}, lsize {2}, lmaster {3}'.format(requested_diag, lrank, lsize, lmaster), header=True, verbosity=2)
            diag = ocn_diags_factory.oceanDiagnosticsFactory(requested_diag)

            # check the prerequisites for the diagnostics types
            debugMsg('Checking prerequisites for {0}'.format(diag.__class__.__name__), header=True, verbosity=2)
            
            skip_key = '{0}_SKIP'.format(requested_diag)
            if lmaster:
                try:
                    envDict = diag.check_prerequisites(envDict)
                except ocn_diags_bc.PrerequisitesError:
                    print("Problem with check_prerequisites for '{0}' skipping!".format(requested_diag))
                    envDict[skip_key] = True
                except RuntimeError as e:
                    # unrecoverable error, bail!
                    print(e)
                    envDict['unrecoverableErrorOnMaster'] = True

            inter_comm.sync()

            # broadcast the envDict
            envDict = inter_comm.partition(data=envDict, func=partition.Duplicate(), involved=True)

            if 'unrecoverableErrorOnMaster' in envDict:
                raise RuntimeError

            # run the diagnostics type on each inter_comm
            if skip_key not in envDict:
                # set the shell env using the values set in the XML and read into the envDict across all tasks
                cesmEnvLib.setXmlEnv(envDict)
                # run the diagnostics
                envDict = diag.run_diagnostics(envDict, inter_comm)

            inter_comm.sync()
            
        except ocn_diags_bc.RecoverableError as e:
            # catch all recoverable errors, print a message and continue.
            print(e)
            print("Skipped '{0}' and continuing!".format(requested_diag))
        except RuntimeError as e:
            # unrecoverable error, bail!
            print(e)
            return 1

    main_comm.sync()
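# A reduced sketch of the communicator split used above, assuming ASAPTools'
# SimpleComm.divide(): ranks passing the same group id land in the same inter_comm,
# the group managers are connected through multi_comm, and EqualStride() strides the
# diagnostic names across the groups. Broadcasting the local list with Duplicate()
# here stands in for the ration loop the original uses.
from asaptools import partition


def split_diag_list_sketch(main_comm, diag_list):
    group = main_comm.get_rank() % len(diag_list)    # color each rank by diag group
    inter_comm, multi_comm = main_comm.divide(group)
    local_diags = None
    if inter_comm.is_manager():
        # the group managers split the diagnostics between groups, one stride each
        local_diags = multi_comm.partition(diag_list,
                                           func=partition.EqualStride(),
                                           involved=True)
    # every rank in the group receives its group's share
    local_diags = inter_comm.partition(local_diags,
                                       func=partition.Duplicate(),
                                       involved=True)
    return inter_comm, local_diags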
def main(options, main_comm, debugMsg):
    """setup the environment for running the pyAverager in parallel. 

    Arguments:
    options (object) - command line options
    main_comm (object) - MPI simple communicator object
    debugMsg (object) - vprinter object for printing debugging messages

    The env_diags_ice.xml configuration file defines the way the diagnostics are generated. 
    See (website URL here...) for a complete desciption of the env_diags_ice XML options.
    """

    # initialize the environment dictionary
    envDict = dict()

    # CASEROOT is given on the command line as required option --caseroot
    caseroot = options.caseroot[0]
    if main_comm.is_manager():
        debugMsg('caseroot = {0}'.format(caseroot), header=True)
        debugMsg('calling initialize_envDict', header=True)
        envDict = initialize_envDict(envDict, caseroot, debugMsg,
                                     options.standalone)

    # broadcast envDict to all tasks
    envDict = main_comm.partition(data=envDict,
                                  func=partition.Duplicate(),
                                  involved=True)
    main_comm.sync()

    # specify variables to include in the averages, empty list implies get them all
    varList = []

    # generate the climatology files used for all plotting types using the pyAverager
    if main_comm.is_manager():
        debugMsg('calling createClimFiles', header=True)
        debugMsg('calling checkHistoryFiles for control run', header=True)
    main_comm.sync()

    cont_time_series = envDict['CONT_TIMESERIES']
    suffix = 'cice.h.*.nc'
    filep = r'.*\.cice.h.\d{4,4}-\d{2,2}\.nc'
    main_comm.sync()

    # get model history file information from the DOUT_S_ROOT archive location
    (start_year, stop_year, in_dir, envDict['cont_htype'],
     envDict['cont_key_infile']) = diagUtilsLib.checkHistoryFiles(
         cont_time_series, envDict['PATH_CONT'], envDict['CASE_TO_CONT'],
         envDict['BEGYR_CONT'], envDict['ENDYR_CONT'], 'ice', suffix, filep)

    if envDict['COMPUTE_CLIMO_CONT'].lower() == 'true':
        try:
            split_size = None
            if cont_time_series.lower() == 'true':
                if main_comm.is_manager():
                    debugMsg('Computing averages for model vs. obs',
                             header=True)

                h_path = envDict['PATH_CONT'] + '/ice/proc/tseries/monthly/'
                # Check to see if tseries is split into hemispheres
                split = checkIceSplit(envDict['ICE_NY_CONT'],
                                      envDict['cont_key_infile'])
                if split:
                    split_size = 'nj=' + envDict[
                        'ICE_NY_CONT'] + ',ni=' + envDict['ICE_NX_CONT']
            else:
                h_path = envDict['PATH_CONT'] + '/ice/hist/'
                split = False

            avg_BEGYR = (int(envDict['ENDYR_CONT']) -
                         int(envDict['YRS_TO_AVG'])) + 1
            createClimFiles(avg_BEGYR, envDict['ENDYR_CONT'], h_path, split,
                            split_size, envDict['cont_htype'],
                            envDict['cont_key_infile'],
                            envDict['PATH_CLIMO_CONT'],
                            envDict['CASE_TO_CONT'], 'cice.h', varList,
                            envDict, envDict['GRIDFILECONT'],
                            envDict['BEGYR_CONT'], envDict['ENDYR_CONT'],
                            main_comm, debugMsg)
        except Exception as error:
            print(str(error))
            traceback.print_exc()
            sys.exit(1)

    if (envDict['MODEL_VS_MODEL'].lower() == 'true'
            and envDict['COMPUTE_CLIMO_DIFF'].lower() == 'true'):
        try:
            diff_time_series = envDict['DIFF_TIMESERIES']
            split_size = None
            suffix = 'cice.h.*.nc'
            filep = r'.*\.cice.h.\d{4,4}-\d{2,2}\.nc'
            (start_year, stop_year, in_dir, envDict['diff_htype'],
             envDict['diff_key_infile']) = diagUtilsLib.checkHistoryFiles(
                 diff_time_series, envDict['PATH_DIFF'],
                 envDict['CASE_TO_DIFF'], envDict['BEGYR_DIFF'],
                 envDict['ENDYR_DIFF'], 'ice', suffix, filep)

            if diff_time_series.lower() == 'true':
                if main_comm.is_manager():
                    debugMsg('Computing averages for model vs. model',
                             header=True)

                h_path = envDict['PATH_DIFF'] + '/ice/proc/tseries/monthly/'
                # Check to see if tseries is split into hemispheres
                split = checkIceSplit(envDict['ICE_NY_DIFF'],
                                      envDict['diff_key_infile'])
                if split:
                    split_size = 'nj=' + envDict[
                        'ICE_NY_DIFF'] + ',ni=' + envDict['ICE_NX_DIFF']
            else:
                h_path = envDict['PATH_DIFF'] + '/ice/hist/'
                split = False

            avg_BEGYR_DIFF = (int(envDict['ENDYR_DIFF']) -
                              int(envDict['YRS_TO_AVG'])) + 1
            createClimFiles(avg_BEGYR_DIFF, envDict['ENDYR_DIFF'], h_path,
                            split, split_size, envDict['diff_htype'],
                            envDict['diff_key_infile'],
                            envDict['PATH_CLIMO_DIFF'],
                            envDict['CASE_TO_DIFF'], 'cice.h', varList,
                            envDict, envDict['GRIDFILEDIFF'],
                            envDict['BEGYR_DIFF'], envDict['ENDYR_DIFF'],
                            main_comm, debugMsg)
        except Exception as error:
            print(str(error))
            traceback.print_exc()
            sys.exit(1)
def main(options, scomm, rank, size):
    """
    """
    # initialize the CASEROOT environment dictionary
    cesmEnv = dict()

    # CASEROOT is given on the command line as required option --caseroot
    caseroot = options.caseroot[0]

    # set the debug level 
    debug = options.debug[0]

    # is there only one mip definition in each file?
    ind = "True" 

    # get the XML variables loaded into a hash
    env_file_list = ['env_postprocess.xml','env_conform.xml']
    cesmEnv = cesmEnvLib.readXML(caseroot, env_file_list)

    # We want to have warnings and not errors (at least for the first sets of cmip simulations)
    simplefilter("default", ValidationWarning)

    # Get the extra modules pyconform needs
    pp_path = cesmEnv["POSTPROCESS_PATH"]
    conform_module_path = pp_path+'/conformer/conformer/source/pyconform/modules/'
    for i, m in enumerate(external_mods):
        print("Loading: "+conform_module_path+"/"+m)
        load_source('user{}'.format(i), conform_module_path+"/"+m)

    # create the cesm stream to table mapping
#    if rank == 0:
    dout_s_root = cesmEnv['DOUT_S_ROOT']
    case = cesmEnv['CASE']
    pc_inpur_dir = cesmEnv['CONFORM_JSON_DIRECTORY']+'/PyConform_input/'
    #readArchiveXML(caseroot, dout_s_root, case, debug)
    nc_files = find_nc_files(dout_s_root)
    variable_list = fill_list(nc_files, pc_inpur_dir, cesmEnv["CONFORM_EXTRA_FIELD_NETCDF_DIR"], scomm, rank, size)

    mappings = {}
    if rank == 0:
        mappings = match_tableSpec_to_stream(pc_inpur_dir, variable_list)
        for k, v in sorted(mappings.items()):
            print(k)
            for f in sorted(v):
                print(f)
            print('{0}\n\n'.format(len(v)))
    scomm.sync()

    # Pass the stream and mapping information to the other procs
    mappings = scomm.partition(mappings, func=partition.Duplicate(), involved=True)
    print("I CAN RUN ",len(mappings.keys())," json files")
    failures = 0

    if len(mappings.keys()) > 0:
        # setup subcommunicators to do streams and chunks in parallel
        # everyone participates except for root
        inter_comm, lsubcomms = divide_comm(scomm, len(mappings.keys()), ind)
        color = inter_comm.get_color()
        lsize = inter_comm.get_size()
        lrank = inter_comm.get_rank()
        print "MPI INFO: ",color," ",lrank,"/",lsize,"  ",rank,"/",size

        GWORK_TAG = 10 # global comm mpi tag
        LWORK_TAG = 20 # local comm mpi tag
        # global root - hands out mappings to work on.  When complete, it must tell each subcomm all work is done.
        if (rank == 0):
            #for i in range(0,len(mappings.keys())): # hand out all mappings
            for i in mappings.keys():
                scomm.ration(data=i, tag=GWORK_TAG)
            for i in range(1,lsubcomms): # complete, signal this to all subcomms
                scomm.ration(data=-99, tag=GWORK_TAG)

        # subcomm root - performs the same tasks as other subcomm ranks, but also gets the specifier to work on and sends
        # this information to all ranks within subcomm
        elif (lrank == 0):
            i = -999
            while i != -99:
                i = scomm.ration(tag=GWORK_TAG) # recv from global
                for x in range(1,lsize):
                    inter_comm.ration(i, LWORK_TAG) # send to local ranks  
                if i != -99:
                    print "(",rank,"/",lrank,")","  start running ",i
                    failures += run_PyConform(i, mappings[i], inter_comm)
                    print "(",rank,"/",lrank,")","  finished running ",i
                    print "(",rank,"/",lrank,")","FAILURES: ",failures
                inter_comm.sync()

        # all subcomm ranks - recv the specifier to work on and call the reshaper
        else:
            i = -999
            while i != -99:
                i = inter_comm.ration(tag=LWORK_TAG) # recv from local root    
                if i != -99:
                    print "(",rank,"/",lrank,")","  start running ",i
                    failures += run_PyConform(i, mappings[i], inter_comm)
                    print "(",rank,"/",lrank,")","  finished running ",i
                    print "(",rank,"/",lrank,")","FAILURES: ",failures
                inter_comm.sync()
    print "(",rank,"/",lrank,")","  FINISHED"
    scomm.sync()
def fill_list(nc_files, root_dir, extra_dir, comm, rank, size):

    variablelist = {}
    gridfile = None
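    # distribute the file list across the ranks; EqualLength gives each rank a
    # contiguous, roughly equal-sized slice of nc_files to scan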
    nc_files_l = comm.partition(nc_files,func=partition.EqualLength(),involved=True)
    for fn in nc_files_l:
        f = nc.Dataset(fn, "r")
        mt = fn.replace(root_dir,"").split("/")[-5]         
        stri = fn
        model_type = mt
        if "lnd" in model_type or "rof" in model_type:
            model_type = 'lnd,rof'
        if "glc" in model_type:
            model_type = 'glc,lnd'
        if ("time" not in f.variables.keys() or "tseries" not in fn):
            variablelist["skip"] = {}
        else:
            lt = "none"
            ln = "none"
            lv = "none"
            lat_name = None
            lon_name = None
            lev_name = None
            time_name = None
            # Find which dim variables to use
            v_dims = f.variables[fn.split('.')[-3]].dimensions 
            for i in grids[mt]['lat']:
                if i in v_dims:
                    if 'nlat' in i:
                        lat_name = str(f.variables[fn.split('.')[-3]].coordinates.split()[1])
                    else:
                        lat_name = i
                    lt = len(f.dimensions[i])
            for i in grids[mt]['lon']:
                if i in v_dims:
                    if 'nlon' in i:
                        lon_name = str(f.variables[fn.split('.')[-3]].coordinates.split()[0])
                        if 'ULONG' in lon_name:
                            ln = str(len(f.dimensions[i])) + "_UGRID"
                        else:
                            ln = str(len(f.dimensions[i])) + "_TGRID"
                    else:
                        lon_name = i
                        ln = len(f.dimensions[i])
            for i in grids[mt]['lev']:
                if i in v_dims:
                    lev_name = i
                    lv = len(f.dimensions[i])
            for i in grids[mt]['time']:
                if i in v_dims:
                    time_name = i
                    lv = len(f.dimensions[i])
            gridfile = '{0}/{1}x{2}x{3}.nc'.format(extra_dir, mt, lt, ln)

            for vn, ob in f.variables.items():
                if model_type not in variablelist.keys():
                    variablelist[model_type] = {}
                if vn not in variablelist[model_type].keys():
                    variablelist[model_type][vn] = {}
                if hasattr(f,"time_period_freq"):
                    if f.time_period_freq not in variablelist[model_type][vn].keys():
                        variablelist[model_type][vn][f.time_period_freq] = {}
                    date = stri.split('.')[-2]      
                    if date not in variablelist[model_type][vn][f.time_period_freq].keys():
                        variablelist[model_type][vn][f.time_period_freq][date] = {}
                    if 'files' not in variablelist[model_type][vn][f.time_period_freq][date].keys():
                        variablelist[model_type][vn][f.time_period_freq][date]['files']=[stri,gridfile]
                        variablelist[model_type][vn][f.time_period_freq][date]['lat']=lat_name
                        variablelist[model_type][vn][f.time_period_freq][date]['lon']=lon_name
                        variablelist[model_type][vn][f.time_period_freq][date]['lev']=lev_name
                        variablelist[model_type][vn][f.time_period_freq][date]['time']=time_name
                else:
                    if "unknown" not in variablelist[model_type][vn].keys():
                        variablelist[model_type][vn]["unknown"] = {}
                    if stri not in variablelist[model_type][vn]["unknown"]:
                        variablelist[model_type][vn]["unknown"]["unknown"] = {}
                        variablelist[model_type][vn][f.time_period_freq][date]['files']=[stri,gridfile]
                        variablelist[model_type][vn][f.time_period_freq][date]['lat']=lat_name
                        variablelist[model_type][vn][f.time_period_freq][date]['lon']=lon_name
                        variablelist[model_type][vn][f.time_period_freq][date]['lev']=lev_name
                        variablelist[model_type][vn][f.time_period_freq][date]['time']=time_name
        f.close()
    VL_TAG = 30
    variable_list = {}
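    # gather the per-rank dictionaries on the manager (rank 0), merge them into one
    # nested dictionary, and broadcast the merged result back to every rank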
    if size > 1:
        if rank==0:
            variable_list = variablelist
            for i in range(0,size-1): 
                r,lvarList = comm.collect(data=None, tag=VL_TAG)
                for model_type, d1 in lvarList.items():
                    if model_type not in variable_list.keys():
                        variable_list[model_type] = {}
                    for vn, d2 in d1.items():
                        if vn not in variable_list[model_type].keys():
                            variable_list[model_type][vn] = {}
                        for tp, d3 in d2.items():
                            if tp not in variable_list[model_type][vn].keys():
                                variable_list[model_type][vn][tp] = {}
                            for date, l in d3.items():
                                if date not in variable_list[model_type][vn][tp].keys():
                                    variable_list[model_type][vn][tp][date] = {}
                                if 'files' in variable_list[model_type][vn][tp][date].keys():
                                    if len(lvarList[model_type][vn][tp][date]['files'])>0:
                                        variable_list[model_type][vn][tp][date]['files'].append(lvarList[model_type][vn][tp][date]['files'][0])
                                else:
                                    variable_list[model_type][vn][tp][date] = lvarList[model_type][vn][tp][date]          

#                variable_list.update(lvarList)
            comm.partition(variable_list, func=partition.Duplicate(), involved=True)
        else:
            comm.collect(data=variablelist, tag=VL_TAG)
            variable_list = comm.partition(func=partition.Duplicate(), involved=True)
        comm.sync()
    return variable_list
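# A reduced sketch of the collect/merge/broadcast pattern used in fill_list() above,
# assuming ASAPTools' SimpleComm: workers send their local dictionaries to the
# manager with collect(), the manager merges them, and Duplicate() broadcasts the
# merged dictionary back to every rank.
from asaptools import partition

MERGE_TAG = 30


def gather_dicts_sketch(comm, local_dict):
    if comm.get_size() == 1:
        return local_dict
    if comm.is_manager():
        merged = dict(local_dict)
        for _ in range(comm.get_size() - 1):
            rank, worker_dict = comm.collect(data=None, tag=MERGE_TAG)  # (rank, data) pair
            merged.update(worker_dict)   # shallow merge; fill_list() merges key by key
        comm.partition(merged, func=partition.Duplicate(), involved=True)
        return merged
    comm.collect(data=local_dict, tag=MERGE_TAG)
    return comm.partition(func=partition.Duplicate(), involved=True)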
def get_input_dates(glob_str, comm, rank, size):

    '''
    Open up all of the files that match the search string and get
    the dates within the files.  Also get the number of slices within
    each file, what calendar it uses and the time unit.

    Input:
    glob_str(string) - the search path to get files

    Output:
    stream_dates(dictionary) - keys->date, values->the file where this slice is located
    file_slices(dictionary) - keys->filename, values->the number of slices found in the file 
    calendar(string) - the name of the calendar type (ie, noleap, ...)
    units(string) - the calendar unit (possibly in the form 'days since....')
    time_period_freq(string) - time_period_freq global attribute from first file
    '''
    stream_files = glob.glob(glob_str)

    stream_dates = {}
    file_slices = {}
    att = {}

    if len(stream_files) < 1:
        return stream_dates, file_slices, None, None, None

    time_period_freq = None
    first = True
    stream_files_l = comm.partition(stream_files,func=partition.EqualLength(),involved=True)
    for fn in sorted(stream_files_l):
        print('{0}/{1} opening {2}'.format(rank, size, fn))
        # open file and get time dimension
        f = nc.Dataset(fn,"r")    
        all_t = f.variables['time']
        nc_atts = f.ncattrs()

        # add the file name and how many slices it contains
        file_slices[fn] = len(all_t)

        # add all dates and which file they are located in
        for t in all_t[:]:
            stream_dates[t] = fn

        # get all attributes of time in order to get cal and units 
        for a in all_t.ncattrs():
            att[a] = all_t.__getattribute__(a)

        # get the time_period_freq global attribute from the first file
        if first:
            try:
                time_period_freq = f.getncattr('time_period_freq')
                print('time_period_freq = {0}'.format(time_period_freq))
            except AttributeError:
                print('Global attribute time_period_freq not found - set to XML tseries_tper element')
            first = False
        f.close()

    # merge results across ranks; with a single task the local results are already global
    g_stream_dates = stream_dates
    g_file_slices = file_slices
    g_att = att
    if size > 1:
        T1 = 31
        T2 = 32 
        T3 = 33
        if rank==0:
            g_stream_dates = stream_dates
            g_file_slices = file_slices
            g_att = att
            for i in range(0,size-1):
                r,l_stream_dates = comm.collect(data=None, tag=T1)
                g_stream_dates.update(l_stream_dates)               
 
                r,l_file_slices = comm.collect(data=None, tag=T2)
                g_file_slices.update(l_file_slices)

                r,l_att = comm.collect(data=None, tag=T3)
                g_att.update(l_att)

            comm.partition(g_stream_dates, func=partition.Duplicate(), involved=True)
            comm.partition(g_file_slices, func=partition.Duplicate(), involved=True)
            comm.partition(g_att, func=partition.Duplicate(), involved=True)
        else:
            comm.collect(data=stream_dates, tag=T1) 
            comm.collect(data=file_slices, tag=T2)
            comm.collect(data=att, tag=T3) 
 
            g_stream_dates = comm.partition(func=partition.Duplicate(), involved=True)
            g_file_slices = comm.partition(func=partition.Duplicate(), involved=True)
            g_att = comm.partition(func=partition.Duplicate(), involved=True)
    if 'calendar' in g_att:
        calendar = g_att['calendar']
    else:
        calendar = "noleap"
    if 'units' in g_att:
        units = g_att['units']
    else:
        units = "days since 0000-01-01 00:00:00"
    comm.sync()
    return g_stream_dates,g_file_slices,calendar.lower(),units,time_period_freq
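# A hypothetical call to get_input_dates(), illustrating the glob pattern and the
# return values described in the docstring; the path, case name, and communicator
# variables below are placeholders, not values from the original sources.
#
#   dates, slices, calendar, units, tper = get_input_dates(
#       '/archive/mycase/ocn/hist/mycase.pop.h.*.nc', scomm, rank, size)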
def testDuplicate(self):
    for inp in self.inputs:
        pfunc = partition.Duplicate()
        actual = pfunc(*inp)
        expected = inp[0]
        self.assertEqual(actual, expected)
def main(options, main_comm, debugMsg):
    """setup the environment for running the pyAverager in parallel. 

    Arguments:
    options (object) - command line options
    main_comm (object) - MPI simple communicator object
    debugMsg (object) - vprinter object for printing debugging messages

    The env_diags_ocn.xml configuration file defines the way the diagnostics are generated. 
    See (website URL here...) for a complete desciption of the env_diags_ocn XML options.
    """

    # initialize the environment dictionary
    envDict = dict()

    # CASEROOT is given on the command line as required option --caseroot
    if main_comm.is_manager():
        caseroot = options.caseroot[0]
        debugMsg('caseroot = {0}'.format(caseroot), header=True)
        debugMsg('calling initialize_envDict', header=True)
        envDict = initialize_envDict(envDict, caseroot, debugMsg, options.standalone)

    # broadcast envDict to all tasks
    envDict = main_comm.partition(data=envDict, func=partition.Duplicate(), involved=True)
    sys.path.append(envDict['PATH'])
    main_comm.sync()

    # generate the climatology files used for all plotting types using the pyAverager
    if main_comm.is_manager():
        debugMsg('calling checkHistoryFiles for model case', header=True)
        suffix = 'pop.h.*.nc'
        file_pattern = r'.*\.pop\.h\.\d{4,4}-\d{2,2}\.nc'
        start_year, stop_year, in_dir, htype, firstHistoryFile = diagUtilsLib.checkHistoryFiles(
            envDict['MODELCASE_INPUT_TSERIES'], envDict['DOUT_S_ROOT'], envDict['CASE'],
            envDict['YEAR0'], envDict['YEAR1'], 'ocn', suffix, file_pattern, envDict['MODELCASE_SUBDIR'])
        envDict['YEAR0'] = start_year
        envDict['YEAR1'] = stop_year
        envDict['in_dir'] = in_dir
        envDict['htype'] = htype

    main_comm.sync()

    envDict = main_comm.partition(data=envDict, func=partition.Duplicate(), involved=True)
    main_comm.sync()


    # MODEL_TIMESERIES denotes the plotting diagnostic type requested and whether or
    # not to generate the necessary climo files for those plot sets
    tseries = False
    if envDict['MODEL_TIMESERIES'].lower() in ['t','true']:
        if main_comm.is_manager():
            debugMsg('timeseries years before checkHistoryFiles {0} - {1}'.format(envDict['TSERIES_YEAR0'], envDict['TSERIES_YEAR1']), header=True)
            tseries_start_year, tseries_stop_year, in_dir, htype, firstHistoryFile = \
                diagUtilsLib.checkHistoryFiles(envDict['MODELCASE_INPUT_TSERIES'], envDict['DOUT_S_ROOT'], 
                                               envDict['CASE'], envDict['TSERIES_YEAR0'], 
                                               envDict['TSERIES_YEAR1'], 'ocn', suffix, file_pattern,
                                               envDict['MODELCASE_SUBDIR'])
            debugMsg('timeseries years after checkHistoryFiles {0} - {1}'.format(envDict['TSERIES_YEAR0'], envDict['TSERIES_YEAR1']), header=True)
            envDict['TSERIES_YEAR0'] = tseries_start_year
            envDict['TSERIES_YEAR1'] = tseries_stop_year

        main_comm.sync()
        tseries = True
        envDict = main_comm.partition(data=envDict, func=partition.Duplicate(), involved=True)
        main_comm.sync()

    try:
        if main_comm.is_manager():
            debugMsg('calling createClimFiles for model and timeseries', header=True)

        createClimFiles(envDict['YEAR0'], envDict['YEAR1'], envDict['in_dir'],
                        envDict['htype'], envDict['TAVGDIR'], envDict['CASE'], 
                        tseries, envDict['MODEL_VARLIST'], envDict['TSERIES_YEAR0'], 
                        envDict['TSERIES_YEAR1'], envDict['DIAGOBSROOT'], 
                        envDict['netcdf_format'], int(envDict['VERTICAL']), 
                        envDict['TIMESERIES_OBSPATH'], main_comm, debugMsg)
    except Exception as error:
        print(str(error))
        traceback.print_exc()
        sys.exit(1)

    main_comm.sync()

    # check that the necessary control climatology files exist
    if envDict['MODEL_VS_CONTROL'].upper() == 'TRUE':

        if main_comm.is_manager():
            debugMsg('calling checkHistoryFiles for control case', header=True)
            suffix = 'pop.h.*.nc'
            file_pattern = r'.*\.pop\.h\.\d{4,4}-\d{2,2}\.nc'
            start_year, stop_year, in_dir, htype, firstHistoryFile = diagUtilsLib.checkHistoryFiles(
                envDict['CNTRLCASE_INPUT_TSERIES'], envDict['CNTRLCASEDIR'], envDict['CNTRLCASE'], 
                envDict['CNTRLYEAR0'], envDict['CNTRLYEAR1'], 'ocn', suffix, file_pattern,
                envDict['CNTRLCASE_SUBDIR'])
            envDict['CNTRLYEAR0'] = start_year
            envDict['CNTRLYEAR1'] = stop_year
            envDict['cntrl_in_dir'] = in_dir
            envDict['cntrl_htype'] = htype

        main_comm.sync()
        envDict = main_comm.partition(data=envDict, func=partition.Duplicate(), involved=True)
        main_comm.sync()

        if main_comm.is_manager():
            debugMsg('before createClimFiles call for control', header=True)
            debugMsg('...CNTRLYEAR0 = {0}'.format(envDict['CNTRLYEAR0']), header=True)
            debugMsg('...CNTRLYEAR1 = {0}'.format(envDict['CNTRLYEAR1']), header=True)
            debugMsg('...cntrl_in_dir = {0}'.format(envDict['cntrl_in_dir']), header=True)
            debugMsg('...cntrl_htype = {0}'.format(envDict['cntrl_htype']), header=True)
            debugMsg('...CNTRLTAVGDIR = {0}'.format(envDict['CNTRLTAVGDIR']), header=True)
            debugMsg('...CNTRLCASE = {0}'.format(envDict['CNTRLCASE']), header=True)
            debugMsg('...CNTRLCASE_INPUT_TSERIES = {0}'.format(envDict['CNTRLCASE_INPUT_TSERIES']), header=True)
            debugMsg('...varlist = {0}'.format(envDict['CNTRL_VARLIST']), header=True)
            debugMsg('calling createClimFiles for control', header=True)
        
        # don't create timeseries averages for the control case so set to False and set the
        # tseries_start_year and tseries_stop_year to 0
        try:
            createClimFiles(envDict['CNTRLYEAR0'], envDict['CNTRLYEAR1'], envDict['cntrl_in_dir'],
                            envDict['cntrl_htype'], envDict['CNTRLTAVGDIR'], envDict['CNTRLCASE'], 
                            False, envDict['CNTRL_VARLIST'], 0, 0, envDict['DIAGOBSROOT'],
                            envDict['netcdf_format'], int(envDict['VERTICAL']), 
                            envDict['TIMESERIES_OBSPATH'], main_comm, debugMsg)
        except Exception as error:
            print(str(error))
            traceback.print_exc()
            sys.exit(1)
def main(options, main_comm, debugMsg):
    """setup the environment for running the diagnostics in parallel. 

    Calls 2 different diagnostics generation types:
    model vs. observation 
    model vs. model 

    Arguments:
    options (object) - command line options
    main_comm (object) - MPI simple communicator object
    debugMsg (object) - vprinter object for printing debugging messages

    The env_diags_atm.xml configuration file defines the way the diagnostics are generated. 
    See (website URL here...) for a complete desciption of the env_diags_atm XML options.
    """

    # initialize the environment dictionary
    envDict = dict()

    # CASEROOT is given on the command line as required option --caseroot
    if main_comm.is_manager():
        caseroot = options.caseroot[0]
        debugMsg('caseroot = {0}'.format(caseroot), header=True)

        debugMsg('calling initialize_main', header=True)
        envDict = initialize_main(envDict, caseroot, debugMsg,
                                  options.standalone)

        debugMsg('calling check_ncl_nco', header=True)
        diagUtilsLib.check_ncl_nco(envDict)

    # broadcast envDict to all tasks
    envDict = main_comm.partition(data=envDict,
                                  func=partition.Duplicate(),
                                  involved=True)
    sys.path.append(envDict['PATH'])
    main_comm.sync()

    # check to see if the climos need to be regridded into a lat/lon grid
    if (envDict['test_regrid'] == 'True' or envDict['cntl_regrid'] == 'True'):
        regrid_climos(envDict, main_comm)
    main_comm.sync()

    # get list of diagnostics types to be created
    diag_list = list()
    diag_list = setup_diags(envDict)
    if main_comm.is_manager():
        print('User requested diagnostics:')
        for diag in diag_list:
            print('  {0}'.format(diag))

    main_comm.sync()

    # broadcast the diag_list to all tasks
    num_of_diags = len(diag_list)
    num_of_diags = main_comm.partition(num_of_diags,
                                       func=partition.Duplicate(),
                                       involved=True)
    diag_list = main_comm.partition(data=diag_list,
                                    func=partition.Duplicate(),
                                    involved=True)
    main_comm.sync()

    # initialize some variables for distributing diagnostics across the communicators
    diags_send = diag_list
    gmaster = main_comm.is_manager()
    gsize = main_comm.get_size()
    grank = main_comm.get_rank()
    local_diag_list = list()

    # divide the main communicator into sub_communicators to be passed to each diag class
    # split mpi comm world if the size of the communicator > 1 and the num_of_diags > 1
    if gsize > 1 and num_of_diags > 1:
        temp_color = (grank % num_of_diags)
        if (temp_color == num_of_diags):
            temp_color = temp_color - 1
        groups = list()
        for g in range(0, num_of_diags):
            groups.append(g)
        debugMsg(
            'global_rank {0}, temp_color {1}, #of groups(diag types) {2}, groups {3}, diag_list {4}'
            .format(grank, temp_color, num_of_diags, groups, diag_list))
        group = groups[temp_color]
        inter_comm, multi_comm = main_comm.divide(group)
        color = inter_comm.get_color()
        lsize = inter_comm.get_size()
        lrank = inter_comm.get_rank()
        lmaster = inter_comm.is_manager()
        debugMsg('color {0}, lsize {1}, lrank {2}, lmaster {3}'.format(
            color, lsize, lrank, lmaster))

        # partition the diag_list between communicators
        DIAG_LIST_TAG = 10
        if lmaster:
            local_diag_list = multi_comm.partition(
                diag_list, func=partition.EqualStride(), involved=True)
            for b in range(1, lsize):
                diags_send = inter_comm.ration(data=local_diag_list,
                                               tag=DIAG_LIST_TAG)
        else:
            local_diag_list = inter_comm.ration(tag=DIAG_LIST_TAG)
        debugMsg('local_diag_list {0}'.format(local_diag_list))
    else:
        inter_comm = main_comm
        lmaster = main_comm.is_manager()
        lsize = main_comm.get_size()
        lrank = main_comm.get_rank()
        local_diag_list = diag_list

    inter_comm.sync()
    main_comm.sync()

    debugMsg('lsize = {0}, lrank = {1}'.format(lsize, lrank))
    inter_comm.sync()

    # loop through the local_diag_list list
    for requested_diag in local_diag_list:
        try:
            diag = atm_diags_factory.atmosphereDiagnosticsFactory(
                requested_diag, envDict)

            # check the prerequisites for the diagnostics types
            debugMsg('Checking prerequisites for {0}'.format(
                diag.__class__.__name__),
                     header=True)

            #if lmaster:
            envDict = diag.check_prerequisites(envDict, inter_comm)

            inter_comm.sync()

            ## broadcast the envDict
            #envDict = inter_comm.partition(data=envDict, func=partition.Duplicate(), involved=True)

            # set the shell env using the values set in the XML and read into the envDict across all tasks
            #cesmEnvLib.setXmlEnv(envDict)

            debugMsg('inter_comm = {0}'.format(inter_comm))
            diag.run_diagnostics(envDict, inter_comm)

        except atm_diags_bc.RecoverableError as e:
            # catch all recoverable errors, print a message and continue.
            print(e)
            print("Skipped '{0}' and continuing!".format(request_diag))
        except RuntimeError as e:
            # unrecoverable error, bail!
            print(e)
            return 1

    main_comm.sync()
def main(options, main_comm, debugMsg):
    """
    read env_ocn_remap.xml settings to call the ocean_remap class
    """
    # initialize the environment dictionary
    envDict = dict()

    # Get rank and size
    rank = main_comm.get_rank()
    size = main_comm.get_size()

    # CASEROOT is given on the command line as required option --caseroot
    if rank == 0:
        caseroot = options.caseroot[0]
        envDict['CASEROOT'] = options.caseroot[0]
        debugMsg('caseroot = {0}'.format(envDict['CASEROOT']),
                 header=True,
                 verbosity=2)

        env_file_list = ['./env_postprocess.xml', './env_ocn_remap.xml']
        envDict = cesmEnvLib.readXML(caseroot, env_file_list)

        # strip the OCNREMAP_ prefix from the envDict entries before setting the
        # environment to allow for compatibility with all the diag routine calls
        envDict = diagUtilsLib.strip_prefix(envDict, 'OCNREMAP_')

        print("cmip6: {0}".format(envDict['cmip6']))
        print("filelist: {0}".format(envDict['filelist']))
        print("matrix_2d_fname: {0}".format(envDict['matrix_2d_fname']))
        print("matrix_3d_fname: {0}".format(envDict['matrix_3d_fname']))
        print("indir: {0}".format(envDict['indir']))
        print("outdir: {0}".format(envDict['outdir']))
        print("chunk size: {0}".format(envDict['chunk']))

    # broadcast envDict to all tasks
    envDict = main_comm.partition(data=envDict,
                                  func=partition.Duplicate(),
                                  involved=True)
    main_comm.sync()

    files = []
    if rank == 0:
        # Find files to regrid
        #Do we have a cmip6 variable list?
        if envDict['cmip6'] is not None:
            if envDict['indir'] is not None:
                with open(envDict['cmip6']) as f:
                    for l in f:
                        t = l.strip().split(':')[0]
                        v = l.strip().split(':')[1]
                        print("Trying to find: {0}_{1}*.nc".format(v, t))
                        for root, dirs, fns in os.walk(envDict['indir']):
                            for fn in fnmatch.filter(fns,
                                                     v + '_' + t + "*.nc"):
                                if 'tmp.nc' not in fn and 'gr' not in fn.split(
                                        '_'):
                                    print("Found: {0}".format(fn.split('/')))
                                    files.append(os.path.join(root, fn))
            else:
                print("You need to specify an indir argument with the cmip6 argument")
                files = None
        elif envDict['filelist'] is not None:
            with open(envDict['filelist']) as f:
                for l in f:
                    files.append(l.strip())
        elif envDict['indir'] is not None:
            for root, dirs, fns in os.walk(envDict['indir']):
                for fn in fnmatch.filter(fns, "*.nc"):
                    files.append(os.path.join(root, fn))
        else:
            print('Exiting because no input path or files were given')
            files = None

    # All call this
    main_comm.sync()
    files = main_comm.partition(files,
                                func=partition.Duplicate(),
                                involved=True)
    if files is None:
        sys.exit()

    #matrix_2d_fname = 'POP_gx1v7_to_latlon_1x1_0E_mask_conserve_20181015.nc'
    matrix_2d = remap.ocean_remap(envDict['matrix_2d_fname'])

    #matrix_3d_fname = 'POP_gx1v7_to_latlon_1x1_0E_fulldepth_conserve_20181015.nc'
    matrix_3d = remap.ocean_remap(envDict['matrix_3d_fname'])

    # names of coordinate dimensions in output files
    dim_names = {'depth': 'olevel', 'lat': 'latitude', 'lon': 'longitude'}
    dim_names = {'depth': 'lev', 'lat': 'lat', 'lon': 'lon'}

    main_comm.sync()
    # Have only root create these files
    if rank == 0:
        if len(files) > 0 and envDict['cmip6'] is not None:
            temp = files[0]
            # create CMIP Ofx files
            for var_name in ('areacello', 'deptho', 'thkcello', 'volcello'):
                new_outdir = temp.replace(
                    temp.split('/')[-4], var_name).replace(
                        temp.split('/')[-5], 'Ofx').replace(
                            temp.split('/')[-3], 'gr').replace(
                                '_' + temp.split('_')[-1], '') + '.nc'
                d = os.path.dirname(new_outdir)
                if not os.path.exists(d):
                    os.makedirs(d)
                fptr_out = nc.Dataset(new_outdir, 'w')  # pylint: disable=E1101
                matrix_3d.dst_grid.def_dims_common(fptr_out, dim_names)
                matrix_3d.dst_grid.write_vars_common(fptr_out, dim_names)
                matrix_3d.dst_grid.write_var_CMIP_Ofx(fptr_out, dim_names,
                                                      var_name)

    # Create a master slave parallel protocol
    GWORK_TAG = 10  # global comm mpi tag
    if (rank == 0):
        for i in files:
            main_comm.ration(data=i, tag=GWORK_TAG)
        for i in range(1, size):
            main_comm.ration(data=-99, tag=GWORK_TAG)
    else:
        f = -999
        while f != -99:
            f = main_comm.ration(tag=GWORK_TAG)
            if f != -99:
                print("working on: {0}".format(f))
                testfile_in_fname = f
                testfile_out_fname = f.replace(f.split('/')[-3], 'gr')
                if not os.path.exists(testfile_out_fname):
                    d = os.path.dirname(testfile_out_fname)
                    if not os.path.exists(d):
                        os.makedirs(d)
                    fptr_in = nc.Dataset(testfile_in_fname, 'r')  # pylint: disable=E1101
                    if (len(fptr_in[f.split('/')[-4]].dimensions) == 4
                            or len(fptr_in[f.split('/')[-4]].dimensions) == 3):
                        fptr_out = nc.Dataset(testfile_out_fname + '.tmp', 'w')  # pylint: disable=E1101

                        remap.copy_time(fptr_in, fptr_out)
                        remap.copy_gAttr(fptr_in, fptr_out)

                        if dim_names['depth'] in fptr_in.dimensions:
                            matrix_3d.dst_grid.def_dims_common(
                                fptr_out, dim_names)
                            matrix_3d.dst_grid.write_vars_common(
                                fptr_out, dim_names)
                        else:
                            matrix_2d.dst_grid.def_dims_common(
                                fptr_out, dim_names)
                            matrix_2d.dst_grid.write_vars_common(
                                fptr_out, dim_names)

                        field_names = []
                        for v in fptr_in.variables:
                            if v not in [
                                    'lat', 'lat_bnds', 'lon', 'lon_bnds',
                                    'lev', 'lev_bnds', 'time', 'time_bnds',
                                    'nlat', 'nlon'
                            ]:
                                field_names.append(v)

                        for field_name in field_names:

                            varid_out = remap.def_var(field_name, fptr_in,
                                                      fptr_out, dim_names)

                            # remap the variable in chunks along the time
                            # dimension, using the matrix that matches its rank
                            # (a fill_value of getattr(varid_out, 'missing_value')
                            # could also be passed to remap_var)
                            c = envDict['chunk']
                            if c is None:
                                c = 1
                            else:
                                c = int(c)
                            ntime = fptr_in.dimensions['time'].size
                            try:
                                if dim_names['depth'] in varid_out.dimensions:
                                    # 3D (depth-resolved) variable
                                    b = 0
                                    for i in range(0, ntime, c):
                                        if b + c >= ntime:
                                            c = ntime - b
                                        varid_out[b:(b + c), :, :, :] = matrix_3d.remap_var(
                                            fptr_in.variables[field_name][b:(b + c), :, :, :])
                                        b = b + c
                                else:
                                    # 2D (surface) variable
                                    b = 0
                                    for i in range(0, ntime, c):
                                        if b + c >= ntime:
                                            c = ntime - b
                                        varid_out[b:(b + c), :, :] = matrix_2d.remap_var(
                                            fptr_in.variables[field_name][b:(b + c), :, :])
                                        b = b + c
                            except TypeError as e:
                                print('TypeError while remapping variable {0}: {1}'.format(
                                    field_name, e))
                        fptr_in.close()
                        fptr_out.close()
                        try:
                            os.rename(testfile_out_fname + '.tmp',
                                      testfile_out_fname)
                        except OSError as e:
                            print('Could not create {0}: {1}'.format(
                                testfile_out_fname, e))
                    else:
                        print("Not creating {0}".format(testfile_out_fname))
    main_comm.sync()
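
The remap loop above steps through each field in fixed-size chunks along the time dimension, shrinking the final chunk so it never reads past the end of the record dimension. Below is a minimal standalone sketch of that chunking pattern, using only the netCDF4 package; the file names, the variable name, and the plain copy in place of a real matrix_2d/matrix_3d.remap_var call are hypothetical stand-ins, not the tool's actual API.

import netCDF4 as nc

def copy_in_time_chunks(in_fname, out_fname, var_name, chunk=12):
    """Copy var_name from in_fname to out_fname in chunks along 'time'."""
    fin = nc.Dataset(in_fname, 'r')
    fout = nc.Dataset(out_fname, 'w')

    # mirror the input variable's dimensions, keeping 'time' unlimited
    for dname in fin.variables[var_name].dimensions:
        size = None if dname == 'time' else len(fin.dimensions[dname])
        fout.createDimension(dname, size)
    vout = fout.createVariable(var_name, fin.variables[var_name].datatype,
                               fin.variables[var_name].dimensions)

    ntime = len(fin.dimensions['time'])
    b = 0
    while b < ntime:
        e = min(b + chunk, ntime)  # clamp the final chunk
        # a real regridder would remap the slab here instead of copying it
        vout[b:e] = fin.variables[var_name][b:e]
        b = e

    fin.close()
    fout.close()
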
def main(options, main_comm, debugMsg, timer):
    """setup the environment for running the diagnostics in parallel. 

    Calls 2 different regridding types
    model1 only 
    model1 and model2

    Arguments:
    options (object) - command line options
    main_comm (object) - MPI simple communicator object
    debugMsg (object) - vprinter object for printing debugging messages
    timer (object) - timer object for keeping times

    The env_diags_lnd.xml configuration file defines whether or not to regrid the climatology files. 
    See (website URL here...) for a complete desciption of the env_diags_lnd XML options.
    """

    # initialize the environment dictionary
    envDict = dict()
    regrid_list = list()
    climo_list = list()

    # set some variables for all tasks
    regrid_script = 'se2fv_esmf.regrid2file.ncl'
    m_dir = 'lnd'

    # CASEROOT is given on the command line as required option --caseroot
    caseroot = options.caseroot[0]
    debugMsg('caseroot = {0}'.format(caseroot), header=True, verbosity=1)

    debugMsg('calling initialize_main', header=True, verbosity=1)
    envDict = initialize_main(envDict, caseroot, debugMsg, options.standalone)

    if main_comm.is_manager():

        debugMsg('calling check_ncl_nco', header=True, verbosity=1)
        diagUtilsLib.check_ncl_nco(envDict)

        if not os.path.exists(envDict['WKDIR']):
            os.makedirs(envDict['WKDIR'])

        # build up the climo files to be regridded in parallel
        if (envDict['regrid_1'] == 'True'):
            # setup the working directory first before calling the base class prerequisites
            endYr = (int(envDict['clim_first_yr_1']) +
                     int(envDict['clim_num_yrs_1'])) - 1
            subdir = '{0}.{1}-{2}'.format(envDict['caseid_1'],
                                          envDict['clim_first_yr_1'], endYr)
            workdir = '{0}/climo/{1}/{2}/{3}/'.format(envDict['PTMPDIR_1'],
                                                      envDict['caseid_1'],
                                                      subdir, m_dir)
            regrid_list = get_climo_files_to_regrid(
                workdir, envDict['lnd_modelstream_1'], '1', envDict, debugMsg)
            debugMsg('t = 1 regrid_list = {0}'.format(regrid_list),
                     header=True,
                     verbosity=1)

        if (envDict['MODEL_VS_MODEL'] == 'True'
                and envDict['regrid_2'] == 'True'):

            # setup the working directory first before calling the base class prerequisites
            endYr = (int(envDict['clim_first_yr_2']) +
                     int(envDict['clim_num_yrs_2'])) - 1
            subdir = '{0}.{1}-{2}'.format(envDict['caseid_2'],
                                          envDict['clim_first_yr_2'], endYr)
            workdir = '{0}/climo/{1}/{2}/{3}/'.format(envDict['PTMPDIR_2'],
                                                      envDict['caseid_2'],
                                                      subdir, m_dir)
            regrid_list = regrid_list + get_climo_files_to_regrid(
                workdir, envDict['lnd_modelstream_2'], '2', envDict, debugMsg)
            debugMsg('t = 2 regrid_list = {0}'.format(regrid_list),
                     header=True,
                     verbosity=1)

    main_comm.sync()

    # broadcast envDict to all tasks
    envDict['NCLPATH'] = envDict['POSTPROCESS_PATH'] + '/lnd_diag/shared/'
    envDict = main_comm.partition(data=envDict,
                                  func=partition.Duplicate(),
                                  involved=True)

    # broadcast the regrid_list to all tasks
    regrid_list = main_comm.partition(data=regrid_list,
                                      func=partition.Duplicate(),
                                      involved=True)
    main_comm.sync()

    # initialize some variables for distributing regridding across the communicators
    size = main_comm.get_size()
    rank = main_comm.get_rank()
    main_comm.sync()

    # ration the regrid_list indices out to the worker ranks; a None value is
    # the sentinel that tells each worker to stop asking for work
    if main_comm.is_manager():
        num_regrids = len(regrid_list)
        debugMsg('num_regrids = {0}'.format(num_regrids),
                 header=True,
                 verbosity=1)

        for i in range(num_regrids):
            debugMsg('Sent out index {0!r}'.format(i),
                     header=True,
                     verbosity=1)
            main_comm.ration(i)

        for i in range(size - 1):
            debugMsg('Sent None', header=True, verbosity=1)
            main_comm.ration(None)

    else:
        i = -1
        while i is not None:
            i = main_comm.ration()
            debugMsg('Recvd index {0!r}'.format(i),
                     header=True,
                     verbosity=1)

            if i is not None:
                # regrid_list[i] = [t (case index '1' or '2'), ext_dir, climo_file]
                climo_list = regrid_list[i]
                t = climo_list[0]
                ext_dir = climo_list[1]
                climo_file = climo_list[2]

                # setup the working directory first for each climo file
                endYr = (int(envDict['clim_first_yr_' + t]) +
                         int(envDict['clim_num_yrs_' + t])) - 1
                subdir = '{0}.{1}-{2}'.format(envDict['caseid_' + t],
                                              envDict['clim_first_yr_' + t],
                                              endYr)
                workdir = '{0}/climo/{1}/{2}/{3}/'.format(
                    envDict['PTMPDIR_' + t], envDict['caseid_' + t], subdir,
                    m_dir)

                timer_tag = '{0}_{1}'.format(t, climo_file)
                timer.start(timer_tag)
                debugMsg(
                    'Before call to lnd_regrid using workdir = {0}/{1}'.format(
                        workdir, ext_dir),
                    header=True,
                    verbosity=1)
                diagUtilsLib.lnd_regrid(climo_file, regrid_script, t, workdir,
                                        ext_dir, envDict)
                timer.stop(timer_tag)

                debugMsg("Total time to regrid file {0} = {1}".format(
                    climo_file, timer.get_time(timer_tag)),
                         header=True,
                         verbosity=1)
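
Both functions above share the same ASAPTools manager/worker idiom: the manager rations work items to whichever worker asks next and then sends one stop sentinel per worker. A minimal sketch of that protocol is below, assuming the asaptools package is installed and the script runs under mpirun; process_item is a made-up stand-in for the real per-file work (such as the diagUtilsLib.lnd_regrid call above).

from asaptools import simplecomm

def process_item(item):
    # stand-in for the real per-item work (e.g. regridding one climo file)
    print('processing {0}'.format(item))

def ration_work(items):
    comm = simplecomm.create_comm(serial=False)
    if comm.is_manager():
        # hand out one item at a time, then one None sentinel per worker
        for item in items:
            comm.ration(data=item)
        for _ in range(comm.get_size() - 1):
            comm.ration(data=None)
    else:
        item = comm.ration()
        while item is not None:
            process_item(item)
            item = comm.ration()
    comm.sync()

Every rank would call ration_work() on the same list, but only the manager's copy is actually handed out, mirroring how regrid_list is duplicated to all ranks before the ration loop above.
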
Example #14
def fill_list(nc_files, root_dir, extra_dir, comm, rank, size):
    """Build a nested dictionary of the variables found in the time-series
    files, keyed by model type, variable name, time frequency, and date, and
    record the source file and matching grid file for each entry.  The file
    list is partitioned across the MPI ranks; the partial results are merged
    on rank 0 and then duplicated back to every rank.
    """

    grds = {
        'atm':'192x288',
        'lnd':'192x288',
        'glc':'192x288',
        'rof':'192x288',
        'ice':'384x320',
        'ocn':'384x320'
    }

    variablelist = {}
    gridfile = None
    nc_files.append(extra_dir+"/ocn_constants.nc")
    nc_files_l = comm.partition(nc_files,func=partition.EqualLength(),involved=True)
    for fn in nc_files_l:
        f = nc.Dataset(fn, "r")
        mt = fn.replace(root_dir,"").split("/")[-5]         
        stri = fn
        model_type = mt
        if "ocn_constants" in fn:
            model_type = "ocn"
            mt = "ocn"
        if "lnd" in model_type or "rof" in model_type:
            model_type = 'lnd,rof'
        if "glc" in model_type:
            model_type = 'glc,lnd'
        if ("time" not in f.variables.keys() or "tseries" not in fn):
            variablelist["skip"] = {}
        else:
            lt = "none"
            ln = "none"
            lv = "none"
            lat_name = None
            lon_name = None
            lev_name = None
            time_name = None
            # Find which dimension variables to use; 'grids' is a lookup table
            # of candidate lat/lon/lev/time dimension names per model type,
            # defined elsewhere (not shown in this excerpt)
            v_dims = f.variables[fn.split('.')[-3]].dimensions
            for i in grids[mt]['lat']:
              if i in v_dims:
                  if 'nlat' in i or 'nj' in i:
                      lat_name = str(f.variables[fn.split('.')[-3]].coordinates.split()[1])
                  else:
                      lat_name = i
                  lt = len(f.dimensions[i])
            for i in grids[mt]['lon']:
              if i in v_dims:
                  if 'nlon' in i or 'ni' in i:
                      lon_name = str(f.variables[fn.split('.')[-3]].coordinates.split()[0])
                      if 'ULON' in lon_name:
                          ln = str(len(f.dimensions[i]))+"_UGRID"
                      else:
                          ln = str(len(f.dimensions[i]))+"_TGRID"
                  else: 
                      lon_name = i
                      ln = len(f.dimensions[i])
            for i in grids[mt]['lev']:
              if i in v_dims: 
                  lev_name = i
                  lv = len(f.dimensions[i])
#            for i in grids[mt]['time']:
#              if i in v_dims:
#                  time_name = i
#                  lv = len(f.dimensions[i])
            if 'none' == lt or 'none' == ln:
                gridfile = '{0}/{1}x{2}.nc'.format(extra_dir,mt,grds[mt])
            else:
                if 'atm' in mt:
                    gridfile = '{0}/{1}x{2}x{3}x{4}.nc'.format(extra_dir,mt,lt,ln,lv)
                else: 
                    gridfile = '{0}/{1}x{2}x{3}.nc'.format(extra_dir,mt,lt,ln)
            if gridfile is not None:
                if not os.path.isfile(gridfile):
                    gridfile = None

            for vn,ob in f.variables.iteritems():
                if model_type not in variablelist.keys():
                    variablelist[model_type] = {}
                if vn not in variablelist[model_type].keys():
                     variablelist[model_type][vn] = {}
                if hasattr(f,"time_period_freq"):
                    if 'day_365' in f.time_period_freq:
                        time_period_freq = 'year_1'
                    else:
                        time_period_freq = f.time_period_freq
                    if time_period_freq not in variablelist[model_type][vn].keys():
                        variablelist[model_type][vn][time_period_freq] = {}
                    if 'ocn_constants' in stri:
                        date = "0000"
                    else:
                        date = stri.split('.')[-2]      
                    if date not in variablelist[model_type][vn][time_period_freq].keys():
                        variablelist[model_type][vn][time_period_freq][date] = {}
                    if 'files' not in variablelist[model_type][vn][time_period_freq][date].keys():
                        variablelist[model_type][vn][time_period_freq][date]['files']=[stri,gridfile]
                        variablelist[model_type][vn][time_period_freq][date]['lat']=lat_name
                        variablelist[model_type][vn][time_period_freq][date]['lon']=lon_name
                        variablelist[model_type][vn][time_period_freq][date]['lev']=lev_name
                        variablelist[model_type][vn][time_period_freq][date]['time']=time_name
                else:
                    # no time_period_freq attribute: file this entry under 'unknown'
                    if "unknown" not in variablelist[model_type][vn].keys():
                        variablelist[model_type][vn]["unknown"] = {}
                    if stri not in variablelist[model_type][vn]["unknown"]:
                        variablelist[model_type][vn]["unknown"]["unknown"] = {}
                        variablelist[model_type][vn]["unknown"]["unknown"]['files']=[stri,gridfile]
                        variablelist[model_type][vn]["unknown"]["unknown"]['lat']=lat_name
                        variablelist[model_type][vn]["unknown"]["unknown"]['lon']=lon_name
                        variablelist[model_type][vn]["unknown"]["unknown"]['lev']=lev_name
                        variablelist[model_type][vn]["unknown"]["unknown"]['time']=time_name
        f.close()
    VL_TAG = 30
    variable_list = {}
    if size > 1:
        if rank==0:
            variable_list = variablelist
            for i in range(0,size-1): 
                r,lvarList = comm.collect(data=None, tag=VL_TAG)
                for model_type,d1 in lvarList.iteritems():
                    if model_type not in variable_list.keys():
                        variable_list[model_type] = {}
                    for vn,d2 in d1.iteritems():
                        if vn not in variable_list[model_type].keys():
                            variable_list[model_type][vn] = {}
                        for tp,d3 in d2.iteritems():
                            if tp not in variable_list[model_type][vn].keys():
                                variable_list[model_type][vn][tp] = {}
                            for date,l in d3.iteritems():
                                if date not in variable_list[model_type][vn][tp].keys():
                                    variable_list[model_type][vn][tp][date] = {}
                                if 'files' in variable_list[model_type][vn][tp][date].keys():
                                    if len(lvarList[model_type][vn][tp][date]['files'])>0:
                                        if lvarList[model_type][vn][tp][date]['files'][0] is not None:
                                            variable_list[model_type][vn][tp][date]['files'].append(lvarList[model_type][vn][tp][date]['files'][0])
                                else:
                                    variable_list[model_type][vn][tp][date] = lvarList[model_type][vn][tp][date]          

#                variable_list.update(lvarList)
            comm.partition(variable_list, func=partition.Duplicate(), involved=True)
        else:
            comm.collect(data=variablelist, tag=VL_TAG)
            variable_list = comm.partition(func=partition.Duplicate(), involved=True)
        comm.sync()
    else:
        # serial run: the local list is already complete
        variable_list = variablelist
    return variable_list
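
The tail of fill_list() gathers every rank's partial variablelist on rank 0, folds the pieces together, and then duplicates the merged dictionary back to all ranks. A minimal sketch of that gather-merge-broadcast step follows, under the same assumption that ASAPTools is available and the code runs under MPI; a shallow dict.update stands in for the key-by-key merge done above.

from asaptools import simplecomm, partition

def merge_partial_dicts(local_dict):
    """Collect per-rank dicts on the manager, merge, and duplicate to all ranks."""
    comm = simplecomm.create_comm(serial=False)
    VL_TAG = 30
    if comm.get_size() == 1:
        return local_dict
    if comm.is_manager():
        merged = dict(local_dict)
        for _ in range(comm.get_size() - 1):
            rank_id, partial = comm.collect(data=None, tag=VL_TAG)
            merged.update(partial)  # real code may need a nested, key-by-key merge
        merged = comm.partition(merged, func=partition.Duplicate(),
                                involved=True)
    else:
        comm.collect(data=local_dict, tag=VL_TAG)
        merged = comm.partition(func=partition.Duplicate(), involved=True)
    comm.sync()
    return merged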