def testPartitionList(self):
     data = range(5 + self.rank)
     sresult = self.scomm.partition(data, func=EqualStride())
     presult = self.pcomm.partition(data, func=EqualStride())
     msg = test_info_msg('partition(list)', data, sresult, presult)
     print(msg)
     self.assertEqual(sresult, presult, msg)
 def testPartitionListInvolved(self):
     data = range(5 + self.rank)
     sresult = self.scomm.partition(data, func=EqualStride(), involved=True)
     presult = self.pcomm.partition(data, func=EqualStride(), involved=True)
     msg = test_info_msg('partition(list, T)', data, sresult, presult)
     print msg
     self.assertEqual(sresult, presult, msg)
示例#3
0
 def testPartitionArrayInvolved(self):
     if self.gcomm.is_manager():
         data = np.arange(10)
     else:
         data = None
     actual = self.gcomm.partition(data, func=EqualStride(), involved=True)
     expected = np.arange(self.rank, 10, self.size)
     np.testing.assert_array_equal(actual, expected)
示例#4
0
 def testPartitionArrayInvolved(self):
     if self.gcomm.is_manager():
         data = np.arange(10)
     else:
         data = None
     actual = self.gcomm.partition(data, func=EqualStride(), involved=True)
     expected = np.arange(self.rank, 10, self.size)
     msg = test_info_msg(self.rank, self.size, 'partition(array, T)', data,
                         actual, expected)
     print msg
     np.testing.assert_array_equal(actual, expected, msg)
示例#5
0
 def testMonoPartitionListInvolved(self):
     if self.monocomm.is_manager():
         data = list(range(10 + self.grank))
     else:
         data = None
     actual = self.monocomm.partition(data,
                                      func=EqualStride(),
                                      involved=True)
     expected = list(
         range(self.rank, 10 + self.color, self.monocomm.get_size()))
     self.assertEqual(actual, expected)
 def testMonoPartitionListInvolved(self):
     if self.monocomm.is_manager():
         data = range(10 + self.grank)
     else:
         data = None
     actual = self.monocomm.partition(data,
                                      func=EqualStride(),
                                      involved=True)
     expected = range(self.rank, 10 + self.color, self.monocomm.get_size())
     msg = test_info_msg(self.grank, self.gsize, 'mono.partition(list,T)',
                         data, actual, expected)
     print msg
     self.assertEqual(actual, expected, msg)
示例#7
0
 def testPartitionArray(self):
     if self.gcomm.is_manager():
         data = np.arange(10)
     else:
         data = None
     actual = self.gcomm.partition(data, func=EqualStride())
     if self.gcomm.is_manager():
         expected = None
     else:
         expected = np.arange(self.rank - 1, 10, self.size - 1)
     if self.gcomm.is_manager():
         self.assertEqual(actual, expected)
     else:
         np.testing.assert_array_equal(actual, expected)
示例#8
0
 def testMultiPartitionListInvolved(self):
     if self.multicomm.is_manager():
         data = list(range(10 + self.grank))
     else:
         data = None
     actual = self.multicomm.partition(data,
                                       func=EqualStride(),
                                       involved=True)
     expected = list(
         range(self.color, 10 + self.rank * len(self.groups),
               self.multicomm.get_size()))
     msg = test_info_msg(self.grank, self.gsize, 'multi.partition(list,T)',
                         data, actual, expected)
     print(msg)
     self.assertEqual(actual, expected, msg)
示例#9
0
 def testPartitionStrArray(self):
     indata = list('abcdefghi')
     if self.gcomm.is_manager():
         data = np.array(indata)
     else:
         data = None
     actual = self.gcomm.partition(data, func=EqualStride())
     if self.gcomm.is_manager():
         expected = None
     else:
         expected = np.array(indata[self.rank - 1::self.size - 1])
     if self.gcomm.is_manager():
         self.assertEqual(actual, expected)
     else:
         np.testing.assert_array_equal(actual, expected)
示例#10
0
 def testPartitionArray(self):
     if self.gcomm.is_manager():
         data = np.arange(10)
     else:
         data = None
     actual = self.gcomm.partition(data, func=EqualStride())
     if self.gcomm.is_manager():
         expected = None
     else:
         expected = np.arange(self.rank - 1, 10, self.size - 1)
     msg = test_info_msg(self.rank, self.size, 'partition(array)', data,
                         actual, expected)
     print msg
     if self.gcomm.is_manager():
         self.assertEqual(actual, expected, msg)
     else:
         np.testing.assert_array_equal(actual, expected, msg)
示例#11
0
 def testPartitionStrArray(self):
     indata = list('abcdefghi')
     if self.gcomm.is_manager():
         data = data = np.array(indata)
     else:
         data = None
     actual = self.gcomm.partition(data, func=EqualStride())
     if self.gcomm.is_manager():
         expected = None
     else:
         expected = np.array(indata[self.rank - 1::self.size - 1])
     msg = test_info_msg(self.rank, self.size, 'partition(string-array)',
                         data, actual, expected)
     print msg
     if self.gcomm.is_manager():
         self.assertEqual(actual, expected, msg)
     else:
         np.testing.assert_array_equal(actual, expected, msg)
示例#12
0
def main(argv):

    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex= mpi_disable'
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm2_0'
    opts_dict['compset'] = 'F2000climo'
    opts_dict['mach'] = 'cheyenne'
    opts_dict['esize'] = 350
    opts_dict['tslice'] = 1
    opts_dict['res'] = 'f19_f19'
    opts_dict['sumfile'] = 'ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['sumfiledir'] = './'
    opts_dict['jsonfile'] = 'exclude_empty.json'
    opts_dict['verbose'] = False
    opts_dict['mpi_enable'] = True
    opts_dict['mpi_disable'] = False
    opts_dict['maxnorm'] = False
    opts_dict['gmonly'] = True
    opts_dict['popens'] = False
    opts_dict['cumul'] = False
    opts_dict['regx'] = 'test'
    opts_dict['startMon'] = 1
    opts_dict['endMon'] = 1
    opts_dict['fIndex'] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ES', opts_dict)

    verbose = opts_dict['verbose']

    st = opts_dict['esize']
    esize = int(st)

    if opts_dict['popens'] == True:
        print(
            "ERROR: Please use pyEnsSumPop.py for a POP ensemble (not --popens)  => EXITING...."
        )
        sys.exit()

    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach']
            or opts_dict['res']):
        print(
            'ERROR: Please specify --tag, --compset, --mach and --res options  => EXITING....'
        )
        sys.exit()

    if opts_dict['mpi_disable'] == True:
        opts_dict['mpi_enable'] = False

    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var list that will be excluded
    ex_varlist = []
    inc_varlist = []

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    if me.get_rank() == 0:
        print('STATUS: Running pyEnsSum.py')

    if me.get_rank() == 0 and (verbose == True):
        print(opts_dict)
        print('STATUS: Ensemble size for summary = ', esize)

    exclude = False
    if me.get_rank() == 0:
        if opts_dict['jsonfile']:
            inc_varlist = []
            # Read in the excluded or included var list
            ex_varlist, exclude = pyEnsLib.read_jsonlist(
                opts_dict['jsonfile'], 'ES')
            if exclude == False:
                inc_varlist = ex_varlist
                ex_varlist = []

    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
        exclude = me.partition(exclude, func=Duplicate(), involved=True)
        if exclude:
            ex_varlist = me.partition(ex_varlist,
                                      func=Duplicate(),
                                      involved=True)
        else:
            inc_varlist = me.partition(inc_varlist,
                                       func=Duplicate(),
                                       involved=True)

    in_files = []
    if (os.path.exists(input_dir)):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files = sorted(in_files_temp)

        # Make sure we have enough
        num_files = len(in_files)
        if me.get_rank() == 0 and (verbose == True):
            print('VERBOSE: Number of files in input directory = ', num_files)
        if (num_files < esize):
            if me.get_rank() == 0 and (verbose == True):
                print('VERBOSE: Number of files in input directory (',num_files,\
                 ') is less than specified ensemble size of ', esize)
            sys.exit(2)
        if (num_files > esize):
            if me.get_rank() == 0 and (verbose == True):
                print('VERBOSE: Note that the number of files in ', input_dir, \
                 'is greater than specified ensemble size of ', esize ,\
                 '\nwill just use the first ',  esize, 'files')
    else:
        if me.get_rank() == 0:
            print('ERROR: Input directory: ', input_dir, ' not found')
        sys.exit(2)

    if opts_dict['cumul']:
        if opts_dict['regx']:
            in_files_list = get_cumul_filelist(opts_dict, opts_dict['indir'],
                                               opts_dict['regx'])
        in_files = me.partition(in_files_list,
                                func=EqualLength(),
                                involved=True)
        if me.get_rank() == 0 and (verbose == True):
            print('VERBOSE: in_files  = ', in_files)

    # Check full file names in input directory (don't open yet)
    full_in_files = []
    if me.get_rank() == 0 and opts_dict['verbose']:
        print('VERBOSE: Input files are: ')

    for onefile in in_files[0:esize]:
        fname = input_dir + '/' + onefile
        if me.get_rank() == 0 and opts_dict['verbose']:
            print(fname)
        if (os.path.isfile(fname)):
            full_in_files.append(fname)
        else:
            if me.get_rank() == 0:
                print("ERROR: Could not locate file ", fname,
                      " => EXITING....")
            sys.exit()

    #open just the first file
    first_file = nc.Dataset(full_in_files[0], "r")

    # Store dimensions of the input fields
    if me.get_rank() == 0 and (verbose == True):
        print("VERBOSE: Getting spatial dimensions")
    nlev = -1
    nilev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey = ''
    latkey = ''
    # Look at first file and get dims
    input_dims = first_file.dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = len(input_dims["lev"])
        elif key == "ilev":
            nilev = len(input_dims["ilev"])
        elif key == "ncol":
            ncol = len(input_dims["ncol"])
        elif (key == "nlon") or (key == "lon"):
            nlon = len(input_dims[key])
            lonkey = key
        elif (key == "nlat") or (key == "lat"):
            nlat = len(input_dims[key])
            latkey = key

    if (nlev == -1):
        if me.get_rank() == 0:
            print(
                "ERROR: could not locate a valid dimension (lev) => EXITING...."
            )
        sys.exit()

    if ((ncol == -1) and ((nlat == -1) or (nlon == -1))):
        if me.get_rank() == 0:
            print("ERROR: Need either lat/lon or ncol  => EXITING....")
        sys.exit()

    # Check if this is SE or FV data
    if (ncol != -1):
        is_SE = True
    else:
        is_SE = False

    # output dimensions
    if me.get_rank() == 0 and (verbose == True):
        print('lev = ', nlev)
        if (is_SE == True):
            print('ncol = ', ncol)
        else:
            print('nlat = ', nlat)
            print('nlon = ', nlon)

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict_all = first_file.variables

    # Remove the excluded variables (specified in json file) from variable dictionary
    if exclude:
        vars_dict = vars_dict_all
        for i in ex_varlist:
            if i in vars_dict:
                del vars_dict[i]
    #Given an included var list, remove all the variables that are not on the list
    else:
        vars_dict = vars_dict_all.copy()
        for k, v in vars_dict_all.items():
            if (k not in inc_varlist) and (vars_dict_all[k].typecode() == 'f'):
                del vars_dict[k]

    num_vars = len(vars_dict)

    str_size = 0
    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k, v in vars_dict.items():
        var = k
        vd = v.dimensions  # all the variable's dimensions (names)
        vr = len(v.dimensions)  # num dimension
        vs = v.shape  # dim values
        is_2d = False
        is_3d = False
        if (is_SE == True):  # (time, lev, ncol) or (time, ncol)
            if ((vr == 2) and (vs[1] == ncol)):
                is_2d = True
                num_2d += 1
            elif ((vr == 3) and (vs[2] == ncol and vs[1] == nlev)):
                is_3d = True
                num_3d += 1
        else:  # (time, lev, nlon, nlon) or (time, nlat, nlon)
            if ((vr == 3) and (vs[1] == nlat and vs[2] == nlon)):
                is_2d = True
                num_2d += 1
            elif ((vr == 4) and (vs[2] == nlat and vs[3] == nlon and
                                 (vs[1] == nlev or vs[1] == nilev))):
                is_3d = True
                num_3d += 1

        if (is_3d == True):
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif (is_2d == True):
            str_size = max(str_size, len(k))
            d2_var_names.append(k)

    if me.get_rank() == 0 and (verbose == True):
        print('VERBOSE: Number of variables found:  ', num_3d + num_2d)
        print('VERBOSE: 3D variables: ' + str(num_3d) + ', 2D variables: ' +
              str(num_2d))

    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()

    if esize < num_2d + num_3d:
        if me.get_rank() == 0:
            print(
                "************************************************************************************************************************************"
            )
            print("  ERROR: the total number of 3D and 2D variables " +
                  str(num_2d + num_3d) +
                  " is larger than the number of ensemble files " + str(esize))
            print(
                "  Cannot generate ensemble summary file, please remove more variables from your included variable list,"
            )
            print(
                "  or add more variables in your excluded variable list  => EXITING...."
            )
            print(
                "************************************************************************************************************************************"
            )
        sys.exit()
    # All vars is 3d vars first (sorted), the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    # Rank 0 - Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    #check if directory is valid
    sum_dir = os.path.dirname(this_sumfile)
    if len(sum_dir) == 0:
        sum_dir = '.'
    if (os.path.exists(sum_dir) == False):
        if me.get_rank() == 0:
            print('ERROR: Summary file directory: ', sum_dir, ' not found')
        sys.exit(2)

    if (me.get_rank() == 0):

        if (verbose == True):
            print("VERBOSE: Creating ", this_sumfile, "  ...")

        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        nc_sumfile = nc.Dataset(this_sumfile, "w", format="NETCDF4_CLASSIC")

        # Set dimensions
        if (verbose == True):
            print("VERBOSE: Setting dimensions .....")
        if (is_SE == True):
            nc_sumfile.createDimension('ncol', ncol)
        else:
            nc_sumfile.createDimension('nlat', nlat)
            nc_sumfile.createDimension('nlon', nlon)

        nc_sumfile.createDimension('nlev', nlev)
        nc_sumfile.createDimension('ens_size', esize)
        nc_sumfile.createDimension('nvars', num_3d + num_2d)
        nc_sumfile.createDimension('nvars3d', num_3d)
        nc_sumfile.createDimension('nvars2d', num_2d)
        nc_sumfile.createDimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if (verbose == True):
            print("VERBOSE: Setting global attributes .....")
        nc_sumfile.creation_date = now
        nc_sumfile.title = 'CAM verification ensemble summary file'
        nc_sumfile.tag = opts_dict["tag"]
        nc_sumfile.compset = opts_dict["compset"]
        nc_sumfile.resolution = opts_dict["res"]
        nc_sumfile.machine = opts_dict["mach"]

        # Create variables
        if (verbose == True):
            print("VERBOSE: Creating variables .....")
        v_lev = nc_sumfile.createVariable("lev", 'f8', ('nlev', ))
        v_vars = nc_sumfile.createVariable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.createVariable("var3d", 'S1',
                                            ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.createVariable("var2d", 'S1',
                                            ('nvars2d', 'str_size'))

        v_gm = nc_sumfile.createVariable("global_mean", 'f8',
                                         ('nvars', 'ens_size'))
        v_standardized_gm = nc_sumfile.createVariable("standardized_gm", 'f8',
                                                      ('nvars', 'ens_size'))
        v_loadings_gm = nc_sumfile.createVariable('loadings_gm', 'f8',
                                                  ('nvars', 'nvars'))
        v_mu_gm = nc_sumfile.createVariable('mu_gm', 'f8', ('nvars', ))
        v_sigma_gm = nc_sumfile.createVariable('sigma_gm', 'f8', ('nvars', ))
        v_sigma_scores_gm = nc_sumfile.createVariable('sigma_scores_gm', 'f8',
                                                      ('nvars', ))

        # Assign vars, var3d and var2d
        if (verbose == True):
            print("VERBOSE: Assigning vars, var3d, and var2d .....")

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []

        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(d3_var_names)
        for i in range(l_eq):
            tt = list(d3_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(d2_var_names)
        for i in range(l_eq):
            tt = list(d2_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if (verbose == True):
            print("VERBOSE: Assigning time invariant metadata .....")


#        lev_data = np.zeros(num_lev,dtype=np.float64)
        lev_data = first_file.variables["lev"]
        v_lev[:] = lev_data[:]
    #end of rank=0 work

    # All:
    tslice = opts_dict['tslice']
    if not opts_dict['cumul']:
        # Partition the var list
        var3_list_loc = me.partition(d3_var_names,
                                     func=EqualStride(),
                                     involved=True)
        var2_list_loc = me.partition(d2_var_names,
                                     func=EqualStride(),
                                     involved=True)
    else:
        var3_list_loc = d3_var_names
        var2_list_loc = d2_var_names

    #close first_file
    first_file.close()

    # Calculate global means #
    if me.get_rank() == 0 and (verbose == True):
        print("VERBOSE: Calculating global means .....")
    if not opts_dict['cumul']:
        gm3d, gm2d, var_list = pyEnsLib.generate_global_mean_for_summary(
            full_in_files, var3_list_loc, var2_list_loc, is_SE, False,
            opts_dict)
    if me.get_rank() == 0 and (verbose == True):
        print("VERBOSE: Finished calculating global means .....")

    #gather to rank = 0
    if opts_dict['mpi_enable']:

        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d3_var_names), me)

            # Gather global means 3d results
            gm3d = gather_npArray(gm3d, me, slice_index,
                                  (len(d3_var_names), len(full_in_files)))

            # Gather 2d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d2_var_names), me)

            # Gather global means 2d results
            gm2d = gather_npArray(gm2d, me, slice_index,
                                  (len(d2_var_names), len(full_in_files)))

            #gather variables ro exclude (in pre_pca)
            var_list = gather_list(var_list, me)

        else:
            gmall = np.concatenate((temp1, temp2), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(
                gmall, me,
                (me.get_size(), len(d3_var_names) + len(d2_var_names)))

    # rank =0 : complete calculations for summary file
    if me.get_rank() == 0:
        if not opts_dict['cumul']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
        else:
            gmall_temp = np.transpose(gmall[:, :])
            gmall = gmall_temp

        #PCA prep and calculation
        mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm, b_exit = pyEnsLib.pre_PCA(
            gmall, all_var_names, var_list, me)

        #if PCA calc encounters an error, then remove the summary file and exit
        if b_exit:
            nc_sumfile.close()
            os.unlink(this_sumfile)
            print("STATUS: Summary could not be created.")
            sys.exit(2)

        v_gm[:, :] = gmall[:, :]
        v_standardized_gm[:, :] = standardized_global_mean[:, :]
        v_mu_gm[:] = mu_gm[:]
        v_sigma_gm[:] = sigma_gm[:]
        v_loadings_gm[:, :] = loadings_gm[:, :]
        v_sigma_scores_gm[:] = scores_gm[:]

        print("STATUS: Summary file is complete.")

        nc_sumfile.close()
示例#13
0
def main(argv):
    print 'Running pyEnsSumPop!'

    # Get command line stuff and store in a dictionary
    s = 'nyear= nmonth= npert= tag= res= mach= compset= sumfile= indir= tslice= verbose jsonfile= mpi_enable zscoreonly nrand= rand seq= jsondir='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSumPop_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm1_2_0'
    opts_dict['compset'] = 'FC5'
    opts_dict['mach'] = 'yellowstone'
    opts_dict['tslice'] = 0
    opts_dict['nyear'] = 3
    opts_dict['nmonth'] = 12
    opts_dict['npert'] = 40
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['res'] = 'ne30_ne30'
    opts_dict['sumfile'] = 'ens.pop.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['jsonfile'] = ''
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = False
    opts_dict['zscoreonly'] = False
    opts_dict['popens'] = True
    opts_dict['nrand'] = 40
    opts_dict['rand'] = False
    opts_dict['seq'] = 0
    opts_dict['jsondir'] = '/glade/scratch/haiyingx/'

    # This creates the dictionary of input arguments
    print "before parseconfig"
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ESP', opts_dict)

    verbose = opts_dict['verbose']
    nbin = opts_dict['nbin']

    if verbose:
        print opts_dict

    # Now find file names in indir
    input_dir = opts_dict['indir']

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])
    if opts_dict['jsonfile']:
        # Read in the included var list
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        str_size = 0
        for str in Var3d:
            if str_size < len(str):
                str_size = len(str)
        for str in Var2d:
            if str_size < len(str):
                str_size = len(str)

    in_files = []
    if (os.path.exists(input_dir)):
        # Pick up the 'nrand' random number of input files to generate summary files
        if opts_dict['rand']:
            in_files = pyEnsLib.Random_pickup_pop(input_dir, opts_dict,
                                                  opts_dict['nrand'])
        else:
            # Get the list of files
            in_files_temp = os.listdir(input_dir)
            in_files = sorted(in_files_temp)
        # Make sure we have enough
        num_files = len(in_files)
    else:
        print 'Input directory: ', input_dir, ' not found'
        sys.exit(2)

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])
    #Partition the input file list
    in_file_list = me.partition(in_files, func=EqualStride(), involved=True)

    # Open the files in the input directory
    o_files = []
    for onefile in in_file_list:
        if (os.path.isfile(input_dir + '/' + onefile)):
            o_files.append(Nio.open_file(input_dir + '/' + onefile, "r"))
        else:
            print "COULD NOT LOCATE FILE " + input_dir + onefile + "! EXITING...."
            sys.exit()

    print in_file_list

    # Store dimensions of the input fields
    if (verbose == True):
        print "Getting spatial dimensions"
    nlev = -1
    nlat = -1
    nlon = -1

    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    # Make sure all files have the same dimensions
    for key in input_dims:
        if key == "z_t":
            nlev = input_dims["z_t"]
        elif key == "nlon":
            nlon = input_dims["nlon"]
        elif key == "nlat":
            nlat = input_dims["nlat"]

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if ( nlev != int(input_dims["z_t"]) or ( nlat != int(input_dims["nlat"]))\
              or ( nlon != int(input_dims["nlon"]))):
            print "Dimension mismatch between ", in_file_list[
                0], 'and', in_file_list[count], '!!!'
            sys.exit()

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if verbose:
        print "Creating ", this_sumfile, "  ..."
    if (me.get_rank() == 0):
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'

        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if (verbose == True):
            print "Setting dimensions ....."
        nc_sumfile.create_dimension('nlat', nlat)
        nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('time', None)
        nc_sumfile.create_dimension('ens_size', opts_dict['npert'])
        nc_sumfile.create_dimension('nbin', opts_dict['nbin'])
        nc_sumfile.create_dimension('nvars', len(Var3d) + len(Var2d))
        nc_sumfile.create_dimension('nvars3d', len(Var3d))
        nc_sumfile.create_dimension('nvars2d', len(Var2d))
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if (verbose == True):
            print "Setting global attributes ....."
        setattr(nc_sumfile, 'creation_date', now)
        setattr(nc_sumfile, 'title', 'POP verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if (verbose == True):
            print "Creating variables ....."
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev', ))
        v_vars = nc_sumfile.create_variable("vars", 'S1',
                                            ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1',
                                             ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1',
                                             ('nvars2d', 'str_size'))
        v_time = nc_sumfile.create_variable("time", 'd', ('time', ))
        v_ens_avg3d = nc_sumfile.create_variable(
            "ens_avg3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_stddev3d = nc_sumfile.create_variable(
            "ens_stddev3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_avg2d = nc_sumfile.create_variable(
            "ens_avg2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_ens_stddev2d = nc_sumfile.create_variable(
            "ens_stddev2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))

        v_RMSZ = nc_sumfile.create_variable(
            "RMSZ", 'f', ('time', 'nvars', 'ens_size', 'nbin'))
        if not opts_dict['zscoreonly']:
            v_gm = nc_sumfile.create_variable("global_mean", 'f',
                                              ('time', 'nvars', 'ens_size'))

        # Assign vars, var3d and var2d
        if (verbose == True):
            print "Assigning vars, var3d, and var2d ....."

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []
        all_var_names = list(Var3d)
        all_var_names += Var2d
        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(Var3d)
        for i in range(l_eq):
            tt = list(Var3d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(Var2d)
        for i in range(l_eq):
            tt = list(Var2d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if (verbose == True):
            print "Assigning time invariant metadata ....."
        vars_dict = o_files[0].variables
        lev_data = vars_dict["z_t"]
        v_lev = lev_data

    # Time-varient metadata
    if verbose:
        print "Assigning time variant metadata ....."
    vars_dict = o_files[0].variables
    time_value = vars_dict['time']
    time_array = np.array([time_value])
    time_array = pyEnsLib.gather_npArray_pop(time_array, me, (me.get_size(), ))
    if me.get_rank() == 0:
        v_time[:] = time_array[:]

    # Calculate global mean, average, standard deviation
    if verbose:
        print "Calculating global means ....."
    is_SE = False
    tslice = 0
    if not opts_dict['zscoreonly']:
        gm3d, gm2d = pyEnsLib.generate_global_mean_for_summary(
            o_files, Var3d, Var2d, is_SE, False, opts_dict)
    if verbose:
        print "Finish calculating global means ....."

    # Calculate RMSZ scores
    if (verbose == True):
        print "Calculating RMSZ scores ....."
    zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(
        o_files, Var3d, Var2d, is_SE, opts_dict)

    # Collect from all processors
    if opts_dict['mpi_enable']:
        # Gather the 3d variable results from all processors to the master processor
        # Gather global means 3d results
        if not opts_dict['zscoreonly']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            #print "before gather, gmall.shape=",gmall.shape
            gmall = pyEnsLib.gather_npArray_pop(
                gmall, me,
                (me.get_size(), len(Var3d) + len(Var2d), len(o_files)))
        zmall = np.concatenate((zscore3d, zscore2d), axis=0)
        zmall = pyEnsLib.gather_npArray_pop(
            zmall, me,
            (me.get_size(), len(Var3d) + len(Var2d), len(o_files), nbin))
        #print 'zmall=',zmall

        #print "after gather, gmall.shape=",gmall.shape
        ens_avg3d = pyEnsLib.gather_npArray_pop(
            ens_avg3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_avg2d = pyEnsLib.gather_npArray_pop(ens_avg2d, me,
                                                (me.get_size(), len(Var2d),
                                                 (nlat), nlon))
        ens_stddev3d = pyEnsLib.gather_npArray_pop(
            ens_stddev3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_stddev2d = pyEnsLib.gather_npArray_pop(ens_stddev2d, me,
                                                   (me.get_size(), len(Var2d),
                                                    (nlat), nlon))

    # Assign to file:
    if me.get_rank() == 0:
        #Zscoreall=np.concatenate((zscore3d,zscore2d),axis=0)
        v_RMSZ[:, :, :, :] = zmall[:, :, :, :]
        v_ens_avg3d[:, :, :, :, :] = ens_avg3d[:, :, :, :, :]
        v_ens_stddev3d[:, :, :, :, :] = ens_stddev3d[:, :, :, :, :]
        v_ens_avg2d[:, :, :, :] = ens_avg2d[:, :, :, :]
        v_ens_stddev2d[:, :, :, :] = ens_stddev2d[:, :, :, :]
        if not opts_dict['zscoreonly']:
            v_gm[:, :, :] = gmall[:, :, :]
        print "All done"
示例#14
0
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if me.get_rank() == 0 and (verbose == True):
            print "Assigning time invariant metadata ....."
        lev_data = vars_dict["lev"]
        v_lev = lev_data

    # Form ensembles, each missing one member; compute RMSZs and global means
    #for each variable, we also do max norm also (currently done in pyStats)
    tslice = opts_dict['tslice']

    if not opts_dict['cumul']:
        # Partition the var list
        
        var3_list_loc=me.partition(d3_var_names,func=EqualStride(),involved=True)
        var2_list_loc=me.partition(d2_var_names,func=EqualStride(),involved=True)
    else:
        var3_list_loc=d3_var_names
        var2_list_loc=d2_var_names

    # Calculate global means #
    if me.get_rank() == 0 and (verbose == True):
        print "Calculating global means ....."
    if not opts_dict['cumul']:
        gm3d,gm2d,var_list = pyEnsLib.generate_global_mean_for_summary(o_files,var3_list_loc,var2_list_loc , is_SE, False,opts_dict)
    if me.get_rank() == 0 and (verbose == True):
        print "Finish calculating global means ....."

    # Calculate RMSZ scores  
    if (not opts_dict['gmonly']) | (opts_dict['cumul']):
示例#15
0
def main(argv):

    # Get command line stuff and store in a dictionary
    s = 'nyear= nmonth= npert= tag= res= mach= compset= sumfile= indir= tslice= verbose jsonfile= mpi_enable mpi_disable nrand= rand seq= jsondir= esize='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSumPop_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm2_1_0'
    opts_dict['compset'] = 'G'
    opts_dict['mach'] = 'cheyenne'
    opts_dict['tslice'] = 0
    opts_dict['nyear'] = 1
    opts_dict['nmonth'] = 12
    opts_dict['esize'] = 40
    opts_dict['npert'] = 0  #for backwards compatible
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['res'] = 'T62_g17'
    opts_dict['sumfile'] = 'pop.ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['jsonfile'] = 'pop_ensemble.json'
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = True
    opts_dict['mpi_disable'] = False
    #opts_dict['zscoreonly'] = True
    opts_dict['popens'] = True
    opts_dict['nrand'] = 40
    opts_dict['rand'] = False
    opts_dict['seq'] = 0
    opts_dict['jsondir'] = './'

    # This creates the dictionary of input arguments
    #print "before parseconfig"
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ESP', opts_dict)

    verbose = opts_dict['verbose']
    nbin = opts_dict['nbin']

    if opts_dict['mpi_disable']:
        opts_dict['mpi_enable'] = False

    #still have npert for backwards compatibility - check if it was set
    #and override esize
    if opts_dict['npert'] > 0:
        user_size = opts_dict['npert']
        print(
            'WARNING: User specified value for --npert will override --esize.  Please consider using --esize instead of --npert in the future.'
        )
        opts_dict['esize'] = user_size

    # Now find file names in indir
    input_dir = opts_dict['indir']

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(False)

    if opts_dict['jsonfile']:
        # Read in the included var list
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        str_size = 0
        for str in Var3d:
            if str_size < len(str):
                str_size = len(str)
        for str in Var2d:
            if str_size < len(str):
                str_size = len(str)

    if me.get_rank() == 0:
        print('STATUS: Running pyEnsSumPop!')

        if verbose:
            print("VERBOSE: opts_dict = ")
            print(opts_dict)

    in_files = []
    if (os.path.exists(input_dir)):
        # Pick up the 'nrand' random number of input files to generate summary files
        if opts_dict['rand']:
            in_files = pyEnsLib.Random_pickup_pop(input_dir, opts_dict,
                                                  opts_dict['nrand'])
        else:
            # Get the list of files
            in_files_temp = os.listdir(input_dir)
            in_files = sorted(in_files_temp)
        num_files = len(in_files)

    else:
        if me.get_rank() == 0:
            print('ERROR: Input directory: ', input_dir,
                  ' not found => EXITING....')
        sys.exit(2)

    #make sure we have enough files
    files_needed = opts_dict['nmonth'] * opts_dict['esize'] * opts_dict['nyear']
    if (num_files < files_needed):
        if me.get_rank() == 0:
            print(
                'ERROR: Input directory does not contain enough files (must be esize*nyear*nmonth = ',
                files_needed, ' ) and it has only ', num_files, ' files).')
        sys.exit(2)

    #Partition the input file list (ideally we have one processor per month)
    in_file_list = me.partition(in_files, func=EqualStride(), involved=True)

    # Check the files in the input directory
    full_in_files = []
    if me.get_rank() == 0 and opts_dict['verbose']:
        print('VERBOSE: Input files are:')

    for onefile in in_file_list:
        fname = input_dir + '/' + onefile
        if opts_dict['verbose']:
            print("my_rank = ", me.get_rank(), "  ", fname)
        if (os.path.isfile(fname)):
            full_in_files.append(fname)
        else:
            print("ERROR: Could not locate file: " + fname + " => EXITING....")
            sys.exit()

    #open just the first file (all procs)
    first_file = nc.Dataset(full_in_files[0], "r")

    # Store dimensions of the input fields
    if (verbose == True) and me.get_rank() == 0:
        print("VERBOSE: Getting spatial dimensions")
    nlev = -1
    nlat = -1
    nlon = -1

    # Look at first file and get dims
    input_dims = first_file.dimensions
    ndims = len(input_dims)

    # Make sure all files have the same dimensions
    if (verbose == True) and me.get_rank() == 0:
        print("VERBOSE: Checking dimensions ...")
    for key in input_dims:
        if key == "z_t":
            nlev = len(input_dims["z_t"])
        elif key == "nlon":
            nlon = len(input_dims["nlon"])
        elif key == "nlat":
            nlat = len(input_dims["nlat"])

    # Rank 0: prepare new summary ensemble file
    this_sumfile = opts_dict["sumfile"]
    if (me.get_rank() == 0):
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)

        if verbose:
            print("VERBOSE: Creating ", this_sumfile, "  ...")

        nc_sumfile = nc.Dataset(this_sumfile, "w", format="NETCDF4_CLASSIC")

        # Set dimensions
        if verbose:
            print("VERBOSE: Setting dimensions .....")
        nc_sumfile.createDimension('nlat', nlat)
        nc_sumfile.createDimension('nlon', nlon)
        nc_sumfile.createDimension('nlev', nlev)
        nc_sumfile.createDimension('time', None)
        nc_sumfile.createDimension('ens_size', opts_dict['esize'])
        nc_sumfile.createDimension('nbin', opts_dict['nbin'])
        nc_sumfile.createDimension('nvars', len(Var3d) + len(Var2d))
        nc_sumfile.createDimension('nvars3d', len(Var3d))
        nc_sumfile.createDimension('nvars2d', len(Var2d))
        nc_sumfile.createDimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if verbose:
            print("VERBOSE: Setting global attributes .....")
        nc_sumfile.creation_date = now
        nc_sumfile.title = 'POP verification ensemble summary file'
        nc_sumfile.tag = opts_dict["tag"]
        nc_sumfile.compset = opts_dict["compset"]
        nc_sumfile.resolution = opts_dict["res"]
        nc_sumfile.machine = opts_dict["mach"]

        # Create variables
        if verbose:
            print("VERBOSE: Creating variables .....")
        v_lev = nc_sumfile.createVariable("z_t", 'f', ('nlev', ))
        v_vars = nc_sumfile.createVariable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.createVariable("var3d", 'S1',
                                            ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.createVariable("var2d", 'S1',
                                            ('nvars2d', 'str_size'))
        v_time = nc_sumfile.createVariable("time", 'd', ('time', ))
        v_ens_avg3d = nc_sumfile.createVariable(
            "ens_avg3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_stddev3d = nc_sumfile.createVariable(
            "ens_stddev3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_avg2d = nc_sumfile.createVariable(
            "ens_avg2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_ens_stddev2d = nc_sumfile.createVariable(
            "ens_stddev2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_RMSZ = nc_sumfile.createVariable(
            "RMSZ", 'f', ('time', 'nvars', 'ens_size', 'nbin'))

        # Assign vars, var3d and var2d
        if verbose:
            print("VERBOSE: Assigning vars, var3d, and var2d .....")

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []
        all_var_names = list(Var3d)
        all_var_names += Var2d
        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(Var3d)
        for i in range(l_eq):
            tt = list(Var3d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(Var2d)
        for i in range(l_eq):
            tt = list(Var2d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if verbose:
            print("VERBOSE: Assigning time invariant metadata .....")
        vars_dict = first_file.variables
        lev_data = vars_dict["z_t"]
        v_lev[:] = lev_data[:]

        #end of rank 0

    #All:
    # Time-varient metadata
    if verbose:
        if me.get_rank() == 0:
            print("VERBOSE: Assigning time variant metadata .....")
    vars_dict = first_file.variables
    time_value = vars_dict['time']
    time_array = np.array([time_value])
    time_array = pyEnsLib.gather_npArray_pop(time_array, me, (me.get_size(), ))
    if me.get_rank() == 0:
        v_time[:] = time_array[:]

    #Assign zero values to first time slice of RMSZ and avg and stddev for 2d & 3d
    #in case of a calculation problem before finishing
    e_size = opts_dict['esize']
    b_size = opts_dict['nbin']
    z_ens_avg3d = np.zeros((len(Var3d), nlev, nlat, nlon), dtype=np.float32)
    z_ens_stddev3d = np.zeros((len(Var3d), nlev, nlat, nlon), dtype=np.float32)
    z_ens_avg2d = np.zeros((len(Var2d), nlat, nlon), dtype=np.float32)
    z_ens_stddev2d = np.zeros((len(Var2d), nlat, nlon), dtype=np.float32)
    z_RMSZ = np.zeros(((len(Var3d) + len(Var2d)), e_size, b_size),
                      dtype=np.float32)

    #rank 0 (put zero values in summary file)
    if me.get_rank() == 0:
        v_RMSZ[0, :, :, :] = z_RMSZ[:, :, :]
        v_ens_avg3d[0, :, :, :, :] = z_ens_avg3d[:, :, :, :]
        v_ens_stddev3d[0, :, :, :, :] = z_ens_stddev3d[:, :, :, :]
        v_ens_avg2d[0, :, :, :] = z_ens_avg2d[:, :, :]
        v_ens_stddev2d[0, :, :, :] = z_ens_stddev2d[:, :, :]

    #close file[0]
    first_file.close()

    # Calculate RMSZ scores
    if (verbose == True and me.get_rank() == 0):
        print("VERBOSE: Calculating RMSZ scores .....")

    zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d = pyEnsLib.calc_rmsz(
        full_in_files, Var3d, Var2d, opts_dict)

    if (verbose == True and me.get_rank() == 0):
        print("VERBOSE: Finished with RMSZ scores .....")

    # Collect from all processors
    if opts_dict['mpi_enable']:
        # Gather the 3d variable results from all processors to the master processor

        zmall = np.concatenate((zscore3d, zscore2d), axis=0)
        zmall = pyEnsLib.gather_npArray_pop(
            zmall, me,
            (me.get_size(), len(Var3d) + len(Var2d), len(full_in_files), nbin))

        ens_avg3d = pyEnsLib.gather_npArray_pop(
            ens_avg3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_avg2d = pyEnsLib.gather_npArray_pop(ens_avg2d, me,
                                                (me.get_size(), len(Var2d),
                                                 (nlat), nlon))
        ens_stddev3d = pyEnsLib.gather_npArray_pop(
            ens_stddev3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_stddev2d = pyEnsLib.gather_npArray_pop(ens_stddev2d, me,
                                                   (me.get_size(), len(Var2d),
                                                    (nlat), nlon))

    # Assign to summary file:
    if me.get_rank() == 0:

        v_RMSZ[:, :, :, :] = zmall[:, :, :, :]
        v_ens_avg3d[:, :, :, :, :] = ens_avg3d[:, :, :, :, :]
        v_ens_stddev3d[:, :, :, :, :] = ens_stddev3d[:, :, :, :, :]
        v_ens_avg2d[:, :, :, :] = ens_avg2d[:, :, :, :]
        v_ens_stddev2d[:, :, :, :] = ens_stddev2d[:, :, :, :]

        print("STATUS: PyEnsSumPop has completed.")

        nc_sumfile.close()
示例#16
0
def main(argv):

    # Get command line stuff and store in a dictionary
    s = """verbose sumfile= indir= input_globs= tslice= nPC= sigMul= 
         minPCFail= minRunFail= numRunFile= printVarTest popens 
         jsonfile= mpi_enable nbin= minrange= maxrange= outfile= 
         casejson= npick= pepsi_gm pop_tol= web_enabled
         pop_threshold= prn_std_mean fIndex= lev= eet= json_case= """
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    # Set the default value for options
    opts_dict = {}
    opts_dict['input_globs'] = ''
    opts_dict['indir'] = ''
    opts_dict['tslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVarTest'] = False
    opts_dict['popens'] = False
    opts_dict['jsonfile'] = ''
    opts_dict['mpi_enable'] = False
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['outfile'] = 'testcase.result'
    opts_dict['casejson'] = ''
    opts_dict['npick'] = 10
    opts_dict['pepsi_gm'] = False
    opts_dict['test_failure'] = True
    opts_dict['pop_tol'] = 3.0
    opts_dict['pop_threshold'] = 0.90
    opts_dict['prn_std_mean'] = False
    opts_dict['lev'] = 0
    opts_dict['eet'] = 0
    opts_dict['json_case'] = ''
    opts_dict['sumfile'] = ''
    opts_dict['web_enabled'] = False

    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    gmonly = False
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, caller, opts_dict)
    popens = opts_dict['popens']
    #some mods for POP-ECT
    if popens == True:
        opts_dict['tslice'] = 0
        opts_dict['numRunFile'] = 1
        opts_dict['eet'] = 0
        opts_dict['mpi_enable'] = False

        #print opts_dict

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    # Print out timestamp, input ensemble file and new run directory
    dt = datetime.now()
    verbose = opts_dict['verbose']
    if me.get_rank() == 0:
        print '--------pyCECT--------'
        print ' '
        print dt.strftime("%A, %d. %B %Y %I:%M%p")
        print ' '
        if not opts_dict['web_enabled']:
            print 'Ensemble summary file = ' + opts_dict['sumfile']
        print ' '
        print 'Testcase file directory = ' + opts_dict['indir']
        print ' '
        print ' '

    # Ensure sensible EET value
    if opts_dict['eet'] and opts_dict['numRunFile'] > opts_dict['eet']:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    ifiles = []
    in_files = []
    # Random pick pop files from not_pick_files list
    if opts_dict['casejson']:
        with open(opts_dict['casejson']) as fin:
            result = json.load(fin)
            in_files_first = result['not_pick_files']
            in_files = random.sample(in_files_first, opts_dict['npick'])
            print 'Testcase files:'
            print '\n'.join(in_files)

    elif opts_dict['json_case']:
        json_file = opts_dict['json_case']
        if (os.path.exists(json_file)):
            fd = open(json_file)
            metainfo = json.load(fd)
            if 'CaseName' in metainfo:
                casename = metainfo['CaseName']
                if (os.path.exists(opts_dict['indir'])):
                    for name in casename:
                        wildname = '*.' + name + '.*'
                        full_glob_str = os.path.join(opts_dict['indir'],
                                                     wildname)
                        glob_file = glob.glob(full_glob_str)
                        in_files.extend(glob_file)
        else:
            print "ERROR: " + opts_dict['json_case'] + " does not exist."
            sys.exit()
        print "in_files=", in_files
    else:
        wildname = '*' + str(opts_dict['input_globs']) + '*'
        # Open all input files
        if (os.path.exists(opts_dict['indir'])):
            full_glob_str = os.path.join(opts_dict['indir'], wildname)
            glob_files = glob.glob(full_glob_str)
            in_files.extend(glob_files)
            num_file = len(in_files)
            if num_file == 0:
                print "ERROR: no matching files for wildcard=" + wildname + " found in specified --indir"
                sys.exit()
            else:
                print "Found " + str(
                    num_file) + " matching files in specified --indir"
            if opts_dict['numRunFile'] > num_file:
                print "ERROR: more files needed (" + str(
                    opts_dict['numRunFile']
                ) + ") than available in the indir (" + str(num_file) + ")."
                sys.exit()
            #in_files_temp=os.listdir(opts_dict['indir'])
    in_files.sort()
    #print in_files

    if popens:
        #Partition the input file list
        in_files_list = me.partition(in_files,
                                     func=EqualStride(),
                                     involved=True)

    else:
        # Random pick non pop files
        in_files_list = pyEnsLib.Random_pickup(in_files, opts_dict)
        #in_files_list=in_files

    for frun_file in in_files_list:
        if frun_file.find(opts_dict['indir']) != -1:
            frun_temp = frun_file
        else:
            frun_temp = opts_dict['indir'] + '/' + frun_file
        if (os.path.isfile(frun_temp)):
            ifiles.append(Nio.open_file(frun_temp, "r"))
        else:
            print "ERROR: COULD NOT LOCATE FILE " + frun_temp
            sys.exit()

    if opts_dict['web_enabled']:
        if len(opts_dict['sumfile']) == 0:
            opts_dict[
                'sumfile'] = '/glade/p/cesmdata/cseg/inputdata/validation/'
        opts_dict['sumfile'], machineid, compiler = pyEnsLib.search_sumfile(
            opts_dict, ifiles)
        if len(machineid) != 0 and len(compiler) != 0:
            print ' '
            print 'Validation file    : machineid = ' + machineid + ', compiler = ' + compiler
            print 'Found summary file : ' + opts_dict['sumfile']
            print ' '
        else:
            print 'Warning: machine and compiler are unknown'

    if popens:

        # Read in the included var list
        if not os.path.exists(opts_dict['jsonfile']):
            print "ERROR: POP-ECT requires the specification of a valid json file via --jsonfile."
            sys.exit()
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        print ' '
        print 'Z-score tolerance = ' + '{:3.2f}'.format(opts_dict['pop_tol'])
        print 'ZPR = ' + '{:.2%}'.format(opts_dict['pop_threshold'])
        zmall, n_timeslice = pyEnsLib.pop_compare_raw_score(
            opts_dict, ifiles, me.get_rank(), Var3d, Var2d)
        #zmall = np.concatenate((Zscore3d,Zscore2d),axis=0)
        np.set_printoptions(threshold=np.nan)

        if opts_dict['mpi_enable']:
            zmall = pyEnsLib.gather_npArray_pop(
                zmall, me, (me.get_size(), len(Var3d) + len(Var2d),
                            len(ifiles), opts_dict['nbin']))
            if me.get_rank() == 0:
                fout = open(opts_dict['outfile'], "w")
                for i in range(me.get_size()):
                    for j in zmall[i]:
                        np.savetxt(fout, j, fmt='%-7.2e')
    #cam
    else:
        # Read all variables from the ensemble summary file
        ens_var_name, ens_avg, ens_stddev, ens_rmsz, ens_gm, num_3d, mu_gm, sigma_gm, loadings_gm, sigma_scores_gm, is_SE_sum, std_gm = pyEnsLib.read_ensemble_summary(
            opts_dict['sumfile'])

        if len(ens_rmsz) == 0:
            gmonly = True
        # Add ensemble rmsz and global mean to the dictionary "variables"
        variables = {}
        if not gmonly:
            for k, v in ens_rmsz.iteritems():
                pyEnsLib.addvariables(variables, k, 'zscoreRange', v)

        for k, v in ens_gm.iteritems():
            pyEnsLib.addvariables(variables, k, 'gmRange', v)

        # Get 3d variable name list and 2d variable name list separately
        var_name3d = []
        var_name2d = []
        for vcount, v in enumerate(ens_var_name):
            if vcount < num_3d:
                var_name3d.append(v)
            else:
                var_name2d.append(v)

        # Get ncol and nlev value
        npts3d, npts2d, is_SE = pyEnsLib.get_ncol_nlev(ifiles[0])

        if (is_SE ^ is_SE_sum):
            print 'Warning: please note the ensemble summary file is different from the testing files, they use different grids'

        # Compare the new run and the ensemble summary file to get rmsz score
        results = {}
        countzscore = np.zeros(len(ifiles), dtype=np.int32)
        countgm = np.zeros(len(ifiles), dtype=np.int32)
        if not gmonly:
            for fcount, fid in enumerate(ifiles):
                otimeSeries = fid.variables
                for var_name in ens_var_name:
                    orig = otimeSeries[var_name]
                    Zscore, has_zscore = pyEnsLib.calculate_raw_score(
                        var_name, orig[opts_dict['tslice']], npts3d, npts2d,
                        ens_avg, ens_stddev, is_SE, opts_dict, 0, 0, 0)
                    if has_zscore:
                        # Add the new run rmsz zscore to the dictionary "results"
                        pyEnsLib.addresults(results, 'zscore', Zscore,
                                            var_name, 'f' + str(fcount))

            # Evaluate the new run rmsz score if is in the range of the ensemble summary rmsz zscore range
            for fcount, fid in enumerate(ifiles):
                countzscore[fcount] = pyEnsLib.evaluatestatus(
                    'zscore', 'zscoreRange', variables, 'ens', results,
                    'f' + str(fcount))

        # Calculate the new run global mean
        mean3d, mean2d, varlist = pyEnsLib.generate_global_mean_for_summary(
            ifiles, var_name3d, var_name2d, is_SE, opts_dict['pepsi_gm'],
            opts_dict)
        means = np.concatenate((mean3d, mean2d), axis=0)

        # Add the new run global mean to the dictionary "results"
        for i in range(means.shape[1]):
            for j in range(means.shape[0]):
                pyEnsLib.addresults(results, 'means', means[j][i],
                                    ens_var_name[j], 'f' + str(i))

        # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
        for fcount, fid in enumerate(ifiles):
            countgm[fcount] = pyEnsLib.evaluatestatus('means', 'gmRange',
                                                      variables, 'gm', results,
                                                      'f' + str(fcount))

        # Calculate the PCA scores of the new run
        new_scores, var_list, comp_std_gm = pyEnsLib.standardized(
            means, mu_gm, sigma_gm, loadings_gm, ens_var_name, opts_dict,
            ens_avg, me)
        run_index, decision = pyEnsLib.comparePCAscores(
            ifiles, new_scores, sigma_scores_gm, opts_dict, me)

        # If there is failure, plot out standardized mean and compared standardized mean in box plots
        if opts_dict['prn_std_mean'] and decision == 'FAILED':
            import seaborn as sns
            category = {
                "all_outside99": [],
                "two_outside99": [],
                "one_outside99": [],
                "all_oneside_outside1QR": []
            }
            b = list(pyEnsLib.chunk(ens_var_name, 10))
            for f, alist in enumerate(b):
                for fc, avar in enumerate(alist):
                    dist_995 = np.percentile(std_gm[avar], 99.5)
                    dist_75 = np.percentile(std_gm[avar], 75)
                    dist_25 = np.percentile(std_gm[avar], 25)
                    dist_05 = np.percentile(std_gm[avar], 0.5)
                    c = 0
                    d = 0
                    p = 0
                    q = 0
                    for i in range(comp_std_gm[f + fc].size):
                        if comp_std_gm[f + fc][i] > dist_995:
                            c = c + 1
                        elif comp_std_gm[f + fc][i] < dist_05:
                            d = d + 1
                        elif (comp_std_gm[f + fc][i] < dist_995
                              and comp_std_gm[f + fc][i] > dist_75):
                            p = p + 1
                        elif (comp_std_gm[f + fc][i] > dist_05
                              and comp_std_gm[f + fc][i] < dist_25):
                            q = q + 1
                    if c == 3 or d == 3:
                        category["all_outside99"].append((avar, f + fc))
                    elif c == 2 or d == 2:
                        category["two_outside99"].append((avar, f + fc))
                    elif c == 1 or d == 1:
                        category["one_outside99"].append((avar, f + fc))
                    if p == 3 or q == 3:
                        category["all_oneside_outside1QR"].append(
                            (avar, f + fc))
            part_name = opts_dict['indir'].split('/')[-1]
            if not part_name:
                part_name = opts_dict['indir'].split('/')[-2]
            for key in sorted(category):
                list_array = []
                list_array2 = []
                list_var = []
                value = category[key]
                print "value len=", key, len(value)
                for each_var in value:
                    list_array.append(std_gm[each_var[0]])
                    list_array2.append(comp_std_gm[each_var[1]])
                    list_var.append(each_var[0])
                if len(value) != 0:
                    ax = sns.boxplot(data=list_array,
                                     whis=[0.5, 99.5],
                                     fliersize=0.0)
                    sns.stripplot(data=list_array2, jitter=True, color="r")
                    sns.plt.xticks(range(len(list_array)),
                                   list_var,
                                   fontsize=8,
                                   rotation=-45)
                    if decision == 'FAILED':
                        sns.plt.savefig(part_name + "_" + key + "_fail.png")
                    else:
                        sns.plt.savefig(part_name + "_" + key + "_pass.png")
                    sns.plt.clf()
            '''
            if len(run_index)>0:
               json_file=opts_dict['json_case']
               if (os.path.exists(json_file)):
                  fd=open(json_file)
                  metainfo=json.load(fd)
                  caseindex=metainfo['CaseIndex']
                  enspath=str(metainfo['EnsPath'][0])
                  #print caseindex
                  if (os.path.exists(enspath)):
                     i=0
                     comp_file=[]
                     search = '\.[0-9]{3}\.'
                     for name in in_files_list:
                        s=re.search(search,name)
                        in_files_index=s.group(0)
                        if in_files_index[1:4] in caseindex:
                           ens_index=str(caseindex[in_files_index[1:4]])
                           wildname='*.'+ens_index+'.*'
                           full_glob_str=os.path.join(enspath,wildname)
                           glob_file=glob.glob(full_glob_str)
                           comp_file.extend(glob_file)
                     print "comp_file=",comp_file                
                     pyEnsLib.plot_variable(in_files_list,comp_file,opts_dict,var_list,run_index,me)
            '''
        # Print out
        if opts_dict['printVarTest']:
            print '*********************************************** '
            print 'Variable-based testing (for reference only - not used to determine pass/fail)'
            print '*********************************************** '
            for fcount, fid in enumerate(ifiles):
                print ' '
                print 'Run ' + str(fcount + 1) + ":"
                print ' '
                if not gmonly:
                    print '***' + str(countzscore[fcount]), " of " + str(
                        len(ens_var_name)
                    ) + ' variables are outside of ensemble RMSZ distribution***'
                    pyEnsLib.printsummary(results, 'ens', 'zscore',
                                          'zscoreRange', (fcount), variables,
                                          'RMSZ')
                    print ' '
                print '***' + str(countgm[fcount]), " of " + str(
                    len(ens_var_name)
                ) + ' variables are outside of ensemble global mean distribution***'
                pyEnsLib.printsummary(results, 'gm', 'means', 'gmRange',
                                      fcount, variables, 'global mean')
                print ' '
                print '----------------------------------------------------------------------------'
    if me.get_rank() == 0:
        print ' '
        print "Testing complete."
        print ' '
示例#17
0
def main(argv):


    # Get command line stuff and store in a dictionary
    s='verbose sumfile= indir= input_globs= tslice= nPC= sigMul= minPCFail= minRunFail= numRunFile= printVarTest popens jsonfile= mpi_enable nbin= minrange= maxrange= outfile= casejson= npick= pepsi_gm test_failure pop_tol= pop_threshold='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv,"h",optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)


    # Set the default value for options
    opts_dict = {}
    opts_dict['input_globs'] = ''
    opts_dict['indir'] = ''
    opts_dict['tslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVarTest'] = False
    opts_dict['popens'] = False
    opts_dict['jsonfile'] = ''
    opts_dict['mpi_enable'] = False
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['outfile'] = 'testcase.result'
    opts_dict['casejson'] = ''
    opts_dict['npick'] = 10
    opts_dict['pepsi_gm'] = False
    opts_dict['test_failure'] = True
    opts_dict['pop_tol'] = 3.0
    opts_dict['pop_threshold'] = 0.90
    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    gmonly = False
    opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,caller,opts_dict)
    popens = opts_dict['popens']

    # Print out timestamp, input ensemble file and new run directory
    dt=datetime.now()
    verbose = opts_dict['verbose']
    print('--------pyCECT--------')
    print(' ')
    print(dt.strftime("%A, %d. %B %Y %I:%M%p"))
    print(' ')
    print('Ensemble summary file = '+opts_dict['sumfile'])
    print(' ')
    print('Testcase file directory = '+opts_dict['indir']    )
    print(' ')
    print(' ')

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me=simplecomm.create_comm()
    else:
        me=simplecomm.create_comm(not opts_dict['mpi_enable'])

    ifiles=[]
    in_files=[]
    # Random pick pop files from not_pick_files list
    if opts_dict['casejson']:
       with open(opts_dict['casejson']) as fin:
            result=json.load(fin)
            in_files_first=result['not_pick_files']
            in_files=random.sample(in_files_first,opts_dict['npick'])
            print('Testcase files:')
            print('\n'.join(in_files))

    else:
       wildname='*'+opts_dict['input_globs']+'*'
       # Open all input files
       if (os.path.exists(opts_dict['indir'])):
          full_glob_str=os.path.join(opts_dict['indir'],wildname)
          glob_files=glob.glob(full_glob_str)
          in_files.extend(glob_files)
          #in_files_temp=os.listdir(opts_dict['indir'])
    in_files.sort()

    if popens:
        #Partition the input file list
        in_files_list=me.partition(in_files,func=EqualStride(),involved=True)

    else:
        # Random pick non pop files
        in_files_list=pyEnsLib.Random_pickup(in_files,opts_dict)
    for frun_file in in_files_list:
         if frun_file.find(opts_dict['indir']) != -1:
            frun_temp=frun_file
         else:
            frun_temp=opts_dict['indir']+'/'+frun_file
         if (os.path.isfile(frun_temp)):
             ifiles.append(Nio.open_file(frun_temp,"r"))
         else:
             print("COULD NOT LOCATE FILE " +frun_temp+" EXISTING")
             sys.exit()

    if popens:

        # Read in the included var list
        Var2d,Var3d=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ESP')
        print(' ')
        print('Z-score tolerance = '+'{:3.2f}'.format(opts_dict['pop_tol']))
        print('ZPR = '+'{:.2%}'.format(opts_dict['pop_threshold']))
        zmall,n_timeslice=pyEnsLib.compare_raw_score(opts_dict,ifiles,me.get_rank(),Var3d,Var2d)
        #zmall = np.concatenate((Zscore3d,Zscore2d),axis=0)
        np.set_printoptions(threshold=np.nan)

        if opts_dict['mpi_enable']:
            zmall = pyEnsLib.gather_npArray_pop(zmall,me,(me.get_size(),len(Var3d)+len(Var2d),len(ifiles),opts_dict['nbin']))
            if me.get_rank()==0:
                fout = open(opts_dict['outfile'],"w")
        for i in range(me.get_size()):
            for j in zmall[i]:
                        np.savetxt(fout,j,fmt='%-7.2e')
    else:
    # Read all variables from the ensemble summary file
    ens_var_name,ens_avg,ens_stddev,ens_rmsz,ens_gm,num_3d,mu_gm,sigma_gm,loadings_gm,sigma_scores_gm,is_SE_sum=pyEnsLib.read_ensemble_summary(opts_dict['sumfile'])

    if len(ens_rmsz) == 0:
        gmonly = True
    # Add ensemble rmsz and global mean to the dictionary "variables"
    variables={}
    if not gmonly:
        for k,v in ens_rmsz.iteritems():
        pyEnsLib.addvariables(variables,k,'zscoreRange',v)

    for k,v in ens_gm.iteritems():
        pyEnsLib.addvariables(variables,k,'gmRange',v)

    # Get 3d variable name list and 2d variable name list seperately
    var_name3d=[]
    var_name2d=[]
    for vcount,v in enumerate(ens_var_name):
      if vcount < num_3d:
        var_name3d.append(v)
      else:
        var_name2d.append(v)

    # Get ncol and nlev value
    npts3d,npts2d,is_SE=pyEnsLib.get_ncol_nlev(ifiles[0])

        if (is_SE ^ is_SE_sum):
           print('Warning: please note the ensemble summary file is different from the testing files, they use different grids')


    # Compare the new run and the ensemble summary file to get rmsz score
    results={}
    countzscore=np.zeros(len(ifiles),dtype=np.int32)
    countgm=np.zeros(len(ifiles),dtype=np.int32)
    if not gmonly:
        for fcount,fid in enumerate(ifiles):
        otimeSeries = fid.variables
        for var_name in ens_var_name:
            orig=otimeSeries[var_name]
            Zscore,has_zscore=pyEnsLib.calculate_raw_score(var_name,orig[opts_dict['tslice']],npts3d,npts2d,ens_avg,ens_stddev,is_SE,opts_dict,0,0,0)
            if has_zscore:
            # Add the new run rmsz zscore to the dictionary "results"
            pyEnsLib.addresults(results,'zscore',Zscore,var_name,'f'+str(fcount))


        # Evaluate the new run rmsz score if is in the range of the ensemble summary rmsz zscore range
        for fcount,fid in enumerate(ifiles):
        countzscore[fcount]=pyEnsLib.evaluatestatus('zscore','zscoreRange',variables,'ens',results,'f'+str(fcount))

    # Calculate the new run global mean
    mean3d,mean2d=pyEnsLib.generate_global_mean_for_summary(ifiles,var_name3d,var_name2d,is_SE,opts_dict['pepsi_gm'],opts_dict)
    means=np.concatenate((mean3d,mean2d),axis=0)

    # Add the new run global mean to the dictionary "results"
    for i in range(means.shape[1]):
        for j in range(means.shape[0]):
        pyEnsLib.addresults(results,'means',means[j][i],ens_var_name[j],'f'+str(i))

    # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
    for fcount,fid in enumerate(ifiles):
        countgm[fcount]=pyEnsLib.evaluatestatus('means','gmRange',variables,'gm',results,'f'+str(fcount))

    # Calculate the PCA scores of the new run
    new_scores=pyEnsLib.standardized(means,mu_gm,sigma_gm,loadings_gm)
    pyEnsLib.comparePCAscores(ifiles,new_scores,sigma_scores_gm,opts_dict)

    # Print out
    if opts_dict['printVarTest']:
        print('*********************************************** ')
        print('Variable-based testing (for reference only - not used to determine pass/fail)')
        print('*********************************************** ')
        for fcount,fid in enumerate(ifiles):
        print(' ')
        print('Run '+str(fcount+1)+":")
        print(' ')
        if not gmonly:
            print('***'+str(countzscore[fcount])," of "+str(len(ens_var_name))+' variables are outside of ensemble RMSZ distribution***')
            pyEnsLib.printsummary(results,'ens','zscore','zscoreRange',(fcount),variables,'RMSZ')
            print(' ')
        print('***'+str(countgm[fcount])," of "+str(len(ens_var_name))+' variables are outside of ensemble global mean distribution***')
        pyEnsLib.printsummary(results,'gm','means','gmRange',fcount,variables,'global mean')
        print(' ')
        print('----------------------------------------------------------------------------')

if __name__ == "__main__":
    main(sys.argv[1:])
    print(' ')
    print("Testing complete.")
示例#18
0
 def testPartitionListInvolved(self):
     data = range(5 + self.rank)
     sresult = self.scomm.partition(data, func=EqualStride(), involved=True)
     presult = self.pcomm.partition(data, func=EqualStride(), involved=True)
     self.assertEqual(sresult, presult)
示例#19
0
 def testPartitionList(self):
     data = range(5 + self.rank)
     sresult = self.scomm.partition(data, func=EqualStride())
     presult = self.pcomm.partition(data, func=EqualStride())
     self.assertEqual(sresult, presult)
示例#20
0
文件: pyCECT.py 项目: yelizy/cime
def main(argv):

    # Get command line stuff and store in a dictionary
    s = """verbose sumfile= indir= input_globs= tslice= nPC= sigMul= 
         minPCFail= minRunFail= numRunFile= printVars popens 
         jsonfile= mpi_enable nbin= minrange= maxrange= outfile= 
         casejson= npick= pepsi_gm pop_tol= web_enabled
         pop_threshold= printStdMean fIndex= lev= eet= saveResults json_case= """
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    # Set the default value for options
    opts_dict = {}
    opts_dict['input_globs'] = ''
    opts_dict['indir'] = ''
    opts_dict['tslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVars'] = False
    opts_dict['popens'] = False
    opts_dict['jsonfile'] = ''
    opts_dict['mpi_enable'] = False
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['outfile'] = 'testcase.result'
    opts_dict['casejson'] = ''
    opts_dict['npick'] = 10
    opts_dict['pepsi_gm'] = False
    opts_dict['test_failure'] = True
    opts_dict['pop_tol'] = 3.0
    opts_dict['pop_threshold'] = 0.90
    opts_dict['printStdMean'] = False
    opts_dict['lev'] = 0
    opts_dict['eet'] = 0
    opts_dict['json_case'] = ''
    opts_dict['sumfile'] = ''
    opts_dict['web_enabled'] = False
    opts_dict['saveResults'] = False

    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, caller, opts_dict)
    popens = opts_dict['popens']

    #some mods for POP-ECT
    if popens == True:
        opts_dict['tslice'] = 0
        opts_dict['numRunFile'] = 1
        opts_dict['eet'] = 0
        opts_dict['mpi_enable'] = False

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    # Print out timestamp, input ensemble file and new run directory
    dt = datetime.now()
    verbose = opts_dict['verbose']
    if me.get_rank() == 0:
        print(' ')
        print('--------pyCECT--------')
        print(' ')
        print(dt.strftime("%A, %d. %B %Y %I:%M%p"))
        print(' ')
        if not opts_dict['web_enabled']:
            print('Ensemble summary file = ' + opts_dict['sumfile'])
        print(' ')
        print('Testcase file directory = ' + opts_dict['indir'])
        print(' ')
        print(' ')

    #make sure these are valid
    if opts_dict['web_enabled'] == False and os.path.isfile(
            opts_dict['sumfile']) == False:
        print("ERROR: Summary file name is not valid.")
        sys.exit()
    if os.path.exists(opts_dict['indir']) == False:
        print("ERROR: --indir path is not valid.")
        sys.exit()

    # Ensure sensible EET value
    if opts_dict['eet'] and opts_dict['numRunFile'] > opts_dict['eet']:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    ifiles = []
    in_files = []
    # Random pick pop files from not_pick_files list
    if opts_dict['casejson']:
        with open(opts_dict['casejson']) as fin:
            result = json.load(fin)
            in_files_first = result['not_pick_files']
            in_files = random.sample(in_files_first, opts_dict['npick'])
            print('Testcase files:')
            print('\n'.join(in_files))

    elif opts_dict['json_case']:
        json_file = opts_dict['json_case']
        if (os.path.exists(json_file)):
            fd = open(json_file)
            metainfo = json.load(fd)
            if 'CaseName' in metainfo:
                casename = metainfo['CaseName']
                if (os.path.exists(opts_dict['indir'])):
                    for name in casename:
                        wildname = '*.' + name + '.*'
                        full_glob_str = os.path.join(opts_dict['indir'],
                                                     wildname)
                        glob_file = glob.glob(full_glob_str)
                        in_files.extend(glob_file)
        else:
            print("ERROR: " + opts_dict['json_case'] + " does not exist.")
            sys.exit()
        print("in_files=", in_files)
    else:
        wildname = '*' + str(opts_dict['input_globs']) + '*'
        # Open all input files
        if (os.path.exists(opts_dict['indir'])):
            full_glob_str = os.path.join(opts_dict['indir'], wildname)
            glob_files = glob.glob(full_glob_str)
            in_files.extend(glob_files)
            num_file = len(in_files)
            if num_file == 0:
                print("ERROR: no matching files for wildcard=" + wildname +
                      " found in specified --indir")
                sys.exit()
            else:
                print("Found " + str(num_file) +
                      " matching files in specified --indir")
            if opts_dict['numRunFile'] > num_file:
                print("ERROR: more files needed (" +
                      str(opts_dict['numRunFile']) +
                      ") than available in the indir (" + str(num_file) + ").")
                sys.exit()

    in_files.sort()
    #print in_files

    if popens:
        #Partition the input file list
        in_files_list = me.partition(in_files,
                                     func=EqualStride(),
                                     involved=True)

    else:
        # Random pick cam files
        in_files_list = pyEnsLib.Random_pickup(in_files, opts_dict)

    for frun_file in in_files_list:
        if frun_file.find(opts_dict['indir']) != -1:
            frun_temp = frun_file
        else:
            frun_temp = opts_dict['indir'] + '/' + frun_file
        if (os.path.isfile(frun_temp)):
            ifiles.append(frun_temp)
        else:
            print("ERROR: COULD NOT LOCATE FILE " + frun_temp)
            sys.exit()

    if opts_dict['web_enabled']:
        if len(opts_dict['sumfile']) == 0:
            opts_dict[
                'sumfile'] = '/glade/p/cesmdata/cseg/inputdata/validation/'
        #need to open ifiles

        opts_dict['sumfile'], machineid, compiler = pyEnsLib.search_sumfile(
            opts_dict, ifiles)
        if len(machineid) != 0 and len(compiler) != 0:
            print(' ')
            print('Validation file    : machineid = ' + machineid +
                  ', compiler = ' + compiler)
            print('Found summary file : ' + opts_dict['sumfile'])
            print(' ')
        else:
            print('Warning: machine and compiler are unknown')

    if popens:

        # Read in the included var list
        if not os.path.exists(opts_dict['jsonfile']):
            print(
                "ERROR: POP-ECT requires the specification of a valid json file via --jsonfile."
            )
            sys.exit()
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        print(' ')
        print('Z-score tolerance = ' + '{:3.2f}'.format(opts_dict['pop_tol']))
        print('ZPR = ' + '{:.2%}'.format(opts_dict['pop_threshold']))
        zmall, n_timeslice = pyEnsLib.pop_compare_raw_score(
            opts_dict, ifiles, me.get_rank(), Var3d, Var2d)

        np.set_printoptions(threshold=sys.maxsize)

        if opts_dict['mpi_enable']:
            zmall = pyEnsLib.gather_npArray_pop(
                zmall, me, (me.get_size(), len(Var3d) + len(Var2d),
                            len(ifiles), opts_dict['nbin']))
            if me.get_rank() == 0:
                fout = open(opts_dict['outfile'], "w")
                for i in range(me.get_size()):
                    for j in zmall[i]:
                        np.savetxt(fout, j, fmt='%-7.2e')
    #cam
    else:
        # Read all variables from the ensemble summary file
        ens_var_name, ens_avg, ens_stddev, ens_rmsz, ens_gm, num_3d, mu_gm, sigma_gm, loadings_gm, sigma_scores_gm, is_SE_sum, std_gm, std_gm_array, str_size = pyEnsLib.read_ensemble_summary(
            opts_dict['sumfile'])

        #Only doing gm

        # Add ensemble rmsz and global mean to the dictionary "variables"
        variables = {}

        for k, v in ens_gm.items():
            pyEnsLib.addvariables(variables, k, 'gmRange', v)

        # Get 3d variable name list and 2d variable name list separately
        var_name3d = []
        var_name2d = []
        for vcount, v in enumerate(ens_var_name):
            if vcount < num_3d:
                var_name3d.append(v)
            else:
                var_name2d.append(v)

        # Get ncol and nlev value
        npts3d, npts2d, is_SE = pyEnsLib.get_ncol_nlev(ifiles[0])

        if (is_SE ^ is_SE_sum):
            print(
                'Warning: please note the ensemble summary file is different from the testing files: they use different grids'
            )

        # Compare the new run and the ensemble summary file
        results = {}
        countgm = np.zeros(len(ifiles), dtype=np.int32)

        # Calculate the new run global mean
        mean3d, mean2d, varlist = pyEnsLib.generate_global_mean_for_summary(
            ifiles, var_name3d, var_name2d, is_SE, opts_dict['pepsi_gm'],
            opts_dict)
        means = np.concatenate((mean3d, mean2d), axis=0)

        # Add the new run global mean to the dictionary "results"
        for i in range(means.shape[1]):
            for j in range(means.shape[0]):
                pyEnsLib.addresults(results, 'means', means[j][i],
                                    ens_var_name[j], 'f' + str(i))

        # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
        for fcount, fid in enumerate(ifiles):
            countgm[fcount] = pyEnsLib.evaluatestatus('means', 'gmRange',
                                                      variables, 'gm', results,
                                                      'f' + str(fcount))

        # Calculate the PCA scores of the new run
        new_scores, var_list, comp_std_gm = pyEnsLib.standardized(
            means, mu_gm, sigma_gm, loadings_gm, ens_var_name, opts_dict,
            ens_avg, me)
        run_index, decision = pyEnsLib.comparePCAscores(
            ifiles, new_scores, sigma_scores_gm, opts_dict, me)

        # If there is failure, plot out standardized mean and compared standardized mean in box plots
        #        if opts_dict['printStdMean'] and decision == 'FAILED':
        if opts_dict['printStdMean']:

            import seaborn as sns
            import matplotlib
            matplotlib.use('Agg')  #don't display figures
            import matplotlib.pyplot as plt

            print(" ")
            print(
                '***************************************************************************** '
            )
            print(
                'Test run variable standardized means (for reference only - not used to determine pass/fail)'
            )
            print(
                '***************************************************************************** '
            )
            print(" ")

            category = {
                "all_outside99": [],
                "two_outside99": [],
                "one_outside99": [],
                "all_oneside_outside1QR": []
            }
            b = list(pyEnsLib.chunk(ens_var_name, 10))
            for f, alist in enumerate(b):
                for fc, avar in enumerate(alist):
                    dist_995 = np.percentile(std_gm[avar], 99.5)
                    dist_75 = np.percentile(std_gm[avar], 75)
                    dist_25 = np.percentile(std_gm[avar], 25)
                    dist_05 = np.percentile(std_gm[avar], 0.5)
                    c = 0
                    d = 0
                    p = 0
                    q = 0
                    for i in range(comp_std_gm[f + fc].size):
                        if comp_std_gm[f + fc][i] > dist_995:
                            c = c + 1
                        elif comp_std_gm[f + fc][i] < dist_05:
                            d = d + 1
                        elif (comp_std_gm[f + fc][i] < dist_995
                              and comp_std_gm[f + fc][i] > dist_75):
                            p = p + 1
                        elif (comp_std_gm[f + fc][i] > dist_05
                              and comp_std_gm[f + fc][i] < dist_25):
                            q = q + 1
                    if c == 3 or d == 3:
                        category["all_outside99"].append((avar, f + fc))
                    elif c == 2 or d == 2:
                        category["two_outside99"].append((avar, f + fc))
                    elif c == 1 or d == 1:
                        category["one_outside99"].append((avar, f + fc))
                    if p == 3 or q == 3:
                        category["all_oneside_outside1QR"].append(
                            (avar, f + fc))
            part_name = opts_dict['indir'].split('/')[-1]
            if not part_name:
                part_name = opts_dict['indir'].split('/')[-2]
            for key in sorted(category):
                list_array = []
                list_array2 = []
                list_var = []
                value = category[key]

                if key == "all_outside99":
                    print(
                        "*** ", len(value),
                        " variables have 3 test run global means outside of the 99th percentile."
                    )
                elif key == "two_outside99":
                    print(
                        "*** ", len(value),
                        " variables have 2 test run global means outside of the 99th percentile."
                    )
                elif key == "one_outside99":
                    print(
                        "*** ", len(value),
                        " variables have 1 test run global mean outside of the 99th percentile."
                    )
                elif key == "all_oneside_outside1QR":
                    print(
                        "*** ", len(value),
                        " variables have all test run global means outside of the first quartile (but not outside the 99th percentile)."
                    )

                if len(value) > 0:
                    print(" => generating plot ...")
                    if len(value) > 20:
                        print(
                            "    NOTE: truncating to only plot the first 20 variables."
                        )
                        value = value[0:20]

                for each_var in value:
                    list_array.append(std_gm[each_var[0]])
                    list_array2.append(comp_std_gm[each_var[1]])
                    name = each_var[0]
                    if isinstance(name, str) == False:
                        name = name.decode("utf-8")

                    list_var.append(name)

                if len(value) != 0:
                    ax = sns.boxplot(data=list_array,
                                     whis=[0.5, 99.5],
                                     fliersize=0.0)
                    sns.stripplot(data=list_array2, jitter=True, color="r")
                    plt.xticks(list(range(len(list_array))),
                               list_var,
                               fontsize=8,
                               rotation=-45)

                    if decision == 'FAILED':
                        plt.savefig(part_name + "_" + key + "_fail.png")
                    else:
                        plt.savefig(part_name + "_" + key + "_pass.png")
                    plt.close()


##
# Print file with info about new test runs....to a netcdf file
##
        if opts_dict['saveResults']:

            num_vars = comp_std_gm.shape[0]
            tsize = comp_std_gm.shape[1]
            esize = std_gm_array.shape[1]
            this_savefile = 'savefile.nc'
            if (verbose == True):
                print("VERBOSE: Creating ", this_savefile, "  ...")

            if os.path.exists(this_savefile):
                os.unlink(this_savefile)
            nc_savefile = nc.Dataset(this_savefile,
                                     "w",
                                     format="NETCDF4_CLASSIC")
            nc_savefile.createDimension('ens_size', esize)
            nc_savefile.createDimension('test_size', tsize)
            nc_savefile.createDimension('nvars', num_vars)
            nc_savefile.createDimension('str_size', str_size)

            # Set global attributes
            now = time.strftime("%c")
            nc_savefile.creation_date = now
            nc_savefile.title = 'PyCECT compare results file'
            nc_savefile.summaryfile = opts_dict['sumfile']
            #nc_savefile.testfiles = in_files

            #variables
            v_vars = nc_savefile.createVariable("vars", 'S1',
                                                ('nvars', 'str_size'))
            v_std_gm = nc_savefile.createVariable("std_gm", 'f8',
                                                  ('nvars', 'test_size'))
            v_scores = nc_savefile.createVariable("scores", 'f8',
                                                  ('nvars', 'test_size'))
            v_ens_sigma_scores = nc_savefile.createVariable(
                'ens_sigma_scores', 'f8', ('nvars', ))
            v_ens_std_gm = nc_savefile.createVariable("ens_std_gm", 'f8',
                                                      ('nvars', 'ens_size'))

            #hard-coded size
            str_out = nc.stringtochar(np.array(ens_var_name, 'S10'))

            v_vars[:] = str_out
            v_std_gm[:, :] = comp_std_gm[:, :]
            v_scores[:, :] = new_scores[:, :]
            v_ens_sigma_scores[:] = sigma_scores_gm[:]
            v_ens_std_gm[:, :] = std_gm_array[:, :]

            nc_savefile.close()

        # Print variables (optional)
        if opts_dict['printVars']:
            print(" ")
            print(
                '***************************************************************************** '
            )
            print(
                'Variable global mean information (for reference only - not used to determine pass/fail)'
            )
            print(
                '***************************************************************************** '
            )
            for fcount, fid in enumerate(ifiles):
                print(' ')
                print('Run ' + str(fcount + 1) + ":")
                print(' ')
                print(
                    '***' + str(countgm[fcount]),
                    " of " + str(len(ens_var_name)) +
                    ' variables are outside of ensemble global mean distribution***'
                )
                pyEnsLib.printsummary(results, 'gm', 'means', 'gmRange',
                                      fcount, variables, 'global mean')
                print(' ')
                print(
                    '----------------------------------------------------------------------------'
                )

    if me.get_rank() == 0:
        print(' ')
        print("Testing complete.")
        print(' ')
示例#21
0
def main(argv):

    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm2_0_beta08'
    opts_dict['compset'] = 'F2000'
    opts_dict['mach'] = 'cheyenne'
    opts_dict['esize'] = 350
    opts_dict['tslice'] = 1
    opts_dict['res'] = 'f19_f19'
    opts_dict['sumfile'] = 'ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['sumfiledir'] = './'
    opts_dict['jsonfile'] = 'exclude_empty.json'
    opts_dict['verbose'] = False
    opts_dict['mpi_enable'] = False
    opts_dict['maxnorm'] = False
    opts_dict['gmonly'] = True
    opts_dict['popens'] = False
    opts_dict['cumul'] = False
    opts_dict['regx'] = 'test'
    opts_dict['startMon'] = 1
    opts_dict['endMon'] = 1
    opts_dict['fIndex'] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ES', opts_dict)

    verbose = opts_dict['verbose']

    st = opts_dict['esize']
    esize = int(st)

    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach']
            or opts_dict['res']):
        print 'Please specify --tag, --compset, --mach and --res options'
        sys.exit()

    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var list that will be excluded
    ex_varlist = []
    inc_varlist = []

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    if me.get_rank() == 0:
        print 'Running pyEnsSum!'

    if me.get_rank() == 0 and (verbose == True):
        print opts_dict
        print 'Ensemble size for summary = ', esize

    exclude = False
    if me.get_rank() == 0:
        if opts_dict['jsonfile']:
            inc_varlist = []
            # Read in the excluded or included var list
            ex_varlist, exclude = pyEnsLib.read_jsonlist(
                opts_dict['jsonfile'], 'ES')
            if exclude == False:
                inc_varlist = ex_varlist
                ex_varlist = []
            # Read in the included var list
            #inc_varlist=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES')

    # Broadcast the excluded var list to each processor
    #if opts_dict['mpi_enable']:
    #   ex_varlist=me.partition(ex_varlist,func=Duplicate(),involved=True)
    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
        exclude = me.partition(exclude, func=Duplicate(), involved=True)
        if exclude:
            ex_varlist = me.partition(ex_varlist,
                                      func=Duplicate(),
                                      involved=True)
        else:
            inc_varlist = me.partition(inc_varlist,
                                       func=Duplicate(),
                                       involved=True)

    in_files = []
    if (os.path.exists(input_dir)):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files = sorted(in_files_temp)

        # Make sure we have enough
        num_files = len(in_files)
        if me.get_rank() == 0 and (verbose == True):
            print 'Number of files in input directory = ', num_files
        if (num_files < esize):
            if me.get_rank() == 0 and (verbose == True):
                print 'Number of files in input directory (',num_files,\
                 ') is less than specified ensemble size of ', esize
            sys.exit(2)
        if (num_files > esize):
            if me.get_rank() == 0 and (verbose == True):
                print 'NOTE: Number of files in ', input_dir, \
                 'is greater than specified ensemble size of ', esize ,\
                 '\nwill just use the first ',  esize, 'files'
    else:
        if me.get_rank() == 0:
            print 'Input directory: ', input_dir, ' not found'
        sys.exit(2)

    if opts_dict['cumul']:
        if opts_dict['regx']:
            in_files_list = get_cumul_filelist(opts_dict, opts_dict['indir'],
                                               opts_dict['regx'])
        in_files = me.partition(in_files_list,
                                func=EqualLength(),
                                involved=True)
        if me.get_rank() == 0 and (verbose == True):
            print 'in_files=', in_files

    # Open the files in the input directory
    o_files = []
    if me.get_rank() == 0 and opts_dict['verbose']:
        print 'Input files are: '
        print "\n".join(in_files)
        #for i in in_files:
        #    print "in_files =",i
    for onefile in in_files[0:esize]:
        if (os.path.isfile(input_dir + '/' + onefile)):
            o_files.append(Nio.open_file(input_dir + '/' + onefile, "r"))
        else:
            if me.get_rank() == 0:
                print "COULD NOT LOCATE FILE " + input_dir + onefile + "! EXITING...."
            sys.exit()

    # Store dimensions of the input fields
    if me.get_rank() == 0 and (verbose == True):
        print "Getting spatial dimensions"
    nlev = -1
    nilev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey = ''
    latkey = ''
    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ilev":
            nilev = input_dims["ilev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif (key == "nlon") or (key == "lon"):
            nlon = input_dims[key]
            lonkey = key
        elif (key == "nlat") or (key == "lat"):
            nlat = input_dims[key]
            latkey = key

    if (nlev == -1):
        if me.get_rank() == 0:
            print "COULD NOT LOCATE valid dimension lev => EXITING...."
        sys.exit()

    if ((ncol == -1) and ((nlat == -1) or (nlon == -1))):
        if me.get_rank() == 0:
            print "Need either lat/lon or ncol  => EXITING...."
        sys.exit()

    # Check if this is SE or FV data
    if (ncol != -1):
        is_SE = True
    else:
        is_SE = False

    # Make sure all files have the same dimensions
    if me.get_rank() == 0 and (verbose == True):
        print "Checking dimensions across files...."
        print 'lev = ', nlev
        if (is_SE == True):
            print 'ncol = ', ncol
        else:
            print 'nlat = ', nlat
            print 'nlon = ', nlon

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if (is_SE == True):
            if (nlev != int(input_dims["lev"])
                    or (ncol != int(input_dims["ncol"]))):
                if me.get_rank() == 0:
                    print "Dimension mismatch between ", in_files[
                        0], 'and', in_files[0], '!!!'
                sys.exit()
        else:
            if ( nlev != int(input_dims["lev"]) or ( nlat != int(input_dims[latkey]))\
                  or ( nlon != int(input_dims[lonkey]))):
                if me.get_rank() == 0:
                    print "Dimension mismatch between ", in_files[
                        0], 'and', in_files[0], '!!!'
                sys.exit()

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict_all = o_files[0].variables
    # Remove the excluded variables (specified in json file) from variable dictionary
    #print len(vars_dict_all)
    if exclude:
        vars_dict = vars_dict_all
        for i in ex_varlist:
            if i in vars_dict:
                del vars_dict[i]
    #Given an included var list, remove all float var that are not on the list
    else:
        vars_dict = vars_dict_all.copy()
        for k, v in vars_dict_all.iteritems():
            if (k not in inc_varlist) and (vars_dict_all[k].typecode() == 'f'):
                #print vars_dict_all[k].typecode()
                #print k
                del vars_dict[k]

    num_vars = len(vars_dict)
    #print num_vars
    #if me.get_rank() == 0:
    #   for k,v in vars_dict.iteritems():
    #       print 'vars_dict',k,vars_dict[k].typecode()

    str_size = 0

    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k, v in vars_dict.iteritems():
        var = k
        vd = v.dimensions  # all the variable's dimensions (names)
        vr = v.rank  # num dimension
        vs = v.shape  # dim values
        is_2d = False
        is_3d = False
        if (is_SE == True):  # (time, lev, ncol) or (time, ncol)
            if ((vr == 2) and (vs[1] == ncol)):
                is_2d = True
                num_2d += 1
            elif ((vr == 3) and (vs[2] == ncol and vs[1] == nlev)):
                is_3d = True
                num_3d += 1
        else:  # (time, lev, nlon, nlon) or (time, nlat, nlon)
            if ((vr == 3) and (vs[1] == nlat and vs[2] == nlon)):
                is_2d = True
                num_2d += 1
            elif ((vr == 4) and (vs[2] == nlat and vs[3] == nlon and
                                 (vs[1] == nlev or vs[1] == nilev))):
                is_3d = True
                num_3d += 1

        if (is_3d == True):
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif (is_2d == True):
            str_size = max(str_size, len(k))
            d2_var_names.append(k)
        #else:
        #    print 'var=',k

    if me.get_rank() == 0 and (verbose == True):
        print 'Number of variables found:  ', num_3d + num_2d
        print '3D variables: ' + str(num_3d) + ', 2D variables: ' + str(num_2d)

    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()

    if esize < num_2d + num_3d:
        if me.get_rank() == 0:
            print "************************************************************************************************************************************"
            print "  Error: the total number of 3D and 2D variables " + str(
                num_2d + num_3d
            ) + " is larger than the number of ensemble files " + str(esize)
            print "  Cannot generate ensemble summary file, please remove more variables from your included variable list,"
            print "  or add more varaibles in your excluded variable list!!!"
            print "************************************************************************************************************************************"
        sys.exit()
    # All vars is 3d vars first (sorted), the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    #if me.get_rank() == 0 and (verbose == True):
    #    print 'num vars = ', n_all_var_names, '(3d = ', num_3d, ' and 2d = ', num_2d, ")"

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if me.get_rank() == 0 and (verbose == True):
        print "Creating ", this_sumfile, "  ..."
    if (me.get_rank() == 0 | opts_dict["popens"]):
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)

        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'
        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if me.get_rank() == 0 and (verbose == True):
            print "Setting dimensions ....."
        if (is_SE == True):
            nc_sumfile.create_dimension('ncol', ncol)
        else:
            nc_sumfile.create_dimension('nlat', nlat)
            nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('ens_size', esize)
        nc_sumfile.create_dimension('nvars', num_3d + num_2d)
        nc_sumfile.create_dimension('nvars3d', num_3d)
        nc_sumfile.create_dimension('nvars2d', num_2d)
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if me.get_rank() == 0 and (verbose == True):
            print "Setting global attributes ....."
        setattr(nc_sumfile, 'creation_date', now)
        setattr(nc_sumfile, 'title', 'CAM verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if me.get_rank() == 0 and (verbose == True):
            print "Creating variables ....."
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev', ))
        v_vars = nc_sumfile.create_variable("vars", 'S1',
                                            ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1',
                                             ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1',
                                             ('nvars2d', 'str_size'))
        if not opts_dict['gmonly']:
            if (is_SE == True):
                v_ens_avg3d = nc_sumfile.create_variable(
                    "ens_avg3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_stddev3d = nc_sumfile.create_variable(
                    "ens_stddev3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_avg2d = nc_sumfile.create_variable(
                    "ens_avg2d", 'f', ('nvars2d', 'ncol'))
                v_ens_stddev2d = nc_sumfile.create_variable(
                    "ens_stddev2d", 'f', ('nvars2d', 'ncol'))
            else:
                v_ens_avg3d = nc_sumfile.create_variable(
                    "ens_avg3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_stddev3d = nc_sumfile.create_variable(
                    "ens_stddev3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_avg2d = nc_sumfile.create_variable(
                    "ens_avg2d", 'f', ('nvars2d', 'nlat', 'nlon'))
                v_ens_stddev2d = nc_sumfile.create_variable(
                    "ens_stddev2d", 'f', ('nvars2d', 'nlat', 'nlon'))

            v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f',
                                                ('nvars', 'ens_size'))
        v_gm = nc_sumfile.create_variable("global_mean", 'f',
                                          ('nvars', 'ens_size'))
        v_standardized_gm = nc_sumfile.create_variable("standardized_gm", 'f',
                                                       ('nvars', 'ens_size'))
        v_loadings_gm = nc_sumfile.create_variable('loadings_gm', 'f',
                                                   ('nvars', 'nvars'))
        v_mu_gm = nc_sumfile.create_variable('mu_gm', 'f', ('nvars', ))
        v_sigma_gm = nc_sumfile.create_variable('sigma_gm', 'f', ('nvars', ))
        v_sigma_scores_gm = nc_sumfile.create_variable('sigma_scores_gm', 'f',
                                                       ('nvars', ))

        # Assign vars, var3d and var2d
        if me.get_rank() == 0 and (verbose == True):
            print "Assigning vars, var3d, and var2d ....."

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []

        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(d3_var_names)
        for i in range(l_eq):
            tt = list(d3_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(d2_var_names)
        for i in range(l_eq):
            tt = list(d2_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if me.get_rank() == 0 and (verbose == True):
            print "Assigning time invariant metadata ....."
        lev_data = vars_dict["lev"]
        v_lev = lev_data

    # Form ensembles, each missing one member; compute RMSZs and global means
    #for each variable, we also do max norm also (currently done in pyStats)
    tslice = opts_dict['tslice']

    if not opts_dict['cumul']:
        # Partition the var list

        var3_list_loc = me.partition(d3_var_names,
                                     func=EqualStride(),
                                     involved=True)
        var2_list_loc = me.partition(d2_var_names,
                                     func=EqualStride(),
                                     involved=True)
    else:
        var3_list_loc = d3_var_names
        var2_list_loc = d2_var_names

    # Calculate global means #
    if me.get_rank() == 0 and (verbose == True):
        print "Calculating global means ....."
    if not opts_dict['cumul']:
        gm3d, gm2d, var_list = pyEnsLib.generate_global_mean_for_summary(
            o_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict)
    if me.get_rank() == 0 and (verbose == True):
        print "Finish calculating global means ....."

    # Calculate RMSZ scores
    if (not opts_dict['gmonly']) | (opts_dict['cumul']):
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating RMSZ scores ....."
        zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(
            o_files, var3_list_loc, var2_list_loc, is_SE, opts_dict)

    # Calculate max norm ensemble
    if opts_dict['maxnorm']:
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating max norm of ensembles ....."
        pyEnsLib.calculate_maxnormens(opts_dict, var3_list_loc)
        pyEnsLib.calculate_maxnormens(opts_dict, var2_list_loc)

    if opts_dict['mpi_enable'] & (not opts_dict['popens']):

        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d3_var_names), me)

            # Gather global means 3d results
            gm3d = gather_npArray(gm3d, me, slice_index,
                                  (len(d3_var_names), len(o_files)))
            if not opts_dict['gmonly']:
                # Gather zscore3d results
                zscore3d = gather_npArray(zscore3d, me, slice_index,
                                          (len(d3_var_names), len(o_files)))

                # Gather ens_avg3d and ens_stddev3d results
                shape_tuple3d = get_shape(ens_avg3d.shape, len(d3_var_names),
                                          me.get_rank())
                ens_avg3d = gather_npArray(ens_avg3d, me, slice_index,
                                           shape_tuple3d)
                ens_stddev3d = gather_npArray(ens_stddev3d, me, slice_index,
                                              shape_tuple3d)

            # Gather 2d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d2_var_names), me)

            # Gather global means 2d results
            gm2d = gather_npArray(gm2d, me, slice_index,
                                  (len(d2_var_names), len(o_files)))

            var_list = gather_list(var_list, me)

            if not opts_dict['gmonly']:
                # Gather zscore2d results
                zscore2d = gather_npArray(zscore2d, me, slice_index,
                                          (len(d2_var_names), len(o_files)))

                # Gather ens_avg3d and ens_stddev2d results
                shape_tuple2d = get_shape(ens_avg2d.shape, len(d2_var_names),
                                          me.get_rank())
                ens_avg2d = gather_npArray(ens_avg2d, me, slice_index,
                                           shape_tuple2d)
                ens_stddev2d = gather_npArray(ens_stddev2d, me, slice_index,
                                              shape_tuple2d)

        else:
            gmall = np.concatenate((temp1, temp2), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(
                gmall, me,
                (me.get_size(), len(d3_var_names) + len(d2_var_names)))
    # Assign to file:
    if me.get_rank() == 0 | opts_dict['popens']:
        if not opts_dict['cumul']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            if not opts_dict['gmonly']:
                Zscoreall = np.concatenate((zscore3d, zscore2d), axis=0)
                v_RMSZ[:, :] = Zscoreall[:, :]
            if not opts_dict['gmonly']:
                if (is_SE == True):
                    v_ens_avg3d[:, :, :] = ens_avg3d[:, :, :]
                    v_ens_stddev3d[:, :, :] = ens_stddev3d[:, :, :]
                    v_ens_avg2d[:, :] = ens_avg2d[:, :]
                    v_ens_stddev2d[:, :] = ens_stddev2d[:, :]
                else:
                    v_ens_avg3d[:, :, :, :] = ens_avg3d[:, :, :, :]
                    v_ens_stddev3d[:, :, :, :] = ens_stddev3d[:, :, :, :]
                    v_ens_avg2d[:, :, :] = ens_avg2d[:, :, :]
                    v_ens_stddev2d[:, :, :] = ens_stddev2d[:, :, :]
        else:
            gmall_temp = np.transpose(gmall[:, :])
            gmall = gmall_temp
        mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm = pyEnsLib.pre_PCA(
            gmall, all_var_names, var_list, me)
        v_gm[:, :] = gmall[:, :]
        v_standardized_gm[:, :] = standardized_global_mean[:, :]
        v_mu_gm[:] = mu_gm[:]
        v_sigma_gm[:] = sigma_gm[:].astype(np.float32)
        v_loadings_gm[:, :] = loadings_gm[:, :]
        v_sigma_scores_gm[:] = scores_gm[:]

        if me.get_rank() == 0:
            print "All Done"
示例#22
0
    def _inspect_input_files(self):
        """
        Inspect the input data files themselves.

        We check the file contents here, which means opening and reading heading information from the files.
        """
        # Set the I/O backend according to what is specified
        iobackend.set_backend(self._backend)

        # Initialize the list of variable names for each category
        udim = None
        timeta = []
        xtra_timeta = []
        tvmeta = []

        # Initialize the local dictionary of time-series variables and sizes
        all_tsvars = {}
        file_times = {}

        #===== INSPECT FIRST INPUT FILE (ON MASTER PROCESS ONLY) =====

        # Open first file
        if self._simplecomm.is_manager():
            ifile = iobackend.NCFile(self._input_filenames[0])

            # Look for the 'unlimited' dimension
            try:
                udim = next(
                    dim for dim in ifile.dimensions if ifile.unlimited(dim))
            except StopIteration:
                err_msg = 'Unlimited dimension not found.'
                raise LookupError(err_msg)

            # Get the first file's time values
            file_times[self._input_filenames[0]] = ifile.variables[udim][:]

            # Categorize each variable (only looking at first file)
            for var_name, var in ifile.variables.iteritems():
                if udim not in var.dimensions:
                    if var_name not in self._exclude_list:
                        timeta.append(var_name)
                elif var_name in self._metadata_names or (self._1d_metadata and len(var.dimensions) == 1):
                    tvmeta.append(var_name)
                elif self._time_series_names is None or var_name in self._time_series_names:
                    all_tsvars[var_name] = var.datatype.itemsize * var.size

            # Close the first file
            ifile.close()

            # Find variables only in the metadata file
            if self._metadata_filename is not None:
                ifile = iobackend.NCFile(self._metadata_filename)
                for var_name, var in ifile.variables.iteritems():
                    if udim not in var.dimensions and var_name not in timeta:
                        xtra_timeta.append(var_name)
                ifile.close()

        self._simplecomm.sync()

        # Send information to worker processes
        self._unlimited_dim = self._simplecomm.partition(
            udim, func=Duplicate(), involved=True)
        self._time_invariant_metadata = self._simplecomm.partition(
            timeta, func=Duplicate(), involved=True)
        self._time_invariant_metafile_vars = self._simplecomm.partition(
            xtra_timeta, func=Duplicate(), involved=True)
        self._time_variant_metadata = self._simplecomm.partition(
            tvmeta, func=Duplicate(), involved=True)
        all_tsvars = self._simplecomm.partition(
            all_tsvars, func=Duplicate(), involved=True)

        self._simplecomm.sync()
        if self._simplecomm.is_manager():
            self._vprint('  First input file inspected.', verbosity=2)

        #===== INSPECT REMAINING INPUT FILES (IN PARALLEL) =====

        # Get the list of variable names and missing variables
        var_names = set(
            all_tsvars.keys() + self._time_invariant_metadata + self._time_invariant_metafile_vars + self._time_variant_metadata)
        missing_vars = set()

        # Partition the remaining filenames to inspect
        input_filenames = self._simplecomm.partition(
            self._input_filenames[1:], func=EqualStride(), involved=True)

        # Make a pass through remaining files and:
        # (1) Make sure it has the 'unlimited' dimension
        # (2) Make sure this dimension is truely 'unlimited'
        # (3) Check that this dimension has a corresponding variable
        # (4) Check if there are any missing variables
        # (5) Get the time values from the files
        for ifilename in input_filenames:
            ifile = iobackend.NCFile(ifilename)

            # Determine the unlimited dimension
            if self._unlimited_dim not in ifile.dimensions:
                err_msg = 'Unlimited dimension not found in file "{0}"'.format(
                    ifilename)
                raise LookupError(err_msg)
            if not ifile.unlimited(self._unlimited_dim):
                err_msg = 'Dimension "{0}" not unlimited in file "{1}"'.format(
                    self._unlimited_dim, ifilename)
                raise LookupError(err_msg)
            if self._unlimited_dim not in ifile.variables:
                err_msg = 'Unlimited dimension variable not found in file "{0}"'.format(
                    ifilename)
                raise LookupError(err_msg)

            # Get the time values (list of NDArrays)
            file_times[ifilename] = ifile.variables[self._unlimited_dim][:]

            # Get the missing variables
            var_names_next = set(ifile.variables.keys())
            missing_vars.update(var_names - var_names_next)

            # Close the file
            ifile.close()

        self._simplecomm.sync()
        if self._simplecomm.is_manager():
            self._vprint('  Remaining input files inspected.', verbosity=2)

        #===== CHECK FOR MISSING VARIABLES =====

        # Gather all missing variables on the master process
        if self._simplecomm.get_size() > 1:
            if self._simplecomm.is_manager():
                for _ in range(1, self._simplecomm.get_size()):
                    missing_vars.update(self._simplecomm.collect()[1])
            else:
                self._simplecomm.collect(missing_vars)
        self._simplecomm.sync()

        # Check for missing variables only on master process
        if self._simplecomm.is_manager():

            # Remove metafile variables from missing vars set
            missing_vars -= set(self._time_invariant_metafile_vars)

            # Make sure that the list of variables in each file is the same
            if len(missing_vars) != 0:
                warning = ("WARNING: Some variables are not in all input files:{0}   "
                           "{1}").format(linesep, ', '.join(sorted(missing_vars)))
                self._vprint(warning, header=False, verbosity=0)

            self._vprint('  Checked for missing variables.', verbosity=2)

        #===== SORT INPUT FILES BY TIME =====

        # Gather the file time values onto the master process
        if self._simplecomm.get_size() > 1:
            if self._simplecomm.is_manager():
                for _ in range(1, self._simplecomm.get_size()):
                    file_times.update(self._simplecomm.collect()[1])
            else:
                self._simplecomm.collect(file_times)
        self._simplecomm.sync()

        # Check the order of the input files based on the time values
        if self._simplecomm.is_manager():

            # Determine the sort order based on the first time in the time
            # values
            old_order = range(len(self._input_filenames))
            new_order = sorted(
                old_order, key=lambda i: file_times[self._input_filenames[i]][0])

            # Re-order the list of input filenames and time values
            new_filenames = [self._input_filenames[i] for i in new_order]
            new_values = [file_times[self._input_filenames[i]]
                          for i in new_order]

            # Now, check that the largest time in each file is less than the smallest time
            # in the next file (so that the time spans of each file do not
            # overlap)
            for i in xrange(1, len(new_values)):
                if new_values[i - 1][-1] >= new_values[i][0]:
                    err_msg = ('Times in input files {0} and {1} appear to '
                               'overlap').format(new_filenames[i - 1], new_filenames[i])
                    raise ValueError(err_msg)

        else:
            new_filenames = None

        # Now that this is validated, save the time values and filename in the
        # new order
        self._input_filenames = self._simplecomm.partition(
            new_filenames, func=Duplicate(), involved=True)

        if self._simplecomm.is_manager():
            self._vprint('  Input files sorted by time.', verbosity=2)

        #===== FINALIZING OUTPUT =====
        self._simplecomm.sync()

        # Debug output
        if self._simplecomm.is_manager():
            self._vprint('  Time-Invariant Metadata: {0}'.format(
                ', '.join(self._time_invariant_metadata)), verbosity=1)
            if len(self._time_invariant_metafile_vars) > 0:
                self._vprint('  Additional Time-Invariant Metadata: {0}'.format(
                    ', '.join(self._time_invariant_metafile_vars)), verbosity=1)
            self._vprint('  Time-Variant Metadata: {0}'.format(
                ', '.join(self._time_variant_metadata)), verbosity=1)
            self._vprint(
                '  Time-Series Variables: {0}'.format(', '.join(all_tsvars.keys())), verbosity=1)

        # Add 'once' variable if writing to a once file
        # NOTE: This is a "cheat"!  There is no 'once' variable.  It's just
        #       a catch for all metadata IFF the 'once-file' is enabled.
        if self._use_once_file:
            all_tsvars['once'] = max(all_tsvars.values())

        # Partition the time-series variables across processors
        self._time_series_variables = self._simplecomm.partition(
            all_tsvars.items(), func=WeightBalanced(), involved=True)
def main(argv):

    print('Running pyEnsSum!')

    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict={}

    # Defaults
    opts_dict['tag'] = ''
    opts_dict['compset'] = ''
    opts_dict['mach'] = ''
    opts_dict['esize'] = 151
    opts_dict['tslice'] = 0
    opts_dict['res'] = ''
    opts_dict['sumfile'] = 'ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['sumfiledir'] = './'
    opts_dict['jsonfile'] = ''
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = False
    opts_dict['maxnorm'] = False
    opts_dict['gmonly'] = False
    opts_dict['popens'] = False
    opts_dict['cumul'] = False
    opts_dict['regx'] = 'test'
    opts_dict['startMon'] = 1
    opts_dict['endMon'] = 1
    opts_dict['fIndex'] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,'ES',opts_dict)

    verbose = opts_dict['verbose']

    st = opts_dict['esize']
    esize = int(st)

    if (verbose == True):
        print(opts_dict)
        print('Ensemble size for summary = ', esize)

    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach'] or opts_dict['res']):
       print('Please specify --tag, --compset, --mach and --res options')
       sys.exit()

    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var list that will be excluded
    ex_varlist=[]

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me=simplecomm.create_comm()
    else:
        me=simplecomm.create_comm(not opts_dict['mpi_enable'])


    if me.get_rank() == 0:
    if opts_dict['jsonfile']:
        # Read in the excluded var list
        ex_varlist=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES')

    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
    ex_varlist=me.partition(ex_varlist,func=Duplicate(),involved=True)

    in_files=[]
    if(os.path.exists(input_dir)):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files=sorted(in_files_temp)
        #print in_files
        # Make sure we have enough
        num_files = len(in_files)
        if (verbose == True):
            print('Number of files in input directory = ', num_files)
        if (num_files < esize):
            print('Number of files in input directory (',num_files,
                ') is less than specified ensemble size of ', esize)
            sys.exit(2)
        if (num_files > esize):
            print('NOTE: Number of files in ', input_dir,
                'is greater than specified ensemble size of ', esize,
                '\nwill just use the first ',  esize, 'files')
    else:
        print('Input directory: ',input_dir,' not found')
        sys.exit(2)

    if opts_dict['cumul']:
        if opts_dict['regx']:
           in_files_list=get_cumul_filelist(opts_dict,opts_dict['indir'],opts_dict['regx'])
        in_files=me.partition(in_files_list,func=EqualLength(),involved=True)
        if me.get_rank()==0:
           print('in_files=',in_files)

    # Open the files in the input directory
    o_files=[]
    for onefile in in_files[0:esize]:
        if (os.path.isfile(input_dir+'/' + onefile)):
            o_files.append(Nio.open_file(input_dir+'/' + onefile,"r"))
        else:
            print("COULD NOT LOCATE FILE "+ input_dir + onefile + "! EXITING....")
            sys.exit()

    # Store dimensions of the input fields
    if (verbose == True):
        print("Getting spatial dimensions")
    nlev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey=''
    latkey=''
    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif (key == "nlon") or (key =="lon"):
            nlon = input_dims[key]
            lonkey=key
        elif (key == "nlat") or (key == "lat"):
            nlat = input_dims[key]
            latkey=key

    if (nlev == -1) :
        print("COULD NOT LOCATE valid dimension lev => EXITING....")
        sys.exit()

    if (( ncol == -1) and ((nlat == -1) or (nlon == -1))):
        print("Need either lat/lon or ncol  => EXITING....")
        sys.exit()

    # Check if this is SE or FV data
    if (ncol != -1):
        is_SE = True
    else:
        is_SE = False

    # Make sure all files have the same dimensions
    if (verbose == True):
        print("Checking dimensions across files....")
        print('lev = ', nlev)
        if (is_SE == True):
            print('ncol = ', ncol)
        else:
            print('nlat = ', nlat)
            print('nlon = ', nlon)

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if (is_SE == True):
            if ( nlev != int(input_dims["lev"]) or ( ncol != int(input_dims["ncol"]))):
                print("Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!')
                sys.exit()
        else:
            if ( nlev != int(input_dims["lev"]) or ( nlat != int(input_dims[latkey]))\
                  or ( nlon != int(input_dims[lonkey]))):
                print("Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!')
                sys.exit()

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict = o_files[0].variables
    # Remove the excluded variables (specified in json file) from variable dictionary
    if ex_varlist:
    for i in ex_varlist:
            if i in vars_dict:
           del vars_dict[i]
    num_vars = len(vars_dict)
    if (verbose == True):
        print('Number of variables (including metadata) found =  ', num_vars)
    str_size = 0

    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k,v in vars_dict.iteritems():
        var = k
        vd = v.dimensions # all the variable's dimensions (names)
        vr = v.rank # num dimension
        vs = v.shape # dim values
        is_2d = False
        is_3d = False
        if (is_SE == True): # (time, lev, ncol) or (time, ncol)
        if ((vr == 2) and (vs[1] == ncol)):
        is_2d = True
        num_2d += 1
        elif ((vr == 3) and (vs[2] == ncol and vs[1] == nlev )):
        is_3d = True
        num_3d += 1
        else: # (time, lev, nlon, nlon) or (time, nlat, nlon)
            if ((vr == 3) and (vs[1] == nlat and vs[2] == nlon)):
                is_2d = True
                num_2d += 1
            elif ((vr == 4) and (vs[2] == nlat and vs[3] == nlon and vs[1] == nlev )):
                is_3d = True
                num_3d += 1
        if (is_3d == True) :
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif  (is_2d == True):
            str_size = max(str_size, len(k))
            d2_var_names.append(k)


    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()


    # All vars is 3d vars first (sorted), the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    if (verbose == True):
        print('num vars = ', n_all_var_names, '(3d = ', num_3d, ' and 2d = ', num_2d, ")")

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if (verbose == True):
        print("Creating ", this_sumfile, "  ...")
    if(me.get_rank() ==0 | opts_dict["popens"]):
    if os.path.exists(this_sumfile):
        os.unlink(this_sumfile)

    opt = Nio.options()
    opt.PreFill = False
    opt.Format = 'NetCDF4Classic'
    nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

    # Set dimensions
    if (verbose == True):
        print("Setting dimensions .....")
    if (is_SE == True):
        nc_sumfile.create_dimension('ncol', ncol)
    else:
        nc_sumfile.create_dimension('nlat', nlat)
        nc_sumfile.create_dimension('nlon', nlon)
    nc_sumfile.create_dimension('nlev', nlev)
    nc_sumfile.create_dimension('ens_size', esize)
    nc_sumfile.create_dimension('nvars', num_3d + num_2d)
    nc_sumfile.create_dimension('nvars3d', num_3d)
    nc_sumfile.create_dimension('nvars2d', num_2d)
    nc_sumfile.create_dimension('str_size', str_size)

    # Set global attributes
    now = time.strftime("%c")
    if (verbose == True):
        print("Setting global attributes .....")
    setattr(nc_sumfile, 'creation_date',now)
    setattr(nc_sumfile, 'title', 'CAM verification ensemble summary file')
    setattr(nc_sumfile, 'tag', opts_dict["tag"])
    setattr(nc_sumfile, 'compset', opts_dict["compset"])
    setattr(nc_sumfile, 'resolution', opts_dict["res"])
    setattr(nc_sumfile, 'machine', opts_dict["mach"])

    # Create variables
    if (verbose == True):
        print("Creating variables .....")
    v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',))
    v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size'))
    v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size'))
    v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size'))
        if not opts_dict['gmonly']:
        if (is_SE == True):
        v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'ncol'))
        v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'ncol'))
        v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'ncol'))
        v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'ncol'))
        else:
        v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'nlat', 'nlon'))
        v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'nlat', 'nlon'))

        v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f', ('nvars', 'ens_size'))
    v_gm = nc_sumfile.create_variable("global_mean", 'f', ('nvars', 'ens_size'))
    v_loadings_gm = nc_sumfile.create_variable('loadings_gm','f',('nvars','nvars'))
    v_mu_gm = nc_sumfile.create_variable('mu_gm','f',('nvars',))
    v_sigma_gm = nc_sumfile.create_variable('sigma_gm','f',('nvars',))
    v_sigma_scores_gm = nc_sumfile.create_variable('sigma_scores_gm','f',('nvars',))


    # Assign vars, var3d and var2d
    if (verbose == True):
        print("Assigning vars, var3d, and var2d .....")

    eq_all_var_names =[]
    eq_d3_var_names = []
    eq_d2_var_names = []

    l_eq = len(all_var_names)
    for i in range(l_eq):
        tt = list(all_var_names[i])
        l_tt = len(tt)
        if (l_tt < str_size):
        extra = list(' ')*(str_size - l_tt)
        tt.extend(extra)
        eq_all_var_names.append(tt)

    l_eq = len(d3_var_names)
    for i in range(l_eq):
        tt = list(d3_var_names[i])
        l_tt = len(tt)
        if (l_tt < str_size):
        extra = list(' ')*(str_size - l_tt)
        tt.extend(extra)
        eq_d3_var_names.append(tt)

    l_eq = len(d2_var_names)
    for i in range(l_eq):
        tt = list(d2_var_names[i])
        l_tt = len(tt)
        if (l_tt < str_size):
        extra = list(' ')*(str_size - l_tt)
        tt.extend(extra)
        eq_d2_var_names.append(tt)

    v_vars[:] = eq_all_var_names[:]
    v_var3d[:] = eq_d3_var_names[:]
    v_var2d[:] = eq_d2_var_names[:]

    # Time-invarient metadata
    if (verbose == True):
        print("Assigning time invariant metadata .....")
    lev_data = vars_dict["lev"]
    v_lev = lev_data

    # Form ensembles, each missing one member; compute RMSZs and global means
    #for each variable, we also do max norm also (currently done in pyStats)
    tslice = opts_dict['tslice']

    if not opts_dict['cumul']:
        # Partition the var list
        var3_list_loc=me.partition(d3_var_names,func=EqualStride(),involved=True)
        var2_list_loc=me.partition(d2_var_names,func=EqualStride(),involved=True)
    else:
        var3_list_loc=d3_var_names
        var2_list_loc=d2_var_names

    # Calculate global means #
    if (verbose == True):
        print("Calculating global means .....")
    if not opts_dict['cumul']:
        gm3d,gm2d = pyEnsLib.generate_global_mean_for_summary(o_files,var3_list_loc,var2_list_loc , is_SE, False,opts_dict)
    if (verbose == True):
        print("Finish calculating global means .....")

    # Calculate RMSZ scores
    if (verbose == True):
        print("Calculating RMSZ scores .....")
    if (not opts_dict['gmonly']) | (opts_dict['cumul']):
        zscore3d,zscore2d,ens_avg3d,ens_stddev3d,ens_avg2d,ens_stddev2d,temp1,temp2=pyEnsLib.calc_rmsz(o_files,var3_list_loc,var2_list_loc,is_SE,opts_dict)

    # Calculate max norm ensemble
    if opts_dict['maxnorm']:
    if (verbose == True):
        print("Calculating max norm of ensembles .....")
    pyEnsLib.calculate_maxnormens(opts_dict,var3_list_loc)
    pyEnsLib.calculate_maxnormens(opts_dict,var2_list_loc)

    if opts_dict['mpi_enable'] & ( not opts_dict['popens']):

        if not opts_dict['cumul']:
        # Gather the 3d variable results from all processors to the master processor
        slice_index=get_stride_list(len(d3_var_names),me)

        # Gather global means 3d results
        gm3d=gather_npArray(gm3d,me,slice_index,(len(d3_var_names),len(o_files)))

        if not opts_dict['gmonly']:
        # Gather zscore3d results
        zscore3d=gather_npArray(zscore3d,me,slice_index,(len(d3_var_names),len(o_files)))

        # Gather ens_avg3d and ens_stddev3d results
        shape_tuple3d=get_shape(ens_avg3d.shape,len(d3_var_names),me.get_rank())
        ens_avg3d=gather_npArray(ens_avg3d,me,slice_index,shape_tuple3d)
        ens_stddev3d=gather_npArray(ens_stddev3d,me,slice_index,shape_tuple3d)

        # Gather 2d variable results from all processors to the master processor
        slice_index=get_stride_list(len(d2_var_names),me)

        # Gather global means 2d results
        gm2d=gather_npArray(gm2d,me,slice_index,(len(d2_var_names),len(o_files)))

        if not opts_dict['gmonly']:
        # Gather zscore2d results
        zscore2d=gather_npArray(zscore2d,me,slice_index,(len(d2_var_names),len(o_files)))

        # Gather ens_avg3d and ens_stddev2d results
        shape_tuple2d=get_shape(ens_avg2d.shape,len(d2_var_names),me.get_rank())
        ens_avg2d=gather_npArray(ens_avg2d,me,slice_index,shape_tuple2d)
        ens_stddev2d=gather_npArray(ens_stddev2d,me,slice_index,shape_tuple2d)

        else:
        gmall=np.concatenate((temp1,temp2),axis=0)
            gmall=pyEnsLib.gather_npArray_pop(gmall,me,(me.get_size(),len(d3_var_names)+len(d2_var_names)))
    # Assign to file:
    if me.get_rank() == 0 | opts_dict['popens'] :
        if not opts_dict['cumul']:
        gmall=np.concatenate((gm3d,gm2d),axis=0)
        if not opts_dict['gmonly']:
        Zscoreall=np.concatenate((zscore3d,zscore2d),axis=0)
        v_RMSZ[:,:]=Zscoreall[:,:]
        if not opts_dict['gmonly']:
        if (is_SE == True):
            v_ens_avg3d[:,:,:]=ens_avg3d[:,:,:]
            v_ens_stddev3d[:,:,:]=ens_stddev3d[:,:,:]
            v_ens_avg2d[:,:]=ens_avg2d[:,:]
            v_ens_stddev2d[:,:]=ens_stddev2d[:,:]
        else:
            v_ens_avg3d[:,:,:,:]=ens_avg3d[:,:,:,:]
            v_ens_stddev3d[:,:,:,:]=ens_stddev3d[:,:,:,:]
            v_ens_avg2d[:,:,:]=ens_avg2d[:,:,:]
            v_ens_stddev2d[:,:,:]=ens_stddev2d[:,:,:]
        else:
            gmall_temp=np.transpose(gmall[:,:])
            gmall=gmall_temp
    mu_gm,sigma_gm,standardized_global_mean,loadings_gm,scores_gm=pyEnsLib.pre_PCA(gmall)
    v_gm[:,:]=gmall[:,:]
    v_mu_gm[:]=mu_gm[:]
    v_sigma_gm[:]=sigma_gm[:].astype(np.float32)
    v_loadings_gm[:,:]=loadings_gm[:,:]
    v_sigma_scores_gm[:]=scores_gm[:]

    print("All Done")

def get_cumul_filelist(opts_dict,indir,regx):
   if not opts_dict['indir']:
      print('input dir is not specified')
      sys.exit(2)
   #regx='(pgi(.)*-(01|02))'
   regx_list=["mon","gnu","pgi"]
   all_files=[]
   for prefix in regx_list:
       for i in range(opts_dict['fIndex'],opts_dict['fIndex']+opts_dict['esize']/3):
       for j in range(opts_dict['startMon'],opts_dict['endMon']+1):
           mon_str=str(j).zfill(2)
           regx='(^'+prefix+'(.)*'+str(i)+'(.)*-('+mon_str+'))'
           print('regx=',regx)
           res=[f for f in os.listdir(indir) if re.search(regx,f)]
           in_files=sorted(res)
           all_files.extend(in_files)
   print("all_files=",all_files)
   #in_files=res
   return all_files





#
# Get the shape of all variable list in tuple for all processor
#
def get_shape(shape_tuple,shape1,rank):
    lst=list(shape_tuple)
    lst[0]=shape1
    shape_tuple=tuple(lst)
    return shape_tuple

#
# Get the mpi partition list for each processor
#
def get_stride_list(len_of_list,me):
    slice_index=[]
    for i in range(me.get_size()):
    index_arr=np.arange(len_of_list)
    slice_index.append(index_arr[i::me.get_size()])
    return slice_index

#
# Gather arrays from each processor by the var_list to the master processor and make it an array
#
def gather_npArray(npArray,me,slice_index,array_shape):
    the_array=np.zeros(array_shape,dtype=np.float32)
    if me.get_rank()==0:
    k=0
    for j in slice_index[me.get_rank()]:
         the_array[j,:]=npArray[k,:]
         k=k+1
    for i in range(1,me.get_size()):
    if me.get_rank() == 0:
        rank,npArray=me.collect()
        k=0
        for j in slice_index[rank]:
        the_array[j,:]=npArray[k,:]
        k=k+1
    if me.get_rank() != 0:
    message={"from_rank":me.get_rank(),"shape":npArray.shape}
    me.collect(npArray)
    me.sync()
    return the_array

if __name__ == "__main__":
    main(sys.argv[1:])