def setUp(self):
    # print 'Creating temporary file: ', filename
    self.filename = tempfile.mktemp(prefix="test_", suffix=".nc")
    do_setup(self.filename)
    opt = Nio.options()
    opt.MaskedArrayMode = 'MaskedAlways'
    self.f = Nio.open_file(self.filename, options=opt)
def create_file(self, new_file_name, ncformat, hist_string=None):
    """
    Create a NetCDF file for writing.

    Parameters:
        new_file_name (str): Name, including full path, of the new
            NetCDF file to create.
        ncformat (str): Type of NetCDF file to create.
            Options:
                'netcdf4c':       NetCDF4 with Level 1 compression
                'NetCDF4Classic': NetCDF4 Classic
                'Classic':        NetCDF3
                'netcdfLarge':    NetCDF 64bit Offset
        hist_string (str): Optional. A string to append to the history
            attribute.

    Returns:
        new_file (NioFile): A pointer to a NioFile object.
    """
    # Set PyNIO netcdf file options
    opt = Nio.options()
    # The netcdf output format
    if "netcdf4c" in ncformat:
        opt.Format = "NetCDF4Classic"
        if ncformat[-1].isdigit():
            # the compression level option expects an integer
            opt.CompressionLevel = int(ncformat[-1])
    elif ncformat == "netcdf4":
        opt.Format = "NetCDF4Classic"
    elif ncformat == "netcdf":
        opt.Format = "Classic"
    elif ncformat == "netcdfLarge":
        opt.Format = "64BitOffset"
    else:
        print(
            "WARNING: Selected netcdf file format (",
            ncformat,
            ") is not recognized.",
        )
        print("Defaulting to netcdf4Classic format.")
        opt.Format = "NetCDF4Classic"
    opt.PreFill = False
    if hist_string is None:
        hist_string = "clim-convert" + new_file_name
    # Open new output file
    new_file = Nio.open_file(new_file_name, "w", options=opt,
                             history=hist_string)
    return new_file
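# Usage sketch (added for illustration, not part of the original source).
# Assumes this method lives on a writer object available here as `writer`;
# the output path and history string are hypothetical.
#
#     f = writer.create_file('/tmp/example_clim.nc', 'netcdf4c1',
#                            hist_string='converted by demo script')
#     f.close()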
def test_large(self):
    #
    # Creating a file
    #
    #init_time = time.clock()
    opt = Nio.options()
    opt.Format = "LargeFile"
    opt.PreFill = False
    f = Nio.open_file(self.filename, 'w', options=opt)
    f.title = "Testing large files and dimensions"
    f.create_dimension('big', 2500000000)
    bigvar = f.create_variable('bigvar', "b", ('big',))
    #print("created bigvar")
    # note it is incredibly slow to write a scalar to a large file variable,
    # so create a temporary variable x that will get assigned in steps
    x = np.empty(1000000, dtype='int8')
    #print(x)
    x[:] = 42
    t = list(range(0, 2500000000, 1000000))
    ii = 0
    for i in t:
        if i == 0:
            continue
        #print(t[ii], i)
        bigvar[t[ii]:i] = x[:]
        ii += 1
    x[:] = 84
    bigvar[2499000000:2500000000] = x[:]
    bigvar[-1] = 84
    bigvar.units = "big var units"
    #print(bigvar[-1])
    #print(bigvar.dimensions)
    # check unlimited status
    #print(f)
    nt.assert_equal(bigvar.dimensions, ('big',))
    nt.assert_equal(f.unlimited('big'), False)
    nt.assert_equal(f.attributes,
                    {'title': 'Testing large files and dimensions'})
    nt.assert_equal(f.dimensions, {'big': 2500000000})
    nt.assert_equal(list(f.variables.keys()), ['bigvar'])
    #print("closing file")
    #print('elapsed time: ', time.clock() - init_time)
    f.close()
    #quit()
    #
    # Reading a file
    #
    #print('opening file for read')
    #print('elapsed time: ', time.clock() - init_time)
    f = Nio.open_file(self.filename, 'r')
    #print('file is open')
    #print('elapsed time: ', time.clock() - init_time)
    nt.assert_equal(f.attributes,
                    {'title': 'Testing large files and dimensions'})
    nt.assert_equal(f.dimensions, {'big': 2500000000})
    nt.assert_equal(list(f.variables.keys()), ['bigvar'])
    #print(f.dimensions)
    #print(list(f.variables.keys()))
    #print(f)
    #print("reading variable")
    #print('elapsed time: ', time.clock() - init_time)
    x = f.variables['bigvar']
    #print(x[0], x[1000000], x[249000000], x[2499999999])
    nt.assert_equal((x[0], x[1000000], x[249000000], x[2499999999]),
                    (42, 42, 42, 84))
    #print("max and min")
    nt.assert_equal((x[:].min(), x[:].max()), (42, 84))
    # check unlimited status
    nt.assert_equal(f.variables['bigvar'].dimensions, ('big',))
    nt.assert_equal(f.unlimited('big'), False)
    #print("closing file")
    #print('elapsed time: ', time.clock() - init_time)
    f.close()
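# Minimal sketch (added): the two PyNIO options the test above relies on for
# writing files beyond the 2 GB classic-format limit. The output path is
# hypothetical; Nio must be importable.
import Nio

opt = Nio.options()
opt.Format = 'LargeFile'   # 64-bit offset format, needed for very large files
opt.PreFill = False        # skip pre-filling variables, which speeds up writes
f = Nio.open_file('/tmp/bigfile_demo.nc', 'w', options=opt)
f.close()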
def __init__(self, specifier, serial=False, verbosity=1,
             skip_existing=False, overwrite=False, once=False,
             simplecomm=None):
    """
    Constructor

    Parameters:
        specifier (Specifier): An instance of the Specifier class,
            defining the input specification for this reshaper operation.

    Keyword Arguments:
        serial (bool): True or False, indicating whether the operation
            should be performed in serial (True) or parallel (False).
            The default is to assume parallel operation (but serial will
            be chosen if mpi4py cannot be found when trying to
            initialize the decomposition).
        verbosity (int): Level of printed output (stdout). A value of 0
            means no output, and a higher value means more output. The
            default value is 1.
        skip_existing (bool): Flag specifying whether to skip the
            generation of time-series for variables with time-series
            files that already exist. Default is False.
        overwrite (bool): Flag specifying whether to forcefully
            overwrite output files if they already exist. Default is
            False.
        once (bool): True or False, indicating whether the Reshaper
            should write all metadata to a 'once' file (separately).
        simplecomm (SimpleComm): A SimpleComm object to handle the
            parallel communication, if necessary.
    """

    # Type checking (or double-checking)
    if not isinstance(specifier, Specifier):
        err_msg = "Input must be given in the form of a Specifier object"
        raise TypeError(err_msg)
    if type(serial) is not bool:
        err_msg = "Serial indicator must be True or False."
        raise TypeError(err_msg)
    if type(verbosity) is not int:
        err_msg = "Verbosity level must be an integer."
        raise TypeError(err_msg)
    if type(skip_existing) is not bool:
        err_msg = "Skip_existing flag must be True or False."
        raise TypeError(err_msg)
    if type(once) is not bool:
        err_msg = "Once-file indicator must be True or False."
        raise TypeError(err_msg)
    if simplecomm is not None:
        if not isinstance(simplecomm, (SimpleComm, SimpleCommMPI)):
            err_msg = ("Simple communicator object is not a SimpleComm or "
                       "SimpleCommMPI")
            raise TypeError(err_msg)

    # Whether to write a once file
    self._use_once_file = once

    # Internal timer data
    self._timer = TimeKeeper()

    # Dictionary storing read/write data amounts
    self.assumed_block_size = float(4 * 1024 * 1024)
    self._byte_counts = {}

    self._timer.start('Initializing Simple Communicator')
    if simplecomm is None:
        simplecomm = create_comm(serial=serial)
    # Reference to the simple communicator
    self._simplecomm = simplecomm
    self._timer.stop('Initializing Simple Communicator')

    # Construct the print header
    header = ''.join(['[', str(self._simplecomm.get_rank()), '/',
                      str(self._simplecomm.get_size()), '] '])

    # Reference to the verbose printer tool
    self._vprint = VPrinter(header=header, verbosity=verbosity)

    # Debug output starting
    if self._simplecomm.is_manager():
        self._vprint('Initializing Reshaper', verbosity=1)

    # Validate the user input data
    self._timer.start('Specifier Validation')
    specifier.validate()
    self._timer.stop('Specifier Validation')
    if self._simplecomm.is_manager():
        self._vprint('Specifier validated', verbosity=1)

    # Setup PyNIO options (including disabling the default PreFill option)
    opt = Nio.options()
    opt.PreFill = False

    # Determine the Format and CompressionLevel options
    # from the NetCDF format string in the Specifier
    if specifier.netcdf_format == 'netcdf':
        opt.Format = 'Classic'
    elif specifier.netcdf_format == 'netcdf4':
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = 0
    elif specifier.netcdf_format == 'netcdf4c':
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = specifier.netcdf_deflate
        if self._simplecomm.is_manager():
            self._vprint('PyNIO compression level: {0}'.format(
                specifier.netcdf_deflate), verbosity=2)
    self._nio_options = opt
    if self._simplecomm.is_manager():
        self._vprint('PyNIO options set', verbosity=2)

    # Open all of the input files
    self._timer.start('Open Input Files')
    self._input_files = []
    for filename in specifier.input_file_list:
        self._input_files.append(Nio.open_file(filename, "r"))
    self._timer.stop('Open Input Files')
    if self._simplecomm.is_manager():
        self._vprint('Input files opened', verbosity=2)

    # Validate the input files themselves
    self._timer.start('Input File Validation')
    self._validate_input_files(specifier)
    self._timer.stop('Input File Validation')
    if self._simplecomm.is_manager():
        self._vprint('Input files validated', verbosity=2)

    # Sort the input files by time
    self._timer.start('Sort Input Files')
    self._sort_input_files_by_time(specifier)
    self._timer.stop('Sort Input Files')
    if self._simplecomm.is_manager():
        self._vprint('Input files sorted', verbosity=2)

    # Retrieve and sort the variables in each time-slice file
    # (To determine if it is time-invariant metadata, time-variant
    # metadata, or if it is a time-series variable)
    self._timer.start('Sort Variables')
    self._sort_variables(specifier)
    self._timer.stop('Sort Variables')
    if self._simplecomm.is_manager():
        self._vprint('Variables sorted', verbosity=2)

    # Validate the output files
    self._timer.start('Output File Validation')
    self._validate_output_files(specifier, skip_existing, overwrite)
    self._timer.stop('Output File Validation')
    if self._simplecomm.is_manager():
        self._vprint('Output files validated', verbosity=2)

    # Helpful debugging message
    if self._simplecomm.is_manager():
        self._vprint('Reshaper initialized.', verbosity=1)

    # Sync before continuing..
    self._simplecomm.sync()
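# Hedged usage sketch (added): constructing the Reshaper in serial mode. The
# Specifier fields shown are the ones the constructor above reads
# (input_file_list, netcdf_format, netcdf_deflate); how a Specifier is built
# is assumed, so the example stays commented out.
#
#     spec = Specifier()                    # construction details assumed
#     spec.input_file_list = ['slice1.nc', 'slice2.nc']  # hypothetical paths
#     spec.netcdf_format = 'netcdf4c'
#     spec.netcdf_deflate = 1
#     reshaper = Reshaper(spec, serial=True, verbosity=2, once=False)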
def setUp(self):
    do_setup(filename)
    opt = Nio.options()
    opt.UseAxisAttribute = True
    self.f = Nio.open_file(filename, options=opt)
def setUp(self):
    # print 'Creating temporary file: ', filename
    do_setup(filename)
    opt = Nio.options()
    opt.MaskedArrayMode = "MaskedAlways"
    self.f = Nio.open_file(filename, options=opt)
def setUp(self):
    do_setup(filename)
    opt = Nio.options()
    opt.MaskedArrayMode = "MaskedExplicit"
    self.f = Nio.open_file(filename, options=opt)
def setUp(self):
    self.filename = tempfile.mktemp(prefix="test_", suffix=".nc")
    do_setup(self.filename)
    opt = Nio.options()
    opt.MaskedArrayMode = 'MaskedExplicit'
    self.f = Nio.open_file(self.filename, options=opt)
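# Hedged illustration (added): the difference between the two MaskedArrayMode
# settings used in the setUp fixtures above. This reflects my understanding
# of PyNIO's behavior; the variable name 'foo' is hypothetical and `filename`
# is the fixture file from the tests.
import Nio
import numpy.ma as ma

opt = Nio.options()
opt.MaskedArrayMode = 'MaskedAlways'      # reads always return masked arrays
f = Nio.open_file(filename, options=opt)
data = f.variables['foo'][:]
print(isinstance(data, ma.MaskedArray))   # expected: True under MaskedAlways
f.close()

# 'MaskedExplicit' is more conservative: masking is applied only when the
# variable carries an explicit fill attribute such as _FillValue.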
def open_new_file(self, file_name,
                  z_values=numpy.arange(10),
                  z_units='m',
                  var_names=['X'],
                  long_names=[None],
                  units_names=['None'],
                  dtypes=['float64'],
                  time_units='minutes',
                  comment=''):

    #----------------------------------------------------
    # Notes: It might be okay to have "nz" be an
    #        unlimited dimension, like "time".  This
    #        would mean replacing "int(profile_length)"
    #        with "None".
    #----------------------------------------------------

    #--------------------------------------------------
    # Try to import the Nio module from PyNIO package
    #--------------------------------------------------
    Nio = self.import_nio()
    if not Nio:
        return False

    #----------------------------
    # Does file already exist ?
    #----------------------------
    file_name = file_utils.check_overwrite(file_name)
    self.file_name = file_name

    #---------------------------------------
    # Check and store the time series info
    #---------------------------------------
    self.format = 'ncps'
    self.file_name = file_name
    self.time_index = 0
    if (long_names[0] is None):
        long_names = var_names
    #-------------------------------------------
    self.z_values = z_values
    self.z_units = z_units
    nz = numpy.size(z_values)

    #-------------------------------------------
    # We may not need to save these in self.
    # I don't think they're used anywhere yet.
    #-------------------------------------------
    self.var_names = var_names
    self.long_names = long_names
    self.units_names = units_names
    self.dtypes = dtypes

    #---------------------------------------------
    # Create array of Nio type codes from dtypes
    #---------------------------------------------
    nio_type_map = self.get_nio_type_map()
    nio_type_codes = []
    if (len(dtypes) == len(var_names)):
        for dtype in dtypes:
            nio_type_code = nio_type_map[dtype.lower()]
            nio_type_codes.append(nio_type_code)
    else:
        dtype = dtypes[0]
        nio_type_code = nio_type_map[dtype.lower()]
        for k in xrange(len(var_names)):
            nio_type_codes.append(nio_type_code)
    self.nio_type_codes = nio_type_codes

    #-------------------------------------
    # Open a new netCDF file for writing
    #-------------------------------------
    # Sample output from time.asctime():
    #     "Thu Oct  8 17:10:18 2009"
    #-------------------------------------
    opt = Nio.options()
    opt.PreFill = False             # (for efficiency)
    opt.HeaderReserveSpace = 4000   # (4000 bytes, for efficiency)
    history = "Created using PyNIO " + Nio.__version__ + " on "
    history = history + time.asctime() + ". "
    history = history + comment

    try:
        ncps_unit = Nio.open_file(file_name, mode="w",
                                  options=opt, history=history)
        OK = True
    except:
        OK = False
        return OK

    #------------------------------------------------
    # Create an unlimited time dimension (via None)
    #------------------------------------------------
    # Without using "int()" here, we get this:
    #     TypeError: size must be None or integer
    #------------------------------------------------
    ncps_unit.create_dimension("nz", int(nz))
    ncps_unit.create_dimension("time", None)

    #-------------------------
    # Create a time variable
    #---------------------------------------------------
    # ('f' = float32; must match in add_values_at_IDs()
    #---------------------------------------------------
    # NB! Can't use "time" vs. "tvar" here unless we
    #     add "import time" inside this function.
    #---------------------------------------------------
    tvar = ncps_unit.create_variable('time', 'd', ("time",))
    ncps_unit.variables['time'].units = time_units

    #--------------------------------------
    # Create a distance/depth variable, z
    #--------------------------------------
    zvar = ncps_unit.create_variable('z', 'd', ("nz",))
    zvar[:] = z_values    # (store the z-values)
    ncps_unit.variables['z'].units = z_units

    #-----------------------------------
    # Create variables using var_names
    #-----------------------------------
    # Returns "var" as a PyNIO object
    #---------------------------------------------------
    # NB! The 3rd argument here (dimension), must be a
    #     tuple.  If there is only one dimension, then
    #     we need to add a comma, as shown.
    #---------------------------------------------------
    for k in xrange(len(var_names)):
        var_name = var_names[k]
        var = ncps_unit.create_variable(var_name, nio_type_codes[k],
                                        ("time", "nz"))

        #------------------------------------
        # Create attributes of the variable
        #------------------------------------
        ncps_unit.variables[var_name].long_name = long_names[k]
        ncps_unit.variables[var_name].units = units_names[k]

        #----------------------------------
        # Specify a "nodata" fill value ?
        #----------------------------------
        var._FillValue = -9999.0    ## Does this jive with PreFill above ??

    self.ncps_unit = ncps_unit
    return OK
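# Usage sketch (added): opening a new profile-series file. Assumes the
# surrounding class instance is available here as `ncps`; everything except
# the open_new_file signature above is hypothetical.
#
#     import numpy
#     OK = ncps.open_new_file('profile_demo.nc',
#                             z_values=numpy.arange(10), z_units='m',
#                             var_names=['T'],
#                             long_names=['temperature'],
#                             units_names=['deg_C'],
#                             dtypes=['float64'])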
def var_nc2d(parameters=['AREA', 'HEFF'],
             ofile='MIT_output_2d',
             bswap=1,
             sstart_date="seconds since 2002-10-01 07:00",
             deltaT=1800,
             FillValue=-1.0e+23,
             dump='no'):
    '''
    Convert 2d fields produced by MITgcm to netCDF format with use of
    the Nio module. Names of the variables should be given as a list,
    even if we have only one variable. I put everything on the C grid!

    You have to have the following files in the directory where you run
    your code:
        XC.data  XC.meta
        YC.data  YC.meta
        maskCtrlC.data  maskCtrlC.meta

    Input:
        parameters  - list with names of the variables (like AREA or
                      AREAtave).
        ofile       - name of the output file.
        bswap       - do we need a byte swap? Yes (1) or no (0) [default 1]
        sstart_date - should be "seconds since"
                      [default "seconds since 2002-10-01 07:00"]
        deltaT      - time step in seconds
        FillValue   - missing value
        dump        - if dump='yes' will return numpy array with data
    '''
    lon = mitbin2('XC.data', bswap)[0, 0, :, :]
    lat = mitbin2('YC.data', bswap)[0, 0, :, :]
    lsmask = mitbin2('maskCtrlC.data', bswap)[:, 0, :, :]

    fileList = glob.glob(parameters[0] + "*.data")

    if os.path.exists(ofile + ".nc"):
        os.system("rm " + ofile + ".nc")

    ndim, xdim, ydim, zdim, datatype, nrecords, timeStepNumber = rmeta(
        fileList[0][:-4] + "meta")

    ttime = numpy.zeros((len(fileList)))
    #ttime[0] = timeStepNumber*deltaT

    opt = Nio.options()
    opt.PreFill = False
    opt.HeaderReserveSpace = 4000

    f = Nio.open_file(ofile + ".nc", "w", opt)
    f.title = "MITgcm variables in netCDF format"
    f.create_dimension('x', xdim)
    f.create_dimension('y', ydim)
    f.create_dimension('time', ttime.shape[0])

    f.create_variable('time', 'd', ('time',))
    f.variables['time'].units = sstart_date

    f.create_variable('latitude', 'd', ('x', 'y'))
    f.variables['latitude'].long_name = "latitude"
    f.variables['latitude'].units = "degrees_north"
    f.variables['latitude'].standard_name = "grid_latitude"
    f.variables['latitude'][:] = lat[:]

    f.create_variable('longitude', 'd', ('x', 'y'))
    f.variables['longitude'].long_name = "longitude"
    f.variables['longitude'].units = "degrees_east"
    f.variables['longitude'].standard_name = "grid_longitude"
    f.variables['longitude'][:] = lon[:]

    for parameter in parameters:
        f.create_variable(parameter, 'd', ('time', 'x', 'y'))
        f.variables[parameter].long_name = gatrib(parameter)[0]
        f.variables[parameter].units = gatrib(parameter)[1]
        f.variables[parameter]._FillValue = FillValue
        f.variables[parameter].missing_value = FillValue

        adatemp_final = numpy.zeros((len(fileList), xdim, ydim))
        iterator = 0
        for fileName in fileList:
            adatemp = mitbin2(parameter + fileName[-16:],
                              bswap=bswap)[0, 0, :, :]
            ndim, xdim, ydim, zdim, datatype, nrecords, timeStepNumber = \
                rmeta(fileName[:-4] + "meta")
            adatemp = numpy.where(adatemp[:] < -1.0e+20, FillValue,
                                  adatemp[:])
            adatemp = numpy.where(lsmask[:] == 0, FillValue, adatemp[:])
            adatemp_final[iterator, :, :] = adatemp
            ttime[iterator] = timeStepNumber * deltaT
            iterator = iterator + 1

        f.variables[parameter][:] = adatemp_final
        f.variables['time'][:] = ttime

    f.close()

    if dump == 'yes':
        return adatemp
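# Usage sketch (added): converting the default 2d fields. Assumes the MITgcm
# binary/meta files named in the docstring are in the working directory; the
# output name is hypothetical.
#
#     var_nc2d(parameters=['AREA', 'HEFF'], ofile='MIT_demo_2d',
#              bswap=1, deltaT=1800)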
def nc3d(parameters=['adxx_atemp'],
         ofile='adxx',
         iteration='0',
         bswap=1,
         sstart_date="seconds since 2002-10-01 07:00",
         deltaT=1200,
         xx_period=240000.0,
         FillValue=-1.0e+23,
         meta=None,
         dump="no"):
    '''
    Convert 3d fields from adxx* and xx* files to netCDF format with use
    of the Nio module. Names of the variables should be given as a list,
    even if we have only one variable. I put everything on the C grid!

    You have to have the following files in the directory where you run
    your code:
        XC.data   XC.meta
        YC.data   YC.meta
        DRC.data  DRC.meta
        maskCtrlC.data  maskCtrlC.meta

    Input:
        parameters  - list with names of the variables.
        ofile       - name of the output file.
        iteration   - iteration of optimisation, should be STRING!
        bswap       - do we need a byte swap? Yes (1) or no (0) [default 1]
        sstart_date - should be "seconds since"
                      [default "seconds since 2002-10-01 07:00"]
        deltaT      - time step in seconds
        xx_period   - xx_*period
        FillValue   - missing value
        meta        - flag to fix problem with wrong adxx*.meta files.
                      If meta = 'xx', use .meta file from xx files
        dump        - if dump='yes' will return numpy array with data
    '''
    lon = mitbin2('XC.data', bswap)[0, 0, :, :]
    lat = mitbin2('YC.data', bswap)[0, 0, :, :]
    lev = mitbin2('DRC.data', bswap)[0, :, 0, 0]
    lev = numpy.cumsum(lev)
    lsmask = mitbin2('maskCtrlC.data', bswap)[:, :, :, :]

    if os.path.exists(ofile + ".nc"):
        os.system("rm " + ofile + ".nc")

    if meta is None:
        ndim, xdim, ydim, zdim, datatype, nrecords, timeStepNumber = rmeta(
            parameters[0] + "." + iteration.zfill(10) + ".meta")
    elif meta == 'xx':
        ndim, xdim, ydim, zdim, datatype, nrecords, timeStepNumber = rmeta(
            parameters[0][2:] + "." + iteration.zfill(10) + ".meta")

    if nrecords == 1:
        ttime = numpy.zeros((nrecords))
        ttime[0] = timeStepNumber * deltaT
    elif nrecords > 1:
        ttime = numpy.zeros((nrecords))
        for i in range(nrecords):
            ttime[i] = xx_period * i

    opt = Nio.options()
    opt.PreFill = False
    opt.HeaderReserveSpace = 4000

    f = Nio.open_file(ofile + ".nc", "w", opt)
    f.title = "MITgcm variables in netCDF format"
    f.create_dimension('x', xdim)
    f.create_dimension('y', ydim)
    f.create_dimension('z', zdim)
    f.create_dimension('time', ttime.shape[0])

    f.create_variable('time', 'd', ('time',))
    f.variables['time'].units = sstart_date
    f.variables['time'][:] = ttime

    f.create_variable('z', 'd', ('z',))
    f.variables['z'].units = "meters"
    f.variables['z'][:] = lev[:]

    f.create_variable('latitude', 'd', ('x', 'y'))
    f.variables['latitude'].long_name = "latitude"
    f.variables['latitude'].units = "degrees_north"
    f.variables['latitude'].standard_name = "grid_latitude"
    f.variables['latitude'][:] = lat[:]

    f.create_variable('longitude', 'd', ('x', 'y'))
    f.variables['longitude'].long_name = "longitude"
    f.variables['longitude'].units = "degrees_east"
    f.variables['longitude'].standard_name = "grid_longitude"
    f.variables['longitude'][:] = lon[:]

    #vvariables = ["atemp","aqh", "uwind", "vwind", ]
    #vvariables = ["atemp"]
    for parameter in parameters:
        adatemp = mitbin2(parameter + "." + iteration.zfill(10) + ".data",
                          bswap=bswap, meta=meta)[:, :, :, :]
        # adatemp = numpy.where(adatemp[:] > 1.0e+12, 0, adatemp[:])
        adatemp = numpy.where(adatemp[:] < -1.0e+20, FillValue, adatemp[:])
        adatemp = numpy.where(lsmask[:] == 0, FillValue, adatemp[:])

        f.create_variable(parameter, 'd', ('time', 'z', 'x', 'y'))
        nname, unit, grid = gatrib(parameter)
        f.variables[parameter].long_name = nname
        f.variables[parameter].units = unit
        f.variables[parameter].grid = grid
        f.variables[parameter]._FillValue = FillValue
        #print(adatemp.shape)
        f.variables[parameter][:] = adatemp

    f.close()

    if dump == 'yes':
        return adatemp
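# Usage sketch (added): converting one adjoint sensitivity field at a given
# optimisation iteration. The iteration must be passed as a string; the
# output name is hypothetical and the grid/meta files from the docstring
# must be present.
#
#     nc3d(parameters=['adxx_atemp'], ofile='adxx_atemp_demo',
#          iteration='12', bswap=1, xx_period=240000.0)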
def var_nc3d(parameters=['Ttave'],
             ofile='MIT_output_3d',
             bswap=1,
             sstart_date="seconds since 2002-10-01 07:00",
             deltaT=1200,
             FillValue=-1.0e+23,
             dump="no"):
    '''
    Convert 3d fields produced by MITgcm to netCDF format with use of
    the Nio module. Names of the variables should be given as a list,
    even if we have only one variable. I put everything on the C grid!

    You have to have the following files in the directory where you run
    your code:
        XC.data   XC.meta
        YC.data   YC.meta
        DRC.data  DRC.meta
        maskCtrlC.data  maskCtrlC.meta

    Input:
        parameters  - list with names of the variables.
        ofile       - name of the output file.
        bswap       - do we need a byte swap? Yes (1) or no (0) [default 1]
        sstart_date - should be "seconds since"
                      [default "seconds since 2002-10-01 07:00"]
        deltaT      - time step in seconds
        FillValue   - missing value
        dump        - if dump='yes' will return numpy array with data
    '''
    lon = mitbin2('XC.data', bswap)[0, 0, :, :]
    lat = mitbin2('YC.data', bswap)[0, 0, :, :]
    lev = mitbin2('DRC.data', bswap)[0, :, 0, 0]
    lev = numpy.cumsum(lev)
    lsmask = mitbin2('maskCtrlC.data', bswap)[:, :, :, :]

    fileList = glob.glob(parameters[0] + "*.data")

    if os.path.exists(ofile + ".nc"):
        os.system("rm " + ofile + ".nc")

    ndim, xdim, ydim, zdim, datatype, nrecords, timeStepNumber = rmeta(
        fileList[0][:-4] + "meta")

    ttime = numpy.zeros((len(fileList)))

    opt = Nio.options()
    opt.PreFill = False
    opt.HeaderReserveSpace = 4000

    f = Nio.open_file(ofile + ".nc", "w", opt)
    f.title = "MITgcm variables in netCDF format"
    f.create_dimension('x', xdim)
    f.create_dimension('y', ydim)
    f.create_dimension('z', zdim)
    f.create_dimension('time', ttime.shape[0])

    f.create_variable('time', 'd', ('time',))
    f.variables['time'].units = sstart_date

    f.create_variable('z', 'd', ('z',))
    f.variables['z'].units = "meters"
    f.variables['z'][:] = lev[:]

    f.create_variable('latitude', 'd', ('x', 'y'))
    f.variables['latitude'].long_name = "latitude"
    f.variables['latitude'].units = "degrees_north"
    f.variables['latitude'].standard_name = "grid_latitude"
    f.variables['latitude'][:] = lat[:]

    f.create_variable('longitude', 'd', ('x', 'y'))
    f.variables['longitude'].long_name = "longitude"
    f.variables['longitude'].units = "degrees_east"
    f.variables['longitude'].standard_name = "grid_longitude"
    f.variables['longitude'][:] = lon[:]

    #vvariables = ["atemp","aqh", "uwind", "vwind", ]
    #vvariables = ["atemp"]
    for parameter in parameters:
        f.create_variable(parameter, 'd', ('time', 'z', 'x', 'y'))
        f.variables[parameter].long_name = gatrib(parameter)[0]
        f.variables[parameter].units = gatrib(parameter)[1]
        f.variables[parameter]._FillValue = FillValue
        f.variables[parameter].missing_value = FillValue

        adatemp_final = numpy.zeros((len(fileList), zdim, xdim, ydim))
        for ind, fileName in enumerate(fileList):
            adatemp = mitbin2(parameter + fileName[-16:],
                              bswap=bswap)[:, :, :, :]
            ndim, xdim, ydim, zdim, datatype, nrecords, timeStepNumber = \
                rmeta(fileName[:-4] + "meta")
            # adatemp = numpy.where(adatemp[:] > 1.0e+12, 0, adatemp[:])
            adatemp = numpy.where(adatemp[:] < -1.0e+20, FillValue,
                                  adatemp[:])
            adatemp = numpy.where(lsmask[:] == 0, FillValue, adatemp[:])
            adatemp_final[ind, :, :, :] = adatemp
            ttime[ind] = timeStepNumber * deltaT

        # write the full time stack and the time values
        f.variables[parameter][:] = adatemp_final
        f.variables['time'][:] = ttime

    f.close()

    if dump == 'yes':
        return adatemp
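# Usage sketch (added): converting a 3d time-average field. Assumes Ttave*
# .data/.meta pairs plus the grid files from the docstring sit in the
# working directory; the output name is hypothetical.
#
#     var_nc3d(parameters=['Ttave'], ofile='MIT_demo_3d',
#              bswap=1, deltaT=1200)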
def main(argv):
    print('Running pyEnsSum!')

    # Get command line stuff and store in a dictionary
    s = ('tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= '
         'mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul '
         'regx= startMon= endMon= fIndex=')
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = ''
    opts_dict['compset'] = ''
    opts_dict['mach'] = ''
    opts_dict['esize'] = 151
    opts_dict['tslice'] = 0
    opts_dict['res'] = ''
    opts_dict['sumfile'] = 'ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['sumfiledir'] = './'
    opts_dict['jsonfile'] = ''
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = False
    opts_dict['maxnorm'] = False
    opts_dict['gmonly'] = False
    opts_dict['popens'] = False
    opts_dict['cumul'] = False
    opts_dict['regx'] = 'test'
    opts_dict['startMon'] = 1
    opts_dict['endMon'] = 1
    opts_dict['fIndex'] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ES', opts_dict)

    verbose = opts_dict['verbose']
    st = opts_dict['esize']
    esize = int(st)

    if verbose:
        print(opts_dict)
        print('Ensemble size for summary = ', esize)

    if not (opts_dict['tag'] and opts_dict['compset'] and
            opts_dict['mach'] and opts_dict['res']):
        print('Please specify --tag, --compset, --mach and --res options')
        sys.exit()

    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var list that will be excluded
    ex_varlist = []

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    if me.get_rank() == 0:
        if opts_dict['jsonfile']:
            # Read in the excluded var list
            ex_varlist = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ES')

    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
        ex_varlist = me.partition(ex_varlist, func=Duplicate(),
                                  involved=True)

    in_files = []
    if os.path.exists(input_dir):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files = sorted(in_files_temp)
        #print(in_files)

        # Make sure we have enough
        num_files = len(in_files)
        if verbose:
            print('Number of files in input directory = ', num_files)
        if num_files < esize:
            print('Number of files in input directory (', num_files,
                  ') is less than specified ensemble size of ', esize)
            sys.exit(2)
        if num_files > esize:
            print('NOTE: Number of files in ', input_dir,
                  'is greater than specified ensemble size of ', esize,
                  '\nwill just use the first ', esize, 'files')
    else:
        print('Input directory: ', input_dir, ' not found')
        sys.exit(2)

    if opts_dict['cumul']:
        if opts_dict['regx']:
            in_files_list = get_cumul_filelist(opts_dict,
                                               opts_dict['indir'],
                                               opts_dict['regx'])
        in_files = me.partition(in_files_list, func=EqualLength(),
                                involved=True)
        if me.get_rank() == 0:
            print('in_files=', in_files)

    # Open the files in the input directory
    o_files = []
    for onefile in in_files[0:esize]:
        if os.path.isfile(input_dir + '/' + onefile):
            o_files.append(Nio.open_file(input_dir + '/' + onefile, "r"))
        else:
            print("COULD NOT LOCATE FILE " + input_dir + onefile +
                  "! EXITING....")
            sys.exit()

    # Store dimensions of the input fields
    if verbose:
        print("Getting spatial dimensions")
    nlev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey = ''
    latkey = ''

    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif (key == "nlon") or (key == "lon"):
            nlon = input_dims[key]
            lonkey = key
        elif (key == "nlat") or (key == "lat"):
            nlat = input_dims[key]
            latkey = key

    if nlev == -1:
        print("COULD NOT LOCATE valid dimension lev => EXITING....")
        sys.exit()

    if (ncol == -1) and ((nlat == -1) or (nlon == -1)):
        print("Need either lat/lon or ncol => EXITING....")
        sys.exit()

    # Check if this is SE or FV data
    if ncol != -1:
        is_SE = True
    else:
        is_SE = False

    # Make sure all files have the same dimensions
    if verbose:
        print("Checking dimensions across files....")
        print('lev = ', nlev)
        if is_SE:
            print('ncol = ', ncol)
        else:
            print('nlat = ', nlat)
            print('nlon = ', nlon)

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if is_SE:
            if (nlev != int(input_dims["lev"]) or
                    ncol != int(input_dims["ncol"])):
                print("Dimension mismatch between ", in_files[0], 'and',
                      in_files[count], '!!!')
                sys.exit()
        else:
            if (nlev != int(input_dims["lev"]) or
                    nlat != int(input_dims[latkey]) or
                    nlon != int(input_dims[lonkey])):
                print("Dimension mismatch between ", in_files[0], 'and',
                      in_files[count], '!!!')
                sys.exit()

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict = o_files[0].variables

    # Remove the excluded variables (specified in json file) from the
    # variable dictionary
    if ex_varlist:
        for i in ex_varlist:
            if i in vars_dict:
                del vars_dict[i]
    num_vars = len(vars_dict)
    if verbose:
        print('Number of variables (including metadata) found = ', num_vars)

    str_size = 0
    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k, v in vars_dict.items():
        var = k
        vd = v.dimensions   # all the variable's dimensions (names)
        vr = v.rank         # number of dimensions
        vs = v.shape        # dim values
        is_2d = False
        is_3d = False
        if is_SE:
            # (time, lev, ncol) or (time, ncol)
            if (vr == 2) and (vs[1] == ncol):
                is_2d = True
                num_2d += 1
            elif (vr == 3) and (vs[2] == ncol and vs[1] == nlev):
                is_3d = True
                num_3d += 1
        else:
            # (time, lev, nlat, nlon) or (time, nlat, nlon)
            if (vr == 3) and (vs[1] == nlat and vs[2] == nlon):
                is_2d = True
                num_2d += 1
            elif (vr == 4) and (vs[2] == nlat and vs[3] == nlon
                                and vs[1] == nlev):
                is_3d = True
                num_3d += 1
        if is_3d:
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif is_2d:
            str_size = max(str_size, len(k))
            d2_var_names.append(k)

    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()

    # All vars is 3d vars first (sorted), then the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    if verbose:
        print('num vars = ', n_all_var_names,
              '(3d = ', num_3d, ' and 2d = ', num_2d, ")")

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]
    if verbose:
        print("Creating ", this_sumfile, " ...")

    if me.get_rank() == 0 or opts_dict["popens"]:
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'
        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if verbose:
            print("Setting dimensions .....")
        if is_SE:
            nc_sumfile.create_dimension('ncol', ncol)
        else:
            nc_sumfile.create_dimension('nlat', nlat)
            nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('ens_size', esize)
        nc_sumfile.create_dimension('nvars', num_3d + num_2d)
        nc_sumfile.create_dimension('nvars3d', num_3d)
        nc_sumfile.create_dimension('nvars2d', num_2d)
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if verbose:
            print("Setting global attributes .....")
        setattr(nc_sumfile, 'creation_date', now)
        setattr(nc_sumfile, 'title',
                'CAM verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if verbose:
            print("Creating variables .....")
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',))
        v_vars = nc_sumfile.create_variable("vars", 'S1',
                                            ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1',
                                             ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1',
                                             ('nvars2d', 'str_size'))
        if not opts_dict['gmonly']:
            if is_SE:
                v_ens_avg3d = nc_sumfile.create_variable(
                    "ens_avg3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_stddev3d = nc_sumfile.create_variable(
                    "ens_stddev3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_avg2d = nc_sumfile.create_variable(
                    "ens_avg2d", 'f', ('nvars2d', 'ncol'))
                v_ens_stddev2d = nc_sumfile.create_variable(
                    "ens_stddev2d", 'f', ('nvars2d', 'ncol'))
            else:
                v_ens_avg3d = nc_sumfile.create_variable(
                    "ens_avg3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_stddev3d = nc_sumfile.create_variable(
                    "ens_stddev3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_avg2d = nc_sumfile.create_variable(
                    "ens_avg2d", 'f', ('nvars2d', 'nlat', 'nlon'))
                v_ens_stddev2d = nc_sumfile.create_variable(
                    "ens_stddev2d", 'f', ('nvars2d', 'nlat', 'nlon'))

            v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f',
                                                ('nvars', 'ens_size'))
        v_gm = nc_sumfile.create_variable("global_mean", 'f',
                                          ('nvars', 'ens_size'))
        v_loadings_gm = nc_sumfile.create_variable('loadings_gm', 'f',
                                                   ('nvars', 'nvars'))
        v_mu_gm = nc_sumfile.create_variable('mu_gm', 'f', ('nvars',))
        v_sigma_gm = nc_sumfile.create_variable('sigma_gm', 'f', ('nvars',))
        v_sigma_scores_gm = nc_sumfile.create_variable('sigma_scores_gm',
                                                       'f', ('nvars',))

        # Assign vars, var3d and var2d
        if verbose:
            print("Assigning vars, var3d, and var2d .....")

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []

        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if l_tt < str_size:
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(d3_var_names)
        for i in range(l_eq):
            tt = list(d3_var_names[i])
            l_tt = len(tt)
            if l_tt < str_size:
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(d2_var_names)
        for i in range(l_eq):
            tt = list(d2_var_names[i])
            l_tt = len(tt)
            if l_tt < str_size:
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invariant metadata
        if verbose:
            print("Assigning time invariant metadata .....")
        lev_data = vars_dict["lev"]
        v_lev[:] = lev_data[:]

    # Form ensembles, each missing one member; compute RMSZs and global
    # means for each variable; we also do max norm (currently done in
    # pyStats)
    tslice = opts_dict['tslice']

    if not opts_dict['cumul']:
        # Partition the var list
        var3_list_loc = me.partition(d3_var_names, func=EqualStride(),
                                     involved=True)
        var2_list_loc = me.partition(d2_var_names, func=EqualStride(),
                                     involved=True)
    else:
        var3_list_loc = d3_var_names
        var2_list_loc = d2_var_names

    # Calculate global means
    if verbose:
        print("Calculating global means .....")
    if not opts_dict['cumul']:
        gm3d, gm2d = pyEnsLib.generate_global_mean_for_summary(
            o_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict)
    if verbose:
        print("Finish calculating global means .....")

    # Calculate RMSZ scores
    if verbose:
        print("Calculating RMSZ scores .....")
    if (not opts_dict['gmonly']) or opts_dict['cumul']:
        zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, \
            ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(
                o_files, var3_list_loc, var2_list_loc, is_SE, opts_dict)

    # Calculate max norm ensemble
    if opts_dict['maxnorm']:
        if verbose:
            print("Calculating max norm of ensembles .....")
        pyEnsLib.calculate_maxnormens(opts_dict, var3_list_loc)
        pyEnsLib.calculate_maxnormens(opts_dict, var2_list_loc)

    if opts_dict['mpi_enable'] and (not opts_dict['popens']):
        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the
            # master processor
            slice_index = get_stride_list(len(d3_var_names), me)

            # Gather global means 3d results
            gm3d = gather_npArray(gm3d, me, slice_index,
                                  (len(d3_var_names), len(o_files)))

            if not opts_dict['gmonly']:
                # Gather zscore3d results
                zscore3d = gather_npArray(
                    zscore3d, me, slice_index,
                    (len(d3_var_names), len(o_files)))

                # Gather ens_avg3d and ens_stddev3d results
                shape_tuple3d = get_shape(ens_avg3d.shape,
                                          len(d3_var_names), me.get_rank())
                ens_avg3d = gather_npArray(ens_avg3d, me, slice_index,
                                           shape_tuple3d)
                ens_stddev3d = gather_npArray(ens_stddev3d, me, slice_index,
                                              shape_tuple3d)

            # Gather 2d variable results from all processors to the
            # master processor
            slice_index = get_stride_list(len(d2_var_names), me)

            # Gather global means 2d results
            gm2d = gather_npArray(gm2d, me, slice_index,
                                  (len(d2_var_names), len(o_files)))

            if not opts_dict['gmonly']:
                # Gather zscore2d results
                zscore2d = gather_npArray(
                    zscore2d, me, slice_index,
                    (len(d2_var_names), len(o_files)))

                # Gather ens_avg2d and ens_stddev2d results
                shape_tuple2d = get_shape(ens_avg2d.shape,
                                          len(d2_var_names), me.get_rank())
                ens_avg2d = gather_npArray(ens_avg2d, me, slice_index,
                                           shape_tuple2d)
                ens_stddev2d = gather_npArray(ens_stddev2d, me, slice_index,
                                              shape_tuple2d)
        else:
            gmall = np.concatenate((temp1, temp2), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(
                gmall, me,
                (me.get_size(), len(d3_var_names) + len(d2_var_names)))

    # Assign to file:
    if me.get_rank() == 0 or opts_dict['popens']:
        if not opts_dict['cumul']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            if not opts_dict['gmonly']:
                Zscoreall = np.concatenate((zscore3d, zscore2d), axis=0)
                v_RMSZ[:, :] = Zscoreall[:, :]
                if is_SE:
                    v_ens_avg3d[:, :, :] = ens_avg3d[:, :, :]
                    v_ens_stddev3d[:, :, :] = ens_stddev3d[:, :, :]
                    v_ens_avg2d[:, :] = ens_avg2d[:, :]
                    v_ens_stddev2d[:, :] = ens_stddev2d[:, :]
                else:
                    v_ens_avg3d[:, :, :, :] = ens_avg3d[:, :, :, :]
                    v_ens_stddev3d[:, :, :, :] = ens_stddev3d[:, :, :, :]
                    v_ens_avg2d[:, :, :] = ens_avg2d[:, :, :]
                    v_ens_stddev2d[:, :, :] = ens_stddev2d[:, :, :]
        else:
            gmall_temp = np.transpose(gmall[:, :])
            gmall = gmall_temp
        mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm = \
            pyEnsLib.pre_PCA(gmall)
        v_gm[:, :] = gmall[:, :]
        v_mu_gm[:] = mu_gm[:]
        v_sigma_gm[:] = sigma_gm[:].astype(np.float32)
        v_loadings_gm[:, :] = loadings_gm[:, :]
        v_sigma_scores_gm[:] = scores_gm[:]
        print("All Done")


def get_cumul_filelist(opts_dict, indir, regx):
    if not opts_dict['indir']:
        print('input dir is not specified')
        sys.exit(2)
    #regx='(pgi(.)*-(01|02))'
    regx_list = ["mon", "gnu", "pgi"]
    all_files = []
    for prefix in regx_list:
        for i in range(opts_dict['fIndex'],
                       opts_dict['fIndex'] + opts_dict['esize'] // 3):
            for j in range(opts_dict['startMon'], opts_dict['endMon'] + 1):
                mon_str = str(j).zfill(2)
                regx = ('(^' + prefix + '(.)*' + str(i) +
                        '(.)*-(' + mon_str + '))')
                print('regx=', regx)
                res = [f for f in os.listdir(indir) if re.search(regx, f)]
                in_files = sorted(res)
                all_files.extend(in_files)
    print("all_files=", all_files)
    #in_files=res
    return all_files


#
# Get the shape tuple of the gathered variable-list array for each processor
#
def get_shape(shape_tuple, shape1, rank):
    lst = list(shape_tuple)
    lst[0] = shape1
    shape_tuple = tuple(lst)
    return shape_tuple


#
# Get the mpi partition list for each processor
#
def get_stride_list(len_of_list, me):
    slice_index = []
    for i in range(me.get_size()):
        index_arr = np.arange(len_of_list)
        slice_index.append(index_arr[i::me.get_size()])
    return slice_index


#
# Gather arrays from each processor by the var_list to the master processor
# and make it an array
#
def gather_npArray(npArray, me, slice_index, array_shape):
    the_array = np.zeros(array_shape, dtype=np.float32)
    if me.get_rank() == 0:
        k = 0
        for j in slice_index[me.get_rank()]:
            the_array[j, :] = npArray[k, :]
            k = k + 1
    for i in range(1, me.get_size()):
        if me.get_rank() == 0:
            rank, npArray = me.collect()
            k = 0
            for j in slice_index[rank]:
                the_array[j, :] = npArray[k, :]
                k = k + 1
    if me.get_rank() != 0:
        message = {"from_rank": me.get_rank(), "shape": npArray.shape}
        me.collect(npArray)
    me.sync()
    return the_array


if __name__ == "__main__":
    main(sys.argv[1:])
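# Hedged invocation sketch (added): the flags below come from the `optkeys`
# string parsed in main(); the script name, paths, and tag/compset/res/mach
# values are hypothetical.
#
#     python pyEnsSum.py --indir /path/to/ensemble/runs \
#                        --sumfile ens.summary.nc --esize 151 --tslice 0 \
#                        --tag cesm1_3 --compset FC5 --res ne30_ne30 \
#                        --mach yellowstone --jsonfile excluded_vars.json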
def main(argv): print 'Running pyEnsSumPop!' # Get command line stuff and store in a dictionary s = 'nyear= nmonth= npert= tag= res= mach= compset= sumfile= indir= tslice= verbose jsonfile= mpi_enable zscoreonly nrand= rand seq= jsondir=' optkeys = s.split() try: opts, args = getopt.getopt(argv, "h", optkeys) except getopt.GetoptError: pyEnsLib.EnsSumPop_usage() sys.exit(2) # Put command line options in a dictionary - also set defaults opts_dict = {} # Defaults opts_dict['tag'] = 'cesm1_2_0' opts_dict['compset'] = 'FC5' opts_dict['mach'] = 'yellowstone' opts_dict['tslice'] = 0 opts_dict['nyear'] = 3 opts_dict['nmonth'] = 12 opts_dict['npert'] = 40 opts_dict['nbin'] = 40 opts_dict['minrange'] = 0.0 opts_dict['maxrange'] = 4.0 opts_dict['res'] = 'ne30_ne30' opts_dict['sumfile'] = 'ens.pop.summary.nc' opts_dict['indir'] = './' opts_dict['jsonfile'] = '' opts_dict['verbose'] = True opts_dict['mpi_enable'] = False opts_dict['zscoreonly'] = False opts_dict['popens'] = True opts_dict['nrand'] = 40 opts_dict['rand'] = False opts_dict['seq'] = 0 opts_dict['jsondir'] = '/glade/scratch/haiyingx/' # This creates the dictionary of input arguments print "before parseconfig" opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ESP', opts_dict) verbose = opts_dict['verbose'] nbin = opts_dict['nbin'] if verbose: print opts_dict # Now find file names in indir input_dir = opts_dict['indir'] # Create a mpi simplecomm object if opts_dict['mpi_enable']: me = simplecomm.create_comm() else: me = simplecomm.create_comm(not opts_dict['mpi_enable']) if opts_dict['jsonfile']: # Read in the included var list Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP') str_size = 0 for str in Var3d: if str_size < len(str): str_size = len(str) for str in Var2d: if str_size < len(str): str_size = len(str) in_files = [] if (os.path.exists(input_dir)): # Pick up the 'nrand' random number of input files to generate summary files if opts_dict['rand']: in_files = pyEnsLib.Random_pickup_pop(input_dir, opts_dict, opts_dict['nrand']) else: # Get the list of files in_files_temp = os.listdir(input_dir) in_files = sorted(in_files_temp) # Make sure we have enough num_files = len(in_files) else: print 'Input directory: ', input_dir, ' not found' sys.exit(2) # Create a mpi simplecomm object if opts_dict['mpi_enable']: me = simplecomm.create_comm() else: me = simplecomm.create_comm(not opts_dict['mpi_enable']) #Partition the input file list in_file_list = me.partition(in_files, func=EqualStride(), involved=True) # Open the files in the input directory o_files = [] for onefile in in_file_list: if (os.path.isfile(input_dir + '/' + onefile)): o_files.append(Nio.open_file(input_dir + '/' + onefile, "r")) else: print "COULD NOT LOCATE FILE " + input_dir + onefile + "! EXITING...." sys.exit() print in_file_list # Store dimensions of the input fields if (verbose == True): print "Getting spatial dimensions" nlev = -1 nlat = -1 nlon = -1 # Look at first file and get dims input_dims = o_files[0].dimensions ndims = len(input_dims) # Make sure all files have the same dimensions for key in input_dims: if key == "z_t": nlev = input_dims["z_t"] elif key == "nlon": nlon = input_dims["nlon"] elif key == "nlat": nlat = input_dims["nlat"] for count, this_file in enumerate(o_files): input_dims = this_file.dimensions if ( nlev != int(input_dims["z_t"]) or ( nlat != int(input_dims["nlat"]))\ or ( nlon != int(input_dims["nlon"]))): print "Dimension mismatch between ", in_file_list[ 0], 'and', in_file_list[count], '!!!' 
sys.exit() # Create new summary ensemble file this_sumfile = opts_dict["sumfile"] if verbose: print "Creating ", this_sumfile, " ..." if (me.get_rank() == 0): if os.path.exists(this_sumfile): os.unlink(this_sumfile) opt = Nio.options() opt.PreFill = False opt.Format = 'NetCDF4Classic' nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt) # Set dimensions if (verbose == True): print "Setting dimensions ....." nc_sumfile.create_dimension('nlat', nlat) nc_sumfile.create_dimension('nlon', nlon) nc_sumfile.create_dimension('nlev', nlev) nc_sumfile.create_dimension('time', None) nc_sumfile.create_dimension('ens_size', opts_dict['npert']) nc_sumfile.create_dimension('nbin', opts_dict['nbin']) nc_sumfile.create_dimension('nvars', len(Var3d) + len(Var2d)) nc_sumfile.create_dimension('nvars3d', len(Var3d)) nc_sumfile.create_dimension('nvars2d', len(Var2d)) nc_sumfile.create_dimension('str_size', str_size) # Set global attributes now = time.strftime("%c") if (verbose == True): print "Setting global attributes ....." setattr(nc_sumfile, 'creation_date', now) setattr(nc_sumfile, 'title', 'POP verification ensemble summary file') setattr(nc_sumfile, 'tag', opts_dict["tag"]) setattr(nc_sumfile, 'compset', opts_dict["compset"]) setattr(nc_sumfile, 'resolution', opts_dict["res"]) setattr(nc_sumfile, 'machine', opts_dict["mach"]) # Create variables if (verbose == True): print "Creating variables ....." v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev', )) v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size')) v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size')) v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size')) v_time = nc_sumfile.create_variable("time", 'd', ('time', )) v_ens_avg3d = nc_sumfile.create_variable( "ens_avg3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon')) v_ens_stddev3d = nc_sumfile.create_variable( "ens_stddev3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon')) v_ens_avg2d = nc_sumfile.create_variable( "ens_avg2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon')) v_ens_stddev2d = nc_sumfile.create_variable( "ens_stddev2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon')) v_RMSZ = nc_sumfile.create_variable( "RMSZ", 'f', ('time', 'nvars', 'ens_size', 'nbin')) if not opts_dict['zscoreonly']: v_gm = nc_sumfile.create_variable("global_mean", 'f', ('time', 'nvars', 'ens_size')) # Assign vars, var3d and var2d if (verbose == True): print "Assigning vars, var3d, and var2d ....." eq_all_var_names = [] eq_d3_var_names = [] eq_d2_var_names = [] all_var_names = list(Var3d) all_var_names += Var2d l_eq = len(all_var_names) for i in range(l_eq): tt = list(all_var_names[i]) l_tt = len(tt) if (l_tt < str_size): extra = list(' ') * (str_size - l_tt) tt.extend(extra) eq_all_var_names.append(tt) l_eq = len(Var3d) for i in range(l_eq): tt = list(Var3d[i]) l_tt = len(tt) if (l_tt < str_size): extra = list(' ') * (str_size - l_tt) tt.extend(extra) eq_d3_var_names.append(tt) l_eq = len(Var2d) for i in range(l_eq): tt = list(Var2d[i]) l_tt = len(tt) if (l_tt < str_size): extra = list(' ') * (str_size - l_tt) tt.extend(extra) eq_d2_var_names.append(tt) v_vars[:] = eq_all_var_names[:] v_var3d[:] = eq_d3_var_names[:] v_var2d[:] = eq_d2_var_names[:] # Time-invarient metadata if (verbose == True): print "Assigning time invariant metadata ....." vars_dict = o_files[0].variables lev_data = vars_dict["z_t"] v_lev = lev_data # Time-varient metadata if verbose: print "Assigning time variant metadata ....." 
vars_dict = o_files[0].variables time_value = vars_dict['time'] time_array = np.array([time_value]) time_array = pyEnsLib.gather_npArray_pop(time_array, me, (me.get_size(), )) if me.get_rank() == 0: v_time[:] = time_array[:] # Calculate global mean, average, standard deviation if verbose: print "Calculating global means ....." is_SE = False tslice = 0 if not opts_dict['zscoreonly']: gm3d, gm2d = pyEnsLib.generate_global_mean_for_summary( o_files, Var3d, Var2d, is_SE, False, opts_dict) if verbose: print "Finish calculating global means ....." # Calculate RMSZ scores if (verbose == True): print "Calculating RMSZ scores ....." zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz( o_files, Var3d, Var2d, is_SE, opts_dict) # Collect from all processors if opts_dict['mpi_enable']: # Gather the 3d variable results from all processors to the master processor # Gather global means 3d results if not opts_dict['zscoreonly']: gmall = np.concatenate((gm3d, gm2d), axis=0) #print "before gather, gmall.shape=",gmall.shape gmall = pyEnsLib.gather_npArray_pop( gmall, me, (me.get_size(), len(Var3d) + len(Var2d), len(o_files))) zmall = np.concatenate((zscore3d, zscore2d), axis=0) zmall = pyEnsLib.gather_npArray_pop( zmall, me, (me.get_size(), len(Var3d) + len(Var2d), len(o_files), nbin)) #print 'zmall=',zmall #print "after gather, gmall.shape=",gmall.shape ens_avg3d = pyEnsLib.gather_npArray_pop( ens_avg3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon)) ens_avg2d = pyEnsLib.gather_npArray_pop(ens_avg2d, me, (me.get_size(), len(Var2d), (nlat), nlon)) ens_stddev3d = pyEnsLib.gather_npArray_pop( ens_stddev3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon)) ens_stddev2d = pyEnsLib.gather_npArray_pop(ens_stddev2d, me, (me.get_size(), len(Var2d), (nlat), nlon)) # Assign to file: if me.get_rank() == 0: #Zscoreall=np.concatenate((zscore3d,zscore2d),axis=0) v_RMSZ[:, :, :, :] = zmall[:, :, :, :] v_ens_avg3d[:, :, :, :, :] = ens_avg3d[:, :, :, :, :] v_ens_stddev3d[:, :, :, :, :] = ens_stddev3d[:, :, :, :, :] v_ens_avg2d[:, :, :, :] = ens_avg2d[:, :, :, :] v_ens_stddev2d[:, :, :, :] = ens_stddev2d[:, :, :, :] if not opts_dict['zscoreonly']: v_gm[:, :, :] = gmall[:, :, :] print "All done"
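# The three name-padding loops in main() above repeat the same fixed-width
# logic. For reference, a minimal helper (a sketch only; pad_var_names is
# not part of pyEnsLib) that produces the same space-padded character
# lists expected by the 'S1' summary variables:


def pad_var_names(names, str_size):
    """Pad each name with blanks to str_size and split it into single
    characters, matching the layout written to vars/var3d/var2d."""
    return [list(name.ljust(str_size)) for name in names]

# e.g. v_vars[:] = pad_var_names(list(Var3d) + list(Var2d), str_size)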
def var_nc3d(parameters=['Ttave'], ofile='MIT_output_3d', bswap=1,
             sstart_date="seconds since 2002-10-01 07:00",
             deltaT=1200, FillValue=-1.0e+23, dump="no"):
    '''
    Convert 3d fields produced by MITgcm to netCDF format with use of the
    Nio module. Variable names must be given as a list, even for a single
    variable. Everything is put on the C grid.

    You have to have the following files in the directory where you run
    your code:
    XC.data XC.meta YC.data YC.meta DRC.data DRC.meta maskCtrlC.data maskCtrlC.meta

    Input:
        parameters  - list with names of the variables.
        ofile       - name of the output file.
        bswap       - do we need a byte swap? Yes (1) or no (0) [default 1]
        sstart_date - reference time, should be "seconds since ..."
                      [default "seconds since 2002-10-01 07:00"]
        deltaT      - time step in seconds
        FillValue   - missing value
        dump        - if dump='yes', return a numpy array with the data
    '''
    lon = mitbin2('XC.data', bswap)[0, 0, :, :]
    lat = mitbin2('YC.data', bswap)[0, 0, :, :]
    lev = mitbin2('DRC.data', bswap)[0, :, 0, 0]
    lev = numpy.cumsum(lev)
    lsmask = mitbin2('maskCtrlC.data', bswap)[:, :, :, :]

    fileList = glob.glob(parameters[0] + "*.data")
    if os.path.exists(ofile + ".nc") == True:
        os.system("rm " + ofile + ".nc")

    ndim, xdim, ydim, zdim, datatype, nrecords, timeStepNumber = rmeta(
        fileList[0][:-4] + "meta")
    ttime = numpy.zeros((len(fileList)))

    opt = Nio.options()
    opt.PreFill = False
    opt.HeaderReserveSpace = 4000
    f = Nio.open_file(ofile + ".nc", "w", opt)
    f.title = "MITgcm variables in netCDF format"
    f.create_dimension('x', xdim)
    f.create_dimension('y', ydim)
    f.create_dimension('z', zdim)
    f.create_dimension('time', ttime.shape[0])

    f.create_variable('time', 'd', ('time', ))
    f.variables['time'].units = sstart_date

    f.create_variable('z', 'd', ('z', ))
    f.variables['z'].units = "meters"
    f.variables['z'][:] = lev[:]

    f.create_variable('latitude', 'd', ('x', 'y'))
    f.variables['latitude'].long_name = "latitude"
    f.variables['latitude'].units = "degrees_north"
    f.variables['latitude'].standard_name = "grid_latitude"
    f.variables['latitude'][:] = lat[:]

    f.create_variable('longitude', 'd', ('x', 'y'))
    f.variables['longitude'].long_name = "longitude"
    f.variables['longitude'].units = "degrees_east"
    f.variables['longitude'].standard_name = "grid_longitude"
    f.variables['longitude'][:] = lon[:]

    for parameter in parameters:
        f.create_variable(parameter, 'd', ('time', 'z', 'x', 'y'))
        f.variables[parameter].long_name = gatrib(parameter)[0]
        f.variables[parameter].units = gatrib(parameter)[1]
        f.variables[parameter]._FillValue = FillValue
        f.variables[parameter].missing_value = FillValue

        adatemp_final = numpy.zeros((len(fileList), zdim, xdim, ydim))
        for ind, fileName in enumerate(fileList):
            adatemp = mitbin2(parameter + fileName[-16:],
                              bswap=bswap)[:, :, :, :]
            ndim, xdim, ydim, zdim, datatype, nrecords, timeStepNumber = rmeta(
                fileName[:-4] + "meta")
            # adatemp = numpy.where(adatemp[:] > 1.0e+12, 0, adatemp[:])
            adatemp = numpy.where(adatemp[:] < -1.0e+20, FillValue, adatemp[:])
            adatemp = numpy.where(lsmask[:] == 0, FillValue, adatemp[:])
            adatemp_final[ind, :, :, :] = adatemp
            ttime[ind] = timeStepNumber * deltaT
        # Write the accumulated time series, not just the last record read
        f.variables[parameter][:] = adatemp_final

    # Write the time axis collected from the .meta files
    f.variables['time'][:] = ttime[:]
    f.close()
    if dump == 'yes':
        return adatemp_final
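# Hedged usage sketch for var_nc3d: the call assumes the MITgcm grid files
# (XC, YC, DRC, maskCtrlC .data/.meta) and the Ttave*.data/.meta output
# files are present in the working directory; the output file name below
# is illustrative.
if __name__ == '__main__':
    var_nc3d(parameters=['Ttave'],
             ofile='MIT_Ttave_3d',
             bswap=1,
             sstart_date="seconds since 2002-10-01 07:00",
             deltaT=1200)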
import numpy as np
import Nio

fn = "MSG3-SEVI-MSG15-0100-NA-20130521001244.164000000Z-1074164.h5"

opt = Nio.options()
opt.FileStructure = "advanced"
f = Nio.open_file(fn, "r", options=opt)

print f.variables.keys()

g = f.groups["U-MARF/MSG/Level1.5/DATA/Channel 07"]
print g

palette = g.variables["Palette"]
print palette

print "\nLineSideInfo_DESCR:"
lsid = g.variables["LineSideInfo_DESCR"][:]
print lsid[:]
dims = lsid.shape
for n in xrange(dims[0]):
    name = str(lsid[n][0])
    value = str(lsid[n][1])
    print "%s = %s" % (name, value)
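# A follow-on sketch using the same 'advanced' FileStructure API: when the
# exact group path (here "U-MARF/MSG/Level1.5/DATA/Channel 07") is not
# known in advance, every group in the file can be walked and its
# variables listed first.
n = 0
for gname in f.groups.keys():
    n += 1
    print "group %d: <%s> variables: %s" % (
        n, gname, f.groups[gname].variables.keys())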
def create_ave_file(self, my_file, outfile, hist_string, ncformat, years, collapse_dim=''):
    '''
    Opens up/Creates a new file to put the computed averages into.

    @param my_file      A sampled input file pointer.
    @param outfile      Filename of the new output/average file.
    @param hist_string  A string that contains the file history for the history attribute.
    @param ncformat     Format to write the NetCDF file out as.
    @param collapse_dim Dimension to collapse across.

    @return new_file    Returns a file pointer to the newly opened file.
    '''
    dims = my_file.dimensions
    attr = my_file.attributes
    vars = {}
    new_file_name = outfile
    # Set pyNIO netcdf file options
    opt = Nio.options()
    # The netcdf output format
    if (ncformat == 'netcdf4c'):
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = 1
    elif (ncformat == 'netcdf4'):
        opt.Format = 'NetCDF4Classic'
    elif (ncformat == 'netcdf'):
        opt.Format = 'Classic'
    elif (ncformat == 'netcdfLarge'):
        opt.Format = '64BitOffset'
    else:
        print "WARNING: Selected netcdf file format (", ncformat, ") is not recognized."
        print "Defaulting to netcdf3Classic format."
        opt.Format = 'Classic'
    opt.PreFill = False
    new_file = Nio.open_file(new_file_name, "w", options=opt, history=hist_string)
    # Create attributes, dimensions, and variables
    for n, v in attr.items():
        if n == 'history':
            v = hist_string + '\n' + v
        setattr(new_file, n, v)
    for var_d, l in dims.items():
        if var_d == "time":
            if "time" not in collapse_dim:
                new_file.create_dimension(var_d, None)
        else:
            if var_d not in collapse_dim:
                new_file.create_dimension(var_d, l)
    setattr(new_file, 'yrs_averaged', years)
    return new_file
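# Hedged usage sketch for create_ave_file: 'sample_input.nc' stands for an
# existing NetCDF file, and the history and years strings are
# placeholders.
my_file = Nio.open_file('sample_input.nc', 'r')
avg_file = create_ave_file(my_file, 'averages.nc',
                           'created by the averaging script', 'netcdf4c',
                           '0001-0010', collapse_dim='time')
avg_file.close()
my_file.close()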
def setUp(self): do_setup(filename) opt = Nio.options() opt.MaskedArrayMode = 'MaskedExplicit' self.f = Nio.open_file(filename, options=opt)
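# For reference, a hedged sketch of what the MaskedArrayMode option
# affects on read, reusing the filename and opt names from the setUp
# above (the variable name 'foo' is illustrative): depending on the mode,
# Nio hands back either a plain numpy array or a numpy.ma.MaskedArray
# with fill values masked out.
import numpy.ma as ma

f = Nio.open_file(filename, options=opt)
data = f.variables['foo'][:]
print type(data), ma.isMaskedArray(data)
f.close()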
def main(argv): # Get command line stuff and store in a dictionary s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex=' optkeys = s.split() try: opts, args = getopt.getopt(argv, "h", optkeys) except getopt.GetoptError: pyEnsLib.EnsSum_usage() sys.exit(2) # Put command line options in a dictionary - also set defaults opts_dict={} # Defaults opts_dict['tag'] = 'cesm2_0_beta08' opts_dict['compset'] = 'F2000' opts_dict['mach'] = 'cheyenne' opts_dict['esize'] = 350 opts_dict['tslice'] = 1 opts_dict['res'] = 'f19_f19' opts_dict['sumfile'] = 'ens.summary.nc' opts_dict['indir'] = './' opts_dict['sumfiledir'] = './' opts_dict['jsonfile'] = 'exclude_empty.json' opts_dict['verbose'] = False opts_dict['mpi_enable'] = False opts_dict['maxnorm'] = False opts_dict['gmonly'] = True opts_dict['popens'] = False opts_dict['cumul'] = False opts_dict['regx'] = 'test' opts_dict['startMon'] = 1 opts_dict['endMon'] = 1 opts_dict['fIndex'] = 151 # This creates the dictionary of input arguments opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,'ES',opts_dict) verbose = opts_dict['verbose'] st = opts_dict['esize'] esize = int(st) if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach'] or opts_dict['res']): print 'Please specify --tag, --compset, --mach and --res options' sys.exit() # Now find file names in indir input_dir = opts_dict['indir'] # The var list that will be excluded ex_varlist=[] inc_varlist=[] # Create a mpi simplecomm object if opts_dict['mpi_enable']: me=simplecomm.create_comm() else: me=simplecomm.create_comm(not opts_dict['mpi_enable']) if me.get_rank() == 0: print 'Running pyEnsSum!' if me.get_rank() ==0 and (verbose == True): print opts_dict print 'Ensemble size for summary = ', esize exclude=False if me.get_rank() == 0: if opts_dict['jsonfile']: inc_varlist=[] # Read in the excluded or included var list ex_varlist,exclude=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES') if exclude == False: inc_varlist=ex_varlist ex_varlist=[] # Read in the included var list #inc_varlist=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES') # Broadcast the excluded var list to each processor #if opts_dict['mpi_enable']: # ex_varlist=me.partition(ex_varlist,func=Duplicate(),involved=True) # Broadcast the excluded var list to each processor if opts_dict['mpi_enable']: exclude=me.partition(exclude,func=Duplicate(),involved=True) if exclude: ex_varlist=me.partition(ex_varlist,func=Duplicate(),involved=True) else: inc_varlist=me.partition(inc_varlist,func=Duplicate(),involved=True) in_files=[] if(os.path.exists(input_dir)): # Get the list of files in_files_temp = os.listdir(input_dir) in_files=sorted(in_files_temp) # Make sure we have enough num_files = len(in_files) if me.get_rank()==0 and (verbose == True): print 'Number of files in input directory = ', num_files if (num_files < esize): if me.get_rank()==0 and (verbose == True): print 'Number of files in input directory (',num_files,\ ') is less than specified ensemble size of ', esize sys.exit(2) if (num_files > esize): if me.get_rank()==0 and (verbose == True): print 'NOTE: Number of files in ', input_dir, \ 'is greater than specified ensemble size of ', esize ,\ '\nwill just use the first ', esize, 'files' else: if me.get_rank()==0: print 'Input directory: ',input_dir,' not found' sys.exit(2) if opts_dict['cumul']: if opts_dict['regx']: in_files_list=get_cumul_filelist(opts_dict,opts_dict['indir'],opts_dict['regx']) 
in_files=me.partition(in_files_list,func=EqualLength(),involved=True) if me.get_rank()==0 and (verbose == True): print 'in_files=',in_files # Open the files in the input directory o_files=[] if me.get_rank() == 0 and opts_dict['verbose']: print 'Input files are: ' print "\n".join(in_files) #for i in in_files: # print "in_files =",i for onefile in in_files[0:esize]: if (os.path.isfile(input_dir+'/' + onefile)): o_files.append(Nio.open_file(input_dir+'/' + onefile,"r")) else: if me.get_rank()==0: print "COULD NOT LOCATE FILE "+ input_dir + onefile + "! EXITING...." sys.exit() # Store dimensions of the input fields if me.get_rank()==0 and (verbose == True): print "Getting spatial dimensions" nlev = -1 nilev = -1 ncol = -1 nlat = -1 nlon = -1 lonkey='' latkey='' # Look at first file and get dims input_dims = o_files[0].dimensions ndims = len(input_dims) for key in input_dims: if key == "lev": nlev = input_dims["lev"] elif key == "ilev": nilev = input_dims["ilev"] elif key == "ncol": ncol = input_dims["ncol"] elif (key == "nlon") or (key =="lon"): nlon = input_dims[key] lonkey=key elif (key == "nlat") or (key == "lat"): nlat = input_dims[key] latkey=key if (nlev == -1) : if me.get_rank()==0: print "COULD NOT LOCATE valid dimension lev => EXITING...." sys.exit() if (( ncol == -1) and ((nlat == -1) or (nlon == -1))): if me.get_rank()==0: print "Need either lat/lon or ncol => EXITING...." sys.exit() # Check if this is SE or FV data if (ncol != -1): is_SE = True else: is_SE = False # Make sure all files have the same dimensions if me.get_rank()==0 and (verbose == True): print "Checking dimensions across files...." print 'lev = ', nlev if (is_SE == True): print 'ncol = ', ncol else: print 'nlat = ', nlat print 'nlon = ', nlon for count, this_file in enumerate(o_files): input_dims = this_file.dimensions if (is_SE == True): if ( nlev != int(input_dims["lev"]) or ( ncol != int(input_dims["ncol"]))): if me.get_rank() == 0: print "Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!' sys.exit() else: if ( nlev != int(input_dims["lev"]) or ( nlat != int(input_dims[latkey]))\ or ( nlon != int(input_dims[lonkey]))): if me.get_rank() == 0: print "Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!' 
sys.exit() # Get 2d vars, 3d vars and all vars (For now include all variables) vars_dict_all = o_files[0].variables # Remove the excluded variables (specified in json file) from variable dictionary #print len(vars_dict_all) if exclude: vars_dict=vars_dict_all for i in ex_varlist: if i in vars_dict: del vars_dict[i] #Given an included var list, remove all float var that are not on the list else: vars_dict=vars_dict_all.copy() for k,v in vars_dict_all.iteritems(): if (k not in inc_varlist) and (vars_dict_all[k].typecode()=='f'): #print vars_dict_all[k].typecode() #print k del vars_dict[k] num_vars = len(vars_dict) #print num_vars #if me.get_rank() == 0: # for k,v in vars_dict.iteritems(): # print 'vars_dict',k,vars_dict[k].typecode() str_size = 0 d2_var_names = [] d3_var_names = [] num_2d = 0 num_3d = 0 # Which are 2d, which are 3d and max str_size for k,v in vars_dict.iteritems(): var = k vd = v.dimensions # all the variable's dimensions (names) vr = v.rank # num dimension vs = v.shape # dim values is_2d = False is_3d = False if (is_SE == True): # (time, lev, ncol) or (time, ncol) if ((vr == 2) and (vs[1] == ncol)): is_2d = True num_2d += 1 elif ((vr == 3) and (vs[2] == ncol and vs[1] == nlev )): is_3d = True num_3d += 1 else: # (time, lev, nlon, nlon) or (time, nlat, nlon) if ((vr == 3) and (vs[1] == nlat and vs[2] == nlon)): is_2d = True num_2d += 1 elif ((vr == 4) and (vs[2] == nlat and vs[3] == nlon and (vs[1] == nlev or vs[1]==nilev ))): is_3d = True num_3d += 1 if (is_3d == True) : str_size = max(str_size, len(k)) d3_var_names.append(k) elif (is_2d == True): str_size = max(str_size, len(k)) d2_var_names.append(k) #else: # print 'var=',k if me.get_rank() == 0 and (verbose == True): print 'Number of variables found: ', num_3d+num_2d print '3D variables: '+str(num_3d)+', 2D variables: '+str(num_2d) # Now sort these and combine (this sorts caps first, then lower case - # which is what we want) d2_var_names.sort() d3_var_names.sort() if esize<num_2d+num_3d: if me.get_rank()==0: print "************************************************************************************************************************************" print " Error: the total number of 3D and 2D variables "+str(num_2d+num_3d)+" is larger than the number of ensemble files "+str(esize) print " Cannot generate ensemble summary file, please remove more variables from your included variable list," print " or add more varaibles in your excluded variable list!!!" print "************************************************************************************************************************************" sys.exit() # All vars is 3d vars first (sorted), the 2d vars all_var_names = list(d3_var_names) all_var_names += d2_var_names n_all_var_names = len(all_var_names) #if me.get_rank() == 0 and (verbose == True): # print 'num vars = ', n_all_var_names, '(3d = ', num_3d, ' and 2d = ', num_2d, ")" # Create new summary ensemble file this_sumfile = opts_dict["sumfile"] if me.get_rank() == 0 and (verbose == True): print "Creating ", this_sumfile, " ..." if(me.get_rank() ==0 | opts_dict["popens"]): if os.path.exists(this_sumfile): os.unlink(this_sumfile) opt = Nio.options() opt.PreFill = False opt.Format = 'NetCDF4Classic' nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt) # Set dimensions if me.get_rank() == 0 and (verbose == True): print "Setting dimensions ....." 
if (is_SE == True): nc_sumfile.create_dimension('ncol', ncol) else: nc_sumfile.create_dimension('nlat', nlat) nc_sumfile.create_dimension('nlon', nlon) nc_sumfile.create_dimension('nlev', nlev) nc_sumfile.create_dimension('ens_size', esize) nc_sumfile.create_dimension('nvars', num_3d + num_2d) nc_sumfile.create_dimension('nvars3d', num_3d) nc_sumfile.create_dimension('nvars2d', num_2d) nc_sumfile.create_dimension('str_size', str_size) # Set global attributes now = time.strftime("%c") if me.get_rank() == 0 and (verbose == True): print "Setting global attributes ....." setattr(nc_sumfile, 'creation_date',now) setattr(nc_sumfile, 'title', 'CAM verification ensemble summary file') setattr(nc_sumfile, 'tag', opts_dict["tag"]) setattr(nc_sumfile, 'compset', opts_dict["compset"]) setattr(nc_sumfile, 'resolution', opts_dict["res"]) setattr(nc_sumfile, 'machine', opts_dict["mach"]) # Create variables if me.get_rank() == 0 and (verbose == True): print "Creating variables ....." v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',)) v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size')) v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size')) v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size')) if not opts_dict['gmonly']: if (is_SE == True): v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'ncol')) v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'ncol')) v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'ncol')) v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'ncol')) else: v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon')) v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon')) v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'nlat', 'nlon')) v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'nlat', 'nlon')) v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f', ('nvars', 'ens_size')) v_gm = nc_sumfile.create_variable("global_mean", 'f', ('nvars', 'ens_size')) v_standardized_gm=nc_sumfile.create_variable("standardized_gm",'f',('nvars','ens_size')) v_loadings_gm = nc_sumfile.create_variable('loadings_gm','f',('nvars','nvars')) v_mu_gm = nc_sumfile.create_variable('mu_gm','f',('nvars',)) v_sigma_gm = nc_sumfile.create_variable('sigma_gm','f',('nvars',)) v_sigma_scores_gm = nc_sumfile.create_variable('sigma_scores_gm','f',('nvars',)) # Assign vars, var3d and var2d if me.get_rank() == 0 and (verbose == True): print "Assigning vars, var3d, and var2d ....." eq_all_var_names =[] eq_d3_var_names = [] eq_d2_var_names = [] l_eq = len(all_var_names) for i in range(l_eq): tt = list(all_var_names[i]) l_tt = len(tt) if (l_tt < str_size): extra = list(' ')*(str_size - l_tt) tt.extend(extra) eq_all_var_names.append(tt) l_eq = len(d3_var_names) for i in range(l_eq): tt = list(d3_var_names[i]) l_tt = len(tt) if (l_tt < str_size): extra = list(' ')*(str_size - l_tt) tt.extend(extra) eq_d3_var_names.append(tt) l_eq = len(d2_var_names) for i in range(l_eq): tt = list(d2_var_names[i]) l_tt = len(tt) if (l_tt < str_size): extra = list(' ')*(str_size - l_tt) tt.extend(extra) eq_d2_var_names.append(tt) v_vars[:] = eq_all_var_names[:] v_var3d[:] = eq_d3_var_names[:] v_var2d[:] = eq_d2_var_names[:] # Time-invarient metadata if me.get_rank() == 0 and (verbose == True): print "Assigning time invariant metadata ....." 
lev_data = vars_dict["lev"] v_lev = lev_data # Form ensembles, each missing one member; compute RMSZs and global means #for each variable, we also do max norm also (currently done in pyStats) tslice = opts_dict['tslice'] if not opts_dict['cumul']: # Partition the var list var3_list_loc=me.partition(d3_var_names,func=EqualStride(),involved=True) var2_list_loc=me.partition(d2_var_names,func=EqualStride(),involved=True) else: var3_list_loc=d3_var_names var2_list_loc=d2_var_names # Calculate global means # if me.get_rank() == 0 and (verbose == True): print "Calculating global means ....." if not opts_dict['cumul']: gm3d,gm2d,var_list = pyEnsLib.generate_global_mean_for_summary(o_files,var3_list_loc,var2_list_loc , is_SE, False,opts_dict) if me.get_rank() == 0 and (verbose == True): print "Finish calculating global means ....." # Calculate RMSZ scores if (not opts_dict['gmonly']) | (opts_dict['cumul']): if me.get_rank() == 0 and (verbose == True): print "Calculating RMSZ scores ....." zscore3d,zscore2d,ens_avg3d,ens_stddev3d,ens_avg2d,ens_stddev2d,temp1,temp2=pyEnsLib.calc_rmsz(o_files,var3_list_loc,var2_list_loc,is_SE,opts_dict) # Calculate max norm ensemble if opts_dict['maxnorm']: if me.get_rank() == 0 and (verbose == True): print "Calculating max norm of ensembles ....." pyEnsLib.calculate_maxnormens(opts_dict,var3_list_loc) pyEnsLib.calculate_maxnormens(opts_dict,var2_list_loc) if opts_dict['mpi_enable'] & ( not opts_dict['popens']): if not opts_dict['cumul']: # Gather the 3d variable results from all processors to the master processor slice_index=get_stride_list(len(d3_var_names),me) # Gather global means 3d results gm3d=gather_npArray(gm3d,me,slice_index,(len(d3_var_names),len(o_files))) if not opts_dict['gmonly']: # Gather zscore3d results zscore3d=gather_npArray(zscore3d,me,slice_index,(len(d3_var_names),len(o_files))) # Gather ens_avg3d and ens_stddev3d results shape_tuple3d=get_shape(ens_avg3d.shape,len(d3_var_names),me.get_rank()) ens_avg3d=gather_npArray(ens_avg3d,me,slice_index,shape_tuple3d) ens_stddev3d=gather_npArray(ens_stddev3d,me,slice_index,shape_tuple3d) # Gather 2d variable results from all processors to the master processor slice_index=get_stride_list(len(d2_var_names),me) # Gather global means 2d results gm2d=gather_npArray(gm2d,me,slice_index,(len(d2_var_names),len(o_files))) var_list=gather_list(var_list,me) if not opts_dict['gmonly']: # Gather zscore2d results zscore2d=gather_npArray(zscore2d,me,slice_index,(len(d2_var_names),len(o_files))) # Gather ens_avg3d and ens_stddev2d results shape_tuple2d=get_shape(ens_avg2d.shape,len(d2_var_names),me.get_rank()) ens_avg2d=gather_npArray(ens_avg2d,me,slice_index,shape_tuple2d) ens_stddev2d=gather_npArray(ens_stddev2d,me,slice_index,shape_tuple2d) else: gmall=np.concatenate((temp1,temp2),axis=0) gmall=pyEnsLib.gather_npArray_pop(gmall,me,(me.get_size(),len(d3_var_names)+len(d2_var_names))) # Assign to file: if me.get_rank() == 0 | opts_dict['popens'] : if not opts_dict['cumul']: gmall=np.concatenate((gm3d,gm2d),axis=0) if not opts_dict['gmonly']: Zscoreall=np.concatenate((zscore3d,zscore2d),axis=0) v_RMSZ[:,:]=Zscoreall[:,:] if not opts_dict['gmonly']: if (is_SE == True): v_ens_avg3d[:,:,:]=ens_avg3d[:,:,:] v_ens_stddev3d[:,:,:]=ens_stddev3d[:,:,:] v_ens_avg2d[:,:]=ens_avg2d[:,:] v_ens_stddev2d[:,:]=ens_stddev2d[:,:] else: v_ens_avg3d[:,:,:,:]=ens_avg3d[:,:,:,:] v_ens_stddev3d[:,:,:,:]=ens_stddev3d[:,:,:,:] v_ens_avg2d[:,:,:]=ens_avg2d[:,:,:] v_ens_stddev2d[:,:,:]=ens_stddev2d[:,:,:] else: gmall_temp=np.transpose(gmall[:,:]) 
gmall=gmall_temp mu_gm,sigma_gm,standardized_global_mean,loadings_gm,scores_gm=pyEnsLib.pre_PCA(gmall,all_var_names,var_list,me) v_gm[:,:]=gmall[:,:] v_standardized_gm[:,:]=standardized_global_mean[:,:] v_mu_gm[:]=mu_gm[:] v_sigma_gm[:]=sigma_gm[:].astype(np.float32) v_loadings_gm[:,:]=loadings_gm[:,:] v_sigma_scores_gm[:]=scores_gm[:] if me.get_rank() == 0: print "All Done"
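# For orientation, the standardization that pre_PCA's outputs imply can
# be sketched in plain numpy (illustrative only, not pyEnsLib's actual
# implementation): gmall has shape (nvars, ens_size), and each variable's
# ensemble of global means is centered and scaled, which is what fills
# mu_gm, sigma_gm, and standardized_gm above.
import numpy as np


def standardize_global_means(gmall):
    mu = gmall.mean(axis=1)      # per-variable mean, shape (nvars,)
    sigma = gmall.std(axis=1)    # per-variable std, shape (nvars,)
    standardized = (gmall - mu[:, np.newaxis]) / sigma[:, np.newaxis]
    return mu, sigma, standardized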
def main(argv): print "Running pyEnsSum!" # Get command line stuff and store in a dictionary s = "tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex=" optkeys = s.split() try: opts, args = getopt.getopt(argv, "h", optkeys) except getopt.GetoptError: pyEnsLib.EnsSum_usage() sys.exit(2) # Put command line options in a dictionary - also set defaults opts_dict = {} # Defaults opts_dict["tag"] = "" opts_dict["compset"] = "" opts_dict["mach"] = "" opts_dict["esize"] = 151 opts_dict["tslice"] = 0 opts_dict["res"] = "" opts_dict["sumfile"] = "ens.summary.nc" opts_dict["indir"] = "./" opts_dict["sumfiledir"] = "./" opts_dict["jsonfile"] = "" opts_dict["verbose"] = True opts_dict["mpi_enable"] = False opts_dict["maxnorm"] = False opts_dict["gmonly"] = False opts_dict["popens"] = False opts_dict["cumul"] = False opts_dict["regx"] = "test" opts_dict["startMon"] = 1 opts_dict["endMon"] = 1 opts_dict["fIndex"] = 151 # This creates the dictionary of input arguments opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, "ES", opts_dict) verbose = opts_dict["verbose"] st = opts_dict["esize"] esize = int(st) if verbose == True: print opts_dict print "Ensemble size for summary = ", esize if not (opts_dict["tag"] and opts_dict["compset"] and opts_dict["mach"] or opts_dict["res"]): print "Please specify --tag, --compset, --mach and --res options" sys.exit() # Now find file names in indir input_dir = opts_dict["indir"] # The var list that will be excluded ex_varlist = [] # Create a mpi simplecomm object if opts_dict["mpi_enable"]: me = simplecomm.create_comm() else: me = simplecomm.create_comm(not opts_dict["mpi_enable"]) if me.get_rank() == 0: if opts_dict["jsonfile"]: # Read in the excluded var list ex_varlist = pyEnsLib.read_jsonlist(opts_dict["jsonfile"], "ES") # Broadcast the excluded var list to each processor if opts_dict["mpi_enable"]: ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True) in_files = [] if os.path.exists(input_dir): # Get the list of files in_files_temp = os.listdir(input_dir) in_files = sorted(in_files_temp) # Make sure we have enough num_files = len(in_files) if verbose == True: print "Number of files in input directory = ", num_files if num_files < esize: print "Number of files in input directory (", num_files, ") is less than specified ensemble size of ", esize sys.exit(2) if num_files > esize: print "NOTE: Number of files in ", input_dir, "is greater than specified ensemble size of ", esize, "\nwill just use the first ", esize, "files" else: print "Input directory: ", input_dir, " not found" sys.exit(2) if opts_dict["cumul"]: if opts_dict["regx"]: in_files_list = get_cumul_filelist(opts_dict, opts_dict["indir"], opts_dict["regx"]) in_files = me.partition(in_files_list, func=EqualLength(), involved=True) if me.get_rank() == 0: print "in_files=", in_files # Open the files in the input directory o_files = [] for onefile in in_files[0:esize]: if os.path.isfile(input_dir + "/" + onefile): o_files.append(Nio.open_file(input_dir + "/" + onefile, "r")) else: print "COULD NOT LOCATE FILE " + input_dir + onefile + "! EXITING...." 
sys.exit() # Store dimensions of the input fields if verbose == True: print "Getting spatial dimensions" nlev = -1 ncol = -1 nlat = -1 nlon = -1 lonkey = "" latkey = "" # Look at first file and get dims input_dims = o_files[0].dimensions ndims = len(input_dims) for key in input_dims: if key == "lev": nlev = input_dims["lev"] elif key == "ncol": ncol = input_dims["ncol"] elif (key == "nlon") or (key == "lon"): nlon = input_dims[key] lonkey = key elif (key == "nlat") or (key == "lat"): nlat = input_dims[key] latkey = key if nlev == -1: print "COULD NOT LOCATE valid dimension lev => EXITING...." sys.exit() if (ncol == -1) and ((nlat == -1) or (nlon == -1)): print "Need either lat/lon or ncol => EXITING...." sys.exit() # Check if this is SE or FV data if ncol != -1: is_SE = True else: is_SE = False # Make sure all files have the same dimensions if verbose == True: print "Checking dimensions across files...." print "lev = ", nlev if is_SE == True: print "ncol = ", ncol else: print "nlat = ", nlat print "nlon = ", nlon for count, this_file in enumerate(o_files): input_dims = this_file.dimensions if is_SE == True: if nlev != int(input_dims["lev"]) or (ncol != int(input_dims["ncol"])): print "Dimension mismatch between ", in_files[0], "and", in_files[0], "!!!" sys.exit() else: if nlev != int(input_dims["lev"]) or (nlat != int(input_dims[latkey])) or (nlon != int(input_dims[lonkey])): print "Dimension mismatch between ", in_files[0], "and", in_files[0], "!!!" sys.exit() # Get 2d vars, 3d vars and all vars (For now include all variables) vars_dict = o_files[0].variables # Remove the excluded variables (specified in json file) from variable dictionary if ex_varlist: for i in ex_varlist: del vars_dict[i] num_vars = len(vars_dict) if verbose == True: print "Number of variables (including metadata) found = ", num_vars str_size = 0 d2_var_names = [] d3_var_names = [] num_2d = 0 num_3d = 0 # Which are 2d, which are 3d and max str_size for k, v in vars_dict.iteritems(): var = k vd = v.dimensions # all the variable's dimensions (names) vr = v.rank # num dimension vs = v.shape # dim values is_2d = False is_3d = False if is_SE == True: # (time, lev, ncol) or (time, ncol) if (vr == 2) and (vs[1] == ncol): is_2d = True num_2d += 1 elif (vr == 3) and (vs[2] == ncol and vs[1] == nlev): is_3d = True num_3d += 1 else: # (time, lev, nlon, nlon) or (time, nlat, nlon) if (vr == 3) and (vs[1] == nlat and vs[2] == nlon): is_2d = True num_2d += 1 elif (vr == 4) and (vs[2] == nlat and vs[3] == nlon and vs[1] == nlev): is_3d = True num_3d += 1 if is_3d == True: str_size = max(str_size, len(k)) d3_var_names.append(k) elif is_2d == True: str_size = max(str_size, len(k)) d2_var_names.append(k) # Now sort these and combine (this sorts caps first, then lower case - # which is what we want) d2_var_names.sort() d3_var_names.sort() # All vars is 3d vars first (sorted), the 2d vars all_var_names = list(d3_var_names) all_var_names += d2_var_names n_all_var_names = len(all_var_names) if verbose == True: print "num vars = ", n_all_var_names, "(3d = ", num_3d, " and 2d = ", num_2d, ")" # Create new summary ensemble file this_sumfile = opts_dict["sumfile"] if verbose == True: print "Creating ", this_sumfile, " ..." if me.get_rank() == 0 | opts_dict["popens"]: if os.path.exists(this_sumfile): os.unlink(this_sumfile) opt = Nio.options() opt.PreFill = False opt.Format = "NetCDF4Classic" nc_sumfile = Nio.open_file(this_sumfile, "w", options=opt) # Set dimensions if verbose == True: print "Setting dimensions ....." 
if is_SE == True: nc_sumfile.create_dimension("ncol", ncol) else: nc_sumfile.create_dimension("nlat", nlat) nc_sumfile.create_dimension("nlon", nlon) nc_sumfile.create_dimension("nlev", nlev) nc_sumfile.create_dimension("ens_size", esize) nc_sumfile.create_dimension("nvars", num_3d + num_2d) nc_sumfile.create_dimension("nvars3d", num_3d) nc_sumfile.create_dimension("nvars2d", num_2d) nc_sumfile.create_dimension("str_size", str_size) # Set global attributes now = time.strftime("%c") if verbose == True: print "Setting global attributes ....." setattr(nc_sumfile, "creation_date", now) setattr(nc_sumfile, "title", "CAM verification ensemble summary file") setattr(nc_sumfile, "tag", opts_dict["tag"]) setattr(nc_sumfile, "compset", opts_dict["compset"]) setattr(nc_sumfile, "resolution", opts_dict["res"]) setattr(nc_sumfile, "machine", opts_dict["mach"]) # Create variables if verbose == True: print "Creating variables ....." v_lev = nc_sumfile.create_variable("lev", "f", ("nlev",)) v_vars = nc_sumfile.create_variable("vars", "S1", ("nvars", "str_size")) v_var3d = nc_sumfile.create_variable("var3d", "S1", ("nvars3d", "str_size")) v_var2d = nc_sumfile.create_variable("var2d", "S1", ("nvars2d", "str_size")) if not opts_dict["gmonly"]: if is_SE == True: v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", "f", ("nvars3d", "nlev", "ncol")) v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", "f", ("nvars3d", "nlev", "ncol")) v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", "f", ("nvars2d", "ncol")) v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", "f", ("nvars2d", "ncol")) else: v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", "f", ("nvars3d", "nlev", "nlat", "nlon")) v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", "f", ("nvars3d", "nlev", "nlat", "nlon")) v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", "f", ("nvars2d", "nlat", "nlon")) v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", "f", ("nvars2d", "nlat", "nlon")) v_RMSZ = nc_sumfile.create_variable("RMSZ", "f", ("nvars", "ens_size")) v_gm = nc_sumfile.create_variable("global_mean", "f", ("nvars", "ens_size")) v_loadings_gm = nc_sumfile.create_variable("loadings_gm", "f", ("nvars", "nvars")) v_mu_gm = nc_sumfile.create_variable("mu_gm", "f", ("nvars",)) v_sigma_gm = nc_sumfile.create_variable("sigma_gm", "f", ("nvars",)) v_sigma_scores_gm = nc_sumfile.create_variable("sigma_scores_gm", "f", ("nvars",)) # Assign vars, var3d and var2d if verbose == True: print "Assigning vars, var3d, and var2d ....." eq_all_var_names = [] eq_d3_var_names = [] eq_d2_var_names = [] l_eq = len(all_var_names) for i in range(l_eq): tt = list(all_var_names[i]) l_tt = len(tt) if l_tt < str_size: extra = list(" ") * (str_size - l_tt) tt.extend(extra) eq_all_var_names.append(tt) l_eq = len(d3_var_names) for i in range(l_eq): tt = list(d3_var_names[i]) l_tt = len(tt) if l_tt < str_size: extra = list(" ") * (str_size - l_tt) tt.extend(extra) eq_d3_var_names.append(tt) l_eq = len(d2_var_names) for i in range(l_eq): tt = list(d2_var_names[i]) l_tt = len(tt) if l_tt < str_size: extra = list(" ") * (str_size - l_tt) tt.extend(extra) eq_d2_var_names.append(tt) v_vars[:] = eq_all_var_names[:] v_var3d[:] = eq_d3_var_names[:] v_var2d[:] = eq_d2_var_names[:] # Time-invarient metadata if verbose == True: print "Assigning time invariant metadata ....." 
lev_data = vars_dict["lev"] v_lev = lev_data # Form ensembles, each missing one member; compute RMSZs and global means # for each variable, we also do max norm also (currently done in pyStats) tslice = opts_dict["tslice"] if not opts_dict["cumul"]: # Partition the var list var3_list_loc = me.partition(d3_var_names, func=EqualStride(), involved=True) var2_list_loc = me.partition(d2_var_names, func=EqualStride(), involved=True) else: var3_list_loc = d3_var_names var2_list_loc = d2_var_names # Calculate global means # if verbose == True: print "Calculating global means ....." if not opts_dict["cumul"]: gm3d, gm2d = pyEnsLib.generate_global_mean_for_summary( o_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict ) if verbose == True: print "Finish calculating global means ....." # Calculate RMSZ scores if verbose == True: print "Calculating RMSZ scores ....." if (not opts_dict["gmonly"]) | (opts_dict["cumul"]): zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz( o_files, var3_list_loc, var2_list_loc, is_SE, opts_dict ) # Calculate max norm ensemble if opts_dict["maxnorm"]: if verbose == True: print "Calculating max norm of ensembles ....." pyEnsLib.calculate_maxnormens(opts_dict, var3_list_loc) pyEnsLib.calculate_maxnormens(opts_dict, var2_list_loc) if opts_dict["mpi_enable"] & (not opts_dict["popens"]): if not opts_dict["cumul"]: # Gather the 3d variable results from all processors to the master processor slice_index = get_stride_list(len(d3_var_names), me) # Gather global means 3d results gm3d = gather_npArray(gm3d, me, slice_index, (len(d3_var_names), len(o_files))) if not opts_dict["gmonly"]: # Gather zscore3d results zscore3d = gather_npArray(zscore3d, me, slice_index, (len(d3_var_names), len(o_files))) # Gather ens_avg3d and ens_stddev3d results shape_tuple3d = get_shape(ens_avg3d.shape, len(d3_var_names), me.get_rank()) ens_avg3d = gather_npArray(ens_avg3d, me, slice_index, shape_tuple3d) ens_stddev3d = gather_npArray(ens_stddev3d, me, slice_index, shape_tuple3d) # Gather 2d variable results from all processors to the master processor slice_index = get_stride_list(len(d2_var_names), me) # Gather global means 2d results gm2d = gather_npArray(gm2d, me, slice_index, (len(d2_var_names), len(o_files))) if not opts_dict["gmonly"]: # Gather zscore2d results zscore2d = gather_npArray(zscore2d, me, slice_index, (len(d2_var_names), len(o_files))) # Gather ens_avg3d and ens_stddev2d results shape_tuple2d = get_shape(ens_avg2d.shape, len(d2_var_names), me.get_rank()) ens_avg2d = gather_npArray(ens_avg2d, me, slice_index, shape_tuple2d) ens_stddev2d = gather_npArray(ens_stddev2d, me, slice_index, shape_tuple2d) else: gmall = np.concatenate((temp1, temp2), axis=0) gmall = pyEnsLib.gather_npArray_pop(gmall, me, (me.get_size(), len(d3_var_names) + len(d2_var_names))) # Assign to file: if me.get_rank() == 0 | opts_dict["popens"]: if not opts_dict["cumul"]: gmall = np.concatenate((gm3d, gm2d), axis=0) if not opts_dict["gmonly"]: Zscoreall = np.concatenate((zscore3d, zscore2d), axis=0) v_RMSZ[:, :] = Zscoreall[:, :] if not opts_dict["gmonly"]: if is_SE == True: v_ens_avg3d[:, :, :] = ens_avg3d[:, :, :] v_ens_stddev3d[:, :, :] = ens_stddev3d[:, :, :] v_ens_avg2d[:, :] = ens_avg2d[:, :] v_ens_stddev2d[:, :] = ens_stddev2d[:, :] else: v_ens_avg3d[:, :, :, :] = ens_avg3d[:, :, :, :] v_ens_stddev3d[:, :, :, :] = ens_stddev3d[:, :, :, :] v_ens_avg2d[:, :, :] = ens_avg2d[:, :, :] v_ens_stddev2d[:, :, :] = ens_stddev2d[:, :, :] else: gmall_temp = 
np.transpose(gmall[:, :]) gmall = gmall_temp mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm = pyEnsLib.pre_PCA(gmall) v_gm[:, :] = gmall[:, :] v_mu_gm[:] = mu_gm[:] v_sigma_gm[:] = sigma_gm[:].astype(np.float32) v_loadings_gm[:, :] = loadings_gm[:, :] v_sigma_scores_gm[:] = scores_gm[:] print "All Done"
def combine_met_data(rootdir, date, ofilepath): # # Set the dirs based on date # day = timedelta(days=1) date0 = date - day dir0 = rootdir + "/" + date0.strftime("Y%Y/M%m/D%d") dir1 = rootdir + "/" + date.strftime("Y%Y/M%m/D%d") # # Set the PreFill option to False to improve writing performance # opt = Nio.options() opt.PreFill = False # # Options for writing NetCDF4 "classic" file. # # If Nio wasn't built with netcdf 4 support, you will get a # warning here, and the code will use netcdf 3 instead. # opt.Format = "netcdf4classic" #opt.Format = "LargeFile" vrt_file = Nio.open_file( '/glade/p/acom/acom-climate/fvitt/GEOS/GEOS5_orig_res_20180715.nc', mode='r') # define vertical coordinate yyyymmdd = date.strftime("%Y%m%d") os.system("/bin/rm -f " + ofilepath) now = datetime.now() hist_str = 'created by combine_met_data.py : ' + now.strftime( "%a %d %b %Y %H:%M:%S") out_file = Nio.open_file(ofilepath, mode='c', options=opt, history=hist_str) # vertical dimension ... # define dimensions and ALL variables before writing the data .... length = vrt_file.dimensions["lev"] out_file.create_dimension("lev", length) length = vrt_file.dimensions["ilev"] out_file.create_dimension("ilev", length) # define horizontal coordinates hrz_file = Nio.open_file( '/glade/p/acom/acom-climate/fvitt/GEOS/GEOS.fp.asm.const_2d_asm_Nx.00000000_0000.V01.nc4' ) length = hrz_file.dimensions["lat"] out_file.create_dimension("lat", length) length = hrz_file.dimensions["lon"] out_file.create_dimension("lon", length) # time dimension ... out_file.create_dimension("time", None) refdate = datetime(1900, 01, 01) dims = ('time', ) out_file.create_variable("time", 'd', dims) setattr(out_file.variables['time'], 'units', 'days') setattr(out_file.variables['time'], 'long_name', 'days since ' + refdate.strftime("%d %b %Y %H:%M:%S")) out_file.create_variable("date", 'i', dims) setattr(out_file.variables["date"], 'units', 'current date (YYYYMMDD)') setattr(out_file.variables["date"], 'long_name', 'current date (YYYYMMDD)') out_file.create_variable("datesec", 'i', dims) setattr(out_file.variables["datesec"], 'units', 'seconds') setattr(out_file.variables["datesec"], 'long_name', 'current seconds of current date') vrt_vars = ["lev", "ilev", "hyam", "hybm", "hyai", "hybi"] for var in vrt_vars: type = vrt_file.variables[var].typecode() vdims = vrt_file.variables[var].dimensions out_file.create_variable(var, type, vdims) varatts = vrt_file.variables[var].__dict__.keys() for att in varatts: val = getattr(vrt_file.variables[var], att) setattr(out_file.variables[var], att, val) hrz_vars = ["lon", "lat", "PHIS"] for var in hrz_vars: type = hrz_file.variables[var].typecode() vdims = hrz_file.variables[var].dimensions out_file.create_variable(var, type, vdims) varatts = hrz_file.variables[var].__dict__.keys() for att in varatts: val = getattr(hrz_file.variables[var], att) setattr(out_file.variables[var], att, val) type = hrz_file.variables["FRLAND"].typecode() vdims = hrz_file.variables["FRLAND"].dimensions out_file.create_variable("ORO", type, vdims) varatts = hrz_file.variables["FRLAND"].__dict__.keys() for att in varatts: val = getattr(hrz_file.variables["FRLAND"], att) setattr(out_file.variables["ORO"], att, val) tavg_flx_vars = { 'HFLUX': 'SHFLX', 'TAUX': 'TAUX', 'TAUY': 'TAUY', 'EVAP': 'QFLX' } # flx tavg_flx_filem = glob.glob(dir0 + '/GEOS.fp.asm.tavg1_2d_flx_Nx.*_2330.V01.nc4') define_flds(tavg_flx_vars, tavg_flx_filem, out_file) tavg_rad_vars = {'ALBEDO': 'ALB', 'TS': 'TS', 'SWGDN': 'FSDS'} # rad tavg_rad_filem = glob.glob(dir0 + 
                                '/GEOS.fp.asm.tavg1_2d_rad_Nx.*_2330.V01.nc4')
    define_flds(tavg_rad_vars, tavg_rad_filem, out_file)

    tavg_lnd_vars = {'GWETTOP': 'SOILW', 'SNOMAS': 'SNOWH'}  # lnd
    tavg_lnd_filem = glob.glob(dir0 +
                               '/GEOS.fp.asm.tavg1_2d_lnd_Nx.*_2330.V01.nc4')
    define_flds(tavg_lnd_vars, tavg_lnd_filem, out_file)

    inst_vars = {'PS': 'PS', 'T': 'T', 'U': 'U', 'V': 'V', 'QV': 'Q'}
    inst_files = glob.glob(dir1 + '/GEOS.fp.asm.inst3_3d_asm_Nv.*.nc4')
    inst_files.sort()
    define_flds(inst_vars, inst_files, out_file)

    # definitions should be done at this point
    # Write coordinate dimension variables first
    for var in vrt_vars:
        if vrt_file.dimensions.keys().count(var) > 0:
            v = vrt_file.variables[var].get_value()
            out_file.variables[var].assign_value(v)
    for var in vrt_vars:
        if vrt_file.dimensions.keys().count(var) == 0:
            v = vrt_file.variables[var].get_value()
            out_file.variables[var].assign_value(v)
    vrt_file.close()

    # set time/date data ...
    times = [i * 3 for i in range(8)]  # hours
    days = list()
    datesecs = list()
    for hr in times:
        d = datetime(date.year, date.month, date.day, hr, 0, 0)
        dd = d - refdate
        days.append(dd.days + (dd.seconds / 86400.0))
        datesecs.append(dd.seconds)
    out_file.variables['time'].assign_value(days)
    out_file.variables['date'].assign_value(int(yyyymmdd))
    out_file.variables['datesec'].assign_value(datesecs)

    var = "lat"
    v = hrz_file.variables[var].get_value()
    out_file.variables[var].assign_value(v)
    var = "lon"
    v = hrz_file.variables[var].get_value()
    # want longitudes from 0 to 360 (rather than -180 to 180)
    neglons = numpy.where(v < 0.0)
    nroll = neglons[0][-1] + 1
    lons = numpy.roll(v, nroll)
    lons = numpy.where(lons < 0., lons + 360., lons)
    lons = numpy.where(lons < 1.e-3, 0., lons)  # GEOS data has a small value rather than zero
    out_file.variables[var].assign_value(lons)

    for var in hrz_vars:
        if hrz_file.dimensions.keys().count(var) == 0:
            v = hrz_file.variables[var].get_value()
            v = numpy.roll(v, nroll, axis=2)
            v = numpy.tile(v, (8, 1, 1))
            out_file.variables[var].assign_value(v)

    files = glob.glob(dir1 + '/GEOS.fp.asm.tavg1_2d_flx_Nx.*.nc4')
    files.sort()
    filepaths = tavg_flx_filem + files
    write_tavg_flds(tavg_flx_vars, filepaths, nroll, out_file)

    # special code for ORO
    ivar = 'FRSEAICE'
    for n in range(1, 24, 3):
        filem = Nio.open_file(filepaths[n - 1])
        filep = Nio.open_file(filepaths[n])
        valm = filem.variables[ivar].get_value()
        valp = filep.variables[ivar].get_value()
        ndims = filep.variables[ivar].rank
        vala = 0.5 * (valm + valp)
        vala = numpy.roll(vala, nroll, ndims - 1)
        if n > 1:
            val = numpy.append(val, vala, axis=0)
        else:
            val = vala
    seaice = val

    v = hrz_file.variables["FRLAND"].get_value()
    v = numpy.roll(v, nroll, axis=2)
    v = numpy.tile(v, (8, 1, 1))
    #v = numpy.where(v==2, 1, v)
    v = numpy.where(seaice > 0.5, 2, v)
    out_file.variables["ORO"].assign_value(v)
    hrz_file.close()

    files = glob.glob(dir1 + '/GEOS.fp.asm.tavg1_2d_rad_Nx.*.nc4')
    files.sort()
    filepaths = tavg_rad_filem + files
    write_tavg_flds(tavg_rad_vars, filepaths, nroll, out_file)

    files = glob.glob(dir1 + '/GEOS.fp.asm.tavg1_2d_lnd_Nx.*.nc4')
    files.sort()
    filepaths = tavg_lnd_filem + files
    write_tavg_flds(tavg_lnd_vars, filepaths, nroll, out_file)

    # instantaneous fields ....
    write_inst_flds(inst_vars, inst_files, nroll, out_file)
    out_file.close()
    return True
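# Hedged usage sketch for combine_met_data: the root path and date are
# illustrative; rootdir must contain the GEOS file tree laid out as
# Y%Y/M%m/D%d, as assumed above.
from datetime import datetime

ok = combine_met_data('/path/to/GEOS', datetime(2018, 7, 15),
                      './met_20180715.nc')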
def setUp(self): do_setup(filename) opt = Nio.options() opt.MaskedArrayMode = "MaskedIfFillAttAndValue" self.f = Nio.open_file(filename, options=opt)
def open_new_file(self, file_name, info=None, var_name='X', long_name=None, units_name='None', dtype='float64', ### dtype='float64' time_units='minutes', comment='', shape=(1,1,1), res=(1.,1.,1.), MAKE_RTI=True, MAKE_BOV=False): #-------------------------------------------------- # Try to import the Nio module from PyNIO package #-------------------------------------------------- Nio = self.import_nio () if not Nio: return False #---------------------------- # Does file already exist ? #---------------------------- file_name = file_utils.check_overwrite( file_name ) self.file_name = file_name #--------------------------------------- # Check and store the grid information #--------------------------------------- self.format = 'nccs' self.file_name = file_name self.time_index = 0 self.var_name = var_name self.shape = shape self.res = res if (long_name is None): long_name = var_name self.long_name = long_name self.units_name = units_name self.dtype = dtype #----------------------------------- # Get Nio type code for this dtype #------------------------------------ nio_type_map = self.get_nio_type_map() nio_type_code = nio_type_map[ dtype.lower() ] self.nio_type_code = nio_type_code #------------------------------------- # Open a new netCDF file for writing #------------------------------------- # Sample output from time.asctime(): # "Thu Oct 8 17:10:18 2009" #------------------------------------- opt = Nio.options() opt.PreFill = False # (for efficiency) opt.HeaderReserveSpace = 4000 # (4000 bytes, for efficiency) history = "Created using PyNIO " + Nio.__version__ + " on " history = history + time.asctime() + ". " history = history + comment # print 'MADE IT PAST history BLOCK' try: nccs_unit = Nio.open_file (file_name, mode="w", options=opt, history=history) OK = True except: OK = False return OK #---------------------------------------------- # Create grid dimensions nx and ny, plus time #---------------------------------------------- # Without using "int()" here, we get this: # TypeError: size must be None or integer #---------------------------------------------- nccs_unit.create_dimension("nz", self.shape[0]) nccs_unit.create_dimension("ny", self.shape[1]) nccs_unit.create_dimension("nx", self.shape[2]) nccs_unit.create_dimension("time", None) # (unlimited dimension) # print 'MADE IT PAST create_dimension CALLS.' #------------------------- # Create a time variable #------------------------------------------ #('d' = float64; must match in add_cube() #------------------------------------------ tvar = nccs_unit.create_variable ('time', 'd', ("time",)) nccs_unit.variables['time'].units = time_units #-------------------------------- # Create a variable in the file #---------------------------------- # Returns "var" as a PyNIO object #---------------------------------- var = nccs_unit.create_variable (var_name, nio_type_code, ("time", "nz", "ny", "nx")) #---------------------------------- # Specify a "nodata" fill value ? #---------------------------------- var._FillValue = -9999.0 ## Does this jive with Prefill above ?? #------------------------------------ # Create attributes of the variable #------------------------------------ nccs_unit.variables[var_name].long_name = long_name nccs_unit.variables[var_name].units = units_name nccs_unit.variables[var_name].dz = self.res[0] nccs_unit.variables[var_name].dy = self.res[1] nccs_unit.variables[var_name].dx = self.res[2] nccs_unit.variables[var_name].y_south_edge = 0. 
nccs_unit.variables[var_name].y_north_edge = self.res[1]*self.shape[1] nccs_unit.variables[var_name].x_west_edge = 0. nccs_unit.variables[var_name].x_east_edge = self.res[2]*self.shape[2] nccs_unit.variables[var_name].z_bottom_edge = 0. nccs_unit.variables[var_name].z_top_edge = self.res[0]*self.shape[0] self.nccs_unit = nccs_unit return OK
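# Hedged usage sketch: 'writer' stands for an instance of the class that
# defines open_new_file; the variable name, units, shape, and resolution
# below are illustrative.
#
#   OK = writer.open_new_file('Q.nc',
#                             var_name='Q',
#                             long_name='specific humidity',
#                             units_name='kg kg-1',
#                             shape=(10, 180, 360),
#                             res=(100., 1., 1.))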
def setUp(self): # print 'Creating temporary file' do_setup(filename) opt = Nio.options() opt.MaskedArrayMode = "MaskedIfFillAtt" self.f = Nio.open_file(filename, options=opt)
def main(argv): # Get command line stuff and store in a dictionary s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex=' optkeys = s.split() try: opts, args = getopt.getopt(argv, "h", optkeys) except getopt.GetoptError: pyEnsLib.EnsSum_usage() sys.exit(2) # Put command line options in a dictionary - also set defaults opts_dict = {} # Defaults opts_dict['tag'] = 'cesm2_0_beta08' opts_dict['compset'] = 'F2000' opts_dict['mach'] = 'cheyenne' opts_dict['esize'] = 350 opts_dict['tslice'] = 1 opts_dict['res'] = 'f19_f19' opts_dict['sumfile'] = 'ens.summary.nc' opts_dict['indir'] = './' opts_dict['sumfiledir'] = './' opts_dict['jsonfile'] = 'exclude_empty.json' opts_dict['verbose'] = False opts_dict['mpi_enable'] = False opts_dict['maxnorm'] = False opts_dict['gmonly'] = True opts_dict['popens'] = False opts_dict['cumul'] = False opts_dict['regx'] = 'test' opts_dict['startMon'] = 1 opts_dict['endMon'] = 1 opts_dict['fIndex'] = 151 # This creates the dictionary of input arguments opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ES', opts_dict) verbose = opts_dict['verbose'] st = opts_dict['esize'] esize = int(st) if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach'] or opts_dict['res']): print 'Please specify --tag, --compset, --mach and --res options' sys.exit() # Now find file names in indir input_dir = opts_dict['indir'] # The var list that will be excluded ex_varlist = [] inc_varlist = [] # Create a mpi simplecomm object if opts_dict['mpi_enable']: me = simplecomm.create_comm() else: me = simplecomm.create_comm(not opts_dict['mpi_enable']) if me.get_rank() == 0: print 'Running pyEnsSum!' if me.get_rank() == 0 and (verbose == True): print opts_dict print 'Ensemble size for summary = ', esize exclude = False if me.get_rank() == 0: if opts_dict['jsonfile']: inc_varlist = [] # Read in the excluded or included var list ex_varlist, exclude = pyEnsLib.read_jsonlist( opts_dict['jsonfile'], 'ES') if exclude == False: inc_varlist = ex_varlist ex_varlist = [] # Read in the included var list #inc_varlist=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES') # Broadcast the excluded var list to each processor #if opts_dict['mpi_enable']: # ex_varlist=me.partition(ex_varlist,func=Duplicate(),involved=True) # Broadcast the excluded var list to each processor if opts_dict['mpi_enable']: exclude = me.partition(exclude, func=Duplicate(), involved=True) if exclude: ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True) else: inc_varlist = me.partition(inc_varlist, func=Duplicate(), involved=True) in_files = [] if (os.path.exists(input_dir)): # Get the list of files in_files_temp = os.listdir(input_dir) in_files = sorted(in_files_temp) # Make sure we have enough num_files = len(in_files) if me.get_rank() == 0 and (verbose == True): print 'Number of files in input directory = ', num_files if (num_files < esize): if me.get_rank() == 0 and (verbose == True): print 'Number of files in input directory (',num_files,\ ') is less than specified ensemble size of ', esize sys.exit(2) if (num_files > esize): if me.get_rank() == 0 and (verbose == True): print 'NOTE: Number of files in ', input_dir, \ 'is greater than specified ensemble size of ', esize ,\ '\nwill just use the first ', esize, 'files' else: if me.get_rank() == 0: print 'Input directory: ', input_dir, ' not found' sys.exit(2) if opts_dict['cumul']: if opts_dict['regx']: in_files_list = get_cumul_filelist(opts_dict, 
opts_dict['indir'], opts_dict['regx']) in_files = me.partition(in_files_list, func=EqualLength(), involved=True) if me.get_rank() == 0 and (verbose == True): print 'in_files=', in_files # Open the files in the input directory o_files = [] if me.get_rank() == 0 and opts_dict['verbose']: print 'Input files are: ' print "\n".join(in_files) #for i in in_files: # print "in_files =",i for onefile in in_files[0:esize]: if (os.path.isfile(input_dir + '/' + onefile)): o_files.append(Nio.open_file(input_dir + '/' + onefile, "r")) else: if me.get_rank() == 0: print "COULD NOT LOCATE FILE " + input_dir + onefile + "! EXITING...." sys.exit() # Store dimensions of the input fields if me.get_rank() == 0 and (verbose == True): print "Getting spatial dimensions" nlev = -1 nilev = -1 ncol = -1 nlat = -1 nlon = -1 lonkey = '' latkey = '' # Look at first file and get dims input_dims = o_files[0].dimensions ndims = len(input_dims) for key in input_dims: if key == "lev": nlev = input_dims["lev"] elif key == "ilev": nilev = input_dims["ilev"] elif key == "ncol": ncol = input_dims["ncol"] elif (key == "nlon") or (key == "lon"): nlon = input_dims[key] lonkey = key elif (key == "nlat") or (key == "lat"): nlat = input_dims[key] latkey = key if (nlev == -1): if me.get_rank() == 0: print "COULD NOT LOCATE valid dimension lev => EXITING...." sys.exit() if ((ncol == -1) and ((nlat == -1) or (nlon == -1))): if me.get_rank() == 0: print "Need either lat/lon or ncol => EXITING...." sys.exit() # Check if this is SE or FV data if (ncol != -1): is_SE = True else: is_SE = False # Make sure all files have the same dimensions if me.get_rank() == 0 and (verbose == True): print "Checking dimensions across files...." print 'lev = ', nlev if (is_SE == True): print 'ncol = ', ncol else: print 'nlat = ', nlat print 'nlon = ', nlon for count, this_file in enumerate(o_files): input_dims = this_file.dimensions if (is_SE == True): if (nlev != int(input_dims["lev"]) or (ncol != int(input_dims["ncol"]))): if me.get_rank() == 0: print "Dimension mismatch between ", in_files[ 0], 'and', in_files[0], '!!!' sys.exit() else: if ( nlev != int(input_dims["lev"]) or ( nlat != int(input_dims[latkey]))\ or ( nlon != int(input_dims[lonkey]))): if me.get_rank() == 0: print "Dimension mismatch between ", in_files[ 0], 'and', in_files[0], '!!!' 
sys.exit() # Get 2d vars, 3d vars and all vars (For now include all variables) vars_dict_all = o_files[0].variables # Remove the excluded variables (specified in json file) from variable dictionary #print len(vars_dict_all) if exclude: vars_dict = vars_dict_all for i in ex_varlist: if i in vars_dict: del vars_dict[i] #Given an included var list, remove all float var that are not on the list else: vars_dict = vars_dict_all.copy() for k, v in vars_dict_all.iteritems(): if (k not in inc_varlist) and (vars_dict_all[k].typecode() == 'f'): #print vars_dict_all[k].typecode() #print k del vars_dict[k] num_vars = len(vars_dict) #print num_vars #if me.get_rank() == 0: # for k,v in vars_dict.iteritems(): # print 'vars_dict',k,vars_dict[k].typecode() str_size = 0 d2_var_names = [] d3_var_names = [] num_2d = 0 num_3d = 0 # Which are 2d, which are 3d and max str_size for k, v in vars_dict.iteritems(): var = k vd = v.dimensions # all the variable's dimensions (names) vr = v.rank # num dimension vs = v.shape # dim values is_2d = False is_3d = False if (is_SE == True): # (time, lev, ncol) or (time, ncol) if ((vr == 2) and (vs[1] == ncol)): is_2d = True num_2d += 1 elif ((vr == 3) and (vs[2] == ncol and vs[1] == nlev)): is_3d = True num_3d += 1 else: # (time, lev, nlon, nlon) or (time, nlat, nlon) if ((vr == 3) and (vs[1] == nlat and vs[2] == nlon)): is_2d = True num_2d += 1 elif ((vr == 4) and (vs[2] == nlat and vs[3] == nlon and (vs[1] == nlev or vs[1] == nilev))): is_3d = True num_3d += 1 if (is_3d == True): str_size = max(str_size, len(k)) d3_var_names.append(k) elif (is_2d == True): str_size = max(str_size, len(k)) d2_var_names.append(k) #else: # print 'var=',k if me.get_rank() == 0 and (verbose == True): print 'Number of variables found: ', num_3d + num_2d print '3D variables: ' + str(num_3d) + ', 2D variables: ' + str(num_2d) # Now sort these and combine (this sorts caps first, then lower case - # which is what we want) d2_var_names.sort() d3_var_names.sort() if esize < num_2d + num_3d: if me.get_rank() == 0: print "************************************************************************************************************************************" print " Error: the total number of 3D and 2D variables " + str( num_2d + num_3d ) + " is larger than the number of ensemble files " + str(esize) print " Cannot generate ensemble summary file, please remove more variables from your included variable list," print " or add more varaibles in your excluded variable list!!!" print "************************************************************************************************************************************" sys.exit() # All vars is 3d vars first (sorted), the 2d vars all_var_names = list(d3_var_names) all_var_names += d2_var_names n_all_var_names = len(all_var_names) #if me.get_rank() == 0 and (verbose == True): # print 'num vars = ', n_all_var_names, '(3d = ', num_3d, ' and 2d = ', num_2d, ")" # Create new summary ensemble file this_sumfile = opts_dict["sumfile"] if me.get_rank() == 0 and (verbose == True): print "Creating ", this_sumfile, " ..." if (me.get_rank() == 0 | opts_dict["popens"]): if os.path.exists(this_sumfile): os.unlink(this_sumfile) opt = Nio.options() opt.PreFill = False opt.Format = 'NetCDF4Classic' nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt) # Set dimensions if me.get_rank() == 0 and (verbose == True): print "Setting dimensions ....." 
if (is_SE == True): nc_sumfile.create_dimension('ncol', ncol) else: nc_sumfile.create_dimension('nlat', nlat) nc_sumfile.create_dimension('nlon', nlon) nc_sumfile.create_dimension('nlev', nlev) nc_sumfile.create_dimension('ens_size', esize) nc_sumfile.create_dimension('nvars', num_3d + num_2d) nc_sumfile.create_dimension('nvars3d', num_3d) nc_sumfile.create_dimension('nvars2d', num_2d) nc_sumfile.create_dimension('str_size', str_size) # Set global attributes now = time.strftime("%c") if me.get_rank() == 0 and (verbose == True): print "Setting global attributes ....." setattr(nc_sumfile, 'creation_date', now) setattr(nc_sumfile, 'title', 'CAM verification ensemble summary file') setattr(nc_sumfile, 'tag', opts_dict["tag"]) setattr(nc_sumfile, 'compset', opts_dict["compset"]) setattr(nc_sumfile, 'resolution', opts_dict["res"]) setattr(nc_sumfile, 'machine', opts_dict["mach"]) # Create variables if me.get_rank() == 0 and (verbose == True): print "Creating variables ....." v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev', )) v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size')) v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size')) v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size')) if not opts_dict['gmonly']: if (is_SE == True): v_ens_avg3d = nc_sumfile.create_variable( "ens_avg3d", 'f', ('nvars3d', 'nlev', 'ncol')) v_ens_stddev3d = nc_sumfile.create_variable( "ens_stddev3d", 'f', ('nvars3d', 'nlev', 'ncol')) v_ens_avg2d = nc_sumfile.create_variable( "ens_avg2d", 'f', ('nvars2d', 'ncol')) v_ens_stddev2d = nc_sumfile.create_variable( "ens_stddev2d", 'f', ('nvars2d', 'ncol')) else: v_ens_avg3d = nc_sumfile.create_variable( "ens_avg3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon')) v_ens_stddev3d = nc_sumfile.create_variable( "ens_stddev3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon')) v_ens_avg2d = nc_sumfile.create_variable( "ens_avg2d", 'f', ('nvars2d', 'nlat', 'nlon')) v_ens_stddev2d = nc_sumfile.create_variable( "ens_stddev2d", 'f', ('nvars2d', 'nlat', 'nlon')) v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f', ('nvars', 'ens_size')) v_gm = nc_sumfile.create_variable("global_mean", 'f', ('nvars', 'ens_size')) v_standardized_gm = nc_sumfile.create_variable("standardized_gm", 'f', ('nvars', 'ens_size')) v_loadings_gm = nc_sumfile.create_variable('loadings_gm', 'f', ('nvars', 'nvars')) v_mu_gm = nc_sumfile.create_variable('mu_gm', 'f', ('nvars', )) v_sigma_gm = nc_sumfile.create_variable('sigma_gm', 'f', ('nvars', )) v_sigma_scores_gm = nc_sumfile.create_variable('sigma_scores_gm', 'f', ('nvars', )) # Assign vars, var3d and var2d if me.get_rank() == 0 and (verbose == True): print "Assigning vars, var3d, and var2d ....." 
        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []

        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(d3_var_names)
        for i in range(l_eq):
            tt = list(d3_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(d2_var_names)
        for i in range(l_eq):
            tt = list(d2_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invariant metadata
        if me.get_rank() == 0 and (verbose == True):
            print "Assigning time invariant metadata ....."
        lev_data = vars_dict["lev"]
        v_lev[:] = lev_data[:]

    # Form ensembles, each missing one member; compute RMSZs and global means
    # for each variable (max norms are also computed, currently in pyStats)
    tslice = opts_dict['tslice']

    if not opts_dict['cumul']:
        # Partition the var list
        var3_list_loc = me.partition(d3_var_names, func=EqualStride(), involved=True)
        var2_list_loc = me.partition(d2_var_names, func=EqualStride(), involved=True)
    else:
        var3_list_loc = d3_var_names
        var2_list_loc = d2_var_names

    # Calculate global means
    if me.get_rank() == 0 and (verbose == True):
        print "Calculating global means ....."
    if not opts_dict['cumul']:
        gm3d, gm2d, var_list = pyEnsLib.generate_global_mean_for_summary(
            o_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict)
    if me.get_rank() == 0 and (verbose == True):
        print "Finish calculating global means ....."

    # Calculate RMSZ scores
    if (not opts_dict['gmonly']) or (opts_dict['cumul']):
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating RMSZ scores ....."
        zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(
            o_files, var3_list_loc, var2_list_loc, is_SE, opts_dict)

    # Calculate max norm ensemble
    if opts_dict['maxnorm']:
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating max norm of ensembles ....."
        pyEnsLib.calculate_maxnormens(opts_dict, var3_list_loc)
        pyEnsLib.calculate_maxnormens(opts_dict, var2_list_loc)

    if opts_dict['mpi_enable'] and (not opts_dict['popens']):

        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d3_var_names), me)

            # Gather global means 3d results
            gm3d = gather_npArray(gm3d, me, slice_index, (len(d3_var_names), len(o_files)))

            if not opts_dict['gmonly']:
                # Gather zscore3d results
                zscore3d = gather_npArray(zscore3d, me, slice_index, (len(d3_var_names), len(o_files)))

                # Gather ens_avg3d and ens_stddev3d results
                shape_tuple3d = get_shape(ens_avg3d.shape, len(d3_var_names), me.get_rank())
                ens_avg3d = gather_npArray(ens_avg3d, me, slice_index, shape_tuple3d)
                ens_stddev3d = gather_npArray(ens_stddev3d, me, slice_index, shape_tuple3d)

            # Gather 2d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d2_var_names), me)

            # Gather global means 2d results
            gm2d = gather_npArray(gm2d, me, slice_index, (len(d2_var_names), len(o_files)))
            var_list = gather_list(var_list, me)

            if not opts_dict['gmonly']:
                # Gather zscore2d results
                zscore2d = gather_npArray(zscore2d, me, slice_index, (len(d2_var_names), len(o_files)))

                # Gather ens_avg2d and ens_stddev2d results
                shape_tuple2d = get_shape(ens_avg2d.shape, len(d2_var_names), me.get_rank())
                ens_avg2d = gather_npArray(ens_avg2d, me, slice_index, shape_tuple2d)
                ens_stddev2d = gather_npArray(ens_stddev2d, me, slice_index, shape_tuple2d)

        else:
            gmall = np.concatenate((temp1, temp2), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(
                gmall, me, (me.get_size(), len(d3_var_names) + len(d2_var_names)))

    # Assign to file:
    if (me.get_rank() == 0) or opts_dict['popens']:

        if not opts_dict['cumul']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            if not opts_dict['gmonly']:
                Zscoreall = np.concatenate((zscore3d, zscore2d), axis=0)
                v_RMSZ[:, :] = Zscoreall[:, :]
                if (is_SE == True):
                    v_ens_avg3d[:, :, :] = ens_avg3d[:, :, :]
                    v_ens_stddev3d[:, :, :] = ens_stddev3d[:, :, :]
                    v_ens_avg2d[:, :] = ens_avg2d[:, :]
                    v_ens_stddev2d[:, :] = ens_stddev2d[:, :]
                else:
                    v_ens_avg3d[:, :, :, :] = ens_avg3d[:, :, :, :]
                    v_ens_stddev3d[:, :, :, :] = ens_stddev3d[:, :, :, :]
                    v_ens_avg2d[:, :, :] = ens_avg2d[:, :, :]
                    v_ens_stddev2d[:, :, :] = ens_stddev2d[:, :, :]
        else:
            gmall_temp = np.transpose(gmall[:, :])
            gmall = gmall_temp

        mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm = pyEnsLib.pre_PCA(
            gmall, all_var_names, var_list, me)

        v_gm[:, :] = gmall[:, :]
        v_standardized_gm[:, :] = standardized_global_mean[:, :]
        v_mu_gm[:] = mu_gm[:]
        v_sigma_gm[:] = sigma_gm[:].astype(np.float32)
        v_loadings_gm[:, :] = loadings_gm[:, :]
        v_sigma_scores_gm[:] = scores_gm[:]

        if me.get_rank() == 0:
            print "All Done"
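# ---------------------------------------------------------------------------
# A minimal, standalone sketch of the fixed-width name encoding used above
# when variable names are written into 'S1' character arrays.  The helper
# name pad_var_names() is hypothetical (it is not part of pyEnsLib); it
# assumes str_size is the length of the longest variable name, as computed
# earlier in the routine.
# ---------------------------------------------------------------------------
def pad_var_names(names, str_size):
    """Return each name as a list of single characters,
    right-padded with blanks to exactly str_size entries."""
    padded = []
    for name in names:
        chars = list(name)
        chars.extend([' '] * (str_size - len(chars)))
        padded.append(chars)
    return padded

# Example: pad_var_names(['T', 'PSL'], 3) yields
# [['T', ' ', ' '], ['P', 'S', 'L']], ready for v_vars[:] = ...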
def main(argv):

    print 'Running pyEnsSumPop!'

    # Get command line stuff and store in a dictionary
    s = 'nyear= nmonth= npert= tag= res= mach= compset= sumfile= indir= tslice= verbose jsonfile= mpi_enable zscoreonly nrand= rand seq= jsondir='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSumPop_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm1_2_0'
    opts_dict['compset'] = 'FC5'
    opts_dict['mach'] = 'yellowstone'
    opts_dict['tslice'] = 0
    opts_dict['nyear'] = 3
    opts_dict['nmonth'] = 12
    opts_dict['npert'] = 40
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['res'] = 'ne30_ne30'
    opts_dict['sumfile'] = 'ens.pop.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['jsonfile'] = ''
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = False
    opts_dict['zscoreonly'] = False
    opts_dict['popens'] = True
    opts_dict['nrand'] = 40
    opts_dict['rand'] = False
    opts_dict['seq'] = 0
    opts_dict['jsondir'] = '/glade/scratch/haiyingx/'

    # This creates the dictionary of input arguments
    print "before parseconfig"
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ESP', opts_dict)

    verbose = opts_dict['verbose']
    nbin = opts_dict['nbin']

    if verbose:
        print opts_dict

    # Now find file names in indir
    input_dir = opts_dict['indir']

    # Create an MPI simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    if opts_dict['jsonfile']:
        # Read in the included var list
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        str_size = 0
        for vname in Var3d:
            if str_size < len(vname):
                str_size = len(vname)
        for vname in Var2d:
            if str_size < len(vname):
                str_size = len(vname)

    in_files = []
    if (os.path.exists(input_dir)):
        # Pick up the 'nrand' random number of input files to generate summary files
        if opts_dict['rand']:
            in_files = pyEnsLib.Random_pickup_pop(input_dir, opts_dict, opts_dict['nrand'])
        else:
            # Get the list of files
            in_files_temp = os.listdir(input_dir)
            in_files = sorted(in_files_temp)
        # Make sure we have enough
        num_files = len(in_files)
    else:
        print 'Input directory: ', input_dir, ' not found'
        sys.exit(2)

    # Partition the input file list (the simplecomm object was created above)
    in_file_list = me.partition(in_files, func=EqualStride(), involved=True)

    # Open the files in the input directory
    o_files = []
    for onefile in in_file_list:
        if (os.path.isfile(input_dir + '/' + onefile)):
            o_files.append(Nio.open_file(input_dir + '/' + onefile, "r"))
        else:
            print "COULD NOT LOCATE FILE " + input_dir + '/' + onefile + "! EXITING...."
            sys.exit()

    print in_file_list

    # Store dimensions of the input fields
    if (verbose == True):
        print "Getting spatial dimensions"
    nlev = -1
    nlat = -1
    nlon = -1

    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "z_t":
            nlev = input_dims["z_t"]
        elif key == "nlon":
            nlon = input_dims["nlon"]
        elif key == "nlat":
            nlat = input_dims["nlat"]

    # Make sure all files have the same dimensions
    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if (nlev != int(input_dims["z_t"]) or (nlat != int(input_dims["nlat"]))
                or (nlon != int(input_dims["nlon"]))):
            print "Dimension mismatch between ", in_file_list[0], 'and', in_file_list[count], '!!!'
            sys.exit()

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if verbose:
        print "Creating ", this_sumfile, " ..."

    if me.get_rank() == 0:
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)

        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'
        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if (verbose == True):
            print "Setting dimensions ....."
        nc_sumfile.create_dimension('nlat', nlat)
        nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('time', None)
        nc_sumfile.create_dimension('ens_size', opts_dict['npert'])
        nc_sumfile.create_dimension('nbin', opts_dict['nbin'])
        nc_sumfile.create_dimension('nvars', len(Var3d) + len(Var2d))
        nc_sumfile.create_dimension('nvars3d', len(Var3d))
        nc_sumfile.create_dimension('nvars2d', len(Var2d))
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if (verbose == True):
            print "Setting global attributes ....."
        setattr(nc_sumfile, 'creation_date', now)
        setattr(nc_sumfile, 'title', 'POP verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if (verbose == True):
            print "Creating variables ....."
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',))
        v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size'))
        v_time = nc_sumfile.create_variable("time", 'd', ('time',))
        v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f', ('time', 'nvars', 'ens_size', 'nbin'))
        if not opts_dict['zscoreonly']:
            v_gm = nc_sumfile.create_variable("global_mean", 'f', ('time', 'nvars', 'ens_size'))

        # Assign vars, var3d and var2d
        if (verbose == True):
            print "Assigning vars, var3d, and var2d ....."

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []

        all_var_names = list(Var3d)
        all_var_names += Var2d
        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(Var3d)
        for i in range(l_eq):
            tt = list(Var3d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(Var2d)
        for i in range(l_eq):
            tt = list(Var2d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invariant metadata
        if (verbose == True):
            print "Assigning time invariant metadata ....."
        vars_dict = o_files[0].variables
        lev_data = vars_dict["z_t"]
        v_lev[:] = lev_data[:]

    # Time-variant metadata
    if verbose:
        print "Assigning time variant metadata ....."
vars_dict = o_files[0].variables time_value = vars_dict['time'] time_array = np.array([time_value]) time_array = pyEnsLib.gather_npArray_pop(time_array,me,(me.get_size(),)) if me.get_rank() == 0: v_time[:]=time_array[:] # Calculate global mean, average, standard deviation if verbose: print "Calculating global means ....." is_SE = False tslice=0 if not opts_dict['zscoreonly']: gm3d,gm2d = pyEnsLib.generate_global_mean_for_summary(o_files,Var3d,Var2d, is_SE,False,opts_dict) if verbose: print "Finish calculating global means ....." # Calculate RMSZ scores if (verbose == True): print "Calculating RMSZ scores ....." zscore3d,zscore2d,ens_avg3d,ens_stddev3d,ens_avg2d,ens_stddev2d,temp1,temp2=pyEnsLib.calc_rmsz(o_files,Var3d,Var2d,is_SE,opts_dict) # Collect from all processors if opts_dict['mpi_enable'] : # Gather the 3d variable results from all processors to the master processor # Gather global means 3d results if not opts_dict['zscoreonly']: gmall=np.concatenate((gm3d,gm2d),axis=0) #print "before gather, gmall.shape=",gmall.shape gmall=pyEnsLib.gather_npArray_pop(gmall,me,(me.get_size(),len(Var3d)+len(Var2d),len(o_files))) zmall=np.concatenate((zscore3d,zscore2d),axis=0) zmall=pyEnsLib.gather_npArray_pop(zmall,me,(me.get_size(),len(Var3d)+len(Var2d),len(o_files),nbin)) #print 'zmall=',zmall #print "after gather, gmall.shape=",gmall.shape ens_avg3d=pyEnsLib.gather_npArray_pop(ens_avg3d,me,(me.get_size(),len(Var3d),nlev,(nlat),nlon)) ens_avg2d=pyEnsLib.gather_npArray_pop(ens_avg2d,me,(me.get_size(),len(Var2d),(nlat),nlon)) ens_stddev3d=pyEnsLib.gather_npArray_pop(ens_stddev3d,me,(me.get_size(),len(Var3d),nlev,(nlat),nlon)) ens_stddev2d=pyEnsLib.gather_npArray_pop(ens_stddev2d,me,(me.get_size(),len(Var2d),(nlat),nlon)) # Assign to file: if me.get_rank() == 0 : #Zscoreall=np.concatenate((zscore3d,zscore2d),axis=0) v_RMSZ[:,:,:,:]=zmall[:,:,:,:] v_ens_avg3d[:,:,:,:,:]=ens_avg3d[:,:,:,:,:] v_ens_stddev3d[:,:,:,:,:]=ens_stddev3d[:,:,:,:,:] v_ens_avg2d[:,:,:,:]=ens_avg2d[:,:,:,:] v_ens_stddev2d[:,:,:,:]=ens_stddev2d[:,:,:,:] if not opts_dict['zscoreonly']: v_gm[:,:,:]=gmall[:,:,:] print "All done"
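# ---------------------------------------------------------------------------
# A hedged sketch of reading the 'S1' name arrays back out of a summary file
# written by the routine above.  The file name below is just the default
# 'sumfile' option; the join/strip round-trip is the inverse of the blank
# padding applied before writing.
# ---------------------------------------------------------------------------
import Nio

f = Nio.open_file('ens.pop.summary.nc', 'r')   # assumes this file exists
char_array = f.variables['vars'][:]            # shape: (nvars, str_size)
var_names = [''.join(row).strip() for row in char_array]
print var_names
f.close()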
def __init__(self, spec, serial=False, verbosity=1, once=False):
    '''
    Constructor

    @param spec An instance of the Specifier class, defining the
        input specification for this reshaper operation.

    @param serial True or False, indicating whether the operation
        should be performed in serial (True) or parallel (False).
        The default is to assume parallel operation (serial will be
        chosen if mpi4py cannot be found when trying to initialize
        the decomposition).

    @param verbosity Level of printed output (stdout). A value of 0
        means no output, and a higher value means more output. The
        default value is 1.

    @param once True or False, indicating whether the Reshaper should
        write all metadata to a 'once' file (separately).
    '''

    # Type checking (or double-checking)
    if (not isinstance(spec, Slice2SeriesSpecifier)):
        err_msg = "Slice2SeriesReshaper requires a Slice2SeriesSpecifier" \
                  + " as input."
        raise TypeError(err_msg)

    # Call the base-class constructor
    super(Slice2SeriesReshaper, self).__init__(spec,
                                               serial=serial,
                                               verbosity=verbosity,
                                               once=once)

    # Setup PyNIO options (including disabling the default PreFill option)
    opt = Nio.options()
    opt.PreFill = False

    # Determine the Format and CompressionLevel options
    # from the NetCDF format string in the Specifier
    if (self._specifier.netcdf_format == 'netcdf'):
        opt.Format = 'Classic'
    elif (self._specifier.netcdf_format == 'netcdf4'):
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = 0
    elif (self._specifier.netcdf_format == 'netcdf4c'):
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = 1
    self._nio_options = opt
    self._messenger.print_once('PyNIO options set', vlevel=2)

    # Open all of the input files
    self._timer.start('Open Input Files')
    self._input_files = []
    for filename in self._specifier.input_file_list:
        self._input_files.append(Nio.open_file(filename, "r"))
    self._timer.stop('Open Input Files')
    self._messenger.print_once('Input files opened', vlevel=2)

    # Validate the input files themselves
    self._timer.start('Input File Validation')
    self._validate_input_files()
    self._timer.stop('Input File Validation')
    self._messenger.print_once('Input files validated', vlevel=2)

    # Sort the input files by time
    self._timer.start('Sort Input Files')
    self._sort_input_files_by_time()
    self._timer.stop('Sort Input Files')
    self._messenger.print_once('Input files sorted', vlevel=2)

    # Retrieve and sort the variables in each time-slice file
    # (To determine if it is time-invariant metadata, time-variant
    # metadata, or if it is a time-series variable)
    self._timer.start('Sort Variables')
    self._sort_variables()
    self._timer.stop('Sort Variables')
    self._messenger.print_once('Variables sorted', vlevel=2)

    # Helpful debugging message
    self._messenger.print_once('Reshaper initialized.', vlevel=1)

    # Sync before continuing.
    self._messenger.sync()
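# ---------------------------------------------------------------------------
# The Format/CompressionLevel selection above amounts to a small mapping from
# the Specifier's netcdf_format string to PyNIO options.  This standalone
# helper is an illustrative sketch only (make_nio_options is not part of the
# Reshaper API); it mirrors the three format strings handled above.
# ---------------------------------------------------------------------------
import Nio

def make_nio_options(netcdf_format):
    opt = Nio.options()
    opt.PreFill = False
    if netcdf_format == 'netcdf':
        opt.Format = 'Classic'
    elif netcdf_format == 'netcdf4':
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = 0
    elif netcdf_format == 'netcdf4c':
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = 1
    return opt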
def setUp(self): do_setup(filename) opt = Nio.options() opt.UseAxisAttribute = True self.f = Nio.open_file(filename, options = opt)
def open_new_file( self, file_name,
                   info=None,
                   var_name='X',
                   long_name=None,
                   units_name='None',
                   dtype='float64',
                   time_units='minutes',
                   comment='',
                   shape=(1, 1, 1),
                   res=(1., 1., 1.),
                   MAKE_RTI=True, MAKE_BOV=False):

    #--------------------------------------------------
    # Try to import the Nio module from PyNIO package
    #--------------------------------------------------
    Nio = self.import_nio()
    if not Nio:
        return False

    #----------------------------
    # Does file already exist ?
    #----------------------------
    file_name = file_utils.check_overwrite(file_name)
    self.file_name = file_name

    #---------------------------------------
    # Check and store the grid information
    #---------------------------------------
    self.format = 'nccs'
    self.file_name = file_name
    self.time_index = 0
    self.var_name = var_name
    self.shape = shape
    self.res = res

    if (long_name is None):
        long_name = var_name
    self.long_name = long_name
    self.units_name = units_name
    self.dtype = dtype

    #-----------------------------------
    # Get Nio type code for this dtype
    #-----------------------------------
    nio_type_map = self.get_nio_type_map()
    nio_type_code = nio_type_map[dtype.lower()]
    self.nio_type_code = nio_type_code

    #-------------------------------------
    # Open a new netCDF file for writing
    #-------------------------------------
    # Sample output from time.asctime():
    #     "Thu Oct  8 17:10:18 2009"
    #-------------------------------------
    opt = Nio.options()
    opt.PreFill = False                # (for efficiency)
    opt.HeaderReserveSpace = 4000      # (4000 bytes, for efficiency)
    history = "Created using PyNIO " + Nio.__version__ + " on "
    history = history + time.asctime() + ". "
    history = history + comment

    try:
        nccs_unit = Nio.open_file(file_name, mode="w",
                                  options=opt, history=history)
        OK = True
    except Exception:
        OK = False
        return OK

    #---------------------------------------------------
    # Create grid dimensions nz, ny and nx, plus time
    #---------------------------------------------------
    # Without using "int()" here, we get this:
    #     TypeError: size must be None or integer
    #---------------------------------------------------
    nccs_unit.create_dimension("nz", int(self.shape[0]))
    nccs_unit.create_dimension("ny", int(self.shape[1]))
    nccs_unit.create_dimension("nx", int(self.shape[2]))
    nccs_unit.create_dimension("time", None)    # (unlimited dimension)

    #-------------------------
    # Create a time variable
    #------------------------------------------
    # ('d' = float64; must match in add_cube())
    #------------------------------------------
    tvar = nccs_unit.create_variable('time', 'd', ("time",))
    nccs_unit.variables['time'].units = time_units

    #--------------------------------
    # Create a variable in the file
    #----------------------------------
    # Returns "var" as a PyNIO object
    #----------------------------------
    var = nccs_unit.create_variable(var_name, nio_type_code,
                                    ("time", "nz", "ny", "nx"))

    #----------------------------------
    # Specify a "nodata" fill value ?
    #----------------------------------
    var._FillValue = -9999.0    ## Does this jive with PreFill above ??

    #------------------------------------
    # Create attributes of the variable
    #------------------------------------
    nccs_unit.variables[var_name].long_name = long_name
    nccs_unit.variables[var_name].units = units_name
    nccs_unit.variables[var_name].dz = self.res[0]
    nccs_unit.variables[var_name].dy = self.res[1]
    nccs_unit.variables[var_name].dx = self.res[2]
    nccs_unit.variables[var_name].y_south_edge = 0.
    nccs_unit.variables[var_name].y_north_edge = self.res[1] * self.shape[1]
    nccs_unit.variables[var_name].x_west_edge = 0.
    nccs_unit.variables[var_name].x_east_edge = self.res[2] * self.shape[2]
    nccs_unit.variables[var_name].z_bottom_edge = 0.
    nccs_unit.variables[var_name].z_top_edge = self.res[0] * self.shape[0]

    self.nccs_unit = nccs_unit
    return OK
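# ---------------------------------------------------------------------------
# A condensed, standalone sketch of the pattern implemented by the method
# above: open a netCDF file with PyNIO, create an unlimited time dimension
# plus fixed spatial dimensions, and append one (nz, ny, nx) cube per time
# step.  File and variable names here are illustrative only.
# ---------------------------------------------------------------------------
import numpy as np
import Nio

opt = Nio.options()
opt.PreFill = False

f = Nio.open_file('cubes_example.nc', 'w', options=opt)
f.create_dimension('time', None)    # unlimited record dimension
f.create_dimension('nz', 2)
f.create_dimension('ny', 3)
f.create_dimension('nx', 4)

tvar = f.create_variable('time', 'd', ('time',))
cube = f.create_variable('X', 'd', ('time', 'nz', 'ny', 'nx'))

data = np.zeros((2, 3, 4), dtype='float64')
for n in range(3):                  # write three time steps
    cube[n, :, :, :] = data + n
    tvar[n] = float(n)

f.close()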
def open_new_file(self, file_name,
                  info=None,
                  var_name='X',
                  long_name=None,
                  units_name='None',
                  dtype='float32',    ### dtype='float64'
                  time_units='minutes',
                  comment='',
                  MAKE_RTI=True, MAKE_BOV=False):

    #--------------------------------------------------
    # Try to import the Nio module from PyNIO package
    #--------------------------------------------------
    Nio = self.import_nio()
    if not Nio:
        return False

    #----------------------------
    # Does file already exist ?
    #----------------------------
    file_name = file_utils.check_overwrite(file_name)
    self.file_name = file_name

    #---------------------------------------
    # Check and store the grid information
    #---------------------------------------
    self.check_and_store_info(file_name, info, var_name,
                              dtype, MAKE_RTI, MAKE_BOV)
    if (long_name is None):
        long_name = var_name
    self.long_name = long_name
    self.units_name = units_name
    self.dtype = dtype

    #-------------------------
    # Save the Nio type code
    #-------------------------
    nio_type_map = self.get_nio_type_map()
    nio_type_code = nio_type_map[dtype.lower()]
    self.nio_type_code = nio_type_code

    #-------------------------------------
    # Open a new netCDF file for writing
    #-------------------------------------
    # Sample output from time.asctime():
    #     "Thu Oct  8 17:10:18 2009"
    #-------------------------------------
    opt = Nio.options()
    opt.PreFill = False                # (for efficiency)
    opt.HeaderReserveSpace = 4000      # (4000 bytes, for efficiency)
    history = "Created using PyNIO " + Nio.__version__ + " on "
    history = history + time.asctime() + ". "
    history = history + comment

    try:
        ncgs_unit = Nio.open_file(file_name, mode="w",
                                  options=opt, history=history)
        OK = True
    except Exception:
        OK = False
        return OK

    #----------------------------------------------
    # Create grid dimensions nx and ny, plus time
    #----------------------------------------------
    # Without using "int()" here, we get this:
    #     TypeError: size must be None or integer
    #----------------------------------------------
    ncgs_unit.create_dimension("nx", int(self.info.ncols))
    ncgs_unit.create_dimension("ny", int(self.info.nrows))
    ncgs_unit.create_dimension("time", None)    # (unlimited dimension)

    #-------------------------
    # Create a time variable
    #------------------------------------------
    # ('d' = float64; must match in add_grid())
    #------------------------------------------
    tvar = ncgs_unit.create_variable('time', 'd', ("time",))
    ncgs_unit.variables['time'].units = time_units

    #--------------------------------
    # Create a variable in the file
    #----------------------------------
    # Returns "var" as a PyNIO object
    #----------------------------------
    var = ncgs_unit.create_variable(var_name, nio_type_code,
                                    ("time", "ny", "nx"))

    #-------------------------------------------
    # Create a separate, scalar "time stamp" ?
    #-------------------------------------------
    # t = nc_unit.create_variable("time", nio_type_code, ("time"))

    #----------------------------------
    # Specify a "nodata" fill value ?
    #----------------------------------
    var._FillValue = -9999.0    ## Does this jive with PreFill above ??
#------------------------------------ # Create attributes of the variable #------------------------------------ ncgs_unit.variables[var_name].long_name = long_name ncgs_unit.variables[var_name].units = units_name ncgs_unit.variables[var_name].dx = self.info.xres ncgs_unit.variables[var_name].dy = self.info.yres ### (12/2/09) ## ncgs_unit.variables[var_name].dx = dx ## ncgs_unit.variables[var_name].dy = dy ### (10/15/09) ncgs_unit.variables[var_name].y_south_edge = self.info.y_south_edge ncgs_unit.variables[var_name].y_north_edge = self.info.y_north_edge ncgs_unit.variables[var_name].x_west_edge = self.info.x_west_edge ncgs_unit.variables[var_name].x_east_edge = self.info.x_east_edge self.ncgs_unit = ncgs_unit return OK
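# ---------------------------------------------------------------------------
# A hedged sketch of reading the georeferencing attributes back after a file
# like the one above has been written and closed.  The file and variable
# names are illustrative; .attributes is PyNIO's per-variable attribute
# dictionary.
# ---------------------------------------------------------------------------
import Nio

f = Nio.open_file('example_grid.nc', 'r')   # assumes a file written as above
v = f.variables['X']
for attr in ('long_name', 'units', 'dx', 'dy', 'y_south_edge',
             'y_north_edge', 'x_west_edge', 'x_east_edge'):
    if attr in v.attributes:
        print attr, '=', v.attributes[attr]
f.close()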
def setUp(self): self.filename = tempfile.mktemp(prefix="test_", suffix=".nc") do_setup(self.filename) opt = Nio.options() opt.UseAxisAttribute = True self.f = Nio.open_file(self.filename, options=opt)
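# ---------------------------------------------------------------------------
# tempfile.mktemp() (used above) is deprecated because the returned name can
# race with another process.  A sketch of an equivalent helper built on
# mkstemp(), which creates the file atomically; do_setup() can then rewrite
# it.  This is a suggested alternative, not part of the original test suite.
# ---------------------------------------------------------------------------
import os
import tempfile

def make_temp_nc_name():
    fd, path = tempfile.mkstemp(prefix="test_", suffix=".nc")
    os.close(fd)    # keep only the name; do_setup() recreates the file
    return path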
def setUp(self): #print 'Creating temporary file' do_setup(filename) opt = Nio.options() opt.MaskedArrayMode = 'MaskedIfFillAtt' self.f = Nio.open_file(filename, options=opt)
f = Nio.open_file(dirc + fname) # # Print the input file contents # # print f # # If the output file already exists, remove it # os.system("rm -f " + fname + ".nc") # # Set the PreFill option to False to improve writing performance # opt = Nio.options() opt.PreFill = False # # Options for writing NetCDF4 "classic" file. # # If Nio wasn't built with netcdf 4 support, you will get a # warning here, and the code will use netcdf 3 instead. # opt.Format = "netcdf4classic" opt.CompressionLevel = 5 # Can go up to 9 # # Set the history attribute # hatt = "Converted from GRIB2: " + time.ctime(time.time())
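#
# A hedged sketch of how the conversion might continue from here: open the
# output file with the options and history string set above, then copy the
# dimensions, variables, and variable attributes from the input.  The copy
# loop is illustrative; the original script may differ in detail.
#
fout = Nio.open_file(fname + ".nc", "w", options=opt, history=hatt)

# Copy dimensions (an unlimited dimension is created with size None)
for dname in f.dimensions:
    size = None if f.unlimited(dname) else int(f.dimensions[dname])
    fout.create_dimension(dname, size)

# Copy each variable's definition, attributes, and data
for vname in f.variables:
    vin = f.variables[vname]
    vout = fout.create_variable(vname, vin.typecode(), vin.dimensions)
    for attr in vin.attributes:
        setattr(vout, attr, vin.attributes[attr])
    vout[:] = vin[:]

fout.close()
f.close()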
def setUp(self): #print 'Creating temporary file: ', filename do_setup(filename) opt = Nio.options() opt.MaskedArrayMode = 'MaskedAlways' self.f = Nio.open_file(filename, options=opt)
def __init__(self, specifier, serial=False, verbosity=1,
             skip_existing=False, overwrite=False, once=False,
             simplecomm=None):
    """
    Constructor

    Parameters:
        specifier (Specifier): An instance of the Specifier class,
            defining the input specification for this reshaper operation.

    Keyword Arguments:
        serial (bool): True or False, indicating whether the operation
            should be performed in serial (True) or parallel (False).
            The default is to assume parallel operation (serial will be
            chosen if mpi4py cannot be found when trying to initialize
            the decomposition).
        verbosity(int): Level of printed output (stdout). A value of 0
            means no output, and a higher value means more output. The
            default value is 1.
        skip_existing (bool): Flag specifying whether to skip the
            generation of time-series for variables with time-series
            files that already exist. Default is False.
        overwrite (bool): Flag specifying whether to forcefully
            overwrite output files if they already exist. Default is
            False.
        once (bool): True or False, indicating whether the Reshaper
            should write all metadata to a 'once' file (separately).
        simplecomm (SimpleComm): A SimpleComm object to handle the
            parallel communication, if necessary
    """

    # Type checking (or double-checking)
    if not isinstance(specifier, Specifier):
        err_msg = "Input must be given in the form of a Specifier object"
        raise TypeError(err_msg)
    if type(serial) is not bool:
        err_msg = "Serial indicator must be True or False."
        raise TypeError(err_msg)
    if type(verbosity) is not int:
        err_msg = "Verbosity level must be an integer."
        raise TypeError(err_msg)
    if type(skip_existing) is not bool:
        err_msg = "Skip_existing flag must be True or False."
        raise TypeError(err_msg)
    if type(once) is not bool:
        err_msg = "Once-file indicator must be True or False."
        raise TypeError(err_msg)
    if simplecomm is not None:
        if not isinstance(simplecomm, (SimpleComm, SimpleCommMPI)):
            err_msg = "Simple communicator object is not a SimpleComm or SimpleCommMPI"
            raise TypeError(err_msg)

    # Whether to write a once file
    self._use_once_file = once

    # Internal timer data
    self._timer = TimeKeeper()

    # Dictionary storing read/write data amounts
    self.assumed_block_size = float(4 * 1024 * 1024)
    self._byte_counts = {}

    self._timer.start('Initializing Simple Communicator')
    if simplecomm is None:
        simplecomm = create_comm(serial=serial)
    # Reference to the simple communicator
    self._simplecomm = simplecomm
    self._timer.stop('Initializing Simple Communicator')

    # Construct the print header
    header = ''.join(['[', str(self._simplecomm.get_rank()), '/',
                      str(self._simplecomm.get_size()), '] '])

    # Reference to the verbose printer tool
    self._vprint = VPrinter(header=header, verbosity=verbosity)

    # Debug output starting
    if self._simplecomm.is_manager():
        self._vprint('Initializing Reshaper', verbosity=1)

    # Validate the user input data
    self._timer.start('Specifier Validation')
    specifier.validate()
    self._timer.stop('Specifier Validation')
    if self._simplecomm.is_manager():
        self._vprint('Specifier validated', verbosity=1)

    # Setup PyNIO options (including disabling the default PreFill option)
    opt = Nio.options()
    opt.PreFill = False

    # Determine the Format and CompressionLevel options
    # from the NetCDF format string in the Specifier
    if specifier.netcdf_format == 'netcdf':
        opt.Format = 'Classic'
    elif specifier.netcdf_format == 'netcdf4':
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = 0
    elif specifier.netcdf_format == 'netcdf4c':
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = specifier.netcdf_deflate
        if self._simplecomm.is_manager():
            self._vprint('PyNIO compression level: {0}'.format(
                specifier.netcdf_deflate), verbosity=2)

    self._nio_options = opt
    if self._simplecomm.is_manager():
        self._vprint('PyNIO options set', verbosity=2)

    # Open all of the input files
    self._timer.start('Open Input Files')
    self._input_files = []
    for filename in specifier.input_file_list:
        self._input_files.append(Nio.open_file(filename, "r"))
    self._timer.stop('Open Input Files')
    if self._simplecomm.is_manager():
        self._vprint('Input files opened', verbosity=2)

    # Validate the input files themselves
    self._timer.start('Input File Validation')
    self._validate_input_files(specifier)
    self._timer.stop('Input File Validation')
    if self._simplecomm.is_manager():
        self._vprint('Input files validated', verbosity=2)

    # Sort the input files by time
    self._timer.start('Sort Input Files')
    self._sort_input_files_by_time(specifier)
    self._timer.stop('Sort Input Files')
    if self._simplecomm.is_manager():
        self._vprint('Input files sorted', verbosity=2)

    # Retrieve and sort the variables in each time-slice file
    # (To determine if it is time-invariant metadata, time-variant
    # metadata, or if it is a time-series variable)
    self._timer.start('Sort Variables')
    self._sort_variables(specifier)
    self._timer.stop('Sort Variables')
    if self._simplecomm.is_manager():
        self._vprint('Variables sorted', verbosity=2)

    # Validate the output files
    self._timer.start('Output File Validation')
    self._validate_output_files(specifier, skip_existing, overwrite)
    self._timer.stop('Output File Validation')
    if self._simplecomm.is_manager():
        self._vprint('Output files validated', verbosity=2)

    # Helpful debugging message
    if self._simplecomm.is_manager():
        self._vprint('Reshaper initialized.', verbosity=1)

    # Sync before continuing.
    self._simplecomm.sync()
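# ---------------------------------------------------------------------------
# A minimal sketch of the kind of check _validate_input_files() performs:
# every time-slice file should agree on its fixed (non-record) dimension
# sizes.  This illustrates the idea only; it is not the Reshaper's actual
# implementation, and the record-dimension name 'time' is an assumption.
# ---------------------------------------------------------------------------
import Nio

def check_consistent_dimensions(filenames, record_dim='time'):
    """Return True if all files share the same fixed dimension sizes."""
    reference = None
    for fname in filenames:
        f = Nio.open_file(fname, 'r')
        dims = dict((k, int(v)) for k, v in f.dimensions.items()
                    if k != record_dim)
        f.close()
        if reference is None:
            reference = dims
        elif dims != reference:
            return False
    return True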
def setUp(self): do_setup(filename) opt = Nio.options() opt.MaskedArrayMode = 'MaskedIfFillAttAndValue' self.f = Nio.open_file(filename, options=opt)
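# ---------------------------------------------------------------------------
# A hedged sketch contrasting the MaskedArrayMode settings exercised by the
# setUp() variants above.  The demo file written here is illustrative; the
# mode strings are documented PyNIO values:
#   'MaskedAlways'            - every variable is returned as a masked array
#   'MaskedIfFillAtt'         - masked only if the variable carries a
#                               _FillValue or missing_value attribute
#   'MaskedIfFillAttAndValue' - additionally requires that values equal to
#                               the fill value actually occur in the data
# ---------------------------------------------------------------------------
import tempfile
import numpy as np
import Nio

demo_name = tempfile.mktemp(prefix="mask_demo_", suffix=".nc")
w = Nio.open_file(demo_name, 'w')
w.create_dimension('x', 4)
v = w.create_variable('v', 'f', ('x',))
v._FillValue = np.float32(-9999.0)
v[:] = np.array([1.0, -9999.0, 3.0, 4.0], dtype='float32')
w.close()

opt = Nio.options()
opt.MaskedArrayMode = 'MaskedIfFillAtt'
r = Nio.open_file(demo_name, options=opt)
print r.variables['v'][:]   # the -9999.0 entry comes back masked
r.close()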