def __init__(self, specifier, serial=False, verbosity=1, wmode='w', once=False, simplecomm=None):
    """
    Constructor

    Checks the arguments, sets up the communicator and the verbose
    printer, validates the Specifier, and copies the Specifier settings
    (I/O backend, file names, variable lists, NetCDF options) onto this
    instance.  Ends with a sync on the communicator.

    Parameters:
        specifier (Specifier): An instance of the Specifier class,
            defining the input specification for this reshaper operation.
        serial (bool): True or False, indicating whether the operation
            should be performed in serial (True) or parallel (False).
            Only used (passed to create_comm) when no simplecomm is given.
        verbosity (int): Level of printed output (stdout).  A value of 0
            means no output, and a higher value means more output.  The
            default value is 1.
        wmode (str): The mode to use for writing output.  Can be 'w' for
            normal write operation, 's' to skip the output generation for
            existing time-series files, 'o' to overwrite existing
            time-series files, 'a' to append to existing time-series files.
        once (bool): True or False, indicating whether the Reshaper should
            write all metadata to a 'once' file (separately).
        simplecomm (SimpleComm): A SimpleComm object to handle the parallel
            communication, if necessary.  When None, one is created.

    Raises:
        TypeError: If any argument does not have the expected type.
        ValueError: If wmode is not one of 'w', 's', 'o', 'a'.
    """
    # --- Argument checks (order matters: the first failure wins) ---
    if not isinstance(specifier, Specifier):
        raise TypeError("Input must be given in the form of a Specifier object")

    # Exact type matches are required here (subclasses are rejected)
    exact_type_checks = (
        (serial, bool, "Serial indicator must be True or False."),
        (verbosity, int, "Verbosity level must be an integer."),
        (wmode, str, "Write mode flag must be a str."),
        (once, bool, "Once-file indicator must be True or False."),
    )
    for value, required_type, message in exact_type_checks:
        if type(value) is not required_type:
            raise TypeError(message)

    if simplecomm is not None and not isinstance(simplecomm, SimpleComm):
        raise TypeError("Simple communicator object is not a SimpleComm")

    if wmode not in ('w', 's', 'o', 'a'):
        raise ValueError("Write mode '{0}' not recognized".format(wmode))

    # --- Simple flags ---
    # Whether metadata goes to a separate 'once' file
    self._use_once_file = once
    # Output write mode ('w', 's', 'o' or 'a')
    self._write_mode = wmode

    # --- Timer and communicator ---
    self._timer = TimeKeeper()

    self._timer.start('Initializing Simple Communicator')
    if simplecomm is None:
        # No communicator supplied: build one according to `serial`
        simplecomm = create_comm(serial=serial)
    self._simplecomm = simplecomm
    self._timer.stop('Initializing Simple Communicator')

    # --- Read/write accounting ---
    # 4 * 1024 * 1024 stored as a float (4194304.0)
    self.assumed_block_size = float(4 * 1024 * 1024)
    self._byte_counts = {}

    # --- Verbose printer, prefixed with "[rank/size] " ---
    rank_text = str(self._simplecomm.get_rank())
    size_text = str(self._simplecomm.get_size())
    header = ''.join(['[', rank_text, '/', size_text, '] '])
    self._vprint = VPrinter(header=header, verbosity=verbosity)

    # Start-up messages come from the manager rank only
    if self._simplecomm.is_manager():
        self._vprint('Initializing Reshaper...', verbosity=0)
        self._vprint(
            ' MPI Communicator Size: {}'.format(self._simplecomm.get_size()),
            verbosity=1)

    # --- Validate the user input data (timed) ---
    self._timer.start('Specifier Validation')
    specifier.validate()
    self._timer.stop('Specifier Validation')

    if self._simplecomm.is_manager():
        self._vprint(' Specifier validated', verbosity=1)

    # --- I/O backend: use the requested one if available, else fall back ---
    if iobackend.is_available(specifier.io_backend):
        self._backend = specifier.io_backend
    else:
        self._backend = iobackend.get_backend()
        # This message is not restricted to the manager rank
        fallback_notice = (' I/O Backend {0} not available. Using {1} '
                           'instead').format(specifier.io_backend, self._backend)
        self._vprint(fallback_notice, verbosity=1)

    # --- Settings copied from the Specifier ---
    # Input file names
    self._input_filenames = specifier.input_file_list

    # Time-series variable names (None means no restriction was given)
    self._time_series_names = specifier.time_series
    if self._time_series_names is not None:
        vnames = ', '.join(self._time_series_names)
        if self._simplecomm.is_manager():
            self._vprint(
                'WARNING: Extracting only variables: {0}'.format(vnames),
                verbosity=-1)

    # Names of the time-variant metadata variables
    self._metadata_names = specifier.time_variant_metadata

    # Whether to treat 1D time-variant variables as metadata
    self._1d_metadata = specifier.assume_1d_time_variant_metadata

    # Metadata filename
    self._metadata_filename = specifier.metadata_filename

    # Time-invariant variables to exclude from the time-series files
    self._exclude_list = specifier.exclude_list

    # Output file prefix and suffix
    self._output_prefix = specifier.output_file_prefix
    self._output_suffix = specifier.output_file_suffix

    # NetCDF file options
    self._netcdf_format = specifier.netcdf_format
    self._netcdf_compression = specifier.compression_level
    self._netcdf_least_significant_digit = specifier.least_significant_digit

    if self._simplecomm.is_manager():
        self._vprint(' NetCDF I/O Backend: {0}'.format(self._backend),
                     verbosity=1)
        self._vprint(' NetCDF Output Format: {0}'.format(self._netcdf_format),
                     verbosity=1)
        self._vprint(' NetCDF Compression: {0}'.format(self._netcdf_compression),
                     verbosity=1)
        # A falsy setting (None or 0) is reported as disabled
        if self._netcdf_least_significant_digit:
            trunc_str = '{} decimal places'.format(
                self._netcdf_least_significant_digit)
        else:
            trunc_str = 'Disabled'
        self._vprint(' NetCDF Truncation: {0}'.format(trunc_str), verbosity=1)

    # Helpful debugging message
    if self._simplecomm.is_manager():
        self._vprint('...Reshaper initialized.', verbosity=0)

    # Sync before continuing
    self._simplecomm.sync()
def __init__(self, specifiers, serial=False, verbosity=1, skip_existing=False, overwrite=False, once=False, simplecomm=None):
    """
    Constructor

    Checks the arguments, stores them on the instance, creates a
    communicator when none is supplied, and initializes empty timing and
    byte-count storage.

    Parameters:
        specifiers (dict): A dict of named Specifier instances, each
            defining an input specification for this reshaper operation.

    Keyword Arguments:
        serial (bool): True or False, indicating whether the operation
            should be performed in serial (True) or parallel (False).
            Passed to create_comm when no simplecomm is given.
        verbosity (int): Level of printed output (stdout).  A value of 0
            means no output, and a higher value means more output.  The
            default value is 1.
        skip_existing (bool): Flag specifying whether to skip the
            generation of time-series for variables with time-series
            files that already exist.  Default is False.
        overwrite (bool): Flag specifying whether to forcefully overwrite
            output files if they already exist.  Default is False.
        once (bool): True or False, indicating whether the Reshaper should
            write all metadata to a 'once' file (separately).
        simplecomm (SimpleComm): A SimpleComm object to handle the parallel
            communication, if necessary.  When None, one is created.

    Raises:
        TypeError: If specifiers is not a dict, if serial, skip_existing
            or once is not exactly a bool, if verbosity is not exactly an
            int, or if simplecomm is given but is not a SimpleComm.
    """
    # Check types
    if not isinstance(specifiers, dict):
        err_msg = "Input must be given in a dictionary of Specifiers"
        raise TypeError(err_msg)
    if type(serial) is not bool:
        err_msg = "Serial indicator must be True or False."
        raise TypeError(err_msg)
    if type(verbosity) is not int:
        err_msg = "Verbosity level must be an integer."
        raise TypeError(err_msg)
    if type(skip_existing) is not bool:
        err_msg = "Skip_existing flag must be True or False."
        raise TypeError(err_msg)
    # NOTE(review): `overwrite` is not type-checked, unlike `skip_existing`;
    # left as-is so callers passing truthy non-bool values keep working.
    if type(once) is not bool:
        err_msg = "Once-file indicator must be True or False."
        raise TypeError(err_msg)
    if simplecomm is not None:
        # Fixed: the previous test was
        # `simplecomm is not isinstance(simplecomm, SimpleComm)`, an identity
        # comparison between the object and a bool.  It was always true, so
        # every caller-supplied communicator was rejected.
        if not isinstance(simplecomm, SimpleComm):
            err_msg = "Simple communicator object is not a SimpleComm"
            raise TypeError(err_msg)

    # Whether to write to a once file
    self._use_once_file = once

    # Whether to skip variables whose time-series files already exist
    self._skip_existing = skip_existing

    # Whether to overwrite existing output files
    self._overwrite = overwrite

    # Store the dict of specifiers
    self._specifiers = specifiers

    # Store the serial flag
    self._serial = serial

    # Check for a SimpleComm, and if none create it
    if simplecomm is None:
        simplecomm = create_comm(serial=serial)

    # Reference to the communicator in use
    self._simplecomm = simplecomm

    # Store the verbosity
    self._verbosity = verbosity

    # Set the verbose printer
    self._vprint = VPrinter(verbosity=verbosity)

    # Storage for timing data
    self._times = {}

    # Orders for printing timing data
    self._time_orders = {}

    # Storage for all byte counters
    self._byte_counts = {}
def __init__(self, specifiers, serial=False, verbosity=1, wmode='w', once=False, simplecomm=None):
    """
    Constructor

    Checks the arguments, stores them on the instance, creates a
    communicator when none is supplied, and initializes empty timing and
    byte-count storage.

    Parameters:
        specifiers (dict): A dict of named Specifier instances, each
            defining an input specification for this reshaper operation.
        serial (bool): True or False, indicating whether the operation
            should be performed in serial (True) or parallel (False).
            Passed to create_comm when no simplecomm is given.
        verbosity (int): Level of printed output (stdout).  A value of 0
            means no output, and a higher value means more output.  The
            default value is 1.
        wmode (str): The mode to use for writing output.  Can be 'w' for
            normal write operation, 's' to skip the output generation for
            existing time-series files, 'o' to overwrite existing
            time-series files, 'a' to append to existing time-series files.
        once (bool): True or False, indicating whether the Reshaper should
            write all metadata to a 'once' file (separately).
        simplecomm (SimpleComm): A SimpleComm object to handle the parallel
            communication, if necessary.  When None, one is created.

    Raises:
        TypeError: If any argument does not have the expected type.
        ValueError: If wmode is not one of 'w', 's', 'o', 'a'.
    """
    # --- Argument checks (order matters: the first failure wins) ---
    if not isinstance(specifiers, dict):
        raise TypeError("Input must be given in a dictionary of Specifiers")

    # Exact type matches are required here (subclasses are rejected)
    exact_type_checks = (
        (serial, bool, "Serial indicator must be True or False."),
        (verbosity, int, "Verbosity level must be an integer."),
        (wmode, str, "Write mode flag must be a str."),
        (once, bool, "Once-file indicator must be True or False."),
    )
    for value, required_type, message in exact_type_checks:
        if type(value) is not required_type:
            raise TypeError(message)

    if simplecomm is not None and not isinstance(simplecomm, SimpleComm):
        raise TypeError("Simple communicator object is not a SimpleComm")

    if wmode not in ('w', 's', 'o', 'a'):
        raise ValueError("Write mode '{}' not recognized".format(wmode))

    # --- Plain settings ---
    self._use_once_file = once      # write metadata to a 'once' file?
    self._write_mode = wmode        # output file write mode
    self._specifiers = specifiers   # dict of named Specifiers
    self._serial = serial           # serial/parallel flag as given

    # --- Communicator: build one only when the caller gave none ---
    if simplecomm is None:
        simplecomm = create_comm(serial=serial)
    self._simplecomm = simplecomm

    # --- Verbose output ---
    self._verbosity = verbosity
    self._vprint = VPrinter(verbosity=verbosity)

    # --- Empty bookkeeping containers ---
    self._times = {}          # timing data
    self._time_orders = {}    # orders for printing timing data
    self._byte_counts = {}    # byte counters
def __init__(self, specifier, serial=False, verbosity=1, skip_existing=False, overwrite=False, once=False, simplecomm=None):
    """
    Constructor

    Checks the arguments, sets up the communicator and the verbose
    printer, validates the Specifier, configures PyNIO options, opens the
    input files, and then validates/sorts the input files, sorts the
    variables and validates the output files.  Ends with a sync on the
    communicator.

    Parameters:
        specifier (Specifier): An instance of the Specifier class,
            defining the input specification for this reshaper operation.

    Keyword Arguments:
        serial (bool): True or False, indicating whether the operation
            should be performed in serial (True) or parallel (False).
            Passed to create_comm when no simplecomm is given.
        verbosity (int): Level of printed output (stdout).  A value of 0
            means no output, and a higher value means more output.  The
            default value is 1.
        skip_existing (bool): Flag specifying whether to skip the
            generation of time-series for variables with time-series
            files that already exist.  Default is False.
        overwrite (bool): Flag specifying whether to forcefully overwrite
            output files if they already exist.  Default is False.
        once (bool): True or False, indicating whether the Reshaper should
            write all metadata to a 'once' file (separately).
        simplecomm (SimpleComm): A SimpleComm (or SimpleCommMPI) object to
            handle the parallel communication, if necessary.  When None,
            one is created.

    Raises:
        TypeError: If specifier is not a Specifier, if serial,
            skip_existing or once is not exactly a bool, if verbosity is
            not exactly an int, or if simplecomm is given but is neither
            a SimpleComm nor a SimpleCommMPI.
    """
    # Type checking (or double-checking)
    if not isinstance(specifier, Specifier):
        err_msg = "Input must be given in the form of a Specifier object"
        raise TypeError(err_msg)
    if type(serial) is not bool:
        err_msg = "Serial indicator must be True or False."
        raise TypeError(err_msg)
    if type(verbosity) is not int:
        err_msg = "Verbosity level must be an integer."
        raise TypeError(err_msg)
    if type(skip_existing) is not bool:
        err_msg = "Skip_existing flag must be True or False."
        raise TypeError(err_msg)
    if type(once) is not bool:
        err_msg = "Once-file indicator must be True or False."
        raise TypeError(err_msg)
    if simplecomm is not None:
        if not (isinstance(simplecomm, SimpleComm) or
                isinstance(simplecomm, SimpleCommMPI)):
            # Fixed: a stray comma used to make err_msg a 2-tuple, so the
            # TypeError carried a tuple instead of a readable message.
            # Adjacent literals are now concatenated into one string.
            err_msg = ("Simple communicator object is not a SimpleComm or "
                       "SimpleCommMPI")
            raise TypeError(err_msg)

    # Whether to write a once file
    self._use_once_file = once

    # Internal timer data
    self._timer = TimeKeeper()

    # Read/write accounting: 4 * 1024 * 1024 as a float, plus byte counters
    self.assumed_block_size = float(4 * 1024 * 1024)
    self._byte_counts = {}

    self._timer.start('Initializing Simple Communicator')
    if simplecomm is None:
        simplecomm = create_comm(serial=serial)
    # Reference to the simple communicator
    self._simplecomm = simplecomm
    self._timer.stop('Initializing Simple Communicator')

    # Construct the print header, "[rank/size] "
    header = ''.join(['[',
                      str(self._simplecomm.get_rank()),
                      '/',
                      str(self._simplecomm.get_size()),
                      '] '])

    # Reference to the verbose printer tool
    self._vprint = VPrinter(header=header, verbosity=verbosity)

    # Debug output starting (manager rank only)
    if self._simplecomm.is_manager():
        self._vprint('Initializing Reshaper', verbosity=1)

    # Validate the user input data
    self._timer.start('Specifier Validation')
    specifier.validate()
    self._timer.stop('Specifier Validation')
    if self._simplecomm.is_manager():
        self._vprint('Specifier validated', verbosity=1)

    # Setup PyNIO options (including disabling the default PreFill option)
    opt = Nio.options()
    opt.PreFill = False

    # Determine the Format and CompressionLevel options
    # from the NetCDF format string in the Specifier
    if specifier.netcdf_format == 'netcdf':
        opt.Format = 'Classic'
    elif specifier.netcdf_format == 'netcdf4':
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = 0
    elif specifier.netcdf_format == 'netcdf4c':
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = specifier.netcdf_deflate
        if self._simplecomm.is_manager():
            self._vprint('PyNIO compression level: {0}'.format(
                specifier.netcdf_deflate), verbosity=2)
    self._nio_options = opt
    if self._simplecomm.is_manager():
        self._vprint('PyNIO options set', verbosity=2)

    # Open all of the input files (read-only); handles are kept on self
    self._timer.start('Open Input Files')
    self._input_files = []
    for filename in specifier.input_file_list:
        self._input_files.append(Nio.open_file(filename, "r"))
    self._timer.stop('Open Input Files')
    if self._simplecomm.is_manager():
        self._vprint('Input files opened', verbosity=2)

    # Validate the input files themselves
    self._timer.start('Input File Validation')
    self._validate_input_files(specifier)
    self._timer.stop('Input File Validation')
    if self._simplecomm.is_manager():
        self._vprint('Input files validated', verbosity=2)

    # Sort the input files by time
    self._timer.start('Sort Input Files')
    self._sort_input_files_by_time(specifier)
    self._timer.stop('Sort Input Files')
    if self._simplecomm.is_manager():
        self._vprint('Input files sorted', verbosity=2)

    # Retrieve and sort the variables in each time-slice file
    # (To determine if it is time-invariant metadata, time-variant
    # metadata, or if it is a time-series variable)
    self._timer.start('Sort Variables')
    self._sort_variables(specifier)
    self._timer.stop('Sort Variables')
    if self._simplecomm.is_manager():
        self._vprint('Variables sorted', verbosity=2)

    # Validate the output files
    self._timer.start('Output File Validation')
    self._validate_output_files(specifier, skip_existing, overwrite)
    self._timer.stop('Output File Validation')
    if self._simplecomm.is_manager():
        self._vprint('Output files validated', verbosity=2)

    # Helpful debugging message
    if self._simplecomm.is_manager():
        self._vprint('Reshaper initialized.', verbosity=1)

    # Sync before continuing
    self._simplecomm.sync()
def __init__(self, specifier, serial=False, verbosity=1, wmode='w', once=False, simplecomm=None):
    """
    Constructor

    Checks the arguments, sets up the communicator and the verbose
    printer, validates the Specifier, copies file-name settings from it,
    and builds the PyNIO options object.  Ends with a sync on the
    communicator.

    Parameters:
        specifier (Specifier): An instance of the Specifier class,
            defining the input specification for this reshaper operation.
        serial (bool): True or False, indicating whether the operation
            should be performed in serial (True) or parallel (False).
            Passed to create_comm when no simplecomm is given.
        verbosity (int): Level of printed output (stdout).  A value of 0
            means no output, and a higher value means more output.  The
            default value is 1.
        wmode (str): The mode to use for writing output.  Can be 'w' for
            normal write operation, 's' to skip the output generation for
            existing time-series files, 'o' to overwrite existing
            time-series files, 'a' to append to existing time-series files.
        once (bool): True or False, indicating whether the Reshaper should
            write all metadata to a 'once' file (separately).
        simplecomm (SimpleComm): A SimpleComm object to handle the parallel
            communication, if necessary.  When None, one is created.

    Raises:
        TypeError: If any argument does not have the expected type.
        ValueError: If wmode is not one of 'w', 's', 'o', 'a'.
    """
    # --- Argument checks (order matters: the first failure wins) ---
    if not isinstance(specifier, Specifier):
        raise TypeError("Input must be given in the form of a Specifier object")

    # Exact type matches are required here (subclasses are rejected)
    exact_type_checks = (
        (serial, bool, "Serial indicator must be True or False."),
        (verbosity, int, "Verbosity level must be an integer."),
        (wmode, str, "Write mode flag must be a str."),
        (once, bool, "Once-file indicator must be True or False."),
    )
    for value, required_type, message in exact_type_checks:
        if type(value) is not required_type:
            raise TypeError(message)

    if simplecomm is not None and not isinstance(simplecomm, SimpleComm):
        raise TypeError("Simple communicator object is not a SimpleComm")

    if wmode not in ('w', 's', 'o', 'a'):
        raise ValueError("Write mode '{}' not recognized".format(wmode))

    # --- Simple flags ---
    # Whether metadata goes to a separate 'once' file
    self._use_once_file = once
    # Output write mode ('w', 's', 'o' or 'a')
    self._write_mode = wmode

    # --- Timer and read/write accounting ---
    self._timer = TimeKeeper()
    # 4 * 1024 * 1024 stored as a float (4194304.0)
    self.assumed_block_size = float(4 * 1024 * 1024)
    self._byte_counts = {}

    # --- Communicator (creation is timed) ---
    self._timer.start('Initializing Simple Communicator')
    if simplecomm is None:
        # No communicator supplied: build one according to `serial`
        simplecomm = create_comm(serial=serial)
    self._simplecomm = simplecomm
    self._timer.stop('Initializing Simple Communicator')

    # --- Verbose printer, prefixed with "[rank/size] " ---
    rank_text = str(self._simplecomm.get_rank())
    size_text = str(self._simplecomm.get_size())
    header = ''.join(['[', rank_text, '/', size_text, '] '])
    self._vprint = VPrinter(header=header, verbosity=verbosity)

    # Start-up message comes from the manager rank only
    if self._simplecomm.is_manager():
        self._vprint('Initializing Reshaper...', verbosity=0)

    # --- Validate the user input data (timed) ---
    self._timer.start('Specifier Validation')
    specifier.validate()
    self._timer.stop('Specifier Validation')

    if self._simplecomm.is_manager():
        self._vprint(' Specifier validated', verbosity=1)

    # --- Settings copied from the Specifier ---
    # Input file names
    self._input_filenames = specifier.input_file_list
    # Names of the time-variant metadata variables
    self._metadata_names = specifier.time_variant_metadata
    # Output file prefix and suffix
    self._output_prefix = specifier.output_file_prefix
    self._output_suffix = specifier.output_file_suffix

    # --- PyNIO options (the default PreFill option is disabled) ---
    opt = nio_options()
    opt.PreFill = False

    # Format and CompressionLevel follow the Specifier's NetCDF format
    # string; any other format string leaves both options untouched.
    if specifier.netcdf_format == 'netcdf':
        opt.Format = 'Classic'
    elif specifier.netcdf_format in ['netcdf4', 'netcdf4c']:
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = specifier.compression_level
    self._nio_options = opt

    if self._simplecomm.is_manager():
        self._vprint(' PyNIO options set', verbosity=1)

    # Helpful debugging message
    if self._simplecomm.is_manager():
        self._vprint('Reshaper initialized.', verbosity=0)

    # Sync before continuing
    self._simplecomm.sync()