def __init__(self, root, guid, name=None, tdom=None, sdom=None, mode=None,
             bricking_scheme=None, inline_data_writes=True, auto_flush_values=True,
             value_caching=True, coverage_type=None, **kwargs):
    """
    Constructor for PersistenceLayer

    @param root The <root> component of the filesystem path for the coverage (/<root>/<guid>)
    @param guid The <guid> component of the filesystem path for the coverage (/<root>/<guid>)
    @param name CoverageModel's name persisted to the metadata attribute in the master HDF5 file
    @param tdom Concrete instance of AbstractDomain for the temporal domain component
    @param sdom Concrete instance of AbstractDomain for the spatial domain component
    @param mode I/O mode; 'r' opens the layer read-only (no flushing, no brick dispatcher)
    @param bricking_scheme A dictionary containing the brick and chunk sizes
    @param inline_data_writes True = brick writes happen inline; no BrickWriterDispatcher is started
    @param auto_flush_values True = Values flushed to HDF5 files automatically, False = Manual
    @param value_caching if True (default), value requests should be cached for rapid duplicate retrieval
    @param coverage_type Coverage type identifier persisted on the MasterManager
    @param kwargs
    @return None
    """
    log.debug('Persistence GUID: %s', guid)
    # BUGFIX: the original test "root is ('' or None)" reduces to "root is None"
    # because ('' or None) short-circuits to None, so an empty-string root was
    # never normalized to '.'.  Use a membership test instead.
    root = '.' if root in (None, '') else root

    self.master_manager = MasterManager(root, guid, name=name, tdom=tdom, sdom=sdom,
                                        global_bricking_scheme=bricking_scheme,
                                        parameter_bounds=None, coverage_type=coverage_type)

    self.mode = mode

    # Respect any values already persisted on the manager; apply the constructor
    # defaults only when the attribute is absent.
    if not hasattr(self.master_manager, 'auto_flush_values'):
        self.master_manager.auto_flush_values = auto_flush_values
    if not hasattr(self.master_manager, 'inline_data_writes'):
        self.master_manager.inline_data_writes = inline_data_writes
    if not hasattr(self.master_manager, 'value_caching'):
        self.master_manager.value_caching = value_caching
    if not hasattr(self.master_manager, 'coverage_type'):
        self.master_manager.coverage_type = coverage_type

    # TODO: This is not done correctly
    if tdom is not None:  # was "tdom != None"; identity comparison is the correct idiom
        self._init_master(tdom.shape.extents, bricking_scheme)

    self.value_list = {}

    self.parameter_metadata = {}  # {parameter_name: [brick_list, parameter_domains, rtree]}
    for pname in self.param_groups:
        log.debug('parameter group: %s', pname)
        self.parameter_metadata[pname] = ParameterManager(os.path.join(self.root_dir, self.guid, pname), pname)

    # Only writable layers persist dirty master metadata at construction time
    if self.mode != 'r':
        if self.master_manager.is_dirty():
            self.master_manager.flush()

    # Read-only or inline-write layers never need an async brick dispatcher
    if self.mode == 'r' or self.inline_data_writes:
        self.brick_dispatcher = None
    else:
        self.brick_dispatcher = BrickWriterDispatcher(self.write_failure_callback)
        self.brick_dispatcher.run()

    self._closed = False

    log.debug('Persistence Layer Successfully Initialized')
def __init__(self, root, guid, name=None, tdom=None, sdom=None, bricking_scheme=None, auto_flush_values=True, **kwargs):
    """
    Constructor for Persistence Layer

    @param root: Where to save/look for HDF5 files
    @param guid: CoverageModel GUID
    @param name: CoverageModel Name
    @param tdom: Temporal Domain
    @param sdom: Spatial Domain
    @param bricking_scheme: dictionary containing the brick and chunk sizes
    @param auto_flush_values: True = values flushed to HDF5 automatically, False = manual
    @param kwargs:
    @return:
    """
    log.debug('Persistence GUID: %s', guid)
    # BUGFIX: "root is ('' or None)" evaluates to "root is None" (since '' is
    # falsy), so an empty-string root was never replaced with '.'.
    root = '.' if root in (None, '') else root

    self.master_manager = MasterManager(root, guid, name=name, tdom=tdom, sdom=sdom, global_bricking_scheme=bricking_scheme)

    self.auto_flush_values = auto_flush_values
    self.value_list = {}

    self.parameter_metadata = {}  # {parameter_name: [brick_list, parameter_domains, rtree]}
    for pname in self.param_groups:
        log.debug('parameter group: %s', pname)
        self.parameter_metadata[pname] = ParameterManager(os.path.join(self.root_dir, self.guid, pname), pname)

    # Persist any pending master metadata before accepting writes
    if self.master_manager.is_dirty():
        self.master_manager.flush()

    # This variant always runs the async brick writer (no read-only mode)
    self.brick_dispatcher = BrickWriterDispatcher(self.write_failure_callback)
    self.brick_dispatcher.run()

    self._closed = False

    log.info('Persistence Layer Successfully Initialized')
def __init__(self, root, guid, name=None, tdom=None, sdom=None, mode=None,
             bricking_scheme=None, inline_data_writes=True, auto_flush_values=True,
             value_caching=True, coverage_type=None, **kwargs):
    """
    Constructor for PersistenceLayer

    @param root The <root> component of the filesystem path for the coverage (/<root>/<guid>)
    @param guid The <guid> component of the filesystem path for the coverage (/<root>/<guid>)
    @param name CoverageModel's name persisted to the metadata attribute in the master HDF5 file
    @param tdom Concrete instance of AbstractDomain for the temporal domain component
    @param sdom Concrete instance of AbstractDomain for the spatial domain component
    @param mode I/O mode; 'r' opens the layer read-only (no flushing, no brick dispatcher)
    @param bricking_scheme A dictionary containing the brick and chunk sizes
    @param inline_data_writes True = brick writes happen inline; no BrickWriterDispatcher is started
    @param auto_flush_values True = Values flushed to HDF5 files automatically, False = Manual
    @param value_caching if True (default), value requests should be cached for rapid duplicate retrieval
    @param coverage_type Coverage type identifier persisted on the metadata manager
    @param kwargs Passed through to MetadataManagerFactory.buildMetadataManager
    @return None
    """
    log.debug('Persistence GUID: %s', guid)
    # BUGFIX: "root is ('' or None)" reduces to "root is None", so an
    # empty-string root was never normalized to '.'.
    root = '.' if root in (None, '') else root

    # Metadata manager construction is delegated to the factory (formerly a
    # direct MasterManager instantiation).
    self.master_manager = MetadataManagerFactory.buildMetadataManager(
        root, guid, name=name, tdom=tdom, sdom=sdom,
        global_bricking_scheme=bricking_scheme, parameter_bounds=None,
        coverage_type=coverage_type, **kwargs)

    self.mode = mode

    # Respect any values already persisted on the manager; apply the constructor
    # defaults only when the attribute is absent.
    if not hasattr(self.master_manager, 'auto_flush_values'):
        self.master_manager.auto_flush_values = auto_flush_values
    if not hasattr(self.master_manager, 'inline_data_writes'):
        self.master_manager.inline_data_writes = inline_data_writes
    if not hasattr(self.master_manager, 'value_caching'):
        self.master_manager.value_caching = value_caching
    if not hasattr(self.master_manager, 'coverage_type'):
        self.master_manager.coverage_type = coverage_type

    # TODO: This is not done correctly
    if tdom is not None:  # was "tdom != None"; identity comparison is the correct idiom
        self._init_master(tdom.shape.extents, bricking_scheme)

    self.value_list = {}

    self.parameter_metadata = {}  # {parameter_name: [brick_list, parameter_domains, rtree]}
    for pname in self.param_groups:
        log.debug('parameter group: %s', pname)
        self.parameter_metadata[pname] = ParameterManager(os.path.join(self.root_dir, self.guid, pname), pname)

    # Only writable layers persist dirty master metadata at construction time
    if self.mode != 'r':
        if self.master_manager.is_dirty():
            self.master_manager.flush()

    # Read-only or inline-write layers never need an async brick dispatcher
    if self.mode == 'r' or self.inline_data_writes:
        self.brick_dispatcher = None
    else:
        self.brick_dispatcher = BrickWriterDispatcher(self.write_failure_callback)
        self.brick_dispatcher.run()

    self._closed = False

    log.debug('Persistence Layer Successfully Initialized')
class PersistenceLayer(object):
    """
    The PersistenceLayer class manages the disk-level storage (and retrieval) of the Coverage Model using HDF5 files.
    """

    def __init__(self, root, guid, name=None, tdom=None, sdom=None, mode=None,
                 bricking_scheme=None, inline_data_writes=True, auto_flush_values=True,
                 value_caching=True, coverage_type=None, **kwargs):
        """
        Constructor for PersistenceLayer

        @param root The <root> component of the filesystem path for the coverage (/<root>/<guid>)
        @param guid The <guid> component of the filesystem path for the coverage (/<root>/<guid>)
        @param name CoverageModel's name persisted to the metadata attribute in the master HDF5 file
        @param tdom Concrete instance of AbstractDomain for the temporal domain component
        @param sdom Concrete instance of AbstractDomain for the spatial domain component
        @param mode I/O mode; 'r' opens the layer read-only (no flushing, no brick dispatcher)
        @param bricking_scheme A dictionary containing the brick and chunk sizes
        @param inline_data_writes True = brick writes happen inline; no BrickWriterDispatcher is started
        @param auto_flush_values True = Values flushed to HDF5 files automatically, False = Manual
        @param value_caching if True (default), value requests should be cached for rapid duplicate retrieval
        @param coverage_type Coverage type identifier persisted on the metadata manager
        @param kwargs Passed through to MetadataManagerFactory.buildMetadataManager
        @return None
        """
        log.debug('Persistence GUID: %s', guid)
        # BUGFIX: "root is ('' or None)" reduces to "root is None" because
        # ('' or None) short-circuits to None; an empty-string root was never
        # normalized to '.'.  Use a membership test instead.
        root = '.' if root in (None, '') else root

        self.master_manager = MetadataManagerFactory.buildMetadataManager(
            root, guid, name=name, tdom=tdom, sdom=sdom,
            global_bricking_scheme=bricking_scheme, parameter_bounds=None,
            coverage_type=coverage_type, **kwargs)

        self.mode = mode

        # Respect any values already persisted on the manager; apply the
        # constructor defaults only when the attribute is absent.
        if not hasattr(self.master_manager, 'auto_flush_values'):
            self.master_manager.auto_flush_values = auto_flush_values
        if not hasattr(self.master_manager, 'inline_data_writes'):
            self.master_manager.inline_data_writes = inline_data_writes
        if not hasattr(self.master_manager, 'value_caching'):
            self.master_manager.value_caching = value_caching
        if not hasattr(self.master_manager, 'coverage_type'):
            self.master_manager.coverage_type = coverage_type

        # TODO: This is not done correctly
        if tdom is not None:  # was "tdom != None"; identity comparison is the correct idiom
            self._init_master(tdom.shape.extents, bricking_scheme)

        self.value_list = {}

        self.parameter_metadata = {}  # {parameter_name: [brick_list, parameter_domains, rtree]}
        for pname in self.param_groups:
            log.debug('parameter group: %s', pname)
            self.parameter_metadata[pname] = ParameterManager(os.path.join(self.root_dir, self.guid, pname), pname)

        # Only writable layers persist dirty master metadata at construction time
        if self.mode != 'r':
            if self.master_manager.is_dirty():
                self.master_manager.flush()

        # Read-only or inline-write layers never need an async brick dispatcher
        if self.mode == 'r' or self.inline_data_writes:
            self.brick_dispatcher = None
        else:
            self.brick_dispatcher = BrickWriterDispatcher(self.write_failure_callback)
            self.brick_dispatcher.run()

        self._closed = False

        log.debug('Persistence Layer Successfully Initialized')

    def __getattr__(self, key):
        # Delegate attribute reads to the metadata manager when it owns the name;
        # the 'master_manager' in self.__dict__ guard avoids recursion during __init__
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            return getattr(self.master_manager, key)
        else:
            return getattr(super(PersistenceLayer, self), key)

    def __setattr__(self, key, value):
        # Mirror of __getattr__: writes to names owned by the metadata manager go there
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            setattr(self.master_manager, key, value)
        else:
            super(PersistenceLayer, self).__setattr__(key, value)

    def update_parameter_bounds(self, parameter_name, bounds):
        """
        Widens (never narrows) the persisted (min, max) bounds for a parameter and flushes the master metadata.

        @param parameter_name Name of the parameter whose bounds to update
        @param bounds (min, max) tuple of newly observed data bounds
        """
        dmin, dmax = bounds
        if parameter_name in self.parameter_bounds:
            pmin, pmax = self.parameter_bounds[parameter_name]
            dmin = min(dmin, pmin)
            dmax = max(dmax, pmax)
        self.parameter_bounds[parameter_name] = (dmin, dmax)
        self.master_manager.flush()

    def _init_master(self, tD, bricking_scheme):
        """
        Initializes the master manager's rtree, brick list and brick domains for total domain tD.
        """
        log.debug('Performing Rtree dict setup')
        bD, cD = self.calculate_brick_size(tD, bricking_scheme)  # remains same for each parameter

        self.master_manager._init_rtree(bD)

        self.master_manager.brick_list = {}
        self.master_manager.brick_domains = [tD, bD, cD, bricking_scheme]

    # CBM TODO: This needs to be improved greatly - should callback all the way to the Application layer as a "failure handler"
    def write_failure_callback(self, message, work):
        log.error('WORK DISCARDED!!!; %s: %s', message, work)

    def calculate_brick_size(self, tD, bricking_scheme):
        """
        Calculates and returns the brick and chunk size for each dimension in the total domain based on the bricking scheme

        @param tD Total domain
        @param bricking_scheme A dictionary containing the brick and chunk sizes
        @return Brick and Chunk sizes based on the total domain
        """
        log.debug('Calculating the size of a brick...')
        log.debug('Bricking scheme: %s', bricking_scheme)
        log.debug('tD: %s', tD)
        # Same brick/chunk size is applied to every dimension of the total domain
        bD = [bricking_scheme['brick_size'] for x in tD]
        cD = [bricking_scheme['chunk_size'] for x in tD]
        log.debug('bD: %s', bD)
        log.debug('cD: %s', cD)
        return bD, tuple(cD)

    def init_parameter(self, parameter_context, bricking_scheme):
        """
        Initializes a parameter using a ParameterContext object and a bricking scheme for that parameter

        @param parameter_context ParameterContext object describing the parameter to initialize
        @param bricking_scheme A dictionary containing the brick and chunk sizes
        @return A PersistedStorage object
        @raise IOError if the layer was opened read-only
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        parameter_name = parameter_context.name

        self.global_bricking_scheme = bricking_scheme

        pm = ParameterManager(os.path.join(self.root_dir, self.guid, parameter_name), parameter_name, read_only=False)
        self.parameter_metadata[parameter_name] = pm

        pm.parameter_context = parameter_context

        log.debug('Initialize %s', parameter_name)

        self.master_manager.create_group(parameter_name)

        # Sparse constant values get dedicated storage; everything else uses the standard brick storage
        if parameter_context.param_type._value_class == 'SparseConstantValue':
            v = SparsePersistedStorage(pm, self.master_manager, self.brick_dispatcher,
                                       dtype=parameter_context.param_type.storage_encoding,
                                       fill_value=parameter_context.param_type.fill_value,
                                       mode=self.mode, inline_data_writes=self.inline_data_writes,
                                       auto_flush=self.auto_flush_values)
        else:
            v = PersistedStorage(pm, self.master_manager, self.brick_dispatcher,
                                 dtype=parameter_context.param_type.storage_encoding,
                                 fill_value=parameter_context.param_type.fill_value,
                                 mode=self.mode, inline_data_writes=self.inline_data_writes,
                                 auto_flush=self.auto_flush_values)
        self.value_list[parameter_name] = v

        # CBM TODO: Consider making this optional and bulk-flushing from the coverage
        # after all parameters have been initialized
        # No need to check if they're dirty, we know they are!
        pm.flush()

        # Put the pm into read_only mode
        pm.read_only = True

        # If there are already bricks, ensure there are appropriate links for this new parameter
        for brick_guid in self.master_manager.brick_list:
            brick_file_name = '{0}.hdf5'.format(brick_guid)
            self._add_brick_link(parameter_name, brick_guid, brick_file_name)

        self.master_manager.flush()

        return v

    def calculate_extents(self, origin, bD, total_extents):
        """
        Calculates and returns the Rtree extents, brick extents and active brick size for the parameter

        @param origin The origin of the brick in index space
        @param bD The brick's domain in index space
        @param total_extents The total extents of the domain (value space)
        @return rtree_extents, tuple(brick_extents), tuple(brick_active_size)
        """
        # Calculate the brick extents
        origin = list(origin)

        # Calculate the extents for the Rtree (index space)
        rtree_extents = origin + map(lambda o, s: o + s - 1, origin, bD)
        # Fake out the rtree if rank == 1 (rtree requires >= 2 dimensions)
        if len(origin) == 1:
            rtree_extents = [e for ext in zip(rtree_extents, [0 for x in rtree_extents]) for e in ext]
        log.debug('Rtree extents: %s', rtree_extents)

        # Calculate the extents of the brick (index space)
        brick_extents = zip(origin, map(lambda o, s: o + s - 1, origin, bD))
        log.debug('Brick extents: %s', brick_extents)

        # Calculate active size using the inner extent of the domain within a brick (value space)
        brick_active_size = map(lambda o, s: min(o, s[1] + 1) - s[0], total_extents, brick_extents)
        log.debug('Brick active size: %s', brick_active_size)

        # When loaded, brick_extents and brick_active_extents will be tuples...so,
        # convert them now to allow clean comparison
        return rtree_extents, tuple(brick_extents), tuple(brick_active_size)

    def _brick_exists_master(self, brick_extents):
        """
        Checks the master brick list for a brick with the given extents.

        @param brick_extents The brick extents to look for
        @return (do_write, brick_guid): do_write is False and brick_guid set when a match is found
        """
        do_write = True
        brick_guid = ''
        for x, v in self.master_manager.brick_list.iteritems():
            if brick_extents == v[0]:
                log.debug('Brick found with matching extents: guid=%s', x)
                do_write = False
                brick_guid = x
                break

        return do_write, brick_guid

    def _add_brick_link(self, parameter_name, brick_guid, brick_file_name):
        """
        Adds an HDF5 ExternalLink in the master file pointing at a parameter's brick file.
        """
        brick_rel_path = os.path.join(self.parameter_metadata[parameter_name].root_dir.replace(self.root_dir, '.'), brick_file_name)
        link_path = '/{0}/{1}'.format(parameter_name, brick_guid)

        # Add brick to Master HDF file
        self.master_manager.add_external_link(link_path, brick_rel_path, brick_guid)

    # Write empty HDF5 brick to the filesystem
    def _write_brick(self, rtree_extents, brick_extents, brick_active_size, origin, bD):
        """
        Creates a virtual brick in the PersistenceLayer by updating the HDF5 master file's
        brick list, rtree and ExternalLink to where the HDF5 file will be saved in the future (lazy create)

        @param rtree_extents Total extents of brick's domain in rtree format
        @param brick_extents Size of brick
        @param brick_active_size Size of brick (same rank as parameter)
        @param origin Domain origin offset
        @param bD Slice-friendly size of brick's domain
        @return N/A
        """
        log.debug('Writing virtual brick...')

        # Set HDF5 file and group
        # Create a GUID for the brick
        brick_guid = create_guid()
        brick_file_name = '{0}.hdf5'.format(brick_guid)

        # TODO: Inclusion of external links only used for external viewing of master file, remove if non-performant
        for parameter_name in self.parameter_metadata.keys():
            self._add_brick_link(parameter_name, brick_guid, brick_file_name)

        # Update the brick listing
        log.debug('Updating brick list[%s] with (%s, %s, %s, %s)', brick_guid, brick_extents, origin, tuple(bD), brick_active_size)
        brick_count = len(self.master_manager.brick_list)
        self.master_manager.brick_list[brick_guid] = [brick_extents, origin, tuple(bD), brick_active_size]
        log.debug('Brick count is %s', brick_count)

        # Insert into Rtree
        log.debug('Inserting into Rtree %s:%s:%s', brick_count, rtree_extents, brick_guid)
        self.master_manager.update_rtree(brick_count, rtree_extents, obj=brick_guid)

    # Expand the domain
    def expand_domain(self, total_extents, do_flush=False):
        """
        Expands a parameter's total domain based on the requested new temporal and/or spatial domains.
        Temporal domain expansion is most typical.
        Number of dimensions may not change for the parameter.

        @param total_extents The total extents of the domain
        @param do_flush If True, flush the master_manager if it is dirty after the expansion
        @return N/A
        @raise IOError if the layer was opened read-only
        @raise SystemError if the requested extents change the domain's rank
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        if self.master_manager.brick_domains[0] is not None:
            log.debug('Expanding domain (n-dimension)')

            # Check if the number of dimensions of the total domain has changed
            # TODO: Will this ever happen??? If so, how to handle?
            if len(total_extents) != len(self.master_manager.brick_domains[0]):
                raise SystemError('Number of dimensions for parameter cannot change, only expand in size! No action performed.')
            else:
                tD = self.master_manager.brick_domains[0]
                bD = self.master_manager.brick_domains[1]
                cD = self.master_manager.brick_domains[2]

                delta_domain = [(x - y) for x, y in zip(total_extents, tD)]
                log.debug('delta domain: %s', delta_domain)

                tD = [(x + y) for x, y in zip(tD, delta_domain)]
                self.master_manager.brick_domains[0] = tD
        else:
            # First expansion: derive brick/chunk sizes from the persisted bricking scheme
            tD = total_extents
            bricking_scheme = self.master_manager.brick_domains[3]
            bD, cD = self.calculate_brick_size(tD, bricking_scheme)
            self.master_manager.brick_domains = [tD, bD, cD, bricking_scheme]

        try:
            # Gather block list
            log.trace('tD, bD, cD: %s, %s, %s', tD, bD, cD)
            lst = [range(d)[::bD[i]] for i, d in enumerate(tD)]

            # Gather brick origins
            need_origins = set(itertools.product(*lst))
            log.trace('need_origins: %s', need_origins)

            # Bricks whose active size equals their full size are complete and never re-created
            have_origins = set([v[1] for k, v in self.master_manager.brick_list.iteritems() if (v[2] == v[3])])
            log.trace('have_origins: %s', have_origins)

            need_origins.difference_update(have_origins)
            log.trace('need_origins: %s', need_origins)

            need_origins = list(need_origins)
            need_origins.sort()

            if len(need_origins) > 0:
                log.debug('Number of Bricks to Create: %s', len(need_origins))

                # Write virtual HDF5 brick file
                for origin in need_origins:
                    rtree_extents, brick_extents, brick_active_size = self.calculate_extents(origin, bD, total_extents)

                    do_write, bguid = self._brick_exists_master(brick_extents)
                    if not do_write:
                        log.debug('Brick already exists!  Updating brick metadata...')
                        self.master_manager.brick_list[bguid] = [brick_extents, origin, tuple(bD), brick_active_size]
                    else:
                        self._write_brick(rtree_extents, brick_extents, brick_active_size, origin, bD)
            else:
                log.debug('No bricks to create to satisfy the domain expansion...')
        except Exception:
            raise

        ## .flush() is called by insert_timesteps - no need to call these here
        # NOTE(review): this unconditional flush contradicts the comment above and
        # makes the do_flush branch below redundant; retained to preserve existing
        # behavior -- confirm whether it should be removed.
        self.master_manager.flush()
        if do_flush:
            # If necessary (i.e. write_brick has been called), flush the master_manager
            # (was garbled as "master_manager461" in the original comment)
            if self.master_manager.is_dirty():
                self.master_manager.flush()

    def shrink_domain(self, total_domain, do_flush=True):
        """
        Shrinks the total domain to total_domain, discarding bricks past the new extent.

        @param total_domain The new (smaller) total domain
        @param do_flush If True (default), flush the master_manager if it is dirty afterwards
        """
        from coverage_model import bricking_utils

        # Find the last brick needed to contain the domain
        brick = bricking_utils.get_bricks_from_slice(total_domain, self.master_manager.brick_tree)
        bid, bguid = brick[0]

        # Get the brick_guids for all the bricks after the one we need
        rm_bricks = [s.value for s in self.master_manager.brick_tree._spans[bid + 1:]]

        # Remove everything that comes after the brick we still need from the RTree
        self.master_manager.brick_tree._spans = self.master_manager.brick_tree._spans[:bid + 1]

        # Remove the unnecessary bricks from the brick list
        for r in rm_bricks:
            del self.master_manager.brick_list[r]
        # and the file system...

        # Reset the first member of brick_domains
        self.master_manager.brick_domains[0] = list(total_domain)

        # And the appropriate entry in brick_list: truncate the retained brick's
        # active size to the portion inside the new domain
        self.master_manager.brick_list[bguid] = tuple(self.master_manager.brick_list[bguid][:-1]) + ((total_domain[0] - self.master_manager.brick_list[bguid][1][0],),)

        if do_flush:
            if self.master_manager.is_dirty():
                self.master_manager.flush()

    def has_dirty_values(self):
        """
        Checks if the master file values have been modified

        @return True if master file metadata has been modified
        """
        for v in self.value_list.itervalues():
            if v.has_dirty_values():
                return True

        return False

    def get_dirty_values_async_result(self):
        """
        Returns an AsyncResult tracking outstanding dirty-value writes; an
        already-completed result is returned for read-only or dispatcher-less layers.
        """
        return_now = False
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return_now = True

        if self.brick_dispatcher is None:
            log.debug('\'brick_dispatcher\' is None')
            return_now = True

        if return_now:
            from gevent.event import AsyncResult
            ret = AsyncResult()
            ret.set(True)
            return ret

        return self.brick_dispatcher.get_dirty_values_async_result()

    def update_domain(self, tdom=None, sdom=None, do_flush=True):
        """
        Updates the temporal and/or spatial domain in the MasterManager.

        If do_flush is unspecified or True, the MasterManager is flushed within this call

        @param tdom the value to update the Temporal Domain to
        @param sdom the value to update the Spatial Domain to
        @param do_flush Flush the MasterManager after updating the value(s); Default is True
        @raise IOError if the layer was opened read-only
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        # Update the global tdom & sdom as necessary
        if tdom is not None:
            self.master_manager.tdom = tdom
        if sdom is not None:
            self.master_manager.sdom = sdom

        if do_flush:
            self.master_manager.flush()

    def flush_values(self):
        """
        Flushes all parameter value storage; no-op (returns None) when read-only.

        @return AsyncResult from get_dirty_values_async_result, or None if read-only
        """
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return

        for k, v in self.value_list.iteritems():
            v.flush_values()

        return self.get_dirty_values_async_result()

    def flush(self):
        """
        Flushes values, the MasterManager, and every ParameterManager; no-op when read-only.
        """
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return

        self.flush_values()
        log.debug('Flushing MasterManager...')
        self.master_manager.flush()
        for pk, pm in self.parameter_metadata.iteritems():
            log.debug('Flushing ParameterManager for \'%s\'...', pk)
            pm.flush()

    def close(self, force=False, timeout=None):
        """
        Flushes (when writable) and shuts down the brick dispatcher, then marks the layer closed.

        @param force Passed to the dispatcher's shutdown
        @param timeout Passed to the dispatcher's shutdown
        """
        if not self._closed:
            if self.mode != 'r':
                self.flush()
                if self.brick_dispatcher is not None:
                    self.brick_dispatcher.shutdown(force=force, timeout=timeout)

        self._closed = True
class PersistenceLayer(object): """ The PersistenceLayer class manages the disk-level storage (and retrieval) of the Coverage Model using HDF5 files. """ def __init__(self, root, guid, name=None, tdom=None, sdom=None, mode=None, bricking_scheme=None, inline_data_writes=True, auto_flush_values=True, **kwargs): """ Constructor for PersistenceLayer @param root The <root> component of the filesystem path for the coverage (/<root>/<guid>) @param guid The <guid> component of the filesystem path for the coverage (/<root>/<guid>) @param name CoverageModel's name persisted to the metadata attribute in the master HDF5 file @param tdom Concrete instance of AbstractDomain for the temporal domain component @param sdom Concrete instance of AbstractDomain for the spatial domain component @param bricking_scheme A dictionary containing the brick and chunk sizes @param auto_flush_values True = Values flushed to HDF5 files automatically, False = Manual @param kwargs @return None """ log.debug('Persistence GUID: %s', guid) root = '.' 
if root is ('' or None) else root self.master_manager = MasterManager(root, guid, name=name, tdom=tdom, sdom=sdom, global_bricking_scheme=bricking_scheme) self.mode = mode if not hasattr(self.master_manager, 'auto_flush_values'): self.master_manager.auto_flush_values = auto_flush_values if not hasattr(self.master_manager, 'inline_data_writes'): self.master_manager.inline_data_writes = inline_data_writes self.value_list = {} self.parameter_metadata = {} # {parameter_name: [brick_list, parameter_domains, rtree]} for pname in self.param_groups: log.debug('parameter group: %s', pname) self.parameter_metadata[pname] = ParameterManager(os.path.join(self.root_dir, self.guid, pname), pname) if self.mode != 'r': if self.master_manager.is_dirty(): self.master_manager.flush() if self.mode == 'r' or self.inline_data_writes: self.brick_dispatcher = None else: self.brick_dispatcher = BrickWriterDispatcher(self.write_failure_callback) self.brick_dispatcher.run() self._closed = False log.info('Persistence Layer Successfully Initialized') def __getattr__(self, key): if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key): return getattr(self.master_manager, key) else: return getattr(super(PersistenceLayer, self), key) def __setattr__(self, key, value): if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key): setattr(self.master_manager, key, value) else: super(PersistenceLayer, self).__setattr__(key, value) # CBM TODO: This needs to be improved greatly - should callback all the way to the Application layer as a "failure handler" def write_failure_callback(self, message, work): log.error('WORK DISCARDED!!!; %s: %s', message, work) def calculate_brick_size(self, tD, bricking_scheme): """ Calculates and returns the brick and chunk size for each dimension in the total domain based on the bricking scheme @param tD Total domain @param bricking_scheme A dictionary containing the brick and chunk sizes @return Brick and Chunk sizes based on the total 
domain """ log.debug('Calculating the size of a brick...') log.debug('Bricking scheme: %s', bricking_scheme) log.debug('tD: %s', tD) bD = [bricking_scheme['brick_size'] for x in tD] cD = [bricking_scheme['chunk_size'] for x in tD] log.debug('bD: %s', bD) log.debug('cD: %s', cD) return bD,tuple(cD) def init_parameter(self, parameter_context, bricking_scheme): """ Initializes a parameter using a ParameterContext object and a bricking scheme for that parameter @param parameter_context ParameterContext object describing the parameter to initialize @param bricking_scheme A dictionary containing the brick and chunk sizes @return A PersistedStorage object """ if self.mode == 'r': raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode)) parameter_name = parameter_context.name self.global_bricking_scheme = bricking_scheme pm = ParameterManager(os.path.join(self.root_dir, self.guid, parameter_name), parameter_name) self.parameter_metadata[parameter_name] = pm pm.parameter_context = parameter_context log.debug('Initialize %s', parameter_name) self.master_manager.create_group(parameter_name) log.debug('Performing Rtree dict setup') tD = parameter_context.dom.total_extents bD,cD = self.calculate_brick_size(tD, bricking_scheme) #remains same for each parameter # Verify domain is Rtree friendly tree_rank = len(bD) log.debug('tree_rank: %s', tree_rank) if tree_rank == 1: tree_rank += 1 log.debug('tree_rank: %s', tree_rank) p = rtree.index.Property() p.dimension = tree_rank brick_tree = rtree.index.Index(properties=p) pm.brick_list = {} if isinstance(parameter_context.param_type, (FunctionType, ConstantType)): # These have constant storage, never expand!! 
pm.brick_domains = [(1,),(1,),(1,),bricking_scheme] else: pm.brick_domains = [tD, bD, cD, bricking_scheme] pm.tree_rank = tree_rank pm.brick_tree = brick_tree v = PersistedStorage(pm, self.brick_dispatcher, dtype=parameter_context.param_type.storage_encoding, fill_value=parameter_context.param_type.fill_value, mode=self.mode, inline_data_writes=self.inline_data_writes, auto_flush=self.auto_flush_values) self.value_list[parameter_name] = v self.expand_domain(parameter_context) # CBM TODO: Consider making this optional and bulk-flushing from the coverage after all parameters have been initialized # No need to check if they're dirty, we know they are! pm.flush() self.master_manager.flush() return v def calculate_extents(self, origin, bD, parameter_name): """ Calculates and returns the Rtree extents, brick extents and active brick size for the parameter @param origin The origin of the brick in index space @param bD The brick's domain in index space @param parameter_name The parameter name @return rtree_extents, tuple(brick_extents), tuple(brick_active_size) """ log.debug('origin: %s', origin) log.debug('bD: %s', bD) log.debug('parameter_name: %s', parameter_name) # Calculate the brick extents origin = list(origin) pc = self.parameter_metadata[parameter_name].parameter_context total_extents = pc.dom.total_extents # index space log.debug('Total extents for parameter %s: %s', parameter_name, total_extents) # Calculate the extents for the Rtree (index space) rtree_extents = origin + map(lambda o,s: o+s-1, origin, bD) # Fake out the rtree if rank == 1 if len(origin) == 1: rtree_extents = [e for ext in zip(rtree_extents,[0 for x in rtree_extents]) for e in ext] log.debug('Rtree extents: %s', rtree_extents) # Calculate the extents of the brick (index space) brick_extents = zip(origin,map(lambda o,s: o+s-1, origin, bD)) log.debug('Brick extents: %s', brick_extents) # Calculate active size using the inner extent of the domain within a brick (value space) brick_active_size = 
map(lambda o,s: min(o,s[1]+1)-s[0], total_extents, brick_extents) log.debug('Brick active size: %s', brick_active_size) # When loaded, brick_extents and brick_active_extents will be tuples...so, convert them now to allow clean comparison return rtree_extents, tuple(brick_extents), tuple(brick_active_size) def _brick_exists(self, parameter_name, brick_extents): """ Checks if a brick exists for a given parameter and extents @param parameter_name The parameter name @param brick_extents The brick extents @return Boolean (do_write) = False if found, returns found brick's GUID; otherwise returns True with an empty brick GUID """ # Make sure the brick doesn't already exist if we already have some bricks do_write = True brick_guid = '' log.debug('Check bricks for parameter \'%s\'',parameter_name) if parameter_name in self.parameter_metadata: for x,v in self.parameter_metadata[parameter_name].brick_list.iteritems(): if brick_extents == v[0]: log.debug('Brick found with matching extents: guid=%s', x) do_write = False brick_guid = x break return do_write, brick_guid # Write empty HDF5 brick to the filesystem def _write_brick(self, rtree_extents, brick_extents, brick_active_size, origin, bD, parameter_name): """ Creates a virtual brick in the PersistenceLayer by updating the HDF5 master file's brick list, rtree and ExternalLink to where the HDF5 file will be saved in the future (lazy create) @param rtree_extents Total extents of brick's domain in rtree format @param brick_extents Size of brick @param brick_active_size Size of brick (same rank as parameter) @param origin Domain origin offset @param bD Slice-friendly size of brick's domain @param parameter_name Parameter name as string @return N/A """ pm = self.parameter_metadata[parameter_name] # rtree_extents, brick_extents, brick_active_size = self.calculate_extents(origin, bD, parameter_name) # # do_write, bguid = self._brick_exists(parameter_name, brick_extents) # if not do_write: # log.debug('Brick already exists! 
Updating brick metadata...') # pm.brick_list[bguid] = [brick_extents, origin, tuple(bD), brick_active_size] # else: log.debug('Writing virtual brick for parameter %s', parameter_name) # Set HDF5 file and group # Create a GUID for the brick brick_guid = create_guid() brick_file_name = '{0}.hdf5'.format(brick_guid) brick_rel_path = os.path.join(pm.root_dir.replace(self.root_dir,'.'), brick_file_name) link_path = '/{0}/{1}'.format(parameter_name, brick_guid) # Add brick to Master HDF file self.master_manager.add_external_link(link_path, brick_rel_path, brick_guid) # Update the brick listing log.debug('Updating brick list[%s] with (%s, %s)', parameter_name, brick_guid, brick_extents) brick_count = self.parameter_brick_count(parameter_name) pm.brick_list[brick_guid] = [brick_extents, origin, tuple(bD), brick_active_size] log.debug('Brick count for %s is %s', parameter_name, brick_count) # Insert into Rtree log.debug('Inserting into Rtree %s:%s:%s', brick_count, rtree_extents, brick_guid) pm.update_rtree(brick_count, rtree_extents, obj=brick_guid) # Expand the domain def expand_domain(self, parameter_context, do_flush=False): """ Expands a parameter's total domain based on the requested new temporal and/or spatial domains. Temporal domain expansion is most typical. Number of dimensions may not change for the parameter. @param parameter_context ParameterContext object @param tdom Requested new temporal domain size @param sdom Requested new spatial domain size @return N/A """ if self.mode == 'r': raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode)) parameter_name = parameter_context.name log.debug('Expand %s', parameter_name) pm = self.parameter_metadata[parameter_name] if pm.brick_domains[0] is not None: log.debug('Expanding domain (n-dimension)') # Check if the number of dimensions of the total domain has changed # TODO: Will this ever happen??? If so, how to handle? 
if len(parameter_context.dom.total_extents) != len(pm.brick_domains[0]): raise SystemError('Number of dimensions for parameter cannot change, only expand in size! No action performed.') else: tD = pm.brick_domains[0] bD = pm.brick_domains[1] cD = pm.brick_domains[2] if not isinstance(pm.parameter_context.param_type, (FunctionType, ConstantType)): # These have constant storage, never expand!! new_domain = parameter_context.dom.total_extents delta_domain = [(x - y) for x, y in zip(new_domain, tD)] log.debug('delta domain: %s', delta_domain) tD = [(x + y) for x, y in zip(tD, delta_domain)] pm.brick_domains[0] = tD else: tD = parameter_context.dom.total_extents bricking_scheme = pm.brick_domains[3] bD,cD = self.calculate_brick_size(tD, bricking_scheme) pm.brick_domains = [tD, bD, cD, bricking_scheme] try: # Gather block list log.trace('tD, bD, cD: %s, %s, %s', tD, bD, cD) lst = [range(d)[::bD[i]] for i,d in enumerate(tD)] # Gather brick origins need_origins = set(itertools.product(*lst)) log.trace('need_origins: %s', need_origins) have_origins = set([v[1] for k,v in pm.brick_list.iteritems() if v[2] == v[3]]) log.trace('have_origins: %s', have_origins) need_origins.difference_update(have_origins) log.trace('need_origins: %s', need_origins) need_origins = list(need_origins) need_origins.sort() if len(need_origins)>0: log.debug('Number of Bricks to Create: %s', len(need_origins)) # # Write brick to HDF5 file # map(lambda origin: self.write_brick(origin,bD,parameter_name), need_origins) # Write brick to HDF5 file for origin in need_origins: rtree_extents, brick_extents, brick_active_size = self.calculate_extents(origin, bD, parameter_name) do_write, bguid = self._brick_exists(parameter_name, brick_extents) if not do_write: log.debug('Brick already exists! 
Updating brick metadata...') pm.brick_list[bguid] = [brick_extents, origin, tuple(bD), brick_active_size] else: self._write_brick(rtree_extents, brick_extents, brick_active_size, origin, bD, parameter_name) else: log.debug('No bricks to create to satisfy the domain expansion...') except Exception: raise ## .flush() is called by insert_timesteps - no need to call these here if do_flush: # Flush the parameter_metadata pm.flush() # If necessary (i.e. write_brick has been called), flush the master_manager if self.master_manager.is_dirty(): self.master_manager.flush() # Returns a count of bricks for a parameter def parameter_brick_count(self, parameter_name): """ Counts and returns the number of bricks in a given parameter's brick list @param parameter_name Name of parameter @return The number of virtual bricks """ ret = 0 if parameter_name in self.parameter_metadata: ret = len(self.parameter_metadata[parameter_name].brick_list) else: log.debug('No bricks found for parameter: %s', parameter_name) return ret def has_dirty_values(self): """ Checks if the master file values have been modified @return True if master file metadata has been modified """ for v in self.value_list.itervalues(): if v.has_dirty_values(): return True return False def get_dirty_values_async_result(self): return_now = False if self.mode == 'r': log.warn('PersistenceLayer not open for writing: mode=%s', self.mode) return_now = True if self.brick_dispatcher is None: log.debug('\'brick_dispatcher\' is None') return_now = True if return_now: from gevent.event import AsyncResult ret = AsyncResult() ret.set(True) return ret return self.brick_dispatcher.get_dirty_values_async_result() def update_domain(self, tdom=None, sdom=None, do_flush=True): """ Updates the temporal and/or spatial domain in the MasterManager. 
If do_flush is unspecified or True, the MasterManager is flushed within this call @param tdom the value to update the Temporal Domain to @param sdom the value to update the Spatial Domain to @param do_flush Flush the MasterManager after updating the value(s); Default is True """ if self.mode == 'r': raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode)) # Update the global tdom & sdom as necessary if tdom is not None: self.master_manager.tdom = tdom if sdom is not None: self.master_manager.sdom = sdom if do_flush: self.master_manager.flush() def flush_values(self): if self.mode == 'r': log.warn('PersistenceLayer not open for writing: mode=%s', self.mode) return for k, v in self.value_list.iteritems(): v.flush_values() return self.get_dirty_values_async_result() def flush(self): if self.mode == 'r': log.warn('PersistenceLayer not open for writing: mode=%s', self.mode) return self.flush_values() for pk, pm in self.parameter_metadata.iteritems(): log.debug('Flushing ParameterManager for \'%s\'...', pk) pm.flush() log.debug('Flushing MasterManager...') self.master_manager.flush() def close(self, force=False, timeout=None): if not self._closed: if self.mode != 'r': self.flush() if self.brick_dispatcher is not None: self.brick_dispatcher.shutdown(force=force, timeout=timeout) self._closed = True
class PersistenceLayer(object):
    """
    The PersistenceLayer class manages the disk-level storage (and retrieval) of the Coverage Model using HDF5 files.
    """

    def __init__(self, root, guid, name=None, tdom=None, sdom=None, mode=None, bricking_scheme=None, inline_data_writes=True, auto_flush_values=True, value_caching=True, coverage_type=None, **kwargs):
        """
        Constructor for PersistenceLayer

        @param root  The <root> component of the filesystem path for the coverage (/<root>/<guid>)
        @param guid  The <guid> component of the filesystem path for the coverage (/<root>/<guid>)
        @param name  CoverageModel's name persisted to the metadata attribute in the master HDF5 file
        @param tdom  Concrete instance of AbstractDomain for the temporal domain component
        @param sdom  Concrete instance of AbstractDomain for the spatial domain component
        @param mode  I/O mode; 'r' disables all writes
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @param inline_data_writes  True = writes happen inline (no dispatcher); False = use BrickWriterDispatcher
        @param auto_flush_values  True = Values flushed to HDF5 files automatically, False = Manual
        @param value_caching  if True (default), value requests should be cached for rapid duplicate retrieval
        @param coverage_type  Type tag persisted to the master manager
        @param kwargs
        @return None
        """
        log.debug('Persistence GUID: %s', guid)
        # BUG FIX: was `root is ('' or None)`, which evaluates to `root is None` and
        # never defaulted an empty-string root to '.'
        root = '.' if root in (None, '') else root

        self.master_manager = MasterManager(root, guid, name=name, tdom=tdom, sdom=sdom, global_bricking_scheme=bricking_scheme, parameter_bounds=None, coverage_type=coverage_type)

        self.mode = mode
        # Only set these when loading an older master file that lacks them
        if not hasattr(self.master_manager, 'auto_flush_values'):
            self.master_manager.auto_flush_values = auto_flush_values
        if not hasattr(self.master_manager, 'inline_data_writes'):
            self.master_manager.inline_data_writes = inline_data_writes
        if not hasattr(self.master_manager, 'value_caching'):
            self.master_manager.value_caching = value_caching
        if not hasattr(self.master_manager, 'coverage_type'):
            self.master_manager.coverage_type = coverage_type

        # TODO: This is not done correctly
        if tdom is not None:
            self._init_master(tdom.shape.extents, bricking_scheme)

        self.value_list = {}

        self.parameter_metadata = {}  # {parameter_name: [brick_list, parameter_domains, rtree]}
        for pname in self.param_groups:
            log.debug('parameter group: %s', pname)
            self.parameter_metadata[pname] = ParameterManager(os.path.join(self.root_dir, self.guid, pname), pname)

        if self.mode != 'r':
            if self.master_manager.is_dirty():
                self.master_manager.flush()

        if self.mode == 'r' or self.inline_data_writes:
            self.brick_dispatcher = None
        else:
            self.brick_dispatcher = BrickWriterDispatcher(self.write_failure_callback)
            self.brick_dispatcher.run()

        self._closed = False

        log.debug('Persistence Layer Successfully Initialized')

    def __getattr__(self, key):
        # Delegate unknown attributes to the MasterManager when it has them
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            return getattr(self.master_manager, key)
        else:
            return getattr(super(PersistenceLayer, self), key)

    def __setattr__(self, key, value):
        # Mirror attribute writes onto the MasterManager when it owns the attribute
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            setattr(self.master_manager, key, value)
        else:
            super(PersistenceLayer, self).__setattr__(key, value)

    def update_parameter_bounds(self, parameter_name, bounds):
        """
        Widens the stored (min, max) bounds for a parameter and flushes the master file

        @param parameter_name  Name of the parameter
        @param bounds  (min, max) tuple to merge into the stored bounds
        """
        dmin, dmax = bounds
        if parameter_name in self.parameter_bounds:
            pmin, pmax = self.parameter_bounds[parameter_name]
            dmin = min(dmin, pmin)
            dmax = max(dmax, pmax)
        self.parameter_bounds[parameter_name] = (dmin, dmax)
        self.master_manager.flush()

    def _init_master(self, tD, bricking_scheme):
        """Initializes the master manager's rtree, brick list and brick domains for total domain tD"""
        log.debug('Performing Rtree dict setup')
        bD, cD = self.calculate_brick_size(tD, bricking_scheme)  # remains same for each parameter

        self.master_manager._init_rtree(bD)

        self.master_manager.brick_list = {}

        self.master_manager.brick_domains = [tD, bD, cD, bricking_scheme]

    # CBM TODO: This needs to be improved greatly - should callback all the way to the Application layer as a "failure handler"
    def write_failure_callback(self, message, work):
        log.error('WORK DISCARDED!!!; %s: %s', message, work)

    def calculate_brick_size(self, tD, bricking_scheme):
        """
        Calculates and returns the brick and chunk size for each dimension in the total
        domain based on the bricking scheme

        @param tD  Total domain
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @return Brick and Chunk sizes based on the total domain
        """
        log.debug('Calculating the size of a brick...')
        log.debug('Bricking scheme: %s', bricking_scheme)
        log.debug('tD: %s', tD)
        bD = [bricking_scheme['brick_size'] for x in tD]
        cD = [bricking_scheme['chunk_size'] for x in tD]
        log.debug('bD: %s', bD)
        log.debug('cD: %s', cD)
        return bD, tuple(cD)

    def init_parameter(self, parameter_context, bricking_scheme):
        """
        Initializes a parameter using a ParameterContext object and a bricking scheme
        for that parameter

        @param parameter_context  ParameterContext object describing the parameter to initialize
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @return A PersistedStorage object
        @raise IOError  if the PersistenceLayer is not open for writing
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        parameter_name = parameter_context.name

        self.global_bricking_scheme = bricking_scheme

        pm = ParameterManager(os.path.join(self.root_dir, self.guid, parameter_name), parameter_name, read_only=False)
        self.parameter_metadata[parameter_name] = pm

        pm.parameter_context = parameter_context

        log.debug('Initialize %s', parameter_name)

        self.master_manager.create_group(parameter_name)

        if parameter_context.param_type._value_class == 'SparseConstantValue':
            v = SparsePersistedStorage(pm, self.master_manager, self.brick_dispatcher, dtype=parameter_context.param_type.storage_encoding, fill_value=parameter_context.param_type.fill_value, mode=self.mode, inline_data_writes=self.inline_data_writes, auto_flush=self.auto_flush_values)
        else:
            v = PersistedStorage(pm, self.master_manager, self.brick_dispatcher, dtype=parameter_context.param_type.storage_encoding, fill_value=parameter_context.param_type.fill_value, mode=self.mode, inline_data_writes=self.inline_data_writes, auto_flush=self.auto_flush_values)
        self.value_list[parameter_name] = v

        # CBM TODO: Consider making this optional and bulk-flushing from the coverage
        # after all parameters have been initialized
        # No need to check if they're dirty, we know they are!
        pm.flush()
        # Put the pm into read_only mode
        pm.read_only = True

        # If there are already bricks, ensure there are appropriate links for this new parameter
        for brick_guid in self.master_manager.brick_list:
            brick_file_name = '{0}.hdf5'.format(brick_guid)
            self._add_brick_link(parameter_name, brick_guid, brick_file_name)

        self.master_manager.flush()

        return v

    def calculate_extents(self, origin, bD, total_extents):
        """
        Calculates and returns the Rtree extents, brick extents and active brick size
        for the parameter

        @param origin  The origin of the brick in index space
        @param bD  The brick's domain in index space
        @param total_extents  The total extents of the domain (index space)
        @return rtree_extents, tuple(brick_extents), tuple(brick_active_size)
        """
        # Calculate the brick extents
        origin = list(origin)

        # Calculate the extents for the Rtree (index space)
        rtree_extents = origin + map(lambda o, s: o + s - 1, origin, bD)
        # Fake out the rtree if rank == 1
        if len(origin) == 1:
            rtree_extents = [e for ext in zip(rtree_extents, [0 for x in rtree_extents]) for e in ext]
        log.debug('Rtree extents: %s', rtree_extents)

        # Calculate the extents of the brick (index space)
        brick_extents = zip(origin, map(lambda o, s: o + s - 1, origin, bD))
        log.debug('Brick extents: %s', brick_extents)

        # Calculate active size using the inner extent of the domain within a brick (value space)
        brick_active_size = map(lambda o, s: min(o, s[1] + 1) - s[0], total_extents, brick_extents)
        log.debug('Brick active size: %s', brick_active_size)

        # When loaded, brick_extents and brick_active_extents will be tuples...so, convert them now to allow clean comparison
        return rtree_extents, tuple(brick_extents), tuple(brick_active_size)

    def _brick_exists_master(self, brick_extents):
        """
        Checks the master brick list for a brick with the given extents

        @param brick_extents  The brick extents to look for
        @return (do_write, brick_guid): do_write = False and the found brick's GUID if a
                matching brick exists; otherwise True and an empty GUID
        """
        do_write = True
        brick_guid = ''
        for x, v in self.master_manager.brick_list.iteritems():
            if brick_extents == v[0]:
                log.debug('Brick found with matching extents: guid=%s', x)
                do_write = False
                brick_guid = x
                break

        return do_write, brick_guid

    def _add_brick_link(self, parameter_name, brick_guid, brick_file_name):
        """Adds an HDF5 ExternalLink in the master file for the given parameter/brick pair"""
        brick_rel_path = os.path.join(self.parameter_metadata[parameter_name].root_dir.replace(self.root_dir, '.'), brick_file_name)
        link_path = '/{0}/{1}'.format(parameter_name, brick_guid)

        # Add brick to Master HDF file
        self.master_manager.add_external_link(link_path, brick_rel_path, brick_guid)

    # Write empty HDF5 brick to the filesystem
    def _write_brick(self, rtree_extents, brick_extents, brick_active_size, origin, bD):
        """
        Creates a virtual brick in the PersistenceLayer by updating the HDF5 master
        file's brick list, rtree and ExternalLink to where the HDF5 file will be saved
        in the future (lazy create)

        @param rtree_extents  Total extents of brick's domain in rtree format
        @param brick_extents  Size of brick
        @param brick_active_size  Size of brick (same rank as parameter)
        @param origin  Domain origin offset
        @param bD  Slice-friendly size of brick's domain
        @return N/A
        """
        log.debug('Writing virtual brick...')

        # Set HDF5 file and group
        # Create a GUID for the brick
        brick_guid = create_guid()
        brick_file_name = '{0}.hdf5'.format(brick_guid)

        # TODO: Inclusion of external links only used for external viewing of master file, remove if non-performant
        for parameter_name in self.parameter_metadata.keys():
            self._add_brick_link(parameter_name, brick_guid, brick_file_name)

        # Update the brick listing
        log.debug('Updating brick list[%s] with (%s, %s, %s, %s)', brick_guid, brick_extents, origin, tuple(bD), brick_active_size)
        brick_count = len(self.master_manager.brick_list)
        self.master_manager.brick_list[brick_guid] = [brick_extents, origin, tuple(bD), brick_active_size]
        log.debug('Brick count is %s', brick_count)

        # Insert into Rtree
        log.debug('Inserting into Rtree %s:%s:%s', brick_count, rtree_extents, brick_guid)
        self.master_manager.update_rtree(brick_count, rtree_extents, obj=brick_guid)

    # Expand the domain
    def expand_domain(self, total_extents, do_flush=False):
        """
        Expands a parameter's total domain based on the requested new temporal and/or
        spatial domains.  Temporal domain expansion is most typical.
        Number of dimensions may not change for the parameter.

        @param total_extents  The total extents of the domain
        @param do_flush  If True, flush the master manager again after expansion (if dirty)
        @return N/A
        @raise IOError  if the PersistenceLayer is not open for writing
        @raise SystemError  if the rank of the requested domain differs from the current one
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        if self.master_manager.brick_domains[0] is not None:
            log.debug('Expanding domain (n-dimension)')

            # Check if the number of dimensions of the total domain has changed
            # TODO: Will this ever happen???  If so, how to handle?
            if len(total_extents) != len(self.master_manager.brick_domains[0]):
                raise SystemError('Number of dimensions for parameter cannot change, only expand in size! No action performed.')
            else:
                tD = self.master_manager.brick_domains[0]
                bD = self.master_manager.brick_domains[1]
                cD = self.master_manager.brick_domains[2]

                delta_domain = [(x - y) for x, y in zip(total_extents, tD)]
                log.debug('delta domain: %s', delta_domain)

                tD = [(x + y) for x, y in zip(tD, delta_domain)]
                self.master_manager.brick_domains[0] = tD
        else:
            tD = total_extents
            bricking_scheme = self.master_manager.brick_domains[3]
            bD, cD = self.calculate_brick_size(tD, bricking_scheme)
            self.master_manager.brick_domains = [tD, bD, cD, bricking_scheme]

        try:
            # Gather block list
            log.trace('tD, bD, cD: %s, %s, %s', tD, bD, cD)
            lst = [range(d)[::bD[i]] for i, d in enumerate(tD)]

            # Gather brick origins
            need_origins = set(itertools.product(*lst))
            log.trace('need_origins: %s', need_origins)

            # Bricks whose active size equals their full size are complete; keep them
            have_origins = set([v[1] for k, v in self.master_manager.brick_list.iteritems() if (v[2] == v[3])])
            log.trace('have_origins: %s', have_origins)
            need_origins.difference_update(have_origins)
            log.trace('need_origins: %s', need_origins)

            need_origins = list(need_origins)
            need_origins.sort()

            if len(need_origins) > 0:
                log.debug('Number of Bricks to Create: %s', len(need_origins))

                # Write virtual HDF5 brick file
                for origin in need_origins:
                    rtree_extents, brick_extents, brick_active_size = self.calculate_extents(origin, bD, total_extents)

                    do_write, bguid = self._brick_exists_master(brick_extents)
                    if not do_write:
                        log.debug('Brick already exists!  Updating brick metadata...')
                        self.master_manager.brick_list[bguid] = [brick_extents, origin, tuple(bD), brick_active_size]
                    else:
                        self._write_brick(rtree_extents, brick_extents, brick_active_size, origin, bD)
            else:
                log.debug('No bricks to create to satisfy the domain expansion...')
        except Exception:
            raise

        ## .flush() is called by insert_timesteps - no need to call these here
        self.master_manager.flush()
        if do_flush:
            # If necessary (i.e. write_brick has been called), flush the master_manager
            if self.master_manager.is_dirty():
                self.master_manager.flush()

    def shrink_domain(self, total_domain, do_flush=True):
        """
        Shrinks the total domain to total_domain, discarding any bricks that fall
        entirely beyond the new extent

        @param total_domain  The new (smaller) total domain
        @param do_flush  Flush the MasterManager after shrinking (if dirty); Default is True
        """
        from coverage_model import bricking_utils

        # Find the last brick needed to contain the domain
        brick = bricking_utils.get_bricks_from_slice(total_domain, self.master_manager.brick_tree)
        bid, bguid = brick[0]

        # Get the brick_guids for all the bricks after the one we need
        rm_bricks = [s.value for s in self.master_manager.brick_tree._spans[bid + 1:]]

        # Remove everything that comes after the brick we still need from the RTree
        self.master_manager.brick_tree._spans = self.master_manager.brick_tree._spans[:bid + 1]

        # Remove the unnecessary bricks from the brick list
        for r in rm_bricks:
            del self.master_manager.brick_list[r]  # and the file system...

        # Reset the first member of brick_domains
        self.master_manager.brick_domains[0] = list(total_domain)

        # And the appropriate entry in brick_list
        self.master_manager.brick_list[bguid] = tuple(self.master_manager.brick_list[bguid][:-1]) + ((total_domain[0] - self.master_manager.brick_list[bguid][1][0],),)

        if do_flush:
            if self.master_manager.is_dirty():
                self.master_manager.flush()

    def has_dirty_values(self):
        """
        Checks if any in-memory value object holds unflushed modifications

        @return True if any value object has dirty values
        """
        for v in self.value_list.itervalues():
            if v.has_dirty_values():
                return True

        return False

    def get_dirty_values_async_result(self):
        """
        Returns an AsyncResult fulfilled when dirty values have been flushed.

        If the layer is read-only or there is no brick dispatcher, an already-set
        AsyncResult is returned immediately.
        """
        return_now = False
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return_now = True

        if self.brick_dispatcher is None:
            log.debug('\'brick_dispatcher\' is None')
            return_now = True

        if return_now:
            from gevent.event import AsyncResult
            ret = AsyncResult()
            ret.set(True)
            return ret

        return self.brick_dispatcher.get_dirty_values_async_result()

    def update_domain(self, tdom=None, sdom=None, do_flush=True):
        """
        Updates the temporal and/or spatial domain in the MasterManager.

        If do_flush is unspecified or True, the MasterManager is flushed within this call

        @param tdom  the value to update the Temporal Domain to
        @param sdom  the value to update the Spatial Domain to
        @param do_flush  Flush the MasterManager after updating the value(s); Default is True
        @raise IOError  if the PersistenceLayer is not open for writing
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        # Update the global tdom & sdom as necessary
        if tdom is not None:
            self.master_manager.tdom = tdom
        if sdom is not None:
            self.master_manager.sdom = sdom

        if do_flush:
            self.master_manager.flush()

    def flush_values(self):
        """
        Flushes all value objects; returns the dirty-values AsyncResult
        (None when the layer is read-only)
        """
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return

        for k, v in self.value_list.iteritems():
            v.flush_values()

        return self.get_dirty_values_async_result()

    def flush(self):
        """
        Flushes values, the MasterManager, then every ParameterManager to disk
        (no-op when the layer is read-only)
        """
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return

        self.flush_values()
        log.debug('Flushing MasterManager...')
        self.master_manager.flush()
        for pk, pm in self.parameter_metadata.iteritems():
            log.debug('Flushing ParameterManager for \'%s\'...', pk)
            pm.flush()

    def close(self, force=False, timeout=None):
        """
        Flushes (when writable) and shuts down the brick dispatcher; idempotent

        @param force  Passed through to the brick dispatcher shutdown
        @param timeout  Passed through to the brick dispatcher shutdown
        """
        if not self._closed:
            if self.mode != 'r':
                self.flush()
            if self.brick_dispatcher is not None:
                self.brick_dispatcher.shutdown(force=force, timeout=timeout)

        self._closed = True
class PersistenceLayer(object):
    """
    Manages the disk-level storage (and retrieval) of the Coverage Model using HDF5 files.
    """

    def __init__(self, root, guid, name=None, tdom=None, sdom=None, bricking_scheme=None, auto_flush_values=True, **kwargs):
        """
        Constructor for Persistence Layer

        @param root  Where to save/look for HDF5 files
        @param guid  CoverageModel GUID
        @param name  CoverageModel Name
        @param tdom  Temporal Domain
        @param sdom  Spatial Domain
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @param auto_flush_values  True = Values flushed to HDF5 files automatically, False = Manual
        @param kwargs
        @return None
        """
        log.debug('Persistence GUID: %s', guid)
        # BUG FIX: was `root is ('' or None)`, which evaluates to `root is None` and
        # never defaulted an empty-string root to '.'
        root = '.' if root in (None, '') else root

        self.master_manager = MasterManager(root, guid, name=name, tdom=tdom, sdom=sdom, global_bricking_scheme=bricking_scheme)

        self.auto_flush_values = auto_flush_values
        self.value_list = {}

        self.parameter_metadata = {}  # {parameter_name: [brick_list, parameter_domains, rtree]}
        for pname in self.param_groups:
            log.debug('parameter group: %s', pname)
            self.parameter_metadata[pname] = ParameterManager(os.path.join(self.root_dir, self.guid, pname), pname)

        if self.master_manager.is_dirty():
            self.master_manager.flush()

        self.brick_dispatcher = BrickWriterDispatcher(self.write_failure_callback)
        self.brick_dispatcher.run()

        self._closed = False

        log.info('Persistence Layer Successfully Initialized')

    def __getattr__(self, key):
        # Delegate unknown attributes to the MasterManager when it has them
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            return getattr(self.master_manager, key)
        else:
            return getattr(super(PersistenceLayer, self), key)

    def __setattr__(self, key, value):
        # Mirror attribute writes onto the MasterManager when it owns the attribute
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            setattr(self.master_manager, key, value)
        else:
            super(PersistenceLayer, self).__setattr__(key, value)

    # CBM TODO: This needs to be improved greatly - should callback all the way to the Application layer as a "failure handler"
    def write_failure_callback(self, message, work):
        log.error('WORK DISCARDED!!!; %s: %s', message, work)

    def calculate_brick_size(self, tD, bricking_scheme):
        """
        Calculate brick domain size given a target file system brick size (Mbytes) and dtype

        @param tD  Total domain
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @return Brick and Chunk sizes based on the total domain
        """
        log.debug('Calculating the size of a brick...')
        log.debug('Bricking scheme: %s', bricking_scheme)
        log.debug('tD: %s', tD)
        bD = [bricking_scheme['brick_size'] for x in tD]
        cD = [bricking_scheme['chunk_size'] for x in tD]
        log.debug('bD: %s', bD)
        log.debug('cD: %s', cD)
        return bD, tuple(cD)

    def init_parameter(self, parameter_context, bricking_scheme):
        """
        Initializes a parameter: creates its ParameterManager, rtree, brick domains and
        PersistedStorage, then expands the domain to cover the parameter's extents

        @param parameter_context  ParameterContext object describing the parameter to initialize
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @return A PersistedStorage object
        """
        parameter_name = parameter_context.name

        self.global_bricking_scheme = bricking_scheme

        pm = ParameterManager(os.path.join(self.root_dir, self.guid, parameter_name), parameter_name)
        self.parameter_metadata[parameter_name] = pm

        pm.parameter_context = parameter_context

        log.debug('Initialize %s', parameter_name)

        self.master_manager.create_group(parameter_name)

        log.debug('Performing Rtree dict setup')
        tD = parameter_context.dom.total_extents
        bD, cD = self.calculate_brick_size(tD, bricking_scheme)  # remains same for each parameter

        # Verify domain is Rtree friendly (rtree requires dimension >= 2)
        tree_rank = len(bD)
        log.debug('tree_rank: %s', tree_rank)
        if tree_rank == 1:
            tree_rank += 1
        log.debug('tree_rank: %s', tree_rank)
        p = rtree.index.Property()
        p.dimension = tree_rank
        brick_tree = rtree.index.Index(properties=p)

        pm.brick_list = {}
        if isinstance(parameter_context.param_type, (FunctionType, ConstantType)):  # These have constant storage, never expand!!
            pm.brick_domains = [(1,), (1,), (1,), bricking_scheme]
        else:
            pm.brick_domains = [tD, bD, cD, bricking_scheme]

        pm.tree_rank = tree_rank
        pm.brick_tree = brick_tree

        v = PersistedStorage(pm, self.brick_dispatcher, dtype=parameter_context.param_type.storage_encoding, fill_value=parameter_context.param_type.fill_value, auto_flush=self.auto_flush_values)
        self.value_list[parameter_name] = v

        self.expand_domain(parameter_context)

        if pm.is_dirty():
            pm.flush()

        if self.master_manager.is_dirty():
            self.master_manager.flush()

        return v

    def calculate_extents(self, origin, bD, parameter_name):
        """
        Calculates the Rtree extents, brick extents and active brick size for the parameter

        @param origin  The origin of the brick in index space
        @param bD  The brick's domain in index space
        @param parameter_name  The parameter name
        @return rtree_extents, tuple(brick_extents), tuple(brick_active_size)
        """
        log.debug('origin: %s', origin)
        log.debug('bD: %s', bD)
        log.debug('parameter_name: %s', parameter_name)

        # Calculate the brick extents
        origin = list(origin)

        pc = self.parameter_metadata[parameter_name].parameter_context
        total_extents = pc.dom.total_extents  # index space
        log.debug('Total extents for parameter %s: %s', parameter_name, total_extents)

        # Calculate the extents for the Rtree (index space)
        rtree_extents = origin + map(lambda o, s: o + s - 1, origin, bD)
        # Fake out the rtree if rank == 1
        if len(origin) == 1:
            rtree_extents = [e for ext in zip(rtree_extents, [0 for x in rtree_extents]) for e in ext]
        log.debug('Rtree extents: %s', rtree_extents)

        # Calculate the extents of the brick (index space)
        brick_extents = zip(origin, map(lambda o, s: o + s - 1, origin, bD))
        log.debug('Brick extents: %s', brick_extents)

        # Calculate active size using the inner extent of the domain within a brick (value space)
        brick_active_size = map(lambda o, s: min(o, s[1] + 1) - s[0], total_extents, brick_extents)
        log.debug('Brick active size: %s', brick_active_size)

        # When loaded, brick_extents and brick_active_extents will be tuples...so, convert them now to allow clean comparison
        return rtree_extents, tuple(brick_extents), tuple(brick_active_size)

    def _brick_exists(self, parameter_name, brick_extents):
        """
        Checks whether a brick with the given extents already exists for the parameter

        @param parameter_name  The parameter name
        @param brick_extents  The brick extents
        @return (do_write, brick_guid): do_write = False and the found brick's GUID if a
                matching brick exists; otherwise True and an empty GUID
        """
        # Make sure the brick doesn't already exist if we already have some bricks
        do_write = True
        brick_guid = ''
        log.debug('Check bricks for parameter \'%s\'', parameter_name)
        if parameter_name in self.parameter_metadata:
            for x, v in self.parameter_metadata[parameter_name].brick_list.iteritems():
                if brick_extents == v[0]:
                    log.debug('Brick found with matching extents: guid=%s', x)
                    do_write = False
                    brick_guid = x
                    break

        return do_write, brick_guid

    # Write empty HDF5 brick to the filesystem
    def write_brick(self, rtree_extents, brick_extents, brick_active_size, origin, bD, parameter_name):
        """
        Creates a virtual brick: updates the master file's ExternalLink, the parameter's
        brick list and rtree, then flushes any dirty metadata

        @param rtree_extents  Total extents of brick's domain in rtree format
        @param brick_extents  Size of brick
        @param brick_active_size  Size of brick (same rank as parameter)
        @param origin  Domain origin offset
        @param bD  Slice-friendly size of brick's domain
        @param parameter_name  Parameter name as string
        @return N/A
        """
        pm = self.parameter_metadata[parameter_name]

        log.debug('Writing virtual brick for parameter %s', parameter_name)

        # Set HDF5 file and group
        # Create a GUID for the brick
        brick_guid = create_guid()
        brick_file_name = '{0}.hdf5'.format(brick_guid)
        brick_rel_path = os.path.join(pm.root_dir.replace(self.root_dir, '.'), brick_file_name)
        link_path = '/{0}/{1}'.format(parameter_name, brick_guid)

        # Add brick to Master HDF file
        self.master_manager.add_external_link(link_path, brick_rel_path, brick_guid)

        # Update the brick listing
        log.debug('Updating brick list[%s] with (%s, %s)', parameter_name, brick_guid, brick_extents)
        brick_count = self.parameter_brick_count(parameter_name)
        pm.brick_list[brick_guid] = [brick_extents, origin, tuple(bD), brick_active_size]
        log.debug('Brick count for %s is %s', parameter_name, brick_count)

        # Insert into Rtree
        log.debug('Inserting into Rtree %s:%s:%s', brick_count, rtree_extents, brick_guid)
        pm.update_rtree(brick_count, rtree_extents, obj=brick_guid)

        # Flush the parameter_metadata
        if pm.is_dirty():
            pm.flush()

        if self.master_manager.is_dirty():
            self.master_manager.flush()

    # Expand the domain
    def expand_domain(self, parameter_context, tdom=None, sdom=None):
        """
        Expands a parameter's total domain based on its ParameterContext, creating any
        virtual bricks needed to cover the new extents, and optionally updates the
        global temporal/spatial domains

        @param parameter_context  ParameterContext object for the parameter to expand
        @param tdom  Optional new global Temporal Domain
        @param sdom  Optional new global Spatial Domain
        @return N/A
        @raise SystemError  if the rank of the requested domain differs from the current one
        """
        parameter_name = parameter_context.name
        log.debug('Expand %s', parameter_name)
        pm = self.parameter_metadata[parameter_name]

        if pm.brick_domains[0] is not None:
            log.debug('Expanding domain (n-dimension)')

            # Check if the number of dimensions of the total domain has changed
            # TODO: Will this ever happen???  If so, how to handle?
            if len(parameter_context.dom.total_extents) != len(pm.brick_domains[0]):
                raise SystemError('Number of dimensions for parameter cannot change, only expand in size! No action performed.')
            else:
                tD = pm.brick_domains[0]
                bD = pm.brick_domains[1]
                cD = pm.brick_domains[2]
                if not isinstance(pm.parameter_context.param_type, (FunctionType, ConstantType)):  # These have constant storage, never expand!!
                    new_domain = parameter_context.dom.total_extents
                    delta_domain = [(x - y) for x, y in zip(new_domain, tD)]
                    log.debug('delta domain: %s', delta_domain)
                    tD = [(x + y) for x, y in zip(tD, delta_domain)]
                    pm.brick_domains[0] = tD
        else:
            tD = parameter_context.dom.total_extents
            bricking_scheme = pm.brick_domains[3]
            bD, cD = self.calculate_brick_size(tD, bricking_scheme)
            pm.brick_domains = [tD, bD, cD, bricking_scheme]

        try:
            # Gather block list
            log.trace('tD, bD, cD: %s, %s, %s', tD, bD, cD)
            lst = [range(d)[::bD[i]] for i, d in enumerate(tD)]

            # Gather brick origins
            need_origins = set(itertools.product(*lst))
            log.trace('need_origins: %s', need_origins)

            # Bricks whose active size equals their full size are complete; keep them
            have_origins = set([v[1] for k, v in pm.brick_list.iteritems() if v[2] == v[3]])
            log.trace('have_origins: %s', have_origins)
            need_origins.difference_update(have_origins)
            log.trace('need_origins: %s', need_origins)

            need_origins = list(need_origins)
            need_origins.sort()

            if len(need_origins) > 0:
                log.debug('Number of Bricks to Create: %s', len(need_origins))

                # Write brick to HDF5 file
                for origin in need_origins:
                    rtree_extents, brick_extents, brick_active_size = self.calculate_extents(origin, bD, parameter_name)

                    do_write, bguid = self._brick_exists(parameter_name, brick_extents)
                    if not do_write:
                        log.debug('Brick already exists!  Updating brick metadata...')
                        pm.brick_list[bguid] = [brick_extents, origin, tuple(bD), brick_active_size]
                    else:
                        self.write_brick(rtree_extents, brick_extents, brick_active_size, origin, bD, parameter_name)
            else:
                log.debug('No bricks to create to satisfy the domain expansion...')
        except Exception:
            raise

        # Flush the parameter_metadata
        if pm.is_dirty():
            pm.flush()

        # Update the global tdom & sdom as necessary
        if tdom is not None:
            self.master_manager.tdom = tdom
        if sdom is not None:
            self.master_manager.sdom = sdom

        if self.master_manager.is_dirty():
            self.master_manager.flush()

    # Returns a count of bricks for a parameter
    def parameter_brick_count(self, parameter_name):
        """
        Counts and returns the number of bricks in a given parameter's brick list

        @param parameter_name  Name of parameter
        @return The number of virtual bricks
        """
        ret = 0
        if parameter_name in self.parameter_metadata:
            ret = len(self.parameter_metadata[parameter_name].brick_list)
        else:
            log.debug('No bricks found for parameter: %s', parameter_name)

        return ret

    def has_dirty_values(self):
        """
        Checks if any in-memory value object holds unflushed modifications

        @return True if any value object has dirty values
        """
        for v in self.value_list.itervalues():
            if v.has_dirty_values():
                return True

        return False

    def get_dirty_values_async_result(self):
        """Returns the brick dispatcher's dirty-values AsyncResult"""
        return self.brick_dispatcher.get_dirty_values_async_result()

    def flush_values(self):
        """Flushes all value objects; returns the dirty-values AsyncResult"""
        for k, v in self.value_list.iteritems():
            v.flush_values()

        return self.get_dirty_values_async_result()

    def flush(self):
        """Flushes every ParameterManager, then the MasterManager, to disk"""
        for pk, pm in self.parameter_metadata.iteritems():
            log.debug('Flushing ParameterManager for \'%s\'...', pk)
            pm.flush()
        log.debug('Flushing MasterManager...')
        self.master_manager.flush()

    def close(self, force=False, timeout=None):
        """
        Flushes and shuts down the brick dispatcher; idempotent

        @param force  Passed through to the brick dispatcher shutdown
        @param timeout  Passed through to the brick dispatcher shutdown
        """
        if not self._closed:
            self.flush()
            self.brick_dispatcher.shutdown(force=force, timeout=timeout)

        self._closed = True