Пример #1
0
    def __init__(self, root, guid, name=None, tdom=None, sdom=None, mode=None, bricking_scheme=None, inline_data_writes=True, auto_flush_values=True, value_caching=True, coverage_type=None, **kwargs):
        """
        Constructor for PersistenceLayer

        @param root The <root> component of the filesystem path for the coverage (/<root>/<guid>)
        @param guid The <guid> component of the filesystem path for the coverage (/<root>/<guid>)
        @param name CoverageModel's name persisted to the metadata attribute in the master HDF5 file
        @param tdom Concrete instance of AbstractDomain for the temporal domain component
        @param sdom Concrete instance of AbstractDomain for the spatial domain component
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @param auto_flush_values    True = Values flushed to HDF5 files automatically, False = Manual
        @param value_caching  if True (default), value requests should be cached for rapid duplicate retrieval
        @param kwargs
        @return None
        """

        log.debug('Persistence GUID: %s', guid)
        root = '.' if root is ('' or None) else root

        self.master_manager = MasterManager(root, guid, name=name, tdom=tdom, sdom=sdom, global_bricking_scheme=bricking_scheme, parameter_bounds=None, coverage_type=coverage_type)

        self.mode = mode
        if not hasattr(self.master_manager, 'auto_flush_values'):
            self.master_manager.auto_flush_values = auto_flush_values
        if not hasattr(self.master_manager, 'inline_data_writes'):
            self.master_manager.inline_data_writes = inline_data_writes
        if not hasattr(self.master_manager, 'value_caching'):
            self.master_manager.value_caching = value_caching
        if not hasattr(self.master_manager, 'coverage_type'):
            self.master_manager.coverage_type = coverage_type

        # TODO: This is not done correctly
        if tdom != None:
            self._init_master(tdom.shape.extents, bricking_scheme)

        self.value_list = {}

        self.parameter_metadata = {} # {parameter_name: [brick_list, parameter_domains, rtree]}

        for pname in self.param_groups:
            log.debug('parameter group: %s', pname)
            self.parameter_metadata[pname] = ParameterManager(os.path.join(self.root_dir, self.guid, pname), pname)

        if self.mode != 'r':
            if self.master_manager.is_dirty():
                self.master_manager.flush()

        if self.mode == 'r' or self.inline_data_writes:
            self.brick_dispatcher = None
        else:
            self.brick_dispatcher = BrickWriterDispatcher(self.write_failure_callback)
            self.brick_dispatcher.run()

        self._closed = False

        log.debug('Persistence Layer Successfully Initialized')
Пример #2
0
    def __init__(self, root, guid, name=None, tdom=None, sdom=None, bricking_scheme=None, auto_flush_values=True, **kwargs):
        """
        Constructor for Persistence Layer
        @param root: Where to save/look for HDF5 files
        @param guid: CoverageModel GUID
        @param name: CoverageModel Name
        @param tdom: Temporal Domain
        @param sdom: Spatial Domain
        @param kwargs:
        @return:
        """
        log.debug('Persistence GUID: %s', guid)
        root = '.' if root is ('' or None) else root

        self.master_manager = MasterManager(root, guid, name=name, tdom=tdom, sdom=sdom, global_bricking_scheme=bricking_scheme)

        self.auto_flush_values = auto_flush_values
        self.value_list = {}

        self.parameter_metadata = {} # {parameter_name: [brick_list, parameter_domains, rtree]}

        for pname in self.param_groups:
            log.debug('parameter group: %s', pname)
            self.parameter_metadata[pname] = ParameterManager(os.path.join(self.root_dir, self.guid, pname), pname)

        if self.master_manager.is_dirty():
            self.master_manager.flush()

        self.brick_dispatcher = BrickWriterDispatcher(self.write_failure_callback)
        self.brick_dispatcher.run()

        self._closed = False

        log.info('Persistence Layer Successfully Initialized')
Пример #3
0
    def __init__(self, root, guid, name=None, tdom=None, sdom=None, mode=None, bricking_scheme=None, inline_data_writes=True, auto_flush_values=True, value_caching=True, coverage_type=None, **kwargs):
        """
        Constructor for PersistenceLayer

        @param root The <root> component of the filesystem path for the coverage (/<root>/<guid>)
        @param guid The <guid> component of the filesystem path for the coverage (/<root>/<guid>)
        @param name CoverageModel's name persisted to the metadata attribute in the master HDF5 file
        @param tdom Concrete instance of AbstractDomain for the temporal domain component
        @param sdom Concrete instance of AbstractDomain for the spatial domain component
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @param auto_flush_values    True = Values flushed to HDF5 files automatically, False = Manual
        @param value_caching  if True (default), value requests should be cached for rapid duplicate retrieval
        @param kwargs
        @return None
        """

        log.debug('Persistence GUID: %s', guid)
        root = '.' if root is ('' or None) else root

#        self.master_manager = MasterManager(root, guid, name=name, tdom=tdom, sdom=sdom, global_bricking_scheme=bricking_scheme, parameter_bounds=None, coverage_type=coverage_type, **kwargs)
        self.master_manager = MetadataManagerFactory.buildMetadataManager(root, guid, name=name, tdom=tdom, sdom=sdom, global_bricking_scheme=bricking_scheme, parameter_bounds=None, coverage_type=coverage_type, **kwargs)

        self.mode = mode
        if not hasattr(self.master_manager, 'auto_flush_values'):
            self.master_manager.auto_flush_values = auto_flush_values
        if not hasattr(self.master_manager, 'inline_data_writes'):
            self.master_manager.inline_data_writes = inline_data_writes
        if not hasattr(self.master_manager, 'value_caching'):
            self.master_manager.value_caching = value_caching
        if not hasattr(self.master_manager, 'coverage_type'):
            self.master_manager.coverage_type = coverage_type

        # TODO: This is not done correctly
        if tdom != None:
            self._init_master(tdom.shape.extents, bricking_scheme)

        self.value_list = {}

        self.parameter_metadata = {} # {parameter_name: [brick_list, parameter_domains, rtree]}

        for pname in self.param_groups:
            log.debug('parameter group: %s', pname)
            self.parameter_metadata[pname] = ParameterManager(os.path.join(self.root_dir, self.guid, pname), pname)

        if self.mode != 'r':
            if self.master_manager.is_dirty():
                self.master_manager.flush()

        if self.mode == 'r' or self.inline_data_writes:
            self.brick_dispatcher = None
        else:
            self.brick_dispatcher = BrickWriterDispatcher(self.write_failure_callback)
            self.brick_dispatcher.run()

        self._closed = False

        log.debug('Persistence Layer Successfully Initialized')
Пример #4
0
class PersistenceLayer(object):
    """
    The PersistenceLayer class manages the disk-level storage (and retrieval) of the Coverage Model using HDF5 files.
    """

    def __init__(self, root, guid, name=None, tdom=None, sdom=None, mode=None, bricking_scheme=None, inline_data_writes=True, auto_flush_values=True, value_caching=True, coverage_type=None, **kwargs):
        """
        Constructor for PersistenceLayer

        @param root The <root> component of the filesystem path for the coverage (/<root>/<guid>)
        @param guid The <guid> component of the filesystem path for the coverage (/<root>/<guid>)
        @param name CoverageModel's name persisted to the metadata attribute in the master HDF5 file
        @param tdom Concrete instance of AbstractDomain for the temporal domain component
        @param sdom Concrete instance of AbstractDomain for the spatial domain component
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @param auto_flush_values    True = Values flushed to HDF5 files automatically, False = Manual
        @param value_caching  if True (default), value requests should be cached for rapid duplicate retrieval
        @param kwargs
        @return None
        """

        log.debug('Persistence GUID: %s', guid)
        root = '.' if root is ('' or None) else root

#        self.master_manager = MasterManager(root, guid, name=name, tdom=tdom, sdom=sdom, global_bricking_scheme=bricking_scheme, parameter_bounds=None, coverage_type=coverage_type, **kwargs)
        self.master_manager = MetadataManagerFactory.buildMetadataManager(root, guid, name=name, tdom=tdom, sdom=sdom, global_bricking_scheme=bricking_scheme, parameter_bounds=None, coverage_type=coverage_type, **kwargs)

        self.mode = mode
        if not hasattr(self.master_manager, 'auto_flush_values'):
            self.master_manager.auto_flush_values = auto_flush_values
        if not hasattr(self.master_manager, 'inline_data_writes'):
            self.master_manager.inline_data_writes = inline_data_writes
        if not hasattr(self.master_manager, 'value_caching'):
            self.master_manager.value_caching = value_caching
        if not hasattr(self.master_manager, 'coverage_type'):
            self.master_manager.coverage_type = coverage_type

        # TODO: This is not done correctly
        if tdom != None:
            self._init_master(tdom.shape.extents, bricking_scheme)

        self.value_list = {}

        self.parameter_metadata = {} # {parameter_name: [brick_list, parameter_domains, rtree]}

        for pname in self.param_groups:
            log.debug('parameter group: %s', pname)
            self.parameter_metadata[pname] = ParameterManager(os.path.join(self.root_dir, self.guid, pname), pname)

        if self.mode != 'r':
            if self.master_manager.is_dirty():
                self.master_manager.flush()

        if self.mode == 'r' or self.inline_data_writes:
            self.brick_dispatcher = None
        else:
            self.brick_dispatcher = BrickWriterDispatcher(self.write_failure_callback)
            self.brick_dispatcher.run()

        self._closed = False

        log.debug('Persistence Layer Successfully Initialized')

    def __getattr__(self, key):
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            return getattr(self.master_manager, key)
        else:
            return getattr(super(PersistenceLayer, self), key)

    def __setattr__(self, key, value):
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            setattr(self.master_manager, key, value)
        else:
            super(PersistenceLayer, self).__setattr__(key, value)

    def update_parameter_bounds(self, parameter_name, bounds):
        dmin, dmax = bounds
        if parameter_name in self.parameter_bounds:
            pmin, pmax = self.parameter_bounds[parameter_name]
            dmin = min(dmin, pmin)
            dmax = max(dmax, pmax)
        self.parameter_bounds[parameter_name] = (dmin, dmax)
        self.master_manager.flush()

    def _init_master(self, tD, bricking_scheme):
        log.debug('Performing Rtree dict setup')
        # tD = parameter_context.dom.total_extents
        bD,cD = self.calculate_brick_size(tD, bricking_scheme) #remains same for each parameter

        self.master_manager._init_rtree(bD)

        self.master_manager.brick_list = {}
        self.master_manager.brick_domains = [tD, bD, cD, bricking_scheme]

    # CBM TODO: This needs to be improved greatly - should callback all the way to the Application layer as a "failure handler"
    def write_failure_callback(self, message, work):
        log.error('WORK DISCARDED!!!; %s: %s', message, work)

    def calculate_brick_size(self, tD, bricking_scheme):
        """
        Calculates and returns the brick and chunk size for each dimension
        in the total domain based on the bricking scheme

        @param tD   Total domain
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @return Brick and Chunk sizes based on the total domain
        """

        log.debug('Calculating the size of a brick...')
        log.debug('Bricking scheme: %s', bricking_scheme)
        log.debug('tD: %s', tD)
        bD = [bricking_scheme['brick_size'] for x in tD]
        cD = [bricking_scheme['chunk_size'] for x in tD]
        log.debug('bD: %s', bD)
        log.debug('cD: %s', cD)
        return bD,tuple(cD)

    def init_parameter(self, parameter_context, bricking_scheme):
        """
        Initializes a parameter using a ParameterContext object and a bricking
        scheme for that parameter

        @param parameter_context    ParameterContext object describing the parameter to initialize
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @return A PersistedStorage object
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        parameter_name = parameter_context.name

        self.global_bricking_scheme = bricking_scheme

        pm = ParameterManager(os.path.join(self.root_dir, self.guid, parameter_name), parameter_name, read_only=False)
        self.parameter_metadata[parameter_name] = pm

        pm.parameter_context = parameter_context

        log.debug('Initialize %s', parameter_name)

        self.master_manager.create_group(parameter_name)

        if parameter_context.param_type._value_class == 'SparseConstantValue':
            v = SparsePersistedStorage(pm, self.master_manager, self.brick_dispatcher, dtype=parameter_context.param_type.storage_encoding, fill_value=parameter_context.param_type.fill_value, mode=self.mode, inline_data_writes=self.inline_data_writes, auto_flush=self.auto_flush_values)
        else:
            v = PersistedStorage(pm, self.master_manager, self.brick_dispatcher, dtype=parameter_context.param_type.storage_encoding, fill_value=parameter_context.param_type.fill_value, mode=self.mode, inline_data_writes=self.inline_data_writes, auto_flush=self.auto_flush_values)
        self.value_list[parameter_name] = v

        # CBM TODO: Consider making this optional and bulk-flushing from the coverage after all parameters have been initialized
        # No need to check if they're dirty, we know they are!
        pm.flush()

        # Put the pm into read_only mode
        pm.read_only = True

        # If there are already bricks, ensure there are appropriate links for this new parameter
        for brick_guid in self.master_manager.brick_list:
            brick_file_name = '{0}.hdf5'.format(brick_guid)
            self._add_brick_link(parameter_name, brick_guid, brick_file_name)

        self.master_manager.flush()

        return v

    def calculate_extents(self, origin, bD, total_extents):
        """
        Calculates and returns the Rtree extents, brick extents and active brick size for the parameter

        @param origin   The origin of the brick in index space
        @param bD   The brick's domain in index space
        @param parameter_name   The parameter name
        @return rtree_extents, tuple(brick_extents), tuple(brick_active_size)
        """
        # Calculate the brick extents
        origin = list(origin)

        # Calculate the extents for the Rtree (index space)
        rtree_extents = origin + map(lambda o,s: o+s-1, origin, bD)
        # Fake out the rtree if rank == 1
        if len(origin) == 1:
            rtree_extents = [e for ext in zip(rtree_extents,[0 for x in rtree_extents]) for e in ext]
        log.debug('Rtree extents: %s', rtree_extents)

        # Calculate the extents of the brick (index space)
        brick_extents = zip(origin,map(lambda o,s: o+s-1, origin, bD))
        log.debug('Brick extents: %s', brick_extents)

        # Calculate active size using the inner extent of the domain within a brick (value space)
        brick_active_size = map(lambda o,s: min(o,s[1]+1)-s[0], total_extents, brick_extents)
        log.debug('Brick active size: %s', brick_active_size)

        # When loaded, brick_extents and brick_active_extents will be tuples...so, convert them now to allow clean comparison
        return rtree_extents, tuple(brick_extents), tuple(brick_active_size)

    def _brick_exists_master(self, brick_extents):
        do_write = True
        brick_guid = ''
        for x,v in self.master_manager.brick_list.iteritems():
            if brick_extents == v[0]:
                log.debug('Brick found with matching extents: guid=%s', x)
                do_write = False
                brick_guid = x
                break

        return do_write, brick_guid

    def _add_brick_link(self, parameter_name, brick_guid, brick_file_name):
        brick_rel_path = os.path.join(self.parameter_metadata[parameter_name].root_dir.replace(self.root_dir,'.'), brick_file_name)
        link_path = '/{0}/{1}'.format(parameter_name, brick_guid)

        # Add brick to Master HDF file
        self.master_manager.add_external_link(link_path, brick_rel_path, brick_guid)

    # Write empty HDF5 brick to the filesystem
    def _write_brick(self, rtree_extents, brick_extents, brick_active_size, origin, bD):
        """
        Creates a virtual brick in the PersistenceLayer by updating the HDF5 master file's
        brick list, rtree and ExternalLink to where the HDF5 file will be saved in the future (lazy create)

        @param rtree_extents    Total extents of brick's domain in rtree format
        @param brick_extents    Size of brick
        @param brick_active_size    Size of brick (same rank as parameter)
        @param origin   Domain origin offset
        @param bD   Slice-friendly size of brick's domain
        @return N/A
        """
        log.debug('Writing virtual brick...')

        # Set HDF5 file and group
        # Create a GUID for the brick
        brick_guid = create_guid()
        brick_file_name = '{0}.hdf5'.format(brick_guid)

        #TODO: Inclusion of external links only used for external viewing of master file, remove if non-performant
        for parameter_name in self.parameter_metadata.keys():
            self._add_brick_link(parameter_name, brick_guid, brick_file_name)

        # Update the brick listing
        log.debug('Updating brick list[%s] with (%s, %s, %s, %s)', brick_guid, brick_extents, origin, tuple(bD), brick_active_size)
        brick_count = len(self.master_manager.brick_list)
        self.master_manager.brick_list[brick_guid] = [brick_extents, origin, tuple(bD), brick_active_size]
        log.debug('Brick count is %s', brick_count)

        # Insert into Rtree
        log.debug('Inserting into Rtree %s:%s:%s', brick_count, rtree_extents, brick_guid)
        self.master_manager.update_rtree(brick_count, rtree_extents, obj=brick_guid)

    # Expand the domain
    def expand_domain(self, total_extents, do_flush=False):
        """
        Expands a parameter's total domain based on the requested new temporal and/or spatial domains.
        Temporal domain expansion is most typical.
        Number of dimensions may not change for the parameter.

        @param total_extents    The total extents of the domain
        @return N/A
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        if self.master_manager.brick_domains[0] is not None:
            log.debug('Expanding domain (n-dimension)')

            # Check if the number of dimensions of the total domain has changed
            # TODO: Will this ever happen???  If so, how to handle?
            if len(total_extents) != len(self.master_manager.brick_domains[0]):
                raise SystemError('Number of dimensions for parameter cannot change, only expand in size! No action performed.')
            else:
                tD = self.master_manager.brick_domains[0]
                bD = self.master_manager.brick_domains[1]
                cD = self.master_manager.brick_domains[2]

                delta_domain = [(x - y) for x, y in zip(total_extents, tD)]
                log.debug('delta domain: %s', delta_domain)

                tD = [(x + y) for x, y in zip(tD, delta_domain)]
                self.master_manager.brick_domains[0] = tD
        else:
            tD = total_extents
            bricking_scheme = self.master_manager.brick_domains[3]
            bD,cD = self.calculate_brick_size(tD, bricking_scheme)
            self.master_manager.brick_domains = [tD, bD, cD, bricking_scheme]

        try:
            # Gather block list
            log.trace('tD, bD, cD: %s, %s, %s', tD, bD, cD)
            lst = [range(d)[::bD[i]] for i,d in enumerate(tD)]

            # Gather brick origins
            need_origins = set(itertools.product(*lst))
            log.trace('need_origins: %s', need_origins)
            have_origins = set([v[1] for k,v in self.master_manager.brick_list.iteritems() if (v[2] == v[3])])
            log.trace('have_origins: %s', have_origins)
            need_origins.difference_update(have_origins)
            log.trace('need_origins: %s', need_origins)

            need_origins = list(need_origins)
            need_origins.sort()

            if len(need_origins)>0:
                log.debug('Number of Bricks to Create: %s', len(need_origins))

                # Write virtual HDF5 brick file
                for origin in need_origins:
                    rtree_extents, brick_extents, brick_active_size = self.calculate_extents(origin, bD, total_extents)

                    do_write, bguid = self._brick_exists_master(brick_extents)
                    if not do_write:
                        log.debug('Brick already exists!  Updating brick metadata...')
                        self.master_manager.brick_list[bguid] = [brick_extents, origin, tuple(bD), brick_active_size]
                    else:
                        self._write_brick(rtree_extents, brick_extents, brick_active_size, origin, bD)

            else:
                log.debug('No bricks to create to satisfy the domain expansion...')
        except Exception:
            raise

        ## .flush() is called by insert_timesteps - no need to call these here
        self.master_manager.flush()
        if do_flush:
            # If necessary (i.e. write_brick has been called), flush the master_manager461
            if self.master_manager.is_dirty():
                self.master_manager.flush()

    def shrink_domain(self, total_domain, do_flush=True):
        from coverage_model import bricking_utils
        # Find the last brick needed to contain the domain
        brick = bricking_utils.get_bricks_from_slice(total_domain, self.master_manager.brick_tree)

        bid, bguid = brick[0]

        # Get the brick_guids for all the bricks after the one we need
        rm_bricks = [s.value for s in self.master_manager.brick_tree._spans[bid+1:]]
        # Remove everything that comes after the brick we still need from the RTree
        self.master_manager.brick_tree._spans = self.master_manager.brick_tree._spans[:bid+1]

        # Remove the unnecessary bricks from the brick list
        for r in rm_bricks:
            del self.master_manager.brick_list[r]
            # and the file system...



        # Reset the first member of brick_domains
        self.master_manager.brick_domains[0] = list(total_domain)
        # And the appropriate entry in brick_list
        self.master_manager.brick_list[bguid] = tuple(self.master_manager.brick_list[bguid][:-1]) + ((total_domain[0] - self.master_manager.brick_list[bguid][1][0],),)

        if do_flush:
            if self.master_manager.is_dirty():
                self.master_manager.flush()

    def has_dirty_values(self):
        """
        Checks if the master file values have been modified

        @return True if master file metadata has been modified
        """
        for v in self.value_list.itervalues():
            if v.has_dirty_values():
                return True

        return False

    def get_dirty_values_async_result(self):
        return_now = False
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return_now = True

        if self.brick_dispatcher is None:
            log.debug('\'brick_dispatcher\' is None')
            return_now = True

        if return_now:
            from gevent.event import AsyncResult
            ret = AsyncResult()
            ret.set(True)
            return ret

        return self.brick_dispatcher.get_dirty_values_async_result()

    def update_domain(self, tdom=None, sdom=None, do_flush=True):
        """
        Updates the temporal and/or spatial domain in the MasterManager.

        If do_flush is unspecified or True, the MasterManager is flushed within this call

        @param tdom     the value to update the Temporal Domain to
        @param sdom     the value to update the Spatial Domain to
        @param do_flush    Flush the MasterManager after updating the value(s); Default is True
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        # Update the global tdom & sdom as necessary
        if tdom is not None:
            self.master_manager.tdom = tdom
        if sdom is not None:
            self.master_manager.sdom = sdom

        if do_flush:
            self.master_manager.flush()

    def flush_values(self):
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return

        for k, v in self.value_list.iteritems():
            v.flush_values()

        return self.get_dirty_values_async_result()

    def flush(self):
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return

        self.flush_values()
        log.debug('Flushing MasterManager...')
        self.master_manager.flush()
        for pk, pm in self.parameter_metadata.iteritems():
            log.debug('Flushing ParameterManager for \'%s\'...', pk)
            pm.flush()

    def close(self, force=False, timeout=None):
        if not self._closed:
            if self.mode != 'r':
                self.flush()
                if self.brick_dispatcher is not None:
                    self.brick_dispatcher.shutdown(force=force, timeout=timeout)

        self._closed = True
Пример #5
0
class PersistenceLayer(object):
    """
    The PersistenceLayer class manages the disk-level storage (and retrieval) of the Coverage Model using HDF5 files.
    """

    def __init__(self, root, guid, name=None, tdom=None, sdom=None, mode=None, bricking_scheme=None, inline_data_writes=True, auto_flush_values=True, **kwargs):
        """
        Constructor for PersistenceLayer

        @param root The <root> component of the filesystem path for the coverage (/<root>/<guid>)
        @param guid The <guid> component of the filesystem path for the coverage (/<root>/<guid>)
        @param name CoverageModel's name persisted to the metadata attribute in the master HDF5 file
        @param tdom Concrete instance of AbstractDomain for the temporal domain component
        @param sdom Concrete instance of AbstractDomain for the spatial domain component
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @param auto_flush_values    True = Values flushed to HDF5 files automatically, False = Manual
        @param kwargs
        @return None
        """

        log.debug('Persistence GUID: %s', guid)
        root = '.' if root is ('' or None) else root

        self.master_manager = MasterManager(root, guid, name=name, tdom=tdom, sdom=sdom, global_bricking_scheme=bricking_scheme)

        self.mode = mode
        if not hasattr(self.master_manager, 'auto_flush_values'):
            self.master_manager.auto_flush_values = auto_flush_values
        if not hasattr(self.master_manager, 'inline_data_writes'):
            self.master_manager.inline_data_writes = inline_data_writes
        self.value_list = {}

        self.parameter_metadata = {} # {parameter_name: [brick_list, parameter_domains, rtree]}

        for pname in self.param_groups:
            log.debug('parameter group: %s', pname)
            self.parameter_metadata[pname] = ParameterManager(os.path.join(self.root_dir, self.guid, pname), pname)

        if self.mode != 'r':
            if self.master_manager.is_dirty():
                self.master_manager.flush()

        if self.mode == 'r' or self.inline_data_writes:
            self.brick_dispatcher = None
        else:
            self.brick_dispatcher = BrickWriterDispatcher(self.write_failure_callback)
            self.brick_dispatcher.run()

        self._closed = False

        log.info('Persistence Layer Successfully Initialized')

    def __getattr__(self, key):
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            return getattr(self.master_manager, key)
        else:
            return getattr(super(PersistenceLayer, self), key)

    def __setattr__(self, key, value):
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            setattr(self.master_manager, key, value)
        else:
            super(PersistenceLayer, self).__setattr__(key, value)

    # CBM TODO: This needs to be improved greatly - should callback all the way to the Application layer as a "failure handler"
    def write_failure_callback(self, message, work):
        log.error('WORK DISCARDED!!!; %s: %s', message, work)

    def calculate_brick_size(self, tD, bricking_scheme):
        """
        Calculates and returns the brick and chunk size for each dimension
        in the total domain based on the bricking scheme

        @param tD   Total domain
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @return Brick and Chunk sizes based on the total domain
        """

        log.debug('Calculating the size of a brick...')
        log.debug('Bricking scheme: %s', bricking_scheme)
        log.debug('tD: %s', tD)
        bD = [bricking_scheme['brick_size'] for x in tD]
        cD = [bricking_scheme['chunk_size'] for x in tD]
        log.debug('bD: %s', bD)
        log.debug('cD: %s', cD)
        return bD,tuple(cD)

    def init_parameter(self, parameter_context, bricking_scheme):
        """
        Initializes a parameter using a ParameterContext object and a bricking
        scheme for that parameter

        @param parameter_context    ParameterContext object describing the parameter to initialize
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @return A PersistedStorage object
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        parameter_name = parameter_context.name

        self.global_bricking_scheme = bricking_scheme

        pm = ParameterManager(os.path.join(self.root_dir, self.guid, parameter_name), parameter_name)
        self.parameter_metadata[parameter_name] = pm

        pm.parameter_context = parameter_context

        log.debug('Initialize %s', parameter_name)

        self.master_manager.create_group(parameter_name)

        log.debug('Performing Rtree dict setup')
        tD = parameter_context.dom.total_extents
        bD,cD = self.calculate_brick_size(tD, bricking_scheme) #remains same for each parameter
        # Verify domain is Rtree friendly
        tree_rank = len(bD)
        log.debug('tree_rank: %s', tree_rank)
        if tree_rank == 1:
            tree_rank += 1
        log.debug('tree_rank: %s', tree_rank)
        p = rtree.index.Property()
        p.dimension = tree_rank

        brick_tree = rtree.index.Index(properties=p)

        pm.brick_list = {}
        if isinstance(parameter_context.param_type, (FunctionType, ConstantType)):
            # These have constant storage, never expand!!
                pm.brick_domains = [(1,),(1,),(1,),bricking_scheme]
        else:
            pm.brick_domains = [tD, bD, cD, bricking_scheme]

        pm.tree_rank = tree_rank
        pm.brick_tree = brick_tree

        v = PersistedStorage(pm, self.brick_dispatcher, dtype=parameter_context.param_type.storage_encoding, fill_value=parameter_context.param_type.fill_value, mode=self.mode, inline_data_writes=self.inline_data_writes, auto_flush=self.auto_flush_values)
        self.value_list[parameter_name] = v

        self.expand_domain(parameter_context)

        # CBM TODO: Consider making this optional and bulk-flushing from the coverage after all parameters have been initialized
        # No need to check if they're dirty, we know they are!
        pm.flush()
        self.master_manager.flush()

        return v

    def calculate_extents(self, origin, bD, parameter_name):
        """
        Calculates and returns the Rtree extents, brick extents and active brick size for the parameter

        @param origin   The origin of the brick in index space
        @param bD   The brick's domain in index space
        @param parameter_name   The parameter name
        @return rtree_extents, tuple(brick_extents), tuple(brick_active_size)
        """

        log.debug('origin: %s', origin)
        log.debug('bD: %s', bD)
        log.debug('parameter_name: %s', parameter_name)

        # Calculate the brick extents
        origin = list(origin)

        pc = self.parameter_metadata[parameter_name].parameter_context
        total_extents = pc.dom.total_extents # index space
        log.debug('Total extents for parameter %s: %s', parameter_name, total_extents)

        # Calculate the extents for the Rtree (index space)
        rtree_extents = origin + map(lambda o,s: o+s-1, origin, bD)
        # Fake out the rtree if rank == 1
        if len(origin) == 1:
            rtree_extents = [e for ext in zip(rtree_extents,[0 for x in rtree_extents]) for e in ext]
        log.debug('Rtree extents: %s', rtree_extents)

        # Calculate the extents of the brick (index space)
        brick_extents = zip(origin,map(lambda o,s: o+s-1, origin, bD))
        log.debug('Brick extents: %s', brick_extents)

        # Calculate active size using the inner extent of the domain within a brick (value space)
        brick_active_size = map(lambda o,s: min(o,s[1]+1)-s[0], total_extents, brick_extents)
        log.debug('Brick active size: %s', brick_active_size)

        # When loaded, brick_extents and brick_active_extents will be tuples...so, convert them now to allow clean comparison
        return rtree_extents, tuple(brick_extents), tuple(brick_active_size)

    def _brick_exists(self, parameter_name, brick_extents):
        """
        Checks if a brick exists for a given parameter and extents

        @param parameter_name   The parameter name
        @param brick_extents    The brick extents
        @return Boolean (do_write) = False if found, returns found brick's GUID;
         otherwise returns True with an empty brick GUID
        """

        # Make sure the brick doesn't already exist if we already have some bricks
        do_write = True
        brick_guid = ''
        log.debug('Check bricks for parameter \'%s\'',parameter_name)
        if parameter_name in self.parameter_metadata:
            for x,v in self.parameter_metadata[parameter_name].brick_list.iteritems():
                if brick_extents == v[0]:
                    log.debug('Brick found with matching extents: guid=%s', x)
                    do_write = False
                    brick_guid = x
                    break

        return do_write, brick_guid

    # Write empty HDF5 brick to the filesystem
    def _write_brick(self, rtree_extents, brick_extents, brick_active_size, origin, bD, parameter_name):
        """
        Creates a virtual brick in the PersistenceLayer by updating the HDF5 master file's
        brick list, rtree and ExternalLink to where the HDF5 file will be saved in the future (lazy create)

        @param rtree_extents    Total extents of brick's domain in rtree format
        @param brick_extents    Size of brick
        @param brick_active_size    Size of brick (same rank as parameter)
        @param origin   Domain origin offset
        @param bD   Slice-friendly size of brick's domain
        @param parameter_name   Parameter name as string
        @return N/A
        """
        pm = self.parameter_metadata[parameter_name]

#        rtree_extents, brick_extents, brick_active_size = self.calculate_extents(origin, bD, parameter_name)
#
#        do_write, bguid = self._brick_exists(parameter_name, brick_extents)
#        if not do_write:
#            log.debug('Brick already exists!  Updating brick metadata...')
#            pm.brick_list[bguid] = [brick_extents, origin, tuple(bD), brick_active_size]
#        else:
        log.debug('Writing virtual brick for parameter %s', parameter_name)

        # Set HDF5 file and group
        # Create a GUID for the brick
        brick_guid = create_guid()
        brick_file_name = '{0}.hdf5'.format(brick_guid)
        brick_rel_path = os.path.join(pm.root_dir.replace(self.root_dir,'.'), brick_file_name)
        link_path = '/{0}/{1}'.format(parameter_name, brick_guid)

        # Add brick to Master HDF file
        self.master_manager.add_external_link(link_path, brick_rel_path, brick_guid)

        # Update the brick listing
        log.debug('Updating brick list[%s] with (%s, %s)', parameter_name, brick_guid, brick_extents)
        brick_count = self.parameter_brick_count(parameter_name)
        pm.brick_list[brick_guid] = [brick_extents, origin, tuple(bD), brick_active_size]
        log.debug('Brick count for %s is %s', parameter_name, brick_count)

        # Insert into Rtree
        log.debug('Inserting into Rtree %s:%s:%s', brick_count, rtree_extents, brick_guid)
        pm.update_rtree(brick_count, rtree_extents, obj=brick_guid)

    # Expand the domain
    def expand_domain(self, parameter_context, do_flush=False):
        """
        Expands a parameter's total domain based on the requested new temporal and/or spatial domains.
        Temporal domain expansion is most typical.
        Number of dimensions may not change for the parameter.

        @param parameter_context    ParameterContext object
        @param tdom Requested new temporal domain size
        @param sdom Requested new spatial domain size
        @return N/A
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        parameter_name = parameter_context.name
        log.debug('Expand %s', parameter_name)
        pm = self.parameter_metadata[parameter_name]

        if pm.brick_domains[0] is not None:
            log.debug('Expanding domain (n-dimension)')

            # Check if the number of dimensions of the total domain has changed
            # TODO: Will this ever happen???  If so, how to handle?
            if len(parameter_context.dom.total_extents) != len(pm.brick_domains[0]):
                raise SystemError('Number of dimensions for parameter cannot change, only expand in size! No action performed.')
            else:
                tD = pm.brick_domains[0]
                bD = pm.brick_domains[1]
                cD = pm.brick_domains[2]
                if not isinstance(pm.parameter_context.param_type, (FunctionType, ConstantType)): # These have constant storage, never expand!!
                    new_domain = parameter_context.dom.total_extents

                    delta_domain = [(x - y) for x, y in zip(new_domain, tD)]
                    log.debug('delta domain: %s', delta_domain)

                    tD = [(x + y) for x, y in zip(tD, delta_domain)]
                    pm.brick_domains[0] = tD
        else:
            tD = parameter_context.dom.total_extents
            bricking_scheme = pm.brick_domains[3]
            bD,cD = self.calculate_brick_size(tD, bricking_scheme)
            pm.brick_domains = [tD, bD, cD, bricking_scheme]

        try:
            # Gather block list
            log.trace('tD, bD, cD: %s, %s, %s', tD, bD, cD)
            lst = [range(d)[::bD[i]] for i,d in enumerate(tD)]

            # Gather brick origins
            need_origins = set(itertools.product(*lst))
            log.trace('need_origins: %s', need_origins)
            have_origins = set([v[1] for k,v in pm.brick_list.iteritems() if v[2] == v[3]])
            log.trace('have_origins: %s', have_origins)
            need_origins.difference_update(have_origins)
            log.trace('need_origins: %s', need_origins)

            need_origins = list(need_origins)
            need_origins.sort()

            if len(need_origins)>0:
                log.debug('Number of Bricks to Create: %s', len(need_origins))

#                # Write brick to HDF5 file
#                map(lambda origin: self.write_brick(origin,bD,parameter_name), need_origins)

                # Write brick to HDF5 file
                for origin in need_origins:
                    rtree_extents, brick_extents, brick_active_size = self.calculate_extents(origin, bD, parameter_name)

                    do_write, bguid = self._brick_exists(parameter_name, brick_extents)
                    if not do_write:
                        log.debug('Brick already exists!  Updating brick metadata...')
                        pm.brick_list[bguid] = [brick_extents, origin, tuple(bD), brick_active_size]
                    else:
                        self._write_brick(rtree_extents, brick_extents, brick_active_size, origin, bD, parameter_name)

            else:
                log.debug('No bricks to create to satisfy the domain expansion...')
        except Exception:
            raise

        ## .flush() is called by insert_timesteps - no need to call these here

        if do_flush:
        # Flush the parameter_metadata
            pm.flush()
            # If necessary (i.e. write_brick has been called), flush the master_manager
            if self.master_manager.is_dirty():
                self.master_manager.flush()

    # Returns a count of bricks for a parameter
    def parameter_brick_count(self, parameter_name):
        """
        Counts and returns the number of bricks in a given parameter's brick list

        @param parameter_name   Name of parameter
        @return The number of virtual bricks
        """
        ret = 0
        if parameter_name in self.parameter_metadata:
            ret = len(self.parameter_metadata[parameter_name].brick_list)
        else:
            log.debug('No bricks found for parameter: %s', parameter_name)

        return ret

    def has_dirty_values(self):
        """
        Checks if the master file values have been modified

        @return True if master file metadata has been modified
        """
        for v in self.value_list.itervalues():
            if v.has_dirty_values():
                return True

        return False

    def get_dirty_values_async_result(self):
        return_now = False
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return_now = True

        if self.brick_dispatcher is None:
            log.debug('\'brick_dispatcher\' is None')
            return_now = True

        if return_now:
            from gevent.event import AsyncResult
            ret = AsyncResult()
            ret.set(True)
            return ret

        return self.brick_dispatcher.get_dirty_values_async_result()

    def update_domain(self, tdom=None, sdom=None, do_flush=True):
        """
        Updates the temporal and/or spatial domain in the MasterManager.

        If do_flush is unspecified or True, the MasterManager is flushed within this call

        @param tdom     the value to update the Temporal Domain to
        @param sdom     the value to update the Spatial Domain to
        @param do_flush    Flush the MasterManager after updating the value(s); Default is True
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        # Update the global tdom & sdom as necessary
        if tdom is not None:
            self.master_manager.tdom = tdom
        if sdom is not None:
            self.master_manager.sdom = sdom

        if do_flush:
            self.master_manager.flush()

    def flush_values(self):
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return

        for k, v in self.value_list.iteritems():
            v.flush_values()

        return self.get_dirty_values_async_result()

    def flush(self):
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return

        self.flush_values()
        for pk, pm in self.parameter_metadata.iteritems():
            log.debug('Flushing ParameterManager for \'%s\'...', pk)
            pm.flush()
        log.debug('Flushing MasterManager...')
        self.master_manager.flush()

    def close(self, force=False, timeout=None):
        if not self._closed:
            if self.mode != 'r':
                self.flush()
                if self.brick_dispatcher is not None:
                    self.brick_dispatcher.shutdown(force=force, timeout=timeout)

        self._closed = True
Пример #6
0
class PersistenceLayer(object):
    """
    The PersistenceLayer class manages the disk-level storage (and retrieval) of the Coverage Model using HDF5 files.
    """

    def __init__(self, root, guid, name=None, tdom=None, sdom=None, mode=None, bricking_scheme=None, inline_data_writes=True, auto_flush_values=True, value_caching=True, coverage_type=None, **kwargs):
        """
        Constructor for PersistenceLayer

        @param root The <root> component of the filesystem path for the coverage (/<root>/<guid>)
        @param guid The <guid> component of the filesystem path for the coverage (/<root>/<guid>)
        @param name CoverageModel's name persisted to the metadata attribute in the master HDF5 file
        @param tdom Concrete instance of AbstractDomain for the temporal domain component
        @param sdom Concrete instance of AbstractDomain for the spatial domain component
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @param auto_flush_values    True = Values flushed to HDF5 files automatically, False = Manual
        @param value_caching  if True (default), value requests should be cached for rapid duplicate retrieval
        @param kwargs
        @return None
        """

        log.debug('Persistence GUID: %s', guid)
        root = '.' if root is ('' or None) else root

        self.master_manager = MasterManager(root, guid, name=name, tdom=tdom, sdom=sdom, global_bricking_scheme=bricking_scheme, parameter_bounds=None, coverage_type=coverage_type)

        self.mode = mode
        if not hasattr(self.master_manager, 'auto_flush_values'):
            self.master_manager.auto_flush_values = auto_flush_values
        if not hasattr(self.master_manager, 'inline_data_writes'):
            self.master_manager.inline_data_writes = inline_data_writes
        if not hasattr(self.master_manager, 'value_caching'):
            self.master_manager.value_caching = value_caching
        if not hasattr(self.master_manager, 'coverage_type'):
            self.master_manager.coverage_type = coverage_type

        # TODO: This is not done correctly
        if tdom != None:
            self._init_master(tdom.shape.extents, bricking_scheme)

        self.value_list = {}

        self.parameter_metadata = {} # {parameter_name: [brick_list, parameter_domains, rtree]}

        for pname in self.param_groups:
            log.debug('parameter group: %s', pname)
            self.parameter_metadata[pname] = ParameterManager(os.path.join(self.root_dir, self.guid, pname), pname)

        if self.mode != 'r':
            if self.master_manager.is_dirty():
                self.master_manager.flush()

        if self.mode == 'r' or self.inline_data_writes:
            self.brick_dispatcher = None
        else:
            self.brick_dispatcher = BrickWriterDispatcher(self.write_failure_callback)
            self.brick_dispatcher.run()

        self._closed = False

        log.debug('Persistence Layer Successfully Initialized')

    def __getattr__(self, key):
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            return getattr(self.master_manager, key)
        else:
            return getattr(super(PersistenceLayer, self), key)

    def __setattr__(self, key, value):
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            setattr(self.master_manager, key, value)
        else:
            super(PersistenceLayer, self).__setattr__(key, value)

    def update_parameter_bounds(self, parameter_name, bounds):
        dmin, dmax = bounds
        if parameter_name in self.parameter_bounds:
            pmin, pmax = self.parameter_bounds[parameter_name]
            dmin = min(dmin, pmin)
            dmax = max(dmax, pmax)
        self.parameter_bounds[parameter_name] = (dmin, dmax)
        self.master_manager.flush()

    def _init_master(self, tD, bricking_scheme):
        log.debug('Performing Rtree dict setup')
        # tD = parameter_context.dom.total_extents
        bD,cD = self.calculate_brick_size(tD, bricking_scheme) #remains same for each parameter

        self.master_manager._init_rtree(bD)

        self.master_manager.brick_list = {}
        self.master_manager.brick_domains = [tD, bD, cD, bricking_scheme]

    # CBM TODO: This needs to be improved greatly - should callback all the way to the Application layer as a "failure handler"
    def write_failure_callback(self, message, work):
        log.error('WORK DISCARDED!!!; %s: %s', message, work)

    def calculate_brick_size(self, tD, bricking_scheme):
        """
        Calculates and returns the brick and chunk size for each dimension
        in the total domain based on the bricking scheme

        @param tD   Total domain
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @return Brick and Chunk sizes based on the total domain
        """

        log.debug('Calculating the size of a brick...')
        log.debug('Bricking scheme: %s', bricking_scheme)
        log.debug('tD: %s', tD)
        bD = [bricking_scheme['brick_size'] for x in tD]
        cD = [bricking_scheme['chunk_size'] for x in tD]
        log.debug('bD: %s', bD)
        log.debug('cD: %s', cD)
        return bD,tuple(cD)

    def init_parameter(self, parameter_context, bricking_scheme):
        """
        Initializes a parameter using a ParameterContext object and a bricking
        scheme for that parameter

        @param parameter_context    ParameterContext object describing the parameter to initialize
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @return A PersistedStorage object
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        parameter_name = parameter_context.name

        self.global_bricking_scheme = bricking_scheme

        pm = ParameterManager(os.path.join(self.root_dir, self.guid, parameter_name), parameter_name, read_only=False)
        self.parameter_metadata[parameter_name] = pm

        pm.parameter_context = parameter_context

        log.debug('Initialize %s', parameter_name)

        self.master_manager.create_group(parameter_name)

        if parameter_context.param_type._value_class == 'SparseConstantValue':
            v = SparsePersistedStorage(pm, self.master_manager, self.brick_dispatcher, dtype=parameter_context.param_type.storage_encoding, fill_value=parameter_context.param_type.fill_value, mode=self.mode, inline_data_writes=self.inline_data_writes, auto_flush=self.auto_flush_values)
        else:
            v = PersistedStorage(pm, self.master_manager, self.brick_dispatcher, dtype=parameter_context.param_type.storage_encoding, fill_value=parameter_context.param_type.fill_value, mode=self.mode, inline_data_writes=self.inline_data_writes, auto_flush=self.auto_flush_values)
        self.value_list[parameter_name] = v

        # CBM TODO: Consider making this optional and bulk-flushing from the coverage after all parameters have been initialized
        # No need to check if they're dirty, we know they are!
        pm.flush()

        # Put the pm into read_only mode
        pm.read_only = True

        # If there are already bricks, ensure there are appropriate links for this new parameter
        for brick_guid in self.master_manager.brick_list:
            brick_file_name = '{0}.hdf5'.format(brick_guid)
            self._add_brick_link(parameter_name, brick_guid, brick_file_name)

        self.master_manager.flush()

        return v

    def calculate_extents(self, origin, bD, total_extents):
        """
        Calculates and returns the Rtree extents, brick extents and active brick size for the parameter

        @param origin   The origin of the brick in index space
        @param bD   The brick's domain in index space
        @param parameter_name   The parameter name
        @return rtree_extents, tuple(brick_extents), tuple(brick_active_size)
        """
        # Calculate the brick extents
        origin = list(origin)

        # Calculate the extents for the Rtree (index space)
        rtree_extents = origin + map(lambda o,s: o+s-1, origin, bD)
        # Fake out the rtree if rank == 1
        if len(origin) == 1:
            rtree_extents = [e for ext in zip(rtree_extents,[0 for x in rtree_extents]) for e in ext]
        log.debug('Rtree extents: %s', rtree_extents)

        # Calculate the extents of the brick (index space)
        brick_extents = zip(origin,map(lambda o,s: o+s-1, origin, bD))
        log.debug('Brick extents: %s', brick_extents)

        # Calculate active size using the inner extent of the domain within a brick (value space)
        brick_active_size = map(lambda o,s: min(o,s[1]+1)-s[0], total_extents, brick_extents)
        log.debug('Brick active size: %s', brick_active_size)

        # When loaded, brick_extents and brick_active_extents will be tuples...so, convert them now to allow clean comparison
        return rtree_extents, tuple(brick_extents), tuple(brick_active_size)

    def _brick_exists_master(self, brick_extents):
        do_write = True
        brick_guid = ''
        for x,v in self.master_manager.brick_list.iteritems():
            if brick_extents == v[0]:
                log.debug('Brick found with matching extents: guid=%s', x)
                do_write = False
                brick_guid = x
                break

        return do_write, brick_guid

    def _add_brick_link(self, parameter_name, brick_guid, brick_file_name):
        brick_rel_path = os.path.join(self.parameter_metadata[parameter_name].root_dir.replace(self.root_dir,'.'), brick_file_name)
        link_path = '/{0}/{1}'.format(parameter_name, brick_guid)

        # Add brick to Master HDF file
        self.master_manager.add_external_link(link_path, brick_rel_path, brick_guid)

    # Write empty HDF5 brick to the filesystem
    def _write_brick(self, rtree_extents, brick_extents, brick_active_size, origin, bD):
        """
        Creates a virtual brick in the PersistenceLayer by updating the HDF5 master file's
        brick list, rtree and ExternalLink to where the HDF5 file will be saved in the future (lazy create)

        @param rtree_extents    Total extents of brick's domain in rtree format
        @param brick_extents    Size of brick
        @param brick_active_size    Size of brick (same rank as parameter)
        @param origin   Domain origin offset
        @param bD   Slice-friendly size of brick's domain
        @return N/A
        """
        log.debug('Writing virtual brick...')

        # Set HDF5 file and group
        # Create a GUID for the brick
        brick_guid = create_guid()
        brick_file_name = '{0}.hdf5'.format(brick_guid)

        #TODO: Inclusion of external links only used for external viewing of master file, remove if non-performant
        for parameter_name in self.parameter_metadata.keys():
            self._add_brick_link(parameter_name, brick_guid, brick_file_name)

        # Update the brick listing
        log.debug('Updating brick list[%s] with (%s, %s, %s, %s)', brick_guid, brick_extents, origin, tuple(bD), brick_active_size)
        brick_count = len(self.master_manager.brick_list)
        self.master_manager.brick_list[brick_guid] = [brick_extents, origin, tuple(bD), brick_active_size]
        log.debug('Brick count is %s', brick_count)

        # Insert into Rtree
        log.debug('Inserting into Rtree %s:%s:%s', brick_count, rtree_extents, brick_guid)
        self.master_manager.update_rtree(brick_count, rtree_extents, obj=brick_guid)

    # Expand the domain
    def expand_domain(self, total_extents, do_flush=False):
        """
        Expands a parameter's total domain based on the requested new temporal and/or spatial domains.
        Temporal domain expansion is most typical.
        Number of dimensions may not change for the parameter.

        @param total_extents    The total extents of the domain
        @return N/A
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        if self.master_manager.brick_domains[0] is not None:
            log.debug('Expanding domain (n-dimension)')

            # Check if the number of dimensions of the total domain has changed
            # TODO: Will this ever happen???  If so, how to handle?
            if len(total_extents) != len(self.master_manager.brick_domains[0]):
                raise SystemError('Number of dimensions for parameter cannot change, only expand in size! No action performed.')
            else:
                tD = self.master_manager.brick_domains[0]
                bD = self.master_manager.brick_domains[1]
                cD = self.master_manager.brick_domains[2]

                delta_domain = [(x - y) for x, y in zip(total_extents, tD)]
                log.debug('delta domain: %s', delta_domain)

                tD = [(x + y) for x, y in zip(tD, delta_domain)]
                self.master_manager.brick_domains[0] = tD
        else:
            tD = total_extents
            bricking_scheme = self.master_manager.brick_domains[3]
            bD,cD = self.calculate_brick_size(tD, bricking_scheme)
            self.master_manager.brick_domains = [tD, bD, cD, bricking_scheme]

        try:
            # Gather block list
            log.trace('tD, bD, cD: %s, %s, %s', tD, bD, cD)
            lst = [range(d)[::bD[i]] for i,d in enumerate(tD)]

            # Gather brick origins
            need_origins = set(itertools.product(*lst))
            log.trace('need_origins: %s', need_origins)
            have_origins = set([v[1] for k,v in self.master_manager.brick_list.iteritems() if (v[2] == v[3])])
            log.trace('have_origins: %s', have_origins)
            need_origins.difference_update(have_origins)
            log.trace('need_origins: %s', need_origins)

            need_origins = list(need_origins)
            need_origins.sort()

            if len(need_origins)>0:
                log.debug('Number of Bricks to Create: %s', len(need_origins))

                # Write virtual HDF5 brick file
                for origin in need_origins:
                    rtree_extents, brick_extents, brick_active_size = self.calculate_extents(origin, bD, total_extents)

                    do_write, bguid = self._brick_exists_master(brick_extents)
                    if not do_write:
                        log.debug('Brick already exists!  Updating brick metadata...')
                        self.master_manager.brick_list[bguid] = [brick_extents, origin, tuple(bD), brick_active_size]
                    else:
                        self._write_brick(rtree_extents, brick_extents, brick_active_size, origin, bD)

            else:
                log.debug('No bricks to create to satisfy the domain expansion...')
        except Exception:
            raise

        ## .flush() is called by insert_timesteps - no need to call these here
        self.master_manager.flush()
        if do_flush:
            # If necessary (i.e. write_brick has been called), flush the master_manager461
            if self.master_manager.is_dirty():
                self.master_manager.flush()

    def shrink_domain(self, total_domain, do_flush=True):
        from coverage_model import bricking_utils
        # Find the last brick needed to contain the domain
        brick = bricking_utils.get_bricks_from_slice(total_domain, self.master_manager.brick_tree)

        bid, bguid = brick[0]

        # Get the brick_guids for all the bricks after the one we need
        rm_bricks = [s.value for s in self.master_manager.brick_tree._spans[bid+1:]]
        # Remove everything that comes after the brick we still need from the RTree
        self.master_manager.brick_tree._spans = self.master_manager.brick_tree._spans[:bid+1]

        # Remove the unnecessary bricks from the brick list
        for r in rm_bricks:
            del self.master_manager.brick_list[r]
            # and the file system...



        # Reset the first member of brick_domains
        self.master_manager.brick_domains[0] = list(total_domain)
        # And the appropriate entry in brick_list
        self.master_manager.brick_list[bguid] = tuple(self.master_manager.brick_list[bguid][:-1]) + ((total_domain[0] - self.master_manager.brick_list[bguid][1][0],),)

        if do_flush:
            if self.master_manager.is_dirty():
                self.master_manager.flush()

    def has_dirty_values(self):
        """
        Checks if the master file values have been modified

        @return True if master file metadata has been modified
        """
        for v in self.value_list.itervalues():
            if v.has_dirty_values():
                return True

        return False

    def get_dirty_values_async_result(self):
        return_now = False
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return_now = True

        if self.brick_dispatcher is None:
            log.debug('\'brick_dispatcher\' is None')
            return_now = True

        if return_now:
            from gevent.event import AsyncResult
            ret = AsyncResult()
            ret.set(True)
            return ret

        return self.brick_dispatcher.get_dirty_values_async_result()

    def update_domain(self, tdom=None, sdom=None, do_flush=True):
        """
        Updates the temporal and/or spatial domain in the MasterManager.

        If do_flush is unspecified or True, the MasterManager is flushed within this call

        @param tdom     the value to update the Temporal Domain to
        @param sdom     the value to update the Spatial Domain to
        @param do_flush    Flush the MasterManager after updating the value(s); Default is True
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        # Update the global tdom & sdom as necessary
        if tdom is not None:
            self.master_manager.tdom = tdom
        if sdom is not None:
            self.master_manager.sdom = sdom

        if do_flush:
            self.master_manager.flush()

    def flush_values(self):
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return

        for k, v in self.value_list.iteritems():
            v.flush_values()

        return self.get_dirty_values_async_result()

    def flush(self):
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return

        self.flush_values()
        log.debug('Flushing MasterManager...')
        self.master_manager.flush()
        for pk, pm in self.parameter_metadata.iteritems():
            log.debug('Flushing ParameterManager for \'%s\'...', pk)
            pm.flush()

    def close(self, force=False, timeout=None):
        if not self._closed:
            if self.mode != 'r':
                self.flush()
                if self.brick_dispatcher is not None:
                    self.brick_dispatcher.shutdown(force=force, timeout=timeout)

        self._closed = True
Пример #7
0
class PersistenceLayer(object):
    def __init__(self, root, guid, name=None, tdom=None, sdom=None, bricking_scheme=None, auto_flush_values=True, **kwargs):
        """
        Constructor for Persistence Layer
        @param root: Where to save/look for HDF5 files
        @param guid: CoverageModel GUID
        @param name: CoverageModel Name
        @param tdom: Temporal Domain
        @param sdom: Spatial Domain
        @param kwargs:
        @return:
        """
        log.debug('Persistence GUID: %s', guid)
        root = '.' if root is ('' or None) else root

        self.master_manager = MasterManager(root, guid, name=name, tdom=tdom, sdom=sdom, global_bricking_scheme=bricking_scheme)

        self.auto_flush_values = auto_flush_values
        self.value_list = {}

        self.parameter_metadata = {} # {parameter_name: [brick_list, parameter_domains, rtree]}

        for pname in self.param_groups:
            log.debug('parameter group: %s', pname)
            self.parameter_metadata[pname] = ParameterManager(os.path.join(self.root_dir, self.guid, pname), pname)

        if self.master_manager.is_dirty():
            self.master_manager.flush()

        self.brick_dispatcher = BrickWriterDispatcher(self.write_failure_callback)
        self.brick_dispatcher.run()

        self._closed = False

        log.info('Persistence Layer Successfully Initialized')

    def __getattr__(self, key):
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            return getattr(self.master_manager, key)
        else:
            return getattr(super(PersistenceLayer, self), key)

    def __setattr__(self, key, value):
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            setattr(self.master_manager, key, value)
        else:
            super(PersistenceLayer, self).__setattr__(key, value)

    # CBM TODO: This needs to be improved greatly - should callback all the way to the Application layer as a "failure handler"
    def write_failure_callback(self, message, work):
        log.error('WORK DISCARDED!!!; %s: %s', message, work)

    def calculate_brick_size(self, tD, bricking_scheme):
        """
        Calculate brick domain size given a target file system brick size (Mbytes) and dtype
        @param tD:
        @param bricking_scheme:
        @return:
        """
        log.debug('Calculating the size of a brick...')
        log.debug('Bricking scheme: %s', bricking_scheme)
        log.debug('tD: %s', tD)
        bD = [bricking_scheme['brick_size'] for x in tD]
        cD = [bricking_scheme['chunk_size'] for x in tD]
        log.debug('bD: %s', bD)
        log.debug('cD: %s', cD)
        return bD,tuple(cD)

    def init_parameter(self, parameter_context, bricking_scheme):
        parameter_name = parameter_context.name

        self.global_bricking_scheme = bricking_scheme

        pm = ParameterManager(os.path.join(self.root_dir, self.guid, parameter_name), parameter_name)
        self.parameter_metadata[parameter_name] = pm

        pm.parameter_context = parameter_context

        log.debug('Initialize %s', parameter_name)

        self.master_manager.create_group(parameter_name)

        log.debug('Performing Rtree dict setup')
        tD = parameter_context.dom.total_extents
        bD,cD = self.calculate_brick_size(tD, bricking_scheme) #remains same for each parameter
        # Verify domain is Rtree friendly
        tree_rank = len(bD)
        log.debug('tree_rank: %s', tree_rank)
        if tree_rank == 1:
            tree_rank += 1
        log.debug('tree_rank: %s', tree_rank)
        p = rtree.index.Property()
        p.dimension = tree_rank

        brick_tree = rtree.index.Index(properties=p)

        pm.brick_list = {}
        if isinstance(parameter_context.param_type, (FunctionType, ConstantType)):
            # These have constant storage, never expand!!
                pm.brick_domains = [(1,),(1,),(1,),bricking_scheme]
        else:
            pm.brick_domains = [tD, bD, cD, bricking_scheme]

        pm.tree_rank = tree_rank
        pm.brick_tree = brick_tree

        v = PersistedStorage(pm, self.brick_dispatcher, dtype=parameter_context.param_type.storage_encoding, fill_value=parameter_context.param_type.fill_value, auto_flush=self.auto_flush_values)
        self.value_list[parameter_name] = v

        self.expand_domain(parameter_context)

        if pm.is_dirty():
            pm.flush()

        if self.master_manager.is_dirty():
            self.master_manager.flush()

        return v

    def calculate_extents(self, origin, bD, parameter_name):
        """
        Calculates the Rtree extents, brick extents and active brick size for the parameter
        @param origin:
        @param bD:
        @param parameter_name:
        @return:
        """
        log.debug('origin: %s', origin)
        log.debug('bD: %s', bD)
        log.debug('parameter_name: %s', parameter_name)

        # Calculate the brick extents
        origin = list(origin)

        pc = self.parameter_metadata[parameter_name].parameter_context
        total_extents = pc.dom.total_extents # index space
        log.debug('Total extents for parameter %s: %s', parameter_name, total_extents)

        # Calculate the extents for the Rtree (index space)
        rtree_extents = origin + map(lambda o,s: o+s-1, origin, bD)
        # Fake out the rtree if rank == 1
        if len(origin) == 1:
            rtree_extents = [e for ext in zip(rtree_extents,[0 for x in rtree_extents]) for e in ext]
        log.debug('Rtree extents: %s', rtree_extents)

        # Calculate the extents of the brick (index space)
        brick_extents = zip(origin,map(lambda o,s: o+s-1, origin, bD))
        log.debug('Brick extents: %s', brick_extents)

        # Calculate active size using the inner extent of the domain within a brick (value space)
        brick_active_size = map(lambda o,s: min(o,s[1]+1)-s[0], total_extents, brick_extents)
        log.debug('Brick active size: %s', brick_active_size)

        # When loaded, brick_extents and brick_active_extents will be tuples...so, convert them now to allow clean comparison
        return rtree_extents, tuple(brick_extents), tuple(brick_active_size)

    def _brick_exists(self, parameter_name, brick_extents):
        # Make sure the brick doesn't already exist if we already have some bricks
        do_write = True
        brick_guid = ''
        log.debug('Check bricks for parameter \'%s\'',parameter_name)
        if parameter_name in self.parameter_metadata:
            for x,v in self.parameter_metadata[parameter_name].brick_list.iteritems():
                if brick_extents == v[0]:
                    log.debug('Brick found with matching extents: guid=%s', x)
                    do_write = False
                    brick_guid = x
                    break

        return do_write, brick_guid

    # Write empty HDF5 brick to the filesystem
    def write_brick(self, rtree_extents, brick_extents, brick_active_size, origin, bD, parameter_name):
        pm = self.parameter_metadata[parameter_name]

#        rtree_extents, brick_extents, brick_active_size = self.calculate_extents(origin, bD, parameter_name)
#
#        do_write, bguid = self._brick_exists(parameter_name, brick_extents)
#        if not do_write:
#            log.debug('Brick already exists!  Updating brick metadata...')
#            pm.brick_list[bguid] = [brick_extents, origin, tuple(bD), brick_active_size]
#        else:
        log.debug('Writing virtual brick for parameter %s', parameter_name)

        # Set HDF5 file and group
        # Create a GUID for the brick
        brick_guid = create_guid()
        brick_file_name = '{0}.hdf5'.format(brick_guid)
        brick_rel_path = os.path.join(pm.root_dir.replace(self.root_dir,'.'), brick_file_name)
        link_path = '/{0}/{1}'.format(parameter_name, brick_guid)

        # Add brick to Master HDF file
        self.master_manager.add_external_link(link_path, brick_rel_path, brick_guid)

        # Update the brick listing
        log.debug('Updating brick list[%s] with (%s, %s)', parameter_name, brick_guid, brick_extents)
        brick_count = self.parameter_brick_count(parameter_name)
        pm.brick_list[brick_guid] = [brick_extents, origin, tuple(bD), brick_active_size]
        log.debug('Brick count for %s is %s', parameter_name, brick_count)

        # Insert into Rtree
        log.debug('Inserting into Rtree %s:%s:%s', brick_count, rtree_extents, brick_guid)
        pm.update_rtree(brick_count, rtree_extents, obj=brick_guid)

        # Flush the parameter_metadata
        if pm.is_dirty():
            pm.flush()

        if self.master_manager.is_dirty():
            self.master_manager.flush()

    # Expand the domain
    def expand_domain(self, parameter_context, tdom=None, sdom=None):
        parameter_name = parameter_context.name
        log.debug('Expand %s', parameter_name)
        pm = self.parameter_metadata[parameter_name]

        if pm.brick_domains[0] is not None:
            log.debug('Expanding domain (n-dimension)')

            # Check if the number of dimensions of the total domain has changed
            # TODO: Will this ever happen???  If so, how to handle?
            if len(parameter_context.dom.total_extents) != len(pm.brick_domains[0]):
                raise SystemError('Number of dimensions for parameter cannot change, only expand in size! No action performed.')
            else:
                tD = pm.brick_domains[0]
                bD = pm.brick_domains[1]
                cD = pm.brick_domains[2]
                if not isinstance(pm.parameter_context.param_type, (FunctionType, ConstantType)): # These have constant storage, never expand!!
                    new_domain = parameter_context.dom.total_extents

                    delta_domain = [(x - y) for x, y in zip(new_domain, tD)]
                    log.debug('delta domain: %s', delta_domain)

                    tD = [(x + y) for x, y in zip(tD, delta_domain)]
                    pm.brick_domains[0] = tD
        else:
            tD = parameter_context.dom.total_extents
            bricking_scheme = pm.brick_domains[3]
            bD,cD = self.calculate_brick_size(tD, bricking_scheme)
            pm.brick_domains = [tD, bD, cD, bricking_scheme]

        try:
            # Gather block list
            log.trace('tD, bD, cD: %s, %s, %s', tD, bD, cD)
            lst = [range(d)[::bD[i]] for i,d in enumerate(tD)]

            # Gather brick origins
            need_origins = set(itertools.product(*lst))
            log.trace('need_origins: %s', need_origins)
            have_origins = set([v[1] for k,v in pm.brick_list.iteritems() if v[2] == v[3]])
            log.trace('have_origins: %s', have_origins)
            need_origins.difference_update(have_origins)
            log.trace('need_origins: %s', need_origins)

            need_origins = list(need_origins)
            need_origins.sort()

            if len(need_origins)>0:
                log.debug('Number of Bricks to Create: %s', len(need_origins))

#                # Write brick to HDF5 file
#                map(lambda origin: self.write_brick(origin,bD,parameter_name), need_origins)

                # Write brick to HDF5 file
                for origin in need_origins:
                    rtree_extents, brick_extents, brick_active_size = self.calculate_extents(origin, bD, parameter_name)

                    do_write, bguid = self._brick_exists(parameter_name, brick_extents)
                    if not do_write:
                        log.debug('Brick already exists!  Updating brick metadata...')
                        pm.brick_list[bguid] = [brick_extents, origin, tuple(bD), brick_active_size]
                    else:
                        self.write_brick(rtree_extents, brick_extents, brick_active_size, origin, bD, parameter_name)

            else:
                log.debug('No bricks to create to satisfy the domain expansion...')
        except Exception:
            raise

        # Flush the parameter_metadata
        if pm.is_dirty():
            pm.flush()

        # Update the global tdom & sdom as necessary
        if tdom is not None:
            self.master_manager.tdom = tdom
        if sdom is not None:
            self.master_manager.sdom = sdom

        if self.master_manager.is_dirty():
            self.master_manager.flush()

    # Returns a count of bricks for a parameter
    def parameter_brick_count(self, parameter_name):
        ret = 0
        if parameter_name in self.parameter_metadata:
            ret = len(self.parameter_metadata[parameter_name].brick_list)
        else:
            log.debug('No bricks found for parameter: %s', parameter_name)

        return ret

    def has_dirty_values(self):
        for v in self.value_list.itervalues():
            if v.has_dirty_values():
                return True

        return False

    def get_dirty_values_async_result(self):
        return self.brick_dispatcher.get_dirty_values_async_result()

    def flush_values(self):
        for k, v in self.value_list.iteritems():
            v.flush_values()

        return self.get_dirty_values_async_result()

    def flush(self):
        for pk, pm in self.parameter_metadata.iteritems():
            log.debug('Flushing ParameterManager for \'%s\'...', pk)
            pm.flush()
        log.debug('Flushing MasterManager...')
        self.master_manager.flush()

    def close(self, force=False, timeout=None):
        if not self._closed:
            self.flush()
            self.brick_dispatcher.shutdown(force=force, timeout=timeout)

        self._closed = True