Exemplo n.º 1
0
class PersistenceLayer(object):
    """
    The PersistenceLayer class manages the disk-level storage (and retrieval) of the Coverage Model using HDF5 files.
    """

    def __init__(self, root, guid, name=None, tdom=None, sdom=None, mode=None, bricking_scheme=None, inline_data_writes=True, auto_flush_values=True, **kwargs):
        """
        Constructor for PersistenceLayer

        @param root The <root> component of the filesystem path for the coverage (/<root>/<guid>);
                    '' or None falls back to '.'
        @param guid The <guid> component of the filesystem path for the coverage (/<root>/<guid>)
        @param name CoverageModel's name persisted to the metadata attribute in the master HDF5 file
        @param tdom Concrete instance of AbstractDomain for the temporal domain component
        @param sdom Concrete instance of AbstractDomain for the spatial domain component
        @param mode Access mode; 'r' is treated as read-only (no flush, no brick dispatcher)
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @param inline_data_writes   Stored on the master manager only if it does not already carry the attribute;
                                    when the effective value is truthy no BrickWriterDispatcher is started
        @param auto_flush_values    True = Values flushed to HDF5 files automatically, False = Manual
                                    (stored on the master manager only if it does not already carry the attribute)
        @param kwargs   Accepted for call compatibility; not used by this constructor
        @return None
        """

        log.debug('Persistence GUID: %s', guid)
        # The former test `root is ('' or None)` evaluated to `root is None`,
        # so an empty string was never replaced by the current directory.
        if root is None or root == '':
            root = '.'

        self.master_manager = MasterManager(root, guid, name=name, tdom=tdom, sdom=sdom, global_bricking_scheme=bricking_scheme)

        # From here on, attribute reads/writes may be delegated to master_manager
        # (see __getattr__ / __setattr__).
        self.mode = mode
        if not hasattr(self.master_manager, 'auto_flush_values'):
            self.master_manager.auto_flush_values = auto_flush_values
        if not hasattr(self.master_manager, 'inline_data_writes'):
            self.master_manager.inline_data_writes = inline_data_writes
        self.value_list = {}

        self.parameter_metadata = {} # {parameter_name: ParameterManager}

        for pname in self.param_groups:
            log.debug('parameter group: %s', pname)
            self.parameter_metadata[pname] = ParameterManager(os.path.join(self.root_dir, self.guid, pname), pname)

        if self.mode != 'r':
            if self.master_manager.is_dirty():
                self.master_manager.flush()

        # A dispatcher is only needed for writable layers that do not write inline
        if self.mode == 'r' or self.inline_data_writes:
            self.brick_dispatcher = None
        else:
            self.brick_dispatcher = BrickWriterDispatcher(self.write_failure_callback)
            self.brick_dispatcher.run()

        self._closed = False

        log.info('Persistence Layer Successfully Initialized')

    def __getattr__(self, key):
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            return getattr(self.master_manager, key)
        else:
            return getattr(super(PersistenceLayer, self), key)

    def __setattr__(self, key, value):
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            setattr(self.master_manager, key, value)
        else:
            super(PersistenceLayer, self).__setattr__(key, value)

    # CBM TODO: This needs to be improved greatly - should callback all the way to the Application layer as a "failure handler"
    def write_failure_callback(self, message, work):
        log.error('WORK DISCARDED!!!; %s: %s', message, work)

    def calculate_brick_size(self, tD, bricking_scheme):
        """
        Calculates and returns the brick and chunk size for each dimension
        in the total domain based on the bricking scheme

        @param tD   Total domain
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @return Brick and Chunk sizes based on the total domain
        """

        log.debug('Calculating the size of a brick...')
        log.debug('Bricking scheme: %s', bricking_scheme)
        log.debug('tD: %s', tD)
        bD = [bricking_scheme['brick_size'] for x in tD]
        cD = [bricking_scheme['chunk_size'] for x in tD]
        log.debug('bD: %s', bD)
        log.debug('cD: %s', cD)
        return bD,tuple(cD)

    def init_parameter(self, parameter_context, bricking_scheme):
        """
        Registers a new parameter: creates its ParameterManager and master-file group,
        sets up its rtree index and brick domains, builds the PersistedStorage, expands
        the domain and flushes both the parameter and master managers.

        @param parameter_context    ParameterContext object describing the parameter to initialize
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @return A PersistedStorage object
        @raise IOError if the layer was opened with mode 'r'
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        parameter_name = parameter_context.name

        self.global_bricking_scheme = bricking_scheme

        param_dir = os.path.join(self.root_dir, self.guid, parameter_name)
        pm = ParameterManager(param_dir, parameter_name)
        self.parameter_metadata[parameter_name] = pm
        pm.parameter_context = parameter_context

        log.debug('Initialize %s', parameter_name)
        self.master_manager.create_group(parameter_name)

        log.debug('Performing Rtree dict setup')
        total_domain = parameter_context.dom.total_extents
        brick_domain, chunk_domain = self.calculate_brick_size(total_domain, bricking_scheme) #remains same for each parameter

        # The rtree index is never built with a single dimension: rank 1 is bumped to 2
        rank = len(brick_domain)
        log.debug('tree_rank: %s', rank)
        if rank == 1:
            rank = 2
        log.debug('tree_rank: %s', rank)

        index_props = rtree.index.Property()
        index_props.dimension = rank
        index = rtree.index.Index(properties=index_props)

        pm.brick_list = {}
        if isinstance(parameter_context.param_type, (FunctionType, ConstantType)):
            # These have constant storage, never expand!!
            pm.brick_domains = [(1,),(1,),(1,),bricking_scheme]
        else:
            pm.brick_domains = [total_domain, brick_domain, chunk_domain, bricking_scheme]

        pm.tree_rank = rank
        pm.brick_tree = index

        storage = PersistedStorage(
            pm,
            self.brick_dispatcher,
            dtype=parameter_context.param_type.storage_encoding,
            fill_value=parameter_context.param_type.fill_value,
            mode=self.mode,
            inline_data_writes=self.inline_data_writes,
            auto_flush=self.auto_flush_values)
        self.value_list[parameter_name] = storage

        self.expand_domain(parameter_context)

        # CBM TODO: Consider making this optional and bulk-flushing from the coverage after all parameters have been initialized
        # Both managers were just modified, so flush unconditionally
        pm.flush()
        self.master_manager.flush()

        return storage

    def calculate_extents(self, origin, bD, parameter_name):
        """
        Calculates and returns the Rtree extents, brick extents and active brick size for the parameter

        @param origin   The origin of the brick in index space
        @param bD   The brick's domain in index space
        @param parameter_name   The parameter name
        @return rtree_extents, tuple(brick_extents), tuple(brick_active_size)
        """

        log.debug('origin: %s', origin)
        log.debug('bD: %s', bD)
        log.debug('parameter_name: %s', parameter_name)

        # Calculate the brick extents
        origin = list(origin)

        pc = self.parameter_metadata[parameter_name].parameter_context
        total_extents = pc.dom.total_extents # index space
        log.debug('Total extents for parameter %s: %s', parameter_name, total_extents)

        # Calculate the extents for the Rtree (index space)
        rtree_extents = origin + map(lambda o,s: o+s-1, origin, bD)
        # Fake out the rtree if rank == 1
        if len(origin) == 1:
            rtree_extents = [e for ext in zip(rtree_extents,[0 for x in rtree_extents]) for e in ext]
        log.debug('Rtree extents: %s', rtree_extents)

        # Calculate the extents of the brick (index space)
        brick_extents = zip(origin,map(lambda o,s: o+s-1, origin, bD))
        log.debug('Brick extents: %s', brick_extents)

        # Calculate active size using the inner extent of the domain within a brick (value space)
        brick_active_size = map(lambda o,s: min(o,s[1]+1)-s[0], total_extents, brick_extents)
        log.debug('Brick active size: %s', brick_active_size)

        # When loaded, brick_extents and brick_active_extents will be tuples...so, convert them now to allow clean comparison
        return rtree_extents, tuple(brick_extents), tuple(brick_active_size)

    def _brick_exists(self, parameter_name, brick_extents):
        """
        Checks if a brick exists for a given parameter and extents

        @param parameter_name   The parameter name
        @param brick_extents    The brick extents
        @return Boolean (do_write) = False if found, returns found brick's GUID;
         otherwise returns True with an empty brick GUID
        """

        # Make sure the brick doesn't already exist if we already have some bricks
        do_write = True
        brick_guid = ''
        log.debug('Check bricks for parameter \'%s\'',parameter_name)
        if parameter_name in self.parameter_metadata:
            for x,v in self.parameter_metadata[parameter_name].brick_list.iteritems():
                if brick_extents == v[0]:
                    log.debug('Brick found with matching extents: guid=%s', x)
                    do_write = False
                    brick_guid = x
                    break

        return do_write, brick_guid

    # Write empty HDF5 brick to the filesystem
    def _write_brick(self, rtree_extents, brick_extents, brick_active_size, origin, bD, parameter_name):
        """
        Registers a virtual brick for a parameter: adds an ExternalLink to the master manager,
        records the brick in the parameter's brick list and inserts it into the parameter's rtree.
        No brick file is written here.

        @param rtree_extents    Total extents of brick's domain in rtree format
        @param brick_extents    Size of brick
        @param brick_active_size    Size of brick (same rank as parameter)
        @param origin   Domain origin offset
        @param bD   Slice-friendly size of brick's domain
        @param parameter_name   Parameter name as string
        @return N/A
        """
        pm = self.parameter_metadata[parameter_name]
        log.debug('Writing virtual brick for parameter %s', parameter_name)

        # New GUID names both the brick file and its link in the master file
        brick_guid = create_guid()
        relative_dir = pm.root_dir.replace(self.root_dir,'.')
        brick_rel_path = os.path.join(relative_dir, '{0}.hdf5'.format(brick_guid))
        link_path = '/{0}/{1}'.format(parameter_name, brick_guid)

        # Add brick to Master HDF file
        self.master_manager.add_external_link(link_path, brick_rel_path, brick_guid)

        # Update the brick listing; the count is taken BEFORE the insert and is used as the rtree id
        log.debug('Updating brick list[%s] with (%s, %s)', parameter_name, brick_guid, brick_extents)
        rtree_id = self.parameter_brick_count(parameter_name)
        pm.brick_list[brick_guid] = [brick_extents, origin, tuple(bD), brick_active_size]
        log.debug('Brick count for %s is %s', parameter_name, rtree_id)

        # Insert into Rtree
        log.debug('Inserting into Rtree %s:%s:%s', rtree_id, rtree_extents, brick_guid)
        pm.update_rtree(rtree_id, rtree_extents, obj=brick_guid)

    # Expand the domain
    def expand_domain(self, parameter_context, do_flush=False):
        """
        Expands a parameter's total domain to parameter_context.dom.total_extents and
        registers a virtual brick for every brick origin that is not yet covered.
        Number of dimensions may not change for the parameter.

        @param parameter_context    ParameterContext object
        @param do_flush If True, flush the parameter's manager and, when dirty, the master manager
        @return N/A
        @raise IOError if the layer was opened with mode 'r'
        @raise SystemError if the number of dimensions differs from the stored total domain
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        parameter_name = parameter_context.name
        log.debug('Expand %s', parameter_name)
        pm = self.parameter_metadata[parameter_name]

        if pm.brick_domains[0] is None:
            # No total domain stored yet: derive everything from the context and the stored scheme
            tD = parameter_context.dom.total_extents
            bricking_scheme = pm.brick_domains[3]
            bD,cD = self.calculate_brick_size(tD, bricking_scheme)
            pm.brick_domains = [tD, bD, cD, bricking_scheme]
        else:
            log.debug('Expanding domain (n-dimension)')

            # Check if the number of dimensions of the total domain has changed
            # TODO: Will this ever happen???  If so, how to handle?
            if len(parameter_context.dom.total_extents) != len(pm.brick_domains[0]):
                raise SystemError('Number of dimensions for parameter cannot change, only expand in size! No action performed.')

            tD = pm.brick_domains[0]
            bD = pm.brick_domains[1]
            cD = pm.brick_domains[2]
            # FunctionType / ConstantType have constant storage, never expand!!
            if not isinstance(pm.parameter_context.param_type, (FunctionType, ConstantType)):
                new_domain = parameter_context.dom.total_extents

                delta_domain = [(new - old) for new, old in zip(new_domain, tD)]
                log.debug('delta domain: %s', delta_domain)

                tD = [(old + delta) for old, delta in zip(tD, delta_domain)]
                pm.brick_domains[0] = tD

        # Gather block list: candidate brick origins along each dimension
        log.trace('tD, bD, cD: %s, %s, %s', tD, bD, cD)
        axis_origins = [range(d)[::bD[i]] for i,d in enumerate(tD)]

        # Gather brick origins; bricks whose size equals their active size are already satisfied
        need_origins = set(itertools.product(*axis_origins))
        log.trace('need_origins: %s', need_origins)
        have_origins = set(v[1] for k,v in pm.brick_list.iteritems() if v[2] == v[3])
        log.trace('have_origins: %s', have_origins)
        need_origins.difference_update(have_origins)
        log.trace('need_origins: %s', need_origins)

        pending = sorted(need_origins)

        if pending:
            log.debug('Number of Bricks to Create: %s', len(pending))

            for origin in pending:
                rtree_extents, brick_extents, brick_active_size = self.calculate_extents(origin, bD, parameter_name)

                do_write, bguid = self._brick_exists(parameter_name, brick_extents)
                if do_write:
                    self._write_brick(rtree_extents, brick_extents, brick_active_size, origin, bD, parameter_name)
                else:
                    log.debug('Brick already exists!  Updating brick metadata...')
                    pm.brick_list[bguid] = [brick_extents, origin, tuple(bD), brick_active_size]
        else:
            log.debug('No bricks to create to satisfy the domain expansion...')

        ## .flush() is called by insert_timesteps - no need to call these here

        if do_flush:
            # Flush the parameter_metadata
            pm.flush()
            # If necessary (i.e. write_brick has been called), flush the master_manager
            if self.master_manager.is_dirty():
                self.master_manager.flush()

    # Returns a count of bricks for a parameter
    def parameter_brick_count(self, parameter_name):
        """
        Counts and returns the number of bricks in a given parameter's brick list

        @param parameter_name   Name of parameter
        @return The number of virtual bricks
        """
        ret = 0
        if parameter_name in self.parameter_metadata:
            ret = len(self.parameter_metadata[parameter_name].brick_list)
        else:
            log.debug('No bricks found for parameter: %s', parameter_name)

        return ret

    def has_dirty_values(self):
        """
        Checks if the master file values have been modified

        @return True if master file metadata has been modified
        """
        for v in self.value_list.itervalues():
            if v.has_dirty_values():
                return True

        return False

    def get_dirty_values_async_result(self):
        return_now = False
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return_now = True

        if self.brick_dispatcher is None:
            log.debug('\'brick_dispatcher\' is None')
            return_now = True

        if return_now:
            from gevent.event import AsyncResult
            ret = AsyncResult()
            ret.set(True)
            return ret

        return self.brick_dispatcher.get_dirty_values_async_result()

    def update_domain(self, tdom=None, sdom=None, do_flush=True):
        """
        Updates the temporal and/or spatial domain in the MasterManager.

        If do_flush is unspecified or True, the MasterManager is flushed within this call

        @param tdom     the value to update the Temporal Domain to
        @param sdom     the value to update the Spatial Domain to
        @param do_flush    Flush the MasterManager after updating the value(s); Default is True
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        # Update the global tdom & sdom as necessary
        if tdom is not None:
            self.master_manager.tdom = tdom
        if sdom is not None:
            self.master_manager.sdom = sdom

        if do_flush:
            self.master_manager.flush()

    def flush_values(self):
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return

        for k, v in self.value_list.iteritems():
            v.flush_values()

        return self.get_dirty_values_async_result()

    def flush(self):
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return

        self.flush_values()
        for pk, pm in self.parameter_metadata.iteritems():
            log.debug('Flushing ParameterManager for \'%s\'...', pk)
            pm.flush()
        log.debug('Flushing MasterManager...')
        self.master_manager.flush()

    def close(self, force=False, timeout=None):
        if not self._closed:
            if self.mode != 'r':
                self.flush()
                if self.brick_dispatcher is not None:
                    self.brick_dispatcher.shutdown(force=force, timeout=timeout)

        self._closed = True
Exemplo n.º 2
0
class PersistenceLayer(object):
    """
    The PersistenceLayer class manages the disk-level storage (and retrieval) of the Coverage Model using HDF5 files.
    """

    def __init__(self, root, guid, name=None, tdom=None, sdom=None, mode=None, bricking_scheme=None, inline_data_writes=True, auto_flush_values=True, value_caching=True, coverage_type=None, **kwargs):
        """
        Constructor for PersistenceLayer

        @param root The <root> component of the filesystem path for the coverage (/<root>/<guid>);
                    '' or None falls back to '.'
        @param guid The <guid> component of the filesystem path for the coverage (/<root>/<guid>)
        @param name CoverageModel's name persisted to the metadata attribute in the master HDF5 file
        @param tdom Concrete instance of AbstractDomain for the temporal domain component;
                    when given, its shape.extents is used to initialise the master brick bookkeeping
        @param sdom Concrete instance of AbstractDomain for the spatial domain component
        @param mode Access mode; 'r' is treated as read-only (no flush, no brick dispatcher)
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @param inline_data_writes   Stored on the master manager only if it does not already carry the attribute;
                                    when the effective value is truthy no BrickWriterDispatcher is started
        @param auto_flush_values    True = Values flushed to HDF5 files automatically, False = Manual
        @param value_caching  if True (default), value requests should be cached for rapid duplicate retrieval
        @param coverage_type    Passed to the MasterManager and stored on it if it lacks the attribute
        @param kwargs   Forwarded to the MasterManager constructor
        @return None
        """

        log.debug('Persistence GUID: %s', guid)
        # The former test `root is ('' or None)` evaluated to `root is None`,
        # so an empty string was never replaced by the current directory.
        if root is None or root == '':
            root = '.'

        self.master_manager = MasterManager(root, guid, name=name, tdom=tdom, sdom=sdom, global_bricking_scheme=bricking_scheme, parameter_bounds=None, coverage_type=coverage_type, **kwargs)

        # From here on, attribute reads/writes may be delegated to master_manager
        # (see __getattr__ / __setattr__).
        self.mode = mode
        if not hasattr(self.master_manager, 'auto_flush_values'):
            self.master_manager.auto_flush_values = auto_flush_values
        if not hasattr(self.master_manager, 'inline_data_writes'):
            self.master_manager.inline_data_writes = inline_data_writes
        if not hasattr(self.master_manager, 'value_caching'):
            self.master_manager.value_caching = value_caching
        if not hasattr(self.master_manager, 'coverage_type'):
            self.master_manager.coverage_type = coverage_type

        # TODO: This is not done correctly
        # Identity test: a domain object must not be able to veto this via a custom __ne__
        if tdom is not None:
            self._init_master(tdom.shape.extents, bricking_scheme)

        self.value_list = {}

        self.parameter_metadata = {} # {parameter_name: ParameterManager}

        for pname in self.param_groups:
            log.debug('parameter group: %s', pname)
            self.parameter_metadata[pname] = ParameterManager(os.path.join(self.root_dir, self.guid, pname), pname)

        if self.mode != 'r':
            if self.master_manager.is_dirty():
                self.master_manager.flush()

        # A dispatcher is only needed for writable layers that do not write inline
        if self.mode == 'r' or self.inline_data_writes:
            self.brick_dispatcher = None
        else:
            self.brick_dispatcher = BrickWriterDispatcher(self.write_failure_callback)
            self.brick_dispatcher.run()

        self._closed = False

        log.debug('Persistence Layer Successfully Initialized')

    def __getattr__(self, key):
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            return getattr(self.master_manager, key)
        else:
            return getattr(super(PersistenceLayer, self), key)

    def __setattr__(self, key, value):
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            setattr(self.master_manager, key, value)
        else:
            super(PersistenceLayer, self).__setattr__(key, value)

    def update_parameter_bounds(self, parameter_name, bounds):
        dmin, dmax = bounds
        if parameter_name in self.parameter_bounds:
            pmin, pmax = self.parameter_bounds[parameter_name]
            dmin = min(dmin, pmin)
            dmax = max(dmax, pmax)
        self.parameter_bounds[parameter_name] = (dmin, dmax)
        self.master_manager.flush()

    def _init_master(self, tD, bricking_scheme):
        log.debug('Performing Rtree dict setup')
        # tD = parameter_context.dom.total_extents
        bD,cD = self.calculate_brick_size(tD, bricking_scheme) #remains same for each parameter

        self.master_manager._init_rtree(bD)

        self.master_manager.brick_list = {}
        self.master_manager.brick_domains = [tD, bD, cD, bricking_scheme]

    # CBM TODO: This needs to be improved greatly - should callback all the way to the Application layer as a "failure handler"
    def write_failure_callback(self, message, work):
        log.error('WORK DISCARDED!!!; %s: %s', message, work)

    def calculate_brick_size(self, tD, bricking_scheme):
        """
        Calculates and returns the brick and chunk size for each dimension
        in the total domain based on the bricking scheme

        @param tD   Total domain
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @return Brick and Chunk sizes based on the total domain
        """

        log.debug('Calculating the size of a brick...')
        log.debug('Bricking scheme: %s', bricking_scheme)
        log.debug('tD: %s', tD)
        bD = [bricking_scheme['brick_size'] for x in tD]
        cD = [bricking_scheme['chunk_size'] for x in tD]
        log.debug('bD: %s', bD)
        log.debug('cD: %s', cD)
        return bD,tuple(cD)

    def init_parameter(self, parameter_context, bricking_scheme):
        """
        Registers a new parameter: creates a writable ParameterManager and the master-file
        group, builds the storage object, flushes the ParameterManager and switches it to
        read-only, links every existing master brick to the parameter and flushes the master.

        @param parameter_context    ParameterContext object describing the parameter to initialize
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @return A PersistedStorage object (SparsePersistedStorage when the parameter type's
                _value_class is 'SparseConstantValue')
        @raise IOError if the layer was opened with mode 'r'
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        parameter_name = parameter_context.name

        self.global_bricking_scheme = bricking_scheme

        param_dir = os.path.join(self.root_dir, self.guid, parameter_name)
        pm = ParameterManager(param_dir, parameter_name, read_only=False)
        self.parameter_metadata[parameter_name] = pm
        pm.parameter_context = parameter_context

        log.debug('Initialize %s', parameter_name)
        self.master_manager.create_group(parameter_name)

        # Both storage classes take the same arguments; only the class differs
        param_type = parameter_context.param_type
        if param_type._value_class == 'SparseConstantValue':
            storage_cls = SparsePersistedStorage
        else:
            storage_cls = PersistedStorage
        storage = storage_cls(
            pm,
            self.master_manager,
            self.brick_dispatcher,
            dtype=param_type.storage_encoding,
            fill_value=param_type.fill_value,
            mode=self.mode,
            inline_data_writes=self.inline_data_writes,
            auto_flush=self.auto_flush_values)
        self.value_list[parameter_name] = storage

        # CBM TODO: Consider making this optional and bulk-flushing from the coverage after all parameters have been initialized
        # The manager was just modified, so flush unconditionally
        pm.flush()

        # Put the pm into read_only mode
        pm.read_only = True

        # If there are already bricks, ensure there are appropriate links for this new parameter
        for brick_guid in self.master_manager.brick_list:
            self._add_brick_link(parameter_name, brick_guid, '{0}.hdf5'.format(brick_guid))

        self.master_manager.flush()

        return storage

    def calculate_extents(self, origin, bD, total_extents):
        """
        Calculates and returns the Rtree extents, brick extents and active brick size for the parameter

        @param origin   The origin of the brick in index space
        @param bD   The brick's domain in index space
        @param parameter_name   The parameter name
        @return rtree_extents, tuple(brick_extents), tuple(brick_active_size)
        """
        # Calculate the brick extents
        origin = list(origin)

        # Calculate the extents for the Rtree (index space)
        rtree_extents = origin + map(lambda o,s: o+s-1, origin, bD)
        # Fake out the rtree if rank == 1
        if len(origin) == 1:
            rtree_extents = [e for ext in zip(rtree_extents,[0 for x in rtree_extents]) for e in ext]
        log.debug('Rtree extents: %s', rtree_extents)

        # Calculate the extents of the brick (index space)
        brick_extents = zip(origin,map(lambda o,s: o+s-1, origin, bD))
        log.debug('Brick extents: %s', brick_extents)

        # Calculate active size using the inner extent of the domain within a brick (value space)
        brick_active_size = map(lambda o,s: min(o,s[1]+1)-s[0], total_extents, brick_extents)
        log.debug('Brick active size: %s', brick_active_size)

        # When loaded, brick_extents and brick_active_extents will be tuples...so, convert them now to allow clean comparison
        return rtree_extents, tuple(brick_extents), tuple(brick_active_size)

    def _brick_exists_master(self, brick_extents):
        do_write = True
        brick_guid = ''
        for x,v in self.master_manager.brick_list.iteritems():
            if brick_extents == v[0]:
                log.debug('Brick found with matching extents: guid=%s', x)
                do_write = False
                brick_guid = x
                break

        return do_write, brick_guid

    def _add_brick_link(self, parameter_name, brick_guid, brick_file_name):
        brick_rel_path = os.path.join(self.parameter_metadata[parameter_name].root_dir.replace(self.root_dir,'.'), brick_file_name)
        link_path = '/{0}/{1}'.format(parameter_name, brick_guid)

        # Add brick to Master HDF file
        self.master_manager.add_external_link(link_path, brick_rel_path, brick_guid)

    # Write empty HDF5 brick to the filesystem
    def _write_brick(self, rtree_extents, brick_extents, brick_active_size, origin, bD):
        """
        Registers a virtual brick on the master manager: adds an ExternalLink for every known
        parameter, records the brick in the master brick list and inserts it into the master
        rtree. No brick file is written here.

        @param rtree_extents    Total extents of brick's domain in rtree format
        @param brick_extents    Size of brick
        @param brick_active_size    Size of brick (same rank as parameter)
        @param origin   Domain origin offset
        @param bD   Slice-friendly size of brick's domain
        @return N/A
        """
        log.debug('Writing virtual brick...')

        # New GUID names both the brick file and its links in the master file
        brick_guid = create_guid()
        brick_file_name = '{0}.hdf5'.format(brick_guid)

        #TODO: Inclusion of external links only used for external viewing of master file, remove if non-performant
        for parameter_name in list(self.parameter_metadata):
            self._add_brick_link(parameter_name, brick_guid, brick_file_name)

        # Update the brick listing; the count is taken BEFORE the insert and is used as the rtree id
        manager = self.master_manager
        brick_record = [brick_extents, origin, tuple(bD), brick_active_size]
        log.debug('Updating brick list[%s] with (%s, %s, %s, %s)', brick_guid, brick_extents, origin, tuple(bD), brick_active_size)
        rtree_id = len(manager.brick_list)
        manager.brick_list[brick_guid] = brick_record
        log.debug('Brick count is %s', rtree_id)

        # Insert into Rtree
        log.debug('Inserting into Rtree %s:%s:%s', rtree_id, rtree_extents, brick_guid)
        manager.update_rtree(rtree_id, rtree_extents, obj=brick_guid)

    # Expand the domain
    def expand_domain(self, total_extents, do_flush=False):
        """
        Expands the total domain to the requested extents and registers a virtual brick for every
        brick origin the enlarged domain needs but the brick list does not yet fully cover.
        Temporal domain expansion is most typical.
        Number of dimensions may not change for the parameter.

        @param total_extents    The total extents of the domain
        @param do_flush    Kept for interface compatibility; the MasterManager is flushed before
                           returning whether or not this is set
        @return N/A
        @raise IOError    If the PersistenceLayer was opened with mode 'r'
        @raise SystemError    If the rank of total_extents differs from that of the current total domain
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        mm = self.master_manager

        if mm.brick_domains[0] is not None:
            log.debug('Expanding domain (n-dimension)')

            # Check if the number of dimensions of the total domain has changed
            # TODO: Will this ever happen???  If so, how to handle?
            if len(total_extents) != len(mm.brick_domains[0]):
                raise SystemError('Number of dimensions for parameter cannot change, only expand in size! No action performed.')

            tD = mm.brick_domains[0]
            bD = mm.brick_domains[1]
            cD = mm.brick_domains[2]

            delta_domain = [(x - y) for x, y in zip(total_extents, tD)]
            log.debug('delta domain: %s', delta_domain)

            tD = [(x + y) for x, y in zip(tD, delta_domain)]
            mm.brick_domains[0] = tD
        else:
            # No total domain recorded yet: derive the brick/chunk sizes from the stored bricking scheme
            tD = total_extents
            bricking_scheme = mm.brick_domains[3]
            bD, cD = self.calculate_brick_size(tD, bricking_scheme)
            mm.brick_domains = [tD, bD, cD, bricking_scheme]

        # Gather block list: candidate origins along each dimension, one per brick-size step
        log.trace('tD, bD, cD: %s, %s, %s', tD, bD, cD)
        lst = [range(0, d, bD[i]) for i, d in enumerate(tD)]

        # Gather brick origins
        need_origins = set(itertools.product(*lst))
        log.trace('need_origins: %s', need_origins)
        # brick_list entries are [brick_extents, origin, brick_size, brick_active_size]; only bricks
        # whose active size equals the brick size count as already satisfied
        have_origins = set(v[1] for v in mm.brick_list.values() if v[2] == v[3])
        log.trace('have_origins: %s', have_origins)
        need_origins.difference_update(have_origins)
        log.trace('need_origins: %s', need_origins)

        need_origins = sorted(need_origins)

        if need_origins:
            log.debug('Number of Bricks to Create: %s', len(need_origins))

            # Write virtual HDF5 brick file
            for origin in need_origins:
                rtree_extents, brick_extents, brick_active_size = self.calculate_extents(origin, bD, total_extents)

                do_write, bguid = self._brick_exists_master(brick_extents)
                if not do_write:
                    log.debug('Brick already exists!  Updating brick metadata...')
                    mm.brick_list[bguid] = [brick_extents, origin, tuple(bD), brick_active_size]
                else:
                    self._write_brick(rtree_extents, brick_extents, brick_active_size, origin, bD)
        else:
            log.debug('No bricks to create to satisfy the domain expansion...')

        # NOTE(review): an earlier comment here said insert_timesteps performs the flush, yet the
        # master has always been flushed unconditionally at this point; that behavior is retained.
        mm.flush()
        if do_flush:
            # If necessary (i.e. write_brick has been called), flush the master_manager
            if mm.is_dirty():
                mm.flush()

    def shrink_domain(self, total_domain, do_flush=True):
        """
        Shrinks the total domain, dropping every brick that follows the one located for total_domain

        NOTE(review): unlike expand_domain there is no read-only mode check here, and the active size
        written back only uses the first dimension - confirm both are intended.

        @param total_domain    The new (smaller) total domain
        @param do_flush    If True (default), flush the MasterManager when it reports being dirty
        """
        from coverage_model import bricking_utils

        mm = self.master_manager
        tree = mm.brick_tree

        # Find the last brick needed to contain the domain
        matches = bricking_utils.get_bricks_from_slice(total_domain, tree)
        last_idx, last_guid = matches[0]
        keep = last_idx + 1

        # Collect the guids of the bricks after the one still needed, then cut them out of the tree
        stale_guids = [span.value for span in tree._spans[keep:]]
        tree._spans = tree._spans[:keep]

        # Remove the unnecessary bricks from the brick list (brick files are not touched here)
        for stale in stale_guids:
            del mm.brick_list[stale]

        # Reset the first member of brick_domains
        mm.brick_domains[0] = list(total_domain)

        # Replace the last element (active size) of the retained brick's entry
        entry = mm.brick_list[last_guid]
        active_size = (total_domain[0] - entry[1][0],)
        mm.brick_list[last_guid] = tuple(entry[:-1]) + (active_size,)

        if do_flush and mm.is_dirty():
            mm.flush()

    def has_dirty_values(self):
        """
        Checks whether any value storage held in value_list reports dirty values

        @return True if at least one entry of value_list has dirty values, otherwise False
        """
        # dict.values() is available on Python 2 and 3 alike; itervalues() exists on Python 2 only
        return any(v.has_dirty_values() for v in self.value_list.values())

    def get_dirty_values_async_result(self):
        """
        Returns an async result for the outstanding dirty values

        When the layer is read-only (mode 'r') or has no brick_dispatcher, an AsyncResult already
        set to True is returned; otherwise the request is delegated to the brick_dispatcher.

        @return The brick_dispatcher's async result, or an AsyncResult already set to True
        """
        read_only = self.mode == 'r'
        if read_only:
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)

        no_dispatcher = self.brick_dispatcher is None
        if no_dispatcher:
            log.debug('\'brick_dispatcher\' is None')

        if not (read_only or no_dispatcher):
            return self.brick_dispatcher.get_dirty_values_async_result()

        # Nothing can be pending: hand back a result that is already complete
        from gevent.event import AsyncResult
        done = AsyncResult()
        done.set(True)
        return done

    def update_domain(self, tdom=None, sdom=None, do_flush=True):
        """
        Stores a new temporal and/or spatial domain on the MasterManager.

        A domain passed as None is left untouched.

        @param tdom     the value to update the Temporal Domain to
        @param sdom     the value to update the Spatial Domain to
        @param do_flush    Flush the MasterManager after updating the value(s); Default is True
        @raise IOError    If the PersistenceLayer was opened with mode 'r'
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        master = self.master_manager

        # Update the global tdom & sdom as necessary
        for attr_name, domain in (('tdom', tdom), ('sdom', sdom)):
            if domain is not None:
                setattr(master, attr_name, domain)

        if do_flush:
            master.flush()

    def flush_values(self):
        """
        Flushes the values of every storage in value_list

        @return None when the layer is read-only (mode 'r'); otherwise the result of
                get_dirty_values_async_result()
        """
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return

        # Only the storages are needed; values() also works on Python 3, where iteritems() is gone
        for v in self.value_list.values():
            v.flush_values()

        return self.get_dirty_values_async_result()

    def flush(self):
        """
        Flushes the values, then the MasterManager, then every ParameterManager in parameter_metadata

        Does nothing except log a warning when the layer is read-only (mode 'r').
        """
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return

        self.flush_values()
        log.debug('Flushing MasterManager...')
        self.master_manager.flush()
        # items() works on both Python 2 and 3; iteritems() is Python 2-only
        for pk, pm in self.parameter_metadata.items():
            log.debug('Flushing ParameterManager for \'%s\'...', pk)
            pm.flush()

    def close(self, force=False, timeout=None):
        """
        Closes the layer; on the first call of a writable layer it is flushed and the
        brick_dispatcher (if any) is shut down

        @param force    Forwarded to brick_dispatcher.shutdown
        @param timeout    Forwarded to brick_dispatcher.shutdown
        """
        needs_shutdown = not self._closed and self.mode != 'r'
        if needs_shutdown:
            self.flush()
            if self.brick_dispatcher is not None:
                self.brick_dispatcher.shutdown(force=force, timeout=timeout)

        # Marked closed regardless of mode so repeated calls do no further work
        self._closed = True
Exemplo n.º 3
0
class SimplePersistenceLayer(object):
    """
    Persistence layer that only maintains a MasterManager; parameter values live in InMemoryStorage
    and the domain and bounds operations are no-ops.

    Attribute reads and writes for names the MasterManager already has are delegated to it.
    """

    def __init__(self, root, guid, name=None, param_dict=None, mode=None, coverage_type=None, **kwargs):
        """
        Constructor for SimplePersistenceLayer

        @param root Root directory handed to the MasterManager; '' or None is replaced by '.'
        @param guid The guid handed to the MasterManager
        @param name The name handed to the MasterManager
        @param param_dict   The parameter dictionary handed to the MasterManager
        @param mode Access mode; any value other than 'r' flushes the MasterManager on construction
        @param coverage_type    Handed to the MasterManager and stored on it if it has no such attribute
        @param kwargs   Forwarded to the MasterManager
        """
        # The former test `root is ('' or None)` reduced to `root is None` and let '' through
        root = self._normalize_root(root)

        self.master_manager = MasterManager(root_dir=root, guid=guid, name=name, param_dict=param_dict,
                                            parameter_bounds=None, tree_rank=2, coverage_type=coverage_type, **kwargs)

        if not hasattr(self.master_manager, 'coverage_type'):
            self.master_manager.coverage_type = coverage_type

        self.mode = mode

        if self.mode != 'r':
            self.master_manager.flush()

        self._closed = False

    @staticmethod
    def _normalize_root(root):
        """
        Maps a missing root ('' or None) to the current directory

        @param root The root directory as supplied by the caller
        @return '.' if root is '' or None, otherwise root unchanged
        """
        return '.' if root is None or root == '' else root

    def __getattr__(self, key):
        # Only reached when normal lookup fails: fall through to the MasterManager when it has the name
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            return getattr(self.master_manager, key)
        else:
            return getattr(super(SimplePersistenceLayer, self), key)

    def __setattr__(self, key, value):
        # Names the MasterManager already has are written there; everything else is set on this object
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            setattr(self.master_manager, key, value)
        else:
            super(SimplePersistenceLayer, self).__setattr__(key, value)

    def update_parameter_bounds(self, parameter_name, bounds):
        # No-op - would be called by parameters stored in a ComplexCoverage, which can only be ParameterFunctions
        pass

    def has_dirty_values(self):
        # Never has dirty values
        return False

    def get_dirty_values_async_result(self):
        """
        @return An AsyncResult that is already set to True
        """
        from gevent.event import AsyncResult
        ret = AsyncResult()
        ret.set(True)
        return ret

    def flush_values(self):
        """
        @return The result of get_dirty_values_async_result(); no values are written
        """
        return self.get_dirty_values_async_result()

    def flush(self):
        """
        Flushes the MasterManager; only logs a warning when the layer is read-only (mode 'r')
        """
        if self.mode == 'r':
            log.warn('SimplePersistenceLayer not open for writing: mode=%s', self.mode)
            return

        log.debug('Flushing MasterManager...')
        self.master_manager.flush()

    def close(self, force=False, timeout=None):
        """
        Flushes a writable layer on the first call and marks the layer closed

        @param force    Unused
        @param timeout  Unused
        """
        if not self._closed:
            if self.mode != 'r':
                self.flush()

        self._closed = True

    def expand_domain(self, *args, **kwargs):
        # No Op - storage expanded by *Value classes
        pass

    def shrink_domain(self, total_domain, do_flush=True):
        # No Op
        pass

    def init_parameter(self, parameter_context, *args, **kwargs):
        """
        @param parameter_context    ParameterContext whose param_type supplies value_encoding and fill_value
        @return A new InMemoryStorage for the parameter
        """
        return InMemoryStorage(dtype=parameter_context.param_type.value_encoding, fill_value=parameter_context.param_type.fill_value)

    def update_domain(self, tdom=None, sdom=None, do_flush=True):
        # No Op
        pass
Exemplo n.º 4
0
class SimplePersistenceLayer(object):
    """
    Persistence layer that only maintains a MasterManager; parameter values live in InMemoryStorage
    and the domain and bounds operations are no-ops.

    Attribute reads and writes for names the MasterManager already has are delegated to it.
    """

    def __init__(self, root, guid, name=None, param_dict=None, mode=None, coverage_type=None, **kwargs):
        """
        Constructor for SimplePersistenceLayer

        @param root Root directory handed to the MasterManager; '' or None is replaced by '.'
        @param guid The guid handed to the MasterManager
        @param name The name handed to the MasterManager
        @param param_dict   The parameter dictionary handed to the MasterManager
        @param mode Access mode; any value other than 'r' flushes the MasterManager on construction
        @param coverage_type    Handed to the MasterManager and stored on it if it has no such attribute
        @param kwargs   Forwarded to the MasterManager
        """
        # The former test `root is ('' or None)` reduced to `root is None` and let '' through
        root = self._normalize_root(root)

        self.master_manager = MasterManager(root_dir=root, guid=guid, name=name, param_dict=param_dict,
                                            parameter_bounds=None, tree_rank=2, coverage_type=coverage_type, **kwargs)

        if not hasattr(self.master_manager, 'coverage_type'):
            self.master_manager.coverage_type = coverage_type

        self.mode = mode

        if self.mode != 'r':
            self.master_manager.flush()

        self._closed = False

    @staticmethod
    def _normalize_root(root):
        """
        Maps a missing root ('' or None) to the current directory

        @param root The root directory as supplied by the caller
        @return '.' if root is '' or None, otherwise root unchanged
        """
        return '.' if root is None or root == '' else root

    def __getattr__(self, key):
        # Only reached when normal lookup fails: fall through to the MasterManager when it has the name
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            return getattr(self.master_manager, key)
        else:
            return getattr(super(SimplePersistenceLayer, self), key)

    def __setattr__(self, key, value):
        # Names the MasterManager already has are written there; everything else is set on this object
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            setattr(self.master_manager, key, value)
        else:
            super(SimplePersistenceLayer, self).__setattr__(key, value)

    def update_parameter_bounds(self, parameter_name, bounds):
        # No-op - would be called by parameters stored in a ComplexCoverage, which can only be ParameterFunctions
        pass

    def has_dirty_values(self):
        # Never has dirty values
        return False

    def get_dirty_values_async_result(self):
        """
        @return An AsyncResult that is already set to True
        """
        from gevent.event import AsyncResult
        ret = AsyncResult()
        ret.set(True)
        return ret

    def flush_values(self):
        """
        @return The result of get_dirty_values_async_result(); no values are written
        """
        return self.get_dirty_values_async_result()

    def flush(self):
        """
        Flushes the MasterManager; only logs a warning when the layer is read-only (mode 'r')
        """
        if self.mode == 'r':
            log.warn('SimplePersistenceLayer not open for writing: mode=%s', self.mode)
            return

        log.debug('Flushing MasterManager...')
        self.master_manager.flush()

    def close(self, force=False, timeout=None):
        """
        Flushes a writable layer on the first call and marks the layer closed

        @param force    Unused
        @param timeout  Unused
        """
        if not self._closed:
            if self.mode != 'r':
                self.flush()

        self._closed = True

    def expand_domain(self, *args, **kwargs):
        # No Op - storage expanded by *Value classes
        pass

    def shrink_domain(self, total_domain, do_flush=True):
        # No Op
        pass

    def init_parameter(self, parameter_context, *args, **kwargs):
        """
        @param parameter_context    ParameterContext whose param_type supplies value_encoding and fill_value
        @return A new InMemoryStorage for the parameter
        """
        return InMemoryStorage(dtype=parameter_context.param_type.value_encoding, fill_value=parameter_context.param_type.fill_value)

    def update_domain(self, tdom=None, sdom=None, do_flush=True):
        # No Op
        pass
Exemplo n.º 5
0
class PersistenceLayer(object):
    """
    The PersistenceLayer class manages the disk-level storage (and retrieval) of the Coverage Model using HDF5 files.
    """

    def __init__(self, root, guid, name=None, tdom=None, sdom=None, mode=None, bricking_scheme=None, inline_data_writes=True, auto_flush_values=True, value_caching=True, coverage_type=None, **kwargs):
        """
        Constructor for PersistenceLayer

        @param root The <root> component of the filesystem path for the coverage (/<root>/<guid>)
        @param guid The <guid> component of the filesystem path for the coverage (/<root>/<guid>)
        @param name CoverageModel's name persisted to the metadata attribute in the master HDF5 file
        @param tdom Concrete instance of AbstractDomain for the temporal domain component
        @param sdom Concrete instance of AbstractDomain for the spatial domain component
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @param auto_flush_values    True = Values flushed to HDF5 files automatically, False = Manual
        @param value_caching  if True (default), value requests should be cached for rapid duplicate retrieval
        @param kwargs
        @return None
        """

        log.debug('Persistence GUID: %s', guid)
        root = '.' if root is ('' or None) else root

        self.master_manager = MasterManager(root, guid, name=name, tdom=tdom, sdom=sdom, global_bricking_scheme=bricking_scheme, parameter_bounds=None, coverage_type=coverage_type)

        self.mode = mode
        if not hasattr(self.master_manager, 'auto_flush_values'):
            self.master_manager.auto_flush_values = auto_flush_values
        if not hasattr(self.master_manager, 'inline_data_writes'):
            self.master_manager.inline_data_writes = inline_data_writes
        if not hasattr(self.master_manager, 'value_caching'):
            self.master_manager.value_caching = value_caching
        if not hasattr(self.master_manager, 'coverage_type'):
            self.master_manager.coverage_type = coverage_type

        # TODO: This is not done correctly
        if tdom != None:
            self._init_master(tdom.shape.extents, bricking_scheme)

        self.value_list = {}

        self.parameter_metadata = {} # {parameter_name: [brick_list, parameter_domains, rtree]}

        for pname in self.param_groups:
            log.debug('parameter group: %s', pname)
            self.parameter_metadata[pname] = ParameterManager(os.path.join(self.root_dir, self.guid, pname), pname)

        if self.mode != 'r':
            if self.master_manager.is_dirty():
                self.master_manager.flush()

        if self.mode == 'r' or self.inline_data_writes:
            self.brick_dispatcher = None
        else:
            self.brick_dispatcher = BrickWriterDispatcher(self.write_failure_callback)
            self.brick_dispatcher.run()

        self._closed = False

        log.debug('Persistence Layer Successfully Initialized')

    def __getattr__(self, key):
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            return getattr(self.master_manager, key)
        else:
            return getattr(super(PersistenceLayer, self), key)

    def __setattr__(self, key, value):
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            setattr(self.master_manager, key, value)
        else:
            super(PersistenceLayer, self).__setattr__(key, value)

    def update_parameter_bounds(self, parameter_name, bounds):
        dmin, dmax = bounds
        if parameter_name in self.parameter_bounds:
            pmin, pmax = self.parameter_bounds[parameter_name]
            dmin = min(dmin, pmin)
            dmax = max(dmax, pmax)
        self.parameter_bounds[parameter_name] = (dmin, dmax)
        self.master_manager.flush()

    def _init_master(self, tD, bricking_scheme):
        """
        Initializes the MasterManager's rtree, brick list and brick domains for a total domain

        @param tD   Total domain
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        """
        log.debug('Performing Rtree dict setup')
        # Brick and chunk sizes remain the same for each parameter
        brick_size, chunk_size = self.calculate_brick_size(tD, bricking_scheme)

        master = self.master_manager
        master._init_rtree(brick_size)

        master.brick_list = {}
        master.brick_domains = [tD, brick_size, chunk_size, bricking_scheme]

    # CBM TODO: This needs to be improved greatly - should callback all the way to the Application layer as a "failure handler"
    def write_failure_callback(self, message, work):
        """
        Logs at error level that a unit of work was discarded; handed to the BrickWriterDispatcher
        when the constructor creates it

        @param message  Text describing the failure
        @param work The work item that was discarded
        """
        log.error('WORK DISCARDED!!!; %s: %s', message, work)

    def calculate_brick_size(self, tD, bricking_scheme):
        """
        Builds the per-dimension brick and chunk sizes: every dimension of the total domain gets the
        scheme's 'brick_size' and 'chunk_size'

        @param tD   Total domain
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @return (brick sizes as a list, chunk sizes as a tuple), one entry per dimension of tD
        """

        log.debug('Calculating the size of a brick...')
        log.debug('Bricking scheme: %s', bricking_scheme)
        log.debug('tD: %s', tD)
        brick_sizes = [bricking_scheme['brick_size'] for _ in tD]
        chunk_sizes = [bricking_scheme['chunk_size'] for _ in tD]
        log.debug('bD: %s', brick_sizes)
        log.debug('cD: %s', chunk_sizes)
        return brick_sizes, tuple(chunk_sizes)

    def init_parameter(self, parameter_context, bricking_scheme):
        """
        Sets up persistence for one parameter: creates its ParameterManager and master-file group,
        builds its storage object and links any bricks that already exist

        @param parameter_context    ParameterContext object describing the parameter to initialize
        @param bricking_scheme  A dictionary containing the brick and chunk sizes
        @return A SparsePersistedStorage when the parameter type's value class is 'SparseConstantValue',
                otherwise a PersistedStorage
        @raise IOError  If the PersistenceLayer was opened with mode 'r'
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        parameter_name = parameter_context.name

        self.global_bricking_scheme = bricking_scheme

        param_dir = os.path.join(self.root_dir, self.guid, parameter_name)
        pm = ParameterManager(param_dir, parameter_name, read_only=False)
        self.parameter_metadata[parameter_name] = pm

        pm.parameter_context = parameter_context

        log.debug('Initialize %s', parameter_name)

        self.master_manager.create_group(parameter_name)

        # Both storage classes take the same arguments; only the class differs
        param_type = parameter_context.param_type
        if param_type._value_class == 'SparseConstantValue':
            storage_cls = SparsePersistedStorage
        else:
            storage_cls = PersistedStorage
        v = storage_cls(pm, self.master_manager, self.brick_dispatcher,
                        dtype=param_type.storage_encoding,
                        fill_value=param_type.fill_value,
                        mode=self.mode,
                        inline_data_writes=self.inline_data_writes,
                        auto_flush=self.auto_flush_values)
        self.value_list[parameter_name] = v

        # CBM TODO: Consider making this optional and bulk-flushing from the coverage after all parameters have been initialized
        # No need to check if they're dirty, we know they are!
        pm.flush()

        # Put the pm into read_only mode
        pm.read_only = True

        # If there are already bricks, ensure there are appropriate links for this new parameter
        for brick_guid in self.master_manager.brick_list:
            self._add_brick_link(parameter_name, brick_guid, '{0}.hdf5'.format(brick_guid))

        self.master_manager.flush()

        return v

    def calculate_extents(self, origin, bD, total_extents):
        """
        Calculates and returns the Rtree extents, brick extents and active brick size for a brick

        @param origin   The origin of the brick in index space
        @param bD   The brick's domain in index space
        @param total_extents    The total extents of the domain, used to clip the active size
        @return rtree_extents, tuple(brick_extents), tuple(brick_active_size)
        """
        # Calculate the brick extents
        origin = list(origin)

        # Inclusive upper index of the brick along each dimension.  Built with comprehensions rather
        # than map()/zip() results so it behaves the same on Python 2 and 3.
        upper = [o + s - 1 for o, s in zip(origin, bD)]

        # Calculate the extents for the Rtree (index space)
        rtree_extents = origin + upper
        # Fake out the rtree if rank == 1
        if len(origin) == 1:
            rtree_extents = [e for ext in zip(rtree_extents, [0 for x in rtree_extents]) for e in ext]
        log.debug('Rtree extents: %s', rtree_extents)

        # Calculate the extents of the brick (index space)
        brick_extents = list(zip(origin, upper))
        log.debug('Brick extents: %s', brick_extents)

        # Calculate active size using the inner extent of the domain within a brick (value space)
        brick_active_size = [min(total, hi + 1) - lo for total, (lo, hi) in zip(total_extents, brick_extents)]
        log.debug('Brick active size: %s', brick_active_size)

        # When loaded, brick_extents and brick_active_extents will be tuples...so, convert them now to allow clean comparison
        return rtree_extents, tuple(brick_extents), tuple(brick_active_size)

    def _brick_exists_master(self, brick_extents):
        do_write = True
        brick_guid = ''
        for x,v in self.master_manager.brick_list.iteritems():
            if brick_extents == v[0]:
                log.debug('Brick found with matching extents: guid=%s', x)
                do_write = False
                brick_guid = x
                break

        return do_write, brick_guid

    def _add_brick_link(self, parameter_name, brick_guid, brick_file_name):
        brick_rel_path = os.path.join(self.parameter_metadata[parameter_name].root_dir.replace(self.root_dir,'.'), brick_file_name)
        link_path = '/{0}/{1}'.format(parameter_name, brick_guid)

        # Add brick to Master HDF file
        self.master_manager.add_external_link(link_path, brick_rel_path, brick_guid)

    # Write empty HDF5 brick to the filesystem
    def _write_brick(self, rtree_extents, brick_extents, brick_active_size, origin, bD):
        """
        Registers a virtual brick: adds an external link per known parameter, records the brick in the
        MasterManager's brick list and inserts it into the rtree (the brick file itself is created lazily)

        @param rtree_extents    Total extents of brick's domain in rtree format
        @param brick_extents    Size of brick
        @param brick_active_size    Size of brick (same rank as parameter)
        @param origin   Domain origin offset
        @param bD   Slice-friendly size of brick's domain
        @return N/A
        """
        log.debug('Writing virtual brick...')

        # Create a GUID for the brick and derive its file name
        brick_guid = create_guid()
        brick_file_name = '{0}.hdf5'.format(brick_guid)

        #TODO: Inclusion of external links only used for external viewing of master file, remove if non-performant
        for parameter_name in list(self.parameter_metadata):
            self._add_brick_link(parameter_name, brick_guid, brick_file_name)

        master = self.master_manager
        brick_size = tuple(bD)

        # Update the brick listing; the count taken before insertion becomes the brick's rtree id
        log.debug('Updating brick list[%s] with (%s, %s, %s, %s)', brick_guid, brick_extents, origin, brick_size, brick_active_size)
        brick_count = len(master.brick_list)
        master.brick_list[brick_guid] = [brick_extents, origin, brick_size, brick_active_size]
        log.debug('Brick count is %s', brick_count)

        # Insert into Rtree
        log.debug('Inserting into Rtree %s:%s:%s', brick_count, rtree_extents, brick_guid)
        master.update_rtree(brick_count, rtree_extents, obj=brick_guid)

    # Expand the domain
    def expand_domain(self, total_extents, do_flush=False):
        """
        Expands the total domain to the requested extents and registers a virtual brick for every
        brick origin the enlarged domain needs but the brick list does not yet fully cover.
        Temporal domain expansion is most typical.
        Number of dimensions may not change for the parameter.

        @param total_extents    The total extents of the domain
        @param do_flush    Kept for interface compatibility; the MasterManager is flushed before
                           returning whether or not this is set
        @return N/A
        @raise IOError    If the PersistenceLayer was opened with mode 'r'
        @raise SystemError    If the rank of total_extents differs from that of the current total domain
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        mm = self.master_manager

        if mm.brick_domains[0] is not None:
            log.debug('Expanding domain (n-dimension)')

            # Check if the number of dimensions of the total domain has changed
            # TODO: Will this ever happen???  If so, how to handle?
            if len(total_extents) != len(mm.brick_domains[0]):
                raise SystemError('Number of dimensions for parameter cannot change, only expand in size! No action performed.')

            tD = mm.brick_domains[0]
            bD = mm.brick_domains[1]
            cD = mm.brick_domains[2]

            delta_domain = [(x - y) for x, y in zip(total_extents, tD)]
            log.debug('delta domain: %s', delta_domain)

            tD = [(x + y) for x, y in zip(tD, delta_domain)]
            mm.brick_domains[0] = tD
        else:
            # No total domain recorded yet: derive the brick/chunk sizes from the stored bricking scheme
            tD = total_extents
            bricking_scheme = mm.brick_domains[3]
            bD, cD = self.calculate_brick_size(tD, bricking_scheme)
            mm.brick_domains = [tD, bD, cD, bricking_scheme]

        # Gather block list: candidate origins along each dimension, one per brick-size step
        log.trace('tD, bD, cD: %s, %s, %s', tD, bD, cD)
        lst = [range(0, d, bD[i]) for i, d in enumerate(tD)]

        # Gather brick origins
        need_origins = set(itertools.product(*lst))
        log.trace('need_origins: %s', need_origins)
        # brick_list entries are [brick_extents, origin, brick_size, brick_active_size]; only bricks
        # whose active size equals the brick size count as already satisfied
        have_origins = set(v[1] for v in mm.brick_list.values() if v[2] == v[3])
        log.trace('have_origins: %s', have_origins)
        need_origins.difference_update(have_origins)
        log.trace('need_origins: %s', need_origins)

        need_origins = sorted(need_origins)

        if need_origins:
            log.debug('Number of Bricks to Create: %s', len(need_origins))

            # Write virtual HDF5 brick file
            for origin in need_origins:
                rtree_extents, brick_extents, brick_active_size = self.calculate_extents(origin, bD, total_extents)

                do_write, bguid = self._brick_exists_master(brick_extents)
                if not do_write:
                    log.debug('Brick already exists!  Updating brick metadata...')
                    mm.brick_list[bguid] = [brick_extents, origin, tuple(bD), brick_active_size]
                else:
                    self._write_brick(rtree_extents, brick_extents, brick_active_size, origin, bD)
        else:
            log.debug('No bricks to create to satisfy the domain expansion...')

        # NOTE(review): an earlier comment here said insert_timesteps performs the flush, yet the
        # master has always been flushed unconditionally at this point; that behavior is retained.
        mm.flush()
        if do_flush:
            # If necessary (i.e. write_brick has been called), flush the master_manager
            if mm.is_dirty():
                mm.flush()

    def shrink_domain(self, total_domain, do_flush=True):
        """
        Shrinks the total domain, dropping every brick that follows the one located for total_domain

        NOTE(review): unlike expand_domain there is no read-only mode check here, and the active size
        written back only uses the first dimension - confirm both are intended.

        @param total_domain    The new (smaller) total domain
        @param do_flush    If True (default), flush the MasterManager when it reports being dirty
        """
        from coverage_model import bricking_utils

        mm = self.master_manager
        tree = mm.brick_tree

        # Find the last brick needed to contain the domain
        matches = bricking_utils.get_bricks_from_slice(total_domain, tree)
        last_idx, last_guid = matches[0]
        keep = last_idx + 1

        # Collect the guids of the bricks after the one still needed, then cut them out of the tree
        stale_guids = [span.value for span in tree._spans[keep:]]
        tree._spans = tree._spans[:keep]

        # Remove the unnecessary bricks from the brick list (brick files are not touched here)
        for stale in stale_guids:
            del mm.brick_list[stale]

        # Reset the first member of brick_domains
        mm.brick_domains[0] = list(total_domain)

        # Replace the last element (active size) of the retained brick's entry
        entry = mm.brick_list[last_guid]
        active_size = (total_domain[0] - entry[1][0],)
        mm.brick_list[last_guid] = tuple(entry[:-1]) + (active_size,)

        if do_flush and mm.is_dirty():
            mm.flush()

    def has_dirty_values(self):
        """
        Checks whether any value storage held in value_list reports dirty values

        @return True if at least one entry of value_list has dirty values, otherwise False
        """
        # dict.values() is available on Python 2 and 3 alike; itervalues() exists on Python 2 only
        return any(v.has_dirty_values() for v in self.value_list.values())

    def get_dirty_values_async_result(self):
        """
        Returns an async result for the outstanding dirty values

        When the layer is read-only (mode 'r') or has no brick_dispatcher, an AsyncResult already
        set to True is returned; otherwise the request is delegated to the brick_dispatcher.

        @return The brick_dispatcher's async result, or an AsyncResult already set to True
        """
        read_only = self.mode == 'r'
        if read_only:
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)

        no_dispatcher = self.brick_dispatcher is None
        if no_dispatcher:
            log.debug('\'brick_dispatcher\' is None')

        if not (read_only or no_dispatcher):
            return self.brick_dispatcher.get_dirty_values_async_result()

        # Nothing can be pending: hand back a result that is already complete
        from gevent.event import AsyncResult
        done = AsyncResult()
        done.set(True)
        return done

    def update_domain(self, tdom=None, sdom=None, do_flush=True):
        """
        Stores a new temporal and/or spatial domain on the MasterManager.

        A domain passed as None is left untouched.

        @param tdom     the value to update the Temporal Domain to
        @param sdom     the value to update the Spatial Domain to
        @param do_flush    Flush the MasterManager after updating the value(s); Default is True
        @raise IOError    If the PersistenceLayer was opened with mode 'r'
        """
        if self.mode == 'r':
            raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

        master = self.master_manager

        # Update the global tdom & sdom as necessary
        for attr_name, domain in (('tdom', tdom), ('sdom', sdom)):
            if domain is not None:
                setattr(master, attr_name, domain)

        if do_flush:
            master.flush()

    def flush_values(self):
        """
        Flushes the values of every storage in value_list

        @return None when the layer is read-only (mode 'r'); otherwise the result of
                get_dirty_values_async_result()
        """
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return

        # Only the storages are needed; values() also works on Python 3, where iteritems() is gone
        for v in self.value_list.values():
            v.flush_values()

        return self.get_dirty_values_async_result()

    def flush(self):
        """
        Flushes the values, then the MasterManager, then every ParameterManager in parameter_metadata

        Does nothing except log a warning when the layer is read-only (mode 'r').
        """
        if self.mode == 'r':
            log.warn('PersistenceLayer not open for writing: mode=%s', self.mode)
            return

        self.flush_values()
        log.debug('Flushing MasterManager...')
        self.master_manager.flush()
        # items() works on both Python 2 and 3; iteritems() is Python 2-only
        for pk, pm in self.parameter_metadata.items():
            log.debug('Flushing ParameterManager for \'%s\'...', pk)
            pm.flush()

    def close(self, force=False, timeout=None):
        """
        Closes the layer; on the first call of a writable layer it is flushed and the
        brick_dispatcher (if any) is shut down

        @param force    Forwarded to brick_dispatcher.shutdown
        @param timeout    Forwarded to brick_dispatcher.shutdown
        """
        needs_shutdown = not self._closed and self.mode != 'r'
        if needs_shutdown:
            self.flush()
            if self.brick_dispatcher is not None:
                self.brick_dispatcher.shutdown(force=force, timeout=timeout)

        # Marked closed regardless of mode so repeated calls do no further work
        self._closed = True
class BrickingAssessor(object):
    def __init__(self,
                 total_domain=(10, 10),
                 brick_size=5,
                 use_hdf=False,
                 root_dir='test_data/multi_dim_trials',
                 guid=None,
                 dtype='int16'):
        """
        Build the brick layout for total_domain and, optionally, its HDF5 backing store.

        @param total_domain  extents of the full domain, one entry per dimension
        @param brick_size    edge length used for every dimension of a brick
        @param use_hdf       True = bricks are HDF5 files registered with a MasterManager,
                             False = bricks are in-memory numpy arrays
        @param root_dir      directory holding the HDF5 data (only used when use_hdf is True)
        @param guid          identifier used to name the storage; created when not supplied
                             (only used when use_hdf is True)
        @param dtype         anything accepted by np.dtype; its name is kept in self.dtype
        """
        self.total_domain = total_domain
        self.brick_sizes = tuple(brick_size for _ in total_domain)
        self.use_hdf = use_hdf
        self.dtype = np.dtype(dtype).name

        if self.use_hdf:
            self.guid = guid or create_guid()
            store_name = '%s_%s' % (self.guid, self.dtype)
            self.root_dir = root_dir

            if not os.path.exists(self.root_dir):
                os.makedirs(self.root_dir)

            # Start from a clean slate: remove a store of the same name if one exists
            store_dir = os.path.join(self.root_dir, store_name)
            if os.path.exists(store_dir):
                shutil.rmtree(store_dir)

            self.master_manager = MasterManager(
                self.root_dir, store_name, name='md_test_{0}'.format(store_name))
            self.master_manager.flush()

            context = ParameterContext('test_param',
                                       param_type=QuantityType(self.dtype),
                                       fill_value=-1)
            self.param_manager = ParameterManager(
                os.path.join(self.root_dir, store_name, context.name), context.name)
            self.param_manager.parameter_context = context
            self.master_manager.create_group(context.name)
            self.param_manager.flush()

        self.bricks = {}

        self.brick_origins = bricking_utils.calc_brick_origins(
            self.total_domain, self.brick_sizes)
        extents = bricking_utils.calc_brick_and_rtree_extents(
            self.brick_origins, self.brick_sizes)
        self.brick_extents, self.rtree_extents = extents
        self.build_bricks()

        self.rtree = RTreeProxy()
        for entry in BrickingAssessor.rtree_populator(self.rtree_extents,
                                                      self.brick_extents):
            self.rtree.insert(*entry)

    @classmethod
    def rtree_populator(cls, rtree_extents, brick_extents):
        for i, e in enumerate(rtree_extents):
            yield i, e, brick_extents[i]

    def _get_numpy_array(self, shape):
        """
        Build an array of the given shape filled with consecutive integers in self.dtype.

        @param shape  tuple, or any iterable convertible to a tuple, of dimension sizes
        @return np.arange(utils.prod(shape)) in self.dtype, reshaped to shape
        """
        dims = shape if isinstance(shape, tuple) else tuple(shape)
        element_count = utils.prod(dims)
        return np.arange(element_count, dtype=self.dtype).reshape(dims)

    def build_bricks(self):
        for x in xrange(len(self.brick_origins)):
            if not self.use_hdf:
                self.bricks[x] = np.empty(self.brick_sizes, dtype=self.dtype)
                self.bricks[x].fill(-1)
            else:
                id = str(x)
                fn = '{0}.hdf5'.format(id)
                pth = os.path.join(self.param_manager.root_dir, fn)
                relpth = os.path.join(
                    self.param_manager.root_dir.replace(
                        self.master_manager.root_dir, '.'), fn)
                lnpth = '/{0}/{1}'.format(self.param_manager.parameter_name,
                                          id)

                self.master_manager.add_external_link(lnpth, relpth, id)
                self.bricks[x] = pth

    def reset_bricks(self):
        for i, arr in enumerate(self.bricks.itervalues()):
            if not self.use_hdf:
                arr.fill(-1)
            else:
                with h5py.File(arr) as f:
                    ds = f.require_dataset(str(i),
                                           shape=self.brick_sizes,
                                           dtype=self.dtype,
                                           chunks=None,
                                           fillvalue=-1)
                    ds[:] = -1

    def put_values_to_bricks(self, slice_, values):
        """
        Write values into every brick that intersects slice_.

        @param slice_  index expression over the total domain; normalised with utils.fix_slice
        @param values  scalar (written to every selected index) or array-like whose shape is
                       the shape of slice_, optionally with the singleton dimensions left out
        @raise IndexError  if the shape of values is not compatible with the shape of slice_
        """
        slice_ = utils.fix_slice(slice_, self.total_domain)
        bricks = bricking_utils.get_bricks_from_slice(
            slice_, self.rtree, self.total_domain
        )  # this is a list of tuples [(b_id, (bounds...),), ...]

        values = np.asanyarray(values)
        v_shp = values.shape
        log.debug('value_shape: %s', v_shp)
        s_shp = utils.slice_shape(slice_, self.total_domain)
        log.debug('slice_shape: %s', s_shp)
        is_broadcast = False
        if v_shp == ():
            # 0-d input: the same scalar is written to every brick slice
            log.debug('Broadcast!!')
            is_broadcast = True
            value_slice = ()
        elif v_shp != s_shp:
            if v_shp == tuple([
                    i for i in s_shp if i != 1
            ]):  # Missing dimensions are singleton, just reshape to fit
                values = values.reshape(s_shp)
                v_shp = values.shape
            else:
                raise IndexError(
                    'Shape of \'value\' is not compatible with \'slice_\': slice_ shp == {0}\tvalue shp == {1}'
                    .format(s_shp, v_shp))
        else:
            value_slice = None

        log.debug('value_shape: %s', v_shp)

        for b in bricks:
            # b is (brick_id, (brick_bounds per dim...),)
            bid, bbnds = b
            log.debug('Determining slice for brick: %s', b)
            # list(...) so the subscript also works where zip() returns an iterator (Python 3)
            bexts = tuple([x + 1 for x in list(zip(*bbnds))[1]
                           ])  # Shift from index to size
            log.debug('bid=%s, bbnds=%s, bexts=%s', bid, bbnds, bexts)

            brick_slice, brick_mm = bricking_utils.get_brick_slice_nd(
                slice_, bbnds)

            if None in brick_slice:  # Brick does not contain any of the requested indices
                log.debug(
                    'Brick does not contain any of the requested indices: Move to next brick'
                )
                continue

            try:
                brick_slice = utils.fix_slice(brick_slice, bexts)
            except IndexError:
                log.debug('Malformed brick_slice: move to next brick')
                continue

            if not is_broadcast:
                # Part of 'values' that belongs to this brick
                value_slice = bricking_utils.get_value_slice_nd(
                    slice_, v_shp, bbnds, brick_slice, brick_mm)

                try:
                    value_slice = utils.fix_slice(value_slice, v_shp)
                except IndexError:
                    log.debug('Malformed value_slice: move to next brick')
                    continue

            log.debug(
                '\nbrick %s:\n\tbrick_slice %s=%s\n\tmin/max=%s\n\tvalue_slice %s=%s',
                b, utils.slice_shape(brick_slice,
                                     bexts), brick_slice, brick_mm,
                utils.slice_shape(value_slice, v_shp), value_slice)
            v = values[value_slice]
            log.debug('\nvalues %s=\n%s', v.shape, v)
            if not self.use_hdf:
                self.bricks[bid][brick_slice] = v
            else:
                fi = self.bricks[bid]
                # Explicit append mode: this is a write, and h5py >= 3.0 opens files
                # read-only when no mode is given
                with h5py.File(fi, 'a') as f:
                    ds = f.require_dataset(str(bid),
                                           shape=self.brick_sizes,
                                           dtype=self.dtype,
                                           chunks=None,
                                           fillvalue=-1)
                    ds[brick_slice] = v

    def get_values_from_bricks(self, slice_):
        """
        Read the values selected by slice_ from the bricks that intersect it.

        @param slice_  index expression over the total domain; normalised with utils.fix_slice
        @return numpy array with singleton dimensions squeezed out, or a numpy scalar when
                exactly one element is selected
        """
        slice_ = utils.fix_slice(slice_, self.total_domain)
        bricks = bricking_utils.get_bricks_from_slice(
            slice_, self.rtree, self.total_domain
        )  # this is a list of tuples [(b_id, (bounds...),), ...]

        ret_shp = utils.slice_shape(slice_, self.total_domain)
        # Allocated uninitialised; only the regions covered by a brick are filled in below
        ret_arr = np.empty(ret_shp, dtype=self.dtype)

        for bid, bbnds in bricks:
            brick_slice, brick_mm = bricking_utils.get_brick_slice_nd(
                slice_, bbnds)

            if None in brick_slice:
                # Brick does not contain any of the requested indices
                continue

            ret_slice = bricking_utils.get_value_slice_nd(
                slice_, ret_shp, bbnds, brick_slice, brick_mm)

            if not self.use_hdf:
                ret_vals = self.bricks[bid][brick_slice]
            else:
                fi = self.bricks[bid]
                # Explicit append mode: require_dataset may have to create the dataset,
                # and h5py >= 3.0 opens files read-only when no mode is given
                with h5py.File(fi, 'a') as f:
                    ds = f.require_dataset(str(bid),
                                           shape=self.brick_sizes,
                                           dtype=self.dtype,
                                           chunks=None,
                                           fillvalue=-1)
                    ret_vals = ds[brick_slice]

            ret_arr[ret_slice] = ret_vals

        ret_arr = ret_arr.squeeze()

        # After squeeze() a single-element array is always 0-d, so the former
        # "size == 1 but ndim != 0" branch could never run; unwrap the 0-d array.
        if ret_arr.ndim == 0:
            ret_arr = ret_arr[()]

        return ret_arr
Exemplo n.º 7
0
class PersistenceLayer(object):
    def __init__(self, root, guid, name=None, tdom=None, sdom=None, bricking_scheme=None, auto_flush_values=True, **kwargs):
        """
        Constructor for Persistence Layer
        @param root: Where to save/look for HDF5 files; None or '' selects the current directory ('.')
        @param guid: CoverageModel GUID
        @param name: CoverageModel Name
        @param tdom: Temporal Domain
        @param sdom: Spatial Domain
        @param bricking_scheme: passed to the MasterManager as its global_bricking_scheme
        @param auto_flush_values: stored as auto_flush_values; used as the auto_flush setting of the value storage created by init_parameter
        @param kwargs: accepted but not used by this constructor
        @return:
        """
        log.debug('Persistence GUID: %s', guid)
        # The former test "root is ('' or None)" reduced to "root is None", because
        # ('' or None) evaluates to None; an empty string was therefore never replaced.
        if root is None or root == '':
            root = '.'

        self.master_manager = MasterManager(root, guid, name=name, tdom=tdom, sdom=sdom, global_bricking_scheme=bricking_scheme)

        # From here on attribute access goes through __getattr__/__setattr__, which
        # delegate to the MasterManager for attributes it already has
        self.auto_flush_values = auto_flush_values
        self.value_list = {}

        self.parameter_metadata = {} # {parameter_name: [brick_list, parameter_domains, rtree]}

        # One ParameterManager per parameter group
        for pname in self.param_groups:
            log.debug('parameter group: %s', pname)
            self.parameter_metadata[pname] = ParameterManager(os.path.join(self.root_dir, self.guid, pname), pname)

        if self.master_manager.is_dirty():
            self.master_manager.flush()

        # Failed brick writes are reported through write_failure_callback
        self.brick_dispatcher = BrickWriterDispatcher(self.write_failure_callback)
        self.brick_dispatcher.run()

        self._closed = False

        log.info('Persistence Layer Successfully Initialized')

    def __getattr__(self, key):
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            return getattr(self.master_manager, key)
        else:
            return getattr(super(PersistenceLayer, self), key)

    def __setattr__(self, key, value):
        if 'master_manager' in self.__dict__ and hasattr(self.master_manager, key):
            setattr(self.master_manager, key, value)
        else:
            super(PersistenceLayer, self).__setattr__(key, value)

    # CBM TODO: This needs to be improved greatly - should callback all the way to the Application layer as a "failure handler"
    def write_failure_callback(self, message, work):
        """
        Log, at error level, a piece of work that was discarded.

        This method is handed to the BrickWriterDispatcher in the constructor.  The
        work is only logged here; nothing is retried or re-queued.

        @param message  text included in the log entry
        @param work     the discarded work item, included in the log entry
        """
        log.error('WORK DISCARDED!!!; %s: %s', message, work)

    def calculate_brick_size(self, tD, bricking_scheme):
        """
        Build the per-dimension brick and chunk sizes for a domain.

        Every dimension of tD gets the same brick size and the same chunk size, taken
        from the bricking scheme.

        @param tD: total domain; only its number of entries is used
        @param bricking_scheme: mapping providing 'brick_size' and 'chunk_size'
        @return: (list of brick sizes, tuple of chunk sizes), one entry per dimension of tD
        """
        log.debug('Calculating the size of a brick...')
        log.debug('Bricking scheme: %s', bricking_scheme)
        log.debug('tD: %s', tD)

        brick_dims = [bricking_scheme['brick_size'] for _ in tD]
        chunk_dims = [bricking_scheme['chunk_size'] for _ in tD]

        log.debug('bD: %s', brick_dims)
        log.debug('cD: %s', chunk_dims)

        return brick_dims, tuple(chunk_dims)

    def init_parameter(self, parameter_context, bricking_scheme):
        """
        Set up persistence for a new parameter and return its value storage.

        Creates a ParameterManager for the parameter, registers a group for it in the
        MasterManager, prepares an rtree index and the brick domains, creates the
        PersistedStorage for the values and calls expand_domain.

        @param parameter_context  context of the parameter; its name, dom.total_extents and param_type are read here
        @param bricking_scheme  mapping read by calculate_brick_size ('brick_size' and 'chunk_size')
        @return the PersistedStorage created for the parameter (also kept in self.value_list)
        """
        parameter_name = parameter_context.name

        # NOTE(review): goes through __setattr__, so this lands on the MasterManager
        # when it already has a global_bricking_scheme attribute
        self.global_bricking_scheme = bricking_scheme

        pm = ParameterManager(os.path.join(self.root_dir, self.guid, parameter_name), parameter_name)
        self.parameter_metadata[parameter_name] = pm

        pm.parameter_context = parameter_context

        log.debug('Initialize %s', parameter_name)

        self.master_manager.create_group(parameter_name)

        log.debug('Performing Rtree dict setup')
        tD = parameter_context.dom.total_extents
        bD,cD = self.calculate_brick_size(tD, bricking_scheme) #remains same for each parameter
        # Verify domain is Rtree friendly
        tree_rank = len(bD)
        log.debug('tree_rank: %s', tree_rank)
        # A 1-D domain is indexed with a rank-2 tree (calculate_extents pads its extents to match)
        if tree_rank == 1:
            tree_rank += 1
        log.debug('tree_rank: %s', tree_rank)
        p = rtree.index.Property()
        p.dimension = tree_rank

        brick_tree = rtree.index.Index(properties=p)

        pm.brick_list = {}
        # brick_domains layout: [total domain, brick sizes, chunk sizes, bricking scheme]
        if isinstance(parameter_context.param_type, (FunctionType, ConstantType)):
            # These have constant storage, never expand!!
                pm.brick_domains = [(1,),(1,),(1,),bricking_scheme]
        else:
            pm.brick_domains = [tD, bD, cD, bricking_scheme]

        pm.tree_rank = tree_rank
        pm.brick_tree = brick_tree

        v = PersistedStorage(pm, self.brick_dispatcher, dtype=parameter_context.param_type.storage_encoding, fill_value=parameter_context.param_type.fill_value, auto_flush=self.auto_flush_values)
        self.value_list[parameter_name] = v

        # Creates the bricks needed to cover the parameter's current domain
        self.expand_domain(parameter_context)

        if pm.is_dirty():
            pm.flush()

        if self.master_manager.is_dirty():
            self.master_manager.flush()

        return v

    def calculate_extents(self, origin, bD, parameter_name):
        """
        Calculates the Rtree extents, brick extents and active brick size for the parameter
        @param origin: origin of the brick in index space, one entry per dimension
        @param bD: brick size, one entry per dimension
        @param parameter_name: name of the parameter whose total extents bound the active size
        @return: (rtree_extents, brick_extents, brick_active_size) where rtree_extents is a
                 list, brick_extents a tuple of (first index, last index) pairs and
                 brick_active_size a tuple with the used size of the brick per dimension
        """
        log.debug('origin: %s', origin)
        log.debug('bD: %s', bD)
        log.debug('parameter_name: %s', parameter_name)

        # Calculate the brick extents
        origin = list(origin)

        pc = self.parameter_metadata[parameter_name].parameter_context
        total_extents = pc.dom.total_extents # index space
        log.debug('Total extents for parameter %s: %s', parameter_name, total_extents)

        # Last index covered by the brick in each dimension.  Wrapped in list() because
        # map() returns an iterator on Python 3, which can neither be concatenated to a
        # list nor consumed twice; on Python 2 the result is unchanged.
        upper_bounds = list(map(lambda o,s: o+s-1, origin, bD))

        # Calculate the extents for the Rtree (index space)
        rtree_extents = origin + upper_bounds
        # Fake out the rtree if rank == 1
        if len(origin) == 1:
            rtree_extents = [e for ext in zip(rtree_extents,[0 for x in rtree_extents]) for e in ext]
        log.debug('Rtree extents: %s', rtree_extents)

        # Calculate the extents of the brick (index space); materialised as a list so it
        # can be logged, iterated below and converted to a tuple for the return value
        brick_extents = list(zip(origin, upper_bounds))
        log.debug('Brick extents: %s', brick_extents)

        # Calculate active size using the inner extent of the domain within a brick (value space)
        brick_active_size = list(map(lambda o,s: min(o,s[1]+1)-s[0], total_extents, brick_extents))
        log.debug('Brick active size: %s', brick_active_size)

        # When loaded, brick_extents and brick_active_extents will be tuples...so, convert them now to allow clean comparison
        return rtree_extents, tuple(brick_extents), tuple(brick_active_size)

    def _brick_exists(self, parameter_name, brick_extents):
        """
        Check whether a brick with the given extents is already listed for a parameter.

        @param parameter_name  name of the parameter whose brick list is searched
        @param brick_extents   extents compared against the first element of each brick list entry
        @return (do_write, brick_guid): (False, <guid of the matching brick>) when a brick
                with equal extents exists, otherwise (True, '')
        """
        log.debug('Check bricks for parameter \'%s\'',parameter_name)

        if parameter_name not in self.parameter_metadata:
            return True, ''

        known_bricks = self.parameter_metadata[parameter_name].brick_list
        for guid, entry in known_bricks.iteritems():
            if brick_extents == entry[0]:
                log.debug('Brick found with matching extents: guid=%s', guid)
                return False, guid

        return True, ''

    # Write empty HDF5 brick to the filesystem
    def write_brick(self, rtree_extents, brick_extents, brick_active_size, origin, bD, parameter_name):
        """
        Register a new brick for a parameter.

        A fresh brick GUID is created, an external link to '<guid>.hdf5' is added to the
        master file, the brick is recorded in the ParameterManager's brick_list and
        inserted into its rtree.  Dirty managers are flushed afterwards.

        @param rtree_extents      extents handed to the rtree for this brick
        @param brick_extents      extents of the brick, stored in the brick list
        @param brick_active_size  active size of the brick, stored in the brick list
        @param origin             origin of the brick, stored in the brick list
        @param bD                 brick size per dimension, stored in the brick list as a tuple
        @param parameter_name     name of the parameter the brick belongs to
        """
        param_manager = self.parameter_metadata[parameter_name]

        log.debug('Writing virtual brick for parameter %s', parameter_name)

        # Name the brick file after a newly created GUID
        new_guid = create_guid()
        file_name = '{0}.hdf5'.format(new_guid)
        relative_dir = param_manager.root_dir.replace(self.root_dir,'.')
        relative_path = os.path.join(relative_dir, file_name)
        master_link = '/{0}/{1}'.format(parameter_name, new_guid)

        # Add brick to Master HDF file
        self.master_manager.add_external_link(master_link, relative_path, new_guid)

        # The rtree id is the number of bricks listed *before* this one is added
        log.debug('Updating brick list[%s] with (%s, %s)', parameter_name, new_guid, brick_extents)
        rtree_id = self.parameter_brick_count(parameter_name)
        param_manager.brick_list[new_guid] = [brick_extents, origin, tuple(bD), brick_active_size]
        log.debug('Brick count for %s is %s', parameter_name, rtree_id)

        log.debug('Inserting into Rtree %s:%s:%s', rtree_id, rtree_extents, new_guid)
        param_manager.update_rtree(rtree_id, rtree_extents, obj=new_guid)

        # Persist whatever the steps above changed
        if param_manager.is_dirty():
            param_manager.flush()

        if self.master_manager.is_dirty():
            self.master_manager.flush()

    # Expand the domain
    def expand_domain(self, parameter_context, tdom=None, sdom=None):
        """
        Bring the brick layout of a parameter in line with its current total extents.

        Determines the brick origins needed to cover the total domain, creates bricks for
        origins that are missing, refreshes the metadata of bricks that already exist,
        flushes the ParameterManager and optionally updates the global temporal/spatial
        domains on the MasterManager.

        @param parameter_context  context whose name selects the ParameterManager and whose dom.total_extents gives the total domain
        @param tdom  if not None, assigned to the MasterManager's tdom
        @param sdom  if not None, assigned to the MasterManager's sdom
        @raise SystemError  if the number of dimensions of the total extents differs from the stored total domain
        """
        parameter_name = parameter_context.name
        log.debug('Expand %s', parameter_name)
        pm = self.parameter_metadata[parameter_name]

        # brick_domains layout: [total domain, brick sizes, chunk sizes, bricking scheme]
        if pm.brick_domains[0] is not None:
            log.debug('Expanding domain (n-dimension)')

            # Check if the number of dimensions of the total domain has changed
            # TODO: Will this ever happen???  If so, how to handle?
            if len(parameter_context.dom.total_extents) != len(pm.brick_domains[0]):
                raise SystemError('Number of dimensions for parameter cannot change, only expand in size! No action performed.')
            else:
                tD = pm.brick_domains[0]
                bD = pm.brick_domains[1]
                cD = pm.brick_domains[2]
                if not isinstance(pm.parameter_context.param_type, (FunctionType, ConstantType)): # These have constant storage, never expand!!
                    new_domain = parameter_context.dom.total_extents

                    # Per-dimension difference between the new and the stored total domain
                    delta_domain = [(x - y) for x, y in zip(new_domain, tD)]
                    log.debug('delta domain: %s', delta_domain)

                    # Stored domain plus delta, i.e. the new total domain as a list
                    tD = [(x + y) for x, y in zip(tD, delta_domain)]
                    pm.brick_domains[0] = tD
        else:
            # No total domain stored yet: derive all brick domains from the context
            tD = parameter_context.dom.total_extents
            bricking_scheme = pm.brick_domains[3]
            bD,cD = self.calculate_brick_size(tD, bricking_scheme)
            pm.brick_domains = [tD, bD, cD, bricking_scheme]

        # NOTE(review): this try/except only re-raises, so it has no effect on behaviour
        try:
            # Gather block list
            log.trace('tD, bD, cD: %s, %s, %s', tD, bD, cD)
            # Per dimension: every bD[i]-th index of the total domain, i.e. the brick start indices
            lst = [range(d)[::bD[i]] for i,d in enumerate(tD)]

            # Gather brick origins
            need_origins = set(itertools.product(*lst))
            log.trace('need_origins: %s', need_origins)
            # Only bricks whose active size equals the brick size count as already present;
            # the remaining origins are processed again below
            have_origins = set([v[1] for k,v in pm.brick_list.iteritems() if v[2] == v[3]])
            log.trace('have_origins: %s', have_origins)
            need_origins.difference_update(have_origins)
            log.trace('need_origins: %s', need_origins)

            need_origins = list(need_origins)
            need_origins.sort()

            if len(need_origins)>0:
                log.debug('Number of Bricks to Create: %s', len(need_origins))

#                # Write brick to HDF5 file
#                map(lambda origin: self.write_brick(origin,bD,parameter_name), need_origins)

                # Write brick to HDF5 file
                for origin in need_origins:
                    rtree_extents, brick_extents, brick_active_size = self.calculate_extents(origin, bD, parameter_name)

                    # A brick with these extents may already be listed; then only its metadata is refreshed
                    do_write, bguid = self._brick_exists(parameter_name, brick_extents)
                    if not do_write:
                        log.debug('Brick already exists!  Updating brick metadata...')
                        pm.brick_list[bguid] = [brick_extents, origin, tuple(bD), brick_active_size]
                    else:
                        self.write_brick(rtree_extents, brick_extents, brick_active_size, origin, bD, parameter_name)

            else:
                log.debug('No bricks to create to satisfy the domain expansion...')
        except Exception:
            raise

        # Flush the parameter_metadata
        if pm.is_dirty():
            pm.flush()

        # Update the global tdom & sdom as necessary
        if tdom is not None:
            self.master_manager.tdom = tdom
        if sdom is not None:
            self.master_manager.sdom = sdom

        if self.master_manager.is_dirty():
            self.master_manager.flush()

    # Returns a count of bricks for a parameter
    def parameter_brick_count(self, parameter_name):
        ret = 0
        if parameter_name in self.parameter_metadata:
            ret = len(self.parameter_metadata[parameter_name].brick_list)
        else:
            log.debug('No bricks found for parameter: %s', parameter_name)

        return ret

    def has_dirty_values(self):
        for v in self.value_list.itervalues():
            if v.has_dirty_values():
                return True

        return False

    def get_dirty_values_async_result(self):
        """
        Return the brick dispatcher's dirty-values async result.

        @return whatever self.brick_dispatcher.get_dirty_values_async_result() returns
        """
        return self.brick_dispatcher.get_dirty_values_async_result()

    def flush_values(self):
        for k, v in self.value_list.iteritems():
            v.flush_values()

        return self.get_dirty_values_async_result()

    def flush(self):
        """
        Flush every ParameterManager, then the MasterManager.
        """
        for param_name, param_manager in self.parameter_metadata.iteritems():
            log.debug('Flushing ParameterManager for \'%s\'...', param_name)
            param_manager.flush()

        log.debug('Flushing MasterManager...')
        master = self.master_manager
        master.flush()

    def close(self, force=False, timeout=None):
        if not self._closed:
            self.flush()
            self.brick_dispatcher.shutdown(force=force, timeout=timeout)

        self._closed = True
class BrickingAssessor(object):
    def __init__(self, total_domain=(10, 10), brick_size=5, use_hdf=False, root_dir='test_data/multi_dim_trials',
                 guid=None, dtype='int16'):
        """
        Build the brick layout for total_domain and, optionally, its HDF5 backing store.

        @param total_domain  extents of the full domain, one entry per dimension
        @param brick_size    edge length used for every dimension of a brick
        @param use_hdf       True = bricks are HDF5 files registered with a MasterManager,
                             False = bricks are in-memory numpy arrays
        @param root_dir      directory holding the HDF5 data (only used when use_hdf is True)
        @param guid          identifier used to name the storage; created when not supplied
                             (only used when use_hdf is True)
        @param dtype         anything accepted by np.dtype; its name is kept in self.dtype
        """
        self.total_domain = total_domain
        self.brick_sizes = tuple(brick_size for _ in total_domain)
        self.use_hdf = use_hdf
        self.dtype = np.dtype(dtype).name

        if self.use_hdf:
            self.guid = guid or create_guid()
            store_name = '%s_%s' % (self.guid, self.dtype)
            self.root_dir = root_dir

            if not os.path.exists(self.root_dir):
                os.makedirs(self.root_dir)

            # Start from a clean slate: remove a store of the same name if one exists
            store_dir = os.path.join(self.root_dir, store_name)
            if os.path.exists(store_dir):
                shutil.rmtree(store_dir)

            self.master_manager = MasterManager(self.root_dir, store_name, name='md_test_{0}'.format(store_name))
            self.master_manager.flush()

            context = ParameterContext('test_param', param_type=QuantityType(self.dtype), fill_value=-1)
            param_dir = os.path.join(self.root_dir, store_name, context.name)
            self.param_manager = ParameterManager(param_dir, context.name)
            self.param_manager.parameter_context = context
            self.master_manager.create_group(context.name)
            self.param_manager.flush()

        self.bricks = {}

        self.brick_origins = bricking_utils.calc_brick_origins(self.total_domain, self.brick_sizes)
        extents = bricking_utils.calc_brick_and_rtree_extents(self.brick_origins, self.brick_sizes)
        self.brick_extents, self.rtree_extents = extents
        self.build_bricks()

        self.rtree = RTreeProxy()
        for entry in BrickingAssessor.rtree_populator(self.rtree_extents, self.brick_extents):
            self.rtree.insert(*entry)

    @classmethod
    def rtree_populator(cls, rtree_extents, brick_extents):
        for i, e in enumerate(rtree_extents):
            yield i, e, brick_extents[i]

    def _get_numpy_array(self, shape):
        """
        Build an array of the given shape filled with consecutive integers in self.dtype.

        @param shape  tuple, or any iterable convertible to a tuple, of dimension sizes
        @return np.arange(utils.prod(shape)) in self.dtype, reshaped to shape
        """
        dims = shape if isinstance(shape, tuple) else tuple(shape)
        element_count = utils.prod(dims)
        return np.arange(element_count, dtype=self.dtype).reshape(dims)

    def build_bricks(self):
        for x in xrange(len(self.brick_origins)):
            if not self.use_hdf:
                self.bricks[x] = np.empty(self.brick_sizes, dtype=self.dtype)
                self.bricks[x].fill(-1)
            else:
                id = str(x)
                fn = '{0}.hdf5'.format(id)
                pth = os.path.join(self.param_manager.root_dir, fn)
                relpth = os.path.join(self.param_manager.root_dir.replace(self.master_manager.root_dir, '.'), fn)
                lnpth = '/{0}/{1}'.format(self.param_manager.parameter_name, id)

                self.master_manager.add_external_link(lnpth, relpth, id)
                self.bricks[x] = pth

    def reset_bricks(self):
        for i, arr in enumerate(self.bricks.itervalues()):
            if not self.use_hdf:
                arr.fill(-1)
            else:
                with HDFLockingFile(arr, mode='a') as f:
                    ds = f.require_dataset(str(i), shape=self.brick_sizes, dtype=self.dtype, chunks=None, fillvalue=-1)
                    ds[:] = -1

    def put_values_to_bricks(self, slice_, values):
        """
        Write values into every brick that intersects slice_.

        @param slice_  index expression over the total domain; normalised with utils.fix_slice
        @param values  scalar (written to every selected index) or array-like whose shape is
                       the shape of slice_, optionally with the singleton dimensions left out
        @raise IndexError  if the shape of values is not compatible with the shape of slice_
        """
        slice_ = utils.fix_slice(slice_, self.total_domain)
        bricks = bricking_utils.get_bricks_from_slice(slice_, self.rtree,
                                                      self.total_domain) # this is a list of tuples [(b_id, (bounds...),), ...]

        values = np.asanyarray(values)
        v_shp = values.shape
        log.debug('value_shape: %s', v_shp)
        s_shp = utils.slice_shape(slice_, self.total_domain)
        log.debug('slice_shape: %s', s_shp)
        is_broadcast = False
        if v_shp == ():
            # 0-d input: the same scalar is written to every brick slice
            log.debug('Broadcast!!')
            is_broadcast = True
            value_slice = ()
        elif v_shp != s_shp:
            if v_shp == tuple([i for i in s_shp if i != 1]): # Missing dimensions are singleton, just reshape to fit
                values = values.reshape(s_shp)
                v_shp = values.shape
            else:
                raise IndexError(
                    'Shape of \'value\' is not compatible with \'slice_\': slice_ shp == {0}\tvalue shp == {1}'.format(
                        s_shp, v_shp))
        else:
            # Placeholder; the real value_slice is computed per brick in the loop below
            value_slice = None

        log.debug('value_shape: %s', v_shp)

        for b in bricks:
            # b is (brick_id, (brick_bounds per dim...),)
            bid, bbnds = b
            log.debug('Determining slice for brick: %s', b)
            # NOTE(review): subscripting zip() requires Python 2, where zip returns a list
            bexts = tuple([x + 1 for x in zip(*bbnds)[1]]) # Shift from index to size
            log.debug('bid=%s, bbnds=%s, bexts=%s', bid, bbnds, bexts)

            brick_slice, brick_mm = bricking_utils.get_brick_slice_nd(slice_, bbnds)

            if None in brick_slice: # Brick does not contain any of the requested indices
                log.debug('Brick does not contain any of the requested indices: Move to next brick')
                continue

            try:
                brick_slice = utils.fix_slice(brick_slice, bexts)
            except IndexError:
                log.debug('Malformed brick_slice: move to next brick')
                continue

            if not is_broadcast:
                # Part of 'values' that belongs to this brick
                value_slice = bricking_utils.get_value_slice_nd(slice_, v_shp, bbnds, brick_slice, brick_mm)

                try:
                    value_slice = utils.fix_slice(value_slice, v_shp)
                except IndexError:
                    log.debug('Malformed value_slice: move to next brick')
                    continue

            log.debug('\nbrick %s:\n\tbrick_slice %s=%s\n\tmin/max=%s\n\tvalue_slice %s=%s', b,
                      utils.slice_shape(brick_slice, bexts), brick_slice, brick_mm,
                      utils.slice_shape(value_slice, v_shp), value_slice)
            v = values[value_slice]
            log.debug('\nvalues %s=\n%s', v.shape, v)
            if not self.use_hdf:
                # In-memory brick: assign straight into the numpy array
                self.bricks[bid][brick_slice] = v
            else:
                # HDF5 brick: self.bricks[bid] is the file path; the dataset is named after the brick id
                fi = self.bricks[bid]
                with HDFLockingFile(fi, 'a') as f:
                    ds = f.require_dataset(str(bid), shape=self.brick_sizes, dtype=self.dtype, chunks=None,
                                           fillvalue=-1)
                    ds[brick_slice] = v

    def get_values_from_bricks(self, slice_):
        """
        Read the values selected by slice_ from the bricks that intersect it.

        @param slice_  index expression over the total domain; normalised with utils.fix_slice
        @return numpy array with singleton dimensions squeezed out, or a numpy scalar when
                exactly one element is selected
        """
        slice_ = utils.fix_slice(slice_, self.total_domain)
        # this is a list of tuples [(b_id, (bounds...),), ...]
        overlapping = bricking_utils.get_bricks_from_slice(slice_, self.rtree, self.total_domain)

        out_shape = utils.slice_shape(slice_, self.total_domain)
        out = np.empty(out_shape, dtype=self.dtype)

        for bid, bbnds in overlapping:
            brick_slice, brick_mm = bricking_utils.get_brick_slice_nd(slice_, bbnds)

            # Brick does not contain any of the requested indices
            if None in brick_slice:
                continue

            out_slice = bricking_utils.get_value_slice_nd(slice_, out_shape, bbnds, brick_slice, brick_mm)

            if self.use_hdf:
                # HDF5 brick: self.bricks[bid] is the file path; the dataset is named after the brick id
                with HDFLockingFile(self.bricks[bid]) as f:
                    ds = f.require_dataset(str(bid), shape=self.brick_sizes, dtype=self.dtype, chunks=None,
                                           fillvalue=-1)
                    brick_vals = ds[brick_slice]
            else:
                brick_vals = self.bricks[bid][brick_slice]

            out[out_slice] = brick_vals

        out = out.squeeze()

        # A single selected element is returned as a scalar instead of an array
        if out.size == 1:
            out = out[()] if out.ndim == 0 else out[0]

        return out