def init_parameter(self, parameter_context, bricking_scheme):
    """
    Initializes a parameter using a ParameterContext object and a bricking scheme for that parameter

    Creates the per-parameter ParameterManager and its HDF group, derives brick/chunk
    sizes from the bricking scheme, builds the rtree index used for brick lookups,
    and wires up the PersistedStorage through which values are read/written.

    @param parameter_context  ParameterContext object describing the parameter to initialize
    @param bricking_scheme  A dictionary containing the brick and chunk sizes
    @return  A PersistedStorage object
    @raise IOError  If the PersistenceLayer was opened read-only (mode == 'r')
    """
    if self.mode == 'r':
        raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

    parameter_name = parameter_context.name

    self.global_bricking_scheme = bricking_scheme

    # Per-parameter metadata manager rooted at <root_dir>/<guid>/<parameter_name>
    pm = ParameterManager(os.path.join(self.root_dir, self.guid, parameter_name), parameter_name)
    self.parameter_metadata[parameter_name] = pm

    pm.parameter_context = parameter_context

    log.debug('Initialize %s', parameter_name)

    self.master_manager.create_group(parameter_name)

    log.debug('Performing Rtree dict setup')
    # tD = total domain extents; bD/cD = brick and chunk dimensions derived from the scheme
    tD = parameter_context.dom.total_extents
    bD,cD = self.calculate_brick_size(tD, bricking_scheme) #remains same for each parameter

    # Verify domain is Rtree friendly -- a rank-1 domain is padded to rank 2
    # because the rtree index requires dimension >= 2
    tree_rank = len(bD)
    log.debug('tree_rank: %s', tree_rank)
    if tree_rank == 1:
        tree_rank += 1
    log.debug('tree_rank: %s', tree_rank)
    p = rtree.index.Property()
    p.dimension = tree_rank

    brick_tree = rtree.index.Index(properties=p)

    pm.brick_list = {}
    if isinstance(parameter_context.param_type, (FunctionType, ConstantType)):
        # These have constant storage, never expand!!
        # Singleton domains keep function/constant parameters in a single fixed brick.
        pm.brick_domains = [(1,),(1,),(1,),bricking_scheme]
    else:
        pm.brick_domains = [tD, bD, cD, bricking_scheme]

    pm.tree_rank = tree_rank
    pm.brick_tree = brick_tree

    v = PersistedStorage(pm, self.brick_dispatcher, dtype=parameter_context.param_type.storage_encoding, fill_value=parameter_context.param_type.fill_value, mode=self.mode, inline_data_writes=self.inline_data_writes, auto_flush=self.auto_flush_values)
    self.value_list[parameter_name] = v

    # Grow the overall coverage domain to account for the new parameter
    self.expand_domain(parameter_context)

    # CBM TODO: Consider making this optional and bulk-flushing from the coverage after all parameters have been initialized
    # No need to check if they're dirty, we know they are!
    pm.flush()
    self.master_manager.flush()

    return v
def init_parameter(self, parameter_context, bricking_scheme):
    """
    Initializes a parameter using a ParameterContext object and a bricking scheme for that parameter

    Creates the per-parameter ParameterManager and its HDF group, derives brick/chunk
    sizes, builds the rtree index used for brick lookups, and wires up the
    PersistedStorage through which values are read/written.  Managers are flushed
    only if they report themselves dirty.

    @param parameter_context  ParameterContext object describing the parameter to initialize
    @param bricking_scheme  A dictionary containing the brick and chunk sizes
    @return  A PersistedStorage object
    """
    parameter_name = parameter_context.name

    self.global_bricking_scheme = bricking_scheme

    # Per-parameter metadata manager rooted at <root_dir>/<guid>/<parameter_name>
    pm = ParameterManager(os.path.join(self.root_dir, self.guid, parameter_name), parameter_name)
    self.parameter_metadata[parameter_name] = pm

    pm.parameter_context = parameter_context

    log.debug('Initialize %s', parameter_name)

    self.master_manager.create_group(parameter_name)

    log.debug('Performing Rtree dict setup')
    # tD = total domain extents; bD/cD = brick and chunk dimensions derived from the scheme
    tD = parameter_context.dom.total_extents
    bD,cD = self.calculate_brick_size(tD, bricking_scheme) #remains same for each parameter

    # Verify domain is Rtree friendly -- a rank-1 domain is padded to rank 2
    # because the rtree index requires dimension >= 2
    tree_rank = len(bD)
    log.debug('tree_rank: %s', tree_rank)
    if tree_rank == 1:
        tree_rank += 1
    log.debug('tree_rank: %s', tree_rank)
    p = rtree.index.Property()
    p.dimension = tree_rank

    brick_tree = rtree.index.Index(properties=p)

    pm.brick_list = {}
    if isinstance(parameter_context.param_type, (FunctionType, ConstantType)):
        # These have constant storage, never expand!!
        # Singleton domains keep function/constant parameters in a single fixed brick.
        pm.brick_domains = [(1,),(1,),(1,),bricking_scheme]
    else:
        pm.brick_domains = [tD, bD, cD, bricking_scheme]

    pm.tree_rank = tree_rank
    pm.brick_tree = brick_tree

    v = PersistedStorage(pm, self.brick_dispatcher, dtype=parameter_context.param_type.storage_encoding, fill_value=parameter_context.param_type.fill_value, auto_flush=self.auto_flush_values)
    self.value_list[parameter_name] = v

    # Grow the overall coverage domain to account for the new parameter
    self.expand_domain(parameter_context)

    # Only flush managers that actually have unpersisted changes
    if pm.is_dirty():
        pm.flush()

    if self.master_manager.is_dirty():
        self.master_manager.flush()

    return v
def init_parameter(self, parameter_context, bricking_scheme):
    """
    Initializes a parameter using a ParameterContext object and a bricking scheme for that parameter

    Creates the per-parameter ParameterManager and its HDF group, selects the
    storage implementation based on the parameter's value class, and links any
    pre-existing bricks to the new parameter.

    @param parameter_context  ParameterContext object describing the parameter to initialize
    @param bricking_scheme  A dictionary containing the brick and chunk sizes
    @return  A PersistedStorage object
    @raise IOError  If the PersistenceLayer was opened read-only (mode == 'r')
    """
    if self.mode == 'r':
        raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

    parameter_name = parameter_context.name

    self.global_bricking_scheme = bricking_scheme

    # Per-parameter metadata manager rooted at <root_dir>/<guid>/<parameter_name>;
    # opened writable here so initialization can populate it, locked down below
    pm = ParameterManager(os.path.join(self.root_dir, self.guid, parameter_name), parameter_name, read_only=False)
    self.parameter_metadata[parameter_name] = pm

    pm.parameter_context = parameter_context

    log.debug('Initialize %s', parameter_name)

    self.master_manager.create_group(parameter_name)

    # Sparse-constant values get a dedicated storage implementation;
    # everything else goes through the standard brick-backed PersistedStorage
    if parameter_context.param_type._value_class == 'SparseConstantValue':
        v = SparsePersistedStorage(pm, self.master_manager, self.brick_dispatcher, dtype=parameter_context.param_type.storage_encoding, fill_value=parameter_context.param_type.fill_value, mode=self.mode, inline_data_writes=self.inline_data_writes, auto_flush=self.auto_flush_values)
    else:
        v = PersistedStorage(pm, self.master_manager, self.brick_dispatcher, dtype=parameter_context.param_type.storage_encoding, fill_value=parameter_context.param_type.fill_value, mode=self.mode, inline_data_writes=self.inline_data_writes, auto_flush=self.auto_flush_values)
    self.value_list[parameter_name] = v

    # CBM TODO: Consider making this optional and bulk-flushing from the coverage after all parameters have been initialized
    # No need to check if they're dirty, we know they are!
    pm.flush()

    # Put the pm into read_only mode
    pm.read_only = True

    # If there are already bricks, ensure there are appropriate links for this new parameter
    for brick_guid in self.master_manager.brick_list:
        brick_file_name = '{0}.hdf5'.format(brick_guid)
        self._add_brick_link(parameter_name, brick_guid, brick_file_name)

    self.master_manager.flush()

    return v
def init_parameter(self, parameter_context, bricking_scheme):
    """
    Initializes a parameter using a ParameterContext object and a bricking scheme for that parameter

    Creates the per-parameter ParameterManager and its HDF group, selects the
    storage implementation based on the parameter's value class, and links any
    pre-existing bricks to the new parameter.

    @param parameter_context  ParameterContext object describing the parameter to initialize
    @param bricking_scheme  A dictionary containing the brick and chunk sizes
    @return  A PersistedStorage object
    @raise IOError  If the PersistenceLayer was opened read-only (mode == 'r')
    """
    if self.mode == 'r':
        raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

    parameter_name = parameter_context.name

    self.global_bricking_scheme = bricking_scheme

    # Per-parameter metadata manager rooted at <root_dir>/<guid>/<parameter_name>;
    # opened writable here so initialization can populate it, locked down below
    pm = ParameterManager(os.path.join(self.root_dir, self.guid, parameter_name), parameter_name, read_only=False)
    self.parameter_metadata[parameter_name] = pm

    pm.parameter_context = parameter_context

    log.debug('Initialize %s', parameter_name)

    self.master_manager.create_group(parameter_name)

    # Sparse-constant values get a dedicated storage implementation;
    # everything else goes through the standard brick-backed PersistedStorage
    if parameter_context.param_type._value_class == 'SparseConstantValue':
        v = SparsePersistedStorage(pm, self.master_manager, self.brick_dispatcher, dtype=parameter_context.param_type.storage_encoding, fill_value=parameter_context.param_type.fill_value, mode=self.mode, inline_data_writes=self.inline_data_writes, auto_flush=self.auto_flush_values)
    else:
        v = PersistedStorage(pm, self.master_manager, self.brick_dispatcher, dtype=parameter_context.param_type.storage_encoding, fill_value=parameter_context.param_type.fill_value, mode=self.mode, inline_data_writes=self.inline_data_writes, auto_flush=self.auto_flush_values)
    self.value_list[parameter_name] = v

    # CBM TODO: Consider making this optional and bulk-flushing from the coverage after all parameters have been initialized
    # No need to check if they're dirty, we know they are!
    pm.flush()

    # Put the pm into read_only mode
    pm.read_only = True

    # If there are already bricks, ensure there are appropriate links for this new parameter
    for brick_guid in self.master_manager.brick_list:
        brick_file_name = '{0}.hdf5'.format(brick_guid)
        self._add_brick_link(parameter_name, brick_guid, brick_file_name)

    self.master_manager.flush()

    return v
class BrickingAssessor(object):
    """
    Test harness that partitions an n-dimensional total domain into equal-size
    bricks and supports writing/reading values across brick boundaries.

    Bricks are held either as in-memory numpy arrays or, when use_hdf is True,
    as per-brick HDF5 files linked under a MasterManager.  An rtree over the
    brick extents maps a request slice to the bricks it intersects.
    """

    def __init__(self, total_domain=(10, 10), brick_size=5, use_hdf=False, root_dir='test_data/multi_dim_trials', guid=None, dtype='int16'):
        """
        @param total_domain  Extents of the full domain, one entry per dimension
        @param brick_size  Edge length applied uniformly to every dimension
        @param use_hdf  When True, back each brick with an HDF5 file instead of a numpy array
        @param root_dir  Directory under which HDF-backed test data is created
        @param guid  Optional GUID for the dataset; generated when omitted
        @param dtype  Numpy dtype name used for brick storage
        """
        self.total_domain = total_domain
        self.brick_sizes = tuple(brick_size for x in total_domain)
        self.use_hdf = use_hdf
        self.dtype = np.dtype(dtype).name
        if self.use_hdf:
            self.guid = guid or create_guid()
            name = '%s_%s' % (self.guid, self.dtype)
            self.root_dir = root_dir
            if not os.path.exists(self.root_dir):
                os.makedirs(self.root_dir)
            # Start from a clean slate for this guid/dtype combination
            if os.path.exists(os.path.join(self.root_dir, name)):
                shutil.rmtree(os.path.join(self.root_dir, name))

            self.master_manager = MasterManager(self.root_dir, name, name='md_test_{0}'.format(name))
            self.master_manager.flush()

            pc = ParameterContext('test_param', param_type=QuantityType(self.dtype), fill_value=-1)
            self.param_manager = ParameterManager(os.path.join(self.root_dir, name, pc.name), pc.name)
            self.param_manager.parameter_context = pc
            self.master_manager.create_group(pc.name)
            self.param_manager.flush()

        self.bricks = {}

        self.brick_origins = bricking_utils.calc_brick_origins(self.total_domain, self.brick_sizes)
        self.brick_extents, self.rtree_extents = bricking_utils.calc_brick_and_rtree_extents(self.brick_origins, self.brick_sizes)
        self.build_bricks()

        self.rtree = RTreeProxy()
        for x in BrickingAssessor.rtree_populator(self.rtree_extents, self.brick_extents):
            self.rtree.insert(*x)

    @classmethod
    def rtree_populator(cls, rtree_extents, brick_extents):
        """Yield (id, rtree_extent, brick_extent) triples for rtree insertion."""
        for i, e in enumerate(rtree_extents):
            yield i, e, brick_extents[i]

    def _get_numpy_array(self, shape):
        """Return an arange array of the given shape in this assessor's dtype."""
        if not isinstance(shape, tuple):
            shape = tuple(shape)
        return np.arange(utils.prod(shape), dtype=self.dtype).reshape(shape)

    def build_bricks(self):
        """
        Create one brick per origin: a fill-initialized numpy array in memory,
        or (HDF mode) a file path registered as an external link on the master.
        """
        for x in xrange(len(self.brick_origins)):
            if not self.use_hdf:
                self.bricks[x] = np.empty(self.brick_sizes, dtype=self.dtype)
                self.bricks[x].fill(-1)
            else:
                brick_id = str(x)  # renamed from 'id' to avoid shadowing the builtin
                fn = '{0}.hdf5'.format(brick_id)
                pth = os.path.join(self.param_manager.root_dir, fn)
                relpth = os.path.join(self.param_manager.root_dir.replace(self.master_manager.root_dir, '.'), fn)
                lnpth = '/{0}/{1}'.format(self.param_manager.parameter_name, brick_id)

                self.master_manager.add_external_link(lnpth, relpth, brick_id)
                self.bricks[x] = pth

    def reset_bricks(self):
        """Refill every brick with the fill value (-1)."""
        # BUGFIX: iterate items so the HDF dataset name is derived from the
        # brick's dict key (the id assigned in build_bricks) rather than from an
        # enumerate counter over itervalues(), which silently assumed dict
        # iteration order matched the integer keys.
        for i, arr in self.bricks.iteritems():
            if not self.use_hdf:
                arr.fill(-1)
            else:
                with h5py.File(arr) as f:
                    ds = f.require_dataset(str(i), shape=self.brick_sizes, dtype=self.dtype, chunks=None, fillvalue=-1)
                    ds[:] = -1

    def put_values_to_bricks(self, slice_, values):
        """
        Write `values` into the bricks intersected by `slice_`.

        Scalar values are broadcast to every addressed cell; array values must
        match the slice shape (singleton dimensions may be omitted).

        @raise IndexError  If the value shape is incompatible with the slice shape
        """
        slice_ = utils.fix_slice(slice_, self.total_domain)
        bricks = bricking_utils.get_bricks_from_slice(slice_, self.rtree, self.total_domain)  # this is a list of tuples [(b_id, (bounds...),), ...]

        values = np.asanyarray(values)
        v_shp = values.shape
        log.debug('value_shape: %s', v_shp)
        s_shp = utils.slice_shape(slice_, self.total_domain)
        log.debug('slice_shape: %s', s_shp)
        is_broadcast = False
        if v_shp == ():
            log.debug('Broadcast!!')
            is_broadcast = True
            value_slice = ()
        elif v_shp != s_shp:
            if v_shp == tuple([i for i in s_shp if i != 1]):  # Missing dimensions are singleton, just reshape to fit
                values = values.reshape(s_shp)
                v_shp = values.shape
            else:
                raise IndexError('Shape of \'value\' is not compatible with \'slice_\': slice_ shp == {0}\tvalue shp == {1}'.format(s_shp, v_shp))
        else:
            value_slice = None

        log.debug('value_shape: %s', v_shp)

        for b in bricks:
            # b is (brick_id, (brick_bounds per dim...),)
            bid, bbnds = b
            log.debug('Determining slice for brick: %s', b)
            bexts = tuple([x + 1 for x in zip(*bbnds)[1]])  # Shift from index to size
            log.debug('bid=%s, bbnds=%s, bexts=%s', bid, bbnds, bexts)

            brick_slice, brick_mm = bricking_utils.get_brick_slice_nd(slice_, bbnds)

            if None in brick_slice:  # Brick does not contain any of the requested indices
                log.debug('Brick does not contain any of the requested indices: Move to next brick')
                continue

            try:
                brick_slice = utils.fix_slice(brick_slice, bexts)
            except IndexError:
                log.debug('Malformed brick_slice: move to next brick')
                continue

            if not is_broadcast:
                value_slice = bricking_utils.get_value_slice_nd(slice_, v_shp, bbnds, brick_slice, brick_mm)

                try:
                    value_slice = utils.fix_slice(value_slice, v_shp)
                except IndexError:
                    log.debug('Malformed value_slice: move to next brick')
                    continue

            log.debug('\nbrick %s:\n\tbrick_slice %s=%s\n\tmin/max=%s\n\tvalue_slice %s=%s', b, utils.slice_shape(brick_slice, bexts), brick_slice, brick_mm, utils.slice_shape(value_slice, v_shp), value_slice)
            v = values[value_slice]
            log.debug('\nvalues %s=\n%s', v.shape, v)
            if not self.use_hdf:
                self.bricks[bid][brick_slice] = v
            else:
                fi = self.bricks[bid]
                with h5py.File(fi) as f:
                    ds = f.require_dataset(str(bid), shape=self.brick_sizes, dtype=self.dtype, chunks=None, fillvalue=-1)
                    ds[brick_slice] = v

    def get_values_from_bricks(self, slice_):
        """
        Read the values addressed by `slice_` by assembling the pieces from each
        intersected brick; squeezes singleton dimensions and unwraps a lone scalar.
        """
        slice_ = utils.fix_slice(slice_, self.total_domain)
        bricks = bricking_utils.get_bricks_from_slice(slice_, self.rtree, self.total_domain)  # this is a list of tuples [(b_id, (bounds...),), ...]

        ret_shp = utils.slice_shape(slice_, self.total_domain)
        ret_arr = np.empty(ret_shp, dtype=self.dtype)

        for b in bricks:
            bid, bbnds = b
            brick_slice, brick_mm = bricking_utils.get_brick_slice_nd(slice_, bbnds)

            if None in brick_slice:
                continue

            ret_slice = bricking_utils.get_value_slice_nd(slice_, ret_shp, bbnds, brick_slice, brick_mm)

            if not self.use_hdf:
                ret_vals = self.bricks[bid][brick_slice]
            else:
                fi = self.bricks[bid]
                with h5py.File(fi) as f:
                    ds = f.require_dataset(str(bid), shape=self.brick_sizes, dtype=self.dtype, chunks=None, fillvalue=-1)
                    ret_vals = ds[brick_slice]

            ret_arr[ret_slice] = ret_vals

        ret_arr = ret_arr.squeeze()

        if ret_arr.size == 1:
            if ret_arr.ndim == 0:
                ret_arr = ret_arr[()]
            else:
                ret_arr = ret_arr[0]

        return ret_arr
class BrickingAssessor(object):
    """
    Test harness that partitions an n-dimensional total domain into equal-size
    bricks and supports writing/reading values across brick boundaries.

    Bricks are held either as in-memory numpy arrays or, when use_hdf is True,
    as per-brick HDF5 files (accessed through HDFLockingFile) linked under a
    MasterManager.  An rtree over the brick extents maps a request slice to the
    bricks it intersects.
    """

    def __init__(self, total_domain=(10, 10), brick_size=5, use_hdf=False, root_dir='test_data/multi_dim_trials', guid=None, dtype='int16'):
        """
        @param total_domain  Extents of the full domain, one entry per dimension
        @param brick_size  Edge length applied uniformly to every dimension
        @param use_hdf  When True, back each brick with an HDF5 file instead of a numpy array
        @param root_dir  Directory under which HDF-backed test data is created
        @param guid  Optional GUID for the dataset; generated when omitted
        @param dtype  Numpy dtype name used for brick storage
        """
        self.total_domain = total_domain
        self.brick_sizes = tuple(brick_size for x in total_domain)
        self.use_hdf = use_hdf
        self.dtype = np.dtype(dtype).name
        if self.use_hdf:
            self.guid = guid or create_guid()
            name = '%s_%s' % (self.guid, self.dtype)
            self.root_dir = root_dir
            if not os.path.exists(self.root_dir):
                os.makedirs(self.root_dir)
            # Start from a clean slate for this guid/dtype combination
            if os.path.exists(os.path.join(self.root_dir, name)):
                shutil.rmtree(os.path.join(self.root_dir, name))

            self.master_manager = MasterManager(self.root_dir, name, name='md_test_{0}'.format(name))
            self.master_manager.flush()

            pc = ParameterContext('test_param', param_type=QuantityType(self.dtype), fill_value=-1)
            self.param_manager = ParameterManager(os.path.join(self.root_dir, name, pc.name), pc.name)
            self.param_manager.parameter_context = pc
            self.master_manager.create_group(pc.name)
            self.param_manager.flush()

        self.bricks = {}

        self.brick_origins = bricking_utils.calc_brick_origins(self.total_domain, self.brick_sizes)
        self.brick_extents, self.rtree_extents = bricking_utils.calc_brick_and_rtree_extents(self.brick_origins, self.brick_sizes)
        self.build_bricks()

        self.rtree = RTreeProxy()
        for x in BrickingAssessor.rtree_populator(self.rtree_extents, self.brick_extents):
            self.rtree.insert(*x)

    @classmethod
    def rtree_populator(cls, rtree_extents, brick_extents):
        """Yield (id, rtree_extent, brick_extent) triples for rtree insertion."""
        for i, e in enumerate(rtree_extents):
            yield i, e, brick_extents[i]

    def _get_numpy_array(self, shape):
        """Return an arange array of the given shape in this assessor's dtype."""
        if not isinstance(shape, tuple):
            shape = tuple(shape)
        return np.arange(utils.prod(shape), dtype=self.dtype).reshape(shape)

    def build_bricks(self):
        """
        Create one brick per origin: a fill-initialized numpy array in memory,
        or (HDF mode) a file path registered as an external link on the master.
        """
        for x in xrange(len(self.brick_origins)):
            if not self.use_hdf:
                self.bricks[x] = np.empty(self.brick_sizes, dtype=self.dtype)
                self.bricks[x].fill(-1)
            else:
                brick_id = str(x)  # renamed from 'id' to avoid shadowing the builtin
                fn = '{0}.hdf5'.format(brick_id)
                pth = os.path.join(self.param_manager.root_dir, fn)
                relpth = os.path.join(self.param_manager.root_dir.replace(self.master_manager.root_dir, '.'), fn)
                lnpth = '/{0}/{1}'.format(self.param_manager.parameter_name, brick_id)

                self.master_manager.add_external_link(lnpth, relpth, brick_id)
                self.bricks[x] = pth

    def reset_bricks(self):
        """Refill every brick with the fill value (-1)."""
        # BUGFIX: iterate items so the HDF dataset name is derived from the
        # brick's dict key (the id assigned in build_bricks) rather than from an
        # enumerate counter over itervalues(), which silently assumed dict
        # iteration order matched the integer keys.
        for i, arr in self.bricks.iteritems():
            if not self.use_hdf:
                arr.fill(-1)
            else:
                with HDFLockingFile(arr, mode='a') as f:
                    ds = f.require_dataset(str(i), shape=self.brick_sizes, dtype=self.dtype, chunks=None, fillvalue=-1)
                    ds[:] = -1

    def put_values_to_bricks(self, slice_, values):
        """
        Write `values` into the bricks intersected by `slice_`.

        Scalar values are broadcast to every addressed cell; array values must
        match the slice shape (singleton dimensions may be omitted).

        @raise IndexError  If the value shape is incompatible with the slice shape
        """
        slice_ = utils.fix_slice(slice_, self.total_domain)
        bricks = bricking_utils.get_bricks_from_slice(slice_, self.rtree, self.total_domain)  # this is a list of tuples [(b_id, (bounds...),), ...]

        values = np.asanyarray(values)
        v_shp = values.shape
        log.debug('value_shape: %s', v_shp)
        s_shp = utils.slice_shape(slice_, self.total_domain)
        log.debug('slice_shape: %s', s_shp)
        is_broadcast = False
        if v_shp == ():
            log.debug('Broadcast!!')
            is_broadcast = True
            value_slice = ()
        elif v_shp != s_shp:
            if v_shp == tuple([i for i in s_shp if i != 1]):  # Missing dimensions are singleton, just reshape to fit
                values = values.reshape(s_shp)
                v_shp = values.shape
            else:
                raise IndexError('Shape of \'value\' is not compatible with \'slice_\': slice_ shp == {0}\tvalue shp == {1}'.format(s_shp, v_shp))
        else:
            value_slice = None

        log.debug('value_shape: %s', v_shp)

        for b in bricks:
            # b is (brick_id, (brick_bounds per dim...),)
            bid, bbnds = b
            log.debug('Determining slice for brick: %s', b)
            bexts = tuple([x + 1 for x in zip(*bbnds)[1]])  # Shift from index to size
            log.debug('bid=%s, bbnds=%s, bexts=%s', bid, bbnds, bexts)

            brick_slice, brick_mm = bricking_utils.get_brick_slice_nd(slice_, bbnds)

            if None in brick_slice:  # Brick does not contain any of the requested indices
                log.debug('Brick does not contain any of the requested indices: Move to next brick')
                continue

            try:
                brick_slice = utils.fix_slice(brick_slice, bexts)
            except IndexError:
                log.debug('Malformed brick_slice: move to next brick')
                continue

            if not is_broadcast:
                value_slice = bricking_utils.get_value_slice_nd(slice_, v_shp, bbnds, brick_slice, brick_mm)

                try:
                    value_slice = utils.fix_slice(value_slice, v_shp)
                except IndexError:
                    log.debug('Malformed value_slice: move to next brick')
                    continue

            log.debug('\nbrick %s:\n\tbrick_slice %s=%s\n\tmin/max=%s\n\tvalue_slice %s=%s', b, utils.slice_shape(brick_slice, bexts), brick_slice, brick_mm, utils.slice_shape(value_slice, v_shp), value_slice)
            v = values[value_slice]
            log.debug('\nvalues %s=\n%s', v.shape, v)
            if not self.use_hdf:
                self.bricks[bid][brick_slice] = v
            else:
                fi = self.bricks[bid]
                with HDFLockingFile(fi, 'a') as f:
                    ds = f.require_dataset(str(bid), shape=self.brick_sizes, dtype=self.dtype, chunks=None, fillvalue=-1)
                    ds[brick_slice] = v

    def get_values_from_bricks(self, slice_):
        """
        Read the values addressed by `slice_` by assembling the pieces from each
        intersected brick; squeezes singleton dimensions and unwraps a lone scalar.
        """
        slice_ = utils.fix_slice(slice_, self.total_domain)
        bricks = bricking_utils.get_bricks_from_slice(slice_, self.rtree, self.total_domain)  # this is a list of tuples [(b_id, (bounds...),), ...]

        ret_shp = utils.slice_shape(slice_, self.total_domain)
        ret_arr = np.empty(ret_shp, dtype=self.dtype)

        for b in bricks:
            bid, bbnds = b
            brick_slice, brick_mm = bricking_utils.get_brick_slice_nd(slice_, bbnds)

            if None in brick_slice:
                continue

            ret_slice = bricking_utils.get_value_slice_nd(slice_, ret_shp, bbnds, brick_slice, brick_mm)

            if not self.use_hdf:
                ret_vals = self.bricks[bid][brick_slice]
            else:
                fi = self.bricks[bid]
                with HDFLockingFile(fi) as f:
                    ds = f.require_dataset(str(bid), shape=self.brick_sizes, dtype=self.dtype, chunks=None, fillvalue=-1)
                    ret_vals = ds[brick_slice]

            ret_arr[ret_slice] = ret_vals

        ret_arr = ret_arr.squeeze()

        if ret_arr.size == 1:
            if ret_arr.ndim == 0:
                ret_arr = ret_arr[()]
            else:
                ret_arr = ret_arr[0]

        return ret_arr