def map_cov_rdt(cls, coverage, rdt, field, slice_):
    """
    Copy the values of one coverage parameter into a record dictionary.

    @param coverage The coverage to read from
    @param rdt      The record dictionary that receives ``rdt[field]``
    @param field    Name of the parameter to transfer
    @param slice_   Passed to ``coverage.get_parameter_values`` as ``tdoa``
    @raise CorruptionError when the field's first data extent exceeds the
           coverage's ``num_timesteps``

    If retrieving the values raises ParameterFunctionException the field is
    silently left out of ``rdt``.
    """
    log.trace('Slice is %s', slice_)
    try:
        values = coverage.get_parameter_values(field, tdoa=slice_)
    except ParameterFunctionException:
        return

    # None and any other non-array result are stored as a one-element list
    if not isinstance(values, np.ndarray):
        rdt[field] = [values]
        return

    if coverage.get_data_extents(field)[0] < coverage.num_timesteps:
        # Fewer stored values than timesteps: pad the tail with fill_value
        log.error("Misformed coverage detected, padding with fill_value")
        expected_len = utils.slice_shape(slice_, (coverage.num_timesteps,))[0]
        padding = np.empty(expected_len - values.shape[0], dtype=values.dtype)
        padding.fill(coverage.get_parameter_context(field).fill_value)
        values = np.append(values, padding)
    elif coverage.get_data_extents(field)[0] > coverage.num_timesteps:
        raise CorruptionError(
            'The coverage is corrupted:\n\tfield: %s\n\textents: %s\n\ttimesteps: %s'
            % (field, coverage.get_data_extents(field), coverage.num_timesteps))

    rdt[field] = np.atleast_1d(values)
def _get_param_vals(self, name, slice_, dims):
    """
    Produce synthetic values for the parameter ``name`` over ``slice_``.

    @param name   Parameter name; 'LAT'/'LON' yield constants, the three
                  '*_L0' names yield an evenly spaced ramp, names found in
                  ``self.value_classes`` return that object's full slice and
                  anything else yields zeros
    @param slice_ The requested slice
    @param dims   The domain shape the slice applies to
    @return an array (or whatever ``self.value_classes[name][:]`` returns)
    """
    shp = utils.slice_shape(slice_, dims)

    def _getarr(vmin, shp, vmax=None):
        # Constant array when no upper bound is given.  ndarray.fill()
        # returns None, so the array must be filled before it is returned.
        if vmax is None:
            arr = np.empty(shp)
            arr.fill(vmin)
            return arr
        # Exactly prod(shp) evenly spaced samples in [vmin, vmax).  linspace
        # guarantees the element count, unlike arange with a computed step
        # (which truncated under integer division and divided by zero for
        # an empty shape).
        count = int(utils.prod(shp))
        ramp = np.linspace(vmin, vmax, count, endpoint=False)
        return ramp.astype('float32').reshape(shp)

    constants = {'LAT': 45, 'LON': -71}
    ramps = {
        'TEMPWAT_L0': (280000, 350000),
        'CONDWAT_L0': (100000, 750000),
        'PRESWAT_L0': (3000, 10000),
    }

    if name in constants:
        return _getarr(constants[name], shp)
    if name in ramps:
        low, high = ramps[name]
        return _getarr(low, shp, high)
    if name in self.value_classes:
        # Non-L0 parameters
        return self.value_classes[name][:]
    return np.zeros(shp)
def __indexify_slice(self, slice_, total_shape):
    """
    Expand ``slice_`` into an integer array of the indices it selects.

    ONLY WORKS FOR 1D ARRAYS!!!

    @param slice_      int, list/tuple of ints, or slice (after fix_slice)
    @param total_shape Shape of the domain being indexed
    @return integer ndarray shaped like the slice result
    @raise TypeError for any other kind of slice element
    """
    fixed = utils.fix_slice(slice_, total_shape)
    out_shape = utils.slice_shape(slice_, total_shape)
    flat = np.empty(out_shape, dtype=int).flatten()

    cursor = 0
    for sel, extent in zip(fixed, total_shape):
        if isinstance(sel, int):
            flat[cursor] = sel
            cursor += 1
            continue

        if isinstance(sel, slice):
            picked = range(*sel.indices(extent))
        elif isinstance(sel, (list, tuple)):
            picked = sel
        else:
            raise TypeError('Unsupported slice method')  # TODO: Better error message

        count = len(picked)
        # NOTE: the end bound is `count`, not `cursor + count`; the two only
        # coincide on the first pass, which is all a 1D domain ever makes.
        flat[cursor:count] = picked
        cursor += count

    return flat.reshape(out_shape)
def _coverage_to_granule(cls, coverage, start_time=None, end_time=None,
                         stride_time=None, fuzzy_stride=True, parameters=None,
                         stream_def_id=None, tdoa=None):
    """
    Build a RecordDictionaryTool holding values read from ``coverage``.

    @param coverage      The coverage to read
    @param start_time    Optional number; converted with cls.get_time_idx
    @param end_time      Optional number; converted with cls.get_time_idx
    @param stride_time   Optional number used as the stride
    @param fuzzy_stride  When False (and stride_time is given) every strided
                         time is looked up individually (slow)
    @param parameters    Optional iterable restricting the fields returned
    @param stream_def_id Optional stream definition used to build the rdt
    @param tdoa          Optional slice; when given it takes precedence over
                         the time arguments
    @return the populated RecordDictionaryTool
    @raise CorruptionError when a field's first data extent exceeds
           ``coverage.num_timesteps``
    """
    # Validations
    checks = (
        (start_time, 'start_time must be a number for striding.'),
        (end_time, 'end_time must be a number for striding.'),
        (stride_time, 'stride_time must be a number for striding.'),
    )
    for candidate, message in checks:
        if candidate is not None:
            validate_is_instance(candidate, Number, message)

    slice_ = slice(None)  # Defaults to all values
    if tdoa is not None and isinstance(tdoa, slice):
        slice_ = tdoa
    elif stride_time is not None and not fuzzy_stride:
        # SLOW
        ugly_range = np.arange(start_time, end_time, stride_time)
        # The set removes duplicates but scrambles the order, hence the sort
        unique_idx = set([cls.get_time_idx(coverage, t) for t in ugly_range])
        slice_ = [sorted(unique_idx)]
    elif not (start_time is None and end_time is None):
        if start_time is not None:
            start_time = cls.get_time_idx(coverage, start_time)
        if end_time is not None:
            end_time = cls.get_time_idx(coverage, end_time)
        slice_ = slice(start_time, end_time, stride_time)
        log.info('Slice: %s', slice_)

    if stream_def_id:
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    else:
        rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)

    if parameters is None:
        fields = rdt.fields
    else:
        # TODO: Improve efficiency here
        fields = list(set(parameters).intersection(rdt.fields))

    for field in fields:
        log.info('Slice is %s', slice_)
        values = coverage.get_parameter_values(field, tdoa=slice_)

        # None and any other non-array result are stored as a one-element list
        if not isinstance(values, np.ndarray):
            rdt[field] = [values]
            continue

        if coverage.get_data_extents(field)[0] < coverage.num_timesteps:
            log.error("Misformed coverage detected, padding with fill_value")
            expected_len = utils.slice_shape(slice_, (coverage.num_timesteps,))[0]
            padding = np.empty(expected_len - values.shape[0], dtype=values.dtype)
            padding.fill(coverage.get_parameter_context(field).fill_value)
            values = np.append(values, padding)
        elif coverage.get_data_extents(field)[0] > coverage.num_timesteps:
            raise CorruptionError(
                'The coverage is corrupted:\n\tfield: %s\n\textents: %s\n\ttimesteps: %s'
                % (field, coverage.get_data_extents(field), coverage.num_timesteps))

        rdt[field] = np.atleast_1d(values)

    return rdt
def __indexify_slice(self, slice_, total_shape):
    """
    Turn ``slice_`` into an integer ndarray listing the selected indices.

    ONLY WORKS FOR 1D ARRAYS!!!

    @param slice_      int, list/tuple of ints, or slice (after fix_slice)
    @param total_shape Shape of the domain being indexed
    @return integer ndarray with the shape of the sliced result
    @raise TypeError when a slice element is of an unsupported type
    """
    normalized = utils.fix_slice(slice_, total_shape)
    result_shape = utils.slice_shape(slice_, total_shape)
    indices = np.empty(result_shape, dtype=int).flatten()

    pos = 0
    for element, dim_len in zip(normalized, total_shape):
        if isinstance(element, slice):
            chosen = range(*element.indices(dim_len))
            n_chosen = len(chosen)
            # NOTE: upper bound is `n_chosen`, not `pos + n_chosen`; they
            # are only equal on the first pass (the 1D case).
            indices[pos:n_chosen] = chosen
            pos += n_chosen
        elif isinstance(element, (list, tuple)):
            n_chosen = len(element)
            indices[pos:n_chosen] = element
            pos += n_chosen
        elif isinstance(element, int):
            indices[pos] = element
            pos += 1
        else:
            raise TypeError('Unsupported slice method')  # TODO: Better error message

    return indices.reshape(result_shape)
def _run_test_slices(ba, sl_list, val_arr, verbose):
    """
    Round-trip ``val_arr[sl]`` through ``ba`` for every slice and report.

    For each slice the bricks are reset, the values are written with
    ``put_values_to_bricks`` and read back with ``get_values_from_bricks``.
    A mismatch (even after squeezing the input) is always printed in full.
    Otherwise verbose mode prints details, while quiet mode writes one
    progress character per slice: '.' for an exact match, 's' for a match
    only after squeezing.

    @param ba       Object providing reset_bricks, put_values_to_bricks,
                    get_values_from_bricks (and total_domain when verbose)
    @param sl_list  Iterable of slices to test
    @param val_arr  Array the reference values are taken from
    @param verbose  True for per-slice details, False for progress dots
    """
    # Single-argument print(...) behaves identically as a Python 2 print
    # statement and as the Python 3 function.
    if not verbose:
        from sys import stdout

    for sl in sl_list:
        tstr = '*** Slice: {0} ***'.format(sl)
        if verbose:
            print('\n' + tstr)
            print('Slice Shape: {0}'.format(utils.slice_shape(sl, ba.total_domain)))

        ba.reset_bricks()
        vals = val_arr[sl]
        ba.put_values_to_bricks(sl, vals)
        vo = ba.get_values_from_bricks(sl)

        eq = np.array_equal(vals, vo)
        seq = np.array_equal(vals.squeeze(), vo)
        if not eq and not seq:
            print("\n!!!!!!!! NOT EQUAL !!!!!!!!")
            print('vals in:\n%s' % (vals,))
            print('vals out:\n%s' % (vo,))
        elif verbose:
            print("Value Shape: {0}".format(vo.shape))
            print("Equal{0}!".format(' (w/squeeze)' if not eq else ''))
        else:
            stdout.write('s' if not eq else '.')
            stdout.flush()

        if verbose:
            print('\n' + '*' * len(tstr))

    # Terminate the line of progress characters
    print('')
def __getitem__(self, slice_):
    """
    Return ``self.content`` repeated over the shape selected by ``slice_``.

    @param slice_ The requested slice; normalized with utils.fix_slice
    @return the filled array after passing through _cleanse_value
    """
    fixed = utils.fix_slice(slice_, self.shape)
    filled = np.empty(utils.slice_shape(fixed, self.shape),
                      dtype=np.dtype(self.value_encoding))
    filled.fill(self.content)
    return _cleanse_value(filled, fixed)
def __getitem__(self, slice_):
    """
    Return ``self.content`` repeated over the shape selected by ``slice_``.

    @param slice_ The requested slice; normalized with utils.fix_slice
    @return the filled object array after passing through _cleanse_value
    """
    fixed = utils.fix_slice(slice_, self.shape)
    # Always object type because it's 2 values / element!!
    filled = np.empty(utils.slice_shape(fixed, self.shape), dtype=np.dtype(object))
    filled.fill(self.content)
    return _cleanse_value(filled, fixed)
def __getitem__(self, slice_):
    """
    Build an object array of the sliced shape with every element set to
    ``self.content``.

    @param slice_ The requested slice; normalized with utils.fix_slice
    @return the array after passing through _cleanse_value
    """
    normalized = utils.fix_slice(slice_, self.shape)
    target_shape = utils.slice_shape(normalized, self.shape)
    # Always object type because it's 2 values / element!!
    object_dtype = np.dtype(object)
    result = np.empty(target_shape, dtype=object_dtype)
    result.fill(self.content)
    return _cleanse_value(result, normalized)
def get_values_from_bricks(self, slice_):
    """
    Read the values selected by ``slice_`` from every brick it touches.

    @param slice_ The requested slice over ``self.total_domain``
    @return the squeezed result array, or a bare scalar when exactly one
            value was requested

    The result array is allocated with np.empty, so positions not covered
    by any brick are left uninitialized.
    """
    slice_ = utils.fix_slice(slice_, self.total_domain)
    # this is a list of tuples [(b_id, (bounds...),), ...]
    bricks = bricking_utils.get_bricks_from_slice(slice_, self.rtree, self.total_domain)

    ret_shp = utils.slice_shape(slice_, self.total_domain)
    ret_arr = np.empty(ret_shp, dtype=self.dtype)

    for b in bricks:
        bid, bbnds = b
        brick_slice, brick_mm = bricking_utils.get_brick_slice_nd(slice_, bbnds)
        if None in brick_slice:
            # Brick holds none of the requested indices
            continue

        ret_slice = bricking_utils.get_value_slice_nd(slice_, ret_shp, bbnds, brick_slice, brick_mm)

        if not self.use_hdf:
            ret_vals = self.bricks[bid][brick_slice]
        else:
            fi = self.bricks[bid]
            # require_dataset may have to create the dataset, so the file is
            # opened read/write explicitly rather than relying on h5py's
            # default mode.
            with h5py.File(fi, 'a') as f:
                ds = f.require_dataset(str(bid), shape=self.brick_sizes, dtype=self.dtype,
                                       chunks=None, fillvalue=-1)
                ret_vals = ds[brick_slice]

        ret_arr[ret_slice] = ret_vals

    ret_arr = ret_arr.squeeze()

    # Unwrap a single value into a scalar
    if ret_arr.size == 1:
        if ret_arr.ndim == 0:
            ret_arr = ret_arr[()]
        else:
            ret_arr = ret_arr[0]

    return ret_arr
def map_cov_rdt(cls, coverage, rdt, field, slice_):
    """
    Transfer one parameter's values from ``coverage`` into ``rdt[field]``.

    @param coverage The coverage to read from
    @param rdt      The record dictionary to populate
    @param field    Name of the parameter to transfer
    @param slice_   Passed to ``coverage.get_parameter_values`` as ``tdoa``
    @raise CorruptionError when the field's first data extent exceeds the
           coverage's ``num_timesteps``

    A ParameterFunctionException raised while reading is swallowed and the
    field is left unset.
    """
    log.trace('Slice is %s', slice_)

    try:
        data = coverage.get_parameter_values(field, tdoa=slice_)
    except ParameterFunctionException:
        return

    if isinstance(data, np.ndarray):
        if coverage.get_data_extents(field)[0] < coverage.num_timesteps:
            # Short field: extend it with fill_value up to the slice length
            log.error("Misformed coverage detected, padding with fill_value")
            wanted = utils.slice_shape(slice_, (coverage.num_timesteps,))[0]
            filler = np.empty(wanted - data.shape[0], dtype=data.dtype)
            filler.fill(coverage.get_parameter_context(field).fill_value)
            data = np.append(data, filler)
        elif coverage.get_data_extents(field)[0] > coverage.num_timesteps:
            raise CorruptionError(
                'The coverage is corrupted:\n\tfield: %s\n\textents: %s\n\ttimesteps: %s'
                % (field, coverage.get_data_extents(field), coverage.num_timesteps))
        rdt[field] = np.atleast_1d(data)
    else:
        # None and any other non-array result are stored as a one-element list
        rdt[field] = [data]
def get_values_from_bricks(self, slice_):
    """
    Gather the values selected by ``slice_`` from every brick it touches.

    @param slice_ The requested slice over ``self.total_domain``
    @return the squeezed result array, or a bare scalar when exactly one
            value was requested
    """
    slice_ = utils.fix_slice(slice_, self.total_domain)
    # this is a list of tuples [(b_id, (bounds...),), ...]
    bricks = bricking_utils.get_bricks_from_slice(slice_, self.rtree, self.total_domain)

    out_shape = utils.slice_shape(slice_, self.total_domain)
    out = np.empty(out_shape, dtype=self.dtype)

    for brick_id, brick_bounds in bricks:
        brick_slice, brick_mm = bricking_utils.get_brick_slice_nd(slice_, brick_bounds)
        if None in brick_slice:
            # Brick holds none of the requested indices
            continue

        out_slice = bricking_utils.get_value_slice_nd(slice_, out_shape, brick_bounds,
                                                      brick_slice, brick_mm)

        if self.use_hdf:
            path = self.bricks[brick_id]
            with HDFLockingFile(path) as h5file:
                dataset = h5file.require_dataset(
                    str(brick_id),
                    shape=self.brick_sizes,
                    dtype=self.dtype,
                    chunks=None,
                    fillvalue=-1
                )
                brick_vals = dataset[brick_slice]
        else:
            brick_vals = self.bricks[brick_id][brick_slice]

        out[out_slice] = brick_vals

    out = out.squeeze()

    # Unwrap a single value into a scalar
    if out.size == 1:
        out = out[()] if out.ndim == 0 else out[0]

    return out
def _run_test_slices(ba, sl_list, val_arr, verbose):
    """
    Write and re-read ``val_arr[sl]`` through ``ba`` for each slice.

    Before each slice the bricks are reset. Values go in through
    ``put_values_to_bricks`` and come back through
    ``get_values_from_bricks``. A mismatch that persists after squeezing the
    input is always printed in full. Otherwise verbose mode prints the
    resulting shape, while quiet mode writes '.' (exact match) or 's'
    (match only after squeeze) per slice.

    @param ba       Object providing reset_bricks, put_values_to_bricks,
                    get_values_from_bricks (and total_domain when verbose)
    @param sl_list  Iterable of slices to test
    @param val_arr  Array the reference values are taken from
    @param verbose  True for per-slice details, False for progress dots
    """
    # print(...) with one argument is valid and equivalent on both Python 2
    # (statement with a parenthesized expression) and Python 3 (function).
    if not verbose:
        from sys import stdout

    for sl in sl_list:
        tstr = '*** Slice: {0} ***'.format(sl)
        if verbose:
            print('\n' + tstr)
            print('Slice Shape: {0}'.format(utils.slice_shape(sl, ba.total_domain)))

        ba.reset_bricks()
        vals = val_arr[sl]
        ba.put_values_to_bricks(sl, vals)
        vo = ba.get_values_from_bricks(sl)

        eq = np.array_equal(vals, vo)
        seq = np.array_equal(vals.squeeze(), vo)
        if not eq and not seq:
            print("\n!!!!!!!! NOT EQUAL !!!!!!!!")
            print('vals in:\n%s' % (vals,))
            print('vals out:\n%s' % (vo,))
        elif verbose:
            print("Value Shape: {0}".format(vo.shape))
            print("Equal{0}!".format(' (w/squeeze)' if not eq else ''))
        else:
            stdout.write('s' if not eq else '.')
            stdout.flush()

        if verbose:
            print('\n' + '*' * len(tstr))

    # Terminate the line of progress characters
    print('')
def __setitem__(self, slice_, value):
    """
    Implements ``self[slice_] = value`` by writing into every brick that
    intersects the slice.

    @param slice_ A set of valid constraints - int, [int,], (int,), or slice
    @param value  The value to store; a scalar is broadcast, an array must
                  match the slice shape (singleton dimensions may be omitted)
    @raise IOError    when the layer was opened with mode 'r'
    @raise IndexError when the shape of value is incompatible with slice_
    """
    if self.mode == 'r':
        raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

    from coverage_model import bricking_utils, utils

    extents = tuple([s for s in self.total_domain.total_extents if s != 0])

    slice_ = utils.fix_slice(deepcopy(slice_), extents)
    log.trace('slice_=%s', slice_)

    # bricks is a list of tuples [(b_ord, b_guid), ...]
    bricks = bricking_utils.get_bricks_from_slice(slice_, self.brick_tree, extents)
    log.trace('Found bricks: %s', bricks)

    values = np.asanyarray(value)
    v_shp = values.shape
    log.trace('value_shape: %s', v_shp)
    s_shp = utils.slice_shape(slice_, extents)
    log.trace('slice_shape: %s', s_shp)

    is_broadcast = v_shp == ()
    if is_broadcast:
        log.trace('Broadcast!!')
        value_slice = ()
    elif v_shp == s_shp:
        value_slice = None
    elif v_shp == tuple([dim for dim in s_shp if dim != 1]):
        # Missing dimensions are singleton, just reshape to fit
        values = values.reshape(s_shp)
        v_shp = values.shape
    else:
        raise IndexError(
            'Shape of \'value\' is not compatible with \'slice_\': slice_ shp == {0}\tvalue shp == {1}'.format(
                s_shp, v_shp))

    log.trace('value_shape: %s', v_shp)

    for b in bricks:
        # b is (brick_ordinal, brick_guid)
        bid = b[1]
        # brick_list[brick_guid] contains: [brick_extents, origin, tuple(bD), brick_active_size]
        _, bori, _, bact = self.brick_list[bid]

        dims = range(len(bori))
        # Inclusive (first, last) index per dimension
        bbnds = tuple([(bori[d], bori[d] + bact[d] - 1) for d in dims])
        # One past the last index per dimension
        bexts = tuple([bori[d] + bact[d] for d in dims])
        log.trace('bid=%s, bbnds=%s, bexts=%s', bid, bbnds, bexts)

        log.trace('Determining slice for brick: %s', b)

        brick_slice, brick_mm = bricking_utils.get_brick_slice_nd(slice_, bbnds)
        log.trace('brick_slice=%s\tbrick_mm=%s', brick_slice, brick_mm)

        if None in brick_slice:
            # Brick does not contain any of the requested indices
            log.debug('Brick does not contain any of the requested indices: Move to next brick')
            continue

        try:
            brick_slice = utils.fix_slice(brick_slice, bexts)
        except IndexError:
            log.debug('Malformed brick_slice: move to next brick')
            continue

        if not is_broadcast:
            value_slice = bricking_utils.get_value_slice_nd(slice_, v_shp, bbnds, brick_slice, brick_mm)

            try:
                value_slice = utils.fix_slice(value_slice, v_shp)
            except IndexError:
                log.debug('Malformed value_slice: move to next brick')
                continue

        log.trace('\nbrick %s:\n\tbrick_slice %s=%s\n\tmin/max=%s\n\tvalue_slice %s=%s', b,
                  utils.slice_shape(brick_slice, bexts), brick_slice, brick_mm,
                  utils.slice_shape(value_slice, v_shp), value_slice)

        self._set_values_to_brick(bid, brick_slice, values[value_slice])
def __getitem__(self, slice_):
    """
    Called to implement evaluation of self[slice_].

    Reads the selected region from every brick file that exists on disk;
    positions belonging to bricks without a file keep ``self.fill_value``.

    @param slice_ A set of valid constraints - int, [int,], (int,), or slice
    @return The array of values at location slice_; an empty array when
            every domain extent is 0
    """
    from coverage_model import bricking_utils, utils

    extents = tuple([s for s in self.total_domain.total_extents if s != 0])
    if extents == ():
        # Empty domain(s) - no data, return empty array
        return np.empty(0, dtype=self.dtype)

    # bricks is a list of tuples [(b_ord, b_guid), ...]
    slice_ = utils.fix_slice(deepcopy(slice_), extents)
    log.trace('slice_=%s', slice_)
    bricks = bricking_utils.get_bricks_from_slice(slice_, self.brick_tree, extents)
    log.trace('Found bricks: %s', bricks)

    ret_shp = utils.slice_shape(slice_, extents)
    log.trace('Return array shape: %s', ret_shp)
    ret_arr = np.empty(ret_shp, dtype=self.dtype)
    ret_arr.fill(self.fill_value)

    for b in bricks:
        # b is (brick_ordinal, brick_guid)
        _, bid = b
        # brick_list[brick_guid] contains: [brick_extents, origin, tuple(bD), brick_active_size]
        _, bori, _, bact = self.brick_list[bid]
        bbnds = []
        for i, bnd in enumerate(bori):
            bbnds.append((bori[i], bori[i] + bact[i] - 1))
        bbnds = tuple(bbnds)

        brick_slice, brick_mm = bricking_utils.get_brick_slice_nd(slice_, bbnds)
        log.trace('brick_slice=%s\tbrick_mm=%s', brick_slice, brick_mm)

        if None in brick_slice:
            log.debug('Brick does not contain any of the requested indices: Move to next brick')
            continue

        ret_slice = bricking_utils.get_value_slice_nd(slice_, ret_shp, bbnds, brick_slice, brick_mm)

        brick_file_path = '{0}/{1}.hdf5'.format(self.brick_path, bid)
        if not os.path.exists(brick_file_path):
            # Nothing on disk for this brick: the fill_value stays in place
            log.trace('Found virtual brick file: %s', brick_file_path)
        else:
            log.trace('Found real brick file: %s', brick_file_path)

            with h5py.File(brick_file_path) as brick_file:
                ret_vals = brick_file[bid][brick_slice]

            # Check if object type.  The object dtype string is '|O8' with
            # 8-byte pointers and '|O4' with 4-byte pointers; both must be
            # unpacked.
            if self.dtype in ('|O8', '|O4'):
                if hasattr(ret_vals, '__iter__'):
                    ret_vals = [self._object_unpack_hook(x) for x in ret_vals]
                else:
                    ret_vals = self._object_unpack_hook(ret_vals)

            ret_arr[ret_slice] = ret_vals

    return ret_arr
class SparseConstantValue(AbstractComplexParameterValue):
    """
    Parameter value whose single storage element (``self._storage[0]``)
    holds a sequence of span objects; reads are resolved by locating the
    spans that cover the requested indices.
    """

    def __init__(self, parameter_type, domain_set, storage=None, **kwargs):
        """
        @param **kwargs Additional keyword arguments are copied and the copy is passed up to AbstractComplexParameterValue; see documentation for that class for details
        """
        kwc = kwargs.copy()
        AbstractComplexParameterValue.__init__(self, parameter_type, domain_set, storage, **kwc)
        self._storage.expand((1,), 0, 1)

    @property
    def content(self):
        """The raw object held in the first storage element."""
        return self._storage[0]

    def expand_content(self, domain, origin, expansion):
        # No op - storage expanded in __setitem__
        pass

    def _update_min_max(self, value):
        """
        Fold ``value`` into the running ``_min`` / ``_max``, ignoring
        fill_value entries.  Skipped for string and object encodings.
        """
        # TODO: There is a flaw here when OVERWRITING:
        # overwritten values may still appear to be a min/max value as
        # recalculation of the full array does not occur...
        if np.dtype(self.value_encoding).kind not in ['S', 'O']:  # No min/max for strings or objects
            v = np.atleast_1d(value)
            # All values are fill_values, leave what we have!
            if np.atleast_1d(v == self.fill_value).all():
                return
            # Mask fill_value so it's not included in the calculation
            v = np.atleast_1d(np.ma.masked_equal(v, self.fill_value, copy=False))
            # Update min
            self._min = min(v.min(), self._min) if self._min != self.fill_value else v.min()
            # Update max - guarded on _max itself; testing _min here (which
            # was just updated above) would compare against a fill_value
            # still stored in _max.
            self._max = max(v.max(), self._max) if self._max != self.fill_value else v.max()

    def __indexify_slice(self, slice_, total_shape):
        """
        Expand ``slice_`` into an integer array of the selected indices.

        ONLY WORKS FOR 1D ARRAYS!!!

        @raise TypeError when a slice element is of an unsupported type
        """
        fsl = utils.fix_slice(slice_, total_shape)
        ss = utils.slice_shape(slice_, total_shape)
        ret = np.empty(ss, dtype=int)
        rf = ret.flatten()

        ci = 0
        for s, shape in zip(fsl, total_shape):
            if isinstance(s, slice):
                ind = range(*s.indices(shape))
                ll = len(ind)
                # NOTE: end bound is `ll`, not `ci + ll`; the two only agree
                # on the first pass, i.e. for the supported 1D case.
                rf[ci:ll] = ind
                ci += ll
            elif isinstance(s, (list, tuple)):
                ll = len(s)
                rf[ci:ll] = s
                ci += ll
            elif isinstance(s, int):
                rf[ci] = s
                ci += 1
            else:
                raise TypeError('Unsupported slice method')  # TODO: Better error message

        return rf.reshape(ss)

    def _apply_value(self, stor_sub):
        """
        Concatenate the values of the spans in ``stor_sub`` into one array.

        A span whose value is an AbstractParameterValue is sliced (shifted
        by the span's offset); any other value is repeated across the span.
        """
        v_arr = np.empty(0, dtype=self.value_encoding)
        max_i = self.shape[0]
        for s in stor_sub:
            # Missing bounds default to the ends of the domain
            st = s.lower_bound or 0
            en = s.upper_bound or max_i

            if st == en == max_i:
                break

            if isinstance(s.value, AbstractParameterValue):
                st += s.offset
                en += s.offset
                e = s.value[st:en]
            else:
                sz = en - st
                e = np.empty(sz, dtype=self.value_encoding)
                e.fill(s.value)

            v_arr = np.append(v_arr, e)

        return v_arr

    def __getitem__(self, slice_):
        """
        Return the values selected by ``slice_``.

        @return an empty array when nothing is requested or storage raises
                ValueError('No Bricks!'); otherwise the selected values
                after passing through _cleanse_value
        """
        slice_ = utils.fix_slice(slice_, self.shape)

        # Nothing asked for!
        if len(slice_) == 0:
            return np.empty(0, dtype=self.value_encoding)

        try:
            spans = self._storage[0]
        except ValueError as ve:
            # Only the single-argument 'No Bricks!' error means "no data"
            if ve.args != ('No Bricks!',):
                raise

            return np.empty(0, dtype=self.value_encoding)

        if not hasattr(spans, '__iter__') and spans == self.fill_value:
            ret = np.empty(utils.slice_shape(slice_, self.shape), dtype=self.value_encoding)
            ret.fill(self.fill_value)
            return _cleanse_value(ret, slice_)

        # Build the index array
        ind_arr = self.__indexify_slice(slice_, self.shape)
        # Empty index array!
        if len(ind_arr) == 0:
            return np.empty(0, dtype=self.value_encoding)

        # Get first and last index
        fi, li = ind_arr.min(), ind_arr.max()

        # Find the first storage needed
        strt_i = None
        end_i = None
        enum = enumerate(spans)
        for i, s in enum:
            if fi in s:
                strt_i = i
                break

        if fi == li:
            end_i = strt_i + 1  # If this breaks, it's probably because there's a gap in the spans...
        else:
            # `enum` is the same, partly consumed iterator: only the spans
            # after the starting one are searched, last first.
            for i, s in reversed(list(enum)):
                if li in s:
                    end_i = i + 1
                    break

        stor_sub = spans[strt_i:end_i]
        # Build the array of stored values
        v_arr = self._apply_value(stor_sub)

        if stor_sub[0].lower_bound is None:
            offset = 0
        else:
            offset = stor_sub[0].lower_bound

        # Shift indices so they are relative to the first span used
        io = ind_arr - offset

        vals = np.atleast_1d(v_arr[io])

        if hasattr(self.parameter_type.base_type, 'inner_encoding'):
            vals = ArrayValue._apply_inner_encoding(vals, self.parameter_type.base_type)

        return _cleanse_value(vals, slice_)
def put_values_to_bricks(self, slice_, values):
    """
    Write ``values`` into every brick touched by ``slice_``.

    @param slice_ The target slice over ``self.total_domain``
    @param values Scalar (broadcast to the whole slice) or array whose shape
                  matches the slice shape; singleton dimensions may be omitted
    @raise IndexError when the shape of values is incompatible with slice_
    """
    slice_ = utils.fix_slice(slice_, self.total_domain)
    # this is a list of tuples [(b_id, (bounds...),), ...]
    bricks = bricking_utils.get_bricks_from_slice(slice_, self.rtree, self.total_domain)

    values = np.asanyarray(values)
    v_shp = values.shape
    log.debug('value_shape: %s', v_shp)
    s_shp = utils.slice_shape(slice_, self.total_domain)
    log.debug('slice_shape: %s', s_shp)
    is_broadcast = False
    if v_shp == ():
        log.debug('Broadcast!!')
        is_broadcast = True
        value_slice = ()
    elif v_shp != s_shp:
        if v_shp == tuple([i for i in s_shp if i != 1]):  # Missing dimensions are singleton, just reshape to fit
            values = values.reshape(s_shp)
            v_shp = values.shape
        else:
            raise IndexError(
                'Shape of \'value\' is not compatible with \'slice_\': slice_ shp == {0}\tvalue shp == {1}'.format(
                    s_shp, v_shp))
    else:
        value_slice = None

    log.debug('value_shape: %s', v_shp)

    for b in bricks:
        # b is (brick_id, (brick_bounds per dim...),)
        bid, bbnds = b
        log.debug('Determining slice for brick: %s', b)
        # Shift from index to size.  Taking element 1 of each bound directly
        # avoids subscripting zip(), which is not a list on Python 3.
        bexts = tuple([bnd[1] + 1 for bnd in bbnds])
        log.debug('bid=%s, bbnds=%s, bexts=%s', bid, bbnds, bexts)

        brick_slice, brick_mm = bricking_utils.get_brick_slice_nd(slice_, bbnds)

        if None in brick_slice:
            # Brick does not contain any of the requested indices
            log.debug('Brick does not contain any of the requested indices: Move to next brick')
            continue

        try:
            brick_slice = utils.fix_slice(brick_slice, bexts)
        except IndexError:
            log.debug('Malformed brick_slice: move to next brick')
            continue

        if not is_broadcast:
            value_slice = bricking_utils.get_value_slice_nd(slice_, v_shp, bbnds, brick_slice, brick_mm)

            try:
                value_slice = utils.fix_slice(value_slice, v_shp)
            except IndexError:
                log.debug('Malformed value_slice: move to next brick')
                continue

        log.debug('\nbrick %s:\n\tbrick_slice %s=%s\n\tmin/max=%s\n\tvalue_slice %s=%s', b,
                  utils.slice_shape(brick_slice, bexts), brick_slice, brick_mm,
                  utils.slice_shape(value_slice, v_shp), value_slice)
        v = values[value_slice]
        log.debug('\nvalues %s=\n%s', v.shape, v)
        if not self.use_hdf:
            self.bricks[bid][brick_slice] = v
        else:
            fi = self.bricks[bid]
            # Writing requires a read/write handle; state the mode rather
            # than relying on h5py's default.
            with h5py.File(fi, 'a') as f:
                ds = f.require_dataset(str(bid), shape=self.brick_sizes, dtype=self.dtype,
                                       chunks=None, fillvalue=-1)
                ds[brick_slice] = v
nndim = np.atleast_1d(value).ndim if pndim != nndim: raise ValueError( 'The dimensionality of the value is not compatible with the previous value: {0} != {1}' .format(pndim, nndim)) if slice_[0] == self.shape[0] - 1: # -1 was used for slice # Change the value of the last span lspn.value = value else: nspn_offset = 0 if isinstance(slice_[0], Span): # TODO: This could be used to alter previous span objects, but for now, just use it to pass the offset nspn_offset = slice_[0].offset elif utils.slice_shape(slice_, self.shape) == self.shape: # Full slice nspn_offset = -self.shape[0] if not isinstance(value, AbstractParameterValue) and not isinstance( lspn.value, AbstractParameterValue): if np.atleast_1d( np.atleast_1d(value) == np.atleast_1d( lspn.value)).all(): # The previous value equals the new value - do not add a new span! return # The current index becomes the upper_bound of the previous span and the start of the next span curr_ind = self.shape[0] # Reset the upper_bound of the previous span
def __getitem__(self, slice_):
    """
    Implements ``self[slice_]``.

    Starts from an array filled with ``self.fill_value`` and overwrites the
    regions covered by bricks whose file exists on disk.

    @param slice_ A set of valid constraints - int, [int,], (int,), or slice
    @return The array of values at location slice_; an empty array when
            every domain extent is 0
    """
    from coverage_model import bricking_utils, utils

    extents = tuple([s for s in self.total_domain.total_extents if s != 0])
    if extents == ():
        # Empty domain(s) - no data, return empty array
        return np.empty(0, dtype=self.dtype)

    slice_ = utils.fix_slice(deepcopy(slice_), extents)
    log.trace('slice_=%s', slice_)

    # bricks is a list of tuples [(b_ord, b_guid), ...]
    bricks = bricking_utils.get_bricks_from_slice(slice_, self.brick_tree, extents)
    log.trace('Found bricks: %s', bricks)

    ret_shp = utils.slice_shape(slice_, extents)
    log.trace('Return array shape: %s', ret_shp)
    result = np.empty(ret_shp, dtype=self.dtype)
    result.fill(self.fill_value)

    for b in bricks:
        # b is (brick_ordinal, brick_guid)
        bid = b[1]
        # brick_list[brick_guid] contains: [brick_extents, origin, tuple(bD), brick_active_size]
        _, bori, _, bact = self.brick_list[bid]
        # Inclusive (first, last) index per dimension
        bbnds = tuple([(bori[d], bori[d] + bact[d] - 1) for d in range(len(bori))])

        brick_slice, brick_mm = bricking_utils.get_brick_slice_nd(slice_, bbnds)
        log.trace('brick_slice=%s\tbrick_mm=%s', brick_slice, brick_mm)

        if None in brick_slice:
            log.debug('Brick does not contain any of the requested indices: Move to next brick')
            continue

        ret_slice = bricking_utils.get_value_slice_nd(slice_, ret_shp, bbnds, brick_slice, brick_mm)

        brick_file_path = '{0}/{1}.hdf5'.format(self.brick_path, bid)
        if not os.path.exists(brick_file_path):
            # Nothing on disk for this brick: the fill_value stays in place
            log.trace('Found virtual brick file: %s', brick_file_path)
            continue

        log.trace('Found real brick file: %s', brick_file_path)
        with HDFLockingFile(brick_file_path) as brick_file:
            ret_vals = brick_file[bid][brick_slice]

        # Check if object type
        if self.dtype in ('|O8', '|O4'):
            if hasattr(ret_vals, '__iter__'):
                ret_vals = [self._object_unpack_hook(x) for x in ret_vals]
            else:
                ret_vals = self._object_unpack_hook(ret_vals)

        if self.parameter_manager.parameter_name == 'lat':
            log.trace("values from brick %s %s", str(ret_vals), type(ret_vals))

        result[ret_slice] = ret_vals

    return result
nndim = len(value.shape) else: nndim = np.atleast_1d(value).ndim if pndim != nndim: raise ValueError('The dimensionality of the value is not compatible with the previous value: {0} != {1}'.format(pndim, nndim)) if slice_[0] == self.shape[0] - 1: # -1 was used for slice # Change the value of the last span lspn.value = value else: nspn_offset = 0 if isinstance(slice_[0], Span): # TODO: This could be used to alter previous span objects, but for now, just use it to pass the offset nspn_offset = slice_[0].offset elif utils.slice_shape(slice_, self.shape) == self.shape: # Full slice nspn_offset = -self.shape[0] if not isinstance(value, AbstractParameterValue) and not isinstance(lspn.value, AbstractParameterValue): if np.atleast_1d(np.atleast_1d(value) == np.atleast_1d(lspn.value)).all(): # The previous value equals the new value - do not add a new span! return # The current index becomes the upper_bound of the previous span and the start of the next span curr_ind = self.shape[0] # Reset the upper_bound of the previous span spans[-1] = Span(lspn.lower_bound, curr_ind, offset=lspn.offset, value=lspn.value) # Create the new span nspn = Span(curr_ind, None, nspn_offset, value=value)
def put_values_to_bricks(self, slice_, values):
    """
    Write ``values`` into every brick touched by ``slice_``.

    @param slice_ The target slice over ``self.total_domain``
    @param values Scalar (broadcast to the whole slice) or array whose shape
                  matches the slice shape; singleton dimensions may be omitted
    @raise IndexError when the shape of values is incompatible with slice_
    """
    slice_ = utils.fix_slice(slice_, self.total_domain)
    # this is a list of tuples [(b_id, (bounds...),), ...]
    bricks = bricking_utils.get_bricks_from_slice(slice_, self.rtree, self.total_domain)

    values = np.asanyarray(values)
    v_shp = values.shape
    log.debug('value_shape: %s', v_shp)
    s_shp = utils.slice_shape(slice_, self.total_domain)
    log.debug('slice_shape: %s', s_shp)
    is_broadcast = False
    if v_shp == ():
        log.debug('Broadcast!!')
        is_broadcast = True
        value_slice = ()
    elif v_shp != s_shp:
        if v_shp == tuple([i for i in s_shp if i != 1]):  # Missing dimensions are singleton, just reshape to fit
            values = values.reshape(s_shp)
            v_shp = values.shape
        else:
            raise IndexError(
                'Shape of \'value\' is not compatible with \'slice_\': slice_ shp == {0}\tvalue shp == {1}'.format(
                    s_shp, v_shp))
    else:
        value_slice = None

    log.debug('value_shape: %s', v_shp)

    for b in bricks:
        # b is (brick_id, (brick_bounds per dim...),)
        bid, bbnds = b
        log.debug('Determining slice for brick: %s', b)
        # Shift from index to size.  Taking element 1 of each bound directly
        # avoids subscripting zip(), which is not a list on Python 3.
        bexts = tuple([bnd[1] + 1 for bnd in bbnds])
        log.debug('bid=%s, bbnds=%s, bexts=%s', bid, bbnds, bexts)

        brick_slice, brick_mm = bricking_utils.get_brick_slice_nd(slice_, bbnds)

        if None in brick_slice:
            # Brick does not contain any of the requested indices
            log.debug('Brick does not contain any of the requested indices: Move to next brick')
            continue

        try:
            brick_slice = utils.fix_slice(brick_slice, bexts)
        except IndexError:
            log.debug('Malformed brick_slice: move to next brick')
            continue

        if not is_broadcast:
            value_slice = bricking_utils.get_value_slice_nd(slice_, v_shp, bbnds, brick_slice, brick_mm)

            try:
                value_slice = utils.fix_slice(value_slice, v_shp)
            except IndexError:
                log.debug('Malformed value_slice: move to next brick')
                continue

        log.debug('\nbrick %s:\n\tbrick_slice %s=%s\n\tmin/max=%s\n\tvalue_slice %s=%s', b,
                  utils.slice_shape(brick_slice, bexts), brick_slice, brick_mm,
                  utils.slice_shape(value_slice, v_shp), value_slice)
        v = values[value_slice]
        log.debug('\nvalues %s=\n%s', v.shape, v)
        if not self.use_hdf:
            self.bricks[bid][brick_slice] = v
        else:
            fi = self.bricks[bid]
            with HDFLockingFile(fi, 'a') as f:
                ds = f.require_dataset(str(bid), shape=self.brick_sizes, dtype=self.dtype,
                                       chunks=None, fillvalue=-1)
                ds[brick_slice] = v
def __setitem__(self, slice_, value):
    """
    Called to implement assignment of self[slice_] = value.

    Writes the value into every brick that intersects the slice and, for
    parameters listed in ``self.master_manager.param_groups``, reports the
    min/max of what was written to the master manager.

    @param slice_ A set of valid constraints - int, [int,], (int,), or slice
    @param value  The value to assign to the storage at location slice_; a
                  scalar is broadcast, an array must match the slice shape
                  (singleton dimensions may be omitted)
    @raise IOError    when the layer was opened with mode 'r'
    @raise IndexError when the shape of value is incompatible with slice_
    """
    if self.mode == 'r':
        raise IOError('PersistenceLayer not open for writing: mode == \'{0}\''.format(self.mode))

    # Imported once per call rather than once per brick
    import datetime
    from coverage_model import bricking_utils, utils

    extents = tuple([s for s in self.total_domain.total_extents if s != 0])

    # bricks is a list of tuples [(b_ord, b_guid), ...]
    slice_ = utils.fix_slice(deepcopy(slice_), extents)
    log.trace('slice_=%s', slice_)
    bricks = bricking_utils.get_bricks_from_slice(slice_, self.brick_tree, extents)
    log.trace('Found bricks: %s', bricks)

    values = np.asanyarray(value)
    v_shp = values.shape
    log.trace('value_shape: %s', v_shp)
    s_shp = utils.slice_shape(slice_, extents)
    log.trace('slice_shape: %s', s_shp)
    is_broadcast = False
    if v_shp == ():
        log.trace('Broadcast!!')
        is_broadcast = True
        value_slice = ()
    elif v_shp != s_shp:
        if v_shp == tuple([i for i in s_shp if i != 1]):  # Missing dimensions are singleton, just reshape to fit
            values = values.reshape(s_shp)
            v_shp = values.shape
        else:
            raise IndexError(
                'Shape of \'value\' is not compatible with \'slice_\': slice_ shp == {0}\tvalue shp == {1}'.format(
                    s_shp, v_shp))
    else:
        value_slice = None

    log.trace('value_shape: %s', v_shp)

    for b in bricks:
        # b is (brick_ordinal, brick_guid)
        _, bid = b
        # brick_list[brick_guid] contains: [brick_extents, origin, tuple(bD), brick_active_size]
        _, bori, _, bact = self.brick_list[bid]
        bbnds = []
        bexts = []
        for i, bnd in enumerate(bori):
            bbnds.append((bori[i], bori[i] + bact[i] - 1))
            bexts.append(bori[i] + bact[i])
        bbnds = tuple(bbnds)
        bexts = tuple(bexts)
        log.trace('bid=%s, bbnds=%s, bexts=%s', bid, bbnds, bexts)

        log.trace('Determining slice for brick: %s', b)

        brick_slice, brick_mm = bricking_utils.get_brick_slice_nd(slice_, bbnds)
        log.trace('brick_slice=%s\tbrick_mm=%s', brick_slice, brick_mm)

        if None in brick_slice:
            # Brick does not contain any of the requested indices
            log.debug('Brick does not contain any of the requested indices: Move to next brick')
            continue

        try:
            brick_slice = utils.fix_slice(brick_slice, bexts)
        except IndexError:
            log.debug('Malformed brick_slice: move to next brick')
            continue

        if not is_broadcast:
            value_slice = bricking_utils.get_value_slice_nd(slice_, v_shp, bbnds, brick_slice, brick_mm)

            try:
                value_slice = utils.fix_slice(value_slice, v_shp)
            except IndexError:
                log.debug('Malformed value_slice: move to next brick')
                continue

        log.trace('\nbrick %s:\n\tbrick_slice %s=%s\n\tmin/max=%s\n\tvalue_slice %s=%s', b,
                  utils.slice_shape(brick_slice, bexts), brick_slice, brick_mm,
                  utils.slice_shape(value_slice, v_shp), value_slice)
        v = values[value_slice]

        self._set_values_to_brick(bid, brick_slice, v)

        if self.parameter_manager.parameter_name in self.master_manager.param_groups and v.dtype.type is not np.string_:
            valid_types = [np.bool_, np.int_, np.intc, np.intp, np.int8, np.int16, np.int32, np.int64,
                           np.uint8, np.uint16, np.uint32, np.uint64, np.float_, np.float16, np.float32,
                           np.float64, np.complex_, np.complex64, np.complex128]
            invalid_fill_values = [None, np.NaN, self.fill_value]
            try:
                min_val = None
                max_val = None
                # First try to do it fast
                try:
                    if issubclass(v.dtype.type, numbers.Number) or v.dtype.type in valid_types:
                        min_val = v.min()
                        max_val = v.max()
                except Exception:
                    # Best effort only: fall through to the slow path below.
                    # Exception (not a bare except) so KeyboardInterrupt and
                    # SystemExit still propagate.
                    min_val = None
                    max_val = None
                # if fast didn't return valid values, do it slow, but right
                if min_val in invalid_fill_values or max_val in invalid_fill_values:
                    ts = datetime.datetime.now()
                    mx = [x for x in v if x not in invalid_fill_values and (
                        type(x) in valid_types or issubclass(type(x), numbers.Number))]
                    if len(mx) > 0:
                        min_val = min(mx)
                        max_val = max(mx)
                    time_loss = datetime.datetime.now() - ts
                    log.debug("Repaired numpy statistics inconsistency for parameter/type %s/%s. Time loss of %s seconds ",
                              self.parameter_manager.parameter_name, str(v.dtype.type), str(time_loss))

                if min_val is not None and max_val is not None:
                    log.trace("%s min/max %s/%s type %s", self.parameter_manager.parameter_name,
                              min_val, max_val, type(min_val))
                    self.master_manager.track_data_written_to_brick(
                        bid, brick_slice, self.parameter_manager.parameter_name, min_val, max_val)
            except Exception as e:
                # Log for context, then let the caller see the failure
                log.warn("Could not store Span extents for %s. Unexpected error %s",
                         str((bid, brick_slice, self.parameter_manager.parameter_name)), e.message)
                raise
def _coverage_to_granule(cls, coverage, start_time=None, end_time=None,
                         stride_time=None, fuzzy_stride=True, parameters=None,
                         stream_def_id=None, tdoa=None):
    """
    Read values out of ``coverage`` into a new RecordDictionaryTool.

    @param coverage      The coverage to read
    @param start_time    Optional number; converted with cls.get_time_idx
    @param end_time      Optional number; converted with cls.get_time_idx
    @param stride_time   Optional number used as the stride
    @param fuzzy_stride  When False (and stride_time is given) every strided
                         time is looked up individually (slow)
    @param parameters    Optional iterable restricting the fields returned
    @param stream_def_id Optional stream definition used to build the rdt
    @param tdoa          Optional slice; when given it takes precedence over
                         the time arguments
    @return the populated RecordDictionaryTool
    @raise CorruptionError when a field's first data extent exceeds
           ``coverage.num_timesteps``
    """
    # Validations
    for label, candidate in (('start_time', start_time),
                             ('end_time', end_time),
                             ('stride_time', stride_time)):
        if candidate is not None:
            validate_is_instance(candidate, Number, label + ' must be a number for striding.')

    if tdoa is not None and isinstance(tdoa, slice):
        slice_ = tdoa
    elif stride_time is not None and not fuzzy_stride:
        # SLOW
        ugly_range = np.arange(start_time, end_time, stride_time)
        idx_values = [cls.get_time_idx(coverage, t) for t in ugly_range]
        # De-duplicating through a set scrambles the order; sorted restores it
        slice_ = [sorted(set(idx_values))]
    elif not (start_time is None and end_time is None):
        if start_time is not None:
            start_time = cls.get_time_idx(coverage, start_time)
        if end_time is not None:
            end_time = cls.get_time_idx(coverage, end_time)
        slice_ = slice(start_time, end_time, stride_time)
        log.info('Slice: %s', slice_)
    else:
        slice_ = slice(None)  # Defaults to all values

    if stream_def_id:
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    else:
        rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)

    if parameters is not None:
        # TODO: Improve efficiency here
        fields = list(set(parameters).intersection(rdt.fields))
    else:
        fields = rdt.fields

    for field in fields:
        log.info('Slice is %s', slice_)
        data = coverage.get_parameter_values(field, tdoa=slice_)

        if isinstance(data, np.ndarray):
            if coverage.get_data_extents(field)[0] < coverage.num_timesteps:
                # Short field: extend it with fill_value up to the slice length
                log.error("Misformed coverage detected, padding with fill_value")
                wanted = utils.slice_shape(slice_, (coverage.num_timesteps,))[0]
                filler = np.empty(wanted - data.shape[0], dtype=data.dtype)
                filler.fill(coverage.get_parameter_context(field).fill_value)
                data = np.append(data, filler)
            elif coverage.get_data_extents(field)[0] > coverage.num_timesteps:
                raise CorruptionError(
                    'The coverage is corrupted:\n\tfield: %s\n\textents: %s\n\ttimesteps: %s'
                    % (field, coverage.get_data_extents(field), coverage.num_timesteps))
            rdt[field] = data
        else:
            # None and any other non-array result are stored as a one-element list
            rdt[field] = [data]

    return rdt