def oneparamcov_noautoflush(save_coverage=False, in_memory=False):
    """Create a SimplexCoverage carrying a single (time) parameter with value auto-flushing disabled."""
    # Describe the lone parameter -- the temporal axis itself
    param_dict = ParameterDictionary()
    time_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=np.dtype('int64')))
    time_ctxt.axis = AxisTypeEnum.TIME
    time_ctxt.uom = 'seconds since 01-01-1970'
    param_dict.add_context(time_ctxt)

    # Coordinate reference systems for the temporal and spatial axes
    temporal_crs = CRS([AxisTypeEnum.TIME])
    spatial_crs = CRS([AxisTypeEnum.LON, AxisTypeEnum.LAT])

    # Domains: extensible 1d timeline, immutable 0d spatial topology (station/trajectory)
    temporal_dom = GridDomain(GridShape('temporal', [0]), temporal_crs, MutabilityEnum.EXTENSIBLE)
    spatial_dom = GridDomain(GridShape('spatial', [0]), spatial_crs, MutabilityEnum.IMMUTABLE)

    # Build the coverage; auto_flush_values=False means values are only persisted
    # when explicitly flushed
    scov = SimplexCoverage('test_data', create_guid(), 'sample coverage_model',
                           param_dict, temporal_dom, spatial_dom, in_memory,
                           auto_flush_values=False)

    # Expand the temporal domain and fill the time parameter
    nt = 100
    scov.insert_timesteps(nt)
    scov.set_parameter_values('time', value=np.arange(nt))

    if in_memory and save_coverage:
        SimplexCoverage.pickle_save(scov, 'test_data/sample.cov')

    return scov
def cov_io(self, context, value_array, comp_val=None): pdict = ParameterDictionary() time = ParameterContext(name='time', param_type=QuantityType(value_encoding=np.float64)) pdict.add_context(context) pdict.add_context(time, True) # Construct temporal and spatial Coordinate Reference System objects tcrs = CRS([AxisTypeEnum.TIME]) scrs = CRS([AxisTypeEnum.LON, AxisTypeEnum.LAT]) # Construct temporal and spatial Domain objects tdom = GridDomain(GridShape('temporal', [0]), tcrs, MutabilityEnum.EXTENSIBLE) # 1d (timeline) sdom = GridDomain(GridShape('spatial', [0]), scrs, MutabilityEnum.IMMUTABLE) # 0d spatial topology (station/trajectory) # Instantiate the SimplexCoverage providing the ParameterDictionary, spatial Domain and temporal Domain cov = SimplexCoverage('test_data', create_guid(), 'sample coverage_model', parameter_dictionary=pdict, temporal_domain=tdom, spatial_domain=sdom) cov.insert_timesteps(len(value_array)) cov.set_parameter_values('test', tdoa=slice(0,len(value_array)), value=value_array) comp_val = comp_val if comp_val is not None else value_array testval = cov.get_parameter_values('test') try: np.testing.assert_array_equal(testval, comp_val) except: print repr(value_array) raise
def nospatialcov(save_coverage=False, in_memory=False):
    """Build a sample coverage that has a temporal domain but deliberately no spatial domain."""
    # Temporal CRS/domain only -- this fixture omits the spatial domain entirely
    time_crs = CRS([AxisTypeEnum.TIME])
    time_dom = GridDomain(GridShape('temporal', [0]), time_crs, MutabilityEnum.EXTENSIBLE)  # 1d (timeline)

    # Parameter dictionary describing every parameter carried by the coverage
    param_dict = ParameterDictionary()

    time_ctxt = ParameterContext('quantity_time', param_type=QuantityType(value_encoding=np.dtype('int64')), variability=VariabilityEnum.TEMPORAL)
    time_ctxt.axis = AxisTypeEnum.TIME
    time_ctxt.uom = 'seconds since 01-01-1970'
    param_dict.add_context(time_ctxt)

    quantity_ctxt = ParameterContext('quantity', param_type=QuantityType(value_encoding=np.dtype('float32')))
    quantity_ctxt.long_name = 'example of a parameter of type QuantityType'
    quantity_ctxt.uom = 'degree_Celsius'
    param_dict.add_context(quantity_ctxt)

    constant_ctxt = ParameterContext('constant', param_type=ConstantType())
    constant_ctxt.long_name = 'example of a parameter of type ConstantType'
    param_dict.add_context(constant_ctxt)

    array_ctxt = ParameterContext('array', param_type=ArrayType())
    array_ctxt.long_name = 'example of a parameter of type ArrayType with base_type ndarray (resolves to \'object\')'
    param_dict.add_context(array_ctxt)

    array2_ctxt = ParameterContext('array2', param_type=ArrayType())
    array2_ctxt.long_name = 'example of a parameter of type ArrayType with base_type object'
    param_dict.add_context(array2_ctxt)

    # Instantiate the coverage -- note: no spatial_domain argument
    scov = SimplexCoverage('test_data', create_guid(), 'sample coverage_model', param_dict,
                           temporal_domain=time_dom, in_memory_storage=in_memory)

    # Expand the temporal domain, then populate every parameter
    nt = 20
    scov.insert_timesteps(nt)

    scov.set_parameter_values('quantity_time', value=np.arange(nt))
    scov.set_parameter_values('quantity', value=np.random.random_sample(nt)*(26-23)+23)
    scov.set_parameter_values('constant', value=20)

    # One value (which IS an array) for each member of the domain;
    # both lists are filled in the same loop to keep the RNG draw order stable
    arr_vals = []
    arr2_vals = []
    for _ in xrange(nt):
        arr_vals.append(np.random.bytes(np.random.randint(1,20)))
        arr2_vals.append(np.random.random_sample(np.random.randint(1,10)))
    scov.set_parameter_values('array', value=arr_vals)
    scov.set_parameter_values('array2', value=arr2_vals)

    if in_memory and save_coverage:
        SimplexCoverage.pickle_save(scov, 'test_data/ptypes.cov')

    return scov
def samplecov(save_coverage=False, in_memory=False):
    """Create a small five-parameter sample coverage (time, lat, lon, temp, conductivity)."""
    param_dict = ParameterDictionary()

    # Temporal parameter -- registered via the is_temporal flag instead of an axis assignment
    time_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=np.dtype('int64')))
    time_ctxt.uom = 'seconds since 01-01-1970'
    param_dict.add_context(time_ctxt, is_temporal=True)

    lat_ctxt = ParameterContext('lat', param_type=QuantityType(value_encoding=np.dtype('float32')))
    lat_ctxt.axis = AxisTypeEnum.LAT
    lat_ctxt.uom = 'degree_north'
    param_dict.add_context(lat_ctxt)

    lon_ctxt = ParameterContext('lon', param_type=QuantityType(value_encoding=np.dtype('float32')))
    lon_ctxt.axis = AxisTypeEnum.LON
    lon_ctxt.uom = 'degree_east'
    param_dict.add_context(lon_ctxt)

    temp_ctxt = ParameterContext('temp', param_type=QuantityType(value_encoding=np.dtype('float32')))
    temp_ctxt.uom = 'degree_Celsius'
    param_dict.add_context(temp_ctxt)

    cond_ctxt = ParameterContext('conductivity', param_type=QuantityType(value_encoding=np.dtype('float32')))
    cond_ctxt.uom = 'unknown'
    param_dict.add_context(cond_ctxt)

    # Coordinate reference systems and domains
    temporal_crs = CRS([AxisTypeEnum.TIME])
    spatial_crs = CRS([AxisTypeEnum.LON, AxisTypeEnum.LAT])
    temporal_dom = GridDomain(GridShape('temporal', [0]), temporal_crs, MutabilityEnum.EXTENSIBLE)  # 1d (timeline)
    spatial_dom = GridDomain(GridShape('spatial', [0]), spatial_crs, MutabilityEnum.IMMUTABLE)  # 0d spatial topology (station/trajectory)

    # Build the coverage
    scov = SimplexCoverage('test_data', create_guid(), 'sample coverage_model',
                           param_dict, temporal_dom, spatial_dom, in_memory)

    # Expand the temporal domain and populate each parameter
    nt = 30
    scov.insert_timesteps(nt)

    scov.set_parameter_values('time', value=np.arange(nt))
    scov.set_parameter_values('lat', value=45)
    scov.set_parameter_values('lon', value=-71)

    # Random sample in [23, 26): scale random_sample's [0, 1) output by (b-a) and add a
    # Ref: http://docs.scipy.org/doc/numpy/reference/generated/numpy.random.random_sample.html#numpy.random.random_sample
    temp_vals = np.random.random_sample(nt)*(26-23)+23
    scov.set_parameter_values('temp', value=temp_vals)
    scov.set_parameter_values('conductivity', value=np.random.random_sample(nt)*(110-90)+90)

    if in_memory and save_coverage:
        SimplexCoverage.pickle_save(scov, 'test_data/sample.cov')

    return scov
def benchmark_value_setting(num_params=10, num_insertions=100, repeat=1, bulk_ts_insert=False): # Instantiate a ParameterDictionary pdict = ParameterDictionary() # Create a set of ParameterContext objects to define the parameters in the coverage, add each to the ParameterDictionary t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=np.dtype('int64'))) t_ctxt.axis = AxisTypeEnum.TIME t_ctxt.uom = 'seconds since 01-01-1970' pdict.add_context(t_ctxt) for i in xrange(num_params-1): pdict.add_context(ParameterContext('param_{0}'.format(i))) # Construct temporal and spatial Coordinate Reference System objects tcrs = CRS([AxisTypeEnum.TIME]) scrs = CRS([AxisTypeEnum.LON, AxisTypeEnum.LAT]) # Construct temporal and spatial Domain objects tdom = GridDomain(GridShape('temporal', [0]), tcrs, MutabilityEnum.EXTENSIBLE) # 1d (timeline) sdom = GridDomain(GridShape('spatial', [0]), scrs, MutabilityEnum.IMMUTABLE) # 0d spatial topology (station/trajectory) import time counter = 1 insert_times = [] per_rep_times = [] full_time = time.time() for r in xrange(repeat): # Instantiate the SimplexCoverage providing the ParameterDictionary, spatial Domain and temporal Domain cov = SimplexCoverage('test_data', create_guid(), 'empty sample coverage_model', parameter_dictionary=pdict, temporal_domain=tdom, spatial_domain=sdom) rep_time = time.time() if bulk_ts_insert: cov.insert_timesteps(num_insertions) for x in xrange(num_insertions): in_time = time.time() if not bulk_ts_insert: cov.insert_timesteps(1) slice_ = slice(cov.num_timesteps - 1, None) cov.set_parameter_values('time', 1, tdoa=slice_) for i in xrange(num_params-1): cov.set_parameter_values('param_{0}'.format(i), 1.1, tdoa=slice_) in_time = time.time() - in_time insert_times.append(in_time) counter += 1 rep_time = time.time() - rep_time per_rep_times.append(rep_time) cov.close() print 'Average Value Insertion Time (%s repetitions): %s' % (repeat, sum(insert_times) / len(insert_times)) print 'Average Total 
Expansion Time (%s repetitions): %s' % (repeat, sum(per_rep_times) / len(per_rep_times)) print 'Full Time (includes cov creation/closing): %s' % (time.time() - full_time) return cov
def cov_io(self, context, value_array, comp_val=None): pdict = ParameterDictionary() time = ParameterContext( name='time', param_type=QuantityType(value_encoding=np.float64)) pdict.add_context(context) pdict.add_context(time, True) # Construct temporal and spatial Coordinate Reference System objects tcrs = CRS([AxisTypeEnum.TIME]) scrs = CRS([AxisTypeEnum.LON, AxisTypeEnum.LAT]) # Construct temporal and spatial Domain objects tdom = GridDomain(GridShape('temporal', [0]), tcrs, MutabilityEnum.EXTENSIBLE) # 1d (timeline) sdom = GridDomain(GridShape('spatial', [0]), scrs, MutabilityEnum.IMMUTABLE ) # 0d spatial topology (station/trajectory) # Instantiate the SimplexCoverage providing the ParameterDictionary, spatial Domain and temporal Domain cov = SimplexCoverage('test_data', create_guid(), 'sample coverage_model', parameter_dictionary=pdict, temporal_domain=tdom, spatial_domain=sdom) self.addCleanup(shutil.rmtree, cov.persistence_dir) cov.insert_timesteps(len(value_array)) cov.set_parameter_values('test', tdoa=slice(0, len(value_array)), value=value_array) comp_val = comp_val if comp_val is not None else value_array testval = cov.get_parameter_values('test') try: np.testing.assert_array_equal(testval, comp_val) except: print repr(value_array) raise
def ncstation2cov(save_coverage=False, in_memory=False):
    """Build a station SimplexCoverage from the USGS sample netCDF dataset."""
    # Source dataset and the variables we carry over into the coverage
    ds = Dataset('test_data/usgs.nc')
    var_names = ['time','lat','lon','z','streamflow','water_temperature',]

    param_dict = ParameterDictionary()

    # Axis assignment for the non-temporal coordinate variables
    axis_by_name = {'lat': AxisTypeEnum.LAT, 'lon': AxisTypeEnum.LON, 'z': AxisTypeEnum.HEIGHT}

    # One ParameterContext per dataset variable
    for name in var_names:
        nc_var = ds.variables[name]
        ptype = QuantityType(nc_var.dtype.char)
        if name in ('lat','lon','z'):
            # Station coordinates are fixed, so wrap them as constants
            ptype = ConstantType(ptype)
        ctxt = ParameterContext(name, param_type=ptype)
        nc_attrs = nc_var.ncattrs()
        if 'units' in nc_attrs:
            ctxt.uom = nc_var.getncattr('units')
        if 'long_name' in nc_attrs:
            ctxt.description = nc_var.getncattr('long_name')
        if '_FillValue' in nc_attrs:
            ctxt.fill_value = nc_var.getncattr('_FillValue')
        # Coordinate parameters get their axis (and, for time, variability)
        if name == 'time':
            ctxt.variability = VariabilityEnum.TEMPORAL
            ctxt.axis = AxisTypeEnum.TIME
        elif name in axis_by_name:
            ctxt.axis = axis_by_name[name]
        param_dict.add_context(ctxt)

    # Standard temporal and lat/lon/height spatial CRS helpers
    temporal_crs = CRS.standard_temporal()
    spatial_crs = CRS.lat_lon_height()

    # Domains: extensible 1d timeline, immutable 1d spatial topology (station/trajectory)
    temporal_dom = GridDomain(GridShape('temporal', [0]), temporal_crs, MutabilityEnum.EXTENSIBLE)
    spatial_dom = GridDomain(GridShape('spatial', [0]), spatial_crs, MutabilityEnum.IMMUTABLE)

    # Build the coverage
    scov = SimplexCoverage('test_data', create_guid(), 'sample station coverage_model',
                           param_dict, temporal_dom, spatial_dom, in_memory)

    # One timestep per source time value (automatically expands other arrays)
    scov.insert_timesteps(ds.variables['time'].size)

    # Copy data across -- direct assignment to the range arrays, NOT via setters
    for name in var_names:
        nc_var = ds.variables[name]
        nc_var.set_auto_maskandscale(False)
        if name in ('lat','lon','z'):
            # Constant parameters take a range expression built from the first value
            scov._range_value[name][0] = make_range_expr(nc_var[0])
        else:
            scov._range_value[name][:] = nc_var[:]

    if in_memory and save_coverage:
        SimplexCoverage.pickle_save(scov, 'test_data/usgs.cov')

    return scov
def ncgrid2cov(save_coverage=False, in_memory=False):
    """
    Build a 3d gridded SimplexCoverage from the NCOM sample netCDF dataset.

    NOTE: currently disabled -- the unconditional raise below makes everything
    after it unreachable until multidimensional support lands.

    @param save_coverage  if True (and in_memory), pickle the coverage to disk
    @param in_memory      use in-memory storage instead of persistence
    @raises NotImplementedError  always, at present
    """
    if True:
        raise NotImplementedError('Multidimensional support is not available at this time')
    # ------- everything below is unreachable until the guard above is removed -------
    # Open the netcdf dataset
    ds = Dataset('test_data/ncom.nc')
    # Itemize the variable names that we want to include in the coverage
    var_names = ['time','lat','lon','depth','water_u','water_v','salinity','water_temp',]

    # Instantiate a ParameterDictionary
    pdict = ParameterDictionary()

    # Create a ParameterContext object for each of the variables in the dataset and add them to the ParameterDictionary
    for v in var_names:
        var = ds.variables[v]

        pcontext = ParameterContext(v, param_type=QuantityType(value_encoding=ds.variables[v].dtype.char))
        if 'units' in var.ncattrs():
            pcontext.uom = var.getncattr('units')
        if 'long_name' in var.ncattrs():
            pcontext.description = var.getncattr('long_name')
        if '_FillValue' in var.ncattrs():
            pcontext.fill_value = var.getncattr('_FillValue')

        # Set the axis for the coordinate parameters
        if v == 'time':
            pcontext.variability = VariabilityEnum.TEMPORAL
            pcontext.axis = AxisTypeEnum.TIME
        elif v == 'lat':
            pcontext.axis = AxisTypeEnum.LAT
        elif v == 'lon':
            pcontext.axis = AxisTypeEnum.LON
        elif v == 'depth':
            pcontext.axis = AxisTypeEnum.HEIGHT

        pdict.add_context(pcontext)

    # Construct temporal and spatial Coordinate Reference System objects
    tcrs = CRS([AxisTypeEnum.TIME])
    scrs = CRS([AxisTypeEnum.LON, AxisTypeEnum.LAT, AxisTypeEnum.HEIGHT])

    # Construct temporal and spatial Domain objects
    tdom = GridDomain(GridShape('temporal'), tcrs, MutabilityEnum.EXTENSIBLE)  # 1d (timeline)
    sdom = GridDomain(GridShape('spatial', [34,57,89]), scrs, MutabilityEnum.IMMUTABLE)  # 3d spatial topology (grid)

    # Instantiate the SimplexCoverage providing the ParameterDictionary, spatial Domain and temporal Domain
    scov = SimplexCoverage('test_data', create_guid(), 'sample grid coverage_model', pdict, tdom, sdom, in_memory)

    # Insert the timesteps (automatically expands other arrays)
    tvar = ds.variables['time']
    scov.insert_timesteps(tvar.size)

    # Add data to the parameters - NOT using setters at this point, direct assignment to arrays
    for v in var_names:
        log.debug('Assign values to %s', v)
        var = ds.variables[v]
        var.set_auto_maskandscale(False)
        arr = var[:]
        # TODO: Sort out how to leave these sparse internally and only broadcast during read
        # Coordinate variables are broadcast across the 34x57x89 grid via sparse meshgrids
        if v == 'depth':
            z,_,_ = my_meshgrid(arr,np.zeros([57]),np.zeros([89]),indexing='ij',sparse=True)
            scov._range_value[v][:] = z
        elif v == 'lat':
            _,y,_ = my_meshgrid(np.zeros([34]),arr,np.zeros([89]),indexing='ij',sparse=True)
            scov._range_value[v][:] = y
        elif v == 'lon':
            _,_,x = my_meshgrid(np.zeros([34]),np.zeros([57]),arr,indexing='ij',sparse=True)
            scov._range_value[v][:] = x
        else:
            scov._range_value[v][:] = var[:]

    if in_memory and save_coverage:
        SimplexCoverage.pickle_save(scov, 'test_data/ncom.cov')

    return scov
def ptypescov(save_coverage=False, in_memory=False):
    """Create a SimplexCoverage exercising several parameter types: quantity, constant, array and record."""
    # Coordinate reference systems and domains
    temporal_crs = CRS([AxisTypeEnum.TIME])
    spatial_crs = CRS([AxisTypeEnum.LON, AxisTypeEnum.LAT])
    temporal_dom = GridDomain(GridShape('temporal', [0]), temporal_crs, MutabilityEnum.EXTENSIBLE)  # 1d (timeline)
    spatial_dom = GridDomain(GridShape('spatial', [0]), spatial_crs, MutabilityEnum.IMMUTABLE)  # 0d spatial topology (station/trajectory)

    # One ParameterContext per demonstrated parameter type
    param_dict = ParameterDictionary()

    time_ctxt = ParameterContext('quantity_time', param_type=QuantityType(value_encoding=np.dtype('int64')), variability=VariabilityEnum.TEMPORAL)
    time_ctxt.axis = AxisTypeEnum.TIME
    time_ctxt.uom = 'seconds since 01-01-1970'
    param_dict.add_context(time_ctxt)

    const_int_ctxt = ParameterContext('const_int', param_type=ConstantType(QuantityType(value_encoding=np.dtype('int32'))), variability=VariabilityEnum.NONE)
    const_int_ctxt.long_name = 'example of a parameter of type ConstantType, base_type int32'
    const_int_ctxt.axis = AxisTypeEnum.LAT
    const_int_ctxt.uom = 'degree_north'
    param_dict.add_context(const_int_ctxt)

    const_flt_ctxt = ParameterContext('const_float', param_type=ConstantType(), variability=VariabilityEnum.NONE)
    const_flt_ctxt.long_name = 'example of a parameter of type QuantityType, base_type float (default)'
    const_flt_ctxt.axis = AxisTypeEnum.LON
    const_flt_ctxt.uom = 'degree_east'
    param_dict.add_context(const_flt_ctxt)

    quantity_ctxt = ParameterContext('quantity', param_type=QuantityType(value_encoding=np.dtype('float32')))
    quantity_ctxt.long_name = 'example of a parameter of type QuantityType'
    quantity_ctxt.uom = 'degree_Celsius'
    param_dict.add_context(quantity_ctxt)

    array_ctxt = ParameterContext('array', param_type=ArrayType())
    array_ctxt.long_name = 'example of a parameter of type ArrayType, will be filled with variable-length \'byte-string\' data'
    param_dict.add_context(array_ctxt)

    record_ctxt = ParameterContext('record', param_type=RecordType())
    record_ctxt.long_name = 'example of a parameter of type RecordType, will be filled with dictionaries'
    param_dict.add_context(record_ctxt)

    # Build the coverage
    scov = SimplexCoverage('test_data', create_guid(), 'sample coverage_model',
                           param_dict, temporal_dom, spatial_dom, in_memory)

    # Expand the temporal domain and populate each parameter
    nt = 20
    scov.insert_timesteps(nt)

    scov.set_parameter_values('quantity_time', value=np.arange(nt))
    # Constant set directly, with incorrect data type (fixed under the hood)
    scov.set_parameter_values('const_int', value=45.32)
    # Constant set with a properly formed constant expression
    scov.set_parameter_values('const_float', value=make_range_expr(-71.11))
    scov.set_parameter_values('quantity', value=np.random.random_sample(nt)*(26-23)+23)

    # Build per-timestep values for the array and record parameters; grow the
    # alphabet until it covers every timestep index
    arr_vals = []
    rec_vals = []
    letters = 'abcdefghijklmnopqrstuvwxyz'
    while len(letters) < nt:
        letters += 'abcdefghijklmnopqrstuvwxyz'
    for i in xrange(nt):
        # One value (which is a byte string) for each member of the domain
        arr_vals.append(np.random.bytes(np.random.randint(1,20)))
        # One value (which is a dict) for each member of the domain
        rec_vals.append({letters[i]: letters[i:]})
    scov.set_parameter_values('array', value=arr_vals)
    scov.set_parameter_values('record', value=rec_vals)

    if in_memory and save_coverage:
        SimplexCoverage.pickle_save(scov, 'test_data/ptypes.cov')

    return scov
def repair(self, backup=True, copy_over=True, keep_temp=False, reanalyze=False, analyze_bricks=False, detailed_analysis=False):
    """
    Heavy repair tool that recreates a blank persisted Coverage from the broken coverage's
    original construction parameters, then reconstructs the Master and Parameter metadata
    files by inspection of the ION objects and "valid" brick files.

    @param backup             back up the original Master/Parameter files before overwriting
    @param copy_over          copy the repaired metadata files back over the broken coverage
    @param keep_temp          keep (and return the path of) the temporary working coverage
    @param reanalyze          force a fresh corruption analysis even if one is cached
    @param analyze_bricks     passed through to the analysis step
    @param detailed_analysis  passed through to the analysis step
    @return: the temp coverage dir if keep_temp is True; otherwise None
    """
    # Reuse the cached analysis result unless the caller asks for a fresh one
    if self._ar is None or reanalyze:
        self._ar = self._do_analysis(analyze_bricks=analyze_bricks, detailed_analysis=detailed_analysis)
    if self._ar.is_corrupt:
        if len(self._ar.get_brick_corruptions()) > 0:
            raise NotImplementedError('Brick corruption. Cannot repair at this time!!!')
        else:
            # Repair the Master and Parameter metadata files
            # Need the ParameterDictionary, TemporalDomain and SpatialDomain
            pdict = ParameterDictionary.load(self._dso.parameter_dictionary)
            tdom = GridDomain.load(self._dso.temporal_domain)
            sdom = GridDomain.load(self._dso.spatial_domain)

            # Set up the working directory for the recovered coverage
            tempcov_dir = tempfile.mkdtemp('covs')

            # Create the temporary Coverage
            tempcov = SimplexCoverage(root_dir=tempcov_dir, persistence_guid=self._guid, name=self._guid, parameter_dictionary=pdict, spatial_domain=sdom, temporal_domain=tdom)
            # Handle to persistence layer for tempcov
            pl = tempcov._persistence_layer

            # Set up the original and temporary coverage path strings
            orig_dir = os.path.join(self.cov_pth, self._guid)
            temp_dir = os.path.join(tempcov.persistence_dir, tempcov.persistence_guid)

            # Insert same number of timesteps into temporary coverage as in broken coverage
            brick_domains_new, new_brick_list, brick_list_spans, tD, bD, min_data_bound, max_data_bound = self.inspect_bricks(self.cov_pth, self._guid, 'time')
            empty_cov = brick_list_spans is None  # If None, there are no brick files --> no timesteps, empty coverage!
            if not empty_cov:  # If None, there are no brick files --> no timesteps, empty coverage!
                bls = [s.value for s in brick_list_spans]
                # Each brick's total active size; summed to recover the timestep count
                maxes = [sum(b[3]) for b in new_brick_list.values()]
                tempcov.insert_timesteps(sum(maxes))

                # Replace metadata in the Master file
                pl.master_manager.brick_domains = brick_domains_new
                pl.master_manager.brick_list = new_brick_list

                # Repair ExternalLinks to brick files
                # NOTE(review): this handle is never closed before tempcov.close();
                # the sibling HDFLockingFile-based variant closes it via a with-block -- confirm intent
                f = h5py.File(pl.master_manager.file_path, 'a')
                for param_name in pdict.keys():
                    # Drop and recreate the parameter's link group, then relink every brick
                    del f[param_name]
                    f.create_group(param_name)
                    for brick in bls:
                        link_path = '/{0}/{1}'.format(param_name, brick[0])
                        brick_file_name = '{0}.hdf5'.format(brick[0])
                        brick_rel_path = os.path.join(pl.parameter_metadata[param_name].root_dir.replace(tempcov.persistence_dir, '.'), brick_file_name)
                        log.debug('link_path: %s', link_path)
                        log.debug('brick_rel_path: %s', brick_rel_path)
                        pl.master_manager.add_external_link(link_path, brick_rel_path, brick[0])

            pl.flush_values()
            pl.flush()
            tempcov.close()

            # Remove 'rtree' dataset from Master file if it already exists (post domain expansion)
            # to make way for reconstruction
            f = h5py.File(pl.master_manager.file_path, 'a')
            if 'rtree' in f.keys():
                del f['rtree']
            f.close()

            # Reconstruct 'rtree' dataset
            # Open temporary Coverage and PersistenceLayer objects
            fixed_cov = AbstractCoverage.load(tempcov.persistence_dir, mode='a')
            pl_fixed = fixed_cov._persistence_layer

            # Call update_rtree for each brick using PersistenceLayer builtin
            brick_count = 0
            if not empty_cov:
                for brick in bls:
                    rtree_extents, brick_extents, brick_active_size = pl_fixed.calculate_extents(brick[1][1], bD, tD)
                    pl_fixed.master_manager.update_rtree(brick_count, rtree_extents, obj=brick[0])
                    brick_count += 1

            # Update parameter_bounds property based on each parameter's brick data using deep inspection
            valid_bounds_types = [
                'BooleanType', 'ConstantType', 'QuantityType', 'ConstantRangeType'
            ]
            if not empty_cov:
                for param in pdict.keys():
                    if pdict.get_context(param).param_type.__class__.__name__ in valid_bounds_types:
                        brick_domains_new, new_brick_list, brick_list_spans, tD, bD, min_data_bound, max_data_bound = self.inspect_bricks(self.cov_pth, self._guid, param)
                        # Update the metadata
                        pl_fixed.update_parameter_bounds(param, [min_data_bound, max_data_bound])
            pl_fixed.flush()
            fixed_cov.close()

            # Create backup copy of original Master and Parameter files
            if backup:
                import datetime
                orig_master_file = os.path.join(self.cov_pth, '{0}_master.hdf5'.format(self._guid))

                # Generate the timestamp
                tstamp_format = '%Y%m%d%H%M%S'
                tstamp = datetime.datetime.now().strftime(tstamp_format)
                backup_master_file = os.path.join(self.cov_pth, '{0}_master.{1}.hdf5'.format(self._guid, tstamp))
                shutil.copy2(orig_master_file, backup_master_file)

                for param in pdict.keys():
                    param_orig = os.path.join(orig_dir, param, '{0}.hdf5'.format(param))
                    param_backup = os.path.join(orig_dir, param, '{0}.{1}.hdf5'.format(param, tstamp))
                    shutil.copy2(param_orig, param_backup)

            # Copy Master and Parameter metadata files back to original/broken coverage (cov_pth) location
            if copy_over == True:
                shutil.copy2(os.path.join(tempcov.persistence_dir, '{0}_master.hdf5'.format(self._guid)), os.path.join(self.cov_pth, '{0}_master.hdf5'.format(self._guid)))
                for param in pdict.keys():
                    shutil.copy2(os.path.join(temp_dir, param, '{0}.hdf5'.format(param)), os.path.join(orig_dir, param, '{0}.hdf5'.format(param)))

            # Reanalyze the repaired coverage
            self._ar = self._do_analysis(analyze_bricks=True)

            # Verify repair worked, clean up if not
            if self._ar.is_corrupt:
                # If the files were backed up then revert
                if backup:
                    # Remove backed up files and clean up the repair attempt
                    log.info('Repair attempt failed. Reverting to pre-repair state.')
                    # Use backup copy to replace post-repair file.
                    shutil.copy2(backup_master_file, orig_master_file)
                    # Delete the backup
                    os.remove(backup_master_file)

                    # Iterate over parameters and revert to pre-repair state
                    for param in pdict.keys():
                        param_orig = os.path.join(orig_dir, param, '{0}.hdf5'.format(param))
                        param_backup = os.path.join(orig_dir, param, '{0}.{1}.hdf5'.format(param, tstamp))
                        # Use backup copy to replace post-repair file.
                        shutil.copy2(param_backup, param_orig)
                        # Delete the backup
                        os.remove(param_backup)

                raise ValueError('Coverage repair failed! Revert to stored backup version, if possible.')

            # Remove temporary coverage
            if keep_temp == False:
                shutil.rmtree(tempcov_dir)
            else:
                return tempcov_dir
    else:
        log.info('Coverage is not corrupt, nothing to repair!')
def repair(self, backup=True, copy_over=True, keep_temp=False, reanalyze=False, analyze_bricks=False, detailed_analysis=False):
    """
    Heavy repair tool that recreates a blank persisted Coverage from the broken coverage's
    original construction parameters, then reconstructs the Master and Parameter metadata
    files by inspection of the ION objects and "valid" brick files.

    This variant accesses the Master HDF5 file through HDFLockingFile context managers
    (rather than raw h5py.File handles) so the handles are always released.

    @param backup             back up the original Master/Parameter files before overwriting
    @param copy_over          copy the repaired metadata files back over the broken coverage
    @param keep_temp          keep (and return the path of) the temporary working coverage
    @param reanalyze          force a fresh corruption analysis even if one is cached
    @param analyze_bricks     passed through to the analysis step
    @param detailed_analysis  passed through to the analysis step
    @return: the temp coverage dir if keep_temp is True; otherwise None
    """
    # Reuse the cached analysis result unless the caller asks for a fresh one
    if self._ar is None or reanalyze:
        self._ar = self._do_analysis(analyze_bricks=analyze_bricks, detailed_analysis=detailed_analysis)
    if self._ar.is_corrupt:
        if len(self._ar.get_brick_corruptions()) > 0:
            raise NotImplementedError('Brick corruption. Cannot repair at this time!!!')
        else:
            # Repair the Master and Parameter metadata files
            # Need the ParameterDictionary, TemporalDomain and SpatialDomain
            pdict = ParameterDictionary.load(self._dso.parameter_dictionary)
            tdom = GridDomain.load(self._dso.temporal_domain)
            sdom = GridDomain.load(self._dso.spatial_domain)

            # Set up the working directory for the recovered coverage
            tempcov_dir = tempfile.mkdtemp('covs')

            # Create the temporary Coverage
            tempcov = SimplexCoverage(root_dir=tempcov_dir, persistence_guid=self._guid, name=self._guid, parameter_dictionary=pdict, spatial_domain=sdom, temporal_domain=tdom)
            # Handle to persistence layer for tempcov
            pl = tempcov._persistence_layer

            # Set up the original and temporary coverage path strings
            orig_dir = os.path.join(self.cov_pth, self._guid)
            temp_dir = os.path.join(tempcov.persistence_dir, tempcov.persistence_guid)

            # Insert same number of timesteps into temporary coverage as in broken coverage
            brick_domains_new, new_brick_list, brick_list_spans, tD, bD, min_data_bound, max_data_bound = self.inspect_bricks(self.cov_pth, self._guid, 'time')
            empty_cov = brick_list_spans is None  # If None, there are no brick files --> no timesteps, empty coverage!
            if not empty_cov:  # If None, there are no brick files --> no timesteps, empty coverage!
                bls = [s.value for s in brick_list_spans]
                # Each brick's total active size; summed to recover the timestep count
                maxes = [sum(b[3]) for b in new_brick_list.values()]
                tempcov.insert_timesteps(sum(maxes))

                # Replace metadata in the Master file
                pl.master_manager.brick_domains = brick_domains_new
                pl.master_manager.brick_list = new_brick_list

                # Repair ExternalLinks to brick files: first drop/recreate each
                # parameter's link group under the file lock ...
                with HDFLockingFile(pl.master_manager.file_path, 'r+') as f:
                    for param_name in pdict.keys():
                        del f[param_name]
                        f.create_group(param_name)
                # ... then relink every brick (add_external_link manages its own file access)
                for param_name in pdict.keys():
                    for brick in bls:
                        link_path = '/{0}/{1}'.format(param_name, brick[0])
                        brick_file_name = '{0}.hdf5'.format(brick[0])
                        brick_rel_path = os.path.join(pl.parameter_metadata[param_name].root_dir.replace(tempcov.persistence_dir, '.'), brick_file_name)
                        log.debug('link_path: %s', link_path)
                        log.debug('brick_rel_path: %s', brick_rel_path)
                        pl.master_manager.add_external_link(link_path, brick_rel_path, brick[0])

            pl.flush_values()
            pl.flush()
            tempcov.close()

            # Remove 'rtree' dataset from Master file if it already exists (post domain expansion)
            # to make way for reconstruction
            with HDFLockingFile(pl.master_manager.file_path, 'r+') as f:
                if 'rtree' in f.keys():
                    del f['rtree']

            # Reconstruct 'rtree' dataset
            # Open temporary Coverage and PersistenceLayer objects
            fixed_cov = AbstractCoverage.load(tempcov.persistence_dir, mode='r+')
            pl_fixed = fixed_cov._persistence_layer

            # Call update_rtree for each brick using PersistenceLayer builtin
            brick_count = 0
            if not empty_cov:
                for brick in bls:
                    rtree_extents, brick_extents, brick_active_size = pl_fixed.calculate_extents(brick[1][1],bD,tD)
                    pl_fixed.master_manager.update_rtree(brick_count, rtree_extents, obj=brick[0])
                    brick_count += 1

            # Update parameter_bounds property based on each parameter's brick data using deep inspection
            valid_bounds_types = [
                'BooleanType', 'ConstantType', 'QuantityType', 'ConstantRangeType'
            ]
            if not empty_cov:
                for param in pdict.keys():
                    if pdict.get_context(param).param_type.__class__.__name__ in valid_bounds_types:
                        brick_domains_new, new_brick_list, brick_list_spans, tD, bD, min_data_bound, max_data_bound = self.inspect_bricks(self.cov_pth, self._guid, param)
                        # Update the metadata
                        pl_fixed.update_parameter_bounds(param, [min_data_bound, max_data_bound])
            pl_fixed.flush()
            fixed_cov.close()

            # Create backup copy of original Master and Parameter files
            if backup:
                import datetime
                orig_master_file = os.path.join(self.cov_pth, '{0}_master.hdf5'.format(self._guid))

                # Generate the timestamp
                tstamp_format = '%Y%m%d%H%M%S'
                tstamp = datetime.datetime.now().strftime(tstamp_format)
                backup_master_file = os.path.join(self.cov_pth, '{0}_master.{1}.hdf5'.format(self._guid, tstamp))
                shutil.copy2(orig_master_file, backup_master_file)

                for param in pdict.keys():
                    param_orig = os.path.join(orig_dir, param, '{0}.hdf5'.format(param))
                    param_backup = os.path.join(orig_dir, param, '{0}.{1}.hdf5'.format(param, tstamp))
                    shutil.copy2(param_orig, param_backup)

            # Copy Master and Parameter metadata files back to original/broken coverage (cov_pth) location
            if copy_over == True:
                shutil.copy2(os.path.join(tempcov.persistence_dir, '{0}_master.hdf5'.format(self._guid)), os.path.join(self.cov_pth, '{0}_master.hdf5'.format(self._guid)))
                for param in pdict.keys():
                    shutil.copy2(os.path.join(temp_dir, param, '{0}.hdf5'.format(param)), os.path.join(orig_dir, param, '{0}.hdf5'.format(param)))

            # Reanalyze the repaired coverage
            self._ar = self._do_analysis(analyze_bricks=True)

            # Verify repair worked, clean up if not
            if self._ar.is_corrupt:
                # If the files were backed up then revert
                if backup:
                    # Remove backed up files and clean up the repair attempt
                    log.info('Repair attempt failed. Reverting to pre-repair state.')
                    # Use backup copy to replace post-repair file.
                    shutil.copy2(backup_master_file, orig_master_file)
                    # Delete the backup
                    os.remove(backup_master_file)

                    # Iterate over parameters and revert to pre-repair state
                    for param in pdict.keys():
                        param_orig = os.path.join(orig_dir, param, '{0}.hdf5'.format(param))
                        param_backup = os.path.join(orig_dir, param, '{0}.{1}.hdf5'.format(param, tstamp))
                        # Use backup copy to replace post-repair file.
                        shutil.copy2(param_backup, param_orig)
                        # Delete the backup
                        os.remove(param_backup)

                raise ValueError('Coverage repair failed! Revert to stored backup version, if possible.')

            # Remove temporary coverage
            if keep_temp == False:
                shutil.rmtree(tempcov_dir)
            else:
                return tempcov_dir
    else:
        log.info('Coverage is not corrupt, nothing to repair!')