def test_supertask(self):
    # this test has 4 supertasks generating 4 + 5 + 3 + 5 = 17 subtasks
    # and 5 real outputs (one from the yield {})
    allargs = [('aaaaeeeeiii',),
               ('uuuuaaaaeeeeiii',),
               ('aaaaaaaaeeeeiii',),
               ('aaaaeeeeiiiiiooooooo',)]
    numchars = sum(len(arg) for arg, in allargs)  # 61
    tmpdir = tempfile.mkdtemp()
    tmp = os.path.join(tmpdir, 'calc_1.hdf5')
    performance.init_performance(tmp, swmr=True)
    smap = parallel.Starmap(supertask, allargs, h5=hdf5.File(tmp, 'a'))
    res = smap.reduce()
    smap.h5.close()
    self.assertEqual(res, {'n': numchars})
    # check that the correct information is stored in the hdf5 file
    with hdf5.File(tmp, 'r') as h5:
        num = general.countby(h5['performance_data'][()], 'operation')
        self.assertEqual(num[b'waiting'], 4)
        self.assertEqual(num[b'total supertask'], 5)  # outputs
        self.assertEqual(num[b'total get_length'], 17)  # subtasks
        self.assertGreater(len(h5['task_info']), 0)
    shutil.rmtree(tmpdir)

def sitecol(self):
    """
    Read the site collection from .filename and cache it
    """
    if 'sitecol' in vars(self):
        return self.__dict__['sitecol']
    if self.filename is None or not os.path.exists(self.filename):
        # case of nofilter/None sitecol
        return
    with hdf5.File(self.filename, 'r') as h5:
        self.__dict__['sitecol'] = sc = h5.get('sitecol')
    return sc

def convert_xml_hdf5(input_file, output_file):
    with hdf5.File(output_file, 'w') as out:
        inp = nrml.read(input_file)
        if inp['xmlns'].endswith('nrml/0.4'):  # old version
            d = os.path.dirname(input_file) or '.'
            raise ValueError('Please upgrade with `oq upgrade_nrml %s`' % d)
        elif inp['xmlns'].endswith('nrml/0.5'):  # current version
            sm = inp.sourceModel
        else:  # not a NRML
            raise ValueError('Unknown NRML: %s' % inp['xmlns'])
        out.save(node.node_to_dict(sm))
    return output_file

def hdf5new(datadir=None):
    """
    Return a new `hdf5.File` instance with a name determined by the last
    calculation in the datadir (plus one). Set the .path attribute to the
    generated filename.
    """
    datadir = datadir or get_datadir()
    calc_id = get_last_calc_id(datadir) + 1
    fname = os.path.join(datadir, 'calc_%d.hdf5' % calc_id)
    new = hdf5.File(fname, 'w')
    new.path = fname
    return new

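# Hedged usage sketch for hdf5new(): it relies only on the helpers referenced
# above (get_datadir, get_last_calc_id); the dataset name written below is
# purely illustrative, not part of any real schema.
h5 = hdf5new()                  # creates e.g. <datadir>/calc_N.hdf5 in 'w' mode
print(h5.path)                  # the generated filename is kept on .path
h5['example_dataset'] = [1, 2, 3]  # hypothetical dataset, just to show writing
h5.close()
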
def open(self, mode):
    """
    Open the underlying .hdf5 file and the parent, if any
    """
    if self.hdf5 == ():  # not already open
        kw = dict(mode=mode, libver='latest')
        if mode == 'r':
            kw['swmr'] = True
        try:
            self.hdf5 = hdf5.File(self.hdf5path, **kw)
        except OSError as exc:
            raise OSError('%s in %s' % (exc, self.hdf5path))

def test_supertask(self):
    # this test has 4 supertasks generating 4 + 5 + 3 + 5 = 17 subtasks
    # and 18 outputs (1 output does not produce a subtask)
    allargs = [('aaaaeeeeiii',),
               ('uuuuaaaaeeeeiii',),
               ('aaaaaaaaeeeeiii',),
               ('aaaaeeeeiiiiiooooooo',)]
    numchars = sum(len(arg) for arg, in allargs)  # 61
    tmpdir = tempfile.mkdtemp()
    tmp = os.path.join(tmpdir, 'calc_1.hdf5')
    hdf5.File(tmp, 'w').close()  # the file must exist
    smap = parallel.Starmap(supertask, allargs, hdf5path=tmp)
    res = smap.reduce()
    self.assertEqual(res, {'n': numchars})
    # check that the correct information is stored in the hdf5 file
    with hdf5.File(tmp, 'r') as h5:
        num = general.countby(h5['performance_data'][()], 'operation')
        self.assertEqual(num[b'waiting'], 4)
        self.assertEqual(num[b'total supertask'], 5)  # outputs
        self.assertEqual(num[b'total get_length'], 17)  # subtasks
        self.assertGreater(len(h5['task_info/supertask']), 0)
    shutil.rmtree(tmpdir)

def sitecol(self):
    """
    Read the site collection from .filename and cache it
    """
    if 'sitecol' in vars(self):
        return self.__dict__['sitecol']
    if self.filename is None:
        return
    elif not os.path.exists(self.filename):
        raise FileNotFoundError('%s: shared_dir issue?' % self.filename)
    with hdf5.File(self.filename, 'r') as h5:
        self.__dict__['sitecol'] = sc = h5.get('sitecol')
    return sc

def test_supertask(self):
    # this test has 4 supertasks generating 4 + 5 + 3 + 5 = 17 subtasks
    allargs = [('aaaaeeeeiii',),
               ('uuuuaaaaeeeeiii',),
               ('aaaaaaaaeeeeiii',),
               ('aaaaeeeeiiiiiooooooo',)]
    numchars = sum(len(arg) for arg, in allargs)  # 61
    tmpdir = tempfile.mkdtemp()
    tmp = os.path.join(tmpdir, 'calc_1.hdf5')
    performance.init_performance(tmp)
    smap = parallel.Starmap(supertask, allargs, h5=hdf5.File(tmp, 'a'))
    res = smap.reduce()
    smap.h5.close()
    self.assertEqual(res, {'n': numchars})
    # check that the correct information is stored in the hdf5 file
    with hdf5.File(tmp, 'r') as h5:
        num = general.countby(h5['performance_data'][()], 'operation')
        self.assertEqual(num[b'waiting'], 4)
        self.assertEqual(num[b'total supertask'], 4)  # tasks
        self.assertEqual(num[b'total get_length'], 17)  # subtasks
        info = h5['task_info'][()]
        dic = dict(general.fast_agg3(info, 'taskname', ['received']))
        self.assertGreater(dic[b'get_length'], 0)
        self.assertGreater(dic[b'supertask'], 0)
    shutil.rmtree(tmpdir)

def __init__(self, sitecol, integration_distance, hdf5path=None):
    if sitecol is not None and len(sitecol) < len(sitecol.complete):
        raise ValueError('%s is not complete!' % sitecol)
    self.hdf5path = hdf5path
    if hdf5path and (
            config.distribution.oq_distribute in ('no', 'processpool')
            or config.directory.shared_dir):
        # store the sitecol
        with hdf5.File(hdf5path, 'w') as h5:
            h5['sitecol'] = sitecol
    else:  # keep the sitecol in memory
        self.__dict__['sitecol'] = sitecol
    self.integration_distance = (
        IntegrationDistance(integration_distance)
        if isinstance(integration_distance, dict)
        else integration_distance)

def __init__(self, sitecol, integration_distance, filename=None):
    if sitecol is not None and len(sitecol) < len(sitecol.complete):
        raise ValueError('%s is not complete!' % sitecol)
    elif sitecol is None:
        integration_distance = {}
    self.filename = filename
    self.integration_distance = (
        IntegrationDistance(integration_distance)
        if isinstance(integration_distance, dict)
        else integration_distance)
    if filename and not os.path.exists(filename):
        # store the sitecol
        with hdf5.File(filename, 'w') as h5:
            h5['sitecol'] = sitecol if sitecol else ()
    else:  # keep the sitecol in memory
        self.__dict__['sitecol'] = sitecol

def export_asset_loss_table(ekey, dstore):
    """
    Export in parallel the asset loss table from the datastore.

    NB1: for large calculations this may run out of memory
    NB2: due to a heisenbug in the parallel reading of .hdf5 files this
    works reliably only if the datastore has been created by a different
    process

    The recommendation is: *do not use this exporter*: rather, study its
    source code and write what you need. Every postprocessing is different.
    """
    key, fmt = ekey
    oq = dstore['oqparam']
    assetcol = dstore['assetcol']
    arefs = dstore['asset_refs'].value
    avals = assetcol.values()
    loss_types = dstore.get_attr('all_loss_ratios', 'loss_types').split()
    dtlist = [(lt, F32) for lt in loss_types]
    if oq.insured_losses:
        for lt in loss_types:
            dtlist.append((lt + '_ins', F32))
    lrs_dt = numpy.dtype([('rlzi', U16), ('losses', dtlist)])
    fname = dstore.export_path('%s.%s' % ekey)
    monitor = performance.Monitor(key, fname)
    lrgetter = riskinput.LossRatiosGetter(dstore)
    aids = range(len(assetcol))
    allargs = [(lrgetter, list(block), monitor)
               for block in split_in_blocks(aids, oq.concurrent_tasks)]
    dstore.close()  # avoid OSError: Can't read data (Wrong b-tree signature)
    L = len(loss_types)
    with hdf5.File(fname, 'w') as f:
        nbytes = 0
        total = numpy.zeros(len(dtlist), F32)
        for pairs in parallel.Starmap(get_loss_ratios, allargs):
            for aid, data in pairs:
                asset = assetcol[aid]
                avalue = avals[aid]
                for l, lt in enumerate(loss_types):
                    aval = avalue[lt]
                    for i in range(oq.insured_losses + 1):
                        data['ratios'][:, l + L * i] *= aval
                aref = arefs[asset.idx]
                f[b'asset_loss_table/' + aref] = data.view(lrs_dt)
                total += data['ratios'].sum(axis=0)
                nbytes += data.nbytes
        f['asset_loss_table'].attrs['loss_types'] = ' '.join(loss_types)
        f['asset_loss_table'].attrs['total'] = total
        f['asset_loss_table'].attrs['nbytes'] = nbytes
    return [fname]

def convert_xml_hdf5(input_file, output_file):
    with hdf5.File(output_file, 'w') as out:
        inp = nrml.read(input_file)
        if inp['xmlns'].endswith('nrml/0.4'):  # old version
            d = os.path.dirname(input_file) or '.'
            raise ValueError('Please upgrade with `oq upgrade_nrml %s`' % d)
        elif inp['xmlns'].endswith('nrml/0.5'):  # current version
            sm = inp.sourceModel
        else:  # not a NRML
            raise ValueError('Unknown NRML: %s' % inp['xmlns'])
        for group in sm:
            for src in group:  # make the trt implicit
                del src.attrib['tectonicRegion']
        out.save(node.node_to_dict(sm))
    return output_file

def to_python(fname, converter):
    """
    Convert a source model .hdf5 file into a :class:`SourceModel` object
    """
    with hdf5.File(fname, 'r') as f:
        source_model = f['/']
    for sg in source_model:
        for src in sg:
            if hasattr(src, 'mfd'):  # multipoint source
                src.tom = converter.tom
                kwargs = getattr(src.mfd, 'kwargs', {})
                if 'bin_width' not in kwargs:
                    kwargs['bin_width'] = [converter.width_of_mfd_bin]
    return source_model

def convert_nonParametricSeismicSource(fname, node):
    """
    Convert the given node into a non-parametric source object.

    :param fname:
        full pathname to the XML file associated to the node
    :param node:
        a Node object coming from an XML file
    :returns:
        a :class:`openquake.hazardlib.source.NonParametricSeismicSource`
        instance
    """
    trt = node.attrib.get('tectonicRegion')
    rups_weights = None
    if 'rup_weights' in node.attrib:
        rups_weights = F64(node['rup_weights'].split())
    nps = source.NonParametricSeismicSource(
        node['id'], node['name'], trt, [], [])
    nps.splittable = 'rup_weights' not in node.attrib
    path = os.path.splitext(fname)[0] + '.hdf5'
    hdf5_fname = path if os.path.exists(path) else None
    if hdf5_fname:
        # read the rupture data from the HDF5 file
        assert node.text is None, node.text
        with hdf5.File(hdf5_fname, 'r') as h:
            dic = {k: d[:] for k, d in h[node['id']].items()}
        nps.fromdict(dic, rups_weights)
        num_probs = len(dic['probs_occur'])
    else:
        # read the rupture data from the XML nodes
        num_probs = None
        for i, rupnode in enumerate(node):
            po = rupnode['probs_occur']
            probs = pmf.PMF(valid.pmf(po))
            if num_probs is None:  # first time
                num_probs = len(probs.data)
            elif len(probs.data) != num_probs:
                # probs_occur must have uniform length for all ruptures
                raise ValueError(
                    'prob_occurs=%s has %d elements, expected %s'
                    % (po, len(probs.data), num_probs))
            rup = RuptureConverter(5.).convert_node(rupnode)
            rup.tectonic_region_type = trt
            rup.weight = None if rups_weights is None else rups_weights[i]
            nps.data.append((rup, probs))
    nps.num_probs_occur = num_probs
    return nps

def post_execute(self, pmap_by_grp_id):
    """
    Collect the hazard curves by realization and export them.

    :param pmap_by_grp_id: a dictionary grp_id -> hazard curves
    """
    oq = self.oqparam
    try:
        csm_info = self.csm.info
    except AttributeError:
        csm_info = self.datastore['csm_info']
    trt_by_grp = csm_info.grp_by("trt")
    grp_name = {grp.id: grp.name for sm in csm_info.source_models
                for grp in sm.src_groups}
    data = []
    with self.monitor('saving probability maps', autoflush=True):
        for grp_id, pmap in pmap_by_grp_id.items():
            if pmap:  # pmap can be missing if the group is filtered away
                base.fix_ones(pmap)  # avoid saving PoEs == 1
                trt = trt_by_grp[grp_id]
                key = 'poes/grp-%02d' % grp_id
                self.datastore[key] = pmap
                self.datastore.set_attrs(key, trt=trt)
                extreme = max(
                    get_extreme_poe(pmap[sid].array, oq.imtls)
                    for sid in pmap)
                data.append((grp_id, grp_name[grp_id], extreme))
                if 'rup' in set(self.datastore):
                    self.datastore.set_nbytes('rup/grp-%02d' % grp_id)
                    tot_ruptures = sum(
                        len(r) for r in self.datastore['rup'].values())
                    self.datastore.set_attrs(
                        'rup', tot_ruptures=tot_ruptures)
    if oq.hazard_calculation_id is None and 'poes' in self.datastore:
        self.datastore.set_nbytes('poes')
        self.datastore['disagg_by_grp'] = numpy.array(
            sorted(data), grp_extreme_dt)
        # save a copy of the poes in hdf5cache
        with hdf5.File(self.hdf5cache) as cache:
            cache['oqparam'] = oq
            self.datastore.hdf5.copy('poes', cache)
        self.calc_stats(self.hdf5cache)

def read_inputs(self):
    """
    Read risk data and sources if any
    """
    oq = self.oqparam
    self._read_risk_data()
    self.check_overflow()  # check if self.sitecol is too large
    if ('amplification' in oq.inputs and
            oq.amplification_method == 'kernel'):
        logging.info('Reading %s', oq.inputs['amplification'])
        df = readinput.get_amplification(oq)
        check_amplification(df, self.sitecol)
        self.af = AmplFunction.from_dframe(df)
    if getattr(self, 'sitecol', None):
        # can be None for the ruptures-only calculator
        with hdf5.File(self.datastore.tempname, 'w') as tmp:
            tmp['sitecol'] = self.sitecol
    elif (oq.calculation_mode == 'disaggregation' and
            oq.max_sites_disagg < len(self.sitecol)):
        raise ValueError('Please set max_sites_disagg=%d in %s' % (
            len(self.sitecol), oq.inputs['job_ini']))
    elif oq.disagg_by_src and len(self.sitecol) > oq.max_sites_disagg:
        raise ValueError(
            'There are too many sites to use disagg_by_src=true')
    if ('source_model_logic_tree' in oq.inputs and
            oq.hazard_calculation_id is None):
        with self.monitor('composite source model', measuremem=True):
            self.csm = csm = readinput.get_composite_source_model(
                oq, self.datastore.hdf5)
            srcs = [src for sg in csm.src_groups for src in sg]
            if not srcs:
                raise RuntimeError('All sources were discarded!?')
            logging.info('Checking the sources bounding box')
            sids = self.src_filter().within_bbox(srcs)
            if len(sids) == 0:
                raise RuntimeError('All sources were discarded!?')
            self.full_lt = csm.full_lt
    self.init()  # do this at the end of pre-execute
    if (not oq.hazard_calculation_id and
            oq.calculation_mode != 'preclassical' and
            not oq.save_disk_space):
        self.gzip_inputs()

def init(self):
    """
    Read the poes and set the .data attribute with the hazard curves
    """
    if hasattr(self, '_pmap'):  # already initialized
        return self._pmap
    dstore = hdf5.File(self.filename, 'r')
    self.rlzs_by_g = dstore['rlzs_by_g'][()]
    # populate _pmap
    dset = dstore['_poes']  # shape (N, L, G)
    L, G = dset.shape[1:]
    self._pmap = probability_map.ProbabilityMap.build(L, G, self.sids)
    for sid, array in zip(self.sids, dset[list(self.sids)]):
        self._pmap[sid].array = array
    self.nbytes = self._pmap.nbytes
    dstore.close()
    return self._pmap

def gen_rupture_getters(self):
    """
    :returns: a list of RuptureGetters
    """
    dstore = (self.datastore.parent if self.datastore.parent
              else self.datastore)
    hdf5cache = dstore.hdf5cache()
    mode = 'r+' if os.path.exists(hdf5cache) else 'w'
    with hdf5.File(hdf5cache, mode) as cache:
        if 'ruptures' not in cache:
            dstore.hdf5.copy('ruptures', cache)
        if 'rupgeoms' not in cache:
            dstore.hdf5.copy('rupgeoms', cache)
    yield from gen_rupture_getters(
        dstore, concurrent_tasks=self.oqparam.concurrent_tasks or 1,
        hdf5cache=hdf5cache)
    if self.datastore.parent:
        self.datastore.parent.close()

def post_execute(self, pmap_by_grp_id):
    """
    Collect the hazard curves by realization and export them.

    :param pmap_by_grp_id: a dictionary grp_id -> hazard curves
    """
    oq = self.oqparam
    grp_trt = self.csm_info.grp_by("trt")
    grp_source = self.csm_info.grp_by("name")
    if oq.disagg_by_src:
        src_name = {src.src_group_id: src.name
                    for src in self.csm.get_sources()}
    data = []
    with self.monitor('saving probability maps', autoflush=True):
        for grp_id, pmap in pmap_by_grp_id.items():
            if pmap:  # pmap can be missing if the group is filtered away
                fix_ones(pmap)  # avoid saving PoEs == 1
                key = 'poes/grp-%02d' % grp_id
                self.datastore[key] = pmap
                self.datastore.set_attrs(key, trt=grp_trt[grp_id])
                if oq.disagg_by_src:
                    data.append(
                        (grp_id, grp_source[grp_id], src_name[grp_id]))
    if oq.hazard_calculation_id is None and 'poes' in self.datastore:
        self.datastore.set_nbytes('poes')
        if oq.disagg_by_src and self.csm_info.get_num_rlzs() == 1:
            # this is useful for disaggregation, which is implemented
            # only for the case of a single realization
            self.datastore['disagg_by_src/source_id'] = numpy.array(
                sorted(data), grp_source_dt)
        # save a copy of the poes in hdf5cache
        if hasattr(self, 'hdf5cache'):
            with hdf5.File(self.hdf5cache) as cache:
                cache['oqparam'] = oq
                self.datastore.hdf5.copy('poes', cache)
            self.calc_stats(self.hdf5cache)
        else:
            self.calc_stats(self.datastore)
    self.datastore.open('r+')
    self.save_hmaps()

def init(self):
    """
    Read the poes and set the .data attribute with the hazard curves
    """
    if hasattr(self, '_pmap'):  # already initialized
        return self._pmap
    dstore = hdf5.File(self.filename, 'r')
    self.rlzs_by_g = dstore['rlzs_by_g'][()]
    # populate _pmap
    dset = dstore['_poes']  # shape (G, N, L)
    G, N, L = dset.shape
    self._pmap = probability_map.ProbabilityMap.build(L, G, self.sids)
    data = dset[:, self.sids, :]  # shape (G, N, L)
    for i, sid in enumerate(self.sids):
        self._pmap[sid].array = data[:, i, :].T  # shape (L, G)
    self.nbytes = self._pmap.nbytes
    dstore.close()
    return self._pmap

def execute(self):
    oq = self.oqparam
    self.set_param(
        num_taxonomies=self.assetcol.num_taxonomies_by_site(),
        maxweight=oq.ebrisk_maxweight / (oq.concurrent_tasks or 1),
        epspath=cache_epsilons(self.datastore, oq, self.assetcol,
                               self.riskmodel, self.E))
    parent = self.datastore.parent
    if parent:
        hdf5path = parent.filename
        grp_indices = parent['ruptures'].attrs['grp_indices']
        nruptures = len(parent['ruptures'])
    else:
        hdf5path = self.datastore.hdf5cache()
        grp_indices = self.datastore['ruptures'].attrs['grp_indices']
        nruptures = len(self.datastore['ruptures'])
        with hdf5.File(hdf5path, 'r+') as cache:
            self.datastore.hdf5.copy('weights', cache)
            self.datastore.hdf5.copy('ruptures', cache)
            self.datastore.hdf5.copy('rupgeoms', cache)
    self.init_logic_tree(self.csm_info)
    smap = parallel.Starmap(
        self.core_task.__func__, monitor=self.monitor())
    trt_by_grp = self.csm_info.grp_by("trt")
    samples = self.csm_info.get_samples_by_grp()
    rlzs_by_gsim_grp = self.csm_info.get_rlzs_by_gsim_grp()
    ruptures_per_block = numpy.ceil(nruptures / (oq.concurrent_tasks or 1))
    first_event = 0
    for grp_id, rlzs_by_gsim in rlzs_by_gsim_grp.items():
        start, stop = grp_indices[grp_id]
        for indices in general.block_splitter(range(start, stop),
                                              ruptures_per_block):
            rgetter = getters.RuptureGetter(
                hdf5path, list(indices), grp_id, trt_by_grp[grp_id],
                samples[grp_id], rlzs_by_gsim, first_event)
            first_event += rgetter.num_events
            smap.submit(rgetter, self.src_filter, self.param)
    self.events_per_sid = []
    self.gmf_nbytes = 0
    res = smap.reduce(self.agg_dicts, numpy.zeros(self.N))
    logging.info('Produced %s of GMFs', general.humansize(self.gmf_nbytes))
    return res

def get_assets_by_taxo(assets, tempname=None):
    """
    :param assets: an array of assets
    :param tempname: hdf5 file where the epsilons are (or None)
    :returns: assets_by_taxo with attributes eps and idxs
    """
    assets_by_taxo = AccumDict(group_array(assets, 'taxonomy'))
    assets_by_taxo.idxs = numpy.argsort(numpy.concatenate([
        a['ordinal'] for a in assets_by_taxo.values()]))
    assets_by_taxo.eps = {}
    if tempname is None:  # no epsilons
        return assets_by_taxo
    # otherwise read the epsilons and group them by taxonomy
    with hdf5.File(tempname, 'r') as h5:
        dset = h5['epsilon_matrix']
        for taxo, assets in assets_by_taxo.items():
            lst = [dset[aid] for aid in assets['ordinal']]
            assets_by_taxo.eps[taxo] = numpy.array(lst)
    return assets_by_taxo

def init(self):
    """
    Build the probability curves from the underlying dataframes
    """
    if self._pmap:
        return self._pmap
    G = len(self.rlzs_by_g)
    with hdf5.File(self.filename) as dstore:
        for start, stop in self.slices:
            poes_df = dstore.read_df('_poes', slc=slice(start, stop))
            for sid, df in poes_df.groupby('sid'):
                try:
                    array = self._pmap[sid].array
                except KeyError:
                    array = numpy.zeros((self.L, G))
                    self._pmap[sid] = probability_map.ProbabilityCurve(
                        array)
                array[df.lid, df.gid] = df.poe
    return self._pmap

def get_source_ids(oqparam):
    """
    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :returns:
        the complete set of source IDs found in all the source models
    """
    source_ids = set()
    for fname in oqparam.inputs['source']:
        if fname.endswith('.hdf5'):
            with hdf5.File(fname, 'r') as f:
                for sg in f['/']:
                    for src in sg:
                        source_ids.add(src.source_id)
        else:
            for sg in read_source_groups(fname):
                for src_node in sg:
                    source_ids.add(src_node['id'])
    return source_ids

def init_performance(hdf5file, swmr=False):
    """
    :param hdf5file: file name or hdf5.File instance
    """
    fname = isinstance(hdf5file, str)
    h5 = hdf5.File(hdf5file, 'a') if fname else hdf5file
    if 'performance_data' not in h5:
        hdf5.create(h5, 'performance_data', perf_dt)
    if 'task_info' not in h5:
        hdf5.create(h5, 'task_info', task_info_dt)
    if 'task_sent' not in h5:
        h5['task_sent'] = '{}'
    if swmr:
        try:
            h5.swmr_mode = True
        except ValueError as exc:
            raise ValueError('%s: %s' % (hdf5file, exc))
    if fname:
        h5.close()

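# Hedged usage sketch for init_performance(): as the body above shows, it
# accepts either a file name (opened in 'a' mode and closed before returning)
# or an already open hdf5.File, which is left open for the caller. The path
# below is illustrative only.
init_performance('/tmp/calc_1.hdf5')       # pass a file name
h5 = hdf5.File('/tmp/calc_1.hdf5', 'a')
init_performance(h5)                       # or pass an open file
h5.close()
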
def store_sm(smodel, filename, monitor):
    """
    :param smodel: a :class:`openquake.hazardlib.nrml.SourceModel` instance
    :param filename: path to an hdf5 file (cache_XXX.hdf5)
    :param monitor: a Monitor instance with an .hdf5 attribute
    """
    h5 = monitor.hdf5
    with monitor('store source model'):
        sources = h5['source_info']
        source_geom = h5['source_geom']
        gid = len(source_geom)
        for sg in smodel:
            if filename:
                with hdf5.File(filename, 'r+') as hdf5cache:
                    hdf5cache['grp-%02d' % sg.id] = sg
            srcs = []
            geoms = []
            for src in sg:
                srcgeom = src.geom()
                n = len(srcgeom)
                geom = numpy.zeros(n, point3d)
                geom['lon'], geom['lat'], geom['depth'] = srcgeom.T
                if len(geom) > 1:  # more than a point source
                    msg = 'source %s' % src.source_id
                    try:
                        geo.utils.check_extent(
                            geom['lon'], geom['lat'], msg)
                    except ValueError as err:
                        logging.error(str(err))
                dic = {k: v for k, v in vars(src).items()
                       if k != 'id' and k != 'src_group_id'}
                src.checksum = zlib.adler32(pickle.dumps(dic))
                srcs.append((sg.id, src.source_id, src.code, gid, gid + n,
                             src.num_ruptures, 0, 0, 0, src.checksum))
                geoms.append(geom)
                gid += n
            if geoms:
                hdf5.extend(source_geom, numpy.concatenate(geoms))
            if sources:
                hdf5.extend(sources, numpy.array(srcs, source_info_dt))

def get_rupture_getters(self):
    """
    :returns: a list of RuptureGetters
    """
    dstore = (self.datastore.parent if self.datastore.parent
              else self.datastore)
    hdf5cache = dstore.hdf5cache()
    with hdf5.File(hdf5cache, 'r+') as cache:
        if 'rupgeoms' not in cache:
            dstore.hdf5.copy('rupgeoms', cache)
    rgetters = get_rupture_getters(
        dstore, split=self.oqparam.concurrent_tasks, hdf5cache=hdf5cache)
    num_events = self.E if hasattr(self, 'E') else len(dstore['events'])
    num_ruptures = len(dstore['ruptures'])
    logging.info('Found {:,d} ruptures and {:,d} events'.format(
        num_ruptures, num_events))
    if self.datastore.parent:
        self.datastore.parent.close()
    return rgetters

def flush(self, hdf5path):
    """
    Save the measurements on the performance file
    """
    if not self.children:
        data = self.get_data()
    else:
        lst = [self.get_data()]
        for child in self.children:
            lst.append(child.get_data())
            child.reset()
        data = numpy.concatenate(lst)
    if len(data) == 0:  # no information
        return
    elif not os.path.exists(hdf5path):
        with hdf5.File(hdf5path, 'w') as h5:
            hdf5.create(h5, 'performance_data', perf_dt)
            hdf5.create(h5, 'task_info', task_info_dt)
    hdf5.extend3(hdf5path, 'performance_data', data)
    self.reset()

def test_from_sites(self):
    s1 = Site(location=Point(10, 20, 30), vs30=1.2, vs30measured=True,
              z1pt0=3.4, z2pt5=5.6, backarc=True)
    s2 = Site(location=Point(-1.2, -3.4, -5.6), vs30=55.4,
              vs30measured=False, z1pt0=66.7, z2pt5=88.9, backarc=False)
    cll = SiteCollection([s1, s2])
    self.assertTrue((cll.vs30 == [1.2, 55.4]).all())
    self.assertTrue((cll.vs30measured == [True, False]).all())
    self.assertTrue((cll.z1pt0 == [3.4, 66.7]).all())
    self.assertTrue((cll.z2pt5 == [5.6, 88.9]).all())
    self.assertTrue((cll.mesh.lons == [10, -1.2]).all())
    self.assertTrue((cll.mesh.lats == [20, -3.4]).all())
    self.assertTrue((cll.backarc == [True, False]).all())
    self.assertIs(cll.mesh.depths, None)
    for arr in (cll.vs30, cll.z1pt0, cll.z2pt5):
        self.assertIsInstance(arr, numpy.ndarray)
        self.assertEqual(arr.flags.writeable, False)
        self.assertEqual(arr.dtype, float)
    for arr in (cll.vs30measured, cll.backarc):
        self.assertIsInstance(arr, numpy.ndarray)
        self.assertEqual(arr.flags.writeable, False)
        self.assertEqual(arr.dtype, bool)
    self.assertEqual(len(cll), 2)
    # test serialization to hdf5
    fd, fpath = tempfile.mkstemp(suffix='.hdf5')
    os.close(fd)
    with hdf5.File(fpath, 'w') as f:
        f['folder'] = dict(sitecol=cll, b=[2, 3])
        newcll = f['folder/sitecol']
        self.assertEqual(newcll, cll)
        self.assertEqual(list(f['folder/b']), [2, 3])
    os.remove(fpath)

def init(self):
    """
    Initialize the computers. Should be called on the workers
    """
    if hasattr(self, 'computers'):  # init already called
        return
    with hdf5.File(self.rupgetter.filename, 'r') as parent:
        self.weights = parent['weights'][()]
    self.computers = []
    for ebr in self.rupgetter.get_ruptures(self.srcfilter):
        sitecol = self.sitecol.filtered(ebr.sids)
        try:
            computer = calc.gmf.GmfComputer(
                ebr, sitecol, self.oqparam.imtls, self.cmaker,
                self.oqparam.truncation_level, self.correl_model)
        except FarAwayRupture:
            # due to numeric errors, ruptures within the maximum_distance
            # when written, can be outside when read; I found a case with
            # a distance of 99.9996936 km over a maximum distance of 100 km
            continue
        self.computers.append(computer)