def run_tiles(num_tiles, job_ini, poolsize=0):
    """
    Run a hazard calculation by splitting the sites into tiles.
    WARNING: this is experimental and meant only for internal users
    """
    t0 = time.time()
    oq = readinput.get_oqparam(job_ini)
    num_sites = len(readinput.get_mesh(oq))
    task_args = [(job_ini, slc)
                 for slc in general.split_in_slices(num_sites, num_tiles)]
    if poolsize == 0:  # no pool
        Starmap = parallel.Sequential
    elif os.environ.get('OQ_DISTRIBUTE') == 'celery':
        Starmap = parallel.Processmap  # celery plays only with processes
    else:  # multiprocessing plays only with threads
        Starmap = parallel.Threadmap
    parent_child = [None, None]

    def agg(calc_ids, calc_id):
        if not calc_ids:  # first calculation
            parent_child[0] = calc_id
        parent_child[1] = calc_id
        logs.dbcmd('update_parent_child', parent_child)
        logging.warn('Finished calculation %d of %d',
                     len(calc_ids) + 1, num_tiles)
        return calc_ids + [calc_id]

    calc_ids = Starmap(engine.run_tile, task_args, poolsize).reduce(agg, [])
    datadir = datastore.get_datadir()
    for calc_id in calc_ids:
        print(os.path.join(datadir, 'calc_%d.hdf5' % calc_id))
    print('Total calculation time: %.1f h' % ((time.time() - t0) / 3600.))
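# ``general.split_in_slices`` above partitions the num_sites site indices
# into contiguous slices, one per tile. A minimal illustrative sketch of
# the idea (an assumption for exposition, NOT the actual
# openquake.baselib.general implementation, whose rounding may differ):
def _split_in_slices_sketch(number, num_slices):
    """Yield up to ``num_slices`` contiguous slices covering range(number)."""
    assert num_slices > 0
    step = -(-number // num_slices)  # ceiling division
    for start in range(0, number, step):
        yield slice(start, min(start + step, number))

# >>> list(_split_in_slices_sketch(10, 3))
# [slice(0, 4, None), slice(4, 8, None), slice(8, 10, None)]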
def post_execute(self, times):
    """
    Compute and store average losses from the losses_by_event dataset,
    and then loss curves and maps.
    """
    self.datastore.set_attrs('task_info/start_ebrisk', times=times)
    oq = self.oqparam
    elt_length = len(self.datastore['losses_by_event'])
    builder = get_loss_builder(self.datastore)
    self.build_datasets(builder)
    mon = performance.Monitor(hdf5=hdf5.File(self.datastore.hdf5cache()))
    smap = parallel.Starmap(compute_loss_curves_maps, monitor=mon)
    self.datastore.close()
    acc = []
    ct = oq.concurrent_tasks or 1
    for elt_slice in general.split_in_slices(elt_length, ct):
        smap.submit(self.datastore.filename, elt_slice,
                    oq.conditional_loss_poes, oq.individual_curves)
    acc = smap.reduce(acc=[])
    # copy performance information from the cache to the datastore
    pd = mon.hdf5['performance_data'].value
    hdf5.extend3(self.datastore.filename, 'performance_data', pd)
    self.datastore.open('r+')  # reopen
    self.datastore['task_info/compute_loss_curves_and_maps'] = (
        mon.hdf5['task_info/compute_loss_curves_maps'].value)
    with self.monitor('saving loss_curves and maps', autoflush=True):
        for name, idx, arr in acc:
            for ij, val in numpy.ndenumerate(arr):
                self.datastore[name][ij + idx] = val
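# The loop above uses the submit/reduce idiom: one task per slice of
# losses_by_event, with all results folded into a single list. A tiny
# sequential stand-in showing the shape of that protocol (illustrative
# only; the engine's ``parallel.Starmap`` is distributed and monitored):
class _SequentialStarmap:
    def __init__(self, task_func):
        self.task_func = task_func
        self.task_args = []

    def submit(self, *args):
        # queue the arguments; nothing runs yet
        self.task_args.append(args)

    def reduce(self, agg=lambda acc, res: acc + res, acc=None):
        # run every queued task and fold its result into the accumulator
        acc = [] if acc is None else acc
        for args in self.task_args:
            acc = agg(acc, self.task_func(*args))
        return acc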
def save_hmaps(self):
    """
    Save hazard maps generated from the hazard curves
    """
    oq = self.oqparam
    if oq.poes:
        mon = self.monitor('computing hazard maps')
        logging.info('Computing hazard maps for PoEs=%s', oq.poes)
        with mon:
            N = len(self.sitecol.complete)
            ct = oq.concurrent_tasks or 1
            if 'hcurves' in self.datastore:
                kinds = self.datastore['hcurves']
                hmaps_dt = numpy.dtype(
                    [('%s-%s' % (imt, poe), F32)
                     for imt in oq.imtls for poe in oq.poes])
                for kind in kinds:
                    self.datastore.create_dset(
                        'hmaps/' + kind, hmaps_dt, (N,), fillvalue=None)
                allargs = []
                for slc in general.split_in_slices(N, ct):
                    hcurves_by_kind = {
                        kind: self.datastore['hcurves/' + kind][slc]
                        for kind in kinds}
                    allargs.append((hcurves_by_kind, slc,
                                    oq.imtls, oq.poes, mon))
                for dic, slc in Starmap(build_hmaps, allargs):
                    for kind, hmaps in dic.items():
                        self.datastore['hmaps/' + kind][slc] = hmaps
            else:  # single realization
                pg = PmapGetter(self.datastore, self.rlzs_assoc)
                self.datastore['hmaps/mean'] = calc.make_hmap_array(
                    pg.get_mean(), oq.imtls, oq.poes, N)
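# ``hmaps_dt`` above is a structured dtype with one float32 field per
# (IMT, PoE) pair, so each record holds the full hazard map for one site.
# A self-contained example of the layout (the IMT/PoE values here are
# made up for illustration):
import numpy
F32 = numpy.float32
imtls = {'PGA': None, 'SA(0.1)': None}  # only the keys matter here
poes = [0.02, 0.1]
hmaps_dt = numpy.dtype([('%s-%s' % (imt, poe), F32)
                        for imt in imtls for poe in poes])
arr = numpy.zeros(3, hmaps_dt)  # one record per site
arr['PGA-0.1'][:] = 0.25        # fill one (IMT, PoE) column
print(arr.dtype.names)
# ('PGA-0.02', 'PGA-0.1', 'SA(0.1)-0.02', 'SA(0.1)-0.1')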
def gen_args(self): """ :yields: the arguments for compute_gmfs_and_curves """ oq = self.oqparam sitecol = self.sitecol.complete monitor = self.monitor(self.core_task.__name__) imts = list(oq.imtls) min_iml = self.get_min_iml(oq) correl_model = oq.get_correl_model() try: csm_info = self.csm.info except AttributeError: # no csm csm_info = self.datastore['csm_info'] samples_by_grp = csm_info.get_samples_by_grp() rlzs_by_gsim = { grp_id: self.rlzs_assoc.get_rlzs_by_gsim(grp_id) for grp_id in samples_by_grp } if self.precalc: num_ruptures = sum(len(rs) for rs in self.precalc.result.values()) block_size = math.ceil(num_ruptures / (oq.concurrent_tasks or 1)) for grp_id, ruptures in self.precalc.result.items(): if not ruptures: continue for block in block_splitter(ruptures, block_size): getter = GmfGetter(rlzs_by_gsim[grp_id], block, sitecol, imts, min_iml, oq.maximum_distance, oq.truncation_level, correl_model, oq.filter_distance, samples_by_grp[grp_id]) yield [getter], oq, monitor return U = len(self.datastore['ruptures']) logging.info('Found %d ruptures', U) parent = self.can_read_parent() or self.datastore for slc in split_in_slices(U, oq.concurrent_tasks or 1): getters = [] for grp_id in rlzs_by_gsim: ruptures = RuptureGetter(parent, slc, grp_id) if parent is self.datastore: # not accessible parent ruptures = list(ruptures) if not ruptures: continue getters.append( GmfGetter(rlzs_by_gsim[grp_id], ruptures, sitecol, imts, min_iml, oq.maximum_distance, oq.truncation_level, correl_model, oq.filter_distance, samples_by_grp[grp_id])) yield getters, oq, monitor
def gen_args(self, monitor):
    """
    :yields: the arguments for compute_gmfs_and_curves
    """
    oq = self.oqparam
    param = dict(oqparam=oq, min_iml=self.get_min_iml(oq),
                 truncation_level=oq.truncation_level,
                 imtls=oq.imtls, filter_distance=oq.filter_distance,
                 ses_per_logic_tree_path=oq.ses_per_logic_tree_path)
    concurrent_tasks = oq.concurrent_tasks
    if oq.hazard_calculation_id:
        U = len(self.datastore.parent['ruptures'])
        logging.info('Found %d ruptures', U)
        parent = self.can_read_parent() or self.datastore
        samples_by_grp = self.csm_info.get_samples_by_grp()
        for slc in split_in_slices(U, concurrent_tasks or 1):
            for grp_id in self.rlzs_by_gsim_grp:
                rlzs_by_gsim = self.rlzs_by_gsim_grp[grp_id]
                ruptures = RuptureGetter(parent, slc, grp_id)
                par = param.copy()
                par['samples'] = samples_by_grp[grp_id]
                yield ruptures, self.sitecol, rlzs_by_gsim, par, monitor
        return
    maxweight = self.csm.get_maxweight(weight, concurrent_tasks or 1)
    logging.info('Using maxweight=%d', maxweight)
    num_tasks = 0
    num_sources = 0
    for sm in self.csm.source_models:
        par = param.copy()
        par['samples'] = sm.samples
        for sg in sm.src_groups:
            # ignore the sources not producing ruptures
            sg.sources = [src for src in sg.sources if src.eb_ruptures]
            if not sg.sources:
                continue
            rlzs_by_gsim = self.rlzs_by_gsim_grp[sg.id]
            if sg.src_interdep == 'mutex':  # do not split
                yield sg, self.src_filter, rlzs_by_gsim, par, monitor
                num_tasks += 1
                num_sources += len(sg.sources)
                continue
            for block in block_splitter(sg.sources, maxweight, weight):
                yield block, self.src_filter, rlzs_by_gsim, par, monitor
                num_tasks += 1
                num_sources += len(block)
    logging.info('Sent %d sources in %d tasks', num_sources, num_tasks)
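# Here ``block_splitter(sg.sources, maxweight, weight)`` groups sources by
# a weight function rather than a fixed count: a block is closed once its
# cumulative weight reaches ``maxweight``, so heavy sources do not pile up
# in one task. A hedged sketch of that variant (the real baselib version
# may handle the threshold and ordering differently):
def _weighted_block_splitter_sketch(items, maxweight, weight=lambda x: 1):
    block, tot = [], 0
    for item in items:
        block.append(item)
        tot += weight(item)
        if tot >= maxweight:
            yield block
            block, tot = [], 0
    if block:
        yield block

# >>> list(_weighted_block_splitter_sketch([3, 1, 4, 1, 5], 5, lambda x: x))
# [[3, 1, 4], [1, 5]]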
def from_ruptures(self, param, monitor):
    """
    :yields: the arguments for compute_gmfs_and_curves
    """
    oq = self.oqparam
    concurrent_tasks = oq.concurrent_tasks
    U = len(self.datastore.parent['ruptures'])
    logging.info('Found %d ruptures', U)
    parent = self.can_read_parent() or self.datastore
    for slc in split_in_slices(U, concurrent_tasks or 1):
        for grp_id in self.rlzs_by_gsim_grp:
            rlzs_by_gsim = self.rlzs_by_gsim_grp[grp_id]
            ruptures = RuptureGetter(parent, slc, grp_id)
            par = param.copy()
            par['samples'] = self.samples_by_grp[grp_id]
            yield ruptures, self.sitecol, rlzs_by_gsim, par, monitor
def gen_args(self): """ :yields: the arguments for compute_gmfs_and_curves """ oq = self.oqparam monitor = self.monitor(self.core_task.__name__) imts = list(oq.imtls) min_iml = self.get_min_iml(oq) correl_model = oq.get_correl_model() try: csm_info = self.csm.info except AttributeError: # no csm csm_info = self.datastore['csm_info'] samples_by_grp = csm_info.get_samples_by_grp() rlzs_by_gsim = {grp_id: self.rlzs_assoc.get_rlzs_by_gsim(grp_id) for grp_id in samples_by_grp} if self.precalc: for grp_id, ruptures in self.precalc.result.items(): if not ruptures: continue for block in block_splitter(ruptures, oq.ruptures_per_block): getter = GmfGetter( rlzs_by_gsim[grp_id], block, self.sitecol, imts, min_iml, oq.maximum_distance, oq.truncation_level, correl_model, samples_by_grp[grp_id]) yield getter, oq, monitor return parent = self.get_parent() or self.datastore U = len(parent['ruptures']) logging.info('Found %d ruptures', U) if parent is not self.datastore: # accessible parent parent.close() for slc in split_in_slices(U, oq.concurrent_tasks or 1): for grp_id in rlzs_by_gsim: ruptures = calc.RuptureGetter(parent, slc, grp_id) if parent is self.datastore: # not accessible parent ruptures = list(ruptures) if not ruptures: continue getter = GmfGetter( rlzs_by_gsim[grp_id], ruptures, self.sitecol, imts, min_iml, oq.maximum_distance, oq.truncation_level, correl_model, samples_by_grp[grp_id]) yield getter, oq, monitor