def build_events_from_sources(self):
    """
    Build the ruptures from the sources of the composite source model,
    save them through ``self.rupser``, store the source_info and finally
    reorder the ruptures by serial and store the events.

    :raises RuntimeError: if no ruptures were generated at all
    """
    oqp = self.oqparam
    trt2gsims = self.csm.gsim_lt.values

    def num_ruptures(source):
        # weight function handed to the block splitter
        return source.num_ruptures

    logging.info('Building ruptures')
    smap = parallel.Starmap(
        self.build_ruptures.__func__, monitor=self.monitor())
    tot_eff_ruptures = AccumDict(accum=0)  # grp_id => potential ruptures
    tot_calc_times = AccumDict(accum=numpy.zeros(3, F32))
    ses_idx = 0
    for smodel in self.csm.source_models:
        logging.info('Sending %s', smodel)
        for grp in smodel.src_groups:
            if not grp.sources:
                continue
            params = self.param.copy()
            params['gsims'] = trt2gsims[grp.trt]
            for block in self.block_splitter(
                    grp.sources, num_ruptures, by_grp):
                if 'ucerf' not in oqp.calculation_mode:
                    smap.submit(block, self.src_filter, params)
                    continue
                # UCERF: the same block is submitted once per stochastic
                # event set, each time with its own (ses_idx, seed) pair
                for offset in range(1, oqp.ses_per_logic_tree_path + 1):
                    params['ses_seeds'] = [
                        (ses_idx, oqp.ses_seed + offset)]
                    smap.submit(block, self.src_filter, params)
                    ses_idx += 1

    saving_mon = self.monitor('saving ruptures')
    for result in smap:
        times = result['calc_times']
        if times:
            tot_calc_times += times
        effrups = result['eff_ruptures']
        if effrups:
            tot_eff_ruptures += effrups
        rup_array = result['rup_array']
        if rup_array:
            with saving_mon:
                self.rupser.save(rup_array)
    self.rupser.close()
    if not self.rupser.nruptures:
        raise RuntimeError('No ruptures were generated, perhaps the '
                           'investigation time is too short')

    # logic tree reduction, must be called before storing the events
    self.store_rlz_info(tot_eff_ruptures)
    store_rlzs_by_grp(self.datastore)
    self.init_logic_tree(self.csm.info)
    with self.monitor('store source_info', autoflush=True):
        self.store_source_info(tot_calc_times)

    logging.info('Reordering the ruptures and storing the events')
    rup_dset = self.datastore.getitem('ruptures')
    attrs = rup_dset.attrs
    ruptures = rup_dset.value
    # order the ruptures by serial (in place)
    ruptures.sort(order='serial')
    num_groups = len(self.csm.info.trt_by_grp)
    grp_indices = numpy.zeros((num_groups, 2), U32)
    # a single (start, stop) pair is expected for each group ID
    for grp_id, pairs in get_indices(ruptures['grp_id']).items():
        [startstop] = pairs
        grp_indices[grp_id] = startstop
    self.datastore['ruptures'] = ruptures
    self.datastore.set_attrs('ruptures', grp_indices=grp_indices, **attrs)
    self.save_events(ruptures)
def full_disaggregation(self, curves):
    """
    Run the disaggregation phase.

    :param curves: a list of hazard curves, one per site

    The curves can be all None if iml_disagg is set in the job.ini

    :returns: the outputs of compute_disagg reduced with ``agg_result``
    :raises RuntimeError: if the composite source model has no sources
    """
    oq = self.oqparam
    tl = oq.truncation_level
    src_filter = SourceFilter(self.sitecol, oq.maximum_distance)
    csm = self.csm
    if not csm.get_sources():
        raise RuntimeError('All sources were filtered away!')

    R = len(self.rlzs_assoc.realizations)
    I = len(oq.imtls)
    P = len(oq.poes_disagg) or 1
    if R * I * P > 10:
        logging.warning(
            'You have %d realizations, %d IMTs and %d poes_disagg: the '
            'disaggregation will be heavy and memory consuming', R, I, P)
    iml4 = disagg.make_iml4(
        R, oq.iml_disagg, oq.imtls, oq.poes_disagg or (None,), curves)
    if oq.disagg_by_src:
        if R == 1:
            self.build_disagg_by_src(iml4)
        else:
            logging.warning('disagg_by_src works only with 1 realization, '
                            'you have %d', R)

    # all the source groups of all the source models
    src_groups = [sg for smodel in csm.source_models
                  for sg in smodel.src_groups]

    # build trt_edges
    trts = tuple(sorted(set(sg.trt for sg in src_groups)))
    trt_num = {trt: i for i, trt in enumerate(trts)}
    self.trts = trts

    # build mag_edges
    min_mag = min(sg.min_mag for sg in src_groups)
    max_mag = max(sg.max_mag for sg in src_groups)
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / oq.mag_bin_width)),
        int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))

    # build dist_edges
    maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))

    # build eps_edges
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max_mag)
    lon_edges, lat_edges = {}, {}  # by sid
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
    self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
    self.save_bin_edges()

    # build all_args
    all_args = []
    maxweight = csm.get_maxweight(weight, oq.concurrent_tasks)
    mon = self.monitor('disaggregation')
    R = iml4.shape[1]
    self.imldict = {}  # sid, rlzi, poe, imt -> iml
    for s in self.sitecol.sids:
        for r in range(R):
            for p, poe in enumerate(oq.poes_disagg or [None]):
                for m, imt in enumerate(oq.imtls):
                    self.imldict[s, r, poe, imt] = iml4[s, r, m, p]

    for smodel in csm.source_models:
        sm_id = smodel.ordinal
        for trt, groups in groupby(
                smodel.src_groups, operator.attrgetter('trt')).items():
            trti = trt_num[trt]
            # flatten in linear time (sum(lists, []) is quadratic)
            sources = [src for grp in groups for src in grp.sources]
            rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(trt, sm_id)
            cmaker = ContextMaker(
                rlzs_by_gsim, src_filter.integration_distance,
                {'filter_distance': oq.filter_distance})
            for block in block_splitter(sources, maxweight, weight):
                all_args.append(
                    (src_filter, block, cmaker, iml4, trti,
                     self.bin_edges, oq, mon))

    self.num_ruptures = [0] * len(self.trts)
    self.cache_info = numpy.zeros(3)  # operations, cache_hits, num_zeros
    results = parallel.Starmap(
        compute_disagg, all_args, self.monitor()
    ).reduce(self.agg_result, AccumDict(accum={}))

    # set eff_ruptures
    trti = csm.info.trt2i()
    for smodel in csm.info.source_models:
        for sg in smodel.src_groups:
            sg.eff_ruptures = self.num_ruptures[trti[sg.trt]]
    self.datastore['csm_info'] = csm.info

    ops, hits, num_zeros = self.cache_info
    logging.info('Cache speedup %s', ops / (ops - hits))
    logging.info('Discarded zero matrices: %d', num_zeros)
    return results
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.

    :returns:
        an empty dictionary when only the post-processing of a parent
        calculation is performed, None in `preclassical` mode, otherwise
        the accumulator returned by the reduction of the classical tasks
    :raises RuntimeError: if `source_mags` is empty
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        with util.read(self.oqparam.hazard_calculation_id) as parent:
            self.full_lt = parent['full_lt']
        self.calc_stats()  # post-processing
        return {}

    srcfilter = self.src_filter()
    srcs = self.csm.get_sources()
    if oq.is_ucerf():
        logging.info('Prefiltering UCERFSources')
        for src in srcs:
            if hasattr(src, 'start'):
                src.src_filter = srcfilter  # hack for .iter_ruptures
                src.all_ridx = src.get_ridx()
    calc_times = parallel.Starmap.apply(
        preclassical,
        (srcs, SourceFilter(self.sitecol, oq.maximum_distance)),
        concurrent_tasks=oq.concurrent_tasks or 1,
        num_cores=oq.num_cores, h5=self.datastore.hdf5).reduce()

    if oq.calculation_mode == 'preclassical':
        self.store_source_info(calc_times, nsites=True)
        self.datastore['full_lt'] = self.csm.full_lt
        self.datastore.swmr_on()  # fixes HDF5 error in build_hazard
        return

    self.update_source_info(calc_times, nsites=True)
    # if OQ_SAMPLE_SOURCES is set extract one source for group
    ss = os.environ.get('OQ_SAMPLE_SOURCES')
    if ss:
        logging.info('Reducing the number of sources')
        for sg in self.csm.src_groups:
            if not sg.atomic:
                src = max(sg, key=operator.attrgetter('nsites', 'source_id'))
                sg.sources = [src]

    mags = self.datastore['source_mags']  # by TRT
    if len(mags) == 0:  # everything was discarded
        raise RuntimeError('All sources were discarded!?')
    gsims_by_trt = self.full_lt.get_gsims_by_trt()
    mags_by_trt = {trt: mags[trt][()] for trt in mags}
    psd = oq.pointsource_distance
    if psd is not None:
        psd.interp(mags_by_trt)
        for trt, dic in psd.ddic.items():
            # the sum is zero for {'default': [(1, 0), (10, 0)]}
            if sum(dic.values()):
                it = list(dic.items())
                # first and last (magnitude, distance) pairs
                md = '%s->%d ... %s->%d' % (it[0] + it[-1])
                logging.info('ps_dist %s: %s', trt, md)
    imts_with_period = [
        imt for imt in oq.imtls if imt == 'PGA' or imt.startswith('SA')]
    imts_ok = len(imts_with_period) == len(oq.imtls)
    if (imts_ok and psd and psd.suggested()) or (
            imts_ok and oq.minimum_intensity):
        aw = get_effect(mags_by_trt, self.sitecol.one(), gsims_by_trt, oq)
        if psd:
            dic = {
                trt: [(float(mag), int(dst))
                      for mag, dst in psd.ddic[trt].items()]
                for trt in psd.ddic if trt != 'default'}
            logging.info('pointsource_distance=\n%s', pprint.pformat(dic))
        if len(vars(aw)) > 1:  # more than _extra
            self.datastore['effect_by_mag_dst'] = aw

    smap = parallel.Starmap(classical, h5=self.datastore.hdf5,
                            num_cores=oq.num_cores)
    smap.monitor.save('srcfilter', srcfilter)
    self.submit_tasks(smap)
    acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
    self.datastore.swmr_on()
    smap.h5 = self.datastore.hdf5
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    try:
        acc = smap.reduce(self.agg_dicts, acc0)
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        # the source_info is stored even if the reduction failed
        with self.monitor('store source_info'):
            self.store_source_info(self.calc_times)
        if self.by_task:
            logging.info('Storing by_task information')
            shape = (max(self.by_task) + 1,)  # one slot per task number
            er = self.datastore.create_dset('by_task/eff_ruptures',
                                            U32, shape)
            es = self.datastore.create_dset('by_task/eff_sites',
                                            U32, shape)
            si = self.datastore.create_dset('by_task/srcids',
                                            hdf5.vstr, shape,
                                            fillvalue=None)
            for task_no, rec in self.by_task.items():
                effrups, effsites, srcids = rec
                er[task_no] = effrups
                es[task_no] = effsites
                si[task_no] = ' '.join(srcids)
            self.by_task.clear()

    self.numrups = sum(arr[0] for arr in self.calc_times.values())
    numsites = sum(arr[1] for arr in self.calc_times.values())
    logging.info('Effective number of ruptures: {:_d}/{:_d}'.format(
        int(self.numrups), self.totrups))
    if self.numrups:
        # the ratio is undefined (ZeroDivisionError on an empty
        # calc_times) when no rupture was processed
        logging.info('Effective number of sites per rupture: %d',
                     numsites / self.numrups)
    if psd:
        psdist = max(max(psd.ddic[trt].values()) for trt in psd.ddic)
        if psdist and self.maxradius >= psdist / 2:
            logging.warning(
                'The pointsource_distance of %d km is too '
                'small compared to a maxradius of %d km',
                psdist, self.maxradius)
    self.calc_times.clear()  # save a bit of memory
    return acc
def execute(self):
    """
    Build the src_loss_table (when `source_info` is available) and the
    aggregate/total loss curves and losses per realization, then set the
    statistics on the stored datasets.

    :returns:
        None if the effective time is too small to compute the loss
        curves, 1 otherwise
    """
    oq = self.oqparam
    if oq.return_periods != [0]:
        # setting return_periods = 0 disable loss curves
        eff_time = oq.investigation_time * oq.ses_per_logic_tree_path
        if eff_time < 2:
            logging.warning(
                'eff_time=%s is too small to compute loss curves',
                eff_time)
            return
    if 'source_info' in self.datastore:  # missing for gmf_ebrisk
        logging.info('Building src_loss_table')
        source_ids, losses = get_src_loss_table(self.datastore, self.L)
        self.datastore['src_loss_table'] = losses
        self.datastore.set_shape_attrs('src_loss_table',
                                       source=source_ids,
                                       loss_type=oq.loss_names)
    shp = self.get_shape(self.L)  # (L, T...)
    text = ' x '.join(
        '%d(%s)' % (n, t) for t, n in zip(oq.aggregate_by, shp[1:]))
    logging.info('Producing %d(loss_types) x %s loss curves', self.L, text)
    builder = get_loss_builder(self.datastore)
    if oq.aggregate_by:
        self.build_datasets(builder, oq.aggregate_by, 'agg_')
    self.build_datasets(builder, [], 'app_')
    self.build_datasets(builder, [], 'tot_')
    parent = self.datastore.parent
    full_aggregate_by = (
        parent['oqparam'].aggregate_by if parent else ()
    ) or oq.aggregate_by
    if oq.aggregate_by:
        aggkeys = build_aggkeys(oq.aggregate_by, self.tagcol,
                                full_aggregate_by)
        # the tasks read from the parent when it has the event loss table
        if parent and 'event_loss_table' in parent:
            ds = parent
        else:
            ds = self.datastore
            ds.swmr_on()
        smap = parallel.Starmap(
            post_ebrisk, [(ds, aggkey) for aggkey in aggkeys],
            h5=self.datastore.hdf5)
    else:
        smap = ()  # do everything in process since it is really fast
    ds = self.datastore  # all the outputs are written in the child
    for res in smap:
        if not res:
            continue
        for r, dic in res.items():
            if oq.aggregate_by:
                ds['agg_curves-rlzs'][
                    (slice(None), r, slice(None)) + dic['idx']  # PRLT..
                ] = dic['agg_curves']
                ds['agg_losses-rlzs'][
                    (slice(None), r) + dic['idx']  # LRT...
                ] = dic['agg_losses']
            ds['app_curves-rlzs'][:, r] += dic['agg_curves']  # PL
    lbe = ds['losses_by_event'][()]
    rlz_ids = ds['events']['rlz_id'][lbe['event_id']]
    dic = dict(enumerate(lbe['loss'].T))  # lti -> losses
    df = pandas.DataFrame(dic, rlz_ids)
    for r, losses_df in df.groupby(rlz_ids):
        losses = numpy.array(losses_df)
        # NB: no trailing comma here, otherwise the curves get wrapped
        # in a 1-tuple before being stored
        curves = builder.build_curves(losses, r)
        ds['tot_curves-rlzs'][:, r] = curves  # PL
        ds['tot_losses-rlzs'][:, r] = losses.sum(axis=0) * oq.ses_ratio
    units = self.datastore['cost_calculator'].get_units(oq.loss_names)
    aggby = {
        tagname: encode(getattr(self.tagcol, tagname)[1:])
        for tagname in oq.aggregate_by}
    set_rlzs_stats(self.datastore, 'app_curves',
                   return_periods=builder.return_periods,
                   loss_types=oq.loss_names, **aggby, units=units)
    set_rlzs_stats(self.datastore, 'tot_curves',
                   return_periods=builder.return_periods,
                   loss_types=oq.loss_names, **aggby, units=units)
    set_rlzs_stats(self.datastore, 'tot_losses',
                   loss_types=oq.loss_names, **aggby, units=units)
    if oq.aggregate_by:
        set_rlzs_stats(self.datastore, 'agg_curves',
                       return_periods=builder.return_periods,
                       loss_types=oq.loss_names, **aggby, units=units)
        set_rlzs_stats(self.datastore, 'agg_losses',
                       loss_types=oq.loss_names, **aggby, units=units)
    return 1
def test_countletters(self):
    """
    Reduce a Starmap of `countletters` over two pairs of words; the
    expected result is {'n': 19} (the four words have 19 characters
    in total).
    """
    word_pairs = [('hello', 'world'), ('ciao', 'mondo')]
    reduced = parallel.Starmap(countletters, word_pairs).reduce()
    self.assertEqual(reduced, {'n': 19})
def get_csm(oq, full_lt, h5=None):
    """
    Build the composite source model from the logic tree.

    :param oq: object with the calculation parameters
    :param full_lt: full logic tree object; its `ses_seed` is set here
    :param h5: passed to the Starmap reading the source models
               (it is None in logictree_test.py)
    :returns:
        a CompositeSourceModel in the UCERF case, otherwise the output
        of `_get_csm(full_lt, groups)`
    """
    converter = sourceconverter.SourceConverter(
        oq.investigation_time, oq.rupture_mesh_spacing,
        oq.complex_fault_mesh_spacing, oq.width_of_mfd_bin,
        oq.area_source_discretization, oq.minimum_magnitude,
        oq.source_id, discard_trts=oq.discard_trts)
    classical = not oq.is_event_based()
    full_lt.ses_seed = oq.ses_seed

    if oq.is_ucerf():
        [template] = nrml.to_python(oq.inputs["source_model"], converter)
        ucerf_groups = []
        for grp_id, sm_rlz in enumerate(full_lt.sm_rlzs):
            group = copy.copy(template)
            ucerf_groups.append(group)
            # one source
            ucerf = group[0].new(sm_rlz.ordinal, sm_rlz.value[0])
            ucerf.checksum = grp_id
            ucerf.grp_id = grp_id
            ucerf.trt_smr = grp_id
            ucerf.samples = sm_rlz.samples
            logging.info('Reading sections and rupture planes for %s',
                         ucerf)
            planes = ucerf.get_planes()
            if not classical:  # event_based, use one source
                group.sources = [ucerf]
                ucerf.planes = planes
                ucerf.sections = ucerf.get_sections()
                continue
            ucerf.ruptures_per_block = oq.ruptures_per_block
            group.sources = list(ucerf)
            for sub in group:
                sub.planes = planes
                sub.sections = sub.get_sections()
            # add background point sources in a separate group
            group = copy.copy(template)
            ucerf_groups.append(group)
            group.sources = ucerf.get_background_sources()
        return CompositeSourceModel(full_lt, ucerf_groups)

    logging.info('Reading the source model(s) in parallel')

    # NB: the source models file are often NOT in the shared directory
    # (for instance in oq-engine/demos) so the processpool must be used
    if os.environ.get('OQ_DISTRIBUTE') == 'no':
        dist = 'no'
    else:
        dist = 'processpool'
    # NB: h5 is None in logictree_test.py
    allargs = [(fname, converter)
               for fname in full_lt.source_model_lt.info.smpaths]
    smdict = parallel.Starmap(read_source_model, allargs,
                              distribute=dist, h5=h5 or None).reduce()
    if len(smdict) > 1:  # really parallel
        parallel.Starmap.shutdown()  # save memory
    fix_geometry_sections(smdict)
    groups = _build_groups(full_lt, smdict)

    # checking the changes
    changes = sum(group.changes for group in groups)
    if changes:
        logging.info(
            'Applied {:_d} changes to the composite source model'.format(
                changes))
    return _get_csm(full_lt, groups)
def compute(self):
    """
    Submit disaggregation tasks and return the results

    :returns: the task outputs reduced with `agg_result`
    :raises ValueError: if the estimated data transfer exceeds
                        `max_data_transfer`
    """
    oq = self.oqparam
    dstore = self.datastore.parent or self.datastore
    magi = numpy.searchsorted(self.bin_edges[0], dstore['rup/mag'][:]) - 1
    magi[magi == -1] = 0  # when the magnitude is on the edge
    totrups = len(magi)
    logging.info('Reading {:_d} ruptures'.format(totrups))
    rup_dt = [('grp_id', U16), ('magi', U8), ('nsites', U16), ('idx', U32)]
    rdata = numpy.zeros(totrups, rup_dt)
    rdata['magi'] = magi
    rdata['idx'] = numpy.arange(totrups)
    rdata['grp_id'] = dstore['rup/grp_id'][:]
    rdata['nsites'] = dstore['rup/nsites'][:]
    totweight = rdata['nsites'].sum()
    et_ids = dstore['et_ids'][:]
    rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(et_ids)
    G = max(len(rbg) for rbg in rlzs_by_gsim)
    maxw = 2 * 1024**3 / (16 * G * self.M)  # at max 2 GB
    weight_per_task = numpy.ceil(totweight / (oq.concurrent_tasks or 1))
    maxweight = min(weight_per_task, maxw)
    num_eff_rlzs = len(self.full_lt.sm_rlzs)
    task_inputs = []
    U = 0  # maximum block weight
    self.datastore.swmr_on()
    smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5)
    # VERY IMPORTANT: the classical part of the calculation stores the
    # ruptures in chunks of constant grp_id, so that contiguous
    # (start, stop) slices can be built; the blocks are deliberately NOT
    # grouped by ('grp_id', 'magi') since that would break the ordering
    # of the indices, with a much worse performance which is visible
    # only in extra-large calculations
    by_nsites = operator.itemgetter('nsites')
    by_grp_id = operator.itemgetter('grp_id')
    for block in block_splitter(rdata, maxweight, by_nsites, by_grp_id):
        first, last = block[0], block[-1]
        grp_id = first['grp_id']
        trti = et_ids[grp_id][0] // num_eff_rlzs
        cmaker_params = {
            'truncation_level': oq.truncation_level,
            'maximum_distance': oq.maximum_distance,
            'collapse_level': oq.collapse_level,
            'num_epsilon_bins': oq.num_epsilon_bins,
            'investigation_time': oq.investigation_time,
            'imtls': oq.imtls}
        cmaker = ContextMaker(
            self.trts[trti], rlzs_by_gsim[grp_id], cmaker_params)
        U = max(U, block.weight)
        start, stop = first['idx'], last['idx'] + 1
        slc = slice(start, stop)
        smap.submit((dstore, slc, cmaker, self.hmap4, trti, magi[slc],
                     self.bin_edges))
        task_inputs.append((trti, stop - start))

    nbytes, msg = get_nbytes_msg({'M': self.M, 'G': G, 'U': U, 'F': 2})
    logging.info('Maximum mean_std per task:\n%s', msg)

    shp = self.shapedic
    sd = {'N': shp['N'], 'M': shp['M'], 'P': shp['P'], 'Z': shp['Z'],
          'D': shp['dist'], 'E': shp['eps'],
          'Lo': shp['lon'], 'La': shp['lat']}
    sd['tasks'] = numpy.ceil(len(task_inputs))
    nbytes, msg = get_nbytes_msg(sd)
    if nbytes > oq.max_data_transfer:
        raise ValueError(
            'Estimated data transfer too big\n%s > max_data_transfer=%s' %
            (msg, humansize(oq.max_data_transfer)))
    logging.info('Estimated data transfer:\n%s', msg)
    sd.pop('tasks')

    task_dt = numpy.dtype([('trti', U8), ('nrups', U32)])
    self.datastore['disagg_task'] = numpy.array(task_inputs, task_dt)
    # imti, sid -> trti, magi -> 6D array
    return smap.reduce(self.agg_result, AccumDict(accum={}))
def execute(self):
    """
    Build the src_loss_table (when `source_info` is available) and the
    aggregate/total loss curves and losses per realization, then set the
    statistics on the stored datasets.

    :returns:
        None if the effective time is too small to compute the loss
        curves, 1 otherwise
    """
    oq = self.oqparam
    if oq.return_periods != [0]:
        # setting return_periods = 0 disable loss curves
        eff_time = oq.investigation_time * oq.ses_per_logic_tree_path
        if eff_time < 2:
            logging.warning(
                'eff_time=%s is too small to compute loss curves',
                eff_time)
            return
    if 'source_info' in self.datastore:  # missing for gmf_ebrisk
        logging.info('Building src_loss_table')
        source_ids, losses = get_src_loss_table(self.datastore, self.L)
        self.datastore['src_loss_table'] = losses
        self.datastore.set_shape_attrs('src_loss_table',
                                       source=source_ids,
                                       loss_type=oq.loss_names)
    shp = self.get_shape(self.L)  # (L, T...)
    text = ' x '.join(
        '%d(%s)' % (n, t) for t, n in zip(oq.aggregate_by, shp[1:]))
    logging.info('Producing %d(loss_types) x %s loss curves', self.L, text)
    builder = get_loss_builder(self.datastore)
    if oq.aggregate_by:
        self.build_datasets(builder, oq.aggregate_by, 'agg_')
    self.build_datasets(builder, [], 'app_')
    self.build_datasets(builder, [], 'tot_')
    # the tasks read from the parent datastore, if any
    ds = (self.datastore.parent if oq.hazard_calculation_id
          else self.datastore)
    if oq.aggregate_by:
        aggkeys = build_aggkeys(oq.aggregate_by, self.tagcol,
                                ds['oqparam'].aggregate_by)
        if not oq.hazard_calculation_id:  # no parent
            ds.swmr_on()
        smap = parallel.Starmap(
            post_ebrisk, [(ds, aggkey) for aggkey in aggkeys],
            h5=self.datastore.hdf5)
    else:
        smap = ()  # do everything in process since it is really fast
    ds = self.datastore  # all the outputs are written in the child
    for res in smap:
        if not res:
            continue
        for r, dic in res.items():
            if oq.aggregate_by:
                ds['agg_curves-rlzs'][
                    (slice(None), r, slice(None)) + dic['idx']  # PRLT..
                ] = dic['agg_curves']
                ds['agg_losses-rlzs'][
                    (slice(None), r) + dic['idx']  # LRT...
                ] = dic['agg_losses']
            ds['app_curves-rlzs'][:, r] += dic['agg_curves']  # PL
    elt = ds.read_df('losses_by_event', ['event_id', 'rlzi'])
    for r, curves, losses in builder.gen_curves_by_rlz(elt, oq.ses_ratio):
        ds['tot_curves-rlzs'][:, r] = curves  # PL
        ds['tot_losses-rlzs'][:, r] = losses  # L
    units = self.datastore['cost_calculator'].get_units(oq.loss_names)
    aggby = {
        tagname: encode(getattr(self.tagcol, tagname)[1:])
        for tagname in oq.aggregate_by}
    set_rlzs_stats(self.datastore, 'app_curves',
                   return_periods=builder.return_periods,
                   loss_types=oq.loss_names, **aggby, units=units)
    set_rlzs_stats(self.datastore, 'tot_curves',
                   return_periods=builder.return_periods,
                   loss_types=oq.loss_names, **aggby, units=units)
    set_rlzs_stats(self.datastore, 'tot_losses',
                   loss_types=oq.loss_names, **aggby, units=units)
    if oq.aggregate_by:
        set_rlzs_stats(self.datastore, 'agg_curves',
                       return_periods=builder.return_periods,
                       loss_types=oq.loss_names, **aggby, units=units)
        set_rlzs_stats(self.datastore, 'agg_losses',
                       loss_types=oq.loss_names, **aggby, units=units)
    return 1
def execute(self):
    """
    Compute the conditional spectrum

    Create the `cs-rlzs`, `cs-stats`, `_c` and `_s` datasets, then
    submit one `conditional_spectrum` task per block of ruptures.

    :returns: the reduction of the outputs of the submitted tasks
    """
    oq = self.oqparam
    self.full_lt = self.datastore['full_lt']
    self.trts = list(self.full_lt.gsim_lt.values)
    self.imts = list(oq.imtls)
    imti = self.imts.index(oq.imt_ref)
    self.M = M = len(self.imts)
    dstore = (self.datastore.parent if self.datastore.parent
              else self.datastore)
    totrups = len(dstore['rup/mag'])
    logging.info('Reading {:_d} ruptures'.format(totrups))
    rdt = [('grp_id', U16), ('nsites', U16), ('idx', U32)]
    rdata = numpy.zeros(totrups, rdt)
    rdata['idx'] = numpy.arange(totrups)
    rdata['grp_id'] = dstore['rup/grp_id'][:]
    rdata['nsites'] = [len(sids) for sids in dstore['rup/sids_']]
    totweight = rdata['nsites'].sum()
    trt_smrs = dstore['trt_smrs'][:]
    rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(trt_smrs)
    _G = sum(len(rbg) for rbg in rlzs_by_gsim)
    self.periods = [from_string(imt).period for imt in self.imts]
    if oq.imls_ref:
        self.imls = oq.imls_ref
    else:  # extract imls from the "mean" hazard map
        curve = self.datastore.sel('hcurves-stats', stat='mean')[0, 0, imti]
        [self.imls] = compute_hazard_maps(
            curve, oq.imtls[oq.imt_ref], oq.poes)  # there is 1 site
    self.P = P = len(self.imls)
    self.datastore.create_dset(
        'cs-rlzs', float, (self.R, M, self.N, 2, self.P))
    self.datastore.set_shape_descr(
        'cs-rlzs', rlz_id=self.R, period=self.periods, sid=self.N,
        cs=2, poe_id=P)
    self.datastore.create_dset('cs-stats', float, (1, M, self.N, 2, P))
    self.datastore.set_shape_descr(
        'cs-stats', stat='mean', period=self.periods, sid=self.N,
        cs=['spec', 'std'], poe_id=P)
    self.datastore.create_dset('_c', float, (_G, M, self.N, 2, P))
    self.datastore.create_dset('_s', float, (_G, self.N, P))
    G = max(len(rbg) for rbg in rlzs_by_gsim)
    maxw = 2 * 1024**3 / (16 * G * self.M)  # at max 2 GB
    maxweight = min(
        numpy.ceil(totweight / (oq.concurrent_tasks or 1)), maxw)
    self.cmakers = read_cmakers(self.datastore)
    self.datastore.swmr_on()
    smap = parallel.Starmap(conditional_spectrum, h5=self.datastore.hdf5)
    # IMPORTANT!! we rely on the fact that the classical part
    # of the calculation stores the ruptures in chunks of constant
    # grp_id, therefore it is possible to build (start, stop) slices
    for block in general.block_splitter(rdata, maxweight,
                                        operator.itemgetter('nsites'),
                                        operator.itemgetter('grp_id')):
        cmaker = self.cmakers[block[0]['grp_id']]
        slc = slice(block[0]['idx'], block[-1]['idx'] + 1)
        smap.submit((dstore, slc, cmaker, imti, self.imls))
    return smap.reduce()
def get_csm(oq, full_lt, h5=None):
    """
    Build the composite source model from the logic tree.

    :param oq: object with the calculation parameters
    :param full_lt: full logic tree object
    :param h5: passed to the Starmap reading the source models
               (it is None in logictree_test.py)
    :returns:
        a CompositeSourceModel in the UCERF case, otherwise the output
        of `_get_csm(full_lt, groups)`
    """
    psd = oq.pointsource_distance
    # the "spinning" is off when all the maximum distances are zero
    spinning_off = psd is not None and sum(psd.max().values()) == 0
    if spinning_off:
        logging.info('Removing nodal plane and hypocenter distributions')
    converter = sourceconverter.SourceConverter(
        oq.investigation_time, oq.rupture_mesh_spacing,
        oq.complex_fault_mesh_spacing, oq.width_of_mfd_bin,
        oq.area_source_discretization, oq.minimum_magnitude,
        not spinning_off, oq.source_id, discard_trts=oq.discard_trts)
    logging.info('%d effective smlt realization(s)', len(full_lt.sm_rlzs))
    classical = not oq.is_event_based()

    if oq.is_ucerf():
        sample = .001 if os.environ.get('OQ_SAMPLE_SOURCES') else None
        [template] = nrml.to_python(oq.inputs["source_model"], converter)
        ucerf_groups = []
        for grp_id, sm_rlz in enumerate(full_lt.sm_rlzs):
            group = copy.copy(template)
            ucerf_groups.append(group)
            # one source
            ucerf = group[0].new(sm_rlz.ordinal, sm_rlz.value)
            group.mags = numpy.unique(numpy.round(ucerf.mags))
            del ucerf.__dict__['mags']  # remove cache
            ucerf.checksum = grp_id
            ucerf.grp_id = grp_id
            ucerf.id = grp_id
            ucerf.samples = sm_rlz.samples
            if not classical:  # event_based, use one source
                group.sources = [ucerf]
                continue
            ucerf.ruptures_per_block = oq.ruptures_per_block
            subsources = list(ucerf)
            if sample:  # take the first source
                subsources = [subsources[0]]
            group.sources = subsources
            # add background point sources
            group.sources.extend(ucerf.get_background_sources(sample))
        return CompositeSourceModel(full_lt, ucerf_groups)

    logging.info('Reading the source model(s) in parallel')
    srcfilter = None
    if 'OQ_SAMPLE_SOURCES' in os.environ and h5:
        srcfilter = calc.filters.SourceFilter(
            h5['sitecol'], h5['oqparam'].maximum_distance)

    # NB: the source models file are often NOT in the shared directory
    # (for instance in oq-engine/demos) so the processpool must be used
    if os.environ.get('OQ_DISTRIBUTE') == 'no':
        dist = 'no'
    else:
        dist = 'processpool'
    # NB: h5 is None in logictree_test.py
    allargs = [(fname, converter, srcfilter)
               for fname in full_lt.source_model_lt.info.smpaths]
    smdict = parallel.Starmap(read_source_model, allargs,
                              distribute=dist, h5=h5 or None).reduce()
    if len(smdict) > 1:  # really parallel
        parallel.Starmap.shutdown()  # save memory
    groups = _build_groups(full_lt, smdict)

    # checking the changes
    changes = sum(group.changes for group in groups)
    if changes:
        logging.info('Applied %d changes to the composite source model',
                     changes)
    return _get_csm(full_lt, groups)
def full_disaggregation(self):
    """
    Run the disaggregation phase after hazard curve finalization.

    Build the bin edges for each (source model, site) pair, split and
    filter the sources of each source group, run `compute_disagg` in
    parallel and pass the aggregated results to `save_disagg_results`.
    """
    oq = self.oqparam
    tl = oq.truncation_level
    bb_dict = self.datastore['bb_dict']
    sitecol = self.sitecol
    mag_bin_width = oq.mag_bin_width
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    logging.info('%d epsilon bins from %s to %s', len(eps_edges) - 1,
                 min(eps_edges), max(eps_edges))

    self.bin_edges = {}
    curves_dict = {sid: self.get_curves(sid) for sid in sitecol.sids}
    all_args = []
    num_trts = sum(len(sm.src_groups) for sm in self.csm.source_models)
    nblocks = math.ceil(oq.concurrent_tasks / num_trts)
    for smodel in self.csm.source_models:
        sm_id = smodel.ordinal
        trt_names = tuple(mod.trt for mod in smodel.src_groups)
        max_mag = max(mod.max_mag for mod in smodel.src_groups)
        min_mag = min(mod.min_mag for mod in smodel.src_groups)
        mag_edges = mag_bin_width * numpy.arange(
            int(numpy.floor(min_mag / mag_bin_width)),
            int(numpy.ceil(max_mag / mag_bin_width) + 1))
        logging.info('%d mag bins from %s to %s', len(mag_edges) - 1,
                     min_mag, max_mag)
        for src_group in smodel.src_groups:
            if src_group.id not in self.rlzs_assoc.gsims_by_grp_id:
                continue  # the group has been filtered away
            for sid, site in zip(sitecol.sids, sitecol):
                curves = curves_dict[sid]
                if not curves:
                    continue  # skip zero-valued hazard curves
                bb = bb_dict[sm_id, sid]
                if not bb:
                    logging.info(
                        'location %s was too far, skipping disaggregation',
                        site.location)
                    continue

                dist_edges, lon_edges, lat_edges = bb.bins_edges(
                    oq.distance_bin_width, oq.coordinate_bin_width)
                logging.info(
                    '%d dist bins from %s to %s', len(dist_edges) - 1,
                    min(dist_edges), max(dist_edges))
                logging.info(
                    '%d lon bins from %s to %s', len(lon_edges) - 1,
                    bb.west, bb.east)
                # the number of lat bins comes from lat_edges, not from
                # lon_edges
                logging.info(
                    '%d lat bins from %s to %s', len(lat_edges) - 1,
                    bb.south, bb.north)

                self.bin_edges[sm_id, sid] = (
                    mag_edges, dist_edges, lon_edges, lat_edges, eps_edges)

            # bin edges of the current source model, keyed by site ID
            bin_edges = {
                sid: self.bin_edges[sm_id, sid] for sid in sitecol.sids
                if (sm_id, sid) in self.bin_edges}

            src_filter = SourceFilter(sitecol, oq.maximum_distance)
            split_sources = [
                split for src in src_group
                for split, _sites in src_filter(
                    sourceconverter.split_source(src), sitecol)]
            mon = self.monitor('disaggregation')
            for srcs in split_in_blocks(split_sources, nblocks):
                all_args.append(
                    (src_filter, srcs, src_group.id, self.rlzs_assoc,
                     trt_names, curves_dict, bin_edges, oq, mon))

    results = parallel.Starmap(
        compute_disagg, all_args).reduce(self.agg_result)
    self.save_disagg_results(results)
def compute(self):
    """
    Submit disaggregation tasks and return the results

    :returns: the task outputs reduced with `agg_result`
    :raises ValueError: if the estimated data transfer exceeds
                        `max_data_transfer`
    """
    logging.info('Reading ruptures')
    oq = self.oqparam
    dstore = self.datastore.parent or self.datastore
    unique_mags = set()
    for dset in self.datastore['source_mags'].values():
        unique_mags.update(dset[:])
    mags = sorted(unique_mags)
    allargs = []
    totweight = sum(
        grp['rctx']['nsites'].sum() for name, grp in dstore.items()
        if name.startswith('mag_') and len(grp['rctx']))
    et_ids = dstore['et_ids'][:]
    rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(et_ids)
    G = max(len(rbg) for rbg in rlzs_by_gsim)
    maxw = 2 * 1024**3 / (16 * G * self.M)  # at max 2 GB
    weight_per_task = numpy.ceil(totweight / (oq.concurrent_tasks or 1))
    maxweight = min(weight_per_task, maxw)
    num_eff_rlzs = len(self.full_lt.sm_rlzs)
    task_inputs = []
    U = 0  # maximum block weight
    totrups = 0
    for mag in mags:
        rctx = dstore['mag_%s/rctx' % mag][:]
        totrups += len(rctx)
        for grp_id, gids in enumerate(et_ids):
            idxs, = numpy.where(rctx['grp_id'] == grp_id)
            if not len(idxs):
                continue  # no ruptures of this group and magnitude
            trti = gids[0] // num_eff_rlzs
            cmaker_params = {
                'truncation_level': oq.truncation_level,
                'maximum_distance': oq.maximum_distance,
                'collapse_level': oq.collapse_level,
                'imtls': oq.imtls}
            cmaker = ContextMaker(
                self.trts[trti], rlzs_by_gsim[grp_id], cmaker_params)
            for blk in block_splitter(rctx[idxs], maxweight, nsites):
                nrups = len(blk)
                U = max(U, blk.weight)
                allargs.append((dstore, numpy.array(blk), cmaker,
                                self.hmap4, trti, self.bin_edges, oq))
                task_inputs.append((trti, mag, nrups))
    logging.info('Found {:_d} ruptures'.format(totrups))

    nbytes, msg = get_array_nbytes({'M': self.M, 'G': G, 'U': U, 'F': 2})
    logging.info('Maximum mean_std per task:\n%s', msg)

    shp = self.shapedic
    sd = {'N': shp['N'], 'M': shp['M'], 'P': shp['P'], 'Z': shp['Z'],
          'D': shp['dist'], 'E': shp['eps'],
          'Lo': shp['lon'], 'La': shp['lat']}
    sd['tasks'] = numpy.ceil(len(allargs))
    nbytes, msg = get_array_nbytes(sd)
    if nbytes > oq.max_data_transfer:
        raise ValueError(
            'Estimated data transfer too big\n%s > max_data_transfer=%s' %
            (msg, humansize(oq.max_data_transfer)))
    logging.info('Estimated data transfer:\n%s', msg)

    # replace the number of tasks with the number of magnitudes
    # summed over the `source_mags` datasets
    sd.pop('tasks')
    sd['mags_trt'] = sum(
        len(dset) for dset in self.datastore['source_mags'].values())
    nbytes, msg = get_array_nbytes(sd)
    logging.info('Estimated memory on the master:\n%s', msg)

    task_dt = numpy.dtype([('trti', U8), ('mag', '|S4'), ('nrups', U32)])
    self.datastore['disagg_task'] = numpy.array(task_inputs, task_dt)
    self.datastore.swmr_on()
    smap = parallel.Starmap(
        compute_disagg, allargs, h5=self.datastore.hdf5)
    # imti, sid -> trti, magi -> 6D array
    return smap.reduce(self.agg_result, AccumDict(accum={}))
def full_disaggregation(self):
    """
    Run the disaggregation phase after hazard curve finalization.

    The method builds the bin edges (magnitude, distance, longitude,
    latitude, epsilon) for each site and stores them in
    ``self.bin_edges``, checks that the requested ``poes_disagg`` do not
    exceed the maximum PoE of the hazard curves, splits and filters the
    sources, submits the ``compute_disagg`` tasks and finally saves the
    aggregated results with ``self.save_disagg_results``.

    :raises ValueError: if a value in ``poes_disagg`` is bigger than the
        maximum PoE of a hazard curve (message ``self.POE_TOO_BIG``)
    """
    oq = self.oqparam
    tl = oq.truncation_level
    sitecol = self.sitecol
    self.bin_edges = {}
    curves = [self.get_curves(sid) for sid in sitecol.sids]
    # determine the number of effective source groups
    sg_data = self.datastore['csm_info/sg_data']
    num_grps = sum(1 for effrup in sg_data['effrup'] if effrup > 0)
    nblocks = math.ceil(oq.concurrent_tasks / num_grps)
    src_filter = SourceFilter(sitecol, oq.maximum_distance)
    R = len(self.rlzs_assoc.realizations)
    max_poe = numpy.zeros(R, oq.imt_dt())

    # build trt_edges
    trts = tuple(sorted(set(sg.trt for smodel in self.csm.source_models
                            for sg in smodel.src_groups)))

    # build mag_edges
    min_mag = min(sg.min_mag for smodel in self.csm.source_models
                  for sg in smodel.src_groups)
    max_mag = max(sg.max_mag for smodel in self.csm.source_models
                  for sg in smodel.src_groups)
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / oq.mag_bin_width)),
        int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))

    # build dist_edges
    maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))
    logging.info('dist = %s...%s', min(dist_edges), max(dist_edges))

    # build eps_edges (computed once; it does not depend on the site)
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max_mag)
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges, lat_edges = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
        logging.info('site %d, lon = %s...%s', sid,
                     min(lon_edges), max(lon_edges))
        logging.info('site %d, lat = %s...%s', sid,
                     min(lat_edges), max(lat_edges))
        self.bin_edges[sid] = bs = (
            mag_edges, dist_edges, lon_edges, lat_edges, eps_edges)
        shape = [len(edges) - 1 for edges in bs] + [len(trts)]
        logging.info('%s for sid %d', shape, sid)

    # check poes
    for smodel in self.csm.source_models:
        sm_id = smodel.ordinal
        for i, _site in enumerate(sitecol):
            curve = curves[i]
            # populate max_poe array
            for rlzi, poes in curve.items():
                for imt in oq.imtls:
                    max_poe[rlzi][imt] = max(
                        max_poe[rlzi][imt], poes[imt].max())
            if not curve:
                continue  # skip zero-valued hazard curves
            # check for too big poes_disagg
            for poe in oq.poes_disagg:
                for rlz in self.rlzs_assoc.rlzs_by_smodel[sm_id]:
                    rlzi = rlz.ordinal
                    for imt in oq.imtls:
                        min_poe = max_poe[rlzi][imt]
                        if poe > min_poe:
                            raise ValueError(self.POE_TOO_BIG % (
                                poe, sm_id, smodel.name, min_poe, rlzi,
                                imt))

    # build all_args
    all_args = []
    for smodel in self.csm.source_models:
        for sg in smodel.src_groups:
            # split the sources and keep only the ones passing the filter
            split_sources = []
            for src in sg:
                for split, _sites in src_filter(
                        sourceconverter.split_source(src), sitecol):
                    split_sources.append(split)
            if not split_sources:
                continue
            mon = self.monitor('disaggregation')
            rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(
                sg.trt, smodel.ordinal)
            cmaker = ContextMaker(
                rlzs_by_gsim, src_filter.integration_distance)
            imls = [disagg.make_imldict(
                rlzs_by_gsim, oq.imtls, oq.iml_disagg, oq.poes_disagg,
                curve) for curve in curves]
            for srcs in split_in_blocks(split_sources, nblocks):
                all_args.append(
                    (src_filter, srcs, cmaker, imls, trts,
                     self.bin_edges, oq, mon))

    self.cache_info = numpy.zeros(2)  # operations, cache_hits
    results = parallel.Starmap(
        compute_disagg, all_args).reduce(self.agg_result)
    ops, hits = self.cache_info
    logging.info('Cache speedup %s', ops / (ops - hits))
    self.save_disagg_results(results)
def post_execute(self, times):
    """
    Compute and store average losses from the losses_by_event dataset,
    and then loss curves and maps.

    :param times:
        a sequence of times; if not empty it is stored, together with the
        mean of ``self.events_per_sid``, in the attributes of the dataset
        ``task_info/start_ebrisk``
    :raises AssertionError:
        if the sanity check against the asset loss table fails (raised by
        ``numpy.testing.assert_allclose``)
    """
    if len(times):
        try:
            dset = self.datastore['task_info/start_ebrisk']
        except KeyError:
            # can happen for mysterious race conditions on some machines
            pass
        else:
            # store the time information plus the events_per_sid info
            dset.attrs['times'] = times
            dset.attrs['events_per_sid'] = numpy.mean(self.events_per_sid)
    oq = self.oqparam
    shp = self.get_shape(self.L)  # (L, T...)
    text = ' x '.join(
        '%d(%s)' % (n, t) for t, n in zip(oq.aggregate_by, shp[1:]))
    logging.info('Producing %d(loss_types) x %s loss curves',
                 self.L, text)
    builder = get_loss_builder(self.datastore)
    self.build_datasets(builder)
    # the datastore is closed here and reopened below, after the tasks
    # have been run
    self.datastore.close()
    # the tasks receive the name of the file containing losses_by_event
    if 'losses_by_event' in self.datastore.parent:
        dstore = self.datastore.parent
    else:
        dstore = self.datastore
    allargs = [(dstore.filename, builder, rlzi) for rlzi in range(self.R)]
    # NOTE(review): this file handle is never explicitly closed in this
    # method
    h5 = hdf5.File(self.datastore.hdf5cache())
    acc = list(parallel.Starmap(
        compute_loss_curves_maps, allargs, hdf5path=h5.filename))
    # copy performance information from the cache to the datastore
    pd = h5['performance_data'][()]
    hdf5.extend3(self.datastore.filename, 'performance_data', pd)
    self.datastore.open('r+')  # reopen
    # NOTE(review): the destination key ends with `_and_maps` while the
    # source key ends with `_maps` - confirm that this is intentional
    self.datastore['task_info/compute_loss_curves_and_maps'] = (
        h5['task_info/compute_loss_curves_maps'][()])
    # NOTE(review): second open('r+') in a row - looks redundant, confirm
    self.datastore.open('r+')
    with self.monitor('saving loss_curves and maps', autoflush=True):
        for r, (curves, maps) in acc:
            if len(curves):  # some realization can give zero contribution
                self.datastore['agg_curves-rlzs'][:, r] = curves
            if len(maps):  # conditional_loss_poes can be empty
                self.datastore['agg_maps-rlzs'][:, r] = maps
    if self.R > 1:
        logging.info('Computing aggregate loss curves statistics')
        set_rlzs_stats(self.datastore, 'agg_curves')
        self.datastore.set_attrs(
            'agg_curves-stats', return_periods=builder.return_periods,
            loss_types=' '.join(self.crmodel.loss_types))
        if oq.conditional_loss_poes:
            logging.info('Computing aggregate loss maps statistics')
            set_rlzs_stats(self.datastore, 'agg_maps')

    # sanity check with the asset_loss_table
    if oq.asset_loss_table and len(oq.aggregate_by) == 1:
        alt = self.datastore['asset_loss_table'][()]
        if alt.sum() == 0:  # nothing was saved
            return
        logging.info('Checking the loss curves')
        # the first tag is skipped by the [1:] slice
        tags = getattr(self.assetcol.tagcol, oq.aggregate_by[0])[1:]
        T = len(tags)
        P = len(builder.return_periods)
        # sanity check on the loss curves for simple tag aggregation
        arr = self.assetcol.aggregate_by(oq.aggregate_by, alt)
        # shape (T, E, L)
        rlzs = self.datastore['events']['rlz']
        curves = numpy.zeros((P, self.R, self.L, T))
        # recompute the curves from the aggregated asset loss table and
        # compare them with the ones stored in agg_curves-rlzs
        for t in range(T):
            for r in range(self.R):
                for l in range(self.L):
                    curves[:, r, l, t] = losses_by_period(
                        arr[t, rlzs == r, l],
                        builder.return_periods,
                        builder.num_events[r],
                        builder.eff_time)
        numpy.testing.assert_allclose(
            curves, self.datastore['agg_curves-rlzs'][()])
def build_events_from_sources(self):
    """
    Prefilter the composite source model and store the source_info.

    The sources are first weighted by their number of ruptures (counted
    in parallel for the sources with code in ``b'AMC'``), then the source
    groups are split by weight and sent to the ``sample_ruptures`` tasks.
    The returned rupture arrays get sequential ``id`` values and are
    appended to the ``ruptures`` and ``rupgeoms`` datasets; finally the
    source info, the realization info and the events are stored.

    :raises MemoryError: if a task returned ``None`` instead of a dict
    :raises RuntimeError: if no ruptures were generated
    """
    oq = self.oqparam
    gsims_by_trt = self.csm.full_lt.get_gsims_by_trt()
    sources = self.csm.get_sources()
    # weighting the heavy sources
    nrups = parallel.Starmap(
        count_ruptures, [(src,) for src in sources if src.code in b'AMC'],
        progress=logging.debug).reduce()
    for src in sources:
        try:
            src.num_ruptures = nrups[src.source_id]
        except KeyError:
            # source not counted in parallel: count it here
            src.num_ruptures = src.count_ruptures()
        src.weight = src.num_ruptures
    maxweight = sum(sg.weight for sg in self.csm.src_groups) / (
        oq.concurrent_tasks or 1)
    eff_ruptures = AccumDict(accum=0)  # grp_id => potential ruptures
    source_data = AccumDict(accum=[])
    allargs = []
    if oq.is_ucerf():
        # manage the filtering in a special way
        for sg in self.csm.src_groups:
            for src in sg:
                src.src_filter = self.srcfilter
        srcfilter = nofilter  # otherwise it would be ultra-slow
    else:
        srcfilter = self.srcfilter
    logging.info('Building ruptures')
    for sg in self.csm.src_groups:
        if not sg.sources:
            continue
        logging.info('Sending %s', sg)
        cmaker = ContextMaker(sg.trt, gsims_by_trt[sg.trt], oq)
        for src_group in sg.split(maxweight):
            allargs.append((src_group, cmaker, srcfilter.sitecol))
    smap = parallel.Starmap(
        sample_ruptures, allargs, h5=self.datastore.hdf5)
    mon = self.monitor('saving ruptures')
    self.nruptures = 0  # estimated classical ruptures within maxdist
    for dic in smap:
        # NB: dic should be a dictionary, but when the calculation dies
        # for an OOM it can become None, thus giving a very confusing error
        if dic is None:
            raise MemoryError('You ran out of memory!')
        rup_array = dic['rup_array']
        if len(rup_array) == 0:
            continue
        if dic['source_data']:
            source_data += dic['source_data']
        if dic['eff_ruptures']:
            eff_ruptures += dic['eff_ruptures']
        with mon:
            # assign sequential rupture ids across all the tasks
            n = len(rup_array)
            rup_array['id'] = numpy.arange(
                self.nruptures, self.nruptures + n)
            self.nruptures += n
            hdf5.extend(self.datastore['ruptures'], rup_array)
            hdf5.extend(self.datastore['rupgeoms'], rup_array.geom)
    if len(self.datastore['ruptures']) == 0:
        raise RuntimeError('No ruptures were generated, perhaps the '
                           'investigation time is too short')

    # don't change the order of the 3 things below!
    self.store_source_info(source_data)
    self.store_rlz_info(eff_ruptures)
    imp = calc.RuptureImporter(self.datastore)
    with self.monitor('saving ruptures and events'):
        imp.import_rups_events(
            self.datastore.getitem('ruptures')[()], get_rupture_getters)
def full_disaggregation(self, curves):
    """
    Run the disaggregation phase.

    The composite source model is filtered, the bin edges are built and
    saved, the intensity measure levels are computed with
    ``disagg.make_iml4`` and one ``compute_disagg`` task is submitted for
    each block of sources, grouped by source model and tectonic region
    type.

    :param curves: a list of hazard curves, one per site

    The curves can be all None if iml_disagg is set in the job.ini

    :returns: the accumulator obtained by reducing the task results with
        ``self.agg_result``
    """
    oq = self.oqparam
    tl = oq.truncation_level
    src_filter = SourceFilter(self.sitecol, oq.maximum_distance,
                              use_rtree=False)
    csm = self.csm.filter(src_filter)  # fine filtering
    self.datastore['csm_info'] = csm.info
    self.bin_edges = {}

    # build trt_edges
    trts = tuple(sorted(set(sg.trt for smodel in csm.source_models
                            for sg in smodel.src_groups)))
    trt_num = {trt: i for i, trt in enumerate(trts)}
    self.trts = trts

    # build mag_edges
    min_mag = min(sg.min_mag for smodel in csm.source_models
                  for sg in smodel.src_groups)
    max_mag = max(sg.max_mag for smodel in csm.source_models
                  for sg in smodel.src_groups)
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / oq.mag_bin_width)),
        int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))

    # build dist_edges
    maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))

    # build eps_edges (computed once)
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max_mag)
    lon_edges, lat_edges = {}, {}  # by sid
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
    self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
    self.save_bin_edges()

    # build all_args
    all_args = []
    maxweight = csm.get_maxweight(oq.concurrent_tasks)
    mon = self.monitor('disaggregation')
    R = len(self.rlzs_assoc.realizations)
    # when no poes_disagg are given a single placeholder (None) is used,
    # both to build iml4 and to index it
    poes_disagg = oq.poes_disagg or (None,)
    iml4 = disagg.make_iml4(
        R, oq.imtls, oq.iml_disagg, poes_disagg, curves)
    self.imldict = {}  # sid, rlzi, poe, imt -> iml
    for s in self.sitecol.sids:
        for r in range(R):
            for p, poe in enumerate(poes_disagg):
                for m, imt in enumerate(oq.imtls):
                    self.imldict[s, r, poe, imt] = iml4[s, r, m, p]

    for smodel in csm.source_models:
        sm_id = smodel.ordinal
        for trt, groups in groupby(
                smodel.src_groups, operator.attrgetter('trt')).items():
            trti = trt_num[trt]
            # concatenate the sources of all the groups with this trt
            sources = sum([grp.sources for grp in groups], [])
            rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(trt, sm_id)
            cmaker = ContextMaker(
                rlzs_by_gsim, src_filter.integration_distance)
            for block in csm.split_in_blocks(maxweight, sources):
                all_args.append(
                    (src_filter, block, cmaker, iml4, trti,
                     self.bin_edges, oq, mon))

    self.num_ruptures = [0] * len(self.trts)
    self.cache_info = numpy.zeros(3)  # operations, cache_hits, num_zeros
    results = parallel.Starmap(compute_disagg, all_args).reduce(
        self.agg_result, AccumDict(accum={}))
    ops, hits, num_zeros = self.cache_info
    logging.info('Cache speedup %s', ops / (ops - hits))
    logging.info('Discarded zero matrices: %d', num_zeros)
    return results
def build_events_from_sources(self):
    """
    Prefilter the composite source model and store the source_info.

    The source groups are split by weight and sent to the
    ``sample_ruptures`` tasks; the returned rupture arrays are saved via
    ``self.rupser``. Then the realization info and the source info are
    stored, the ruptures are sorted by ``serial`` and the events saved.

    :raises SystemExit: with code 0 if no ruptures were generated and the
        environment variable OQ_SAMPLE_SOURCES is set
    :raises RuntimeError: if no ruptures were generated otherwise
    """
    gsims_by_trt = self.csm.full_lt.get_gsims_by_trt()
    logging.info('Building ruptures')
    maxweight = sum(sg.weight for sg in self.csm.src_groups) / (
        self.oqparam.concurrent_tasks or 1)
    eff_ruptures = AccumDict(accum=0)  # trt => potential ruptures
    calc_times = AccumDict(accum=numpy.zeros(3, F32))  # nr, ns, dt
    allargs = []
    if self.oqparam.is_ucerf():
        # manage the filtering in a special way
        for sg in self.csm.src_groups:
            for src in sg:
                src.src_filter = self.srcfilter
        srcfilter = nofilter  # otherwise it would be ultra-slow
    else:
        srcfilter = self.srcfilter
    for sg in self.csm.src_groups:
        if not sg.sources:
            continue
        logging.info('Sending %s', sg)
        # each group gets its own copy of the parameters, with the gsims
        # of its tectonic region type
        par = self.param.copy()
        par['gsims'] = gsims_by_trt[sg.trt]
        for src_group in sg.split(maxweight):
            allargs.append((src_group, srcfilter, par))
    smap = parallel.Starmap(
        sample_ruptures, allargs, h5=self.datastore.hdf5)
    mon = self.monitor('saving ruptures')
    for dic in smap:
        if dic['calc_times']:
            calc_times += dic['calc_times']
        if dic['eff_ruptures']:
            eff_ruptures += dic['eff_ruptures']
        if dic['rup_array']:
            with mon:
                self.rupser.save(dic['rup_array'])
    self.rupser.close()
    if not self.rupser.nruptures:
        if os.environ.get('OQ_SAMPLE_SOURCES'):
            raise SystemExit(0)  # success even with no ruptures
        raise RuntimeError('No ruptures were generated, perhaps the '
                           'investigation time is too short')

    # logic tree reduction, must be called before storing the events
    self.store_rlz_info(eff_ruptures)
    self.init_logic_tree(self.csm.full_lt)
    with self.monitor('store source_info'):
        self.store_source_info(calc_times)
    logging.info('Reordering the ruptures and storing the events')
    sorted_ruptures = self.datastore.getitem('ruptures')[()]
    # order the ruptures by the `serial` field
    sorted_ruptures.sort(order='serial')
    nr = len(sorted_ruptures)
    assert len(numpy.unique(sorted_ruptures['serial'])) == nr  # sanity
    self.datastore['ruptures'] = sorted_ruptures
    # renumber the `id` field of the stored ruptures as 0..nr-1
    self.datastore['ruptures']['id'] = numpy.arange(nr)
    with self.monitor('saving events'):
        self.save_events(sorted_ruptures)
def full_disaggregation(self):
    """
    Run the disaggregation phase.

    The method selects the realizations to disaggregate (array ``rlzs``
    of shape (N, Z)), computes the intensity measure levels ``self.iml4``,
    builds and saves the bin edges, checks the estimated data transfer
    and submits the ``compute_disagg`` tasks, one for each set of rupture
    indices of each source group.

    :returns: the accumulator obtained by reducing the task results with
        ``self.agg_result``
    :raises NotImplementedError: if there are atomic source groups
    :raises ValueError: if the estimated data transfer exceeds
        ``oq.max_data_transfer``
    """
    oq = self.oqparam
    tl = oq.truncation_level
    src_filter = self.src_filter()
    if hasattr(self, 'csm'):
        for sg in self.csm.src_groups:
            if sg.atomic:
                raise NotImplementedError(
                    'Atomic groups are not supported yet')

    self.full_lt = self.datastore['full_lt']
    # a single placeholder (None) is used when no poes_disagg are given
    self.poes_disagg = oq.poes_disagg or (None,)
    self.imts = list(oq.imtls)
    self.ws = [rlz.weight for rlz in self.full_lt.get_realizations()]
    self.pgetter = getters.PmapGetter(
        self.datastore, self.ws, self.sitecol.sids)

    # build array rlzs (N, Z)
    if oq.rlz_index is None:
        Z = oq.num_rlzs_disagg
        rlzs = numpy.zeros((self.N, Z), int)
        if self.R > 1:
            # for each site take the Z realizations closest to the mean
            for sid in self.sitecol.sids:
                curves = numpy.array(
                    [pc.array for pc in self.pgetter.get_pcurves(sid)])
                mean = getters.build_stat_curve(
                    curves, oq.imtls, stats.mean_curve, self.ws)
                rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
        self.datastore['best_rlzs'] = rlzs
    else:
        Z = len(oq.rlz_index)
        rlzs = numpy.zeros((self.N, Z), int)
        for z in range(Z):
            rlzs[:, z] = oq.rlz_index[z]
    assert Z <= self.R, (Z, self.R)
    self.Z = Z
    self.rlzs = rlzs

    if oq.iml_disagg:
        # no hazard curves are needed
        self.poe_id = {None: 0}
        curves = [[None for z in range(Z)] for s in range(self.N)]
        self.ok_sites = set(self.sitecol.sids)
    else:
        self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
        curves = [self.get_curve(sid, rlzs[sid])
                  for sid in self.sitecol.sids]
        self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
    self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls,
                      self.poes_disagg, curves)
    if oq.disagg_by_src:
        self.build_disagg_by_src(rlzs)

    # build trt_edges
    trts = tuple(self.full_lt.trts)
    trt_num = {trt: i for i, trt in enumerate(trts)}
    self.trts = trts

    # build mag_edges
    mags = [float(mag) for mag in self.datastore['source_mags']]
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min(mags) / oq.mag_bin_width)),
        int(numpy.ceil(max(mags) / oq.mag_bin_width) + 1))

    # build dist_edges
    maxdist = max(oq.maximum_distance(trt) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))

    # build eps_edges (computed once)
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max(mags))
    lon_edges, lat_edges = {}, {}  # by sid
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
    self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
    shapedic = self.save_bin_edges()
    del shapedic['trt']
    shapedic['N'] = self.N
    shapedic['M'] = len(oq.imtls)
    # use the effective number of poes and tasks: with iml_disagg the
    # list oq.poes_disagg is empty and oq.concurrent_tasks can be 0; a
    # zero-sized dimension would presumably zero the estimate below
    # (get_array_nbytes is not visible here - TODO confirm)
    shapedic['P'] = len(self.poes_disagg)
    shapedic['Z'] = Z
    shapedic['concurrent_tasks'] = oq.concurrent_tasks or 1
    nbytes, msg = get_array_nbytes(shapedic)
    if nbytes > oq.max_data_transfer:
        raise ValueError('Estimated data transfer too big\n%s' % msg)
    logging.info('Estimated data transfer: %s', msg)

    self.imldict = {}  # sid, rlz, poe, imt -> iml
    for s in self.sitecol.sids:
        for z, rlz in enumerate(rlzs[s]):
            for p, poe in enumerate(self.poes_disagg):
                for m, imt in enumerate(oq.imtls):
                    self.imldict[s, rlz, poe, imt] = self.iml4[s, m, p, z]

    # submit #groups disaggregation tasks
    dstore = (self.datastore.parent if self.datastore.parent
              else self.datastore)
    indices = get_indices(dstore, oq.concurrent_tasks or 1)
    self.datastore.swmr_on()
    smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5)
    for grp_id, trt in self.full_lt.trt_by_grp.items():
        logging.info('Group #%d, sending rup_data for %s', grp_id, trt)
        trti = trt_num[trt]
        cmaker = ContextMaker(
            trt, self.full_lt.get_rlzs_by_gsim(grp_id),
            {'truncation_level': oq.truncation_level,
             'maximum_distance': src_filter.integration_distance,
             'filter_distance': oq.filter_distance,
             'imtls': oq.imtls})
        for idxs in indices[grp_id]:
            smap.submit((dstore, idxs, cmaker, self.iml4, trti,
                         self.bin_edges))
    results = smap.reduce(self.agg_result, AccumDict(accum={}))
    return results  # sid -> trti-> 8D array
def get_models(self):
    """
    Generate the source models of the logic tree, after reading them
    (in parallel if ``self.in_memory`` is set) and applying the
    uncertainties. If ``self.hdf5`` is set, the datasets ``source_info``,
    ``source_geom`` and ``source_mfds`` are created in it.

    :yields: :class:`openquake.commonlib.logictree.LtSourceModel` tuples
    """
    oq = self.oqparam
    # the nodal plane and hypocenter distributions are removed when the
    # pointsource_distance is exactly {'default': 0.0}
    spinning_off = self.oqparam.pointsource_distance == {'default': 0.0}
    if spinning_off:
        logging.info('Removing nodal plane and hypocenter distributions')
    dist = ('no' if os.environ.get('OQ_DISTRIBUTE') == 'no'
            else 'processpool')
    smlt_dir = os.path.dirname(self.source_model_lt.filename)
    converter = sourceconverter.SourceConverter(
        oq.investigation_time,
        oq.rupture_mesh_spacing,
        oq.complex_fault_mesh_spacing,
        oq.width_of_mfd_bin,
        oq.area_source_discretization,
        oq.minimum_magnitude,
        not spinning_off,
        oq.source_id)
    if oq.calculation_mode.startswith('ucerf'):
        # a single source group is expected in the UCERF source model
        [grp] = nrml.to_python(oq.inputs["source_model"], converter)
        dic = {'ucerf': grp}
    elif self.in_memory:
        logging.info('Reading the source model(s) in parallel')
        smap = parallel.Starmap(
            nrml.read_source_models, distribute=dist,
            hdf5path=self.hdf5.filename if self.hdf5 else None)
        for sm in self.source_model_lt.gen_source_models(self.gsim_lt):
            for name in sm.names.split():
                fname = os.path.abspath(os.path.join(smlt_dir, name))
                smap.submit([fname], converter)
        dic = {sm.fname: sm for sm in smap}
    else:
        dic = {}
    # consider only the effective realizations
    idx = 0
    if self.hdf5:
        sources = hdf5.create(self.hdf5, 'source_info', source_info_dt)
        hdf5.create(self.hdf5, 'source_geom', point3d)
        hdf5.create(self.hdf5, 'source_mfds', hdf5.vstr)
    grp_id = 0
    for sm in self.source_model_lt.gen_source_models(self.gsim_lt):
        if 'ucerf' in dic:
            # each source model gets a copy of the UCERF group containing
            # one new source
            sg = copy.copy(dic['ucerf'])
            sm.src_groups = [sg]
            sg.id = grp_id
            src = sg[0].new(sm.ordinal, sm.names)  # one source
            src.src_group_id = grp_id
            src.id = idx
            if oq.number_of_logic_tree_samples:
                src.samples = sm.samples
            sg.sources = [src]
            idx += 1
            grp_id += 1
            data = [((sg.id, src.source_id, src.code, 0, 0, -1,
                      src.num_ruptures, 0, 0, 0, idx))]
            # NOTE(review): `sources` is bound only if self.hdf5 is set;
            # presumably that is always the case for UCERF - verify
            hdf5.extend(sources, numpy.array(data, source_info_dt))
        else:
            self.apply_uncertainties(sm, idx, dic)
        yield sm
    if self.mags and self.hdf5 and 'site_model' not in oq.inputs:
        mags_by_trt = {trt: sorted(ms) for trt, ms in self.mags.items()}
        idist = self.gsim_lt.get_integration_distance(mags_by_trt, oq)
        self.hdf5['integration_distance'] = idist
        self.hdf5.set_nbytes('integration_distance')
        hdf5.extend(self.hdf5['source_mfds'],
                    numpy.array(list(self.mfds), hdf5.vstr))

    # log if some source file is being used more than once
    # NOTE(review): `dupl` is accumulated but never read in this method
    dupl = 0
    for fname, hits in self.fname_hits.items():
        if hits > 1:
            logging.info('%s has been considered %d times', fname, hits)
            if not self.changes:
                dupl += hits
    if self.changes:
        logging.info('Applied %d changes to the composite source model',
                     self.changes)
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.

    :returns:
        the accumulator produced by reducing the task results with
        ``self.agg_dicts``; an empty dictionary if only the
        post-processing is performed; None in the case of a
        preclassical calculation
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        # the hazard was already computed: read the logic tree from the
        # parent calculation and compute only the statistics
        with util.read(self.oqparam.hazard_calculation_id) as parent:
            self.full_lt = parent['full_lt']
        self.calc_stats()  # post-processing
        return {}

    assert oq.max_sites_per_tile > oq.max_sites_disagg, (
        oq.max_sites_per_tile, oq.max_sites_disagg)
    psd = self.set_psd()  # must go before to set the pointsource_distance
    run_preclassical(self.csm, oq, self.datastore)

    # exit early if we want to perform only a preclassical
    if oq.calculation_mode == 'preclassical':
        recs = [tuple(row) for row in self.csm.source_info.values()]
        self.datastore['source_info'] = numpy.array(
            recs, readinput.source_info_dt)
        self.datastore['full_lt'] = self.csm.full_lt
        self.datastore.swmr_on()  # fixes HDF5 error in build_hazard
        return

    acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
    smap = parallel.Starmap(
        classical, self.get_args(acc0), h5=self.datastore.hdf5)
    smap.monitor.save('srcfilter', self.src_filter())
    self.datastore.swmr_on()
    smap.h5 = self.datastore.hdf5
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    try:
        acc = smap.reduce(self.agg_dicts, acc0)
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        # the source info and the by_task information are stored even
        # if the reduction raised an error
        source_ids = self.store_source_info(self.calc_times)
        if self.by_task:
            logging.info('Storing by_task information')
            # 1-tuple, passed as third argument to create_dset
            num_tasks = max(self.by_task) + 1,
            er = self.datastore.create_dset('by_task/eff_ruptures',
                                            U32, num_tasks)
            es = self.datastore.create_dset('by_task/eff_sites',
                                            U32, num_tasks)
            si = self.datastore.create_dset('by_task/srcids',
                                            hdf5.vstr, num_tasks,
                                            fillvalue=None)
            for task_no, rec in self.by_task.items():
                effrups, effsites, srcids = rec
                er[task_no] = effrups
                es[task_no] = effsites
                si[task_no] = ' '.join(source_ids[s] for s in srcids)
            self.by_task.clear()
    if self.calc_times:  # can be empty in case of errors
        self.numctxs = sum(arr[0] for arr in self.calc_times.values())
        numsites = sum(arr[1] for arr in self.calc_times.values())
        logging.info('Total number of contexts: {:_d}'.format(
            int(self.numctxs)))
        logging.info('Average number of sites per context: %d',
                     numsites / self.numctxs)
    if psd:
        # largest pointsource_distance over all the tectonic region types
        psdist = max(max(psd.ddic[trt].values()) for trt in psd.ddic)
        if psdist and self.maxradius >= psdist / 2:
            logging.warning('The pointsource_distance of %d km is too '
                            'small compared to a maxradius of %d km',
                            psdist, self.maxradius)
    self.calc_times.clear()  # save a bit of memory
    return acc
def full_disaggregation(self):
    """
    Run the disaggregation phase.

    The method gets the bin edges from ``disagg.get_edges_shapedic``,
    selects the realizations to disaggregate (array ``rlzs`` of shape
    (N, Z), stored as ``best_rlzs``), computes and stores the intensity
    measure levels ``iml4``, splits the rupture indices in blocks by
    weight and submits one ``compute_disagg`` task per block. A summary
    of the submitted tasks is stored in the ``disagg_task`` dataset.

    :returns: the accumulator obtained by reducing the task results with
        ``self.agg_result``
    :raises NotImplementedError: if there are atomic source groups
    :raises ValueError: if the estimated data transfer exceeds
        ``oq.max_data_transfer``
    """
    oq = self.oqparam
    mags_by_trt = self.datastore['source_mags']
    all_edges, self.shapedic = disagg.get_edges_shapedic(
        oq, self.sitecol, mags_by_trt)
    # the last element of all_edges are the tectonic region types
    *self.bin_edges, self.trts = all_edges
    if hasattr(self, 'csm'):
        for sg in self.csm.src_groups:
            if sg.atomic:
                raise NotImplementedError(
                    'Atomic groups are not supported yet')
    elif self.datastore['source_info'].attrs['atomic']:
        raise NotImplementedError(
            'Atomic groups are not supported yet')

    self.full_lt = self.datastore['full_lt']
    # a single placeholder (None) is used when no poes_disagg are given
    self.poes_disagg = oq.poes_disagg or (None,)
    self.imts = list(oq.imtls)
    self.M = len(self.imts)
    ws = [rlz.weight for rlz in self.full_lt.get_realizations()]
    self.pgetter = getters.PmapGetter(
        self.datastore, ws, self.sitecol.sids)

    # build array rlzs (N, Z)
    if oq.rlz_index is None:
        Z = oq.num_rlzs_disagg or 1
        rlzs = numpy.zeros((self.N, Z), int)
        if self.R > 1:
            # for each site take the Z realizations closest to the mean
            for sid in self.sitecol.sids:
                curves = numpy.array(
                    [pc.array for pc in self.pgetter.get_pcurves(sid)])
                mean = getters.build_stat_curve(
                    curves, oq.imtls, stats.mean_curve, ws)
                rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
        self.datastore['best_rlzs'] = rlzs
    else:
        Z = len(oq.rlz_index)
        rlzs = numpy.zeros((self.N, Z), int)
        for z in range(Z):
            rlzs[:, z] = oq.rlz_index[z]
        self.datastore['best_rlzs'] = rlzs
    assert Z <= self.R, (Z, self.R)
    self.Z = Z
    self.rlzs = rlzs

    if oq.iml_disagg:
        # no hazard curves are needed
        self.poe_id = {None: 0}
        curves = [[None for z in range(Z)] for s in range(self.N)]
        self.ok_sites = set(self.sitecol.sids)
    else:
        self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
        curves = [self.get_curve(sid, rlzs[sid])
                  for sid in self.sitecol.sids]
        self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
    self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls,
                      self.poes_disagg, curves)
    self.datastore['iml4'] = self.iml4
    # array of zeros with the same shape as iml4.array
    self.datastore['poe4'] = numpy.zeros_like(self.iml4.array)

    self.save_bin_edges()
    tot = get_outputs_size(self.shapedic, oq.disagg_outputs)
    logging.info('Total output size: %s', humansize(sum(tot.values())))
    self.imldic = {}  # sid, rlz, poe, imt -> iml
    for s in self.sitecol.sids:
        iml3 = self.iml4[s]
        for z, rlz in enumerate(rlzs[s]):
            for p, poe in enumerate(self.poes_disagg):
                for m, imt in enumerate(oq.imtls):
                    self.imldic[s, rlz, poe, imt] = iml3[m, p, z]

    # submit disaggregation tasks
    dstore = (self.datastore.parent if self.datastore.parent
              else self.datastore)
    mag_edges = self.bin_edges[0]
    indices = get_indices_by_gidx_mag(dstore, mag_edges)
    allargs = []
    totweight = sum(sum(ri.weight for ri in indices[gm])
                    for gm in indices)
    maxweight = int(numpy.ceil(totweight / (oq.concurrent_tasks or 1)))
    grp_ids = dstore['grp_ids'][:]
    rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(grp_ids)
    num_eff_rlzs = len(self.full_lt.sm_rlzs)
    task_inputs = []
    # G = maximum number of gsims, U = maximum number of ruptures per task
    G, U = 0, 0
    for gidx, magi in indices:
        trti = grp_ids[gidx][0] // num_eff_rlzs
        trt = self.trts[trti]
        cmaker = ContextMaker(
            trt, rlzs_by_gsim[gidx],
            {'truncation_level': oq.truncation_level,
             'maximum_distance': oq.maximum_distance,
             'collapse_level': oq.collapse_level,
             'imtls': oq.imtls})
        G = max(G, len(cmaker.gsims))
        for rupidxs in block_splitter(
                indices[gidx, magi], maxweight, weight):
            idxs = numpy.array([ri.index for ri in rupidxs])
            U = max(U, len(idxs))
            # the magnitude edges are not passed to the tasks
            allargs.append((dstore, idxs, cmaker, self.iml4, trti, magi,
                            self.bin_edges[1:], oq))
            task_inputs.append((trti, magi, len(idxs)))

    nbytes, msg = get_array_nbytes(dict(N=self.N, M=self.M, G=G, U=U))
    logging.info('Maximum mean_std per task:\n%s', msg)
    # estimate the data transfer: one output array per task
    sd = self.shapedic.copy()
    sd.pop('trt')
    sd.pop('mag')
    sd['tasks'] = numpy.ceil(len(allargs))
    nbytes, msg = get_array_nbytes(sd)
    if nbytes > oq.max_data_transfer:
        raise ValueError(
            'Estimated data transfer too big\n%s > max_data_transfer=%s' %
            (msg, humansize(oq.max_data_transfer)))
    logging.info('Estimated data transfer:\n%s', msg)
    # store one record (trti, magi, nrups) per submitted task
    dt = numpy.dtype([('trti', U8), ('magi', U8), ('nrups', U32)])
    self.datastore['disagg_task'] = numpy.array(task_inputs, dt)
    self.datastore.swmr_on()
    smap = parallel.Starmap(
        compute_disagg, allargs, h5=self.datastore.hdf5)
    results = smap.reduce(self.agg_result, AccumDict(accum={}))
    return results  # imti, sid -> trti, magi -> 6D array
def run_preclassical(csm, oqparam, h5):
    """
    Run the ``preclassical`` task on the non-atomic sources, grouped by
    source group and magnitude-scaling relationship name, then replace
    the source groups of the composite source model with the sources
    returned by the tasks. The atomic sources are not sent to the tasks:
    only their number of ruptures is counted.

    :param csm: a CompositeSourceModel with attribute .srcfilter
    :param oqparam: the parameters in job.ini file
    :param h5: a DataStore instance
    """
    # NOTE(review): the code below reads csm.sitecol, not csm.srcfilter
    logging.info('Sending %s', csm.sitecol)

    # do nothing for atomic sources except counting the ruptures
    for src in csm.get_sources(atomic=True):
        src.num_ruptures = src.count_ruptures()
        src.nsites = len(csm.sitecol)

    # run preclassical for non-atomic sources
    sources_by_grp = groupby(
        csm.get_sources(atomic=False),
        lambda src: (src.grp_id, msr_name(src)))
    param = dict(maximum_distance=oqparam.maximum_distance,
                 pointsource_distance=oqparam.pointsource_distance,
                 ps_grid_spacing=oqparam.ps_grid_spacing,
                 split_sources=oqparam.split_sources)
    # the filter uses a reduced site collection, if there is one
    srcfilter = SourceFilter(
        csm.sitecol.reduce(10000) if csm.sitecol else None,
        oqparam.maximum_distance)
    # with a single group of sources the task is not distributed
    res = parallel.Starmap(
        preclassical,
        ((srcs, srcfilter, param) for srcs in sources_by_grp.values()),
        h5=h5, distribute=None if len(sources_by_grp) > 1 else 'no'
    ).reduce()

    if res and res['before'] != res['after']:
        logging.info('Reduced the number of sources from {:_d} -> {:_d}'.
                     format(res['before'], res['after']))

    if res and h5:
        csm.update_source_info(res['calc_times'], nsites=True)

    # the non-string keys of res are group IDs associated to sources
    for grp_id, srcs in res.items():
        # srcs can be empty if the minimum_magnitude filter is on
        if srcs and not isinstance(grp_id, str):
            newsg = SourceGroup(srcs[0].tectonic_region_type)
            newsg.sources = srcs
            csm.src_groups[grp_id] = newsg

    # sanity check
    for sg in csm.src_groups:
        for src in sg:
            assert src.num_ruptures
            assert src.nsites

    # store ps_grid data, if any
    for key, sources in res.items():
        if isinstance(key, str) and key.startswith('ps_grid/'):
            arrays = []
            for ps in sources:
                if hasattr(ps, 'location'):
                    # flat list lon, lat, lon, lat, ... starting from the
                    # location of the source itself
                    lonlats = [ps.location.x, ps.location.y]
                    for src in getattr(ps, 'pointsources', []):
                        lonlats.extend([src.location.x, src.location.y])
                    arrays.append(F32(lonlats))
            h5[key] = arrays
def execute(self):
    """
    Build the ruptures and the events from the sources (or read them
    from a parent calculation if ``hazard_calculation_id`` is set), then
    run ``self.core_task`` in parallel on the rupture getters and store
    the indices of the ground motion fields.

    :returns:
        the accumulator obtained by reducing the task results with
        ``self.agg_dicts``, or an empty dictionary if neither the GMFs
        nor the hazard curves are required
    :raises InvalidFile: if there are no intensity measure types
    :raises RuntimeError: if the GMFs are required but none was generated
    """
    oq = self.oqparam
    self.set_param()
    self.offset = 0
    srcfilter = self.src_filter(self.datastore.tempname)
    self.indices = AccumDict(accum=[])  # sid, idx -> indices
    if oq.hazard_calculation_id:  # from ruptures
        self.datastore.parent = util.read(oq.hazard_calculation_id)
        self.init_logic_tree(self.datastore.parent['full_lt'])
    else:  # from sources
        self.build_events_from_sources(srcfilter)
        if (oq.ground_motion_fields is False and
                oq.hazard_curves_from_gmfs is False):
            # only the ruptures were required
            return {}
    if not oq.imtls:
        raise InvalidFile('There are no intensity measure types in %s' %
                          oq.inputs['job_ini'])
    N = len(self.sitecol.complete)
    if oq.ground_motion_fields:
        # create the gmf_data datasets before starting the tasks
        nrups = len(self.datastore['ruptures'])
        self.datastore.create_dset('gmf_data/data', oq.gmf_data_dt())
        self.datastore.create_dset('gmf_data/sigma_epsilon',
                                   sig_eps_dt(oq.imtls))
        self.datastore.create_dset(
            'gmf_data/indices', hdf5.vuint32, shape=(N, 2), fillvalue=None)
        self.datastore.create_dset('gmf_data/events_by_sid', U32, (N,))
        self.datastore.create_dset('gmf_data/time_by_rup',
                                   time_dt, (nrups,), fillvalue=None)
    if oq.hazard_curves_from_gmfs:
        self.param['rlz_by_event'] = self.datastore['events']['rlz_id']

    # compute_gmfs in parallel
    self.datastore.swmr_on()
    logging.info('Reading %d ruptures', len(self.datastore['ruptures']))
    iterargs = ((rgetter, srcfilter, self.param)
                for rgetter in gen_rupture_getters(
                        self.datastore, srcfilter))
    acc = parallel.Starmap(
        self.core_task.__func__, iterargs, h5=self.datastore.hdf5,
        num_cores=oq.num_cores
    ).reduce(self.agg_dicts, self.acc0())

    if self.indices:
        dset = self.datastore['gmf_data/indices']
        num_evs = self.datastore['gmf_data/events_by_sid']
        logging.info('Saving gmf_data/indices')
        with self.monitor('saving gmf_data/indices', measuremem=True):
            self.datastore['gmf_data/imts'] = ' '.join(oq.imtls)
            for sid in self.sitecol.complete.sids:
                # start and stop indices accumulated for the site
                start = numpy.array(self.indices[sid, 0])
                stop = numpy.array(self.indices[sid, 1])
                dset[sid, 0] = start
                dset[sid, 1] = stop
                num_evs[sid] = (stop - start).sum()
        avg_events_by_sid = num_evs[()].sum() / N
        logging.info('Found ~%d GMVs per site', avg_events_by_sid)
    elif oq.ground_motion_fields:
        raise RuntimeError('No GMFs were generated, perhaps they were '
                           'all below the minimum_intensity threshold')
    return acc
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.

    :returns:
        the accumulator obtained by reducing the task results with
        `self.agg_dicts`; an empty dictionary if only the post-processing
        of a previous calculation is performed
    :raises RuntimeError: if the `source_mags` dataset is empty
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        with util.read(self.oqparam.hazard_calculation_id) as parent:
            self.csm_info = parent['csm_info']
        self.calc_stats()  # post-processing
        return {}
    mags = self.datastore['source_mags'][()]
    if len(mags) == 0:  # everything was discarded
        raise RuntimeError('All sources were discarded!?')
    gsims_by_trt = self.csm_info.get_gsims_by_trt()
    dist_bins = {trt: oq.maximum_distance.get_dist_bins(trt)
                 for trt in gsims_by_trt}
    if oq.pointsource_distance:
        logging.info('Computing effect of the ruptures')
        mon = self.monitor('rupture effect')
        effect = parallel.Starmap.apply(
            get_effect_by_mag,
            (mags, self.sitecol.one(), gsims_by_trt,
             oq.maximum_distance, oq.imtls, mon)).reduce()
        self.datastore['effect'] = effect
        self.datastore.set_attrs('effect', **dist_bins)
        # one Effect per tectonic region type, taken from column `t`
        self.effect = {
            trt: Effect({mag: effect[mag][:, t] for mag in effect},
                        dist_bins[trt],
                        getdefault(oq.pointsource_distance, trt))
            for t, trt in enumerate(gsims_by_trt)}
        for trt, eff in self.effect.items():
            oq.maximum_distance.magdist[trt] = eff.dist_by_mag()
            oq.pointsource_distance[trt] = eff.dist_by_mag(
                eff.collapse_value)
    else:
        self.effect = {}
    smap = parallel.Starmap(
        self.core_task.__func__, h5=self.datastore.hdf5,
        num_cores=oq.num_cores)
    smap.task_queue = list(self.gen_task_queue())  # really fast
    acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
    self.datastore.swmr_on()
    smap.h5 = self.datastore.hdf5
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    try:
        acc = smap.get_results().reduce(self.agg_dicts, acc0)
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        # the source_info is stored even if the computation failed
        with self.monitor('store source_info'):
            self.store_source_info(self.calc_times)
        if self.by_task:
            logging.info('Storing by_task information')
            # 1-element tuple used as the shape of the by_task datasets
            num_tasks = (max(self.by_task) + 1,)
            er = self.datastore.create_dset(
                'by_task/eff_ruptures', U32, num_tasks)
            es = self.datastore.create_dset(
                'by_task/eff_sites', U32, num_tasks)
            si = self.datastore.create_dset(
                'by_task/srcids', hdf5.vuint32, num_tasks, fillvalue=None)
            for task_no, rec in self.by_task.items():
                effrups, effsites, srcids = rec
                er[task_no] = effrups
                es[task_no] = effsites
                si[task_no] = srcids
            self.by_task.clear()
    numrups = sum(arr[0] for arr in self.calc_times.values())
    numsites = sum(arr[1] for arr in self.calc_times.values())
    logging.info('Effective number of ruptures: %d/%d',
                 numrups, self.totrups)
    if numrups:
        # guard: with no effective ruptures the ratio is undefined and the
        # division would fail (ZeroDivisionError) or produce a NaN
        logging.info('Effective number of sites per rupture: %d',
                     numsites / numrups)
    self.calc_times.clear()  # save a bit of memory
    return acc
def build_events_from_sources(self, srcfilter):
    """
    Prefilter the composite source model and store the source_info

    :param srcfilter:
        the source filter passed, together with the sources and the
        parameters, to each `build_ruptures` task
    :raises RuntimeError: if no ruptures were saved by the tasks
    """
    oq = self.oqparam
    gsims_by_trt = self.csm.info.get_gsims_by_trt()
    logging.info('Building ruptures')
    eff_ruptures = AccumDict(accum=0)  # grp_id => potential ruptures
    calc_times = AccumDict(accum=numpy.zeros(3, F32))  # nr, ns, dt
    ses_idx = 0
    task_args = []
    for source_model in self.csm.source_models:
        logging.info('Sending %s', source_model)
        for group in source_model.src_groups:
            if not group.sources:
                continue
            params = self.param.copy()
            params['gsims'] = gsims_by_trt[group.trt]
            if group.atomic:  # do not split the group
                task_args.append((group, srcfilter, params))
                continue
            # traditional groups
            for block in self.block_splitter(group.sources, key=by_grp):
                if 'ucerf' not in oq.calculation_mode:
                    task_args.append((block, srcfilter, params))
                    continue
                # ucerf: one task per stochastic event set
                for ses_no in range(oq.ses_per_logic_tree_path):
                    params = params.copy()  # avoid mutating the dict
                    params['ses_seeds'] = [
                        (ses_idx, oq.ses_seed + ses_no + 1)]
                    task_args.append((block, srcfilter, params))
                    ses_idx += 1
    smap = parallel.Starmap(
        self.build_ruptures.__func__, task_args, h5=self.datastore.hdf5)
    save_mon = self.monitor('saving ruptures')
    for result in smap:
        if result['calc_times']:
            calc_times += result['calc_times']
        if result['eff_ruptures']:
            eff_ruptures += result['eff_ruptures']
        if result['rup_array']:
            with save_mon:
                self.rupser.save(result['rup_array'])
    self.rupser.close()
    if not self.rupser.nruptures:
        raise RuntimeError('No ruptures were generated, perhaps the '
                           'investigation time is too short')

    # logic tree reduction, must be called before storing the events
    self.store_rlz_info(eff_ruptures)
    self.init_logic_tree(self.csm.info)
    with self.monitor('store source_info'):
        self.store_source_info(calc_times)
    logging.info('Reordering the ruptures and storing the events')
    attrs = self.datastore.getitem('ruptures').attrs
    sorted_ruptures = self.datastore.getitem('ruptures')[()]
    # order the ruptures by serial
    sorted_ruptures.sort(order='serial')
    num_groups = len(self.csm.info.trt_by_grp)
    grp_indices = numpy.zeros((num_groups, 2), U32)
    # each group is expected to map to a single (start, stop) pair
    indices_by_grp = get_indices(sorted_ruptures['grp_id'])
    for grp_id, [startstop] in indices_by_grp.items():
        grp_indices[grp_id] = startstop
    self.datastore['ruptures'] = sorted_ruptures
    self.datastore['ruptures']['id'] = numpy.arange(len(sorted_ruptures))
    self.datastore.set_attrs('ruptures', grp_indices=grp_indices, **attrs)
    with self.monitor('saving events'):
        self.save_events(sorted_ruptures)
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.

    :returns:
        the accumulator obtained by reducing the task results with
        `self.agg_dicts`; an empty dictionary if only the post-processing
        of a previous calculation is performed
    :raises RuntimeError: if `source_mags` is empty
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        with util.read(self.oqparam.hazard_calculation_id) as parent:
            self.full_lt = parent['full_lt']
        self.calc_stats()  # post-processing
        return {}
    mags = self.datastore['source_mags']  # by TRT
    if len(mags) == 0:  # everything was discarded
        raise RuntimeError('All sources were discarded!?')
    gsims_by_trt = self.full_lt.get_gsims_by_trt()
    if oq.pointsource_distance is not None:
        # make sure every tectonic region type has an explicit value
        for trt in gsims_by_trt:
            oq.pointsource_distance[trt] = getdefault(
                oq.pointsource_distance, trt)
    mags_by_trt = {trt: mags[trt][()] for trt in mags}
    imts_with_period = [imt for imt in oq.imtls
                        if imt == 'PGA' or imt.startswith('SA')]
    # True only if all the IMTs are PGA or SA
    imts_ok = len(imts_with_period) == len(oq.imtls)
    if (imts_ok and oq.pointsource_distance and
            oq.pointsource_distance.suggested()) or (
                imts_ok and oq.minimum_intensity):
        aw, self.psd = get_effect(
            mags_by_trt, self.sitecol.one(), gsims_by_trt, oq)
        if len(vars(aw)) > 1:  # more than _extra
            self.datastore['effect_by_mag_dst'] = aw
    elif oq.pointsource_distance:
        self.psd = oq.pointsource_distance.interp(mags_by_trt)
    else:
        self.psd = {}
    smap = parallel.Starmap(
        classical, h5=self.datastore.hdf5, num_cores=oq.num_cores)
    self.submit_tasks(smap)
    acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
    self.datastore.swmr_on()
    smap.h5 = self.datastore.hdf5
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    try:
        acc = smap.reduce(self.agg_dicts, acc0)
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        # the source_info is stored even if the computation failed
        with self.monitor('store source_info'):
            self.store_source_info(self.calc_times)
        if self.by_task:
            logging.info('Storing by_task information')
            # 1-element tuple used as the shape of the by_task datasets
            num_tasks = (max(self.by_task) + 1,)
            er = self.datastore.create_dset(
                'by_task/eff_ruptures', U32, num_tasks)
            es = self.datastore.create_dset(
                'by_task/eff_sites', U32, num_tasks)
            si = self.datastore.create_dset(
                'by_task/srcids', hdf5.vstr, num_tasks, fillvalue=None)
            for task_no, rec in self.by_task.items():
                effrups, effsites, srcids = rec
                er[task_no] = effrups
                es[task_no] = effsites
                si[task_no] = ' '.join(srcids)
            self.by_task.clear()
    self.numrups = sum(arr[0] for arr in self.calc_times.values())
    numsites = sum(arr[1] for arr in self.calc_times.values())
    logging.info('Effective number of ruptures: {:_d}/{:_d}'.format(
        int(self.numrups), self.totrups))
    if self.numrups:
        # guard: with no effective ruptures the ratio is undefined and the
        # division would fail (ZeroDivisionError) or produce a NaN
        logging.info('Effective number of sites per rupture: %d',
                     numsites / self.numrups)
    if self.psd:
        psdist = max(max(self.psd[trt].values()) for trt in self.psd)
        if psdist != -1 and self.maxradius >= psdist / 2:
            logging.warning(
                'The pointsource_distance of %d km is too '
                'small compared to a maxradius of %d km',
                psdist, self.maxradius)
    self.calc_times.clear()  # save a bit of memory
    return acc
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.

    :returns:
        the accumulator obtained by reducing the task results with
        `self.agg_dicts`; an empty dictionary if only the post-processing
        of a previous calculation is performed or if the calculation
        mode is 'preclassical' with a single site
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        with util.read(self.oqparam.hazard_calculation_id) as parent:
            self.csm_info = parent['csm_info']
        self.calc_stats()  # post-processing
        return {}
    mags = self.datastore['source_mags'][()]
    gsims_by_trt = self.csm_info.get_gsims_by_trt()
    dist_bins = {trt: oq.maximum_distance.get_dist_bins(trt)
                 for trt in gsims_by_trt}
    if oq.minimum_intensity and len(self.sitecol) == 1 and len(mags):
        logging.info('Computing effect of the ruptures')
        mon = self.monitor('rupture effect')
        effect = parallel.Starmap.apply(
            get_effect,
            (mags, self.sitecol, gsims_by_trt,
             oq.maximum_distance, oq.imtls, mon)).reduce()
        self.datastore['effect'] = effect
        self.datastore.set_attrs('effect', **dist_bins)
        # the threshold is taken from the last intensity measure type
        threshold = getdefault(oq.minimum_intensity, list(oq.imtls)[-1])
        self.effect = {
            trt: Effect({mag: effect[mag][:, t] for mag in effect},
                        dists=dist_bins[trt], threshold=threshold)
            for t, trt in enumerate(gsims_by_trt)}
    else:
        self.effect = {}
    if oq.calculation_mode == 'preclassical' and self.N == 1:
        # magnitudes as unique strings with 3 digits, used as dataset names
        mags = sorted(set('%.3f' % mag for mag in mags))
        smap = parallel.Starmap(ruptures_by_mag_dist)
        for func, args in self.gen_task_queue():
            smap.submit(args)
        counts = smap.reduce()
        ndists = oq.maximum_distance.get_dist_bins.__defaults__[0]
        # NB: the original `for mag, mag in enumerate(mags)` bound the
        # same name twice; only the magnitude string was ever used
        for mag in mags:
            arr = numpy.zeros((ndists, len(gsims_by_trt)), U32)
            for trti, trt in enumerate(gsims_by_trt):
                try:
                    arr[:, trti] = counts[trt][mag]
                except KeyError:  # no counts for this trt/mag
                    pass
            self.datastore['rups_by_mag_dist/' + mag] = arr
        self.datastore.set_attrs('rups_by_mag_dist', **dist_bins)
        self.datastore['csm_info'] = self.csm_info
        return {}
    smap = parallel.Starmap(self.core_task.__func__)
    smap.task_queue = list(self.gen_task_queue())  # really fast
    acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
    self.datastore.swmr_on()
    smap.h5 = self.datastore.hdf5
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    self.maxdists = []
    try:
        acc = smap.get_results().reduce(self.agg_dicts, acc0)
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        # the source_info is stored even if the computation failed
        if self.maxdists:
            maxdist = numpy.mean(self.maxdists)
            logging.info(
                'Using effective maximum distance for '
                'point sources %d km', maxdist)
        with self.monitor('store source_info'):
            self.store_source_info(self.calc_times)
        if self.sources_by_task:
            num_tasks = max(self.sources_by_task) + 1
            sbt = numpy.zeros(
                num_tasks, [('eff_ruptures', U32),
                            ('eff_sites', U32),
                            ('srcids', hdf5.vuint32)])
            for task_no in range(num_tasks):
                # tasks without a record get an empty placeholder
                sbt[task_no] = self.sources_by_task.get(
                    task_no, (0, 0, U32([])))
            self.datastore['sources_by_task'] = sbt
            self.sources_by_task.clear()
    numrups = sum(arr[0] for arr in self.calc_times.values())
    if self.totrups != numrups:
        logging.info('Considered %d/%d ruptures', numrups, self.totrups)
    self.calc_times.clear()  # save a bit of memory
    return acc
def calc_stats(self):
    """
    Create the hcurves/hmaps datasets (per realization and/or per
    statistic) and fill them by running `build_hazard` on each tile
    of the site collection; the results are saved via `self.save_hazard`.
    """
    oq = self.oqparam
    hstats = oq.hazard_stats()
    # initialize datasets
    imls = oq.imtls.array
    num_sites = len(self.sitecol.complete)
    num_poes = len(oq.poes)
    num_imts = self.M = len(oq.imtls)
    imts = list(oq.imtls)
    if oq.soil_intensities is None:
        num_levels = len(imls)
    else:
        num_levels = num_imts * len(oq.soil_intensities)
    levels_per_imt = self.L1 = num_levels // num_imts
    num_rlzs = len(self.realizations)
    num_stats = len(hstats)

    store_rlzs = (num_rlzs > 1 and oq.individual_curves) or not hstats
    if store_rlzs:
        self.datastore.create_dset(
            'hcurves-rlzs', F32,
            (num_sites, num_rlzs, num_imts, levels_per_imt))
        self.datastore.set_shape_attrs(
            'hcurves-rlzs', site_id=num_sites, rlz_id=num_rlzs,
            imt=imts, lvl=levels_per_imt)
        if oq.poes:
            self.datastore.create_dset(
                'hmaps-rlzs', F32,
                (num_sites, num_rlzs, num_imts, num_poes))
            self.datastore.set_shape_attrs(
                'hmaps-rlzs', site_id=num_sites, rlz_id=num_rlzs,
                imt=list(oq.imtls), poe=oq.poes)

    if hstats:
        self.datastore.create_dset(
            'hcurves-stats', F32,
            (num_sites, num_stats, num_imts, levels_per_imt))
        self.datastore.set_shape_attrs(
            'hcurves-stats', site_id=num_sites, stat=list(hstats),
            imt=imts, lvl=numpy.arange(levels_per_imt))
        if oq.poes:
            self.datastore.create_dset(
                'hmaps-stats', F32,
                (num_sites, num_stats, num_imts, num_poes))
            self.datastore.set_shape_attrs(
                'hmaps-stats', site_id=num_sites, stat=list(hstats),
                imt=list(oq.imtls), poe=oq.poes)

    ct = oq.concurrent_tasks or 1
    logging.info('Building hazard statistics')
    self.weights = [rlz.weight for rlz in self.realizations]
    # this list is very fast to generate
    allargs = []
    for tile in self.sitecol.split_in_tiles(ct):
        getter = getters.PmapGetter(
            self.datastore, self.weights, tile.sids, oq.poes)
        allargs.append((getter, num_sites, hstats, oq.individual_curves,
                        oq.max_sites_disagg, self.amplifier))
    # with few sites 'no' is passed as distribute, else parallelize as usual
    dist = 'no' if self.few_sites else None
    self.datastore.swmr_on()
    smap = parallel.Starmap(
        build_hazard, allargs, distribute=dist, h5=self.datastore.hdf5)
    smap.reduce(self.save_hazard)
def calc_stats(self):
    """
    Create the hcurves/hmaps datasets (per realization and/or per
    statistic), fill them by running `build_hazard` on each tile of the
    site collection and, if mean hazard maps are available, log their
    maximum values and save them as PNG images.

    The PNG step is skipped when `Image` is None or the calculation
    is not run from the engine.
    """
    oq = self.oqparam
    hstats = oq.hazard_stats()
    # initialize datasets
    imls = oq.imtls.array
    num_sites = len(self.sitecol.complete)
    num_poes = len(oq.poes)
    num_imts = self.M = len(oq.imtls)
    imts = list(oq.imtls)
    if oq.soil_intensities is None:
        num_levels = len(imls)
    else:
        num_levels = num_imts * len(oq.soil_intensities)
    levels_per_imt = self.L1 = num_levels // num_imts
    num_rlzs = len(self.realizations)
    num_stats = len(hstats)

    store_rlzs = (num_rlzs > 1 and oq.individual_curves) or not hstats
    if store_rlzs:
        self.datastore.create_dset(
            'hcurves-rlzs', F32,
            (num_sites, num_rlzs, num_imts, levels_per_imt))
        self.datastore.set_shape_attrs(
            'hcurves-rlzs', site_id=num_sites, rlz_id=num_rlzs,
            imt=imts, lvl=levels_per_imt)
        if oq.poes:
            self.datastore.create_dset(
                'hmaps-rlzs', F32,
                (num_sites, num_rlzs, num_imts, num_poes))
            self.datastore.set_shape_attrs(
                'hmaps-rlzs', site_id=num_sites, rlz_id=num_rlzs,
                imt=list(oq.imtls), poe=oq.poes)

    if hstats:
        self.datastore.create_dset(
            'hcurves-stats', F32,
            (num_sites, num_stats, num_imts, levels_per_imt))
        self.datastore.set_shape_attrs(
            'hcurves-stats', site_id=num_sites, stat=list(hstats),
            imt=imts, lvl=numpy.arange(levels_per_imt))
        if oq.poes:
            self.datastore.create_dset(
                'hmaps-stats', F32,
                (num_sites, num_stats, num_imts, num_poes))
            self.datastore.set_shape_attrs(
                'hmaps-stats', site_id=num_sites, stat=list(hstats),
                imt=list(oq.imtls), poe=oq.poes)

    ct = oq.concurrent_tasks or 1
    logging.info('Building hazard statistics')
    self.weights = [rlz.weight for rlz in self.realizations]
    # read from the parent datastore when there is a parent calculation
    if oq.hazard_calculation_id:
        dstore = self.datastore.parent
    else:
        dstore = self.datastore
    # this list is very fast to generate
    allargs = []
    for tile in self.sitecol.split_in_tiles(ct):
        getter = getters.PmapGetter(
            dstore, self.weights, tile.sids, oq.imtls, oq.poes)
        allargs.append((getter, num_sites, hstats, oq.individual_curves,
                        oq.max_sites_disagg, self.amplifier))
    # with few sites 'no' is passed as distribute, else parallelize as usual
    dist = 'no' if self.few_sites else None
    hazard_smap = parallel.Starmap(
        build_hazard, allargs, distribute=dist, h5=self.datastore.hdf5)
    hazard_smap.reduce(self.save_hazard)

    if 'hmaps-stats' not in self.datastore:
        return
    hmaps = self.datastore.sel('hmaps-stats', stat='mean')  # NSMP
    # maximum over all axes except the IMT one
    maxhaz = hmaps.max(axis=(0, 1, 3))
    mh = dict(zip(self.oqparam.imtls, maxhaz))
    logging.info('The maximum hazard map values are %s', mh)
    if Image is None or not self.from_engine:  # missing PIL
        return
    n_imts, n_poes = hmaps.shape[2:]
    logging.info('Saving %dx%d mean hazard maps', n_imts, n_poes)
    inv_time = oq.investigation_time
    # one PNG task per (IMT, PoE) pair
    png_args = []
    for m, imt in enumerate(self.oqparam.imtls):
        for p, poe in enumerate(self.oqparam.poes):
            info = {'m': m, 'p': p, 'imt': imt, 'poe': poe,
                    'inv_time': inv_time,
                    'calc_id': self.datastore.calc_id,
                    'array': hmaps[:, 0, m, p]}
            png_args.append((info, self.sitecol.lons, self.sitecol.lats))
    for res in parallel.Starmap(make_hmap_png, png_args):
        self.datastore['png/hmap_%(m)d_%(p)d' % res] = res['img']
def build_events_from_sources(self):
    """
    Prefilter the composite source model and store the source_info

    The sources are weighted, split by `maxweight` and sent to the
    `sample_ruptures` tasks; the resulting ruptures are appended to the
    `ruptures` and `rupgeoms` datasets and finally imported together
    with the events.

    :raises MemoryError: if a task returned None instead of a dictionary
    :raises RuntimeError: if the `ruptures` dataset is empty at the end
    """
    gsims_by_trt = self.csm.full_lt.get_gsims_by_trt()
    sources = self.csm.get_sources()
    # weighting the heavy sources
    heavy = [(src,) for src in sources if src.code in b'AMC']
    nrups = parallel.Starmap(
        count_ruptures, heavy, h5=self.datastore.hdf5).reduce()
    for src in sources:
        src.nsites = 1  # avoid 0 weight
        try:
            src.num_ruptures = nrups[src.source_id]
        except KeyError:  # not counted in parallel
            src.num_ruptures = src.count_ruptures()
    total_weight = sum(sg.weight for sg in self.csm.src_groups)
    maxweight = total_weight / (self.oqparam.concurrent_tasks or 1)
    eff_ruptures = AccumDict(accum=0)  # trt => potential ruptures
    calc_times = AccumDict(accum=numpy.zeros(3, F32))  # nr, ns, dt
    task_args = []
    if not self.oqparam.is_ucerf():
        srcfilter = self.srcfilter
    else:
        # manage the filtering in a special way
        for group in self.csm.src_groups:
            for src in group:
                src.src_filter = self.srcfilter
        srcfilter = nofilter  # otherwise it would be ultra-slow
    logging.info('Building ruptures')
    for group in self.csm.src_groups:
        if not group.sources:
            continue
        logging.info('Sending %s', group)
        params = self.param.copy()
        params['gsims'] = gsims_by_trt[group.trt]
        task_args.extend(
            (subgroup, srcfilter, params)
            for subgroup in group.split(maxweight))
    smap = parallel.Starmap(
        sample_ruptures, task_args, h5=self.datastore.hdf5)
    save_mon = self.monitor('saving ruptures')
    self.nruptures = 0
    for result in smap:
        # NB: result should be a dictionary, but when the calculation dies
        # for an OOM it can become None, thus giving a very confusing error
        if result is None:
            raise MemoryError('You ran out of memory!')
        rup_array = result['rup_array']
        if len(rup_array) == 0:
            continue
        if result['calc_times']:
            calc_times += result['calc_times']
        if result['eff_ruptures']:
            eff_ruptures += result['eff_ruptures']
        with save_mon:
            first_id = self.nruptures
            self.nruptures += len(rup_array)
            # progressive rupture IDs across all the tasks
            rup_array['id'] = numpy.arange(first_id, self.nruptures)
            hdf5.extend(self.datastore['ruptures'], rup_array)
            hdf5.extend(self.datastore['rupgeoms'], rup_array.geom)
    if len(self.datastore['ruptures']) == 0:
        raise RuntimeError('No ruptures were generated, perhaps the '
                           'investigation time is too short')

    # must be called before storing the events
    self.store_rlz_info(eff_ruptures)  # store full_lt
    self.store_source_info(calc_times)
    importer = calc.RuptureImporter(self.datastore)
    with self.monitor('saving ruptures and events'):
        importer.import_rups(self.datastore.getitem('ruptures')[()])