def _collect_bins_data(trt_num, sources, site, curves, src_group_id,
                       rlzs_assoc, gsims, imtls, poes, truncation_level,
                       n_epsilons, iml_disagg, mon):
    # returns a BinData instance
    sitecol = SiteCollection([site])
    mags = []
    dists = []
    lons = []
    lats = []
    trts = []
    pnes = collections.defaultdict(list)
    sitemesh = sitecol.mesh
    make_ctxt = mon('making contexts', measuremem=False)
    disagg_poe = mon('disaggregate_poe', measuremem=False)
    cmaker = ContextMaker(gsims)
    for source in sources:
        try:
            tect_reg = trt_num[source.tectonic_region_type]
            for rupture in source.iter_ruptures():
                with make_ctxt:
                    try:
                        sctx, rctx, dctx = cmaker.make_contexts(
                            sitecol, rupture)
                    except filters.FarAwayRupture:
                        continue
                # extract rupture parameters of interest
                mags.append(rupture.mag)
                dists.append(dctx.rjb[0])  # single site => single distance
                [closest_point] = rupture.surface.get_closest_points(sitemesh)
                lons.append(closest_point.longitude)
                lats.append(closest_point.latitude)
                trts.append(tect_reg)
                # a dictionary rlz.id, poe, imt_str -> (iml, prob_no_exceed)
                for gsim in gsims:
                    gs = str(gsim)
                    for imt_str, imls in imtls.items():
                        imt = from_string(imt_str)
                        imls = numpy.array(imls[::-1])
                        for rlz in rlzs_assoc[src_group_id, gs]:
                            rlzi = rlz.ordinal
                            iml = iml_disagg.get(imt_str)
                            curve_poes = curves[rlzi, imt_str][::-1]
                            for k, v in _disagg(
                                    iml, poes, curve_poes, imls, gsim,
                                    rupture, rlzi, imt, imt_str, sctx,
                                    rctx, dctx, truncation_level,
                                    n_epsilons, disagg_poe):
                                pnes[k].append(v)
        except Exception as err:
            etype, err, tb = sys.exc_info()
            msg = 'An error occurred with source id=%s. Error: %s'
            msg %= (source.source_id, err)
            raise_(etype, msg, tb)
    return BinData(numpy.array(mags, float),
                   numpy.array(dists, float),
                   numpy.array(lons, float),
                   numpy.array(lats, float),
                   numpy.array(trts, int),
                   pnes)
def get_hazard_curve_source(input_set):
    """
    From a dictionary input set, update the hazard curves stored in
    ``input_set["curves"]`` in place.
    """
    try:
        cmaker = ContextMaker(
            [input_set["gsims"][key] for key in input_set["gsims"]], None)
        for rupture, r_sites in input_set["ruptures_sites"]:
            gsim = input_set["gsims"][rupture.tectonic_region_type]
            sctx, rctx, dctx = cmaker.make_contexts(r_sites, rupture)
            for iimt in input_set["imts"]:
                poes = gsim.get_poes(sctx, rctx, dctx, imt.from_string(iimt),
                                     input_set["imts"][iimt],
                                     input_set["truncation_level"])
                pno = rupture.get_probability_no_exceedance(poes)
                input_set["curves"][iimt] *= r_sites.expand(
                    pno, placeholder=1)
    except Exception:  # errors are silently ignored
        pass
def __init__(self, trt, gsims):
    self.trt = trt
    self.cmaker = ContextMaker(gsims)
    self.params = sorted(self.cmaker.REQUIRES_RUPTURE_PARAMETERS -
                         set('mag strike dip rake hypo_depth'.split()))
    self.dt = numpy.dtype([
        ('rupserial', U32), ('multiplicity', U16),
        ('numsites', U32), ('occurrence_rate', F64),
        ('mag', F64), ('lon', F32), ('lat', F32), ('depth', F32),
        ('strike', F64), ('dip', F64), ('rake', F64),
        ('boundary', hdf5.vstr)] + [(param, F64) for param in self.params])
class RuptureData(object):
    """
    Container for information about the ruptures of a given
    tectonic region type.
    """
    def __init__(self, trt, gsims):
        self.trt = trt
        self.cmaker = ContextMaker(trt, gsims)
        self.params = sorted(self.cmaker.REQUIRES_RUPTURE_PARAMETERS -
                             set('mag strike dip rake hypo_depth'.split()))
        self.dt = numpy.dtype([
            ('rup_id', U32), ('srcidx', U32), ('multiplicity', U16),
            ('occurrence_rate', F64), ('mag', F32), ('lon', F32),
            ('lat', F32), ('depth', F32), ('strike', F32), ('dip', F32),
            ('rake', F32), ('boundary', hdf5.vstr)] + [
                (param, F32) for param in self.params])

    def to_array(self, ebruptures):
        """
        Convert a list of ebruptures into an array of dtype RuptureData.dt
        """
        data = []
        for ebr in ebruptures:
            rup = ebr.rupture
            self.cmaker.add_rup_params(rup)
            ruptparams = tuple(getattr(rup, param) for param in self.params)
            point = rup.surface.get_middle_point()
            multi_lons, multi_lats = rup.surface.get_surface_boundaries()
            bounds = ','.join('((%s))' % ','.join(
                '%.5f %.5f' % (lon, lat) for lon, lat in zip(lons, lats))
                for lons, lats in zip(multi_lons, multi_lats))
            try:
                rate = ebr.rupture.occurrence_rate
            except AttributeError:  # for nonparametric sources
                rate = numpy.nan
            data.append(
                (ebr.serial, ebr.srcidx, ebr.n_occ, rate,
                 rup.mag, point.x, point.y, point.z,
                 rup.surface.get_strike(), rup.surface.get_dip(),
                 rup.rake, 'MULTIPOLYGON(%s)' % decode(bounds)) + ruptparams)
        return numpy.array(data, self.dt)
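# The ``dt`` built above is a plain numpy structured dtype, so each rupture
# record becomes one typed row of an array. A minimal self-contained sketch
# of the same pattern (field list shortened, values invented; the real dtype
# also includes an hdf5.vstr field for the boundary string):
import numpy

U32, F32 = numpy.uint32, numpy.float32
dt = numpy.dtype([('rup_id', U32), ('mag', F32), ('rake', F32)])

# one tuple per rupture, in field order, exactly as in to_array()
rows = [(1, 6.5, 90.0), (2, 5.1, -90.0)]
arr = numpy.array(rows, dt)
print(arr['mag'])  # -> [6.5 5.1], column access by field name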
def get_ctx(self, rupture, site_collection):
    return ContextMaker('*', [self.gsim_class], dict(imtls={})).get_ctxs(
        [rupture], site_collection, 'src_id')[0]
def event_based(proxies, full_lt, oqparam, dstore, monitor):
    """
    Compute GMFs and optionally hazard curves
    """
    alldata = AccumDict(accum=[])
    sig_eps = []
    times = []  # rup_id, nsites, dt
    hcurves = {}  # key -> poes
    trt_smr = proxies[0]['trt_smr']
    fmon = monitor('filtering ruptures', measuremem=False)
    cmon = monitor('computing gmfs', measuremem=False)
    with dstore:
        trt = full_lt.trts[trt_smr // len(full_lt.sm_rlzs)]
        srcfilter = SourceFilter(
            dstore['sitecol'], oqparam.maximum_distance(trt))
        rupgeoms = dstore['rupgeoms']
        rlzs_by_gsim = full_lt._rlzs_by_gsim(trt_smr)
        param = vars(oqparam).copy()
        param['imtls'] = oqparam.imtls
        param['min_iml'] = oqparam.min_iml
        param['maximum_distance'] = oqparam.maximum_distance(trt)
        cmaker = ContextMaker(trt, rlzs_by_gsim, param)
        min_mag = getdefault(oqparam.minimum_magnitude, trt)
        for proxy in proxies:
            t0 = time.time()
            with fmon:
                if proxy['mag'] < min_mag:
                    continue
                sids = srcfilter.close_sids(proxy, trt)
                if len(sids) == 0:  # filtered away
                    continue
                proxy.geom = rupgeoms[proxy['geom_id']]
                ebr = proxy.to_ebr(cmaker.trt)  # after the geometry is set
                try:
                    computer = GmfComputer(
                        ebr, srcfilter.sitecol.filtered(sids), cmaker,
                        oqparam.correl_model, oqparam.cross_correl,
                        oqparam._amplifier, oqparam._sec_perils)
                except FarAwayRupture:
                    continue
            with cmon:
                data = computer.compute_all(sig_eps)
            dt = time.time() - t0
            times.append((computer.ebrupture.id, len(computer.ctx.sids), dt))
            for key in data:
                alldata[key].extend(data[key])
    for key, val in sorted(alldata.items()):
        if key in 'eid sid rlz':
            alldata[key] = U32(alldata[key])
        else:
            alldata[key] = F32(alldata[key])
    gmfdata = strip_zeros(pandas.DataFrame(alldata))
    if len(gmfdata) and oqparam.hazard_curves_from_gmfs:
        hc_mon = monitor('building hazard curves', measuremem=False)
        for (sid, rlz), df in gmfdata.groupby(['sid', 'rlz']):
            with hc_mon:
                poes = calc.gmvs_to_poes(
                    df, oqparam.imtls, oqparam.ses_per_logic_tree_path)
                for m, imt in enumerate(oqparam.imtls):
                    hcurves[rsi2str(rlz, sid, imt)] = poes[m]
    times = numpy.array([tup + (monitor.task_no,) for tup in times], time_dt)
    times.sort(order='rup_id')
    if not oqparam.ground_motion_fields:
        gmfdata = ()
    return dict(gmfdata=gmfdata, hcurves=hcurves, times=times,
                sig_eps=numpy.array(sig_eps, sig_eps_dt(oqparam.imtls)))
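# ``calc.gmvs_to_poes`` above turns simulated ground-motion values into
# probabilities of exceedance. A hypothetical standalone sketch of the
# underlying counting argument (the name and the simple capped-rate
# estimate are assumptions, not the engine's exact implementation):
import numpy

def gmvs_to_poes_sketch(gmvs, imls, num_ses):
    # fraction-of-event-sets estimate: mean number of exceedances per SES,
    # capped at 1 so the result can be read as a probability
    rates = numpy.array([(gmvs >= iml).sum() / num_ses for iml in imls])
    return numpy.clip(rates, 0.0, 1.0)

print(gmvs_to_poes_sketch(numpy.array([0.1, 0.3, 0.5]), [0.2, 0.4], 10))
# -> [0.2 0.1]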
def build_events_from_sources(self):
    """
    Prefilter the composite source model and store the source_info
    """
    oq = self.oqparam
    params = dict(imtls=oq.imtls,
                  ses_per_logic_tree_path=oq.ses_per_logic_tree_path,
                  ses_seed=oq.ses_seed)
    gsims_by_trt = self.csm.full_lt.get_gsims_by_trt()
    sources = self.csm.get_sources()
    # weighting the heavy sources
    nrups = parallel.Starmap(
        count_ruptures, [(src,) for src in sources if src.code in b'AMC'],
        progress=logging.debug).reduce()
    for src in sources:
        try:
            src.num_ruptures = nrups[src.source_id]
        except KeyError:
            src.num_ruptures = src.count_ruptures()
        src.weight = src.num_ruptures
    maxweight = sum(sg.weight for sg in self.csm.src_groups) / (
        self.oqparam.concurrent_tasks or 1)
    eff_ruptures = AccumDict(accum=0)  # grp_id => potential ruptures
    calc_times = AccumDict(accum=numpy.zeros(3, F32))  # nr, ns, dt
    allargs = []
    if self.oqparam.is_ucerf():
        # manage the filtering in a special way
        for sg in self.csm.src_groups:
            for src in sg:
                src.src_filter = self.srcfilter
        srcfilter = nofilter  # otherwise it would be ultra-slow
    else:
        srcfilter = self.srcfilter
    logging.info('Building ruptures')
    for sg in self.csm.src_groups:
        if not sg.sources:
            continue
        logging.info('Sending %s', sg)
        params['maximum_distance'] = oq.maximum_distance(sg.trt)
        cmaker = ContextMaker(sg.trt, gsims_by_trt[sg.trt], params)
        for src_group in sg.split(maxweight):
            allargs.append((src_group, cmaker, srcfilter.sitecol))
    smap = parallel.Starmap(sample_ruptures, allargs, h5=self.datastore.hdf5)
    mon = self.monitor('saving ruptures')
    self.nruptures = 0  # estimated classical ruptures within maxdist
    for dic in smap:
        # NB: dic should be a dictionary, but when the calculation dies
        # for an OOM it can become None, thus giving a very confusing error
        if dic is None:
            raise MemoryError('You ran out of memory!')
        rup_array = dic['rup_array']
        if len(rup_array) == 0:
            continue
        if dic['calc_times']:
            calc_times += dic['calc_times']
        if dic['eff_ruptures']:
            eff_ruptures += dic['eff_ruptures']
        with mon:
            n = len(rup_array)
            rup_array['id'] = numpy.arange(
                self.nruptures, self.nruptures + n)
            self.nruptures += n
            hdf5.extend(self.datastore['ruptures'], rup_array)
            hdf5.extend(self.datastore['rupgeoms'], rup_array.geom)
    if len(self.datastore['ruptures']) == 0:
        raise RuntimeError('No ruptures were generated, perhaps the '
                           'investigation time is too short')
    # don't change the order of the 3 things below!
    self.store_source_info(calc_times)
    self.store_rlz_info(eff_ruptures)
    imp = calc.RuptureImporter(self.datastore)
    with self.monitor('saving ruptures and events'):
        imp.import_rups_events(
            self.datastore.getitem('ruptures')[()], get_rupture_getters)
def compute_ruptures(sources, sitecol, siteidx, rlzs_assoc, monitor):
    """
    :param sources:
        List of commonlib.source.Source tuples
    :param sitecol:
        a :class:`openquake.hazardlib.site.SiteCollection` instance
    :param siteidx:
        always equal to 0
    :param rlzs_assoc:
        a :class:`openquake.commonlib.source.RlzsAssoc` instance
    :param monitor:
        monitor instance
    :returns:
        a dictionary trt_model_id -> [Rupture instances]
    """
    assert siteidx == 0, (
        'siteidx can be nonzero only for the classical_tiling calculations: '
        'tiling with the EventBasedRuptureCalculator is an error')
    # NB: by construction each block is a non-empty list with
    # sources of the same trt_model_id
    trt_model_id = sources[0].trt_model_id
    oq = monitor.oqparam
    trt = sources[0].tectonic_region_type
    max_dist = oq.maximum_distance[trt]
    cmaker = ContextMaker(rlzs_assoc.gsims_by_trt_id[trt_model_id])
    params = cmaker.REQUIRES_RUPTURE_PARAMETERS
    rup_data_dt = numpy.dtype(
        [('rupserial', U32), ('multiplicity', U16), ('numsites', U32)] +
        [(param, F32) for param in params])
    eb_ruptures = []
    rup_data = []
    calc_times = []
    rup_mon = monitor('filtering ruptures', measuremem=False)
    num_samples = rlzs_assoc.samples[trt_model_id]
    # Compute and save stochastic event sets
    for src in sources:
        t0 = time.time()
        s_sites = src.filter_sites_by_distance_to_source(max_dist, sitecol)
        if s_sites is None:
            continue
        rupture_filter = functools.partial(
            filter_sites_by_distance_to_rupture,
            integration_distance=max_dist, sites=s_sites)
        num_occ_by_rup = sample_ruptures(
            src, oq.ses_per_logic_tree_path, num_samples, rlzs_assoc.seed)
        # NB: the number of occurrences is very low, << 1, so it is
        # more efficient to filter only the ruptures that occur, i.e.
        # to call sample_ruptures *before* the filtering
        for ebr in build_eb_ruptures(
                src, num_occ_by_rup, rupture_filter, oq.random_seed, rup_mon):
            nsites = len(ebr.indices)
            rc = cmaker.make_rupture_context(ebr.rupture)
            ruptparams = tuple(getattr(rc, param) for param in params)
            rup_data.append((ebr.serial, len(ebr.etags), nsites) + ruptparams)
            eb_ruptures.append(ebr)
        dt = time.time() - t0
        calc_times.append((src.id, dt))
    res = AccumDict({trt_model_id: eb_ruptures})
    res.calc_times = calc_times
    res.rup_data = numpy.array(rup_data, rup_data_dt)
    res.trt = trt
    return res
def disaggregation(sources, site, imt, iml, gsim_by_trt, truncation_level,
                   n_epsilons, mag_bin_width, dist_bin_width,
                   coord_bin_width,
                   source_filter=filters.source_site_noop_filter,
                   filter_distance='rjb'):
    """
    Compute the "disaggregation" matrix representing the conditional
    probability of an intensity measure type ``imt`` exceeding, at least
    once, an intensity measure level ``iml`` at a geographical location
    ``site``, given rupture scenarios classified in terms of:

    - rupture magnitude
    - Joyner-Boore distance from rupture surface to site
    - longitude and latitude of the surface projection of a rupture's
      point closest to ``site``
    - epsilon: number of standard deviations by which an intensity measure
      level deviates from the median value predicted by a GSIM, given the
      rupture parameters
    - rupture tectonic region type

    In other words, the disaggregation matrix allows one to compute the
    probability of each scenario with the specified properties (e.g.,
    magnitude, or magnitude and distance) causing one or more exceedances
    of a given hazard level.

    For more detailed information about disaggregation, see for instance
    "Disaggregation of Seismic Hazard", Paolo Bazzurro, C. Allin Cornell,
    Bulletin of the Seismological Society of America, Vol. 89, pp. 501-520,
    April 1999.

    :param sources:
        Seismic source model, as for the
        :mod:`PSHA <openquake.hazardlib.calc.hazard_curve>` calculator:
        it should be an iterator of seismic sources.
    :param site:
        :class:`~openquake.hazardlib.site.Site` of interest to calculate
        the disaggregation matrix for.
    :param imt:
        Instance of an :mod:`intensity measure type <openquake.hazardlib.imt>`
        class.
    :param iml:
        Intensity measure level. A float value in units of ``imt``.
    :param gsim_by_trt:
        Tectonic region type to GSIM objects mapping.
    :param truncation_level:
        Float, number of standard deviations for truncation of the intensity
        distribution.
    :param n_epsilons:
        Integer number of epsilon histogram bins in the result matrix.
    :param mag_bin_width:
        Magnitude discretization step, width of one magnitude histogram bin.
    :param dist_bin_width:
        Distance histogram discretization step, in km.
    :param coord_bin_width:
        Longitude and latitude histograms discretization step,
        in decimal degrees.
    :param source_filter:
        Optional source-site filter function. See
        :mod:`openquake.hazardlib.calc.filters`.
    :returns:
        A tuple of two items. The first is itself a tuple of bin edges
        information for (in specified order) magnitude, distance, longitude,
        latitude, epsilon and tectonic region types.

        The second item is a 6d array representing the full disaggregation
        matrix. Dimensions are in the same order as bin edges in the first
        item of the result tuple. The matrix can be used directly by
        pmf-extractor functions.
    """
    trts = sorted(set(src.tectonic_region_type for src in sources))
    trt_num = dict((trt, i) for i, trt in enumerate(trts))
    rlzs_by_gsim = {gsim_by_trt[trt]: [0] for trt in trts}
    cmaker = ContextMaker(rlzs_by_gsim, source_filter.integration_distance,
                          filter_distance)
    iml4 = make_iml4(1, {str(imt): iml})
    by_trt = groupby(sources, operator.attrgetter('tectonic_region_type'))
    bdata = {}
    sitecol = SiteCollection([site])
    for trt, srcs in by_trt.items():
        bdata[trt] = collect_bin_data(srcs, sitecol, cmaker, iml4,
                                      truncation_level, n_epsilons)
    if sum(len(bd.mags) for bd in bdata.values()) == 0:
        warnings.warn(
            'No ruptures have contributed to the hazard at site %s' % site,
            RuntimeWarning)
        return None, None
    min_mag = min(bd.mags.min() for bd in bdata.values())
    max_mag = max(bd.mags.max() for bd in bdata.values())
    mag_bins = mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / mag_bin_width)),
        int(numpy.ceil(max_mag / mag_bin_width) + 1))
    min_dist = min(bd.dists.min() for bd in bdata.values())
    max_dist = max(bd.dists.max() for bd in bdata.values())
    dist_bins = dist_bin_width * numpy.arange(
        int(numpy.floor(min_dist / dist_bin_width)),
        int(numpy.ceil(max_dist / dist_bin_width) + 1))
    bb = (min(bd.lons.min() for bd in bdata.values()),
          min(bd.lats.min() for bd in bdata.values()),
          max(bd.lons.max() for bd in bdata.values()),
          max(bd.lats.max() for bd in bdata.values()))
    lon_bins, lat_bins = lon_lat_bins(bb, coord_bin_width)
    eps_bins = numpy.linspace(-truncation_level, truncation_level,
                              n_epsilons + 1)
    bin_edges = (mag_bins, dist_bins, [lon_bins], [lat_bins], eps_bins)
    matrix = numpy.zeros((len(mag_bins) - 1, len(dist_bins) - 1,
                          len(lon_bins) - 1, len(lat_bins) - 1,
                          len(eps_bins) - 1, len(trts)))
    for trt in bdata:
        dic = build_disagg_matrix(bdata[trt], bin_edges, sid=0)
        if dic:  # (poe, imt, rlzi) -> matrix
            [mat] = dic.values()
            matrix[..., trt_num[trt]] = mat
    return bin_edges + (trts,), matrix
def classical(group, src_filter, gsims, param, monitor=Monitor()):
    """
    Compute the hazard curves for a set of sources belonging to the same
    tectonic region type for all the GSIMs associated to that TRT.
    The arguments are the same as in :func:`calc_hazard_curves`, except
    for ``gsims``, which is a list of GSIM instances.

    :returns:
        a dictionary {grp_id: pmap} with attributes .grp_ids, .calc_times,
        .eff_ruptures
    """
    if getattr(group, 'src_interdep', None) == 'mutex':
        mutex_weight = {src.source_id: weight for src, weight in
                        zip(group.sources, group.srcs_weights)}
    else:
        mutex_weight = None
    grp_ids = set()
    for src in group:
        if not src.num_ruptures:
            # src.num_ruptures is set when parsing the XML, but not when
            # the source is instantiated manually, so it is set here
            src.num_ruptures = src.count_ruptures()
        grp_ids.update(src.src_group_ids)
    maxdist = src_filter.integration_distance
    imtls = param['imtls']
    trunclevel = param.get('truncation_level')
    cmaker = ContextMaker(gsims, maxdist, param, monitor)
    pmap = AccumDict({grp_id: ProbabilityMap(len(imtls.array), len(gsims))
                      for grp_id in grp_ids})
    # AccumDict of arrays with 4 elements weight, nsites, calc_time, split
    pmap.calc_times = AccumDict(accum=numpy.zeros(4))
    pmap.eff_ruptures = AccumDict()  # grp_id -> num_ruptures
    for src, s_sites in src_filter(group):  # filter now
        t0 = time.time()
        indep = group.rup_interdep == 'indep' if mutex_weight else True
        try:
            poemap = cmaker.poe_map(src, s_sites, imtls, trunclevel, indep)
        except Exception as err:
            etype, err, tb = sys.exc_info()
            msg = '%s (source id=%s)' % (str(err), src.source_id)
            raise etype(msg).with_traceback(tb)
        if mutex_weight:  # mutex sources
            weight = mutex_weight[src.source_id]
            for sid in poemap:
                pcurve = pmap[group.id].setdefault(sid, 0)
                pcurve += poemap[sid] * weight
        elif poemap:
            for grp_id in src.src_group_ids:
                pmap[grp_id] |= poemap
        src_id = src.source_id.split(':', 1)[0]
        pmap.calc_times[src_id] += numpy.array(
            [src.weight, len(s_sites), time.time() - t0, 1])
        # storing the number of contributing ruptures too
        pmap.eff_ruptures += {grp_id: getattr(poemap, 'eff_ruptures', 0)
                              for grp_id in src.src_group_ids}
    if mutex_weight and group.grp_probability is not None:
        pmap[group.id] *= group.grp_probability
    return pmap
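# The two branches above encode different probability algebra: independent
# sources are merged through the probability-map union (``|=``, i.e.
# multiplying probabilities of no exceedance), while mutually exclusive
# sources contribute a weighted sum of their curves. A toy comparison of
# the two rules (numbers invented):
import numpy

p = numpy.array([0.10, 0.20])  # per-source exceedance probabilities
w = numpy.array([0.4, 0.6])    # mutex weights, summing to 1

indep = 1.0 - numpy.prod(1.0 - p)  # 0.28: any source may cause exceedance
mutex = numpy.sum(w * p)           # 0.16: exactly one source can occur
print(indep, mutex)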
def disaggregation(sources, site, imt, iml, gsim_by_trt, truncation_level,
                   n_epsilons, mag_bin_width, dist_bin_width,
                   coord_bin_width, source_filter=filters.nofilter,
                   **kwargs):
    """
    Compute the "disaggregation" matrix representing the conditional
    probability of an intensity measure type ``imt`` exceeding, at least
    once, an intensity measure level ``iml`` at a geographical location
    ``site``, given rupture scenarios classified in terms of:

    - rupture magnitude
    - Joyner-Boore distance from rupture surface to site
    - longitude and latitude of the surface projection of a rupture's
      point closest to ``site``
    - epsilon: number of standard deviations by which an intensity measure
      level deviates from the median value predicted by a GSIM, given the
      rupture parameters
    - rupture tectonic region type

    In other words, the disaggregation matrix allows one to compute the
    probability of each scenario with the specified properties (e.g.,
    magnitude, or magnitude and distance) causing one or more exceedances
    of a given hazard level.

    For more detailed information about disaggregation, see for instance
    "Disaggregation of Seismic Hazard", Paolo Bazzurro, C. Allin Cornell,
    Bulletin of the Seismological Society of America, Vol. 89, pp. 501-520,
    April 1999.

    :param sources:
        Seismic source model, as for the
        :mod:`PSHA <openquake.hazardlib.calc.hazard_curve>` calculator:
        it should be an iterator of seismic sources.
    :param site:
        :class:`~openquake.hazardlib.site.Site` of interest to calculate
        the disaggregation matrix for.
    :param imt:
        Instance of an :mod:`intensity measure type <openquake.hazardlib.imt>`
        class.
    :param iml:
        Intensity measure level. A float value in units of ``imt``.
    :param gsim_by_trt:
        Tectonic region type to GSIM objects mapping.
    :param truncation_level:
        Float, number of standard deviations for truncation of the intensity
        distribution.
    :param n_epsilons:
        Integer number of epsilon histogram bins in the result matrix.
    :param mag_bin_width:
        Magnitude discretization step, width of one magnitude histogram bin.
    :param dist_bin_width:
        Distance histogram discretization step, in km.
    :param coord_bin_width:
        Longitude and latitude histograms discretization step,
        in decimal degrees.
    :param source_filter:
        Optional source-site filter function. See
        :mod:`openquake.hazardlib.calc.filters`.
    :returns:
        A tuple of two items. The first is itself a tuple of bin edges
        information for (in specified order) magnitude, distance, longitude,
        latitude, epsilon and tectonic region types.

        The second item is a 6d array representing the full disaggregation
        matrix. Dimensions are in the same order as bin edges in the first
        item of the result tuple. The matrix can be used directly by
        pmf-extractor functions.
    """
    trts = sorted(set(src.tectonic_region_type for src in sources))
    trt_num = dict((trt, i) for i, trt in enumerate(trts))
    rlzs_by_gsim = {gsim_by_trt[trt]: [0] for trt in trts}
    by_trt = groupby(sources, operator.attrgetter('tectonic_region_type'))
    bdata = {}  # by trt, magi
    sitecol = SiteCollection([site])
    iml2 = numpy.array([[iml]])
    eps3 = _eps3(truncation_level, n_epsilons)
    rups = AccumDict(accum=[])
    cmaker = {}  # trt -> cmaker
    for trt, srcs in by_trt.items():
        contexts.RuptureContext.temporal_occurrence_model = (
            srcs[0].temporal_occurrence_model)
        cmaker[trt] = ContextMaker(
            trt, rlzs_by_gsim,
            {'truncation_level': truncation_level,
             'maximum_distance': source_filter.integration_distance,
             'imtls': {str(imt): [iml]}})
        rups[trt].extend(cmaker[trt].from_srcs(srcs, sitecol))
    min_mag = min(r.mag for rs in rups.values() for r in rs)
    max_mag = max(r.mag for rs in rups.values() for r in rs)
    mag_bins = mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / mag_bin_width)),
        int(numpy.ceil(max_mag / mag_bin_width) + 1))
    for trt in cmaker:
        gsim = gsim_by_trt[trt]
        for magi, ctxs in enumerate(_magbin_groups(rups[trt], mag_bins)):
            set_mean_std(ctxs, [imt], [gsim])
            bdata[trt, magi] = disaggregate(ctxs, [0], {imt: iml2}, eps3)
    if sum(len(bd.dists) for bd in bdata.values()) == 0:
        warnings.warn(
            'No ruptures have contributed to the hazard at site %s' % site,
            RuntimeWarning)
        return None, None
    min_dist = min(bd.dists.min() for bd in bdata.values())
    max_dist = max(bd.dists.max() for bd in bdata.values())
    dist_bins = dist_bin_width * numpy.arange(
        int(numpy.floor(min_dist / dist_bin_width)),
        int(numpy.ceil(max_dist / dist_bin_width) + 1))
    lon_bins, lat_bins = lon_lat_bins(site.location.x, site.location.y,
                                      max_dist, coord_bin_width)
    eps_bins = numpy.linspace(-truncation_level, truncation_level,
                              n_epsilons + 1)
    bin_edges = (mag_bins, dist_bins, lon_bins, lat_bins, eps_bins)
    matrix = numpy.zeros((len(mag_bins) - 1, len(dist_bins) - 1,
                          len(lon_bins) - 1, len(lat_bins) - 1,
                          len(eps_bins) - 1, len(trts)))  # 6D
    for trt, magi in bdata:
        mat7 = _build_disagg_matrix(bdata[trt, magi], bin_edges[1:])
        matrix[magi, ..., trt_num[trt]] = mat7[..., 0, 0, 0]
    return bin_edges + (trts,), matrix
def make_contexts(self, site_collection, rupture):
    param = dict(imtls={})
    return ContextMaker('faketrt', [self.gsim_class], param).make_contexts(
        site_collection, rupture)
def classical(group, src_filter, gsims, param, monitor=Monitor()):
    """
    Compute the hazard curves for a set of sources belonging to the same
    tectonic region type for all the GSIMs associated to that TRT.
    The arguments are the same as in :func:`calc_hazard_curves`, except
    for ``gsims``, which is a list of GSIM instances.

    :returns:
        a dictionary {grp_id: pmap} with attributes .grp_ids, .calc_times,
        .eff_ruptures
    """
    if not hasattr(src_filter, 'sitecol'):  # a sitecol was passed
        src_filter = SourceFilter(src_filter, {})

    # Get the parameters assigned to the group
    src_mutex = getattr(group, 'src_interdep', None) == 'mutex'
    rup_mutex = getattr(group, 'rup_interdep', None) == 'mutex'
    cluster = getattr(group, 'cluster', None)

    # Compute the number of ruptures
    grp_ids = set()
    for src in group:
        if not src.num_ruptures:
            # src.num_ruptures is set when parsing the XML, but not when
            # the source is instantiated manually, so it is set here
            src.num_ruptures = src.count_ruptures()
        # This sets the proper TOM in case of a cluster
        if cluster:
            src.temporal_occurrence_model = FatedTOM(time_span=1)
        # Updating IDs
        grp_ids.update(src.src_group_ids)

    # Now preparing context
    maxdist = src_filter.integration_distance
    imtls = param['imtls']
    trunclevel = param.get('truncation_level')
    cmaker = ContextMaker(
        src.tectonic_region_type, gsims, maxdist, param, monitor)

    # Prepare the accumulator for the probability maps
    pmap = AccumDict({grp_id: ProbabilityMap(len(imtls.array), len(gsims))
                      for grp_id in grp_ids})
    rupdata = {grp_id: [] for grp_id in grp_ids}
    # AccumDict of arrays with 2 elements weight, calc_time
    calc_times = AccumDict(accum=numpy.zeros(2, numpy.float32))
    eff_ruptures = AccumDict(accum=0)  # grp_id -> num_ruptures
    nsites = {}  # src.id -> num_sites

    # Computing hazard
    for src, s_sites in src_filter(group):  # filter now
        nsites[src.id] = src.nsites
        t0 = time.time()
        try:
            poemap = cmaker.poe_map(src, s_sites, imtls, trunclevel,
                                    rup_indep=not rup_mutex)
        except Exception as err:
            etype, err, tb = sys.exc_info()
            msg = '%s (source id=%s)' % (str(err), src.source_id)
            raise etype(msg).with_traceback(tb)
        if src_mutex:  # mutex sources, there is a single group
            for sid in poemap:
                pcurve = pmap[src.src_group_id].setdefault(sid, 0)
                pcurve += poemap[sid] * src.mutex_weight
        elif poemap:
            for gid in src.src_group_ids:
                pmap[gid] |= poemap
        if len(cmaker.rupdata):
            for gid in src.src_group_ids:
                rupdata[gid].append(cmaker.rupdata)
        calc_times[src.id] += numpy.array([src.weight, time.time() - t0])
        # storing the number of contributing ruptures too
        eff_ruptures += {gid: getattr(poemap, 'eff_ruptures', 0)
                         for gid in src.src_group_ids}

    # Updating the probability map in the case of mutually exclusive sources
    group_probability = getattr(group, 'grp_probability', None)
    if src_mutex and group_probability:
        pmap[src.src_group_id] *= group_probability

    # Processing cluster
    if cluster:
        tom = getattr(group, 'temporal_occurrence_model')
        pmap = _cluster(param, tom, imtls, gsims, grp_ids, pmap)

    # Return results
    for gid, data in rupdata.items():
        if len(data):
            rupdata[gid] = numpy.concatenate(data)
    return dict(pmap=pmap, calc_times=calc_times, eff_ruptures=eff_ruptures,
                rup_data=rupdata, nsites=nsites)
def full_disaggregation(self):
    """
    Run the disaggregation phase after hazard curve finalization.
    """
    oq = self.oqparam
    tl = self.oqparam.truncation_level
    sitecol = self.sitecol
    eps_edges = numpy.linspace(-tl, tl, self.oqparam.num_epsilon_bins + 1)
    self.bin_edges = {}
    curves = [self.get_curves(sid) for sid in sitecol.sids]
    # determine the number of effective source groups
    sg_data = self.datastore['csm_info/sg_data']
    num_grps = sum(1 for effrup in sg_data['effrup'] if effrup > 0)
    nblocks = math.ceil(oq.concurrent_tasks / num_grps)
    src_filter = SourceFilter(sitecol, oq.maximum_distance)
    R = len(self.rlzs_assoc.realizations)
    max_poe = numpy.zeros(R, oq.imt_dt())
    # build trt_edges
    trts = tuple(sorted(set(sg.trt for smodel in self.csm.source_models
                            for sg in smodel.src_groups)))
    # build mag_edges
    min_mag = min(sg.min_mag for smodel in self.csm.source_models
                  for sg in smodel.src_groups)
    max_mag = max(sg.max_mag for smodel in self.csm.source_models
                  for sg in smodel.src_groups)
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / oq.mag_bin_width)),
        int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))
    # build dist_edges
    maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))
    logging.info('dist = %s...%s', min(dist_edges), max(dist_edges))
    # build eps_edges
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max_mag)
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges, lat_edges = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
        logging.info('site %d, lon = %s...%s',
                     sid, min(lon_edges), max(lon_edges))
        logging.info('site %d, lat = %s...%s',
                     sid, min(lat_edges), max(lat_edges))
        self.bin_edges[sid] = bs = (
            mag_edges, dist_edges, lon_edges, lat_edges, eps_edges)
        shape = [len(edges) - 1 for edges in bs] + [len(trts)]
        logging.info('%s for sid %d', shape, sid)
    # check poes
    for smodel in self.csm.source_models:
        sm_id = smodel.ordinal
        for i, site in enumerate(sitecol):
            sid = sitecol.sids[i]
            curve = curves[i]
            # populate max_poe array
            for rlzi, poes in curve.items():
                for imt in oq.imtls:
                    max_poe[rlzi][imt] = max(
                        max_poe[rlzi][imt], poes[imt].max())
            if not curve:
                continue  # skip zero-valued hazard curves
            # check for too big poes_disagg
            for poe in oq.poes_disagg:
                for rlz in self.rlzs_assoc.rlzs_by_smodel[sm_id]:
                    rlzi = rlz.ordinal
                    for imt in oq.imtls:
                        min_poe = max_poe[rlzi][imt]
                        if poe > min_poe:
                            raise ValueError(self.POE_TOO_BIG % (
                                poe, sm_id, smodel.name, min_poe, rlzi, imt))
    # build all_args
    all_args = []
    for smodel in self.csm.source_models:
        for sg in smodel.src_groups:
            split_sources = []
            for src in sg:
                for split, _sites in src_filter(
                        sourceconverter.split_source(src), sitecol):
                    split_sources.append(split)
            if not split_sources:
                continue
            mon = self.monitor('disaggregation')
            rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(
                sg.trt, smodel.ordinal)
            cmaker = ContextMaker(
                rlzs_by_gsim, src_filter.integration_distance)
            imls = [disagg.make_imldict(
                rlzs_by_gsim, oq.imtls, oq.iml_disagg, oq.poes_disagg,
                curve) for curve in curves]
            for srcs in split_in_blocks(split_sources, nblocks):
                all_args.append((src_filter, srcs, cmaker, imls, trts,
                                 self.bin_edges, oq, mon))
    self.cache_info = numpy.zeros(2)  # operations, cache_hits
    results = parallel.Starmap(compute_disagg, all_args).reduce(
        self.agg_result)
    ops, hits = self.cache_info
    logging.info('Cache speedup %s', ops / (ops - hits))
    self.save_disagg_results(results)
def compute_ruptures(sources, sitecol, gsims, monitor):
    """
    :param sources:
        List of commonlib.source.Source tuples
    :param sitecol:
        a :class:`openquake.hazardlib.site.SiteCollection` instance
    :param gsims:
        a list of GSIMs for the current tectonic region model
    :param monitor:
        monitor instance
    :returns:
        a dictionary src_group_id -> [Rupture instances]
    """
    # NB: by construction each block is a non-empty list with
    # sources of the same src_group_id
    src_group_id = sources[0].src_group_id
    trt = sources[0].tectonic_region_type
    max_dist = monitor.maximum_distance[trt]
    cmaker = ContextMaker(gsims)
    params = sorted(cmaker.REQUIRES_RUPTURE_PARAMETERS)
    rup_data_dt = numpy.dtype(
        [('rupserial', U32), ('multiplicity', U16),
         ('numsites', U32), ('occurrence_rate', F64)] +
        [(param, F64) for param in params])
    eb_ruptures = []
    rup_data = []
    calc_times = []
    rup_mon = monitor('filtering ruptures', measuremem=False)
    num_samples = monitor.samples
    num_events = 0
    # Compute and save stochastic event sets
    for src in sources:
        t0 = time.time()
        s_sites = src.filter_sites_by_distance_to_source(max_dist, sitecol)
        if s_sites is None:
            continue
        rupture_filter = functools.partial(
            filter_sites_by_distance_to_rupture,
            integration_distance=max_dist, sites=s_sites)
        num_occ_by_rup = sample_ruptures(
            src, monitor.ses_per_logic_tree_path, num_samples, monitor.seed)
        # NB: the number of occurrences is very low, << 1, so it is
        # more efficient to filter only the ruptures that occur, i.e.
        # to call sample_ruptures *before* the filtering
        for ebr in build_eb_ruptures(
                src, num_occ_by_rup, rupture_filter, monitor.seed, rup_mon):
            nsites = len(ebr.indices)
            try:
                rate = ebr.rupture.occurrence_rate
            except AttributeError:  # for nonparametric sources
                rate = numpy.nan
            rc = cmaker.make_rupture_context(ebr.rupture)
            ruptparams = tuple(getattr(rc, param) for param in params)
            rup_data.append((ebr.serial, ebr.multiplicity, nsites, rate) +
                            ruptparams)
            eb_ruptures.append(ebr)
            num_events += ebr.multiplicity
        dt = time.time() - t0
        calc_times.append((src.id, dt))
    res = AccumDict({src_group_id: eb_ruptures})
    res.num_events = num_events
    res.calc_times = calc_times
    res.rup_data = numpy.array(rup_data, rup_data_dt)
    res.trt = trt
    return res
def full_disaggregation(self):
    """
    Run the disaggregation phase.
    """
    oq = self.oqparam
    tl = oq.truncation_level
    src_filter = self.src_filter()
    if hasattr(self, 'csm'):
        for sg in self.csm.src_groups:
            if sg.atomic:
                raise NotImplementedError(
                    'Atomic groups are not supported yet')
    self.full_lt = self.datastore['full_lt']
    self.poes_disagg = oq.poes_disagg or (None,)
    self.imts = list(oq.imtls)
    self.ws = [rlz.weight for rlz in self.full_lt.get_realizations()]
    self.pgetter = getters.PmapGetter(
        self.datastore, self.ws, self.sitecol.sids)

    # build array rlzs (N, Z)
    if oq.rlz_index is None:
        Z = oq.num_rlzs_disagg
        rlzs = numpy.zeros((self.N, Z), int)
        if self.R > 1:
            for sid in self.sitecol.sids:
                curves = numpy.array(
                    [pc.array for pc in self.pgetter.get_pcurves(sid)])
                mean = getters.build_stat_curve(
                    curves, oq.imtls, stats.mean_curve, self.ws)
                rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
            self.datastore['best_rlzs'] = rlzs
    else:
        Z = len(oq.rlz_index)
        rlzs = numpy.zeros((self.N, Z), int)
        for z in range(Z):
            rlzs[:, z] = oq.rlz_index[z]
    assert Z <= self.R, (Z, self.R)
    self.Z = Z
    self.rlzs = rlzs

    if oq.iml_disagg:
        # no hazard curves are needed
        self.poe_id = {None: 0}
        curves = [[None for z in range(Z)] for s in range(self.N)]
        self.ok_sites = set(self.sitecol.sids)
    else:
        self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
        curves = [self.get_curve(sid, rlzs[sid])
                  for sid in self.sitecol.sids]
        self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
    self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls,
                      self.poes_disagg, curves)
    if oq.disagg_by_src:
        self.build_disagg_by_src(rlzs)

    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    # build trt_edges
    trts = tuple(self.full_lt.trts)
    trt_num = {trt: i for i, trt in enumerate(trts)}
    self.trts = trts
    # build mag_edges
    mags = [float(mag) for mag in self.datastore['source_mags']]
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min(mags) / oq.mag_bin_width)),
        int(numpy.ceil(max(mags) / oq.mag_bin_width) + 1))
    # build dist_edges
    maxdist = max(oq.maximum_distance(trt) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))
    # build eps_edges
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max(mags))
    lon_edges, lat_edges = {}, {}  # by sid
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
    self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges

    shapedic = self.save_bin_edges()
    del shapedic['trt']
    shapedic['N'] = self.N
    shapedic['M'] = len(oq.imtls)
    shapedic['P'] = len(oq.poes_disagg)
    shapedic['Z'] = Z
    shapedic['concurrent_tasks'] = oq.concurrent_tasks
    nbytes, msg = get_array_nbytes(shapedic)
    if nbytes > oq.max_data_transfer:
        raise ValueError('Estimated data transfer too big\n%s' % msg)
    logging.info('Estimated data transfer: %s', msg)

    self.imldict = {}  # sid, rlz, poe, imt -> iml
    for s in self.sitecol.sids:
        for z, rlz in enumerate(rlzs[s]):
            for p, poe in enumerate(self.poes_disagg):
                for m, imt in enumerate(oq.imtls):
                    self.imldict[s, rlz, poe, imt] = self.iml4[s, m, p, z]

    # submit #groups disaggregation tasks
    dstore = (self.datastore.parent
              if self.datastore.parent else self.datastore)
    indices = get_indices(dstore, oq.concurrent_tasks or 1)
    self.datastore.swmr_on()
    smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5)
    for grp_id, trt in self.full_lt.trt_by_grp.items():
        logging.info('Group #%d, sending rup_data for %s', grp_id, trt)
        trti = trt_num[trt]
        cmaker = ContextMaker(
            trt, self.full_lt.get_rlzs_by_gsim(grp_id),
            {'truncation_level': oq.truncation_level,
             'maximum_distance': src_filter.integration_distance,
             'filter_distance': oq.filter_distance,
             'imtls': oq.imtls})
        for idxs in indices[grp_id]:
            smap.submit((dstore, idxs, cmaker, self.iml4, trti,
                         self.bin_edges))
    results = smap.reduce(self.agg_result, AccumDict(accum={}))
    return results  # sid -> trti -> 8D array
def full_disaggregation(self, curves):
    """
    Run the disaggregation phase.

    :param curves: a list of hazard curves, one per site

    The curves can be all None if iml_disagg is set in the job.ini
    """
    oq = self.oqparam
    tl = oq.truncation_level
    src_filter = SourceFilter(self.sitecol, oq.maximum_distance,
                              use_rtree=False)
    csm = self.csm.filter(src_filter)  # fine filtering
    self.datastore['csm_info'] = csm.info
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    self.bin_edges = {}
    # build trt_edges
    trts = tuple(sorted(set(sg.trt for smodel in csm.source_models
                            for sg in smodel.src_groups)))
    trt_num = {trt: i for i, trt in enumerate(trts)}
    self.trts = trts
    # build mag_edges
    min_mag = min(sg.min_mag for smodel in csm.source_models
                  for sg in smodel.src_groups)
    max_mag = max(sg.max_mag for smodel in csm.source_models
                  for sg in smodel.src_groups)
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / oq.mag_bin_width)),
        int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))
    # build dist_edges
    maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))
    # build eps_edges
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max_mag)
    lon_edges, lat_edges = {}, {}  # by sid
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
    self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
    self.save_bin_edges()
    # build all_args
    all_args = []
    maxweight = csm.get_maxweight(oq.concurrent_tasks)
    mon = self.monitor('disaggregation')
    R = len(self.rlzs_assoc.realizations)
    iml4 = disagg.make_iml4(R, oq.imtls, oq.iml_disagg,
                            oq.poes_disagg or (None,), curves)
    self.imldict = {}  # sid, rlzi, poe, imt -> iml
    for s in self.sitecol.sids:
        for r in range(R):
            for p, poe in enumerate(oq.poes_disagg or [None]):
                for m, imt in enumerate(oq.imtls):
                    self.imldict[s, r, poe, imt] = iml4[s, r, m, p]
    for smodel in csm.source_models:
        sm_id = smodel.ordinal
        for trt, groups in groupby(
                smodel.src_groups, operator.attrgetter('trt')).items():
            trti = trt_num[trt]
            sources = sum([grp.sources for grp in groups], [])
            rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(trt, sm_id)
            cmaker = ContextMaker(
                rlzs_by_gsim, src_filter.integration_distance)
            for block in csm.split_in_blocks(maxweight, sources):
                all_args.append((src_filter, block, cmaker, iml4, trti,
                                 self.bin_edges, oq, mon))
    self.num_ruptures = [0] * len(self.trts)
    self.cache_info = numpy.zeros(3)  # operations, cache_hits, num_zeros
    results = parallel.Starmap(compute_disagg, all_args).reduce(
        self.agg_result, AccumDict(accum={}))
    ops, hits, num_zeros = self.cache_info
    logging.info('Cache speedup %s', ops / (ops - hits))
    logging.info('Discarded zero matrices: %d', num_zeros)
    return results
def _collect_bins_data(sources, site, imt, iml, gsims,
                       truncation_level, n_epsilons,
                       source_site_filter, rupture_site_filter):
    """
    Extract values of magnitude, distance, closest point, tectonic region
    types and PoE distribution.

    This method processes the source model (generates ruptures) and collects
    all needed parameters to arrays. It also defines tectonic region type
    bins sequence.
    """
    mags = []
    dists = []
    lons = []
    lats = []
    tect_reg_types = []
    probs_no_exceed = []
    sitecol = SiteCollection([site])
    sitemesh = sitecol.mesh

    _next_trt_num = 0
    trt_nums = {}

    # here we ignore filtered site collection because either it is the same
    # as the original one (with one site), or the source/rupture is filtered
    # out and doesn't show up in the filter's output
    for src_idx, (source, s_sites) in \
            enumerate(source_site_filter(sources, sitecol)):
        try:
            tect_reg = source.tectonic_region_type
            gsim = gsims[tect_reg]
            cmaker = ContextMaker([gsim])
            if tect_reg not in trt_nums:
                trt_nums[tect_reg] = _next_trt_num
                _next_trt_num += 1
            tect_reg = trt_nums[tect_reg]
            for rupture, r_sites in rupture_site_filter(
                    source.iter_ruptures(), s_sites):
                # extract rupture parameters of interest
                mags.append(rupture.mag)
                [jb_dist] = rupture.surface.get_joyner_boore_distance(
                    sitemesh)
                dists.append(jb_dist)
                [closest_point] = rupture.surface.get_closest_points(
                    sitemesh)
                lons.append(closest_point.longitude)
                lats.append(closest_point.latitude)
                tect_reg_types.append(tect_reg)
                # compute conditional probability of exceeding iml given
                # the current rupture, and different epsilon level, that is
                # ``P(IMT >= iml | rup, epsilon_bin)`` for each epsilon bin
                sctx, rctx, dctx = cmaker.make_contexts(sitecol, rupture)
                [poes_given_rup_eps] = gsim.disaggregate_poe(
                    sctx, rctx, dctx, imt, iml, truncation_level, n_epsilons)
                # collect probability of a rupture causing no exceedances
                probs_no_exceed.append(
                    rupture.get_probability_no_exceedance(poes_given_rup_eps))
        except Exception as err:
            etype, err, tb = sys.exc_info()
            msg = 'An error occurred with source id=%s. Error: %s'
            msg %= (source.source_id, str(err))
            raise_(etype, msg, tb)

    mags = numpy.array(mags, float)
    dists = numpy.array(dists, float)
    lons = numpy.array(lons, float)
    lats = numpy.array(lats, float)
    tect_reg_types = numpy.array(tect_reg_types, int)
    probs_no_exceed = numpy.array(probs_no_exceed, float)

    trt_bins = [trt for (num, trt) in sorted(
        (num, trt) for (trt, num) in trt_nums.items())]

    return (mags, dists, lons, lats, tect_reg_types, trt_bins,
            probs_no_exceed)
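# The call to ``gsim.disaggregate_poe`` above splits the exceedance
# probability across epsilon bins of the truncated normal distribution.
# Below is a rough standalone sketch of that split, assuming a lognormal
# intensity model truncated at +/- truncation_level; the function name,
# arguments and closed form are illustrative assumptions, not hazardlib's
# exact implementation.
import numpy
from scipy.stats import truncnorm

def disaggregate_poe_sketch(mean_ln, stddev, iml, truncation_level,
                            n_epsilons):
    # epsilon needed to reach iml given the GSIM's median and sigma
    eps_star = (numpy.log(iml) - mean_ln) / stddev
    eps_bins = numpy.linspace(-truncation_level, truncation_level,
                              n_epsilons + 1)
    dist = truncnorm(-truncation_level, truncation_level)
    # P(IMT >= iml and eps in bin i) = mass of the bin lying above eps_star
    lo = numpy.maximum(eps_bins[:-1], eps_star)
    hi = numpy.maximum(eps_bins[1:], eps_star)
    return dist.cdf(hi) - dist.cdf(lo)

# the bin probabilities sum to the total truncated PoE of exceeding iml
print(disaggregate_poe_sketch(-1.0, 0.6, 0.4, truncation_level=3,
                              n_epsilons=6))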
def get_conditional_gmfs(database, rupture, sites, gsims, imts,
                         number_simulations, truncation_level,
                         correlation_model=DEFAULT_CORRELATION):
    """
    Get a set of random fields conditioned on a set of observations
    :param database:
        Ground motion records for the event as instance of :class:
        smtk.sm_database.GroundMotionDatabase
    :param rupture:
        Event rupture as instance of :class:
        openquake.hazardlib.source.rupture.Rupture
    :param sites:
        Target sites as instance of :class:
        openquake.hazardlib.site.SiteCollection
    :param list gsims:
        List of GMPEs required
    :param list imts:
        List of intensity measures required
    :param int number_simulations:
        Number of simulated fields required
    :param float truncation_level:
        Ground motion truncation level
    """
    # Get known sites mesh
    known_sites = database.get_site_collection()
    # Get observed residuals
    residuals = Residuals(gsims, imts)
    residuals.get_residuals(database)
    # one independent (sites x simulations) array per (gmpe, imt) pair;
    # sharing a single inner dict would make the GMPEs overwrite each other
    gmfs = OrderedDict([
        (gmpe, OrderedDict([
            (imtx, np.zeros([len(sites.lons), number_simulations]))
            for imtx in imts]))
        for gmpe in gsims])
    gmpe_list = [GSIM_LIST[gmpe]() for gmpe in gsims]
    cmaker = ContextMaker(rupture.tectonic_region_type, gmpe_list)
    ctxs = cmaker.make_contexts(sites, rupture)
    if len(ctxs) == 3:  # engine version >= 3.10
        _rctx, sctx, dctx = ctxs
    else:  # old version, 2 contexts
        sctx, dctx = ctxs
    for gsim in gmpe_list:
        gmpe = gsim.__class__.__name__
        for imtx in imts:
            if truncation_level == 0:
                gmfs[gmpe][imtx], _ = gsim.get_mean_and_stddevs(
                    sctx, rupture, dctx, from_string(imtx), stddev_types=[])
                continue
            if "Intra event" in gsim.DEFINED_FOR_STANDARD_DEVIATION_TYPES:
                epsilon = conditional_simulation(
                    known_sites,
                    residuals.residuals[gmpe][imtx]["Intra event"],
                    sites, imtx, number_simulations, correlation_model)
                tau = np.unique(
                    residuals.residuals[gmpe][imtx]["Inter event"])
                mean, [stddev_inter, stddev_intra] = \
                    gsim.get_mean_and_stddevs(
                        sctx, rupture, dctx, from_string(imtx),
                        ["Inter event", "Intra event"])
                for iloc in range(0, number_simulations):
                    gmfs[gmpe][imtx][:, iloc] = np.exp(
                        mean + (tau * stddev_inter) +
                        (epsilon[:, iloc] * stddev_intra))
            else:
                epsilon = conditional_simulation(
                    known_sites,
                    residuals.residuals[gmpe][imtx]["Total"],
                    sites, imtx, number_simulations, correlation_model)
                tau = None
                mean, [stddev_total] = gsim.get_mean_and_stddevs(
                    sctx, rupture, dctx, from_string(imtx), ["Total"])
                for iloc in range(0, number_simulations):
                    gmfs[gmpe][imtx][:, iloc] = np.exp(
                        mean + epsilon[:, iloc] * stddev_total.flatten())
    return gmfs
def full_disaggregation(self):
    """
    Run the disaggregation phase.
    """
    oq = self.oqparam
    mags_by_trt = self.datastore['source_mags']
    all_edges, self.shapedic = disagg.get_edges_shapedic(
        oq, self.sitecol, mags_by_trt)
    *self.bin_edges, self.trts = all_edges
    if hasattr(self, 'csm'):
        for sg in self.csm.src_groups:
            if sg.atomic:
                raise NotImplementedError(
                    'Atomic groups are not supported yet')
    elif self.datastore['source_info'].attrs['atomic']:
        raise NotImplementedError(
            'Atomic groups are not supported yet')
    self.full_lt = self.datastore['full_lt']
    self.poes_disagg = oq.poes_disagg or (None,)
    self.imts = list(oq.imtls)
    self.M = len(self.imts)
    ws = [rlz.weight for rlz in self.full_lt.get_realizations()]
    self.pgetter = getters.PmapGetter(self.datastore, ws, self.sitecol.sids)

    # build array rlzs (N, Z)
    if oq.rlz_index is None:
        Z = oq.num_rlzs_disagg or 1
        rlzs = numpy.zeros((self.N, Z), int)
        if self.R > 1:
            for sid in self.sitecol.sids:
                curves = numpy.array(
                    [pc.array for pc in self.pgetter.get_pcurves(sid)])
                mean = getters.build_stat_curve(
                    curves, oq.imtls, stats.mean_curve, ws)
                rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
            self.datastore['best_rlzs'] = rlzs
    else:
        Z = len(oq.rlz_index)
        rlzs = numpy.zeros((self.N, Z), int)
        for z in range(Z):
            rlzs[:, z] = oq.rlz_index[z]
        self.datastore['best_rlzs'] = rlzs
    assert Z <= self.R, (Z, self.R)
    self.Z = Z
    self.rlzs = rlzs

    if oq.iml_disagg:
        # no hazard curves are needed
        self.poe_id = {None: 0}
        curves = [[None for z in range(Z)] for s in range(self.N)]
        self.ok_sites = set(self.sitecol.sids)
    else:
        self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
        curves = [self.get_curve(sid, rlzs[sid])
                  for sid in self.sitecol.sids]
        self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
    self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls,
                      self.poes_disagg, curves)
    self.datastore['iml4'] = self.iml4
    self.datastore['poe4'] = numpy.zeros_like(self.iml4.array)

    self.save_bin_edges()
    tot = get_outputs_size(self.shapedic, oq.disagg_outputs)
    logging.info('Total output size: %s', humansize(sum(tot.values())))
    self.imldic = {}  # sid, rlz, poe, imt -> iml
    for s in self.sitecol.sids:
        iml3 = self.iml4[s]
        for z, rlz in enumerate(rlzs[s]):
            for p, poe in enumerate(self.poes_disagg):
                for m, imt in enumerate(oq.imtls):
                    self.imldic[s, rlz, poe, imt] = iml3[m, p, z]

    # submit disaggregation tasks
    dstore = (self.datastore.parent
              if self.datastore.parent else self.datastore)
    mag_edges = self.bin_edges[0]
    indices = get_indices_by_gidx_mag(dstore, mag_edges)
    allargs = []
    totweight = sum(sum(ri.weight for ri in indices[gm]) for gm in indices)
    maxweight = int(numpy.ceil(totweight / (oq.concurrent_tasks or 1)))
    grp_ids = dstore['grp_ids'][:]
    rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(grp_ids)
    num_eff_rlzs = len(self.full_lt.sm_rlzs)
    task_inputs = []
    G, U = 0, 0
    for gidx, magi in indices:
        trti = grp_ids[gidx][0] // num_eff_rlzs
        trt = self.trts[trti]
        cmaker = ContextMaker(
            trt, rlzs_by_gsim[gidx],
            {'truncation_level': oq.truncation_level,
             'maximum_distance': oq.maximum_distance,
             'collapse_level': oq.collapse_level,
             'imtls': oq.imtls})
        G = max(G, len(cmaker.gsims))
        for rupidxs in block_splitter(
                indices[gidx, magi], maxweight, weight):
            idxs = numpy.array([ri.index for ri in rupidxs])
            U = max(U, len(idxs))
            allargs.append((dstore, idxs, cmaker, self.iml4, trti, magi,
                            self.bin_edges[1:], oq))
            task_inputs.append((trti, magi, len(idxs)))

    nbytes, msg = get_array_nbytes(dict(N=self.N, M=self.M, G=G, U=U))
    logging.info('Maximum mean_std per task:\n%s', msg)
    sd = self.shapedic.copy()
    sd.pop('trt')
    sd.pop('mag')
    sd['tasks'] = numpy.ceil(len(allargs))
    nbytes, msg = get_array_nbytes(sd)
    if nbytes > oq.max_data_transfer:
        raise ValueError(
            'Estimated data transfer too big\n%s > max_data_transfer=%s'
            % (msg, humansize(oq.max_data_transfer)))
    logging.info('Estimated data transfer:\n%s', msg)

    dt = numpy.dtype([('trti', U8), ('magi', U8), ('nrups', U32)])
    self.datastore['disagg_task'] = numpy.array(task_inputs, dt)
    self.datastore.swmr_on()
    smap = parallel.Starmap(
        compute_disagg, allargs, h5=self.datastore.hdf5)
    results = smap.reduce(self.agg_result, AccumDict(accum={}))
    return results  # imti, sid -> trti, magi -> 6D array
def full_disaggregation(self):
    """
    Run the disaggregation phase.
    """
    oq = self.oqparam
    tl = oq.truncation_level
    src_filter = SourceFilter(self.sitecol, oq.maximum_distance)
    if hasattr(self, 'csm'):
        for sg in self.csm.src_groups:
            if sg.atomic:
                raise NotImplementedError(
                    'Atomic groups are not supported yet')
        if not self.csm.get_sources():
            raise RuntimeError('All sources were filtered away!')
    csm_info = self.datastore['csm_info']
    self.poes_disagg = oq.poes_disagg or (None,)
    self.imts = list(oq.imtls)
    if oq.rlz_index is None:
        try:
            rlzs = self.datastore['best_rlz'][()]
        except KeyError:
            rlzs = numpy.zeros(self.N, int)
    else:
        rlzs = [oq.rlz_index] * self.N
    if oq.iml_disagg:
        self.poe_id = {None: 0}
        curves = [None] * len(self.sitecol)  # no hazard curves are needed
        self.ok_sites = set(self.sitecol.sids)
    else:
        self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
        curves = [self.get_curve(sid, rlzs) for sid in self.sitecol.sids]
        self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
    self.iml2s = _iml2s(rlzs, oq.iml_disagg, oq.imtls,
                        self.poes_disagg, curves)
    if oq.disagg_by_src:
        self.build_disagg_by_src()
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    # build trt_edges
    trts = tuple(csm_info.trts)
    trt_num = {trt: i for i, trt in enumerate(trts)}
    self.trts = trts
    # build mag_edges
    min_mag = csm_info.min_mag
    max_mag = csm_info.max_mag
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / oq.mag_bin_width)),
        int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))
    # build dist_edges
    maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))
    # build eps_edges
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max_mag)
    lon_edges, lat_edges = {}, {}  # by sid
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
    self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
    self.save_bin_edges()
    self.imldict = {}  # sid, rlzi, poe, imt -> iml
    for s in self.sitecol.sids:
        iml2 = self.iml2s[s]
        r = rlzs[s]
        logging.info('Site #%d, disaggregating for rlz=#%d', s, r)
        for p, poe in enumerate(self.poes_disagg):
            for m, imt in enumerate(oq.imtls):
                self.imldict[s, r, poe, imt] = iml2[m, p]
    # submit disagg tasks
    gid = self.datastore['rup/grp_id'][()]
    indices_by_grp = get_indices(gid)  # grp_id -> [(start, stop),...]
    blocksize = len(gid) // (oq.concurrent_tasks or 1) + 1
    allargs = []
    for grp_id, trt in csm_info.trt_by_grp.items():
        trti = trt_num[trt]
        rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(grp_id)
        cmaker = ContextMaker(
            trt, rlzs_by_gsim,
            {'truncation_level': oq.truncation_level,
             'maximum_distance': src_filter.integration_distance,
             'filter_distance': oq.filter_distance,
             'imtls': oq.imtls})
        for start, stop in indices_by_grp[grp_id]:
            for slc in gen_slices(start, stop, blocksize):
                allargs.append((self.datastore, slc, cmaker,
                                self.iml2s, trti, self.bin_edges))
    self.datastore.close()
    results = parallel.Starmap(
        compute_disagg, allargs,
        hdf5path=self.datastore.filename).reduce(
            self.agg_result, AccumDict(accum={}))
    return results  # sid -> trti -> 7D array
def make_contexts(self, site_collection, rupture):
    return ContextMaker([self.gsim_class]).make_contexts(
        site_collection, rupture)
def full_disaggregation(self):
    """
    Run the disaggregation phase.
    """
    oq = self.oqparam
    tl = oq.truncation_level
    src_filter = SourceFilter(self.sitecol, oq.maximum_distance)
    if hasattr(self, 'csm'):
        for sg in self.csm.src_groups:
            if sg.atomic:
                raise NotImplementedError(
                    'Atomic groups are not supported yet')
        if not self.csm.get_sources():
            raise RuntimeError('All sources were filtered away!')
    csm_info = self.datastore['csm_info']
    poes_disagg = oq.poes_disagg or (None,)
    R = len(self.rlzs_assoc.realizations)
    rlzs = extract.disagg_key(self.datastore).rlzs
    if oq.iml_disagg:
        self.poe_id = {None: 0}
        curves = [None] * len(self.sitecol)  # no hazard curves are needed
    else:
        self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
        curves = [self.get_curve(sid, rlzs) for sid in self.sitecol.sids]
        self.check_poes_disagg(curves, rlzs)
    iml2s = _iml2s(rlzs, oq.iml_disagg, oq.imtls, poes_disagg, curves)
    if oq.disagg_by_src:
        if R == 1:
            self.build_disagg_by_src(iml2s)
        else:
            logging.warning('disagg_by_src works only with 1 realization, '
                            'you have %d', R)
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    # build trt_edges
    trts = tuple(csm_info.trts)
    trt_num = {trt: i for i, trt in enumerate(trts)}
    self.trts = trts
    # build mag_edges
    min_mag = csm_info.min_mag
    max_mag = csm_info.max_mag
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / oq.mag_bin_width)),
        int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))
    # build dist_edges
    maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))
    # build eps_edges
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max_mag)
    lon_edges, lat_edges = {}, {}  # by sid
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
    self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
    self.save_bin_edges()
    # build all_args
    all_args = []
    self.imldict = {}  # sid, rlzi, poe, imt -> iml
    for s in self.sitecol.sids:
        iml2 = iml2s[s]
        r = rlzs[s]
        logging.info('Site #%d, disaggregating for rlz=#%d', s, r)
        for p, poe in enumerate(oq.poes_disagg or [None]):
            for m, imt in enumerate(oq.imtls):
                self.imldict[s, r, poe, imt] = iml2[m, p]
    for grp, dset in self.datastore['rup'].items():
        grp_id = int(grp[4:])
        trt = csm_info.trt_by_grp[grp_id]
        trti = trt_num[trt]
        rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(grp_id)
        cmaker = ContextMaker(
            trt, rlzs_by_gsim, src_filter.integration_distance,
            {'filter_distance': oq.filter_distance})
        for block in block_splitter(dset[()], 1000):
            all_args.append((src_filter.sitecol, numpy.array(block),
                             cmaker, iml2s, trti, self.bin_edges, oq))
    self.num_ruptures = [0] * len(self.trts)
    mon = self.monitor()
    results = parallel.Starmap(compute_disagg, all_args, mon).reduce(
        self.agg_result, AccumDict(accum={}))
    return results
def full_disaggregation(self, curves):
    """
    Run the disaggregation phase.

    :param curves: a list of hazard curves, one per site

    The curves can be all None if iml_disagg is set in the job.ini
    """
    oq = self.oqparam
    tl = oq.truncation_level
    src_filter = SourceFilter(self.sitecol, oq.maximum_distance)
    csm = self.csm
    if not csm.get_sources():
        raise RuntimeError('All sources were filtered away!')
    R = len(self.rlzs_assoc.realizations)
    I = len(oq.imtls)
    P = len(oq.poes_disagg) or 1
    if R * I * P > 10:
        logging.warn(
            'You have %d realizations, %d IMTs and %d poes_disagg: the '
            'disaggregation will be heavy and memory consuming', R, I, P)
    iml4 = disagg.make_iml4(R, oq.iml_disagg, oq.imtls,
                            oq.poes_disagg or (None,), curves)
    if oq.disagg_by_src:
        if R == 1:
            self.build_disagg_by_src(iml4)
        else:
            logging.warn('disagg_by_src works only with 1 realization, '
                         'you have %d', R)
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    self.bin_edges = {}
    # build trt_edges
    trts = tuple(sorted(set(sg.trt for smodel in csm.source_models
                            for sg in smodel.src_groups)))
    trt_num = {trt: i for i, trt in enumerate(trts)}
    self.trts = trts
    # build mag_edges
    min_mag = min(sg.min_mag for smodel in csm.source_models
                  for sg in smodel.src_groups)
    max_mag = max(sg.max_mag for smodel in csm.source_models
                  for sg in smodel.src_groups)
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / oq.mag_bin_width)),
        int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))
    # build dist_edges
    maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))
    # build eps_edges
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max_mag)
    lon_edges, lat_edges = {}, {}  # by sid
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
    self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
    self.save_bin_edges()
    # build all_args
    all_args = []
    maxweight = csm.get_maxweight(weight, oq.concurrent_tasks)
    mon = self.monitor('disaggregation')
    R = iml4.shape[1]
    self.imldict = {}  # sid, rlzi, poe, imt -> iml
    for s in self.sitecol.sids:
        for r in range(R):
            for p, poe in enumerate(oq.poes_disagg or [None]):
                for m, imt in enumerate(oq.imtls):
                    self.imldict[s, r, poe, imt] = iml4[s, r, m, p]
    for smodel in csm.source_models:
        sm_id = smodel.ordinal
        for trt, groups in groupby(
                smodel.src_groups, operator.attrgetter('trt')).items():
            trti = trt_num[trt]
            sources = sum([grp.sources for grp in groups], [])
            rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(trt, sm_id)
            cmaker = ContextMaker(
                rlzs_by_gsim, src_filter.integration_distance,
                {'filter_distance': oq.filter_distance})
            for block in block_splitter(sources, maxweight, weight):
                all_args.append((src_filter, block, cmaker, iml4, trti,
                                 self.bin_edges, oq, mon))
    self.num_ruptures = [0] * len(self.trts)
    self.cache_info = numpy.zeros(3)  # operations, cache_hits, num_zeros
    results = parallel.Starmap(compute_disagg, all_args).reduce(
        self.agg_result, AccumDict(accum={}))
    # set eff_ruptures
    trti = csm.info.trt2i()
    for smodel in csm.info.source_models:
        for sg in smodel.src_groups:
            sg.eff_ruptures = self.num_ruptures[trti[sg.trt]]
    self.datastore['csm_info'] = csm.info
    ops, hits, num_zeros = self.cache_info
    logging.info('Cache speedup %s', ops / (ops - hits))
    logging.info('Discarded zero matrices: %d', num_zeros)
    return results
def classical(group, src_filter, gsims, param, monitor=Monitor()): """ Compute the hazard curves for a set of sources belonging to the same tectonic region type for all the GSIMs associated to that TRT. The arguments are the same as in :func:`calc_hazard_curves`, except for ``gsims``, which is a list of GSIM instances. :returns: a dictionary {grp_id: pmap} with attributes .grp_ids, .calc_times, .eff_ruptures """ if getattr(group, 'src_interdep', None) == 'mutex': mutex_weight = { src.source_id: weight for src, weight in zip(group.sources, group.srcs_weights) } srcs = group.sources else: mutex_weight = None srcs = sum([split_source(src) for src in group], []) grp_ids = set() for src in group: grp_ids.update(src.src_group_ids) maxdist = src_filter.integration_distance with GroundShakingIntensityModel.forbid_instantiation(): imtls = param['imtls'] trunclevel = param.get('truncation_level') cmaker = ContextMaker(gsims, maxdist) ctx_mon = monitor('make_contexts', measuremem=False) poe_mon = monitor('get_poes', measuremem=False) pmap = AccumDict({ grp_id: ProbabilityMap(len(imtls.array), len(gsims)) for grp_id in grp_ids }) # AccumDict of arrays with 4 elements weight, nsites, calc_time, split pmap.calc_times = AccumDict(accum=numpy.zeros(4)) pmap.eff_ruptures = AccumDict() # grp_id -> num_ruptures for src, s_sites in src_filter(srcs): # filter now t0 = time.time() indep = group.rup_interdep == 'indep' if mutex_weight else True poemap = cmaker.poe_map(src, s_sites, imtls, trunclevel, ctx_mon, poe_mon, indep) if mutex_weight: # mutex sources weight = mutex_weight[src.source_id] for sid in poemap: pcurve = pmap[group.id].setdefault(sid, 0) pcurve += poemap[sid] * weight elif poemap: for grp_id in src.src_group_ids: pmap[grp_id] |= poemap src_id = src.source_id.split(':', 1)[0] pmap.calc_times[src_id] += numpy.array( [src.weight, len(s_sites), time.time() - t0, 1]) # storing the number of contributing ruptures too pmap.eff_ruptures += { grp_id: getattr(poemap, 'eff_ruptures', 0) for grp_id in src.src_group_ids } if mutex_weight and group.grp_probability is not None: pmap[group.id] *= group.grp_probability return pmap
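The two branches of the source loop above implement different probability compositions: independent sources are composed with `|=` (the complement of the product of non-exceedance probabilities), mutually exclusive sources with a weighted sum. A numpy sketch of the two rules, with invented per-source probabilities:

import numpy

# invented exceedance probabilities from three sources at one site/IML
poes = numpy.array([0.10, 0.05, 0.02])

# independent sources, as in `pmap[grp_id] |= poemap`
poe_indep = 1.0 - numpy.prod(1.0 - poes)

# mutually exclusive sources, as in `pcurve += poemap[sid] * weight`;
# the weights are the group's srcs_weights and must sum to at most 1
weights = numpy.array([0.5, 0.3, 0.2])
poe_mutex = numpy.sum(weights * poes)

print(poe_indep, poe_mutex)  # 0.1621 0.069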
def hazard_curves_per_trt( sources, sites, imtls, gsims, truncation_level=None, source_site_filter=filters.source_site_noop_filter, rupture_site_filter=filters.rupture_site_noop_filter, maximum_distance=None, bbs=(), monitor=DummyMonitor()): """ Compute the hazard curves for a set of sources belonging to the same tectonic region type for all the GSIMs associated to that TRT. The arguments are the same as in :func:`calc_hazard_curves`, except for ``gsims``, which is a list of GSIM instances. :returns: A list of G arrays of size N, where N is the number of sites and G the number of gsims. Each array contains records with fields given by the intensity measure types; the size of each field is given by the number of levels in ``imtls``. """ cmaker = ContextMaker(gsims, maximum_distance) gnames = list(map(str, gsims)) imt_dt = numpy.dtype([(imt, float, len(imtls[imt])) for imt in sorted(imtls)]) imts = {from_string(imt): imls for imt, imls in imtls.items()} curves = [numpy.ones(len(sites), imt_dt) for gname in gnames] sources_sites = ((source, sites) for source in sources) ctx_mon = monitor('making contexts', measuremem=False) rup_mon = monitor('getting ruptures', measuremem=False) pne_mon = monitor('computing poes', measuremem=False) monitor.calc_times = [] # pairs (src_id, delta_t) for source, s_sites in source_site_filter(sources_sites): t0 = time.time() try: with rup_mon: rupture_sites = list(rupture_site_filter( (rupture, s_sites) for rupture in source.iter_ruptures())) for rupture, r_sites in rupture_sites: with ctx_mon: try: sctx, rctx, dctx = cmaker.make_contexts( r_sites, rupture) except FarAwayRupture: continue # add optional disaggregation information (bounding boxes) if bbs: sids = set(sctx.sites.sids) jb_dists = dctx.rjb closest_points = rupture.surface.get_closest_points( sctx.sites.mesh) bs = [bb for bb in bbs if bb.site_id in sids] # NB: the assert below is always true; we are # protecting against possible refactoring errors assert len(bs) == len(jb_dists) == len(closest_points) for bb, dist, p in zip(bs, jb_dists, closest_points): if dist < maximum_distance: # ruptures too far away are ignored bb.update([dist], [p.longitude], [p.latitude]) for i, gsim in enumerate(gsims): with pne_mon: for imt in imts: poes = gsim.get_poes( sctx, rctx, dctx, imt, imts[imt], truncation_level) pno = rupture.get_probability_no_exceedance(poes) expanded_pno = sctx.sites.expand(pno, 1.0) curves[i][str(imt)] *= expanded_pno except Exception as err: etype, err, tb = sys.exc_info() msg = 'An error occurred with source id=%s. Error: %s' msg %= (source.source_id, str(err)) raise_(etype, msg, tb) # we are attaching the calculation times to the monitor # so that oq-lite (and the engine) can store them monitor.calc_times.append((source.id, time.time() - t0)) # NB: source.id is an integer; it should not be confused # with source.source_id, which is a string for i in range(len(gnames)): for imt in imtls: curves[i][imt] = 1. - curves[i][imt] return curves
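Note that `curves` starts as all ones because it accumulates probabilities of no exceedance; the complement is taken only in the final loop. A self-contained sketch of that accumulation with invented per-rupture probabilities:

import numpy

n_sites, n_levels = 3, 4
curve = numpy.ones((n_sites, n_levels))  # P(no exceedance) so far

# invented per-rupture probabilities of NO exceedance
pnos = [numpy.full((n_sites, n_levels), 0.99),
        numpy.full((n_sites, n_levels), 0.95)]
for pno in pnos:
    curve *= pno  # as in `curves[i][str(imt)] *= expanded_pno`

poes = 1.0 - curve  # as in `curves[i][imt] = 1. - curves[i][imt]`
print(poes[0, 0])   # 1 - 0.99 * 0.95 = 0.0595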
def pmap_from_grp(sources, src_filter, gsims, param, monitor=Monitor()): """ Compute the hazard curves for a set of sources belonging to the same tectonic region type for all the GSIMs associated to that TRT. The arguments are the same as in :func:`calc_hazard_curves`, except for ``gsims``, which is a list of GSIM instances. :returns: a ProbabilityMap instance """ if isinstance(sources, SourceGroup): group = sources sources = group.sources trt = sources[0].tectonic_region_type mutex_weight = { src.source_id: weight for src, weight in zip(group.sources, group.srcs_weights) } else: # list of sources trt = sources[0].tectonic_region_type group = SourceGroup(trt, sources, 'src_group', 'indep', 'indep') grp_id = sources[0].src_group_id maxdist = src_filter.integration_distance if hasattr(gsims, 'keys'): # dictionary trt -> gsim gsims = [gsims[trt]] srcs = [] for src in sources: if hasattr(src, '__iter__'): # MultiPointSource srcs.extend(src) else: srcs.append(src) del sources with GroundShakingIntensityModel.forbid_instantiation(): imtls = param['imtls'] trunclevel = param.get('truncation_level') cmaker = ContextMaker(gsims, maxdist) ctx_mon = monitor('making contexts', measuremem=False) pne_mons = [ monitor('%s.get_poes' % gsim, measuremem=False) for gsim in gsims ] src_indep = group.src_interdep == 'indep' pmap = ProbabilityMap(len(imtls.array), len(gsims)) pmap.calc_times = [] # pairs (src_id, delta_t) pmap.grp_id = grp_id for src, s_sites in src_filter(srcs): t0 = time.time() poemap = poe_map(src, s_sites, imtls, cmaker, trunclevel, ctx_mon, pne_mons, group.rup_interdep == 'indep') if src_indep: # usual composition of probabilities pmap |= poemap else: # mutually exclusive probabilities weight = mutex_weight[src.source_id] for sid in poemap: pcurve = pmap.setdefault(sid, 0) pcurve += poemap[sid] * weight pmap.calc_times.append( (src.source_id, src.weight, len(s_sites), time.time() - t0)) # storing the number of contributing ruptures too pmap.eff_ruptures = {pmap.grp_id: pne_mons[0].counts} if group.grp_probability is not None: return pmap * group.grp_probability return pmap
def hazard_curves_per_trt( sources, sites, imtls, gsims, truncation_level=None, source_site_filter=filters.source_site_noop_filter, rupture_site_filter=filters.rupture_site_noop_filter, maximum_distance=None, bbs=(), monitor=Monitor()): """ Compute the hazard curves for a set of sources belonging to the same tectonic region type for all the GSIMs associated to that TRT. The arguments are the same as in :func:`calc_hazard_curves`, except for ``gsims``, which is a list of GSIM instances. :returns: A list of G arrays of size N, where N is the number of sites and G the number of gsims. Each array contains records with fields given by the intensity measure types; the size of each field is given by the number of levels in ``imtls``. """ cmaker = ContextMaker(gsims, maximum_distance) gnames = list(map(str, gsims)) imt_dt = numpy.dtype([(imt, float, len(imtls[imt])) for imt in sorted(imtls)]) imts = {from_string(imt): imls for imt, imls in imtls.items()} curves = [numpy.ones(len(sites), imt_dt) for gname in gnames] sources_sites = ((source, sites) for source in sources) ctx_mon = monitor('making contexts', measuremem=False) pne_mon = monitor('computing poes', measuremem=False) monitor.calc_times = [] # pairs (src_id, delta_t) monitor.eff_ruptures = 0 # effective number of contributing ruptures for source, s_sites in source_site_filter(sources_sites): t0 = time.time() try: rupture_sites = rupture_site_filter( (rupture, s_sites) for rupture in source.iter_ruptures()) for rupture, r_sites in rupture_sites: with ctx_mon: try: sctx, rctx, dctx = cmaker.make_contexts( r_sites, rupture) except FarAwayRupture: continue monitor.eff_ruptures += 1 # add optional disaggregation information (bounding boxes) if bbs: sids = set(sctx.sites.sids) jb_dists = dctx.rjb closest_points = rupture.surface.get_closest_points( sctx.sites.mesh) bs = [bb for bb in bbs if bb.site_id in sids] # NB: the assert below is always true; we are # protecting against possible refactoring errors assert len(bs) == len(jb_dists) == len(closest_points) for bb, dist, p in zip(bs, jb_dists, closest_points): if dist < maximum_distance: # ruptures too far away are ignored bb.update([dist], [p.longitude], [p.latitude]) for i, gsim in enumerate(gsims): with pne_mon: for imt in imts: poes = gsim.get_poes( sctx, rctx, dctx, imt, imts[imt], truncation_level) pno = rupture.get_probability_no_exceedance(poes) expanded_pno = sctx.sites.expand(pno, 1.0) curves[i][str(imt)] *= expanded_pno except Exception as err: etype, err, tb = sys.exc_info() msg = 'An error occurred with source id=%s. Error: %s' msg %= (source.source_id, str(err)) raise_(etype, msg, tb) # we are attaching the calculation times to the monitor # so that oq-lite (and the engine) can store them monitor.calc_times.append((source.id, time.time() - t0)) # NB: source.id is an integer; it should not be confused # with source.source_id, which is a string for i in range(len(gnames)): for imt in imtls: curves[i][imt] = 1. - curves[i][imt] return curves
def _collect_bins_data(sources, site, imt, iml, gsims, truncation_level, n_epsilons, source_site_filter, rupture_site_filter): """ Extract values of magnitude, distance, closest point, tectonic region types and PoE distribution. This method processes the source model (generates ruptures) and collects all needed parameters to arrays. It also defines tectonic region type bins sequence. """ mags = [] dists = [] lons = [] lats = [] tect_reg_types = [] probs_no_exceed = [] sitecol = SiteCollection([site]) sitemesh = sitecol.mesh _next_trt_num = 0 trt_nums = {} sources_sites = ((source, sitecol) for source in sources) # here we ignore filtered site collection because either it is the same # as the original one (with one site), or the source/rupture is filtered # out and doesn't show up in the filter's output for src_idx, (source, s_sites) in \ enumerate(source_site_filter(sources_sites)): try: tect_reg = source.tectonic_region_type gsim = gsims[tect_reg] cmaker = ContextMaker([gsim]) if tect_reg not in trt_nums: trt_nums[tect_reg] = _next_trt_num _next_trt_num += 1 tect_reg = trt_nums[tect_reg] ruptures_sites = ((rupture, s_sites) for rupture in source.iter_ruptures()) for rupture, r_sites in rupture_site_filter(ruptures_sites): # extract rupture parameters of interest mags.append(rupture.mag) [jb_dist] = rupture.surface.get_joyner_boore_distance(sitemesh) dists.append(jb_dist) [closest_point] = rupture.surface.get_closest_points(sitemesh) lons.append(closest_point.longitude) lats.append(closest_point.latitude) tect_reg_types.append(tect_reg) # compute conditional probability of exceeding iml given # the current rupture, and different epsilon level, that is # ``P(IMT >= iml | rup, epsilon_bin)`` for each of epsilon bins sctx, rctx, dctx = cmaker.make_contexts(sitecol, rupture) [poes_given_rup_eps] = gsim.disaggregate_poe( sctx, rctx, dctx, imt, iml, truncation_level, n_epsilons ) # collect probability of a rupture causing no exceedances probs_no_exceed.append( rupture.get_probability_no_exceedance(poes_given_rup_eps) ) except Exception as err: etype, err, tb = sys.exc_info() msg = 'An error occurred with source id=%s. Error: %s' msg %= (source.source_id, str(err)) raise_(etype, msg, tb) mags = numpy.array(mags, float) dists = numpy.array(dists, float) lons = numpy.array(lons, float) lats = numpy.array(lats, float) tect_reg_types = numpy.array(tect_reg_types, int) probs_no_exceed = numpy.array(probs_no_exceed, float) trt_bins = [ trt for (num, trt) in sorted((num, trt) for (trt, num) in trt_nums.items()) ] return (mags, dists, lons, lats, tect_reg_types, trt_bins, probs_no_exceed)
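`disaggregate_poe` splits P(IMT >= iml | rup) across the epsilon bins of a truncated normal distribution. The sketch below is a rough scipy stand-in for that idea, not the hazardlib implementation; the mean, stddev and IML are invented lognormal parameters:

import numpy
from scipy.stats import truncnorm

truncation_level, n_epsilons = 3.0, 6
mean, stddev, iml = numpy.log(0.2), 0.6, 0.35  # invented values

dist = truncnorm(-truncation_level, truncation_level)
eps_edges = numpy.linspace(-truncation_level, truncation_level,
                           n_epsilons + 1)
z = (numpy.log(iml) - mean) / stddev  # standardised IML

# per-bin exceedance: the truncated-normal mass above max(z, lower edge),
# clipped to the bin; the bins sum to the total P(IMT >= iml | rup)
poes_given_rup_eps = numpy.array([
    dist.cdf(hi) - dist.cdf(numpy.clip(z, lo, hi))
    for lo, hi in zip(eps_edges[:-1], eps_edges[1:])])
assert abs(poes_given_rup_eps.sum() - (1 - dist.cdf(z))) < 1e-9
print(poes_given_rup_eps)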
def _read_scenario_ruptures(self): oq = self.oqparam gsim_lt = readinput.get_gsim_lt(self.oqparam) G = gsim_lt.get_num_paths() if oq.calculation_mode.startswith('scenario'): ngmfs = oq.number_of_ground_motion_fields if oq.inputs['rupture_model'].endswith('.xml'): # check the number of branchsets bsets = len(gsim_lt._ltnode) if bsets > 1: raise InvalidFile( '%s for a scenario calculation must contain a single ' 'branchset, found %d!' % (oq.inputs['job_ini'], bsets)) [(trt, rlzs_by_gsim)] = gsim_lt.get_rlzs_by_gsim_trt().items() self.cmaker = ContextMaker( trt, rlzs_by_gsim, { 'maximum_distance': oq.maximum_distance(trt), 'minimum_distance': oq.minimum_distance, 'truncation_level': oq.truncation_level, 'imtls': oq.imtls }) rup = readinput.get_rupture(oq) if self.N > oq.max_sites_disagg: # many sites, split rupture ebrs = [ EBRupture(copyobj(rup, rup_id=rup.rup_id + i), 'NA', 0, G, e0=i * G, scenario=True) for i in range(ngmfs) ] else: # keep a single rupture with a big occupation number ebrs = [ EBRupture(rup, 'NA', 0, G * ngmfs, rup.rup_id, scenario=True) ] srcfilter = SourceFilter(self.sitecol, oq.maximum_distance(trt)) aw = get_rup_array(ebrs, srcfilter) if len(aw) == 0: raise RuntimeError( 'The rupture is too far from the sites! Please check the ' 'maximum_distance and the position of the rupture') elif oq.inputs['rupture_model'].endswith('.csv'): aw = get_ruptures(oq.inputs['rupture_model']) if len(gsim_lt.values) == 1: # fix for scenario_damage/case_12 aw['trt_smr'] = 0 # a single TRT if oq.calculation_mode.startswith('scenario'): # rescale n_occ by ngmfs and nrlzs aw['n_occ'] *= ngmfs * gsim_lt.get_num_paths() else: raise InvalidFile("Something wrong in %s" % oq.inputs['job_ini']) rup_array = aw.array hdf5.extend(self.datastore['rupgeoms'], aw.geom) if len(rup_array) == 0: raise RuntimeError( 'There are no sites within the maximum_distance' ' of %s km from the rupture' % oq.maximum_distance(rup.tectonic_region_type)(rup.mag)) fake = logictree.FullLogicTree.fake(gsim_lt) self.realizations = fake.get_realizations() self.datastore['full_lt'] = fake self.store_rlz_info({}) # store weights self.save_params() imp = calc.RuptureImporter(self.datastore) imp.import_rups_events(rup_array, get_rupture_getters)
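When the rupture is split, each of the `ngmfs` copies carries `G` events and an `e0` offset, so event indices stay globally unique; the unsplit branch reaches the same total with a single rupture of occupation number `G * ngmfs`. The bookkeeping in miniature (pure arithmetic with invented numbers, not the EBRupture API):

ngmfs, G = 4, 3  # invented: ground motion fields and gsim paths
# split case: copy i owns event indices e0 .. e0 + G - 1
offsets = [i * G for i in range(ngmfs)]
print(offsets)  # [0, 3, 6, 9] -> 12 globally unique event indices
# unsplit case: one rupture with n_occ = G * ngmfs covers the same 12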
def compute(self): """ Submit disaggregation tasks and return the results """ oq = self.oqparam dstore = (self.datastore.parent if self.datastore.parent else self.datastore) magi = numpy.searchsorted(self.bin_edges[0], dstore['rup/mag'][:]) - 1 magi[magi == -1] = 0 # when the magnitude is on the edge totrups = len(magi) logging.info('Reading {:_d} ruptures'.format(totrups)) rdt = [('grp_id', U16), ('magi', U8), ('nsites', U16), ('idx', U32)] rdata = numpy.zeros(totrups, rdt) rdata['magi'] = magi rdata['idx'] = numpy.arange(totrups) rdata['grp_id'] = dstore['rup/grp_id'][:] rdata['nsites'] = dstore['rup/nsites'][:] totweight = rdata['nsites'].sum() et_ids = dstore['et_ids'][:] rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(et_ids) G = max(len(rbg) for rbg in rlzs_by_gsim) maxw = 2 * 1024**3 / (16 * G * self.M) # at max 2 GB maxweight = min(numpy.ceil(totweight / (oq.concurrent_tasks or 1)), maxw) num_eff_rlzs = len(self.full_lt.sm_rlzs) task_inputs = [] U = 0 self.datastore.swmr_on() smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5) # ABSURDLY IMPORTANT!! we rely on the fact that the classical part # of the calculation stores the ruptures in chunks of constant # grp_id, therefore it is possible to build (start, stop) slices; # we are NOT grouping by operator.itemgetter('grp_id', 'magi'): # that would break the ordering of the indices causing an incredibly # worse performance, but visible only in extra-large calculations! for block in block_splitter(rdata, maxweight, operator.itemgetter('nsites'), operator.itemgetter('grp_id')): grp_id = block[0]['grp_id'] trti = et_ids[grp_id][0] // num_eff_rlzs trt = self.trts[trti] cmaker = ContextMaker( trt, rlzs_by_gsim[grp_id], { 'truncation_level': oq.truncation_level, 'maximum_distance': oq.maximum_distance, 'collapse_level': oq.collapse_level, 'num_epsilon_bins': oq.num_epsilon_bins, 'investigation_time': oq.investigation_time, 'imtls': oq.imtls }) U = max(U, block.weight) slc = slice(block[0]['idx'], block[-1]['idx'] + 1) smap.submit((dstore, slc, cmaker, self.hmap4, trti, magi[slc], self.bin_edges)) task_inputs.append((trti, slc.stop - slc.start)) nbytes, msg = get_nbytes_msg(dict(M=self.M, G=G, U=U, F=2)) logging.info('Maximum mean_std per task:\n%s', msg) s = self.shapedic sd = dict(N=s['N'], M=s['M'], P=s['P'], Z=s['Z'], D=s['dist'], E=s['eps'], Lo=s['lon'], La=s['lat']) sd['tasks'] = numpy.ceil(len(task_inputs)) nbytes, msg = get_nbytes_msg(sd) if nbytes > oq.max_data_transfer: raise ValueError( 'Estimated data transfer too big\n%s > max_data_transfer=%s' % (msg, humansize(oq.max_data_transfer))) logging.info('Estimated data transfer:\n%s', msg) sd.pop('tasks') dt = numpy.dtype([('trti', U8), ('nrups', U32)]) self.datastore['disagg_task'] = numpy.array(task_inputs, dt) results = smap.reduce(self.agg_result, AccumDict(accum={})) return results # imti, sid -> trti, magi -> 6D array
def full_disaggregation(self): """ Run the disaggregation phase. """ oq = self.oqparam tl = oq.truncation_level src_filter = self.src_filter() if hasattr(self, 'csm'): for sg in self.csm.src_groups: if sg.atomic: raise NotImplementedError( 'Atomic groups are not supported yet') if not self.csm.get_sources(): raise RuntimeError('All sources were filtered away!') csm_info = self.datastore['csm_info'] self.poes_disagg = oq.poes_disagg or (None,) self.imts = list(oq.imtls) self.ws = [rlz.weight for rlz in self.rlzs_assoc.realizations] self.pgetter = getters.PmapGetter( self.datastore, self.ws, self.sitecol.sids) # build array rlzs (N, Z) if oq.rlz_index is None: Z = oq.num_rlzs_disagg rlzs = numpy.zeros((self.N, Z), int) if self.R > 1: for sid in self.sitecol.sids: curves = numpy.array( [pc.array for pc in self.pgetter.get_pcurves(sid)]) mean = getters.build_stat_curve( curves, oq.imtls, stats.mean_curve, self.ws) rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z] self.datastore['best_rlzs'] = rlzs else: Z = len(oq.rlz_index) rlzs = numpy.zeros((self.N, Z), int) for z in range(Z): rlzs[:, z] = oq.rlz_index[z] assert Z <= self.R, (Z, self.R) self.Z = Z self.rlzs = rlzs if oq.iml_disagg: # no hazard curves are needed self.poe_id = {None: 0} curves = [[None for z in range(Z)] for s in range(self.N)] self.ok_sites = set(self.sitecol.sids) else: self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)} curves = [self.get_curve(sid, rlzs[sid]) for sid in self.sitecol.sids] self.ok_sites = set(self.check_poes_disagg(curves, rlzs)) self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls, self.poes_disagg, curves) if oq.disagg_by_src: self.build_disagg_by_src(rlzs) eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1) # build trt_edges trts = tuple(csm_info.trts) trt_num = {trt: i for i, trt in enumerate(trts)} self.trts = trts # build mag_edges mags = [float(mag) for mag in self.datastore['source_mags']] mag_edges = oq.mag_bin_width * numpy.arange( int(numpy.floor(min(mags) / oq.mag_bin_width)), int(numpy.ceil(max(mags) / oq.mag_bin_width) + 1)) # build dist_edges maxdist = max(oq.maximum_distance(trt) for trt in trts) dist_edges = oq.distance_bin_width * numpy.arange( 0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1)) # build eps_edges eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1) # build lon_edges, lat_edges per sid bbs = src_filter.get_bounding_boxes(mag=max(mags)) lon_edges, lat_edges = {}, {} # by sid for sid, bb in zip(self.sitecol.sids, bbs): lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins( bb, oq.coordinate_bin_width) self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges self.save_bin_edges() self.imldict = {} # sid, rlz, poe, imt -> iml for s in self.sitecol.sids: for z, rlz in enumerate(rlzs[s]): logging.info('Site #%d, disaggregating for rlz=#%d', s, rlz) for p, poe in enumerate(self.poes_disagg): for m, imt in enumerate(oq.imtls): self.imldict[s, rlz, poe, imt] = self.iml4[s, m, p, z] # submit disagg tasks gid = self.datastore['rup/grp_id'][()] indices_by_grp = get_indices(gid) # grp_id -> [(start, stop),...] 
blocksize = len(gid) // (oq.concurrent_tasks or 1) + 1 # NB: removing the blocksize causes slow disaggregation tasks dstore = (self.datastore.parent if self.datastore.parent else self.datastore) smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5) for grp_id, trt in csm_info.trt_by_grp.items(): logging.info('Group #%d, sending rup_data for %s', grp_id, trt) trti = trt_num[trt] rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(grp_id) cmaker = ContextMaker( trt, rlzs_by_gsim, {'truncation_level': oq.truncation_level, 'maximum_distance': src_filter.integration_distance, 'filter_distance': oq.filter_distance, 'imtls': oq.imtls}) for start, stop in indices_by_grp[grp_id]: for slc in gen_slices(start, stop, blocksize): rupdata = {k: dstore['rup/' + k][slc] for k in self.datastore['rup']} smap.submit((rupdata, self.sitecol, oq, cmaker, self.iml4, trti, self.bin_edges)) results = smap.reduce(self.agg_result, AccumDict(accum={})) return results # sid -> trti-> 8D array
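`gen_slices` is not shown in this excerpt; here is a plausible re-implementation (a guess at the helper, not the engine's code) that yields slices of at most `blocksize` items over a (start, stop) range, matching how it is used above:

def gen_slices(start, stop, blocksize):
    """Yield slice objects of at most `blocksize` items covering
    [start, stop); a guessed equivalent of the helper used above."""
    while start < stop:
        yield slice(start, min(start + blocksize, stop))
        start += blocksize

# 10 ruptures, 4 concurrent tasks -> blocksize = 10 // 4 + 1 = 3
print([(s.start, s.stop) for s in gen_slices(0, 10, 3)])
# [(0, 3), (3, 6), (6, 9), (9, 10)]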
def compute(self): """ Submit disaggregation tasks and return the results """ logging.info('Reading ruptures') oq = self.oqparam dstore = (self.datastore.parent if self.datastore.parent else self.datastore) mags = set() for trt, dset in self.datastore['source_mags'].items(): mags.update(dset[:]) mags = sorted(mags) allargs = [] totweight = sum(d['rctx']['nsites'].sum() for n, d in dstore.items() if n.startswith('mag_') and len(d['rctx'])) et_ids = dstore['et_ids'][:] rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(et_ids) G = max(len(rbg) for rbg in rlzs_by_gsim) maxw = 2 * 1024**3 / (16 * G * self.M) # at max 2 GB maxweight = min(numpy.ceil(totweight / (oq.concurrent_tasks or 1)), maxw) num_eff_rlzs = len(self.full_lt.sm_rlzs) task_inputs = [] U = 0 totrups = 0 for mag in mags: rctx = dstore['mag_%s/rctx' % mag][:] totrups += len(rctx) for grp_id, gids in enumerate(et_ids): idxs, = numpy.where(rctx['grp_id'] == grp_id) if len(idxs) == 0: continue trti = gids[0] // num_eff_rlzs trt = self.trts[trti] cmaker = ContextMaker( trt, rlzs_by_gsim[grp_id], { 'truncation_level': oq.truncation_level, 'maximum_distance': oq.maximum_distance, 'collapse_level': oq.collapse_level, 'imtls': oq.imtls }) for blk in block_splitter(rctx[idxs], maxweight, nsites): nr = len(blk) U = max(U, blk.weight) allargs.append((dstore, numpy.array(blk), cmaker, self.hmap4, trti, self.bin_edges, oq)) task_inputs.append((trti, mag, nr)) logging.info('Found {:_d} ruptures'.format(totrups)) nbytes, msg = get_array_nbytes(dict(M=self.M, G=G, U=U, F=2)) logging.info('Maximum mean_std per task:\n%s', msg) s = self.shapedic size = s['dist'] * s['eps'] + s['lon'] * s['lat'] sd = dict(N=s['N'], M=s['M'], P=s['P'], Z=s['Z'], size=size) sd['tasks'] = numpy.ceil(len(allargs)) nbytes, msg = get_array_nbytes(sd) if nbytes > oq.max_data_transfer: raise ValueError( 'Estimated data transfer too big\n%s > max_data_transfer=%s' % (msg, humansize(oq.max_data_transfer))) logging.info('Estimated data transfer:\n%s', msg) sd.pop('tasks') sd['mags_trt'] = sum( len(mags) for mags in self.datastore['source_mags'].values()) nbytes, msg = get_array_nbytes(sd) logging.info('Estimated memory on the master:\n%s', msg) dt = numpy.dtype([('trti', U8), ('mag', '|S4'), ('nrups', U32)]) self.datastore['disagg_task'] = numpy.array(task_inputs, dt) self.datastore.swmr_on() smap = parallel.Starmap(compute_disagg, allargs, h5=self.datastore.hdf5) results = smap.reduce(self.agg_result, AccumDict(accum={})) return results # imti, sid -> trti, magi -> 6D array
def compute_ruptures(sources, sitecol, siteidx, rlzs_assoc, monitor): """ :param sources: List of commonlib.source.Source tuples :param sitecol: a :class:`openquake.hazardlib.site.SiteCollection` instance :param siteidx: always equal to 0 :param rlzs_assoc: a :class:`openquake.commonlib.source.RlzsAssoc` instance :param monitor: monitor instance :returns: a dictionary src_group_id -> [Rupture instances] """ assert siteidx == 0, ( 'siteidx can be nonzero only for the classical_tiling calculations: ' 'tiling with the EventBasedRuptureCalculator is an error') # NB: by construction each block is a non-empty list with # sources of the same src_group_id src_group_id = sources[0].src_group_id oq = monitor.oqparam trt = sources[0].tectonic_region_type max_dist = oq.maximum_distance[trt] cmaker = ContextMaker(rlzs_assoc.gsims_by_grp_id[src_group_id]) params = sorted(cmaker.REQUIRES_RUPTURE_PARAMETERS) rup_data_dt = numpy.dtype( [('rupserial', U32), ('multiplicity', U16), ('numsites', U32), ('occurrence_rate', F32)] + [ (param, F32) for param in params]) eb_ruptures = [] rup_data = [] calc_times = [] rup_mon = monitor('filtering ruptures', measuremem=False) num_samples = rlzs_assoc.samples[src_group_id] # Compute and save stochastic event sets for src in sources: t0 = time.time() s_sites = src.filter_sites_by_distance_to_source(max_dist, sitecol) if s_sites is None: continue rupture_filter = functools.partial( filter_sites_by_distance_to_rupture, integration_distance=max_dist, sites=s_sites) num_occ_by_rup = sample_ruptures( src, oq.ses_per_logic_tree_path, num_samples, rlzs_assoc.seed) # NB: the number of occurrences is very low, << 1, so it is # more efficient to filter only the ruptures that occur, i.e. # to call sample_ruptures *before* the filtering for ebr in build_eb_ruptures( src, num_occ_by_rup, rupture_filter, oq.random_seed, rup_mon): nsites = len(ebr.indices) try: rate = ebr.rupture.occurrence_rate except AttributeError: # for nonparametric sources rate = numpy.nan rc = cmaker.make_rupture_context(ebr.rupture) ruptparams = tuple(getattr(rc, param) for param in params) rup_data.append((ebr.serial, len(ebr.etags), nsites, rate) + ruptparams) eb_ruptures.append(ebr) dt = time.time() - t0 calc_times.append((src.id, dt)) res = AccumDict({src_group_id: eb_ruptures}) res.calc_times = calc_times res.rup_data = numpy.array(rup_data, rup_data_dt) res.trt = trt return res
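The comment in `compute_ruptures` notes that expected occurrence counts are far below one, which is why sampling happens before filtering. A minimal sketch of Poissonian occurrence sampling, with invented numbers:

import numpy

rng = numpy.random.default_rng(42)
occurrence_rate = 1e-6         # mean annual rate of the rupture (invented)
investigation_time = 50.0      # years per stochastic event set (invented)
ses_per_logic_tree_path = 1000

# expected number of occurrences over all event sets: 0.05, so the vast
# majority of ruptures are sampled zero times and never reach the filter
n_occ = rng.poisson(
    occurrence_rate * investigation_time * ses_per_logic_tree_path)
print(n_occ)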
def _collect_bins_data(trt_num, source_ruptures, site, curves, trt_model_id, rlzs_assoc, gsims, imtls, poes, truncation_level, n_epsilons, mon): # returns a BinData instance sitecol = SiteCollection([site]) mags = [] dists = [] lons = [] lats = [] trts = [] pnes = [] sitemesh = sitecol.mesh make_ctxt = mon('making contexts', measuremem=False) disagg_poe = mon('disaggregate_poe', measuremem=False) cmaker = ContextMaker(gsims) for source, ruptures in source_ruptures: try: tect_reg = trt_num[source.tectonic_region_type] for rupture in ruptures: with make_ctxt: sctx, rctx, dctx = cmaker.make_contexts(sitecol, rupture) # extract rupture parameters of interest mags.append(rupture.mag) dists.append(dctx.rjb[0]) # single site => single distance [closest_point] = rupture.surface.get_closest_points(sitemesh) lons.append(closest_point.longitude) lats.append(closest_point.latitude) trts.append(tect_reg) pne_dict = {} # a dictionary rlz.id, poe, imt_str -> prob_no_exceed for gsim in gsims: for imt_str, imls in imtls.items(): imt = from_string(imt_str) imls = numpy.array(imls[::-1]) for rlz in rlzs_assoc[trt_model_id, gsim.__class__.__name__]: rlzi = rlz.ordinal curve_poes = curves[rlzi, imt_str][::-1] for poe in poes: iml = numpy.interp(poe, curve_poes, imls) # compute probability of exceeding iml given # the current rupture and epsilon_bin, that is # ``P(IMT >= iml | rup, epsilon_bin)`` # for each of the epsilon bins with disagg_poe: [poes_given_rup_eps] = \ gsim.disaggregate_poe( sctx, rctx, dctx, imt, iml, truncation_level, n_epsilons) pne = rupture.get_probability_no_exceedance( poes_given_rup_eps) pne_dict[rlzi, poe, imt_str] = (iml, pne) pnes.append(pne_dict) except Exception as err: etype, err, tb = sys.exc_info() msg = 'An error occurred with source id=%s. Error: %s' msg %= (source.source_id, err) raise_(etype, msg, tb) return BinData(numpy.array(mags, float), numpy.array(dists, float), numpy.array(lons, float), numpy.array(lats, float), numpy.array(trts, int), pnes)
def _collect_bins_data(trt_num, source_ruptures, site, curves, src_group_id, rlzs_assoc, gsims, imtls, poes, truncation_level, n_epsilons, mon): # returns a BinData instance sitecol = SiteCollection([site]) mags = [] dists = [] lons = [] lats = [] trts = [] pnes = [] sitemesh = sitecol.mesh make_ctxt = mon('making contexts', measuremem=False) disagg_poe = mon('disaggregate_poe', measuremem=False) cmaker = ContextMaker(gsims) for source, ruptures in source_ruptures: try: tect_reg = trt_num[source.tectonic_region_type] for rupture in ruptures: with make_ctxt: sctx, rctx, dctx = cmaker.make_contexts(sitecol, rupture) # extract rupture parameters of interest mags.append(rupture.mag) dists.append(dctx.rjb[0]) # single site => single distance [closest_point] = rupture.surface.get_closest_points(sitemesh) lons.append(closest_point.longitude) lats.append(closest_point.latitude) trts.append(tect_reg) pne_dict = {} # a dictionary rlz.id, poe, imt_str -> prob_no_exceed for gsim in gsims: gs = str(gsim) for imt_str, imls in imtls.items(): imt = from_string(imt_str) imls = numpy.array(imls[::-1]) for rlz in rlzs_assoc[src_group_id, gs]: rlzi = rlz.ordinal curve_poes = curves[rlzi, imt_str][::-1] for poe in poes: iml = numpy.interp(poe, curve_poes, imls) # compute probability of exceeding iml given # the current rupture and epsilon_bin, that is # ``P(IMT >= iml | rup, epsilon_bin)`` # for each of the epsilon bins with disagg_poe: [poes_given_rup_eps] = \ gsim.disaggregate_poe( sctx, rctx, dctx, imt, iml, truncation_level, n_epsilons) pne = rupture.get_probability_no_exceedance( poes_given_rup_eps) pne_dict[rlzi, poe, imt_str] = (iml, pne) pnes.append(pne_dict) except Exception as err: etype, err, tb = sys.exc_info() msg = 'An error occurred with source id=%s. Error: %s' msg %= (source.source_id, err) raise_(etype, msg, tb) return BinData(numpy.array(mags, float), numpy.array(dists, float), numpy.array(lons, float), numpy.array(lats, float), numpy.array(trts, int), pnes)
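Both versions of `_collect_bins_data` reverse `imls` and `curve_poes` before calling `numpy.interp`: hazard curves store PoEs in decreasing order of IML, while `numpy.interp` requires increasing x-coordinates. A self-contained sketch with an invented curve:

import numpy

# hazard curve as stored: IMLs increasing, PoEs decreasing
imls = numpy.array([0.1, 0.2, 0.4, 0.8])
curve_poes = numpy.array([0.9, 0.5, 0.1, 0.01])

# reverse both arrays so the x-coordinates (the PoEs) are increasing
target_poe = 0.3
iml = numpy.interp(target_poe, curve_poes[::-1], imls[::-1])
print(iml)  # 0.3, interpolated between the (0.5, 0.2) and (0.1, 0.4) points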
def classical(group, src_filter, gsims, param, monitor=Monitor()): """ Compute the hazard curves for a set of sources belonging to the same tectonic region type for all the GSIMs associated to that TRT. The arguments are the same as in :func:`calc_hazard_curves`, except for ``gsims``, which is a list of GSIM instances. :returns: a dictionary {grp_id: pmap} with attributes .grp_ids, .calc_times, .eff_ruptures """ if not hasattr(src_filter, 'sitecol'): # a sitecol was passed src_filter = SourceFilter(src_filter, {}) # Get the parameters assigned to the group src_mutex = getattr(group, 'src_interdep', None) == 'mutex' rup_mutex = getattr(group, 'rup_interdep', None) == 'mutex' cluster = getattr(group, 'cluster', None) # Compute the number of ruptures grp_ids = set() trts = set() for src in group: if not src.num_ruptures: # src.num_ruptures is set when parsing the XML, but not when # the source is instantiated manually, so it is set here src.num_ruptures = src.count_ruptures() # This sets the proper TOM in case of a cluster if cluster: src.temporal_occurrence_model = FatedTOM(time_span=1) # Updating IDs grp_ids.update(src.src_group_ids) trts.add(src.tectonic_region_type) # Now preparing context maxdist = src_filter.integration_distance imtls = param['imtls'] trunclevel = param.get('truncation_level') [trt] = trts # there must be a single tectonic region type cmaker = ContextMaker(trt, gsims, maxdist, param, monitor) # Prepare the accumulator for the probability maps pmap = AccumDict({grp_id: ProbabilityMap(len(imtls.array), len(gsims)) for grp_id in grp_ids}) pmap.trt = trt rupdata = {grp_id: [] for grp_id in grp_ids} # AccumDict of arrays with 2 elements weight, calc_time calc_times = AccumDict(accum=numpy.zeros(2, numpy.float32)) eff_ruptures = AccumDict(accum=0) # grp_id -> num_ruptures nsites = {} # src.id -> num_sites # Computing hazard for src, s_sites in src_filter(group): # filter now nsites[src.id] = src.nsites t0 = time.time() try: poemap = cmaker.poe_map(src, s_sites, imtls, trunclevel, rup_indep=not rup_mutex) except Exception as err: etype, err, tb = sys.exc_info() msg = '%s (source id=%s)' % (str(err), src.source_id) raise etype(msg).with_traceback(tb) if src_mutex: # mutex sources, there is a single group for sid in poemap: pcurve = pmap[src.src_group_id].setdefault(sid, 0) pcurve += poemap[sid] * src.mutex_weight elif poemap: for gid in src.src_group_ids: pmap[gid] |= poemap if len(cmaker.rupdata): for gid in src.src_group_ids: rupdata[gid].append(cmaker.rupdata) calc_times[src.id] += numpy.array([src.weight, time.time() - t0]) # storing the number of contributing ruptures too eff_ruptures += {gid: getattr(poemap, 'eff_ruptures', 0) for gid in src.src_group_ids} # Updating the probability map in the case of mutually exclusive # sources group_probability = getattr(group, 'grp_probability', None) if src_mutex and group_probability: pmap[src.src_group_id] *= group_probability # Processing cluster if cluster: tom = getattr(group, 'temporal_occurrence_model') pmap = _cluster(param, tom, imtls, gsims, grp_ids, pmap) # Return results for gid, data in rupdata.items(): if len(data): rupdata[gid] = numpy.concatenate(data) return dict(pmap=pmap, calc_times=calc_times, eff_ruptures=eff_ruptures, rup_data=rupdata, nsites=nsites)
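The `AccumDict(accum=numpy.zeros(2, numpy.float32))` used for `calc_times` above is essentially a defaultdict that sums arrays; AccumDict itself lives in openquake.baselib, re-sketched here with the standard library:

import collections
import numpy

calc_times = collections.defaultdict(lambda: numpy.zeros(2))

# accumulate (weight, elapsed seconds) per source id, as in the loop above
calc_times[1] += numpy.array([10.0, 0.25])
calc_times[1] += numpy.array([10.0, 0.30])
calc_times[2] += numpy.array([4.0, 0.05])
print(dict(calc_times))  # {1: array([20.  , 0.55]), 2: array([4.  , 0.05])}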
def get_conditional_gmfs(database, rupture, sites, gsims, imts, number_simulations, truncation_level, correlation_model=DEFAULT_CORRELATION): """ Get a set of random fields conditioned on a set of observations :param database: Ground motion records for the event as instance of :class: smtk.sm_database.GroundMotionDatabase :param rupture: Event rupture as instance of :class: openquake.hazardlib.source.rupture.Rupture :param sites: Target sites as instance of :class: openquake.hazardlib.site.SiteCollection :param list gsims: List of GMPEs required :param list imts: List of intensity measures required :param int number_simulations: Number of simulated fields required :param float truncation_level: Ground motion truncation level """ # Get known sites mesh known_sites = database.get_site_collection() # Get Observed Residuals residuals = Residuals(gsims, imts) residuals.get_residuals(database) # NB: build a fresh imt dictionary per GMPE; sharing a single dict across GMPEs would make them overwrite each other's fields gmfs = OrderedDict([ (gmpe, OrderedDict([ (imtx, np.zeros([len(sites.lons), number_simulations])) for imtx in imts])) for gmpe in gsims]) gmpe_list = [GSIM_LIST[gmpe]() for gmpe in gsims] cmaker = ContextMaker(gmpe_list) sctx, rctx, dctx = cmaker.make_contexts(sites, rupture) for gsim in gmpe_list: gmpe = gsim.__class__.__name__ for imtx in imts: if truncation_level == 0: gmfs[gmpe][imtx], _ = gsim.get_mean_and_stddevs(sctx, rctx, dctx, from_string(imtx), stddev_types=[]) continue if "Intra event" in gsim.DEFINED_FOR_STANDARD_DEVIATION_TYPES: epsilon = conditional_simulation( known_sites, residuals.residuals[gmpe][imtx]["Intra event"], sites, imtx, number_simulations, correlation_model) tau = np.unique(residuals.residuals[gmpe][imtx]["Inter event"]) mean, [stddev_inter, stddev_intra] = gsim.get_mean_and_stddevs( sctx, rctx, dctx, from_string(imtx), ["Inter event", "Intra event"]) for iloc in range(0, number_simulations): gmfs[gmpe][imtx][:, iloc] = np.exp( mean + (tau * stddev_inter) + (epsilon[:, iloc].A1 * stddev_intra)) else: epsilon = conditional_simulation( known_sites, residuals.residuals[gmpe][imtx]["Total"], sites, imtx, number_simulations, correlation_model) tau = None mean, [stddev_total] = gsim.get_mean_and_stddevs( sctx, rctx, dctx, from_string(imtx), ["Total"]) for iloc in range(0, number_simulations): gmfs[gmpe][imtx][:, iloc] = np.exp( mean + (epsilon[:, iloc].A1 * stddev_total.flatten())) return gmfs
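The simulated fields above combine a single inter-event residual per field with spatially correlated intra-event residuals, all in log space. Below is a toy numpy version of that combination; unlike the code, which takes tau from the observed inter-event residuals and epsilon from `conditional_simulation`, this sketch just draws both at random:

import numpy

rng = numpy.random.default_rng(0)
n_sites, n_sims = 5, 3
mean = numpy.full(n_sites, numpy.log(0.2))  # predicted log-median (invented)
stddev_inter, stddev_intra = 0.3, 0.5       # invented sigmas

gmfs = numpy.zeros((n_sites, n_sims))
for iloc in range(n_sims):
    tau = rng.standard_normal()             # one inter-event term per field
    epsilon = rng.standard_normal(n_sites)  # per-site intra-event terms
    # exp(mean + tau*sigma_inter + eps*sigma_intra), as in the code above
    gmfs[:, iloc] = numpy.exp(
        mean + tau * stddev_inter + epsilon * stddev_intra)
print(gmfs.shape)  # (5, 3): sites x simulations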
def calc_hazard_curves(groups, srcfilter, imtls, gsim_by_trt, truncation_level=None, apply=sequential_apply, reqv=None, **kwargs): """ Compute hazard curves on a list of sites, given a set of seismic source groups and a dictionary of ground shaking intensity models (one per tectonic region type). Probability of ground motion exceedance is computed in different ways depending if the sources are independent or mutually exclusive. :param groups: A sequence of groups of seismic sources objects (instances of of :class:`~openquake.hazardlib.source.base.BaseSeismicSource`). :param srcfilter: A source filter over the site collection or the site collection itself :param imtls: Dictionary mapping intensity measure type strings to lists of intensity measure levels. :param gsim_by_trt: Dictionary mapping tectonic region types (members of :class:`openquake.hazardlib.const.TRT`) to :class:`~openquake.hazardlib.gsim.base.GMPE` or :class:`~openquake.hazardlib.gsim.base.IPE` objects. :param truncation_level: Float, number of standard deviations for truncation of the intensity distribution. :param apply: apply function to use (default sequential_apply) :param reqv: If not None, an instance of RjbEquivalent :returns: An array of size N, where N is the number of sites, which elements are records with fields given by the intensity measure types; the size of each field is given by the number of levels in ``imtls``. """ # This is ensuring backward compatibility i.e. processing a list of # sources if not isinstance(groups[0], SourceGroup): # sent a list of sources odic = groupby(groups, operator.attrgetter('tectonic_region_type')) groups = [ SourceGroup(trt, odic[trt], 'src_group', 'indep', 'indep') for trt in odic ] idx = 0 span = None for i, grp in enumerate(groups): for src in grp: tom = getattr(src, 'temporal_occurrence_model', None) if tom: span = tom.time_span src.weight = src.count_ruptures() src.grp_id = i src.id = idx idx += 1 imtls = DictArray(imtls) shift_hypo = kwargs['shift_hypo'] if 'shift_hypo' in kwargs else False param = dict(imtls=imtls, truncation_level=truncation_level, reqv=reqv, cluster=grp.cluster, shift_hypo=shift_hypo, investigation_time=kwargs.get('investigation_time', span)) pmap = ProbabilityMap(imtls.size, 1) # Processing groups with homogeneous tectonic region mon = Monitor() sitecol = getattr(srcfilter, 'sitecol', srcfilter) for group in groups: trt = group.trt if sitecol is not srcfilter: param['maximum_distance'] = srcfilter.integration_distance(trt) cmaker = ContextMaker(trt, [gsim_by_trt[trt]], param, mon) if group.atomic: # do not split it = [classical(group, sitecol, cmaker)] else: # split the group and apply `classical` in parallel it = apply(classical, (group.sources, sitecol, cmaker), weight=operator.attrgetter('weight')) for dic in it: pmap |= dic['pmap'] return pmap.convert(imtls, len(sitecol.complete))
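The backward-compatibility branch at the top of `calc_hazard_curves` regroups a flat source list by tectonic region type before building the SourceGroups. A minimal sketch of that regrouping, approximating baselib's `groupby` (which returns a dict) with itertools:

import collections
import itertools
import operator

Src = collections.namedtuple('Src', 'source_id tectonic_region_type')
sources = [Src('A', 'Active Shallow Crust'),
           Src('B', 'Stable Continental'),
           Src('C', 'Active Shallow Crust')]

key = operator.attrgetter('tectonic_region_type')
odic = {trt: list(srcs)  # itertools.groupby needs sorted input
        for trt, srcs in itertools.groupby(sorted(sources, key=key), key)}
print({trt: [s.source_id for s in srcs] for trt, srcs in odic.items()})
# {'Active Shallow Crust': ['A', 'C'], 'Stable Continental': ['B']}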