def get_rup_array(ebruptures):
    """
    Convert a list of EBRuptures into a numpy composite array.

    :param ebruptures: an iterable of EBRupture objects
    :returns:
        an ArrayWrapper around an array of dtype ``rupture_dt`` with one
        record per rupture. The attribute ``geom`` holds the mesh points of
        all the ruptures concatenated (dtype ``point3d``); each record stores
        the start/stop offsets of its own points inside ``geom``. The
        attribute ``nbytes`` is the sum of
        ``rupture_dt.itemsize + mesh.nbytes`` over the ruptures.
        If there are no ruptures the array and ``geom`` are empty.
    """
    if not BaseRupture._code:
        BaseRupture.init()  # initialize rupture codes
    lst = []
    geoms = []
    nbytes = 0
    offset = 0
    for ebrupture in ebruptures:
        rup = ebrupture.rupture
        mesh = surface_to_array(rup.surface)
        sy, sz = mesh.shape[1:]
        # sanity checks
        assert sy < TWO16, 'Too many multisurfaces: %d' % sy
        assert sz < TWO16, 'The rupture mesh spacing is too small'
        hypo = rup.hypocenter.x, rup.hypocenter.y, rup.hypocenter.z
        rate = getattr(rup, 'occurrence_rate', numpy.nan)
        points = mesh.reshape(3, -1).T  # shape (n, 3)
        n = len(points)
        tup = (ebrupture.serial, ebrupture.srcidx, ebrupture.grp_id,
               rup.code, ebrupture.n_occ, offset, offset + n, -1,
               rup.mag, rup.rake, rate, hypo, sy, sz)
        offset += n
        lst.append(tup)
        geoms.append(numpy.array([tuple(p) for p in points], point3d))
        nbytes += rupture_dt.itemsize + mesh.nbytes
    if geoms:
        geom = numpy.concatenate(geoms)
    else:
        # numpy.concatenate refuses an empty sequence of arrays
        geom = numpy.zeros(0, point3d)
    dic = dict(geom=geom, nbytes=nbytes)
    # TODO: PMFs for nonparametric ruptures are not converted
    return hdf5.ArrayWrapper(numpy.array(lst, rupture_dt), dic)
def _hmap4(rlzs, iml_disagg, imtls, poes_disagg, curves):
    """
    Build the intensity levels to disaggregate, for each site, IMT, PoE
    and realization.

    :param rlzs: an array of realization indices of shape (N, Z)
    :param iml_disagg: not used by this function
    :param imtls: a mapping imt -> intensity measure levels
    :param poes_disagg: the PoEs to disaggregate, or ``(None,)`` to take
        the levels directly from ``imtls``
    :param curves: ``curves[s][z]`` is the hazard curve for site ``s`` and
        the z-th realization (falsy if missing)
    :returns: an ArrayWrapper of shape (N, M, P, Z) with attribute ``rlzs``;
        the cells for which there is no curve are NaN
    """
    N, Z = rlzs.shape
    P = len(poes_disagg)
    M = len(imtls)
    arr = numpy.empty((N, M, P, Z))
    # without this the cells skipped below would hold uninitialized memory
    arr.fill(numpy.nan)
    for m, imt in enumerate(imtls):
        for (s, z), rlz in numpy.ndenumerate(rlzs):
            curve = curves[s][z]
            if poes_disagg == (None, ):
                arr[s, m, 0, z] = imtls[imt]
            elif curve:
                max_poe = curve[imt].max()
                arr[s, m, :, z] = calc.compute_hazard_maps(
                    curve[imt], imtls[imt], poes_disagg)
                for iml, poe in zip(arr[s, m, :, z], poes_disagg):
                    if iml == 0:
                        logging.warning(
                            'Cannot disaggregate for site %d, %s, '
                            'poe=%s, rlz=%d: the hazard is zero',
                            s, imt, poe, rlz)
                    elif poe > max_poe:
                        logging.warning(
                            POE_TOO_BIG, s, poe, max_poe, rlz, imt)
    return hdf5.ArrayWrapper(arr, {'rlzs': rlzs})
def build_matrices(rupdata, sitecol, cmaker, iml4, num_epsilon_bins,
                   bin_edges, pne_mon, mat_mon, gmf_mon):
    """
    Generate the disaggregation matrices, one site at a time.

    :param rupdata: a dictionary of rupture data
    :param sitecol: a site collection of N elements
    :param cmaker: a ContextMaker
    :param iml4: an array of shape (N, M, P, Z)
    :param num_epsilon_bins: number of epsilons bins
    :param bin_edges: edges of the bins
    :param pne_mon: object forwarded to ``_disaggregate``
    :param mat_mon: context manager wrapping the matrix building
    :param gmf_mon: object forwarded to ``_disaggregate``
    :yield: (sid, 8dmatrix) if the matrix is nonzero
    """
    num_sites = len(sitecol)
    if num_sites >= 32768:
        raise ValueError('You can disaggregate at max 32,768 sites')
    indices = _site_indices(rupdata['sid_'], num_sites)
    eps3 = _eps3(cmaker.trunclevel, num_epsilon_bins)  # this is slow
    M, P, Z = iml4.shape[1:]
    trailing = [M, P, Z]
    for sid, iml3 in zip(sitecol.sids, iml4):
        onesite = sitecol.filtered([sid])
        bins = get_bins(bin_edges, sid)
        matrix = numpy.zeros([len(edges) - 1 for edges in bins] + trailing)
        for z in range(Z):
            attrs = dict(rlzi=iml4.rlzs[sid, z], imts=iml4.imts)
            iml2 = hdf5.ArrayWrapper(iml3[:, :, z], attrs)
            bdata = _disaggregate(cmaker, onesite, rupdata, indices[sid],
                                  iml2, eps3, pne_mon, gmf_mon)
            if not bdata.pnes.sum():
                continue  # nothing to store for this realization
            with mat_mon:
                matrix[..., z] = _build_disagg_matrix(bdata, bins)
        if matrix.any():  # nonzero
            yield sid, matrix
def export_losses_by_event(ekey, dstore):
    """
    Export the losses by event as a single CSV file.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the list of files saved by the CSV writer
    """
    oq = dstore['oqparam']
    writer = writers.CsvWriter(fmt=writers.FIVEDIGITS)
    dest = dstore.build_fname('losses_by_event', '', 'csv')
    md = dstore.metadata
    if 'scenario' not in oq.calculation_mode:
        md.update({
            'investigation_time': oq.investigation_time,
            'risk_investigation_time': oq.risk_investigation_time,
        })
    events = dstore['events'][()]

    # extra columns, computed record by record from the event ID
    columns = {'rlz_id': lambda rec: events[rec.event_id]['rlz_id']}
    if oq.investigation_time:  # not scenario
        year_of = year_dict(events['id'], oq.investigation_time, oq.ses_seed)
        columns['rup_id'] = lambda rec: events[rec.event_id]['rup_id']
        columns['year'] = lambda rec: year_of[rec.event_id]

    lbe = dstore['losses_by_event'][()]
    lbe.sort(order='event_id')
    attrs = {'shape_descr': ['event_id'],
             'event_id': list(lbe['event_id'])}
    # move the second axis to the end,
    # for example (0, 1, 2, 3) -> (0, 2, 3, 1)
    ndim = len(lbe['loss'].shape)
    axis = [0]
    axis.extend(range(2, ndim))
    axis.append(1)
    data = lbe['loss'].transpose(axis)  # shape (E, T..., L)
    aw = hdf5.ArrayWrapper(data, attrs, oq.loss_names)
    writer.save(add_columns(aw.to_table(), **columns), dest, comment=md)
    return writer.getsaved()
def export_agg_curve_rlzs(ekey, dstore):
    """
    Export the aggregate loss curves, one CSV file per realization
    or statistic.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the list of files saved by the CSV writer
    """
    oq = dstore['oqparam']
    lnames = numpy.array(oq.loss_names)
    agg_tags = get_agg_tags(dstore, oq.aggregate_by)
    aggvalue = dstore['agg_values'][()]  # shape (K+1, L)
    md = dstore.metadata
    md['risk_investigation_time'] = (
        oq.risk_investigation_time or oq.investigation_time)
    writer = writers.CsvWriter(fmt=writers.FIVEDIGITS)
    dskey = ekey[0]
    descr = hdf5.get_shape_descr(dstore[dskey].attrs['json'])
    name, suffix = dskey.split('-')
    index_name = suffix[:-1]  # the suffix without its last character
    rlzs_or_stats = descr[index_name]
    aw = hdf5.ArrayWrapper(dstore[dskey], descr, ('loss_value',))
    dataf = aw.to_dframe().set_index(index_name)
    for ros in rlzs_or_stats:
        if isinstance(ros, str):
            label = ros
        else:
            label = 'rlz-%03d' % ros
        md['kind'] = f'{name}-' + label
        try:
            df = dataf[dataf.index == ros]
        except KeyError:
            logging.warning('No data for %s', md['kind'])
            continue
        dic = {}
        for col in dataf.columns:
            dic[col] = df[col].to_numpy()
        dic['loss_type'] = lnames[dic['lti']]
        for tagname in oq.aggregate_by:
            dic[tagname] = agg_tags[tagname][dic['agg_id']]
        loss_value = dic['loss_value']
        agg_id = dic['agg_id']
        lti = dic.pop('lti')  # the loss type index is not exported
        dic['loss_ratio'] = loss_value / aggvalue[agg_id, lti]
        dic['annual_frequency_of_exceedence'] = 1 / dic['return_period']
        del dic['agg_id']
        dest = dstore.build_fname(md['kind'], '', 'csv')
        writer.save(pandas.DataFrame(dic), dest, comment=md)
    return writer.getsaved()
def get_effect(mags, sitecol1, gsims_by_trt, oq):
    """
    Compute the effect of the ruptures by magnitude and distance and the
    effective pointsource_distance. Nothing is computed if there is no
    pointsource_distance.

    :param mags: a dictionary trt -> magnitudes
    :param sitecol1: a SiteCollection with a single site
    :param gsims_by_trt: a dictionary trt -> gsims
    :param oq: an object with attributes imtls, minimum_intensity,
               maximum_distance and pointsource_distance
    :returns: an ArrayWrapper trt -> effect_by_mag_dst and a nested dictionary
              trt -> mag -> dist with the effective pointsource_distance

    Updates oq.maximum_distance.magdist
    """
    # the TRTs must be the same and in the same order in both dictionaries
    assert list(mags) == list(gsims_by_trt), 'Missing TRTs!'
    dist_bins = {
        trt: oq.maximum_distance.get_dist_bins(trt)
        for trt in gsims_by_trt
    }
    aw = hdf5.ArrayWrapper((), {})
    # computing the effect make sense only if all IMTs have the same
    # unity of measure; for simplicity we will consider only PGA and SA
    psd = (oq.pointsource_distance.interp(mags)
           if oq.pointsource_distance is not None else {})
    if psd:
        logging.info('Computing effect of the ruptures')
        # the effect is computed once on the union of all the magnitudes
        allmags = set()
        for trt in mags:
            allmags.update(mags[trt])
        eff_by_mag = parallel.Starmap.apply(
            get_effect_by_mag, (sorted(allmags), sitecol1, gsims_by_trt,
                                oq.maximum_distance, oq.imtls)).reduce()
        effect = {}
        for t, trt in enumerate(mags):
            # the column t of each array refers to the t-th TRT
            arr = numpy.array([eff_by_mag[mag][:, t] for mag in mags[trt]])
            setattr(aw, trt, arr)  # shape (#mags, #dists)
            setattr(aw, trt + '_dist_bins', dist_bins[trt])
            effect[trt] = Effect(dict(zip(mags[trt], arr)), dist_bins[trt])
        minint = oq.minimum_intensity.get('default', 0)
        for trt, eff in effect.items():
            if minint:
                oq.maximum_distance.magdist[trt] = eff.dist_by_mag(minint)
            # build a dict trt -> mag -> dst
            # a TRT whose distances are all -1 gets them replaced with
            # distances derived from the maximum distance
            if psd and set(psd[trt].values()) == {-1}:
                maxdist = oq.maximum_distance[trt]
                psd[trt] = eff.dist_by_mag(eff.collapse_value(maxdist))
        # NOTE(review): summary logged only when an effect was computed;
        # confirm the intended nesting of these two statements
        dic = {
            trt: [(float(mag), int(dst)) for mag, dst in psd[trt].items()]
            for trt in psd if trt != 'default'
        }
        logging.info('Using pointsource_distance=\n%s', pprint.pformat(dic))
    return aw, psd
def get_output(crmodel, assets_by_taxo, haz, rlzi=None):
    """
    Apply the risk models to the hazard on a site.

    :param crmodel: object providing imtls, loss_types and
        get_rmodels_weights(taxonomy)
    :param assets_by_taxo: a dictionary taxonomy index -> assets on a site
    :param haz: an array or a dictionary of hazard on that site
    :param rlzi: if given, a realization index
    :returns: an ArrayWrapper loss_type -> array of shape (A, ...)
    :raises ValueError: if ``haz`` is of an unexpected kind
    """
    if hasattr(haz, 'array'):  # classical
        eids = []
        data = [haz.array[crmodel.imtls(imt), 0] for imt in crmodel.imtls]
    elif isinstance(haz, numpy.ndarray):
        # NB: the GMFs are stored in an order depending on which hazard
        # task ends first, i.e. randomly; they are reordered in place by
        # event ID. This is mandatory for VulnerabilityFunctionWithPMF,
        # otherwise the sample method would receive the means in random
        # order and produce random results even with a correct seed (MS)
        haz.sort(order='eid')
        eids = haz['eid']
        data = haz['gmv']  # shape (E, M)
    elif haz == 0:  # no hazard for this site (event based)
        eids = numpy.arange(1)
        data = []
    else:
        raise ValueError('Unexpected haz=%s' % haz)

    out = {'eids': eids,
           'assets': assets_by_taxo.assets,
           'loss_types': crmodel.loss_types}
    if rlzi is not None:
        out['rlzi'] = rlzi

    for lt in crmodel.loss_types:
        by_taxo = []
        for taxonomy, assets_ in assets_by_taxo.items():
            if len(assets_by_taxo.eps):
                epsilons = assets_by_taxo.eps[taxonomy][:, eids]
            else:  # no CoVs
                epsilons = ()
            rmodels, weights = crmodel.get_rmodels_weights(taxonomy)
            arrays = []
            for rm in rmodels:
                if len(data) == 0:
                    dat = [0]
                elif len(eids):  # gmfs
                    dat = data[:, rm.imti[lt]]
                else:  # hcurves
                    dat = data[rm.imti[lt]]
                arrays.append(rm(lt, assets_, dat, eids, epsilons))
            if len(arrays) == 1:
                res = arrays[0]
            else:  # weighted average over the risk models
                res = numpy.average(arrays, weights=weights, axis=0)
            by_taxo.append(res)
        arr = numpy.concatenate(by_taxo)
        out[lt] = arr[assets_by_taxo.idxs] if len(arr) else arr
    return hdf5.ArrayWrapper((), out)
def get_output(crmodel, assets_by_taxo, haz, rlzi=None):
    """
    Apply the risk models to the hazard on a site.

    :param crmodel: object providing primary_imtls, imtls, loss_types and
        get_rmodels_weights(taxonomy)
    :param assets_by_taxo: a dictionary taxonomy index -> assets on a site
    :param haz: an array or a dictionary of hazard on that site
    :param rlzi: if given, a realization index
    :returns: an ArrayWrapper loss_type -> array of shape (A, ...)
    """
    primary = crmodel.primary_imtls
    # primary IMTs are looked up by column name gmv_0, gmv_1, ...
    alias = {}
    for i, imt in enumerate(primary):
        alias[imt] = 'gmv_%d' % i

    if hasattr(haz, 'array'):  # classical
        eids = []
        data = {f'gmv_{m}': haz.array[crmodel.imtls(imt), 0]
                for m, imt in enumerate(primary)}
    elif set(haz.columns) - {'sid', 'eid', 'rlz'}:  # regular case
        # NB: the GMFs are stored in an order depending on which hazard
        # task ends first, i.e. randomly; they are reordered by event ID.
        # This is mandatory for VulnerabilityFunctionWithPMF, otherwise
        # the sample method would receive the means in random order and
        # produce random results even with a correct seed (MS)
        haz = haz.sort_values('eid')
        eids = haz.eid.to_numpy()
        data = haz
    else:  # ZeroGetter for this site (event based)
        eids = numpy.arange(1)
        data = {f'gmv_{m}': [0] for m, imt in enumerate(primary)}

    out = {'eids': eids,
           'assets': assets_by_taxo.assets,
           'loss_types': crmodel.loss_types,
           'haz': haz}
    if rlzi is not None:
        out['rlzi'] = rlzi

    for lt in crmodel.loss_types:
        by_taxo = []
        for taxonomy, assets_ in assets_by_taxo.items():
            if len(assets_by_taxo.eps):
                epsilons = assets_by_taxo.eps[taxonomy][:, eids]
            else:  # no CoVs
                epsilons = ()
            rmodels, weights = crmodel.get_rmodels_weights(taxonomy)
            arrays = []
            for rm in rmodels:
                imt = rm.imt_by_lt[lt]
                # non-primary IMTs are looked up with their own name
                dat = data[alias.get(imt, imt)]
                if hasattr(dat, 'to_numpy'):
                    dat = dat.to_numpy()
                arrays.append(rm(lt, assets_, dat, eids, epsilons))
            if len(arrays) == 1:
                res = arrays[0]
            else:  # weighted average over the risk models
                res = numpy.average(arrays, weights=weights, axis=0)
            by_taxo.append(res)
        arr = numpy.concatenate(by_taxo)
        out[lt] = arr[assets_by_taxo.idxs] if len(arr) else arr
    return hdf5.ArrayWrapper((), out)
def get_ruptures(fname_csv):
    """
    Read ruptures in CSV format and return an ArrayWrapper.

    :param fname_csv: path to the CSV file
    :returns:
        an ArrayWrapper around an array of dtype ``rupture_dt`` with an
        attribute ``geom`` (an object array with one geometry per rupture),
        or an empty tuple if the file contains no ruptures
    """
    if not rupture.BaseRupture._code:
        rupture.BaseRupture.init()  # initialize rupture codes
    code = rupture.BaseRupture.str2code
    aw = hdf5.read_csv(fname_csv, rupture.rupture_dt)
    rups = []
    geoms = []
    n_occ = 1
    for u, row in enumerate(aw.array):
        hypo = row['lon'], row['lat'], row['dep']
        dic = json.loads(row['extra'])
        # one 3D array per surface; converting the meshes one by one
        # works even when the surfaces have different shapes
        meshes = [F32(mesh) for mesh in json.loads(row['mesh'])]
        num_surfaces = len(meshes)
        shapes = []
        points = []
        minlons = []
        maxlons = []
        minlats = []
        maxlats = []
        for mesh in meshes:
            shapes.extend(mesh.shape[1:])
            points.extend(mesh.flatten())  # lons + lats + deps
            minlons.append(mesh[0].min())
            minlats.append(mesh[1].min())
            maxlons.append(mesh[0].max())
            maxlats.append(mesh[1].max())
        # bounding box enclosing all the surfaces
        minlon = min(minlons)
        minlat = min(minlats)
        maxlon = max(maxlons)
        maxlat = max(maxlats)
        rate = dic.get('occurrence_rate', numpy.nan)
        tup = (u, row['seed'], 'no-source', aw.trts.index(row['trt']),
               code[row['kind']], n_occ, row['mag'], row['rake'], rate,
               minlon, minlat, maxlon, maxlat, hypo, u, 0)
        rups.append(tup)
        # geometry = number of surfaces, then the last two dimensions of
        # each mesh, then all the flattened points
        geoms.append(numpy.concatenate([[num_surfaces], shapes, points]))
    if not rups:
        return ()
    dic = dict(geom=numpy.array(geoms, object))
    # NB: PMFs for nonparametric ruptures are missing
    return hdf5.ArrayWrapper(numpy.array(rups, rupture_dt), dic)
def export_losses_by_event(ekey, dstore):
    """
    Export the losses by event as a single CSV file; the layout depends
    on the calculation mode (scenario, ebrisk or other).

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the list of files saved by the CSV writer
    """
    oq = dstore['oqparam']
    writer = writers.CsvWriter(fmt=writers.FIVEDIGITS)
    dest = dstore.build_fname('losses_by_event', '', 'csv')
    if oq.calculation_mode.startswith('scenario'):
        arr = dstore['losses_by_event'][('eid', 'loss')]
        dtlist = [('event_id', U64), ('rlz_id', U16)] + oq.loss_dt_list()
        num_loss_types = len(dtlist) - 2
        loss = arr['loss']
        z = numpy.zeros(len(arr), dtlist)
        z['event_id'] = arr['eid']
        z['rlz_id'] = dstore['events']['rlz']
        for i, (name, _) in enumerate(dtlist[2:]):
            # with a single loss type the loss array is used as it is
            z[name] = loss[:, i] if num_loss_types > 1 else loss
        writer.save(z, dest)
    elif oq.calculation_mode == 'ebrisk':
        tagcol = dstore['assetcol/tagcol']
        lbe = dstore['losses_by_event'][()]
        lbe.sort(order='eid')
        dic = dict(tagnames=['event_id'] + oq.aggregate_by)
        for tagname in oq.aggregate_by:
            dic[tagname] = getattr(tagcol, tagname)
        dic['event_id'] = ['?'] + list(lbe['eid'])
        # example (0, 1, 2, 3) -> (0, 2, 3, 1)
        axis = [0] + list(range(2, len(lbe['loss'].shape))) + [1]
        data = lbe['loss'].transpose(axis)  # shape (E, T..., L)
        aw = hdf5.ArrayWrapper(data, dic, oq.loss_dt().names)
        writer.save(aw.to_table(), dest)
    else:
        dtlist = [('event_id', U64), ('rlz_id', U16), ('rup_id', U32),
                  ('year', U32)] + oq.loss_dt_list()
        eids = dstore['losses_by_event']['eid']
        events = dstore['events']
        year_of = year_dict(events['id'], oq.investigation_time, oq.ses_seed)
        arr = numpy.zeros(len(dstore['losses_by_event']), dtlist)
        arr['event_id'] = eids
        # integer division: a true division would go through float64,
        # which cannot represent exactly the largest 64 bit event IDs
        arr['rup_id'] = arr['event_id'] // U64(TWO32)
        arr['rlz_id'] = get_rlz_ids(events, eids)
        arr['year'] = [year_of[eid] for eid in eids]
        loss = dstore['losses_by_event']['loss'].T  # shape (L, E)
        for losses, loss_type in zip(loss, oq.loss_dt().names):
            arr[loss_type] = losses
        writer.save(arr, dest)
    return writer.getsaved()
def get_effect(mags, sitecol, gsims_by_trt, oq):
    """
    Compute the effect of the ruptures by magnitude, distance and TRT.

    :param mags: the magnitudes
    :param sitecol: a site collection or None
    :param gsims_by_trt: a dictionary trt -> gsims
    :param oq: an object with attributes imtls, max_sites_disagg,
        minimum_intensity, maximum_distance and pointsource_distance
    :returns: an ArrayWrapper effect_by_mag_dst_trt

    Also updates oq.maximum_distance.magdist and oq.pointsource_distance
    """
    dist_bins = {}
    for trt in gsims_by_trt:
        dist_bins[trt] = oq.maximum_distance.get_dist_bins(trt)
    # computing the effect make sense only if all IMTs have the same
    # unity of measure; for simplicity we will consider only PGA and SA
    num_with_period = sum(
        1 for imt in oq.imtls if imt == 'PGA' or imt.startswith('SA'))
    imts_ok = num_with_period == len(oq.imtls)
    aw = hdf5.ArrayWrapper((), dist_bins)
    if sitecol is None:
        return aw

    if len(sitecol) >= oq.max_sites_disagg and imts_ok:
        logging.info('Computing effect of the ruptures')
        mon = performance.Monitor('rupture effect')
        allargs = (mags, sitecol.one(), gsims_by_trt,
                   oq.maximum_distance, oq.imtls, mon)
        eff_by_mag = parallel.Starmap.apply(
            get_effect_by_mag, allargs).reduce()
        aw.array = eff_by_mag
        effect = {}
        for t, trt in enumerate(gsims_by_trt):
            # the column t of each array refers to the t-th TRT
            by_mag = {mag: eff_by_mag[mag][:, t] for mag in eff_by_mag}
            effect[trt] = Effect(by_mag, dist_bins[trt])
        minint = oq.minimum_intensity.get('default', 0)
        for trt, eff in effect.items():
            if minint:
                oq.maximum_distance.magdist[trt] = eff.dist_by_mag(minint)
            # replace pointsource_distance with a dict trt -> mag -> dst
            if oq.pointsource_distance['default']:
                value = eff.collapse_value(oq.pointsource_distance['default'])
                oq.pointsource_distance[trt] = eff.dist_by_mag(value)
    elif oq.pointsource_distance['default']:
        # replace pointsource_distance with a dict trt -> mag -> dst
        for trt in gsims_by_trt:
            try:
                dst = getdefault(oq.pointsource_distance, trt)
            except TypeError:  # 'NoneType' object is not subscriptable
                dst = getdefault(oq.maximum_distance, trt)
            oq.pointsource_distance[trt] = dict.fromkeys(mags, dst)
    return aw
def get_rup_array(ebruptures, srcfilter=nofilter):
    """
    Build a numpy composite array from a list of EBRuptures, discarding
    the ruptures which are far away from every site.

    :param ebruptures: an iterable of EBRupture objects
    :param srcfilter: a filter with an ``integration_distance`` attribute
        and a ``close_sids`` method
    :returns: an ArrayWrapper with attributes ``geom`` and ``nbytes``, or
        an empty tuple if no rupture survives
    """
    if not BaseRupture._code:
        BaseRupture.init()  # initialize rupture codes
    records, geoms = [], []
    nbytes = 0
    start = 0
    for ebr in ebruptures:
        rup = ebr.rupture
        mesh = surface_to_array(rup.surface)
        sy, sz = mesh.shape[1:]
        # sanity checks
        assert sy < TWO16, 'Too many multisurfaces: %d' % sy
        assert sz < TWO16, 'The rupture mesh spacing is too small'
        hypo = (rup.hypocenter.x, rup.hypocenter.y, rup.hypocenter.z)
        points = mesh.reshape(3, -1).T  # shape (n, 3)
        lons = points[:, 0]
        lats = points[:, 1]
        minlon = lons.min()
        minlat = lats.min()
        maxlon = lons.max()
        maxlat = lats.max()

        # minimal record passed to the filter
        rec = numpy.zeros(1, rupture_dt)[0]
        rec['serial'] = rup.rup_id
        rec['minlon'] = minlon
        rec['minlat'] = minlat
        rec['maxlon'] = maxlon
        rec['maxlat'] = maxlat
        rec['mag'] = rup.mag
        rec['hypo'] = hypo
        if srcfilter.integration_distance:
            sids = srcfilter.close_sids(rec, rup.tectonic_region_type)
            if len(sids) == 0:
                continue  # no site is close to the rupture

        rate = getattr(rup, 'occurrence_rate', numpy.nan)
        stop = start + len(points)
        records.append(
            (0, ebr.rup_id, ebr.srcidx, ebr.grp_id, rup.code, ebr.n_occ,
             rup.mag, rup.rake, rate, minlon, minlat, maxlon, maxlat,
             hypo, start, stop, sy, sz, 0, 0))
        start = stop
        geoms.append(numpy.array([tuple(p) for p in points], point3d))
        nbytes += rupture_dt.itemsize + mesh.nbytes
    if not records:
        return ()
    attrs = {'geom': numpy.concatenate(geoms), 'nbytes': nbytes}
    # NB: PMFs for nonparametric ruptures are not saved since they
    # are useless for the GMF computation
    return hdf5.ArrayWrapper(numpy.array(records, rupture_dt), attrs)
def _iml4(rlzs, iml_disagg, imtls, poes_disagg, curves):
    """
    Build the intensity levels to disaggregate, for each site, IMT, PoE
    and realization.

    :param rlzs: an array of realization indices of shape (N, Z)
    :param iml_disagg: not used by this function
    :param imtls: a mapping imt -> intensity measure levels
    :param poes_disagg: the PoEs to disaggregate, or ``(None,)`` to take
        the levels directly from ``imtls``
    :param curves: ``curves[s][z]`` is the hazard curve for site ``s`` and
        the z-th realization (falsy if missing)
    :returns: an ArrayWrapper of shape (N, M, P, Z) with attribute ``rlzs``;
        the cells for which there is no curve are NaN
    """
    N, Z = rlzs.shape
    P = len(poes_disagg)
    M = len(imtls)
    arr = numpy.empty((N, M, P, Z))
    # without this the cells skipped below would hold uninitialized memory
    arr.fill(numpy.nan)
    for m, imt in enumerate(imtls):
        for (s, z), rlz in numpy.ndenumerate(rlzs):
            curve = curves[s][z]
            if poes_disagg == (None,):
                arr[s, m, 0, z] = imtls[imt]
            elif curve:
                # numpy.interp wants increasing x-coordinates, hence
                # both sequences are reversed
                poes = curve[imt][::-1]
                imls = imtls[imt][::-1]
                arr[s, m, :, z] = numpy.interp(poes_disagg, poes, imls)
    return hdf5.ArrayWrapper(arr, {'rlzs': rlzs})
def get_effect(mags, sitecol, gsims_by_trt, oq):
    """
    Compute the effect of the ruptures by magnitude, distance and TRT.

    :param mags: the magnitudes
    :param sitecol: a site collection or None
    :param gsims_by_trt: a dictionary trt -> gsims
    :param oq: an object with attributes imtls, minimum_intensity,
        maximum_distance and pointsource_distance
    :returns: an ArrayWrapper effect_by_mag_dst_trt

    Updates oq.maximum_distance.magdist and oq.pointsource_distance
    """
    dist_bins = {}
    for trt in gsims_by_trt:
        dist_bins[trt] = oq.maximum_distance.get_dist_bins(trt)
    aw = hdf5.ArrayWrapper((), dist_bins)
    if sitecol is None:
        return aw
    # computing the effect make sense only if all IMTs have the same
    # unity of measure; for simplicity we will consider only PGA and SA
    num_with_period = sum(
        1 for imt in oq.imtls if imt == 'PGA' or imt.startswith('SA'))
    imts_ok = num_with_period == len(oq.imtls)
    psd = oq.pointsource_distance['default']
    effect_ok = imts_ok and (psd or oq.minimum_intensity)
    if effect_ok:
        logging.info('Computing effect of the ruptures')
        allargs = (mags, sitecol.one(), gsims_by_trt,
                   oq.maximum_distance, oq.imtls)
        eff_by_mag = parallel.Starmap.apply(
            get_effect_by_mag, allargs).reduce()
        aw.array = eff_by_mag
        effect = {}
        for t, trt in enumerate(gsims_by_trt):
            # the column t of each array refers to the t-th TRT
            by_mag = {mag: eff_by_mag[mag][:, t] for mag in eff_by_mag}
            effect[trt] = Effect(by_mag, dist_bins[trt])
        minint = oq.minimum_intensity.get('default', 0)
        for trt, eff in effect.items():
            if minint:
                oq.maximum_distance.magdist[trt] = eff.dist_by_mag(minint)
            # replace pointsource_distance with a dict trt -> mag -> dst
            if psd:
                value = eff.collapse_value(psd)
                oq.pointsource_distance[trt] = eff.dist_by_mag(value)
    elif psd:  # like in case_24 with PGV
        for trt in dist_bins:
            pdist = getdefault(oq.pointsource_distance, trt)
            oq.pointsource_distance[trt] = dict.fromkeys(mags, pdist)
    return aw
def _iml3(rlzs, iml_disagg, imtls, poes_disagg, curves):
    """
    Build the intensity levels to disaggregate, one array per IMT.

    :param rlzs: an array of realization indices of shape (N, Z)
    :param iml_disagg: not used by this function
    :param imtls: a mapping imt -> intensity measure levels
    :param poes_disagg: the PoEs to disaggregate, or ``(None,)``
    :param curves: ``curves[s][z]`` is the hazard curve for site ``s`` and
        the z-th realization (falsy if missing)
    :returns: a dictionary of ArrayWrappers imt -> (N, P, Z) with
        intensities (NaN where there is no curve)
    """
    N, Z = rlzs.shape
    P = len(poes_disagg)
    result = {}
    for imti, imt in enumerate(imtls):
        levels = numpy.full((N, P, Z), numpy.nan)
        for s in range(N):
            for z in range(Z):
                curve = curves[s][z]
                if poes_disagg == (None,):
                    levels[s, 0, z] = imtls[imt]
                elif curve:
                    # reversed, so that the PoEs are passed to
                    # numpy.interp as the x-coordinates
                    levels[s, :, z] = numpy.interp(
                        poes_disagg, curve[imt][::-1], imtls[imt][::-1])
        attrs = dict(imt=from_string(imt), imti=imti, rlzs=rlzs)
        result[imt] = hdf5.ArrayWrapper(levels, attrs)
    return result
def _iml4(rlzs, iml_disagg, imtls, poes_disagg, curves):
    """
    Build the intensity levels to disaggregate, for each site, IMT, PoE
    and realization.

    :param rlzs: an array of realization indices of shape (N, Z)
    :param iml_disagg: not used by this function
    :param imtls: a mapping imt -> intensity measure levels
    :param poes_disagg: the PoEs to disaggregate, or ``(None,)``
    :param curves: ``curves[s][z]`` is the hazard curve for site ``s`` and
        the z-th realization (falsy if missing)
    :returns: an ArrayWrapper of shape (N, M, P, Z) with intensities (NaN
        where there is no curve) and attributes ``imts`` and ``rlzs``
    """
    N, Z = rlzs.shape
    shape = (N, len(imtls), len(poes_disagg), Z)
    levels = numpy.full(shape, numpy.nan)
    for s in range(N):
        for z in range(Z):
            curve = curves[s][z]
            if poes_disagg == (None, ):
                for m, imt in enumerate(imtls):
                    levels[s, m, 0, z] = imtls[imt]
            elif curve:
                for m, imt in enumerate(imtls):
                    # reversed, so that the PoEs are passed to
                    # numpy.interp as the x-coordinates
                    levels[s, m, :, z] = numpy.interp(
                        poes_disagg, curve[imt][::-1], imtls[imt][::-1])
    imts = [from_string(imt) for imt in imtls]
    return hdf5.ArrayWrapper(levels, dict(imts=imts, rlzs=rlzs))
def export_agg_maps_csv(ekey, dstore):
    """
    Export the aggregate loss maps as a single CSV file.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: a list with the name of the exported file
    """
    dskey = ekey[0]
    _name, _kind = dskey.split('-')  # the key must contain a single dash
    oq = dstore['oqparam']
    tagcol = dstore['assetcol/tagcol']
    agg_maps = dstore[dskey][()]  # shape (C, R, L, T...)
    R = agg_maps.shape[1]
    if dskey.endswith('-rlzs'):
        kinds = ['rlz-%03d' % r for r in range(R)]
    else:
        kinds = list(oq.hazard_stats())
    clp = [str(poe) for poe in oq.conditional_loss_poes]
    # the first label of each axis is a placeholder
    attrs = {
        'tagnames': ['clp', 'kind', 'loss_type'] + oq.aggregate_by,
        'clp': ['?'] + clp,
        'kind': ['?'] + kinds,
        'loss_type': ('?', ) + oq.loss_dt().names,
    }
    for tagname in oq.aggregate_by:
        attrs[tagname] = getattr(tagcol, tagname)
    aw = hdf5.ArrayWrapper(agg_maps, attrs)
    writer = writers.CsvWriter(fmt=writers.FIVEDIGITS)
    fname = dstore.export_path('%s.%s' % ekey)
    writer.save(aw.to_table(), fname)
    return [fname]
def _iml2s(rlzs, iml_disagg, imtls, poes_disagg, curves):
    """
    Build the intensity levels to disaggregate, one array per site.

    :param rlzs: a sequence with one entry per site, stored as ``rlzi``
    :param iml_disagg: not used by this function
    :param imtls: a mapping imt -> intensity measure levels
    :param poes_disagg: the PoEs to disaggregate, or ``(None,)``
    :param curves: one hazard curve per site (falsy if missing)
    :returns: a list of N ArrayWrappers of shape (M, P) with intensities
        (NaN where there is no curve) and attributes ``poes_disagg``,
        ``imts`` and ``rlzi``
    """
    shape = (len(imtls), len(poes_disagg))
    imts = [from_string(imt) for imt in imtls]
    out = []
    for s, curve in enumerate(curves):
        levels = numpy.full(shape, numpy.nan)
        if poes_disagg == (None, ):
            for m, imt in enumerate(imtls):
                levels[m, 0] = imtls[imt]
        elif curve:
            for m, imt in enumerate(imtls):
                # reversed, so that the PoEs are passed to
                # numpy.interp as the x-coordinates
                levels[m] = numpy.interp(
                    poes_disagg, curve[imt][::-1], imtls[imt][::-1])
        attrs = dict(poes_disagg=poes_disagg, imts=imts, rlzi=rlzs[s])
        out.append(hdf5.ArrayWrapper(levels, attrs))
    return out
def get_source_ids(ebruptures, srcfilter):
    """
    Collect the source_id given by the source model, the srcidx and the
    trt of the EBRuptures which are not discarded by the filter.

    :param ebruptures: list of EBRuptures objects
    :param srcfilter: a filter with an ``integration_distance`` attribute
        and a ``close_sids`` method
    :returns: an ArrayWrapper around an array of dtype ``source_ids_dt``
        or an empty tuple if nothing is left
    """
    if not BaseRupture._code:
        BaseRupture.init()  # initialize rupture codes
    triples = []
    for ebr in ebruptures:
        rup = ebr.rupture
        mesh = surface_to_array(rup.surface)
        sy, sz = mesh.shape[1:]
        # sanity checks
        assert sy < TWO16, 'Too many multisurfaces: %d' % sy
        assert sz < TWO16, 'The rupture mesh spacing is too small'
        hypo = (rup.hypocenter.x, rup.hypocenter.y, rup.hypocenter.z)
        points = mesh.reshape(3, -1).T  # shape (n, 3)
        lons = points[:, 0]
        lats = points[:, 1]

        # minimal record passed to the filter
        rec = numpy.zeros(1, rupture_dt)[0]
        rec['minlon'] = lons.min()
        rec['minlat'] = lats.min()
        rec['maxlon'] = lons.max()
        rec['maxlat'] = lats.max()
        rec['mag'] = rup.mag
        rec['hypo'] = hypo
        if srcfilter.integration_distance:
            sids = srcfilter.close_sids(rec, rup.tectonic_region_type)
            if len(sids) == 0:
                continue  # no site is close to the rupture
        # NB: duplicated triples are kept
        triples.append((ebr.source_id, ebr.srcidx, ebr.trt))
    if not triples:
        return ()
    return hdf5.ArrayWrapper(numpy.array(triples, source_ids_dt), {})
def get_ruptures(fname_csv):
    """
    Read ruptures in CSV format and return an ArrayWrapper.

    :param fname_csv: path to the CSV file
    :returns:
        an ArrayWrapper around an array of dtype ``rupture_dt`` with an
        attribute ``geom`` (an object array with one geometry per rupture),
        or an empty tuple if the file contains no ruptures
    """
    if not rupture.BaseRupture._code:
        rupture.BaseRupture.init()  # initialize rupture codes
    code = rupture.BaseRupture.str2code
    aw = hdf5.read_csv(fname_csv, rupture.rupture_dt)
    trts = aw.trts
    rups = []
    geoms = []
    n_occ = 1
    for u, row in enumerate(aw.array):
        hypo = row['lon'], row['lat'], row['dep']
        dic = json.loads(row['extra'])
        mesh = F32(json.loads(row['mesh']))
        s1, s2 = mesh.shape[1:]
        # bounding box: mesh[0] are the longitudes, mesh[1] the latitudes
        minlon = mesh[0].min()
        minlat = mesh[1].min()
        maxlon = mesh[0].max()
        maxlat = mesh[1].max()
        rate = dic.get('occurrence_rate', numpy.nan)
        tup = (u, row['seed'], 'no-source', trts.index(row['trt']),
               code[row['kind']], n_occ, row['mag'], row['rake'], rate,
               minlon, minlat, maxlon, maxlat, hypo, u, 0, 0)
        rups.append(tup)
        points = mesh.flatten()  # lons + lats + deps
        # FIXME: extend to MultiSurfaces
        # geometry = number of surfaces (1), shape of the mesh, points
        geoms.append(numpy.concatenate([[1], [s1, s2], points]))
    if not rups:
        return ()
    dic = dict(geom=numpy.array(geoms, object))
    # NB: PMFs for nonparametric ruptures are missing
    return hdf5.ArrayWrapper(numpy.array(rups, rupture_dt), dic)
def build_matrix(cmaker, singlesite, ctxs, iml3, imts, rlzs,
                 num_epsilon_bins, bins, pne_mon, mat_mon, gmf_mon):
    """
    Build the disaggregation matrix for a single site.

    :param cmaker: a ContextMaker
    :param singlesite: a site collection with a single site
    :param ctxs: a list of pairs (rctx, dctx)
    :param iml3: an array of shape (M, P, Z)
    :param imts: a list of intensity measure types
    :param rlzs: Z realizations for the given site
    :param num_epsilon_bins: number of epsilons bins
    :param bins: bin edges for the given site
    :param pne_mon: object forwarded to ``_disaggregate``
    :param mat_mon: context manager wrapping the matrix building
    :param gmf_mon: object forwarded to ``_disaggregate``
    :returns: 8D disaggregation matrix
    """
    eps3 = _eps3(cmaker.trunclevel, num_epsilon_bins)
    shape = [len(edges) - 1 for edges in bins]
    shape.extend(iml3.shape)
    matrix = numpy.zeros(shape)
    for z, rlz in enumerate(rlzs):
        attrs = dict(rlzi=rlz, imts=imts)
        iml2 = hdf5.ArrayWrapper(iml3[:, :, z], attrs)
        bdata = _disaggregate(cmaker, singlesite, ctxs, iml2, eps3,
                              pne_mon, gmf_mon)
        if not bdata.pnes.sum():
            continue  # nothing to store for this realization
        with mat_mon:
            matrix[..., z] = _build_disagg_matrix(bdata, bins)
    return matrix
def get_risk_functions(oqparam, kind='vulnerability fragility consequence '
                       'vulnerability_retrofitted'):
    """
    Read the risk model files listed in ``oqparam.inputs`` and flatten
    them into a list of risk functions.

    :param oqparam:
        an OqParam instance
    :param kind:
        a space-separated string with the kinds of risk models to read
    :returns:
        a list of risk functions
    :raises InvalidFile:
        if a risk model file is empty
    :raises ValueError:
        if the kind or the lossCategory of a model is inconsistent with
        the key it was registered under
    """
    kinds = kind.split()
    rmodels = AccumDict()
    for kind in kinds:
        for key in sorted(oqparam.inputs):
            # the keys of interest have the form <loss_type>_<kind>
            mo = re.match('(occupants|%s)_%s$' % (COST_TYPE_REGEX, kind), key)
            if mo:
                loss_type = mo.group(1)  # the cost_type in the key
                # can be occupants, structural, nonstructural, ...
                rmodel = nrml.to_python(oqparam.inputs[key])
                if kind == 'consequence':
                    logging.warning(
                        'Consequence models in XML format are '
                        'deprecated, please replace %s with a CSV',
                        oqparam.inputs[key])
                if len(rmodel) == 0:
                    raise InvalidFile('%s is empty!' % oqparam.inputs[key])
                rmodels[loss_type, kind] = rmodel
                # the consistency checks below require a lossCategory
                if rmodel.lossCategory is None:  # NRML 0.4
                    continue
                cost_type = str(rmodel.lossCategory)
                rmodel_kind = rmodel.__class__.__name__
                kind_ = kind.replace('_retrofitted', '')  # strip retrofitted
                # the class name of the model must start with the kind
                if not rmodel_kind.lower().startswith(kind_):
                    raise ValueError(
                        'Error in the file "%s_file=%s": is '
                        'of kind %s, expected %s' % (
                            key, oqparam.inputs[key], rmodel_kind,
                            kind.capitalize() + 'Model'))
                if cost_type != loss_type:
                    raise ValueError(
                        'Error in the file "%s_file=%s": lossCategory is of '
                        'type "%s", expected "%s"' %
                        (key, oqparam.inputs[key],
                         rmodel.lossCategory, loss_type))
    cl_risk = oqparam.calculation_mode in ('classical', 'classical_risk')
    rlist = RiskFuncList()
    rlist.limit_states = []
    # second pass: tag each function with its loss type and kind
    for (loss_type, kind), rm in sorted(rmodels.items()):
        if kind == 'fragility':
            for (imt, riskid), ffl in sorted(rm.items()):
                # the limit states are taken from the first fragility model
                if not rlist.limit_states:
                    rlist.limit_states.extend(rm.limitStates)
                # we are rejecting the case of loss types with different
                # limit states; this may change in the future
                assert rlist.limit_states == rm.limitStates, (
                    rlist.limit_states, rm.limitStates)
                ffl.loss_type = loss_type
                ffl.kind = kind
                rlist.append(ffl)
        elif kind == 'consequence':
            for riskid, cf in sorted(rm.items()):
                # consequence functions are wrapped, since the metadata
                # is stored as attributes of the wrapper
                rf = hdf5.ArrayWrapper(
                    cf, dict(id=riskid, loss_type=loss_type, kind=kind))
                rlist.append(rf)
        else:  # vulnerability, vulnerability_retrofitted
            # only for classical_risk reduce the loss_ratios
            # to make sure they are strictly increasing
            for (imt, riskid), rf in sorted(rm.items()):
                rf = rf.strictly_increasing() if cl_risk else rf
                rf.loss_type = loss_type
                rf.kind = kind
                rlist.append(rf)
    return rlist
def disaggregation(sources, site, imt, iml, gsim_by_trt, truncation_level,
                   n_epsilons, mag_bin_width, dist_bin_width,
                   coord_bin_width, source_filter=filters.nofilter,
                   **kwargs):
    """
    Compute "Disaggregation" matrix representing conditional probability of an
    intensity measure type ``imt`` exceeding, at least once, an intensity
    measure level ``iml`` at a geographical location ``site``, given rupture
    scenarios classified in terms of:

    - rupture magnitude
    - Joyner-Boore distance from rupture surface to site
    - longitude and latitude of the surface projection of a rupture's point
      closest to ``site``
    - epsilon: number of standard deviations by which an intensity measure
      level deviates from the median value predicted by a GSIM, given the
      rupture parameters
    - rupture tectonic region type

    In other words, the disaggregation matrix allows to compute the
    probability of each scenario with the specified properties (e.g.,
    magnitude, or the magnitude and distance) to cause one or more
    exceedences of a given hazard level.

    For more detailed information about the disaggregation, see for instance
    "Disaggregation of Seismic Hazard", Paolo Bazzurro, C. Allin Cornell,
    Bulletin of the Seismological Society of America, Vol. 89, pp. 501-520,
    April 1999.

    :param sources:
        Seismic source model, as for
        :mod:`PSHA <openquake.hazardlib.calc.hazard_curve>` calculator it
        should be an iterator of seismic sources. Any iterable is accepted,
        since the sources are stored in a list before being used.
    :param site:
        :class:`~openquake.hazardlib.site.Site` of interest to calculate
        disaggregation matrix for.
    :param imt:
        Instance of :mod:`intensity measure type <openquake.hazardlib.imt>`
        class.
    :param iml:
        Intensity measure level. A float value in units of ``imt``.
    :param gsim_by_trt:
        Tectonic region type to GSIM objects mapping.
    :param truncation_level:
        Float, number of standard deviations for truncation of the intensity
        distribution.
    :param n_epsilons:
        Integer number of epsilon histogram bins in the result matrix.
    :param mag_bin_width:
        Magnitude discretization step, width of one magnitude histogram bin.
    :param dist_bin_width:
        Distance histogram discretization step, in km.
    :param coord_bin_width:
        Longitude and latitude histograms discretization step,
        in decimal degrees.
    :param source_filter:
        Optional source-site filter function. See
        :mod:`openquake.hazardlib.calc.filters`.
    :param kwargs:
        Extra keyword arguments are accepted and ignored.

    :returns:
        A tuple of two items. First is itself a tuple of bin edges information
        for (in specified order) magnitude, distance, longitude, latitude,
        epsilon and tectonic region types.

        Second item is 6d-array representing the full disaggregation matrix.
        Dimensions are in the same order as bin edges in the first item
        of the result tuple. The matrix can be used directly by pmf-extractor
        functions.

        If no rupture contributes to the hazard a RuntimeWarning is issued
        and the pair ``(None, None)`` is returned.
    """
    # the sources are traversed twice below, so a one-shot iterator
    # must be materialized first
    sources = list(sources)
    trts = sorted(set(src.tectonic_region_type for src in sources))
    trt_num = dict((trt, i) for i, trt in enumerate(trts))
    rlzs_by_gsim = {gsim_by_trt[trt]: [0] for trt in trts}
    by_trt = groupby(sources, operator.attrgetter('tectonic_region_type'))
    bdata = {}
    sitecol = SiteCollection([site])
    iml2 = hdf5.ArrayWrapper(numpy.array([[iml]]),
                             dict(imts=[imt], poes_disagg=[None], rlzi=0))
    eps3 = _eps3(truncation_level, n_epsilons)
    for trt, srcs in by_trt.items():
        cmaker = ContextMaker(
            trt, rlzs_by_gsim,
            {'truncation_level': truncation_level,
             'maximum_distance': source_filter.integration_distance,
             'imtls': {str(imt): [iml]}})
        contexts.RuptureContext.temporal_occurrence_model = (
            srcs[0].temporal_occurrence_model)
        ctxs = cmaker.from_srcs(srcs, sitecol)
        bdata[trt] = _disaggregate(cmaker, sitecol, ctxs, iml2, eps3)

    # a TRT without ruptures has empty arrays, for which min and max are
    # not defined: only the contributing TRTs determine the bins
    contributing = {trt: bd for trt, bd in bdata.items() if len(bd.mags)}
    if not contributing:
        warnings.warn(
            'No ruptures have contributed to the hazard at site %s'
            % site, RuntimeWarning)
        return None, None
    bds = list(contributing.values())

    min_mag = min(bd.mags.min() for bd in bds)
    max_mag = max(bd.mags.max() for bd in bds)
    mag_bins = mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / mag_bin_width)),
        int(numpy.ceil(max_mag / mag_bin_width) + 1))

    min_dist = min(bd.dists.min() for bd in bds)
    max_dist = max(bd.dists.max() for bd in bds)
    dist_bins = dist_bin_width * numpy.arange(
        int(numpy.floor(min_dist / dist_bin_width)),
        int(numpy.ceil(max_dist / dist_bin_width) + 1))

    # bounding box (min lon, min lat, max lon, max lat)
    bb = (min(bd.lons.min() for bd in bds),
          min(bd.lats.min() for bd in bds),
          max(bd.lons.max() for bd in bds),
          max(bd.lats.max() for bd in bds))
    lon_bins, lat_bins = lon_lat_bins(bb, coord_bin_width)

    eps_bins = numpy.linspace(-truncation_level, truncation_level,
                              n_epsilons + 1)

    bin_edges = (mag_bins, dist_bins, lon_bins, lat_bins, eps_bins)
    matrix = numpy.zeros(
        (len(mag_bins) - 1, len(dist_bins) - 1, len(lon_bins) - 1,
         len(lat_bins) - 1, len(eps_bins) - 1, len(trts)))
    # the slices of the TRTs without ruptures are left equal to zero
    for trt, bd in contributing.items():
        mat7 = _build_disagg_matrix(bd, bin_edges)  # shape (..., M, P)
        # there is a single IMT and a single level
        matrix[..., trt_num[trt]] = mat7[..., 0, 0]
    return bin_edges + (trts, ), matrix
def get_rup_array(ebruptures, srcfilter=nofilter):
    """
    Convert a list of EBRuptures into a numpy composite array, by filtering
    out the ruptures far away from every site

    :param ebruptures: an iterable of EBRupture objects
    :param srcfilter: a filter with an ``integration_distance`` attribute
        and a ``close_sids`` method
    :returns: an ArrayWrapper around an array of dtype ``rupture_dt`` with
        an attribute ``geom`` (an object array with one geometry per
        rupture), or an empty tuple if no rupture survives
    """
    if not BaseRupture._code:
        BaseRupture.init()  # initialize rupture codes

    rups = []
    geoms = []
    for ebrupture in ebruptures:
        rup = ebrupture.rupture
        arrays = surface_to_arrays(rup.surface)  # one array per surface
        lons = []
        lats = []
        points = []
        shapes = []
        for array in arrays:
            s0, s1, s2 = array.shape
            assert s0 == 3, s0
            assert s1 < TWO16, 'Too many lines'
            assert s2 < TWO16, 'The rupture mesh spacing is too small'
            shapes.append(s1)
            shapes.append(s2)
            # the points are stored surface by surface, so with several
            # surfaces the lons and lats are not contiguous in `points`
            # and must be collected separately
            lons.append(array[0].ravel())
            lats.append(array[1].ravel())
            points.extend(array.flat)
            # example of points: [25.0, 25.1, 25.1, 25.0,
            #                     -24.0, -24.0, -24.1, -24.1,
            #                      5.0, 5.0, 5.0, 5.0]
        points = F32(points)
        shapes = U32(shapes)
        lons = F32(numpy.concatenate(lons))
        lats = F32(numpy.concatenate(lats))
        hypo = rup.hypocenter.x, rup.hypocenter.y, rup.hypocenter.z
        rec = numpy.zeros(1, rupture_dt)[0]
        rec['seed'] = rup.rup_id
        rec['minlon'] = minlon = lons.min()
        rec['minlat'] = minlat = lats.min()
        rec['maxlon'] = maxlon = lons.max()
        rec['maxlat'] = maxlat = lats.max()
        rec['mag'] = rup.mag
        rec['hypo'] = hypo
        if srcfilter.integration_distance and len(
                srcfilter.close_sids(rec, rup.tectonic_region_type)) == 0:
            continue
        rate = getattr(rup, 'occurrence_rate', numpy.nan)
        tup = (0, ebrupture.rup_id, ebrupture.source_id,
               ebrupture.trt_smrlz, rup.code, ebrupture.n_occ,
               rup.mag, rup.rake, rate, minlon, minlat, maxlon, maxlat,
               hypo, 0, 0)
        rups.append(tup)
        # the geometry of a rupture is a flat array: the first element
        # is the number of surfaces, then there are 2 * num_surfaces
        # integers describing the first and second dimension of each
        # surface, and then the lons, lats and deps of the underlying
        # meshes of points
        geom = numpy.concatenate([[len(shapes) // 2], shapes, points])
        geoms.append(geom)
    if not rups:
        return ()
    dic = dict(geom=numpy.array(geoms, object))
    # NB: PMFs for nonparametric ruptures are not saved since they
    # are useless for the GMF computation
    return hdf5.ArrayWrapper(numpy.array(rups, rupture_dt), dic)