def get_source_ids(self):
    """
    :returns: the unique source IDs contained in the composite model
    """
    oq = self.oqparam
    self.M = len(oq.imtls)
    self.L1 = oq.imtls.size // self.M
    sources = encode([src_id for src_id in self.csm.source_info])
    size, msg = get_nbytes_msg(
        dict(N=self.N, R=self.R, M=self.M, L1=self.L1, Ns=self.Ns))
    ps = 'pointSource' in self.full_lt.source_model_lt.source_types
    if size > TWO32 and not ps:
        raise RuntimeError(
            'The matrix disagg_by_src is too large: %s' % msg)
    elif size > TWO32:
        msg = ('The source model contains point sources: you cannot set '
               'disagg_by_src=true unless you convert them to multipoint '
               'sources with the command oq upgrade_nrml --multipoint %s'
               ) % oq.base_path
        raise RuntimeError(msg)
    return sources
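# A standalone toy sketch of the memory check performed in get_source_ids
# above. _nbytes_msg is a hypothetical stand-in for get_nbytes_msg, assumed
# to return the product of the given dimensions times 8 bytes plus a
# human-readable breakdown; the real helper and its message format may
# differ. TWO32 (2 ** 32) plays the role of the 4 GiB limit in the check.
import numpy

TWO32 = 2 ** 32

def _nbytes_msg(sizedict, itemsize=8):
    # hypothetical helper: multiply all the dimensions by the item size
    nbytes = numpy.prod(list(sizedict.values())) * itemsize
    msg = ' * '.join('%s=%d' % it for it in sizedict.items())
    return nbytes, '%s * %d bytes = %d' % (msg, itemsize, nbytes)

# e.g. N sites, R realizations, M IMTs, L1 levels per IMT, Ns sources
size, msg = _nbytes_msg(dict(N=10_000, R=100, M=2, L1=20, Ns=500))
print(msg, '-> too large' if size > TWO32 else '-> fits')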
def post_execute(self, result):
    """
    Compute stats for the aggregated distributions and save the results
    on the datastore.
    """
    if not result:
        self.collapsed()
        return
    dstates = self.crmodel.damage_states
    ltypes = self.crmodel.loss_types
    L = self.L = len(ltypes)
    R = self.R
    D = len(dstates)
    A = len(self.assetcol)
    E = len(self.datastore['events'])

    # reduction factor
    _, msg1 = get_nbytes_msg(dict(A=A, E=E, L=L))
    _, msg2 = get_nbytes_msg(
        dict(nrows=len(self.datastore['dd_data/eid']), ncols=D + 2))
    logging.info('Using %s\ninstead of %s', msg2, msg1)

    # avg_ratio = ratio used when computing the averages
    oq = self.oqparam
    if oq.investigation_time:  # event_based_damage
        avg_ratio = numpy.array([oq.ses_ratio] * R)
    else:  # scenario_damage: num_events has one value per realization
        avg_ratio = 1. / self.param['num_events']

    # damage by asset
    d_asset = numpy.zeros((A, R, L, D), F32)
    for (l, r, a, tot) in result['d_asset']:
        d_asset[a, r, l] = tot * avg_ratio[r]
    self.datastore['damages-rlzs'] = d_asset
    set_rlzs_stats(self.datastore, 'damages',
                   asset_id=self.assetcol['id'],
                   loss_type=oq.loss_names,
                   dmg_state=dstates)
    self.sanity_check()

    # damage by event: make sure the sum of the buildings is consistent
    tot = self.assetcol['number'].sum()
    dt = F32 if self.param['approx_ddd'] else U32
    dbe = numpy.zeros((self.E, L, D), dt)  # shape (E, L, D)
    dbe[:, :, 0] = tot
    for e, dmg_by_lt in result['d_event'].items():
        for l, dmg in enumerate(dmg_by_lt):
            dbe[e, l, 0] = tot - dmg.sum()
            dbe[e, l, 1:] = dmg
    self.datastore['dmg_by_event'] = dbe

    # consequence distributions
    del result['d_asset']
    del result['d_event']
    dtlist = [('event_id', U32), ('rlz_id', U16), ('loss', (F32, (L,)))]
    rlz = self.datastore['events']['rlz_id']
    for name, csq in result.items():
        if name.startswith('avg_'):
            c_asset = numpy.zeros((A, R, L), F32)
            for (l, r, a, stat) in result[name]:
                c_asset[a, r, l] = stat * avg_ratio[r]
            self.datastore[name + '-rlzs'] = c_asset
            set_rlzs_stats(self.datastore, name,
                           asset_id=self.assetcol['id'],
                           loss_type=oq.loss_names)
        elif name.endswith('_by_event'):
            arr = numpy.zeros(len(csq), dtlist)
            for i, (eid, loss) in enumerate(csq.items()):
                arr[i] = (eid, rlz[eid], loss)
            self.datastore[name] = arr
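# A toy illustration of the averaging step in post_execute above: the damage
# totals accumulated over all events are turned into mean distributions per
# asset by dividing by the number of events of each realization. All shapes
# and values below are made up for the example.
import numpy

A, R, L, D = 3, 2, 1, 4                # assets, rlzs, loss types, damage states
num_events = numpy.array([10, 20])     # events per realization (scenario case)
avg_ratio = 1. / num_events            # one ratio per realization

d_asset = numpy.zeros((A, R, L, D), numpy.float32)
d_result = [(0, 0, 1, numpy.array([5., 3., 1., 1.])),   # (l, r, a, tot)
            (0, 1, 2, numpy.array([12., 6., 2., 0.]))]
for l, r, a, tot in d_result:
    d_asset[a, r, l] = tot * avg_ratio[r]
print(d_asset[1, 0, 0])  # mean damage distribution of asset 1 under rlz 0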
def compute(self):
    """
    Submit disaggregation tasks and return the results
    """
    oq = self.oqparam
    dstore = (self.datastore.parent if self.datastore.parent
              else self.datastore)
    magi = numpy.searchsorted(self.bin_edges[0], dstore['rup/mag'][:]) - 1
    magi[magi == -1] = 0  # when the magnitude is on the edge
    totrups = len(magi)
    logging.info('Reading {:_d} ruptures'.format(totrups))
    rdt = [('grp_id', U16), ('magi', U8), ('nsites', U16), ('idx', U32)]
    rdata = numpy.zeros(totrups, rdt)
    rdata['magi'] = magi
    rdata['idx'] = numpy.arange(totrups)
    rdata['grp_id'] = dstore['rup/grp_id'][:]
    rdata['nsites'] = dstore['rup/nsites'][:]
    totweight = rdata['nsites'].sum()
    et_ids = dstore['et_ids'][:]
    rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(et_ids)
    G = max(len(rbg) for rbg in rlzs_by_gsim)
    maxw = 2 * 1024**3 / (16 * G * self.M)  # at max 2 GB
    maxweight = min(
        numpy.ceil(totweight / (oq.concurrent_tasks or 1)), maxw)
    num_eff_rlzs = len(self.full_lt.sm_rlzs)
    task_inputs = []
    U = 0
    self.datastore.swmr_on()
    smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5)
    # IMPORTANT!! we rely on the fact that the classical part of the
    # calculation stores the ruptures in chunks of constant grp_id,
    # therefore it is possible to build (start, stop) slices; we are
    # NOT grouping by operator.itemgetter('grp_id', 'magi'): that would
    # break the ordering of the indices, causing much worse performance,
    # visible only in extra-large calculations!
    for block in block_splitter(rdata, maxweight,
                                operator.itemgetter('nsites'),
                                operator.itemgetter('grp_id')):
        grp_id = block[0]['grp_id']
        trti = et_ids[grp_id][0] // num_eff_rlzs
        trt = self.trts[trti]
        cmaker = ContextMaker(
            trt, rlzs_by_gsim[grp_id],
            {'truncation_level': oq.truncation_level,
             'maximum_distance': oq.maximum_distance,
             'collapse_level': oq.collapse_level,
             'num_epsilon_bins': oq.num_epsilon_bins,
             'investigation_time': oq.investigation_time,
             'imtls': oq.imtls})
        U = max(U, block.weight)
        slc = slice(block[0]['idx'], block[-1]['idx'] + 1)
        smap.submit((dstore, slc, cmaker, self.hmap4, trti, magi[slc],
                     self.bin_edges))
        task_inputs.append((trti, slc.stop - slc.start))

    nbytes, msg = get_nbytes_msg(dict(M=self.M, G=G, U=U, F=2))
    logging.info('Maximum mean_std per task:\n%s', msg)

    s = self.shapedic
    Ta = len(task_inputs)
    nbytes = s['N'] * s['M'] * s['P'] * s['Z'] * Ta * 8
    data_transfer = (s['dist'] * s['eps'] + s['lon'] * s['lat']) * nbytes
    if data_transfer > oq.max_data_transfer:
        raise ValueError(
            'Estimated data transfer too big\n%s > max_data_transfer=%s' %
            (humansize(data_transfer), humansize(oq.max_data_transfer)))
    logging.info('Estimated data transfer: %s', humansize(data_transfer))

    dt = numpy.dtype([('trti', U8), ('nrups', U32)])
    self.datastore['disagg_task'] = numpy.array(task_inputs, dt)
    results = smap.reduce(self.agg_result, AccumDict(accum={}))
    return results  # imti, sid -> trti, magi -> 6D array
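# A simplified sketch of why the (start, stop) slices built in compute above
# are valid: the ruptures are stored in contiguous chunks of constant grp_id,
# so splitting rdata on grp_id (and on accumulated weight) yields blocks whose
# 'idx' values are consecutive and can be sent to each task as a single slice.
# split_by_grp is a toy stand-in for openquake's block_splitter, not its API.
import numpy

rdt = [('grp_id', numpy.uint16), ('nsites', numpy.uint16),
       ('idx', numpy.uint32)]
rdata = numpy.zeros(6, rdt)
rdata['grp_id'] = [0, 0, 0, 1, 1, 1]   # chunks of constant grp_id
rdata['nsites'] = [5, 7, 2, 9, 1, 4]   # weight of each rupture
rdata['idx'] = numpy.arange(6)

def split_by_grp(rdata, maxweight):
    # toy splitter: start a new block on a grp_id change or when the
    # accumulated weight would exceed maxweight
    block, weight = [], 0
    for rec in rdata:
        if block and (rec['grp_id'] != block[0]['grp_id'] or
                      weight + rec['nsites'] > maxweight):
            yield block
            block, weight = [], 0
        block.append(rec)
        weight += rec['nsites']
    if block:
        yield block

for block in split_by_grp(rdata, maxweight=10):
    slc = slice(block[0]['idx'], block[-1]['idx'] + 1)
    print(int(block[0]['grp_id']), slc)  # one contiguous slice per task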