def post_execute(self, times):
    """
    Compute and store average losses from the losses_by_event dataset,
    and then loss curves and maps.
    """
    self.datastore.set_attrs('task_info/start_ebrisk', times=times)
    oq = self.oqparam
    elt_length = len(self.datastore['losses_by_event'])
    builder = get_loss_builder(self.datastore)
    self.build_datasets(builder)
    mon = performance.Monitor(hdf5=hdf5.File(self.datastore.hdf5cache()))
    smap = parallel.Starmap(compute_loss_curves_maps, monitor=mon)
    self.datastore.close()
    acc = []
    ct = oq.concurrent_tasks or 1
    for elt_slice in general.split_in_slices(elt_length, ct):
        smap.submit(self.datastore.filename, elt_slice,
                    oq.conditional_loss_poes, oq.individual_curves)
    acc = smap.reduce(acc=[])
    # copy performance information from the cache to the datastore
    pd = mon.hdf5['performance_data'].value
    hdf5.extend3(self.datastore.filename, 'performance_data', pd)
    self.datastore.open('r+')  # reopen
    self.datastore['task_info/compute_loss_curves_and_maps'] = (
        mon.hdf5['task_info/compute_loss_curves_maps'].value)
    with self.monitor('saving loss_curves and maps', autoflush=True):
        for name, idx, arr in acc:
            for ij, val in numpy.ndenumerate(arr):
                self.datastore[name][ij + idx] = val

def combine_pmaps_and_save_gmfs(self, acc, res):
    """
    Combine the hazard curves (if any) and save the gmfs (if any)
    sequentially; notice that the gmfs may come from different tasks
    in any order.

    :param acc: an accumulator for the hazard curves
    :param res: a dictionary rlzi, imt -> [gmf_array, curves_by_imt]
    :returns: a new accumulator
    """
    sav_mon = self.monitor('saving gmfs')
    agg_mon = self.monitor('aggregating hcurves')
    self.gmdata += res['gmdata']
    if res['gmfcoll'] is not None:
        with sav_mon:
            for (grp_id, gsim), array in res['gmfcoll'].items():
                if len(array):
                    key = 'gmf_data/grp-%02d/%s' % (grp_id, gsim)
                    hdf5.extend3(self.datastore.hdf5path, key, array)
    slicedic = self.oqparam.imtls.slicedic
    with agg_mon:
        for key, poes in res['hcurves'].items():
            rlzi, sid, imt = str2rsi(key)
            array = acc[rlzi].setdefault(sid, 0).array[slicedic[imt], 0]
            array[:] = 1. - (1. - array) * (1. - poes)
    sav_mon.flush()
    agg_mon.flush()
    self.datastore.flush()
    if 'ruptures' in res:
        vars(EventBasedRuptureCalculator)['save_ruptures'](
            self, res['ruptures'])
    return acc

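# The in-place update above combines probabilities of exceedance from
# independent contributions via poe = 1 - (1 - poe_a) * (1 - poe_b); the same
# line recurs in the other aggregation snippets below. A minimal illustration
# with plain numpy (the array names are illustrative only, not engine API):
import numpy

poe_acc = numpy.array([0.10, 0.00, 0.50])  # PoEs accumulated so far
poe_new = numpy.array([0.20, 0.30, 0.50])  # PoEs coming from a new task
poe_acc[:] = 1. - (1. - poe_acc) * (1. - poe_new)
# poe_acc is now [0.28, 0.30, 0.75]; the in-place assignment mirrors
# `array[:] = 1. - (1. - array) * (1. - poes)` in the method above
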
def save_task_data(self, mon):
    if hasattr(mon, 'weight'):
        duration = mon.children[0].duration  # the task is the first child
        tup = (mon.task_no, mon.weight, duration)
        data = numpy.array([tup], self.task_data_dt)
        hdf5.extend3(mon.hdf5path, 'task_info/' + self.name, data)
    mon.flush()

def save_task_data(self, mon):
    if mon.hdf5path and hasattr(mon, 'weight'):
        duration = mon.children[0].duration  # the task is the first child
        tup = (mon.task_no, mon.weight, duration)
        data = numpy.array([tup], self.task_data_dt)
        hdf5.extend3(mon.hdf5path, 'task_info/' + self.name, data)
    mon.flush()

def combine_pmaps_and_save_gmfs(self, acc, res):
    """
    Combine the hazard curves (if any) and save the gmfs (if any)
    sequentially; notice that the gmfs may come from different tasks
    in any order.

    :param acc: an accumulator for the hazard curves
    :param res: a dictionary rlzi, imt -> [gmf_array, curves_by_imt]
    :returns: a new accumulator
    """
    sav_mon = self.monitor('saving gmfs')
    agg_mon = self.monitor('aggregating hcurves')
    self.gmdata += res['gmdata']
    data = res['gmfdata']
    if data is not None:
        with sav_mon:
            hdf5.extend3(self.datastore.hdf5path, 'gmf_data/data', data)
            for sid, start, stop in res['indices']:
                self.indices[sid].append(
                    (start + self.offset, stop + self.offset))
            self.offset += len(data)
    slicedic = self.oqparam.imtls.slicedic
    with agg_mon:
        for key, poes in res['hcurves'].items():
            rlzi, sid, imt = str2rsi(key)
            array = acc[rlzi].setdefault(sid, 0).array[slicedic[imt], 0]
            array[:] = 1. - (1. - array) * (1. - poes)
    sav_mon.flush()
    agg_mon.flush()
    self.datastore.flush()
    if 'ruptures' in res:
        vars(EventBasedRuptureCalculator)['save_ruptures'](
            self, res['ruptures'])
    return acc

def save_task_info(self, mon):
    if mon.hdf5path:
        duration = mon.children[0].duration  # the task is the first child
        tup = (mon.task_no, mon.weight, duration, self.received[-1])
        data = numpy.array([tup], task_data_dt)
        hdf5.extend3(mon.hdf5path, 'task_info/' + self.name, data,
                     argnames=self.argnames, sent=self.sent)
    mon.flush()

def save_info(self, dic):
    """
    Save (name, value) information in the associated hdf5path
    """
    if self.hdf5path:
        data = numpy.array(
            _pairs(dic.items()),
            [('par_name', hdf5.vstr), ('par_value', hdf5.vstr)])
        hdf5.extend3(self.hdf5path, 'job_info', data)

def save_info(self, dic):
    """
    Save (name, value) information in the associated hdf5path
    """
    if self.hdf5path:
        if 'hostname' not in dic:
            dic['hostname'] = socket.gethostname()
        data = numpy.array(
            _pairs(dic.items()),
            [('par_name', hdf5.vstr), ('par_value', hdf5.vstr)])
        hdf5.extend3(self.hdf5path, 'job_info', data)

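# A self-contained sketch of the same "append job metadata" pattern, without
# the private _pairs() helper used above (its exact behaviour is not shown
# here, so the explicit str() conversion is an assumption); append_job_info
# is a hypothetical name, not an engine API:
import numpy
from openquake.baselib import hdf5


def append_job_info(hdf5path, dic):
    """Append (par_name, par_value) string pairs to the 'job_info' dataset."""
    rows = [(key, str(value)) for key, value in sorted(dic.items())]
    data = numpy.array(
        rows, [('par_name', hdf5.vstr), ('par_value', hdf5.vstr)])
    return hdf5.extend3(hdf5path, 'job_info', data)
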
def agg_dicts(self, acc, result):
    """
    :param acc: accumulator dictionary
    :param result: an AccumDict with events, ruptures, gmfs and hcurves
    """
    oq = self.oqparam
    if oq.save_ruptures and not oq.ground_motion_fields:
        self.gmf_size += max_gmf_size(
            result['ruptures'], self.csm_info.rlzs_assoc.get_rlzs_by_gsim,
            self.csm_info.get_samples_by_grp(), len(self.oqparam.imtls))
    if hasattr(result, 'calc_times'):
        for srcid, nsites, eids, dt in result.calc_times:
            info = self.csm.infos[srcid]
            info.num_sites += nsites
            info.calc_time += dt
            info.num_split += 1
            info.events += len(eids)
    if hasattr(result, 'eff_ruptures'):
        acc.eff_ruptures += result.eff_ruptures
    if hasattr(result, 'events'):
        self.datastore.extend('events', result.events)
    self.save_ruptures(result['ruptures'])
    sav_mon = self.monitor('saving gmfs')
    agg_mon = self.monitor('aggregating hcurves')
    hdf5path = self.datastore.hdf5path
    if 'gmdata' in result:
        self.gmdata += result['gmdata']
        data = result['gmfdata']
        with sav_mon:
            hdf5.extend3(hdf5path, 'gmf_data/data', data)
            # it is important to save the number of bytes while the
            # computation is going, to see the progress
            update_nbytes(self.datastore, 'gmf_data/data', data)
            for sid, start, stop in result['indices']:
                self.indices[sid].append(
                    (start + self.offset, stop + self.offset))
            self.offset += len(data)
        if self.offset >= TWO32:
            raise RuntimeError(
                'The gmf_data table has more than %d rows' % TWO32)
    slicedic = self.oqparam.imtls.slicedic
    with agg_mon:
        for key, poes in result.get('hcurves', {}).items():
            r, sid, imt = str2rsi(key)
            array = acc[r].setdefault(sid, 0).array[slicedic[imt], 0]
            array[:] = 1. - (1. - array) * (1. - poes)
    sav_mon.flush()
    agg_mon.flush()
    self.datastore.flush()
    return acc

def save_task_info(self, hdf5path, res, name, sent, mem_gb=0):
    """
    Called by parallel.IterResult.

    :param hdf5path: where to save the info
    :param res: a :class:`Result` object
    :param name: name of the task function
    :param sent: number of bytes sent
    :param mem_gb: memory consumption at the saving time (optional)
    """
    t = (name, self.task_no, self.weight, self.duration, len(res.pik),
         mem_gb)
    data = numpy.array([t], task_info_dt)
    hdf5.extend3(hdf5path, 'task_info', data,
                 **{'sent_' + name: str(sent)})

def save_task_info(self, res, mem_gb=0):
    """
    :param self: an object with attributes .hdf5, .argnames, .sent
    :param res: a :class:`Result` object
    :param mem_gb: memory consumption at the saving time (optional)
    """
    mon = res.mon
    name = mon.operation[6:]  # strip 'total '
    if self.hdf5:
        mon.hdf5 = self.hdf5  # needed for the flush below
        t = (mon.task_no, mon.weight, mon.duration, len(res.pik), mem_gb)
        data = numpy.array([t], task_info_dt)
        hdf5.extend3(self.hdf5.filename, 'task_info/' + name, data,
                     argnames=self.argnames, sent=self.sent)
    mon.flush()

def flush(self):
    """
    Save the measurements on the performance file (or on stdout)
    """
    for child in self.children:
        child.flush()
    data = self.get_data()
    if len(data) == 0:  # no information
        return []
    elif self.hdf5path:
        hdf5.extend3(self.hdf5path, 'performance_data', data)
    # reset monitor
    self.duration = 0
    self.mem = 0
    self.counts = 0
    return data

def flush(self, hdf5path):
    """
    Save the measurements on the performance file
    """
    if not self.children:
        data = self.get_data()
    else:
        lst = [self.get_data()]
        for child in self.children:
            lst.append(child.get_data())
            child.reset()
        data = numpy.concatenate(lst)
    if len(data) == 0:  # no information
        return
    elif not os.path.exists(hdf5path):
        with hdf5.File(hdf5path, 'w') as h5:
            hdf5.create(h5, 'performance_data', perf_dt)
            hdf5.create(h5, 'task_info', task_info_dt)
    hdf5.extend3(hdf5path, 'performance_data', data)
    self.reset()

def flush(self):
    """
    Save the measurements on the performance file (or on stdout)
    """
    if not self._flush:
        raise RuntimeError(
            'Monitor(%r).flush() must not be called in a worker' %
            self.operation)
    for child in self.children:
        child.flush()
    data = self.get_data()
    if len(data) == 0:  # no information
        return []
    elif self.hdf5path:
        hdf5.extend3(self.hdf5path, 'performance_data', data)
    # reset monitor
    self.duration = 0
    self.mem = 0
    self.counts = 0
    return data

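# The flush() variants above all funnel their measurements into a
# 'performance_data' dataset via hdf5.extend3. A hedged sketch of reading
# that dataset back for inspection (the file name below is illustrative;
# the record layout is whatever perf_dt defines in the engine version at
# hand):
from openquake.baselib import hdf5

with hdf5.File('/tmp/calc_1.hdf5', 'r') as h5:
    perf = h5['performance_data'][()]  # structured numpy array
    print(perf.dtype.names, len(perf))
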
def test_extend3_vlen_same_len(self):
    data = numpy.array([[4, 1], [1, 2], [3, 1]], hdf5.vfloat32)
    nrows = hdf5.extend3(self.tmp, 'dset', data)
    self.assertEqual(nrows, 3)
    with hdf5.File(self.tmp, 'r') as f:
        print(f['dset'].value)

def post_execute(self, dummy):
    """
    Compute and store average losses from the losses_by_event dataset,
    and then loss curves and maps.
    """
    oq = self.oqparam
    if oq.avg_losses:
        self.datastore['avg_losses-stats'].attrs['stats'] = [b'mean']
    logging.info('Building loss tables')
    build_loss_tables(self.datastore)
    shp = self.get_shape(self.L)  # (L, T...)
    text = ' x '.join(
        '%d(%s)' % (n, t) for t, n in zip(oq.aggregate_by, shp[1:]))
    logging.info('Producing %d(loss_types) x %s loss curves', self.L, text)
    builder = get_loss_builder(self.datastore)
    self.build_datasets(builder)
    self.datastore.close()
    if 'losses_by_event' in self.datastore.parent:
        dstore = self.datastore.parent
    else:
        dstore = self.datastore
    args = [(dstore.filename, builder, oq.ses_ratio, rlzi)
            for rlzi in range(self.R)]
    h5 = hdf5.File(self.datastore.cachepath())
    try:
        acc = list(
            parallel.Starmap(postprocess, args, hdf5path=h5.filename))
    finally:
        # copy performance information from the cache to the datastore
        pd = h5['performance_data'][()]
        hdf5.extend3(self.datastore.filename, 'performance_data', pd)
        self.datastore.open('r+')  # reopen
    for r, (curves, maps), agg_losses in acc:
        if len(curves):  # some realization can give zero contribution
            self.datastore['agg_curves-rlzs'][:, r] = curves
        if len(maps):  # conditional_loss_poes can be empty
            self.datastore['agg_maps-rlzs'][:, r] = maps
        self.datastore['agg_losses-rlzs'][:, r] = agg_losses
    if self.R > 1:
        logging.info('Computing aggregate statistics')
        set_rlzs_stats(self.datastore, 'agg_curves')
        set_rlzs_stats(self.datastore, 'agg_losses')
        if oq.conditional_loss_poes:
            set_rlzs_stats(self.datastore, 'agg_maps')

    # sanity check with the asset_loss_table
    if oq.asset_loss_table and len(oq.aggregate_by) == 1:
        alt = self.datastore['asset_loss_table'][()]
        if alt.sum() == 0:  # nothing was saved
            return
        logging.info('Checking the loss curves')
        tags = getattr(self.assetcol.tagcol, oq.aggregate_by[0])[1:]
        T = len(tags)
        P = len(builder.return_periods)
        # sanity check on the loss curves for simple tag aggregation
        arr = self.assetcol.aggregate_by(oq.aggregate_by, alt)  # shape (T, E, L)
        rlzs = self.datastore['events']['rlz_id']
        curves = numpy.zeros((P, self.R, self.L, T))
        for t in range(T):
            for r in range(self.R):
                for l in range(self.L):
                    curves[:, r, l, t] = losses_by_period(
                        arr[t, rlzs == r, l], builder.return_periods,
                        builder.num_events[r], builder.eff_time)
        numpy.testing.assert_allclose(
            curves, self.datastore['agg_curves-rlzs'][()])

def test_extend3(self):
    nrows = hdf5.extend3(self.tmp, 'dset', numpy.zeros(3))
    self.assertEqual(nrows, 3)

def test_extend3_vlen_same_len(self):
    data = numpy.array([[4, 1], [1, 2], [3, 1]], hdf5.vfloat32)
    nrows = hdf5.extend3(self.tmp, 'dset', data)
    self.assertEqual(nrows, 3)
    with hdf5.File(self.tmp, 'r') as f:
        print(f['dset'][()])

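# The tests above pin down the basic contract of hdf5.extend3: append an
# array to the named dataset inside the file at `hdf5path` and return the
# resulting number of rows. A minimal standalone sketch, assuming (as the
# tests imply) that extend3 creates the target dataset on first use and
# that repeated calls accumulate rows; the file path is illustrative:
import numpy
from openquake.baselib import hdf5

path = '/tmp/extend3_demo.hdf5'
with hdf5.File(path, 'w') as h5:
    pass  # start from an empty HDF5 file, as the test fixture presumably does
n1 = hdf5.extend3(path, 'rows', numpy.zeros(3))  # expected: 3 rows
n2 = hdf5.extend3(path, 'rows', numpy.ones(2))   # expected: 5 rows in total
print(n1, n2)
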
def post_execute(self, times):
    """
    Compute and store average losses from the losses_by_event dataset,
    and then loss curves and maps.
    """
    if len(times):
        self.datastore.set_attrs(
            'task_info/start_ebrisk', times=times,
            events_per_sid=numpy.mean(self.events_per_sid))
    oq = self.oqparam
    shp = self.get_shape(self.L)  # (L, T...)
    text = ' x '.join(
        '%d(%s)' % (n, t) for t, n in zip(oq.aggregate_by, shp[1:]))
    logging.info('Producing %d(loss_types) x %s loss curves', self.L, text)
    builder = get_loss_builder(self.datastore)
    self.build_datasets(builder)
    self.datastore.close()
    if 'losses_by_event' in self.datastore.parent:
        dstore = self.datastore.parent
    else:
        dstore = self.datastore
    allargs = [(dstore.filename, builder, rlzi) for rlzi in range(self.R)]
    mon = performance.Monitor(hdf5=hdf5.File(self.datastore.hdf5cache()))
    acc = list(parallel.Starmap(compute_loss_curves_maps, allargs, mon))
    # copy performance information from the cache to the datastore
    pd = mon.hdf5['performance_data'][()]
    hdf5.extend3(self.datastore.filename, 'performance_data', pd)
    self.datastore.open('r+')  # reopen
    self.datastore['task_info/compute_loss_curves_and_maps'] = (
        mon.hdf5['task_info/compute_loss_curves_maps'][()])
    self.datastore.open('r+')
    with self.monitor('saving loss_curves and maps', autoflush=True):
        for r, (curves, maps) in acc:
            if len(curves):  # some realization can give zero contribution
                self.datastore['agg_curves-rlzs'][:, r] = curves
            if len(maps):  # conditional_loss_poes can be empty
                self.datastore['agg_maps-rlzs'][:, r] = maps
    if self.R > 1:
        logging.info('Computing aggregate loss curves statistics')
        set_rlzs_stats(self.datastore, 'agg_curves')
        self.datastore.set_attrs(
            'agg_curves-stats', return_periods=builder.return_periods,
            loss_types=' '.join(self.riskmodel.loss_types))
        if oq.conditional_loss_poes:
            logging.info('Computing aggregate loss maps statistics')
            set_rlzs_stats(self.datastore, 'agg_maps')

    # sanity check with the asset_loss_table
    if oq.asset_loss_table and len(oq.aggregate_by) == 1:
        alt = self.datastore['asset_loss_table'][()]
        if alt.sum() == 0:  # nothing was saved
            return
        logging.info('Checking the loss curves')
        tags = getattr(self.assetcol.tagcol, oq.aggregate_by[0])[1:]
        T = len(tags)
        P = len(builder.return_periods)
        # sanity check on the loss curves for simple tag aggregation
        arr = self.assetcol.aggregate_by(oq.aggregate_by, alt)  # shape (T, E, L)
        rlzs = self.datastore['events']['rlz']
        curves = numpy.zeros((P, self.R, self.L, T))
        for t in range(T):
            for r in range(self.R):
                for l in range(self.L):
                    curves[:, r, l, t] = losses_by_period(
                        arr[t, rlzs == r, l], builder.return_periods,
                        builder.num_events[r], builder.eff_time)
        numpy.testing.assert_allclose(
            curves, self.datastore['agg_curves-rlzs'][()])
