Example #1
    def build_starmap(self, ssm, sitecol, assetcol, riskmodel, imts,
                      trunc_level, correl_model, min_iml, monitor):
        """
        :param ssm: CompositeSourceModel containing a single source model
        :param sitecol: a SiteCollection instance
        :param assetcol: an AssetCollection instance
        :param riskmodel: a RiskModel instance
        :param imts: a list of Intensity Measure Types
        :param trunc_level: truncation level
        :param correl_model: correlation model
        :param min_iml: vector of minimum intensities, one per IMT
        :param monitor: a Monitor instance
        :returns: a pair (starmap, dictionary)
        """
        ruptures_by_grp = AccumDict()
        num_ruptures = 0
        num_events = 0
        allargs = []
        grp_trt = {}
        # collect the sources
        maxweight = ssm.get_maxweight(self.oqparam.concurrent_tasks)
        logging.info('Using a maxweight of %d', maxweight)
        for src_group in ssm.src_groups:
            grp_trt[src_group.id] = trt = src_group.trt
            gsims = ssm.gsim_lt.values[trt]
            for block in block_splitter(src_group, maxweight, getweight):
                allargs.append((block, self.sitecol, gsims, monitor))
        # collect the ruptures
        for dic in parallel.starmap(self.compute_ruptures, allargs):
            ruptures_by_grp += dic
            [rupts] = dic.values()
            num_ruptures += len(rupts)
            num_events += dic.num_events
        ruptures_by_grp.num_events = num_events
        save_ruptures(self, ruptures_by_grp)

        # determine the realizations
        rlzs_assoc = ssm.info.get_rlzs_assoc(
            count_ruptures=lambda grp: len(ruptures_by_grp.get(grp.id, [])))
        allargs = []
        # prepare the risk inputs
        ruptures_per_block = self.oqparam.ruptures_per_block
        for src_group in ssm.src_groups:
            for rupts in block_splitter(
                    ruptures_by_grp[src_group.id], ruptures_per_block):
                trt = grp_trt[rupts[0].grp_id]
                ri = riskinput.RiskInputFromRuptures(
                    trt, imts, sitecol, rupts, trunc_level,
                    correl_model, min_iml)
                allargs.append((ri, riskmodel, rlzs_assoc, assetcol, monitor))
        taskname = '%s#%d' % (losses_by_taxonomy.__name__, ssm.sm_id + 1)
        smap = starmap(losses_by_taxonomy, allargs, name=taskname)
        attrs = dict(num_ruptures={
            sg_id: len(rupts) for sg_id, rupts in ruptures_by_grp.items()},
                     num_events=num_events,
                     num_rlzs=len(rlzs_assoc.realizations),
                     sm_id=ssm.sm_id)
        return smap, attrs
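Example 1 splits each source group into blocks whose cumulative weight stays below `maxweight`, so that every task gets a comparable amount of work. A simplified, self-contained sketch of such a weight-bounded splitter (an illustration of the idea only, not the actual `block_splitter` used by the calculator) could look like this:

def block_splitter(items, maxweight, weight=lambda item: 1):
    # yield blocks of items whose cumulative weight does not exceed maxweight
    block, total = [], 0
    for item in items:
        w = weight(item)
        if block and total + w > maxweight:
            yield block
            block, total = [], 0
        block.append(item)
        total += w
    if block:
        yield block

# hypothetical usage, mirroring the loop in Example 1:
# for block in block_splitter(src_group, maxweight, lambda src: src.weight):
#     allargs.append((block, sitecol, gsims, monitor))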
Example #2
    def execute(self):
        """
        Split the computation into tiles, which are run in parallel.
        """
        monitor = self.monitor(self.core_func.__name__)
        monitor.oqparam = oq = self.oqparam
        self.tiles = split_in_blocks(
            self.sitecol, self.oqparam.concurrent_tasks or 1)
        oq.concurrent_tasks = 0
        calculator = ClassicalCalculator(
            self.oqparam, monitor, persistent=False)
        calculator.csm = self.csm
        rlzs_assoc = self.csm.get_rlzs_assoc()
        self.rlzs_assoc = calculator.rlzs_assoc = rlzs_assoc

        # parallelization
        all_args = []
        position = 0
        for (i, tile) in enumerate(self.tiles):
            all_args.append((calculator, SiteCollection(tile),
                             position, i, monitor))
            position += len(tile)
        acc = {trt_gsim: zero_curves(len(self.sitecol), oq.imtls)
               for trt_gsim in calculator.rlzs_assoc}
        acc['calc_times'] = []
        return parallel.starmap(classical_tiling, all_args).reduce(
            agg_curves_by_trt_gsim, acc)
Example #3
    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the ruptures according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
            return
        self.sesruptures = []
        if self.precalc:  # the ruptures are already in memory
            for grp_id, sesruptures in self.precalc.result.items():
                for sr in sesruptures:
                    self.sesruptures.append(sr)
        else:  # read the ruptures from the datastore
            for serial in self.datastore['sescollection']:
                sr = self.datastore['sescollection/' + serial]
                self.sesruptures.append(sr)
        self.sesruptures.sort(key=operator.attrgetter('serial'))
        if self.oqparam.ground_motion_fields:
            calc.check_overflow(self)

        L = len(oq.imtls.array)
        res = parallel.starmap(
            self.core_task.__func__, self.gen_args(self.sesruptures)
        ).submit_all()
        acc = functools.reduce(self.combine_pmaps_and_save_gmfs, res, {
            rlz.ordinal: ProbabilityMap(L, 1)
            for rlz in self.rlzs_assoc.realizations})
        self.save_data_transfer(res)
        return acc
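Example 3 submits every task up front (`submit_all`) and only afterwards folds the results into an accumulator, passing an explicit initial value to `functools.reduce`. The aggregation step in isolation, with a hypothetical `combine` callback standing in for `combine_pmaps_and_save_gmfs`, boils down to:

import functools

def combine(acc, result):
    # hypothetical callback: merge one task result into the accumulator
    # (the real callback also saves the GMFs as a side effect)
    for key, value in result.items():
        acc[key] = acc.get(key, 0) + value
    return acc

task_results = [{0: 1}, {0: 2, 1: 3}]              # stand-ins for task outputs
acc = functools.reduce(combine, task_results, {})  # -> {0: 3, 1: 3}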
Example #4
 def execute(self):
     """
     Run in parallel `core_task(sources, sitecol, monitor)`, by
     parallelizing on the sources according to their weight and
     tectonic region type.
     """
     oq = self.oqparam
     monitor = self.monitor.new(
         self.core_task.__name__,
         truncation_level=oq.truncation_level,
         imtls=oq.imtls,
         maximum_distance=oq.maximum_distance,
         poes_disagg=oq.poes_disagg,
         ses_per_logic_tree_path=oq.ses_per_logic_tree_path,
         seed=oq.random_seed)
     with self.monitor('managing sources', autoflush=True):
         src_groups = list(self.csm.src_groups)
         iterargs = saving_sources_by_task(
             self.gen_args(src_groups, oq, monitor), self.datastore)
         res = parallel.starmap(
             self.core_task.__func__, iterargs).submit_all()
     acc = reduce(self.agg_dicts, res, self.zerodict())
     self.save_data_transfer(res)
     with self.monitor('store source_info', autoflush=True):
         self.store_source_info(self.infos)
     self.rlzs_assoc = self.csm.info.get_rlzs_assoc(
         partial(self.count_eff_ruptures, acc))
     self.datastore['csm_info'] = self.csm.info
     return acc
Example #5
 def test_spawn(self):
     all_data = [("a", list(range(10))), ("b", list(range(20))), ("c", list(range(15)))]
     res = {key: parallel.starmap(get_length, [(data,)]) for key, data in all_data}
     for key, val in res.items():
         res[key] = val.reduce()
     parallel.TaskManager.restart()
     self.assertEqual(res, {"a": {"n": 10}, "c": {"n": 15}, "b": {"n": 20}})
Example #6
 def test_spawn(self):
     all_data = [
         ('a', range(10)), ('b', range(20)), ('c', range(15))]
     res = {key: parallel.starmap(get_length, [(data,)])
            for key, data in all_data}
     for key, val in res.iteritems():
         res[key] = val.reduce()
     parallel.TaskManager.restart()
     self.assertEqual(res, {'a': {'n': 10}, 'c': {'n': 15}, 'b': {'n': 20}})
Example #7
 def test_spawn(self):
     all_data = [('a', list(range(10))), ('b', list(range(20))),
                 ('c', list(range(15)))]
     res = {
         key: parallel.starmap(get_length, [(data, )])
         for key, data in all_data
     }
     for key, val in res.items():
         res[key] = val.reduce()
     parallel.TaskManager.restart()
     self.assertEqual(res, {'a': {'n': 10}, 'c': {'n': 15}, 'b': {'n': 20}})
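The three test variants above exercise the same contract: `parallel.starmap(task, arg_tuples)` spawns one task per argument tuple and `.reduce()` merges the resulting dictionaries. A rough standard-library analogue of that flow (the `get_length` task and the dict-merging reduction are assumptions for illustration, not the OpenQuake `parallel` module's implementation) is:

import functools
from multiprocessing import Pool

def get_length(data):
    # toy task: each task returns a small dictionary, like the real tasks
    return {'n': len(data)}

def merge(acc, dic):
    # toy reduction: sum the per-task dictionaries key by key
    for key, value in dic.items():
        acc[key] = acc.get(key, 0) + value
    return acc

if __name__ == '__main__':
    all_data = [('a', list(range(10))), ('b', list(range(20))),
                ('c', list(range(15)))]
    with Pool() as pool:
        # one starmap per key, exactly as in the tests above
        res = {key: pool.starmap(get_length, [(data,)])
               for key, data in all_data}
    # reduce the list of per-task dictionaries for each key
    res = {key: functools.reduce(merge, dicts, {})
           for key, dicts in res.items()}
    assert res == {'a': {'n': 10}, 'b': {'n': 20}, 'c': {'n': 15}}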
Example #8
    def execute(self):
        """
        Split the computation into tiles, which are run in parallel.
        """
        acc = AccumDict(
            {trt_gsim: zero_curves(len(self.sitecol), self.oqparam.imtls)
             for trt_gsim in self.rlzs_assoc})
        acc.calc_times = []
        acc.n = len(self.sitecol)
        hint = math.ceil(acc.n / self.oqparam.sites_per_tile)
        tiles = self.sitecol.split_in_tiles(hint)
        logging.info('Generating %d tiles of %d sites each',
                     len(tiles), len(tiles[0]))
        sources = self.csm.get_sources()
        rlzs_assoc = self.csm.get_rlzs_assoc()
        ctasks = self.oqparam.concurrent_tasks or 1
        maxweight = math.ceil(self.csm.weight / ctasks)
        siteidx = 0
        tmanagers = []
        maximum_distance = self.oqparam.maximum_distance
        # try to produce more tasks than self.oqparam.concurrent_tasks
        num_blocks = math.ceil(self.MORE_TASKS * ctasks / len(tiles))
        splitmap = {}
        for i, tile in enumerate(tiles, 1):
            monitor = self.monitor.new()
            monitor.oqparam = self.oqparam
            with self.monitor('filtering sources per tile', autoflush=True):
                filtered_sources = [
                    src for src in sources
                    if src.filter_sites_by_distance_to_source(
                        maximum_distance, tile) is not None]
                if not filtered_sources:
                    continue
            blocks = split_in_blocks(
                split_sources(sources, maxweight, splitmap), num_blocks,
                weight=operator.attrgetter('weight'),
                key=operator.attrgetter('trt_model_id'))
            tm = parallel.starmap(
                classical,
                ((blk, tile, siteidx, rlzs_assoc, monitor) for blk in blocks),
                name='tile_%d/%d' % (i, len(tiles)))
            tmanagers.append(tm)
            siteidx += len(tile)

        logging.info('Total number of tasks submitted: %d',
                     sum(len(tm.results) for tm in tmanagers))
        for tm in tmanagers:
            tm.reduce(self.agg_dicts, acc)
        self.rlzs_assoc = self.csm.get_rlzs_assoc(
            partial(is_effective_trt_model, acc))
        return acc
Example #9
 def execute(self):
     """
     Parallelize on the riskinputs and return a dictionary of results.
     Requires a `.core_task` to be defined with signature
     (riskinputs, riskmodel, rlzs_assoc, monitor).
     """
     self.monitor.oqparam = self.oqparam
     rlz_ids = getattr(self.oqparam, 'rlz_ids', ())
     if rlz_ids:
         self.rlzs_assoc = self.rlzs_assoc.extract(rlz_ids)
     all_args = ((riskinput, self.riskmodel, self.rlzs_assoc) +
                 self.extra_args + (self.monitor,)
                 for riskinput in self.riskinputs)
     res = starmap(self.core_task.__func__, all_args).reduce()
     return res
Example #10
 def execute(self):
     """
     Parallelize on the riskinputs and return a dictionary of results.
     Requires a `.core_task` to be defined with signature
     (riskinputs, riskmodel, rlzs_assoc, monitor).
     """
     self.monitor.oqparam = self.oqparam
     rlz_ids = getattr(self.oqparam, 'rlz_ids', ())
     if rlz_ids:
         self.rlzs_assoc = self.rlzs_assoc.extract(rlz_ids)
     all_args = ((riskinput, self.riskmodel, self.rlzs_assoc) +
                 self.extra_args + (self.monitor, )
                 for riskinput in self.riskinputs)
     res = starmap(self.core_task.__func__, all_args).reduce()
     return res
Example #11
 def send_sources(self):
     """
     Filter/split and send the sources to the workers.
     :returns: a :class:`openquake.commonlib.parallel.TaskManager`
     """
     oq = self.oqparam
     tiles = [self.sitecol]
     self.num_tiles = 1
     if self.is_tiling():
         hint = math.ceil(len(self.sitecol) / oq.sites_per_tile)
         tiles = self.sitecol.split_in_tiles(hint)
         self.num_tiles = len(tiles)
         logging.info('Generating %d tiles of %d sites each',
                      self.num_tiles, len(tiles[0]))
     manager = source.SourceManager(
         self.csm, oq.maximum_distance, self.datastore,
         self.monitor.new(oqparam=oq), self.random_seed,
         oq.filter_sources, num_tiles=self.num_tiles)
     tm = starmap(self.core_task.__func__, manager.gen_args(tiles))
     manager.store_source_info(self.datastore)
     return tm
Example #12
    def execute(self):
        """
        Builds hcurves and stats from the stored PoEs
        """
        if 'poes' not in self.datastore:  # for short report
            return
        oq = self.oqparam
        rlzs = self.rlzs_assoc.realizations

        # initialize datasets
        N = len(self.sitecol)
        L = len(oq.imtls.array)
        attrs = dict(
            __pyclass__='openquake.hazardlib.probability_map.ProbabilityMap',
            sids=numpy.arange(N, dtype=numpy.uint32))
        if oq.individual_curves:
            for rlz in rlzs:
                self.datastore.create_dset(
                    'hcurves/rlz-%03d' % rlz.ordinal, F32,
                    (N, L, 1),  attrs=attrs)
        if oq.mean_hazard_curves:
            self.datastore.create_dset(
                'hcurves/mean', F32, (N, L, 1), attrs=attrs)
        for q in oq.quantile_hazard_curves:
            self.datastore.create_dset(
                'hcurves/quantile-%s' % q, F32, (N, L, 1), attrs=attrs)
        self.datastore.flush()

        logging.info('Building hazard curves')
        with self.monitor('submitting poes', autoflush=True):
            pmap_by_grp = {
                int(group_id): self.datastore['poes/' + group_id]
                for group_id in self.datastore['poes']}
            res = parallel.starmap(
                build_hcurves_and_stats,
                list(self.gen_args(pmap_by_grp))).submit_all()
        with self.monitor('saving hcurves and stats', autoflush=True):
            nbytes = reduce(self.save_hcurves, res, AccumDict())
            self.save_data_transfer(res)
            return nbytes
Example #13
 def send_sources(self):
     """
     Filter/split and send the sources to the workers.
     :returns: a :class:`openquake.commonlib.parallel.TaskManager`
     """
     oq = self.oqparam
     tiles = [self.sitecol]
     self.num_tiles = 1
     if self.is_tiling():
         hint = math.ceil(len(self.sitecol) / oq.sites_per_tile)
         tiles = self.sitecol.split_in_tiles(hint)
         self.num_tiles = len(tiles)
         logging.info('Generating %d tiles of %d sites each',
                      self.num_tiles, len(tiles[0]))
     manager = source.SourceManager(self.csm,
                                    oq.maximum_distance,
                                    self.datastore,
                                    self.monitor.new(oqparam=oq),
                                    self.random_seed,
                                    oq.filter_sources,
                                    num_tiles=self.num_tiles)
     tm = starmap(self.core_task.__func__, manager.gen_args(tiles))
     manager.store_source_info(self.datastore)
     return tm
Example #14
    def execute(self):
        """
        Run the event_based_risk calculator and aggregate the results
        """
        oq = self.oqparam
        correl_model = oq.get_correl_model()
        self.N = len(self.assetcol)
        self.E = sum(len(v) for v in self.datastore['events'].values())
        logging.info('Populating the risk inputs')
        all_ruptures = []
        preprecalc = getattr(self.precalc, 'precalc', None)
        if preprecalc:  # the ruptures are already in memory
            for grp_id, sesruptures in preprecalc.result.items():
                for sr in sesruptures:
                    all_ruptures.append(sr)
        else:  # read the ruptures from the datastore
            for serial in self.datastore['sescollection']:
                rup = self.datastore['sescollection/' + serial]
                all_ruptures.append(rup)
        all_ruptures.sort(key=operator.attrgetter('serial'))
        if not self.riskmodel.covs:
            # do not generate epsilons
            eps = None
        else:
            eps = riskinput.make_eps(
                self.assets_by_site, self.E, oq.master_seed,
                oq.asset_correlation)
            logging.info('Generated %s epsilons', eps.shape)

        # preparing empty datasets
        loss_types = self.riskmodel.loss_types
        self.C = self.oqparam.loss_curve_resolution
        self.L = L = len(loss_types)
        self.R = R = len(self.rlzs_assoc.realizations)
        self.I = self.oqparam.insured_losses

        # ugly: attaching attributes needed in the task function
        mon = self.monitor
        mon.num_assets = self.count_assets()
        mon.avg_losses = self.oqparam.avg_losses
        mon.asset_loss_table = self.oqparam.asset_loss_table
        mon.insured_losses = self.I
        mon.ses_ratio = (
            oq.risk_investigation_time or oq.investigation_time) / (
                oq.investigation_time * oq.ses_per_logic_tree_path)

        self.N = N = len(self.assetcol)
        self.E = sum(len(v) for v in self.datastore['events'].values())

        # average losses, stored in a composite array of shape N, R
        self.avg_losses = numpy.zeros((N, R), oq.loss_dt())

        self.ass_loss_table = square(L, R, lambda: None)
        self.agg_loss_table = square(L, R, lambda: None)

        self.ela_dt, self.elt_dt = mon.ela_dt, mon.elt_dt = build_el_dtypes(
            self.I)
        for (l, r) in itertools.product(range(L), range(R)):
            lt = loss_types[l]
            if self.oqparam.asset_loss_table:
                self.ass_loss_table[l, r] = self.datastore.create_dset(
                    'ass_loss_table/rlz-%03d/%s' % (r, lt), self.ela_dt)
            self.agg_loss_table[l, r] = self.datastore.create_dset(
                'agg_loss_table/rlz-%03d/%s' % (r, lt), self.elt_dt)

        self.saved = collections.Counter()  # nbytes per HDF5 key
        self.ass_bytes = 0
        self.agg_bytes = 0
        self.gmfbytes = 0
        rlz_ids = getattr(self.oqparam, 'rlz_ids', ())
        if rlz_ids:
            self.rlzs_assoc = self.rlzs_assoc.extract(rlz_ids)

        if not oq.minimum_intensity:
            # infer it from the risk models if not directly set in job.ini
            oq.minimum_intensity = self.riskmodel.get_min_iml()
        min_iml = calc.fix_minimum_intensity(
            oq.minimum_intensity, oq.imtls)
        if min_iml.sum() == 0:
            logging.warn('The GMFs are not filtered: '
                         'you may want to set a minimum_intensity')
        else:
            logging.info('minimum_intensity=%s', oq.minimum_intensity)
        csm_info = self.datastore['csm_info']
        grp_trt = {sg.id: sg.trt for sm in csm_info.source_models
                   for sg in sm.src_groups}
        with self.monitor('building riskinputs', autoflush=True):
            riskinputs = self.riskmodel.build_inputs_from_ruptures(
                grp_trt, list(oq.imtls), self.sitecol.complete, all_ruptures,
                oq.truncation_level, correl_model, min_iml, eps,
                oq.concurrent_tasks or 1)
            # NB: I am using generators so that the tasks are submitted one at
            # the time, without keeping all of the arguments in memory
            res = starmap(
                self.core_task.__func__,
                ((riskinput, self.riskmodel, self.rlzs_assoc,
                  self.assetcol, self.monitor.new('task'))
                 for riskinput in riskinputs)).submit_all()
        acc = functools.reduce(self.agg, res, AccumDict())
        self.save_data_transfer(res)
        return acc
Example #15
    def execute(self):
        """
        Run the event_based_risk calculator and aggregate the results
        """
        oq = self.oqparam
        correl_model = readinput.get_correl_model(oq)
        self.N = len(self.assetcol)
        self.E = len(self.etags)
        logging.info('Populating the risk inputs')
        rlzs_by_tr_id = self.rlzs_assoc.get_rlzs_by_trt_id()
        num_rlzs = {t: len(rlzs) for t, rlzs in rlzs_by_tr_id.items()}
        num_assets = {sid: len(self.assets_by_site[sid])
                      for sid in self.sitecol.sids}
        all_ruptures = []
        for serial in self.datastore['sescollection']:
            rup = self.datastore['sescollection/' + serial]
            rup.set_weight(num_rlzs, num_assets)
            all_ruptures.append(rup)
        all_ruptures.sort(key=operator.attrgetter('serial'))
        if not self.riskmodel.covs:
            # do not generate epsilons
            eps = None
        else:
            eps = riskinput.make_eps(
                self.assets_by_site, self.E, oq.master_seed,
                oq.asset_correlation)
            logging.info('Generated %s epsilons', eps.shape)

        # preparing empty datasets
        loss_types = self.riskmodel.loss_types
        self.C = self.oqparam.loss_curve_resolution
        self.L = L = len(loss_types)
        self.R = R = len(self.rlzs_assoc.realizations)
        self.I = self.oqparam.insured_losses

        # ugly: attaching attributes needed in the task function
        mon = self.monitor
        mon.num_assets = self.count_assets()
        mon.avg_losses = self.oqparam.avg_losses
        mon.asset_loss_table = self.oqparam.asset_loss_table
        mon.insured_losses = self.I
        mon.ses_ratio = (
            oq.risk_investigation_time or oq.investigation_time) / (
                oq.investigation_time * oq.ses_per_logic_tree_path)

        self.N = N = len(self.assetcol)
        self.E = len(self.datastore['etags'])

        # average losses, stored in a composite array of shape N, R
        multi_avg_dt = self.riskmodel.loss_type_dt(insured=self.I)
        self.avg_losses = numpy.zeros((N, R), multi_avg_dt)

        self.ass_loss_table = square(L, R, lambda: None)
        self.agg_loss_table = square(L, R, lambda: None)

        self.ela_dt, self.elt_dt = mon.ela_dt, mon.elt_dt = build_el_dtypes(
            self.I)
        for (l, r) in itertools.product(range(L), range(R)):
            lt = loss_types[l]
            if self.oqparam.asset_loss_table:
                self.ass_loss_table[l, r] = self.datastore.create_dset(
                    'ass_loss_table/rlz-%03d/%s' % (r, lt), self.ela_dt)
            self.agg_loss_table[l, r] = self.datastore.create_dset(
                'agg_loss_table/rlz-%03d/%s' % (r, lt), self.elt_dt)

        self.saved = collections.Counter()  # nbytes per HDF5 key
        self.ass_bytes = 0
        self.agg_bytes = 0
        self.gmfbytes = 0
        rlz_ids = getattr(self.oqparam, 'rlz_ids', ())
        if rlz_ids:
            self.rlzs_assoc = self.rlzs_assoc.extract(rlz_ids)

        if not oq.minimum_intensity:
            # infer it from the risk models if not directly set in job.ini
            oq.minimum_intensity = self.riskmodel.get_min_iml()
        min_iml = calc.fix_minimum_intensity(
            oq.minimum_intensity, oq.imtls)
        if min_iml.sum() == 0:
            logging.warn('The GMFs are not filtered: '
                         'you may want to set a minimum_intensity')
        else:
            logging.info('minimum_intensity=%s', oq.minimum_intensity)

        with self.monitor('building riskinputs', autoflush=True):
            riskinputs = self.riskmodel.build_inputs_from_ruptures(
                self.sitecol.complete, all_ruptures, oq.truncation_level,
                correl_model, min_iml, eps, oq.concurrent_tasks or 1)
            # NB: I am using generators so that the tasks are submitted one at
            # the time, without keeping all of the arguments in memory
            tm = starmap(
                self.core_task.__func__,
                ((riskinput, self.riskmodel, self.rlzs_assoc,
                  self.assetcol, self.monitor.new('task'))
                 for riskinput in riskinputs))
        return tm.reduce(agg=self.agg, posthook=self.save_data_transfer)
Example #16
    def full_disaggregation(self, curves_by_trt_gsim):
        """
        Run the disaggregation phase after hazard curve finalization.
        """
        oq = self.oqparam
        tl = self.oqparam.truncation_level
        sitecol = self.sitecol
        mag_bin_width = self.oqparam.mag_bin_width
        eps_edges = numpy.linspace(-tl, tl, self.oqparam.num_epsilon_bins + 1)
        logging.info('%d epsilon bins from %s to %s', len(eps_edges) - 1,
                     min(eps_edges), max(eps_edges))

        self.bin_edges = {}
        curves_dict = {sid: self.get_curves(sid) for sid in sitecol.sids}
        all_args = []
        num_trts = sum(len(sm.trt_models) for sm in self.csm.source_models)
        nblocks = math.ceil(oq.concurrent_tasks / num_trts)
        for smodel in self.csm.source_models:
            sm_id = smodel.ordinal
            trt_names = tuple(mod.trt for mod in smodel.trt_models)
            max_mag = max(mod.max_mag for mod in smodel.trt_models)
            min_mag = min(mod.min_mag for mod in smodel.trt_models)
            mag_edges = mag_bin_width * numpy.arange(
                int(numpy.floor(min_mag / mag_bin_width)),
                int(numpy.ceil(max_mag / mag_bin_width) + 1))
            logging.info('%d mag bins from %s to %s', len(mag_edges) - 1,
                         min_mag, max_mag)
            for trt_model in smodel.trt_models:
                for site in sitecol:
                    curves = curves_dict[site.id]
                    if not curves:
                        continue  # skip zero-valued hazard curves
                    bb = curves_by_trt_gsim.bb_dict[sm_id, site.id]
                    if not bb:
                        logging.info(
                            'location %s was too far, skipping disaggregation',
                            site.location)
                        continue

                    dist_edges, lon_edges, lat_edges = bb.bins_edges(
                        oq.distance_bin_width, oq.coordinate_bin_width)
                    logging.info(
                        '%d dist bins from %s to %s', len(dist_edges) - 1,
                        min(dist_edges), max(dist_edges))
                    logging.info(
                        '%d lon bins from %s to %s', len(lon_edges) - 1,
                        bb.west, bb.east)
                    logging.info(
                        '%d lat bins from %s to %s', len(lat_edges) - 1,
                        bb.south, bb.north)

                    self.bin_edges[sm_id, site.id] = (
                        mag_edges, dist_edges, lon_edges, lat_edges, eps_edges)

                bin_edges = {}
                for site in sitecol:
                    if (sm_id, site.id) in self.bin_edges:
                        bin_edges[site.id] = self.bin_edges[sm_id, site.id]

                for srcs in split_in_blocks(trt_model, nblocks):
                    all_args.append(
                        (sitecol, srcs, trt_model.id, self.rlzs_assoc,
                         trt_names, curves_dict, bin_edges, oq, self.monitor))

        results = parallel.starmap(compute_disagg, all_args).reduce(
            self.agg_result)
        self.save_disagg_results(results)
Example #17
    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the sources according to their weight and
        tectonic region type.
        """
        monitor = self.monitor.new(self.core_task.__name__)
        monitor.oqparam = oq = self.oqparam
        ucerf_source = self.src_group.sources[0]
        max_dist = oq.maximum_distance[DEFAULT_TRT]
        acc = AccumDict({
            grp_id: ProbabilityMap(len(oq.imtls.array), len(gsims))
            for grp_id, gsims in self.rlzs_assoc.gsims_by_grp_id.items()})
        acc.calc_times = []
        acc.eff_ruptures = AccumDict()  # grp_id -> eff_ruptures
        acc.bb_dict = {}

        if len(self.csm) > 1:
            # when multiple branches, parallelise by branch
            branches = [br.value for br in self.smlt.branches.values()]
            rup_res = parallel.starmap(
                ucerf_classical_hazard_by_branch,
                self.gen_args(branches, ucerf_source, monitor)).submit_all()
        else:
            # single branch
            gsims = self.rlzs_assoc.gsims_by_grp_id[0]
            [(branch_id, branch)] = self.smlt.branches.items()
            branchname = branch.value
            ucerf_source.src_group_id = 0
            ucerf_source.weight = 1
            ucerf_source.nsites = len(self.sitecol)
            self.infos[0, ucerf_source.source_id] = source.SourceInfo(
                ucerf_source)
            logging.info('Getting the background point sources')
            with self.monitor('getting background sources', autoflush=True):
                ucerf_source.build_idx_set()
                background_sids = ucerf_source.get_background_sids(
                    self.sitecol, max_dist)
                bckgnd_sources = ucerf_source.get_background_sources(
                    background_sids)

            # parallelize on the background sources, small tasks
            args = (bckgnd_sources, self.sitecol, oq.imtls,
                    gsims, self.oqparam.truncation_level,
                    'SourceSitesFilter', max_dist, (), monitor)
            bg_res = parallel.apply(
                pmap_from_grp, args,
                concurrent_tasks=self.oqparam.concurrent_tasks).submit_all()

            # parallelize by rupture subsets
            tasks = self.oqparam.concurrent_tasks * 2  # they are big tasks
            rup_sets = ucerf_source.get_rupture_indices(branchname)
            rup_res = parallel.apply(
                ucerf_classical_hazard_by_rupture_set,
                (rup_sets, branchname, ucerf_source, self.src_group.id,
                 self.sitecol, gsims, monitor),
                concurrent_tasks=tasks).submit_all()

            # compose probabilities from background sources
            for pmap in bg_res:
                acc[0] |= pmap
            self.save_data_transfer(bg_res)

        pmap_by_grp_id = functools.reduce(self.agg_dicts, rup_res, acc)
        with self.monitor('store source_info', autoflush=True):
            self.store_source_info(self.infos)
            self.save_data_transfer(rup_res)
        self.rlzs_assoc = self.csm.info.get_rlzs_assoc(
            functools.partial(self.count_eff_ruptures, pmap_by_grp_id))
        self.datastore['csm_info'] = self.csm.info
        return pmap_by_grp_id
Example #18
    def full_disaggregation(self, curves_by_trt_gsim):
        """
        Run the disaggregation phase after hazard curve finalization.
        """
        oq = self.oqparam
        tl = self.oqparam.truncation_level
        sitecol = self.sitecol
        mag_bin_width = self.oqparam.mag_bin_width
        eps_edges = numpy.linspace(-tl, tl, self.oqparam.num_epsilon_bins + 1)
        logging.info('%d epsilon bins from %s to %s',
                     len(eps_edges) - 1, min(eps_edges), max(eps_edges))

        self.bin_edges = {}
        curves_dict = {sid: self.get_curves(sid) for sid in sitecol.sids}
        all_args = []
        num_trts = sum(len(sm.trt_models) for sm in self.csm.source_models)
        nblocks = math.ceil(oq.concurrent_tasks / num_trts)
        for smodel in self.csm.source_models:
            sm_id = smodel.ordinal
            trt_names = tuple(mod.trt for mod in smodel.trt_models)
            max_mag = max(mod.max_mag for mod in smodel.trt_models)
            min_mag = min(mod.min_mag for mod in smodel.trt_models)
            mag_edges = mag_bin_width * numpy.arange(
                int(numpy.floor(min_mag / mag_bin_width)),
                int(numpy.ceil(max_mag / mag_bin_width) + 1))
            logging.info('%d mag bins from %s to %s',
                         len(mag_edges) - 1, min_mag, max_mag)
            for trt_model in smodel.trt_models:
                for site in sitecol:
                    curves = curves_dict[site.id]
                    if not curves:
                        continue  # skip zero-valued hazard curves
                    bb = curves_by_trt_gsim.bb_dict[sm_id, site.id]
                    if not bb:
                        logging.info(
                            'location %s was too far, skipping disaggregation',
                            site.location)
                        continue

                    dist_edges, lon_edges, lat_edges = bb.bins_edges(
                        oq.distance_bin_width, oq.coordinate_bin_width)
                    logging.info('%d dist bins from %s to %s',
                                 len(dist_edges) - 1, min(dist_edges),
                                 max(dist_edges))
                    logging.info('%d lon bins from %s to %s',
                                 len(lon_edges) - 1, bb.west, bb.east)
                    logging.info('%d lat bins from %s to %s',
                                 len(lat_edges) - 1, bb.south, bb.north)

                    self.bin_edges[sm_id,
                                   site.id] = (mag_edges, dist_edges,
                                               lon_edges, lat_edges, eps_edges)

                bin_edges = {}
                for site in sitecol:
                    if (sm_id, site.id) in self.bin_edges:
                        bin_edges[site.id] = self.bin_edges[sm_id, site.id]

                for srcs in split_in_blocks(trt_model, nblocks):
                    all_args.append(
                        (sitecol, srcs, trt_model.id, self.rlzs_assoc,
                         trt_names, curves_dict, bin_edges, oq, self.monitor))

        results = parallel.starmap(compute_disagg,
                                   all_args).reduce(self.agg_result)
        self.save_disagg_results(results)
Example #19
 def execute(self):
     num_rlzs = len(self.rlzs_assoc.realizations)
     allres = parallel.starmap(compute_losses, self.gen_args()).submit_all()
     num_events = self.save_results(allres, num_rlzs)
     self.save_data_transfer(allres)
     return num_events
Example #20
    def full_disaggregation(self):
        """
        Run the disaggregation phase after hazard curve finalization.
        """
        oq = self.oqparam
        tl = self.oqparam.truncation_level
        bb_dict = self.datastore["bb_dict"]
        sitecol = self.sitecol
        mag_bin_width = self.oqparam.mag_bin_width
        eps_edges = numpy.linspace(-tl, tl, self.oqparam.num_epsilon_bins + 1)
        logging.info("%d epsilon bins from %s to %s", len(eps_edges) - 1, min(eps_edges), max(eps_edges))

        self.bin_edges = {}
        curves_dict = {sid: self.get_curves(sid) for sid in sitecol.sids}
        all_args = []
        num_trts = sum(len(sm.src_groups) for sm in self.csm.source_models)
        nblocks = math.ceil(oq.concurrent_tasks / num_trts)
        for smodel in self.csm.source_models:
            sm_id = smodel.ordinal
            trt_names = tuple(mod.trt for mod in smodel.src_groups)
            max_mag = max(mod.max_mag for mod in smodel.src_groups)
            min_mag = min(mod.min_mag for mod in smodel.src_groups)
            mag_edges = mag_bin_width * numpy.arange(
                int(numpy.floor(min_mag / mag_bin_width)), int(numpy.ceil(max_mag / mag_bin_width) + 1)
            )
            logging.info("%d mag bins from %s to %s", len(mag_edges) - 1, min_mag, max_mag)
            for src_group in smodel.src_groups:
                if src_group.id not in self.rlzs_assoc.gsims_by_grp_id:
                    continue  # the group has been filtered away
                for sid, site in zip(sitecol.sids, sitecol):
                    curves = curves_dict[sid]
                    if not curves:
                        continue  # skip zero-valued hazard curves
                    bb = bb_dict[sm_id, sid]
                    if not bb:
                        logging.info("location %s was too far, skipping disaggregation", site.location)
                        continue

                    dist_edges, lon_edges, lat_edges = bb.bins_edges(oq.distance_bin_width, oq.coordinate_bin_width)
                    logging.info("%d dist bins from %s to %s", len(dist_edges) - 1, min(dist_edges), max(dist_edges))
                    logging.info("%d lon bins from %s to %s", len(lon_edges) - 1, bb.west, bb.east)
                    logging.info("%d lat bins from %s to %s", len(lat_edges) - 1, bb.south, bb.north)

                    self.bin_edges[sm_id, sid] = (mag_edges, dist_edges, lon_edges, lat_edges, eps_edges)

                bin_edges = {}
                for sid, site in zip(sitecol.sids, sitecol):
                    if (sm_id, sid) in self.bin_edges:
                        bin_edges[sid] = self.bin_edges[sm_id, sid]

                ss_filter = SourceSitesFilter(oq.maximum_distance)
                split_sources = []
                for src in src_group:
                    for split, _sites in ss_filter(sourceconverter.split_source(src), sitecol):
                        split_sources.append(split)
                for srcs in split_in_blocks(split_sources, nblocks):
                    all_args.append(
                        (
                            sitecol,
                            srcs,
                            src_group.id,
                            self.rlzs_assoc,
                            trt_names,
                            curves_dict,
                            bin_edges,
                            oq,
                            self.monitor,
                        )
                    )

        results = parallel.starmap(compute_disagg, all_args).reduce(self.agg_result)
        self.save_disagg_results(results)
Example #21
    def execute(self):
        """
        Run the event_based_risk calculator and aggregate the results
        """
        oq = self.oqparam
        correl_model = readinput.get_correl_model(oq)
        self.N = len(self.assetcol)
        self.E = len(self.etags)
        logging.info('Populating the risk inputs')
        rlzs_by_tr_id = self.rlzs_assoc.get_rlzs_by_trt_id()
        num_rlzs = {t: len(rlzs) for t, rlzs in rlzs_by_tr_id.items()}
        num_assets = {
            sid: len(self.assets_by_site[sid])
            for sid in self.sitecol.sids
        }
        all_ruptures = []
        for serial in self.datastore['sescollection']:
            rup = self.datastore['sescollection/' + serial]
            rup.set_weight(num_rlzs, num_assets)
            all_ruptures.append(rup)
        all_ruptures.sort(key=operator.attrgetter('serial'))
        if not self.riskmodel.covs:
            # do not generate epsilons
            eps = None
        else:
            eps = riskinput.make_eps(self.assets_by_site, self.E,
                                     oq.master_seed, oq.asset_correlation)
            logging.info('Generated %s epsilons', eps.shape)

        # preparing empty datasets
        loss_types = self.riskmodel.loss_types
        self.C = self.oqparam.loss_curve_resolution
        self.L = L = len(loss_types)
        self.R = R = len(self.rlzs_assoc.realizations)
        self.I = self.oqparam.insured_losses

        # ugly: attaching attributes needed in the task function
        mon = self.monitor
        mon.num_assets = self.count_assets()
        mon.avg_losses = self.oqparam.avg_losses
        mon.asset_loss_table = self.oqparam.asset_loss_table
        mon.insured_losses = self.I
        mon.ses_ratio = (oq.risk_investigation_time or
                         oq.investigation_time) / (oq.investigation_time *
                                                   oq.ses_per_logic_tree_path)

        self.N = N = len(self.assetcol)
        self.E = len(self.datastore['etags'])

        # average losses, stored in a composite array of shape N, R
        multi_avg_dt = self.riskmodel.loss_type_dt(insured=self.I)
        self.avg_losses = numpy.zeros((N, R), multi_avg_dt)

        self.ass_loss_table = square(L, R, lambda: None)
        self.agg_loss_table = square(L, R, lambda: None)

        self.ela_dt, self.elt_dt = mon.ela_dt, mon.elt_dt = build_el_dtypes(
            self.I)
        for (l, r) in itertools.product(range(L), range(R)):
            lt = loss_types[l]
            if self.oqparam.asset_loss_table:
                self.ass_loss_table[l, r] = self.datastore.create_dset(
                    'ass_loss_table/rlz-%03d/%s' % (r, lt), self.ela_dt)
            self.agg_loss_table[l, r] = self.datastore.create_dset(
                'agg_loss_table/rlz-%03d/%s' % (r, lt), self.elt_dt)

        self.saved = collections.Counter()  # nbytes per HDF5 key
        self.ass_bytes = 0
        self.agg_bytes = 0
        self.gmfbytes = 0
        rlz_ids = getattr(self.oqparam, 'rlz_ids', ())
        if rlz_ids:
            self.rlzs_assoc = self.rlzs_assoc.extract(rlz_ids)

        if not oq.minimum_intensity:
            # infer it from the risk models if not directly set in job.ini
            oq.minimum_intensity = self.riskmodel.get_min_iml()
        min_iml = calc.fix_minimum_intensity(oq.minimum_intensity, oq.imtls)
        if min_iml.sum() == 0:
            logging.warn('The GMFs are not filtered: '
                         'you may want to set a minimum_intensity')
        else:
            logging.info('minimum_intensity=%s', oq.minimum_intensity)

        with self.monitor('building riskinputs', autoflush=True):
            riskinputs = self.riskmodel.build_inputs_from_ruptures(
                self.sitecol.complete, all_ruptures, oq.truncation_level,
                correl_model, min_iml, eps, oq.concurrent_tasks or 1)
            # NB: I am using generators so that the tasks are submitted one at
            # the time, without keeping all of the arguments in memory
            tm = starmap(self.core_task.__func__,
                         ((riskinput, self.riskmodel, self.rlzs_assoc,
                           self.assetcol, self.monitor.new('task'))
                          for riskinput in riskinputs))
        res = tm.reduce(agg=self.agg)
        self.save_data_transfer(tm)
        return res