def post_process(self):
    """
    Optionally generates aggregate curves, hazard maps and
    uniform_hazard_spectra.
    """
    # means/quantiles:
    if self.hc.mean_hazard_curves or self.hc.quantile_hazard_curves:
        self.do_aggregate_post_proc()

    # hazard maps:
    # required for computing UHS
    # if `hazard_maps` is false but `uniform_hazard_spectra` is true,
    # just don't export the maps
    if self.hc.hazard_maps or self.hc.uniform_hazard_spectra:
        with self.monitor('generating hazard maps'):
            hazard_curves = models.HazardCurve.objects.filter(
                output__oq_job=self.job, imt__isnull=False)
            tasks.apply_reduce(
                hazard_curves_to_hazard_map,
                (self.job.id, hazard_curves, self.hc.poes))
    if self.hc.uniform_hazard_spectra:
        individual_curves = self.job.get_param(
            'individual_curves', missing=True)
        if individual_curves is False:
            logs.LOG.warn('The parameter `individual_curves` is false, '
                          'cannot compute the UHS curves')
        else:
            do_uhs_post_proc(self.job)

def test_failing_subtask(self):
    try:
        tasks.apply_reduce(failing_task, ('job_id', [42]),
                           agg=lambda a, x: x)
    except NotImplementedError:
        pass  # expected
    else:
        raise Exception("Exception not raised.")

def test_failing_subtask(self):
    try:
        tasks.apply_reduce(failing_task, ([42], 'monitor'),
                           agg=lambda a, x: x)
    except NotImplementedError:
        pass  # expected
    else:
        raise Exception("Exception not raised.")

def prepare_risk(self):
    """
    Associate assets and sites.
    """
    self.outputdict = writers.combine_builders(
        [ob(self) for ob in self.output_builders])

    # build the initializers hazard -> risk
    ct = sorted((counts, taxonomy) for taxonomy, counts
                in self.taxonomies_asset_count.iteritems())
    tasks.apply_reduce(prepare_risk, (ct, self, self.monitor),
                       concurrent_tasks=self.concurrent_tasks)

def prepare_risk(self):
    """
    Associate assets and sites and, for some calculators, generate
    the epsilons.
    """
    self.outputdict = writers.combine_builders(
        [ob(self) for ob in self.output_builders])

    # build the initializers hazard -> risk
    ct = sorted((counts, taxonomy) for taxonomy, counts
                in self.taxonomies_asset_count.iteritems())
    tasks.apply_reduce(prepare_risk, (self.job.id, ct, self.rc),
                       concurrent_tasks=self.concurrent_tasks)

def prepare_risk(self):
    """
    Associate assets and sites and, for some calculators, generate
    the epsilons.
    """
    self.outputdict = writers.combine_builders(
        [ob(self) for ob in self.output_builders])

    # build the initializers hazard -> risk
    ct = sorted(
        (counts, taxonomy) for taxonomy, counts
        in self.taxonomies_asset_count.iteritems())
    tasks.apply_reduce(prepare_risk, (ct, self, self.monitor),
                       concurrent_tasks=self.concurrent_tasks)

def generate_gmfs_and_curves(self):
    """
    Generate the GMFs and optionally the hazard curves too.
    """
    sitecol = self.site_collection
    sesruptures = []  # collect the ruptures in a fixed order
    with self.monitor('reading ruptures'):
        for ses_coll in models.SESCollection.objects.filter(
                trt_model__lt_model__hazard_calculation=self.job):
            for sr in models.SESRupture.objects.filter(
                    rupture__ses_collection=ses_coll):
                # adding the annotation below saves a LOT of memory;
                # otherwise one would need as key in apply_reduce
                # lambda sr: sr.rupture.ses_collection.ordinal, which
                # would read the world from the database
                sr.col_idx = ses_coll.ordinal
                sesruptures.append(sr)
    base_agg = super(EventBasedHazardCalculator, self).agg_curves
    if self.oqparam.hazard_curves_from_gmfs:
        zeros = {key: self.zeros for key in self.rlzs_assoc}
    else:
        zeros = {}
    return tasks.apply_reduce(
        compute_gmfs_and_curves,
        (sesruptures, sitecol, self.rlzs_assoc, self.monitor),
        base_agg, zeros, key=lambda sr: sr.col_idx)

def execute(self):
    """
    Run :func:`openquake.engine.calculators.hazard.scenario.core.gmfs`
    in parallel.
    """
    self.acc = tasks.apply_reduce(
        self.core_calc_task,
        (zip(self.tags, self.seeds), self.computer, self.monitor))

def execute(self):
    """
    Run the `.core_calc_task` in parallel by using the apply_reduce
    distribution; this method can be overridden in subclasses.
    """
    self.acc = tasks.apply_reduce(
        self.core_calc_task,
        (self.job.id, self.all_sources, self.site_collection),
        agg=self.agg_curves, acc=self.acc,
        weight=attrgetter('weight'), key=attrgetter('trt_model_id'))

def execute(self):
    """
    Run the `.core_calc_task` in parallel by using the apply_reduce
    distribution; this method can be overridden in subclasses.
    """
    csm = self.composite_model
    self.acc = tasks.apply_reduce(
        self.core_calc_task,
        (list(csm.sources), self.site_collection, csm.info, self.monitor),
        agg=self.agg_curves, acc=self.acc,
        weight=attrgetter('weight'), key=attrgetter('trt_model_id'))

def post_process(self):
    """
    Optionally generates aggregate curves, hazard maps and
    uniform_hazard_spectra.
    """
    # means/quantiles:
    if self.mean_hazard_curves or self.quantile_hazard_curves:
        self.do_aggregate_post_proc()

    # hazard maps:
    # required for computing UHS
    # if `hazard_maps` is false but `uniform_hazard_spectra` is true,
    # just don't export the maps
    if (self.oqparam.hazard_maps or
            self.oqparam.uniform_hazard_spectra):
        with self.monitor('generating hazard maps',
                          autoflush=True) as mon:
            tasks.apply_reduce(
                hazard_curves_to_hazard_map,
                (self._hazard_curves, self.oqparam.poes, mon))
    if self.oqparam.uniform_hazard_spectra:
        do_uhs_post_proc(self.job)

def execute(self):
    """
    Method responsible for the distribution strategy.
    """
    self.outputdict = writers.combine_builders(
        [ob(self) for ob in self.output_builders])
    ct = sorted((counts, taxonomy) for taxonomy, counts
                in self.taxonomies_asset_count.iteritems())
    self.acc = tasks.apply_reduce(
        build_getters, (self.job.id, ct, self),
        lambda acc, otm: otm.aggregate_results(self.agg_result, acc),
        self.acc, self.concurrent_tasks)

def generate_gmfs_and_curves(self):
    """
    Generate the GMFs and optionally the hazard curves too.
    """
    sitecol = self.hc.site_collection
    sesruptures = []  # collect the ruptures in a fixed order
    for trt_model in models.TrtModel.objects.filter(
            lt_model__hazard_calculation=self.hc):
        sesruptures.extend(
            models.SESRupture.objects.filter(
                rupture__trt_model=trt_model))
    self.curves = tasks.apply_reduce(
        compute_gmfs_and_curves, (self.job.id, sesruptures, sitecol),
        self.agg_curves, {}, key=lambda sr: sr.rupture.trt_model.id)

def execute(self):
    """
    Method responsible for the distribution strategy. The risk
    calculators share a two-phase distribution logic: in phase 1
    the initializer objects are built, distributing per taxonomy;
    in phase 2 the real computation is run, distributing in chunks
    of asset_site associations.
    """
    self.prepare_risk()
    # then run the real computation
    assocs = models.AssetSite.objects.filter(job=self.job).order_by(
        'asset__taxonomy')
    self.acc = tasks.apply_reduce(
        run_risk, (assocs, self, self.monitor),
        self.agg_result, self.acc, self.concurrent_tasks,
        name=self.core.__name__)

def process_sources(self):
    """
    Filter and split the sources in parallel.
    Return the list of processed sources.
    """
    self.all_sources = AllSources()
    self.job.is_running = True
    self.job.save()
    num_models = len(self.source_collector)
    num_sites = len(self.hc.site_collection)
    for i, trt_model_id in enumerate(sorted(self.source_collector), 1):
        trt_model = models.TrtModel.objects.get(pk=trt_model_id)
        sc = self.source_collector[trt_model_id]
        # NB: the filtering of the sources by site is slow, so it is
        # done in parallel
        sm_lt_path = tuple(trt_model.lt_model.sm_lt_path)
        logs.LOG.progress(
            '[%d of %d] Filtering/splitting %d source(s) for '
            'sm_lt_path=%s, TRT=%s, model=%s', i, num_models,
            len(sc.sources), sm_lt_path,
            trt_model.tectonic_region_type, trt_model.lt_model.sm_name)
        if len(sc.sources) * num_sites > LOTS_OF_SOURCES_SITES:
            # filter in parallel
            sc.sources = tasks.apply_reduce(
                filter_and_split_sources,
                (self.job.id, sc.sources, self.hc.site_collection),
                list.__add__, [])
        else:  # few sources and sites
            # filter sequentially on a single core
            sc.sources = filter_and_split_sources.task_func(
                self.job.id, sc.sources, self.hc.site_collection)
        sc.sources.sort(key=attrgetter('source_id'))
        if not sc.sources:
            logs.LOG.warn(
                'Could not find sources close to the sites in %s '
                'sm_lt_path=%s, maximum_distance=%s km',
                trt_model.lt_model.sm_name, sm_lt_path,
                self.hc.maximum_distance)
            continue
        for src in sc.sources:
            self.all_sources.append(
                src, sc.update_num_ruptures(src), trt_model)
        trt_model.num_sources = len(sc.sources)
        trt_model.num_ruptures = sc.num_ruptures
        trt_model.save()
    return self.all_sources.get_total_weight()

def execute(self):
    """
    Run the `.core_calc_task` in parallel by using the apply_reduce
    distribution; this method can be overridden in subclasses.
    """
    csm = self.composite_model
    rlzs_assoc = csm.get_rlzs_assoc()
    # temporary hack
    if self.__class__.__name__ == 'EventBasedHazardCalculator':
        info = rlzs_assoc.csm_info
    else:
        info = rlzs_assoc.gsims_by_trt_id
    self.acc = tasks.apply_reduce(
        self.core_calc_task,
        (csm.get_sources(), self.site_collection, info, self.monitor),
        agg=self.agg_curves, acc=self.acc,
        weight=attrgetter('weight'), key=attrgetter('trt_model_id'),
        concurrent_tasks=self.concurrent_tasks)

def generate_gmfs_and_curves(self):
    """
    Generate the GMFs and optionally the hazard curves too.
    """
    sitecol = self.site_collection
    sesruptures = []  # collect the ruptures in a fixed order
    with self.monitor('reading ruptures'):
        for trt_model in models.TrtModel.objects.filter(
                lt_model__hazard_calculation=self.job):
            for sr in models.SESRupture.objects.filter(
                    rupture__ses_collection__trt_model=trt_model):
                # adding the annotation below saves a LOT of memory;
                # otherwise one would need as key in apply_reduce
                # lambda sr: sr.rupture.trt_model.id, which would
                # read the world from the database
                sr.trt_id = trt_model.id
                sesruptures.append(sr)
    base_agg = super(EventBasedHazardCalculator, self).agg_curves
    return tasks.apply_reduce(
        compute_gmfs_and_curves, (self.job.id, sesruptures, sitecol),
        base_agg, {}, key=lambda sr: sr.trt_id)

def test_apply_reduce(self):
    got = tasks.apply_reduce(
        get_even, (1, [1, 2, 3, 4, 5]), list.__add__, [], 2)
    self.assertEqual(sorted(got), [2, 4])

def test_apply_reduce(self):
    got = tasks.apply_reduce(
        get_even, ([1, 2, 3, 4, 5], 'monitor'), list.__add__, [], 2)
    self.assertEqual(sorted(got), [2, 4])

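
# The tests above pin down the observable contract of apply_reduce: the
# sequence in the argument tuple is split into chunks (optionally grouped
# by `key` and balanced by `weight`), the task is applied to each chunk
# together with the remaining arguments, and the partial results are
# folded with `agg` starting from `acc`. The sketch below is a
# hypothetical single-process rendering of that contract, following the
# newer argument convention (sequence first); it is NOT the engine's
# `tasks` module, which dispatches the chunks as parallel tasks, and the
# names apply_reduce_sketch and the toy get_even are illustrative only.
import operator
from itertools import groupby


def apply_reduce_sketch(task, task_args, agg=operator.add, acc=None,
                        concurrent_tasks=8,
                        weight=lambda item: 1, key=lambda item: 'all'):
    """
    Hypothetical sequential sketch of the apply_reduce contract
    inferred from the snippets above.
    """
    seq, rest = task_args[0], task_args[1:]
    # target weight per chunk; `or 1` guards against an empty sequence
    max_weight = sum(weight(item) for item in seq) / concurrent_tasks or 1
    chunks = []
    for _, group in groupby(sorted(seq, key=key), key):
        block, block_weight = [], 0
        for item in group:
            block.append(item)
            block_weight += weight(item)
            if block_weight >= max_weight:
                chunks.append(block)
                block, block_weight = [], 0
        if block:  # leftover items in the current group
            chunks.append(block)
    for chunk in chunks:  # the real engine runs these in parallel
        acc = agg(acc, task(chunk, *rest))
    return acc


def get_even(chunk, monitor):  # toy stand-in for the task in the test
    return [x for x in chunk if x % 2 == 0]

print(apply_reduce_sketch(get_even, ([1, 2, 3, 4, 5], 'monitor'),
                          list.__add__, [], concurrent_tasks=2))
# prints [2, 4], matching test_apply_reduce above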