def asset_statistics(losses, curves_poes, quantiles, weights, poes):
    """
    Compute output statistics (mean/quantile loss curves and maps)
    for a single asset.

    :param losses:
        the losses on which the loss curves are defined
    :param curves_poes:
        a numpy matrix suitable to be used with
        :func:`openquake.engine.calculators.post_processing`
    :param list quantiles:
        an iterable over the quantile levels to be considered for
        quantile outputs
    :param list weights:
        the weights associated with each realization. If all the
        elements are `None`, implicit weights are taken into account
    :param list poes:
        the poe taken into account for computing loss maps
    :returns:
        a tuple with
        1) the mean loss curve
        2) a list of quantile loss curves
        3) the mean loss map (one loss per poe)
        4) a list of quantile loss maps (one list of losses per poe)
    """
    explicit_weights = weights[0] is not None

    def quantile_poes(level):
        # Select the weighted algorithm only when explicit realization
        # weights were provided; otherwise rely on implicit weighting.
        if explicit_weights:
            return post_processing.weighted_quantile_curve(
                curves_poes, weights, level)
        return post_processing.quantile_curve(curves_poes, level)

    # quantile loss curves, one (losses, poes) pair per quantile level
    quantile_curves = [(losses, quantile_poes(level))
                       for level in quantiles]

    # mean loss curve
    mean_poes = post_processing.mean_curve(curves_poes, weights)
    mean_curve = (losses, mean_poes)

    # loss maps: one conditional loss per requested poe
    mean_map = [scientific.conditional_loss_ratio(losses, mean_poes, poe)
                for poe in poes]

    quantile_maps = [
        [scientific.conditional_loss_ratio(curve_losses, curve_poes, poe)
         for curve_losses, curve_poes in quantile_curves]
        for poe in poes]

    return (mean_curve, quantile_curves, mean_map, quantile_maps)
def test_compute_quantile_curve(self):
    # Check `quantile_curve` against a precomputed expected curve and
    # against scipy's reference implementation.
    expected_curve = numpy.array([
        9.9178000e-01, 9.8892000e-01, 9.6903000e-01, 9.4030000e-01,
        8.8405000e-01, 7.8782000e-01, 6.4897250e-01, 4.8284250e-01,
        3.4531500e-01, 3.2337000e-01, 1.8880500e-01, 9.5574000e-02,
        4.3707250e-02, 1.9643000e-02, 8.1923000e-03, 2.9157000e-03,
        7.9955000e-04, 1.5233000e-04, 1.5582000e-05])
    quantile = 0.75
    # five hazard curves (one per realization), 19 levels each
    curves = [
        [9.8161000e-01, 9.7837000e-01, 9.5579000e-01, 9.2555000e-01,
         8.7052000e-01, 7.8214000e-01, 6.5708000e-01, 5.0526000e-01,
         3.7044000e-01, 3.4740000e-01, 2.0502000e-01, 1.0506000e-01,
         4.6531000e-02, 1.7548000e-02, 5.4791000e-03, 1.3377000e-03,
         2.2489000e-04, 2.2345000e-05, 4.2696000e-07],
        [9.7309000e-01, 9.6857000e-01, 9.3853000e-01, 9.0089000e-01,
         8.3673000e-01, 7.4057000e-01, 6.1272000e-01, 4.6467000e-01,
         3.3694000e-01, 3.1536000e-01, 1.8340000e-01, 9.2412000e-02,
         4.0202000e-02, 1.4900000e-02, 4.5924000e-03, 1.1126000e-03,
         1.8647000e-04, 1.8882000e-05, 4.7123000e-07],
        [9.9178000e-01, 9.8892000e-01, 9.6903000e-01, 9.4030000e-01,
         8.8405000e-01, 7.8782000e-01, 6.4627000e-01, 4.7537000e-01,
         3.3168000e-01, 3.0827000e-01, 1.7279000e-01, 8.8360000e-02,
         4.2766000e-02, 1.9643000e-02, 8.1923000e-03, 2.9157000e-03,
         7.9955000e-04, 1.5233000e-04, 1.5582000e-05],
        [9.8885000e-01, 9.8505000e-01, 9.5972000e-01, 9.2494000e-01,
         8.6030000e-01, 7.5574000e-01, 6.1009000e-01, 4.4217000e-01,
         3.0543000e-01, 2.8345000e-01, 1.5760000e-01, 8.0225000e-02,
         3.8681000e-02, 1.7637000e-02, 7.2685000e-03, 2.5474000e-03,
         6.8347000e-04, 1.2596000e-04, 1.2853000e-05],
        [9.9178000e-01, 9.8892000e-01, 9.6903000e-01, 9.4030000e-01,
         8.8405000e-01, 7.8782000e-01, 6.4627000e-01, 4.7537000e-01,
         3.3168000e-01, 3.0827000e-01, 1.7279000e-01, 8.8360000e-02,
         4.2766000e-02, 1.9643000e-02, 8.1923000e-03, 2.9157000e-03,
         7.9955000e-04, 1.5233000e-04, 1.5582000e-05],
    ]
    actual_curve = post_processing.quantile_curve(curves, quantile)
    # TODO(LB): Check with our hazard experts to see if this is a
    # reasonable tolerance. Better yet, get a fresh set of test data.
    # (This test data was just copied verbatim from some old tests in
    # `tests/hazard_test.py`.)
    numpy.testing.assert_allclose(expected_curve, actual_curve, atol=0.005)
    # Since this implementation is an optimized but equivalent version of
    # scipy's `mquantiles`, compare algorithms just to prove they are the
    # same:
    scipy_curve = mstats.mquantiles(curves, prob=quantile, axis=0)[0]
    numpy.testing.assert_allclose(scipy_curve, actual_curve)
def curve_statistics(asset, loss_ratio_curves, curves_weights,
                     mean_loss_curve_id, quantile_loss_curve_ids,
                     explicit_quantiles, assume_equal):
    """
    Compute quantile and (optionally) mean loss curves for a single
    asset and save them to the database as ``LossCurveData`` records.

    :param asset: the asset the curves refer to (provides `asset_ref`,
        `value` and `site`)
    :param loss_ratio_curves: one loss-ratio curve per realization
    :param curves_weights: the weights associated with each realization
    :param mean_loss_curve_id: ID of the container for the mean curve,
        or a falsy value to skip mean computation
    :param quantile_loss_curve_ids: dict mapping quantile level ->
        container ID for the corresponding quantile curve
    :param explicit_quantiles: if true, use the explicitly-weighted
        quantile algorithm; otherwise use the implicit one
    :param assume_equal: either 'support' (all curves share the same
        abscissae) or 'image' (curves are resampled on the abscissae of
        the first one); anything else raises NotImplementedError
    """
    if assume_equal == 'support':
        # all the curves are defined on the same loss ratios
        loss_ratios = loss_ratio_curves[0].abscissae
        curves_poes = [curve.ordinates for curve in loss_ratio_curves]
    elif assume_equal == 'image':
        # resample every curve on the abscissae of the first curve
        loss_ratios = loss_ratio_curves[0].abscissae
        curves_poes = [curve.ordinate_for(loss_ratios)
                      for curve in loss_ratio_curves]
    else:
        raise NotImplementedError

    # one DB record per requested quantile level
    for quantile, quantile_loss_curve_id in quantile_loss_curve_ids.items():
        if explicit_quantiles:
            q_curve = post_processing.weighted_quantile_curve(
                curves_poes, curves_weights, quantile)
        else:
            q_curve = post_processing.quantile_curve(
                curves_poes, quantile)

        models.LossCurveData.objects.create(
            loss_curve_id=quantile_loss_curve_id,
            asset_ref=asset.asset_ref,
            poes=q_curve.tolist(),
            loss_ratios=loss_ratios,
            asset_value=asset.value,
            location=asset.site.wkt)

    # then means
    if mean_loss_curve_id:
        mean_curve = post_processing.mean_curve(
            curves_poes, weights=curves_weights)

        models.LossCurveData.objects.create(
            loss_curve_id=mean_loss_curve_id,
            asset_ref=asset.asset_ref,
            poes=mean_curve.tolist(),
            loss_ratios=loss_ratios,
            asset_value=asset.value,
            location=asset.site.wkt)
def do_aggregate_post_proc(self):
    """
    Grab hazard data for all realizations and sites from the database
    and compute mean and/or quantile aggregates (depending on which
    options are enabled in the calculation).

    Post-processing results will be stored directly into the database.
    """
    num_rlzs = models.LtRealization.objects.filter(
        lt_model__hazard_calculation=self.hc).count()

    # Number of whole sites that fit into one cache increment.  Use
    # floor division (``//``) so the result is an int on both Python 2
    # and Python 3; true division (``/``) would produce a float on
    # Python 3 and propagate it into `slice_incr` below.
    num_site_blocks_per_incr = int(CURVE_CACHE_SIZE) // int(num_rlzs)
    if num_site_blocks_per_incr == 0:
        # This means we have `num_rlzs` >= `CURVE_CACHE_SIZE`.
        # The minimum number of sites should be 1.
        num_site_blocks_per_incr = 1
    slice_incr = num_site_blocks_per_incr * num_rlzs  # unit: num records

    if self.hc.mean_hazard_curves:
        # create a new `HazardCurve` 'container' record for mean
        # curves (virtual container for multiple imts)
        models.HazardCurve.objects.create(
            output=models.Output.objects.create_output(
                self.job, "mean-curves-multi-imt", "hazard_curve_multi"),
            statistics="mean", imt=None,
            investigation_time=self.hc.investigation_time)

    if self.hc.quantile_hazard_curves:
        for quantile in self.hc.quantile_hazard_curves:
            # create a new `HazardCurve` 'container' record for quantile
            # curves (virtual container for multiple imts)
            models.HazardCurve.objects.create(
                output=models.Output.objects.create_output(
                    self.job, 'quantile(%s)-curves' % quantile,
                    "hazard_curve_multi"),
                statistics="quantile", imt=None, quantile=quantile,
                investigation_time=self.hc.investigation_time)

    for imt, imls in self.hc.intensity_measure_types_and_levels.items():
        im_type, sa_period, sa_damping = from_string(imt)

        # prepare `output` and `hazard_curve` containers in the DB:
        container_ids = dict()
        if self.hc.mean_hazard_curves:
            mean_output = models.Output.objects.create_output(
                job=self.job,
                display_name='Mean Hazard Curves %s' % imt,
                output_type='hazard_curve'
            )
            mean_hc = models.HazardCurve.objects.create(
                output=mean_output,
                investigation_time=self.hc.investigation_time,
                imt=im_type,
                imls=imls,
                sa_period=sa_period,
                sa_damping=sa_damping,
                statistics='mean'
            )
            container_ids['mean'] = mean_hc.id

        if self.hc.quantile_hazard_curves:
            for quantile in self.hc.quantile_hazard_curves:
                q_output = models.Output.objects.create_output(
                    job=self.job,
                    display_name=(
                        '%s quantile Hazard Curves %s' % (quantile, imt)
                    ),
                    output_type='hazard_curve'
                )
                q_hc = models.HazardCurve.objects.create(
                    output=q_output,
                    investigation_time=self.hc.investigation_time,
                    imt=im_type,
                    imls=imls,
                    sa_period=sa_period,
                    sa_damping=sa_damping,
                    statistics='quantile',
                    quantile=quantile
                )
                container_ids['q%s' % quantile] = q_hc.id

        all_curves_for_imt = models.order_by_location(
            models.HazardCurveData.objects.all_curves_for_imt(
                self.job.id, im_type, sa_period, sa_damping))

        with transaction.commit_on_success(using='job_init'):
            inserter = writer.CacheInserter(
                models.HazardCurveData, CURVE_CACHE_SIZE)

            for chunk in models.queryset_iter(all_curves_for_imt,
                                              slice_incr):
                # slice each chunk by `num_rlzs` into `site_chunk`
                # and compute the aggregate
                for site_chunk in block_splitter(chunk, num_rlzs):
                    site = site_chunk[0].location
                    curves_poes = [x.poes for x in site_chunk]
                    curves_weights = [x.weight for x in site_chunk]

                    # do means and quantiles
                    # quantiles first:
                    if self.hc.quantile_hazard_curves:
                        for quantile in self.hc.quantile_hazard_curves:
                            if self.hc.number_of_logic_tree_samples == 0:
                                # explicitly weighted quantiles
                                q_curve = weighted_quantile_curve(
                                    curves_poes, curves_weights, quantile
                                )
                            else:
                                # implicitly weighted quantiles
                                q_curve = quantile_curve(
                                    curves_poes, quantile
                                )
                            inserter.add(
                                models.HazardCurveData(
                                    hazard_curve_id=(
                                        container_ids['q%s' % quantile]),
                                    poes=q_curve.tolist(),
                                    location=site.wkt)
                            )

                    # then means
                    if self.hc.mean_hazard_curves:
                        m_curve = mean_curve(
                            curves_poes, weights=curves_weights
                        )
                        inserter.add(
                            models.HazardCurveData(
                                hazard_curve_id=container_ids['mean'],
                                poes=m_curve.tolist(),
                                location=site.wkt)
                        )
            inserter.flush()
def do_aggregate_post_proc(self):
    """
    Grab hazard data for all realizations and sites from the database
    and compute mean and/or quantile aggregates (depending on which
    options are enabled in the calculation).

    Post-processing results will be stored directly into the database.
    """
    del self.source_collector  # save memory
    # one weight per logic-tree realization
    weights = [rlz.weight for rlz in models.LtRealization.objects.filter(
        lt_model__hazard_calculation=self.hc)]
    num_rlzs = len(weights)
    if not num_rlzs:
        logs.LOG.warn('No realizations for hazard_calculation_id=%d',
                      self.hc.id)
        return
    elif num_rlzs == 1 and self.hc.quantile_hazard_curves:
        # NOTE(review): this early return also skips the mean-curve
        # computation even when mean_hazard_curves is enabled — confirm
        # this is intentional and not just meant to skip quantiles.
        logs.LOG.warn(
            'There is only one realization, the configuration parameter '
            'quantile_hazard_curves should not be set')
        return

    if self.hc.mean_hazard_curves:
        # create a new `HazardCurve` 'container' record for mean
        # curves (virtual container for multiple imts)
        models.HazardCurve.objects.create(
            output=models.Output.objects.create_output(
                self.job, "mean-curves-multi-imt", "hazard_curve_multi"),
            statistics="mean", imt=None,
            investigation_time=self.hc.investigation_time)

    if self.hc.quantile_hazard_curves:
        for quantile in self.hc.quantile_hazard_curves:
            # create a new `HazardCurve` 'container' record for quantile
            # curves (virtual container for multiple imts)
            models.HazardCurve.objects.create(
                output=models.Output.objects.create_output(
                    self.job, 'quantile(%s)-curves' % quantile,
                    "hazard_curve_multi"),
                statistics="quantile", imt=None, quantile=quantile,
                investigation_time=self.hc.investigation_time)

    for imt, imls in self.hc.intensity_measure_types_and_levels.items():
        im_type, sa_period, sa_damping = from_string(imt)

        # prepare `output` and `hazard_curve` containers in the DB:
        container_ids = dict()
        if self.hc.mean_hazard_curves:
            mean_output = models.Output.objects.create_output(
                job=self.job,
                display_name='Mean Hazard Curves %s' % imt,
                output_type='hazard_curve'
            )
            mean_hc = models.HazardCurve.objects.create(
                output=mean_output,
                investigation_time=self.hc.investigation_time,
                imt=im_type,
                imls=imls,
                sa_period=sa_period,
                sa_damping=sa_damping,
                statistics='mean'
            )
            container_ids['mean'] = mean_hc.id

        if self.hc.quantile_hazard_curves:
            for quantile in self.hc.quantile_hazard_curves:
                q_output = models.Output.objects.create_output(
                    job=self.job,
                    display_name=(
                        '%s quantile Hazard Curves %s' % (quantile, imt)
                    ),
                    output_type='hazard_curve'
                )
                q_hc = models.HazardCurve.objects.create(
                    output=q_output,
                    investigation_time=self.hc.investigation_time,
                    imt=im_type,
                    imls=imls,
                    sa_period=sa_period,
                    sa_damping=sa_damping,
                    statistics='quantile',
                    quantile=quantile
                )
                container_ids['q%s' % quantile] = q_hc.id

        # num_rlzs * num_sites * num_levels
        # NB: different IMTs can have different num_levels
        all_curves_for_imt = numpy.array(self.curves_by_imt[imt])
        del self.curves_by_imt[imt]  # save memory

        inserter = writer.CacheInserter(
            models.HazardCurveData, max_cache_size=10000)

        # curve_poes below is an array num_rlzs * num_levels
        for i, site in enumerate(self.hc.site_collection):
            wkt = site.location.wkt2d
            curve_poes = numpy.array(
                [c_by_rlz[i] for c_by_rlz in all_curves_for_imt])

            # do means and quantiles
            # quantiles first:
            if self.hc.quantile_hazard_curves:
                for quantile in self.hc.quantile_hazard_curves:
                    if self.hc.number_of_logic_tree_samples == 0:
                        # explicitly weighted quantiles
                        q_curve = weighted_quantile_curve(
                            curve_poes, weights, quantile)
                    else:
                        # implicitly weighted quantiles
                        q_curve = quantile_curve(
                            curve_poes, quantile)
                    inserter.add(
                        models.HazardCurveData(
                            hazard_curve_id=(
                                container_ids['q%s' % quantile]),
                            poes=q_curve.tolist(),
                            location=wkt)
                    )

            # then means
            if self.hc.mean_hazard_curves:
                m_curve = mean_curve(curve_poes, weights=weights)
                inserter.add(
                    models.HazardCurveData(
                        hazard_curve_id=container_ids['mean'],
                        poes=m_curve.tolist(),
                        location=wkt)
                )
        inserter.flush()
def do_aggregate_post_proc(self):
    """
    Grab hazard data for all realizations and sites from the database
    and compute mean and/or quantile aggregates (depending on which
    options are enabled in the calculation).

    Post-processing results will be stored directly into the database.
    """
    num_rlzs = models.LtRealization.objects.filter(
        hazard_calculation=self.hc).count()

    # Number of whole sites that fit into one cache increment.  Use
    # floor division (``//``) so the result is an int on both Python 2
    # and Python 3; true division (``/``) would produce a float on
    # Python 3 and propagate it into `slice_incr` below.
    num_site_blocks_per_incr = int(CURVE_CACHE_SIZE) // int(num_rlzs)
    if num_site_blocks_per_incr == 0:
        # This means we have `num_rlzs` >= `CURVE_CACHE_SIZE`.
        # The minimum number of sites should be 1.
        num_site_blocks_per_incr = 1
    slice_incr = num_site_blocks_per_incr * num_rlzs  # unit: num records

    if self.hc.mean_hazard_curves:
        # create a new `HazardCurve` 'container' record for mean
        # curves (virtual container for multiple imts)
        models.HazardCurve.objects.create(
            output=models.Output.objects.create_output(
                self.job, "mean-curves-multi-imt", "hazard_curve_multi"),
            statistics="mean", imt=None,
            investigation_time=self.hc.investigation_time)

    if self.hc.quantile_hazard_curves:
        for quantile in self.hc.quantile_hazard_curves:
            # create a new `HazardCurve` 'container' record for quantile
            # curves (virtual container for multiple imts)
            models.HazardCurve.objects.create(
                output=models.Output.objects.create_output(
                    self.job, 'quantile(%s)-curves' % quantile,
                    "hazard_curve_multi"),
                statistics="quantile", imt=None, quantile=quantile,
                investigation_time=self.hc.investigation_time)

    for imt, imls in self.hc.intensity_measure_types_and_levels.items():
        im_type, sa_period, sa_damping = models.parse_imt(imt)

        # prepare `output` and `hazard_curve` containers in the DB:
        container_ids = dict()
        if self.hc.mean_hazard_curves:
            mean_output = models.Output.objects.create_output(
                job=self.job,
                display_name='mean-curves-%s' % imt,
                output_type='hazard_curve')
            mean_hc = models.HazardCurve.objects.create(
                output=mean_output,
                investigation_time=self.hc.investigation_time,
                imt=im_type,
                imls=imls,
                sa_period=sa_period,
                sa_damping=sa_damping,
                statistics='mean')
            container_ids['mean'] = mean_hc.id

        if self.hc.quantile_hazard_curves:
            for quantile in self.hc.quantile_hazard_curves:
                q_output = models.Output.objects.create_output(
                    job=self.job,
                    display_name=('quantile(%s)-curves-%s'
                                  % (quantile, imt)),
                    output_type='hazard_curve')
                q_hc = models.HazardCurve.objects.create(
                    output=q_output,
                    investigation_time=self.hc.investigation_time,
                    imt=im_type,
                    imls=imls,
                    sa_period=sa_period,
                    sa_damping=sa_damping,
                    statistics='quantile',
                    quantile=quantile)
                container_ids['q%s' % quantile] = q_hc.id

        all_curves_for_imt = models.order_by_location(
            models.HazardCurveData.objects.all_curves_for_imt(
                self.job.id, im_type, sa_period, sa_damping))

        with transaction.commit_on_success(using='reslt_writer'):
            inserter = writer.CacheInserter(models.HazardCurveData,
                                            CURVE_CACHE_SIZE)

            for chunk in models.queryset_iter(all_curves_for_imt,
                                              slice_incr):
                # slice each chunk by `num_rlzs` into `site_chunk`
                # and compute the aggregate
                for site_chunk in block_splitter(chunk, num_rlzs):
                    site = site_chunk[0].location
                    curves_poes = [x.poes for x in site_chunk]
                    curves_weights = [x.weight for x in site_chunk]

                    # do means and quantiles
                    # quantiles first:
                    if self.hc.quantile_hazard_curves:
                        for quantile in self.hc.quantile_hazard_curves:
                            if self.hc.number_of_logic_tree_samples == 0:
                                # explicitly weighted quantiles
                                q_curve = weighted_quantile_curve(
                                    curves_poes, curves_weights, quantile)
                            else:
                                # implicitly weighted quantiles
                                q_curve = quantile_curve(
                                    curves_poes, quantile)
                            inserter.add(
                                models.HazardCurveData(
                                    hazard_curve_id=(
                                        container_ids['q%s' % quantile]),
                                    poes=q_curve.tolist(),
                                    location=site.wkt))

                    # then means
                    if self.hc.mean_hazard_curves:
                        m_curve = mean_curve(curves_poes,
                                             weights=curves_weights)
                        inserter.add(
                            models.HazardCurveData(
                                hazard_curve_id=container_ids['mean'],
                                poes=m_curve.tolist(),
                                location=site.wkt))
            inserter.flush()