Example #1
def sum_tbl(tbl, kfield, vfields):
    """
    Aggregate a composite array and compute the totals on a given key.

    >>> dt = numpy.dtype([('name', (bytes, 10)), ('value', int)])
    >>> tbl = numpy.array([('a', 1), ('a', 2), ('b', 3)], dt)
    >>> sum_tbl(tbl, 'name', ['value'])['value']
    array([3, 3])
    """
    pairs = [(n, tbl.dtype[n]) for n in [kfield] + vfields]
    dt = numpy.dtype(pairs + [('counts', int)])

    def sum_all(group):
        vals = numpy.zeros(1, dt)[0]
        for rec in group:
            for vfield in vfields:
                vals[vfield] += rec[vfield]
            vals['counts'] += 1
        vals[kfield] = rec[kfield]
        return vals
    rows = groupby(tbl, operator.itemgetter(kfield), sum_all).values()
    array = numpy.zeros(len(rows), dt)
    for i, row in enumerate(rows):
        for j, name in enumerate(dt.names):
            array[i][name] = row[j]
    return array
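All of the snippets on this page call a three-argument groupby(items, key, reducer) helper (presumably openquake.baselib.general.groupby), which returns a dictionary key -> reduced group. For readers without OpenQuake installed, here is a minimal self-contained sketch of that calling convention; it is an assumption about the helper's behaviour, not its actual implementation:

import itertools
import operator


def groupby(items, kfunc, vfunc=list):
    """
    Group *items* by *kfunc* and apply *vfunc* to each group (passed as a
    list). Minimal stand-in for the assumed three-argument helper; note that
    it sorts by key, which the real helper presumably does not require.
    """
    return {key: vfunc(list(group))
            for key, group in itertools.groupby(sorted(items, key=kfunc),
                                                kfunc)}


# tiny usage mirroring the key/reducer pattern used by sum_tbl above
rows = [('a', 1), ('a', 2), ('b', 3)]
print(groupby(rows, operator.itemgetter(0),
              lambda grp: sum(r[1] for r in grp)))  # {'a': 3, 'b': 3}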
Example #2
def get_data_transfer(dstore):
    """
    Determine the amount of data transferred from the controller node
    to the workers and back in a classical calculation.

    :param dstore: a :class:`openquake.commonlib.datastore.DataStore` instance
    :returns: (block_info, to_send_forward, to_send_back)
    """
    oqparam = OqParam.from_(dstore.attrs)
    sitecol = dstore['sitecol']
    rlzs_assoc = dstore['rlzs_assoc']
    info = dstore['job_info']
    sources = dstore['composite_source_model'].get_sources()
    num_gsims_by_trt = groupby(rlzs_assoc, operator.itemgetter(0),
                               lambda group: sum(1 for row in group))
    gsims_assoc = rlzs_assoc.gsims_by_trt_id
    to_send_forward = 0
    to_send_back = 0
    block_info = []
    for block in split_in_blocks(sources, oqparam.concurrent_tasks or 1,
                                 operator.attrgetter('weight'),
                                 operator.attrgetter('trt_model_id')):
        num_gsims = num_gsims_by_trt.get(block[0].trt_model_id, 0)
        back = info['n_sites'] * info['n_levels'] * info['n_imts'] * num_gsims
        to_send_back += back * 8  # 8 bytes per float
        args = (block, sitecol, gsims_assoc, PerformanceMonitor(''))
        to_send_forward += sum(len(p) for p in parallel.pickle_sequence(args))
        block_info.append((len(block), block.weight))
    return numpy.array(block_info, block_dt), to_send_forward, to_send_back
Example #3
def view_rlzs_by_col(name, dstore):
    """
    :returns: a dictionary col_id -> realization ordinals
    """
    return groupby(dstore['rlz_col_assocs'],
                   lambda x: x['col'],
                   lambda rows: [row['rlz'] for row in rows])
Example #4
def upgrade_file(path):
    """Upgrade to the latest NRML version"""
    node0 = nrml.read(path, chatty=False)[0]
    shutil.copy(path, path + '.bak')  # make a backup of the original file
    tag = striptag(node0.tag)
    gml = True
    if tag == 'vulnerabilityModel':
        vf_dict, cat_dict = get_vulnerability_functions_04(path)
        # below I am converting into a NRML 0.5 vulnerabilityModel
        node0 = Node(
            'vulnerabilityModel', cat_dict,
            nodes=list(map(riskmodels.obj_to_node, vf_dict.values())))
        gml = False
    elif tag == 'fragilityModel':
        node0 = riskmodels.convert_fragility_model_04(
            nrml.read(path)[0], path)
        gml = False
    elif tag == 'sourceModel':
        node0 = nrml.read(path)[0]
        dic = groupby(node0.nodes, operator.itemgetter('tectonicRegion'))
        node0.nodes = [Node('sourceGroup',
                            dict(tectonicRegion=trt, name="group %s" % i),
                            nodes=srcs)
                       for i, (trt, srcs) in enumerate(dic.items(), 1)]
    with open(path, 'w') as f:
        nrml.write([node0], f, gml=gml)
Example #5
def get_gmfs(calc):
    """
    :param calc: a ScenarioDamage or ScenarioRisk calculator
    :returns: a dictionary of gmfs
    """
    if 'gmfs' in calc.oqparam.inputs:  # from file
        return read_gmfs_from_csv(calc)
    # else from rupture
    gmf = calc.datastore['gmfs/col00'].value
    # NB: if the hazard site collection has N sites, the hazard
    # filtered site collection for the nonzero GMFs has N' <= N sites
    # whereas the risk site collection associated to the assets
    # has N'' <= N' sites
    if calc.datastore.parent:
        haz_sitecol = calc.datastore.parent['sitecol']  # N' values
    else:
        haz_sitecol = calc.sitecol
    risk_indices = set(calc.sitecol.indices)  # N'' values
    N = len(haz_sitecol.complete)
    imt_dt = numpy.dtype([(imt, float) for imt in calc.oqparam.imtls])
    gmf_by_idx = general.groupby(gmf, lambda row: row['idx'])
    R = len(gmf_by_idx)
    # build a matrix N x R for each GSIM realization
    gmfs = {(trt_id, gsim): numpy.zeros((N, R), imt_dt)
            for trt_id, gsim in calc.rlzs_assoc}
    for rupid, rows in sorted(gmf_by_idx.items()):
        for sid, gmv in zip(haz_sitecol.indices, rows):
            if sid in risk_indices:
                for trt_id, gsim in gmfs:
                    gmfs[trt_id, gsim][sid, rupid] = gmv[gsim]
    return gmfs
Example #6
    def post_execute(self, result):
        """
        Save the losses in a compact form.

        :param result:
            a dictionary rlz_idx -> (loss_type, asset_id) -> (avg, ins)
        """
        fields = []
        for loss_type in self.riskmodel.get_loss_types():
            fields.append(('avg_loss~%s' % loss_type, float))
            fields.append(('ins_loss~%s' % loss_type, float))
        avg_loss_dt = numpy.dtype(fields)
        num_rlzs = len(self.rlzs_assoc.realizations)
        assets = riskinput.sorted_assets(self.assets_by_site)
        self.asset_no_by_id = {a.id: no for no, a in enumerate(assets)}
        avg_losses = numpy.zeros(
            (len(self.asset_no_by_id), num_rlzs), avg_loss_dt)

        for rlz_no in result:
            losses_by_lt_asset = result[rlz_no]
            by_asset = operator.itemgetter(1)
            for asset, keys in general.groupby(
                    losses_by_lt_asset, by_asset).items():
                asset_no = self.asset_no_by_id[asset]
                losses = []
                for (loss_type, _) in keys:
                    losses.extend(losses_by_lt_asset[loss_type, asset])
                avg_losses[asset_no, rlz_no] = tuple(losses)

        self.avg_losses = avg_losses
Example #7
def data_transfer(calc):
    """
    Determine the amount of data transferred from the controller node
    to the workers and back in a classical calculation.

    :returns: a triple (num_tasks, to_send_forward, to_send_back)
    """
    oqparam = calc.oqparam
    info = calc.job_info
    calc.monitor.oqparam = oqparam
    sources = calc.composite_source_model.get_sources()
    num_gsims_by_trt = groupby(calc.rlzs_assoc, operator.itemgetter(0),
                               lambda group: sum(1 for row in group))
    gsims_assoc = calc.rlzs_assoc.get_gsims_by_trt_id()
    to_send_forward = 0
    to_send_back = 0
    n_tasks = 0
    for block in split_in_blocks(sources, oqparam.concurrent_tasks,
                                 operator.attrgetter('weight'),
                                 operator.attrgetter('trt_model_id')):
        num_gsims = num_gsims_by_trt[block[0].trt_model_id]
        back = info['n_sites'] * info['n_levels'] * info['n_imts'] * num_gsims
        to_send_back += back * 8  # 8 bytes per float
        args = (block, calc.sitecol, gsims_assoc, calc.monitor)
        logging.info('Pickling task args #%d', n_tasks)
        to_send_forward += sum(len(p) for p in parallel.pickle_sequence(args))
        n_tasks += 1
    return n_tasks, to_send_forward, to_send_back
Example #8
def write_source_model(dest, sources_or_groups, name=None,
                       investigation_time=None):
    """
    Writes a source model to XML.

    :param dest:
        Destination path
    :param sources_or_groups:
        Source model in different formats
    :param name:
        Name of the source model (if missing, extracted from the filename)
    """
    if isinstance(sources_or_groups, nrml.SourceModel):
        with open(dest, 'wb') as f:
            nrml.write([obj_to_node(sources_or_groups)], f, '%s')
        return
    if isinstance(sources_or_groups[0], sourceconverter.SourceGroup):
        groups = sources_or_groups
    else:  # passed a list of sources
        srcs_by_trt = groupby(
            sources_or_groups, operator.attrgetter('tectonic_region_type'))
        groups = [sourceconverter.SourceGroup(trt, srcs_by_trt[trt])
                  for trt in srcs_by_trt]
    name = name or os.path.splitext(os.path.basename(dest))[0]
    nodes = list(map(obj_to_node, sorted(groups)))
    attrs = {"name": name}
    if investigation_time is not None:
        attrs['investigation_time'] = investigation_time
    source_model = Node("sourceModel", attrs, nodes=nodes)
    with open(dest, 'wb') as f:
        nrml.write([source_model], f, '%s')
    return dest
Example #9
def export_ses_xml(ekey, dstore):
    """
    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    """
    fmt = ekey[-1]
    oq = dstore['oqparam']
    try:
        csm_info = dstore['rlzs_assoc'].csm_info
    except AttributeError:  # for scenario calculators don't export
        return []
    sescollection = dstore['sescollection']
    col_id = 0
    fnames = []
    for sm in csm_info.source_models:
        for trt_model in sm.trt_models:
            sesruptures = list(sescollection[col_id].values())
            col_id += 1
            ses_coll = SESCollection(
                groupby(sesruptures, operator.attrgetter('ses_idx')),
                sm.path, oq.investigation_time)
            smpath = '_'.join(sm.path)
            fname = 'ses-%d-smltp_%s.%s' % (trt_model.id, smpath, fmt)
            dest = os.path.join(dstore.export_dir, fname)
            globals()['_export_ses_' + fmt](dest, ses_coll)
            fnames.append(fname)
    return fnames
Example #10
    def test_spatial_correlation(self):
        expected = {sc1: [0.99, 0.41],
                    sc2: [0.99, 0.64],
                    sc3: [0.99, 0.22]}

        for case in expected:
            self.run_calc(case.__file__, 'job.ini')
            oq = self.calc.oqparam
            self.assertEqual(list(oq.imtls), ['PGA'])
            dstore = DataStore(self.calc.datastore.calc_id)
            gmf_by_rupid = groupby(
                dstore['gmfs/col00'].value,
                lambda row: row['idx'],
                lambda rows: [row['BooreAtkinson2008']['PGA'] for row in rows])
            dstore.close()
            gmvs_site_1 = []
            gmvs_site_2 = []
            for rupid, gmf in gmf_by_rupid.iteritems():
                gmvs_site_1.append(gmf[0])
                gmvs_site_2.append(gmf[1])
            joint_prob_0_5 = joint_prob_of_occurrence(
                gmvs_site_1, gmvs_site_2, 0.5, oq.investigation_time,
                oq.ses_per_logic_tree_path)
            joint_prob_1_0 = joint_prob_of_occurrence(
                gmvs_site_1, gmvs_site_2, 1.0, oq.investigation_time,
                oq.ses_per_logic_tree_path)

            p05, p10 = expected[case]
            numpy.testing.assert_almost_equal(joint_prob_0_5, p05, decimal=1)
            numpy.testing.assert_almost_equal(joint_prob_1_0, p10, decimal=1)
Example #11
def export_ses_csv(ekey, dstore):
    """
    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    """
    if 'events' not in dstore:  # scenario
        return []
    dest = dstore.export_path('ruptures.csv')
    header = ('id mag centroid_lon centroid_lat centroid_depth trt '
              'strike dip rake boundary').split()
    csm_info = dstore['csm_info']
    grp_trt = csm_info.grp_trt()
    sm_by_grp = csm_info.get_sm_by_grp()
    rows = []
    for grp_id, trt in sorted(grp_trt.items()):
        sm = 'sm-%04d' % sm_by_grp[grp_id]
        etags = build_etags(dstore['events/' + sm])
        dic = groupby(etags, util.get_serial)
        for r in dstore['rup_data/grp-%02d' % grp_id]:
            for etag in dic[r['rupserial']]:
                boundary = 'MULTIPOLYGON(%s)' % r['boundary']
                rows.append(
                    (etag, r['mag'], r['lon'], r['lat'], r['depth'],
                     trt, r['strike'], r['dip'], r['rake'], boundary))
    rows.sort(key=operator.itemgetter(0))
    writers.write_csv(dest, rows, header=header)
    return [dest]
Example #12
def _pointsources2multipoints(srcs, i):
    allsources = []
    for key, sources in groupby(srcs, get_key).items():
        if len(sources) == 1:  # there is a single source
            allsources.extend(sources)
            continue
        msr, rar, usd, lsd, hd, npd = key
        mfds = [src[3] for src in sources]
        points = []
        for src in sources:
            points.extend(~src.pointGeometry.Point.pos)
        geom = Node('multiPointGeometry')
        geom.append(Node('gml:posList', text=points))
        geom.append(Node('upperSeismoDepth', text=usd))
        geom.append(Node('lowerSeismoDepth', text=lsd))
        node = Node(
            'multiPointSource',
            dict(id='mps-%d' % i, name='multiPointSource-%d' % i),
            nodes=[geom])
        node.append(Node("magScaleRel", text=msr))
        node.append(Node("ruptAspectRatio", text=rar))
        node.append(mfds2multimfd(mfds))
        node.append(Node('nodalPlaneDist', nodes=[
            Node('nodalPlane', dict(probability=prob, rake=rake,
                                    strike=strike, dip=dip))
            for prob, rake, strike, dip in npd]))
        node.append(Node('hypoDepthDist', nodes=[
            Node('hypoDepth', dict(depth=depth, probability=prob))
            for prob, depth in hd]))
        allsources.append(node)
        i += 1
    return i, allsources
Example #13
    def gen_outputs(self, riskinput, rlzs_assoc, monitor,
                    assetcol=None):
        """
        Group the assets per taxonomy and compute the outputs by using the
        underlying riskmodels. Yield the outputs generated as dictionaries
        out_by_lr.

        :param riskinput: a RiskInput instance
        :param rlzs_assoc: a RlzsAssoc instance
        :param monitor: a monitor object used to measure the performance
        :param assetcol: not None only for event based risk
        """
        mon_hazard = monitor('building hazard')
        mon_risk = monitor('computing riskmodel', measuremem=False)
        with mon_hazard:
            assets_by_site = (riskinput.assets_by_site if assetcol is None
                              else assetcol.assets_by_site())
            hazard_by_site = riskinput.get_hazard(
                rlzs_assoc, mon_hazard(measuremem=False))
        for sid, assets in enumerate(assets_by_site):
            hazard = hazard_by_site[sid]
            the_assets = groupby(assets, by_taxonomy)
            for taxonomy, assets in the_assets.items():
                riskmodel = self[taxonomy]
                epsgetter = riskinput.epsilon_getter(
                    [asset.ordinal for asset in assets])
                for imt, taxonomies in riskinput.imt_taxonomies:
                    if taxonomy in taxonomies:
                        with mon_risk:
                            yield riskmodel.out_by_lr(
                                imt, assets, hazard[imt], epsgetter)
        if hasattr(hazard_by_site, 'close'):  # for event based risk
            monitor.gmfbytes = hazard_by_site.close()
Example #14
 def get_trt_sources(self, optimize_same_id=None):
     """
     :returns: a list of pairs [(trt, group of sources)]
     """
     atomic = []
     acc = AccumDict(accum=[])
     for sm in self.source_models:
         for grp in sm.src_groups:
             if grp and grp.atomic:
                 atomic.append((grp.trt, grp))
             elif grp:
                 acc[grp.trt].extend(grp)
     if optimize_same_id is None:
         optimize_same_id = self.optimize_same_id
     if optimize_same_id is False:
         return atomic + list(acc.items())
     # extract a single source from multiple sources with the same ID
     n = 0
     tot = 0
     dic = {}
     for trt in acc:
         dic[trt] = []
         for grp in groupby(acc[trt], lambda x: x.source_id).values():
             src = grp[0]
             n += 1
             tot += len(grp)
             # src.src_group_id can be a list if get_sources_by_trt was
             # called before
             if len(grp) > 1 and not isinstance(src.src_group_id, list):
                 src.src_group_id = [s.src_group_id for s in grp]
             dic[trt].append(src)
     if n < tot:
         logging.info('Reduced %d sources to %d sources with unique IDs',
                      tot, n)
     return atomic + list(dic.items())
Example #15
def info(calculators, gsims, views, exports, report, input_file=''):
    """
    Give information. You can pass the name of an available calculator,
    a job.ini file, or a zip archive with the input files.
    """
    logging.basicConfig(level=logging.INFO)
    if calculators:
        for calc in sorted(base.calculators):
            print(calc)
    if gsims:
        for gs in gsim.get_available_gsims():
            print(gs)
    if views:
        for name in sorted(datastore.view):
            print(name)
    if exports:
        dic = groupby(export, operator.itemgetter(0),
                      lambda group: [r[1] for r in group])
        n = 0
        for exporter, formats in dic.items():
            print(exporter, formats)
            n += len(formats)
        print('There are %d exporters defined.' % n)
    if input_file.endswith('.xml'):
        print(nrml.read(input_file).to_str())
    elif input_file.endswith(('.ini', '.zip')):
        with Monitor('info', measuremem=True) as mon:
            if report:
                print('Generated', reportwriter.build_report(input_file))
            else:
                print_csm_info(input_file)
        if mon.duration > 1:
            print(mon)
    elif input_file:
        print("No info for '%s'" % input_file)
Example #16
def classical_risk(riskinput, riskmodel, rlzs_assoc, monitor):
    """
    Compute and return the average losses for each asset.

    :param riskinput:
        a :class:`openquake.risklib.riskinput.RiskInput` object
    :param riskmodel:
        a :class:`openquake.risklib.riskinput.CompositeRiskModel` instance
    :param rlzs_assoc:
        associations (trt_id, gsim) -> realizations
    :param monitor:
        :class:`openquake.baselib.performance.Monitor` instance
    """
    oq = monitor.oqparam
    ins = oq.insured_losses
    R = len(rlzs_assoc.realizations)
    result = dict(
        loss_curves=[], loss_maps=[], stat_curves=[], stat_maps=[])
    for out_by_lr in riskmodel.gen_outputs(riskinput, rlzs_assoc, monitor):
        for (l, r), out in sorted(out_by_lr.items()):
            for i, asset in enumerate(out.assets):
                aid = asset.ordinal
                avg = out.average_losses[i]
                avg_ins = (out.average_insured_losses[i]
                           if ins else numpy.nan)
                lcurve = (
                    out.loss_curves[i, 0],
                    out.loss_curves[i, 1], avg)
                if ins:
                    lcurve += (
                        out.insured_curves[i, 0],
                        out.insured_curves[i, 1], avg_ins)
                else:
                    lcurve += (None, None, None)
                result['loss_curves'].append((l, r, aid, lcurve))

                # no insured, shape (P, N)
                result['loss_maps'].append(
                    (l, r, aid, out.loss_maps[:, i]))

        # compute statistics
        if R > 1:
            for l, lrs in groupby(out_by_lr, operator.itemgetter(0)).items():
                outs = [out_by_lr[lr] for lr in lrs]
                curve_resolution = outs[0].loss_curves.shape[-1]
                statsbuilder = scientific.StatsBuilder(
                    oq.quantile_loss_curves,
                    oq.conditional_loss_poes, oq.poes_disagg,
                    curve_resolution, insured_losses=oq.insured_losses)
                stats = statsbuilder.build(outs)
                stat_curves, stat_maps = statsbuilder.get_curves_maps(stats)
                for i, asset in enumerate(out_by_lr.assets):
                    result['stat_curves'].append(
                        (l, asset.ordinal, stat_curves[:, i]))
                    if len(stat_maps):
                        result['stat_maps'].append(
                            (l, asset.ordinal, stat_maps[:, i]))

    return result
Example #17
 def get_imt_taxonomies(self):
     """
     For each IMT in the risk model, yield pairs (imt, taxonomies)
     with the taxonomies associated to the IMT. For fragility functions,
     there is a single taxonomy for each IMT.
     """
     by_imt = operator.itemgetter(0)
     by_taxo = operator.itemgetter(1)
     return groupby(self, by_imt, lambda group: map(by_taxo, group)).items()
Example #18
 def get_mesh_assets_by_site(self):
     """
     :returns: (Mesh instance, assets_by_site list)
     """
     assets_by_loc = general.groupby(self, key=lambda a: a.location)
     mesh = geo.Mesh.from_coords(list(assets_by_loc))
     assets_by_site = [
         assets_by_loc[lonlat] for lonlat in zip(mesh.lons, mesh.lats)]
     return mesh, assets_by_site
Example #19
 def dmg_dist_per_asset_node(self, data):
     """
     :param data: a sequence of records with attributes .exposure_data,
                  .mean and .stddev
     :returns: a `dmgDistPerAsset` node
     """
     node = Node('dmgDistPerAsset', nodes=[self.dmg_states])
     data_by_location = groupby(data, lambda r: r.exposure_data.site)
     for loc in data_by_location:
         dd = Node('DDNode', nodes=[self.point_node(loc)])
         data_by_asset = groupby(
             data_by_location[loc],
             lambda r: r.exposure_data.asset_ref,
             lambda rows: [(r.mean, r.stddev) for r in rows])
         for asset_ref, data in data_by_asset.items():
             means, stddevs = zip(*data)
             dd.append(self.asset_node(asset_ref, means, stddevs))
         node.append(dd)
     return node
Example #20
def view_ruptures_per_trt(token, dstore):
    tbl = []
    header = ('source_model trt_id trt num_sources '
              'eff_ruptures weight'.split())
    num_trts = 0
    tot_sources = 0
    eff_ruptures = 0
    tot_weight = 0
    source_info = dstore['source_info'].value
    csm_info = dstore['rlzs_assoc'].csm_info
    w = groupby(source_info, operator.itemgetter('trt_model_id'),
                lambda rows: sum(r['weight'] for r in rows))
    n = groupby(source_info, operator.itemgetter('trt_model_id'),
                lambda rows: sum(1 for r in rows))
    for i, sm in enumerate(csm_info.source_models):
        # NB: the number of effective ruptures per tectonic region model
        # is stored in the array eff_ruptures as a literal string describing
        # an array {trt_model_id: num_ruptures}; see the method
        # CompositionInfo.get_rlzs_assoc
        erdict = ast.literal_eval(csm_info.eff_ruptures[i])
        for trt_model in sm.trt_models:
            trt = source.capitalize(trt_model.trt)
            er = erdict.get(trt, 0)  # effective ruptures
            if er:
                num_trts += 1
                num_sources = n.get(trt_model.id, 0)
                tot_sources += num_sources
                eff_ruptures += er
                weight = w.get(trt_model.id, 0)
                tot_weight += weight
                tbl.append((sm.name, trt_model.id, trt,
                            num_sources, er, weight))
    rows = [('#TRT models', num_trts),
            ('#sources', tot_sources),
            ('#eff_ruptures', eff_ruptures),
            ('filtered_weight', tot_weight)]
    if len(tbl) > 1:
        summary = '\n\n' + rst_table(rows)
    else:
        summary = ''
    return rst_table(tbl, header=header) + summary
Example #21
    def get_rlzs_assoc(self, get_weight=lambda tm: tm.num_ruptures):
        """
        Return a RlzsAssoc with fields realizations, gsim_by_trt,
        rlz_idx and trt_gsims.

        :param get_weight: a function trt_model -> positive number
        """
        assoc = RlzsAssoc(self.get_info())
        random_seed = self.source_model_lt.seed
        num_samples = self.source_model_lt.num_samples
        idx = 0
        for smodel in self.source_models:
            # collect the effective tectonic region types
            trts = set(tm.trt for tm in smodel.trt_models if get_weight(tm))
            # recompute the GSIM logic tree if needed
            if trts != set(smodel.gsim_lt.tectonic_region_types):
                before = smodel.gsim_lt.get_num_paths()
                smodel.gsim_lt.reduce(trts)
                after = smodel.gsim_lt.get_num_paths()
                logging.warn('Reducing the logic tree of %s from %d to %d '
                             'realizations', smodel.name, before, after)
            if num_samples:  # sampling
                rnd = random.Random(random_seed + idx)
                rlzs = logictree.sample(smodel.gsim_lt, smodel.samples, rnd)
            else:  # full enumeration
                rlzs = logictree.get_effective_rlzs(smodel.gsim_lt)
            if rlzs:
                idx = assoc._add_realizations(idx, smodel, rlzs, trts)
                for trt_model in smodel.trt_models:
                    trt_model.gsims = smodel.gsim_lt.values[trt_model.trt]
            else:
                logging.warn('No realizations for %s, %s',
                             '_'.join(smodel.path), smodel.name)
        if assoc.realizations:
            if num_samples:
                assert len(assoc.realizations) == num_samples
                for rlz in assoc.realizations:
                    rlz.weight = 1. / num_samples
            else:
                tot_weight = sum(rlz.weight for rlz in assoc.realizations)
                if tot_weight == 0:
                    raise ValueError('All realizations have zero weight??')
                elif abs(tot_weight - 1) > 1E-12:  # allow for rounding errors
                    logging.warn('Some source models are not contributing, '
                                 'weights are being rescaled')
                for rlz in assoc.realizations:
                    rlz.weight = rlz.weight / tot_weight

        assoc.gsims_by_trt_id = groupby(
            assoc.rlzs_assoc, operator.itemgetter(0),
            lambda group: sorted(valid.gsim(gsim) for trt_id, gsim in group))

        return assoc
Example #22
    def combine_gmfs(self, gmfs):  # this is used in the export
        """
        :param gmfs: datastore /gmfs object
        :returns: a list of dictionaries rupid -> gmf array
        """
        gmfs_by_rupid = groupby(
            gmfs["col00"].value, lambda row: row["idx"], list)
        dicts = [{} for rlz in self.realizations]
        for rlz in self.realizations:
            gs = str(rlz)
            for rupid, rows in gmfs_by_rupid.items():
                dicts[rlz.ordinal][rupid] = numpy.array(
                    [r[gs] for r in rows], rows[0][gs].dtype)
        return dicts
Example #23
 def dmg_dist_per_taxonomy_node(self, data):
     """
     :param data: a sequence of records with attributes .taxonomy,
                  .mean and .stddev
     :returns: a `dmgDistPerTaxonomy` node
     """
     node = Node('dmgDistPerTaxonomy', nodes=[self.dmg_states])
     data_by_taxo = groupby(data, operator.attrgetter('taxonomy'))
     for taxonomy in data_by_taxo:
         means = [row.mean for row in data_by_taxo[taxonomy]]
         stddevs = [row.stddev for row in data_by_taxo[taxonomy]]
         node.append(self.dd_node_taxo(taxonomy, means, stddevs))
     return node
Example #24
def calc_results(request, calc_id):
    """
    Get a summarized list of calculation results for a given ``calc_id``.
    Result is a JSON array of objects containing the following attributes:

        * id
        * name
        * type (hazard_curve, hazard_map, etc.)
        * url (the exact url where the full result can be accessed)
    """
    user = utils.get_user_data(request)

    # If the specified calculation doesn't exist OR is not yet complete,
    # throw back a 404.
    try:
        oqjob = oqe_models.OqJob.objects.get(id=calc_id)
        if not user['is_super'] and oqjob.user_name != user['name']:
            return HttpResponseNotFound()
        if not oqjob.status == 'complete':
            return HttpResponseNotFound()
    except ObjectDoesNotExist:
        return HttpResponseNotFound()
    base_url = _get_base_url(request)

    # NB: export_output has as keys the list (output_type, extension)
    # so this returns an ordered map output_type -> extensions such as
    # OrderedDict([('agg_loss_curve', ['xml', 'csv']), ...])
    output_types = groupby(export_output, lambda oe: oe[0],
                           lambda oes: [e for o, e in oes])
    results = oq_engine.get_outputs(calc_id)
    if not results:
        return HttpResponseNotFound()

    response_data = []
    for result in results:
        try:  # output from the old calculators
            rtype = result.output_type
            outtypes = output_types[rtype]
        except KeyError:
            try:  # output from the datastore
                rtype = result.ds_key
                outtypes = output_types[rtype]
            except KeyError:
                continue  # non-exportable outputs should not be shown
        url = urlparse.urljoin(base_url, 'v1/calc/result/%d' % result.id)
        datum = dict(
            id=result.id, name=result.display_name, type=rtype,
            outtypes=outtypes, url=url)
        response_data.append(datum)

    return HttpResponse(content=json.dumps(response_data))
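The registry lookup above only needs the keys of export_output, which are (output_type, extension) pairs; the same mapping can be sketched with a hypothetical key list (using itertools.groupby, which requires the keys to be contiguous, unlike the assumed OpenQuake helper):

from itertools import groupby as itgroupby

# hypothetical (output_type, extension) registry keys
keys = [('agg_loss_curve', 'xml'), ('agg_loss_curve', 'csv'),
        ('hazard_curve', 'xml')]
output_types = {otype: [ext for _, ext in group]
                for otype, group in itgroupby(keys, key=lambda oe: oe[0])}
print(output_types)
# {'agg_loss_curve': ['xml', 'csv'], 'hazard_curve': ['xml']}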
Example #25
    def gen_outputs(self, riskinput, rlzs_assoc, monitor, assetcol=None):
        """
        Group the assets per taxonomy and compute the outputs by using the
        underlying riskmodels. Yield the outputs generated as dictionaries
        out_by_lr.

        :param riskinput: a RiskInput instance
        :param rlzs_assoc: a RlzsAssoc instance
        :param monitor: a monitor object used to measure the performance
        :param assetcol: not None only for event based risk
        """
        mon_context = monitor("building context")
        mon_hazard = monitor("building hazard")
        mon_risk = monitor("computing risk", measuremem=False)
        with mon_context:
            assets_by_site = (riskinput.assets_by_site if assetcol is None
                              else assetcol.assets_by_site())
            hazard_getter = riskinput.hazard_getter(
                rlzs_assoc, mon_hazard(measuremem=False))

        # group the assets by taxonomy
        taxonomies = set()
        with monitor("grouping assets by taxonomy"):
            dic = collections.defaultdict(list)
            for i, assets in enumerate(assets_by_site):
                group = groupby(assets, by_taxonomy)
                for taxonomy in group:
                    epsgetter = riskinput.epsilon_getter(
                        [asset.ordinal for asset in group[taxonomy]])
                    dic[taxonomy].append((i, group[taxonomy], epsgetter))
                    taxonomies.add(taxonomy)

        if hasattr(riskinput, "ses_ruptures"):  # event based
            grp_id = riskinput.ses_ruptures[0].grp_id
            rlzs = rlzs_assoc.get_rlzs_by_grp_id()[grp_id]
        else:
            rlzs = rlzs_assoc.realizations
        for rlz in rlzs:
            with mon_hazard:
                hazard = hazard_getter(rlz)
            for taxonomy in sorted(taxonomies):
                riskmodel = self[taxonomy]
                for lt in self.loss_types:
                    imt = riskmodel.risk_functions[lt].imt
                    with mon_risk:
                        for i, assets, epsgetter in dic[taxonomy]:
                            haz = hazard[i].get(imt, ())
                            if len(haz):
                                out = riskmodel(lt, assets, haz, epsgetter)
                                if out:  # can be None in scenario_risk
                                    out.lr = self.lti[lt], rlz.ordinal
                                    yield out
        if hasattr(hazard_getter, "gmfbytes"):  # for event based risk
            monitor.gmfbytes = hazard_getter.gmfbytes
Example #26
def classical_risk(riskinput, riskmodel, monitor):
    """
    Compute and return the average losses for each asset.

    :param riskinput:
        a :class:`openquake.risklib.riskinput.RiskInput` object
    :param riskmodel:
        a :class:`openquake.risklib.riskinput.CompositeRiskModel` instance
    :param monitor:
        :class:`openquake.baselib.performance.Monitor` instance
    """
    oq = monitor.oqparam
    ins = oq.insured_losses
    result = dict(loss_curves=[], stat_curves=[])
    outputs = list(riskmodel.gen_outputs(riskinput, monitor))
    for out in outputs:
        l, r = out.lr
        for i, asset in enumerate(out.assets):
            aid = asset.ordinal
            avg = out.average_losses[i]
            avg_ins = (out.average_insured_losses[i]
                       if ins else numpy.nan)
            lcurve = (
                out.loss_curves[i, 0],
                out.loss_curves[i, 1], avg)
            if ins:
                lcurve += (
                    out.insured_curves[i, 0],
                    out.insured_curves[i, 1], avg_ins)
            else:
                lcurve += (None, None, None)
            result['loss_curves'].append((l, r, aid, lcurve))

        # compute statistics
        if len(riskinput.rlzs) > 1:
            for (l, assets), outs in groupby(outputs, by_l_assets).items():
                weights = []
                for out in outs:  # outputs with the same loss type and assets
                    weights.append(riskinput.rlzs[out.lr[1]].weight)
                for i, asset in enumerate(assets):
                    avg_stats = compute_stats(
                        numpy.array([out.average_losses for out in outs]),
                        oq.quantile_loss_curves, weights)
                    losses = out.loss_curves[i, 0]
                    poes_stats = compute_stats(
                        numpy.array([out.loss_curves[i, 1] for out in outs]),
                        oq.quantile_loss_curves, weights)
                    result['stat_curves'].append(
                        (l, asset.ordinal, losses, poes_stats, avg_stats))

    return result
Example #27
def get_effective_rlzs(rlzs):
    """
    Group together realizations with the same unique identifier (uid)
    and yield the first representative of each group.
    """
    effective = []
    ordinal = 0
    for uid, group in groupby(rlzs, operator.attrgetter("uid")).items():
        rlz = group[0]
        if all(path == "@" for path in rlz.lt_uid):  # empty realization
            continue
        effective.append(Realization(
            rlz.value, sum(r.weight for r in group),
            rlz.lt_path, ordinal, rlz.lt_uid))
        ordinal += 1
    return effective
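Example #27 keeps the first realization of each uid group and sums the group weights; the merging step in isolation, with a hypothetical lightweight record (the real Realization class carries more fields):

import collections
import operator
from itertools import groupby as itgroupby

Rlz = collections.namedtuple('Rlz', 'uid weight')  # hypothetical record

rlzs = [Rlz('b1~g1', 0.25), Rlz('b1~g1', 0.25), Rlz('b2~g1', 0.5)]
by_uid = operator.attrgetter('uid')
merged = [(uid, sum(r.weight for r in group))
          for uid, group in itgroupby(sorted(rlzs, key=by_uid), by_uid)]
print(merged)  # [('b1~g1', 0.5), ('b2~g1', 0.5)]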
Example #28
def view_ruptures_per_trt(token, dstore):
    tbl = []
    header = ('source_model grp_id trt num_sources '
              'eff_ruptures tot_ruptures'.split())
    num_trts = 0
    tot_sources = 0
    eff_ruptures = 0
    tot_ruptures = 0
    source_info = dstore['source_info'].value
    csm_info = dstore['csm_info']
    r = groupby(source_info, operator.itemgetter('grp_id'),
                lambda rows: sum(r['num_ruptures'] for r in rows))
    n = groupby(source_info, operator.itemgetter('grp_id'),
                lambda rows: sum(1 for r in rows))
    for i, sm in enumerate(csm_info.source_models):
        for src_group in sm.src_groups:
            trt = source.capitalize(src_group.trt)
            er = src_group.eff_ruptures
            if er:
                num_trts += 1
                num_sources = n.get(src_group.id, 0)
                tot_sources += num_sources
                eff_ruptures += er
                ruptures = r.get(src_group.id, 0)
                tot_ruptures += ruptures
                tbl.append((sm.name, src_group.id, trt,
                            num_sources, er, ruptures))
    rows = [('#TRT models', num_trts),
            ('#sources', tot_sources),
            ('#eff_ruptures', eff_ruptures),
            ('#tot_ruptures', tot_ruptures),
            ('#tot_weight', csm_info.tot_weight), ]
    if len(tbl) > 1:
        summary = '\n\n' + rst_table(rows)
    else:
        summary = ''
    return rst_table(tbl, header=header) + summary
Example #29
 def _parse_header(self, header):
     fields = []  # pairs (name, dtype), for instance ('PGA', F32)
     cols = []  # pairs (name, float), for instance ('PGA', 0.1)
     for col in header:
         if '-' in col:  # for instance PGA-0.1
             cols.append(col.split('-', 1))
         else:  # for lon and lat
             fields.append((col, F32))
     imtls = {}
     for imt, imls in groupby(
             cols, operator.itemgetter(0),
             lambda g: [F32(r[1]) for r in g]).items():
         fields.append((imt, (F32, len(imls))))
         imtls[imt] = imls
     return DictArray(imtls), fields
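Example #29 groups header columns like 'PGA-0.1' into an IMT -> levels mapping; a condensed sketch of that grouping on a hypothetical hazard-curve header, without the OpenQuake DictArray wrapper:

import operator
from itertools import groupby as itgroupby

import numpy
F32 = numpy.float32

# hypothetical header: lon, lat, then IMT-level columns
header = ['lon', 'lat', 'PGA-0.1', 'PGA-0.2', 'SA(0.5)-0.1', 'SA(0.5)-0.2']
cols = [col.split('-', 1) for col in header if '-' in col]
imtls = {imt: [F32(iml) for _, iml in grp]
         for imt, grp in itgroupby(cols, key=operator.itemgetter(0))}
print(imtls)  # two IMTs ('PGA' and 'SA(0.5)') with two float32 levels each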
Example #30
def regroup(idx_gmv_imt_triples):
    """
    Regroup the GMF data in a form suitable for export.

    >>> data = [(1, 0.1, 'PGA'), (2, 0.2, 'PGA'),
    ...         (1, 0.11, 'SA(0.1)'), (2, 0.21, 'SA(0.1)'),
    ...         (1, 0.12, 'SA(0.2)'), (2, 0.22, 'SA(0.2)')]
    >>> regroup(data)
    [(1, 2), [0.1, 0.2], [0.11, 0.21], [0.12, 0.22]]
    """
    def reducegroup(group):
        indices = []
        gmvs = []
        for sid, gmv, imt in group:
            indices.append(sid)
            gmvs.append(gmv)
        return tuple(indices), gmvs
    by_imt = operator.itemgetter(2)
    dic = groupby(idx_gmv_imt_triples, by_imt, reducegroup)
    by_indices = operator.itemgetter(0)
    val = groupby(dic.values(), by_indices,
                  lambda group: [gmvs for indices, gmvs in group])
    [(indices, gmvs_by_imt)] = val.items()
    return [indices] + gmvs_by_imt
Example #31
def reduce_sources(sources_with_same_id):
    """
    :param sources_with_same_id: a list of sources with the same source_id
    :returns: a list of truly unique sources, ordered by grp_id
    """
    out = []
    for src in sources_with_same_id:
        dic = {k: v for k, v in vars(src).items() if k not in 'grp_id samples'}
        src.checksum = zlib.adler32(pickle.dumps(dic, protocol=4))
    for srcs in general.groupby(sources_with_same_id,
                                operator.attrgetter('checksum')).values():
        # duplicate sources: same id, same checksum
        src = srcs[0]
        if len(srcs) > 1:  # happens in classical/case_20
            src.grp_id = tuple(s.grp_id for s in srcs)
        else:
            src.grp_id = src.grp_id,
        out.append(src)
    out.sort(key=operator.attrgetter('grp_id'))
    return out
Example #32
    def extract(self, rlz_indices, csm_info):
        """
        Extract a RlzsAssoc instance containing only the given realizations.

        :param rlz_indices: a list of realization indices from 0 to R - 1
        """
        assoc = self.__class__(csm_info)
        if len(rlz_indices) == 1:
            realizations = [self.realizations[rlz_indices[0]]]
        else:
            realizations = operator.itemgetter(*rlz_indices)(self.realizations)
        rlzs_smpath = groupby(realizations, operator.attrgetter('sm_lt_path'))
        smodel_from = {sm.path: sm for sm in csm_info.source_models}
        for smpath, rlzs in rlzs_smpath.items():
            sm = smodel_from[smpath]
            trts = set(sg.trt for sg in sm.src_groups)
            assoc._add_realizations(
                [r.ordinal for r in rlzs], sm,
                csm_info.gsim_lt.reduce(trts), [rlz.gsim_rlz for rlz in rlzs])
        assoc._init()
        return assoc
Example #33
def get_sitecol_assetcol(oqparam, exposure):
    """
    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :returns:
        the site collection and the asset collection
    """
    assets_by_loc = groupby(exposure.assets, key=lambda a: a.location)
    lons, lats = zip(*sorted(assets_by_loc))
    mesh = geo.Mesh(numpy.array(lons), numpy.array(lats))
    sitecol = get_site_collection(oqparam, mesh)
    assets_by_site = []
    for lonlat in zip(sitecol.lons, sitecol.lats):
        assets = assets_by_loc[lonlat]
        assets_by_site.append(sorted(assets, key=operator.attrgetter('idx')))
    assetcol = riskinput.AssetCollection(assets_by_site,
                                         exposure.cost_calculator,
                                         oqparam.time_event,
                                         time_events=hdf5.array_of_vstr(
                                             sorted(exposure.time_events)))
    return sitecol, assetcol
Example #34
def view_assets_by_site(token, dstore):
    """
    Display statistical information about the distribution of the assets
    """
    taxonomies = dstore['assetcol/tagcol/taxonomy'].value
    assets_by_site = dstore['assetcol'].assets_by_site()
    data = ['taxonomy mean stddev min max num_sites num_assets'.split()]
    num_assets = AccumDict()
    for assets in assets_by_site:
        num_assets += {
            k: [len(v)]
            for k, v in groupby(assets, operator.attrgetter(
                'taxonomy')).items()
        }
    for taxo in sorted(num_assets):
        val = numpy.array(num_assets[taxo])
        data.append(stats(taxonomies[taxo], val, val.sum()))
    if len(num_assets) > 1:  # more than one taxonomy, add a summary
        n_assets = numpy.array([len(assets) for assets in assets_by_site])
        data.append(stats('*ALL*', n_assets, n_assets.sum()))
    return rst_table(data)
Example #35
def count_ruptures(sources, srcfilter, gsims, param, monitor):
    """
    Count the number of ruptures contained in the given sources by applying a
    raw source filtering on the integration distance. Return a dictionary
    src_group_id -> {}.
    All sources must belong to the same tectonic region type.
    """
    dic = groupby(sources, operator.attrgetter('src_group_id'))
    acc = AccumDict({grp_id: {} for grp_id in dic})
    acc.eff_ruptures = {grp_id: 0 for grp_id in dic}
    acc.calc_times = []
    for grp_id in dic:
        for src in sources:
            t0 = time.time()
            sites = srcfilter.get_close_sites(src)
            if sites is not None:
                acc.eff_ruptures[grp_id] += src.num_ruptures
                dt = time.time() - t0
                acc.calc_times.append(
                    (src.source_id, len(sites), src.weight, dt))
    return acc
Example #36
def _pointsources2multipoints(srcs, i):
    # converts pointSources with the same hpdist, npdist and msr into a
    # single multiPointSource.
    allsources = []
    for (hd, npd, msr), sources in groupby(srcs, dists).items():
        if len(sources) == 1:  # there is a single source
            allsources.extend(sources)
            continue
        mfds = [src[3] for src in sources]
        points = []
        usd = []
        lsd = []
        rar = []
        for src in sources:
            pg = src.pointGeometry
            points.extend(~pg.Point.pos)
            usd.append(~pg.upperSeismoDepth)
            lsd.append(~pg.lowerSeismoDepth)
            rar.append(~src.ruptAspectRatio)
        geom = Node('multiPointGeometry')
        geom.append(Node('gml:posList', text=points))
        geom.append(Node('upperSeismoDepth', text=collapse(usd)))
        geom.append(Node('lowerSeismoDepth', text=collapse(lsd)))
        node = Node(
            'multiPointSource',
            dict(id='mps-%d' % i, name='multiPointSource-%d' % i),
            nodes=[geom])
        node.append(Node("magScaleRel", text=collapse(msr)))
        node.append(Node("ruptAspectRatio", text=collapse(rar)))
        node.append(mfds2multimfd(mfds))
        node.append(Node('nodalPlaneDist', nodes=[
            Node('nodalPlane', dict(probability=prob, rake=rake,
                                    strike=strike, dip=dip))
            for prob, rake, strike, dip in npd]))
        node.append(Node('hypoDepthDist', nodes=[
            Node('hypoDepth', dict(depth=depth, probability=prob))
            for prob, depth in hd]))
        allsources.append(node)
        i += 1
    return i, allsources
Example #37
def _pointsources2multipoints(srcs, i):
    allsources = []
    for key, sources in groupby(srcs, get_key).items():
        if len(sources) == 1:  # there is a single source
            allsources.extend(sources)
            continue
        msr, rar, usd, lsd, hd, npd = key
        mfds = [src[3] for src in sources]
        points = []
        for src in sources:
            points.extend(~src.pointGeometry.Point.pos)
        geom = Node('multiPointGeometry')
        geom.append(Node('gml:posList', text=points))
        geom.append(Node('upperSeismoDepth', text=usd))
        geom.append(Node('lowerSeismoDepth', text=lsd))
        node = Node('multiPointSource',
                    dict(id='mps-%d' % i, name='multiPointSource-%d' % i),
                    nodes=[geom])
        node.append(Node("magScaleRel", text=msr))
        node.append(Node("ruptAspectRatio", text=rar))
        node.append(mfds2multimfd(mfds))
        node.append(
            Node('nodalPlaneDist',
                 nodes=[
                     Node(
                         'nodalPlane',
                         dict(probability=prob,
                              rake=rake,
                              strike=strike,
                              dip=dip)) for prob, rake, strike, dip in npd
                 ]))
        node.append(
            Node('hypoDepthDist',
                 nodes=[
                     Node('hypoDepth', dict(depth=depth, probability=prob))
                     for prob, depth in hd
                 ]))
        allsources.append(node)
        i += 1
    return i, allsources
Example #38
def info(calculators, gsims, views, exports, report, input_file=''):
    """
    Give information. You can pass the name of an available calculator,
    a job.ini file, or a zip archive with the input files.
    """
    logging.basicConfig(level=logging.INFO)
    if calculators:
        for calc in sorted(base.calculators):
            print(calc)
    if gsims:
        for gs in gsim.get_available_gsims():
            print(gs)
    if views:
        for name in sorted(view):
            print(name)
    if exports:
        dic = groupby(export, operator.itemgetter(0),
                      lambda group: [r[1] for r in group])
        n = 0
        for exporter, formats in dic.items():
            print(exporter, formats)
            n += len(formats)
        print('There are %d exporters defined.' % n)
    if os.path.isdir(input_file) and report:
        with Monitor('info', measuremem=True) as mon:
            with mock.patch.object(logging.root, 'info'):  # reduce logging
                do_build_reports(input_file)
        print(mon)
    elif input_file.endswith('.xml'):
        print(nrml.read(input_file).to_str())
    elif input_file.endswith(('.ini', '.zip')):
        with Monitor('info', measuremem=True) as mon:
            if report:
                print('Generated', reportwriter.build_report(input_file))
            else:
                print_csm_info(input_file)
        if mon.duration > 1:
            print(mon)
    elif input_file:
        print("No info for '%s'" % input_file)
Example #39
def make_eps(assets_by_site, num_samples, seed, correlation):
    """
    :param assets_by_site: a list of lists of assets
    :param int num_samples: the number of ruptures
    :param int seed: a random seed
    :param float correlation: the correlation coefficient
    :returns: epsilons matrix of shape (num_assets, num_samples)
    """
    all_assets = (a for assets in assets_by_site for a in assets)
    assets_by_taxo = groupby(all_assets, by_taxonomy)
    num_assets = sum(map(len, assets_by_site))
    eps = numpy.zeros((num_assets, num_samples), numpy.float32)
    for taxonomy, assets in assets_by_taxo.items():
        # the association with the epsilons is done in order
        assets.sort(key=operator.attrgetter('id'))
        shape = (len(assets), num_samples)
        logging.info('Building %s epsilons for taxonomy %s', shape, taxonomy)
        zeros = numpy.zeros(shape)
        epsilons = scientific.make_epsilons(zeros, seed, correlation)
        for asset, epsrow in zip(assets, epsilons):
            eps[asset.ordinal] = epsrow
    return eps
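A condensed sketch of the grouping and ordering performed by make_eps, with a hypothetical minimal Asset record and plain uncorrelated normals standing in for scientific.make_epsilons (which is OpenQuake-specific and handles the correlation coefficient):

import collections
import operator

import numpy

Asset = collections.namedtuple('Asset', 'id ordinal taxonomy')  # hypothetical

assets_by_site = [[Asset('a2', 1, 'RC'), Asset('a1', 0, 'RC')],
                  [Asset('a3', 2, 'W')]]
num_samples = 4
eps = numpy.zeros((3, num_samples), numpy.float32)
assets_by_taxo = {}
for assets in assets_by_site:
    for asset in assets:
        assets_by_taxo.setdefault(asset.taxonomy, []).append(asset)
rng = numpy.random.default_rng(42)
for taxonomy, assets in assets_by_taxo.items():
    assets.sort(key=operator.attrgetter('id'))  # epsilons associated in order
    # uncorrelated standard normals as a stand-in for make_epsilons
    epsilons = rng.normal(size=(len(assets), num_samples))
    for asset, epsrow in zip(assets, epsilons):
        eps[asset.ordinal] = epsrow
print(eps.shape)  # (3, 4)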
Example #40
def count_ruptures(sources, srcfilter, gsims, param, monitor):
    """
    Count the number of ruptures contained in the given sources by applying a
    raw source filtering on the integration distance. Return a dictionary
    src_group_id -> {}.
    All sources must belong to the same tectonic region type.
    """
    dic = groupby(sources, lambda src: src.src_group_ids[0])
    acc = AccumDict({grp_id: {} for grp_id in dic})
    acc.eff_ruptures = {grp_id: 0 for grp_id in dic}
    acc.calc_times = AccumDict(accum=numpy.zeros(4))
    for grp_id in dic:
        for src in sources:
            t0 = time.time()
            src_id = src.source_id.split(':')[0]
            sites = srcfilter.get_close_sites(src)
            if sites is not None:
                acc.eff_ruptures[grp_id] += src.num_ruptures
                dt = time.time() - t0
                acc.calc_times[src_id] += numpy.array(
                    [src.weight, len(sites), dt, 1])
    return acc
Example #41
 def write(self, destination, source_model, name=None):
     """
     Exports to NRML
     """
     if os.path.exists(destination):
         os.remove(destination)
     self.destination = destination
     if name:
         source_model.name = name
     output_source_model = Node("sourceModel", {"name": name})
     dic = groupby(source_model.sources,
                   operator.itemgetter('tectonicRegion'))
     for i, (trt, srcs) in enumerate(dic.items(), 1):
         output_source_model.append(
             Node('sourceGroup', {
                 'tectonicRegion': trt,
                 'name': 'group %d' % i
             },
                  nodes=srcs))
     print("Exporting Source Model to %s" % self.destination)
     with open(self.destination, "wb") as f:
         nrml.write([output_source_model], f, "%s")
Example #42
def sum_tbl(tbl, kfield, vfields):
    """
    Aggregate a composite array and compute the totals on a given key.

    >>> dt = numpy.dtype([('name', (bytes, 10)), ('value', int)])
    >>> tbl = numpy.array([('a', 1), ('a', 2), ('b', 3)], dt)
    >>> print(sum_tbl(tbl, 'name', ['value']))
    [('a', 3, 2) ('b', 3, 1)]
    """
    pairs = [(n, tbl.dtype[n]) for n in [kfield] + vfields]
    dt = numpy.dtype(pairs + [('counts', int)])

    def sum_all(group):
        vals = numpy.zeros(1, dt)[0]
        for rec in group:
            for vfield in vfields:
                vals[vfield] += rec[vfield]
            vals['counts'] += 1
        vals[kfield] = rec[kfield]
        return vals
    rows = groupby(tbl, operator.itemgetter(kfield), sum_all).values()
    return numpy.array(rows, dt)
Example #43
    def gen_outputs(self, riskinput, monitor, assetcol=None):
        """
        Group the assets per taxonomy and compute the outputs by using the
        underlying riskmodels. Yield the outputs generated as dictionaries
        out_by_lr.

        :param riskinput: a RiskInput instance
        :param monitor: a monitor object used to measure the performance
        :param assetcol: not None only for event based risk
        """
        mon_context = monitor('building context')
        mon_hazard = monitor('building hazard')
        mon_risk = monitor('computing risk', measuremem=False)
        hazard_getter = riskinput.hazard_getter
        with mon_context:
            if assetcol is None:
                assets_by_site = riskinput.assets_by_site
            else:
                assets_by_site = assetcol.assets_by_site()

        # group the assets by taxonomy
        dic = collections.defaultdict(list)
        for sid, assets in enumerate(assets_by_site):
            group = groupby(assets, by_taxonomy)
            for taxonomy in group:
                epsgetter = riskinput.epsilon_getter(
                    [asset.ordinal for asset in group[taxonomy]])
                dic[taxonomy].append((sid, group[taxonomy], epsgetter))
        imti = {imt: i for i, imt in enumerate(hazard_getter.imts)}
        with mon_hazard:
            hazard = hazard_getter.get_hazard()
        with mon_risk:
            for out in self._gen_outputs(hazard, imti, dic,
                                         hazard_getter.eids):
                yield out

        if hasattr(hazard_getter, 'gmdata'):  # for event based risk
            riskinput.gmdata = hazard_getter.gmdata
Example #44
    def _init(self):
        """
        Finalize the initialization of the RlzsAssoc object by setting
        the (reduced) weights of the realizations and the attribute
        gsims_by_trt_id.
        """
        if self.num_samples:
            assert len(self.realizations) == self.num_samples
            for rlz in self.realizations:
                rlz.weight = 1. / self.num_samples
        else:
            tot_weight = sum(rlz.weight for rlz in self.realizations)
            if tot_weight == 0:
                raise ValueError('All realizations have zero weight??')
            elif abs(tot_weight - 1) > 1E-12:  # allow for rounding errors
                logging.warn('Some source models are not contributing, '
                             'weights are being rescaled')
            for rlz in self.realizations:
                rlz.weight = rlz.weight / tot_weight

        self.gsims_by_trt_id = groupby(
            self.rlzs_assoc, operator.itemgetter(0),
            lambda group: sorted(gsim for trt_id, gsim in group))
Example #45
 def submit(self, sids, cmakers, max_weight):
     """
     :returns: a Starmap instance for the current tile
     """
     oq = self.oqparam
     self.datastore.swmr_on()  # must come before the Starmap
     smap = parallel.Starmap(classical, h5=self.datastore.hdf5)
     smap.monitor.save('sitecol', self.sitecol)
     triples = []
     for grp_id in self.grp_ids:
         sg = self.csm.src_groups[grp_id]
         if sg.atomic:
             # do not split atomic groups
             trip = (sg, sids, cmakers[grp_id])
             triples.append(trip)
             smap.submit(trip)
             self.n_outs[grp_id] += 1
         else:  # regroup the sources in blocks
             blks = (groupby(sg, get_source_id).values()
                     if oq.disagg_by_src else block_splitter(
                         sg, max_weight, get_weight, sort=True))
             blocks = list(blks)
             for block in blocks:
                 logging.debug('Sending %d source(s) with weight %d',
                               len(block), sum(src.weight for src in block))
                 trip = (block, sids, cmakers[grp_id])
                 triples.append(trip)
                 outs = (oq.outs_per_task
                         if len(block) >= oq.outs_per_task else len(block))
                 if outs > 1 and not oq.disagg_by_src:
                     smap.submit_split(trip, oq.time_per_task, outs)
                     self.n_outs[grp_id] += outs
                 else:
                     smap.submit(trip)
                     self.n_outs[grp_id] += 1
     logging.info('grp_id->n_outs: %s', list(self.n_outs.values()))
     return smap
Example #46
def write_source_model(dest,
                       sources_or_groups,
                       name=None,
                       investigation_time=None):
    """
    Writes a source model to XML.

    :param dest:
        Destination path
    :param sources_or_groups:
        Source model in different formats
    :param name:
        Name of the source model (if missing, extracted from the filename)
    """
    if isinstance(sources_or_groups, nrml.SourceModel):
        with open(dest, 'wb') as f:
            nrml.write([obj_to_node(sources_or_groups)], f, '%s')
        return
    if isinstance(sources_or_groups[0], sourceconverter.SourceGroup):
        groups = sources_or_groups
    else:  # passed a list of sources
        srcs_by_trt = groupby(sources_or_groups,
                              operator.attrgetter('tectonic_region_type'))
        groups = [
            sourceconverter.SourceGroup(trt, srcs_by_trt[trt])
            for trt in srcs_by_trt
        ]
    name = name or os.path.splitext(os.path.basename(dest))[0]
    nodes = list(map(obj_to_node, sorted(groups)))
    attrs = {"name": name}
    if investigation_time is not None:
        attrs['investigation_time'] = investigation_time
    source_model = Node("sourceModel", attrs, nodes=nodes)
    with open(dest, 'wb') as f:
        nrml.write([source_model], f, '%s')
    return dest
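
When a plain list of sources is passed, the function regroups them by tectonic region type before writing. A toy sketch of that grouping step follows; Source here is an illustrative namedtuple, not the openquake source class:

import collections
import operator

Source = collections.namedtuple('Source', 'source_id tectonic_region_type')

sources = [Source('s1', 'Active Shallow Crust'),
           Source('s2', 'Stable Continental'),
           Source('s3', 'Active Shallow Crust')]

get_trt = operator.attrgetter('tectonic_region_type')
srcs_by_trt = collections.defaultdict(list)
for src in sources:
    srcs_by_trt[get_trt(src)].append(src)

for trt, srcs in srcs_by_trt.items():
    print(trt, [s.source_id for s in srcs])
# Active Shallow Crust ['s1', 's3']
# Stable Continental ['s2']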
Exemplo n.º 47
0
    def collapse_point_ruptures(self, rups, sites):
        """
        Collapse ruptures more distant than the pointsource_distance
        """
        pointlike, output = [], []
        for rup in rups:
            if not rup.surface:
                pointlike.append(rup)
            else:
                output.append(rup)
        for mag, mrups in groupby(pointlike, bymag).items():
            if len(mrups) == 1:  # nothing to do
                output.extend(mrups)
                continue
            mdist = self.maximum_distance(self.trt, mag)
            coll = []
            for rup in mrups:  # called on a single site
                rup.dist = get_distances(rup, sites, 'rrup').min()
                if rup.dist <= mdist:
                    coll.append(rup)
            for rs in groupby_bin(coll, self.point_rupture_bins, bydist):
                # group together ruptures in the same distance bin
                output.extend(_collapse(rs))
        return output
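
groupby_bin puts the surviving ruptures into a fixed number of distance bins so that each bin can be collapsed into a single representative. The helper below is only an assumed equivalent (equal-width bins between the minimum and maximum distance); the real groupby_bin may bin differently:

import numpy

def groupby_bin_like(values, n_bins, key):
    # assumption: equal-width bins spanning [min(key), max(key)]
    keys = numpy.array([key(v) for v in values])
    edges = numpy.linspace(keys.min(), keys.max(), n_bins + 1)
    # digitize returns 1..n_bins+1; clip the right edge into the last bin
    idx = numpy.clip(numpy.digitize(keys, edges), 1, n_bins)
    bins = [[] for _ in range(n_bins)]
    for v, i in zip(values, idx):
        bins[i - 1].append(v)
    return [b for b in bins if b]

dists = [5., 7., 40., 42., 90.]
for group in groupby_bin_like(dists, 3, key=lambda d: d):
    print(group)
# [5.0, 7.0]
# [40.0, 42.0]
# [90.0]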
Exemplo n.º 48
0
def renumber_sm(smlt_file):
    """
    Renumber the sources belonging to the same source model, even if it is
    split across multiple files, to avoid duplicated source IDs. NB: it
    changes the XML files in place, without making a backup, so be careful.
    """
    logging.basicConfig(level=logging.INFO)
    smpaths = logictree.collect_info(smlt_file).smpaths
    smap = parallel.Starmap(read_sm, [(path, ) for path in smpaths])
    smodel, srcs = {}, []
    for sm, fname, sources in smap:
        smodel[fname] = sm
        srcs.extend(sources)
    parallel.Starmap.shutdown()
    dic = general.groupby(srcs, operator.attrgetter('value'))
    n = 1
    for sources in dic.values():
        for src in sources:
            src.node['id'] = str(n)
        n += 1
    for fname, root in smodel.items():
        logging.info('Saving %s', fname)
        with open(fname, 'wb') as f:
            nrml.write(root, f, xmlns=root['xmlns'])
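
The renumbering assigns one new sequential id per group of sources sharing the same original id, across all files of the source model. A toy version of that loop, with plain dicts standing in for the parsed XML source nodes:

import collections

srcs = [{'value': 'faultA', 'id': None},
        {'value': 'faultA', 'id': None},
        {'value': 'areaB', 'id': None}]

dic = collections.OrderedDict()
for src in srcs:
    dic.setdefault(src['value'], []).append(src)

n = 1
for sources in dic.values():
    for src in sources:
        src['id'] = str(n)
    n += 1

print([(s['value'], s['id']) for s in srcs])
# [('faultA', '1'), ('faultA', '1'), ('areaB', '2')]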
Exemplo n.º 49
0
def _get_csm(full_lt, groups):
    # extract a single source from multiple sources with the same ID
    # and regroup the sources in non-atomic groups by TRT
    atomic = []
    acc = general.AccumDict(accum=[])
    get_grp_id = full_lt.source_model_lt.get_grp_id(full_lt.gsim_lt.values)
    for sm in full_lt.sm_rlzs:
        for grp in groups[sm.ordinal]:
            if grp and grp.atomic:
                atomic.append(grp)
            elif grp:
                acc[grp.trt].extend(grp)
            grp_id = get_grp_id(grp.trt, sm.ordinal)
            for src in grp:
                src.grp_id = grp_id
                if sm.samples > 1:
                    src.samples = sm.samples
    dic = {}
    key = operator.attrgetter('source_id', 'checksum')
    idx = 0
    for trt in acc:
        lst = []
        for srcs in general.groupby(acc[trt], key).values():
            for src in srcs:
                src.id = idx
            idx += 1
            if len(srcs) > 1:  # happens in classical/case_20
                src.grp_id = [s.grp_id for s in srcs]
            lst.append(src)
        dic[trt] = sourceconverter.SourceGroup(trt, lst)
    for ag in atomic:
        for src in ag:
            src.id = idx
            idx += 1
    src_groups = list(dic.values()) + atomic
    return CompositeSourceModel(full_lt, src_groups)
Exemplo n.º 50
0
    def combine_gmfs(self, gmfs):
        """
        :param gmfs: datastore /gmfs object
        :returns: a list of dictionaries rupid -> gmf array
        """
        gsims_by_col = self.get_gsims_by_col()
        dicts = [{} for rlz in self.realizations]
        for col_id, gsims in enumerate(gsims_by_col):
            try:
                dataset = gmfs['col%02d' % col_id]
            except KeyError:  # empty dataset
                continue
            trt_id = self.csm_info.get_trt_id(col_id)
            gmfs_by_rupid = groupby(dataset.value, lambda row: row['idx'],
                                    list)
            for gsim in gsims:
                gs = str(gsim)
                for rlz in self.rlzs_assoc[trt_id, gs]:
                    col_ids = self.col_ids_by_rlz[rlz]
                    if not col_ids or col_id in col_ids:
                        for rupid, rows in gmfs_by_rupid.items():
                            dicts[rlz.ordinal][rupid] = numpy.array(
                                [r[gs] for r in rows], rows[0][gs].dtype)
        return dicts
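
The grouping of the GMF dataset by rupture index can be reproduced on a toy structured array; a defaultdict is enough to mimic what the groupby(dataset.value, lambda row: row['idx'], list) call produces. The dtype and values below are made up:

import collections
import numpy

dt = numpy.dtype([('idx', numpy.uint32), ('PGA', numpy.float64)])
data = numpy.array([(0, 0.12), (0, 0.34), (1, 0.05)], dt)

gmfs_by_rupid = collections.defaultdict(list)
for row in data:
    gmfs_by_rupid[int(row['idx'])].append(row)

for rupid, rows in gmfs_by_rupid.items():
    print(rupid, numpy.array([r['PGA'] for r in rows]))
# 0 [0.12 0.34]
# 1 [0.05]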
Exemplo n.º 51
0
    def get_sources_by_trt(self):
        """
        Build a dictionary TRT string -> sources. Sources of kind "mutex"
        (if any) are silently discarded.
        """
        acc = AccumDict(accum=[])
        for sm in self.source_models:
            for grp in sm.src_groups:
                if grp.src_interdep != 'mutex':
                    acc[grp.trt].extend(grp)
        if self.optimize_same_id is False:
            return acc
        # extract a single source from multiple sources with the same ID
        dic = {}
        for trt in acc:
            dic[trt] = []
            for grp in groupby(acc[trt], lambda x: x.source_id).values():
                src = grp[0]
                # src.src_group_id can be a list if get_sources_by_trt was
                # called before
                if len(grp) > 1 and not isinstance(src.src_group_id, list):
                    src.src_group_id = [s.src_group_id for s in grp]
                dic[trt].append(src)
        return dic
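
The optimize_same_id branch keeps one representative per source id and records the group ids of its duplicates on that representative. A toy version of that deduplication, with a minimal Src class defined only for the example:

import collections

class Src:
    def __init__(self, source_id, src_group_id):
        self.source_id = source_id
        self.src_group_id = src_group_id

srcs = [Src('f1', 0), Src('f1', 1), Src('area', 0)]

by_id = collections.OrderedDict()
for src in srcs:
    by_id.setdefault(src.source_id, []).append(src)

unique = []
for grp in by_id.values():
    src = grp[0]
    if len(grp) > 1 and not isinstance(src.src_group_id, list):
        src.src_group_id = [s.src_group_id for s in grp]
    unique.append(src)

print([(s.source_id, s.src_group_id) for s in unique])
# [('f1', [0, 1]), ('area', 0)]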
Exemplo n.º 52
0
    def _init(self):
        """
        Finalize the initialization of the RlzsAssoc object by setting
        the (reduced) weights of the realizations and the attribute
        gsims_by_grp_id.
        """
        if self.num_samples:
            assert len(self.realizations) == self.num_samples, (
                len(self.realizations), self.num_samples)
            for rlz in self.realizations:
                rlz.weight = 1. / self.num_samples
        else:
            tot_weight = sum(rlz.weight for rlz in self.realizations)
            if tot_weight == 0:
                raise ValueError('All realizations have zero weight??')
            elif abs(tot_weight - 1) > 1E-8:
                # this may happen because of rounding errors or because of the
                # logic tree reduction; we ensure the sum of the weights is 1
                for rlz in self.realizations:
                    rlz.weight = rlz.weight / tot_weight

        self.gsims_by_grp_id = groupby(
            self.rlzs_assoc, operator.itemgetter(0),
            lambda group: sorted(gsim for grp_id, gsim in group))
Exemplo n.º 53
0
    'bcr-stats': 'Benefit Cost Ratios Statistics',
    'sourcegroups': 'Seismic Source Groups',
    'ruptures': 'Earthquake Ruptures',
    'hcurves': 'Hazard Curves',
    'hmaps': 'Hazard Maps',
    'uhs': 'Uniform Hazard Spectra',
    'disagg': 'Disaggregation Outputs',
    'disagg-stats': 'Disaggregation Statistics',
    'disagg_by_src': 'Disaggregation by Source',
    'realizations': 'Realizations',
    'fullreport': 'Full Report',
    'input_zip': 'Input Files'
}

# sanity check, all display name keys must be exportable
dic = general.groupby(export, operator.itemgetter(0))
for key in DISPLAY_NAME:
    assert key in dic, key
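
The export registry is keyed by (datastore key, file format) pairs, so grouping on the first item of each key yields the set of exportable datastore keys that the sanity check compares against DISPLAY_NAME. A toy version of that check, with made-up registry entries:

import operator

# toy export registry keyed by (datastore key, file format)
export = {('hcurves', 'csv'): 'export_hcurves_csv',
          ('hcurves', 'xml'): 'export_hcurves_xml',
          ('uhs', 'csv'): 'export_uhs_csv'}

DISPLAY_NAME = {'hcurves': 'Hazard Curves', 'uhs': 'Uniform Hazard Spectra'}

exportable = {operator.itemgetter(0)(key) for key in export}
for key in DISPLAY_NAME:
    assert key in exportable, key
print(sorted(exportable))
# ['hcurves', 'uhs']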


def create_outputs(db, job_id, dskeys):
    """
    Build a correspondence between the outputs in the datastore and the
    ones in the database.

    :param db: a :class:`openquake.server.dbapi.Db` instance
    :param job_id: ID of the current job
    :param dskeys: a list of datastore keys
    """
    rows = [(job_id, DISPLAY_NAME.get(key, key), key) for key in dskeys]
    db.insert('output', 'oq_job_id display_name ds_key'.split(), rows)
Exemplo n.º 54
0
def calc_hazard_curves(groups,
                       srcfilter,
                       imtls,
                       gsim_by_trt,
                       truncation_level=None,
                       apply=sequential_apply,
                       filter_distance='rjb',
                       reqv=None,
                       **kwargs):
    """
    Compute hazard curves on a list of sites, given a set of seismic source
    groups and a dictionary of ground shaking intensity models (one per
    tectonic region type).

    The probability of ground motion exceedance is computed in different ways
    depending on whether the sources are independent or mutually exclusive.

    :param groups:
        A sequence of groups of seismic source objects (instances of
        :class:`~openquake.hazardlib.source.base.BaseSeismicSource`).
    :param srcfilter:
        A source filter over the site collection or the site collection itself
    :param imtls:
        Dictionary mapping intensity measure type strings
        to lists of intensity measure levels.
    :param gsim_by_trt:
        Dictionary mapping tectonic region types (members
        of :class:`openquake.hazardlib.const.TRT`) to
        :class:`~openquake.hazardlib.gsim.base.GMPE` or
        :class:`~openquake.hazardlib.gsim.base.IPE` objects.
    :param truncation_level:
        Float, number of standard deviations for truncation of the intensity
        distribution.
    :param apply:
        apply function to use (default sequential_apply)
    :param filter_distance:
        The distance used to filter the ruptures (default rjb)
    :param reqv:
        If not None, an instance of RjbEquivalent
    :returns:
        An array of size N, where N is the number of sites, whose elements
        are records with fields given by the intensity measure types; the
        size of each field is given by the number of levels in ``imtls``.
    """
    # This is ensuring backward compatibility i.e. processing a list of
    # sources
    if not isinstance(groups[0], SourceGroup):  # sent a list of sources
        odic = groupby(groups, operator.attrgetter('tectonic_region_type'))
        groups = [
            SourceGroup(trt, odic[trt], 'src_group', 'indep', 'indep')
            for trt in odic
        ]
    # ensure the sources have the right src_group_id
    for i, grp in enumerate(groups):
        for src in grp:
            if src.src_group_id is None:
                src.src_group_id = i
    imtls = DictArray(imtls)
    shift_hypo = kwargs['shift_hypo'] if 'shift_hypo' in kwargs else False
    param = dict(imtls=imtls,
                 truncation_level=truncation_level,
                 filter_distance=filter_distance,
                 reqv=reqv,
                 cluster=grp.cluster,
                 shift_hypo=shift_hypo)
    pmap = ProbabilityMap(len(imtls.array), 1)
    # Processing groups with homogeneous tectonic region
    gsim = gsim_by_trt[groups[0][0].tectonic_region_type]
    mon = Monitor()
    for group in groups:
        if group.atomic:  # do not split
            it = [classical(group, srcfilter, [gsim], param, mon)]
        else:  # split the group and apply `classical` in parallel
            it = apply(classical,
                       (group.sources, srcfilter, [gsim], param, mon),
                       weight=operator.attrgetter('weight'))
        for dic in it:
            for grp_id, pval in dic['pmap'].items():
                pmap |= pval
    sitecol = getattr(srcfilter, 'sitecol', srcfilter)
    return pmap.convert(imtls, len(sitecol.complete))
Exemplo n.º 55
0
def disaggregation(sources,
                   site,
                   imt,
                   iml,
                   gsim_by_trt,
                   truncation_level,
                   n_epsilons,
                   mag_bin_width,
                   dist_bin_width,
                   coord_bin_width,
                   source_filter=filters.nofilter,
                   filter_distance='rjb'):
    """
    Compute "Disaggregation" matrix representing conditional probability of an
    intensity mesaure type ``imt`` exceeding, at least once, an intensity
    measure level ``iml`` at a geographical location ``site``, given rupture
    scenarios classified in terms of:

    - rupture magnitude
    - Joyner-Boore distance from rupture surface to site
    - longitude and latitude of the surface projection of a rupture's point
      closest to ``site``
    - epsilon: number of standard deviations by which an intensity measure
      level deviates from the median value predicted by a GSIM, given the
      rupture parameters
    - rupture tectonic region type

    In other words, the disaggregation matrix makes it possible to compute the
    probability that each scenario with the specified properties (e.g.,
    magnitude, or magnitude and distance) causes one or more exceedances of a
    given hazard level.

    For more detailed information about the disaggregation, see for instance
    "Disaggregation of Seismic Hazard", Paolo Bazzurro, C. Allin Cornell,
    Bulletin of the Seismological Society of America, Vol. 89, pp. 501-520,
    April 1999.

    :param sources:
        Seismic source model, as for the
        :mod:`PSHA <openquake.hazardlib.calc.hazard_curve>` calculator; it
        should be an iterator of seismic sources.
    :param site:
        :class:`~openquake.hazardlib.site.Site` of interest to calculate
        disaggregation matrix for.
    :param imt:
        Instance of :mod:`intensity measure type <openquake.hazardlib.imt>`
        class.
    :param iml:
        Intensity measure level. A float value in units of ``imt``.
    :param gsim_by_trt:
        Tectonic region type to GSIM objects mapping.
    :param truncation_level:
        Float, number of standard deviations for truncation of the intensity
        distribution.
    :param n_epsilons:
        Integer number of epsilon histogram bins in the result matrix.
    :param mag_bin_width:
        Magnitude discretization step, width of one magnitude histogram bin.
    :param dist_bin_width:
        Distance histogram discretization step, in km.
    :param coord_bin_width:
        Longitude and latitude histograms discretization step,
        in decimal degrees.
    :param source_filter:
        Optional source-site filter function. See
        :mod:`openquake.hazardlib.calc.filters`.

    :returns:
        A tuple of two items. First is itself a tuple of bin edges information
        for (in specified order) magnitude, distance, longitude, latitude,
        epsilon and tectonic region types.

        Second item is 6d-array representing the full disaggregation matrix.
        Dimensions are in the same order as bin edges in the first item
        of the result tuple. The matrix can be used directly by pmf-extractor
        functions.
    """
    trts = sorted(set(src.tectonic_region_type for src in sources))
    trt_num = dict((trt, i) for i, trt in enumerate(trts))
    rlzs_by_gsim = {gsim_by_trt[trt]: [0] for trt in trts}
    iml4 = make_iml4(1, {str(imt): iml})
    by_trt = groupby(sources, operator.attrgetter('tectonic_region_type'))
    bdata = {}
    sitecol = SiteCollection([site])
    for trt, srcs in by_trt.items():
        ruptures = []
        for src in srcs:
            ruptures.extend(src.iter_ruptures())
        cmaker = ContextMaker(trt, rlzs_by_gsim,
                              source_filter.integration_distance,
                              {'filter_distance': filter_distance})
        bdata[trt] = collect_bin_data(ruptures, sitecol, cmaker, iml4,
                                      truncation_level, n_epsilons)
    if sum(len(bd.mags) for bd in bdata.values()) == 0:
        warnings.warn(
            'No ruptures have contributed to the hazard at site %s' % site,
            RuntimeWarning)
        return None, None

    min_mag = min(bd.mags.min() for bd in bdata.values())
    max_mag = max(bd.mags.max() for bd in bdata.values())
    mag_bins = mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / mag_bin_width)),
        int(numpy.ceil(max_mag / mag_bin_width) + 1))

    min_dist = min(bd.dists.min() for bd in bdata.values())
    max_dist = max(bd.dists.max() for bd in bdata.values())
    dist_bins = dist_bin_width * numpy.arange(
        int(numpy.floor(min_dist / dist_bin_width)),
        int(numpy.ceil(max_dist / dist_bin_width) + 1))

    bb = (min(bd.lons.min() for bd in bdata.values()),
          min(bd.lats.min() for bd in bdata.values()),
          max(bd.lons.max() for bd in bdata.values()),
          max(bd.lats.max() for bd in bdata.values()))
    lon_bins, lat_bins = lon_lat_bins(bb, coord_bin_width)

    eps_bins = numpy.linspace(-truncation_level, truncation_level,
                              n_epsilons + 1)

    bin_edges = (mag_bins, dist_bins, [lon_bins], [lat_bins], eps_bins)
    matrix = numpy.zeros(
        (len(mag_bins) - 1, len(dist_bins) - 1, len(lon_bins) - 1,
         len(lat_bins) - 1, len(eps_bins) - 1, len(trts)))
    for trt in bdata:
        dic = build_disagg_matrix(bdata[trt], bin_edges, sid=0)
        if dic:  # (poe, imt, rlzi) -> matrix
            [mat] = dic.values()
            matrix[..., trt_num[trt]] = mat
    return bin_edges + (trts, ), matrix
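
The magnitude and distance edges are built by rounding the observed minimum and maximum out to whole multiples of the bin width. A small numeric illustration of the magnitude case, with made-up values:

import numpy

mag_bin_width = 0.5
min_mag, max_mag = 5.3, 7.1
mag_bins = mag_bin_width * numpy.arange(
    int(numpy.floor(min_mag / mag_bin_width)),
    int(numpy.ceil(max_mag / mag_bin_width) + 1))
print(mag_bins)
# [5.  5.5 6.  6.5 7.  7.5]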
Exemplo n.º 56
0
def main(what, report=False):
    """
    Give information about the passed keyword or filename
    """
    if os.environ.get('OQ_DISTRIBUTE') not in ('no', 'processpool'):
        os.environ['OQ_DISTRIBUTE'] = 'processpool'
    if what == 'calculators':
        for calc in sorted(base.calculators):
            print(calc)
    elif what == 'gsims':
        for gs in gsim.get_available_gsims():
            print(gs)
    elif what == 'portable_gsims':
        for gs in gsim.get_portable_gsims():
            print(gs)
    elif what == 'imts':
        for im in vars(imt).values():
            if inspect.isfunction(im) and is_upper(im):
                print(im.__name__)
    elif what == 'views':
        for name in sorted(view):
            print(name)
    elif what == 'exports':
        dic = groupby(export, operator.itemgetter(0),
                      lambda group: [r[1] for r in group])
        items = [(DISPLAY_NAME.get(exporter, '?'), exporter, formats)
                 for exporter, formats in dic.items()]
        n = 0
        for dispname, exporter, formats in sorted(items):
            print(dispname, '"%s"' % exporter, formats)
            n += len(formats)
        print('There are %d exporters defined.' % n)
    elif what == 'extracts':
        for key in extract:
            func = extract[key]
            if hasattr(func, '__wrapped__'):
                fm = FunctionMaker(func.__wrapped__)
            elif hasattr(func, 'func'):  # for partial objects
                fm = FunctionMaker(func.func)
            else:
                fm = FunctionMaker(func)
            print('%s(%s)%s' % (fm.name, fm.signature, fm.doc))
    elif what == 'parameters':
        docs = OqParam.docs()
        names = set()
        for val in vars(OqParam).values():
            if hasattr(val, 'name'):
                names.add(val.name)
        params = sorted(names)
        for param in params:
            print(param)
            print(docs[param])
    elif what == 'mfds':
        for cls in gen_subclasses(BaseMFD):
            print(cls.__name__)
    elif what == 'venv':
        print(sys.prefix)
    elif what == 'sources':
        for cls in gen_subclasses(BaseSeismicSource):
            print(cls.__name__)
    elif what == 'consequences':
        known = scientific.KNOWN_CONSEQUENCES
        print('The following %d consequences are implemented:' % len(known))
        for cons in known:
            print(cons)
    elif os.path.isdir(what) and report:
        with Monitor('info', measuremem=True) as mon:
            with mock.patch.object(logging.root, 'info'):  # reduce logging
                do_build_reports(what)
        print(mon)
    elif what.endswith('.xml'):
        node = nrml.read(what)
        if node[0].tag.endswith('sourceModel'):
            print(source_model_info([node]))
        elif node[0].tag.endswith('logicTree'):
            bset = node[0][0]
            if bset.tag.endswith("logicTreeBranchingLevel"):
                bset = bset[0]
            if bset.attrib['uncertaintyType'] == 'sourceModel':
                sm_nodes = []
                for smpath in logictree.collect_info(what).smpaths:
                    sm_nodes.append(nrml.read(smpath))
                print(source_model_info(sm_nodes))
            elif bset.attrib['uncertaintyType'] == 'gmpeModel':
                print(logictree.GsimLogicTree(what))
        else:
            print(node.to_str())
    elif what.endswith(('.ini', '.zip')):
        with Monitor('info', measuremem=True) as mon:
            if report:
                print('Generated', reportwriter.build_report(what))
            else:
                print(readinput.get_oqparam(what).json())
        if mon.duration > 1:
            print(mon)
    elif what:
        print("No info for '%s'" % what)
Exemplo n.º 57
0
    def submit_tasks(self, smap):
        """
        Submit tasks to the passed Starmap
        """
        oq = self.oqparam
        gsims_by_trt = self.full_lt.get_gsims_by_trt()
        src_groups = self.csm.src_groups

        def srcweight(src):
            trt = src.tectonic_region_type
            g = len(gsims_by_trt[trt])
            return src.weight * g

        logging.info('Weighting the sources')
        totweight = sum(sum(srcweight(src) for src in sg) for sg in src_groups)
        C = oq.concurrent_tasks or 1
        if oq.calculation_mode == 'preclassical':
            f1 = f2 = preclassical
            C *= 50  # use more tasks because there will be slow tasks
        elif oq.disagg_by_src or oq.is_ucerf() or oq.split_sources is False:
            # do not split the sources
            C *= 5  # use more tasks, especially in UCERF
            f1, f2 = classical, classical
        else:
            f1, f2 = classical, classical_split_filter
        min_weight = oq.min_weight * (10 if self.few_sites else 1)
        max_weight = max(min(totweight / C, oq.max_weight), min_weight)
        logging.info('tot_weight={:_d}, max_weight={:_d}'.format(
            int(totweight), int(max_weight)))
        param = dict(
            truncation_level=oq.truncation_level, imtls=oq.imtls,
            filter_distance=oq.filter_distance, reqv=oq.get_reqv(),
            maximum_distance=oq.maximum_distance,
            pointsource_distance=self.psd,
            point_rupture_bins=oq.point_rupture_bins,
            shift_hypo=oq.shift_hypo, max_weight=max_weight,
            collapse_level=oq.collapse_level,
            max_sites_disagg=oq.max_sites_disagg)
        srcfilter = self.src_filter(self.datastore.tempname)
        for sg in src_groups:
            gsims = gsims_by_trt[sg.trt]
            param['rescale_weight'] = len(gsims)
            if sg.atomic:
                # do not split atomic groups
                nb = 1
                smap.submit((sg, srcfilter, gsims, param), f1)
            else:  # regroup the sources in blocks
                blks = (groupby(sg, operator.attrgetter('source_id')).values()
                        if oq.disagg_by_src
                        else block_splitter(sg, totweight/C, srcweight))
                blocks = list(blks)
                nb = len(blocks)
                for block in blocks:
                    logging.debug('Sending %d source(s) with weight %d',
                                  len(block),
                                  sum(srcweight(src) for src in block))
                    smap.submit((block, srcfilter, gsims, param), f2)

            w = sum(srcweight(src) for src in sg)
            logging.info('TRT = %s', sg.trt)
            if oq.maximum_distance.magdist:
                it = sorted(oq.maximum_distance.magdist[sg.trt].items())
                md = '%s->%d ... %s->%d' % (it[0] + it[-1])
            else:
                md = oq.maximum_distance(sg.trt)
            logging.info('max_dist={}, gsims={}, weight={:_d}, blocks={}'.
                         format(md, len(gsims), int(w), nb))
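
The task weight is clamped between min_weight and max_weight around the ideal value totweight / concurrent_tasks. A tiny numeric illustration of that clamping, with made-up numbers:

def clamp_weight(totweight, concurrent_tasks, min_weight, max_weight):
    # ideal block weight, kept inside [min_weight, max_weight]
    return max(min(totweight / concurrent_tasks, max_weight), min_weight)

print(clamp_weight(1_000_000, 64, min_weight=1_000, max_weight=500_000))
# 15625.0
print(clamp_weight(10_000, 64, min_weight=1_000, max_weight=500_000))
# 1000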
Exemplo n.º 58
0
    def full_disaggregation(self, curves):
        """
        Run the disaggregation phase.

        :param curves: a list of hazard curves, one per site

        The curves can be all None if iml_disagg is set in the job.ini
        """
        oq = self.oqparam
        tl = oq.truncation_level
        src_filter = SourceFilter(self.sitecol, oq.maximum_distance)
        csm = self.csm
        if not csm.get_sources():
            raise RuntimeError('All sources were filtered away!')

        R = len(self.rlzs_assoc.realizations)
        I = len(oq.imtls)
        P = len(oq.poes_disagg) or 1
        if R * I * P > 10:
            logging.warning(
                'You have %d realizations, %d IMTs and %d poes_disagg: the '
                'disaggregation will be heavy and memory consuming', R, I, P)
        iml4 = disagg.make_iml4(
            R, oq.iml_disagg, oq.imtls, oq.poes_disagg or (None,), curves)
        if oq.disagg_by_src:
            if R == 1:
                self.build_disagg_by_src(iml4)
            else:
                logging.warning('disagg_by_src works only with 1 realization, '
                                'you have %d', R)

        eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
        self.bin_edges = {}

        # build trt_edges
        trts = tuple(sorted(set(sg.trt for smodel in csm.source_models
                                for sg in smodel.src_groups)))
        trt_num = {trt: i for i, trt in enumerate(trts)}
        self.trts = trts

        # build mag_edges
        min_mag = min(sg.min_mag for smodel in csm.source_models
                      for sg in smodel.src_groups)
        max_mag = max(sg.max_mag for smodel in csm.source_models
                      for sg in smodel.src_groups)
        mag_edges = oq.mag_bin_width * numpy.arange(
            int(numpy.floor(min_mag / oq.mag_bin_width)),
            int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))

        # build dist_edges
        maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
        dist_edges = oq.distance_bin_width * numpy.arange(
            0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))

        # build eps_edges
        eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

        # build lon_edges, lat_edges per sid
        bbs = src_filter.get_bounding_boxes(mag=max_mag)
        lon_edges, lat_edges = {}, {}  # by sid
        for sid, bb in zip(self.sitecol.sids, bbs):
            lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
                bb, oq.coordinate_bin_width)
        self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
        self.save_bin_edges()

        # build all_args
        all_args = []
        maxweight = csm.get_maxweight(weight, oq.concurrent_tasks)
        mon = self.monitor('disaggregation')
        R = iml4.shape[1]
        self.imldict = {}  # sid, rlzi, poe, imt -> iml
        for s in self.sitecol.sids:
            for r in range(R):
                for p, poe in enumerate(oq.poes_disagg or [None]):
                    for m, imt in enumerate(oq.imtls):
                        self.imldict[s, r, poe, imt] = iml4[s, r, m, p]

        for smodel in csm.source_models:
            sm_id = smodel.ordinal
            for trt, groups in groupby(
                    smodel.src_groups, operator.attrgetter('trt')).items():
                trti = trt_num[trt]
                sources = sum([grp.sources for grp in groups], [])
                rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(trt, sm_id)
                cmaker = ContextMaker(
                    rlzs_by_gsim, src_filter.integration_distance,
                    {'filter_distance': oq.filter_distance})
                for block in block_splitter(sources, maxweight, weight):
                    all_args.append(
                        (src_filter, block, cmaker, iml4, trti, self.bin_edges,
                         oq, mon))

        self.num_ruptures = [0] * len(self.trts)
        self.cache_info = numpy.zeros(3)  # operations, cache_hits, num_zeros
        results = parallel.Starmap(
            compute_disagg, all_args, self.monitor()
        ).reduce(self.agg_result, AccumDict(accum={}))

        # set eff_ruptures
        trti = csm.info.trt2i()
        for smodel in csm.info.source_models:
            for sg in smodel.src_groups:
                sg.eff_ruptures = self.num_ruptures[trti[sg.trt]]
        self.datastore['csm_info'] = csm.info

        ops, hits, num_zeros = self.cache_info
        logging.info('Cache speedup %s', ops / (ops - hits))
        logging.info('Discarded zero matrices: %d', num_zeros)
        return results
Exemplo n.º 59
0
def calc_hazard_curves(groups,
                       ss_filter,
                       imtls,
                       gsim_by_trt,
                       truncation_level=None,
                       apply=sequential_apply,
                       filter_distance='rjb'):
    """
    Compute hazard curves on a list of sites, given a set of seismic source
    groups and a dictionary of ground shaking intensity models (one per
    tectonic region type).

    The probability of ground motion exceedance is computed in different ways
    depending on whether the sources are independent or mutually exclusive.

    :param groups:
        A sequence of groups of seismic source objects (instances of
        :class:`~openquake.hazardlib.source.base.BaseSeismicSource`).
    :param ss_filter:
        A source filter over the site collection or the site collection itself
    :param imtls:
        Dictionary mapping intensity measure type strings
        to lists of intensity measure levels.
    :param gsim_by_trt:
        Dictionary mapping tectonic region types (members
        of :class:`openquake.hazardlib.const.TRT`) to
        :class:`~openquake.hazardlib.gsim.base.GMPE` or
        :class:`~openquake.hazardlib.gsim.base.IPE` objects.
    :param truncation_level:
        Float, number of standard deviations for truncation of the intensity
        distribution.
    :param apply:
        apply function to use (default sequential_apply)
    :param filter_distance:
        The distance used to filter the ruptures (default rjb)
    :returns:
        An array of size N, where N is the number of sites, whose elements
        are records with fields given by the intensity measure types; the
        size of each field is given by the number of levels in ``imtls``.
    """
    # This is ensuring backward compatibility i.e. processing a list of
    # sources
    if not isinstance(groups[0], SourceGroup):  # sent a list of sources
        odic = groupby(groups, operator.attrgetter('tectonic_region_type'))
        groups = [
            SourceGroup(trt, odic[trt], 'src_group', 'indep', 'indep')
            for trt in odic
        ]
    for i, grp in enumerate(groups):
        for src in grp:
            if src.src_group_id is None:
                src.src_group_id = i
    if hasattr(ss_filter, 'sitecol'):  # a filter, as it should be
        sitecol = ss_filter.sitecol
    else:  # backward compatibility, a site collection was passed
        sitecol = ss_filter
        ss_filter = SourceFilter(sitecol, {})

    imtls = DictArray(imtls)
    param = dict(imtls=imtls,
                 truncation_level=truncation_level,
                 filter_distance=filter_distance)
    pmap = ProbabilityMap(len(imtls.array), 1)
    # Processing groups with homogeneous tectonic region
    gsim = gsim_by_trt[groups[0][0].tectonic_region_type]
    mon = Monitor()
    for group in groups:
        if group.src_interdep == 'mutex':  # do not split the group
            it = [classical(group, ss_filter, [gsim], param, mon)]
        else:  # split the group and apply `classical` in parallel
            it = apply(classical, (group, ss_filter, [gsim], param, mon),
                       weight=operator.attrgetter('weight'))
        for res in it:
            for grp_id in res:
                pmap |= res[grp_id]
    return pmap.convert(imtls, len(sitecol.complete))
Exemplo n.º 60
0
def main(what, report=False):
    """
    Give information about the passed keyword or filename
    """
    if os.environ.get('OQ_DISTRIBUTE') not in ('no', 'processpool'):
        os.environ['OQ_DISTRIBUTE'] = 'processpool'
    if what == 'calculators':
        for calc in sorted(base.calculators):
            print(calc)
    elif what == 'gsims':
        for gs in gsim.get_available_gsims():
            print(gs)
    elif what == 'imts':
        for im in gen_subclasses(imt.IMT):
            print(im.__name__)
    elif what == 'views':
        for name in sorted(view):
            print(name)
    elif what == 'exports':
        dic = groupby(export, operator.itemgetter(0),
                      lambda group: [r[1] for r in group])
        n = 0
        for exporter, formats in dic.items():
            print(exporter, formats)
            n += len(formats)
        print('There are %d exporters defined.' % n)
    elif what == 'extracts':
        for key in extract:
            func = extract[key]
            if hasattr(func, '__wrapped__'):
                fm = FunctionMaker(func.__wrapped__)
            elif hasattr(func, 'func'):  # for partial objects
                fm = FunctionMaker(func.func)
            else:
                fm = FunctionMaker(func)
            print('%s(%s)%s' % (fm.name, fm.signature, fm.doc))
    elif what == 'parameters':
        params = []
        for val in vars(OqParam).values():
            if hasattr(val, 'name'):
                params.append(val)
        params.sort(key=lambda x: x.name)
        for param in params:
            print(param.name)
    elif what == 'mfds':
        for cls in gen_subclasses(BaseMFD):
            print(cls.__name__)
    elif what == 'sources':
        for cls in gen_subclasses(BaseSeismicSource):
            print(cls.__name__)
    elif os.path.isdir(what) and report:
        with Monitor('info', measuremem=True) as mon:
            with mock.patch.object(logging.root, 'info'):  # reduce logging
                do_build_reports(what)
        print(mon)
    elif what.endswith('.xml'):
        node = nrml.read(what)
        if node[0].tag.endswith('sourceModel'):
            print(source_model_info([node]))
        elif node[0].tag.endswith('logicTree'):
            sm_nodes = []
            for smpath in logictree.collect_info(what).smpaths:
                sm_nodes.append(nrml.read(smpath))
            print(source_model_info(sm_nodes))
        else:
            print(node.to_str())
    elif what.endswith(('.ini', '.zip')):
        with Monitor('info', measuremem=True) as mon:
            if report:
                print('Generated', reportwriter.build_report(what))
            else:
                print(readinput.get_oqparam(what).json())
        if mon.duration > 1:
            print(mon)
    elif what:
        print("No info for '%s'" % what)