Пример #1
0
def build_hazard(pgetter, N, hstats, individual_curves, max_sites_disagg,
                 amplifier, monitor):
    """
    Build hazard curves and hazard maps for the sites of the given getter.

    :param pgetter: an :class:`openquake.commonlib.getters.PmapGetter`
    :param N: the total number of sites (not used in the body)
    :param hstats: a dictionary statname -> statfunc (can be empty)
    :param individual_curves: if True, also build the individual curves
    :param max_sites_disagg: if there are less sites than this, store rup info
        (not used in the body)
    :param amplifier: instance of Amplifier or None
    :param monitor: instance of Monitor
    :returns: a dictionary kind -> list of ProbabilityMaps

    The "kind" is one of 'hcurves-rlzs', 'hmaps-rlzs' (one map per
    realization), 'hcurves-stats' or 'hmaps-stats' (one map per statistic).
    The 'hmaps-*' kinds are built only if the getter has some PoEs.
    Sites with no data are skipped.
    """
    with monitor('read PoEs'):
        pgetter.init()
        if amplifier:
            ampcode = pgetter.dstore['sitecol'].ampcode
    imtls, poes, weights = pgetter.imtls, pgetter.poes, pgetter.weights
    M = len(imtls)
    P = len(poes)
    L = len(imtls.array) if amplifier is None else len(amplifier.amplevels) * M
    R = len(weights)
    S = len(hstats)
    # the individual realizations are needed when explicitly requested (and
    # there is more than one) or when there are no statistics to fall back on
    build_rlzs = R > 1 and individual_curves or not hstats
    pmap_by_kind = {}
    if build_rlzs:
        pmap_by_kind['hcurves-rlzs'] = [ProbabilityMap(L) for r in range(R)]
        if poes:
            # preallocated once, so that the maps of all the sites are
            # accumulated instead of being replaced at each iteration
            pmap_by_kind['hmaps-rlzs'] = [
                ProbabilityMap(M, P) for r in range(R)
            ]
    if hstats:
        pmap_by_kind['hcurves-stats'] = [ProbabilityMap(L) for r in range(S)]
        if poes:
            pmap_by_kind['hmaps-stats'] = [
                ProbabilityMap(M, P) for r in range(S)
            ]
    combine_mon = monitor('combine pmaps', measuremem=False)
    compute_mon = monitor('compute stats', measuremem=False)
    for sid in pgetter.sids:
        with combine_mon:
            pcurves = pgetter.get_pcurves(sid)
            if amplifier:
                pcurves = amplifier.amplify(ampcode[sid], pcurves)
        if sum(pc.array.sum() for pc in pcurves) == 0:  # no data
            continue
        with compute_mon:
            if hstats:
                arr = numpy.array([pc.array for pc in pcurves])
                for s, (statname, stat) in enumerate(hstats.items()):
                    sc = getters.build_stat_curve(arr, imtls, stat, weights)
                    pmap_by_kind['hcurves-stats'][s][sid] = sc
                    if poes:
                        hmap = calc.make_hmap(sc, imtls, poes, sid)
                        pmap_by_kind['hmaps-stats'][s].update(hmap)
            if build_rlzs:
                for pmap, pc in zip(pmap_by_kind['hcurves-rlzs'], pcurves):
                    pmap[sid] = pc
                if poes:
                    for hmaps, pc in zip(pmap_by_kind['hmaps-rlzs'], pcurves):
                        hmaps.update(calc.make_hmap(pc, imtls, poes, sid))
    return pmap_by_kind
Пример #2
0
    def full_disaggregation(self):
        """
        Prepare the inputs of the disaggregation and run it.

        Saves the bin edges, chooses the realizations to disaggregate for
        each site (stored as 'best_rlzs'), builds and stores the 'hmap4'
        and 'poe4' datasets and finally delegates to `self.compute()`.

        :returns: whatever `self.compute()` returns
        :raises SystemExit: if the hazard map array is identically zero
        """
        oq = self.oqparam
        edges, self.shapedic = disagg.get_edges_shapedic(
            oq, self.sitecol, self.datastore['source_mags'])
        self.save_bin_edges(edges)
        self.full_lt = self.datastore['full_lt']
        self.poes_disagg = oq.poes_disagg or (None, )
        self.imts = list(oq.imtls)
        self.M = len(self.imts)
        weights = [rlz.weight for rlz in self.full_lt.get_realizations()]
        # read the PoEs from the parent datastore, when there is one
        dstore = self.datastore.parent or self.datastore
        num_rows = len(dstore['_poes/sid'])
        self.pgetter = getters.PmapGetter(
            dstore, weights, [(0, num_rows + 1)], oq.imtls, oq.poes)

        # build array rlzs (N, Z)
        if oq.rlz_index is not None:
            # the realizations are given: use the same ones for every site
            Z = len(oq.rlz_index)
            rlzs = numpy.zeros((self.N, Z), int)
            for z in range(Z):
                rlzs[:, z] = oq.rlz_index[z]
        else:
            Z = oq.num_rlzs_disagg or 1
            rlzs = numpy.zeros((self.N, Z), int)
            if self.R > 1:
                for sid in self.sitecol.sids:
                    pcurve = self.pgetter.get_pcurve(sid)
                    mean = getters.build_stat_curve(
                        pcurve, oq.imtls, stats.mean_curve, weights)
                    # keep the Z realizations closest to the mean
                    closest = util.closest_to_ref(pcurve.array.T, mean.array)
                    rlzs[sid] = closest[:Z]
        self.datastore['best_rlzs'] = rlzs
        assert Z <= self.R, (Z, self.R)
        self.Z = Z
        self.rlzs = rlzs
        self.curves = []

        if not oq.iml_disagg:
            self.poe_id = {poe: idx for idx, poe in enumerate(oq.poes_disagg)}
            curves = [self.get_curve(sid, rlzs[sid])
                      for sid in self.sitecol.sids]
        else:
            # no hazard curves are needed
            self.poe_id = {None: 0}
            curves = [[None] * Z for _ in range(self.N)]
        self.hmap4 = _hmap4(
            rlzs, oq.iml_disagg, oq.imtls, self.poes_disagg, curves)
        if self.hmap4.array.sum() == 0:
            raise SystemExit('Cannot do any disaggregation: zero hazard')
        self.datastore['hmap4'] = self.hmap4
        self.datastore['poe4'] = numpy.zeros_like(self.hmap4.array)
        return self.compute()
Пример #3
0
    def full_disaggregation(self):
        """
        Run the disaggregation phase.

        Selects the realizations to disaggregate for each site, computes the
        intensity levels (`self.iml3`), checks the estimated data transfer
        and submits one `compute_disagg` task for each combination of
        source group, block of rupture indices and IMT.

        :returns: the task outputs reduced with `self.agg_result`
        :raises NotImplementedError: if there are atomic source groups
        :raises ValueError: if the estimated data transfer is larger
            than `oq.max_data_transfer`
        """
        oq = self.oqparam
        mags_by_trt = self.datastore['source_mags']
        all_edges, shapedic = disagg.get_edges_shapedic(
            oq, self.sitecol, mags_by_trt)
        # the last item of all_edges goes into self.trts, all the others
        # into the list self.bin_edges
        *self.bin_edges, self.trts = all_edges
        src_filter = self.src_filter()
        if hasattr(self, 'csm'):
            for sg in self.csm.src_groups:
                if sg.atomic:
                    raise NotImplementedError(
                        'Atomic groups are not supported yet')

        self.full_lt = self.datastore['full_lt']
        # use a single placeholder PoE when no poes_disagg are given
        self.poes_disagg = oq.poes_disagg or (None,)
        self.imts = list(oq.imtls)

        self.ws = [rlz.weight for rlz in self.full_lt.get_realizations()]
        self.pgetter = getters.PmapGetter(
            self.datastore, self.ws, self.sitecol.sids)

        # build array rlzs (N, Z)
        if oq.rlz_index is None:
            Z = oq.num_rlzs_disagg
            rlzs = numpy.zeros((self.N, Z), int)
            if self.R > 1:
                for sid in self.sitecol.sids:
                    curves = numpy.array(
                        [pc.array for pc in self.pgetter.get_pcurves(sid)])
                    mean = getters.build_stat_curve(
                        curves, oq.imtls, stats.mean_curve, self.ws)
                    # keep the first Z realizations returned by closest_to_ref
                    rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
                # NB: 'best_rlzs' is stored only in this branch (R > 1)
                self.datastore['best_rlzs'] = rlzs
        else:
            # the realizations are given: the same ones for all the sites
            Z = len(oq.rlz_index)
            rlzs = numpy.zeros((self.N, Z), int)
            for z in range(Z):
                rlzs[:, z] = oq.rlz_index[z]
        assert Z <= self.R, (Z, self.R)
        self.Z = Z
        self.rlzs = rlzs

        if oq.iml_disagg:
            # no hazard curves are needed
            self.poe_id = {None: 0}
            curves = [[None for z in range(Z)] for s in range(self.N)]
            self.ok_sites = set(self.sitecol.sids)
        else:
            self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
            curves = [self.get_curve(sid, rlzs[sid])
                      for sid in self.sitecol.sids]
            self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
        # indexed below as self.iml3[imt][s, p, z]
        self.iml3 = _iml3(rlzs, oq.iml_disagg, oq.imtls,
                          self.poes_disagg, curves)
        if oq.disagg_by_src:
            self.build_disagg_by_src(rlzs)

        self.save_bin_edges()
        # the 'trt' entry is not considered in the data transfer estimate
        sd = shapedic.copy()
        sd.pop('trt')
        nbytes, msg = get_array_nbytes(sd)
        if nbytes > oq.max_data_transfer:
            raise ValueError(
                'Estimated data transfer too big\n%s > max_data_transfer=%s' %
                (msg, humansize(oq.max_data_transfer)))
        logging.info('Estimated data transfer:\n%s', msg)
        tot = get_outputs_size(shapedic, oq.disagg_outputs or disagg.pmf_map)
        logging.info('Total output size: %s', humansize(sum(tot.values())))
        self.imldic = {}  # sid, rlz, poe, imt -> iml
        for s in self.sitecol.sids:
            for z, rlz in enumerate(rlzs[s]):
                for p, poe in enumerate(self.poes_disagg):
                    for imt in oq.imtls:
                        self.imldic[s, rlz, poe, imt] = self.iml3[imt][s, p, z]

        # submit #groups disaggregation tasks
        # the ruptures are read from the parent datastore, when there is one
        dstore = (self.datastore.parent if self.datastore.parent
                  else self.datastore)
        M = len(oq.imtls)
        # the `or 1` guarantees a nonzero value when concurrent_tasks is 0
        tasks_per_imt = numpy.ceil(oq.concurrent_tasks / M) or 1
        rups_per_task = len(dstore['rup/mag']) / tasks_per_imt
        logging.info('Considering ~%d ruptures per task', rups_per_task)
        indices = get_indices(dstore, tasks_per_imt)
        self.datastore.swmr_on()
        smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5)
        trt_num = {trt: i for i, trt in enumerate(self.trts)}
        for grp_id, trt in self.full_lt.trt_by_grp.items():
            logging.info('Group #%d, sending rup_data for %s', grp_id, trt)
            trti = trt_num[trt]
            cmaker = ContextMaker(
                trt, self.full_lt.get_rlzs_by_gsim(grp_id),
                {'truncation_level': oq.truncation_level,
                 'maximum_distance': src_filter.integration_distance,
                 'imtls': oq.imtls})
            for idxs in indices[grp_id]:
                # one task per IMT, each receiving only the levels of its IMT
                for imt in oq.imtls:
                    smap.submit((dstore, idxs, cmaker, self.iml3[imt], trti,
                                 self.bin_edges, oq))
        results = smap.reduce(self.agg_result, AccumDict(accum={}))
        return results  # sid -> trti-> 8D array
Пример #4
0
    def full_disaggregation(self):
        """
        Run the disaggregation phase.

        Selects the realizations to disaggregate for each site, computes the
        intensity levels (`self.iml4`), builds and saves the bin edges and
        runs the `compute_disagg` tasks, one for each block of ruptures of
        each source group.

        :returns: the task outputs reduced with `self.agg_result`
        :raises NotImplementedError: if there are atomic source groups
        :raises RuntimeError: if the composite source model has no sources
        """
        oq = self.oqparam
        tl = oq.truncation_level
        src_filter = self.src_filter()
        if hasattr(self, 'csm'):
            for sg in self.csm.src_groups:
                if sg.atomic:
                    raise NotImplementedError(
                        'Atomic groups are not supported yet')
            if not self.csm.get_sources():
                raise RuntimeError('All sources were filtered away!')

        csm_info = self.datastore['csm_info']
        # use a single placeholder PoE when no poes_disagg are given
        self.poes_disagg = oq.poes_disagg or (None, )
        self.imts = list(oq.imtls)

        self.ws = [rlz.weight for rlz in self.rlzs_assoc.realizations]
        self.pgetter = getters.PmapGetter(self.datastore, self.ws,
                                          self.sitecol.sids)

        # build array rlzs (N, Z)
        if oq.rlz_index is None:
            Z = oq.num_rlzs_disagg
            rlzs = numpy.zeros((self.N, Z), int)
            if self.R > 1:
                for sid in self.sitecol.sids:
                    curves = numpy.array(
                        [pc.array for pc in self.pgetter.get_pcurves(sid)])
                    mean = getters.build_stat_curve(curves, oq.imtls,
                                                    stats.mean_curve, self.ws)
                    rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
                # NB: 'best_rlzs' is stored only in this branch (R > 1)
                self.datastore['best_rlzs'] = rlzs
        else:
            # the realizations are given: the same ones for all the sites
            Z = len(oq.rlz_index)
            rlzs = numpy.zeros((self.N, Z), int)
            for z in range(Z):
                rlzs[:, z] = oq.rlz_index[z]
        assert Z <= self.R, (Z, self.R)
        self.Z = Z
        self.rlzs = rlzs

        if oq.iml_disagg:
            # no hazard curves are needed
            self.poe_id = {None: 0}
            curves = [[None for z in range(Z)] for s in range(self.N)]
            self.ok_sites = set(self.sitecol.sids)
        else:
            self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
            curves = [
                self.get_curve(sid, rlzs[sid]) for sid in self.sitecol.sids
            ]
            self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
        self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls, self.poes_disagg,
                          curves)
        if oq.disagg_by_src:
            self.build_disagg_by_src(rlzs)

        # build trt_edges
        trts = tuple(csm_info.trts)
        trt_num = {trt: i for i, trt in enumerate(trts)}
        self.trts = trts

        # build mag_edges
        min_mag = csm_info.min_mag
        max_mag = csm_info.max_mag
        mag_edges = oq.mag_bin_width * numpy.arange(
            int(numpy.floor(min_mag / oq.mag_bin_width)),
            int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))

        # build dist_edges
        maxdist = max(oq.maximum_distance(trt) for trt in trts)
        dist_edges = oq.distance_bin_width * numpy.arange(
            0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))

        # build eps_edges (computed once; symmetric around zero)
        eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

        # build lon_edges, lat_edges per sid
        bbs = src_filter.get_bounding_boxes(mag=max_mag)
        lon_edges, lat_edges = {}, {}  # by sid
        for sid, bb in zip(self.sitecol.sids, bbs):
            lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
                bb, oq.coordinate_bin_width)
        self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
        self.save_bin_edges()

        self.imldict = {}  # sid, rlz, poe, imt -> iml
        for s in self.sitecol.sids:
            for z, rlz in enumerate(rlzs[s]):
                logging.info('Site #%d, disaggregating for rlz=#%d', s, rlz)
                for p, poe in enumerate(self.poes_disagg):
                    for m, imt in enumerate(oq.imtls):
                        self.imldict[s, rlz, poe, imt] = self.iml4[s, m, p, z]

        # submit disagg tasks
        gid = self.datastore['rup/grp_id'][()]
        indices_by_grp = get_indices(gid)  # grp_id -> [(start, stop),...]
        blocksize = len(gid) // (oq.concurrent_tasks or 1) + 1
        # NB: removing the blocksize causes slow disaggregation tasks
        allargs = []
        # the tasks read from the parent datastore, when there is one
        dstore = (self.datastore.parent
                  if self.datastore.parent else self.datastore)
        for grp_id, trt in csm_info.trt_by_grp.items():
            trti = trt_num[trt]
            rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(grp_id)
            cmaker = ContextMaker(
                trt, rlzs_by_gsim, {
                    'truncation_level': oq.truncation_level,
                    'maximum_distance': src_filter.integration_distance,
                    'filter_distance': oq.filter_distance,
                    'imtls': oq.imtls
                })
            for start, stop in indices_by_grp[grp_id]:
                for slc in gen_slices(start, stop, blocksize):
                    allargs.append((dstore, slc, self.sitecol, oq, cmaker,
                                    self.iml4, trti, self.bin_edges))
        results = parallel.Starmap(compute_disagg,
                                   allargs,
                                   h5=self.datastore.hdf5).reduce(
                                       self.agg_result, AccumDict(accum={}))
        return results  # sid -> trti-> 8D array
Пример #5
0
def postclassical(pgetter, N, hstats, individual_rlzs, max_sites_disagg,
                  amplifier, monitor):
    """
    Compute hazard curves and hazard maps, per realization and/or per
    statistic, for the sites of the given getter.

    :param pgetter: an :class:`openquake.commonlib.getters.PmapGetter`
    :param N: the total number of sites
    :param hstats: a dictionary statname -> statfunc
    :param individual_rlzs: if True, also build the individual curves
    :param max_sites_disagg: if there are less sites than this, store rup info
    :param amplifier: instance of Amplifier or None
    :param monitor: instance of Monitor
    :returns: a dictionary kind -> list of ProbabilityMaps

    The "kind" is one of 'hcurves-rlzs', 'hmaps-rlzs', 'hcurves-stats',
    'hmaps-stats'; the hazard maps are built only if there are PoEs.
    """
    with monitor('read PoEs', measuremem=True):
        pgetter.init()

    if not amplifier:
        imtls = pgetter.imtls
    else:
        with hdf5.File(pgetter.filename, 'r') as h5:
            ampcode = h5['sitecol'].ampcode
        # every IMT gets the amplification levels
        imtls = DictArray({imt: amplifier.amplevels for imt in pgetter.imtls})
    poes = pgetter.poes
    weights = pgetter.weights
    M, P, L = len(imtls), len(poes), imtls.size
    R, S = len(weights), len(hstats)
    # the realizations are needed when requested (and more than one)
    # or when no statistics are available
    with_rlzs = R > 1 and individual_rlzs or not hstats

    pmap_by_kind = {}
    if with_rlzs:
        pmap_by_kind['hcurves-rlzs'] = [ProbabilityMap(L) for _ in range(R)]
        if poes:
            pmap_by_kind['hmaps-rlzs'] = [
                ProbabilityMap(M, P) for _ in range(R)]
    if hstats:
        pmap_by_kind['hcurves-stats'] = [ProbabilityMap(L) for _ in range(S)]
        if poes:
            pmap_by_kind['hmaps-stats'] = [
                ProbabilityMap(M, P) for _ in range(S)]

    combine_mon = monitor('combine pmaps', measuremem=False)
    compute_mon = monitor('compute stats', measuremem=False)
    for sid in pgetter.sids:
        with combine_mon:
            pcurve = pgetter.get_pcurve(sid)  # shape (L, R)
            if amplifier:
                # NB: the pcurve have soil levels != IMT levels
                pcurve = amplifier.amplify(ampcode[sid], pcurve)
        if pcurve.array.sum() == 0:  # no data
            continue
        with compute_mon:
            if hstats:
                for s, (_name, statfunc) in enumerate(hstats.items()):
                    statcurve = getters.build_stat_curve(
                        pcurve, imtls, statfunc, weights)
                    pmap_by_kind['hcurves-stats'][s][sid] = statcurve
                    if poes:
                        pmap_by_kind['hmaps-stats'][s].update(
                            calc.make_hmap(statcurve, imtls, poes, sid))
            if with_rlzs:
                for r, hcurves in enumerate(pmap_by_kind['hcurves-rlzs']):
                    hcurves[sid] = pcurve.extract(r)
                if poes:
                    for r, hmaps in enumerate(pmap_by_kind['hmaps-rlzs']):
                        hmaps.update(calc.make_hmap(
                            pcurve.extract(r), imtls, poes, sid))
    return pmap_by_kind
Пример #6
0
    def full_disaggregation(self):
        """
        Set up the disaggregation and delegate the computation.

        Rejects atomic source groups, chooses the realizations to use for
        each site (stored as 'best_rlzs'), stores the 'hmap4' and 'poe4'
        datasets, saves the bin edges, logs the total output size and then
        calls `self.compute()`.

        :returns: whatever `self.compute()` returns
        :raises NotImplementedError: if there are atomic source groups
        :raises SystemExit: if the hazard map array is identically zero
        """
        oq = self.oqparam
        mags_by_trt = self.datastore['source_mags']
        all_edges, self.shapedic = disagg.get_edges_shapedic(
            oq, self.sitecol, mags_by_trt)
        *self.bin_edges, self.trts = all_edges
        # look at the source model if available, else at the stored flag
        if hasattr(self, 'csm'):
            has_atomic = any(grp.atomic for grp in self.csm.src_groups)
        else:
            has_atomic = self.datastore['source_info'].attrs['atomic']
        if has_atomic:
            raise NotImplementedError('Atomic groups are not supported yet')

        self.full_lt = self.datastore['full_lt']
        self.poes_disagg = oq.poes_disagg or (None, )
        self.imts = list(oq.imtls)
        self.M = len(self.imts)
        weights = [rlz.weight for rlz in self.full_lt.get_realizations()]
        self.pgetter = getters.PmapGetter(
            self.datastore, weights, self.sitecol.sids, oq.imtls, oq.poes)

        # build array rlzs (N, Z)
        if oq.rlz_index is not None:
            # fixed realizations, identical for all the sites
            Z = len(oq.rlz_index)
            rlzs = numpy.zeros((self.N, Z), int)
            for col, rlz in enumerate(oq.rlz_index):
                rlzs[:, col] = rlz
        else:
            Z = oq.num_rlzs_disagg or 1
            rlzs = numpy.zeros((self.N, Z), int)
            if self.R > 1:
                for sid in self.sitecol.sids:
                    arr = numpy.array(
                        [pc.array for pc in self.pgetter.get_pcurves(sid)])
                    mean = getters.build_stat_curve(
                        arr, oq.imtls, stats.mean_curve, weights)
                    # get the closest realization to the mean
                    rlzs[sid] = util.closest_to_ref(arr, mean.array)[:Z]
        self.datastore['best_rlzs'] = rlzs
        assert Z <= self.R, (Z, self.R)
        self.Z = Z
        self.rlzs = rlzs
        self.curves = []

        if not oq.iml_disagg:
            self.poe_id = {poe: idx for idx, poe in enumerate(oq.poes_disagg)}
            curves = [self.get_curve(sid, rlzs[sid])
                      for sid in self.sitecol.sids]
        else:
            # no hazard curves are needed
            self.poe_id = {None: 0}
            curves = [[None] * Z for _ in range(self.N)]
        self.hmap4 = _hmap4(
            rlzs, oq.iml_disagg, oq.imtls, self.poes_disagg, curves)
        if self.hmap4.array.sum() == 0:
            raise SystemExit('Cannot do any disaggregation: zero hazard')
        self.datastore['hmap4'] = self.hmap4
        self.datastore['poe4'] = numpy.zeros_like(self.hmap4.array)

        self.save_bin_edges()
        sizes = get_outputs_size(self.shapedic, oq.disagg_outputs)
        total = sum(sizes.values())
        logging.info('Total output size: %s', humansize(total))
        return self.compute()
Пример #7
0
    def full_disaggregation(self):
        """
        Run the disaggregation phase.

        Selects the realizations to disaggregate for each site, computes and
        stores the intensity levels ('iml4'), builds the arguments of the
        `compute_disagg` tasks by splitting the ruptures of each
        (gidx, magi) key in blocks of bounded weight, checks the estimated
        data transfer and runs the tasks.

        :returns: the task outputs reduced with `self.agg_result`
        :raises NotImplementedError: if there are atomic source groups
        :raises ValueError: if the estimated data transfer is larger
            than `oq.max_data_transfer`
        """
        oq = self.oqparam
        mags_by_trt = self.datastore['source_mags']
        all_edges, self.shapedic = disagg.get_edges_shapedic(
            oq, self.sitecol, mags_by_trt)
        # the last item of all_edges goes into self.trts, all the others
        # into the list self.bin_edges
        *self.bin_edges, self.trts = all_edges
        # the atomic flag is read from the source model if available,
        # otherwise from the attributes of 'source_info'
        if hasattr(self, 'csm'):
            for sg in self.csm.src_groups:
                if sg.atomic:
                    raise NotImplementedError(
                        'Atomic groups are not supported yet')
        elif self.datastore['source_info'].attrs['atomic']:
            raise NotImplementedError(
                'Atomic groups are not supported yet')

        self.full_lt = self.datastore['full_lt']
        # use a single placeholder PoE when no poes_disagg are given
        self.poes_disagg = oq.poes_disagg or (None,)
        self.imts = list(oq.imtls)
        self.M = len(self.imts)
        ws = [rlz.weight for rlz in self.full_lt.get_realizations()]
        self.pgetter = getters.PmapGetter(
            self.datastore, ws, self.sitecol.sids)

        # build array rlzs (N, Z)
        if oq.rlz_index is None:
            Z = oq.num_rlzs_disagg or 1
            rlzs = numpy.zeros((self.N, Z), int)
            if self.R > 1:
                for sid in self.sitecol.sids:
                    curves = numpy.array(
                        [pc.array for pc in self.pgetter.get_pcurves(sid)])
                    mean = getters.build_stat_curve(
                        curves, oq.imtls, stats.mean_curve, ws)
                    # keep the first Z realizations returned by closest_to_ref
                    rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
            self.datastore['best_rlzs'] = rlzs
        else:
            # the realizations are given: the same ones for all the sites
            Z = len(oq.rlz_index)
            rlzs = numpy.zeros((self.N, Z), int)
            for z in range(Z):
                rlzs[:, z] = oq.rlz_index[z]
            self.datastore['best_rlzs'] = rlzs
        assert Z <= self.R, (Z, self.R)
        self.Z = Z
        self.rlzs = rlzs

        if oq.iml_disagg:
            # no hazard curves are needed
            self.poe_id = {None: 0}
            curves = [[None for z in range(Z)] for s in range(self.N)]
            self.ok_sites = set(self.sitecol.sids)
        else:
            self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
            curves = [self.get_curve(sid, rlzs[sid])
                      for sid in self.sitecol.sids]
            self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
        self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls,
                          self.poes_disagg, curves)
        self.datastore['iml4'] = self.iml4
        # 'poe4' starts as an array of zeros shaped like iml4
        self.datastore['poe4'] = numpy.zeros_like(self.iml4.array)

        self.save_bin_edges()
        tot = get_outputs_size(self.shapedic, oq.disagg_outputs)
        logging.info('Total output size: %s', humansize(sum(tot.values())))
        self.imldic = {}  # sid, rlz, poe, imt -> iml
        for s in self.sitecol.sids:
            iml3 = self.iml4[s]
            for z, rlz in enumerate(rlzs[s]):
                for p, poe in enumerate(self.poes_disagg):
                    for m, imt in enumerate(oq.imtls):
                        self.imldic[s, rlz, poe, imt] = iml3[m, p, z]

        # submit disaggregation tasks
        # the ruptures are read from the parent datastore, when there is one
        dstore = (self.datastore.parent if self.datastore.parent
                  else self.datastore)
        mag_edges = self.bin_edges[0]
        # the keys of `indices` are pairs (gidx, magi) and the values are
        # collections of objects with attributes .index and .weight
        indices = get_indices_by_gidx_mag(dstore, mag_edges)
        allargs = []
        totweight = sum(sum(ri.weight for ri in indices[gm])
                        for gm in indices)
        # upper limit for the weight of the ruptures sent to a single task
        maxweight = int(numpy.ceil(totweight / (oq.concurrent_tasks or 1)))
        grp_ids = dstore['grp_ids'][:]
        rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(grp_ids)
        num_eff_rlzs = len(self.full_lt.sm_rlzs)
        task_inputs = []
        # G = maximum number of GSIMs per context maker,
        # U = maximum number of ruptures per task
        G, U = 0, 0
        for gidx, magi in indices:
            # NOTE(review): presumably the group IDs are numbered so that the
            # integer division gives the TRT index - to be confirmed
            trti = grp_ids[gidx][0] // num_eff_rlzs
            trt = self.trts[trti]
            cmaker = ContextMaker(
                trt, rlzs_by_gsim[gidx],
                {'truncation_level': oq.truncation_level,
                 'maximum_distance': oq.maximum_distance,
                 'collapse_level': oq.collapse_level,
                 'imtls': oq.imtls})
            G = max(G, len(cmaker.gsims))
            # NB: `weight` is not defined in this method, it must come
            # from the enclosing module
            for rupidxs in block_splitter(
                    indices[gidx, magi], maxweight, weight):
                idxs = numpy.array([ri.index for ri in rupidxs])
                U = max(U, len(idxs))
                # the magnitude edges are not sent, only the bin index magi
                allargs.append((dstore, idxs, cmaker, self.iml4,
                                trti, magi, self.bin_edges[1:], oq))
                task_inputs.append((trti, magi, len(idxs)))

        # only the message is used here; nbytes is overwritten below
        nbytes, msg = get_array_nbytes(dict(N=self.N, M=self.M, G=G, U=U))
        logging.info('Maximum mean_std per task:\n%s', msg)
        # 'trt' and 'mag' are replaced by the number of tasks in the
        # data transfer estimate
        sd = self.shapedic.copy()
        sd.pop('trt')
        sd.pop('mag')
        sd['tasks'] = numpy.ceil(len(allargs))
        nbytes, msg = get_array_nbytes(sd)
        if nbytes > oq.max_data_transfer:
            raise ValueError(
                'Estimated data transfer too big\n%s > max_data_transfer=%s' %
                (msg, humansize(oq.max_data_transfer)))
        logging.info('Estimated data transfer:\n%s', msg)
        # store one record (trti, magi, nrups) per task
        dt = numpy.dtype([('trti', U8), ('magi', U8), ('nrups', U32)])
        self.datastore['disagg_task'] = numpy.array(task_inputs, dt)
        self.datastore.swmr_on()
        smap = parallel.Starmap(
            compute_disagg, allargs, h5=self.datastore.hdf5)
        results = smap.reduce(self.agg_result, AccumDict(accum={}))
        return results  # imti, sid -> trti, magi -> 6D array
Пример #8
0
    def full_disaggregation(self):
        """
        Run the disaggregation phase.

        Selects the realizations to disaggregate for each site, computes the
        intensity levels (`self.iml4`), builds and saves the bin edges,
        checks the estimated data transfer and submits the `compute_disagg`
        tasks, one for each block of rupture indices of each source group.

        :returns: the task outputs reduced with `self.agg_result`
        :raises NotImplementedError: if there are atomic source groups
        :raises ValueError: if the estimated data transfer is larger
            than `oq.max_data_transfer`
        """
        oq = self.oqparam
        tl = oq.truncation_level
        src_filter = self.src_filter()
        if hasattr(self, 'csm'):
            for sg in self.csm.src_groups:
                if sg.atomic:
                    raise NotImplementedError(
                        'Atomic groups are not supported yet')

        self.full_lt = self.datastore['full_lt']
        # use a single placeholder PoE when no poes_disagg are given
        self.poes_disagg = oq.poes_disagg or (None,)
        self.imts = list(oq.imtls)

        self.ws = [rlz.weight for rlz in self.full_lt.get_realizations()]
        self.pgetter = getters.PmapGetter(
            self.datastore, self.ws, self.sitecol.sids)

        # build array rlzs (N, Z)
        if oq.rlz_index is None:
            Z = oq.num_rlzs_disagg
            rlzs = numpy.zeros((self.N, Z), int)
            if self.R > 1:
                for sid in self.sitecol.sids:
                    curves = numpy.array(
                        [pc.array for pc in self.pgetter.get_pcurves(sid)])
                    mean = getters.build_stat_curve(
                        curves, oq.imtls, stats.mean_curve, self.ws)
                    rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
                # NB: 'best_rlzs' is stored only in this branch (R > 1)
                self.datastore['best_rlzs'] = rlzs
        else:
            # the realizations are given: the same ones for all the sites
            Z = len(oq.rlz_index)
            rlzs = numpy.zeros((self.N, Z), int)
            for z in range(Z):
                rlzs[:, z] = oq.rlz_index[z]
        assert Z <= self.R, (Z, self.R)
        self.Z = Z
        self.rlzs = rlzs

        if oq.iml_disagg:
            # no hazard curves are needed
            self.poe_id = {None: 0}
            curves = [[None for z in range(Z)] for s in range(self.N)]
            self.ok_sites = set(self.sitecol.sids)
        else:
            self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
            curves = [self.get_curve(sid, rlzs[sid])
                      for sid in self.sitecol.sids]
            self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
        self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls,
                          self.poes_disagg, curves)
        if oq.disagg_by_src:
            self.build_disagg_by_src(rlzs)

        # build trt_edges
        trts = tuple(self.full_lt.trts)
        trt_num = {trt: i for i, trt in enumerate(trts)}
        self.trts = trts

        # build mag_edges
        mags = [float(mag) for mag in self.datastore['source_mags']]
        mag_edges = oq.mag_bin_width * numpy.arange(
            int(numpy.floor(min(mags) / oq.mag_bin_width)),
            int(numpy.ceil(max(mags) / oq.mag_bin_width) + 1))

        # build dist_edges
        maxdist = max(oq.maximum_distance(trt) for trt in trts)
        dist_edges = oq.distance_bin_width * numpy.arange(
            0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))

        # build eps_edges (computed once; symmetric around zero)
        eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

        # build lon_edges, lat_edges per sid
        bbs = src_filter.get_bounding_boxes(mag=max(mags))
        lon_edges, lat_edges = {}, {}  # by sid
        for sid, bb in zip(self.sitecol.sids, bbs):
            lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
                bb, oq.coordinate_bin_width)
        self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
        shapedic = self.save_bin_edges()
        del shapedic['trt']
        shapedic['N'] = self.N
        shapedic['M'] = len(oq.imtls)
        # use the effective number of PoEs and tasks, i.e. the same fallbacks
        # applied elsewhere in this method: a zero size would presumably
        # nullify the estimate and make the check below useless
        shapedic['P'] = len(self.poes_disagg)
        shapedic['Z'] = Z
        shapedic['concurrent_tasks'] = oq.concurrent_tasks or 1
        nbytes, msg = get_array_nbytes(shapedic)
        if nbytes > oq.max_data_transfer:
            raise ValueError('Estimated data transfer too big\n%s' % msg)
        logging.info('Estimated data transfer: %s', msg)
        self.imldict = {}  # sid, rlz, poe, imt -> iml
        for s in self.sitecol.sids:
            for z, rlz in enumerate(rlzs[s]):
                for p, poe in enumerate(self.poes_disagg):
                    for m, imt in enumerate(oq.imtls):
                        self.imldict[s, rlz, poe, imt] = self.iml4[s, m, p, z]

        # submit #groups disaggregation tasks
        # the ruptures are read from the parent datastore, when there is one
        dstore = (self.datastore.parent if self.datastore.parent
                  else self.datastore)
        indices = get_indices(dstore, oq.concurrent_tasks or 1)
        self.datastore.swmr_on()
        smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5)
        for grp_id, trt in self.full_lt.trt_by_grp.items():
            logging.info('Group #%d, sending rup_data for %s', grp_id, trt)
            trti = trt_num[trt]
            cmaker = ContextMaker(
                trt, self.full_lt.get_rlzs_by_gsim(grp_id),
                {'truncation_level': oq.truncation_level,
                 'maximum_distance': src_filter.integration_distance,
                 'filter_distance': oq.filter_distance, 'imtls': oq.imtls})
            for idxs in indices[grp_id]:
                smap.submit((dstore, idxs, cmaker, self.iml4, trti,
                             self.bin_edges))
        results = smap.reduce(self.agg_result, AccumDict(accum={}))
        return results  # sid -> trti-> 8D array