Example #1
    def split_sources(self,
                      sources=None,
                      src_filter=None,
                      maxweight=None,
                      concurrent_tasks=None):
        """
        Split a set of sources of the same source group; light sources
        (i.e. with weight <= maxweight) are not split.

        :param sources: sources of the same source group
        :param src_filter: SourceFilter instance
        :param maxweight: weight used to decide if a source is light
        :param concurrent_tasks: used to compute maxweight when not passed
        :yields: blocks of sources of weight around maxweight
        """
        if sources is None:
            sources = self.get_sources()
        if src_filter is None:
            src_filter = self.src_filter
        if maxweight is None:
            maxweight = self.get_maxweight(concurrent_tasks)
        light = [src for src in sources if src.weight <= maxweight]
        for block in block_splitter(light,
                                    maxweight,
                                    weight=operator.attrgetter('weight')):
            yield block
        heavy = [src for src in sources if src.weight > maxweight]
        for src in heavy:
            srcs = split_filter_source(src, src_filter)
            for block in block_splitter(srcs,
                                        maxweight,
                                        weight=operator.attrgetter('weight')):
                yield block
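For reference, a minimal sketch of what block_splitter does with weighted items, assuming openquake.baselib is importable; FakeSource is an invented stand-in for real seismic sources:

import operator
from openquake.baselib.general import block_splitter

class FakeSource(object):
    def __init__(self, source_id, weight):
        self.source_id = source_id
        self.weight = weight

srcs = [FakeSource('src%d' % i, w)
        for i, w in enumerate([3, 5, 2, 40, 7])]
maxweight = 10
# keep only the light sources, as split_sources above does
light = [s for s in srcs if s.weight <= maxweight]
for block in block_splitter(light, maxweight,
                            weight=operator.attrgetter('weight')):
    # each block is a WeightedSequence with a .weight attribute
    print([s.source_id for s in block], block.weight)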
Example #2
    def split_in_blocks(self, maxweight, sources):
        """
        Split a set of sources in blocks of weight up to maxweight; heavy
        sources (i.e. with weight > maxweight) are split.

        :param maxweight: maximum weight of a block
        :param sources: sources of the same source group
        :yields: blocks of sources of weight around maxweight
        """
        sources.sort(key=weight)

        # yield light sources in blocks
        light = [src for src in sources if src.weight <= maxweight]
        for block in block_splitter(light, maxweight, weight):
            yield block

        # yield heavy sources in blocks
        heavy = [src for src in sources if src.weight > maxweight]
        for src in heavy:
            srcs = [
                s for s in source.split_source(src)
                if self.src_filter.get_close_sites(s) is not None
            ]
            for block in block_splitter(srcs, maxweight, weight):
                yield block
Example #3
    def split_sources(self, sources, src_filter, maxweight=MAXWEIGHT):
        """
        Split a set of sources of the same source group; light sources
        (i.e. with weight <= maxweight) are not split.

        :param sources: sources of the same source group
        :param src_filter: SourceFilter instance
        :param maxweight: weight used to decide if a source is light
        :yields: blocks of sources of weight around maxweight
        """
        light = [src for src in sources if src.weight <= maxweight]
        self.add_infos(light)
        for block in block_splitter(light,
                                    maxweight,
                                    weight=operator.attrgetter('weight')):
            yield block
        heavy = [src for src in sources if src.weight > maxweight]
        self.add_infos(heavy)
        for src in heavy:
            srcs = sourceconverter.split_filter_source(src, src_filter)
            if len(srcs) > 1:
                logging.info('Splitting %s "%s" in %d sources',
                             src.__class__.__name__, src.source_id, len(srcs))
            for block in block_splitter(srcs,
                                        maxweight,
                                        weight=operator.attrgetter('weight')):
                yield block
Example #4
    def build_starmap(self, ssm, sitecol, assetcol, riskmodel, imts,
                      trunc_level, correl_model, min_iml, monitor):
        """
        :param ssm: CompositeSourceModel containing a single source model
        :param sitecol: a SiteCollection instance
        :param assetcol: an AssetCollection instance
        :param riskmodel: a RiskModel instance
        :param imts: a list of Intensity Measure Types
        :param trunc_level: truncation level
        :param correl_model: correlation model
        :param min_iml: vector of minimum intensities, one per IMT
        :param monitor: a Monitor instance
        :returns: a pair (starmap, dictionary)
        """
        ruptures_by_grp = AccumDict()
        num_ruptures = 0
        num_events = 0
        allargs = []
        grp_trt = {}
        # collect the sources
        maxweight = ssm.get_maxweight(self.oqparam.concurrent_tasks)
        logging.info('Using a maxweight of %d', maxweight)
        for src_group in ssm.src_groups:
            grp_trt[src_group.id] = trt = src_group.trt
            gsims = ssm.gsim_lt.values[trt]
            for block in block_splitter(src_group, maxweight, getweight):
                allargs.append((block, self.sitecol, gsims, monitor))
        # collect the ruptures
        for dic in parallel.starmap(self.compute_ruptures, allargs):
            ruptures_by_grp += dic
            [rupts] = dic.values()
            num_ruptures += len(rupts)
            num_events += dic.num_events
        ruptures_by_grp.num_events = num_events
        save_ruptures(self, ruptures_by_grp)

        # determine the realizations
        rlzs_assoc = ssm.info.get_rlzs_assoc(
            count_ruptures=lambda grp: len(ruptures_by_grp.get(grp.id, [])))
        allargs = []
        # prepare the risk inputs
        ruptures_per_block = self.oqparam.ruptures_per_block
        for src_group in ssm.src_groups:
            for rupts in block_splitter(
                    ruptures_by_grp[src_group.id], ruptures_per_block):
                trt = grp_trt[rupts[0].grp_id]
                ri = riskinput.RiskInputFromRuptures(
                    trt, imts, sitecol, rupts, trunc_level,
                    correl_model, min_iml)
                allargs.append((ri, riskmodel, rlzs_assoc, assetcol, monitor))
        taskname = '%s#%d' % (losses_by_taxonomy.__name__, ssm.sm_id + 1)
        smap = starmap(losses_by_taxonomy, allargs, name=taskname)
        attrs = dict(num_ruptures={
            sg_id: len(rupts) for sg_id, rupts in ruptures_by_grp.items()},
                     num_events=num_events,
                     num_rlzs=len(rlzs_assoc.realizations),
                     sm_id=ssm.sm_id)
        return smap, attrs
Example #5
    def gen_args(self, src_groups, oq, monitor):
        """
        Used in the case of large source model logic trees.

        :param src_groups: a list of SourceGroup instances
        :param oq: a :class:`openquake.commonlib.oqvalidation.OqParam` instance
        :param monitor: a :class:`openquake.baselib.performance.Monitor`
        :yields: (sources, sites, gsims, monitor) tuples
        """
        ngroups = len(src_groups)
        maxweight = self.csm.get_maxweight(oq.concurrent_tasks)
        logging.info('Using a maxweight of %d', maxweight)
        nheavy = nlight = 0
        self.infos = {}
        for sg in src_groups:
            logging.info('Sending source group #%d of %d (%s, %d sources)',
                         sg.id + 1, ngroups, sg.trt, len(sg.sources))
            gsims = self.rlzs_assoc.gsims_by_grp_id[sg.id]
            if oq.poes_disagg:  # only for disaggregation
                monitor.sm_id = self.rlzs_assoc.sm_ids[sg.id]
            monitor.seed = self.rlzs_assoc.seed
            monitor.samples = self.rlzs_assoc.samples[sg.id]
            light = [src for src in sg.sources if src.weight <= maxweight]
            for block in block_splitter(
                    light, maxweight, weight=operator.attrgetter('weight')):
                for src in block:
                    self.infos[sg.id, src.source_id] = source.SourceInfo(src)
                yield block, self.sitecol, gsims, monitor
                nlight += 1
            heavy = [src for src in sg.sources if src.weight > maxweight]
            if not heavy:
                continue
            with self.monitor('split/filter heavy sources', autoflush=True):
                for src in heavy:
                    sites = self.ss_filter.affected(src)
                    self.infos[sg.id, src.source_id] = source.SourceInfo(src)
                    sources = split_filter_source(
                        src, sites, self.ss_filter, self.random_seed)
                    if len(sources) > 1:
                        logging.info(
                            'Splitting %s "%s" in %d sources',
                            src.__class__.__name__,
                            src.source_id, len(sources))
                    for block in block_splitter(
                            sources, maxweight,
                            weight=operator.attrgetter('weight')):
                        yield block, sites, gsims, monitor
                        nheavy += 1
        logging.info('Sent %d light and %d heavy tasks', nlight, nheavy)
Example #6
    def apply(cls, task, args, concurrent_tasks=cpu_count * 3,
              maxweight=None, weight=lambda item: 1,
              key=lambda item: 'Unspecified',
              distribute=None, progress=logging.info):
        r"""
        Apply a task to a tuple of the form (sequence, \*other_args)
        by first splitting the sequence in chunks, according to the weight
        of the elements and possibly to a key (see
        :func:`openquake.baselib.general.split_in_blocks`).

        :param task: a task to run in parallel
        :param args: the arguments to be passed to the task function
        :param concurrent_tasks: hint about how many tasks to generate
        :param maxweight: if not None, used to split the tasks
        :param weight: function to extract the weight of an item in arg0
        :param key: function to extract the kind of an item in arg0
        :param distribute: if not given, inferred from OQ_DISTRIBUTE
        :param progress: logging function to use (default logging.info)
        :returns: an :class:`IterResult` object
        """
        arg0 = args[0]  # this is assumed to be a sequence
        mon = args[-1]
        args = args[1:-1]
        if maxweight:  # block_splitter is lazy
            task_args = ((blk,) + args for blk in block_splitter(
                arg0, maxweight, weight, key))
        else:  # split_in_blocks is eager
            task_args = [(blk,) + args for blk in split_in_blocks(
                arg0, concurrent_tasks or 1, weight, key)]
        return cls(task, task_args, mon, distribute, progress).submit_all()
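A note on the comments above: block_splitter is a generator and builds blocks on demand, while split_in_blocks materializes roughly concurrent_tasks blocks up front. A hedged sketch of the contrast, using the default unit weight (assuming openquake.baselib is importable):

from openquake.baselib.general import block_splitter, split_in_blocks

items = list(range(100))
lazy = block_splitter(items, 25)   # generator: nothing split yet
eager = split_in_blocks(items, 4)  # blocks built immediately
print(len(list(lazy)), len(list(eager)))  # roughly 4 blocks each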
Example #7
def get_rupture_getters(dstore, ct=0, slc=slice(None), srcfilter=None):
    """
    :param dstore: a :class:`openquake.commonlib.datastore.DataStore`
    :param ct: number of concurrent tasks
    :returns: a list of RuptureGetters
    """
    full_lt = dstore['full_lt']
    rlzs_by_gsim = full_lt.get_rlzs_by_gsim()
    rup_array = dstore['ruptures'][slc]
    if len(rup_array) == 0:
        raise NotFound('There are no ruptures in %s' % dstore)
    rup_array.sort(order=['trt_smr', 'n_occ'])
    scenario = 'scenario' in dstore['oqparam'].calculation_mode
    proxies = [RuptureProxy(rec, scenario) for rec in rup_array]
    maxweight = rup_array['n_occ'].sum() / (ct / 2 or 1)
    rgetters = []
    for block in general.block_splitter(proxies,
                                        maxweight,
                                        operator.itemgetter('n_occ'),
                                        key=operator.itemgetter('trt_smr')):
        trt_smr = block[0]['trt_smr']
        rbg = rlzs_by_gsim[trt_smr]
        rg = RuptureGetter(block, dstore.filename, trt_smr,
                           full_lt.trt_by(trt_smr), rbg)
        rgetters.append(rg)
    return rgetters
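To make the n_occ/trt_smr machinery above concrete, here is a sketch with plain dicts standing in for rupture records (invented values); block_splitter starts a new block whenever the key changes, which is why each block has a single trt_smr:

import operator
from openquake.baselib.general import block_splitter

recs = [dict(trt_smr=0, n_occ=4), dict(trt_smr=0, n_occ=6),
        dict(trt_smr=1, n_occ=5)]
ct = 2  # a pretend number of concurrent tasks
maxweight = sum(r['n_occ'] for r in recs) / (ct / 2 or 1)
for block in block_splitter(recs, maxweight,
                            operator.itemgetter('n_occ'),
                            key=operator.itemgetter('trt_smr')):
    print(block[0]['trt_smr'], block.weight)  # one block per trt_smr here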
Example #8
def gen_rupture_getters(dstore, slc=slice(None), concurrent_tasks=1,
                        filename=None):
    """
    :yields: RuptureGetters
    """
    try:
        e0s = dstore['eslices'][:, 0]
    except KeyError:
        e0s = None
    if dstore.parent:
        dstore = dstore.parent
    csm_info = dstore['csm_info']
    trt_by_grp = csm_info.grp_by("trt")
    samples = csm_info.get_samples_by_grp()
    rlzs_by_gsim = csm_info.get_rlzs_by_gsim_grp()
    rup_array = dstore['ruptures'][slc]
    maxweight = numpy.ceil(len(rup_array) / (concurrent_tasks or 1))
    nr, ne = 0, 0
    for grp_id, arr in general.group_array(rup_array, 'grp_id').items():
        if not rlzs_by_gsim[grp_id]:
            # this may happen if a source model has no sources, like
            # in event_based_risk/case_3
            continue
        for block in general.block_splitter(arr, maxweight):
            if e0s is None:
                e0 = numpy.zeros(len(block), U32)
            else:
                e0 = e0s[nr: nr + len(block)]
            rgetter = RuptureGetter(
                numpy.array(block), filename or dstore.filename, grp_id,
                trt_by_grp[grp_id], samples[grp_id], rlzs_by_gsim[grp_id], e0)
            yield rgetter
            nr += len(block)
            ne += rgetter.num_events
Example #9
    def apply(cls, task, task_args,
              concurrent_tasks=executor.num_tasks_hint,
              maxweight=None,
              weight=lambda item: 1,
              key=lambda item: 'Unspecified',
              name=None):
        """
        Apply a task to a tuple of the form (sequence, \*other_args)
        by first splitting the sequence in chunks, according to the weight
        of the elements and possibly to a key (see :func:
        `openquake.baselib.general.split_in_blocks`).

        :param task: a task to run in parallel
        :param task_args: the arguments to be passed to the task function
        :param concurrent_tasks: hint about how many tasks to generate
        :param maxweight: if not None, used to split the tasks
        :param weight: function to extract the weight of an item in arg0
        :param key: function to extract the kind of an item in arg0
        """
        arg0 = task_args[0]  # this is assumed to be a sequence
        args = task_args[1:]
        if maxweight:
            chunks = block_splitter(arg0, maxweight, weight, key)
        else:
            chunks = split_in_blocks(arg0, concurrent_tasks or 1, weight, key)
        return cls(task, [(chunk,) + args for chunk in chunks], name)
Example #10
def classical_split_filter(srcs, srcfilter, gsims, params, monitor):
    """
    Split the given sources, filter the subsources and then compute the
    PoEs. Yield back subtasks if the split sources contain more than
    maxweight ruptures.
    """
    # first check if we are sampling the sources
    ss = int(os.environ.get('OQ_SAMPLE_SOURCES', 0))
    if ss:
        splits, stime = split_sources(srcs)
        srcs = readinput.random_filtered_sources(splits, srcfilter, ss)
        yield classical(srcs, srcfilter, gsims, params, monitor)
        return
    sources = []
    with monitor("filtering/splitting sources"):
        for src, _sites in srcfilter(srcs):
            if src.num_ruptures >= params['maxweight']:
                splits, stime = split_sources([src])
                sources.extend(srcfilter.filter(splits))
            else:
                sources.append(src)
        blocks = list(block_splitter(sources, params['maxweight'],
                                     operator.attrgetter('num_ruptures')))
    if blocks:
        # yield the first blocks (if any) and compute the last block in core
        # NB: the last block is usually the smallest one
        for block in blocks[:-1]:
            yield classical, block, srcfilter, gsims, params
        yield classical(blocks[-1], srcfilter, gsims, params, monitor)
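The yield protocol above is worth spelling out: yielding a (func, *args) tuple hands a subtask back to the framework, while calling the function directly computes the last block in the current worker. A schematic toy version, not the real openquake scheduler:

def toy_split_filter(blocks, compute):
    # schedule all but the last block as subtasks...
    for block in blocks[:-1]:
        yield compute, block
    # ...and compute the last (usually smallest) block in core
    yield compute(blocks[-1])

out = list(toy_split_filter([[1, 2], [3], [4, 5, 6]], sum))
# out == [(sum, [1, 2]), (sum, [3]), 15]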
Example #11
    def _send_sources(self, smap):
        oq = self.oqparam
        opt = self.oqparam.optimize_same_id_sources
        nrup = operator.attrgetter('num_ruptures')
        param = dict(
            truncation_level=oq.truncation_level, imtls=oq.imtls,
            filter_distance=oq.filter_distance, reqv=oq.get_reqv(),
            pointsource_distance=oq.pointsource_distance,
            maxweight=min(self.csm.get_maxweight(nrup, oq.concurrent_tasks),
                          base.RUPTURES_PER_BLOCK))
        logging.info('Max ruptures per task = %(maxweight)d', param)

        num_tasks = 0
        num_sources = 0

        if self.csm.has_dupl_sources and not opt:
            logging.warning('Found %d duplicated sources',
                            self.csm.has_dupl_sources)

        for trt, sources in self.csm.get_trt_sources():
            gsims = self.csm.info.gsim_lt.get_gsims(trt)
            num_sources += len(sources)
            if hasattr(sources, 'atomic') and sources.atomic:
                smap.submit(sources, self.src_filter, gsims, param,
                            func=classical)
                yield sources
                num_tasks += 1
            else:  # regroup the sources in blocks
                for block in block_splitter(sources, param['maxweight'], nrup):
                    smap.submit(block, self.src_filter, gsims, param)
                    yield block
                    num_tasks += 1
        logging.info('Sent %d sources in %d tasks', num_sources, num_tasks)
Example #12
 def submit_sources(self, sitecol, siteidx=0):
     """
     Submit the light sources and then the (split) heavy sources.
     Only the sources affecting the sitecol are considered. Also,
     set the .seed attribute of each source.
     """
     rlzs_assoc = self.csm.info.get_rlzs_assoc()
     for kind in ('light', 'heavy'):
         sources = list(self.get_sources(kind, sitecol))
         if not sources:
             continue
         # set a seed for each split source; the seed is used
         # only by the event based calculator, but it is set anyway
         for src in sources:
             self.csm.filtered_weight += src.weight
         nblocks = 0
         for block in block_splitter(
                 sources, self.maxweight,
                 operator.attrgetter('weight'),
                 operator.attrgetter('trt_model_id')):
             sent = self.tm.submit(block, sitecol, siteidx,
                                   rlzs_assoc, self.monitor.new())
             self.source_chunks.append(
                 (len(block), block.weight, sum(sent.values())))
             nblocks += 1
         logging.info('Sent %d sources in %d block(s)',
                      len(sources), nblocks)
Example #13
def classical_split_filter(sources, rlzs_by_gsim, params, monitor):
    """
    Compute the PoEs from filtered sources.
    """
    minw = params['min_weight']
    maxw = params['max_weight'] / 2
    blocks = list(block_splitter(sources, maxw, get_weight))
    if not blocks:
        yield {'pmap': {}, 'extra': {}}
        return
    heavy = []
    light = list(blocks[-1])
    for block in blocks[:-1]:
        if block.weight < minw:  # extend light sources
            light.extend(block)
        else:  # heavy block, turn it into a subtask
            heavy.append(int(block.weight))
            yield classical, block, rlzs_by_gsim, params
    if heavy:
        msg = 'produced %d subtasks with weights %s' % (len(heavy), heavy)
        try:
            logs.dbcmd('log', monitor.calc_id, datetime.utcnow(), 'DEBUG',
                       'classical_split_filter#%d' % monitor.task_no, msg)
        except Exception:
            # a foreign key error in case of `oq run` is expected
            print(msg)
    yield classical(light, rlzs_by_gsim, params, monitor)
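The min_weight logic above is a merge-small-blocks pattern: blocks lighter than the threshold are folded into the block computed in core rather than becoming subtasks. A toy rendition with invented names:

def partition(blocks, minw):
    subtasks, in_core = [], list(blocks[-1])
    for block in blocks[:-1]:
        if sum(block) < minw:
            in_core.extend(block)   # too light: keep it local
        else:
            subtasks.append(block)  # heavy enough for a subtask
    return subtasks, in_core

subtasks, in_core = partition([[5, 5], [1], [2, 2]], minw=4)
# subtasks == [[5, 5]], in_core == [2, 2, 1]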
Example #14
 def submit_sources(self, sitecol, siteidx=0):
     """
     Submit the light sources and then the (split) heavy sources.
     Only the sources affecting the sitecol are considered. Also,
     set the .seed attribute of each source.
     """
     rlzs_assoc = self.csm.info.get_rlzs_assoc()
     for kind in ('light', 'heavy'):
         sources = list(self.get_sources(kind, sitecol))
         if not sources:
             continue
         # set a seed for each split source; the seed is used
         # only by the event based calculator, but it is set anyway
         for src in sources:
             self.csm.filtered_weight += src.weight
         nblocks = 0
         for block in block_splitter(sources, self.maxweight,
                                     operator.attrgetter('weight'),
                                     operator.attrgetter('trt_model_id')):
             sent = self.tm.submit(block, sitecol, siteidx, rlzs_assoc,
                                   self.monitor.new())
             self.source_chunks.append(
                 (len(block), block.weight, sum(sent.values())))
             nblocks += 1
         logging.info('Sent %d sources in %d block(s)', len(sources),
                      nblocks)
Example #15
 def gen_args(self, tiles):
     """
     Yield (sources, sitecol, siteidx, rlzs_assoc, monitor) by
     looping on the tiles and on the source blocks.
     """
     siteidx = 0
     for i, sitecol in enumerate(tiles, 1):
         if len(tiles) > 1:
             logging.info('Processing tile %d', i)
         tile = Tile(sitecol, self.maximum_distance)
         for kind in ('light', 'heavy'):
             if self.filter_sources:
                 logging.info('Filtering %s sources', kind)
             sources = list(self.get_sources(kind, tile))
             if not sources:
                 continue
             for src in sources:
                 self.csm.filtered_weight += src.weight
             nblocks = 0
             for block in block_splitter(
                     sources, self.maxweight,
                     operator.attrgetter('weight'),
                     operator.attrgetter('src_group_id')):
                 yield (block, sitecol, siteidx,
                        self.rlzs_assoc, self.monitor.new())
                 nblocks += 1
             logging.info('Sent %d sources in %d block(s)',
                          len(sources), nblocks)
         siteidx += len(sitecol)
Example #16
def gen_rupture_getters(dstore,
                        slc=slice(None),
                        concurrent_tasks=1,
                        hdf5cache=None):
    """
    :yields: RuptureGetters
    """
    if dstore.parent:
        dstore = dstore.parent
    csm_info = dstore['csm_info']
    trt_by_grp = csm_info.grp_by("trt")
    samples = csm_info.get_samples_by_grp()
    rlzs_by_gsim = csm_info.get_rlzs_by_gsim_grp()
    rup_array = dstore['ruptures'][slc]
    maxweight = numpy.ceil(len(rup_array) / (concurrent_tasks or 1))
    nr, ne = 0, 0
    for grp_id, arr in general.group_array(rup_array, 'grp_id').items():
        if not rlzs_by_gsim[grp_id]:
            # this may happen if a source model has no sources, like
            # in event_based_risk/case_3
            continue
        for block in general.block_splitter(arr, maxweight):
            rgetter = RuptureGetter(hdf5cache or dstore.filename,
                                    numpy.array(block), grp_id,
                                    trt_by_grp[grp_id], samples[grp_id],
                                    rlzs_by_gsim[grp_id])
            rgetter.weight = getattr(block, 'weight', len(block))
            yield rgetter
            nr += len(block)
            ne += rgetter.num_events
    logging.info('Read %d ruptures and %d events', nr, ne)
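The count-based maxweight used above, worked out in isolation with invented numbers:

import numpy

rup_count, concurrent_tasks = 1000, 64
maxweight = numpy.ceil(rup_count / (concurrent_tasks or 1))
print(maxweight)  # 16.0, i.e. at most ~16 ruptures per block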
Example #17
    def _send_sources(self, smap):
        oq = self.oqparam
        opt = self.oqparam.optimize_same_id_sources
        nrup = operator.attrgetter('num_ruptures')
        param = dict(
            truncation_level=oq.truncation_level, imtls=oq.imtls,
            filter_distance=oq.filter_distance, reqv=oq.get_reqv(),
            pointsource_distance=oq.pointsource_distance,
            maxweight=min(self.csm.get_maxweight(nrup, oq.concurrent_tasks),
                          base.RUPTURES_PER_BLOCK))
        logging.info('Max ruptures per task = %(maxweight)d', param)

        num_tasks = 0
        num_sources = 0

        if self.csm.has_dupl_sources and not opt:
            logging.warning('Found %d duplicated sources',
                            self.csm.has_dupl_sources)

        for trt, sources in self.csm.get_trt_sources():
            gsims = self.csm.info.gsim_lt.get_gsims(trt)
            num_sources += len(sources)
            if hasattr(sources, 'atomic') and sources.atomic:
                smap.submit(sources, self.src_filter, gsims, param,
                            func=classical)
                yield sources
                num_tasks += 1
            else:  # regroup the sources in blocks
                for block in block_splitter(sources, param['maxweight'], nrup):
                    smap.submit(block, self.src_filter, gsims, param)
                    yield block
                    num_tasks += 1
        logging.info('Sent %d sources in %d tasks', num_sources, num_tasks)
Example #18
 def _gen_riskinputs(self, kind, eps, num_events):
     rinfo_dt = numpy.dtype([('sid', U16), ('num_assets', U16)])
     rinfo = []
     assets_by_site = self.assetcol.assets_by_site()
     dstore = self.can_read_parent() or self.datastore
     for sid, assets in enumerate(assets_by_site):
         if len(assets) == 0:
             continue
         # build the riskinputs
         if kind == 'poe':  # hcurves, shape (R, N)
             getter = getters.PmapGetter(dstore, self.rlzs_assoc, [sid])
             getter.num_rlzs = self.R
         else:  # gmf
             getter = getters.GmfDataGetter(dstore, [sid], self.R)
         if dstore is self.datastore:
             # read the hazard data in the controller node
             getter.init()
         else:
             # the datastore must be closed to avoid the HDF5 fork bug
             assert dstore.hdf5 == (), '%s is not closed!' % dstore
         for block in general.block_splitter(
                 assets, self.oqparam.assets_per_site_limit):
             # dictionary of epsilons for the reduced assets
             reduced_eps = {ass.ordinal: eps[ass.ordinal]
                            for ass in block
                            if eps is not None and len(eps)}
             yield riskinput.RiskInput(getter, [block], reduced_eps)
         rinfo.append((sid, len(block)))
         if len(block) >= TWO16:
             logging.error('There are %d assets on site #%d!',
                           len(block), sid)
     self.datastore['riskinput_info'] = numpy.array(rinfo, rinfo_dt)
Example #19
def classical_split_filter(srcs, srcfilter, gsims, params, monitor):
    """
    Split the given sources, filter the subsources and then compute the
    PoEs. Yield back subtasks if the split sources contain more than
    maxweight ruptures.
    """
    # first check if we are sampling the sources
    ss = int(os.environ.get('OQ_SAMPLE_SOURCES', 0))
    if ss:
        splits, stime = split_sources(srcs)
        srcs = random_filtered_sources(splits, srcfilter, ss)
        yield classical(srcs, srcfilter, gsims, params, monitor)
        return
    # NB: splitting all the sources improves the distribution significantly,
    # compared to splitting only the big source
    sources = []
    with monitor("filtering/splitting sources"):
        for src, _sites in srcfilter(srcs):
            splits, _stime = split_sources([src])
            sources.extend(srcfilter.filter(splits))
    if sources:
        sources.sort(key=weight)
        totsites = len(srcfilter.sitecol)
        mw = 1000 if totsites <= params['max_sites_disagg'] else 50000
        mweight = max(mw, sum(src.weight for src in sources) /
                      params['task_multiplier'])
        blocks = list(block_splitter(sources, mweight, weight))
        for block in blocks[:-1]:
            yield classical, block, srcfilter, gsims, params
        yield classical(blocks[-1], srcfilter, gsims, params, monitor)
Example #20
 def submit_sources(self, sitecol, siteidx=0):
     """
     Submit the light sources and then the (split) heavy sources.
     Only the sources affecting the sitecol are considered.
     """
     tile = Tile(sitecol, self.maximum_distance)
     for kind in ('light', 'heavy'):
         if self.filter_sources:
             logging.info('Filtering %s sources', kind)
         sources = list(self.get_sources(kind, tile))
         if not sources:
             continue
         for src in sources:
             self.csm.filtered_weight += src.weight
         nblocks = 0
         for block in block_splitter(sources, self.maxweight,
                                     operator.attrgetter('weight'),
                                     operator.attrgetter('trt_model_id')):
             sent = self.tm.submit(block, sitecol, siteidx, self.rlzs_assoc,
                                   self.monitor.new())
             self.source_chunks.append(
                 (len(block), block.weight, sum(sent.values())))
             nblocks += 1
         logging.info('Sent %d sources in %d block(s)', len(sources),
                      nblocks)
Example #21
def classical_split_filter(srcs, srcfilter, gsims, params, monitor):
    """
    Split the given sources, filter the subsources and then compute the
    PoEs. Yield back subtasks if the split sources contain more than
    maxweight ruptures.
    """
    # first check if we are sampling the sources
    ss = int(os.environ.get('OQ_SAMPLE_SOURCES', 0))
    if ss:
        splits, stime = split_sources(srcs)
        srcs = readinput.random_filtered_sources(splits, srcfilter, ss)
        yield classical(srcs, srcfilter, gsims, params, monitor)
        return
    sources = []
    with monitor("filtering/splitting sources"):
        for src, _sites in srcfilter(srcs):
            if src.num_ruptures >= params['maxweight']:
                splits, stime = split_sources([src])
                sources.extend(srcfilter.filter(splits))
            else:
                sources.append(src)
        blocks = list(block_splitter(sources, params['maxweight'],
                                     operator.attrgetter('num_ruptures')))
    if blocks:
        # yield the first blocks (if any) and compute the last block in core
        # NB: the last block is usually the smallest one
        for block in blocks[:-1]:
            yield classical, block, srcfilter, gsims, params
        yield classical(blocks[-1], srcfilter, gsims, params, monitor)
Example #22
 def gen_args(self, ruptures_by_grp):
     """
     :param ruptures_by_grp: a dictionary of EBRupture objects
     :yields: the arguments for compute_gmfs_and_curves
     """
     oq = self.oqparam
     monitor = self.monitor(self.core_task.__name__)
     imts = list(oq.imtls)
     min_iml = calc.fix_minimum_intensity(oq.minimum_intensity, imts)
     correl_model = oq.get_correl_model()
     try:
         csm_info = self.csm.info
     except AttributeError:  # no csm
         csm_info = self.datastore['csm_info']
     samples_by_grp = csm_info.get_samples_by_grp()
     for grp_id in ruptures_by_grp:
         ruptures = ruptures_by_grp[grp_id]
         if not ruptures:
             continue
         rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(grp_id)
         for block in block_splitter(ruptures, oq.ruptures_per_block):
             samples = samples_by_grp[grp_id]
             getter = GmfGetter(rlzs_by_gsim, block, self.sitecol, imts,
                                min_iml, oq.truncation_level, correl_model,
                                samples)
             yield getter, oq, monitor
Example #23
    def from_sources(self, par, monitor):
        """
        Prefilter the composite source model and store the source_info
        """
        self.R = self.csm.info.get_num_rlzs()
        num_rlzs = {grp_id: sum(
            len(rlzs) for rlzs in self.rlzs_by_gsim_grp[grp_id].values())
                    for grp_id in self.rlzs_by_gsim_grp}
        param = {'ruptures_per_block': RUPTURES_PER_BLOCK}
        param['filter_distance'] = self.oqparam.filter_distance
        param['ses_per_logic_tree_path'] = self.oqparam.ses_per_logic_tree_path
        param['gsims_by_trt'] = self.csm.gsim_lt.values
        param['pointsource_distance'] = self.oqparam.pointsource_distance
        logging.info('Building ruptures')
        ires = parallel.Starmap.apply(
            build_ruptures,
            (self.csm.get_sources(), self.src_filter, param, monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            weight=operator.attrgetter('num_ruptures'),
            key=operator.attrgetter('src_group_id'))

        def weight(ebr):
            return numpy.sqrt(num_rlzs[ebr.grp_id] * ebr.multiplicity *
                              len(ebr.sids))
        for ruptures in block_splitter(self._store_ruptures(ires), BLOCKSIZE,
                                       weight, operator.attrgetter('grp_id')):
            ebr = ruptures[0]
            rlzs_by_gsim = self.rlzs_by_gsim_grp[ebr.grp_id]
            par = par.copy()
            par['samples'] = self.samples_by_grp[ebr.grp_id]
            yield ruptures, self.src_filter, rlzs_by_gsim, par, monitor

        self.setting_events()
        if self.oqparam.ground_motion_fields:
            logging.info('Building GMFs')
Example #24
def get_rupture_getters(dstore, ct=0, slc=slice(None), srcfilter=None):
    """
    :param dstore: a :class:`openquake.commonlib.datastore.DataStore`
    :param ct: number of concurrent tasks
    :returns: a list of RuptureGetters
    """
    full_lt = dstore['full_lt']
    rlzs_by_gsim = full_lt.get_rlzs_by_gsim()
    rup_array = dstore['ruptures'][slc]
    if len(rup_array) == 0:
        raise NotFound('There are no ruptures in %s' % dstore)
    rup_array.sort(order='trt_smr')  # avoid generating too many tasks
    scenario = 'scenario' in dstore['oqparam'].calculation_mode
    if srcfilter is None:
        proxies = [RuptureProxy(rec, None, scenario) for rec in rup_array]
    elif len(rup_array) <= 1000:  # do not parallelize
        proxies = weight_ruptures(rup_array, srcfilter, full_lt.trt_by,
                                  scenario)
    else:  # parallelize the weighting of the ruptures
        proxies = parallel.Starmap.apply(
            weight_ruptures, (rup_array, srcfilter, full_lt.trt_by, scenario),
            concurrent_tasks=ct).reduce(acc=[])
    maxweight = sum(proxy.weight for proxy in proxies) / (ct or 1)
    rgetters = []
    for block in general.block_splitter(proxies,
                                        maxweight,
                                        operator.attrgetter('weight'),
                                        key=operator.itemgetter('trt_smr')):
        trt_smr = block[0]['trt_smr']
        rg = RuptureGetter(block, dstore.filename, trt_smr,
                           full_lt.trt_by(trt_smr), rlzs_by_gsim[trt_smr])
        rgetters.append(rg)
    return rgetters
Example #25
 def gen_args(self, tiles):
     """
     Yield (sources, sitecol, siteidx, rlzs_assoc, monitor) by
     looping on the tiles and on the source blocks.
     """
     siteidx = 0
     for i, sitecol in enumerate(tiles, 1):
         if len(tiles) > 1:
             logging.info('Processing tile %d', i)
         tile = Tile(sitecol, self.maximum_distance)
         for kind in ('light', 'heavy'):
             if self.filter_sources:
                 logging.info('Filtering %s sources', kind)
             sources = list(self.get_sources(kind, tile))
             if not sources:
                 continue
             for src in sources:
                 self.csm.filtered_weight += src.weight
             nblocks = 0
             for block in block_splitter(
                     sources, self.maxweight, operator.attrgetter('weight'),
                     operator.attrgetter('trt_model_id')):
                 yield (block, sitecol, siteidx, self.rlzs_assoc,
                        self.monitor.new())
                 nblocks += 1
             logging.info('Sent %d sources in %d block(s)', len(sources),
                          nblocks)
         siteidx += len(sitecol)
Example #26
def gen_rupture_getters(dstore, slc=slice(None),
                        concurrent_tasks=1, hdf5cache=None):
    """
    :yields: RuptureGetters
    """
    if dstore.parent:
        dstore = dstore.parent
    csm_info = dstore['csm_info']
    trt_by_grp = csm_info.grp_by("trt")
    samples = csm_info.get_samples_by_grp()
    rlzs_by_gsim = csm_info.get_rlzs_by_gsim_grp()
    rup_array = dstore['ruptures'][slc]
    maxweight = numpy.ceil(len(rup_array) / (concurrent_tasks or 1))
    nr, ne, first_event = 0, 0, 0
    for grp_id, arr in general.group_array(rup_array, 'grp_id').items():
        if not rlzs_by_gsim[grp_id]:
            # this may happen if a source model has no sources, like
            # in event_based_risk/case_3
            continue
        for block in general.block_splitter(arr, maxweight):
            rgetter = RuptureGetter(
                hdf5cache or dstore.filename, numpy.array(block), grp_id,
                trt_by_grp[grp_id], samples[grp_id], rlzs_by_gsim[grp_id],
                first_event)
            rgetter.weight = getattr(block, 'weight', len(block))
            first_event += rgetter.num_events
            yield rgetter
            nr += len(block)
            ne += rgetter.num_events
    logging.info('Read %d ruptures and %d events', nr, ne)
Example #27
    def apply(cls,
              task,
              task_args,
              concurrent_tasks=executor.num_tasks_hint,
              maxweight=None,
              weight=lambda item: 1,
              key=lambda item: 'Unspecified',
              name=None):
        """
        Apply a task to a tuple of the form (sequence, \*other_args)
        by first splitting the sequence in chunks, according to the weight
        of the elements and possibly to a key (see :func:
        `openquake.baselib.general.split_in_blocks`).

        :param task: a task to run in parallel
        :param task_args: the arguments to be passed to the task function
        :param concurrent_tasks: hint about how many tasks to generate
        :param maxweight: if not None, used to split the tasks
        :param weight: function to extract the weight of an item in arg0
        :param key: function to extract the kind of an item in arg0
        """
        arg0 = task_args[0]  # this is assumed to be a sequence
        args = task_args[1:]
        if maxweight:
            chunks = block_splitter(arg0, maxweight, weight, key)
        else:
            chunks = split_in_blocks(arg0, concurrent_tasks or 1, weight, key)
        return cls(task, [(chunk, ) + args for chunk in chunks], name)
Example #28
 def _gen_riskinputs_gmf(self, dstore):
     if 'gmf_data' not in dstore:  # needed for case_shakemap
         dstore.close()
         dstore = self.datastore
     if 'gmf_data' not in dstore:
         raise InvalidFile('Did you forget gmfs_csv in %s?' %
                           self.oqparam.inputs['job_ini'])
     with self.monitor('reading GMFs'):
         rlzs = dstore['events']['rlz_id']
         gmf_df = dstore.read_df('gmf_data', 'sid')
         by_sid = dict(list(gmf_df.groupby(gmf_df.index)))
     logging.info('Grouped the GMFs by site ID')
     for sid, assets in enumerate(self.assetcol.assets_by_site()):
         if len(assets) == 0:
             continue
         try:
             df = by_sid[sid]
         except KeyError:
             getter = getters.ZeroGetter(sid, rlzs, self.R)
         else:
             df['rlzs'] = rlzs[df.eid.to_numpy()]
             getter = getters.GmfDataGetter(sid, df, len(rlzs), self.R)
         if len(dstore['gmf_data/gmv_0']) == 0:
             raise RuntimeError(
                 'There are no GMFs available: perhaps you did set '
                 'ground_motion_fields=False or a large minimum_intensity')
         for block in general.block_splitter(
                 assets, self.oqparam.assets_per_site_limit):
             yield riskinput.RiskInput(sid, getter, numpy.array(block))
         if len(block) >= TWO16:
             logging.error('There are %d assets on site #%d!', len(block),
                           sid)
Example #29
def gen_rupture_getters(dstore, srcfilter, slc=slice(None)):
    """
    :yields: filtered RuptureGetters
    """
    full_lt = dstore['full_lt']
    trt_by_grp = full_lt.trt_by_grp
    samples = full_lt.get_samples_by_grp()
    rlzs_by_gsim = full_lt.get_rlzs_by_gsim_grp()
    rup_array = dstore['ruptures'][slc]
    ct = dstore['oqparam'].concurrent_tasks or 1
    items = list(general.group_array(rup_array, 'grp_id').items())
    items.sort(key=lambda it: len(it[1]))
    maxweight = None
    while items:
        grp_id, rups = items.pop()  # from the largest group
        if not rlzs_by_gsim[grp_id]:
            # this may happen if a source model has no sources, like
            # in event_based_risk/case_3
            continue
        trt = trt_by_grp[grp_id]
        proxies = list(_gen(rups, srcfilter, trt, samples[grp_id]))
        if not maxweight:
            maxweight = sum(p.weight for p in proxies) / ct
        blocks = list(
            general.block_splitter(proxies, maxweight,
                                   operator.attrgetter('weight')))
        logging.info('Group %d: %d ruptures -> %d task(s)', grp_id, len(rups),
                     len(blocks))
        for block in blocks:
            rgetter = RuptureGetter(block, dstore.filename, grp_id, trt,
                                    samples[grp_id], rlzs_by_gsim[grp_id])
            yield rgetter
Example #30
 def execute(self):
     """
     Parallelize on the riskinputs and returns a dictionary of results.
     Require a `.core_task` to be defined with signature
     (riskinputs, crmodel, param, monitor).
     """
     if not hasattr(self, 'riskinputs'):  # in the reportwriter
         return
     ct = self.oqparam.concurrent_tasks or 1
     maxw = sum(ri.weight for ri in self.riskinputs) / ct
     smap = parallel.Starmap(self.core_task.__func__,
                             h5=self.datastore.hdf5)
     smap.monitor.save('crmodel', self.crmodel)
     for block in general.block_splitter(self.riskinputs,
                                         maxw,
                                         get_weight,
                                         sort=True):
         for ri in block:
             # we must use eager reading for performance reasons:
             # concurrent reading on the workers would be extra-slow;
             # also, I could not get lazy reading to work with
             # the SWMR mode for event_based_risk
             if not isinstance(ri.hazard_getter, getters.PmapGetter):
                 ri.hazard_getter.init()
         smap.submit((block, self.param))
     return smap.reduce(self.combine)
Example #31
 def test_block_splitter_with_generator(self):
     # Test the block splitter with a data set of unknown length
     # (such as a generator)
     data = range(10)
     expected = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
     actual = [x for x in block_splitter(data, 3)]
     self.assertEqual(expected, actual)
Example #32
    def apply(cls, task, task_args,
              concurrent_tasks=executor.num_tasks_hint,
              maxweight=None,
              weight=lambda item: 1,
              key=lambda item: 'Unspecified',
              name=None):
        """
        Apply a task to a tuple of the form (sequence, \*other_args)
        by first splitting the sequence in chunks, according to the weight
        of the elements and possibly to a key (see :function:
        `openquake.baselib.general.split_in_blocks`).
        Then reduce the results with an aggregation function.
        The chunks which are generated internally can be seen directly (
        useful for debugging purposes) by looking at the attribute `._chunks`,
        right after the `apply` function has been called.

        :param task: a task to run in parallel
        :param task_args: the arguments to be passed to the task function
        :param concurrent_tasks: hint about how many tasks to generate
        :param maxweight: if not None, used to split the tasks
        :param weight: function to extract the weight of an item in arg0
        :param key: function to extract the kind of an item in arg0
        """
        arg0 = task_args[0]  # this is assumed to be a sequence
        args = task_args[1:]
        if maxweight:
            chunks = block_splitter(arg0, maxweight, weight, key)
        else:
            chunks = split_in_blocks(arg0, concurrent_tasks or 1, weight, key)
        return cls.starmap(task, [(chunk,) + args for chunk in chunks], name)
Example #33
 def submit_sources(self, sitecol, siteidx=0):
     """
     Submit the light sources and then the (split) heavy sources.
     Only the sources affecting the sitecol are considered.
     """
     tile = Tile(sitecol, self.maximum_distance)
     for kind in ('light', 'heavy'):
         if self.filter_sources:
             logging.info('Filtering %s sources', kind)
         sources = list(self.get_sources(kind, tile))
         if not sources:
             continue
         for src in sources:
             self.csm.filtered_weight += src.weight
         nblocks = 0
         for block in block_splitter(
                 sources, self.maxweight,
                 operator.attrgetter('weight'),
                 operator.attrgetter('trt_model_id')):
             sent = self.tm.submit(block, sitecol, siteidx,
                                   self.rlzs_assoc, self.monitor.new())
             self.source_chunks.append(
                 (len(block), block.weight, sum(sent.values())))
             nblocks += 1
         logging.info('Sent %d sources in %d block(s)',
                      len(sources), nblocks)
Example #34
 def _gen_riskinputs(self, kind, eps, num_events):
     assets_by_site = self.assetcol.assets_by_site()
     dstore = self.can_read_parent() or self.datastore
     for sid, assets in enumerate(assets_by_site):
         if len(assets) == 0:
             continue
         # build the riskinputs
         if kind == 'poe':  # hcurves, shape (R, N)
             getter = PmapGetter(dstore, self.rlzs_assoc, [sid])
             getter.num_rlzs = self.R
         else:  # gmf
             getter = GmfDataGetter(dstore, [sid], self.R,
                                    self.oqparam.imtls)
         if dstore is self.datastore:
             # read the hazard data in the controller node
             getter.init()
         else:
             # the datastore must be closed to avoid the HDF5 fork bug
             assert dstore.hdf5 == (), '%s is not closed!' % dstore
         for block in general.block_splitter(assets, 1000):
             # dictionary of epsilons for the reduced assets
             reduced_eps = {
                 ass.ordinal: eps[ass.ordinal]
                 for ass in block if eps is not None and len(eps)
             }
             yield riskinput.RiskInput(getter, [block], reduced_eps)
Example #35
    def apply(cls,
              task,
              args,
              concurrent_tasks=cpu_count * 3,
              maxweight=None,
              weight=lambda item: 1,
              key=lambda item: 'Unspecified',
              name=None,
              distribute=None):
        """
        Apply a task to a tuple of the form (sequence, \*other_args)
        by first splitting the sequence in chunks, according to the weight
        of the elements and possibly to a key (see :func:
        `openquake.baselib.general.split_in_blocks`).

        :param task: a task to run in parallel
        :param args: the arguments to be passed to the task function
        :param concurrent_tasks: hint about how many tasks to generate
        :param maxweight: if not None, used to split the tasks
        :param weight: function to extract the weight of an item in arg0
        :param key: function to extract the kind of an item in arg0
        :param name: name of the task to be used in the log
        :param distribute: if not given, inferred from OQ_DISTRIBUTE
        :returns: an :class:`IterResult` object
        """
        arg0 = args[0]  # this is assumed to be a sequence
        args = args[1:]
        if maxweight:
            chunks = block_splitter(arg0, maxweight, weight, key)
        else:
            chunks = split_in_blocks(arg0, concurrent_tasks or 1, weight, key)
        task_args = [(ch, ) + args for ch in chunks]
        return cls(task, task_args, name, distribute).submit_all()
Example #36
def gen_rupture_getters(dstore, srcfilter, ct):
    """
    :param dstore: a :class:`openquake.baselib.datastore.DataStore`
    :param srcfilter: a :class:`openquake.hazardlib.calc.filters.SourceFilter`
    :param ct: number of concurrent tasks
    :yields: filtered RuptureGetters
    """
    full_lt = dstore['full_lt']
    trt_by_grp = full_lt.trt_by_grp
    samples = full_lt.get_samples_by_grp()
    rlzs_by_gsim = full_lt.get_rlzs_by_gsim_grp()
    rup_array = dstore['ruptures'][()]
    items = list(general.group_array(rup_array, 'grp_id').items())
    items.sort(key=lambda item: len(item[1]))  # other weights were much worse
    maxweight = None
    while items:
        grp_id, rups = items.pop()  # from the largest group
        if not rlzs_by_gsim[grp_id]:
            # this may happen if a source model has no sources, like
            # in event_based_risk/case_3
            continue
        trt = trt_by_grp[grp_id]
        proxies = list(_gen(rups, srcfilter, trt, samples[grp_id]))
        if not maxweight:
            maxweight = sum(p.weight for p in proxies) / (ct // 2 or 1)
        blocks = list(general.block_splitter(
            proxies, maxweight, operator.attrgetter('weight')))
        logging.info('Group %d: %d ruptures -> %d task(s)',
                     grp_id, len(rups), len(blocks))
        for block in blocks:
            rgetter = RuptureGetter(
                block, dstore.filename, grp_id,
                trt, samples[grp_id], rlzs_by_gsim[grp_id])
            yield rgetter
Example #37
    def start_tasks(self, sm_id, ruptures_by_grp, sitecol, assetcol, riskmodel,
                    imts, trunc_level, correl_model, min_iml, monitor):
        """
        :param sm_id: source model ordinal
        :param ruptures_by_grp: dictionary of ruptures by src_group_id
        :param sitecol: a SiteCollection instance
        :param assetcol: an AssetCollection instance
        :param riskmodel: a RiskModel instance
        :param imts: a list of Intensity Measure Types
        :param trunc_level: truncation level
        :param correl_model: correlation model
        :param min_iml: vector of minimum intensities, one per IMT
        :param monitor: a Monitor instance
        :returns: an IterResult instance
        """
        csm_info = self.csm_info.get_info(sm_id)
        grp_ids = sorted(csm_info.get_sm_by_grp())
        rlzs_assoc = csm_info.get_rlzs_assoc(
            count_ruptures=lambda grp: len(ruptures_by_grp.get(grp.id, [])))
        num_events = sum(ebr.multiplicity for grp in ruptures_by_grp
                         for ebr in ruptures_by_grp[grp])
        seeds = self.oqparam.random_seed + numpy.arange(num_events)

        allargs = []
        # prepare the risk inputs
        ruptures_per_block = self.oqparam.ruptures_per_block
        start = 0
        ignore_covs = self.oqparam.ignore_covs
        for grp_id in grp_ids:
            rlzs_by_gsim = rlzs_assoc.get_rlzs_by_gsim(grp_id)
            samples = rlzs_assoc.samples[grp_id]
            for rupts in block_splitter(ruptures_by_grp.get(grp_id, []),
                                        ruptures_per_block):
                if ignore_covs or not self.riskmodel.covs:
                    eps = None
                elif self.oqparam.asset_correlation:
                    eps = EpsilonMatrix1(num_events, self.oqparam.master_seed)
                else:
                    n_events = sum(ebr.multiplicity for ebr in rupts)
                    eps = EpsilonMatrix0(len(self.assetcol),
                                         seeds[start:start + n_events])
                    start += n_events
                getter = riskinput.GmfGetter(grp_id, rlzs_by_gsim, rupts,
                                             sitecol, imts, min_iml,
                                             trunc_level, correl_model,
                                             samples)
                ri = riskinput.RiskInputFromRuptures(getter, eps)
                allargs.append((ri, riskmodel, assetcol, monitor))

        self.vals = self.assetcol.values()
        taskname = '%s#%d' % (event_based_risk.__name__, sm_id + 1)
        ires = Starmap(event_based_risk, allargs, name=taskname).submit_all()
        ires.num_ruptures = {
            sg_id: len(rupts)
            for sg_id, rupts in ruptures_by_grp.items()
        }
        ires.num_events = num_events
        ires.num_rlzs = len(rlzs_assoc.realizations)
        ires.sm_id = sm_id
        return ires
Example #38
 def execute(self):
     oq = self.oqparam
     self.set_param(num_taxonomies=self.assetcol.num_taxonomies_by_site(),
                    maxweight=oq.ebrisk_maxweight /
                    (oq.concurrent_tasks or 1))
     parent = self.datastore.parent
     if parent:
         hdf5path = parent.filename
         grp_indices = parent['ruptures'].attrs['grp_indices']
         nruptures = len(parent['ruptures'])
     else:
         hdf5path = self.datastore.hdf5cache()
         grp_indices = self.datastore['ruptures'].attrs['grp_indices']
         nruptures = len(self.datastore['ruptures'])
         with hdf5.File(hdf5path, 'r+') as cache:
             self.datastore.hdf5.copy('weights', cache)
             self.datastore.hdf5.copy('ruptures', cache)
             self.datastore.hdf5.copy('rupgeoms', cache)
     self.init_logic_tree(self.csm_info)
     smap = parallel.Starmap(self.core_task.__func__,
                             monitor=self.monitor())
     trt_by_grp = self.csm_info.grp_by("trt")
     samples = self.csm_info.get_samples_by_grp()
     rlzs_by_gsim_grp = self.csm_info.get_rlzs_by_gsim_grp()
     ruptures_per_block = numpy.ceil(nruptures / (oq.concurrent_tasks or 1))
     for grp_id, rlzs_by_gsim in rlzs_by_gsim_grp.items():
         start, stop = grp_indices[grp_id]
         for indices in general.block_splitter(range(start, stop),
                                               ruptures_per_block):
             rgetter = getters.RuptureGetter(hdf5path, list(indices),
                                             grp_id, trt_by_grp[grp_id],
                                             samples[grp_id], rlzs_by_gsim)
             smap.submit(rgetter, self.src_filter, self.param)
     return smap.reduce(self.agg_dicts, numpy.zeros(self.N))
Example #39
    def gen_task_queue(self):
        """
        Build a task queue to be attached to the Starmap instance
        """
        oq = self.oqparam
        gsims_by_trt = self.csm_info.get_gsims_by_trt()
        trt_sources = self.csm.get_trt_sources(optimize_dupl=True)
        del self.csm  # save memory

        def srcweight(src):
            trt = src.tectonic_region_type
            g = len(gsims_by_trt[trt])
            m = (oq.maximum_distance(trt) / 300)**2
            return src.weight * g * m

        totweight = sum(
            sum(srcweight(src) for src in sources)
            for trt, sources, atomic in trt_sources)
        param = dict(truncation_level=oq.truncation_level,
                     imtls=oq.imtls,
                     filter_distance=oq.filter_distance,
                     reqv=oq.get_reqv(),
                     maximum_distance=oq.maximum_distance,
                     pointsource_distance=oq.pointsource_distance,
                     shift_hypo=oq.shift_hypo,
                     max_weight=oq.max_weight,
                     max_sites_disagg=oq.max_sites_disagg)
        srcfilter = self.src_filter(self.datastore.tempname)
        if oq.calculation_mode == 'preclassical' and self.N == 1:
            f1 = f2 = ruptures_by_mag_dist
        elif oq.calculation_mode == 'preclassical':
            f1 = f2 = preclassical
        elif oq.split_by_magnitude:
            f1 = f2 = classical
        else:
            f1, f2 = classical, classical_split_filter
        C = oq.concurrent_tasks or 1
        for trt, sources, atomic in trt_sources:
            param['effect'] = self.effect.get(trt)
            gsims = gsims_by_trt[trt]
            if atomic:
                # do not split atomic groups
                nb = 1
                yield f1, (sources, srcfilter, gsims, param)
            else:  # regroup the sources in blocks
                if oq.split_by_magnitude:
                    sources = split_by_mag(sources)
                blocks = list(block_splitter(sources, totweight / C,
                                             srcweight))
                nb = len(blocks)
                for block in blocks:
                    logging.debug('Sending %d sources with weight %d',
                                  len(block), block.weight)
                    yield f2, (block, srcfilter, gsims, param)

            nr = sum(src.weight for src in sources)
            logging.info('TRT = %s', trt)
            logging.info('max_dist=%d km, gsims=%d, ruptures=%d, blocks=%d',
                         oq.maximum_distance(trt), len(gsims), nr, nb)
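The srcweight heuristic makes the estimated cost of a source grow linearly with the number of GSIMs it must be computed for and quadratically with the integration distance. A worked instance with made-up numbers:

# illustrative only: a source of intrinsic weight 10, computed for
# 4 GSIMs with a 600 km maximum distance, gets an effective weight
# of 10 * 4 * (600 / 300) ** 2
def srcweight(weight, n_gsims, max_dist_km):
    return weight * n_gsims * (max_dist_km / 300) ** 2

assert srcweight(10, 4, 600) == 160.0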
Exemplo n.º 40
0
def split_sources(csm, sources, src_filter, maxweight):
    """
    Fast replacement of CompositeSourceModel.split_sources
    """
    csm.add_infos(sources)
    return general.block_splitter(sources,
                                  maxweight,
                                  weight=operator.attrgetter('weight'))
Exemplo n.º 41
0
 def test_split_with_weight(self):
     weights = dict([('a', 11), ('b', 10), ('c', 100), ('d', 15), ('e', 20),
                     ('f', 5), ('g', 30), ('h', 17), ('i', 25)])
     blocks = list(block_splitter('abcdefghi', 50, weights.get))
     self.assertEqual(
         repr(blocks),
         "[<WeightedSequence ['a', 'b'], weight=21>, <WeightedSequence ['c'], weight=100>, <WeightedSequence ['d', 'e', 'f'], weight=40>, <WeightedSequence ['g', 'h'], weight=47>, <WeightedSequence ['i'], weight=25>]"
     )
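The grouping asserted here is consistent with a simple greedy rule: accumulate items until adding the next one would push the running weight past max_weight, but never emit an empty block, so a single overweight item (like 'c' with weight 100) still gets a block of its own. A minimal sketch of that rule (an illustration, not the library's actual implementation):

def greedy_block_splitter(items, max_weight, weight=lambda item: 1):
    # hypothetical reimplementation for illustration; it reproduces
    # the grouping asserted in the test above
    if max_weight <= 0:
        # raised lazily, on the first next(), since this is a generator
        raise ValueError('max_weight=%s' % max_weight)
    block, block_weight = [], 0
    for item in items:
        w = weight(item)
        if block and block_weight + w > max_weight:
            yield block
            block, block_weight = [], 0
        block.append(item)
        block_weight += w
    if block:
        yield block

# e.g. list(greedy_block_splitter('abcdefghi', 50, weights.get)) gives
# [['a', 'b'], ['c'], ['d', 'e', 'f'], ['g', 'h'], ['i']]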
Exemplo n.º 42
0
 def test_block_splitter(self):
     expected = [
         [0, 1, 2],
         [3, 4, 5],
         [6, 7, 8],
         [9],
     ]
     actual = [x for x in block_splitter(self.DATA, 3)]
     self.assertEqual(expected, actual)
Exemplo n.º 43
0
 @classmethod
 def split(cls, src, block_size):

     """
     Split the given fault source into MultiRuptureSources depending
     on the given block size.
     """
     for i, ruptures in enumerate(
             block_splitter(src.iter_ruptures(), block_size)):
         yield cls(ruptures, '%s-%s' % (src.source_id, i),
                   src.tectonic_region_type, src.trt_model_id)
Exemplo n.º 45
0
    def build_starmap(self, sm_id, ruptures_by_grp, sitecol,
                      assetcol, riskmodel, imts, trunc_level, correl_model,
                      min_iml, monitor):
        """
        :param sm_id: source model ordinal
        :param ruptures_by_grp: dictionary of ruptures by src_group_id
        :param sitecol: a SiteCollection instance
        :param assetcol: an AssetCollection instance
        :param riskmodel: a RiskModel instance
        :param imts: a list of Intensity Measure Types
        :param trunc_level: truncation level
        :param correl_model: correlation model
        :param min_iml: vector of minimum intensities, one per IMT
        :param monitor: a Monitor instance
        :returns: a pair (starmap, dictionary of attributes)
        """
        csm_info = self.csm_info.get_info(sm_id)
        grp_ids = sorted(csm_info.get_sm_by_grp())
        rlzs_assoc = csm_info.get_rlzs_assoc(
            count_ruptures=lambda grp: len(ruptures_by_grp.get(grp.id, [])))
        num_events = sum(ebr.multiplicity for grp in ruptures_by_grp
                         for ebr in ruptures_by_grp[grp])
        seeds = self.oqparam.random_seed + numpy.arange(num_events)

        allargs = []
        # prepare the risk inputs
        ruptures_per_block = self.oqparam.ruptures_per_block
        start = 0
        grp_trt = csm_info.grp_trt()
        ignore_covs = self.oqparam.ignore_covs
        for grp_id in grp_ids:
            for rupts in block_splitter(
                    ruptures_by_grp.get(grp_id, []), ruptures_per_block):
                if ignore_covs or not self.riskmodel.covs:
                    eps = None
                elif self.oqparam.asset_correlation:
                    eps = EpsilonMatrix1(num_events, self.oqparam.master_seed)
                else:
                    n_events = sum(ebr.multiplicity for ebr in rupts)
                    eps = EpsilonMatrix0(
                        len(self.assetcol), seeds[start: start + n_events])
                    start += n_events
                ri = riskinput.RiskInputFromRuptures(
                    grp_trt[grp_id], rlzs_assoc, imts, sitecol,
                    rupts, trunc_level, correl_model, min_iml, eps)
                allargs.append((ri, riskmodel, assetcol, monitor))

        self.vals = self.assetcol.values()
        taskname = '%s#%d' % (event_based_risk.__name__, sm_id + 1)
        smap = starmap(event_based_risk, allargs, name=taskname)
        attrs = dict(num_ruptures={
            sg_id: len(rupts) for sg_id, rupts in ruptures_by_grp.items()},
                     num_events=num_events,
                     num_rlzs=len(rlzs_assoc.realizations),
                     sm_id=sm_id)
        return smap, attrs
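The start pointer above walks the per-event seed array so that each rupture block receives its own contiguous slice of seeds and no seed is reused across blocks. The same bookkeeping pattern in isolation, with toy numbers:

import numpy

num_events = 7
seeds = 42 + numpy.arange(num_events)   # one seed per event
start = 0
for n_events in (3, 2, 2):              # events in each rupture block
    block_seeds = seeds[start:start + n_events]
    start += n_events
# slices: [42 43 44], [45 46], [47 48] -- no seed appears twice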
Exemplo n.º 46
0
 def test_block_splitter_with_iter(self):
      # Test the block splitter with a data set of unknown length
     data = iter(range(10))
     expected = [
         [0, 1, 2],
         [3, 4, 5],
         [6, 7, 8],
         [9],
     ]
     actual = [x for x in block_splitter(data, 3)]
     self.assertEqual(expected, actual)
Exemplo n.º 47
0
def split(src, chunksize=MINWEIGHT):
    """
    Split a complex fault source in chunks
    """
    for i, block in enumerate(block_splitter(src.iter_ruptures(), chunksize,
                                             key=operator.attrgetter('mag'))):
        rup = block[0]
        source_id = '%s:%d' % (src.source_id, i)
        amfd = mfd.ArbitraryMFD([rup.mag], [rup.mag_occ_rate])
        rcs = RuptureCollectionSource(
            source_id, src.name, src.tectonic_region_type, amfd, block)
        yield rcs
Exemplo n.º 48
0
 def actual_data(self, job):
     damage_states = list(models.DmgState.objects.filter(
         risk_calculation=job).order_by('lsi'))
     data = list(block_splitter(
         models.DamageData.objects.filter(
             dmg_state__risk_calculation=job).order_by(
             'exposure_data', 'dmg_state'),
         len(damage_states)))
     # this is a test with 5 damage states
     # no_damage, slight, moderate, extreme, complete
     # NB: you can print the actual values with the command
     # print [[round(col.fraction, 8) for col in row] for row in data]
     return [[col.fraction for col in row] for row in data]
Exemplo n.º 49
0
def supertask(text, monitor):
    # a supertask spawning subtasks of kind get_length
    with monitor('waiting'):
        time.sleep(.1)
    for block in general.block_splitter(text, max_weight=10):
        items = [(k, len(list(grp))) for k, grp in itertools.groupby(block)]
        if len(items) == 1:
            # for instance items = [('i', 1)]
            k, v = items[0]
            yield get_length(k * v, monitor)
            return
        # for instance items = [('a', 4), ('e', 4), ('i', 2)]
        for k, v in items:
            yield get_length, k * v
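The groupby collapses each block into run-length pairs, which is what decides between computing inline (a single distinct letter) and spawning one subtask per letter. The collapsing step on its own:

import itertools

block = 'aaaaeeii'
items = [(k, len(list(grp))) for k, grp in itertools.groupby(block)]
assert items == [('a', 4), ('e', 2), ('i', 2)]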
Exemplo n.º 50
0
 def _gen_riskinputs(self, kind):
     rinfo_dt = numpy.dtype([('sid', U16), ('num_assets', U16)])
     rinfo = []
     assets_by_site = self.assetcol.assets_by_site()
     for sid, assets in enumerate(assets_by_site):
         if len(assets) == 0:
             continue
         getter = self.get_getter(kind, sid)
         for block in general.block_splitter(
                 assets, self.oqparam.assets_per_site_limit):
             yield riskinput.RiskInput(getter, numpy.array(block))
         rinfo.append((sid, len(block)))
         if len(block) >= TWO16:
             logging.error('There are %d assets on site #%d!',
                           len(block), sid)
     self.datastore['riskinput_info'] = numpy.array(rinfo, rinfo_dt)
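The TWO16 guard matches the U16 dtype declared for rinfo: a site holding 2**16 or more assets could not be recorded faithfully in the 'num_assets' field, hence the error log. A quick check of the bound:

import numpy

# 'num_assets' is a 16-bit unsigned field, so its ceiling is 65535
assert numpy.iinfo(numpy.uint16).max == 2 ** 16 - 1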
Exemplo n.º 51
0
 def block_splitter(self, sources, weight=get_weight, key=lambda src: 1):
     """
     :param sources: a list of sources
      :param weight: a weight function (default get_weight, i.e. .weight)
      :param key: a grouping function (default: a constant, i.e. no grouping)
     :returns: an iterator over blocks of sources
     """
     ct = self.oqparam.concurrent_tasks or 1
     maxweight = self.csm.get_maxweight(weight, ct, source.MINWEIGHT)
     if not hasattr(self, 'logged'):
         if maxweight == source.MINWEIGHT:
             logging.info('Using minweight=%d', source.MINWEIGHT)
         else:
             logging.info('Using maxweight=%d', maxweight)
         self.logged = True
     return general.block_splitter(sources, maxweight, weight, key)
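get_maxweight is presumably the total source weight spread over the task pool, floored at MINWEIGHT so that small models do not degenerate into thousands of tiny blocks; a sketch under that assumption (the actual method lives on the composite source model and may differ):

def get_maxweight(sources, weight, concurrent_tasks, minweight):
    # hypothetical reconstruction, matching the logging above: when
    # the computed value hits the floor, minweight is what gets used
    totweight = sum(weight(src) for src in sources)
    return max(totweight / (concurrent_tasks or 1), minweight)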
Exemplo n.º 52
0
 def actual_data(self, job):
     damage_states = list(models.DmgState.objects.filter(
         risk_calculation=job).order_by('lsi'))
     outs = models.Output.objects.filter(oq_job=job).order_by('id')
     rows = []
     for out in outs:
         data = [[
             col.fraction for col in row] for row in block_splitter(
             models.DamageData.objects.filter(
                 damage=out.damage).order_by(
                 'exposure_data', 'dmg_state'),
             len(damage_states))]
         rows.append(data)
     # this is a test with 5 damage states
     # no_damage, slight, moderate, extreme, complete
     return rows
Exemplo n.º 53
0
    def test_split_with_kind(self):
        Source = namedtuple("Source", "typology, weight")
        s1 = Source("point", 1)
        s2 = Source("point", 1)
        s3 = Source("area", 2)
        s4 = Source("area", 4)
        s5 = Source("area", 4)
        blocks = list(
            block_splitter([s1, s2, s3, s4, s5], max_weight=6, weight=attrgetter("weight"), kind=attrgetter("typology"))
        )
        self.assertEqual(list(map(len, blocks)), [2, 2, 1])
        self.assertEqual([b.weight for b in blocks], [2, 6, 4])

        blocks = list(
            split_in_blocks([s1, s2, s3, s4, s5], hint=6, weight=attrgetter("weight"), key=attrgetter("typology"))
        )
        self.assertEqual(list(map(len, blocks)), [2, 1, 1, 1])
        self.assertEqual([b.weight for b in blocks], [2, 2, 4, 4])
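The block_splitter assertions here follow from combining the greedy weight rule with the kind function: a block is also closed whenever the kind changes, so 'point' and 'area' sources never share a block. A hypothetical sketch of the combined rule, which reproduces the [2, 2, 1] / [2, 6, 4] grouping asserted above (split_in_blocks, exercised in the second half, takes a hint rather than a hard cap, hence its different grouping):

def block_splitter_by_kind(items, max_weight,
                           weight=lambda i: 1, kind=lambda i: None):
    # illustration only, not the library implementation
    block, bweight, bkind = [], 0, None
    for item in items:
        w, k = weight(item), kind(item)
        if block and (bweight + w > max_weight or k != bkind):
            yield block
            block, bweight = [], 0
        block.append(item)
        bweight += w
        bkind = k
    if block:
        yield block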
Exemplo n.º 54
0
 def split(self, maxweight):
     """
     :yields: RuptureGetters with weight <= maxweight
     """
     # NB: can be called only after .set_weights() has been called
     idx = {ri: i for i, ri in enumerate(self.rup_indices)}
     fe = self.first_event
     for rup_indices in general.block_splitter(
             self.rup_indices, maxweight, lambda ri: self.weights[idx[ri]]):
         if rup_indices:
             # some indices may have weight 0 and are discarded
             rgetter = self.__class__(
                 self.filename, list(rup_indices), self.grp_id,
                 self.trt, self.samples, self.rlzs_by_gsim, fe)
             fe += rgetter.num_events
             rgetter.weight = sum([self.weights[idx[ri]]
                                   for ri in rup_indices])
             yield rgetter
Exemplo n.º 55
0
 def execute(self):
     oq = self.oqparam
     self.set_param(
         num_taxonomies=self.assetcol.num_taxonomies_by_site(),
         maxweight=oq.ebrisk_maxweight / (oq.concurrent_tasks or 1),
         epspath=cache_epsilons(
             self.datastore, oq, self.assetcol, self.riskmodel, self.E))
     parent = self.datastore.parent
     if parent:
         hdf5path = parent.filename
         grp_indices = parent['ruptures'].attrs['grp_indices']
         nruptures = len(parent['ruptures'])
     else:
         hdf5path = self.datastore.hdf5cache()
         grp_indices = self.datastore['ruptures'].attrs['grp_indices']
         nruptures = len(self.datastore['ruptures'])
         with hdf5.File(hdf5path, 'r+') as cache:
             self.datastore.hdf5.copy('weights', cache)
             self.datastore.hdf5.copy('ruptures', cache)
             self.datastore.hdf5.copy('rupgeoms', cache)
     self.init_logic_tree(self.csm_info)
     smap = parallel.Starmap(
         self.core_task.__func__, monitor=self.monitor())
     trt_by_grp = self.csm_info.grp_by("trt")
     samples = self.csm_info.get_samples_by_grp()
     rlzs_by_gsim_grp = self.csm_info.get_rlzs_by_gsim_grp()
     ruptures_per_block = numpy.ceil(nruptures / (oq.concurrent_tasks or 1))
     first_event = 0
     for grp_id, rlzs_by_gsim in rlzs_by_gsim_grp.items():
         start, stop = grp_indices[grp_id]
         for indices in general.block_splitter(
                 range(start, stop), ruptures_per_block):
             rgetter = getters.RuptureGetter(
                 hdf5path, list(indices), grp_id,
                 trt_by_grp[grp_id], samples[grp_id], rlzs_by_gsim,
                 first_event)
             first_event += rgetter.num_events
             smap.submit(rgetter, self.src_filter, self.param)
     self.events_per_sid = []
     self.gmf_nbytes = 0
     res = smap.reduce(self.agg_dicts, numpy.zeros(self.N))
     logging.info('Produced %s of GMFs', general.humansize(self.gmf_nbytes))
     return res
Exemplo n.º 56
0
    def test_split_with_kind(self):
        Source = namedtuple('Source', 'typology, weight')
        s1 = Source('point', 1)
        s2 = Source('point', 1)
        s3 = Source('area', 2)
        s4 = Source('area', 4)
        s5 = Source('area', 4)
        blocks = list(
            block_splitter([s1, s2, s3, s4, s5], max_weight=6,
                           weight=attrgetter('weight'),
                           kind=attrgetter('typology')))
        self.assertEqual(list(map(len, blocks)), [2, 2, 1])
        self.assertEqual([b.weight for b in blocks], [2, 6, 4])

        blocks = list(
            split_in_blocks([s1, s2, s3, s4, s5], hint=6,
                            weight=attrgetter('weight'),
                            key=attrgetter('typology')))
        self.assertEqual(list(map(len, blocks)), [1, 1, 1, 2])
        self.assertEqual([b.weight for b in blocks], [2, 4, 4, 2])
Exemplo n.º 57
0
def export_dmg_per_asset_csv(key, output, target):
    """
    Classical Damage Per Asset in CSV format
    """
    dest = _get_result_export_dest(target, output)

    damage_states = list(models.DmgState.objects.filter(
        risk_calculation=output.oq_job).order_by('lsi'))
    data = block_splitter(
        models.DamageData.objects.filter(
            dmg_state__risk_calculation=output.oq_job).order_by(
            'exposure_data', 'dmg_state'),
        len(damage_states))

    with FileWrapper(dest, mode='wb') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['asset_ref'] + [ds.dmg_state for ds in damage_states])
        for row in data:
            asset = row[0].exposure_data
            fractions = [rec.fraction for rec in row]
            writer.writerow(
                [asset.asset_ref] + list(map(writers.scientificformat, fractions)))
    return dest
Exemplo n.º 59
0
 def test_block_splitter_block_size_lt_zero(self):
     gen = block_splitter(self.DATA, -1)
     with self.assertRaises(ValueError):
         next(gen)
Exemplo n.º 60
0
 def test_block_splitter_block_size_gt_data_len(self):
     expected = [self.DATA]
     actual = [x for x in block_splitter(self.DATA, 11)]
     self.assertEqual(expected, actual)
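These two edge-case tests pin down the boundary contract: a non-positive block size is rejected (lazily, on the first next(), since block_splitter is a generator), while a block size larger than the data yields a single block containing everything. A quick combined check with stand-in data:

data = list(range(10))
assert list(block_splitter(data, 11)) == [data]  # one oversized block
try:
    next(block_splitter(data, -1))               # rejected on first use
except ValueError:
    pass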