Example No. 1
    def __iter__(self):
        self.received = []
        for fut in self.futures:
            check_mem_usage()  # log a warning if too much memory is used
            if hasattr(fut, 'result'):
                result = fut.result()
            else:
                result = fut
            if isinstance(result, BaseException):
                # this happens for instance with WorkerLostError with celery
                raise result
            elif hasattr(result, 'unpickle'):
                self.received.append(len(result))
                val, etype, mon = result.unpickle()
            else:
                val, etype, mon = result
                self.received.append(len(Pickled(result)))
            if etype:
                raise RuntimeError(val)
            if self.num_tasks:
                next(self.log_percent)
            if not self.name.startswith('_'):  # no info for private tasks
                self.save_task_data(mon)
            yield val

        if self.received:
            tot = sum(self.received)
            max_per_task = max(self.received)
            self.progress('Received %s of data, maximum per task %s',
                          humansize(tot), humansize(max_per_task))
            received = {'max_per_task': max_per_task, 'tot': tot}
            tname = self.name
            dic = {tname: {'sent': self.sent, 'received': received}}
            mon.save_info(dic)
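Every example on this page formats byte counts with the `humansize` helper (imported from `openquake.baselib.general` in most of the snippets). As a rough sketch of what such a helper does — an assumption for illustration, not the library's actual implementation — it could look like this:

def humansize(nbytes, suffixes=('B', 'KB', 'MB', 'GB', 'TB', 'PB')):
    """Return a human-readable string for a byte count, e.g. 10240 -> '10.00 KB'."""
    i = 0
    while nbytes >= 1024 and i < len(suffixes) - 1:
        nbytes /= 1024.
        i += 1
    return '%.2f %s' % (nbytes, suffixes[i])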
Example No. 2
    def reduce(self, agg=operator.add, acc=None):
        """
        Loop on a set of results and update the accumulator
        by using the aggregation function.

        :param agg: the aggregation function, (acc, val) -> new acc
        :param acc: the initial value of the accumulator
        :returns: the final value of the accumulator
        """
        if acc is None:
            acc = AccumDict()
        log_percent = log_percent_gen(
            self.name, len(self.results), self.progress)
        next(log_percent)

        def agg_and_percent(acc, val_exc):
            (val, exc) = val_exc
            if exc:
                raise RuntimeError(val)
            res = agg(acc, val)
            next(log_percent)
            return res

        if self.no_distribute:
            agg_result = reduce(agg_and_percent, self.results, acc)
        else:
            self.progress('Sent %s of data', humansize(self.sent))
            agg_result = self.aggregate_result_set(agg_and_percent, acc)
            self.progress('Received %s of data', humansize(self.received))
        self.results = []
        return agg_result
Example No. 3
def ebr_data_transfer(token, dstore):
    """
    Display the data transferred in an event based risk calculation
    """
    attrs = dstore['losses_by_event'].attrs
    sent = humansize(attrs['sent'])
    received = humansize(attrs['tot_received'])
    return 'Event Based Risk: sent %s, received %s' % (sent, received)
Example No. 4
def view_contents(token, dstore):
    """
    Returns the size of the contents of the datastore and its total size
    """
    oq = dstore['oqparam']
    data = sorted((dstore.getsize(key), key) for key in dstore)
    rows = [(key, humansize(nbytes)) for nbytes, key in data]
    total = '\n%s : %s' % (
        dstore.hdf5path, humansize(os.path.getsize(dstore.hdf5path)))
    return rst_table(rows, header=(oq.description, '')) + total
Example No. 5
def source_data_transfer(token, dstore):
    """
    Determine the amount of data transferred from the controller node
    to the workers and back in a classical calculation.
    """
    block_info, to_send_forward, to_send_back = get_data_transfer(dstore)
    tbl = [
        ('Number of tasks to generate', len(block_info)),
        ('Estimated sources to send', humansize(to_send_forward)),
        ('Estimated hazard curves to receive', humansize(to_send_back))]
    return rst_table(tbl)
Example No. 6
def view_contents(token, dstore):
    """
    Returns the size of the contents of the datastore and its total size
    """
    try:
        desc = dstore['oqparam'].description
    except KeyError:
        desc = ''
    data = sorted((dstore.getsize(key), key) for key in dstore)
    rows = [(key, humansize(nbytes)) for nbytes, key in data]
    total = '\n%s : %s' % (
        dstore.filename, humansize(os.path.getsize(dstore.filename)))
    return rst_table(rows, header=(desc, '')) + total
Example No. 7
def export_gmf(ekey, dstore):
    """
    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    """
    sitecol = dstore['sitecol']
    rlzs_assoc = dstore['rlzs_assoc']
    rupture_by_tag = sum(dstore['sescollection'], AccumDict())
    all_tags = dstore['tags'].value
    oq = dstore['oqparam']
    investigation_time = (None if oq.calculation_mode == 'scenario'
                          else oq.investigation_time)
    samples = oq.number_of_logic_tree_samples
    fmt = ekey[-1]
    gmfs = dstore[ekey[0]]
    nbytes = gmfs.attrs['nbytes']
    logging.info('Internal size of the GMFs: %s', humansize(nbytes))
    if nbytes > GMF_MAX_SIZE:
        logging.warn(GMF_WARNING, dstore.hdf5path)
    fnames = []
    for rlz, gmf_by_idx in zip(
            rlzs_assoc.realizations, rlzs_assoc.combine_gmfs(gmfs)):
        tags = all_tags[list(gmf_by_idx)]
        gmfs = list(gmf_by_idx.values())
        if not gmfs:
            continue
        ruptures = [rupture_by_tag[tag] for tag in tags]
        fname = build_name(rlz, 'gmf', fmt, samples)
        fnames.append(os.path.join(dstore.export_dir, fname))
        globals()['export_gmf_%s' % fmt](
            ('gmf', fmt), dstore.export_dir, fname, sitecol,
            ruptures, gmfs, rlz, investigation_time)
    return fnames
Example No. 8
def run(job_ini, concurrent_tasks=None,
        loglevel='info', hc=None, exports=''):
    """
    Run a calculation. Optionally, set the number of concurrent_tasks
    (0 to disable the parallelization).
    """
    logging.basicConfig(level=getattr(logging, loglevel.upper()))
    job_inis = job_ini.split(',')
    assert len(job_inis) in (1, 2), job_inis
    monitor = performance.Monitor('total', measuremem=True)

    if len(job_inis) == 1:  # run hazard or risk
        oqparam = readinput.get_oqparam(job_inis[0], hc_id=hc)
        if hc and hc < 0:  # interpret negative calculation ids
            calc_ids = datastore.get_calc_ids()
            try:
                hc = calc_ids[hc]
            except IndexError:
                raise SystemExit('There are %d old calculations, cannot '
                                 'retrieve the %s' % (len(calc_ids), hc))
        calc = base.calculators(oqparam, monitor)
        monitor.monitor_dir = calc.datastore.calc_dir
        with monitor:
            calc.run(concurrent_tasks=concurrent_tasks, exports=exports,
                     hazard_calculation_id=hc)
    else:  # run hazard + risk
        calc = run2(
            job_inis[0], job_inis[1], concurrent_tasks, exports, monitor)

    logging.info('Total time spent: %s s', monitor.duration)
    logging.info('Memory allocated: %s', general.humansize(monitor.mem))
    monitor.flush()
    print('See the output with hdfview %s/output.hdf5' %
          calc.datastore.calc_dir)
    return calc
Example No. 9
def export_gmf(ekey, dstore):
    """
    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    """
    sitecol = dstore['sitecol']
    rlzs_assoc = dstore['rlzs_assoc']
    oq = dstore['oqparam']
    investigation_time = (None if oq.calculation_mode == 'scenario'
                          else oq.investigation_time)
    samples = oq.number_of_logic_tree_samples
    fmt = ekey[-1]
    sid_data = dstore['sid_data']
    gmf_data = dstore['gmf_data']
    nbytes = gmf_data.attrs['nbytes']
    logging.info('Internal size of the GMFs: %s', humansize(nbytes))
    if nbytes > GMF_MAX_SIZE:
        logging.warn(GMF_WARNING, dstore.hdf5path)
    fnames = []
    for rlz, rup_by_etag in zip(rlzs_assoc.realizations,
                                rlzs_assoc.combine_gmfs(gmf_data, sid_data)):
        ruptures = [rup_by_etag[etag] for etag in sorted(rup_by_etag)]
        fname = build_name(dstore, rlz, 'gmf', fmt, samples)
        fnames.append(fname)
        globals()['export_gmf_%s' % fmt](
            ('gmf', fmt), fname, sitecol, ruptures, rlz, investigation_time)
    return fnames
Example No. 10
 def __repr__(self):
     if self.measuremem:
         return '<%s %s, duration=%ss, memory=%s>' % (
             self.__class__.__name__, self.operation, self.duration,
             humansize(self.mem))
     return '<%s %s, duration=%ss>' % (self.__class__.__name__,
                                       self.operation, self.duration)
Example No. 11
    def post_execute(self, result):
        """
        Save the event loss table in the datastore.

        :param result:
            a numpy array of shape (O, L, R) containing lists of arrays
        """
        nses = self.oqparam.ses_per_logic_tree_path
        saved = {out: 0 for out in self.outs}
        N = len(self.assetcol)
        with self.monitor('saving loss table',
                          autoflush=True, measuremem=True):
            for (o, l, r), data in numpy.ndenumerate(result):
                if not data:  # empty list
                    continue
                if o in (ELT, ILT):  # loss tables, data is a list of arrays
                    losses = numpy.concatenate(data)
                    self.datasets[o, l, r].extend(losses)
                    saved[self.outs[o]] += losses.nbytes
                else:  # risk curves, data is a list of counts dictionaries
                    cb = self.riskmodel.curve_builders[l]
                    counts_matrix = cb.get_counts(N, data)
                    curves = cb.build_rcurves(
                        counts_matrix, nses, self.assetcol)
                    self.datasets[o, l, r].dset[:] = curves
                    saved[self.outs[o]] += curves.nbytes
                self.datastore.hdf5.flush()

        for out in self.outs:
            nbytes = saved[out]
            if nbytes:
                self.datastore[out].attrs['nbytes'] = nbytes
                logging.info('Saved %s in %s', humansize(nbytes), out)
            else:  # remove empty outputs
                del self.datastore[out]
Example No. 12
def export_gmf(ekey, dstore):
    """
    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    """
    sitecol = dstore['sitecol']
    rlzs_assoc = dstore['csm_info'].get_rlzs_assoc()
    oq = dstore['oqparam']
    investigation_time = (None if oq.calculation_mode == 'scenario'
                          else oq.investigation_time)
    samples = oq.number_of_logic_tree_samples
    fmt = ekey[-1]
    etags = dstore['etags'].value
    gmf_data = dstore['gmf_data']
    nbytes = gmf_data.attrs['nbytes']
    logging.info('Internal size of the GMFs: %s', humansize(nbytes))
    if nbytes > GMF_MAX_SIZE:
        logging.warn(GMF_WARNING, dstore.hdf5path)
    fnames = []
    for rlz in rlzs_assoc.realizations:
        gmf_arr = gmf_data['%04d' % rlz.ordinal].value
        ruptures = []
        for eid, gmfa in group_array(gmf_arr, 'eid').items():
            rup = util.Rupture(etags[eid], sorted(set(gmfa['sid'])))
            rup.gmfa = gmfa
            ruptures.append(rup)
        ruptures.sort(key=operator.attrgetter('etag'))
        fname = build_name(dstore, rlz, 'gmf', fmt, samples)
        fnames.append(fname)
        globals()['export_gmf_%s' % fmt](
            ('gmf', fmt), fname, sitecol, oq.imtls, ruptures, rlz,
            investigation_time)
    return fnames
Example No. 13
def export_gmf(ekey, dstore):
    """
    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    """
    oq = dstore['oqparam']
    if not oq.calculation_mode.startswith('scenario'):
        return []
    sitecol = dstore['sitecol']
    investigation_time = (None if oq.calculation_mode == 'scenario'
                          else oq.investigation_time)
    fmt = ekey[-1]
    gmf_data = dstore['gmf_data']
    nbytes = gmf_data.attrs['nbytes']
    logging.info('Internal size of the GMFs: %s', humansize(nbytes))
    if nbytes > GMF_MAX_SIZE:
        logging.warning(GMF_WARNING, dstore.filename)
    data = gmf_data['data'].value
    ses_idx = 1  # for scenario only
    events = []
    for eid, gmfa in group_array(data, 'eid').items():
        rup = Event(eid, ses_idx, sorted(set(gmfa['sid'])), gmfa)
        events.append(rup)
    fname = dstore.build_fname('gmf', 'scenario', fmt)
    writer = hazard_writers.EventBasedGMFXMLWriter(
        fname, sm_lt_path='', gsim_lt_path='')
    writer.serialize(
        GmfCollection(sitecol, oq.imtls, events, investigation_time))
    return [fname]
Example No. 14
    def post_execute(self, num_events):
        """
        Save an array of losses by taxonomy of shape (T, L, R).
        """
        event_based.EventBasedRuptureCalculator.__dict__['post_execute'](
            self, num_events)
        if self.gmfbytes == 0:
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')
        logging.info('Generated %s of GMFs', humansize(self.gmfbytes))
        self.datastore.save('job_info', {'gmfbytes': self.gmfbytes})

        A, E = len(self.assetcol), num_events
        if 'all_loss_ratios' in self.datastore:
            for rlzname in self.datastore['all_loss_ratios']:
                self.datastore.set_nbytes('all_loss_ratios/' + rlzname)
            self.datastore.set_nbytes('all_loss_ratios')
            asslt = self.datastore['all_loss_ratios']
            for rlz, dset in asslt.items():
                for ds in dset.values():
                    ds.attrs['nonzero_fraction'] = len(ds) / (A * E)

        if 'agg_loss_table' not in self.datastore:
            logging.warning(
                'No losses were generated: most likely there is an error in y'
                'our input files or the GMFs were below the minimum intensity')
        else:
            for rlzname in self.datastore['agg_loss_table']:
                self.datastore.set_nbytes('agg_loss_table/' + rlzname)
            self.datastore.set_nbytes('agg_loss_table')
            agglt = self.datastore['agg_loss_table']
            for rlz, dset in agglt.items():
                for ds in dset.values():
                    ds.attrs['nonzero_fraction'] = len(ds) / E
Example No. 15
 def submit_all(self):
     """
     :returns: an IterResult object
     """
     try:
         nargs = len(self.task_args)
     except TypeError:  # generators have no len
         nargs = ''
     if nargs == 1:
         [args] = self.task_args
         self.progress('Executing a single task in process')
         fut = mkfuture(safely_call(self.task_func, args))
         return IterResult([fut], self.name)
     task_no = 0
     for args in self.task_args:
         task_no += 1
         if task_no == 1:  # first time
             self.progress('Submitting %s "%s" tasks', nargs, self.name)
         if isinstance(args[-1], Monitor):  # add incremental task number
             args[-1].task_no = task_no
             weight = getattr(args[0], 'weight', None)
             if weight:
                 args[-1].weight = weight
         self.submit(*args)
     if not task_no:
         self.progress('No %s tasks were submitted', self.name)
     # NB: keep self._iterfutures() an iterator, especially with celery!
     ir = IterResult(self._iterfutures(), self.name, task_no,
                     self.progress)
     ir.sent = self.sent  # for information purposes
     if self.sent:
         self.progress('Sent %s of data in %d task(s)',
                       humansize(sum(self.sent.values())),
                       ir.num_tasks)
     return ir
Example No. 16
def view_job_info(token, dstore):
    """
    Determine the amount of data transferred from the controller node
    to the workers and back in a classical calculation.
    """
    data = [['task', 'sent', 'received']]
    for task in dstore['task_info']:
        dset = dstore['task_info/' + task]
        if 'argnames' in dset.attrs:
            argnames = dset.attrs['argnames'].split()
            totsent = dset.attrs['sent']
            sent = ['%s=%s' % (a, humansize(s))
                    for s, a in sorted(zip(totsent, argnames), reverse=True)]
            recv = dset['received'].sum()
            data.append((task, ' '.join(sent), humansize(recv)))
    return rst_table(data)
Example No. 17
def _humansize(literal):
    dic = ast.literal_eval(decode(literal))
    if isinstance(dic, dict):
        items = sorted(dic.items(), key=operator.itemgetter(1), reverse=True)
        lst = ['%s %s' % (k, humansize(v)) for k, v in items]
        return ', '.join(lst)
    else:
        return str(dic)
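A hypothetical call to the `_humansize` helper above, assuming the literal is the kind of byte string stored by the engine in a `sent` attribute (the exact output depends on how `humansize` formats the numbers):

sent_attr = b"{'sources': 1048576, 'gsims': 2048}"  # hypothetical stored literal
print(_humansize(sent_attr))
# -> something like: sources 1.00 MB, gsims 2.00 KB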
Example No. 18
 def __repr__(self):
     calc_id = ' #%s ' % self.calc_id if self.calc_id else ' '
     msg = '%s%s%s' % (self.__class__.__name__, calc_id, self.operation)
     if self.measuremem:
         return '<%s, duration=%ss, memory=%s>' % (
             msg, self.duration, humansize(self.mem))
     elif self.duration:
         return '<%s, duration=%ss>' % (msg, self.duration)
     else:
         return '<%s>' % msg
Example No. 19
def show(calc_id, key=None, rlzs=None):
    """
    Show the content of a datastore.

    :param calc_id: numeric calculation ID; if 0, show all calculations
    :param key: key of the datastore
    :param rlzs: flag; if given, print out the realizations in order
    """
    if not calc_id:
        if not os.path.exists(datastore.DATADIR):
            return
        rows = []
        for calc_id in datastore.get_calc_ids(datastore.DATADIR):
            try:
                oq = OqParam.from_(datastore.DataStore(calc_id).attrs)
                cmode, descr = oq.calculation_mode, oq.description
            except:  # invalid datastore directory
                logging.warn('Removed invalid calculation %d', calc_id)
                shutil.rmtree(os.path.join(
                    datastore.DATADIR, 'calc_%s' % calc_id))
            else:
                rows.append((calc_id, cmode, descr))
        for row in sorted(rows, key=lambda row: row[0]):  # by calc_id
            print('#%d %s: %s' % row)
        return
    ds = datastore.DataStore(calc_id)
    if key:
        if key in datastore.view:
            print(datastore.view(key, ds))
            return
        obj = ds[key]
        if hasattr(obj, 'value'):  # an array
            print(write_csv(io.StringIO(), obj.value))
        else:
            print(obj)
        return
    # print all keys
    oq = OqParam.from_(ds.attrs)
    print(oq.calculation_mode, 'calculation (%r) saved in %s contains:' %
          (oq.description, ds.hdf5path))
    for key in ds:
        print(key, humansize(ds.getsize(key)))

    # this part is experimental and not tested on purpose
    if rlzs and 'curves_by_trt_gsim' in ds:
        min_value = 0.01  # used in rmsep
        curves_by_rlz, mean_curves = combined_curves(ds)
        dists = []
        for rlz in sorted(curves_by_rlz):
            curves = curves_by_rlz[rlz]
            dist = sum(rmsep(mean_curves[imt], curves[imt], min_value)
                       for imt in mean_curves.dtype.fields)
            dists.append((dist, rlz))
        for dist, rlz in sorted(dists):
            print('rlz=%s, rmsep=%s' % (rlz, dist))
Example No. 20
    def reduce(self, agg=operator.add, acc=None, posthook=None):
        """
        Loop on a set of results and update the accumulator
        by using the aggregation function.

        :param agg: the aggregation function, (acc, val) -> new acc
        :param acc: the initial value of the accumulator
        :returns: the final value of the accumulator
        """
        if acc is None:
            acc = AccumDict()
        num_tasks = len(self.results)
        if num_tasks == 0:
            logging.warn('No tasks were submitted')
            return acc

        log_percent = log_percent_gen(self.name, num_tasks, self.progress)
        next(log_percent)

        def agg_and_percent(acc, triple):
            (val, exc, mon) = triple
            if exc:
                raise RuntimeError(val)
            res = agg(acc, val)
            next(log_percent)
            mon.flush()
            return res

        if self.no_distribute:
            agg_result = reduce(agg_and_percent, self.results, acc)
        else:
            self.progress('Sent %s of data in %d task(s)',
                          humansize(sum(self.sent.values())), num_tasks)
            agg_result = self.aggregate_result_set(agg_and_percent, acc)
            self.progress('Received %s of data, maximum per task %s',
                          humansize(sum(self.received)),
                          humansize(max(self.received)))
        if posthook:
            posthook(self)
        self.results = []
        return agg_result
Example No. 21
def view_gmfs_total_size(name, dstore):
    """
    :returns:
        the total size of the GMFs as human readable string; it assumes
        4 bytes for the rupture index, 4 bytes for the realization index
        and 8 bytes for each float (there are num_imts floats per gmf)
    """
    nbytes = 0
    num_imts = len(dstore['oqparam'].imtls)
    for counts in dstore['counts_per_rlz']:
        nbytes += 8 * counts['gmf'] * (num_imts + 1)
    return humansize(nbytes)
Example No. 22
 def post_execute(self, num_events):
     """
     Save an array of losses by taxonomy of shape (T, L, R).
     """
     if self.gmfbytes == 0:
         raise RuntimeError('No GMFs were generated, perhaps they were '
                            'all below the minimum_intensity threshold')
     logging.info('Generated %s of GMFs', humansize(self.gmfbytes))
     self.datastore.save('job_info', {'gmfbytes': self.gmfbytes})
     logging.info('Saved %s losses by taxonomy', (self.T, self.L, self.R))
     logging.info('Saved %d event losses', num_events)
     self.datastore.set_nbytes('agg_loss_table')
     self.datastore.set_nbytes('events')
Example No. 23
def avglosses_data_transfer(token, dstore):
    """
    Determine the amount of average losses transferred from the workers to the
    controller node in a risk calculation.
    """
    oq = OqParam.from_(dstore.attrs)
    N = len(dstore['assetcol'])
    R = len(dstore['rlzs_assoc'].realizations)
    L = len(dstore['riskmodel'].loss_types)
    ct = oq.concurrent_tasks
    size_bytes = N * R * L * 2 * 8 * ct  # two 8 byte floats, loss and ins_loss
    return ('%d asset(s) x %d realization(s) x %d loss type(s) x 2 losses x '
            '8 bytes x %d tasks = %s' % (N, R, L, ct, humansize(size_bytes)))
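The estimate above is plain multiplication; plugging in hypothetical numbers (10,000 assets, 4 realizations, 2 loss types, 16 concurrent tasks) gives roughly 20 MB:

N, R, L, ct = 10000, 4, 2, 16          # hypothetical calculation sizes
size_bytes = N * R * L * 2 * 8 * ct    # two 8-byte floats (loss and ins_loss) per task
print(size_bytes)                      # 20480000 bytes, about 19.5 MB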
Example No. 24
def zip_all(directory):
    """
    Zip source models and exposures recursively
    """
    zips = []
    for cwd, dirs, files in os.walk(directory):
        if 'ssmLT.xml' in files:
            zips.append(zip_source_model(os.path.join(cwd, 'ssmLT.xml')))
        for f in files:
            if f.endswith('.xml') and 'exposure' in f.lower():
                zips.append(zip_exposure(os.path.join(cwd, f)))
    total = sum(os.path.getsize(z) for z in zips)
    logging.info('Generated %s of zipped data', general.humansize(total))
Example No. 25
def show(calc_id, key=None, rlzs=None):
    """
    Show the content of a datastore.

    :param calc_id: numeric calculation ID; if 0, show all calculations
    :param key: key of the datastore
    :param rlzs: flag; if given, print out the realizations in order
    """
    if not calc_id:
        if not os.path.exists(datastore.DATADIR):
            return
        rows = []
        for name in sorted(os.listdir(datastore.DATADIR)):
            mo = re.match(r'calc_(\d+)', name)
            if mo:
                calc_id = int(mo.group(1))
                try:
                    oq = datastore.DataStore(calc_id)['oqparam']
                except:  # invalid datastore directory
                    shutil.rmtree(os.path.join(
                        datastore.DATADIR, 'calc_%s' % calc_id))
                else:
                    rows.append((calc_id, oq.calculation_mode, oq.description))
        for row in sorted(rows, key=lambda row: row[0]):  # by calc_id
            print('#%d %s: %s' % row)
        return
    ds = datastore.DataStore(calc_id)
    if key:
        obj = ds[key]
        if key.startswith('/') and hasattr(obj, 'value'):
            print(obj.value)
        else:
            print(obj)
        return
    # print all keys
    oq = ds['oqparam']
    print(oq.calculation_mode, 'calculation (%r) saved in %s contains:' %
          (oq.description, ds.calc_dir))
    for key in ds:
        print(key, humansize(ds.getsize(key)))
    if rlzs and 'curves_by_trt_gsim' in ds:
        min_value = 0.01  # used in rmsep
        curves_by_rlz, mean_curves = combined_curves(ds)
        dists = []
        for rlz in sorted(curves_by_rlz):
            curves = curves_by_rlz[rlz]
            dist = sum(rmsep(mean_curves[imt], curves[imt], min_value)
                       for imt in mean_curves.dtype.fields)
            dists.append((dist, rlz))
        for dist, rlz in sorted(dists):
            print('rlz=%s, rmsep=%s' % (rlz, dist))
Example No. 26
 def __iter__(self):
     self.received = []
     for fut in self.futures:
         check_mem_usage()  # log a warning if too much memory is used
         if hasattr(fut, 'result'):
             result = fut.result()
         else:
             result = fut
         if hasattr(result, 'unpickle'):
             self.received.append(len(result))
             val, etype, mon = result.unpickle()
         else:
             val, etype, mon = result
         if etype:
             raise RuntimeError(val)
         if self.num_tasks:
             next(self.log_percent)
         self.save_task_data(mon)
         yield val
     if self.received:
         self.progress('Received %s of data, maximum per task %s',
                       humansize(sum(self.received)),
                       humansize(max(self.received)))
Example No. 27
def export(calc_id, datastore_key, format='csv', export_dir='.'):
    """
    Export an output from the datastore.
    """
    logging.basicConfig(level=logging.INFO)
    dstore = datastore.DataStore(calc_id)
    dstore.export_dir = export_dir
    with performance.PerformanceMonitor('export', measuremem=True) as mon:
        for fmt in format.split(','):
            fnames = export_((datastore_key, fmt), dstore)
            nbytes = sum(os.path.getsize(f) for f in fnames)
            print('Exported %s in %s' % (general.humansize(nbytes), fnames))
    if mon.duration > 1:
        print(mon)
Example No. 28
def avglosses_data_transfer(token, dstore):
    """
    Determine the amount of average losses transferred from the workers to the
    controller node in a risk calculation.
    """
    oq = dstore['oqparam']
    N = len(dstore['assetcol'])
    R = dstore['csm_info'].get_num_rlzs()
    L = len(dstore.get_attr('risk_model', 'loss_types'))
    ct = oq.concurrent_tasks
    size_bytes = N * R * L * 8 * ct  # 8 byte floats
    return (
        '%d asset(s) x %d realization(s) x %d loss type(s) losses x '
        '8 bytes x %d tasks = %s' % (N, R, L, ct, humansize(size_bytes)))
Example No. 29
def info(name, filtersources=False, weightsources=False, datatransfer=False):
    """
    Give information. You can pass the name of an available calculator,
    a job.ini file, or a zip archive with the input files.
    """
    logging.basicConfig(level=logging.INFO)
    with Monitor('info', measuremem=True) as mon:
        if datatransfer:
            oqparam = readinput.get_oqparam(name)
            calc = base.calculators(oqparam)
            calc.pre_execute()
            n_tasks, to_send_forward, to_send_back = data_transfer(calc)
            _print_info(calc.rlzs_assoc, oqparam,
                        calc.composite_source_model, calc.sitecol,
                        weightsources=True)
            print('Number of tasks to be generated: %d' % n_tasks)
            print('Estimated data to be sent forward: %s' %
                  humansize(to_send_forward))
            print('Estimated data to be sent back: %s' %
                  humansize(to_send_back))
        else:
            _info(name, filtersources, weightsources)
    if mon.duration > 1:
        print(mon)
Example No. 30
 def __init__(self, dstore):
     self.dstore = dstore
     self.oq = oq = dstore['oqparam']
     self.text = (oq.description.encode('utf8') + '\n' +
                  '=' * len(oq.description))
     info = dstore['job_info']
     dpath = dstore.hdf5path
     mtime = os.path.getmtime(dpath)
     self.text += '\n\n%s:%s updated %s' % (info.hostname, dpath,
                                            time.ctime(mtime))
     # NB: in the future, the sitecol could be transferred as
     # an array by leveraging the HDF5 serialization protocol in
     # litetask decorator; for the moment however the size of the
     # data to transfer is given by the usual pickle
     sitecol_size = humansize(len(parallel.Pickled(dstore['sitecol'])))
     self.text += '\n\nnum_sites = %d, sitecol = %s' % (len(
         dstore['sitemesh']), sitecol_size)
Example No. 31
 def __init__(self, iresults, taskname, argnames, num_tasks, sent,
              progress=logging.info, hdf5=None):
     self.iresults = iresults
     self.name = taskname
     self.argnames = ' '.join(argnames)
     self.num_tasks = num_tasks
     self.sent = sent
     self.progress = progress
     self.hdf5 = hdf5
     self.received = []
     if self.num_tasks:
         self.log_percent = self._log_percent()
         next(self.log_percent)
     else:
         self.progress('No %s tasks were submitted', self.name)
     self.progress('Sent %s of data in %s task(s)',
                   humansize(sent.sum()), num_tasks)
Example No. 32
def export(datastore_key, export_dir='.', calc_id=-1, exports='csv'):
    """
    Export an output from the datastore.
    """
    logging.basicConfig(level=logging.INFO)
    dstore = datastore.read(calc_id)
    parent_id = dstore['oqparam'].hazard_calculation_id
    if parent_id:
        dstore.set_parent(datastore.read(parent_id))
    dstore.export_dir = export_dir
    with performance.Monitor('export', measuremem=True) as mon:
        for fmt in exports.split(','):
            fnames = export_((datastore_key, fmt), dstore)
            nbytes = sum(os.path.getsize(f) for f in fnames)
            print('Exported %s in %s' % (general.humansize(nbytes), fnames))
    if mon.duration > 1:
        print(mon)
Example No. 33
def export(datastore_key, calc_id=-1, exports='csv', export_dir='.'):
    """
    Export an output from the datastore.
    """
    logging.basicConfig(level=logging.INFO)
    dstore = datastore.read(calc_id)
    parent_id = dstore['oqparam'].hazard_calculation_id
    if parent_id:
        dstore.parent = datastore.read(parent_id)
    dstore.export_dir = export_dir
    with performance.Monitor('export', measuremem=True) as mon:
        for fmt in exports.split(','):
            fnames = export_((datastore_key, fmt), dstore)
            nbytes = sum(os.path.getsize(f) for f in fnames)
            print('Exported %s in %s' % (general.humansize(nbytes), fnames))
    if mon.duration > 1:
        print(mon)
Example No. 34
def _run(job_inis, concurrent_tasks, calc_id, pdb, loglevel, hc, exports,
         params):
    global calc_path
    assert len(job_inis) in (1, 2), job_inis
    # set the logs first of all
    calc_id = logs.init(calc_id, getattr(logging, loglevel.upper()))
    # disable gzip_input
    base.BaseCalculator.gzip_inputs = lambda self: None
    with performance.Monitor('total runtime', measuremem=True) as monitor:
        if os.environ.get('OQ_DISTRIBUTE') not in ('no', 'processpool'):
            os.environ['OQ_DISTRIBUTE'] = 'processpool'
        if len(job_inis) == 1:  # run hazard or risk
            if hc:
                hc_id = hc[0]
                rlz_ids = hc[1:]
            else:
                hc_id = None
                rlz_ids = ()
            oqparam = readinput.get_oqparam(job_inis[0], hc_id=hc_id)
            if not oqparam.cachedir:  # enable caching
                oqparam.cachedir = datastore.get_datadir()
            if hc_id and hc_id < 0:  # interpret negative calculation ids
                calc_ids = datastore.get_calc_ids()
                try:
                    hc_id = calc_ids[hc_id]
                except IndexError:
                    raise SystemExit('There are %d old calculations, cannot '
                                     'retrieve the %s' %
                                     (len(calc_ids), hc_id))
            calc = base.calculators(oqparam, calc_id)
            calc.run(concurrent_tasks=concurrent_tasks,
                     pdb=pdb,
                     exports=exports,
                     hazard_calculation_id=hc_id,
                     rlz_ids=rlz_ids,
                     **params)
        else:  # run hazard + risk
            calc = run2(job_inis[0], job_inis[1], calc_id, concurrent_tasks,
                        pdb, loglevel, exports, params)

    logging.info('Total time spent: %s s', monitor.duration)
    logging.info('Memory allocated: %s', general.humansize(monitor.mem))
    print('See the output with silx view %s' % calc.datastore.filename)
    calc_path, _ = os.path.splitext(calc.datastore.filename)  # used below
    return calc
Example No. 35
def main(datastore_key, calc_id: int = -1, *, exports='csv', export_dir='.'):
    """
    Export an output from the datastore. To see the available datastore
    keys, use the command `oq info exports`.
    """
    dstore = util.read(calc_id)
    parent_id = dstore['oqparam'].hazard_calculation_id
    if parent_id:
        dstore.parent = util.read(parent_id)
    dstore.export_dir = export_dir
    with performance.Monitor('export', measuremem=True) as mon:
        for fmt in exports.split(','):
            fnames = export_((datastore_key, fmt), dstore)
            nbytes = sum(os.path.getsize(f) for f in fnames)
            print('Exported %s in %s' % (general.humansize(nbytes), fnames))
    if mon.duration > 1:
        print(mon)
    dstore.close()
Example No. 36
 def get(self, what):
     """
     :param what: what to extract
     :returns: an ArrayWrapper instance
     """
     url = '%s/v1/calc/%d/extract/%s' % (self.server, self.calc_id, what)
     logging.info('GET %s', url)
     resp = self.sess.get(url)
     if resp.status_code != 200:
         raise WebAPIError(resp.text)
     logging.info('Read %s of data' % general.humansize(len(resp.content)))
     npz = numpy.load(io.BytesIO(resp.content))
     attrs = {k: npz[k] for k in npz if k != 'array'}
     try:
         arr = npz['array']
     except KeyError:
         arr = ()
     return ArrayWrapper(arr, attrs)
Example No. 37
def zip_all_jobs(directory):
    """
    Zip job.ini files recursively
    """
    zips = []
    for cwd, dirs, files in os.walk(directory):
        job_inis = [os.path.join(cwd, f) for f in sorted(files)
                    if f.endswith('.ini')]
        if not job_inis:
            continue
        elif len(job_inis) == 2:
            job_ini, risk_ini = job_inis
        else:
            [job_ini], risk_ini = job_inis, ''
        archive_zip = job_ini[:-4].replace('_hazard', '') + '.zip'
        zips.append(zip_job(job_ini, archive_zip, risk_ini))
    total = sum(os.path.getsize(z) for z in zips)
    logging.info('Generated %s of zipped data', general.humansize(total))
Example No. 38
def run_calc(log):
    """
    Run a calculation.

    :param log:
        LogContext of the current job
    """
    register_signals()
    setproctitle('oq-job-%d' % log.calc_id)
    with log:
        oqparam = log.get_oqparam()
        calc = base.calculators(oqparam, log.calc_id)
        logging.info('%s running %s [--hc=%s]',
                     getpass.getuser(),
                     calc.oqparam.inputs['job_ini'],
                     calc.oqparam.hazard_calculation_id)
        logging.info('Using engine version %s', __version__)
        msg = check_obsolete_version(oqparam.calculation_mode)
        # NB: disabling the warning should be done only for users with
        # an updated LTS version, but we are doing it for all users
        # if msg:
        #    logging.warning(msg)
        calc.from_engine = True
        if config.zworkers['host_cores']:
            set_concurrent_tasks_default(calc)
        else:
            logging.warning('Assuming %d %s workers',
                            parallel.Starmap.num_cores, OQ_DISTRIBUTE)
        t0 = time.time()
        calc.run()
        logging.info('Exposing the outputs to the database')
        expose_outputs(calc.datastore)
        path = calc.datastore.filename
        size = general.humansize(getsize(path))
        logging.info('Stored %s on %s in %d seconds',
                     size, path, time.time() - t0)
        calc.datastore.close()
        for line in logs.dbcmd('list_outputs', log.calc_id, False):
            general.safeprint(line)
        # sanity check to make sure that the logging on file is working
        if (log.log_file and log.log_file != os.devnull and
                getsize(log.log_file) == 0):
            logging.warning('The log file %s is empty!?' % log.log_file)
    return calc
Example No. 39
def export_gmf(ekey, dstore):
    """
    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    """
    sitecol = dstore['sitecol']
    rlzs_assoc = dstore['csm_info'].get_rlzs_assoc()
    oq = dstore['oqparam']
    investigation_time = (None if oq.calculation_mode == 'scenario' else
                          oq.investigation_time)
    fmt = ekey[-1]
    gmf_data = dstore['gmf_data']
    nbytes = gmf_data.attrs['nbytes']
    logging.info('Internal size of the GMFs: %s', humansize(nbytes))
    if nbytes > GMF_MAX_SIZE:
        logging.warn(GMF_WARNING, dstore.hdf5path)
    fnames = []
    ruptures_by_rlz = collections.defaultdict(list)
    for grp_id, gsim in rlzs_assoc:
        key = 'grp-%02d' % grp_id
        try:
            events = dstore['events/' + key]
        except KeyError:  # source model producing zero ruptures
            continue
        eventdict = dict(zip(events['eid'], events))
        try:
            data = gmf_data['%s/%s' % (key, gsim)].value
        except KeyError:  # no GMFs for the given realization
            continue
        for rlzi, rlz in enumerate(rlzs_assoc[grp_id, gsim]):
            ruptures = ruptures_by_rlz[rlz]
            gmf_arr = get_array(data, rlzi=rlzi)
            for eid, gmfa in group_array(gmf_arr, 'eid').items():
                ses_idx = eventdict[eid]['ses']
                rup = Rup(eid, ses_idx, sorted(set(gmfa['sid'])), gmfa)
                ruptures.append(rup)
    for rlz in sorted(ruptures_by_rlz):
        ruptures_by_rlz[rlz].sort(key=operator.attrgetter('eid'))
        fname = dstore.build_fname('gmf', rlz, fmt)
        fnames.append(fname)
        globals()['export_gmf_%s' % fmt](('gmf', fmt), fname, sitecol,
                                         oq.imtls, ruptures_by_rlz[rlz], rlz,
                                         investigation_time)
    return fnames
Example No. 40
 def execute(self):
     oq = self.oqparam
     self.set_param(
         num_taxonomies=self.assetcol.num_taxonomies_by_site(),
         maxweight=oq.ebrisk_maxweight / (oq.concurrent_tasks or 1),
         epspath=cache_epsilons(self.datastore, oq, self.assetcol,
                                self.riskmodel, self.E))
     parent = self.datastore.parent
     if parent:
         hdf5path = parent.filename
         grp_indices = parent['ruptures'].attrs['grp_indices']
         nruptures = len(parent['ruptures'])
     else:
         hdf5path = self.datastore.hdf5cache()
         grp_indices = self.datastore['ruptures'].attrs['grp_indices']
         nruptures = len(self.datastore['ruptures'])
         with hdf5.File(hdf5path, 'r+') as cache:
             self.datastore.hdf5.copy('weights', cache)
             self.datastore.hdf5.copy('ruptures', cache)
             self.datastore.hdf5.copy('rupgeoms', cache)
     self.init_logic_tree(self.csm_info)
     smap = parallel.Starmap(self.core_task.__func__,
                             monitor=self.monitor())
     trt_by_grp = self.csm_info.grp_by("trt")
     samples = self.csm_info.get_samples_by_grp()
     rlzs_by_gsim_grp = self.csm_info.get_rlzs_by_gsim_grp()
     ruptures_per_block = numpy.ceil(nruptures / (oq.concurrent_tasks or 1))
     first_event = 0
     for grp_id, rlzs_by_gsim in rlzs_by_gsim_grp.items():
         start, stop = grp_indices[grp_id]
         for indices in general.block_splitter(range(start, stop),
                                               ruptures_per_block):
             rgetter = getters.RuptureGetter(hdf5path, list(indices),
                                             grp_id, trt_by_grp[grp_id],
                                             samples[grp_id], rlzs_by_gsim,
                                             first_event)
             first_event += rgetter.num_events
             smap.submit(rgetter, self.src_filter, self.param)
     self.events_per_sid = []
     self.gmf_nbytes = 0
     res = smap.reduce(self.agg_dicts, numpy.zeros(self.N))
     logging.info('Produced %s of GMFs', general.humansize(self.gmf_nbytes))
     return res
Example No. 41
def save_gmdata(calc, n_rlzs):
    """
    Save a composite array `gmdata` in the datastore.

    :param calc: a calculator with a dictionary .gmdata {rlz: data}
    :param n_rlzs: the total number of realizations
    """
    n_sites = len(calc.sitecol)
    dtlist = ([(imt, F32) for imt in calc.oqparam.imtls] + [('events', U32),
                                                            ('nbytes', U32)])
    array = numpy.zeros(n_rlzs, dtlist)
    for rlzi in sorted(calc.gmdata):
        data = calc.gmdata[rlzi]  # (imts, events, nbytes)
        events = data[-2]
        nbytes = data[-1]
        gmv = data[:-2] / events / n_sites
        array[rlzi] = tuple(gmv) + (events, nbytes)
    calc.datastore['gmdata'] = array
    logging.info('Generated %s of GMFs', humansize(array['nbytes'].sum()))
Example No. 42
 def post_execute(self, dummy):
     oq = self.oqparam
     L, Dc = self.dmgcsq.shape[1:]
     """
     for loss_id in range(L):
         for dci in range(Dc):
             dmgcsq = self.dmgcsq[:, loss_id, dci] * oq.time_ratio
             dic['loss_id'] = loss_id
     self.datastore.create_df('dmgcsq', pandas.DataFrame(dic))
     """
     size = self.datastore.getsize('risk_by_event')
     logging.info('Building aggregated curves from %s of risk_by_event',
                  general.humansize(size))
     builder = get_loss_builder(self.datastore)
     alt_df = self.datastore.read_df('risk_by_event')
     del alt_df['event_id']
     dic = general.AccumDict(accum=[])
     columns = sorted(
         set(alt_df.columns) - {'agg_id', 'loss_id', 'variance'})
     periods = [0] + list(builder.return_periods)
     for (agg_id,
          loss_id), df in alt_df.groupby([alt_df.agg_id, alt_df.loss_id]):
         tots = [df[col].sum() * oq.time_ratio for col in columns]
         curves = [
             builder.build_curve(df[col].to_numpy()) for col in columns
         ]
         for p, period in enumerate(periods):
             dic['agg_id'].append(agg_id)
             dic['loss_id'].append(loss_id)
             dic['return_period'].append(period)
             if p == 0:
                 for col, tot in zip(columns, tots):
                     dic[col].append(tot)
             else:
                 for col, curve in zip(columns, curves):
                     dic[col].append(curve[p - 1])
     fix_dtype(dic, U16, ['agg_id'])
     fix_dtype(dic, U8, ['loss_id'])
     fix_dtype(dic, U32, ['return_period'])
     fix_dtype(dic, F32, columns)
     ls = ' '.join(self.crmodel.damage_states[1:])
     self.datastore.create_df('aggcurves', dic.items(), limit_states=ls)
Example No. 43
 def post_execute(self, dummy):
     oq = self.oqparam
     A, L, Dc = self.dmgcsq.shape
     dmgcsq = self.dmgcsq * oq.time_ratio
     self.datastore['damages-rlzs'] = dmgcsq.reshape((A, 1, L, Dc))
     set_rlzs_stats(self.datastore,
                    'damages',
                    asset_id=self.assetcol['id'],
                    loss_type=oq.loss_names,
                    dmg_state=['no_damage'] + self.crmodel.get_dmg_csq())
     size = self.datastore.getsize('risk_by_event')
     logging.info('Building aggregated curves from %s of risk_by_event',
                  general.humansize(size))
     alt_df = self.datastore.read_df('risk_by_event')
     del alt_df['event_id']
     dic = general.AccumDict(accum=[])
     columns = sorted(
         set(alt_df.columns) - {'agg_id', 'loss_id', 'variance'})
     periods = [0] + list(self.builder.return_periods)
     for (agg_id,
          loss_id), df in alt_df.groupby([alt_df.agg_id, alt_df.loss_id]):
         tots = [df[col].sum() * oq.time_ratio for col in columns]
         curves = [
             self.builder.build_curve(df[col].to_numpy()) for col in columns
         ]
         for p, period in enumerate(periods):
             dic['agg_id'].append(agg_id)
             dic['loss_id'].append(loss_id)
             dic['return_period'].append(period)
             if p == 0:
                 for col, tot in zip(columns, tots):
                     dic[col].append(tot)
             else:
                 for col, curve in zip(columns, curves):
                     dic[col].append(curve[p - 1])
     fix_dtype(dic, U16, ['agg_id'])
     fix_dtype(dic, U8, ['loss_id'])
     fix_dtype(dic, U32, ['return_period'])
     fix_dtype(dic, F32, columns)
     ls = ' '.join(self.crmodel.damage_states[1:])
     self.datastore.create_df('aggcurves', dic.items(), limit_states=ls)
     self.sanity_check(dmgcsq)
Example No. 44
 def __init__(self,
              iresults,
              taskname,
              num_tasks,
              progress=logging.info,
              sent=0):
     self.iresults = iresults
     self.name = taskname
     self.num_tasks = num_tasks
     self.progress = progress
     self.sent = sent
     self.received = []
     if self.num_tasks:
         self.log_percent = self._log_percent()
         next(self.log_percent)
     else:
         self.progress('No %s tasks were submitted', self.name)
     if sent:
         self.progress('Sent %s of data in %s task(s)',
                       humansize(sum(sent.values())), num_tasks)
Example No. 45
 def pre_execute(self):
     oq = self.oqparam
     ds = self.datastore
     self.reaggreate = False
     if oq.hazard_calculation_id and not ds.parent:
         ds.parent = datastore.read(oq.hazard_calculation_id)
         assetcol = ds['assetcol']
         self.aggkey = base.save_agg_values(ds, assetcol, oq.loss_names,
                                            oq.aggregate_by)
         aggby = ds.parent['oqparam'].aggregate_by
         self.reaggreate = aggby and oq.aggregate_by != aggby
         if self.reaggreate:
             self.num_tags = dict(
                 zip(aggby, assetcol.tagcol.agg_shape(aggby)))
     else:
         assetcol = ds['assetcol']
         self.aggkey = assetcol.tagcol.get_aggkey(oq.aggregate_by)
     self.L = len(oq.loss_names)
     size = general.humansize(ds.getsize('agg_loss_table'))
     logging.info('Stored %s in the agg_loss_table', size)
Example No. 46
def get_max_gmf_size(dstore):
    """
    Upper limit for the size of the GMFs
    """
    oq = dstore['oqparam']
    n_imts = len(oq.imtls)
    rlzs_by_trt_id = dstore['csm_info'].get_rlzs_assoc().get_rlzs_by_trt_id()
    n_ruptures = collections.Counter()
    size = collections.Counter()  # by trt_id
    for serial in dstore['sescollection']:
        ebr = dstore['sescollection/' + serial]
        trt_id = ebr.trt_id
        n_ruptures[trt_id] += 1
        # there are 4 bytes per float
        size[trt_id] += (len(ebr.indices) * ebr.multiplicity *
                         len(rlzs_by_trt_id[trt_id]) * n_imts) * 4
    [(trt_id, maxsize)] = size.most_common(1)
    return dict(n_imts=n_imts, size=maxsize, n_ruptures=n_ruptures[trt_id],
                n_rlzs=len(rlzs_by_trt_id[trt_id]),
                trt_id=trt_id, humansize=humansize(maxsize))
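For a single rupture, the per-TRT contribution computed in the loop above is `len(ebr.indices) * ebr.multiplicity * n_rlzs * n_imts * 4` bytes; with hypothetical values (200 affected sites, multiplicity 10, 2 realizations, 3 IMTs) that is 48,000 bytes:

n_sites, multiplicity, n_rlzs, n_imts = 200, 10, 2, 3   # hypothetical rupture
nbytes = n_sites * multiplicity * n_rlzs * n_imts * 4   # 4 bytes per float
print(nbytes)  # 48000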
Example No. 47
def run(job_ini,
        concurrent_tasks=None,
        pdb=None,
        loglevel='info',
        hc=None,
        exports=''):
    """
    Run a calculation. Optionally, set the number of concurrent_tasks
    (0 to disable the parallelization).
    """
    logging.basicConfig(level=getattr(logging, loglevel.upper()))
    job_inis = job_ini.split(',')
    assert len(job_inis) in (1, 2), job_inis
    monitor = performance.PerformanceMonitor('total', measuremem=True)

    if len(job_inis) == 1:  # run hazard or risk
        oqparam = readinput.get_oqparam(job_inis[0], hc_id=hc)
        if hc and hc < 0:  # interpret negative calculation ids
            calc_ids = datastore.get_calc_ids()
            try:
                hc = calc_ids[hc]
            except IndexError:
                raise SystemExit('There are %d old calculations, cannot '
                                 'retrieve the %s' % (len(calc_ids), hc))
        calc = base.calculators(oqparam, monitor)
        monitor.monitor_dir = calc.datastore.calc_dir
        with monitor:
            calc.run(concurrent_tasks=concurrent_tasks,
                     pdb=pdb,
                     exports=exports,
                     hazard_calculation_id=hc)
    else:  # run hazard + risk
        calc = run2(job_inis[0], job_inis[1], concurrent_tasks, pdb, exports,
                    monitor)

    logging.info('Total time spent: %s s', monitor.duration)
    logging.info('Memory allocated: %s', general.humansize(monitor.mem))
    monitor.flush()
    print('See the output with hdfview %s/output.hdf5' %
          calc.datastore.calc_dir)
    return calc
Example No. 48
    def submit_all(self):
        """
        :returns: an IterResult object
        """
        try:
            nargs = len(self.task_args)
        except TypeError:  # generators have no len
            nargs = ''
        if nargs == 1:
            [args] = self.task_args
            self.progress('Executing "%s" in process', self.name)
            fut = mkfuture(safely_call(self.task_func, args))
            return IterResult([fut], self.name, nargs)

        if self.distribute == 'qsub':
            logging.warn('EXPERIMENTAL: sending tasks to the grid engine')
            allargs = list(self.task_args)
            return IterResult(qsub(self.task_func, allargs),
                              self.name, len(allargs), self.progress)

        task_no = 0
        for args in self.task_args:
            task_no += 1
            if task_no == 1:  # first time
                self.progress('Submitting %s "%s" tasks', nargs, self.name)
            if isinstance(args[-1], Monitor):
                # add incremental task number and task weight
                args[-1].task_no = task_no
                args[-1].weight = getattr(args[0], 'weight', 1.)
            self.submit(*args)
        if not task_no:
            self.progress('No %s tasks were submitted', self.name)
        # NB: keep self._iterfutures() an iterator, especially with celery!
        ir = IterResult(self._iterfutures(), self.name, task_no,
                        self.progress)
        ir.sent = self.sent  # for information purposes
        if self.sent:
            self.progress('Sent %s of data in %d task(s)',
                          humansize(sum(self.sent.values())),
                          ir.num_tasks)
        return ir
Example No. 49
 def __init__(self,
              futures,
              taskname,
              num_tasks,
              progress=logging.info,
              sent=0):
     self.futures = futures
     self.name = taskname
     self.num_tasks = num_tasks
     if self.name.startswith("_"):  # private task, log only in debug
         self.progress = logging.debug
     else:
         self.progress = progress
     self.sent = sent
     self.received = []
     if self.num_tasks:
         self.log_percent = self._log_percent()
         next(self.log_percent)
     if sent:
         self.progress('Sent %s of data in %s task(s)',
                       humansize(sum(sent.values())), num_tasks)
Example No. 50
def _run(job_ini, concurrent_tasks, pdb, loglevel, hc, exports, params):
    global calc_path
    logging.basicConfig(level=getattr(logging, loglevel.upper()))
    job_inis = job_ini.split(',')
    assert len(job_inis) in (1, 2), job_inis
    monitor = performance.Monitor(
        'total runtime', measuremem=True)
    if len(job_inis) == 1:  # run hazard or risk
        if hc:
            hc_id = hc[0]
            rlz_ids = hc[1:]
        else:
            hc_id = None
            rlz_ids = ()
        oqparam = readinput.get_oqparam(job_inis[0], hc_id=hc_id)
        if hc_id and hc_id < 0:  # interpret negative calculation ids
            calc_ids = datastore.get_calc_ids()
            try:
                hc_id = calc_ids[hc_id]
            except IndexError:
                raise SystemExit(
                    'There are %d old calculations, cannot '
                    'retrieve the %s' % (len(calc_ids), hc_id))
        calc = base.calculators(oqparam, monitor)
        with calc.monitor:
            calc.run(concurrent_tasks=concurrent_tasks, pdb=pdb,
                     exports=exports, hazard_calculation_id=hc_id,
                     rlz_ids=rlz_ids, **params)
    else:  # run hazard + risk
        calc = run2(
            job_inis[0], job_inis[1], concurrent_tasks, pdb,
            exports, params, monitor)

    logging.info('Total time spent: %s s', monitor.duration)
    logging.info('Memory allocated: %s', general.humansize(monitor.mem))
    monitor.flush()
    print('See the output with hdfview %s' % calc.datastore.hdf5path)
    calc_path = calc.datastore.calc_dir  # used for the .pstat filename
    return calc
Example No. 51
def _print_info(dstore, filtersources=True, weightsources=True):
    assoc = dstore['rlzs_assoc']
    oqparam = OqParam.from_(dstore.attrs)
    csm = dstore['composite_source_model']
    sitecol = dstore['sitecol']
    print(csm.get_info())
    print('See https://github.com/gem/oq-risklib/blob/master/doc/'
          'effective-realizations.rst for an explanation')
    print(assoc)
    if filtersources or weightsources:
        [info] = readinput.get_job_info(oqparam, csm, sitecol)
        info['n_sources'] = csm.get_num_sources()
        curve_matrix_size = (info['n_sites'] * info['n_levels'] *
                             info['n_imts'] * len(assoc) * 8)
        for k in info.dtype.fields:
            if k == 'input_weight' and not weightsources:
                pass
            else:
                print(k, info[k])
        print('curve_matrix_size', humansize(curve_matrix_size))
    if 'num_ruptures' in dstore:
        print(datastore.view('rupture_collections', dstore))
Example No. 52
def _run(job_inis, concurrent_tasks, pdb, loglevel, hc, exports, params):
    global calc_path
    assert len(job_inis) in (1, 2), job_inis
    # set the logs first of all
    calc_id = logs.init(level=getattr(logging, loglevel.upper()))
    with performance.Monitor('total runtime', measuremem=True) as monitor:
        if len(job_inis) == 1:  # run hazard or risk
            if hc:
                hc_id = hc[0]
                rlz_ids = hc[1:]
            else:
                hc_id = None
                rlz_ids = ()
            oqparam = readinput.get_oqparam(job_inis[0], hc_id=hc_id)
            vars(oqparam).update(params)
            if hc_id and hc_id < 0:  # interpret negative calculation ids
                calc_ids = datastore.get_calc_ids()
                try:
                    hc_id = calc_ids[hc_id]
                except IndexError:
                    raise SystemExit('There are %d old calculations, cannot '
                                     'retrieve the %s' %
                                     (len(calc_ids), hc_id))
            calc = base.calculators(oqparam, calc_id)
            calc.run(concurrent_tasks=concurrent_tasks,
                     pdb=pdb,
                     exports=exports,
                     hazard_calculation_id=hc_id,
                     rlz_ids=rlz_ids)
        else:  # run hazard + risk
            calc = run2(job_inis[0], job_inis[1], calc_id, concurrent_tasks,
                        pdb, loglevel, exports, params)

    logging.info('Total time spent: %s s', monitor.duration)
    logging.info('Memory allocated: %s', general.humansize(monitor.mem))
    print('See the output with hdfview %s' % calc.datastore.hdf5path)
    calc_path, _ = os.path.splitext(calc.datastore.hdf5path)  # used below
    return calc
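
The negative-id convention above is plain Python negative indexing on the list returned by datastore.get_calc_ids(): -1 is the most recent calculation, -2 the one before it, and so on. A minimal illustration with hypothetical ids:

calc_ids = [37, 42, 45]      # hypothetical ids from datastore.get_calc_ids()
assert calc_ids[-1] == 45    # hc_id=-1 -> the latest calculation
assert calc_ids[-2] == 42    # hc_id=-2 -> the one before it
# an out-of-range id such as -4 raises IndexError, converted into SystemExit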
Exemplo n.º 53
0
def export_hcurves_rlzs(ekey, dstore):
    """
    Export all hazard curves in a single .hdf5 file. This is not
    recommended, even though this exporter is parallel and very efficient:
    I was able to export 6 GB of curves per minute. However, for large
    calculations it is then impossible to view the .hdf5 file with
    hdfview, because you will run out of memory. Also, compression is not
    enabled, otherwise all the time would be spent in the compression phase
    on the controller node, with the workers doing nothing.
    The recommended way to postprocess large computations is to instantiate
    the PmapGetter and work on one block of sites at a time, discarding
    what is not needed. The exporter here is meant for small/medium
    calculations and as an example of what you should implement yourself
    if you need to postprocess the hazard curves.
    """
    oq = dstore['oqparam']
    imtls = oq.imtls
    rlzs_assoc = dstore['csm_info'].get_rlzs_assoc()
    sitecol = dstore['sitecol']
    pgetter = calc.PmapGetter(dstore, rlzs_assoc)
    N = len(sitecol)
    R = len(rlzs_assoc.realizations)
    fname = dstore.export_path('%s.%s' % ekey)
    monitor = performance.Monitor(ekey[0], fname)
    size = humansize(dstore.get_attr('poes', 'nbytes'))
    logging.info('Reading %s of probability maps', size)
    allargs = [(pgetter.new(tile.sids), imtls, monitor)
               for tile in sitecol.split_in_tiles(R)]
    with hdf5.File(fname, 'w') as f:
        f['imtls'] = imtls
        dset = f.create_dataset('hcurves-rlzs', (N, R), imtls.dt)
        dset.attrs['investigation_time'] = oq.investigation_time
        logging.info('Building the hazard curves for %d sites, %d rlzs', N, R)
        for sids, allcurves in parallel.Processmap(build_hcurves, allargs):
            for sid, curves in zip(sids, allcurves):
                dset[sid] = curves
    return [fname]
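
As a complement to the docstring above, here is a minimal serial sketch of the recommended block-wise postprocessing. It reuses the names from the exporter (calc.PmapGetter, build_hcurves, split_in_tiles, performance.Monitor) and assumes that build_hcurves returns a (sids, curves) pair, as its use with Processmap suggests; the number of blocks is arbitrary.

def postprocess_by_blocks(dstore, num_blocks=10):
    # more blocks -> less memory held at any one time
    oq = dstore['oqparam']
    rlzs_assoc = dstore['csm_info'].get_rlzs_assoc()
    sitecol = dstore['sitecol']
    pgetter = calc.PmapGetter(dstore, rlzs_assoc)
    mon = performance.Monitor('postprocessing')
    for tile in sitecol.split_in_tiles(num_blocks):
        sids, allcurves = build_hcurves(pgetter.new(tile.sids), oq.imtls, mon)
        for sid, curves in zip(sids, allcurves):
            pass  # work on the curves for this site, then discard them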
Exemplo n.º 54
0
def export_gmf(ekey, dstore):
    """
    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    """
    oq = dstore['oqparam']
    if not oq.calculation_mode.startswith('scenario'):
        logging.warn('The GMF exporter in .xml format has been removed, '
                     'use the one in .csv format')
        return []
    sitecol = dstore['sitecol']
    investigation_time = (None if oq.calculation_mode == 'scenario'
                          else oq.investigation_time)
    fmt = ekey[-1]
    gmf_data = dstore['gmf_data']
    nbytes = gmf_data.attrs['nbytes']
    logging.info('Internal size of the GMFs: %s', humansize(nbytes))
    if nbytes > GMF_MAX_SIZE:
        logging.warn(GMF_WARNING, dstore.hdf5path)
    fnames = []
    events_by_rlz = collections.defaultdict(list)
    data = gmf_data['data'].value
    ses_idx = 1  # for scenario only
    for rlzi, gmf_arr in group_array(data, 'rlzi').items():
        events = events_by_rlz[rlzi]
        for eid, gmfa in group_array(gmf_arr, 'eid').items():
            rup = Event(eid, ses_idx, sorted(set(gmfa['sid'])), gmfa)
            events.append(rup)
    rlzs = dstore['csm_info'].get_rlzs_assoc().realizations
    for rlzi in sorted(events_by_rlz):
        events_by_rlz[rlzi].sort(key=operator.attrgetter('eid'))
        fname = dstore.build_fname('gmf', rlzi, fmt)
        fnames.append(fname)
        globals()['export_gmf_%s' % fmt](
            ('gmf', fmt), fname, sitecol, oq.imtls, events_by_rlz[rlzi],
            rlzs[rlzi], investigation_time)
    return fnames
Exemplo n.º 55
0
    def execute(self):
        self.datastore.flush()  # just to be sure
        oq = self.oqparam
        self.set_param(hdf5path=self.datastore.filename,
                       tempname=cache_epsilons(self.datastore, oq,
                                               self.assetcol, self.crmodel,
                                               self.E))
        srcfilter = self.src_filter()
        logging.info('Sending {:_d} ruptures'.format(
            len(self.datastore['ruptures'])))
        self.events_per_sid = []
        self.datastore.swmr_on()
        self.avg_gmf = general.AccumDict(
            accum=numpy.zeros(self.N, F32))  # imt -> gmvs
        smap = parallel.Starmap(start_ebrisk, h5=self.datastore.hdf5)
        smap.monitor.save('srcfilter', srcfilter)
        smap.monitor.save('crmodel', self.crmodel)
        for rg in getters.gen_rupture_getters(self.datastore,
                                              oq.concurrent_tasks):
            smap.submit((rg, self.param))
        smap.reduce(self.agg_dicts)
        gmf_bytes = self.datastore['gmf_info']['gmfbytes'].sum()
        logging.info('Produced %s of GMFs', general.humansize(gmf_bytes))
        return 1
Exemplo n.º 56
0
    def pre_execute(self):
        oq = self.oqparam
        oq.ground_motion_fields = False
        super().pre_execute()
        self.param['lba'] = lba = LossesByAsset(
            self.assetcol, oq.loss_names, self.policy_name, self.policy_dict)
        self.param['ses_ratio'] = oq.ses_ratio
        self.param['aggregate_by'] = oq.aggregate_by
        self.param.pop('oqparam', None)  # unneeded
        self.L = L = len(lba.loss_names)
        A = len(self.assetcol)
        self.datastore.create_dset('avg_losses-stats', F32, (A, 1, L))  # mean
        shp = self.assetcol.tagcol.agg_shape((L,), oq.aggregate_by)
        elt_dt = [('event_id', U32), ('rlzi', U16), ('loss', (F32, shp))]
        elt_nbytes = 4 * self.E * numpy.prod(shp)
        logging.info('Approx size of the event loss table: %s',
                     general.humansize(elt_nbytes))
        if elt_nbytes / (oq.concurrent_tasks or 1) > TWO32:
            raise RuntimeError(
                'The event loss table is too big to be transferred '
                'with %d tasks' % oq.concurrent_tasks)
        self.datastore.create_dset('losses_by_event', elt_dt)
        self.zerolosses = numpy.zeros(shp, F32)  # to get the multi-index
        self.datastore.create_dset('gmf_info', gmf_info_dt)
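
To make the size estimate above concrete, here is a back-of-the-envelope check with hypothetical numbers (one million events, five loss types, no aggregate_by, 16 tasks):

import numpy

TWO32 = 2 ** 32                       # ~4 GB, the per-task transfer limit
E, shp = 1_000_000, (5,)              # hypothetical events and loss shape
elt_nbytes = 4 * E * numpy.prod(shp)  # float32 losses -> 4 bytes each
print(elt_nbytes)                     # 20_000_000 bytes, i.e. about 20 MB
assert elt_nbytes / 16 < TWO32        # easily transferable with 16 tasks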
Exemplo n.º 57
0
    def __init__(self, dstore):
        self.dstore = dstore
        self.oq = oq = dstore['oqparam']
        self.text = decode(oq.description) + '\n' + '=' * len(oq.description)
        try:
            info = {decode(k): ast.literal_eval(decode(v))
                    for k, v in dict(dstore['job_info']).items()}
        except KeyError:  # job_info not in the datastore (scenario hazard)
            info = dict(hostname='localhost')
        dpath = dstore.hdf5path
        mtime = os.path.getmtime(dpath)
        host = '%s:%s' % (info['hostname'], decode(dpath))
        updated = str(time.ctime(mtime))
        versions = sorted(dstore['/'].attrs.items())
        self.text += '\n\n' + views.rst_table([[host, updated]] + versions)
        # NB: in the future, the sitecol could be transferred as
        # an array by leveraging the HDF5 serialization protocol;
        # for the moment however the size of the
        # data to transfer is given by the usual pickle
        sitecol_size = humansize(len(parallel.Pickled(dstore['sitecol'])))
        self.text += '\n\nnum_sites = %d, sitecol = %s' % (
            len(dstore['sitecol']), sitecol_size)
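
The size measurement in the last two lines generalizes to any object. A minimal sketch, assuming the usual openquake.baselib locations for Pickled and humansize:

from openquake.baselib import parallel
from openquake.baselib.general import humansize

def transfer_size(obj):
    """Human-readable pickled size, as computed above for the sitecol."""
    return humansize(len(parallel.Pickled(obj)))

# e.g. transfer_size(dstore['sitecol']) might return something like '1.2 MB'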
Exemplo n.º 58
0
    def post_execute(self, result):
        """
        Save the SES collection
        """
        oq = self.oqparam
        N = len(self.sitecol.complete)
        L = len(oq.imtls.array)
        if oq.hazard_calculation_id is None:
            self.rupser.close()
            num_events = sum(set_counts(self.datastore, 'events').values())
            if num_events == 0:
                raise RuntimeError(
                    'No seismic events! Perhaps the investigation time is too '
                    'small or the maximum_distance is too small')
            if oq.save_ruptures:
                logging.info('Setting %d event years on %d ruptures',
                             num_events, self.rupser.nruptures)
            with self.monitor('setting event years',
                              measuremem=True,
                              autoflush=True):
                numpy.random.seed(self.oqparam.ses_seed)
                set_random_years(self.datastore, 'events',
                                 int(self.oqparam.investigation_time))

        if self.gmf_size:
            self.datastore.set_attrs('events', max_gmf_size=self.gmf_size)
            msg = 'less than ' if self.get_min_iml(self.oqparam).sum() else ''
            logging.info('Generating %s%s of GMFs', msg,
                         humansize(self.gmf_size))

        if oq.hazard_curves_from_gmfs:
            rlzs = self.csm_info.rlzs_assoc.realizations
            # compute and save statistics; this is done in process and can
            # be very slow if there are thousands of realizations
            weights = [rlz.weight for rlz in rlzs]
            hstats = self.oqparam.hazard_stats()
            if len(hstats):
                logging.info('Computing statistical hazard curves')
                for kind, stat in hstats:
                    pmap = compute_pmap_stats(result.values(), [stat], weights)
                    arr = numpy.zeros((N, L), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array[:, 0]
                    self.datastore['hcurves/' + kind] = arr
            self.save_hmaps()
        if self.datastore.parent:
            self.datastore.parent.open('r')
        if 'gmf_data' in self.datastore:
            self.save_gmf_bytes()
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            # one could also set oq.number_of_logic_tree_samples = 0
            self.cl = ClassicalCalculator(oq)
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model, however usually this is quite fast and do not dominate
            # the computation
            self.cl.run(close=False)
            cl_mean_curves = get_mean_curves(self.cl.datastore)
            eb_mean_curves = get_mean_curves(self.datastore)
            rdiff, index = util.max_rel_diff_index(cl_mean_curves,
                                                   eb_mean_curves)
            logging.warn(
                'Relative difference with the classical '
                'mean curves: %d%% at site index %d', rdiff * 100, index)
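
For the mean case, the statistics step above boils down to a weighted average of the per-realization curves. A toy numpy illustration (the real code works on ProbabilityMap objects via compute_pmap_stats):

import numpy

curves = numpy.array([[0.10, 0.01],    # realization 0, two intensity levels
                      [0.20, 0.02]])   # realization 1
weights = numpy.array([0.4, 0.6])      # logic tree weights, summing to 1
mean_curve = weights @ curves          # -> array([0.16, 0.016])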
Exemplo n.º 59
0
    def execute(self):
        self.datastore.flush()  # just to be sure
        oq = self.oqparam
        parent = self.datastore.parent
        if parent:
            grp_indices = parent['ruptures'].attrs['grp_indices']
            n_occ = parent['ruptures']['n_occ']
            dstore = parent
            csm_info = parent['csm_info']
        else:
            grp_indices = self.datastore['ruptures'].attrs['grp_indices']
            n_occ = self.datastore['ruptures']['n_occ']
            dstore = self.datastore
            csm_info = self.csm_info
        per_block = numpy.ceil(n_occ.sum() / (oq.concurrent_tasks or 1))
        self.set_param(
            hdf5path=self.datastore.filename,
            task_duration=oq.task_duration or 1200,  # 20min
            tempname=cache_epsilons(self.datastore, oq, self.assetcol,
                                    self.crmodel, self.E))

        self.init_logic_tree(csm_info)
        trt_by_grp = csm_info.grp_by("trt")
        samples = csm_info.get_samples_by_grp()
        rlzs_by_gsim_grp = csm_info.get_rlzs_by_gsim_grp()
        ngroups = 0
        fe = 0
        eslices = self.datastore['eslices']
        allargs = []
        allpairs = list(enumerate(n_occ))
        srcfilter = self.src_filter(self.datastore.tempname)
        for grp_id, rlzs_by_gsim in rlzs_by_gsim_grp.items():
            start, stop = grp_indices[grp_id]
            if start == stop:  # no ruptures for the given grp_id
                continue
            ngroups += 1
            for pairs in general.block_splitter(allpairs[start:stop],
                                                per_block,
                                                weight=get_n_occ):
                indices = [i for i, n in pairs]
                rup_array = dstore['ruptures'][indices]
                rgetter = getters.RuptureGetter(
                    rup_array, dstore.filename, grp_id, trt_by_grp[grp_id],
                    samples[grp_id], rlzs_by_gsim,
                    eslices[fe:fe + len(indices), 0])
                allargs.append((rgetter, srcfilter, self.param))
                fe += len(indices)
        logging.info('Found %d/%d source groups with ruptures', ngroups,
                     len(rlzs_by_gsim_grp))
        self.events_per_sid = []
        self.lossbytes = 0
        self.datastore.swmr_on()
        smap = parallel.Starmap(self.core_task.__func__,
                                allargs,
                                h5=self.datastore.hdf5)
        res = smap.reduce(self.agg_dicts, numpy.zeros(self.N))
        gmf_bytes = self.datastore['gmf_info']['gmfbytes'].sum()
        logging.info('Produced %s of GMFs', general.humansize(gmf_bytes))
        logging.info('Produced %s of losses',
                     general.humansize(self.lossbytes))
        return res
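
The partitioning above relies on general.block_splitter, which groups the (index, n_occ) pairs so that each block carries roughly per_block occurrences in total. A small sketch with hypothetical pairs, where the lambda stands in for the get_n_occ helper used above:

from openquake.baselib import general

pairs = [(0, 5), (1, 3), (2, 7), (3, 1)]   # hypothetical (index, n_occ) pairs
blocks = list(general.block_splitter(pairs, 8, weight=lambda pair: pair[1]))
for block in blocks:
    print([i for i, n in block], sum(n for i, n in block))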
Exemplo n.º 60
0
    def acc0(self):
        """
        Initial accumulator, a dict grp_id -> ProbabilityMap(L, G)
        """
        zd = AccumDict()
        num_levels = len(self.oqparam.imtls.array)
        rparams = {'grp_id', 'occurrence_rate', 'weight', 'probs_occur',
                   'lon_', 'lat_', 'rrup_', 'source_id'}
        gsims_by_trt = self.full_lt.get_gsims_by_trt()
        n = len(self.full_lt.sm_rlzs)
        trts = list(self.full_lt.gsim_lt.values)
        for sm in self.full_lt.sm_rlzs:
            for grp_id in self.full_lt.grp_ids(sm.ordinal):
                trt = trts[grp_id // n]
                gsims = gsims_by_trt[trt]
                cm = ContextMaker(trt, gsims)
                rparams.update(cm.REQUIRES_RUPTURE_PARAMETERS)
                for dparam in cm.REQUIRES_DISTANCES:
                    rparams.add(dparam + '_')
                zd[grp_id] = ProbabilityMap(num_levels, len(gsims))
        zd.eff_ruptures = AccumDict(accum=0)  # trt -> eff_ruptures
        self.rparams = sorted(rparams)
        for k in self.rparams:
            # create one dataset per rupture parameter; some are
            # variable-length (vlen) arrays
            if k == 'grp_id':
                self.datastore.create_dset('rup/' + k, U16)
            elif k == 'source_id':
                self.datastore.create_dset('rup/' + k, hdf5.vstr)
            elif k == 'probs_occur':  # vlen
                self.datastore.create_dset('rup/' + k, hdf5.vfloat32)
            elif k.endswith('_'):  # array of shape (U, N)
                self.datastore.create_dset(
                    'rup/' + k, F32, shape=(None, self.N),
                    compression='gzip')
            else:
                self.datastore.create_dset('rup/' + k, F32)
        self.by_task = {}  # task_no => src_ids
        self.totrups = 0  # total number of ruptures before collapsing
        self.maxradius = 0
        self.gidx = {tuple(grp_ids): i
                     for i, grp_ids in enumerate(self.datastore['grp_ids'])}

        # estimate max memory per core
        max_num_gsims = max(len(gsims) for gsims in gsims_by_trt.values())
        max_num_grp_ids = max(len(grp_ids) for grp_ids in self.gidx)
        pmapbytes = self.N * num_levels * max_num_gsims * max_num_grp_ids * 8
        if pmapbytes > TWO32:
            logging.warning(
                TOOBIG % (self.N, num_levels, max_num_gsims, max_num_grp_ids,
                          humansize(pmapbytes)))
        logging.info(MAXMEMORY % (self.N, num_levels, max_num_gsims,
                                  max_num_grp_ids, humansize(pmapbytes)))
        return zd
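
The memory estimate in the last block is just the product of the four dimensions times 8 bytes per float64. For instance, with hypothetical values of 50,000 sites, 20 intensity levels, 4 GSIMs and 2 group ids:

from openquake.baselib.general import humansize

N, num_levels, max_num_gsims, max_num_grp_ids = 50_000, 20, 4, 2
pmapbytes = N * num_levels * max_num_gsims * max_num_grp_ids * 8
print(humansize(pmapbytes))   # 64_000_000 bytes, i.e. roughly 61 MB per core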