Example #1
def _taxonomy_mapping(filename, taxonomies):
    try:
        tmap_df = pandas.read_csv(filename, converters=dict(weight=float))
    except Exception as e:
        raise e.__class__('%s while reading %s' % (e, filename))
    if 'weight' not in tmap_df:
        tmap_df['weight'] = 1.

    assert set(tmap_df) in ({'taxonomy', 'conversion', 'weight'},
                            {'taxonomy', 'risk_id', 'weight'})
    # NB: conversion was the old name in the header for engine <= 3.12
    risk_id = 'risk_id' if 'risk_id' in tmap_df.columns else 'conversion'
    dic = dict(list(tmap_df.groupby('taxonomy')))
    taxonomies = taxonomies[1:]  # strip '?'
    missing = set(taxonomies) - set(dic)
    if missing:
        raise InvalidFile('The taxonomies %s are in the exposure but not in '
                          'the taxonomy mapping %s' % (missing, filename))
    lst = [[("?", 1)]]
    for taxo in taxonomies:
        recs = dic[taxo]
        if abs(recs['weight'].sum() - 1.) > pmf.PRECISION:
            raise InvalidFile('%s: the weights do not sum up to 1 for %s' %
                              (filename, taxo))
        lst.append([(rec[risk_id], rec['weight'])
                    for r, rec in recs.iterrows()])
    return lst
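
A hedged usage sketch: the snippet below reproduces the parsing logic of
_taxonomy_mapping with pandas alone, so it runs without the OpenQuake
environment (no pmf.PRECISION, no InvalidFile). The taxonomy names, risk ids
and weights are invented; only the taxonomy,risk_id,weight header layout and
the weights-sum-to-1 rule come from the function itself.

import io
import pandas

csv_text = '''taxonomy,risk_id,weight
tax1,RC_low,0.6
tax1,RC_mid,0.4
tax2,W_frame,1.0
'''
tmap_df = pandas.read_csv(io.StringIO(csv_text),
                          converters=dict(weight=float))
for taxo, recs in tmap_df.groupby('taxonomy'):
    # mirrors the pmf.PRECISION check in _taxonomy_mapping
    assert abs(recs['weight'].sum() - 1.) < 1E-12
    print(taxo, [(rec['risk_id'], float(rec['weight']))
                 for _, rec in recs.iterrows()])
# tax1 [('RC_low', 0.6), ('RC_mid', 0.4)]
# tax2 [('W_frame', 1.0)]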
Example #2
def get_crmodel(oqparam):
    """
    Return a :class:`openquake.risklib.riskinput.CompositeRiskModel` instance

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    """
    risklist = get_risk_functions(oqparam)
    if not oqparam.limit_states and risklist.limit_states:
        oqparam.limit_states = risklist.limit_states
    elif 'damage' in oqparam.calculation_mode and risklist.limit_states:
        assert oqparam.limit_states == risklist.limit_states
    loss_types = oqparam.loss_dt().names
    consdict = {}
    if 'consequence' in oqparam.inputs:
        # build consdict of the form consequence_by_tagname -> tag -> array
        for by, fnames in oqparam.inputs['consequence'].items():
            if isinstance(fnames, str):  # single file
                fnames = [fnames]
            dtypedict = {
                by: str,
                'consequence': str,
                'loss_type': str,
                None: float
            }

            # i.e. files collapsed.csv, fatalities.csv, ... with headers
            # taxonomy,consequence,loss_type,slight,moderate,extensive
            arrays = []
            for fname in fnames:
                arr = hdf5.read_csv(fname, dtypedict).array
                arrays.append(arr)
                for no, row in enumerate(arr, 2):  # line 1 is the header
                    if row['loss_type'] not in loss_types:
                        msg = '%s: %s is not a recognized loss type, line=%d'
                        raise InvalidFile(msg % (fname, row['loss_type'], no))

            array = numpy.concatenate(arrays)
            dic = group_array(array, 'consequence')
            for consequence, group in dic.items():
                if consequence not in scientific.KNOWN_CONSEQUENCES:
                    raise InvalidFile('Unknown consequence %s in %s' %
                                      (consequence, fnames))
                bytag = {
                    tag: _cons_coeffs(grp, loss_types, risklist.limit_states)
                    for tag, grp in group_array(group, by).items()
                }
                consdict['%s_by_%s' % (consequence, by)] = bytag
    # for instance consdict['collapsed_by_taxonomy']['W_LFM-DUM_H3']
    # is [(0.05,), (0.2 ,), (0.6 ,), (1.  ,)] for damage state and structural
    crm = riskmodels.CompositeRiskModel(oqparam, risklist, consdict)
    return crm
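
The inline comments above pin down the on-disk layout of the consequence
files; here is a hypothetical collapsed.csv reconstructed from them. The tag
W_LFM-DUM_H3 and the coefficients 0.05/0.2/0.6/1.0 are the ones quoted in the
final comment; the four limit-state column names are an assumption.

taxonomy,consequence,loss_type,slight,moderate,extensive,complete
W_LFM-DUM_H3,collapsed,structural,0.05,0.2,0.6,1.0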
Example #3
def read_csv(fname, dtypedict={None: float}, renamedict={}, sep=','):
    """
    :param fname: a CSV file with a header and float fields
    :param dtypedict: a dictionary fieldname -> dtype, None -> default
    :param renamedict: aliases for the fields to rename
    :param sep: separator (default comma)
    :return: a structured array of floats
    """
    attrs = {}
    with open(fname, encoding='utf-8-sig') as f:
        while True:
            first = next(f)
            if first.startswith('#'):
                attrs = dict(parse_comment(first.strip('#,\n ')))
                continue
            break
        header = first.strip().split(sep)
        try:
            arr = _read_csv(f, build_dt(dtypedict, header))
        except KeyError:
            raise KeyError('Missing None -> default in dtypedict')
        except Exception as exc:
            raise InvalidFile('%s: %s' % (fname, exc))
    if renamedict:
        newnames = []
        for name in arr.dtype.names:
            new = renamedict.get(name, name)
            newnames.append(new)
        arr.dtype.names = newnames
    return ArrayWrapper(arr, attrs)
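
A hedged usage sketch, assuming this read_csv is the function invoked as
hdf5.read_csv elsewhere in these examples and that it lives in
openquake.baselib.hdf5 (the import path is an assumption); the file content
and field names are invented.

import os
import tempfile
from openquake.baselib import hdf5  # assumed location of read_csv

with tempfile.NamedTemporaryFile('w', suffix='.csv', delete=False) as f:
    f.write('site_id,lon,lat\n0,13.40,42.30\n1,13.50,42.40\n')
aw = hdf5.read_csv(f.name, {'site_id': int, None: float})
print(aw.array['lon'])  # a field of the structured array, e.g. [13.4 13.5]
os.remove(f.name)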
Example #4
def get_sitecol_assetcol(oqparam, haz_sitecol=None, cost_types=()):
    """
    :param oqparam: calculation parameters
    :param haz_sitecol: the hazard site collection
    :param cost_types: the expected cost types
    :returns: (site collection, asset collection, discarded)
    """
    global exposure
    asset_hazard_distance = max(oqparam.asset_hazard_distance.values())
    if exposure is None:
        # haz_sitecol not extracted from the exposure
        exposure = get_exposure(oqparam)
    if haz_sitecol is None:
        haz_sitecol = get_site_collection(oqparam)
    if oqparam.region_grid_spacing:
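        # 1.414 ~= sqrt(2): the diagonal of a grid cell of side
        # region_grid_spacing (presumably the intended association distance)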
        haz_distance = oqparam.region_grid_spacing * 1.414
        if haz_distance != asset_hazard_distance:
            logging.debug('Using haz_distance=%d km instead of '
                          'asset_hazard_distance=%d km',
                          haz_distance, asset_hazard_distance)
    else:
        haz_distance = asset_hazard_distance

    if haz_sitecol.mesh != exposure.mesh:
        # associate the assets to the hazard sites
        sitecol, assets_by, discarded = geo.utils.assoc(
            exposure.assets_by_site, haz_sitecol, haz_distance, 'filter')
        assets_by_site = [[] for _ in sitecol.complete.sids]
        num_assets = 0
        for sid, assets in zip(sitecol.sids, assets_by):
            assets_by_site[sid] = assets
            num_assets += len(assets)
        logging.info('Associated {:_d} assets to {:_d} sites'.format(
            num_assets, len(sitecol)))
    else:
        # asset sites and hazard sites are the same
        sitecol = haz_sitecol
        assets_by_site = exposure.assets_by_site
        discarded = []
        logging.info('Read %d sites and %d assets from the exposure',
                     len(sitecol), sum(len(a) for a in assets_by_site))
    assetcol = asset.AssetCollection(
        exposure, assets_by_site, oqparam.time_event, oqparam.aggregate_by)
    if assetcol.occupancy_periods:
        missing = set(cost_types) - set(exposure.cost_types['name']) - set(
            ['occupants'])
    else:
        missing = set(cost_types) - set(exposure.cost_types['name'])
    if missing and not oqparam.calculation_mode.endswith('damage'):
        raise InvalidFile('The exposure %s is missing %s' %
                          (oqparam.inputs['exposure'], missing))
    if (not oqparam.hazard_calculation_id and 'gmfs' not in oqparam.inputs
            and 'hazard_curves' not in oqparam.inputs
            and sitecol is not sitecol.complete):
        # for predefined hazard you cannot reduce the site collection; in
        # other cases you can, typically with a grid which is mostly empty
        # (i.e. there are many hazard sites with no assets)
        assetcol.reduce_also(sitecol)
    return sitecol, assetcol, discarded
Example #5
def get_input_files(oqparam):
    """
    :param oqparam: an OqParam instance
    :returns: input path names in a specific order
    """
    fnames = set()  # files entering the checksum
    uri = oqparam.shakemap_uri
    if isinstance(uri, dict) and uri:
        # local files
        for key, val in uri.items():
            if key == 'fname' or key.endswith('_url'):
                val = val.replace('file://', '')
                fname = os.path.join(oqparam.base_path, val)
                if os.path.exists(fname):
                    uri[key] = fname
                    fnames.add(fname)
        # additional separate shapefiles
        if uri['kind'] == 'shapefile' and not uri['fname'].endswith('.zip'):
            fnames.update(get_shapefiles(os.path.dirname(fname)))

    for key in oqparam.inputs:
        fname = oqparam.inputs[key]
        # collect .hdf5 tables for the GSIMs, if any
        if key == 'gsim_logic_tree':
            fnames.update(gsim_lt.collect_files(fname))
            fnames.add(fname)
        elif key == 'source_model':  # UCERF
            f = oqparam.inputs['source_model']
            fnames.add(f)
            fname = nrml.read(f).sourceModel.UCERFSource['filename']
            fnames.add(os.path.join(os.path.dirname(f), fname))
        elif key == 'exposure':  # fname is a list
            for exp in asset.Exposure.read_headers(fname):
                fnames.update(exp.datafiles)
            fnames.update(fname)
        elif isinstance(fname, dict):
            for key, val in fname.items():
                if isinstance(val, list):  # list of files
                    fnames.update(val)
                else:
                    fnames.add(val)
        elif isinstance(fname, list):
            for f in fname:
                if f == oqparam.input_dir:
                    raise InvalidFile('%s: there is an empty path in %s' %
                                      (oqparam.inputs['job_ini'], key))
            fnames.update(fname)
        elif key == 'source_model_logic_tree':
            info = logictree.collect_info(fname)
            fnames.update(info.smpaths)
            fnames.update(info.h5paths)
            fnames.add(fname)
        else:
            fnames.add(fname)
    return sorted(fnames)
Example #6
def _get_site_model(fname, req_site_params):
    sm = hdf5.read_csv(fname, site.site_param_dt).array
    sm['lon'] = numpy.round(sm['lon'], 5)
    sm['lat'] = numpy.round(sm['lat'], 5)
    dupl = general.get_duplicates(sm, 'lon', 'lat')
    if dupl:
        raise InvalidFile('Found duplicate sites %s in %s' % (dupl, fname))
    z = numpy.zeros(len(sm), sorted(sm.dtype.descr))
    for name in z.dtype.names:
        z[name] = sm[name]
    return z
Example #7
def __init__(self, ctrl_port=config.zworkers.ctrl_port, host_cores=None,
             remote_python=None, receiver_ports=None):
    # NB: receiver_ports is not used but needed for compliance
    self.ctrl_port = int(ctrl_port)
    self.host_cores = ([hc.split() for hc in host_cores.split(',')]
                       if host_cores else [])
    for host, cores in self.host_cores:
        if int(cores) < -1:
            raise InvalidFile('openquake.cfg: found %s %s' %
                              (host, cores))
    self.remote_python = remote_python or sys.executable
    self.popens = []
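
The host_cores string format is implicit in the parsing above; a stdlib-only
sketch with invented hosts:

host_cores = '192.168.2.1 4,192.168.2.2 8'  # hypothetical hosts
parsed = [hc.split() for hc in host_cores.split(',')]
print(parsed)  # [['192.168.2.1', '4'], ['192.168.2.2', '8']]
# each pair is (host, cores); only values below -1 are rejected above,
# so -1 itself is accepted (presumably meaning "use all cores")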
Example #8
def get_site_collection(oqparam, h5=None):
    """
    Returns a SiteCollection instance by looking at the points and the
    site model defined by the configuration parameters.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    """
    if h5 and 'sitecol' in h5:
        return h5['sitecol']
    mesh = get_mesh(oqparam, h5)
    if mesh is None and oqparam.ground_motion_fields:
        raise InvalidFile('You are missing sites.csv or site_model.csv in %s'
                          % oqparam.inputs['job_ini'])
    elif mesh is None:
        # a None sitecol is okay when computing the ruptures only
        return
    else:  # use the default site params
        req_site_params = get_gsim_lt(oqparam).req_site_params
        if 'amplification' in oqparam.inputs:
            req_site_params.add('ampcode')
        if h5 and 'site_model' in h5:  # comes from a site_model.csv
            sm = h5['site_model'][:]
        else:
            sm = oqparam
        sitecol = site.SiteCollection.from_points(
            mesh.lons, mesh.lats, mesh.depths, sm, req_site_params)
    ss = oqparam.sites_slice  # can be None or (start, stop)
    if ss:
        if 'custom_site_id' not in sitecol.array.dtype.names:
            gh = sitecol.geohash(6)
            assert len(numpy.unique(gh)) == len(gh), 'geohashes are not unique'
            sitecol.add_col('custom_site_id', 'S6', gh)
        mask = (sitecol.sids >= ss[0]) & (sitecol.sids < ss[1])
        sitecol = sitecol.filter(mask)
        sitecol.make_complete()

    ss = os.environ.get('OQ_SAMPLE_SITES')
    if ss:
        # debugging tip to reduce the size of a calculation
        # OQ_SAMPLE_SITES=.1 oq engine --run job.ini
        # will run a computation with 10 times fewer sites
        sitecol.array = numpy.array(random_filter(sitecol.array, float(ss)))
        sitecol.make_complete()
    if h5:
        h5['sitecol'] = sitecol
    return sitecol
Example #9
def read_csv(fname,
             dtypedict={None: float},
             renamedict={},
             sep=',',
             index=None,
             errors=None):
    """
    :param fname: a CSV file with a header and float fields
    :param dtypedict: a dictionary fieldname -> dtype, None -> default
    :param renamedict: aliases for the fields to rename
    :param sep: separator (default comma)
    :param index: if not None, returns a pandas DataFrame
    :param errors: passed to the underlying open function (default None)
    :returns: an ArrayWrapper, unless there is an index
    """
    attrs = {}
    with open(fname, encoding='utf-8-sig', errors=errors) as f:
        while True:
            first = next(f)
            if first.startswith('#'):
                attrs = dict(parse_comment(first.strip('#,\n ')))
                continue
            break
        header = first.strip().split(sep)
        if isinstance(dtypedict, dict):
            dt = build_dt(dtypedict, header)
        else:
            # in test_recompute dt is already a composite dtype
            dt = dtypedict
        try:
            arr = _read_csv(f, dt)
        except KeyError:
            raise KeyError('Missing None -> default in dtypedict')
        except Exception as exc:
            raise InvalidFile('%s: %s' % (fname, exc))
    if renamedict:
        newnames = []
        for name in arr.dtype.names:
            new = renamedict.get(name, name)
            newnames.append(new)
        arr.dtype.names = newnames
    if index:
        df = pandas.DataFrame.from_records(arr, index)
        vars(df).update(attrs)
        return df
    return ArrayWrapper(arr, attrs)
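
And the DataFrame branch: with index set, the ArrayWrapper is replaced by a
pandas DataFrame indexed on that field. Same caveats as before: the import
path is an assumption and the data is invented.

import os
import tempfile
from openquake.baselib import hdf5  # assumed location of read_csv

with tempfile.NamedTemporaryFile('w', suffix='.csv', delete=False) as f:
    f.write('site_id,lon,lat\n0,13.40,42.30\n1,13.50,42.40\n')
df = hdf5.read_csv(f.name, {'site_id': int, None: float}, index='site_id')
print(df)  # columns lon, lat; index site_id
os.remove(f.name)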
Example #10
def get_site_model(oqparam):
    """
    Convert the NRML file into an array of site parameters.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :returns:
        an array with fields lon, lat, vs30, ...
    """
    req_site_params = get_gsim_lt(oqparam).req_site_params
    if 'amplification' in oqparam.inputs:
        req_site_params.add('ampcode')
    arrays = []
    for fname in oqparam.inputs['site_model']:
        if isinstance(fname, str) and fname.endswith('.csv'):
            sm = hdf5.read_csv(fname, site.site_param_dt).array
            sm['lon'] = numpy.round(sm['lon'], 5)
            sm['lat'] = numpy.round(sm['lat'], 5)
            dupl = get_duplicates(sm, 'lon', 'lat')
            if dupl:
                raise InvalidFile(
                    'Found duplicate sites %s in %s' % (dupl, fname))
            if 'site_id' in sm.dtype.names:
                raise InvalidFile('%s: you passed a sites.csv file instead of '
                                  'a site_model.csv file!' % fname)
            params = sorted(set(sm.dtype.names) | req_site_params)
            z = numpy.zeros(
                len(sm), [(p, site.site_param_dt[p]) for p in params])
            for name in z.dtype.names:
                try:
                    z[name] = sm[name]
                except ValueError:  # missing, use the global parameter
                    # exercised in the test classical/case_28
                    value = getattr(oqparam, site.param[name])
                    if name == 'vs30measured':  # special case
                        value = value == 'measured'
                    z[name] = value
            arrays.append(z)
            continue
        nodes = nrml.read(fname).siteModel
        params = [valid.site_param(node.attrib) for node in nodes]
        missing = req_site_params - set(params[0])
        if 'vs30measured' in missing:  # use a default of False
            missing -= {'vs30measured'}
            for param in params:
                param['vs30measured'] = False
        if 'backarc' in missing:  # use a default of False
            missing -= {'backarc'}
            for param in params:
                param['backarc'] = False
        if 'ampcode' in missing:  # use a default of b''
            missing -= {'ampcode'}
            for param in params:
                param['ampcode'] = b''
        if missing:
            raise InvalidFile('%s: missing parameter %s' %
                              (oqparam.inputs['site_model'],
                               ', '.join(missing)))
        # NB: the sorted in sorted(params[0]) is essential, otherwise there is
        # a heisenbug in scenario/test_case_4
        site_model_dt = numpy.dtype([(p, site.site_param_dt[p])
                                     for p in sorted(params[0])])
        sm = numpy.array([tuple(param[name] for name in site_model_dt.names)
                          for param in params], site_model_dt)
        dupl = "\n".join(
            '%s %s' % loc for loc, n in countby(sm, 'lon', 'lat').items()
            if n > 1)
        if dupl:
            raise InvalidFile('There are duplicated sites in %s:\n%s' %
                              (fname, dupl))
        arrays.append(sm)
    return numpy.concatenate(arrays)
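
The round-then-deduplicate step at the top of the CSV branch is worth
isolating; a numpy-only sketch with invented coordinates showing why the
rounding matters:

import numpy
from collections import Counter

sm = numpy.array([(13.400004, 42.3), (13.400001, 42.3), (13.5, 42.4)],
                 dtype=[('lon', float), ('lat', float)])
sm['lon'] = numpy.round(sm['lon'], 5)  # as in get_site_model
sm['lat'] = numpy.round(sm['lat'], 5)
dupl = [loc for loc, n in Counter(zip(sm['lon'], sm['lat'])).items() if n > 1]
print(dupl)  # one duplicate at lon=13.4, lat=42.3 -> InvalidFile upstream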
Example #11
def get_mesh(oqparam, h5=None):
    """
    Extract the mesh of the points to compute, from the sites, the
    sites_csv, the region, the site model or the exposure, in this order.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    """
    global pmap, exposure, gmfs, eids
    if 'exposure' in oqparam.inputs and exposure is None:
        # read it only once
        exposure = get_exposure(oqparam)
    if oqparam.sites:
        return geo.Mesh.from_coords(oqparam.sites)
    elif 'sites' in oqparam.inputs:
        fname = oqparam.inputs['sites']
        header = get_csv_header(fname)
        if 'lon' in header:
            data = []
            for i, row in enumerate(
                    csv.DictReader(open(fname, encoding='utf-8-sig'))):
                if header[0] == 'site_id' and row['site_id'] != str(i):
                    raise InvalidFile('%s: expected site_id=%d, got %s' % (
                        fname, i, row['site_id']))
                data.append(' '.join([row['lon'], row['lat']]))
        elif 'gmfs' in oqparam.inputs:
            raise InvalidFile('Missing header in %(sites)s' % oqparam.inputs)
        else:
            data = [line.replace(',', ' ')
                    for line in open(fname, encoding='utf-8-sig')]
        coords = valid.coordinates(','.join(data))
        # sorting the coordinates so that event_based results do not
        # depend on the order in the sites.csv file
        c = coords if header[0] == 'site_id' else sorted(coords)
        # NB: Notice the sort=False below
        # Calculations starting from predefined ground motion fields
        # require at least two input files related to the gmf data:
        #   1. A sites.csv file, listing {site_id, lon, lat} tuples
        #   2. A gmfs.csv file, listing {event_id, site_id, gmv[IMT1],
        #      gmv[IMT2], ...} tuples
        # The site coordinates defined in the sites file do not need to be in
        # sorted order.
        # We must only ensure uniqueness of the provided site_ids and
        # coordinates.
        # When creating the site mesh from the site coordinates read from
        # the csv file, the sort=False flag maintains the user-specified
        # site_ids instead of reassigning them after sorting.
        return geo.Mesh.from_coords(c, sort=False)
    elif 'hazard_curves' in oqparam.inputs:
        fname = oqparam.inputs['hazard_curves']
        if isinstance(fname, list):  # for csv
            mesh, pmap = get_pmap_from_csv(oqparam, fname)
        else:
            raise NotImplementedError('Reading from %s' % fname)
        return mesh
    elif oqparam.region_grid_spacing:
        if oqparam.region:
            poly = geo.Polygon.from_wkt(oqparam.region)
        elif exposure:
            # in case of implicit grid the exposure takes precedence over
            # the site model
            poly = exposure.mesh.get_convex_hull()
        elif 'site_model' in oqparam.inputs:
            # this happens in event_based/case_19, where there is an implicit
            # grid over the site model
            sm = get_site_model(oqparam)  # do not store in h5!
            poly = geo.Mesh(sm['lon'], sm['lat']).get_convex_hull()
        else:
            raise InvalidFile('There is a grid spacing but not a region, '
                              'nor a site model, nor an exposure in %s' %
                              oqparam.inputs['job_ini'])
        try:
            logging.info('Inferring the hazard grid')
            mesh = poly.dilate(oqparam.region_grid_spacing).discretize(
                oqparam.region_grid_spacing)
            return geo.Mesh.from_coords(zip(mesh.lons, mesh.lats))
        except Exception:
            raise ValueError(
                'Could not discretize region with grid spacing '
                '%(region_grid_spacing)s' % vars(oqparam))
    # the site model has the precedence over the exposure, see the
    # discussion in https://github.com/gem/oq-engine/pull/5217
    elif 'site_model' in oqparam.inputs:
        logging.info('Extracting the hazard sites from the site model')
        sm = get_site_model(oqparam)
        if h5:
            h5['site_model'] = sm
        mesh = geo.Mesh(sm['lon'], sm['lat'])
        return mesh
    elif 'exposure' in oqparam.inputs:
        return exposure.mesh
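
The site_id branch above requires the ids to be sequential from zero; a
stdlib-only sketch of both the accepted layout and the check (file content
invented):

import csv
import io

text = 'site_id,lon,lat\n0,13.4,42.3\n1,13.5,42.4\n'
for i, row in enumerate(csv.DictReader(io.StringIO(text))):
    # mirrors the InvalidFile check in get_mesh: ids must be 0, 1, 2, ...
    assert row['site_id'] == str(i), 'expected site_id=%d' % i
# with a site_id column the coordinates are not sorted (sort=False),
# so the user-specified ids are preserved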
Example #12
def get_input_files(oqparam, hazard=False):
    """
    :param oqparam: an OqParam instance
    :param hazard: if True, consider only the hazard files
    :returns: input path names in a specific order
    """
    fnames = set()  # files entering the checksum
    uri = oqparam.shakemap_uri
    if isinstance(uri, dict) and uri:
        # local files
        for key, val in uri.items():
            if key == 'fname' or key.endswith('_url'):
                val = val.replace('file://', '')
                fname = os.path.join(oqparam.base_path, val)
                if os.path.exists(fname):
                    uri[key] = fname
                    fnames.add(fname)
        # additional separate shapefiles
        if uri['kind'] == 'shapefile' and not uri['fname'].endswith('.zip'):
            fnames.update(get_shapefiles(os.path.dirname(fname)))

    for key in oqparam.inputs:
        fname = oqparam.inputs[key]
        if hazard and key not in ('source_model_logic_tree',
                                  'gsim_logic_tree', 'source'):
            continue
        # collect .hdf5 tables for the GSIMs, if any
        elif key == 'gsim_logic_tree':
            gsim_lt = get_gsim_lt(oqparam)
            for gsims in gsim_lt.values.values():
                for gsim in gsims:
                    for k, v in gsim.kwargs.items():
                        if k.endswith(('_file', '_table')):
                            fnames.add(v)
            fnames.add(fname)
        elif key == 'source_model':  # UCERF
            f = oqparam.inputs['source_model']
            fnames.add(f)
            fname = nrml.read(f).sourceModel.UCERFSource['filename']
            fnames.add(os.path.join(os.path.dirname(f), fname))
        elif key == 'exposure':  # fname is a list
            for exp in asset.Exposure.read_headers(fname):
                fnames.update(exp.datafiles)
            fnames.update(fname)
        elif isinstance(fname, dict):
            for key, val in fname.items():
                if isinstance(val, list):  # list of files
                    fnames.update(val)
                else:
                    fnames.add(val)
        elif isinstance(fname, list):
            for f in fname:
                if f == oqparam.input_dir:
                    raise InvalidFile('%s: there is an empty path in %s' %
                                      (oqparam.inputs['job_ini'], key))
            fnames.update(fname)
        elif key == 'source_model_logic_tree':
            args = (fname, oqparam.random_seed,
                    oqparam.number_of_logic_tree_samples,
                    oqparam.sampling_method)
            try:
                smlt = smlt_cache[args]
            except KeyError:
                smlt = smlt_cache[args] = logictree.SourceModelLogicTree(*args)
            fnames.update(smlt.hdf5_files)
            fnames.update(smlt.info.smpaths)
            fnames.add(fname)
        else:
            fnames.add(fname)
    return sorted(fnames)