Example #1
 def get(self, site_id, tagnames):
     """
     :param site_id: the site of interest
     :param tagnames: a list of tag names (possibly empty)
     :returns: assets, tagidxs
     """
     bools = site_id == self.sids
     aids, = numpy.where(bools)
     array = self.dstore['assetcol/array'][bools]
     tagidxs = {}  # aid -> tagidxs
     assets = []
     for aid, a in zip(aids, array):
         tagi = a[tagnames] if tagnames else ()
         tagidxs[aid] = tuple(idx - 1 for idx in tagi)
         values = {
             lt: a['value-' + lt]
             for lt in self.loss_types if lt != 'occupants'
         }
         for name in array.dtype.names:
             if name.startswith('occupants_'):
                 values[name] = a[name]
         asset = Asset(
             aid,
             [a[name] for name in self.tagcol.tagnames],
             number=a['number'],
             location=(
                 valid.longitude(a['lon']),  # round coordinates
                 valid.latitude(a['lat'])),
             values=values,
             area=a['area'],
             calc=self.cost_calculator)
         assets.append(asset)
     return assets, tagidxs
Example #2
 def __getitem__(self, aid):
     a = self.array[aid]
     values = {
         lt: a['value-' + lt]
         for lt in self.loss_types if lt != 'occupants'
     }
     if 'occupants' in self.array.dtype.names:
         values['occupants_' + str(self.time_event)] = a['occupants']
     return Asset(
         a['idx'],
         self.taxonomies[aid],
         number=a['number'],
         location=(
             valid.longitude(a['lon']),  # round coordinates
             valid.latitude(a['lat'])),
         values=values,
         area=a['area'],
         deductibles={lt[self.D:]: a[lt]
                      for lt in self.deduc},
         insurance_limits={lt[self.I:]: a[lt]
                           for lt in self.i_lim},
         retrofitteds={lt[self.R:]: a[lt]
                       for lt in self.retro},
         calc=self.cc,
         ordinal=aid)
Example #3
 def __getitem__(self, indices):
     if isinstance(indices, int):  # single asset
         a = self.array[indices]
         values = {
             lt: a['value-' + lt]
             for lt in self.loss_types if lt != 'occupants'
         }
         if 'occupants' in self.array.dtype.names:
             values['occupants_' + str(self.time_event)] = a['occupants']
         return riskmodels.Asset(
             a['idx'],
             self.taxonomies[a['taxonomy_id']],
             number=a['number'],
             location=(
                 valid.longitude(a['lon']),  # round coordinates
                 valid.latitude(a['lat'])),
             values=values,
             area=a['area'],
             deductibles={lt[self.D:]: a[lt]
                          for lt in self.deduc},
             insurance_limits={lt[self.I:]: a[lt]
                               for lt in self.i_lim},
             retrofitteds={lt[self.R:]: a[lt]
                           for lt in self.retro},
             calc=self.cc,
             ordinal=indices)
     new = object.__new__(self.__class__)
     new.time_event = self.time_event
     new.array = self.array[indices]
     new.taxonomies = self.taxonomies
     return new
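The `isinstance(indices, int)` branch makes the same method serve two access patterns: an integer index builds and returns a single Asset, while any other index (a slice, list or boolean mask) returns a reduced collection that reuses the same taxonomy table. A minimal sketch of both patterns, assuming a hypothetical instance `assetcol` of the class above with at least six rows:

    single = assetcol[3]           # int index -> a single riskmodels.Asset
    subset = assetcol[[0, 2, 5]]   # list index -> a reduced copy of the collection
    assert len(subset.array) == 3  # only the selected rows are kept
    assert subset.time_event == assetcol.time_event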
Example #4
def csv2peril(fname, name, sitecol, tofloat, asset_hazard_distance):
    """
    Converts a CSV file into a peril array of length N, where N is the
    number of sites in the site collection.
    """
    data = []
    with open(fname) as f:
        for row in csv.DictReader(f):
            intensity = tofloat(row['intensity'])
            if intensity > 0:
                data.append((valid.longitude(row['lon']),
                             valid.latitude(row['lat']), intensity))
    data = numpy.array(data, [('lon', float), ('lat', float),
                              ('number', float)])
    logging.info('Read %s with %d rows' % (fname, len(data)))
    if len(data) != len(numpy.unique(data[['lon', 'lat']])):
        raise InvalidFile('There are duplicated points in %s' % fname)
    try:
        distance = asset_hazard_distance[name]
    except KeyError:
        distance = asset_hazard_distance['default']
    sites, filtdata, _discarded = geo.utils.assoc(data, sitecol, distance,
                                                  'filter')
    peril = numpy.zeros(len(sitecol), float)
    peril[sites.sids] = filtdata['number']
    return peril
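Because the function uses csv.DictReader, the file must carry a header row naming at least the columns lon, lat and intensity; rows whose intensity is not positive are skipped and duplicated (lon, lat) pairs raise InvalidFile. A hypothetical input (values invented purely for illustration) would look like:

    lon,lat,intensity
    8.95,44.40,0.25
    8.97,44.41,0.00
    9.01,44.43,1.75

Here the second data row would be discarded because its intensity is zero, and only points within asset_hazard_distance of a site contribute to the returned peril array.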
Example #5
 def test_latitude(self):
     self.assertEqual(valid.latitude('1'), 1.0)
     self.assertEqual(valid.latitude('90'), 90.0)
     with self.assertRaises(ValueError):
         valid.latitude('91')
     with self.assertRaises(ValueError):
         valid.latitude('-91')
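The test only pins down the contract of valid.latitude: the string is converted to a float and values outside [-90, 90] raise ValueError. A minimal sketch of a validator honoring that contract (an assumption for illustration, not necessarily the actual openquake implementation, which may also round the coordinate) is:

    def latitude(value):
        """Cast a string to a float latitude in the range [-90, 90]."""
        lat = float(value)
        if lat < -90.0 or lat > 90.0:
            raise ValueError('latitude %s is outside the range -90..90' % lat)
        return lat

valid.longitude would follow the same pattern with the [-180, 180] range.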
Example #6
 def _read_csv(self, csvnames, dirname):
     """
     :param csvnames: names of csv files, space separated
     :param dirname: the directory where the csv files are
     :yields: asset nodes
     """
     expected_header = self._csv_header()
     fnames = [os.path.join(dirname, f) for f in csvnames.split()]
     for fname in fnames:
         with open(fname, encoding='utf-8') as f:
             fields = next(csv.reader(f))
             header = set(fields)
             if len(header) < len(fields):
                 raise InvalidFile(
                     '%s: The header %s contains a duplicated field' %
                     (fname, header))
             elif expected_header - header:
                 raise InvalidFile(
                     'Unexpected header in %s\nExpected: %s\nGot: %s' %
                     (fname, sorted(expected_header), sorted(header)))
     occupancy_periods = self.occupancy_periods.split()
     for fname in fnames:
         with open(fname, encoding='utf-8') as f:
             for i, dic in enumerate(csv.DictReader(f), 1):
                 asset = Node('asset', lineno=i)
                 with context(fname, asset):
                     asset['id'] = dic['id']
                     asset['number'] = valid.positivefloat(dic['number'])
                     asset['taxonomy'] = dic['taxonomy']
                     if 'area' in dic:  # optional attribute
                         asset['area'] = dic['area']
                     loc = Node(
                         'location',
                         dict(lon=valid.longitude(dic['lon']),
                              lat=valid.latitude(dic['lat'])))
                     costs = Node('costs')
                     for cost in self.cost_types['name']:
                         a = dict(type=cost, value=dic[cost])
                         costs.append(Node('cost', a))
                     occupancies = Node('occupancies')
                     for period in occupancy_periods:
                         a = dict(occupants=float(dic[period]),
                                  period=period)
                         occupancies.append(Node('occupancy', a))
                     tags = Node('tags')
                     for tagname in self.tagcol.tagnames:
                         if tagname != 'taxonomy':
                             tags.attrib[tagname] = dic[tagname]
                     asset.nodes.extend([loc, costs, occupancies, tags])
                     if i % 100000 == 0:
                         logging.info('Read %d assets', i)
                 yield asset
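Putting the pieces together, the reader expects one row per asset with the mandatory columns id, number, taxonomy, lon and lat, plus one column per declared cost type, one per occupancy period and one per extra tag. Assuming, purely for illustration, a single cost type named structural, one occupancy period named night and one tag named state, an accepted header would be:

    id,number,taxonomy,lon,lat,structural,night,state

with number checked by valid.positivefloat and lon/lat by valid.longitude and valid.latitude; any expected column missing from the file triggers InvalidFile.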
Example #7
 def _read_csv(self):
     """
     :yields: asset nodes
     """
     expected_header = self._csv_header()
     for fname in self.datafiles:
         with open(fname, encoding='utf-8') as f:
             fields = next(csv.reader(f))
             header = set(fields)
             if len(header) < len(fields):
                 raise InvalidFile(
                     '%s: The header %s contains a duplicated field' %
                     (fname, header))
             elif expected_header - header - {'exposure', 'country'}:
                 raise InvalidFile(
                     'Unexpected header in %s\nExpected: %s\nGot: %s' %
                     (fname, sorted(expected_header), sorted(header)))
     occupancy_periods = self.occupancy_periods.split()
     for fname in self.datafiles:
         with open(fname, encoding='utf-8') as f:
             for i, dic in enumerate(csv.DictReader(f), 1):
                 asset = Node('asset', lineno=i)
                 with context(fname, asset):
                     asset['id'] = dic['id']
                     asset['number'] = valid.positivefloat(dic['number'])
                     asset['taxonomy'] = dic['taxonomy']
                     if 'area' in dic:  # optional attribute
                         asset['area'] = dic['area']
                     loc = Node(
                         'location',
                         dict(lon=valid.longitude(dic['lon']),
                              lat=valid.latitude(dic['lat'])))
                     costs = Node('costs')
                     for cost in self.cost_types['name']:
                         a = dict(type=cost, value=dic[cost])
                         if 'retrofitted' in dic:
                             a['retrofitted'] = dic['retrofitted']
                         costs.append(Node('cost', a))
                     occupancies = Node('occupancies')
                     for period in occupancy_periods:
                         a = dict(occupants=float(dic[period]),
                                  period=period)
                         occupancies.append(Node('occupancy', a))
                     tags = Node('tags')
                     for tagname in self.tagcol.tagnames:
                         if tagname not in ('taxonomy', 'exposure',
                                            'country'):
                             tags.attrib[tagname] = dic[tagname]
                     asset.nodes.extend([loc, costs, occupancies, tags])
                 yield asset
Example #8
 def _read_csv(self):
     """
     :yields: asset nodes
     """
     expected_header = self._csv_header()
     for fname in self.datafiles:
         with open(fname, encoding='utf-8') as f:
             fields = next(csv.reader(f))
             header = set(fields)
             if len(header) < len(fields):
                 raise InvalidFile(
                     '%s: The header %s contains a duplicated field' %
                     (fname, header))
             elif expected_header - header - {'exposure', 'country'}:
                 raise InvalidFile(
                     'Unexpected header in %s\nExpected: %s\nGot: %s' %
                     (fname, sorted(expected_header), sorted(header)))
     occupancy_periods = self.occupancy_periods.split()
     for fname in self.datafiles:
         with open(fname, encoding='utf-8') as f:
             for i, dic in enumerate(csv.DictReader(f), 1):
                 asset = Node('asset', lineno=i)
                 with context(fname, asset):
                     asset['id'] = dic['id']
                     asset['number'] = valid.positivefloat(dic['number'])
                     asset['taxonomy'] = dic['taxonomy']
                     if 'area' in dic:  # optional attribute
                         asset['area'] = dic['area']
                     loc = Node('location',
                                dict(lon=valid.longitude(dic['lon']),
                                     lat=valid.latitude(dic['lat'])))
                     costs = Node('costs')
                     for cost in self.cost_types['name']:
                         a = dict(type=cost, value=dic[cost])
                         if 'retrofitted' in dic:
                             a['retrofitted'] = dic['retrofitted']
                         costs.append(Node('cost', a))
                     occupancies = Node('occupancies')
                     for period in occupancy_periods:
                         a = dict(occupants=float(dic[period]),
                                  period=period)
                         occupancies.append(Node('occupancy', a))
                     tags = Node('tags')
                     for tagname in self.tagcol.tagnames:
                         if tagname not in (
                                 'taxonomy', 'exposure', 'country'):
                             tags.attrib[tagname] = dic[tagname]
                     asset.nodes.extend([loc, costs, occupancies, tags])
                 yield asset
Example #9
def split_coords_2d(seq):
    """
    :param seq: a flat list with lons and lats
    :returns: a validated list of pairs (lon, lat)

    >>> split_coords_2d([1.1, 2.1, 2.2, 2.3])
    [(1.1, 2.1), (2.2, 2.3)]
    """
    lons, lats = [], []
    for i, el in enumerate(seq):
        if i % 2 == 0:
            lons.append(valid.longitude(el))
        elif i % 2 == 1:
            lats.append(valid.latitude(el))
    return list(zip(lons, lats))
Example #10
def split_coords_3d(seq):
    """
    :param seq: a flat list with lons, lats and depths
    :returns: a validated list of (lon, lat, depth) triplets

    >>> split_coords_3d([1.1, 2.1, 0.1, 2.3, 2.4, 0.1])
    [(1.1, 2.1, 0.1), (2.3, 2.4, 0.1)]
    """
    lons, lats, depths = [], [], []
    for i, el in enumerate(seq):
        if i % 3 == 0:
            lons.append(valid.longitude(el))
        elif i % 3 == 1:
            lats.append(valid.latitude(el))
        elif i % 3 == 2:
            depths.append(valid.depth(el))
    return list(zip(lons, lats, depths))
Example #11
 def __getitem__(self, aid):
     a = self.array[aid]
     values = {lt: a['value-' + lt] for lt in self.loss_types
               if lt != 'occupants'}
     for name in self.array.dtype.names:
         if name.startswith('occupants_'):
             values[name] = a[name]
     return Asset(
         aid,
         [a[decode(name)] for name in self.tagnames],
         number=a['number'],
         location=(valid.longitude(a['lon']),  # round coordinates
                   valid.latitude(a['lat'])),
         values=values,
         area=a['area'],
         deductibles={lt[self.D:]: a[lt] for lt in self.deduc},
         insurance_limits={lt[self.I:]: a[lt] for lt in self.i_lim},
         retrofitted=a['retrofitted'] if self.retro else None,
         calc=self.cost_calculator)
Example #12
def get_mesh_csvdata(csvfile, imts, num_values, validvalues):
    """
    Read CSV data in the format `IMT lon lat value1 ... valueN`.

    :param csvfile:
        a file or file-like object with the CSV data
    :param imts:
        a list of intensity measure types
    :param num_values:
        dictionary with the number of expected values per IMT
    :param validvalues:
        validation function for the values
    :returns:
        the mesh of points and the data as a dictionary
        imt -> array of curves for each site
    """
    number_of_values = dict(zip(imts, num_values))
    lon_lats = {imt: set() for imt in imts}
    data = AccumDict()  # imt -> list of arrays
    check_imt = valid.Choice(*imts)
    for line, row in enumerate(csv.reader(csvfile, delimiter=' '), 1):
        try:
            imt = check_imt(row[0])
            lon_lat = valid.longitude(row[1]), valid.latitude(row[2])
            if lon_lat in lon_lats[imt]:
                raise DuplicatedPoint(lon_lat)
            lon_lats[imt].add(lon_lat)
            values = validvalues(' '.join(row[3:]))
            if len(values) != number_of_values[imt]:
                raise ValueError('Found %d values, expected %d' %
                                 (len(values), number_of_values[imt]))
        except (ValueError, DuplicatedPoint) as err:
            raise err.__class__('%s: file %s, line %d' % (err, csvfile, line))
        data += {imt: [numpy.array(values)]}
    points = lon_lats.pop(imts[0])
    for other_imt, other_points in lon_lats.items():
        if points != other_points:
            raise ValueError('Inconsistent locations between %s and %s' %
                             (imts[0], other_imt))
    lons, lats = zip(*sorted(points))
    mesh = geo.Mesh(numpy.array(lons), numpy.array(lats))
    return mesh, {imt: numpy.array(lst) for imt, lst in data.items()}
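Since the reader uses delimiter=' ' and checks row[0] against the allowed IMTs, each line must be a space-separated record of the form IMT lon lat value1 ... valueN. A hypothetical input for two IMTs with num_values = [3, 3] (numbers invented for illustration):

    PGA 9.0 45.0 0.10 0.05 0.01
    PGA 9.1 45.0 0.20 0.10 0.02
    SA(0.2) 9.0 45.0 0.30 0.15 0.03
    SA(0.2) 9.1 45.0 0.40 0.20 0.04

Both IMTs must cover exactly the same set of (lon, lat) points, otherwise the final consistency check raises ValueError.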
Example #13
 def save_multi_peril(self):
     """
     Read the hazard fields as csv files, associate them to the sites
     and create the `multi_peril` dataset.
     """
     oq = self.oqparam
     fnames = oq.inputs['multi_peril']
     dt = [(haz, float) for haz in oq.multi_peril]
     N = len(self.sitecol)
     self.datastore['multi_peril'] = z = numpy.zeros(N, dt)
     for name, fname in zip(oq.multi_peril, fnames):
         if name in 'LAVA LAHAR PYRO':
             tofloat = valid.probability
         else:
             tofloat = valid.positivefloat
         data = []
         with open(fname) as f:
             for row in csv.DictReader(f):
                 intensity = tofloat(row['intensity'])
                 if intensity > 0:
                     data.append((valid.longitude(row['lon']),
                                  valid.latitude(row['lat']),
                                  intensity))
         data = numpy.array(data, [('lon', float), ('lat', float),
                                   ('number', float)])
         logging.info('Read %s with %d rows' % (fname, len(data)))
         if len(data) != len(numpy.unique(data[['lon', 'lat']])):
             raise InvalidFile('There are duplicated points in %s' % fname)
         try:
             asset_hazard_distance = oq.asset_hazard_distance[name]
         except KeyError:
             asset_hazard_distance = oq.asset_hazard_distance['default']
         sites, filtdata, _discarded = geo.utils.assoc(
             data, self.sitecol, asset_hazard_distance, 'filter')
         z = numpy.zeros(N, float)
         z[sites.sids] = filtdata['number']
         self.datastore['multi_peril'][name] = z
     self.datastore.set_attrs('multi_peril', nbytes=z.nbytes)
Example #14
 def save_multi_peril(self):
     """
     Read the hazard fields as csv files, associate them to the sites
     and create the `multi_peril` dataset.
     """
     oq = self.oqparam
     fnames = oq.inputs['multi_peril']
     dt = [(haz, float) for haz in oq.multi_peril]
     N = len(self.sitecol)
     self.datastore['multi_peril'] = z = numpy.zeros(N, dt)
     for name, fname in zip(oq.multi_peril, fnames):
         if name in 'LAVA LAHAR PYRO':
             tofloat = valid.probability
         else:
             tofloat = valid.positivefloat
         data = []
         with open(fname) as f:
             for row in csv.DictReader(f):
                 intensity = tofloat(row['intensity'])
                 if intensity > 0:
                     data.append((valid.longitude(row['lon']),
                                  valid.latitude(row['lat']), intensity))
         data = numpy.array(data, [('lon', float), ('lat', float),
                                   ('number', float)])
         logging.info('Read %s with %d rows' % (fname, len(data)))
         if len(data) != len(numpy.unique(data[['lon', 'lat']])):
             raise InvalidFile('There are duplicated points in %s' % fname)
         try:
             asset_hazard_distance = oq.asset_hazard_distance[name]
         except KeyError:
             asset_hazard_distance = oq.asset_hazard_distance['default']
         sites, filtdata, _discarded = geo.utils.assoc(
             data, self.sitecol, asset_hazard_distance, 'filter')
         z = numpy.zeros(N, float)
         z[sites.sids] = filtdata['number']
         self.datastore['multi_peril'][name] = z
     self.datastore.set_attrs('multi_peril', nbytes=z.nbytes)
Example #15
def get_mesh_hcurves(oqparam):
    """
    Read CSV data in the format `lon lat, v1-vN, w1-wN, ...`.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :returns:
        the mesh of points and the data as a dictionary
        imt -> array of curves for each site
    """
    imtls = oqparam.imtls
    lon_lats = set()
    data = AccumDict()  # imt -> list of arrays
    ncols = len(imtls) + 1  # lon_lat + curve_per_imt ...
    csvfile = oqparam.inputs['hazard_curves']
    for line, row in enumerate(csv.reader(csvfile), 1):
        try:
            if len(row) != ncols:
                raise ValueError('Expected %d columns, found %d' %
                                 (ncols, len(row)))
            x, y = row[0].split()
            lon_lat = valid.longitude(x), valid.latitude(y)
            if lon_lat in lon_lats:
                raise DuplicatedPoint(lon_lat)
            lon_lats.add(lon_lat)
            for i, imt_ in enumerate(imtls, 1):
                values = valid.decreasing_probabilities(row[i])
                if len(values) != len(imtls[imt_]):
                    raise ValueError('Found %d values, expected %d' %
                                     (len(values), len(imtls[imt_])))
                data += {imt_: [numpy.array(values)]}
        except (ValueError, DuplicatedPoint) as err:
            raise err.__class__('%s: file %s, line %d' % (err, csvfile, line))
    lons, lats = zip(*sorted(lon_lats))
    mesh = geo.Mesh(numpy.array(lons), numpy.array(lats))
    return mesh, {imt: numpy.array(lst) for imt, lst in data.items()}
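Each row therefore packs the location and one probability curve per IMT into comma-separated fields, with space-separated numbers inside each field: the first field holds lon and lat, and the probabilities in every curve must be decreasing. A hypothetical row for two IMTs with three levels each (numbers invented for illustration):

    9.0 45.0, 0.98 0.50 0.10, 0.90 0.40 0.05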
Example #16
def prepare_site_model(exposure_xml, sites_csv, vs30_csv,
                       z1pt0, z2pt5, vs30measured, grid_spacing=0,
                       assoc_distance=5, output='site_model.csv'):
    """
    Prepare a site_model.csv file from exposure xml files/site csv files,
    vs30 csv files and a grid spacing which can be 0 (meaning no grid).
    For each site the closest vs30 parameter is used. The command can also
    generate (on demand) the additional fields z1pt0, z2pt5 and vs30measured
    which may be needed by your hazard model, depending on the required GSIMs.
    """
    hdf5 = datastore.hdf5new()
    req_site_params = {'vs30'}
    fields = ['lon', 'lat', 'vs30']
    if z1pt0:
        req_site_params.add('z1pt0')
        fields.append('z1pt0')
    if z2pt5:
        req_site_params.add('z2pt5')
        fields.append('z2pt5')
    if vs30measured:
        req_site_params.add('vs30measured')
        fields.append('vs30measured')
    with performance.Monitor(hdf5.path, hdf5, measuremem=True) as mon:
        if exposure_xml:
            mesh, assets_by_site = Exposure.read(
                exposure_xml, check_dupl=False).get_mesh_assets_by_site()
            mon.hdf5['assetcol'] = assetcol = site.SiteCollection.from_points(
                mesh.lons, mesh.lats, req_site_params=req_site_params)
            if grid_spacing:
                grid = mesh.get_convex_hull().dilate(
                    grid_spacing).discretize(grid_spacing)
                haz_sitecol = site.SiteCollection.from_points(
                    grid.lons, grid.lats, req_site_params=req_site_params)
                logging.info(
                    'Associating exposure grid with %d locations to %d '
                    'exposure sites', len(haz_sitecol), len(assets_by_site))
                haz_sitecol, assets_by, discarded = assoc(
                    assets_by_site, haz_sitecol,
                    grid_spacing * SQRT2, 'filter')
                if len(discarded):
                    logging.info('Discarded %d sites with assets '
                                 '[use oq plot_assets]', len(discarded))
                    mon.hdf5['discarded'] = numpy.array(discarded)
                haz_sitecol.make_complete()
            else:
                haz_sitecol = assetcol
                discarded = []
        elif sites_csv:
            lons, lats = [], []
            for fname in sites_csv:
                with open(fname) as csv:
                    for line in csv:
                        if line.startswith('lon,lat'):  # possible header
                            continue
                        lon, lat = line.split(',')[:2]
                        lons.append(valid.longitude(lon))
                        lats.append(valid.latitude(lat))
            haz_sitecol = site.SiteCollection.from_points(
                lons, lats, req_site_params=req_site_params)
            if grid_spacing:
                grid = haz_sitecol.mesh.get_convex_hull().dilate(
                    grid_spacing).discretize(grid_spacing)
                haz_sitecol = site.SiteCollection.from_points(
                    grid.lons, grid.lats, req_site_params=req_site_params)
        else:
            raise RuntimeError('Missing exposures or missing sites')
        vs30orig = read_vs30(vs30_csv)
        logging.info('Associating %d hazard sites to %d site parameters',
                     len(haz_sitecol), len(vs30orig))
        sitecol, vs30, _ = assoc(
            vs30orig, haz_sitecol, assoc_distance, 'warn')
        sitecol.array['vs30'] = vs30['vs30']
        if z1pt0:
            sitecol.array['z1pt0'] = calculate_z1pt0(vs30['vs30'])
        if z2pt5:
            sitecol.array['z2pt5'] = calculate_z2pt5_ngaw2(vs30['vs30'])
        if vs30measured:
            sitecol.array['vs30measured'] = False  # it is inferred
        mon.hdf5['sitecol'] = sitecol
        write_csv(output, sitecol.array[fields])
    logging.info('Saved %d rows in %s' % (len(sitecol), output))
    logging.info(mon)
    return sitecol
Example #17
def main(vs30_csv,
         z1pt0=False,
         z2pt5=False,
         vs30measured=False,
         *,
         exposure_xml=None,
         sites_csv=None,
         grid_spacing: float = 0,
         assoc_distance: float = 5,
         output='site_model.csv'):
    """
    Prepare a site_model.csv file from exposure xml files/site csv files,
    vs30 csv files and a grid spacing which can be 0 (meaning no grid).
    For each site the closest vs30 parameter is used. The command can also
    generate (on demand) the additional fields z1pt0, z2pt5 and vs30measured
    which may be needed by your hazard model, depending on the required GSIMs.
    """
    hdf5 = datastore.hdf5new()
    req_site_params = {'vs30'}
    fields = ['lon', 'lat', 'vs30']
    if z1pt0:
        req_site_params.add('z1pt0')
        fields.append('z1pt0')
    if z2pt5:
        req_site_params.add('z2pt5')
        fields.append('z2pt5')
    if vs30measured:
        req_site_params.add('vs30measured')
        fields.append('vs30measured')
    with performance.Monitor(measuremem=True) as mon:
        if exposure_xml:
            mesh, assets_by_site = Exposure.read(
                exposure_xml, check_dupl=False).get_mesh_assets_by_site()
            hdf5['assetcol'] = assetcol = site.SiteCollection.from_points(
                mesh.lons, mesh.lats, req_site_params=req_site_params)
            if grid_spacing:
                grid = mesh.get_convex_hull().dilate(grid_spacing).discretize(
                    grid_spacing)
                haz_sitecol = site.SiteCollection.from_points(
                    grid.lons, grid.lats, req_site_params=req_site_params)
                logging.info(
                    'Associating exposure grid with %d locations to %d '
                    'exposure sites', len(haz_sitecol), len(assets_by_site))
                haz_sitecol, assets_by, discarded = assoc(
                    assets_by_site, haz_sitecol, grid_spacing * SQRT2,
                    'filter')
                if len(discarded):
                    logging.info(
                        'Discarded %d sites with assets '
                        '[use oq plot_assets]', len(discarded))
                    hdf5['discarded'] = numpy.array(discarded)
                haz_sitecol.make_complete()
            else:
                haz_sitecol = assetcol
                discarded = []
        elif sites_csv:
            lons, lats = [], []
            for fname in sites_csv:
                check_fname(fname, 'sites_csv', output)
                with read(fname) as csv:
                    for line in csv:
                        if line.startswith('lon,lat'):  # possible header
                            continue
                        lon, lat = line.split(',')[:2]
                        lons.append(valid.longitude(lon))
                        lats.append(valid.latitude(lat))
            haz_sitecol = site.SiteCollection.from_points(
                lons, lats, req_site_params=req_site_params)
            if grid_spacing:
                grid = haz_sitecol.mesh.get_convex_hull().dilate(
                    grid_spacing).discretize(grid_spacing)
                haz_sitecol = site.SiteCollection.from_points(
                    grid.lons, grid.lats, req_site_params=req_site_params)
        else:
            raise RuntimeError('Missing exposures or missing sites')
        vs30 = associate(haz_sitecol, vs30_csv, assoc_distance)
        if z1pt0:
            haz_sitecol.array['z1pt0'] = calculate_z1pt0(vs30['vs30'])
        if z2pt5:
            haz_sitecol.array['z2pt5'] = calculate_z2pt5_ngaw2(vs30['vs30'])
        hdf5['sitecol'] = haz_sitecol
        writers.write_csv(output, haz_sitecol.array[fields])
    logging.info('Saved %d rows in %s' % (len(haz_sitecol), output))
    logging.info(mon)
    return haz_sitecol