# Imports assumed from the CLIMADA code base these snippets are taken from;
# module-level names such as VARNAMES_EMDAT, LOGGER, DEF_CRS, Impact, Tag,
# TagHaz or SYSTEM_DIR come from the respective CLIMADA modules and are not
# redefined here.
import pickle
import datetime as dt
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
from cartopy.io import shapereader

from climada.util import coordinates as u_coord


def hit_country_per_hazard(intensity_path, names_path, reg_id_path, date_path):
    """hit_country_per_hazard: create list of hit countries from hazard set

    Parameters
    ----------
    intensity_path : str
        Path to file containing sparse matrix with hazards as rows and grid points
        as cols, values only at location with impacts
    names_path : str
        Path to file with an identifier for each hazard (e.g. ibtracsID) (rows of the matrix)
    reg_id_path : str
        Path to file with ISO country ID of each grid point (cols of the matrix)
    date_path : str
        Path to file with start date of each hazard (rows of the matrix)

    Returns
    -------
    hit_countries : pd.DataFrame
        DataFrame with all hit countries per hazard
    """
    with open(intensity_path, 'rb') as filef:
        inten = pickle.load(filef)
    with open(names_path, 'rb') as filef:
        names = pickle.load(filef)
    with open(reg_id_path, 'rb') as filef:
        reg_id = pickle.load(filef)
    with open(date_path, 'rb') as filef:
        date = pickle.load(filef)
    # loop over the tracks (over the rows of the intensity matrix)
    all_hits = []
    for track in range(len(names)):
        # select track
        tc_track = inten[track, :]
        # select only indices that are not zero
        hits = tc_track.nonzero()[1]
        # get the country of these indices and remove duplicates
        hits = list(set(reg_id[hits]))
        # append hit countries to list
        all_hits.append(hits)

    # collect one row per (hazard, hit country) pair; building the DataFrame
    # once at the end avoids pd.DataFrame.append, which was removed in pandas 2.0
    rows = []
    for track, _ in enumerate(names):
        # loop over the countries hit by this track (empty list: no hit)
        for hit in all_hits[track]:
            # hit country ISO
            ctry_iso = u_coord.country_to_iso(hit, "alpha3")
            # create an entry for each country a hazard has hit
            rows.append({
                'hit_country': ctry_iso,
                'Date_start': date[track],
                'ibtracsID': names[track]
            })
    # return data frame with all hit countries per hazard
    return pd.DataFrame(rows, columns=['hit_country', 'Date_start', 'ibtracsID'])
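
# Usage sketch (illustrative; the pickle paths are hypothetical and must hold
# a scipy sparse intensity matrix, hazard names, grid-point region ids and
# start dates, as described in the docstring):
# hits = hit_country_per_hazard('intensity.p', 'names.p', 'reg_id.p', 'date.p')
# print(hits.head())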
# Example #2
    def __init__(self,
                 hazard_dict,
                 exposure,
                 impact_funcs,
                 haz_model="NWP",
                 exposure_name=None):
        """Initialization with hazard, exposure and vulnerability.

        Parameters
        ----------
        hazard_dict : dict
            Dictionary of the format {run_datetime: Hazard} with run_datetime
            being the initialization time of a weather forecast run and Hazard
            being a CLIMADA Hazard derived from that forecast for one event.
            A probabilistic representation of that one event is possible,
            as long as the attribute Hazard.date is the same for all
            events. Several run_datetime:Hazard combinations for the same
            event can be provided.
        exposure : Exposure
        impact_funcs : ImpactFuncSet
        haz_model : str, optional
            Short string specifying the model used to create the hazard,
            if possible three big letters. Default is 'NWP' for numerical
            weather prediction.
        exposure_name : str, optional
            string specifying the exposure (e.g. 'EU'), which is used to
            name output files.
        """
        self.run_datetime = list(hazard_dict.keys())
        self.hazard = list(hazard_dict.values())
        # check event_date
        hazard_date = np.unique(
            [date for hazard in self.hazard for date in hazard.date])
        if len(hazard_date) != 1:
            raise ValueError(
                "Please provide hazards containing only one " +
                "event_date. The current hazards contain several " +
                "events with different event_dates and the Forecast " +
                "class cannot function properly with such hazards.")
        self.event_date = dt.datetime.fromordinal(hazard_date[0])
        self.haz_model = haz_model
        self.exposure = exposure
        if exposure_name is None:
            try:
                self.exposure_name = u_coord.country_to_iso(
                    exposure.gdf.region_id.unique()[0], "name")
            except (KeyError, AttributeError):
                self.exposure_name = "custom"
        else:
            self.exposure_name = exposure_name
        self.vulnerability = impact_funcs
        self._impact = [Impact() for _ in self.run_datetime]  # avoid shadowing the dt module alias
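
    # Construction sketch (illustrative; the TropCyclone class, its from_hdf5
    # reader and the file names are assumptions, not from the original source):
    #   import datetime as dt
    #   from climada.hazard import TropCyclone
    #   hazard_dict = {
    #       dt.datetime(2021, 9, 1, 0): TropCyclone.from_hdf5('run_00.h5'),
    #       dt.datetime(2021, 9, 1, 12): TropCyclone.from_hdf5('run_12.h5'),
    #   }
    #   forecast = Forecast(hazard_dict, exposure, impact_funcs, haz_model='NWP')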
# Example #3
    def _map_exp_to_mriot(self, exp_regid, mriot_type):
        """
        Map regions names in exposure into Input-output regions names.
        exp_regid must be according to ISO 3166 numeric country codes.
        """

        if mriot_type == 'WIOD':
            mriot_reg_name = u_coord.country_to_iso(exp_regid, "alpha3")
            idx_country = np.where(self.mriot_reg_names == mriot_reg_name)[0]

            if idx_country.size == 0:
                mriot_reg_name = 'ROW'

        elif mriot_type == '':
            mriot_reg_name = exp_regid
        else:
            # avoid returning an unbound local for unexpected mriot_type values
            raise ValueError('Unknown mriot_type: %s' % mriot_type)

        return mriot_reg_name
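
    # Illustrative behaviour (grounded in the code above): for
    # mriot_type='WIOD', a numeric ISO code such as 840 is converted to its
    # alpha-3 code ('USA') and falls back to 'ROW' when that code is not among
    # self.mriot_reg_names.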
def emdat_countries_by_hazard(emdat_file_csv, hazard=None, year_range=None):
    """return list of all countries exposed to a chosen hazard type
    from EMDAT data as CSV.

    Parameters
    ----------
    emdat_file_csv : str, Path, or DataFrame
        Either string with full path to CSV-file or
        pandas.DataFrame loaded from EM-DAT CSV
    hazard : list or str
        List of disaster (sub-)types according to EM-DAT terminology, e.g.:
        Animal accident, Drought, Earthquake, Epidemic, Extreme temperature,
        Flood, Fog, Impact, Insect infestation, Landslide, Mass movement (dry),
        Storm, Volcanic activity, Wildfire;
        Coastal Flooding, Convective Storm, Riverine Flood, Tropical cyclone,
        Tsunami, etc.;
        OR CLIMADA hazard type abbreviations, e.g. TC, BF, etc.
    year_range : list or tuple
        Year range to be extracted, e.g. (2000, 2015);
        (only min and max are considered)

    Returns
    -------
    countries_iso3a : list
        List of ISO3-codes of countries impacted by the disaster (sub-)types
    countries_names : list
        List of names of countries impacted by the disaster (sub-)types
    """
    df_data = clean_emdat_df(emdat_file_csv,
                             hazard=hazard,
                             year_range=year_range)
    countries_iso3a = list(df_data.ISO.unique())
    countries_names = list()
    for iso3a in countries_iso3a:
        try:
            countries_names.append(u_coord.country_to_iso(iso3a, "name"))
        except LookupError:
            countries_names.append('NA')
    return countries_iso3a, countries_names
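
# Usage sketch (illustrative; the CSV path is hypothetical):
# iso3_codes, names = emdat_countries_by_hazard('emdat_public.csv', hazard='TC',
#                                               year_range=(2000, 2015))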
def emdat_to_impact(emdat_file_csv,
                    hazard_type_climada,
                    year_range=None,
                    countries=None,
                    hazard_type_emdat=None,
                    reference_year=None,
                    imp_str="Total Damages"):
    """function to load EM-DAT data return impact per event

    Parameters
    ----------
    emdat_file_csv : str or pd.DataFrame
        Either string with full path to CSV-file or
        pandas.DataFrame loaded from EM-DAT CSV
    countries : list of str
        country ISO3-codes or names, e.g. ['JAM', 'CUB'].
        default: countries=None for all countries
    hazard_type_climada : list or str
        List of disaster (sub-)types according to EM-DAT terminology, e.g.:
        Animal accident, Drought, Earthquake, Epidemic, Extreme temperature,
        Flood, Fog, Impact, Insect infestation, Landslide, Mass movement (dry),
        Storm, Volcanic activity, Wildfire;
        Coastal Flooding, Convective Storm, Riverine Flood, Tropical cyclone,
        Tsunami, etc.;
        OR CLIMADA hazard type abbreviations, e.g. TC, BF, etc.
    hazard_type_emdat : list or str, optional
        Disaster (sub-)type according to EM-DAT terminology; defaults to
        [hazard_type_climada] if not given.
    year_range : list or tuple
        Year range to be extracted, e.g. (2000, 2015);
        (only min and max are considered)
    reference_year : int, optional
        Reference year of exposures. Impact is scaled proportional to GDP
        to the value of the reference year. Default: no scaling (None or 0)
    imp_str : str
        Column name of impact metric in EMDAT CSV,
        default = "Total Damages ('000 US$)"

    Returns
    -------
    impact_instance : climada.engine.Impact
        Impact object in the same format as the output of a CLIMADA impact
        computation. Values are scaled with GDP to reference_year if
        reference_year is given, i.e. current US$ for
        imp_str="Total Damages ('000 US$) scaled" (a factor of 1000 is applied).
        impact_instance.eai_exp holds the expected annual impact per country and
        impact_instance.coord_exp holds rough central coordinates per country.
    countries : list
        ISO3-codes of the countries, in the same order as impact_instance.eai_exp
    """
    if "Total Damages" in imp_str:
        imp_str = "Total Damages ('000 US$)"
    elif "Insured Damages" in imp_str:
        imp_str = "Insured Damages ('000 US$)"
    elif "Reconstruction Costs" in imp_str:
        imp_str = "Reconstruction Costs ('000 US$)"
    imp_str = VARNAMES_EMDAT[max(VARNAMES_EMDAT.keys())][imp_str]
    if not hazard_type_emdat:
        hazard_type_emdat = [hazard_type_climada]
    if reference_year == 0:
        reference_year = None
    # Initiate Impact instance:
    impact_instance = Impact()

    impact_instance.tag = dict()
    impact_instance.tag['haz'] = TagHaz(
        haz_type=hazard_type_climada,
        file_name=emdat_file_csv,
        description='EM-DAT impact, direct import')
    impact_instance.tag['exp'] = Tag(
        file_name=emdat_file_csv, description='EM-DAT impact, direct import')
    impact_instance.tag['impf_set'] = Tag(file_name=None, description=None)

    # Load EM-DAT impact data by event:
    em_data = emdat_impact_event(emdat_file_csv,
                                 countries=countries,
                                 hazard=hazard_type_emdat,
                                 year_range=year_range,
                                 reference_year=reference_year,
                                 imp_str=imp_str,
                                 version=max(VARNAMES_EMDAT.keys()))

    if isinstance(countries, str):
        countries = [countries]
    elif not countries:
        countries = emdat_countries_by_hazard(emdat_file_csv,
                                              year_range=year_range,
                                              hazard=hazard_type_emdat)[0]

    if em_data.empty:
        return impact_instance, countries
    impact_instance.event_id = np.array(em_data.index, int)
    impact_instance.event_name = list(em_data[VARNAMES_EMDAT[max(
        VARNAMES_EMDAT.keys())]['Dis No']])

    date_list = list()
    for year in list(em_data['Year']):
        date_list.append(datetime.toordinal(datetime.strptime(str(year),
                                                              '%Y')))
    if 'Start Year' in em_data.columns and 'Start Month' in em_data.columns \
            and 'Start Day' in em_data.columns:
        idx = 0
        for year, month, day in zip(em_data['Start Year'],
                                    em_data['Start Month'],
                                    em_data['Start Day']):
            if np.isnan(year):
                idx += 1
                continue
            if np.isnan(month):
                month = 1
            if np.isnan(day):
                day = 1
            date_list[idx] = datetime.toordinal(
                datetime.strptime('%02i/%02i/%04i' % (day, month, year),
                                  '%d/%m/%Y'))
            idx += 1
    impact_instance.date = np.array(date_list, int)
    impact_instance.crs = DEF_CRS

    if not reference_year:
        impact_instance.at_event = np.array(em_data["impact"])
    else:
        impact_instance.at_event = np.array(em_data["impact_scaled"])
    impact_instance.at_event[np.isnan(impact_instance.at_event)] = 0
    if not year_range:
        year_range = [em_data['Year'].min(), em_data['Year'].max()]
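    # each event gets identical frequency 1 / n_years, where n_years =
    # max(year_range) - min(year_range) + 1; np.diff(year_range) returns the
    # year span as a one-element array, so the result is an array as well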
    impact_instance.frequency = np.ones(
        em_data.shape[0]) / (1 + np.diff(year_range))
    impact_instance.tot_value = 0
    impact_instance.aai_agg = np.nansum(impact_instance.at_event *
                                        impact_instance.frequency)
    impact_instance.unit = 'USD'
    impact_instance.imp_mat = []

    # init rough exposure with central point per country
    shp = shapereader.natural_earth(resolution='110m',
                                    category='cultural',
                                    name='admin_0_countries')
    shp = shapereader.Reader(shp)
    countries_reg_id = list()
    countries_lat = list()
    countries_lon = list()
    impact_instance.eai_exp = np.zeros(
        len(countries))  # empty: damage at exposure
    for idx, cntry in enumerate(countries):
        try:
            cntry = u_coord.country_to_iso(cntry, "alpha3")
        except LookupError:
            LOGGER.warning('Country not found in iso_country: %s', cntry)
        cntry_boolean = False
        for rec in shp.records():
            if rec.attributes['ADM0_A3'].casefold() == cntry.casefold():
                bbox = rec.geometry.bounds
                cntry_boolean = True
                break
        if cntry_boolean:
            countries_lat.append(np.mean([bbox[1], bbox[3]]))
            countries_lon.append(np.mean([bbox[0], bbox[2]]))
        else:
            countries_lat.append(np.nan)
            countries_lon.append(np.nan)
        try:
            countries_reg_id.append(u_coord.country_to_iso(cntry, "numeric"))
        except LookupError:
            countries_reg_id.append(0)
        df_tmp = em_data[em_data[VARNAMES_EMDAT[max(
            VARNAMES_EMDAT.keys())]['ISO']].str.contains(cntry)]
        if not reference_year:
            impact_instance.eai_exp[idx] = sum(
                np.array(df_tmp["impact"]) * impact_instance.frequency[0])
        else:
            impact_instance.eai_exp[idx] = sum(
                np.array(df_tmp["impact_scaled"]) *
                impact_instance.frequency[0])

    impact_instance.coord_exp = np.stack([countries_lat, countries_lon],
                                         axis=1)
    return impact_instance, countries
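
# Usage sketch (illustrative; the CSV path is hypothetical):
# impact, countries = emdat_to_impact('emdat_public.csv', 'TC',
#                                     year_range=(2000, 2015),
#                                     reference_year=2015)
# print(impact.aai_agg, countries)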
def emdat_impact_event(emdat_file_csv,
                       countries=None,
                       hazard=None,
                       year_range=None,
                       reference_year=None,
                       imp_str="Total Damages ('000 US$)",
                       version=2020):
    """function to load EM-DAT data return impact per event

    Parameters
    ----------
    emdat_file_csv : str or DataFrame
        Either string with full path to CSV-file or
        pandas.DataFrame loaded from EM-DAT CSV
    countries : list of str
        country ISO3-codes or names, e.g. ['JAM', 'CUB'].
        default: countries=None for all countries
    hazard : list or str
        List of disaster (sub-)types according to EM-DAT terminology, e.g.:
        Animal accident, Drought, Earthquake, Epidemic, Extreme temperature,
        Flood, Fog, Impact, Insect infestation, Landslide, Mass movement (dry),
        Storm, Volcanic activity, Wildfire;
        Coastal Flooding, Convective Storm, Riverine Flood, Tropical cyclone,
        Tsunami, etc.;
        OR CLIMADA hazard type abbreviations, e.g. TC, BF, etc.
    year_range : list or tuple
        Year range to be extracted, e.g. (2000, 2015);
        (only min and max are considered)
    reference_year : int, optional
        Reference year of exposures. Impact is scaled proportional to GDP
        to the value of the reference year. Default: no scaling (None or 0)
    imp_str : str
        Column name of impact metric in EMDAT CSV,
        default = "Total Damages ('000 US$)"
    version : int
        EM-DAT version to take variable/column names from (default: 2020)

    Returns
    -------
    out : pd.DataFrame
        EM-DAT DataFrame with new columns "year", "region_id", "impact", and
        "impact_scaled": total impact per event in the same unit as the chosen
        impact column, but multiplied by 1000 if the impact is given in
        1000 US$ (e.g. imp_str="Total Damages ('000 US$) scaled").
    """
    imp_str = VARNAMES_EMDAT[version][imp_str]
    df_data = clean_emdat_df(emdat_file_csv,
                             hazard=hazard,
                             year_range=year_range,
                             countries=countries,
                             target_version=version)
    df_data['year'] = df_data['Year']
    df_data['reference_year'] = reference_year
    df_data['impact'] = df_data[imp_str]
    df_data['impact_scaled'] = scale_impact2refyear(
        df_data[imp_str].values,
        df_data.Year.values,
        df_data.ISO.values,
        reference_year=reference_year)
    df_data['region_id'] = np.nan
    for country in df_data.ISO.unique():
        try:
            df_data.loc[df_data.ISO == country, 'region_id'] = \
                u_coord.country_to_iso(country, "numeric")
        except LookupError:
            LOGGER.warning('ISO3alpha code not found in iso_country: %s',
                           country)
    if '000 US' in imp_str:
        df_data['impact'] *= 1e3
        df_data['impact_scaled'] *= 1e3
    return df_data.reset_index(drop=True)
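
# Usage sketch (illustrative; the CSV path is hypothetical):
# df_events = emdat_impact_event('emdat_public.csv', countries=['JAM', 'CUB'],
#                                hazard='TC', year_range=(2000, 2015))
# print(df_events[['year', 'region_id', 'impact', 'impact_scaled']].head())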
def emdat_impact_yearlysum(emdat_file_csv,
                           countries=None,
                           hazard=None,
                           year_range=None,
                           reference_year=None,
                           imp_str="Total Damages ('000 US$)",
                           version=2020):
    """function to load EM-DAT data and sum impact per year

    Parameters
    ----------
    emdat_file_csv : str or DataFrame
        Either string with full path to CSV-file or
        pandas.DataFrame loaded from EM-DAT CSV
    countries : list of str
        country ISO3-codes or names, e.g. ['JAM', 'CUB'].
        countries=None for all countries (default)
    hazard : list or str
        List of disaster (sub-)types according to EM-DAT terminology, e.g.:
        Animal accident, Drought, Earthquake, Epidemic, Extreme temperature,
        Flood, Fog, Impact, Insect infestation, Landslide, Mass movement (dry),
        Storm, Volcanic activity, Wildfire;
        Coastal Flooding, Convective Storm, Riverine Flood, Tropical cyclone,
        Tsunami, etc.;
        OR CLIMADA hazard type abbreviations, e.g. TC, BF, etc.
    year_range : list or tuple
        Year range to be extracted, e.g. (2000, 2015);
        (only min and max are considered)
    reference_year : int, optional
        Reference year to scale the impact to (proportional to GDP);
        default: no scaling
    imp_str : str
        Column name of impact metric in EM-DAT CSV,
        default = "Total Damages ('000 US$)"
    version : int
        required EM-DAT data format version (i.e. year of download),
        changes naming of columns/variables (default: 2020)

    Returns
    -------
    out : pd.DataFrame
        DataFrame with summed impact and scaled impact per
        year and country.
    """
    imp_str = VARNAMES_EMDAT[version][imp_str]
    df_data = clean_emdat_df(emdat_file_csv,
                             countries=countries,
                             hazard=hazard,
                             year_range=year_range,
                             target_version=version)

    df_data[imp_str + " scaled"] = scale_impact2refyear(
        df_data[imp_str].values,
        df_data.Year.values,
        df_data.ISO.values,
        reference_year=reference_year)
    out = pd.DataFrame(columns=[
        'ISO', 'region_id', 'year', 'impact', 'impact_scaled', 'reference_year'
    ])
    for country in df_data.ISO.unique():
        country = u_coord.country_to_iso(country, "alpha3")
        if not df_data.loc[df_data.ISO == country].size:
            continue
        all_years = np.arange(min(df_data.Year), max(df_data.Year) + 1)
        data_out = pd.DataFrame(index=np.arange(0, len(all_years)),
                                columns=out.columns)
        df_country = df_data.loc[df_data.ISO == country]
        for cnt, year in enumerate(all_years):
            data_out.loc[cnt, 'year'] = year
            data_out.loc[cnt, 'reference_year'] = reference_year
            data_out.loc[cnt, 'ISO'] = country
            data_out.loc[cnt, 'region_id'] = u_coord.country_to_iso(
                country, "numeric")
            data_out.loc[cnt, 'impact'] = \
                np.nansum(df_country[df_country.Year.isin([year])][imp_str])
            data_out.loc[cnt, 'impact_scaled'] = \
                np.nansum(df_country[df_country.Year.isin([year])][imp_str + " scaled"])
            if '000 US' in imp_str:  # EM-DAT damages provided in '000 USD
                data_out.loc[cnt, 'impact'] = data_out.loc[cnt, 'impact'] * 1e3
                data_out.loc[
                    cnt,
                    'impact_scaled'] = data_out.loc[cnt, 'impact_scaled'] * 1e3
        out = pd.concat([out, data_out])
    out = out.reset_index(drop=True)
    return out
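
# Usage sketch (illustrative; the CSV path is hypothetical):
# df_yearly = emdat_impact_yearlysum('emdat_public.csv', hazard='Flood',
#                                    year_range=(1990, 2020),
#                                    reference_year=2020)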
def clean_emdat_df(emdat_file,
                   countries=None,
                   hazard=None,
                   year_range=None,
                   target_version=2020):
    """
    Get a clean and standardized DataFrame from EM-DAT-CSV-file
    (1) load EM-DAT data from CSV to DataFrame and remove header/footer,
    (2) handle version, clean up, and add columns, and
    (3) filter by country, hazard type and year range (if any given)

    Parameters
    ----------
    emdat_file : str, Path, or DataFrame
        Either string with full path to CSV-file or
        pandas.DataFrame loaded from EM-DAT CSV
    countries : list of str
        country ISO3-codes or names, e.g. ['JAM', 'CUB'].
        countries=None for all countries (default)
    hazard : list or str
        List of disaster (sub-)types according to EM-DAT terminology, e.g.:
        Animal accident, Drought, Earthquake, Epidemic, Extreme temperature,
        Flood, Fog, Impact, Insect infestation, Landslide, Mass movement (dry),
        Storm, Volcanic activity, Wildfire;
        Coastal Flooding, Convective Storm, Riverine Flood, Tropical cyclone,
        Tsunami, etc.;
        OR CLIMADA hazard type abbreviations, e.g. TC, BF, etc.
    year_range : list or tuple
        Year range to be extracted, e.g. (2000, 2015);
        (only min and max are considered)
    target_version : int
        required EM-DAT data format version (i.e. year of download),
        changes naming of columns/variables (default: 2020)

    Returns
    -------
    df_data : pd.DataFrame
        DataFrame containing cleaned and filtered EM-DAT impact data
    """
    # (1) load EM-DAT data from CSV to DataFrame, skipping the header:
    if isinstance(emdat_file, (str, Path)):
        df_emdat = pd.read_csv(emdat_file, encoding="ISO-8859-1", header=0)
        counter = 0
        while not ('Country' in df_emdat.columns
                   and 'ISO' in df_emdat.columns):
            counter += 1
            df_emdat = pd.read_csv(emdat_file,
                                   encoding="ISO-8859-1",
                                   header=counter)
            if counter == 10:
                break
        del counter
    elif isinstance(emdat_file, pd.DataFrame):
        df_emdat = emdat_file
    else:
        raise TypeError('emdat_file needs to be str or DataFrame')
    # keep only rows with at least 9 non-NaN values (drops footer and other debris):
    df_emdat = df_emdat.dropna(thresh=9)

    # (2)  handle version, clean up, and add columns:
    # (2.1) identify underlying EMDAT version of csv:
    version = 2020
    for vers in list(VARNAMES_EMDAT.keys()):
        if len(df_emdat.columns) >= len(VARNAMES_EMDAT[vers]) and \
           all(item in list(df_emdat.columns) for item in VARNAMES_EMDAT[vers].values()):
            version = vers
    # (2.2) create new DataFrame df_data with column names as target version
    df_data = pd.DataFrame(index=df_emdat.index.values,
                           columns=VARNAMES_EMDAT[target_version].values())
    if 'Year' not in df_data.columns:  # make sure column "Year" exists
        df_data['Year'] = np.nan
    for col in df_data.columns:  # loop over columns
        if col in VARNAMES_EMDAT[version]:
            df_data[col] = df_emdat[VARNAMES_EMDAT[version][col]]
        elif col in df_emdat.columns:
            df_data[col] = df_emdat[col]
        elif col == 'Year' and version <= 2018:
            years_list = list()
            for disaster_no in df_emdat[VARNAMES_EMDAT[version]['Dis No']]:
                if isinstance(disaster_no, str):
                    years_list.append(int(disaster_no[0:4]))
                else:
                    years_list.append(np.nan)
            df_data[col] = years_list
    if version <= 2018 and target_version >= 2020:
        date_list = list()
        year_list = list()
        month_list = list()
        day_list = list()
        for year in list(df_data['Year']):
            if not np.isnan(year):
                date_list.append(datetime.strptime(str(year), '%Y'))
            else:
                date_list.append(datetime.strptime('0001', '%Y'))
        boolean_warning = True
        for idx, datestr in enumerate(list(df_emdat['Start date'])):
            try:
                date_list[idx] = datetime.strptime(datestr[-7:], '%m/%Y')
            except (ValueError, TypeError):  # NaN entries are floats, not str
                if boolean_warning:
                    LOGGER.warning('EM_DAT CSV contains invalid time formats')
                    boolean_warning = False
            try:
                date_list[idx] = datetime.strptime(datestr, '%d/%m/%Y')
            except (ValueError, TypeError):
                if boolean_warning:
                    LOGGER.warning('EM_DAT CSV contains invalid time formats')
                    boolean_warning = False
            day_list.append(date_list[idx].day)
            month_list.append(date_list[idx].month)
            year_list.append(date_list[idx].year)
        df_data['Start Month'] = np.array(month_list, dtype='int')
        df_data['Start Day'] = np.array(day_list, dtype='int')
        df_data['Start Year'] = np.array(year_list, dtype='int')
        for var in ['Disaster Subtype', 'Disaster Type', 'Country']:
            df_data[VARNAMES_EMDAT[target_version][var]].fillna('None',
                                                                inplace=True)

    # (3) Filter by countries, year range, and disaster type
    # (3.1) Countries:
    if countries and isinstance(countries, str):
        countries = [countries]
    if countries and isinstance(countries, list):
        for idx, country in enumerate(countries):
            # convert countries to iso3 alpha code:
            countries[idx] = u_coord.country_to_iso(country, "alpha3")
        df_data = df_data[df_data['ISO'].isin(countries)].reset_index(
            drop=True)
    # (3.2) Year range:
    if year_range:
        for idx in df_data.index:
            if np.isnan(df_data.loc[idx, 'Year']):
                df_data.loc[idx, 'Year'] = \
                    df_data.loc[idx, VARNAMES_EMDAT[target_version]['Start Year']]
        df_data = df_data[(df_data['Year'] >= min(year_range))
                          & (df_data['Year'] <= max(year_range))]

    # (3.3) Disaster type:
    if hazard and isinstance(hazard, str):
        hazard = [hazard]
    if hazard and isinstance(hazard, list):
        disaster_types = list()
        disaster_subtypes = list()
        for haz in hazard:
            if haz in df_data[VARNAMES_EMDAT[target_version]
                              ['Disaster Type']].unique():
                disaster_types.append(haz)
            if haz in df_data[VARNAMES_EMDAT[target_version]
                              ['Disaster Subtype']].unique():
                disaster_subtypes.append(haz)
            if haz in PERIL_TYPE_MATCH_DICT:
                disaster_types += PERIL_TYPE_MATCH_DICT[haz]
            if haz in PERIL_SUBTYPE_MATCH_DICT:
                disaster_subtypes += PERIL_SUBTYPE_MATCH_DICT[haz]
        df_data = df_data[
            (df_data[VARNAMES_EMDAT[target_version]['Disaster Type']].
             isin(disaster_types)) |
            (df_data[VARNAMES_EMDAT[target_version]['Disaster Subtype']].
             isin(disaster_subtypes))]
    return df_data.reset_index(drop=True)
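
# Usage sketch (illustrative; the CSV path is hypothetical):
# df_clean = clean_emdat_df('emdat_public.csv', countries=['USA'],
#                           hazard=['Tropical cyclone'],
#                           year_range=(2000, 2018))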
# Example #9
    def init_spam_agrar(self, **parameters):
        """initiates agriculture exposure from SPAM data:

            https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/DHXBJX

        Optional parameters:
            data_path (str): absolute path where files are stored.
                Default: SYSTEM_DIR

            country (str): Three letter country code of country to be cut out.
                No default (global)
            name_adm1 (str): Name of admin1 (e.g. Federal State) to be cut out.
                No default
            name_adm2 (str): Name of admin2 to be cut out.
                No default

            spam_variable (str): select one agricultural variable:
                'A'      physical area
                'H'      harvested area
                'P'      production
                'Y'      yield
                'V_agg'  value of production, aggregated over all crops,
                         food and non-food (default)
                Warning: for A, H, P and Y, currently all crops are summed up

            spam_technology (str): select one agricultural technology type:
                'TA'   all technologies together, i.e. complete crop (default)
                'TI'   irrigated portion of crop
                'TH'   rainfed high inputs portion of crop
                'TL'   rainfed low inputs portion of crop
                'TS'   rainfed subsistence portion of crop
                'TR'   rainfed portion of crop (= TA - TI, or TH + TL + TS)
                Note: a different impact_id is assigned to each technology (1-6)

            save_name_adm1 (Boolean): determines whether additional data is saved:
                False: only basics (lat, lon, total value), region_id per country
                True: basics plus the name of admin1

            haz_type (str): hazard type abbreviation, e.g.
                'DR' for Drought or
                'CP' for CropPotential


        """
        data_p = parameters.get('data_path', SYSTEM_DIR)
        spam_t = parameters.get('spam_technology', 'TA')
        spam_v = parameters.get('spam_variable', 'V_agg')
        adm0 = parameters.get('country')
        adm1 = parameters.get('name_adm1')
        adm2 = parameters.get('name_adm2')
        save_adm1 = parameters.get('save_name_adm1', False)
        haz_type = parameters.get('haz_type', DEF_HAZ_TYPE)

        # Test if parameters make sense:
        if spam_v not in ['A', 'H', 'P', 'Y', 'V_agg'] \
                or spam_t not in ['TA', 'TI', 'TH', 'TL', 'TS', 'TR']:
            raise ValueError('Invalid input parameter(s).')

        # read data from CSV:
        data = self._read_spam_file(data_path=data_p,
                                    spam_technology=spam_t,
                                    spam_variable=spam_v,
                                    result_mode=1)

        # extract country or admin level (if provided)
        data, region = self._spam_set_country(data,
                                              country=adm0,
                                              name_adm1=adm1,
                                              name_adm2=adm2)

        # sort by alloc_key to make extraction of lat / lon easier:
        data = data.sort_values(by=['alloc_key'])

        lat, lon = self._spam_get_coordinates(data.loc[:, 'alloc_key'],
                                              data_path=data_p)
        if save_adm1:
            self.name_adm1 = data.loc[:, 'name_adm1'].values

        if spam_v == 'V_agg':  # total only (column 7)
            i_1 = 7
            i_2 = 8
        else:
            i_1 = 7  # get sum over all crops (columns 7 to 48)
            i_2 = 49
        self.gdf['value'] = data.iloc[:, i_1:i_2].sum(axis=1).values
        self.gdf['latitude'] = lat.values
        self.gdf['longitude'] = lon.values
        LOGGER.info('Lat. range: {:+.3f} to {:+.3f}.'.format(
            np.min(self.gdf.latitude), np.max(self.gdf.latitude)))
        LOGGER.info('Lon. range: {:+.3f} to {:+.3f}.'.format(
            np.min(self.gdf.longitude), np.max(self.gdf.longitude)))

        # set region_id (numeric ISO3):
        country_id = data.loc[:, 'iso3']
        if country_id.unique().size == 1:
            region_id = np.ones(self.gdf.value.size, int)\
                * u_coord.country_to_iso(country_id.iloc[0], "numeric")
        else:
            region_id = np.zeros(self.gdf.value.size, int)
            for i in range(0, self.gdf.value.size):
                region_id[i] = u_coord.country_to_iso(country_id.iloc[i],
                                                      "numeric")
        self.gdf['region_id'] = region_id
        self.ref_year = 2005
        self.tag = Tag()
        self.tag.description = ("SPAM agrar exposure for variable " + spam_v +
                                " and technology " + spam_t)

        # if impact id variation iiv = 1, assign different damage function ID
        # per technology type.
        self._set_impf(spam_t, haz_type)

        self.tag.file_name = (FILENAME_SPAM + '_' + spam_v + '_' + spam_t +
                              '.csv')
        if spam_v in ('A', 'H'):
            self.value_unit = 'Ha'
        elif spam_v == 'Y':
            self.value_unit = 'kg/Ha'
        elif spam_v == 'P':
            self.value_unit = 'mt'
        else:
            self.value_unit = 'USD'

        LOGGER.info('Total {} {} {}: {:.1f} {}.'.format(
            spam_v, spam_t, region, self.gdf.value.sum(), self.value_unit))
        self.check()
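
    # Usage sketch (illustrative; SpamAgrar as the owning class name and the
    # parameter values are assumptions, not from the original source):
    #   exp = SpamAgrar()
    #   exp.init_spam_agrar(country='CHE', spam_variable='V_agg',
    #                       spam_technology='TA')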
# Example #10
def country_iso_geom(countries, shp_file, admin_key=['ADMIN', 'ADM0_A3']):
    """ Get country ISO alpha_3, country id (defined as the United Nations
    Statistics Division (UNSD) 3-digit equivalent numeric codes and 0 if
    country not found) and country's geometry shape.

    Parameters
    ----------
    countries : list or dict
        list of country names (admin0) or dict with key = admin0 name
        and value = [admin1 names]
    shp_file : cartopy.io.shapereader.Reader
        shape file
    admin_key : list of str
        keys used to find the admin0 (or subunit) name and the ISO3 code

    Returns
    -------
    cntry_info : dict
        key = ISO alpha_3 country, value = [country id, country name, country geometry]
    cntry_admin1 : dict
        key = ISO alpha_3 country, value = [admin1 geometries]

    """
    countries_shp = {}
    list_records = list(shp_file.records())
    for info_idx, info in enumerate(list_records):
        countries_shp[info.attributes[admin_key[0]].title()] = info_idx

    cntry_info = dict()
    cntry_admin1 = dict()
    if isinstance(countries, list):
        countries = {cntry: [] for cntry in countries}
        admin1_rec = list()
    else:
        admin1_rec = shapereader.natural_earth(resolution='10m',
                                               category='cultural',
                                               name='admin_1_states_provinces')
        admin1_rec = shapereader.Reader(admin1_rec)
        admin1_rec = list(admin1_rec.records())

    for country_name, prov_list in countries.items():
        country_idx = countries_shp.get(country_name.title())
        if country_idx is None:
            options = [
                country_opt for country_opt in countries_shp
                if country_name.title() in country_opt
            ]
            if not options:
                options = list(countries_shp.keys())
            raise ValueError('Country %s not found. Possible options: %s' %
                             (country_name, options))
        iso3 = list_records[country_idx].attributes[admin_key[1]]
        try:
            cntry_id = u_coord.country_to_iso(iso3, "numeric")
        except LookupError:
            cntry_id = 0
        cntry_info[iso3] = [
            cntry_id,
            country_name.title(), list_records[country_idx].geometry
        ]
        cntry_admin1[iso3] = _fill_admin1_geom(iso3, admin1_rec, prov_list)

    return cntry_info, cntry_admin1
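
# Usage sketch (illustrative): resolve ISO codes and geometries for a list of
# admin0 names from the Natural Earth 110m country shapefile.
# shp = shapereader.Reader(shapereader.natural_earth(
#     resolution='110m', category='cultural', name='admin_0_countries'))
# cntry_info, cntry_admin1 = country_iso_geom(['Switzerland', 'Austria'], shp)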