Example #1
def extend_columns(level, rows, adms, admininfo, *args):
    columns = list()
    for arg in args:
        if arg:
            columns.extend(arg)
    if adms is None:
        adms = ['global']
    for adm in adms:
        if level == 'global':
            row = list()
        elif level == 'regional':
            row = [adm]
        elif level == 'national':
            row = [
                adm,
                Country.get_country_name_from_iso3(adm),
                '|'.join(sorted(admininfo.iso3_to_region_and_hrp[adm]))
            ]
        elif level == 'subnational':
            countryiso3 = admininfo.pcode_to_iso3[adm]
            countryname = Country.get_country_name_from_iso3(countryiso3)
            adm1_name = admininfo.pcode_to_name[adm]
            row = [countryiso3, countryname, adm, adm1_name]
        else:
            raise ValueError('Invalid level')  # guard: row would otherwise be undefined
        for column in columns:
            row.append(column.get(adm))
        rows.append(row)
    return columns
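A minimal sketch of a call at the 'regional' level (the rows list and the population column dict below are illustrative assumptions, not from the original source):

# Header row and HXL tag row, then one row per admin unit
rows = [['Region'], ['#region+name']]
population = {'Africa': 1.3e9, 'Asia': 4.6e9}  # assumed column dict keyed by adm
extend_columns('regional', rows, ['Africa', 'Asia'], None, [population])
# rows -> [['Region'], ['#region+name'],
#          ['Africa', 1300000000.0], ['Asia', 4600000000.0]]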
Example #2
def extend_columns(level, rows, adms, hrp_countries, region, adminone, headers,
                   *args):
    columns = list()
    for arg in args:
        if arg:
            columns.extend(arg)
    if adms is None:
        adms = ['global']
    for adm in adms:
        if level == 'global':
            row = list()
        elif level == 'regional':
            row = [adm]
        elif level == 'national':
            ishrp = 'Y' if adm in hrp_countries else 'N'
            regions = sorted(list(region.iso3_to_region_and_hrp[adm]))
            regions.remove('GHO')
            row = [
                adm,
                Country.get_country_name_from_iso3(adm), ishrp,
                '|'.join(regions)
            ]
        elif level == 'subnational':
            countryiso3 = adminone.pcode_to_iso3[adm]
            countryname = Country.get_country_name_from_iso3(countryiso3)
            adm1_name = adminone.pcode_to_name[adm]
            row = [countryiso3, countryname, adm, adm1_name]
        else:
            raise ValueError('Invalid level')
        append = True
        for existing_row in rows[2:]:
            match = True
            for i, col in enumerate(row):
                if existing_row[i] != col:
                    match = False
                    break
            if match:
                append = False
                row = existing_row
                break
        if append:
            for hxltag in rows[1][len(row):]:
                if hxltag not in headers[1]:
                    row.append(None)
        for column in columns:
            row.append(column.get(adm))
        if append:
            rows.append(row)
    return columns
Example #3
def get_countriesdata(download_url, downloader):
    countrynameisomapping = dict()
    countriesdata = dict()
    headers, iterator = downloader.get_tabular_rows(download_url,
                                                    headers=1,
                                                    dict_form=True)
    countries = list()
    for row in iterator:
        countryname = row['country']
        countryiso = countrynameisomapping.get(countryname)
        if countryiso is None:
            countryiso, _ = Country.get_iso3_country_code_fuzzy(
                countryname, exception=ValueError)
            countrynameisomapping[countryname] = countryiso
            countries.append({
                'iso3': countryiso,
                'countryname': Country.get_country_name_from_iso3(countryiso),
                'origname': countryname
            })
        row['iso3'] = countryiso
        dict_of_lists_add(countriesdata, countryiso, row)
    # Insert at the higher index first so the inserts at index 3 do not shift it
    headers.insert(30, 'iso3')
    headers.insert(3, 'end_year')
    headers.insert(3, 'start_year')
    return countries, headers, countriesdata
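dict_of_lists_add, used above, appends a value to the list stored under a key, creating the list on first use. A small sketch (import path from hdx-python-utilities):

from hdx.utilities.dictandlist import dict_of_lists_add

d = dict()
dict_of_lists_add(d, 'AFG', {'year': 2019})
dict_of_lists_add(d, 'AFG', {'year': 2020})
# d -> {'AFG': [{'year': 2019}, {'year': 2020}]}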
Example #4
def Get_Country_Name_From_ISO3_Extended(countryISO):
    """
    Extended lookup of the country name for an ISO3 code. Falls back to
    sensible defaults for the non-standard codes (UKN, STA, TIB) found in
    the source data when the standard lookup fails.
    """

    countryName = ""

    # June-22 - get_country_name_from_iso3 was updated to call to-upper without
    # checking whether the data is null, so the call is wrapped in a try/except.
    try:
        countryName = Country.get_country_name_from_iso3(countryISO)
    except Exception:
        print("Failed to get the country name from get_country_name_from_iso3.")

    # Now let's try to resolve the three typical non-standard codes
    if countryName is None or countryName == "":

        print("Non-standard ISO code:", countryISO)

        if countryISO == "UKN":
            countryName = "Various / unknown"
        elif countryISO == "STA":
            countryName = "Stateless"
        elif countryISO == "TIB":
            countryName = "Tibetan"
        else:
            print("!!SERIOUS!! Unknown ISO code identified:", countryISO)
            # Let's add a sensible default here...
            countryName = "Various / unknown"

    return countryName
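A quick illustration of the fallback behaviour, assuming Country from hdx.location.country is imported (the XYZ code is a made-up example):

print(Get_Country_Name_From_ISO3_Extended("JPN"))  # -> Japan
print(Get_Country_Name_From_ISO3_Extended("STA"))  # -> Stateless
print(Get_Country_Name_From_ISO3_Extended("XYZ"))  # -> Various / unknown (after the !!SERIOUS!! warning)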
Example #5
def get_countries(countries_url, downloader):
    countrymapping = dict()

    _, iterator = downloader.get_tabular_rows(countries_url,
                                              headers=1,
                                              dict_form=True,
                                              format="csv")
    for row in iterator:
        countryiso = row["ISO3 Code"].strip()
        if not countryiso:
            continue
        # Skip rows whose ISO3 column actually holds a numeric (non-ISO3) code
        try:
            int(countryiso)
            continue
        except ValueError:
            pass
        countrymapping[row["Country Code"].strip()] = (
            countryiso,
            row["Country"].strip(),
        )
    countries = list()
    for countryiso, countryname in sorted(countrymapping.values()):
        newcountryname = Country.get_country_name_from_iso3(countryiso)
        if newcountryname:
            countries.append({
                "iso3": countryiso,
                "countryname": newcountryname,
                "origname": countryname,
            })
    return countries, countrymapping
Example #6
 def test_get_country_name_from_iso3(self):
     assert Country.get_country_name_from_iso3('jpn',
                                               use_live=False) == 'Japan'
     assert Country.get_country_name_from_iso3('awe',
                                               use_live=False) is None
     assert Country.get_country_name_from_iso3('Pol',
                                               use_live=False) == 'Poland'
     assert Country.get_country_name_from_iso3(
         'SGP', use_live=False) == 'Singapore'
     assert Country.get_country_name_from_iso3('uy', use_live=False) is None
     with pytest.raises(LocationError):
         Country.get_country_name_from_iso3('uy',
                                            use_live=False,
                                            exception=LocationError)
     assert Country.get_country_name_from_iso3('uy', use_live=False) is None
     assert Country.get_country_name_from_iso3(
         'VeN', use_live=False) == 'Venezuela (Bolivarian Republic of)'
     assert Country.get_country_name_from_iso3(
         'TWN', use_live=False) == 'Taiwan (Province of China)'
Example #7
def get_countries(countries_url, downloader):
    countries = list()
    headers, iterator = downloader.get_tabular_rows(countries_url, headers=1, dict_form=True, format='xlsx')
    for row in iterator:
        m49 = row['ISO Code']
        if not m49:
            continue
        iso3 = Country.get_iso3_from_m49(m49)
        countryname = Country.get_country_name_from_iso3(iso3)
        countries.append({'m49': m49, 'iso3': iso3, 'countryname': countryname})
    return countries
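For reference, a hedged sketch of the M49-to-ISO3 conversion this loader relies on (392 is the published M49 code for Japan):

from hdx.location.country import Country

iso3 = Country.get_iso3_from_m49(392)            # -> 'JPN'
name = Country.get_country_name_from_iso3(iso3)  # -> 'Japan'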
Example #8
def get_countriesdata(countries_url, downloader):
    countries = list()
    for row in downloader.get_tabular_rows(countries_url, dict_rows=True, headers=1, format='xlsx'):
        # country = row['Name']
        # iso3, _ = Country.get_iso3_country_code_fuzzy(country, exception=ValueError)
        # m49 = Country.get_m49_from_iso3(iso3)
        m49 = row['ISO Country Number']
        if not m49:
            continue
        iso3 = Country.get_iso3_from_m49(m49)
        countryname = Country.get_country_name_from_iso3(iso3)
        countries.append({'m49': m49, 'iso3': iso3, 'countryname': countryname})
    return countries
Example #9
def countries_from_iso_list(countriesset):
    """
    Create a list of dictionaries describing each country in the countriesset.
    The countriesset is a list or set of iso3 country identifiers.
    Output list contains a dictionary with "iso3" and "name" of a country.
    """
    countries = list()
    for countryiso in sorted(countriesset):
        if countryiso == WORLD:
            countries.append({"iso3": WORLD, "name": "World"})
        else:
            countryname = Country.get_country_name_from_iso3(countryiso)
            if countryname is None:
                continue
            countries.append({"iso3": countryiso, "name": countryname})
    return countries
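A usage sketch (the WORLD sentinel is defined elsewhere in this codebase; its value here is an assumption):

WORLD = "all"  # assumed sentinel for the world aggregate
countries_from_iso_list({"JPN", "AFG"})
# -> [{"iso3": "AFG", "name": "Afghanistan"},
#     {"iso3": "JPN", "name": "Japan"}]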
Example #10
def countries():
    """Table of countries (iso3 and country name) used in the data"""
    countries = set()
    for data in [
        "asylum_applications",
        "asylum_decisions",
        "demographics",
        "population_totals",
        "solutions",
    ]:
        df = evaluate(data).get()
        countries.update(df.ISO3CoO)
        countries.update(df.ISO3CoA)
    countries = sorted(countries)
    countrynames = [
        Country.get_country_name_from_iso3(countryiso) for countryiso in countries
    ]
    return pd.DataFrame(dict(iso3=countries, country=countrynames))
Example #11
def generate_dataset_and_showcase(folder, countryiso, countrydata,
                                  qc_indicators):
    countryname = Country.get_country_name_from_iso3(countryiso)
    title = '%s - Human Development Indicators' % countryname
    slugified_name = slugify('HDRO data for %s' % countryname).lower()
    logger.info('Creating dataset: %s' % title)
    dataset = Dataset({'name': slugified_name, 'title': title})
    dataset.set_maintainer('872427e4-7e9b-44d6-8c58-30d5052a00a2')
    dataset.set_organization('89ebe982-abe9-4748-9dde-cf04632757d6')
    dataset.set_expected_update_frequency('Every year')
    dataset.set_subnational(False)
    dataset.add_country_location(countryiso)
    tags = [
        'health', 'education', 'socioeconomic', 'demographics', 'development',
        'indicators', 'hxl'
    ]
    dataset.add_tags(tags)

    filename = 'hdro_indicators_%s.csv' % countryiso
    resourcedata = {
        'name': 'Human Development Indicators for %s' % countryname,
        'description': 'Human development data with HXL tags'
    }
    quickcharts = {
        'hashtag': '#indicator+code',
        'values': [x['code'] for x in qc_indicators],
        'cutdown': 2,
        'cutdownhashtags': ['#indicator+code', '#date+year', '#indicator+value+num']
    }

    def yearcol_function(row):
        result = dict()
        year = row['year']
        if year:
            if len(year) == 9:
                # nine characters indicates a range such as '2005/2006'
                startyear = year[:4]
                endyear = year[5:]
                result['startdate'], _ = parse_date_range(startyear,
                                                          date_format='%Y')
                _, result['enddate'] = parse_date_range(endyear,
                                                        date_format='%Y')
            else:
                result['startdate'], result['enddate'] = parse_date_range(
                    year, date_format='%Y')
        return result

    success, results = dataset.generate_resource_from_iterator(
        countrydata[0].keys(),
        countrydata,
        hxltags,
        folder,
        filename,
        resourcedata,
        date_function=yearcol_function,
        quickcharts=quickcharts)
    if success is False:
        logger.error('%s has no data!' % countryname)
        return None, None, None

    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': 'Indicators for %s' % countryname,
        'notes': 'Human Development indicators for %s' % countryname,
        'url': 'http://hdr.undp.org/en/countries/profiles/%s' % countryiso,
        'image_url': 'https://s1.stabroeknews.com/images/2019/12/undp.jpg'
    })
    showcase.add_tags(tags)

    return dataset, showcase, results['bites_disabled']
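For reference, parse_date_range (from hdx.utilities.dateparse in hdx-python-utilities) expands a bare year to its first and last instants, roughly:

from hdx.utilities.dateparse import parse_date_range

startdate, _ = parse_date_range('2010', date_format='%Y')  # startdate -> 2010-01-01
_, enddate = parse_date_range('2018', date_format='%Y')    # enddate   -> 2018-12-31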
Example #12
def generate_dataset_and_showcase(mvam_url, showcase_url, downloader, folder,
                                  countrydata, variables):
    """Parse json of the form:
    {
    },
    """
    iso3 = countrydata['iso3']
    countryname = Country.get_country_name_from_iso3(iso3)
    country_code = countrydata['code']
    if not checkfor_mvamdata(mvam_url, downloader, 'pblStatsSum',
                             country_code):
        logger.warning('%s has no data!' % countryname)
        return None, None, None
    title = '%s - Food Security Indicators' % countryname
    logger.info('Creating dataset: %s' % title)
    name = 'WFP Food Security indicators for %s' % countryname
    slugified_name = slugify(name).lower()
    dataset = Dataset({
        'name': slugified_name,
        'title': title,
    })
    dataset.set_maintainer('eda0ee04-7436-47f0-87ab-d1b9edcd3bb9')
    dataset.set_organization('3ecac442-7fed-448d-8f78-b385ef6f84e7')
    dataset.set_expected_update_frequency('Every month')
    dataset.set_subnational(False)
    try:
        dataset.add_country_location(iso3)
    except HDXError as e:
        logger.exception('%s has a problem! %s' % (countryname, e))
        return None, None, None

    tags = ['hxl', 'food security', 'indicators']
    dataset.add_tags(tags)

    dateformat = '%Y-%m-%dT%H:%M:%S'
    table = 'pblStatsSum'
    inputrows = get_mvamdata(mvam_url, downloader, table, country_code)

    filename = ('%s.csv' % table).lower()
    resourcedata = {'name': table, 'description': '%s: %s' % (table, title)}

    def process_date(row):
        # Drop rows with 25 or fewer observations
        if row['NumObs'] <= 25:
            return None
        row['VariableDescription'] = variables.get(row['Variable'], '')
        svydate = row['SvyDate']
        if svydate is None:
            return None
        svydate = datetime.strptime(svydate, dateformat)
        return {'startdate': svydate, 'enddate': svydate}

    quickcharts = {
        'hashtag': '#indicator+code',
        'values': ['FCS', 'rCSI', 'Proteins'],
        'cutdown': 2,
        'cutdownhashtags': ['#date', '#category', '#indicator+code', '#indicator+value+num']
    }
    success, results = dataset.generate_resource_from_iterator(
        headers,
        inputrows,
        hxltags,
        folder,
        filename,
        resourcedata,
        date_function=process_date,
        quickcharts=quickcharts)
    if success is False:
        logger.warning('%s has no data!' % countryname)
        return None, None, None

    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': title,
        'notes': 'Reports on food security for %s' % countryname,
        'url': showcase_url % iso3,
        'image_url': 'https://media.licdn.com/media/gcrc/dms/image/C5612AQHtvuWFVnGKAA/article-cover_image-shrink_423_752/0?e=2129500800&v=beta&t=00XnoAp85WXIxpygKvG7eGir_LqfxzXZz5lRGRrLUZw'
    })
    showcase.add_tags(tags)
    return dataset, showcase, results['bites_disabled']
Example #13
def generate_dataset_and_showcases(
    downloader, countryiso, indicator_metadata, countryalias
):
    """Parse json of the form:
    {'id': '1482', 'title': 'The spatial distribution of population in 2000,
        Zimbabwe', 'desc': 'Estimated total number of people per grid-cell...',  'doi': '10.5258/SOTON/WP00645',
        'date': '2018-11-01', 'popyear': '2000', 'citation': 'WorldPop',
        'data_file': 'GIS/Population/Global_2000_2020/2000/ZWE/zwe_ppp_2000.tif', 'archive': 'N', 'public': 'Y',
        'source': 'WorldPop, University of Southampton, UK', 'data_format': 'Geotiff', 'author_email': '*****@*****.**',
        'author_name': 'WorldPop', 'maintainer_name': 'WorldPop', 'maintainer_email': '*****@*****.**',
        'project': 'Population', 'category': 'Global per country 2000-2020', 'gtype': 'Population',
        'continent': 'Africa', 'country': 'Zimbabwe', 'iso3': 'ZWE',
        'files': ['ftp://ftp.worldpop.org.uk/GIS/Population/Global_2000_2020/2000/ZWE/zwe_ppp_2000.tif'],
        'url_img': 'https://www.worldpop.org/tabs/gdata/img/1482/zwe_ppp_wpgp_2000_Image.png',
        'organisation': 'WorldPop, University of Southampton, UK, www.worldpop.org',
        'license': 'https://www.worldpop.org/data/licence.txt',
        'url_summary': 'https://www.worldpop.org/geodata/summary?id=1482'}
    """
    allmetadata = dict()
    for subalias in countryalias:
        urls = countryalias[subalias]
        allmetadata_subalias = allmetadata.get(subalias, list())
        for url in urls:
            downloader.download(url)
            json = downloader.get_json()
            data = json["data"]
            if isinstance(data, list):
                allmetadata_subalias.extend(data)
            else:
                allmetadata_subalias.append(data)
        allmetadata[subalias] = allmetadata_subalias
    allmetadatavalues = list(allmetadata.values())
    lastmetadata = allmetadatavalues[0][-1]
    indicator_title = indicator_metadata["title"]
    if countryiso == "World":
        countryname = countryiso
    else:
        countryname = Country.get_country_name_from_iso3(countryiso)
        if not countryname:
            logger.exception(f"ISO3 {countryiso} not recognised!")
            return None, None
    title = f"{countryname} - {indicator_title}"
    slugified_name = slugify(f"WorldPop {indicator_title} for {countryname}").lower()
    logger.info(f"Creating dataset: {title}")
    licence_url = lastmetadata["license"].lower()  # suggest that they remove license and rename this field licence
    downloader.download(licence_url)
    licence = downloader.get_text()
    methodologies = list()
    url_imgs = list()
    for allmetadatavalue in allmetadatavalues:
        lastallmetadatavalue = allmetadatavalue[-1]
        methodologies.append(lastallmetadatavalue["desc"])
        url_img = lastallmetadatavalue["url_img"]
        if not url_img:
            for lastallmetadatavalue in reversed(allmetadatavalue[:-1]):
                url_img = lastallmetadatavalue["url_img"]
                if url_img:
                    break
        url_imgs.append(url_img)
    methodology = get_matching_then_nonmatching_text(methodologies)
    dataset = Dataset(
        {
            "name": slugified_name,
            "title": title,
            "notes": f"{indicator_metadata['desc']}  \nData for earlier dates is available directly from WorldPop.  \n  \n{lastmetadata['citation']}",
            "methodology": "Other",
            "methodology_other": methodology,
            "dataset_source": lastmetadata["source"],
            "license_id": "hdx-other",
            "license_other": licence,
            "private": False,
        }
    )
    dataset.set_maintainer("37023db4-a571-4f28-8d1f-15f0353586af")
    dataset.set_organization("3f077dff-1d05-484d-a7c2-4cb620f22689")
    dataset.set_expected_update_frequency("Every year")
    dataset.set_subnational(True)
    try:
        dataset.add_other_location(countryiso)
    except HDXError as e:
        logger.exception(f"{countryname} has a problem! {e}")
        return None, None

    tags = [indicator_metadata["name"].lower(), "geodata"]
    dataset.add_tags(tags)

    earliest_year = 10000
    latest_year = 0
    resources_dict = dict()
    for subalias in allmetadata:
        for metadata in allmetadata[subalias]:
            if metadata["public"].lower() != "y":
                continue
            year = metadata["popyear"]
            if not year:
                year = metadata["date"][:4]
            year = int(year)
            if year > latest_year:
                latest_year = year
            if year < earliest_year:
                earliest_year = year
            for url in sorted(metadata["files"], reverse=True):
                resource_name = url[url.rfind("/") + 1 :]
                description = metadata["title"]
                if not re.match(r".*([1-3][0-9]{3})", resource_name):
                    resource_parts = resource_name.split(".")
                    resource_name = f"{resource_parts[0]}_{year}"
                    if len(resource_parts) >= 2:
                        resource_name = f"{resource_name}.{resource_parts[1]}"
                    description = f"{description} in {year}"
                resource = {
                    "name": resource_name,
                    "format": metadata["data_format"],
                    "url": url,
                    "description": description,
                }
                dict_of_lists_add(resources_dict, year, resource)
    if not resources_dict:
        logger.error(f"{title} has no data!")
        return None, None
    for year in sorted(resources_dict.keys(), reverse=True)[:5]:  # Just get last 5 years of data
        for resource in resources_dict[year]:
            dataset.add_update_resource(resource)

    dataset.set_dataset_year_range(earliest_year, latest_year)

    showcases = list()
    for i, url_img in enumerate(url_imgs):
        if not url_img:
            continue
        allmetadatavalue = allmetadatavalues[i][-1]
        url_summary = allmetadatavalue["url_summary"]
        if i == 0:
            name = f"{slugified_name}-showcase"
        else:
            name = f"{slugified_name}-{i + 1}-showcase"
        showcase = Showcase(
            {
                "name": name,
                "title": f"WorldPop {countryname} {indicator_title} Summary Page",
                "notes": f"Summary for {allmetadatavalue['category']} - {countryname}",
                "url": url_summary,
                "image_url": url_img,
            }
        )
        showcase.add_tags(tags)
        showcases.append(showcase)
    return dataset, showcases
Example #14
def main():
    """Generate dataset and create it in HDX"""
    configuration = Configuration.read()
    with Download() as downloader:
        constants = float_value_convert(
            downloader.download_tabular_key_value(
                configuration['constants_url']))
        constants['Lighting Grid Tier'] = int(constants['Lighting Grid Tier'])

        camp_overrides = downloader.download_tabular_cols_as_dicts(
            configuration['camp_overrides_url'])
        camp_overrides['Population'] = integer_value_convert(
            camp_overrides['Population'], dropfailedvalues=True)
        camp_overrides['Country'] = key_value_convert(
            camp_overrides['Country'], valuefn=get_iso3)
        datasets = Dataset.search_in_hdx('displacement',
                                         fq='organization:unhcr')
        all_camps_per_country, unhcr_non_camp, unhcr_camp, unhcr_camp_excluded = \
            get_camp_non_camp_populations(constants['Non Camp Types'], constants['Camp Types'],
                                          camp_overrides, datasets, downloader)
        country_totals = copy.deepcopy(all_camps_per_country)

        world_bank_url = configuration['world_bank_url']
        urbanratios = get_worldbank_series(
            world_bank_url % configuration['urban_ratio_wb'], downloader)
        slumratios = get_slumratios(configuration['slum_ratio_url'],
                                    downloader)

        noncamp_elec_access = dict()
        noncamp_elec_access['Urban'] = get_worldbank_series(
            world_bank_url % configuration['urban_elec_wb'], downloader)
        noncamp_elec_access['Rural'] = get_worldbank_series(
            world_bank_url % configuration['rural_elec_wb'], downloader)
        noncamp_elec_access['Slum'] = avg_dicts(noncamp_elec_access['Rural'],
                                                noncamp_elec_access['Urban'])

        ieadata = downloader.download_tabular_cols_as_dicts(
            configuration['iea_data_url'])
        elecappliances = key_value_convert(ieadata['Electrical Appliances'],
                                           keyfn=get_iso3,
                                           valuefn=float,
                                           dropfailedkeys=True)
        cookinglpg = key_value_convert(ieadata['Cooking LPG'],
                                       keyfn=get_iso3,
                                       valuefn=float,
                                       dropfailedkeys=True)
        elecgridtiers = key_value_convert(
            downloader.download_tabular_key_value(
                configuration['elec_grid_tiers_url']),
            keyfn=int,
            valuefn=float)
        elecgriddirectenergy = float_value_convert(
            downloader.download_tabular_key_value(
                configuration['elec_grid_direct_energy_url']))
        elecgridco2 = key_value_convert(downloader.download_tabular_key_value(
            configuration['elec_grid_co2_url']),
                                        keyfn=get_iso3,
                                        valuefn=float,
                                        dropfailedkeys=True)

        def get_elecgridco2(iso, inf):
            elgridco2 = elecgridco2.get(iso)
            if elgridco2 is None:
                elgridco2, reg = model.calculate_regional_average(
                    'Grid CO2', elecgridco2, iso)
                inf.append('elco2(%s)=%.3g' % (reg, elgridco2))
            return elgridco2

        noncamptypes = downloader.download_tabular_cols_as_dicts(
            configuration['noncamp_types_url'])
        noncamplightingoffgridtypes = integer_value_convert(
            noncamptypes['Lighting OffGrid'])
        noncampcookingsolidtypes = integer_value_convert(
            noncamptypes['Cooking Solid'])

        camptypes = get_camptypes(configuration['camp_types_url'], downloader)
        camptypes_fallbacks_offgrid, camptypes_fallbacks_solid = \
            get_camptypes_fallbacks(configuration['camp_types_fallbacks_url'], downloader, keyfn=get_iso3)

        costs = downloader.download_tabular_cols_as_dicts(
            configuration['costs_url'])
        lightingoffgridcost = float_value_convert(costs['Lighting OffGrid'])
        cookingsolidcost = float_value_convert(costs['Cooking Solid'])

        noncamp_nonsolid_access = downloader.download_tabular_cols_as_dicts(
            configuration['noncamp_cooking_nonsolid_url'])
        noncamp_nonsolid_access['Urban'] = key_value_convert(
            noncamp_nonsolid_access['Urban'],
            keyfn=get_iso3,
            valuefn=float,
            dropfailedkeys=True)
        noncamp_nonsolid_access['Rural'] = key_value_convert(
            noncamp_nonsolid_access['Rural'],
            keyfn=get_iso3,
            valuefn=float,
            dropfailedkeys=True)
        noncamp_nonsolid_access['Slum'] = noncamp_nonsolid_access['Urban']

        small_camptypes = get_camptypes(configuration['small_camptypes_url'],
                                        downloader)
        small_camp_data = downloader.download_tabular_cols_as_dicts(
            configuration['small_camps_data_url'])
        smallcamps = float_value_convert(small_camp_data['Population'])
        small_camps_elecgridco2 = float_value_convert(
            small_camp_data['Electricity Grid CO2'])

        type_descriptions = downloader.download_tabular_cols_as_dicts(
            configuration['type_descriptions_url'])
        lighting_type_descriptions = type_descriptions['Lighting Descriptions']
        cooking_type_descriptions = type_descriptions['Cooking Descriptions']

    model = ChathamHouseModel(constants)
    pop_types = ['Urban', 'Slum', 'Rural', 'Camp', 'Small Camp']
    headers = list()
    results = list()

    for i, pop_type in enumerate(pop_types):
        results.append(list())
        if pop_type == 'Camp':
            headers.append(['ISO3 Country Code', 'Country Name', 'Camp Name'])
            hxlheaders = ['#country+code', '#country+name', '#loc+name']
        elif pop_type == 'Small Camp':
            headers.append(['Region'])
            hxlheaders = ['#region+name']
        else:
            headers.append(['ISO3 Country Code', 'Country Name'])
            hxlheaders = ['#country+code', '#country+name']
        headers[-1].extend(['Population', 'Tier'])
        hxlheaders.extend(['#population+num', '#indicator+tier'])
        if pop_type not in ['Camp', 'Small Camp']:
            headers[-1].extend(
                ['Grid Expenditure ($m/yr)', 'Grid CO2 Emissions (t/yr)'])
            hxlheaders.extend([
                '#indicator+value+grid+expenditure',
                '#indicator+value+grid+co2_emissions'
            ])
        headers[-1].extend([
            'Offgrid Type', 'Lighting Type Description',
            'Offgrid Expenditure ($m/yr)', 'Offgrid Capital Costs ($m)',
            'Offgrid CO2 Emissions (t/yr)'
        ])
        hxlheaders.extend([
            '#indicator+type+offgrid', '#indicator+text+lighting',
            '#indicator+value+offgrid+expenditure',
            '#indicator+value+offgrid+capital_costs',
            '#indicator+value+offgrid+co2_emissions'
        ])
        if pop_type not in ['Camp', 'Small Camp']:
            headers[-1].extend([
                'Nonsolid Expenditure ($m/yr)', 'Nonsolid CO2 Emissions (t/yr)'
            ])
            hxlheaders.extend([
                '#indicator+value+nonsolid+expenditure',
                '#indicator+value+nonsolid+co2_emissions'
            ])
        headers[-1].extend([
            'Solid Type', 'Cooking Type Description',
            'Solid Expenditure ($m/yr)', 'Solid Capital Costs ($m)',
            'Solid CO2_Emissions (t/yr)'
        ])
        hxlheaders.extend([
            '#indicator+type+solid', '#indicator+text+cooking',
            '#indicator+value+solid+expenditure',
            '#indicator+value+solid+capital_costs',
            '#indicator+value+solid+co2_emissions'
        ])
        if pop_type != 'Small Camp':
            headers[-1].append('Info')
            hxlheaders.append('#meta+info')

        results[i].append(hxlheaders)

    results.append(list())
    headers.append(['ISO3 Country Code', 'Country Name', 'Population'])
    hxlheaders = ['#country+code', '#country+name', '#population+num']
    results[len(results) - 1].append(hxlheaders)

    results.append(list())
    headers.append([
        'ISO3 Country Code', 'Country Name', 'Camp', 'Tier',
        'Cooking Spending', 'Cooking Description',
        'Population not using Biomass', 'Population using Biomass',
        'Lighting Spending', 'Lighting Description', 'Population on Grid',
        'Population off Grid'
    ])

    results.append(list())
    headers.append([
        'code', 'title', 'value', 'latest_date', 'source', 'source_link',
        'notes', 'explore', 'units'
    ])

    today = datetime.utcnow()

    for iso3 in sorted(unhcr_non_camp):
        info = list()
        population = model.sum_population(unhcr_non_camp, iso3,
                                          all_camps_per_country)
        number_hh_by_pop_type = model.calculate_population(
            iso3, population, urbanratios, slumratios, info)
        country_elecappliances = elecappliances.get(iso3)
        if country_elecappliances is None:
            country_elecappliances, region = \
                model.calculate_regional_average('Electrical Appliances', elecappliances, iso3)
            info.append('elap(%s)=%.3g' % (region, country_elecappliances))
        country_elecgridco2 = get_elecgridco2(iso3, info)
        country_cookinglpg = cookinglpg.get(iso3)
        if country_cookinglpg is None:
            country_cookinglpg, region = model.calculate_regional_average(
                'LPG', cookinglpg, iso3)
            info.append('lpg(%s)=%.3g' % (region, country_cookinglpg))

        cn = Country.get_country_name_from_iso3(iso3)
        for pop_type in number_hh_by_pop_type:
            model.reset_pop_counters()
            info2 = copy.deepcopy(info)
            number_hh = number_hh_by_pop_type[pop_type]

            country_elec_access = noncamp_elec_access[pop_type].get(iso3)
            if country_elec_access is None:
                country_elec_access, region = \
                    model.calculate_regional_average('Grid access', noncamp_elec_access[pop_type], iso3)
                info2.append('elac(%s)=%.3g' %
                             (region, country_elec_access))
            hh_grid_access, hh_offgrid = model.calculate_hh_access(
                number_hh, country_elec_access)
            pop_grid_access = model.calculate_population_from_hh(
                hh_grid_access)
            pop_offgrid_access = model.calculate_population_from_hh(hh_offgrid)
            model.pop_grid += pop_grid_access

            country_noncamp_nonsolid_access = noncamp_nonsolid_access[
                pop_type].get(iso3)
            if country_noncamp_nonsolid_access is None:
                country_noncamp_nonsolid_access, region = \
                    model.calculate_regional_average('Nonsolid access', noncamp_nonsolid_access[pop_type], iso3)
                info2.append('nsac(%s)=%.3g' %
                             (region, country_noncamp_nonsolid_access))
            hh_nonsolid_access, hh_no_nonsolid_access = \
                model.calculate_hh_access(number_hh, country_noncamp_nonsolid_access)
            pop_biomass_access = model.calculate_population_from_hh(
                hh_no_nonsolid_access)
            pop_nonbiomass_access = model.calculate_population_from_hh(
                hh_nonsolid_access)
            model.pop_nonbiomass += pop_nonbiomass_access

            ge, gc = model.calculate_ongrid_lighting(hh_grid_access,
                                                     elecgridtiers,
                                                     country_elecappliances,
                                                     country_elecgridco2)
            ne, nc = model.calculate_non_solid_cooking(hh_nonsolid_access,
                                                       country_cookinglpg)

            for tier in model.tiers:
                info3 = copy.deepcopy(info2)
                noncamplightingoffgridtype = model.get_noncamp_type(
                    noncamplightingoffgridtypes, pop_type, tier)
                noncampcookingsolidtype = model.get_noncamp_type(
                    noncampcookingsolidtypes, pop_type, tier)

                res = model.calculate_offgrid_solid(
                    tier, hh_offgrid, lighting_type_descriptions,
                    noncamplightingoffgridtype, lightingoffgridcost,
                    elecgriddirectenergy, country_elecgridco2,
                    hh_no_nonsolid_access, cooking_type_descriptions,
                    noncampcookingsolidtype, cookingsolidcost)
                noncamplightingtypedesc, oe, oc, oco2, noncampcookingtypedesc, se, sc, sco2 = res
                model.add_keyfigures(iso3,
                                     cn,
                                     pop_type,
                                     tier,
                                     se,
                                     oe,
                                     noncampcookingtypedesc,
                                     pop_biomass_access,
                                     noncamplightingtypedesc,
                                     pop_offgrid_access,
                                     results,
                                     ne=ne,
                                     ge=ge)
                population = model.calculate_population_from_hh(number_hh)
                info3 = ','.join(info3)
                row = [
                    iso3, cn, population, tier, ge, gc,
                    noncamplightingoffgridtype, noncamplightingtypedesc, oe,
                    oc, oco2, ne, nc, noncampcookingsolidtype,
                    noncampcookingtypedesc, se, sc, sco2, info3
                ]
                results[pop_types.index(pop_type.capitalize())].append(row)

    camp_offgridtypes_in_countries = dict()
    camp_solidtypes_in_countries = dict()
    missing_from_unhcr = list()
    for name in sorted(camptypes):
        model.reset_pop_counters()
        info = list()
        unhcrcampname = name
        result = unhcr_camp.get(unhcrcampname)
        if result is None:
            firstpart = name.split(':')[0].strip()
            for unhcrcampname in sorted(unhcr_camp):
                if firstpart in unhcrcampname:
                    result = unhcr_camp[unhcrcampname]
                    logger.info(
                        'Matched first part of name of %s to UNHCR name: %s' %
                        (name, unhcrcampname))
                    info.append('Matched %s' % firstpart)
                    break
        if result is None:
            camptype = unhcr_camp_excluded.get(name)
            if camptype is None:
                if check_name_dispersed(name):
                    logger.info(
                        'Camp %s from the spreadsheet has been treated as non-camp!'
                        % name)
                else:
                    missing_from_unhcr.append(name)
            else:
                logger.info('Camp %s is in UNHCR data but has camp type %s!' %
                            (name, camptype))
            continue
        population, iso3, accommodation_type = result
        cn = Country.get_country_name_from_iso3(iso3)  # needed by the warnings below
        del all_camps_per_country[iso3][accommodation_type][unhcrcampname]

        camp_camptypes = camptypes[name]

        number_hh = model.calculate_number_hh(population)
        country_elecgridco2 = get_elecgridco2(iso3, info)

        for tier in model.tiers:
            info2 = copy.deepcopy(info)
            camplightingoffgridtype = camp_camptypes.get(
                'Lighting OffGrid %s' % tier)
            if camplightingoffgridtype is None:
                logger.warning('No Lighting OffGrid %s for %s in %s' %
                               (tier, name, cn))
            campcookingsolidtype = camp_camptypes.get('Cooking Solid %s' %
                                                      tier)
            if campcookingsolidtype is None:
                logger.warning('No Cooking Solid %s for %s in %s' %
                               (tier, name, cn))

            res = model.calculate_offgrid_solid(
                tier, number_hh, lighting_type_descriptions,
                camplightingoffgridtype, lightingoffgridcost,
                elecgriddirectenergy, country_elecgridco2, number_hh,
                cooking_type_descriptions, campcookingsolidtype,
                cookingsolidcost)
            camplightingtypedesc, oe, oc, oco2, campcookingtypedesc, se, sc, sco2 = res
            model.add_keyfigures(iso3, cn, name, tier, se, oe,
                                 campcookingtypedesc, population,
                                 camplightingtypedesc, population, results)
            info2 = ','.join(info2)
            row = [
                iso3, cn, name, population, tier, camplightingoffgridtype,
                camplightingtypedesc, oe, oc, oco2, campcookingsolidtype,
                campcookingtypedesc, se, sc, sco2, info2
            ]
            results[pop_types.index('Camp')].append(row)
            if camplightingoffgridtype:
                append_value(camp_offgridtypes_in_countries, iso3, tier, name,
                             camplightingoffgridtype)
            if campcookingsolidtype:
                append_value(camp_solidtypes_in_countries, iso3, tier, name,
                             campcookingsolidtype)

    logger.info(
        'The following camps are in the spreadsheet but not in the UNHCR data : %s'
        % ', '.join(missing_from_unhcr))

    for iso3 in sorted(country_totals):
        info = list()
        population = model.sum_population(country_totals, iso3)
        cn = Country.get_country_name_from_iso3(iso3)
        row = [iso3, cn, population]
        results[len(results) - 3].append(row)

        extra_camp_types = all_camps_per_country[iso3]

        country_elecgridco2 = get_elecgridco2(iso3, info)

        for accommodation_type in sorted(extra_camp_types):
            camps = extra_camp_types[accommodation_type]
            for name in sorted(camps):
                model.reset_pop_counters()
                info2 = copy.deepcopy(info)
                population = camps[name]
                if population < 20000:
                    logger.info(
                        'Ignoring extra camp %s from UNHCR data with population %s (<20000) and accommodation type %s in country %s.'
                        % (name, population, accommodation_type, cn))
                    continue
                number_hh = model.calculate_number_hh(population)
                offgrid_tiers_in_country = camp_offgridtypes_in_countries.get(
                    iso3)
                if offgrid_tiers_in_country is None:
                    offgrid_tiers_in_country = camptypes_fallbacks_offgrid.get(
                        iso3)
                    if not offgrid_tiers_in_country:
                        logger.warning(
                            'Missing fallback for country %s, where UNHCR data has extra camp %s with population %s and accommodation type %s'
                            % (cn, name, population, accommodation_type))
                        continue
                info2.append('UNHCR only')
                for tier in offgrid_tiers_in_country:
                    info3 = copy.deepcopy(info2)
                    camplightingoffgridtype = offgrid_tiers_in_country[tier]
                    if isinstance(camplightingoffgridtype, int):
                        campcookingsolidtype = camptypes_fallbacks_solid[iso3][
                            tier]
                        info3.append('Fallback')
                    else:
                        camplightingoffgridtype = model.calculate_mostfrequent(
                            offgrid_tiers_in_country[tier])
                        campcookingsolidtype = model.calculate_mostfrequent(
                            camp_solidtypes_in_countries[iso3][tier])

                    res = model.calculate_offgrid_solid(
                        tier, number_hh, lighting_type_descriptions,
                        camplightingoffgridtype, lightingoffgridcost,
                        elecgriddirectenergy, country_elecgridco2, number_hh,
                        cooking_type_descriptions, campcookingsolidtype,
                        cookingsolidcost)
                    camplightingtypedesc, oe, oc, oco2, campcookingtypedesc, se, sc, sco2 = res
                    model.add_keyfigures(iso3, cn, name, tier, se, oe,
                                         campcookingtypedesc, population,
                                         camplightingtypedesc, population,
                                         results)
                    info3 = ','.join(info3)
                    row = [
                        iso3, cn, name, population, tier,
                        camplightingoffgridtype, camplightingtypedesc, oe, oc,
                        oco2, campcookingsolidtype, campcookingtypedesc, se,
                        sc, sco2, info3
                    ]
                    results[pop_types.index('Camp')].append(row)

    for region in sorted(smallcamps):
        model.reset_pop_counters()
        info = list()
        population = smallcamps[region]
        if not population or population == '-':
            continue
        number_hh = model.calculate_number_hh(population)
        region_camptypes = small_camptypes.get(region)
        if region_camptypes is None:
            logger.info('Missing camp group %s in small camp types!' % region)
            continue

        elecco2 = small_camps_elecgridco2[region]
        if not elecco2 or elecco2 == '-':
            info.append('Blank elco2')
            elecco2 = 0

        for tier in model.tiers:
            info2 = copy.deepcopy(info)
            camplightingoffgridtype = region_camptypes['Lighting OffGrid %s' %
                                                       tier]
            campcookingsolidtype = region_camptypes['Cooking Solid %s' % tier]

            res = model.calculate_offgrid_solid(
                tier, number_hh, lighting_type_descriptions,
                camplightingoffgridtype, lightingoffgridcost,
                elecgriddirectenergy, elecco2, number_hh,
                cooking_type_descriptions, campcookingsolidtype,
                cookingsolidcost)
            camplightingtypedesc, oe, oc, oco2, campcookingtypedesc, se, sc, sco2 = res
            model.add_keyfigures('', region, 'small camp', tier, se, oe,
                                 campcookingtypedesc, population,
                                 camplightingtypedesc, population, results)
            info2 = ','.join(info2)
            row = [
                region,
                model.round(population), tier, camplightingoffgridtype,
                camplightingtypedesc, oe, oc, oco2, campcookingsolidtype,
                campcookingtypedesc, se, sc, sco2, info2
            ]
            results[pop_types.index('Small Camp')].append(row)

    date = today.date().isoformat()
    source = 'Estimate from the Moving Energy Initiative'
    data_url = 'https://data.humdata.org/dataset/energy-consumption-of-refugees-and-displaced-people'
    rows = [
        [
            'MEI01',
            '% of Refugees and Displaced People Cooking with Biomass in Camps',
            model.get_camp_percentage_biomass(), date, source, data_url, '',
            '', 'ratio'
        ],
        [
            'MEI02', '% of Refugees and Displaced People Off-Grid in Camps',
            model.get_camp_percentage_offgrid(), date, source, data_url, '',
            '', 'ratio'
        ],
        [
            'MEI03',
            'Total Annual Energy Spending by Refugees and Displaced People',
            model.get_total_spending(), date, source, data_url, '', '',
            'dollars_million'
        ],
        [
            'MEI04', 'No. of Countries Hosting Refugees and Displaced People',
            len(country_totals), date, source, data_url, '', '', 'count'
        ]
    ]
    results[len(results) - 1].extend(rows)

    dataset, resources, showcase = generate_dataset_resources_and_showcase(
        pop_types, today)
    folder = gettempdir()
    file_to_upload = None
    for i, _ in enumerate(results):
        resource = resources[i]
        file_to_upload = join(folder, resource['name'])
        write_list_to_csv(results[i], file_to_upload, headers=headers[i])
        resource.set_file_to_upload(file_to_upload)
    dataset.add_update_resources(resources)
    dataset.update_from_yaml()
    #    dataset.create_in_hdx()
    for resource in dataset.get_resources():
        name = resource['name'].lower()
        if 'figures' in name and 'disagg' not in name:
            logger.info('Updating key figures datastore for %s' % name)
Example #15
def generate_datasets_and_showcases(downloader, folder, indicatorname,
                                    indicatortypedata, countriesdata,
                                    showcase_base_url):
    dataset_template = Dataset()
    dataset_template.set_maintainer('196196be-6037-4488-8b71-d786adf4c081')
    dataset_template.set_organization('ed727a5b-3e6e-4cd6-b97e-4a71532085e6')
    dataset_template.set_expected_update_frequency('Every year')
    dataset_template.set_subnational(False)
    tags = ['hxl', indicatorname.lower()]
    dataset_template.add_tags(tags)

    earliest_year = 10000
    latest_year = 0
    countrycode = None
    iso3 = None
    countryname = None
    rows = None
    datasets = list()
    showcases = list()

    def output_csv():
        if rows is None:
            return
        headers = deepcopy(downloader.response.headers)
        for i, header in enumerate(headers):
            if 'year' in header.lower():
                headers.insert(i, 'EndYear')
                headers.insert(i, 'StartYear')
                break
        headers.insert(0, 'Iso3')
        hxlrow = dict()
        for header in headers:
            hxlrow[header] = hxltags.get(header, '')
        rows.insert(0, hxlrow)
        filepath = join(folder, '%s_%s.csv' % (indicatorname, countrycode))
        write_list_to_csv(rows, filepath, headers=headers)
        ds = datasets[-1]
        ds.set_dataset_year_range(earliest_year, latest_year)
        ds.resources[0].set_file_to_upload(filepath)

    for row in downloader.get_tabular_rows(indicatortypedata['FileLocation'],
                                           dict_rows=True,
                                           headers=1,
                                           format='csv',
                                           encoding='WINDOWS-1252'):
        newcountry = row['Area Code']
        if newcountry != countrycode:
            output_csv()
            rows = None
            countrycode = newcountry
            result = countriesdata.get(countrycode)
            if result is None:
                logger.warning('Ignoring %s' % countrycode)
                continue
            iso3, cn = result
            countryname = Country.get_country_name_from_iso3(iso3)
            if countryname is None:
                logger.error('Missing country %s: %s, %s' %
                             (countrycode, cn, iso3))
                continue
            rows = list()
            title = '%s - %s Indicators' % (countryname, indicatorname)
            logger.info('Generating dataset: %s' % title)
            name = 'FAOSTAT %s indicators for %s' % (countryname,
                                                     indicatorname)
            slugified_name = slugify(name).lower()
            dataset = Dataset(deepcopy(dataset_template.data))
            dataset['name'] = slugified_name
            dataset['title'] = title
            dataset.update_from_yaml()
            dataset.add_country_location(countryname)
            earliest_year = 10000
            latest_year = 0

            resource = Resource({'name': title, 'description': ''})
            resource.set_file_type('csv')
            dataset.add_update_resource(resource)
            datasets.append(dataset)
            showcase = Showcase({
                'name': '%s-showcase' % slugified_name,
                'title': title,
                'notes': dataset['notes'],
                'url': '%s%s' % (showcase_base_url, countrycode),
                'image_url': 'http://www.fao.org/uploads/pics/food-agriculture.png'
            })
            showcase.add_tags(tags)
            showcases.append(showcase)
        row['Iso3'] = iso3
        row['Area'] = countryname
        year = row['Year']
        if '-' in year:
            years = year.split('-')
            row['StartYear'] = years[0]
            row['EndYear'] = years[1]
        else:
            years = [year]
            row['StartYear'] = year
            row['EndYear'] = year
        for year in years:
            year = int(year)
            if year < earliest_year:
                earliest_year = year
            if year > latest_year:
                latest_year = year
        if rows is not None:
            rows.append(row)
    output_csv()
    return datasets, showcases
Example #16
def generate_country_dataset_and_showcase(downloader, folder, headersdata,
                                          countryiso, countrydata,
                                          indicator_datasets, tags):
    indicator_datasets_list = indicator_datasets.values()
    title = extract_list_from_list_of_dict(indicator_datasets_list, 'title')
    countryname = Country.get_country_name_from_iso3(countryiso)
    dataset = get_dataset('%s - %s' % (countryname, title[0]), tags,
                          'IDMC IDP data for %s' % countryname)
    try:
        dataset.add_country_location(countryiso)
    except HDXError as e:
        logger.exception('%s has a problem! %s' % (countryname, e))
        return None, None, None
    description = extract_list_from_list_of_dict(indicator_datasets_list,
                                                 'notes')
    dataset['notes'] = get_matching_then_nonmatching_text(description,
                                                          separator='\n\n',
                                                          ignore='\n')
    methodology = extract_list_from_list_of_dict(indicator_datasets_list,
                                                 'methodology_other')
    dataset['methodology_other'] = get_matching_then_nonmatching_text(
        methodology)
    caveats = extract_list_from_list_of_dict(indicator_datasets_list,
                                             'caveats')
    dataset['caveats'] = get_matching_then_nonmatching_text(caveats)

    years = set()
    bites_disabled = [True, True, True]
    for endpoint in countrydata:
        data = countrydata[endpoint]
        headers, hxltags = headersdata[endpoint]
        rows = [headers, hxltags]
        for row in data:
            newrow = list()
            for hxltag in hxltags:
                newrow.append(row.get(hxltag))
            rows.append(newrow)
            year = row.get('#date+year')
            conflict_stock = row.get('#affected+idps+ind+stock+conflict')
            if conflict_stock:
                bites_disabled[0] = False
            conflict_new = row.get('#affected+idps+ind+newdisp+conflict')
            if conflict_new:
                bites_disabled[1] = False
            disaster_new = row.get('#affected+idps+ind+newdisp+disaster')
            if disaster_new:
                bites_disabled[2] = False
            if year is None:
                continue
            years.add(year)
        name = indicator_datasets[endpoint].get_resources()[0]['description']
        resourcedata = {
            'name': endpoint,
            'description': '%s for %s' % (name, countryname)
        }
        filename = '%s_%s.csv' % (endpoint, countryname)
        dataset.generate_resource_from_rows(folder, filename, rows,
                                            resourcedata)
    years = sorted(years)
    dataset.set_dataset_year_range(years[0], years[-1])
    url = 'http://www.internal-displacement.org/countries/%s/' % countryname.replace(
        ' ', '-')
    try:
        downloader.setup(url)
    except DownloadError:
        altname = Country.get_country_info_from_iso3(
            countryiso)['#country+alt+i_en+name+v_unterm']
        url = 'http://www.internal-displacement.org/countries/%s/' % altname
        try:
            downloader.setup(url)
        except DownloadError:
            return dataset, None, bites_disabled
    showcase = Showcase({
        'name': '%s-showcase' % dataset['name'],
        'title': 'IDMC %s Summary Page' % countryname,
        'notes': 'Click the image on the right to go to the IDMC summary page for the %s dataset' % countryname,
        'url': url,
        'image_url': 'http://www.internal-displacement.org/sites/default/files/logo_0.png'
    })
    showcase.add_tags(tags)
    return dataset, showcase, bites_disabled
Example #17
def generate_datasets_and_showcase(configuration, base_url, downloader, folder,
                                   country, dhstags):
    """
    """
    countryiso = country['iso3']
    dhscountrycode = country['dhscode']
    countryname = Country.get_country_name_from_iso3(countryiso)
    title = '%s - Demographic and Health Data' % countryname
    logger.info('Creating datasets for %s' % title)
    tags = ['hxl', 'health', 'demographics']

    dataset = get_dataset(countryiso, tags)
    if dataset is None:
        return None, None, None, None
    dataset['title'] = title.replace('Demographic', 'National Demographic')
    slugified_name = slugify('DHS Data for %s' % countryname).lower()
    dataset['name'] = slugified_name
    dataset.set_subnational(False)

    subdataset = get_dataset(countryiso, tags)
    if subdataset is None:
        return None, None, None, None

    subdataset['title'] = title.replace('Demographic',
                                        'Subnational Demographic')
    subslugified_name = slugify('DHS Subnational Data for %s' %
                                countryname).lower()
    subdataset['name'] = subslugified_name
    subdataset.set_subnational(True)

    dataset['notes'] = description % (
        subdataset['title'], configuration.get_dataset_url(subslugified_name))
    subdataset['notes'] = description % (
        dataset['title'], configuration.get_dataset_url(slugified_name))

    bites_disabled = {'national': dict(), 'subnational': dict()}

    def process_national_row(_, row):
        row['ISO3'] = countryiso
        if tagname == 'DHS Quickstats':
            process_quickstats_row(row, bites_disabled['national'])
        return row

    def process_subnational_row(_, row):
        row['ISO3'] = countryiso
        val = row['CharacteristicLabel']
        if val[:2] == '..':
            val = val[2:]
        row['Location'] = val
        if tagname == 'DHS Quickstats':
            process_quickstats_row(row, bites_disabled['subnational'])
        return row

    years = set()
    subyears = set()

    for dhstag in dhstags:
        tagname = dhstag['TagName'].strip()
        resource_name = '%s Data for %s' % (tagname, countryname)
        resourcedata = {
            'name': resource_name,
            'description': 'HXLated csv containing %s data' % tagname
        }

        url = '%sdata/%s?tagids=%s&breakdown=national&perpage=10000&f=csv' % (
            base_url, dhscountrycode, dhstag['TagID'])
        filename = '%s_national_%s.csv' % (tagname, countryiso)
        _, results = dataset.download_and_generate_resource(
            downloader,
            url,
            hxltags,
            folder,
            filename,
            resourcedata,
            header_insertions=[(0, 'ISO3')],
            row_function=process_national_row,
            yearcol='SurveyYear')
        years.update(results['years'])

        url = url.replace('breakdown=national', 'breakdown=subnational')
        filename = '%s_subnational_%s.csv' % (tagname, countryiso)
        try:
            insertions = [(0, 'ISO3'), (1, 'Location')]
            _, results = subdataset.download_and_generate_resource(
                downloader,
                url,
                hxltags,
                folder,
                filename,
                resourcedata,
                header_insertions=insertions,
                row_function=process_subnational_row,
                yearcol='SurveyYear')
            subyears.update(results['years'])
        except DownloadError as ex:
            # Swallow only the specific 'Variable RET is undefined' error;
            # re-raise anything else
            cause = ex.__cause__
            if cause is None or 'Variable RET is undefined' not in str(cause):
                raise ex
    if len(dataset.get_resources()) == 0:
        dataset = None
    else:
        set_dataset_date_bites(dataset, years, bites_disabled, 'national')
    if len(subdataset.get_resources()) == 0:
        subdataset = None
    else:
        set_dataset_date_bites(subdataset, subyears, bites_disabled,
                               'subnational')

    publication = get_publication(base_url, downloader, dhscountrycode)
    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': publication['PublicationTitle'],
        'notes': publication['PublicationDescription'],
        'url': publication['PublicationURL'],
        'image_url': publication['ThumbnailURL']
    })
    showcase.add_tags(tags)
    return dataset, subdataset, showcase, bites_disabled
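The '..' stripping in process_subnational_row can be illustrated in isolation; this is a sketch with a hypothetical row, assuming the leading '..' on CharacteristicLabel is only a display prefix:

row = {'CharacteristicLabel': '..Western Region'}
val = row['CharacteristicLabel']
if val[:2] == '..':
    val = val[2:]  # drop the leading '..' display prefix
row['Location'] = val
assert row['Location'] == 'Western Region'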
Example #18
    def get_country_name_from_iso3(self, countryiso):
        countryname = self.country_name_mappings.get(countryiso)
        if countryname:
            return countryname
        return Country.get_country_name_from_iso3(countryiso)
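A minimal usage sketch for the override pattern above, assuming a hypothetical holder class with a country_name_mappings dict:

from hdx.location.country import Country

class CountryNames:  # hypothetical holder class
    def __init__(self, country_name_mappings):
        # e.g. {'COD': 'DR Congo'} to override the library's default names
        self.country_name_mappings = country_name_mappings

    def get_country_name_from_iso3(self, countryiso):
        countryname = self.country_name_mappings.get(countryiso)
        if countryname:
            return countryname
        return Country.get_country_name_from_iso3(countryiso)

names = CountryNames({'COD': 'DR Congo'})
names.get_country_name_from_iso3('COD')  # 'DR Congo' (mapping override)
names.get_country_name_from_iso3('AFG')  # falls back to the library lookup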
Example #19
    def generate_dataset_and_showcase(self, countryiso3, folder):
        countryname = Country.get_country_name_from_iso3(countryiso3)
        title = f'{countryname} - Food Prices'
        logger.info(f'Creating dataset: {title}')
        name = f'WFP food prices for {countryname}'
        slugified_name = slugify(name).lower()

        dataset = Dataset({
            'name': slugified_name,
            'title': title,
        })
        dataset.set_maintainer('f1921552-8c3e-47e9-9804-579b14a83ee3')
        dataset.set_organization('3ecac442-7fed-448d-8f78-b385ef6f84e7')

        dataset.set_expected_update_frequency('weekly')
        dataset.add_country_location(countryname)
        dataset.set_subnational(True)
        tags = ['commodities', 'prices', 'markets', 'hxl']
        dataset.add_tags(tags)

        prices_data = self.get_list('MarketPrices/PriceMonthly', countryiso3)
        if not prices_data:
            logger.info(f'{countryiso3} has no prices data!')
            return None, None, None
        market_to_adm = dict()
        for market in self.get_list('Markets/List', countryiso3):
            market_to_adm[market['marketId']] = (market['admin1Name'],
                                                 market['admin2Name'],
                                                 market['marketLatitude'],
                                                 market['marketLongitude'])

        rows = dict()
        sources = dict()
        markets = dict()
        for price_data in prices_data:
            if price_data['commodityPriceFlag'] not in ('actual', 'aggregate'):
                continue
            date = price_data['commodityPriceDate']
            category = self.commodity_to_category[price_data['commodityID']]
            market = price_data['marketName']
            if market == 'National Average':
                adm1 = adm2 = lat = lon = ''
            else:
                market_id = price_data['marketID']
                if market_id in market_to_adm:
                    adm1, adm2, lat, lon = market_to_adm[market_id]
                else:
                    adm1 = adm2 = lat = lon = ''
            orig_source = price_data['commodityPriceSourceName'].replace(
                'M/o', 'Ministry of').replace('+', '/')
            regex = r'Government.*,(Ministry.*)'
            match = re.search(regex, orig_source)
            if match:
                split_sources = [match.group(1)]
            else:
                split_sources = orig_source.replace(',', '/').replace(
                    ';', '/').split('/')
            for source in split_sources:
                source = source.strip()
                if not source:
                    continue
                if source[-1] == '.':
                    source = source[:-1]
                source_lower = source.lower()
                if 'mvam' in source_lower and len(source_lower) <= 8:
                    source = 'WFP mVAM'
                elif '?stica' in source:
                    source = source.replace('?stica', 'ística')
                source_lower = source.lower()
                if not self.match_source(sources.keys(), source_lower):
                    sources[source_lower] = source
            commodity = price_data['commodityName']
            unit = price_data['commodityUnitName']
            price = price_data['commodityPrice']
            currency = price_data['currencyName']
            pricetype = price_data['commodityPriceFlag']
            key = date, adm1, adm2, market, category, commodity, unit
            rows[key] = {
                'date': date,
                'adm1name': adm1,
                'adm2name': adm2,
                'market': market,
                'latitude': lat,
                'longitude': lon,
                'category': category,
                'commodity': commodity,
                'unit': unit,
                'currency': currency,
                'pricetype': pricetype,
                'price': price
            }
            if adm1 and adm2 and category:
                adm1adm2market = adm1, adm2, market
                commodities = markets.get(adm1adm2market, dict())
                dict_of_lists_add(commodities, (commodity, unit, currency),
                                  (date, price))
                markets[adm1adm2market] = commodities
        if not rows:
            logger.info(f'{countryiso3} has no prices!')
            return None, None, None
        number_market = list()
        for key, commodities in markets.items():
            number_market.append((len(commodities), key))
        number_market = sorted(number_market, reverse=True)
        qc_indicators = list()
        qc_rows = [qc_hxltags]
        chosen_commodities = set()
        # Go through markets starting with the one with most commodities
        for _, adm1adm2market in number_market:
            commodities = markets[adm1adm2market]
            number_commodity = list()
            for commodityunitcurrency, details in commodities.items():
                number_commodity.append((len(details), commodityunitcurrency))
            number_commodity = sorted(number_commodity, reverse=True)
            index = 0
            # Pick commodity with most rows that has not already been used for another market
            commodity, unit, currency = number_commodity[index][1]
            while commodity in chosen_commodities:
                index += 1
                if index == len(number_commodity):
                    commodity, unit, currency = number_commodity[0][1]
                    break
                commodity, unit, currency = number_commodity[index][1]
            adm1, adm2, market = adm1adm2market
            code = f'{adm1}-{adm2}-{market}-{commodity}-{unit}-{currency}'
            for date, price in sorted(commodities[(commodity, unit,
                                                   currency)]):
                qc_rows.append({'date': date, 'code': code, 'price': price})
            chosen_commodities.add(commodity)
            marketname = market
            if adm2 != market:
                marketname = f'{adm2}/{marketname}'
            if adm1 != adm2:
                marketname = f'{adm1}/{marketname}'
            qc_indicators.append({
                'code': code,
                'title': f'Price of {commodity} in {market}',
                'unit': f'Currency {currency}',
                'description': f'Price of {commodity} ({currency}/{unit}) in {marketname}',
                'code_col': '#meta+code',
                'value_col': '#value',
                'date_col': '#date'
            })
            if len(qc_indicators) == 3:
                break
        dataset['dataset_source'] = ', '.join(sorted(sources.values()))
        filename = f'wfp_food_prices_{countryiso3.lower()}.csv'
        resourcedata = {
            'name': title,
            'description': 'Food prices data with HXL tags',
            'format': 'csv'
        }
        rows = [rows[key] for key in sorted(rows)]
        dataset.generate_resource_from_iterator(headers,
                                                rows,
                                                hxltags,
                                                folder,
                                                filename,
                                                resourcedata,
                                                datecol='date')
        filename = f'wfp_food_prices_{countryiso3.lower()}_qc.csv'
        resourcedata = {
            'name': f'QuickCharts: {title}',
            'description': 'Food prices QuickCharts data with HXL tags',
            'format': 'csv'
        }
        dataset.generate_resource_from_rows(folder,
                                            filename,
                                            qc_rows,
                                            resourcedata,
                                            headers=list(qc_hxltags.keys()))
        showcase = Showcase({
            'name': f'{slugified_name}-showcase',
            'title': f'{title} showcase',
            'notes': f'{countryname} food prices data from World Food Programme displayed through VAM Economic Explorer',
            'url': f'http://dataviz.vam.wfp.org/economic_explorer/prices?iso3={countryiso3}',
            'image_url': 'http://dataviz.vam.wfp.org/_images/home/3_economic.jpg'
        })
        showcase.add_tags(tags)
        return dataset, showcase, qc_indicators
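dict_of_lists_add above comes from hdx.utilities.dictandlist (hdx-python-utilities); a short sketch, with hypothetical values, of how it accumulates (date, price) observations under each (commodity, unit, currency) key:

from hdx.utilities.dictandlist import dict_of_lists_add

commodities = dict()
key = ('Maize', 'KG', 'AFN')  # (commodity, unit, currency)
dict_of_lists_add(commodities, key, ('2021-01-15', 25.0))
dict_of_lists_add(commodities, key, ('2021-02-15', 26.5))
assert commodities[key] == [('2021-01-15', 25.0), ('2021-02-15', 26.5)]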
Example #20
def generate_dataset(dataset_id,
                     configuration,
                     downloader,
                     output_failures=False):
    metadata_url = configuration["metadata_url"] % dataset_id
    response = downloader.download(
        f"{configuration['base_url']}{metadata_url}")
    json = response.json()
    study_desc = json["study_desc"]
    title_statement = study_desc["title_statement"]
    title = title_statement["title"]
    logger.info(f"Creating dataset: {title}")
    study_info = study_desc["study_info"]
    data_collection = study_desc["method"]["data_collection"]
    sources = [x["name"] for x in study_desc["authoring_entity"]]
    methodology = list()
    data_kind = study_info.get("data_kind")
    if data_kind is not None:
        methodology.append(f"Kind of Data: {data_kind}  \n")
    unit_analysis = study_info.get("universe")
    if unit_analysis is None:
        unit_analysis = study_info.get("analysis_unit")
    if unit_analysis is not None:
        methodology.append(f"Unit of Analysis: {unit_analysis}  \n")
    sampling = data_collection.get("sampling_procedure")
    if sampling is not None:
        methodology.append(f"Sampling Procedure: {sampling}  \n")
    collection = data_collection.get("coll_mode")
    if collection is not None:
        methodology.append(f"Data Collection Mode: {collection}  \n")
    dataset_name = slugify(title_statement["idno"])
    countryiso3s = set()
    for nation in study_info["nation"]:
        countryiso3 = nation["abbreviation"]
        if not countryiso3:
            countryname = nation["name"]
            if countryname:
                countryiso3, _ = Country.get_iso3_country_code_fuzzy(
                    countryname)
        if countryiso3:
            countryiso3s.add(countryiso3)
    if len(countryiso3s) == 1:
        countryname = Country.get_country_name_from_iso3(min(countryiso3s))
        title = f"{countryname} - {title}"
    dataset = Dataset({
        "name": dataset_name,
        "title": title,
        "notes": study_info["abstract"],
        "dataset_source": ", ".join(sources),
        "methodology": "Other",
        "methodology_other": "".join(methodology),
    })
    dataset.set_maintainer("ac47b0c8-548b-4c37-a685-7377e75aad55")
    dataset.set_organization("abf4ca86-8e69-40b1-92f7-71509992be88")
    dataset.set_expected_update_frequency("Never")
    dataset.set_subnational(True)
    if output_failures:
        try:
            dataset.add_country_locations(countryiso3s)
        except HDXError:
            ui_url = configuration["ui_url"] % dataset_id
            url = f"{configuration['base_url']}{ui_url}"
            # 'failures' is assumed to be a module-level list collecting error messages
            failures.append(
                f"Invalid country id {countryiso3s} in dataset {url}!")
            return None
    else:
        dataset.add_country_locations(countryiso3s)
    tags = list()

    def add_tags(inwords, key):
        for inword in inwords:
            inword = inword[key].strip().lower()
            if "," in inword:
                words = inword.split(",")
            elif "/" in inword:
                words = inword.split("/")
            else:
                words = [inword]
            newwords = list()
            for innerword in words:
                if "and" in innerword:
                    newwords.extend(innerword.split(" and "))
                elif "&" in innerword:
                    newwords.extend(innerword.split(" & "))
                elif "other" in innerword:
                    newwords.extend(innerword.split("other"))
                else:
                    newwords.append(innerword)
            for word in newwords:
                word = word.strip()
                if word:
                    tags.append(word)

    add_tags(study_info["topics"], "topic")
    add_tags(study_info.get("keywords", list()), "keyword")
    dataset.add_tags(tags)
    dataset.clean_tags()
    coll_dates = study_info["coll_dates"][0]
    startdate, _ = parse_date_range(coll_dates["start"])
    _, enddate = parse_date_range(coll_dates["end"])
    dataset.set_date_of_dataset(startdate, enddate)

    auth_url = configuration["auth_url"] % dataset_id
    resourcedata = {
        "name": title,
        "description":
        'Clicking "Download" leads outside HDX where you can request access to the data in csv, xlsx & dta formats',
        "url": f"{configuration['base_url']}{auth_url}",
        "format": "web app",
    }
    dataset.add_update_resource(resourcedata)

    documentation_url = configuration["documentation_url"] % dataset_id
    resourcedata = {
        "name": "Codebook",
        "description":
        "Contains information about the dataset's metadata and data",
        "url": f"{configuration['base_url']}{documentation_url}",
        "format": "pdf",
    }
    dataset.add_update_resource(resourcedata)

    return dataset
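parse_date_range above comes from hdx.utilities.dateparse; a short sketch, with hypothetical inputs, of how the start and end of the collection period are derived:

from hdx.utilities.dateparse import parse_date_range

startdate, _ = parse_date_range('2015-03')  # first moment of March 2015
_, enddate = parse_date_range('2016')       # last moment of 2016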