def test_ocha_feed_file_working(self):
     """Check country lookups against a local file, then against the live OCHA feed."""
     # Load a local countries file with Uzbekistan deliberately removed.
     countries = hxl.data(script_dir_plus_file('Countries_UZB_Deleted.csv', TestCountry), allow_local=True)
     Country.set_countriesdata(countries)
     # Uzbekistan was deleted from the file, so the lookup fails; others still work.
     assert Country.get_iso3_country_code('UZBEKISTAN') is None
     assert Country.get_iso3_country_code('south sudan') == 'SSD'
     # Reset to the default OCHA url and force a reload from the live feed.
     Country.set_ocha_url()
     Country._countriesdata = None
     assert Country.get_iso3_country_code('UZBEKISTAN', use_live=True) == 'UZB'
     # NOTE(review): with a non-existent url the lookup below still succeeds —
     # presumably the library falls back to bundled data; confirm in Country.
     Country.set_ocha_url('NOTEXIST')
     Country._countriesdata = None
     assert Country.get_iso3_from_iso2('AF') == 'AFG'
def get_access(configuration, admininfo, downloader, scrapers=None):
    """Compute access constraint/impact/mitigation outputs at global, regional
    and national level.

    Args:
        configuration: Mapping containing an 'access_constraints' section with
            'ranking_url', 'sheets', 'dataset' and 'category' entries.
        admininfo: Object exposing regions, iso3_to_region_and_hrp and
            countryiso3s.
        downloader: Download object passed to read_tabular/read_hdx.
        scrapers: Optional list of name fragments; if given and none matches
            this function's name, nine empty lists are returned.

    Returns:
        9-tuple: (headers, values, sources) for global, regional and national
        outputs respectively.
    """
    # Only run when this scraper was requested (matched against function name).
    name = inspect.currentframe().f_code.co_name
    if scrapers and not any(scraper in name for scraper in scrapers):
        return list(), list(), list(), list(), list(), list(), list(), list(
        ), list()
    access_configuration = configuration['access_constraints']
    ranking_url = access_configuration['ranking_url']
    # Ranking csv: per country, the top 3 constraints of each sheet appear in
    # columns '<sheet>_1'..'<sheet>_3'.
    headers, rows = read_tabular(downloader, {
        'url': ranking_url,
        'headers': 1,
        'format': 'csv'
    })
    sheets = access_configuration['sheets']
    # sheet -> countryiso -> list of that country's ranked constraint texts
    constraint_rankings = {x: dict() for x in sheets}
    # Countries present in the ranking file per region (pct denominator below).
    nocountries_per_region = {'global': 0}
    # region -> sheet -> constraint text -> number of countries reporting it
    top3counts = {'global': dict()}
    for region in admininfo.regions:
        nocountries_per_region[region] = 0
        top3counts[region] = dict()
    for row in rows:
        countryiso = row['iso3']
        nocountries_per_region['global'] += 1
        for region in admininfo.iso3_to_region_and_hrp.get(countryiso, list()):
            nocountries_per_region[region] += 1
        for sheet in sheets:
            if '%s_1' % sheet not in row:
                continue
            type_ranking = constraint_rankings.get(sheet, dict())
            for i in range(1, 4):
                constraint = row['%s_%d' % (sheet, i)]
                dict_of_lists_add(type_ranking, countryiso, constraint)
            constraint_rankings[sheet] = type_ranking
    data = dict()
    # One HDX dataset holds all sheets; only the 'sheet' key varies per loop.
    datasetinfo = {
        'dataset': access_configuration['dataset'],
        'headers': 1,
        'format': 'xlsx'
    }
    for sheet, sheetinfo in sheets.items():
        datasetinfo['sheet'] = sheetinfo['sheetname']
        headers, rows = read_hdx(downloader, datasetinfo)
        datasheet = data.get(sheet, dict())
        for row in rows:
            countryiso = Country.get_iso3_country_code(
                row[sheetinfo['isocol']])
            if countryiso not in admininfo.countryiso3s:
                continue
            countrydata = datasheet.get(countryiso, dict())
            score = countrydata.get('score', 0)
            newscore = row[sheetinfo['scorecol']]
            textcol = sheetinfo.get('textcol')
            if textcol:
                # Sheets with a text column: collect (score, text) pairs and
                # accumulate an optionally weighted numeric score per country.
                text = row[textcol]
                dict_of_lists_add(countrydata, 'text', (newscore, text))
                for region, top3countsregion in top3counts.items():
                    if region != 'global' and region not in admininfo.iso3_to_region_and_hrp.get(
                            countryiso, list()):
                        continue
                    top3countssheet = top3countsregion.get(sheet, dict())
                    if sheet == 'impact':
                        # Impact counts any non-zero score...
                        if newscore != 0:
                            top3countssheet[text] = top3countssheet.get(
                                text, 0) + 1
                    else:
                        # ...constraint sheets only count the maximum score 3.
                        if newscore == 3:
                            top3countssheet[text] = top3countssheet.get(
                                text, 0) + 1
                    top3countsregion[sheet] = top3countssheet
                weights = sheetinfo.get('weights')
                if weights:
                    weight = weights.get(text)
                    if weight:
                        newscore *= weight
                score += newscore
            else:
                # Sheets without a text column: the score itself is the value
                # (e.g. a 'yes'/'no' answer); count 'yes' under the sheet name
                # and overwrite rather than accumulate the score.
                dict_of_lists_add(countrydata, 'text', (newscore, newscore))
                for region, top3countsregion in top3counts.items():
                    if region != 'global' and region not in admininfo.iso3_to_region_and_hrp.get(
                            countryiso, list()):
                        continue
                    top3countssheet = top3countsregion.get(sheet, dict())
                    if newscore == 'yes':
                        top3countssheet[sheet] = top3countssheet.get(sheet,
                                                                     0) + 1
                    top3countsregion[sheet] = top3countssheet
                score = newscore
            countrydata['score'] = score
            datasheet[countryiso] = countrydata
        data[sheet] = datasheet
    # Global/regional outputs: top 3 texts and percentages per sheet; dicts are
    # laid out pairwise (text at i*2, pct at i*2+1).
    gvaluedicts = [dict() for _ in range(7)]
    rvaluedicts = [dict() for _ in range(7)]
    for region, top3countsregion in top3counts.items():
        if region == 'global':
            valuedicts = gvaluedicts
        else:
            valuedicts = rvaluedicts
        for i, (sheet, top3countssheet) in enumerate(top3countsregion.items()):
            sortedcounts = sorted(top3countssheet,
                                  key=top3countssheet.get,
                                  reverse=True)
            texts = list()
            pcts = list()
            for text in sortedcounts[:3]:
                texts.append(text)
                pcts.append(
                    get_fraction_str(top3countssheet[text],
                                     nocountries_per_region[region]))
            if sheet == 'mitigation':
                # Mitigation emits a single percentage only, no text column.
                valuedicts[i * 2][region] = pcts[0]
            else:
                valuedicts[i * 2][region] = '|'.join(texts)
                valuedicts[i * 2 + 1][region] = '|'.join(pcts)
    # National outputs: severity score/category plus top-scoring texts.
    valuedicts = [dict() for _ in range(6)]
    severityscore = valuedicts[0]
    for i, sheet in enumerate(data):
        datasheet = data[sheet]
        for countryiso in datasheet:
            countrydata = datasheet[countryiso]
            # Keep only texts tied for the highest score; non-mitigation texts
            # must also appear in the country's ranking-file entry.
            ranked = sorted(countrydata['text'], reverse=True)
            top_value = ranked[0][0]
            texts = list()
            for value, text in countrydata['text']:
                if value == top_value:
                    if sheet == 'mitigation' or text in constraint_rankings[
                            sheet][countryiso]:
                        texts.append(text)
            valuedicts[i + 2][countryiso] = '|'.join(texts)
            # Severity score sums the scores of the constraint sheets only.
            if 'constraints' in sheet:
                score = severityscore.get(countryiso, 0)
                score += countrydata['score']
                severityscore[countryiso] = score
    ranges = access_configuration['category']
    severitycategory = valuedicts[1]
    for countryiso in severityscore:
        score = severityscore.get(countryiso)
        # NOTE(review): score cannot be None here (the keys iterate over
        # severityscore itself), so this branch looks defensive/unreachable.
        if score is None:
            severitycategory[countryiso] = None
            continue
        severitycategory[countryiso] = process_range(ranges, score)
    logger.info('Processed access')
    grheaders = [
        'Access Constraints Into', 'Access Constraints Into Pct',
        'Access Constraints Within', 'Access Constraints Within Pct',
        'Access Impact', 'Access Impact Pct', 'Mitigation Pct'
    ]
    headers = [
        'Access Severity Score', 'Access Severity Category',
        'Access Constraints Into', 'Access Constraints Within',
        'Access Impact', 'Mitigation'
    ]
    grhxltags = [
        '#access+constraints+into+desc', '#access+constraints+into+pct',
        '#access+constraints+within+desc', '#access+constraints+within+pct',
        '#access+impact+desc', '#access+impact+pct', '#access+mitigation+pct'
    ]
    hxltags = [
        '#severity+access+num+score', '#severity+access+category+num',
        '#access+constraints+into+desc', '#access+constraints+within+desc',
        '#access+impact+desc', '#access+mitigation+desc'
    ]
    # datasetinfo['date'/'source'/'source_url'] are presumably populated by
    # read_hdx as a side effect — confirm against its implementation.
    return [grheaders, grhxltags], gvaluedicts, \
           [(hxltag, datasetinfo['date'], datasetinfo['source'], datasetinfo['source_url']) for hxltag in grhxltags], \
           [grheaders, grhxltags], rvaluedicts, \
           [(hxltag, datasetinfo['date'], datasetinfo['source'], datasetinfo['source_url']) for hxltag in grhxltags], \
           [headers, hxltags], valuedicts, \
           [(hxltag, datasetinfo['date'], datasetinfo['source'], datasetinfo['source_url']) for hxltag in hxltags]
示例#3
0
 def test_get_iso3_country_code(self):
     """Exercise exact and fuzzy ISO3 lookups plus exception raising, offline only."""
     # Exact lookups: full names, abbreviations, ISO2 and lowercase all resolve.
     exact = [
         ('jpn', 'JPN'),
         ('Dem. Rep. of the Congo', 'COD'),
         ('Russian Fed.', 'RUS'),
         ('Micronesia (Federated States of)', 'FSM'),
         ('Iran (Islamic Rep. of)', 'IRN'),
         ('United Rep. of Tanzania', 'TZA'),
         ('Syrian Arab Rep.', 'SYR'),
         ('Central African Rep.', 'CAF'),
         ('Rep. of Korea', 'KOR'),
         ('St. Pierre and Miquelon', 'SPM'),
         ('Christmas Isl.', 'CXR'),
         ('Cayman Isl.', 'CYM'),
         ('jp', 'JPN'),
         ('Taiwan (Province of China)', 'TWN'),
     ]
     for country, expected in exact:
         assert Country.get_iso3_country_code(country,
                                              use_live=False) == expected
     # Fuzzy lookup returns (iso3, exact_match_flag).
     for country, expected in [('jpn', ('JPN', True)),
                               ('ZWE', ('ZWE', True)),
                               ('Vut', ('VUT', True))]:
         assert Country.get_iso3_country_code_fuzzy(country,
                                                    use_live=False) == expected
     # Unknown names return None / (None, False), or raise when asked to.
     assert Country.get_iso3_country_code('abc', use_live=False) is None
     with pytest.raises(LocationError):
         Country.get_iso3_country_code('abc',
                                       use_live=False,
                                       exception=LocationError)
     assert Country.get_iso3_country_code_fuzzy(
         'abc', use_live=False) == (None, False)
     with pytest.raises(LocationError):
         Country.get_iso3_country_code_fuzzy('abc',
                                             use_live=False,
                                             exception=LocationError)
     for country, expected in [
             ('United Kingdom', ('GBR', False)),
             ('United Kingdom of Great Britain and Northern Ireland',
              ('GBR', True)),
             ('united states', ('USA', False)),
             ('united states of america', ('USA', True)),
     ]:
         assert Country.get_iso3_country_code_fuzzy(country,
                                                    use_live=False) == expected
     assert Country.get_iso3_country_code('UZBEKISTAN',
                                          use_live=False) == 'UZB'
     assert Country.get_iso3_country_code_fuzzy(
         'UZBEKISTAN', use_live=False) == ('UZB', True)
     # Partial names fail the exact lookup but still fuzzy-match.
     for country, expected in [('Sierra', ('SLE', False)),
                               ('Venezuela', ('VEN', False))]:
         assert Country.get_iso3_country_code(country,
                                              use_live=False) is None
         assert Country.get_iso3_country_code_fuzzy(country,
                                                    use_live=False) == expected
     fuzzy = [
         ('Heard Isl.', ('HMD', False)),
         ('Falkland Isl.', ('FLK', False)),
         ('Czech Republic', ('CZE', False)),
         ('Czech Rep.', ('CZE', False)),
         ('Islamic Rep. of Iran', ('IRN', False)),
         ('Dem. Congo', ('COD', False)),
         ('Congo, Republic of', ('COG', False)),
         ('Republic of the Congo', ('COG', False)),
         ('Vietnam', ('VNM', False)),
         ('South Korea', ('KOR', False)),
         ('Korea Republic', ('KOR', False)),
         ('Dem. Republic Korea', ('PRK', False)),
         ('North Korea', ('PRK', False)),
         ('Serbia and Kosovo: S/RES/1244 (1999)', ('SRB', False)),
         ('U.S. Virgin Islands', ('VIR', True)),
         ('U.K. Virgin Islands', ('VGB', False)),
         ('Taiwan', ('TWN', False)),
     ]
     for country, expected in fuzzy:
         assert Country.get_iso3_country_code_fuzzy(country,
                                                    use_live=False) == expected
     # Any exception class can be supplied, not just LocationError.
     with pytest.raises(ValueError):
         Country.get_iso3_country_code('abc',
                                       use_live=False,
                                       exception=ValueError)
     with pytest.raises(ValueError):
         Country.get_iso3_country_code_fuzzy('abc',
                                             use_live=False,
                                             exception=ValueError)
示例#4
0
def get_country_url_string(country_list):
    """Build an FTS query fragment of M49 codes joined by ':OR:iso='.

    Args:
        country_list: Iterable of country names resolvable by
            Country.get_iso3_country_code.

    Returns:
        The countries' M49 codes as strings, joined with ':OR:iso='.
    """
    m49_codes = list()
    for country in country_list:
        iso3 = Country.get_iso3_country_code(country)
        m49_codes.append(str(Country.get_m49_from_iso3(iso3)))
    return ':OR:iso='.join(m49_codes)
 def test_country_conversion(self):
     """Spot-check ISO3 lookup for two well-known country names."""
     expected = {'Afghanistan': 'AFG', 'China': 'CHN'}
     for country, iso3 in expected.items():
         assert Country.get_iso3_country_code(country) == iso3
def get_camp_non_camp_populations(noncamp_types, camp_types, camp_overrides,
                                  datasets, downloader):
    """Read the latest UNHCR displacement dataset and split populations by
    accommodation type into camp, non-camp and excluded buckets per country.

    Args:
        noncamp_types: Comma-separated accommodation substrings treated as non-camp.
        camp_types: Comma-separated accommodation substrings treated as camp.
        camp_overrides: Dict with 'Accommodation Type', 'Population' and
            'Country' override mappings keyed by camp name.
        datasets: Iterable of HDX dataset objects to search.
        downloader: Download object exposing get_tabular_rows.

    Returns:
        Tuple (all_camps_per_country, unhcr_non_camp, unhcr_camp,
        unhcr_camp_excluded).

    Raises:
        ValueError: If no dataset with 'displacement' in its title is found.
    """
    noncamp_types = noncamp_types.split(',')
    camp_types = camp_types.split(',')
    dataset_unhcr = None
    latest_date = None
    # Pick the most recently dated dataset whose title mentions displacement.
    for dataset in datasets:
        if 'displacement' in dataset['title'].lower():
            date = dataset.get_dataset_date_as_datetime()
            if latest_date is None or date > latest_date:
                dataset_unhcr = dataset
                latest_date = date
    if dataset_unhcr is None:
        raise ValueError('No UNHCR dataset found!')
    url = dataset_unhcr.get_resources()[0]['url']
    country_ind = 0  # assume first column contains country
    iso3 = None
    row = None
    prev_row = None
    all_camps_per_country = dict()
    unhcr_non_camp = dict()
    unhcr_camp = dict()
    unhcr_camp_excluded = dict()
    rowiter = downloader.get_tabular_rows(url, sheet='Tab15')
    # Skip forward until the first column resolves to an ISO3 code: that row is
    # the first data row and prev_row then holds the column headers.
    for row in rowiter:
        country = row[country_ind]
        iso3 = Country.get_iso3_country_code(country)
        if iso3 is not None:
            break
        prev_row = row
    # Sniff column positions from the header row plus the first data row; the
    # population column is the first remaining one whose value parses as int.
    accommodation_ind = None
    location_ind = None
    population_ind = None
    population = None
    for i, text in enumerate(prev_row):
        header = text.lower()
        value = row[i]
        if 'accommodation' in header:
            accommodation_ind = i
        elif 'location' in header and len(value) > 1:
            location_ind = i
        else:
            try:
                population = int(value)
                population_ind = i
                break
            except ValueError:
                pass
    campname = row[location_ind]

    def get_accommodation_type(name):
        # Overrides win; otherwise read the accommodation cell of the current
        # row (closure over the loop variable `row`, rebound each iteration).
        accom_type = camp_overrides['Accommodation Type'].get(name)
        if accom_type is None:
            accom_type = row[accommodation_ind]
        else:
            logger.info('Overriding accommodation type to %s for %s' %
                        (accom_type, name))
        return accom_type.lower()

    accommodation_type = get_accommodation_type(campname)

    def match_camp_types(name, accom_type, pop, iso):
        # Classify a camp by accommodation substring into the camp and/or
        # non-camp buckets; anything unmatched lands in the excluded bucket.
        if check_name_dispersed(name):
            accom_type = noncamp_types[0]
        found_camp_type = None
        for camp_type in camp_types:
            if camp_type in accom_type:
                found_camp_type = camp_type
                unhcr_camp[name] = pop, iso, found_camp_type
                break
        for noncamp_type in noncamp_types:
            if noncamp_type in accom_type:
                found_camp_type = noncamp_type
                append_value(unhcr_non_camp, iso, found_camp_type, name, pop)
                break
        if found_camp_type is None:
            append_value(unhcr_camp_excluded, iso, accom_type, name, pop)
            append_value(all_camps_per_country, iso, accom_type, name, pop)
        else:
            append_value(all_camps_per_country, iso, found_camp_type, name,
                         pop)

    # Process the first data row (already consumed by the header-detection loop
    # above), then the remainder of the sheet.
    match_camp_types(campname, accommodation_type, population, iso3)
    for row in rowiter:
        country = row[country_ind]
        if not country:
            continue
        # A NOTES row marks the end of the data section.
        if 'NOTES' in country.upper():
            break
        iso3, match = Country.get_iso3_country_code_fuzzy(country)
        if iso3 is None:
            logger.warning('Country %s could not be matched to ISO3 code!' %
                           country)
            continue
        else:
            if match is False:
                logger.info('Matched %s to ISO3: %s!' % (country, iso3))
        campname = row[location_ind]
        accommodation_type = get_accommodation_type(campname)
        population = int(row[population_ind])
        match_camp_types(campname, accommodation_type, population, iso3)

    # Finally add override camps that were not present in the sheet.
    for campname in sorted(camp_overrides['Population']):
        if campname in unhcr_camp:
            continue
        iso3 = camp_overrides['Country'][campname]
        accommodation_type = camp_overrides['Accommodation Type'][
            campname].lower()
        population = camp_overrides['Population'][campname]
        logger.info('Adding camp from override: %s (%s, %s): %d' %
                    (campname, iso3, accommodation_type, population))
        match_camp_types(campname, accommodation_type, population, iso3)

    return all_camps_per_country, unhcr_non_camp, unhcr_camp, unhcr_camp_excluded
示例#7
0
def update_fts(base_url, downloader, country_list, resource_updates):
    """Combine FTS plan requirements and funding per country into one table and
    write it as an HXLated CSV.

    Args:
        base_url: Base url of the FTS API (trailing slash expected).
        downloader: Download object whose download(url) returns a response with
            a .json() method.
        country_list: Country names resolvable by Country.get_iso3_country_code.
        resource_updates: Dict whose 'fts' entry holds the output csv 'path'.

    Returns:
        None. Writes the csv to resource_updates['fts']['path'].
    """
    requirements_url = '%splan/country/' % base_url
    funding_url = '%sfts/flow?groupby=plan&countryISO3=' % base_url

    columns_to_keep = [
        'country', 'id', 'name', 'code', 'startDate', 'endDate', 'year',
        'revisedRequirements', 'totalFunding'
    ]
    combined = pd.DataFrame()
    hxl_names = {
        "country": "#country+name",
        "id": "#x_appeal+id",
        "name": "#x_appeal+name",
        "code": "#x_appeal+code",
        "revisedRequirements": "#x_requirement+x_usd+x_current",
        "endDate": "#date+end",
        "totalFunding": "#x_funding+x_usd",
        "startDate": "#date+start",
        "year": "#date+year",
        "percentFunded": "#x_requirement+x_met+x_percent"
    }

    for country in country_list:
        iso3 = Country.get_iso3_country_code(country)
        r = downloader.download('%s%s' % (requirements_url, iso3))
        data = r.json()['data']
        dfreq_norm = json_normalize(data)
        # Bug fix: fillna returns a new Series unless inplace=True; the original
        # call discarded the result, leaving NaN ids in place.
        dfreq_norm['id'] = dfreq_norm['id'].fillna('missing')
        dfreq_loc = json_normalize(data, 'locations')
        dfreq_loc.rename(columns={'name': 'country'}, inplace=True)
        del dfreq_loc['id']
        dfreq_norm_loc = dfreq_norm.join(dfreq_loc)
        dfreq_year = json_normalize(data, 'years')
        del dfreq_year['id']
        dfreq = dfreq_norm_loc.join(dfreq_year)
        # Strip the '.0' float artifact from ids; regex=True keeps the
        # pre-pandas-2.0 behaviour (patterns were regexes by default).
        dfreq['id'] = dfreq.id.astype(str).str.replace(r'\.0', '', regex=True)
        r = downloader.download('%s%s' % (funding_url, iso3))
        data = r.json(
        )['data']['report3']['fundingTotals']['objects'][0]['objectsBreakdown']
        dffund = json_normalize(data)
        df = dfreq.merge(dffund, on='id')
        df.totalFunding += df.onBoundaryFunding
        df.rename(columns={'name_x': 'name'}, inplace=True)
        # drop unwanted columns
        df = drop_columns(df, columns_to_keep)
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        combined = pd.concat([combined, df], ignore_index=True)

    # trim date strings to YYYY-MM-DD
    combined.startDate = combined.startDate.str[:10]
    combined.endDate = combined.endDate.str[:10]

    # add column for % funded
    combined['percentFunded'] = (
        pd.to_numeric(combined.totalFunding) /
        pd.to_numeric(combined.revisedRequirements)) * 100

    # sort
    combined.sort_values(['country', 'endDate'],
                         ascending=[True, False],
                         inplace=True)

    # add HXL tags
    combined = hxlate(combined, hxl_names)

    # Convert floats to string and truncate at the decimal point (formatters
    # don't work on columns with mixed types). The original additionally
    # filtered with .str.contains('.') — but '.' is a regex matching any
    # character, so the filter was a no-op and is dropped here.
    combined['percentFunded'] = combined['percentFunded'].astype(str)
    combined['percentFunded'] = combined['percentFunded'].str.split('.').str[0]

    combined.to_csv(resource_updates['fts']['path'],
                    encoding='utf-8',
                    index=False,
                    date_format='%Y-%m-%d')