示例#1
0
def country(from_key='name', to_key='iso'):
    """Build a one-argument lookup that translates country attributes.

    The returned function receives a value of the ``from_key`` attribute
    and answers with the ``to_key`` attribute of the matching country
    record. With the defaults it is called with a country name and returns
    that record's ``iso`` attribute.

    :param from_key: (optional) the country attribute supplied as input.
        Defaults to ``name``.
    :param to_key: (optional) the country attribute wanted as output.
        Defaults to ``iso``.
    :return: mapper; it returns ``None`` when no (truthy) record is found.
    :rtype: function
    """

    cache = GeonamesCache()
    records_by_key = cache.get_dataset_by_key(cache.get_countries(), from_key)
    lookup_is_by_name = 'name' == from_key

    def mapper(input):
        # The parameter name is part of the returned function's interface,
        # so it is kept; work on a local alias instead.
        lookup_key = input
        if lookup_is_by_name:
            # Alternative spellings are translated through the names mapping.
            lookup_key = mappings.country_names.get(lookup_key, lookup_key)
        record = records_by_key.get(lookup_key)
        if not record:
            return None
        return record[to_key]

    return mapper
示例#2
0
文件: plot.py 项目: dpitch40/maps
def plot_world_chloropleth(datafile,
                           dest,
                           colorscale,
                           bins,
                           nodatacolor='#dddddd',
                           scale=1,
                           projection='robin',
                           resolution='l',
                           usecol='Magnitude',
                           inputkwargs=None):
    """Draw a world choropleth from a CSV file and save it to ``dest``.

    Format: CSV with 'Country Name', 'Country Code', and 'Magnitude' columns.
    'Country Code' becomes the index and is matched against the ``ADM0_A3``
    attribute of every shape read from the shapefile; the column named by
    ``usecol`` supplies the values.

    :param datafile: path or buffer handed to ``pandas.read_csv``.
    :param dest: target handed to ``plt.savefig``.
    :param colorscale: colormap name handed to ``plt.get_cmap``.
    :param bins: bin edges handed to ``np.digitize``; ``len(bins) - 1``
        colors are sampled from the colormap.
    :param nodatacolor: color used for shapes without a usable value.
    :param scale: multiplier applied to the module-level default figure
        size and line widths.
    :param projection: projection name handed to ``Basemap``.
    :param resolution: resolution handed to ``Basemap``.
    :param usecol: name of the CSV column that holds the plotted values.
    :param inputkwargs: (optional) extra keyword arguments for
        ``pandas.read_csv``.
    """

    # See http://ramiro.org/notebook/basemap-choropleth/
    shapefile = 'ne_10m_admin_0_countries_lakes/ne_10m_admin_0_countries_lakes'
    num_colors = len(bins) - 1

    gc = GeonamesCache()
    iso3_codes = list(gc.get_dataset_by_key(gc.get_countries(), 'iso3').keys())

    # None instead of a shared mutable ``{}`` default.
    df = pd.read_csv(datafile, **(inputkwargs or {}))
    df.set_index('Country Code', inplace=True)
    # One row per known country code; codes missing from the CSV become
    # NaN rows and non-country rows are dropped.
    df = df.reindex(iso3_codes)

    values = df[usecol]
    # https://matplotlib.org/api/pyplot_summary.html#matplotlib.pyplot.colormaps
    cm = plt.get_cmap(colorscale)
    scheme = [cm(i / num_colors) for i in range(num_colors)]
    # Trailing no-data slot. With increasing ``bins``, ``digitize(...) - 1``
    # gives -1 below bins[0] and len(bins) - 1 at or above bins[-1] (NaN is
    # expected to sort last as well); both indices hit this slot.
    scheme.append(nodatacolor)
    df['bin'] = np.digitize(values, bins) - 1

    # This doesn't work, is it important?
    # mpl.style.use('map')
    fig = plt.figure(figsize=(default_size * scale, default_size * scale))

    ax = fig.add_subplot(111, facecolor='w', frame_on=False)

    m = Basemap(lon_0=0, projection=projection, resolution=resolution)
    m.drawmapboundary(linewidth=default_map_linewidth * scale, color='w')

    m.readshapefile(shapefile,
                    'units',
                    color='#444444',
                    linewidth=default_border_linewidth * scale)
    for info, shape in zip(m.units_info, m.units):
        iso3 = info['ADM0_A3']
        if iso3 in df.index:
            # Scalar lookup + int(): selecting the whole row first can
            # upcast the bin to float, which is not a valid list index.
            color = scheme[int(df.loc[iso3, 'bin'])]
        else:
            color = nodatacolor

        # ``closed`` by keyword: newer Matplotlib no longer takes it
        # positionally.
        patches = [Polygon(np.array(shape), closed=True)]
        pc = PatchCollection(patches)
        pc.set_facecolor(color)
        ax.add_collection(pc)

    plt.savefig(dest, bbox_inches='tight')
def country(from_key='name', to_key='iso'):
    """Return a function mapping a ``from_key`` value to its ``to_key`` value.

    The returned mapper looks the given key up in the country dataset
    indexed by ``from_key`` and returns the ``to_key`` attribute of the
    record, or ``None`` when there is no (truthy) record.
    """
    cache = GeonamesCache()
    records = cache.get_dataset_by_key(cache.get_countries(), from_key)

    def mapper(key):
        # Name lookups first go through the alternative-names mapping.
        if from_key == 'name':
            key = mappings.country_names.get(key, key)
        record = records.get(key)
        return record[to_key] if record else None

    return mapper
示例#4
0
def plotChoropleth(filename, imgfile, figNum):
    """Plot the 'DISCON' column of a CSV as a world choropleth.

    The CSV must contain the columns 'CC' (matched against the shapefile's
    ``ISO_A2`` attribute) and 'DISCON'. Values are binned on 20 evenly
    spaced edges between 0 and 1 and colored with the 'Reds' colormap.

    :param filename: CSV path handed to ``pandas.read_csv``.
    :param imgfile: output path handed to ``plt.savefig``.
    :param figNum: Matplotlib figure number; also printed in the progress
        message.
    """
    shapefile = 'data/ne/ne_10m_admin_0_countries'
    cols = ['CC', 'DISCON']
    num_colors = 20
    nodata_color = '#dddddd'
    gc = GeonamesCache()
    iso_codes = list(gc.get_dataset_by_key(gc.get_countries(), 'iso').keys())
    df = pd.read_csv(filename, skiprows=0, usecols=cols)
    df.set_index('CC', inplace=True)
    # Filter out non-countries and missing values. ``reindex`` replaces the
    # ``.ix`` indexer, which no longer exists in current pandas.
    df = df.reindex(iso_codes).dropna()
    values = df['DISCON']
    cm = plt.get_cmap('Reds')
    scheme = [cm(float(i) / num_colors) for i in range(num_colors)]
    bins = np.linspace(0, 1, num_colors)
    df['bin'] = np.digitize(values, bins) - 1

    # NOTE(review): newer Matplotlib releases appear to have renamed the
    # seaborn styles (e.g. 'seaborn-v0_8-pastel') - confirm for the target
    # version.
    mpl.style.use('seaborn-pastel')
    print('Plotting Figure {0}: {1}'.format(figNum, imgfile))
    fig = plt.figure(figNum, figsize=(22, 12))

    # ``facecolor`` replaces the removed ``axisbg`` keyword.
    ax = fig.add_subplot(111, facecolor='w', frame_on=False)

    m = Basemap(lon_0=0, projection='robin')
    m.drawmapboundary(color='w')

    m.readshapefile(shapefile, 'units', color='#444444', linewidth=.2)
    for info, shape in zip(m.units_info, m.units):
        iso = info['ISO_A2']
        # Start from the no-data color so a failed lookup can neither leave
        # ``color`` unbound nor reuse the previous country's color.
        color = nodata_color
        try:
            if iso in df.index:
                color = scheme[int(df.loc[iso, 'bin'])]
        except TypeError:
            print(iso)
            traceback.print_exc()

        # ``closed`` by keyword: newer Matplotlib no longer takes it
        # positionally.
        patches = [Polygon(np.array(shape), closed=True)]
        pc = PatchCollection(patches)
        pc.set_facecolor(color)
        ax.add_collection(pc)

    # Cover up Antarctica so legend can be placed over it.
    ax.axhspan(0, 1000 * 1800, facecolor='w', edgecolor='w', zorder=2)

    # Draw color legend.
    ax_legend = fig.add_axes([0.35, 0.14, 0.3, 0.03], zorder=3)
    cmap = mpl.colors.ListedColormap(scheme)
    cb = mpl.colorbar.ColorbarBase(ax_legend,
                                   cmap=cmap,
                                   ticks=bins,
                                   boundaries=bins,
                                   orientation='horizontal')
    # Rotation is given as a number; the string '80' is not a valid value.
    cb.ax.set_xticklabels([str(round(i, 2)) for i in bins], rotation=80)

    plt.savefig(imgfile, bbox_inches='tight', pad_inches=.2)
示例#5
0
def world(data,
          area_col,
          value_col,
          area_to_code=False,
          bin_mode='linear',
          log=None,
          title='',
          value_col_to_title=True,
          descripton='',
          num_colors=9,
          palette='Reds',
          filter_outlier=False):
    '''WORLD MAP PLOT

    Takes in data where one column is the values and another is either
    country or 3-alphabet country code according to ISO standard, and
    draws the values as a choropleth on a world map. The input frame is
    copied, not modified. The shapefile path is read from the module-level
    name ``shapefile``.

    USE
    ===

    world(data=emission,
          area_col='area',
          value_col=1999,
          area_to_code=True,
          title='Emission Intensity of Food Production (co2/kg)',
          value_col_to_title=False,
          palette='Reds',
          num_colors=9)


    data :: the dataframe holding the area and value columns

    area_col :: the column where is either country name or country code
    value_col :: the column with the values (cast to int before plotting)

    area_to_code :: must be True if area is not 3-alphabet code
    bin_mode :: 'linear' for evenly spaced bin edges between the smallest
                and largest value, 'quantile' for percentile based edges.
                Any other value raises ValueError.
    num_colors :: the number of colors to be used to describe intensity;
                  also the number of bin edges

    title :: the figure title
    value_col_to_title :: Useful when the column is a year and you want to
                          have it shown in the title.
    descripton :: footer text annotated on the figure

    palette :: the matplotlib colormap name
    filter_outlier :: if true the data is passed through ``outliers``
                      before plotting

    log :: if true will use log values (log1p) instead. Only works when
           bin_mode is linear.

    '''

    from geonamescache import GeonamesCache
    from ..utils.country_code import country_to_code

    # Fail early with a clear message instead of hitting an unbound
    # ``bins`` further down.
    if bin_mode not in ('linear', 'quantile'):
        raise ValueError(
            "bin_mode must be 'linear' or 'quantile', got {!r}".format(
                bin_mode))

    data = data.copy(deep=True)

    if filter_outlier:
        data = outliers(data, value_col)

    if value_col_to_title:
        title = title + ' {}'.format(value_col)

    # str.strip() returns a new string; the result has to be kept.
    descripton = descripton.strip()

    if area_to_code:
        data[area_col] = data[area_col].apply(country_to_code)
        data.set_index(area_col, inplace=True)

    if data.index.name != area_col:
        data.set_index(area_col, inplace=True)

    # filter data based on geo codes
    gc = GeonamesCache()
    iso3_codes = list(gc.get_dataset_by_key(gc.get_countries(), 'iso3').keys())
    # reindex() tolerates codes that are absent from the data (they become
    # NaN rows and are dropped right below); .loc with a list raises on
    # missing labels in current pandas.
    data = data.reindex(iso3_codes)
    data = data[data[value_col].notna()].copy()
    data[value_col] = data[value_col].astype(int)

    if log:
        # Transform the column itself so that the bin edges and the binned
        # values are on the same scale.
        data[value_col] = np.log1p(data[value_col])

    # set plot stuff
    values = data[value_col].dropna()

    if bin_mode == 'linear':
        bins = np.linspace(values.min(), values.max(), num_colors)
    else:
        # num_colors percentile edges spanning 0..100 so the edge count
        # matches the linear mode and the maximum is included; duplicates
        # are removed because pd.cut needs strictly increasing edges.
        percentiles = np.linspace(0, 100, num_colors)
        bins = np.unique(np.nanpercentile(values, percentiles))
    cm = plt.get_cmap(palette)
    scheme = [cm(i / num_colors) for i in range(num_colors)]

    # create the bin column
    # include_lowest keeps the smallest value inside the first bin;
    # otherwise it gets code -1 and would be drawn with the last color.
    data['bin'] = pd.cut(data[value_col], bins, include_lowest=True).cat.codes

    p = plt.figure(figsize=(17, 12))
    p.patch.set_facecolor('white')

    ax = p.add_subplot(111, frame_on=False)
    p.suptitle(title, color='grey', weight='bold', fontsize=26, y=.85)

    try:
        from mpl_toolkits.basemap import Basemap
    except ImportError:
        raise MissingImport(
            "Install Basemap >> pip install git+https://github.com/matplotlib/basemap.git"
        )
    m = Basemap(lon_0=0, projection='robin')

    m.drawmapboundary(color='w')

    m.readshapefile(shapefile, 'units', color='#444444', linewidth=.2)
    for info, shape in zip(m.units_info, m.units):
        iso3 = info['ADM0_A3']
        if iso3 in data.index:
            color = scheme[int(data.loc[iso3, 'bin'])]
        else:
            color = '#dddddd'

        # ``closed`` by keyword: newer Matplotlib no longer takes it
        # positionally.
        patches = [Polygon(np.array(shape), closed=True)]
        pc = PatchCollection(patches)
        pc.set_facecolor(color)
        ax.add_collection(pc)

    # Cover up Antarctica so legend can be placed over it.
    ax.axhspan(0, 1000 * 1800, facecolor='w', edgecolor='w', zorder=2)

    # Draw color legend.
    ax_legend = p.add_axes([0.35, 0.24, 0.3, 0.03], zorder=3)
    cmap = mpl.colors.ListedColormap(scheme)
    cb = mpl.colorbar.ColorbarBase(ax_legend,
                                   cmap=cmap,
                                   ticks=bins,
                                   boundaries=bins,
                                   orientation='horizontal')

    cb.ax.set_xticklabels([str(round(i, 1)) for i in bins],
                          rotation=45,
                          ha='right')

    plt.annotate(descripton, xy=(-.8, -3.2), size=14, xycoords='axes fraction')

    _thousand_sep(p, ax)
示例#6
0
# Inputs and output locations for the forest-area choropleth.
shapefile = 'shp/countries/ne_10m_admin_0_countries_lakes'
num_colors = 9
year = '2012'
cols = ['Country Name', 'Country Code', year]
title = 'Forest area as percentage of land area in {}'.format(year)
imgfile = 'img/{}.png'.format(slug(title))

# Footer text for the map.
description = '''
Forest area is land under natural or planted stands of trees of at least 5 meters in situ, whether 
productive or not, and excludes tree stands in agricultural production systems (for example, in 
fruit plantationsand agroforestry systems) and trees in urban parks and gardens. Countries without 
data are shown in grey. Data: World Bank - worldbank.org • Author: Ramiro Gómez - ramiro.org'''.strip()

gc = GeonamesCache()
iso3_codes = list(gc.get_dataset_by_key(gc.get_countries(), 'iso3').keys())

df = pd.read_csv(filename, skiprows=4, usecols=cols)
df.set_index('Country Code', inplace=True)
# Filter out non-countries and missing values. ``reindex`` replaces the
# ``.ix`` indexer, which no longer exists in current pandas.
df = df.reindex(iso3_codes).dropna()

values = df[year]
cm = plt.get_cmap('Greens')
scheme = [cm(i / num_colors) for i in range(num_colors)]
bins = np.linspace(values.min(), values.max(), num_colors)
df['bin'] = np.digitize(values, bins) - 1
# The result is not stored; this only has a visible effect when run as the
# last expression of an interactive cell.
df.sort_values('bin', ascending=False).head(10)

# NOTE(review): 'map' is not a built-in Matplotlib style name - presumably a
# custom style sheet must be installed; confirm.
mpl.style.use('map')
fig = plt.figure(figsize=(22, 12))
示例#7
0
def plotChoropleth(filename,imgfile,figNum):
    """Render a world choropleth of the CSV column 'DISCON' into ``imgfile``.

    The CSV needs the columns 'CC' (compared with the ``ISO_A2`` attribute
    of the shapefile records) and 'DISCON'. Values are digitized on 20
    evenly spaced edges between 0 and 1 and drawn with the 'Reds' colormap;
    shapes without data are drawn in grey.

    :param filename: CSV path passed to ``pandas.read_csv``.
    :param imgfile: output path passed to ``plt.savefig``.
    :param figNum: Matplotlib figure number, also shown in the progress
        message.
    """
    shapefile = 'data/ne/ne_10m_admin_0_countries'
    cols = ['CC', 'DISCON']
    num_colors = 20
    missing_color = '#dddddd'
    gc = GeonamesCache()
    iso_codes = list(gc.get_dataset_by_key(gc.get_countries(), 'iso').keys())
    df = pd.read_csv(filename, skiprows=0, usecols=cols)
    df.set_index('CC', inplace=True)
    # Filter out non-countries and missing values. The ``.ix`` indexer is
    # gone from current pandas; ``reindex`` does the same label selection.
    df = df.reindex(iso_codes).dropna()
    values = df['DISCON']
    cm = plt.get_cmap('Reds')
    scheme = [cm(float(i) / num_colors) for i in range(num_colors)]
    bins = np.linspace(0, 1, num_colors)
    df['bin'] = np.digitize(values, bins) - 1

    # NOTE(review): newer Matplotlib releases appear to have renamed the
    # seaborn styles (e.g. 'seaborn-v0_8-pastel') - confirm for the target
    # version.
    mpl.style.use('seaborn-pastel')
    print('Plotting Figure {0}: {1}'.format(figNum,imgfile))
    fig = plt.figure(figNum,figsize=(22, 12))

    # ``axisbg`` was removed from Matplotlib; ``facecolor`` is its successor.
    ax = fig.add_subplot(111, facecolor='w', frame_on=False)

    m = Basemap(lon_0=0, projection='robin')
    m.drawmapboundary(color='w')

    m.readshapefile(shapefile, 'units', color='#444444', linewidth=.2)
    for info, shape in zip(m.units_info, m.units):
        iso = info['ISO_A2']
        # Default first: if the lookup raises, the shape is drawn as
        # "no data" instead of failing on an unbound name or silently
        # reusing the color of the previous shape.
        color = missing_color
        try:
            if iso in df.index:
                color = scheme[int(df.loc[iso, 'bin'])]
        except TypeError:
            print(iso)
            traceback.print_exc()

        # ``closed`` by keyword: newer Matplotlib no longer takes it
        # positionally.
        patches = [Polygon(np.array(shape), closed=True)]
        pc = PatchCollection(patches)
        pc.set_facecolor(color)
        ax.add_collection(pc)

    # Cover up Antarctica so legend can be placed over it.
    ax.axhspan(0, 1000 * 1800, facecolor='w', edgecolor='w', zorder=2)

    # Draw color legend.
    ax_legend = fig.add_axes([0.35, 0.14, 0.3, 0.03], zorder=3)
    cmap = mpl.colors.ListedColormap(scheme)
    cb = mpl.colorbar.ColorbarBase(ax_legend, cmap=cmap, ticks=bins, boundaries=bins, orientation='horizontal')
    # Numeric rotation; the string '80' is not a valid rotation value.
    cb.ax.set_xticklabels([str(round(i, 2)) for i in bins], rotation=80)

    plt.savefig(imgfile, bbox_inches='tight', pad_inches=.2)
示例#8
0
Question 3
Team : golf

Script used to extract data from the article-per-line file, process it,
and finally write it to a CSV file.
"""
import pandas as pd
from geonamescache import GeonamesCache
from geonamescache.mappers import country

# GeonamesCache supplies the country and US-state reference data used below.
gc = GeonamesCache()

# Mapper that turns a country name into the 'iso3' attribute of its record.
mapper = country(from_key='name', to_key='iso3')

# All country names known to the cache.
countries_by_name = gc.get_dataset_by_key(gc.get_countries(), 'name')
countries = list(countries_by_name.keys())

# For the US we are going to use the states.
states = list(gc.get_us_states_by_names())

# Any of these key words could indicate that we are reading about a star.
key_words = ['movie', 'film', 'TV', 'television', 'actor', 'actress']
articles = []
dataset = {}

# One article per line; line endings are dropped by splitlines().
with open('article-per-line.txt', mode='r', encoding="utf8") as f:
    articles = f.read().splitlines()

for a in articles:
    dec = a.split('born in', 1)
    proceed = True