def country(from_key='name', to_key='iso'):
    """Build a lookup function that translates one country attribute to another.

    The returned function takes exactly one argument. With the defaults it
    accepts a country name and yields the value stored under the ``iso``
    attribute of the matching record.

    :param from_key: (optional) attribute the lookup is keyed on.
        Defaults to ``name``.
    :param to_key: (optional) attribute returned for a matching record.
        Defaults to ``iso``.
    :return: mapper
    :rtype: function
    """
    cache = GeonamesCache()
    records_by_key = cache.get_dataset_by_key(cache.get_countries(), from_key)

    # NOTE: the parameter keeps the name ``input`` (shadowing the builtin)
    # so that existing keyword calls to the mapper keep working.
    def mapper(input):
        if from_key == 'name':
            # Translate alternative spellings through the names mapping;
            # names without an entry pass through unchanged.
            input = mappings.country_names.get(input, input)

        record = records_by_key.get(input)
        if not record:
            # Unknown key (or empty record): implicit "no result".
            return None
        return record[to_key]

    return mapper
def plot_world_chloropleth(datafile,
                           dest,
                           colorscale,
                           bins,
                           nodatacolor='#dddddd',
                           scale=1,
                           projection='robin',
                           resolution='l',
                           usecol='Magnitude',
                           inputkwargs=None):
    """Draw a world choropleth from a CSV file and save it to ``dest``.

    Format: CSV with 'Country Name', 'Country Code', and 'Magnitude' columns.

    :param datafile: path (or buffer) handed to ``pandas.read_csv``.
    :param dest: output target passed to ``plt.savefig``.
    :param colorscale: colormap name passed to ``plt.get_cmap``.
    :param bins: bin edges; ``len(bins) - 1`` colours are used.
    :param nodatacolor: fill colour for units without a usable value.
    :param scale: multiplier applied to the figure size and line widths.
    :param projection: Basemap projection name.
    :param resolution: Basemap resolution code.
    :param usecol: name of the CSV column holding the plotted values.
    :param inputkwargs: optional dict of extra keyword arguments for
        ``pandas.read_csv``. ``None`` (the default) means no extras.
    """
    # See http://ramiro.org/notebook/basemap-choropleth/
    # A ``{}`` default would be shared between calls; build a fresh dict.
    read_kwargs = {} if inputkwargs is None else inputkwargs

    shapefile = 'ne_10m_admin_0_countries_lakes/ne_10m_admin_0_countries_lakes'
    num_colors = len(bins) - 1

    gc = GeonamesCache()
    iso3_codes = list(gc.get_dataset_by_key(gc.get_countries(), 'iso3').keys())

    df = pd.read_csv(datafile, **read_kwargs)
    df.set_index('Country Code', inplace=True)
    # Keep exactly one row per known ISO3 code; codes absent from the CSV
    # become NaN rows (they are deliberately NOT dropped, see below).
    df = df.reindex(iso3_codes)
    values = df[usecol]

    # https://matplotlib.org/api/pyplot_summary.html#matplotlib.pyplot.colormaps
    cm = plt.get_cmap(colorscale)
    scheme = [cm(i / num_colors) for i in range(num_colors)]
    # One extra slot at the end: np.digitize puts NaN and values at or above
    # the last edge into bin ``len(bins)``, i.e. index ``num_colors`` after
    # the ``- 1`` shift, and values below the first edge end up at index -1.
    # Both indices resolve to this trailing "no data" colour.
    scheme.append(nodatacolor)
    df['bin'] = np.digitize(values, bins) - 1

    # This doesn't work, is it important?
    # mpl.style.use('map')
    fig = plt.figure(figsize=(default_size * scale, default_size * scale))
    ax = fig.add_subplot(111, facecolor='w', frame_on=False)

    m = Basemap(lon_0=0, projection=projection, resolution=resolution)
    m.drawmapboundary(linewidth=default_map_linewidth * scale, color='w')
    m.readshapefile(shapefile,
                    'units',
                    color='#444444',
                    linewidth=default_border_linewidth * scale)

    for info, shape in zip(m.units_info, m.units):
        iso3 = info['ADM0_A3']
        if iso3 not in df.index:
            color = nodatacolor
        else:
            # Scalar lookup + int(): selecting the whole row first can upcast
            # the bin to float, which is not a valid list index.
            color = scheme[int(df.loc[iso3, 'bin'])]

        # ``closed`` is passed by keyword; positional use is deprecated in
        # current matplotlib releases.
        patches = [Polygon(np.array(shape), closed=True)]
        pc = PatchCollection(patches)
        pc.set_facecolor(color)
        ax.add_collection(pc)

    plt.savefig(dest, bbox_inches='tight')
def country(from_key='name', to_key='iso'):
    """Return a one-argument function mapping ``from_key`` values to ``to_key``.

    The returned mapper looks its argument up in the country dataset keyed
    by ``from_key`` and returns the ``to_key`` attribute of the record, or
    ``None`` when there is no (non-empty) record. When ``from_key`` is
    ``'name'`` the argument is first translated through
    ``mappings.country_names``.
    """
    cache = GeonamesCache()
    records = cache.get_dataset_by_key(cache.get_countries(), from_key)

    def mapper(key):
        if from_key == 'name':
            # Unmapped names fall through unchanged.
            key = mappings.country_names.get(key, key)

        record = records.get(key)
        if not record:
            return None
        return record[to_key]

    return mapper
def plotChoropleth(filename, imgfile, figNum):
    """Plot the 'DISCON' column of a CSV file as a world choropleth.

    The CSV must provide a 'CC' column (matched against the shapefile's
    ``ISO_A2`` attribute and the GeonamesCache ``iso`` keys) and a 'DISCON'
    column. Values are binned on a fixed 0..1 scale into 20 steps of the
    'Reds' colormap; units without data are drawn in light grey.

    :param filename: CSV path handed to ``pandas.read_csv``.
    :param imgfile: output path handed to ``plt.savefig``.
    :param figNum: matplotlib figure number to draw into.
    """
    shapefile = 'data/ne/ne_10m_admin_0_countries'
    cols = ['CC', 'DISCON']
    num_colors = 20
    nodata_color = '#dddddd'

    gc = GeonamesCache()
    iso_codes = list(gc.get_dataset_by_key(gc.get_countries(), 'iso').keys())

    df = pd.read_csv(filename, skiprows=0, usecols=cols)
    df.set_index('CC', inplace=True)
    # Filter out non-countries and missing values. ``DataFrame.ix`` no
    # longer exists in pandas >= 1.0; a boolean mask keeps the same rows
    # (unknown codes and NaN rows are dropped either way).
    df = df.loc[df.index.isin(iso_codes)].dropna()

    values = df['DISCON']
    cm = plt.get_cmap('Reds')
    scheme = [cm(float(i) / num_colors) for i in range(num_colors)]
    # Fixed scale instead of np.linspace(values.min(), values.max(), ...).
    bins = np.linspace(0, 1, num_colors)
    df['bin'] = np.digitize(values, bins) - 1

    # NOTE(review): recent matplotlib releases renamed the seaborn styles
    # (e.g. 'seaborn-v0_8-pastel') -- confirm against the installed version.
    mpl.style.use('seaborn-pastel')
    print('Plotting Figure {0}: {1}'.format(figNum, imgfile))
    fig = plt.figure(figNum, figsize=(22, 12))
    # ``axisbg`` was removed from matplotlib; ``facecolor`` replaces it.
    ax = fig.add_subplot(111, facecolor='w', frame_on=False)

    m = Basemap(lon_0=0, projection='robin')
    m.drawmapboundary(color='w')
    m.readshapefile(shapefile, 'units', color='#444444', linewidth=.2)

    for info, shape in zip(m.units_info, m.units):
        iso = info['ISO_A2']
        # Start from the "no data" colour so a failed lookup can never reuse
        # the previous unit's colour (or hit an unbound name on the first
        # iteration).
        color = nodata_color
        if iso in df.index:
            try:
                color = scheme[int(df.loc[iso, 'bin'])]
            except TypeError:
                # Best effort: report the offending code and keep going.
                print(iso)
                traceback.print_exc()

        # ``closed`` by keyword; positional use is deprecated in matplotlib.
        patches = [Polygon(np.array(shape), closed=True)]
        pc = PatchCollection(patches)
        pc.set_facecolor(color)
        ax.add_collection(pc)

    # Cover up Antarctica so legend can be placed over it.
    ax.axhspan(0, 1000 * 1800, facecolor='w', edgecolor='w', zorder=2)

    # Draw color legend.
    ax_legend = fig.add_axes([0.35, 0.14, 0.3, 0.03], zorder=3)
    cmap = mpl.colors.ListedColormap(scheme)
    cb = mpl.colorbar.ColorbarBase(ax_legend,
                                   cmap=cmap,
                                   ticks=bins,
                                   boundaries=bins,
                                   orientation='horizontal')
    cb.ax.set_xticklabels([str(round(i, 2)) for i in bins], rotation=80)

    plt.savefig(imgfile, bbox_inches='tight', pad_inches=.2)
def world(data,
          area_col,
          value_col,
          area_to_code=False,
          bin_mode='linear',
          log=None,
          title='',
          value_col_to_title=True,
          descripton='',
          num_colors=9,
          palette='Reds',
          filter_outlier=False):
    '''WORLD MAP PLOT

    Takes in data where one column is the values and another is
    either country or 3-alphabet country code according to ISO standard.

    USE
    ===
    world(data=emission,
          area_col='area',
          value_col=1999,
          area_to_code=True,
          title='Emission Intensity of Food Production (co2/kg)',
          value_col_to_title=False,
          palette='Reds',
          num_colors=9)

    data :: a pandas DataFrame; it is copied, the caller's frame is untouched
    area_col :: the column holding either country name or country code
                (may already be the index)
    value_col :: the column with the values (cast to int before plotting)
    area_to_code :: must be True if area is not 3-alphabet code
    bin_mode :: 'linear' (equal-width bins) or 'quantile' (equal-count bins)
    log :: if true, log1p-transformed values are binned and plotted
    title :: figure title
    value_col_to_title :: appends value_col to the title. Useful when the
                          column is a year and you want it shown in the title.
    descripton :: footer text annotated below the map
    num_colors :: the number of colors to be used to describe intensity
    palette :: matplotlib colormap name
    filter_outlier :: if true, rows are first passed through ``outliers``

    Raises ValueError for an unknown ``bin_mode`` and MissingImport when
    Basemap is not installed.
    '''
    from geonamescache import GeonamesCache
    from ..utils.country_code import country_to_code

    if bin_mode not in ('linear', 'quantile'):
        # Previously surfaced later as a NameError on ``bins``.
        raise ValueError(
            "bin_mode must be 'linear' or 'quantile', got {!r}".format(
                bin_mode))

    data = data.copy(deep=True)

    if filter_outlier:
        data = outliers(data, value_col)

    if value_col_to_title:
        title = title + ' {}'.format(value_col)

    # str.strip() returns a new string; the result used to be thrown away.
    descripton = descripton.strip()

    if area_to_code:
        data[area_col] = data[area_col].apply(country_to_code)

    if data.index.name != area_col:
        data.set_index(area_col, inplace=True)

    # filter data based on geo codes
    gc = GeonamesCache()
    iso3_codes = list(gc.get_dataset_by_key(gc.get_countries(), 'iso3').keys())
    # ``.loc[list]`` raises KeyError in current pandas when a code is missing
    # from the data; a membership mask keeps only the known country rows.
    data = data.loc[data.index.isin(iso3_codes)]
    data = data[~data[value_col].isna()]
    data[value_col] = data[value_col].astype(int)

    if log:
        # Transform the plotted column itself so bins and data agree.
        # log1p keeps zeros finite (log(0) would be -inf).
        data[value_col] = np.log1p(data[value_col])

    # set plot stuff
    values = data[value_col].dropna()

    if bin_mode == 'linear':
        bins = np.linspace(values.min(), values.max(), num_colors)
    else:
        # num_colors evenly spaced percentiles from 0 to 100; duplicate
        # edges (heavily tied data) are collapsed because pd.cut needs
        # strictly increasing edges.
        bins = np.unique(
            np.nanpercentile(values, np.linspace(0, 100, num_colors)))

    cm = plt.get_cmap(palette)
    scheme = [cm(i / num_colors) for i in range(num_colors)]

    # create the bin column; include_lowest puts the minimum value into the
    # first bin instead of giving it code -1 (which used to index the LAST
    # colour of the scheme).
    data['bin'] = pd.cut(data[value_col], bins, include_lowest=True).cat.codes

    p = plt.figure(figsize=(17, 12))
    p.patch.set_facecolor('white')

    ax = p.add_subplot(111, frame_on=False)
    p.suptitle(title, color='grey', weight='bold', fontsize=26, y=.85)

    try:
        from mpl_toolkits.basemap import Basemap
    except ImportError:
        raise MissingImport(
            "Install Basemap >> pip install git+https://github.com/matplotlib/basemap.git"
        )

    m = Basemap(lon_0=0, projection='robin')
    m.drawmapboundary(color='w')
    # ``shapefile`` is not defined in this function; it is expected to be a
    # module-level name.
    m.readshapefile(shapefile, 'units', color='#444444', linewidth=.2)

    for info, shape in zip(m.units_info, m.units):
        iso3 = info['ADM0_A3']
        color = '#dddddd'
        if iso3 in data.index:
            code = int(data.loc[iso3, 'bin'])
            if code >= 0:
                # A negative code means "not in any bin"; keep the grey.
                color = scheme[code]

        # ``closed`` by keyword; positional use is deprecated in matplotlib.
        patches = [Polygon(np.array(shape), closed=True)]
        pc = PatchCollection(patches)
        pc.set_facecolor(color)
        ax.add_collection(pc)

    # Cover up Antarctica so legend can be placed over it.
    ax.axhspan(0, 1000 * 1800, facecolor='w', edgecolor='w', zorder=2)

    # Draw color legend.
    ax_legend = p.add_axes([0.35, 0.24, 0.3, 0.03], zorder=3)
    cmap = mpl.colors.ListedColormap(scheme)
    cb = mpl.colorbar.ColorbarBase(ax_legend,
                                   cmap=cmap,
                                   ticks=bins,
                                   boundaries=bins,
                                   orientation='horizontal')
    cb.ax.set_xticklabels([str(round(i, 1)) for i in bins],
                          rotation=45,
                          ha='right')

    plt.annotate(descripton,
                 xy=(-.8, -3.2),
                 size=14,
                 xycoords='axes fraction')

    _thousand_sep(p, ax)
# --- Choropleth set-up: forest area as a share of land area ---------------
# ``filename`` and ``slug`` are not defined in this span; they are expected
# to be provided elsewhere in the file.
shapefile = 'shp/countries/ne_10m_admin_0_countries_lakes'
num_colors = 9
year = '2012'
cols = ['Country Name', 'Country Code', year]
title = 'Forest area as percentage of land area in {}'.format(year)
imgfile = 'img/{}.png'.format(slug(title))

# Footer text; surrounding whitespace is stripped.
description = ''' Forest area is land under natural or planted stands of trees of at least 5 meters in situ, whether productive or not, and excludes tree stands in agricultural production systems (for example, in fruit plantationsand agroforestry systems) and trees in urban parks and gardens. Countries without data are shown in grey. Data: World Bank - worldbank.org • Author: Ramiro Gómez - ramiro.org'''.strip()

gc = GeonamesCache()
iso3_codes = list(gc.get_dataset_by_key(gc.get_countries(), 'iso3').keys())

df = pd.read_csv(filename, skiprows=4, usecols=cols)
df.set_index('Country Code', inplace=True)
# Filter out non-countries and missing values. ``DataFrame.ix`` no longer
# exists in pandas >= 1.0; a membership mask followed by dropna() keeps the
# same rows.
df = df.loc[df.index.isin(iso3_codes)].dropna()

values = df[year]
cm = plt.get_cmap('Greens')
scheme = [cm(i / num_colors) for i in range(num_colors)]
bins = np.linspace(values.min(), values.max(), num_colors)
df['bin'] = np.digitize(values, bins) - 1
# Bare expression: only displays something in an interactive session and
# has no effect when run as a script.
df.sort_values('bin', ascending=False).head(10)

# NOTE(review): 'map' is not one of matplotlib's built-in styles; presumably
# a custom style sheet must be installed -- confirm.
mpl.style.use('map')
fig = plt.figure(figsize=(22, 12))
def plotChoropleth(filename, imgfile, figNum):
    """Save a world choropleth of the CSV's 'DISCON' values to ``imgfile``.

    Rows are keyed by the 'CC' column, which is matched against both the
    GeonamesCache ``iso`` keys and the shapefile's ``ISO_A2`` attribute.
    Values are mapped onto 20 'Reds' colours using fixed bin edges between
    0 and 1; shapes without a usable value are filled light grey.

    :param filename: CSV path handed to ``pandas.read_csv``.
    :param imgfile: output path handed to ``plt.savefig``.
    :param figNum: matplotlib figure number to draw into.
    """
    shapefile = 'data/ne/ne_10m_admin_0_countries'
    cols = ['CC', 'DISCON']
    num_colors = 20
    fallback_color = '#dddddd'

    gc = GeonamesCache()
    iso_codes = list(gc.get_dataset_by_key(gc.get_countries(), 'iso').keys())

    df = pd.read_csv(filename, skiprows=0, usecols=cols)
    df.set_index('CC', inplace=True)
    # Filter out non-countries and missing values. The removed
    # ``DataFrame.ix`` indexer is replaced by a membership mask, which
    # selects the same rows once NaNs are dropped.
    df = df.loc[df.index.isin(iso_codes)].dropna()

    values = df['DISCON']
    cm = plt.get_cmap('Reds')
    scheme = [cm(float(i) / num_colors) for i in range(num_colors)]
    # Fixed 0..1 scale rather than the data's own min/max.
    bins = np.linspace(0, 1, num_colors)
    df['bin'] = np.digitize(values, bins) - 1

    # NOTE(review): newer matplotlib versions ship this style under a
    # different name ('seaborn-v0_8-pastel') -- confirm for the target env.
    mpl.style.use('seaborn-pastel')
    print('Plotting Figure {0}: {1}'.format(figNum, imgfile))
    fig = plt.figure(figNum, figsize=(22, 12))
    # ``facecolor`` is the supported spelling of the removed ``axisbg``.
    ax = fig.add_subplot(111, facecolor='w', frame_on=False)

    m = Basemap(lon_0=0, projection='robin')
    m.drawmapboundary(color='w')
    m.readshapefile(shapefile, 'units', color='#444444', linewidth=.2)

    for info, shape in zip(m.units_info, m.units):
        iso = info['ISO_A2']
        # Default first: if the lookup below fails, the shape is drawn in
        # the fallback colour instead of whatever the last iteration left
        # in ``color``.
        color = fallback_color
        if iso in df.index:
            try:
                color = scheme[int(df.loc[iso, 'bin'])]
            except TypeError:
                # Best effort: log the code that could not be coloured.
                print(iso)
                traceback.print_exc()

        # Keyword ``closed``: positional use is deprecated in matplotlib.
        patches = [Polygon(np.array(shape), closed=True)]
        pc = PatchCollection(patches)
        pc.set_facecolor(color)
        ax.add_collection(pc)

    # Cover up Antarctica so legend can be placed over it.
    ax.axhspan(0, 1000 * 1800, facecolor='w', edgecolor='w', zorder=2)

    # Draw color legend.
    ax_legend = fig.add_axes([0.35, 0.14, 0.3, 0.03], zorder=3)
    cmap = mpl.colors.ListedColormap(scheme)
    cb = mpl.colorbar.ColorbarBase(ax_legend,
                                   cmap=cmap,
                                   ticks=bins,
                                   boundaries=bins,
                                   orientation='horizontal')
    cb.ax.set_xticklabels([str(round(i, 2)) for i in bins], rotation=80)

    plt.savefig(imgfile, bbox_inches='tight', pad_inches=.2)
Question 3 Team : golf Script used to extract data from the article-per-line file and process it to finaly write it in a csv file """ import pandas as pd from geonamescache import GeonamesCache from geonamescache.mappers import country gc = GeonamesCache() # we use the GeonamesCache to get the name of countries # creating a mapper between the iso3 code and the country name mapper = country(from_key='name', to_key='iso3') countries = list(gc.get_dataset_by_key( gc.get_countries(), 'name', ).keys()) # for the US we are going to use the states states = list(gc.get_us_states_by_names()) #print(countries) # any of these key words could indicate that we are reading about a star key_words = ['movie', 'film', 'TV', 'television', 'actor', 'actress'] articles = [] dataset = {} with open('article-per-line.txt', 'r', encoding="utf8") as f: articles = f.read().splitlines() for a in articles: dec = a.split('born in', 1) proceed = True