示例#1
0
def plot_indicators(country_list, indicator, start=2000, end=2015):
    """
    Plot one World Bank indicator as a line per country.

    The indicator's display name is split into the plot title (the text
    before the first opening bracket) and the y-axis label (the text inside
    the first pair of brackets). A name without a bracketed part is used
    whole as the title and gets an empty y-axis label.

    :param country_list: a list of strings of country code
    :param indicator: indicator index from wbdata
    :param start: first year on the x-axis (inclusive)
    :param end: last year on the x-axis (inclusive)
    :return: None; the figure is displayed with ``plt.show()``
    """
    import matplotlib.pyplot as plt
    import seaborn as sns
    import wbdata
    import re
    ind = wbdata.get_indicator(indicator, display=False)
    full_name = ind[0]['name']
    # partition() yields the whole name when there is no bracket; the former
    # ``find('(') - 1`` slice silently dropped the last two characters then.
    title = full_name.partition('(')[0].rstrip()
    # first bracketed group, if any, is used as the y-axis label
    bracketed = re.findall(r'\((.*?)\)', full_name)
    ylab = bracketed[0] if bracketed else ''
    sns.set_style('white')
    _fig, axis = plt.subplots()
    years = range(start, end + 1)
    for c in country_list:
        # NOTE(review): country_data is defined outside this block; it is
        # assumed to return one value per year in [start, end] - confirm.
        axis.plot(years, country_data(c, indicator, start, end))
    plt.legend(country_list)
    plt.title(title)
    plt.ylabel(ylab)
    plt.show()
def all_indicators(db, update_local_db=False):
    """Return every indicator yielded by ``wb.get_indicator()`` keyed by its id.

    Each row is checked with assertions (dict row, string/list fields) and
    reduced to a record stamped with ``now()``. When ``update_local_db`` is
    true the record is also upserted into ``db.world_bank_indicators``,
    matched on ``wb_id``.

    :param db: object exposing a ``world_bank_indicators`` collection; only
        used when ``update_local_db`` is true
    :param update_local_db: whether to upsert each record into ``db``
    :return: dict mapping ``wb_id`` to the record dict
    """
    # (field, expected type) pairs, checked in this order for every row
    expected_types = (
        ('sourceNote', str),
        ('topics', list),
        ('sourceOrganization', str),
        ('name', str),
    )
    metrics = {}

    for row in wb.get_indicator():
        assert isinstance(row, dict)
        for field, kind in expected_types:
            assert isinstance(row[field], kind)

        record = {
            'last_updated': now(),
            'wb_id': row['id'],
            'source_note': row['sourceNote'],
            'name': row['name'],
            'unit': row['unit'],
            'source_organisation': row['sourceOrganization'],
            'topics': row['topics']
        }
        wb_id = record['wb_id']
        if update_local_db:
            db.world_bank_indicators.update_one({'wb_id': wb_id},
                                                {"$set": record},
                                                upsert=True)
        metrics[wb_id] = record

    print("Found {} datasets from wbdata".format(len(metrics)))
    return metrics
示例#3
0
    def getWBDataFromWeb(self, pStockCode, pStart, pEnd):
        """Fetch "IC.BUS.EASE.XQ" for USA and GBR and store it on ``self.data``.

        The three parameters are accepted but not read by this method; the
        indicator, countries and date window are fixed in the body.
        Library documentation: https://wbdata.readthedocs.io/en/latest/

        :return: the value stored in ``self.data``
        """
        # exploratory catalogue calls; their return values are not kept
        wbdata.get_source()
        wbdata.get_indicator(source=1)
        wbdata.search_countries("united")

        window_start = datetime.datetime(2010, 1, 1)
        window_end = datetime.datetime(2011, 1, 1)
        self.data = wbdata.get_data("IC.BUS.EASE.XQ",
                                    country=("USA", "GBR"),
                                    data_date=(window_start, window_end))
        for record in self.data:
            print(record['country']['id'], record)

        return self.data
示例#4
0
文件: main.py 项目: ca9/world-stats
def get_ind_preview(y1=2010, ind="FR.INR.LEND"):
    """Return a JSON preview of one indicator for the preview countries.

    The response holds the values returned by ``rpy2functions.get_values``
    under ``data`` and the indicator metadata under ``details``;
    ``details['dev']`` carries a usage hint (fine / quarterly / scarce /
    no data).

    :param y1: year to preview; when falsy no date filter is sent
    :param ind: World Bank indicator id; when falsy an empty dict is returned
    :return: ``jsonify`` response, or ``{}`` when ``ind`` is falsy
    """
    if not ind:
        return {}

    # Build the single-year window only when a year was given. Previously
    # ``ydt`` stayed unbound for a falsy y1 and the call below crashed.
    data_date = None
    if y1:
        ydt = datetime(int(y1), 1, 1)
        data_date = (ydt, ydt)

    ind_details = wbdata.get_indicator(ind, display=False)[0]
    ind_details['dev'] = "Feel free to use this variable!"
    try:
        data = rpy2functions.get_values(
            wbdata.get_data(ind, data_date=data_date,
                            country=preview_countries))
        # ``data.keys()[0]`` raises TypeError on Python 3, which the handler
        # below turned into a bogus "No data found"; next(iter(...)) reads
        # the same first key on both Python 2 and 3.
        first_key = next(iter(data), '')
        if 'q' in first_key.split('.')[0].lower():
            ind_details['dev'] = "If chosen, please make sure all variables are quarterly."
        if sum(map(map_adder, data.values())) < 5:  # won't add properly.
            ind_details['dev'] = "Very scarce data. Avoid using this variable."
    except TypeError:
        data = {'{0}.{1}'.format(str(y1), cont): 'ERROR' for cont in preview_countries}
        ind_details['dev'] = "No data found for this variable. Do not use this variable."
    return jsonify({'data': data, 'details': ind_details})
示例#5
0
import pandas as pd

import wbdata
import datetime

import sys
sys.path.append('../other/')
import connect
# Open the project database connection
con = connect.connector()

# Download the country and indicator catalogues
countries = wbdata.get_country(display=False)
indicators = wbdata.get_indicator(display=False)

# Country ids, in catalogue order
countryID = [entry['id'] for entry in countries]

# Indicator ids, starting at catalogue position 1000
indicatorID = [
    indicators[position]['id'] for position in range(1000, len(indicators))
]

# Map each indicator id to a name with the dots replaced by underscores
indDict = {code: code.replace('.', '_') for code in indicatorID}

# fetch data
示例#6
0
def scrape():
    """Merge the disaster CSV with World Bank population and query the database.

    Steps, in order:
      1. read the disaster CSV and tag every row with an alpha-2 country
         code and a continent code ("N/A" when a lookup fails);
      2. keep the rows whose continent code is "AF";
      3. download "SP.POP.TOTL" for the listed codes (2008-2019) and shape
         it into Country / Country code / Year / Population columns;
      4. left-merge both tables, rename columns, drop incomplete rows and
         write ``merged.csv``;
      5. return ``select * from merged_data`` read from PostgreSQL.

    Notebook leftovers with no effect (``df.head()`` and similar bare
    expressions, unused imports, an unused country-name list) were removed.

    NOTE(review): ``pd`` and ``create_engine`` are not imported in this
    function; they are assumed to be imported at file level.

    :return: DataFrame with the contents of the ``merged_data`` table
    """
    import datetime

    import pycountry_convert as pc
    import wbdata

    df = pd.read_csv(
        "../SecondProject2/Resources/Project2_idmc_disaster_all_dataset.csv")

    # Lookup demo kept from the notebook. The sample values have their own
    # names so they are no longer shadowed by the helper functions below.
    sample_code = pc.country_name_to_country_alpha2("China",
                                                    cn_name_format="default")
    print(sample_code)
    sample_continent = pc.country_alpha2_to_continent_code(sample_code)
    print(sample_continent)

    def country_code(name):
        """Return the alpha-2 code for *name*, or "N/A" when the lookup fails."""
        try:
            return pc.country_name_to_country_alpha2(name,
                                                     cn_name_format="default")
        except Exception:
            # best effort: unknown names are tagged, not fatal; unlike the
            # former bare ``except`` this lets KeyboardInterrupt through
            return "N/A"

    def continent_name(name):
        """Return the continent code for alpha-2 *name*, or "N/A" on failure."""
        try:
            return pc.country_alpha2_to_continent_code(name)
        except Exception:
            return "N/A"

    df["Country_2D"] = df["Country Name"].apply(country_code)
    df["Continent"] = df["Country_2D"].apply(continent_name)

    df_Africa = df[df["Continent"] == "AF"]
    df_africa = df_Africa.rename(columns={
        'Country Name': 'Country',
    })

    country_codes = [
        'AGO', 'ALB', 'ARB', 'BDI', 'BEN', 'BFA', 'BMN', 'BSS', 'BWA', 'CAA',
        'CAF', 'CIV', 'CME', 'CMR', 'COG', 'COM', 'CPV', 'DJI', 'DMN', 'DSF',
        'DSS', 'DZA', 'EGY', 'ETH', 'GAB', 'GHA', 'GMB', 'GNB', 'GNQ', 'KEN',
        'LBR', 'LSO', 'MAR', 'MDG', 'MEA', 'MLI', 'MNA', 'MOZ', 'MRT', 'MUS',
        'MWI', 'NAF', 'NAM', 'NER', 'NGA', 'NLS', 'NRS', 'RRS', 'RSO', 'RWA',
        'SDN', 'SLE', 'SOM', 'SSA', 'SSD', 'SSF', 'SWZ', 'SXZ', 'SYC', 'TCD',
        'TGO', 'TMN', 'TSS', 'TUN', 'TZA', 'UGA', 'XZN', 'ZAF', 'ZMB', 'ZWE'
    ]
    indicators = "SP.POP.TOTL"
    data_date = datetime.datetime(2008, 1, 1), datetime.datetime(2019, 1, 1)

    # NOTE(review): exploratory catalogue calls kept from the notebook; their
    # return values are discarded - confirm whether they are still wanted.
    wbdata.get_indicator(source=50)
    wbdata.search_countries('')

    data = wbdata.get_data(indicators,
                           country=country_codes,
                           data_date=data_date)
    df_wbdata = pd.DataFrame(data)
    df_wbdata = df_wbdata.rename(
        columns={
            "indicator": "Indicator",
            "country": "Country",
            "countryiso3code": "Country code",
            "date": "Year",
            "value": "Population",
        })
    df_wbdata = df_wbdata.filter(
        items=['Country', 'Country code', 'Year', 'Population'])
    df_wbdata.dropna(inplace=True)
    for column in ('Country', 'Country code', 'Year', 'Population'):
        df_wbdata[column] = df_wbdata[column].astype(str)
    # Cuts a fixed prefix (23 chars) and suffix (2 chars) off the stringified
    # Country cell. NOTE(review): presumably this leaves the country name of
    # a "{'id': 'XX', 'value': '<name>'}" cell - verify against real rows.
    df_wbdata['Country'] = df_wbdata['Country'].str.slice(23, -2)

    # Both join keys must share a dtype before merging
    df_africa['Year'] = df_africa['Year'].astype('int64')
    df_wbdata['Year'] = df_wbdata['Year'].astype('int64')

    merged_df = pd.merge(left=df_africa,
                         right=df_wbdata,
                         how="left",
                         on=['Country code', 'Year', 'Country'])
    merged_df = merged_df.rename(
        columns={
            'Country code': 'Country_Code',
            'Start Date': 'Start_Date',
            'Event Name': 'Event_Name',
            'Hazard Category': 'Hazard_Category',
            'Hazard Type': 'Hazard_Type',
            'New Displacements': 'New_Displacements',
        })
    merged_df = merged_df.dropna()
    merged_df.to_csv('merged.csv', index=False)

    # Create the engine and pass in PostgreSQL. The former
    # ``engine.table_names()`` call was dropped: its result was unused and
    # the method no longer exists in SQLAlchemy 2.0.
    engine = create_engine(
        'postgresql://*****:*****@localhost/project2_db')

    query = pd.read_sql_query('select * from merged_data', con=engine)

    return query
示例#7
0
 def testGetIndicatorByTopic(self):
     """Smoke test: listing indicators for topic "1" must not raise."""
     topic_id = "1"
     wbdata.get_indicator(topic=topic_id)
示例#8
0
 def testGetOneIndicator(self):
     """Smoke test: fetching the single indicator "SH.XPD.PRIV.ZS" must not raise."""
     indicator_id = "SH.XPD.PRIV.ZS"
     wbdata.get_indicator(indicator_id)
示例#9
0
import wbdata

# Fetch the country list with every filter left unset, then print it
all_countries = wbdata.get_country(country_id=None,
                                   incomelevel=None,
                                   lendingtype=None,
                                   display=None)
print(all_countries)
# The string below holds disabled example calls; it is never executed.
'''wbdata.get_source("Doing Business")
#print("Doing Business")


wbdata.get_indicator(“IC.BUS.EASE.XQ”)

wbdata.search_countries("us")


wbdata.get_data("IC.BUS.EASE.XQ", country=USA)'''
示例#10
0
import wbdata as wb
import pandas as p

# Temporary way of keeping track of the debt amount.
DEBT = 1.5 * 10**12

if __name__ == '__main__':
    # Named indicator_id (not ``id``) so the builtin id() is not shadowed.
    indicator_id = input(
        "Enter ID for an indicator using current US dollars: ")
    indicator = wb.get_indicator(indicator_id)
    print("Checking unit compatibility...")
    # The unit check relies on the indicator's display name mentioning
    # "current US$".
    if "current US$" in str(indicator[0].get("name")):
        print(
            "Indicator is in proper units of current US dollars. Continuing.")
        # The placeholder ``p.Series()`` that used to precede this line was
        # overwritten immediately, so the data is fetched directly.
        series = wb.get_data(indicator_id, pandas=True)
    else:
        print("Indicator is not in proper units of current US dollars.")
示例#11
0
 def testGetOneIndicator(self):
     """Smoke test: fetching the single indicator "SH.XPD.PRIV.ZS" must not raise."""
     indicator_id = "SH.XPD.PRIV.ZS"
     wbdata.get_indicator(indicator_id)
示例#12
0
 def testGetAllIndicators(self):
     """Smoke test: listing indicators with no filter must not raise."""
     no_filters = {}
     wbdata.get_indicator(**no_filters)
示例#13
0
                global e_yr
                e_yr = int(a)
            else:
                assert False, "unhandled option"
    except Usage, err:
        print >> sys.stderr, err.msg
        print >> sys.stderr, "for help use --help"
        return 2

    # start timing the script
    time0 = datetime.datetime.utcnow()

    # get all entities in DB
    all_entities = wbdata.search_countries('', display=False)
    topics = wbdata.get_topic(display=False)
    indicators = wbdata.get_indicator(display=False)
    if None in (all_entities, topics, indicators):
        return 2

    # declare some blank lists to hold parsed data
    cnames = []
    anames = []
    countries = []
    aggregates = []

    # separate countries from aggregates
    for c in all_entities:
        if not c['incomeLevel']['value'] == 'Aggregates':
            cnames.append(c['name'])
            countries.append(c)
        else:
示例#14
0
import wbdata

from datetime import datetime

#def get_country_gdp(country):
#indicators = {"IC.BUS.EASE.XQ": "doing_business", "NY.GDP.PCAP.PP.KD": "gdppc"}
#country_gdp= wbdata.get_dataframe(indicators, country=country, convert_date=True)

if __name__ == '__main__':
    # Show every indicator of source 32, one entry per line
    indicators = wbdata.get_indicator(source=32)
    print("\n".join(str(entry) for entry in indicators))
示例#15
0
# Keep_levels
# if True don’t reduce the number of index levels returned if only getting one date or country

# Cache
# use the cache

# Returns
# a WBDataFrame

import pandas as pd
import wbdata as wb
import datetime

# browse the World Bank data sources, then the indicators of source 16
wb.get_source()
wb.get_indicator(source=16)

# country searches: a plain name fragment, then a wildcard pattern
for country_query in ('united', 'niger*'):
    wb.search_countries(country_query)

# fetch one indicator for a single country
# SE.ADT.1524.LT.FM.ZS  Literacy rate, youth (ages 15-24), gender parity index (GPI)
# NOTE(review): the original comment says this returns a list of per-year
# dictionaries - confirm against the installed wbdata version
wb.get_data("SE.ADT.1524.LT.FM.ZS", country="USA")

# date window (start, end) for later queries
range_start = datetime.datetime(2008, 1, 1)
range_end = datetime.datetime(2019, 1, 1)
date_range = (range_start, range_end)
# SH.CON.1524.FE.ZS     Condom use, population ages 15-24, female (% of females ages 15-24)
# Created to get information from World Bank API

import wbdata


# Id and name of every available data source

available_info = wbdata.get_source()
for source in available_info:
    source_id, source_name = source['id'], source['name']
    print(source_id, source_name)

# Indicators of source 33 (G20 Financial Inclusion Indicators)

inclusion_indicators = wbdata.get_indicator(source=33)
print(inclusion_indicators)

# 'CPTOTSAXN' for the USA, all years. Per the indicator description: the
# consumer price index reflects the change in prices for the average consumer
# of a constant basket of consumer goods; data is in nominal terms and
# seasonally adjusted.

cpi = wbdata.get_data('CPTOTSAXN', country='USA')
print(cpi)

示例#17
0
def plotNANperindicator(data, background=False):
    """Plot the share of missing values of every indicator, most-missing first.

    :param data: DataFrame whose columns are a MultiIndex with the indicator
        code on level 1; it must contain the 'SP.POP.TOTL' indicator, whose
        cell count is the denominator of the percentage
    :param background: when true, shade the plot background by the World Bank
        source of each indicator (looked up through ``wbdata``) and add a
        legend naming the sources
    :return: None; the figure is displayed with ``plt.show()``
    """
    indicators = list(set(data.columns.levels[1]))
    # Number of NaN cells per indicator, in the same order as ``indicators``
    how_many_nans = [
        data.xs(code, level=1, axis=1).isnull().astype(int).values.sum()
        for code in indicators
    ]
    # Positions of the indicators sorted from most to fewest NaNs
    ind = np.argsort(how_many_nans)[::-1]

    if background:
        sources = wbdata.get_source(display=False)
        id_to_sourceName = {src['id']: src['name'] for src in sources}

        all_indics = wbdata.get_indicator(display=False)
        indicator_to_id = {k['id']: k['source']['id'] for k in all_indics}
        # Indicators unknown to the catalogue get the placeholder source -1
        for code in set(indicators) - set(indicator_to_id):
            indicator_to_id[code] = -1
        id_to_sourceName[-1] = 'NaN'

        # Source id of every indicator, in plotting (sorted) order
        range_labels_int = [indicator_to_id[indicators[k]] for k in ind]
        unique_labels = list(set(range_labels_int))
        # One colour per distinct source
        cmap = plt.get_cmap('jet_r')
        color = cmap(np.linspace(0, 1.0, len(unique_labels)))

    def consecutive(values, stepsize=1):
        """Split *values* into runs of consecutive numbers.

        e.g. [1, 2, 3, 5, 7, 8, 9] -> [[1, 2, 3], [5], [7, 8, 9]]
        """
        return np.split(values, np.where(np.diff(values) != stepsize)[0] + 1)

    # The denominator does not depend on the indicator, so compute it once
    total_cells = data.xs('SP.POP.TOTL', level=1, axis=1).size

    plt.figure(figsize=(10, 5))
    plt.plot(range(0, len(indicators)),
             [100 * (how_many_nans[k] / total_cells) for k in ind])
    if background:
        plt.title(
            'Number of NaN values per indicator, sorted (background = indicator label)',
            fontsize=15)
    else:
        plt.title('Number of NaN values per indicator, sorted', fontsize=15)
    plt.xlabel('Indicators', fontsize=15)
    plt.ylabel('# NaN values (% of total)', fontsize=15)
    plt.xticks([])

    if background:
        legends = []
        # The colour index now advances once per source. It used to advance
        # once per shaded span, which mismatched colours and could run past
        # the end of ``color``.
        for a, label in enumerate(unique_labels):
            positions = [
                k for k, value in enumerate(range_labels_int)
                if value == label
            ]
            legends.append(
                mpatches.Patch(color=color[a],
                               alpha=0.3,
                               label=id_to_sourceName.get(label, str(label))))
            # Shade every run of neighbouring x positions with this source
            for run in consecutive(positions):
                plt.axvspan(run[0],
                            run[-1],
                            color=color[a],
                            alpha=0.3,
                            lw=2.0)

        # Build the legend once, after all patches exist; ncol is kept >= 1
        plt.legend(handles=legends,
                   loc='upper center',
                   bbox_to_anchor=(0.5, -0.06),
                   fancybox=True,
                   shadow=True,
                   ncol=max(1, int((1 / 5) * len(unique_labels))),
                   fontsize='small')

    plt.show()
示例#18
0
def plot_data(indicator, countries, start=2000, end=2015):
    """Render a map of the selected countries and a per-country line chart.

    The map is drawn from the ``geometry`` column of the ``df2`` rows whose
    ``id`` matches a selected country and is shown with ``st.pyplot``. The
    line chart has one trace per country, titled with the indicator's name,
    and is shown with ``st.plotly_chart``.

    :param indicator: World Bank indicator id
    :param countries: iterable of country ids as found in ``df2['id']``
    :param start: first year on the x-axis (inclusive)
    :param end: last year on the x-axis (inclusive)
    :return: None
    """
    from itertools import cycle

    ind = wb.get_indicator(indicator, display=False)

    title = ind[0]['name']

    # DataFrame.append was removed in pandas 2.0; concatenate the
    # per-country slices in one call instead.
    new_df = pd.concat([df2[df2['id'] == country] for country in countries])
    new_df['geometry'].plot(figsize=(20, 10))
    plt.axis('off')
    st.pyplot(plt)

    # Create a plotly figure
    colors = ['#F4652A', '#2E86C1', '#82427B', '#3A924C', '#34495E']
    trace_list = []
    layout = go.Layout(
        title=title,
        hovermode='closest',
        plot_bgcolor="white",
        xaxis={
            'title': '',
            'showgrid': True
        },
        yaxis={
            'title': 'deaths',
            'showgrid': True,
        })

    years = list(range(start, end + 1))
    # cycle() reuses the palette so a sixth (or later) country still gets a
    # trace; a plain zip() stopped after five countries.
    for country, color in zip(countries, cycle(colors)):
        trace = go.Scatter(
            x=years,
            # NOTE(review): get_country_indicator is defined outside this
            # block; assumed to return one value per year - confirm.
            y=get_country_indicator(country, indicator, start, end),
            name=country,
            line=dict(width=2, color=color),
        )
        trace_list.append(trace)

    fig = go.Figure(data=trace_list, layout=layout)
    fig.update_xaxes(showline=True,
                     ticks='outside',
                     tickmode='linear',
                     tick0=1,
                     dtick=2,
                     linewidth=1,
                     linecolor='black',
                     mirror=True)
    fig.update_yaxes(showline=True,
                     ticks='outside',
                     tickmode='linear',
                     tick0=500000,
                     dtick=250000,
                     linewidth=1,
                     linecolor='black',
                     mirror=True)
    fig.update_layout(
        legend=go.layout.Legend(
            traceorder="normal",
            font=dict(family="sans-serif", size=12, color="black"),
            bgcolor="white",
            bordercolor="Black",
            borderwidth=1),
    )
    st.plotly_chart(fig, width=0, height=0, sharing='streamlit')
示例#19
0
 def testGetIndicatorBySource(self):
     """Smoke test: listing indicators for source 1 must not raise."""
     source_id = 1
     wbdata.get_indicator(source=source_id)
示例#20
0
    def handle(self, *args, **options):
        """Scrape the configured World Bank indicators into ``DataIndicator`` rows.

        The list of indicators comes from a published Google Sheets CSV
        (columns: code, title, description, indicators, publisher). For each
        CSV row the referenced World Bank indicators are downloaded for the
        scraped countries, collected per country, re-keyed to Wazimap geo
        ids and saved with ``DataIndicator.objects.update_or_create``.

        Every country now gets its own ``_total``, ``_average`` and
        per-indicator dictionaries. They used to be shared objects, so a
        value written for one country showed up under all of them.
        """

        scraper = 'wb'
        scraper_source_url = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQypCD98NFYyTynEmKnx_m-cZ6BDOYCOu_GrzziImtRBTqo9zeviAi0ilcTzRJCMOTILx6GKleUz2cw/pub?gid=680891341&single=true&output=csv'

        # Create publisher if doesn't exist
        publisher, created = DataIndicatorPublisher.objects.update_or_create(
            slug='world-bank-data-scraper',
            defaults={
                'name': 'World Bank Data Scraper',
                'url': 'https://data.worldbank.org/',
                'description': 'Free and open access to global development data.'
            }
        )

        # CSV to list of rows, without the header row.
        # NOTE(review): csv.reader needs text lines; confirm urlopen yields
        # text on the Python version in use.
        scraper_source_csv_data = list(
            csv.reader(urlopen(scraper_source_url)))[1:]

        # Create dict of the indicators
        scraper_source_csv_header = ['code', 'title', 'description', 'indicators', 'publisher']
        scraper_indicators = [
            dict(zip(scraper_source_csv_header, data))
            for data in scraper_source_csv_data
        ]

        # Countries to scrape + map the id with Wazimap Geo Ids (geo_level + geo_code)
        scraper_countries = ["NG", "SN", "TZ"]
        wazimap_geo_ids = ["country-NGA", "country-SEN", "country-TZA"]
        scraper_countries_wazimap = dict(zip(scraper_countries, wazimap_geo_ids))

        for scraper_indicator in scraper_indicators:

            # DataIndicator needs
            code = 'scraper' + '.' + scraper + '.' + scraper_indicator.get('code')
            publisher_codes = ','.join(
                part.strip()
                for part in scraper_indicator.get('indicators').split('\n'))
            publisher_data = {
                'type': 'api',
                'values': []
            }

            # Create geo and _total + _average under each geo. Each entry is
            # a fresh dict so updating one never leaks into another.
            data_values = {}
            for scraper_country in scraper_countries:
                data_values[scraper_country] = {
                    '_total': {'_default': 0, 'values': {}},
                    '_average': {'_default': 0, 'values': {}}
                }

            # Download WB indicators and set values
            wb_indicators = [
                wbdata.get_indicator(indicator=publisher_code, display=False)[0]
                for publisher_code in publisher_codes.split(',')
            ]

            for wb_indicator in wb_indicators:
                indicator_id = wb_indicator.get('id')

                data_values_wb = {
                    '_name': wb_indicator.get('name'),

                    # TODO: Set these under data_values = {'<geo_code>': ... , '_indicators': {'<indicator.code>': ... }}
                    '_topics': wb_indicator.get('topics'),
                    '_source': wb_indicator.get('source'),
                    '_sourceNote': wb_indicator.get('sourceNote'),
                    '_sourceOrganization': wb_indicator.get('sourceOrganization'),

                    '_default': 0,
                    'values': {}
                }

                # Create indicator under each geo: a per-country copy with
                # its own 'values' dict
                for scraper_country in scraper_countries:
                    data_values[scraper_country][indicator_id] = dict(
                        data_values_wb, values={})

                try:
                    wb_indicator_values = wbdata.get_data(
                        indicator_id,
                        country=scraper_countries
                    )

                    # Whatever we receive, we add to publisher_data
                    publisher_data['values'].append(wb_indicator_values)

                    # Skip indicator if no values. list() makes the check
                    # work where filter() returns an iterator.
                    if not list(filter(self.filter_indicators,
                                       wb_indicator_values)):
                        self.stdout.write(self.style.WARNING(
                            'Skipped WB Indicator "%s"' % indicator_id))
                        continue

                    # Set default + total indicator value
                    # TODO: Determine how to use wb_indicator_values[0].get('decimal')
                    # NOTE(review): only the first returned row seeds
                    # '_default' and '_total', as in the original - confirm.
                    first_value = wb_indicator_values[0]
                    first_geo = data_values[first_value.get('country').get('id')]
                    first_geo[indicator_id]['_default'] = int(first_value.get('value'))
                    first_geo['_total']['_default'] += int(first_value.get('value'))
                    # Set the rest of the indicator values
                    for wb_indicator_value in wb_indicator_values:
                        # TODO: Determine how to use wb_indicator_value.get('decimal')
                        geo = data_values[wb_indicator_value.get('country').get('id')]
                        geo[indicator_id]['values'][wb_indicator_value.get('date')] = \
                            int(wb_indicator_value.get('value'))

                except Exception as e:
                    # Best effort: report and move on to the next indicator.
                    # The exception itself is formatted because ``e.message``
                    # does not exist on Python 3.
                    self.stdout.write(self.style.NOTICE(
                        'NOTICE "%s"' % (e,)))

            # TODO: Set the _average of all values:

            # Change data_value keys from scraper_countries to wazimap_geo_ids
            data_values = {
                scraper_countries_wazimap[country_key]: country_values
                for country_key, country_values in data_values.items()
            }

            # Create DataIndicator if it doesn't exist, update otherwise
            indicator, created = DataIndicator.objects.update_or_create(
                code=code,
                publisher=publisher,
                scraper=scraper,
                defaults={
                    'title': scraper_indicator.get('title'),
                    'description': scraper_indicator.get('description'),

                    'publisher_codes': publisher_codes,
                    'publisher_data': publisher_data,

                    'data_values': data_values
                }
            )

            self.stdout.write(self.style.SUCCESS(
                    'Successfully created indicator "%s"' % indicator.code))
示例#21
0
 def testGetIndicatorByTopic(self):
     """Smoke test: listing indicators for topic "1" must not raise."""
     topic_id = "1"
     wbdata.get_indicator(topic=topic_id)
示例#22
0
 def testGetAllIndicators(self):
     """Smoke test: listing indicators with no filter must not raise."""
     no_filters = {}
     wbdata.get_indicator(**no_filters)
示例#23
0
 def testGetIndicatorBySourceAndTopic(self):
     """Smoke test: filtering by source "1" and topic 1 together must not raise."""
     filters = {"source": "1", "topic": 1}
     wbdata.get_indicator(**filters)
示例#24
0
 def testGetIndicatorBySource(self):
     """Smoke test: listing indicators for source 1 must not raise."""
     source_id = 1
     wbdata.get_indicator(source=source_id)
import wbdata
import numpy as np
import pandas as pd
from numpy.linalg import inv
from matplotlib import pyplot
import matplotlib.pyplot as plt
import seaborn; seaborn.set()

# Browse the data sources, then the indicators of source 25
wbdata.get_source()
wbdata.get_indicator(source=25)

# Preview both series for the USA (results are not stored)
for series_code in ('NY.GDP.PCAP.PP.KD', 'SL.TLF.BASC.ZS'):
    wbdata.get_data(series_code, country='USA')

country = [entry['id'] for entry in wbdata.get_country('USA', display=False)]
# Column names for the two series:
#   "GDP per capita, PPP (constant 2011 international $)"
#   "Labor force with basic education (% of total working-age population
#    with basic education)"
indicators = {
    "NY.GDP.PCAP.PP.KD": "gdppc_ppp",
    "SL.TLF.BASC.ZS": "laborforce_basic_educ",
}

df = wbdata.get_dataframe(indicators, country, convert_date=False)

# Round-trip through a CSV file
csv_path = 'hw2.csv'
df.to_csv(csv_path)
df.describe()

dataset = pd.read_csv(csv_path)

print(dataset)
# Keep only the rows without missing values
data = dataset.dropna()
print(data)

# Third column of the cleaned table
X = data.iloc[:, 2].copy()
示例#26
0
 def testGetIndicatorBySourceAndTopic(self):
     """Smoke test: filtering by source "1" and topic 1 together must not raise."""
     filters = {"source": "1", "topic": 1}
     wbdata.get_indicator(**filters)
示例#27
0
def get_indicators(i, name):
    """Return the indicators of World Bank source ``i`` without printing them.

    :param i: source id passed to ``wbdata.get_indicator``
    :param name: accepted but not used by this function
    :return: whatever ``wbdata.get_indicator(source=i, display=False)`` returns
    """
    return wbdata.get_indicator(source=i, display=False)
示例#28
0
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import wbdata

# Fetched once at import time and reused by the functions below:
# the indicators of topic 19 (climate change, per the original author).
data_categories = wbdata.get_indicator(topic=19)


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# USE:
# output is a Boolean - when true, each entry is printed as well as returned
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def list_categories(output=True):
    """Return ``[position, name, id]`` for every entry of ``data_categories``.

    :param output: when true, also print each entry as "<position>. name id"
    :return: list of ``[position, name, id]`` lists, in catalogue order
    """
    category_list = []
    for position, item in enumerate(data_categories):
        entry = [position, item["name"], item["id"]]
        if output:
            print(str(position) + ".", entry[1], entry[2])
        category_list.append(entry)

    return category_list


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# USE:
# number is the number of a data set according to list_categories
# returns an object {id of data set: name of data set}
示例#29
0
 def get_indicator(self):
     """Return the indicators for ``self.source``.

     When no source has been selected, a notice is printed,
     ``self.search_sources()`` is called and None is returned.
     """
     # Identity check: ``== None`` can be fooled by a custom __eq__
     if self.source is None:
         print('No source Selected')
         self.search_sources()
         return None
     return wb.get_indicator(self.source)
示例#30
0
 def testGetIndicatorBySource(self):
     """Every indicator returned for source 1 must report source id "1"."""
     for entry in wbd.get_indicator(source=1):
         assert entry["source"]["id"] == "1"
示例#31
0
 def testGetIndicatorByTopic(self):
     """Every indicator returned for topic 1 must list topic id "1"."""
     for entry in wbd.get_indicator(topic=1):
         topic_ids = [topic["id"] for topic in entry["topics"]]
         assert "1" in topic_ids
示例#32
0
# a simple matplotlib plot with legend, labels and a title
dfu.plot()
plt.legend(loc='best')
plt.title("GNI Per Capita ($USD, Atlas Method)")
plt.xlabel('Date')
# the y-axis label previously lacked its closing parenthesis
plt.ylabel('GNI Per Capita ($USD, Atlas Method)')


# In[ ]:

wbdata.get_source()


# In[ ]:

# 1 Doing Business
wbdata.get_indicator(source=1)


# In[ ]:

wbdata.search_countries("Brazil")


# In[ ]:

# Signature for reference:
# wbdata.get_data(indicator, country=u'all', data_date=None, convert_date=False, pandas=False,
#                 column_name=u'value', keep_levels=False)
wbdata.get_data("IC.BUS.EASE.XQ", country=u'BRA')


# In[ ]:
示例#33
0
 def testGetIndicatorBySourceAndTopicFails(self):
     """Passing both a source and a topic filter must raise ValueError."""
     conflicting_filters = {"source": "1", "topic": 1}
     with pytest.raises(ValueError):
         wbd.get_indicator(**conflicting_filters)
示例#34
0
# http://wbdata.readthedocs.org/en/latest/
# http://wbdata.readthedocs.org/en/latest/fetcher.html

import wbdata
import datetime
import pandas as pd
'''
This wbdata is an interactive console to work with World Bank's API.
You can run wbdata.get_source() to see all sources of information and the
respective numbers to run in the below code to fetch the data.
Also, using Pandas package may facilitate the task to convert the lists and dictionaries
retrieved from the API.

Total Population -> source=16
'''

# wbdata.get_source()
wbdata.get_indicator(source=16)

# Time window used for the query
window_start = datetime.datetime(1960, 1, 1)
window_end = datetime.datetime(2014, 1, 1)
data_date = (window_start, window_end)

# Fetch "SP.POP.TOTL" and hold it in df
population = wbdata.get_data("SP.POP.TOTL", pandas=True, data_date=data_date)
df = pd.DataFrame(population)

# Write it to a CSV - example in /data-pipeline
df.to_csv("YOURPATH/population_total_1960-2014.csv", sep=',')
示例#35
0
def print_wb_indicators(source):
    """Call ``wbdata.get_indicator`` for ``source``; the result is not returned.

    :param source: source id forwarded as the ``source`` keyword
    """
    query = {'source': source}
    wbdata.get_indicator(**query)