def stealingEnergy(countryname):
    """Append World Bank electricity series to the module-level result lists.

    For ``countryname == "USA"`` the renewable and total electricity
    production series (kWh) are appended to ``renewableProdUS`` and
    ``electricityProdUS``.  Any other value appends China's series to
    ``renewableProdCHN`` and ``electricityProdCHN``.  Nothing is returned.
    """
    period = (datetime.datetime(1971, 1, 1), datetime.datetime(2011, 1, 1))

    def fetch(indicator, iso_code):
        # Same request for every series; only the indicator and country vary.
        return wbdata.get_data(indicator, data_date=period, country=iso_code, pandas=True)

    if countryname != "USA":
        # Every non-"USA" argument is treated as China.
        renewableProdCHN.extend(fetch("EG.ELC.RNEW.KH", "CHN"))
        electricityProdCHN.extend(fetch("EG.ELC.PROD.KH", "CHN"))
        return

    renewableProdUS.extend(fetch("EG.ELC.RNEW.KH", "USA"))
    electricityProdUS.extend(fetch("EG.ELC.PROD.KH", "USA"))
Exemplo n.º 2
0
def load_indicator(con, indicator, db_column_name):
    """Store every value of a World Bank indicator in ``country_annual_indicators``.

    Args:
        con: open sqlite3 connection.
        indicator: World Bank indicator code passed to ``wbdata.get_data``.
        db_column_name: column that receives the values; it is interpolated
            into the SQL text, so it must be a trusted identifier.
    """
    add_column_sql = '''alter table country_annual_indicators
                       add column {} real'''.format(db_column_name)
    update_sql = '''update country_annual_indicators
                       set {}=?
                       where iso_code=? and year=?'''.format(db_column_name)
    insert_sql = '''INSERT INTO country_annual_indicators ('iso_code', 'year', {})
                       SELECT ?, ?, ?
                       WHERE (Select Changes() = 0)'''.format(db_column_name)

    cursor = con.cursor()

    # Adding the column fails with OperationalError when it already exists;
    # that case is deliberately ignored.
    try:
        cursor.execute(add_column_sql)
        con.commit()
    except sqlite3.OperationalError:
        pass

    for record in wbdata.get_data(indicator):
        iso_code = record['country']['id'].lower()
        year = record['date']
        value = record['value']

        # Try the update first ...
        cursor.execute(update_sql, (value, iso_code, year))
        # ... and insert only when the update touched no row (Changes() = 0).
        cursor.execute(insert_sql, (iso_code, year, value))
    con.commit()
Exemplo n.º 3
0
 def test_quarterly_freq(self):
     """Request a quarterly series (freq="Q") for Chile and check the first value."""
     got = wbd.get_data(
         "DP.DOD.DECD.CR.BC.CD",
         country="chl",
         data_date=dt.datetime(2013, 1, 1),
         freq="Q",
     )[0]["value"]
     # Exact float comparison against a fixed expected value.
     assert got == 31049138725.7794
Exemplo n.º 4
0
def getWb(year):
    """Fetch GDP per capita, energy consumption and population for one year.

    The three series are joined column-wise, rows with any missing value are
    dropped, and the table is written to ``out.csv``.

    Returns:
        Tuple ``(gdp, energy, population)`` of arrays built by transposing a
        one-row array of each column's values.
    """
    when = datetime.datetime(year, 1, 1)
    gdp = wbdata.get_data("NY.GDP.PCAP.CD", data_date=when, pandas=True)
    energy = wbdata.get_data("1.1_TOTAL.FINAL.ENERGY.CONSUM",
                             data_date=when,
                             pandas=True)
    population = wbdata.get_data("SP.POP.TOTL", data_date=when, pandas=True)

    table = pd.concat([gdp, energy, population], axis=1)
    table.columns = ["GDP", "Energy_Consumption", "Population"]
    table = table.dropna(axis=0, how="any")
    pd.DataFrame(table).to_csv("out.csv")

    def as_column(name):
        # One-row array transposed into a single column.
        return np.transpose(np.array([table[name].tolist()]))

    return (as_column("GDP"), as_column("Energy_Consumption"),
            as_column("Population"))
def get_countryList(ind, ctry, year):
    """Return the country names found in the response for ``ind`` in ``year``.

    The date range starts and ends on 1 January of ``year``.
    """
    first_of_year = datetime.datetime(year, 1, 1)
    rows = wbdata.get_data(ind,
                           country=ctry,
                           data_date=(first_of_year, first_of_year))
    return [row['country']['value'] for row in rows]
def get_countryList(ind, ctry, year):
    """List the ``country.value`` field of every record returned for one year."""
    # Start and end of the range are both 1 January of the requested year.
    single_year = (datetime.datetime(year, 1, 1),) * 2
    response = wbdata.get_data(ind, country=ctry, data_date=single_year)

    names = []
    names.extend(entry['country']['value'] for entry in response)
    return names
Exemplo n.º 7
0
def form_to_data(myform):
    """Fetch World Bank data for the country/indicator/years chosen in a form.

    Args:
        myform: validated form whose ``cleaned_data`` holds ``my_country``,
            ``my_indicator``, ``from_`` and (optionally empty) ``to_``.

    Returns:
        The ``wbdata.get_data`` result.  With ``to_`` set the request spans
        ``from_``..``to_``; otherwise it covers the single ``from_`` year.
        If the lookup raises IndexError the request form is rendered instead.
    """
    country = myform.cleaned_data['my_country']
    indicator = myform.cleaned_data['my_indicator']
    fromyr = int(myform.cleaned_data['from_'])
    toyr = myform.cleaned_data['to_']

    try:
        if toyr:
            data_date = (datetime(fromyr, 1, 1), datetime(int(toyr), 1, 1))
        else:
            data_date = datetime(fromyr, 1, 1)
        return wbdata.get_data(indicator, [country], data_date=data_date)

    # An except clause must name the exception class.  The original named an
    # instance, IndexError('No data'), which can never match.
    except IndexError:
        # NOTE(review): `request` is not a parameter of this function; it must
        # come from the enclosing module - confirm against the caller.
        return render(request, 'request_stats.html', {'form': myform})
Exemplo n.º 8
0
def carbon():
    """Handle the carbon form: show it on GET, report a value on POST.

    On POST the ``ccheck`` field is upper-cased and used as the country code
    for indicator ``EN.ATM.CO2E.EG.ZS``.  The reported value is the last
    non-null one in the response, as before.

    Returns:
        The rendered form, or a plain-text message.
    """
    if request.method != "POST":
        return render_template("form.html")

    country_code = request.form.get("ccheck")
    if not country_code:
        # A missing field used to crash with AttributeError on None.upper().
        return render_template("form.html")

    emission_per_capita = w.get_data("EN.ATM.CO2E.EG.ZS",
                                     country=country_code.upper())
    value = None
    for record in emission_per_capita:
        if record['value'] is not None:
            value = str(record['value'])

    if value is None:
        # Previously `value` was unbound here and raised UnboundLocalError.
        return "No Carbon Dioxide Density data is available for the selected country."
    return "Your Country's Carbon Dioxide Density(kg per kg of oil equivalent use): " + value
 def get_gdp(self):
     """Build a (country_code, gdp) table for the codes in ``self.loan_data``.

     Non-string codes are skipped.  For each remaining code the value of the
     second record (index 1) of the ``NY.GDP.PCAP.CD`` response is used.
     """
     rows = []
     for code in self.loan_data.country_code.unique():
         if not isinstance(code, str):
             continue
         records = wbdata.get_data("NY.GDP.PCAP.CD", country=code)
         rows.append([code, pd.to_numeric(records[1]['value'])])
     return pd.DataFrame(rows, columns=['country_code', 'gdp'])
Exemplo n.º 10
0
def wb_query(indicator,
             countries='all',
             start_year=config.START_YEAR,
             end_year=config.END_YEAR,
             iso3=True):
    """
    Retrieve data for a world bank indicator,
    For all world bank indicators, see:
    https://data.worldbank.org/indicator
    
    Credit to wbdata for a wrapper around API requests: https://wbdata.readthedocs.io/en/latest/
    
    Input
    -----
    indicator (str) - the world bank indicator as a string, i.e. '4.1_SHARE.RE.IN.ELECTRICITY' is share of RE in a country's generation mix.
    countries (list) - Default 'all' will pull all countries, if specifying a single country or a list of countries, double check spelling w/ world bank
    start_year (int) - First year to pull data from (the range starts on 1 January of this year).
    end_year (int) - Last year to pull data from (the range ends on 31 December of this year).
    iso3 (bool) - When True, add an 'iso' column computed from the country name with helper_functions.country_name_to_iso3.
        
    Outputs
    -------
    Pandas DataFrame - Long, with columns 'country', 'year' (int), 'value' (float) and 'indicator', plus 'iso' when iso3 is True.
    """

    # --- Define daterange as tuple ---
    daterange = (datetime.datetime(start_year, 1,
                                   1), datetime.datetime(end_year, 12, 31))

    # --- Make api requests ---
    # Named `records` so the local does not shadow this function's own name.
    records = wb.get_data(indicator, data_date=daterange, country=countries)

    # --- Package results as a df ---
    df = pd.DataFrame({
        'country': [d['country']['value'] for d in records],
        'year': [d['date'] for d in records],
        'value': [d['value'] for d in records]
    })
    df['indicator'] = indicator
    df['value'] = df['value'].astype(float)
    df['year'] = df['year'].astype(int)

    if iso3:
        df['iso'] = df['country'].apply(helper_functions.country_name_to_iso3)

    return df
Exemplo n.º 11
0
    def getWBDataFromWeb(self, pStockCode, pStart, pEnd):
        """Fetch indicator ``IC.BUS.EASE.XQ`` for USA and GBR, 2010-2011.

        The response is stored in ``self.data``, each record is printed with
        its country id, and the response is returned.  ``pStockCode``,
        ``pStart`` and ``pEnd`` are not used by the body.
        """
        # Docs: https://wbdata.readthedocs.io/en/latest/
        # The three lookups below are called for their own output; their
        # return values are discarded.
        wbdata.get_source()
        wbdata.get_indicator(source=1)
        wbdata.search_countries("united")

        period = (datetime.datetime(2010, 1, 1), datetime.datetime(2011, 1, 1))
        self.data = wbdata.get_data("IC.BUS.EASE.XQ",
                                    country=("USA", "GBR"),
                                    data_date=period)
        for record in self.data:
            print(record['country']['id'], record)

        # Unused DataFrame alternative kept from the original:
        #   indicators = {"IC.BUS.EASE.XQ": "doing_business", "NY.GDP.PCAP.PP.KD": "gdppc"}
        #   df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)
        return self.data
def get_dataset_wb(indicator_id):
    """Return the dataset for ``indicator_id``, downloading it on first use.

    Args:
        indicator_id: World Bank indicator_id for which to retrieve the dataset
    Returns:
        A pandas DataFrame of the dataset, memoised in ``known_datasets_from_wb``
    """
    if indicator_id not in known_datasets_from_wb:
        series = wbdata.get_data(indicator=indicator_id,
                                 convert_date=True,
                                 pandas=True)
        # Unstacking level 0 removes the MultiIndex.
        known_datasets_from_wb[indicator_id] = series.unstack(level=0)
    return known_datasets_from_wb[indicator_id]
def test_year(ind, ctry, year):
    """Print, per returned record, whether ``ind`` has a value in ``year``.

    Output uses %-formatting inside ``print(...)`` so the same text is
    produced under Python 2 and Python 3.  The original print statements are
    a SyntaxError on Python 3.
    """
    import wbdata
    #### limit time frame to one year as the study only need one year data
    data_date = (datetime.datetime(year, 1, 1), datetime.datetime(year, 1, 1))
    data = wbdata.get_data(ind, ctry, data_date)
    for c in data:
        # The two original branches differed only in the status text.
        status = 'NOT available' if c['value'] is None else 'Available'
        print('%s %s' % ('Country:', c['country']['value']))
        print('%s %s' % ('Data:', c['indicator']['value']))
        print('%s %s' % ('Year:', c['date']))
        print('%s %s' % ('Staus:', status))
def test_year(ind, ctry, year):
    """Report availability of indicator ``ind`` for ``ctry`` in a single year.

    Four lines (country, indicator name, year, status) are printed for each
    record.  The text is built with %-formatting and passed to ``print(...)``,
    which gives the same output on Python 2 and Python 3; the original print
    statements do not compile on Python 3.
    """
    import wbdata
    #### limit time frame to one year as the study only need one year data
    first_day = datetime.datetime(year, 1, 1)
    data = wbdata.get_data(ind, ctry, (first_day, first_day))
    for c in data:
        if c['value'] is None:
            status = 'NOT available'
        else:
            status = 'Available'
        report = (
            ('Country:', c['country']['value']),
            ('Data:', c['indicator']['value']),
            ('Year:', c['date']),
            ('Staus:', status),
        )
        for label, text in report:
            print('%s %s' % (label, text))
Exemplo n.º 15
0
def get_ind_preview(y1=2010, ind="FR.INR.LEND"):
    """Return a JSON preview of indicator ``ind`` for year ``y1``.

    The response carries the values for ``preview_countries`` under ``data``
    and the indicator metadata under ``details``.  ``details['dev']`` holds a
    usage hint: quarterly data, scarce data, or no data at all.

    Returns:
        ``jsonify(...)`` response, or an empty dict when ``ind`` is falsy.
    """
    if not ind:
        return {}

    if y1:
        ydt = datetime(int(y1), 1, 1)
    # NOTE(review): with a falsy y1, `ydt` is never bound and the get_data
    # call below fails; the intended behaviour for that case is not visible.
    ind_details = wbdata.get_indicator(ind, display=False)[0]
    ind_details['dev'] = "Feel free to use this variable!"
    try:
        data = rpy2functions.get_values(wbdata.get_data(ind, data_date=(ydt, ydt), country=preview_countries))
        # `data.keys()[0]` raises TypeError on Python 3, and the handler below
        # then reported every indicator as "No data found".
        # next(iter(...)) works on both versions; an empty result falls
        # through to the scarce-data check.
        first_key = next(iter(data), '')
        if 'q' in first_key.split('.')[0].lower():
            ind_details['dev'] = "If chosen, please make sure all variables are quarterly."
        if sum(map(map_adder, data.values())) < 5: #wont add properly.
            ind_details['dev'] = "Very scarce data. Avoid using this variable."
    except TypeError:
        data = {'{0}.{1}'.format(str(y1), cont):'ERROR' for cont in preview_countries}
        ind_details['dev'] = "No data found for this variable. Do not use this variable."
    return jsonify({'data': data, 'details': ind_details})
Exemplo n.º 16
0
def update_population_data(population_df):
    """Refresh ``population_df`` with World Bank population data since 2015.

    The ``SP.POP.TOTL`` series from 2015-01-01 to now is reshaped with
    ``reformat_dataframe``, used to update matching cells of the
    'Country Name'-sorted input, and any of its columns the input lacks are
    appended.

    Returns:
        The updated DataFrame.
    """
    period = (datetime.datetime(2015, 1, 1), datetime.datetime.now())
    fetched = wbdata.get_data(indicator='SP.POP.TOTL',
                              data_date=period,
                              pandas=True).to_frame()

    latest = reformat_dataframe(fetched)
    candidate_columns = list(latest)

    population_df = population_df.sort_values(by=['Country Name'])
    latest = latest.sort_values(by=['Country Name'])
    population_df.update(latest)

    for column in candidate_columns:
        if column in population_df.columns:
            continue
        population_df = pd.concat([population_df, latest[column]],
                                  axis=1,
                                  sort=True)

    return population_df
Exemplo n.º 17
0
def update_population_data(pop_df):
    """Refresh ``pop_df`` with World Bank population data from 2015 to now.

    The ``SP.POP.TOTL`` series is pivoted to one row per 'Country Name' and
    one column per date, with missing values filled with 0.  It updates the
    matching cells of ``pop_df``; columns ``pop_df`` lacks are appended.

    Returns:
        The updated DataFrame.
    """
    period = (datetime.datetime(2015, 1, 1), datetime.datetime.now())
    fetched = wbdata.get_data(indicator='SP.POP.TOTL',
                              data_date=period,
                              pandas=True).to_frame()

    # Long -> wide: country rows, date columns.
    long_form = fetched['value'].to_frame().reset_index()
    long_form.rename(columns={'country': 'Country Name'}, inplace=True)
    wide = long_form.pivot(index='Country Name',
                           columns='date',
                           values='value')
    wide = wide.apply(pd.to_numeric).fillna(value=0).reset_index()

    candidate_columns = list(wide)
    pop_df = pop_df.sort_values(by=['Country Name'])
    wide = wide.sort_values(by=['Country Name'])
    pop_df.update(wide)

    for column in candidate_columns:
        if column in pop_df.columns:
            continue
        pop_df = pd.concat([pop_df, wide[column]], axis=1, sort=True)

    return pop_df
Exemplo n.º 18
0
def get_data_spec(request):
    """Build a ``GetDataSpec`` for one parametrised ``get_data`` call.

    ``request.param`` supplies ``(country, data_date, source, convert_date)``.
    The spec bundles the fetched result with those inputs and the expected
    country, date and value for indicator ``NY.GDP.MKTP.CD``.
    """
    country, data_date, source, convert_date = request.param

    fetched = wbd.get_data(
        "NY.GDP.MKTP.CD",
        country=country,
        data_date=data_date,
        source=source,
        convert_date=convert_date,
    )

    # The expected date matches the representation get_data was asked for.
    if convert_date:
        expected_date = dt.datetime(2010, 1, 1)
    else:
        expected_date = "2010"

    # A falsy source is looked up under "2".
    values_by_source = {
        "2": 2117039512.19512,
        "11": 2117008130.0813
    }

    return GetDataSpec(
        result=fetched,
        country=country,
        data_date=data_date,
        source=source,
        convert_date=convert_date,
        expected_country="Eritrea",
        expected_date=expected_date,
        expected_value=values_by_source[source or "2"],
    )
Exemplo n.º 19
0
def update_co2_data(co2_df):
    """Refresh ``co2_df`` with World Bank CO2 emission data since 2015.

    The ``EN.ATM.CO2E.KT`` series from 2015-01-01 to now is reshaped with
    ``reformat_dataframe``, used to update matching cells of the
    'Country Name'-sorted input, and any of its columns the input lacks are
    appended.

    Returns:
        The updated DataFrame.
    """
    period = (datetime.datetime(2015, 1, 1), datetime.datetime.now())
    fetched = wbdata.get_data(indicator='EN.ATM.CO2E.KT',
                              data_date=period,
                              pandas=True).to_frame()

    latest = reformat_dataframe(fetched)
    candidate_columns = list(latest)

    co2_df = co2_df.sort_values(by=['Country Name'])
    latest = latest.sort_values(by=['Country Name'])
    co2_df.update(latest)

    for column in candidate_columns:
        if column in co2_df.columns:
            continue
        co2_df = pd.concat([co2_df, latest[column]], axis=1, sort=True)

    return co2_df
Exemplo n.º 20
0
def get_latest_wb_indicator_by_country(name: str, indicator: str):
    """Return a two-column frame of ``indicator`` values per country for 2016.

    Args:
        name: label of the value column in the result.
        indicator: World Bank indicator code.

    Returns:
        DataFrame with columns ``country`` and ``name``; missing values are NaN.
    """
    # TODO: select the latest available date automatically instead of 2016.
    year_2016 = datetime.datetime(2016, 1, 1)
    raw_data = wbdata.get_data(indicator=indicator,
                               data_date=(year_2016, year_2016))

    countries, values = [], []
    for entry in raw_data:
        countries.append(entry['country']['value'])
        values.append(np.nan if entry['value'] is None else entry['value'])

    return pd.DataFrame({"country": countries, name: values})
Exemplo n.º 21
0
    def run(self):
        """Load indicator ``EG.FEC.RNEW.ZS`` for every country into the database.

        Tables are created first.  For each country returned by
        ``wbdata.get_country()`` every record with a value becomes a
        ``RenewableEnergyCountry`` row; rows are committed once per country.
        """
        db.create_all()
        # get list of all country codes
        for country in wbdata.get_country():
            try:
                country_data = wbdata.get_data("EG.FEC.RNEW.ZS",
                                               country=country["id"])
            # Throws a NoneType error if country doesn't exist in world bank db
            except TypeError:
                print(f"Could not get data for country {country['name']}")
                continue

            for data_entry in country_data:
                # Skip only missing values.  The old truthiness test also
                # dropped a legitimate value of 0.0.
                if data_entry["value"] is None:
                    continue
                db.session.add(
                    RenewableEnergyCountry(
                        country=data_entry["country"]["value"],
                        year=data_entry["date"],
                        renewable_energy_usage=data_entry["value"],
                    ))
            # One commit per country instead of one per record.
            db.session.commit()
Exemplo n.º 22
0
def update_co2_data(co2_df):
    """Refresh ``co2_df`` with World Bank CO2 emission data from 2015 to now.

    The ``EN.ATM.CO2E.KT`` series is pivoted to one row per 'Country Name'
    and one column per date, with missing values filled with 0.  It updates
    the matching cells of ``co2_df``; columns ``co2_df`` lacks are appended.

    Returns:
        The updated DataFrame.
    """
    period = (datetime.datetime(2015, 1, 1), datetime.datetime.now())
    fetched = wbdata.get_data(indicator='EN.ATM.CO2E.KT',
                              data_date=period,
                              pandas=True).to_frame()

    # Long -> wide: country rows, date columns.
    long_form = fetched['value'].to_frame().reset_index()
    long_form.rename(columns={'country': 'Country Name'}, inplace=True)
    wide = long_form.pivot(index='Country Name',
                           columns='date',
                           values='value')
    wide = wide.apply(pd.to_numeric).fillna(value=0).reset_index()

    candidate_columns = list(wide)

    co2_df = co2_df.sort_values(by=['Country Name'])
    wide = wide.sort_values(by=['Country Name'])
    co2_df.update(wide)

    for column in candidate_columns:
        if column in co2_df.columns:
            continue
        co2_df = pd.concat([co2_df, wide[column]], axis=1, sort=True)

    return co2_df
Exemplo n.º 23
0
    def collect_data(self):
        """Fetch ``self.code`` from ``self.provider`` and store it in ``self.data``.

        ``self.provider`` is either a callable (called with ``self.code`` when
        that is truthy, otherwise with no arguments) or one of the strings
        'fred', 'eod_hist', 'schiller', 'quandl', 'bls' or 'worldbank'.  API
        tokens are read from ``TOKEN_*`` environment variables after
        ``load_env()`` has run.

        Raises:
            Exception: when the 'eod_hist' request still fails after the
                uncached retry.

        NOTE(review): an unrecognised provider string matches no branch, so
        ``self.data`` is left untouched and the final message is still printed.
        """

        load_env()
        if isfunction(self.provider):
            if self.code:
                self.data = self.provider(self.code)
            else:
                self.data = self.provider()

        elif self.provider == 'fred':
            fred = Fred(api_key=os.environ['TOKEN_FRED'])
            self.data = fred.get_series(self.code,
                                        observation_start=self.start_dt,
                                        observation_end=self.end_dt)

        elif self.provider == 'eod_hist':
            url = 'https://eodhistoricaldata.com/api/eod/{0}'.format(self.code)
            params = {'api_token': os.environ['TOKEN_EODHIST']}
            # Responses are cached for one day.
            expire_after = td(days=1).total_seconds()
            session = requests_cache.CachedSession(cache_name='cache',
                                                   backend='sqlite',
                                                   expire_after=expire_after)
            r = session.get(url, params=params)
            if r.status_code != requests.codes.ok:
                # Retry once with a plain (uncached) session.
                session = requests.Session()
                r = session.get(url, params=params)
            if r.status_code == requests.codes.ok:
                df = pd.read_csv(StringIO(r.text),
                                 skipfooter=1,
                                 parse_dates=[0],
                                 index_col=0,
                                 engine='python')
                self.data = df['Close']
            else:
                raise Exception(r.status_code, r.reason, url)

        elif self.provider == 'schiller':
            url = 'http://www.econ.yale.edu/~shiller/data/ie_data_with_TRCAPE.xls'
            webpage = requests.get(url, stream=True)
            self.data = pd.read_excel(io.BytesIO(webpage.content),
                                      'Data',
                                      header=7,
                                      skipfooter=1)
            # Dates are parsed as year.month numbers, so they need two decimals
            # before parsing.  The old `str(x).format(x, '4.2f')` was a no-op
            # (the string has no replacement fields), so October (e.g. 1871.1)
            # was read as January.
            self.data.index = self.data['Date'].apply(
                lambda x: dt.strptime(format(float(x), '4.2f'), '%Y.%m'))
            self.data = self.data[self.code]
            print(self.data.tail(5))

        elif self.provider == 'quandl':
            self.data = quandl.get(self.code,
                                   authtoken=os.environ['TOKEN_QUANDL'],
                                   collapse="quarterly",
                                   start_date=self.start_dt,
                                   end_date=self.end_dt)['Value']

        elif self.provider == 'bls':
            self.data = bls.get_series(
                [self.code],
                startyear=dt.strptime(self.start_dt, '%Y-%m-%d').year,
                endyear=dt.strptime(self.end_dt, '%Y-%m-%d').year,
                key=os.environ['TOKEN_BLS'])

        elif self.provider == 'worldbank':
            self.data = wbdata.get_data(
                self.code,
                country='US',
                data_date=(dt.strptime(self.start_dt, '%Y-%m-%d'),
                           dt.strptime(self.end_dt, '%Y-%m-%d')),
                convert_date=True,
                pandas=True,
                keep_levels=False)
            print(self.data.tail(5))

        print("Collected data for [{0}]".format(self.code))
Exemplo n.º 24
0
def get_data(from_date=datetime.datetime(2010, 1, 1), to_date=None, variable="FR.INR.LEND"):
    """Return ``functions.get_values`` of a World Bank series over a date range.

    Args:
        from_date: start of the range.
        to_date: end of the range; ``None`` (the default) means the time of
            the call.  The old default, ``datetime.datetime.now()``, was
            evaluated once at import and went stale in long-running processes.
        variable: indicator code; it is upper-cased before the request.
    """
    if to_date is None:
        to_date = datetime.datetime.now()
    duration = (from_date, to_date)
    return functions.get_values(wbdata.get_data(variable.upper(), data_date=duration))
# http://wbdata.readthedocs.org/en/latest/
# http://wbdata.readthedocs.org/en/latest/fetcher.html

import wbdata
import datetime
import pandas as pd

'''
This wbdata is an interactive console to work with World Bank's API.
You can run wbdata.get_source() to see all sources of information and the
respective numbers to run in the below code to fetch the data.
Also, using Pandas package may facilitate the task to convert the lists and dictionaries
retrieved from the API.

Total Population -> source=16
'''

#wbdata.get_source()
# Look up the indicators of source 16; the return value is not used.
wbdata.get_indicator(source=16)

# Define time range to search for data
data_date = (datetime.datetime(1960, 1, 1), datetime.datetime(2014, 1, 1))

# Fetch total population (SP.POP.TOTL) for that range and store it as df
df = pd.DataFrame(wbdata.get_data("SP.POP.TOTL", pandas = True, data_date=data_date))

# Write it to a CSV - example in /data-pipeline.
# NOTE(review): "YOURPATH" looks like a placeholder to replace before running.
df.to_csv ("YOURPATH/population_total_1960-2014.csv", sep = ',')
import numpy as np
#print(wb.get_data("BM.KLT.DINV.CD.WD", "ARB", pandas=True))

# Collect the distinct indicator codes listed in ind.csv, in sorted order.
file = pd.read_csv("ind.csv")
indicators = file["Indicator Code"]
unique_indicators = list(set(indicators))
unique_indicators.sort()

l = []  # not used in the visible code
from_to = (dt.datetime(1970, 1, 1), dt.datetime(2019, 1, 1))
list_indicators = {}  # not used in the visible code
c = 1  # progress counter, printed once per indicator
# Only the first ten indicators are processed.
for i in unique_indicators[:10]:
    print(c)
    c += 1
    # NOTE(review): `wb`, `dt` and `country_codes` are not defined in this
    # snippet - presumably set up earlier in the file.
    result = wb.get_data(i, country_codes, pandas=True, data_date=from_to)
    maxi = list()  # longest run of years without a NaN found so far
    counter = 0  # not used in the visible code
    temp_maxi = list()  # current run of consecutive years without a NaN
    # Walk the years from 2019 back to 1970.
    for year in reversed(range(1970, 2020)):
        # One NaN flag per entry whose second index element equals this year.
        temp = [
            np.isnan(result[x]) for x in result.keys() if x[1] == str(year)
        ]
        print(temp)
        if True in temp:
            # A missing value ends the current run; keep the run if it is the
            # longest seen so far, then start a new one.
            if len(temp_maxi) > len(maxi):
                maxi = temp_maxi[:]
            temp_maxi = list()
            continue
        temp_maxi.append(year)
        #print(temp_maxi)
    # NOTE(review): a run that reaches 1970 is never compared with `maxi` in
    # the visible code - the snippet may be truncated.
Exemplo n.º 27
0
def main():
    '''
    Download a set of World Bank indicators and file each one as a CSV.

    wbdata is a client for the World Bank's API.
    You can run wbdata.get_source() to see all sources of information and the
    respective numbers to use when fetching the data.
    Pandas helps convert the lists and dictionaries retrieved from the API.

    Total Population -> source=16
    GDP -> source = 2, NY.GDP.MKTP.CN
    GDP per capita (USD) -> source = 2, NY.GDP.PCAP.CD
    GDP per capita (PPP) (USD) -> source = 2, NY.GDP.PCAP.PP.CD
    GDP Growth Rate -> source = 2, NY.GDP.PCAP.KD.ZG
    Unemployment Rate -> source = 2, SL.UEM.TOTL.ZS
    Gini Coefficient -> source = 2, SI.POV.GINI
    Monetary base (M0, M1, M2, M3) (USD) ->
        M1:
        M2: source = 2, FM.LBL.MQMY.CN
        M3:
    Total Reserves (USD) -> source = 2, FI.RES.TOTL.CD
    Inflation Rate (USD) ->
    FDI Inflow (USD) ->
    Aid Inflow (USD) ->

    '''

    # __________________________________________________________________
    # The following parts are focused on retrieving data from the World Bank

    #--------------------   Code Exemplification    -------------------------#
    #wbdata.get_source()
    #wbdata.get_indicator(source=16)

    folder = "/home/gsilvapt/Documents/repos/Econ-Factbook/database"

    # Define time range to search for data
    data_date = (datetime.datetime(1960, 1, 1), datetime.datetime(2014, 1, 1))

    # (indicator code, CSV path relative to `folder`).
    # The Gini coefficient (SI.POV.GINI) is not exported yet.
    exports = [
        ("SP.POP.TOTL", "population/population_total_1960-2014.csv"),
        ("NY.GDP.MKTP.CN", "gdp.csv"),
        ("NY.GDP.PCAP.CD", "gdp-percapita.csv"),
        ("NY.GDP.PCAP.PP.CD", "gdp-percapita-ppp.csv"),
        ("NY.GDP.PCAP.KD.ZG", "gdp-growth-rate.csv"),
        ("SL.UEM.TOTL.ZS", "unemployment_rate.csv"),
        ("FI.RES.TOTL.CD", "total_reserves.csv"),
    ]

    # Fetch everything first, then write, so a failed request leaves no
    # partial set of files behind (same as the original ordering).
    frames = [
        (pd.DataFrame(wbdata.get_data(code, data_date=data_date, pandas=True)),
         relative_path)
        for code, relative_path in exports
    ]
    for frame, relative_path in frames:
        frame.to_csv(os.path.join(folder, relative_path), sep=",")

    ## Script to create folders for each csv file
    for file_path in glob.glob(os.path.join(folder, "*.*")):
        new_dir = file_path.rsplit(".", 1)[0]
        # The original caught WindowsError, a name that does not exist on the
        # POSIX system these paths point to, so an existing directory ended in
        # NameError.  Checking first works on every platform.
        if not os.path.isdir(new_dir):
            os.mkdir(new_dir)
        shutil.move(file_path, os.path.join(new_dir, os.path.basename(file_path)))

    # __________________________________________________________________
    # The following parts are focused on retrieving data from the United
    # Nations
    ## The United Nations API is still under construction. It was not possible
    ## yet to find a proper solution that can automate this process.

    # ____________________________________________________________________________
    # The following parts are focused on retrieving data from the International
    # Labour Organization

    # ____________________________________________________________________________
    # The following parts are focused on retrieving data from the CIA World
    # Factbook

    # __________________________________________________________________
    # The following parts are focused on retrieving data from the OECD


    '''
Exemplo n.º 28
0
 def testConvertDate(self):
     """Smoke test: call get_data with convert_date=True; the result is not checked."""
     wbdata.get_data("SH.XPD.PRIV.ZS", country="usa", convert_date=True)
Exemplo n.º 29
0
 def testPandas(self):
     """Smoke test: call get_data with pandas=True; the result is not checked."""
     wbdata.get_data("SH.XPD.PRIV.ZS", country="usa", pandas=True)
Exemplo n.º 30
0
# Hypothesis: there is an inverse relationship between GDP per capita and the unemployment rate.
# SL.UEM.TOTL.ZS = Unemployment rate in the world (Variable 1)
# NY.GDP.PCAP.PP.CD = GDP per capita in the world (Variable 2)
# To test this hypothesis, we run a regression analysis and check whether there is a linear relationship between the two variables.

import wbdata
import datetime
import pandas as pd
import numpy as np
import regression
import math
import matplotlib.pyplot as pyplot
from sklearn.linear_model import LinearRegression

# Fetch both indicators for 2010-2015 as pandas objects.
data_date = (datetime.datetime(2010, 1, 1), datetime.datetime(2015, 1, 1))
Variable_1 = wbdata.get_data("SL.UEM.TOTL.ZS", data_date = data_date, pandas = True)
Variable_2 = wbdata.get_data("NY.GDP.PCAP.PP.CD", data_date = data_date, pandas = True)

# We need to reshape Variable 1 and adjust Variables 1 and 2 for NaN values.

# Variable 1 becomes a single-column 2-D array.
a = np.array(Variable_1).reshape(-1,1)
# The next three expressions discard their results (they only display
# something in an interactive session); the assignment to x is what counts.
np.isnan(a)
np.where(np.isnan(a))
np.nan_to_num(a)
x = np.nan_to_num(a)  # NaN replaced by 0 (NumPy default)

# Variable 2 keeps its original shape.
b = np.array(Variable_2)
# As above, only the final assignment has an effect.
np.isnan(b)
np.where(np.isnan(b))
np.nan_to_num(b)
y = np.nan_to_num(b)  # NaN replaced by 0 (NumPy default)
Exemplo n.º 31
0
 def testColumnName(self):
     """Smoke test: call get_data with pandas=True and a custom column_name; the result is not checked."""
     wbdata.get_data("SH.XPD.PRIV.ZS", country="usa", pandas=True,
                     column_name="IForget")
Exemplo n.º 32
0
def scrape():
    """Notebook export: enrich disaster records and return the stored merged table.

    Reads the disaster CSV, tags each row with a two-letter country code and
    a continent code, keeps the African rows, left-merges them with World
    Bank population figures (SP.POP.TOTL, 2008-2019) and writes the result to
    ``merged.csv``.

    Returns:
        The DataFrame read from the ``merged_data`` table of the PostgreSQL
        database - not the ``merged_df`` built in this function.
    """
    df = pd.read_csv(
        "../SecondProject2/Resources/Project2_idmc_disaster_all_dataset.csv")
    df.head()

    # In[4]:

    import pycountry_convert as pc

    # In[5]:

    # Sanity check of the converter on one country. Both names are rebound
    # to helper functions in the next cell.
    country_code = pc.country_name_to_country_alpha2("China",
                                                     cn_name_format="default")
    print(country_code)
    continent_name = pc.country_alpha2_to_continent_code(country_code)
    print(continent_name)

    # In[6]:

    # Country name -> alpha-2 code, or "N/A" when the lookup raises.
    # NOTE(review): the bare except also hides unexpected errors.
    def country_code(name):
        try:
            code = pc.country_name_to_country_alpha2(name,
                                                     cn_name_format="default")
            return code
        except:
            return "N/A"

    # Alpha-2 code -> continent code, or "N/A" when the lookup raises.
    def continent_name(name):
        try:
            continent = pc.country_alpha2_to_continent_code(name)
            return continent
        except:
            return "N/A"

    df["Country_2D"] = df["Country Name"].apply(country_code)
    df["Continent"] = df["Country_2D"].apply(continent_name)

    # In[7]:

    df.head()

    # In[8]:

    # Keep only rows tagged with continent code "AF".
    df_Africa = df[df["Continent"] == "AF"]
    df_Africa.head()

    # In[9]:

    df_africa = df_Africa.rename(columns={
        'Country Name': 'Country',
    })
    df_africa.head()

    # In[10]:

    df_africa.columns

    # In[11]:

    df.groupby("Continent").count()

    # In[12]:

    # Import dependencies
    import os
    import requests
    import json
    import pprint
    import numpy as np
    import flask
    import wbdata
    import datetime

    # In[13]:

    # NOTE(review): `countries` is not referenced again in this function.
    countries = [
        'algeria', 'angola', 'benin', 'botswana', 'burkina faso', 'burundi',
        'cabo verde', 'cameroon', 'central african republic', 'chad',
        'comoros', 'congo', "cote d'ivoire", 'djibouti', 'egypt',
        'equatorial guinea', 'esqtini', 'ethiopia', 'gabon', 'gambia', 'ghana',
        'guinea', 'guinea-bissau', 'kenya', 'lesotho', 'liberia', 'libya',
        'madagascar', 'malawi', 'mali', 'mauritania', 'mauritius', 'morocco',
        'mozambique', 'namibia', 'niger', 'nigeria', 'rwanda',
        'sao tome and principe', 'senegal', 'seychelles', 'sirre leone',
        'somalia', 'south africa', 'south sudan', 'sudan', 'tanzania', 'togo',
        'tunisia', 'uganda', 'zambia', 'zimbabwe'
    ]
    # Codes passed to the World Bank request below.
    country_codes = [
        'AGO', 'ALB', 'ARB', 'BDI', 'BEN', 'BFA', 'BMN', 'BSS', 'BWA', 'CAA',
        'CAF', 'CIV', 'CME', 'CMR', 'COG', 'COM', 'CPV', 'DJI', 'DMN', 'DSF',
        'DSS', 'DZA', 'EGY', 'ETH', 'GAB', 'GHA', 'GMB', 'GNB', 'GNQ', 'KEN',
        'LBR', 'LSO', 'MAR', 'MDG', 'MEA', 'MLI', 'MNA', 'MOZ', 'MRT', 'MUS',
        'MWI', 'NAF', 'NAM', 'NER', 'NGA', 'NLS', 'NRS', 'RRS', 'RSO', 'RWA',
        'SDN', 'SLE', 'SOM', 'SSA', 'SSD', 'SSF', 'SWZ', 'SXZ', 'SYC', 'TCD',
        'TGO', 'TMN', 'TSS', 'TUN', 'TZA', 'UGA', 'XZN', 'ZAF', 'ZMB', 'ZWE'
    ]
    indicators = "SP.POP.TOTL"
    data_date = datetime.datetime(2008, 1, 1), datetime.datetime(2019, 1, 1)
    # Return value discarded.
    wbdata.get_indicator(source=50)

    # In[14]:

    # Return value discarded.
    wbdata.search_countries('')

    # In[15]:

    data = wbdata.get_data(indicators,
                           country=country_codes,
                           data_date=data_date)
    df_wbdata = pd.DataFrame(data)
    df_wbdata = df_wbdata.rename(
        columns={
            "indicator": "Indicator",
            "country": "Country",
            "countryiso3code": "Country code",
            "date": "Year",
            "value": "Population",
        })
    df_wbdata = df_wbdata.filter(
        items=['Country', 'Country code', 'Year', 'Population'])
    df_wbdata.dropna(inplace=True)
    df_wbdata['Country'] = df_wbdata['Country'].astype(str)
    df_wbdata['Country code'] = df_wbdata['Country code'].astype(str)
    df_wbdata['Year'] = df_wbdata['Year'].astype(str)
    df_wbdata['Population'] = df_wbdata['Population'].astype(str)
    # NOTE(review): presumably 'Country' holds the str() of the API's nested
    # country record and this slice cuts out the name - the fixed offsets
    # should be confirmed against real data.
    df_wbdata['Country'] = df_wbdata['Country'].str.slice(23, -2)
    df_wbdata

    # In[16]:

    # Both frames need the same Year dtype for the merge below.
    df_africa['Year'] = df_africa['Year'].astype('int64')
    df_wbdata['Year'] = df_wbdata['Year'].astype('int64')

    # In[17]:

    merged_df = pd.merge(left=df_africa,
                         right=df_wbdata,
                         how="left",
                         on=['Country code', 'Year', 'Country'])
    merged_df.head()

    # In[18]:

    merged_df = merged_df.rename(
        columns={
            'Country code': 'Country_Code',
            'Start Date': 'Start_Date',
            'Event Name': 'Event_Name',
            'Hazard Category': 'Hazard_Category',
            'Hazard Type': 'Hazard_Type',
            'New Displacements': 'New_Displacements',
        })
    merged_df.head()

    # Rows with any missing value are dropped here, so the isna() count two
    # lines below can only be zero; its result is discarded anyway.
    merged_df = merged_df.dropna()
    merged_df

    merged_df[merged_df['Population'].isna()].count()
    merged_df.to_csv('merged.csv', index=False)

    # Create the engine for the PostgreSQL database
    # NOTE(review): `create_engine` is not imported in this function -
    # presumably sqlalchemy's, imported at module level.
    engine = create_engine(
        'postgresql://*****:*****@localhost/project2_db')

    engine.table_names()

    # Reads the stored table; nothing above writes merged_df to the database.
    query = pd.read_sql_query('select * from merged_data', con=engine)

    return (query)
Exemplo n.º 33
0
# NOTE(review): `macro_data`, `np`, `pd` and `datetime` are used below but not
# defined in this snippet - presumably set up earlier in the file.
print(macro_data.tail())
# We can generate a plot for all of our data as follows:
macro_data.plot(grid=True)
# We can apply functions to all columns:
# Average values of macro data during recession
macro_data[macro_data['recession'] == 1].apply(np.mean)
# Average values of macro data during expansion
macro_data[macro_data['recession'] == 0].apply(np.mean)

# Access World Bank Data
# pip install wbdata
import wbdata
# We can search for keys as follows:
wbdata.search_indicators("unemployment")
data_date = (datetime.datetime(1950, 1, 1), datetime.datetime(2019, 2, 10))
unemployment_data = wbdata.get_data("UNEMPSA_", data_date=data_date)
country_data = pd.DataFrame(unemployment_data)

# Access data from quandl
# pip install quandl
import quandl
# Please replace my key with your key
# NOTE(review): an API key is hard-coded in source; it should be loaded from
# the environment or a config file kept out of version control.
quandl.api_config.ApiConfig.api_key = '1zm1xSnnoqFeAGksg3S1'
oil_prices = quandl.get("OPEC/ORB")
oil_prices.plot(grid=True, title='OPEC Reference Basket')

## IEX exchange (A simple API)
import requests
import io
url = 'https://api.iextrading.com/1.0/stock/aapl/chart/date/20190129'
# Raw response body; nothing in the visible code uses it afterwards.
s = requests.get(url).content
Exemplo n.º 34
0
# Ease of doing business: interactive exploration of the World Bank API via wbdata.
# Most calls below are bare expressions whose results are not assigned; they
# are only useful when run line by line in an interactive session.

import wbdata
wbdata.get_source()
wbdata.get_indicator(source=1)
# Get country codes with a search
wbdata.search_countries('Turkey') # TUR
# Get indicators with a search
wbdata.search_indicators('ease of doing business') # IC.BUS.DFRN.XQ
# [0] takes the first record of the returned sequence.
wbdata.get_data('IC.BUS.DFRN.XQ', country='TUR')[0]
wbdata.search_countries('united') # GBR
wbdata.get_data('IC.BUS.DFRN.XQ', country='GBR')[0]

import datetime
# Date window (start, end) passed to wbdata together with two countries.
data_date = (datetime.datetime(2017, 1, 1), datetime.datetime(2019, 1, 1))
wbdata.get_data("IC.BUS.DFRN.XQ", country=("USA", "GBR"), data_date=data_date)

wbdata.search_indicators("gdp per capita")
# No country argument: the library default country selection is used.
wbdata.get_data('NY.GDP.PCAP.KD.ZG')

wbdata.get_data('NY.GDP.PCAP.KD.ZG', country = 'USA')
# NOTE(review): 'OED' is presumably an aggregate code (OECD members) - confirm.
wbdata.get_data('NY.GDP.PCAP.KD.ZG', country = 'OED')

# Income level filter
wbdata.get_incomelevel()
# Collect the 'id' of every country returned for income level "HIC".
countries = [i['id'] for i in wbdata.get_country(incomelevel="HIC", display=False)]
# Maps indicator code -> label handed to get_dataframe.
indicators = {"IC.BUS.DFRN.XQ": "doing_business", "NY.GDP.PCAP.PP.KD": "gdppc"}
df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)

# Path is relative to the current working directory; the folders must already exist.
df.to_csv('KocPython2020/in-classMaterial/day6/econ.csv')
df.describe()
Exemplo n.º 35
0
 def testDateRange(self):
     """Smoke test: request one indicator for one country within a date window.

     Nothing is asserted about the result; the test only checks that the
     call completes without raising.
     """
     window_start = datetime.datetime(2006, 1, 1)
     window_end = datetime.datetime(2010, 1, 1)
     wbdata.get_data(
         "SH.XPD.PRIV.ZS",
         country="usa",
         data_date=(window_start, window_end),
     )
Exemplo n.º 36
0
# Ease of doing business: interactive exploration of the World Bank API via wbdata.
# Most calls below are bare expressions whose results are not assigned; they
# are only useful when run line by line in an interactive session.

import wbdata
wbdata.get_source()
wbdata.get_indicator(source=1)
# Get country codes with a search
wbdata.search_countries('Turkey')  # TUR
# [0] takes the first record of the returned sequence.
wbdata.get_data('IC.REG.COST.PC.MA.ZS', country='TUR')[0]
wbdata.search_countries('united')  # GBR
wbdata.get_data('IC.REG.COST.PC.MA.ZS', country='GBR')

import datetime
# Date window (start, end) passed to wbdata together with two countries.
data_date = (datetime.datetime(2010, 1, 1), datetime.datetime(2011, 1, 1))
wbdata.get_data("IC.REG.COST.PC.MA.ZS",
                country=("USA", "GBR"),
                data_date=data_date)
wbdata.search_indicators("gdp per capita")
# No country argument: the library default country selection is used.
wbdata.get_data('NY.GDP.PCAP.KD.ZG')

wbdata.get_data('NY.GDP.PCAP.KD.ZG', country='USA')
# NOTE(review): 'OED' is presumably an aggregate code (OECD members) - confirm.
wbdata.get_data('NY.GDP.PCAP.KD.ZG', country='OED')

# Income level filter
wbdata.get_incomelevel()
# Collect the 'id' of every country returned for income level "HIC".
countries = [
    i['id'] for i in wbdata.get_country(incomelevel="HIC", display=False)
]
# Maps indicator code -> label; not consumed within the visible lines.
indicators = {
    "IC.REG.COST.PC.MA.ZS": "doing_business",
    "NY.GDP.PCAP.PP.KD": "gdppc"
}
Exemplo n.º 37
0
"""
Messing around with Oliver Sherouse's wbdata, which accesses all of the 
World Bank's data API's. This follows the documentation, link below.  

Not sure this is ready for primetime, but it could be me... 

References 
* http://datacatalog.worldbank.org/
* http://blogs.worldbank.org/opendata/accessing-world-bank-data-apis-python-r-ruby-stata
* https://github.com/OliverSherouse/wbdata/blob/master/docs/source/index.rst 

Prepared for the NYU Course "Global Economy" 
* https://sites.google.com/site/nyusternglobal/home 
* https://github.com/DaveBackus/Global_Economy 

Written by Dave Backus @ NYU, September 2014  
Created with Python 3.4 
"""
import wbdata 

# List the available sources, then the indicators of source 15.
wbdata.get_source()
wbdata.get_indicator(source=15)
# Raw records for one indicator and one country.
d   = wbdata.get_data('IC.BUS.EASE.XQ', country='USA')

# Maps indicator code -> label handed to get_dataframe.
indicators = {'IC.BUS.EASE.XQ': 'Ease', 'IRSPREAD': 'Spread'}
# NOTE(review): data_date is given as plain integers here, while other wbdata
# examples pass datetime.datetime objects - confirm that the installed wbdata
# version accepts integers.
df1 = wbdata.get_dataframe(indicators, data_date=(2012, 2013), country='ARG')
#df2 = wbdata.get_dataframe('IRSPREAD', country='all', convert_date=True)

#%%
Exemplo n.º 38
0
def _first_non_null_value(records):
    """Return the first ``'value'`` in *records* that is not None, or None if there is none."""
    return next(
        (record['value'] for record in records if record['value'] is not None),
        None,
    )


def home(request):
    """Render ``result.html`` with a personal CO2 estimate and a country Kaya figure.

    On POST the following form fields are read from ``request.POST``:

    * ``heat`` / ``hrsofheat`` -- heat source (``'coal'`` or ``'LPG'``) and the
      amount it is scaled by; any other heat source contributes 0.
    * ``hrs`` -- vehicle hours.
    * ``meat`` -- red meat (per day).
    * ``clothes`` -- spending on clothes in dollars (per month).
    * ``furniture`` -- spending on furniture in dollars.
    * ``laundary`` -- laundry runs (per week); the misspelled key is the one
      the form posts.
    * ``treadmill`` -- treadmill hours (per week).
    * ``papers`` -- number of papers (parsed as an integer).
    * ``crcode`` -- country code, upper-cased before being sent to wbdata.

    The ``transportation`` and ``ac`` fields contributed a constant 0 in the
    earlier implementation and are not used in the calculation.

    Each activity is multiplied by its emission factor and all contributions
    are summed into ``getvalue``.  ``countrycode`` receives the Kaya figure
    computed from three World Bank indicators, or ``None`` when any of them
    has no usable value.

    For non-POST requests both context values are ``None``.

    Raises:
        KeyError: if a required form field is missing (Django raises its
            ``MultiValueDictKeyError`` subclass).
        ValueError: if a numeric field cannot be parsed.
    """
    # Defaults so that the template can still be rendered for non-POST requests.
    result = None
    kaya_value = None

    if request.method == "POST":
        form = request.POST

        # Emission factor per heat source; unknown sources add nothing.
        heating_factors = {'coal': 0.015, 'LPG': 0.0803}
        heat_factor = heating_factors.get(form['heat'], 0.0)

        contributions = (
            heat_factor * float(form['hrsofheat']),
            0.0444 * float(form['hrs']),        # vehicle
            2.58 * float(form['meat']),         # metric tons of co2 per year
            0.005 * float(form['clothes']),
            0.001 * float(form['furniture']),
            0.1 * float(form['laundary']),
            0.0467 * float(form['treadmill']),
            0.0152 * int(form['papers']),
        )
        # Every activity is included exactly once in the total.
        result = sum(contributions)

        country = form['crcode'].upper()

        # NOTE(review): the earlier code named this series "emission per
        # capita"; confirm the meaning of EN.ATM.CO2E.EG.ZS in the World Bank
        # catalogue.
        co2_series_value = _first_non_null_value(
            w.get_data("EN.ATM.CO2E.EG.ZS", country=country))

        # The second record is used, as before; guard against short series.
        gdp_records = w.get_data("NY.GDP.MKTP.CD", country=country)
        gdp_val = gdp_records[1]['value'] if len(gdp_records) > 1 else None

        energy_intensity_val = _first_non_null_value(
            w.get_data("EG.EGY.PRIM.PP.KD", country=country))

        if None not in (co2_series_value, gdp_val, energy_intensity_val):
            # The scaling applies to the numeric value, not to the list of
            # records.  NOTE(review): 3.667 is presumably the CO2-to-carbon
            # mass ratio and 41.868 an energy unit conversion - confirm.
            emission_per_capita_value = co2_series_value / 3.667
            kaya_value = (emission_per_capita_value * gdp_val
                          * energy_intensity_val * 0.001) / 41.868

    return render(request, "result.html", {'getvalue': result, 'countrycode': kaya_value})
Exemplo n.º 39
0
## http://blogs.worldbank.org/opendata/new-country-classifications-income-level-2017-2018

## Country data, income level
countries = wb.get_country(display=False)
df_country = pd.DataFrame(countries)


def get_income(x):
    """Return the ``'value'`` entry of the income-level record *x*."""
    return x['value']


def _rows_for_year(records, year):
    """Return ``[country id, country name, value]`` rows for records dated *year*."""
    return [
        [record['country']['id'], record['country']['value'], record['value']]
        for record in records
        if record['date'] == year
    ]


# Replace each income-level record in the column by its 'value' entry.
df_country['incomeLevel'] = df_country.incomeLevel.apply(get_income)
df_country[df_country.id == 'TWN']

## GDP (current US$)
## https://data.worldbank.org/indicator/NY.GDP.MKTP.CD

gdp = wb.get_data("NY.GDP.MKTP.CD")
# Built in one pass instead of re-creating the list on every iteration.
list_gdp = _rows_for_year(gdp, '2018')
df_gdp = pd.DataFrame(list_gdp, columns = ['iso2Code','name','value'])

## GDP growth (annual %)
## https://data.worldbank.org/indicator/NY.GDP.MKTP.KD.ZG

gdp_growth = wb.get_data("NY.GDP.MKTP.KD.ZG")
list_gdp_growth = _rows_for_year(gdp_growth, '2018')
@author: johnjsyoo
"""

import wbdata
import datetime
import pandas as pd
import MySQLdb as myDB

######################### GETTING ENERGY DATA FOR ALL COUNTRIES ##############################

# Setting the date for our data.
# The parentheses do not create a tuple: data_date is a single datetime.
data_date = (datetime.datetime(2011, 1, 1))

# GDP (current US$)
# NOTE(review): [44:] drops the first 44 entries of every series; presumably
# these are aggregate/regional rows rather than countries - confirm.
gdp = wbdata.get_data("NY.GDP.MKTP.CD", data_date=data_date, pandas = True)[44:]

# Data on Electricity production from renewable sources (kWh)
renewableProd = wbdata.get_data("EG.ELC.RNEW.KH", data_date=data_date, pandas = True)[44:]
electricityProd = wbdata.get_data("EG.ELC.PROD.KH", data_date=data_date, pandas = True)[44:]

# Convert the time-series data into a Data Frame
gdpDF = pd.DataFrame(gdp)
renewableProdDF = pd.DataFrame(renewableProd)
electricityProdDF = pd.DataFrame(electricityProd)

# Join the three frames on their index.  The suffixes are applied to column
# names shared by both sides of the first join; the column still called
# 'value' after the second join is renamed to 'Electricity_kWh'.
# NOTE(review): assumes each frame has a single column named 'value' - confirm.
energyDF = gdpDF.join(renewableProdDF, lsuffix="GDP",rsuffix="Renewable_kWh")
energyDF = energyDF.join(electricityProdDF)
energyDF.rename(columns = {'value':'Electricity_kWh'}, inplace = True)

# Dropping all NaN values and zero values
Exemplo n.º 41
0
# In[ ]:

# Source 1: Doing Business. List the indicators of that source.
wbdata.get_indicator(source=1)


# In[ ]:

# Look up the country code for Brazil.
wbdata.search_countries("Brazil")


# In[ ]:

# Signature reference:
#wbdata.get_data(indicator, country=u'all', data_date=None, convert_date=False, pandas=False, 
#column_name=u'value', keep_levels=False)
wbdata.get_data("IC.BUS.EASE.XQ", country=u'BRA')


# In[ ]:

# Same indicator for two countries, restricted to a (start, end) date window.
data_date = (datetime.datetime(2010, 1, 1), datetime.datetime(2011, 1, 1))
wbdata.get_data("IC.BUS.EASE.XQ", country=("USA", "BRA"), data_date=data_date)


# In[ ]:

# Search indicators by text.
wbdata.search_indicators("gdp per capita")


# In[ ]:
Exemplo n.º 42
0
 def testIndicator(self):
     """Smoke test: fetch an indicator with default arguments; nothing is asserted."""
     indicator = "SH.XPD.PRIV.ZS"
     wbdata.get_data(indicator)
Exemplo n.º 43
0
import wbdata
import json
import pprint

# Pretty-printer with a 4-space indent for inspecting API records.
pp = pprint.PrettyPrinter(indent=4)

# Fetch the indicator with default arguments and show only the first record.
x = wbdata.get_data("FP.CPI.TOTL")
pp.pprint(x[0])
# Disabled exploration of sources and their indicators:
# sources = wbdata.get_source(display=False)
# indicators = wbdata.get_indicator(source=sources[0]['id'], display=False)
# pp.pprint(indicators[0])
# print 
# sources[0]['id']
Exemplo n.º 44
0
from pprint import pprint
import wbdata as w
# Ask for a country code and fetch its EG.EGY.PRIM.PP.KD series.
country_code = input("Country code : ")
energy_intensity = w.get_data("EG.EGY.PRIM.PP.KD", country=country_code.upper())
#pprint(energy_intensity)
# Take the first record that carries a value.  Falls back to None when every
# record is empty, instead of leaving `value` undefined for the print below.
value = next(
    (record['value'] for record in energy_intensity if record['value'] is not None),
    None,
)
print(value)

```
Country codes available
ABW   Aruba
AFG   Afghanistan
AFR   Africa
AGO   Angola
ALB   Albania
AND   Andorra
ANR   Andean Region
ARB   Arab World
ARE   United Arab Emirates
ARG   Argentina
ARM   Armenia
ASM   American Samoa
ATG   Antigua and Barbuda
AUS   Australia
AUT   Austria
AZE   Azerbaijan
BDI   Burundi
BEA   East Asia & Pacific (IBRD-only countries)
Exemplo n.º 45
0
 def testOneCountry(self):
     """Smoke test: fetch an indicator for a single country; nothing is asserted."""
     indicator = "SH.XPD.PRIV.ZS"
     wbdata.get_data(indicator, country="USA")
Exemplo n.º 46
0
# http://wbdata.readthedocs.org/en/latest/
# http://wbdata.readthedocs.org/en/latest/fetcher.html

import wbdata
import datetime
import pandas as pd
'''
This wbdata is an interactive console to work with World Bank's API.
You can run wbdata.get_source() to see all sources of information and the
respective numbers to run in the below code to fetch the data.
Also, using Pandas package may facilitate the task to convert the lists and dictionaries
retrieved from the API.

Total Population -> source=16
'''

#wbdata.get_source()
# List the indicators of source 16.
wbdata.get_indicator(source=16)

# Define time range to search for data
data_date = (datetime.datetime(1960, 1, 1), datetime.datetime(2014, 1, 1))

# Store the data as the variable df
# (get_data is asked for a pandas object, which is then wrapped in a DataFrame)
df = pd.DataFrame(
    wbdata.get_data("SP.POP.TOTL", pandas=True, data_date=data_date))

# Write it to a CSV - Example in /data-pipeline
# "YOURPATH" is a placeholder and must be replaced with a real directory.
df.to_csv("YOURPATH/population_total_1960-2014.csv", sep=',')
Exemplo n.º 47
0
 def testTwoCountries(self):
     """Smoke test: fetch an indicator for two countries at once; nothing is asserted."""
     country_pair = ("chn", "bra")
     wbdata.get_data("SH.XPD.PRIV.ZS", country=country_pair)