def stealingEnergy(countryname):
    data_dateUSC = (datetime.datetime(1971, 1, 1), datetime.datetime(2011, 1, 1))
    # Data on Electricity production from renewable sources (kWh) for US
    if countryname == "USA":
        renewableProdUS.extend(wbdata.get_data("EG.ELC.RNEW.KH", data_date=data_dateUSC, country="USA", pandas=True))
        electricityProdUS.extend(wbdata.get_data("EG.ELC.PROD.KH", data_date=data_dateUSC, country="USA", pandas=True))
    else:
        # Data on Electricity production from renewable sources (kWh) for CHN
        renewableProdCHN.extend(wbdata.get_data("EG.ELC.RNEW.KH", data_date=data_dateUSC, country="CHN", pandas=True))
        electricityProdCHN.extend(wbdata.get_data("EG.ELC.PROD.KH", data_date=data_dateUSC, country="CHN", pandas=True))
def load_indicator(con, indicator, db_column_name):
    cur = con.cursor()
    # add column if not exists
    try:
        cur.execute('''alter table country_annual_indicators add column {} real'''.format(db_column_name))
        con.commit()
    except sqlite3.OperationalError:
        pass
    # try to update, otherwise insert
    indicator_value_items = wbdata.get_data(indicator)
    for item in indicator_value_items:
        cur.execute('''update country_annual_indicators set {}=? where iso_code=? and year=?'''.format(db_column_name),
                    (item['value'], item['country']['id'].lower(), item['date']))
        # If no update happened (i.e. the row didn't exist) then insert one
        cur.execute('''INSERT INTO country_annual_indicators ('iso_code', 'year', {})
                       SELECT ?, ?, ? WHERE (Select Changes() = 0)'''.format(db_column_name),
                    (item['country']['id'].lower(), item['date'], item['value']))
    con.commit()
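# --- Usage sketch (not part of the original snippet) ---
# A minimal, hypothetical driver for load_indicator above, assuming a SQLite
# database whose country_annual_indicators table already has iso_code and year
# columns; the file name, indicator codes and column names are illustrative.
import sqlite3
import wbdata

con = sqlite3.connect("indicators.db")
con.execute("""CREATE TABLE IF NOT EXISTS country_annual_indicators
               (iso_code TEXT, year TEXT)""")
con.commit()

# Each call adds (or fills) one indicator column for every country/year pair.
load_indicator(con, "SP.POP.TOTL", "population_total")
load_indicator(con, "NY.GDP.MKTP.CD", "gdp_current_usd")
con.close()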
def test_quarterly_freq(self):
    got = wbd.get_data(
        "DP.DOD.DECD.CR.BC.CD",
        country="chl",
        data_date=dt.datetime(2013, 1, 1),
        freq="Q",
    )[0]["value"]
    assert got == 31049138725.7794
def getWb(year):
    data_date = datetime.datetime(year, 1, 1)
    x = wbdata.get_data("NY.GDP.PCAP.CD", data_date=data_date, pandas=True)
    y = wbdata.get_data("1.1_TOTAL.FINAL.ENERGY.CONSUM", data_date=data_date, pandas=True)
    z = wbdata.get_data("SP.POP.TOTL", data_date=data_date, pandas=True)
    data = pd.concat([x, y, z], axis=1)
    data.columns = ["GDP", "Energy_Consumption", "Population"]
    data = data.dropna(axis=0, how="any")
    df = pd.DataFrame(data)
    df.to_csv("out.csv")
    x = np.transpose(np.array([data["GDP"].tolist()]))
    y = np.transpose(np.array([data["Energy_Consumption"].tolist()]))
    z = np.transpose(np.array([data["Population"].tolist()]))
    return (x, y, z)
def get_countryList(ind, ctry, year):
    raw_data = []
    data_date = (datetime.datetime(year, 1, 1), datetime.datetime(year, 1, 1))
    raw_data = wbdata.get_data(ind, country=ctry, data_date=data_date)
    data = []
    for country in raw_data:
        data.append(country['country']['value'])
    return data
def form_to_data(myform):
    country = myform.cleaned_data['my_country']
    indicator = myform.cleaned_data['my_indicator']
    fromyr = int(myform.cleaned_data['from_'])
    toyr = myform.cleaned_data['to_']
    try:
        if toyr:
            toyr = int(toyr)
            data = wbdata.get_data(indicator, [country],
                                   data_date=(datetime(fromyr, 1, 1), datetime(toyr, 1, 1)))
        else:
            data = wbdata.get_data(indicator, [country], data_date=datetime(fromyr, 1, 1))
        return data
    except IndexError:
        # No data for this country/indicator/date combination; fall back to the form.
        return render(request, 'request_stats.html', {'form': myform})
def carbon():
    if request.method == "POST":
        country_code = request.form.get("ccheck")
        emission_per_capita = w.get_data("EN.ATM.CO2E.EG.ZS", country=country_code.upper())
        for i in emission_per_capita:
            if i['value'] is not None:
                value = i['value']
                value = str(value)
                return "Your Country's Carbon Dioxide Density(kg per kg of oil equivalent use): " + value
    return render_template("form.html")
def get_gdp(self):
    gdp_list = []
    for country_code in self.loan_data.country_code.unique():
        if isinstance(country_code, str):
            gdp_list.append([
                country_code,
                pd.to_numeric(wbdata.get_data("NY.GDP.PCAP.CD", country=(country_code))[1]['value'])
            ])
    return pd.DataFrame(gdp_list, columns=['country_code', 'gdp'])
def wb_query(indicator, countries='all', start_year=config.START_YEAR, end_year=config.END_YEAR, iso3=True):
    """
    Retrieve data for a World Bank indicator.

    For all World Bank indicators, see: https://data.worldbank.org/indicator
    Credit to wbdata for a wrapper around API requests: https://wbdata.readthedocs.io/en/latest/

    Input
    -----
    indicator (str) - the World Bank indicator as a string, e.g. '4.1_SHARE.RE.IN.ELECTRICITY'
        is the share of RE in a country's generation mix.
    countries (list) - Default 'all' will pull all countries; if specifying a single country
        or a list of countries, double check spelling w/ World Bank.
    start_year (int) - First year to pull data from; most World Bank indicators are reported
        on an annual basis.
    end_year (int) - Last year to pull data from; most World Bank indicators are reported
        on an annual basis.

    Outputs
    -------
    Pandas DataFrame - Long, with columns for 'country', 'year', 'value', and 'indicator'
    """
    # --- Define daterange as tuple ---
    daterange = (datetime.datetime(start_year, 1, 1), datetime.datetime(end_year, 12, 31))

    # --- Make api requests ---
    wb_query = wb.get_data(indicator, data_date=daterange, country=countries)

    # --- Package results as a df ---
    country_list = [d['country']['value'] for d in wb_query]
    year_list = [d['date'] for d in wb_query]
    value_list = [d['value'] for d in wb_query]

    df = pd.DataFrame({
        'country': country_list,
        'year': year_list,
        'value': value_list
    })
    df['indicator'] = indicator
    df['value'] = df['value'].astype(float)
    df['year'] = df['year'].astype(int)

    if iso3:
        df['iso'] = df['country'].apply(helper_functions.country_name_to_iso3)

    return df
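# --- Usage sketch (not part of the original function) ---
# How wb_query might be called, assuming config.START_YEAR/config.END_YEAR and
# helper_functions.country_name_to_iso3 are importable as in the function above;
# the indicator code and country codes here are only examples.
share_re = wb_query('4.1_SHARE.RE.IN.ELECTRICITY',
                    countries=['USA', 'CHN'],
                    start_year=2000,
                    end_year=2015)
print(share_re.head())                              # long format: country, year, value, indicator, iso
print(share_re.groupby('country')['value'].mean())  # average share per country over the period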
def getWBDataFromWeb(self, pStockCode, pStart, pEnd):
    # https://wbdata.readthedocs.io/en/latest/
    wbdata.get_source()
    wbdata.get_indicator(source=1)
    wbdata.search_countries("united")
    date = (datetime.datetime(2010, 1, 1), datetime.datetime(2011, 1, 1))
    self.data = wbdata.get_data("IC.BUS.EASE.XQ", country=("USA", "GBR"), data_date=date)
    for row in self.data:
        print(row['country']['id'], row)
    #indicators = {"IC.BUS.EASE.XQ": "doing_business", "NY.GDP.PCAP.PP.KD": "gdppc"}
    #df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)
    #df.describe()
    return self.data
def get_dataset_wb(indicator_id):
    """
    downloads the given dataset and creates a dataframe

    Args:
        indicator_id: World Bank indicator_id for which to retrieve the dataset

    Returns:
        A pandas DataFrame of the dataset
    """
    if indicator_id in known_datasets_from_wb:
        return known_datasets_from_wb[indicator_id]
    else:
        dat = wbdata.get_data(indicator=indicator_id, convert_date=True, pandas=True)
        dat = dat.unstack(level=0)  # delete MultiIndex
        known_datasets_from_wb.update({indicator_id: dat})
        return dat
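# --- Usage sketch (not part of the original function) ---
# get_dataset_wb memoises results in the module-level dict it reads from; this
# hypothetical example assumes that dict is defined as below and uses an
# illustrative indicator code.
known_datasets_from_wb = {}

gdp_wide = get_dataset_wb("NY.GDP.MKTP.CD")    # first call fetches from the World Bank API
gdp_cached = get_dataset_wb("NY.GDP.MKTP.CD")  # second call returns the cached frame
print(gdp_wide is gdp_cached)                  # True: same cached object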
def test_year(ind, ctry, year):
    import wbdata
    data = []
    #### limit time frame to one year as the study only needs one year of data
    data_date = (datetime.datetime(year, 1, 1), datetime.datetime(year, 1, 1))
    data = wbdata.get_data(ind, ctry, data_date)
    for c in data:
        if c['value'] is None:
            print('Country:', c['country']['value'])
            print('Data:', c['indicator']['value'])
            print('Year:', c['date'])
            print('Status:', 'NOT available')
        else:
            print('Country:', c['country']['value'])
            print('Data:', c['indicator']['value'])
            print('Year:', c['date'])
            print('Status:', 'Available')
def get_ind_preview(y1=2010, ind="FR.INR.LEND"):
    if ind:
        if y1:
            ydt = datetime(int(y1), 1, 1)
            ind_details = wbdata.get_indicator(ind, display=False)[0]
            ind_details['dev'] = "Feel free to use this variable!"
            try:
                data = rpy2functions.get_values(
                    wbdata.get_data(ind, data_date=(ydt, ydt), country=preview_countries))
                # keys look like "<year>.<country>"
                if 'q' in list(data.keys())[0].split('.')[0].lower():
                    ind_details['dev'] = "If chosen, please make sure all variables are quarterly."
                if sum(map(map_adder, data.values())) < 5:  # won't add properly.
                    ind_details['dev'] = "Very scarce data. Avoid using this variable."
            except TypeError as e:
                data = {'{0}.{1}'.format(str(y1), cont): 'ERROR' for cont in preview_countries}
                ind_details['dev'] = "No data found for this variable. Do not use this variable."
            return jsonify({'data': data, 'details': ind_details})
    return {}
def update_population_data(population_df):
    years = (datetime.datetime(2015, 1, 1), (datetime.datetime.now()))
    new_pop = wbdata.get_data(indicator='SP.POP.TOTL', data_date=years, pandas=True).to_frame()
    formatted_population = reformat_dataframe(new_pop)
    added_years = list(formatted_population)
    population_df = population_df.sort_values(by=['Country Name'])
    formatted_population = formatted_population.sort_values(by=['Country Name'])
    population_df.update(formatted_population)
    for year in added_years:
        if year not in population_df.columns:
            population_df = pd.concat([population_df, formatted_population[year]], axis=1, sort=True)
    return population_df
def update_population_data(pop_df):
    years = (datetime.datetime(2015, 1, 1), (datetime.datetime.now()))
    new_pop = wbdata.get_data(indicator='SP.POP.TOTL', data_date=years, pandas=True).to_frame()
    new_pop = new_pop['value'].to_frame().reset_index()
    new_pop.rename(columns={'country': 'Country Name'}, inplace=True)
    new_pop = new_pop.pivot(index='Country Name', columns='date', values='value')
    new_pop = new_pop.apply(pd.to_numeric).fillna(value=0).reset_index()
    added_years = list(new_pop)
    pop_df = pop_df.sort_values(by=['Country Name'])
    new_pop = new_pop.sort_values(by=['Country Name'])
    pop_df.update(new_pop)
    for year in added_years:
        if year not in pop_df.columns:
            pop_df = pd.concat([pop_df, new_pop[year]], axis=1, sort=True)
    return pop_df
def get_data_spec(request):
    country, data_date, source, convert_date = request.param
    return GetDataSpec(
        result=wbd.get_data(
            "NY.GDP.MKTP.CD",
            country=country,
            data_date=data_date,
            source=source,
            convert_date=convert_date,
        ),
        country=country,
        data_date=data_date,
        source=source,
        convert_date=convert_date,
        expected_country="Eritrea",
        expected_date=dt.datetime(2010, 1, 1) if convert_date else "2010",
        expected_value={
            "2": 2117039512.19512,
            "11": 2117008130.0813,
        }[source or "2"],
    )
def update_co2_data(co2_df):
    years = (datetime.datetime(2015, 1, 1), (datetime.datetime.now()))
    new_co2 = wbdata.get_data(indicator='EN.ATM.CO2E.KT', data_date=years, pandas=True).to_frame()
    formatted_co2 = reformat_dataframe(new_co2)
    added_years = list(formatted_co2)
    co2_df = co2_df.sort_values(by=['Country Name'])
    formatted_co2 = formatted_co2.sort_values(by=['Country Name'])
    co2_df.update(formatted_co2)
    for year in added_years:
        if year not in co2_df.columns:
            co2_df = pd.concat([co2_df, formatted_co2[year]], axis=1, sort=True)
    return co2_df
def get_latest_wb_indicator_by_country(name: str, indicator: str):
    # Get raw data
    date = (datetime.datetime(2016, 1, 1), datetime.datetime(2016, 1, 1))  # how to automatically select the latest date?
    raw_data = wbdata.get_data(indicator=indicator, data_date=date)
    # Get country and arrivals
    countries = []
    value = []
    for x in raw_data:
        countries.append(x['country']['value'])
        arrivals = x['value']
        if arrivals is None:
            arrivals = np.nan
        value.append(arrivals)
    # Store as pandas data frame
    data = {"country": countries, name: value}
    df = pd.DataFrame(data)
    return df
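# --- Alternative sketch (not part of the original function) ---
# One way to answer the "how to automatically select the latest date?" question
# in the comment above: skip data_date and keep each country's most recent
# non-null observation. Assumes an annual indicator and these imports.
import pandas as pd
import wbdata

def get_latest_available(name: str, indicator: str) -> pd.DataFrame:
    raw = wbdata.get_data(indicator=indicator)  # all available years
    rows = [(x['country']['value'], int(x['date']), x['value'])
            for x in raw if x['value'] is not None]
    df = pd.DataFrame(rows, columns=['country', 'year', name])
    # keep only the most recent year per country
    return df.sort_values('year').groupby('country', as_index=False).last()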
def run(self):
    db.create_all()
    # get list of all country codes
    countries = wbdata.get_country()
    for country in countries:
        country_data = []
        try:
            country_data = wbdata.get_data("EG.FEC.RNEW.ZS", country=country["id"])
        # Throws a NoneType error if country doesn't exist in world bank db
        except TypeError:
            print(f"Could not get data for country {country['name']}")
        for data_entry in country_data:
            if data_entry["value"]:
                db_entry = RenewableEnergyCountry(
                    country=data_entry["country"]["value"],
                    year=data_entry["date"],
                    renewable_energy_usage=data_entry["value"],
                )
                db.session.add(db_entry)
    db.session.commit()
def update_co2_data(co2_df):
    years = (datetime.datetime(2015, 1, 1), (datetime.datetime.now()))
    new_co2 = wbdata.get_data(indicator='EN.ATM.CO2E.KT', data_date=years, pandas=True).to_frame()
    new_co2 = new_co2['value'].to_frame().reset_index()
    new_co2.rename(columns={'country': 'Country Name'}, inplace=True)
    new_co2 = new_co2.pivot(index='Country Name', columns='date', values='value')
    new_co2 = new_co2.apply(pd.to_numeric).fillna(value=0).reset_index()
    added_years = list(new_co2)
    co2_df = co2_df.sort_values(by=['Country Name'])
    new_co2 = new_co2.sort_values(by=['Country Name'])
    co2_df.update(new_co2)
    for year in added_years:
        if year not in co2_df.columns:
            co2_df = pd.concat([co2_df, new_co2[year]], axis=1, sort=True)
    return co2_df
def collect_data(self):
    load_env()
    if isfunction(self.provider):
        if self.code:
            self.data = self.provider(self.code)
        else:
            self.data = self.provider()
    elif self.provider == 'fred':
        fred = Fred(api_key=os.environ['TOKEN_FRED'])
        self.data = fred.get_series(self.code,
                                    observation_start=self.start_dt,
                                    observation_end=self.end_dt)
    elif self.provider == 'eod_hist':
        url = 'https://eodhistoricaldata.com/api/eod/{0}'.format(self.code)
        params = {'api_token': os.environ['TOKEN_EODHIST']}
        expire_after = td(days=1).total_seconds()
        session = requests_cache.CachedSession(cache_name='cache',
                                               backend='sqlite',
                                               expire_after=expire_after)
        r = session.get(url, params=params)
        if r.status_code != requests.codes.ok:
            session = requests.Session()
            r = session.get(url, params=params)
        if r.status_code == requests.codes.ok:
            df = pd.read_csv(StringIO(r.text), skipfooter=1,
                             parse_dates=[0], index_col=0, engine='python')
            self.data = df['Close']
        else:
            raise Exception(r.status_code, r.reason, url)
    elif self.provider == 'schiller':
        url = 'http://www.econ.yale.edu/~shiller/data/ie_data_with_TRCAPE.xls'
        webpage = requests.get(url, stream=True)
        self.data = pd.read_excel(io.BytesIO(webpage.content), 'Data', header=7, skipfooter=1)
        self.data.index = self.data['Date'].apply(
            lambda x: dt.strptime(str(x).format(x, '4.2f'), '%Y.%m'))
        self.data = self.data[self.code]
        print(self.data.tail(5))
    elif self.provider == 'quandl':
        self.data = quandl.get(self.code,
                               authtoken=os.environ['TOKEN_QUANDL'],
                               collapse="quarterly",
                               start_date=self.start_dt,
                               end_date=self.end_dt)['Value']
    elif self.provider == 'bls':
        self.data = bls.get_series([self.code],
                                   startyear=dt.strptime(self.start_dt, '%Y-%m-%d').year,
                                   endyear=dt.strptime(self.end_dt, '%Y-%m-%d').year,
                                   key=os.environ['TOKEN_BLS'])
    elif self.provider == 'worldbank':
        self.data = wbdata.get_data(self.code,
                                    country='US',
                                    data_date=(dt.strptime(self.start_dt, '%Y-%m-%d'),
                                               dt.strptime(self.end_dt, '%Y-%m-%d')),
                                    convert_date=True,
                                    pandas=True,
                                    keep_levels=False)
        print(self.data.tail(5))
    print("Collected data for [{0}]".format(self.code))
def get_data(from_date=datetime.datetime(2010, 1, 1),
             to_date=datetime.datetime.now(),
             variable="FR.INR.LEND"):
    duration = (from_date, to_date)
    variable = variable.upper()
    mykey = '-'.join(map(str, [from_date.year, to_date.year, variable]))
    return functions.get_values(wbdata.get_data(variable, data_date=duration))
# http://wbdata.readthedocs.org/en/latest/
# http://wbdata.readthedocs.org/en/latest/fetcher.html

import wbdata
import datetime
import pandas as pd

'''
This wbdata package provides an interactive console to work with the World Bank's API.
You can run wbdata.get_source() to see all sources of information and the respective
numbers to use in the code below to fetch the data.
Also, using the Pandas package may facilitate the task of converting the lists and
dictionaries retrieved from the API.

Total Population -> source=16
'''

#wbdata.get_source()
wbdata.get_indicator(source=16)

# Define time range to search for data
data_date = (datetime.datetime(1960, 1, 1), datetime.datetime(2014, 1, 1))

# Store the data as the variable df
df = pd.DataFrame(wbdata.get_data("SP.POP.TOTL", pandas=True, data_date=data_date))

# Write it to a CSV - Example in /data-pipeline
df.to_csv("YOURPATH/population_total_1960-2014.csv", sep=',')
import numpy as np

#print(wb.get_data("BM.KLT.DINV.CD.WD", "ARB", pandas=True))
file = pd.read_csv("ind.csv")
indicators = file["Indicator Code"]
unique_indicators = list(set(indicators))
unique_indicators.sort()
l = []
from_to = (dt.datetime(1970, 1, 1), dt.datetime(2019, 1, 1))
list_indicators = {}
c = 1
for i in unique_indicators[:10]:
    print(c)
    c += 1
    result = wb.get_data(i, country_codes, pandas=True, data_date=from_to)
    maxi = list()
    counter = 0
    temp_maxi = list()
    for year in reversed(range(1970, 2020)):
        temp = [np.isnan(result[x]) for x in result.keys() if x[1] == str(year)]
        print(temp)
        if True in temp:
            if len(temp_maxi) > len(maxi):
                maxi = temp_maxi[:]
            temp_maxi = list()
            continue
        temp_maxi.append(year)
    #print(temp_maxi)
def main():
    '''
    This wbdata package provides an interactive console to work with the World Bank's API.
    You can run wbdata.get_source() to see all sources of information and the respective
    numbers to use in the code below to fetch the data.
    Also, using the Pandas package may facilitate the task of converting the lists and
    dictionaries retrieved from the API.

    Total Population -> source=16
    GDP -> source = 2, NY.GDP.MKTP.CN
    GDP per capita (USD) -> source = 2, NY.GDP.PCAP.CD
    GDP per capita (PPP) (USD) -> source = 2, NY.GDP.PCAP.PP.CD
    GDP Growth Rate -> source = 2, NY.GDP.PCAP.KD.ZG
    Unemployment Rate -> source = 2, SL.UEM.TOTL.ZS
    Gini Coefficient -> source = 2, SI.POV.GINI
    Monetary base (M0, M1, M2, M3) (USD) ->
        M1:
        M2: source = 2, FM.LBL.MQMY.CN
        M3:
    Total Reserves (USD) -> source = 2, FI.RES.TOTL.CD
    Inflation Rate (USD) ->
    FDI Inflow (USD) ->
    Aid Inflow (USD) ->
    '''

    '''
    __________________________________________________________________
    The following parts are focused on retrieving data from the World Bank
    '''
    #-------------------- Code Exemplification -------------------------#
    #wbdata.get_source()
    #wbdata.get_indicator(source=16)

    # Define time range to search for data
    data_date = (datetime.datetime(1960, 1, 1), datetime.datetime(2014, 1, 1))

    # Store each indicator as a DataFrame
    population = pd.DataFrame(wbdata.get_data("SP.POP.TOTL", data_date=data_date, pandas=True))
    gdp = pd.DataFrame(wbdata.get_data("NY.GDP.MKTP.CN", data_date=data_date, pandas=True))
    gdp_per_capita = pd.DataFrame(wbdata.get_data("NY.GDP.PCAP.CD", data_date=data_date, pandas=True))
    gdp_per_capita_ppp = pd.DataFrame(wbdata.get_data("NY.GDP.PCAP.PP.CD", data_date=data_date, pandas=True))
    gdp_growth_rate = pd.DataFrame(wbdata.get_data("NY.GDP.PCAP.KD.ZG", data_date=data_date, pandas=True))
    unemployment_rate = pd.DataFrame(wbdata.get_data("SL.UEM.TOTL.ZS", data_date=data_date, pandas=True))
    #gini-coefficient ->
    total_reserves = pd.DataFrame(wbdata.get_data("FI.RES.TOTL.CD", data_date=data_date, pandas=True))

    # Write it to a CSV - Example in /data-pipeline
    population.to_csv("/home/gsilvapt/Documents/repos/Econ-Factbook/database/population/population_total_1960-2014.csv", sep=',')
    gdp.to_csv("/home/gsilvapt/Documents/repos/Econ-Factbook/database/gdp.csv", sep=",")
    gdp_growth_rate.to_csv("/home/gsilvapt/Documents/repos/Econ-Factbook/database/gdp-growth-rate.csv", sep=",")
    gdp_per_capita.to_csv("/home/gsilvapt/Documents/repos/Econ-Factbook/database/gdp-percapita.csv", sep=",")
    gdp_per_capita_ppp.to_csv("/home/gsilvapt/Documents/repos/Econ-Factbook/database/gdp-percapita-ppp.csv", sep=",")
    unemployment_rate.to_csv("/home/gsilvapt/Documents/repos/Econ-Factbook/database/unemployment_rate.csv", sep=",")
    # gini-coefficient.to_csv("/gini_coefficient.csv", sep=",")
    total_reserves.to_csv("/home/gsilvapt/Documents/repos/Econ-Factbook/database/total_reserves.csv", sep=",")

    ## Script to create folders for each csv file
    folder = "/home/gsilvapt/Documents/repos/Econ-Factbook/database"
    for file_path in glob.glob(os.path.join(folder, "*.*")):
        new_dir = file_path.rsplit(".", 1)[0]
        try:
            os.mkdir(os.path.join(folder, new_dir))
        except FileExistsError:
            # Handle the case where the target dir already exists.
            pass
        shutil.move(file_path, os.path.join(new_dir, os.path.basename(file_path)))

    '''
    __________________________________________________________________
    The following parts are focused on retrieving data from the United Nations
    '''
    ## The United Nations API is still under construction. It was not possible
    ## yet to find a proper solution that can automate this process.

    '''
    ____________________________________________________________________________
    The following parts are focused on retrieving data from the International
    Labour Organization
    '''

    '''
    ____________________________________________________________________________
    The following parts are focused on retrieving data from the CIA World Factbook
    '''

    '''
    __________________________________________________________________
    The following parts are focused on retrieving data from the OECD
    '''
    '''
def testConvertDate(self):
    wbdata.get_data("SH.XPD.PRIV.ZS", country="usa", convert_date=True)
def testPandas(self):
    wbdata.get_data("SH.XPD.PRIV.ZS", country="usa", pandas=True)
# Hypothesis: there is an inverse relationship between GDP per capita and the unemployment rate.
# SL.UEM.TOTL.ZS = Unemployment rate in the world (Variable 1)
# NY.GDP.PCAP.PP.CD = GDP per capita in the world (Variable 2)
# To test this hypothesis, we run a regression analysis and check whether there is a
# linear relationship between the two variables.

import wbdata
import datetime
import pandas as pd
import numpy as np
import regression
import math
import matplotlib.pyplot as pyplot
from sklearn.linear_model import LinearRegression

data_date = (datetime.datetime(2010, 1, 1), datetime.datetime(2015, 1, 1))
Variable_1 = wbdata.get_data("SL.UEM.TOTL.ZS", data_date=data_date, pandas=True)
Variable_2 = wbdata.get_data("NY.GDP.PCAP.PP.CD", data_date=data_date, pandas=True)

# We need to reshape Variable 1 and adjust Variables 1 and 2 for NaN values
a = np.array(Variable_1).reshape(-1, 1)
np.isnan(a)
np.where(np.isnan(a))
np.nan_to_num(a)
x = np.nan_to_num(a)
b = np.array(Variable_2)
np.isnan(b)
np.where(np.isnan(b))
np.nan_to_num(b)
y = np.nan_to_num(b)
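# --- Regression sketch (not part of the original snippet) ---
# The code above stops before the regression it describes; a minimal fitting
# step using the x (unemployment) and y (GDP per capita) arrays prepared above
# could look like this. Note that replacing NaNs with zeros, as done above,
# distorts the fit; dropping those rows would be cleaner.
model = LinearRegression()
model.fit(x, y)

print("slope:", model.coef_)        # a negative slope would support the hypothesis
print("intercept:", model.intercept_)
print("R^2:", model.score(x, y))

pyplot.scatter(x, y, s=5)
pyplot.plot(x, model.predict(x), color="red")
pyplot.xlabel("Unemployment rate, total (% of labour force)")
pyplot.ylabel("GDP per capita, PPP (current international $)")
pyplot.show()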
def testColumnName(self):
    wbdata.get_data("SH.XPD.PRIV.ZS", country="usa", pandas=True, column_name="IForget")
def scrape():
    df = pd.read_csv("../SecondProject2/Resources/Project2_idmc_disaster_all_dataset.csv")
    df.head()

    # In[4]:
    import pycountry_convert as pc

    # In[5]:
    country_code = pc.country_name_to_country_alpha2("China", cn_name_format="default")
    print(country_code)
    continent_name = pc.country_alpha2_to_continent_code(country_code)
    print(continent_name)

    # In[6]:
    def country_code(name):
        try:
            code = pc.country_name_to_country_alpha2(name, cn_name_format="default")
            return code
        except:
            return "N/A"

    def continent_name(name):
        try:
            continent = pc.country_alpha2_to_continent_code(name)
            return continent
        except:
            return "N/A"

    df["Country_2D"] = df["Country Name"].apply(country_code)
    df["Continent"] = df["Country_2D"].apply(continent_name)

    # In[7]:
    df.head()

    # In[8]:
    df_Africa = df[df["Continent"] == "AF"]
    df_Africa.head()

    # In[9]:
    df_africa = df_Africa.rename(columns={
        'Country Name': 'Country',
    })
    df_africa.head()

    # In[10]:
    df_africa.columns

    # In[11]:
    df.groupby("Continent").count()

    # In[12]:
    # Import Dependencies
    import os
    import requests
    import json
    import pprint
    import numpy as np
    import flask
    import wbdata
    import datetime

    # In[13]:
    countries = [
        'algeria', 'angola', 'benin', 'botswana', 'burkina faso', 'burundi',
        'cabo verde', 'cameroon', 'central african republic', 'chad', 'comoros',
        'congo', "cote d'ivoire", 'djibouti', 'egypt', 'equatorial guinea',
        'esqtini', 'ethiopia', 'gabon', 'gambia', 'ghana', 'guinea',
        'guinea-bissau', 'kenya', 'lesotho', 'liberia', 'libya', 'madagascar',
        'malawi', 'mali', 'mauritania', 'mauritius', 'morocco', 'mozambique',
        'namibia', 'niger', 'nigeria', 'rwanda', 'sao tome and principe',
        'senegal', 'seychelles', 'sirre leone', 'somalia', 'south africa',
        'south sudan', 'sudan', 'tanzania', 'togo', 'tunisia', 'uganda',
        'zambia', 'zimbabwe'
    ]
    country_codes = [
        'AGO', 'ALB', 'ARB', 'BDI', 'BEN', 'BFA', 'BMN', 'BSS', 'BWA', 'CAA',
        'CAF', 'CIV', 'CME', 'CMR', 'COG', 'COM', 'CPV', 'DJI', 'DMN', 'DSF',
        'DSS', 'DZA', 'EGY', 'ETH', 'GAB', 'GHA', 'GMB', 'GNB', 'GNQ', 'KEN',
        'LBR', 'LSO', 'MAR', 'MDG', 'MEA', 'MLI', 'MNA', 'MOZ', 'MRT', 'MUS',
        'MWI', 'NAF', 'NAM', 'NER', 'NGA', 'NLS', 'NRS', 'RRS', 'RSO', 'RWA',
        'SDN', 'SLE', 'SOM', 'SSA', 'SSD', 'SSF', 'SWZ', 'SXZ', 'SYC', 'TCD',
        'TGO', 'TMN', 'TSS', 'TUN', 'TZA', 'UGA', 'XZN', 'ZAF', 'ZMB', 'ZWE'
    ]
    indicators = "SP.POP.TOTL"
    data_date = datetime.datetime(2008, 1, 1), datetime.datetime(2019, 1, 1)
    wbdata.get_indicator(source=50)

    # In[14]:
    wbdata.search_countries('')

    # In[15]:
    data = wbdata.get_data(indicators, country=country_codes, data_date=data_date)
    df_wbdata = pd.DataFrame(data)
    df_wbdata = df_wbdata.rename(columns={
        "indicator": "Indicator",
        "country": "Country",
        "countryiso3code": "Country code",
        "date": "Year",
        "value": "Population",
    })
    df_wbdata = df_wbdata.filter(items=['Country', 'Country code', 'Year', 'Population'])
    df_wbdata.dropna(inplace=True)
    df_wbdata['Country'] = df_wbdata['Country'].astype(str)
    df_wbdata['Country code'] = df_wbdata['Country code'].astype(str)
    df_wbdata['Year'] = df_wbdata['Year'].astype(str)
    df_wbdata['Population'] = df_wbdata['Population'].astype(str)
    df_wbdata['Country'] = df_wbdata['Country'].str.slice(23, -2)
    df_wbdata

    # In[16]:
    df_africa['Year'] = df_africa['Year'].astype('int64')
    df_wbdata['Year'] = df_wbdata['Year'].astype('int64')

    # In[17]:
    merged_df = pd.merge(left=df_africa, right=df_wbdata, how="left",
                         on=['Country code', 'Year', 'Country'])
    merged_df.head()

    # In[18]:
    merged_df = merged_df.rename(columns={
        'Country code': 'Country_Code',
        'Start Date': 'Start_Date',
        'Event Name': 'Event_Name',
        'Hazard Category': 'Hazard_Category',
        'Hazard Type': 'Hazard_Type',
        'New Displacements': 'New_Displacements',
    })
    merged_df.head()
    merged_df = merged_df.dropna()
    merged_df
    merged_df[merged_df['Population'].isna()].count()
    merged_df.to_csv('merged.csv', index=False)

    # Create the engine and pass in Postgresql
    from sqlalchemy import create_engine
    engine = create_engine('postgresql://*****:*****@localhost/project2_db')
    engine.table_names()
    query = pd.read_sql_query('select * from merged_data', con=engine)
    return query
print(macro_data.tail())

# We can generate a plot for all of our data as follows:
macro_data.plot(grid=True)

# We can apply functions to all columns:
# Average values of macro data during recession
macro_data[macro_data['recession'] == 1].apply(np.mean)
# Average values of macro data during expansion
macro_data[macro_data['recession'] == 0].apply(np.mean)

# Access World Bank Data
# pip install wbdata
import wbdata

# We can search for keys as follows:
wbdata.search_indicators("unemployment")
data_date = (datetime.datetime(1950, 1, 1), datetime.datetime(2019, 2, 10))
unemployment_data = wbdata.get_data("UNEMPSA_", data_date=data_date)
country_data = pd.DataFrame(unemployment_data)

# Access data from quandl
# pip install quandl
import quandl

# Please replace my key with your key
quandl.api_config.ApiConfig.api_key = '1zm1xSnnoqFeAGksg3S1'
oil_prices = quandl.get("OPEC/ORB")
oil_prices.plot(grid=True, title='OPEC Reference Basket')

## IEX exchange (A simple API)
import requests
import io

url = 'https://api.iextrading.com/1.0/stock/aapl/chart/date/20190129'
s = requests.get(url).content
# ease of business
import wbdata

wbdata.get_source()
wbdata.get_indicator(source=1)

# get country codes with a search
wbdata.search_countries('Turkey')  # TUR

# get indicators with a search
wbdata.search_indicators('ease of doing business')  # IC.BUS.DFRN.XQ

wbdata.get_data('IC.BUS.DFRN.XQ', country='TUR')[0]

wbdata.search_countries('united')  # GBR
wbdata.get_data('IC.BUS.DFRN.XQ', country='GBR')[0]

import datetime
data_date = (datetime.datetime(2017, 1, 1), datetime.datetime(2019, 1, 1))
wbdata.get_data("IC.BUS.DFRN.XQ", country=("USA", "GBR"), data_date=data_date)

wbdata.search_indicators("gdp per capita")
wbdata.get_data('NY.GDP.PCAP.KD.ZG')
wbdata.get_data('NY.GDP.PCAP.KD.ZG', country='USA')
wbdata.get_data('NY.GDP.PCAP.KD.ZG', country='OED')

# income level filter
wbdata.get_incomelevel()
countries = [i['id'] for i in wbdata.get_country(incomelevel="HIC", display=False)]
indicators = {"IC.BUS.DFRN.XQ": "doing_business", "NY.GDP.PCAP.PP.KD": "gdppc"}
df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)
df.to_csv('KocPython2020/in-classMaterial/day6/econ.csv')
df.describe()
def testDateRange(self):
    wbdata.get_data("SH.XPD.PRIV.ZS",
                    country="usa",
                    data_date=(datetime.datetime(2006, 1, 1), datetime.datetime(2010, 1, 1)))
# ease of business
import wbdata

wbdata.get_source()
wbdata.get_indicator(source=1)

# get country codes with a search
wbdata.search_countries('Turkey')  # TUR
wbdata.get_data('IC.REG.COST.PC.MA.ZS', country='TUR')[0]

wbdata.search_countries('united')  # GBR
wbdata.get_data('IC.REG.COST.PC.MA.ZS', country='GBR')

import datetime
data_date = (datetime.datetime(2010, 1, 1), datetime.datetime(2011, 1, 1))
wbdata.get_data("IC.REG.COST.PC.MA.ZS", country=("USA", "GBR"), data_date=data_date)

wbdata.search_indicators("gdp per capita")
wbdata.get_data('NY.GDP.PCAP.KD.ZG')
wbdata.get_data('NY.GDP.PCAP.KD.ZG', country='USA')
wbdata.get_data('NY.GDP.PCAP.KD.ZG', country='OED')

# income level filter
wbdata.get_incomelevel()
countries = [i['id'] for i in wbdata.get_country(incomelevel="HIC", display=False)]
indicators = {"IC.REG.COST.PC.MA.ZS": "doing_business", "NY.GDP.PCAP.PP.KD": "gdppc"}
""" Messing around with Oliver Sherouse's wbdata, which accesses all of the World Bank's data API's. This follows the documentation, link below. Not sure this is ready for primetime, but it could be me... References * http://datacatalog.worldbank.org/ * http://blogs.worldbank.org/opendata/accessing-world-bank-data-apis-python-r-ruby-stata * https://github.com/OliverSherouse/wbdata/blob/master/docs/source/index.rst Prepared for the NYU Course "Global Economy" * https://sites.google.com/site/nyusternglobal/home * https://github.com/DaveBackus/Global_Economy Written by Dave Backus @ NYU, September 2014 Created with Python 3.4 """ import wbdata wbdata.get_source() wbdata.get_indicator(source=15) d = wbdata.get_data('IC.BUS.EASE.XQ', country='USA') indicators = {'IC.BUS.EASE.XQ': 'Ease', 'IRSPREAD': 'Spread'} df1 = wbdata.get_dataframe(indicators, data_date=(2012, 2013), country='ARG') #df2 = wbdata.get_dataframe('IRSPREAD', country='all', convert_date=True) #%%
def home(request):
    if request.method == "POST":
        def coal(c1):
            amount = 0.015 * c1
            return amount

        def lpg(lg):
            amount = 0.0803 * lg
            return amount

        def redmeat(rm):  # rm is servings per day
            amount = 2.58 * rm  # metric tons of co2 per year
            return amount

        def clothes(cl):  # cl should be in dollars (per month)
            amount = 0.005 * cl
            return amount

        def furniture(fr):  # fr should be in dollars
            amount = 0.001 * fr
            return amount

        def laundry(ld):  # ld is number of times (per week)
            amount = 0.1 * ld
            return amount

        def treadmill(tm):  # tm must be in hrs (per week)
            amount = 0.0467 * tm
            return amount

        def vehicle(hrs):
            amount = 0.0444 * hrs
            return amount

        def papers(number):
            amount = 0.0152 * number
            return amount

        a = request.POST['heat']
        b = request.POST['hrsofheat']
        he = float(b)
        if a == 'coal':
            res1 = coal(he)
        elif a == 'LPG':
            res1 = lpg(he)

        c = request.POST['transportation']
        res2 = 0
        d = request.POST['hrs']
        hr = float(d)
        print(vehicle(hr))
        res3 = vehicle(hr)

        e = request.POST['meat']
        mt = float(e)
        print(redmeat(mt))
        res4 = redmeat(mt)

        f = request.POST['clothes']
        cl = float(f)
        print(clothes(cl))
        res5 = clothes(cl)

        g = request.POST['ac']
        print(g)
        res6 = 0

        h = request.POST['furniture']
        ft = float(h)
        print(furniture(ft))
        res7 = furniture(ft)

        i = request.POST['laundary']
        ld = float(i)
        print(laundry(ld))
        res8 = laundry(ld)

        j = request.POST['treadmill']
        td = float(j)
        print(treadmill(td))
        res9 = treadmill(td)

        k = request.POST['papers']
        p = int(k)
        print(papers(p))
        res10 = papers(p)

        code = request.POST['crcode']
        print(code)
        emission_per_capita = w.get_data("EN.ATM.CO2E.EG.ZS", country=code.upper())
        for i in emission_per_capita:
            if i['value'] is not None:
                emission_per_capita_value = i['value'] / 3.667  # convert CO2 mass to carbon (44/12)
                break
        # print(value)

        # country_code = input("Country code : ")
        GDP = w.get_data("NY.GDP.MKTP.CD", country=code.upper())
        GDP_val = GDP[1]['value']
        # print(GDP[1]['value'])

        # country_code = input("Country code : ")
        energy_intensity = w.get_data("EG.EGY.PRIM.PP.KD", country=code.upper())
        # print(energy_intensity)
        for i in energy_intensity:
            if i['value'] is not None:
                energy_intensity_val = i['value']
                break
        # print(val)
        # print(emission_per_capita_value)
        # print(GDP_val)
        # print(energy_intensity_val)
        # print()

        kaya_value = (emission_per_capita_value * GDP_val * energy_intensity_val * 0.001) / 41.868

        # total of all estimated contributions
        result = res1 + res2 + res3 + res4 + res5 + res6 + res7 + res8 + res9 + res10
        print(result)
        print(kaya_value)
        return render(request, "result.html", {'getvalue': result, 'countrycode': kaya_value})
## http://blogs.worldbank.org/opendata/new-country-classifications-income-level-2017-2018
## Country data, income level
countries = wb.get_country(display=False)
df_country = pd.DataFrame(countries)

def get_income(x):
    return x['value']

df_country['incomeLevel'] = df_country.incomeLevel.apply(get_income)
df_country[df_country.id == 'TWN']

## GDP (current US$)
## https://data.worldbank.org/indicator/NY.GDP.MKTP.CD
gdp = wb.get_data("NY.GDP.MKTP.CD")
list_gdp = []
for i in gdp:
    if i['date'] == '2018':
        list_gdp = list_gdp + [[i['country']['id'], i['country']['value'], i['value']]]
df_gdp = pd.DataFrame(list_gdp, columns=['iso2Code', 'name', 'value'])

## GDP growth (annual %)
## GDP growth
## https://data.worldbank.org/indicator/NY.GDP.MKTP.KD.ZG
gdp_growth = wb.get_data("NY.GDP.MKTP.KD.ZG")
list_gdp_growth = []
for i in gdp_growth:
    if i['date'] == '2018':
        list_gdp_growth = list_gdp_growth + [[i['country']['id'], i['country']['value'], i['value']]]
@author: johnjsyoo
"""

import wbdata
import datetime
import pandas as pd
import MySQLdb as myDB

######################### GETTING ENERGY DATA FOR ALL COUNTRIES ##############################

# Setting the date range for our data
data_date = (datetime.datetime(2011, 1, 1))

# GDP (current US$)
gdp = wbdata.get_data("NY.GDP.MKTP.CD", data_date=data_date, pandas=True)[44:]

# Data on Electricity production from renewable sources (kWh)
renewableProd = wbdata.get_data("EG.ELC.RNEW.KH", data_date=data_date, pandas=True)[44:]
electricityProd = wbdata.get_data("EG.ELC.PROD.KH", data_date=data_date, pandas=True)[44:]

# Convert the time-series data into a Data Frame
gdpDF = pd.DataFrame(gdp)
renewableProdDF = pd.DataFrame(renewableProd)
electricityProdDF = pd.DataFrame(electricityProd)

energyDF = gdpDF.join(renewableProdDF, lsuffix="GDP", rsuffix="Renewable_kWh")
energyDF = energyDF.join(electricityProdDF)
energyDF.rename(columns={'value': 'Electricity_kWh'}, inplace=True)

# Dropping all NaN values and zero values
# In[ ]:

# 1 Doing Business
wbdata.get_indicator(source=1)

# In[ ]:

wbdata.search_countries("Brazil")

# In[ ]:

#wbdata.get_data(indicator, country=u'all', data_date=None, convert_date=False, pandas=False,
#                column_name=u'value', keep_levels=False)
wbdata.get_data("IC.BUS.EASE.XQ", country=u'BRA')

# In[ ]:

data_date = (datetime.datetime(2010, 1, 1), datetime.datetime(2011, 1, 1))
wbdata.get_data("IC.BUS.EASE.XQ", country=("USA", "BRA"), data_date=data_date)

# In[ ]:

wbdata.search_indicators("gdp per capita")

# In[ ]:
def testIndicator(self):
    wbdata.get_data("SH.XPD.PRIV.ZS")
import wbdata
import json
import pprint

pp = pprint.PrettyPrinter(indent=4)

x = wbdata.get_data("FP.CPI.TOTL")
pp.pprint(x[0])

# sources = wbdata.get_source(display=False)
# indicators = wbdata.get_indicator(source=sources[0]['id'], display=False)
# pp.pprint(indicators[0])
# print
# sources[0]['id']
from pprint import pprint
import wbdata as w

country_code = input("Country code : ")
energy_intensity = w.get_data("EG.EGY.PRIM.PP.KD", country=country_code.upper())
#pprint(energy_intensity)
for i in energy_intensity:
    if i['value'] is not None:
        value = i['value']
        break
print(value)

# Country codes available
# ABW   Aruba
# AFG   Afghanistan
# AFR   Africa
# AGO   Angola
# ALB   Albania
# AND   Andorra
# ANR   Andean Region
# ARB   Arab World
# ARE   United Arab Emirates
# ARG   Argentina
# ARM   Armenia
# ASM   American Samoa
# ATG   Antigua and Barbuda
# AUS   Australia
# AUT   Austria
# AZE   Azerbaijan
# BDI   Burundi
# BEA   East Asia & Pacific (IBRD-only countries)
def testOneCountry(self):
    wbdata.get_data("SH.XPD.PRIV.ZS", country="USA")
def testTwoCountries(self):
    wbdata.get_data("SH.XPD.PRIV.ZS", country=("chn", "bra"))