def test_http_error(table_id):
    """Requesting a nonexistent table must raise requests.HTTPError."""
    raised = False
    try:
        cbsodata.get_data(table_id)
    except requests.HTTPError:
        raised = True
    # Any other exception propagates, exactly as in the try/else form.
    assert raised
def test_select_list():
    """Exercise get_data with the table's default $select passed as a list."""
    default_selection = cbsodata.get_info('82070ENG')['DefaultSelection']
    for part in default_selection.split("&"):
        if not part.startswith('$select='):
            continue
        # Strip the '$select=' prefix and split the comma-separated columns.
        columns = part[8:].split(', ')
        cbsodata.get_data('82070ENG', select=columns)
def test_filters():
    """Exercise get_data with the table's default $filter expression."""
    default_selection = cbsodata.get_info('82070ENG')['DefaultSelection']
    for part in default_selection.split("&"):
        if not part.startswith('$filter='):
            continue
        # Strip the '$filter=' prefix and pass the raw OData filter through.
        expression = part[8:]
        cbsodata.get_data('82070ENG', filters=expression)
async def get_data(request: Request):
    """Serve CBS table 84286NED restricted to the configured columns and
    municipalities, as a row-indexed dict (NaNs replaced by 0)."""
    write_logging(request, 'CBS-API ')
    frame = pd.DataFrame(cbsodata.get_data('84286NED'))[cbscolumns]
    # Keep only rows whose region code matches one of the selected municipalities.
    selected = frame['Codering_3'].str.contains('|'.join(gemeenteselectie))
    return frame[selected].fillna(0).to_dict(orient='index')
def get_population_per_ggd_region():
    """Return a mapping from GGD region code to total population.

    Table 84721NED gives information about regions within the Netherlands.
    See: https://www.cbs.nl/nl-nl/cijfers/detail/84721NED

    Returns
    -------
    dict
        ``{region_code (str, stripped): population (int)}``
    """
    cbs_data = cbsodata.get_data(
        '84721NED', select=['RegioS', 'Code_14', 'Inwonertal_52'])
    # Sum only the population column. Calling .sum() on the whole frame would
    # also "sum" the string column RegioS (concatenation, or an error on
    # recent pandas versions with numeric_only semantics).
    population = pd.DataFrame(cbs_data).groupby("Code_14")["Inwonertal_52"].sum()
    # CBS codes are padded with trailing whitespace; normalize the index.
    population.index = population.index.str.strip()
    return population.to_dict()
def process_data_gezondheidsmonitor(CBS_codering, name):
    """Load a health-monitor CBS table and key its rows by municipality.

    Builds a 2016 region-name -> municipality-code map from table 83487NED,
    then keeps only rows whose region is a known 2016 municipality.

    Parameters
    ----------
    CBS_codering : str
        CBS table identifier to download.
    name : str
        Unused here; kept for signature parity with the other process_data_*
        functions.

    Returns
    -------
    pd.DataFrame indexed by int('1' + <code without 'GM' prefix>), with
    columns renamed through the module-level ``translator``.
    """
    data = cbsodata.get_data(CBS_codering)
    buurten2016 = cbsodata.get_data('83487NED')
    translate2016 = {}
    for row in buurten2016:
        if row['SoortRegio_2'].rstrip() == 'Gemeente':
            translate2016[row['WijkenEnBuurten']] = row['Codering_3']
    collecting = {}
    for row in data:
        try:
            identifier = int('1' + translate2016[row['RegioS']][2:])
        except KeyError:
            # Region is not a 2016 municipality (or the row lacks 'RegioS');
            # skip it. The original bare `except: pass` also hid unrelated
            # bugs such as TypeErrors — now only the expected miss is ignored.
            continue
        collecting[identifier] = row
    df = pd.DataFrame.from_dict(collecting, orient='index', dtype=None)
    df.rename(columns=translator, inplace=True)
    return df
def load_cbs_table(table_id, typed_data_set=False):
    '''
    Create a Postgres table with the required structure, downloads the data
    from CBS, and loads the data into the Postgres table.

    Parameters:
        table_id: CBS table identifier to download.
        typed_data_set: when True, download the 'TypedDataSet' metadata
            instead of the regular data set.

    Side effects: creates and fills a Postgres table, prints progress.
    Skips all work if the target table already exists.
    '''
    try:
        table_name = get_sanitized_cbs_table_title(table_id, typed_data_set)
    except ConnectionError as e:
        print(
            f'\tERROR! Could not load CBS table due to a ConnectionError. Are you connected to the internet? Error:\n\t{e}'
        )
        # do not continue processing
        return
    print(f'Loading CBS table {table_name}')
    # Do not load data
    if table_exists(table_name):
        print('Table already exists, skipping downloading and loading.')
    else:
        print('Creating table...')
        create_table_for_cbs_table(table_id, table_name)
        print('Downloading data...')
        # TypedDataSet comes through the metadata endpoint, not get_data.
        if typed_data_set:
            data = cbsodata.get_meta(table_id, 'TypedDataSet')
        else:
            data = cbsodata.get_data(table_id)
        print('Inserting data...')
        sanitize_key_dict = get_sanitize_key_dict(table_id)
        connection = get_connection()
        cursor = connection.cursor()
        for row in data:
            # Before we can insert the data, we have to manipulate
            # the key so it matches the columns in the created
            # Postgres table, and we might need to trim the white space
            # from the values.
            row_dict = {
                sanitize_key_dict[key]: sanitize_data(value)
                for key, value in row.items()
            }
            # TODO: it's probably more efficient to just pass a list,
            # I think the required keys are always there. And then we can
            # more easily batch these INSERTs.
            insert_dict(table_name, row_dict, cursor)
        cursor.close()
        # Commit once after all inserts, then release the connection.
        connection.commit()
        connection.close()
        add_indices(table_name)
    print()
def process_data_sociale_voorzieningen(CBS_codering, name):
    """Collect yearly municipal rows of a CBS table into a renamed DataFrame.

    Iterates in reverse so the earliest matching row wins per municipality.
    """
    rows = cbsodata.get_data(CBS_codering)
    keep = {}
    for entry in reversed(rows):
        code = entry['Codering_3'].rstrip()
        # Only full-year periods ('YYYY') with a non-empty region code.
        if len(entry['Perioden']) == 4 and code:
            keep[int('1' + entry['Codering_3'][2:])] = entry
    frame = pd.DataFrame.from_dict(keep, orient='index', dtype=None)
    frame.rename(columns=translator, inplace=True)
    return frame
def process_data_buurten(CBS_codering, name):
    """Load a CBS neighbourhood table and tag each row with the year
    parsed from the digits in *name*."""
    rows = cbsodata.get_data(CBS_codering)
    year = int(''.join(ch for ch in name if ch.isdigit()))
    keep = {}
    for entry in reversed(rows):
        # Mutating before storing is equivalent to the original
        # store-then-mutate: the dict holds a reference to the same object.
        entry['Perioden'] = year
        entry['Gemeentenaam_1'] = entry['Gemeentenaam_1'].strip()
        entry['SoortRegio_2'] = entry['SoortRegio_2'].strip()
        keep[int('1' + entry['Codering_3'][2:])] = entry
    frame = pd.DataFrame.from_dict(keep, orient='index', dtype=None)
    frame.rename(columns=translator, inplace=True)
    return absolute_values_BeW(frame)
def test_get_data_derden(table_id):
    """The three ways of selecting the dataderden catalog must agree."""
    # 1: mutate the global option (restore it afterwards)
    print("global")
    cbsodata.options.catalog_url = 'dataderden.cbs.nl'
    via_global = cbsodata.get_data(table_id)
    cbsodata.options.catalog_url = 'opendata.cbs.nl'
    # 2: temporary catalog via context manager
    print("context")
    with cbsodata.catalog('dataderden.cbs.nl'):
        via_context = cbsodata.get_data(table_id)
    # 3: explicit keyword argument per call
    print("argument")
    via_argument = cbsodata.get_data(table_id, catalog_url='dataderden.cbs.nl')
    first = via_global[0]
    assert len(first.keys()) > 0
    for key in first.keys():
        assert first[key] == via_context[0][key] == via_argument[0][key]
def get_config(self, dir_config):
    """Get config.

    Builds the lookup tables used for geocoding:
    - wp: {province-or-'any': {woonplaats.lower(): gemeente}} — only
      place names that occur exactly once (unambiguous).
    - gm: {province-or-'any': {gemeente.lower(): gemeente}}, extended
      with self.recode_gem overrides.
    - self.replace: area code -> place, filled from area_codes.csv.
    - remove: lowercased province names plus self.remove, minus
      'utrecht'/'groningen' (which are also municipality names).

    When dir_config is given, area_codes.csv and cbs.csv are used as an
    on-disk cache; missing files are fetched and written back.
    """
    if dir_config:
        if isinstance(dir_config, str):
            dir_config = Path(dir_config)
        dir_config.mkdir(exist_ok=True)
        path_area_codes = dir_config / 'area_codes.csv'
        path_cbs = dir_config / 'cbs.csv'
    # NOTE(review): if dir_config is falsy, path_area_codes/path_cbs are
    # unbound and pd.read_csv raises NameError, not FileNotFoundError —
    # presumably callers always pass dir_config; verify.
    try:
        area_codes = pd.read_csv(path_area_codes, dtype='object')
    except FileNotFoundError:
        area_codes = self.get_area_codes()
        if dir_config:
            area_codes.to_csv(path_area_codes, index=False)
    for code, place in zip(area_codes.code, area_codes.place):
        self.replace[code] = place
    try:
        cbs = pd.read_csv(path_cbs)
    except FileNotFoundError:
        cbs = pd.DataFrame(cbsodata.get_data(self.cbs_table))
        # Strip whitespace from every non-integer column.
        for c in cbs.columns:
            if cbs[c].dtype == 'int64':
                continue
            cbs[c] = cbs[c].str.strip()
        if dir_config:
            cbs.to_csv(path_cbs, index=False)
    wp = {}
    # Place -> municipality, only for globally unambiguous place names.
    wp['any'] = {
        wp.lower():gm
        for wp, gm in zip(cbs.Woonplaatsen, cbs.Naam_2)
        if list(cbs.Woonplaatsen).count(wp) == 1
    }
    gm = {}
    gm['any'] = {gm.lower():gm for gm in set(cbs.Naam_2)}
    for k, v in self.recode_gem.items():
        gm['any'][k] = v
    # Per-province variants of the same two lookups.
    for province in set(cbs.Naam_4):
        subset = cbs[cbs.Naam_4 == province]
        key = province.lower()
        gm[key] = {gm.lower():gm for gm in set(subset.Naam_2)}
        for k, v in self.recode_gem.items():
            gm[key][k] = v
        wp[key] = {
            wp.lower():gm
            for wp, gm in zip(subset.Woonplaatsen, subset.Naam_2)
            if list(subset.Woonplaatsen).count(wp) == 1
        }
    remove = list(set(cbs.Naam_4.str.lower())) + self.remove
    # Utrecht and Groningen are provinces AND municipalities; keep them.
    remove = [s for s in remove if s not in ['utrecht', 'groningen']]
    return wp, gm, self.replace, remove
def test_select_n_cols():
    """Selecting two columns must yield rows with exactly two keys."""
    default_selection = cbsodata.get_info('82070ENG')['DefaultSelection']
    for part in default_selection.split("&"):
        if part.startswith('$select='):
            columns = part[8:].split(', ')
            data = cbsodata.get_data('82070ENG', select=columns[0:2])
            # Spot-check a few rows instead of three separate asserts.
            for idx in (0, 5, 10):
                assert len(data[idx].keys()) == 2
def get_and_combine_cbs_tables(dict_tables,
                               double_trouble_colnames=None,
                               url='opendata.cbs.nl'):
    """
    Method to get multiple similar tables in the CBS database.

    Parameters
    ----------
    dict_tables : dict(str: str)
        Dictionary with as key the period and as value the table name
    double_trouble_colnames : dict(str: str)
        Dictionary with columnnames that will cause trouble if the
        suffix is deleted
    url : str
        URL of the catalog of the CBS databases, i.e.: 'opendata.cbs.nl'

    Returns
    -------
    pd.DataFrame with cbs data
    """
    print(f"Number of tables to collect: {len(dict_tables)}")
    df = pd.DataFrame()
    for interval, table in dict_tables.items():
        print(f"Pythonic iteration {interval} for table {table}")
        try:
            df_sub = pd.DataFrame(cbsodata.get_data(table, catalog_url=url))
            if double_trouble_colnames:
                df_sub = df_sub.rename(columns=double_trouble_colnames)
            # Normalize column names: drop the numeric suffix CBS appends,
            # remove underscores, lowercase.
            renamed = {
                col: col.rstrip('0123456789').replace("_", "").lower()
                for col in df_sub.columns
            }
            df_sub = df_sub.rename(columns=renamed)
            df_sub['interval'] = interval
        except Exception as exc:
            # Best effort: a failing table contributes an empty frame, but
            # the failure is reported instead of silently swallowed.
            print(f"Could not collect table {table}: {exc}")
            df_sub = pd.DataFrame()
        df = pd.concat([df, df_sub], sort=True)
    return df
def cbsmortalitydisplacement():
    """Fetch CBS table 70895ned, cached on disk keyed by its 'Modified'
    timestamp so a new upstream version invalidates the cache."""
    table = '70895ned'
    modified = cbsodata.get_info(table)['Modified']
    # ':' is not allowed in filenames on all platforms.
    cachefile = cachedir / f"{modified.replace(':', '_')}-cbsmortdispl.json"
    if not cachefile.exists():
        data = cbsodata.get_data(table)
        with open(cachefile, 'w') as fh:
            json.dump(data, fh)
        return data
    with open(cachefile) as fh:
        return json.load(fh)
def process_data_wmo(CBS_codering, name):
    """Pivot WMO client counts per financing form into one row per
    municipality, keyed by int('1' + code without the 'GM'/'WK' prefix)."""
    data = cbsodata.get_data(CBS_codering)
    collecting = {}
    for row in reversed(data):
        # Only full-year periods with a non-empty region code.
        if len(row['Perioden']) != 4 or not row['Codering_3'].rstrip():
            continue
        identifier = int('1' + row['Codering_3'][2:])
        vorm = row['Financieringsvorm']
        target = collecting.get(identifier)
        if target is None:
            # First row for this municipality becomes the accumulator.
            target = row
            collecting[identifier] = row
        target[vorm] = row['WmoClienten_5']
        target[vorm + ' per100Inwoners_6'] = row['WmoClientenPer1000Inwoners_6']
    df = pd.DataFrame.from_dict(collecting, orient='index', dtype=None)
    df.rename(columns=translator, inplace=True)
    return df
def process_data_jongeren(CBS_codering, name):
    """Pivot youth-care counts per care type into one row per municipality;
    recode the NL01 national code to NL00."""
    data = cbsodata.get_data(CBS_codering)
    collecting = {}
    for row in data:
        if len(row['Perioden']) != 4:
            continue
        identifier = int('1' + row['Codering_3'][2:])
        # Newer tables use 'VormenVanJeugdzorg', older ones 'TypeJeugdzorg'.
        if 'VormenVanJeugdzorg' in row:
            care_type = row['VormenVanJeugdzorg']
        else:
            care_type = row['TypeJeugdzorg']
        if identifier in collecting:
            collecting[identifier][care_type] = row['TotaalJongerenMetJeugdzorg_5']
        else:
            row[care_type] = row['TotaalJongerenMetJeugdzorg_5']
            row['Gemeentenaam_1'] = row['Gemeentenaam_1'].strip()
            row['Wijken'] = row['Wijken'].strip()
            collecting[identifier] = row
    df = pd.DataFrame.from_dict(collecting, orient='index', dtype=None)
    df.rename(columns=translator, inplace=True)
    df.loc[df['Codering'].str.contains('NL01', regex=False), ['Codering']] = 'NL00'
    return df
# -*- coding: utf-8 -*- """ Created on Fri Mar 29 16:45:06 2019 @author: Buf """ import cbsodata as cbs import pandas as pd try: pd.read_csv('data.csv') except IOError: pd.DataFrame(cbs.get_data('83021ENG')).to_csv('data.csv') df_orig = pd.read_csv('data.csv').drop('Unnamed: 0', 1) try: pd.read_csv('metadata.csv') except IOError: pd.DataFrame(cbs.get_meta('83021ENG', 'DataProperties', catalog_url=None)).to_csv('metadata.csv') metadata = pd.read_csv('metadata.csv')[['Key', 'Title', 'Description']].set_index('Key') selection1 = [ 'Drinkers_17', 'Smokers_1', 'NormalWeight_50', 'LastMonth_44', # Drug use 'LastYear_45', # Drug use 'Ever_46', # Drug use 'WeeklySporter_67'
https://www.cbs.nl/nl-nl/onze-diensten/open-data Auteur: Jolien Oomens Centraal Bureau voor de Statistiek In dit voorbeeld worden gemeentegrenzen gekoppeld aan geboortecijfers om een thematische kaart te maken. """ import pandas as pd import geopandas as gpd import cbsodata # Haal alle geboortecijfers op en filter op gemeenten data = pd.DataFrame( cbsodata.get_data( '83765NED', select=['WijkenEnBuurten', 'Codering_3', 'GeboorteRelatief_25'])) data['Codering_3'] = data['Codering_3'].str.strip() # De geodata wordt via de API van het Nationaal Georegister van PDOK opgehaald. # Een overzicht van beschikbare data staat op https://www.pdok.nl/datasets. geodata_url = "https://geodata.nationaalgeoregister.nl/cbsgebiedsindelingen/wfs?request=GetFeature&service=WFS&version=2.0.0&typeName=cbs_gemeente_2017_gegeneraliseerd&outputFormat=json" gemeentegrenzen = gpd.read_file(geodata_url) gemeentegrenzen = pd.merge(gemeentegrenzen, data, left_on="statcode", right_on="Codering_3") p = gemeentegrenzen.plot(column='GeboorteRelatief_25', figsize=(10, 8)) p.axis('off')
def retreive_data_cbs(data):
    """Download the CBS table identified by *data* as a DataFrame.

    NOTE(review): the misspelling 'retreive' is preserved because callers
    use this name.
    """
    return pd.DataFrame(cbsodata.get_data(data))
import pandas as pd import cbsodata from datetime import date from pathlib import Path cachestamp_week = date.today().strftime('%G-%V') cbsfile = Path(f'cache/{cachestamp_week}-83474NED') if cbsfile.exists(): df_83474NED = pd.read_json(cbsfile) else: df_83474NED = pd.DataFrame(cbsodata.get_data('83474NED')) df_83474NED.to_json(cbsfile) popsize = df_83474NED.iloc[-1]['BevolkingAanHetEindVanDePeriode_8'] df_dashboard = pd.read_csv( 'https://raw.githubusercontent.com/Sikerdebaard/netherlands-vaccinations-scraper/main/people-vaccinated.csv', index_col=0) df_dashboard.index = pd.to_datetime(df_dashboard.index) manual_points = [] for idx, row in df_dashboard.iterrows(): vaccinations = row['total_vaccinations'] date = idx.date() data = { 'date': date, 'total_vaccinations': vaccinations, 'people_vaccinated': vaccinations, 'total_vaccinations_per_hundred': vaccinations / popsize * 100, 'people_vaccinated_per_hundred': vaccinations / popsize * 100,
bw_shp = gpd.read_file('zip:///' + bw_file_name + '!uitvoer_shape/buurt_2013.shp') # for future reference: read straight from url # http://andrewgaidus.com/Reading_Zipped_Shapefiles/ # data on regional division 2013 cbs_tables = cbsodata.get_table_list() def find_identifier(cbs_tables, search_list): for tbl in cbs_tables: if all(x in tbl['Title'] for x in search_list): identifier = tbl['Identifier'] return identifier gebieden_2013_id = find_identifier(cbs_tables, ['Gebieden', '2013']) gebieden_2013 = pd.DataFrame(cbsodata.get_data(gebieden_2013_id)) ''' Data wrangling ''' # stadsgewesten columns sg_names = ['Code_1', 'Naam_5', 'Code_6', 'Naam_45', 'Code_46'] # clean up and create legible names stadsgewesten = gebieden_2013[sg_names]. \ apply( lambda x: x.str.rstrip() ). \ rename(columns = { 'Naam_5' : 'Corop_naam', 'Code_6' : 'Corop_code',
def generate():
    """Compose the daily vaccination-progress tweets.

    Combines the CBS population pyramid (table 7461bev) with the model
    output in data/ensemble.csv and returns ([tweet, ...], [[]]).
    NOTE(review): line breaks inside the f-string literals below were
    reconstructed from a whitespace-collapsed source — verify against the
    original file.
    """
    data = cbsodata.get_data('7461bev')
    df = pd.DataFrame(data)
    # Fall back to 2020 population figures when 2021 is not yet published.
    if df[df['Perioden'] == '2021'].shape[0] == 0:
        df = df[df['Perioden'] == '2020']
    else:
        df = df[df['Perioden'] == '2021']
    df = df[df['BurgerlijkeStaat'] == 'Totaal burgerlijke staat']
    df = df[df['Geslacht'] == 'Totaal mannen en vrouwen']
    # Keep only single-age rows ('0 jaar' .. '100 jaar of ouder').
    ages = [f'{x} jaar' for x in range(0, 100)] + ['100 jaar of ouder']
    df = df[df['Leeftijd'].isin(ages)]
    df['Leeftijd'] = [int(x.split(' ')[0]) for x in df['Leeftijd'].values]
    popsize = df['Bevolking_1'].sum().astype(int)
    grownups = df[df['Leeftijd'] >= 18]['Bevolking_1'].sum().astype(int)
    children = df[df['Leeftijd'] < 18]['Bevolking_1'].sum().astype(int)
    # 12+ is the vaccination-eligible group.
    vaccgroup = df[df['Leeftijd'] >= 12]['Bevolking_1'].sum().astype(int)
    assert (grownups + children) == popsize
    df_model = pd.read_csv('data/ensemble.csv', index_col=0)
    df_model.index = pd.to_datetime(df_model.index)
    df_model.sort_index(inplace=True)
    latest_idx = df_model['people_vaccinated'].index[-1]
    latest_people_vaccinated = df_model.loc[latest_idx]['people_vaccinated']
    latest_people_fully_vaccinated = df_model.loc[latest_idx][
        'people_fully_vaccinated']
    # No-op expression (notebook leftover); kept for byte-identical code.
    latest_people_vaccinated, latest_idx
    pct_vaccinated = latest_people_vaccinated / popsize * 100
    pct_grownups = latest_people_vaccinated / grownups * 100
    pct_vaccgroup = latest_people_vaccinated / vaccgroup * 100
    # Model extrapolation can overshoot; clamp at 100%.
    if pct_vaccgroup > 100:
        pct_vaccgroup = 100
    pct_fully_vaccinated = latest_people_fully_vaccinated / popsize * 100
    pct_fully_grownups = latest_people_fully_vaccinated / grownups * 100
    pct_fully_vaccgroup = latest_people_fully_vaccinated / vaccgroup * 100
    if pct_fully_vaccgroup > 100:
        pct_fully_vaccgroup = 100
    twitter_handles = ['@covid_nl', '@kalahiri', '@YorickB']
    two_random_handles = random.sample(twitter_handles, 2)
    headers = [
        f'🧙✨🔮 Het orakel kijkt diep in de kristallen bol en ziet daar een grafiek verschijnen.',
        f'🧙✨🔮 Rook, vuur en lasereffecten! Het orakel verschijnt.',
        f'🧙✨🔮 Het orakel kijkt diep in de kristallen bol. {two_random_handles[0]} denkt er zo over en {two_random_handles[1]} weer net wat anders.',
    ]
    tweet = f"""{random.choice(headers)}

Gevaccineerd met ten minste een dosis:
{progressbar(pct_vaccinated)} van de Nederlanders
{progressbar(pct_vaccgroup)} van de 12+-ers""".strip()
    tweet2 = f"""
Volledig gevaccineerd, alle doses van een vaccin ontvangen:
{progressbar(pct_fully_vaccinated)} van de Nederlanders
{progressbar(pct_fully_vaccgroup)} van de 12+-ers""".strip()
    shutdownmsg = """Deze bot wordt op 19 december uitgeschakeld.
Volg @yorickb @kalahiri @covid_nl voor meer updates."""
    return [
        tweet, tweet2,
        _latest_year_tweet(),
        'https://twitter.com/vaccinorakel/status/1367747721671675910',
        shutdownmsg
    ], [[]]
""" Voorbeelden gebruik van CBS Open Data v3 in Python https://www.cbs.nl/nl-nl/onze-diensten/open-data Auteur: Jolien Oomens Centraal Bureau voor de Statistiek Minimale voorbeelden van het ophalen van een tabel, het koppelen van metadata en het filteren van data voor het downloaden. """ import pandas as pd import cbsodata # Downloaden van tabeloverzicht toc = pd.DataFrame(cbsodata.get_table_list()) # Downloaden van gehele tabel (kan een halve minuut duren) data = pd.DataFrame(cbsodata.get_data('83765NED')) print(data.head()) # Downloaden van metadata metadata = pd.DataFrame(cbsodata.get_meta('83765NED', 'DataProperties')) print(metadata[['Key', 'Title']]) # Downloaden van selectie van data data = pd.DataFrame( cbsodata.get_data('83765NED', filters="WijkenEnBuurten eq 'GM0363 '", select=['WijkenEnBuurten', 'AantalInwoners_5'])) print(data.head())
def download(data_id: str, output_file: Path) -> None:
    """Download CBS table *data_id* to *output_file* as CSV.

    Does nothing when the target file already exists.
    """
    if not Path(output_file).exists():
        frame = pd.DataFrame(cbsodata.get_data(data_id))
        frame.to_csv(output_file, index=False)
""" Voorbeelden gebruik van CBS Open Data v3 in Python https://www.cbs.nl/nl-nl/onze-diensten/open-data Auteur: Jolien Oomens Centraal Bureau voor de Statistiek In dit voorbeeld worden cijfers over de opbrengst van toeristenbelasting per jaar opgehaald en weergegeven in een grafiek. """ import pandas as pd import cbsodata # Download data en selecteer jaarcijfers data = pd.DataFrame( cbsodata.get_data('84120NED', filters="BelastingenEnWettelijkePremies eq 'A045081'")) data = data[data['Perioden'].str.match("^\d{4}$")] # Plot een grafiek p = data.plot(x='Perioden', y='OntvangenBelastingenEnWettPremies_1', legend=False) p.set_title("Opbrengst toeristenbelasting per jaar") p.set_ylim([0, 250]) p.set_xlabel("") p.set_ylabel("mln euro")
import pandas as pd import cbsodata import csv from sqlalchemy import create_engine import getpass import os import datetime import sys #Downloading entire dataset deathdata = pd.DataFrame(cbsodata.get_data('7052eng')) facilitiesdata = pd.DataFrame(cbsodata.get_data('7042eng')) #Downloading metadata deathmetadata = pd.DataFrame(cbsodata.get_meta('7052eng', 'DataProperties')) facilitiesmetadata = pd.DataFrame( cbsodata.get_meta('7042eng', 'DataProperties')) #link python to mysql and load data class datamanager: #build connnection to mysql database #def __init__ (self, database, username=input('insert username'), password=getpass.getpass('insert password')): def __init__(self, database): f = os.path.join(os.path.expanduser('~'), 'data_analytics_project', 'Scripts', 'authentication.txt') F = open(f) lines = F.readlines() self.username = lines[0].strip() self.password = lines[1].strip()
""" Name : c3_14_cbsodata.py Book : Hands-on Data Science with Anaconda) Publisher: Packt Publishing Ltd. Author : Yuxing Yan and James Yan Date : 1/15/2018 email : [email protected] [email protected] """ import pandas as pd import cbsodata as cb name = '82070ENG' data = pd.DataFrame(cb.get_data(name)) print(data.head()) info = cb.get_info(name) print(info['Title'])
def read_population_data_xlsx(source, name):
    """Read a population spreadsheet and aggregate it into age bands per
    CBS district code, with municipality (GM) and national (NL00) totals
    appended.

    Parameters:
        source: path to the Excel source file (e.g. "kale bron 2015.xlsx").
        name: string containing the year as digits; selects the CBS source
            table via the module-level source_translator.
    """
    wijken = defaultdict(str)
    codering = defaultdict(str)
    # Extract the year from the digits in *name*.
    year = int(''.join([x for x in name if x.isdigit()]))
    cbs_bron = source_translator[year]
    data_buurtenwijken = cbsodata.get_data(cbs_bron)
    # Build (gemeente, wijk) -> district code maps from the CBS table;
    # 'WK' prefixed codes are districts.
    for x in data_buurtenwijken:
        wijk = x['WijkenEnBuurten'].strip()
        gemeente = x['Gemeentenaam_1'].strip()
        if x['Codering_3'][0:2] == 'WK':
            wijken[(gemeente, wijk)] = x['Codering_3']
            codering[x['Codering_3']] = (gemeente, wijk)
    ## Here we load the 2018 resident data into the database; still to be
    ## moved to SQL Server.
    ## This file needs the document "kale bron 2015.xlsx" to be in the same folder to run.
    population_data = pd.read_excel(source)
    population_data['Codering'] = population_data.apply(lambda x: wijken[
        (x['GemeenteGBA'], x['GWBnaam' + str(year)].strip())], axis=1)
    # Initialize the age-band accumulator columns.
    population_data['0tm17'] = 0
    population_data['18tm65'] = 0
    population_data['66tm74'] = 0
    population_data['75tm84'] = 0
    population_data['85'] = 0
    population_data['totaal'] = 0
    population_data['20tm65'] = 0
    population_data['15tm75'] = 0

    def optellen(wijk):
        # The age column is named differently in the 2015 source.
        if year == 2015:
            leeftijd = wijk['Leeftijd']
        else:
            leeftijd = wijk['jaren']
        aantal_bewoners = wijk['Aantal']
        wijk['totaal'] += aantal_bewoners
        # Mutually exclusive primary age bands.
        if leeftijd < 18:
            wijk['0tm17'] += aantal_bewoners
        elif leeftijd < 66:
            wijk['18tm65'] += aantal_bewoners
        elif leeftijd < 75:
            wijk['66tm74'] += aantal_bewoners
        elif leeftijd < 85:
            wijk['75tm84'] += aantal_bewoners
        else:
            wijk['85'] += aantal_bewoners
        # Overlapping secondary bands (20-65 and 15-75).
        if leeftijd < 66 and leeftijd > 19:
            wijk['20tm65'] += aantal_bewoners
        if leeftijd < 76 and leeftijd > 14:
            wijk['15tm75'] += aantal_bewoners
        return wijk

    population_data = population_data.apply(optellen, axis=1)
    # Aggregate all rows per district code.
    population_data = population_data[[
        'totaal', '0tm17', '18tm65', '66tm74', '75tm84', '85', '20tm65',
        '15tm75', 'Codering'
    ]].groupby(['Codering']).sum()
    population_data['Codering'] = population_data.index
    population_data = population_data.reset_index(drop=True)
    # Drop the first row (rows with no matched district collapse into the
    # empty-string code group — presumably that is what is removed here;
    # TODO confirm).
    population_data = population_data[1:]
    population_data['Gemeentecode'] = population_data['Codering'].apply(
        lambda x: 'GM' + x[2:6])
    # National total row.
    NL = population_data.sum()
    NL['Codering'] = 'NL00'
    NL['Gemeentecode'] = 'NL00'
    # Per-municipality totals.
    GM = population_data.groupby('Gemeentecode').sum()
    GM['Codering'] = GM.index
    GM = GM.reset_index(drop=True)
    result = pd.concat([population_data, GM], ignore_index=True)
    result = result.append(NL, ignore_index=True)
    #result.index = result['Codering'].map(lambda x: int('1' + x[2:]))
    #result.rename({'Codering_3':'Codering'},inplace = True)
    #result.drop('codering',inplace = True)
    return result
from pathlib import Path from datetime import date from pathlib import Path cachekey = date.today().strftime('%G-%V') output_path = Path('./html/') try: cbscachefile = Path('cache') / f'{cachekey}-83474NED' if cbscachefile.exists(): cbs_df = pd.read_json(cbscachefile) print('Using cached 83474NED') else: cbs_df = pd.DataFrame(cbsodata.get_data('83474NED')) cbs_df.to_json(cbscachefile) by_year = {} by_month = {} for idx, row in cbs_df.iterrows(): try: year, month = row['Perioden'].split(' ') except ValueError as e: continue if month not in by_month: by_month[month] = {} by_month[month][year] = row['LevendGeborenKinderen_2']
def retreive_data_cbs():
    """Download CBS table 83625ENG as a DataFrame.

    NOTE(review): the misspelling 'retreive' is preserved because callers
    use this name.
    """
    return pd.DataFrame(cbsodata.get_data('83625ENG'))