def gen_mapping_BL2LK_json():
    """
    Generates a mapping table of BL_Code <-> LK_ID.
    dict: key = BL_Code -> dict of BL_Name and list of LK_IDs:
    {"SH": {"BL_Name": "Schleswig-Holstein",
            "LK_IDs": [["01001", "Flensburg"], ["01002", "Kiel"], ...]}, ...}
    """
    global d_ref_landkreise
    d_bundeslaender = {}
    d_landkreis_id_name_mapping = {}  # lk_id -> name
    for lk_id, lk in d_ref_landkreise.items():
        d_landkreis_id_name_mapping[lk_id] = get_lk_name_from_lk_id(lk_id)
        if lk['BL_Code'] not in d_bundeslaender:
            d_bundeslaender[lk['BL_Code']] = {
                'BL_Name': lk['BL_Name'],
                'LK_IDs': [(lk_id, lk['LK_Name'])],
            }
        else:
            d_bundeslaender[lk['BL_Code']]['LK_IDs'].append(
                (lk_id, lk['LK_Name']))
    helper.write_json('data/de-districts/mapping_bundesland_landkreis.json',
                      d_bundeslaender)
    helper.write_json('data/de-districts/mapping_landkreis_ID_name.json',
                      d_landkreis_id_name_mapping)
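# A minimal sketch of what the helper.write_json used by these exporters might
# look like (an assumption: filename first, then the data, mirroring the
# keyword arguments filename=, d=, sort_keys=, indent= used in the calls
# throughout this file; the real helper module is not shown here).
import json

def write_json(filename: str, d, sort_keys: bool = False, indent: int = 1):
    """Write a dict/list `d` to `filename` as UTF-8 JSON."""
    with open(filename, mode='w', encoding='utf-8', newline='\n') as fh:
        json.dump(d, fh, ensure_ascii=False, sort_keys=sort_keys, indent=indent)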
def export_latest_data(d_districts_data: dict):
    d_districts_latest = helper.extract_latest_data(
        d_ref_landkreise, d_districts_data)
    d_for_export_V1 = d_districts_latest
    l_for_export_V2 = []
    for lk_id, d in d_districts_latest.items():
        # V1: dict (lk_id) -> dict
        # V2: list of dicts
        d["Landkreis"] = get_lk_name_from_lk_id(lk_id)
        d["Bundesland"] = d["BL_Name"]
        del d["BL_Name"]
        # DIVI data is not returned by helper.extract_latest_data and is mostly
        # not yet available for the latest day, so fall back to the previous day.
        if 'DIVI_Intensivstationen_Covid_Prozent' in d_districts_data[lk_id][-1]:
            d['DIVI_Intensivstationen_Covid_Prozent'] = d_districts_data[
                lk_id][-1]['DIVI_Intensivstationen_Covid_Prozent']
            d['DIVI_Intensivstationen_Betten_belegt_Prozent'] = d_districts_data[
                lk_id][-1]['DIVI_Intensivstationen_Betten_belegt_Prozent']
        elif 'DIVI_Intensivstationen_Covid_Prozent' in d_districts_data[lk_id][-2]:
            d['DIVI_Intensivstationen_Covid_Prozent'] = d_districts_data[
                lk_id][-2]['DIVI_Intensivstationen_Covid_Prozent']
            d['DIVI_Intensivstationen_Betten_belegt_Prozent'] = d_districts_data[
                lk_id][-2]['DIVI_Intensivstationen_Betten_belegt_Prozent']
        d_for_export_V2 = d
        d_for_export_V2['LK_ID'] = lk_id
        l_for_export_V2.append(d_for_export_V2)

    # Export as JSON
    helper.write_json('data/de-districts/de-districts-results.json',
                      d_for_export_V1, sort_keys=True)
    helper.write_json(
        filename='data/de-districts/de-districts-results-V2.json',
        d=l_for_export_V2, sort_keys=True)

    # Export as CSV
    with open('data/de-districts/de-districts-results.tsv', mode='w',
              encoding='utf-8', newline='\n') as fh_csv:
        csvwriter = csv.DictWriter(
            fh_csv, delimiter='\t', extrasaction='ignore',
            fieldnames=[
                'Landkreis', 'Bundesland', 'Population', 'Cases', 'Deaths',
                'Cases_Per_Million', 'Deaths_Per_Million',
                'DIVI_Intensivstationen_Covid_Prozent',
                'DIVI_Intensivstationen_Betten_belegt_Prozent'
            ])
        csvwriter.writeheader()
        for lk_id, d in d_for_export_V1.items():
            csvwriter.writerow(d)
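# Illustrative shape of the two exports above (keys abbreviated; the example
# district id/name is taken from the mapping docstring further up):
# V1, dict keyed by LK_ID:
#   {"01001": {"Landkreis": "Flensburg", "Bundesland": "Schleswig-Holstein",
#              "Cases": ..., "Deaths": ..., ...}, ...}
# V2, flat list of dicts:
#   [{"LK_ID": "01001", "Landkreis": "Flensburg", ...}, ...]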
def write_all_ingreds(recipe_file_name, ingred_file_name):
    """Save json of all ingreds in recipe_file_name to ingred_file_name."""
    data = helper.get_json(recipe_file_name)
    ingreds = []
    for recipe in data:
        ingreds.append(recipe['ingreds'])
    # flatten, deduplicate, and sort
    ingreds = [ingred for sublist in ingreds for ingred in sublist]
    ingreds = sorted(set(ingreds))
    helper.write_json(ingreds, ingred_file_name, 'w')
    return ingreds
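# Note that the recipe functions call helper.write_json with the data first
# and the filename second, the opposite argument order of the exporters above,
# so this is a different helper module. A minimal sketch of that variant plus
# the helper.get_json it pairs with (an assumption; the real module is not
# shown here):
import json

def write_json(data, file_name, mode='w'):
    """Write `data` to `file_name` as JSON; `mode` is passed through to open()."""
    with open(file_name, mode, encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False)

def get_json(file_name):
    """Load and return the JSON content of `file_name`."""
    with open(file_name, encoding='utf-8') as f:
        return json.load(f)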
def export_latest_data(d_ref_states, d_states_data: dict):
    d_states_latest = helper.extract_latest_data(d_ref_states, d_states_data)
    with open('data/de-states/de-states-latest.tsv', mode='w',
              encoding='utf-8', newline='\n') as fh:
        csvwriter = csv.DictWriter(
            fh, delimiter='\t', extrasaction='ignore',
            fieldnames=('State', 'Code', 'Population', 'Pop Density',
                        'Date_Latest', 'Cases', 'Deaths', 'Cases_New',
                        'Deaths_New', 'Cases_Per_Million',
                        'Deaths_Per_Million',
                        'DoublingTime_Cases_Last_Week_Per_100000',
                        'Slope_Cases_Last_Week_Percent',
                        'Slope_Deaths_Last_Week_Percent',
                        'Cases_Last_Week_7Day_Percent'))
        csvwriter.writeheader()
        for code in sorted(d_states_latest.keys()):
            d = d_states_latest[code]
            d['Code'] = code
            if code == 'DE-total':  # write DE as the last row
                d_de = dict(d)
                continue
            csvwriter.writerow(d)
        del d, code
        # the '#' prefix marks the DE total row as a comment line
        # (e.g. so Gnuplot skips it)
        d_de['State'] = '# Deutschland'
        csvwriter.writerow(d_de)
        del d_de
    helper.write_json('data/de-states/de-states-latest.json', d_states_latest)

    l_for_export = []
    for code in sorted(d_states_latest.keys(), key=str.casefold):
        d2 = d_states_latest[code]
        d2['Code'] = code
        l_for_export.append(d2)
    helper.write_json(filename='data/de-states/de-states-latest-list.json',
                      d=l_for_export)
def write_recipe_data_filtered(infile, outfile):
    """Filter recipes from infile and save to outfile as json."""
    data = helper.get_json(infile)
    with open('approved_ingreds', 'r', encoding="utf8") as f:
        approved_ingreds = set(f.read().splitlines())
    ingred_filters = generate_ingred_filters(approved_ingreds)
    # Remove duplicate recipes
    df = pd.DataFrame(data)
    df_unique = df[~df['title'].duplicated()]
    data = df_unique.to_dict('records')
    for recipe in data:
        recipe['ingreds'] = filter_naive(recipe['ingreds'], ingred_filters)
    helper.write_json(data, outfile, 'w')
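# Hypothetical sketches of the two filter helpers referenced above; the real
# implementations are not in this file. The assumption: each raw ingredient
# line is mapped to the approved ingredient names it mentions, via
# case-insensitive word-boundary matching.
import re

def generate_ingred_filters(approved_ingreds):
    """Compile one word-boundary regex per approved ingredient name."""
    return {ingred: re.compile(r'\b' + re.escape(ingred) + r'\b', re.IGNORECASE)
            for ingred in approved_ingreds}

def filter_naive(ingreds, ingred_filters):
    """Return the sorted approved ingredients mentioned in the raw lines."""
    matches = set()
    for raw in ingreds:
        for name, pattern in ingred_filters.items():
            if pattern.search(raw):
                matches.add(name)
    return sorted(matches)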
def export_time_series_all_countries():
    for country in d_countries_timeseries.keys():
        country_code = read_country_code(country)
        if not country_code:
            continue
        l_country_data = d_countries_timeseries[country]
        helper.write_json(f'data/int/country-{country_code}.json',
                          l_country_data)
        with open(f'data/int/country-{country_code}.tsv', mode='w',
                  encoding='utf-8', newline='\n') as fh:
            csvwriter = csv.DictWriter(
                fh, delimiter='\t', extrasaction='ignore',
                fieldnames=[
                    'Days_Past', 'Date', 'Cases', 'Deaths', 'Cases_New',
                    'Deaths_New', 'Cases_Per_Million', 'Deaths_Per_Million',
                    'Cases_New_Per_Million', 'Deaths_New_Per_Million',
                    'Cases_Doubling_Time', 'Deaths_Doubling_Time',
                    'Cases_Change_Factor', 'Deaths_Change_Factor',
                    'Days_Since_2nd_Death', 'Cases_Last_Week_Per_Million',
                    'Deaths_Last_Week_Per_Million'
                ])
            csvwriter.writeheader()
            for d in l_country_data:
                csvwriter.writerow(d)
def export_data(d_states_data: dict):
    # export JSON and CSV
    for code in d_states_data.keys():
        outfile = f'data/de-states/de-state-{code}.tsv'
        l_time_series = d_states_data[code]
        helper.write_json(f'data/de-states/de-state-{code}.json',
                          d=l_time_series, sort_keys=True)
        with open(outfile, mode='w', encoding='utf-8', newline='\n') as fh:
            csvwriter = csv.DictWriter(
                fh, delimiter='\t', extrasaction='ignore',
                fieldnames=[
                    'Days_Past', 'Date', 'Cases', 'Deaths', 'Cases_New',
                    'Deaths_New', 'Cases_Last_Week', 'Deaths_Last_Week',
                    'Cases_Per_Million', 'Deaths_Per_Million',
                    'Cases_New_Per_Million', 'Deaths_New_Per_Million',
                    'Cases_Last_Week_Per_Million',
                    'Deaths_Last_Week_Per_Million',
                    'Cases_Last_Week_Per_100000',
                    # 'Cases_Doubling_Time', 'Deaths_Doubling_Time',
                    'DIVI_Intensivstationen_Covid_Prozent',
                    'DIVI_Intensivstationen_Betten_belegt_Prozent',
                    'Cases_Last_Week_Doubling_Time',
                    'Cases_Last_Week_7Day_Percent'
                ])
            csvwriter.writeheader()
            for d in l_time_series:
                csvwriter.writerow(d)
def preprocess(self):
    """
    Naive preprocessor that creates the dataset for CLAMS by cloning all
    repos and filtering locally.
    :return:
    """
    repos_dir = os.path.join(os.getcwd(), 'repos')
    helper.create_dir(repos_dir)
    bitbucket_client = BitBucketServerClient(
        host=self.bitbucket_host, is_ssh=False,
        credentials=self.bitbucket_credentials)
    repos = bitbucket_client.get_bitbucket_server_repos(self.client_repos)
    self.clone_repos(bitbucket_client, repos, repos_dir)
    for project in projects_map:
        package_name = projects_map[project]['package']
        print("Removing previous session's results...")
        directory = os.path.join(os.getcwd(), 'files', project)
        helper.delete_dir(directory)
        helper.create_dir(directory)
        print("Ready to run new session!\n")
        # collect the paths of all .java files that mention the package
        # into <project>.txt
        os.system("find ./repos -iname '*.java' | xargs -n16 -P8 grep -l"
                  + " \"" + package_name + "\" > " + project + ".txt")
        fname = project + ".txt"
        files_urls = {}
        if os.path.exists(fname):
            self.process_filtered_results(directory, files_urls, fname,
                                          project)
            print("Writing files' BitBucket Server urls to file...")
            helper.write_json(files_urls, 'files_urls', directory)
            print("Files' BitBucket urls are now stored in a json file!\n")
        else:
            print('No usage examples found for ' + project)
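# The find | xargs | grep pipeline above is assembled by string concatenation,
# which breaks on package names containing shell metacharacters. A sketch of
# the same filter via subprocess (a hypothetical alternative, not the
# project's code):
import subprocess

def filter_java_files(package_name, out_path):
    """Write paths of .java files under ./repos mentioning package_name to out_path."""
    with open(out_path, 'w') as out:
        find = subprocess.Popen(['find', './repos', '-iname', '*.java'],
                                stdout=subprocess.PIPE)
        subprocess.run(['xargs', '-n16', '-P8', 'grep', '-l', package_name],
                       stdin=find.stdout, stdout=out)
        find.stdout.close()
        find.wait()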
def write_recipe_matrix(outfile='recipe_matrix.json'):
    '''2D matrix whose rows are ingredients and cols are recipes.
    A 1 denotes the occurrence of an ingredient in a given recipe.'''
    ingreds = helper.get_json('all_ingreds_filtered.json')
    recipes = helper.get_json('recipe_data_filtered.json')
    titles = [recipe['title'] for recipe in recipes]
    df = pd.DataFrame(0, ingreds, titles)
    ingreds = set(ingreds)
    for recipe in recipes:
        recipe_ingreds = set(recipe['ingreds'])
        matches = recipe_ingreds & ingreds
        if len(matches) > 0:
            df.loc[list(matches), recipe['title']] = 1
    data = df.to_numpy().tolist()
    helper.write_json(data, outfile, 'w')
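# Example of reassembling the exported matrix downstream (a sketch assuming
# the three files produced by the functions above; 'garlic' is a hypothetical
# ingredient name):
import pandas as pd

ingreds = helper.get_json('all_ingreds_filtered.json')
recipes = helper.get_json('recipe_data_filtered.json')
matrix = helper.get_json('recipe_matrix.json')
df = pd.DataFrame(matrix, index=ingreds,
                  columns=[r['title'] for r in recipes])
# e.g. titles of all recipes containing a given ingredient:
# df.columns[df.loc['garlic'] == 1]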
def save_data(json_data, filename_part):
    filename_with_path = os.path.join(config.data_directory,
                                      filename_part + ".json")
    temp_filename_with_path = filename_with_path + ".temp"
    bak_filename_with_path = os.path.join(
        config.data_backup_directory,
        filename_part + ".json"
        + datetime.now().strftime('.%Y-%m-%d-%H-%M-%S.bak'))
    exists = os.path.isfile(filename_with_path)
    if exists:
        hlp.write_json(json_data, temp_filename_with_path)
        if os.path.getsize(filename_with_path) == os.path.getsize(temp_filename_with_path):
            # same size -> treat as unchanged and discard the temp file
            os.remove(temp_filename_with_path)
            hlp.write_log("File already existed and is the same. Keeping old file: "
                          + filename_with_path, dtm_prefix=False)
        else:
            # files differ: back up the old file, then move the new one in place
            os.rename(filename_with_path, bak_filename_with_path)
            os.rename(temp_filename_with_path, filename_with_path)
            hlp.write_log("File already exists but is different. Old file was backed up as: "
                          + bak_filename_with_path, dtm_prefix=False)
    else:
        hlp.write_json(json_data, filename_with_path)
        hlp.write_log("File downloaded and stored as: " + filename_with_path,
                      dtm_prefix=False)
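# Comparing byte sizes is only a heuristic for "unchanged": two JSON files of
# equal size can still differ in content. A stricter check, as a sketch
# (hashlib is in the standard library; this helper is not part of the
# original code):
import hashlib

def files_identical(path_a, path_b):
    """Return True if both files have identical bytes (SHA-256 comparison)."""
    with open(path_a, 'rb') as a, open(path_b, 'rb') as b:
        return hashlib.sha256(a.read()).digest() == hashlib.sha256(b.read()).digest()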
def preprocess(self):
    """
    This method runs the preprocessor that creates the dataset for CLAMS by
    using Hound to filter down to specific files.
    :return:
    """
    for project in projects_map:
        package_name = projects_map[project]['package']
        print("Removing previous session's results...")
        directory = os.path.join(os.getcwd(), 'files', project)
        helper.delete_dir(directory)
        helper.create_dir(directory)
        print("Ready to run new session!\n")

        # search on Hound
        print("\nSearching on Hound...")
        hound_client = HoundClient(self.hound_host, self.hound_credentials)
        hound_query = {'q': 'import ' + package_name, 'i': 'nope',
                       'files': '.java', 'repos': '*'}
        json_response = hound_client.search(hound_query)
        files_urls = self.parse_hound_response(project, json_response)
        print("Search completed!\n")

        # download files from BitBucket Server
        print("Downloading files...")
        bitbucket_client = BitBucketServerClient(
            host=self.bitbucket_host, is_ssh=self.is_bitbucket_ssh,
            credentials=self.bitbucket_credentials)
        for file_name, info in files_urls.items():
            response = bitbucket_client.download_file(info)
            helper.write_file_content(response, file_name, directory,
                                      self.is_bitbucket_ssh)
        print("Files are now stored locally!\n")

        print("Writing files' BitBucket Server urls to file...")
        helper.write_json(files_urls, 'files_urls', directory)
        print("Files' BitBucket Server urls are now stored in a json file!\n")

        # sleep for 1s to avoid overloading Hound/BitBucket;
        # remove if you don't have any latency issues
        time.sleep(1)
def export_data(d_states_data: dict):
    # export JSON and CSV
    for code in d_states_data.keys():
        outfile = f'data/de-states/de-state-{code}.tsv'
        l_time_series = d_states_data[code]
        helper.write_json(f'data/de-states/de-state-{code}.json',
                          l_time_series)
        with open(outfile, mode='w', encoding='utf-8', newline='\n') as fh:
            csvwriter = csv.DictWriter(
                fh, delimiter='\t', extrasaction='ignore',
                fieldnames=[
                    'Days_Past', 'Date', 'Cases', 'Deaths', 'Cases_New',
                    'Deaths_New', 'Cases_Last_Week', 'Deaths_Last_Week',
                    'Cases_Per_Million', 'Deaths_Per_Million',
                    'Cases_New_Per_Million', 'Deaths_New_Per_Million',
                    'Cases_Last_Week_Per_Million',
                    'Deaths_Last_Week_Per_Million',
                    'Cases_Doubling_Time', 'Deaths_Doubling_Time'
                ])
            csvwriter.writeheader()
            for d in l_time_series:
                csvwriter.writerow(d)
def export_data(d_districts_data: dict):
    for lk_id, l_time_series in d_districts_data.items():
        file_out = f'data/de-districts/de-district_timeseries-{lk_id}'
        # Export data as JSON
        helper.write_json(file_out + '.json', d=l_time_series, sort_keys=True)
        with open(file_out + '.tsv', mode='w', encoding='utf-8',
                  newline='\n') as fh_csv:
            csvwriter = csv.DictWriter(
                fh_csv, delimiter='\t', extrasaction='ignore',
                fieldnames=[
                    'Days_Past', 'Date', 'Cases', 'Deaths', 'Cases_New',
                    'Deaths_New', 'Cases_Last_Week', 'Deaths_Last_Week',
                    'Cases_Per_Million', 'Deaths_Per_Million',
                    'Cases_New_Per_Million', 'Deaths_New_Per_Million',
                    'Cases_Last_Week_Per_Million',
                    'Deaths_Last_Week_Per_Million',
                    # 'Cases_Doubling_Time', 'Deaths_Doubling_Time',
                    'DIVI_Intensivstationen_Covid_Prozent',
                    'DIVI_Intensivstationen_Betten_belegt_Prozent',
                    'Cases_Last_Week_7Day_Percent'
                ])
            csvwriter.writeheader()
            for d in l_time_series:
                csvwriter.writerow(d)
def write_all_ingreds_lemma(infile='all_ingreds_filtered.json',
                            outfile='static/all_ingreds_lemma.json'):
    """Save json of lemmatization of ingreds in infile to outfile."""
    ingreds = helper.get_json(infile)
    ingreds = [lemmatize(ingred) for ingred in ingreds]
    helper.write_json(ingreds, outfile, 'w')
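# Hypothetical sketch of the lemmatize helper used above, assuming NLTK's
# WordNetLemmatizer (requires a one-time nltk.download('wordnet')); the
# project's actual implementation is not shown in this file.
from nltk.stem import WordNetLemmatizer

_lemmatizer = WordNetLemmatizer()

def lemmatize(ingred):
    """Lemmatize each word of a (possibly multi-word) ingredient name."""
    return ' '.join(_lemmatizer.lemmatize(word) for word in ingred.split())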
def export_data():
    global d_data_all
    # `filename` is expected to be defined at module level
    helper.write_json(filename + '.json', d_data_all,
                      sort_keys=False, indent=1)
def generate_database() -> dict:
    d_database = {}
    for csv_file in glob.glob('data/de-divi/downloaded/*.csv'):
        (filepath, fileName) = os.path.split(csv_file)
        (fileBaseName, fileExtension) = os.path.splitext(fileName)
        date = fileBaseName
        del filepath, fileName, fileBaseName, fileExtension
        # The CSV headers changed over time:
        # 2020-04-24.csv:
        #   bundesland,kreis,anzahl_standorte,betten_frei,betten_belegt,faelle_covid_aktuell_im_bundesland
        # 2020-04-26.csv:
        #   gemeindeschluessel,anzahl_meldebereiche,faelle_covid_aktuell,faelle_covid_aktuell_beatmet,anzahl_standorte,betten_frei,betten_belegt,bundesland
        # 2020-04-28.csv:
        #   gemeindeschluessel,anzahl_meldebereiche,faelle_covid_aktuell,faelle_covid_aktuell_beatmet,anzahl_standorte,betten_frei,betten_belegt,bundesland,daten_stand
        # 2020-06-28.csv:
        #   bundesland,gemeindeschluessel,anzahl_meldebereiche,faelle_covid_aktuell,faelle_covid_aktuell_beatmet,anzahl_standorte,betten_frei,betten_belegt,daten_stand
        # -> skip 2020-04-24.csv and 2020-04-25.csv, which lack gemeindeschluessel
        if date in ('2020-04-24', '2020-04-25'):
            continue
        with open(csv_file, mode='r', encoding='utf-8') as f:
            csv_reader = csv.DictReader(f, delimiter=",")
            for row in csv_reader:
                assert len(row) >= 8, "Error: too few columns found"
                bl_id = row["bundesland"]
                lk_id = row["gemeindeschluessel"]
                d = {
                    "Date": date,
                    "anzahl_meldebereiche": int(row["anzahl_meldebereiche"]),
                    "faelle_covid_aktuell": int(row["faelle_covid_aktuell"]),
                    "faelle_covid_aktuell_beatmet": int(row["faelle_covid_aktuell_beatmet"]),
                    "anzahl_standorte": int(row["anzahl_standorte"]),
                    "betten_frei": int(float(row["betten_frei"])),
                    "betten_belegt": int(float(row["betten_belegt"]))
                }
                d["betten_ges"] = d["betten_frei"] + d["betten_belegt"]
                if d["betten_ges"] > 0:
                    d["betten_belegt_proz"] = round(
                        100 * d["betten_belegt"] / d["betten_ges"], 1)
                    d["faelle_covid_aktuell_proz"] = round(
                        100 * d["faelle_covid_aktuell"] / d["betten_ges"], 1)
                else:
                    d["betten_belegt_proz"] = None
                    d["faelle_covid_aktuell_proz"] = None
                if d["faelle_covid_aktuell"] > 0:
                    d["faelle_covid_aktuell_beatmet_proz"] = round(
                        100 * d["faelle_covid_aktuell_beatmet"] / d["faelle_covid_aktuell"], 1)
                else:
                    d["faelle_covid_aktuell_beatmet_proz"] = 0
                if lk_id not in d_database:
                    d_database[lk_id] = []
                d_database[lk_id].append(d)
    helper.write_json('cache/de-divi/de-divi-V3.json',
                      d_database, sort_keys=True, indent=1)
    return d_database
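# Example of reading the cache written above back in (plain json, since the
# helper's read counterpart is not shown in this file; "05315" is an example
# district id):
import json

with open('cache/de-divi/de-divi-V3.json', encoding='utf-8') as fh:
    d_database = json.load(fh)
# d_database maps lk_id -> list of per-day dicts (one per downloaded CSV), e.g.
# d_database["05315"][0]["betten_belegt_proz"]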
def extract_latest_date_data():
    """
    For all countries in json: extract the latest entry and
    write to data/int/countries-latest-all.tsv and data/int/countries-latest-all.json
    """
    d_countries_latest = helper.extract_latest_data(d_countries_ref,
                                                    d_countries_timeseries)
    l_for_export = []
    with open('data/int/countries-latest-all.tsv', mode='w',
              encoding='utf-8', newline='\n') as fh:
        csvwriter = csv.DictWriter(
            fh, delimiter='\t', extrasaction='ignore',
            fieldnames=[
                'Country', 'Population', 'Date', 'Cases', 'Deaths',
                'Cases_Per_Million', 'Deaths_Per_Million',
                'Cases_Last_Week_Per_Million',
                'Deaths_Last_Week_Per_Million', 'Continent', 'Code',
                'DoublingTime_Cases_Last_Week_Per_100000'
                # 'Cases_Last_Week',
            ])
        csvwriter.writeheader()
        for country in sorted(d_countries_latest.keys(), key=str.casefold):
            d2 = d_countries_latest[country]
            d2['Country'] = country
            csvwriter.writerow(d2)
            l_for_export.append(d2)

    # JSON export
    helper.write_json(filename='data/int/countries-latest-all.json',
                      d=l_for_export)

    # for selected countries write to a separate file, for Gnuplot plotting
    with open('data/int/countries-latest-selected.tsv', mode='w',
              encoding='utf-8', newline='\n') as fh:
        csvwriter = csv.DictWriter(
            fh, delimiter='\t', extrasaction='ignore',
            fieldnames=[
                'Country', 'Date', 'Population', 'Cases', 'Deaths',
                'Cases_Per_Million', 'Deaths_Per_Million'
            ])
        csvwriter.writeheader()
        for country in sorted(d_selected_countries.keys(), key=str.casefold):
            l_time_series = d_countries_timeseries[country]
            d2 = l_time_series[-1]  # last entry (= latest date) for this country
            d2["Country"] = country
            d2['Population'] = d_selected_countries[country]['Population']
            csvwriter.writerow(d2)
def generate_database() -> dict:
    d_database = {}
    # Bundesländer: BL-IDs '01'..'16' plus a Germany-wide total
    d_database_states = {'01': {}, '02': {}, '03': {}, '04': {}, '05': {},
                         '06': {}, '07': {}, '08': {}, '09': {}, '10': {},
                         '11': {}, '12': {}, '13': {}, '14': {}, '15': {},
                         '16': {}, 'DE-total': {}}
    for csv_file in glob.glob('data/de-divi/downloaded/*.csv'):
        (filepath, fileName) = os.path.split(csv_file)
        (fileBaseName, fileExtension) = os.path.splitext(fileName)
        date = fileBaseName
        del filepath, fileName, fileBaseName, fileExtension
        # The CSV headers changed over time:
        # 2020-04-24.csv:
        #   bundesland,kreis,anzahl_standorte,betten_frei,betten_belegt,faelle_covid_aktuell_im_bundesland
        # 2020-04-26.csv:
        #   gemeindeschluessel,anzahl_meldebereiche,faelle_covid_aktuell,faelle_covid_aktuell_beatmet,anzahl_standorte,betten_frei,betten_belegt,bundesland
        # 2020-04-28.csv:
        #   gemeindeschluessel,anzahl_meldebereiche,faelle_covid_aktuell,faelle_covid_aktuell_beatmet,anzahl_standorte,betten_frei,betten_belegt,bundesland,daten_stand
        # 2020-06-28.csv:
        #   bundesland,gemeindeschluessel,anzahl_meldebereiche,faelle_covid_aktuell,faelle_covid_aktuell_beatmet,anzahl_standorte,betten_frei,betten_belegt,daten_stand
        # -> skip 2020-04-24.csv and 2020-04-25.csv, which lack gemeindeschluessel
        if date in ('2020-04-24', '2020-04-25'):
            continue
        with open(csv_file, mode='r', encoding='utf-8') as f:
            csv_reader = csv.DictReader(f, delimiter=",")
            for row in csv_reader:
                assert len(row) >= 8, "Error: too few columns found"
                bl_id = row["bundesland"]
                lk_id = row["gemeindeschluessel"]
                d = {
                    "Date": date,
                    "anzahl_meldebereiche": int(row["anzahl_meldebereiche"]),
                    "faelle_covid_aktuell": int(row["faelle_covid_aktuell"]),
                    "faelle_covid_aktuell_beatmet": int(row["faelle_covid_aktuell_beatmet"]),
                    "anzahl_standorte": int(row["anzahl_standorte"]),
                    "betten_frei": int(float(row["betten_frei"])),
                    "betten_belegt": int(float(row["betten_belegt"]))
                }
                d["betten_ges"] = d["betten_frei"] + d["betten_belegt"]
                if d["betten_ges"] > 0:
                    d["betten_belegt_proz"] = round(
                        100 * d["betten_belegt"] / d["betten_ges"], 1)
                    d["faelle_covid_aktuell_proz"] = round(
                        100 * d["faelle_covid_aktuell"] / d["betten_ges"], 1)
                else:
                    d["betten_belegt_proz"] = None
                    d["faelle_covid_aktuell_proz"] = None
                if d["faelle_covid_aktuell"] > 0:
                    d["faelle_covid_aktuell_beatmet_proz"] = round(
                        100 * d["faelle_covid_aktuell_beatmet"] / d["faelle_covid_aktuell"], 1)
                else:
                    d["faelle_covid_aktuell_beatmet_proz"] = 0
                if lk_id not in d_database:
                    d_database[lk_id] = []
                d_database[lk_id].append(d)

                # accumulate per-state sums (and the DE-total sum) per date
                d2 = dict(d)
                del d2['Date'], d2['betten_ges'], d2['betten_belegt_proz'], \
                    d2['faelle_covid_aktuell_proz'], \
                    d2['faelle_covid_aktuell_beatmet_proz']
                if date not in d_database_states[bl_id]:
                    d_database_states[bl_id][date] = d2
                else:
                    for k in d2.keys():
                        d_database_states[bl_id][date][k] += d2[k]
                # 'DE-total': store a copy, so the per-state and DE-total
                # accumulators never share (and double-count via) one dict
                if date not in d_database_states['DE-total']:
                    d_database_states['DE-total'][date] = dict(d2)
                else:
                    for k in d2.keys():
                        d_database_states['DE-total'][date][k] += d2[k]

    helper.write_json('cache/de-divi/de-divi-V3.json',
                      d_database, sort_keys=True, indent=1)

    d_database_states2 = {}
    for bl_id in d_database_states.keys():
        bl_code = d_bl_id2code[bl_id]
        d_database_states2[bl_code] = []
        for date, d in d_database_states[bl_id].items():
            d['Date'] = date
            # derive the same percentage fields as for the districts above
            d["betten_ges"] = d["betten_frei"] + d["betten_belegt"]
            if d["betten_ges"] > 0:
                d["betten_belegt_proz"] = round(
                    100 * d["betten_belegt"] / d["betten_ges"], 1)
                d["faelle_covid_aktuell_proz"] = round(
                    100 * d["faelle_covid_aktuell"] / d["betten_ges"], 1)
            else:
                d["betten_belegt_proz"] = None
                d["faelle_covid_aktuell_proz"] = None
            if d["faelle_covid_aktuell"] > 0:
                d["faelle_covid_aktuell_beatmet_proz"] = round(
                    100 * d["faelle_covid_aktuell_beatmet"] / d["faelle_covid_aktuell"], 1)
            else:
                d["faelle_covid_aktuell_beatmet_proz"] = 0
            d_database_states2[bl_code].append(d)
    del d_database_states
    helper.write_json('cache/de-divi/de-divi-V3-states.json',
                      d_database_states2, sort_keys=True, indent=1)
    return d_database
for country_name in d_country_ref_data:
    d_country_ref_data[country_name]['Population'] = int(
        d_country_ref_data[country_name]['Population'])
    d_country_ref_data[country_name]['geonameid'] = int(
        d_country_ref_data[country_name]['geonameid'])
    d_country_ref_data[country_name]['ISO-Numeric'] = int(
        d_country_ref_data[country_name]['ISO-Numeric'])
    d_country_ref_data[country_name]['Area(in sq km)'] = float(
        d_country_ref_data[country_name]['Area(in sq km)'])

# patch missing population value for Eritrea
if d_country_ref_data['Eritrea']['Population'] == 0:
    d_country_ref_data['Eritrea']['Population'] = 5750433

# export as json
helper.write_json(file_JSON, d_country_ref_data)

# export as csv
l = []
for country_name in sorted(d_country_ref_data.keys()):
    d = d_country_ref_data[country_name]
    d['Country'] = country_name
    l.append(d)
    del d
keys = header_row
with open(file_CSV, mode='w', encoding='utf-8', newline='\n') as file:
    dict_writer = csv.DictWriter(file, keys, delimiter="\t")
    dict_writer.writeheader()
    dict_writer.writerows(l)