def test_query_helper():
    """User story: find region and statistic ids in a human-readable way."""
    # Federal states are exposed as a readable helper; Berlin's id is "11".
    assert federal_states.Berlin == "11"

    # Beyond federal states there are many regions; a DataFrame overview
    # makes them easy to search.
    regions_overview = get_regions()
    assert isinstance(regions_overview, pd.DataFrame)

    # An equivalent overview exists for statistics.
    stats_overview = get_statistics()
    assert isinstance(stats_overview, pd.DataFrame)

    # The built-in text search narrows the statistics table down.
    divorce_stats = get_statistics("scheidung")
    assert isinstance(divorce_stats, pd.DataFrame)
    assert divorce_stats.shape[0] < 50
def test_translated_statistic_overview_table():
    """Translating the statistics overview keeps its shape but changes content."""
    plain = get_statistics(
        stat_meta_data_provider=StatisticsSchemaJsonMetaDataProvider()
    )
    translated = get_statistics(
        stat_meta_data_provider=StatisticsSchemaJsonMetaDataProvider(),
        target_language="en",
        translation_provider=SchemaTranslationProvider(),
    )
    # Same table dimensions, different (translated) cell contents.
    assert plain.shape == translated.shape
    assert not plain.equals(translated)
def get_topic(input):
    """Map free-text user input to the best-matching statistic.

    Extracts hotwords from *input*, scores them against every pre-parsed
    statistic description in the module-level ``desc`` list via spaCy
    word-vector similarity, picks the statistic with the highest mean
    score, stores a trimmed long description in ``session['info']`` and
    returns the statistic's short description.

    Returns the string ``"False"`` on any failure -- callers check for
    this sentinel, so it is kept for backward compatibility.
    """
    # NOTE: parameter name `input` shadows the builtin but is kept so
    # keyword callers keep working.
    try:
        hotword_doc = nlp(" ".join(get_hotwords(input)))

        # One score per statistic: mean over hotwords of the best
        # per-token similarity within that statistic's description.
        scores = []
        for doc in desc:
            best_per_word = []
            for word in hotword_doc:
                similarities = [token.similarity(word) for token in doc]
                best_per_word.append(max(similarities))
            scores.append(np.array(best_per_word).mean())

        best_row = get_statistics().iloc[int(np.array(scores).argmax())]
        term = best_row['short_description']

        # Trim the long description down to the relevant "Aussage" part,
        # dropping calculation details and wiki-style section markers.
        info = best_row['long_description']
        parts = info.split("===Aussage===")
        info = parts[1] if len(parts) > 1 else parts[0]
        info = info.split("Indikatorberechnung")[0]
        info = info.split("=")[0]
        session['info'] = info[:500]
        return term
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed SystemExit /
        # KeyboardInterrupt. Best-effort failure keeps the "False" sentinel.
        return "False"
def get_chart():
    """Build a time-series chart for the topic/region stored in the session.

    Finds the first statistic whose short description contains the session
    topic, queries datenguide for it in the session region, saves the raw
    data to ``downloads/data.csv`` and returns a matplotlib ``Figure`` with
    a year-vs-value line plot.
    """
    topic = session.get('topic')
    myid = session.get('myid')

    # BUG FIX: the filter used to be a string-concatenated DataFrame.query
    # expression, which breaks (or allows query injection) when the topic
    # contains quotes. A boolean mask is robust and equivalent.
    stats = get_statistics()
    table = stats[stats['short_description'].str.contains(topic)]

    q = Query.region(myid)
    # The statistic's technical id is the matched row's index label.
    field = table.iloc[0].name
    q.add_field(field)
    results = q.results()
    df = results.set_index('year')

    # Persist the raw data so the user can download it.
    df.to_csv('downloads/data.csv', sep='\t')

    fig = Figure()
    axis = fig.add_subplot(1, 1, 1)
    axis.plot(df.index, df[field], linestyle='--', marker='o', color='b')
    axis.set_xlabel('Time')
    axis.set_ylabel("\n".join(wrap(topic + " in " + session.get('city'), 60)))
    # Years are integers; avoid fractional x-axis ticks.
    axis.xaxis.set_major_locator(MaxNLocator(integer=True))
    fig.tight_layout()
    return fig
def test_statistic_overview_table():
    """The schema-JSON provider yields a well-formed statistics overview."""
    overview = get_statistics(
        stat_meta_data_provider=StatisticsSchemaJsonMetaDataProvider()
    )
    assert isinstance(overview, pd.DataFrame)
    # Indexed by statistic id, with exactly the two description columns.
    assert overview.index.name == "statistic"
    assert ["short_description", "long_description"] == list(overview.columns)
    # The catalogue is large -- well over 400 statistics.
    assert overview.shape[0] > 400
def test_statistic_overview_table():
    """The default statistics overview has the expected columns and size."""
    overview = get_statistics()
    assert isinstance(overview, pd.DataFrame)
    expected_columns = [
        "statistics",
        "short_description",
        "long_description",
    ]
    assert list(overview.columns) == expected_columns
    # The catalogue is large -- well over 400 statistics.
    assert overview.shape[0] > 400
def test_queryHelper():
    """User story: discover region and statistic ids without knowing them."""
    # Federal states are exposed as a readable helper; Berlin's id is "11".
    assert federal_states.Berlin == "11"

    # A locally stored DataFrame lists all regions for easy searching.
    stored_regions = get_all_regions()
    assert isinstance(stored_regions, pd.DataFrame)

    # Region definitions rarely change, but a live download is available
    # for users who want the most up-to-date list.
    live_regions = download_all_regions()
    assert isinstance(live_regions, pd.DataFrame)
    assert list(live_regions.columns) == ["name", "level", "parent"]
    assert live_regions.index.name == "id"
    assert live_regions.shape[0] > 10000

    # An equivalent overview exists for statistics.
    stats_overview = get_statistics()
    assert isinstance(stats_overview, pd.DataFrame)

    # The built-in text search narrows the statistics table down.
    divorce_stats = get_statistics("scheidung")
    assert isinstance(divorce_stats, pd.DataFrame)
    assert divorce_stats.shape[0] < 50
def get_chart_map():
    """Build a choropleth map of the session topic across Bavarian districts.

    Fetches the statistic matching the session topic for every Bavarian
    NUTS-3 region, merges the most recent year of data onto the local
    shapefile and returns a matplotlib ``Figure``. On any error the
    exception is logged and ``None`` is returned.
    """
    try:
        topic = session.get('topic')

        # All NUTS-3 regions whose parent is one of Bavaria's seven
        # administrative districts ("091" .. "097").
        regions = get_regions().query("level == 'nuts3'")
        bavarian_districts = ["091", "092", "093", "094", "095", "096", "097"]
        cities = regions[regions["parent"].isin(bavarian_districts)]

        # Query all matching regions at once.
        q = Query.region(list(cities.index))

        # BUG FIX: the filter used to be a string-concatenated
        # DataFrame.query expression, which breaks on quotes in the topic;
        # a boolean mask is robust and equivalent.
        stats = get_statistics()
        table = stats[stats['short_description'].str.contains(topic)]
        field = table.iloc[0].name  # statistic id = matched row's index label
        q.add_field(field)
        results_nuts3 = q.results()

        # Shapefile with Bavaria's administrative boundaries.
        shp_nuts2 = gpd.read_file("shp/bavaria_nuts2")

        # Keep only the most recent year of data.
        max_year = max(results_nuts3["year"])
        results_nuts3_lastyear = results_nuts3[
            results_nuts3["year"] == max_year]
        results_nuts3_lastyear = results_nuts3_lastyear.drop_duplicates()

        # Normalize region names when data is present.
        # NOTE(review): the iloc[4]/iloc[4] probe looks like an ad-hoc
        # emptiness test on a fixed cell -- confirm intent with the author.
        row = results_nuts3_lastyear.iloc[4]
        emptytest = row.iloc[4]
        if len(emptytest) != 0:
            results_nuts3_lastyear.loc[:, "name2"] = results_nuts3_lastyear[
                "name"].str.replace(", Landkreis", "")
            results_nuts3_lastyear.loc[:, "name2"] = results_nuts3_lastyear[
                "name2"].str.replace(", Landeshauptstadt", "")

        # Join datenguide values onto the geometries.
        plot_data = shp_nuts2.merge(results_nuts3_lastyear,
                                    left_on="CC_2", right_on="id")

        fig = Figure()
        axis = fig.add_subplot(1, 1, 1)
        axis = plot_data.plot(column=field, legend=True, ax=axis)
        fig.suptitle(topic + " in " + str(max_year))
        axis.set_axis_off()
        return fig
    except Exception as e:
        # BUG FIX: `logger.error(msg, e)` treats `e` as a %-format argument,
        # but the message had no placeholder, so the exception was dropped.
        app.logger.error(
            'an error occurred during the creation of the map: %s', e)
import io
import logging
from string import punctuation
from textwrap import wrap

import geopandas as gpd
import numpy as np
import spacy
from flask import Flask, render_template, request, session
from flask import send_file  # to download files
from matplotlib.ticker import MaxNLocator  # for integer values when plotting

nlp = spacy.load("de_core_news_lg")  # German language model

# Pre-parse every statistic's short description once, so topic matching
# can compare user input against them by word-vector similarity.
statistics = get_statistics().short_description.values.tolist()
desc = [nlp(names) for names in statistics]

# All regions on NUTS-3 level in Bavaria: the children of each Bavarian
# administrative district (the districts themselves are children of "09").
bezirke = get_regions().query("parent == '09'")
# PERF FIX: fetch the regions table once instead of twice per district.
all_regions = get_regions()
z = []
ids = []
for i in bezirke.index.values.tolist():
    children = all_regions.query("parent == '" + str(i) + "'")
    ids = ids + children.name.index.tolist()
    z = z + children.name.values.tolist()
# transform names to nlp format for comparing