"width": "35%", "height": "80px", "left": "17rem", "bottom": "10px", #"border": "1px solid #e7eff6", #"border-radius": "10px", 'overflowY': 'scroll' } # ------------------------------ # 2. SQL Queries # ------------------------------ # 2.1 Initial query # ------------------------------ df_dropout_efficiency = def_data.runQuery(""" select code_municip, name_municip as muni, benchmarking_rank as rank, benchmarking_efficiency as efficiency from cluster_master_table_by_municipio cmtbm ; """) df_dropout_efficiency['efficiency'] = df_dropout_efficiency[ 'efficiency'].astype(np.float64) df_dropout_efficiency['efficiency_percent'] = df_dropout_efficiency[ 'efficiency'].astype(float).map("{:.1%}".format) df_dropout_efficiency.sort_values(by=['efficiency', 'muni'], ascending=[False, True], inplace=True) # 2.1 Query function # ------------------------------ # ------------------------------ # 3. Map # ------------------------------ # 3.1 Loads JSON file
def on_button_click(n):
    """Callback for the benchmarking 'run' button.

    Builds a DEA (Data Envelopment Analysis) model from the variables
    chosen in the sidebar, solves the phase-I/phase-II LPs for every
    municipality (DMU), and returns the refreshed outputs:
    [choropleth map, ranking-table rows, reference-set rows, slack bar chart].

    On initial render (n is None) the cached figures are returned unchanged.
    """
    if n is None:
        # No click yet: keep the current map/tables/graph untouched.
        return [EF_Map, ranking_table.data, group_table.data, slack_graph]

    # 1. Define the set of input variables for DEA.
    # 1.1 Quote the checklist ids for a SQL IN-list, e.g. 'a','b','c'.
    single_qote = "'"
    var_list = ','.join(single_qote + var + single_qote
                        for var in sidebar_benchmarking.input_array)

    # 1.2 Resolve variable names/labels from public.var_definition.
    # NOTE(review): query text is assembled by concatenation. The ids come
    # from the app's own checklist, but a parameterized query would be safer.
    df_var_name = def_data.runQuery(
        'select name, label from public.var_definition where var_id in (' +
        var_list + ');')

    # 2. Build the benchmarking SQL query.
    var_col = ''  # projected columns
    var_res = ''  # not-null restrictions
    are_res = ''  # optional region restriction
    # 2.1 One column + one null-filter per selected feature.
    for var in df_var_name['name']:
        var_col = var_col + var + ','
        var_res = var_res + ' and ' + var + ' is not null '
    # DEA output: number of students that did NOT drop out.
    var_col = var_col + '(dane_alu_01 - dane_alu_11) as nodropouts'
    var_res = var_res + ' and dane_alu_01 > 0; '

    # 2.2 Restrict to the selected region, when one is chosen.
    if sidebar_benchmarking.area_array is not None:
        are_res = ('where region = ' + single_qote +
                   sidebar_benchmarking.area_array + single_qote + ' ')

    # 2.3 Final query text.
    benchmarking_sql_query = ('select code_municip, name_municip as muni, ' +
                              var_col +
                              ' from cluster_master_table_by_municipio ' +
                              are_res + var_res)

    # 3. Fetch the data for the DEA run.
    df_benchmarking_data = def_data.runQuery(benchmarking_sql_query)

    # 4. Perform the DEA calculations.
    df_benchmarking_data = df_benchmarking_data.rename(
        columns={'code_municip': 'DMU'})
    # 4.1 Inputs are the selected features; the single output is retention.
    inp = df_var_name['name'].tolist()
    out = ['nodropouts']
    # 4.2 Load the base BCC-O models for phase I and phase II
    # (these populate the obj*/lhs*/rhs*/bnd* globals on def_data).
    def_data.BCCO_Base_PH1(df_benchmarking_data, inp, out)
    def_data.BCCO_Base_PH2(df_benchmarking_data, inp, out)

    # 4.3/4.5 Solve both phases for every DMU, accumulating one result row
    # per DMU. Rows are collected in a plain list instead of the original
    # DataFrame.append-in-a-loop, which is quadratic and was removed in
    # pandas 2.0.
    ef_rows = []
    for dmu in df_benchmarking_data['DMU'].tolist():
        def_data.BCCO_DMU_PH1(df_benchmarking_data, dmu, inp, out)
        # NOTE(review): method="simplex" is deprecated in recent SciPy;
        # kept as-is so results match the original solver exactly.
        ph1_dual = linprog(c=def_data.obj1,
                           A_ub=def_data.lhs_ineq1,
                           b_ub=def_data.rhs_ineq1,
                           A_eq=def_data.lhs_eq1,
                           b_eq=def_data.rhs_eq1,
                           bounds=def_data.bnd1,
                           method="simplex")
        def_data.BCCO_DMU_PH2(df_benchmarking_data, dmu, -1 * ph1_dual.fun,
                              inp, out)
        ph2 = linprog(c=def_data.obj2,
                      A_eq=def_data.lhs_eq2,
                      b_eq=def_data.rhs_eq2,
                      bounds=def_data.bnd2,
                      method="simplex")
        ef_rows.append({
            'dmu': dmu,
            # Phase-I objective comes back negated; flip sign to recover
            # the efficiency score.
            'efficiency': -1 / ph1_dual.fun,
            'reference_set': def_data.BCCO_DMU_REFSET(
                df_benchmarking_data, inp, out, ph2.x),
            'slack': def_data.BCCO_DMU_VAR(inp, out, ph1_dual.slack),
        })
    ef = pd.DataFrame(ef_rows,
                      columns=['dmu', 'efficiency', 'reference_set', 'slack'])

    # 5. Process the results.
    # 5.1 Merge to attach municipality names, rank and pretty percentages.
    ef = ef.merge(df_benchmarking_data[['DMU', 'muni']],
                  left_on='dmu', right_on='DMU')
    ef = ef.sort_values(by=["efficiency", 'muni'], ascending=[False, True])
    ef["rank"] = ef["efficiency"].rank(ascending=False, method='min')
    ef['efficiency_percent'] = ef['efficiency'].astype(float).map(
        "{:.1%}".format)
    ef['Municipality'] = ef['muni']

    # 5.2 Efficient units and reference sets.
    ef_dmu = ef[ef['efficiency'] >= 1][['dmu', 'muni']]
    # NOTE(review): relies on pandas < 2.0 naming the reset column 'index'.
    ref_set = pd.DataFrame(ef[ef['efficiency'] < 1][[
        'reference_set'
    ]].reset_index()['reference_set'].value_counts()).reset_index()

    def convert_dmu_to_string(array):
        # Translate DMU codes to municipality names, dropping any DMU
        # that is not itself efficient (non-productive units).
        new_array = []
        for dmu in array:
            if dmu in list(ef_dmu['dmu']):
                new_array.append(ef_dmu[ef_dmu['dmu'] == dmu][[
                    'muni'
                ]].reset_index()['muni'][0])
        return new_array

    # The original duplicated this translation inline; reuse the helper.
    refset_data = [convert_dmu_to_string(rs) for rs in ref_set['index']]

    # Reference sets to display on screen.
    refset_df = pd.DataFrame(
        {
            'Reference Set': refset_data,
            '# Municipalities': list(ref_set['reference_set'])
        },
        columns=['Reference Set', '# Municipalities'])
    new_group_data = refset_df.to_dict('records')
    # Reference set shown in the map hover.
    ef['Ref Municipality'] = ef['reference_set'].apply(convert_dmu_to_string)

    # 5.3 Count slack variables across all DMUs.
    slack = []
    for sl in ef['slack']:
        slack.extend(sl)
    slack_data = (pd.DataFrame(slack, columns=[
        'Slack Count'
    ]))['Slack Count'].value_counts().reset_index()
    slack_data = slack_data.merge(df_var_name,
                                  left_on='index',
                                  right_on='name',
                                  how='left')
    slack_data = slack_data.rename(columns={'label': 'Feature'})
    new_slack_graph = px.bar(slack_data,
                             x="Feature",
                             y="Slack Count",
                             height=200)
    new_slack_graph.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})

    # 6. Create the new map.
    # 6.1 Pick the GeoJSON matching the selected region (all municipalities
    # when no region is selected). An unknown region yields '' and fails at
    # open(), exactly as the original if/elif chain did.
    region_geojson = {
        'Amazonica': 'data/amazonica90.json',
        'Andina': 'data/andina90.json',
        'Caribe': 'data/caribe90.json',
        'Orinoquia': 'data/orinoquia90.json',
        'Pacifica': 'data/pacifico90.json',
    }
    if sidebar_benchmarking.area_array is None:
        map_url = 'data/municipios_1mn.json'
    else:
        map_url = region_geojson.get(sidebar_benchmarking.area_array, '')
    with open(map_url) as geo:
        munijson = json.loads(geo.read())

    # 6.2 Map properties.
    new_Map = px.choropleth_mapbox(
        ef,  # Data
        locations='dmu',  # ids matching the GeoJSON features
        featureidkey="properties.MPIO_CCNCT",  # municipality id in GeoJSON
        color='efficiency',  # color intensity per region
        geojson=munijson,
        zoom=4,
        mapbox_style="white-bg",
        center={"lat": 4.5709, "lon": -74.2973},
        color_continuous_scale="Viridis",
        opacity=0.5,
        hover_name='Municipality',
        hover_data=['Ref Municipality'])
    new_Map.update_geos(fitbounds="locations", visible=False)
    new_Map.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})

    new_Rank_data = ef[['rank', 'muni',
                        'efficiency_percent']].to_dict('records')

    return [new_Map, new_Rank_data, new_group_data, new_slack_graph]
import dash
import dash_html_components as html
import dash_bootstrap_components as dbc
import dash_core_components as dcc
from dash.dependencies import Input, Output, State

# Recall app
from app import app
from library import def_data

# Module-level data pulls used when building the layout.
df_vars = def_data.runQuery("""select * from public.var_definition;""")
df_mun = def_data.runQuery("""select * from desertion_by_municip;""")
df_master = def_data.runQuery("""select * from master_table_by_municipio;""")

# Earlier dropdown prototypes, kept commented out for reference.
# dropdown = dbc.DropdownMenu(
#     id='drop_down',
#     label="Menu",
#     children=[
#         dbc.DropdownMenuItem(value=list(df['group_id'].unique())),
#         dbc.DropdownMenuItem("Item 2"),
#         dbc.DropdownMenuItem("Item 3"),
#     ],
# )
# dropdown = dcc.Dropdown(
#     id='drop_down',
#     options=[{"label": name, "value": name} for name in names],
#     value=[name for name in names],
#     clearable=True,
#     multi=True,
# ),
# NOTE(review): this chunk is truncated mid-expression — the trailing
# options.append({...}) dict is never closed within this view, and the
# accordion loop's remainder lives outside it. Left byte-identical.
# ------------------------------ SIDEBAR_STYLE = { "position": "fixed", "top": '140px', "left": 0, "bottom": 0, "width": "16rem", "padding": "2rem 1rem", "background-color": "#f8f9fa", } # ------------------------------ # 2. SQL queries # ------------------------------ # 2.1 Query variables df_vars = def_data.runQuery( """select * from public.var_definition order by group_id;""") # ------------------------------ # 3. Accordion # ------------------------------ var_groups = list(df_vars['group_id'].unique()) cardBody = [] for gr in var_groups: tempCardBody = [] options = [] for var_id in df_vars[df_vars['group_id'] == gr]['var_id']: options.append({ 'label': ' ' + list(df_vars[df_vars['var_id'] == str(var_id)]['label'])[0], 'value': str(var_id)
# NOTE(review): this chunk is truncated — the accordion-building loop's
# remainder (use of tempCardBody/options/cardBody) is outside this view,
# so the code is left byte-identical rather than restyled.
# ------------------------------ SIDEBAR_STYLE = { "position": "fixed", "top": '140px', "left": 0, "bottom": 0, "width": "16rem", "padding": "2rem 1rem", "background-color": "#f8f9fa", } # ------------------------------ # 2. SQL queries # ------------------------------ # 2.1 Query variables df_vars = def_data.runQuery( """select * from public.var_definition order by group_id;""") # ------------------------------ # 3. Accordion # ------------------------------ var_groups = list(df_vars['group_id'].unique()) cardBody = [] for gr in var_groups: tempCardBody = [] options = [] for var_id in df_vars[df_vars['group_id'] == gr]['var_id']: options.append( html.Label(list( df_vars[df_vars['var_id'] == str(var_id)]['label'])[0], id='mapLabel-' + str(var_id))) options.append(html.Br())
def build_chart(depto_val, *args):
    """Build the choropleth figure for the maps tab.

    Args:
        depto_val: department code selected in the dropdown, or None for
            the country-wide, department-level view.
        *args: n_clicks values of the clickable feature labels; the label
            that actually fired is recovered from dash.callback_context.

    Returns:
        A plotly express choropleth-mapbox figure.
    """
    # 3.2.1 Determine whether a feature label was clicked, and which one.
    # ------------------------------
    global selected_var_code
    df_vars = sidebar_maps.df_vars.copy()
    changed_label_id = [p['prop_id']
                       for p in dash.callback_context.triggered][0]
    for i in df_vars['var_id']:
        if 'mapLabel-' + i + ".n_clicks" in changed_label_id:
            selected_var_code = i
    selected_var = df_vars.loc[df_vars['var_id'] == str(
        selected_var_code)]['name'].reset_index()['name'][0]
    single_qote = "'"

    # (A dead `if False: return map_fig` guard was removed here; it could
    # never execute and only added one level of nesting.)

    # 3.2.2 Map by department (no department picked in the dropdown).
    if depto_val is None:
        # 3.2.2.1 Query the aggregated variable.
        # NOTE(review): the department view always shows average desertion,
        # overriding whatever feature was clicked — presumably intentional;
        # confirm with the dashboard owners.
        selected_var = "desertion_perc"
        sql_query = 'select code_dept, name_dept, avg(' + selected_var + ') as ' + selected_var + ' ' + \
            'from cluster_master_table_by_municipio ' + \
            'group by code_dept, name_dept;'
        df_var_all_dpto = def_data.runQuery(sql_query)
        df_var_all_dpto[selected_var] = df_var_all_dpto[
            selected_var].astype(np.float64)
        label_fig = df_vars[df_vars['name'] ==
                            selected_var].reset_index()['label'][0]
        label_title = df_vars[
            df_vars['name'] == selected_var].reset_index()['description'][0]

        # 3.2.2.2 Department-level map.
        # ------------------------------
        new_map = px.choropleth_mapbox(
            df_var_all_dpto,
            geojson=DEP_json,
            color=selected_var,
            locations="code_dept",
            featureidkey="properties.DPTO_CCDGO",
            color_continuous_scale="Blues",
            center={"lat": 4.94, "lon": -73.77},
            hover_name="name_dept",
            mapbox_style="carto-positron",
            zoom=4)
        new_map.update_layout(margin={"r": 0, "l": 0, "b": 0},
                              title_text=label_title,
                              coloraxis_colorbar=dict(title=label_fig))
        return new_map

    # 3.2.3 Map by municipality within the selected department.
    # 3.2.3.1 Query the desired variable.
    # ------------------------------
    # These five columns are always selected explicitly below; selecting
    # one of them again would duplicate a column, so fall back to
    # desertion_perc as the extra column in that case.
    always_selected = {
        'dane_alu_18_p', 'dane_tic_01', 'dane_alu_12_p', 'dane_tic_03_1_p',
        'po_pob_rural_10mil'
    }
    if selected_var in always_selected:
        selected_var_query = 'desertion_perc'
    else:
        selected_var_query = selected_var
    sql_query = 'select code_municip, code_dept, name_municip, dane_alu_18_p, dane_tic_03_1_p, ' + \
        'po_pob_rural_10mil, dane_alu_12_p, dane_tic_01, ' + selected_var_query + ' ' + \
        'from cluster_master_table_by_municipio ' + \
        'where code_dept = ' + single_qote + depto_val + single_qote + ';'
    df_var_by_dpto = def_data.runQuery(sql_query)
    df_var_by_dpto[selected_var] = df_var_by_dpto[selected_var].astype(
        np.float64)

    # 3.2.3.2 Filter the GeoJSON down to the chosen department.
    # ------------------------------
    MUN2_json['features'] = [
        city for city in MUN_json['features']
        if city['properties']['DPTO_CCDGO'] == depto_val
    ]
    # Center on the first coordinate of the first municipality polygon.
    first_coord = MUN2_json['features'][0]['geometry']['coordinates'][0][0]
    new_center = dict(lat=first_coord[1], lon=first_coord[0])

    # 3.2.3.3 Map layout / hover columns.
    # ------------------------------
    label_fig = df_vars[df_vars['name'] ==
                        selected_var].reset_index()['label'][0]
    label_title = df_vars[
        df_vars['name'] == selected_var].reset_index()['description'][0]

    def _as_percent(col):
        # Format a 0-1 fraction column as a 'xx.xx%' display string.
        return pd.Series(
            ["{0:.2f}%".format(val * 100) for val in df_var_by_dpto[col]],
            index=df_var_by_dpto.index)

    # Pretty-printed hover columns. (The original assigned
    # '% Transferred students' twice; the duplicate was removed.)
    df_var_by_dpto['% Transferred students'] = _as_percent('dane_alu_12_p')
    df_var_by_dpto['% Students afternoon'] = _as_percent('dane_alu_18_p')
    df_var_by_dpto['% Schools with electricity'] = _as_percent(
        'dane_tic_03_1_p')
    df_var_by_dpto['% Habitants (towns-rural)'] = pd.Series(
        [round(val, 2) for val in df_var_by_dpto['po_pob_rural_10mil']],
        index=df_var_by_dpto.index)
    df_var_by_dpto['Avg computers x100 stu.'] = pd.Series(
        [round(val, 2) for val in df_var_by_dpto['dane_tic_01']],
        index=df_var_by_dpto.index)

    # 3.2.3.4 Municipality-level map.
    # ------------------------------
    new_map = px.choropleth_mapbox(
        df_var_by_dpto,
        geojson=MUN2_json,
        locations='code_municip',
        color=selected_var,
        featureidkey="properties.MPIO_CCNCT",
        hover_name="name_municip",
        mapbox_style="carto-positron",
        center=new_center,
        hover_data=[
            '% Students afternoon', '% Habitants (towns-rural)',
            'Avg computers x100 stu.', '% Schools with electricity',
            '% Transferred students'
        ],
        zoom=6,
        color_continuous_scale="blues",
    )
    new_map.update_layout(margin={"r": 0, "l": 0, "b": 0},
                          title_text=label_title,
                          coloraxis_colorbar=dict(title=label_fig))
    return new_map
# Fixed-position container for the maps-tab choropleth.
MAP_MAPS_STYLE = {
    "position": "fixed",
    "width": "70%",
    "left": "17rem",
    "top": "140px",
    "border": "1px solid #e7eff6"
}

# ------------------------------
# 2. SQL Queries
# ------------------------------
# 2.1 Initial query: average desertion per department, 2019 cohort.
# ------------------------------
df_desertion = def_data.runQuery("""
select code_dept, name_dept, avg(desertion_perc) as desertion_perc
from master_table_by_municipio
where year_cohort = 2019
group by code_dept, name_dept;
""")
# Cast the aggregated column to float64 for plotting.
df_desertion['desertion_perc'] = (
    df_desertion['desertion_perc'].astype(np.float64))

df_vars = def_data.runQuery(
    """select * from public.var_definition order by group_id;""")

# 2.2 Format adjusment
# ------------------------------

# ------------------------------
# 3. MAP
# ------------------------------
# 3.1 Global variables
# ------------------------------
def update_cluster_figures(feature_id, log_value, cluster_value, y_value):
    """Refresh the cluster-analysis figures.

    Args:
        feature_id: id of the feature selected in the sidebar, or None.
        log_value: 'log' for a logarithmic scale; anything else is linear.
        cluster_value: cluster description to filter by, or None for all.
        y_value: column plotted on the scatter plot's y-axis.

    Returns:
        [scatter figure, box-plot figure, secondary text for the feature].
    """
    global df_scatter, cl_scatter_feature, cl_feature_label

    # 1. Re-query the data frame only when the selected feature changed.
    if feature_id is not None:
        if feature_id == cl_scatter_feature:
            print('Not necessary to update data frame')  # debug trace
        else:
            # Update the cached frame with the newly selected feature.
            cl_scatter_feature = feature_id
            new_feature_name = df_vars[df_vars['var_id'] == str(feature_id)][[
                'var_name'
            ]].reset_index()['var_name'][0]
            cl_feature_label = df_vars[df_vars['var_id'] == str(feature_id)][[
                'Feature'
            ]].reset_index()['Feature'][0]
            sql_query = 'select name_municip, desertion_no, desertion_perc, me_cobertura_neta, ' + \
                'cobertura_rank, desercion_rank, deser_perc_rank, ' + \
                new_feature_name + ' from cluster_master_table_by_municipio; '
            df_scatter = def_data.runQuery(sql_query)
            for col in [
                    'desertion_no', 'me_cobertura_neta', 'desertion_perc',
                    new_feature_name
            ]:
                df_scatter[col] = df_scatter[col].astype(np.float64)
            # First digit of the rank encodes the cluster id.
            df_scatter['Cluster'] = df_scatter['deser_perc_rank'].astype(
                str).str[0]
            df_scatter.rename(columns={
                'name_municip': 'Municipio',
                new_feature_name: cl_feature_label,
                'desertion_no': '# Dropouts',
                'desertion_perc': '% Dropouts',
                'me_cobertura_neta': 'Coverage',
                'deser_perc_rank': 'Cluster Description',
                "cobertura_rank": "Coverage Type",
                "desercion_rank": "Desertion Type"
            }, inplace=True)

    # 2. Filter by the selected cluster. With no filter, the frame is used
    # as-is (a reference, not a copy — it is only read below).
    if cluster_value is None:
        df_scatter_final = df_scatter
    else:
        df_scatter_final = df_scatter[df_scatter['Cluster Description'] ==
                                      cluster_value]

    # 3. Axis scale: logarithmic only when explicitly requested.
    cl_scale = log_value == 'log'

    # 4. Scatter plot of the feature against the chosen y column.
    new_scatter = px.scatter(df_scatter_final,
                             x=cl_feature_label,
                             y=y_value,
                             log_x=cl_scale,
                             color="Cluster")

    # 5. Box plot of the feature by coverage/desertion type.
    new_box = px.box(df_scatter_final,
                     x="Coverage Type",
                     y=cl_feature_label,
                     color="Desertion Type",
                     log_y=cl_scale,
                     points="all",
                     title="Box plot of " + cl_feature_label,
                     hover_data=["Municipio"])

    # 6. Secondary explanatory text, when one exists for this feature.
    new_second_text = ''
    if feature_id in text_list:
        new_second_text = text_content[feature_id]

    return [new_scatter, new_box, new_second_text]
# 1.10 Second text
STYLE_CLUSTER_END_SPACE = {
    "position": "absolute",
    "width": "42%",
    "height": "20px",
    "right": "7%",
    "top": "2250px"
}

# ------------------------------
# 2. SQL Queries
# ------------------------------
# 2.1 Query for cluster by municipality
# ------------------------------
df_clusters = def_data.runQuery("""
select code_municip, name_municip, desertion_no, me_cobertura_neta,
desertion_perc, deser_perc_rank, cobertura_rank, desercion_rank, dane_doc_31
from cluster_master_table_by_municipio;
""")

# Cast the numeric columns returned by the DB to float64.
numeric_columns = [
    'desertion_no', 'me_cobertura_neta', 'desertion_perc', 'dane_doc_31'
]
for column in numeric_columns:
    df_clusters[column] = df_clusters[column].astype(np.float64)

# Display names used by the cluster tables and plots.
df_clusters.rename(columns={
    "name_municip": "Municipio",
    "desertion_no": "# Dropouts",
    "me_cobertura_neta": "Coverage",
    "desertion_perc": "% Dropouts",
    "deser_perc_rank": "Cluster Description",
    "cobertura_rank": "Coverage Type",
    "desercion_rank": "Desertion Type"
}, inplace=True)