def test_lorenz_curve():
    """
    Check `lorenz_curve` against two limiting income distributions.

    When everyone has almost the same income, the cumulative income share
    should track the cumulative population share (a straight line).

    When one person holds almost all of the income, the cumulative income
    share should stay flat near zero and only reach one at the last point.
    """
    sample_size = 3000

    # Case 1: near-perfect equality (ones plus tiny gaussian noise).
    noise = np.random.normal(scale=0.0001, size=sample_size)
    equal_incomes = np.ones(sample_size) + noise
    people_share, income_share = lorenz_curve(equal_incomes)
    assert_allclose(people_share, income_share, rtol=1e-03)

    # Case 2: extreme inequality (a single huge income among tiny ones).
    skewed_incomes = np.full(sample_size, 0.001)
    skewed_incomes[4] = 100000
    _, skewed_income_share = lorenz_curve(skewed_incomes)

    # The curve has one more point than there are observations.
    flat_then_jump = np.zeros(sample_size + 1)
    flat_then_jump[-1] = 1.
    assert_allclose(flat_then_jump, skewed_income_share, atol=1e-4)
def update_tabs(n_clicks, reference_lines, selected_sectors, shock, empresa,
                formalidad, contrato):
    """Refresh the histogram and Lorenz figures and their summary texts.

    The action taken depends on which input triggered the call (read from
    ``dash.callback_context``):

    * nothing triggered: return the current figures with placeholder texts;
    * ``apply-button``: rebuild the shocked dataset, then redraw the income
      density, the Lorenz curves and recompute the median and poverty texts;
    * ``reference-lines``: only update the shapes/annotations of the
      histogram;
    * anything else: raise ``PreventUpdate``.

    Parameters
    ----------
    n_clicks
        Not read by the body; presumably the click counter of the apply
        button, present only so the button can trigger the callback.
    reference_lines
        Forwarded to ``generate_reference_lines``.
    selected_sectors
        Values matched against the ``sector`` column (``None`` means none).
    shock
        Forwarded to ``update_income``.
    empresa
        Value compared for equality with the ``tipo_empresa`` column.
    formalidad
        Values matched against the ``informales`` column (``None`` means
        none).
    contrato
        Values matched against the ``cuenta_propia`` column (``None`` means
        none).

    Returns
    -------
    tuple
        ``(fig_hist, texto_mediana, fig_lorenz, texto_pobreza)``.

    Raises
    ------
    PreventUpdate
        When the triggering input is neither of the two handled ids.

    Notes
    -----
    ``fig_hist`` and ``fig_lorenz`` are module-level figures that are
    modified in place.
    """
    # Placeholder texts used until a shock has actually been applied.
    mediana_choque = 'No choque'
    texto_mediana = 'Mediana choque: {}'.format(mediana_choque)
    pobreza_choque = 'No choque'
    texto_pobreza = u'Índice de pobreza choque: {}'.format(pobreza_choque)

    # Find out which input fired the callback.
    ctx = dash.callback_context
    if not ctx.triggered:
        return fig_hist, texto_mediana, fig_lorenz, texto_pobreza
    action_id = ctx.triggered[0]['prop_id'].split('.')[0]

    if action_id == 'apply-button':
        # ----------------------------
        # 1. Data
        print(datetime.datetime.now())
        print('Updating data...')
        # ----------------------------
        # Unset multi-select inputs arrive as None; treat them as empty.
        if formalidad is None:
            formalidad = []
        if contrato is None:
            contrato = []
        if selected_sectors is None:
            selected_sectors = []

        # Start again from the baseline data.
        df_shock = df.copy()

        # Flag the rows at risk: in a selected sector AND of the selected
        # company type, AND (informal OR own-account) per the filters.
        in_scope = (df_shock['sector'].isin(selected_sectors)
                    & (df_shock['tipo_empresa'] == empresa))
        exposed = (df_shock['informales'].isin(formalidad)
                   | df_shock['cuenta_propia'].isin(contrato))
        df_shock.loc[in_scope & exposed, 'riesgo'] = 1
        df_shock.riesgo = df_shock.riesgo.fillna(value=0)

        df_shock = update_income(df_shock, shock)

        # ----------------------------
        # 2. Histogram and median
        print('Updating Histogram...')
        # ----------------------------
        # Drop the previous traces before redrawing.
        fig_hist.data = []

        # Weighted kernel density of the shocked income.
        kernel_shock = gaussian_kde(df_shock.ING_pc_choque_arriendo,
                                    weights=df_shock.fac_exp_ind_12m)

        mediana_choque = weighted_median(df_shock, 'ING_pc_choque_arriendo',
                                         'fac_exp_ind_12m')
        texto_mediana = 'Mediana choque: {:,.0f} COP'.format(mediana_choque)

        # Shocked distribution.
        fig_hist.add_trace(
            go.Scatter(x=xs, y=kernel_shock(xs),
                       name=u'Distribución Choque',
                       line_color=naranja,
                       fill='tozeroy',
                       fillcolor='rgba(237,177,131,0.5)',
                       mode='lines'))
        # Original distribution.
        fig_hist.add_trace(
            go.Scatter(x=xs, y=dist_original,
                       mode='lines',
                       name=u'Distribución Original',
                       line=dict(color=black)))

        # ----------------------------
        # 3. Lorenz
        print('Updating Lorenz...')
        # ----------------------------
        # Drop the previous traces before redrawing.
        fig_lorenz.data = []
        fig_lorenz.add_trace(
            go.Scatter(x=[0, 1], y=[0, 1],
                       mode='lines',
                       name=u'Igualdad total',
                       line=dict(color='grey', dash="dashdot")))

        f_vals_shock, l_vals_shock = qe.lorenz_curve(
            df_shock.ING_pc_choque_arriendo.to_numpy())

        # Plot only a random subset of the curve points. The subset size is
        # capped by the number of available points (random.sample raises
        # ValueError otherwise) and the indices are sorted, because a
        # 'lines' trace joins points in array order and an unsorted sample
        # would be drawn as zig-zag chords instead of a curve.
        max_points = 10000
        n_available = min(len(f_vals), len(f_vals_shock))
        sample_ids = sorted(
            random.sample(range(n_available), min(max_points, n_available)))

        fig_lorenz.add_trace(
            go.Scatter(x=f_vals_shock[sample_ids],
                       y=l_vals_shock[sample_ids],
                       mode='lines',
                       name=u'Distribución Choque',
                       line_color=naranja))
        fig_lorenz.add_trace(
            go.Scatter(x=f_vals[sample_ids],
                       y=l_vals[sample_ids],
                       mode='lines',
                       name=u'Distribución Original',
                       line_color=black))

        # ----------------------------
        # 4. Poverty
        print('Updating Poverty Measures... \n')
        print('-------------------------------------')
        # ----------------------------
        pobreza_choque = calculo_pobreza(df_shock, 'ING_pc_choque_arriendo')
        texto_pobreza = u'Índice de pobreza choque: {:.2f}%'.format(
            pobreza_choque)

    elif action_id == 'reference-lines':
        lines, names = generate_reference_lines(reference_lines,
                                                dist_original)
        fig_hist.update_layout(shapes=lines, annotations=names)

    else:
        raise PreventUpdate

    return fig_hist, texto_mediana, fig_lorenz, texto_pobreza
mode='lines', name=u'Distribución Original', line=dict(color=black))) # summary statistics median_original = weighted_median(df, 'ING_pc_bl_def_arriendo', 'fac_exp_ind_12m') # Lorenz curve and GINI (https://python.quantecon.org/wealth_dynamics.html) # TODO: weighted version of these calculations # start = time.time() # gini_original = qe.gini_coefficient(df.ING_pc_bl_def_arriendo.to_numpy()) # print('Gini calculated in {} minutes'.format((time.time()-start)/60)) f_vals, l_vals = qe.lorenz_curve(df.ING_pc_bl_def_arriendo.to_numpy()) fig_lorenz = go.Figure( layout=go.Layout(title=go.layout.Title(text=u"Curva de Lorenz"), xaxis=go.layout.XAxis( title="Porcentaje acumulado de personas"), yaxis=go.layout.YAxis( title="Porcentaje acumulado del ingreso"), plot_bgcolor='white', paper_bgcolor='white', font_color='grey', showlegend=True)) # sample to plot ids = list(range(0, len(f_vals))) sample_ids = random.sample(ids, 10000)