Exemplos de perc_with_spline em Python, exemplos de libraries.lib_gather_data.perc_with_spline em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: lib_poverty_tables_and_maps.py Projeto: roveryi/well-being_model

def _weighted_mean_by_level(frame, mask, value_col, levels):
    """Return the ``pcwgt``-weighted mean of ``value_col`` per index group.

    Args:
        frame: DataFrame whose index contains the levels named in ``levels``
            and which has a ``pcwgt`` column.
        mask: Boolean row selector; only selected rows contribute.
        value_col: Name of the column to average.
        levels: Index level names to aggregate over.

    Returns:
        Series indexed by ``levels`` holding
        sum(pcwgt * value) / sum(pcwgt) for each group.
    """
    selected = frame.loc[mask]
    # groupby(level=..., sort=False).sum() is the supported spelling of the
    # ``.sum(level=...)`` shortcut, which was removed in pandas 2.0.
    weighted_total = selected[['pcwgt', value_col]].prod(axis=1).groupby(
        level=levels, sort=False).sum()
    weight_total = selected['pcwgt'].groupby(level=levels, sort=False).sum()
    return weighted_total / weight_total


def run_poverty_duration_plot(myC):
    """Tabulate and plot poverty-duration results for country ``myC``.

    Reads ``../output_country/<myC>/poverty_duration_no.csv`` plus every
    sibling ``poverty_duration_<SP>.csv`` file, assigns deciles, and then
    writes:

    * ``../output_country/<myC>/poverty_by_decile.csv``
    * ``../output_plots/<myC>/t_start_reco_scatter.pdf``
    * ``latex/<myC>_poverty_duration.tex``
    * ``../output_plots/<myC>/<measure>_by_decile.pdf``
    * ``../output_plots/<myC>/<measure>_with_sps_by_decile.pdf``

    where ``<measure>`` is ``t_pov_cons_avg`` and ``t_pov_inc_avg``.

    Args:
        myC: Country code used to build input and output paths ('SL' and
            'PH' additionally get region-specific curves in the decile plot).

    Returns:
        True once all outputs have been written.
    """
    out_dir = '../output_country/' + myC + '/'
    plot_dir = '../output_plots/' + myC + '/'
    levels = ['hazard', 'rp', 'decile']

    # Load file with geographical (region/province/district) as index
    df = pd.read_csv(out_dir + 'poverty_duration_no.csv')
    df = df.reset_index().set_index(df.columns[1])

    geo = df.index.name
    all_geo = np.array(df[~df.index.duplicated(keep='first')].index)

    # Constant column, used only as the groupby key below
    df['country'] = myC

    # Assign deciles
    listofdeciles = np.arange(0.10, 1.01, 0.10)
    df = df.reset_index().groupby('country', sort=True).apply(
        lambda x: match_percentiles(
            x,
            perc_with_spline(reshape_data(x.c), reshape_data(x.pcwgt),
                             listofdeciles), 'decile'))

    # Discover the available SP runs from the same directory the files are
    # read from (previously a developer-specific absolute path was globbed).
    # Sorted so that column order and plot styling are reproducible.
    sp_prefix = out_dir + 'poverty_duration_'
    _sp = sorted(f[len(sp_prefix):-len('.csv')]
                 for f in glob.glob(sp_prefix + '*.csv'))

    for iSP in _sp:
        _ = pd.read_csv(sp_prefix + iSP + '.csv')
        df[['t_pov_inc' + iSP, 't_pov_cons' + iSP,
            't_pov_bool' + iSP]] = _[['t_pov_inc', 't_pov_cons', 't_pov_bool']]

    ############################
    df = df.reset_index().set_index(levels)

    # One row per (hazard, rp, decile), in order of first appearance
    df_dec = pd.DataFrame(
        index=df.groupby(level=levels, sort=False).size().index)
    # Populate the df_dec dataframe now, while its index is set to ['hazard','rp','decile']

    # Explicit comparison kept from the original: the column is not
    # guaranteed here to be of strict bool dtype.
    in_pov = df.t_pov_bool == True  # noqa: E712

    # Number of individuals who face income or consumption poverty
    # NOTE(review): both columns are built from the same t_pov_bool flag, so
    # they are identical -- confirm whether separate income/consumption
    # flags were intended.
    n_new_pov = df.loc[in_pov, 'pcwgt'].groupby(level=levels,
                                                sort=False).sum()
    df_dec['n_new_pov_inc'] = n_new_pov
    df_dec['n_new_pov_cons'] = n_new_pov

    # Individuals who face income or consumption poverty as fraction of all individuals
    population = df['pcwgt'].groupby(level=levels, sort=False).sum()
    df_dec['frac_new_pov_inc'] = df_dec['n_new_pov_inc'] / population
    df_dec['frac_new_pov_cons'] = df_dec['n_new_pov_cons'] / population

    # Among people pushed into pov: average time in poverty (months)
    for iSP in _sp:
        sp_pov = df['t_pov_bool' + iSP] == True  # noqa: E712
        df_dec['t_pov_inc_avg' + iSP] = 12. * _weighted_mean_by_level(
            df, sp_pov, 't_pov_inc' + iSP, levels)
        df_dec['t_pov_cons_avg' + iSP] = 12. * _weighted_mean_by_level(
            df, sp_pov, 't_pov_cons' + iSP, levels)

    # Same averages, restricted to each geographical unit
    for iloc in all_geo:
        geo_pov = in_pov & (df[geo] == iloc)
        df_dec['t_pov_inc_avg_' + iloc] = 12. * _weighted_mean_by_level(
            df, geo_pov, 't_pov_inc', levels)
        df_dec['t_pov_cons_avg_' + iloc] = 12. * _weighted_mean_by_level(
            df, geo_pov, 't_pov_cons', levels)

    df_dec.to_csv(out_dir + 'poverty_by_decile.csv')

    ######################
    # Scatter plot of hh that have to delay reconstruction
    upper_lim = 1E15
    # ln(1/0.05) / rate; presumably the time to 95% recovery under an
    # exponential reconstruction path -- TODO confirm against the model.
    df['t_reco'] = (np.log(1.0 / 0.05) /
                    df['hh_reco_rate']).clip(upper=upper_lim)

    means = []
    xmax = 2.5E5
    step = xmax / 10.
    # NOTE(review): linspace(0, 10, 10) yields bin starts spaced 10/9 apart
    # (0, 1.11, ..., 10), so the unit-wide bins leave gaps and the last one
    # lies beyond xmax. np.arange(10) may have been intended; left unchanged
    # because it alters the plotted curve.
    # The loop is kept as a plain statement-level loop so that the '@i',
    # '@step' and '@upper_lim' references in eval() resolve to these locals.
    for i in np.linspace(0, 10, 10):
        in_bin = df.eval(
            '(rp==1000)&(c>@i*@step)&(c<=(@i+1)*@step)&(t_reco!=@upper_lim)')
        means.append(df.loc[in_bin, ['pcwgt', 't_reco']].prod(axis=1).sum() /
                     df.loc[in_bin, ['pcwgt']].sum())

    ax = df.loc[df.eval('(c<@xmax)&(t_reco<12)')].plot.hexbin('c',
                                                              't_reco',
                                                              gridsize=25,
                                                              mincnt=1)
    plt.plot(step * np.linspace(0, 10, 10), means, zorder=100)

    # Do the formatting
    ax = title_legend_labels(ax,
                             'Precipitation flood in ' + myC,
                             lab_x='Pre-disaster consumption [PhP per cap]',
                             lab_y='Time to reconstruct [years]',
                             leg_fs=9)

    # Do the saving
    plt.draw()
    plt.gca().get_figure().savefig(plot_dir + 't_start_reco_scatter.pdf',
                                   format='pdf')
    plt.cla()

    ######################
    # Latex table of poorest decile poverty time, by geographical unit

    # Map each per-region column to the bare region name
    cons_cols = {'t_pov_cons_avg_' + j: j for j in all_geo}
    inc_cols = {'t_pov_inc_avg_' + j: j for j in all_geo}

    _cons_to_tex = df_dec.drop(
        [i for i in df_dec.columns if i not in cons_cols], axis=1)
    _inc_to_tex = df_dec.drop(
        [i for i in df_dec.columns if i not in inc_cols], axis=1)

    _cons_to_tex = _cons_to_tex.rename(columns=cons_cols).stack()
    _inc_to_tex = _inc_to_tex.rename(columns=inc_cols).stack()
    _cons_to_tex.index.names = levels + [geo]
    _inc_to_tex.index.names = levels + [geo]

    _to_tex = pd.DataFrame(index=_cons_to_tex.index)
    _to_tex['Income'] = _inc_to_tex.round(1)
    _to_tex['Consumption'] = _cons_to_tex.round(1)

    _to_tex = _to_tex.reset_index().set_index(geo)
    _to_tex = _to_tex.loc[_to_tex.eval(
        '(hazard=="PF")&(rp==10)&(decile==1)')].sort_values('Consumption',
                                                            ascending=False)

    print(_to_tex.head())

    _to_tex[['Income',
             'Consumption']].to_latex('latex/' + myC + '_poverty_duration.tex')

    ######################
    # Plot consumption and income poverty (separately)
    df_dec = df_dec.reset_index()

    _lab = {
        't_pov_cons_avg':
        'Average time to exit poverty\n(income net of reconstruction & savings) [months]',
        't_pov_inc_avg': 'Average time to exit poverty (income only) [months]'
    }

    print(df_dec.columns)

    # Row selectors reused by both plotting sections below
    pf_rp10 = df_dec.eval('(hazard=="PF")&(rp==10)')
    pf_rp1000 = df_dec.eval('(hazard=="PF")&(rp==1000)')

    # Which areas to plot? Countries without an entry get no regional curves
    # (previously `focus` was left unbound and raised NameError).
    focus_by_country = {
        'SL': ['Rathnapura', 'Colombo', 'Kandy', 'Gampaha'],
        'PH': ['NCR'],
    }
    focus = focus_by_country.get(myC, [])

    for ipov in ['t_pov_cons_avg', 't_pov_inc_avg']:
        # National curves come from the 'no' run's columns
        ax = df_dec.loc[pf_rp10].plot('decile',
                                      ipov + 'no',
                                      color=sns_pal[1],
                                      zorder=97,
                                      label='')
        df_dec.loc[pf_rp1000].plot('decile',
                                   ipov + 'no',
                                   color=sns_pal[3],
                                   zorder=96,
                                   label='',
                                   ax=ax)

        icol = 4

        # NOTE(review): the legend says "RP = 5 years" while the rows are
        # selected with rp==10 -- confirm which is intended.
        for iloc in focus:
            df_dec.loc[pf_rp10].plot.scatter('decile',
                                             ipov + '_' + iloc,
                                             color=sns_pal[icol],
                                             lw=0,
                                             label=iloc + ' (RP = 5 years)',
                                             zorder=95,
                                             ax=ax)
            df_dec.loc[pf_rp10].plot('decile',
                                     ipov + '_' + iloc,
                                     color=sns_pal[icol],
                                     zorder=94,
                                     label='',
                                     ax=ax)
            icol += 1

        # Do the formatting
        ax = title_legend_labels(ax,
                                 'Precipitation flood in ' + myC,
                                 lab_x='Decile',
                                 lab_y=_lab[ipov],
                                 lim_x=[0.5, 10.5],
                                 lim_y=[-0.1, 42],
                                 leg_fs=9)
        ax.xaxis.set_ticks([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
        ax.yaxis.set_ticks([0, 6, 12, 18, 24, 30, 36, 42])

        # Do the saving
        ax.get_figure().savefig(plot_dir + ipov + '_by_decile.pdf',
                                format='pdf')
        plt.cla()

    ######################
    # Plot consumption and income poverty (separately), with alternative SPs
    _df_5 = df_dec.loc[pf_rp10].copy()
    _df_1000 = df_dec.loc[pf_rp1000].copy()

    ax = plt.gca()
    for ipov in ['t_pov_cons_avg', 't_pov_inc_avg']:
        for iSP in _sp:

            # Shade the band between the two return periods
            plt.fill_between(_df_5['decile'].values,
                             _df_5[ipov + iSP],
                             _df_1000[ipov + iSP],
                             alpha=0.3)

            _df_5.plot.scatter('decile',
                               ipov + iSP,
                               lw=0,
                               label='',
                               zorder=99,
                               ax=ax)
            _df_1000.plot.scatter('decile',
                                  ipov + iSP,
                                  lw=0,
                                  label='',
                                  zorder=98,
                                  ax=ax)

            _df_5.plot('decile',
                       ipov + iSP,
                       zorder=97,
                       linestyle=':',
                       label=iSP + ' natl. average (RP = 5 years)',
                       ax=ax)
            _df_1000.plot('decile',
                          ipov + iSP,
                          zorder=96,
                          label=iSP + ' natl. average (RP = 1000 years)',
                          ax=ax)

        # Do the formatting
        ax = title_legend_labels(ax,
                                 'Precipitation flood in ' + myC,
                                 lab_x='Decile',
                                 lab_y=_lab[ipov],
                                 lim_x=[0.5, 10.5],
                                 lim_y=[-0.1],
                                 leg_fs=9)
        ax.xaxis.set_ticks([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
        ax.yaxis.set_ticks([0, 3, 6, 9, 12, 15, 18])

        # Do the saving
        ax.get_figure().savefig(plot_dir + ipov + '_with_sps_by_decile.pdf',
                                format='pdf')
        plt.cla()

    plt.close('all')

    return True

Exemplo n.º 2

0

Exibir arquivo

Arquivo: postprocess_sp_comparison.py Projeto: walshb1/hh_resilience_model

# Decide whether to do this at decile or quintile level
agglev = 'decile'


def _assign_percentiles(frame, breaks, label):
    """Add a percentile-bin column named ``label`` to ``frame``.

    Rows are grouped by (hazard, rp); within each group the thresholds come
    from ``perc_with_spline`` applied to column ``c`` and ``pcwgt`` at the
    cumulative shares in ``breaks``, and ``match_percentiles`` attaches the
    resulting bin to each row.
    """
    return frame.groupby(['hazard', 'rp'], sort=True).apply(
        lambda x: match_percentiles(
            x,
            perc_with_spline(reshape_data(x.c), reshape_data(x.pcwgt),
                             breaks), label, 'c'))


try:
    # Reuse the aggregate table if an earlier run already wrote it
    _q = pd.read_csv('../output_country/' + myCountry + '/sp_comparison_by_' +
                     agglev + '.csv').set_index(agglev)
except Exception:
    # Any failure to load the cached table (missing file, missing column,
    # unreadable content) falls back to rebuilding from the household-level
    # files. ``Exception`` rather than a bare ``except`` so that
    # KeyboardInterrupt/SystemExit still propagate.

    # Load file
    _no_path = '../output_country/' + myCountry + '/poverty_duration_no.csv'
    _up_path = ('../output_country/' + myCountry +
                '/poverty_duration_unif_poor.csv')
    _f = pd.read_csv(_no_path)
    _f_up = pd.read_csv(_up_path)

    # Recompute the bins if either file lacks either column; checking only
    # _f would let a stale _f_up fail in set_index() below.
    _needs_bins = any(col not in frame.columns
                      for frame in (_f, _f_up)
                      for col in ('quintile', 'decile'))
    if _needs_bins:

        # Assign deciles
        _deciles = np.arange(0.10, 1.01, 0.10)
        _f = _assign_percentiles(_f, _deciles, 'decile')
        _f_up = _assign_percentiles(_f_up, _deciles, 'decile')

        # Assign quintiles
        _quintiles = np.arange(0.20, 1.01, 0.20)
        _f = _assign_percentiles(_f, _quintiles, 'quintile')
        _f_up = _assign_percentiles(_f_up, _quintiles, 'quintile')

        # Write the bins back so later runs can skip this step
        _f.to_csv(_no_path)
        _f_up.to_csv(_up_path)

    # Put decile, quintile into index
    _f = _f.reset_index().set_index(['decile', 'quintile']).sort_index()
    _f_up = _f_up.reset_index().set_index(['decile', 'quintile']).sort_index()

    # Quintile-level (or decile-level) info

Exemplo n.º 3

0

Exibir arquivo

def SL_PMT_plots(myCountry, economy, event_level, myiah, myHaz, my_PDS,
                 _wprime, base_str, to_usd):
    out_files = os.getcwd() + '/../output_country/' + myCountry + '/'

    listofquintiles = np.arange(0.20, 1.01, 0.20)
    quint_labels = [
        'Poorest\nquintile', 'Second', 'Third', 'Fourth',
        'Wealthiest\nquintile'
    ]

    myiah['hhid'] = myiah['hhid'].astype('str')
    myiah = myiah.set_index('hhid')
    pmt, _ = get_pmt(myiah)
    myiah['PMT'] = pmt

    for _loc in myHaz[0]:
        for _haz in myHaz[1]:
            for _rp in myHaz[2]:

                plt.cla()
                _ = myiah.loc[(myiah[economy] == _loc)
                              & (myiah['hazard'] == _haz) &
                              (myiah['rp'] == _rp)].copy()

                _ = _.reset_index().groupby(
                    economy, sort=True).apply(lambda x: match_percentiles(
                        x,
                        perc_with_spline(reshape_data(x.PMT),
                                         reshape_data(x.pcwgt_no),
                                         listofquintiles), 'quintile', 'PMT'))

                for _sort in ['PMT']:

                    _ = _.sort_values(_sort, ascending=True)

                    _['pcwgt_cum_' + base_str] = _['pcwgt_' +
                                                   base_str].cumsum()
                    _['pcwgt_cum_' + my_PDS] = _['pcwgt_' + my_PDS].cumsum()

                    _['dk0_cum'] = _[['pcwgt_' + base_str,
                                      'dk0']].prod(axis=1).cumsum()

                    _['cost_cum_' + my_PDS] = _[[
                        'pcwgt_' + my_PDS, 'help_received_' + my_PDS
                    ]].prod(axis=1).cumsum()
                    # ^ cumulative cost
                    _['cost_frac_' + my_PDS] = _[[
                        'pcwgt_' + my_PDS, 'help_received_' + my_PDS
                    ]].prod(axis=1).cumsum() / _[[
                        'pcwgt_' + my_PDS, 'help_received_' + my_PDS
                    ]].prod(axis=1).sum()
                    # ^ cumulative cost as fraction of total

                    # GET WELFARE COSTS
                    _['dw_cum_' +
                      base_str] = _[['pcwgt_' + base_str,
                                     'dw_' + base_str]].prod(axis=1).cumsum()
                    # Include public costs in baseline (dw_cum)
                    ext_costs_base = pd.read_csv(out_files +
                                                 'public_costs_tax_' +
                                                 base_str + '_.csv').set_index(
                                                     [economy, 'hazard', 'rp'])

                    ext_costs_base[
                        'dw_pub_curr'] = ext_costs_base['dw_pub'] / _wprime
                    ext_costs_base[
                        'dw_soc_curr'] = ext_costs_base['dw_soc'] / _wprime
                    ext_costs_base['dw_tot_curr'] = ext_costs_base[[
                        'dw_pub', 'dw_soc'
                    ]].sum(axis=1) / _wprime

                    ext_costs_base_sum = ext_costs_base.loc[
                        ext_costs_base['contributer'] != ext_costs_base.index.
                        get_level_values(event_level[0]),
                        ['dw_pub_curr', 'dw_soc_curr', 'dw_tot_curr']].sum(
                            level=[economy, 'hazard', 'rp']).reset_index()

                    ext_costs_base_pub = float(ext_costs_base_sum.loc[
                        (ext_costs_base_sum[economy] == _loc)
                        & ext_costs_base_sum.eval('(hazard==@_haz)&(rp==@_rp)'
                                                  ), 'dw_pub_curr'])
                    ext_costs_base_soc = float(ext_costs_base_sum.loc[
                        (ext_costs_base_sum[economy] == _loc)
                        & ext_costs_base_sum.eval('(hazard==@_haz)&(rp==@_rp)'
                                                  ), 'dw_soc_curr'])
                    ext_costs_base_sum = float(ext_costs_base_sum.loc[
                        (ext_costs_base_sum[economy] == _loc)
                        & ext_costs_base_sum.eval('(hazard==@_haz)&(rp==@_rp)'
                                                  ), 'dw_tot_curr'])

                    _['dw_cum_' +
                      my_PDS] = _[['pcwgt_' + my_PDS,
                                   'dw_' + my_PDS]].prod(axis=1).cumsum()
                    # ^ cumulative DW, with my_PDS implemented

                    # Include public costs in pds_dw_cum
                    ext_costs_pds = pd.read_csv(out_files +
                                                'public_costs_tax_' + my_PDS +
                                                '_.csv').set_index(
                                                    [economy, 'hazard', 'rp'])

                    ext_costs_pds[
                        'dw_pub_curr'] = ext_costs_pds['dw_pub'] / _wprime
                    ext_costs_pds[
                        'dw_soc_curr'] = ext_costs_pds['dw_soc'] / _wprime
                    ext_costs_pds['dw_tot_curr'] = ext_costs_pds[[
                        'dw_pub', 'dw_soc'
                    ]].sum(axis=1) / _wprime

                    ext_costs_pds_sum = ext_costs_pds.loc[
                        (ext_costs_pds['contributer'] != ext_costs_pds.index.
                         get_level_values(event_level[0])),
                        ['dw_pub_curr', 'dw_soc_curr', 'dw_tot_curr']].sum(
                            level=[economy, 'hazard', 'rp']).reset_index()

                    ext_costs_pds_pub = float(ext_costs_pds_sum.loc[
                        (ext_costs_pds_sum[economy] == _loc)
                        & ext_costs_pds_sum.eval('(hazard==@_haz)&(rp==@_rp)'),
                        'dw_pub_curr'])
                    ext_costs_pds_soc = float(ext_costs_pds_sum.loc[
                        (ext_costs_pds_sum[economy] == _loc)
                        & ext_costs_pds_sum.eval('(hazard==@_haz)&(rp==@_rp)'),
                        'dw_soc_curr'])
                    ext_costs_pds_sum = float(ext_costs_pds_sum.loc[
                        (ext_costs_pds_sum[economy] == _loc)
                        & ext_costs_pds_sum.eval('(hazard==@_haz)&(rp==@_rp)'),
                        'dw_tot_curr'])

                    _['dw_cum_' +
                      my_PDS] += (ext_costs_pds_pub +
                                  ext_costs_pds_soc) * _['cost_frac_' + my_PDS]
                    _['delta_dw_cum_' +
                      my_PDS] = _['dw_cum_' + base_str] - _['dw_cum_' + my_PDS]

                    ### PMT-ranked population coverage [%]
                    plt.plot(
                        100. * _['pcwgt_cum_' + base_str] /
                        _['pcwgt_' + base_str].sum(), 100. * _['dk0_cum'] /
                        _[['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum())
                    plt.annotate(
                        'Total asset losses\n$' +
                        str(round(1E-6 * to_usd * _.iloc[-1]['dk0_cum'], 1)) +
                        ' mil.',
                        xy=(0.1, 0.85),
                        xycoords='axes fraction',
                        color=greys_pal[7],
                        fontsize=10)
                    if False:
                        plt.plot(
                            100. * _['pcwgt_cum_' + base_str] /
                            _['pcwgt_' + base_str].sum(), 100. * _['dk0_cum'] /
                            _[['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum())

                plt.xlabel('Population percentile [%]',
                           labelpad=8,
                           fontsize=10)
                plt.ylabel('Cumulative asset losses [%]',
                           labelpad=8,
                           fontsize=10)
                plt.xlim(0)
                plt.ylim(-0.1)
                plt.gca().xaxis.set_ticks([20, 40, 60, 80, 100])
                sns.despine()
                plt.grid(False)

                plt.gcf().savefig('../output_plots/SL/PMT/pcwgt_vs_dk0_' +
                                  _loc + '_' + _haz + '_' + str(_rp) + '.pdf',
                                  format='pdf',
                                  bbox_inches='tight')
                plt.cla()

                #####################################
                ### PMT threshold vs dk (normalized)
                _ = _.sort_values('PMT', ascending=True)
                plt.plot(_['PMT'],
                         100. * _['dk0_cum'] /
                         _[['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum(),
                         linewidth=1.8,
                         zorder=99,
                         color=q_colors[1])

                for _q in [1, 2, 3, 4, 5]:
                    _q_x = _.loc[_['quintile'] == _q, 'PMT'].max()
                    _q_y = 100. * _.loc[_['quintile'] <= _q,
                                        ['pcwgt_' + base_str, 'dk0']].prod(
                                            axis=1).sum() / _[[
                                                'pcwgt_' + base_str, 'dk0'
                                            ]].prod(axis=1).sum()
                    if _q == 1: _q_yprime = _q_y / 20

                    plt.plot([_q_x, _q_x], [0, _q_y],
                             color=greys_pal[4],
                             ls=':',
                             linewidth=1.5,
                             zorder=91)

                    _usd = ' mil.'
                    plt.annotate((quint_labels[_q - 1] + '\n$' + str(
                        round(
                            1E-6 * to_usd *
                            _.loc[_['quintile'] == _q,
                                  ['pcwgt_' + base_str, 'dk0']].prod(
                                      axis=1).sum(), 1)) + _usd),
                                 xy=(_q_x, _q_y + _q_yprime),
                                 color=greys_pal[6],
                                 ha='right',
                                 va='bottom',
                                 style='italic',
                                 fontsize=8,
                                 zorder=91)

                if False:
                    plt.scatter(
                        _['PMT'],
                        100. * _['dk0_cum'] /
                        _[['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum(),
                        alpha=0.08,
                        s=6,
                        zorder=10,
                        color=q_colors[1])

                plt.xlabel('Household income [PMT]', labelpad=8, fontsize=10)
                plt.ylabel('Cumulative asset losses [%]',
                           labelpad=8,
                           fontsize=10)
                plt.annotate(
                    'Total asset losses\n$' +
                    str(round(1E-6 * to_usd * _.iloc[-1]['dk0_cum'], 1)) +
                    ' mil.',
                    xy=(0.1, 0.85),
                    xycoords='axes fraction',
                    color=greys_pal[7],
                    fontsize=10)
                plt.xlim(825)
                plt.ylim(-0.1)

                sns.despine()
                plt.grid(False)

                plt.gcf().savefig('../output_plots/SL/PMT/pmt_vs_dk_norm_' +
                                  _loc + '_' + _haz + '_' + str(_rp) + '.pdf',
                                  format='pdf',
                                  bbox_inches='tight')

                #####################################
                ### PMT threshold vs dk & dw
                plt.cla()
                plt.plot(_['PMT'],
                         _['dk0_cum'] * to_usd * 1E-6,
                         color=q_colors[1],
                         linewidth=1.8,
                         zorder=99)
                plt.plot(_['PMT'],
                         _['dw_cum_' + base_str] * to_usd * 1E-6,
                         color=q_colors[3],
                         linewidth=1.8,
                         zorder=99)

                _y1 = 1.08
                _y2 = 1.03
                if _['dk0_cum'].max() < _['dw_cum_' + base_str].max():
                    _y1 = 1.03
                    _y2 = 1.08

                plt.annotate(
                    'Total asset losses = $' +
                    str(round(_['dk0_cum'].max() * to_usd * 1E-6, 1)) +
                    ' million',
                    xy=(0.02, _y1),
                    xycoords='axes fraction',
                    color=q_colors[1],
                    ha='left',
                    va='top',
                    fontsize=10,
                    annotation_clip=False)

                wb_str = 'Total wellbeing losses = \$' + str(
                    round(_['dw_cum_' + base_str].max() * to_usd * 1E-6,
                          1)) + ' million'
                #wb_natl_str = '(+\$'+str(round(ext_costs_base_sum*to_usd*1E-6,1))+')'
                wb_natl_str = 'National welfare losses\n  $' + str(
                    round(ext_costs_base_sum * to_usd * 1E-6, 1)) + ' million'

                plt.annotate(wb_str,
                             xy=(0.02, _y2),
                             xycoords='axes fraction',
                             color=q_colors[3],
                             ha='left',
                             va='top',
                             fontsize=10,
                             annotation_clip=False)
                #plt.annotate(wb_natl_str,xy=(0.02,0.77),xycoords='axes fraction',color=q_colors[3],ha='left',va='top',fontsize=10)

                for _q in [1, 2, 3, 4, 5]:
                    _q_x = _.loc[_['quintile'] == _q, 'PMT'].max()
                    _q_y = max(
                        _.loc[_['quintile'] <= _q,
                              ['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum()
                        * to_usd * 1E-6,
                        _.loc[_['quintile'] <= _q,
                              ['pcwgt_' + base_str, 'dw_' +
                               base_str]].prod(axis=1).sum() * to_usd * 1E-6)
                    if _q == 1: _q_yprime = _q_y / 25

                    plt.plot([_q_x, _q_x], [0, _q_y],
                             color=greys_pal[4],
                             ls=':',
                             linewidth=1.5,
                             zorder=91)
                    plt.annotate(quint_labels[_q - 1],
                                 xy=(_q_x, _q_y + 7 * _q_yprime),
                                 color=greys_pal[6],
                                 ha='right',
                                 va='bottom',
                                 style='italic',
                                 fontsize=8,
                                 zorder=91,
                                 annotation_clip=False)

                    # This figures out label ordering (are cumulative asset or cum welfare lossers higher?)
                    _cumk = round(
                        _.loc[_['quintile'] <= _q,
                              ['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum()
                        * to_usd * 1E-6, 1)
                    _cumw = round(
                        _.loc[_['quintile'] <= _q,
                              ['pcwgt_' + base_str, 'dw_' +
                               base_str]].prod(axis=1).sum() * to_usd * 1E-6,
                        1)
                    if _cumk >= _cumw:
                        _yprime_k = 4 * _q_yprime
                        _yprime_w = 1 * _q_yprime
                    else:
                        _yprime_k = 1 * _q_yprime
                        _yprime_w = 4 * _q_yprime

                    _qk = round(
                        _.loc[_['quintile'] == _q,
                              ['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum()
                        * to_usd * 1E-6, 1)
                    _qw = round(
                        _.loc[_['quintile'] == _q,
                              ['pcwgt_' + base_str, 'dw_' +
                               base_str]].prod(axis=1).sum() * to_usd * 1E-6,
                        1)

                    plt.annotate('$' + str(_qk) + ' mil.',
                                 xy=(_q_x, _q_y + _yprime_k),
                                 color=q_colors[1],
                                 ha='right',
                                 va='bottom',
                                 style='italic',
                                 fontsize=8,
                                 zorder=91,
                                 annotation_clip=False)
                    plt.annotate('$' + str(_qw) + ' mil.',
                                 xy=(_q_x, _q_y + _yprime_w),
                                 color=q_colors[3],
                                 ha='right',
                                 va='bottom',
                                 style='italic',
                                 fontsize=8,
                                 zorder=91,
                                 annotation_clip=False)

                plt.xlabel('Household income [PMT]', labelpad=8, fontsize=10)
                plt.ylabel('Cumulative losses [mil. US$]',
                           labelpad=8,
                           fontsize=10)
                plt.xlim(825)
                plt.ylim(-0.1)

                plt.title(' ' + str(_rp) + '-year ' + haz_dict[_haz].lower() +
                          ' in ' + _loc,
                          loc='left',
                          color=greys_pal[7],
                          pad=30,
                          fontsize=15)

                sns.despine()
                plt.grid(False)

                plt.gcf().savefig('../output_plots/SL/PMT/pmt_vs_dk0_' + _loc +
                                  '_' + _haz + '_' + str(_rp) + '.pdf',
                                  format='pdf',
                                  bbox_inches='tight')
                plt.close('all')

                #####################################
                ### Cost vs benefit of PMT

                show_net_benefit = False
                if show_net_benefit:
                    _['dw_cum_' +
                      base_str] += (ext_costs_base_pub + ext_costs_base_soc
                                    ) * _['cost_frac_' + my_PDS]
                #*_[['pcwgt_'+base_str,'dk0']].prod(axis=1).cumsum()/_[['pcwgt_'+base_str,'dk0']].prod(axis=1).sum()
                # ^ include national costs in baseline dw
                _['delta_dw_cum_' +
                  my_PDS] = _['dw_cum_' + base_str] - _['dw_cum_' + my_PDS]
                # redefine this because above changed

                plt.cla()
                plt.plot(_['PMT'],
                         _['cost_cum_' + my_PDS] * to_usd * 1E-6,
                         color=q_colors[1],
                         linewidth=1.8,
                         zorder=99)
                plt.plot(_['PMT'],
                         _['delta_dw_cum_' + my_PDS] * to_usd * 1E-6,
                         color=q_colors[3],
                         linewidth=1.8,
                         zorder=99)

                plt.annotate('PDS cost =\n$' + str(
                    round(_['cost_cum_' + my_PDS].max() * to_usd * 1E-6, 2)) +
                             ' mil.',
                             xy=(_['PMT'].max(),
                                 _['cost_cum_' + my_PDS].max() * to_usd *
                                 1E-6),
                             color=q_colors[1],
                             weight='bold',
                             ha='left',
                             va='top',
                             fontsize=10,
                             annotation_clip=False)
                plt.annotate('Avoided wellbeing\nlosses = $' + str(
                    round(_.iloc[-1]['delta_dw_cum_' + my_PDS] * to_usd * 1E-6,
                          2)) + ' mil.',
                             xy=(_['PMT'].max(),
                                 _.iloc[-1]['delta_dw_cum_' + my_PDS] *
                                 to_usd * 1E-6),
                             color=q_colors[3],
                             weight='bold',
                             ha='left',
                             va='top',
                             fontsize=10)

                #for _q in [1,2,3,4,5]:
                #    _q_x = _.loc[_['quintile']==_q,'PMT'].max()
                #    _q_y = max(_.loc[_['quintile']<=_q,['pcwgt','dk0']].prod(axis=1).sum()*to_usd*1E-6,
                #               _.loc[_['quintile']<=_q,['pcwgt','dw_no']].prod(axis=1).sum()*to_usd*1E-6)
                #    if _q == 1: _q_yprime = _q_y/20

                #    plt.plot([_q_x,_q_x],[0,_q_y],color=greys_pal[4],ls=':',linewidth=1.5,zorder=91)
                #    plt.annotate(quint_labels[_q-1],xy=(_q_x,_q_y+_q_yprime),color=greys_pal[6],ha='right',va='bottom',style='italic',fontsize=8,zorder=91)

                plt.xlabel('Upper PMT threshold for post-disaster support',
                           labelpad=8,
                           fontsize=12)
                plt.ylabel('Cost & benefit [mil. US$]',
                           labelpad=8,
                           fontsize=12)
                plt.xlim(825)  #;plt.ylim(0)

                plt.title(' ' + str(_rp) + '-year ' + haz_dict[_haz].lower() +
                          '\n  in ' + _loc,
                          loc='left',
                          color=greys_pal[7],
                          pad=25,
                          fontsize=15)
                plt.annotate(pds_dict[my_PDS],
                             xy=(0.02, 1.03),
                             xycoords='axes fraction',
                             color=greys_pal[6],
                             ha='left',
                             va='bottom',
                             weight='bold',
                             style='italic',
                             fontsize=8,
                             zorder=91,
                             clip_on=False)

                plt.plot(plt.gca().get_xlim(), [0, 0],
                         color=greys_pal[2],
                         linewidth=0.90)
                sns.despine(bottom=True)
                plt.grid(False)

                plt.gcf().savefig('../output_plots/SL/PMT/pmt_dk_vs_dw_' +
                                  _loc + '_' + _haz + '_' + str(_rp) + '_' +
                                  my_PDS + '.pdf',
                                  format='pdf',
                                  bbox_inches='tight')
                plt.close('all')
                continue

                #####################################
                ### Cost vs benefit of PMT
                _ = _.fillna(0)
                #_ = _.loc[_['pcwgt_'+my_PDS]!=0].copy()
                _ = _.loc[(_['help_received_' + my_PDS] != 0)
                          & (_['pcwgt_' + my_PDS] != 0)].copy()

                #_['dw_cum_'+my_PDS] = _[['pcwgt_'+my_PDS,'dw_'+my_PDS]].prod(axis=1).cumsum()
                #_['dw_cum_'+my_PDS] += ext_costs_pds_pub + ext_costs_pds_soc*_['cost_frac_'+my_PDS]
                # ^ unchanged from above

                _c1, _c1b = paired_pal[2], paired_pal[3]
                _c2, _c2b = paired_pal[0], paired_pal[1]

                _window = 100
                if _.shape[0] < 100: _window = int(_.shape[0] / 5)

                plt.cla()

                _y_values_A = (_['cost_cum_' + my_PDS] *
                               to_usd).diff() / _['pcwgt_' + my_PDS]
                _y_values_B = pd.rolling_mean(
                    (_['cost_cum_' + my_PDS] * to_usd).diff() /
                    _['pcwgt_' + my_PDS], _window)

                if _y_values_A.max() >= 1.25 * _y_values_B.max(
                ) or _y_values_A.min() <= 0.75 * _y_values_B.min():
                    plt.scatter(_['PMT'],
                                (_['cost_cum_' + my_PDS] * to_usd).diff() /
                                _['pcwgt_' + my_PDS],
                                color=_c1,
                                s=4,
                                zorder=98,
                                alpha=0.25)
                    plt.plot(_['PMT'],
                             pd.rolling_mean(
                                 (_['cost_cum_' + my_PDS] * to_usd).diff() /
                                 _['pcwgt_' + my_PDS], _window),
                             color=_c1b,
                             lw=1.0,
                             zorder=98)
                else:
                    plt.plot(_['PMT'],
                             (_['cost_cum_' + my_PDS] * to_usd).diff() /
                             _['pcwgt_' + my_PDS],
                             color=_c1b,
                             lw=1.0,
                             zorder=98)

                plt.scatter(_['PMT'],
                            (_['delta_dw_cum_' + my_PDS] * to_usd).diff() /
                            _['pcwgt_' + my_PDS],
                            color=_c2,
                            s=4,
                            zorder=98,
                            alpha=0.25)
                plt.plot(_['PMT'],
                         pd.rolling_mean(
                             (_['delta_dw_cum_' + my_PDS] * to_usd).diff() /
                             _['pcwgt_' + my_PDS], _window),
                         color=_c2b,
                         lw=1.0,
                         zorder=98)
                _y_min = 1.05 * pd.rolling_mean(
                    (_['delta_dw_cum_' + my_PDS] * to_usd).diff() /
                    _['pcwgt_' + my_PDS], _window).min()
                _y_max = 1.1 * max(
                    pd.rolling_mean(
                        (_['delta_dw_cum_' + my_PDS] * to_usd).diff() /
                        _['pcwgt_' + my_PDS], _window).max(), 1.05 *
                    ((_['cost_cum_' + my_PDS] * to_usd).diff() /
                     _['pcwgt_' + my_PDS]).mean() + _q_yprime)

                for _q in [1, 2, 3, 4, 5]:
                    _q_x = min(1150, _.loc[_['quintile'] == _q, 'PMT'].max())
                    #_q_y = max(_.loc[_['quintile']<=_q,['pcwgt_'+my_PDS,'dk0']].prod(axis=1).sum()*to_usd,
                    #           _.loc[_['quintile']<=_q,['pcwgt_'+my_PDS,'dw_no']].prod(axis=1).sum()*to_usd))
                    if _q == 1:
                        _q_xprime = (_q_x - 840) / 40
                        _q_yprime = _y_max / 200

                    plt.plot([_q_x, _q_x], [_y_min, _y_max],
                             color=greys_pal[4],
                             ls=':',
                             linewidth=1.5,
                             zorder=91)
                    plt.annotate(quint_labels[_q - 1],
                                 xy=(_q_x - _q_xprime, _y_max),
                                 color=greys_pal[6],
                                 ha='right',
                                 va='top',
                                 style='italic',
                                 fontsize=7,
                                 zorder=99)

                #toggle this
                plt.annotate('PDS cost',
                             xy=(_['PMT'].max() - _q_xprime,
                                 ((_['cost_cum_' + my_PDS] * to_usd).diff() /
                                  _['pcwgt_' + my_PDS]).mean() + _q_yprime),
                             color=_c1b,
                             weight='bold',
                             ha='right',
                             va='bottom',
                             fontsize=8,
                             annotation_clip=False)

                #plt.annotate('Avoided\nwellbeing losses',xy=(_['PMT'].max()-_q_xprime,pd.rolling_mean((_['delta_dw_cum']*to_usd/_['pcwgt_'+my_PDS]).diff(),_window).min()+_q_yprime),
                #             color=_c2b,weight='bold',ha='right',va='bottom',fontsize=8)

                plt.xlabel('Upper PMT threshold for post-disaster support',
                           labelpad=10,
                           fontsize=10)
                plt.ylabel(
                    'Marginal impact at threshold [US$ per next enrollee]',
                    labelpad=10,
                    fontsize=10)

                plt.title(str(_rp) + '-year ' + haz_dict[_haz].lower() +
                          ' in ' + _loc,
                          loc='right',
                          color=greys_pal[7],
                          pad=20,
                          fontsize=15)
                plt.annotate(pds_dict[my_PDS],
                             xy=(0.99, 1.02),
                             xycoords='axes fraction',
                             color=greys_pal[6],
                             ha='right',
                             va='bottom',
                             weight='bold',
                             style='italic',
                             fontsize=8,
                             zorder=91,
                             clip_on=False)

                plt.plot([840, 1150], [0, 0],
                         color=greys_pal[2],
                         linewidth=0.90)
                plt.xlim(840, 1150)
                plt.ylim(_y_min, _y_max)
                sns.despine(bottom=True)
                plt.grid(False)

                plt.gcf().savefig(
                    '../output_plots/SL/PMT/pmt_slope_cost_vs_benefit_' +
                    _loc + '_' + _haz + '_' + str(_rp) + '_' + my_PDS + '.pdf',
                    format='pdf',
                    bbox_inches='tight')
                plt.close('all')

Exemplo n.º 4

0

Exibir arquivo

Arquivo: lib_country_dir.py Projeto: aj-wb/shock_model_LC

def _assign_percentile_bins(frame, group_col, value_col, percentiles, label,
                            **match_kwargs):
    """Add a percentile-bin column ``label`` to ``frame``, group by group.

    The frame's index is reset, rows are grouped on ``group_col`` and, for
    every group, ``perc_with_spline`` is evaluated on ``value_col`` and
    ``pcwgt`` for the requested ``percentiles``; its result is handed to
    ``match_percentiles`` together with ``label``.

    Args:
        frame: DataFrame holding ``group_col``, ``value_col`` and ``pcwgt``.
        group_col: column to group on.
        value_col: column whose distribution is binned.
        percentiles: array of percentile edges (e.g. 0.2, 0.4, ... 1.0).
        label: name passed to ``match_percentiles`` for the new column.
        **match_kwargs: forwarded untouched to ``match_percentiles``
            (e.g. ``sort_val``); omitted keywords keep that helper's defaults.

    Returns:
        Whatever ``groupby(...).apply`` assembles from the per-group results.
    """

    def _label_group(grp):
        spline_result = perc_with_spline(reshape_data(grp[value_col]),
                                         reshape_data(grp.pcwgt), percentiles)
        return match_percentiles(grp, spline_result, label, **match_kwargs)

    return frame.reset_index().groupby(group_col,
                                       sort=True).apply(_label_group)


def _weighted_mean_by_level(frame, column, level):
    """Return the ``pcwgt``-weighted mean of ``column`` per index ``level``.

    Uses ``groupby(level=...)`` rather than the ``sum(level=...)`` shortcut,
    which newer pandas releases no longer provide.  ``sort=False`` is meant to
    keep the first-appearance group order of the old shortcut (verify against
    the pandas version in use).
    """
    weighted_total = frame[[column, 'pcwgt']].prod(axis=1).groupby(
        level=level, sort=False).sum()
    total_weight = frame['pcwgt'].groupby(level=level, sort=False).sum()
    return weighted_total / total_weight


def load_survey_data(myC):
    """Load and standardize the household survey for country ``myC``.

    Only ``'PH'`` (FIES 2015, read from ``./csv/FIES2015.csv``) is handled
    here.  For PH the function derives per-capita and household quantities,
    prints a few national income totals, writes four savings summaries under
    ``csv/`` and re-indexes the frame on ``get_economic_unit(myC)``.  For every
    country it then adds ``quintile`` and ``decile`` columns computed within
    each economic unit and caps ``pcsoc`` at 99% of ``pcinc``.

    Each survey/country should provide the following:
      -> hhid       household id
      -> hhinc      household income? but seems to be expenditure (SL)
      -> pcinc      household income per person
      -> hhwgt      number of households this line is 'representative' of
      -> pcwgt      total population this line is representative of
      -> hhsize     household size
      -> hhsize_ae  household size2
      -> hhsoc      social payments (government and remittances)
      -> pcsoc      per person social payments
      -> ispoor
      -> has_ew

    Args:
        myC: country code string.

    Returns:
        The processed survey DataFrame.

    Raises:
        ValueError: if no survey loader exists for ``myC``.
    """
    df = None

    if myC == 'PH':
        # The input is read from a path local to the covid_phl project
        # (changed by <ps>, 2020-04-15); the original input lived at
        # '~/Desktop/BANK/hh_resilience_model/inputs/PH/FIES2015.csv'.
        fies_columns = [
            'w_regn',
            'w_prov',
            'w_mun',
            'w_bgy',
            'w_ea',
            'w_shsn',
            'w_hcn',
            'totex',
            'regft',
            'hhwgt',
            'poorhh',
            'totdis',
            'tothrec',
            'pcinc_s',
            'pcinc_ppp11',
            'pcwgt',
            'fsize',
            'agri_sal',
            'nonagri_sal',
            'cash_abroad',
            'cash_domestic',
            'othin',
            'net_cfg',  # Crop Farming and Gardening net receipts (gross = 'eacfggrs')
            'net_lpr',  # Livestock and Poultry Raising net receipts (gross = 'ealprgrs')
            'net_fish',  # Fishing gross receipts (gross = 'eafisgrs')
            'net_for',  # Forestry and Hunting net receipts (gross = 'eaforgrs')
            'net_ret',  # Wholesale and Retail Trade net receipts (gross = 'eatrdgrs')
            'net_mfg',  # Manufacturing net receipts (gross = 'eamfggrs')
            'net_com',  # Community,Social,Rec'l,Personal Services net receipts (gross = 'eacpsgrs')
            'net_trans',  # Transportation,Storage and Comcn Services net receipts (gross = 'eatcsgrs')
            'net_min',  # Mining and Quarrying gross receipts (gross = eamnggrs)
            'net_cons',  # Construction net receipts (gross = 'eacongrs')
            'net_nec',  # Entrepreneurial Activities NEC net receipts (gross = 'eanecgrs')
            't930220',  # total public receipts
            't930221',  # cct incl 4Ps transfers
            # 'job' used to be listed twice, which produced a duplicated
            # column; it is selected once here.
            'job',  # Household Head Job or Business Indicator  (2nd visit only)
            'occup_fin',  # Household Head Occupation  (2nd visit only)
            'employed_pay',  # Total number of family members employed for pay   (2nd visit only)
            'employed_prof',  # Total number of family members employed for profit   (2nd visit only)
            'cw',  # Household Head Class of Worker  (2nd visit only)
            'spouse_emp',  # Spouse has job/business   (2nd visit only)
            'majsr',  # Major Grouping of Main Source of Income
            'minsr',  # Detailed Grouping of Main Source of income
            'radio_qty',
            'tv_qty',
            'cellphone_qty',
            'pc_qty',
            'savings',
            'invest',
        ]
        df = pd.read_csv('./csv/FIES2015.csv')[fies_columns]

        df = df.rename(
            columns={
                'tothrec': 'hhsoc',
                'poorhh': 'ispoor',
                'totex': 'hhexp',
                't930220': 'total_public',
                't930221': 'cct4P'
            })

        df['total_entrepreneurial'] = df[[
            'net_cfg', 'net_lpr', 'net_fish', 'net_for', 'net_ret', 'net_mfg',
            'net_com', 'net_trans', 'net_min', 'net_cons', 'net_nec'
        ]].sum(axis=1)

        df['hhsize'] = df['pcwgt'] / df['hhwgt']

        # Per capita expenditures
        df['pcexp'] = df['hhexp'] / df['hhsize']

        # These lines use income as income
        df = df.rename(columns={'pcinc_s': 'pcinc'})
        df['hhinc'] = df[['pcinc', 'hhsize']].prod(axis=1)

        df['ppp_factor'] = df.eval(
            '(365*pcinc_ppp11*hhsize)/hhinc')  # <-- annual PPP/LCU

        print(
            '\nTotal value:',
            round(
                1E-9 * df[['pcinc_ppp11', 'pcwgt']].prod(axis=1).sum() * 365 /
                12, 2), ' bil. $PPP(2011)/month')
        print('-- non-ag wages: {} bil. $PPP/month'.format(
            round(
                1E-9 / 12 *
                df[['nonagri_sal', 'hhwgt', 'ppp_factor']].prod(axis=1).sum(),
                2)))
        print('-- ag wages: {} bil. $PPP/month\n'.format(
            round(
                1E-9 / 12 *
                df[['agri_sal', 'hhwgt', 'ppp_factor']].prod(axis=1).sum(),
                2)))

        df['pcsoc'] = df['hhsoc'] / df['hhsize']

        # Missing savings/investment entries become -1 so that they fail the
        # "> 0" access-to-finance test below.
        df['savings'] = df['savings'].fillna(-1)
        df['invest'] = df['invest'].fillna(-1)

        df['axfin'] = 0
        df.loc[(df.savings > 0) | (df.invest > 0), 'axfin'] = 1

        df['est_sav'] = df[['axfin', 'pcinc']].prod(axis=1) / 2.

        # Only the lower bound of the middle-class range is applied.
        _mc_lo, _mc_hi = get_middleclass_range('PH')
        df['ismiddleclass'] = (df.pcinc >= _mc_lo)

        _lo, _hi = get_secure_range('PH')
        df['issecure'] = (df.pcinc >= _lo) & (df.pcinc <= _hi)

        _lo, _hi = get_vulnerable_range('PH')
        df['isvulnerable'] = (df.pcinc >= _lo) & (df.pcinc <= _hi)

        # Run savings script.  'country' is a constant column so that the
        # national binning can reuse the same groupby machinery.
        df['country'] = 'PH'
        income_deciles = np.arange(0.10, 1.01, 0.10)
        df = _assign_percentile_bins(df,
                                     'country',
                                     'pcinc',
                                     income_deciles,
                                     'decile_nat',
                                     sort_val='pcinc').drop(['index'], axis=1)
        df = _assign_percentile_bins(df,
                                     'w_regn',
                                     'pcinc',
                                     income_deciles,
                                     'decile_reg',
                                     sort_val='pcinc').drop(['index'], axis=1)
        df = df.reset_index().set_index(['w_regn', 'decile_nat',
                                         'decile_reg']).drop('index', axis=1)

        df['precautionary_savings'] = df['pcinc'] - df['pcexp']

        # Savings rate by national decile
        nat_level = 'decile_nat'
        savings_nat = pd.DataFrame(index=df['pcwgt'].groupby(
            level=nat_level, sort=False).sum().index)
        savings_nat['income'] = _weighted_mean_by_level(df, 'pcinc', nat_level)
        savings_nat['expenditures'] = _weighted_mean_by_level(
            df, 'pcexp', nat_level)
        savings_nat['precautionary_savings'] = (savings_nat['income'] -
                                                savings_nat['expenditures'])
        savings_nat.sort_index().to_csv('csv/hh_savings_by_decile.csv')

        # Savings rate by decile (regionally-defined) & region
        reg_levels = ['w_regn', 'decile_reg']
        savings_reg = pd.DataFrame(index=df['pcwgt'].groupby(
            level=reg_levels, sort=False).sum().index)
        savings_reg['income'] = _weighted_mean_by_level(
            df, 'pcinc', reg_levels)
        savings_reg['expenditures'] = _weighted_mean_by_level(
            df, 'pcexp', reg_levels)
        savings_reg['precautionary_savings'] = (savings_reg['income'] -
                                                savings_reg['expenditures'])
        savings_reg.sort_index().to_csv(
            'csv/hh_savings_by_decile_and_region.csv')

        # Savings rate for hh in subsistence (natl average)
        savings_quartiles = np.arange(0.25, 1.01, 0.25)
        df = _assign_percentile_bins(df,
                                     'country',
                                     'precautionary_savings',
                                     savings_quartiles,
                                     'nat_sav_quartile',
                                     sort_val='precautionary_savings')
        df = _assign_percentile_bins(
            df,
            'w_regn',
            'precautionary_savings',
            savings_quartiles,
            'reg_sav_quartile',
            sort_val='precautionary_savings').drop(['index'], axis=1)
        df = df.reset_index().set_index(['w_regn', 'decile_nat', 'decile_reg'
                                         ]).drop('index', axis=1).sort_index()

        # Look the subsistence line up once and reuse the row mask.
        in_subsistence = df.pcinc < get_subsistence_line(myC)

        subsistence_natl = pd.DataFrame()
        subsistence_natl.loc['subsistence_savings_rate', 'hh_avg'] = (
            df.loc[in_subsistence].eval('pcwgt*(pcinc-pcexp)').sum() /
            df.loc[in_subsistence, 'pcwgt'].sum())
        for quartile in (1, 2, 3):
            subsistence_natl.loc[
                'subsistence_savings_rate',
                'hh_q{}'.format(quartile)] = df.loc[
                    df.nat_sav_quartile == quartile,
                    'precautionary_savings'].max()
        subsistence_natl.sort_index().to_csv(
            'csv/hh_savings_in_subsistence_natl.csv')

        # Savings rate for hh in subsistence (by region)
        subsistence_reg = pd.DataFrame()
        subsistence_reg['hh_avg'] = (
            df.loc[in_subsistence].eval('pcwgt*(pcinc-pcexp)').groupby(
                level='w_regn', sort=False).sum() /
            df.loc[in_subsistence, 'pcwgt'].groupby(level='w_regn',
                                                    sort=False).sum())
        for quartile in (1, 2, 3):
            subsistence_reg['hh_q{}'.format(quartile)] = df.loc[
                df.reg_sav_quartile == quartile,
                'precautionary_savings'].groupby(level='w_regn',
                                                 sort=False).max()
        subsistence_reg.sort_index().to_csv(
            'csv/hh_savings_in_subsistence_reg.csv')

        # Drop unused columns
        df = df.reset_index().set_index(
            ['w_regn', 'w_prov', 'w_mun', 'w_bgy', 'w_ea', 'w_shsn', 'w_hcn'])
        df = df.drop([
            _c for _c in [
                'country', 'decile_nat', 'decile_reg', 'est_sav',
                'tot_savings', 'savings', 'invest', 'precautionary_savings',
                'index', 'level_0'
            ] if _c in df.columns
        ],
                     axis=1)

        # Standardize province info
        prov_code, region_code = get_places_dict(myC)

        df = df.reset_index()
        get_hhid_FIES(df)
        df = df.rename(columns={
            'w_prov': 'province',
            'w_regn': 'region'
        }).reset_index()
        # Assign the result back: an in-place replace on ``df[col]`` acts on
        # an intermediate object and is not guaranteed to reach ``df``.
        df['province'] = df['province'].replace(prov_code)
        df['region'] = df['region'].replace(region_code)
        df = df.reset_index().set_index(get_economic_unit(myC)).drop(
            ['index', 'level_0'], axis=1)

    if df is None:
        # Without this guard the code below fails with an opaque
        # AttributeError on ``None``.
        raise ValueError(
            'No survey loader is defined for country {!r}'.format(myC))

    # Assign weighted household consumption to quintiles within each economy
    print('Finding quintiles')
    economy = df.index.names[0]
    listofquintiles = np.arange(0.20, 1.01, 0.20)
    # groupby/apply takes each economy and applies the function to it separately.
    # https://pandas.pydata.org/pandas-docs/stable/generated/pandas.core.groupby.GroupBy.apply.html
    df = _assign_percentile_bins(df, economy, 'pcinc', listofquintiles,
                                 'quintile')

    print('Finding deciles')
    listofdeciles = np.arange(0.10, 1.01, 0.10)
    df = _assign_percentile_bins(df, economy, 'pcinc', listofdeciles,
                                 'decile')

    # drop extraneous columns
    df.drop([
        icol for icol in ['level_0', 'index', 'pctle_05', 'pctle_05_nat']
        if icol in df.columns
    ],
            axis=1,
            inplace=True)

    # Last thing: however 'c' was set (income or consumption), pcsoc can't be higher than 0.99*that!
    df['pcsoc'] = df['pcsoc'].clip(upper=0.99 * df['pcinc'])

    return df