def build_aggregates(): # writer = None years = range(2009, 2010) for year in years: yr = str(year) # fname = "Agg_%s.%s" %(str(yr), "xls") simu = SurveySimulation() simu.set_config(year=yr) simu.set_param() simu.set_survey() simu.compute() variables = ["garext", "ci_garext", "inthab", "ppe_brute", "rni"] x = simu.aggregated_by_entity("men", variables, all_output_vars=False) # df = x[0] # print df["ci_garext"].describe() agg = Aggregates() agg.set_simulation(simu) agg.show_default = False agg.show_real = False agg.show_diff = False agg.varlist = var_list # ERROR: var_list is undefined. agg.compute() cols = agg.aggr_frame.columns[:4] print agg.aggr_frame[cols].to_string() # if writer is None: # writer = ExcelWriter(str(fname_all)) # agg.aggr_frame.to_excel(writer, yr, index= False, header= True) del simu del agg import gc gc.collect()
def test(): from openfisca_core.simulations import SurveySimulation from .aggregates import Aggregates yr = 2006 simulation = SurveySimulation() simulation.set_config(year = yr) simulation.set_param() simulation.set_survey() calibration = Calibration() calibration.set_simulation(simulation) filename = "../../countries/france/calibrations/calib_2006.csv" calibration.set_inputs_margins_from_file(filename, 2006) calibration.set_param('invlo', 3) calibration.set_param('up', 3) calibration.set_param('method', 'logit') aggregates = Aggregates() aggregates.set_simulation(simulation) simulation.compute() aggregates.compute() print aggregates.aggr_frame.to_string() calibration.set_calibrated_weights() simulation.compute() aggregates.compute() print aggregates.aggr_frame.to_string()
def build_aggregates(): writer = None years = range(2006, 2010) for year in years: yr = str(year) # fname = "Agg_%s.%s" %(str(yr), "xls") simu = SurveySimulation() simu.set_config(year=yr) simu.set_param() simu.set_survey() inflator = get_loyer_inflator(year) simu.inflate_survey({'loyer': inflator}) simu.compute() agg = Aggregates() agg.set_simulation(simu) agg.compute() if writer is None: writer = ExcelWriter(str(fname_all)) agg.aggr_frame.to_excel(writer, yr, index=False, header=True, float_format="%.2f") print agg.aggr_frame.to_string() del simu del agg import gc gc.collect() writer.save()
def get_common_dataframe(variables, year=2006):
    """Return a household-level dataframe merging openfisca aggregates
    with the matching ERF values.

    Parameters:
        variables: list of variable names to compare.
        year: survey year.

    Returns a dataframe with one row per `idmen`, ERF columns suffixed
    with "_erf".
    """
    simulation = SurveySimulation()
    simulation.set_config(year=year)
    simulation.set_param()
    simulation.set_survey()
    simulation.compute()
    # BUGFIX: the ERF table year was hard-coded to 2006, ignoring the
    # `year` argument; honour the parameter.
    erf = ErfsDataTable(year=year)
    # Make sure the identifier and weight columns are fetched too
    # (copy first so the caller's list is not mutated).
    erf_variables = list(variables)
    if "ident" not in erf_variables:
        erf_variables.append("ident")
    if "wprm" not in erf_variables:
        erf_variables.append("wprm")
    erf_dataframe = erf.get_values(erf_variables, table="menage")
    erf_dataframe.rename(columns={'ident': 'idmen'}, inplace=True)
    for col in erf_dataframe.columns:
        # BUGFIX: the original used `col is not "idmen"` -- an identity
        # test against a string literal, which is implementation
        # dependent; use a proper inequality.
        if col != "idmen":
            erf_dataframe.rename(columns={col: col + "_erf"}, inplace=True)

    of_dataframe, of_dataframe_default = simulation.aggregated_by_entity(
        "men", variables, all_output_vars=False, force_sum=True)
    del of_dataframe_default
    merged_df = of_dataframe.merge(erf_dataframe, on="idmen")
    del of_dataframe, erf_dataframe
    return merged_df
def survey_case(year): # fname = "Agg_%s.%s" %(str(yr), "xls") simulation = SurveySimulation() simulation.set_config(year=year, num_table=1, reforme=True) simulation.set_param() simulation.P.ir.autre.charge_loyer.plaf = 500 simulation.P.ir.autre.charge_loyer.active = 1 simulation.P.ir.autre.charge_loyer.plaf_nbp = 0 # plaf=1000 plaf_nbp=0: -42160, =1: -41292 # plaf=500 plaf_nbp=0: -43033, =1: -42292 # Bareme threshold reduction in pct reduc = .1 print simulation.P.ir.bareme print simulation.P.ir.bareme.nb for i in range(2, simulation.P.ir.bareme.nb): simulation.P.ir.bareme.setSeuil( i, simulation.P.ir.bareme.seuils[i] * (1 - reduc)) print simulation.P.ir.bareme print simulation.P.ir.bareme.nb simulation.compute() # Compute aggregates agg = Aggregates() agg.set_simulation(simulation) agg.compute() df1 = agg.aggr_frame print df1.to_string() return
def test_chunk(): print "debut" writer = None years = range(2011, 2012) filename = destination_dir + 'output3.h5' store = HDFStore(filename) for year in years: yr = str(year) # fname = "Agg_%s.%s" %(str(yr), "xls") simu = SurveySimulation() simu.set_config(year=yr) simu.set_param() import time tps = {} for nb_chunk in range(1, 5): deb_chunk = time.clock() simu.set_config(survey_filename='C:\\Til\\output\\to_run_leg.h5', num_table=3, chunks_count=nb_chunk, print_missing=False) simu.compute() tps[nb_chunk] = time.clock() - deb_chunk voir = simu.output_table.table3['foy'] print len(voir) pdb.set_trace() agg3 = Aggregates() agg3.set_simulation(simu) agg3.compute() df1 = agg3.aggr_frame print df1.to_string() print tps store.close()
def convert_to_3_tables(year=2006, survey_file=None, output_file=None): if survey_file is None: raise Exception( 'You need a .h5 file with the survey to extract the variables from' ) if output_file is None: output_file = survey_file raise Warning( 'the survey file will be used to store the created tables') store = HDFStore(survey_file) output = HDFStore(output_file) print output simulation = SurveySimulation() simulation.set_config(year=year) table1 = store['survey_' + str(year)] for entity in ['ind', 'foy', 'men', 'fam']: key = 'survey_' + str(year) + '/' + str(entity) vars_matching_entity = vars_matching_entity_from_table( table1, simulation, entity) print entity, vars_matching_entity_from_table print 'table1 enum' if entity == 'ind': print 'INDIVIDUALS' print table1['noindiv'] table_entity = table1.loc[:, vars_matching_entity] # we take care have all ident and selecting qui==0 else: # print ' entity :', entity # print table1['noindiv'].head() position = 'qui' + entity # print table1[position] table_entity = table1.ix[table1[position] == 0, [ 'noi', 'idmen', 'idfoy', 'idfam', 'quifoy', 'quimen', 'quifam' ] + vars_matching_entity] # print table_entity.noi.head() table_entity = table_entity.rename_axis(table_entity['id' + entity], axis=1) # print ' APRES' # print table_entity.noi.head() print key output.put(key, table_entity) del table1 import gc gc.collect() store.close() output.close()
def test3():
    """Check input-table consistency against the EEC menage table."""
    survey_year = 2006
    collection = DataCollection(year=survey_year)
    menage_df = collection.get_of_values(table="eec_menage")

    from openfisca_core.simulations import SurveySimulation
    simulation = SurveySimulation()
    simulation.set_config(year=survey_year)
    simulation.set_param()
    simulation.compute()
    # TODO: this should not be mandatory
    check_consistency(simulation.input_table, menage_df)
def test(): yr = 2006 simu = SurveySimulation() simu.set_config(year=yr) simu.set_param() filename = os.path.join(model.DATA_DIR, 'survey_psl.h5') simu.set_survey(filename=filename) simu.compute() df = get_structure(simu, 'br_al') print df.to_string()
def toto(): year = 2006 simulation = SurveySimulation() simulation.set_config(year=year) simulation.set_param() simulation.set_survey() simulation.compute() for name, col in simulation.output_table.column_by_name.iteritems(): print col.name print col._dtype print col.entity
def test(): for year in range(2006, 2010): yr = str(year) # fname = "Agg_%s.%s" %(str(yr), "xls") simu = SurveySimulation() simu.set_config(year=yr) simu.set_param() simu.set_survey() for var in ["f4ga", "f4gb", "f4gc", "f4ge", "f4gf", "f4gg"]: print var df = simu.survey.get_value(var) print df.max() print df.min()
def test(): country = "france" for year in range(2006, 2010): yr = str(year) simu = SurveySimulation() simu.set_config(year=yr, country=country) simu.set_param() agg = Aggregates() agg.set_simulation(simu) for col in agg.varlist: print col
def survey_case(year=2006): yr = str(year) # fname = "Agg_%s.%s" %(str(yr), "xls") simulation = SurveySimulation() survey_filename = os.path.join(model.DATA_DIR, 'sources', 'test.h5') simulation.set_config(year=yr, survey_filename=survey_filename) simulation.set_param() # Ignore this # inflator = get_loyer_inflator(year) # simulation.inflate_survey({'loyer' : inflator}) simulation.compute() simul_out_df = simulation.output_table.table simul_in_df = simulation.input_table.table print simul_out_df.loc[:, ['af', 'af_base', 'af_forf', 'af_majo', 'af_nbenf' ]].describe() print 'input vars' print simul_in_df.columns print 'output vars' print simul_out_df.columns # check_inputs_enumcols(simulation) # Compute aggregates agg = Aggregates() agg.set_simulation(simulation) agg.compute() df1 = agg.aggr_frame print df1.columns print df1.to_string() # Saving aggregates # if writer is None: # writer = ExcelWriter(str(fname) # agg.aggr_frame.to_excel(writer, yr, index= False, header= True) # Displaying a pivot table from openfisca_qt.plugins.survey.distribution import OpenfiscaPivotTable pivot_table = OpenfiscaPivotTable() pivot_table.set_simulation(simulation) df2 = pivot_table.get_table(by='so', vars=['nivvie']) print df2.to_string() return df1
def survey_case(year): yr = str(year) # fname = "Agg_%s.%s" %(str(yr), "xls") simulation = SurveySimulation() simulation.set_config(year=yr, num_table=1) simulation.set_param() simulation.compute() df = simulation.get_variables_dataframe(variables=["rsa_act"], entity='ind') print df["rsa_act"].describe() del simulation import gc gc.collect()
def _generate_aggregates(self):
    """Build OF and ERFS aggregates dataframes for every year in
    ``self.years`` and store them on the instance."""
    of_frames = list()
    erfs_frames = list()
    for year in self.years:
        # Running a standard SurveySimulation to get OF aggregates
        simulation = SurveySimulation()
        simulation.set_config(year=year,
                              survey_filename=self.survey_filename)
        simulation.set_param()
        simulation.compute()

        agg = Aggregates()
        agg.set_simulation(simulation)
        agg.compute()
        frame = agg.aggr_frame
        frame['year'] = year
        label_by_name = dict(
            (name, column.label)
            for name, column
            in simulation.output_table.column_by_name.iteritems())
        #colonnes = simulation.output_table.table.columns
        of_frames.append(frame)
        variables = agg.varlist
        labels_variables = [label_by_name[variable]
                            for variable in variables]
        del simulation, agg, frame
        # simulation.save_content(name, filename)
        gc.collect()

        # ERFS side: same aggregates computed from the raw survey data.
        erfs_frame = build_erf_aggregates(variables=variables, year=year)
        erfs_frame.rename(columns=label_by_name, inplace=True)
        erfs_frame = erfs_frame.T
        erfs_frame.reset_index(inplace=True)
        erfs_frame['year'] = year
        erfs_frames.append(erfs_frame)
        del erfs_frame
        gc.collect()

    self.labels_variables = labels_variables
    self.aggregates_of_dataframe = of_frames
    self.aggregates_erfs_dataframe = erfs_frames
def build_erf_aggregates(): """ Fetch the relevant aggregates from erf data """ # Uses rpy2. # On MS Windows, The environment variable R_HOME and R_USER should be set import pandas.rpy.common as com import rpy2.rpy_classic as rpy rpy.set_default_mode(rpy.NO_CONVERSION) country = 'france' for year in range(2006, 2008): menageXX = "menage" + str(year)[2:] menageRdata = menageXX + ".Rdata" filename = os.path.join(os.path.dirname(DATA_DIR), 'R', 'erf', str(year), menageRdata) yr = str(year) simu = SurveySimulation() simu.set_config(year=yr, country=country) simu.set_param() agg = Aggregates() agg.set_simulation(simu) # print agg.varlist rpy.r.load(filename) menage = com.load_data(menageXX) cols = [] print year for col in agg.varlist: #print col erf_var = "m_" + col + "m" if erf_var in menage.columns: cols += [erf_var] df = menage[cols] wprm = menage["wprm"] for col in df.columns: tot = (df[col] * wprm).sum() / 1e9 print col, tot
def test_gini(): """ Compute Gini coefficients """ years = range(2006, 2010) for year in years: yr = str(year) # fname = "Agg_%s.%s" %(str(yr), "xls") simu = SurveySimulation() simu.set_config(year=yr) simu.set_param() inflator = get_loyer_inflator(year) simu.inflate_survey({'loyer': inflator}) simu.compute() inequality = Inequality() inequality.set_simulation(simu) inequality.compute() print inequality.inequality_dataframe print inequality.poverty del simu
def survey_case_3_tables(): year = 2006 yr = str(year) simulation = SurveySimulation() survey_input = HDFStore(survey3_test) # convert_to_3_tables(year=year, survey_file=survey_file, output_file=survey3_file) df_men = survey_input['survey_2006/men'] df_foy = survey_input['survey_2006/foy'] df_fam = survey_input['survey_2006/fam'] df_fam['alr'] = 0 survey_input['survey_2006/fam'] = df_fam simulation.num_table = 3 simulation.set_config(year=yr, survey_filename=survey3_test) simulation.set_param() simulation.compute() # Compute aggregates agg = Aggregates() agg.set_simulation(simulation) agg.compute() df1 = agg.aggr_frame print df1.to_string() # # Saving aggregates # if writer is None: # writer = ExcelWriter(str(fname) # agg.aggr_frame.to_excel(writer, yr, index= False, header= True) # Displaying a pivot table from openfisca_qt.plugins.survey.distribution import OpenfiscaPivotTable pivot_table = OpenfiscaPivotTable() pivot_table.set_simulation(simulation) df2 = pivot_table.get_table(by='so', vars=['nivvie']) print df2.to_string()
def build_aggregates3():
    """Benchmark the 3-table vs 1-table survey pipelines.

    For each year, runs the simulation with ``num_table=3`` then with
    ``num_table=1``, exports the aggregates and per-entity CSV extracts,
    and prints the elapsed time of each variant.
    """
    writer = None
    years = range(2006, 2007)
    tot1 = 0
    tot3 = 0
    for year in years:
        yr = str(year)
        # fname = "Agg_%s.%s" %(str(yr), "xls")
        simu = SurveySimulation()
        simu.set_config(year=yr)
        simu.set_param()
        import time

        # --- 3-table run -------------------------------------------------
        deb3 = time.clock()
        simu.set_survey(num_table=3)
        simu.compute()
        fin3 = time.clock()
        print "coucou"
        col = simu.survey.column_by_name.get("so")
        print col.entity
        agg3 = Aggregates3()
        agg3.set_simulation(simu)
        agg3.compute()
        # if writer is None:
        #     writer = ExcelWriter(str(fname_all))
        fname_all = os.path.join(destination_dir, 'agg3.xlsx')
        agg3.aggr_frame.to_excel(fname_all, yr, index=False, header=True)
        # NOTE(review): `num_output` is not defined in this function --
        # presumably a module-level constant capping the exported rows;
        # confirm it exists at module scope.
        for ent in ['ind', 'men', 'foy', 'fam']:
            dir_name = destination_dir + ent + '.csv'
            ## simu.survey.table3[ent].to_csv(dir_name)
            # import pdb
            # pdb.set_trace()
            ## com.convert_to_r_dataframe
            simu.output_table.table3[ent][:num_output].to_csv(dir_name)

        # --- 1-table run -------------------------------------------------
        deb1 = time.clock()
        simu.set_survey(num_table=1)
        print "prob compute"
        simu.compute()
        fin1 = time.clock()
        dir_name = destination_dir + 'en1' + '.csv'
        print "prob output"
        simu.output_table.table[:num_output].to_csv(dir_name)
        agg = Aggregates()
        print "prob set"
        agg.set_simulation(simu)
        print "prob compute"
        agg.compute()
        # if writer is None:
        #     writer = ExcelWriter(str(fname_all))
        fname_all = os.path.join(destination_dir, 'agg1.xlsx')
        print "prob ind"
        agg.aggr_frame.to_excel(fname_all, yr, index=False, header=True)

        del simu
        del agg
        import gc
        gc.collect()
        tot1 += fin1 - deb1
        tot3 += fin3 - deb3
        print "Time to process 1 table :" + str(fin1 - deb1)
        print "Time to process 3 table :" + str(fin3 - deb3)
    print tot1, tot3, tot3 - tot1
def compar_num_table():
    """Compare the 1-table and 3-table survey pipelines on (optionally)
    a household subset.

    Exports matching per-entity CSV extracts (to be compared in R) for
    both variants and prints the elapsed time of each.
    """
    writer = None
    years = range(2006, 2007)
    tot1 = 0
    tot3 = 0
    filename = destination_dir + 'output3.h5'
    store = HDFStore(filename)
    for year in years:
        yr = str(year)
        # fname = "Agg_%s.%s" %(str(yr), "xls")
        simu = SurveySimulation()
        simu.set_config(year=yr)
        simu.set_param()
        import time

        deb3 = time.clock()
        # Successive candidate household subsets kept for reference; only
        # the last assignment (None == full sample) is effective.
        sous_ech = [6000080, 6000080, 6000195, 6000195, 6000288, 6000288,
                    6000499, 6000499, 6000531, 6000531, 6000542, 6000542]
        sous_ech = [6000191, 6000191, 6000531, 6000614, 6000195, 6000195,
                    6000499, 6000499, 6000531, 6000614, 6000531, 6000614,
                    6000531, 6000531, 6000195, 6000195, 6000288, 6000288,
                    6000499, 6000499, 6000531, 6000542, 6000542, 6000614,
                    6000191]
        #al
        sous_ech = [6000122, 6000865, 6001256]
        # typ_men
        sous_ech = [6006630, 6006753, 6008508]
        # foy
        sous_ech = [6036028, 6028397, 6019248]
        sous_ech = None

        # --- 3-table run -------------------------------------------------
        simu.set_survey(num_table=3, subset=sous_ech)
        simu.compute()

        agg3 = Aggregates()
        # NOTE(review): the renamed `tab` is discarded each iteration --
        # this loop has no lasting effect; kept as-is.
        for ent in ['ind', 'men', 'foy', 'fam']:
            tab = simu.output_table.table3[ent]
            renam = {}
            renam['wprm_' + ent] = 'wprm'
            tab = tab.rename(columns=renam)
        agg3.set_simulation(simu)
        agg3.compute()
        fin3 = time.clock()
        # if writer is None:
        #     writer = ExcelWriter(str(fname_all))
        fname_all = os.path.join(destination_dir, 'agg3.xlsx')
        agg3.aggr_frame.to_excel(fname_all, yr, index=False, header=True)

        # export to csv to run compar in R
        for ent in ['ind', 'men', 'foy', 'fam']:
            dir_name = destination_dir + ent + '.csv'
            tab = simu.output_table.table3[ent]
            renam = {}
            renam['wprm_' + ent] = 'wprm'
            if ent == 'ind':
                ident = ["idmen", "quimen", "idfam", "quifam",
                         "idfoy", "quifoy"]
            else:
                ident = ["idmen", "idfam", "idfoy"]
            for nom in ident:
                renam[nom + '_' + ent] = nom
            tab = tab.rename(columns=renam)
            order_var = ident + list(tab.columns - ident)
            tab.sort(['idmen', 'idfam', 'idfoy']).ix[
                :num_output, order_var].to_csv(dir_name)

        # --- 1-table run -------------------------------------------------
        deb1 = time.clock()
        simu.set_survey(num_table=1, subset=sous_ech)
        simu.compute()
        agg = Aggregates()
        agg.set_simulation(simu)
        agg.compute()
        fin1 = time.clock()

        # export to csv to run compar in R
        dir_name = destination_dir + 'en1' + '.csv'
        tab = simu.output_table.table
        tab.drop(['idfam_fam', 'idfam_foy', 'idfam_men', 'idfoy_fam',
                  'idfoy_foy', 'idfoy_men', 'idmen_men', 'idmen_fam',
                  'idmen_foy', 'wprm_foy', 'wprm_fam'],
                 axis=1, inplace=True)
        renam = {}
        ent = 'ind'
        renam['wprm_' + ent] = 'wprm'
        ident = ["noi", "idmen", "quimen", "idfam", "quifam",
                 "idfoy", "quifoy"]
        for nom in ident:
            renam[nom + '_' + ent] = nom
        tab = tab.rename(columns=renam)
        order_var = ident + list(tab.columns - ident)
        tab.sort(['idmen', 'idfam', 'idfoy']).ix[
            :num_output, order_var].to_csv(dir_name)
        # if writer is None:
        #     writer = ExcelWriter(str(fname_all))
        fname_all = os.path.join(destination_dir, 'agg1.xlsx')
        agg.aggr_frame.to_excel(fname_all, yr, index=False, header=True)

        del simu
        del agg
        import gc
        gc.collect()
        tot1 += fin1 - deb1
        tot3 += fin3 - deb3
        print "Time to process 1 table :" + str(fin1 - deb1)
        print "Time to process 3 table :" + str(fin3 - deb3)
    print tot1, tot3, tot3 - tot1
def test(year=2006, variables=['af']):
    """Debug a set of OF variables (default: 'af') against ERF data.

    Builds household-level comparison tables between the simulation
    output and the ERF survey, prints the largest discrepancies and,
    for each compared prestation, inspects its children and (recursively)
    its parents together with the relevant individual-level options.

    NOTE(review): the mutable default `variables=['af']` is kept as-is;
    the list is rebound (not mutated) below, so it is harmless here.
    """
    simulation = SurveySimulation()
    survey_filename = os.path.join(model.DATA_DIR, 'sources', 'test.h5')
    simulation.set_config(year=year, survey_filename=survey_filename)
    simulation.set_param()
    simulation.compute()

    # of_aggregates = Aggregates()
    # of_aggregates.set_simulation(simulation)
    # of_aggregates.compute()
    # print of_aggregates.aggr_frame
    #
    # from openfisca_france.data.erf.aggregates import build_erf_aggregates
    # temp = (build_erf_aggregates(variables=variables, year= year))
    # print temp
    # return
    variable = "af"
    debugger = Debugger()
    debugger.set_simulation(simulation)
    debugger.set_variable(variable)
    debugger.show_aggregates()

    def get_all_ancestors(varlist):
        # Depth-first collection of the given columns plus all of their
        # `_parents`, recursively.
        if len(varlist) == 0:
            return []
        else:
            if varlist[0]._parents == set():
                return ([varlist[0]] + get_all_ancestors(varlist[1:]))
            else:
                return ([varlist[0]]
                        + get_all_ancestors(list(varlist[0]._parents))
                        + get_all_ancestors(varlist[1:]))

    # We want to get all ancestors + children + the options that we're
    # going to encounter
    parents = map(lambda x: simulation.output_table.column_by_name.get(x),
                  variables)
    parents = get_all_ancestors(parents)
    options = []
    for varcol in parents:
        options.extend(varcol._option.keys())
    options = list(set(options))
    #print options
    parents = map(lambda x: x.name, parents)
    for var in variables:
        children = set()
        varcol = simulation.output_table.column_by_name.get(var)
        children = children.union(set(map(lambda x: x.name,
                                          varcol._children)))
    variables = list(set(parents + list(children)))
    #print variables
    del parents, children
    gc.collect()

    def get_var(variable):
        # Household-level aggregation of a single variable.
        variables = [variable]
        return simulation.aggregated_by_entity(entity="men",
                                               variables=variables,
                                               all_output_vars=False,
                                               force_sum=True)[0]

    simu_aggr_tables = get_var(variables[0])
    for var in variables[1:]:
        simu_aggr_tables = simu_aggr_tables.merge(
            get_var(var)[['idmen', var]], on='idmen', how='outer')

    # We load the data from erf table in case we have to pick data there
    erf_data = DataCollection(year=year)
    os.system('cls')
    todo = set(variables + ["ident", "wprm"]).union(set(options))
    print 'Variables or equivalents to fetch :'
    print todo
    '''
    Méthode générale pour aller chercher les variables de l'erf/eec
    ( qui n'ont pas forcément le même nom et parfois sont les variables
    utilisées pour créér l'of ):
    1 - essayer le get_of2erf, ça doit marcher pour les variables
    principales ( au moins les aggrégats que l'on compare )
    Si les variables ne sont pas directement dans la table, elles ont été
    calculées à partir d'autres variables de données erf/eec donc chercher
    dans :
    2 - build_survey
    3 - model/model.py qui dira éventuellement dans quel module de model/
    chercher
    Le 'print todo' vous indique quelles variables chercher
    ( attention à ne pas inclure les enfants directs )
    L'utilisation du Ctrl-H est profitable !
    '''

    fetch_eec = ['statut', 'titc', 'chpub', 'encadr', 'prosa', 'age',
                 'naim', 'naia', 'noindiv']
    fetch_erf = ['zsali', 'af', 'ident', 'wprm', 'noi', 'noindiv', 'quelfic']
    erf_df = erf_data.get_of_values(variables=fetch_erf, table="erf_indivi")
    eec_df = erf_data.get_of_values(variables=fetch_eec, table="eec_indivi")
    erf_eec_indivi = erf_df.merge(eec_df, on='noindiv', how='inner')
    assert 'quelfic' in erf_eec_indivi.columns, \
        "quelfic not in erf_indivi columns"
    del eec_df, erf_df

    # We then get the aggregate variables for the menage
    # ( mainly to compare with of )
    print 'Loading data from erf_menage table'
    erf_menage = erf_data.get_of_values(variables=list(todo) + ['quelfic'],
                                        table="erf_menage")
    del todo
    gc.collect()
    assert 'ident' in erf_menage.columns, "ident not in erf_menage.columns"

    from openfisca_france.data.erf import get_erf2of
    erf2of = get_erf2of()
    erf_menage.rename(columns=erf2of, inplace=True)

    # We get the options from the simulation non aggregated tables:
    # First from the output_table
    # We recreate the noindiv in output_table
    simulation.output_table.table['noindiv'] = \
        100 * simulation.output_table.table.idmen_ind + \
        simulation.output_table.table.noi_ind
    simulation.output_table.table['noindiv'] = \
        simulation.output_table.table['noindiv'].astype(np.int64)
    s1 = [var for var in
          set(options).intersection(
              set(simulation.output_table.table.columns))] + \
        ['idmen_ind', 'quimen_ind', 'noindiv']
    simu_nonaggr_tables = (simulation.output_table.table)[s1]
    simu_nonaggr_tables.rename(columns={'idmen_ind': 'idmen',
                                        'quimen_ind': 'quimen'},
                               inplace=True)
    assert 'noindiv' in simu_nonaggr_tables.columns

    # If not found, we dwelve into the input_table
    if (set(s1) - set(['idmen_ind', 'quimen_ind', 'noindiv'])) < set(options):
        assert 'noindiv' in simulation.input_table.table.columns, \
            "'noindiv' not in simulation.input_table.table.columns"
        s2 = [var for var in
              (set(options).intersection(
                  set(simulation.input_table.table.columns)) - set(s1))] + \
            ['noindiv']
        #print s2
        temp = simulation.input_table.table[s2]
        simu_nonaggr_tables = simu_nonaggr_tables.merge(temp, on='noindiv',
                                                        how='inner',
                                                        sort=False)
        del s2, temp
    del s1
    gc.collect()

    simu_nonaggr_tables = simu_nonaggr_tables[
        list(set(options)) + ['idmen', 'quimen', 'noindiv']]
    #print options, variables
    assert 'idmen' in simu_nonaggr_tables.columns, \
        'Idmen not in simu_nonaggr_tables columns'

    # Check the idmens that are not common
    erf_menage.rename(columns={'ident': 'idmen'}, inplace=True)
    print "\n"
    print 'Checking if idmen is here...'
    print '\n ERF : '
    print 'idmen' in erf_menage.columns
    print "\n Simulation output"
    print 'idmen' in simu_aggr_tables.columns
    print "\n"

    #print 'Dropping duplicates of idmen for both tables...'
    assert not erf_menage["idmen"].duplicated().any(), \
        "Duplicated idmen in erf_menage"
    #erf_menage.drop_duplicates('idmen', inplace = True)
    simu_aggr_tables.drop_duplicates('idmen', inplace=True)
    assert not simu_aggr_tables["idmen"].duplicated().any(), \
        "Duplicated idmen in of"

    print 'Checking mismatching idmen... '
    s1 = set(erf_menage['idmen']) - (set(simu_aggr_tables['idmen']))
    if s1:
        print "idmen that aren't in simu_aggr_tables : %s" % str(len(s1))
        pass
    s2 = (set(simu_aggr_tables['idmen'])) - set(erf_menage['idmen'])
    if s2:
        print "idmen that aren't in erf_menage : %s" % str(len(s2))
        pass
    del s1, s2

    # Restrict to common idmens and merge
    s3 = set(erf_menage['idmen']).intersection(
        set(simu_aggr_tables['idmen']))
    print "Restricting to %s common idmen... \n" % str(len(s3))
    erf_menage = erf_menage[erf_menage['idmen'].isin(s3)]
    simu_aggr_tables = simu_aggr_tables[simu_aggr_tables['idmen'].isin(s3)]
    del s3
    gc.collect()

    #print erf_menage.columns
    #print simu_aggr_tables.columns

    # Compare differences across of and erf dataframes
    print "Comparing differences between dataframes... \n"
    colcom = (set(erf_menage.columns).intersection(
        set(simu_aggr_tables.columns))) - set(['idmen', 'wprm'])
    print 'Common variables: '
    print colcom
    erf_menage.reset_index(inplace=True)
    simu_aggr_tables.reset_index(inplace=True)
    for col in colcom:
        temp = set(erf_menage['idmen'][erf_menage[col] !=
                                       simu_aggr_tables[col]])
        print "Numbers of idmen that aren't equal on variable %s : %s \n" % (
            col, str(len(temp)))
        del temp

    # Detect the biggest differences
    bigtable = merge(erf_menage, simu_aggr_tables, on='idmen',
                     how='inner', suffixes=('_erf', '_of'))
    print 'Length of new dataframe is %s' % str(len(bigtable))
    #print bigtable.columns
    bigtable.set_index('idmen', drop=False, inplace=True)

    already_met = []
    options_met = []
    for col in colcom:
        bigtemp = None
        table = bigtable[and_(bigtable[col + '_erf'] != 0,
                              bigtable[col + '_of'] != 0)]
        table[col] = (table[col + '_erf'] - table[col + '_of']) / \
            table[col + '_erf']  #Difference relative
        table[col] = table[col].apply(lambda x: abs(x))
        print 'Minimum difference between the two tables for %s is %s' % (
            col, str(table[col].min()))
        print 'Maximum difference between the two tables for %s is %s' % (
            col, str(table[col].max()))
        print table[col].describe()
        try:
            assert len(table[col]) == len(table['wprm_of']), "PINAGS"
            dec, values = mwp(table[col], np.arange(1, 11),
                              table['wprm_of'], 2, return_quantiles=True)
            #print sorted(values)
            dec, values = mwp(table[col], np.arange(1, 101),
                              table['wprm_erf'], 2, return_quantiles=True)
            #print sorted(values)[90:]
            del dec, values
            gc.collect()
        except:
            #print 'Weighted percentile method didnt work for %s' %col
            pass
        print "\n"

        # Show the relevant information for the most deviant households
        table.sort(columns=col, ascending=False, inplace=True)
        #print table[col][0:10].to_string()
        if bigtemp is None:
            bigtemp = {'table': table[[col, col + '_of', col + '_erf',
                                       'idmen']][0:10],
                       'options': None}
        bigtemp['table'][col + 'div'] = \
            bigtemp['table'][col + '_of'] / bigtemp['table'][col + '_erf']
        print bigtemp['table'].to_string()

        '''
        bigtemp is the table which will get filled little by little by
        the relevant variables. Up to the last rows of code 'table'
        refers to a table of aggregated values, while 'options is a
        table of individual variables. The reason we call it in a
        dictionnary is also because we modify it inside the recursive
        function 'iter_on parents', and it causes an error in Python
        unless for certain types like dictionnary values.
        '''
        #print "\n"

        # If variable is a Prestation, we show the dependancies
        varcol = simulation.output_table.column_by_name.get(col)
        if isinstance(varcol, Prestation):
            '''
            For the direct children
            '''
            if not varcol._children is None:
                ch_to_fetch = list(varcol._children)
                ch_to_fetch = map(lambda x: x.name, ch_to_fetch)
                ch_fetched = []
                if set(ch_to_fetch) <= set(simu_aggr_tables.columns):
                    print "Variables which need %s to be computed :\n %s \n" % (
                        col, str(ch_to_fetch))
                    for var in ch_to_fetch:
                        if var + '_of' in table.columns:
                            ch_fetched.append(var + '_of')
                        else:
                            ch_fetched.append(var)
                elif set(ch_to_fetch) <= set(
                        simu_aggr_tables.columns).union(erf_menage.columns):
                    print "Variables which need %s to be computed (some missing picked in erf):\n %s \n" % (
                        col, str(ch_to_fetch))
                    for var in ch_to_fetch:
                        if var in simu_aggr_tables.columns:
                            if var + '_of' in table.columns:
                                ch_fetched.append(var + '_of')
                        elif var + '_erf' in table.columns:
                            ch_fetched.append(var + '_erf')
                        else:
                            ch_fetched.append(var)
                else:
                    print "Variables which need %s to be computed (some missing):\n %s \n" % (
                        col, str(ch_to_fetch))
                    for var in ch_to_fetch:
                        if var in simu_aggr_tables.columns:
                            if var + '_of' in table.columns:
                                ch_fetched.append(var + '_of')
                        elif var in erf_menage.columns:
                            if var + '_erf' in table.columns:
                                ch_fetched.append(var + '_erf')
                print table[[col] + ch_fetched][0:10]
                print "\n"
                del ch_to_fetch, ch_fetched

            '''
            For the parents
            '''
            def iter_on_parents(varcol):
                # Recursively walk up the `_parents` chain, merging each
                # ancestor's relevant columns/options into bigtemp.
                if (varcol._parents == set() and varcol._option == {}) \
                        or varcol.name in already_met:
                    return
                else:
                    par_to_fetch = list(varcol._parents)
                    par_to_fetch = map(lambda x: x.name, par_to_fetch)
                    par_fetched = []
                    # NOTE(review): these conditions test and iterate
                    # `par_fetched` (still empty here) instead of
                    # `par_to_fetch` -- looks like a latent bug; kept
                    # byte-identical.
                    if set(par_fetched) <= set(simu_aggr_tables.columns):
                        #print "Variables the prestation %s depends of :\n %s \n" %(varcol.name, str(par_fetched))
                        for var in par_fetched:
                            if var + '_of' in table.columns:
                                par_fetched.append(var + '_of')
                            else:
                                par_fetched.append(var)
                    elif set(par_fetched) <= set(
                            simu_aggr_tables.columns).union(
                                erf_menage.columns):
                        #print "Variables the prestation %s depends of (some missing picked in erf):\n %s \n" %(varcol.name,str(par_fetched))
                        for var in par_fetched:
                            if var in simu_aggr_tables.columns:
                                if var + '_of' in table.columns:
                                    par_fetched.append(var + '_of')
                                elif var + '_erf' in table.columns:
                                    par_fetched.append(var + '_erf')
                            else:
                                par_fetched.append(var)
                    else:
                        for var in par_fetched:
                            if var in simu_aggr_tables.columns:
                                if var + '_of' in table.columns:
                                    par_fetched.append(var + '_of')
                            elif var in erf_menage.columns:
                                if var + '_erf' in table.columns:
                                    par_fetched.append(var + '_erf')
                        if len(par_fetched) > 0:
                            #print "Variables the prestation %s depends of (some missing):\n %s \n" %(varcol.name, str(par_fetched))
                            pass
                        else:
                            #print "Variables the prestation %s depends of couldn't be found :\n %s \n" %(varcol.name, str(par_fetched))
                            pass

                    if len(par_fetched) > 0:
                        temp = table[[col, 'idmen'] + par_fetched][0:10]
                        bigtemp['table'] = pd.merge(temp, bigtemp['table'],
                                                    how='inner')
                        #print temp.to_string(), "\n"

                    if varcol._option != {} and not set(
                            varcol._option.keys()) < set(options_met):
                        vars_to_fetch = list(set(varcol._option.keys()) -
                                             set(options_met))
                        #print "and the options to current variable %s for the id's with strongest difference :\n %s \n" %(varcol.name, varcol._option.keys())
                        liste = [i for i in range(0, 10)]
                        liste = map(lambda x: table['idmen'].iloc[x], liste)
                        temp = simu_nonaggr_tables[
                            ['idmen', 'quimen', 'noindiv'] + vars_to_fetch][
                                simu_nonaggr_tables['idmen'].isin(
                                    table['idmen'][0:10])]
                        temp_sorted = temp[temp['idmen'] == liste[0]]
                        for i in xrange(1, 10):
                            temp_sorted = temp_sorted.append(
                                temp[temp['idmen'] == liste[i]])
                        if bigtemp['options'] is None:
                            bigtemp['options'] = temp_sorted
                            bigtemp['options'] = bigtemp['options'].merge(
                                erf_eec_indivi, on='noindiv', how='outer')
                        else:
                            bigtemp['options'] = bigtemp['options'].merge(
                                temp_sorted,
                                on=['noindiv', 'idmen', 'quimen'],
                                how='outer')
                        # temp_sorted.set_index(['idmen', 'quimen'],
                        #                       drop = True, inplace = True)
                        # If we do that
                        del temp, temp_sorted
                        gc.collect()

                    already_met.append(varcol.name)
                    options_met.extend(varcol._option.keys())
                    for var in varcol._parents:
                        iter_on_parents(var)

            iter_on_parents(varcol)

            # We merge the aggregate table with the option table
            # ( for each individual in entity )
            bigtemp['table'] = bigtemp['table'].merge(
                bigtemp['options'], how='left', on='idmen',
                suffixes=('(agg)', '(ind)'))

            # Reshaping the table to group by descending error on col,
            # common entities
            bigtemp['table'].sort(columns=['af', 'quimen'],
                                  ascending=[False, True], inplace=True)
            bigtemp['table'] = bigtemp['table'].groupby(
                ['idmen', 'quimen'], sort=False).sum()
            print "Table of values for %s dependencies : \n" % col
            print bigtemp['table'].to_string()
            del bigtemp['table'], bigtemp['options']
            gc.collect()
# NOTE(review): the four statements below duplicate the tail of the
# debugging loop in test() above and reference its loop-local names
# (`col`, `bigtemp`); executed at module level they would raise a
# NameError -- this looks like a stray copy/paste fragment to remove.
print "Table of values for %s dependencies : \n" % col
print bigtemp['table'].to_string()
del bigtemp['table'], bigtemp['options']
gc.collect()


if __name__ == '__main__':
    # Script entry point: run (or reload) a debug simulation and show
    # the aggregates of 'af'.
    restart = True
    survey = 'survey.h5'
    save_path = os.path.join(model.DATA_DIR, 'erf')
    saved_simulation_filename = os.path.join(save_path,
                                             'debugger_' + survey[:-3])
    if restart:
        year = 2006
        simulation = SurveySimulation()
        if survey == 'survey.h5':
            survey_filename = os.path.join(model.DATA_DIR, survey)
        else:
            survey_filename = os.path.join(model.DATA_DIR, 'sources', survey)
        simulation.set_config(year=year, survey_filename=survey_filename)
        simulation.set_param()
        simulation.compute()
        simulation.save_content('debug', saved_simulation_filename)
    deb = Debugger()
    deb.set_simulation(name='debug', filename=saved_simulation_filename)
    deb.set_variable('af')
    deb.show_aggregates()
    deb.preproc()
def check_survey(year=2013):
    """Smoke-test a SurveySimulation run for the given year.

    Builds a simulation with the default legislation parameters and the
    default survey, then runs the full computation.

    Parameters
    ----------
    year : int, default 2013
        Legislation/survey year passed to ``set_config``.

    Returns
    -------
    SurveySimulation
        The computed simulation, so callers can inspect its outputs.
        (Previously the simulation was discarded, which made the check
        useless beyond "it did not raise"; returning it is backward
        compatible since the old return value was ``None`` and ignored.)
    """
    simulation = SurveySimulation()
    simulation.set_config(year=year)
    simulation.set_param()
    simulation.compute()
    return simulation
def test_laurence():
    '''
    Computes the openfisca/real numbers comparaison table in excel worksheet.
    Warning: To add more years you'll have to twitch the code manually.
    Default is years 2006 to 2009 included.
    '''
    def save_as_xls(df, alter_method=True):
        # Saves a datatable under Excel table using XLtable
        if alter_method:
            # NOTE(review): "C:\desindexation.xls" contains an unescaped
            # backslash; "\d" is not a recognised escape so it survives as-is
            # in Python 2, but a raw string would be safer — TODO confirm.
            filename = "C:\desindexation.xls"
            print filename
            writer = ExcelWriter(str(filename))
            df.to_excel(writer)
            writer.save()
        else:
            # XLtable is useful for colouring/styling the table, unnecessary otherwise
            stxl = XLtable(df)
            # <========== HERE TO CHANGE OVERLAY ======>
            wb = xlwt.Workbook()
            ws = wb.add_sheet('resultatstest')
            erfxcel = stxl.place_table(ws)
            try:  # I dunno more clever commands
                wb.save("C:\outputtest.xls")
            except:
                # Fall back to a randomised filename (e.g. when the target
                # file is locked by an open Excel instance).
                n = random.randint(0, 100)
                wb.save("C:\outputtest_" + str(n) + ".xls")

    #===============================================================================
    # from numpy.random import randn
    # mesures = ['cotsoc','af', 'add', 'cotsoc','af', 'add', 'cotsoc','af', 'add',
    #            'cotsoc','af', 'add', 'cotsoc','af', 'add', 'cotsoc','af', 'add',
    #            'cotsoc','af', 'add', 'cotsoc','af', 'add', 'cotsoc','af', 'add']
    # sources = ['of', 'of', 'of', 'erfs', 'erfs', 'erfs', 'reel', 'reel', 'reel',
    #            'of', 'of', 'of', 'erfs', 'erfs', 'erfs', 'reel', 'reel', 'reel',
    #            'of', 'of', 'of', 'erfs', 'erfs', 'erfs', 'reel', 'reel', 'reel']
    # year = ['2006', '2006', '2006', '2006', '2006', '2006', '2006', '2006', '2006',
    #         '2007', '2007', '2007', '2007', '2007', '2007', '2007', '2007', '2007',
    #         '2008', '2008', '2008', '2008', '2008', '2008', '2008', '2008', '2008']
    # ind = zip(*[mesures,sources, year])
    # # print ind
    # from pandas.core.index import MultiIndex
    # ind = MultiIndex.from_tuples(ind, names = ['mesure', 'source', 'year'])
    # # print ind
    # d = pd.DataFrame(randn(27,2), columns = ['Depenses', 'Recettes'], index = ind)
    # d.reset_index(inplace = True, drop = False)
    # d = d.groupby(by = ['mesure', 'source', 'year'], sort = False).sum()
    # print d
    # d_unstacked = d.unstack()
    # print d
    # indtemp1 = d.index.get_level_values(0)
    # indtemp2 = d.index.get_level_values(1)
    # indexi = zip(*[indtemp1, indtemp2])
    # print indexi
    # indexi_bis = []
    # for i in xrange(len(indexi)):
    #     if indexi[i] not in indexi_bis:
    #         indexi_bis.append(indexi[i])
    # indexi = indexi_bis
    # indexi = MultiIndex.from_tuples(indexi, names = ['Mesure', 'source'])
    # print indexi
    # d_unstacked = d_unstacked.reindex_axis(indexi, axis = 0)
    # print d_unstacked.to_string()
    # save_as_xls(d_unstacked)
    # return
    #===============================================================================

    def reshape_tables(dfs, dfs_erf):
        # Merges the per-year openfisca tables (dfs) and ERF tables (dfs_erf)
        # into a single frame indexed by (Mesure, source) with years as columns.
        # NOTE(review): relies on `labels_variables` from the enclosing scope,
        # which is (re)bound inside the data-fetching loop below — so this must
        # only be called after that loop has run.
        agg = Aggregates()  # We need this for the columns labels to work
        print 'Resetting index to avoid later trouble on manipulation'
        for d in dfs:
            d.reset_index(inplace=True)
            d.set_index('Mesure', inplace=True, drop=False)
            # NOTE(review): reindex_axis is not in-place and its result is
            # discarded here — looks like a no-op; confirm intent.
            d.reindex_axis(labels_variables, axis=0)
            d.reset_index(inplace=True, drop=True)
            # print d.to_string()
        for d in dfs_erf:
            d.reset_index(inplace=True)
            d['Mesure'] = agg.labels['dep']
            d.set_index('index', inplace=True, drop=False)
            # NOTE(review): same discarded reindex_axis result as above.
            d.reindex_axis(agg.labels.values(), axis=0)
            d.reset_index(inplace=True, drop=True)
            # print d.to_string()

        # Concatenating the openfisca tables for =/= years
        temp = pd.concat([dfs[0], dfs[1]], ignore_index=True)
        temp = pd.concat([temp, dfs[2]], ignore_index=True)
        temp = pd.concat([temp, dfs[3]], ignore_index=True)
        del temp[agg.labels['entity']], temp['index']
        gc.collect()

        print 'We split the real aggregates from the of table'
        # temp keeps the openfisca figures; temp2 gets the "real" ones,
        # renamed to the plain benef/dep labels so both stack into the
        # same columns, distinguished by the 'source' column.
        temp2 = temp[[
            agg.labels['var'], agg.labels['benef_real'], agg.labels['dep_real'],
            'year'
        ]]
        del temp[agg.labels['benef_real']], temp[agg.labels['dep_real']]
        temp['source'] = 'of'
        temp2['source'] = 'reel'
        temp2.rename(columns={
            agg.labels['benef_real']: agg.labels['benef'],
            agg.labels['dep_real']: agg.labels['dep']
        }, inplace=True)
        temp = pd.concat([temp, temp2], ignore_index=True)

        print 'We add the erf data to the table'
        for df in dfs_erf:
            # 'level_0' is the artefact column left by the earlier reset_index.
            del df['level_0'], df['Mesure']
            df.rename(columns={
                'index': agg.labels['var'],
                1: agg.labels['dep']
            }, inplace=True)
        temp3 = pd.concat([dfs_erf[0], dfs_erf[1]], ignore_index=True)
        temp3 = pd.concat([temp3, dfs_erf[2]], ignore_index=True)
        temp3 = pd.concat([temp3, dfs_erf[3]], ignore_index=True)
        temp3['source'] = 'erfs'
        gc.collect()
        temp = pd.concat([temp, temp3], ignore_index=True)
        # print temp.to_string()

        print 'Index manipulation to reshape the output'
        temp.reset_index(drop=True, inplace=True)
        # We set the new index
        # temp.set_index('Mesure', drop = True, inplace = True)
        # temp.set_index('source', drop = True, append = True, inplace = True)
        # temp.set_index('year', drop = False, append = True, inplace = True)
        temp = temp.groupby(by=["Mesure", "source", "year"], sort=False).sum()
        # Tricky, the [mesure, source, year] index is unique so sum() will return the only value
        # Groupby automatically deleted the source, mesure... columns and added them to index
        assert (isinstance(temp, pd.DataFrame))
        # print temp.to_string()

        # We want the years to be in columns, so we use unstack
        temp_unstacked = temp.unstack()
        # Unfortunately, unstack automatically sorts rows and columns, we have to reindex the table :
        ## Reindexing rows
        from pandas.core.index import MultiIndex
        indtemp1 = temp.index.get_level_values(0)
        indtemp2 = temp.index.get_level_values(1)
        indexi = zip(*[indtemp1, indtemp2])
        # Deduplicate (Mesure, source) pairs while preserving first-seen order,
        # so the unstacked rows come back in the original (unsorted) order.
        indexi_bis = []
        for i in xrange(0, len(indexi)):
            if indexi[i] not in indexi_bis:
                indexi_bis.append(indexi[i])
        indexi = indexi_bis
        del indexi_bis
        indexi = MultiIndex.from_tuples(indexi, names=['Mesure', 'source'])
        # import pdb
        # pdb.set_trace()
        temp_unstacked = temp_unstacked.reindex_axis(
            indexi, axis=0)  # axis = 0 for rows, 1 for columns

        ## Reindexing columns
        # TODO : still not working
        col_indexi = []
        for col in temp.columns.get_level_values(0).unique():
            for yr in range(2006, 2010):
                col_indexi.append((col, str(yr)))
        col_indexi = MultiIndex.from_tuples(col_indexi)
        # print col_indexi
        # print temp_unstacked.columns
        print col_indexi
        # temp_unstacked = temp_unstacked.reindex_axis(col_indexi, axis = 1)

        # Our table is ready to be turned to Excel worksheet !
        print temp_unstacked.to_string()
        temp_unstacked.fillna(0, inplace=True)
        return temp_unstacked

    # Fetch one openfisca aggregate table and one ERF aggregate table per year.
    dfs = []
    dfs_erf = []
    for i in range(2006, 2010):
        year = i
        yr = str(i)
        # Running a standard SurveySim to get aggregates
        simulation = SurveySimulation()
        survey_filename = os.path.join(model.DATA_DIR, 'sources', 'test.h5')
        simulation.set_config(year=yr, survey_filename=survey_filename)
        simulation.set_param()
        simulation.compute()
        agg = Aggregates()
        agg.set_simulation(simulation)
        agg.compute()

        df = agg.aggr_frame
        df['year'] = year
        # Map internal variable names to their human-readable labels.
        label_by_name = dict(
            (name, column.label)
            for name, column in simulation.output_table.column_by_name.iteritems())
        #colonnes = simulation.output_table.table.columns
        dfs.append(df)
        variables = agg.varlist
        # NOTE(review): labels_variables is rebound every iteration; only the
        # last year's value is seen by reshape_tables — confirm that variable
        # labels are identical across years.
        labels_variables = [label_by_name[variable] for variable in variables]
        del simulation, agg, df
        gc.collect()

        #Getting ERF aggregates from ERF table
        temp = build_erf_aggregates(variables=variables, year=year)
        temp.rename(columns=label_by_name, inplace=True)
        temp = temp.T
        temp.reset_index(inplace=True)
        temp['year'] = year
        dfs_erf.append(temp)
        del temp
        gc.collect()
        print 'Out of data fetching for year ' + str(year)
    print 'Out of data fetching'

    datatest = reshape_tables(dfs, dfs_erf)
    save_as_xls(datatest, alter_method=False)