def build_aggregates():
    """Compute the aggregates for 2006 through 2009 and export them to Excel.

    For each year a survey simulation is configured, the 'loyer' variable is
    inflated with the factor returned by ``get_loyer_inflator`` and the
    simulation is computed.  The aggregates frame is then written to a sheet
    named after the year in the workbook located at ``fname_all`` and printed.
    The workbook is saved once, after every year has been processed.
    """
    import gc

    workbook = None
    for year in range(2006, 2010):
        sheet_name = str(year)

        simulation = SurveySimulation()
        simulation.set_config(year=sheet_name)
        simulation.set_param()
        simulation.set_survey()
        simulation.inflate_survey({'loyer': get_loyer_inflator(year)})
        simulation.compute()

        aggregates = Aggregates()
        aggregates.set_simulation(simulation)
        aggregates.compute()

        # The workbook is only opened when the first sheet is about to be written.
        if workbook is None:
            workbook = ExcelWriter(str(fname_all))
        frame = aggregates.aggr_frame
        frame.to_excel(workbook, sheet_name, index=False, header=True,
                       float_format="%.2f")
        print(frame.to_string())

        # Drop the references and collect before the next year is loaded.
        del simulation, aggregates, frame
        gc.collect()

    workbook.save()
def test():
    """Print the 2006 aggregates before and after applying calibrated weights.

    A calibration is configured from the 2006 margins file with the
    parameters invlo=3, up=3 and method='logit'.  The aggregates are computed
    and printed once before ``set_calibrated_weights`` is called and once
    after.
    """
    from openfisca_core.simulations import SurveySimulation
    from .aggregates import Aggregates

    year = 2006

    survey_simulation = SurveySimulation()
    survey_simulation.set_config(year=year)
    survey_simulation.set_param()
    survey_simulation.set_survey()

    calib = Calibration()
    calib.set_simulation(survey_simulation)
    margins_file = "../../countries/france/calibrations/calib_2006.csv"
    calib.set_inputs_margins_from_file(margins_file, 2006)
    for key, value in (('invlo', 3), ('up', 3), ('method', 'logit')):
        calib.set_param(key, value)

    agg = Aggregates()
    agg.set_simulation(survey_simulation)

    def compute_and_show():
        # Recompute the simulation, then the aggregates, and print the frame.
        survey_simulation.compute()
        agg.compute()
        print(agg.aggr_frame.to_string())

    compute_and_show()  # before set_calibrated_weights
    calib.set_calibrated_weights()
    compute_and_show()  # after set_calibrated_weights
def build_aggregates():
    """Compute the aggregates for 2006 through 2009 and export them to Excel.

    For each year a survey simulation is configured, the 'loyer' variable is
    inflated with the factor returned by ``get_loyer_inflator`` and the
    simulation is computed.  The aggregates frame is then written to a sheet
    named after the year in the workbook located at ``fname_all`` and printed.
    The workbook is saved once, after every year has been processed.
    """
    import gc

    workbook = None
    for year in range(2006, 2010):
        sheet_name = str(year)

        simulation = SurveySimulation()
        simulation.set_config(year=sheet_name)
        simulation.set_param()
        simulation.set_survey()
        simulation.inflate_survey({'loyer': get_loyer_inflator(year)})
        simulation.compute()

        aggregates = Aggregates()
        aggregates.set_simulation(simulation)
        aggregates.compute()

        # The workbook is only opened when the first sheet is about to be written.
        if workbook is None:
            workbook = ExcelWriter(str(fname_all))
        frame = aggregates.aggr_frame
        frame.to_excel(workbook, sheet_name, index=False, header=True,
                       float_format="%.2f")
        print(frame.to_string())

        # Drop the references and collect before the next year is loaded.
        del simulation, aggregates, frame
        gc.collect()

    workbook.save()
def build_from_openfisca(directory=None):
    """Store the OpenFisca age structure for 2006 through 2009 in an HDF5 file.

    For each year, the frame returned by ``get_age_structure`` has its 'wprm'
    column renamed after the year; the yearly frames are then merged into a
    single frame which is stored under the key "openfisca".

    Parameters
    ----------
    directory : str or None
        Directory in which ``H5_FILENAME`` is written.  Defaults to the
        directory containing this module.
    """
    df_age_final = None
    for yr in range(2006, 2010):
        simulation = SurveySimulation()
        simulation.set_config(year=yr)
        simulation.set_param()
        simulation.set_survey()

        df_age = get_age_structure(simulation)
        # Move the weights into a column named after the year so that every
        # year keeps its own column in the merged frame.
        df_age[yr] = df_age['wprm']
        del df_age['wprm']

        if df_age_final is None:
            df_age_final = df_age
        else:
            # No key is given: pandas joins on the columns the frames share.
            df_age_final = df_age_final.merge(df_age)

    if directory is None:
        directory = os.path.dirname(__file__)
    fname = os.path.join(directory, H5_FILENAME)

    print(df_age_final.dtypes)

    store = HDFStore(fname)
    try:
        store.put("openfisca", df_age_final)
    finally:
        # Close the file even when the write fails.
        store.close()
def get_common_dataframe(variables, year=2006):
    """
    Compare variables in ERF and OpenFisca.

    Computes a survey simulation for `year`, aggregates `variables` by
    household ("men") and merges the result on "idmen" with the same
    variables read from the ERF "menage" table for the same year.

    Parameters
    ----------
    variables : list of str
        Names of the variables to compare.
    year : int
        Year used both for the simulation and for the ERF data table.

    Returns
    -------
    DataFrame
        One row per "idmen" found in both sources.  Columns coming from the
        ERF table carry an "_erf" suffix (including "wprm_erf", which is
        always fetched).
    """
    simulation = SurveySimulation()
    simulation.set_config(year=year)
    simulation.set_param()
    simulation.set_survey()
    simulation.compute()

    # Read the ERF table of the requested year, not a hard-coded one.
    erf = ErfsDataTable(year=year)

    # The household identifier and the weights are always needed; work on a
    # copy so that the caller's list is left untouched.
    erf_variables = list(variables)
    for required in ("ident", "wprm"):
        if required not in erf_variables:
            erf_variables.append(required)

    erf_dataframe = erf.get_values(erf_variables, table="menage")
    erf_dataframe.rename(columns={'ident': 'idmen'}, inplace=True)
    # Suffix everything but the merge key, in a single rename.  Names are
    # compared by value: identity comparison of strings is unreliable.
    suffixed = dict((col, col + "_erf")
                    for col in erf_dataframe.columns if col != "idmen")
    erf_dataframe.rename(columns=suffixed, inplace=True)

    # aggregated_by_entity returns two frames; only the first one is compared.
    of_dataframe = simulation.aggregated_by_entity(
        "men", variables, all_output_vars=False, force_sum=True)[0]

    return of_dataframe.merge(erf_dataframe, on="idmen")
def get_common_dataframe(variables, year=2006):
    """
    Compare variables in ERF and OpenFisca.

    Computes a survey simulation for `year`, aggregates `variables` by
    household ("men") and merges the result on "idmen" with the same
    variables read from the ERF "menage" table for the same year.

    Parameters
    ----------
    variables : list of str
        Names of the variables to compare.
    year : int
        Year used both for the simulation and for the ERF data table.

    Returns
    -------
    DataFrame
        One row per "idmen" found in both sources.  Columns coming from the
        ERF table carry an "_erf" suffix (including "wprm_erf", which is
        always fetched).
    """
    simulation = SurveySimulation()
    simulation.set_config(year=year)
    simulation.set_param()
    simulation.set_survey()
    simulation.compute()

    # Read the ERF table of the requested year, not a hard-coded one.
    erf = ErfsDataTable(year=year)

    # The household identifier and the weights are always needed; work on a
    # copy so that the caller's list is left untouched.
    erf_variables = list(variables)
    for required in ("ident", "wprm"):
        if required not in erf_variables:
            erf_variables.append(required)

    erf_dataframe = erf.get_values(erf_variables, table="menage")
    erf_dataframe.rename(columns={'ident': 'idmen'}, inplace=True)
    # Suffix everything but the merge key, in a single rename.  Names are
    # compared by value: identity comparison of strings is unreliable.
    suffixed = dict((col, col + "_erf")
                    for col in erf_dataframe.columns if col != "idmen")
    erf_dataframe.rename(columns=suffixed, inplace=True)

    # aggregated_by_entity returns two frames; only the first one is compared.
    of_dataframe = simulation.aggregated_by_entity(
        "men", variables, all_output_vars=False, force_sum=True)[0]

    return of_dataframe.merge(erf_dataframe, on="idmen")
def build_aggregates():
    """Print the first four columns of the 2009 aggregates for a few variables.

    The aggregates are restricted to the variables listed below and computed
    with the default, real and difference outputs switched off.
    """
    import gc

    for year in range(2009, 2010):
        yr = str(year)
        simu = SurveySimulation()
        simu.set_config(year=yr)
        simu.set_param()
        simu.set_survey()
        simu.compute()

        variables = ["garext", "ci_garext", "inthab", "ppe_brute", "rni"]
        # NOTE(review): the result is not used; the call is kept in case it
        # has side effects on the simulation -- confirm and drop it if not.
        simu.aggregated_by_entity("men", variables, all_output_vars=False)

        agg = Aggregates()
        agg.set_simulation(simu)
        agg.show_default = False
        agg.show_real = False
        agg.show_diff = False
        # The original assigned an undefined name (`var_list`); the list
        # defined above is the only candidate in scope.
        agg.varlist = variables
        agg.compute()

        cols = agg.aggr_frame.columns[:4]
        print(agg.aggr_frame[cols].to_string())

        # Drop the references and collect before the next year is loaded.
        del simu
        del agg
        gc.collect()
def test():
    """Print the 2006 aggregates before and after applying calibrated weights.

    A calibration is configured from the 2006 margins file with the
    parameters invlo=3, up=3 and method='logit'.  The aggregates are computed
    and printed once before ``set_calibrated_weights`` is called and once
    after.
    """
    from openfisca_core.simulations import SurveySimulation
    from .aggregates import Aggregates

    year = 2006

    survey_simulation = SurveySimulation()
    survey_simulation.set_config(year=year)
    survey_simulation.set_param()
    survey_simulation.set_survey()

    calib = Calibration()
    calib.set_simulation(survey_simulation)
    margins_file = "../../countries/france/calibrations/calib_2006.csv"
    calib.set_inputs_margins_from_file(margins_file, 2006)
    for key, value in (('invlo', 3), ('up', 3), ('method', 'logit')):
        calib.set_param(key, value)

    agg = Aggregates()
    agg.set_simulation(survey_simulation)

    def compute_and_show():
        # Recompute the simulation, then the aggregates, and print the frame.
        survey_simulation.compute()
        agg.compute()
        print(agg.aggr_frame.to_string())

    compute_and_show()  # before set_calibrated_weights
    calib.set_calibrated_weights()
    compute_and_show()  # after set_calibrated_weights
def build_aggregates():
    """Print the first four columns of the 2009 aggregates for a few variables.

    The aggregates are restricted to the variables listed below and computed
    with the default, real and difference outputs switched off.
    """
    import gc

    for year in range(2009, 2010):
        yr = str(year)
        simu = SurveySimulation()
        simu.set_config(year=yr)
        simu.set_param()
        simu.set_survey()
        simu.compute()

        variables = ["garext", "ci_garext", "inthab", "ppe_brute", "rni"]
        # NOTE(review): the result is not used; the call is kept in case it
        # has side effects on the simulation -- confirm and drop it if not.
        simu.aggregated_by_entity("men", variables, all_output_vars=False)

        agg = Aggregates()
        agg.set_simulation(simu)
        agg.show_default = False
        agg.show_real = False
        agg.show_diff = False
        # The original assigned an undefined name (`var_list`); the list
        # defined above is the only candidate in scope.
        agg.varlist = variables
        agg.compute()

        cols = agg.aggr_frame.columns[:4]
        print(agg.aggr_frame[cols].to_string())

        # Drop the references and collect before the next year is loaded.
        del simu
        del agg
        gc.collect()
def test():
    """Print the structure of 'br_al' for 2006, using the 'survey_psl.h5' survey."""
    simulation = SurveySimulation()
    simulation.set_config(year=2006)
    simulation.set_param()

    # The survey is read from an explicit file located under model.DATA_DIR.
    survey_path = os.path.join(model.DATA_DIR, 'survey_psl.h5')
    simulation.set_survey(filename=survey_path)
    simulation.compute()

    structure = get_structure(simulation, 'br_al')
    print(structure.to_string())
def test():
    """Print the structure of 'br_al' for 2006, using the 'survey_psl.h5' survey."""
    simulation = SurveySimulation()
    simulation.set_config(year=2006)
    simulation.set_param()

    # The survey is read from an explicit file located under model.DATA_DIR.
    survey_path = os.path.join(model.DATA_DIR, 'survey_psl.h5')
    simulation.set_survey(filename=survey_path)
    simulation.compute()

    structure = get_structure(simulation, 'br_al')
    print(structure.to_string())
def toto():
    """Print the name, dtype and entity of every output column for 2006."""
    sim = SurveySimulation()
    sim.set_config(year=2006)
    sim.set_param()
    sim.set_survey()
    sim.compute()

    columns = sim.output_table.column_by_name
    for _key, column in columns.iteritems():
        # One attribute per line, in this order.
        for attribute in ("name", "_dtype", "entity"):
            print(getattr(column, attribute))
def toto():
    """Print the name, dtype and entity of every output column for 2006."""
    sim = SurveySimulation()
    sim.set_config(year=2006)
    sim.set_param()
    sim.set_survey()
    sim.compute()

    columns = sim.output_table.column_by_name
    for _key, column in columns.iteritems():
        # One attribute per line, in this order.
        for attribute in ("name", "_dtype", "entity"):
            print(getattr(column, attribute))
def test():
    """Print the maximum and minimum of six survey variables for 2006-2009.

    For every year the survey is loaded (the simulation is not computed) and,
    for each variable, its name, its maximum and its minimum are printed on
    three successive lines.
    """
    variable_names = ("f4ga", "f4gb", "f4gc", "f4ge", "f4gf", "f4gg")
    for year in range(2006, 2010):
        simulation = SurveySimulation()
        simulation.set_config(year=str(year))
        simulation.set_param()
        simulation.set_survey()

        for name in variable_names:
            print(name)
            values = simulation.survey.get_value(name)
            print(values.max())
            print(values.min())
def test():
    """Print the maximum and minimum of six survey variables for 2006-2009.

    For every year the survey is loaded (the simulation is not computed) and,
    for each variable, its name, its maximum and its minimum are printed on
    three successive lines.
    """
    variable_names = ("f4ga", "f4gb", "f4gc", "f4ge", "f4gf", "f4gg")
    for year in range(2006, 2010):
        simulation = SurveySimulation()
        simulation.set_config(year=str(year))
        simulation.set_param()
        simulation.set_survey()

        for name in variable_names:
            print(name)
            values = simulation.survey.get_value(name)
            print(values.max())
            print(values.min())
from openfisca_core.simulations import SurveySimulation

# Script: copy the survey tables of each listed year into a PyTables file
# named "survey3.h5" under destination_dir.
filename = os.path.join(model.DATA_DIR, 'survey3.h5')
# Passed to set_survey; also selects which of the two branches below runs.
num_table = 3
# NOTE(review): this store is opened but not used in the visible part of the
# script (and the name shadows the `input` builtin).
input = HDFStore(filename)
survey = tables.openFile(destination_dir + "survey3.h5", mode="w")
years = ['2006']
for yr in years:
    simu = SurveySimulation()
    simu.set_config(year=yr)
    simu.set_param()
    simu.set_survey(num_table=num_table)
    # One group per year, named "survey_<year>", at the root of the output file.
    survey_year = survey.createGroup("/", "survey_" + yr, "year")
    if num_table == 3:
        # One table per entity, typed after the record array of its data frame.
        for ent in ['ind', 'men', 'foy', 'fam']:
            tab = simu.survey.table3[ent]
            tab_type = tab.to_records(index=False).dtype
            survey_table = survey.createTable('/survey_' + yr, ent, tab_type)
            survey_table.append(tab.to_records(index=False))
            survey_table.flush()
    if num_table == 1:
        tab = simu.survey.table
        tab_type = tab.to_records(index=False).dtype
        # Two fixed names plus every field whose dtype string is '|b1'.
        # NOTE(review): what to_remote is used for is not visible here.
        to_remote = ['opt_colca', 'quelfic']
        for x in tab_type.descr:
            if x[1] == '|b1':
                to_remote = to_remote + [x[0]]
from openfisca_core.simulations import SurveySimulation
from openfisca_qt.scripts.validation.check_consistency_tests import check_inputs_enumcols, check_entities, check_weights

# Validation
# Should ideally produce a log file.
# Try to be as country/model agnostic as possible (so that part of the
# general machinery could live elsewhere).
# Proceed by importing the checks from a separate file in validation.

# Module-level simulation shared by the checks below; the survey is loaded
# but the simulation is not computed here.
year = 2006
simulation = SurveySimulation()
simulation.set_config(year = year)
simulation.set_param()
simulation.set_survey()

# Pre-computation validation
#
def test_inputs_consistency():
    """
    Test consistency of inputs data
    """
    # check that the Enumcols are right (and fix the labels/the original data)
    ok, message = check_inputs_enumcols(simulation)
    # NOTE(review): `message` is not printed; only a fixed text is shown.
    if not ok:
        print "Error: Check enumcols"
def build_aggregates3():
    """Time and export the 2006 aggregates with three tables and with one.

    The survey is first loaded with ``num_table=3``: the ``Aggregates3``
    frame goes to 'agg3.xlsx' and a slice of each entity table to
    '<entity>.csv'.  The same simulation is then reloaded with
    ``num_table=1``: a slice of the output table goes to 'en1.csv' and the
    ``Aggregates`` frame to 'agg1.xlsx'.  All files are written under
    ``destination_dir``.  The time spent in ``set_survey`` plus ``compute``
    is printed for both layouts, followed by the totals and their difference.
    """
    import gc
    import time

    total_one = 0
    total_three = 0
    for year in range(2006, 2007):
        sheet = str(year)
        simulation = SurveySimulation()
        simulation.set_config(year=sheet)
        simulation.set_param()

        # --- three-table layout ---
        start_three = time.clock()
        simulation.set_survey(num_table=3)
        simulation.compute()
        stop_three = time.clock()
        print("coucou")
        so_column = simulation.survey.column_by_name.get("so")
        print(so_column.entity)

        aggregates_three = Aggregates3()
        aggregates_three.set_simulation(simulation)
        aggregates_three.compute()
        excel_path = os.path.join(destination_dir, 'agg3.xlsx')
        aggregates_three.aggr_frame.to_excel(excel_path, sheet, index=False, header=True)
        for entity in ('ind', 'men', 'foy', 'fam'):
            csv_path = destination_dir + entity + '.csv'
            simulation.output_table.table3[entity][:num_output].to_csv(csv_path)

        # --- single-table layout ---
        start_one = time.clock()
        simulation.set_survey(num_table=1)
        print("prob compute")
        simulation.compute()
        stop_one = time.clock()
        csv_path = destination_dir + 'en1' + '.csv'
        print("prob output")
        simulation.output_table.table[:num_output].to_csv(csv_path)

        aggregates_one = Aggregates()
        print("prob set")
        aggregates_one.set_simulation(simulation)
        print("prob compute")
        aggregates_one.compute()
        excel_path = os.path.join(destination_dir, 'agg1.xlsx')
        print("prob ind")
        aggregates_one.aggr_frame.to_excel(excel_path, sheet, index=False, header=True)

        del simulation, aggregates_one
        gc.collect()

        elapsed_one = stop_one - start_one
        elapsed_three = stop_three - start_three
        total_one += elapsed_one
        total_three += elapsed_three
        print("Time to process 1 table :" + str(elapsed_one))
        print("Time to process 3 table :" + str(elapsed_three))

    print("%s %s %s" % (total_one, total_three, total_three - total_one))
def compar_num_table():
    """Compare the three-table and the single-table survey layouts for 2006.

    The simulation is computed with ``num_table=3`` and then with
    ``num_table=1``.  For both layouts the aggregates frame is written to an
    Excel file ('agg3.xlsx' / 'agg1.xlsx') and the output tables to CSV files
    ('<entity>.csv' / 'en1.csv') under ``destination_dir``, so that they can
    be compared in R.  The time spent loading the survey, computing the
    simulation and computing the aggregates is printed for both layouts,
    followed by the totals and their difference.
    """
    import gc
    import time

    years = range(2006, 2007)
    tot1 = 0
    tot3 = 0
    filename = destination_dir + 'output3.h5'
    # NOTE(review): the store is opened (as before) but nothing is written to
    # it in this function; it is now closed on every exit path.
    store = HDFStore(filename)
    try:
        for year in years:
            yr = str(year)
            simu = SurveySimulation()
            simu.set_config(year=yr)
            simu.set_param()

            # No subset is passed.  Hard-coded lists of identifiers used to be
            # assigned here and then overwritten by None; they were dead code.
            sous_ech = None

            deb3 = time.clock()
            simu.set_survey(num_table=3, subset=sous_ech)
            simu.compute()
            agg3 = Aggregates()
            # NOTE(review): the renamed frames are discarded, so this loop has
            # no visible effect on the results; it is kept only because it
            # sits inside the timed section.
            for ent in ['ind', 'men', 'foy', 'fam']:
                tab = simu.output_table.table3[ent]
                tab = tab.rename(columns={'wprm_' + ent: 'wprm'})
            agg3.set_simulation(simu)
            agg3.compute()
            fin3 = time.clock()

            fname_all = os.path.join(destination_dir, 'agg3.xlsx')
            agg3.aggr_frame.to_excel(fname_all, yr, index=False, header=True)

            # export to csv to run compar in R
            for ent in ['ind', 'men', 'foy', 'fam']:
                if ent == 'ind':
                    ident = ["idmen", "quimen", "idfam", "quifam", "idfoy", "quifoy"]
                else:
                    ident = ["idmen", "idfam", "idfoy"]
                _export_for_r(simu.output_table.table3[ent], ent, ident,
                              destination_dir + ent + '.csv')

            deb1 = time.clock()
            simu.set_survey(num_table=1, subset=sous_ech)
            simu.compute()
            agg = Aggregates()
            agg.set_simulation(simu)
            agg.compute()
            fin1 = time.clock()

            # export to csv to run compar in R
            tab = simu.output_table.table
            # These columns are removed from the single table, in place,
            # before it is exported.
            tab.drop([
                'idfam_fam', 'idfam_foy', 'idfam_men', 'idfoy_fam', 'idfoy_foy',
                'idfoy_men', 'idmen_men', 'idmen_fam', 'idmen_foy', 'wprm_foy',
                'wprm_fam'
            ], axis=1, inplace=True)
            ident = ["noi", "idmen", "quimen", "idfam", "quifam", "idfoy", "quifoy"]
            _export_for_r(tab, 'ind', ident, destination_dir + 'en1' + '.csv')

            fname_all = os.path.join(destination_dir, 'agg1.xlsx')
            agg.aggr_frame.to_excel(fname_all, yr, index=False, header=True)

            del simu
            del agg
            gc.collect()

            tot1 += fin1 - deb1
            tot3 += fin3 - deb3
            print("Time to process 1 table :" + str(fin1 - deb1))
            print("Time to process 3 table :" + str(fin3 - deb3))
    finally:
        store.close()

    print("%s %s %s" % (tot1, tot3, tot3 - tot1))


def _export_for_r(tab, ent, ident, csv_path):
    """Write `tab` to `csv_path` with the `ent` suffix stripped from its keys.

    The weight column 'wprm_<ent>' and every identifier '<name>_<ent>' listed
    in `ident` are renamed to their bare names, the identifiers are placed
    first, the rows are sorted on idmen, idfam and idfoy, and the
    ``.ix[:num_output, ...]`` selection is written out.
    """
    renam = {'wprm_' + ent: 'wprm'}
    for nom in ident:
        renam[nom + '_' + ent] = nom
    tab = tab.rename(columns=renam)
    # Identifiers first, then the remaining columns (index difference).
    order_var = ident + list(tab.columns - ident)
    tab.sort(['idmen', 'idfam', 'idfoy']).ix[:num_output, order_var].to_csv(csv_path)
# Script: copy the survey tables of each listed year into a PyTables file
# named "survey3.h5" under destination_dir.
filename = os.path.join(model.DATA_DIR, 'survey3.h5')
# Passed to set_survey; also selects which of the two branches below runs.
num_table = 3
# NOTE(review): this store is opened but not used in the visible part of the
# script (and the name shadows the `input` builtin).
input = HDFStore(filename)
survey = tables.openFile(destination_dir+"survey3.h5", mode = "w")
years = ['2006']
for yr in years:
    simu = SurveySimulation()
    simu.set_config(year = yr)
    simu.set_param()
    simu.set_survey(num_table=num_table)
    # One group per year, named "survey_<year>", at the root of the output file.
    survey_year = survey.createGroup("/", "survey_"+yr,"year")
    if num_table == 3:
        # One table per entity, typed after the record array of its data frame.
        for ent in ['ind','men','foy','fam']:
            tab = simu.survey.table3[ent]
            tab_type = tab.to_records(index=False).dtype
            survey_table = survey.createTable('/survey_'+yr,ent,tab_type)
            survey_table.append(tab.to_records(index=False))
            survey_table.flush()
    if num_table == 1:
        tab = simu.survey.table
        tab_type = tab.to_records(index=False).dtype
        # Two fixed names plus every field whose dtype string is '|b1'.
        # NOTE(review): what to_remote is used for is not visible here.
        to_remote = ['opt_colca','quelfic']
        for x in tab_type.descr:
            if x[1] == '|b1' :
                to_remote = to_remote + [x[0]]
def compar_num_table():
    """Compare the three-table and the single-table survey layouts for 2006.

    The simulation is computed with ``num_table=3`` and then with
    ``num_table=1``.  For both layouts the aggregates frame is written to an
    Excel file ('agg3.xlsx' / 'agg1.xlsx') and the output tables to CSV files
    ('<entity>.csv' / 'en1.csv') under ``destination_dir``, so that they can
    be compared in R.  The time spent loading the survey, computing the
    simulation and computing the aggregates is printed for both layouts,
    followed by the totals and their difference.
    """
    import gc
    import time

    years = range(2006, 2007)
    tot1 = 0
    tot3 = 0
    filename = destination_dir + 'output3.h5'
    # NOTE(review): the store is opened (as before) but nothing is written to
    # it in this function; it is now closed on every exit path.
    store = HDFStore(filename)
    try:
        for year in years:
            yr = str(year)
            simu = SurveySimulation()
            simu.set_config(year=yr)
            simu.set_param()

            # No subset is passed.  Hard-coded lists of identifiers used to be
            # assigned here and then overwritten by None; they were dead code.
            sous_ech = None

            deb3 = time.clock()
            simu.set_survey(num_table=3, subset=sous_ech)
            simu.compute()
            agg3 = Aggregates()
            # NOTE(review): the renamed frames are discarded, so this loop has
            # no visible effect on the results; it is kept only because it
            # sits inside the timed section.
            for ent in ['ind', 'men', 'foy', 'fam']:
                tab = simu.output_table.table3[ent]
                tab = tab.rename(columns={'wprm_' + ent: 'wprm'})
            agg3.set_simulation(simu)
            agg3.compute()
            fin3 = time.clock()

            fname_all = os.path.join(destination_dir, 'agg3.xlsx')
            agg3.aggr_frame.to_excel(fname_all, yr, index=False, header=True)

            # export to csv to run compar in R
            for ent in ['ind', 'men', 'foy', 'fam']:
                if ent == 'ind':
                    ident = ["idmen", "quimen", "idfam", "quifam", "idfoy", "quifoy"]
                else:
                    ident = ["idmen", "idfam", "idfoy"]
                _export_for_r(simu.output_table.table3[ent], ent, ident,
                              destination_dir + ent + '.csv')

            deb1 = time.clock()
            simu.set_survey(num_table=1, subset=sous_ech)
            simu.compute()
            agg = Aggregates()
            agg.set_simulation(simu)
            agg.compute()
            fin1 = time.clock()

            # export to csv to run compar in R
            tab = simu.output_table.table
            # These columns are removed from the single table, in place,
            # before it is exported.
            tab.drop([
                'idfam_fam', 'idfam_foy', 'idfam_men', 'idfoy_fam', 'idfoy_foy',
                'idfoy_men', 'idmen_men', 'idmen_fam', 'idmen_foy', 'wprm_foy',
                'wprm_fam'
            ], axis=1, inplace=True)
            ident = ["noi", "idmen", "quimen", "idfam", "quifam", "idfoy", "quifoy"]
            _export_for_r(tab, 'ind', ident, destination_dir + 'en1' + '.csv')

            fname_all = os.path.join(destination_dir, 'agg1.xlsx')
            agg.aggr_frame.to_excel(fname_all, yr, index=False, header=True)

            del simu
            del agg
            gc.collect()

            tot1 += fin1 - deb1
            tot3 += fin3 - deb3
            print("Time to process 1 table :" + str(fin1 - deb1))
            print("Time to process 3 table :" + str(fin3 - deb3))
    finally:
        store.close()

    print("%s %s %s" % (tot1, tot3, tot3 - tot1))


def _export_for_r(tab, ent, ident, csv_path):
    """Write `tab` to `csv_path` with the `ent` suffix stripped from its keys.

    The weight column 'wprm_<ent>' and every identifier '<name>_<ent>' listed
    in `ident` are renamed to their bare names, the identifiers are placed
    first, the rows are sorted on idmen, idfam and idfoy, and the
    ``.ix[:num_output, ...]`` selection is written out.
    """
    renam = {'wprm_' + ent: 'wprm'}
    for nom in ident:
        renam[nom + '_' + ent] = nom
    tab = tab.rename(columns=renam)
    # Identifiers first, then the remaining columns (index difference).
    order_var = ident + list(tab.columns - ident)
    tab.sort(['idmen', 'idfam', 'idfoy']).ix[:num_output, order_var].to_csv(csv_path)
def build_aggregates3():
    """Time and export the 2006 aggregates with three tables and with one.

    The survey is first loaded with ``num_table=3``: the ``Aggregates3``
    frame goes to 'agg3.xlsx' and a slice of each entity table to
    '<entity>.csv'.  The same simulation is then reloaded with
    ``num_table=1``: a slice of the output table goes to 'en1.csv' and the
    ``Aggregates`` frame to 'agg1.xlsx'.  All files are written under
    ``destination_dir``.  The time spent in ``set_survey`` plus ``compute``
    is printed for both layouts, followed by the totals and their difference.
    """
    import gc
    import time

    total_one = 0
    total_three = 0
    for year in range(2006, 2007):
        sheet = str(year)
        simulation = SurveySimulation()
        simulation.set_config(year=sheet)
        simulation.set_param()

        # --- three-table layout ---
        start_three = time.clock()
        simulation.set_survey(num_table=3)
        simulation.compute()
        stop_three = time.clock()
        print("coucou")
        so_column = simulation.survey.column_by_name.get("so")
        print(so_column.entity)

        aggregates_three = Aggregates3()
        aggregates_three.set_simulation(simulation)
        aggregates_three.compute()
        excel_path = os.path.join(destination_dir, 'agg3.xlsx')
        aggregates_three.aggr_frame.to_excel(excel_path, sheet, index=False, header=True)
        for entity in ('ind', 'men', 'foy', 'fam'):
            csv_path = destination_dir + entity + '.csv'
            simulation.output_table.table3[entity][:num_output].to_csv(csv_path)

        # --- single-table layout ---
        start_one = time.clock()
        simulation.set_survey(num_table=1)
        print("prob compute")
        simulation.compute()
        stop_one = time.clock()
        csv_path = destination_dir + 'en1' + '.csv'
        print("prob output")
        simulation.output_table.table[:num_output].to_csv(csv_path)

        aggregates_one = Aggregates()
        print("prob set")
        aggregates_one.set_simulation(simulation)
        print("prob compute")
        aggregates_one.compute()
        excel_path = os.path.join(destination_dir, 'agg1.xlsx')
        print("prob ind")
        aggregates_one.aggr_frame.to_excel(excel_path, sheet, index=False, header=True)

        del simulation, aggregates_one
        gc.collect()

        elapsed_one = stop_one - start_one
        elapsed_three = stop_three - start_three
        total_one += elapsed_one
        total_three += elapsed_three
        print("Time to process 1 table :" + str(elapsed_one))
        print("Time to process 3 table :" + str(elapsed_three))

    print("%s %s %s" % (total_one, total_three, total_three - total_one))
age = survey.get_value('age') if sum((quifam >= 2) & (age >= 21)) != 0: print "they are kids that are of age >= 21" # Problemes # enfants de plus de 21 ans et parents à charge dans les familles avec quifam=0 # idmen = survey.get_value('idmen') # from numpy import max as max_ # print max_(idmen) if __name__ == '__main__': year = 2006 simulation = SurveySimulation() simulation.set_config(year = year) simulation.set_param() simulation.set_survey() ok, message = check_inputs_enumcols(simulation) if not ok: print message ok, message = check_entities(simulation) if not ok: print message ok, message = check_weights(simulation) if not ok: print message