def _generate_aggregates(self):
    """Compute OpenFisca and ERFS aggregates for every year in ``self.years``.

    For each year, a ``SurveySimulation`` is configured with
    ``self.survey_filename`` and computed, then summarised by ``Aggregates``.
    The ERFS aggregates for the same variables are built with
    ``build_erf_aggregates``.

    Results are stored on the instance:

    * ``self.aggregates_of_dataframe``: list of the yearly ``aggr_frame``
      tables, each with an added ``'year'`` column;
    * ``self.aggregates_erfs_dataframe``: list of the yearly ERFS tables,
      with columns renamed from variable names to labels, transposed, index
      reset, and an added ``'year'`` column;
    * ``self.labels_variables``: labels of ``agg.varlist`` for the last year
      processed (an empty list when ``self.years`` is empty).
    """
    of_frames = []
    erf_frames = []
    # Bound before the loop so that an empty ``self.years`` produces empty
    # results instead of a NameError on the final assignment.
    labels_variables = []

    for year in self.years:
        # Run a standard SurveySimulation to get the OpenFisca aggregates.
        simulation = SurveySimulation()
        simulation.set_config(year=year, survey_filename=self.survey_filename)
        simulation.set_param()
        simulation.compute()

        agg = Aggregates()
        agg.set_simulation(simulation)
        agg.compute()

        frame = agg.aggr_frame
        frame['year'] = year
        of_frames.append(frame)

        # Map each output column name to its human-readable label.
        label_by_name = dict(
            (name, column.label)
            for name, column in simulation.output_table.column_by_name.iteritems()
            )
        variables = agg.varlist
        labels_variables = [label_by_name[variable] for variable in variables]

        # Drop the references to the simulation objects and force a
        # collection before the ERFS table is built.
        del simulation, agg, frame
        gc.collect()

        # ERFS aggregates for the same variables and year.
        erf_frame = build_erf_aggregates(variables=variables, year=year)
        erf_frame.rename(columns=label_by_name, inplace=True)
        erf_frame = erf_frame.T
        erf_frame.reset_index(inplace=True)
        erf_frame['year'] = year
        erf_frames.append(erf_frame)
        del erf_frame
        gc.collect()

    self.labels_variables = labels_variables
    self.aggregates_of_dataframe = of_frames
    self.aggregates_erfs_dataframe = erf_frames
def show_aggregates(self):
    """Print the OpenFisca and ERFS aggregates for ``self.variable``.

    ``Aggregates`` is computed on ``self.simulation`` and the rows whose
    ``"Mesure"`` equals the label of ``self.variable`` are printed. The table
    returned by ``build_erf_aggregates`` for that variable and the
    simulation's year is printed after it.

    Both ``self.simulation`` and ``self.variable`` must be set; an
    ``AssertionError`` is raised otherwise. Returns ``None``.
    """
    from openfisca_france.data.erf.aggregates import build_erf_aggregates

    assert self.simulation is not None, 'simulation attribute is None'
    assert self.variable is not None, 'variable attribute is None'

    name = self.variable

    # OpenFisca side: aggregates computed from the current simulation.
    aggregates = Aggregates()
    aggregates.set_simulation(self.simulation)
    aggregates.compute()

    # ERFS side: same variable, same year as the simulation.
    erf_table = build_erf_aggregates(
        variables=[name],
        year=self.simulation.datesim.year,
        )

    # Keep only the aggregate rows describing the requested variable.
    wanted_rows = (
        aggregates.aggr_frame["Mesure"]
        == self.simulation.io_column_by_name[name].label
        )

    # TODO: clean this
    print(aggregates.aggr_frame[wanted_rows])
    print(erf_table)
def _generate_aggregates(self):
    """Compute OpenFisca and ERFS aggregates for every year in ``self.years``.

    For each year, a ``SurveySimulation`` is configured with
    ``self.survey_filename`` and computed, then summarised by ``Aggregates``.
    The ERFS aggregates for the same variables are built with
    ``build_erf_aggregates``.

    Results are stored on the instance:

    * ``self.aggregates_of_dataframe``: list of the yearly ``aggr_frame``
      tables, each with an added ``'year'`` column;
    * ``self.aggregates_erfs_dataframe``: list of the yearly ERFS tables,
      with columns renamed from variable names to labels, transposed, index
      reset, and an added ``'year'`` column;
    * ``self.labels_variables``: labels of ``agg.varlist`` for the last year
      processed (an empty list when ``self.years`` is empty).
    """
    of_frames = []
    erf_frames = []
    # Bound before the loop so that an empty ``self.years`` produces empty
    # results instead of a NameError on the final assignment.
    labels_variables = []

    for year in self.years:
        # Run a standard SurveySimulation to get the OpenFisca aggregates.
        simulation = SurveySimulation()
        simulation.set_config(year=year, survey_filename=self.survey_filename)
        simulation.set_param()
        simulation.compute()

        agg = Aggregates()
        agg.set_simulation(simulation)
        agg.compute()

        frame = agg.aggr_frame
        frame['year'] = year
        of_frames.append(frame)

        # Map each output column name to its human-readable label.
        label_by_name = dict(
            (name, column.label)
            for name, column in simulation.output_table.column_by_name.iteritems()
            )
        variables = agg.varlist
        labels_variables = [label_by_name[variable] for variable in variables]

        # Drop the references to the simulation objects and force a
        # collection before the ERFS table is built.
        del simulation, agg, frame
        gc.collect()

        # ERFS aggregates for the same variables and year.
        erf_frame = build_erf_aggregates(variables=variables, year=year)
        erf_frame.rename(columns=label_by_name, inplace=True)
        erf_frame = erf_frame.T
        erf_frame.reset_index(inplace=True)
        erf_frame['year'] = year
        erf_frames.append(erf_frame)
        del erf_frame
        gc.collect()

    self.labels_variables = labels_variables
    self.aggregates_of_dataframe = of_frames
    self.aggregates_erfs_dataframe = erf_frames
def show_aggregates(self):
    """Print the OpenFisca and ERFS aggregates for ``self.variable``.

    ``Aggregates`` is computed on ``self.simulation`` and the rows whose
    ``"Mesure"`` equals the label of ``self.variable`` are printed. The table
    returned by ``build_erf_aggregates`` for that variable and the
    simulation's year is printed after it.

    Both ``self.simulation`` and ``self.variable`` must be set; an
    ``AssertionError`` is raised otherwise. Returns ``None``.
    """
    from openfisca_france.data.erf.aggregates import build_erf_aggregates

    assert self.simulation is not None, 'simulation attribute is None'
    assert self.variable is not None, 'variable attribute is None'

    name = self.variable

    # OpenFisca side: aggregates computed from the current simulation.
    aggregates = Aggregates()
    aggregates.set_simulation(self.simulation)
    aggregates.compute()

    # ERFS side: same variable, same year as the simulation.
    erf_table = build_erf_aggregates(
        variables=[name],
        year=self.simulation.datesim.year,
        )

    # Keep only the aggregate rows describing the requested variable.
    wanted_rows = (
        aggregates.aggr_frame["Mesure"]
        == self.simulation.io_column_by_name[name].label
        )

    # TODO: clean this
    print(aggregates.aggr_frame[wanted_rows])
    print(erf_table)
def test_laurence():
    """
    Build the OpenFisca / ERFS / real aggregates comparison table and save it
    as an Excel worksheet.

    A survey simulation is run for each year in ``years`` (2006 to 2009
    included). The yearly OpenFisca and ERFS aggregate tables are collected,
    reshaped into a single table indexed by (Mesure, source) with the years
    unstacked into columns, and written out through ``XLtable`` / ``xlwt``.

    To cover other years, change the ``years`` definition below; it is the
    only place where the range is set.
    """
    # Years covered by the comparison, shared by the data-fetching loop and by
    # the column labels built in reshape_tables.
    years = range(2006, 2010)

    def save_as_xls(df, alter_method=True):
        """Save ``df`` as an Excel file.

        With ``alter_method`` true the table is written with pandas'
        ``ExcelWriter``; otherwise it is laid out by ``XLtable`` in an
        ``xlwt`` workbook (useful for colours, borders, etc.).
        """
        if alter_method:
            # Raw string: the backslash is a path separator, not an escape.
            filename = r"C:\desindexation.xls"
            print(filename)
            writer = ExcelWriter(str(filename))
            df.to_excel(writer)
            writer.save()
        else:
            stxl = XLtable(df)
            # <========== HERE TO CHANGE OVERLAY ======>
            wb = xlwt.Workbook()
            ws = wb.add_sheet('resultatstest')
            stxl.place_table(ws)
            try:
                wb.save(r"C:\outputtest.xls")
            except Exception:
                # Best effort: if the default file cannot be written, retry
                # under a randomly suffixed name.
                n = random.randint(0, 100)
                wb.save(r"C:\outputtest_" + str(n) + ".xls")

    def reshape_tables(dfs, dfs_erf):
        """Merge the yearly OpenFisca and ERFS tables into one comparison table.

        ``dfs`` and ``dfs_erf`` are the lists of yearly frames collected by
        the enclosing function; they are modified in place. Returns the
        unstacked frame (years as columns) with missing values filled with 0.
        """
        agg = Aggregates()  # We need this for the columns labels to work

        print('Resetting index to avoid later trouble on manipulation')
        for d in dfs:
            d.reset_index(inplace=True)
            d.set_index('Mesure', inplace=True, drop=False)
            # NOTE(review): reindex_axis returns a new object and the result
            # is discarded here, so the row order is left unchanged --
            # confirm whether the reordering was intended.
            d.reindex_axis(labels_variables, axis=0)
            d.reset_index(inplace=True, drop=True)
        for d in dfs_erf:
            d.reset_index(inplace=True)
            d['Mesure'] = agg.labels['dep']
            d.set_index('index', inplace=True, drop=False)
            # NOTE(review): result discarded as well, see above.
            d.reindex_axis(agg.labels.values(), axis=0)
            d.reset_index(inplace=True, drop=True)

        # Concatenating the openfisca tables of all the years
        temp = pd.concat(dfs, ignore_index=True)
        del temp[agg.labels['entity']], temp['index']
        gc.collect()

        print('We split the real aggregates from the of table')
        temp2 = temp[[
            agg.labels['var'],
            agg.labels['benef_real'],
            agg.labels['dep_real'],
            'year',
            ]]
        del temp[agg.labels['benef_real']], temp[agg.labels['dep_real']]
        temp['source'] = 'of'
        temp2['source'] = 'reel'
        # Give the "real" columns the same names as the simulated ones so
        # that both sources stack into the same columns.
        temp2.rename(
            columns={
                agg.labels['benef_real']: agg.labels['benef'],
                agg.labels['dep_real']: agg.labels['dep'],
                },
            inplace=True,
            )
        temp = pd.concat([temp, temp2], ignore_index=True)

        print('We add the erf data to the table')
        for df in dfs_erf:
            del df['level_0'], df['Mesure']
            df.rename(
                columns={'index': agg.labels['var'], 1: agg.labels['dep']},
                inplace=True,
                )
        temp3 = pd.concat(dfs_erf, ignore_index=True)
        temp3['source'] = 'erfs'
        gc.collect()
        temp = pd.concat([temp, temp3], ignore_index=True)

        print('Index manipulation to reshape the output')
        temp.reset_index(drop=True, inplace=True)
        # Tricky: the [Mesure, source, year] key is unique, so sum() returns
        # the only value of each group. groupby moves the key columns into
        # the index.
        temp = temp.groupby(by=["Mesure", "source", "year"], sort=False).sum()
        assert isinstance(temp, pd.DataFrame)

        # We want the years to be in columns, so we use unstack
        temp_unstacked = temp.unstack()

        # Unfortunately, unstack automatically sorts rows and columns, we
        # have to reindex the table.
        ## Reindexing rows: (Mesure, source) pairs in order of first appearance
        from pandas.core.index import MultiIndex
        row_keys = []
        seen = set()
        for key in zip(temp.index.get_level_values(0),
                       temp.index.get_level_values(1)):
            if key not in seen:
                seen.add(key)
                row_keys.append(key)
        indexi = MultiIndex.from_tuples(row_keys, names=['Mesure', 'source'])
        temp_unstacked = temp_unstacked.reindex_axis(indexi, axis=0)  # axis = 0 for rows, 1 for columns

        ## Reindexing columns
        # TODO : still not working
        # NOTE(review): the 'year' values stored in the frames are ints while
        # the labels built here are strings -- possibly why; to be confirmed.
        col_indexi = []
        for col in temp.columns.get_level_values(0).unique():
            for yr in years:
                col_indexi.append((col, str(yr)))
        col_indexi = MultiIndex.from_tuples(col_indexi)
        print(col_indexi)
        # temp_unstacked = temp_unstacked.reindex_axis(col_indexi, axis = 1)

        # Our table is ready to be turned to Excel worksheet !
        print(temp_unstacked.to_string())
        temp_unstacked.fillna(0, inplace=True)
        return temp_unstacked

    dfs = []
    dfs_erf = []
    survey_filename = os.path.join(model.DATA_DIR, 'sources', 'test.h5')
    for year in years:
        yr = str(year)
        # Running a standard SurveySim to get aggregates
        simulation = SurveySimulation()
        simulation.set_config(year=yr, survey_filename=survey_filename)
        simulation.set_param()
        simulation.compute()
        agg = Aggregates()
        agg.set_simulation(simulation)
        agg.compute()
        df = agg.aggr_frame
        df['year'] = year
        label_by_name = dict(
            (name, column.label)
            for name, column in simulation.output_table.column_by_name.iteritems()
            )
        dfs.append(df)
        variables = agg.varlist
        # Read by reshape_tables through the enclosing scope.
        labels_variables = [label_by_name[variable] for variable in variables]
        del simulation, agg, df
        gc.collect()

        # Getting ERF aggregates from ERF table
        temp = build_erf_aggregates(variables=variables, year=year)
        temp.rename(columns=label_by_name, inplace=True)
        temp = temp.T
        temp.reset_index(inplace=True)
        temp['year'] = year
        dfs_erf.append(temp)
        del temp
        gc.collect()
        print('Out of data fetching for year ' + str(year))
    print('Out of data fetching')

    datatest = reshape_tables(dfs, dfs_erf)
    save_as_xls(datatest, alter_method=False)
def test_laurence():
    """
    Build the OpenFisca / ERFS / real aggregates comparison table and save it
    as an Excel worksheet.

    A survey simulation is run for each year in ``years`` (2006 to 2009
    included). The yearly OpenFisca and ERFS aggregate tables are collected,
    reshaped into a single table indexed by (Mesure, source) with the years
    unstacked into columns, and written out through ``XLtable`` / ``xlwt``.

    To cover other years, change the ``years`` definition below; it is the
    only place where the range is set.
    """
    # Years covered by the comparison, shared by the data-fetching loop and by
    # the column labels built in reshape_tables.
    years = range(2006, 2010)

    def save_as_xls(df, alter_method=True):
        """Save ``df`` as an Excel file.

        With ``alter_method`` true the table is written with pandas'
        ``ExcelWriter``; otherwise it is laid out by ``XLtable`` in an
        ``xlwt`` workbook (useful for colours, borders, etc.).
        """
        if alter_method:
            # Raw string: the backslash is a path separator, not an escape.
            filename = r"C:\desindexation.xls"
            print(filename)
            writer = ExcelWriter(str(filename))
            df.to_excel(writer)
            writer.save()
        else:
            stxl = XLtable(df)
            # <========== HERE TO CHANGE OVERLAY ======>
            wb = xlwt.Workbook()
            ws = wb.add_sheet('resultatstest')
            stxl.place_table(ws)
            try:
                wb.save(r"C:\outputtest.xls")
            except Exception:
                # Best effort: if the default file cannot be written, retry
                # under a randomly suffixed name.
                n = random.randint(0, 100)
                wb.save(r"C:\outputtest_" + str(n) + ".xls")

    def reshape_tables(dfs, dfs_erf):
        """Merge the yearly OpenFisca and ERFS tables into one comparison table.

        ``dfs`` and ``dfs_erf`` are the lists of yearly frames collected by
        the enclosing function; they are modified in place. Returns the
        unstacked frame (years as columns) with missing values filled with 0.
        """
        agg = Aggregates()  # We need this for the columns labels to work

        print('Resetting index to avoid later trouble on manipulation')
        for d in dfs:
            d.reset_index(inplace=True)
            d.set_index('Mesure', inplace=True, drop=False)
            # NOTE(review): reindex_axis returns a new object and the result
            # is discarded here, so the row order is left unchanged --
            # confirm whether the reordering was intended.
            d.reindex_axis(labels_variables, axis=0)
            d.reset_index(inplace=True, drop=True)
        for d in dfs_erf:
            d.reset_index(inplace=True)
            d['Mesure'] = agg.labels['dep']
            d.set_index('index', inplace=True, drop=False)
            # NOTE(review): result discarded as well, see above.
            d.reindex_axis(agg.labels.values(), axis=0)
            d.reset_index(inplace=True, drop=True)

        # Concatenating the openfisca tables of all the years
        temp = pd.concat(dfs, ignore_index=True)
        del temp[agg.labels['entity']], temp['index']
        gc.collect()

        print('We split the real aggregates from the of table')
        temp2 = temp[[
            agg.labels['var'],
            agg.labels['benef_real'],
            agg.labels['dep_real'],
            'year',
            ]]
        del temp[agg.labels['benef_real']], temp[agg.labels['dep_real']]
        temp['source'] = 'of'
        temp2['source'] = 'reel'
        # Give the "real" columns the same names as the simulated ones so
        # that both sources stack into the same columns.
        temp2.rename(
            columns={
                agg.labels['benef_real']: agg.labels['benef'],
                agg.labels['dep_real']: agg.labels['dep'],
                },
            inplace=True,
            )
        temp = pd.concat([temp, temp2], ignore_index=True)

        print('We add the erf data to the table')
        for df in dfs_erf:
            del df['level_0'], df['Mesure']
            df.rename(
                columns={'index': agg.labels['var'], 1: agg.labels['dep']},
                inplace=True,
                )
        temp3 = pd.concat(dfs_erf, ignore_index=True)
        temp3['source'] = 'erfs'
        gc.collect()
        temp = pd.concat([temp, temp3], ignore_index=True)

        print('Index manipulation to reshape the output')
        temp.reset_index(drop=True, inplace=True)
        # Tricky: the [Mesure, source, year] key is unique, so sum() returns
        # the only value of each group. groupby moves the key columns into
        # the index.
        temp = temp.groupby(by=["Mesure", "source", "year"], sort=False).sum()
        assert isinstance(temp, pd.DataFrame)

        # We want the years to be in columns, so we use unstack
        temp_unstacked = temp.unstack()

        # Unfortunately, unstack automatically sorts rows and columns, we
        # have to reindex the table.
        ## Reindexing rows: (Mesure, source) pairs in order of first appearance
        from pandas.core.index import MultiIndex
        row_keys = []
        seen = set()
        for key in zip(temp.index.get_level_values(0),
                       temp.index.get_level_values(1)):
            if key not in seen:
                seen.add(key)
                row_keys.append(key)
        indexi = MultiIndex.from_tuples(row_keys, names=['Mesure', 'source'])
        temp_unstacked = temp_unstacked.reindex_axis(indexi, axis=0)  # axis = 0 for rows, 1 for columns

        ## Reindexing columns
        # TODO : still not working
        # NOTE(review): the 'year' values stored in the frames are ints while
        # the labels built here are strings -- possibly why; to be confirmed.
        col_indexi = []
        for col in temp.columns.get_level_values(0).unique():
            for yr in years:
                col_indexi.append((col, str(yr)))
        col_indexi = MultiIndex.from_tuples(col_indexi)
        print(col_indexi)
        # temp_unstacked = temp_unstacked.reindex_axis(col_indexi, axis = 1)

        # Our table is ready to be turned to Excel worksheet !
        print(temp_unstacked.to_string())
        temp_unstacked.fillna(0, inplace=True)
        return temp_unstacked

    dfs = []
    dfs_erf = []
    survey_filename = os.path.join(model.DATA_DIR, 'sources', 'test.h5')
    for year in years:
        yr = str(year)
        # Running a standard SurveySim to get aggregates
        simulation = SurveySimulation()
        simulation.set_config(year=yr, survey_filename=survey_filename)
        simulation.set_param()
        simulation.compute()
        agg = Aggregates()
        agg.set_simulation(simulation)
        agg.compute()
        df = agg.aggr_frame
        df['year'] = year
        label_by_name = dict(
            (name, column.label)
            for name, column in simulation.output_table.column_by_name.iteritems()
            )
        dfs.append(df)
        variables = agg.varlist
        # Read by reshape_tables through the enclosing scope.
        labels_variables = [label_by_name[variable] for variable in variables]
        del simulation, agg, df
        gc.collect()

        # Getting ERF aggregates from ERF table
        temp = build_erf_aggregates(variables=variables, year=year)
        temp.rename(columns=label_by_name, inplace=True)
        temp = temp.T
        temp.reset_index(inplace=True)
        temp['year'] = year
        dfs_erf.append(temp)
        del temp
        gc.collect()
        print('Out of data fetching for year ' + str(year))
    print('Out of data fetching')

    datatest = reshape_tables(dfs, dfs_erf)
    save_as_xls(datatest, alter_method=False)