def simulate_reform_cd(year):
    """Simulate the ``reform_cd`` reform on the survey input data of ``year``.

    A survey scenario is initialised with the reformed tax-benefit system,
    then a reference simulation and a reform simulation are created from it.

    :param year: year handed to ``get_input_data_frame`` and to the scenario;
        must not be ``None``.
    :returns: ``(data_frame_by_entity_key_plural, reference_simulation)``; the
        dict has a single ``'foyers'`` key mapping to a DataFrame holding the
        ``'impo'`` and ``'rfr'`` variables.
    """
    assert year is not None
    country_system_class = openfisca_france_data.init_country()
    reformed_system = reform_cd.build_reform(country_system_class())
    survey_data = get_input_data_frame(year)
    scenario = SurveyScenario().init_from_data_frame(
        input_data_frame=survey_data,
        used_as_input_variables=['sal', 'cho', 'rst', 'age_en_mois', 'smic55'],
        year=year,
        tax_benefit_system=reformed_system,
    )
    reference_simulation = scenario.new_simulation(debug=True, reference=True)
    # The reform simulation is created, but its result is not read afterwards.
    scenario.new_simulation(debug=True)

    # NOTE(review): the columns below are computed on the *reference*
    # simulation although the result is named as reform data -- confirm that
    # this is intended.
    foyers_columns = {}
    for name in ('impo', 'rfr'):
        foyers_columns[name] = reference_simulation.calculate_add(name)
    reform_data_frame_by_entity_key_plural = dict(
        foyers=pandas.DataFrame(foyers_columns),
    )
    return reform_data_frame_by_entity_key_plural, reference_simulation
def reform_survey_simulation(reform=None, year=None, ind_variables=None, fam_variables=None,
                             foy_variables=None, men_variables=None, used_as_input_variables=None,
                             reform_specific_foy_variables=None):
    """Run a reference and a reform survey simulation and export both as data frames.

    :param reform: object exposing ``build_reform(tax_benefit_system)``; required.
    :param year: year of the input data and of the scenario; required.
    :param ind_variables: forwarded to
        ``from_simulation_to_data_frame_by_entity_key_plural`` for both runs.
    :param fam_variables: forwarded likewise.
    :param foy_variables: forwarded likewise; for the reform run it is extended
        with ``reform_specific_foy_variables``.
    :param men_variables: forwarded likewise.
    :param used_as_input_variables: forwarded to the survey scenario.
    :param reform_specific_foy_variables: extra ``foy`` variables exported for
        the reform run only.
    :returns: ``(reform_data_frame_by_entity_key_plural,
        reference_data_frame_by_entity_key_plural)``.
    """
    assert reform is not None
    assert year is not None
    TaxBenefitSystem = openfisca_france_data.init_country()
    tax_benefit_system = TaxBenefitSystem()
    reform = reform.build_reform(tax_benefit_system)
    input_data_frame = get_input_data_frame(year)
    survey_scenario_reform = SurveyScenario().init_from_data_frame(
        input_data_frame=input_data_frame,
        used_as_input_variables=used_as_input_variables,
        year=year,
        tax_benefit_system=reform,
    )

    reference_simulation = survey_scenario_reform.new_simulation(debug=False, reference=True)
    reference_data_frame_by_entity_key_plural = from_simulation_to_data_frame_by_entity_key_plural(
        reference_simulation, ind_variables, fam_variables, foy_variables, men_variables)

    reform_simulation = survey_scenario_reform.new_simulation(debug=False)
    # Both lists default to None; a plain "+" would raise TypeError in that
    # case. None is treated as "no variables", and None is still forwarded
    # when neither list was given, as for the reference run.
    if foy_variables is None and reform_specific_foy_variables is None:
        reform_foy_variables = None
    else:
        reform_foy_variables = list(foy_variables or []) + list(reform_specific_foy_variables or [])
    reform_data_frame_by_entity_key_plural = from_simulation_to_data_frame_by_entity_key_plural(
        reform_simulation, ind_variables, fam_variables, reform_foy_variables, men_variables)

    return reform_data_frame_by_entity_key_plural, reference_data_frame_by_entity_key_plural
def test_survey_simulation():
    """Compute a fixed list of ``foyers`` variables on the 2009 survey data.

    :returns: dict with a single ``'foyers'`` key mapping to a DataFrame with
        one column per computed variable.
    """
    year = 2009
    country_system_class = openfisca_france_data.init_country()
    country_system = country_system_class()
    survey_data = get_input_data_frame(year)
    scenario = SurveyScenario().init_from_data_frame(
        input_data_frame=survey_data,
        used_as_input_variables=['sal', 'cho', 'rst', 'age_en_mois', 'smic55'],
        year=year,
        tax_benefit_system=country_system,
    )
    simulation = scenario.new_simulation(debug=False, reference=True)

    foyers_variables = [
        'rfr',
        'irpp',
        'rbg',
        'iaidrdi',
        'rng',
        'rni',
        'ip_net',
        'reductions',
        'decile_rfr',
        'weight_foyers',
    ]
    foyers = pandas.DataFrame(
        {name: simulation.calculate_add(name) for name in foyers_variables})
    return dict(foyers=foyers)
def test_calibration():
    """Calibrate the 2009 survey scenario against the ``calib_2006.csv`` margins.

    The target total population is set to 1.123 times the initial one; both
    values are printed. Nothing is returned.
    """
    year = 2009
    input_data_frame = get_input_data_frame(year)
    survey_scenario = SurveyScenario().init_from_data_frame(
        input_data_frame=input_data_frame,
        year=year,
    )
    survey_scenario.initialize_weights()
    calibration = Calibration(survey_scenario)
    calibration.parameters['method'] = 'linear'
    # Parenthesised single-argument print: same output with the Python 2
    # print statement and the Python 3 print function.
    print(calibration.initial_total_population)
    calibration.total_population = calibration.initial_total_population * 1.123
    print(calibration.total_population)

    filename = os.path.join(
        openfisca_france_data_location,
        "openfisca_france_data",
        "calibrations",
        "calib_2006.csv"
    )
    calibration.set_inputs_margins_from_file(filename, 2006)
    calibration.set_parameters('invlo', 3)
    calibration.set_parameters('up', 3)
    calibration.set_parameters('method', 'logit')
    calibration.calibrate()
def test_calibration():
    """Calibrate the 2006 survey scenario against the ``calib_2006.csv`` margins.

    The target total population is set to 1.123 times the initial one; both
    values are printed. Nothing is returned.
    """
    year = 2006
    input_data_frame = get_input_data_frame(year)
    survey_scenario = SurveyScenario().init_from_data_frame(
        input_data_frame=input_data_frame,
        year=year,
    )
    survey_scenario.initialize_weights()
    calibration = Calibration()
    calibration.set_survey_scenario(survey_scenario)
    calibration.parameters['method'] = 'linear'
    # Parenthesised single-argument print: same output with the Python 2
    # print statement and the Python 3 print function.
    print(calibration.initial_total_population)
    calibration.total_population = calibration.initial_total_population * 1.123
    print(calibration.total_population)

    filename = os.path.join(
        openfisca_france_data_location,
        "openfisca_france_data",
        "calibrations",
        "calib_2006.csv"
    )
    calibration.set_inputs_margins_from_file(filename, 2006)
    calibration.set_parameters('invlo', 3)
    calibration.set_parameters('up', 3)
    calibration.set_parameters('method', 'logit')
    calibration.calibrate()
def test_survey_simulation():
    """Run a reference simulation on the 2009 survey data.

    :returns: dict whose only key, ``'foyers'``, maps to a DataFrame holding
        the variables listed in ``variable_names`` below.
    """
    year = 2009
    system_factory = openfisca_france_data.init_country()
    system = system_factory()
    data = get_input_data_frame(year)
    scenario_options = dict(
        input_data_frame=data,
        used_as_input_variables=['sal', 'cho', 'rst', 'age_en_mois', 'smic55'],
        year=year,
        tax_benefit_system=system,
    )
    scenario = SurveyScenario().init_from_data_frame(**scenario_options)
    simulation = scenario.new_simulation(debug=False, reference=True)

    variable_names = (
        'rfr', 'irpp', 'rbg', 'iaidrdi', 'rng',
        'rni', 'ip_net', 'reductions', 'decile_rfr', 'weight_foyers',
    )
    columns = {}
    for variable_name in variable_names:
        columns[variable_name] = simulation.calculate_add(variable_name)
    simulation_data_frame_by_entity_key_plural = dict(foyers=pandas.DataFrame(columns))
    return simulation_data_frame_by_entity_key_plural
def df_survey_simulation(reductions):
    """Compute base ``foyers`` variables plus the given ones on the 2009 data.

    :param reductions: list of additional variable names; it is concatenated
        to a list with ``+``, so it has to be a list.
    :returns: dict with a single ``'foyers'`` key mapping to a DataFrame with
        one column per computed variable.
    """
    year = 2009
    system_class = openfisca_france_data.init_country()
    system = system_class()
    survey_data = get_input_data_frame(year)
    scenario = SurveyScenario().init_from_data_frame(
        input_data_frame=survey_data,
        used_as_input_variables=['sal', 'cho', 'rst', 'age_en_mois', 'smic55'],
        year=year,
        tax_benefit_system=system,
    )
    simulation = scenario.new_simulation()
    # from openfisca_core import periods
    # period = periods.period('year', 2007)
    # period = period.start.offset('first-of', 'month').period('year')
    # bareme = simulation.legislation_at(period.start).ir.bareme
    base_variables = [
        'rfr',
        'irpp',
        'rbg',
        'iaidrdi',
        'rng',
        'ip_net',
        'reductions',
        'decile_rfr',
        'weight_foyers',
    ]
    foyers = pandas.DataFrame(
        {name: simulation.calculate_add(name) for name in base_variables + reductions})
    return dict(foyers=foyers)
def test_weights_building():
    """Create a simulation on the 2009 survey data.

    :returns: the ``simulation`` attribute of the survey scenario after
        ``new_simulation()`` has been called.
    """
    year = 2009
    survey_data = get_input_data_frame(year)
    scenario = SurveyScenario().init_from_data_frame(
        input_data_frame=survey_data,
        year=year,
    )
    scenario.new_simulation()
    return scenario.simulation
def test_weights_building():
    """Initialise a 2009 survey scenario, start a simulation and return it.

    The returned object is read from ``survey_scenario.simulation``, not from
    the return value of ``new_simulation()``.
    """
    year = 2009
    data = get_input_data_frame(year)
    options = dict(input_data_frame=data, year=year)
    scenario = SurveyScenario().init_from_data_frame(**options)
    scenario.new_simulation()
    return scenario.simulation
def test_pivot_table_1d_mean(year=2009):
    """Compute the pivot table of ``'irpp'`` by ``'decile_rfr'``.

    :param year: year of the input data and of the scenario.
    :returns: the result of ``SurveyScenario.compute_pivot_table``.
    """
    survey_data = get_input_data_frame(year)
    scenario = SurveyScenario().init_from_data_frame(
        input_data_frame=survey_data,
        year=year,
    )
    return scenario.compute_pivot_table(columns=['decile_rfr'], values=['irpp'])
def varying_survey_simulation(year=2009, increment=10, target='irpp', varying='rni',
                              used_as_input_variables=None):
    """Simulate twice, the second time with ``varying`` raised by ``increment``.

    The first run computes ``target`` and ``varying``. The second run is fed
    the per-entity input frames in which ``varying`` has been incremented and
    is declared as an input variable.

    :param year: year of the input data and of both scenarios.
    :param increment: amount added to ``varying`` before the second run.
    :param target: name of the variable whose response is measured.
    :param varying: name of the variable that is incremented.
    :param used_as_input_variables: input variables of the first run; ``None``
        is treated as an empty list when building the list of the second run.
    :returns: DataFrame merged on ``'idfoy_original'`` with the columns of
        both runs (second-run columns carry a ``_2`` suffix) plus
        ``'marginal_rate'`` and ``'average_rate'``.
    """
    TaxBenefitSystem = openfisca_france_data.init_country()
    tax_benefit_system = TaxBenefitSystem()
    input_data_frame = get_input_data_frame(year)
    output_variables = [target, varying, 'idfoy_original']

    # Simulation 1: get varying and target.
    survey_scenario = SurveyScenario().init_from_data_frame(
        input_data_frame=input_data_frame,
        used_as_input_variables=used_as_input_variables,
        year=year,
        tax_benefit_system=tax_benefit_system,
    )
    simulation = survey_scenario.new_simulation(debug=False)
    output_data_frame = pandas.DataFrame(
        {name: simulation.calculate_add(name) for name in output_variables})

    # Build the per-entity input frames from the previous input frame and simulation.
    input_data_frames_by_entity_key_plural = from_input_df_to_entity_key_plural_df(
        input_data_frame, tax_benefit_system, simulation)
    foyers = input_data_frames_by_entity_key_plural['idfoy']
    foyers = pandas.merge(foyers, output_data_frame, on='idfoy_original')

    # Increment the varying variable.
    foyers[varying] = foyers[varying] + increment

    # Put the updated frame back into the dictionary.
    input_data_frames_by_entity_key_plural['idfoy'] = foyers

    # Simulation 2: start from the per-entity input frames.
    # TODO: fix used_as_input_variables in the from_input_df_to_entity_key_plural_df() function
    # The default is None, which cannot be concatenated with a list.
    used_as_input_variables = list(used_as_input_variables or []) + [varying]
    TaxBenefitSystem = openfisca_france_data.init_country()
    tax_benefit_system = TaxBenefitSystem()
    survey_scenario = SurveyScenario().init_from_data_frame(
        input_data_frame=None,
        input_data_frames_by_entity_key_plural=input_data_frames_by_entity_key_plural,
        used_as_input_variables=used_as_input_variables,
        year=year,
        tax_benefit_system=tax_benefit_system,
    )
    simulation = survey_scenario.new_simulation(debug=False)
    output_data_frame2 = pandas.DataFrame(
        {name: simulation.calculate_add(name) for name in output_variables})

    varying_2 = '{}_2'.format(varying)
    target_2 = '{}_2'.format(target)
    output_data_frame2.rename(columns={varying: varying_2, target: target_2}, inplace=True)
    merged = pandas.merge(output_data_frame, output_data_frame2, on='idfoy_original')

    # Use the actual ``varying`` columns: the previously hard-coded
    # 'rni'/'rni_2' names only exist when varying == 'rni'.
    merged['marginal_rate'] = marginal_rate_survey(merged, target, target_2, varying, varying_2)
    merged['average_rate'] = average_rate(target=merged[target], varying=merged[varying])

    return merged
def create_survey_scenario(year=None):
    """Build a survey scenario from the input data of ``year``.

    :param year: year of the input data and of the scenario; must not be
        ``None``.
    :returns: the value returned by ``SurveyScenario.init_from_data_frame``.
    """
    assert year is not None
    survey_data = get_input_data_frame(year)
    return SurveyScenario().init_from_data_frame(
        input_data_frame=survey_data,
        used_as_input_variables=['salaire_imposable', 'cho', 'rst', 'age_en_mois'],
        year=year,
    )
def test_weights_building():
    """Create a simulation on the 2009 survey data with explicit input variables.

    :returns: the ``simulation`` attribute of the survey scenario after
        ``new_simulation()`` has been called.
    """
    year = 2009
    survey_data = get_input_data_frame(year)
    scenario = SurveyScenario().init_from_data_frame(
        input_data_frame=survey_data,
        used_as_input_variables=['sal', 'cho', 'rst', 'age_en_mois'],
        year=year,
    )
    scenario.new_simulation()
    return scenario.simulation
def create_survey_scenario(year=None):
    """Build a survey scenario from the input data of ``year`` and initialise its weights.

    :param year: year of the input data and of the scenario; must not be
        ``None``.
    :returns: the scenario, after ``initialize_weights()`` has been called on it.
    """
    assert year is not None
    survey_data = get_input_data_frame(year)
    scenario = SurveyScenario().init_from_data_frame(
        input_data_frame=survey_data,
        year=year,
    )
    scenario.initialize_weights()
    return scenario
def test_weights_building():
    """Initialise a 2009 survey scenario, start a simulation and return it.

    The returned object is read from ``survey_scenario.simulation``, not from
    the return value of ``new_simulation()``.
    """
    year = 2009
    data = get_input_data_frame(year)
    options = dict(
        input_data_frame=data,
        used_as_input_variables=['sal', 'cho', 'rst', 'age_en_mois'],
        year=year,
    )
    scenario = SurveyScenario().init_from_data_frame(**options)
    scenario.new_simulation()
    return scenario.simulation
def create_survey_scenario(year=None):
    """Build a survey scenario on ``base.france_data_tax_benefit_system``.

    :param year: year of the input data and of the scenario; must not be
        ``None``.
    :returns: the value returned by ``SurveyScenario.init_from_data_frame``.
    :raises AssertionError: if ``year`` is ``None`` or the input data has no
        ``"wprm"`` column.
    """
    assert year is not None
    survey_data = get_input_data_frame(year)
    assert "wprm" in survey_data.columns
    return SurveyScenario().init_from_data_frame(
        input_data_frame=survey_data,
        tax_benefit_system=base.france_data_tax_benefit_system,
        year=year,
    )
def create_survey_scenario(year=None):
    """Return a survey scenario for ``year`` using the France data tax-benefit system.

    The input data frame must contain a ``"wprm"`` column; this is asserted
    before the scenario is initialised.

    :param year: year of the input data and of the scenario; must not be
        ``None``.
    """
    assert year is not None
    data = get_input_data_frame(year)
    assert "wprm" in data.columns
    options = dict(
        input_data_frame=data,
        tax_benefit_system=base.france_data_tax_benefit_system,
        year=year,
    )
    scenario = SurveyScenario().init_from_data_frame(**options)
    return scenario
def test_inflation():
    """Inflate ``salaire_imposable`` to a target of 1.2e08 on the 2009 scenario.

    A simulation is created before ``inflate`` is called. Nothing is returned.
    """
    year = 2009
    survey_data = get_input_data_frame(year)
    scenario = SurveyScenario().init_from_data_frame(
        input_data_frame=survey_data,
        year=year,
    )
    scenario.new_simulation()
    targets = {'salaire_imposable': 1.2e08}
    scenario.inflate(target_by_variable=targets)
def test_pivot_table_1d_mean(year=2009):
    """Return the pivot table of ``'irpp'`` with ``'decile_rfr'`` as columns.

    :param year: year of the input data and of the scenario.
    :returns: the result of ``SurveyScenario.compute_pivot_table``.
    """
    data = get_input_data_frame(year)
    scenario = SurveyScenario().init_from_data_frame(input_data_frame=data, year=year)
    pivot_options = dict(columns=['decile_rfr'], values=['irpp'])
    return scenario.compute_pivot_table(**pivot_options)
def build_by_extraction(year=None):
    """Extract a two-row data set from the input data of ``year`` and save it.

    The rows labelled 0 and 1 are kept, ``id*_original`` columns are copied
    from the ``id*`` ones, ``sali`` is set to 20000 and 10000, and ``wprm`` to
    100. The result is written to ``csv_file_realpath`` and to
    ``hdf5_file_realpath``.

    :param year: year of the input data; must not be ``None``. Its string
        form is used as the HDF5 key.
    """
    assert year is not None
    df = get_input_data_frame(year)
    # Label-based, end-inclusive slice; assumes an integer index holding the
    # labels 0 and 1 (``.ix`` behaved the same way there and no longer exists
    # in pandas >= 1.0). The copy keeps the assignments below off a view of df.
    output = df.loc[0:1].copy()
    for symbol in ['fam', 'foy', 'men']:
        output['id{}_original'.format(symbol)] = output['id{}'.format(symbol)]
    output.loc[0, 'sali'] = 20000
    output.loc[1, 'sali'] = 10000
    output['wprm'] = 100
    output.to_csv(csv_file_realpath)
    # DataFrame.to_hdf requires a key; the table is stored under the year.
    output.to_hdf(hdf5_file_realpath, key=str(year))
def build_by_extraction(year=None):
    """Build a small two-row data set from the input data of ``year``.

    Rows labelled 0 and 1 are extracted and adjusted (``id*_original``
    columns, ``sali`` and ``wprm`` values), then written both to
    ``csv_file_realpath`` and to ``hdf5_file_realpath``.

    :param year: year of the input data; must not be ``None``. Its string
        form is used as the HDF5 key.
    """
    assert year is not None
    df = get_input_data_frame(year)
    # ``.ix`` was removed in pandas 1.0. ``.loc`` gives the same end-inclusive
    # label slice, assuming an integer index with labels 0 and 1. Working on a
    # copy avoids assigning into a slice of ``df``.
    output = df.loc[0:1].copy()
    for symbol in ['fam', 'foy', 'men']:
        original_column = 'id{}_original'.format(symbol)
        output[original_column] = output['id{}'.format(symbol)]
    output.loc[0, 'sali'] = 20000
    output.loc[1, 'sali'] = 10000
    output['wprm'] = 100
    output.to_csv(csv_file_realpath)
    # ``key`` is a required argument of DataFrame.to_hdf.
    output.to_hdf(hdf5_file_realpath, key=str(year))
def test_inflation():
    """Call ``inflate`` on the 2009 survey scenario.

    The only target is ``salaire_imposable = 1.2e08``; a simulation is
    created first. Nothing is returned.
    """
    year = 2009
    data = get_input_data_frame(year)
    options = dict(input_data_frame=data, year=year)
    scenario = SurveyScenario().init_from_data_frame(**options)
    scenario.new_simulation()
    scenario.inflate(target_by_variable=dict(salaire_imposable=1.2e08))
def create_survey_scenario(data_year=2009, year=2013, reform=None):
    """Build a survey scenario whose data year and simulation year may differ.

    :param data_year: year used to load the input data; must not be ``None``.
    :param year: year passed to the scenario; must not be ``None``.
    :param reform: passed as ``tax_benefit_system``; the reference system is
        always ``base.france_data_tax_benefit_system``.
    :returns: the value returned by ``SurveyScenario.init_from_data_frame``.
    """
    assert year is not None
    assert data_year is not None
    survey_data = get_input_data_frame(data_year)
    return SurveyScenario().init_from_data_frame(
        input_data_frame=survey_data,
        tax_benefit_system=reform,
        reference_tax_benefit_system=base.france_data_tax_benefit_system,
        year=year,
    )
def build_by_extraction(year=None):
    """Extract a two-row data set from the input data of ``year`` into the HDF5 store.

    The rows labelled 0 and 1 are kept, ``id*_original`` columns are copied
    from the ``id*`` ones, ``sali`` is set to 20000 and 10000, and ``wprm`` to
    100. The result is stored in ``hdf5_file_realpath`` under ``str(year)``.

    :param year: year of the input data; must not be ``None``.
    """
    assert year is not None
    df = get_input_data_frame(year)
    # Label-based, end-inclusive slice; assumes an integer index holding the
    # labels 0 and 1 (``.ix`` behaved the same way there and no longer exists
    # in pandas >= 1.0). The copy keeps the assignments below off a view of df.
    output = df.loc[0:1].copy()
    for symbol in ['fam', 'foy', 'men']:
        output['id{}_original'.format(symbol)] = output['id{}'.format(symbol)]
    output.loc[0, 'sali'] = 20000
    output.loc[1, 'sali'] = 10000
    output['wprm'] = 100

    store = pandas.HDFStore(hdf5_file_realpath)
    try:
        store.put(str(year), output)
    finally:
        # Close the file handle even when the write fails.
        store.close()
def create_survey_scenario(data_year=2009, year=2013, reform=None):
    """Return a survey scenario built on data of ``data_year`` for ``year``.

    ``reform`` is handed over as ``tax_benefit_system`` and
    ``base.france_data_tax_benefit_system`` as the reference system.

    :raises AssertionError: if ``year`` or ``data_year`` is ``None``.
    """
    assert year is not None
    assert data_year is not None
    data = get_input_data_frame(data_year)
    options = dict(
        input_data_frame=data,
        tax_benefit_system=reform,
        reference_tax_benefit_system=base.france_data_tax_benefit_system,
        year=year,
    )
    scenario = SurveyScenario().init_from_data_frame(**options)
    return scenario
def survey_simulate(used_as_input_variables, year, ind_variables=None, fam_variables=None,
                    foy_variables=None, men_variables=None):
    """Run a survey simulation and export the requested variables by entity.

    :param used_as_input_variables: forwarded to the survey scenario.
    :param year: year of the input data and of the scenario.
    :param ind_variables: forwarded to
        ``from_simulation_to_data_frame_by_entity_key_plural``.
    :param fam_variables: forwarded likewise.
    :param foy_variables: forwarded likewise.
    :param men_variables: forwarded likewise.
    :returns: ``(data_frame_by_entity_key_plural, simulation)``.
    """
    survey_data = get_input_data_frame(year)
    scenario = SurveyScenario().init_from_data_frame(
        input_data_frame=survey_data,
        used_as_input_variables=used_as_input_variables,
        year=year,
    )
    simulation = scenario.new_simulation()
    exported = from_simulation_to_data_frame_by_entity_key_plural(
        simulation, ind_variables, fam_variables, foy_variables, men_variables)
    return exported, simulation
def test_survey_simulation():
    """Simulate the 2009 survey data and check the weighted ``af`` total.

    DataFrames are built for ``individus``, ``familles`` and ``menages``; the
    weighted sum of ``af`` over ``familles``, divided by 1e9, must exceed 10.

    :returns: ``(data_frame_by_entity_key_plural, simulation)``.
    :raises AssertionError: if the weighted ``af`` total is not above the
        threshold.
    """
    year = 2009
    input_data_frame = get_input_data_frame(year)
    survey_scenario = SurveyScenario().init_from_data_frame(
        input_data_frame=input_data_frame,
        used_as_input_variables=['sal', 'cho', 'rst', 'age_en_mois', 'smic55'],
        year=year,
    )
    simulation = survey_scenario.new_simulation()
    data_frame_by_entity_key_plural = dict(
        individus=pandas.DataFrame(
            {name: simulation.calculate(name) for name in [
                'idmen', 'quimen', 'idfoy', 'quifoy', 'idfam', 'quifam', 'age', 'champm_individus', 'sal',
                'salaire_net',
                # 'smic55',
                'txtppb',
                # salsuperbrut  # TODO bug in 2006
            ]}
        ),
        familles=pandas.DataFrame(
            {name: simulation.calculate_add(name) for name in [
                'af_nbenf', 'af', 'weight_familles',
            ]}
        ),
        menages=pandas.DataFrame(
            {name: simulation.calculate(name) for name in [
                'revdisp',
            ]}
        ),
    )

    # The check previously read an undefined ``data_frame_familles`` name and
    # discarded the comparison; it now reads the frame built above and asserts.
    familles = data_frame_by_entity_key_plural['familles']
    assert (familles.weight_familles * familles.af).sum() / 1e9 > 10

    return data_frame_by_entity_key_plural, simulation
def test_survey_simulation():
    """Simulate the 2009 survey data and check the weighted ``af`` total.

    One DataFrame per entity (``individus``, ``familles``, ``menages``) is
    built from the simulation; the weighted sum of ``af`` over ``familles``,
    divided by 1e9, must exceed 10.

    :returns: ``(data_frame_by_entity_key_plural, simulation)``.
    :raises AssertionError: if the weighted ``af`` total is not above the
        threshold.
    """
    year = 2009
    input_data_frame = get_input_data_frame(year)
    survey_scenario = SurveyScenario().init_from_data_frame(
        input_data_frame=input_data_frame,
        used_as_input_variables=['sal', 'cho', 'rst', 'age_en_mois', 'smic55'],
        year=year,
    )
    simulation = survey_scenario.new_simulation()

    individus_variables = [
        'idmen',
        'quimen',
        'idfoy',
        'quifoy',
        'idfam',
        'quifam',
        'age',
        'champm_individus',
        'sal',
        'salaire_net',
        # 'smic55',
        'txtppb',
        # salsuperbrut  # TODO bug in 2006
    ]
    familles_variables = ['af_nbenf', 'af', 'weight_familles']
    menages_variables = ['revdisp']

    data_frame_by_entity_key_plural = dict(
        individus=pandas.DataFrame(
            {name: simulation.calculate(name) for name in individus_variables}),
        familles=pandas.DataFrame(
            {name: simulation.calculate_add(name) for name in familles_variables}),
        menages=pandas.DataFrame(
            {name: simulation.calculate(name) for name in menages_variables}),
    )

    # ``data_frame_familles`` was never defined (NameError) and the comparison
    # result was thrown away; read the frame built above and assert on it.
    familles = data_frame_by_entity_key_plural['familles']
    assert (familles.weight_familles * familles.af).sum() / 1e9 > 10

    return data_frame_by_entity_key_plural, simulation
def test_survey_simulation():
    """Simulate the 2009 survey data with tracing and check the weighted ``af`` total.

    :returns: ``(data_frame_by_entity_key_plural, simulation)``.
    :raises AssertionError: if the weighted sum of ``af`` over ``familles``,
        divided by 1e9, is not greater than 10.
    """
    year = 2009
    survey_data = get_input_data_frame(year)
    scenario = SurveyScenario().init_from_data_frame(
        input_data_frame=survey_data,
        year=year,
    )
    simulation = scenario.new_simulation(trace=True)
    requested_variables = [
        'aspa',
        'aide_logement_montant_brut',
        'idmen',
        'quimen',
        'idfoy',
        'quifoy',
        'idfam',
        'quifam',
        'age',
        'activite',
        'br_rmi_i',
        'champm_individus',
        'pensions_alimentaires_percues',
        'salaire_imposable',
        'salaire_net',
        'smic55',
        'txtppb',
        'af_nbenf',
        'af',
        'br_rmi',
        'rsa',
        'rstnet',
        'weight_familles',
        'revdisp',
    ]
    frames = scenario.create_data_frame_by_entity_key_plural(variables=requested_variables)
    weighted_af = frames['familles'].weight_familles * frames['familles'].af
    assert weighted_af.sum() / 1e9 > 10
    return frames, simulation
def test_survey_simulation():
    """Export a fixed list of variables from a traced 2009 simulation.

    The weighted sum of ``af`` over the ``familles`` frame, divided by 1e9,
    is asserted to be greater than 10.

    :returns: ``(data_frame_by_entity_key_plural, simulation)``.
    """
    year = 2009
    data = get_input_data_frame(year)
    options = dict(input_data_frame=data, year=year)
    scenario = SurveyScenario().init_from_data_frame(**options)
    simulation = scenario.new_simulation(trace=True)
    exported = scenario.create_data_frame_by_entity_key_plural(
        variables=[
            'aspa', 'aide_logement_montant_brut',
            'idmen', 'quimen', 'idfoy', 'quifoy', 'idfam', 'quifam',
            'age', 'activite', 'br_rmi_i', 'champm_individus',
            'pensions_alimentaires_percues', 'salaire_imposable', 'salaire_net',
            'smic55', 'txtppb', 'af_nbenf', 'af', 'br_rmi', 'rsa', 'rstnet',
            'weight_familles', 'revdisp',
        ]
    )
    total = (exported['familles'].weight_familles * exported['familles'].af).sum()
    assert total / 1e9 > 10
    return exported, simulation
def simulate_reform_cd(year):
    """Build the ``reform_cd`` reform and simulate it on the data of ``year``.

    :param year: year of the input data and of the scenario; must not be
        ``None``.
    :returns: ``(data_frame_by_entity_key_plural, reference_simulation)``; the
        dict maps ``'foyers'`` to a DataFrame of ``'impo'`` and ``'rfr'``.
    """
    assert year is not None
    system_factory = openfisca_france_data.init_country()
    base_system = system_factory()
    reformed_system = reform_cd.build_reform(base_system)
    data = get_input_data_frame(year)
    options = dict(
        input_data_frame=data,
        used_as_input_variables=['sal', 'cho', 'rst', 'age_en_mois', 'smic55'],
        year=year,
        tax_benefit_system=reformed_system,
    )
    scenario = SurveyScenario().init_from_data_frame(**options)
    reference_simulation = scenario.new_simulation(debug=True, reference=True)
    # Created for parity with the reference run; its result is not used below.
    scenario.new_simulation(debug=True)

    # NOTE(review): the "reform" frame is filled from the reference
    # simulation -- confirm whether the reform simulation was meant here.
    foyers = pandas.DataFrame(
        {name: reference_simulation.calculate_add(name) for name in ['impo', 'rfr']})
    return dict(foyers=foyers), reference_simulation
def df_survey_simulation(reductions):
    """Simulate the 2009 survey data and export ``foyers`` variables.

    :param reductions: list of variable names computed in addition to the
        fixed base list (joined to it with ``+``, hence a list is expected).
    :returns: dict whose only key, ``'foyers'``, maps to the resulting
        DataFrame.
    """
    year = 2009
    system_factory = openfisca_france_data.init_country()
    system = system_factory()
    data = get_input_data_frame(year)
    options = dict(
        input_data_frame=data,
        used_as_input_variables=['sal', 'cho', 'rst', 'age_en_mois', 'smic55'],
        year=year,
        tax_benefit_system=system,
    )
    scenario = SurveyScenario().init_from_data_frame(**options)
    simulation = scenario.new_simulation()
    # from openfisca_core import periods
    # period = periods.period('year', 2007)
    # period = period.start.offset('first-of', 'month').period('year')
    # bareme = simulation.legislation_at(period.start).ir.bareme
    variable_names = [
        'rfr', 'irpp', 'rbg', 'iaidrdi', 'rng',
        'ip_net', 'reductions', 'decile_rfr', 'weight_foyers',
    ] + reductions
    columns = {}
    for variable_name in variable_names:
        columns[variable_name] = simulation.calculate_add(variable_name)
    data_frame_by_entity_key_plural = dict(foyers=pandas.DataFrame(columns))
    return data_frame_by_entity_key_plural
def varying_survey_simulation(year=2009, increment=10, target='irpp', varying='rni',
                              used_as_input_variables=None):
    """Measure how ``target`` reacts when ``varying`` is raised by ``increment``.

    Two simulations are run on the survey data of ``year``: the first one
    computes ``target`` and ``varying``; the second one starts from the
    per-entity input frames in which ``varying`` has been incremented and is
    declared as an input variable.

    :param year: year of the input data and of both scenarios.
    :param increment: amount added to ``varying`` before the second run.
    :param target: name of the variable whose response is measured.
    :param varying: name of the variable that is incremented.
    :param used_as_input_variables: input variables of the first run; ``None``
        is treated as an empty list when building the list of the second run.
    :returns: DataFrame merged on ``'idfoy_original'`` holding the columns of
        both runs (``_2`` suffix for the second) plus ``'marginal_rate'`` and
        ``'average_rate'``.
    """
    TaxBenefitSystem = openfisca_france_data.init_country()
    tax_benefit_system = TaxBenefitSystem()
    input_data_frame = get_input_data_frame(year)
    output_variables = [target, varying, 'idfoy_original']

    # Simulation 1: get varying and target.
    survey_scenario = SurveyScenario().init_from_data_frame(
        input_data_frame=input_data_frame,
        used_as_input_variables=used_as_input_variables,
        year=year,
        tax_benefit_system=tax_benefit_system,
    )
    simulation = survey_scenario.new_simulation(debug=False)
    output_data_frame = pandas.DataFrame(
        {name: simulation.calculate_add(name) for name in output_variables})

    # Build the per-entity input frames from the previous input frame and simulation.
    input_data_frames_by_entity_key_plural = from_input_df_to_entity_key_plural_df(
        input_data_frame, tax_benefit_system, simulation)
    foyers = input_data_frames_by_entity_key_plural['idfoy']
    foyers = pandas.merge(foyers, output_data_frame, on='idfoy_original')

    # Increment the varying variable.
    foyers[varying] = foyers[varying] + increment

    # Put the updated frame back into the dictionary.
    input_data_frames_by_entity_key_plural['idfoy'] = foyers

    # Simulation 2: start from the per-entity input frames.
    # TODO: fix used_as_input_variables in the from_input_df_to_entity_key_plural_df() function
    # The default is None, which cannot be concatenated with a list.
    used_as_input_variables = list(used_as_input_variables or []) + [varying]
    TaxBenefitSystem = openfisca_france_data.init_country()
    tax_benefit_system = TaxBenefitSystem()
    survey_scenario = SurveyScenario().init_from_data_frame(
        input_data_frame=None,
        input_data_frames_by_entity_key_plural=input_data_frames_by_entity_key_plural,
        used_as_input_variables=used_as_input_variables,
        year=year,
        tax_benefit_system=tax_benefit_system,
    )
    simulation = survey_scenario.new_simulation(debug=False)
    output_data_frame2 = pandas.DataFrame(
        {name: simulation.calculate_add(name) for name in output_variables})

    varying_2 = '{}_2'.format(varying)
    target_2 = '{}_2'.format(target)
    output_data_frame2.rename(columns={varying: varying_2, target: target_2}, inplace=True)
    merged = pandas.merge(output_data_frame, output_data_frame2, on='idfoy_original')

    # Use the actual ``varying`` columns: the previously hard-coded
    # 'rni'/'rni_2' names only exist when varying == 'rni'.
    merged['marginal_rate'] = marginal_rate_survey(merged, target, target_2, varying, varying_2)
    merged['average_rate'] = average_rate(target=merged[target], varying=merged[varying])

    return merged
del df['is{}'.format(p)], df['p{}'.format(p)] df['decile_of_{}'.format( variable)] = number_of_quantile - df['decile_of_{}'.format(variable)] return if __name__ == '__main__': import logging import time log = logging.getLogger(__name__) import sys logging.basicConfig(level=logging.INFO, stream=sys.stdout) start = time.time() year = 2009 input_data_frame = get_input_data_frame(year) revimp = input_data_frame[[ 'salaire_imposable', 'quifoy', 'idfoy_original' ]][input_data_frame.quifoy == 0] # revimp.revimp[np.isnan(revimp.revimp)] = 0 ind_variables = [ 'idmen', 'quimen', 'idfoy', 'salaire_imposable', 'salaire_net' ] # foy_variables = ['irpp', 'decile_rfr', 'decile_rni', 'weight_foyers', 'idfoy_original', 'rfr'] foy_variables = [ 'irpp', 'decile_rfr', 'weight_foyers', 'idfoy_original', 'rfr' ] used_as_input_variables = [ 'salaire_imposable', 'cho', 'rst', 'age_en_mois', 'smic55' ]
df['decile_of_{}'.format(variable)] = df['decile_of_{}'.format(variable)] + df['is{}'.format(p)].astype('int') del df['is{}'.format(p)], df['p{}'.format(p)] df['decile_of_{}'.format(variable)] = number_of_quantile - df['decile_of_{}'.format(variable)] return if __name__ == '__main__': import logging import time log = logging.getLogger(__name__) import sys logging.basicConfig(level = logging.INFO, stream = sys.stdout) start = time.time() year = 2009 input_data_frame = get_input_data_frame(year) revimp = input_data_frame[['salaire_imposable', 'quifoy', 'idfoy_original']][input_data_frame.quifoy == 0] # revimp.revimp[np.isnan(revimp.revimp)] = 0 ind_variables = ['idmen', 'quimen', 'idfoy', 'salaire_imposable', 'salaire_net'] # foy_variables = ['irpp', 'decile_rfr', 'decile_rni', 'weight_foyers', 'idfoy_original', 'rfr'] foy_variables = ['irpp', 'decile_rfr', 'weight_foyers', 'idfoy_original', 'rfr'] used_as_input_variables = ['salaire_imposable', 'cho', 'rst', 'age_en_mois', 'smic55'] df_by_entity_key_plural, simulation = survey_simulate(used_as_input_variables, year, ind_variables, foy_variables = foy_variables) df_foyers = df_by_entity_key_plural['foyers'][['weight_foyers', 'idfoy_original', 'rfr']] df = pandas.merge(df_foyers, revimp, on = 'idfoy_original') weight = 'weight_foyers' number_of_quantile = 10 # make_weighted_deciles_of_variable(df, 'revimp', weight, number_of_quantile)