def test_get_or_construct_value1():
    """Check the construction of a variable whose formula references another formula.

    ``very_complicated_var`` is defined as ``complicated_var^2`` and
    ``complicated_var`` is itself a formula over ``pib``.  The test requires
    the constructed result to be a one-column ``pandas.DataFrame`` named after
    the variable and containing only zeros.

    NOTE(review): the expected zeros presumably rely on the formula evaluator
    treating ``^`` as exponentiation (so that ``pib*pib - pib^2`` cancels) --
    confirm against ``get_or_construct_value``.
    """
    folder_year = 2013
    overall_dict = {
        'pib': {
            'code': 'B1g/PIB',
            'institution': 'S1',
            'description': 'PIB'
            },
        'complicated_var': {
            'code': None,
            'description': 'PIB0',
            'formula': '2*pib - pib - pib + pib*pib - pib^2'
            },
        'very_complicated_var': {
            'code': None,
            'description': 'PIB0',
            'formula': 'complicated_var^2'
            }
        }
    df = get_comptes_nationaux_data(folder_year)

    # Smoke check only: a variable that has a code must be retrievable
    # without raising.  The result itself is not inspected.
    get_or_construct_value(df, 'pib', overall_dict, years = range(1949, 2014))

    variable_name = 'very_complicated_var'
    serie, formula = get_or_construct_value(df, variable_name, overall_dict, years = range(1949, 2014))
    assert isinstance(serie, pandas.DataFrame)
    # Compare plain lists: ``Index == list`` is element-wise and raises
    # ValueError on a length mismatch instead of failing the assertion.
    assert list(serie.columns) == [variable_name], serie.columns
    assert (serie[variable_name] == 0).all(), serie[variable_name]
def test_run_through():
    """Smoke-test ``get_or_construct_value`` on several fiscal variables.

    No result is inspected: the test succeeds as long as none of the calls
    raises for the years 2006 to 2009.
    """
    years = [2006, 2007, 2008, 2009]
    df = get_denombrements_fiscaux_data_frame(years=years)
    index_by_variable_name = create_index_by_variable_name(
        formula_by_variable_name, level_2_formula_by_variable_name)

    # (variable name, extra keyword arguments), in evaluation order.
    cases = (
        ('interets_imposes_au_prelevement_liberatoire', {}),
        ('dividendes_imposes_au_prelevement_liberatoire', {}),
        ('revenus_imposes_au_prelevement_liberatoire', {'fill_value': 0}),
        ('assurances_vie_imposees_au_prelevement_liberatoire', {}),
        ('f2da', {}),
    )
    for variable_name, extra_kwargs in cases:
        get_or_construct_value(
            df, variable_name, index_by_variable_name, years=years,
            **extra_kwargs)
def test_run_through():
    """Run ``get_or_construct_value`` over a fixed list of variables.

    This is a "does not raise" check for the years 2006-2009; return values
    are discarded.
    """
    years = [2006, 2007, 2008, 2009]
    df = get_denombrements_fiscaux_data_frame(years=years)
    index_by_variable_name = create_index_by_variable_name(formula_by_variable_name, level_2_formula_by_variable_name)

    # Each variable maps to the extra keyword arguments of its call.
    extra_kwargs_by_variable = [
        ("interets_imposes_au_prelevement_liberatoire", {}),
        ("dividendes_imposes_au_prelevement_liberatoire", {}),
        ("revenus_imposes_au_prelevement_liberatoire", {"fill_value": 0}),
        ("assurances_vie_imposees_au_prelevement_liberatoire", {}),
        ("f2da", {}),
    ]
    for variable_name, extra_kwargs in extra_kwargs_by_variable:
        get_or_construct_value(df, variable_name, index_by_variable_name, years=years, **extra_kwargs)
def build_aggregates(raw_data,
                     formula_by_variable_name,
                     level_2_formula_by_variable_name=None,
                     years=None,
                     fill_value=numpy.nan):
    """Build one data frame holding every constructed variable, indexed by year.

    Each variable named in ``formula_by_variable_name`` and, when given, in
    ``level_2_formula_by_variable_name`` is computed with
    ``get_or_construct_value`` and appended as new column(s).

    :param raw_data: data passed through unchanged to ``get_or_construct_value``.
    :param formula_by_variable_name: mapping whose keys are the first-level
        variable names to build.
    :param level_2_formula_by_variable_name: optional mapping whose keys are
        the second-level variable names to build; ``None`` means none.
    :param years: years to build the variables for (mandatory).
    :param fill_value: forwarded to ``get_or_construct_value``.
    :returns: the concatenated data frame, or ``None`` when there is no
        variable to build.
    :raises AssertionError: if ``years`` is ``None`` or if a constructed serie
        still has a duplicated index after de-duplication.
    """
    assert years is not None
    aggregates = None
    index_by_variable_name = create_index_by_variable_name(
        formula_by_variable_name, level_2_formula_by_variable_name)

    # Materialise the keys: dict views cannot be added on Python 3, and the
    # second mapping is optional.
    variable_names = list(formula_by_variable_name.keys())
    if level_2_formula_by_variable_name is not None:
        variable_names += list(level_2_formula_by_variable_name.keys())

    for variable_name in variable_names:
        serie, _formula = get_or_construct_value(raw_data,
                                                 variable_name,
                                                 index_by_variable_name,
                                                 years=years,
                                                 fill_value=fill_value)
        # Drop fully duplicated rows, then index by year again.
        serie = serie.reset_index().drop_duplicates().set_index('year')
        assert not numpy.any(
            serie.index.duplicated()), 'Duplicated index for {} : {}'.format(
                variable_name, serie)
        if aggregates is None:
            aggregates = serie
            continue
        try:
            # verify_integrity makes duplicated labels on the concatenated
            # axis raise instead of passing silently.
            aggregates = pandas.concat([aggregates, serie],
                                       axis=1,
                                       verify_integrity=True)
        except Exception:
            # Show both operands for debugging, then re-raise with the
            # original traceback intact.
            print("aggregates {}".format(aggregates))
            print("serie {}".format(serie))
            raise

    return aggregates
Exemplo n.º 5
0
def test_get_or_construct_value_empty():
    """Request ``'notfound_arg'`` from an incomplete variable dictionary.

    NOTE(review): per the comment below the call is expected to fail with a
    KeyError, but nothing asserts it yet; the value is simply returned.
    """
    tidy_data = get_tidy_data(2013)
    incomplete_overall_dict = create_dict_for_test_get_value_empty()
    result = get_or_construct_value(tidy_data, 'notfound_arg', incomplete_overall_dict)
    value_empty, _formula_of_empty = result
    # should return KeyError because components of formula are not in dict
    # TODO: assert KeyError...
    return value_empty
Exemplo n.º 6
0
def test_get_or_construct_value_empty():
    """Construct a variable whose formula components are missing from the dict.

    NOTE(review): the original comment says a KeyError is expected here; the
    corresponding assertion is still a TODO, so the value is returned as is.
    """
    data = get_tidy_data(2013)
    incomplete_index = create_dict_for_test_get_value_empty()
    value_empty, _unused_formula = get_or_construct_value(
        data,
        'notfound_arg',
        incomplete_index,
    )
    # should return KeyError because components of formula are not in dict
    # TODO: assert KeyError...
    return value_empty
 def assert_value_construction(variable_name, test):
     """Check the constructed value of ``variable_name`` against ``test``.

     ``test`` provides the ``'year'`` to look up and the expected ``'target'``.
     ``df``, ``index_by_variable_name`` and ``years`` are taken from the
     enclosing scope.  The value is always constructed, but it is only
     compared with the target for years from 2009 onwards.
     """
     year = test['year']
     target = test['target']
     constructed = get_or_construct_value(
         df, variable_name, index_by_variable_name, years = years, fill_value = 0)
     value = constructed[0].loc[year]
     if not year >= 2009:
         return
     assert all(value == target), "{} for {}: got {} instead of {}".format(
         variable_name, year, value.values, target)
 def assert_value_construction(variable_name, test):
     """Assert that ``variable_name`` evaluates to the target of ``test``.

     ``test`` is a mapping with a ``'year'`` and a ``'target'`` entry.  The
     data (``df``), the variable index and ``years`` come from the enclosing
     scope.
     """
     year = test['year']
     target = test['target']
     serie = get_or_construct_value(
         df, variable_name, index_by_variable_name, years=years)[0]
     value = serie.loc[year]
     assert all(value == target), "{} for {}: got {} instead of {}".format(
         variable_name, year, value.values, target)
Exemplo n.º 9
0
def test_get_or_construct_value_working():
    """Construct several variables, from a plain code lookup to formulas.

    Nothing is asserted: the six constructed values are returned so that a
    caller can inspect them.  The formulas returned alongside are discarded.
    """
    df = get_tidy_data(2013)

    def d41_entry(**extra):
        # Fresh entry on every call so that no two keys share one dict.
        return dict(code='D41', institution='S2', ressources=False, **extra)

    def construct(variable_name, index, first_year=1949):
        # Every call covers ``first_year`` up to and including 2013.
        value, _formula = get_or_construct_value(
            df, variable_name, index, years=range(first_year, 2014))
        return value

    simple_dict = {'Interets verses par rdm': d41_entry()}
    dict_with_div = {
        'Interets_verses_par_rdm': d41_entry(),
        'Interets_verses_par_rdm / 100':
            d41_entry(formula='Interets_verses_par_rdm / 100'),
    }
    dict_sal_cot_soc = create_dict_sal_cot_soc()
    dict_revenus_rdm = create_dict_revenus_rdm()
    dict_with_squares = create_dict_w_squares()
    dict_profits = create_dict_profits()

    value_simple_dict = construct('Interets verses par rdm', simple_dict)
    value_div = construct('Interets_verses_par_rdm / 100', dict_with_div)
    value_sq = construct('Square_of_sum', dict_with_squares)
    value_rdm_net = construct(
        'Interets_et_dividendes_verses_par_rdm_nets', dict_revenus_rdm)
    values_profits = construct('Profits_des_societes', dict_profits)
    value_sal_cs = construct(
        'Sal_cs_verses_societes', dict_sal_cot_soc, first_year=1978)

    return (value_simple_dict, value_div, value_sq, value_rdm_net,
            values_profits, value_sal_cs)
def build_aggregates(raw_data, formula_by_variable_name, level_2_formula_by_variable_name = None, years = None,
        fill_value = numpy.nan):
    """Concatenate, year by year, every variable that can be constructed.

    The variables are the keys of ``formula_by_variable_name`` followed by the
    keys of ``level_2_formula_by_variable_name`` (when it is not ``None``).
    Each one is computed with ``get_or_construct_value`` and added as new
    column(s) of the result.

    :param raw_data: data forwarded unchanged to ``get_or_construct_value``.
    :param formula_by_variable_name: mapping keyed by first-level variable name.
    :param level_2_formula_by_variable_name: optional mapping keyed by
        second-level variable name.
    :param years: years to compute; must not be ``None``.
    :param fill_value: forwarded to ``get_or_construct_value``.
    :returns: the aggregated data frame indexed by year, or ``None`` when no
        variable was requested.
    :raises AssertionError: when ``years`` is ``None`` or when a serie keeps a
        duplicated index after de-duplication.
    """
    assert years is not None
    aggregates = None
    index_by_variable_name = create_index_by_variable_name(formula_by_variable_name, level_2_formula_by_variable_name)

    # Lists instead of dict views: views cannot be added on Python 3, and the
    # level-2 mapping may legitimately be None.
    level_2_names = (
        [] if level_2_formula_by_variable_name is None else list(level_2_formula_by_variable_name.keys())
        )
    for variable_name in list(formula_by_variable_name.keys()) + level_2_names:
        serie, _formula = get_or_construct_value(
            raw_data, variable_name, index_by_variable_name, years = years, fill_value = fill_value)
        # Remove exact duplicate rows before restoring the year index.
        serie = serie.reset_index().drop_duplicates().set_index('year')
        assert not numpy.any(serie.index.duplicated()), 'Duplicated index for {} : {}'.format(
            variable_name, serie)
        if aggregates is None:
            aggregates = serie
        else:
            try:
                # verify_integrity rejects duplicated labels on the concatenated axis.
                aggregates = pandas.concat([aggregates, serie], axis = 1, verify_integrity = True)
            except Exception:
                # Dump both operands for debugging and keep the original traceback.
                print("aggregates {}".format(aggregates))
                print("serie {}".format(serie))
                raise

    return aggregates
Exemplo n.º 11
0
def test_get_or_construct_value_working():
    """Exercise ``get_or_construct_value`` on six different variable dictionaries.

    The test asserts nothing; it returns the six constructed values (the
    accompanying formulas are dropped) for inspection by the caller.
    """
    df = get_tidy_data(2013)
    last_year_excluded = 2014

    simple_dict = {
        'Interets verses par rdm': {
            'code': 'D41',
            'institution': 'S2',
            'ressources': False,
            },
        }
    dict_with_div = {
        'Interets_verses_par_rdm': {
            'code': 'D41',
            'institution': 'S2',
            'ressources': False,
            },
        'Interets_verses_par_rdm / 100': {
            'code': 'D41',
            'institution': 'S2',
            'ressources': False,
            'formula': 'Interets_verses_par_rdm / 100',
            },
        }
    dict_sal_cot_soc = create_dict_sal_cot_soc()
    dict_revenus_rdm = create_dict_revenus_rdm()
    dict_with_squares = create_dict_w_squares()
    dict_profits = create_dict_profits()

    def value_of(variable_name, index, first_year):
        # Unpack the (value, formula) pair and keep the value only.
        value, _formula = get_or_construct_value(
            df, variable_name, index, years = range(first_year, last_year_excluded)
            )
        return value

    value_simple_dict = value_of('Interets verses par rdm', simple_dict, 1949)
    value_div = value_of('Interets_verses_par_rdm / 100', dict_with_div, 1949)
    value_sq = value_of('Square_of_sum', dict_with_squares, 1949)
    value_rdm_net = value_of('Interets_et_dividendes_verses_par_rdm_nets', dict_revenus_rdm, 1949)
    values_profits = value_of('Profits_des_societes', dict_profits, 1949)
    value_sal_cs = value_of('Sal_cs_verses_societes', dict_sal_cot_soc, 1978)

    return value_simple_dict, value_div, value_sq, value_rdm_net, values_profits, value_sal_cs
 def assert_value_construction(variable_name, test):
     """Compare the constructed value of ``variable_name`` with an expected target.

     ``test`` supplies the ``"year"`` to select and the ``"target"`` to match.
     ``df``, ``index_by_variable_name`` and ``years`` are read from the
     enclosing scope.
     """
     year = test["year"]
     target = test["target"]
     constructed = get_or_construct_value(df, variable_name, index_by_variable_name, years=years)
     value = constructed[0].loc[year]
     assert all(value == target), "{} for {}: got {} instead of {}".format(
         variable_name, year, value.values, target
     )
def test_run_through():
    """Smoke-test ``get_or_construct_value`` for the years 2006 to 2011.

    Each case lists a variable name and the keyword arguments of its call.
    Results are discarded: the test only checks that no call raises.
    """
    years = [2006, 2007, 2008, 2009, 2010, 2011]
    df = get_denombrements_fiscaux_data_frame(years=years)
    index_by_variable_name = create_index_by_variable_name(
        formula_by_variable_name, level_2_formula_by_variable_name)

    cases = [
        ('interets_imposes_au_prelevement_liberatoire',
         dict(years=years)),
        ('dividendes_imposes_au_prelevement_liberatoire',
         dict(years=years)),
        ('revenus_imposes_au_prelevement_liberatoire',
         dict(years=years, fill_value=0)),
        ('assurances_vie_imposees_au_prelevement_liberatoire',
         dict(years=years)),
        ('f2da',
         dict(years=years)),
        # The remaining variables are requested on narrower year ranges.
        (u'f5he',
         dict(years=range(2010, 2012))),
        (u'f5jr',
         dict(years=range(2007, 2012), fill_value=0)),
        ('plus_values_professionnelles_regime_normal',
         dict(years=range(2007, 2012), fill_value=0)),
    ]
    for variable_name, call_kwargs in cases:
        get_or_construct_value(
            df, variable_name, index_by_variable_name, **call_kwargs)
def test_run_through():
    """Check that a fixed set of variables can be constructed without raising.

    The data frame is loaded for 2006-2011.  Some variables are requested on
    a narrower range of years and/or with ``fill_value = 0``.  No result is
    inspected.
    """
    years = [2006, 2007, 2008, 2009, 2010, 2011]
    df = get_denombrements_fiscaux_data_frame(years = years)
    index_by_variable_name = create_index_by_variable_name(formula_by_variable_name, level_2_formula_by_variable_name)

    # (variable name, keyword arguments of the call), in evaluation order.
    calls = (
        ('interets_imposes_au_prelevement_liberatoire', {'years': years}),
        ('dividendes_imposes_au_prelevement_liberatoire', {'years': years}),
        ('revenus_imposes_au_prelevement_liberatoire', {'years': years, 'fill_value': 0}),
        ('assurances_vie_imposees_au_prelevement_liberatoire', {'years': years}),
        ('f2da', {'years': years}),
        (u'f5he', {'years': range(2010, 2012)}),
        (u'f5jr', {'years': range(2007, 2012), 'fill_value': 0}),
        ('plus_values_professionnelles_regime_normal', {'years': range(2007, 2012), 'fill_value': 0}),
        )
    for variable_name, keyword_arguments in calls:
        get_or_construct_value(df, variable_name, index_by_variable_name, **keyword_arguments)