Example #1
def estimate_simulate(which, points, model_obj, df_obs):
    """Allow the user to easily simulate samples at the beginning and the end of the estimation."""
    paras_obj, questions, version = \
        dist_class_attributes(model_obj, 'paras_obj', 'questions', 'version')

    if version in ['scaled_archimedean']:
        upper, marginals = dist_class_attributes(model_obj, 'upper', 'marginals')
        version_specific = {'upper': upper, 'marginals': marginals}
    elif version in ['nonstationary']:
        version_specific = dict()
    else:
        raise TrempyError('version not implemented')

    m_optimal = get_optimal_compensations(version, paras_obj, questions,
                                          **version_specific)

    os.mkdir(which)
    os.chdir(which)

    sim_model = copy.deepcopy(model_obj)
    sim_model.attr['sim_file'] = which

    sim_model.update('optim', 'free', points)
    sim_model.write_out(which + '.trempy.ini')
    simulate(which + '.trempy.ini')

    compare_datasets(which, df_obs, questions, m_optimal)

    os.chdir('../')
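
A minimal usage sketch, assuming model_obj, paras_obj, and df_obs are in scope as in Example #6 below; 'start' is a hypothetical subdirectory name, and points is the free parameter vector in optimizer space:

# Hypothetical driver: simulate a sample at the current free parameters.
x_optim_free_start = paras_obj.get_values('optim', 'free')
estimate_simulate('start', x_optim_free_start, model_obj, df_obs)
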
Example #2
def run_regression_test(test):
    """Run a single regression test.

    It is repeatedly used by the testing infrastructure.
    Thus, manual modifications are only required here.
    """
    # Create and process initialization file
    init_dict, crit_val = test

    # Temporary: handle old dictionaries in the vault that lack the newer keys.
    version_keys = init_dict['VERSION'].keys()
    if 'discounting' not in version_keys:
        init_dict['VERSION']['discounting'] = None
    if 'stationary_model' not in version_keys:
        init_dict['VERSION']['stationary_model'] = True
    if 'heterogeneity' not in version_keys:
        init_dict['VERSION']['heterogeneity'] = False
    if 'df_other' not in version_keys:
        init_dict['VERSION']['df_other'] = 'equal_univariate'

    print_init_dict(init_dict)
    model_obj = ModelCls('test.trempy.ini')
    df, _ = simulate('test.trempy.ini')

    # Distribute class attributes for further processing.
    args = [model_obj, 'paras_obj', 'questions', 'cutoffs', 'version']
    paras_obj, questions, cutoffs, version = dist_class_attributes(*args)

    if version in ['scaled_archimedean']:
        args = [model_obj, 'marginals', 'upper']
        marginals, upper = dist_class_attributes(*args)
        version_specific = {'marginals': marginals, 'upper': upper}
    else:
        version_specific = dict()

    # The number of actual economic parameters in paras_obj, not counting the question-specific parameters.
    n_econ_params = paras_obj.attr['nparas_econ']

    # Standard deviations: entries after the economic parameters belong to the questions.
    x_econ_all = paras_obj.get_values('econ', 'all')
    if version in ['scaled_archimedean']:
        sds = x_econ_all[5:]
    elif version in ['nonstationary']:
        sds = x_econ_all[n_econ_params:]
    else:
        raise TrempyError('version not implemented')

    stat, _ = criterion_function(df, questions, cutoffs, paras_obj,
                                 version, sds, **version_specific)
    np.testing.assert_almost_equal(stat, crit_val)
Example #3
def create_regression_vault(num_tests):
    """Create a set of regression tests."""
    np.random.seed(123)

    tests = []
    for i in range(num_tests):

        print('\n ... creating test ' + str(i))

        constr = dict()
        constr['maxfun'] = np.random.randint(1, 6)  # draws from {1, ..., 5}

        # Create and process initialization file
        init_dict = get_random_init(constr)
        model_obj = ModelCls('test.trempy.ini')
        df, _ = simulate('test.trempy.ini')

        # Distribute class attributes for further processing.
        args = [model_obj, 'paras_obj', 'questions', 'cutoffs', 'version']
        paras_obj, questions, cutoffs, version = dist_class_attributes(*args)

        # Handle version-specific objects not included in the paras_obj
        if version in ['scaled_archimedean']:
            upper, marginals = dist_class_attributes(model_obj, 'upper', 'marginals')
            version_specific = {'upper': upper, 'marginals': marginals}
        elif version in ['nonstationary']:
            version_specific = dict()
        else:
            raise TrempyError('version not implemented')

        # Get number of economic parameters. Paras with higher index belong to questions.
        nparas_econ = paras_obj.attr['nparas_econ']

        # Now get correct standard deviations. Versions are handled implicitly.
        x_econ_all = paras_obj.get_values('econ', 'all')
        sds = x_econ_all[nparas_econ:]

        # Evaluate criterion function and process results
        stat, _ = criterion_function(df, questions, cutoffs, paras_obj,
                                     version, sds, **version_specific)
        tests += [(init_dict, stat)]

        cleanup()

    with open('regression_vault.trempy.pkl', 'wb') as outfile:
        pkl.dump(tests, outfile)
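
A minimal driver, assuming both create_regression_vault and run_regression_test (Example #2) are importable: build a small vault, then replay every stored (init_dict, crit_val) pair.

import pickle as pkl

create_regression_vault(5)

with open('regression_vault.trempy.pkl', 'rb') as infile:
    tests = pkl.load(infile)

for test in tests:
    run_regression_test(test)
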
Example #4
    def _check_integrity(self):
        """Check the integrity of the class instance."""
        # Distribute class attributes for further processing.
        args = ['questions', 'start', 'num_skip']
        questions, start, num_skip = dist_class_attributes(self, *args)

        # We restrict the question identifiers to lie between 1 and 45.
        np.testing.assert_equal(0 < min(questions) <= max(questions) < 46,
                                True)

        # The number of skipped individuals has to be non-negative.
        np.testing.assert_equal(0 <= num_skip, True)

        # There are two alternatives for how to start the estimation.
        np.testing.assert_equal(start in ['init', 'auto'], True)
Example #5
def test_2():
    """Ensure the back an forth transformations for the parameter values."""
    get_random_init()

    model_obj = ModelCls('test.trempy.ini')
    paras_obj, num_questions = dist_class_attributes(model_obj, 'paras_obj', 'num_questions')
    nparas_econ = paras_obj.attr['nparas_econ']

    for _ in range(500):
        x_optim_all_current = np.random.uniform(-1, 1, size=num_questions + nparas_econ)
        paras_obj.set_values('optim', 'all', x_optim_all_current)

        x_econ_all_current = paras_obj.get_values('econ', 'all')
        paras_obj.set_values('econ', 'all', x_econ_all_current)

        stat = paras_obj.get_values('optim', 'all')
        np.testing.assert_almost_equal(x_optim_all_current, stat)
Example #6
def estimate(fname):
    """Estimate the model by the method of maximum likelihood."""
    estimate_cleanup()

    model_obj = ModelCls(fname)

    # Distribute class attributes, except for the economic parameters and version-specific objects.
    args = [model_obj, 'version', 'est_file', 'questions', 'paras_obj', 'start', 'cutoffs',
            'maxfun', 'est_detailed', 'opt_options', 'optimizer', 'est_agents', 'num_skip']

    version, est_file, questions, paras_obj, start, cutoffs, maxfun, est_detailed, \
        opt_options, optimizer, est_agents, num_skip = dist_class_attributes(*args)

    # Handle version-specific objects not included in the paras_obj
    if version in ['scaled_archimedean']:
        upper, marginals = dist_class_attributes(model_obj, 'upper', 'marginals')
        version_specific = {'upper': upper, 'marginals': marginals}
    elif version in ['nonstationary']:
        version_specific = dict()
    else:
        raise TrempyError('version not implemented')

    # We only need to continue if there is at least one parameter to actually estimate.
    if len(paras_obj.get_values('optim', 'free')) == 0:
        raise TrempyError('no free parameter to estimate')

    # Some initial setup
    df_obs = process(est_file, questions, num_skip, est_agents, cutoffs)

    estimate_obj = EstimateClass(
        df=df_obs, cutoffs=cutoffs, questions=questions, paras_obj=copy.deepcopy(paras_obj),
        max_eval=maxfun, optimizer=optimizer, version=version, **version_specific)

    # If requested, obtain automatic starting values.
    if start in ['auto']:
        paras_obj = get_automatic_starting_values(paras_obj, df_obs, questions,
                                                  version, **version_specific)

    # Objects for scipy.minimize
    x_optim_free_start = paras_obj.get_values('optim', 'free')
    x_free_bounds = paras_obj.get_bounds('free')

    # We lock in an evaluation at the starting values, as not all optimizers actually start there.
    estimate_obj.evaluate(x_optim_free_start)

    # We simulate a sample at the starting point.
    if est_detailed:
        estimate_simulate('start', x_optim_free_start, model_obj, df_obs)

    # Optimization of likelihood function
    if maxfun > 1:

        options = dict()

        if optimizer == 'SCIPY-BFGS':
            options['gtol'] = opt_options['SCIPY-BFGS']['gtol']
            options['eps'] = opt_options['SCIPY-BFGS']['eps']
            method = 'BFGS'
            bounds = None
        elif optimizer == 'SCIPY-POWELL':
            options['ftol'] = opt_options['SCIPY-POWELL']['ftol']
            options['xtol'] = opt_options['SCIPY-POWELL']['xtol']
            method = 'POWELL'
            bounds = None
        elif optimizer == 'SCIPY-L-BFGS-B':
            options['gtol'] = opt_options['SCIPY-L-BFGS-B']['gtol']
            options['ftol'] = opt_options['SCIPY-L-BFGS-B']['ftol']
            options['eps'] = opt_options['SCIPY-L-BFGS-B']['eps']
            method = 'L-BFGS-B'
            bounds = x_free_bounds
        else:
            raise TrempyError('flawed choice of optimization method')

        try:
            opt = minimize(estimate_obj.evaluate, x_optim_free_start, method=method,
                           options=options, bounds=bounds)
        except MaxfunError:
            opt = dict()
            opt['message'] = 'Optimization reached maximum number of function evaluations.'
            opt['success'] = False
    else:
        # We are not faced with a serious estimation request.
        opt = dict()
        opt['message'] = 'Single evaluation of criterion function at starting values.'
        opt['success'] = False

    # Now we can wrap up all estimation related tasks.
    estimate_obj.finish(opt)

    # We simulate a sample at the stopping point.
    if est_detailed:
        x_econ_all_step = estimate_obj.get_attr('x_econ_all_step')
        paras_obj.set_values('econ', 'all', x_econ_all_step)
        x_optim_free_step = paras_obj.get_values('optim', 'free')
        estimate_simulate('stop', x_optim_free_step, model_obj, df_obs)
        shutil.copy('stop/compare.trempy.info', 'compare.trempy.info')

    # We only return the best value of the criterion function and the corresponding parameter
    # vector.
    rslt = list()
    rslt.append(estimate_obj.get_attr('f_step'))
    rslt.append(estimate_obj.get_attr('x_econ_all_step'))

    return rslt
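
A minimal sketch of a complete run, assuming an initialization file with the hypothetical name model.trempy.ini exists: the returned list holds the best criterion value and the corresponding vector of economic parameters.

fval, x_econ_all_step = estimate('model.trempy.ini')
print('criterion value at step:', fval)
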
Example #7
def test_6():
    """Ensure that the weight c_t in the CES function is computed correctly."""
    for _ in range(500):
        get_random_init()
        model_obj = ModelCls('test.trempy.ini')
        args = ['paras_obj', 'num_questions', 'version']
        paras_obj, _, version = dist_class_attributes(model_obj, *args)

        if version in ['nonstationary']:
            nparas_econ = paras_obj.attr['nparas_econ']

            alpha, beta, gamma, y_scale, discount_factors_0, discount_factors_1, \
                discount_factors_3, discount_factors_6, discount_factors_12, discount_factors_24, \
                unrestricted_weights_0, unrestricted_weights_1, unrestricted_weights_3, \
                unrestricted_weights_6, unrestricted_weights_12, unrestricted_weights_24 = \
                paras_obj.get_values('econ', 'all')[:nparas_econ]

            discounting = paras_obj.attr['discounting']
            stationary_model = paras_obj.attr['stationary_model']
            df_other = paras_obj.attr['df_other']

            copula_obj = get_copula_nonstationary(
                alpha, beta, gamma, y_scale,
                discount_factors_0, discount_factors_1,
                discount_factors_3, discount_factors_6,
                discount_factors_12, discount_factors_24,
                unrestricted_weights_0, unrestricted_weights_1,
                unrestricted_weights_3, unrestricted_weights_6,
                unrestricted_weights_12, unrestricted_weights_24,
                discounting=discounting,
                stationary_model=stationary_model,
                df_other=df_other
            )

            unrestricted_weights = {
                0: unrestricted_weights_0,
                1: unrestricted_weights_1,
                3: unrestricted_weights_3,
                6: unrestricted_weights_6,
                12: unrestricted_weights_12,
                24: unrestricted_weights_24,
            }

            copula = copula_obj.attr['copula']
            y_weights = copula.attr['y_weights']
            d_f = copula.attr['discount_factors']

            if stationary_model:
                for t, c_t in y_weights.items():
                    np.testing.assert_equal(c_t == y_scale, True)
            else:
                for t, c_t in y_weights.items():
                    if df_other in ['linear']:
                        lhs = max(0, y_scale + t * unrestricted_weights_0)
                    elif df_other in ['exponential']:
                        lhs = y_scale * unrestricted_weights_0 ** t
                    elif df_other in ['equal_univariate']:
                        lhs = y_scale * d_f[t] ** (gamma - 1)
                    elif df_other in ['free']:
                        lhs = unrestricted_weights[t]

                    np.testing.assert_equal(c_t == lhs, True)
Example #8
def simulate(fname):
    """Simulate the model based on the initialization file."""
    model_obj = ModelCls(fname)
    version = model_obj.attr['version']

    # Get fixed args that do not change during simulation.
    args = [
        model_obj, 'sim_agents', 'questions', 'sim_seed', 'sim_file',
        'paras_obj', 'cutoffs'
    ]
    if version in ['scaled_archimedean']:
        args += ['upper', 'marginals']
        sim_agents, questions, sim_seed, sim_file, paras_obj, cutoffs, upper, marginals = \
            dist_class_attributes(*args)

        version_specific = {'upper': upper, 'marginals': marginals}
    elif version in ['nonstationary']:
        sim_agents, questions, sim_seed, sim_file, paras_obj, cutoffs = \
            dist_class_attributes(*args)
        version_specific = dict()
    else:
        raise TrempyError('version not implemented')

    np.random.seed(sim_seed)
    m_optimal = get_optimal_compensations(version, paras_obj, questions,
                                          **version_specific)

    # First, get number of preference parameters. Paras with higher index belong to questions!
    nparas_econ = paras_obj.attr['nparas_econ']

    # Now, get standard deviation for the error in each question.
    sds = paras_obj.get_values('econ', 'all')[nparas_econ:]
    heterogeneity = paras_obj.attr['heterogeneity']
    if heterogeneity:
        sds_time = sds[1]
        sds_risk = sds[2]

    # TODO: This is what I am proposing instead of the loop below
    # Simulate data
    # data = []
    # agent_identifier = np.arange(sim_agents)
    # for k, q in enumerate(questions):
    #     lower_cutoff, upper_cutoff = cutoffs[q]
    #     # If we estimate agent by agent, we use only two sds for time and risk questions.
    #     if heterogeneity:
    #         if q <= 30:
    #             sds_current_q = sds_time * (upper_cutoff - lower_cutoff) / 200
    #         else:
    #             sds_current_q = sds_risk * (upper_cutoff - lower_cutoff) / 20
    #     else:
    #         sds_current_q = sds[k]

    #     m_latent = np.random.normal(loc=m_optimal[q], scale=sds_current_q, size=sim_agents)
    #     m_observed = np.clip(m_latent, a_min=lower_cutoff, a_max=+np.inf)
    #     m_observed[m_observed > upper_cutoff] = NEVER_SWITCHERS

    #     question_identifier = np.repeat(q, repeats=sim_agents)

    #     data += list(zip(agent_identifier, question_identifier, m_observed))

    data = []
    for i in range(sim_agents):
        for k, q in enumerate(questions):
            lower_cutoff, upper_cutoff = cutoffs[q]
            # If we estimate agent by agent, we use only two sds for time and risk questions.
            if heterogeneity:
                if q <= 30:
                    sds_current_q = sds_time * (upper_cutoff -
                                                lower_cutoff) / 200
                else:
                    sds_current_q = sds_risk * (upper_cutoff -
                                                lower_cutoff) / 20
            else:
                sds_current_q = sds[k]

            m_latent = np.random.normal(loc=m_optimal[q],
                                        scale=sds_current_q,
                                        size=1)
            m_observed = np.clip(m_latent, a_min=lower_cutoff, a_max=+np.inf)
            m_observed[m_observed > upper_cutoff] = NEVER_SWITCHERS

            # Store the scalar rather than the one-element array.
            data += [[i, q, m_observed[0]]]

    # Post-processing step
    df = pd.DataFrame(data)
    df.rename({0: 'Individual', 1: 'Question', 2: 'Compensation'},
              axis='columns', inplace=True)

    # Use the builtin types; the np.int and np.float aliases are removed in recent NumPy.
    dtype = {'Individual': int, 'Question': int, 'Compensation': float}
    df = df.astype(dtype)
    df.set_index(['Individual', 'Question'], inplace=True, drop=False)
    df.sort_index(inplace=True)

    df.to_pickle(sim_file + '.trempy.pkl', protocol=2)

    x_econ_all_current = paras_obj.get_values('econ', 'all')

    fval, _ = criterion_function(df, questions, cutoffs, paras_obj, version,
                                 sds, **version_specific)

    write_info(version, x_econ_all_current, df, questions, fval, m_optimal,
               sim_file + '.trempy.info')

    return df, fval
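
For reference, the round trip used by the tests above: simulate writes sim_file + '.trempy.pkl' and sim_file + '.trempy.info' as side effects and returns the simulated data together with the criterion value at the true parameters.

df, fval = simulate('test.trempy.ini')
print(df.head())
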