def estimate_simulate(which, points, model_obj, df_obs):
    """Simulate a sample for one stage of the estimation and compare it to the data.

    A sub-directory named ``which`` is created in the current working directory. A
    deep copy of ``model_obj`` is updated with ``points``, written to
    ``<which>.trempy.ini`` inside that directory, simulated, and the result is passed
    to ``compare_datasets`` together with the observed data.

    Parameters
    ----------
    which : str
        Label of the stage. It is used as the directory name, as the ``sim_file``
        attribute of the copied model and as the prefix of the initialization file.
    points
        Values handed to ``sim_model.update('optim', 'free', points)``.
    model_obj
        Model specification; it is deep-copied and not modified here.
    df_obs
        Observed dataset forwarded to ``compare_datasets``.

    Raises
    ------
    TrempyError
        If the model version is neither 'scaled_archimedean' nor 'nonstationary'.
    """
    paras_obj, questions, version = dist_class_attributes(
        model_obj, 'paras_obj', 'questions', 'version')

    # Some versions carry extra objects that are not part of paras_obj.
    if version in ['scaled_archimedean']:
        upper, marginals = dist_class_attributes(model_obj, 'upper', 'marginals')
        version_specific = {'upper': upper, 'marginals': marginals}
    elif version in ['nonstationary']:
        version_specific = dict()
    else:
        # Fail with a clear message instead of an unbound local further down.
        raise TrempyError('version not implemented')

    m_optimal = get_optimal_compensations(version, paras_obj, questions, **version_specific)

    # Remember where we started so that we can always return, even when the
    # simulation or the comparison fails.
    start_dir = os.getcwd()
    os.mkdir(which)
    os.chdir(which)
    try:
        init_file = which + '.trempy.ini'

        sim_model = copy.deepcopy(model_obj)
        sim_model.attr['sim_file'] = which
        sim_model.update('optim', 'free', points)
        sim_model.write_out(init_file)

        simulate(init_file)

        compare_datasets(which, df_obs, questions, m_optimal)
    finally:
        os.chdir(start_dir)
def run_regression_test(test):
    """Re-evaluate one stored regression test and check it against its recorded value.

    The testing infrastructure calls this repeatedly, so manual adjustments to the
    regression logic are only needed in this function.

    Parameters
    ----------
    test : tuple
        Pair ``(init_dict, crit_val)``: the initialization dictionary and the value of
        the criterion function that was recorded for it.
    """
    init_dict, crit_val = test

    # Temporary: dictionaries from older vaults lack some version keys, so we add the
    # missing ones with their fallback values.
    legacy_defaults = (
        ('discounting', None),
        ('stationary_model', True),
        ('heterogeneity', False),
        ('df_other', 'equal_univariate'),
    )
    version_block = init_dict['VERSION']
    for key, fallback in legacy_defaults:
        if key not in version_block:
            version_block[key] = fallback

    # Create and process the initialization file.
    print_init_dict(init_dict)
    model_obj = ModelCls('test.trempy.ini')
    df, _ = simulate('test.trempy.ini')

    # Distribute class attributes for further processing.
    paras_obj, questions, cutoffs, version = dist_class_attributes(
        model_obj, 'paras_obj', 'questions', 'cutoffs', 'version')

    if version == 'scaled_archimedean':
        marginals, upper = dist_class_attributes(model_obj, 'marginals', 'upper')
        version_specific = {'marginals': marginals, 'upper': upper}
    else:
        version_specific = {}

    # Number of economic parameters in paras_obj, not counting the questions.
    n_econ_params = paras_obj.attr['nparas_econ']

    # The standard deviations are stored behind the economic parameters.
    x_econ_all = paras_obj.get_values('econ', 'all')
    if version == 'scaled_archimedean':
        sds = x_econ_all[5:]
    elif version == 'nonstationary':
        sds = x_econ_all[n_econ_params:]

    stat, _ = criterion_function(
        df, questions, cutoffs, paras_obj, version, sds, **version_specific)

    np.testing.assert_almost_equal(stat, crit_val)
def create_regression_vault(num_tests):
    """Create a set of regression tests and store them on disk.

    For each test a random initialization file is generated, a sample is simulated
    and the criterion function is evaluated. The pairs ``(init_dict, stat)`` are
    pickled to ``regression_vault.trempy.pkl`` in the current working directory.

    Parameters
    ----------
    num_tests : int
        Number of regression tests to create.
    """
    np.random.seed(123)

    tests = []
    for idx in range(num_tests):
        print('\n ... creating test ' + str(idx))

        # randint excludes its upper bound, so (1, 6) draws from 1, ..., 5 exactly like
        # the deprecated np.random.random_integers(1, 5).
        constr = {'maxfun': np.random.randint(1, 6)}

        # Create and process initialization file
        init_dict = get_random_init(constr)
        model_obj = ModelCls('test.trempy.ini')
        df, _ = simulate('test.trempy.ini')

        # Distribute class attributes for further processing.
        paras_obj, questions, cutoffs, version = dist_class_attributes(
            model_obj, 'paras_obj', 'questions', 'cutoffs', 'version')

        # Handle version-specific objects not included in the paras_obj
        if version in ['scaled_archimedean']:
            upper, marginals = dist_class_attributes(model_obj, 'upper', 'marginals')
            version_specific = {'upper': upper, 'marginals': marginals}
        elif version in ['nonstationary']:
            version_specific = dict()

        # Get number of economic parameters. Paras with higher index belong to questions.
        nparas_econ = paras_obj.attr['nparas_econ']

        # Now get the standard deviations, which are stored behind the economic
        # parameters for every version.
        x_econ_all = paras_obj.get_values('econ', 'all')
        sds = x_econ_all[nparas_econ:]

        # Evaluate criterion function and process results
        stat, _ = criterion_function(
            df, questions, cutoffs, paras_obj, version, sds, **version_specific)
        tests.append((init_dict, stat))

        cleanup()

    # The context manager guarantees that the vault is flushed and closed.
    with open('regression_vault.trempy.pkl', 'wb') as vault_file:
        pkl.dump(tests, vault_file)
def _check_integrity(self):
    """Validate the estimation-related attributes of the class instance.

    Each condition is checked with ``np.testing.assert_equal``, so a violated
    condition raises an ``AssertionError``.
    """
    questions, start, num_skip = dist_class_attributes(
        self, 'questions', 'start', 'num_skip')

    # All question identifiers have to be strictly positive and smaller than 46.
    ids_in_range = 0 < min(questions) <= max(questions) < 46
    np.testing.assert_equal(ids_in_range, True)

    # The number of skipped individuals cannot be negative.
    np.testing.assert_equal(num_skip >= 0, True)

    # There are exactly two alternatives for how to start the estimation.
    np.testing.assert_equal(start in ['init', 'auto'], True)
def test_2():
    """Check that optim -> econ -> optim is a round trip for the parameter values.

    Random optimizer values are set on the parameter object, read back as economic
    values, written back as economic values and finally read as optimizer values
    again. The result has to match the initial draw.
    """
    get_random_init()
    model_obj = ModelCls('test.trempy.ini')
    paras_obj, num_questions = dist_class_attributes(
        model_obj, 'paras_obj', 'num_questions')

    num_paras = num_questions + paras_obj.attr['nparas_econ']

    for _ in range(500):
        x_optim_draw = np.random.uniform(-1, 1, size=num_paras)

        # Forward transformation ...
        paras_obj.set_values('optim', 'all', x_optim_draw)
        x_econ = paras_obj.get_values('econ', 'all')

        # ... and back again.
        paras_obj.set_values('econ', 'all', x_econ)
        x_optim_recovered = paras_obj.get_values('optim', 'all')

        np.testing.assert_almost_equal(x_optim_draw, x_optim_recovered)
def estimate(fname):
    """Estimate the model by the method of maximum likelihood.

    Parameters
    ----------
    fname : str
        Name of the initialization file that is handed to ``ModelCls``.

    Returns
    -------
    list
        Two elements read from the estimation object after it was finished: the
        attribute ``f_step`` and the attribute ``x_econ_all_step``.

    Raises
    ------
    TrempyError
        If there is no free parameter to estimate or if the requested optimizer is
        not supported.
    """
    estimate_cleanup()

    model_obj = ModelCls(fname)

    # Everything we need from the model except the version-specific objects.
    (version, est_file, questions, paras_obj, start, cutoffs, maxfun, est_detailed,
     opt_options, optimizer, est_agents, num_skip) = dist_class_attributes(
        model_obj, 'version', 'est_file', 'questions', 'paras_obj', 'start', 'cutoffs',
        'maxfun', 'est_detailed', 'opt_options', 'optimizer', 'est_agents', 'num_skip')

    # Objects that exist only for some versions and are not part of paras_obj.
    if version in ['scaled_archimedean']:
        upper, marginals = dist_class_attributes(model_obj, 'upper', 'marginals')
        version_specific = {'upper': upper, 'marginals': marginals}
    elif version in ['nonstationary']:
        version_specific = {}

    # Without a free parameter there is nothing to estimate.
    if len(paras_obj.get_values('optim', 'free')) == 0:
        raise TrempyError('no free parameter to estimate')

    # Initial setup: observed data and the object that tracks the estimation.
    df_obs = process(est_file, questions, num_skip, est_agents, cutoffs)

    estimate_obj = EstimateClass(
        df=df_obs,
        cutoffs=cutoffs,
        questions=questions,
        paras_obj=copy.deepcopy(paras_obj),
        max_eval=maxfun,
        optimizer=optimizer,
        version=version,
        **version_specific
    )

    # Optionally replace the starting values from the file by automatic ones.
    if start in ['auto']:
        paras_obj = get_automatic_starting_values(
            paras_obj, df_obs, questions, version, **version_specific)

    # Starting point and bounds for scipy.minimize. The explicit evaluation locks in
    # the starting values because not every optimizer actually begins there.
    x_optim_free_start = paras_obj.get_values('optim', 'free')
    x_free_bounds = paras_obj.get_bounds('free')
    estimate_obj.evaluate(x_optim_free_start)

    # Simulate a sample at the starting point.
    if est_detailed:
        estimate_simulate('start', x_optim_free_start, model_obj, df_obs)

    if maxfun > 1:
        # optimizer label -> (scipy method, forwarded option names, whether to pass bounds)
        optimizer_setups = {
            'SCIPY-BFGS': ('BFGS', ('gtol', 'eps'), False),
            'SCIPY-POWELL': ('POWELL', ('ftol', 'xtol'), False),
            'SCIPY-L-BFGS-B': ('L-BFGS-B', ('gtol', 'ftol', 'eps'), True),
        }
        if optimizer not in optimizer_setups:
            raise TrempyError('flawed choice of optimization method')

        method, option_names, use_bounds = optimizer_setups[optimizer]
        options = {name: opt_options[optimizer][name] for name in option_names}
        bounds = x_free_bounds if use_bounds else None

        try:
            opt = minimize(estimate_obj.evaluate, x_optim_free_start, method=method,
                           options=options, bounds=bounds)
        except MaxfunError:
            opt = {
                'message': 'Optimization reached maximum number of function evaluations.',
                'success': False,
            }
    else:
        # Not a serious estimation request, just the evaluation at the start.
        opt = {
            'message': 'Single evaluation of criterion function at starting values.',
            'success': False,
        }

    # Wrap up all estimation related tasks.
    estimate_obj.finish(opt)

    # Simulate a sample at the stopping point.
    if est_detailed:
        x_econ_all_step = estimate_obj.get_attr('x_econ_all_step')
        paras_obj.set_values('econ', 'all', x_econ_all_step)
        x_optim_free_step = paras_obj.get_values('optim', 'free')
        estimate_simulate('stop', x_optim_free_step, model_obj, df_obs)
        shutil.copy('stop/compare.trempy.info', 'compare.trempy.info')

    # Only the best criterion value and the corresponding parameter vector are returned.
    return [estimate_obj.get_attr('f_step'), estimate_obj.get_attr('x_econ_all_step')]
def test_6():
    """Ensure that the weight c_t in the CES function is computed correctly.

    For randomly generated 'nonstationary' models the weights stored on the copula
    are compared with the values implied by the ``df_other`` setting. Models of any
    other version are skipped.
    """
    for _ in range(500):
        get_random_init()
        model_obj = ModelCls('test.trempy.ini')
        paras_obj, _, version = dist_class_attributes(
            model_obj, 'paras_obj', 'num_questions', 'version')

        if version not in ['nonstationary']:
            continue

        nparas_econ = paras_obj.attr['nparas_econ']
        econ_values = paras_obj.get_values('econ', 'all')[:nparas_econ]

        # Four preference parameters followed by one discount factor and one
        # unrestricted weight for each of the horizons 0, 1, 3, 6, 12 and 24.
        (alpha, beta, gamma, y_scale,
         df_0, df_1, df_3, df_6, df_12, df_24,
         uw_0, uw_1, uw_3, uw_6, uw_12, uw_24) = econ_values

        discounting = paras_obj.attr['discounting']
        stationary_model = paras_obj.attr['stationary_model']
        df_other = paras_obj.attr['df_other']

        copula_obj = get_copula_nonstationary(
            alpha, beta, gamma, y_scale,
            df_0, df_1, df_3, df_6, df_12, df_24,
            uw_0, uw_1, uw_3, uw_6, uw_12, uw_24,
            discounting=discounting,
            stationary_model=stationary_model,
            df_other=df_other
        )

        unrestricted_weights = {0: uw_0, 1: uw_1, 3: uw_3, 6: uw_6, 12: uw_12, 24: uw_24}

        copula = copula_obj.attr['copula']
        y_weights = copula.attr['y_weights']
        d_f = copula.attr['discount_factors']

        if stationary_model:
            # Every weight has to equal the common scale.
            for t, c_t in y_weights.items():
                np.testing.assert_equal(c_t == y_scale, True)
        else:
            for t, c_t in y_weights.items():
                if df_other == 'linear':
                    lhs = max(0, y_scale + t * uw_0)
                elif df_other == 'exponential':
                    lhs = y_scale * uw_0 ** t
                elif df_other == 'equal_univariate':
                    lhs = y_scale * d_f[t] ** (gamma - 1)
                elif df_other == 'free':
                    lhs = unrestricted_weights[t]

                np.testing.assert_equal(c_t == lhs, True)
def simulate(fname):
    """Simulate the model based on the initialization file.

    The simulated sample is stored as ``<sim_file>.trempy.pkl`` and a summary is
    written to ``<sim_file>.trempy.info`` via ``write_info``.

    Parameters
    ----------
    fname : str
        Name of the initialization file that is handed to ``ModelCls``.

    Returns
    -------
    tuple
        ``(df, fval)``: the simulated dataset, indexed by individual and question,
        and the value of the criterion function evaluated on it.

    Raises
    ------
    TrempyError
        If the model version is neither 'scaled_archimedean' nor 'nonstationary'.
    """
    model_obj = ModelCls(fname)
    version = model_obj.attr['version']

    # Get fixed args that do not change during simulation.
    args = [model_obj, 'sim_agents', 'questions', 'sim_seed', 'sim_file', 'paras_obj', 'cutoffs']

    if version in ['scaled_archimedean']:
        args += ['upper', 'marginals']
        sim_agents, questions, sim_seed, sim_file, paras_obj, cutoffs, upper, marginals = \
            dist_class_attributes(*args)
        version_specific = {'upper': upper, 'marginals': marginals}
    elif version in ['nonstationary']:
        sim_agents, questions, sim_seed, sim_file, paras_obj, cutoffs = \
            dist_class_attributes(*args)
        version_specific = dict()
    else:
        raise TrempyError('version not implemented')

    np.random.seed(sim_seed)
    m_optimal = get_optimal_compensations(version, paras_obj, questions, **version_specific)

    # First, get number of preference parameters. Paras with higher index belong to questions!
    nparas_econ = paras_obj.attr['nparas_econ']

    # Now, get standard deviation for the error in each question.
    sds = paras_obj.get_values('econ', 'all')[nparas_econ:]

    heterogeneity = paras_obj.attr['heterogeneity']
    if heterogeneity:
        # With heterogeneity only two standard deviations are used, one for the time
        # and one for the risk questions.
        # NOTE(review): positions 1 and 2 are taken as given; confirm against the
        # parameter layout of paras_obj.
        sds_time = sds[1]
        sds_risk = sds[2]

    # TODO: The inner loop could be vectorised by drawing all agents of a question with
    # a single np.random.normal(..., size=sim_agents) call. That walks through the random
    # stream question by question instead of agent by agent, so it would change the
    # simulated sample for a given seed.
    data = []
    for i in range(sim_agents):
        for k, q in enumerate(questions):
            lower_cutoff, upper_cutoff = cutoffs[q]

            # If we estimate agent by agent, we use only two sds for time and risk
            # questions, scaled by the width of the question's interval.
            if heterogeneity:
                if q <= 30:
                    sds_current_q = sds_time * (upper_cutoff - lower_cutoff) / 200
                else:
                    sds_current_q = sds_risk * (upper_cutoff - lower_cutoff) / 20
            else:
                sds_current_q = sds[k]

            m_latent = np.random.normal(loc=m_optimal[q], scale=sds_current_q, size=1)

            # Censor from below at the lower cutoff; draws above the upper cutoff are
            # recorded with the NEVER_SWITCHERS marker.
            m_observed = np.clip(m_latent, a_min=lower_cutoff, a_max=+np.inf)
            m_observed[m_observed > upper_cutoff] = NEVER_SWITCHERS

            # Store the scalar instead of the one-element array so that the column is
            # numeric right away and no array-to-scalar coercion is needed later.
            data.append([i, q, m_observed[0]])

    # Post-processing step
    df = pd.DataFrame(data)
    df.rename({0: 'Individual', 1: 'Question', 2: 'Compensation'}, inplace=True, axis='columns')

    # The builtin types replace the np.int / np.float aliases, which are no longer
    # available in current NumPy releases.
    dtype = {'Individual': int, 'Question': int, 'Compensation': float}
    df = df.astype(dtype)
    df.set_index(['Individual', 'Question'], inplace=True, drop=False)
    df.sort_index(inplace=True)

    df.to_pickle(sim_file + '.trempy.pkl', protocol=2)

    x_econ_all_current = paras_obj.get_values('econ', 'all')

    fval, _ = criterion_function(
        df, questions, cutoffs, paras_obj, version, sds, **version_specific)

    write_info(version, x_econ_all_current, df, questions, fval, m_optimal,
               sim_file + '.trempy.info')

    return df, fval