def generate_posterior_distribution(self, year, quantity_of_interest, cache_directory=None,
                                    values=None, ids=None,
                                    procedure="opus_core.bm_normal_posterior",
                                    use_bias_and_variance_from=None, transformed_back=True,
                                    transformation_pair=(None, None), **kwargs):
    """Generate and return the posterior distribution of 'quantity_of_interest' for 'year'.

    Either 'cache_directory' or both 'values' and 'ids' must be given; otherwise a
    StandardError is raised.  'procedure' names the model component whose 'run'
    method produces the posterior draws; self is passed as its first argument and
    any extra keyword arguments are forwarded to it.  If 'transformed_back' is True
    and a prediction transformation is set, the draws are transformed back using the
    inverse member of the transformation pair before being returned.
    """
    if cache_directory is not None:
        # Single-cache case: the given directory becomes the whole cache set.
        self.cache_set = array([cache_directory])
    else:
        if values is None or ids is None:
            raise StandardError("Either cache_directory or values and ids must be given.")
    self.set_posterior(year, quantity_of_interest, values=values, ids=ids,
                       use_bias_and_variance_from=use_bias_and_variance_from,
                       transformation_pair=transformation_pair)
    procedure_class = ModelComponentCreator().get_model_component(procedure)
    self.simulated_values = procedure_class.run(self, **kwargs)
    if transformed_back and (self.transformation_pair_for_prediction[0] is not None):
        # The posterior was computed on transformed values; map back to the
        # original scale using the second (inverse) member of the pair.
        self.simulated_values = try_transformation(
            self.simulated_values, self.transformation_pair_for_prediction[1])
    return self.simulated_values
def generate_posterior_distribution(self, year, quantity_of_interest,
                                    procedure="opus_core.bm_normal_posterior",
                                    use_bias_and_variance_from=None, transformed_back=True,
                                    aggregate_to=None, intermediates=None, **kwargs):
    """Generate and return the posterior distribution of 'quantity_of_interest' for 'year'.

    'quantity_of_interest' is a variable name about which we want to get the
    posterior distribution.  If there are multiple known outputs, it must be made
    clear from which one the bias and variance is to be used (argument
    'use_bias_and_variance_from').  If it is None, the first known output is used.
    If 'aggregate_to' is given, the simulated values are aggregated from the
    variable's dataset to that dataset, via the 'intermediates' datasets.
    """
    # NOTE: 'intermediates' previously defaulted to a shared mutable list ([]);
    # use a None sentinel instead so calls cannot leak state into each other.
    if intermediates is None:
        intermediates = []
    self.set_posterior(year, quantity_of_interest, use_bias_and_variance_from)
    procedure_class = ModelComponentCreator().get_model_component(procedure)
    self.simulated_values = procedure_class.run(self, **kwargs)
    if transformed_back and (self.transformation_pair_for_prediction[0] is not None):
        # The posterior was computed on transformed values; transform back.
        self.simulated_values = try_transformation(
            self.simulated_values, self.transformation_pair_for_prediction[1])
    if aggregate_to is not None:
        self.simulated_values = self.aggregate(
            self.simulated_values,
            aggregate_from=VariableName(quantity_of_interest).get_dataset_name(),
            aggregate_to=aggregate_to, intermediates=intermediates)
    return self.simulated_values
def compute_weights(self, procedure="opus_core.bm_normal_weights", **kwargs):
    """Compute, store and return the BM weights.

    Launches the 'run' method of the given 'procedure' (self is passed as its
    first argument, extra keyword arguments are forwarded), which should return
    the actual BM weights.  If 'procedure' is None, equal weights over all runs
    are used instead.  The resulting weights are written to the weights file in
    the output directory.
    """
    # Refresh the quantities the weighting procedure depends on.
    for prepare in (self.compute_y, self.estimate_mu,
                    self.estimate_bias, self.estimate_variance):
        prepare()
    if procedure is None:
        # Uniform weights: each run contributes 1/number_of_runs.
        self.weights = 1.0 / self.number_of_runs * ones(self.number_of_runs)
    else:
        component = ModelComponentCreator().get_model_component(procedure)
        self.weights, self.weight_components = component.run(self, **kwargs)
    weights_path = os.path.join(self.output_directory, self.weights_file_name)
    write_to_text_file(weights_path, self.weights)
    return self.weights
def generate_posterior_distribution(self, year, quantity_of_interest, cache_directory=None,
                                    values=None, ids=None,
                                    procedure="opus_core.bm_normal_posterior",
                                    use_bias_and_variance_from=None, transformed_back=True,
                                    aggregate_to=None, intermediates=None,
                                    propagation_factor=1, no_propagation=True,
                                    additive_propagation=False, omit_bias=False, **kwargs):
    """Generate and return the posterior distribution of 'quantity_of_interest' for 'year'.

    'values' and 'ids' must be given unless the BM object was initialized with a
    cache file location (i.e. self.cache_set is set).  The propagation arguments
    and 'omit_bias' are forwarded to set_posterior; 'procedure' names the model
    component whose 'run' method produces the draws (self is passed as its first
    argument).  If 'aggregate_to' is given, the simulated values (and their ids)
    are aggregated to that dataset via the 'intermediates' datasets.
    NOTE(review): 'cache_directory' is accepted but not used in this body —
    presumably kept for signature compatibility with the other variants; confirm.
    """
    # NOTE: 'intermediates' previously defaulted to a shared mutable list ([]);
    # use a None sentinel instead so calls cannot leak state into each other.
    if intermediates is None:
        intermediates = []
    if (values is None or ids is None) and (self.cache_set is None):
        raise StandardError("values and ids must be given if the BM object is "
                            "initialized without cache_file_location.")
    self.set_posterior(year, quantity_of_interest, values=values, ids=ids,
                       use_bias_and_variance_from=use_bias_and_variance_from,
                       propagation_factor=propagation_factor, no_propagation=no_propagation,
                       additive_propagation=additive_propagation, omit_bias=omit_bias)
    procedure_class = ModelComponentCreator().get_model_component(procedure)
    self.simulated_values = procedure_class.run(self, **kwargs)
    if transformed_back and (self.transformation_pair_for_prediction[0] is not None):
        # The posterior was computed on transformed values; transform back.
        self.simulated_values = try_transformation(
            self.simulated_values, self.transformation_pair_for_prediction[1])
    self.simulated_values_ids = self.m_ids
    if aggregate_to is not None:
        (self.simulated_values, self.simulated_values_ids) = self.aggregate(
            self.simulated_values,
            aggregate_from=VariableName(quantity_of_interest).get_dataset_name(),
            aggregate_to=aggregate_to, intermediates=intermediates)
    return self.simulated_values
def estimate(self, specification, dataset, outcome_attribute, index=None, procedure=None,
             data_objects=None, estimate_config=None, debuglevel=0):
    """Estimate regression coefficients per submodel and return (coefficients, estimated_coef).

    'specification' is of type EquationSpecification,
    'dataset' is of type Dataset,
    'outcome_attribute' - string that determines the dependent variable,
    'index' are indices of individuals in dataset for which the model runs.
        If it is None, the whole dataset is considered.
    'procedure' - name of the estimation procedure.  If it is None, there should
        be an entry "estimation" in 'estimate_config' that determines the
        procedure.  The class must have a method 'run' that takes as arguments
        'data', 'regression_procedure' and 'resources'.  It returns a dictionary
        with entries 'estimators', 'standard_errors' and 't_values' (all 1D numpy
        arrays).
    'data_objects' is a dictionary where each key is the name of a data object
        ('zone', ...) and its value is an object of class Dataset.
    'estimate_config' is of type Resources; it gives additional arguments for the
        estimation procedure.
    'debuglevel' overwrites the class 'debuglevel'.
    Returns (None, None) if there is nothing to estimate.
    """
    self.debug.flag = debuglevel
    if estimate_config is None:
        estimate_config = Resources()
    if not isinstance(estimate_config, Resources) and isinstance(estimate_config, dict):
        estimate_config = Resources(estimate_config)
    self.estimate_config = estimate_config.merge_with_defaults(self.estimate_config)
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    self.procedure = procedure
    if self.procedure is None:
        self.procedure = self.estimate_config.get("estimation", None)
    if self.procedure is not None:
        self.procedure = ModelComponentCreator().get_model_component(self.procedure)
    else:
        logger.log_warning(
            "No estimation procedure given, or problems with loading the corresponding module."
        )
    compute_resources = Resources({"debug": self.debug})
    if dataset.size() <= 0:  # no data loaded yet
        dataset.get_id_attribute()
    # Use 'is None' here: 'index' may already be an ndarray, for which '== None'
    # is an elementwise comparison and fails in boolean context under modern numpy.
    if index is None:
        index = arange(dataset.size())
    if not isinstance(index, ndarray):
        index = array(index)
    estimation_size_agents = self.estimate_config.get(
        "estimation_size_agents", None)  # should be a proportion of the agent_set
    if estimation_size_agents is None:
        estimation_size_agents = 1.0
    else:
        estimation_size_agents = max(min(estimation_size_agents, 1.0), 0.0)  # between 0 and 1
    if estimation_size_agents < 1.0:
        # Estimate on a random subsample of the given proportion.
        self.debug.print_debug("Sampling agents for estimation ...", 3)
        estimation_idx = sample_noreplace(arange(index.size),
                                          int(index.size * estimation_size_agents))
    else:
        estimation_idx = arange(index.size)
    estimation_idx = index[estimation_idx]
    self.debug.print_debug(
        "Number of observations for estimation: " + str(estimation_idx.size), 2)
    if estimation_idx.size <= 0:
        self.debug.print_debug("Nothing to be done.", 2)
        return (None, None)
    coefficients = create_coefficient_from_specification(specification)
    specified_coefficients = SpecifiedCoefficients().create(coefficients, specification, neqs=1)
    submodels = specified_coefficients.get_submodels()
    self.get_status_for_gui().update_pieces_using_submodels(submodels=submodels, leave_pieces=2)
    self.map_agents_to_submodels(
        submodels, self.submodel_string, dataset, estimation_idx,
        dataset_pool=self.dataset_pool, resources=compute_resources,
        submodel_size_max=self.estimate_config.get('submodel_size_max', None))
    variables = specified_coefficients.get_full_variable_names_without_constants()
    self.debug.print_debug("Compute variables ...", 4)
    self.increment_current_status_piece()
    dataset.compute_variables(variables, dataset_pool=self.dataset_pool,
                              resources=compute_resources)
    coef = {}
    estimated_coef = {}
    self.outcome = {}
    dataset.compute_variables([outcome_attribute], dataset_pool=self.dataset_pool,
                              resources=compute_resources)
    regression_resources = Resources(estimate_config)
    regression_resources.merge({"debug": self.debug})
    outcome_variable_name = VariableName(outcome_attribute)
    for submodel in submodels:
        coef[submodel] = SpecifiedCoefficientsFor1Submodel(specified_coefficients, submodel)
        self.increment_current_status_piece()
        logger.log_status("Estimate regression for submodel " + str(submodel),
                          tags=["estimate"], verbosity_level=2)
        logger.log_status("Number of observations: "
                          + str(self.observations_mapping[submodel].size),
                          tags=["estimate"], verbosity_level=2)
        self.data[submodel] = dataset.create_regression_data_for_estimation(
            coef[submodel], index=estimation_idx[self.observations_mapping[submodel]])
        self.coefficient_names[submodel] = \
            coef[submodel].get_coefficient_names_without_constant()[0, :]
        if (self.data[submodel].shape[0] > 0) and (self.data[submodel].size > 0) and (
                self.procedure is not None):
            # observations for this submodel available
            self.outcome[submodel] = dataset.get_attribute_by_index(
                outcome_variable_name.get_alias(),
                estimation_idx[self.observations_mapping[submodel]])
            regression_resources.merge({"outcome": self.outcome[submodel]})
            regression_resources.merge({
                "coefficient_names": self.coefficient_names[submodel].tolist(),
                "constant_position": coef[submodel].get_constants_positions()
            })
            estimated_coef[submodel] = self.procedure.run(
                self.data[submodel], self.regression, resources=regression_resources)
            # The procedure's result dictionary may carry any subset of these keys.
            if "estimators" in estimated_coef[submodel]:
                coef[submodel].set_coefficient_values(estimated_coef[submodel]["estimators"])
            if "standard_errors" in estimated_coef[submodel]:
                coef[submodel].set_standard_errors(estimated_coef[submodel]["standard_errors"])
            if "other_measures" in estimated_coef[submodel]:
                for measure in estimated_coef[submodel]["other_measures"]:
                    coef[submodel].set_measure(
                        measure, estimated_coef[submodel]["other_measures"][measure])
            if "other_info" in estimated_coef[submodel]:
                for info in estimated_coef[submodel]["other_info"]:
                    coef[submodel].set_other_info(
                        info, estimated_coef[submodel]["other_info"][info])
    coefficients.fill_coefficients(coef)
    self.save_predicted_values_and_errors(specification, coefficients, dataset,
                                          outcome_variable_name, index=index,
                                          data_objects=data_objects)
    return (coefficients, estimated_coef)