def __init__(self, regression_procedure="opus_core.linear_regression", submodel_string=None,
             outcome_attribute = None, run_config=None, estimate_config=None,
             debuglevel=None, dataset_pool=None):
    """Set up the regression model and remember which attribute is the outcome.

    'outcome_attribute' must be specified in order to compute the residuals.
    It may be given either as a VariableName instance or as any value accepted
    by the VariableName constructor; None is stored unchanged.

    All remaining arguments are forwarded by keyword to RegressionModel.__init__.
    """
    parent_options = {
        "regression_procedure": regression_procedure,
        "submodel_string": submodel_string,
        "run_config": run_config,
        "estimate_config": estimate_config,
        "debuglevel": debuglevel,
        "dataset_pool": dataset_pool,
    }
    RegressionModel.__init__(self, **parent_options)

    # Keep None and ready-made VariableName objects as they are; wrap anything else.
    if outcome_attribute is None or isinstance(outcome_attribute, VariableName):
        self.outcome_attribute = outcome_attribute
    else:
        self.outcome_attribute = VariableName(outcome_attribute)
def run(self, specification=None, coefficients=None, dataset=None, **kwargs):
    """Run the regression on a proposal dataset and store the result as an attribute.

    At the moment it's a mock-up model.

    Before the regression is run, a proposal component dataset is built from
    'dataset' and the 'development_template_component' dataset of the pool,
    registered in the dataset pool under its own dataset name, and the
    'land_use_type_id' variable is computed on 'dataset'.

    Arguments:
        specification, coefficients, dataset, **kwargs -- passed through to
            RegressionModel.run.

    Returns the outcome of RegressionModel.run. If that outcome is None or
    empty it is returned immediately and the dataset is not modified further.
    Otherwise, if self.outcome_attribute_name starts with 'ln_', the prefix is
    stripped from self.outcome_attribute_name and the outcome is exponentiated;
    the outcome is then written to the dataset attribute of that name.
    """
    proposal_component_set = create_from_proposals_and_template_components(
        dataset, self.dataset_pool.get_dataset('development_template_component'))
    self.dataset_pool.replace_dataset(proposal_component_set.get_dataset_name(), proposal_component_set)
    dataset.compute_variables(
        'land_use_type_id = development_project_proposal.disaggregate(development_template.land_use_type_id)',
        dataset_pool=self.dataset_pool)

    outcome = RegressionModel.run(self, specification, coefficients, dataset, **kwargs)
    # Identity test on purpose: '== None' on a numpy array may be evaluated
    # element-wise, which makes the 'or' below ambiguous.
    if (outcome is None) or (outcome.size <= 0):
        return outcome

    # NOTE(review): self.outcome_attribute_name is presumably set by
    # RegressionModel.run -- confirm against the parent class.
    if self.outcome_attribute_name.startswith("ln_"):
        # If the outcome attribute name starts with 'ln_',
        # the results will be exponentiated.
        self.outcome_attribute_name = self.outcome_attribute_name[3:]
        outcome = exp(outcome)

    if self.outcome_attribute_name not in dataset.get_known_attribute_names():
        dataset.add_primary_attribute(name=self.outcome_attribute_name,
                                      data=zeros(dataset.size(), dtype='f'))
    dataset.set_values_of_one_attribute(self.outcome_attribute_name, outcome)
    self.correct_infinite_values(dataset, self.outcome_attribute_name, clip_all_larger_values=True)

    # Disabled alternative, kept for reference:
    #values = 6.7 * dataset['land_value']/dataset['parcel_sqft'].astype('float32')
    #dataset.add_primary_attribute(name=self.outcome_attribute_name, data=values)
    #props_values = proposal_dataset.compute_variables(['development_project_proposal.disaggregate(parcel.%s)' % self.outcome_attribute_name],
    #                                                  dataset_pool=self.dataset_pool)
    #proposal_dataset.add_primary_attribute(name=self.outcome_attribute_name, data=props_values)
    return outcome
def run(self, specification, coefficients, dataset, index=None, outcome_with_inital_error=True, **kwargs):
    """Run the regression and (optionally) add the stored initial residuals to the result.

    The first time this is run for a dataset, the initial error (observed
    value of the outcome attribute minus the regression outcome) is computed
    and stored on the dataset as the primary attribute '_init_error_<alias>'.
    If that attribute already exists, its values are read instead of being
    recomputed.

    If missing values of the outcome attribute are supposed to be excluded
    from the addition of the initial residuals, set the run_config entry
    'exclude_missing_values_from_initial_error' to True. Additionally, the
    entry 'outcome_attribute_missing_value' specifies the missing value
    (default is 0).

    Similarly, if outliers are to be excluded, the run_config entry
    'exclude_outliers_from_initial_error' should be set to True. In such a
    case, run_config entries 'outlier_is_less_than' and
    'outlier_is_greater_than' can define lower and upper bounds for outliers.
    By default, an outlier is a data point smaller than 0. There is no default
    upper bound.

    Arguments:
        specification, coefficients, dataset, **kwargs -- passed through to
            RegressionModel.run.
        index -- indices of the dataset members to process; None means all
            members (arange(dataset.size())).
        outcome_with_inital_error -- if True (default), the returned values
            are outcome + initial error; otherwise the plain outcome is
            returned. (The spelling of the name is kept for compatibility.)

    Raises StandardError if no outcome attribute was given at initialization,
    or if the outcome attribute is not known to the dataset and cannot be
    computed.
    """
    if self.outcome_attribute is None:
        raise StandardError("An outcome attribute must be specified for this model. Pass it into the initialization.")
    if self.outcome_attribute.get_alias() not in dataset.get_known_attribute_names():
        try:
            dataset.compute_variables(self.outcome_attribute, dataset_pool=self.dataset_pool)
        except Exception:
            # Any failure to compute the variable is reported as an unknown attribute.
            raise StandardError("The outcome attribute %s must be a known attribute of the dataset %s." % (
                self.outcome_attribute.get_alias(), dataset.get_dataset_name()))

    if index is None:
        index = arange(dataset.size())
    original_data = dataset.get_attribute_by_index(self.outcome_attribute, index)

    outcome = RegressionModel.run(self, specification, coefficients, dataset, index,
                                  initial_values=original_data.astype('float32'), **kwargs)
    initial_error_name = "_init_error_%s" % self.outcome_attribute.get_alias()

    if initial_error_name not in dataset.get_known_attribute_names():
        initial_error = original_data - outcome
        dataset.add_primary_attribute(name=initial_error_name, data=zeros(dataset.size(), dtype="float32"))
        exclude_missing_values = self.run_config.get("exclude_missing_values_from_initial_error", False)
        exclude_outliers = self.run_config.get("exclude_outliers_from_initial_error", False)
        if exclude_missing_values:
            missing_value = self.run_config.get("outcome_attribute_missing_value", 0)
            initial_error[original_data == missing_value] = 0
            logger.log_status('Values equal %s were excluded from adding residuals.' % missing_value)
        if exclude_outliers:
            outlier_low = self.run_config.get("outlier_is_less_than", 0)
            initial_error[original_data < outlier_low] = 0
            # Without a configured upper bound, fall back to the observed
            # maximum (which excludes nothing). The maximum is only taken
            # when it is needed and the selection is non-empty, because
            # max() of an empty array raises.
            outlier_high = self.run_config.get("outlier_is_greater_than", None)
            if outlier_high is None and original_data.size > 0:
                outlier_high = original_data.max()
            if outlier_high is not None:
                initial_error[original_data > outlier_high] = 0
            logger.log_status('Values less than %s and larger than %s were excluded from adding residuals.' % (outlier_low, outlier_high))
        dataset.set_values_of_one_attribute(initial_error_name, initial_error, index)
    else:
        initial_error = dataset.get_attribute_by_index(initial_error_name, index)
    logger.log_status("initial_error saved to %s.%s" % (dataset.dataset_name, initial_error_name))

    if outcome_with_inital_error:
        return outcome + initial_error
    else:
        logger.log_status("initial_error not added to outcome %s" % self.outcome_attribute.get_alias())
        return outcome
def run(self, specification=None, coefficients=None, dataset=None, **kwargs):
    """Run the regression and store its result on the dataset.

    The arguments are passed through to RegressionModel.run. A None or empty
    result is returned as is, without touching the dataset. Otherwise the
    result is written to the dataset attribute named by
    self.outcome_attribute_name (added as a primary attribute if the dataset
    does not know it yet), handed to self.correct_infinite_values, and
    returned.

    If the outcome attribute name starts with 'ln_', the prefix is removed
    from self.outcome_attribute_name and the result is exponentiated first.
    """
    predicted = RegressionModel.run(self, specification, coefficients, dataset, **kwargs)
    if predicted is None:
        return predicted
    if predicted.size <= 0:
        return predicted

    has_log_prefix = re.search("^ln_", self.outcome_attribute_name)
    if has_log_prefix:
        # Drop the three-character 'ln_' prefix and undo the logarithm.
        self.outcome_attribute_name = self.outcome_attribute_name[3:]
        predicted = exp(predicted)

    target_name = self.outcome_attribute_name
    known_names = dataset.get_known_attribute_names()
    if target_name not in known_names:
        placeholder = zeros(dataset.size(), dtype="f")
        dataset.add_primary_attribute(name=target_name, data=placeholder)
    dataset.set_values_of_one_attribute(target_name, predicted)
    self.correct_infinite_values(dataset, target_name, clip_all_larger_values=True)

    # Disabled alternative, kept for reference:
    # values = 6.7 * dataset['land_value']/dataset['parcel_sqft'].astype('float32')
    # dataset.add_primary_attribute(name=self.outcome_attribute_name, data=values)
    # props_values = proposal_dataset.compute_variables(['development_project_proposal.disaggregate(parcel.%s)' % self.outcome_attribute_name],
    #                                                   dataset_pool=self.dataset_pool)
    # proposal_dataset.add_primary_attribute(name=self.outcome_attribute_name, data=props_values)
    return predicted
def run_after_estimation(self, *args, **kwargs):
    """Call RegressionModel.run directly with the given arguments and return its result.

    This goes straight to the RegressionModel implementation rather than
    through self.run.
    """
    base_run = RegressionModel.run
    result = base_run(self, *args, **kwargs)
    return result