def __init__(self, regression_procedure="opus_core.linear_regression",
             submodel_string=None, outcome_attribute = None,
             run_config=None, estimate_config=None, debuglevel=None, dataset_pool=None):
    """Set up the underlying RegressionModel and remember the outcome attribute.

    All arguments except 'outcome_attribute' are passed on unchanged to
    RegressionModel.__init__.

    'outcome_attribute' must be specified in order to compute the residuals.
    It is stored in self.outcome_attribute: None stays None, a VariableName
    instance is kept as it is, and anything else is wrapped in VariableName.
    """
    RegressionModel.__init__(
        self,
        regression_procedure=regression_procedure,
        submodel_string=submodel_string,
        run_config=run_config,
        estimate_config=estimate_config,
        debuglevel=debuglevel,
        dataset_pool=dataset_pool,
    )
    # Normalise to a VariableName so later code can rely on its interface.
    needs_wrapping = not (outcome_attribute is None or isinstance(outcome_attribute, VariableName))
    if needs_wrapping:
        outcome_attribute = VariableName(outcome_attribute)
    self.outcome_attribute = outcome_attribute
Пример #2
0
    def run(self, specification=None, coefficients=None, dataset=None,  **kwargs):
        """Run the regression on 'dataset' and store the predictions on it.

        At the moment it's a mock-up model.

        Steps:
          1. A proposal-component dataset is created from 'dataset' and the
             'development_template_component' dataset of the dataset pool, and it
             replaces the dataset of the same name in the pool.
          2. 'land_use_type_id' is computed on 'dataset'.
          3. RegressionModel.run produces the predictions.
          4. If self.outcome_attribute_name starts with 'ln_', the prefix is removed
             from self.outcome_attribute_name and the predictions are exponentiated.
          5. The predictions are written into the dataset attribute
             self.outcome_attribute_name (created and zero-filled if not yet known),
             and self.correct_infinite_values is applied to that attribute.

        Returns the predictions (exponentiated if step 4 applied). If the regression
        returns None or an empty array, that value is returned as is and the dataset
        attribute is not touched.
        """

        proposal_component_set = create_from_proposals_and_template_components(dataset, 
                                                self.dataset_pool.get_dataset('development_template_component'))
        
        self.dataset_pool.replace_dataset(proposal_component_set.get_dataset_name(), proposal_component_set)
        dataset.compute_variables('land_use_type_id = development_project_proposal.disaggregate(development_template.land_use_type_id)', 
                                  dataset_pool=self.dataset_pool)
        outcome = RegressionModel.run(self, specification, coefficients, dataset, **kwargs)
        
        # Identity test on purpose: '== None' on an array is evaluated elementwise and
        # its result cannot be used in a boolean context.
        if (outcome is None) or (outcome.size <= 0):
            return outcome
        if re.search("^ln_", self.outcome_attribute_name): # if the outcome attr. name starts with 'ln_'
                                                      # the results will be exponentiated.
            # NOTE(review): the prefix is stripped on the instance itself, so a second
            # call of run() on the same object would no longer exponentiate - confirm
            # that the model is run only once per instance.
            self.outcome_attribute_name = self.outcome_attribute_name[3:]
            outcome = exp(outcome)

        if self.outcome_attribute_name not in dataset.get_known_attribute_names():
            dataset.add_primary_attribute(name=self.outcome_attribute_name, data=zeros(dataset.size(), dtype='f'))

        dataset.set_values_of_one_attribute(self.outcome_attribute_name,  outcome)
        self.correct_infinite_values(dataset, self.outcome_attribute_name, clip_all_larger_values=True)
            
        #values = 6.7 * dataset['land_value']/dataset['parcel_sqft'].astype('float32')
        #dataset.add_primary_attribute(name=self.outcome_attribute_name, data=values)
        
        #props_values = proposal_dataset.compute_variables(['development_project_proposal.disaggregate(parcel.%s)' % self.outcome_attribute_name], 
        #                                           dataset_pool=self.dataset_pool)
        #proposal_dataset.add_primary_attribute(name=self.outcome_attribute_name, data=props_values)
        return outcome
    def run(self, specification, coefficients, dataset, index=None, 
            outcome_with_inital_error=True, **kwargs):
        """Run the regression and, by default, add the initial residuals to its outcome.

        The initial residuals are kept in the dataset attribute
        '_init_error_<alias of the outcome attribute>'. If the dataset does not know
        that attribute yet, it is created (zero-filled) and, for the elements given by
        'index', set to the original values of the outcome attribute minus the
        regression outcome. If the attribute already exists, its values for 'index'
        are reused.

        If missing values of the outcome attribute are supposed to be excluded from
        the addition of the initial residuals, set an entry of run_config 'exclude_missing_values_from_initial_error' to True.
        Additionally, an entry 'outcome_attribute_missing_value' specifies the missing value (default is 0).
        Similarly, if outliers are to be excluded, the run_config entry "exclude_outliers_from_initial_error" should be set to True.
        In such a case, run_config entries 'outlier_is_less_than' and 'outlier_is_greater_than' can define lower and upper bounds for outliers. 
        By default, an outlier is a data point smaller than 0. There is no default upper bound
        (the maximum of the original data is used, so nothing exceeds it).
        Excluded elements get an initial residual of 0.

        Arguments:
          specification, coefficients, dataset, **kwargs -- passed on to RegressionModel.run.
          index -- indices of the dataset elements to process; all elements if None.
          outcome_with_inital_error -- if True (default) the returned values are
              outcome + initial residuals, otherwise the plain regression outcome.

        Raises StandardError if no outcome attribute was given at initialization, or
        if the outcome attribute is not known to the dataset and cannot be computed.
        """
        # The call form of 'raise' is used because the 'raise Class, message'
        # statement form is not accepted by newer interpreters.
        if self.outcome_attribute is None:
            raise StandardError("An outcome attribute must be specified for this model. Pass it into the initialization.")
        
        if self.outcome_attribute.get_alias() not in dataset.get_known_attribute_names():
            try:
                dataset.compute_variables(self.outcome_attribute, dataset_pool=self.dataset_pool)
            except Exception:
                # Only ordinary errors are translated; KeyboardInterrupt and SystemExit
                # must not be turned into a misleading "unknown attribute" message.
                raise StandardError("The outcome attribute %s must be a known attribute of the dataset %s." % (
                                                                self.outcome_attribute.get_alias(), dataset.get_dataset_name()))
            
        if index is None:
            index = arange(dataset.size())
        original_data = dataset.get_attribute_by_index(self.outcome_attribute, index)
        
        outcome = RegressionModel.run(self, specification, coefficients, dataset, index, initial_values=original_data.astype('float32'), **kwargs)
        initial_error_name = "_init_error_%s" % self.outcome_attribute.get_alias()


        if initial_error_name not in dataset.get_known_attribute_names():
            # First run on this dataset: compute the residuals and store them.
            initial_error = original_data - outcome
            dataset.add_primary_attribute(name=initial_error_name, data=zeros(dataset.size(), dtype="float32"))
            exclude_missing_values = self.run_config.get("exclude_missing_values_from_initial_error", False)
            exclude_outliers = self.run_config.get("exclude_outliers_from_initial_error", False)
            if exclude_missing_values:
                missing_value = self.run_config.get("outcome_attribute_missing_value", 0)
                initial_error[original_data == missing_value] = 0
                logger.log_status('Values equal %s were excluded from adding residuals.' % missing_value)
            if exclude_outliers:
                outlier_low = self.run_config.get("outlier_is_less_than", 0)
                initial_error[original_data < outlier_low] = 0
                outlier_high = self.run_config.get("outlier_is_greater_than", original_data.max())
                initial_error[original_data > outlier_high] = 0
                logger.log_status('Values less than %s and larger than %s were excluded from adding residuals.' % (outlier_low, outlier_high))
            dataset.set_values_of_one_attribute(initial_error_name, initial_error, index)
        else:
            # Residuals were stored by an earlier run: reuse them.
            initial_error = dataset.get_attribute_by_index(initial_error_name, index)

        logger.log_status("initial_error saved to %s.%s" % (dataset.dataset_name, initial_error_name))
        if outcome_with_inital_error:
            return outcome + initial_error
        else:
            logger.log_status("initial_error not added to outcome %s" % self.outcome_attribute.get_alias())
            return outcome
    def run(self, specification=None, coefficients=None, dataset=None, **kwargs):
        """Run the regression on 'dataset' and store the predictions on it.

        The predictions come from RegressionModel.run. If that returns None or an
        empty array, the value is returned immediately and the dataset is left alone.

        If self.outcome_attribute_name starts with 'ln_', the prefix is removed from
        self.outcome_attribute_name and the predictions are exponentiated. The
        predictions are then written into the dataset attribute of that name (created
        and zero-filled if the dataset does not know it yet), and
        self.correct_infinite_values is applied to the attribute.

        Returns the (possibly exponentiated) predictions.
        """
        predicted = RegressionModel.run(self, specification, coefficients, dataset, **kwargs)

        # Nothing was predicted: hand the result back untouched.
        if predicted is None:
            return predicted
        if predicted.size <= 0:
            return predicted

        # A name starting with 'ln_' marks a log-transformed outcome:
        # drop the prefix and transform the predictions back.
        is_log_outcome = re.search("^ln_", self.outcome_attribute_name)
        if is_log_outcome:
            self.outcome_attribute_name = self.outcome_attribute_name[3:]
            predicted = exp(predicted)

        target_name = self.outcome_attribute_name
        known_names = dataset.get_known_attribute_names()
        if target_name not in known_names:
            # Create the attribute first so that its values can be set below.
            placeholder = zeros(dataset.size(), dtype="f")
            dataset.add_primary_attribute(name=target_name, data=placeholder)

        dataset.set_values_of_one_attribute(target_name, predicted)
        self.correct_infinite_values(dataset, target_name, clip_all_larger_values=True)

        # Disabled experiments kept for reference:
        # values = 6.7 * dataset['land_value']/dataset['parcel_sqft'].astype('float32')
        # dataset.add_primary_attribute(name=self.outcome_attribute_name, data=values)

        # props_values = proposal_dataset.compute_variables(['development_project_proposal.disaggregate(parcel.%s)' % self.outcome_attribute_name],
        #                                           dataset_pool=self.dataset_pool)
        # proposal_dataset.add_primary_attribute(name=self.outcome_attribute_name, data=props_values)
        return predicted
 def run_after_estimation(self, *args, **kwargs):
     """Call RegressionModel.run directly with the given arguments.

     All positional and keyword arguments are forwarded unchanged, and the
     value returned by RegressionModel.run is returned as is.
     """
     regression_outcome = RegressionModel.run(self, *args, **kwargs)
     return regression_outcome