示例#1
0
    def run(self, specification, coefficients, dataset, index=None, chunk_specification=None,
            data_objects=None, run_config=None, debuglevel=0, **kwargs):
        """ For info on the arguments see RegressionModel.
        """
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        if self.filter_attribute <> None:
            res = Resources({"debug":debuglevel})
            index = dataset.get_filtered_index(self.filter_attribute, threshold=0, index=index, dataset_pool=self.dataset_pool,
                                               resources=res)
        outcome = RegressionModelWithAdditionInitialResiduals.run(self, specification, coefficients, dataset,
                                                                 index, chunk_specification=chunk_specification,
                                                                 run_config=run_config, debuglevel=debuglevel,
                                                                 **kwargs)
        if (outcome == None) or (outcome.size <= 0):
            return outcome
        if index == None:
            index = arange(dataset.size())
        if re.search("^ln_", self.outcome_attribute.get_alias()): # if the outcome attr. name starts with 'ln_'
                                                      # the results will be exponentiated.
            outcome_attribute_name = self.outcome_attribute.get_alias()[3:len(self.outcome_attribute.get_alias())]
            outcome = exp(outcome)
        else:
            outcome_attribute_name = self.outcome_attribute.get_alias()
        if outcome_attribute_name in dataset.get_known_attribute_names():
            values = dataset.get_attribute(outcome_attribute_name).copy()
            dataset.delete_one_attribute(outcome_attribute_name)
        else:
            values = zeros(dataset.size(), dtype='f')

        values[index] = outcome.astype(values.dtype)
        dataset.add_primary_attribute(name=outcome_attribute_name, data=values)
        self.correct_infinite_values(dataset, outcome_attribute_name, clip_all_larger_values=True)
        return outcome
示例#2
0
    def run(self, specification, coefficients, dataset, index=None, chunk_specification=None,
            years = 4,
            data_objects=None, run_config=None, debuglevel=0):
        """ For info on the arguments see RegressionModel.
        """
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        if self.filter_attribute <> None:
            res = Resources({"debug":debuglevel})
            index = dataset.get_filtered_index(self.filter_attribute, threshold=0, index=index, dataset_pool=self.dataset_pool,
                                               resources=res)
        init_outcome = RegressionModelWithAdditionInitialResiduals.run(self, specification, coefficients, dataset,
                                         index, chunk_specification=chunk_specification,
                                         run_config=run_config, debuglevel=debuglevel)

        initial_error_name = "_init_error_%s" % self.outcome_attribute.get_alias()
        initial_error = dataset[initial_error_name][index]
        mean = init_outcome - initial_error

        rmse = dataset.compute_variables("paris.establishment.rmse_ln_emp_ratio")
        _epsilon = norm.rvs(location=0, scale=rmse) / years  # convert lump prediction to annual prediction
        _epsilon_name = "_epsilon_%s" % self.outcome_attribute.get_alias()

        if _epsilon_name not in dataset.get_known_attribute_names():
            dataset.add_primary_attribute(name=_epsilon_name, data=zeros(dataset.size(), dtype="float32"))
        dataset.set_values_of_one_attribute(_epsilon_name, _epsilon, index)
        outcome = mean + _epsilon[index]

        if (outcome == None) or (outcome.size <= 0):
            return outcome
        if index == None:
            index = arange(dataset.size())
    
        if re.search("^ln_", self.outcome_attribute.get_alias()): # if the outcome attr. name starts with 'ln_'
                                                      # the results will be exponentiated.
            outcome_attribute_name = self.outcome_attribute.get_alias()[3:len(self.outcome_attribute.get_alias())]
            outcome = exp(outcome)
        else:
            outcome_attribute_name = self.outcome_attribute.get_alias()
        if outcome_attribute_name in dataset.get_known_attribute_names():
            values = dataset.get_attribute(outcome_attribute_name).copy()
            dataset.delete_one_attribute(outcome_attribute_name)
        else:
            values = zeros(dataset.size(), dtype='f')

        values[index] = outcome.astype(values.dtype)
        dataset.add_primary_attribute(name=outcome_attribute_name, data=values)
        self.correct_infinite_values(dataset, outcome_attribute_name, clip_all_larger_values=True)
        return outcome