def run(self, specification, coefficients, dataset, index=None,
        chunk_specification=None, data_objects=None, run_config=None,
        debuglevel=0):
        """Run the regression on proposals and store the outcome on the dataset.

        The arguments are forwarded to RegressionModel.run; see RegressionModel
        for their meaning. Per the original documentation, dataset is expected
        to be a DevelopmentProjectProposalDataset and index is relative to it.

        Steps performed:
          * data_objects (if given) are registered in self.dataset_pool;
          * a proposal-component dataset is built from dataset and the pool's
            'development_template_component' dataset and replaces the pool
            entry of the same dataset name;
          * the regression is run; when self.outcome_attribute_name starts
            with 'ln_', the result is exponentiated and the prefix is removed
            from self.outcome_attribute_name;
          * the result is written into that attribute for the members given
            by index (the attribute is created first, filled with
            self.defalult_value, if the dataset does not know it);
          * self.correct_infinite_values is applied to the attribute.

        Returns the dataset itself.
        """
        pool = self.dataset_pool
        if data_objects is not None:
            pool.add_datasets_if_not_included(data_objects)

        template_components = pool.get_dataset('development_template_component')
        proposal_component_set = create_from_proposals_and_template_components(
            dataset, template_components)
        pool.replace_dataset(proposal_component_set.get_dataset_name(),
                             proposal_component_set)

        result = RegressionModel.run(
            self, specification, coefficients, dataset,
            index=index,
            chunk_specification=chunk_specification,
            data_objects=data_objects,
            run_config=run_config,
            debuglevel=debuglevel)

        # A log-scale outcome ('ln_' prefix) is transformed back to levels.
        # NOTE(review): the prefix is stripped on self, so a second call on
        # the same instance will not exponentiate again - confirm intended.
        if re.search("^ln_", self.outcome_attribute_name):
            self.outcome_attribute_name = self.outcome_attribute_name[3:]
            result = exp(result)

        outcome_name = self.outcome_attribute_name
        if outcome_name not in dataset.get_known_attribute_names():
            default_column = self.defalult_value + zeros(dataset.size())
            dataset.add_primary_attribute(default_column, outcome_name)

        dataset.set_values_of_one_attribute(outcome_name, result, index=index)
        self.correct_infinite_values(dataset, outcome_name)
        return dataset
示例#2
0
 def run(self,
         specification,
         coefficients,
         dataset,
         index=None,
         chunk_specification=None,
         data_objects=None,
         run_config=None,
         debuglevel=0):
     """Run the regression and store its logistic transform on the dataset.

     The arguments are forwarded unchanged to RegressionModel.run; see
     RegressionModel for their meaning.

     The regression outcome x is mapped to exp(x) / (1 + exp(x)) and written
     into the attribute named by self.attribute_to_modify for the members
     given by index (all members when index is None). The attribute is
     created, zero-filled as float32, when the dataset does not know it.

     Returns the transformed values, or the untouched regression outcome
     when that outcome is None or empty.
     """
     regression_outcome = RegressionModel.run(
         self,
         specification,
         coefficients,
         dataset,
         index=index,
         chunk_specification=chunk_specification,
         data_objects=data_objects,
         run_config=run_config,
         debuglevel=debuglevel)
     # Identity tests are required here: "array == None" is evaluated
     # element-wise by numpy and is ambiguous when used as a truth value.
     if regression_outcome is None or regression_outcome.size <= 0:
         return regression_outcome
     if index is None:
         index = arange(dataset.size())
     result = exp(regression_outcome)
     result = result / (1.0 + result)
     if (self.attribute_to_modify
             not in dataset.get_known_attribute_names()):
         dataset.add_attribute(name=self.attribute_to_modify,
                               data=zeros((dataset.size(), ),
                                          dtype=float32))
     dataset.set_values_of_one_attribute(self.attribute_to_modify, result,
                                         index)
     return result
    def run(self,
            specification,
            coefficients,
            dataset,
            index=None,
            chunk_specification=None,
            data_objects=None,
            run_config=None,
            debuglevel=0):
        """Run the regression on proposals and store the outcome on the dataset.

        The arguments are forwarded to RegressionModel.run; see RegressionModel
        for their meaning. Per the original documentation, dataset is expected
        to be a DevelopmentProjectProposalDataset and index is relative to it.

        Before the regression, data_objects (if given) are added to
        self.dataset_pool and a proposal-component dataset built from dataset
        and the pool's 'development_template_component' dataset replaces the
        pool entry of the same name. Afterwards the result is written into
        the attribute named by self.outcome_attribute_name (created with
        self.defalult_value when unknown) and infinite values are corrected
        through self.correct_infinite_values.

        Returns the dataset itself.
        """
        pool = self.dataset_pool
        if data_objects is not None:
            pool.add_datasets_if_not_included(data_objects)

        components = create_from_proposals_and_template_components(
            dataset, pool.get_dataset('development_template_component'))
        pool.replace_dataset(components.get_dataset_name(), components)

        regression_kwargs = dict(index=index,
                                 chunk_specification=chunk_specification,
                                 data_objects=data_objects,
                                 run_config=run_config,
                                 debuglevel=debuglevel)
        result = RegressionModel.run(self, specification, coefficients,
                                     dataset, **regression_kwargs)

        # An outcome name starting with 'ln_' marks a log-scale outcome: the
        # result is exponentiated and the prefix dropped from the name.
        # NOTE(review): the name is changed on self, so the transform only
        # happens on the first call of a given instance - confirm intended.
        if re.search("^ln_", self.outcome_attribute_name):
            self.outcome_attribute_name = self.outcome_attribute_name[3:]
            result = exp(result)

        target = self.outcome_attribute_name
        if target not in dataset.get_known_attribute_names():
            dataset.add_primary_attribute(
                self.defalult_value + zeros(dataset.size()), target)

        dataset.set_values_of_one_attribute(target, result, index=index)
        self.correct_infinite_values(dataset, target)
        return dataset
 def run(self, specification, coefficients, dataset, index=None, **kwargs):
     """Run the regression only for the agents that belong to this group.

     index defaults to all members of dataset. A "data_objects" entry in
     kwargs, unless None, is added to self.dataset_pool. The remaining
     keyword arguments are forwarded to RegressionModel.run.

     Returns a float32 array with one entry per element of index: the
     regression outcome for group members and 0 for everybody else.
     """
     if index is None:
         index = arange(dataset.size())
     extra_datasets = kwargs.get("data_objects",{})
     if extra_datasets is not None:
         self.dataset_pool.add_datasets_if_not_included(extra_datasets)
     # Positions (relative to index) of the agents belonging to this group.
     member_positions = self.group_member.get_index_of_my_agents(
         dataset, index, dataset_pool=self.dataset_pool)
     member_index = index[member_positions]
     group_outcome = RegressionModel.run(self, specification, coefficients,
                                         dataset, index=member_index, **kwargs)
     padded = zeros(index.size, dtype=float32)
     padded[member_positions] = group_outcome
     return padded
    def run(self, specification, coefficients, dataset,
            index=None, chunk_specification=None,
            data_objects=None, run_config=None, debuglevel=0):
        """Predict water demand for the month encoded in the outcome attribute.

        For info on the arguments see RegressionModel.

        The alias of self.outcome_attribute must end in digits; they are read
        as the month and combined with the current simulation year into a
        YYYYMM "date" attribute that is set on every member of dataset. When
        self.filter_attribute is set, index is narrowed to the members
        passing the filter before the regression is run.

        If the alias starts with 'ln_' the regression result is exponentiated
        and stored under the alias without that prefix; otherwise it is stored
        under the alias itself. A missing attribute is created, with zeros
        for members outside index.

        Returns the (possibly exponentiated) water demand, or the untouched
        regression outcome when that outcome is None or empty.
        """
        outcome_attribute_short = self.outcome_attribute.get_alias()
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        if self.filter_attribute is not None:
            res = Resources({"debug":debuglevel})
            index = dataset.get_filtered_index(self.filter_attribute, threshold=0, index=index,
                                               dataset_pool=self.dataset_pool, resources=res)

        current_year = SimulationState().get_current_time()
        # The month is the trailing run of digits of the outcome alias.
        current_month = int(re.search(r'\d+$', outcome_attribute_short).group())
        # date in YYYYMM format, matching to the id_name field of weather dataset
        date = int("%d%02d" % (current_year, current_month))
        date = array([date] * dataset.size())

        if "date" in dataset.get_known_attribute_names():
            dataset.set_values_of_one_attribute("date", date)
        else:
            dataset.add_primary_attribute(date, "date")

        water_demand = RegressionModel.run(self, specification, coefficients, dataset,
                                           index, chunk_specification,
                                           run_config=run_config, debuglevel=debuglevel)
        # Identity tests: "array == None" is element-wise in numpy and cannot
        # be used as a truth value.
        if water_demand is None or water_demand.size <= 0:
            return water_demand

        if index is None:
            index = arange(dataset.size())

        if re.search("^ln_", outcome_attribute_short):
            # if the outcome attr. name starts with 'ln_' the results will be exponentiated.
            outcome_attribute_name = outcome_attribute_short[3:]
            water_demand = exp(water_demand)
        else:
            outcome_attribute_name = outcome_attribute_short

        if outcome_attribute_name in dataset.get_known_attribute_names():
            dataset.set_values_of_one_attribute(outcome_attribute_name, water_demand, index)
        else:
            # New attribute: members outside index keep the value 0.
            results = zeros(dataset.size(), dtype=water_demand.dtype)
            results[index] = water_demand
            dataset.add_primary_attribute(results, outcome_attribute_name)

        return water_demand
    def run(self, specification, coefficients, dataset, index=None, **kwargs):
        """Run the regression and add the initial residuals to its outcome.

        On the first call for a dataset the residuals (observed outcome minus
        predicted outcome) are stored in the primary attribute
        "_init_error_<outcome alias>"; later calls reuse the stored values.

        If missing values of the outcome attribute are supposed to be excluded
        from the addition of the initial residuals, set the run_config entry
        'exclude_missing_values_from_initial_error' to True. Additionally, the
        entry 'outcome_attribute_missing_value' specifies the missing value
        (default is 0). Similarly, if outliers are to be excluded, the
        run_config entry 'exclude_outliers_from_initial_error' should be set
        to True. In such a case, run_config entries 'outlier_is_less_than' and
        'outlier_is_greater_than' can define lower and upper bounds for
        outliers. By default, an outlier is a data point smaller than 0.
        There is no default upper bound.

        index defaults to all members of dataset; remaining keyword arguments
        are forwarded to RegressionModel.run.

        Returns the regression outcome plus the initial residuals for index.
        Raises StandardError when no outcome attribute was configured or when
        it cannot be computed on the dataset.
        """
        if self.outcome_attribute is None:
            raise StandardError("An outcome attribute must be specified for this model. Pass it into the initialization.")

        if self.outcome_attribute.get_alias() not in dataset.get_known_attribute_names():
            try:
                dataset.compute_variables(self.outcome_attribute, dataset_pool=self.dataset_pool)
            except Exception:
                # Exception (not a bare except) so that KeyboardInterrupt and
                # SystemExit are not converted into a misleading error.
                raise StandardError("The outcome attribute %s must be a known attribute of the dataset %s." % (
                                                                self.outcome_attribute.get_alias(), dataset.get_dataset_name()))

        if index is None:
            index = arange(dataset.size())
        original_data = dataset.get_attribute_by_index(self.outcome_attribute, index)

        outcome = RegressionModel.run(self, specification, coefficients, dataset, index, initial_values=original_data.astype('float32'), **kwargs)
        initial_error_name = "_init_error_%s" % self.outcome_attribute.get_alias()

        if initial_error_name not in dataset.get_known_attribute_names():
            # First run: compute the residuals and remember them on the dataset.
            initial_error = original_data - outcome
            dataset.add_primary_attribute(name=initial_error_name, data=zeros(dataset.size(), dtype="float32"))
            exclude_missing_values = self.run_config.get("exclude_missing_values_from_initial_error", False)
            exclude_outliers = self.run_config.get("exclude_outliers_from_initial_error", False)
            if exclude_missing_values:
                missing_value = self.run_config.get("outcome_attribute_missing_value", 0)
                initial_error[original_data == missing_value] = 0
                logger.log_status('Values equal %s were excluded from adding residuals.' % missing_value)
            if exclude_outliers:
                outlier_low = self.run_config.get("outlier_is_less_than", 0)
                initial_error[original_data < outlier_low] = 0
                # Without an explicit upper bound the data maximum is used,
                # i.e. no value counts as a high outlier.
                outlier_high = self.run_config.get("outlier_is_greater_than", original_data.max())
                initial_error[original_data > outlier_high] = 0
                logger.log_status('Values less than %s and larger than %s were excluded from adding residuals.' % (outlier_low, outlier_high))
            dataset.set_values_of_one_attribute(initial_error_name, initial_error, index)
        else:
            initial_error = dataset.get_attribute_by_index(initial_error_name, index)
        return outcome + initial_error
 def run(self, specification, coefficients, dataset, index=None, chunk_specification=None,
         data_objects=None, run_config=None, debuglevel=0):
     """Run the regression and store its logistic transform on the dataset.

     For info on the arguments see RegressionModel; they are forwarded
     unchanged to RegressionModel.run.

     The regression outcome x is mapped to exp(x) / (1 + exp(x)) and stored
     in the attribute named by self.attribute_to_modify for the members
     given by index (all members when index is None). A missing attribute
     is first created as a zero-filled float32 column.

     Returns the transformed values, or the untouched regression outcome
     when that outcome is None or empty.
     """
     regression_outcome = RegressionModel.run(self, specification, coefficients, dataset,
                             index=index, chunk_specification=chunk_specification, data_objects=data_objects,
                             run_config=run_config, debuglevel=debuglevel)
     # "is None" instead of "== None": numpy evaluates the latter
     # element-wise, which is ambiguous as a truth value.
     if regression_outcome is None or regression_outcome.size <= 0:
         return regression_outcome
     if index is None:
         index = arange(dataset.size())
     result = exp(regression_outcome)
     result = result/(1.0+result)
     if self.attribute_to_modify not in dataset.get_known_attribute_names():
         dataset.add_attribute(name=self.attribute_to_modify,
                                data=zeros((dataset.size(),), dtype=float32))
     dataset.set_values_of_one_attribute(self.attribute_to_modify, result, index)
     return result
 def run(self, specification, coefficients, dataset, index=None, **kwargs):
     """Run the regression for this group's agents and pad the rest with 0.

     When index is None it covers every member of dataset. kwargs may carry
     a "data_objects" entry, which is added to self.dataset_pool unless it
     is None; all kwargs are passed on to RegressionModel.run.

     Returns a float32 array aligned with index, holding the regression
     outcome at the positions of group members and 0 elsewhere.
     """
     if index is None:
         index = arange(dataset.size())

     pool_additions = kwargs.get("data_objects", {})
     if pool_additions is not None:
         self.dataset_pool.add_datasets_if_not_included(pool_additions)

     # Positions within index of the agents that belong to this group.
     mine = self.group_member.get_index_of_my_agents(
         dataset, index, dataset_pool=self.dataset_pool)
     outcome_for_group = RegressionModel.run(
         self, specification, coefficients, dataset,
         index=index[mine], **kwargs)

     full_outcome = zeros(index.size, dtype=float32)
     full_outcome[mine] = outcome_for_group
     return full_outcome
    def run(self, specification, coefficients, dataset, index=None, chunk_specification=None,
            data_objects=None, run_config=None, debuglevel=0):
        """Run the regression and store the result as "housing_price".

        For info on the arguments see RegressionModel.

        data_objects (if given) are added to self.dataset_pool. When
        self.filter_attribute is set, index is narrowed to the members
        passing the filter before the regression is run.

        Returns None after storing the prices; when the regression outcome is
        None or empty, that outcome is returned and nothing is stored.
        """
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        if self.filter_attribute is not None:
            res = Resources({"debug":debuglevel})
            index = dataset.get_filtered_index(self.filter_attribute, threshold=0, index=index,
                                               dataset_pool=self.dataset_pool, resources=res)
        housing_price = RegressionModel.run(self, specification, coefficients, dataset, index, chunk_specification,
                                     run_config=run_config, debuglevel=debuglevel)
        # Identity tests: "array == None" is element-wise in numpy and cannot
        # be used as a truth value.
        if housing_price is None or housing_price.size <= 0:
            return housing_price
        if index is None:
            index = arange(dataset.size())
        dataset.set_values_of_one_attribute("housing_price", housing_price, index)

        return
示例#10
0
    def run(self, specification, coefficients, dataset, index=None, chunk_specification=None,
            data_objects=None, run_config=None, debuglevel=0):
        """Predict the income of zero-worker households and clamp it at zero.

        For info on the arguments see RegressionModel.

        The regression is run for the members where 'household.workers == 0'
        and its result is stored in their "income" attribute. Afterwards every
        negative income in the dataset is set to 0.

        Returns None.
        """
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        if self.filter_attribute is not None:
            res = Resources({"debug":debuglevel})
            index = dataset.get_filtered_index(self.filter_attribute, threshold=0, index=index,
                                               dataset_pool=self.dataset_pool, resources=res)
        # NOTE(review): the filtered index computed above is not used below;
        # the regression runs on all zero-worker households - confirm intended.
        zeroworkers = dataset.compute_variables('household.workers == 0')
        index_zeroworker = where(zeroworkers)[0]
        #Run regression model
        incomes = RegressionModel.run(self, specification, coefficients, dataset, index_zeroworker, chunk_specification,
                                     run_config=run_config, debuglevel=debuglevel)
        dataset.set_values_of_one_attribute("income", incomes, index_zeroworker)
        #Bump up all negative incomes to zero
        negative_income = dataset.compute_variables('household.income < 0')
        index_neg_inc = where(negative_income==1)[0]
        if index_neg_inc.size > 0:
            dataset.modify_attribute('income', zeros(index_neg_inc.size, dtype="int32"), index_neg_inc)

        return
    def run(self,
            specification,
            coefficients,
            dataset,
            index=None,
            chunk_specification=None,
            data_objects=None,
            run_config=None,
            debuglevel=0):
        """Run the regression and store the result as "housing_price".

        For info on the arguments see RegressionModel.

        data_objects (if given) are added to self.dataset_pool. When
        self.filter_attribute is set, index is narrowed to the members
        passing the filter before the regression is run.

        Returns None after storing the prices; when the regression outcome is
        None or empty, that outcome is returned and nothing is stored.
        """
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        if self.filter_attribute is not None:
            res = Resources({"debug": debuglevel})
            index = dataset.get_filtered_index(self.filter_attribute,
                                               threshold=0,
                                               index=index,
                                               dataset_pool=self.dataset_pool,
                                               resources=res)
        housing_price = RegressionModel.run(self,
                                            specification,
                                            coefficients,
                                            dataset,
                                            index,
                                            chunk_specification,
                                            run_config=run_config,
                                            debuglevel=debuglevel)
        # Identity tests: "array == None" is element-wise in numpy and cannot
        # be used as a truth value.
        if housing_price is None or housing_price.size <= 0:
            return housing_price
        if index is None:
            index = arange(dataset.size())
        dataset.set_values_of_one_attribute("housing_price", housing_price,
                                            index)

        return
示例#12
0
 def run(self, specification, coefficients, dataset, index=None, chunk_specification=None,
         data_objects=None, run_config=None, debuglevel=0):
     """Predict total land value and split it into residential/nonresidential.

     For info on the arguments see RegressionModel.

     The regression outcome is treated as the logarithm of the total land
     value. Its exponential is split using the dataset attribute
     "fraction_residential_land" and stored in "residential_land_value" and
     "nonresidential_land_value" for the members given by index. When
     self.filter is set, index is first narrowed to the members passing it.
     self.post_check(dataset) is called once the values are stored.

     Returns the index that was updated, or the regression outcome itself
     when that outcome is None or empty.
     """
     if data_objects is not None:
         self.dataset_pool.add_datasets_if_not_included(data_objects)
     if self.filter is not None:
         res = Resources({"debug":debuglevel})
         index = dataset.get_filtered_index(self.filter, threshold=0, index=index, dataset_pool=self.dataset_pool,
                                            resources=res)
     ln_total_land_value = RegressionModel.run(self, specification, coefficients, dataset, index, chunk_specification,
                                  run_config=run_config, debuglevel=debuglevel)
     # Identity tests: "array == None" is element-wise in numpy and cannot
     # be used as a truth value.
     if ln_total_land_value is None or ln_total_land_value.size <= 0:
         return ln_total_land_value
     if index is None:
         index = arange(dataset.size())
     total_land_value = exp(ln_total_land_value)
     residential_land_value = total_land_value * dataset.get_attribute_by_index("fraction_residential_land", index)
     nonresidential_land_value = total_land_value - residential_land_value
     dataset.set_values_of_one_attribute("residential_land_value", residential_land_value, index)
     dataset.set_values_of_one_attribute("nonresidential_land_value", nonresidential_land_value,
                                         index)
     self.post_check(dataset)
     return index
示例#13
0
# Estimate the coefficients of the regression model `rm` on the `locations`
# dataset with "gridcell.cost" as the outcome attribute. `rm`,
# `specification` and `locations` are defined outside this excerpt.
coef, other_results = rm.estimate(specification, dataset=locations, 
                                  outcome_attribute="gridcell.cost", 
                                  procedure="opus_core.estimate_linear_regression")
coef.summary()

# Build a small in-memory 'gridcells' table (four rows) to run the model on.
dstorage = StorageFactory().get_storage('dict_storage')
dstorage.write_table(
    table_name = 'gridcells',
    table_data = {'id':array([1,2,3,4]),
             'distance_to_cbd':array([2,4,6,8])
             })

# Wrap the table in a Dataset named 'gridcell' whose id attribute is 'id'.
ds = Dataset(in_storage=dstorage, in_table_name='gridcells',
             id_name='id', dataset_name='gridcell')

# Apply the estimated coefficients to the new dataset.
cost = rm.run(specification, coefficients=coef, dataset=ds)


# Variables

# Concept
locations.get_dataset_name()

# Add a primary attribute with nine values to `locations`.
locations.add_primary_attribute(name="percent_wetland",
                            data=[85,20,0,90,35,51,0,10,5])

# Create a dataset pool on top of `storage` (defined outside this excerpt).
from opus_core.datasets.dataset_pool import DatasetPool
dataset_pool = DatasetPool(package_order=['urbansim', 'opus_core'], storage=storage)
# The pool content is listed before and after requesting "household",
# presumably to show that datasets are loaded on demand - TODO confirm.
dataset_pool.datasets_in_pool()
hs = dataset_pool.get_dataset("household")
dataset_pool.datasets_in_pool()
示例#14
0
    def run(self,
            specification,
            coefficients,
            dataset,
            index=None,
            chunk_specification=None,
            data_objects=None,
            run_config=None,
            debuglevel=0):
        """Predict water demand for the month encoded in the outcome attribute.

        For info on the arguments see RegressionModel.

        The alias of self.outcome_attribute must end in digits; they are read
        as the month and combined with the current simulation year into a
        YYYYMM "date" attribute that is set on every member of dataset. When
        self.filter_attribute is set, index is narrowed to the members
        passing the filter before the regression is run.

        If the alias starts with 'ln_' the regression result is exponentiated
        and stored under the alias without that prefix; otherwise it is stored
        under the alias itself. A missing attribute is created, with zeros
        for members outside index.

        Returns the (possibly exponentiated) water demand, or the untouched
        regression outcome when that outcome is None or empty.
        """
        outcome_attribute_short = self.outcome_attribute.get_alias()
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        if self.filter_attribute is not None:
            res = Resources({"debug": debuglevel})
            index = dataset.get_filtered_index(self.filter_attribute,
                                               threshold=0,
                                               index=index,
                                               dataset_pool=self.dataset_pool,
                                               resources=res)

        current_year = SimulationState().get_current_time()
        # The month is the trailing run of digits of the outcome alias.
        current_month = int(re.search(r'\d+$', outcome_attribute_short).group())
        # date in YYYYMM format, matching to the id_name field of weather dataset
        date = int("%d%02d" % (current_year, current_month))
        date = array([date] * dataset.size())

        if "date" in dataset.get_known_attribute_names():
            dataset.set_values_of_one_attribute("date", date)
        else:
            dataset.add_primary_attribute(date, "date")

        water_demand = RegressionModel.run(self,
                                           specification,
                                           coefficients,
                                           dataset,
                                           index,
                                           chunk_specification,
                                           run_config=run_config,
                                           debuglevel=debuglevel)
        # Identity tests: "array == None" is element-wise in numpy and cannot
        # be used as a truth value.
        if water_demand is None or water_demand.size <= 0:
            return water_demand

        if index is None:
            index = arange(dataset.size())

        if re.search("^ln_", outcome_attribute_short):
            # if the outcome attr. name starts with 'ln_' the results will be exponentiated.
            outcome_attribute_name = outcome_attribute_short[3:]
            water_demand = exp(water_demand)
        else:
            outcome_attribute_name = outcome_attribute_short

        if outcome_attribute_name in dataset.get_known_attribute_names():
            dataset.set_values_of_one_attribute(outcome_attribute_name,
                                                water_demand, index)
        else:
            # New attribute: members outside index keep the value 0.
            results = zeros(dataset.size(), dtype=water_demand.dtype)
            results[index] = water_demand
            dataset.add_primary_attribute(results, outcome_attribute_name)

        return water_demand
示例#15
0
    def run(self, specification, coefficients, dataset, index=None, **kwargs):
        """Run the regression and add the initial residuals to its outcome.

        On the first call for a dataset the residuals (observed outcome minus
        predicted outcome) are stored in the primary attribute
        "_init_error_<outcome alias>"; later calls reuse the stored values.

        If missing values of the outcome attribute are supposed to be excluded
        from the addition of the initial residuals, set the run_config entry
        'exclude_missing_values_from_initial_error' to True. Additionally, the
        entry 'outcome_attribute_missing_value' specifies the missing value
        (default is 0). Similarly, if outliers are to be excluded, the
        run_config entry 'exclude_outliers_from_initial_error' should be set
        to True. In such a case, run_config entries 'outlier_is_less_than' and
        'outlier_is_greater_than' can define lower and upper bounds for
        outliers. By default, an outlier is a data point smaller than 0.
        There is no default upper bound.

        index defaults to all members of dataset; remaining keyword arguments
        are forwarded to RegressionModel.run.

        Returns the regression outcome plus the initial residuals for index.
        Raises StandardError when no outcome attribute was configured or when
        it cannot be computed on the dataset.
        """
        if self.outcome_attribute is None:
            raise StandardError(
                "An outcome attribute must be specified for this model. Pass it into the initialization."
            )

        if self.outcome_attribute.get_alias(
        ) not in dataset.get_known_attribute_names():
            try:
                dataset.compute_variables(self.outcome_attribute,
                                          dataset_pool=self.dataset_pool)
            except Exception:
                # Exception (not a bare except) so that KeyboardInterrupt and
                # SystemExit are not converted into a misleading error.
                raise StandardError(
                    "The outcome attribute %s must be a known attribute of the dataset %s."
                    % (self.outcome_attribute.get_alias(),
                       dataset.get_dataset_name()))

        if index is None:
            index = arange(dataset.size())
        original_data = dataset.get_attribute_by_index(self.outcome_attribute,
                                                       index)

        outcome = RegressionModel.run(
            self,
            specification,
            coefficients,
            dataset,
            index,
            initial_values=original_data.astype('float32'),
            **kwargs)
        initial_error_name = "_init_error_%s" % self.outcome_attribute.get_alias(
        )

        if initial_error_name not in dataset.get_known_attribute_names():
            # First run: compute the residuals and remember them on the dataset.
            initial_error = original_data - outcome
            dataset.add_primary_attribute(name=initial_error_name,
                                          data=zeros(dataset.size(),
                                                     dtype="float32"))
            exclude_missing_values = self.run_config.get(
                "exclude_missing_values_from_initial_error", False)
            exclude_outliers = self.run_config.get(
                "exclude_outliers_from_initial_error", False)
            if exclude_missing_values:
                missing_value = self.run_config.get(
                    "outcome_attribute_missing_value", 0)
                initial_error[original_data == missing_value] = 0
                logger.log_status(
                    'Values equal %s were excluded from adding residuals.' %
                    missing_value)
            if exclude_outliers:
                outlier_low = self.run_config.get("outlier_is_less_than", 0)
                initial_error[original_data < outlier_low] = 0
                # Without an explicit upper bound the data maximum is used,
                # i.e. no value counts as a high outlier.
                outlier_high = self.run_config.get("outlier_is_greater_than",
                                                   original_data.max())
                initial_error[original_data > outlier_high] = 0
                logger.log_status(
                    'Values less than %s and larger than %s were excluded from adding residuals.'
                    % (outlier_low, outlier_high))
            dataset.set_values_of_one_attribute(initial_error_name,
                                                initial_error, index)
        else:
            initial_error = dataset.get_attribute_by_index(
                initial_error_name, index)
        return outcome + initial_error
示例#16
0
 def run_after_estimation(self, *args, **kwargs):
     """Call RegressionModel.run directly, bypassing this class's own run().

     All positional and keyword arguments are passed through unchanged and
     the value returned by RegressionModel.run is handed back.
     """
     outcome = RegressionModel.run(self, *args, **kwargs)
     return outcome
示例#17
0
 
 # Script fragment: simulates water demand with `model` for one consumption
 # dataset and flushes the result. gridcells, year, months, dataset_pool,
 # consumption_type, weather, weather_attributes, model, specification,
 # coefficients and cache_directory are defined outside this excerpt.

 #create a ConsumptionDataset instance out of gridcells - simulate water demand for every gridcell
 # NOTE(review): `resources` is built here but not referenced again in the
 # visible statements.
 resources = Resources({'data':{
         "grid_id":gridcells.get_id_attribute(),
         "year":year * ones(gridcells.size()),
         "month":months[0] * ones(gridcells.size()),
         "sum_demand":zeros(gridcells.size())
         }})
 this_consumption = dataset_pool.get_dataset(consumption_type)
 
 #join consumption set with weather data
 this_consumption.join(weather, name=weather_attributes, join_attribute="year", 
                  metadata=AttributeType.PRIMARY)
 #run simulation
 result = model.run(specification, coefficients, this_consumption, index=None,
                    chunk_specification={'nchunks':3},
                    data_objects=dataset_pool.datasets_in_pool())
 
 #result = exp(result)
 # Store the simulated values in the "sum_demand" attribute.
 this_consumption.modify_attribute("sum_demand", result)
 
 #keep only those with meaningful water demand prediction, e.g. residential_units > 0 
 keep_index = where(result>0)[0]
 
 this_consumption.subset_by_index(keep_index)
 
 # NOTE(review): `out_storage` is created for the year directory but is not
 # used in the visible statements.
 year_dir = os.path.join(cache_directory, str(year))
 out_storage = StorageFactory().get_storage(type="tab_storage", storage_location=year_dir)
 
 this_consumption.flush_dataset()
 print result
 
 # Script fragment: simulates water demand with `model` for one consumption
 # dataset and flushes the result. gridcells, year, months, dataset_pool,
 # consumption_type, weather, weather_attributes, model, specification,
 # coefficients and cache_directory are defined outside this excerpt.

 #create a ConsumptionDataset instance out of gridcells - simulate water demand for every gridcell
 # NOTE(review): `resources` is built here but not referenced again in the
 # visible statements.
 resources = Resources({'data':{
         "grid_id":gridcells.get_id_attribute(),
         "year":year * ones(gridcells.size()),
         "month":months[0] * ones(gridcells.size()),
         "sum_demand":zeros(gridcells.size())
         }})
 this_consumption = dataset_pool.get_dataset(consumption_type)
 
 #join consumption set with weather data
 this_consumption.join(weather, name=weather_attributes, join_attribute="year", 
                  metadata=AttributeType.PRIMARY)
 #run simulation
 result = model.run(specification, coefficients, this_consumption, index=None,
                    chunk_specification={'nchunks':3},
                    data_objects=dataset_pool.datasets_in_pool())
 
 #result = exp(result)
 # Store the simulated values in the "sum_demand" attribute.
 this_consumption.modify_attribute("sum_demand", result)
 
 #keep only those with meaningful water demand prediction, e.g. residential_units > 0 
 keep_index = where(result>0)[0]
 
 this_consumption.subset_by_index(keep_index)
 
 # NOTE(review): `out_storage` is created for the year directory but is not
 # used in the visible statements.
 year_dir = os.path.join(cache_directory, str(year))
 out_storage = StorageFactory().get_storage(type="tab_storage", storage_location=year_dir)
 
 this_consumption.flush_dataset()
 print result
 def run_after_estimation(self, *args, **kwargs):
     """Delegate straight to RegressionModel.run with the given arguments.

     Nothing is added or changed: the arguments are forwarded as received
     and the result of RegressionModel.run is returned.
     """
     base_result = RegressionModel.run(self, *args, **kwargs)
     return base_result
示例#20
0
    def run(self, specification, coefficients, dataset, index=None, chunk_specification=None,
             data_objects=None, run_config=None, debuglevel=0):
        """Predict household incomes and store them in the 'income' attribute of dataset.

        For info on the arguments see RegressionModel.

        Steps:
          1. If self.filter_attribute is set, index is replaced by the filtered
             index of dataset (threshold=0).
          2. Households whose income holds a placeholder value of -1 ... -5 get
             a starting income from a hard-coded linear formula in the number
             of workers, mean person education and mean person age.  Any income
             that is still negative afterwards is set to 0.
          3. RegressionModel.run is invoked and its result is exponentiated
             (presumably the regression outcome is log income -- confirm
             against the specification).
          4. The exponentiated values are written to 'income' for index and
             all incomes are then clamped to the range [0, 5000000].

        Returns None once the dataset has been updated.  If the regression
        yields None or an empty result, that result is returned instead and the
        regression output is not written to the dataset.
        """
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        if self.filter_attribute is not None:
            res = Resources({"debug":debuglevel})
            index = dataset.get_filtered_index(self.filter_attribute, threshold=0, index=index,
                                               dataset_pool=self.dataset_pool, resources=res)

        def zero_out_negative_incomes():
            # Bump up all negative incomes to zero.
            negative_income = dataset.compute_variables('household.income < 0')
            index_neg_inc = where(negative_income==1)[0]
            if index_neg_inc.size > 0:
                dataset.modify_attribute('income', zeros(index_neg_inc.size, dtype="int32"), index_neg_inc)

        # (placeholder income, formula for the starting income).
        # A placeholder of -k marks a k-person household that was assigned a brand
        # new household id: by the hh-formation models for k = 2..5, and by the
        # hh-dissolution models for k = 1.
        # The order (2, 1, 3, 4, 5) is kept from the original code, because each
        # step rewrites 'income' before the next placeholder is looked up.
        initial_income_formulas = (
            (-2, '(((household.workers)*18593) + ((household.aggregate(person.education, function=mean))*11293) +  ((household.aggregate(person.age, function=mean))*889) - 95508)'),
            (-1, '(((household.workers)*24000) + ((household.aggregate(person.education, function=mean))*5590) +  ((household.aggregate(person.age, function=mean))*583) - 51957)'),
            (-3, '(((household.workers)*20078) + ((household.aggregate(person.education, function=mean))*8531) +  ((household.aggregate(person.age, function=mean))*861) - 72319)'),
            (-4, '(((household.workers)*21883) + ((household.aggregate(person.education, function=mean))*9656) +  ((household.aggregate(person.age, function=mean))*1806) - 112131)'),
            (-5, '(((household.workers)*8797) + ((household.aggregate(person.education, function=mean))*9049) +  ((household.aggregate(person.age, function=mean))*670) - 27224)'),
        )
        for placeholder, formula in initial_income_formulas:
            is_new_household = dataset.compute_variables('(household.income==(%d))' % placeholder)
            to_initialize = where(is_new_household == 1)[0]
            if to_initialize.size > 0:
                # The formula is only evaluated when there is something to initialize.
                dataset.modify_attribute('income', dataset.compute_variables(formula)[to_initialize],
                                         to_initialize)
        zero_out_negative_incomes()

        # Run the regression model with all coefficients of the specification.
        # NOTE: an additional macro employment-growth term used to be added to the
        # regression result before exponentiation and is currently disabled.  It
        # was 1.04*1.82 for the year 2010 and otherwise 1.82 * (this year's jobs /
        # last year's jobs), with job totals read from the
        # "annual_employment_control_totals" table of the 2009 base-year cache.
        incomes = RegressionModel.run(self, specification, coefficients, dataset, index, chunk_specification,
                                     run_config=run_config, debuglevel=debuglevel)
        # Test for None by identity and before calling exp(): '== None' on a numpy
        # array is an elementwise comparison, and exp(None) raises.
        if incomes is None:
            return incomes
        incomes = exp(incomes)
        if incomes.size <= 0:
            return incomes
        if index is None:
            index = arange(dataset.size())
        dataset.set_values_of_one_attribute("income", incomes, index)
        zero_out_negative_incomes()
        # Bump down all incomes above 5,000,000.
        too_high_income = dataset.compute_variables('household.income > 5000000')
        index_too_high_income = where(too_high_income==1)[0]
        if index_too_high_income.size > 0:
            dataset.modify_attribute('income', array(index_too_high_income.size*[5000000]), index_too_high_income)

        return
示例#21
0
# Estimate regression coefficients for the outcome "gridcell.cost" on the
# `locations` dataset, using the "opus_core.estimate_linear_regression" procedure.
# NOTE(review): `rm`, `specification` and `locations` are not defined in this
# snippet; they are assumed to come from earlier steps -- confirm.
coef, other_results = rm.estimate(specification, dataset=locations, 
                                  outcome_attribute="gridcell.cost", 
                                  procedure="opus_core.estimate_linear_regression")
# Presumably reports the estimated coefficients -- confirm against opus_core.
coef.summary()

# Build a small in-memory table 'gridcells' (4 rows: an id column and a
# distance_to_cbd column) in a 'dict_storage' storage.
dstorage = StorageFactory().get_storage('dict_storage')
dstorage.write_table(
    table_name = 'gridcells',
    table_data = {'id':array([1,2,3,4]),
             'distance_to_cbd':array([2,4,6,8])
             })

# Wrap that table in a Dataset named 'gridcell' whose id attribute is 'id'.
ds = Dataset(in_storage=dstorage, in_table_name='gridcells',
             id_name='id', dataset_name='gridcell')

# Run the regression model on the new dataset with the coefficients estimated above.
cost = rm.run(specification, coefficients=coef, dataset=ds)


# Variables

# Concept
# Query the name of the `locations` dataset.
locations.get_dataset_name()

# Add a primary attribute "percent_wetland" with 9 values to `locations`
# (presumably one value per location -- confirm that `locations` has 9 members).
locations.add_primary_attribute(name="percent_wetland",
                            data=[85,20,0,90,35,51,0,10,5])

from opus_core.datasets.dataset_pool import DatasetPool
# Create a dataset pool over the 'urbansim' and 'opus_core' packages.
# NOTE(review): `storage` is not defined in this snippet -- assumed to exist already.
dataset_pool = DatasetPool(package_order=['urbansim', 'opus_core'], storage=storage)
# List the pool contents before and after requesting the "household" dataset
# (presumably to show that get_dataset adds it to the pool on demand -- confirm).
dataset_pool.datasets_in_pool()
hs = dataset_pool.get_dataset("household")
dataset_pool.datasets_in_pool()