Example #1
    def __init__(self, regression_procedure="opus_core.linear_regression",
                  submodel_string=None,
                  run_config=None, estimate_config=None, debuglevel=0, dataset_pool=None):
 
        self.debug = DebugPrinter(debuglevel)

        self.dataset_pool = self.create_dataset_pool(dataset_pool)

        self.regression = RegressionModelFactory().get_model(name=regression_procedure)
        if self.regression is None:
            raise StandardError("No regression procedure given.")

        self.submodel_string = submodel_string

        self.run_config = run_config
        if self.run_config is None:
            self.run_config = Resources()
        if not isinstance(self.run_config, Resources) and isinstance(self.run_config, dict):
            self.run_config = Resources(self.run_config)

        self.estimate_config = estimate_config
        if self.estimate_config is None:
            self.estimate_config = Resources()
        if not isinstance(self.estimate_config, Resources) and isinstance(self.estimate_config, dict):
            self.estimate_config = Resources(self.estimate_config)
            
        self.data = {}
        self.coefficient_names = {}
        ChunkModel.__init__(self)
        self.get_status_for_gui().initialize_pieces(3, pieces_description = array(['initialization', 'computing variables', 'submodel: 1']))
    def estimate(self, specification, agent_set, agents_index=None, procedure=None, data_objects=None,
                  estimate_config=None, debuglevel=0):
        """ Computes capacity if required and calls the estimate method of ChoiceModel.
        See ChoiceModel.estimate for details on arguments.
        """
        if agents_index is None:
            agents_index = arange(agent_set.size())
        if agents_index.size <= 0:
            logger.log_status("Nothing to be done.")
            return (None, None)

        if estimate_config is None:
            estimate_config = Resources()
        self.estimate_config = estimate_config.merge_with_defaults(self.estimate_config)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        if self.location_id_string is not None:
            agent_set.compute_variables(self.location_id_string, dataset_pool=self.dataset_pool)
        
        capacity_for_estimation = None
        if self.estimate_config.get("compute_capacity_flag", False):
            capacity_string_for_estimation = self.estimate_config.get("capacity_string", None)
            capacity_for_estimation = self.determine_capacity(capacity_string=capacity_string_for_estimation, 
                                                              agent_set=agent_set, 
                                                              agents_index=agents_index)

        self.estimate_config.merge({"capacity":capacity_for_estimation})
        return ChoiceModel.estimate(self,specification, agent_set,
                                    agents_index, procedure, estimate_config=self.estimate_config, 
                                    debuglevel=debuglevel)
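The constructor at the top of this example accepts run_config and estimate_config as None, a plain dict, or a Resources object and normalizes them to Resources. A minimal, self-contained sketch of that normalization pattern, using a bare dict subclass as a stand-in for opus_core's Resources:

    class Resources(dict):
        """Stand-in for opus_core's Resources, which is essentially a dict with merge helpers."""
        pass

    def normalize_config(config):
        # Return a Resources instance whether config is None, a dict, or already a Resources.
        if config is None:
            return Resources()
        if isinstance(config, Resources):
            return config
        if isinstance(config, dict):
            return Resources(config)
        raise TypeError("config must be None, a dict, or a Resources instance")

    print(type(normalize_config(None)).__name__)               # Resources
    print(normalize_config({"capacity_string": "vacant"}))     # {'capacity_string': 'vacant'}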
Example #3
    def prepare_for_estimate(self,
                             specification_dict=None,
                             specification_storage=None,
                             specification_table=None,
                             events_for_estimation_storage=None,
                             events_for_estimation_table=None,
                             agents_filter='',
                             compute_variables=[],
                             data_objects={}):

        specification = get_specification_for_estimation(
            specification_dict, specification_storage, specification_table)
        projects = None
        # create agents for estimation
        if events_for_estimation_storage is not None:
            projects = Dataset(in_storage=events_for_estimation_storage,
                               in_table_name=events_for_estimation_table,
                               id_name=[],
                               dataset_name='development_project')
            if compute_variables:
                projects.compute_variables(compute_variables,
                                           resources=Resources(data_objects))
                # needs to be a primary attribute because of the join method below
                #projects.add_primary_attribute(estimation_set.get_attribute(location_id_variable),
                #                               VariableName(location_id_variable).get_alias())

            if agents_filter:
                values = projects.compute_variables(
                    agents_filter, resources=Resources(data_objects))
                index = where(values > 0)[0]
                projects.subset_by_index(index,
                                         flush_attributes_if_not_loaded=False)

        return (specification, projects)
Example #4
 def load(self, resources=None, in_storage=None, in_table_name=None):
     """
     """ # TODO: insert docstring
     local_resources = Resources(resources)
     local_resources.merge_with_defaults({
         "field_submodel_id":self.field_submodel_id,
         "field_coefficient_name":self.field_coefficient_name,
         "field_estimate":self.field_estimate,
         "field_standard_error":self.field_standard_error,
         "other_fields":self.other_fields})
     if in_storage is not None:
         self.in_storage = in_storage
     if not isinstance(self.in_storage, Storage):
         logger.log_warning("in_storage has to be of type Storage. No coefficients loaded.")
     else:
         data = self.in_storage.load_table(table_name=in_table_name)
         submodels = data[local_resources["field_submodel_id"]]
         self.names = data[local_resources["field_coefficient_name"]]
         self.values = data[local_resources["field_estimate"]]
         self.standard_errors = data[local_resources["field_standard_error"]]
         for measure in local_resources["other_fields"]:
             if measure in data.keys():
                 self.other_measures[measure] = data[measure]
         if submodels.max() >= 0:
             self.submodels=submodels
         self.check_consistency()
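merge_with_defaults appears throughout these snippets; the calls above suggest that values already present in the resources win and the defaults only fill in missing keys. A plain-dict approximation of that behaviour, assuming exactly those semantics:

    def merge_with_defaults(resources, defaults):
        # Defaults fill in only the keys the caller did not supply.
        merged = dict(defaults)
        merged.update(resources)
        return merged

    local = {"field_estimate": "beta"}
    print(merge_with_defaults(local, {"field_estimate": "estimate",
                                      "field_standard_error": "standard_error"}))
    # {'field_estimate': 'beta', 'field_standard_error': 'standard_error'}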
Example #5
    def prepare_for_simulation(self, config, cache_directory=None):
        self.config = Resources(config)
        base_cache_dir = self.config[
            'creating_baseyear_cache_configuration'].cache_directory_root

        self.simulation_state = SimulationState(new_instance=True,
                                                base_cache_dir=base_cache_dir,
                                                start_time=self.config.get(
                                                    'base_year', 0))

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        if self.config['cache_directory'] is None:
            self.config[
                'cache_directory'] = self.simulation_state.get_cache_directory(
                )

        SessionConfiguration(
            new_instance=True,
            package_order=self.config['dataset_pool_configuration'].
            package_order,
            in_storage=AttributeCache())

        if config['creating_baseyear_cache_configuration'].cache_from_database:
            ForkProcess().fork_new_process(
                self.config['creating_baseyear_cache_configuration'].
                cache_scenario_database, self.config)
        else:
            CacheFltData().run(self.config)
Example #6
    def run(self,
            specification,
            coefficients,
            dataset,
            index=None,
            chunk_specification=None,
            data_objects=None,
            run_config=None,
            initial_values=None,
            procedure=None,
            debuglevel=0):
        """'specification' is of type EquationSpecification,
            'coefficients' is of type Coefficients,
            'dataset' is of type Dataset,
            'index' are the indices of individuals in dataset for which
                        the model runs. If it is None, the whole dataset is considered.
            'chunk_specification' determines the number of chunks in which the simulation is processed.
            'data_objects' is a dictionary where each key is the name of a data object
            ('zone', ...) and its value is an object of class Dataset.
            'run_config' is of type Resources; it gives additional arguments for the run.
            If 'procedure' is given, it overrides the regression_procedure of the constructor.
            'initial_values' is an array of the initial values of the results. It will be overwritten
            by the results for those elements that are handled by the model (defined by submodels in the specification).
            By default the results are initialized with 0.
            'debuglevel' overrides the constructor's 'debuglevel'.
        """
        self.debug.flag = debuglevel
        if run_config is None:
            run_config = Resources()
        if not isinstance(run_config, Resources) and isinstance(
                run_config, dict):
            run_config = Resources(run_config)
        self.run_config = run_config.merge_with_defaults(self.run_config)
        self.run_config.merge({"debug": self.debug})
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.replace_dataset(dataset.get_dataset_name(), dataset)
        if procedure is not None:
            self.regression = RegressionModelFactory().get_model(
                name=procedure)
        if initial_values is None:
            self.initial_values = zeros((dataset.size(), ), dtype=float32)
        else:
            self.initial_values = zeros((dataset.size(), ),
                                        dtype=initial_values.dtype)
            self.initial_values[index] = initial_values

        if dataset.size() <= 0:  # no data loaded yet
            dataset.get_id_attribute()
        if index is None:
            index = arange(dataset.size())

        result = ChunkModel.run(self,
                                chunk_specification,
                                dataset,
                                index,
                                float32,
                                specification=specification,
                                coefficients=coefficients)
        return result
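The interplay of 'initial_values' and 'index' in run() is easy to miss: results are computed only for the indexed rows and everything else keeps its initial value. A stand-alone numpy sketch of that bookkeeping (all numbers are made up):

    from numpy import array, zeros, float32

    dataset_size = 6
    index = array([1, 3, 4])                      # rows the model actually runs for
    initial_values = array([10., 20., 30.])       # initial values supplied for those rows
    results = zeros((dataset_size,), dtype=float32)
    results[index] = initial_values               # as in self.initial_values[index] = initial_values
    model_output = array([11., 22., 33.])         # pretend regression output for the indexed rows
    results[index] = model_output
    print(results)                                # [ 0. 11.  0. 22. 33.  0.]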
Example #7
 def load(self, resources=None, in_storage=None, in_table_name=None):
     """
     """  # TODO: insert docstring
     local_resources = Resources(resources)
     local_resources.merge_with_defaults({
         "field_submodel_id": self.field_submodel_id,
         "field_coefficient_name": self.field_coefficient_name,
         "field_estimate": self.field_estimate,
         "field_standard_error": self.field_standard_error,
         "other_fields": self.other_fields
     })
     if in_storage is not None:
         self.in_storage = in_storage
     if not isinstance(self.in_storage, Storage):
         logger.log_warning(
             "in_storage has to be of type Storage. No coefficients loaded."
         )
     else:
         data = self.in_storage.load_table(table_name=in_table_name)
         submodels = data[local_resources["field_submodel_id"]]
         self.names = data[local_resources["field_coefficient_name"]]
         self.values = data[local_resources["field_estimate"]]
         self.standard_errors = data[
             local_resources["field_standard_error"]]
         for measure in local_resources["other_fields"]:
             if measure in data.keys():
                 self.other_measures[measure] = data[measure]
         if submodels.max() >= 0:
             self.submodels = submodels
         self.check_consistency()
Example #8
    def prepare_for_estimate(self,
                             specification_dict=None,
                             specification_storage=None,
                             specification_table=None,
                             agent_set=None,
                             agents_for_estimation_storage=None,
                             agents_for_estimation_table=None,
                             join_datasets=False,
                             index_to_unplace=None,
                             portion_to_unplace=1.0,
                             agent_filter=None,
                             data_objects={}):
        from opus_core.model import get_specification_for_estimation
        specification = get_specification_for_estimation(
            specification_dict, specification_storage, specification_table)
        if (agent_set is not None) and (index_to_unplace is not None):
            if self.location_id_string is not None:
                agent_set.compute_variables(self.location_id_string,
                                            resources=Resources(data_objects))
            if portion_to_unplace < 1:
                unplace_size = int(portion_to_unplace * index_to_unplace.size)
                end_index_to_unplace = sample_noreplace(
                    index_to_unplace, unplace_size)
            else:
                end_index_to_unplace = index_to_unplace
            logger.log_status("Unplace " + str(end_index_to_unplace.size) +
                              " agents.")
            agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                       -1 * ones(end_index_to_unplace.size),
                                       end_index_to_unplace)
        # create agents for estimation
        if agents_for_estimation_storage is not None:
            estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                     in_table_name=agents_for_estimation_table,
                                     id_name=agent_set.get_id_name(),
                                     dataset_name=agent_set.get_dataset_name())
            if agent_filter is not None:
                estimation_set.compute_variables(
                    agent_filter, resources=Resources(data_objects))
                index = where(
                    estimation_set.get_attribute(agent_filter) > 0)[0]
                estimation_set.subset_by_index(
                    index, flush_attributes_if_not_loaded=False)

            if join_datasets:
                agent_set.join_by_rows(estimation_set,
                                       require_all_attributes=False,
                                       change_ids_if_not_unique=True)
                index = arange(agent_set.size() - estimation_set.size(),
                               agent_set.size())
            else:
                index = agent_set.get_id_index(
                    estimation_set.get_id_attribute())
        else:
            index = arange(agent_set.size())
        return (specification, index)
    def __init__(self,
                 location_set,
                 model_name=None,
                 short_name=None,
                 sampler="opus_core.samplers.weighted_sampler",
                 utilities="opus_core.linear_utilities",
                 probabilities="opus_core.mnl_probabilities",
                 choices="opus_core.random_choices",
                 filter=None,
                 submodel_string=None,
                 location_id_string=None,
                 run_config=None,
                 estimate_config=None,
                 debuglevel=0,
                 dataset_pool=None,
                 variable_package="urbansim",
                 **kwargs):
        """
        :number_of_units_string:
          number_of_units_string is used to determine whether a choice is over-filled,
          by comparing it with number_of_agents_string in get_locations_vacancy().
          TODO: How does it differ from capacity_string?
        """
        if model_name is not None:
            self.model_name = model_name
        if short_name is not None:
            self.model_short_name = short_name
        if (run_config is not None) and not isinstance(run_config, Resources):
            run_config = Resources(run_config)
        if (estimate_config
                is not None) and not isinstance(estimate_config, Resources):
            estimate_config = Resources(estimate_config)
        self.add_prefix_to_variable_names([
            "capacity_string", "number_of_agents_string",
            "number_of_units_string"
        ], location_set, variable_package, run_config)
        self.add_prefix_to_variable_names("weights_for_estimation_string",
                                          location_set, variable_package,
                                          estimate_config)

        LocationChoiceModel.__init__(self,
                                     location_set=location_set,
                                     sampler=sampler,
                                     utilities=utilities,
                                     probabilities=probabilities,
                                     choices=choices,
                                     filter=filter,
                                     submodel_string=submodel_string,
                                     location_id_string=location_id_string,
                                     run_config=run_config,
                                     estimate_config=estimate_config,
                                     debuglevel=debuglevel,
                                     dataset_pool=dataset_pool,
                                     **kwargs)
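prepare_for_estimate above unplaces only a fraction of the supplied agents when portion_to_unplace is below 1. A self-contained numpy sketch of that sampling step, with numpy.random.choice standing in for opus_core's sample_noreplace:

    from numpy import arange
    from numpy.random import choice

    index_to_unplace = arange(100, 120)
    portion_to_unplace = 0.25
    if portion_to_unplace < 1:
        unplace_size = int(portion_to_unplace * index_to_unplace.size)
        end_index_to_unplace = choice(index_to_unplace, unplace_size, replace=False)
    else:
        end_index_to_unplace = index_to_unplace
    print("Unplace " + str(end_index_to_unplace.size) + " agents.")   # Unplace 5 agents.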
Example #10
 def run(self, data=None, coefficients=None, resources=None):
     local_resources = Resources()
     if resources:
         local_resources.merge(resources)
     last_result = self.compute_utilities(data=data, coefficients=coefficients, resources=local_resources)
     this_result = self.compute_probabilities(resources=local_resources)
      if this_result is not None:
          last_result = this_result
      this_result = self.compute_choices(resources=local_resources)
      if this_result is not None:
          last_result = this_result
     return last_result
 def _compute_vacancy_variables(self, location_set, dev_model_configs, resources):
     compute_resources = Resources(resources)
     compute_resources.merge({"debug": self.debug})
     self.units_variable = {}
     self.variable_for_vacancy = {}
     for project_type in dev_model_configs:
         self.units_variable[project_type] = dev_model_configs[project_type]["units"]
         self.variable_for_vacancy[project_type] = compute_resources.get(
             "%s_vacant_variable" % project_type,
             "urbansim.%s.vacant_%s" % (location_set.get_dataset_name(), self.units_variable[project_type]),
         )
         location_set.compute_variables([self.variable_for_vacancy[project_type]], resources=compute_resources)
    def preprocess_projects(self, agent_set, agents_index=None, data_objects=None):
        """Split projects that don't find enough choices to smaller ones (of average size).
        """
        resources=Resources(data_objects)
        resources.merge({"debug":self.debug})

        self.choice_set.compute_variables([self.developable_maximum_unit_full_name,
                                           self.developable_minimum_unit_full_name],
                                          resources=resources)

        max_capacity = self.choice_set.get_attribute(self.developable_maximum_unit_short_name)
        min_capacity = self.choice_set.get_attribute(self.developable_minimum_unit_short_name)

        self.set_choice_set_size()
        nchoices = self.get_choice_set_size()
        project_average_size = agent_set.get_attribute(agent_set.get_attribute_name()).mean()
        add_projects = 0
        remove_projects = 0

        if agents_index is None:
            agents_index = arange(agent_set.size())
        # order agents by size
        ordered_indices = argsort(-1*agent_set.get_attribute_by_index(agent_set.get_attribute_name(), agents_index))
        improvement_values=[]
        projects_ids = agent_set.get_id_attribute()[agents_index].tolist()
        #   how many projects fit in each developable location
        project_sizes = agent_set.get_attribute_by_index(agent_set.get_attribute_name(), agents_index)
        for iagent in ordered_indices:
            project_size = project_sizes[iagent]
            capacity =  logical_and(project_size > min_capacity, (max_capacity / project_size) > 0)
            if where(capacity)[0].size < nchoices: # not enough choices found
                nsplitted = int(project_size/project_average_size)
                add_projects += nsplitted
                remove_projects+=1
                projects_ids.remove(agent_set.get_id_attribute()[agents_index[iagent]])
                improvement_values = improvement_values + \
                    nsplitted*[agent_set.get_attribute_by_index("improvement_value", agents_index[iagent])]
            else:
                break # we can break here, since the projects are sorted by size

        if remove_projects > 0:
            agent_set.remove_elements(agents_index[ordered_indices[0:remove_projects]])
            agents_index = agent_set.get_id_index(projects_ids)

        if add_projects > 0:
            max_id = agent_set.get_attribute(agent_set.get_id_name()[0]).max()
            ids = arange(max_id+1,max_id+1+add_projects)
            agent_set.add_elements(data={"project_id":ids,
                self.location_set.get_id_name()[0]:zeros((add_projects,)),
                "improvement_value":array(improvement_values),
                agent_set.get_attribute_name(): project_average_size*ones((add_projects,))},
                require_all_attributes=False)
            agents_index = agent_set.get_id_index(projects_ids + ids.tolist())
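preprocess_projects splits a project when it cannot find enough feasible locations. A self-contained numpy sketch of the feasibility count and the split rule, with made-up capacities:

    from numpy import array, logical_and, where

    min_capacity = array([10., 50., 200., 400.])
    max_capacity = array([100., 500., 800., 0.])
    project_size = 300.
    project_average_size = 80.
    nchoices = 4                                   # choice-set size the model needs to fill

    capacity = logical_and(project_size > min_capacity, (max_capacity / project_size) > 0)
    if where(capacity)[0].size < nchoices:         # only 3 feasible locations here
        nsplitted = int(project_size / project_average_size)
        print("Split the project into " + str(nsplitted) + " average-sized projects.")   # 3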
Example #13
    def test_my_inputs(self):
        storage = StorageFactory().get_storage('dict_storage')

        building_types_table_name = 'building_types'
        storage.write_table(table_name=building_types_table_name,
                            table_data={
                                'building_type_id':
                                array([1, 2]),
                                'name':
                                array(['residential', 'commercial']),
                                'units':
                                array(['residential_units', 'commercial_sqft'])
                            })

        buildings_table_name = 'buildings'
        storage.write_table(
            table_name=buildings_table_name,
            table_data={
                'building_id': arange(7) + 1,
                'building_type_id': array([1, 2, 1, 2, 1, 1, 2]),
                'sqft': array([100, 350, 1000, 0, 430, 95, 750]),
                'residential_units': array([300, 0, 100, 0, 1300, 600, 10])
            },
        )

        building_types = BuildingTypeDataset(
            in_storage=storage, in_table_name=building_types_table_name)
        buildings = BuildingDataset(in_storage=storage,
                                    in_table_name=buildings_table_name,
                                    resources=Resources({
                                        'building_categories': {
                                            'residential':
                                            array([200, 500, 1200]),
                                            'commercial': array([200, 500])
                                        }
                                    }))

        variable_names = ['%s_%s' % (self.variable_name_prefix, building_type)
                          for building_type in ['commercial', 'residential']]
        buildings.compute_variables(variable_names,
                                    resources=Resources(
                                        {'building_type': building_types}))

        should_be_residential = array([2, 0, 1, 0, 4, 3, 0])
        should_be_commercial = array([0, 2, 0, 1, 0, 0, 3])
        values_commercial = buildings.get_attribute(variable_names[0])
        values_residential = buildings.get_attribute(variable_names[1])

        self.assert_(ma.allequal(values_commercial, should_be_commercial),
                     'Error in ' + variable_names[0])
        self.assert_(ma.allequal(values_residential, should_be_residential),
                     'Error in ' + variable_names[1])
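The expected values in this test are consistent with binning each building's units against its type's 'building_categories' thresholds (and zero for buildings of the other type). A numpy sketch of that reading using searchsorted; this is an assumption about what the tested variable computes, not its actual implementation:

    from numpy import array, searchsorted

    residential_categories = array([200, 500, 1200])
    residential_units = array([300, 100, 1300, 600])    # the residential buildings above
    print(searchsorted(residential_categories, residential_units) + 1)   # [2 1 4 3]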
Example #14
 def __init__(self, resources=None, dataset1=None, dataset2=None, index1=None, index2=None,
              debuglevel=0):
     debug = DebugPrinter(debuglevel)
     debug.print_debug("Creating object %s.%s" % (self.__class__.__module__, self.__class__.__name__), 2)
     
     local_resources = Resources(resources)
     local_resources.merge_if_not_None({"dataset1":dataset1, 
         "dataset2":dataset2, "debug":debug, 
         "index1":index1, "index2":index2})
     CoreInteractionDataset.__init__(self, resources = local_resources)
     
     
Example #15
 def load(self,
          resources=None,
          in_storage=None,
          in_table_name=None,
          variables=[]):
     local_resources = Resources(resources)
     local_resources.merge_with_defaults({
         "field_submodel_id":
         self.field_submodel_id,
         "field_equation_id":
         self.field_equation_id,
         "field_coefficient_name":
         self.field_coefficient_name,
         "field_variable_name":
         self.field_variable_name,
         "field_fixed_value":
         self.field_fixed_value
     })
      if in_storage is not None:
         self.in_storage = in_storage
     if not isinstance(self.in_storage, Storage):
         logger.log_warning(
             "in_storage is not of type Storage. No EquationSpecification loaded."
         )
     else:
         data = self.in_storage.load_table(table_name=in_table_name)
         equations = array([-1])
         if local_resources["field_equation_id"] in data:
             equations = data[local_resources["field_equation_id"]]
          variable_names = data[local_resources["field_variable_name"]]
          self.variables = tuple(VariableName(x) for x in variable_names)
         self.coefficients = data[local_resources["field_coefficient_name"]]
         if local_resources["field_submodel_id"] in data:
             submodels = data[local_resources["field_submodel_id"]]
         else:
             submodels = array([-2] * self.coefficients.size, dtype="int32")
         self.submodels = submodels
         if equations.max() >= 0:
             self.equations = equations
         if local_resources["field_fixed_value"] in data:
             self.fixed_values = data[local_resources["field_fixed_value"]]
         for field in data:
             if field not in [
                     local_resources["field_submodel_id"],
                     local_resources["field_equation_id"],
                     local_resources["field_variable_name"],
                     local_resources["field_coefficient_name"],
                     local_resources["field_fixed_value"]
             ]:
                 self.other_fields[field] = data[field]
         self.set_other_dim_field_names()
         if variables:
             self.shrink(variables)
Example #16
 def run(self, data=None, coefficients=None, resources=None):
     local_resources = Resources()
     if resources:
         local_resources.merge(resources)
     last_result = self.compute_utilities(data, coefficients,
                                          local_resources)
     this_result = self.compute_probabilities(local_resources)
      if this_result is not None:
          last_result = this_result
      this_result = self.compute_choices(local_resources)
      if this_result is not None:
          last_result = this_result
     return last_result
    def run(self, specification, coefficients, agent_set,
            agents_index=None, chunk_specification=None,
            data_objects=None, run_config=None, debuglevel=0):
        """ Run a simulation and return a numpy array of length agents_index, giving agent choices (ids of locations).
            'specification' is of type EquationSpecification,
            'coefficients' is of type Coefficients,
            'agent_set' is of type Dataset,
            'agent_index' are indices of individuals in the agent_set for which
                        the model runs. If it is None, the whole agent_set is considered.
            'chunk_specification' determines number of chunks in which the simulation is processed.
                        Default is to use 300 rows per chunk.
            'data_objects' is a dictionary where each key is the name of an data object
                    ('zone', ...) and its value is an object of class  Dataset.
            'run_config' is of type Resources, it gives additional arguments for the run.
            'debuglevel' overwrites the constructor 'debuglevel'.
        """
        if run_config is None:
            run_config = Resources()
        self.run_config = run_config.merge_with_defaults(self.run_config)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included({agent_set.get_dataset_name():agent_set})
        
        ## what is the use of compute location_id string in run? it gets new values anyway
        #if self.location_id_string is not None:
        #    location_id = agent_set.compute_variables(self.location_id_string, dataset_pool=self.dataset_pool)

        ## done in choice_model
        #location_id_name = self.choice_set.get_id_name()[0]
        #if (location_id_name not in agent_set.get_known_attribute_names()):
        #    agent_set.add_attribute(name=location_id_name, data=resize(array([-1]), agent_set.size()))
                    
        if self.run_config.get("agent_units_string", None): # used when agents take different amount of capacity from the total capacity
            agent_set.compute_variables([self.run_config["agent_units_string"]], dataset_pool=self.dataset_pool)

        self.compute_capacity_flag = self.run_config.get("compute_capacity_flag",  False)
        capacity_string = None
        self.capacity = None
        if self.compute_capacity_flag:
            capacity_string = self.run_config.get("capacity_string", None)
            if capacity_string is None:
                raise KeyError(
                    "Entry 'capacity_string' has to be specified in 'run_config' if 'compute_capacity_flag' is True")
            
        ## if weights is None, use capacity for weights
        if self.run_config.get("weights_for_simulation_string", None) is None and capacity_string is not None:
            self.run_config.merge({"weights_for_simulation_string" : capacity_string})
            
        return ChoiceModel.run(self,specification, coefficients, agent_set,
                agents_index=agents_index, chunk_specification=chunk_specification, run_config=self.run_config,
                debuglevel=debuglevel)
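One detail worth calling out in run() above: when no simulation weights are configured, the capacity variable doubles as the sampling weights. A sketch of that fallback with a plain dict standing in for run_config (Resources); the variable name used here is illustrative only:

    run_config = {"compute_capacity_flag": True,
                  "capacity_string": "urbansim.gridcell.vacant_residential_units"}

    capacity_string = None
    if run_config.get("compute_capacity_flag", False):
        capacity_string = run_config.get("capacity_string", None)
        if capacity_string is None:
            raise KeyError("'capacity_string' must be given when 'compute_capacity_flag' is True")

    if run_config.get("weights_for_simulation_string", None) is None and capacity_string is not None:
        run_config["weights_for_simulation_string"] = capacity_string

    print(run_config["weights_for_simulation_string"])   # urbansim.gridcell.vacant_residential_units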
Example #18
    def __init__(self, **kargs):
        #        Estimator.__init__(self, settings=None, run_land_price_model_before_estimation=False, **kargs) # <-- old __init__
        #        Estimator.__init__(self, config=None, save_estimation_results=True) # <-- new __init__ doesn't work, but not needed

        parent_dir_path = package().get_package_parent_path()
        package_path = OpusPackage().get_path_for_package("biocomplexity")
        self.storage = StorageFactory().get_storage(
            'tab_storage', storage_location=os.path.join(package_path, 'data'))

        ## 1. directory path of full (4 county spatial extent) dataset
        flt_directory = os.path.join(parent_dir_path, "biocomplexity", "data",
                                     "LCCM_4County")

        ## 2. select (uncomment) one of the following choices of directory paths for the subsetted sample input data/variables
        #        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "LCCM_small_test_set_opus")
        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity",
                                         "data", "data_for_estimation_all")
        #        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_estimation_all_orig")
        #        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_suburban_orig")
        #        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban")
        #        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban_orig")

        ## note - must rename lct-forusewith91sample.Float32 to lct.lf4 if doing 1991-1995
        ## note - must rename lct-forusewith95sample.Float32 to lct.lf4 if doing 1995-1999

        ## 3. select (uncomment) one of the following choices of land cover data (input data) date pairs (years)
        #        years = [1991, 1995]
        years = [1995, 1999]
        #        years = [1999, 2002]

        self.lc1 = LandCoverDataset(in_storage=StorageFactory().get_storage(
            "flt_storage",
            storage_location=os.path.join(flt_directory_est, str(years[0]))),
                                    resources=Resources({"lowercase": 1}))
        self.lc2 = LandCoverDataset(in_storage=StorageFactory().get_storage(
            "flt_storage",
            storage_location=os.path.join(flt_directory_est, str(years[1]))),
                                    resources=Resources({"lowercase": 1}))

        self.lc1_all = LandCoverDataset(
            in_storage=StorageFactory().get_storage(
                "flt_storage",
                storage_location=os.path.join(flt_directory, str(years[0]))),
            resources=Resources({"lowercase": 1}))
        self.lc1_all.flush_dataset()
        self.lc2_all = LandCoverDataset(
            in_storage=StorageFactory().get_storage(
                "flt_storage",
                storage_location=os.path.join(flt_directory, str(years[1]))),
            resources=Resources({"lowercase": 1}))
        self.lc2_all.flush_dataset()
Example #19
 def __init__(self,
              resources=None,
              dataset1=None,
              dataset2=None,
              index1=None,
              index2=None,
              dataset_name=None,
              debug=None):
     """ Argument 'resources' is of type Resources. It is merged with arguments. It should contain:
             dataset1 - agent class
             dataset2 - class of the choice dataset
         Optional:
             index1 - 1D array, indices of dataset1
              index2 - If a 2D array: row i contains indices of individuals of dataset2 that belong to
                      the i-th individual of dataset1[index1].
                      If a 1D array: indices of individuals of dataset2 used for all individuals of dataset1[index1].
              dataset_name - subdirectory in which the implementation of the interaction variables is placed (default "")
         dataset1.resources and dataset2.resources should contain key 'dataset_name' (see Dataset.get_dataset_name()).
     """
     self.resources = Resources(resources)
     self.resources.merge_if_not_None({
         "dataset1": dataset1,
         "dataset2": dataset2,
         "index1": index1,
         "index2": index2,
         "dataset_name": dataset_name,
         "debug": debug
     })
     self.attribute_boxes = {}
     self.attribute_names = []
     self.debug = self.resources.get("debug", 0)
     if not isinstance(self.debug, DebugPrinter):
         self.debug = DebugPrinter(self.debug)
     self.resources.check_obligatory_keys(["dataset1", "dataset2"])
     self.dataset1 = self.resources["dataset1"]
     self.dataset2 = self.resources["dataset2"]
     self.index1 = self.resources.get("index1", None)
     self.index2 = self.resources.get("index2", None)
     self.dataset_name = self.resources.get("dataset_name", None)
      if self.dataset_name is None:
          self.dataset_name = self.dataset1.get_dataset_name() + '_x_' + self.dataset2.get_dataset_name()
     self._primary_attribute_names = []
     self.index1_mapping = {}
      if self.index1 is not None:
         self.index1_mapping = do_id_mapping_dict_from_array(self.index1)
     self._id_names = None  # for compatibility with Dataset
     self.variable_factory = VariableFactory()
     self._aliases = {}  # for compatibility with Dataset
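The index1/index2 convention in the docstring above is easiest to see with concrete arrays: for a 2D index2, row i lists the dataset2 members paired with the i-th member of dataset1[index1]. A small numpy illustration with made-up indices:

    from numpy import array

    index1 = array([0, 3, 5])          # three agents picked from dataset1
    index2 = array([[2, 7, 9],         # dataset2 members offered to agent index1[0]
                    [1, 4, 8],         # ... to agent index1[1]
                    [0, 3, 6]])        # ... to agent index1[2]
    print(index2[1])                   # the choice set of the second selected agent: [1 4 8]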
Example #20
    def run(self, data=None, coefficients=None, resources=None):
        local_resources = Resources()
        if resources:
            local_resources.merge(resources)
        last_result = self.compute_utilities(data, coefficients, local_resources)
#        self.debug.print_debug("utilities: %s" % last_result, 3) # added 7 jul 09
        this_result = self.compute_probabilities(local_resources)
#        self.debug.print_debug("probabilities: %s" % this_result, 3) # added 7 jul 09
        if this_result is not None:
            last_result = this_result
        this_result = self.compute_choices(local_resources) # determines choices based on probabilities
#        self.debug.print_debug("choices: %s" % this_result, 3) # added 7 jul 09
        if this_result is not None:
            last_result = this_result
        return last_result
    def run(self, specification, coefficients, agent_set,
            agents_index=None, agents_filter=None,
            chunk_specification=None, data_objects=None,
            run_config=None, debuglevel=0, maximum_runs=10):

        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        if agents_index is None:
            if agents_filter is not None:
                agent_set.compute_variables(agents_filter, dataset_pool=self.dataset_pool)
                agents_index = where(agent_set.get_attribute(VariableName(agents_filter).get_alias()))[0]
            else:
                agents_index = arange(agent_set.size())
        if not isinstance(agents_index, ndarray):
            try:
                agents_index = array(agents_index)
            except:
                raise TypeError("Argument agents_index is of wrong type (numpy array or list allowed).")

        if agents_index.size <= 0:
            logger.log_status("Nothing to be done.")
            return array([], dtype='int32')

        if run_config is None:
            run_config = Resources()
        self.run_config = run_config.merge_with_defaults(self.run_config)
        self.number_of_units_string = self.run_config.get("number_of_units_string", None)
        self.number_of_agents_string = self.run_config.get(
                        "number_of_agents_string",
                        "%s.number_of_agents(%s)" % (self.choice_set.get_dataset_name(), agent_set.get_dataset_name()))
            
        if self.number_of_units_string is None:
            maximum_runs = 1
        unplaced = arange(agents_index.size)
        id_name = self.choice_set.get_id_name()[0]
        for run in range(maximum_runs):
            unplaced_size_before_model = unplaced.size
            choices = LocationChoiceModel.run(self, specification, coefficients, agent_set,
                    agents_index[unplaced], chunk_specification, debuglevel=debuglevel)
            if run == 0:
                all_choices=choices
            else:
                all_choices[unplaced]=choices
            unplaced = self.get_movers_from_overfilled_locations(agent_set, agents_index, config=run_config)
            if (unplaced.size <= 0) or (unplaced_size_before_model == unplaced.size) or (unplaced.size == (unplaced_size_before_model - self.observations_mapping['mapped_index'].size)):
                break
            agent_set.set_values_of_one_attribute(id_name, -1, agents_index[unplaced])
        return all_choices
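When agents_index is not given, run() derives it from agents_filter by keeping the agents whose filter value is non-zero. A stand-alone numpy sketch of that derivation, with filter_values standing in for the computed filter variable:

    from numpy import array, where

    filter_values = array([0, 1, 1, 0, 1])   # pretend per-agent result of computing agents_filter
    agents_index = where(filter_values)[0]
    print(agents_index)                       # [1 2 4]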
Example #22
    def setUp(self):
        run_configuration = TestCacheConfiguration()
        SimulationState(new_instance=True)
        SessionConfiguration(run_configuration,
                             new_instance=True,
                             package_order=['urbansim', 'opus_core'],
                             in_storage=AttributeCache())

        self.base_year = run_configuration['base_year']
        self.temp_dir = tempfile.mkdtemp(prefix='opus_tmp')

        # Use the test cache.
        opus_core_path = package().get_opus_core_path()
        test_cache_path = os.path.join(opus_core_path, 'data', 'test_cache')
        new_cache_path = os.path.join(self.temp_dir, 'cache')
        copytree(test_cache_path, new_cache_path)

        # Make sure the copied files are writable.
        for (dirpath, dirnames, filenames) in os.walk(new_cache_path):
            for file_name in filenames:
                full_path = os.path.join(dirpath, file_name)
                os.chmod(full_path, S_IWRITE | S_IREAD)

        SimulationState().set_cache_directory(new_cache_path)
        SimulationState().set_current_time(self.base_year)
        self.config = Resources(run_configuration)

        cache_directory = SimulationState().get_cache_directory()
        self.assertEqual(self.temp_dir, os.path.split(cache_directory)[0])
Example #23
 def test_read_resources_from_string(self):
     data = {"arg1":1, "arg2":"2", "dict1":{"three":3,"four":4}}
     resources = Resources(data)
     write_resources_to_file(self.file_name, resources)                        
     resources_string = read_file_content(self.file_name)
     loaded_resources = get_resources_from_string(resources_string)
     self.assertEquals(resources, loaded_resources)
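write_resources_to_file, read_file_content and get_resources_from_string are opus_core file utilities; the test simply checks a write/parse round trip. A plain-pickle analogue of the same round trip, with a dict standing in for Resources:

    import os, pickle, tempfile

    data = {"arg1": 1, "arg2": "2", "dict1": {"three": 3, "four": 4}}
    file_name = os.path.join(tempfile.mkdtemp(prefix='opus_tmp'), 'resources.pkl')
    with open(file_name, 'wb') as f:
        pickle.dump(data, f)
    with open(file_name, 'rb') as f:
        loaded = pickle.load(f)
    assert loaded == data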
Example #24
    def apply_filter(self, filter, agent_set, agents_index, submodel=-2):
        """ apply filter comparing to mean project size by submodel instead of 0, by shifting self.filter
        """
        project_size_filter = None
        if (filter is not None):
            if isinstance(filter, dict):
                submodel_filter = filter[submodel]
            else:
                submodel_filter = filter

            mean_project_size = agent_set.get_attribute(
                agent_set.get_attribute_name())[agents_index].mean()

            if isinstance(submodel_filter, str):
                resources = Resources({"debug": self.debug})
                self.choice_set.compute_variables(
                    [submodel_filter],
                    dataset_pool=self.dataset_pool,
                    resources=resources)
                filter_name = VariableName(submodel_filter)
                project_size_filter = self.choice_set.get_attribute(
                    filter_name.get_alias()) - mean_project_size
            else:
                project_size_filter = submodel_filter - mean_project_size

        return LocationChoiceModel.apply_filter(self,
                                                project_size_filter,
                                                agent_set=agent_set,
                                                agents_index=agents_index,
                                                submodel=submodel)
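The shift in apply_filter turns the usual "filter > 0" test into "filter value exceeds the mean project size". A numpy sketch of the arithmetic with made-up numbers:

    from numpy import array, where

    filter_values = array([50., 120., 300., 80.])    # pretend developable capacity per location
    project_sizes = array([90., 110., 130.])         # projects being placed in this submodel
    mean_project_size = project_sizes.mean()          # 110.0
    project_size_filter = filter_values - mean_project_size
    print(where(project_size_filter > 0)[0])           # locations passing the shifted filter: [1 2]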
Example #25
    def _compute_variable_for_prior_year(self,
                                         dataset,
                                         full_name,
                                         time,
                                         resources=None):
        """Create a new dataset for this variable, compute the variable, and then return
        the values for this variable."""
        calling_dataset_pool = SessionConfiguration().get_dataset_pool()
        calling_time = SimulationState().get_current_time()
        SimulationState().set_current_time(time)
        try:
            # Get an empty dataset pool with same search paths.
            my_dataset_pool = DatasetPool(
                package_order=calling_dataset_pool.get_package_order(),
                storage=AttributeCache())

            ds = dataset.empty_dataset_like_me(in_storage=AttributeCache())

            # Don't pass any datasets via resources, since they may be from a different time.
            my_resources = Resources(resources)
            for key in my_resources:
                if isinstance(key, Dataset):
                    del my_resources[key]

            ds.compute_variables(full_name,
                                 my_dataset_pool,
                                 resources=my_resources)
            values = ds.get_attribute(full_name)
            return values
        finally:
            SimulationState().set_current_time(calling_time)
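The try/finally around the prior-year computation guarantees the simulation clock is restored even if the variable computation fails. A minimal stand-alone sketch of that save/compute/restore pattern, with a tiny class standing in for SimulationState:

    class SimState(object):                      # stand-in for SimulationState
        _time = 2005
        def get_current_time(self):
            return SimState._time
        def set_current_time(self, t):
            SimState._time = t

    def compute_for_prior_year(time, compute):
        calling_time = SimState().get_current_time()
        SimState().set_current_time(time)
        try:
            return compute()
        finally:
            SimState().set_current_time(calling_time)

    print(compute_for_prior_year(2000, lambda: SimState().get_current_time()))   # 2000
    print(SimState().get_current_time())                                         # 2005 (restored)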
Example #26
 def _compute_vacancy_and_total_units_variables(self, location_set, project_types, resources=None):
     compute_resources = Resources(resources)
     compute_resources.merge({"debug":self.debug})
     self.variable_for_vacancy = {}
     self.variable_for_total_units = {}
     for ptype in project_types:
         self.variable_for_vacancy[ptype] = compute_resources.get(
                                 "%s_vacant_variable" % ptype,
                                 "urbansim_zone.%s.vacant_%s" % (location_set.get_dataset_name(),
                                                                  self.project_specific_units[ptype]))
         self.variable_for_total_units[ptype] = compute_resources.get(
                                 "%s_total_units_variable" % ptype,
                                 "%s.aggregate(urbansim_zone.building.total_%s)" % (location_set.get_dataset_name(), 
                                                          self.project_specific_units[ptype]))
         location_set.compute_variables([self.variable_for_vacancy[ptype], self.variable_for_total_units[ptype]], 
                                        dataset_pool=self.dataset_pool, resources = compute_resources)
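The variable names above are resolved with a two-step rule: an explicit override in the resources (e.g. "<type>_vacant_variable") wins, otherwise a conventional fully qualified name is built from the dataset and unit names. A sketch of that lookup with plain dicts and made-up names:

    resources = {"commercial_vacant_variable": "my_package.zone.my_vacant_commercial_sqft"}
    project_specific_units = {"commercial": "commercial_sqft", "residential": "residential_units"}

    for ptype in project_specific_units:
        variable = resources.get(
            "%s_vacant_variable" % ptype,
            "urbansim_zone.zone.vacant_%s" % project_specific_units[ptype])
        print("%s -> %s" % (ptype, variable))
    # e.g. commercial -> my_package.zone.my_vacant_commercial_sqft
    #      residential -> urbansim_zone.zone.vacant_residential_units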
Example #27
def prepare_for_running_macro(parser):
    from opus_core.file_utilities import get_resources_from_file
    parser.add_option("-r",
                      "--resources",
                      dest="resources_file_name",
                      action="store",
                      type="string",
                      help="Name of file containing resources")
    parser.add_option("-y",
                      "--year",
                      dest="year",
                      action="store",
                      type="int",
                      help="Year in which to 'run' the travel model")
    parser.add_option(
        "-o",
        "--output-file",
        dest="output_file",
        action="store",
        type="string",
        default=None,
        help=
        "Output log file. If not given, it is written into urbansim cache directory."
    )
    (options, args) = parser.parse_args()

    r = get_resources_from_file(options.resources_file_name)
    resources = Resources(r)

    SessionConfiguration(
        new_instance=True,
        package_order=resources['dataset_pool_configuration'].package_order,
        in_storage=AttributeCache())
    return (resources, options)
    def prepare_for_simulation(self, run_configuration, cache_directory=None):
        self.config = Resources(run_configuration)
        self.simulation_state = SimulationState(
            new_instance=True, base_cache_dir=cache_directory, start_time=self.config.get("base_year", 0)
        )

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        if self.config["cache_directory"] is None:
            self.config["cache_directory"] = self.simulation_state.get_cache_directory()

        SessionConfiguration(
            new_instance=True,
            package_order=self.config["dataset_pool_configuration"].package_order,
            in_storage=AttributeCache(),
        )

        ForkProcess().fork_new_process(
            self.config["creating_baseyear_cache_configuration"].cache_scenario_database, self.config
        )

        # Create output database (normally done by run manager)
        if "estimation_database_configuration" in self.config:
            db_server = DatabaseServer(self.config["estimation_database_configuration"])
            if not db_server.has_database(self.config["estimation_database_configuration"].database_name):
                db_server.create_database(self.config["estimation_database_configuration"].database_name)
    def prepare_for_run(self,
                        specification_storage=None,
                        specification_table=None,
                        coefficients_storage=None,
                        coefficients_table=None,
                        agent_set=None,
                        agents_filter=None,
                        data_objects=None,
                        **kwargs):

        spec, coeff = prepare_specification_and_coefficients(
            specification_storage=specification_storage,
            specification_table=specification_table,
            coefficients_storage=coefficients_storage,
            coefficients_table=coefficients_table,
            **kwargs)

        index = None  # stays None when no agents_filter is given
        if agents_filter is not None:
            agent_set.compute_variables(agents_filter,
                                        resources=Resources(data_objects))
            index = where(
                agent_set.get_attribute(
                    VariableName(agents_filter).get_alias()) > 0)[0]

        return (spec, coeff, index)
Example #30
        def run_model():
            households = HouseholdDataset(in_storage=storage,
                                          in_table_name='households')
            hlcm = RegionalHouseholdLocationChoiceModel(
                location_set=gridcells,
                compute_capacity_flag=False,
                choices="opus_core.random_choices_from_index",
                sample_size_locations=4)
            hlcm.run(specification,
                     coefficients,
                     agent_set=households,
                     debuglevel=1)

            # get results
            gridcells.compute_variables(
                ["urbansim.gridcell.number_of_households"],
                resources=Resources({"household": households}))
            result_area1 = gridcells.get_attribute_by_id(
                "number_of_households",
                arange(ngcs_attr) + 1)
            result_area2 = gridcells.get_attribute_by_id(
                "number_of_households", arange(ngcs_attr + 1, ngcs + 1))
            gridcells.delete_one_attribute("number_of_households")
            result = concatenate((result_area1, result_area2))
            return result
Example #31
    def test_my_inputs(self):
        storage = StorageFactory().get_storage('dict_storage')

        job_building_types_table_name = 'job_building_types'
        storage.write_table(table_name=job_building_types_table_name,
                            table_data={
                                'id': array([1, 2, 3, 4]),
                                'home_based': array([1, 0, 1, 0])
                            })

        jobs_table_name = 'jobs'
        storage.write_table(table_name=jobs_table_name,
                            table_data={
                                'job_id':
                                arange(10) + 1,
                                'building_type':
                                array([3, 3, 2, 2, 4, 2, 1, 3, 4, 1])
                            })

        job_building_types = JobBuildingTypeDataset(
            in_storage=storage, in_table_name=job_building_types_table_name)
        jobs = JobDataset(in_storage=storage, in_table_name=jobs_table_name)

        jobs.compute_variables(self.variable_name,
                               resources=Resources(
                                   {'job_building_type': job_building_types}))

        values = jobs.get_attribute(self.variable_name)

        should_be = array([0, 0, 1, 1, 1, 1, 0, 0, 1, 0])

        self.assert_(ma.allequal(values, should_be),
                     'Error in ' + self.variable_name)
Example #32
        def run_model_2():
            storage = StorageFactory().get_storage('dict_storage')

            storage.write_table(table_name='households',
                                table_data=household_data)
            households = HouseholdDataset(in_storage=storage,
                                          in_table_name='households')

            storage.write_table(table_name='gridcells',
                                table_data=gridcell_data)
            gridcells = GridcellDataset(in_storage=storage,
                                        in_table_name='gridcells')

            hlcm = HouseholdLocationChoiceModelCreator().get_model(
                location_set=gridcells,
                compute_capacity_flag=False,
                choices="opus_core.random_choices_from_index",
                sample_size_locations=8)
            hlcm.run(specification,
                     coefficients,
                     agent_set=households,
                     debuglevel=1)

            # get results
            gridcells.compute_variables(
                ["urbansim.gridcell.number_of_households"],
                resources=Resources({"household": households}))
            result_more_attractive = gridcells.get_attribute_by_id(
                "number_of_households",
                arange(ngcs_attr) + 1)
            result_less_attractive = gridcells.get_attribute_by_id(
                "number_of_households", arange(ngcs_attr + 1, ngcs + 1))
            return array(
                [result_more_attractive.sum(),
                 result_less_attractive.sum()])
Example #33
    def prepare_for_simulation(self, run_configuration, cache_directory=None):
        self.config = Resources(run_configuration)
        self.simulation_state = SimulationState(new_instance=True,
                                                base_cache_dir=cache_directory)

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        if self.config['cache_directory'] is None:
            self.config[
                'cache_directory'] = self.simulation_state.get_cache_directory(
                )

        SessionConfiguration(
            new_instance=True,
            package_order=self.config['dataset_pool_configuration'].
            package_order,
            in_storage=AttributeCache())

        ForkProcess().fork_new_process(
            self.config['creating_baseyear_cache_configuration'].
            cache_scenario_database, self.config)

        # Create output database (normally done by run manager)
        if 'estimation_database_configuration' in self.config:
            db_server = DatabaseServer(
                self.config['estimation_database_configuration'])
            if not db_server.has_database(
                    self.config['estimation_database_configuration'].
                    database_name):
                db_server.create_database(
                    self.config['estimation_database_configuration'].
                    database_name)
 def _compute_vacancy_variables(self, location_set, dev_model_configs,
                                resources):
     compute_resources = Resources(resources)
     compute_resources.merge({"debug": self.debug})
     self.units_variable = {}
     self.variable_for_vacancy = {}
     for project_type in dev_model_configs:
         self.units_variable[project_type] = dev_model_configs[
             project_type]['units']
         self.variable_for_vacancy[project_type] = compute_resources.get(
             "%s_vacant_variable" % project_type,
             "urbansim.%s.vacant_%s" % (location_set.get_dataset_name(),
                                        self.units_variable[project_type]))
         location_set.compute_variables(
             [self.variable_for_vacancy[project_type]],
             resources=compute_resources)
Example #35
    def test_my_inputs(self):
        storage = StorageFactory().get_storage('dict_storage')

        building_types_table_name = 'building_types'        
        storage.write_table(
                table_name=building_types_table_name,
                table_data={
                    'building_type_id':array([0,2]), 
                    'name': array(['foo', 'commercial'])
                    }
            )

        buildings_table_name = 'buildings'        
        storage.write_table(
                table_name=buildings_table_name,
                table_data={
                    'building_id':array([1,2,3]),
                    'building_type_id': array([2,0,2])
                    }
            )

        building_types = BuildingTypeDataset(in_storage=storage, in_table_name=building_types_table_name)
        buildings = BuildingDataset(in_storage=storage, in_table_name=buildings_table_name)
        
        buildings.compute_variables(self.variable_name, resources=Resources({'building_type':building_types}))
        
        values = buildings.get_attribute(self.variable_name)
        should_be = array([1,0,1])
        
        self.assert_(ma.allequal(values, should_be),
            'Error in ' + self.variable_name)
Exemplo n.º 36
0
    def predict(self, predicted_choice_id_name, agents_index=None):
        """ Run prediction. Currently makes sense only for choice models."""
        # Create temporary configuration where all words 'estimate' are replaced by 'run'
        tmp_config = Resources(self.config)
        
        if self.agents_index_for_prediction is None:
            self.agents_index_for_prediction = self.get_agent_set_index().copy()
            
        if agents_index is None:
            agents_index = self.agents_index_for_prediction
        
        tmp_config['models_configuration'][self.model_name]['controller']['run']['arguments']['coefficients'] = "coeff_est"
        tmp_config['models_configuration'][self.model_name]['controller']['run']['arguments']['agents_index'] = "agents_index"
        tmp_config['models_configuration'][self.model_name]['controller']['run']['arguments']['chunk_specification'] = "{'nchunks':1}"

        ### save specification and coefficients to cache (no matter the save_estimation_results flag)
        ### so that the prepare_for_run method could load specification and coefficients from there
        #output_configuration = self.config['output_configuration']
        #del self.config['output_configuration']
        #self.save_results()
        
        #self.config['output_configuration'] = output_configuration
        
        #self.model_system.run_year_namespace["coefficients"] = self.coefficients
        #del tmp_config['models_configuration'][self.model_name]['controller']['prepare_for_run']
        
        try:
            run_year_namespace = copy.copy(self.model_system.run_year_namespace)
        except:
            logger.log_error("The estimate() method must be run first")
            return False
        
        try:
            agents = self.get_agent_set()
            choice_id_name = self.get_choice_set().get_id_name()[0]
            # save current locations of agents
            current_choices = agents.get_attribute(choice_id_name).copy()
            dummy_data = zeros(current_choices.size, dtype=current_choices.dtype)-1
            #agents.modify_attribute(name=choice_id_name, data=dummy_data)  #reset choices for all agents
            agents.modify_attribute(name=choice_id_name, data=dummy_data, index=agents_index)  #reset choices for agents in agents_index
            
            run_year_namespace["process"] = "run"
            run_year_namespace["coeff_est"] = self.coefficients
            run_year_namespace["agents_index"] = agents_index
            run_year_namespace["processmodel_config"] = tmp_config['models_configuration'][self.model_name]['controller']['run']
            new_choices = self.model_system.do_process(run_year_namespace)
            
            #self.model_system.run(tmp_config, write_datasets_to_cache_at_end_of_year=False)
            #new_choices = agents.get_attribute(choice_id_name).copy()
            agents.modify_attribute(name=choice_id_name, data=current_choices)
            dummy_data[agents_index] = new_choices
            if predicted_choice_id_name not in agents.get_known_attribute_names():
                agents.add_primary_attribute(name=predicted_choice_id_name, data=dummy_data)
            else:
                agents.modify_attribute(name=predicted_choice_id_name, data=dummy_data)
            logger.log_status("Predictions saved into attribute " + predicted_choice_id_name)
            return True
        except Exception, e:
            logger.log_error("Error encountered in prediction: %s" % e)
            logger.log_stack_trace()
            return False
 def test_number_of_agents_expression(self):
     expr = "mygridcell.number_of_agents(myjob)+10"
     storage = StorageFactory().get_storage('dict_storage')
     gridcell_grid_id = array([1, 2, 3])
     job_grid_id = array(
         [2, 1, 3, 1]
     )  #specify an array of 4 jobs, 1st job's grid_id = 2 (it's in gridcell 2), etc.
     storage.write_table(table_name='gridcells',
                         table_data={'gid': gridcell_grid_id})
     storage.write_table(table_name='jobs',
                         table_data={
                             'jid': arange(4) + 1,
                             'gid': job_grid_id
                         })
     gs = Dataset(in_storage=storage,
                  in_table_name='gridcells',
                  id_name="gid",
                  dataset_name="mygridcell")
     jobs = Dataset(in_storage=storage,
                    in_table_name='jobs',
                    id_name="jid",
                    dataset_name="myjob")
     values = gs.compute_variables([expr],
                                   resources=Resources({
                                       "myjob": jobs,
                                       "mygridcell": gs
                                   }))
     should_be = array([12, 11, 11])
     self.assert_(ma.allclose(values, should_be, rtol=1e-7),
                  msg="Error in " + expr)
Exemplo n.º 38
0
 def estimate(self,
              specification,
              dataset,
              outcome_attribute="unit_price",
              index=None,
              procedure="opus_core.estimate_linear_regression",
              data_objects=None,
              estimate_config=None,
              debuglevel=0):
     if data_objects is not None:
         self.dataset_pool.add_datasets_if_not_included(data_objects)
      if self.filter_attribute is not None:
         res = Resources({"debug": debuglevel})
         index = dataset.get_filtered_index(self.filter_attribute,
                                            threshold=0,
                                            index=index,
                                            dataset_pool=self.dataset_pool,
                                            resources=res)
     return RegressionModelWithAdditionInitialResiduals.estimate(
         self,
         specification,
         dataset,
         outcome_attribute,
         index,
         procedure,
         estimate_config=estimate_config,
         debuglevel=debuglevel)
Exemplo n.º 39
0
 def create_from_data(self, resources=None, id_name=None, in_storage=None, dataset_name=None,
         out_storage=None, in_table_name=None, out_table_name=None):
     self.resources = Resources(resources)
     self.resources.merge_if_not_None({ "id_name":id_name,
                         "dataset_name":dataset_name,
                         "in_storage":in_storage,
                         "out_storage":out_storage,
                         "in_table_name":in_table_name,
                         "out_table_name":out_table_name})
     self.resources.merge_with_defaults({"dataset_name":"dataset"})
     self.dataset_name = self.resources.get("dataset_name", None)
     self.attribute_cache = AttributeCache()
     self._aliases = {}
     self._id_names = self.resources.get("id_name", [])
     if not isinstance(self._id_names, list):
         self._id_names = [self._id_names]
     self.variable_factory = VariableFactory()
     self.debug = self.resources.get("debug",  0)
     self.df = pd.DataFrame(self.resources.get('in_storage').load_table(self.resources.get('in_table_name')))
     self._primary_attribute_names = self.get_attribute_names()
     self.df.set_index(self._id_names, inplace=True)
     self.attribute_boxes = {}
     for attr in self._primary_attribute_names:
         self.attribute_boxes[attr] = AttributeBox(self, [],
                                             variable_name=self.create_and_check_qualified_variable_name(attr),
                                             type=AttributeType.PRIMARY,
                                             is_in_memory=True,
                                             header=None,
                                             version=0)
     self.n = self.df.shape[0]
Exemplo n.º 40
0
        def run_model():
            hlcm = HouseholdLocationChoiceModelCreator().get_model(
                location_set=gridcells,
                compute_capacity_flag=False,
                choices="opus_core.random_choices_from_index",
                sample_size_locations=8)
            hlcm.run(specification,
                     coefficients,
                     agent_set=households,
                     debuglevel=1)

            # get results
            gridcells.compute_variables(
                ["urbansim.gridcell.number_of_households"],
                resources=Resources({"household": households}))
            result_more_attractive = gridcells.get_attribute_by_id(
                "number_of_households",
                arange(ngcs_attr) + 1)
            result_less_attractive = gridcells.get_attribute_by_id(
                "number_of_households", arange(ngcs_attr + 1, ngcs + 1))
            households.set_values_of_one_attribute(attribute="grid_id",
                                                   values=hh_grid_ids)
            gridcells.delete_one_attribute("number_of_households")
            result = concatenate(
                (result_more_attractive, result_less_attractive))
            return result
Exemplo n.º 41
0
    def run_chunk(self, index, dataset, specification, coefficients):
        self.specified_coefficients = SpecifiedCoefficients().create(
            coefficients, specification, neqs=1)
        compute_resources = Resources({"debug": self.debug})
        submodels = self.specified_coefficients.get_submodels()
        self.get_status_for_gui().update_pieces_using_submodels(
            submodels=submodels, leave_pieces=2)
        self.map_agents_to_submodels(submodels,
                                     self.submodel_string,
                                     dataset,
                                     index,
                                     dataset_pool=self.dataset_pool,
                                     resources=compute_resources)
        variables = self.specified_coefficients.get_full_variable_names_without_constants(
        )
        self.debug.print_debug("Compute variables ...", 4)
        self.increment_current_status_piece()
        dataset.compute_variables(variables,
                                  dataset_pool=self.dataset_pool,
                                  resources=compute_resources)
        data = {}
        coef = {}
        outcome = self.initial_values[index].copy()
        for submodel in submodels:
            coef[submodel] = SpecifiedCoefficientsFor1Submodel(
                self.specified_coefficients, submodel)
            self.coefficient_names[submodel] = coef[
                submodel].get_coefficient_names_without_constant()[0, :]
            self.debug.print_debug(
                "Compute regression for submodel " + str(submodel), 4)
            self.increment_current_status_piece()
            self.data[submodel] = dataset.create_regression_data(
                coef[submodel],
                index=index[self.observations_mapping[submodel]])
            nan_index = where(isnan(self.data[submodel]))[1]
            inf_index = where(isinf(self.data[submodel]))[1]
            vnames = asarray(coef[submodel].get_variable_names())
            if nan_index.size > 0:
                nan_var_index = unique(nan_index)
                self.data[submodel] = nan_to_num(self.data[submodel])
                logger.log_warning(
                    "NaN(Not A Number) is returned from variable %s; it is replaced with %s."
                    % (vnames[nan_var_index], nan_to_num(nan)))
                #raise ValueError, "NaN(Not A Number) is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % vnames[nan_var_index]
            if inf_index.size > 0:
                inf_var_index = unique(inf_index)
                self.data[submodel] = nan_to_num(self.data[submodel])
                logger.log_warning(
                    "Inf is returned from variable %s; it is replaced with %s."
                    % (vnames[inf_var_index], nan_to_num(inf)))
                #raise ValueError, "Inf is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % vnames[inf_var_index]

            if (self.data[submodel].shape[0] >
                    0) and (self.data[submodel].size >
                            0):  # observations for this submodel available
                outcome[self.observations_mapping[submodel]] = \
                    self.regression.run(self.data[submodel], coef[submodel].get_coefficient_values()[0,:],
                        resources=self.run_config).astype(outcome.dtype)
        return outcome
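
The run_chunk method above does not abort on NaN or Inf values in the regression data; it logs a warning and substitutes finite numbers via nan_to_num. A minimal standalone numpy sketch of that cleanup (illustrative only, names are not from the snippet):

import numpy as np

data = np.array([[1.0, np.nan, 2.0],
                 [np.inf, 3.0, -np.inf]])
nan_cols = np.unique(np.where(np.isnan(data))[1])  # columns containing NaN -> array([1])
inf_cols = np.unique(np.where(np.isinf(data))[1])  # columns containing Inf -> array([0, 2])
cleaned = np.nan_to_num(data)                      # NaN -> 0.0, +/-Inf -> large finite numbers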
 def test_number_of_agents(self):
     expr = "mygridcell.number_of_agents(myjob)"
     storage = StorageFactory().get_storage('dict_storage')
     gridcell_grid_id = array([1, 2, 3])
     job_grid_id = array([2, 1, 3, 1]) #specify an array of 4 jobs, 1st job's grid_id = 2 (it's in gridcell 2), etc.
     storage.write_table(table_name='gridcells', table_data={'gid':gridcell_grid_id})
     storage.write_table(table_name='jobs', table_data={'jid':arange(4)+1, 'gid':job_grid_id})
     gs = Dataset(in_storage=storage, in_table_name='gridcells', id_name="gid", dataset_name="mygridcell")
     jobs = Dataset(in_storage=storage, in_table_name='jobs', id_name="jid", dataset_name="myjob")       
     values = gs.compute_variables([expr], resources=Resources({"myjob":jobs, "mygridcell":gs}))
     should_be = array([2, 1, 1])            
     self.assert_(ma.allclose(values, should_be, rtol=1e-7), msg = "Error in " + expr)
     # change gids of jobs (to test if computing dependencies is working)
     jobs.modify_attribute(name="gid", data=array([1,1,1,1]))
     values2 = gs.compute_variables([expr], resources=Resources({"myjob":jobs, "mygridcell":gs}))
     should_be2 = array([4, 0, 0])            
     self.assert_(ma.allclose(values2, should_be2, rtol=1e-7), msg = "Error in " + expr)
Exemplo n.º 43
0
    def _search_for_dataset_helper(self, dataset_name, package_order,
                                   use_hidden_id, **kwargs):
        # this part of the search_for_dataset code is factored into a helper method, rather than passing in
        # use_hidden_id as a keyword parameter with a default value of False, so that we don't pass this
        # keyword parameter along to the get_dataset method
        for package_name in package_order:
            try:
                dataset = self.get_dataset(dataset_name,
                                           package=package_name,
                                           **kwargs)
                if dataset is not None:
                    break
            except ImportError:
                continue
        else:
            from opus_core.datasets.dataset import Dataset
            from opus_core.resources import Resources

            resources = Resources(kwargs.get('arguments', {}))
            if use_hidden_id:
                id_name_default = []
            else:
                id_name_default = "%s_id" % dataset_name
            (table_name, module_name, class_name
             ) = self._table_module_class_names_for_dataset(dataset_name)
            ## set table_name and id_name_default as default values in resources (arguments)
            resources.merge_with_defaults({
                'dataset_name': dataset_name,
                'in_table_name': table_name,
                'out_table_name': table_name,
                'id_name': id_name_default
            })
            try:
                dataset = Dataset(resources=resources)
            except:
                # try to create a dataset using deprecated values
                (table_name, module_name, class_name
                 ) = self._table_module_class_names_for_dataset_deprecated(
                     dataset_name)
                resources = Resources(kwargs.get('arguments', {}))
                resources.merge_with_defaults({
                    'dataset_name': dataset_name,
                    'in_table_name': table_name,
                    'out_table_name': table_name,
                    'id_name': id_name_default
                })
                try:
                    dataset = Dataset(resources=resources)
                except:
                    logger.log_warning(
                        "Could not create a generic Dataset '%s'." %
                        dataset_name)
                    raise
                #TODO: uncomment this warning when we change to singular
                #logger.log_warning("Dataset %s was created using deprecated table name - using plural will not be supported in the future." % dataset_name)
        return dataset
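
The search helper above relies on Python's for/else construct: the else branch runs only if the loop over package_order finishes without a break, i.e. when no package could provide the dataset class and a generic Dataset must be built instead. A small standalone sketch of that control flow (illustrative names only):

def find_first(candidates, predicate):
    for item in candidates:
        if predicate(item):
            result = item
            break
    else:
        # reached only when the loop exhausts candidates without breaking
        result = None
    return result

assert find_first([1, 3, 4, 6], lambda x: x % 2 == 0) == 4
assert find_first([1, 3, 5], lambda x: x % 2 == 0) is None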
Exemplo n.º 44
0
    def __init__(self, config=None, save_estimation_results=False):
        if 'cache_directory' not in config or config['cache_directory'] is None:
            raise KeyError("The cache directory must be specified in the "
                "given configuration, giving the filesystem path to the cache "
                "directory containing the data with which to estimate. Please "
                "check that your configuration contains the 'cache_directory' "
                "entry and that it is not None.")

        self.simulation_state = SimulationState(new_instance=True)
        self.simulation_state.set_cache_directory(config['cache_directory'])

        SessionConfiguration(new_instance=True,
                             package_order=config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())
        self.config = Resources(config)
        self.save_estimation_results = save_estimation_results
        self.debuglevel = self.config.get("debuglevel", 4)
        self.model_system = ModelSystem()
        self.agents_index_for_prediction = None
        
        models = self.config.get('models',[])

        self.model_name = None
        if "model_name" in config.keys():
            self.model_name = config["model_name"]
        else:
            for model in models:
                if isinstance(model, dict):
                    model_name = model.keys()[0]
                    if (model[model_name] == "estimate") or (isinstance(model[model_name], list)
                        and ("estimate" in model[model_name])):
                            self.model_name = model_name
                            break
        estimate_config_changes = self.config.get('config_changes_for_estimation', {}).get('estimate_config', {})
        if len(estimate_config_changes) > 0:
            change = Resources({'models_configuration': {self.model_name: {'controller': {'init': {'arguments': {}}}}}})
            estimate_config_str = self.config['models_configuration'].get(self.model_name, {}).get('controller', {}).get('init', {}).get('arguments', {}).get('estimate_config', '{}')
            estimate_config = Resources({})
            try:
                estimate_config = eval(estimate_config_str)
            except:
                pass
 
            estimate_config.merge(estimate_config_changes)
            self.config.merge(change)
            self.config['models_configuration'][self.model_name]['controller']['init']['arguments']['estimate_config'] = 'Resources(%s)' % estimate_config
Exemplo n.º 45
0
 def run(self, specification, coefficients, dataset, index=None, chunk_specification=None,
         data_objects=None, run_config=None, initial_values=None, procedure=None, debuglevel=0):
     """'specification' is of type EquationSpecification,
         'coefficients' is of type Coefficients,
         'dataset' is of type Dataset,
         'index' are indices of individuals in dataset for which
                     the model runs. If it is None, the whole dataset is considered.
         'chunk_specification' determines  number of chunks in which the simulation is processed.
         'data_objects' is a dictionary where each key is the name of an data object
         ('zone', ...) and its value is an object of class  Dataset.
        'run_config' is of type Resources, it gives additional arguments for the run.
        If 'procedure' is given, it overwrites the regression_procedure of the constructor.
        'initial_values' is an array of the initial values of the results. It will be overwritten
        by the results for those elements that are handled by the model (defined by submodels in the specification).
        By default the results are initialized with 0.
         'debuglevel' overwrites the constructor 'debuglevel'.
     """
     self.debug.flag = debuglevel
     if run_config == None:
         run_config = Resources()
     if not isinstance(run_config,Resources) and isinstance(run_config, dict):
         run_config = Resources(run_config)
     self.run_config = run_config.merge_with_defaults(self.run_config)
     self.run_config.merge({"debug":self.debug})
     if data_objects is not None:
         self.dataset_pool.add_datasets_if_not_included(data_objects)
     self.dataset_name = dataset.get_dataset_name()
     self.dataset_pool.replace_dataset(self.dataset_name, dataset)
     
     if procedure is not None: 
         self.regression = RegressionModelFactory().get_model(name=procedure)
     if initial_values is None:
         self.initial_values = zeros((dataset.size(),), dtype=float32)
     else:
         self.initial_values = zeros((dataset.size(),), dtype=initial_values.dtype)
         self.initial_values[index] = initial_values
         
     if dataset.size()<=0: # no data loaded yet
         dataset.get_id_attribute()
     if index == None:
         index = arange(dataset.size())
         
     result = ChunkModel.run(self, chunk_specification, dataset, index, float32,
                              specification=specification, coefficients=coefficients)
     return result
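
A hedged usage sketch of the run method above; the model, specification, coefficients and dataset names are placeholders assumed to have been prepared as in the other examples on this page, not part of this snippet:

# outcome = model.run(specification, coefficients, dataset,
#                     chunk_specification={'nchunks': 2},
#                     debuglevel=1)
# 'outcome' is a float32 array of length dataset.size(), initialized with zeros and
# overwritten for the elements handled by the submodels of the specification.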
    def write(self, resources=None, out_storage=None, out_table_name=None):
        """
        """ # TODO: insert docstring
        local_resources = Resources(resources)
        local_resources.merge_with_defaults({
            "field_submodel_id":self.field_submodel_id,
            "field_equation_id":self.field_equation_id,
            "field_coefficient_name":self.field_coefficient_name,
            "field_variable_name":self.field_variable_name,
            "field_fixed_value":self.field_fixed_value,
            "out_table_name":out_table_name})
        if out_storage is not None:
            self.out_storage = out_storage
        if not isinstance(self.out_storage, Storage):
            logger.log_warning("out_storage has to be of type Storage. No EquationSpecifications written.")
            return

        submodel_ids = self.get_submodels()
        if submodel_ids.size == 0:
            submodel_ids = resize(array([-2], dtype="int32"), len(self.get_coefficient_names())) # set submodel_id to -2 when there are no submodels (or only one)

        equation_ids = self.get_equations()
        if equation_ids.size == 0:
            equation_ids = resize(array([-2], dtype="int32"), submodel_ids.size)

        values = {local_resources["field_submodel_id"]: submodel_ids,
               local_resources["field_equation_id"]:  equation_ids,
               local_resources["field_coefficient_name"]:  self.get_coefficient_names(),
               local_resources["field_variable_name"]:  self.get_long_variable_names()}
        if self.fixed_values.size > 0:
            values[local_resources["field_fixed_value"]] = self.fixed_values
        for field in self.other_fields.keys():
            values[field] = self.other_fields[field]

        types = {local_resources["field_submodel_id"]: 'integer',
               local_resources["field_equation_id"]:  'integer',
               local_resources["field_coefficient_name"]:  'text',
               local_resources["field_variable_name"]:  'text'}

        local_resources.merge({"values":values, 'valuetypes': types, "drop_table_flag":1})
        
        self.out_storage.write_table(table_name = local_resources['out_table_name'],
            table_data=local_resources['values']
            )
class RunSimulationFromMysql:
    def prepare_for_simulation(self, run_configuration, cache_directory=None):
        self.config = Resources(run_configuration)
        self.simulation_state = SimulationState(
            new_instance=True, base_cache_dir=cache_directory, start_time=self.config.get("base_year", 0)
        )

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        if self.config["cache_directory"] is None:
            self.config["cache_directory"] = self.simulation_state.get_cache_directory()

        SessionConfiguration(
            new_instance=True,
            package_order=self.config["dataset_pool_configuration"].package_order,
            in_storage=AttributeCache(),
        )

        ForkProcess().fork_new_process(
            self.config["creating_baseyear_cache_configuration"].cache_scenario_database, self.config
        )

        # Create output database (normally done by run manager)
        if "estimation_database_configuration" in self.config:
            db_server = DatabaseServer(self.config["estimation_database_configuration"])
            if not db_server.has_database(self.config["estimation_database_configuration"].database_name):
                db_server.create_database(self.config["estimation_database_configuration"].database_name)

    def run_simulation(self, simulation_instance=None):
        logger.start_block("Simulation on database %s" % self.config["scenario_database_configuration"].database_name)
        try:
            if simulation_instance is None:
                simulation_instance = ModelSystem()
            simulation_instance.run(self.config)
            # simulation_instance.run_multiprocess(self.config, is_run_subset=True)
        finally:
            logger.end_block()
        logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory())

    def cleanup(self, remove_cache=True, remove_output_database=False):
        """Remove all outputs of this simulation."""
        self.simulation_state.remove_singleton(delete_cache=remove_cache)
        # Remove SessionConfiguration singleton, if it exists
        Singleton().remove_singleton_for_class(SessionConfiguration)

        cache_dir = self.config["cache_directory"]
        if os.path.exists(cache_dir):
            rmtree(cache_dir)
        if remove_output_database and ("estimation_database_configuration" in self.config):
            db_server = DatabaseServer(self.config["estimation_database_configuration"])
            db_server.drop_database(self.config["estimation_database_configuration"].database_name)

    def prepare_and_run(self, run_configuration, simulation_instance=None, remove_cache=True):
        self.prepare_for_simulation(run_configuration)
        self.run_simulation(simulation_instance)
        self.cleanup(remove_cache)
Exemplo n.º 48
0
    def write(self, resources=None, out_storage=None, out_table_name=None):
        """
        """ # TODO: insert docstring
        local_resources = Resources(resources)
        local_resources.merge_with_defaults({
            "field_submodel_id":self.field_submodel_id,
            "field_coefficient_name":self.field_coefficient_name,
            "field_estimate":self.field_estimate,
            "field_standard_error":self.field_standard_error,
            "other_fields":self.other_fields,
            "out_table_name":out_table_name})
        if out_storage is not None:
            self.out_storage = out_storage
        if not isinstance(self.out_storage, Storage):
            logger.log_warning("out_storage has to be of type Storage. No coefficients written.")
            return

        submodels = self.get_submodels()
        if submodels.size <= 0:
            submodels = resize(array([-2], dtype=int32), self.size())
        values = {local_resources["field_submodel_id"]: submodels,
               local_resources["field_coefficient_name"]:  self.get_names(),
               local_resources["field_estimate"]:  self.get_values(),
               local_resources["field_standard_error"]:  self.get_standard_errors()}
        for measure in self.other_measures.keys():
            values[measure] = self.other_measures[measure]
        types = {local_resources["field_submodel_id"]: 'integer',
               local_resources["field_coefficient_name"]:  'text',
               local_resources["field_estimate"]:  'double',
               local_resources["field_standard_error"]:  'double'}
        attrtypes = {local_resources["field_submodel_id"]: AttributeType.PRIMARY,
               local_resources["field_coefficient_name"]:  AttributeType.PRIMARY,
               local_resources["field_estimate"]:  AttributeType.PRIMARY,
               local_resources["field_standard_error"]: AttributeType.PRIMARY}
        for measure in self.other_measures.keys():
            types[measure]= 'double'
            attrtypes[measure] = AttributeType.PRIMARY
        local_resources.merge({"values":values, 'valuetypes': types, "drop_table_flag":1,
                               "attrtype":attrtypes})
        
        self.out_storage.write_table(table_name=local_resources['out_table_name'],
            table_data = local_resources['values'])       
Exemplo n.º 49
0
    def run_multiprocess(self, resources):
        resources = Resources(resources)
        profiler_name = resources.get("profile_filename", None)
        if resources['cache_directory'] is not None:
            cache_directory = resources['cache_directory']
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is absolutely no good reason to be
        ###       changing the Configuration!
        resources['cache_directory'] = cache_directory

        log_file = os.path.join(cache_directory, 'run_multiprocess.log')
        logger.enable_file_logging(log_file)

        start_year = resources["years"][0]
        end_year = resources["years"][-1]
        nyears = end_year - start_year + 1
        root_seed = resources.get("seed", NO_SEED)
        if resources.get('_seed_dictionary_', None) is not None:
            # This is added by the RunManager to ensure reproducibility including restarted runs 
            seed_dict = resources.get('_seed_dictionary_')
            seed_array = array([seed_dict[year] for year in range(start_year, end_year+1)])
        else:
            seed(root_seed)
            seed_array = randint(1,2**30, nyears)
        logger.log_status("Running simulation for years %d thru %d" % (start_year, end_year))
        logger.log_status("Simulation root seed: %s" % root_seed)

        for iyear, year in enumerate(range(start_year, end_year+1)):
            success = self._run_each_year_as_separate_process(iyear, year, 
                                                                 seed=seed_array[iyear],
                                                                 resources=resources,
                                                                 profiler_name=profiler_name,
                                                                 log_file=log_file)
            if not success:
                break

        self._notify_stopped()
        if profiler_name is not None: # insert original value
            resources["profile_filename"] = profiler_name
        logger.log_status("Done running simulation for years %d thru %d" % (start_year, end_year))
Exemplo n.º 50
0
    def plot_map(self, name, gridcell=None, **opt_args):
        if gridcell is None:
            gridcell = Resources()["gridcell"]
        gridcell.compute_variables("urbansim.gridcell.fazdistrict_id")

        name = VariableName(name).get_alias()
        if name in self.get_known_attribute_names(): # attribute of fazes
            new_name = name+'_of_fazdistrict'
            gridcell.join(self, name=name, new_name=new_name)
        elif name in gridcell.get_known_attribute_names(): # attribute of gridcells
            new_name = name
        else:
            raise StandardError, "Attribute " + name + " not known."
        gridcell.plot_map(new_name, **opt_args)
    def run(self, data, coefficients, resources=None):
        """
        Like linear_utilities, but in addition it runs linear utilities for
        modified data and stores utilities when each variable is set to its 5%, 95% quantiles,
        keeping the other variables at their median. Last row in the resulting file is the difference in
        utilities between these two.
        The file name can be passed in resources - entry 'utilities_diagnose_file'.
        """
        if data.ndim < 3:
            raise StandardError, "Argument 'data' must be a 3D numpy array."

        if not isinstance(resources, Resources):
            resources= Resources(resources)
        nobs, neqs, nvar = data.shape
        medians = zeros(nvar, dtype=float32)
        quant = zeros((2,nvar), dtype=float32)
        data_with_medians = array(data[0,:,:])
        for ivar in range(nvar): # compute median and quantiles for each variable
            medians[ivar], quant[0,ivar], quant[1,ivar] = quantile(data[:,:,ivar].ravel(), array([0.5, 0.05, 0.95]))
            data_with_medians[:,ivar] = medians[ivar]


        file_name = resources.get("utilities_diagnose_file", "util")
        if resources.get("submodel", None) is not None:
            file_name = "%s_submodel_%s" % (file_name, resources.get("submodel", 1))
        diagnose_utilities = zeros((3, nvar), dtype=float32)
        argcor = ()
        for ivar in range(nvar): # iterate over variables
            for iquant in [0,1]: # 0 for 5% quantile, 1 for 95% quantile
                mod_data = array(data_with_medians).reshape(1,neqs, nvar) # copy original data
                mod_data[0,:,ivar] = quant[iquant, ivar]
                utility = linear_utilities.run(self, mod_data, coefficients, resources)
                diagnose_utilities[iquant, ivar] = utility[0,0]
            argcor = argcor + (data[:,:,ivar].ravel(),)
        diagnose_utilities[2,:] = diagnose_utilities[1,:] - diagnose_utilities[0,:]
        coef_names = resources.get("coefficient_names", map(lambda x: 'x%s' % x, arange(nvar)+1))
        #write_to_text_file(file_name, coef_names, delimiter=' ')
        #write_table_to_text_file( file_name, diagnose_utilities, mode='ab')
        logger.log_status("Diagnosed utilities written into %s." % file_name)
        return linear_utilities.run(self, data, coefficients, resources)
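
The diagnostic above holds every variable at its median and, one variable at a time, substitutes its 5% and 95% quantiles to measure how much the utility moves. A compact numpy sketch of that substitution step (illustrative only; it uses numpy.percentile in place of the opus_core quantile helper):

import numpy as np

data = np.random.rand(100, 5, 3)                    # (observations, equations, variables)
nobs, neqs, nvar = data.shape
flat = data.reshape(-1, nvar)                       # pool observations and equations per variable
medians = np.percentile(flat, 50, axis=0)
low, high = np.percentile(flat, [5, 95], axis=0)

base = np.tile(medians, (neqs, 1))                  # every variable held at its median
for ivar in range(nvar):
    mod = base.copy()
    mod[:, ivar] = high[ivar]                       # push one variable to its 95% quantile
    # evaluate utilities on mod[np.newaxis, :, :] and compare with the 5% case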
 def get_resources_for_dataset(self, 
           dataset_name, 
           in_storage,
           out_storage,
           resources={},
           in_table_name_pair=(None,None),
           out_table_name_pair=(None,None),
           attributes_pair=(None,None), 
           id_name_pair=(None,None), 
           nchunks_pair=(None,None), 
           debug_pair=(None,None)
           ):
                         
     """Create an object of class Resources to be used in a Dataset object. 
     The created resources are merged with the resources given as an argument 'resources'. 
     The first element
     of each tuple of the remaining arguments contains the desired value, the second element contains 
     the default value which is used if the first element is None. 
     Entries in resources of the same name as the argument values are overwritten if the one of the 
     tuple values is not equal None.
     """
         
     # merge resources with arguments
     local_resources = Resources(resources)
     local_resources.merge_if_not_None({
             "in_storage":in_storage,
             "out_storage":out_storage,
             "nchunks":nchunks_pair[0], "attributes":attributes_pair[0],
             "in_table_name": in_table_name_pair[0], "out_table_name": out_table_name_pair[0],
             "id_name":id_name_pair[0], "debug":debug_pair[0],
             "dataset_name":dataset_name})
         
     # merge resources with default values    
     local_resources.merge_with_defaults({
             "nchunks":nchunks_pair[1], "attributes":attributes_pair[1],
             "in_table_name":in_table_name_pair[1], "out_table_name":out_table_name_pair[1],
             "id_name":id_name_pair[1], "debug":debug_pair[1],
             "dataset_name":dataset_name})
         
     return local_resources
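
The two merge calls above behave differently: merge_if_not_None overwrites entries only when the supplied value is not None, while merge_with_defaults only fills in values that are still missing. A minimal sketch of the resulting precedence (argument value first, then any pre-existing resources entry, then the default), assuming the merge semantics described in the docstring:

resources = Resources({"nchunks": 3})
resources.merge_if_not_None({"nchunks": None, "id_name": "gridcell_id"})
# "nchunks" stays 3 (the None is ignored); "id_name" is added
resources.merge_with_defaults({"nchunks": 1, "debug": 0})
# "nchunks" keeps its value 3; "debug" is added with the default 0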
 def load(self, resources=None, in_storage=None, in_table_name=None, variables = []):
     local_resources = Resources(resources)
     local_resources.merge_with_defaults({
         "field_submodel_id":self.field_submodel_id,
         "field_equation_id":self.field_equation_id,
         "field_coefficient_name":self.field_coefficient_name,
         "field_variable_name":self.field_variable_name,
         "field_fixed_value":self.field_fixed_value})
      if in_storage is not None:
         self.in_storage = in_storage
     if not isinstance(self.in_storage, Storage):
         logger.log_warning("in_storage is not of type Storage. No EquationSpecification loaded.")
     else:
         data = self.in_storage.load_table(table_name=in_table_name)
         equations=array([-1])
         if local_resources["field_equation_id"] in data:
             equations = data[local_resources["field_equation_id"]]
         vars=data[local_resources["field_variable_name"]]
         self.variables=tuple(map(lambda x: VariableName(x), vars))
         self.coefficients=data[local_resources["field_coefficient_name"]]
         if local_resources["field_submodel_id"] in data:
             submodels = data[local_resources["field_submodel_id"]]
         else:
             submodels = array([-2]*self.coefficients.size, dtype="int32")
         self.submodels=submodels
         if equations.max() >= 0:
             self.equations=equations
         if local_resources["field_fixed_value"] in data:
             self.fixed_values = data[local_resources["field_fixed_value"]]
         for field in data:
             if field not in [local_resources["field_submodel_id"], local_resources["field_equation_id"],
                              local_resources["field_variable_name"], local_resources["field_coefficient_name"],
                              local_resources["field_fixed_value"]]:
                 self.other_fields[field] = data[field]
         self.set_other_dim_field_names()
         if variables:
             self.shrink(variables)
    def run(self, 
            config = None, ### TODO: Get rid of this parameter!
            unroll_gridcells = None, ### TODO: Get rid of this parameter!
            cache_directory = None, 
            base_year = None,
            creating_baseyear_cache_configuration = None,
            debuglevel = None,
            ):
        """
        Copy large baseyear datasets from MySQL into cache.
        """
        
        config = Resources(config)
        
        if unroll_gridcells is None:
            unroll_gridcells = config['creating_baseyear_cache_configuration'].unroll_gridcells
            
        if cache_directory is None:
            cache_directory = config['cache_directory']
            
        if base_year is None:
            base_year = config['base_year']
            
        if creating_baseyear_cache_configuration is None:
            creating_baseyear_cache_configuration = copy.deepcopy(config['creating_baseyear_cache_configuration'])
        
        if debuglevel is None:
            debuglevel = config.get('debuglevel', 3)

        CoreCacheScenarioDatabase().run(config)
        
        self.prepare_data_before_baseyear(
            cache_directory,
            base_year,
            creating_baseyear_cache_configuration
            )
 def skip_test_estimation_one_var(self):
     """ Test a regression estimation for a model with one independent variable
     """
     
     # First, use scipy to get reference values to compare the results of our
     # R-based regression to.
     #print "using scipy to calculate reference regression..."
     # Example regression from: http://www2.warwick.ac.uk/fac/sci/moac/currentstudents/peter_cock/python/lin_reg/
     from scipy import stats
     x = [5.05, 6.75, 3.21, 2.66]
     y = [1.65, 26.5, -5.93, 7.96]
     gradient, intercept, r_value, p_value, std_err = stats.linregress(x,y)
     r_squared = r_value**2
     #print "Gradient and intercept", gradient, intercept
     ##Gradient and intercept 5.3935773612 -16.2811279931
     #print "R-squared", r_squared
     ##R-squared 0.524806275136
     #print "p-value", p_value
     ##p-value 0.275564857882
     
     # Next, setup the call to estimate_linear_regression_r.run(...)
     # Need to call run method on estimate_linear_regression_r, whose prototype is: 
     #   def run(self, data, regression=None, resources=None):
      #   regression is not used by the run method
     #   things I need to store in resources:
     #     constant_position = resources.get("constant_position",  array([], dtype='int32')) #position for intercept
     #     coef_names = resources.get("coefficient_names",  nvar*[])
     #     outcome = resources["outcome"].astype("float64")
     
     # Create resources
     coeff = array(['EX'])
     resources = Resources()
     # No constant
     resources.add("constant_position", array([], dtype='int32'))
     resources.add("coefficient_names", coeff)
     resources.add("outcome", array(y))
     
     #data = array([x, y])
     data = resize(array([x]), (len(x), 1))
     
     # run RPy-based regression
     estimateR = estimate_linear_regression_r()
     result = estimateR.run(data, resources=resources)
     #print "results from RPy-base estimation: " + str(result)
 
     # Finally, compare the scipy-based regression to the R-based regression
      # Compare the estimate of the independent variable
     self.assertEqual(round(intercept, 4), round(result['estimators'][0], 4))
     # Compare the R-Squared
     self.assertEqual(round(r_squared, 6), round(result['other_info']['R-Squared'], 6))
Exemplo n.º 56
0
    def _search_for_dataset_helper(self, dataset_name, package_order, use_hidden_id, **kwargs):
        # this part of the search_for_dataset code is factored into a helper method, rather than passing in
        # use_hidden_id as a keyword parameter with a default value of False, so that we don't pass this
        # keyword parameter along to the get_dataset method
        for package_name in package_order:
            try:
                dataset = self.get_dataset(dataset_name, package=package_name, **kwargs)
                if dataset is not None:
                    break
            except ImportError:
                continue
        else:
            from opus_core.datasets.dataset import Dataset
            from opus_core.resources import Resources

            resources = Resources(kwargs.get("arguments", {}))
            if use_hidden_id:
                id_name_default = []
            else:
                id_name_default = "%s_id" % dataset_name
            (table_name, module_name, class_name) = self._table_module_class_names_for_dataset(dataset_name)
            ## set table_name and id_name_default as default values in resources (arguments)
            resources.merge_with_defaults(
                {
                    "dataset_name": dataset_name,
                    "in_table_name": table_name,
                    "out_table_name": table_name,
                    "id_name": id_name_default,
                }
            )
            try:
                dataset = Dataset(resources=resources)
            except:
                # try to create a dataset using deprecated values
                (table_name, module_name, class_name) = self._table_module_class_names_for_dataset_deprecated(
                    dataset_name
                )
                resources = Resources(kwargs.get("arguments", {}))
                resources.merge_with_defaults(
                    {
                        "dataset_name": dataset_name,
                        "in_table_name": table_name,
                        "out_table_name": table_name,
                        "id_name": id_name_default,
                    }
                )
                try:
                    dataset = Dataset(resources=resources)
                except:
                    logger.log_warning("Could not create a generic Dataset '%s'." % dataset_name)
                    raise
                # TODO: uncomment this warning when we change to singular
                # logger.log_warning("Dataset %s was created using deprecated table name - using plural will not be supported in the future." % dataset_name)
        return dataset
Exemplo n.º 57
0
    def openev_plot(self, name, gridcell=None, **opt_args):
        if gridcell is None:
            gridcell = Resources()["gridcell"]
        gridcell.compute_variables("urbansim.gridcell.city_id")

#        if prototype_dataset is None and self.default_prototype_dataset is not None:
#            prototype_dataset = self.default_prototype_dataset
#        if template_project is None and self.default_template_project is not None:
#            template_project = self.default_template_project
#        if legend_file is None and self.default_legend_file is not None:
#            legend_file = self.default_legend_file

        if name in self.get_known_attribute_names(): # attribute of fazes
            new_name = name+'_of_city'
            gridcell.join(self, name=name, new_name=new_name)
        elif name in gridcell.get_known_attribute_names(): # attribute of gridcells
            new_name = name
        else:
            raise StandardError, "Attribute " + name + " not known."

        gridcell.openev_plot(new_name, **opt_args)
Exemplo n.º 58
0
class RunSimulation(object):
    def prepare_for_simulation(self, config, cache_directory=None):
        self.config = Resources(config)
        base_cache_dir = self.config['creating_baseyear_cache_configuration'].cache_directory_root
        
        self.simulation_state = SimulationState(new_instance=True, base_cache_dir=base_cache_dir,
                                                start_time=self.config.get('base_year', 0))

        ### TODO: Get rid of this! There is no good reason to be changing the 
        ###       Configuration.
        if self.config['cache_directory'] is None:
            self.config['cache_directory'] = self.simulation_state.get_cache_directory()

        SessionConfiguration(new_instance=True,
                             package_order=self.config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())
        
        if config['creating_baseyear_cache_configuration'].cache_from_database:
            ForkProcess().fork_new_process(self.config['creating_baseyear_cache_configuration'].cache_scenario_database, self.config)
        else:
            CacheFltData().run(self.config)

    def run_simulation(self, simulation_instance=None):
        if simulation_instance is None:
            simulation_instance = ModelSystem()
        simulation_instance.run(self.config)
        #simulation_instance.run_multiprocess(self.config, is_run_subset=True)
        logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory())
        
    def cleanup(self, remove_cache=True):
        """Remove all outputs of this simulation."""    
        self.simulation_state.remove_singleton(delete_cache=remove_cache)
        SessionConfiguration().remove_singleton()
        if remove_cache:
            cache_dir = self.config['cache_directory']
            if os.path.exists(cache_dir):
                rmtree(cache_dir)

    def prepare_and_run(self, run_configuration, simulation_instance=None, remove_cache=True):
        self.prepare_for_simulation(run_configuration)
        self.run_simulation(simulation_instance)
        self.cleanup(remove_cache)
Exemplo n.º 59
0
    def prepare_for_simulation(self, config, cache_directory=None):
        self.config = Resources(config)
        base_cache_dir = self.config['creating_baseyear_cache_configuration'].cache_directory_root
        
        self.simulation_state = SimulationState(new_instance=True, base_cache_dir=base_cache_dir,
                                                start_time=self.config.get('base_year', 0))

        ### TODO: Get rid of this! There is no good reason to be changing the 
        ###       Configuration.
        if self.config['cache_directory'] is None:
            self.config['cache_directory'] = self.simulation_state.get_cache_directory()

        SessionConfiguration(new_instance=True,
                             package_order=self.config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())
        
        if config['creating_baseyear_cache_configuration'].cache_from_database:
            ForkProcess().fork_new_process(self.config['creating_baseyear_cache_configuration'].cache_scenario_database, self.config)
        else:
            CacheFltData().run(self.config)
 def __init__(self, resources=None, dataset1=None, dataset2=None, index1=None, index2=None, dataset_name=None,
               debug=None):
     """ Argument 'resources' is of type Resources. It is merged with arguments. It should contain:
             dataset1 - agent class
             dataset2 - class of the choice dataset
         Optional:
             index1 - 1D array, indices of dataset1
             index2 - If 2D array: row i contains indices of individuals of dataset2 that belong to
                     i-th individual of dataset1[index1].
                     If 1D array: indices of individuals of dataset2 for all individuals of dataset1[index1].
             dataset_name - subdirectory in which implementation of the interaction variables is placed (default "")
         dataset1.resources and dataset2.resources should contain key 'dataset_name' (see Dataset.get_dataset_name()).
     """
     self.resources = Resources(resources)
     self.resources.merge_if_not_None({
             "dataset1":dataset1, "dataset2":dataset2,
             "index1":index1, "index2":index2,
             "dataset_name":dataset_name, "debug":debug})
     self.attribute_boxes = {}
     self.attribute_names = []
     self.debug = self.resources.get("debug",  0)
     if not isinstance(self.debug, DebugPrinter):
         self.debug = DebugPrinter(self.debug)
     self.resources.check_obligatory_keys(["dataset1", "dataset2"])
     self.dataset1 = self.resources["dataset1"]
     self.dataset2 = self.resources["dataset2"]
     self.index1 = self.resources.get("index1", None)
     self.index2 = self.resources.get("index2", None)
     self.dataset_name = self.resources.get("dataset_name", None)
     if self.dataset_name == None:
         self.dataset_name = self.dataset1.get_dataset_name() + '_x_' + self.dataset2.get_dataset_name()
     self._primary_attribute_names=[]
     self.index1_mapping = {}
      if self.index1 is not None:
         self.index1_mapping = do_id_mapping_dict_from_array(self.index1)
     self._id_names = None # for compatibility with Dataset
     self.variable_factory = VariableFactory()
     self._aliases = {} # for compatibility with Dataset
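
As the docstring above explains, 'index2' may be one-dimensional (one shared set of alternatives for every agent) or two-dimensional (row i lists the alternatives sampled for the i-th agent of dataset1[index1]). A small sketch of the two shapes, with illustrative values:

from numpy import array, arange

index1 = arange(3)                      # three agents taken from dataset1
index2_shared = array([0, 4, 7, 9])     # 1D: the same four alternatives for every agent
index2_sampled = array([[0, 4, 7],      # 2D: row i holds the alternatives sampled
                        [1, 4, 9],      #     for agent index1[i]
                        [2, 5, 7]])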