Example #1
    def prepare_for_estimate(self,
                             specification_dict=None,
                             specification_storage=None,
                             specification_table=None,
                             events_for_estimation_storage=None,
                             events_for_estimation_table=None,
                             agents_filter='',
                             compute_variables=[],
                             data_objects={}):

        specification = get_specification_for_estimation(
            specification_dict, specification_storage, specification_table)
        projects = None
        # create agents for estimation
        if events_for_estimation_storage is not None:
            projects = Dataset(in_storage=events_for_estimation_storage,
                               in_table_name=events_for_estimation_table,
                               id_name=[],
                               dataset_name='development_project')
            if compute_variables:
                projects.compute_variables(compute_variables,
                                           resources=Resources(data_objects))
                # needs to be a primary attribute because of the join method below
                #projects.add_primary_attribute(estimation_set.get_attribute(location_id_variable),
                #                               VariableName(location_id_variable).get_alias())

            if agents_filter:
                values = projects.compute_variables(
                    agents_filter, resources=Resources(data_objects))
                index = where(values > 0)[0]
                projects.subset_by_index(index,
                                         flush_attributes_if_not_loaded=False)

        return (specification, projects)
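
A minimal usage sketch for the method above. The model instance, storage object, table names and filter expression are all hypothetical, not taken from the source:

# Hypothetical sketch: `model`, `storage` and the table/filter names are assumptions.
specification, projects = model.prepare_for_estimate(
    specification_storage=storage,
    specification_table='development_project_specification',
    events_for_estimation_storage=storage,
    events_for_estimation_table='development_events',
    # keep only events whose filter expression evaluates to a positive value
    agents_filter='development_project.improvement_value > 0')
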
Example #2
    def run(self,
            specification,
            coefficients,
            dataset,
            index=None,
            chunk_specification=None,
            data_objects=None,
            run_config=None,
            initial_values=None,
            procedure=None,
            debuglevel=0):
        """'specification' is of type EquationSpecification,
            'coefficients' is of type Coefficients,
            'dataset' is of type Dataset,
            'index' are indices of individuals in dataset for which
                        the model runs. If it is None, the whole dataset is considered.
            'chunk_specification' determines  number of chunks in which the simulation is processed.
            'data_objects' is a dictionary where each key is the name of an data object
            ('zone', ...) and its value is an object of class  Dataset.
           'run_config' is of type Resources, it gives additional arguments for the run.
           If 'procedure' is given, it overwrites the regression_procedure of the constructor.
           'initial_values' is an array of the initial values of the results. It will be overwritten
           by the results for those elements that are handled by the model (defined by submodels in the specification).
           By default the results are initialized with 0.
            'debuglevel' overwrites the constructor 'debuglevel'.
        """
        self.debug.flag = debuglevel
        if run_config is None:
            run_config = Resources()
        if not isinstance(run_config, Resources) and isinstance(
                run_config, dict):
            run_config = Resources(run_config)
        self.run_config = run_config.merge_with_defaults(self.run_config)
        self.run_config.merge({"debug": self.debug})
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.replace_dataset(dataset.get_dataset_name(), dataset)
        if procedure is not None:
            self.regression = RegressionModelFactory().get_model(
                name=procedure)
        if initial_values is None:
            self.initial_values = zeros((dataset.size(), ), dtype=float32)
        else:
            self.initial_values = zeros((dataset.size(), ),
                                        dtype=initial_values.dtype)
            self.initial_values[index] = initial_values

        if dataset.size() <= 0:  # no data loaded yet
            dataset.get_id_attribute()
        if index is None:
            index = arange(dataset.size())

        result = ChunkModel.run(self,
                                chunk_specification,
                                dataset,
                                index,
                                float32,
                                specification=specification,
                                coefficients=coefficients)
        return result
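
A hedged sketch of calling run() as the docstring describes. The model and dataset objects are assumptions; the chunk specification follows the {'nchunks': n} form that appears elsewhere in these examples:

# Hypothetical sketch: `model`, `specification`, `coefficients` and `buildings` are assumptions.
outcome = model.run(specification,
                    coefficients,
                    dataset=buildings,
                    index=None,  # None: the whole dataset is considered
                    chunk_specification={'nchunks': 2},
                    debuglevel=1)
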
Example #3
    def prepare_for_estimate(self,
                             specification_dict=None,
                             specification_storage=None,
                             specification_table=None,
                             agent_set=None,
                             agents_for_estimation_storage=None,
                             agents_for_estimation_table=None,
                             join_datasets=False,
                             index_to_unplace=None,
                             portion_to_unplace=1.0,
                             agent_filter=None,
                             data_objects={}):
        from opus_core.model import get_specification_for_estimation
        specification = get_specification_for_estimation(
            specification_dict, specification_storage, specification_table)
        if (agent_set is not None) and (index_to_unplace is not None):
            if self.location_id_string is not None:
                agent_set.compute_variables(self.location_id_string,
                                            resources=Resources(data_objects))
            if portion_to_unplace < 1:
                unplace_size = int(portion_to_unplace * index_to_unplace.size)
                end_index_to_unplace = sample_noreplace(
                    index_to_unplace, unplace_size)
            else:
                end_index_to_unplace = index_to_unplace
            logger.log_status("Unplace " + str(end_index_to_unplace.size) +
                              " agents.")
            agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                       -1 * ones(end_index_to_unplace.size),
                                       end_index_to_unplace)
        # create agents for estimation
        if agents_for_estimation_storage is not None:
            estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                     in_table_name=agents_for_estimation_table,
                                     id_name=agent_set.get_id_name(),
                                     dataset_name=agent_set.get_dataset_name())
            if agent_filter is not None:
                estimation_set.compute_variables(
                    agent_filter, resources=Resources(data_objects))
                index = where(
                    estimation_set.get_attribute(agent_filter) > 0)[0]
                estimation_set.subset_by_index(
                    index, flush_attributes_if_not_loaded=False)

            if join_datasets:
                agent_set.join_by_rows(estimation_set,
                                       require_all_attributes=False,
                                       change_ids_if_not_unique=True)
                index = arange(agent_set.size() - estimation_set.size(),
                               agent_set.size())
            else:
                index = agent_set.get_id_index(
                    estimation_set.get_id_attribute())
        else:
            index = arange(agent_set.size())
        return (specification, index)
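
A hedged usage sketch for this variant, which can unplace a portion of already placed agents before estimation. All names below are assumptions:

# Hypothetical sketch: `model`, `storage`, `households` and `placed_index` are assumptions.
specification, index = model.prepare_for_estimate(
    specification_storage=storage,
    specification_table='household_location_choice_model_specification',
    agent_set=households,
    index_to_unplace=placed_index,  # indices of currently placed households
    portion_to_unplace=0.5)         # unplace a random half of them
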
Example #4
    def _search_for_dataset_helper(self, dataset_name, package_order,
                                   use_hidden_id, **kwargs):
        # this part of the search_for_dataset code is factored into a helper method, rather than passing in
        # use_hidden_id as a keyword parameter with a default value of False, so that we don't pass this
        # keyword parameter along to the get_dataset method
        for package_name in package_order:
            try:
                dataset = self.get_dataset(dataset_name,
                                           package=package_name,
                                           **kwargs)
                if dataset is not None:
                    break
            except ImportError:
                continue
        else:
            from opus_core.datasets.dataset import Dataset
            from opus_core.resources import Resources

            resources = Resources(kwargs.get('arguments', {}))
            if use_hidden_id:
                id_name_default = []
            else:
                id_name_default = "%s_id" % dataset_name
            (table_name, module_name, class_name
             ) = self._table_module_class_names_for_dataset(dataset_name)
            ## set table_name and id_name_default as default values in resources (arguments)
            resources.merge_with_defaults({
                'dataset_name': dataset_name,
                'in_table_name': table_name,
                'out_table_name': table_name,
                'id_name': id_name_default
            })
            try:
                dataset = Dataset(resources=resources)
            except:
                # try to create a dataset using deprecated values
                (table_name, module_name, class_name
                 ) = self._table_module_class_names_for_dataset_deprecated(
                     dataset_name)
                resources = Resources(kwargs.get('arguments', {}))
                resources.merge_with_defaults({
                    'dataset_name': dataset_name,
                    'in_table_name': table_name,
                    'out_table_name': table_name,
                    'id_name': id_name_default
                })
                try:
                    dataset = Dataset(resources=resources)
                except:
                    logger.log_warning(
                        "Could not create a generic Dataset '%s'." %
                        dataset_name)
                    raise
                #TODO: uncomment this warning when we change to singular
                #logger.log_warning("Dataset %s was created using deprecated table name - using plural will not be supported in the future." % dataset_name)
        return dataset
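
Note the for/else construct in the helper above: the else suite runs only when the loop finishes without break, i.e. when no package in package_order produced the dataset. A tiny self-contained illustration of that control flow:

# Python's for/else: the else block executes only if the loop was not broken.
for package_name in ['urbansim', 'opus_core']:
    if package_name == 'nonexistent_package':
        break
else:
    print('no break occurred; fall back to constructing a generic Dataset')
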
    def __init__(self,
                 location_set,
                 model_name=None,
                 short_name=None,
                 sampler="opus_core.samplers.weighted_sampler",
                 utilities="opus_core.linear_utilities",
                 probabilities="opus_core.mnl_probabilities",
                 choices="opus_core.random_choices",
                 filter=None,
                 submodel_string=None,
                 location_id_string=None,
                 run_config=None,
                 estimate_config=None,
                 debuglevel=0,
                 dataset_pool=None,
                 variable_package="urbansim",
                 **kwargs):
        """
        :number_of_units_string:
          number of units string is used to determine whether a choice is over-filled, 
          by comparing it with number_of_agents_string in get_locations_vacancy().  
          TODO: How does it differ from capacity_string?
           
        """
        if model_name is not None:
            self.model_name = model_name
        if short_name is not None:
            self.model_short_name = short_name
        if (run_config is not None) and not isinstance(run_config, Resources):
            run_config = Resources(run_config)
        if (estimate_config
                is not None) and not isinstance(estimate_config, Resources):
            estimate_config = Resources(estimate_config)
        self.add_prefix_to_variable_names([
            "capacity_string", "number_of_agents_string",
            "number_of_units_string"
        ], location_set, variable_package, run_config)
        self.add_prefix_to_variable_names("weights_for_estimation_string",
                                          location_set, variable_package,
                                          estimate_config)

        LocationChoiceModel.__init__(self,
                                     location_set=location_set,
                                     sampler=sampler,
                                     utilities=utilities,
                                     probabilities=probabilities,
                                     choices=choices,
                                     filter=filter,
                                     submodel_string=submodel_string,
                                     location_id_string=location_id_string,
                                     run_config=run_config,
                                     estimate_config=estimate_config,
                                     debuglevel=debuglevel,
                                     dataset_pool=dataset_pool,
                                     **kwargs)
Example #6
    def test_my_inputs(self):
        storage = StorageFactory().get_storage('dict_storage')

        building_types_table_name = 'building_types'
        storage.write_table(table_name=building_types_table_name,
                            table_data={
                                'building_type_id':
                                array([1, 2]),
                                'name':
                                array(['residential', 'commercial']),
                                'units':
                                array(['residential_units', 'commercial_sqft'])
                            })

        buildings_table_name = 'buildings'
        storage.write_table(
            table_name=buildings_table_name,
            table_data={
                'building_id': arange(7) + 1,
                'building_type_id': array([1, 2, 1, 2, 1, 1, 2]),
                'sqft': array([100, 350, 1000, 0, 430, 95, 750]),
                'residential_units': array([300, 0, 100, 0, 1300, 600, 10])
            },
        )

        building_types = BuildingTypeDataset(
            in_storage=storage, in_table_name=building_types_table_name)
        buildings = BuildingDataset(in_storage=storage,
                                    in_table_name=buildings_table_name,
                                    resources=Resources({
                                        'building_categories': {
                                            'residential':
                                            array([200, 500, 1200]),
                                            'commercial': array([200, 500])
                                        }
                                    }))

        variable_names = [
            '%s_%s' % (self.variable_name_prefix, type_name)
            for type_name in ['commercial', 'residential']
        ]
        buildings.compute_variables(variable_names,
                                    resources=Resources(
                                        {'building_type': building_types}))

        should_be_residential = array([2, 0, 1, 0, 4, 3, 0])
        should_be_commercial = array([0, 2, 0, 1, 0, 0, 3])
        values_commercial = buildings.get_attribute(variable_names[0])
        values_residential = buildings.get_attribute(variable_names[1])

        self.assert_(ma.allequal(values_commercial, should_be_commercial),
                     'Error in ' + variable_names[0])
        self.assert_(ma.allequal(values_residential, should_be_residential),
                     'Error in ' + variable_names[1])
Example #7
    def __init__(self, **kargs):
        #        Estimator.__init__(self, settings=None, run_land_price_model_before_estimation=False, **kargs) # <-- old __init__
        #        Estimator.__init__(self, config=None, save_estimation_results=True) # <-- new __init__ doesn't work, but not needed

        parent_dir_path = package().get_package_parent_path()
        package_path = OpusPackage().get_path_for_package("biocomplexity")
        self.storage = StorageFactory().get_storage(
            'tab_storage', storage_location=os.path.join(package_path, 'data'))

        ## 1. directory path of full (4 county spatial extent) dataset
        flt_directory = os.path.join(parent_dir_path, "biocomplexity", "data",
                                     "LCCM_4County")

        ## 2. select (uncomment) one of the following choices of directory paths of subsetted sample input data/variables
        #        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "LCCM_small_test_set_opus")
        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity",
                                         "data", "data_for_estimation_all")
        #        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_estimation_all_orig")
        #        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_suburban_orig")
        #        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban")
        #        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban_orig")

        ## note - must rename lct-forusewith91sample.Float32 to lct.lf4 if doing 1991-1995
        ## note - must rename lct-forusewith95sample.Float32 to lct.lf4 if doing 1995-1999

        ## 3. select (uncomment) one of the following choices of land cover data (input data) date pairs (years)
        #        years = [1991, 1995]
        years = [1995, 1999]
        #        years = [1999, 2002]

        self.lc1 = LandCoverDataset(in_storage=StorageFactory().get_storage(
            "flt_storage",
            storage_location=os.path.join(flt_directory_est, str(years[0]))),
                                    resources=Resources({"lowercase": 1}))
        self.lc2 = LandCoverDataset(in_storage=StorageFactory().get_storage(
            "flt_storage",
            storage_location=os.path.join(flt_directory_est, str(years[1]))),
                                    resources=Resources({"lowercase": 1}))

        self.lc1_all = LandCoverDataset(
            in_storage=StorageFactory().get_storage(
                "flt_storage",
                storage_location=os.path.join(flt_directory, str(years[0]))),
            resources=Resources({"lowercase": 1}))
        self.lc1_all.flush_dataset()
        self.lc2_all = LandCoverDataset(
            in_storage=StorageFactory().get_storage(
                "flt_storage",
                storage_location=os.path.join(flt_directory, str(years[1]))),
            resources=Resources({"lowercase": 1}))
        self.lc2_all.flush_dataset()
Example #8
    def estimate(self,
                 specification,
                 dataset,
                 outcome_attribute="unit_price",
                 index=None,
                 procedure="opus_core.estimate_linear_regression",
                 data_objects=None,
                 estimate_config=None,
                 debuglevel=0):
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        if self.filter_attribute is not None:
            res = Resources({"debug": debuglevel})
            index = dataset.get_filtered_index(self.filter_attribute,
                                               threshold=0,
                                               index=index,
                                               dataset_pool=self.dataset_pool,
                                               resources=res)
        return RegressionModelWithAdditionInitialResiduals.estimate(
            self,
            specification,
            dataset,
            outcome_attribute,
            index,
            procedure,
            estimate_config=estimate_config,
            debuglevel=debuglevel)
    def test_my_inputs(self):
        storage = StorageFactory().get_storage('dict_storage')

        job_building_types_table_name = 'job_building_types'
        storage.write_table(table_name=job_building_types_table_name,
                            table_data={
                                'id': array([1, 2, 3, 4]),
                                'home_based': array([1, 0, 1, 0])
                            })

        jobs_table_name = 'jobs'
        storage.write_table(table_name=jobs_table_name,
                            table_data={
                                'job_id':
                                arange(10) + 1,
                                'building_type':
                                array([3, 3, 2, 2, 4, 2, 1, 3, 4, 1])
                            })

        job_building_types = JobBuildingTypeDataset(
            in_storage=storage, in_table_name=job_building_types_table_name)
        jobs = JobDataset(in_storage=storage, in_table_name=jobs_table_name)

        jobs.compute_variables(self.variable_name,
                               resources=Resources(
                                   {'job_building_type': job_building_types}))

        values = jobs.get_attribute(self.variable_name)

        should_be = array([0, 0, 1, 1, 1, 1, 0, 0, 1, 0])

        self.assert_(ma.allequal(values, should_be),
                     'Error in ' + self.variable_name)
Example #10
def prepare_for_running_macro(parser):
    from opus_core.file_utilities import get_resources_from_file
    parser.add_option("-r",
                      "--resources",
                      dest="resources_file_name",
                      action="store",
                      type="string",
                      help="Name of file containing resources")
    parser.add_option("-y",
                      "--year",
                      dest="year",
                      action="store",
                      type="int",
                      help="Year in which to 'run' the travel model")
    parser.add_option(
        "-o",
        "--output-file",
        dest="output_file",
        action="store",
        type="string",
        default=None,
        help="Output log file. If not given, it is written into the urbansim cache directory."
    )
    (options, args) = parser.parse_args()

    resources = Resources(get_resources_from_file(options.resources_file_name))

    SessionConfiguration(
        new_instance=True,
        package_order=resources['dataset_pool_configuration'].package_order,
        in_storage=AttributeCache())
    return (resources, options)
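
A hedged sketch of driving prepare_for_running_macro() from a script. The file names are assumptions; note that parse_args() is called inside the helper itself:

# Hypothetical driver script; invoked e.g. as:
#   python run_travel_model.py -r resources.pickle -y 2005
from optparse import OptionParser

parser = OptionParser()
resources, options = prepare_for_running_macro(parser)
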
Example #11
    def setUp(self):
        run_configuration = TestCacheConfiguration()
        SimulationState(new_instance=True)
        SessionConfiguration(run_configuration,
                             new_instance=True,
                             package_order=['urbansim', 'opus_core'],
                             in_storage=AttributeCache())

        self.base_year = run_configuration['base_year']
        self.temp_dir = tempfile.mkdtemp(prefix='opus_tmp')

        # Use the test cache.
        opus_core_path = package().get_opus_core_path()
        test_cache_path = os.path.join(opus_core_path, 'data', 'test_cache')
        new_cache_path = os.path.join(self.temp_dir, 'cache')
        copytree(test_cache_path, new_cache_path)

        # Make sure the copied files are writable.
        for (dirpath, dirnames, filenames) in os.walk(new_cache_path):
            for file_name in filenames:
                full_path = os.path.join(dirpath, file_name)
                os.chmod(full_path, S_IWRITE | S_IREAD)

        SimulationState().set_cache_directory(new_cache_path)
        SimulationState().set_current_time(self.base_year)
        self.config = Resources(run_configuration)

        cache_directory = SimulationState().get_cache_directory()
        self.assertEqual(self.temp_dir, os.path.split(cache_directory)[0])
Example #12
    def apply_filter(self, filter, agent_set, agents_index, submodel=-2):
        """ apply filter comparing to mean project size by submodel instead of 0, by shifting self.filter
        """
        project_size_filter = None
        if (filter is not None):
            if isinstance(filter, dict):
                submodel_filter = filter[submodel]
            else:
                submodel_filter = filter

            mean_project_size = agent_set.get_attribute(
                agent_set.get_attribute_name())[agents_index].mean()

            if isinstance(submodel_filter, str):
                resources = Resources({"debug": self.debug})
                self.choice_set.compute_variables(
                    [submodel_filter],
                    dataset_pool=self.dataset_pool,
                    resources=resources)
                filter_name = VariableName(submodel_filter)
                project_size_filter = self.choice_set.get_attribute(
                    filter_name.get_alias()) - mean_project_size
            else:
                project_size_filter = submodel_filter - mean_project_size

        return LocationChoiceModel.apply_filter(self,
                                                project_size_filter,
                                                agent_set=agent_set,
                                                agents_index=agents_index,
                                                submodel=submodel)
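
To make the shift concrete, here is a small numeric illustration (not the model's code): with a mean project size of 100, the shifted filter is positive only for choices whose filter value exceeds that mean, assuming the greater-than-zero convention used for filters elsewhere in these examples:

# Hypothetical illustration of the shifted filter.
from numpy import array, where

filter_values = array([50, 150, 100, 300])
mean_project_size = 100
project_size_filter = filter_values - mean_project_size   # [-50, 50, 0, 200]
passing = where(project_size_filter > 0)[0]               # indices 1 and 3
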
Example #13
    def _compute_variable_for_prior_year(self,
                                         dataset,
                                         full_name,
                                         time,
                                         resources=None):
        """Create a new dataset for this variable, compute the variable, and then return
        the values for this variable."""
        calling_dataset_pool = SessionConfiguration().get_dataset_pool()
        calling_time = SimulationState().get_current_time()
        SimulationState().set_current_time(time)
        try:
            # Get an empty dataset pool with same search paths.
            my_dataset_pool = DatasetPool(
                package_order=calling_dataset_pool.get_package_order(),
                storage=AttributeCache())

            ds = dataset.empty_dataset_like_me(in_storage=AttributeCache())

            # Don't pass any datasets via resources, since they may be from a different time.
            my_resources = Resources(resources)
            for key in my_resources.keys():
                # drop Dataset values (keys are names; values may be Datasets)
                if isinstance(my_resources[key], Dataset):
                    del my_resources[key]

            ds.compute_variables(full_name,
                                 my_dataset_pool,
                                 resources=my_resources)
            values = ds.get_attribute(full_name)
            return values
        finally:
            SimulationState().set_current_time(calling_time)
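
A hedged sketch of calling this helper from within the same class. The dataset and the fully qualified variable name are assumptions:

# Hypothetical sketch: compute last year's value of a variable on `gridcells`.
prior_year = SimulationState().get_current_time() - 1
values = self._compute_variable_for_prior_year(gridcells,
                                               'urbansim.gridcell.population',
                                               prior_year)
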
Example #14
    def write(self, resources=None, out_storage=None, out_table_name=None):
        """
        """  # TODO: insert docstring
        local_resources = Resources(resources)
        local_resources.merge_with_defaults({
            "field_submodel_id": self.field_submodel_id,
            "field_equation_id": self.field_equation_id,
            "field_coefficient_name": self.field_coefficient_name,
            "field_variable_name": self.field_variable_name,
            "field_fixed_value": self.field_fixed_value,
            "out_table_name": out_table_name
        })
        if out_storage is not None:
            self.out_storage = out_storage
        if not isinstance(self.out_storage, Storage):
            logger.log_warning(
                "out_storage has to be of type Storage. No EquationSpecifications written."
            )
            return

        submodel_ids = self.get_submodels()
        if submodel_ids.size == 0:
            # set submodel_id to -2 when there are no submodels or only one
            submodel_ids = resize(array([-2], dtype="int32"),
                                  len(self.get_coefficient_names()))

        equation_ids = self.get_equations()
        if equation_ids.size == 0:
            equation_ids = resize(array([-2], dtype="int32"),
                                  submodel_ids.size)

        values = {
            local_resources["field_submodel_id"]: submodel_ids,
            local_resources["field_equation_id"]: equation_ids,
            local_resources["field_coefficient_name"]:
            self.get_coefficient_names(),
            local_resources["field_variable_name"]:
            self.get_long_variable_names()
        }
        if self.fixed_values.size > 0:
            values[local_resources["field_fixed_value"]] = self.fixed_values
        for field in self.other_fields.keys():
            values[field] = self.other_fields[field]

        types = {
            local_resources["field_submodel_id"]: 'integer',
            local_resources["field_equation_id"]: 'integer',
            local_resources["field_coefficient_name"]: 'text',
            local_resources["field_variable_name"]: 'text'
        }

        local_resources.merge({
            "values": values,
            'valuetypes': types,
            "drop_table_flag": 1
        })

        self.out_storage.write_table(
            table_name=local_resources['out_table_name'],
            table_data=local_resources['values'])
    def prepare_for_run(self,
                        specification_storage=None,
                        specification_table=None,
                        coefficients_storage=None,
                        coefficients_table=None,
                        agent_set=None,
                        agents_filter=None,
                        data_objects=None,
                        **kwargs):

        spec, coeff = prepare_specification_and_coefficients(
            specification_storage=specification_storage,
            specification_table=specification_table,
            coefficients_storage=coefficients_storage,
            coefficients_table=coefficients_table,
            **kwargs)

        index = None  # avoid a NameError below when no filter is given
        if agents_filter is not None:
            agent_set.compute_variables(agents_filter,
                                        resources=Resources(data_objects))
            index = where(
                agent_set.get_attribute(
                    VariableName(agents_filter).get_alias()) > 0)[0]

        return (spec, coeff, index)
Example #16
    def test_read_resources_from_string(self):
        data = {"arg1": 1, "arg2": "2", "dict1": {"three": 3, "four": 4}}
        resources = Resources(data)
        write_resources_to_file(self.file_name, resources)
        resources_string = read_file_content(self.file_name)
        loaded_resources = get_resources_from_string(resources_string)
        self.assertEquals(resources, loaded_resources)
Example #17
    def predict(self, predicted_choice_id_name, agents_index=None):
        """ Run prediction. Currently makes sense only for choice models."""
        # Create temporary configuration where all words 'estimate' are replaced by 'run'
        tmp_config = Resources(self.config)
        
        if self.agents_index_for_prediction is None:
            self.agents_index_for_prediction = self.get_agent_set_index().copy()
            
        if agents_index is None:
            agents_index = self.agents_index_for_prediction
        
        tmp_config['models_configuration'][self.model_name]['controller']['run']['arguments']['coefficients'] = "coeff_est"
        tmp_config['models_configuration'][self.model_name]['controller']['run']['arguments']['agents_index'] = "agents_index"
        tmp_config['models_configuration'][self.model_name]['controller']['run']['arguments']['chunk_specification'] = "{'nchunks':1}"

        ### save specification and coefficients to cache (no matter the save_estimation_results flag)
        ### so that the prepare_for_run method could load specification and coefficients from there
        #output_configuration = self.config['output_configuration']
        #del self.config['output_configuration']
        #self.save_results()
        
        #self.config['output_configuration'] = output_configuration
        
        #self.model_system.run_year_namespace["coefficients"] = self.coefficients
        #del tmp_config['models_configuration'][self.model_name]['controller']['prepare_for_run']
        
        try:
            run_year_namespace = copy.copy(self.model_system.run_year_namespace)
        except:
            logger.log_error("The estimate() method must be run first")
            return False
        
        try:
            agents = self.get_agent_set()
            choice_id_name = self.get_choice_set().get_id_name()[0]
            # save current locations of agents
            current_choices = agents.get_attribute(choice_id_name).copy()
            dummy_data = zeros(current_choices.size, dtype=current_choices.dtype)-1
            #agents.modify_attribute(name=choice_id_name, data=dummy_data)  #reset choices for all agents
            agents.modify_attribute(name=choice_id_name, data=dummy_data, index=agents_index)  #reset choices for agents in agents_index
            
            run_year_namespace["process"] = "run"
            run_year_namespace["coeff_est"] = self.coefficients
            run_year_namespace["agents_index"] = agents_index
            run_year_namespace["processmodel_config"] = tmp_config['models_configuration'][self.model_name]['controller']['run']
            new_choices = self.model_system.do_process(run_year_namespace)
            
            #self.model_system.run(tmp_config, write_datasets_to_cache_at_end_of_year=False)
            #new_choices = agents.get_attribute(choice_id_name).copy()
            agents.modify_attribute(name=choice_id_name, data=current_choices)
            dummy_data[agents_index] = new_choices
            if predicted_choice_id_name not in agents.get_known_attribute_names():
                agents.add_primary_attribute(name=predicted_choice_id_name, data=dummy_data)
            else:
                agents.modify_attribute(name=predicted_choice_id_name, data=dummy_data)
            logger.log_status("Predictions saved into attribute " + predicted_choice_id_name)
            return True
        except Exception as e:
            logger.log_error("Error encountered in prediction: %s" % e)
            logger.log_stack_trace()
Example #18
        def run_model():
            households = HouseholdDataset(in_storage=storage,
                                          in_table_name='households')
            hlcm = RegionalHouseholdLocationChoiceModel(
                location_set=gridcells,
                compute_capacity_flag=False,
                choices="opus_core.random_choices_from_index",
                sample_size_locations=4)
            hlcm.run(specification,
                     coefficients,
                     agent_set=households,
                     debuglevel=1)

            # get results
            gridcells.compute_variables(
                ["urbansim.gridcell.number_of_households"],
                resources=Resources({"household": households}))
            result_area1 = gridcells.get_attribute_by_id(
                "number_of_households",
                arange(ngcs_attr) + 1)
            result_area2 = gridcells.get_attribute_by_id(
                "number_of_households", arange(ngcs_attr + 1, ngcs + 1))
            gridcells.delete_one_attribute("number_of_households")
            result = concatenate((result_area1, result_area2))
            return result
Example #19
    def load(self, resources=None, in_storage=None, in_table_name=None):
        """
        """  # TODO: insert docstring
        local_resources = Resources(resources)
        local_resources.merge_with_defaults({
            "field_submodel_id": self.field_submodel_id,
            "field_coefficient_name": self.field_coefficient_name,
            "field_estimate": self.field_estimate,
            "field_standard_error": self.field_standard_error,
            "other_fields": self.other_fields
        })
        if in_storage is not None:
            self.in_storage = in_storage
        if not isinstance(self.in_storage, Storage):
            logger.log_warning(
                "in_storage has to be of type Storage. No coefficients loaded."
            )
        else:
            data = self.in_storage.load_table(table_name=in_table_name)
            submodels = data[local_resources["field_submodel_id"]]
            self.names = data[local_resources["field_coefficient_name"]]
            self.values = data[local_resources["field_estimate"]]
            self.standard_errors = data[
                local_resources["field_standard_error"]]
            for measure in local_resources["other_fields"]:
                if measure in data.keys():
                    self.other_measures[measure] = data[measure]
            if submodels.max() >= 0:
                self.submodels = submodels
            self.check_consistency()
Example #20
    def prepare_for_simulation(self, config, cache_directory=None):
        self.config = Resources(config)
        base_cache_dir = self.config[
            'creating_baseyear_cache_configuration'].cache_directory_root

        self.simulation_state = SimulationState(new_instance=True,
                                                base_cache_dir=base_cache_dir,
                                                start_time=self.config.get(
                                                    'base_year', 0))

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        if self.config['cache_directory'] is None:
            self.config[
                'cache_directory'] = self.simulation_state.get_cache_directory(
                )

        SessionConfiguration(
            new_instance=True,
            package_order=self.config['dataset_pool_configuration'].
            package_order,
            in_storage=AttributeCache())

        if config['creating_baseyear_cache_configuration'].cache_from_database:
            ForkProcess().fork_new_process(
                self.config['creating_baseyear_cache_configuration'].
                cache_scenario_database, self.config)
        else:
            CacheFltData().run(self.config)
Example #21
    def prepare_for_simulation(self, run_configuration, cache_directory=None):
        self.config = Resources(run_configuration)
        self.simulation_state = SimulationState(new_instance=True,
                                                base_cache_dir=cache_directory)

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        if self.config['cache_directory'] is None:
            self.config[
                'cache_directory'] = self.simulation_state.get_cache_directory(
                )

        SessionConfiguration(
            new_instance=True,
            package_order=self.config['dataset_pool_configuration'].
            package_order,
            in_storage=AttributeCache())

        ForkProcess().fork_new_process(
            self.config['creating_baseyear_cache_configuration'].
            cache_scenario_database, self.config)

        # Create output database (normally done by run manager)
        if 'estimation_database_configuration' in self.config:
            db_server = DatabaseServer(
                self.config['estimation_database_configuration'])
            if not db_server.has_database(
                    self.config['estimation_database_configuration'].
                    database_name):
                db_server.create_database(
                    self.config['estimation_database_configuration'].
                    database_name)
Example #22
    def test_my_inputs(self):
        storage = StorageFactory().get_storage('dict_storage')

        building_types_table_name = 'building_types'        
        storage.write_table(
                table_name=building_types_table_name,
                table_data={
                    'building_type_id':array([0,2]), 
                    'name': array(['foo', 'commercial'])
                    }
            )

        buildings_table_name = 'buildings'        
        storage.write_table(
                table_name=buildings_table_name,
                table_data={
                    'building_id':array([1,2,3]),
                    'building_type_id': array([2,0,2])
                    }
            )

        building_types = BuildingTypeDataset(in_storage=storage, in_table_name=building_types_table_name)
        buildings = BuildingDataset(in_storage=storage, in_table_name=buildings_table_name)
        
        buildings.compute_variables(self.variable_name, resources=Resources({'building_type':building_types}))
        
        values = buildings.get_attribute(self.variable_name)
        should_be = array([1,0,1])
        
        self.assert_(ma.allequal(values, should_be),
            'Error in ' + self.variable_name)
Example #23
        def run_model_2():
            storage = StorageFactory().get_storage('dict_storage')

            storage.write_table(table_name='households',
                                table_data=household_data)
            households = HouseholdDataset(in_storage=storage,
                                          in_table_name='households')

            storage.write_table(table_name='gridcells',
                                table_data=gridcell_data)
            gridcells = GridcellDataset(in_storage=storage,
                                        in_table_name='gridcells')

            hlcm = HouseholdLocationChoiceModelCreator().get_model(
                location_set=gridcells,
                compute_capacity_flag=False,
                choices="opus_core.random_choices_from_index",
                sample_size_locations=8)
            hlcm.run(specification,
                     coefficients,
                     agent_set=households,
                     debuglevel=1)

            # get results
            gridcells.compute_variables(
                ["urbansim.gridcell.number_of_households"],
                resources=Resources({"household": households}))
            result_more_attractive = gridcells.get_attribute_by_id(
                "number_of_households",
                arange(ngcs_attr) + 1)
            result_less_attractive = gridcells.get_attribute_by_id(
                "number_of_households", arange(ngcs_attr + 1, ngcs + 1))
            return array(
                [result_more_attractive.sum(),
                 result_less_attractive.sum()])
Example #24
        def run_model():
            hlcm = HouseholdLocationChoiceModelCreator().get_model(
                location_set=gridcells,
                compute_capacity_flag=False,
                choices="opus_core.random_choices_from_index",
                sample_size_locations=8)
            hlcm.run(specification,
                     coefficients,
                     agent_set=households,
                     debuglevel=1)

            # get results
            gridcells.compute_variables(
                ["urbansim.gridcell.number_of_households"],
                resources=Resources({"household": households}))
            result_more_attractive = gridcells.get_attribute_by_id(
                "number_of_households",
                arange(ngcs_attr) + 1)
            result_less_attractive = gridcells.get_attribute_by_id(
                "number_of_households", arange(ngcs_attr + 1, ngcs + 1))
            households.set_values_of_one_attribute(attribute="grid_id",
                                                   values=hh_grid_ids)
            gridcells.delete_one_attribute("number_of_households")
            result = concatenate(
                (result_more_attractive, result_less_attractive))
            return result
    def test_number_of_agents_expression(self):
        expr = "mygridcell.number_of_agents(myjob)+10"
        storage = StorageFactory().get_storage('dict_storage')
        gridcell_grid_id = array([1, 2, 3])
        # an array of 4 jobs; the 1st job's grid_id is 2 (it is in gridcell 2), etc.
        job_grid_id = array([2, 1, 3, 1])
        storage.write_table(table_name='gridcells',
                            table_data={'gid': gridcell_grid_id})
        storage.write_table(table_name='jobs',
                            table_data={
                                'jid': arange(4) + 1,
                                'gid': job_grid_id
                            })
        gs = Dataset(in_storage=storage,
                     in_table_name='gridcells',
                     id_name="gid",
                     dataset_name="mygridcell")
        jobs = Dataset(in_storage=storage,
                       in_table_name='jobs',
                       id_name="jid",
                       dataset_name="myjob")
        values = gs.compute_variables([expr],
                                      resources=Resources({
                                          "myjob": jobs,
                                          "mygridcell": gs
                                      }))
        should_be = array([12, 11, 11])
        self.assert_(ma.allclose(values, should_be, rtol=1e-7),
                     msg="Error in " + expr)
Example #26
    def run_chunk(self, index, dataset, specification, coefficients):
        self.specified_coefficients = SpecifiedCoefficients().create(
            coefficients, specification, neqs=1)
        compute_resources = Resources({"debug": self.debug})
        submodels = self.specified_coefficients.get_submodels()
        self.get_status_for_gui().update_pieces_using_submodels(
            submodels=submodels, leave_pieces=2)
        self.map_agents_to_submodels(submodels,
                                     self.submodel_string,
                                     dataset,
                                     index,
                                     dataset_pool=self.dataset_pool,
                                     resources=compute_resources)
        variables = self.specified_coefficients.get_full_variable_names_without_constants(
        )
        self.debug.print_debug("Compute variables ...", 4)
        self.increment_current_status_piece()
        dataset.compute_variables(variables,
                                  dataset_pool=self.dataset_pool,
                                  resources=compute_resources)
        data = {}
        coef = {}
        outcome = self.initial_values[index].copy()
        for submodel in submodels:
            coef[submodel] = SpecifiedCoefficientsFor1Submodel(
                self.specified_coefficients, submodel)
            self.coefficient_names[submodel] = coef[
                submodel].get_coefficient_names_without_constant()[0, :]
            self.debug.print_debug(
                "Compute regression for submodel " + str(submodel), 4)
            self.increment_current_status_piece()
            self.data[submodel] = dataset.create_regression_data(
                coef[submodel],
                index=index[self.observations_mapping[submodel]])
            nan_index = where(isnan(self.data[submodel]))[1]
            inf_index = where(isinf(self.data[submodel]))[1]
            vnames = asarray(coef[submodel].get_variable_names())
            if nan_index.size > 0:
                nan_var_index = unique(nan_index)
                self.data[submodel] = nan_to_num(self.data[submodel])
                logger.log_warning(
                    "NaN(Not A Number) is returned from variable %s; it is replaced with %s."
                    % (vnames[nan_var_index], nan_to_num(nan)))
                #raise ValueError, "NaN(Not A Number) is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % vnames[nan_var_index]
            if inf_index.size > 0:
                inf_var_index = unique(inf_index)
                self.data[submodel] = nan_to_num(self.data[submodel])
                logger.log_warning(
                    "Inf is returned from variable %s; it is replaced with %s."
                    % (vnames[inf_var_index], nan_to_num(inf)))
                #raise ValueError, "Inf is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % vnames[inf_var_index]

            if (self.data[submodel].shape[0] >
                    0) and (self.data[submodel].size >
                            0):  # observations for this submodel available
                outcome[self.observations_mapping[submodel]] = \
                    self.regression.run(self.data[submodel], coef[submodel].get_coefficient_values()[0,:],
                        resources=self.run_config).astype(outcome.dtype)
        return outcome
    def test_number_of_agents(self):
        expr = "mygridcell.number_of_agents(myjob)"
        storage = StorageFactory().get_storage('dict_storage')
        gridcell_grid_id = array([1, 2, 3])
        # an array of 4 jobs; the 1st job's grid_id is 2 (it is in gridcell 2), etc.
        job_grid_id = array([2, 1, 3, 1])
        storage.write_table(table_name='gridcells', table_data={'gid': gridcell_grid_id})
        storage.write_table(table_name='jobs', table_data={'jid': arange(4) + 1, 'gid': job_grid_id})
        gs = Dataset(in_storage=storage, in_table_name='gridcells', id_name="gid", dataset_name="mygridcell")
        jobs = Dataset(in_storage=storage, in_table_name='jobs', id_name="jid", dataset_name="myjob")
        values = gs.compute_variables([expr], resources=Resources({"myjob": jobs, "mygridcell": gs}))
        should_be = array([2, 1, 1])
        self.assert_(ma.allclose(values, should_be, rtol=1e-7), msg="Error in " + expr)
        # change gids of jobs (to test that dependency recomputation works)
        jobs.modify_attribute(name="gid", data=array([1, 1, 1, 1]))
        values2 = gs.compute_variables([expr], resources=Resources({"myjob": jobs, "mygridcell": gs}))
        should_be2 = array([4, 0, 0])
        self.assert_(ma.allclose(values2, should_be2, rtol=1e-7), msg="Error in " + expr)
Example #28
    def __init__(self, config=None, save_estimation_results=False):
        if 'cache_directory' not in config or config['cache_directory'] is None:
            raise KeyError("The cache directory must be specified in the "
                "given configuration, giving the filesystem path to the cache "
                "directory containing the data with which to estimate. Please "
                "check that your configuration contains the 'cache_directory' "
                "entry and that it is not None.")

        self.simulation_state = SimulationState(new_instance=True, start_time=config.get('base_year', 0))
        self.simulation_state.set_cache_directory(config['cache_directory'])

        SessionConfiguration(new_instance=True,
                             package_order=config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())
        self.config = Resources(config)
        self.save_estimation_results = save_estimation_results
        self.debuglevel = self.config.get("debuglevel", 4)
        self.model_system = ModelSystem()
        self.agents_index_for_prediction = None
        
        models = self.config.get('models',[])

        self.model_name = None
        if "model_name" in config.keys():
            self.model_name = config["model_name"]
        else:
            for model in models:
                if isinstance(model, dict):
                    model_name = model.keys()[0]
                    if (model[model_name] == "estimate") or (isinstance(model[model_name], list)
                        and ("estimate" in model[model_name])):
                            self.model_name = model_name
                            break
        estimate_config_changes = self.config.get('config_changes_for_estimation', {}).get('estimate_config', {})
        if len(estimate_config_changes) > 0:
            change = Resources({'models_configuration': {self.model_name: {'controller': {'init': {'arguments': {}}}}}})
            estimate_config_str = self.config['models_configuration'].get(self.model_name, {}).get('controller', {}).get('init', {}).get('arguments', {}).get('estimate_config', '{}')
            estimate_config = Resources({})
            try:
                estimate_config = eval(estimate_config_str)
            except:
                pass
 
            estimate_config.merge(estimate_config_changes)
            self.config.merge(change)
            self.config['models_configuration'][self.model_name]['controller']['init']['arguments']['estimate_config'] = 'Resources(%s)' % estimate_config
Example #29
    def test_unplaced_agents_decrease_available_space(self):
        """Using the household location choice model, create a set of available spaces and
        2000 unplaced agents (along with 5000 placed agents). Run the model, and check that
        the unplaced agents were placed, and the number of available spaces has decreased"""
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='households',
                            table_data={
                                'grid_id': array(2000 * [0] + 5000 * [1]),
                                'household_id': arange(7000) + 1
                            })

        storage.write_table(table_name='gridcells',
                            table_data={
                                'residential_units': array(50 * [10000]),
                                'grid_id': arange(50) + 1
                            })

        households = HouseholdDataset(in_storage=storage,
                                      in_table_name='households')
        gridcells = GridcellDataset(in_storage=storage,
                                    in_table_name='gridcells')

        coefficients = Coefficients(names=("dummy", ), values=(0.1, ))
        specification = EquationSpecification(
            variables=("gridcell.residential_units", ),
            coefficients=("dummy", ))
        """need to specify to the household location choice model exactly which households are moving,
        because by default it assumes all current households want to move, but in this test,
        the 5000 households already in gridcell #1 shouldn't move.
        here, we specify that only the unplaced households should be moved."""
        agents_index = where(households.get_attribute("grid_id") == 0)[0]

        hlcm = HouseholdLocationChoiceModelCreator().get_model(
            location_set=gridcells,
            choices="opus_core.random_choices_from_index",
            sample_size_locations=30)
        hlcm.run(specification,
                 coefficients,
                 agent_set=households,
                 agents_index=agents_index,
                 debuglevel=1)

        gridcells.compute_variables(
            ["urbansim.gridcell.vacant_residential_units"],
            resources=Resources({"household": households}))
        vacancies = gridcells.get_attribute("vacant_residential_units")
        """since there were 5000 households already in gridcell #1, and gridcell #1 has
        10000 residential units, there should be no more than 5000 vacant residential units
        in gridcell #1 after running this model"""
        self.assertEqual(vacancies[0] <= 5000, True,
                         "Error: %d" % (vacancies[0], ))
        """there should be exactly 430000 vacant residential units after the model run,
        because there were originally 50 gridcells with 10000 residential units each,
        and a total of 7000 units are occupied after the run"""
        self.assertEqual(
            sum(vacancies) == 50 * 10000 - 7000, True,
            "Error: %d" % (sum(vacancies)))
Example #30
    def write(self, resources=None, out_storage=None, out_table_name=None):
        """
        """  # TODO: insert docstring
        local_resources = Resources(resources)
        local_resources.merge_with_defaults({
            "field_submodel_id": self.field_submodel_id,
            "field_coefficient_name": self.field_coefficient_name,
            "field_estimate": self.field_estimate,
            "field_standard_error": self.field_standard_error,
            "other_fields": self.other_fields,
            "out_table_name": out_table_name
        })
        if out_storage is not None:
            self.out_storage = out_storage
        if not isinstance(self.out_storage, Storage):
            logger.log_warning(
                "out_storage has to be of type Storage. No coefficients written."
            )
            return

        submodels = self.get_submodels()
        if submodels.size <= 0:
            submodels = resize(array([-2], dtype=int32), self.size())
        values = {
            local_resources["field_submodel_id"]: submodels,
            local_resources["field_coefficient_name"]: self.get_names(),
            local_resources["field_estimate"]: self.get_values(),
            local_resources["field_standard_error"]:
            self.get_standard_errors()
        }
        for measure in self.other_measures.keys():
            values[measure] = self.other_measures[measure]
        types = {
            local_resources["field_submodel_id"]: 'integer',
            local_resources["field_coefficient_name"]: 'text',
            local_resources["field_estimate"]: 'double',
            local_resources["field_standard_error"]: 'double'
        }
        attrtypes = {
            local_resources["field_submodel_id"]: AttributeType.PRIMARY,
            local_resources["field_coefficient_name"]: AttributeType.PRIMARY,
            local_resources["field_estimate"]: AttributeType.PRIMARY,
            local_resources["field_standard_error"]: AttributeType.PRIMARY
        }
        for measure in self.other_measures.keys():
            types[measure] = 'double'
            attrtypes[measure] = AttributeType.PRIMARY
        local_resources.merge({
            "values": values,
            'valuetypes': types,
            "drop_table_flag": 1,
            "attrtype": attrtypes
        })

        self.out_storage.write_table(
            table_name=local_resources['out_table_name'],
            table_data=local_resources['values'])
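
A hedged sketch of round-tripping coefficients through an in-memory storage with the write() method above and the load() method from Example #19. The table name is an assumption:

# Hypothetical sketch: persist coefficients to a dict storage and reload them.
out = StorageFactory().get_storage('dict_storage')
coefficients.write(out_storage=out, out_table_name='estimated_coefficients')
coefficients.load(in_storage=out, in_table_name='estimated_coefficients')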