def prepare_for_estimate(self, specification_dict=None, specification_storage=None,
                         specification_table=None, events_for_estimation_storage=None,
                         events_for_estimation_table=None, agents_filter='',
                         compute_variables=None, data_objects=None):
    """Load the estimation specification and optionally build a
    'development_project' dataset of events to estimate on.

    Returns a (specification, projects) tuple; 'projects' is None when
    'events_for_estimation_storage' is not given.
    """
    # Fix: mutable default arguments ([] / {}) are shared across calls;
    # use None sentinels instead.
    if compute_variables is None:
        compute_variables = []
    if data_objects is None:
        data_objects = {}
    specification = get_specification_for_estimation(specification_dict,
                                                     specification_storage,
                                                     specification_table)
    projects = None
    # create agents for estimation
    if events_for_estimation_storage is not None:
        projects = Dataset(in_storage=events_for_estimation_storage,
                           in_table_name=events_for_estimation_table,
                           id_name=[],
                           dataset_name='development_project')
        if compute_variables:
            projects.compute_variables(compute_variables,
                                       resources=Resources(data_objects))
            # needs to be a primary attribute because of the join method below
            #projects.add_primary_attribute(estimation_set.get_attribute(location_id_variable),
            #                               VariableName(location_id_variable).get_alias())
        if agents_filter:
            values = projects.compute_variables(agents_filter,
                                                resources=Resources(data_objects))
            index = where(values > 0)[0]
            projects.subset_by_index(index, flush_attributes_if_not_loaded=False)
    return (specification, projects)
def run(self, specification, coefficients, dataset, index=None,
        chunk_specification=None, data_objects=None, run_config=None,
        initial_values=None, procedure=None, debuglevel=0):
    """Run the regression model in chunks over 'dataset'.

    'specification' is of type EquationSpecification,
    'coefficients' is of type Coefficients,
    'dataset' is of type Dataset,
    'index' are indices of individuals in dataset for which the model runs.
        If it is None, the whole dataset is considered.
    'chunk_specification' determines number of chunks in which the simulation
        is processed.
    'data_objects' is a dictionary where each key is the name of an data
        object ('zone', ...) and its value is an object of class Dataset.
    'run_config' is of type Resources, it gives additional arguments for
        the run. If 'procedure' is given, it overwrites the
        regression_procedure of the constructor.
    'initial_values' is an array of the initial values of the results. It
        will be overwritten by the results for those elements that are
        handled by the model (defined by submodels in the specification).
        By default the results are initialized with 0.
    'debuglevel' overwrites the constructor 'debuglevel'.
    """
    self.debug.flag = debuglevel
    # Fix: compare with None using identity (PEP 8), not '=='.
    if run_config is None:
        run_config = Resources()
    # Accept a plain dict for run_config and promote it to Resources.
    if not isinstance(run_config, Resources) and isinstance(run_config, dict):
        run_config = Resources(run_config)
    self.run_config = run_config.merge_with_defaults(self.run_config)
    self.run_config.merge({"debug": self.debug})
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    self.dataset_pool.replace_dataset(dataset.get_dataset_name(), dataset)
    if procedure is not None:
        self.regression = RegressionModelFactory().get_model(name=procedure)
    if initial_values is None:
        self.initial_values = zeros((dataset.size(),), dtype=float32)
    else:
        self.initial_values = zeros((dataset.size(),),
                                    dtype=initial_values.dtype)
        self.initial_values[index] = initial_values
    if dataset.size() <= 0:  # no data loaded yet
        dataset.get_id_attribute()
    if index is None:
        index = arange(dataset.size())
    result = ChunkModel.run(self, chunk_specification, dataset, index, float32,
                            specification=specification,
                            coefficients=coefficients)
    return result
def prepare_for_estimate(self, specification_dict=None, specification_storage=None,
                         specification_table=None, agent_set=None,
                         agents_for_estimation_storage=None,
                         agents_for_estimation_table=None, join_datasets=False,
                         index_to_unplace=None, portion_to_unplace=1.0,
                         agent_filter=None, data_objects=None):
    """Load the estimation specification, optionally unplace a portion of
    agents, and build the index of agents used for estimation.

    Returns (specification, index) where 'index' selects rows of 'agent_set'.
    """
    from opus_core.model import get_specification_for_estimation
    # Fix: mutable default argument ({}) is shared across calls.
    if data_objects is None:
        data_objects = {}
    specification = get_specification_for_estimation(specification_dict,
                                                     specification_storage,
                                                     specification_table)
    if (agent_set is not None) and (index_to_unplace is not None):
        if self.location_id_string is not None:
            agent_set.compute_variables(self.location_id_string,
                                        resources=Resources(data_objects))
        if portion_to_unplace < 1:
            unplace_size = int(portion_to_unplace * index_to_unplace.size)
            end_index_to_unplace = sample_noreplace(index_to_unplace,
                                                    unplace_size)
        else:
            end_index_to_unplace = index_to_unplace
        logger.log_status("Unplace " + str(end_index_to_unplace.size) +
                          " agents.")
        # -1 marks an agent as unplaced.
        agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                   -1 * ones(end_index_to_unplace.size),
                                   end_index_to_unplace)
    # create agents for estimation
    if agents_for_estimation_storage is not None:
        estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                 in_table_name=agents_for_estimation_table,
                                 id_name=agent_set.get_id_name(),
                                 dataset_name=agent_set.get_dataset_name())
        if agent_filter is not None:
            estimation_set.compute_variables(agent_filter,
                                             resources=Resources(data_objects))
            index = where(estimation_set.get_attribute(agent_filter) > 0)[0]
            estimation_set.subset_by_index(index,
                                           flush_attributes_if_not_loaded=False)
        if join_datasets:
            agent_set.join_by_rows(estimation_set,
                                   require_all_attributes=False,
                                   change_ids_if_not_unique=True)
            # The joined rows were appended at the end of agent_set.
            index = arange(agent_set.size() - estimation_set.size(),
                           agent_set.size())
        else:
            index = agent_set.get_id_index(estimation_set.get_id_attribute())
    else:
        index = arange(agent_set.size())
    return (specification, index)
def _search_for_dataset_helper(self, dataset_name, package_order, use_hidden_id,
                               **kwargs):
    """Search the packages in 'package_order' for a dataset class for
    'dataset_name'; if none provides it, fall back to constructing a generic
    Dataset from the table-name convention (current first, then deprecated).
    """
    # this part of the search_for_dataset code is factored into a helper method, rather than passing in
    # use_hidden_id as a keyword parameter with a default value of False, so that we don't pass this
    # keyword parameter along to the get_dataset method
    for package_name in package_order:
        try:
            dataset = self.get_dataset(dataset_name, package=package_name,
                                       **kwargs)
            if dataset is not None:
                break
        except ImportError:
            continue
    else:
        # for/else: no package supplied the dataset -> build a generic one.
        from opus_core.datasets.dataset import Dataset
        from opus_core.resources import Resources
        resources = Resources(kwargs.get('arguments', {}))
        if use_hidden_id:
            id_name_default = []
        else:
            id_name_default = "%s_id" % dataset_name
        (table_name, module_name, class_name) = \
            self._table_module_class_names_for_dataset(dataset_name)
        ## set table_name and id_name_default as default values in resources (arguments)
        resources.merge_with_defaults({'dataset_name': dataset_name,
                                       'in_table_name': table_name,
                                       'out_table_name': table_name,
                                       'id_name': id_name_default})
        try:
            dataset = Dataset(resources=resources)
        except Exception:
            # Fix: narrowed from a bare 'except:' so KeyboardInterrupt and
            # SystemExit are not swallowed.
            # try to create a dataset using deprecated values
            (table_name, module_name, class_name) = \
                self._table_module_class_names_for_dataset_deprecated(dataset_name)
            resources = Resources(kwargs.get('arguments', {}))
            resources.merge_with_defaults({'dataset_name': dataset_name,
                                           'in_table_name': table_name,
                                           'out_table_name': table_name,
                                           'id_name': id_name_default})
            try:
                dataset = Dataset(resources=resources)
            except Exception:
                logger.log_warning(
                    "Could not create a generic Dataset '%s'." % dataset_name)
                raise
            #TODO: uncomment this warning when we change to singular
            #logger.log_warning("Dataset %s was created using deprecated table name - using plural will not be supported in the future." % dataset_name)
    return dataset
def __init__(self, location_set, model_name=None, short_name=None,
             sampler="opus_core.samplers.weighted_sampler",
             utilities="opus_core.linear_utilities",
             probabilities="opus_core.mnl_probabilities",
             choices="opus_core.random_choices", filter=None,
             submodel_string=None, location_id_string=None,
             run_config=None, estimate_config=None, debuglevel=0,
             dataset_pool=None, variable_package="urbansim", **kwargs):
    """Normalize the configuration objects, prefix configured variable names
    with 'variable_package', and delegate to LocationChoiceModel.__init__.

    :number_of_units_string: number of units string is used to determine
    whether a choice is over-filled, by comparing it with
    number_of_agents_string in get_locations_vacancy().
    TODO: How does it differ from capacity_string?
    """
    # Override the class-level names only when explicit ones were supplied.
    if model_name is not None:
        self.model_name = model_name
    if short_name is not None:
        self.model_short_name = short_name
    # Promote plain dicts to Resources objects.
    if run_config is not None and not isinstance(run_config, Resources):
        run_config = Resources(run_config)
    if estimate_config is not None and not isinstance(estimate_config,
                                                      Resources):
        estimate_config = Resources(estimate_config)
    run_time_names = ["capacity_string", "number_of_agents_string",
                      "number_of_units_string"]
    self.add_prefix_to_variable_names(run_time_names, location_set,
                                      variable_package, run_config)
    self.add_prefix_to_variable_names("weights_for_estimation_string",
                                      location_set, variable_package,
                                      estimate_config)
    LocationChoiceModel.__init__(self,
                                 location_set=location_set,
                                 sampler=sampler,
                                 utilities=utilities,
                                 probabilities=probabilities,
                                 choices=choices,
                                 filter=filter,
                                 submodel_string=submodel_string,
                                 location_id_string=location_id_string,
                                 run_config=run_config,
                                 estimate_config=estimate_config,
                                 debuglevel=debuglevel,
                                 dataset_pool=dataset_pool,
                                 **kwargs)
def test_my_inputs(self):
    """Category-count variables: each building is binned into its type's
    size categories; compare against hand-computed bin indices."""
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='building_types',
        table_data={
            'building_type_id': array([1, 2]),
            'name': array(['residential', 'commercial']),
            'units': array(['residential_units', 'commercial_sqft']),
        })
    storage.write_table(
        table_name='buildings',
        table_data={
            'building_id': arange(7) + 1,
            'building_type_id': array([1, 2, 1, 2, 1, 1, 2]),
            'sqft': array([100, 350, 1000, 0, 430, 95, 750]),
            'residential_units': array([300, 0, 100, 0, 1300, 600, 10]),
        })
    building_types = BuildingTypeDataset(in_storage=storage,
                                         in_table_name='building_types')
    categories = {'residential': array([200, 500, 1200]),
                  'commercial': array([200, 500])}
    buildings = BuildingDataset(
        in_storage=storage,
        in_table_name='buildings',
        resources=Resources({'building_categories': categories}))
    variable_names = ['%s_%s' % (self.variable_name_prefix, kind)
                      for kind in ['commercial', 'residential']]
    buildings.compute_variables(
        variable_names,
        resources=Resources({'building_type': building_types}))
    expected_commercial = array([0, 2, 0, 1, 0, 0, 3])
    expected_residential = array([2, 0, 1, 0, 4, 3, 0])
    self.assert_(ma.allequal(buildings.get_attribute(variable_names[0]),
                             expected_commercial),
                 'Error in ' + variable_names[0])
    self.assert_(ma.allequal(buildings.get_attribute(variable_names[1]),
                             expected_residential),
                 'Error in ' + variable_names[1])
def __init__(self, **kargs):
    """Set up input storage and the land-cover datasets (sampled estimation
    data plus the full 4-county extent) used for LCCM estimation."""
    # Estimator.__init__(self, settings=None, run_land_price_model_before_estimation=False, **kargs) # <-- old __init__
    # Estimator.__init__(self, config=None, save_estimation_results=True) # <-- new __init__ doesn't work, but not needed
    parent_dir_path = package().get_package_parent_path()
    package_path = OpusPackage().get_path_for_package("biocomplexity")
    self.storage = StorageFactory().get_storage(
        'tab_storage',
        storage_location=os.path.join(package_path, 'data'))
    # 1. directory path of the full (4-county spatial extent) dataset
    flt_directory = os.path.join(parent_dir_path, "biocomplexity", "data",
                                 "LCCM_4County")
    # 2. choose one of the subsetted sample input directories; alternatives:
    #    LCCM_small_test_set_opus, data_for_estimation_all_orig,
    #    data_for_suburban_orig, data_for_urban, data_for_urban_orig
    flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data",
                                     "data_for_estimation_all")
    # note - must rename lct-forusewith91sample.Float32 to lct.lf4 if doing 1991-1995
    # note - must rename lct-forusewith95sample.Float32 to lct.lf4 if doing 1995-1999
    # 3. choose one land-cover date pair: [1991, 1995], [1995, 1999], [1999, 2002]
    years = [1995, 1999]

    def _load_land_cover(directory, year):
        # Each dataset gets its own Resources instance, as before.
        return LandCoverDataset(
            in_storage=StorageFactory().get_storage(
                "flt_storage",
                storage_location=os.path.join(directory, str(year))),
            resources=Resources({"lowercase": 1}))

    self.lc1 = _load_land_cover(flt_directory_est, years[0])
    self.lc2 = _load_land_cover(flt_directory_est, years[1])
    self.lc1_all = _load_land_cover(flt_directory, years[0])
    self.lc1_all.flush_dataset()
    self.lc2_all = _load_land_cover(flt_directory, years[1])
    self.lc2_all.flush_dataset()
def estimate(self, specification, dataset, outcome_attribute="unit_price",
             index=None, procedure="opus_core.estimate_linear_regression",
             data_objects=None, estimate_config=None, debuglevel=0):
    """Estimate the regression, optionally restricting 'index' to rows
    passing 'self.filter_attribute', then delegate to the parent class.
    """
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    # Fix: '<>' is removed in Python 3; use 'is not None'.
    if self.filter_attribute is not None:
        res = Resources({"debug": debuglevel})
        index = dataset.get_filtered_index(self.filter_attribute,
                                           threshold=0, index=index,
                                           dataset_pool=self.dataset_pool,
                                           resources=res)
    return RegressionModelWithAdditionInitialResiduals.estimate(
        self, specification, dataset, outcome_attribute, index, procedure,
        estimate_config=estimate_config, debuglevel=debuglevel)
def test_my_inputs(self):
    """Each job inherits 'home_based' from its building type; verify the
    computed flags against the hand-built expectation."""
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='job_building_types',
        table_data={'id': array([1, 2, 3, 4]),
                    'home_based': array([1, 0, 1, 0])})
    storage.write_table(
        table_name='jobs',
        table_data={'job_id': arange(10) + 1,
                    'building_type': array([3, 3, 2, 2, 4, 2, 1, 3, 4, 1])})
    job_building_types = JobBuildingTypeDataset(
        in_storage=storage, in_table_name='job_building_types')
    jobs = JobDataset(in_storage=storage, in_table_name='jobs')
    jobs.compute_variables(
        self.variable_name,
        resources=Resources({'job_building_type': job_building_types}))
    actual = jobs.get_attribute(self.variable_name)
    # Types 1 and 3 are home-based, so jobs with building_type in {1, 3} -> 0? No:
    # home_based is 1 for types 1 and 3; jobs with types [3,3,2,2,4,2,1,3,4,1]
    # therefore map to [1,1,0,0,0,0,1,1,0,1] inverted by the variable under test.
    expected = array([0, 0, 1, 1, 1, 1, 0, 0, 1, 0])
    self.assert_(ma.allequal(actual, expected),
                 'Error in ' + self.variable_name)
def prepare_for_running_macro(parser):
    """Parse command-line options, load the resources file, and initialize
    the SessionConfiguration. Returns (resources, options).
    """
    from opus_core.file_utilities import get_resources_from_file
    parser.add_option("-r", "--resources", dest="resources_file_name",
                      action="store", type="string",
                      help="Name of file containing resources")
    parser.add_option("-y", "--year", dest="year", action="store", type="int",
                      help="Year in which to 'run' the travel model")
    parser.add_option(
        "-o", "--output-file", dest="output_file", action="store",
        type="string", default=None,
        help="Output log file. If not given, it is written into urbansim "
             "cache directory.")
    (options, args) = parser.parse_args()
    # Fix: the resources file was previously read twice, with the first
    # result ('r') discarded; read it once.
    resources = Resources(get_resources_from_file(options.resources_file_name))
    SessionConfiguration(
        new_instance=True,
        package_order=resources['dataset_pool_configuration'].package_order,
        in_storage=AttributeCache())
    return (resources, options)
def setUp(self):
    """Copy the opus_core test cache into a writable temp directory and point
    the simulation state at it."""
    run_configuration = TestCacheConfiguration()
    SimulationState(new_instance=True)
    SessionConfiguration(run_configuration, new_instance=True,
                         package_order=['urbansim', 'opus_core'],
                         in_storage=AttributeCache())
    self.base_year = run_configuration['base_year']
    self.temp_dir = tempfile.mkdtemp(prefix='opus_tmp')
    # Copy the shipped test cache into the temp dir.
    opus_core_path = package().get_opus_core_path()
    source_cache = os.path.join(opus_core_path, 'data', 'test_cache')
    new_cache_path = os.path.join(self.temp_dir, 'cache')
    copytree(source_cache, new_cache_path)
    # copytree preserves permissions; force the copies to be writable.
    for dirpath, dirnames, filenames in os.walk(new_cache_path):
        for name in filenames:
            os.chmod(os.path.join(dirpath, name), S_IWRITE | S_IREAD)
    SimulationState().set_cache_directory(new_cache_path)
    SimulationState().set_current_time(self.base_year)
    self.config = Resources(run_configuration)
    cache_directory = SimulationState().get_cache_directory()
    self.assertEqual(self.temp_dir, os.path.split(cache_directory)[0])
def apply_filter(self, filter, agent_set, agents_index, submodel=-2):
    """ apply filter comparing to mean project size by submodel instead of 0,
    by shifting self.filter """
    project_size_filter = None
    if filter is not None:
        # A dict maps submodel -> filter; anything else applies to all submodels.
        if isinstance(filter, dict):
            submodel_filter = filter[submodel]
        else:
            submodel_filter = filter
        mean_project_size = agent_set.get_attribute(
            agent_set.get_attribute_name())[agents_index].mean()
        if isinstance(submodel_filter, str):
            # String filters are variable names: compute them on the choice
            # set first, then shift by the mean project size.
            resources = Resources({"debug": self.debug})
            self.choice_set.compute_variables([submodel_filter],
                                              dataset_pool=self.dataset_pool,
                                              resources=resources)
            computed = self.choice_set.get_attribute(
                VariableName(submodel_filter).get_alias())
            project_size_filter = computed - mean_project_size
        else:
            project_size_filter = submodel_filter - mean_project_size
    return LocationChoiceModel.apply_filter(self, project_size_filter,
                                            agent_set=agent_set,
                                            agents_index=agents_index,
                                            submodel=submodel)
def _compute_variable_for_prior_year(self, dataset, full_name, time,
                                     resources=None):
    """Create a new dataset for this variable, compute the variable, and then
    return the values for this variable."""
    calling_dataset_pool = SessionConfiguration().get_dataset_pool()
    calling_time = SimulationState().get_current_time()
    SimulationState().set_current_time(time)
    try:
        # Get an empty dataset pool with same search paths.
        my_dataset_pool = DatasetPool(
            package_order=calling_dataset_pool.get_package_order(),
            storage=AttributeCache())
        ds = dataset.empty_dataset_like_me(in_storage=AttributeCache())
        # Don't pass any datasets via resources, since they may be from a
        # different time.
        my_resources = Resources(resources)
        # Fix: iterate over a snapshot of the keys; deleting from a dict
        # while iterating it raises RuntimeError.
        # NOTE(review): this tests whether the *key* is a Dataset instance;
        # if datasets are stored as values under string keys, the values may
        # be what should be checked -- confirm against callers.
        for key in list(my_resources.keys()):
            if isinstance(key, Dataset):
                del my_resources[key]
        ds.compute_variables(full_name, my_dataset_pool,
                             resources=my_resources)
        values = ds.get_attribute(full_name)
        return values
    finally:
        # Always restore the caller's simulation time.
        SimulationState().set_current_time(calling_time)
def write(self, resources=None, out_storage=None, out_table_name=None):
    """Write this EquationSpecification as a table to 'out_storage'.

    'resources' may override the default field names; 'out_storage' and
    'out_table_name' override the instance defaults when given. Logs a
    warning and returns without writing if no valid Storage is available.
    """
    local_resources = Resources(resources)
    local_resources.merge_with_defaults({
        "field_submodel_id": self.field_submodel_id,
        "field_equation_id": self.field_equation_id,
        "field_coefficient_name": self.field_coefficient_name,
        "field_variable_name": self.field_variable_name,
        "field_fixed_value": self.field_fixed_value,
        "out_table_name": out_table_name})
    # Fix: '<>' is removed in Python 3; use 'is not None'.
    if out_storage is not None:
        self.out_storage = out_storage
    if not isinstance(self.out_storage, Storage):
        logger.log_warning(
            "out_storage has to be of type Storage. No EquationSpecifications written."
        )
        return
    submodel_ids = self.get_submodels()
    if submodel_ids.size == 0:
        # set sub_model_id = -2 when there is no or 1 submodels
        submodel_ids = resize(array([-2], dtype="int32"),
                              len(self.get_coefficient_names()))
    equation_ids = self.get_equations()
    if equation_ids.size == 0:
        equation_ids = resize(array([-2], dtype="int32"), submodel_ids.size)
    values = {
        local_resources["field_submodel_id"]: submodel_ids,
        local_resources["field_equation_id"]: equation_ids,
        local_resources["field_coefficient_name"]: self.get_coefficient_names(),
        local_resources["field_variable_name"]: self.get_long_variable_names()}
    if self.fixed_values.size > 0:
        values[local_resources["field_fixed_value"]] = self.fixed_values
    for field in self.other_fields.keys():
        values[field] = self.other_fields[field]
    types = {
        local_resources["field_submodel_id"]: 'integer',
        local_resources["field_equation_id"]: 'integer',
        local_resources["field_coefficient_name"]: 'text',
        local_resources["field_variable_name"]: 'text'}
    local_resources.merge({"values": values, 'valuetypes': types,
                           "drop_table_flag": 1})
    self.out_storage.write_table(
        table_name=local_resources['out_table_name'],
        table_data=local_resources['values'])
def prepare_for_run(self, specification_storage=None, specification_table=None,
                    coefficients_storage=None, coefficients_table=None,
                    agent_set=None, agents_filter=None, data_objects=None,
                    **kwargs):
    """Load specification and coefficients; optionally compute
    'agents_filter' on 'agent_set' and return the index of agents passing it.

    Returns (specification, coefficients, index); 'index' is None when no
    filter is given.
    """
    spec, coeff = prepare_specification_and_coefficients(
        specification_storage=specification_storage,
        specification_table=specification_table,
        coefficients_storage=coefficients_storage,
        coefficients_table=coefficients_table, **kwargs)
    # Fix: 'index' was only assigned inside the filter branch, so calling
    # this method without 'agents_filter' raised NameError at the return.
    index = None
    if agents_filter is not None:
        agent_set.compute_variables(agents_filter,
                                    resources=Resources(data_objects))
        index = where(
            agent_set.get_attribute(
                VariableName(agents_filter).get_alias()) > 0)[0]
    return (spec, coeff, index)
def test_read_resources_from_string(self):
    """Round-trip: Resources -> file -> string -> Resources is lossless."""
    original = Resources({"arg1": 1, "arg2": "2",
                          "dict1": {"three": 3, "four": 4}})
    write_resources_to_file(self.file_name, original)
    content = read_file_content(self.file_name)
    restored = get_resources_from_string(content)
    self.assertEquals(original, restored)
def predict(self, predicted_choice_id_name, agents_index=None):
    """ Run prediction. Currently makes sense only for choice models.

    Re-runs the estimated model in 'run' mode with the estimated
    coefficients and stores the predicted choices in the agent attribute
    'predicted_choice_id_name' (original choices are restored afterwards).
    Returns True on success, False when estimate() has not been run yet;
    returns None if the prediction itself raises (the error is logged).
    """
    # Create temporary configuration where all words 'estimate' are replaced by 'run'
    tmp_config = Resources(self.config)

    if self.agents_index_for_prediction is None:
        self.agents_index_for_prediction = self.get_agent_set_index().copy()

    if agents_index is None:
        agents_index = self.agents_index_for_prediction

    # Point the model's run arguments at the estimated coefficients, the
    # prediction index, and a single chunk.
    tmp_config['models_configuration'][self.model_name]['controller']['run']['arguments']['coefficients'] = "coeff_est"
    tmp_config['models_configuration'][self.model_name]['controller']['run']['arguments']['agents_index'] = "agents_index"
    tmp_config['models_configuration'][self.model_name]['controller']['run']['arguments']['chunk_specification'] = "{'nchunks':1}"

    ### save specification and coefficients to cache (no matter the save_estimation_results flag)
    ### so that the prepare_for_run method could load specification and coefficients from there
    #output_configuration = self.config['output_configuration']
    #del self.config['output_configuration']
    #self.save_results()
    #self.config['output_configuration'] = output_configuration
    #self.model_system.run_year_namespace["coefficients"] = self.coefficients
    #del tmp_config['models_configuration'][self.model_name]['controller']['prepare_for_run']

    try:
        # run_year_namespace is only populated after estimate() has run.
        run_year_namespace = copy.copy(self.model_system.run_year_namespace)
    except:
        logger.log_error("The estimate() method must be run first")
        return False

    try:
        agents = self.get_agent_set()
        choice_id_name = self.get_choice_set().get_id_name()[0]
        # save current locations of agents
        current_choices = agents.get_attribute(choice_id_name).copy()
        # -1 marks "no choice made yet"
        dummy_data = zeros(current_choices.size, dtype=current_choices.dtype)-1
        #agents.modify_attribute(name=choice_id_name, data=dummy_data)  #reset choices for all agents
        agents.modify_attribute(name=choice_id_name, data=dummy_data, index=agents_index) #reset choices for agents in agents_index

        run_year_namespace["process"] = "run"
        run_year_namespace["coeff_est"] = self.coefficients
        run_year_namespace["agents_index"] = agents_index
        run_year_namespace["processmodel_config"] = tmp_config['models_configuration'][self.model_name]['controller']['run']
        new_choices = self.model_system.do_process(run_year_namespace)

        #self.model_system.run(tmp_config, write_datasets_to_cache_at_end_of_year=False)
        #new_choices = agents.get_attribute(choice_id_name).copy()
        # Restore the saved locations; predictions go into a separate attribute.
        agents.modify_attribute(name=choice_id_name, data=current_choices)
        dummy_data[agents_index] = new_choices
        if predicted_choice_id_name not in agents.get_known_attribute_names():
            agents.add_primary_attribute(name=predicted_choice_id_name, data=dummy_data)
        else:
            agents.modify_attribute(name=predicted_choice_id_name, data=dummy_data)
        logger.log_status("Predictions saved into attribute " + predicted_choice_id_name)
        return True
    except Exception, e:
        # Python 2 syntax; any failure during prediction is logged, not re-raised.
        logger.log_error("Error encountered in prediction: %s" % e)
        logger.log_stack_trace()
def run_model():
    """Run the regional HLCM and return the per-gridcell household counts
    for area 1 followed by area 2."""
    households = HouseholdDataset(in_storage=storage,
                                  in_table_name='households')
    hlcm = RegionalHouseholdLocationChoiceModel(
        location_set=gridcells,
        compute_capacity_flag=False,
        choices="opus_core.random_choices_from_index",
        sample_size_locations=4)
    hlcm.run(specification, coefficients, agent_set=households, debuglevel=1)
    # Tally the resulting households per gridcell.
    gridcells.compute_variables(["urbansim.gridcell.number_of_households"],
                                resources=Resources({"household": households}))
    counts_area1 = gridcells.get_attribute_by_id("number_of_households",
                                                 arange(ngcs_attr) + 1)
    counts_area2 = gridcells.get_attribute_by_id("number_of_households",
                                                 arange(ngcs_attr + 1, ngcs + 1))
    # Drop the computed attribute so a repeated run recomputes it.
    gridcells.delete_one_attribute("number_of_households")
    return concatenate((counts_area1, counts_area2))
def load(self, resources=None, in_storage=None, in_table_name=None):
    """Load coefficient names, estimates, standard errors and any configured
    extra measures from 'in_storage' (or the instance default storage).

    'resources' may override the default field names. Logs a warning and
    loads nothing if no valid Storage is available.
    """
    local_resources = Resources(resources)
    local_resources.merge_with_defaults({
        "field_submodel_id": self.field_submodel_id,
        "field_coefficient_name": self.field_coefficient_name,
        "field_estimate": self.field_estimate,
        "field_standard_error": self.field_standard_error,
        "other_fields": self.other_fields})
    # Fix: '<>' is removed in Python 3; use 'is not None'.
    if in_storage is not None:
        self.in_storage = in_storage
    if not isinstance(self.in_storage, Storage):
        logger.log_warning(
            "in_storage has to be of type Storage. No coefficients loaded."
        )
    else:
        data = self.in_storage.load_table(table_name=in_table_name)
        submodels = data[local_resources["field_submodel_id"]]
        self.names = data[local_resources["field_coefficient_name"]]
        self.values = data[local_resources["field_estimate"]]
        self.standard_errors = data[local_resources["field_standard_error"]]
        for measure in local_resources["other_fields"]:
            if measure in data.keys():
                self.other_measures[measure] = data[measure]
        # Negative submodel ids mean "no submodels"; keep only real ones.
        if submodels.max() >= 0:
            self.submodels = submodels
        self.check_consistency()
def prepare_for_simulation(self, config, cache_directory=None):
    """Initialize simulation state and session configuration, then populate
    the base-year cache from the database or from flt data."""
    self.config = Resources(config)
    baseyear_cache_config = self.config['creating_baseyear_cache_configuration']
    self.simulation_state = SimulationState(
        new_instance=True,
        base_cache_dir=baseyear_cache_config.cache_directory_root,
        start_time=self.config.get('base_year', 0))

    ### TODO: Get rid of this! There is no good reason to be changing the
    ### Configuration.
    if self.config['cache_directory'] is None:
        self.config['cache_directory'] = \
            self.simulation_state.get_cache_directory()

    SessionConfiguration(
        new_instance=True,
        package_order=self.config['dataset_pool_configuration'].package_order,
        in_storage=AttributeCache())

    # Build the cache either by forking the database-cache process or by
    # copying flt data directly.
    if config['creating_baseyear_cache_configuration'].cache_from_database:
        ForkProcess().fork_new_process(
            self.config['creating_baseyear_cache_configuration'].cache_scenario_database,
            self.config)
    else:
        CacheFltData().run(self.config)
def prepare_for_simulation(self, run_configuration, cache_directory=None):
    """Initialize simulation state, cache the scenario database, and ensure
    the estimation output database exists."""
    self.config = Resources(run_configuration)
    self.simulation_state = SimulationState(new_instance=True,
                                            base_cache_dir=cache_directory)

    ### TODO: Get rid of this! There is no good reason to be changing the
    ### Configuration.
    if self.config['cache_directory'] is None:
        self.config['cache_directory'] = \
            self.simulation_state.get_cache_directory()

    SessionConfiguration(
        new_instance=True,
        package_order=self.config['dataset_pool_configuration'].package_order,
        in_storage=AttributeCache())

    ForkProcess().fork_new_process(
        self.config['creating_baseyear_cache_configuration'].cache_scenario_database,
        self.config)

    # Create output database (normally done by run manager)
    if 'estimation_database_configuration' in self.config:
        db_config = self.config['estimation_database_configuration']
        db_server = DatabaseServer(db_config)
        if not db_server.has_database(db_config.database_name):
            db_server.create_database(db_config.database_name)
def test_my_inputs(self):
    """The variable flags buildings whose type name is 'commercial'."""
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='building_types',
        table_data={'building_type_id': array([0, 2]),
                    'name': array(['foo', 'commercial'])})
    storage.write_table(
        table_name='buildings',
        table_data={'building_id': array([1, 2, 3]),
                    'building_type_id': array([2, 0, 2])})
    building_types = BuildingTypeDataset(in_storage=storage,
                                         in_table_name='building_types')
    buildings = BuildingDataset(in_storage=storage,
                                in_table_name='buildings')
    buildings.compute_variables(
        self.variable_name,
        resources=Resources({'building_type': building_types}))
    actual = buildings.get_attribute(self.variable_name)
    # Buildings 1 and 3 have the 'commercial' type (id 2).
    expected = array([1, 0, 1])
    self.assert_(ma.allequal(actual, expected),
                 'Error in ' + self.variable_name)
def run_model_2():
    """Build fresh household/gridcell datasets, run the HLCM, and return the
    total households placed in the more- and less-attractive areas."""
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='households', table_data=household_data)
    households = HouseholdDataset(in_storage=storage,
                                  in_table_name='households')
    storage.write_table(table_name='gridcells', table_data=gridcell_data)
    gridcells = GridcellDataset(in_storage=storage, in_table_name='gridcells')
    hlcm = HouseholdLocationChoiceModelCreator().get_model(
        location_set=gridcells,
        compute_capacity_flag=False,
        choices="opus_core.random_choices_from_index",
        sample_size_locations=8)
    hlcm.run(specification, coefficients, agent_set=households, debuglevel=1)
    # Tally households per gridcell, then sum over each area.
    gridcells.compute_variables(["urbansim.gridcell.number_of_households"],
                                resources=Resources({"household": households}))
    in_more_attractive = gridcells.get_attribute_by_id(
        "number_of_households", arange(ngcs_attr) + 1)
    in_less_attractive = gridcells.get_attribute_by_id(
        "number_of_households", arange(ngcs_attr + 1, ngcs + 1))
    return array([in_more_attractive.sum(), in_less_attractive.sum()])
def run_model():
    """Run the HLCM, collect per-gridcell household counts, restore the
    original household locations, and return the counts."""
    hlcm = HouseholdLocationChoiceModelCreator().get_model(
        location_set=gridcells,
        compute_capacity_flag=False,
        choices="opus_core.random_choices_from_index",
        sample_size_locations=8)
    hlcm.run(specification, coefficients, agent_set=households, debuglevel=1)
    # Tally the resulting households per gridcell.
    gridcells.compute_variables(["urbansim.gridcell.number_of_households"],
                                resources=Resources({"household": households}))
    counts_more_attractive = gridcells.get_attribute_by_id(
        "number_of_households", arange(ngcs_attr) + 1)
    counts_less_attractive = gridcells.get_attribute_by_id(
        "number_of_households", arange(ngcs_attr + 1, ngcs + 1))
    # Reset household locations and drop the computed attribute so the next
    # run starts from the same state.
    households.set_values_of_one_attribute(attribute="grid_id",
                                           values=hh_grid_ids)
    gridcells.delete_one_attribute("number_of_households")
    return concatenate((counts_more_attractive, counts_less_attractive))
def test_number_of_agents_expression(self):
    """number_of_agents() inside an arithmetic expression: count jobs per
    gridcell, then add 10."""
    expr = "mygridcell.number_of_agents(myjob)+10"
    storage = StorageFactory().get_storage('dict_storage')
    # Four jobs; job i lives in the gridcell given by its 'gid' value
    # (1st job is in gridcell 2, etc.).
    storage.write_table(table_name='gridcells',
                        table_data={'gid': array([1, 2, 3])})
    storage.write_table(table_name='jobs',
                        table_data={'jid': arange(4) + 1,
                                    'gid': array([2, 1, 3, 1])})
    gridcell_set = Dataset(in_storage=storage, in_table_name='gridcells',
                           id_name="gid", dataset_name="mygridcell")
    job_set = Dataset(in_storage=storage, in_table_name='jobs',
                      id_name="jid", dataset_name="myjob")
    values = gridcell_set.compute_variables(
        [expr],
        resources=Resources({"myjob": job_set, "mygridcell": gridcell_set}))
    # Gridcell 1 holds 2 jobs; gridcells 2 and 3 hold 1 each; +10 offset.
    expected = array([12, 11, 11])
    self.assert_(ma.allclose(values, expected, rtol=1e-7),
                 msg="Error in " + expr)
def run_chunk(self, index, dataset, specification, coefficients):
    """Run the regression for one chunk of 'dataset' rows (given by 'index').

    Builds the specified-coefficients structure, computes all required
    variables, and then, submodel by submodel, assembles the regression
    data matrix and evaluates the regression.  Returns an array aligned
    with 'index'; rows not covered by any submodel keep their value from
    self.initial_values.
    """
    self.specified_coefficients = SpecifiedCoefficients().create(
        coefficients, specification, neqs=1)
    compute_resources = Resources({"debug": self.debug})
    submodels = self.specified_coefficients.get_submodels()
    # Progress reporting for the GUI: one piece per submodel, keeping 2 in reserve.
    self.get_status_for_gui().update_pieces_using_submodels(
        submodels=submodels, leave_pieces=2)
    # Partition the agents in 'index' among the submodels
    # (fills self.observations_mapping used below).
    self.map_agents_to_submodels(submodels, self.submodel_string, dataset,
                                 index, dataset_pool=self.dataset_pool,
                                 resources=compute_resources)
    variables = self.specified_coefficients.get_full_variable_names_without_constants()
    self.debug.print_debug("Compute variables ...", 4)
    self.increment_current_status_piece()
    dataset.compute_variables(variables, dataset_pool=self.dataset_pool,
                              resources=compute_resources)
    data = {}  # NOTE(review): never read below (self.data is used instead); kept as-is.
    coef = {}
    # Start from the caller-supplied initial values; each submodel
    # overwrites only the slice of rows mapped to it.
    outcome = self.initial_values[index].copy()
    for submodel in submodels:
        coef[submodel] = SpecifiedCoefficientsFor1Submodel(
            self.specified_coefficients, submodel)
        self.coefficient_names[submodel] = coef[
            submodel].get_coefficient_names_without_constant()[0, :]
        self.debug.print_debug(
            "Compute regression for submodel " + str(submodel), 4)
        self.increment_current_status_piece()
        # Data matrix for this submodel's agents; presumably shaped
        # (observations, variables) -- axis-1 indexing below relies on that.
        self.data[submodel] = dataset.create_regression_data(
            coef[submodel],
            index=index[self.observations_mapping[submodel]])
        # Column indices (axis 1) where any NaN/Inf occurs.
        nan_index = where(isnan(self.data[submodel]))[1]
        inf_index = where(isinf(self.data[submodel]))[1]
        vnames = asarray(coef[submodel].get_variable_names())
        if nan_index.size > 0:
            nan_var_index = unique(nan_index)
            # Replace NaNs via nan_to_num rather than aborting; warn which
            # variables were affected (nan_to_num(nan) is the substitute value).
            self.data[submodel] = nan_to_num(self.data[submodel])
            logger.log_warning(
                "NaN(Not A Number) is returned from variable %s; it is replaced with %s."
                % (vnames[nan_var_index], nan_to_num(nan)))
            # Original (disabled) behavior was to raise instead:
            #raise ValueError, "NaN(Not A Number) is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % vnames[nan_var_index]
        if inf_index.size > 0:
            inf_var_index = unique(inf_index)
            # Replace Infs with large finite values (nan_to_num); warn which
            # variables were affected.
            self.data[submodel] = nan_to_num(self.data[submodel])
            logger.log_warning(
                "Inf is returned from variable %s; it is replaced with %s."
                % (vnames[inf_var_index], nan_to_num(inf)))
            # Original (disabled) behavior was to raise instead:
            #raise ValueError, "Inf is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % vnames[inf_var_index]
        if (self.data[submodel].shape[0] > 0) and (self.data[submodel].size > 0):
            # observations for this submodel available
            outcome[self.observations_mapping[submodel]] = \
                self.regression.run(self.data[submodel],
                    coef[submodel].get_coefficient_values()[0,:],
                    resources=self.run_config).astype(outcome.dtype)
    return outcome
def test_number_of_agents(self):
    """number_of_agents must count jobs per gridcell, and must be
    recomputed when the jobs' gridcell ids change (dependency tracking)."""
    expr = "mygridcell.number_of_agents(myjob)"
    storage = StorageFactory().get_storage('dict_storage')
    # Three gridcells; four jobs whose 'gid' says which cell they occupy
    # (job 1 in cell 2, jobs 2 and 4 in cell 1, job 3 in cell 3).
    storage.write_table(table_name='gridcells',
                        table_data={'gid': array([1, 2, 3])})
    storage.write_table(table_name='jobs',
                        table_data={'jid': arange(4) + 1,
                                    'gid': array([2, 1, 3, 1])})
    cells = Dataset(in_storage=storage, in_table_name='gridcells',
                    id_name="gid", dataset_name="mygridcell")
    workplaces = Dataset(in_storage=storage, in_table_name='jobs',
                         id_name="jid", dataset_name="myjob")
    actual = cells.compute_variables(
        [expr],
        resources=Resources({"myjob": workplaces, "mygridcell": cells}))
    self.assert_(ma.allclose(actual, array([2, 1, 1]), rtol=1e-7),
                 msg="Error in " + expr)
    # Move every job into gridcell 1; recomputation must pick up the change.
    workplaces.modify_attribute(name="gid", data=array([1, 1, 1, 1]))
    recomputed = cells.compute_variables(
        [expr],
        resources=Resources({"myjob": workplaces, "mygridcell": cells}))
    self.assert_(ma.allclose(recomputed, array([4, 0, 0]), rtol=1e-7),
                 msg="Error in " + expr)
def __init__(self, config=None, save_estimation_results=False):
    """Set up the estimation environment from 'config'.

    Initializes the simulation state and session configuration, stores
    the configuration as Resources, determines which model is to be
    estimated (either config['model_name'], or the first model marked
    'estimate' in config['models']), and folds any
    'config_changes_for_estimation' into that model's estimate_config
    string in the models configuration.

    Raises KeyError when 'config' has no usable 'cache_directory'.
    """
    if 'cache_directory' not in config or config['cache_directory'] is None:
        raise KeyError("The cache directory must be specified in the "
            "given configuration, giving the filesystem path to the cache "
            "directory containing the data with which to estimate. Please "
            "check that your configuration contains the 'cache_directory' "
            "entry and that it is not None.")
    self.simulation_state = SimulationState(new_instance=True,
                                            start_time=config.get('base_year', 0))
    self.simulation_state.set_cache_directory(config['cache_directory'])
    SessionConfiguration(new_instance=True,
                         package_order=config['dataset_pool_configuration'].package_order,
                         in_storage=AttributeCache())
    self.config = Resources(config)
    self.save_estimation_results = save_estimation_results
    self.debuglevel = self.config.get("debuglevel", 4)
    self.model_system = ModelSystem()
    self.agents_index_for_prediction = None
    models = self.config.get('models', [])
    self.model_name = None
    if "model_name" in config:  # membership test; no need to materialize keys()
        self.model_name = config["model_name"]
    else:
        # Pick the first model configured to run in 'estimate' mode.
        for model in models:
            if isinstance(model, dict):
                # list(...) keeps this working on Python 3, where dict.keys()
                # returns a non-indexable view.
                model_name = list(model.keys())[0]
                if (model[model_name] == "estimate") or \
                        (isinstance(model[model_name], list) and
                         ("estimate" in model[model_name])):
                    self.model_name = model_name
                    break
    estimate_config_changes = self.config.get(
        'config_changes_for_estimation', {}).get('estimate_config', {})
    if len(estimate_config_changes) > 0:
        change = Resources({'models_configuration':
                            {self.model_name:
                             {'controller': {'init': {'arguments': {}}}}}})
        estimate_config_str = self.config['models_configuration'].get(
            self.model_name, {}).get('controller', {}).get(
            'init', {}).get('arguments', {}).get('estimate_config', '{}')
        estimate_config = Resources({})
        try:
            # SECURITY NOTE: eval of a config-supplied string; acceptable
            # only because the configuration is trusted local input.
            estimate_config = eval(estimate_config_str)
        except Exception:
            # Best-effort parse: fall back to the empty Resources above.
            # (Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit
            # are no longer swallowed.)
            pass
        estimate_config.merge(estimate_config_changes)
        self.config.merge(change)
        # Store back as a source string; the controller eval's it at init time.
        self.config['models_configuration'][self.model_name]['controller'] \
            ['init']['arguments']['estimate_config'] = \
            'Resources(%s)' % estimate_config
def test_unplaced_agents_decrease_available_space(self):
    """Using the household location choice model, create a set of available
    spaces and 2000 unplaced agents (along with 5000 placed agents). Run
    the model, and check that the unplaced agents were placed, and the
    number of available spaces has decreased."""
    storage = StorageFactory().get_storage('dict_storage')
    # 2000 households start unplaced (grid_id 0); 5000 already sit in gridcell 1.
    storage.write_table(table_name='households',
                        table_data={'grid_id': array(2000 * [0] + 5000 * [1]),
                                    'household_id': arange(7000) + 1})
    # 50 gridcells with 10000 residential units each.
    storage.write_table(table_name='gridcells',
                        table_data={'residential_units': array(50 * [10000]),
                                    'grid_id': arange(50) + 1})
    hh_set = HouseholdDataset(in_storage=storage, in_table_name='households')
    cell_set = GridcellDataset(in_storage=storage, in_table_name='gridcells')
    coefficients = Coefficients(names=("dummy", ), values=(0.1, ))
    specification = EquationSpecification(
        variables=("gridcell.residential_units", ),
        coefficients=("dummy", ))
    # Only the unplaced households (grid_id == 0) are allowed to move; by
    # default the model would relocate everyone, including the 5000
    # households already settled in gridcell 1.
    movers_index = where(hh_set.get_attribute("grid_id") == 0)[0]
    model = HouseholdLocationChoiceModelCreator().get_model(
        location_set=cell_set,
        choices="opus_core.random_choices_from_index",
        sample_size_locations=30)
    model.run(specification, coefficients, agent_set=hh_set,
              agents_index=movers_index, debuglevel=1)
    cell_set.compute_variables(
        ["urbansim.gridcell.vacant_residential_units"],
        resources=Resources({"household": hh_set}))
    vacancies = cell_set.get_attribute("vacant_residential_units")
    # Gridcell 1 held 5000 households before the run and has 10000 units,
    # so at most 5000 of its units can still be vacant afterwards.
    self.assertEqual(vacancies[0] <= 5000, True,
                     "Error: %d" % (vacancies[0], ))
    # 50 cells x 10000 units = 500000 units in total; all 7000 households
    # are placed after the run, leaving exactly 430000 vacancies.
    self.assertEqual(
        sum(vacancies) == 50 * 10000 - 7000, True,
        "Error: %d" % (sum(vacancies)))
def write(self, resources=None, out_storage=None, out_table_name=None):
    """Write the coefficients to 'out_table_name' in 'out_storage'.

    'resources' may override the default field names (submodel id,
    coefficient name, estimate, standard error).  If 'out_storage' is
    given it replaces self.out_storage.  Logs a warning and returns
    without writing when no valid Storage is available.
    """
    local_resources = Resources(resources)
    local_resources.merge_with_defaults({
        "field_submodel_id": self.field_submodel_id,
        "field_coefficient_name": self.field_coefficient_name,
        "field_estimate": self.field_estimate,
        "field_standard_error": self.field_standard_error,
        "other_fields": self.other_fields,
        "out_table_name": out_table_name
    })
    # Fixed: was 'out_storage <> None' -- the '<>' operator is removed in
    # Python 3; 'is not None' is the correct test in both versions.
    if out_storage is not None:
        self.out_storage = out_storage
    if not isinstance(self.out_storage, Storage):
        logger.log_warning(
            "out_storage has to be of type Storage. No coefficients written."
        )
        return
    submodels = self.get_submodels()
    if submodels.size <= 0:
        # No submodels defined: tag every coefficient with the dummy id -2.
        submodels = resize(array([-2], dtype=int32), self.size())
    values = {
        local_resources["field_submodel_id"]: submodels,
        local_resources["field_coefficient_name"]: self.get_names(),
        local_resources["field_estimate"]: self.get_values(),
        local_resources["field_standard_error"]: self.get_standard_errors()
    }
    types = {
        local_resources["field_submodel_id"]: 'integer',
        local_resources["field_coefficient_name"]: 'text',
        local_resources["field_estimate"]: 'double',
        local_resources["field_standard_error"]: 'double'
    }
    attrtypes = {
        local_resources["field_submodel_id"]: AttributeType.PRIMARY,
        local_resources["field_coefficient_name"]: AttributeType.PRIMARY,
        local_resources["field_estimate"]: AttributeType.PRIMARY,
        local_resources["field_standard_error"]: AttributeType.PRIMARY
    }
    # Additional measures (e.g. t-statistics) become extra double columns.
    # (Single pass over the dict; the original iterated other_measures twice.)
    for measure, measure_values in self.other_measures.items():
        values[measure] = measure_values
        types[measure] = 'double'
        attrtypes[measure] = AttributeType.PRIMARY
    local_resources.merge({
        "values": values, 'valuetypes': types, "drop_table_flag": 1,
        "attrtype": attrtypes
    })
    self.out_storage.write_table(
        table_name=local_resources['out_table_name'],
        table_data=local_resources['values'])