def __init__(self, resources=None, in_storage=None, out_storage=None,
             in_table_name=None, out_table_name=None, attributes=None,
             id_name=None, nchunks=None, other_in_table_names=None,
             debuglevel=0):
    debug = DebugPrinter(debuglevel)
    debug.print_debug("Creating EmploymentSectorGroupDataset object.", 2)
    resources = ResourceCreatorEmploymentSectorGroups().get_resources_for_dataset(
        resources=resources,
        in_storage=in_storage,
        out_storage=out_storage,
        in_table_name=in_table_name,
        out_table_name=out_table_name,
        attributes=attributes,
        id_name=id_name,
        id_name_default=self.id_name_default,
        nchunks=nchunks,
        debug=debug,
    )
    Dataset.__init__(self, resources=resources)
    if isinstance(other_in_table_names, list):
        for place_name in other_in_table_names:  # load other tables
            ds = Dataset(resources=resources)
            ds.load_dataset(in_table_name=place_name)
            self.connect_datasets(ds)
def __init__(self, resources=None, in_storage=None, out_storage=None,
             in_table_name=None, out_table_name=None, attributes=None,
             id_name=None, nchunks=None, other_in_table_names=[],
             debuglevel=0):
    debug = DebugPrinter(debuglevel)
    debug.print_debug("Creating PlanTypeDataset object.", 2)
    resources = self._get_resources_for_dataset(
        in_table_name_default="plan_types",
        in_table_name_groups_default="plan_type_group_definitions",
        out_table_name_default="plan_types",
        dataset_name="plan_type",
        resources=resources,
        in_storage=in_storage,
        out_storage=out_storage,
        in_table_name=in_table_name,
        out_table_name=out_table_name,
        attributes=attributes,
        id_name=id_name,
        id_name_default=self.id_name_default,
        debug=debug,
    )
    AbstractGroupDataset.__init__(self, resources=resources,
                                  other_in_table_names=other_in_table_names,
                                  use_groups=True)
def __init__(self, resources=None, in_storage=None, out_storage=None,
             in_table_name=None, out_table_name=None, attributes=None,
             id_name=None, nchunks=None, other_in_table_names=[],
             debuglevel=0):
    debug = DebugPrinter(debuglevel)
    debug.print_debug("Creating EmploymentSectorDataset object.", 2)
    resources = self._get_resources_for_dataset(
        in_table_name_default="employment_sectors",
        in_table_name_groups_default="employment_adhoc_sector_group_definitions",
        out_table_name_default="employment_sectors",
        dataset_name="employment_sector",
        resources=resources,
        in_storage=in_storage,
        out_storage=out_storage,
        in_table_name=in_table_name,
        out_table_name=out_table_name,
        attributes=attributes,
        id_name=id_name,
        id_name_default=self.id_name_default,
        debug=debug,
    )
    AbstractGroupDataset.__init__(self, resources=resources,
                                  other_in_table_names=other_in_table_names,
                                  use_groups=True)
def __init__(self, resources=None, in_storage=None, out_storage=None,
             in_table_name=None, attributes=None, out_table_name=None,
             id_name=None, nchunks=None, other_in_table_names=None,
             debuglevel=0):
    debug = DebugPrinter(debuglevel)
    debug.print_debug("Creating DevelopmentGroupDataset object.", 2)
    resources = ResourceCreatorDevelopmentGroups().get_resources_for_dataset(
        resources=resources,
        in_storage=in_storage,
        out_storage=out_storage,
        in_table_name=in_table_name,
        out_table_name=out_table_name,
        attributes=attributes,
        id_name=id_name,
        id_name_default=self.id_name_default,
        nchunks=nchunks,
        debug=debug,
    )
    Dataset.__init__(self, resources=resources)
    if isinstance(other_in_table_names, list):
        for place_name in other_in_table_names:  # load other tables
            ds = Dataset(resources=resources)
            ds.load_dataset(in_table_name=place_name)
            self.connect_datasets(ds)
def __init__(self, location_id_name="grid_id", dataset_pool=None, debuglevel=0):
    self.debug = DebugPrinter(debuglevel)
    self.location_id_name = location_id_name
    self.dataset_pool = self.create_dataset_pool(dataset_pool,
                                                 ["urbansim", "opus_core"])
def __init__(self, categories=array([1]), resources=None, what=None,
             attribute_name=None, data=None, names=None, in_storage=None,
             out_storage=None, in_table_name=None, attributes=None,
             out_table_name=None, id_name=None, nchunks=None, debuglevel=0):
    """'what' must be a string, such as 'residential' or 'commercial'."""
    debug = DebugPrinter(debuglevel)
    debug.print_debug("Creating DevelopmentProjectDataset object for %s projects."
                      % what, 2)
    self.categories = categories
    self.what = what
    self.attribute_name = attribute_name
    attributes_default = AttributeType.PRIMARY
    dataset_name = "development_project"
    nchunks_default = 1
    if data is not None:
        in_storage = StorageFactory().get_storage('dict_storage')
        in_storage.write_table(table_name='development_projects', table_data=data)
        in_table_name = 'development_projects'
    resources = ResourceFactory().get_resources_for_dataset(
        dataset_name,
        resources=resources,
        in_storage=in_storage,
        out_storage=out_storage,
        in_table_name_pair=(in_table_name, None),
        out_table_name_pair=(out_table_name, None),
        attributes_pair=(attributes, attributes_default),
        id_name_pair=(id_name, self.id_name_default),
        nchunks_pair=(nchunks, nchunks_default),
        debug_pair=(debug, None))
    self.category_variable_name = resources.get(
        "category_variable_name", self.category_variable_name_default)
    Dataset.__init__(self, resources=resources)
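# Usage sketch (hedged): the 'data' argument lets the dataset be built from an
# in-memory table instead of external storage. The column names below
# ('project_id', 'grid_id', 'residential_units') are illustrative assumptions,
# not a documented schema; 'project_id' stands in for self.id_name_default.
from numpy import array
example_projects = DevelopmentProjectDataset(
    what="residential",
    data={"project_id": array([1, 2, 3]),
          "grid_id": array([10, 10, 42]),
          "residential_units": array([50, 120, 8])})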
def __init__(self, resources=None, dataset1=None, dataset2=None,
             index1=None, index2=None, debuglevel=0):
    debug = DebugPrinter(debuglevel)
    debug.print_debug("Creating object %s.%s" % (self.__class__.__module__,
                                                 self.__class__.__name__), 2)
    local_resources = Resources(resources)
    local_resources.merge_if_not_None({
        "dataset1": dataset1,
        "dataset2": dataset2,
        "debug": debug,
        "index1": index1,
        "index2": index2})
    CoreInteractionDataset.__init__(self, resources=local_resources)
def run(self, projects, types, units, year=0, location_id_name="grid_id",
        debuglevel=0):
    debug = DebugPrinter(debuglevel)
    grid_ids_for_any_project = array([], dtype=int32)
    grid_ids_by_project_type = {}
    for project_type in types:
        grid_ids_by_project_type[project_type] = array([], dtype=int32)
        if projects[project_type] is not None:
            grid_ids_by_project_type[project_type] = \
                projects[project_type].get_attribute(location_id_name)
        grid_ids_for_any_project = unique(concatenate(
            (grid_ids_for_any_project, grid_ids_by_project_type[project_type])))
    grid_ids_for_any_project = grid_ids_for_any_project[
        where(grid_ids_for_any_project > 0)]
    if not len(grid_ids_for_any_project):
        return
    result_data = {
        location_id_name: grid_ids_for_any_project,
        "scheduled_year": (year * ones((grid_ids_for_any_project.size,))).astype(int32)}
    for unit in units:
        result_data[unit] = zeros((grid_ids_for_any_project.size,), dtype=int32)
    for project_type in types:
        result_data["%s_improvement_value" % project_type] = \
            zeros((grid_ids_for_any_project.size,), dtype=int32)
    grid_idx = 0
    for grid_id in grid_ids_for_any_project:
        for i in range(len(types)):
            project_type = types[i]
            my_projects = projects[project_type]
            w = where(my_projects.get_attribute(location_id_name) == grid_id)[0]
            if w.size > 0:
                unit_variable = units[i]
                result_data[unit_variable][grid_idx] = \
                    my_projects.get_attribute_by_index(
                        my_projects.get_attribute_name(), w).sum()
                result_data["%s_improvement_value" % project_type][grid_idx] = \
                    my_projects.get_attribute_by_index("improvement_value", w).sum()
        grid_idx += 1
    storage = StorageFactory().get_storage('dict_storage')
    eventset_table_name = 'development_events_generated'
    storage.write_table(table_name=eventset_table_name, table_data=result_data)
    eventset = DevelopmentEventDataset(
        in_storage=storage,
        in_table_name=eventset_table_name,
        id_name=[location_id_name, "scheduled_year"],
    )
    debug.print_debug("Number of events: " + str(grid_ids_for_any_project.size), 3)
    return eventset
def get_class(self, module_composed_name, class_name=None, arguments={}, debug=0):
    """'module_composed_name' is either a string or an instance of class
    ComposedName that represents the full name of the module in which the
    class given by 'class_name' is implemented. If 'class_name' is None, it
    is considered to have the same name as the module. 'arguments' is a
    dictionary with names and values of arguments of the class constructor.
    It returns an object of the given class.
    """
    if module_composed_name is None:
        return None
    if isinstance(module_composed_name, str):
        module_c_name = ComposedName(module_composed_name)
    else:
        module_c_name = module_composed_name
    if class_name is None:
        class_name = module_c_name.get_short_name()
    if not isinstance(debug, DebugPrinter):
        debug = DebugPrinter(debug)
    ev = "from " + module_c_name.get_full_name() + " import " + class_name
    try:
        exec(ev)
    except ImportError:
        raise ImportError("Module named '%s' does not exist or could not "
                          "import class '%s' from it."
                          % (module_c_name.get_full_name(), class_name))
    return eval(class_name + "(**arguments)")
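# Usage sketch (hedged): dynamically instantiating a class from its dotted
# module path. 'ClassFactory' is assumed to be the class hosting get_class;
# the module path and class name below are illustrative.
my_model = ClassFactory().get_class("opus_core.model",
                                    class_name="Model",
                                    arguments={})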
def __init__(self, probabilities="urbansim.rate_based_probabilities",
             choices="opus_core.random_choices",
             location_id_name="grid_id",
             model_name="Agent Relocation Model",
             debuglevel=0, resources=None):
    self.model_name = model_name
    self.location_id_name = location_id_name
    self.debug = DebugPrinter(debuglevel)
    self.upc_sequence = None
    if probabilities is not None:
        self.upc_sequence = UPCFactory().get_model(
            utilities=None, probabilities=probabilities, choices=choices,
            debuglevel=debuglevel)
    self.resources = merge_resources_if_not_None(resources)
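# Usage sketch (hedged): 'probabilities' and 'choices' are dotted module paths
# resolved by UPCFactory; passing probabilities=None leaves upc_sequence as
# None. 'AgentRelocationModel' is an assumed name for the enclosing class, and
# "zone_id" is an illustrative id column.
arm = AgentRelocationModel(location_id_name="zone_id", debuglevel=1)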
def __init__(self, utility_class=None, probability_class=None,
             choice_class=None, resources=None, debuglevel=0):
    """utility_class, probability_class, choice_class are objects of the
    corresponding classes. They must have a method 'run'.
    """
    self.utility_class = utility_class
    self.probability_class = probability_class
    self.choice_class = choice_class
    self.resources = resources
    if self.resources is None:
        self.resources = Resources()
    self.utilities = None
    self.probabilities = None
    self.choices = None
    self.debug = DebugPrinter(debuglevel)
def __init__(self, resources=None, what="household", in_storage=None,
             in_table_name=None, out_storage=None, out_table_name=None,
             id_name=None, nchunks=None, debuglevel=0):
    ## TODO: remove the "what" argument
    debug = DebugPrinter(debuglevel)
    debug.print_debug("Creating ControlTotalDataset object for " + what + ".", 2)
    if not self.in_table_name_default:
        self.in_table_name_default = "annual_" + what + "_control_totals"
    if not self.out_table_name_default:
        self.out_table_name_default = "annual_" + what + "_control_totals"
    attributes_default = AttributeType.PRIMARY
    nchunks_default = 1
    resources = ResourceFactory().get_resources_for_dataset(
        self.dataset_name,
        resources=resources,
        in_storage=in_storage,
        out_storage=out_storage,
        in_table_name_pair=(in_table_name, self.in_table_name_default),
        attributes_pair=(None, attributes_default),
        out_table_name_pair=(out_table_name, self.out_table_name_default),
        id_name_pair=(id_name, self.id_name_default),
        nchunks_pair=(nchunks, nchunks_default),
        debug_pair=(debug, None)
    )
    table_name = resources["in_table_name"]
    if resources['id_name'] is None or len(resources['id_name']) == 0:
        # If both self.id_name_default and the id_name argument are
        # unspecified, use all attributes not beginning with "total"
        # as the id_name.
        id_names = []
        column_names = resources["in_storage"].get_column_names(table_name)
        for column_name in column_names:
            if not re.search('^total', column_name):
                id_names.append(column_name)
        # guard against id_name being None, which would break concatenation
        resources.merge({"id_name": (resources["id_name"] or []) + id_names})
    Dataset.__init__(self, resources=resources)
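# Usage sketch (hedged): when no id_name is given, every column whose name does
# not start with "total" becomes part of the identifier. Table and column
# names below are illustrative.
from numpy import array
from opus_core.storage_factory import StorageFactory
storage = StorageFactory().get_storage('dict_storage')
storage.write_table(
    table_name='annual_household_control_totals',
    table_data={"year": array([2000, 2001]),
                "total_number_of_households": array([1500, 1550])})
totals = ControlTotalDataset(in_storage=storage, what="household")
# totals.get_id_name() now contains 'year' but not the 'total_*' column.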
def __init__(self, resources=None, data=None, names=None, in_storage=None,
             out_storage=None, in_table_name=None, out_table_name=None,
             attributes=None, id_name=None, nchunks=None, debuglevel=0):
    debug = DebugPrinter(debuglevel)
    debug.print_debug("Creating LandUseDevelopmentDataset object.", 2)
    dataset_name = "landuse_development"
    nchunks_default = 1
    if data is not None:
        in_storage = StorageFactory().get_storage('dict_storage')
        in_storage.write_table(table_name='landuse_developments', table_data=data)
        in_table_name = 'landuse_developments'
    resources = ResourceFactory().get_resources_for_dataset(
        dataset_name,
        in_storage=in_storage,
        out_storage=out_storage,
        resources=resources,
        in_table_name_pair=(in_table_name, None),
        out_table_name_pair=(out_table_name, None),
        id_name_pair=(id_name, self.id_name_default),
        nchunks_pair=(nchunks, nchunks_default),
        debug_pair=(debug, None)
    )
    Dataset.__init__(self, resources=resources)
def __init__(self, resources=None, in_storage=None, out_storage=None,
             in_table_name=None, out_table_name=None, in_table_name_groups=None,
             other_in_table_names=None, attributes=None, use_groups=True,
             id_name=None, nchunks=None, debuglevel=0):
    debug = DebugPrinter(debuglevel)
    debug.print_debug("Creating DevelopmentTypeDataset object.", 2)
    resources = self._get_resources_for_dataset(
        in_table_name_default="development_types",
        in_table_name_groups_default="development_type_group_definitions",
        out_table_name_default="development_types",
        dataset_name="development_type",
        resources=resources,
        in_storage=in_storage,
        out_storage=out_storage,
        in_table_name=in_table_name,
        out_table_name=out_table_name,
        in_table_name_groups=in_table_name_groups,
        attributes=attributes,
        id_name=id_name,
        id_name_default=self.id_name_default,
        debug=debug,
    )
    AbstractGroupDataset.__init__(self, resources=resources,
                                  other_in_table_names=other_in_table_names,
                                  use_groups=use_groups)
def __init__(self, resources=None, dataset1=None, dataset2=None, index1=None,
             index2=None, dataset_name=None, debug=None):
    """Argument 'resources' is of type Resources. It is merged with the
    remaining arguments. It should contain:
        dataset1 - agent class
        dataset2 - class of the choice dataset
    Optional:
        index1 - 1D array, indices of dataset1
        index2 - if a 2D array: row i contains indices of individuals of
            dataset2 that belong to the i-th individual of dataset1[index1];
            if a 1D array: indices of individuals of dataset2 for all
            individuals of dataset1[index1]
        dataset_name - subdirectory in which the implementation of the
            interaction variables is placed (default "")
    dataset1.resources and dataset2.resources should contain the key
    'dataset_name' (see Dataset.get_dataset_name()).
    """
    self.resources = Resources(resources)
    self.resources.merge_if_not_None({
        "dataset1": dataset1,
        "dataset2": dataset2,
        "index1": index1,
        "index2": index2,
        "dataset_name": dataset_name,
        "debug": debug})
    self.attribute_boxes = {}
    self.attribute_names = []
    self.debug = self.resources.get("debug", 0)
    if not isinstance(self.debug, DebugPrinter):
        self.debug = DebugPrinter(self.debug)
    self.resources.check_obligatory_keys(["dataset1", "dataset2"])
    self.dataset1 = self.resources["dataset1"]
    self.dataset2 = self.resources["dataset2"]
    self.index1 = self.resources.get("index1", None)
    self.index2 = self.resources.get("index2", None)
    self.dataset_name = self.resources.get("dataset_name", None)
    if self.dataset_name is None:
        self.dataset_name = self.dataset1.get_dataset_name() + '_x_' + \
                            self.dataset2.get_dataset_name()
    self._primary_attribute_names = []
    self.index1_mapping = {}
    if self.index1 is not None:  # 'is not None' avoids elementwise array comparison
        self.index1_mapping = do_id_mapping_dict_from_array(self.index1)
    self._id_names = None  # for compatibility with Dataset
    self.variable_factory = VariableFactory()
    self._aliases = {}  # for compatibility with Dataset
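# Illustrative sketch (hedged): how index1/index2 pair agents with choices.
# The numbers are made up.
from numpy import array
index1 = array([0, 2])           # two agents taken from dataset1
index2 = array([[3, 5, 7],       # choice indices in dataset2 for agent 0
                [1, 5, 9]])      # choice indices in dataset2 for agent 2
# A 2D index2 gives each agent its own choice set; a 1D index2 would give
# every agent the same choice set.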
def __init__(self, probabilities="opus_core.upc.rate_based_probabilities",
             choices="opus_core.random_choices", model_name=None,
             debuglevel=0, resources=None):
    if model_name is not None:
        self.model_name = model_name
    self.debug = DebugPrinter(debuglevel)
    self.upc_sequence = None
    if probabilities is not None:
        self.upc_sequence = UPCFactory().get_model(
            utilities=None, probabilities=probabilities, choices=choices,
            debuglevel=debuglevel)
    self.resources = merge_resources_if_not_None(resources)
def __init__(self, resources=None, in_storage=None, out_storage=None,
             in_table_name=None, out_table_name=None, attributes=None,
             id_name=None, nchunks=None, debuglevel=0):
    try:
        debug = SessionConfiguration().get('debuglevel', 0)
    except:
        debug = 0
    debug = DebugPrinter(debug)
    if debuglevel > debug.flag:
        debug.flag = debuglevel
    debug.print_debug("Creating object %s.%s" % (self.__class__.__module__,
                                                 self.__class__.__name__), 2)
    resources = ResourceFactory().get_resources_for_dataset(
        self.dataset_name,
        resources=resources,
        in_storage=in_storage,
        in_table_name_pair=(in_table_name, self.in_table_name_default),
        attributes_pair=(attributes, self.attributes_default),
        out_storage=out_storage,
        out_table_name_pair=(out_table_name, self.out_table_name_default),
        id_name_pair=(id_name, self.id_name_default),
        nchunks_pair=(nchunks, self.nchunks_default),
        debug_pair=(debug, None),
    )
    CoreDataset.__init__(self, resources=resources)
def __init__(self, debuglevel=0):
    self.debug = DebugPrinter(debuglevel)
    self.model_name = "Building Transition Model"
class BusinessTransitionModel(Model):
    """Creates and removes businesses from business_set."""
    model_name = "Business Transition Model"
    location_id_name = "building_id"
    variable_package = "urbansim_parcel"

    def __init__(self, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)

    def run(self, year, business_set, control_totals, data_objects=None,
            resources=None):
        business_id_name = business_set.get_id_name()[0]
        # ensure the attribute is loaded from storage
        control_totals.get_attribute("total_number_of_businesses")
        idx = where(control_totals.get_attribute("year") == year)[0]
        sectors = unique(control_totals.get_attribute_by_index("building_use_id", idx))
        max_id = business_set.get_id_attribute().max()
        business_size = business_set.size()
        new_businesses = {
            self.location_id_name: array([], dtype='int32'),
            "building_use_id": array([], dtype='int32'),
            business_id_name: array([], dtype='int32'),
            "sqft": array([], dtype=int32),
            "employees": array([], dtype=int32),
        }
        compute_resources = Resources(data_objects)
        # compute_resources.merge({job_building_types.get_dataset_name(): job_building_types, "debug": self.debug})
        business_set.compute_variables(
            map(lambda x: "%s.%s.is_sector_%s"
                    % (self.variable_package, business_set.get_dataset_name(), x),
                sectors),
            resources=compute_resources)
        remove_businesses = array([], dtype='int32')
        for sector in sectors:
            total_businesses = control_totals.get_data_element_by_id(
                (year, sector)).total_number_of_businesses
            is_in_sector = business_set.get_attribute("is_sector_%s" % sector)
            diff = int(total_businesses - is_in_sector.astype(int8).sum())
            if diff < 0:  # businesses to be removed
                w = where(is_in_sector == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(
                        business_set, w, -1 * diff, self.location_id_name)
                remove_businesses = concatenate(
                    (remove_businesses, non_placed,
                     sample_noreplace(sample_array,
                                      max(0, abs(diff) - size_non_placed))))
            if diff > 0:  # businesses to be created
                new_businesses[self.location_id_name] = concatenate(
                    (new_businesses[self.location_id_name],
                     zeros((diff,), dtype="int32")))
                new_businesses["building_use_id"] = concatenate(
                    (new_businesses["building_use_id"],
                     sector * ones((diff,), dtype="int32")))
                available_business_index = where(is_in_sector)[0]
                sampled_business = probsample_replace(available_business_index,
                                                      diff, None)
                new_businesses["sqft"] = concatenate(
                    (new_businesses["sqft"],
                     business_set.get_attribute("sqft")[sampled_business]))
                new_businesses["employees"] = concatenate(
                    (new_businesses["employees"],
                     business_set.get_attribute("employees")[sampled_business]))
                new_max_id = max_id + diff
                new_businesses[business_id_name] = concatenate(
                    (new_businesses[business_id_name],
                     arange(max_id + 1, new_max_id + 1)))
                max_id = new_max_id
        business_set.remove_elements(remove_businesses)
        business_set.add_elements(new_businesses, require_all_attributes=False)
        difference = business_set.size() - business_size
        self.debug.print_debug(
            "Difference in number of businesses: %s (original %s,"
            " new %s, created %s, deleted %s)"
            % (difference, business_size, business_set.size(),
               new_businesses[business_id_name].size, remove_businesses.size), 3)
        self.debug.print_debug(
            "Number of unplaced businesses: %s"
            % where(business_set.get_attribute(self.location_id_name) <= 0)[0].size,
            3)
        return difference

    def prepare_for_run(self, storage, in_table_name, id_name, **kwargs):
        from urbansim.datasets.control_total_dataset import ControlTotalDataset
        control_totals = ControlTotalDataset(in_storage=storage,
                                             in_table_name=in_table_name,
                                             id_name=id_name)
        # sample_control_totals(storage, control_totals, **kwargs)
        return control_totals
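# Usage sketch (hedged): wiring the model to a control-totals table. Table and
# column names are illustrative; 'business_set' is an already-loaded dataset.
from numpy import array
from opus_core.storage_factory import StorageFactory
storage = StorageFactory().get_storage('dict_storage')
storage.write_table(
    table_name='annual_business_control_totals',
    table_data={"year": array([2005]),
                "building_use_id": array([1]),
                "total_number_of_businesses": array([120])})
model = BusinessTransitionModel()
control_totals = model.prepare_for_run(
    storage, in_table_name='annual_business_control_totals',
    id_name=['year', 'building_use_id'])
# difference = model.run(2005, business_set, control_totals)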
def __init__(self, resources=None, debuglevel=0):
    self.debug = DebugPrinter(debuglevel)
    self.resources = resources
    self.model_name = "Development Event Transition Model"
def __init__(self, debuglevel=0):
    self.debug = DebugPrinter(debuglevel)
class EmploymentTransitionModel(Model):
    """Creates and removes jobs from job_set."""
    model_name = "Employment Transition Model"
    location_id_name_default = "grid_id"
    variable_package_default = "urbansim"

    def __init__(self, location_id_name=None, variable_package=None,
                 dataset_pool=None, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)
        self.location_id_name = self.location_id_name_default
        self.variable_package = self.variable_package_default
        if location_id_name is not None:
            self.location_id_name = location_id_name
        if variable_package is not None:
            self.variable_package = variable_package
        self.dataset_pool = self.create_dataset_pool(dataset_pool,
                                                     ["urbansim", "opus_core"])

    def run(self, year, job_set, control_totals, job_building_types,
            data_objects=None, resources=None):
        self._do_initialize_for_run(job_set, job_building_types, data_objects)
        idx = where(control_totals.get_attribute("year") == year)[0]
        self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
        self._do_run_for_this_year(job_set)
        return self._update_job_set(job_set)

    def _do_initialize_for_run(self, job_set, job_building_types, data_objects=None):
        self.max_id = job_set.get_id_attribute().max()
        self.job_size = job_set.size()
        self.job_id_name = job_set.get_id_name()[0]
        self.new_jobs = {
            self.location_id_name:
                array([], dtype=job_set.get_data_type(self.location_id_name, int32)),
            "sector_id": array([], dtype=job_set.get_data_type("sector_id", int32)),
            self.job_id_name:
                array([], dtype=job_set.get_data_type(self.job_id_name, int32)),
            "building_type":
                array([], dtype=job_set.get_data_type("building_type", int8))
        }
        self.remove_jobs = array([], dtype=int32)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included(
            {job_building_types.get_dataset_name(): job_building_types})
        self.available_building_types = job_building_types.get_id_attribute()

    def _compute_sector_variables(self, sectors, job_set):
        compute_resources = Resources({"debug": self.debug})
        job_set.compute_variables(
            map(lambda x: "%s.%s.is_in_employment_sector_%s_home_based"
                    % (self.variable_package, job_set.get_dataset_name(), x),
                sectors)
            + map(lambda x: "%s.%s.is_in_employment_sector_%s_non_home_based"
                      % (self.variable_package, job_set.get_dataset_name(), x),
                  sectors)
            + ["is_non_home_based_job", "is_home_based_job"],
            dataset_pool=self.dataset_pool,
            resources=compute_resources)

    def _do_run_for_this_year(self, job_set):
        building_type = job_set.get_attribute("building_type")
        sectors = unique(self.control_totals_for_this_year.get_attribute("sector_id"))
        self._compute_sector_variables(sectors, job_set)
        for sector in sectors:
            isector = where(self.control_totals_for_this_year.get_attribute(
                "sector_id") == sector)[0]
            total_hb_jobs = self.control_totals_for_this_year.get_attribute(
                "total_home_based_employment")[isector]
            total_nhb_jobs = self.control_totals_for_this_year.get_attribute(
                "total_non_home_based_employment")[isector]
            is_in_sector_hb = job_set.get_attribute(
                "is_in_employment_sector_%s_home_based" % sector)
            is_in_sector_nhb = job_set.get_attribute(
                "is_in_employment_sector_%s_non_home_based" % sector)
            diff_hb = int(total_hb_jobs - is_in_sector_hb.astype(int8).sum())
            diff_nhb = int(total_nhb_jobs - is_in_sector_nhb.astype(int8).sum())
            if diff_hb < 0:  # home-based jobs to be removed
                w = where(is_in_sector_hb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(
                        job_set, w, -1 * diff_hb, self.location_id_name)
                self.remove_jobs = concatenate(
                    (self.remove_jobs, non_placed,
                     sample_noreplace(sample_array,
                                      max(0, abs(diff_hb) - size_non_placed))))
            if diff_nhb < 0:  # non-home-based jobs to be removed
                w = where(is_in_sector_nhb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(
                        job_set, w, -1 * diff_nhb, self.location_id_name)
                self.remove_jobs = concatenate(
                    (self.remove_jobs, non_placed,
                     sample_noreplace(sample_array,
                                      max(0, abs(diff_nhb) - size_non_placed))))
            if diff_hb > 0:  # home-based jobs to be created
                self.new_jobs[self.location_id_name] = concatenate(
                    (self.new_jobs[self.location_id_name],
                     zeros((diff_hb,),
                           dtype=self.new_jobs[self.location_id_name].dtype.type)))
                self.new_jobs["sector_id"] = concatenate(
                    (self.new_jobs["sector_id"],
                     resize(array([sector],
                                  dtype=self.new_jobs["sector_id"].dtype.type),
                            diff_hb)))
                if 1 in is_in_sector_hb:
                    building_type_distribution = array(
                        ndimage_sum(is_in_sector_hb, labels=building_type,
                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute("is_home_based_job"):
                    # take the building-type distribution from the whole region
                    building_type_distribution = array(
                        ndimage_sum(job_set.get_attribute("is_home_based_job"),
                                    labels=building_type,
                                    index=self.available_building_types))
                else:
                    # there are no home-based jobs in the region; use a uniform
                    # distribution
                    building_type_distribution = ones(
                        self.available_building_types.size)
                building_type_distribution = \
                    building_type_distribution / building_type_distribution.sum()
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_hb,
                    building_type_distribution
                    / float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate(
                    (self.new_jobs["building_type"],
                     sampled_building_types.astype(
                         self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id + diff_hb
                self.new_jobs[self.job_id_name] = concatenate(
                    (self.new_jobs[self.job_id_name],
                     arange(self.max_id + 1, new_max_id + 1)))
                self.max_id = new_max_id
            if diff_nhb > 0:  # non-home-based jobs to be created
                self.new_jobs[self.location_id_name] = concatenate(
                    (self.new_jobs[self.location_id_name],
                     zeros((diff_nhb,),
                           dtype=self.new_jobs[self.location_id_name].dtype.type)))
                self.new_jobs["sector_id"] = concatenate(
                    (self.new_jobs["sector_id"],
                     resize(array([sector],
                                  dtype=self.new_jobs["sector_id"].dtype.type),
                            diff_nhb)))
                if 1 in is_in_sector_nhb:
                    building_type_distribution = array(
                        ndimage_sum(is_in_sector_nhb, labels=building_type,
                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute("is_non_home_based_job"):
                    # take the building-type distribution from the whole region
                    building_type_distribution = array(
                        ndimage_sum(job_set.get_attribute("is_non_home_based_job"),
                                    labels=building_type,
                                    index=self.available_building_types))
                else:
                    # there are no non-home-based jobs in the region; use a
                    # uniform distribution
                    building_type_distribution = ones(
                        self.available_building_types.size)
                building_type_distribution = \
                    building_type_distribution / building_type_distribution.sum()
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_nhb,
                    building_type_distribution
                    / float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate(
                    (self.new_jobs["building_type"],
                     sampled_building_types.astype(
                         self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id + diff_nhb
                self.new_jobs[self.job_id_name] = concatenate(
                    (self.new_jobs[self.job_id_name],
                     arange(self.max_id + 1, new_max_id + 1)))
                self.max_id = new_max_id

    def _update_job_set(self, job_set):
        job_set.remove_elements(self.remove_jobs)
        job_set.add_elements(self.new_jobs, require_all_attributes=False)
        difference = job_set.size() - self.job_size
        self.debug.print_debug(
            "Difference in number of jobs: %s (original %s,"
            " new %s, created %s, deleted %s)"
            % (difference, self.job_size, job_set.size(),
               self.new_jobs[self.job_id_name].size, self.remove_jobs.size), 3)
        self.debug.print_debug(
            "Number of unplaced jobs: %s"
            % where(job_set.get_attribute(self.location_id_name) <= 0)[0].size, 3)
        return difference

    def prepare_for_run(self, storage, **kwargs):
        from urbansim.datasets.control_total_dataset import ControlTotalDataset
        control_totals = ControlTotalDataset(in_storage=storage, what="employment")
        sample_control_totals(storage, control_totals, **kwargs)
        return control_totals
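# Usage sketch (hedged): a typical yearly invocation. 'storage', 'job_set',
# and 'job_building_types' are assumed to be prepared elsewhere.
model = EmploymentTransitionModel(location_id_name="grid_id")
control_totals = model.prepare_for_run(storage)
difference = model.run(2005, job_set, control_totals, job_building_types)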
class ScalingJobsModel(Model):
    """This model places new jobs that belong to specific employment sectors,
    such as military and education, which do not tend to create new business
    locations or move existing ones. It relocates the given jobs according to
    the distribution of the scalable jobs of the different sectors.
    """
    model_name = "Scaling Jobs Model"
    model_short_name = "SJM"
    variable_package = "urbansim"

    def __init__(self, group_member=None,
                 agents_grouping_attribute='job.building_type', filter=None,
                 model_name=None, model_short_name=None, variable_package=None,
                 dataset_pool=None, debuglevel=0):
        self.group_member = group_member
        if self.group_member:
            self.group_member.set_agents_grouping_attribute(
                agents_grouping_attribute)
        self.filter = filter
        self.dataset_pool = self.create_dataset_pool(dataset_pool,
                                                     ["urbansim", "opus_core"])
        self.debug = DebugPrinter(debuglevel)
        if model_name is not None:
            self.model_name = model_name
        if model_short_name is not None:
            self.model_short_name = model_short_name
        if variable_package is not None:
            self.variable_package = variable_package

    def run(self, location_set, agent_set, agents_index=None, data_objects=None,
            resources=None, **kwargs):
        """'location_set' and 'agent_set' are of type Dataset. 'agents_index'
        are indices of individuals in the agent_set for which the model runs;
        if None, the whole agent_set is considered. 'data_objects' is a
        dictionary where each key is the name of a data object ('zone', ...)
        and its value is an object of class Dataset.
        """
        if isinstance(agents_index, list):
            agents_index = array(agents_index)
        if agents_index is None:
            agents_index = arange(agent_set.size())
        if self.group_member:
            new_agents_index = self.group_member.get_index_of_my_agents(
                agent_set, agents_index)
        else:
            new_agents_index = arange(agents_index.size)
        self.debug.print_debug("Number of scalable jobs: "
                               + str(agents_index[new_agents_index].size), 2)
        choices = self._do_run(location_set, agent_set,
                               agents_index[new_agents_index],
                               data_objects, resources)
        result = resize(array([-1], dtype=choices.dtype), agents_index.size)
        result[new_agents_index] = choices
        return result

    def _do_run(self, location_set, agent_set, agents_index, data_objects=None,
                resources=None):
        location_id_name = location_set.get_id_name()[0]
        jobsubset = DatasetSubset(agent_set, agents_index)
        if jobsubset.size() <= 0:
            return array([], dtype='int32')
        # unplace jobs
        agent_set.set_values_of_one_attribute(
            location_id_name, resize(array([-1.0]), jobsubset.size()),
            agents_index)
        sector_ids = jobsubset.get_attribute("sector_id")
        sectors = unique(sector_ids)
        counts = ndimage_sum(ones((jobsubset.size(),)),
                             labels=sector_ids.astype('int32'),
                             index=sectors.astype('int32'))
        if sectors.size <= 1:
            counts = array([counts])
        variables = map(lambda x: "number_of_jobs_of_sector_" + str(int(x)),
                        sectors)
        compute_variables = map(
            lambda var: self.variable_package + "."
                        + location_set.get_dataset_name() + "." + var,
            variables)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included(
            {agent_set.get_dataset_name(): agent_set})
        location_set.compute_variables(compute_variables,
                                       dataset_pool=self.dataset_pool)
        if self.filter is None:
            location_index = arange(location_set.size())
        else:
            filter_values = location_set.compute_variables(
                [self.filter], dataset_pool=self.dataset_pool)
            location_index = where(filter_values > 0)[0]
        if location_index.size <= 0:
            logger.log_status("No locations available. Nothing to be done.")
            return array([])
        location_subset = DatasetSubset(location_set, location_index)
        i = 0
        for sector in sectors:
            distr = location_subset.get_attribute(variables[i])
            if ma.allclose(distr.sum(), 0):
                uniform_prob = 1.0 / distr.size
                distr = resize(array([uniform_prob], dtype='float64'), distr.size)
                logger.log_warning("Probabilities in scaling model for sector "
                                   + str(sector)
                                   + " sum to 0.0. Substituting uniform distribution!")
            distr = distr / float(distr.sum())
            random_sample = probsample_replace(location_subset.get_id_attribute(),
                                               size=int(counts[i]),
                                               prob_array=distr)
            idx = where(sector_ids == sector)[0]
            # modify job locations
            agent_set.set_values_of_one_attribute(location_id_name, random_sample,
                                                  agents_index[idx])
            i += 1
        return agent_set.get_attribute_by_index(location_id_name, agents_index)

    def prepare_for_run(self, agent_set=None, agents_filter=None,
                        agents_index=None):
        if agent_set is None or agents_filter is None:
            return agents_index
        filter = agent_set.compute_variables([agents_filter],
                                             dataset_pool=self.dataset_pool)
        # initialize filtered_index so it is defined when agents_index is None
        filtered_index = filter
        if agents_index is not None:
            tmp = zeros(agent_set.size(), dtype='bool8')
            tmp[agents_index] = True
            filtered_index = logical_and(filter, tmp)
        return where(filtered_index)[0]
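# Illustrative sketch (hedged): the core of the scaling step is weighted
# sampling with replacement, favoring locations that already hold scalable
# jobs of the sector. The numbers are made up.
from numpy import array
location_ids = array([101, 102, 103])
jobs_of_sector = array([10.0, 0.0, 30.0])      # existing sector jobs per location
prob = jobs_of_sector / jobs_of_sector.sum()   # 0.25, 0.0, 0.75
# probsample_replace(location_ids, size=4, prob_array=prob) would then draw
# four location ids, mostly 103 and never 102.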
def get_variable(self, variable_name, dataset, quiet=False, debug=0,
                 index_name=None):
    """Returns an instance of class Variable. 'variable_name' is an instance
    of class VariableName. 'dataset' is an object of class Dataset to which
    the variable belongs. In case of an error in either importing the module
    or evaluating its constructor, the method returns None. If quiet is True,
    no warnings are printed. index_name is used for lag variables only.
    """
    lag_attribute_name = None
    lag_offset = 0
    if not isinstance(debug, DebugPrinter):
        debug = DebugPrinter(debug)
    if variable_name.get_autogen_class() is not None:
        # variable_name has an autogenerated class -- just use that
        variable_subclass = variable_name.get_autogen_class()
        substrings = ()
    else:
        # either find the variable name in the expression library (if present),
        # in an appropriate 'aliases' file, or load our variable class as
        # 'variable_subclass' using an import statement
        short_name = variable_name.get_short_name()
        dataset_name = variable_name.get_dataset_name()
        package_name = variable_name.get_package_name()
        # if there isn't a package name, first look in the expression library
        # (if there is a package name, look elsewhere)
        if package_name is None:
            e = VariableFactory._expression_library.get(
                (dataset_name, short_name), None)
            if e is not None:
                if e == variable_name.get_expression():
                    # it is a primary attribute
                    return None
                v = VariableName(e)
                return VariableFactory().get_variable(v, dataset, quiet=quiet,
                                                      debug=debug)
        else:
            # not in the expression library - next look in the appropriate
            # 'aliases' file, if one is present (but only if we have a package
            # name in the first place)
            try:
                stmt = 'from %s.%s.aliases import aliases' % (package_name,
                                                              dataset_name)
                exec(stmt)
            except ImportError:
                aliases = []
            for a in aliases:
                # for each definition, see if the alias is equal to the
                # short_name; if it is, use that definition for the variable
                v = VariableName(a)
                if v.get_alias() == short_name:
                    return VariableFactory().get_variable(v, dataset,
                                                          quiet=quiet,
                                                          debug=debug)
        lag_variable_parser = LagVariableParser()
        if lag_variable_parser.is_short_name_for_lag_variable(short_name):
            lag_attribute_name, lag_offset = \
                lag_variable_parser.parse_lag_variable_short_name(short_name)
            true_short_name = "VVV_lagLLL"
            substrings = (package_name, lag_attribute_name, lag_offset,
                          dataset_name, index_name)
            opus_path = 'opus_core.variables'
        else:
            if package_name is None:
                raise LookupError(
                    "Incomplete variable specification for '%s.%s' (missing "
                    "package name, and variable is neither in the expression "
                    "library nor a lag variable)." % (dataset_name, short_name))
            opus_path = '%s.%s' % (package_name, dataset_name)
            true_short_name, substrings = VariableFamilyNameTranslator().\
                get_translated_variable_name_and_substring_arguments(
                    opus_path, short_name)
        module = '%s.%s' % (opus_path, true_short_name)
        # Note that simply checking for the .py module file would not be safe
        # here, as objects could be instantiated in __init__.py files.
        try:
            ev = "from %s import %s as variable_subclass" % (module,
                                                             true_short_name)
            debug.print_debug("Evaluating '" + ev + "'.", 12)
            exec(ev)
            debug.print_debug("Successful.", 12)
        except ImportError, e:
            if not quiet:
                from opus_core.simulation_state import SimulationState
                time = SimulationState().get_current_time()
                desc = '\n'.join((
                    "Opus variable '%s' does not exist for dataset '%s' in "
                    "year %s. The following error occurred when finally "
                    "trying to import the variable '%s' from the Python "
                    "module '%s':",
                    "%s",
                )) % (true_short_name, opus_path, time, true_short_name, module,
                      indent_text(formatPlainTextExceptionInfoWithoutLog('')))
                raise NameError(desc)
            return None
class Variable(object):
    """Abstract base class for variables. Each variable implementation must be
    a subclass of this class, placed in a module that has the same name as the
    variable class.

    Each variable class is expected to contain a method "compute" that takes
    one argument, "arguments". It is of type Resources and can contain
    anything the compute method might need. The 'compute' method returns the
    result of the computation, which should be an array of size
    self.get_dataset().size().

    Each variable class can contain a method "dependencies" which returns a
    list of attributes/variables that this class depends on. The dependencies
    list contains fully (or dataset) qualified variable names, one for each
    dependent variable. All dependent datasets must be included in 'arguments'.

    Each variable may have a pre- and post-check that performs checks on the
    variable's inputs and results, allowing each variable's implementation to
    specify a contract about what it does. The 'check_variables' entry of
    'arguments' defines which variables to check (see method 'should_check').
    If a variable is to be checked, its 'pre_check' method is called before
    the variable's 'compute' method, and its 'post_check' method is called
    afterwards. Both 'pre_check' and 'post_check' take two arguments: values
    (the results from the 'compute' method) and 'arguments'.

    When using 'compute_with_dependencies', the datasets for which variables
    are computed are expected to have a method 'compute_variables' that takes
    at least three arguments: the name of the variable, the package name, and
    an object of class Resources. This dataset method should use the Variable
    method 'compute_with_dependencies' in order to work recursively through
    dependency trees (see compute_variables and _compute_one_variable of
    opus_core.Dataset).

    The return type of this variable is defined by its _return_type property,
    which may be one of the following numpy types: "bool8", "int8", "uint8",
    "int16", "uint16", "int32", "uint32", "int64", "uint64", "float32",
    "float64", "complex64", "complex128", "longlong".
    """
    _return_type = None

    def __new__(cls, *args, **kwargs):
        """Set up automatic logging of the running time of the compute method."""
        an_instance = object.__new__(cls)
        compute_method = an_instance.compute_with_dependencies

        def logged_method(*req_args, **opt_args):
            logger.start_block(name=an_instance.name(), verbose=False)
            try:
                results = compute_method(*req_args, **opt_args)
                an_instance._do_flush_dependent_variables_if_required()
            finally:
                logger.end_block()
            return results

        an_instance.compute_with_dependencies = logged_method
        return an_instance

    def __init__(self):
        self.dependencies_list = None
        self.dataset = None
        self.number_of_compute_runs = 0
        try:
            self.debug = SessionConfiguration().get('debuglevel', 0)
        except:
            self.debug = 0
        if isinstance(self.debug, int):
            self.debug = DebugPrinter(self.debug)

    def name(self):
        return self.__module__

    def _do_flush_dependent_variables_if_required(self):
        try:
            if not SessionConfiguration().get('flush_variables', False):
                return
        except:
            return
        from opus_core.datasets.interaction_dataset import InteractionDataset
        dataset = self.get_dataset()
        dependencies = self.get_current_dependencies()
        my_dataset_name = dataset.get_dataset_name()
        for iattr in range(len(dependencies)):  # iterate over dependent variables
            dep_item = dependencies[iattr][0]
            if isinstance(dep_item, str):
                depvar_name = VariableName(dep_item)
            else:
                # dep_item should be an instance of AttributeBox
                depvar_name = dep_item.get_variable_name()
            dataset_name = depvar_name.get_dataset_name()
            if dataset_name == my_dataset_name:
                ds = dataset
            else:
                ds = SessionConfiguration().get_dataset_from_pool(dataset_name)
            if not isinstance(ds, InteractionDataset):
                short_name = depvar_name.get_alias()
                if short_name not in ds.get_id_name():
                    ds.flush_attribute(depvar_name)

    def compute(self, dataset_pool):
        """Returns the result of this variable. Private use only."""
        raise NotImplementedError("compute() method not implemented for this "
                                  "variable.")
    def is_lag_variable(self):
        """Not a lag variable unless this method is overridden to return True."""
        return False

    def _compute_and_check(self, dataset_pool):
        if has_this_method(self, "pre_check"):
            self.debug.print_debug("Running pre_check() for "
                                   + self.__class__.__module__, 4)
            self.pre_check(dataset_pool)
        else:
            self.debug.print_debug("No pre_check() defined for "
                                   + self.__class__.__module__, 4)
        values = self.compute(dataset_pool)
        if has_this_method(self, "post_check"):
            self.debug.print_debug("Running post_check() for "
                                   + self.__class__.__module__, 4)
            self.post_check(values, dataset_pool)
        else:
            self.debug.print_debug("No post_check() defined for "
                                   + self.__class__.__module__, 4)
        return values

    def compute_with_dependencies(self, dataset_pool, arguments={}):
        self._solve_dependencies(dataset_pool)
        if self.should_check(arguments):
            self.debug.print_debug("Computing and checking "
                                   + self.__class__.__module__, 3)
            values = self._compute_and_check(dataset_pool)
        else:
            values = self.compute(dataset_pool)
        self.number_of_compute_runs += 1
        if self._return_type:
            return self._cast_values(values, arguments)
        return values

    if longlong == int32:
        __long_size = 2 ** 31 - 1
    else:
        __long_size = 2 ** 63 - 1

    _max_storable_value = {
        "bool8": 1,
        "int8": 2 ** 7 - 1,
        "uint8": 2 ** 8 - 1,
        "int16": 2 ** 15 - 1,
        "uint16": 2 ** 16 - 1,
        "int32": 2 ** 31 - 1,
        "uint32": 2 ** 32 - 1,
        "int64": 2 ** 63 - 1,
        "uint64": 2 ** 64 - 1,
        "float32": 3.40282346638528860e+38,
        "float64": 1.79769313486231570e+308,
        "complex64": 3.40282346638528860e+38,
        "complex128": 1.79769313486231570e+308,
        "longlong": __long_size,
    }

    def _cast_values(self, values, arguments):
        """Cast the return values to type self._return_type. If "should_check"
        is defined, first check for values that are too large for the
        destination type (integer wrap-around)."""
        type = values.dtype.str
        if self._return_type == type:
            return values
        if self.should_check(arguments):
            max_value = ma.maximum(values)
            if max_value > self._max_storable_value[self._return_type]:
                max_value_str = str(max_value)
                logger.log_error("Variable '%s' is being cast to type '%s', "
                                 "but contains a value (%s) too large to fit "
                                 "into that type."
                                 % (self.name(), self._return_type, max_value_str))
        return values.astype(self._return_type)

    def _solve_dependencies(self, dataset_pool):
        dataset = self.get_dataset()
        my_dataset_name = dataset.get_dataset_name()
        dependencies_list = self.get_current_dependencies()
        for i in range(len(dependencies_list)):  # compute dependent variables
            dep_item = dependencies_list[i][0]
            if isinstance(dep_item, str):
                depvar_name = VariableName(dep_item)
            else:
                # dep_item should be an instance of AttributeBox
                depvar_name = dep_item.get_variable_name()
            dataset_name = depvar_name.get_dataset_name()
            version = dependencies_list[i][1]
            if dataset_name == my_dataset_name:
                ds = dataset
            else:
                ds = dataset_pool.get_dataset(dataset_name)
            (new_versions, value) = \
                ds.compute_variables_return_versions_and_final_value(
                    [(depvar_name, version)], dataset_pool)
            self.dependencies_list[i] = (ds._get_attribute_box(depvar_name),
                                         new_versions[0])

    def get_all_dependencies(self):
        """Return all variables and attributes needed to compute this variable.
        The result is a list of tuples where the first element is either an
        AttributeBox or a VariableName of the dependent variable, and the
        second element is the version for which this variable was computed.
        """
        def create_fake_dataset(dataset_name):
            storage = StorageFactory().get_storage('dict_storage')
            storage.write_table(table_name='fake_dataset',
                                table_data={'id': array([], dtype='int32')})
            dataset = Dataset(in_storage=storage, in_table_name='fake_dataset',
                              dataset_name=dataset_name, id_name="id")
            return dataset

        result_others = []
        dependencies_list = self.get_current_dependencies()
        for i in range(len(dependencies_list)):
            dep_item = dependencies_list[i][0]
            version = dependencies_list[i][1]
            isprimary = 0
            if isinstance(dep_item, str):
                depvar_name = VariableName(dep_item)
                dataset_name = depvar_name.get_dataset_name()
                var = VariableFactory().get_variable(
                    depvar_name, create_fake_dataset(dataset_name), quiet=True)
                result_others = result_others + [(depvar_name, version)]
            else:  # dep_item should be an instance of AttributeBox
                var = dep_item.get_variable_instance()
                result_others = result_others + [(dep_item, version)]
                isprimary = dep_item.is_primary()
            if (var is not None) and (not isprimary):
                res = var.get_all_dependencies()
                result_others = result_others + res
        return result_others

    def get_dependencies(self):
        """Return variables and attributes needed to compute this variable.
        The result is a list of tuples where the first element is the name of
        the particular dataset and the second element is the variable name.
        It does not work through the dependencies tree.
        """
        if has_this_method(self, "dependencies"):
            return self.dependencies()
        return []

    def add_dependencies(self, dep_list=[]):
        """Can be used within the 'compute' method to add dependencies. It is
        performed only when the compute method runs for the first time.
        dep_list can be either a list of character strings or a list of
        AttributeBoxes."""
        if self.number_of_compute_runs == 0:
            if isinstance(dep_list, str):
                dep_list = [dep_list]
            self.dependencies_list = self.dependencies_list + \
                map(lambda x: (x, 0), dep_list)

    def add_and_solve_dependencies(self, dep_list=[], dataset_pool=None):
        """Calls 'add_dependencies' and, on the first run, also calls the
        '_solve_dependencies' method."""
        self.add_dependencies(dep_list)
        if self.number_of_compute_runs == 0:
            self._solve_dependencies(dataset_pool)

    def get_current_dependencies(self):
        if self.dependencies_list is None:
            self.dependencies_list = map(lambda x: (x, 0),
                                         self.get_dependencies())
        return self.dependencies_list

    def do_check(self, condition_str, values):
        def condition(x):
            return eval(condition_str)

        # This is a bit ugly, but the upgrade from Python 2.3.5 to Python 2.4
        # broke backward compatibility with regard to map and numpy's rank-0
        # arrays. This attempts to detect a rank-0 array and convert it into
        # something usable.
        try:
            try:
                len(values)
            except TypeError:
                values = array([values[()]])
        except:
            pass
        count = where(array(map(lambda x: not condition(x), values)) > 0)[0].size
        if count > 0:
            logger.log_warning("Variable %s fails %d times on check %s"
                               % (self.__class__.__module__, count, condition_str))

    def should_check(self, arguments=None):
        """Return True if this variable should be checked, otherwise False.
        The information about which variables to check is provided in the
        'arguments' entry "check_variables". If "check_variables" is missing,
        None, or an empty list, do no checks. If "check_variables" is '*',
        check all variables. If "check_variables" is a list containing this
        variable's name, check this variable.
        """
        if not isinstance(arguments, Resources):
            return False
        check_variables = arguments.get("check_variables", None)
        if check_variables is None:
            return False
        if (check_variables == '*') or \
           (isinstance(check_variables, list) and len(check_variables) > 0
                and self.__class__.__name__ in check_variables):
            return True
        return False

    def are_dependent_variables_up_to_date(self, version):
        result = []
        all_dependencies_list = self.get_all_dependencies()
        for variable, version in all_dependencies_list:
            if isinstance(variable, AttributeBox):
                result.append(variable.is_version(version))
            else:  # of type VariableName (means the variable wasn't used yet)
                result.append(False)
        return result

    def get_highest_version_of_dependencies(self):
        dependencies_list = self.get_current_dependencies()
        if len(dependencies_list) <= 0:
            return 0
        versions = array(map(lambda x: x[1], dependencies_list))
        return versions.max()

    def set_dataset(self, dataset):
        self.dataset = dataset

    def get_dataset(self):
        return self.dataset

    def safely_divide_two_arrays(self, numerator, denominator,
                                 value_for_divide_by_zero=0.0):
        """Returns the result of numerator/denominator with
        value_for_divide_by_zero wherever denominator == 0."""
        return ma.filled(numerator / ma.masked_where(denominator == 0,
                                                     denominator),
                         value_for_divide_by_zero)

    def safely_divide_two_attributes(self, numerator_name, denominator_name,
                                     value_for_divide_by_zero=0.0):
        """Returns the result of dividing the numerator_name attribute of this
        variable's dataset by its denominator_name attribute, with
        value_for_divide_by_zero wherever denominator == 0."""
        numerator = self.get_dataset().get_attribute(numerator_name)
        denominator = self.get_dataset().get_attribute(denominator_name)
        return self.safely_divide_two_arrays(numerator, denominator,
                                             value_for_divide_by_zero)
class RegressionModel(ChunkModel): model_name = "Regression Model" model_short_name = "RM" def __init__(self, regression_procedure="opus_core.linear_regression", submodel_string=None, run_config=None, estimate_config=None, debuglevel=0, dataset_pool=None): self.debug = DebugPrinter(debuglevel) self.dataset_pool = self.create_dataset_pool(dataset_pool) self.regression = RegressionModelFactory().get_model( name=regression_procedure) if self.regression == None: raise StandardError, "No regression procedure given." self.submodel_string = submodel_string self.run_config = run_config if self.run_config == None: self.run_config = Resources() if not isinstance(self.run_config, Resources) and isinstance( self.run_config, dict): self.run_config = Resources(self.run_config) self.estimate_config = estimate_config if self.estimate_config == None: self.estimate_config = Resources() if not isinstance(self.estimate_config, Resources) and isinstance( self.estimate_config, dict): self.estimate_config = Resources(self.estimate_config) self.data = {} self.coefficient_names = {} ChunkModel.__init__(self) self.get_status_for_gui().initialize_pieces(3, pieces_description=array([ 'initialization', 'computing variables', 'submodel: 1' ])) def run(self, specification, coefficients, dataset, index=None, chunk_specification=None, data_objects=None, run_config=None, initial_values=None, procedure=None, debuglevel=0): """'specification' is of type EquationSpecification, 'coefficients' is of type Coefficients, 'dataset' is of type Dataset, 'index' are indices of individuals in dataset for which the model runs. If it is None, the whole dataset is considered. 'chunk_specification' determines number of chunks in which the simulation is processed. 'data_objects' is a dictionary where each key is the name of a data object ('zone', ...) and its value is an object of class Dataset. 'run_config' is of type Resources, it gives additional arguments for the run. If 'procedure' is given, it overwrites the regression_procedure of the constructor. 'initial_values' is an array of the initial values of the results. It will be overwritten by the results for those elements that are handled by the model (defined by submodels in the specification). By default the results are initialized with 0. 'debuglevel' overwrites the constructor 'debuglevel'. 
""" self.debug.flag = debuglevel if run_config == None: run_config = Resources() if not isinstance(run_config, Resources) and isinstance( run_config, dict): run_config = Resources(run_config) self.run_config = run_config.merge_with_defaults(self.run_config) self.run_config.merge({"debug": self.debug}) if data_objects is not None: self.dataset_pool.add_datasets_if_not_included(data_objects) if procedure is not None: self.regression = RegressionModelFactory().get_model( name=procedure) if initial_values is None: self.initial_values = zeros((dataset.size(), ), dtype=float32) else: self.initial_values = zeros((dataset.size(), ), dtype=initial_values.dtype) self.initial_values[index] = initial_values if dataset.size() <= 0: # no data loaded yet dataset.get_id_attribute() if index == None: index = arange(dataset.size()) result = ChunkModel.run(self, chunk_specification, dataset, index, float32, specification=specification, coefficients=coefficients) return result def run_chunk(self, index, dataset, specification, coefficients): self.specified_coefficients = SpecifiedCoefficients().create( coefficients, specification, neqs=1) compute_resources = Resources({"debug": self.debug}) submodels = self.specified_coefficients.get_submodels() self.get_status_for_gui().update_pieces_using_submodels( submodels=submodels, leave_pieces=2) self.map_agents_to_submodels(submodels, self.submodel_string, dataset, index, dataset_pool=self.dataset_pool, resources=compute_resources) variables = self.specified_coefficients.get_full_variable_names_without_constants( ) self.debug.print_debug("Compute variables ...", 4) self.increment_current_status_piece() dataset.compute_variables(variables, dataset_pool=self.dataset_pool, resources=compute_resources) data = {} coef = {} outcome = self.initial_values[index].copy() for submodel in submodels: coef[submodel] = SpecifiedCoefficientsFor1Submodel( self.specified_coefficients, submodel) self.coefficient_names[submodel] = coef[ submodel].get_coefficient_names_without_constant()[0, :] self.debug.print_debug( "Compute regression for submodel " + str(submodel), 4) self.increment_current_status_piece() self.data[submodel] = dataset.create_regression_data( coef[submodel], index=index[self.observations_mapping[submodel]]) nan_index = where(isnan(self.data[submodel]))[1] inf_index = where(isinf(self.data[submodel]))[1] if nan_index.size > 0: nan_var_index = unique(nan_index) raise ValueError, "NaN(Not A Number) is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % coef[ submodel].get_variable_names()[nan_var_index] if inf_index.size > 0: inf_var_index = unique(inf_index) raise ValueError, "Inf is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % coef[ submodel].get_variable_names()[inf_var_index] if (self.data[submodel].shape[0] > 0) and (self.data[submodel].size > 0): # observations for this submodel available outcome[self.observations_mapping[submodel]] = \ self.regression.run(self.data[submodel], coef[submodel].get_coefficient_values()[0,:], resources=self.run_config).astype(outcome.dtype) return outcome def correct_infinite_values(self, dataset, outcome_attribute_name, maxvalue=1e+38, clip_all_larger_values=False): """Check if the model resulted in infinite values. If yes, print warning and clip the values to maxvalue. If clip_all_larger_values is True, all values larger than maxvalue are clip to maxvalue. 
""" infidx = where(dataset.get_attribute(outcome_attribute_name) == inf)[0] if infidx.size > 0: logger.log_warning("Infinite values in %s. Clipped to %s." % (outcome_attribute_name, maxvalue)) dataset.set_values_of_one_attribute(outcome_attribute_name, maxvalue, infidx) if clip_all_larger_values: idx = where( dataset.get_attribute(outcome_attribute_name) > maxvalue)[0] if idx.size > 0: logger.log_warning( "Values in %s larger than %s. Clipped to %s." % (outcome_attribute_name, maxvalue, maxvalue)) dataset.set_values_of_one_attribute(outcome_attribute_name, maxvalue, idx) def estimate(self, specification, dataset, outcome_attribute, index=None, procedure=None, data_objects=None, estimate_config=None, debuglevel=0): """'specification' is of type EquationSpecification, 'dataset' is of type Dataset, 'outcome_attribute' - string that determines the dependent variable, 'index' are indices of individuals in dataset for which the model runs. If it is None, the whole dataset is considered. 'procedure' - name of the estimation procedure. If it is None, there should be an entry "estimation" in 'estimate_config' that determines the procedure. The class must have a method 'run' that takes as arguments 'data', 'regression_procedure' and 'resources'. It returns a dictionary with entries 'estimators', 'standard_errors' and 't_values' (all 1D numpy arrays). 'data_objects' is a dictionary where each key is the name of an data object ('zone', ...) and its value is an object of class Dataset. 'estimate_config' is of type Resources, it gives additional arguments for the estimation procedure. 'debuglevel' overwrites the class 'debuglevel'. """ #import wingdbstub self.debug.flag = debuglevel if estimate_config == None: estimate_config = Resources() if not isinstance(estimate_config, Resources) and isinstance( estimate_config, dict): estimate_config = Resources(estimate_config) self.estimate_config = estimate_config.merge_with_defaults( self.estimate_config) if data_objects is not None: self.dataset_pool.add_datasets_if_not_included(data_objects) self.procedure = procedure if self.procedure == None: self.procedure = self.estimate_config.get("estimation", None) if self.procedure is not None: self.procedure = ModelComponentCreator().get_model_component( self.procedure) else: logger.log_warning( "No estimation procedure given, or problems with loading the corresponding module." 
) compute_resources = Resources({"debug": self.debug}) if dataset.size() <= 0: # no data loaded yet dataset.get_id_attribute() if index == None: index = arange(dataset.size()) if not isinstance(index, ndarray): index = array(index) estimation_size_agents = self.estimate_config.get( "estimation_size_agents", None) # should be a proportion of the agent_set if estimation_size_agents == None: estimation_size_agents = 1.0 else: estimation_size_agents = max(min(estimation_size_agents, 1.0), 0.0) # between 0 and 1 if estimation_size_agents < 1.0: self.debug.print_debug("Sampling agents for estimation ...", 3) estimation_idx = sample_noreplace( arange(index.size), int(index.size * estimation_size_agents)) else: estimation_idx = arange(index.size) estimation_idx = index[estimation_idx] self.debug.print_debug( "Number of observations for estimation: " + str(estimation_idx.size), 2) if estimation_idx.size <= 0: self.debug.print_debug("Nothing to be done.", 2) return (None, None) coefficients = create_coefficient_from_specification(specification) specified_coefficients = SpecifiedCoefficients().create(coefficients, specification, neqs=1) submodels = specified_coefficients.get_submodels() self.get_status_for_gui().update_pieces_using_submodels( submodels=submodels, leave_pieces=2) self.map_agents_to_submodels( submodels, self.submodel_string, dataset, estimation_idx, dataset_pool=self.dataset_pool, resources=compute_resources, submodel_size_max=self.estimate_config.get('submodel_size_max', None)) variables = specified_coefficients.get_full_variable_names_without_constants( ) self.debug.print_debug("Compute variables ...", 4) self.increment_current_status_piece() dataset.compute_variables(variables, dataset_pool=self.dataset_pool, resources=compute_resources) coef = {} estimated_coef = {} self.outcome = {} dataset.compute_variables([outcome_attribute], dataset_pool=self.dataset_pool, resources=compute_resources) regression_resources = Resources(estimate_config) regression_resources.merge({"debug": self.debug}) outcome_variable_name = VariableName(outcome_attribute) for submodel in submodels: coef[submodel] = SpecifiedCoefficientsFor1Submodel( specified_coefficients, submodel) self.increment_current_status_piece() logger.log_status("Estimate regression for submodel " + str(submodel), tags=["estimate"], verbosity_level=2) logger.log_status("Number of observations: " + str(self.observations_mapping[submodel].size), tags=["estimate"], verbosity_level=2) self.data[ submodel] = dataset.create_regression_data_for_estimation( coef[submodel], index=estimation_idx[self.observations_mapping[submodel]]) self.coefficient_names[submodel] = coef[ submodel].get_coefficient_names_without_constant()[0, :] if (self.data[submodel].shape[0] > 0 ) and (self.data[submodel].size > 0) and ( self.procedure is not None): # observations for this submodel available self.outcome[submodel] = dataset.get_attribute_by_index( outcome_variable_name.get_alias(), estimation_idx[self.observations_mapping[submodel]]) regression_resources.merge({"outcome": self.outcome[submodel]}) regression_resources.merge({ "coefficient_names": self.coefficient_names[submodel].tolist(), "constant_position": coef[submodel].get_constants_positions() }) estimated_coef[submodel] = self.procedure.run( self.data[submodel], self.regression, resources=regression_resources) if "estimators" in estimated_coef[submodel].keys(): coef[submodel].set_coefficient_values( estimated_coef[submodel]["estimators"]) if "standard_errors" in estimated_coef[submodel].keys(): 
coef[submodel].set_standard_errors( estimated_coef[submodel]["standard_errors"]) if "other_measures" in estimated_coef[submodel].keys(): for measure in estimated_coef[submodel]["other_measures"].keys(): coef[submodel].set_measure( measure, estimated_coef[submodel]["other_measures"][measure]) if "other_info" in estimated_coef[submodel].keys(): for info in estimated_coef[submodel]["other_info"]: coef[submodel].set_other_info( info, estimated_coef[submodel]["other_info"][info]) coefficients.fill_coefficients(coef) self.save_predicted_values_and_errors(specification, coefficients, dataset, outcome_variable_name, index=index, data_objects=data_objects) return (coefficients, estimated_coef) def prepare_for_run(self, dataset=None, dataset_filter=None, filter_threshold=0, **kwargs): spec, coef = prepare_specification_and_coefficients(**kwargs) if (dataset is not None) and (dataset_filter is not None): filter_values = dataset.compute_variables( [dataset_filter], dataset_pool=self.dataset_pool) index = where(filter_values > filter_threshold)[0] else: index = None return (spec, coef, index) def prepare_for_estimate(self, dataset=None, dataset_filter=None, filter_threshold=0, **kwargs): spec = get_specification_for_estimation(**kwargs) if (dataset is not None) and (dataset_filter is not None): filter_values = dataset.compute_variables( [dataset_filter], dataset_pool=self.dataset_pool) index = where(filter_values > filter_threshold)[0] else: index = None return (spec, index) def get_data_as_dataset(self, submodel=-2): """Like get_all_data, but the return value is a Dataset containing attributes that correspond to the data columns. Their names are coefficient names.""" all_data = self.get_all_data(submodel) if all_data is None: return None names = self.get_coefficient_names(submodel) if names is None: return None dataset_data = {} for i in range(names.size): dataset_data[names[i]] = all_data[:, i].reshape(all_data.shape[0]) dataset_data["id"] = arange(all_data.shape[0]) + 1 storage = StorageFactory().get_storage('dict_storage') storage.write_table(table_name='dataset', table_data=dataset_data) ds = Dataset(in_storage=storage, id_name="id", in_table_name='dataset') return ds def save_predicted_values_and_errors(self, specification, coefficients, dataset, outcome_variable, index=None, data_objects=None): if self.estimate_config.get('save_predicted_values_and_errors', False): logger.log_status('Computing predicted values and residuals.') original_values = dataset.get_attribute_by_index( outcome_variable, index) predicted_values = zeros(dataset.size(), dtype='float32') predicted_values[index] = self.run_after_estimation( specification, coefficients, dataset, index=index, data_objects=data_objects) predicted_attribute_name = 'predicted_%s' % outcome_variable.get_alias() dataset.add_primary_attribute(name=predicted_attribute_name, data=predicted_values) dataset.flush_attribute(predicted_attribute_name) predicted_error_attribute_name = 'residuals_%s' % outcome_variable.get_alias() error_values = zeros(dataset.size(), dtype='float32') error_values[index] = (original_values - predicted_values[index]).astype( error_values.dtype) dataset.add_primary_attribute(name=predicted_error_attribute_name, data=error_values) dataset.flush_attribute(predicted_error_attribute_name) logger.log_status( 'Predicted values saved as %s (for the %s dataset)' % (predicted_attribute_name, dataset.get_dataset_name())) logger.log_status( 'Residuals saved as %s (for the %s dataset)' % (predicted_error_attribute_name, 
dataset.get_dataset_name())) def export_estimation_data(self, submodel=-2, file_name='./estimation_data_regression.txt', delimiter='\t'): import os from numpy import newaxis data = concatenate((self.outcome[submodel][..., newaxis], self.get_all_data(submodel=submodel)), axis=1) header = ['outcome'] + self.get_coefficient_names(submodel).tolist() nrows = data.shape[0] file_name_root, file_name_ext = os.path.splitext(file_name) out_file = "%s_submodel_%s.txt" % (file_name_root, submodel) fh = open(out_file, 'w') fh.write(delimiter.join(header) + '\n') #file header for row in range(nrows): line = [str(x) for x in data[row, ]] fh.write(delimiter.join(line) + '\n') fh.flush() fh.close() print 'Data written into %s' % out_file def run_after_estimation(self, *args, **kwargs): return self.run(*args, **kwargs) def _get_status_total_pieces(self): return ChunkModel._get_status_total_pieces( self) * self.get_status_for_gui().get_total_number_of_pieces() def _get_status_current_piece(self): return ChunkModel._get_status_current_piece( self) * self.get_status_for_gui().get_total_number_of_pieces( ) + self.get_status_for_gui().get_current_piece() def _get_status_piece_description(self): return "%s %s" % (ChunkModel._get_status_piece_description( self), self.get_status_for_gui().get_current_piece_description()) def get_specified_coefficients(self): return self.specified_coefficients
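# Hedged usage sketch for the RegressionModel above (illustrative, not part
# of the original module): the keyword arguments forwarded to prepare_for_run
# are whatever prepare_specification_and_coefficients expects, and the dataset
# is assumed to be loaded elsewhere. Note that a second, revised definition of
# RegressionModel follows below; it differs mainly in how NaN/Inf values from
# computed variables are handled (replaced and logged instead of raising).
def _example_regression_model_run(dataset, **prepare_kwargs):
    model = RegressionModel(regression_procedure="opus_core.linear_regression")
    # prepare_for_run returns (specification, coefficients, index); index is
    # None unless a dataset_filter is given
    spec, coef, index = model.prepare_for_run(dataset=dataset, **prepare_kwargs)
    # run() returns the array of regression outcomes for the indexed rows
    return model.run(spec, coef, dataset, index=index)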
class RegressionModel(ChunkModel): model_name = "Regression Model" model_short_name = "RM" def __init__(self, regression_procedure="opus_core.linear_regression", submodel_string=None, run_config=None, estimate_config=None, debuglevel=0, dataset_pool=None): self.debug = DebugPrinter(debuglevel) self.dataset_pool = self.create_dataset_pool(dataset_pool) self.regression = RegressionModelFactory().get_model(name=regression_procedure) if self.regression == None: raise StandardError, "No regression procedure given." self.submodel_string = submodel_string self.run_config = run_config if self.run_config == None: self.run_config = Resources() if not isinstance(self.run_config,Resources) and isinstance(self.run_config, dict): self.run_config = Resources(self.run_config) self.estimate_config = estimate_config if self.estimate_config == None: self.estimate_config = Resources() if not isinstance(self.estimate_config,Resources) and isinstance(self.estimate_config, dict): self.estimate_config = Resources(self.estimate_config) self.data = {} self.coefficient_names = {} ChunkModel.__init__(self) self.get_status_for_gui().initialize_pieces(3, pieces_description = array(['initialization', 'computing variables', 'submodel: 1'])) def run(self, specification, coefficients, dataset, index=None, chunk_specification=None, data_objects=None, run_config=None, initial_values=None, procedure=None, debuglevel=0): """'specification' is of type EquationSpecification, 'coefficients' is of type Coefficients, 'dataset' is of type Dataset, 'index' are indices of individuals in dataset for which the model runs. If it is None, the whole dataset is considered. 'chunk_specification' determines number of chunks in which the simulation is processed. 'data_objects' is a dictionary where each key is the name of a data object ('zone', ...) and its value is an object of class Dataset. 'run_config' is of type Resources, it gives additional arguments for the run. If 'procedure' is given, it overwrites the regression_procedure of the constructor. 'initial_values' is an array of the initial values of the results. It will be overwritten by the results for those elements that are handled by the model (defined by submodels in the specification). By default the results are initialized with 0. 'debuglevel' overwrites the constructor 'debuglevel'. 
""" self.debug.flag = debuglevel if run_config == None: run_config = Resources() if not isinstance(run_config,Resources) and isinstance(run_config, dict): run_config = Resources(run_config) self.run_config = run_config.merge_with_defaults(self.run_config) self.run_config.merge({"debug":self.debug}) if data_objects is not None: self.dataset_pool.add_datasets_if_not_included(data_objects) self.dataset_pool.replace_dataset(dataset.get_dataset_name(), dataset) if procedure is not None: self.regression = RegressionModelFactory().get_model(name=procedure) if initial_values is None: self.initial_values = zeros((dataset.size(),), dtype=float32) else: self.initial_values = zeros((dataset.size(),), dtype=initial_values.dtype) self.initial_values[index] = initial_values if dataset.size()<=0: # no data loaded yet dataset.get_id_attribute() if index == None: index = arange(dataset.size()) result = ChunkModel.run(self, chunk_specification, dataset, index, float32, specification=specification, coefficients=coefficients) return result def run_chunk (self, index, dataset, specification, coefficients): self.specified_coefficients = SpecifiedCoefficients().create(coefficients, specification, neqs=1) compute_resources = Resources({"debug":self.debug}) submodels = self.specified_coefficients.get_submodels() self.get_status_for_gui().update_pieces_using_submodels(submodels=submodels, leave_pieces=2) self.map_agents_to_submodels(submodels, self.submodel_string, dataset, index, dataset_pool=self.dataset_pool, resources = compute_resources) variables = self.specified_coefficients.get_full_variable_names_without_constants() self.debug.print_debug("Compute variables ...",4) self.increment_current_status_piece() dataset.compute_variables(variables, dataset_pool = self.dataset_pool, resources = compute_resources) data = {} coef = {} outcome=self.initial_values[index].copy() for submodel in submodels: coef[submodel] = SpecifiedCoefficientsFor1Submodel(self.specified_coefficients,submodel) self.coefficient_names[submodel] = coef[submodel].get_coefficient_names_without_constant()[0,:] self.debug.print_debug("Compute regression for submodel " +str(submodel),4) self.increment_current_status_piece() self.data[submodel] = dataset.create_regression_data(coef[submodel], index = index[self.observations_mapping[submodel]]) nan_index = where(isnan(self.data[submodel]))[1] inf_index = where(isinf(self.data[submodel]))[1] vnames = asarray(coef[submodel].get_variable_names()) if nan_index.size > 0: nan_var_index = unique(nan_index) self.data[submodel] = nan_to_num(self.data[submodel]) logger.log_warning("NaN(Not A Number) is returned from variable %s; it is replaced with %s." % (vnames[nan_var_index], nan_to_num(nan))) #raise ValueError, "NaN(Not A Number) is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % vnames[nan_var_index] if inf_index.size > 0: inf_var_index = unique(inf_index) self.data[submodel] = nan_to_num(self.data[submodel]) logger.log_warning("Inf is returned from variable %s; it is replaced with %s." % (vnames[inf_var_index], nan_to_num(inf))) #raise ValueError, "Inf is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." 
if (self.data[submodel].shape[0] > 0) and (self.data[submodel].size > 0): # observations for this submodel available outcome[self.observations_mapping[submodel]] = \ self.regression.run(self.data[submodel], coef[submodel].get_coefficient_values()[0,:], resources=self.run_config).astype(outcome.dtype) return outcome def correct_infinite_values(self, dataset, outcome_attribute_name, maxvalue=1e+38, clip_all_larger_values=False): """Check if the model resulted in infinite values. If yes, print warning and clip the values to maxvalue. If clip_all_larger_values is True, all values larger than maxvalue are clipped to maxvalue. """ infidx = where(dataset.get_attribute(outcome_attribute_name) == inf)[0] if infidx.size > 0: logger.log_warning("Infinite values in %s. Clipped to %s." % (outcome_attribute_name, maxvalue)) dataset.set_values_of_one_attribute(outcome_attribute_name, maxvalue, infidx) if clip_all_larger_values: idx = where(dataset.get_attribute(outcome_attribute_name) > maxvalue)[0] if idx.size > 0: logger.log_warning("Values in %s larger than %s. Clipped to %s." % (outcome_attribute_name, maxvalue, maxvalue)) dataset.set_values_of_one_attribute(outcome_attribute_name, maxvalue, idx) def estimate(self, specification, dataset, outcome_attribute, index = None, procedure=None, data_objects=None, estimate_config=None, debuglevel=0): """'specification' is of type EquationSpecification, 'dataset' is of type Dataset, 'outcome_attribute' - string that determines the dependent variable, 'index' are indices of individuals in dataset for which the model runs. If it is None, the whole dataset is considered. 'procedure' - name of the estimation procedure. If it is None, there should be an entry "estimation" in 'estimate_config' that determines the procedure. The class must have a method 'run' that takes as arguments 'data', 'regression_procedure' and 'resources'. It returns a dictionary with entries 'estimators', 'standard_errors' and 't_values' (all 1D numpy arrays). 'data_objects' is a dictionary where each key is the name of a data object ('zone', ...) and its value is an object of class Dataset. 'estimate_config' is of type Resources, it gives additional arguments for the estimation procedure. 'debuglevel' overwrites the class 'debuglevel'. 
""" #import wingdbstub self.debug.flag = debuglevel if estimate_config == None: estimate_config = Resources() if not isinstance(estimate_config,Resources) and isinstance(estimate_config, dict): estimate_config = Resources(estimate_config) self.estimate_config = estimate_config.merge_with_defaults(self.estimate_config) if data_objects is not None: self.dataset_pool.add_datasets_if_not_included(data_objects) self.procedure=procedure if self.procedure == None: self.procedure = self.estimate_config.get("estimation", None) if self.procedure is not None: self.procedure = ModelComponentCreator().get_model_component(self.procedure) else: logger.log_warning("No estimation procedure given, or problems with loading the corresponding module.") compute_resources = Resources({"debug":self.debug}) if dataset.size()<=0: # no data loaded yet dataset.get_id_attribute() if index == None: index = arange(dataset.size()) if not isinstance(index,ndarray): index=array(index) estimation_size_agents = self.estimate_config.get("estimation_size_agents", None) # should be a proportion of the agent_set if estimation_size_agents == None: estimation_size_agents = 1.0 else: estimation_size_agents = max(min(estimation_size_agents,1.0),0.0) # between 0 and 1 if estimation_size_agents < 1.0: self.debug.print_debug("Sampling agents for estimation ...",3) estimation_idx = sample_noreplace(arange(index.size), int(index.size*estimation_size_agents)) else: estimation_idx = arange(index.size) estimation_idx = index[estimation_idx] self.debug.print_debug("Number of observations for estimation: " + str(estimation_idx.size),2) if estimation_idx.size <= 0: self.debug.print_debug("Nothing to be done.",2) return (None, None) coefficients = create_coefficient_from_specification(specification) self.specified_coefficients = SpecifiedCoefficients().create(coefficients, specification, neqs=1) submodels = self.specified_coefficients.get_submodels() self.get_status_for_gui().update_pieces_using_submodels(submodels=submodels, leave_pieces=2) self.map_agents_to_submodels(submodels, self.submodel_string, dataset, estimation_idx, dataset_pool=self.dataset_pool, resources = compute_resources, submodel_size_max=self.estimate_config.get('submodel_size_max', None)) variables = self.specified_coefficients.get_full_variable_names_without_constants() self.debug.print_debug("Compute variables ...",4) self.increment_current_status_piece() dataset.compute_variables(variables, dataset_pool=self.dataset_pool, resources = compute_resources) coef = {} estimated_coef={} self.outcome = {} dataset.compute_variables([outcome_attribute], dataset_pool=self.dataset_pool, resources=compute_resources) regression_resources=Resources(estimate_config) regression_resources.merge({"debug":self.debug}) outcome_variable_name = VariableName(outcome_attribute) for submodel in submodels: coef[submodel] = SpecifiedCoefficientsFor1Submodel(self.specified_coefficients,submodel) self.increment_current_status_piece() logger.log_status("Estimate regression for submodel " +str(submodel), tags=["estimate"], verbosity_level=2) #logger.log_status("Number of observations: " +str(self.observations_mapping[submodel].size), #tags=["estimate"], verbosity_level=2) self.data[submodel] = dataset.create_regression_data_for_estimation(coef[submodel], index = estimation_idx[self.observations_mapping[submodel]]) self.coefficient_names[submodel] = coef[submodel].get_coefficient_names_without_constant()[0,:] if (self.data[submodel].shape[0] > 0) and (self.data[submodel].size > 0) and (self.procedure is 
not None): # observations for this submodel available self.outcome[submodel] = dataset.get_attribute_by_index(outcome_variable_name.get_alias(), estimation_idx[self.observations_mapping[submodel]]) regression_resources.merge({"outcome": self.outcome[submodel]}) regression_resources.merge({"coefficient_names":self.coefficient_names[submodel].tolist(), "constant_position": coef[submodel].get_constants_positions()}) regression_resources.merge({"submodel": submodel}) estimated_coef[submodel] = self.procedure.run(self.data[submodel], self.regression, resources=regression_resources) if "estimators" in estimated_coef[submodel].keys(): coef[submodel].set_coefficient_values(estimated_coef[submodel]["estimators"]) if "standard_errors" in estimated_coef[submodel].keys(): coef[submodel].set_standard_errors(estimated_coef[submodel]["standard_errors"]) if "other_measures" in estimated_coef[submodel].keys(): for measure in estimated_coef[submodel]["other_measures"].keys(): coef[submodel].set_measure(measure, estimated_coef[submodel]["other_measures"][measure]) if "other_info" in estimated_coef[submodel].keys(): for info in estimated_coef[submodel]["other_info"]: coef[submodel].set_other_info(info, estimated_coef[submodel]["other_info"][info]) coefficients.fill_coefficients(coef) self.specified_coefficients.coefficients = coefficients self.save_predicted_values_and_errors(specification, coefficients, dataset, outcome_variable_name, index=index, data_objects=data_objects) return (coefficients, estimated_coef) def prepare_for_run(self, dataset=None, dataset_filter=None, filter_threshold=0, **kwargs): spec, coef = prepare_specification_and_coefficients(**kwargs) if (dataset is not None) and (dataset_filter is not None): filter_values = dataset.compute_variables([dataset_filter], dataset_pool=self.dataset_pool) index = where(filter_values > filter_threshold)[0] else: index = None return (spec, coef, index) def prepare_for_estimate(self, dataset=None, dataset_filter=None, filter_threshold=0, **kwargs): spec = get_specification_for_estimation(**kwargs) if (dataset is not None) and (dataset_filter is not None): filter_values = dataset.compute_variables([dataset_filter], dataset_pool=self.dataset_pool) index = where(filter_values > filter_threshold)[0] else: index = None return (spec, index) def get_data_as_dataset(self, submodel=-2): """Like get_all_data, but the return value is a Dataset containing attributes that correspond to the data columns. 
Their names are coefficient names.""" all_data = self.get_all_data(submodel) if all_data is None: return None names = self.get_coefficient_names(submodel) if names is None: return None dataset_data = {} for i in range(names.size): dataset_data[names[i]] = all_data[:, i].reshape(all_data.shape[0]) dataset_data["id"] = arange(all_data.shape[0])+1 storage = StorageFactory().get_storage('dict_storage') storage.write_table(table_name='dataset', table_data=dataset_data) ds = Dataset(in_storage=storage, id_name="id", in_table_name='dataset') return ds def save_predicted_values_and_errors(self, specification, coefficients, dataset, outcome_variable, index=None, data_objects=None): if self.estimate_config.get('save_predicted_values_and_errors', False): logger.log_status('Computing predicted values and residuals.') original_values = dataset.get_attribute_by_index(outcome_variable, index) predicted_values = zeros(dataset.size(), dtype='float32') predicted_values[index] = self.run_after_estimation(specification, coefficients, dataset, index=index, data_objects=data_objects) predicted_attribute_name = 'predicted_%s' % outcome_variable.get_alias() dataset.add_primary_attribute(name=predicted_attribute_name, data=predicted_values) dataset.flush_attribute(predicted_attribute_name) predicted_error_attribute_name = 'residuals_%s' % outcome_variable.get_alias() error_values = zeros(dataset.size(), dtype='float32') error_values[index] = (original_values - predicted_values[index]).astype(error_values.dtype) dataset.add_primary_attribute(name=predicted_error_attribute_name, data = error_values) dataset.flush_attribute(predicted_error_attribute_name) logger.log_status('Predicted values saved as %s (for the %s dataset)' % (predicted_attribute_name, dataset.get_dataset_name())) logger.log_status('Residuals saved as %s (for the %s dataset)' % (predicted_error_attribute_name, dataset.get_dataset_name())) def export_estimation_data(self, submodel=-2, file_name='./estimation_data_regression.txt', delimiter = '\t'): import os from numpy import newaxis data = concatenate((self.outcome[submodel][...,newaxis], self.get_all_data(submodel=submodel)), axis=1) header = ['outcome'] + self.get_coefficient_names(submodel).tolist() nrows = data.shape[0] file_name_root, file_name_ext = os.path.splitext(file_name) out_file = "%s_submodel_%s.txt" % (file_name_root, submodel) fh = open(out_file,'w') fh.write(delimiter.join(header) + '\n') #file header for row in range(nrows): line = [str(x) for x in data[row,]] fh.write(delimiter.join(line) + '\n') fh.flush() fh.close() print 'Data written into %s' % out_file def run_after_estimation(self, *args, **kwargs): return self.run(*args, **kwargs) def _get_status_total_pieces(self): return ChunkModel._get_status_total_pieces(self) * self.get_status_for_gui().get_total_number_of_pieces() def _get_status_current_piece(self): return ChunkModel._get_status_current_piece(self)*self.get_status_for_gui().get_total_number_of_pieces() + self.get_status_for_gui().get_current_piece() def _get_status_piece_description(self): return "%s %s" % (ChunkModel._get_status_piece_description(self), self.get_status_for_gui().get_current_piece_description()) def get_specified_coefficients(self): return self.specified_coefficients
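# Standalone numpy sketch (not part of the original module) of the NaN/Inf
# policy this second RegressionModel variant applies in run_chunk: offending
# data values are passed through nan_to_num and logged, where the first
# variant raised a ValueError instead.
def _example_nan_to_num_policy():
    from numpy import array, nan, inf, nan_to_num
    data = array([1.0, nan, inf, -inf])
    # nan -> 0.0; +inf/-inf -> the largest/smallest representable floats
    return nan_to_num(data)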
class upc_sequence(object): """ Invokes computation of utilities, probabilities and choices. """ def __init__(self, utility_class=None, probability_class=None, choice_class=None, resources=None, debuglevel=0): """utility_class, probability_class, choice_class are objects of the corresponding classes. They must have a method 'run'. """ self.utility_class = utility_class self.probability_class = probability_class self.choice_class = choice_class self.resources = resources if self.resources == None: self.resources = Resources() self.utilities = None self.probabilities = None self.choices = None self.debug = DebugPrinter(debuglevel) def run(self, data=None, coefficients=None, resources=None): local_resources = Resources() if resources: local_resources.merge(resources) last_result = self.compute_utilities(data=data, coefficients=coefficients, resources=local_resources) this_result = self.compute_probabilities(resources=local_resources) if this_result is not None: last_result = this_result this_result = self.compute_choices(resources=local_resources) if this_result is not None: last_result = this_result return last_result def compute_utilities(self, data=None, coefficients=None, resources=None): if self.utility_class is None: self.debug.print_debug("No utilities class given.", 10) return None self.debug.print_debug("compute_utilities ...", 3) self.utilities = self.utility_class.run(data, coefficients, resources=resources) return self.utilities def compute_probabilities(self, resources=None): if self.probability_class is None: self.debug.print_debug("No probabilities class given.", 10) return None self.debug.print_debug("compute_probabilities ...", 3) self.probabilities = self.probability_class.run(self.utilities, resources=resources) return self.probabilities def compute_choices(self, resources=None): if self.choice_class is None: self.debug.print_debug("No choices class given.", 10) return None self.debug.print_debug("compute_choices ...", 3) self.choices = self.choice_class.run(self.probabilities, resources=resources) return self.choices def get_utilities(self): return self.utilities def get_probabilities(self): return self.probabilities def write_probability_sums(self): self.probability_class.check_sum(self.probabilities) def get_choices(self): return self.choices def get_choice_histogram(self, min=None, max=None, bins=None): """Give an array that represents a histogram of choices.""" if max == None: max = self.choices.max() + 1 if min == None: min = self.choices.min() if bins == None: bins = max - min return histogram(self.get_choices(), min, max, bins) def get_probabilities_sum(self): """Return probabilities sum along the first axis. """ probs = self.get_probabilities() if probs.ndim < 2: return probs.sum() return reshape(sum(probs, 0), probs.shape[1]) def plot_choice_histograms(self, capacity, main=""): self.plot_histogram(numrows=2) self.plot_histogram_with_capacity(capacity) def plot_histogram(self, main="", numrows=1, numcols=1, fignum=1): """Plot a histogram of choices and probability sums. Expects probabilities as (at least) a 2D array. """ from matplotlib.pylab import bar, xticks, yticks, title, text, axis, figure, subplot probabilities = self.get_probabilities() if probabilities.ndim < 2: raise StandardError, "probabilities must have at least 2 dimensions." 
alts = probabilities.shape[1] width_par = (1.0 / alts + 1) / 2.0 # float division; Python 2 integer division made this a constant 0.5 choice_counts = self.get_choice_histogram(0, alts) sum_probs = self.get_probabilities_sum() subplot(numrows, numcols, fignum) bar(arange(alts), choice_counts, width=width_par) bar(arange(alts) + width_par, sum_probs, width=width_par, color='g') xticks(arange(alts)) title(main) Axis = axis() text(alts + .5, -.1, "\nchoices histogram (blue),\nprobabilities sum (green)", horizontalalignment='right', verticalalignment='top') def plot_histogram_with_capacity(self, capacity, main=""): """Plot histogram of choices and capacities. The number of alternatives is determined from the second dimension of probabilities. """ from matplotlib.pylab import bar, xticks, yticks, title, text, axis, figure, subplot probabilities = self.get_probabilities() if probabilities.ndim < 2: raise StandardError, "probabilities must have at least 2 dimensions." alts = probabilities.shape[1] width_par = (1.0 / alts + 1) / 2.0 choice_counts = self.get_choice_histogram(0, alts) sum_probs = self.get_probabilities_sum() subplot(212) bar(arange(alts), choice_counts, width=width_par) bar(arange(alts) + width_par, capacity, width=width_par, color='r') xticks(arange(alts)) title(main) Axis = axis() text(alts + .5, -.1, "\nchoices histogram (blue),\ncapacities (red)", horizontalalignment='right', verticalalignment='top') def show_plots(self, file=None): """Render the plots that have been generated. This method should be the last method called in the script, since it hands control to matplotlib's rendering backend. """ from matplotlib.pylab import show, savefig if file is not None: savefig(file) else: show() def summary(self): logger.log_status("utilities") logger.log_status(self.get_utilities()) logger.log_status("probabilities") logger.log_status(self.get_probabilities()) logger.log_status("probabilities sums") self.write_probability_sums() logger.log_status("choices") logger.log_status(self.get_choices()) def get_excess_demand(self, capacity): demand = self.get_probabilities_sum() return where(demand > capacity, demand - capacity, 0) def get_dependent_datasets(self): result = [] if self.utility_class is not None: try: result = result + self.utility_class.get_dependent_datasets() except: pass if self.probability_class is not None: try: result = result + self.probability_class.get_dependent_datasets() except: pass if self.choice_class is not None: try: result = result + self.choice_class.get_dependent_datasets() except: pass return result
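# Hedged wiring sketch for upc_sequence; the two dummy component classes
# below are hypothetical and exist only to satisfy the 'must have a run
# method' contract stated in the constructor docstring.
def _example_upc_sequence():
    from numpy import exp, array

    class _DummyUtilities:
        def run(self, data, coefficients, resources=None):
            return array([[1.0, 2.0, 0.5]])  # one agent, three alternatives

    class _DummyProbabilities:
        def run(self, utilities, resources=None):
            e = exp(utilities)
            return e / e.sum(axis=1)[:, None]  # softmax over alternatives

    upc = upc_sequence(utility_class=_DummyUtilities(),
                       probability_class=_DummyProbabilities())
    # with no choice_class configured, run() returns the last result that was
    # actually computed, i.e. the probabilities
    return upc.run(data=None, coefficients=None)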
def get_variable(self, variable_name, dataset, quiet=False, debug=0, index_name=None): """Returns an instance of class Variable. 'variable_name' is an instance of class VariableName. 'dataset' is an object of class Dataset to which the variable belongs. In case of an error in either importing the module or evaluating its constructor, the method returns None. If quiet is True no warnings are printed. index_name is used for lag variables only. """ lag_attribute_name = None lag_offset = 0 if not isinstance(debug, DebugPrinter): debug = DebugPrinter(debug) if variable_name.get_autogen_class() is not None: # variable_name has an autogenerated class -- just use that variable_subclass = variable_name.get_autogen_class() substrings = () else: # either find the variable name in the expression library (if present), in an appropriate 'aliases' file, # or load our variable class as 'variable_subclass' using an import statement short_name = variable_name.get_short_name() dataset_name = variable_name.get_dataset_name() package_name = variable_name.get_package_name() # if there isn't a package name, first look in the expression library (if there is a package name, look elsewhere) if package_name is None: e = VariableFactory._expression_library.get( (dataset_name, short_name), None) if e is not None: if e == variable_name.get_expression(): # it is a primary attribute return None v = VariableName(e) return VariableFactory().get_variable(v, dataset, quiet=quiet, debug=debug) else: # not in the expression library - next look in the appropriate 'aliases' file, if one is present # (but only if we have a package name in the first place) try: stmt = 'from %s.%s.aliases import aliases' % (package_name, dataset_name) exec(stmt) except ImportError: aliases = [] for a in aliases: # for each definition, see if the alias is equal to the short_name. If it is, # then use that definition for the variable v = VariableName(a) if v.get_alias() == short_name: return VariableFactory().get_variable(v, dataset, quiet=quiet, debug=debug) lag_variable_parser = LagVariableParser() if lag_variable_parser.is_short_name_for_lag_variable(short_name): lag_attribute_name, lag_offset = lag_variable_parser.parse_lag_variable_short_name( short_name) true_short_name = "VVV_lagLLL" substrings = (package_name, lag_attribute_name, lag_offset, dataset_name, index_name) opus_path = 'opus_core.variables' else: if package_name is None: raise LookupError( "Incomplete variable specification for '%s.%s' (missing package name, " "and the variable is neither in the expression library nor a lag variable)." % (dataset_name, short_name)) opus_path = '%s.%s' % (package_name, dataset_name) true_short_name, substrings = VariableFamilyNameTranslator().\ get_translated_variable_name_and_substring_arguments(opus_path, short_name) module = '%s.%s' % (opus_path, true_short_name) # Note that simply checking for the .py module file would not # be safe here, as objects could be instantiated in __init__.py files. try: ev = "from %s import %s as variable_subclass" % ( module, true_short_name) debug.print_debug("Evaluating '" + ev + "'.", 12) exec(ev) debug.print_debug("Successful.", 12) except ImportError, e: if not quiet: from opus_core.simulation_state import SimulationState time = SimulationState().get_current_time() desc = '\n'.join(( "Opus variable '%s' does not exist for dataset '%s' in year %s. "
" "The following error occured when finally trying to import " "the variable '%s' from the Python module " "'%s':", "%s", )) % (true_short_name, opus_path, time, true_short_name, module, indent_text( formatPlainTextExceptionInfoWithoutLog(''))) raise NameError(desc) return None
class EmploymentTransitionModel(Model): """Creates and removes jobs from job_set.""" model_name = "Employment Transition Model" location_id_name_default = "grid_id" variable_package_default = "urbansim" def __init__(self, location_id_name=None, variable_package=None, dataset_pool=None, debuglevel=0): self.debug = DebugPrinter(debuglevel) self.location_id_name = self.location_id_name_default self.variable_package = self.variable_package_default if location_id_name is not None: self.location_id_name = location_id_name if variable_package is not None: self.variable_package = variable_package self.dataset_pool = self.create_dataset_pool(dataset_pool, ["urbansim", "opus_core"]) def run(self, year, job_set, control_totals, job_building_types, data_objects=None, resources=None): self._do_initialize_for_run(job_set, job_building_types, data_objects) idx = where(control_totals.get_attribute("year")==year)[0] self.control_totals_for_this_year = DatasetSubset(control_totals, idx) self._do_run_for_this_year(job_set) return self._update_job_set(job_set) def _do_initialize_for_run(self, job_set, job_building_types, data_objects=None): self.max_id = job_set.get_id_attribute().max() self.job_size = job_set.size() self.job_id_name = job_set.get_id_name()[0] self.new_jobs = { self.location_id_name:array([], dtype=job_set.get_data_type(self.location_id_name, int32)), "sector_id":array([], dtype=job_set.get_data_type("sector_id", int32)), self.job_id_name:array([], dtype=job_set.get_data_type(self.job_id_name, int32)), "building_type":array([], dtype=job_set.get_data_type("building_type", int8)) } self.remove_jobs = array([], dtype=int32) if data_objects is not None: self.dataset_pool.add_datasets_if_not_included(data_objects) self.dataset_pool.add_datasets_if_not_included({job_building_types.get_dataset_name():job_building_types}) self.available_building_types = job_building_types.get_id_attribute() def _compute_sector_variables(self, sectors, job_set): compute_resources = Resources({"debug":self.debug}) job_set.compute_variables( map(lambda x: "%s.%s.is_in_employment_sector_%s_home_based" % (self.variable_package, job_set.get_dataset_name(), x), sectors) + map(lambda x: "%s.%s.is_in_employment_sector_%s_non_home_based" % (self.variable_package, job_set.get_dataset_name(), x), sectors) + ["is_non_home_based_job", "is_home_based_job"], dataset_pool = self.dataset_pool, resources = compute_resources) def _do_run_for_this_year(self, job_set): building_type = job_set.get_attribute("building_type") sectors = unique(self.control_totals_for_this_year.get_attribute("sector_id")) self._compute_sector_variables(sectors, job_set) for sector in sectors: isector = where(self.control_totals_for_this_year.get_attribute("sector_id") == sector)[0] total_hb_jobs = self.control_totals_for_this_year.get_attribute("total_home_based_employment")[isector] total_nhb_jobs = self.control_totals_for_this_year.get_attribute("total_non_home_based_employment")[isector] is_in_sector_hb = job_set.get_attribute("is_in_employment_sector_%s_home_based" % sector) is_in_sector_nhb = job_set.get_attribute("is_in_employment_sector_%s_non_home_based" % sector) diff_hb = int(total_hb_jobs - is_in_sector_hb.astype(int8).sum()) diff_nhb = int(total_nhb_jobs - is_in_sector_nhb.astype(int8).sum()) if diff_hb < 0: # home based jobs to be removed w = where(is_in_sector_hb == 1)[0] sample_array, non_placed, size_non_placed = \ get_array_without_non_placed_agents(job_set, w, -1*diff_hb, self.location_id_name) self.remove_jobs = 
concatenate((self.remove_jobs, non_placed, sample_noreplace(sample_array, max(0,abs(diff_hb)-size_non_placed)))) if diff_nhb < 0: # non home based jobs to be removed w = where(is_in_sector_nhb == 1)[0] sample_array, non_placed, size_non_placed = \ get_array_without_non_placed_agents(job_set, w, -1*diff_nhb, self.location_id_name) self.remove_jobs = concatenate((self.remove_jobs, non_placed, sample_noreplace(sample_array, max(0,abs(diff_nhb)-size_non_placed)))) if diff_hb > 0: # home based jobs to be created self.new_jobs[self.location_id_name] = concatenate((self.new_jobs[self.location_id_name], zeros((diff_hb,), dtype=self.new_jobs[self.location_id_name].dtype.type))) self.new_jobs["sector_id"] = concatenate((self.new_jobs["sector_id"], (resize(array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_hb)))) if 1 in is_in_sector_hb: building_type_distribution = array(ndimage_sum(is_in_sector_hb, labels=building_type, index=self.available_building_types)) elif 1 in job_set.get_attribute("is_home_based_job"): # take the building type distribution from the whole region building_type_distribution = array(ndimage_sum( job_set.get_attribute("is_home_based_job"), labels=building_type, index=self.available_building_types)) else: # there are no home-based jobs in the region, take uniform distribution building_type_distribution = ones(self.available_building_types.size) building_type_distribution = building_type_distribution/building_type_distribution.sum() sampled_building_types = probsample_replace( self.available_building_types, diff_hb, building_type_distribution/ float(building_type_distribution.sum())) self.new_jobs["building_type"] = concatenate((self.new_jobs["building_type"], sampled_building_types.astype(self.new_jobs["building_type"].dtype.type))) new_max_id = self.max_id + diff_hb self.new_jobs[self.job_id_name] = concatenate((self.new_jobs[self.job_id_name], arange(self.max_id+1, new_max_id+1))) self.max_id = new_max_id if diff_nhb > 0: # non home based jobs to be created self.new_jobs[self.location_id_name]=concatenate((self.new_jobs[self.location_id_name], zeros((diff_nhb,), dtype=self.new_jobs[self.location_id_name].dtype.type))) self.new_jobs["sector_id"]=concatenate((self.new_jobs["sector_id"], (resize(array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_nhb)))) if 1 in is_in_sector_nhb: building_type_distribution = array(ndimage_sum(is_in_sector_nhb, labels=building_type, index=self.available_building_types)) elif 1 in job_set.get_attribute("is_non_home_based_job"): # take the building type distribution from the whole region building_type_distribution = array(ndimage_sum( job_set.get_attribute("is_non_home_based_job"), labels=building_type, index=self.available_building_types)) else: # there are no non-home-based jobs in the region, take uniform distribution building_type_distribution = ones(self.available_building_types.size) building_type_distribution = building_type_distribution/building_type_distribution.sum() sampled_building_types = probsample_replace( self.available_building_types, diff_nhb, building_type_distribution/ float(building_type_distribution.sum())) self.new_jobs["building_type"] = concatenate((self.new_jobs["building_type"], sampled_building_types.astype(self.new_jobs["building_type"].dtype.type))) new_max_id = self.max_id+diff_nhb self.new_jobs[self.job_id_name]=concatenate((self.new_jobs[self.job_id_name], arange(self.max_id+1, new_max_id+1))) self.max_id = new_max_id def _update_job_set(self, job_set): 
job_set.remove_elements(self.remove_jobs) job_set.add_elements(self.new_jobs, require_all_attributes=False) difference = job_set.size()-self.job_size self.debug.print_debug("Difference in number of jobs: %s (original %s," " new %s, created %s, deleted %s)" % (difference, self.job_size, job_set.size(), self.new_jobs[self.job_id_name].size, self.remove_jobs.size), 3) self.debug.print_debug("Number of unplaced jobs: %s" % where(job_set.get_attribute(self.location_id_name) <=0)[0].size, 3) return difference def prepare_for_run(self, storage, **kwargs): from urbansim.datasets.control_total_dataset import ControlTotalDataset control_totals = ControlTotalDataset(in_storage=storage, what="employment") sample_control_totals(storage, control_totals, **kwargs) return control_totals
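# Hedged run sketch for EmploymentTransitionModel; the storage object and the
# job_set / job_building_types datasets are assumed to have been opened and
# loaded elsewhere, and the year is illustrative.
def _example_employment_transition(storage, job_set, job_building_types):
    model = EmploymentTransitionModel()
    # prepare_for_run reads the employment control totals from storage
    control_totals = model.prepare_for_run(storage)
    # run() returns the net change in the number of jobs for that year
    return model.run(2005, job_set, control_totals, job_building_types)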
class HouseholdTransitionModel(Model): """Creates and removes households from household_set. New households are duplicated from the existing households, keeping the joint distribution of all characteristics. """ model_name = "Household Transition Model" def __init__(self, location_id_name="grid_id", dataset_pool=None, debuglevel=0): self.debug = DebugPrinter(debuglevel) self.location_id_name = location_id_name self.dataset_pool = self.create_dataset_pool(dataset_pool, ["urbansim", "opus_core"]) def run(self, year, household_set, control_totals, characteristics, resources=None): self._do_initialize_for_run(household_set) control_totals.get_attribute("total_number_of_households") # to make sure they are loaded self.characteristics = characteristics self.all_categories = self.characteristics.get_attribute("characteristic") self.all_categories = array(map(lambda x: x.lower(), self.all_categories)) self.scaled_characteristic_names = get_distinct_names(self.all_categories).tolist() self.marginal_characteristic_names = copy(control_totals.get_id_name()) index_year = self.marginal_characteristic_names.index("year") self.marginal_characteristic_names.remove("year") idx = where(control_totals.get_attribute("year")==year)[0] self.control_totals_for_this_year = DatasetSubset(control_totals, idx) self._do_run_for_this_year(household_set) return self._update_household_set(household_set) def _update_household_set(self, household_set): index_of_duplicated_hhs = household_set.duplicate_rows(self.mapping_existing_hhs_to_new_hhs) household_set.modify_attribute(name=self.location_id_name, data=-1 * ones((index_of_duplicated_hhs.size,), dtype=household_set.get_data_type(self.location_id_name)), index=index_of_duplicated_hhs) household_set.remove_elements(self.remove_households) if self.new_households[self.location_id_name].size > 0: max_id = household_set.get_id_attribute().max() self.new_households[self.household_id_name]=concatenate((self.new_households[self.household_id_name], arange(max_id+1, max_id+self.new_households[self.location_id_name].size+1))) household_set.add_elements(self.new_households, require_all_attributes=False) difference = household_set.size()-self.household_size self.debug.print_debug("Difference in number of households: %s" " (original %s, new %s, created %s, deleted %s)" % (difference, self.household_size, household_set.size(), self.new_households[self.household_id_name].size + self.mapping_existing_hhs_to_new_hhs.size, self.remove_households.size), 3) if self.location_id_name in household_set.get_attribute_names(): self.debug.print_debug("Number of unplaced households: %s" % where(household_set.get_attribute(self.location_id_name) <=0)[0].size, 3) return difference def _do_initialize_for_run(self, household_set): self.household_id_name = household_set.get_id_name()[0] self.new_households = { self.location_id_name:array([], dtype=household_set.get_data_type(self.location_id_name, int32)), self.household_id_name:array([], dtype=household_set.get_data_type(self.household_id_name, int32)) } self.remove_households = array([], dtype='int32') self.household_size = household_set.size() self.max_id = household_set.get_id_attribute().max() self.arrays_from_categories = {} self.arrays_from_categories_mapping = {} self.mapping_existing_hhs_to_new_hhs = array([], dtype=household_set.get_data_type(self.household_id_name, int32)) def _do_run_for_this_year(self, household_set): self.household_set = household_set groups = self.control_totals_for_this_year.get_id_attribute() 
self.create_arrays_from_categories(self.household_set) all_characteristics = self.arrays_from_categories.keys() self.household_set.load_dataset_if_not_loaded(attributes = all_characteristics) # prevents lazy loading, to save runtime idx_shape = [] number_of_combinations=1 num_attributes=len(all_characteristics) for iattr in range(num_attributes): attr = all_characteristics[iattr] max_bins = self.arrays_from_categories[attr].max()+1 idx_shape.append(max_bins) number_of_combinations=number_of_combinations*max_bins if attr not in self.new_households.keys(): self.new_households[attr] = array([], dtype=self.household_set.get_data_type(attr, float32)) self.number_of_combinations = int(number_of_combinations) idx_tmp = indices(tuple(idx_shape)) categories_index = zeros((self.number_of_combinations,num_attributes)) for i in range(num_attributes): #create indices of all combinations categories_index[:,i] = idx_tmp[i].ravel() categories_index_mapping = {} for i in range(self.number_of_combinations): categories_index_mapping[tuple(categories_index[i,].tolist())] = i def get_category(values): bins = map(lambda x, y: self.arrays_from_categories[x][int(y)], all_characteristics, values) try: return categories_index_mapping[tuple(bins)] except KeyError, msg: where_error = where(array(bins) == -1)[0] if where_error.size > 0: raise KeyError, \ "Invalid value of %s for attribute %s. It is not included in the characteristics groups." % ( array(values)[where_error], array(all_characteristics)[where_error]) raise KeyError, msg if num_attributes > 0: # the next array must be a copy of the household values, otherwise it changes the original values values_array = reshape(array(self.household_set.get_attribute(all_characteristics[0])), (self.household_set.size(),1)) if num_attributes > 1: for attr in all_characteristics[1:]: values_array = concatenate((values_array, reshape(array(self.household_set.get_attribute(attr)), (self.household_set.size(),1))), axis=1) for i in range(values_array.shape[1]): if values_array[:,i].max() > 10000: values_array[:,i] = values_array[:,i]/10 values_array[:,i] = clip(values_array[:,i], 0, self.arrays_from_categories[all_characteristics[i]].size-1) # determine for each household which category it belongs to self.household_categories = array(map(lambda x: get_category(x), values_array)) # performance bottleneck number_of_households_in_categories = array(ndimage_sum(ones((self.household_categories.size,)), labels=self.household_categories+1, index = arange(self.number_of_combinations)+1)) else: # no marginal characteristics; consider just one group self.household_categories = zeros(self.household_set.size(), dtype='int32') number_of_households_in_categories = array([self.household_set.size()]) g=arange(num_attributes) #iterate over marginal characteristics for group in groups: if groups.ndim <= 1: # there is only one group (no marginal char.) 
id = group else: id = tuple(group.tolist()) group_element = self.control_totals_for_this_year.get_data_element_by_id(id) total = group_element.total_number_of_households for i in range(g.size): g[i] = eval("group_element."+self.arrays_from_categories.keys()[i]) if g.size <= 0: l = ones((number_of_households_in_categories.size,)) else: l = categories_index[:,0] == g[0] for i in range(1,num_attributes): l = logical_and(l, categories_index[:,i] == g[i]) # l has 1's for combinations of this group number_in_group = array(ndimage_sum(number_of_households_in_categories, labels=l, index = 1)) diff = int(total - number_in_group) if diff < 0: # households to be removed is_in_group = l[self.household_categories] w = where(is_in_group)[0] sample_array, non_placed, size_non_placed = \ get_array_without_non_placed_agents(self.household_set, w, -1*diff, self.location_id_name) self.remove_households = concatenate((self.remove_households, non_placed, sample_noreplace(sample_array, max(0,abs(diff)-size_non_placed)))) if diff > 0: # households to be created self._create_households(diff, l)
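# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the model above): how _do_run_for_this_year
# maps each combination of characteristic bins to a flat category index using
# numpy.indices. The bin counts below are invented example values; the model
# derives them from the characteristics dataset at runtime.
def _household_category_index_example():
    from numpy import indices, zeros
    idx_shape = [2, 3]  # e.g. 2 income bins x 3 household-size bins
    number_of_combinations = 2 * 3
    idx_tmp = indices(tuple(idx_shape))
    categories_index = zeros((number_of_combinations, len(idx_shape)), dtype='int32')
    for i in range(len(idx_shape)):  # create indices of all combinations
        categories_index[:, i] = idx_tmp[i].ravel()
    # map each (income_bin, size_bin) tuple to its flat category index
    categories_index_mapping = {}
    for i in range(number_of_combinations):
        categories_index_mapping[tuple(categories_index[i, :].tolist())] = i
    assert categories_index_mapping[(1, 2)] == 5  # the last combination
    return categories_index_mapping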
class upc_sequence(object):
    """ Invokes computation of utilities, probabilities and choices. """
    def __init__(self, utility_class=None, probability_class=None, choice_class=None, resources=None, debuglevel=0):
        """utility_class, probability_class, choice_class are objects of the corresponding classes.
        They must have a method 'run'.
        """
        self.utility_class = utility_class
        self.probability_class = probability_class
        self.choice_class = choice_class
        self.resources = resources
        if self.resources is None:
            self.resources = Resources()
        self.utilities = None
        self.probabilities = None
        self.choices = None
        self.debug = DebugPrinter(debuglevel)

    def run(self, data=None, coefficients=None, resources=None):
        local_resources = Resources()
        if resources:
            local_resources.merge(resources)
        last_result = self.compute_utilities(data=data, coefficients=coefficients, resources=local_resources)
        this_result = self.compute_probabilities(resources=local_resources)
        if this_result is not None:
            last_result = this_result
        this_result = self.compute_choices(resources=local_resources)
        if this_result is not None:
            last_result = this_result
        return last_result

    def compute_utilities(self, data=None, coefficients=None, resources=None):
        if self.utility_class is None:
            self.debug.print_debug("No utilities class given.", 10)
            return None
        self.debug.print_debug("compute_utilities ...", 3)
        self.utilities = self.utility_class.run(data, coefficients, resources=resources)
        return self.utilities

    def compute_probabilities(self, resources=None):
        if self.probability_class is None:
            self.debug.print_debug("No probabilities class given.", 10)
            return None
        self.debug.print_debug("compute_probabilities ...", 3)
        self.probabilities = self.probability_class.run(self.utilities, resources=resources)
        return self.probabilities

    def compute_choices(self, resources=None):
        if self.choice_class is None:
            self.debug.print_debug("No choices class given.", 10)
            return None
        self.debug.print_debug("compute_choices ...", 3)
        self.choices = self.choice_class.run(self.probabilities, resources=resources)
        return self.choices

    def get_utilities(self):
        return self.utilities

    def get_probabilities(self):
        return self.probabilities

    def write_probability_sums(self):
        self.probability_class.check_sum(self.probabilities)

    def get_choices(self):
        return self.choices

    def get_choice_histogram(self, min=None, max=None, bins=None):
        """Give an array that represents a histogram of choices."""
        if max is None:
            max = self.choices.max() + 1
        if min is None:
            min = self.choices.min()
        if bins is None:
            bins = max - min
        return histogram(self.get_choices(), min, max, bins)

    def get_probabilities_sum(self):
        """Return probabilities sum along the first axis.
        """
        probs = self.get_probabilities()
        if probs.ndim < 2:
            return probs.sum()
        return reshape(sum(probs, 0), probs.shape[1])

    def plot_choice_histograms(self, capacity, main=""):
        self.plot_histogram(numrows=2)
        self.plot_histogram_with_capacity(capacity)

    def plot_histogram(self, main="", numrows=1, numcols=1, fignum=1):
        """Plot a histogram of choices and probability sums. Expects probabilities
        as (at least) a 2D array.
        """
        from matplotlib.pylab import bar, xticks, yticks, title, text, axis, figure, subplot
        probabilities = self.get_probabilities()
        if probabilities.ndim < 2:
            raise StandardError, "probabilities must have at least 2 dimensions."
        alts = probabilities.shape[1]
        width_par = (1.0 / alts + 1) / 2.0  # float division; in Python 2, 1 / alts would truncate to 0
        choice_counts = self.get_choice_histogram(0, alts)
        sum_probs = self.get_probabilities_sum()
        subplot(numrows, numcols, fignum)
        bar(arange(alts), choice_counts, width=width_par)
        bar(arange(alts) + width_par, sum_probs, width=width_par, color="g")
        xticks(arange(alts))
        title(main)
        axis()
        text(alts + 0.5, -0.1,
             "\nchoices histogram (blue),\nprobabilities sum (green)",
             horizontalalignment="right",
             verticalalignment="top")

    def plot_histogram_with_capacity(self, capacity, main=""):
        """Plot histogram of choices and capacities. The number of alternatives
        is determined from the second dimension of probabilities.
        """
        from matplotlib.pylab import bar, xticks, yticks, title, text, axis, figure, subplot
        probabilities = self.get_probabilities()
        if probabilities.ndim < 2:
            raise StandardError, "probabilities must have at least 2 dimensions."
        alts = probabilities.shape[1]
        width_par = (1.0 / alts + 1) / 2.0  # float division, as above
        choice_counts = self.get_choice_histogram(0, alts)
        sum_probs = self.get_probabilities_sum()
        subplot(212)
        bar(arange(alts), choice_counts, width=width_par)
        bar(arange(alts) + width_par, capacity, width=width_par, color="r")
        xticks(arange(alts))
        title(main)
        axis()
        text(alts + 0.5, -0.1,
             "\nchoices histogram (blue),\ncapacities (red)",
             horizontalalignment="right",
             verticalalignment="top")

    def show_plots(self, file=None):
        """Render the plots that have been generated. This method should be the
        last method called in the script, since it hands control to matplotlib's
        rendering backend.
        """
        from matplotlib.pylab import show, savefig
        if file is not None:
            savefig(file)
        else:
            show()

    def summary(self):
        logger.log_status("utilities")
        logger.log_status(self.get_utilities())
        logger.log_status("probabilities")
        logger.log_status(self.get_probabilities())
        logger.log_status("probabilities sums")
        self.write_probability_sums()
        logger.log_status("choices")
        logger.log_status(self.get_choices())

    def get_excess_demand(self, capacity):
        demand = self.get_probabilities_sum()
        return where(demand > capacity, demand - capacity, 0)

    def get_dependent_datasets(self):
        result = []
        if self.utility_class is not None:
            try:
                result = result + self.utility_class.get_dependent_datasets()
            except:
                pass
        if self.probability_class is not None:
            try:
                result = result + self.probability_class.get_dependent_datasets()
            except:
                pass
        if self.choice_class is not None:
            try:
                result = result + self.choice_class.get_dependent_datasets()
            except:
                pass
        return result
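# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only; the stub classes below are
# hypothetical and just show the protocol upc_sequence expects, namely a
# 'run' method per component). Production code obtains the components from
# UPCFactory instead.
class _ExampleUtilities(object):
    def run(self, data, coefficients, resources=None):
        return data  # pretend the data already are the utilities

class _ExampleProbabilities(object):
    def run(self, utilities, resources=None):
        return utilities / float(utilities.sum())  # normalize to probabilities

class _ExampleChoices(object):
    def run(self, probabilities, resources=None):
        return probabilities.argmax()  # deterministically pick the most likely alternative

def _upc_sequence_example():
    from numpy import array
    upc = upc_sequence(utility_class=_ExampleUtilities(),
                       probability_class=_ExampleProbabilities(),
                       choice_class=_ExampleChoices())
    return upc.run(data=array([1.0, 3.0, 2.0]))  # returns 1, the highest-utility alternative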
class AgentRelocationModel(Model):
    """Chooses agents for relocation (according to probabilities computed by
    the probabilities class). All unplaced agents are included as movers.
    If probabilities is set to None, only unplaced agents are chosen.
    The run method returns indices of the chosen agents.
    """
    def __init__(self,
                 probabilities="urbansim.rate_based_probabilities",
                 choices="opus_core.random_choices",
                 location_id_name="grid_id",
                 model_name="Agent Relocation Model",
                 debuglevel=0,
                 resources=None):
        self.model_name = model_name
        self.location_id_name = location_id_name
        self.debug = DebugPrinter(debuglevel)
        self.upc_sequence = None
        if probabilities is not None:
            self.upc_sequence = UPCFactory().get_model(utilities=None,
                                                       probabilities=probabilities,
                                                       choices=choices,
                                                       debuglevel=debuglevel)
        self.resources = merge_resources_if_not_None(resources)

    def run(self, agent_set, resources=None, reset_attribute_value={}, append_unplaced_agents_index=True):
        self.resources.merge(resources)
        if agent_set.size() <= 0:
            agent_set.get_id_attribute()
            if agent_set.size() <= 0:
                self.debug.print_debug("Nothing to be done.", 2)
                return array([], dtype='int32')
        if self.upc_sequence and (self.upc_sequence.probability_class.rate_set or
                                  self.resources.get('relocation_rate', None)):
            self.resources.merge({agent_set.get_dataset_name(): agent_set})  # to be compatible with old-style one-relocation_probabilities-module-per-model
            self.resources.merge({'agent_set': agent_set})
            choices = self.upc_sequence.run(resources=self.resources)
            # choices have value 1 for agents that should be relocated, otherwise 0.
            movers_indices = where(choices > 0)[0]
        else:
            movers_indices = array([], dtype='int32')
        if reset_attribute_value and movers_indices.size > 0:
            for key, value in reset_attribute_value.items():
                agent_set.modify_attribute(name=key,
                                           data=resize(asarray(value), movers_indices.size),
                                           index=movers_indices)
        if append_unplaced_agents_index:
            # add unplaced agents
            unplaced_agents = where(agent_set.get_attribute(self.location_id_name) <= 0)[0]
            movers_indices = unique(concatenate((movers_indices, unplaced_agents)))
        logger.log_status("Number of movers: " + str(movers_indices.size))
        return movers_indices

    def prepare_for_run(self, what=None, rate_dataset_name=None, rate_storage=None, rate_table=None,
                        sample_rates=False, n=100, multiplicator=1, flush_rates=True):
        """
        what - unused, argument kept to be compatible with old code
        """
        from opus_core.datasets.dataset_factory import DatasetFactory
        from opus_core.session_configuration import SessionConfiguration
        if (rate_storage is None) or ((rate_table is None) and (rate_dataset_name is None)):
            return self.resources
        if not rate_dataset_name:
            rate_dataset_name = DatasetFactory().dataset_name_for_table(rate_table)
        rates = DatasetFactory().search_for_dataset(rate_dataset_name,
                                                    package_order=SessionConfiguration().package_order,
                                                    arguments={'in_storage': rate_storage,
                                                               'in_table_name': rate_table})
        if sample_rates:
            cache_storage = None
            if flush_rates:
                cache_storage = rate_storage
            rates.sample_rates(n=n, cache_storage=cache_storage, multiplicator=multiplicator)
        self.resources.merge({rate_dataset_name: rates})  # to be compatible with old-style one-relocation_probabilities-module-per-model
        self.resources.merge({'relocation_rate': rates})
        return self.resources

### In order to remove a circular dependency between this file and
### household_location_choice_model_creator, these unit tests were moved into
### urbansim.tests.test_agent_relocation_model.
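# ---------------------------------------------------------------------------
# Hypothetical call sequence (the storage object and table name are invented
# for illustration): prepare_for_run loads a rate table into self.resources,
# then run samples movers from the agent set and appends the unplaced agents.
#
# arm = AgentRelocationModel(location_id_name="grid_id")
# resources = arm.prepare_for_run(rate_storage=my_storage,
#                                 rate_table="annual_relocation_rates_for_households")
# movers_index = arm.run(household_set, resources=resources,
#                        reset_attribute_value={"grid_id": -1})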
class DevelopmentEventTransitionModel(Model):
    """From given types of development projects, e.g. 'residential' or 'commercial',
    create development events, one per gridcell. Only placed projects are considered.
    It returns an object of class DevelopmentEventDataset.
    """
    def __init__(self, resources=None, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)
        self.resources = resources
        self.model_name = "Development Event Transition Model"

    def run(self, developments, year=0, landuse_types=None, units=None, resources=None):
        #        landuse_types = ['residential', 'commercial', 'industrial', 'governmental']
        #        units=['residential_units', 'commercial_sqft','industrial_sqft','governmental_sqft']
        if not isinstance(resources, Resources):
            resources = Resources()
        grid_ids_for_project = array([], dtype=int32)
        if developments is not None:
            grid_ids_for_project = developments.get_attribute("grid_id")
        grid_ids_for_project = unique(grid_ids_for_project)
        grid_ids_for_project = grid_ids_for_project[where(grid_ids_for_project > 0)]
        if len(grid_ids_for_project) == 0:
            return
        sizes = grid_ids_for_project.size
        result_data = {"grid_id": grid_ids_for_project,
                       "scheduled_year": (year * ones((sizes,), dtype=int16)),
                       "development_type_id": zeros((sizes,), dtype=int16),
                       }
        for unit in units:
            result_data[unit] = zeros((sizes,), dtype=int32)
        for project_type in landuse_types:
            result_data["%s_improvement_value" % project_type] = zeros((sizes,), dtype=int32)
        grid_idx = 0
        for grid_id in grid_ids_for_project:
            w = where(developments.get_attribute('grid_id') == grid_id)[0]
            if w.size > 0:
                result_data["development_type_id"][grid_idx] = \
                    developments.get_attribute_by_index("development_type_id", w[0])
                for unit_variable in units:
                    result_data[unit_variable][grid_idx] = \
                        developments.get_attribute_by_index(unit_variable, w).sum()
                    result_data["%s_improvement_value" % unit_variable.split('_')[0]][grid_idx] = \
                        developments.get_attribute_by_index("improvement_value", w).sum()
            grid_idx += 1
        storage = StorageFactory().get_storage('dict_storage')
        eventset_table_name = 'eventset'
        storage.write_table(
            table_name=eventset_table_name,
            table_data=result_data,
            )
        eventset = DevelopmentEventDataset(
            in_storage=storage,
            in_table_name=eventset_table_name,
            id_name=['grid_id', 'scheduled_year'],
            )
        self.debug.print_debug('Number of events: ' + str(grid_ids_for_project.size), 3)
        return eventset

    def prepare_for_run(self, model_configuration):
        all_types = []
        all_units = []
        for atype in model_configuration['landuse_development_types']:
            all_types.append(atype)
            all_units.append(model_configuration['landuse_development_types'][atype]['units'])
        return (all_types, all_units)
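# ---------------------------------------------------------------------------
# Minimal sketch of the dict_storage round trip used in run() above:
# write_table takes a table name plus a dict of equal-length arrays, from
# which a dataset can then be constructed. The values below are invented.
def _dict_storage_example():
    from numpy import array, int32
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='eventset',
        table_data={
            'grid_id': array([1, 2], dtype=int32),
            'scheduled_year': array([2005, 2005], dtype=int32),
            },
        )
    return DevelopmentEventDataset(in_storage=storage,
                                   in_table_name='eventset',
                                   id_name=['grid_id', 'scheduled_year'])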
class ActiveDevelopmentsModel(Model): """ If you have questions, contact Jesse Ayers at MAG: [email protected] """ model_name = "Active Developments Model" model_short_name = "ADM" def __init__(self, debuglevel=0): self.debug = DebugPrinter(debuglevel) self.debuglevel = debuglevel def run( self, percent_active_development=100, build_minimum_units=False, year=None, start_year=None, dataset_pool=None, capacity_this_year_variable="mag_zone.active_development.capacity_this_year", ): # General TODO: # - deal w/ "other_spaces" columns # - look at generalizing the weight used when building units # - build unit test for minimum build feature # LIST OF MODEL ASSUMPTIONS: # - TODO: can i generalize the need for these pre-defined variables? # - the model expects variables to exist that correspond to this naming pattern # for every is_developing building_type_name in the building_types dataset: # - total_<building_type_name>_units_col # - occupied_<building_type_name>_units_col # - building_type_name must be unique, lowercase, contain no spaces # - target_vacancy.is_developing defines which building_types are considered # Minimum build feature # - The user can specify 2 additional columns in the building_types dataset: # - adm_minimum_annual_build_units # - adm_minimum_annual_build_max_year # - If these fields are present, and the "build_minimum_units" run option is set to True # - The model will utilize the information in the fields to build the minimum # of units annually # specified in the building_types table up to the maximum year specified in the table. This feature # is designed to simulate the case when demand is too low to build new units, some will be built anyway # CURRENT LIST OF KNOWN ISSUES: # - # Get current simulation year if year is None: simulation_year = SimulationState().get_current_time() else: simulation_year = year # only run if start_year if start_year: if start_year > simulation_year: return # Get the percent_active_development # convert it to a float percent_active_development = percent_active_development / 100.0 # Get the dataset pool if dataset_pool is None: dataset_pool = SessionConfiguration().get_dataset_pool() else: dataset_pool = dataset_pool # get the active_developments dataset, subset it for actually active projects # compute some variables developments_dataset = dataset_pool.get_dataset("active_developments") active_developments_capacity = developments_dataset.compute_variables([capacity_this_year_variable]) # TODO: need to further filter active developments, not only by start_year<=simulation_year, # but also by whether they are built out, etc. 
active_developments_index = where(developments_dataset.get_attribute("start_year") <= simulation_year)[0] active_developments_capacity_this_year = active_developments_capacity[active_developments_index] # debug help self.debug.print_debug("\n*** BEGIN DEBUG INFO:", 1) self.debug.print_debug("len(active_developments_index) = %s" % len(active_developments_index), 1) self.debug.print_debug("len(active_developments_capacity_this_year) = %s" % len(active_developments_capacity_this_year), 1) self.debug.print_debug("END DEBUG INFO ***\n", 1) # get the target_vacancy_rates dataset target_vacancy_rates_dataset = dataset_pool.get_dataset("target_vacancy") # get target vacancy rates for this simulation_year this_year_index = where(target_vacancy_rates_dataset.get_attribute("year") == simulation_year)[0] target_vacancies_for_this_year = DatasetSubset(target_vacancy_rates_dataset, this_year_index) # get some columns bldg_types = target_vacancies_for_this_year.get_attribute("building_type_id") tgt_vacancies = target_vacancies_for_this_year.get_attribute("target_vacancy") # get unique building types unique_building_types = unique1d(bldg_types) # build a dictionary containing building_type_id:{'target_vacancy_rate':<float>} developing_building_types_info = {} for unique_building_type in unique_building_types: unique_building_type_index = where(bldg_types == unique_building_type)[0] developing_building_types_info[unique_building_type] = { "target_vacancy_rate": tgt_vacancies[unique_building_type_index].mean() } # debug help if self.debuglevel > 0: self.debug_printer("developing_building_types_info", developing_building_types_info) # get the building_types dataset building_types_dataset = dataset_pool.get_dataset("building_type") # get the attribute names # I don't think this next line is used at all: # building_types_dataset_attribute_names = building_types_dataset.get_attribute_names() # get only the developing building types developing_types_index = where(building_types_dataset.get_attribute("is_developing") == 1)[0] developing_building_types_dataset = DatasetSubset(building_types_dataset, developing_types_index) # calculate active development capacity this simulation_year developing_building_type_ids = developing_building_types_dataset.get_attribute("building_type_id") building_type_names = developing_building_types_dataset.get_attribute("building_type_name") # add building_type_name to the dictionary # now the dictionary takes the form of: # building_type_id:{'target_vacancy_rate':<float>,'building_type_name':<string>} counter = 0 for developing_building_type_id in developing_building_type_ids: try: developing_building_types_info[developing_building_type_id]["building_type_name"] = building_type_names[counter] counter += 1 except: logger.log_warning( "You may have a mismatch in the building_type_ids between those in the target_vacancies dataset and the developing types in the building_types dataset."
) # debug help if self.debuglevel > 0: self.debug_printer("developing_building_types_info", developing_building_types_info) # add 'is_residential' to the developing_building_types_info dictionary # now the dictionary takes the form of: # building_type_id:{'target_vacancy_rate':<float>,'building_type_name':<string>,'is_residential':<integer>} for developing_building_type in developing_building_types_info: indx = where(building_types_dataset.get_attribute("building_type_id") == developing_building_type)[0] developing_building_types_info[developing_building_type][ "is_residential" ] = building_types_dataset.get_attribute("is_residential")[indx][0] # debug help if self.debuglevel > 0: self.debug_printer("developing_building_types_info", developing_building_types_info) # add 'adm_minimum_annual_build_units' and 'adm_minimum_annual_build_max_year' to the developing_building_types_info dictionary # now the dictionary takes the form of: # building_type_id:{'':<float>,'building_type_name':<string>,'is_residential':<integer>,'adm_minimum_annual_build_units':<integer>, 'adm_minimum_annual_build_max_units':<integer>} if build_minimum_units: try: for developing_building_type in developing_building_types_info: indx = where(building_types_dataset.get_attribute("building_type_id") == developing_building_type)[ 0 ] developing_building_types_info[developing_building_type][ "adm_minimum_annual_build_units" ] = building_types_dataset.get_attribute("adm_minimum_annual_build_units")[indx][0] for developing_building_type in developing_building_types_info: indx = where(building_types_dataset.get_attribute("building_type_id") == developing_building_type)[ 0 ] developing_building_types_info[developing_building_type][ "adm_minimum_annual_build_max_year" ] = building_types_dataset.get_attribute("adm_minimum_annual_build_max_year")[indx][0] except: logger.log_error( '\n\nYou have the option "build_minimum_units" set to "True" but appear to be missing the "adm_minimum_annual_build_units" and "adm_minimum_annual_build_max_year" units in your "building_types" dataset.\n' ) return # build a list of total and occupied units variables to compute of the form # ['occupied_rsf_units_col','total_rsf_units_col', ...] # The variables that this section creates and computes need to be defined in the buildings # dataset aliases.py file building_variables = [] for building_type_id, dict_of_info in developing_building_types_info.iteritems(): try: total, occupied = ( "total_%s_units_col" % dict_of_info["building_type_name"], "occupied_%s_units_col" % dict_of_info["building_type_name"], ) building_variables.append(total) building_variables.append(occupied) except: logger.log_warning( "You may have a mismatch in the building_type_ids between those in the target_vacancies dataset and the developing types in the building_types dataset." 
) # debug help if self.debuglevel > 0: self.debug_printer("building_variables", building_variables) # get the buildings dataset buildings_dataset = dataset_pool.get_dataset("building") # compute total and occupied units variables buildings_dataset.compute_variables(building_variables) # sum up those variables into a dictionary of the form: # {'occupied_rsf_units':<integer>, 'total_rsf_units':<integer>, ...} total_and_occupied_variable_sums = {} for building_variable in building_variables: summed_attribute = buildings_dataset.get_attribute("%s" % building_variable).sum() total_and_occupied_variable_sums[building_variable.replace("_col", "")] = summed_attribute # debug help if self.debuglevel > 0: self.debug_printer("total_and_occupied_variable_sums", total_and_occupied_variable_sums) # set up a table to log into status_log = PrettyTable() status_log.set_field_names( [ # "Type", "Name", "Occ Units", "Tot Units", "CurrentVR", "Target Units", "TargetVR", "Difference", "Max Act Dev Action", "Avail Act Dev", "Build Action", ] ) # compute target units, vacancy rates, etc # go over each developing building type and compute target units, differences, total development required, # available capacity in active_developments, and action to take in active_developments for developing_building_type in developing_building_types_info: # compute target variables # compute target variables into developing_building_types_info dict developing_building_types_info[developing_building_type][ "target_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"] ] = int( round( total_and_occupied_variable_sums[ "occupied_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"] ] / (1 - developing_building_types_info[developing_building_type]["target_vacancy_rate"]) ) ) # compute difference variables # compute difference variables into developing_building_types_info dict developing_building_types_info[developing_building_type][ "%s_diff" % developing_building_types_info[developing_building_type]["building_type_name"] ] = ( developing_building_types_info[developing_building_type][ "target_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"] ] - total_and_occupied_variable_sums[ "total_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"] ] ) # compute action variables # if the computed difference is 0 or negative (no demand for units of this type): if ( developing_building_types_info[developing_building_type][ "%s_diff" % developing_building_types_info[developing_building_type]["building_type_name"] ] < 1 ): # consider whether to build the minimum units # check simulation year against maximum annual build year if ( build_minimum_units and developing_building_types_info[developing_building_type]["adm_minimum_annual_build_max_year"] >= simulation_year ): # build minimum developing_building_types_info[developing_building_type][ "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"] ] = developing_building_types_info[developing_building_type]["adm_minimum_annual_build_units"] else: # build nothing developing_building_types_info[developing_building_type][ "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"] ] = 0 # the computed difference is positive (demand for units of this type) # decide how much to build, the actual number demanded, or the minimum else: # compute the difference * the 
percent_active_development diff_with_pct_active = int( developing_building_types_info[developing_building_type][ "%s_diff" % developing_building_types_info[developing_building_type]["building_type_name"] ] * percent_active_development ) # if the diff_with_pct_active is greater than the minimum development: if ( build_minimum_units and diff_with_pct_active > developing_building_types_info[developing_building_type]["adm_minimum_annual_build_units"] ): # just build the diff_with_pct_active developing_building_types_info[developing_building_type][ "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"] ] = diff_with_pct_active # the pct_diff_with_pct_active < minimum build and the max year for annual build is appropriate: elif ( build_minimum_units and developing_building_types_info[developing_building_type]["adm_minimum_annual_build_max_year"] >= simulation_year ): # build the minimum developing_building_types_info[developing_building_type][ "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"] ] = developing_building_types_info[developing_building_type]["adm_minimum_annual_build_units"] # last case is the demand < minimum, but the simulation year > max year: else: # build the pct_diff_with_pct_active developing_building_types_info[developing_building_type][ "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"] ] = diff_with_pct_active # compute how much development is available in active developments # add this information to the developing_building_types_info dictionary: # building_type_id:{'target_vacancy_rate':<float>,'building_type_name':<string>,'available_active_capacity_this_year':<integer>} indx = where( developments_dataset.get_attribute("building_type_id")[active_developments_index] == developing_building_type ) developing_building_types_info[developing_building_type][ "active_developments_capacity_this_year_index" ] = indx developing_building_types_info[developing_building_type][ "available_active_capacity_this_year" ] = active_developments_capacity_this_year[indx].sum() # compute actual action to take action = developing_building_types_info[developing_building_type][ "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"] ] available = developing_building_types_info[developing_building_type]["available_active_capacity_this_year"] actual_action = self.lesser(action, available) # revise actual action if minimum build units is in effect: if ( build_minimum_units and developing_building_types_info[developing_building_type]["adm_minimum_annual_build_max_year"] >= simulation_year ): actual_action = self.greater( actual_action, developing_building_types_info[developing_building_type]["adm_minimum_annual_build_units"], ) developing_building_types_info[developing_building_type]["action_to_take_this_year"] = actual_action # create status line for logging status_line = [ # developing_building_type, developing_building_types_info[developing_building_type]["building_type_name"], total_and_occupied_variable_sums[ "occupied_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"] ], total_and_occupied_variable_sums[ "total_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"] ], round( 1 - ( total_and_occupied_variable_sums[ "occupied_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"] ] / total_and_occupied_variable_sums[ 
"total_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"] ] ), 4, ), developing_building_types_info[developing_building_type][ "target_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"] ], developing_building_types_info[developing_building_type]["target_vacancy_rate"], developing_building_types_info[developing_building_type][ "%s_diff" % developing_building_types_info[developing_building_type]["building_type_name"] ], developing_building_types_info[developing_building_type][ "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"] ], developing_building_types_info[developing_building_type]["available_active_capacity_this_year"], actual_action, ] status_log.add_row(status_line) # print the status table to the log logger.log_status(status_log) # debug help if self.debuglevel > 0: self.debug_printer("developing_building_types_info", developing_building_types_info) # update the active_developments and buildings datasets with new units for developing_building_type in developing_building_types_info: if developing_building_types_info[developing_building_type]["action_to_take_this_year"] > 0: # update 'current_built_units' column in active_developments dataset # get the index of the records of the current developing_building_type indx = developing_building_types_info[developing_building_type][ "active_developments_capacity_this_year_index" ] # get the total number of units to build this year total_action = developing_building_types_info[developing_building_type]["action_to_take_this_year"] # compute the weight as build_out capacity - current_built_units buildout_capacity = developments_dataset.get_attribute("build_out_capacity")[active_developments_index][ indx ] current_built_units = developments_dataset.get_attribute("current_built_units")[ active_developments_index ][indx] weights = buildout_capacity - current_built_units weights_sum = float(weights.sum()) weight_array = weights / weights_sum # distribute the total to build against the weight action_array = (total_action * weight_array).astype("int32") new_built_units = current_built_units + action_array # make sure we are not going to build more than the buildout_capacity check = buildout_capacity - new_built_units check_lt_zero = where(check < 0) if check_lt_zero[0].size > 0: # We have a problem, set the new_built_units = the buildout_capacity # for those records where we are blowing the buildout of the development new_built_units[check_lt_zero] = buildout_capacity[check_lt_zero] # update the current_built_units column with new values developments_building_ids = developments_dataset.get_attribute("building_id") building_ids_to_be_updated = developments_building_ids[active_developments_index][indx] if self.debuglevel > 0: self.debug_printer("building_ids_to_be_updated", building_ids_to_be_updated) building_ids_to_be_updated_index_on_developments = in1d( developments_building_ids, building_ids_to_be_updated ) developments_dataset.set_values_of_one_attribute( "current_built_units", new_built_units, building_ids_to_be_updated_index_on_developments ) # debug help if self.debuglevel > 0: self.debug_printer("new_built_units", new_built_units) # update the relevant units column on the buildings dataset with new units # debug help if self.debuglevel > 0: self.debug_printer("building_ids_to_be_updated", building_ids_to_be_updated) building_ids_to_be_updated_index_on_buildings = buildings_dataset.get_id_index( building_ids_to_be_updated ) 
# debug help if self.debuglevel > 0: self.debug_printer( "building_ids_to_be_updated_index_on_buildings", building_ids_to_be_updated_index_on_buildings ) if developing_building_types_info[developing_building_type]["is_residential"]: buildings_dataset.set_values_of_one_attribute( "residential_units", new_built_units, building_ids_to_be_updated_index_on_buildings ) else: buildings_dataset.set_values_of_one_attribute( "non_residential_sqft", new_built_units, building_ids_to_be_updated_index_on_buildings ) def debug_printer(self, name, item_to_print): self.debug.print_debug("\n*** BEGIN DEBUG INFO:", self.debuglevel) self.debug.print_debug("Printing: %s" % name, self.debuglevel) if isinstance(item_to_print, dict): try: from json import dumps self.debug.print_debug(dumps(item_to_print, indent=4), self.debuglevel) except: for key1, value1 in item_to_print.iteritems(): self.debug.print_debug("primary dict key = %s" % key1, 1) for key2, value2 in value1.iteritems(): self.debug.print_debug("%s : %s" % (key2, value2), 1) else: self.debug.print_debug(item_to_print, self.debuglevel) self.debug.print_debug("END DEBUG INFO ***\n", self.debuglevel) def lesser(self, x, y): if x - y > 0: return y else: return x def greater(self, x, y): if x - y < 0: return y else: return x
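# ---------------------------------------------------------------------------
# Standalone sketch of the capacity-weighted allocation performed in run():
# units to be built are distributed across active developments in proportion
# to their remaining capacity, then clipped so no project exceeds build-out.
# All numbers below are invented for illustration.
def _weighted_allocation_example():
    from numpy import array, where
    buildout_capacity = array([100, 50, 10])
    current_built_units = array([90, 10, 0])
    total_action = 30  # units to build this year
    weights = buildout_capacity - current_built_units  # remaining capacity
    weight_array = weights / float(weights.sum())
    action_array = (total_action * weight_array).astype('int32')
    new_built_units = current_built_units + action_array
    # never exceed the build-out capacity
    check_lt_zero = where((buildout_capacity - new_built_units) < 0)
    new_built_units[check_lt_zero] = buildout_capacity[check_lt_zero]
    return new_built_units  # array([95, 30, 5])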
class RateBasedModel(Model):
    """Samples agents according to probabilities computed by the probabilities
    class. If probabilities is set to None, no agents are sampled.
    The run method returns indices of the chosen agents.
    """
    model_name = 'Rate Based Model'

    def __init__(self,
                 probabilities="opus_core.upc.rate_based_probabilities",
                 choices="opus_core.random_choices",
                 model_name=None,
                 debuglevel=0,
                 resources=None):
        if model_name is not None:
            self.model_name = model_name
        self.debug = DebugPrinter(debuglevel)
        self.upc_sequence = None
        if probabilities is not None:
            self.upc_sequence = UPCFactory().get_model(utilities=None,
                                                       probabilities=probabilities,
                                                       choices=choices,
                                                       debuglevel=debuglevel)
        self.resources = merge_resources_if_not_None(resources)

    def run(self, agent_set, resources=None, reset_attribute_value={}):
        self.resources.merge(resources)
        if agent_set.size() <= 0:
            agent_set.get_id_attribute()
            if agent_set.size() <= 0:
                self.debug.print_debug("Nothing to be done.", 2)
                return array([], dtype='int32')
        if self.upc_sequence and (self.upc_sequence.probability_class.rate_set or
                                  self.resources.get('rate_set', None)):
            self.resources.merge({agent_set.get_dataset_name(): agent_set})  # to be compatible with old-style one-relocation_probabilities-module-per-model
            self.resources.merge({'agent_set': agent_set})
            choices = self.upc_sequence.run(resources=self.resources)
            # choices have value 1 for agents that should be sampled, otherwise 0.
            movers_indices = where(choices > 0)[0]
        else:
            movers_indices = array([], dtype='int32')
        if reset_attribute_value and movers_indices.size > 0:
            for key, value in reset_attribute_value.items():
                agent_set.modify_attribute(name=key,
                                           data=resize(asarray(value), movers_indices.size),
                                           index=movers_indices)
        logger.log_status("Number of agents sampled based on rates: " + str(movers_indices.size))
        return movers_indices

    def prepare_for_run(self, what=None, rate_dataset_name="rate", rate_storage=None, rate_table=None,
                        probability_attribute=None, sample_rates=False, n=100, multiplicator=1,
                        flush_rates=True):
        """
        what - unused, argument kept to be compatible with old code
        """
        from opus_core.datasets.dataset_factory import DatasetFactory
        from opus_core.session_configuration import SessionConfiguration
        if (rate_storage is None) or ((rate_table is None) and (rate_dataset_name is None)):
            return self.resources
        if not rate_dataset_name:
            rate_dataset_name = DatasetFactory().dataset_name_for_table(rate_table)
        rates = DatasetFactory().search_for_dataset(rate_dataset_name,
                                                    package_order=SessionConfiguration().package_order,
                                                    arguments={'in_storage': rate_storage,
                                                               'in_table_name': rate_table})
        if probability_attribute is not None:
            rates.probability_attribute = probability_attribute
        if sample_rates:
            cache_storage = None
            if flush_rates:
                cache_storage = rate_storage
            rates.sample_rates(n=n, cache_storage=cache_storage, multiplicator=multiplicator)
        self.resources.merge({rate_dataset_name: rates})  # to be compatible with old-style one-relocation_probabilities-module-per-model
        self.resources.merge({'rate_set': rates})
        return self.resources
class BusinessTransitionModel(Model): """Creates and removes businesses from business_set.""" model_name = "Business Transition Model" location_id_name = "building_id" variable_package = "sanfrancisco" def __init__(self, debuglevel=0): self.debug = DebugPrinter(debuglevel) def run(self, year, business_set, control_totals, dataset_pool=None, resources=None): self.business_id_name = business_set.get_id_name()[0] control_for_businesses = False # If this is False, it is controlled for jobs if "total_number_of_businesses" in control_totals.get_known_attribute_names(): control_for_businesses = True control_totals.get_attribute("total_number_of_businesses") else: control_totals.get_attribute("total_number_of_jobs") idx = where(control_totals.get_attribute("year")==year) sectors = unique(control_totals.get_attribute_by_index("sector_id", idx)) self.max_id = business_set.get_id_attribute().max() business_size = business_set.size() self.new_businesses = {self.location_id_name:array([], dtype='int32'), "sector_id":array([], dtype='int32'), self.business_id_name:array([], dtype='int32'), "sqft":array([], dtype=int32), "employment":array([], dtype='int32'), "activity_id":array([], dtype='int32')} business_set.compute_variables( map(lambda x: "%s.%s.is_of_sector_%s" % (self.variable_package, business_set.get_dataset_name(), x), sectors), dataset_pool=dataset_pool, resources = resources) self.remove_businesses = array([], dtype='int32') for sector in sectors: b_is_in_sector = business_set.get_attribute("is_of_sector_%s" % sector) if control_for_businesses: total_businesses = control_totals.get_data_element_by_id((year,sector)).total_number_of_businesses diff = int(total_businesses - b_is_in_sector.astype(int8).sum()) self._do_sector_for_businesses(sector, diff, business_set, b_is_in_sector) else: total_jobs = control_totals.get_data_element_by_id((year,sector)).total_number_of_jobs diff = int(total_jobs - business_set.get_attribute_by_index("employment", b_is_in_sector).sum()) self._do_sector_for_jobs(sector, diff, business_set, b_is_in_sector) business_set.remove_elements(self.remove_businesses) business_set.add_elements(self.new_businesses, require_all_attributes=False) difference = business_set.size()-business_size self.debug.print_debug("Difference in number of businesses: %s (original %s," " new %s, created %s, deleted %s)" % (difference, business_size, business_set.size(), self.new_businesses[self.business_id_name].size, self.remove_businesses.size), 3) self.debug.print_debug("Number of unplaced businesses: %s" % where(business_set.get_attribute(self.location_id_name) <=0)[0].size, 3) return difference def _do_sector_for_businesses(self, sector, diff, business_set, is_in_sector): available_business_index = where(is_in_sector)[0] if diff < 0: # sample_array, non_placed, size_non_placed = \ get_array_without_non_placed_agents(business_set, available_business_index, -1*diff, self.location_id_name) self.remove_businesses = concatenate((self.remove_businesses, non_placed, sample_noreplace(sample_array, max(0,abs(diff)-size_non_placed)))) if diff > 0: # self.new_businesses[self.location_id_name]=concatenate((self.new_businesses[self.location_id_name],zeros((diff,)))) self.new_businesses["sector_id"]=concatenate((self.new_businesses["sector_id"], sector*ones((diff,)))) sampled_business = probsample_replace(available_business_index, diff, None) self.new_businesses["sqft"] = concatenate((self.new_businesses["sqft"], business_set.get_attribute("sqft")[sampled_business])) self.new_businesses["employment"] = 
concatenate((self.new_businesses["employment"], business_set.get_attribute("employment")[sampled_business])) self.new_businesses["activity_id"] = concatenate((self.new_businesses["activity_id"], business_set.get_attribute("activity_id")[sampled_business])) new_max_id = self.max_id+diff self.new_businesses[self.business_id_name]=concatenate((self.new_businesses[self.business_id_name], arange(self.max_id+1, new_max_id+1))) self.max_id = new_max_id def _do_sector_for_jobs(self, sector, diff, business_set, b_is_in_sector): # diff is a difference in jobs (not businesses) employment = business_set.get_attribute('employment') available_business_index = where(b_is_in_sector)[0] if diff < 0: # placed, non_placed, size_non_placed = \ get_array_without_non_placed_agents(business_set, available_business_index, -1*available_business_index.size, self.location_id_name) consider_for_removing = concatenate((permutation(non_placed), permutation(placed))) empl_cumsum = cumsum(employment[consider_for_removing]) remove_b = consider_for_removing[empl_cumsum <= abs(diff)] self.remove_businesses = concatenate((self.remove_businesses, remove_b)) if diff > 0: # total_empl_added = 0 sampled_business = array([], dtype=int32) while total_empl_added < diff: consider_for_duplicating = permutation(available_business_index) empl_cumsum = cumsum(employment[consider_for_duplicating]) sampled_business = concatenate((sampled_business, consider_for_duplicating[empl_cumsum+total_empl_added <= diff])) if empl_cumsum[-1]+total_empl_added > diff: break total_empl_added += employment[sampled_business].sum() self.new_businesses[self.location_id_name]=concatenate((self.new_businesses[self.location_id_name],zeros((sampled_business.size,)))) self.new_businesses["sector_id"]=concatenate((self.new_businesses["sector_id"], sector*ones((sampled_business.size,)))) self.new_businesses["sqft"] = concatenate((self.new_businesses["sqft"], business_set.get_attribute("sqft")[sampled_business])) self.new_businesses["employment"] = concatenate((self.new_businesses["employment"], employment[sampled_business])) self.new_businesses["activity_id"] = concatenate((self.new_businesses["activity_id"], business_set.get_attribute("activity_id")[sampled_business])) new_max_id = self.max_id+sampled_business.size self.new_businesses[self.business_id_name]=concatenate((self.new_businesses[self.business_id_name], arange(self.max_id+1, new_max_id+1))) self.max_id = new_max_id def prepare_for_run(self, storage, in_table_name, id_name, **kwargs): from urbansim.datasets.control_total_dataset import ControlTotalDataset control_totals = ControlTotalDataset(in_storage=storage, in_table_name=in_table_name, id_name=id_name ) return control_totals
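# ---------------------------------------------------------------------------
# Standalone sketch of the cumulative-sum trick used in _do_sector_for_jobs:
# to remove roughly `diff` jobs, businesses are shuffled and taken while the
# running total of their employment stays within the target. Example values
# are invented.
def _cumsum_removal_example():
    from numpy import array, cumsum
    employment = array([3, 5, 2, 4])
    consider_for_removing = array([2, 0, 3, 1])  # a permutation of business indices
    empl_cumsum = cumsum(employment[consider_for_removing])  # [2, 5, 9, 14]
    diff = -6  # six jobs must go
    remove_b = consider_for_removing[empl_cumsum <= abs(diff)]
    return remove_b  # array([2, 0]): removing these cuts 5 jobs, staying within 6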
class Variable(object):
    """Abstract base class for variables. Each variable implementation must be a subclass
    of this class, placed in a module that has the same name as the variable class.

    Each variable class is expected to contain a method "compute" that takes one argument,
    "arguments". It is of type Resources and can contain anything that the compute method
    might need. The 'compute' method returns the result of the computation, which should be
    an array of size self.get_dataset().size().

    Each variable class can contain a method "dependencies" which returns a list of
    attributes/variables that this class depends on. The dependencies list is a list of
    fully (or dataset) qualified variable names, one for each dependent variable. All
    dependent datasets must be included in 'arguments'.

    Each variable may have a pre- and post-check that performs checks on the variable's
    inputs and the variable's results. This allows each variable's implementation to
    specify a contract about what it does. The 'check_variables' entry of 'arguments'
    defines what variables to check (see method 'should_check'). If a variable is to be
    checked, the 'pre_check' method for that variable is called before the variable's
    'compute' method, and the 'post_check' method is called after it. 'pre_check' takes
    one argument, 'arguments'; 'post_check' takes two arguments: values (the results from
    the 'compute' method) and 'arguments'.

    In case of using 'compute_with_dependencies', the datasets for which variables are
    computed are expected to have a method 'compute_variables' that takes at least three
    arguments: name of the variable, package name and an object of class Resources. This
    dataset method should use the Variable method 'compute_with_dependencies' in order
    to work recursively through dependency trees (see compute_variables and
    _compute_one_variable of opus_core.Dataset).

    The return type of this variable is defined by its _return_type property, which may
    have one of the following numpy types: "bool8", "int8", "uint8", "int16", "uint16",
    "int32", "uint32", "int64", "uint64", "float32", "float64", "complex64", "complex128",
    "longlong".
    """
    _return_type = None

    def __new__(cls, *args, **kwargs):
        """Setup to automatically log the running time of the compute method."""
        an_instance = object.__new__(cls)
        compute_method = an_instance.compute_with_dependencies
        def logged_method(*req_args, **opt_args):
            logger.start_block(name=an_instance.name(), verbose=False)
            try:
                results = compute_method(*req_args, **opt_args)
                an_instance._do_flush_dependent_variables_if_required()
            finally:
                logger.end_block()
            return results
        an_instance.compute_with_dependencies = logged_method
        return an_instance

    def __init__(self):
        self.dependencies_list = None
        self.dataset = None
        self.number_of_compute_runs = 0
        try:
            self.debug = SessionConfiguration().get('debuglevel', 0)
        except:
            self.debug = 0
        if isinstance(self.debug, int):
            self.debug = DebugPrinter(self.debug)

    def name(self):
        return self.__module__

    def _do_flush_dependent_variables_if_required(self):
        try:
            if not SessionConfiguration().get('flush_variables', False):
                return
        except:
            return
        from opus_core.datasets.interaction_dataset import InteractionDataset
        dataset = self.get_dataset()
        dependencies = self.get_current_dependencies()
        my_dataset_name = dataset.get_dataset_name()
        for iattr in range(len(dependencies)): # iterate over dependent variables
            dep_item = dependencies[iattr][0]
            if isinstance(dep_item, str):
                depvar_name = VariableName(dep_item)
            else:
                depvar_name = dep_item.get_variable_name() # dep_item should be an instance of AttributeBox
            dataset_name = depvar_name.get_dataset_name()
            if dataset_name == my_dataset_name:
                ds = dataset
            else:
                ds = SessionConfiguration().get_dataset_from_pool(dataset_name)
                #ds = dataset_pool.get_dataset('dataset_name')
            if not isinstance(ds, InteractionDataset):
                short_name = depvar_name.get_alias()
                if short_name not in ds.get_id_name():
                    ds.flush_attribute(depvar_name)

    def compute(self, dataset_pool):
        """Returns the result of this variable. Private use only."""
        raise NotImplementedError, "compute() method not implemented for this variable."

    def is_lag_variable(self):
        """Not a lag variable unless this function has been overridden to return True"""
        return False

    def _compute_and_check(self, dataset_pool):
        if has_this_method(self, "pre_check"):
            self.debug.print_debug("Running pre_check() for " + self.__class__.__module__, 4)
            self.pre_check(dataset_pool)
        else:
            self.debug.print_debug("No pre_check() defined for " + self.__class__.__module__, 4)
        values = self.compute(dataset_pool)
        if has_this_method(self, "post_check"):
            self.debug.print_debug("Running post_check() for " + self.__class__.__module__, 4)
            self.post_check(values, dataset_pool)
        else:
            self.debug.print_debug("No post_check() defined for " + self.__class__.__module__, 4)
        return values

    def compute_with_dependencies(self, dataset_pool, arguments={}):
        self._solve_dependencies(dataset_pool)
        if self.should_check(arguments):
            self.debug.print_debug("Computing and checking " + self.__class__.__module__, 3)
            values = self._compute_and_check(dataset_pool)
        else:
            values = self.compute(dataset_pool)
        self.number_of_compute_runs += 1
        if self._return_type:
            return self._cast_values(values, arguments)
        return values

    if longlong == int32:
        __long_size = 2**31 - 1
    else:
        __long_size = 2**63 - 1

    _max_storable_value = {"bool8": 1,
                           "int8": 2**7 - 1,
                           "uint8": 2**8 - 1,
                           "int16": 2**15 - 1,
                           "uint16": 2**16 - 1,
                           "int32": 2**31 - 1,
                           "uint32": 2**32 - 1,
                           "int64": 2**63 - 1,
                           "uint64": 2**64 - 1,
                           "float32": 3.40282346638528860e+38,
                           "float64": 1.79769313486231570e+308,
                           "complex64": 3.40282346638528860e+38,
                           "complex128": 1.79769313486231570e+308,
                           "longlong": __long_size,
                           }

    def _cast_values(self, values, arguments):
        """Change the return values to be of type self._return_type. If "should_check"
        is defined, first check for values that are too large for the destination type
        or integer wrap-around."""
        values_type = values.dtype.str
        if self._return_type == values_type:
            return values
        if self.should_check(arguments):
            max_value = ma.maximum(values)
            if max_value > self._max_storable_value[self._return_type]:
                max_value_str = str(max_value)
                logger.log_error("Variable '%s' is being cast to type '%s', but contains a value (%s) too large to fit into that type."
                                 % (self.name(), self._return_type, max_value_str))
        return values.astype(self._return_type)

    def _solve_dependencies(self, dataset_pool):
        dataset = self.get_dataset()
        my_dataset_name = dataset.get_dataset_name()
        dependencies_list = self.get_current_dependencies()
        for i in range(len(dependencies_list)): # compute dependent variables
            dep_item = dependencies_list[i][0]
            if isinstance(dep_item, str):
                depvar_name = VariableName(dep_item)
            else:
                depvar_name = dep_item.get_variable_name() # dep_item should be an instance of AttributeBox
            dataset_name = depvar_name.get_dataset_name()
            version = dependencies_list[i][1]
            if dataset_name == my_dataset_name:
                ds = dataset
            else:
                ds = dataset_pool.get_dataset(dataset_name)
            (new_versions, value) = ds.compute_variables_return_versions_and_final_value([(depvar_name, version)], dataset_pool)
            self.dependencies_list[i] = (ds._get_attribute_box(depvar_name), new_versions[0])

    def get_all_dependencies(self):
        """Return all variables and attributes needed to compute this variable.
        This is returned as a list of tuples where the first element is either
        AttributeBox or VariableName of the dependent variable and the second
        element is the version for which this variable was computed.
        """
        def create_fake_dataset(dataset_name):
            storage = StorageFactory().get_storage('dict_storage')
            storage.write_table(
                table_name='fake_dataset',
                table_data={'id': array([], dtype='int32')}
                )
            dataset = Dataset(in_storage=storage, in_table_name='fake_dataset',
                              dataset_name=dataset_name, id_name="id")
            return dataset

        result_others = []
        dependencies_list = self.get_current_dependencies()
        for i in range(len(dependencies_list)):
            dep_item = dependencies_list[i][0]
            version = dependencies_list[i][1]
            isprimary = 0
            if isinstance(dep_item, str):
                depvar_name = VariableName(dep_item)
                dataset_name = depvar_name.get_dataset_name()
                var = VariableFactory().get_variable(depvar_name, create_fake_dataset(dataset_name),
                                                     quiet=True)
                result_others = result_others + [(depvar_name, version)]
            else: # dep_item should be an instance of AttributeBox
                var = dep_item.get_variable_instance()
                result_others = result_others + [(dep_item, version)]
                isprimary = dep_item.is_primary()
            if (var is not None) and (not isprimary):
                res = var.get_all_dependencies()
                result_others = result_others + res
        return result_others

    def get_dependencies(self):
        """Return variables and attributes needed to compute this variable.
        This is returned as a list of tuples where the first element is the name
        of the particular dataset and the second element is the variable name.
        It does not work through the dependencies tree.
        """
        if has_this_method(self, "dependencies"):
            return self.dependencies()
        return []

    def add_dependencies(self, dep_list=[]):
        """Can be used within 'compute' method to add dependencies. It is performed
        only when the compute method runs for the first time. dep_list can be either
        a list of character strings or a list of AttributeBoxes."""
        if self.number_of_compute_runs == 0:
            if isinstance(dep_list, str):
                dep_list = [dep_list]
            self.dependencies_list = self.dependencies_list + map(lambda x: (x, 0), dep_list)

    def add_and_solve_dependencies(self, dep_list=[], dataset_pool=None):
        """Calls 'add_dependencies' and if it is run for the first time, it also
        calls the '_solve_dependencies' method."""
        self.add_dependencies(dep_list)
        if self.number_of_compute_runs == 0:
            self._solve_dependencies(dataset_pool)

    def get_current_dependencies(self):
        if self.dependencies_list is None:
            self.dependencies_list = map(lambda x: (x, 0), self.get_dependencies())
        return self.dependencies_list

    def do_check(self, condition_str, values):
        def condition(x):
            return eval(condition_str)
        # This is a bit ugly, but the upgrade from Python 2.3.5 to Python 2.4
        # broke backward compatibility in regard to map and numpy's rank-0
        # arrays. This attempts to detect a rank-0 array and convert it into
        # something usable.
        try:
            try:
                len(values)
            except TypeError:
                values = array([values[()]])
        except:
            pass
        count = where(array(map(lambda x: not(condition(x)), values)) > 0)[0].size
        if (count > 0):
            logger.log_warning("Variable %s fails %d times on check %s" %
                               (self.__class__.__module__, count, condition_str))

    def should_check(self, arguments=None):
        """Return True if this variable should be checked, otherwise False.
        The information of what variables to check is provided in the 'arguments'
        entry "check_variables". If "check_variables" is missing or is None or is
        an empty list, do no checks. If "check_variables" is '*', check all
        variables. If "check_variables" is a list containing this variable's name,
        check this variable.
        """
        if not isinstance(arguments, Resources):
            return False
        check_variables = arguments.get("check_variables", None)
        if check_variables is None:
            return False
        if (check_variables == '*') or \
           (isinstance(check_variables, list) and (len(check_variables) > 0) and
                (self.__class__.__name__ in check_variables)):
            return True
        return False

    def are_dependent_variables_up_to_date(self, version):
        result = []
        all_dependencies_list = self.get_all_dependencies()
        # the loop variable is named dep_version so it does not shadow the
        # 'version' argument; each dependency is checked against its own
        # recorded version
        for variable, dep_version in all_dependencies_list:
            if isinstance(variable, AttributeBox):
                result.append(variable.is_version(dep_version))
            else: # of type VariableName (means variable wasn't used yet)
                result.append(False)
        return result

    def get_highest_version_of_dependencies(self):
        dependencies_list = self.get_current_dependencies()
        if len(dependencies_list) <= 0:
            return 0
        versions = array(map(lambda x: x[1], dependencies_list))
        return versions.max()

    def set_dataset(self, dataset):
        self.dataset = dataset

    def get_dataset(self):
        return self.dataset

    def safely_divide_two_arrays(self, numerator, denominator, value_for_divide_by_zero=0.0):
        """Returns the result of numerator/denominator with the
        value_for_divide_by_zero wherever denominator == 0.
        """
        return ma.filled(numerator / ma.masked_where(denominator == 0, denominator),
                         value_for_divide_by_zero)

    def safely_divide_two_attributes(self, numerator_name, denominator_name, value_for_divide_by_zero=0.0):
        """Returns the result of dividing the numerator_name attribute of this
        variable by the denominator_name attribute of this variable; return the
        value_for_divide_by_zero wherever denominator == 0.
        """
        numerator = self.get_dataset().get_attribute(numerator_name)
        denominator = self.get_dataset().get_attribute(denominator_name)
        return self.safely_divide_two_arrays(numerator, denominator, value_for_divide_by_zero)
class BusinessTransitionModel(Model):
    """Creates and removes businesses from business_set."""
    model_name = "Business Transition Model"
    location_id_name = "building_id"
    variable_package = "urbansim_parcel"

    def __init__(self, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)

    def run(self, year, business_set, control_totals, data_objects=None, resources=None):
        business_id_name = business_set.get_id_name()[0]
        control_totals.get_attribute("total_number_of_businesses")  # make sure the attribute is loaded
        idx = where(control_totals.get_attribute("year") == year)
        sectors = unique(control_totals.get_attribute_by_index("building_use_id", idx))
        max_id = business_set.get_id_attribute().max()
        business_size = business_set.size()
        new_businesses = {
            self.location_id_name: array([], dtype='int32'),
            "building_use_id": array([], dtype='int32'),
            business_id_name: array([], dtype='int32'),
            "sqft": array([], dtype='int32'),
            "employees": array([], dtype='int32'),
        }
        compute_resources = Resources(data_objects)
        business_set.compute_variables(
            ["%s.%s.is_sector_%s" % (self.variable_package,
                                     business_set.get_dataset_name(), x)
             for x in sectors],
            resources=compute_resources)
        remove_businesses = array([], dtype='int32')

        for sector in sectors:
            total_businesses = control_totals.get_data_element_by_id(
                (year, sector)).total_number_of_businesses
            is_in_sector = business_set.get_attribute("is_sector_%s" % sector)
            diff = int(total_businesses - is_in_sector.astype(int8).sum())
            if diff < 0:  # too many businesses in this sector; sample some for removal
                w = where(is_in_sector == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(business_set, w, -1 * diff,
                                                        self.location_id_name)
                remove_businesses = concatenate((
                    remove_businesses, non_placed,
                    sample_noreplace(sample_array, max(0, abs(diff) - size_non_placed))))
            if diff > 0:  # too few businesses; clone attributes of sampled ones
                new_businesses[self.location_id_name] = concatenate((
                    new_businesses[self.location_id_name], zeros((diff,), dtype="int32")))
                new_businesses["building_use_id"] = concatenate((
                    new_businesses["building_use_id"], sector * ones((diff,), dtype="int32")))
                available_business_index = where(is_in_sector)[0]
                sampled_business = probsample_replace(available_business_index, diff, None)
                new_businesses["sqft"] = concatenate((
                    new_businesses["sqft"],
                    business_set.get_attribute("sqft")[sampled_business]))
                new_businesses["employees"] = concatenate((
                    new_businesses["employees"],
                    business_set.get_attribute("employees")[sampled_business]))
                new_max_id = max_id + diff
                new_businesses[business_id_name] = concatenate((
                    new_businesses[business_id_name], arange(max_id + 1, new_max_id + 1)))
                max_id = new_max_id

        business_set.remove_elements(remove_businesses)
        business_set.add_elements(new_businesses, require_all_attributes=False)
        difference = business_set.size() - business_size
        self.debug.print_debug(
            "Difference in number of businesses: %s (original %s, new %s, created %s, deleted %s)"
            % (difference, business_size, business_set.size(),
               new_businesses[business_id_name].size, remove_businesses.size), 3)
        self.debug.print_debug(
            "Number of unplaced businesses: %s"
            % where(business_set.get_attribute(self.location_id_name) <= 0)[0].size, 3)
        return difference

    def prepare_for_run(self, storage, in_table_name, id_name, **kwargs):
        from urbansim.datasets.control_total_dataset import ControlTotalDataset
        control_totals = ControlTotalDataset(in_storage=storage,
                                             in_table_name=in_table_name,
                                             id_name=id_name)
        return control_totals
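# Hedged usage sketch (not from the original source): the storage handle, table
# name, and id columns below are assumptions for illustration; business_set
# would be a business dataset loaded elsewhere from the same storage.
model = BusinessTransitionModel(debuglevel=3)
control_totals = model.prepare_for_run(
    storage,
    in_table_name="annual_business_control_totals",
    id_name=["year", "building_use_id"])
difference = model.run(2005, business_set, control_totals,
                       data_objects={"business": business_set})
# 'difference' is the net change in the number of businesses for that year.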
def create_from_parcel_and_development_template(
        parcel_dataset,
        development_template_dataset,
        parcel_index=None,
        template_index=None,
        filter_attribute=None,
        consider_constraints_as_rules=True,
        template_opus_path="urbansim_parcel.development_template",
        proposed_units_variable="urbansim_parcel.development_project_proposal.units_proposed",
        dataset_pool=None,
        resources=None):
    """Create development project proposals from parcel_dataset and
    development_template_dataset. The status of the proposals is set to 'tentative'.
    parcel_index - 1D array of indices into parcel_dataset for which proposals are created;
    template_index - indices of the templates that are available to create proposals;
    filter_attribute - variable used to filter proposals.
    If a development constraint table exists, the proposal dataset includes only
    proposals allowed by the constraints; otherwise it is the Cartesian product
    of parcels x templates.
    """
    resources = Resources(resources)
    debug = resources.get("debug", 0)
    if not isinstance(debug, DebugPrinter):
        debug = DebugPrinter(debug)

    if parcel_index is not None and parcel_index.size <= 0:
        logger.log_warning("parcel index for creating development proposals is of size 0. "
                           "No proposals will be created.")
        return None

    storage = StorageFactory().get_storage('dict_storage')
    current_year = SimulationState().get_current_time()

    def _get_data(parcel_ids, template_ids):
        return {
            "proposal_id": arange(1, parcel_ids.size + 1, 1),
            "parcel_id": parcel_ids,
            "template_id": template_ids,
            "start_year": array(parcel_ids.size * [current_year]),
            "status_id": resize(array([DevelopmentProjectProposalDataset.id_tentative],
                                      dtype="int16"), parcel_ids.size)
        }

    def _create_project_proposals(parcel_ids, template_ids):
        storage.write_table(table_name='development_project_proposals',
                            table_data=_get_data(parcel_ids, template_ids))
        development_project_proposals = DevelopmentProjectProposalDataset(
            resources=Resources(resources),
            dataset1=parcel_dataset,
            dataset2=development_template_dataset,
            index1=parcel_index,
            index2=template_index,
            in_storage=storage,
            in_table_name='development_project_proposals',
        )
        return development_project_proposals

    def _compute_filter(proposals):
        if filter_attribute is not None:
            proposals.compute_variables(filter_attribute, dataset_pool=dataset_pool,
                                        resources=Resources(resources))
            filter_index = where(proposals.get_attribute(filter_attribute) > 0)[0]
            return filter_index
        return None

    def _subset_by_filter(proposals):
        filter_index = _compute_filter(proposals)
        if filter_index is not None:
            proposals.subset_by_index(filter_index, flush_attributes_if_not_loaded=False)
        return proposals

    if parcel_index is not None:
        index1 = parcel_index
    else:
        index1 = arange(parcel_dataset.size())
    if template_index is not None:
        index2 = template_index
    else:
        index2 = arange(development_template_dataset.size())

    has_constraint_dataset = True
    try:
        constraints = dataset_pool.get_dataset("development_constraint")
        constraints.load_dataset_if_not_loaded()
    except:
        has_constraint_dataset = False

    if has_constraint_dataset:
        constraint_types = unique(constraints.get_attribute("constraint_type"))  # units_per_acre, far, etc.
        development_template_dataset.compute_variables(
            ["%s.%s" % (template_opus_path, x) for x in constraint_types], dataset_pool)
        parcel_dataset.get_development_constraints(
            constraints, dataset_pool, index=index1,
            consider_constraints_as_rules=consider_constraints_as_rules)
        generic_land_use_type_ids = development_template_dataset.compute_variables(
            "urbansim_parcel.development_template.generic_land_use_type_id",
            dataset_pool=dataset_pool)

    parcel_ids = parcel_dataset.get_id_attribute()
    template_ids = development_template_dataset.get_id_attribute()
    proposal_parcel_ids = array([], dtype="int32")
    proposal_template_ids = array([], dtype="int32")

    logger.start_block("Combine parcels, templates and constraints")
    for i_template in index2:
        this_template_id = template_ids[i_template]
        fit_indicator = ones(index1.size, dtype="bool8")
        if has_constraint_dataset:
            generic_land_use_type_id = generic_land_use_type_ids[i_template]
            for constraint_type, constraint in \
                    parcel_dataset.development_constraints[generic_land_use_type_id].iteritems():
                # density converted to constraint variable name
                template_attribute = development_template_dataset.get_attribute(constraint_type)[i_template]
                if template_attribute == 0:
                    continue
                min_constraint = constraint[:, 0].copy()
                max_constraint = constraint[:, 1].copy()
                # treat -1 as unconstrained
                w_unconstr = min_constraint == -1
                if w_unconstr.any():
                    min_constraint[w_unconstr] = template_attribute
                w_unconstr = max_constraint == -1
                if w_unconstr.any():
                    max_constraint[w_unconstr] = template_attribute
                fit_indicator = logical_and(fit_indicator,
                                            logical_and(template_attribute >= min_constraint,
                                                        template_attribute <= max_constraint))
                if constraint_type == "units_per_acre":
                    res_units_capacity = parcel_dataset.get_attribute("parcel_sqft")[index1] * \
                        max_constraint / 43560.0
                    debug.print_debug("template_id %s (GLU ID %s): max total residential capacity %s, "
                                      "of which %s fits the constraints"
                                      % (this_template_id, generic_land_use_type_id,
                                         res_units_capacity.sum(),
                                         (res_units_capacity * fit_indicator).sum()), 12)
                else:
                    non_res_capacity = parcel_dataset.get_attribute("parcel_sqft")[index1] * max_constraint
                    debug.print_debug("template_id %s (GLU ID %s): max total non-residential capacity %s, "
                                      "of which %s fits the constraints"
                                      % (this_template_id, generic_land_use_type_id,
                                         non_res_capacity.sum(),
                                         (non_res_capacity * fit_indicator).sum()), 12)
        proposal_parcel_ids = concatenate((proposal_parcel_ids, parcel_ids[index1[fit_indicator]]))
        proposal_template_ids = concatenate((proposal_template_ids,
                                             resize(array([this_template_id]), fit_indicator.sum())))
    logger.end_block()

    proposals = _create_project_proposals(proposal_parcel_ids, proposal_template_ids)
    proposals = _subset_by_filter(proposals)

    # eliminate proposals with zero units_proposed
    units_proposed = proposals.compute_variables([proposed_units_variable],
                                                 dataset_pool=dataset_pool)
    where_up_greater_zero = where(units_proposed > 0)[0]
    if where_up_greater_zero.size > 0:
        proposals.subset_by_index(where_up_greater_zero, flush_attributes_if_not_loaded=False)

    logger.log_status("Proposal set created with %s proposals." % proposals.size())
    return proposals
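# Hedged usage sketch (not from the original source): the dataset names assume
# the standard urbansim_parcel setup, and dataset_pool would normally come from
# SessionConfiguration or a DatasetPool built from the simulation cache.
parcels = dataset_pool.get_dataset("parcel")
templates = dataset_pool.get_dataset("development_template")
proposals = create_from_parcel_and_development_template(
    parcels, templates,
    filter_attribute=None,  # or e.g. a developability expression on the proposals
    consider_constraints_as_rules=True,
    dataset_pool=dataset_pool)
# 'proposals' is a DevelopmentProjectProposalDataset of tentative proposals,
# already restricted to parcel/template combinations allowed by the constraints.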
    def run(self, projects, types, units, year=0, location_id_name="grid_id", debuglevel=0):
        debug = DebugPrinter(debuglevel)
        grid_ids_for_any_project = array([], dtype=int32)
        grid_ids_by_project_type = {}
        for project_type in types:
            grid_ids_by_project_type[project_type] = array([], dtype=int32)
            if projects[project_type] is not None:
                grid_ids_by_project_type[project_type] = \
                    projects[project_type].get_attribute(location_id_name)
            grid_ids_for_any_project = unique(concatenate(
                (grid_ids_for_any_project, grid_ids_by_project_type[project_type])))
        grid_ids_for_any_project = grid_ids_for_any_project[where(grid_ids_for_any_project > 0)]
        if not len(grid_ids_for_any_project):
            return

        result_data = {
            location_id_name: grid_ids_for_any_project,
            "scheduled_year": (year * ones((grid_ids_for_any_project.size,))).astype(int32),
        }
        for unit in units:
            result_data[unit] = zeros((grid_ids_for_any_project.size,), dtype=int32)
        for project_type in types:
            result_data["%s_improvement_value" % project_type] = zeros(
                (grid_ids_for_any_project.size,), dtype=int32)

        grid_idx = 0
        for grid_id in grid_ids_for_any_project:
            for i in range(0, len(types)):
                project_type = types[i]
                my_projects = projects[project_type]
                if my_projects is None:  # this type contributed no projects
                    continue
                w = where(my_projects.get_attribute(location_id_name) == grid_id)[0]
                if w.size > 0:
                    unit_variable = units[i]
                    result_data[unit_variable][grid_idx] = \
                        my_projects.get_attribute_by_index(
                            my_projects.get_attribute_name(), w).sum()
                    result_data["%s_improvement_value" % project_type][grid_idx] = \
                        my_projects.get_attribute_by_index("improvement_value", w).sum()
            grid_idx += 1

        storage = StorageFactory().get_storage('dict_storage')
        eventset_table_name = 'development_events_generated'
        storage.write_table(table_name=eventset_table_name, table_data=result_data)
        eventset = DevelopmentEventDataset(
            in_storage=storage,
            in_table_name=eventset_table_name,
            id_name=[location_id_name, "scheduled_year"],
        )
        debug.print_debug("Number of events: " + str(grid_ids_for_any_project.size), 3)
        return eventset
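# Hedged usage sketch (not from the original source): 'model' stands for an
# instance of the event-creation class that owns run() above; 'projects' maps
# each project type to a project dataset (or None), and 'types'/'units' are
# parallel lists. All names below are illustrative assumptions only.
types = ["residential", "commercial"]
units = ["residential_units", "commercial_sqft"]
eventset = model.run(projects, types, units, year=2005,
                     location_id_name="grid_id", debuglevel=3)
# 'eventset' is a DevelopmentEventDataset keyed by (grid_id, scheduled_year),
# with one row per location that received any project in that year.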