def run(self, chunk_specification, dataset, dataset_index=None, result_array_type=float32, **kwargs):
    """ 'chunk_specification' - determines the number of chunks to use when computing
            over the dataset.
        'dataset' - an object of class Dataset that is to be chunked.
        'dataset_index' - index of individuals in dataset to be chunked.
        'result_array_type' - type of the resulting array. Can be any numerical type of numpy array.
        **kwargs - keyword arguments.
        The method chunks dataset_index into the desired number of chunks (minimum is 1)
        and for each chunk it calls the method 'run_chunk'. The order of the individuals
        entering the chunking is determined by the method 'get_agents_order'.
    """
    if dataset_index is None:
        dataset_index = arange(dataset.size())
    if not isinstance(dataset_index, ndarray):
        dataset_index = array(dataset_index)
    logger.log_status("Total number of individuals: %s" % dataset_index.size)
    result_array = zeros(dataset_index.size, dtype=result_array_type)

    if dataset_index.size <= 0:
        logger.log_status("Nothing to be done.")
        return result_array

    all_indexed_individuals = DatasetSubset(dataset, dataset_index)
    # set order of individuals in chunks
    ordered_agent_indices = self.get_agents_order(all_indexed_individuals)

    # TODO: Remove next six lines after we inherit chunk specification as a text string.
    if chunk_specification is None:
        chunk_specification = {'nchunks': 1}
    chunker = ChunkSpecification(chunk_specification)
    self.number_of_chunks = chunker.nchunks(dataset_index)
    chunksize = int(ceil(all_indexed_individuals.size() / float(self.number_of_chunks)))

    for ichunk in range(self.number_of_chunks):
        logger.start_block("%s chunk %d out of %d." % (self.model_short_name,
                                                       ichunk + 1, self.number_of_chunks))
        self.index_of_current_chunk = ichunk
        try:
            chunk_agent_indices = ordered_agent_indices[arange(ichunk * chunksize,
                                                               min((ichunk + 1) * chunksize,
                                                                   all_indexed_individuals.size()))]
            logger.log_status("Number of agents in this chunk: %s" % chunk_agent_indices.size)
            result_array[chunk_agent_indices] = self.run_chunk(dataset_index[chunk_agent_indices],
                                                               dataset,
                                                               **kwargs).astype(result_array_type)
        finally:
            logger.end_block()

    return result_array
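# Hedged usage sketch (not from the original source): assumes `run` above lives on
# a ChunkModel-style base class; `DemoChunkModel`, its trivial `run_chunk`, and the
# `households` dataset are illustrative placeholders.
def _demo_chunked_run(households):
    from numpy import arange, ones

    class DemoChunkModel(ChunkModel):  # assumed base class providing run()
        model_short_name = "demo"

        def get_agents_order(self, agents):
            # process agents in their storage order
            return arange(agents.size())

        def run_chunk(self, index, dataset, **kwargs):
            # trivial per-chunk result: one value per agent in the chunk
            return ones(index.size)

    # {'nchunks': 4} makes ChunkSpecification split the index into four roughly
    # equal chunks, so run_chunk is called four times.
    return DemoChunkModel().run({'nchunks': 4}, households)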
def __init__(self, group_member, location_set,
             agents_grouping_attribute='building.building_type_id',
             sampler="opus_core.samplers.weighted_sampler",
             utilities="opus_core.linear_utilities",
             choices="urbansim.first_agent_first_choices",
             probabilities="opus_core.mnl_probabilities",
             estimation="opus_core.bhhh_mnl_estimation",
             capacity_string="is_developable_for_buildings_UNITS",
             estimation_weight_string=None,
             developable_maximum_unit_variable="developable_maximum_buildings_UNITS",
             developable_minimum_unit_variable="developable_minimum_UNITS",  # None means don't consider any minimum; for the default, set it to an empty string.
             number_of_agents_string="buildings_SSS_space",
             number_of_units_string="total_maximum_development_SSS",
             sample_proportion_locations=None,
             sample_size_locations=30,
             estimation_size_agents=1.0,
             compute_capacity_flag=True,
             filter="developable_maximum_buildings_UNITS",  # None means no filter is used.
             submodel_string="size_category_SSS",  # Set to None if no submodels should be used.
             nrecords_per_chunk_for_estimation_sampling=1000,  # Set to None to process everything in one chunk.
             location_id_string=None,
             run_config=None,
             estimate_config=None,
             debuglevel=0,
             dataset_pool=None,
             variable_package="urbansim"):
    """ 'group_member' is of type ModelGroupMember.
    """
    group_member_name = group_member.get_member_name()
    units = group_member.get_attribute_value("units")[0]

    if capacity_string:
        capacity_string = re.sub("UNITS", units, capacity_string)
        capacity_string = re.sub("SSS", group_member_name, capacity_string)
    if filter:
        filter = re.sub("UNITS", units, filter)
        filter = re.sub("SSS", group_member_name, filter)
    if developable_maximum_unit_variable:
        developable_maximum_unit_variable = re.sub("UNITS", units, developable_maximum_unit_variable)
        developable_maximum_unit_variable = re.sub("SSS", group_member_name, developable_maximum_unit_variable)
    if developable_minimum_unit_variable == "":
        developable_minimum_unit_variable = "developable_minimum_%s" % units
    elif developable_minimum_unit_variable:
        developable_minimum_unit_variable = re.sub("UNITS", units, developable_minimum_unit_variable)
        developable_minimum_unit_variable = re.sub("SSS", group_member_name, developable_minimum_unit_variable)
    if submodel_string:
        submodel_string = re.sub("UNITS", units, submodel_string)
        submodel_string = re.sub("SSS", group_member_name, submodel_string)
    if number_of_agents_string:
        number_of_agents_string = re.sub('UNITS', units, number_of_agents_string)
        number_of_agents_string = re.sub('SSS', group_member_name, number_of_agents_string)
    if number_of_units_string:
        number_of_units_string = re.sub('UNITS', units, number_of_units_string)
        number_of_units_string = re.sub('SSS', group_member_name, number_of_units_string)

    # create full names from (possibly) short names
    tmpdict = Resources({"filter": filter,
                         "max": developable_maximum_unit_variable,
                         "min": developable_minimum_unit_variable})
    self.add_prefix_to_variable_names(["filter", "max", "min"], location_set,
                                      variable_package, resources=tmpdict)
    filter = tmpdict["filter"]
    if tmpdict["max"]:
        self.developable_maximum_unit_variable = VariableName(tmpdict["max"])
    else:
        self.developable_maximum_unit_variable = None
    if tmpdict["min"]:
        self.developable_minimum_unit_variable = VariableName(tmpdict["min"])
    else:
        self.developable_minimum_unit_variable = None

    self.project_type = group_member_name
    self.units = units

    run_config = merge_resources_with_defaults(run_config, [
        ("sample_proportion_locations", sample_proportion_locations),
        ("sample_size_locations", sample_size_locations),
        ("compute_capacity_flag", compute_capacity_flag),
        ("capacity_string", capacity_string),
        ("agent_units_string", "urbansim.building.building_size"),
        ("number_of_agents_string", number_of_agents_string),
        ("number_of_units_string", number_of_units_string),
    ])
    estimate_config = merge_resources_with_defaults(estimate_config, [
        ("estimation", estimation),
        ("sample_proportion_locations", sample_proportion_locations),
        ("sample_size_locations", sample_size_locations),
        ("estimation_size_agents", estimation_size_agents),
        ("weights_for_estimation_string", estimation_weight_string),
        ("capacity_string", capacity_string),
        ("compute_capacity_flag", compute_capacity_flag),
    ])
    if nrecords_per_chunk_for_estimation_sampling:
        estimate_config["chunk_specification_for_estimation"] = ChunkSpecification(
            {"records_per_chunk": nrecords_per_chunk_for_estimation_sampling})

    AgentLocationChoiceModelMember.__init__(self, group_member, location_set,
                                            agents_grouping_attribute,
                                            model_name="Building Location Choice Model",
                                            short_name="BLCM",
                                            sampler=sampler,
                                            utilities=utilities,
                                            probabilities=probabilities,
                                            choices=choices,
                                            filter=filter,
                                            submodel_string=submodel_string,
                                            location_id_string=location_id_string,
                                            run_config=run_config,
                                            estimate_config=estimate_config,
                                            debuglevel=debuglevel,
                                            dataset_pool=dataset_pool,
                                            variable_package=variable_package)
def create_interaction_datasets(self, agent_set, agents_index, config, submodels=[], **kwargs):
    """Create an interaction dataset, with or without sampling of alternatives.

    Arguments to sampler_class are passed through config (run_config or
    estimation_config in the configuration file), such as 'include_chosen_choice',
    'with_replacement', 'stratum', 'sample_size_from_each_stratum',
    'sample_size_from_chosen_stratum' (for the stratified sampler).
    """
    nchoices = self.get_choice_set_size()
    sampling = True
    iterate_by_submodels = False
    if self.filter is not None and (isinstance(self.filter, dict) or
                                    re.search("SUBMODEL", self.filter) is not None):
        iterate_by_submodels = True

    ## apply the (alternatives) filter when the number of alternatives equals the size
    ## of the choice set, or when the sampler class is None
    if (self.sampler_class is None) or (nchoices == self.choice_set.size()):
        if self.filter is None:
            return ChoiceModel.create_interaction_datasets(self, agent_set, agents_index, config)
        elif iterate_by_submodels:
            sampling = False
        else:
            # apply filter without sampling if the filter is not defined by submodels
            filter_index = self.apply_filter(self.filter, agent_set, agents_index)
            self.model_interaction.create_interaction_datasets(agents_index, filter_index)
            self.update_choice_set_size(filter_index.size)
            return

    sampling_weights = self.get_sampling_weights(config, agent_set=agent_set, agents_index=agents_index)
    interaction_dataset = None
    # if the filter is specified by submodel in a dict, call the sampler submodel by submodel
    sampling_by_groups = False
    if (iterate_by_submodels or
            config.get("sample_alternatives_by_submodel", False) or
            config.get("sample_alternatives_by_group", False)):
        groups_equal_submodels = True
        groups = submodels
        sampling_by_groups = True
        if config.get("sample_alternatives_by_group", False):
            group_var = config.get("group_definition_for_sampling_alternatives", None)
            if group_var is None:
                logger.log_warning('No group variable defined for sampling alternatives. '
                                   'Set "group_definition_for_sampling_alternatives" in run_config/estimate_config.')
                if isinstance(self.filter, dict):
                    logger.log_warning("Alternatives are sampled by submodel.")
                else:
                    groups = []
                    sampling_by_groups = False
            else:
                group_values = agent_set.compute_variables([group_var],
                                                           dataset_pool=self.dataset_pool)[agents_index]
                groups = unique(group_values)
                groups_equal_submodels = False

        index2 = -1 + zeros((agents_index.size, nchoices), dtype="int32")
        attributes = {}
        ### TODO: it may be possible to merge this loop with sample_alternatives_by_chunk
        ### or put it in a common function
        for group in groups:
            if groups_equal_submodels:
                where_group = self.observations_mapping[group]
            else:
                where_group = where(group_values == group)[0]
            if where_group.size == 0:
                continue
            agents_index_in_group = agents_index[where_group]
            choice_index = self.apply_filter(self.filter, agent_set=agent_set,
                                             agents_index=agents_index_in_group,
                                             submodel=group,
                                             replace_dict={"SUBMODEL": group} if groups_equal_submodels else {"GROUP": group})
            if choice_index is not None and choice_index.size == 0:
                logger.log_error("There is no alternative that passes filter %s for %s=%s; %s agents with id %s will remain unplaced." %
                                 (self.filter, "SUBMODEL" if groups_equal_submodels else "GROUP", group,
                                  agents_index_in_group.size,
                                  agent_set.get_id_attribute()[agents_index_in_group]))
                continue

            if sampling:
                group_sampling_weights = sampling_weights
                if isinstance(sampling_weights, str):
                    group_sampling_weights = re.sub("SUBMODEL" if groups_equal_submodels else "GROUP",
                                                    str(int(group)), sampling_weights)
                chunk_specification = config.get("chunk_specification_for_sampling", {"nchunks": 1})
                if type(chunk_specification) == str:
                    chunk_specification = eval(chunk_specification)
                chunk_specification = ChunkSpecification(chunk_specification)
                nchunks = chunk_specification.nchunks(agents_index_in_group)
                chunksize = chunk_specification.chunk_size(agents_index_in_group)
                interaction_dataset = self.sample_alternatives_by_chunk(agent_set, agents_index_in_group,
                                                                        choice_index, nchoices,
                                                                        weights=group_sampling_weights,
                                                                        config=config,
                                                                        nchunks=nchunks, chunksize=chunksize)
                filter_idx = interaction_dataset.index2
            else:
                self.model_interaction.create_interaction_datasets(agents_index_in_group, choice_index)
                interaction_dataset = self.model_interaction.interaction_dataset
                filter_idx = -1 * ones((agents_index_in_group.size, nchoices), dtype="int32")
                filter_idx[:, interaction_dataset.index2] = interaction_dataset.index2

            if (not config.get("accept_unavailability_of_choices", False) and
                    interaction_dataset.get_reduced_m() == 0):
                raise StandardError, "There are no locations available for the given sampling weights for group %s." % group

            if len(groups) > 1 or (agents_index.size > agents_index_in_group.size):
                if interaction_dataset.get_reduced_m() > 0:
                    index2[where_group, :] = filter_idx
                    for name in interaction_dataset.get_known_attribute_names():
                        attr_val = interaction_dataset.get_attribute(name)
                        if not attributes.has_key(name):
                            attributes[name] = zeros(index2.shape, dtype=attr_val.dtype)
                        attributes[name][where_group, :] = attr_val

        if interaction_dataset is None:
            logger.log_warning("There is no agent for groups %s. " % (groups) +
                               "This may be due to a mismatch between agent_filter and the submodels included in the specification.")
            self.model_interaction.interaction_dataset = None
            return

        if len(groups) > 1 or (agents_index.size > agents_index_in_group.size):
            ## if there is more than one group, merge the data by submodel and recreate interaction_dataset
            if sampling:
                interaction_dataset = self.sampler_class.create_interaction_dataset(
                    interaction_dataset.dataset1, interaction_dataset.dataset2,
                    index1=agents_index, index2=index2)
                for name in attributes.keys():
                    interaction_dataset.add_primary_attribute(attributes[name], name)
            else:
                self.model_interaction.create_interaction_datasets(agents_index, index2)
                interaction_dataset = self.model_interaction.interaction_dataset
        self.update_choice_set_size(interaction_dataset.get_reduced_m())

    if not sampling_by_groups:  # no sampling by submodels/groups
        choice_index = self.apply_filter(self.filter, agent_set=agent_set, agents_index=agents_index)
        if choice_index is not None and choice_index.size == 0:
            message = "There is no alternative that passes filter %s; %s agents with id %s will remain unplaced." % \
                      (self.filter, agents_index.size, agent_set.get_id_attribute()[agents_index])
            if not config.get("accept_unavailability_of_choices", False):
                raise StandardError, message
            logger.log_error(message)
            self.model_interaction.interaction_dataset = None
            return
        chunk_specification = config.get("chunk_specification_for_sampling", {"nchunks": 1})
        if type(chunk_specification) == str:
            chunk_specification = eval(chunk_specification)
        chunk_specification = ChunkSpecification(chunk_specification)
        nchunks = chunk_specification.nchunks(agents_index)
        chunksize = chunk_specification.chunk_size(agents_index)
        interaction_dataset = self.sample_alternatives_by_chunk(agent_set, agents_index,
                                                                choice_index, nchoices,
                                                                weights=sampling_weights,
                                                                config=config,
                                                                nchunks=nchunks, chunksize=chunksize)
        if not config.get("accept_unavailability_of_choices", False) and interaction_dataset.get_reduced_m() == 0:
            raise StandardError, "There are no locations available for the given sampling weights."
        self.update_choice_set_size(interaction_dataset.get_reduced_m())

    self.model_interaction.interaction_dataset = interaction_dataset
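# Hedged config sketch (illustrative keys and values): the run_config/estimate_config
# entries read by create_interaction_datasets above. The group variable is made up;
# Resources is the dict-like config type used elsewhere in this codebase.
def _demo_sampling_config():
    return Resources({
        # sample alternatives separately for each value of this agent variable
        "sample_alternatives_by_group": True,
        "group_definition_for_sampling_alternatives": "household.income_category",
        # split each group's agents into two chunks while sampling
        "chunk_specification_for_sampling": {"nchunks": 2},
        # raise StandardError if a group ends up with no available locations
        "accept_unavailability_of_choices": False,
    })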
def create_interaction_datasets(self, agent_set, agents_index, config, submodels=[], **kwargs):
    """Create an interaction dataset, with or without sampling of alternatives.

    Arguments to sampler_class are passed through config (run_config or
    estimation_config in the configuration file), such as 'include_chosen_choice',
    'with_replacement', 'stratum', 'sample_size_from_each_stratum',
    'sample_size_from_chosen_stratum' (for the stratified sampler).
    """
    nchoices = self.get_choice_set_size()
    if nchoices == self.choice_set.size():
        if self.filter is None:
            ChoiceModel.create_interaction_datasets(self, agent_set, agents_index, config)
            return
        else:
            # apply filter without doing sampling
            filter_index = self.apply_filter(self.filter, agent_set, agents_index)
            self.model_interaction.create_interaction_datasets(agents_index, filter_index)
            self.update_choice_set_size(filter_index.size)
            return

    sampling_weights = self.get_sampling_weights(config, agent_set=agent_set, agents_index=agents_index)
    # if the filter is specified by submodel in a dict, call the sampler submodel by submodel
    if isinstance(self.filter, dict) or config.get("sample_alternatives_by_submodel", False):
        index2 = -1 + zeros((agents_index.size, nchoices), dtype="int32")
        attributes = {}
        #submodels = self.model_interaction.get_submodels()
        ### TODO: it may be possible to merge this loop with sample_alternatives_by_chunk
        ### or put it in a common function
        for submodel in submodels:
            agents_index_in_submodel = agents_index[self.observations_mapping[submodel]]
            if agents_index_in_submodel.size == 0:
                continue
            choice_index = self.apply_filter(self.filter, agent_set=agent_set,
                                             agents_index=agents_index_in_submodel,
                                             submodel=submodel)
            if choice_index is not None and choice_index.size == 0:
                logger.log_error("There is no alternative that passes filter %s; %s agents with id %s will remain unplaced." %
                                 (self.filter, agents_index_in_submodel.size,
                                  agent_set.get_id_attribute()[agents_index_in_submodel]))
                continue
            submodel_sampling_weights = sampling_weights
            if isinstance(sampling_weights, str):
                submodel_sampling_weights = re.sub('SUBMODEL', str(submodel), sampling_weights)
            chunk_specification = config.get("chunk_specification_for_sampling", {"nchunks": 1})
            if type(chunk_specification) == str:
                chunk_specification = eval(chunk_specification)
            chunk_specification = ChunkSpecification(chunk_specification)
            nchunks = chunk_specification.nchunks(agents_index_in_submodel)
            chunksize = chunk_specification.chunk_size(agents_index_in_submodel)
            interaction_dataset = self.sample_alternatives_by_chunk(agent_set, agents_index_in_submodel,
                                                                    choice_index, nchoices,
                                                                    weights=submodel_sampling_weights,
                                                                    config=config,
                                                                    nchunks=nchunks, chunksize=chunksize)
            if len(submodels) > 1:
                index2[self.observations_mapping[submodel], :] = interaction_dataset.index2
                for name in interaction_dataset.get_known_attribute_names():
                    attr_val = interaction_dataset.get_attribute(name)
                    if not attributes.has_key(name):
                        attributes[name] = zeros(index2.shape, dtype=attr_val.dtype)
                    attributes[name][self.observations_mapping[submodel], :] = attr_val

        if len(submodels) > 1:
            ## if there is more than one submodel, merge the data by submodel and recreate interaction_dataset
            interaction_dataset = self.sampler_class.create_interaction_dataset(
                interaction_dataset.dataset1, interaction_dataset.dataset2,
                index1=agents_index, index2=index2)
            for name in attributes.keys():
                interaction_dataset.add_attribute(attributes[name], name)
        self.update_choice_set_size(interaction_dataset.get_reduced_m())
    else:
        choice_index = self.apply_filter(self.filter, agent_set=agent_set, agents_index=agents_index)
        if choice_index is not None and choice_index.size == 0:
            logger.log_error("There is no alternative that passes filter %s; %s agents with id %s will remain unplaced." %
                             (self.filter, agents_index.size, agent_set.get_id_attribute()[agents_index]))
            return  # OR raise?
        chunk_specification = config.get("chunk_specification_for_sampling", {"nchunks": 1})
        if type(chunk_specification) == str:
            chunk_specification = eval(chunk_specification)
        chunk_specification = ChunkSpecification(chunk_specification)
        nchunks = chunk_specification.nchunks(agents_index)
        chunksize = chunk_specification.chunk_size(agents_index)
        interaction_dataset = self.sample_alternatives_by_chunk(agent_set, agents_index,
                                                                choice_index, nchoices,
                                                                weights=sampling_weights,
                                                                config=config,
                                                                nchunks=nchunks, chunksize=chunksize)
        self.update_choice_set_size(interaction_dataset.get_reduced_m())

    self.model_interaction.interaction_dataset = interaction_dataset
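# Hedged sketch of the SUBMODEL substitution used above (standalone; the weight
# expression is made up): a string of sampling weights carries a SUBMODEL
# placeholder that is rewritten per submodel id before the sampler evaluates it.
def _demo_submodel_weight_substitution():
    weights = "urbansim.gridcell.vacant_SUBMODEL_units"  # assumed expression
    # returns ['urbansim.gridcell.vacant_1_units', 'urbansim.gridcell.vacant_2_units']
    return [re.sub('SUBMODEL', str(submodel), weights) for submodel in (1, 2)]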