def run(self, chunk_specification, dataset, dataset_index=None, result_array_type=float32, **kwargs):
    """ 'chunk_specification' - determines the number of chunks to use when computing
            over the dataset.
        'dataset' - an object of class Dataset that is to be chunked.
        'dataset_index' - index of individuals in dataset to be chunked.
        'result_array_type' - type of the resulting array. Can be any numerical type of numpy array.
        **kwargs - keyword arguments.
        The method chunks dataset_index into the desired number of chunks (minimum is 1)
        and for each chunk it calls the method 'run_chunk'. The order of the individuals
        entering the chunking is determined by the method 'get_agents_order'.
    """
    if dataset_index is None:
        dataset_index = arange(dataset.size())
    if not isinstance(dataset_index, ndarray):
        dataset_index = array(dataset_index)
    logger.log_status("Total number of individuals: %s" % dataset_index.size)
    result_array = zeros(dataset_index.size, dtype=result_array_type)

    if dataset_index.size <= 0:
        logger.log_status("Nothing to be done.")
        return result_array

    all_indexed_individuals = DatasetSubset(dataset, dataset_index)
    # set order of individuals in chunks
    ordered_agent_indices = self.get_agents_order(all_indexed_individuals)

    # TODO: Remove next six lines after we inherit chunk specification as a text string.
    if chunk_specification is None:
        chunk_specification = {'nchunks': 1}
    chunker = ChunkSpecification(chunk_specification)
    self.number_of_chunks = chunker.nchunks(dataset_index)
    chunksize = int(ceil(all_indexed_individuals.size() / float(self.number_of_chunks)))

    for ichunk in range(self.number_of_chunks):
        logger.start_block("%s chunk %d out of %d." % (self.model_short_name,
                                                       ichunk + 1, self.number_of_chunks))
        self.index_of_current_chunk = ichunk
        try:
            chunk_agent_indices = ordered_agent_indices[arange(ichunk * chunksize,
                                                               min((ichunk + 1) * chunksize,
                                                                   all_indexed_individuals.size()))]
            logger.log_status("Number of agents in this chunk: %s" % chunk_agent_indices.size)
            result_array[chunk_agent_indices] = self.run_chunk(dataset_index[chunk_agent_indices],
                                                               dataset,
                                                               **kwargs).astype(result_array_type)
        finally:
            logger.end_block()

    return result_array
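# Hedged usage sketch (not from the original source): assumes `run` above lives on
# a ChunkModel-style base class; `DemoChunkModel`, its trivial `run_chunk`, and the
# `households` dataset are illustrative placeholders.
def _demo_chunked_run(households):
    from numpy import arange, ones

    class DemoChunkModel(ChunkModel):  # assumed base class providing run()
        model_short_name = "demo"

        def get_agents_order(self, agents):
            # process agents in their storage order
            return arange(agents.size())

        def run_chunk(self, index, dataset, **kwargs):
            # trivial per-chunk result: one value per agent in the chunk
            return ones(index.size)

    # {'nchunks': 4} makes ChunkSpecification split the index into four roughly
    # equal chunks, so run_chunk is called four times.
    return DemoChunkModel().run({'nchunks': 4}, households)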
def __init__(self, group_member, location_set,
             agents_grouping_attribute='building.building_type_id',
             sampler="opus_core.samplers.weighted_sampler",
             utilities="opus_core.linear_utilities",
             choices="urbansim.first_agent_first_choices",
             probabilities="opus_core.mnl_probabilities",
             estimation="opus_core.bhhh_mnl_estimation",
             capacity_string="is_developable_for_buildings_UNITS",
             estimation_weight_string=None,
             developable_maximum_unit_variable="developable_maximum_buildings_UNITS",
             developable_minimum_unit_variable="developable_minimum_UNITS",  # None means don't consider any minimum; for the default, set it to an empty string.
             number_of_agents_string="buildings_SSS_space",
             number_of_units_string="total_maximum_development_SSS",
             sample_proportion_locations=None,
             sample_size_locations=30,
             estimation_size_agents=1.0,
             compute_capacity_flag=True,
             filter="developable_maximum_buildings_UNITS",  # None means no filter is used.
             submodel_string="size_category_SSS",  # Set to None if no submodels should be used.
             nrecords_per_chunk_for_estimation_sampling=1000,  # Set to None to process everything in one chunk.
             location_id_string=None,
             run_config=None,
             estimate_config=None,
             debuglevel=0,
             dataset_pool=None,
             variable_package="urbansim"):
    """ 'group_member' is of type ModelGroupMember.
    """
    group_member_name = group_member.get_member_name()
    units = group_member.get_attribute_value("units")[0]

    if capacity_string:
        capacity_string = re.sub("UNITS", units, capacity_string)
        capacity_string = re.sub("SSS", group_member_name, capacity_string)
    if filter:
        filter = re.sub("UNITS", units, filter)
        filter = re.sub("SSS", group_member_name, filter)
    if developable_maximum_unit_variable:
        developable_maximum_unit_variable = re.sub("UNITS", units, developable_maximum_unit_variable)
        developable_maximum_unit_variable = re.sub("SSS", group_member_name, developable_maximum_unit_variable)
    if developable_minimum_unit_variable == "":
        developable_minimum_unit_variable = "developable_minimum_%s" % units
    elif developable_minimum_unit_variable:
        developable_minimum_unit_variable = re.sub("UNITS", units, developable_minimum_unit_variable)
        developable_minimum_unit_variable = re.sub("SSS", group_member_name, developable_minimum_unit_variable)
    if submodel_string:
        submodel_string = re.sub("UNITS", units, submodel_string)
        submodel_string = re.sub("SSS", group_member_name, submodel_string)
    if number_of_agents_string:
        number_of_agents_string = re.sub('UNITS', units, number_of_agents_string)
        number_of_agents_string = re.sub('SSS', group_member_name, number_of_agents_string)
    if number_of_units_string:
        number_of_units_string = re.sub('UNITS', units, number_of_units_string)
        number_of_units_string = re.sub('SSS', group_member_name, number_of_units_string)

    # create full names from (possibly) short names
    tmpdict = Resources({"filter": filter,
                         "max": developable_maximum_unit_variable,
                         "min": developable_minimum_unit_variable})
    self.add_prefix_to_variable_names(["filter", "max", "min"], location_set,
                                      variable_package, resources=tmpdict)
    filter = tmpdict["filter"]
    if tmpdict["max"]:
        self.developable_maximum_unit_variable = VariableName(tmpdict["max"])
    else:
        self.developable_maximum_unit_variable = None
    if tmpdict["min"]:
        self.developable_minimum_unit_variable = VariableName(tmpdict["min"])
    else:
        self.developable_minimum_unit_variable = None

    self.project_type = group_member_name
    self.units = units

    run_config = merge_resources_with_defaults(run_config, [
        ("sample_proportion_locations", sample_proportion_locations),
        ("sample_size_locations", sample_size_locations),
        ("compute_capacity_flag", compute_capacity_flag),
        ("capacity_string", capacity_string),
        ("agent_units_string", "urbansim.building.building_size"),
        ("number_of_agents_string", number_of_agents_string),
        ("number_of_units_string", number_of_units_string),
    ])
    estimate_config = merge_resources_with_defaults(estimate_config, [
        ("estimation", estimation),
        ("sample_proportion_locations", sample_proportion_locations),
        ("sample_size_locations", sample_size_locations),
        ("estimation_size_agents", estimation_size_agents),
        ("weights_for_estimation_string", estimation_weight_string),
        ("capacity_string", capacity_string),
        ("compute_capacity_flag", compute_capacity_flag),
    ])
    if nrecords_per_chunk_for_estimation_sampling:
        estimate_config["chunk_specification_for_estimation"] = ChunkSpecification(
            {"records_per_chunk": nrecords_per_chunk_for_estimation_sampling})

    AgentLocationChoiceModelMember.__init__(self, group_member, location_set,
                                            agents_grouping_attribute,
                                            model_name="Building Location Choice Model",
                                            short_name="BLCM",
                                            sampler=sampler,
                                            utilities=utilities,
                                            probabilities=probabilities,
                                            choices=choices,
                                            filter=filter,
                                            submodel_string=submodel_string,
                                            location_id_string=location_id_string,
                                            run_config=run_config,
                                            estimate_config=estimate_config,
                                            debuglevel=debuglevel,
                                            dataset_pool=dataset_pool,
                                            variable_package=variable_package)
def create_interaction_datasets(self, agent_set, agents_index, config, submodels=[], **kwargs):
    """Create an interaction dataset, with or without sampling of alternatives.

    Arguments to sampler_class are passed through config (run_config or
    estimation_config in the configuration file), such as 'include_chosen_choice',
    'with_replacement', 'stratum', 'sample_size_from_each_stratum',
    'sample_size_from_chosen_stratum' (for the stratified sampler).
    """
    nchoices = self.get_choice_set_size()
    sampling = True
    iterate_by_submodels = False
    if self.filter is not None and (isinstance(self.filter, dict) or
                                    re.search("SUBMODEL", self.filter) is not None):
        iterate_by_submodels = True

    ## apply the (alternatives) filter when the number of alternatives equals the size
    ## of the choice set, or when the sampler class is None
    if (self.sampler_class is None) or (nchoices == self.choice_set.size()):
        if self.filter is None:
            return ChoiceModel.create_interaction_datasets(self, agent_set, agents_index, config)
        elif iterate_by_submodels:
            sampling = False
        else:
            # apply filter without sampling if the filter is not defined by submodels
            filter_index = self.apply_filter(self.filter, agent_set, agents_index)
            self.model_interaction.create_interaction_datasets(agents_index, filter_index)
            self.update_choice_set_size(filter_index.size)
            return

    sampling_weights = self.get_sampling_weights(config, agent_set=agent_set, agents_index=agents_index)
    interaction_dataset = None
    # if the filter is specified by submodel in a dict, call the sampler submodel by submodel
    sampling_by_groups = False
    if (iterate_by_submodels or
            config.get("sample_alternatives_by_submodel", False) or
            config.get("sample_alternatives_by_group", False)):
        groups_equal_submodels = True
        groups = submodels
        sampling_by_groups = True
        if config.get("sample_alternatives_by_group", False):
            group_var = config.get("group_definition_for_sampling_alternatives", None)
            if group_var is None:
                logger.log_warning('No group variable defined for sampling alternatives. '
                                   'Set "group_definition_for_sampling_alternatives" in run_config/estimate_config.')
                if isinstance(self.filter, dict):
                    logger.log_warning("Alternatives are sampled by submodel.")
                else:
                    groups = []
                    sampling_by_groups = False
            else:
                group_values = agent_set.compute_variables([group_var],
                                                           dataset_pool=self.dataset_pool)[agents_index]
                groups = unique(group_values)
                groups_equal_submodels = False

        index2 = -1 + zeros((agents_index.size, nchoices), dtype="int32")
        attributes = {}
        ### TODO: it may be possible to merge this loop with sample_alternatives_by_chunk
        ### or put it in a common function
        for group in groups:
            if groups_equal_submodels:
                where_group = self.observations_mapping[group]
            else:
                where_group = where(group_values == group)[0]
            if where_group.size == 0:
                continue
            agents_index_in_group = agents_index[where_group]
            choice_index = self.apply_filter(self.filter, agent_set=agent_set,
                                             agents_index=agents_index_in_group,
                                             submodel=group,
                                             replace_dict={"SUBMODEL": group} if groups_equal_submodels else {"GROUP": group})
            if choice_index is not None and choice_index.size == 0:
                logger.log_error("There is no alternative that passes filter %s for %s=%s; %s agents with id %s will remain unplaced." %
                                 (self.filter, "SUBMODEL" if groups_equal_submodels else "GROUP", group,
                                  agents_index_in_group.size,
                                  agent_set.get_id_attribute()[agents_index_in_group]))
                continue

            if sampling:
                group_sampling_weights = sampling_weights
                if isinstance(sampling_weights, str):
                    group_sampling_weights = re.sub("SUBMODEL" if groups_equal_submodels else "GROUP",
                                                    str(int(group)), sampling_weights)
                chunk_specification = config.get("chunk_specification_for_sampling", {"nchunks": 1})
                if type(chunk_specification) == str:
                    chunk_specification = eval(chunk_specification)
                chunk_specification = ChunkSpecification(chunk_specification)
                nchunks = chunk_specification.nchunks(agents_index_in_group)
                chunksize = chunk_specification.chunk_size(agents_index_in_group)
                interaction_dataset = self.sample_alternatives_by_chunk(agent_set, agents_index_in_group,
                                                                        choice_index, nchoices,
                                                                        weights=group_sampling_weights,
                                                                        config=config,
                                                                        nchunks=nchunks, chunksize=chunksize)
                filter_idx = interaction_dataset.index2
            else:
                self.model_interaction.create_interaction_datasets(agents_index_in_group, choice_index)
                interaction_dataset = self.model_interaction.interaction_dataset
                filter_idx = -1 * ones((agents_index_in_group.size, nchoices), dtype="int32")
                filter_idx[:, interaction_dataset.index2] = interaction_dataset.index2

            if (not config.get("accept_unavailability_of_choices", False) and
                    interaction_dataset.get_reduced_m() == 0):
                raise StandardError, "There are no locations available for the given sampling weights for group %s." % group

            if len(groups) > 1 or (agents_index.size > agents_index_in_group.size):
                if interaction_dataset.get_reduced_m() > 0:
                    index2[where_group, :] = filter_idx
                    for name in interaction_dataset.get_known_attribute_names():
                        attr_val = interaction_dataset.get_attribute(name)
                        if not attributes.has_key(name):
                            attributes[name] = zeros(index2.shape, dtype=attr_val.dtype)
                        attributes[name][where_group, :] = attr_val

        if interaction_dataset is None:
            logger.log_warning("There is no agent for groups %s. " % (groups) +
                               "This may be due to a mismatch between agent_filter and the submodels included in the specification.")
            self.model_interaction.interaction_dataset = None
            return

        if len(groups) > 1 or (agents_index.size > agents_index_in_group.size):
            ## if there is more than one group, merge the data by submodel and recreate interaction_dataset
            if sampling:
                interaction_dataset = self.sampler_class.create_interaction_dataset(
                    interaction_dataset.dataset1, interaction_dataset.dataset2,
                    index1=agents_index, index2=index2)
                for name in attributes.keys():
                    interaction_dataset.add_primary_attribute(attributes[name], name)
            else:
                self.model_interaction.create_interaction_datasets(agents_index, index2)
                interaction_dataset = self.model_interaction.interaction_dataset
        self.update_choice_set_size(interaction_dataset.get_reduced_m())

    if not sampling_by_groups:  # no sampling by submodels/groups
        choice_index = self.apply_filter(self.filter, agent_set=agent_set, agents_index=agents_index)
        if choice_index is not None and choice_index.size == 0:
            message = "There is no alternative that passes filter %s; %s agents with id %s will remain unplaced." % \
                      (self.filter, agents_index.size, agent_set.get_id_attribute()[agents_index])
            if not config.get("accept_unavailability_of_choices", False):
                raise StandardError, message
            logger.log_error(message)
            self.model_interaction.interaction_dataset = None
            return
        chunk_specification = config.get("chunk_specification_for_sampling", {"nchunks": 1})
        if type(chunk_specification) == str:
            chunk_specification = eval(chunk_specification)
        chunk_specification = ChunkSpecification(chunk_specification)
        nchunks = chunk_specification.nchunks(agents_index)
        chunksize = chunk_specification.chunk_size(agents_index)
        interaction_dataset = self.sample_alternatives_by_chunk(agent_set, agents_index,
                                                                choice_index, nchoices,
                                                                weights=sampling_weights,
                                                                config=config,
                                                                nchunks=nchunks, chunksize=chunksize)
        if not config.get("accept_unavailability_of_choices", False) and interaction_dataset.get_reduced_m() == 0:
            raise StandardError, "There are no locations available for the given sampling weights."
        self.update_choice_set_size(interaction_dataset.get_reduced_m())

    self.model_interaction.interaction_dataset = interaction_dataset
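# Hedged config sketch (illustrative keys and values): the run_config/estimate_config
# entries read by create_interaction_datasets above. The group variable is made up;
# Resources is the dict-like config type used elsewhere in this codebase.
def _demo_sampling_config():
    return Resources({
        # sample alternatives separately for each value of this agent variable
        "sample_alternatives_by_group": True,
        "group_definition_for_sampling_alternatives": "household.income_category",
        # split each group's agents into two chunks while sampling
        "chunk_specification_for_sampling": {"nchunks": 2},
        # raise StandardError if a group ends up with no available locations
        "accept_unavailability_of_choices": False,
    })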
def create_interaction_datasets(self, agent_set, agents_index, config, submodels=[], **kwargs):
    """Create an interaction dataset, with or without sampling of alternatives.

    Arguments to sampler_class are passed through config (run_config or
    estimation_config in the configuration file), such as 'include_chosen_choice',
    'with_replacement', 'stratum', 'sample_size_from_each_stratum',
    'sample_size_from_chosen_stratum' (for the stratified sampler).
    """
    nchoices = self.get_choice_set_size()
    if nchoices == self.choice_set.size():
        if self.filter is None:
            ChoiceModel.create_interaction_datasets(self, agent_set, agents_index, config)
            return
        else:
            # apply filter without doing sampling
            filter_index = self.apply_filter(self.filter, agent_set, agents_index)
            self.model_interaction.create_interaction_datasets(agents_index, filter_index)
            self.update_choice_set_size(filter_index.size)
            return

    sampling_weights = self.get_sampling_weights(config, agent_set=agent_set, agents_index=agents_index)
    # if the filter is specified by submodel in a dict, call the sampler submodel by submodel
    if isinstance(self.filter, dict) or config.get("sample_alternatives_by_submodel", False):
        index2 = -1 + zeros((agents_index.size, nchoices), dtype="int32")
        attributes = {}
        #submodels = self.model_interaction.get_submodels()
        ### TODO: it may be possible to merge this loop with sample_alternatives_by_chunk
        ### or put it in a common function
        for submodel in submodels:
            agents_index_in_submodel = agents_index[self.observations_mapping[submodel]]
            if agents_index_in_submodel.size == 0:
                continue
            choice_index = self.apply_filter(self.filter, agent_set=agent_set,
                                             agents_index=agents_index_in_submodel,
                                             submodel=submodel)
            if choice_index is not None and choice_index.size == 0:
                logger.log_error("There is no alternative that passes filter %s; %s agents with id %s will remain unplaced." %
                                 (self.filter, agents_index_in_submodel.size,
                                  agent_set.get_id_attribute()[agents_index_in_submodel]))
                continue
            submodel_sampling_weights = sampling_weights
            if isinstance(sampling_weights, str):
                submodel_sampling_weights = re.sub('SUBMODEL', str(submodel), sampling_weights)
            chunk_specification = config.get("chunk_specification_for_sampling", {"nchunks": 1})
            if type(chunk_specification) == str:
                chunk_specification = eval(chunk_specification)
            chunk_specification = ChunkSpecification(chunk_specification)
            nchunks = chunk_specification.nchunks(agents_index_in_submodel)
            chunksize = chunk_specification.chunk_size(agents_index_in_submodel)
            interaction_dataset = self.sample_alternatives_by_chunk(agent_set, agents_index_in_submodel,
                                                                    choice_index, nchoices,
                                                                    weights=submodel_sampling_weights,
                                                                    config=config,
                                                                    nchunks=nchunks, chunksize=chunksize)
            if len(submodels) > 1:
                index2[self.observations_mapping[submodel], :] = interaction_dataset.index2
                for name in interaction_dataset.get_known_attribute_names():
                    attr_val = interaction_dataset.get_attribute(name)
                    if not attributes.has_key(name):
                        attributes[name] = zeros(index2.shape, dtype=attr_val.dtype)
                    attributes[name][self.observations_mapping[submodel], :] = attr_val

        if len(submodels) > 1:
            ## if there is more than one submodel, merge the data by submodel and recreate interaction_dataset
            interaction_dataset = self.sampler_class.create_interaction_dataset(
                interaction_dataset.dataset1, interaction_dataset.dataset2,
                index1=agents_index, index2=index2)
            for name in attributes.keys():
                interaction_dataset.add_attribute(attributes[name], name)
        self.update_choice_set_size(interaction_dataset.get_reduced_m())
    else:
        choice_index = self.apply_filter(self.filter, agent_set=agent_set, agents_index=agents_index)
        if choice_index is not None and choice_index.size == 0:
            logger.log_error("There is no alternative that passes filter %s; %s agents with id %s will remain unplaced." %
                             (self.filter, agents_index.size, agent_set.get_id_attribute()[agents_index]))
            return  # OR raise?
        chunk_specification = config.get("chunk_specification_for_sampling", {"nchunks": 1})
        if type(chunk_specification) == str:
            chunk_specification = eval(chunk_specification)
        chunk_specification = ChunkSpecification(chunk_specification)
        nchunks = chunk_specification.nchunks(agents_index)
        chunksize = chunk_specification.chunk_size(agents_index)
        interaction_dataset = self.sample_alternatives_by_chunk(agent_set, agents_index,
                                                                choice_index, nchoices,
                                                                weights=sampling_weights,
                                                                config=config,
                                                                nchunks=nchunks, chunksize=chunksize)
        self.update_choice_set_size(interaction_dataset.get_reduced_m())

    self.model_interaction.interaction_dataset = interaction_dataset
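# Hedged sketch of the SUBMODEL substitution used above (standalone; the weight
# expression is made up): a string of sampling weights carries a SUBMODEL
# placeholder that is rewritten per submodel id before the sampler evaluates it.
def _demo_submodel_weight_substitution():
    weights = "urbansim.gridcell.vacant_SUBMODEL_units"  # assumed expression
    # returns ['urbansim.gridcell.vacant_1_units', 'urbansim.gridcell.vacant_2_units']
    return [re.sub('SUBMODEL', str(submodel), weights) for submodel in (1, 2)]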