class RunSimulationFromMysql:
    def prepare_for_simulation(self, run_configuration, cache_directory=None):
        self.config = Resources(run_configuration)
        self.simulation_state = SimulationState(
            new_instance=True, base_cache_dir=cache_directory, start_time=self.config.get("base_year", 0)
        )

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        if self.config["cache_directory"] is None:
            self.config["cache_directory"] = self.simulation_state.get_cache_directory()

        SessionConfiguration(
            new_instance=True,
            package_order=self.config["dataset_pool_configuration"].package_order,
            in_storage=AttributeCache(),
        )

        ForkProcess().fork_new_process(
            self.config["creating_baseyear_cache_configuration"].cache_scenario_database, self.config
        )

        # Create output database (normally done by run manager)
        if "estimation_database_configuration" in self.config:
            db_server = DatabaseServer(self.config["estimation_database_configuration"])
            if not db_server.has_database(self.config["estimation_database_configuration"].database_name):
                db_server.create_database(self.config["estimation_database_configuration"].database_name)

    def run_simulation(self, simulation_instance=None):
        logger.start_block("Simulation on database %s" % self.config["scenario_database_configuration"].database_name)
        try:
            if simulation_instance is None:
                simulation_instance = ModelSystem()
            simulation_instance.run(self.config)
            # simulation_instance.run_multiprocess(self.config, is_run_subset=True)
        finally:
            logger.end_block()
        logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory())

    def cleanup(self, remove_cache, remove_output_database):
        """Remove all outputs of this simulation."""
        self.simulation_state.remove_singleton(delete_cache=remove_cache)
        # Remove SessionConfiguration singleton, if it exists
        Singleton().remove_singleton_for_class(SessionConfiguration)

        if remove_cache:
            cache_dir = self.config["cache_directory"]
            if os.path.exists(cache_dir):
                rmtree(cache_dir)
        if remove_output_database and ("estimation_database_configuration" in self.config):
            db_server = DatabaseServer(self.config["estimation_database_configuration"])
            db_server.drop_database(self.config["estimation_database_configuration"].database_name)

    def prepare_and_run(self, run_configuration, simulation_instance=None,
                        remove_cache=True, remove_output_database=False):
        self.prepare_for_simulation(run_configuration)
        self.run_simulation(simulation_instance)
        self.cleanup(remove_cache, remove_output_database)
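A minimal driver sketch for the class above. It is hedged: `run_configuration`
stands for a fully populated configuration dictionary (with the keys referenced
in prepare_for_simulation), which this example does not show.

# runner = RunSimulationFromMysql()
# runner.prepare_for_simulation(run_configuration)   # caches base year, creates output DB
# runner.run_simulation()                            # runs a ModelSystem over the config
# runner.cleanup(remove_cache=True, remove_output_database=False)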
Example #2
class RunSimulation(object):
    def prepare_for_simulation(self, config, cache_directory=None):
        self.config = Resources(config)
        base_cache_dir = self.config['creating_baseyear_cache_configuration'].cache_directory_root

        self.simulation_state = SimulationState(new_instance=True, base_cache_dir=base_cache_dir,
                                                start_time=self.config.get('base_year', 0))

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        if self.config['cache_directory'] is None:
            self.config['cache_directory'] = self.simulation_state.get_cache_directory()

        SessionConfiguration(new_instance=True,
                             package_order=self.config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())

        if config['creating_baseyear_cache_configuration'].cache_from_database:
            ForkProcess().fork_new_process(
                self.config['creating_baseyear_cache_configuration'].cache_scenario_database, self.config)
        else:
            CacheFltData().run(self.config)

    def run_simulation(self, simulation_instance=None):
        if simulation_instance is None:
            simulation_instance = ModelSystem()
        simulation_instance.run(self.config)
        #simulation_instance.run_multiprocess(self.config, is_run_subset=True)
        logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory())

    def cleanup(self, remove_cache=True):
        """Remove all outputs of this simulation."""
        self.simulation_state.remove_singleton(delete_cache=remove_cache)
        SessionConfiguration().remove_singleton()
        if remove_cache:
            cache_dir = self.config['cache_directory']
            if os.path.exists(cache_dir):
                rmtree(cache_dir)

    def prepare_and_run(self, run_configuration, simulation_instance=None, remove_cache=True):
        self.prepare_for_simulation(run_configuration)
        self.run_simulation(simulation_instance)
        self.cleanup(remove_cache)
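A comparable hedged sketch for RunSimulation; again, `run_configuration` stands
for a configuration dictionary not shown on this page. This variant builds the
base-year cache either from the scenario database or from existing flt data,
depending on cache_from_database.

# simulation = RunSimulation()
# simulation.prepare_and_run(run_configuration)      # prepare, run, then clean up
# # Or step by step, keeping the cache afterwards:
# simulation.prepare_for_simulation(run_configuration)
# simulation.run_simulation()
# simulation.cleanup(remove_cache=False)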
Example #4
    def _initialize_previous_years(self, attribute_cache, 
                                   base_year,
                                   creating_baseyear_cache_configuration):
        simulation_state = SimulationState()
        cache_directory = simulation_state.get_cache_directory()

        baseyear_cache_path = os.path.join(cache_directory, str(base_year))
        for table_name, year in creating_baseyear_cache_configuration.tables_to_copy_to_previous_years.iteritems():
            year_cache_path = os.path.join(cache_directory, str(year))
            dest_file_path = os.path.join(year_cache_path, table_name)
            if os.path.exists(dest_file_path):
                rmtree(dest_file_path)
            copytree(os.path.join(baseyear_cache_path, table_name),
                     dest_file_path)
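For context, a sketch of the mapping the method above iterates over. The
attribute name comes from the code; the table names and years here are invented
for illustration.

# Hypothetical value of creating_baseyear_cache_configuration.tables_to_copy_to_previous_years:
# copy the base-year 'jobs' cache back to 2000 and 'households' back to 1999, so
# lag variables for those years can be computed from base-year data.
tables_to_copy_to_previous_years = {'jobs': 2000, 'households': 1999}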
Example #7
    def run(self, table_names, out_storage=None, table_name_pattern=None, cache_directory=None, year=None, **kwargs):
        """
        export specified tables to database

        table_name_pattern: For example '{table_name}_{scenario_name}_{year}'
        """
        if not hasattr(self, "out_storage"):
            if out_storage is None:
                raise ValueError, ("Either out_storage argument needs to be specified or "
                                   "prepare_for_run called before run method to create a valid out_storage.")
            else:
                self.out_storage = out_storage
        sim_state = SimulationState()
        if sim_state.get_current_time() == 0:
            # No simulation year has been set; use a placeholder year.
            sim_state.set_current_time(9999)
        if cache_directory is None:
            cache_directory = sim_state.get_cache_directory()
        if table_name_pattern is None:
            # Fall back to the bare table name; without this, the format()
            # call below would fail when no pattern is given.
            table_name_pattern = '{table_name}'

        attr_cache = AttributeCache(cache_directory=cache_directory)
        if year is None:
            years = attr_cache._get_sorted_list_of_years()
        else:
            assert isinstance(year, int)
            years = [year]

        for table_name in table_names:
            kwargs["table_name"] = table_name
            for year in years:
                kwargs["year"] = year
                out_table_name = table_name_pattern.format(**kwargs)
                in_storage = attr_cache.get_flt_storage_for_year(year)
                # cache_path = os.path.join(cache_directory, str(year))
                # in_storage = flt_storage(storage_location=cache_path)
                # TODO drop_table(table_name) if table_name exists
                ExportStorage().export_dataset(
                    table_name, in_storage=in_storage, out_storage=self.out_storage, out_dataset_name=out_table_name
                )
        self.post_run(kwargs["scenario_name"], years)
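A hedged usage sketch for the export method above, which is shown detached from
its host class; the object name, storage, and paths below are hypothetical.

# exporter.run(['households', 'jobs'],
#              out_storage=my_sql_storage,           # assumed storage object
#              table_name_pattern='{table_name}_{scenario_name}_{year}',
#              cache_directory='/urbansim_cache/run_1',
#              scenario_name='baseline')             # also consumed by post_run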
Example #8
class ModelExplorer(object):
    def __init__(self, model, year, scenario_name=None, model_group=None, configuration=None, xml_configuration=None, 
                 cache_directory=None):
        self.model_group = model_group
        self.explored_model = model
 
        if configuration is None:
            if xml_configuration is None:
                raise StandardError, "Either dictionary based or XML based configuration must be given."
            config = xml_configuration.get_run_configuration(scenario_name)
        else:
            config = Configuration(configuration)
            
        self.scenario_models = config['models']
        if config.get('models_in_year', None) is not None and config['models_in_year'].get(year, None) is not None:
            del config['models_in_year'][year]
        if model is not None:
            dependent_models = config['models_configuration'][model]['controller'].get('dependencies', [])
            config['models'] = dependent_models
            if model_group is None:
                config['models'] = config['models'] + [{model: ["run"]}]
            else:
                config['models'] = config['models'] + [{model: {"group_members": [{model_group: ["run"]}]}}]
        else:
            config['models'] = []
            
        config['years'] = [year, year]
        config['datasets_to_cache_after_each_model'] = []
        config['flush_variables'] = False
        
        self.config = Resources(config)
        self.xml_configuration = xml_configuration
        
        if cache_directory is None:
            cache_directory = config['creating_baseyear_cache_configuration'].baseyear_cache.existing_cache_to_copy
        self.simulation_state = SimulationState(new_instance=True, base_cache_dir=cache_directory, 
                                                start_time=config.get('base_year', 0))
        self.config['cache_directory'] = cache_directory
        
        SessionConfiguration(new_instance=True,
                             package_order=self.config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())
        
    def run(self):
        self.model_system = ModelSystem()
        self.model_system.run(self.config, write_datasets_to_cache_at_end_of_year=False,
                              cleanup_datasets=False)
        logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory())
        
    def get_agents_for_simulation(self):
        return self.get_active_agent_set()
        
    def get_model_name(self):
        return (self.explored_model, self.model_group)
        
    def get_specification(self):
        return self.get_model().get_specified_coefficients().specification
    
    def get_probabilities(self, submodel=-2):
        """Return a tuple of probabilities and choices, see ChoiceModel.get_probabilities_and_choices.
        Works only for the ChoiceModel class.
        """
        model = self.get_model()
        return model.get_probabilities_and_choices(submodel)

    def export_probabilities(self, submodel=-2, filename='./choice_model.txt'):
        """Export probabilities and choices into a file. Works only for the ChoiceModel class"""
        
        model = self.get_model()
        model.export_probabilities(submodel, file_name=filename)
            
    def get_model(self):
        """Return a model object."""
        return self.model_system.run_year_namespace["model"]
    
    def get_dataset(self, dataset_name):
        """Return a Dataset object of the given name."""
        ds = self.model_system.run_year_namespace.get(dataset_name, None)
        if ds is None:
            if dataset_name not in self.model_system.run_year_namespace["datasets"].keys():
                ds = self.get_dataset_pool().get_dataset(dataset_name)
            else:
                ds = self.model_system.run_year_namespace["datasets"][dataset_name]
        return ds
        
    def get_data(self, coefficient, submodel=-2):
        """Calls method get_data of the Model object. Should return a data array for the
        given coefficient and submodel. Can be used only in models that are estimable."""
        return self.get_model().get_data(coefficient, submodel)

    def get_coefficient_names(self, submodel=-2):
        """Calls method get_coefficient_names of the Model object which should return
           coefficient names for the given submodel. Can be used only in models that are estimable."""
        return self.get_model().get_coefficient_names(submodel)

    def get_coefficients(self, submodel=-2):
        """Return an object of class SpecifiedCoefficientsFor1Submodel giving the model coefficients.
        Can be used only in models that are estimable."""
        return SpecifiedCoefficientsFor1Submodel(self.get_model().get_specified_coefficients(), submodel)

    def get_data_as_dataset(self, submodel=-2, **kwargs):
        """Calls method get_data_as_dataset of the Model object which should return
        an object of class Dataset containing model data. 
        Works only for ChoiceModel (returns InteractionDataset), 
        and for RegressionModel (returns Dataset). 
        """
        return self.get_model().get_data_as_dataset(submodel, **kwargs)
                
    def get_choice_set(self): 
        """Return a Dataset of choices. Works only for the ChoiceModel class.
        """
        return self.get_model().model_interaction.interaction_dataset.get_dataset(2)
    
    def get_choice_set_index(self):
        """Return an array of indices of choices. Works only for the ChoiceModel class.
        """
        return self.get_model().model_interaction.interaction_dataset.get_index(2)
        
    def get_choice_set_index_for_submodel(self, submodel):
        """Return an array of indices of choices for the given submodel. 
        Works only for the ChoiceModel class.
        """
        index = self.get_choice_set_index()
        return take(index, indices=self.get_model().observations_mapping[submodel], axis=0)
    
    def get_active_choice_set(self, submodel=None):
        """Return choice set as seen by agents in the model.
        Works only for the ChoiceModel class.
        """
        if submodel is None:
            choices = self.get_choice_set_index()
        else:
            choices = self.get_choice_set_index_for_submodel(submodel)
        choices = unique(choices.flatten())
        ds = self.get_choice_set()
        return DatasetSubset(ds, choices)
                             
    def get_agent_set(self):
        """Return a Dataset of all agents.
        """
        return self.get_model().get_agent_set()
        
    def get_agent_set_index(self):
        """Return an array of indices of agents active in the model. 
        """
        return self.get_model().get_agent_set_index()
        
    def get_agent_set_index_for_submodel(self, submodel):
        """Return an array of indices of agents for the given submodel. 
        """
        return self.get_model().get_agent_set_index_for_submodel(submodel)
    
    def get_active_agent_set(self, submodel=None):
        """Return agent set that make choices in the model.
        Works only for the ChoiceModel class.
        """
        agents = self.get_agent_set()
        if submodel is None:
            index = self.get_agent_set_index()
        else:
            index = self.get_agent_set_index_for_submodel(submodel)
        return DatasetSubset(agents, index)
    
    def agent_summary(self, submodel=None):
        ds = self.get_active_agent_set(submodel=submodel)
        ds.summary()
        
    def choice_summary(self, submodel=None):
        ds = self.get_active_choice_set(submodel=submodel)
        ds.summary()
       
    def data_summary(self, **kwargs):
        ds = self.get_data_as_dataset(**kwargs)
        ds.summary()
        
    def _get_before_after_dataset_from_attribute(self, var_name, storage, **kwargs):
        dataset_name = var_name.get_dataset_name()
        ds = self.get_dataset(dataset_name)
        ds.compute_variables([var_name], dataset_pool=self.get_dataset_pool())
        ds.copy_attribute_by_reload(var_name, storage=storage, **kwargs)
        return ds
    
    def get_before_after_attribute(self, attribute_name):
        """Return a dictionary with elements 'before' (contains an array of the given attribute
        that is reloaded from the cache) and 'after' (contains an array of the given attribute 
        with the current values).
        """
        from opus_core.store.attribute_cache import AttributeCache
        var_name = VariableName(attribute_name)
        storage = AttributeCache(self.simulation_state.get_cache_directory())
        ds = self._get_before_after_dataset_from_attribute(var_name, storage=storage,
                   package_order=self.get_dataset_pool().get_package_order())       
        return {'after': ds[var_name.get_alias()],
                'before': ds.get_attribute('%s_reload__' % var_name.get_alias())}
        
    def summary_before_after(self, attribute_name):
        """Print summary of the given attribute 'before' (values
        reloaded from the cache) and 'after' (current values).
        """
        from opus_core.store.attribute_cache import AttributeCache
        var_name = VariableName(attribute_name)
        storage = AttributeCache(self.simulation_state.get_cache_directory())
        ds = self._get_before_after_dataset_from_attribute(var_name, storage=storage, 
                   package_order=self.get_dataset_pool().get_package_order())
        print ''
        print 'Before model run:'
        print '================='
        ds.summary(names=['%s_reload__' % var_name.get_alias()])
        print ''
        print 'After model run:'
        print '================='
        ds.summary(names=[var_name.get_alias()])
        
    def model_dependencies(self, model=None, group=None):
        """Prints out all dependencies for the model."""
        from opus_core.variables.dependency_query import DependencyChart
        if model is None: # current model
            model, group = self.get_model_name()
            spec = self.get_specification()
        else:
            spec = None
        if model == 'all': # print dependencies for all models
            for thismodel in self.scenario_models:
                thisgroups = None
                if isinstance(thismodel, dict):
                    thisgroups = thismodel[thismodel.keys()[0]].get('group_members', None)
                    thismodel = thismodel.keys()[0]
                if not isinstance(thisgroups, list):
                    thisgroups = [thisgroups]                
                for group in thisgroups:
                    chart = DependencyChart(self.xml_configuration, model=thismodel, model_group=group)
                    chart.print_model_dependencies()
        else:
            chart = DependencyChart(self.xml_configuration, model=model, model_group=group, 
                                specification=spec)
            chart.print_model_dependencies()
        
    def variable_dependencies(self, name):
        """Prints out dependencies of this variable. 'name' can be either an alias from 
        the model specification or an expression."""
        from opus_core.variables.dependency_query import DependencyChart
        varname = None
        allvars = self.get_specification().get_variable_names()
        for ivar in range(len(allvars)):
            thisvar = allvars[ivar]
            if not isinstance(thisvar, VariableName):
                thisvar = VariableName(thisvar)
            if name == thisvar.get_alias():
                varname = thisvar
                break
        if varname is None:
            varname = VariableName(name)
        chart = DependencyChart(self.xml_configuration)
        chart.print_dependencies(varname.get_expression())
              
    def compute_expression(self, attribute_name):
        """Compute any expression and return its values."""
        var_name = VariableName(attribute_name)
        dataset_name = var_name.get_dataset_name()
        ds = self.get_dataset(dataset_name)
        return ds.compute_variables([var_name], dataset_pool=self.get_dataset_pool())
        
    def get_dataset_pool(self):
        return self.model_system.run_year_namespace["dataset_pool"]
    
    def plot_histogram_before_after(self, attribute_name, bins=None):
        """Plot histograms of values returned by the method get_before_after_attribute."""
        from opus_core.plot_functions import create_histogram, show_plots
        from matplotlib.pylab import figure
        values = self.get_before_after_attribute(attribute_name)
        alias = VariableName(attribute_name).get_alias()
        fig = figure()
        fig.add_subplot(121)
        create_histogram(values['before'], main='%s (before)' % alias, bins=bins)
        fig.add_subplot(122)
        create_histogram(values['after'], main='%s (after)' % alias, bins=bins)
        show_plots()
        
    def get_correlation(self, submodel=-2):
        """Return an array of correlations between all variables of the model data (for given submodel).
        Works only for ChoiceModel and RegressionModel"""
        ds = self.get_data_as_dataset(submodel)
        attrs = [attr for attr in ds.get_known_attribute_names() if attr not in ds.get_id_name()]
        return ds.correlation_matrix(attrs)
        
    def plot_correlation(self, submodel=-2, useR=False, **kwargs):
        """Plot correlations between all variables of the model data (for given submodel).
        Works only for ChoiceModel and RegressionModel"""
        ds = self.get_data_as_dataset(submodel)
        attrs = [attr for attr in ds.get_known_attribute_names() if attr not in ds.get_id_name()]
        ds.correlation_image(attrs, useR=useR, **kwargs)
        
    def plot_choice_set(self, agents_index=None, aggregate_to=None, matplotlib=True, **kwargs):
        """Plot map of the sampled choice set. 
        agents_index can be given to restrict the set of agents to which the choice set belongs. 
        aggregate_to is a name of a dataset which the choice set should be aggregated to.
        If matplotlib is False, mapnik is used (and required). 
        Additional arguments are passed to plot_map or plot_map_matplotlib.
        E.g. (choice set are buildings, aggregated to zones, for the first agent)
        er.plot_choice_set(aggregate_to='zone', matplotlib=False, project_name='psrc_parcel', 
                            file='choice_set0.png', agents_index=0)
        """
        choice_set = self.get_choice_set()
        if agents_index is None:
            flatten_choice_index = self.get_choice_set_index().ravel()
        else:
            flatten_choice_index = self.get_choice_set_index()[agents_index,:].ravel()
        if aggregate_to is not None:
            ds_aggr = self.get_dataset(aggregate_to)
            result = ds_aggr.sum_over_ids(choice_set[ds_aggr.get_id_name()[0]][flatten_choice_index], 
                                               ones(flatten_choice_index.size))
            ds = ds_aggr
        else:
            result = choice_set.sum_over_ids(choice_set.get_id_attribute()[flatten_choice_index], 
                                             ones(flatten_choice_index.size))
            ds = choice_set
        dummy_attribute_name = '__sampled_choice_set__'
        ds.add_attribute(name=dummy_attribute_name, data=result)
        if matplotlib:
            coord_syst = None
            if ds.get_coordinate_system() is None and hasattr(ds, 'compute_coordinate_system'):
                coord_syst = ds.compute_coordinate_system(dataset_pool=self.get_dataset_pool())
            ds.plot_map_matplotlib(dummy_attribute_name, background=-1, coordinate_system=coord_syst, **kwargs)
        else:
            ds.plot_map(dummy_attribute_name, background=-1, **kwargs)
        ds.delete_one_attribute(dummy_attribute_name)
        
    def plot_choice_set_attribute(self, name, agents_index=None, aggregate_to=None, function='sum', 
                                  matplotlib=True, **kwargs):
        """Plot map of the given attribute for the sampled choice set.
        agents_index can be given to restrict the set of agents to which the choice set belongs. 
        aggregate_to is a name of a dataset which the choice set should be aggregated to.
        function defines the aggregating function (e.g. sum, mean, median, etc.)
        If matplotlib is False, mapnik is used (and required). 
        Additional arguments are passed to plot_map or plot_map_matplotlib.
        E.g. er.plot_choice_set_attribute('residential_units', aggregate_to='zone', matplotlib=False, 
                                    project_name='psrc_parcel', file='choice_resunits.png')
        """
        choice_set = self.get_choice_set()
        if agents_index is None:
            flatten_choice_index = self.get_choice_set_index().ravel()
        else:
            flatten_choice_index = self.get_choice_set_index()[agents_index,:].ravel()
        filter_var = ones(choice_set.size(), dtype='int16')
        filter_var[unique(flatten_choice_index)] = 0
        filter_idx = where(filter_var)[0]
        if aggregate_to is not None:
            ds_aggr = self.get_dataset(aggregate_to)
            result = ds_aggr.aggregate_over_ids(choice_set[ds_aggr.get_id_name()[0]][flatten_choice_index], 
                                                     what=choice_set[name][flatten_choice_index], function=function)
            filter = ds_aggr.sum_over_ids(choice_set[ds_aggr.get_id_name()[0]][filter_idx], 
                                                     ones(filter_idx.size))
            filter = filter > 0
            ds = ds_aggr
        else:
            result = choice_set.aggregate_over_ids(choice_set.get_id_attribute()[flatten_choice_index], 
                                                   what=choice_set[name][flatten_choice_index], function=function)
            filter = filter_var
            ds = choice_set
        dummy_attribute_name = '__sampled_choice_set_attribute__'
        ds.add_attribute(name=dummy_attribute_name, data=result)
        dummy_filter_name = '__sampled_choice_set_filter__'
        ds.add_attribute(name=dummy_filter_name, data=filter)
        if matplotlib:
            coord_syst = None
            if ds.get_coordinate_system() is None and hasattr(ds, 'compute_coordinate_system'):
                coord_syst = ds.compute_coordinate_system(dataset_pool=self.get_dataset_pool())
            ds.plot_map_matplotlib(dummy_attribute_name, filter=dummy_filter_name, coordinate_system=coord_syst, **kwargs)
        else:
            ds.plot_map(dummy_attribute_name, filter=dummy_filter_name, **kwargs)
        ds.delete_one_attribute(dummy_attribute_name)
        ds.delete_one_attribute(dummy_filter_name)
                   
    def plot_coefficients(self, submodel=-2, exclude_constant=True, eqidx=0, plot=True, 
                          store_values_to_file=None):
        """ Plot a barchart of coefficient values. This can be used in a regression model, 
        when coefficients are standardized 
        (i.e. using the estimation module opus_core.estimate_linear_regression_standardized).
        store_values_to_file can be a file name where the values are stored.
        """
        coef = self.get_coefficients(submodel)
        values = coef.get_coefficient_values()
        names = coef.get_coefficient_names()
        sd = coef.get_standard_errors()
        idx = ones(names.shape[1], dtype="bool")
        if exclude_constant:
            pos = coef.get_constants_positions()
            if pos.size > 0:
                idx[pos] = 0
        if store_values_to_file is not None:
            n = idx.sum()
            result = concatenate((reshape(names[eqidx, idx], (n,1)), 
                                 reshape(values[eqidx, idx], (n,1)),
                                 reshape(sd[eqidx, idx], (n,1))), axis=1)
            write_to_text_file(store_values_to_file, array(['coefficient_name', 'estimate', 'standard_error']), 
                               delimiter='\t')
            write_table_to_text_file(store_values_to_file, result, delimiter='\t', mode='a')
        if plot:
            plot_barchart(values[eqidx, idx], labels = names[eqidx, idx], errors=sd[eqidx, idx])
        else:
            return {'names': names[eqidx, idx], 'values': values[eqidx, idx], 'errors': sd[eqidx, idx]}
        
    def create_latex_tables(self, directory, other_info_keys=None):
        from opus_core.latex_table_creator import LatexTableCreator
        LTC = LatexTableCreator()
        LTC.create_latex_table_for_coefficients_for_model(
            self.get_model().get_specified_coefficients().coefficients, self.explored_model, directory, 
                                other_info_keys=other_info_keys)
        LTC.create_latex_table_for_specifications_for_model(
            self.get_model().get_specified_coefficients().specification, self.explored_model, directory)
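A hedged sketch of an interactive exploration session with the class above; the
project file, model name, and attribute are invented for illustration, and
XMLConfiguration is assumed to be the usual opus_core XML configuration class.

# xml_config = XMLConfiguration('psrc_parcel.xml')   # assumed project file
# er = ModelExplorer('real_estate_price_model', 2005,
#                    scenario_name='baseline',
#                    xml_configuration=xml_config)
# er.run()
# er.summary_before_after('building.unit_price')     # hypothetical attribute
# er.plot_correlation(submodel=1)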
Example #9
class MoreDatasetTests(opus_unittest.OpusTestCase):
    def setUp(self):
        self.start_year = 2001
        self.expected_sic_data = array([6,4,7,808,6])
        self.job_id = array([1,2,3,4,5])
        self.base_cache_dir = tempfile.mkdtemp(prefix='opus_tmp_test_dataset')
        self.simulation_state = SimulationState(low_memory_run=True, new_instance=True, base_cache_dir=self.base_cache_dir)
        self.dir = self.simulation_state.get_cache_directory()
        self.simulation_state.set_current_time(self.start_year)
        
        if not os.path.exists(self.dir):
            os.makedirs(self.dir)

        self.in_storage = StorageFactory().get_storage('dict_storage')
        self.in_storage.write_table(
            table_name='jobs',
            table_data={
                'grid_id':array([10,20,30,40,50]),
                'job_id':self.job_id,
                },
            )
            
        self.out_storage = StorageFactory().get_storage('dict_storage')
    
        self.job_set_resources = ResourceFactory().get_resources_for_dataset(
            'job', 
            in_storage = self.in_storage, 
            out_storage = self.out_storage,
            in_table_name_pair = ('jobs',None),
            out_table_name_pair = ('jobs_exported',None),
            attributes_pair = (None,AttributeType.PRIMARY),
            id_name_pair = ('job_id','job_id'), 
            nchunks_pair = (1,1), 
            debug_pair = (1,None)
            )
            
    def tearDown(self):
        if os.path.exists(self.base_cache_dir):
            rmtree(self.base_cache_dir)
        
    def test_err_when_asking_for_attribute_that_is_not_in_cache(self):
        job_set = Dataset(self.job_set_resources, dataset_name="jobs")
        job_set.add_attribute(self.job_id, "job_id", metadata=AttributeType.PRIMARY)
        job_set.flush_dataset()
        job_set.get_attribute('job_id')
        self.assertRaises(NameError, job_set.get_attribute, 'attribute_that_does_not_exist')
            
    def test_compute_one_variable_when_asking_for_attribute_that_is_not_in_cache(self):
        job_set = Dataset(self.job_set_resources, dataset_name="jobs")
        job_set.add_attribute(self.job_id, "job_id", metadata=AttributeType.PRIMARY)
        job_set.flush_dataset()
        job_id_variable_name = VariableName('opus_core.jobs.attribute_that_does_not_exist')
        
        logger.enable_hidden_error_and_warning_words()
        try:
            self.assertRaises(StandardError, job_set._compute_one_variable, job_id_variable_name)
        finally:
            # Restore normal error and warning reporting.
            logger.disable_hidden_error_and_warning_words()
            
    def test_flush_dataset_correct_flags(self):
        job_set = Dataset(self.job_set_resources, dataset_name="jobs")
        self.assert_(not 'job_id' in job_set.attribute_boxes)
        
        job_set.get_attribute("job_id")
        self.assert_(job_set.attribute_boxes["job_id"].is_in_memory())
        self.assert_(not job_set.attribute_boxes["job_id"].is_cached())
        
        job_set.flush_dataset()
        self.assert_(not job_set.attribute_boxes["job_id"].is_in_memory())
        self.assert_(job_set.attribute_boxes["job_id"].is_cached())
        
        job_set.get_attribute("job_id")
        self.assert_(job_set.attribute_boxes["job_id"].is_in_memory())
        self.assert_(job_set.attribute_boxes["job_id"].is_cached())
        
    def test_flush_dataset_correct_data(self):
        job_set = Dataset(self.job_set_resources, dataset_name="jobs")
        job_set.add_attribute(self.job_id, "job_id", metadata=AttributeType.PRIMARY)
        job_set.add_attribute(self.expected_sic_data, "sic", metadata=AttributeType.COMPUTED)
        job_set.flush_dataset()
        returned_sic_data = job_set.get_attribute("sic")
        returned_id_data = job_set.get_attribute("job_id")
        self.assert_(ma.allequal(returned_id_data,self.job_id))
        self.assert_(ma.allequal(returned_sic_data,self.expected_sic_data))
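As is conventional for opus_core test modules, a main guard lets the suite above
run standalone (a sketch, assuming the standard opus_unittest entry point):

if __name__ == '__main__':
    opus_unittest.main()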
Example #10
class TestDataset(opus_unittest.OpusTestCase): 
    def setUp(self):
        self.start_year = 2001
        self.expected_sic_data = array([6,4,7,808,6])
        self.job_id = array([1,2,3,4,5])
        self.base_cache_dir = tempfile.mkdtemp(prefix='opus_tmp')
        self.simulation_state = SimulationState(low_memory_run=True, new_instance=True, base_cache_dir=self.base_cache_dir)
        self.dir = self.simulation_state.get_cache_directory()
        self.simulation_state.set_current_time(self.start_year)
        if not os.path.exists(self.dir):
            os.makedirs(self.dir)
        
        
    def tearDown(self):
        # The logger may have a log file open in the cache directory;
        # disable file logging before removing the directory tree.
        if logger._file_stream:
            logger.disable_file_logging()
        
        for root, dirs, files in os.walk(self.dir, topdown=False):
            for filename in files:
                os.remove(os.path.join(root, filename))
            for directory in dirs:
                os.rmdir(os.path.join(root, directory))
        os.rmdir(self.dir)
        os.rmdir(self.base_cache_dir)
    
            
    def test_dict_dataset(self):
        storage = StorageFactory().get_storage('dict_storage')
        
        storage.write_table(
            table_name='dataset',
            table_data={
                "id":array([1,2,3,4]), 
                "attr":array([4,7,2,1])
                }
            )
        
        ds = Dataset(in_storage=storage, in_table_name='dataset', id_name="id")
        
        self.assert_(ds.get_attribute("attr").sum()==14, "Something is wrong with the dataset.")
        self.assert_(ds.size()==4, "Wrong size of dataset.")
        
    def test_flt_dataset(self):
        import opus_core
        from opus_core.store.flt_storage import flt_storage
        
        attribute = 'little_endian'
        
        location = os.path.join(opus_core.__path__[0], 'data', 'flt')
        storage = flt_storage(storage_location=location)
        ds = Dataset(in_storage=storage, id_name=attribute, in_table_name='endians')
        
        self.assertAlmostEqual(11.0, ds.get_attribute_by_index(attribute, 0))
        self.assertEqual(None, ds.get_attribute_header(attribute))
 
    def test_join_by_rows(self):
        storage = StorageFactory().get_storage('dict_storage')
        
        storage.write_table(
            table_name='dataset1', 
            table_data={    
                'id':array([2,4,6,8]), 
                'attr':array([4,7,2,1])
                }
            )
            
        storage.write_table(
            table_name='dataset2',
            table_data={
                'id':array([1,5,9]), 
                'attr':array([55,66,100])
                }
            )
        
        ds1 = Dataset(in_storage=storage, in_table_name='dataset1', id_name='id')
        ds2 = Dataset(in_storage=storage, in_table_name='dataset2', id_name='id')
        
        ds1.join_by_rows(ds2)
        self.assert_(ma.allclose(ds1.get_attribute('attr'), array([4,7,2,1,55,66,100])))
        self.assert_(ma.allclose(ds2.get_attribute('attr'), array([55,66,100])))
        
    def test_join_by_rows_for_unique_ids(self):
        storage = StorageFactory().get_storage('dict_storage')
        
        storage.write_table(
            table_name='dataset1', 
            table_data={
                "id":array([2,4]), 
                "attr":array([4,7])
                }
            )
            
        storage.write_table(
            table_name='dataset2',
            table_data={
                "id":array([1,2]), 
                "attr":array([55,66])
                }
            )
        
        ds1 = Dataset(in_storage=storage, in_table_name='dataset1', id_name='id')
        ds2 = Dataset(in_storage=storage, in_table_name='dataset2', id_name='id')
        
        threw_exception = False
        try: 
            ds1.join_by_rows(ds2)
        except StandardError:
            threw_exception = True
        self.assert_(threw_exception)
        
    def test_join_by_rows_for_char_arrays(self):
        from numpy import alltrue
        storage = StorageFactory().get_storage('dict_storage')
        
        storage.write_table(
            table_name='dataset1', 
            table_data={
                'id':array([2,4,6,8]), 
                'attr':array(['4','7','2','1'])
                }
            )
            
        storage.write_table(
            table_name='dataset2',
            table_data={
                'id':array([1,5,9]), 
                'attr':array(['55','66','100'])
                }
            )
        
        ds1 = Dataset(in_storage=storage, in_table_name='dataset1', id_name='id')
        ds2 = Dataset(in_storage=storage, in_table_name='dataset2', id_name='id')
        
        ds1.join_by_rows(ds2)
        self.assert_(alltrue(ds1.get_attribute('attr') == array(['4','7','2','1','55','66','100'])))
        self.assert_(alltrue(ds2.get_attribute('attr') == array(['55','66','100'])))
        
    def test_variable_dependencies_tree_with_versioning(self):
        storage = StorageFactory().get_storage('dict_storage')
        
        storage.write_table(
            table_name='tests',
            table_data={
                'id':array([2,4]), 
                'a_dependent_variable':array([4,7]),
                'a_dependent_variable2':array([10,1])
                }
            )
        
        ds = Dataset(in_storage=storage, in_table_name='tests', id_name='id', dataset_name='tests')
        
        ds.compute_variables(["opus_core.tests.a_test_variable_with_two_dependencies"])
        
        self.assert_(ds.get_version("a_test_variable_with_two_dependencies")==0) #initially version=0
        self.assert_(ds.get_version("a_dependent_variable")==0)
        self.assert_(ds.get_version("a_dependent_variable2")==0)
        
        ds.modify_attribute("a_dependent_variable", array([0,0]))
        self.assert_(ds.get_version("a_dependent_variable")==1) # version=1
        
        ds.modify_attribute("a_dependent_variable", array([1,1]))
        self.assert_(ds.get_version("a_dependent_variable")==2) # version=2
        
        ds.compute_variables(["opus_core.tests.a_test_variable_with_two_dependencies"])
        self.assert_(ds.get_version("a_test_variable_with_two_dependencies")==1)
        
        ds.compute_variables(["opus_core.tests.a_test_variable_with_two_dependencies"])
        self.assert_(ds.get_version("a_test_variable_with_two_dependencies")==1) # version does not change
        
        autogen_variable = "my_var = 3 * opus_core.tests.a_dependent_variable"
        ds.compute_variables([autogen_variable])
        self.assert_(ds.get_version("my_var")==0)
        ds.compute_variables([autogen_variable])
        self.assert_(ds.get_version("my_var")==0)
        
    def test_compute_variable_with_unknown_package(self):
        storage = StorageFactory().get_storage('dict_storage')
        
        storage.write_table(
            table_name='tests',
            table_data={
                'id':array([2,4]), 
                'attr1':array([4,7]),
                }
            )
        
        ds = Dataset(in_storage=storage, in_table_name='tests', id_name='id', dataset_name='test')
        
        ds.compute_one_variable_with_unknown_package("attr1_times_2", package_order=["opus_core"])
        
    def test_join_datasets_with_2_ids(self):
        from numpy import ma
        storage = StorageFactory().get_storage('dict_storage')
        
        storage.write_table(
            table_name='data1',
            table_data={
                'id1':array([2,4,2]),
                'id2':array([1,2,3]),
                'attr1':array([4,7,1]),
                'attr2':array([100,0,1000]),
                }
            )
        storage.write_table(
            table_name='data2',
            table_data={
                'id1':array([4,2,2]),
                'id2':array([2,3,1]),
                'attr1':array([50,60,70])
                }
            )
        
        ds1 = Dataset(in_storage=storage, in_table_name='data1', id_name=['id1', 'id2'], dataset_name='data1')
        ds2 = Dataset(in_storage=storage, in_table_name='data2', id_name=['id1', 'id2'], dataset_name='data2')
        ds1.join(ds2, 'attr1')
        self.assertEqual(ma.allequal(ds1.get_attribute('attr1'), array([70,50,60])), True)
        self.assertEqual(ma.allequal(ds1.get_attribute('attr2'), array([100,0,1000])), True)
Example #11
class ModelSystem(object):
    """
    Uses the information in configuration to run/estimate a set of models for given set of years.
    """

    def __init__(self):
        self.running = False
        self.forked_processes = []
        self.running_conditional = threading.Condition()

    def run(
        self,
        resources,
        write_datasets_to_cache_at_end_of_year=True,
        log_file_name="run_model_system.log",
        cleanup_datasets=True,
    ):
        """Entries in resources: (entries with no defaults are required)
               models - a list containing names of models to be run. Each name
                           must correspond to the name of the module/class of that model. Default: None
               years - a tuple (start year, end year)
               debuglevel - an integer. The higher the more output will be printed. Default: 0
               expression_library - a dictionary.  The keys in the dictionary are pairs (dataset_name, variable_name)
               and the values are the corresponding expressions.  The model system needs to set the expression library
               (if it isn't None) in DatasetFactory for DatasetFactory to know about variables defined as expressions
               in the xml expression library.  Default: None
        This method is called both to start up the simulation for all years, and also for each year
        when running with one process per year.  In the latter case, 'years' consists of just
        (current_year, current_year) rather than the real start and end years for the simulation.
        """
        if not isinstance(resources, Resources):
            raise TypeError, "Argument 'resources' must be of type 'Resources'."
        logger_settings = resources.get("log", {"tags": [], "verbosity_level": 3})
        logger.set_tags(logger_settings.get("tags", []))
        logger.set_verbosity_level(logger_settings.get("verbosity_level", 3))
        self.simulation_state = SimulationState()
        self.simulation_state.set_low_memory_run(resources.get("low_memory_mode", False))
        self.simulation_state.set_start_time(resources.get("base_year", 0))
        self.run_year_namespace = {}

        if resources.get("cache_directory", None) is not None:
            self.simulation_state.set_cache_directory(resources["cache_directory"])

        if "expression_library" in resources:
            VariableFactory().set_expression_library(resources["expression_library"])

        if resources.get("sample_input", False):
            self.update_config_for_multiple_runs(resources)

        cache_directory = self.simulation_state.get_cache_directory()
        log_file = os.path.join(cache_directory, log_file_name)
        logger.enable_file_logging(log_file, verbose=False)
        try:
            logger.log_status("Cache Directory set to: " + cache_directory)

            with logger.block("Start simulation run"):
                models = resources.get("models", [])
                models_in_years = resources.get("models_in_year", {})

                resources.check_obligatory_keys(["years"])

                years = resources["years"]
                if (not isinstance(years, tuple)) and (not isinstance(years, list)):
                    raise TypeError, "Entry 'years' in resources must be a tuple or a list."

                if len(years) < 2:
                    print years
                    raise StandardError, "Entry 'years' in resources must be of length at least 2."

                start_year = years[0]
                end_year = years[-1]

                debuglevel = resources.get("debuglevel", 0)
                seed_values = resources.get("seed", NO_SEED)

                logger.log_status("random seed = %s" % str(seed_values))
                seed(seed_values)

                for year in range(start_year, end_year + 1):
                    with logger.block("Starting simulation for year " + str(year)):
                        self.simulation_state.set_current_time(year)
                        SessionConfiguration().get_dataset_pool().remove_all_datasets()
                        logger.disable_file_logging(log_file)
                        try:
                            if models_in_years.get(year, None) is not None:
                                models_to_run = models_in_years[year]
                            else:
                                models_to_run = models
                            self._run_year(
                                year=year,
                                models=models_to_run,
                                simulation_state=self.simulation_state,
                                debuglevel=debuglevel,
                                resources=resources,
                                write_datasets_to_cache_at_end_of_year=write_datasets_to_cache_at_end_of_year,
                                cleanup_datasets=cleanup_datasets,
                            )
                        finally:
                            logger.enable_file_logging(log_file, verbose=False)
                        collect()

        finally:
            logger.disable_file_logging(log_file)

    def flush_datasets(self, dataset_names, after_model=False):
        dataset_pool = SessionConfiguration().get_dataset_pool()
        for dataset_name in dataset_names:
            if dataset_pool.has_dataset(dataset_name):
                self.flush_dataset(dataset_pool.get_dataset(dataset_name), after_model=after_model)

    def flush_dataset(self, dataset, after_model=False):
        """Write the PRIMARY attributes of this dataset to the cache."""
        if dataset and isinstance(dataset, Dataset):
            # Do not flush after model if not necessary
            if after_model:
                if len(dataset.get_attribute_names()) <= len(dataset.get_id_name()):
                    return
                if (len(dataset.get_attribute_names()) == len(dataset.get_known_attribute_names())) and (
                    len(dataset.get_attributes_in_memory()) <= len(dataset.get_id_name())
                ):
                    dataset.delete_computed_attributes()
                    return
            dataset.delete_computed_attributes()
            dataset.load_and_flush_dataset()

    def flush_datasets_after_model(self, resources):
        if resources.get("flush_variables", False):
            AttributeCache().delete_computed_tables()
            # this will also delete computed attributes
            datasets_to_cache = SessionConfiguration().get_dataset_pool().datasets_in_pool().keys()
        else:
            datasets_to_cache = resources.get("datasets_to_cache_after_each_model", [])
        self.flush_datasets(datasets_to_cache, after_model=True)

    def _run_year(
        self,
        year,
        models,
        simulation_state,
        debuglevel,
        resources,
        write_datasets_to_cache_at_end_of_year,
        cleanup_datasets=True,
    ):
        """
        Assumes that all datasets reside in the cache directory in binary format.
        """
        try:
            import wingdbstub  # optional Wing IDE debugging support
        except:
            pass
        self.vardict = {}
        log_file_name = os.path.join(simulation_state.get_cache_directory(), "year_%s_log.txt" % year)
        logger.enable_file_logging(log_file_name, "w")
        try:
            logger.start_block("Simulate year %s" % year)
            try:
                base_year = resources["base_year"]
                if year == base_year:
                    year_for_base_year_cache = year  # case of estimation
                else:
                    year_for_base_year_cache = year - 1
                cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache)
                self.vardict["cache_storage"] = cache_storage
                base_cache_storage = AttributeCache().get_flt_storage_for_year(base_year)
                self.vardict["base_cache_storage"] = base_cache_storage
                simulation_state.set_flush_datasets(resources.get("flush_variables", False))
                SessionConfiguration()["simulation_year"] = year
                SessionConfiguration()["debuglevel"] = debuglevel
                datasets_to_preload_in_year = resources.get("datasets_to_preload_in_year", {})
                if datasets_to_preload_in_year.get(year, None) is not None:
                    datasets_to_preload = datasets_to_preload_in_year[year]
                else:
                    datasets_to_preload = resources.get("datasets_to_preload", {})
                for dataset_name in datasets_to_preload:
                    SessionConfiguration().get_dataset_from_pool(dataset_name)
                models_configuration = resources.get("models_configuration", {})
                dataset_pool = SessionConfiguration().get_dataset_pool()
                datasets = {}
                for dataset_name, its_dataset in dataset_pool.datasets_in_pool().iteritems():
                    self.vardict[dataset_name] = its_dataset
                    datasets[dataset_name] = its_dataset
                    exec "%s=its_dataset" % dataset_name

                # This is needed. It resides in locals()
                # and is passed on to models as they run.
                ### TODO: There has got to be a better way!
                model_resources = Resources(datasets)
                n_models, model_group_members_to_run = self.get_number_of_models_and_model_group_members_to_run(
                    models, models_configuration
                )
                self.run_year_namespace = locals()
                # ==========
                # Run the models.
                # ==========
                model_number = -1
                for model_entry in models:
                    # list 'models' can be in the form:
                    # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
                    #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
                    #                                      'commercial']}},
                    #  {'model_name_3': ['estimate', 'run']},
                    #  'model_name_4',
                    #  {'model_name_5': {'group_members': '_all_'}}
                    # ]
                    # get the list of methods to be processed, possibly per group member
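                    # For example, a (hypothetical) entry
                    #   {'my_model': ['estimate', 'run']}
                    # parses below to model_name='my_model' and
                    # processes=['estimate', 'run'].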
                    if isinstance(model_entry, dict):
                        model_name, value = model_entry.items()[0]
                        if not isinstance(value, dict):  # value is a list of processes, e.g. 'model_name_3'
                            processes = value
                            if not isinstance(processes, list):
                                processes = [processes]
                    else:  # in the form 'model_name_4' in the comment above
                        model_name = model_entry
                        processes = ["run"]
                    group_member = None
                    model_group = model_group_members_to_run[model_name][1]
                    last_member = max(1, len(model_group_members_to_run[model_name][0].keys()))
                    for imember in range(last_member):
                        controller_config = models_configuration[model_name]["controller"]
                        model_configuration = models_configuration[model_name]
                        if model_group_members_to_run[model_name][0].keys():
                            group_member_name = model_group_members_to_run[model_name][0].keys()[imember]
                            group_member = ModelGroupMember(model_group, group_member_name)
                            processes = model_group_members_to_run[model_name][0][group_member_name]
                            member_model_name = "%s_%s" % (group_member_name, model_name)
                            if member_model_name in models_configuration.keys():
                                model_configuration = models_configuration[member_model_name]
                                if "controller" in model_configuration.keys():
                                    controller_config = model_configuration["controller"]
                        datasets_to_preload_for_this_model = controller_config.get(
                            "_model_structure_dependencies_", {}
                        ).get("dataset", [])
                        for dataset_name in datasets_to_preload_for_this_model:
                            try:
                                if not dataset_pool.has_dataset(dataset_name) or (dataset_name not in datasets.keys()):
                                    ds = dataset_pool.get_dataset(dataset_name)
                                    self.vardict[dataset_name] = ds
                                    datasets[dataset_name] = ds
                                    exec "%s=ds" % dataset_name
                            except:
                                logger.log_warning("Failed to load dataset %s." % dataset_name)
                        # import part
                        if "import" in controller_config.keys():
                            import_config = controller_config["import"]
                            for import_module in import_config.keys():
                                exec ("from %s import %s" % (import_module, import_config[import_module]))

                        # gui_import_replacements part
                        # This is a temporary hack -- replicates the functionality of the "import" section
                        # for use with the GUI.  The contents of this part of the config is a dictionary.
                        # Keys are names of models (not used here).  Values are 2 element pairs.
                        # The first element is a name and the second is a value.  Bind the name to the value.
                        if "gui_import_replacements" in controller_config.keys():
                            import_replacement_config = controller_config["gui_import_replacements"]
                            for model_name in import_replacement_config.keys():
                                pair = import_replacement_config[model_name]
                                temp = pair[1]
                                exec ("%s = temp") % pair[0]

                        # init part
                        model = self.do_init(locals())

                        # estimate and/or run part
                        for process in processes:
                            model_number = model_number + 1
                            # write status file
                            model.set_model_system_status_parameters(
                                year, n_models, model_number, resources.get("status_file_for_gui", None)
                            )
                            model.write_status_for_gui()
                            # prepare part
                            exec (self.do_prepare(locals()))
                            processmodel_config = controller_config[process]
                            if "output" in processmodel_config.keys():
                                outputvar = processmodel_config["output"]
                            else:
                                outputvar = "process_output"
                            self.vardict[outputvar] = self.do_process(locals())
                            exec outputvar + "=self.vardict[outputvar]"

                            # check command file from gui, if the simulation should be stopped or paused
                            self.do_commands_from_gui(resources.get("command_file_for_gui", None))

                            # capture namespace for interactive estimation
                            self.run_year_namespace = locals()
                            self.flush_datasets_after_model(resources)
                            del model
                            collect()

                # Write all datasets to cache.
                if write_datasets_to_cache_at_end_of_year:
                    logger.start_block("Writing datasets to cache for year %s" % year)
                    try:
                        for dataset_name, its_dataset in (
                            SessionConfiguration().get_dataset_pool().datasets_in_pool().iteritems()
                        ):
                            self.flush_dataset(its_dataset)
                    finally:
                        logger.end_block()

            finally:
                logger.end_block()
        finally:
            logger.disable_file_logging(log_file_name)

        if cleanup_datasets:
            SessionConfiguration().delete_datasets()

    def do_init(self, parent_state):
        """Run the 'init' part of this model's configuration.
        Returns model object.
        """
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key <> "self":
                exec ('%s = parent_state["%s"]' % (key, key))
        init_config = parent_state["controller_config"]["init"]
        group_member = parent_state["group_member"]
        if group_member is None:  # No model group
            cmd = "%s(%s)" % (init_config["name"], self.construct_arguments_from_config(init_config))
            model = eval(cmd)
        else:  # Model belongs to a group
            model = eval(
                "%s(group_member, %s)" % (init_config["name"], self.construct_arguments_from_config(init_config))
            )
        return model

    def do_prepare(self, parent_state):
        """Prepares for the current model in the parent state's context.
        What to do is determined by the contents of the current model's controller configuration.

        controller_config is the 'controller' part of the model configuration.
        vardict is a dictionary into which the output of the model's 'prepare_output'
        method will be put.
        """
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key <> "self":
                exec ('%s = parent_state["%s"]' % (key, key))
        key_name = "prepare_for_%s" % process
        if key_name in controller_config.keys():
            prepare_config = controller_config[key_name]
            if "output" in prepare_config.keys():
                outputvar = prepare_config["output"]
            else:
                outputvar = "prepare_output"
            self.vardict[outputvar] = eval(
                "model.%s(%s)" % (prepare_config["name"], self.construct_arguments_from_config(prepare_config))
            )
            return '%s=self.vardict["%s"]' % (outputvar, outputvar)
        else:
            # do nothing when return value is exec'ed
            return ""

    def do_process(self, parent_state):
        for key in parent_state.keys():
            if key <> "self":
                exec ('%s = parent_state["%s"]' % (key, key))
        ev = "model.%s(%s)" % (process, self.construct_arguments_from_config(processmodel_config))
        return eval(ev)

    def get_number_of_models_and_model_group_members_to_run(self, models, models_configuration):
        """Count number_of models in the list 'models' that can include group members (each member and each process is one model)."""
        # list models can be in the form:
        # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
        #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
        #                                      'commercial']}},
        #  {'model_name_3': ['estimate', 'run']},
        #  'model_name_4',
        #  {'model_name_5': {'group_members': '_all_'}}
        # ]
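        # Worked count for the example list above, starting from 1: model_name_1
        # adds 2 (one 'run' per member), model_name_2 adds 3 ('estimate' + 'run'
        # for residential, 'run' for commercial), model_name_3 adds 2,
        # model_name_4 adds 1, and model_name_5 adds one 'run' per member
        # resolved from the group attribute.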
        number_of_models = 1
        model_group_members_to_run = {}
        for model_entry in models:
            if isinstance(model_entry, dict):
                model_name, value = model_entry.items()[0]
                if isinstance(value, dict):  # is a model group
                    if not value.keys()[0] == "group_members":
                        raise KeyError, "Key for model " + model_name + " must be 'group_members'."
                    group_members = value["group_members"]
                    model_group = None
                    if "group_by_attribute" in models_configuration[model_name]["controller"].keys():
                        group_dataset_name, group_attribute = models_configuration[model_name]["controller"][
                            "group_by_attribute"
                        ]
                        model_group = ModelGroup(
                            SessionConfiguration().get_dataset_from_pool(group_dataset_name), group_attribute
                        )
                    if not isinstance(group_members, list):
                        group_members = [group_members]
                    if group_members[0] == "_all_":  # see 'model_name_5' example above
                        if model_group is None:
                            raise KeyError, "Entry 'group_by_attribute' is missing for model %s" % model_name
                        group_members = model_group.get_member_names()
                    model_group_members_to_run[model_name] = [{}, model_group]
                    for member in group_members:
                        if isinstance(member, dict):
                            # see 'model_name_2' ('residential') in the comment above
                            member_name = member.keys()[0]
                            model_group_members_to_run[model_name][0][member_name] = member[member_name]
                            if not isinstance(model_group_members_to_run[model_name][0][member_name], list):
                                model_group_members_to_run[model_name][0][member_name] = [
                                    model_group_members_to_run[model_name][0][member_name]
                                ]
                            number_of_models += len(model_group_members_to_run[model_name][0][member_name])
                        else:  # see 'model_name_1'
                            model_group_members_to_run[model_name][0][member] = ["run"]
                            number_of_models += len(model_group_members_to_run[model_name][0][member])
                else:  # in the form 'model_name_3' in the comment above
                    model_group_members_to_run[model_name] = [{}, None]
                    if not isinstance(value, list):
                        number_of_models += 1
                    else:
                        number_of_models += len(value)
            else:  # in the form 'model_name_4' in the comment above
                model_group_members_to_run[model_entry] = [{}, None]
                number_of_models += 1
        return (number_of_models, model_group_members_to_run)

    def do_commands_from_gui(self, filename=None):
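        # Polls 'filename' for a one-word command written by the GUI process;
        # a hypothetical GUI would write 'pause', then later 'resume' or 'stop'.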
        if (filename is None) or not os.path.exists(filename):
            return
        while True:
            f = file(filename)
            line = f.read().strip()
            f.close()
            if line == "stop":
                logger.log_warning("Simulation stopped.")
                sys.exit()
            elif line == "resume":
                break
            elif line <> "pause":
                logger.log_warning("Unknown command '%s'. Allowed commands: 'stop', 'pause', 'resume'." % line)
            time.sleep(10)

    def run_multiprocess(self, resources):
        resources = Resources(resources)
        profiler_name = resources.get("profile_filename", None)
        if resources["cache_directory"] is not None:
            cache_directory = resources["cache_directory"]
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is absolutely no good reason to be
        ###       changing the Configuration!
        resources["cache_directory"] = cache_directory

        log_file = os.path.join(cache_directory, "run_multiprocess.log")
        logger.enable_file_logging(log_file)

        start_year = resources["years"][0]
        end_year = resources["years"][-1]
        nyears = end_year - start_year + 1
        root_seed = resources.get("seed", NO_SEED)
        if resources.get("_seed_dictionary_", None) is not None:
            # This is added by the RunManager to ensure reproducibility including restarted runs
            seed_dict = resources.get("_seed_dictionary_")
            seed_array = array(map(lambda year: seed_dict[year], range(start_year, end_year + 1)))
        else:
            seed(root_seed)
            seed_array = randint(1, 2 ** 30, nyears)
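        # Either way every simulated year gets its own seed, so a restarted run
        # can reproduce year N without replaying earlier years (the RunManager
        # passes _seed_dictionary_ for exactly that case).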
        logger.log_status("Running simulation for years %d thru %d" % (start_year, end_year))
        logger.log_status("Simulation root seed: %s" % root_seed)

        for iyear, year in enumerate(range(start_year, end_year + 1)):
            success = self._run_each_year_as_separate_process(
                iyear, year, seed=seed_array[iyear], resources=resources, profiler_name=profiler_name, log_file=log_file
            )
            if not success:
                break

        self._notify_stopped()
        if profiler_name is not None:  # insert original value
            resources["profile_filename"] = profiler_name
        logger.log_status("Done running simulation for years %d thru %d" % (start_year, end_year))

    # TODO: changing of configuration
    def _run_each_year_as_separate_process(
        self, iyear, year, seed=None, resources=None, profiler_name=None, log_file=None
    ):

        logger.start_block("Running simulation for year %d in new process" % year)
        resources["years"] = (year, year)
        resources["seed"] = (seed,)

        if profiler_name is not None:
            # add year to the profile name
            resources["profile_filename"] = "%s_%s" % (profiler_name, year)

        optional_args = []
        if log_file:
            optional_args += ["--log-file-name", os.path.split(log_file)[-1]]

        success = False
        try:
            logger.disable_file_logging(log_file)
            success = self._fork_new_process(
                "opus_core.model_coordinators.model_system", resources, optional_args=optional_args
            )
            logger.enable_file_logging(log_file, verbose=False)
        finally:
            logger.end_block()

        return success

    def run_in_one_process(
        self, resources, run_in_background=False, class_path="opus_core.model_coordinators.model_system"
    ):
        resources = Resources(resources)
        if resources["cache_directory"] is not None:
            cache_directory = resources["cache_directory"]
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        resources["cache_directory"] = cache_directory

        self._fork_new_process("%s" % class_path, resources, delete_temp_dir=False, run_in_background=run_in_background)
        self._notify_stopped()

    def run_in_same_process(self, resources, **kwargs):
        resources = Resources(resources)
        if resources["cache_directory"] is not None:
            cache_directory = resources["cache_directory"]
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        resources["cache_directory"] = cache_directory

        self._notify_started()
        RunModelSystem(model_system=self, resources=resources, **kwargs)
        self._notify_stopped()

    def construct_arguments_from_config(self, config):
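        # Builds a literal argument string that do_init/do_process splice into
        # an eval'ed call. Hypothetical example: {'arguments':
        # {'location_set': 'gridcell'}} yields "location_set=gridcell, ";
        # argument values are pasted in verbatim and evaluated later.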
        key = "arguments"
        if (key not in config.keys()) or (len(config[key].keys()) <= 0):
            return ""
        arg_dict = config[key]
        result = ""
        for arg_key in arg_dict.keys():
            result += "%s=%s, " % (arg_key, arg_dict[arg_key])
        return result

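    # The methods below form a small condition-variable handshake: a monitor
    # thread can block in wait_for_start()/wait_for_finish() while the run
    # thread forks per-year processes; the _notify_* helpers flip self.running
    # under the lock and wake all waiters.
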
    def wait_for_start(self):
        self.running_conditional.acquire()
        while not self.running:
            self.running_conditional.wait()
        self.running_conditional.release()

    def wait_for_finish(self):
        self.running_conditional.acquire()
        while self.running:
            self.running_conditional.wait()
        self.running_conditional.release()

    def wait_for_process_or_finish(self, process_index):
        self.running_conditional.acquire()
        while process_index >= len(self.forked_processes) and self.running:
            self.running_conditional.wait()
        self.running_conditional.release()
        if not self.running:
            process_index = len(self.forked_processes) - 1
        return process_index

    def _fork_new_process(self, module_name, resources, run_in_background=False, **key_args):
        self.running_conditional.acquire()
        self.running = True
        self.forked_processes.append(ForkProcess())
        key_args["run_in_background"] = run_in_background
        success = self.forked_processes[-1].fork_new_process(module_name, resources, **key_args)
        self.running_conditional.notifyAll()
        self.running_conditional.release()
        if not run_in_background:
            self.forked_processes[-1].wait()
            self.forked_processes[-1].cleanup()
        return success

    def _notify_started(self):
        self.running_conditional.acquire()
        self.running = True
        self.running_conditional.notifyAll()
        self.running_conditional.release()

    def _notify_stopped(self):
        self.running_conditional.acquire()
        self.running = False
        self.running_conditional.notifyAll()
        self.running_conditional.release()

    def update_config_for_multiple_runs(self, config):
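        # For each model listed under 'models_with_sampled_coefficients'
        # (hypothetical example: 'price_model'), this substitutes a
        # '<name>_sampled_coef' clone into year base_year+1 whose
        # prepare_for_run samples coefficients from a normal distribution
        # using the base-year cache.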
        models_to_update = config.get("models_with_sampled_coefficients", [])
        if "models_in_year" not in config.keys():
            config["models_in_year"] = {}
        if config["models_in_year"].get(config["base_year"] + 1, None) is None:
            config["models_in_year"][config["base_year"] + 1] = config.get("models")

        for umodel in models_to_update:
            try:
                i = config["models_in_year"][config["base_year"] + 1].index(umodel)
            except ValueError:
                # model is not scheduled for base_year+1; nothing to substitute
                continue
            new_model_name = "%s_sampled_coef" % umodel
            config["models_in_year"][config["base_year"] + 1][i] = new_model_name
            config["models_configuration"][new_model_name] = Configuration(config["models_configuration"][umodel])
            config["models_configuration"][new_model_name]["controller"]["prepare_for_run"]["arguments"][
                "sample_coefficients"
            ] = True
            config["models_configuration"][new_model_name]["controller"]["prepare_for_run"]["arguments"][
                "distribution"
            ] = "'normal'"
            config["models_configuration"][new_model_name]["controller"]["prepare_for_run"]["arguments"][
                "cache_storage"
            ] = "base_cache_storage"
Example #12
class ModelSystem(object):
    """
    Uses the information in configuration to run/estimate a set of models for given set of years.
    """
    def __init__(self):
        self.running = False
        self.forked_processes = []
        self.running_conditional = threading.Condition()

    def run(self,
            resources,
            write_datasets_to_cache_at_end_of_year=True,
            log_file_name='run_model_system.log',
            cleanup_datasets=True):
        """Entries in resources: (entries with no defaults are required)
               models - a list containing names of models to be run. Each name
                           must correspond to the name of the module/class of that model. Default: None
               years - a tuple (start year, end year)
               debuglevel - an integer. The higher the more output will be printed. Default: 0
               expression_library - a dictionary.  The keys in the dictionary are pairs (dataset_name, variable_name)
               and the values are the corresponding expressions.  The model system needs to set the expression library
               (if it isn't None) in DatasetFactory for DatasetFactory to know about variables defined as expressions
               in the xml expression library.  Default: None
        This method is called both to start up the simulation for all years, and also for each year
        when running with one process per year.  In the latter case, 'years' consists of just
        (current_year, current_year) rather than the real start and end years for the simulation.
        """
        if not isinstance(resources, Resources):
            raise TypeError, "Argument 'resources' must be of type 'Resources'."
        logger_settings = resources.get("log", {
            "tags": [],
            "verbosity_level": 3
        })
        logger.set_tags(logger_settings.get("tags", []))
        logger.set_verbosity_level(logger_settings.get("verbosity_level", 3))
        self.simulation_state = SimulationState()
        self.simulation_state.set_low_memory_run(
            resources.get("low_memory_mode", False))
        self.simulation_state.set_start_time(resources.get("base_year", 0))
        self.run_year_namespace = {}

        if resources.get('cache_directory', None) is not None:
            self.simulation_state.set_cache_directory(
                resources['cache_directory'])

        if 'expression_library' in resources:
            VariableFactory().set_expression_library(
                resources['expression_library'])

        if resources.get('sample_input', False):
            self.update_config_for_multiple_runs(resources)

        cache_directory = self.simulation_state.get_cache_directory()
        log_file = os.path.join(cache_directory, log_file_name)
        logger.enable_file_logging(log_file, verbose=False)
        try:
            logger.log_status("Cache Directory set to: " + cache_directory)

            with logger.block('Start simulation run'):
                models = resources.get("models", [])
                models_in_years = resources.get("models_in_year", {})

                resources.check_obligatory_keys(["years"])

                years = resources["years"]
                if (not isinstance(years, tuple)) and (not isinstance(
                        years, list)):
                    raise TypeError, "Entry 'years' in resources must be a tuple."

                if len(years) < 2:
                    print years
                    raise StandardError, "Entry 'years' in resources must be of length at least 2."

                start_year = years[0]
                end_year = years[-1]

                debuglevel = resources.get("debuglevel", 0)
                seed_values = resources.get('seed', NO_SEED)

                logger.log_status("random seed = %s" % str(seed_values))
                seed(seed_values)

                for year in range(start_year, end_year + 1):
                    with logger.block("Starting simulation for year " +
                                      str(year)):
                        self.simulation_state.set_current_time(year)
                        SessionConfiguration().get_dataset_pool(
                        ).remove_all_datasets()
                        logger.disable_file_logging(log_file)
                        try:
                            if models_in_years.get(year, None) is not None:
                                models_to_run = models_in_years[year]
                            else:
                                models_to_run = models
                            self._run_year(
                                year=year,
                                models=models_to_run,
                                simulation_state=self.simulation_state,
                                debuglevel=debuglevel,
                                resources=resources,
                                write_datasets_to_cache_at_end_of_year=
                                write_datasets_to_cache_at_end_of_year,
                                cleanup_datasets=cleanup_datasets)
                        finally:
                            logger.enable_file_logging(log_file, verbose=False)
                        collect()

        finally:
            logger.disable_file_logging(log_file)

    def flush_datasets(self, dataset_names, after_model=False):
        dataset_pool = SessionConfiguration().get_dataset_pool()
        for dataset_name in dataset_names:
            if dataset_pool.has_dataset(dataset_name):
                self.flush_dataset(dataset_pool.get_dataset(dataset_name),
                                   after_model=after_model)

    def flush_dataset(self, dataset, after_model=False):
        """Write the PRIMARY attributes of this dataset to the cache."""
        if dataset and isinstance(dataset, Dataset):
            # Do not flush after model if not necessary
            if after_model:
                if len(dataset.get_attribute_names()) <= len(
                        dataset.get_id_name()):
                    return
                if (len(dataset.get_attribute_names()) == len(dataset.get_known_attribute_names())) and \
                                         (len(dataset.get_attributes_in_memory()) <= len(dataset.get_id_name())):
                    dataset.delete_computed_attributes()
                    return
            dataset.delete_computed_attributes()
            dataset.load_and_flush_dataset()

    def flush_datasets_after_model(self, resources):
        if resources.get('flush_variables', False):
            AttributeCache().delete_computed_tables()
            # this will also delete computed attributes
            datasets_to_cache = SessionConfiguration().get_dataset_pool(
            ).datasets_in_pool().keys()
        else:
            datasets_to_cache = resources.get(
                "datasets_to_cache_after_each_model", [])
        self.flush_datasets(datasets_to_cache, after_model=True)

    def _run_year(self,
                  year,
                  models,
                  simulation_state,
                  debuglevel,
                  resources,
                  write_datasets_to_cache_at_end_of_year,
                  cleanup_datasets=True):
        """
        Assumes that all datasets reside in the cache directory in binary format.
        """
        try:
            import wingdbstub
        except:
            pass
        self.vardict = {}
        log_file_name = os.path.join(simulation_state.get_cache_directory(),
                                     "year_%s_log.txt" % year)
        logger.enable_file_logging(log_file_name, 'w')
        try:
            logger.start_block('Simulate year %s' % year)
            try:
                base_year = resources['base_year']
                if year == base_year:
                    year_for_base_year_cache = year  # case of estimation
                else:
                    year_for_base_year_cache = year - 1
                cache_storage = AttributeCache().get_flt_storage_for_year(
                    year_for_base_year_cache)
                self.vardict['cache_storage'] = cache_storage
                base_cache_storage = AttributeCache().get_flt_storage_for_year(
                    base_year)
                self.vardict['base_cache_storage'] = base_cache_storage
                simulation_state.set_flush_datasets(
                    resources.get("flush_variables", False))
                SessionConfiguration()["simulation_year"] = year
                SessionConfiguration()["debuglevel"] = debuglevel
                datasets_to_preload_in_year = resources.get(
                    'datasets_to_preload_in_year', {})
                if datasets_to_preload_in_year.get(year, None) is not None:
                    datasets_to_preload = datasets_to_preload_in_year[year]
                else:
                    datasets_to_preload = resources.get(
                        'datasets_to_preload', {})
                for dataset_name in datasets_to_preload:
                    SessionConfiguration().get_dataset_from_pool(dataset_name)
                models_configuration = resources.get('models_configuration',
                                                     {})
                dataset_pool = SessionConfiguration().get_dataset_pool()
                datasets = {}
                for dataset_name, its_dataset in dataset_pool.datasets_in_pool(
                ).iteritems():
                    self.vardict[dataset_name] = its_dataset
                    datasets[dataset_name] = its_dataset
                    exec '%s=its_dataset' % dataset_name

                # This is needed. It resides in locals()
                # and is passed on to models as they run.
                ### TODO: There has got to be a better way!
                model_resources = Resources(datasets)
                n_models, model_group_members_to_run = self.get_number_of_models_and_model_group_members_to_run(
                    models, models_configuration)
                self.run_year_namespace = locals()
                #==========
                # Run the models.
                #==========
                model_number = -1
                for model_entry in models:
                    # list 'models' can be in the form:
                    # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
                    #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
                    #                                      'commercial']}},
                    #  {'model_name_3': ['estimate', 'run']},
                    #  'model_name_4',
                    #  {'model_name_5': {'group_members': '_all_'}}
                    # ]
                    # get the list of methods to be processed, possibly per group member
                    if isinstance(model_entry, dict):
                        model_name, value = model_entry.items()[0]
                        if not isinstance(value, dict):  # value is a list of processes, e.g. 'model_name_3'
                            processes = value
                            if not isinstance(processes, list):
                                processes = [processes]
                    else:  # in the form 'model_name_4' in the comment above
                        model_name = model_entry
                        processes = ["run"]
                    group_member = None
                    model_group = model_group_members_to_run[model_name][1]
                    last_member = max(
                        1,
                        len(model_group_members_to_run[model_name][0].keys()))
                    for imember in range(last_member):
                        controller_config = models_configuration[model_name][
                            "controller"]
                        model_configuration = models_configuration[model_name]
                        if model_group_members_to_run[model_name][0].keys():
                            group_member_name = model_group_members_to_run[
                                model_name][0].keys()[imember]
                            group_member = ModelGroupMember(
                                model_group, group_member_name)
                            processes = model_group_members_to_run[model_name][
                                0][group_member_name]
                            member_model_name = "%s_%s" % (group_member_name,
                                                           model_name)
                            if member_model_name in models_configuration.keys(
                            ):
                                model_configuration = models_configuration[
                                    member_model_name]
                                if "controller" in model_configuration.keys():
                                    controller_config = model_configuration[
                                        "controller"]
                        datasets_to_preload_for_this_model = controller_config.get(
                            '_model_structure_dependencies_',
                            {}).get('dataset', [])
                        for dataset_name in datasets_to_preload_for_this_model:
                            try:
                                if not dataset_pool.has_dataset(
                                        dataset_name) or (
                                            dataset_name
                                            not in datasets.keys()):
                                    ds = dataset_pool.get_dataset(dataset_name)
                                    self.vardict[dataset_name] = ds
                                    datasets[dataset_name] = ds
                                    exec '%s=ds' % dataset_name
                            except:
                                logger.log_warning(
                                    'Failed to load dataset %s.' %
                                    dataset_name)
                        # import part
                        if "import" in controller_config.keys():
                            import_config = controller_config["import"]
                            for import_module in import_config.keys():
                                exec("from %s import %s" %
                                     (import_module,
                                      import_config[import_module]))

                        # gui_import_replacements part
                        # This is a temporary hack -- replicates the functionality of the "import" section
                        # for use with the GUI.  The contents of this part of the config is a dictionary.
                        # Keys are names of models (not used here).  Values are 2 element pairs.
                        # The first element is a name and the second is a value.  Bind the name to the value.
                        if "gui_import_replacements" in controller_config.keys(
                        ):
                            import_replacement_config = controller_config[
                                "gui_import_replacements"]
                            for model_name in import_replacement_config.keys():
                                pair = import_replacement_config[model_name]
                                temp = pair[1]
                                exec("%s = temp") % pair[0]

                        # init part
                        model = self.do_init(locals())

                        # estimate and/or run part
                        for process in processes:
                            model_number = model_number + 1
                            # write status file
                            model.set_model_system_status_parameters(
                                year, n_models, model_number,
                                resources.get('status_file_for_gui', None))
                            model.write_status_for_gui()
                            # prepare part
                            exec(self.do_prepare(locals()))
                            processmodel_config = controller_config[process]
                            if "output" in processmodel_config.keys():
                                outputvar = processmodel_config["output"]
                            else:
                                outputvar = "process_output"
                            self.vardict[outputvar] = self.do_process(locals())
                            exec outputvar + '=self.vardict[outputvar]'

                            # check command file from gui, if the simulation should be stopped or paused
                            self.do_commands_from_gui(
                                resources.get('command_file_for_gui', None))

                            # capture namespace for interactive estimation
                            self.run_year_namespace = locals()
                            self.flush_datasets_after_model(resources)
                            del model
                            collect()

                # Write all datasets to cache.
                if write_datasets_to_cache_at_end_of_year:
                    logger.start_block(
                        'Writing datasets to cache for year %s' % year)
                    try:
                        for dataset_name, its_dataset in SessionConfiguration(
                        ).get_dataset_pool().datasets_in_pool().iteritems():
                            self.flush_dataset(its_dataset)
                    finally:
                        logger.end_block()

            finally:
                logger.end_block()
        finally:
            logger.disable_file_logging(log_file_name)

        if cleanup_datasets:
            SessionConfiguration().delete_datasets()

    def do_init(self, parent_state):
        """Run the 'init' part of this model's configuration.
        Returns model object.
        """
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key <> 'self':
                exec('%s = parent_state["%s"]' % (key, key))
        init_config = parent_state['controller_config']["init"]
        group_member = parent_state['group_member']
        if group_member is None:  # No model group
            cmd = "%s(%s)" % (
                init_config["name"],
                self.construct_arguments_from_config(init_config))
            model = eval(cmd)
        else:  # Model belongs to a group
            model = eval("%s(group_member, %s)" %
                         (init_config["name"],
                          self.construct_arguments_from_config(init_config)))
        return model

    def do_prepare(self, parent_state):
        """Prepares for the current model in the parent state's context.
        What to do is determined by the contents of the current model's controller configuration.

        controller_config is the 'controller' part of the model configuration.
        vardict is a dictionary into which the output of the model's 'prepare_output'
        method will be put.
        """
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key <> 'self':
                exec('%s = parent_state["%s"]' % (key, key))
        key_name = "prepare_for_%s" % process
        if key_name in controller_config.keys():
            prepare_config = controller_config[key_name]
            if "output" in prepare_config.keys():
                outputvar = prepare_config["output"]
            else:
                outputvar = "prepare_output"
            self.vardict[outputvar] = eval(
                "model.%s(%s)" %
                (prepare_config["name"],
                 self.construct_arguments_from_config(prepare_config)))
            return '%s=self.vardict["%s"]' % (outputvar, outputvar)
        else:
            # do nothing when return value is exec'ed
            return ''

    def do_process(self, parent_state):
        for key in parent_state.keys():
            if key <> 'self':
                exec('%s = parent_state["%s"]' % (key, key))
        ev = "model.%s(%s)" % (
            process, self.construct_arguments_from_config(processmodel_config))
        return eval(ev)

    def get_number_of_models_and_model_group_members_to_run(
            self, models, models_configuration):
        """Count number_of models in the list 'models' that can include group members (each member and each process is one model)."""
        # list models can be in the form:
        # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
        #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
        #                                      'commercial']}},
        #  {'model_name_3': ['estimate', 'run']},
        #  'model_name_4',
        #  {'model_name_5': {'group_members': '_all_'}}
        # ]
        number_of_models = 1
        model_group_members_to_run = {}
        for model_entry in models:
            if isinstance(model_entry, dict):
                model_name, value = model_entry.items()[0]
                if isinstance(value, dict):  # is a model group
                    if not value.keys()[0] == "group_members":
                        raise KeyError, "Key for model " + model_name + " must be 'group_members'."
                    group_members = value["group_members"]
                    model_group = None
                    if 'group_by_attribute' in models_configuration[
                            model_name]["controller"].keys():
                        group_dataset_name, group_attribute = models_configuration[
                            model_name]["controller"]['group_by_attribute']
                        model_group = ModelGroup(
                            SessionConfiguration().get_dataset_from_pool(
                                group_dataset_name), group_attribute)
                    if not isinstance(group_members, list):
                        group_members = [group_members]
                    if group_members[
                            0] == "_all_":  # see 'model_name_5' example above
                        if model_group is None:
                            raise KeyError, "Entry 'group_by_attribute' is missing for model %s" % model_name
                        group_members = model_group.get_member_names()
                    model_group_members_to_run[model_name] = [{}, model_group]
                    for member in group_members:
                        if isinstance(member, dict):
                            # see 'model_name_2' ('residential') in the comment above
                            member_name = member.keys()[0]
                            model_group_members_to_run[model_name][0][
                                member_name] = member[member_name]
                            if not isinstance(
                                    model_group_members_to_run[model_name][0]
                                [member_name], list):
                                model_group_members_to_run[model_name][0][
                                    member_name] = [
                                        model_group_members_to_run[model_name]
                                        [0][member_name]
                                    ]
                            number_of_models += len(
                                model_group_members_to_run[model_name][0]
                                [member_name])
                        else:  # see 'model_name_1'
                            model_group_members_to_run[model_name][0][
                                member] = ["run"]
                            number_of_models += len(
                                model_group_members_to_run[model_name][0]
                                [member])
                else:  # in the form 'model_name_3' in the comment above
                    model_group_members_to_run[model_name] = [{}, None]
                    if not isinstance(value, list):
                        number_of_models += 1
                    else:
                        number_of_models += len(value)
            else:  # in the form 'model_name_4' in the comment above
                model_group_members_to_run[model_entry] = [{}, None]
                number_of_models += 1
        return (number_of_models, model_group_members_to_run)

    def do_commands_from_gui(self, filename=None):
        if (filename is None) or not os.path.exists(filename):
            return
        while True:
            f = file(filename)
            line = f.read().strip()
            f.close()
            if line == 'stop':
                logger.log_warning('Simulation stopped.')
                sys.exit()
            elif line == 'resume':
                break
            elif line <> 'pause':
                logger.log_warning(
                    "Unknown command '%s'. Allowed commands: 'stop', 'pause', 'resume'."
                    % line)
            time.sleep(10)

    def run_multiprocess(self, resources):
        resources = Resources(resources)
        profiler_name = resources.get("profile_filename", None)
        if resources['cache_directory'] is not None:
            cache_directory = resources['cache_directory']
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is absolutely no good reason to be
        ###       changing the Configuration!
        resources['cache_directory'] = cache_directory

        log_file = os.path.join(cache_directory, 'run_multiprocess.log')
        logger.enable_file_logging(log_file)

        start_year = resources["years"][0]
        end_year = resources["years"][-1]
        nyears = end_year - start_year + 1
        root_seed = resources.get("seed", NO_SEED)
        if resources.get('_seed_dictionary_', None) is not None:
            # This is added by the RunManager to ensure reproducibility including restarted runs
            seed_dict = resources.get('_seed_dictionary_')
            seed_array = array(
                map(lambda year: seed_dict[year],
                    range(start_year, end_year + 1)))
        else:
            seed(root_seed)
            seed_array = randint(1, 2**30, nyears)
        logger.log_status("Running simulation for years %d thru %d" %
                          (start_year, end_year))
        logger.log_status("Simulation root seed: %s" % root_seed)

        for iyear, year in enumerate(range(start_year, end_year + 1)):
            success = self._run_each_year_as_separate_process(
                iyear,
                year,
                seed=seed_array[iyear],
                resources=resources,
                profiler_name=profiler_name,
                log_file=log_file)
            if not success:
                break

        self._notify_stopped()
        if profiler_name is not None:  # insert original value
            resources["profile_filename"] = profiler_name
        logger.log_status("Done running simulation for years %d thru %d" %
                          (start_year, end_year))

    #TODO: changing of configuration
    def _run_each_year_as_separate_process(self,
                                           iyear,
                                           year,
                                           seed=None,
                                           resources=None,
                                           profiler_name=None,
                                           log_file=None):

        logger.start_block('Running simulation for year %d in new process' %
                           year)
        resources['years'] = (year, year)
        resources['seed'] = seed,

        if profiler_name is not None:
            # add year to the profile name
            resources["profile_filename"] = "%s_%s" % (profiler_name, year)

        optional_args = []
        if log_file:
            optional_args += ['--log-file-name', os.path.split(log_file)[-1]]

        success = False
        try:
            logger.disable_file_logging(log_file)
            success = self._fork_new_process(
                'opus_core.model_coordinators.model_system',
                resources,
                optional_args=optional_args)
            logger.enable_file_logging(log_file, verbose=False)
        finally:
            logger.end_block()

        return success

    def run_in_one_process(
            self,
            resources,
            run_in_background=False,
            class_path='opus_core.model_coordinators.model_system'):
        resources = Resources(resources)
        if resources['cache_directory'] is not None:
            cache_directory = resources['cache_directory']
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        resources['cache_directory'] = cache_directory

        self._fork_new_process('%s' % class_path,
                               resources,
                               delete_temp_dir=False,
                               run_in_background=run_in_background)
        self._notify_stopped()

    def run_in_same_process(self, resources, **kwargs):
        resources = Resources(resources)
        if resources['cache_directory'] is not None:
            cache_directory = resources['cache_directory']
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        resources['cache_directory'] = cache_directory

        self._notify_started()
        RunModelSystem(model_system=self, resources=resources, **kwargs)
        self._notify_stopped()

    def construct_arguments_from_config(self, config):
        key = "arguments"
        if (key not in config.keys()) or (len(config[key].keys()) <= 0):
            return ""
        arg_dict = config[key]
        result = ""
        for arg_key in arg_dict.keys():
            result += "%s=%s, " % (arg_key, arg_dict[arg_key])
        return result

    def wait_for_start(self):
        self.running_conditional.acquire()
        while not self.running:
            self.running_conditional.wait()
        self.running_conditional.release()

    def wait_for_finish(self):
        self.running_conditional.acquire()
        while self.running:
            self.running_conditional.wait()
        self.running_conditional.release()

    def wait_for_process_or_finish(self, process_index):
        self.running_conditional.acquire()
        while process_index >= len(self.forked_processes) and self.running:
            self.running_conditional.wait()
        self.running_conditional.release()
        if not self.running:
            process_index = len(self.forked_processes) - 1
        return process_index

    def _fork_new_process(self,
                          module_name,
                          resources,
                          run_in_background=False,
                          **key_args):
        self.running_conditional.acquire()
        self.running = True
        self.forked_processes.append(ForkProcess())
        key_args["run_in_background"] = run_in_background
        success = self.forked_processes[-1].fork_new_process(
            module_name, resources, **key_args)
        self.running_conditional.notifyAll()
        self.running_conditional.release()
        if not run_in_background:
            self.forked_processes[-1].wait()
            self.forked_processes[-1].cleanup()
        return success

    def _notify_started(self):
        self.running_conditional.acquire()
        self.running = True
        self.running_conditional.notifyAll()
        self.running_conditional.release()

    def _notify_stopped(self):
        self.running_conditional.acquire()
        self.running = False
        self.running_conditional.notifyAll()
        self.running_conditional.release()

    def update_config_for_multiple_runs(self, config):
        models_to_update = config.get('models_with_sampled_coefficients', [])
        if 'models_in_year' not in config.keys():
            config['models_in_year'] = {}
        if config['models_in_year'].get(config['base_year'] + 1, None) is None:
            config['models_in_year'][config['base_year'] +
                                     1] = config.get('models')

        for umodel in models_to_update:
            try:
                i = config['models_in_year'][config['base_year'] +
                                             1].index(umodel)
            except ValueError:
                # model is not scheduled for base_year+1; nothing to substitute
                continue
            new_model_name = '%s_sampled_coef' % umodel
            config['models_in_year'][config['base_year'] +
                                     1][i] = new_model_name
            config["models_configuration"][new_model_name] = Configuration(
                config["models_configuration"][umodel])
            config["models_configuration"][new_model_name]["controller"][
                "prepare_for_run"]["arguments"]["sample_coefficients"] = True
            config["models_configuration"][new_model_name]["controller"][
                "prepare_for_run"]["arguments"]["distribution"] = "'normal'"
            config["models_configuration"][new_model_name]["controller"][
                "prepare_for_run"]["arguments"][
                    "cache_storage"] = "base_cache_storage"
Example #13
class ModelExplorer(object):
    def __init__(self, model, year, scenario_name=None, model_group=None, configuration=None, xml_configuration=None, 
                 cache_directory=None):
        self.model_group = model_group
        self.explored_model = model
 
        if configuration is None:
            if xml_configuration is None:
                raise StandardError, "Either a dictionary-based or an XML-based configuration must be given."
            config = xml_configuration.get_run_configuration(scenario_name)
        else:
            config = Configuration(configuration)
            
        self.scenario_models = config['models']
        if config.get('models_in_year', None) is not None and config['models_in_year'].get(year, None) is not None:
            del config['models_in_year'][year]
        if model is not None:
            dependent_models = config['models_configuration'][model]['controller'].get('dependencies', [])
            config['models'] = dependent_models
            if model_group is None:
                config['models'] = config['models'] + [{model: ["run"]}]
            else:
                config['models'] = config['models'] + [{model: {"group_members": [{model_group: ["run"]}]}}]
        else:
            config['models'] = []
            
        config['years'] = [year, year]
        config["datasets_to_cache_after_each_model"]=[]
        config['flush_variables'] = False
        
        self.config = Resources(config)
        self.xml_configuration = xml_configuration
        
        if cache_directory is None:
            cache_directory = config['creating_baseyear_cache_configuration'].baseyear_cache.existing_cache_to_copy
        self.simulation_state = SimulationState(new_instance=True, base_cache_dir=cache_directory, 
                                                start_time=config.get('base_year', 0))
        self.config['cache_directory'] = cache_directory
        
        SessionConfiguration(new_instance=True,
                             package_order=self.config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())
        
    def run(self):
        self.model_system = ModelSystem()
        self.model_system.run(self.config, write_datasets_to_cache_at_end_of_year=False,
                              cleanup_datasets=False)
        logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory())
        
    def get_agents_for_simulation(self):
        return self.get_active_agent_set()
        
    def get_model_name(self):
        return (self.explored_model, self.model_group)
        
    def get_specification(self):
        return self.get_model().get_specified_coefficients().specification
    
    def get_probabilities(self, submodel=-2):
        """Return a tuple of probabilities and choices, see ChoiceModel.get_probabilities_and_choices.
        Works only for the ChoiceModel class.
        """
        model = self.get_model()
        return model.get_probabilities_and_choices(submodel)

    def export_probabilities(self, submodel=-2, filename='./choice_model.txt'):
        """Export probabilities and choices into a file. Works only for the ChoiceModel class"""
        
        model = self.get_model()
        model.export_probabilities(submodel, file_name=filename)
            
    def get_model(self):
        """Return a model object."""
        return self.model_system.run_year_namespace["model"]
    
    def get_dataset(self, dataset_name):
        """Return a Dataset object of the given name."""
        ds = self.model_system.run_year_namespace.get(dataset_name, None)
        if ds is None:
            if dataset_name not in self.model_system.run_year_namespace["datasets"].keys():
                ds = self.get_dataset_pool().get_dataset(dataset_name)
            else:
                ds = self.model_system.run_year_namespace["datasets"][dataset_name]
        return ds
        
    def get_data(self, coefficient, submodel=-2):
        """Calls method get_data of the Model object. Should return a data array for the 
        given coefficient and submodel. Can be used only on in models that are estimable."""
        return self.get_model().get_data(coefficient, submodel)

    def get_coefficient_names(self, submodel=-2):
        """Calls method get_coefficient_names of the Model object which should return
           coefficient names for the given submodel. Can be used only on in models that are estimable."""
        return self.get_model().get_coefficient_names(submodel)
    
    def get_coefficients(self, submodel=-2):
        """Return an object of class SpecifiedCoefficientsFor1Submodel giving the model coefficients. 
        Can be used only on in models that are estimable."""
        return SpecifiedCoefficientsFor1Submodel(self.get_model().get_specified_coefficients(), submodel)

    def get_data_as_dataset(self, submodel=-2, **kwargs):
        """Calls method get_data_as_dataset of the Model object which should return
        an object of class Dataset containing model data. 
        Works only for ChoiceModel (returns InteractionDataset), 
        and for RegressionModel (returns Dataset). 
        """
        return self.get_model().get_data_as_dataset(submodel, **kwargs)
                
    def get_choice_set(self): 
        """Return a Dataset of choices. Works only for the ChoiceModel class.
        """
        return self.get_model().model_interaction.interaction_dataset.get_dataset(2)
    
    def get_choice_set_index(self):
        """Return an array of indices of choices. Works only for the ChoiceModel class.
        """
        return self.get_model().model_interaction.interaction_dataset.get_index(2)
        
    def get_choice_set_index_for_submodel(self, submodel):
        """Return an array of indices of choices for the given submodel. 
        Works only for the ChoiceModel class.
        """
        index = self.get_choice_set_index()
        return take(index, indices=self.get_model().observations_mapping[submodel], axis=0)
    
    def get_active_choice_set(self, submodel=None):
        """Return choice set as seen by agents in the model.
        Works only for the ChoiceModel class.
        """
        if submodel is None:
            choices = self.get_choice_set_index()
        else:
            choices = self.get_choice_set_index_for_submodel(submodel)
        choices = unique(choices.flatten())
        ds = self.get_choice_set()
        return DatasetSubset(ds, choices)
                             
    def get_agent_set(self):
        """Return a Dataset of all agents. Works only for the ChoiceModel class.
        """
        return self.get_model().model_interaction.interaction_dataset.get_dataset(1)
        
    def get_agent_set_index(self):
        """Return an array of indices of agents that are the choosers. 
        Works only for the ChoiceModel class.
        """
        return self.get_model().model_interaction.interaction_dataset.get_index(1)
        
    def get_agent_set_index_for_submodel(self, submodel):
        """Return an array of indices of agents for the given submodel that are the choosers. 
        Works only for the ChoiceModel class.
        """
        model = self.get_model()
        return model.model_interaction.interaction_dataset.get_index(1)[model.observations_mapping[submodel]]
    
    def get_active_agent_set(self, submodel=None):
        """Return agent set that make choices in the model.
        Works only for the ChoiceModel class.
        """
        agents = self.get_agent_set()
        if submodel is None:
            index = self.get_agent_set_index()
        else:
            index = self.get_agent_set_index_for_submodel(submodel)
        return DatasetSubset(agents, index)
    
    def agent_summary(self, submodel=None):
        ds = self.get_active_agent_set(submodel=submodel)
        ds.summary()
        
    def choice_summary(self, submodel=None):
        ds = self.get_active_choice_set(submodel=submodel)
        ds.summary()
       
    def data_summary(self, **kwargs):
        ds = self.get_data_as_dataset(**kwargs)
        ds.summary()
        
    def _get_before_after_dataset_from_attribute(self, var_name, storage, **kwargs):
        dataset_name = var_name.get_dataset_name()
        ds = self.get_dataset(dataset_name)
        ds.compute_variables([var_name], dataset_pool=self.get_dataset_pool())
        ds.copy_attribute_by_reload(var_name, storage=storage, **kwargs)
        return ds
    
    def get_before_after_attribute(self, attribute_name):
        """Return a dictionary with elements 'before' (contains an array of the given attribute
        that is reloaded from the cache) and 'after' (contains an array of the given attribute 
        with the current values).
        """
        from opus_core.store.attribute_cache import AttributeCache
        var_name = VariableName(attribute_name)
        storage = AttributeCache(self.simulation_state.get_cache_directory())
        ds = self._get_before_after_dataset_from_attribute(var_name, storage=storage,
                   package_order=self.get_dataset_pool().get_package_order())       
        return {'after': ds[var_name.get_alias()],
                'before': ds.get_attribute('%s_reload__' % var_name.get_alias())}
        
    def summary_before_after(self, attribute_name):
        """Print summary of the given attribute 'before' (values
        reloaded from the cache) and 'after' (current values).
        """
        from opus_core.store.attribute_cache import AttributeCache
        var_name = VariableName(attribute_name)
        storage = AttributeCache(self.simulation_state.get_cache_directory())
        ds = self._get_before_after_dataset_from_attribute(var_name, storage=storage, 
                   package_order=self.get_dataset_pool().get_package_order())
        print ''
        print 'Before model run:'
        print '================='
        ds.summary(names=['%s_reload__' % var_name.get_alias()])
        print ''
        print 'After model run:'
        print '================='
        ds.summary(names=[var_name.get_alias()])
        
    def model_dependencies(self, model=None, group=None):
        """Prints out all dependencies for the model."""
        from opus_core.variables.dependency_query import DependencyChart
        if model is None: # current model
            model, group = self.get_model_name()
            spec = self.get_specification()
        else:
            spec = None
        if model == 'all': # print dependencies for all models
            for thismodel in self.scenario_models:
                thisgroups = None
                if isinstance(thismodel, dict):
                    thisgroups = thismodel[thismodel.keys()[0]].get('group_members', None)
                    thismodel = thismodel.keys()[0]
                if not isinstance(thisgroups, list):
                    thisgroups = [thisgroups]                
                for group in thisgroups:
                    chart = DependencyChart(self.xml_configuration, model=thismodel, model_group=group)
                    chart.print_model_dependencies()
        else:
            chart = DependencyChart(self.xml_configuration, model=model, model_group=group, 
                                specification=spec)
            chart.print_model_dependencies()
        
    def variable_dependencies(self, name):
        """Prints out dependencies of this variable. 'name' can be either an alias from 
        the model specification or an expression."""
        from opus_core.variables.dependency_query import DependencyChart
        varname = None
        allvars = self.get_specification().get_variable_names()
        for thisvar in allvars:
            if not isinstance(thisvar, VariableName):
                thisvar = VariableName(thisvar)
            if name == thisvar.get_alias():
                varname = thisvar
                break
        if varname is None:
            varname = VariableName(name)
        chart = DependencyChart(self.xml_configuration)
        chart.print_dependencies(varname.get_expression())
              
    def compute_expression(self, attribute_name):
        """Compute any expression and return its values."""
        var_name = VariableName(attribute_name)
        dataset_name = var_name.get_dataset_name()
        ds = self.get_dataset(dataset_name)
        return ds.compute_variables([var_name], dataset_pool=self.get_dataset_pool())
        
    def get_dataset_pool(self):
        return self.model_system.run_year_namespace["dataset_pool"]
    
    def plot_histogram_before_after(self, attribute_name, bins=None):
        """Plot histograms of values returned by the method get_before_after_attribute."""
        from opus_core.plot_functions import create_histogram, show_plots
        from matplotlib.pylab import figure
        values = self.get_before_after_attribute(attribute_name)
        alias = VariableName(attribute_name).get_alias()
        fig = figure()
        fig.add_subplot(121)
        create_histogram(values['before'], main='%s (before)' % alias, bins=bins)
        fig.add_subplot(122)
        create_histogram(values['after'], main='%s (after)' % alias, bins=bins)
        show_plots()
        
    def get_correlation(self, submodel=-2):
        """Return an array of correlations between all variables of the model data (for given submodel).
        Works only for ChoiceModel and RegressionModel"""
        ds = self.get_data_as_dataset(submodel)
        attrs = [attr for attr in ds.get_known_attribute_names() if attr not in ds.get_id_name()]
        return ds.correlation_matrix(attrs)
        
    def plot_correlation(self, submodel=-2, useR=False, **kwargs):
        """Plot correlations between all variables of the model data (for given submodel).
        Works only for ChoiceModel and RegressionModel"""
        ds = self.get_data_as_dataset(submodel)
        attrs = [attr for attr in ds.get_known_attribute_names() if attr not in ds.get_id_name()]
        ds.correlation_image(attrs, useR=useR, **kwargs)
        
    def plot_choice_set(self, agents_index=None, aggregate_to=None, matplotlib=True, **kwargs):
        """Plot map of the sampled choice set. 
        agents_index can be given to restrict the set of agents to which the choice set belongs.
        aggregate_to is a name of a dataset which the choice set should be aggregated to.
        If matplotlib is False, mapnik is used (and required). 
        Additional arguments are passed to plot_map or plot_map_matplotlib.
        E.g. (choice set are buildings, aggregated to zones, for the first agent)
        er.plot_choice_set(aggregate_to='zone', matplotlib=False, project_name='psrc_parcel', 
                            file='choice_set0.png', agents_index=0)
        """
        choice_set = self.get_choice_set()
        if agents_index is None:
            flatten_choice_index = self.get_choice_set_index().ravel()
        else:
            flatten_choice_index = self.get_choice_set_index()[agents_index,:].ravel()
        if aggregate_to is not None:
            ds_aggr = self.get_dataset(aggregate_to)
            result = ds_aggr.sum_over_ids(choice_set[ds_aggr.get_id_name()[0]][flatten_choice_index], 
                                               ones(flatten_choice_index.size))
            ds = ds_aggr
        else:
            result = choice_set.sum_over_ids(choice_set.get_id_attribute()[flatten_choice_index], 
                                             ones(flatten_choice_index.size))
            ds = choice_set
        dummy_attribute_name = '__sampled_choice_set__'
        ds.add_attribute(name=dummy_attribute_name, data=result)
        if matplotlib:
            coord_syst = None
            if ds.get_coordinate_system() is None and hasattr(ds, 'compute_coordinate_system'):
                coord_syst = ds.compute_coordinate_system(dataset_pool=self.get_dataset_pool())
            ds.plot_map_matplotlib(dummy_attribute_name, background=-1, coordinate_system=coord_syst, **kwargs)
        else:
            ds.plot_map(dummy_attribute_name, background=-1, **kwargs)
        ds.delete_one_attribute(dummy_attribute_name)
        
    def plot_choice_set_attribute(self, name, agents_index=None, aggregate_to=None, function='sum', 
                                  matplotlib=True, **kwargs):
        """Plot map of the given attribute for the sampled choice set.
        agents_index can be given to restrict the set of agents to which the choice set belongs.
        aggregate_to is a name of a dataset which the choice set should be aggregated to.
        function defines the aggregating function (e.g. sum, mean, median, etc.)
        If matplotlib is False, mapnik is used (and required). 
        Additional arguments are passed to plot_map or plot_map_matplotlib.
        E.g. er.plot_choice_set_attribute('residential_units', aggregate_to='zone', matplotlib=False, 
                                    project_name='psrc_parcel', file='choice_resunits.png')
        """
        choice_set = self.get_choice_set()
        if agents_index is None:
            flatten_choice_index = self.get_choice_set_index().ravel()
        else:
            flatten_choice_index = self.get_choice_set_index()[agents_index,:].ravel()
        filter_var = ones(choice_set.size(), dtype='int16')
        filter_var[unique(flatten_choice_index)] = 0
        filter_idx = where(filter_var)[0]
        if aggregate_to is not None:
            ds_aggr = self.get_dataset(aggregate_to)
            result = ds_aggr.aggregate_over_ids(choice_set[ds_aggr.get_id_name()[0]][flatten_choice_index], 
                                                     what=choice_set[name][flatten_choice_index], function=function)
            filter = ds_aggr.sum_over_ids(choice_set[ds_aggr.get_id_name()[0]][filter_idx], 
                                                     ones(filter_idx.size))
            filter = filter > 0
            ds = ds_aggr
        else:
            result = choice_set.aggregate_over_ids(choice_set.get_id_attribute()[flatten_choice_index], 
                                                   what=choice_set[name][flatten_choice_index], function=function)
            filter = filter_var
            ds = choice_set
        dummy_attribute_name = '__sampled_choice_set_attribute__'
        ds.add_attribute(name=dummy_attribute_name, data=result)
        dummy_filter_name = '__sampled_choice_set_filter__'
        ds.add_attribute(name=dummy_filter_name, data=filter)
        if matplotlib:
            coord_syst = None
            if ds.get_coordinate_system() is None and hasattr(ds, 'compute_coordinate_system'):
                coord_syst = ds.compute_coordinate_system(dataset_pool=self.get_dataset_pool())
            ds.plot_map_matplotlib(dummy_attribute_name, filter=dummy_filter_name, coordinate_system=coord_syst, **kwargs)
        else:
            ds.plot_map(dummy_attribute_name, filter=dummy_filter_name, **kwargs)
        ds.delete_one_attribute(dummy_attribute_name)
        ds.delete_one_attribute(dummy_filter_name)
                   
    def plot_coefficients(self, submodel=-2, exclude_constant=True, eqidx=0, plot=True, 
                          store_values_to_file=None):
        """ Plot a barchart of coefficient values. This can be used in a regression model, 
        when coefficients are standardized 
        (i.e. using the estimation module opus_core.estimate_linear_regression_standardized).
        store_values_to_file can be a file name where the values are stored.
        """
        coef = self.get_coefficients(submodel)
        values = coef.get_coefficient_values()
        names = coef.get_coefficient_names()
        sd = coef.get_standard_errors()
        idx = ones(names.shape[1], dtype="bool")
        if exclude_constant:
            pos = coef.get_constants_positions()
            if pos.size > 0:
                idx[pos] = 0
        if store_values_to_file is not None:
            n = idx.sum()
            result = concatenate((reshape(names[eqidx, idx], (n,1)), 
                                 reshape(values[eqidx, idx], (n,1)),
                                 reshape(sd[eqidx, idx], (n,1))), axis=1)
            write_to_text_file(store_values_to_file, array(['coefficient_name', 'estimate', 'standard_error']), 
                               delimiter='\t')
            write_table_to_text_file(store_values_to_file, result, delimiter='\t', mode='a')
        if plot:
            plot_barchart(values[eqidx, idx], labels=names[eqidx, idx], errors=sd[eqidx, idx])
        else:
            return {'names': names[eqidx, idx], 'values': values[eqidx, idx], 'errors': sd[eqidx, idx]}
        
    def create_latex_tables(self, directory, other_info_keys=None):
        from opus_core.latex_table_creator import LatexTableCreator
        LTC = LatexTableCreator()
        LTC.create_latex_table_for_coefficients_for_model(
            self.get_model().get_specified_coefficients().coefficients, self.explored_model, directory, 
                                other_info_keys=other_info_keys)
        LTC.create_latex_table_for_specifications_for_model(
            self.get_model().get_specified_coefficients().specification, self.explored_model, directory)
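A hedged usage sketch of the explorer above (the project file, scenario name, model name, and attribute are hypothetical; the XMLConfiguration import path is assumed from the usual opus_core layout):

from opus_core.configurations.xml_configuration import XMLConfiguration

xml_config = XMLConfiguration('my_project.xml')      # hypothetical project file
er = ModelExplorer('real_estate_price_model', 2005,  # hypothetical model and year
                   scenario_name='my_baseline',
                   xml_configuration=xml_config)
er.run()                            # runs the model's dependencies, then the model itself
er.summary_before_after('urbansim.gridcell.commercial_sqft')  # cached vs. current values
er.plot_coefficients(submodel=-2)   # regression models with standardized coefficients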
Example #14
class ModelExplorer(GenericModelExplorer):
    def __init__(self,
                 model,
                 year,
                 scenario_name=None,
                 model_group=None,
                 configuration=None,
                 xml_configuration=None,
                 cache_directory=None):
        self.model_group = model_group
        self.explored_model = model

        if configuration is None:
            if xml_configuration is None:
                raise StandardError, "Either dictionary based or XML based configuration must be given."
            config = xml_configuration.get_run_configuration(scenario_name)
        else:
            config = Configuration(configuration)

        if model is not None:
            dependent_models = config['models_configuration'][model][
                'controller'].get('dependencies', [])
            config['models'] = dependent_models
            if model_group is None:
                config['models'] = config['models'] + [{model: ["run"]}]
            else:
                config['models'] = config['models'] + [{
                    model: {
                        "group_members": [{
                            model_group: ["run"]
                        }]
                    }
                }]
        else:
            config['models'] = []

        config['years'] = [year, year]
        config["datasets_to_cache_after_each_model"] = []
        config['flush_variables'] = False

        self.config = Resources(config)
        self.xml_configuration = xml_configuration

        if cache_directory is None:
            cache_directory = config[
                'creating_baseyear_cache_configuration'].baseyear_cache.existing_cache_to_copy
        self.simulation_state = SimulationState(new_instance=True,
                                                base_cache_dir=cache_directory)
        self.config['cache_directory'] = cache_directory

        SessionConfiguration(
            new_instance=True,
            package_order=self.config['dataset_pool_configuration'].package_order,
            in_storage=AttributeCache())

    def run(self):
        self.model_system = ModelSystem()
        self.model_system.run(self.config,
                              write_datasets_to_cache_at_end_of_year=False)
        logger.log_status("Data cache in %s" %
                          self.simulation_state.get_cache_directory())

    def get_agents_for_simulation(self):
        return self.get_active_agent_set()

    def get_model_name(self):
        return (self.explored_model, self.model_group)

    def get_specification(self):
        return self.get_model().get_specified_coefficients().specification

    def get_probabilities(self, submodel=-2):
        """Return a tuple of probabilities and choices, see ChoiceModel.get_probabilities_and_choices.
        Works only for the ChoiceModel class.
        """
        model = self.get_model()
        if isinstance(model, ChoiceModel):
            return model.get_probabilities_and_choices(submodel)
        print '\nMethod is implemented only for ChoiceModels.\n'

    def export_probabilities(self, submodel=-2, filename='./choice_model.txt'):
        """Export probabilities and choices into a file. Works only for the ChoiceModel class"""

        model = self.get_model()
        if isinstance(model, ChoiceModel):
            model.export_probabilities(submodel, file_name=filename)
        else:
            print '\nMethod is implemented only for ChoiceModels.\n'
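A brief usage sketch for the two probability helpers this variant guards with isinstance (the project file, scenario name, model name, and output path are all hypothetical; the XMLConfiguration import path is assumed from the usual opus_core layout):

from opus_core.configurations.xml_configuration import XMLConfiguration

explorer = ModelExplorer('household_location_choice_model', 2001,  # hypothetical
                         scenario_name='my_baseline',
                         xml_configuration=XMLConfiguration('my_project.xml'))
explorer.run()
# Works only for ChoiceModel subclasses; other models just print a notice.
probabilities, choices = explorer.get_probabilities(submodel=-2)
explorer.export_probabilities(submodel=-2, filename='/tmp/choice_model.txt')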
Example #15
class TestLagVariables(opus_unittest.OpusTestCase):
    
    def setUp(self):
        self.config = TestCacheConfiguration()

        self.simulation_state = SimulationState(new_instance=True)
        SessionConfiguration(self.config, new_instance=True, 
                             package_order=['urbansim', 'opus_core'],
                             in_storage=AttributeCache()) 

        self.base_year = self.config['base_year']
        creating_baseyear_cache_configuration = self.config['creating_baseyear_cache_configuration']
        
        self.simulation_state.set_current_time(self.base_year)

        cache_directory = self.simulation_state.get_cache_directory()
        copytree(os.path.join(creating_baseyear_cache_configuration.baseyear_cache.existing_cache_to_copy, 
                              str(self.base_year)),
                 os.path.join(cache_directory, str(self.base_year)))
        cacher = CacheScenarioDatabase()
        cacher.prepare_data_before_baseyear(cache_directory, self.base_year, creating_baseyear_cache_configuration)
        
        self.config['cache_directory'] = cache_directory
        
        cache_storage = AttributeCache().get_flt_storage_for_year(self.base_year)
        cache_directory = self.simulation_state.get_cache_directory()
        flt_directory = os.path.join(cache_directory, str(self.base_year))
        self.gridcell = DatasetFactory().get_dataset('gridcell', 
            package='urbansim',
            subdir='datasets',
            arguments={'in_storage':StorageFactory().get_storage('flt_storage', storage_location=flt_directory)}
            )
        
    def tearDown(self):
        self.simulation_state.remove_singleton(delete_cache=True)
        
    def test_lag_variables(self):
        """Test lag variables"""
        # A weak test that computing a lag variable on a realistic dataset does not crash.
        self.gridcell.compute_variables('urbansim.gridcell.n_recent_transitions_to_developed',
                                        resources=self.config)
       
        # The following tests are fragile, since they need to know exactly what values are being
        # subtracted, and ignore any negative amount that is truncated at zero.
        # If you change the "subset" dataset to a different region, you will
        # have to update the expected value.
        self.gridcell.compute_variables('urbansim.gridcell.commercial_sqft',
                                        resources=self.config)
        self.gridcell.compute_variables('urbansim.gridcell.commercial_sqft_lag1',
                                        resources=self.config)
        self.gridcell.compute_variables('urbansim.gridcell.commercial_sqft_lag2',
                                        resources=self.config)

        sqft = self.gridcell.get_attribute('commercial_sqft').sum()
        sqft_lag1 = self.gridcell.get_attribute('commercial_sqft_lag1').sum()
        sqft_lag2 = self.gridcell.get_attribute('commercial_sqft_lag2').sum()

        logger.log_status('sqft = %s' % sqft)
        logger.log_status('sqft_lag1 = %s' % sqft_lag1)
        logger.log_status('sqft_lag2 = %s' % sqft_lag2)
        logger.log_status('base_year = %s' % self.base_year)
        
        self.assertEqual(self.base_year, SimulationState().get_current_time())
        self.assertEqual(sqft, sqft_lag1)
        self.assertEqual(578+2083+1103+87, sqft_lag1 - sqft_lag2)
       
        # Do lag variables produce different results for derived attributes?
        self.gridcell.compute_variables('urbansim.gridcell.n_recent_development_projects',
                                        resources=self.config)
        self.gridcell.compute_variables('urbansim.gridcell.n_recent_development_projects_lag1',
                                        resources=self.config)
        n_recent_projects = self.gridcell.get_attribute('n_recent_development_projects').sum()
        n_recent_projects_lag1 = self.gridcell.get_attribute('n_recent_development_projects_lag1').sum()
        
        self.assertEqual(n_recent_projects, 11)
        self.assertEqual(n_recent_projects_lag1, 15)
       
        # Do lag_variables produce different results for derived attributes without lags?
        self.gridcell.compute_variables('urbansim.gridcell.ln_commercial_sqft',
                                        resources=self.config)
        self.gridcell.compute_variables('urbansim.gridcell.ln_commercial_sqft_lag4',
                                        resources=self.config)
        sqft = self.gridcell.get_attribute('ln_commercial_sqft').sum()
        sqft_lag4 = self.gridcell.get_attribute('ln_commercial_sqft_lag4').sum()
        
        self.assertNotEqual(sqft, sqft_lag4)
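A toy sketch of the lag semantics this test exercises, in pure Python/NumPy with made-up numbers: a '<name>_lagK' attribute is just '<name>' evaluated K years before the current simulation time, read from the year cache. The clamp to the earliest cached year is an assumption about how prepare_data_before_baseyear pads the cache.

import numpy

# year -> cached commercial_sqft per gridcell (made-up values)
cache = {1998: numpy.array([100, 0, 50]),
         1999: numpy.array([120, 0, 50]),
         2000: numpy.array([120, 30, 50])}

def lag(attribute_by_year, current_year, k):
    # Clamp to the earliest cached year when the lag reaches before it
    # (assumed padding behavior).
    year = max(current_year - k, min(attribute_by_year))
    return attribute_by_year[year]

assert (lag(cache, 2000, 1) == cache[1999]).all()
assert (lag(cache, 2000, 5) == cache[1998]).all()   # clamped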
Example #16
class ModelExplorer(GenericModelExplorer):
    def __init__(self, model, year, scenario_name=None, model_group=None, configuration=None, xml_configuration=None, 
                 cache_directory=None):
        self.model_group = model_group
        self.explored_model = model
 
        if configuration is None:
            if xml_configuration is None:
                raise StandardError, "Either dictionary based or XML based configuration must be given."
            config = xml_configuration.get_run_configuration(scenario_name)
        else:
            config = Configuration(configuration)
            
        if model is not None:
            dependent_models = config['models_configuration'][model]['controller'].get('dependencies', [])
            config['models'] = dependent_models
            if model_group is None:
                config['models'] = config['models'] + [{model: ["run"]}]
            else:
                config['models'] = config['models'] + [{model: {"group_members": [{model_group: ["run"]}]}}]
        else:
            config['models'] = []
            
        config['years'] = [year, year]
        config["datasets_to_cache_after_each_model"]=[]
        config['flush_variables'] = False
        
        self.config = Resources(config)
        self.xml_configuration = xml_configuration
        
        if cache_directory is None:
            cache_directory = config['creating_baseyear_cache_configuration'].baseyear_cache.existing_cache_to_copy
        self.simulation_state = SimulationState(new_instance=True, base_cache_dir=cache_directory)
        self.config['cache_directory'] = cache_directory
        
        SessionConfiguration(new_instance=True,
                             package_order=self.config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())
        
    def run(self):
        self.model_system = ModelSystem()
        self.model_system.run(self.config, write_datasets_to_cache_at_end_of_year=False)
        logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory())
        
    def get_agents_for_simulation(self):
        return self.get_active_agent_set()
        
    def get_model_name(self):
        return (self.explored_model, self.model_group)
        
    def get_specification(self):
        return self.get_model().get_specified_coefficients().specification
    
    def get_probabilities(self, submodel=-2):
        """Return a tuple of probabilities and choices, see ChoiceModel.get_probabilities_and_choices.
        Works only for the ChoiceModel class.
        """
        model = self.get_model()
        if isinstance(model, ChoiceModel):
            return model.get_probabilities_and_choices(submodel)
        print '\nMethod is implemented only for ChoiceModels.\n'

    def export_probabilities(self, submodel=-2, filename='./choice_model.txt'):
        """Export probabilities and choices into a file. Works only for the ChoiceModel class"""
        
        model = self.get_model()
        if isinstance(model, ChoiceModel):
            model.export_probabilities(submodel, file_name=filename)
        else:
            print '\nMethod is implemented only for ChoiceModels.\n'
Example #17
class TestLagVariables(opus_unittest.OpusTestCase):
    def setUp(self):
        self.config = TestCacheConfiguration()

        self.simulation_state = SimulationState(new_instance=True)
        SessionConfiguration(self.config,
                             new_instance=True,
                             package_order=['urbansim', 'opus_core'],
                             in_storage=AttributeCache())

        self.base_year = self.config['base_year']
        creating_baseyear_cache_configuration = self.config[
            'creating_baseyear_cache_configuration']

        self.simulation_state.set_current_time(self.base_year)

        cache_directory = self.simulation_state.get_cache_directory()
        copytree(
            os.path.join(
                creating_baseyear_cache_configuration.baseyear_cache.
                existing_cache_to_copy, str(self.base_year)),
            os.path.join(cache_directory, str(self.base_year)))
        cacher = CacheScenarioDatabase()
        cacher.prepare_data_before_baseyear(
            cache_directory, self.base_year,
            creating_baseyear_cache_configuration)

        self.config['cache_directory'] = cache_directory

        cache_storage = AttributeCache().get_flt_storage_for_year(
            self.base_year)
        cache_directory = self.simulation_state.get_cache_directory()
        flt_directory = os.path.join(cache_directory, str(self.base_year))
        self.gridcell = DatasetFactory().get_dataset(
            'gridcell',
            package='urbansim',
            subdir='datasets',
            arguments={
                'in_storage':
                StorageFactory().get_storage('flt_storage',
                                             storage_location=flt_directory)
            })

    def tearDown(self):
        self.simulation_state.remove_singleton(delete_cache=True)

    def test_lag_variables(self):
        """Test lag variables"""
        # A weak test that computing a lag variable on a realistic dataset does not crash.
        self.gridcell.compute_variables(
            'urbansim.gridcell.n_recent_transitions_to_developed',
            resources=self.config)

        # The following tests are fragile, since they need to know exactly what values are being
        # subtracted, and ignore any negative amount that is truncated at zero.
        # If you change the "subset" dataset to a different region, you will
        # have to update the expected value.
        self.gridcell.compute_variables('urbansim.gridcell.commercial_sqft',
                                        resources=self.config)
        self.gridcell.compute_variables(
            'urbansim.gridcell.commercial_sqft_lag1', resources=self.config)
        self.gridcell.compute_variables(
            'urbansim.gridcell.commercial_sqft_lag2', resources=self.config)

        sqft = self.gridcell.get_attribute('commercial_sqft').sum()
        sqft_lag1 = self.gridcell.get_attribute('commercial_sqft_lag1').sum()
        sqft_lag2 = self.gridcell.get_attribute('commercial_sqft_lag2').sum()

        logger.log_status('sqft = %s' % sqft)
        logger.log_status('sqft_lag1 = %s' % sqft_lag1)
        logger.log_status('sqft_lag2 = %s' % sqft_lag2)
        logger.log_status('base_year = %s' % self.base_year)

        self.assertEqual(self.base_year, SimulationState().get_current_time())
        self.assertEqual(sqft, sqft_lag1)
        self.assertEqual(578 + 2083 + 1103 + 87, sqft_lag1 - sqft_lag2)

        # Do lag variables produce different results for derived attributes?
        self.gridcell.compute_variables(
            'urbansim.gridcell.n_recent_development_projects',
            resources=self.config)
        self.gridcell.compute_variables(
            'urbansim.gridcell.n_recent_development_projects_lag1',
            resources=self.config)
        n_recent_projects = self.gridcell.get_attribute(
            'n_recent_development_projects').sum()
        n_recent_projects_lag1 = self.gridcell.get_attribute(
            'n_recent_development_projects_lag1').sum()

        self.assertEqual(n_recent_projects, 11)
        self.assertEqual(n_recent_projects_lag1, 15)

        # Do lag_variables produce different results for derived attributes without lags?
        self.gridcell.compute_variables('urbansim.gridcell.ln_commercial_sqft',
                                        resources=self.config)
        self.gridcell.compute_variables(
            'urbansim.gridcell.ln_commercial_sqft_lag4', resources=self.config)
        sqft = self.gridcell.get_attribute('ln_commercial_sqft').sum()
        sqft_lag4 = self.gridcell.get_attribute(
            'ln_commercial_sqft_lag4').sum()

        self.assertNotEqual(sqft, sqft_lag4)