def predict(self, predicted_choice_id_name, agents_index=None):
    """Run prediction and store the predicted choices in an agent attribute.

    Currently makes sense only for choice models.

    The agents' current values of the choice id attribute are saved and
    replaced by -1 for 'agents_index'. The model's 'run' process is then
    executed through self.model_system.do_process(), and the saved values
    are put back afterwards (also if the run fails). The predicted values
    are written into the agent attribute 'predicted_choice_id_name'; agents
    outside of 'agents_index' get -1 there.

    'predicted_choice_id_name' is the name of the agent attribute receiving
    the predictions. It is added as a primary attribute if the agent set
    does not know it yet, otherwise it is overwritten.
    'agents_index' is the index of agents to predict for. If it is None,
    self.agents_index_for_prediction is used, which is initialized from
    self.get_agent_set_index() on first use.

    Returns True if the predictions were stored, False otherwise (the
    run-year namespace is not available, or an error occurred during the
    prediction; both cases are logged).
    """
    # Build a Resources object from the estimation configuration and point
    # the arguments of the model's 'run' controller at the names that are
    # put into the run-year namespace further down.
    # NOTE(review): if Resources(...) does not copy nested entries, the
    # assignments below also change self.config -- confirm.
    tmp_config = Resources(self.config)

    if self.agents_index_for_prediction is None:
        self.agents_index_for_prediction = self.get_agent_set_index().copy()
    if agents_index is None:
        agents_index = self.agents_index_for_prediction

    run_config = tmp_config['models_configuration'][self.model_name]['controller']['run']
    run_arguments = run_config['arguments']
    run_arguments['coefficients'] = "coeff_est"
    run_arguments['agents_index'] = "agents_index"
    run_arguments['chunk_specification'] = "{'nchunks':1}"

    ### save specification and coefficients to cache (no matter the save_estimation_results flag)
    ### so that the prepare_for_run method could load specification and coefficients from there
    #output_configuration = self.config['output_configuration']
    #del self.config['output_configuration']
    #self.save_results()
    #self.config['output_configuration'] = output_configuration
    #self.model_system.run_year_namespace["coefficients"] = self.coefficients
    #del tmp_config['models_configuration'][self.model_name]['controller']['prepare_for_run']

    try:
        run_year_namespace = copy.copy(self.model_system.run_year_namespace)
    except Exception:
        logger.log_error("The estimate() method must be run first")
        return False

    try:
        agents = self.get_agent_set()
        choice_id_name = self.get_choice_set().get_id_name()[0]
        # save current locations of agents
        current_choices = agents.get_attribute(choice_id_name).copy()
        dummy_data = zeros(current_choices.size, dtype=current_choices.dtype) - 1
        # reset choices for agents in agents_index
        # NOTE(review): dummy_data has one entry per agent while 'index' may
        # select a subset -- confirm that modify_attribute accepts this.
        agents.modify_attribute(name=choice_id_name, data=dummy_data, index=agents_index)
        try:
            run_year_namespace["process"] = "run"
            run_year_namespace["coeff_est"] = self.coefficients
            run_year_namespace["agents_index"] = agents_index
            run_year_namespace["processmodel_config"] = run_config
            new_choices = self.model_system.do_process(run_year_namespace)
        finally:
            # Put the saved choices back even if the run failed, so that the
            # agents are never left holding the -1 placeholders.
            agents.modify_attribute(name=choice_id_name, data=current_choices)

        dummy_data[agents_index] = new_choices
        if predicted_choice_id_name not in agents.get_known_attribute_names():
            agents.add_primary_attribute(name=predicted_choice_id_name, data=dummy_data)
        else:
            agents.modify_attribute(name=predicted_choice_id_name, data=dummy_data)
        logger.log_status("Predictions saved into attribute " + predicted_choice_id_name)
        return True
    except Exception as e:
        logger.log_error("Error encountered in prediction: %s" % e)
        logger.log_stack_trace()
        # Report the failure explicitly instead of falling off the end.
        return False
def test_run_estimation(self):
    """Estimate each Washtenaw model against a freshly created baseyear cache.

    The baseyear cache is created in a temporary directory by running the
    script given by create_baseyear_cache_script_path with the
    washtenaw.tests.test_run_estimation_config configuration. Estimation is
    then run for every model in the list below without saving the results.
    The test fails with the names of all models whose estimation raised an
    exception. The temporary directory is removed at the end in any case.
    """
    cache_dir = mkdtemp(prefix="test_washtenaw_run_estimation_tmp")
    try:
        # Cache to a temporary folder.
        ev = '%s "%s" --cache-directory="%s" washtenaw.tests.test_run_estimation_config' % (
            sys.executable,
            create_baseyear_cache_script_path,
            cache_dir,
        )
        logger.log_status("Invoking '%s'" % ev)
        return_code = os.system(ev)
        # Any non-zero status means failure; testing for '> 0' only would
        # let a negative status slip through as success.
        if return_code != 0:
            raise EnvironmentError(
                "Failed while creating the baseyear cache "
                "needed to run Washtenaw estimation tests."
            )

        estimation_config = {
            "cache_directory": cache_dir,
            "dataset_pool_configuration": DatasetPoolConfiguration(
                package_order=["washtenaw", "urbansim", "opus_core"]
            ),
            "datasets_to_cache_after_each_model": [],
            "low_memory_mode": False,
            "base_year": 2000,
            "years": (2000, 2000),
        }

        failed = []
        succeeded = []
        for model_name in [
            "lpm",
            "hlcm",
            "elcm-industrial",
            "elcm-commercial",
            # 'elcm-home_based', # fails
            "dplcm-industrial",
            "dplcm-commercial",
            "dplcm-residential",
            "rlsm",
        ]:
            try:
                self.estimation_runner.run_estimation(estimation_config, model_name, save_estimation_results=False)
                succeeded.append(model_name)
            except Exception:
                # Keep going so that all failing models are reported at once;
                # KeyboardInterrupt and SystemExit are not swallowed.
                logger.log_stack_trace()
                failed.append(model_name)

        if succeeded:
            print("Succeeded in estimating the following models: %s." % ", ".join(succeeded))
        if failed:
            self.fail("Failed to estimate the following models: %s." % ", ".join(failed))
    finally:
        if os.path.exists(cache_dir):
            rmtree(cache_dir)
def get_specification_attributes_from_dictionary(specification_dict):
    """Collect specification attributes from the dictionary specification_dict.

    Keys of specification_dict are submodels (use -2 as the key if there is
    only one submodel). The value for each submodel is either a list or a
    dictionary containing the specification for that submodel.

    If it is a list, each element can have one of the following forms:
        - a character string specifying a variable in its fully qualified
          name or as an expression - the coefficient name will then be the
          alias of the variable
        - a tuple of length 2: variable name as above, and the corresponding
          coefficient name
        - a tuple of length 3: variable name, coefficient name, fixed value
          of the coefficient (if the coefficient should not be estimated)
        - a dictionary with pairs variable name, coefficient name

    If it is a dictionary, it can contain a specification for each equation
    or for elements of other fields. Its entry 'name' (optional) specifies
    the name of the field; the default is 'equation'. For any other name the
    values are stored in the dictionary attribute 'other_fields'. Each
    element of the submodel dictionary can again be a list (as described
    above) or a dictionary (as described in this paragraph).

    specification_dict can contain an entry '_definition_', which should be
    a list of elements in one of the list forms above. In that case the
    entries defined for submodels can contain only the variable aliases; the
    corresponding coefficient names and fixed values (if defined) are taken
    from the definition section. The '_definition_' entry is removed from
    specification_dict, i.e. the dictionary passed in is modified.

    Any exception raised while processing is logged and re-raised as a
    ValueError. See examples in unit tests below.
    """
    variables, coefficients, equations = [], [], []
    submodels, fixed_values = [], []
    definition = {}
    other_fields = {}
    try:
        if "_definition_" in specification_dict.keys():
            (def_variables, def_coefficients, def_equations, _unused_submodels,
             def_fixed_values, _unused_other_fields) = get_variables_coefficients_equations_for_submodel(
                specification_dict["_definition_"], "_definition_")
            definition["variables"] = def_variables
            definition["coefficients"] = def_coefficients
            definition["equations"] = def_equations
            definition["fixed_values"] = def_fixed_values
            definition["alias"] = [VariableName(name).get_alias() for name in def_variables]
            # The definition section is not a submodel; drop it before the loop below.
            del specification_dict["_definition_"]

        for sub_model, submodel_spec in specification_dict.items():
            parsed = get_variables_coefficients_equations_for_submodel(submodel_spec, sub_model, definition)
            variable, coefficient, equation, submodel, fixed_value, other_field = parsed
            variables.extend(variable)
            coefficients.extend(coefficient)
            equations.extend(equation)
            submodels.extend(submodel)
            fixed_values.extend(fixed_value)
            for key, value in other_field.iteritems():
                if key not in other_fields:
                    other_fields[key] = array(value)
                else:
                    other_fields[key] = concatenate((other_fields[key], value))
    except Exception as e:
        logger.log_stack_trace()
        raise ValueError("Wrong specification format for model specification: %s" % e)
    # NOTE(review): no return statement is visible for this function, so the
    # collected values are not handed back here -- confirm whether the tail of
    # the function is missing.
def test_run_estimation(self):
    """Estimate each Washtenaw model against a freshly created baseyear cache.

    The baseyear cache is created in a temporary directory by running the
    script given by create_baseyear_cache_script_path with the
    washtenaw.tests.test_run_estimation_config configuration. Estimation is
    then run for every model in the list below without saving the results.
    The test fails with the names of all models whose estimation raised an
    exception. The temporary directory is removed at the end in any case.
    """
    cache_dir = mkdtemp(prefix='test_washtenaw_run_estimation_tmp')
    try:
        # Cache to a temporary folder.
        ev = ('%s "%s" --cache-directory="%s" washtenaw.tests.test_run_estimation_config' %
              (sys.executable, create_baseyear_cache_script_path, cache_dir))
        logger.log_status("Invoking '%s'" % ev)
        return_code = os.system(ev)
        # Any non-zero status means failure; testing for '> 0' only would
        # let a negative status slip through as success.
        if return_code != 0:
            raise EnvironmentError('Failed while creating the baseyear cache '
                                   'needed to run Washtenaw estimation tests.')

        estimation_config = {
            'cache_directory': cache_dir,
            'dataset_pool_configuration': DatasetPoolConfiguration(
                package_order=['washtenaw', 'urbansim', 'opus_core'],
                ),
            'datasets_to_cache_after_each_model': [],
            'low_memory_mode': False,
            'base_year': 2000,
            'years': (2000, 2000),
            }

        failed = []
        succeeded = []
        for model_name in [
                'lpm',
                'hlcm',
                'elcm-industrial',
                'elcm-commercial',
                # 'elcm-home_based', # fails
                'dplcm-industrial',
                'dplcm-commercial',
                'dplcm-residential',
                'rlsm',
                ]:
            try:
                self.estimation_runner.run_estimation(estimation_config, model_name, save_estimation_results=False)
                succeeded.append(model_name)
            except Exception:
                # Keep going so that all failing models are reported at once;
                # KeyboardInterrupt and SystemExit are not swallowed.
                logger.log_stack_trace()
                failed.append(model_name)

        if succeeded:
            print('Succeeded in estimating the following models: %s.' % ', '.join(succeeded))
        if failed:
            self.fail('Failed to estimate the following models: %s.' % ', '.join(failed))
    finally:
        if os.path.exists(cache_dir):
            rmtree(cache_dir)
def _print_table(self, table_name):
    """Log all rows of the table 'table_name' as a debugging aid.

    The rows are fetched through self.db.GetResultsFromQuery() and logged
    one per status line inside a logger block. This is best effort only: any
    error is logged together with its stack trace and is not propagated.
    """
    try:
        query = 'select * from %s' % table_name
        rows = self.db.GetResultsFromQuery(query)
        block_title = 'Contents of table %s' % table_name
        logger.start_block(block_title)
        try:
            for row in rows:
                logger.log_status(row)
        finally:
            # Always close the block that was opened above.
            logger.end_block()
    except:
        logger.log_status('Error accessing table %s' % table_name)
        logger.log_stack_trace()
class VariableFactory(object):
    """Class for creating an instance of class Variable from a string that
    specifies the variable name. It should be used by calling the method
    'get_variable'.

    Each variable should be implemented as one of:
        - a class with a name of the variable, which should be placed in a
          module of the same name as the class
        - an expression
        - an alias that has a corresponding expression in the aliases.py file
          in the variables directory for that dataset

    Beware: the expression library is stored on the class, not on the
    instance, so it is shared by all VariableFactory objects.
    """

    # Class dictionary holding the expression library. The keys in the dictionary are pairs
    # (dataset_name, variable_name) and the values are the corresponding expressions.
    # This starts out as an empty dictionary, and can be set using the set_expression_library method.
    _expression_library = {}

    def set_expression_library(self, lib):
        """Replace the expression library shared by all factories with 'lib'."""
        VariableFactory._expression_library = lib

    @staticmethod
    def _import_attribute(module_path, attribute_name):
        """Return 'attribute_name' from the module 'module_path'.

        This does the work of 'from module_path import attribute_name'
        without executing a generated statement. ImportError is raised if
        the module cannot be imported or does not provide the attribute.
        """
        # A non-empty 'fromlist' makes __import__ return the module itself
        # rather than its top-level package.
        module_object = __import__(module_path, globals(), locals(), [str(attribute_name)])
        try:
            return getattr(module_object, attribute_name)
        except AttributeError:
            raise ImportError("cannot import name %s" % attribute_name)

    def get_variable(self, variable_name, dataset, quiet=False, debug=0, index_name=None):
        """Returns an instance of class Variable.

        'variable_name' is an instance of class VariableName.
        'dataset' is an object of class Dataset to which the variable belongs to.
        'quiet' controls what happens if the variable module cannot be
        imported: if it is True, None is returned, otherwise a NameError is
        raised.
        'debug' is either a DebugPrinter or a value from which a
        DebugPrinter is created.
        'index_name' is used for lag variables only.

        None is also returned if the expression library maps the name to the
        variable's own expression (it is then considered a primary
        attribute). A LookupError is raised if there is no package name and
        the variable is neither in the expression library nor a lag
        variable. An error raised by the constructor of the variable class
        is logged and re-raised.
        """
        if not isinstance(debug, DebugPrinter):
            debug = DebugPrinter(debug)

        autogen_class = variable_name.get_autogen_class()
        if autogen_class is not None:
            # variable_name has an autogenerated class -- just use that
            variable_subclass = autogen_class
            substrings = ()
        else:
            # either find the variable name in the expression library (if present), in an appropriate 'aliases' file,
            # or load our variable class as 'variable_subclass' by importing its module
            short_name = variable_name.get_short_name()
            dataset_name = variable_name.get_dataset_name()
            package_name = variable_name.get_package_name()
            # if there isn't a package name, first look in the expression library (if there is a package name, look elsewhere)
            if package_name is None:
                e = VariableFactory._expression_library.get((dataset_name, short_name), None)
                if e is not None:
                    if e == variable_name.get_expression():  # it is a primary attribute
                        return None
                    v = VariableName(e)
                    # index_name is handed on so that it is not lost if the
                    # expression turns out to be a lag variable
                    return VariableFactory().get_variable(v, dataset, quiet=quiet, debug=debug,
                                                          index_name=index_name)
            else:
                # not in the expression library - next look in the appropriate 'aliases' file, if one is present
                # (but only if we have a package name in the first place)
                try:
                    aliases = VariableFactory._import_attribute(
                        '%s.%s.aliases' % (package_name, dataset_name), 'aliases')
                except ImportError:
                    aliases = []
                for a in aliases:
                    # for each definition, see if the alias is equal to the short_name. If it is,
                    # then use that definition for the variable
                    v = VariableName(a)
                    if v.get_alias() == short_name:
                        return VariableFactory().get_variable(v, dataset, quiet=quiet, debug=debug,
                                                              index_name=index_name)

            lag_variable_parser = LagVariableParser()
            if lag_variable_parser.is_short_name_for_lag_variable(short_name):
                lag_attribute_name, lag_offset = lag_variable_parser.parse_lag_variable_short_name(short_name)
                true_short_name = "VVV_lagLLL"
                substrings = (package_name, lag_attribute_name, lag_offset, dataset_name, index_name)
                opus_path = 'opus_core.variables'
            else:
                if package_name is None:
                    raise LookupError(
                        "Incomplete variable specification for '%s.%s' (missing package name, "
                        "and variable is not in expression library not a lag variable)."
                        % (dataset_name, short_name))
                opus_path = '%s.%s' % (package_name, dataset_name)
                true_short_name, substrings = VariableFamilyNameTranslator().\
                    get_translated_variable_name_and_substring_arguments(opus_path, short_name)

            module = '%s.%s' % (opus_path, true_short_name)

            # Note that simply checking for the .py module file would not
            # be safe here, as objects could be instantiated in __init__.py files.
            try:
                ev = "from %s import %s as variable_subclass" % (module, true_short_name)
                debug.print_debug("Evaluating '" + ev + "'.", 12)
                variable_subclass = VariableFactory._import_attribute(module, true_short_name)
                debug.print_debug("Successful.", 12)
            except ImportError:
                if not quiet:
                    from opus_core.simulation_state import SimulationState
                    time = SimulationState().get_current_time()
                    desc = '\n'.join((
                        "Opus variable '%s' does not exist for dataset '%s' in year %s. "
                        "The following error occured when finally trying to import "
                        "the variable '%s' from the Python module "
                        "'%s':",
                        "%s",
                    )) % (true_short_name, opus_path, time, true_short_name, module,
                          indent_text(formatPlainTextExceptionInfoWithoutLog('')))
                    raise NameError(desc)
                return None

        try:
            var_class = variable_subclass(*substrings)
        except:
            # log which variable failed, then let the original error propagate
            logger.log_error("Could not initialize class of variable %s." % variable_name.get_expression())
            logger.log_stack_trace()
            raise
        var_class.set_dataset(dataset)
        return var_class
def test_run_estimation(self):
    """Estimate each Washtenaw model against a freshly created baseyear cache.

    The baseyear cache is created in a temporary directory by running the
    script given by create_baseyear_cache_script_path with the
    washtenaw.tests.test_run_estimation_config configuration. Estimation is
    then run for every model in the list below without saving the results.
    The test fails with the names of all models whose estimation raised an
    exception. The temporary directory is removed at the end in any case.
    """
    cache_dir = mkdtemp(prefix='test_washtenaw_run_estimation_tmp')
    try:
        # Cache to a temporary folder.
        ev = (
            '%s "%s" --cache-directory="%s" washtenaw.tests.test_run_estimation_config'
            % (sys.executable, create_baseyear_cache_script_path, cache_dir))
        logger.log_status("Invoking '%s'" % ev)
        return_code = os.system(ev)
        # Any non-zero status means failure; testing for '> 0' only would
        # let a negative status slip through as success.
        if return_code != 0:
            raise EnvironmentError(
                'Failed while creating the baseyear cache '
                'needed to run Washtenaw estimation tests.')

        estimation_config = {
            'cache_directory': cache_dir,
            'dataset_pool_configuration': DatasetPoolConfiguration(
                package_order=['washtenaw', 'urbansim', 'opus_core'],
            ),
            'datasets_to_cache_after_each_model': [],
            'low_memory_mode': False,
            'base_year': 2000,
            'years': (2000, 2000),
        }

        failed = []
        succeeded = []
        for model_name in [
                'lpm',
                'hlcm',
                'elcm-industrial',
                'elcm-commercial',
                # 'elcm-home_based', # fails
                'dplcm-industrial',
                'dplcm-commercial',
                'dplcm-residential',
                'rlsm',
        ]:
            try:
                self.estimation_runner.run_estimation(
                    estimation_config, model_name, save_estimation_results=False)
                succeeded.append(model_name)
            except Exception:
                # Keep going so that all failing models are reported at once;
                # KeyboardInterrupt and SystemExit are not swallowed.
                logger.log_stack_trace()
                failed.append(model_name)

        if succeeded:
            print('Succeeded in estimating the following models: %s.' % ', '.join(
                succeeded))
        if failed:
            self.fail('Failed to estimate the following models: %s.' %
                      ', '.join(failed))
    finally:
        if os.path.exists(cache_dir):
            rmtree(cache_dir)
def get_variable(self, variable_name, dataset, quiet=False, debug=0, index_name=None):
    """Returns an instance of class Variable.

    'variable_name' is an instance of class VariableName.
    'dataset' is an object of class Dataset to which the variable belongs to.
    'quiet' controls what happens if the variable module cannot be imported:
    if it is True, None is returned, otherwise a NameError is raised.
    'debug' is either a DebugPrinter or a value from which a DebugPrinter is
    created.
    'index_name' is used for lag variables only.

    None is also returned if the expression library maps the name to the
    variable's own expression (it is then considered a primary attribute).
    A LookupError is raised if there is no package name and the variable is
    neither in the expression library nor a lag variable. An error raised by
    the constructor of the variable class is logged and re-raised.
    """
    if not isinstance(debug, DebugPrinter):
        debug = DebugPrinter(debug)

    autogen_class = variable_name.get_autogen_class()
    if autogen_class is not None:
        # variable_name has an autogenerated class -- just use that
        variable_subclass = autogen_class
        substrings = ()
    else:
        # either find the variable name in the expression library (if present), in an appropriate 'aliases' file,
        # or load our variable class as 'variable_subclass' by importing its module
        short_name = variable_name.get_short_name()
        dataset_name = variable_name.get_dataset_name()
        package_name = variable_name.get_package_name()
        # if there isn't a package name, first look in the expression library (if there is a package name, look elsewhere)
        if package_name is None:
            e = VariableFactory._expression_library.get((dataset_name, short_name), None)
            if e is not None:
                if e == variable_name.get_expression():  # it is a primary attribute
                    return None
                v = VariableName(e)
                # index_name is handed on so that it is not lost if the
                # expression turns out to be a lag variable
                return VariableFactory().get_variable(v, dataset, quiet=quiet, debug=debug,
                                                      index_name=index_name)
        else:
            # not in the expression library - next look in the appropriate 'aliases' file, if one is present
            # (but only if we have a package name in the first place)
            try:
                # same effect as 'from <package>.<dataset>.aliases import aliases',
                # without executing a generated statement
                aliases_module = __import__('%s.%s.aliases' % (package_name, dataset_name),
                                            globals(), locals(), ['aliases'])
                aliases = getattr(aliases_module, 'aliases', [])
            except ImportError:
                aliases = []
            for a in aliases:
                # for each definition, see if the alias is equal to the short_name. If it is,
                # then use that definition for the variable
                v = VariableName(a)
                if v.get_alias() == short_name:
                    return VariableFactory().get_variable(v, dataset, quiet=quiet, debug=debug,
                                                          index_name=index_name)

        lag_variable_parser = LagVariableParser()
        if lag_variable_parser.is_short_name_for_lag_variable(short_name):
            lag_attribute_name, lag_offset = lag_variable_parser.parse_lag_variable_short_name(short_name)
            true_short_name = "VVV_lagLLL"
            substrings = (package_name, lag_attribute_name, lag_offset, dataset_name, index_name)
            directory_path = 'opus_core.variables'
        else:
            if package_name is None:
                raise LookupError("Incomplete variable specification for '%s.%s' (missing package name, "
                                  "and variable is not in expression library not a lag variable)."
                                  % (dataset_name, short_name))
            directory_path = '%s.%s' % (package_name, dataset_name)
            true_short_name, substrings = VariableFamilyNameTranslator().\
                get_translated_variable_name_and_substring_arguments(directory_path, short_name)

        module = '%s.%s' % (directory_path, true_short_name)

        try:
            ev = "from %s import %s as variable_subclass" % (module, true_short_name)
            debug.print_debug("Evaluating '" + ev + "'.", 12)
            # A non-empty 'fromlist' makes __import__ return the module itself
            # rather than its top-level package.
            variable_module = __import__(module, globals(), locals(), [str(true_short_name)])
            try:
                variable_subclass = getattr(variable_module, true_short_name)
            except AttributeError:
                # a missing name is reported the same way as a failed import
                raise ImportError("cannot import name %s" % true_short_name)
            debug.print_debug("Successful.", 12)
        except ImportError:
            if not quiet:
                from opus_core.simulation_state import SimulationState
                time = SimulationState().get_current_time()
                raise NameError("Opus variable '%s' does not exist for dataset '%s' in year %s" %
                                (true_short_name, directory_path, time))
            return None

    try:
        var_class = variable_subclass(*substrings)
    except:
        # log which variable failed, then let the original error propagate
        logger.log_error("Could not initialize class of variable %s." % variable_name.get_expression())
        logger.log_stack_trace()
        raise
    var_class.set_dataset(dataset)
    return var_class