def run(self, config, year, *args, **kwargs):
    """Write UrbanSim data for the travel model for ``year``.

    The simulation state is pointed at ``config['cache_directory']`` and
    ``year``, a new dataset pool backed by the attribute cache is created,
    ``prepare_for_run`` is called with the ``travel_model_configuration``
    part of ``config``, and finally the 'travel_zone' dataset and the pool
    are handed to ``create_travel_model_input_file``.

    Any extra positional/keyword arguments are forwarded unchanged to
    ``create_travel_model_input_file``.
    """
    cache_dir = config['cache_directory']
    state = SimulationState()
    state.set_cache_directory(cache_dir)
    state.set_current_time(year)

    cache = AttributeCache()
    session = SessionConfiguration(
        new_instance=True,
        package_order=config['dataset_pool_configuration'].package_order,
        in_storage=cache)
    pool = session.get_dataset_pool()
    travel_zones = pool.get_dataset('travel_zone')

    self.prepare_for_run(config['travel_model_configuration'], year)
    # NOTE(review): the fixed arguments are passed by keyword together with
    # *args, so any extra positional argument would collide with `config`.
    self.create_travel_model_input_file(config=config,
                                        year=year,
                                        zone_set=travel_zones,
                                        datasets=pool,
                                        *args, **kwargs)
def __init__(self, config):
    """Assign every household to a building in its zone and write the result.

    Steps:
      1. Point the simulation state at ``config['base_year']`` and
         ``config['cache_directory']`` and run ``CreateBaseyearCache``.
      2. Pick the output storage: the estimation database when ``config``
         has an 'estimation_database_configuration' entry, otherwise an
         flt directory named ``base_year + 1`` under the cache directory.
      3. For each zone, give all households whose ``zone_id`` matches a
         ``building_id`` drawn at random among the buildings of that zone,
         with probability proportional to ``residential_units``.
      4. Write the 'households' table to the output storage.

    All households are treated as unplaced. Households in zones that are
    skipped (no residential units) keep ``building_id`` -1.
    """
    ss = SimulationState(new_instance=True)
    ss.set_current_time(config['base_year'])
    ss.set_cache_directory(config['cache_directory'])

    SessionConfiguration(new_instance=True,
                         package_order=config['dataset_pool_configuration'].package_order,
                         in_storage=AttributeCache())

    # Per the original note, caching is skipped automatically when the cache
    # already exists. The return value was never used, so it is not kept.
    CreateBaseyearCache().run(config)

    if 'estimation_database_configuration' in config:
        db_server = DatabaseServer(config['estimation_database_configuration'])
        db = db_server.get_database(config['estimation_database_configuration'].database_name)
        out_storage = StorageFactory().get_storage(
            'sql_storage',
            storage_location=db)
    else:
        output_cache = os.path.join(config['cache_directory'], str(config['base_year']+1))
        out_storage = StorageFactory().get_storage('flt_storage', storage_location=output_cache)

    dataset_pool = SessionConfiguration().get_dataset_pool()
    households = dataset_pool.get_dataset("household")
    buildings = dataset_pool.get_dataset("building")
    zones = dataset_pool.get_dataset("zone")
    zone_ids = zones.get_id_attribute()

    capacity_attribute_name = "residential_units"
    capacity_variable_name = "%s=sanfrancisco.zone.aggregate_%s_from_building" % \
                             (capacity_attribute_name, capacity_attribute_name)
    buildings.compute_variables("sanfrancisco.building.zone_id", dataset_pool=dataset_pool)
    zones.compute_variables(capacity_variable_name, dataset_pool=dataset_pool)

    # These arrays do not change inside the loop, so fetch them once.
    building_zone_id = buildings.get_attribute('zone_id')
    building_units = buildings.get_attribute(capacity_attribute_name)
    household_zone_id = households.get_attribute('zone_id')

    # -1 marks "no building assigned".
    household_building_id = zeros(households.size(), dtype='int32')-1

    for zone_id in zone_ids:
        is_household_in_this_zone = (household_zone_id == zone_id)
        n_households = is_household_in_this_zone.sum()
        if n_households == 0:
            # Nothing to place; not worth a warning.
            continue

        capacity = zones.get_attribute_by_id(capacity_attribute_name, zone_id)
        if capacity == 0:
            print("WARNING: zone %s has %s households but only %s units" % (zone_id, n_households, capacity))
            continue

        is_building_in_this_zone = (building_zone_id == zone_id)
        prob = building_units * is_building_in_this_zone / array(capacity, dtype=float64)
        prob_cumsum = ncumsum(prob)
        # Scale the draws by the actual cumulative total and search from the
        # right: rounding in the cumulative sum can then never yield an index
        # past the last building, and zero-probability buildings (including
        # those outside the zone) are never selected.
        r = random(n_households) * prob_cumsum[-1]
        index_to_bldg = searchsorted(prob_cumsum, r, side='right')

        household_building_id[where(is_household_in_this_zone)] = \
            buildings.get_attribute_by_index('building_id', index_to_bldg)

    households.set_values_of_one_attribute('building_id', household_building_id)
    households.write_dataset(out_table_name='households', out_storage=out_storage)
def __init__(self, config):
    """Assign every household to a building in its zone and write the result.

    Steps:
      1. Point the simulation state at ``config['base_year']`` and
         ``config['cache_directory']`` and run ``CreateBaseyearCache``.
      2. Pick the output storage: the estimation database when ``config``
         has an 'estimation_database_configuration' entry, otherwise an
         flt directory named ``base_year + 1`` under the cache directory.
      3. For each zone, give all households whose ``zone_id`` matches a
         ``building_id`` drawn at random among the buildings of that zone,
         with probability proportional to ``residential_units``.
      4. Write the 'households' table to the output storage.

    All households are treated as unplaced. Households in zones that are
    skipped (no residential units) keep ``building_id`` -1.
    """
    ss = SimulationState(new_instance=True)
    ss.set_current_time(config['base_year'])
    ss.set_cache_directory(config['cache_directory'])

    SessionConfiguration(new_instance=True,
                         package_order=config['dataset_pool_configuration'].package_order,
                         in_storage=AttributeCache())

    # Per the original note, caching is skipped automatically when the cache
    # already exists. The return value was never used, so it is not kept.
    CreateBaseyearCache().run(config)

    if 'estimation_database_configuration' in config:
        db_server = DatabaseServer(config['estimation_database_configuration'])
        db = db_server.get_database(config['estimation_database_configuration'].database_name)
        out_storage = StorageFactory().get_storage(
            'sql_storage',
            storage_location=db)
    else:
        output_cache = os.path.join(config['cache_directory'], str(config['base_year']+1))
        out_storage = StorageFactory().get_storage('flt_storage', storage_location=output_cache)

    dataset_pool = SessionConfiguration().get_dataset_pool()
    households = dataset_pool.get_dataset("household")
    buildings = dataset_pool.get_dataset("building")
    zones = dataset_pool.get_dataset("zone")
    zone_ids = zones.get_id_attribute()

    capacity_attribute_name = "residential_units"
    capacity_variable_name = "%s=sanfrancisco.zone.aggregate_%s_from_building" % \
                             (capacity_attribute_name, capacity_attribute_name)
    buildings.compute_variables("sanfrancisco.building.zone_id", dataset_pool=dataset_pool)
    zones.compute_variables(capacity_variable_name, dataset_pool=dataset_pool)

    # These arrays do not change inside the loop, so fetch them once.
    building_zone_id = buildings.get_attribute('zone_id')
    building_units = buildings.get_attribute(capacity_attribute_name)
    household_zone_id = households.get_attribute('zone_id')

    # -1 marks "no building assigned".
    household_building_id = zeros(households.size(), dtype='int32')-1

    for zone_id in zone_ids:
        is_household_in_this_zone = (household_zone_id == zone_id)
        n_households = is_household_in_this_zone.sum()
        if n_households == 0:
            # Nothing to place; not worth a warning.
            continue

        capacity = zones.get_attribute_by_id(capacity_attribute_name, zone_id)
        if capacity == 0:
            print("WARNING: zone %s has %s households but only %s units" % (zone_id, n_households, capacity))
            continue

        is_building_in_this_zone = (building_zone_id == zone_id)
        prob = building_units * is_building_in_this_zone / array(capacity, dtype=float64)
        prob_cumsum = ncumsum(prob)
        # Scale the draws by the actual cumulative total and search from the
        # right: rounding in the cumulative sum can then never yield an index
        # past the last building, and zero-probability buildings (including
        # those outside the zone) are never selected.
        r = random(n_households) * prob_cumsum[-1]
        index_to_bldg = searchsorted(prob_cumsum, r, side='right')

        household_building_id[where(is_household_in_this_zone)] = \
            buildings.get_attribute_by_index('building_id', index_to_bldg)

    households.set_values_of_one_attribute('building_id', household_building_id)
    households.write_dataset(out_table_name='households', out_storage=out_storage)
def run(self, config, show_output=False):
    """Cache the scenario database tables requested by ``config``.

    Reads the database settings from ``config['scenario_database_configuration']``
    and the list of wanted tables from
    ``config['creating_baseyear_cache_configuration'].tables_to_cache``,
    sets the simulation state to ``config['cache_directory']`` /
    ``config['base_year']`` and calls ``cache_database_tables`` once per
    database returned by the scenario database manager.

    A warning is logged for every requested table that is not in
    ``self.tables_cached`` afterwards.
    """
    logger.log_status("Caching large SQL tables to: " + config['cache_directory'])
    self.show_output = show_output

    db_config = config['scenario_database_configuration']
    db_manager = ScenarioDatabaseManager(
        server_configuration=db_config,
        base_scenario_database_name=db_config.database_name)
    self.database_server = DatabaseServer(db_config)
    tables_by_database = db_manager.get_database_to_table_mapping()

    self.tables_to_cache = config['creating_baseyear_cache_configuration'].tables_to_cache

    state = SimulationState()
    if 'low_memory_run' in config:
        state.set_low_memory_run(config['low_memory_run'])
    state.set_cache_directory(config['cache_directory'])
    state.set_current_time(config['base_year'])

    # Reset here; presumably filled in by cache_database_tables (not shown).
    self.tables_cached = set()
    for database_name, tables in tables_by_database.items():
        self.cache_database_tables(config, database_name, tables)

    not_cached = set(self.tables_to_cache) - self.tables_cached
    if not not_cached:
        return
    logger.log_warning('The following requested tables were NOT cached:')
    for table_name in not_cached:
        logger.log_warning('\t%s' % table_name)
def __init__(self, config):
    """Create a 'persons' table with one record per household worker.

    Output goes to the estimation database when ``config`` contains
    'estimation_database_configuration', otherwise to an flt directory
    named ``base_year + 1`` under ``config['cache_directory']``.

    If the base-year directory is missing from the cache, the scenario
    database is cached first. Datasets listed in
    ``config['datasets_to_preload']`` are then loaded into the pool.

    For each household with ``workers > 0``, that many person records are
    generated with ``member_id`` 1..workers, ``is_worker`` 1 and
    ``job_id`` -1. ``person_id`` runs from 1 over all generated records.
    """
    if 'estimation_database_configuration' in config:
        db_server = DatabaseServer(config['estimation_database_configuration'])
        db = db_server.get_database(config['estimation_database_configuration'].database_name)
        out_storage = StorageFactory().build_storage_for_dataset(
            type='sql_storage',
            storage_location=db)
    else:
        out_storage = StorageFactory().get_storage(
            type='flt_storage',
            storage_location=os.path.join(config['cache_directory'], str(config['base_year']+1)))

    simulation_state = SimulationState()
    simulation_state.set_cache_directory(config['cache_directory'])
    simulation_state.set_current_time(config['base_year'])
    attribute_cache = AttributeCache()

    SessionConfiguration(new_instance=True,
                         package_order=config['dataset_pool_configuration'].package_order,
                         in_storage=attribute_cache)

    if not os.path.exists(os.path.join(config['cache_directory'], str(config['base_year']))):
        # NOTE(review): the former `unroll_gridcells=False` keyword was
        # dropped because the caching `run(config, show_output=False)` entry
        # point does not accept it and the call would raise TypeError.
        # Confirm against the CacheScenarioDatabase in use.
        CacheScenarioDatabase().run(config)

    for dataset_name in config['datasets_to_preload']:
        SessionConfiguration().get_dataset_from_pool(dataset_name)

    households = SessionConfiguration().get_dataset_from_pool("household")
    household_ids = households.get_id_attribute()
    workers = households.get_attribute("workers")

    hh_ids = []
    member_ids = []
    for household_id, n_workers in zip(household_ids, workers):
        # A plain int keeps list repetition and range() independent of the
        # numpy scalar type stored in the attribute.
        n_workers = int(n_workers)
        if n_workers > 0:
            hh_ids.extend([household_id] * n_workers)
            member_ids.extend(range(1, n_workers + 1))
    n_persons = len(hh_ids)

    in_storage = StorageFactory().get_storage('dict_storage')
    persons_table_name = 'persons'
    in_storage.write_table(
        table_name=persons_table_name,
        table_data={
            'person_id': arange(n_persons)+1,
            'household_id': array(hh_ids),
            'member_id': array(member_ids),
            'is_worker': array([1] * n_persons),
            'job_id': array([-1] * n_persons),
            },
        )

    persons = PersonDataset(in_storage=in_storage, in_table_name=persons_table_name)
    persons.write_dataset(out_storage=out_storage, out_table_name=persons_table_name)
def run(self, year, cache_directory=None):
    """Compute the variables to export for ``year`` and pass them to the writer.

    The expressions to export come from
    ``self.config['travel_model_configuration']['urbansim_to_tm_variable_mapping']``,
    a mapping ``table name -> {alias: expression}``. Two reserved keys may
    appear in a table entry:

    * ``__filter__`` -- expression; only records where it is > 0 are exported.
    * ``__out_table_name__`` -- output name to use instead of the table name.

    ``cache_directory`` defaults to ``self.config['cache_directory']``.

    Returns whatever ``_call_input_file_writer`` returns.
    """
    if cache_directory is None:
        cache_directory = self.config['cache_directory']
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(cache_directory)
    simulation_state.set_current_time(year)
    attribute_cache = AttributeCache()
    sc = SessionConfiguration(new_instance=True,
                              package_order=self.config['dataset_pool_configuration'].package_order,
                              in_storage=attribute_cache)
    dataset_pool = sc.get_dataset_pool()

    tm_config = self.config['travel_model_configuration']
    data_to_export = tm_config['urbansim_to_tm_variable_mapping']

    reserved_keys = ('__filter__', '__out_table_name__')
    variable_names = {}
    datasets = {}
    filenames = {}
    in_table_names = {}
    for table_name in list(data_to_export.keys()):
        table_spec = data_to_export[table_name]
        # Read the reserved entries without deleting them: the configuration
        # is shared, and removing them would make a later call to run()
        # silently drop the filter and the custom output name.
        filter_expression = table_spec.get('__filter__', None)
        out_table_name = table_spec.get('__out_table_name__', None)
        if out_table_name is None:
            out_table_name = table_name

        variables_to_export = ["%s = %s" % (alias, expression)
                               for alias, expression in table_spec.items()
                               if alias not in reserved_keys]

        dataset_name = None
        ds = None
        for var in variables_to_export:
            var_name = VariableName(var)
            if dataset_name is None:
                # The first variable determines which dataset this table uses.
                dataset_name = var_name.get_dataset_name()
                ds = dataset_pool.get_dataset(dataset_name)
                datasets[dataset_name] = ds
                filenames[dataset_name] = out_table_name
                in_table_names[dataset_name] = table_name
                if dataset_name not in variable_names:
                    variable_names[dataset_name] = []
            variable_names[dataset_name].append(var_name.get_alias())
            ds.compute_variables([var_name], dataset_pool=dataset_pool)

        if ds is None:
            # No variables listed for this table: nothing to filter or export.
            continue

        if filter_expression is not None:
            filter_idx = where(ds.compute_variables(["__filter__ = %s" % filter_expression],
                                                    dataset_pool=dataset_pool) > 0)[0]
            ds = DatasetSubset(ds, index=filter_idx)
            datasets[dataset_name] = ds

    return self._call_input_file_writer(year, datasets, in_table_names, filenames,
                                        variable_names, dataset_pool)
def run(self, base_directory, urbansim_cache_directory, years):
    """Run the land cover change model once for each year in ``years``.

    base_directory: flt storage location of the land cover data. It is
        used directly for both input and output (the per-year
        sub-directory variant is commented out in the loop).
    urbansim_cache_directory: cache directory set on the simulation state;
        gridcells are read from the attribute cache.
    years: iterable of years to run.

    Coefficients and the specification are loaded from tab storage in the
    'data' directory under ``self.package_path``.
    """
    lccm = LandCoverChangeModel(self.possible_lcts,
                                submodel_string=self.lct_attribute,
                                choice_attribute_name=self.lct_attribute,
                                debuglevel=4)

    coefficients = Coefficients()
    tab_storage = StorageFactory().get_storage(
        'tab_storage',
        storage_location=os.path.join(self.package_path, 'data'))
    coefficients.load(in_storage=tab_storage,
                      in_table_name="land_cover_change_model_coefficients")

    specification = EquationSpecification(in_storage=tab_storage)
    specification.load(in_table_name="land_cover_change_model_specification")
    specification.set_variable_prefix("biocomplexity.land_cover.")

    constants = Constants()
    state = SimulationState()
    state.set_cache_directory(urbansim_cache_directory)
    cache = AttributeCache()

    # Only the first 100000 land cover records are used each year.
    subset_index = arange(100000)

    for year in years:
        state.set_current_time(year)
        #land_cover_path = os.path.join(base_directory, str(year))
        land_cover_path = base_directory
        flt_in = StorageFactory().get_storage('flt_storage',
                                              storage_location=land_cover_path)
        flt_out = StorageFactory().get_storage('flt_storage',
                                               storage_location=land_cover_path)
        land_covers = LandCoverDataset(in_storage=flt_in,
                                       out_storage=flt_out,
                                       debuglevel=4)
        land_covers.subset_by_index(subset_index)
        gridcells = GridcellDataset(in_storage=cache, debuglevel=4)

        lccm.run(specification, coefficients, land_covers,
                 data_objects={"gridcell": gridcells,
                               "constants": constants,
                               "flush_variables": True},
                 chunk_specification={'nchunks': 1})
        land_covers.flush_dataset()

        # Drop the per-year datasets before the next iteration.
        del gridcells
        del land_covers
def run(self, year, skim_directory=None):
    """Set up the simulation state for ``year`` and write the travel data.

    The cache directory comes from ``self.config['cache_directory']``.
    ``skim_directory`` is accepted but not used here. Per the original
    description, the result is a new travel_data cache for year+1; that
    work is delegated to ``write_travel_data``.
    """
    cache_dir = self.config['cache_directory']
    state = SimulationState()
    state.set_current_time(year)
    state.set_cache_directory(cache_dir)

    # The value is not used, but the lookup is kept so a missing entry for
    # `year` in the travel model configuration still fails here.
    self.config['travel_model_configuration'][year]

    self.write_travel_data(year, cache_dir)
def run(self, year):
    """Prepare household and job zone ids for ``year`` and call the file writer.

    The simulation state is set from ``self.config['cache_directory']`` and
    ``year`` and a new dataset pool is created on the attribute cache.

    If 'locations_to_disaggregate' in the travel_model_configuration is a
    non-empty list of dataset names (ordered from higher to lower
    aggregation level, e.g. ['parcel', 'building']), the zone id is
    disaggregated onto households and jobs: the first entry is the primary
    location and the remaining ones are passed as intermediates. With an
    empty list nothing is computed.

    Returns the result of ``_call_input_file_writer(year, dataset_pool)``.
    """
    cache_dir = self.config['cache_directory']
    state = SimulationState()
    state.set_cache_directory(cache_dir)
    state.set_current_time(year)

    cache = AttributeCache()
    session = SessionConfiguration(
        new_instance=True,
        package_order=self.config['dataset_pool_configuration'].package_order,
        in_storage=cache)
    dataset_pool = session.get_dataset_pool()

    households = dataset_pool.get_dataset('household')
    zones = dataset_pool.get_dataset('zone')
    jobs = dataset_pool.get_dataset('job')

    locations = self.config['travel_model_configuration']['locations_to_disaggregate']
    n_locations = len(locations)
    if n_locations > 0:
        primary_location = locations[0]
        if n_locations > 1:
            # Every intermediate is followed by ", " (including the last one).
            listed = "".join(["%s, " % (locations[i],) for i in range(1, n_locations)])
            intermediates = ", intermediates=[" + listed + "]"
        else:
            intermediates = ""

        zone_id_name = zones.get_id_name()[0]
        households.compute_variables(
            ['%s = household.disaggregate(%s.%s %s)' % (zone_id_name, primary_location,
                                                        zone_id_name, intermediates)],
            dataset_pool=dataset_pool)
        jobs.compute_variables(
            ['%s = job.disaggregate(%s.%s %s)' % (zone_id_name, primary_location,
                                                  zone_id_name, intermediates)],
            dataset_pool=dataset_pool)

    return self._call_input_file_writer(year, dataset_pool)
def target_func(self, est_v, func=lambda x, y: np.sum(np.abs(x - y)), **kwargs):
    """Return the calibration objective for the estimate vector ``est_v``.

    The simulation state is set to ``self.base_year`` and
    ``self.cache_directory``, a new dataset pool is created and
    ``update_prediction`` is called. The predictions are then aligned with
    the keys of ``self.target`` and compared through ``func``.

    est_v: passed through to ``update_prediction``.
    func: callable ``func(predicted, target)`` applied to two numpy arrays;
        defaults to the sum of absolute differences.
    kwargs: forwarded to ``update_prediction``.

    Keys of ``self.target`` missing from the prediction count as 0.
    """
    simulation_state = SimulationState()
    simulation_state.set_current_time(self.base_year)
    simulation_state.set_cache_directory(self.cache_directory)
    attribute_cache = AttributeCache()
    dataset_pool = SessionConfiguration(
        new_instance=True,
        package_order=self.package_order,
        in_storage=attribute_cache).get_dataset_pool()

    # NOTE(review): a loop that built [dataset, calib_attr, index] entries
    # used to follow. It iterated over this just-created empty dict, so it
    # never ran, and its body read the undefined names `subset` and
    # `subset_patterns`. It was removed as unreachable; update_prediction
    # still receives an empty mapping, exactly as before. If per-dataset
    # subsetting is wanted, the source of those settings must be decided.
    calib_datasets = {}

    prediction = self.update_prediction(est_v, simulation_state,
                                        dataset_pool, calib_datasets,
                                        **kwargs)

    # Build both arrays from one key list so they stay aligned and do not
    # depend on dict view objects being accepted by np.array.
    target_keys = list(self.target.keys())
    target = np.array([self.target[k] for k in target_keys])
    predct = np.array([prediction.get(k, 0) for k in target_keys])
    return func(predct, target)
def setup_environment(cache_directory, year, package_order, additional_datasets=None):
    """Create a fresh simulation environment and return its dataset pool.

    cache_directory: cache directory set on the new simulation state.
    year: current time set on the simulation state; also the year whose
        flt storage location is reported in the log message.
    package_order: package order for the new session configuration.
    additional_datasets: optional mapping ``name -> dataset``; each entry
        replaces the dataset of that name in the pool.

    Returns the dataset pool of the new session configuration.
    """
    gc.collect()
    ss = SimulationState(new_instance=True)
    ss.set_cache_directory(cache_directory)
    ss.set_current_time(year)
    ac = AttributeCache()
    storage = ac.get_flt_storage_for_year(year)
    sc = SessionConfiguration(new_instance=True,
                              package_order=package_order,
                              in_storage=ac)
    logger.log_status("Setup environment for year %s. Use cache directory %s." %
                      (year, storage.get_storage_location()))
    dp = sc.get_dataset_pool()

    # The default is None rather than a shared dict literal, so no mutable
    # object is reused across calls.
    if additional_datasets:
        for name, ds in additional_datasets.items():
            dp.replace_dataset(name, ds)
    return dp
def run(self, year):
    """Fetch the configured matrices for ``year`` through ``get_needed_matrices_from_emme4``.

    Original author's notes: like its parent, but report files have a
    different format and there are no banks; zones are assumed to have no
    gaps.

    The bank path is the emme2 base directory joined with the 'bank' path
    components of the year's travel model configuration. Each key of the
    year's 'matrix_variable_map' is a dot-separated path below that
    directory.
    """
    cache_dir = self.config['cache_directory']
    state = SimulationState()
    state.set_current_time(year)
    state.set_cache_directory(cache_dir)

    year_config = self.config['travel_model_configuration'][year]
    separator = os.path.sep
    bank_path = separator.join([self.get_emme2_base_dir()] + year_config['bank'])

    for dotted_path, variable_dict in year_config['matrix_variable_map'].items():
        path_name = separator.join([bank_path] + dotted_path.split('.'))
        self.get_needed_matrices_from_emme4(year,
                                            year_config['cache_directory'],
                                            path_name,
                                            variable_dict)
def run(self, year):
    """Disaggregate zone ids onto households and jobs, then write the input files.

    Uses ``self.config['cache_directory']`` and ``year`` for the simulation
    state and builds a new dataset pool on the attribute cache.

    'locations_to_disaggregate' (travel_model_configuration) is a list of
    dataset names ordered from higher to lower aggregation level, e.g.
    ['parcel', 'building']. When it is not empty, the zone id is computed
    for households and jobs through ``disaggregate``, using the first
    entry as the primary location and the rest as intermediates. When it
    is empty, this step is skipped.

    Returns the result of ``_call_input_file_writer(year, dataset_pool)``.
    """
    cache_dir = self.config['cache_directory']
    sim_state = SimulationState()
    sim_state.set_cache_directory(cache_dir)
    sim_state.set_current_time(year)
    attr_cache = AttributeCache()
    session = SessionConfiguration(
        new_instance=True,
        package_order=self.config['dataset_pool_configuration'].package_order,
        in_storage=attr_cache)
    dataset_pool = session.get_dataset_pool()

    hh_set = dataset_pool.get_dataset('household')
    zone_set = dataset_pool.get_dataset('zone')
    job_set = dataset_pool.get_dataset('job')

    to_disaggregate = self.config['travel_model_configuration']['locations_to_disaggregate']
    count = len(to_disaggregate)
    if count > 0:
        primary = to_disaggregate[0]
        suffix = ""
        if count > 1:
            # Builds ", intermediates=[a, b, ]" -- each name followed by ", ".
            pieces = [", intermediates=["]
            for position in range(1, count):
                pieces.append("%s, " % (to_disaggregate[position],))
            pieces.append("]")
            suffix = "".join(pieces)

        id_name = zone_set.get_id_name()[0]
        hh_expression = '%s = household.disaggregate(%s.%s %s)' % (
            id_name, primary, id_name, suffix)
        job_expression = '%s = job.disaggregate(%s.%s %s)' % (
            id_name, primary, id_name, suffix)
        hh_set.compute_variables([hh_expression], dataset_pool=dataset_pool)
        job_set.compute_variables([job_expression], dataset_pool=dataset_pool)

    return self._call_input_file_writer(year, dataset_pool)
def run(self, year):
    """Fetch the configured matrices for ``year`` through ``get_needed_matrices_from_emme4``.

    Original author's notes: like its parent, but skims are stored locally
    in matrix_directory in hdf5 format, one file per year called
    xxxx-travelmodel.h5 (xxxx being the year), with one group per bank
    (e.g. Bank1) containing the matrices; zones are assumed to have no gaps.
    NOTE(review): the code below builds plain directory paths and does not
    reference an .h5 file (that line is commented out) -- confirm the
    notes still apply.

    The bank path is the emme2 base directory joined with the year's
    'bank' path components; each key of 'matrix_variable_map' is a
    dot-separated path below it.
    """
    cache_dir = self.config['cache_directory']
    sim_state = SimulationState()
    sim_state.set_current_time(year)
    sim_state.set_cache_directory(cache_dir)

    year_config = self.config['travel_model_configuration'][year]
    bank_components = [self.get_emme2_base_dir()] + year_config['bank']
    bank_path = os.path.sep.join(bank_components)
    #bank_file = os.path.join(matrix_directory, "%s-travelmodel.h5" % bank_year)

    matrix_map = year_config['matrix_variable_map']
    for dotted_path, variable_dict in matrix_map.items():
        path_name = os.path.sep.join([bank_path] + dotted_path.split('.'))
        self.get_needed_matrices_from_emme4(year, year_config['cache_directory'],
                                            path_name, variable_dict)
def setup_environment(cache_directory, year, package_order, additional_datasets=None):
    """Create a fresh simulation environment and return its dataset pool.

    cache_directory: cache directory set on the new simulation state.
    year: current time set on the simulation state; also the year whose
        flt storage location is reported in the log message.
    package_order: package order for the new session configuration.
    additional_datasets: optional mapping ``name -> dataset``; each entry
        replaces the dataset of that name in the pool.

    Returns the dataset pool of the new session configuration.
    """
    gc.collect()
    ss = SimulationState(new_instance=True)
    ss.set_cache_directory(cache_directory)
    ss.set_current_time(year)
    ac = AttributeCache()
    storage = ac.get_flt_storage_for_year(year)
    sc = SessionConfiguration(new_instance=True,
                              package_order=package_order,
                              in_storage=ac)
    logger.log_status(
        "Setup environment for year %s. Use cache directory %s." %
        (year, storage.get_storage_location()))
    dp = sc.get_dataset_pool()

    # The default is None rather than a shared dict literal, so no mutable
    # object is reused across calls.
    if additional_datasets:
        for name, ds in additional_datasets.items():
            dp.replace_dataset(name, ds)
    return dp
def run(self, year, matrix_directory=None):
    """Copy emme/2 matrices for ``year`` into travel_data, bank by bank.

    For bank1..bank3 the bank directory is ``get_emme2_dir(year, bank)``,
    or ``matrix_directory/bank`` when ``matrix_directory`` is given (the
    matrix files are then assumed to exist already, which is passed on as
    the ``matrices_created`` flag).

    For each bank listed in the year's 'matrix_variable_map' the matrices
    are fetched and every report from 'reports_to_copy' is copied to the
    cache. For each bank with a non-empty entry in
    'node_matrix_variable_map' the node matrices are fetched as well.

    Per the original description this results in a new travel_data cache
    for year+1.
    """
    cache_dir = self.config['cache_directory']
    state = SimulationState()
    state.set_current_time(year)
    state.set_cache_directory(cache_dir)

    year_config = self.config['travel_model_configuration'][year]
    matrices_created = matrix_directory is not None
    reports = self.config['travel_model_configuration'].get('reports_to_copy', [])

    for bank_number in (1, 2, 3):
        bank_name = "bank%i" % bank_number
        if matrix_directory is None:
            bank_dir = self.get_emme2_dir(year, bank_name)
        else:
            bank_dir = os.path.join(matrix_directory, bank_name)

        if bank_name in year_config['matrix_variable_map']:
            self.get_needed_matrices_from_emme2(
                year,
                year_config['cache_directory'],
                bank_dir,
                year_config['matrix_variable_map'][bank_name],
                matrices_created)
            for report in reports:
                self.copy_report_to_cache(report, year,
                                          year_config['cache_directory'],
                                          bank_dir)

        if bank_name in year_config.get('node_matrix_variable_map', {}):
            node_variable_map = year_config['node_matrix_variable_map'][bank_name]
            if len(node_variable_map.keys()) > 0:
                self.get_needed_node_matrices_from_emme2(
                    year,
                    year_config['cache_directory'],
                    bank_dir,
                    node_variable_map)
def target_func(self, est_v, func=lambda x, y: np.sum(np.abs(x - y)), **kwargs):
    """Return the calibration objective for the estimate vector ``est_v``.

    The simulation state is set to ``self.base_year`` and
    ``self.cache_directory``, a new dataset pool is created and
    ``update_prediction`` is called. The predictions are then aligned with
    the keys of ``self.target`` and compared through ``func``.

    est_v: passed through to ``update_prediction``.
    func: callable ``func(predicted, target)`` applied to two numpy arrays;
        defaults to the sum of absolute differences.
    kwargs: forwarded to ``update_prediction``.

    Keys of ``self.target`` missing from the prediction count as 0.
    """
    simulation_state = SimulationState()
    simulation_state.set_current_time(self.base_year)
    simulation_state.set_cache_directory(self.cache_directory)
    attribute_cache = AttributeCache()
    dataset_pool = SessionConfiguration(
        new_instance=True, package_order=self.package_order, in_storage=attribute_cache
    ).get_dataset_pool()

    # NOTE(review): a loop that built [dataset, calib_attr, index] entries
    # used to follow. It iterated over this just-created empty dict, so it
    # never ran, and its body read the undefined names `subset` and
    # `subset_patterns`. It was removed as unreachable; update_prediction
    # still receives an empty mapping, exactly as before. If per-dataset
    # subsetting is wanted, the source of those settings must be decided.
    calib_datasets = {}

    prediction = self.update_prediction(est_v, simulation_state, dataset_pool, calib_datasets, **kwargs)

    # Build both arrays from one key list so they stay aligned and do not
    # depend on dict view objects being accepted by np.array.
    target_keys = list(self.target.keys())
    target = np.array([self.target[k] for k in target_keys])
    predct = np.array([prediction.get(k, 0) for k in target_keys])
    return func(predct, target)
def import_travel_model_data(config, year):
    """Read travel model CSV outputs for ``year`` and store them for ``year + 1``.

    The files to read come from
    ``config['travel_model_configuration']['tm_to_urbansim_variable_mapping']``
    (dataset name -> file name). Each file is looked up under the travel
    model base directory and the year's 'data_dir', read with
    ``read_csv`` (first row as header) and handed to ``to_opus_dataset``
    together with the flt storage of ``year + 1``.
    """
    cache_dir = config['cache_directory']
    state = SimulationState()
    state.set_current_time(year)
    state.set_cache_directory(cache_dir)

    out_store = AttributeCache().get_flt_storage_for_year(year+1)
    out_store_loc = out_store.get_storage_location()

    tm_config = config['travel_model_configuration']
    data_to_import = tm_config['tm_to_urbansim_variable_mapping']
    base_dir = mtc_common.tm_get_base_dir(config)
    data_dir = tm_config[year]['data_dir']

    for dataset_name, file_name in data_to_import.items():
        source_path = os.path.join(base_dir, data_dir, file_name)
        data = read_csv(source_path, header=0)
        with block("Caching {} to {}".format(dataset_name, out_store_loc)):
            logger.log_status("Source file {}".format(source_path))
            # The returned dataset is not needed here.
            to_opus_dataset(data, out_store, dataset_name)
def run(self, year):
    """Copy skims stored in hdf5 format into the UrbanSim cache.

    Original author's notes: should run after
    psrc_parcel.emme.models.run_export_skims, which creates the skims
    hdf5 file. It creates a travel_model dataset with each skim being an
    attribute of it. Zones are assumed to have no gaps.

    Arguments:
    year -- year of the urbansim run. Used to extract the TM year from the
            bank configuration.

    Configuration entries (in travel_model_configuration) used:
    matrix_variable_map -- dictionary of bank names and corresponding skim
            names. Bank names are the path where (back-)slashes are
            replaced by dots, e.g. skims.auto.am. The value for each bank
            name is a dictionary whose keys are skim names and whose
            values are the desired urbansim attribute names, e.g.
            {'skims.nonmotorized.am':
                    {'abketm': 'am_bike_to_work_travel_time',
                     'awlktm': 'am_walk_time_in_minutes'}}
    matrix_h5_directory -- directory of the hdf5 file xxxx-travelmodel.h5,
            where xxxx is the TM year (default is the Emme base
            directory); the file contains the skims as n x n matrices.
    """
    cache_dir = self.config['cache_directory']
    state = SimulationState()
    state.set_current_time(year)
    state.set_cache_directory(cache_dir)

    tmconfig = self.config['travel_model_configuration']
    year_config = tmconfig[year]
    matrix_directory = tmconfig.get('matrix_h5_directory', self.get_emme2_base_dir())

    # The first component of the year's bank entry is the TM year.
    bank_year = year_config['bank'][0]
    h5_name = "%s-travelmodel.h5" % bank_year
    bank_file = os.path.join(matrix_directory, h5_name)

    matrix_map = year_config['matrix_variable_map']
    for bank_name, variable_dict in matrix_map.items():
        self.get_needed_matrices_from_emme4(year,
                                            year_config['cache_directory'],
                                            bank_name,
                                            variable_dict,
                                            bank_file=bank_file)
def run(self, base_directory, urbansim_cache_directory, years):
    """Simulate land cover change for every year in ``years``.

    base_directory: location of the land cover flt data; read from and
        flushed back to this same directory for every year (the per-year
        sub-directory variant is commented out below).
    urbansim_cache_directory: cache directory for the simulation state,
        from which gridcells are loaded through the attribute cache.
    years: iterable of years to run.

    Model coefficients and specification are read from tab storage in
    ``<self.package_path>/data``.
    """
    data_dir = os.path.join(self.package_path, 'data')
    model = LandCoverChangeModel(self.possible_lcts,
                                 submodel_string=self.lct_attribute,
                                 choice_attribute_name=self.lct_attribute,
                                 debuglevel=4)
    coefficients = Coefficients()
    storage = StorageFactory().get_storage('tab_storage', storage_location=data_dir)
    coefficients.load(in_storage=storage,
                      in_table_name="land_cover_change_model_coefficients")
    specification = EquationSpecification(in_storage=storage)
    specification.load(in_table_name="land_cover_change_model_specification")
    specification.set_variable_prefix("biocomplexity.land_cover.")
    constants = Constants()

    simulation_state = SimulationState()
    simulation_state.set_cache_directory(urbansim_cache_directory)
    attribute_cache = AttributeCache()

    # Restrict each run to the first 100000 land cover records.
    index = arange(100000)
    chunks = {'nchunks': 1}

    for year in years:
        simulation_state.set_current_time(year)
        #land_cover_path = os.path.join(base_directory, str(year))
        land_cover_path = base_directory
        land_covers = LandCoverDataset(
            in_storage=StorageFactory().get_storage(
                'flt_storage', storage_location=land_cover_path),
            out_storage=StorageFactory().get_storage(
                'flt_storage', storage_location=land_cover_path),
            debuglevel=4)
        land_covers.subset_by_index(index)
        gridcells = GridcellDataset(in_storage=attribute_cache, debuglevel=4)

        data_objects = {"gridcell": gridcells,
                        "constants": constants,
                        "flush_variables": True}
        model.run(specification, coefficients, land_covers,
                  data_objects=data_objects,
                  chunk_specification=chunks)
        land_covers.flush_dataset()

        # Release the per-year datasets before the next year is loaded.
        del data_objects
        del gridcells
        del land_covers
def run(self, config, year, *args, **kwargs):
    """Fetch travel data from the travel model and cache it for ``year + 1``.

    config: run configuration; 'cache_directory',
        'dataset_pool_configuration' and 'travel_model_configuration' are
        read from it.
    year: simulation year the travel model output belongs to.
    args, kwargs: forwarded to ``get_travel_data_from_travel_model``.

    The flt directory for ``year + 1`` is created under the cache
    directory if missing, the 'zone' dataset is loaded,
    ``prepare_for_run`` is called, and the dataset returned by
    ``get_travel_data_from_travel_model`` is written as table
    'travel_data' into that directory.

    Both logger blocks are closed in ``finally`` clauses so that a failure
    inside a block does not leave the logger's block nesting open; the
    exception itself still propagates.
    """
    cache_directory = config['cache_directory']
    simulation_state = SimulationState()
    simulation_state.set_current_time(year)
    simulation_state.set_cache_directory(cache_directory)

    logger.start_block('Getting data from travel model')
    try:
        next_year = year + 1
        flt_dir_for_next_year = os.path.join(cache_directory, str(next_year))
        if not os.path.exists(flt_dir_for_next_year):
            os.mkdir(flt_dir_for_next_year)

        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(
            new_instance=True,
            package_order=config['dataset_pool_configuration'].package_order,
            in_storage=attribute_cache).get_dataset_pool()
        zone_set = dataset_pool.get_dataset('zone')
        zone_set.load_dataset()

        self.prepare_for_run(config['travel_model_configuration'], year)
        travel_data_set = self.get_travel_data_from_travel_model(config, year, zone_set,
                                                                 *args, **kwargs)
    finally:
        logger.end_block()

    logger.start_block('Writing travel data to cache')
    try:
        out_storage = StorageFactory().get_storage('flt_storage',
                                                   storage_location=flt_dir_for_next_year)
        travel_data_set.write_dataset(attributes=travel_data_set.get_known_attribute_names(),
                                      out_storage=out_storage,
                                      out_table_name='travel_data')
    finally:
        logger.end_block()
def run(self, config, year, *args, **kwargs):
    """Main entry point: write UrbanSim data into travel model input files.

    Points the simulation state at the cache directory and year, builds a
    fresh dataset pool on top of the attribute cache, fetches the 'zone'
    dataset and then delegates to prepare_for_run() and
    create_travel_model_input_file().

    config: configuration with (at least) the entries 'cache_directory',
        'dataset_pool_configuration' and 'travel_model_configuration'.
    year: the simulation year to export.
    *args, **kwargs: passed through to create_travel_model_input_file().
    """
    state = SimulationState()
    state.set_cache_directory(config['cache_directory'])
    state.set_current_time(year)

    session = SessionConfiguration(
        new_instance=True,
        package_order=config['dataset_pool_configuration'].package_order,
        in_storage=AttributeCache())
    pool = session.get_dataset_pool()
    zones = pool.get_dataset('zone')

    self.prepare_for_run(config['travel_model_configuration'], year)
    self.create_travel_model_input_file(config, year, zones, pool,
                                        *args, **kwargs)
def run(self, config, show_output=False):
    """Cache the requested scenario database tables and report the misses.

    For every (database, tables) pair of the scenario database mapping,
    self.cache_database_tables() is called. Afterwards every table listed in
    the 'creating_baseyear_cache_configuration' that does not appear in
    self.tables_cached is reported with a logger warning.

    config: configuration with (at least) the entries 'cache_directory',
        'scenario_database_configuration',
        'creating_baseyear_cache_configuration' and 'base_year';
        'low_memory_run' is optional.
    show_output: stored on the instance as self.show_output.
    """
    logger.log_status("Caching large SQL tables to: " + config['cache_directory'])
    self.show_output = show_output

    db_config = config['scenario_database_configuration']
    manager = ScenarioDatabaseManager(
        server_configuration=db_config,
        base_scenario_database_name=db_config.database_name)
    self.database_server = DatabaseServer(db_config)
    tables_by_database = manager.get_database_to_table_mapping()

    cache_config = config['creating_baseyear_cache_configuration']
    self.tables_to_cache = cache_config.tables_to_cache

    state = SimulationState()
    if 'low_memory_run' in config:
        state.set_low_memory_run(config['low_memory_run'])
    state.set_cache_directory(config['cache_directory'])
    state.set_current_time(config['base_year'])

    self.tables_cached = set()
    for database_name, tables in tables_by_database.items():
        self.cache_database_tables(config, database_name, tables)

    missing_tables = set(self.tables_to_cache) - self.tables_cached
    if not missing_tables:
        return
    logger.log_warning('The following requested tables were NOT cached:')
    for table_name in missing_tables:
        logger.log_warning('\t%s' % table_name)
def run(self, year, matrix_directory=None):
    """Main entry point: copy emme/2 matrices into travel_data variables.

    Gets the appropriate values from the travel_model_configuration part of
    self.config and, for each of bank1..bank3, copies the configured
    emme/2 matrices (and node matrices) into the specified travel_data
    variable names.

    year: the simulation year; selects the per-year travel model
        configuration.
    matrix_directory: if not None, it is assumed that the matrix files are
        already created in "<matrix_directory>/bank<N>"; otherwise the bank
        directory comes from self.get_emme2_dir().
    """
    state = SimulationState()
    state.set_current_time(year)
    state.set_cache_directory(self.config['cache_directory'])

    travel_config = self.config['travel_model_configuration']
    year_config = travel_config[year]
    # Matrices count as already created whenever a directory was handed in.
    matrices_created = matrix_directory is not None
    reports = travel_config.get('reports_to_copy', [])

    for bank_number in (1, 2, 3):
        bank = "bank%i" % bank_number
        if matrix_directory is not None:
            bank_dir = os.path.join(matrix_directory, bank)
        else:
            bank_dir = self.get_emme2_dir(year, bank)

        if bank in year_config['matrix_variable_map']:
            self.get_needed_matrices_from_emme2(
                year,
                year_config['cache_directory'],
                bank_dir,
                year_config['matrix_variable_map'][bank],
                matrices_created)
            # NOTE(review): reports are copied only for banks that have a
            # matrix variable map -- nesting reconstructed, please confirm.
            for report in reports:
                self.copy_report_to_cache(report, year,
                                          year_config['cache_directory'],
                                          bank_dir)

        if bank in year_config.get('node_matrix_variable_map', {}):
            node_variable_map = year_config['node_matrix_variable_map'][bank]
            if len(node_variable_map.keys()) > 0:
                self.get_needed_node_matrices_from_emme2(
                    year,
                    year_config['cache_directory'],
                    bank_dir,
                    node_variable_map)
runs = Runs.runs comparison_variables = Runs.comparison_variables baseline = Runs.baseline simulation_state = SimulationState() simulation_state.set_current_time(options.year) SessionConfiguration(new_instance=True, package_order=['psrc','urbansim','opus_core'], in_storage=AttributeCache()) if options.augment_variables == True: for dataset_name in comparison_variables.keys(): cache_directory = baseline simulation_state.set_cache_directory(cache_directory) dataset = DatasetFactory().get_dataset(dataset_name, package='urbansim', arguments={'in_storage': AttributeCache()}) variables = comparison_variables[dataset_name] dataset.compute_variables(variables, resources=Resources()) ids = dataset.get_id_attribute() for run in runs.keys(): cache_directory=run simulation_state.set_cache_directory(cache_directory) run_dataset = DatasetFactory().get_dataset(dataset_name, package='urbansim', arguments={'in_storage': AttributeCache()}) match_index = run_dataset.get_id_index(ids) for variable in variables:
] refinements = None refinements_storage = None if options.refinements_directory is not None: refinements_storage = StorageFactory().get_storage( 'flt_storage', storage_location=options.refinements_directory) refinements = DatasetFactory().search_for_dataset( 'refinement', package_order, arguments={'in_storage': refinements_storage}) years = refinements.get_attribute('year') if start_year is None: start_year = years.min() if end_year is None: end_year = years.max() simulation_state = SimulationState() simulation_state.set_cache_directory(options.cache_directory) simulation_state.set_current_time(start_year) attribute_cache = AttributeCache() dataset_pool = SessionConfiguration( new_instance=True, package_order=package_order, in_storage=attribute_cache).get_dataset_pool() if refinements is None: refinements = dataset_pool.get_dataset('refinement') years = refinements.get_attribute('year') if start_year is None: start_year = years.min() if end_year is None: end_year = years.max() for year in range(start_year, end_year + 1): logger.start_block("Doing refinement for %s" % year)
class ModelSystem(object):
    """
    Uses the information in configuration to run/estimate a set of models for given set of years.
    """

    def __init__(self):
        # 'running' and 'forked_processes' are guarded by 'running_conditional';
        # the wait_for_* methods block on it until they change.
        self.running = False
        self.forked_processes = []
        self.running_conditional = threading.Condition()

    def run(self,
            resources,
            write_datasets_to_cache_at_end_of_year=True,
            log_file_name='run_model_system.log',
            cleanup_datasets=True):
        """Run the configured models for every year in resources['years'].

        Entries in resources: (entries with no defaults are required)
               models - a list containing names of models to be run. Each name
                           must correspond to the name of the module/class of that model. Default(object): None
               years - a tuple (start year, end year)
               debuglevel - an integer. The higher the more output will be printed. Default: 0
               expression_library - a dictionary.  The keys in the dictionary are pairs (dataset_name, variable_name)
               and the values are the corresponding expressions.  The model system needs to set the expression library
               (if it isn't None) in DatasetFactory for DatasetFactory to know about variables defined as expressions
               in the xml expression library.  Default: None
        This method is called both to start up the simulation for all years, and also for each year
        when running with one process per year.  In the latter case, 'years' consists of just
        (current_year, current_year) rather than the real start and end years for the simulation.

        Raises TypeError if 'resources' is not a Resources object or 'years'
        is neither tuple nor list, and StandardError if 'years' has fewer
        than two entries.
        """
        if not isinstance(resources, Resources):
            raise TypeError("Argument 'resources' must be of type 'Resources'.")
        logger_settings = resources.get("log", {
            "tags": [],
            "verbosity_level": 3
        })
        logger.set_tags(logger_settings.get("tags", []))
        logger.set_verbosity_level(logger_settings.get("verbosity_level", 3))
        self.simulation_state = SimulationState()
        self.simulation_state.set_low_memory_run(
            resources.get("low_memory_mode", False))
        self.simulation_state.set_start_time(resources.get("base_year", 0))
        self.run_year_namespace = {}

        if resources.get('cache_directory', None) is not None:
            self.simulation_state.set_cache_directory(
                resources['cache_directory'])

        if 'expression_library' in resources:
            VariableFactory().set_expression_library(
                resources['expression_library'])

        if resources.get('sample_input', False):
            self.update_config_for_multiple_runs(resources)

        cache_directory = self.simulation_state.get_cache_directory()
        log_file = os.path.join(cache_directory, log_file_name)
        logger.enable_file_logging(log_file, verbose=False)
        try:
            logger.log_status("Cache Directory set to: " + cache_directory)

            with logger.block('Start simulation run'):
                models = resources.get("models", [])
                models_in_years = resources.get("models_in_year", {})

                resources.check_obligatory_keys(["years"])

                years = resources["years"]
                if (not isinstance(years, tuple)) and (not isinstance(
                        years, list)):
                    raise TypeError("Entry 'years' in resources must be a tuple.")

                if len(years) < 2:
                    print(years)
                    raise StandardError("Entry 'years' in resources must be of length at least 2.")

                start_year = years[0]
                end_year = years[-1]

                debuglevel = resources.get("debuglevel", 0)
                seed_values = resources.get('seed', NO_SEED)

                logger.log_status("random seed = %s" % str(seed_values))
                seed(seed_values)

                for year in range(start_year, end_year + 1):
                    with logger.block("Starting simulation for year " +
                                      str(year)):
                        self.simulation_state.set_current_time(year)
                        SessionConfiguration().get_dataset_pool(
                        ).remove_all_datasets()
                        # The run-level log file is switched off while the
                        # year runs (_run_year logs to its own per-year file)
                        # and switched back on afterwards, even on failure.
                        logger.disable_file_logging(log_file)
                        try:
                            # A year-specific model list takes precedence
                            # over the general 'models' entry.
                            if models_in_years.get(year, None) is not None:
                                models_to_run = models_in_years[year]
                            else:
                                models_to_run = models
                            self._run_year(
                                year=year,
                                models=models_to_run,
                                simulation_state=self.simulation_state,
                                debuglevel=debuglevel,
                                resources=resources,
                                write_datasets_to_cache_at_end_of_year=
                                write_datasets_to_cache_at_end_of_year,
                                cleanup_datasets=cleanup_datasets)
                        finally:
                            logger.enable_file_logging(log_file, verbose=False)
                        collect()
        finally:
            logger.disable_file_logging(log_file)

    def flush_datasets(self, dataset_names, after_model=False):
        """Flush every named dataset that is currently in the dataset pool."""
        dataset_pool = SessionConfiguration().get_dataset_pool()
        for dataset_name in dataset_names:
            if dataset_pool.has_dataset(dataset_name):
                self.flush_dataset(dataset_pool.get_dataset(dataset_name),
                                   after_model=after_model)

    def flush_dataset(self, dataset, after_model=False):
        """Write the PRIMARY attributes of this dataset to the cache.

        Objects that are falsy or not a Dataset are ignored. With
        after_model=True the flush is skipped when the attribute counts show
        there is nothing beyond the id attribute(s) to write.
        """
        if dataset and isinstance(dataset, Dataset):
            # Do not flush after model if not necessary
            if after_model:
                if len(dataset.get_attribute_names()) <= len(
                        dataset.get_id_name()):
                    return
                if (len(dataset.get_attribute_names()) == len(dataset.get_known_attribute_names())) and \
                        (len(dataset.get_attributes_in_memory()) <= len(dataset.get_id_name())):
                    dataset.delete_computed_attributes()
                    return
            dataset.delete_computed_attributes()
            dataset.load_and_flush_dataset()

    def flush_datasets_after_model(self, resources):
        """Flush datasets once a model has finished.

        With resources['flush_variables'] set, all datasets in the pool are
        flushed; otherwise only those listed in
        resources['datasets_to_cache_after_each_model'].
        """
        if resources.get('flush_variables', False):
            AttributeCache().delete_computed_tables()
            # this will also delete computed attributes
            datasets_to_cache = SessionConfiguration().get_dataset_pool(
            ).datasets_in_pool().keys()
        else:
            datasets_to_cache = resources.get(
                "datasets_to_cache_after_each_model", [])
        self.flush_datasets(datasets_to_cache, after_model=True)

    def _run_year(self,
                  year,
                  models,
                  simulation_state,
                  debuglevel,
                  resources,
                  write_datasets_to_cache_at_end_of_year,
                  cleanup_datasets=True):
        """Run (or estimate) all entries of 'models' for a single year.

        Assumes that all datasets resides in the cache directory in binary format.

        Datasets and controller imports are bound as local names via exec,
        because the configured argument strings are later evaluated against
        this method's locals() (see do_init, do_prepare, do_process).
        Do not rename locals here without checking the configurations that
        refer to them.
        """
        try:
            import wingdbstub
        except:
            pass
        self.vardict = {}
        log_file_name = os.path.join(simulation_state.get_cache_directory(),
                                     "year_%s_log.txt" % year)
        logger.enable_file_logging(log_file_name, 'w')
        try:
            logger.start_block('Simulate year %s' % year)
            try:
                base_year = resources['base_year']
                if year == base_year:
                    year_for_base_year_cache = year  # case of estimation
                else:
                    year_for_base_year_cache = year - 1
                cache_storage = AttributeCache().get_flt_storage_for_year(
                    year_for_base_year_cache)
                self.vardict['cache_storage'] = cache_storage
                base_cache_storage = AttributeCache().get_flt_storage_for_year(
                    base_year)
                self.vardict['base_cache_storage'] = base_cache_storage
                simulation_state.set_flush_datasets(
                    resources.get("flush_variables", False))
                SessionConfiguration()["simulation_year"] = year
                SessionConfiguration()["debuglevel"] = debuglevel
                datasets_to_preload_in_year = resources.get(
                    'datasets_to_preload_in_year', {})
                if datasets_to_preload_in_year.get(year, None) is not None:
                    datasets_to_preload = datasets_to_preload_in_year[year]
                else:
                    datasets_to_preload = resources.get(
                        'datasets_to_preload', {})
                for dataset_name in datasets_to_preload:
                    SessionConfiguration().get_dataset_from_pool(dataset_name)
                models_configuration = resources.get('models_configuration',
                                                     {})
                dataset_pool = SessionConfiguration().get_dataset_pool()
                datasets = {}
                for dataset_name, its_dataset in dataset_pool.datasets_in_pool(
                ).iteritems():
                    self.vardict[dataset_name] = its_dataset
                    datasets[dataset_name] = its_dataset
                    # This is needed. It resides in locals()
                    # and is passed on to models as they run.
                    ### TODO: There has got to be a better way!
                    exec('%s=its_dataset' % dataset_name)
                model_resources = Resources(datasets)
                n_models, model_group_members_to_run = self.get_number_of_models_and_model_group_members_to_run(
                    models, models_configuration)
                self.run_year_namespace = locals()
                #==========
                # Run the models.
                #==========
                model_number = -1
                for model_entry in models:
                    # list 'models' can be in the form:
                    # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
                    #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
                    #                                      'commercial']}},
                    #  {'model_name_3': ['estimate', 'run']},
                    #  'model_name_4',
                    #  {'model_name_5': {'group_members': '_all_'}}
                    # ]
                    # get list of methods to be processed evtl. for each group member
                    if isinstance(model_entry, dict):
                        model_name, value = model_entry.items()[0]
                        if not isinstance(value, dict):
                            # value is the process or list of processes
                            # ('model_name_3' above); for a model group the
                            # processes are set per member further down.
                            processes = value
                            if not isinstance(processes, list):
                                processes = [processes]
                    else:  # in the form 'model_name_4' in the comment above
                        model_name = model_entry
                        processes = ["run"]
                    group_member = None
                    model_group = model_group_members_to_run[model_name][1]
                    # A model without group members is still run once.
                    last_member = max(
                        1,
                        len(model_group_members_to_run[model_name][0].keys()))
                    for imember in range(last_member):
                        controller_config = models_configuration[model_name][
                            "controller"]
                        model_configuration = models_configuration[model_name]
                        if model_group_members_to_run[model_name][0].keys():
                            group_member_name = model_group_members_to_run[
                                model_name][0].keys()[imember]
                            group_member = ModelGroupMember(
                                model_group, group_member_name)
                            processes = model_group_members_to_run[model_name][
                                0][group_member_name]
                            # A configuration named '<member>_<model>'
                            # overrides the generic model configuration.
                            member_model_name = "%s_%s" % (group_member_name,
                                                           model_name)
                            if member_model_name in models_configuration.keys(
                            ):
                                model_configuration = models_configuration[
                                    member_model_name]
                                if "controller" in model_configuration.keys():
                                    controller_config = model_configuration[
                                        "controller"]
                        datasets_to_preload_for_this_model = controller_config.get(
                            '_model_structure_dependencies_',
                            {}).get('dataset', [])
                        for dataset_name in datasets_to_preload_for_this_model:
                            try:
                                if not dataset_pool.has_dataset(
                                        dataset_name) or (
                                            dataset_name not in datasets.keys()):
                                    ds = dataset_pool.get_dataset(dataset_name)
                                    self.vardict[dataset_name] = ds
                                    datasets[dataset_name] = ds
                                    exec('%s=ds' % dataset_name)
                            except:
                                logger.log_warning(
                                    'Failed to load dataset %s.' % dataset_name)
                        # import part
                        if "import" in controller_config.keys():
                            import_config = controller_config["import"]
                            for import_module in import_config.keys():
                                exec("from %s import %s" %
                                     (import_module,
                                      import_config[import_module]))

                        # gui_import_replacements part
                        # This is a temporary hack -- replicates the functionality of the "import" section
                        # for use with the GUI.  The contents of this part of the config is a dictionary.
                        # Keys are names of models (not used here).  Values are 2 element pairs.
                        # The first element is a name and the second is a value.  Bind the name to the value.
                        if "gui_import_replacements" in controller_config.keys(
                        ):
                            import_replacement_config = controller_config[
                                "gui_import_replacements"]
                            for model_name in import_replacement_config.keys():
                                pair = import_replacement_config[model_name]
                                temp = pair[1]
                                exec("%s = temp" % pair[0])

                        # init part
                        model = self.do_init(locals())

                        # estimate and/or run part
                        for process in processes:
                            model_number = model_number + 1
                            # write status file
                            model.set_model_system_status_parameters(
                                year, n_models, model_number,
                                resources.get('status_file_for_gui', None))
                            model.write_status_for_gui()
                            # prepare part
                            exec(self.do_prepare(locals()))
                            processmodel_config = controller_config[process]
                            if "output" in processmodel_config.keys():
                                outputvar = processmodel_config["output"]
                            else:
                                outputvar = "process_output"
                            self.vardict[outputvar] = self.do_process(locals())
                            exec(outputvar + '=self.vardict[outputvar]')

                            # check command file from gui, if the simulation should be stopped or paused
                            self.do_commands_from_gui(
                                resources.get('command_file_for_gui', None))

                        # NOTE(review): the four statements below are placed
                        # after the process loop (once per model / group
                        # member), since 'del model' inside the loop would
                        # break a second process -- please confirm.
                        # capture namespace for interactive estimation
                        self.run_year_namespace = locals()
                        self.flush_datasets_after_model(resources)
                        del model
                        collect()

                # Write all datasets to cache.
                if write_datasets_to_cache_at_end_of_year:
                    logger.start_block(
                        'Writing datasets to cache for year %s' % year)
                    try:
                        for dataset_name, its_dataset in SessionConfiguration(
                        ).get_dataset_pool().datasets_in_pool().iteritems():
                            self.flush_dataset(its_dataset)
                    finally:
                        logger.end_block()
            finally:
                logger.end_block()
        finally:
            logger.disable_file_logging(log_file_name)

        if cleanup_datasets:
            SessionConfiguration().delete_datasets()

    def do_init(self, parent_state):
        """Run the 'init' part of this model's configuration.
        Returns model object.

        parent_state is the locals() dictionary of the caller; the class name
        and argument strings of the configuration are evaluated against it.
        """
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key != 'self':
                exec('%s = parent_state["%s"]' % (key, key))
        init_config = parent_state['controller_config']["init"]
        group_member = parent_state['group_member']
        if group_member is None:  # No model group
            cmd = "%s(%s)" % (
                init_config["name"],
                self.construct_arguments_from_config(init_config))
            model = eval(cmd)
        else:  # Model belongs to a group
            model = eval("%s(group_member, %s)" %
                         (init_config["name"],
                          self.construct_arguments_from_config(init_config)))
        return model

    def do_prepare(self, parent_state):
        """Prepares for the current model in the parent state's context.
        What to do is determined by the contents of the current model's controller configuration.

        controller_config is the 'controller' part of the model configuration.
        vardict is a dictionary into which the output of the model's 'prepare_output' method will be put.

        Returns a statement string that the caller exec's in order to bind
        the prepare output to a local name (empty string if the controller
        has no 'prepare_for_<process>' entry).
        """
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key != 'self':
                exec('%s = parent_state["%s"]' % (key, key))
        key_name = "prepare_for_%s" % process
        if key_name in controller_config.keys():
            prepare_config = controller_config[key_name]
            if "output" in prepare_config.keys():
                outputvar = prepare_config["output"]
            else:
                outputvar = "prepare_output"
            self.vardict[outputvar] = eval(
                "model.%s(%s)" %
                (prepare_config["name"],
                 self.construct_arguments_from_config(prepare_config)))
            return '%s=self.vardict["%s"]' % (outputvar, outputvar)
        else:
            # do nothing when return value is exec'ed
            return ''

    def do_process(self, parent_state):
        """Call model.<process>(<configured arguments>) in the caller's context and return its result."""
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key != 'self':
                exec('%s = parent_state["%s"]' % (key, key))
        ev = "model.%s(%s)" % (
            process,
            self.construct_arguments_from_config(processmodel_config))
        return eval(ev)

    def get_number_of_models_and_model_group_members_to_run(
            self, models, models_configuration):
        """Count number_of models in the list 'models' that can include group members
        (each member and each process is one model).

        Returns a tuple (number_of_models, model_group_members_to_run), where
        the second item maps each model name to a two-element list
        [{member name: list of processes}, ModelGroup or None].
        Raises KeyError for a malformed model group entry.
        """
        # list models can be in the form:
        # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
        #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
        #                                      'commercial']}},
        #  {'model_name_3': ['estimate', 'run']},
        #  'model_name_4',
        #  {'model_name_5': {'group_members': '_all_'}}
        # ]
        # NOTE(review): the count starts at 1, not 0 -- kept as in the
        # original; confirm that consumers of n_models expect this.
        number_of_models = 1
        model_group_members_to_run = {}
        for model_entry in models:
            if isinstance(model_entry, dict):
                model_name, value = model_entry.items()[0]
                if isinstance(value, dict):  # is a model group
                    if not value.keys()[0] == "group_members":
                        raise KeyError("Key for model " + model_name + " must be 'group_members'.")
                    group_members = value["group_members"]
                    model_group = None
                    if 'group_by_attribute' in models_configuration[
                            model_name]["controller"].keys():
                        group_dataset_name, group_attribute = models_configuration[
                            model_name]["controller"]['group_by_attribute']
                        model_group = ModelGroup(
                            SessionConfiguration().get_dataset_from_pool(
                                group_dataset_name), group_attribute)
                    if not isinstance(group_members, list):
                        group_members = [group_members]
                    if group_members[
                            0] == "_all_":  # see 'model_name_5' example above
                        if model_group is None:
                            raise KeyError("Entry 'group_by_attribute' is missing for model %s" % model_name)
                        group_members = model_group.get_member_names()
                    model_group_members_to_run[model_name] = [{}, model_group]
                    for member in group_members:
                        if isinstance(member, dict):
                            # see 'model_name_2' ('residential') in the comment above
                            member_name = member.keys()[0]
                            model_group_members_to_run[model_name][0][
                                member_name] = member[member_name]
                            # a single process is normalised to a list
                            if not isinstance(
                                    model_group_members_to_run[model_name][0]
                                [member_name], list):
                                model_group_members_to_run[model_name][0][
                                    member_name] = [
                                        model_group_members_to_run[model_name]
                                        [0][member_name]
                                    ]
                            number_of_models += len(
                                model_group_members_to_run[model_name][0]
                                [member_name])
                        else:  # see 'model_name_1'
                            model_group_members_to_run[model_name][0][
                                member] = ["run"]
                            number_of_models += len(
                                model_group_members_to_run[model_name][0]
                                [member])
                else:  # in the form 'model_name_3' in the comment above
                    model_group_members_to_run[model_name] = [{}, None]
                    if not isinstance(value, list):
                        number_of_models += 1
                    else:
                        number_of_models += len(value)
            else:  # in the form 'model_name_4' in the comment above
                model_group_members_to_run[model_entry] = [{}, None]
                number_of_models += 1
        return (number_of_models, model_group_members_to_run)

    def do_commands_from_gui(self, filename=None):
        """Obey the command ('stop', 'pause' or 'resume') found in a command file.

        Returns immediately if filename is None or the file does not exist.
        'stop' exits the process, 'resume' returns; for 'pause' (and, after a
        warning, for any unknown content) the file is read again every 10
        seconds.
        """
        if (filename is None) or not os.path.exists(filename):
            return
        while True:
            # 'with' closes the file even if reading fails
            with open(filename) as f:
                line = f.read().strip()
            if line == 'stop':
                logger.log_warning('Simulation stopped.')
                sys.exit()
            elif line == 'resume':
                break
            elif line != 'pause':
                logger.log_warning(
                    "Unknown command '%s'. Allowed commands: 'stop', 'pause', 'resume'."
                    % line)
            time.sleep(10)

    def run_multiprocess(self, resources):
        """Run the simulation with one forked process per year.

        Every year gets its own seed: taken from resources['_seed_dictionary_']
        if given, otherwise drawn after seeding with resources['seed'].
        The loop stops at the first year whose process reports failure.
        """
        resources = Resources(resources)
        profiler_name = resources.get("profile_filename", None)
        if resources['cache_directory'] is not None:
            cache_directory = resources['cache_directory']
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is absolutely no good reason to be
        ###       changing the Configuration!
        resources['cache_directory'] = cache_directory

        log_file = os.path.join(cache_directory, 'run_multiprocess.log')
        logger.enable_file_logging(log_file)

        start_year = resources["years"][0]
        end_year = resources["years"][-1]
        nyears = end_year - start_year + 1
        root_seed = resources.get("seed", NO_SEED)
        if resources.get('_seed_dictionary_', None) is not None:
            # This is added by the RunManager to ensure reproducibility including restarted runs
            seed_dict = resources.get('_seed_dictionary_')
            seed_array = array(
                [seed_dict[y] for y in range(start_year, end_year + 1)])
        else:
            seed(root_seed)
            seed_array = randint(1, 2**30, nyears)
        logger.log_status("Running simulation for years %d thru %d" %
                          (start_year, end_year))
        logger.log_status("Simulation root seed: %s" % root_seed)

        for iyear, year in enumerate(range(start_year, end_year + 1)):
            success = self._run_each_year_as_separate_process(
                iyear,
                year,
                seed=seed_array[iyear],
                resources=resources,
                profiler_name=profiler_name,
                log_file=log_file)
            if not success:
                break

        self._notify_stopped()
        if profiler_name is not None:  # insert original value
            resources["profile_filename"] = profiler_name
        logger.log_status("Done running simulation for years %d thru %d" %
                          (start_year, end_year))

    #TODO: changing of configuration
    def _run_each_year_as_separate_process(self,
                                           iyear,
                                           year,
                                           seed=None,
                                           resources=None,
                                           profiler_name=None,
                                           log_file=None):
        """Fork a new process that runs the model system for a single year.

        Overwrites 'years', 'seed' and (if profiling) 'profile_filename' in
        resources for the child and returns the success flag of the fork.
        """
        logger.start_block('Running simulation for year %d in new process' %
                           year)
        resources['years'] = (year, year)
        # NOTE(review): the trailing comma stores the seed as a 1-tuple;
        # kept exactly as in the original -- confirm this is intended.
        resources['seed'] = seed,

        if profiler_name is not None:
            # add year to the profile name
            resources["profile_filename"] = "%s_%s" % (profiler_name, year)

        optional_args = []
        if log_file:
            optional_args += ['--log-file-name', os.path.split(log_file)[-1]]

        success = False
        try:
            logger.disable_file_logging(log_file)
            success = self._fork_new_process(
                'opus_core.model_coordinators.model_system',
                resources,
                optional_args=optional_args)
            logger.enable_file_logging(log_file, verbose=False)
        finally:
            logger.end_block()

        return success

    def run_in_one_process(
            self,
            resources,
            run_in_background=False,
            class_path='opus_core.model_coordinators.model_system'):
        """Run the whole simulation in one forked process (module 'class_path')."""
        resources = Resources(resources)
        if resources['cache_directory'] is not None:
            cache_directory = resources['cache_directory']
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        resources['cache_directory'] = cache_directory

        self._fork_new_process('%s' % class_path,
                               resources,
                               delete_temp_dir=False,
                               run_in_background=run_in_background)
        self._notify_stopped()

    def run_in_same_process(self, resources, **kwargs):
        """Run the whole simulation in the current process via RunModelSystem."""
        resources = Resources(resources)
        if resources['cache_directory'] is not None:
            cache_directory = resources['cache_directory']
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        resources['cache_directory'] = cache_directory

        self._notify_started()
        RunModelSystem(model_system=self, resources=resources, **kwargs)
        self._notify_stopped()

    def construct_arguments_from_config(self, config):
        """Render config['arguments'] as a string of keyword arguments.

        Returns "key=value, " for every entry (with a trailing ", "), or an
        empty string if there is no non-empty 'arguments' entry. The string
        is meant to be spliced into a call expression that is eval'ed.
        """
        key = "arguments"
        if (key not in config.keys()) or (len(config[key].keys()) <= 0):
            return ""
        arg_dict = config[key]
        result = ""
        for arg_key in arg_dict.keys():
            result += "%s=%s, " % (arg_key, arg_dict[arg_key])
        return result

    def wait_for_start(self):
        """Block until self.running becomes true."""
        self.running_conditional.acquire()
        while not self.running:
            self.running_conditional.wait()
        self.running_conditional.release()

    def wait_for_finish(self):
        """Block until self.running becomes false."""
        self.running_conditional.acquire()
        while self.running:
            self.running_conditional.wait()
        self.running_conditional.release()

    def wait_for_process_or_finish(self, process_index):
        """Block until a forked process with this index exists or the run stopped.

        Returns process_index, or the index of the last forked process if the
        model system is no longer running.
        """
        self.running_conditional.acquire()
        while process_index >= len(self.forked_processes) and self.running:
            self.running_conditional.wait()
        self.running_conditional.release()
        if not self.running:
            process_index = len(self.forked_processes) - 1
        return process_index

    def _fork_new_process(self,
                          module_name,
                          resources,
                          run_in_background=False,
                          **key_args):
        """Fork a process running 'module_name' and return its success flag.

        Unless run_in_background is set, the call waits for the process and
        cleans it up before returning.
        """
        self.running_conditional.acquire()
        try:
            self.running = True
            self.forked_processes.append(ForkProcess())
            key_args["run_in_background"] = run_in_background
            success = self.forked_processes[-1].fork_new_process(
                module_name, resources, **key_args)
            self.running_conditional.notifyAll()
        finally:
            # release the lock even if forking failed, so that the
            # wait_for_* methods cannot block on it forever
            self.running_conditional.release()
        if not run_in_background:
            self.forked_processes[-1].wait()
            self.forked_processes[-1].cleanup()
        return success

    def _notify_started(self):
        """Mark the model system as running and wake up all waiting threads."""
        self.running_conditional.acquire()
        self.running = True
        self.running_conditional.notifyAll()
        self.running_conditional.release()

    def _notify_stopped(self):
        """Mark the model system as stopped and wake up all waiting threads."""
        self.running_conditional.acquire()
        self.running = False
        self.running_conditional.notifyAll()
        self.running_conditional.release()

    def update_config_for_multiple_runs(self, config):
        """Switch the listed models to sampled coefficients in year base_year+1.

        Every model named in config['models_with_sampled_coefficients'] that
        is scheduled for base_year+1 is replaced there by
        '<model>_sampled_coef', whose configuration is a copy of the model's
        configuration with the 'prepare_for_run' arguments set up for
        sampling. Models that are not found in that year's list are skipped.
        """
        models_to_update = config.get('models_with_sampled_coefficients', [])
        if 'models_in_year' not in config.keys():
            config['models_in_year'] = {}
        first_year = config['base_year'] + 1
        if config['models_in_year'].get(first_year, None) is None:
            config['models_in_year'][first_year] = config.get('models')

        for umodel in models_to_update:
            new_model_name = '%s_sampled_coef' % umodel
            try:
                i = config['models_in_year'][first_year].index(umodel)
                config['models_in_year'][first_year][i] = new_model_name
            except (ValueError, AttributeError, TypeError):
                # The model is not scheduled for that year (or the model
                # list cannot be searched/modified), so there is nothing to
                # rename; skipping avoids touching another model's
                # sampled configuration.
                continue
            config["models_configuration"][new_model_name] = Configuration(
                config["models_configuration"][umodel])
            arguments = config["models_configuration"][new_model_name][
                "controller"]["prepare_for_run"]["arguments"]
            arguments["sample_coefficients"] = True
            # values are source strings that get eval'ed, hence the nested
            # quotes and the bare variable name
            arguments["distribution"] = "'normal'"
            arguments["cache_storage"] = "base_cache_storage"
class Estimator(GenericModelExplorer):
    """Estimate a single model through a ``ModelSystem`` run.

    The model to estimate is taken from the configuration entry 'model_name'
    or, failing that, is the first entry of 'models' that is a dictionary
    whose value is (or contains) the process name "estimate".
    """

    def __init__(self, config=None, save_estimation_results=False):
        """Set up simulation state, session configuration and model system.

        config -- configuration mapping; must contain a non-None
            'cache_directory' and a 'dataset_pool_configuration'.
        save_estimation_results -- when true, ``estimate`` and ``reestimate``
            call ``save_results`` afterwards.

        Raises KeyError when no cache directory is configured (including
        when ``config`` is left at its default of None).
        """
        # ``config is None`` is tested first: the ``in`` test on None raised
        # an unrelated TypeError instead of the explanatory KeyError below.
        if (config is None or 'cache_directory' not in config
                or config['cache_directory'] is None):
            raise KeyError("The cache directory must be specified in the "
                "given configuration, giving the filesystem path to the cache "
                "directory containing the data with which to estimate. Please "
                "check that your configuration contains the 'cache_directory' "
                "entry and that it is not None.")

        self.simulation_state = SimulationState(new_instance=True)
        self.simulation_state.set_cache_directory(config['cache_directory'])

        SessionConfiguration(new_instance=True,
                             package_order=config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())
        self.config = Resources(config)
        self.save_estimation_results = save_estimation_results
        self.debuglevel = self.config.get("debuglevel", 4)
        self.model_system = ModelSystem()
        self.agents_index_for_prediction = None

        models = self.config.get('models', [])

        self.model_name = None
        if "model_name" in config:
            self.model_name = config["model_name"]
        else:
            for model in models:
                if not isinstance(model, dict):
                    continue
                model_name = list(model.keys())[0]
                processes = model[model_name]
                if processes == "estimate" or (
                        isinstance(processes, list) and "estimate" in processes):
                    self.model_name = model_name
                    break

        estimate_config_changes = self.config.get(
            'config_changes_for_estimation', {}).get('estimate_config', {})
        if len(estimate_config_changes) > 0:
            change = Resources({'models_configuration': {self.model_name: {
                'controller': {'init': {'arguments': {}}}}}})
            estimate_config_str = self.config['models_configuration'].get(
                self.model_name, {}).get('controller', {}).get('init', {}).get(
                'arguments', {}).get('estimate_config', '{}')
            estimate_config = Resources({})
            try:
                # The configured value is a Python expression in string form.
                # NOTE(review): eval of configuration content -- only safe
                # for trusted configurations.
                evaluated = eval(estimate_config_str)
            except Exception:
                # Best effort: an expression that cannot be evaluated here
                # leaves the empty default in place.
                evaluated = None
            if hasattr(evaluated, 'merge'):
                estimate_config = evaluated
            elif isinstance(evaluated, dict):
                # A plain dict literal (such as the '{}' default) has no
                # merge(); wrap it so the merge below works.
                estimate_config = Resources(evaluated)

            estimate_config.merge(estimate_config_changes)
            self.config.merge(change)
            self.config['models_configuration'][self.model_name]['controller'][
                'init']['arguments']['estimate_config'] = 'Resources(%s)' % estimate_config

    def estimate(self, out_storage=None):
        """Run the model system once and pick up coefficients and specification.

        Datasets are not written to the cache at the end of the year.
        Results are saved to ``out_storage`` only if
        ``save_estimation_results`` was set.
        """
        self.model_system.run(self.config, write_datasets_to_cache_at_end_of_year=False)
        self.extract_coefficients_and_specification()

        if self.save_estimation_results:
            self.save_results(out_storage=out_storage)

    def reestimate(self, specification_module_name=None, specification_dict=None,
                   out_storage=None, type=None, submodels=None):
        """Re-run the estimation with a new specification.

        specification_module_name is the name of a module that contains a
        dictionary called 'specification'.  The module is imported and
        reloaded on every call.  If it is not given, the argument
        specification_dict must be given, which is a dictionary object.
        'type' is the name of a model member, such as 'commercial' or
        'residential'.  The specification dictionary is expected to have an
        entry of this name.
        If 'submodels' is given (a list or a single value), the re-estimation
        is done only for those submodels; all other entries except
        '_definition_' are deleted from the specification dictionary.

        Raises ValueError if a requested submodel is not in the specification.
        """
        if specification_module_name is not None:
            import importlib
            # Python 3 has importlib.reload; Python 2 only the builtin.
            reload_module = getattr(importlib, 'reload', None) or reload
            module = reload_module(importlib.import_module(specification_module_name))
            specification_dict = module.specification
        if type is not None:
            specification_dict = specification_dict[type]
        if submodels is not None:
            # Remove all submodels but the given ones from the specification.
            if not isinstance(submodels, list):
                submodels = [submodels]
            submodels_to_be_deleted = list(specification_dict.keys())
            for sm in submodels:
                if sm not in submodels_to_be_deleted:
                    raise ValueError("Submodel %s not in the specification." % sm)
                submodels_to_be_deleted.remove(sm)
            if "_definition_" in submodels_to_be_deleted:
                submodels_to_be_deleted.remove("_definition_")
            for sm in submodels_to_be_deleted:
                del specification_dict[sm]
        self.specification = EquationSpecification(specification_dict=specification_dict)
        new_namespace = self.model_system.run_year_namespace
        keys_coeff_spec = self.get_keys_for_coefficients_and_specification()
        new_namespace[keys_coeff_spec["specification"]] = self.specification
        self.coefficients, coeff_dict_dummy = self.model_system.do_process(new_namespace)
        # Update run_year_namespace, since do_process does not do it.
        self.model_system.run_year_namespace = new_namespace
        self.model_system.run_year_namespace[keys_coeff_spec["coefficients"]] = self.coefficients

        # extract_coefficients_and_specification() is deliberately not called
        # here: it reads from run_year_namespace, which is only refreshed by
        # the _run_year method.
        if self.save_estimation_results:
            self.save_results(out_storage=out_storage)

    def predict(self, predicted_choice_id_name, agents_index=None):
        """Run prediction. Currently makes sense only for choice models.

        predicted_choice_id_name -- agent attribute that receives the
            predicted choices; agents outside ``agents_index`` get -1.
        agents_index -- index of agents to predict for; defaults to the
            agent set index captured on the first call.

        Returns True on success and False when estimate() has not been run
        or the prediction raised (the error is logged).
        """
        tmp_config = Resources(self.config)

        if self.agents_index_for_prediction is None:
            self.agents_index_for_prediction = self.get_agent_set_index().copy()

        if agents_index is None:
            agents_index = self.agents_index_for_prediction

        # The values are names/expressions resolved inside the namespace that
        # is handed to do_process() below.
        run_arguments = tmp_config['models_configuration'][self.model_name][
            'controller']['run']['arguments']
        run_arguments['coefficients'] = "coeff_est"
        run_arguments['agents_index'] = "agents_index"
        run_arguments['chunk_specification'] = "{'nchunks':1}"

        try:
            run_year_namespace = copy.copy(self.model_system.run_year_namespace)
        except AttributeError:
            logger.log_error("The estimate() method must be run first")
            return False

        try:
            agents = self.get_agent_set()
            choice_id_name = self.get_choice_set().get_id_name()[0]
            # Save current locations of agents.
            current_choices = agents.get_attribute(choice_id_name).copy()
            dummy_data = zeros(current_choices.size, dtype=current_choices.dtype) - 1
            agents.modify_attribute(name=choice_id_name, data=dummy_data)  # reset all choices
            try:
                run_year_namespace["process"] = "run"
                run_year_namespace["coeff_est"] = self.coefficients
                run_year_namespace["agents_index"] = agents_index
                run_year_namespace["processmodel_config"] = tmp_config[
                    'models_configuration'][self.model_name]['controller']['run']
                new_choices = self.model_system.do_process(run_year_namespace)
            finally:
                # Restore the observed choices even when the model run fails;
                # previously a failure left every agent's choice at -1.
                agents.modify_attribute(name=choice_id_name, data=current_choices)
            dummy_data[agents_index] = new_choices
            if predicted_choice_id_name not in agents.get_known_attribute_names():
                agents.add_primary_attribute(name=predicted_choice_id_name, data=dummy_data)
            else:
                agents.modify_attribute(name=predicted_choice_id_name, data=dummy_data)
            logger.log_status("Predictions saved into attribute " + predicted_choice_id_name)
            return True
        except Exception as e:
            logger.log_error("Error encountered in prediction: %s" % e)
            logger.log_stack_trace()

        return False
def run(self, year=None, years_to_run=(), configuration=None):
    """Export the datasets in ``self.data_to_export`` as CSV files for the travel model.

    Nothing happens unless ``year`` is listed in ``years_to_run`` and
    ``self.data_to_export`` is set.  Files are written to ``mtc_data`` inside
    the cache directory and named ``<year><data name>.csv``.  For the
    "ABAGData" export the five age-category columns are rescaled so that
    their shares match the age-control table of the nearest available year,
    and TOTPOP/HHPOP are shifted by the resulting rounding difference.

    Raises ValueError when the age-control directory holds no
    ``tazData<year>.csv`` file.
    """
    if year not in years_to_run or self.data_to_export is None:
        return

    cache_directory = configuration['cache_directory']
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(cache_directory)
    simulation_state.set_current_time(year)
    attribute_cache = AttributeCache()
    package_order = configuration['dataset_pool_configuration'].package_order
    dataset_pool = SessionConfiguration(new_instance=True,
                                        package_order=package_order,
                                        in_storage=attribute_cache
                                        ).get_dataset_pool()
    out_dir = os.path.join(cache_directory, "mtc_data")
    out_storage = csv_storage(storage_location=out_dir)

    # Adjust the age distribution per ABAG/MTC's specifications
    age_control_dir = os.path.join(paths.OPUS_DATA_PATH,
                                   configuration['project_name'], "ageControl")
    age_control_storage = csv_storage(storage_location=age_control_dir)
    # Fails early with OSError when the directory is missing; the glob below
    # would silently return nothing.
    os.listdir(age_control_dir)
    control_paths = glob.glob(os.path.join(age_control_dir, "tazData*.csv"))
    years = np.array([
        int(os.path.basename(path).replace("tazData", "").replace(".csv", ""))
        for path in control_paths])
    if years.size == 0:
        raise ValueError("No tazData*.csv age control files found in %s"
                         % age_control_dir)
    closest_year = years[np.argmin(np.abs(years - year))]
    if closest_year != year:
        logger.log_warning("Could not find age control data for " + str(year) +
                           ". Choosing nearest year " + str(closest_year) + ".")

    age_control_table = age_control_storage.load_table(
        "tazData" + str(closest_year), lowercase=False)

    # Calculate the ABAG shares of person by age
    age_categories = ['AGE0004', 'AGE0519', 'AGE2044', 'AGE4564', 'AGE65P']
    age_category_sums = dict((k, age_control_table[k].sum()) for k in age_categories)
    # float(): with integer columns, Python 2 division would floor every
    # share to zero.
    total = float(sum(age_category_sums.values()))
    abag_age_category_shares = dict((k, age_category_sums[k] / total)
                                    for k in age_categories)

    for data_fname, variable_mapping in self.data_to_export.items():
        if not flip_urbansim_to_tm_variable_mappling:
            col_names = list(variable_mapping.values())
            variables_aliases = ["=".join(mapping[::-1])
                                 for mapping in variable_mapping.items()]
        else:
            col_names = list(variable_mapping.keys())
            variables_aliases = ["=".join(mapping)
                                 for mapping in variable_mapping.items()]

        dataset_name = VariableName(variables_aliases[0]).get_dataset_name()
        dataset = dataset_pool.get_dataset(dataset_name)
        dataset.compute_variables(variables_aliases)

        if data_fname == "ABAGData":
            logger.log_status("Adjusting ABAGData to match age controls")
            age_category_sums = dict((k, dataset[k].sum()) for k in age_categories)
            total = float(sum(age_category_sums.values()))
            us_age_category_shares = dict((k, age_category_sums[k] / total)
                                          for k in age_categories)
            adjustments = dict((k, abag_age_category_shares[k] / us_age_category_shares[k])
                               for k in age_categories)
            diff = np.zeros(dataset.n)
            for k in age_categories:
                before = dataset[k]
                scale = adjustments[k]
                # Built-in round() is kept on purpose: np.round breaks ties
                # differently.
                dataset[k] = np.array([round(v * scale)
                                       for v in dataset.get_attribute(k)])
                diff += (dataset[k] - before)
            dataset["TOTPOP"] += diff
            dataset["HHPOP"] += diff
            logger.log_status("NOTE: Adjusted total population by %d (%2.3f%%) due to rounding error." %
                              (int(diff.sum()), diff.sum() * 100 / total))

        org_fname = os.path.join(out_dir, "%s.computed.csv" % data_fname)
        new_fname = os.path.join(out_dir, "%s%s.csv" % (year, data_fname))
        block_msg = "Writing {} for travel model to {}".format(data_fname, new_fname)
        with block(block_msg):
            dataset.write_dataset(attributes=col_names,
                                  out_storage=out_storage,
                                  out_table_name=data_fname)
            # rename & process header
            shutil.move(org_fname, new_fname)
            # Strip ':<letter><digit>' suffixes from the file in place.
            # Argument list instead of a shell string, so paths containing
            # spaces or shell metacharacters are passed through intact.
            import subprocess
            subprocess.call(["sed", "s/:[a-z][0-9]//g", "-i", new_fname])
# Tail of a literal whose opening lies before this chunk: pairs of
# (variable expression, short alias).
("paris.household_x_neighborhood.age_lnprice","age_lnprice"),
("paris.household_x_neighborhood.lninc_lnprice","lninc_lnprice"),
("paris.neighborhood.delta_pop","delta_pop"),
("neighborhood.rail9","rail"),
("neighborhood.subway","subway"),
("neighborhood.disthwy","disthwy"),
("neighborhood.tc","tc"),
("neighborhood.vp","vp"),
("paris.household_x_neighborhood.hhfem_nbtc","hhfem_nbtc")
)
}

from my_estimation_config import my_configuration

# Set the simulation state to year 2000 and the configured cache directory.
ss = SimulationState()
ss.set_current_time(2000)
ss.set_cache_directory(my_configuration['cache_directory'])

# Create a new session configuration that reads datasets through the
# attribute cache.
attribute_cache = AttributeCache()
sc = SessionConfiguration(new_instance=True,
                          package_order=my_configuration['dataset_pool_configuration'].package_order,
                          in_storage=attribute_cache)

#settings = ParisSettings()
#settings.prepare_session_configuration()
# Build the estimator; estimation results are not saved.
estimator = HLCMEstimator(config=my_configuration, save_estimation_results=False)
#estimator = HLCMEstimator(settings=my_configuration,
#    run_land_price_model_before_estimation=False,
#    save_estimation_results=False,
# Read the year range and the comma-separated package order from the parsed
# command-line options.
start_year = options.start_year
end_year = options.end_year
package_order = [ package.strip() for package in options.package_order.split(",") ]
refinements = None
refinements_storage = None
# When a refinements directory is given, load the 'refinement' dataset from
# it and let its 'year' attribute supply any missing start/end year.
if options.refinements_directory is not None:
    refinements_storage = StorageFactory().get_storage('flt_storage', storage_location=options.refinements_directory)
    refinements = DatasetFactory().search_for_dataset('refinement', package_order, arguments={'in_storage':refinements_storage})
    years = refinements.get_attribute('year')
    if start_year is None: start_year = years.min()
    if end_year is None: end_year = years.max()

# NOTE(review): start_year can still be None here when neither a start year
# nor a refinements directory was given -- confirm that
# set_current_time(None) is acceptable before the dataset pool is used below.
simulation_state = SimulationState()
simulation_state.set_cache_directory(options.cache_directory)
simulation_state.set_current_time(start_year)
attribute_cache = AttributeCache()
dataset_pool = SessionConfiguration(new_instance=True,
                                    package_order=package_order,
                                    in_storage=attribute_cache).get_dataset_pool()

# Otherwise take the 'refinement' dataset from the dataset pool and derive
# the missing years the same way.
if refinements is None:
    refinements = dataset_pool.get_dataset('refinement')
    years = refinements.get_attribute('year')
    if start_year is None: start_year = years.min()
    if end_year is None: end_year = years.max()

# Process every year of the inclusive range; the rest of the loop body lies
# beyond this chunk.
for year in range(start_year, end_year+1):
    logger.start_block("Doing refinement for %s" % year )
    simulation_state.set_current_time(year)
def __init__(self, config):
    """Create a 'persons' table with one row per household worker and store it.

    The output goes to the estimation database when
    'estimation_database_configuration' is present in ``config``, otherwise
    to the flt cache of the year after the base year.  The base-year cache
    is created first if it does not exist yet.  Every household with a
    positive 'workers' count contributes that many rows, numbered 1..workers
    in 'member_id', with 'is_worker' set to 1 and 'job_id' set to -1.
    """
    # Pick the destination storage.
    if 'estimation_database_configuration' in config:
        server = DatabaseServer(config['estimation_database_configuration'])
        database = server.get_database(
            config['estimation_database_configuration'].database_name)
        out_storage = StorageFactory().build_storage_for_dataset(
            type='sql_storage', storage_location=database)
    else:
        next_year_cache = os.path.join(config['cache_directory'],
                                       str(config['base_year'] + 1))
        out_storage = StorageFactory().get_storage(
            type='flt_storage', storage_location=next_year_cache)

    # Point the simulation state and session at the base-year cache.
    state = SimulationState()
    state.set_cache_directory(config['cache_directory'])
    state.set_current_time(config['base_year'])
    cache = AttributeCache()
    SessionConfiguration(
        new_instance=True,
        package_order=config['dataset_pool_configuration'].package_order,
        in_storage=cache)

    base_year_cache = os.path.join(config['cache_directory'],
                                   str(config['base_year']))
    if not os.path.exists(base_year_cache):
        CacheScenarioDatabase().run(config, unroll_gridcells=False)

    for name in config['datasets_to_preload']:
        SessionConfiguration().get_dataset_from_pool(name)

    households = SessionConfiguration().get_dataset_from_pool("household")
    household_ids = households.get_id_attribute()
    workers = households.get_attribute("workers")

    # Expand each household into one record per worker.
    hh_ids, member_ids, is_worker, job_ids = [], [], [], []
    for row in range(households.size()):
        n_workers = workers[row]
        if n_workers > 0:
            hh_ids.extend([household_ids[row]] * n_workers)
            member_ids.extend(range(1, n_workers + 1))
            is_worker.extend([1] * n_workers)
            job_ids.extend([-1] * n_workers)

    persons_table_name = 'persons'
    person_columns = {
        'person_id': arange(len(hh_ids)) + 1,
        'household_id': array(hh_ids),
        'member_id': array(member_ids),
        'is_worker': array(is_worker),
        'job_id': array(job_ids),
    }
    in_storage = StorageFactory().get_storage('dict_storage')
    in_storage.write_table(table_name=persons_table_name,
                           table_data=person_columns)

    persons = PersonDataset(in_storage=in_storage,
                            in_table_name=persons_table_name)
    persons.write_dataset(out_storage=out_storage,
                          out_table_name=persons_table_name)
def run(self, optimizer='lbfgsb', results_pickle_prefix="calib", optimizer_kwargs=None):
    '''
    Call the specified optimizer to calibrate.

    Arguments:
        - optimizer: optimization method to use; one of 'bfgs', 'lbfgsb',
          'anneal' or 'panneal'
        - results_pickle_prefix: prefix of the pickle file name that will be
          saved after the simulation; if None, results are not saved
        - optimizer_kwargs: keyword arguments that override the built-in
          defaults of the chosen optimizer

    Side effects:
        - logs the optimizer results and the execution time
        - writes a pickle dump of the results to self.log_directory, if
          results_pickle_prefix is specified

    Raises:
        - TypeError for a calibration attribute that is neither a string
          nor a list/tuple
        - ValueError for an unrecognized optimizer name
    '''
    if optimizer_kwargs is None:
        optimizer_kwargs = {}

    simulation_state = SimulationState()
    simulation_state.set_current_time(self.base_year)
    simulation_state.set_cache_directory(self.cache_directory)
    attribute_cache = AttributeCache()
    dataset_pool = SessionConfiguration(
        new_instance=True,
        package_order=self.package_order,
        in_storage=attribute_cache).get_dataset_pool()

    calib_datasets = {}
    # NOTE(review): this loop iterates over the dictionary created empty on
    # the line above, so as written it never executes and init_v stays
    # empty.  The intended source mapping (dataset name -> attribute name(s))
    # is not visible here -- confirm and fix at the call site.
    for dataset_name, calib_attr in calib_datasets.items():
        dataset = dataset_pool.get_dataset(
            dataset_name, dataset_arguments={'id_name': []})
        assert subset is None or subset.get(dataset_name, None) is None or \
               subset_patterns is None or subset_patterns.get(dataset_name, None) is None
        if subset is not None and subset.get(dataset_name, None) is not None:
            subset_attr, subset_cond = subset.get(dataset_name)
            index = np.in1d(dataset[subset_attr], subset_cond)
        elif subset_patterns is not None and subset_patterns.get(
                dataset_name, None) is not None:
            subset_attr, subset_pattern = subset_patterns.get(dataset_name)
            index = array([
                True if re.search(subset_pattern, attr_v) else False
                for attr_v in dataset[subset_attr]
            ])
        else:
            index = arange(dataset.size(), dtype='i')

        calib_datasets[dataset_name] = [dataset, calib_attr, index]

    # Concatenate the selected attribute values into the start vector.
    init_v = array([], dtype='f8')
    for dataset_name, calib in calib_datasets.items():
        dataset, calib_attr, index = calib
        if isinstance(calib_attr, str):
            init_v = np.concatenate((init_v, dataset[calib_attr][index]))
        elif isinstance(calib_attr, (list, tuple)):
            for attr in calib_attr:
                init_v = np.concatenate((init_v, dataset[attr][index]))
        else:
            raise TypeError("Unrecongized data type in calib_datasets")

    t0 = time.time()

    if is_parallelizable == True:
        set_parallel(True)

    print(OKBLUE + "\noptimizer = {} (is_parallel = {})".format(
        optimizer, is_parallelizable) + ENDC)
    print(OKBLUE + "-------------------------------------------------------\n" + ENDC)
    if optimizer == 'bfgs':
        default_kwargs = {
            'fprime': None,
            'epsilon': 1e-08,
            'maxiter': None,
            'full_output': 1,
            'disp': 1,
            'retall': 0,
            'callback': None
        }
        optimizer_func = fmin_bfgs
    elif optimizer == 'lbfgsb':
        default_kwargs = {
            'fprime': None,
            'approx_grad': True,
            'bounds': None,
            'factr': 1e12,
            'iprint': 1
        }
        optimizer_func = fmin_l_bfgs_b
    elif optimizer in ('anneal', 'panneal'):
        # Both annealers share these defaults; the parallel variant adds
        # 'cores' and 'interv'.
        default_kwargs = {
            'schedule': 'fast',
            'full_output': 1,
            'T0': None,
            'Tf': 1e-12,
            'maxeval': None,
            'maxaccept': None,
            'maxiter': 400,
            'boltzmann': 1.0,
            'learn_rate': 0.5,
            'feps': 1e-06,
            'quench': 1.0,
            'm': 1.0,
            'n': 1.0,
            'lower': -1,
            'upper': 1,
            'dwell': 50,
            'disp': True
        }
        if optimizer == 'anneal':
            optimizer_func = anneal
        else:
            default_kwargs.update({'cores': 24, 'interv': 20})
            optimizer_func = panneal
    else:
        raise ValueError("Unrecognized optimizer {}".format(optimizer))

    default_kwargs.update(optimizer_kwargs)
    results = optimizer_func(self.target_func, copy(init_v), **default_kwargs)

    duration = time.time() - t0
    if results_pickle_prefix is not None:
        pickle_file = "{}_{}.pickle".format(results_pickle_prefix, optimizer)
        pickle_file = os.path.join(self.log_directory, pickle_file)
        # ``with`` closes the file; the bare open() handle was never closed.
        with open(pickle_file, "wb") as pickle_handle:
            pickle.dump(results, pickle_handle)

    if is_parallelizable == True:
        set_parallel(False)

    logger.log_status('init target_func: {}'.format(
        self.target_func(init_v)))
    logger.log_status('end target_func: {}'.format(
        results[:]))  # which one?
    logger.log_status('outputs from optimizer: {}'.format(results))
    logger.log_status('Execution time: {}'.format(duration))
# Tail of a literal whose opening lies before this chunk: pairs of
# (variable expression, short alias).
("neighborhood.ln_price", "ln_price"),
("paris.household_x_neighborhood.age_lnprice", "age_lnprice"),
("paris.household_x_neighborhood.lninc_lnprice", "lninc_lnprice"),
("paris.neighborhood.delta_pop", "delta_pop"),
("neighborhood.rail9", "rail"),
("neighborhood.subway", "subway"),
("neighborhood.disthwy", "disthwy"),
("neighborhood.tc", "tc"),
("neighborhood.vp", "vp"),
("paris.household_x_neighborhood.hhfem_nbtc", "hhfem_nbtc"))
}

from my_estimation_config import my_configuration

# Set the simulation state to year 2000 and the configured cache directory.
ss = SimulationState()
ss.set_current_time(2000)
ss.set_cache_directory(my_configuration['cache_directory'])

# Create a new session configuration that reads datasets through the
# attribute cache.
attribute_cache = AttributeCache()
sc = SessionConfiguration(
    new_instance=True,
    package_order=my_configuration['dataset_pool_configuration'].
    package_order,
    in_storage=attribute_cache)

#settings = ParisSettings()
#settings.prepare_session_configuration()
# Build the estimator; estimation results are not saved.
estimator = HLCMEstimator(config=my_configuration,
                          save_estimation_results=False)
#estimator = HLCMEstimator(settings=my_configuration,
#    run_land_price_model_before_estimation=False,
def import_openamos_data(config, year, zone_set=None):
    """Read OpenAMOS skim files and cache them as a 'travel_data' dataset.

    config -- configuration with 'travel_model_configuration' and
        'cache_directory' entries.  The travel model configuration supplies
        either 'skim_dir' or 'project_path'; in the latter case skims are
        read from ``<project_path>/skimOutput/dynamic``.
    year -- simulation year; the dataset is written to the cache of
        ``year + 1``.
    zone_set -- accepted but not used.

    Each ``skim*.dat`` file is read with the columns from_zone_id,
    to_zone_id, travel_time and travel_distance; the last number in its path
    becomes the suffix of the 'tm<N>'/'td<N>' columns.  Peak (6-9 and 16-19)
    and off-peak values are derived with ``avg_travel_time``.

    Returns the dataset produced by ``to_opus_dataset``.
    Raises IOError when no skim file is found.
    """
    tm_config = config['travel_model_configuration']
    if 'skim_dir' in tm_config:
        skim_dir = tm_config.get('skim_dir')
    else:
        project_path = tm_config.get("project_path")
        print("---> %s" % (project_path,))
        skim_dir = os.path.join(project_path, "skimOutput/dynamic")
    logger.log_status('Reading skims from {}'.format(skim_dir))

    skim_files = glob.glob(os.path.join(skim_dir, "skim*.dat"))
    print(skim_files)
    if not skim_files:
        # Without this check the code failed later with an AttributeError on
        # None that did not mention the missing files.
        raise IOError("No skim files matching 'skim*.dat' found in {}".format(skim_dir))

    skims = None
    attr_pattern = '{}{}'
    for skim_file in skim_files:
        i = int(re.findall(r'\d+', skim_file)[-1])
        skim = read_csv(skim_file, header=0,
                        names=['from_zone_id', 'to_zone_id',
                               'travel_time', 'travel_distance'])
        if skims is None:
            # The first file provides the zone-pair columns.
            skims = skim.rename(
                columns={'travel_time': attr_pattern.format('tm', str(i)),
                         'travel_distance': attr_pattern.format('td', str(i)),
                         },
                copy=False)
        else:
            # NOTE(review): these assignments align on the row index, so
            # every file is presumably expected to list the zone pairs in the
            # same order -- confirm.
            skims[attr_pattern.format('tm', str(i))] = skim.travel_time
            skims[attr_pattern.format('td', str(i))] = skim.travel_distance

    skims.set_index(['from_zone_id', 'to_zone_id'], inplace=True)
    peak_hours = set([6, 7, 8, 9, 16, 17, 18, 19])
    off_peak_hours = set(range(24)) - peak_hours
    peak_travel_time = avg_travel_time(skims, peak_hours, prefix='tm')
    off_peak_travel_time = avg_travel_time(skims, off_peak_hours, prefix='tm')
    # The same helper is applied to the distance columns via prefix='td'.
    peak_travel_distance = avg_travel_time(skims, peak_hours, prefix='td')
    off_peak_travel_distance = avg_travel_time(skims, off_peak_hours, prefix='td')
    travel_time = DataFrame({'peak_travel_time': peak_travel_time,
                             'off_peak_travel_time': off_peak_travel_time,
                             'peak_travel_distance': peak_travel_distance,
                             'off_peak_travel_distance': off_peak_travel_distance,
                             })

    ## subset to include only zones appearing in zone_set
    #zone_ids = zone_set['zone_id']
    #zone_pairs = [z for z in product(zone_ids, zone_ids)]
    #travel_time = travel_time.ix[zone_pairs]

    cache_directory = config['cache_directory']
    simulation_state = SimulationState()
    simulation_state.set_current_time(year)
    simulation_state.set_cache_directory(cache_directory)
    out_store = AttributeCache().get_flt_storage_for_year(year + 1)
    logger.log_status('Caching travel_data to {}'.format(out_store.get_storage_location()))
    travel_data = to_opus_dataset(travel_time, out_store, 'travel_data')
    return travel_data
class ModelSystem(object): """ Uses the information in configuration to run/estimate a set of models for given set of years. """ def __init__(self): self.running = False self.forked_processes = [] self.running_conditional = threading.Condition() def run( self, resources, write_datasets_to_cache_at_end_of_year=True, log_file_name="run_model_system.log", cleanup_datasets=True, ): """Entries in resources: (entries with no defaults are required) models - a list containing names of models to be run. Each name must correspond to the name of the module/class of that model. Default(object): None years - a tuple (start year, end year) debuglevel - an integer. The higher the more output will be printed. Default: 0 expression_library - a dictionary. The keys in the dictionary are pairs (dataset_name, variable_name) and the values are the corresponding expressions. The model system needs to set the expression library (if it isn't None) in DatasetFactory for DatasetFactory to know about variables defined as expressions in the xml expression library. Default: None This method is called both to start up the simulation for all years, and also for each year when running with one process per year. In the latter case, 'years' consists of just (current_year, current_year) rather than the real start and end years for the simulation. """ if not isinstance(resources, Resources): raise TypeError, "Argument 'resources' must be of type 'Resources'." 
logger_settings = resources.get("log", {"tags": [], "verbosity_level": 3}) logger.set_tags(logger_settings.get("tags", [])) logger.set_verbosity_level(logger_settings.get("verbosity_level", 3)) self.simulation_state = SimulationState() self.simulation_state.set_low_memory_run(resources.get("low_memory_mode", False)) self.simulation_state.set_start_time(resources.get("base_year", 0)) self.run_year_namespace = {} if resources.get("cache_directory", None) is not None: self.simulation_state.set_cache_directory(resources["cache_directory"]) if "expression_library" in resources: VariableFactory().set_expression_library(resources["expression_library"]) if resources.get("sample_input", False): self.update_config_for_multiple_runs(resources) cache_directory = self.simulation_state.get_cache_directory() log_file = os.path.join(cache_directory, log_file_name) logger.enable_file_logging(log_file, verbose=False) try: logger.log_status("Cache Directory set to: " + cache_directory) with logger.block("Start simulation run"): models = resources.get("models", []) models_in_years = resources.get("models_in_year", {}) resources.check_obligatory_keys(["years"]) years = resources["years"] if (not isinstance(years, tuple)) and (not isinstance(years, list)): raise TypeError, "Entry 'years' in resources must be a tuple." if len(years) < 2: print years raise StandardError, "Entry 'years' in resources must be of length at least 2." 
start_year = years[0] end_year = years[-1] debuglevel = resources.get("debuglevel", 0) seed_values = resources.get("seed", NO_SEED) logger.log_status("random seed = %s" % str(seed_values)) seed(seed_values) for year in range(start_year, end_year + 1): with logger.block("Starting simulation for year " + str(year)): self.simulation_state.set_current_time(year) SessionConfiguration().get_dataset_pool().remove_all_datasets() logger.disable_file_logging(log_file) try: if models_in_years.get(year, None) is not None: models_to_run = models_in_years[year] else: models_to_run = models self._run_year( year=year, models=models_to_run, simulation_state=self.simulation_state, debuglevel=debuglevel, resources=resources, write_datasets_to_cache_at_end_of_year=write_datasets_to_cache_at_end_of_year, cleanup_datasets=cleanup_datasets, ) finally: logger.enable_file_logging(log_file, verbose=False) collect() finally: logger.disable_file_logging(log_file) def flush_datasets(self, dataset_names, after_model=False): dataset_pool = SessionConfiguration().get_dataset_pool() for dataset_name in dataset_names: if dataset_pool.has_dataset(dataset_name): self.flush_dataset(dataset_pool.get_dataset(dataset_name), after_model=after_model) def flush_dataset(self, dataset, after_model=False): """Write the PRIMARY attributes of this dataset to the cache.""" if dataset and isinstance(dataset, Dataset): # Do not flush after model if not necessary if after_model: if len(dataset.get_attribute_names()) <= len(dataset.get_id_name()): return if (len(dataset.get_attribute_names()) == len(dataset.get_known_attribute_names())) and ( len(dataset.get_attributes_in_memory()) <= len(dataset.get_id_name()) ): dataset.delete_computed_attributes() return dataset.delete_computed_attributes() dataset.load_and_flush_dataset() def flush_datasets_after_model(self, resources): if resources.get("flush_variables", False): AttributeCache().delete_computed_tables() # this will also delete computed attributes 
datasets_to_cache = SessionConfiguration().get_dataset_pool().datasets_in_pool().keys() else: datasets_to_cache = resources.get("datasets_to_cache_after_each_model", []) self.flush_datasets(datasets_to_cache, after_model=True) def _run_year( self, year, models, simulation_state, debuglevel, resources, write_datasets_to_cache_at_end_of_year, cleanup_datasets=True, ): """ Assumes that all datasets resides in the cache directory in binary format. """ try: import wingdbstub except: pass self.vardict = {} log_file_name = os.path.join(simulation_state.get_cache_directory(), "year_%s_log.txt" % year) logger.enable_file_logging(log_file_name, "w") try: logger.start_block("Simulate year %s" % year) try: base_year = resources["base_year"] if year == base_year: year_for_base_year_cache = year # case of estimation else: year_for_base_year_cache = year - 1 cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache) self.vardict["cache_storage"] = cache_storage base_cache_storage = AttributeCache().get_flt_storage_for_year(base_year) self.vardict["base_cache_storage"] = base_cache_storage simulation_state.set_flush_datasets(resources.get("flush_variables", False)) SessionConfiguration()["simulation_year"] = year SessionConfiguration()["debuglevel"] = debuglevel datasets_to_preload_in_year = resources.get("datasets_to_preload_in_year", {}) if datasets_to_preload_in_year.get(year, None) is not None: datasets_to_preload = datasets_to_preload_in_year[year] else: datasets_to_preload = resources.get("datasets_to_preload", {}) for dataset_name in datasets_to_preload: SessionConfiguration().get_dataset_from_pool(dataset_name) models_configuration = resources.get("models_configuration", {}) dataset_pool = SessionConfiguration().get_dataset_pool() datasets = {} for dataset_name, its_dataset in dataset_pool.datasets_in_pool().iteritems(): self.vardict[dataset_name] = its_dataset datasets[dataset_name] = its_dataset exec "%s=its_dataset" % dataset_name # This is 
needed. It resides in locals() # and is passed on to models as they run. ### TODO: There has got to be a better way! model_resources = Resources(datasets) n_models, model_group_members_to_run = self.get_number_of_models_and_model_group_members_to_run( models, models_configuration ) self.run_year_namespace = locals() # ========== # Run the models. # ========== model_number = -1 for model_entry in models: # list 'models' can be in the form: # [{'model_name_1': {'group_members': ['residential', 'commercial']}}, # {'model_name_2': {'group_members': [{'residential': ['estimate','run']}, # 'commercial']}}, # {'model_name_3': ['estimate', 'run']}, # 'model_name_4', # {'model_name_5': {'group_members': 'all'}} # ] # get list of methods to be processed evtl. for each group member if isinstance(model_entry, dict): model_name, value = model_entry.items()[0] if not isinstance(value, dict): # is a model group processes = value if not isinstance(processes, list): processes = [processes] else: # in the form 'model_name_4' in the comment above model_name = model_entry processes = ["run"] group_member = None model_group = model_group_members_to_run[model_name][1] last_member = max(1, len(model_group_members_to_run[model_name][0].keys())) for imember in range(last_member): controller_config = models_configuration[model_name]["controller"] model_configuration = models_configuration[model_name] if model_group_members_to_run[model_name][0].keys(): group_member_name = model_group_members_to_run[model_name][0].keys()[imember] group_member = ModelGroupMember(model_group, group_member_name) processes = model_group_members_to_run[model_name][0][group_member_name] member_model_name = "%s_%s" % (group_member_name, model_name) if member_model_name in models_configuration.keys(): model_configuration = models_configuration[member_model_name] if "controller" in model_configuration.keys(): controller_config = model_configuration["controller"] datasets_to_preload_for_this_model = 
controller_config.get( "_model_structure_dependencies_", {} ).get("dataset", []) for dataset_name in datasets_to_preload_for_this_model: try: if not dataset_pool.has_dataset(dataset_name) or (dataset_name not in datasets.keys()): ds = dataset_pool.get_dataset(dataset_name) self.vardict[dataset_name] = ds datasets[dataset_name] = ds exec "%s=ds" % dataset_name except: logger.log_warning("Failed to load dataset %s." % dataset_name) # import part if "import" in controller_config.keys(): import_config = controller_config["import"] for import_module in import_config.keys(): exec ("from %s import %s" % (import_module, import_config[import_module])) # gui_import_replacements part # This is a temporary hack -- replicates the functionality of the "import" section # for use with the GUI. The contents of this part of the config is a dictionary. # Keys are names of models (not used here). Values are 2 element pairs. # The first element is a name and the second is a value. Bind the name to the value. 
if "gui_import_replacements" in controller_config.keys(): import_replacement_config = controller_config["gui_import_replacements"] for model_name in import_replacement_config.keys(): pair = import_replacement_config[model_name] temp = pair[1] exec ("%s = temp") % pair[0] # init part model = self.do_init(locals()) # estimate and/or run part for process in processes: model_number = model_number + 1 # write status file model.set_model_system_status_parameters( year, n_models, model_number, resources.get("status_file_for_gui", None) ) model.write_status_for_gui() # prepare part exec (self.do_prepare(locals())) processmodel_config = controller_config[process] if "output" in processmodel_config.keys(): outputvar = processmodel_config["output"] else: outputvar = "process_output" self.vardict[outputvar] = self.do_process(locals()) exec outputvar + "=self.vardict[outputvar]" # check command file from gui, if the simulation should be stopped or paused self.do_commands_from_gui(resources.get("command_file_for_gui", None)) # capture namespace for interactive estimation self.run_year_namespace = locals() self.flush_datasets_after_model(resources) del model collect() # Write all datasets to cache. if write_datasets_to_cache_at_end_of_year: logger.start_block("Writing datasets to cache for year %s" % year) try: for dataset_name, its_dataset in ( SessionConfiguration().get_dataset_pool().datasets_in_pool().iteritems() ): self.flush_dataset(its_dataset) finally: logger.end_block() finally: logger.end_block() finally: logger.disable_file_logging(log_file_name) if cleanup_datasets: SessionConfiguration().delete_datasets() def do_init(self, parent_state): """Run the 'init' part of this model's configuration. Returns model object. """ # give this method the same local variables as its calling method has. 
for key in parent_state.keys(): if key <> "self": exec ('%s = parent_state["%s"]' % (key, key)) init_config = parent_state["controller_config"]["init"] group_member = parent_state["group_member"] if group_member is None: # No model group cmd = "%s(%s)" % (init_config["name"], self.construct_arguments_from_config(init_config)) model = eval(cmd) else: # Model belongs to a group model = eval( "%s(group_member, %s)" % (init_config["name"], self.construct_arguments_from_config(init_config)) ) return model def do_prepare(self, parent_state): """Prepares for the current model in the parent state's context. What to do is determined by the contents of the current model's controller configuration. controller_config is the 'controller' part of the model configuration. vardict is a dictionary into which the output of the model's 'prepare_output' method will be put. """ # give this method the same local variables as its calling method has. for key in parent_state.keys(): if key <> "self": exec ('%s = parent_state["%s"]' % (key, key)) key_name = "prepare_for_%s" % process if key_name in controller_config.keys(): prepare_config = controller_config[key_name] if "output" in prepare_config.keys(): outputvar = prepare_config["output"] else: outputvar = "prepare_output" self.vardict[outputvar] = eval( "model.%s(%s)" % (prepare_config["name"], self.construct_arguments_from_config(prepare_config)) ) return '%s=self.vardict["%s"]' % (outputvar, outputvar) else: # do nothing when return value is exec'ed return "" def do_process(self, parent_state): for key in parent_state.keys(): if key <> "self": exec ('%s = parent_state["%s"]' % (key, key)) ev = "model.%s(%s)" % (process, self.construct_arguments_from_config(processmodel_config)) return eval(ev) def get_number_of_models_and_model_group_members_to_run(self, models, models_configuration): """Count number_of models in the list 'models' that can include group members (each member and each process is one model).""" # list models can be in 
the form: # [{'model_name_1': {'group_members': ['residential', 'commercial']}}, # {'model_name_2': {'group_members': [{'residential': ['estimate','run']}, # 'commercial']}}, # {'model_name_3': ['estimate', 'run']}, # 'model_name_4', # {'model_name_5': {'group_members': 'all'}} # ] number_of_models = 1 model_group_members_to_run = {} for model_entry in models: if isinstance(model_entry, dict): model_name, value = model_entry.items()[0] if isinstance(value, dict): # is a model group if not value.keys()[0] == "group_members": raise KeyError, "Key for model " + model_name + " must be 'group_members'." group_members = value["group_members"] model_group = None if "group_by_attribute" in models_configuration[model_name]["controller"].keys(): group_dataset_name, group_attribute = models_configuration[model_name]["controller"][ "group_by_attribute" ] model_group = ModelGroup( SessionConfiguration().get_dataset_from_pool(group_dataset_name), group_attribute ) if not isinstance(group_members, list): group_members = [group_members] if group_members[0] == "_all_": # see 'model_name_5' example above if model_group is None: raise KeyError, "Entry 'group_by_attribute' is missing for model %s" % model_name group_members = model_group.get_member_names() model_group_members_to_run[model_name] = [{}, model_group] for member in group_members: if isinstance(member, dict): # see 'model_name_2' ('residential') in the comment above member_name = member.keys()[0] model_group_members_to_run[model_name][0][member_name] = member[member_name] if not isinstance(model_group_members_to_run[model_name][0][member_name], list): model_group_members_to_run[model_name][0][member_name] = [ model_group_members_to_run[model_name][0][member_name] ] number_of_models += len(model_group_members_to_run[model_name][0][member_name]) else: # see 'model_name_1' model_group_members_to_run[model_name][0][member] = ["run"] number_of_models += len(model_group_members_to_run[model_name][0][member]) else: # in the form 
'model_name_3' in the comment above model_group_members_to_run[model_name] = [{}, None] if not isinstance(value, list): number_of_models += 1 else: number_of_models += len(value) else: # in the form 'model_name_4' in the comment above model_group_members_to_run[model_entry] = [{}, None] number_of_models += 1 return (number_of_models, model_group_members_to_run) def do_commands_from_gui(self, filename=None): if (filename is None) or not os.path.exists(filename): return while True: f = file(filename) line = f.read().strip() f.close() if line == "stop": logger.log_warning("Simulation stopped.") sys.exit() elif line == "resume": break elif line <> "pause": logger.log_warning("Unknown command '%s'. Allowed commands: 'stop', 'pause', 'resume'." % line) time.sleep(10) def run_multiprocess(self, resources): resources = Resources(resources) profiler_name = resources.get("profile_filename", None) if resources["cache_directory"] is not None: cache_directory = resources["cache_directory"] else: cache_directory = SimulationState().get_cache_directory() ### TODO: Get rid of this! There is absolutely no good reason to be ### changing the Configuration! 
resources["cache_directory"] = cache_directory log_file = os.path.join(cache_directory, "run_multiprocess.log") logger.enable_file_logging(log_file) start_year = resources["years"][0] end_year = resources["years"][-1] nyears = end_year - start_year + 1 root_seed = resources.get("seed", NO_SEED) if resources.get("_seed_dictionary_", None) is not None: # This is added by the RunManager to ensure reproducibility including restarted runs seed_dict = resources.get("_seed_dictionary_") seed_array = array(map(lambda year: seed_dict[year], range(start_year, end_year + 1))) else: seed(root_seed) seed_array = randint(1, 2 ** 30, nyears) logger.log_status("Running simulation for years %d thru %d" % (start_year, end_year)) logger.log_status("Simulation root seed: %s" % root_seed) for iyear, year in enumerate(range(start_year, end_year + 1)): success = self._run_each_year_as_separate_process( iyear, year, seed=seed_array[iyear], resources=resources, profiler_name=profiler_name, log_file=log_file ) if not success: break self._notify_stopped() if profiler_name is not None: # insert original value resources["profile_filename"] = profiler_name logger.log_status("Done running simulation for years %d thru %d" % (start_year, end_year)) # TODO: changing of configuration def _run_each_year_as_separate_process( self, iyear, year, seed=None, resources=None, profiler_name=None, log_file=None ): logger.start_block("Running simulation for year %d in new process" % year) resources["years"] = (year, year) resources["seed"] = (seed,) if profiler_name is not None: # add year to the profile name resources["profile_filename"] = "%s_%s" % (profiler_name, year) optional_args = [] if log_file: optional_args += ["--log-file-name", os.path.split(log_file)[-1]] success = False try: logger.disable_file_logging(log_file) success = self._fork_new_process( "opus_core.model_coordinators.model_system", resources, optional_args=optional_args ) logger.enable_file_logging(log_file, verbose=False) finally: 
logger.end_block() return success def run_in_one_process( self, resources, run_in_background=False, class_path="opus_core.model_coordinators.model_system" ): resources = Resources(resources) if resources["cache_directory"] is not None: cache_directory = resources["cache_directory"] else: cache_directory = SimulationState().get_cache_directory() ### TODO: Get rid of this! There is no good reason to be changing the ### Configuration. resources["cache_directory"] = cache_directory self._fork_new_process("%s" % class_path, resources, delete_temp_dir=False, run_in_background=run_in_background) self._notify_stopped() def run_in_same_process(self, resources, **kwargs): resources = Resources(resources) if resources["cache_directory"] is not None: cache_directory = resources["cache_directory"] else: cache_directory = SimulationState().get_cache_directory() ### TODO: Get rid of this! There is no good reason to be changing the ### Configuration. resources["cache_directory"] = cache_directory self._notify_started() RunModelSystem(model_system=self, resources=resources, **kwargs) self._notify_stopped() def construct_arguments_from_config(self, config): key = "arguments" if (key not in config.keys()) or (len(config[key].keys()) <= 0): return "" arg_dict = config[key] result = "" for arg_key in arg_dict.keys(): result += "%s=%s, " % (arg_key, arg_dict[arg_key]) return result def wait_for_start(self): self.running_conditional.acquire() while not self.running: self.running_conditional.wait() self.running_conditional.release() def wait_for_finish(self): self.running_conditional.acquire() while self.running: self.running_conditional.wait() self.running_conditional.release() def wait_for_process_or_finish(self, process_index): self.running_conditional.acquire() while process_index >= len(self.forked_processes) and self.running: self.running_conditional.wait() self.running_conditional.release() if not self.running: process_index = len(self.forked_processes) - 1 return process_index def 
_fork_new_process(self, module_name, resources, run_in_background=False, **key_args): self.running_conditional.acquire() self.running = True self.forked_processes.append(ForkProcess()) key_args["run_in_background"] = run_in_background success = self.forked_processes[-1].fork_new_process(module_name, resources, **key_args) self.running_conditional.notifyAll() self.running_conditional.release() if not run_in_background: self.forked_processes[-1].wait() self.forked_processes[-1].cleanup() return success def _notify_started(self): self.running_conditional.acquire() self.running = True self.running_conditional.notifyAll() self.running_conditional.release() def _notify_stopped(self): self.running_conditional.acquire() self.running = False self.running_conditional.notifyAll() self.running_conditional.release() def update_config_for_multiple_runs(self, config): models_to_update = config.get("models_with_sampled_coefficients", []) if "models_in_year" not in config.keys(): config["models_in_year"] = {} if config["models_in_year"].get(config["base_year"] + 1, None) is None: config["models_in_year"][config["base_year"] + 1] = config.get("models") for umodel in models_to_update: try: i = config["models_in_year"][config["base_year"] + 1].index(umodel) new_model_name = "%s_sampled_coef" % umodel config["models_in_year"][config["base_year"] + 1][i] = new_model_name except: pass config["models_configuration"][new_model_name] = Configuration(config["models_configuration"][umodel]) config["models_configuration"][new_model_name]["controller"]["prepare_for_run"]["arguments"][ "sample_coefficients" ] = True config["models_configuration"][new_model_name]["controller"]["prepare_for_run"]["arguments"][ "distribution" ] = "'normal'" config["models_configuration"][new_model_name]["controller"]["prepare_for_run"]["arguments"][ "cache_storage" ] = "base_cache_storage"
def run(self, resources, year):
    """Point the simulation state at a cache directory and year, then load gridcells.

    NOTE(review): ``resources`` is never read here; the cache directory is
    taken from a name ``config`` that is not a parameter -- presumably the
    ``config`` imported further down in this file. Confirm against the
    original, un-collapsed source.
    """
    cache_directory = config['cache_directory']
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(cache_directory)
    simulation_state.set_current_time(year)
    attribute_cache = AttributeCache()
    SessionConfiguration(new_instance=True, in_storage=AttributeCache())
    arguments = {'in_storage': attribute_cache}
    # gc_set is not referenced again in the visible code.
    gc_set = DatasetFactory().get_dataset('gridcell', package='urbansim', arguments=arguments)


# NOTE(review): the indentation of this snippet was lost. The statements below
# are laid out as script-level code following the method above (they re-create
# the SimulationState/SessionConfiguration and overwrite ``year``); verify the
# nesting against the original file before relying on it.

# Maps the cache directory of each run to a human-readable description.
runs = {
    #r'X:\urbansim_cache\run_1713.2007_01_03_11_16':r'(run 1713 - baseline)',
    #r'X:\urbansim_cache\run_1714.2007_01_03_11_20':r'(run 1714 - no ugb)',
    #r'X:\urbansim_cache\run_1731.2007_01_03_11_16':r'(run 1731 - no build)',
    r'X:\urbansim_cache\run_1847.2007_01_15_15_23': r'(run 1847 - no UGB 1/17/2007)',
    r'X:\urbansim_cache\run_1848.2007_01_15_15_40': r'(run 1848 - no UGB+1.5xhighway 1/17/2007)',
    # r'X:\urbansim_cache\run_1849.2007_01_15_16_09':r'(run 1849 - baseline 1/17/2007)',
    r'V:\psrc\run_1850.2007_01_15_17_03': r'(run 1850 - baseline 1/17/2007)',
    r'V:\psrc\run_1851.2007_01_15_17_07': r'(run 1851 - no build 1/17/2007)'
}

# Cache directory whose values are copied into the other runs as 'baseline_*' attributes.
#baseline = r'X:\urbansim_cache\run_1713.2007_01_03_11_16'
baseline = r'V:\psrc\run_1850.2007_01_15_17_03'

# Variables computed on the baseline, keyed by dataset name.
comparison_variables = {
    'gridcell': [
        'urbansim.gridcell.population',
        'urbansim.gridcell.number_of_jobs'
    ],
    'faz': ['urbansim.faz.population', 'urbansim.faz.number_of_jobs'],
}

#datasets_to_preload = {
#    'gridcell':{ 'nchunks':2},
#    'household':{},
#    'job':{},
#    'zone':{},
#    'faz':{},
#    'development_type':{},
#    'development_event_history':{},
#    'development_constraint':{},
#    'job_building_type':{},
#    'urbansim_constant':{},
#    }

# The year for which indicators are generated is hard-coded here.
year = 2025

simulation_state = SimulationState()
simulation_state.set_current_time(year)
SessionConfiguration(new_instance=True,
                     package_order=['psrc', 'urbansim', 'opus_core'],
                     in_storage=AttributeCache())

#cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache)
#datasets = DatasetFactory().create_datasets_from_flt(datasets_to_preload,
#                                                     "urbansim",
#                                                     additional_arguments={'in_storage': AttributeCache()})

# The augmentation step is switched off by this flag, so the block below does not run.
variable_augment = False
if variable_augment == True:
    for dataset_name in comparison_variables.keys():
        # Compute the comparison variables on the baseline run.
        cache_directory = baseline
        simulation_state.set_cache_directory(cache_directory)
        dataset = DatasetFactory().get_dataset(
            dataset_name,
            package='urbansim',
            arguments={'in_storage': AttributeCache()})
        variables = comparison_variables[dataset_name]
        dataset.compute_variables(variables, resources=Resources())
        ids = dataset.get_id_attribute()
        for run in runs.keys():
            # Attach the baseline values to the same dataset of each run,
            # aligned through the baseline ids.
            cache_directory = run
            simulation_state.set_cache_directory(cache_directory)
            run_dataset = DatasetFactory().get_dataset(
                dataset_name,
                package='urbansim',
                arguments={'in_storage': AttributeCache()})
            match_index = run_dataset.get_id_index(ids)
            for variable in variables:
                short_name = VariableName(variable).alias()
                attribute = dataset.get_attribute(short_name)
                run_dataset.add_attribute(attribute[match_index],
                                          'baseline_' + short_name,
                                          metadata=1)
                run_dataset.flush_attribute('baseline_' + short_name)
            # NOTE(review): nesting level of this call is assumed -- confirm.
            SessionConfiguration().get_dataset_pool(
            ).remove_all_datasets()

#indicators_module = args[0]
#eval("from %s import config" % indicators_module)
from make_indicators_openev import config
from urbansim.indicators.indicator_configuration_handler_batch_mode import generate_indicators
#from make_indicators_openev import config
#from inprocess.travis.urbansim.indicators.indicator_configuration_handler_batch_mode import generate_indicators

# Generate the configured indicators once per run; the same config object is
# re-pointed at each run's cache directory and description.
for run, descriptin in runs.iteritems():
    config.request_years = [year]
    config.cache_directory = run
    config.run_description = descriptin
    generate_indicators(config)
# Set up SimulationState and a dataset pool ``dp`` for interactive exploration
# of one year of a run cache.
cache_directory = _cache_directory
try:
    year = int(options.year)
except (TypeError, ValueError):
    # int() raises TypeError for a missing (None) value and ValueError for
    # non-numeric text; it never raises IndexError, so the previous handler
    # could not fire. parser.error() (optparse/argparse) prints the usage and
    # the message and terminates the process, so nothing after it would run.
    parser.error("year must be provided.")

if package_order is None:
    # NOTE(review): eval() on a command-line option executes arbitrary code.
    # Acceptable for a local exploration script; if the option is only ever a
    # list literal, ast.literal_eval would be the safer choice.
    package_order = eval(options.package_order)

st = SimulationState()
st.set_current_time(year)
st.set_cache_directory(cache_directory)
attribute_cache = AttributeCache()
dp = SessionConfiguration(new_instance=True,
                          package_order=package_order,
                          in_storage=attribute_cache).get_dataset_pool()

## example usage:
# python -i explore_run_cache.py -p bay_area_parcel -r 105 2025
# >>> h2025 = dp.get_dataset('household')
# >>> children_5yr = h2025.compute_variables('household.aggregate(person.age <= 5)')

# python -d /workspace/opus/data/bay_area_parcel/base_year_data 2010
# >>> h2010 = dp.get_dataset('household')
# >>> children_5yr = h2010.compute_variables('household.aggregate(person.age <= 5)')
def run(self, config, year, storage_type='sql'):
    """Compute and export household, person and location data for openAMOS.

    config -- run configuration; reads 'travel_model_configuration',
        'cache_directory' and 'dataset_pool_configuration'.
    year -- simulation year whose cached data is exported.
    storage_type -- 'sql' writes to the database named in the travel model
        configuration (created when missing); 'csv' writes to
        <cache_directory>/csv/<year>.

    Raises ValueError for any other storage_type.
    """
    tm_config = config['travel_model_configuration']
    database_server_config = tm_config.get("database_server_configuration",
                                           'simtravel_database_server')
    database_name = tm_config.get("database_name", 'mag_zone_baseyear')

    cache_directory = config['cache_directory']
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(cache_directory)
    simulation_state.set_current_time(year)
    attribute_cache = AttributeCache()
    dataset_pool = SessionConfiguration(
        new_instance=True,
        package_order=config['dataset_pool_configuration'].package_order,
        in_storage=attribute_cache).get_dataset_pool()

    if storage_type == 'sql':
        db_server = DatabaseServer(DatabaseConfiguration(
            database_name=database_name,
            database_configuration=database_server_config))
        if not db_server.has_database(database_name):
            print("Db doesn't exist creating one")
            db_server.create_database(database_name)
        db = db_server.get_database(database_name)
        output_storage = sql_storage(storage_location=db)
    elif storage_type == 'csv':
        csv_directory = os.path.join(cache_directory, 'csv', str(year))
        output_storage = csv_storage(storage_location=csv_directory)
    else:
        raise ValueError("Unsupported output storage type {}".format(storage_type))

    logger.start_block('Compute and export data to openAMOS...')
    try:
        # ---- households ----
        hh = dataset_pool.get_dataset('household')
        hh_recs = dataset_pool.get_dataset('households_recs')
        hh_variables = [
            'houseid=household.household_id',
            "hhsize=household.number_of_agents(person)",
            "one=(household.household_id>0).astype('i')",
            "inclt35k=(household.income<35000).astype('i')",
            "incge35k=(household.income>=35000).astype('i')",
            "incge50k=(household.income>=50000).astype('i')",
            "incge75k=(household.income>=75000).astype('i')",
            "incge100k=(household.income>=100000).astype('i')",
            "inc35t50=((household.income>=35000) & (household.income<50000)).astype('i')",
            "inc50t75=((household.income>=50000) & (household.income<75000)).astype('i')",
            "inc75t100=((household.income>=75000) & (household.income<100000)).astype('i')",
            'htaz = ((houseid>0)*(household.disaggregate(building.zone_id)-100) + (houseid<=0)*0)',
            #'htaz = ((houseid>0) & (htaz1>100))*(htaz1-100)+((houseid>0) & (htaz1==-1))*1122',
            "withchild = (household.aggregate(person.age<18)>0).astype('i')",
            "noc = household.aggregate(person.age<18)",
            "numadlt = household.aggregate(person.age>=18)",
            "hinc=household.income",
            "wif=household.workers",
            #"wif=household.aggregate(mag_zone.person.is_employed)",
            'numwrkr=household.workers',
            #'numwrkr=household.aggregate(mag_zone.person.is_employed)',
            'nwrkcnt=household.number_of_agents(person) - household.workers',
            #'nwrkcnt=household.number_of_agents(person) - household.aggregate(mag_zone.person.is_employed)',
            'yrbuilt=mag_zone.household.yrbuilt',
            'mag_zone.household.sparent',
            'mag_zone.household.rur',
            'mag_zone.household.urb',
            'zonetid = household.disaggregate(building.zone_id)',
        ]
        self.prepare_attributes(hh, hh_variables)
        # The attribute names of the *_recs dataset select what gets exported.
        attrs_to_export = hh_recs.get_known_attribute_names()
        self.write_dataset(hh, attrs_to_export, output_storage)
        dataset_pool._remove_dataset(hh.dataset_name)

        # ---- persons ----
        persons = dataset_pool.get_dataset('person')
        persons.out_table_name_default = 'persons'

        # Recoding invalid work and school locations to some random valid values
        persons_recs = dataset_pool.get_dataset('persons_recs')
        persons_recs.add_attribute(persons['person_id'], "personuniqueid")
        persons_recs.add_attribute(persons['marriage_status'], "marstat")
        persons_recs.add_attribute(persons['student_status'], "schstat")
        # Disabled recoding attributes (previously kept as a string literal):
        # persons_recs.add_attribute(persons['wtaz0'],"htaz_act")
        # persons_recs.add_attribute(0,"wtaz_rec")
        # persons_recs.add_attribute(0,"wtaz_rec1")
        # persons_recs.add_attribute(0,"wtaz_rec2")
        # persons_recs.add_attribute(0,"wtaz1_1")
        # persons_recs.add_attribute(0,"wtaz1_2")
        # persons_recs.add_attribute(0,"wtaz1_3")
        # persons_recs.add_attribute(persons['student_status'],"schstat")
        persons_recs.add_attribute(0, "wtaz1")
        persons_recs.add_attribute(0, "htaz")
        persons_recs.add_attribute(0, "schtaz1")
        persons_recs.flush_dataset()

        persons_variables = [
            'personid=mag_zone.person.member_id',
            'personuniqueid=person.person_id',
            'houseid=person.household_id',
            "one=(person.person_id>0).astype('i')",
            'trvtime=mag_zone.person.travel_time_from_home_to_work',
            'timetowk=mag_zone.person.travel_time_from_home_to_work',
            #'mag_zone.person.tmtowrk',
            #'tmtowrk=person.disaggregate(synthetic_person.tmtowrk)',
            "ag5t10=((person.age>=5) & (person.age<=10)).astype('i')",
            "ag11t14=((person.age>=11) & (person.age<=14)).astype('i')",
            "ag15t17=((person.age>=15) & (person.age<=17)).astype('i')",
            "ag18t24=((person.age>=18) & (person.age<=24)).astype('i')",
            "ag25t34=((person.age>=25) & (person.age<=34)).astype('i')",
            "ag35t44=((person.age>=35) & (person.age<=44)).astype('i')",
            "ag45t54=((person.age>=45) & (person.age<=54)).astype('i')",
            "ag55t64=((person.age>=55) & (person.age<=64)).astype('i')",
            "agge65=(person.age>=65).astype('i')",
            "ag12t17=((person.age>=12) & (person.age<=17)).astype('i')",
            "ag5t14=((person.age>=5) & (person.age<=14)).astype('i')",
            "agge15=(person.age>=15).astype('i')",
            "wrkr=(person.employment_status==1).astype('i')",
            "isemploy=(person.employment_status==1).astype('i')",
            "fulltim=(mag_zone.person.full_time==1).astype('i')",
            'parttim=mag_zone.person.part_time',
            'htaz = ((houseid>0)*(person.disaggregate(building.zone_id, intermediates=[household])-100) + (houseid<=0)*0)',
            'wtaz1=(person.wtaz <= 0)*0 + (person.wtaz > 0)*(person.wtaz-100)',
            "presch = ((person.age < 5)&(houseid>0)).astype('i')",
            "mag_zone.person.schstat",
            'schtaz1 = (person.schtaz <= 0)*0 + (person.schtaz > 0)*(person.schtaz-100)',
            'marstat = person.marriage_status',
            'enroll = person.student_status',
            'grade = person.student_status & person.education',
            'educ = person.education',
            "male = (person.sex==1).astype('i')",
            "female = (person.sex==2).astype('i')",
            "coled = (person.education >= 10).astype('i')",
            'race1 = person.race',
            "white = (person.race == 1).astype('i')",
            'person.hispanic'
        ]
        self.prepare_attributes(persons, persons_variables)
        attrs_to_export = persons_recs.get_known_attribute_names()
        self.write_dataset(persons, attrs_to_export, output_storage)
        dataset_pool._remove_dataset(persons.dataset_name)

        # ---- zones / locations ----
        zones = dataset_pool.get_dataset('zone')
        # NOTE(review): the industrial_* variables filter on sector_group
        # 'individual'; this looks like it may be meant to be 'industrial' --
        # confirm against the sector_group values before changing.
        zones_variables = [
            "retail_employment=zone.aggregate(mag_zone.job.sector_group=='retail')",
            "public_employment=zone.aggregate(mag_zone.job.sector_group=='public')",
            "office_employment=zone.aggregate(mag_zone.job.sector_group=='office')",
            "industrial_employment=zone.aggregate(mag_zone.job.sector_group=='individual')",
            "other_employment=zone.aggregate(mag_zone.job.sector_group=='other')",
            "retail_employment_density=zone.aggregate(mag_zone.job.sector_group=='retail')/zone.acres",
            "public_employment_density=zone.aggregate(mag_zone.job.sector_group=='public')/zone.acres",
            "office_employment_density=zone.aggregate(mag_zone.job.sector_group=='office')/zone.acres",
            "industrial_employment_density=zone.aggregate(mag_zone.job.sector_group=='individual')/zone.acres",
            "other_employment_density=zone.aggregate(mag_zone.job.sector_group=='other')/zone.acres",
            "total_area=zone.acres",
            "lowest_income=zone.aggregate(household.income < scoreatpercentile(household.income, 20))",
            "low_income=zone.aggregate(household.income < scoreatpercentile(household.income, 40))",
            "high_income=zone.aggregate(household.income > scoreatpercentile(household.income, 80))",
            #"institutional_population=zone.disaggregate(locations.institutional_population)",
            #"groupquarter_households=zone.disaggregate(locations.groupquarter_households)",
            "residential_households=zone.number_of_agents(household)",
            "locationid=zone.zone_id",
        ]
        locations = dataset_pool['locations']
        self.prepare_attributes(zones, zones_variables, dataset2=locations)
        attrs_to_export = locations.get_known_attribute_names()
        self.write_dataset(locations, attrs_to_export, output_storage)
        dataset_pool._remove_dataset(locations.dataset_name)
    finally:
        # Close the logger block even when the export fails part-way.
        logger.end_block()
class Estimator(GenericModelExplorer):
    """Estimate, re-estimate and predict with a single model via ModelSystem."""

    def __init__(self, config=None, save_estimation_results=False):
        """Set up simulation state, session configuration and the model system.

        config -- configuration; must contain a non-None 'cache_directory'
            and a 'dataset_pool_configuration'.
        save_estimation_results -- when true, estimate()/reestimate() call
            save_results() afterwards.

        Raises KeyError when the cache directory is not configured (this now
        includes config being None, which used to surface as a TypeError).
        """
        if (config is None or 'cache_directory' not in config
                or config['cache_directory'] is None):
            raise KeyError(
                "The cache directory must be specified in the "
                "given configuration, giving the filesystem path to the cache "
                "directory containing the data with which to estimate. Please "
                "check that your configuration contains the 'cache_directory' "
                "entry and that it is not None.")

        self.simulation_state = SimulationState(new_instance=True)
        self.simulation_state.set_cache_directory(config['cache_directory'])

        SessionConfiguration(
            new_instance=True,
            package_order=config['dataset_pool_configuration'].package_order,
            in_storage=AttributeCache())
        self.config = Resources(config)
        self.save_estimation_results = save_estimation_results
        self.debuglevel = self.config.get("debuglevel", 4)
        self.model_system = ModelSystem()
        self.agents_index_for_prediction = None

        models = self.config.get('models', [])
        self.model_name = None
        if "model_name" in config.keys():
            self.model_name = config["model_name"]
        else:
            # Take the first model entry whose process list includes 'estimate'.
            for model in models:
                if not isinstance(model, dict):
                    continue
                model_name = list(model.keys())[0]
                processes = model[model_name]
                if (processes == "estimate") or (
                        isinstance(processes, list) and ("estimate" in processes)):
                    self.model_name = model_name
                    break

        estimate_config_changes = self.config.get(
            'config_changes_for_estimation', {}).get('estimate_config', {})
        if len(estimate_config_changes) > 0:
            change = Resources({
                'models_configuration': {
                    self.model_name: {
                        'controller': {
                            'init': {
                                'arguments': {}
                            }
                        }
                    }
                }
            })
            estimate_config_str = self.config['models_configuration'].get(
                self.model_name, {}).get('controller', {}).get('init', {}).get(
                    'arguments', {}).get('estimate_config', '{}')
            # NOTE(review): eval() of a configuration string; only safe for
            # trusted configurations.
            # Wrapping in Resources guarantees .merge() exists: the '{}'
            # default evaluates to a plain dict, which has no merge method.
            try:
                estimate_config = Resources(eval(estimate_config_str))
            except Exception:
                # Best effort, as before: fall back to an empty configuration.
                estimate_config = Resources({})
            estimate_config.merge(estimate_config_changes)
            self.config.merge(change)
            self.config['models_configuration'][
                self.model_name]['controller']['init']['arguments'][
                    'estimate_config'] = 'Resources(%s)' % estimate_config

    def estimate(self, out_storage=None):
        """Run the model system, then extract (and optionally save) the results."""
        self.model_system.run(self.config,
                              write_datasets_to_cache_at_end_of_year=False)
        self.extract_coefficients_and_specification()

        if self.save_estimation_results:
            self.save_results(out_storage=out_storage)

    def reestimate(self, specification_module_name=None, specification_dict=None,
                   out_storage=None, type=None, submodels=None):
        """Re-run estimation with a new specification.

        specification_module_name is name of a module that contains a dictionary called
        'specification'. If it is not given, the argument specification_dict must be given
        which is a dictionary object.
        'type' is the name of model member, such as 'commercial', 'residential'. The
        specification dictionary is expected to have an entry of this name.
        If 'submodels' is given (list or a number), the restimation is done only for those
        submodels.

        Raises ValueError when a requested submodel is not in the specification.
        """
        if specification_module_name is not None:
            # Import and reload the module directly instead of exec/eval on
            # strings built from the module name.
            import importlib
            spec_module = importlib.import_module(specification_module_name)
            try:
                reload_module = reload  # builtin on Python 2
            except NameError:
                reload_module = importlib.reload
            spec_module = reload_module(spec_module)
            specification_dict = spec_module.specification

        if type is not None:
            specification_dict = specification_dict[type]
        if submodels is not None:
            # remove all submodels but the given ones from specification
            submodels_to_be_deleted = list(specification_dict.keys())
            if not isinstance(submodels, list):
                submodels = [submodels]
            for sm in submodels:
                if sm not in submodels_to_be_deleted:
                    raise ValueError("Submodel %s not in the specification." % sm)
                submodels_to_be_deleted.remove(sm)
            # '_definition_' is never deleted, whatever submodels were requested.
            if "_definition_" in submodels_to_be_deleted:
                submodels_to_be_deleted.remove("_definition_")
            for sm in submodels_to_be_deleted:
                del specification_dict[sm]
        self.specification = EquationSpecification(
            specification_dict=specification_dict)
        new_namespace = self.model_system.run_year_namespace
        keys_coeff_spec = self.get_keys_for_coefficients_and_specification()
        new_namespace[keys_coeff_spec["specification"]] = self.specification
        self.coefficients, coeff_dict_dummy = self.model_system.do_process(
            new_namespace)
        ## update run_year_namespace since it's not been updated by do_process
        self.model_system.run_year_namespace = new_namespace
        self.model_system.run_year_namespace[
            keys_coeff_spec["coefficients"]] = self.coefficients

        ## this gets coeff and spec from run_year_namespace and is only updated in _run_year method
        #self.extract_coefficients_and_specification()
        if self.save_estimation_results:
            self.save_results(out_storage=out_storage)

    def predict(self, predicted_choice_id_name, agents_index=None):
        """Run prediction. Currently makes sense only for choice models.

        predicted_choice_id_name -- agent attribute that receives the
            predicted choices (-1 for agents outside agents_index).
        agents_index -- index of agents to predict for; defaults to the
            agent set index captured on the first call.

        Returns True on success, False (after logging) on failure.
        """
        # Create temporary configuration where all words 'estimate' are replaced by 'run'
        # NOTE(review): the nested 'arguments' dict below is mutated in place;
        # whether Resources(self.config) shields self.config from that is not
        # visible here -- confirm.
        tmp_config = Resources(self.config)

        if self.agents_index_for_prediction is None:
            self.agents_index_for_prediction = self.get_agent_set_index().copy()

        if agents_index is None:
            agents_index = self.agents_index_for_prediction

        run_arguments = tmp_config['models_configuration'][self.model_name][
            'controller']['run']['arguments']
        run_arguments['coefficients'] = "coeff_est"
        run_arguments['agents_index'] = "agents_index"
        run_arguments['chunk_specification'] = "{'nchunks':1}"

        try:
            run_year_namespace = copy.copy(
                self.model_system.run_year_namespace)
        except Exception:
            logger.log_error("The estimate() method must be run first")
            return False

        try:
            agents = self.get_agent_set()
            choice_id_name = self.get_choice_set().get_id_name()[0]
            # save current locations of agents
            current_choices = agents.get_attribute(choice_id_name).copy()
            dummy_data = zeros(current_choices.size,
                               dtype=current_choices.dtype) - 1
            agents.modify_attribute(name=choice_id_name,
                                    data=dummy_data)  # reset all choices

            try:
                run_year_namespace["process"] = "run"
                run_year_namespace["coeff_est"] = self.coefficients
                run_year_namespace["agents_index"] = agents_index
                run_year_namespace["processmodel_config"] = tmp_config[
                    'models_configuration'][self.model_name]['controller']['run']
                new_choices = self.model_system.do_process(run_year_namespace)
            finally:
                # Restore the real choices even if the model run fails, so the
                # agents are never left with the -1 placeholders.
                agents.modify_attribute(name=choice_id_name,
                                        data=current_choices)

            dummy_data[agents_index] = new_choices
            if predicted_choice_id_name not in agents.get_known_attribute_names():
                agents.add_primary_attribute(name=predicted_choice_id_name,
                                             data=dummy_data)
            else:
                agents.modify_attribute(name=predicted_choice_id_name,
                                        data=dummy_data)
            logger.log_status("Predictions saved into attribute " +
                              predicted_choice_id_name)
            return True
        except Exception as e:
            logger.log_error("Error encountered in prediction: %s" % e)
            logger.log_stack_trace()

        return False
runs = Runs.runs comparison_variables = Runs.comparison_variables baseline = Runs.baseline simulation_state = SimulationState() simulation_state.set_current_time(options.year) SessionConfiguration(new_instance=True, package_order=['psrc','urbansim','opus_core'], in_storage=AttributeCache()) if options.augment_variables == True: for dataset_name in comparison_variables.keys(): cache_directory = baseline simulation_state.set_cache_directory(cache_directory) dataset = DatasetFactory().get_dataset(dataset_name, package='urbansim', arguments={'in_storage': AttributeCache()}) variables = comparison_variables[dataset_name] dataset.compute_variables(variables, resources=Resources()) ids = dataset.get_id_attribute() for run in runs.keys(): cache_directory=run simulation_state.set_cache_directory(cache_directory) run_dataset = DatasetFactory().get_dataset(dataset_name, package='urbansim', arguments={'in_storage': AttributeCache()}) match_index = run_dataset.get_id_index(ids) for variable in variables:
def run(self, base_directory, urbansim_cache_directory, years, output_directory, temp_folder,
        coefficients_name, specification_name, convert_flt=True, convert_input=False):
    """Run the land cover change model for every year in `years`.

    For each year the input land cover is generated, then the model is run
    over successive, overlapping index windows of the land cover dataset and
    the output is written after every window.

    base_directory: directory that contains all the year folders of lccm.
    urbansim_cache_directory: directory that contains all the year folders
        of the urbansim cache.
    years: list of years to run.
    output_directory, convert_flt, convert_input: passed through unchanged to
        the input/output generation helpers.
    temp_folder: working directory stored as self.temp_land_cover_dir; when
        None a fresh one is created with tempfile.mkdtemp(). The directory is
        removed when the run finishes, whether it was created here or
        supplied by the caller.
    coefficients_name, specification_name: names of the tables loaded from
        the tab storage located in <self.package_path>/data.
    """
    model = LandCoverChangeModel(self.possible_lcts, submodel_string=self.lct_attribute,
                                 choice_attribute_name=self.lct_attribute, debuglevel=4)
    coefficients = Coefficients()
    storage = StorageFactory().get_storage(
        'tab_storage', storage_location=os.path.join(self.package_path, 'data'))
    coefficients.load(in_storage=storage, in_table_name=coefficients_name)
    specification = EquationSpecification(in_storage=storage)
    specification.load(in_table_name=specification_name)
    specification.set_variable_prefix("biocomplexity.land_cover.")
    constants = Constants()
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(urbansim_cache_directory)
    attribute_cache = AttributeCache()
    SessionConfiguration(new_instance=True,
                         package_order=['biocomplexity', 'urbansim', 'opus_core'],
                         in_storage=AttributeCache())

    ncols = LccmConfiguration.ncols

    if temp_folder is None:
        self.temp_land_cover_dir = tempfile.mkdtemp()
    else:
        self.temp_land_cover_dir = temp_folder

    try:
        for year in years:
            land_cover_path = self._generate_input_land_cover(
                year, base_directory, urbansim_cache_directory,
                years, output_directory, convert_flt, convert_input)
            #max_size = 174338406 (orig) - act. int: 19019944 (37632028 incl NoData)
            # 1st instance of lc_dataset - but looks like a 'lite' version
            max_size = self._get_max_index(land_cover_path)
            offset = min(LccmConfiguration.offset, max_size)
            s = 0
            t = offset
            while (s < t and t <= max_size):
                logger.log_status("Offset: ", s, t)
                index = arange(s, t)

                land_cover_cache_path = os.path.join(urbansim_cache_directory,
                                                     str(year), 'land_covers')
                self._clean_up_land_cover_cache(land_cover_cache_path)

                simulation_state.set_current_time(year)

                # 2nd instance of lc_dataset; it reads from and writes to the
                # same flt storage location.
                land_covers = LandCoverDataset(
                    in_storage=StorageFactory().get_storage(
                        'flt_storage', storage_location=land_cover_path),
                    out_storage=StorageFactory().get_storage(
                        'flt_storage', storage_location=land_cover_path),
                    debuglevel=4)
                land_covers.subset_by_index(index)
                # land_covers.load_dataset()
                gridcells = GridcellDataset(in_storage=attribute_cache, debuglevel=4)

                model.run(specification, coefficients, land_covers,
                          data_objects={"gridcell": gridcells,
                                        "constants": constants,
                                        "flush_variables": True},
                          chunk_specification={'nchunks': 5})  ## chunk size set here
                land_covers.flush_dataset()
                # Drop the references before the next window is loaded.
                del gridcells
                del land_covers

                # self._generate_output_flt(year, urbansim_cache_directory, output_directory, convert_flt)
                self._generate_output_flt2(year, urbansim_cache_directory,
                                           output_directory, convert_flt)

                if t >= max_size:
                    break
                # Start the next window 10*ncols indices before the end of this
                # one so consecutive windows overlap (presumably ten raster
                # rows - confirm against LccmConfiguration.ncols).
                s = max(t - 10 * ncols, s)
                t = min(t + offset - 10 * ncols, max_size)
    finally:
        # Clean up temp storage once the simulation is done, and also when a
        # year fails, so the working directory is not left behind.
        shutil.rmtree(self.temp_land_cover_dir)
def run(self, resources, year):
    """Point the simulation state at a cache directory and `year`, start a
    new session configuration and fetch the urbansim 'gridcell' dataset.

    Nothing is returned; the dataset is only bound to the local `gc_set`.
    """
    # NOTE(review): `config` is neither a parameter nor a local of this
    # function, and `resources` is never read - presumably the cache
    # directory was meant to come from `resources`; confirm before relying
    # on this method.
    cache_directory = config['cache_directory']
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(cache_directory)
    simulation_state.set_current_time(year)
    attribute_cache = AttributeCache()
    SessionConfiguration(new_instance=True,
                         in_storage=AttributeCache())
    arguments = {'in_storage':attribute_cache}
    gc_set = DatasetFactory().get_dataset('gridcell', package='urbansim',
                                          arguments=arguments)


# Batch script: generate indicators for a fixed set of cached runs.

# Maps the cache directory of each run to the description used for it.
runs = {
    #r'X:\urbansim_cache\run_1713.2007_01_03_11_16':r'(run 1713 - baseline)',
    #r'X:\urbansim_cache\run_1714.2007_01_03_11_20':r'(run 1714 - no ugb)',
    #r'X:\urbansim_cache\run_1731.2007_01_03_11_16':r'(run 1731 - no build)',
    r'X:\urbansim_cache\run_1847.2007_01_15_15_23':r'(run 1847 - no UGB 1/17/2007)',
    r'X:\urbansim_cache\run_1848.2007_01_15_15_40':r'(run 1848 - no UGB+1.5xhighway 1/17/2007)',
    # r'X:\urbansim_cache\run_1849.2007_01_15_16_09':r'(run 1849 - baseline 1/17/2007)',
    r'V:\psrc\run_1850.2007_01_15_17_03':r'(run 1850 - baseline 1/17/2007)',
    r'V:\psrc\run_1851.2007_01_15_17_07':r'(run 1851 - no build 1/17/2007)'
    }

# Cache directory of the run whose values are copied into the other runs as
# 'baseline_<variable>' attributes (see the augmentation step below).
#baseline = r'X:\urbansim_cache\run_1713.2007_01_03_11_16'
baseline = r'V:\psrc\run_1850.2007_01_15_17_03'

# Variables, per dataset, that are computed on the baseline run.
comparison_variables = {'gridcell': ['urbansim.gridcell.population',
                                     'urbansim.gridcell.number_of_jobs'],
                        'faz':['urbansim.faz.population',
                               'urbansim.faz.number_of_jobs'],
                        }

#datasets_to_preload = {
#    'gridcell':{ 'nchunks':2},
#    'household':{},
#    'job':{},
#    'zone':{},
#    'faz':{},
#    'development_type':{},
#    'development_event_history':{},
#    'development_constraint':{},
#    'job_building_type':{},
#    'urbansim_constant':{},
#    }

# Year for which the indicators are requested.
year = 2025

simulation_state = SimulationState()
simulation_state.set_current_time(year)
SessionConfiguration(new_instance=True,
                     package_order=['psrc','urbansim','opus_core'],
                     in_storage=AttributeCache())

#cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache)
#datasets = DatasetFactory().create_datasets_from_flt(datasets_to_preload,
#                                                    "urbansim",
#                                                    additional_arguments={'in_storage': AttributeCache()})

# Augmentation is switched off here, so the block below does not execute
# unless this flag is edited.
variable_augment = False
if variable_augment == True:
    for dataset_name in comparison_variables.keys():
        # Compute the comparison variables on the baseline run.
        cache_directory = baseline
        simulation_state.set_cache_directory(cache_directory)
        dataset = DatasetFactory().get_dataset(dataset_name,
                                               package='urbansim',
                                               arguments={'in_storage': AttributeCache()})
        variables = comparison_variables[dataset_name]
        dataset.compute_variables(variables, resources=Resources())
        ids = dataset.get_id_attribute()
        for run in runs.keys():
            # Load the same dataset from this run's cache and look up the
            # baseline ids in it.
            cache_directory=run
            simulation_state.set_cache_directory(cache_directory)
            run_dataset = DatasetFactory().get_dataset(dataset_name,
                                                       package='urbansim',
                                                       arguments={'in_storage': AttributeCache()})
            match_index = run_dataset.get_id_index(ids)
            for variable in variables:
                # Store the baseline values on the run's dataset under
                # 'baseline_<alias>' and flush that attribute.
                short_name = VariableName(variable).alias()
                attribute = dataset.get_attribute(short_name)
                run_dataset.add_attribute(attribute[match_index],'baseline_'+short_name,metadata=1)
                run_dataset.flush_attribute('baseline_'+short_name)
            # NOTE(review): confirm this call belongs inside the per-run loop.
            SessionConfiguration().get_dataset_pool().remove_all_datasets()

#indicators_module = args[0]
#eval("from %s import config" % indicators_module)
from make_indicators_openev import config
from urbansim.indicators.indicator_configuration_handler_batch_mode import generate_indicators
#from make_indicators_openev import config
#from inprocess.travis.urbansim.indicators.indicator_configuration_handler_batch_mode import generate_indicators

# Generate the indicators once per run, reusing the imported `config` object
# and overwriting its year, cache directory and description each time.
for run, descriptin in runs.iteritems():
    config.request_years = [year]
    config.cache_directory = run
    config.run_description = descriptin
    generate_indicators(config)
def run(self, optimizer="lbfgsb", results_pickle_prefix="calib", optimizer_kwargs=None):
    """Call the specified optimizer to calibrate.

    Arguments:
      - optimizer: optimization method to use; one of "bfgs", "lbfgsb",
        "anneal" or "panneal"
      - results_pickle_prefix: prefix of the pickle file name that will be
        saved after the simulation; if None, the results are not saved
      - optimizer_kwargs: optional dict of keyword arguments for the
        optimizer; entries override the built-in defaults of the chosen
        optimizer

    Returns:
      - the results from the optimizer
      - as a side effect, a pickle dump of the results named
        "<results_pickle_prefix>_<optimizer>.pickle" in self.log_directory,
        if results_pickle_prefix is specified

    Raises:
      - TypeError if an entry of calib_datasets is not a str, list or tuple
      - ValueError if optimizer is not one of the recognized names
    """
    if optimizer_kwargs is None:
        optimizer_kwargs = {}

    simulation_state = SimulationState()
    simulation_state.set_current_time(self.base_year)
    simulation_state.set_cache_directory(self.cache_directory)
    attribute_cache = AttributeCache()
    dataset_pool = SessionConfiguration(
        new_instance=True, package_order=self.package_order, in_storage=attribute_cache
    ).get_dataset_pool()

    # NOTE(review): calib_datasets is created empty right before it is
    # iterated, so this loop body never executes and init_v below stays
    # empty. Presumably the loop was meant to read a configured mapping of
    # dataset name -> attribute(s) to calibrate; confirm the intended source.
    calib_datasets = {}
    for dataset_name, calib_attr in calib_datasets.items():
        dataset = dataset_pool.get_dataset(dataset_name, dataset_arguments={"id_name": []})
        # A dataset may be restricted by explicit values or by a pattern,
        # but not by both at once.
        assert (
            subset is None
            or subset.get(dataset_name, None) is None
            or subset_patterns is None
            or subset_patterns.get(dataset_name, None) is None
        )
        if subset is not None and subset.get(dataset_name, None) is not None:
            subset_attr, subset_cond = subset.get(dataset_name)
            index = np.in1d(dataset[subset_attr], subset_cond)
        elif subset_patterns is not None and subset_patterns.get(dataset_name, None) is not None:
            subset_attr, subset_pattern = subset_patterns.get(dataset_name)
            index = array([bool(re.search(subset_pattern, attr_v))
                           for attr_v in dataset[subset_attr]])
        else:
            index = arange(dataset.size(), dtype="i")
        calib_datasets[dataset_name] = [dataset, calib_attr, index]

    # Collect the initial values of every calibrated attribute and join them
    # once; the leading empty float array keeps the result at least float64.
    init_parts = [array([], dtype="f8")]
    for dataset_name, calib in calib_datasets.items():
        dataset, calib_attr, index = calib
        if isinstance(calib_attr, str):
            attr_names = [calib_attr]
        elif isinstance(calib_attr, (list, tuple)):
            attr_names = calib_attr
        else:
            raise TypeError("Unrecongized data type in calib_datasets")
        for attr in attr_names:
            init_parts.append(dataset[attr][index])
    init_v = np.concatenate(init_parts)

    t0 = time.time()

    if is_parallelizable == True:
        set_parallel(True)

    try:
        print(OKBLUE + "\noptimizer = {} (is_parallel = {})".format(optimizer, is_parallelizable) + ENDC)
        print(OKBLUE + "-------------------------------------------------------\n" + ENDC)

        # Defaults shared by the serial and the parallel annealing optimizers.
        anneal_defaults = {
            "schedule": "fast",
            "full_output": 1,
            "T0": None,
            "Tf": 1e-12,
            "maxeval": None,
            "maxaccept": None,
            "maxiter": 400,
            "boltzmann": 1.0,
            "learn_rate": 0.5,
            "feps": 1e-06,
            "quench": 1.0,
            "m": 1.0,
            "n": 1.0,
            "lower": -1,
            "upper": 1,
            "dwell": 50,
            "disp": True,
        }

        if optimizer == "bfgs":
            default_kwargs = {
                "fprime": None,
                "epsilon": 1e-08,
                "maxiter": None,
                "full_output": 1,
                "disp": 1,
                "retall": 0,
                "callback": None,
            }
            optimizer_func = fmin_bfgs
        elif optimizer == "lbfgsb":
            default_kwargs = {
                "fprime": None,
                "approx_grad": True,
                "bounds": None,
                "factr": 1e12,
                "iprint": 1,
            }
            optimizer_func = fmin_l_bfgs_b
        elif optimizer == "anneal":
            default_kwargs = dict(anneal_defaults)
            optimizer_func = anneal
        elif optimizer == "panneal":
            default_kwargs = dict(anneal_defaults, cores=24, interv=20)
            optimizer_func = panneal
        else:
            raise ValueError("Unrecognized optimizer {}".format(optimizer))

        # Caller-supplied settings win over the defaults above.
        default_kwargs.update(optimizer_kwargs)
        results = optimizer_func(self.target_func, copy(init_v), **default_kwargs)

        duration = time.time() - t0
        if results_pickle_prefix is not None:
            pickle_file = "{}_{}.pickle".format(results_pickle_prefix, optimizer)
            pickle_file = os.path.join(self.log_directory, pickle_file)
            # Close the file deterministically instead of leaking the handle.
            with open(pickle_file, "wb") as pickle_fh:
                pickle.dump(results, pickle_fh)
    finally:
        # Switch parallel mode back off even when the optimizer or the
        # pickling fails, so later code does not inherit it.
        if is_parallelizable == True:
            set_parallel(False)

    logger.log_status("init target_func: {}".format(self.target_func(init_v)))
    logger.log_status("end target_func: {}".format(results[:]))  # which one?
    logger.log_status("outputs from optimizer: {}".format(results))
    logger.log_status("Execution time: {}".format(duration))
    return results