Python SimulationState.set_cache_directory примеры использования

Язык программирования: Python

Пространство имен/Пакет: opus_core.simulation_state

Класс/Тип: SimulationState

Метод/Функция: set_cache_directory

Примеров на hotexamples.com: 44

Python SimulationState.set_cache_directory — найдено 44 примера. Это лучшие примеры кода на Python для opus_core.simulation_state.SimulationState.set_cache_directory, полученные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

SimulationState(30)

set_current_time(20)

set_cache_directory(18)

get_cache_directory(9)

remove_singleton(3)

set_low_memory_run(2)

get_current_cache_directory(1)

get_current_time(1)

set_start_time(1)

Пример #1

Показать файл

Файл: get_cache_data_into_travel_model.py Проект: psrc/urbansim

    def run(self, config, year, *args, **kwargs):
        """Prepare the travel model input file for ``year``.

        Points the SimulationState at config['cache_directory'] and ``year``,
        builds a new SessionConfiguration backed by an AttributeCache, looks
        up the 'travel_zone' dataset, calls prepare_for_run() with the
        'travel_model_configuration' entry of ``config``, and finally hands
        everything to create_travel_model_input_file().  Any extra positional
        or keyword arguments are forwarded to that last call unchanged.
        """
        cache_dir = config['cache_directory']
        state = SimulationState()
        state.set_cache_directory(cache_dir)
        state.set_current_time(year)

        cache_storage = AttributeCache()
        package_order = config['dataset_pool_configuration'].package_order
        session = SessionConfiguration(new_instance=True,
                                       package_order=package_order,
                                       in_storage=cache_storage)
        pool = session.get_dataset_pool()

        travel_zones = pool.get_dataset('travel_zone')
        self.prepare_for_run(config['travel_model_configuration'], year)
        self.create_travel_model_input_file(config=config,
                                            year=year,
                                            zone_set=travel_zones,
                                            datasets=pool,
                                            *args, **kwargs)

Пример #2

Показать файл

Файл: household_synthesizer.py Проект: urban-ai/VIBe2UrbanSim

    def __init__(self, config):
        """Place every household into a building of its zone and write the result.

        Steps, as performed below:
          * point a new SimulationState at config['base_year'] and
            config['cache_directory'] and start a new SessionConfiguration
            backed by an AttributeCache;
          * run CreateBaseyearCache with ``config``;
          * choose the output storage: SQL storage when ``config`` contains
            'estimation_database_configuration', otherwise flt storage in the
            cache sub-directory named base_year + 1;
          * for each zone, draw a building for every household of that zone,
            weighting buildings by residential_units / zone capacity;
          * store the drawn ids as 'building_id' and write the 'households'
            table to the chosen storage.
        """
        state = SimulationState(new_instance=True)
        state.set_current_time(config['base_year'])
        state.set_cache_directory(config['cache_directory'])

        SessionConfiguration(new_instance=True,
                             package_order=config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())
        # The return value of the cache creation is not needed here.
        CreateBaseyearCache().run(config)

        if 'estimation_database_configuration' in config:
            db_config = config['estimation_database_configuration']
            database = DatabaseServer(db_config).get_database(db_config.database_name)
            out_storage = StorageFactory().get_storage('sql_storage',
                                                       storage_location=database)
        else:
            next_year_dir = os.path.join(config['cache_directory'],
                                         str(config['base_year']+1))
            out_storage = StorageFactory().get_storage('flt_storage',
                                                       storage_location=next_year_dir)

        pool = SessionConfiguration().get_dataset_pool()
        households = pool.get_dataset("household")
        buildings = pool.get_dataset("building")
        zones = pool.get_dataset("zone")
        zone_ids = zones.get_id_attribute()

        capacity_attribute_name = "residential_units"
        capacity_variable_name = "%s=sanfrancisco.zone.aggregate_%s_from_building" % (
            capacity_attribute_name, capacity_attribute_name)
        buildings.compute_variables("sanfrancisco.building.zone_id", dataset_pool=pool)
        zones.compute_variables(capacity_variable_name, dataset_pool=pool)

        building_zone_id = buildings.get_attribute('zone_id')

        # Every household is treated as unplaced, so the mask factor is 1.
        unplaced_flag = 1
        # Start from -1 for all households; zones skipped below keep that value.
        assigned_building_id = zeros(households.size(), dtype='int32')-1

        for zone_id in zone_ids:
            capacity = zones.get_attribute_by_id(capacity_attribute_name, zone_id)
            in_zone = (households.get_attribute('zone_id') == zone_id)
            unplaced_in_zone = in_zone * unplaced_flag
            building_in_zone = (building_zone_id == zone_id)

            if capacity == 0 or in_zone.sum()==0:
                print("WARNING: zone %s has %s households but only %s units" % (zone_id, in_zone.sum(), capacity))
                continue

            # Units of buildings outside this zone are zeroed out before
            # dividing by the zone capacity.
            units_in_zone = buildings.get_attribute(capacity_attribute_name) * building_in_zone
            prob = units_in_zone / array(capacity, dtype=float64)

            # One draw per household in the zone (presumably uniform on [0, 1)
            # -- confirm which `random` is imported); searchsorted on the
            # cumulative weights turns each draw into a building index.
            draws = random(sum(unplaced_in_zone))
            cumulative = ncumsum(prob)
            building_index = searchsorted(cumulative, draws)

            assigned_building_id[where(unplaced_in_zone)] = \
                buildings.get_attribute_by_index('building_id', building_index)

        households.set_values_of_one_attribute('building_id', assigned_building_id)
        households.write_dataset(out_table_name='households', out_storage=out_storage)

Пример #3

Показать файл

    def __init__(self, config):
        """Place every household into a building of its zone and write the result.

        Steps, as performed below:
          * point a new SimulationState at config['base_year'] and
            config['cache_directory'] and start a new SessionConfiguration
            backed by an AttributeCache;
          * run CreateBaseyearCache with ``config``;
          * choose the output storage: SQL storage when ``config`` contains
            'estimation_database_configuration', otherwise flt storage in the
            cache sub-directory named base_year + 1;
          * for each zone, draw a building for every household of that zone,
            weighting buildings by residential_units / zone capacity;
          * store the drawn ids as 'building_id' and write the 'households'
            table to the chosen storage.
        """
        state = SimulationState(new_instance=True)
        state.set_current_time(config['base_year'])
        state.set_cache_directory(config['cache_directory'])

        SessionConfiguration(new_instance=True,
                             package_order=config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())
        # The return value of the cache creation is not needed here.
        CreateBaseyearCache().run(config)

        if 'estimation_database_configuration' in config:
            db_config = config['estimation_database_configuration']
            database = DatabaseServer(db_config).get_database(db_config.database_name)
            out_storage = StorageFactory().get_storage('sql_storage',
                                                       storage_location=database)
        else:
            next_year_dir = os.path.join(config['cache_directory'],
                                         str(config['base_year']+1))
            out_storage = StorageFactory().get_storage('flt_storage',
                                                       storage_location=next_year_dir)

        pool = SessionConfiguration().get_dataset_pool()
        households = pool.get_dataset("household")
        buildings = pool.get_dataset("building")
        zones = pool.get_dataset("zone")
        zone_ids = zones.get_id_attribute()

        capacity_attribute_name = "residential_units"
        capacity_variable_name = "%s=sanfrancisco.zone.aggregate_%s_from_building" % (
            capacity_attribute_name, capacity_attribute_name)
        buildings.compute_variables("sanfrancisco.building.zone_id", dataset_pool=pool)
        zones.compute_variables(capacity_variable_name, dataset_pool=pool)

        building_zone_id = buildings.get_attribute('zone_id')

        # Every household is treated as unplaced, so the mask factor is 1.
        unplaced_flag = 1
        # Start from -1 for all households; zones skipped below keep that value.
        assigned_building_id = zeros(households.size(), dtype='int32')-1

        for zone_id in zone_ids:
            capacity = zones.get_attribute_by_id(capacity_attribute_name, zone_id)
            in_zone = (households.get_attribute('zone_id') == zone_id)
            unplaced_in_zone = in_zone * unplaced_flag
            building_in_zone = (building_zone_id == zone_id)

            if capacity == 0 or in_zone.sum()==0:
                print("WARNING: zone %s has %s households but only %s units" % (zone_id, in_zone.sum(), capacity))
                continue

            # Units of buildings outside this zone are zeroed out before
            # dividing by the zone capacity.
            units_in_zone = buildings.get_attribute(capacity_attribute_name) * building_in_zone
            prob = units_in_zone / array(capacity, dtype=float64)

            # One draw per household in the zone (presumably uniform on [0, 1)
            # -- confirm which `random` is imported); searchsorted on the
            # cumulative weights turns each draw into a building index.
            draws = random(sum(unplaced_in_zone))
            cumulative = ncumsum(prob)
            building_index = searchsorted(cumulative, draws)

            assigned_building_id[where(unplaced_in_zone)] = \
                buildings.get_attribute_by_index('building_id', building_index)

        households.set_values_of_one_attribute('building_id', assigned_building_id)
        households.write_dataset(out_table_name='households', out_storage=out_storage)

Пример #4

Показать файл

Файл: cache_scenario_database.py Проект: christianurich/VIBe2UrbanSim

    def run(self, config, show_output = False):
        """Cache the scenario database tables into config['cache_directory'].

        Stores ``show_output``, the database server and the list of tables to
        cache on the instance, points the SimulationState at the cache
        directory and base year from ``config`` (and applies 'low_memory_run'
        when that key is present), then calls cache_database_tables() for
        every database returned by the scenario database manager.  Tables that
        were requested but are not in self.tables_cached afterwards are
        reported through logger.log_warning().
        """
        logger.log_status("Caching large SQL tables to: " + config['cache_directory'])
        self.show_output = show_output

        db_config = config['scenario_database_configuration']
        manager = ScenarioDatabaseManager(
            server_configuration = db_config,
            base_scenario_database_name = db_config.database_name
        )
        self.database_server = DatabaseServer(db_config)
        tables_by_database = manager.get_database_to_table_mapping()

        self.tables_to_cache = config['creating_baseyear_cache_configuration'].tables_to_cache

        state = SimulationState()
        if 'low_memory_run' in config:
            state.set_low_memory_run(config['low_memory_run'])
        state.set_cache_directory(config['cache_directory'])
        state.set_current_time(config['base_year'])

        # Reset before caching so the comparison below only sees this run.
        self.tables_cached = set()
        for database_name, tables in tables_by_database.items():
            self.cache_database_tables(config, database_name, tables)

        missing_tables = set(self.tables_to_cache) - self.tables_cached
        if not missing_tables:
            return
        logger.log_warning('The following requested tables were NOT cached:')
        for table_name in missing_tables:
            logger.log_warning('\t%s' % table_name)

Пример #5

Показать файл

Файл: expand_persons_from_households.py Проект: christianurich/VIBe2UrbanSim

    def __init__(self, config):
        """Build a 'persons' table with one row per household worker and write it.

        The output storage is SQL storage when ``config`` contains
        'estimation_database_configuration', otherwise flt storage in the
        cache sub-directory named base_year + 1.  After pointing the
        SimulationState at the cache directory and base year and starting a
        new SessionConfiguration, the scenario database is cached if the
        base-year cache directory does not exist yet, and the datasets listed
        in config['datasets_to_preload'] are fetched from the pool.  Each
        household with ``workers`` > 0 then contributes that many person rows.
        """
        if 'estimation_database_configuration' in config:
            db_config = config['estimation_database_configuration']
            database = DatabaseServer(db_config).get_database(db_config.database_name)
            out_storage = StorageFactory().build_storage_for_dataset(
                type='sql_storage', storage_location=database)
        else:
            next_year_dir = os.path.join(config['cache_directory'],
                                         str(config['base_year']+1))
            out_storage = StorageFactory().get_storage(type='flt_storage',
                                                       storage_location=next_year_dir)

        state = SimulationState()
        state.set_cache_directory(config['cache_directory'])
        state.set_current_time(config['base_year'])
        cache_storage = AttributeCache()

        SessionConfiguration(new_instance=True,
                             package_order=config['dataset_pool_configuration'].package_order,
                             in_storage=cache_storage)

        base_year_dir = os.path.join(config['cache_directory'], str(config['base_year']))
        if not os.path.exists(base_year_dir):
            # NOTE(review): confirm that the CacheScenarioDatabase.run() in use
            # accepts the `unroll_gridcells` keyword.
            CacheScenarioDatabase().run(config, unroll_gridcells=False)

        for dataset_name in config['datasets_to_preload']:
            SessionConfiguration().get_dataset_from_pool(dataset_name)

        households = SessionConfiguration().get_dataset_from_pool("household")
        household_ids = households.get_id_attribute()
        workers = households.get_attribute("workers")

        # Parallel columns of the persons table, filled household by household.
        person_household_ids = []
        person_member_ids = []
        person_is_worker = []
        person_job_ids = []

        for i in range(households.size()):
            n_workers = workers[i]
            if n_workers > 0:
                person_household_ids.extend([household_ids[i]] * n_workers)
                # Members are numbered 1..n_workers within their household.
                person_member_ids.extend(range(1, n_workers+1))
                person_is_worker.extend([1] * n_workers)
                # -1 is written as the job id of every generated person.
                person_job_ids.extend([-1] * n_workers)

        in_storage = StorageFactory().get_storage('dict_storage')

        persons_table_name = 'persons'
        persons_columns = {
            'person_id': arange(len(person_household_ids))+1,
            'household_id': array(person_household_ids),
            'member_id': array(person_member_ids),
            'is_worker': array(person_is_worker),
            'job_id': array(person_job_ids),
        }
        in_storage.write_table(table_name=persons_table_name,
                               table_data=persons_columns)

        persons = PersonDataset(in_storage=in_storage, in_table_name=persons_table_name)
        persons.write_dataset(out_storage=out_storage, out_table_name=persons_table_name)

Пример #6

Показать файл

 def run(self, year, cache_directory=None):
     """Compute the variables to export for ``year`` and pass them to the writer.

     ``cache_directory`` defaults to self.config['cache_directory'].  The
     tables to export come from the 'urbansim_to_tm_variable_mapping' entry
     of self.config['travel_model_configuration']: each table maps aliases
     to expressions and may carry the special keys '__filter__' and
     '__out_table_name__'.  Those two keys are deleted from the mapping
     when present with a non-None value, so the configuration is modified
     in place.

     Returns whatever _call_input_file_writer() returns.
     """
     if cache_directory is None:
         cache_directory = self.config['cache_directory']
     state = SimulationState()
     state.set_cache_directory(cache_directory)
     state.set_current_time(year)

     cache_storage = AttributeCache()
     session = SessionConfiguration(new_instance=True,
                                    package_order=self.config['dataset_pool_configuration'].package_order,
                                    in_storage=cache_storage)
     dataset_pool = session.get_dataset_pool()
     data_to_export = self.config['travel_model_configuration']['urbansim_to_tm_variable_mapping']

     table_names = data_to_export.keys()
     # All four dictionaries are keyed by dataset name.
     variable_names = {}
     datasets = {}
     filenames = {}
     in_table_names = {}
     for table_name in table_names:
         table_spec = data_to_export[table_name]

         filter_expression = table_spec.get('__filter__', None)
         if filter_expression is not None:
             del table_spec['__filter__']

         out_table_name = table_spec.get('__out_table_name__', None)
         if out_table_name is None:
             out_table_name = table_name
         else:
             del table_spec['__out_table_name__']

         expressions = ["%s = %s" % (alias, table_spec[alias])
                        for alias in table_spec.keys()]

         # The dataset is taken from the first expression of the table; the
         # remaining expressions are computed on that same dataset.
         dataset_name = None
         for expression in expressions:
             var_name = VariableName(expression)
             if dataset_name is None:
                 dataset_name = var_name.get_dataset_name()
                 ds = dataset_pool.get_dataset(dataset_name)

                 datasets[dataset_name] = ds
                 filenames[dataset_name] = out_table_name
                 in_table_names[dataset_name] = table_name
                 variable_names.setdefault(dataset_name, [])
             variable_names[dataset_name].append(var_name.get_alias())
             ds.compute_variables([var_name], dataset_pool=dataset_pool)

         if filter_expression is not None:
             # Keep only the records for which the filter evaluates to > 0.
             filter_values = ds.compute_variables(["__filter__ = %s" % filter_expression],
                                                  dataset_pool=dataset_pool)
             filter_idx = where(filter_values>0)[0]
             ds = DatasetSubset(ds, index = filter_idx)
             datasets[dataset_name] = ds

     return self._call_input_file_writer(year, datasets, in_table_names, filenames, variable_names, dataset_pool)

Пример #7

Показать файл

Файл: run_simulation.py Проект: urban-ai/VIBe2UrbanSim

    def run(self, base_directory, urbansim_cache_directory, years):
        """Run the land cover change model once for every year in ``years``.

        base_directory: storage location of the land cover data; it is used
            as-is for every year, both for reading and for writing.
        urbansim_cache_directory: cache directory given to the
            SimulationState, from which gridcells are read via AttributeCache.
        years: iterable of years to simulate.

        Coefficients and specification are loaded from tab storage in the
        'data' directory under self.package_path.
        """
        model = LandCoverChangeModel(self.possible_lcts,
                                     submodel_string=self.lct_attribute,
                                     choice_attribute_name=self.lct_attribute,
                                     debuglevel=4)

        coefficients = Coefficients()
        data_dir = os.path.join(self.package_path, 'data')
        tab_storage = StorageFactory().get_storage('tab_storage',
                                                   storage_location=data_dir)
        coefficients.load(in_storage=tab_storage,
                          in_table_name="land_cover_change_model_coefficients")

        specification = EquationSpecification(in_storage=tab_storage)
        specification.load(in_table_name="land_cover_change_model_specification")
        specification.set_variable_prefix("biocomplexity.land_cover.")

        constants = Constants()
        state = SimulationState()
        state.set_cache_directory(urbansim_cache_directory)
        cache_storage = AttributeCache()

        # Only the first 100000 land cover records are kept for each year.
        subset_index = arange(100000)

        for year in years:
            state.set_current_time(year)

            land_cover_path = base_directory
            land_cover_in = StorageFactory().get_storage(
                'flt_storage', storage_location=land_cover_path)
            land_cover_out = StorageFactory().get_storage(
                'flt_storage', storage_location=land_cover_path)
            land_covers = LandCoverDataset(in_storage=land_cover_in,
                                           out_storage=land_cover_out,
                                           debuglevel=4)
            land_covers.subset_by_index(subset_index)

            gridcells = GridcellDataset(in_storage=cache_storage, debuglevel=4)

            data_objects = {
                "gridcell": gridcells,
                "constants": constants,
                "flush_variables": True
            }
            model.run(specification,
                      coefficients,
                      land_covers,
                      data_objects=data_objects,
                      chunk_specification={'nchunks': 1})
            land_covers.flush_dataset()

            # Drop the per-year datasets before the next iteration.
            del gridcells
            del land_covers

Пример #8

Показать файл

 def run(self, year, skim_directory=None):
     """Write travel data for ``year`` using the configured cache directory.

     Sets the SimulationState's current time to ``year`` and its cache
     directory to self.config['cache_directory'], then delegates to
     write_travel_data().  ``skim_directory`` is accepted but not used in
     this method.
     """
     cache_dir = self.config['cache_directory']
     state = SimulationState()
     state.set_current_time(year)
     state.set_cache_directory(cache_dir)

     # The value is not used below, but the lookup is kept: for a plain
     # mapping it fails early when ``year`` has no travel model entry.
     _year_config = self.config['travel_model_configuration'][year]
     self.write_travel_data(year, cache_dir)

Пример #9

Показать файл

    def run(self, year):
        """Prepare household and job datasets for ``year`` and call the writer.

        Points the SimulationState at self.config['cache_directory'] and
        ``year`` and builds a new dataset pool backed by an AttributeCache.
        When 'locations_to_disaggregate' in the travel model configuration is
        non-empty, the zone id is computed on households and jobs by
        disaggregating from the first listed dataset; any further entries are
        passed as ``intermediates``.  The original notes describe the list as
        ordered from higher to lower aggregation level, e.g.
        ['parcel', 'building'].

        Returns whatever _call_input_file_writer() returns.
        """
        cache_dir = self.config['cache_directory']
        state = SimulationState()
        state.set_cache_directory(cache_dir)
        state.set_current_time(year)

        cache_storage = AttributeCache()
        package_order = self.config['dataset_pool_configuration'].package_order
        session = SessionConfiguration(new_instance=True,
                                       package_order=package_order,
                                       in_storage=cache_storage)
        pool = session.get_dataset_pool()

        households = pool.get_dataset('household')
        zones = pool.get_dataset('zone')
        jobs = pool.get_dataset('job')

        locations = self.config['travel_model_configuration']['locations_to_disaggregate']
        n_locations = len(locations)
        if n_locations > 0:
            primary_location = locations[0]

            # Every entry after the first is listed, each followed by ", ".
            intermediates = ""
            if n_locations > 1:
                listed = "".join("%s, " % (locations[i],)
                                 for i in range(1, n_locations))
                intermediates = ", intermediates=[" + listed + "]"

            zone_id_name = zones.get_id_name()[0]

            household_expression = '%s = household.disaggregate(%s.%s %s)' % (
                zone_id_name, primary_location, zone_id_name, intermediates)
            households.compute_variables([household_expression], dataset_pool=pool)

            job_expression = '%s = job.disaggregate(%s.%s %s)' % (
                zone_id_name, primary_location, zone_id_name, intermediates)
            jobs.compute_variables([job_expression], dataset_pool=pool)

        return self._call_input_file_writer(year, pool)

Пример #10

Показать файл

    def target_func(self,
                    est_v,
                    func=lambda x, y: np.sum(np.abs(x - y)),
                    **kwargs):
        '''Return ``func(prediction, target)`` for the estimate ``est_v``.

        est_v: passed through to self.update_prediction().
        func: callable taking (predicted values, target values); the default
            returns the sum of absolute differences.
        kwargs: forwarded to self.update_prediction().

        The prediction is aligned with self.target by key; keys of
        self.target missing from the prediction count as 0.
        '''

        # Point the simulation state at this object's base year and cache,
        # then build a new dataset pool on top of the attribute cache.
        simulation_state = SimulationState()
        simulation_state.set_current_time(self.base_year)
        simulation_state.set_cache_directory(self.cache_directory)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(
            new_instance=True,
            package_order=self.package_order,
            in_storage=attribute_cache).get_dataset_pool()

        # NOTE(review): calib_datasets is created empty immediately before the
        # loop, so the loop body below never runs and update_prediction()
        # always receives an empty dict.  The body also refers to `subset` and
        # `subset_patterns`, which are not defined in the visible code --
        # confirm where these were meant to come from.
        calib_datasets = {}
        for dataset_name, calib_attr in calib_datasets.iteritems():
            dataset = dataset_pool.get_dataset(
                dataset_name, dataset_arguments={'id_name': []})
            # At most one of subset / subset_patterns may apply to a dataset.
            assert subset is None or subset.get(dataset_name, None) is None or \
                   subset_patterns is None or subset_patterns.get(dataset_name, None) is None
            if subset is not None and subset.get(dataset_name,
                                                 None) is not None:
                # Select records whose attribute value is in subset_cond.
                subset_attr, subset_cond = subset.get(dataset_name)
                index = np.in1d(dataset[subset_attr], subset_cond)
            elif subset_patterns is not None and subset_patterns.get(
                    dataset_name, None) is not None:
                # Select records whose attribute value matches the regex.
                subset_attr, subset_pattern = subset_patterns.get(dataset_name)
                index = array([
                    True if re.search(subset_pattern, attr_v) else False
                    for attr_v in dataset[subset_attr]
                ])
            else:
                # No restriction: use every record of the dataset.
                index = arange(dataset.size(), dtype='i')

            calib_datasets[dataset_name] = [dataset, calib_attr, index]

        prediction = self.update_prediction(est_v, simulation_state,
                                            dataset_pool, calib_datasets,
                                            **kwargs)
        ## allow keys in target not appearing in prediction
        ## assuming their values to be 0
        ### every key in target should appear in prediction
        #assert np.all( np.in1d(self.target.keys(), prediction.keys()) )
        # Both arrays are built from self.target, so they line up as long as
        # values() and keys() iterate in the same order.
        target = np.array(self.target.values())
        predct = np.array([prediction[k] if prediction.has_key(k) else 0 \
                           for k in self.target.keys() ])
        results = func(predct, target)

        return results

Пример #11

Показать файл

Файл: multiple_runs.py Проект: apdjustino/DRCOG_Urbansim

def setup_environment(cache_directory, year, package_order, additional_datasets=None):
    """Reset the simulation environment for ``year`` and return its dataset pool.

    cache_directory: cache directory given to the new SimulationState.
    year: year set as the current simulation time.
    package_order: package order passed to the new SessionConfiguration.
    additional_datasets: optional mapping of dataset name to dataset; each
        entry replaces the dataset of that name in the pool.  ``None`` (the
        default) and an empty mapping both mean "nothing to replace".

    Returns the dataset pool of the new SessionConfiguration.
    """
    gc.collect()
    ss = SimulationState(new_instance=True)
    ss.set_cache_directory(cache_directory)
    ss.set_current_time(year)
    ac = AttributeCache()
    # The flt storage is only used to report the storage location in the log.
    storage = ac.get_flt_storage_for_year(year)
    sc = SessionConfiguration(new_instance=True,
                              package_order=package_order,
                              in_storage=ac)
    logger.log_status("Setup environment for year %s. Use cache directory %s." % (year, storage.get_storage_location()))
    dp = sc.get_dataset_pool()
    # A None default avoids sharing one dict object between calls; items()
    # is available on dicts in both Python 2 and Python 3.
    if additional_datasets:
        for name, ds in additional_datasets.items():
            dp.replace_dataset(name, ds)
    return dp

Пример #12

Показать файл

 def run(self, year):
     """Fetch the configured matrices for ``year`` through get_needed_matrices_from_emme4().

     Original notes: like its parent, but report files have a different
     format and there are no banks; zones are assumed to have no gaps.

     The bank path is get_emme2_base_dir() joined with the 'bank' entries
     of the year's travel model configuration.  Each key of
     'matrix_variable_map' is a dot-separated path that is appended to the
     bank path.
     """
     cache_dir = self.config['cache_directory']
     state = SimulationState()
     state.set_current_time(year)
     state.set_cache_directory(cache_dir)

     year_config = self.config['travel_model_configuration'][year]

     separator = os.path.sep
     bank_parts = [self.get_emme2_base_dir()] + year_config['bank']
     bank_path = separator.join(bank_parts)
     for dotted_path, variable_dict in year_config['matrix_variable_map'].iteritems():
         path_name = separator.join([bank_path] + dotted_path.split('.'))
         # The cache directory passed on is the one from the year
         # configuration, not the top-level one used above.
         self.get_needed_matrices_from_emme4(year,
                                             year_config['cache_directory'],
                                             path_name, variable_dict)

Пример #13

Показать файл

Файл: get_cache_data_into_emme2.py Проект: christianurich/VIBe2UrbanSim

    def run(self, year):
        """Prepare household and job datasets for ``year`` and call the writer.

        Points the SimulationState at self.config['cache_directory'] and
        ``year`` and builds a new dataset pool backed by an AttributeCache.
        When 'locations_to_disaggregate' in the travel model configuration is
        non-empty, the zone id is computed on households and jobs by
        disaggregating from the first listed dataset; any further entries are
        passed as ``intermediates``.  The original notes describe the list as
        ordered from higher to lower aggregation level, e.g.
        ['parcel', 'building'].

        Returns whatever _call_input_file_writer() returns.
        """
        cache_dir = self.config['cache_directory']
        state = SimulationState()
        state.set_cache_directory(cache_dir)
        state.set_current_time(year)

        cache_storage = AttributeCache()
        session = SessionConfiguration(
            new_instance=True,
            package_order=self.config['dataset_pool_configuration'].package_order,
            in_storage=cache_storage)
        pool = session.get_dataset_pool()

        households = pool.get_dataset('household')
        zones = pool.get_dataset('zone')
        jobs = pool.get_dataset('job')

        locations = self.config['travel_model_configuration']['locations_to_disaggregate']
        n_locations = len(locations)
        if n_locations == 0:
            # Nothing to disaggregate: hand the pool to the writer as it is.
            return self._call_input_file_writer(year, pool)

        primary_location = locations[0]
        if n_locations > 1:
            # Every entry after the first is listed, each followed by ", ".
            pieces = ["%s, " % (locations[i],) for i in range(1, n_locations)]
            intermediates = ", intermediates=[" + "".join(pieces) + "]"
        else:
            intermediates = ""

        zone_id_name = zones.get_id_name()[0]

        household_expression = '%s = household.disaggregate(%s.%s %s)' % (
            zone_id_name, primary_location, zone_id_name, intermediates)
        households.compute_variables([household_expression], dataset_pool=pool)

        job_expression = '%s = job.disaggregate(%s.%s %s)' % (
            zone_id_name, primary_location, zone_id_name, intermediates)
        jobs.compute_variables([job_expression], dataset_pool=pool)

        return self._call_input_file_writer(year, pool)

Пример #14

Показать файл

 def run(self, year):
     """Fetch the configured matrices for ``year`` through get_needed_matrices_from_emme4().

     Original notes: like its parent, but skims are stored locally in
     matrix_directory in hdf5 format, one file per year called
     xxxx-travelmodel.h5 (xxxx being the year), with one group per bank
     (e.g. Bank1) holding the matrices; zones are assumed to have no gaps.

     The bank path is get_emme2_base_dir() joined with the 'bank' entries
     of the year's travel model configuration.  Each key of
     'matrix_variable_map' is a dot-separated path that is appended to the
     bank path.
     """
     cache_dir = self.config['cache_directory']
     state = SimulationState()
     state.set_current_time(year)
     state.set_cache_directory(cache_dir)

     year_config = self.config['travel_model_configuration'][year]

     separator = os.path.sep
     bank_parts = [self.get_emme2_base_dir()] + year_config['bank']
     bank_path = separator.join(bank_parts)
     for dotted_path, variable_dict in year_config['matrix_variable_map'].iteritems():
         path_name = separator.join([bank_path] + dotted_path.split('.'))
         # The cache directory passed on is the one from the year
         # configuration, not the top-level one used above.
         self.get_needed_matrices_from_emme4(year,
                                             year_config['cache_directory'],
                                             path_name, variable_dict)

Пример #15

Показать файл

def setup_environment(cache_directory,
                      year,
                      package_order,
                      additional_datasets=None):
    """Reset the global simulation/session state and return a dataset pool.

    Creates a new SimulationState pointing at ``cache_directory`` and
    ``year``, creates a new SessionConfiguration backed by an
    AttributeCache, logs the storage location used for ``year`` and returns
    the session's dataset pool.

    Arguments:
    cache_directory -- cache directory handed to the SimulationState.
    year -- year set as the current simulation time.
    package_order -- package order handed to the SessionConfiguration.
    additional_datasets -- optional mapping of dataset name -> dataset;
            each entry is put into the pool with ``replace_dataset``.
            Defaults to None (nothing replaced).

    Returns the dataset pool of the new SessionConfiguration.
    """
    # A None default replaces the former shared mutable ``{}`` default.
    if additional_datasets is None:
        additional_datasets = {}

    gc.collect()
    ss = SimulationState(new_instance=True)
    ss.set_cache_directory(cache_directory)
    ss.set_current_time(year)
    ac = AttributeCache()
    storage = ac.get_flt_storage_for_year(year)
    sc = SessionConfiguration(new_instance=True,
                              package_order=package_order,
                              in_storage=ac)
    logger.log_status(
        "Setup environment for year %s. Use cache directory %s." %
        (year, storage.get_storage_location()))
    dp = sc.get_dataset_pool()
    # items() exists on both Python 2 and Python 3 mappings.
    for name, ds in additional_datasets.items():
        dp.replace_dataset(name, ds)
    return dp

Пример #16

Показать файл

    def run(self, year, matrix_directory=None):
        """Main entry point: pull the configured emme/2 matrices for a year.

        Reads the year's entry of 'travel_model_configuration' and, for each
        of bank1..bank3, calls the helpers that fetch the matrices listed in
        'matrix_variable_map' and 'node_matrix_variable_map', and copies the
        reports listed in 'reports_to_copy'.

        Arguments:
        year -- key into self.config['travel_model_configuration'].
        matrix_directory -- if not None, bank directories are taken from
                this directory and the helper is told that the matrix files
                have already been created; otherwise they come from
                self.get_emme2_dir().
        """
        cache_directory = self.config['cache_directory']
        state = SimulationState()
        state.set_current_time(year)
        state.set_cache_directory(cache_directory)

        year_config = self.config['travel_model_configuration'][year]
        matrices_created = matrix_directory is not None
        reports = self.config['travel_model_configuration'].get(
            'reports_to_copy', [])

        for bank_number in (1, 2, 3):
            bank_name = "bank%i" % bank_number
            if matrix_directory is None:
                bank_dir = self.get_emme2_dir(year, bank_name)
            else:
                bank_dir = os.path.join(matrix_directory, bank_name)

            if bank_name in year_config['matrix_variable_map']:
                self.get_needed_matrices_from_emme2(
                    year, year_config['cache_directory'], bank_dir,
                    year_config['matrix_variable_map'][bank_name],
                    matrices_created)
                # Reports are copied only for banks that have matrices.
                for report in reports:
                    self.copy_report_to_cache(
                        report, year, year_config['cache_directory'],
                        bank_dir)

            # Node matrices are optional; skip banks without an entry.
            if bank_name not in year_config.get('node_matrix_variable_map',
                                                {}):
                continue
            node_variable_map = year_config['node_matrix_variable_map'][
                bank_name]
            if len(node_variable_map.keys()) > 0:
                self.get_needed_node_matrices_from_emme2(
                    year, year_config['cache_directory'], bank_dir,
                    node_variable_map)

Пример #17

Показать файл

Файл: start_calibration.py Проект: apdjustino/DRCOG_Urbansim

    def target_func(self, est_v, func=lambda x, y: np.sum(np.abs(x - y)), **kwargs):
        """Evaluate the calibration target function for the estimate ``est_v``.

        Sets the SimulationState to the base year and cache directory,
        creates a new SessionConfiguration, obtains a prediction from
        ``self.update_prediction`` and compares it with ``self.target``.

        Arguments:
        est_v -- estimate passed through to ``self.update_prediction``.
        func -- callable ``func(prediction_array, target_array)``; the
                default is the sum of absolute differences.
        **kwargs -- passed through to ``self.update_prediction``.

        Returns the value of ``func``.  Keys of ``self.target`` missing from
        the prediction are treated as 0.
        """

        simulation_state = SimulationState()
        simulation_state.set_current_time(self.base_year)
        simulation_state.set_cache_directory(self.cache_directory)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(
            new_instance=True, package_order=self.package_order, in_storage=attribute_cache
        ).get_dataset_pool()

        # NOTE(review): calib_datasets is created empty right here, so the
        # loop below never executes; its body also refers to `subset` and
        # `subset_patterns`, which are not defined in this method.  Kept
        # as-is pending confirmation of the intended source of the datasets.
        calib_datasets = {}
        for dataset_name, calib_attr in calib_datasets.items():
            dataset = dataset_pool.get_dataset(dataset_name, dataset_arguments={"id_name": []})
            assert (
                subset is None
                or subset.get(dataset_name, None) is None
                or subset_patterns is None
                or subset_patterns.get(dataset_name, None) is None
            )
            if subset is not None and subset.get(dataset_name, None) is not None:
                subset_attr, subset_cond = subset.get(dataset_name)
                index = np.in1d(dataset[subset_attr], subset_cond)
            elif subset_patterns is not None and subset_patterns.get(dataset_name, None) is not None:
                subset_attr, subset_pattern = subset_patterns.get(dataset_name)
                index = array([True if re.search(subset_pattern, attr_v) else False for attr_v in dataset[subset_attr]])
            else:
                index = arange(dataset.size(), dtype="i")

            calib_datasets[dataset_name] = [dataset, calib_attr, index]

        prediction = self.update_prediction(est_v, simulation_state, dataset_pool, calib_datasets, **kwargs)
        ## allow keys in target not appearing in prediction
        ## assuming their values to be 0
        ### every key in target should appear in prediction
        # assert np.all( np.in1d(self.target.keys(), prediction.keys()) )
        # list() keeps np.array from wrapping a dict view object on Python 3.
        target = np.array(list(self.target.values()))
        # .get replaces the deprecated has_key() check plus second lookup.
        predct = np.array([prediction.get(k, 0) for k in self.target.keys()])
        results = func(predct, target)

        return results

Пример #18

Показать файл

def import_travel_model_data(config, year):
    """Read travel model CSV files and hand them to ``to_opus_dataset``.

    Sets the SimulationState to ``year`` and the configured cache
    directory, obtains the flt storage for ``year + 1`` from the
    AttributeCache and, for every entry of
    'tm_to_urbansim_variable_mapping', reads the named CSV file (first row
    as header) and passes the data, the storage and the dataset name to
    ``to_opus_dataset``.

    Arguments:
    config -- configuration with 'cache_directory' and
            'travel_model_configuration' entries.
    year -- current year; also the key of the per-year travel model
            configuration that supplies 'data_dir'.
    """

    cache_directory = config['cache_directory']
    state = SimulationState()
    state.set_current_time(year)
    state.set_cache_directory(cache_directory)
    next_year_store = AttributeCache().get_flt_storage_for_year(year+1)
    store_location = next_year_store.get_storage_location()

    tm_config = config['travel_model_configuration']
    dataset_to_file = tm_config['tm_to_urbansim_variable_mapping']
    base_dir = mtc_common.tm_get_base_dir(config)
    data_dir = tm_config[year]['data_dir']

    for dataset_name, skim_name in dataset_to_file.iteritems():
        skim_path = os.path.join(base_dir, data_dir, skim_name)
        frame = read_csv(skim_path, header=0)

        block_title = "Caching {} to {}".format(dataset_name, store_location)
        with block(block_title):
            logger.log_status("Source file {}".format(skim_path))
            opus_ds = to_opus_dataset(frame, next_year_store, dataset_name)

Пример #19

Показать файл

 def run(self, year):
     """
     Fetch skims from an hdf5 file via get_needed_matrices_from_emme4.

     According to the original author's notes, this should run after
     psrc_parcel.emme.models.run_export_skims, which creates the skims hdf5
     file; each skim becomes an attribute of a travel_model dataset, and
     zones are assumed to have no gaps.

     Arguments:
     year -- year of the urbansim run. Used to look up the TM year in the
             bank configuration.

     Configuration entries (in travel_model_configuration) used:
     matrix_variable_map -- dictionary keyed by bank name. Bank names are
             paths in which (back-)slashes are replaced by dots,
             e.g. skims.auto.am. Each value is a dictionary mapping skim
             names to the desired urbansim attribute names, e.g.
             {'skims.nonmotorized.am':
                   {'abketm': 'am_bike_to_work_travel_time',
                    'awlktm': 'am_walk_time_in_minutes'
                   }
             }
     matrix_h5_directory -- directory holding the hdf5 file named
             xxxx-travelmodel.h5, where xxxx is the TM year (default is the
             Emme base directory); the file contains the skims as n x n
             matrices.
     """
     cache_directory = self.config['cache_directory']
     state = SimulationState()
     state.set_current_time(year)
     state.set_cache_directory(cache_directory)

     tm_config = self.config['travel_model_configuration']
     year_config = tm_config[year]
     # The default is evaluated eagerly, so get_emme2_base_dir() is always
     # called, even when 'matrix_h5_directory' is configured.
     h5_directory = tm_config.get('matrix_h5_directory',
                                  self.get_emme2_base_dir())
     # The first component of the 'bank' entry is used as the TM year.
     tm_year = year_config['bank'][0]
     bank_file = os.path.join(h5_directory, "%s-travelmodel.h5" % tm_year)

     matrix_map = year_config['matrix_variable_map']
     for bank_name, variable_dict in matrix_map.iteritems():
         self.get_needed_matrices_from_emme4(
             year, year_config['cache_directory'], bank_name, variable_dict,
             bank_file=bank_file)

Пример #20

Показать файл

    def run(self, base_directory, urbansim_cache_directory, years):
        """Run the land cover change model once for each year in ``years``.

        Arguments:
        base_directory -- location of the land cover flt data. The same
                directory is used as input and output storage for every
                year (the per-year subdirectory variant is disabled).
        urbansim_cache_directory -- urbansim cache directory handed to the
                SimulationState; gridcells are read through the
                AttributeCache.
        years -- iterable of years to run.

        Coefficients and specification are loaded from the tab storage in
        the 'data' directory below self.package_path.
        """
        lccm = LandCoverChangeModel(self.possible_lcts,
                                    submodel_string=self.lct_attribute,
                                    choice_attribute_name=self.lct_attribute,
                                    debuglevel=4)
        coefficients = Coefficients()
        data_dir = os.path.join(self.package_path, 'data')
        tab_storage = StorageFactory().get_storage('tab_storage',
                                                   storage_location=data_dir)
        coefficients.load(in_storage=tab_storage,
                          in_table_name="land_cover_change_model_coefficients")
        specification = EquationSpecification(in_storage=tab_storage)
        specification.load(in_table_name="land_cover_change_model_specification")
        specification.set_variable_prefix("biocomplexity.land_cover.")
        constants = Constants()
        state = SimulationState()
        state.set_cache_directory(urbansim_cache_directory)
        attribute_cache = AttributeCache()
        # Only the land cover records at indices 0..99999 are used.
        subset_index = arange(100000)
        for year in years:
            state.set_current_time(year)
            land_cover_path = base_directory
            flt_in = StorageFactory().get_storage(
                'flt_storage', storage_location=land_cover_path)
            flt_out = StorageFactory().get_storage(
                'flt_storage', storage_location=land_cover_path)
            land_covers = LandCoverDataset(in_storage=flt_in,
                                           out_storage=flt_out,
                                           debuglevel=4)
            land_covers.subset_by_index(subset_index)
            gridcells = GridcellDataset(in_storage=attribute_cache,
                                        debuglevel=4)

            data_objects = {"gridcell": gridcells,
                            "constants": constants,
                            "flush_variables": True}
            lccm.run(specification, coefficients, land_covers,
                     data_objects=data_objects,
                     chunk_specification={'nchunks': 1})
            land_covers.flush_dataset()
            # Drop the per-year datasets before the next iteration.
            del gridcells, land_covers

Пример #21

Показать файл

Файл: get_travel_model_data_into_cache.py Проект: urban-ai/VIBe2UrbanSim

    def run(self, config, year, *args, **kwargs):
        """Main entry point: get travel model data and write it to the cache.

        Sets the SimulationState to ``year`` and the configured cache
        directory, loads the 'zone' dataset, asks
        ``get_travel_data_from_travel_model`` for the travel data and writes
        all of its known attributes as table 'travel_data' into the flt
        directory of year + 1 (created if it does not exist).

        Arguments:
        config -- configuration with 'cache_directory',
                'dataset_pool_configuration' and
                'travel_model_configuration' entries.
        year -- current simulation year.
        *args, **kwargs -- passed through to
                ``get_travel_data_from_travel_model``.

        Each logger block is closed in a ``finally`` clause so that a
        failure inside it does not leave the block open.
        """
        cache_directory = config['cache_directory']
        simulation_state = SimulationState()
        simulation_state.set_current_time(year)
        simulation_state.set_cache_directory(cache_directory)

        logger.start_block('Getting data from travel model')
        try:
            next_year = year + 1
            flt_dir_for_next_year = os.path.join(cache_directory, str(next_year))
            if not os.path.exists(flt_dir_for_next_year):
                os.mkdir(flt_dir_for_next_year)
            attribute_cache = AttributeCache()
            dataset_pool = SessionConfiguration(new_instance=True,
                                                package_order=config['dataset_pool_configuration'].package_order,
                                                in_storage=attribute_cache).get_dataset_pool()
            zone_set = dataset_pool.get_dataset('zone')
            zone_set.load_dataset()
            self.prepare_for_run(config['travel_model_configuration'], year)
            travel_data_set = self.get_travel_data_from_travel_model(config, year, zone_set,
                                                                     *args, **kwargs)
        finally:
            logger.end_block()

        logger.start_block('Writing travel data to cache')
        try:
            out_storage = StorageFactory().get_storage('flt_storage', storage_location = flt_dir_for_next_year)
            travel_data_set.write_dataset(attributes=travel_data_set.get_known_attribute_names(),
                                          out_storage=out_storage,
                                          out_table_name='travel_data')
        finally:
            logger.end_block()

Пример #22

Показать файл

Файл: get_cache_data_into_travel_model.py Проект: urban-ai/VIBe2UrbanSim

    def run(self, config, year, *args, **kwargs):
        """Main entry point: write UrbanSim data for the travel model.

        Sets the SimulationState to the configured cache directory and
        ``year``, creates a new SessionConfiguration backed by the
        AttributeCache, fetches the 'zone' dataset and hands it, together
        with the dataset pool, to ``create_travel_model_input_file``.

        Arguments:
        config -- configuration with 'cache_directory',
                'dataset_pool_configuration' and
                'travel_model_configuration' entries.
        year -- current simulation year.
        *args, **kwargs -- passed through to
                ``create_travel_model_input_file``.
        """
        cache_directory = config['cache_directory']
        state = SimulationState()
        state.set_cache_directory(cache_directory)
        state.set_current_time(year)

        attribute_cache = AttributeCache()
        session = SessionConfiguration(
            new_instance=True,
            package_order=config['dataset_pool_configuration'].package_order,
            in_storage=attribute_cache)
        dataset_pool = session.get_dataset_pool()

        zone_set = dataset_pool.get_dataset('zone')
        self.prepare_for_run(config['travel_model_configuration'], year)
        self.create_travel_model_input_file(
            config, year, zone_set, dataset_pool, *args, **kwargs)

Пример #23

Показать файл

    def run(self, config, show_output=False):
        """Cache the configured scenario database tables.

        Arguments:
        config -- configuration with 'cache_directory', 'base_year',
                'scenario_database_configuration' and
                'creating_baseyear_cache_configuration' entries; an optional
                'low_memory_run' entry is forwarded to the SimulationState.
        show_output -- stored on the instance as self.show_output.

        Calls ``cache_database_tables`` for every database in the scenario
        database mapping and afterwards logs a warning for each requested
        table that is not in self.tables_cached.
        """
        logger.log_status("Caching large SQL tables to: " +
                          config['cache_directory'])
        self.show_output = show_output

        server_configuration = config['scenario_database_configuration']
        manager = ScenarioDatabaseManager(
            server_configuration=server_configuration,
            base_scenario_database_name=server_configuration.database_name)
        self.database_server = DatabaseServer(server_configuration)
        database_to_tables = manager.get_database_to_table_mapping()

        baseyear_cache_config = config['creating_baseyear_cache_configuration']
        self.tables_to_cache = baseyear_cache_config.tables_to_cache

        state = SimulationState()
        if 'low_memory_run' in config:
            state.set_low_memory_run(config['low_memory_run'])
        state.set_cache_directory(config['cache_directory'])
        state.set_current_time(config['base_year'])

        self.tables_cached = set()
        for database_name, tables in database_to_tables.items():
            self.cache_database_tables(config, database_name, tables)

        # Report requested tables that were not recorded as cached.
        missing_tables = set(self.tables_to_cache) - self.tables_cached
        if not missing_tables:
            return
        logger.log_warning('The following requested tables were NOT cached:')
        for table_name in missing_tables:
            logger.log_warning('\t%s' % table_name)

Пример #24

Показать файл

Файл: get_emme2_data_into_cache.py Проект: christianurich/VIBe2UrbanSim

 def run(self, year, matrix_directory=None):
     """Main entry point: pull the configured emme/2 matrices for a year.

     Reads the year's entry of 'travel_model_configuration' and, for each
     of bank1..bank3, calls the helpers that fetch the matrices listed in
     'matrix_variable_map' and 'node_matrix_variable_map', and copies the
     reports listed in 'reports_to_copy'.

     Arguments:
     year -- key into self.config['travel_model_configuration'].
     matrix_directory -- if not None, bank directories are taken from this
             directory and the helper is told that the matrix files have
             already been created; otherwise they come from
             self.get_emme2_dir().
     """
     cache_directory = self.config['cache_directory']
     state = SimulationState()
     state.set_current_time(year)
     state.set_cache_directory(cache_directory)

     year_config = self.config['travel_model_configuration'][year]
     matrices_created = matrix_directory is not None
     reports = self.config['travel_model_configuration'].get('reports_to_copy', [])

     for bank_number in (1, 2, 3):
         bank_name = "bank%i" % bank_number
         if matrix_directory is None:
             bank_dir = self.get_emme2_dir(year, bank_name)
         else:
             bank_dir = os.path.join(matrix_directory, bank_name)

         if bank_name in year_config['matrix_variable_map']:
             self.get_needed_matrices_from_emme2(
                 year,
                 year_config['cache_directory'],
                 bank_dir,
                 year_config['matrix_variable_map'][bank_name],
                 matrices_created)
             # Reports are copied only for banks that have matrices.
             for report in reports:
                 self.copy_report_to_cache(
                     report, year, year_config['cache_directory'], bank_dir)

         # Node matrices are optional; skip banks without an entry.
         if bank_name not in year_config.get('node_matrix_variable_map', {}):
             continue
         node_variable_map = year_config['node_matrix_variable_map'][bank_name]
         if len(node_variable_map.keys()) > 0:
             self.get_needed_node_matrices_from_emme2(
                 year, year_config['cache_directory'], bank_dir,
                 node_variable_map)

Пример #25

Показать файл

    runs = Runs.runs
    comparison_variables = Runs.comparison_variables
    baseline = Runs.baseline
    
    simulation_state = SimulationState()
    simulation_state.set_current_time(options.year)
    
    SessionConfiguration(new_instance=True,
                         package_order=['psrc','urbansim','opus_core'],
                         in_storage=AttributeCache())

    if options.augment_variables == True:
        for dataset_name in comparison_variables.keys():
            cache_directory = baseline
            simulation_state.set_cache_directory(cache_directory)
            dataset = DatasetFactory().get_dataset(dataset_name,
                                                    package='urbansim', 
                                                    arguments={'in_storage': AttributeCache()})
        
            variables = comparison_variables[dataset_name]
            dataset.compute_variables(variables, resources=Resources())
            ids = dataset.get_id_attribute()
            for run in runs.keys():
                cache_directory=run
                simulation_state.set_cache_directory(cache_directory)
                run_dataset = DatasetFactory().get_dataset(dataset_name, 
                                                           package='urbansim', 
                                                           arguments={'in_storage': AttributeCache()})
                match_index = run_dataset.get_id_index(ids)
                for variable in variables:

Пример #26

Показать файл

    ]
    refinements = None
    refinements_storage = None
    if options.refinements_directory is not None:
        refinements_storage = StorageFactory().get_storage(
            'flt_storage', storage_location=options.refinements_directory)
        refinements = DatasetFactory().search_for_dataset(
            'refinement',
            package_order,
            arguments={'in_storage': refinements_storage})
        years = refinements.get_attribute('year')
        if start_year is None: start_year = years.min()
        if end_year is None: end_year = years.max()

    simulation_state = SimulationState()
    simulation_state.set_cache_directory(options.cache_directory)
    simulation_state.set_current_time(start_year)
    attribute_cache = AttributeCache()
    dataset_pool = SessionConfiguration(
        new_instance=True,
        package_order=package_order,
        in_storage=attribute_cache).get_dataset_pool()

    if refinements is None:
        refinements = dataset_pool.get_dataset('refinement')
        years = refinements.get_attribute('year')
        if start_year is None: start_year = years.min()
        if end_year is None: end_year = years.max()

    for year in range(start_year, end_year + 1):
        logger.start_block("Doing refinement for %s" % year)

Пример #27

Показать файл

Файл: model_system.py Проект: emiliom/DRCOG_Urbansim

class ModelSystem(object):
    """
    Uses the information in configuration to run/estimate a set of models for given set of years.
    """
    def __init__(self):
        self.running = False
        self.forked_processes = []
        self.running_conditional = threading.Condition()

    def run(self,
            resources,
            write_datasets_to_cache_at_end_of_year=True,
            log_file_name='run_model_system.log',
            cleanup_datasets=True):
        """Run the configured models for every year in resources['years'].

        Arguments:
        resources -- a Resources object (anything else raises TypeError).
        write_datasets_to_cache_at_end_of_year -- passed on to _run_year.
        log_file_name -- name of the log file created in the cache directory.
        cleanup_datasets -- passed on to _run_year.

        Entries in resources (entries with no defaults are required):
               years - a tuple or list; its first and last elements are the
                           start and end year, and it must have at least
                           two elements.
               models - a list containing names of models to be run. Each name
                           must correspond to the name of the module/class of that model. Default: []
               models_in_year - a dictionary year -> list of models that
                           overrides 'models' for that year. Default: {}
               debuglevel - an integer. The higher the more output will be printed. Default: 0
               expression_library - a dictionary.  The keys in the dictionary are pairs (dataset_name, variable_name)
               and the values are the corresponding expressions.  The model system needs to set the expression library
               (if present) in VariableFactory so that variables defined as expressions
               in the xml expression library are known.  Default: not set
               log - a dictionary with 'tags' and 'verbosity_level' for the logger.
                           Default: {"tags": [], "verbosity_level": 3}
               low_memory_mode - passed to the simulation state. Default: False
               base_year - set as the start time of the simulation state. Default: 0
               cache_directory - if not None, set on the simulation state.
               sample_input - if true, update_config_for_multiple_runs is called. Default: False
               seed - value passed to seed(). Default: NO_SEED
        This method is called both to start up the simulation for all years, and also for each year
        when running with one process per year.  In the latter case, 'years' consists of just
        (current_year, current_year) rather than the real start and end years for the simulation.

        Raises TypeError if 'resources' is not a Resources object or 'years'
        is neither a tuple nor a list, and StandardError if 'years' has
        fewer than two elements.
        """
        if not isinstance(resources, Resources):
            raise TypeError("Argument 'resources' must be of type 'Resources'.")
        logger_settings = resources.get("log", {
            "tags": [],
            "verbosity_level": 3
        })
        logger.set_tags(logger_settings.get("tags", []))
        logger.set_verbosity_level(logger_settings.get("verbosity_level", 3))
        self.simulation_state = SimulationState()
        self.simulation_state.set_low_memory_run(
            resources.get("low_memory_mode", False))
        self.simulation_state.set_start_time(resources.get("base_year", 0))
        self.run_year_namespace = {}

        if resources.get('cache_directory', None) is not None:
            self.simulation_state.set_cache_directory(
                resources['cache_directory'])

        if 'expression_library' in resources:
            VariableFactory().set_expression_library(
                resources['expression_library'])

        if resources.get('sample_input', False):
            self.update_config_for_multiple_runs(resources)

        cache_directory = self.simulation_state.get_cache_directory()
        log_file = os.path.join(cache_directory, log_file_name)
        logger.enable_file_logging(log_file, verbose=False)
        try:
            logger.log_status("Cache Directory set to: " + cache_directory)

            with logger.block('Start simulation run'):
                models = resources.get("models", [])
                models_in_years = resources.get("models_in_year", {})

                resources.check_obligatory_keys(["years"])

                years = resources["years"]
                if not isinstance(years, (tuple, list)):
                    raise TypeError(
                        "Entry 'years' in resources must be a tuple.")

                if len(years) < 2:
                    # Call-style print and raise are valid on Python 2 and 3.
                    print(years)
                    raise StandardError(
                        "Entry 'years' in resources must be of length at least 2.")

                start_year = years[0]
                end_year = years[-1]

                debuglevel = resources.get("debuglevel", 0)
                seed_values = resources.get('seed', NO_SEED)

                logger.log_status("random seed = %s" % str(seed_values))
                seed(seed_values)

                for year in range(start_year, end_year + 1):
                    with logger.block("Starting simulation for year " +
                                      str(year)):
                        self.simulation_state.set_current_time(year)
                        SessionConfiguration().get_dataset_pool(
                        ).remove_all_datasets()
                        # The run-level log file is switched off while the
                        # year runs; _run_year enables its own per-year file.
                        logger.disable_file_logging(log_file)
                        try:
                            # A per-year model list, when present and not
                            # None, overrides the general list.
                            models_to_run = models_in_years.get(year, None)
                            if models_to_run is None:
                                models_to_run = models
                            self._run_year(
                                year=year,
                                models=models_to_run,
                                simulation_state=self.simulation_state,
                                debuglevel=debuglevel,
                                resources=resources,
                                write_datasets_to_cache_at_end_of_year=
                                write_datasets_to_cache_at_end_of_year,
                                cleanup_datasets=cleanup_datasets)
                        finally:
                            logger.enable_file_logging(log_file, verbose=False)
                        collect()

        finally:
            logger.disable_file_logging(log_file)

    def flush_datasets(self, dataset_names, after_model=False):
        """Flush every named dataset that is present in the session pool.

        Names that are not in the pool are skipped; ``after_model`` is
        forwarded to flush_dataset().
        """
        pool = SessionConfiguration().get_dataset_pool()
        for name in dataset_names:
            if not pool.has_dataset(name):
                continue
            self.flush_dataset(pool.get_dataset(name),
                               after_model=after_model)

    def flush_dataset(self, dataset, after_model=False):
        """Write the PRIMARY attributes of this dataset to the cache."""
        if dataset and isinstance(dataset, Dataset):
            # Do not flush after model if not necessary
            if after_model:
                if len(dataset.get_attribute_names()) <= len(
                        dataset.get_id_name()):
                    return
                if (len(dataset.get_attribute_names()) == len(dataset.get_known_attribute_names())) and \
                                         (len(dataset.get_attributes_in_memory()) <= len(dataset.get_id_name())):
                    dataset.delete_computed_attributes()
                    return
            dataset.delete_computed_attributes()
            dataset.load_and_flush_dataset()

    def flush_datasets_after_model(self, resources):
        """Flush datasets once a model has finished.

        If resources['flush_variables'] is true, the computed tables of the
        AttributeCache are deleted and every dataset in the session pool is
        flushed; otherwise only the datasets listed under
        'datasets_to_cache_after_each_model' (default: none) are flushed.
        """
        if not resources.get('flush_variables', False):
            names_to_flush = resources.get(
                "datasets_to_cache_after_each_model", [])
        else:
            AttributeCache().delete_computed_tables()
            # Flushing below also deletes computed attributes (flush_dataset
            # calls delete_computed_attributes).
            pool = SessionConfiguration().get_dataset_pool()
            names_to_flush = pool.datasets_in_pool().keys()
        self.flush_datasets(names_to_flush, after_model=True)

    def _run_year(self,
                  year,
                  models,
                  simulation_state,
                  debuglevel,
                  resources,
                  write_datasets_to_cache_at_end_of_year,
                  cleanup_datasets=True):
        """
        Assumes that all datasets resides in the cache directory in binary format.
        """
        try:
            import wingdbstub
        except:
            pass
        self.vardict = {}
        log_file_name = os.path.join(simulation_state.get_cache_directory(),
                                     "year_%s_log.txt" % year)
        logger.enable_file_logging(log_file_name, 'w')
        try:
            logger.start_block('Simulate year %s' % year)
            try:
                base_year = resources['base_year']
                if year == base_year:
                    year_for_base_year_cache = year  # case of estimation
                else:
                    year_for_base_year_cache = year - 1
                cache_storage = AttributeCache().get_flt_storage_for_year(
                    year_for_base_year_cache)
                self.vardict['cache_storage'] = cache_storage
                base_cache_storage = AttributeCache().get_flt_storage_for_year(
                    base_year)
                self.vardict['base_cache_storage'] = base_cache_storage
                simulation_state.set_flush_datasets(
                    resources.get("flush_variables", False))
                SessionConfiguration()["simulation_year"] = year
                SessionConfiguration()["debuglevel"] = debuglevel
                datasets_to_preload_in_year = resources.get(
                    'datasets_to_preload_in_year', {})
                if datasets_to_preload_in_year.get(year, None) is not None:
                    datasets_to_preload = datasets_to_preload_in_year[year]
                else:
                    datasets_to_preload = resources.get(
                        'datasets_to_preload', {})
                for dataset_name in datasets_to_preload:
                    SessionConfiguration().get_dataset_from_pool(dataset_name)
                models_configuration = resources.get('models_configuration',
                                                     {})
                dataset_pool = SessionConfiguration().get_dataset_pool()
                datasets = {}
                for dataset_name, its_dataset in dataset_pool.datasets_in_pool(
                ).iteritems():
                    self.vardict[dataset_name] = its_dataset
                    datasets[dataset_name] = its_dataset
                    exec '%s=its_dataset' % dataset_name

                # This is needed. It resides in locals()
                # and is passed on to models as they run.
                ### TODO: There has got to be a better way!
                model_resources = Resources(datasets)
                n_models, model_group_members_to_run = self.get_number_of_models_and_model_group_members_to_run(
                    models, models_configuration)
                self.run_year_namespace = locals()
                #==========
                # Run the models.
                #==========
                model_number = -1
                for model_entry in models:
                    # list 'models' can be in the form:
                    # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
                    #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
                    #                                      'commercial']}},
                    #  {'model_name_3': ['estimate', 'run']},
                    #  'model_name_4',
                    #  {'model_name_5': {'group_members': 'all'}}
                    # ]
                    # get list of methods to be processed evtl. for each group member
                    if isinstance(model_entry, dict):
                        model_name, value = model_entry.items()[0]
                        if not isinstance(value, dict):  # is a model group
                            processes = value
                            if not isinstance(processes, list):
                                processes = [processes]
                    else:  # in the form 'model_name_4' in the comment above
                        model_name = model_entry
                        processes = ["run"]
                    group_member = None
                    model_group = model_group_members_to_run[model_name][1]
                    last_member = max(
                        1,
                        len(model_group_members_to_run[model_name][0].keys()))
                    for imember in range(last_member):
                        controller_config = models_configuration[model_name][
                            "controller"]
                        model_configuration = models_configuration[model_name]
                        if model_group_members_to_run[model_name][0].keys():
                            group_member_name = model_group_members_to_run[
                                model_name][0].keys()[imember]
                            group_member = ModelGroupMember(
                                model_group, group_member_name)
                            processes = model_group_members_to_run[model_name][
                                0][group_member_name]
                            member_model_name = "%s_%s" % (group_member_name,
                                                           model_name)
                            if member_model_name in models_configuration.keys(
                            ):
                                model_configuration = models_configuration[
                                    member_model_name]
                                if "controller" in model_configuration.keys():
                                    controller_config = model_configuration[
                                        "controller"]
                        datasets_to_preload_for_this_model = controller_config.get(
                            '_model_structure_dependencies_',
                            {}).get('dataset', [])
                        for dataset_name in datasets_to_preload_for_this_model:
                            try:
                                if not dataset_pool.has_dataset(
                                        dataset_name) or (
                                            dataset_name
                                            not in datasets.keys()):
                                    ds = dataset_pool.get_dataset(dataset_name)
                                    self.vardict[dataset_name] = ds
                                    datasets[dataset_name] = ds
                                    exec '%s=ds' % dataset_name
                            except:
                                logger.log_warning(
                                    'Failed to load dataset %s.' %
                                    dataset_name)
                        # import part
                        if "import" in controller_config.keys():
                            import_config = controller_config["import"]
                            for import_module in import_config.keys():
                                exec("from %s import %s" %
                                     (import_module,
                                      import_config[import_module]))

                        # gui_import_replacements part
                        # This is a temporary hack -- replicates the functionality of the "import" section
                        # for use with the GUI.  The contents of this part of the config is a dictionary.
                        # Keys are names of models (not used here).  Values are 2 element pairs.
                        # The first element is a name and the second is a value.  Bind the name to the value.
                        if "gui_import_replacements" in controller_config.keys(
                        ):
                            import_replacement_config = controller_config[
                                "gui_import_replacements"]
                            for model_name in import_replacement_config.keys():
                                pair = import_replacement_config[model_name]
                                temp = pair[1]
                                exec("%s = temp") % pair[0]

                        # init part
                        model = self.do_init(locals())

                        # estimate and/or run part
                        for process in processes:
                            model_number = model_number + 1
                            # write status file
                            model.set_model_system_status_parameters(
                                year, n_models, model_number,
                                resources.get('status_file_for_gui', None))
                            model.write_status_for_gui()
                            # prepare part
                            exec(self.do_prepare(locals()))
                            processmodel_config = controller_config[process]
                            if "output" in processmodel_config.keys():
                                outputvar = processmodel_config["output"]
                            else:
                                outputvar = "process_output"
                            self.vardict[outputvar] = self.do_process(locals())
                            exec outputvar + '=self.vardict[outputvar]'

                            # check command file from gui, if the simulation should be stopped or paused
                            self.do_commands_from_gui(
                                resources.get('command_file_for_gui', None))

                            # capture namespace for interactive estimation
                            self.run_year_namespace = locals()
                            self.flush_datasets_after_model(resources)
                            del model
                            collect()

                # Write all datasets to cache.
                if write_datasets_to_cache_at_end_of_year:
                    logger.start_block(
                        'Writing datasets to cache for year %s' % year)
                    try:
                        for dataset_name, its_dataset in SessionConfiguration(
                        ).get_dataset_pool().datasets_in_pool().iteritems():
                            self.flush_dataset(its_dataset)
                    finally:
                        logger.end_block()

            finally:
                logger.end_block()
        finally:
            logger.disable_file_logging(log_file_name)

        if cleanup_datasets:
            SessionConfiguration().delete_datasets()

    def do_init(self, parent_state):
        """Run the 'init' part of this model's configuration.

        parent_state is a dictionary of the caller's local variables (the
        caller passes locals()).  It must contain 'controller_config', whose
        "init" entry gives the "name" to call and optionally its "arguments",
        and 'group_member', which is None when the model belongs to no group.

        Returns model object.
        """
        # Evaluate the constructor call against an explicit copy of the
        # caller's names rather than exec-ing every entry into this frame.
        # 'self' always refers to this object, never to the caller's entry.
        namespace = dict(parent_state)
        namespace['self'] = self
        init_config = parent_state['controller_config']["init"]
        arguments = self.construct_arguments_from_config(init_config)
        if parent_state['group_member'] is None:  # No model group
            cmd = "%s(%s)" % (init_config["name"], arguments)
        else:  # Model belongs to a group
            cmd = "%s(group_member, %s)" % (init_config["name"], arguments)
        # NOTE(review): cmd is assembled from configuration text and evaluated
        # as code; the configuration must come from a trusted source.
        return eval(cmd, globals(), namespace)

    def do_prepare(self, parent_state):
        """Prepares for the current model in the parent state's context.
        What to do is determined by the contents of the current model's controller configuration.

        parent_state is a dictionary of the caller's local variables.  It must
        contain 'process' and 'controller_config'; when the controller
        configuration has a "prepare_for_<process>" entry, the name 'model'
        must be present as well because the prepare method is called on it.

        The result of the prepare method is stored in self.vardict under the
        configured "output" name ("prepare_output" by default).

        Returns a statement string for the caller to exec, binding that name to
        the stored value; the string is empty when there is nothing to prepare.
        """
        controller_config = parent_state['controller_config']
        key_name = "prepare_for_%s" % parent_state['process']
        if key_name not in controller_config.keys():
            # do nothing when return value is exec'ed
            return ''
        prepare_config = controller_config[key_name]
        if "output" in prepare_config.keys():
            outputvar = prepare_config["output"]
        else:
            outputvar = "prepare_output"
        # Evaluate against an explicit copy of the caller's names rather than
        # exec-ing every entry into this frame.  'self' is this object.
        namespace = dict(parent_state)
        namespace['self'] = self
        # NOTE(review): the expression is assembled from configuration text and
        # evaluated as code; the configuration must come from a trusted source.
        self.vardict[outputvar] = eval(
            "model.%s(%s)" %
            (prepare_config["name"],
             self.construct_arguments_from_config(prepare_config)),
            globals(), namespace)
        return '%s=self.vardict["%s"]' % (outputvar, outputvar)

    def do_process(self, parent_state):
        """Call the current process method on the current model.

        parent_state is a dictionary of the caller's local variables.  It must
        contain 'model', 'process' (the name of the method to call on the
        model) and 'processmodel_config' (whose optional "arguments" entry
        supplies the keyword arguments of the call).

        Returns whatever the called method returns.
        """
        # Evaluate against an explicit copy of the caller's names rather than
        # exec-ing every entry into this frame.  'self' is this object.
        namespace = dict(parent_state)
        namespace['self'] = self
        ev = "model.%s(%s)" % (
            parent_state['process'],
            self.construct_arguments_from_config(
                parent_state['processmodel_config']))
        # NOTE(review): ev is assembled from configuration text and evaluated
        # as code; the configuration must come from a trusted source.
        return eval(ev, globals(), namespace)

    def get_number_of_models_and_model_group_members_to_run(
            self, models, models_configuration):
        """Count number_of models in the list 'models' that can include group members (each member and each process is one model).

        Returns a tuple (number_of_models, model_group_members_to_run).  The
        second item maps every model name to a two-element list
        [members, model_group]: 'members' maps a group member name to the list
        of processes to run for it (empty for models without a group) and
        'model_group' is the ModelGroup built from the controller's
        'group_by_attribute' entry, or None when there is no such entry.

        Raises KeyError when a model group entry has no 'group_members' key, or
        when '_all_' members are requested without 'group_by_attribute'.
        """
        # list models can be in the form:
        # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
        #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
        #                                      'commercial']}},
        #  {'model_name_3': ['estimate', 'run']},
        #  'model_name_4',
        #  {'model_name_5': {'group_members': '_all_'}}
        # ]
        # NOTE(review): the counter starts at 1 rather than 0; kept as in the
        # original -- confirm against how callers use the total.
        number_of_models = 1
        model_group_members_to_run = {}
        for model_entry in models:
            if not isinstance(model_entry, dict):
                # in the form 'model_name_4' in the comment above
                model_group_members_to_run[model_entry] = [{}, None]
                number_of_models += 1
                continue

            model_name, value = list(model_entry.items())[0]
            if not isinstance(value, dict):
                # in the form 'model_name_3' in the comment above
                model_group_members_to_run[model_name] = [{}, None]
                if isinstance(value, list):
                    number_of_models += len(value)
                else:
                    number_of_models += 1
                continue

            # is a model group
            if "group_members" not in value:
                raise KeyError("Key for model " + model_name +
                               " must be 'group_members'.")
            group_members = value["group_members"]
            model_group = None
            controller = models_configuration[model_name]["controller"]
            if 'group_by_attribute' in controller.keys():
                group_dataset_name, group_attribute = controller[
                    'group_by_attribute']
                model_group = ModelGroup(
                    SessionConfiguration().get_dataset_from_pool(
                        group_dataset_name), group_attribute)
            if not isinstance(group_members, list):
                group_members = [group_members]
            # see 'model_name_5' example above
            if group_members and group_members[0] == "_all_":
                if model_group is None:
                    raise KeyError(
                        "Entry 'group_by_attribute' is missing for model %s" %
                        model_name)
                group_members = model_group.get_member_names()

            members_to_run = {}
            model_group_members_to_run[model_name] = [
                members_to_run, model_group
            ]
            for member in group_members:
                if isinstance(member, dict):
                    # see 'model_name_2' ('residential') in the comment above
                    member_name = list(member.keys())[0]
                    processes = member[member_name]
                    if not isinstance(processes, list):
                        processes = [processes]
                else:  # see 'model_name_1'
                    member_name = member
                    processes = ["run"]
                members_to_run[member_name] = processes
                number_of_models += len(processes)
        return (number_of_models, model_group_members_to_run)

    def do_commands_from_gui(self, filename=None, poll_interval=10):
        """Obey the command written in the file 'filename', if there is one.

        Returns immediately when filename is None or the file does not exist.
        Otherwise the file is read repeatedly, sleeping poll_interval seconds
        between reads, until it contains one of:
          'stop'   -- log a warning and exit the process via sys.exit();
          'resume' -- return to the caller.
        While the file contains 'pause' the loop keeps polling.  Any other
        content is reported with a warning and polled again.
        """
        if (filename is None) or not os.path.exists(filename):
            return
        while True:
            # the context manager closes the file even if reading fails
            with open(filename) as f:
                line = f.read().strip()
            if line == 'stop':
                logger.log_warning('Simulation stopped.')
                sys.exit()
            if line == 'resume':
                break
            if line != 'pause':
                logger.log_warning(
                    "Unknown command '%s'. Allowed commands: 'stop', 'pause', 'resume'."
                    % line)
            time.sleep(poll_interval)

    def run_multiprocess(self, resources):
        """Run the simulation with every year forked as a separate process.

        'resources' is wrapped in a Resources object.  It must contain
        'cache_directory' (may be None, in which case the cache directory of
        SimulationState is used) and 'years' (first and last year are taken
        from its first and last items).  Optional entries read here are
        'profile_filename', 'seed' and '_seed_dictionary_'.

        Years are run in order; the loop stops at the first year whose process
        reports failure.  A log file 'run_multiprocess.log' is written into
        the cache directory.
        """
        resources = Resources(resources)
        profiler_name = resources.get("profile_filename", None)
        cache_directory = resources['cache_directory']
        if cache_directory is None:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is absolutely no good reason to be
        ###       changing the Configuration!
        resources['cache_directory'] = cache_directory

        log_file = os.path.join(cache_directory, 'run_multiprocess.log')
        logger.enable_file_logging(log_file)

        start_year = resources["years"][0]
        end_year = resources["years"][-1]
        nyears = end_year - start_year + 1
        root_seed = resources.get("seed", NO_SEED)
        seed_dict = resources.get('_seed_dictionary_', None)
        if seed_dict is not None:
            # This is added by the RunManager to ensure reproducibility including restarted runs
            seed_array = array(
                [seed_dict[year] for year in range(start_year, end_year + 1)])
        else:
            seed(root_seed)
            seed_array = randint(1, 2**30, nyears)
        logger.log_status("Running simulation for years %d thru %d" %
                          (start_year, end_year))
        logger.log_status("Simulation root seed: %s" % root_seed)

        try:
            for iyear, year in enumerate(range(start_year, end_year + 1)):
                success = self._run_each_year_as_separate_process(
                    iyear,
                    year,
                    seed=seed_array[iyear],
                    resources=resources,
                    profiler_name=profiler_name,
                    log_file=log_file)
                if not success:
                    break
        finally:
            # Clear the running flag even when a year raises, so that threads
            # blocked in wait_for_finish() are not left waiting forever.
            self._notify_stopped()
        if profiler_name is not None:  # insert original value
            resources["profile_filename"] = profiler_name
        logger.log_status("Done running simulation for years %d thru %d" %
                          (start_year, end_year))

    #TODO: changing of configuration
    def _run_each_year_as_separate_process(self,
                                           iyear,
                                           year,
                                           seed=None,
                                           resources=None,
                                           profiler_name=None,
                                           log_file=None):
        """Fork a new process that runs the single year 'year'.

        'resources' is modified in place: 'years' becomes (year, year), 'seed'
        is replaced and, when profiler_name is given, 'profile_filename' gets
        the year appended.  File logging to log_file is switched off in this
        process while the child runs and switched back on afterwards.  iyear is
        not used here.

        Returns the success value reported by _fork_new_process.
        """
        logger.start_block('Running simulation for year %d in new process' %
                           year)
        success = False
        try:
            resources['years'] = (year, year)
            # NOTE(review): this stores a one-element tuple, not the bare seed
            # (the original statement ended with a trailing comma).  Left
            # unchanged because the forked process may rely on that form --
            # confirm whether it is intentional.
            resources['seed'] = (seed, )

            if profiler_name is not None:
                # add year to the profile name
                resources["profile_filename"] = "%s_%s" % (profiler_name, year)

            optional_args = []
            if log_file:
                optional_args += [
                    '--log-file-name',
                    os.path.split(log_file)[-1]
                ]

            logger.disable_file_logging(log_file)
            try:
                success = self._fork_new_process(
                    'opus_core.model_coordinators.model_system',
                    resources,
                    optional_args=optional_args)
            finally:
                # restore file logging even when the fork raises
                logger.enable_file_logging(log_file, verbose=False)
        finally:
            logger.end_block()

        return success

    def run_in_one_process(
            self,
            resources,
            run_in_background=False,
            class_path='opus_core.model_coordinators.model_system'):
        """Fork one new process that runs the module 'class_path' with 'resources'.

        'resources' is wrapped in a Resources object and must contain
        'cache_directory'; when that entry is None the cache directory of
        SimulationState is used instead.  run_in_background is passed on to
        _fork_new_process.
        """
        resources = Resources(resources)
        cache_directory = resources['cache_directory']
        if cache_directory is None:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        resources['cache_directory'] = cache_directory

        try:
            self._fork_new_process('%s' % class_path,
                                   resources,
                                   delete_temp_dir=False,
                                   run_in_background=run_in_background)
        finally:
            # Clear the running flag even when forking raises, so that threads
            # blocked in wait_for_finish() are not left waiting forever.
            self._notify_stopped()

    def run_in_same_process(self, resources, **kwargs):
        """Run the model system in the current process via RunModelSystem.

        'resources' is wrapped in a Resources object and must contain
        'cache_directory'; when that entry is None the cache directory of
        SimulationState is used instead.  Extra keyword arguments are passed
        on to RunModelSystem.
        """
        resources = Resources(resources)
        cache_directory = resources['cache_directory']
        if cache_directory is None:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        resources['cache_directory'] = cache_directory

        self._notify_started()
        try:
            RunModelSystem(model_system=self, resources=resources, **kwargs)
        finally:
            # Clear the running flag even when the run raises, so that threads
            # blocked in wait_for_finish() are not left waiting forever.
            self._notify_stopped()

    def construct_arguments_from_config(self, config):
        """Turn the "arguments" entry of 'config' into call-argument source text.

        Every item of config["arguments"] contributes "<name>=<value>, " (note
        the trailing comma and space).  Returns an empty string when there is
        no "arguments" entry or it has no items.
        """
        if "arguments" not in config.keys():
            return ""
        arguments = config["arguments"]
        names = arguments.keys()
        if len(names) <= 0:
            return ""
        pieces = ["%s=%s, " % (name, arguments[name]) for name in names]
        return "".join(pieces)

    def wait_for_start(self):
        """Block the calling thread until self.running becomes true."""
        self.running_conditional.acquire()
        try:
            while not self.running:
                self.running_conditional.wait()
        finally:
            # release the lock even if the wait is interrupted by an exception
            self.running_conditional.release()

    def wait_for_finish(self):
        """Block the calling thread until self.running becomes false."""
        self.running_conditional.acquire()
        try:
            while self.running:
                self.running_conditional.wait()
        finally:
            # release the lock even if the wait is interrupted by an exception
            self.running_conditional.release()

    def wait_for_process_or_finish(self, process_index):
        """Wait until the forked process number 'process_index' exists or the run has stopped.

        Blocks while self.forked_processes has no entry at process_index and
        self.running is true.

        Returns process_index, or the index of the last forked process when the
        system is no longer running (-1 if nothing was forked).
        """
        self.running_conditional.acquire()
        try:
            while process_index >= len(self.forked_processes) and self.running:
                self.running_conditional.wait()
        finally:
            # release the lock even if the wait is interrupted by an exception
            self.running_conditional.release()
        if not self.running:
            process_index = len(self.forked_processes) - 1
        return process_index

    def _fork_new_process(self,
                          module_name,
                          resources,
                          run_in_background=False,
                          **key_args):
        """Create a ForkProcess, register it and start 'module_name' in it.

        The new ForkProcess is appended to self.forked_processes and
        self.running is set to True while holding running_conditional; waiting
        threads are notified once the process has been started.  Unless
        run_in_background is true, this call then waits for the process and
        cleans it up.  Extra keyword arguments are passed on to
        ForkProcess.fork_new_process together with run_in_background.

        Returns the value returned by ForkProcess.fork_new_process.
        """
        self.running_conditional.acquire()
        try:
            self.running = True
            # Keep a direct reference: forked_processes[-1] could be another
            # thread's process by the time it is read again outside the lock.
            process = ForkProcess()
            self.forked_processes.append(process)
            key_args["run_in_background"] = run_in_background
            success = process.fork_new_process(module_name, resources,
                                               **key_args)
            self.running_conditional.notifyAll()
        finally:
            # never leave the lock held when starting the process fails
            self.running_conditional.release()
        if not run_in_background:
            process.wait()
            process.cleanup()
        return success

    def _notify_started(self):
        """Set self.running to True and wake all threads waiting on running_conditional."""
        condition = self.running_conditional
        condition.acquire()
        self.running = True
        condition.notifyAll()
        condition.release()

    def _notify_stopped(self):
        """Set self.running to False and wake all threads waiting on running_conditional."""
        condition = self.running_conditional
        condition.acquire()
        self.running = False
        condition.notifyAll()
        condition.release()

    def update_config_for_multiple_runs(self, config):
        """Switch the models listed in 'models_with_sampled_coefficients' to sampled-coefficient copies.

        'config' is modified in place:
          * config['models_in_year'][base_year + 1] is created from
            config['models'] when it is missing or None;
          * for every model to update, its name in that year's list is
            replaced by '<model>_sampled_coef' (when the model is in the list);
          * config['models_configuration']['<model>_sampled_coef'] becomes a
            Configuration built from the original model's configuration whose
            'prepare_for_run' arguments request sampled coefficients.
        """
        models_to_update = config.get('models_with_sampled_coefficients', [])
        first_year = config['base_year'] + 1
        if 'models_in_year' not in config.keys():
            config['models_in_year'] = {}
        if config['models_in_year'].get(first_year, None) is None:
            config['models_in_year'][first_year] = config.get('models')

        for umodel in models_to_update:
            # Name the copy before touching the year's list: previously a model
            # missing from the list left this name undefined or stale from the
            # preceding iteration.
            new_model_name = '%s_sampled_coef' % umodel
            try:
                models_for_year = config['models_in_year'][first_year]
                models_for_year[models_for_year.index(
                    umodel)] = new_model_name
            except (ValueError, AttributeError, TypeError):
                # Best effort, as before: the model is not in that year's list,
                # or the list is missing or cannot be modified.
                pass
            config["models_configuration"][new_model_name] = Configuration(
                config["models_configuration"][umodel])
            arguments = config["models_configuration"][new_model_name][
                "controller"]["prepare_for_run"]["arguments"]
            arguments["sample_coefficients"] = True
            arguments["distribution"] = "'normal'"
            arguments["cache_storage"] = "base_cache_storage"

Пример #28

Показать файл

Файл: estimator.py Проект: christianurich/VIBe2UrbanSim

class Estimator(GenericModelExplorer):
    """Estimates one model through a ModelSystem and keeps the results.

    The model to estimate is config['model_name'] if given, otherwise the
    first entry of config['models'] that lists the "estimate" process.
    """

    def __init__(self, config=None, save_estimation_results=False):
        """Set up the simulation state, session configuration and model system.

        config must contain a non-None 'cache_directory' and a
        'dataset_pool_configuration' with a package_order attribute.
        If config['config_changes_for_estimation']['estimate_config'] is
        non-empty, it is merged into the 'estimate_config' init argument of
        the model's controller configuration.

        Raises KeyError when config is None or has no usable
        'cache_directory'.
        """
        # config defaults to None; report that like a missing entry instead of
        # failing with a TypeError on the membership test.
        if config is None or 'cache_directory' not in config or config['cache_directory'] is None:
            raise KeyError("The cache directory must be specified in the "
                "given configuration, giving the filesystem path to the cache "
                "directory containing the data with which to estimate. Please "
                "check that your configuration contains the 'cache_directory' "
                "entry and that it is not None.")

        self.simulation_state = SimulationState(new_instance=True)
        self.simulation_state.set_cache_directory(config['cache_directory'])

        SessionConfiguration(new_instance=True,
                             package_order=config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())
        self.config = Resources(config)
        self.save_estimation_results = save_estimation_results
        self.debuglevel = self.config.get("debuglevel", 4)
        self.model_system = ModelSystem()
        self.agents_index_for_prediction = None

        models = self.config.get('models', [])

        self.model_name = None
        if "model_name" in config.keys():
            self.model_name = config["model_name"]
        else:
            # take the first model that is configured to be estimated
            for model in models:
                if isinstance(model, dict):
                    model_name = list(model.keys())[0]
                    if (model[model_name] == "estimate") or (isinstance(model[model_name], list)
                        and ("estimate" in model[model_name])):
                            self.model_name = model_name
                            break
        estimate_config_changes = self.config.get('config_changes_for_estimation', {}).get('estimate_config', {})
        if len(estimate_config_changes) > 0:
            change = Resources({'models_configuration': {self.model_name: {'controller': {'init': {'arguments': {}}}}}})
            estimate_config_str = self.config['models_configuration'].get(self.model_name, {}).get('controller', {}).get('init', {}).get('arguments', {}).get('estimate_config', '{}')
            estimate_config = Resources({})
            try:
                # NOTE(review): evaluates configuration text as code; the
                # configuration must come from a trusted source.
                evaluated = eval(estimate_config_str)
                # The default '{}' evaluates to a plain dict, which has no
                # merge() method; wrap anything that is not a Resources.
                if not isinstance(evaluated, Resources):
                    evaluated = Resources(evaluated)
                estimate_config = evaluated
            except Exception:
                # best effort: fall back to the empty configuration
                pass

            estimate_config.merge(estimate_config_changes)
            self.config.merge(change)
            self.config['models_configuration'][self.model_name]['controller']['init']['arguments']['estimate_config'] = 'Resources(%s)' % estimate_config

    def estimate(self, out_storage=None):
        """Run the model system on the configuration and extract coefficients and specification.

        Results are saved to out_storage when save_estimation_results is set.
        """
        self.model_system.run(self.config, write_datasets_to_cache_at_end_of_year=False)
        self.extract_coefficients_and_specification()

        if self.save_estimation_results:
            self.save_results(out_storage=out_storage)

    def reestimate(self, specification_module_name=None, specification_dict=None, out_storage=None, type=None, submodels=None):
        """specification_module_name is name of a module that contains a dictionary called
        'specification'. If it is not given, the argument specification_dict must be given which is a dictionary object.
        'type' is the name of model member, such as 'commercial', 'residential'. The specification dictionary
        is expected to have an entry of this name. If 'submodels' is given (list or a number),
        the restimation is done only for those submodels.

        Raises ValueError when a requested submodel is not in the specification.
        """
        if specification_module_name is not None:
            # Import (and reload, to pick up edits) the module directly rather
            # than through exec/eval on concatenated source strings.
            import sys
            __import__(specification_module_name)
            specification_module = reload(sys.modules[specification_module_name])
            specification_dict = specification_module.specification

        if type is not None:
            specification_dict = specification_dict[type]
        if submodels is not None: #remove all submodels but the given ones from specification
            submodels_to_be_deleted = list(specification_dict.keys())
            if not isinstance(submodels, list):
                submodels = [submodels]
            for sm in submodels:
                if sm not in submodels_to_be_deleted:
                    raise ValueError("Submodel %s not in the specification." % sm)
                submodels_to_be_deleted.remove(sm)
                if "_definition_" in submodels_to_be_deleted:
                    submodels_to_be_deleted.remove("_definition_")
            for sm in submodels_to_be_deleted:
                del specification_dict[sm]
        self.specification = EquationSpecification(specification_dict=specification_dict)
        new_namespace = self.model_system.run_year_namespace
        keys_coeff_spec = self.get_keys_for_coefficients_and_specification()
        new_namespace[keys_coeff_spec["specification"]] = self.specification
        self.coefficients, coeff_dict_dummy = self.model_system.do_process(new_namespace)
        ## update run_year_namespce since it's not been updated by do_process
        self.model_system.run_year_namespace = new_namespace
        self.model_system.run_year_namespace[keys_coeff_spec["coefficients"]] = self.coefficients

        ## this gets coeff and spec from run_year_namespce and is only updated in _run_year method
        #self.extract_coefficients_and_specification()
        if self.save_estimation_results:
            self.save_results(out_storage=out_storage)

    def predict(self, predicted_choice_id_name, agents_index=None):
        """ Run prediction. Currently makes sense only for choice models.

        The model's 'run' process is executed with the estimated coefficients
        for the agents in agents_index (by default the index obtained from
        get_agent_set_index() on the first call).  The predicted choices are
        stored in the agents' attribute predicted_choice_id_name; agents
        outside agents_index get -1.  The agents' current choices are left
        unchanged.

        Returns True on success, False if estimate() has not been run or the
        prediction failed (the error is logged).
        """
        # Create temporary configuration where all words 'estimate' are replaced by 'run'
        # NOTE(review): the nested dictionaries modified below may be shared
        # with self.config if Resources() copies shallowly -- confirm.
        tmp_config = Resources(self.config)

        if self.agents_index_for_prediction is None:
            self.agents_index_for_prediction = self.get_agent_set_index().copy()

        if agents_index is None:
            agents_index = self.agents_index_for_prediction

        tmp_config['models_configuration'][self.model_name]['controller']['run']['arguments']['coefficients'] = "coeff_est"
        tmp_config['models_configuration'][self.model_name]['controller']['run']['arguments']['agents_index'] = "agents_index"
        tmp_config['models_configuration'][self.model_name]['controller']['run']['arguments']['chunk_specification'] = "{'nchunks':1}"

        ### save specification and coefficients to cache (no matter the save_estimation_results flag)
        ### so that the prepare_for_run method could load specification and coefficients from there
        #output_configuration = self.config['output_configuration']
        #del self.config['output_configuration']
        #self.save_results()

        #self.config['output_configuration'] = output_configuration

        #self.model_system.run_year_namespace["coefficients"] = self.coefficients
        #del tmp_config['models_configuration'][self.model_name]['controller']['prepare_for_run']

        try:
            run_year_namespace = copy.copy(self.model_system.run_year_namespace)
        except Exception:
            logger.log_error("The estimate() method must be run first")
            return False

        try:
            agents = self.get_agent_set()
            choice_id_name = self.get_choice_set().get_id_name()[0]
            # save current locations of agents
            current_choices = agents.get_attribute(choice_id_name).copy()
            dummy_data = zeros(current_choices.size, dtype=current_choices.dtype)-1
            agents.modify_attribute(name=choice_id_name, data=dummy_data) #reset all choices

            try:
                run_year_namespace["process"] = "run"
                run_year_namespace["coeff_est"] = self.coefficients
                run_year_namespace["agents_index"] = agents_index
                run_year_namespace["processmodel_config"] = tmp_config['models_configuration'][self.model_name]['controller']['run']
                new_choices = self.model_system.do_process(run_year_namespace)
            finally:
                # Put the saved choices back even when the run fails;
                # otherwise the agents would be left with all choices reset.
                #self.model_system.run(tmp_config, write_datasets_to_cache_at_end_of_year=False)
                #new_choices = agents.get_attribute(choice_id_name).copy()
                agents.modify_attribute(name=choice_id_name, data=current_choices)
            dummy_data[agents_index] = new_choices
            if predicted_choice_id_name not in agents.get_known_attribute_names():
                agents.add_primary_attribute(name=predicted_choice_id_name, data=dummy_data)
            else:
                agents.modify_attribute(name=predicted_choice_id_name, data=dummy_data)
            logger.log_status("Predictions saved into attribute " + predicted_choice_id_name)
            return True
        except Exception as e:
            logger.log_error("Error encountered in prediction: %s" % e)
            logger.log_stack_trace()

        return False

Пример #29

Показать файл

    def run(self, year=None, years_to_run=[], configuration=None):
        """Export the configured datasets as CSV files for the travel model.

        Nothing happens unless ``year`` is listed in ``years_to_run`` and
        ``self.data_to_export`` is set.  Otherwise every entry of
        ``self.data_to_export`` is computed from the cache of ``year`` and
        written to ``<cache_directory>/mtc_data/<year><name>.csv``.  The
        "ABAGData" table is additionally rescaled so that its age-category
        shares match the age-control table of the nearest available year.

        Arguments:
            - year: simulation year to export
            - years_to_run: years for which an export is wanted (only read,
              never modified)
            - configuration: run configuration; 'cache_directory',
              'dataset_pool_configuration' and 'project_name' are read

        Raises:
            - ValueError if no "tazData<year>.csv" age-control file is found
        """
        if year not in years_to_run or self.data_to_export is None:
            return

        # Local import: only needed for the header clean-up of the CSV files.
        import subprocess

        cache_directory = configuration['cache_directory']
        simulation_state = SimulationState()
        simulation_state.set_cache_directory(cache_directory)
        simulation_state.set_current_time(year)
        attribute_cache = AttributeCache()
        package_order = configuration['dataset_pool_configuration'].package_order
        dataset_pool = SessionConfiguration(new_instance=True,
                                            package_order=package_order,
                                            in_storage=attribute_cache
                                            ).get_dataset_pool()
        out_dir = os.path.join(cache_directory, "mtc_data")

        out_storage = csv_storage(storage_location=out_dir)

        # Adjust the age distribution per ABAG/MTC's specifications
        age_control_dir = os.path.join(paths.OPUS_DATA_PATH, configuration['project_name'], "ageControl")
        age_control_storage = csv_storage(storage_location=age_control_dir)
        # Kept only for its side effect: fails early with OSError when the
        # age-control directory does not exist.
        os.listdir(age_control_dir)
        taz_files = glob.glob(os.path.join(age_control_dir, "tazData*.csv"))
        if not taz_files:
            raise ValueError("No age control files (tazData<year>.csv) found in %s"
                             % age_control_dir)
        years = np.array([int(os.path.basename(fname).replace("tazData", "").replace(".csv", ""))
                          for fname in taz_files])
        closest_year = years[np.argmin(np.abs(years - year))]
        if closest_year != year:
            logger.log_warning("Could not find age control data for " + str(year) +
                               ".  Choosing nearest year " + str(closest_year) + ".")

        age_control_table = age_control_storage.load_table("tazData" + str(closest_year), lowercase=False)

        # Calculate the ABAG shares of person by age
        age_categories = ['AGE0004', 'AGE0519', 'AGE2044', 'AGE4564', 'AGE65P']
        age_category_sums = dict((k, age_control_table[k].sum()) for k in age_categories)
        # float() guards against Python 2 floor division when the sums are integers
        total = float(sum(age_category_sums.values()))
        abag_age_category_shares = dict((k, age_category_sums[k]/total) for k in age_categories)

        for data_fname, variable_mapping in self.data_to_export.iteritems():

            # The mapping direction is selected by a module-level flag.
            if not flip_urbansim_to_tm_variable_mappling:
                col_names = variable_mapping.values()
                variables_aliases = ["=".join(mapping[::-1]) for mapping in
                                     variable_mapping.iteritems()]
            else:
                col_names = variable_mapping.keys()
                variables_aliases = ["=".join(mapping) for mapping in
                                     variable_mapping.iteritems()]

            dataset_name = VariableName(variables_aliases[0]).get_dataset_name()
            dataset = dataset_pool.get_dataset(dataset_name)
            dataset.compute_variables(variables_aliases)

            if data_fname == "ABAGData":
                logger.log_status("Adjusting ABAGData to match age controls")
                age_category_sums = dict((k, dataset[k].sum()) for k in age_categories)
                total = float(sum(age_category_sums.values()))
                us_age_category_shares = dict((k, age_category_sums[k]/total) for k in age_categories)
                adjustments = dict((k, abag_age_category_shares[k]/us_age_category_shares[k]) for k in age_categories)
                diff = np.zeros(dataset.n)
                for k in age_categories:
                    before = dataset[k]
                    # Built-in round() is kept on purpose (np.round rounds halves differently).
                    dataset[k] = np.array([round(v*adjustments[k]) for v in dataset.get_attribute(k)])
                    diff += (dataset[k] - before)
                # Rounding changes the category totals; carry the change over
                # to the population totals.
                dataset["TOTPOP"] += diff
                dataset["HHPOP"] += diff
                logger.log_status("NOTE: Adjusted total population by %d (%2.3f%%) due to rounding error." %
                                  (int(diff.sum()), diff.sum()*100/total))

            org_fname = os.path.join(out_dir, "%s.computed.csv" % data_fname)
            new_fname = os.path.join(out_dir, "%s%s.csv" % (year,data_fname))
            block_msg = "Writing {} for travel model to {}".format(data_fname,
                                                                   new_fname)
            with block(block_msg):
                dataset.write_dataset(attributes=col_names,
                                    out_storage=out_storage,
                                    out_table_name=data_fname)
                #rename & process header
                shutil.move(org_fname, new_fname)
                # Strip the ":<letter><digit>" suffixes from the file in place.
                # An argument list (no shell) keeps paths containing spaces or
                # shell metacharacters intact.
                subprocess.call(["sed", "-i", "s/:[a-z][0-9]//g", new_fname])

Пример #30

Показать файл

            ("paris.household_x_neighborhood.age_lnprice","age_lnprice"),
            ("paris.household_x_neighborhood.lninc_lnprice","lninc_lnprice"),
            ("paris.neighborhood.delta_pop","delta_pop"),
            ("neighborhood.rail9","rail"),
            ("neighborhood.subway","subway"),
            ("neighborhood.disthwy","disthwy"),
            ("neighborhood.tc","tc"),
            ("neighborhood.vp","vp"),
            ("paris.household_x_neighborhood.hhfem_nbtc","hhfem_nbtc")
            )
    }

    # Load the estimation configuration from the project-local module.
    from my_estimation_config import my_configuration    
    # Set the simulation clock to 2000 and point the state at the configured cache.
    ss = SimulationState()
    ss.set_current_time(2000)
    ss.set_cache_directory(my_configuration['cache_directory'])

    # Create a new session configuration that uses the attribute cache as its
    # input storage and the package order from the configuration.
    attribute_cache = AttributeCache()
    sc = SessionConfiguration(new_instance=True,
                         package_order=my_configuration['dataset_pool_configuration'].package_order,
                         in_storage=attribute_cache)


    #settings = ParisSettings()
    #settings.prepare_session_configuration()
    # Build the estimator from the same configuration, with
    # save_estimation_results switched off.
    estimator = HLCMEstimator(config=my_configuration,
                              save_estimation_results=False)

    #estimator = HLCMEstimator(settings=my_configuration, 
    #                    run_land_price_model_before_estimation=False, 
    #                    save_estimation_results=False,

Пример #31

Показать файл

Файл: do_refinement.py Проект: psrc/urbansim

    
    # Read the year range and the package order from the command-line options;
    # the package order is a comma-separated string.
    start_year = options.start_year
    end_year = options.end_year
    package_order = [ package.strip() for package in options.package_order.split(",") ]
    refinements = None
    refinements_storage = None
    # When a refinements directory is given, load the 'refinement' dataset from
    # it and fill a missing start/end year from the dataset's 'year' attribute.
    if options.refinements_directory is not None:
        refinements_storage = StorageFactory().get_storage('flt_storage', storage_location=options.refinements_directory)
        refinements = DatasetFactory().search_for_dataset('refinement', package_order, arguments={'in_storage':refinements_storage})
        years = refinements.get_attribute('year')
        if start_year is None: start_year = years.min()
        if end_year is None: end_year = years.max()
        
    # NOTE(review): if no refinements directory was given and options.start_year
    # is None, set_current_time() receives None here because the fallback below
    # has not run yet -- confirm that this is acceptable.
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(options.cache_directory)
    simulation_state.set_current_time(start_year)
    attribute_cache = AttributeCache()
    dataset_pool = SessionConfiguration(new_instance=True,
                                        package_order=package_order,
                                        in_storage=attribute_cache).get_dataset_pool()
    
    # Fallback: take the 'refinement' dataset from the dataset pool and derive
    # any missing year bound from it.
    if refinements is None:
        refinements = dataset_pool.get_dataset('refinement')
        years = refinements.get_attribute('year')
        if start_year is None: start_year = years.min()
        if end_year is None: end_year = years.max()

    # Process each year of the inclusive range in turn.
    for year in range(start_year, end_year+1):
        logger.start_block("Doing refinement for %s" % year )
        simulation_state.set_current_time(year)

Пример #32

Показать файл

Файл: expand_persons_from_households.py Проект: urban-ai/VIBe2UrbanSim

    def __init__(self, config):
        """Create one person record per household worker and write the table.

        The output goes to an SQL storage when the configuration contains
        'estimation_database_configuration', otherwise to an flt storage in the
        cache directory under the year following the base year.  Households are
        read through the session's dataset pool; a household with ``n > 0``
        workers yields ``n`` persons with member ids ``1..n``, ``is_worker`` 1
        and ``job_id`` -1.  The result is written as table 'persons'.
        """
        db_key = 'estimation_database_configuration'
        if db_key not in config:
            target_dir = os.path.join(config['cache_directory'],
                                      str(config['base_year'] + 1))
            out_storage = StorageFactory().get_storage(
                type='flt_storage', storage_location=target_dir)
        else:
            server = DatabaseServer(config[db_key])
            database = server.get_database(config[db_key].database_name)
            out_storage = StorageFactory().build_storage_for_dataset(
                type='sql_storage', storage_location=database)

        # Point the simulation state at the base-year cache.
        state = SimulationState()
        state.set_cache_directory(config['cache_directory'])
        state.set_current_time(config['base_year'])
        cache = AttributeCache()

        pool_config = config['dataset_pool_configuration']
        SessionConfiguration(new_instance=True,
                             package_order=pool_config.package_order,
                             in_storage=cache)

        # Build the cache first when the base-year directory is missing.
        base_year_dir = os.path.join(config['cache_directory'],
                                     str(config['base_year']))
        if not os.path.exists(base_year_dir):
            CacheScenarioDatabase().run(config, unroll_gridcells=False)

        for preload_name in config['datasets_to_preload']:
            SessionConfiguration().get_dataset_from_pool(preload_name)

        households = SessionConfiguration().get_dataset_from_pool("household")
        household_ids = households.get_id_attribute()
        workers = households.get_attribute("workers")

        hh_ids, member_ids, is_worker, job_ids = [], [], [], []
        for idx in range(households.size()):
            n_workers = workers[idx]
            if not n_workers > 0:
                continue
            hh_ids.extend([household_ids[idx]] * n_workers)
            member_ids.extend(range(1, n_workers + 1))
            is_worker.extend([1] * n_workers)
            job_ids.extend([-1] * n_workers)

        # Stage the table in an in-memory storage, then copy it out.
        persons_table_name = 'persons'
        person_columns = {
            'person_id': arange(len(hh_ids)) + 1,
            'household_id': array(hh_ids),
            'member_id': array(member_ids),
            'is_worker': array(is_worker),
            'job_id': array(job_ids),
        }
        in_storage = StorageFactory().get_storage('dict_storage')
        in_storage.write_table(table_name=persons_table_name,
                               table_data=person_columns)

        persons = PersonDataset(in_storage=in_storage,
                                in_table_name=persons_table_name)
        persons.write_dataset(out_storage=out_storage,
                              out_table_name=persons_table_name)

Пример #33

Показать файл

    def run(self,
            optimizer='lbfgsb',
            results_pickle_prefix="calib",
            optimizer_kwargs={}):
        ''' Call the specified optimizer to calibrate.

        Arguments:
            - optimizer: optimization method to use; one of 'bfgs', 'lbfgsb',
              'anneal' or 'panneal'
            - results_pickle_prefix: prefix of the pickle file name that will be
              saved after the optimization; if None, results are not saved
            - optimizer_kwargs: keyword arguments overriding the per-optimizer
              defaults (only read, never modified)

        Returns:
            - the results from the optimizer
            - a pickle dump of the results is also written to
              self.log_directory, if results_pickle_prefix is specified

        Raises:
            - ValueError for an unrecognized optimizer name
            - TypeError if a calibration attribute is neither a string nor a
              list/tuple
        '''

        simulation_state = SimulationState()
        simulation_state.set_current_time(self.base_year)
        simulation_state.set_cache_directory(self.cache_directory)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(
            new_instance=True,
            package_order=self.package_order,
            in_storage=attribute_cache).get_dataset_pool()

        # NOTE(review): calib_datasets is emptied right before it is iterated,
        # so this loop never runs and init_v below stays empty.  The mapping of
        # dataset name -> calibration attribute(s) presumably should come from
        # elsewhere (instance or module state) -- confirm against the caller.
        calib_datasets = {}
        for dataset_name, calib_attr in calib_datasets.iteritems():
            dataset = dataset_pool.get_dataset(
                dataset_name, dataset_arguments={'id_name': []})
            # A dataset may be restricted by `subset` or by `subset_patterns`,
            # but not by both.
            assert subset is None or subset.get(dataset_name, None) is None or \
                   subset_patterns is None or subset_patterns.get(dataset_name, None) is None
            if subset is not None and subset.get(dataset_name,
                                                 None) is not None:
                subset_attr, subset_cond = subset.get(dataset_name)
                index = np.in1d(dataset[subset_attr], subset_cond)
            elif subset_patterns is not None and subset_patterns.get(
                    dataset_name, None) is not None:
                subset_attr, subset_pattern = subset_patterns.get(dataset_name)
                index = array([
                    True if re.search(subset_pattern, attr_v) else False
                    for attr_v in dataset[subset_attr]
                ])
            else:
                index = arange(dataset.size(), dtype='i')

            calib_datasets[dataset_name] = [dataset, calib_attr, index]

        # Concatenate the selected attribute values into the start vector.
        init_v = array([], dtype='f8')
        for dataset_name, calib in calib_datasets.iteritems():
            dataset, calib_attr, index = calib
            if isinstance(calib_attr, str):
                init_v = np.concatenate((init_v, dataset[calib_attr][index]))
            elif isinstance(calib_attr, (list, tuple)):
                for attr in calib_attr:
                    init_v = np.concatenate((init_v, dataset[attr][index]))
            else:
                raise TypeError("Unrecongized data type in calib_datasets")

        t0 = time.time()

        if is_parallelizable == True: set_parallel(True)
        try:
            print(OKBLUE + "\noptimizer = {} (is_parallel = {})".format(
                optimizer, is_parallelizable) + ENDC)
            print(OKBLUE + "-------------------------------------------------------\n" + ENDC)
            if optimizer == 'bfgs':
                default_kwargs = {
                    'fprime': None,
                    'epsilon': 1e-08,
                    'maxiter': None,
                    'full_output': 1,
                    'disp': 1,
                    'retall': 0,
                    'callback': None
                }
                optimizer_func = fmin_bfgs
            elif optimizer == 'lbfgsb':
                default_kwargs = {
                    'fprime': None,
                    'approx_grad': True,
                    'bounds': None,
                    'factr': 1e12,
                    'iprint': 1
                }
                optimizer_func = fmin_l_bfgs_b
            elif optimizer in ('anneal', 'panneal'):
                # Both annealing variants share these defaults; 'panneal' adds
                # two more settings below.
                default_kwargs = {
                    'schedule': 'fast',
                    'full_output': 1,
                    'T0': None,
                    'Tf': 1e-12,
                    'maxeval': None,
                    'maxaccept': None,
                    'maxiter': 400,
                    'boltzmann': 1.0,
                    'learn_rate': 0.5,
                    'feps': 1e-06,
                    'quench': 1.0,
                    'm': 1.0,
                    'n': 1.0,
                    'lower': -1,
                    'upper': 1,
                    'dwell': 50,
                    'disp': True
                }
                # The optimizer name is resolved only inside its own branch, so
                # an optimizer that is not importable only fails when selected.
                if optimizer == 'anneal':
                    optimizer_func = anneal
                else:
                    default_kwargs['cores'] = 24
                    default_kwargs['interv'] = 20
                    optimizer_func = panneal
            else:
                raise ValueError("Unrecognized optimizer {}".format(optimizer))

            default_kwargs.update(optimizer_kwargs)
            results = optimizer_func(self.target_func, copy(init_v),
                                     **default_kwargs)

            duration = time.time() - t0
            if results_pickle_prefix is not None:
                pickle_file = "{}_{}.pickle".format(results_pickle_prefix,
                                                    optimizer)
                pickle_file = os.path.join(self.log_directory, pickle_file)
                # Context manager so the file is closed even if dumping fails.
                with open(pickle_file, "wb") as pickle_handle:
                    pickle.dump(results, pickle_handle)
        finally:
            # Always switch parallel mode back off, also when the optimizer
            # lookup or the optimization itself raised.
            if is_parallelizable == True: set_parallel(False)

        logger.log_status('init target_func: {}'.format(
            self.target_func(init_v)))
        logger.log_status('end target_func: {}'.format(
            results[:]))  #which one?
        logger.log_status('outputs from optimizer: {}'.format(results))
        logger.log_status('Execution time: {}'.format(duration))

        # The docstring has always promised the optimizer results; hand them back.
        return results

Пример #34

Показать файл

Файл: estimation_HLCM_Paris.py Проект: urban-ai/VIBe2UrbanSim

            ("neighborhood.ln_price", "ln_price"),
            ("paris.household_x_neighborhood.age_lnprice", "age_lnprice"),
            ("paris.household_x_neighborhood.lninc_lnprice", "lninc_lnprice"),
            ("paris.neighborhood.delta_pop", "delta_pop"),
            ("neighborhood.rail9", "rail"),
            ("neighborhood.subway", "subway"),
            ("neighborhood.disthwy", "disthwy"),
            ("neighborhood.tc", "tc"),
            ("neighborhood.vp", "vp"),
            ("paris.household_x_neighborhood.hhfem_nbtc", "hhfem_nbtc"))
    }

    # Load the estimation configuration from the project-local module.
    from my_estimation_config import my_configuration
    # Set the simulation clock to 2000 and point the state at the configured cache.
    ss = SimulationState()
    ss.set_current_time(2000)
    ss.set_cache_directory(my_configuration['cache_directory'])

    # Create a new session configuration that uses the attribute cache as its
    # input storage and the package order from the configuration.
    attribute_cache = AttributeCache()
    sc = SessionConfiguration(
        new_instance=True,
        package_order=my_configuration['dataset_pool_configuration'].
        package_order,
        in_storage=attribute_cache)

    #settings = ParisSettings()
    #settings.prepare_session_configuration()
    # Build the estimator from the same configuration, with
    # save_estimation_results switched off.
    estimator = HLCMEstimator(config=my_configuration,
                              save_estimation_results=False)

    #estimator = HLCMEstimator(settings=my_configuration,
    #                    run_land_price_model_before_estimation=False,

Пример #35

Показать файл

def import_openamos_data(config, year, zone_set=None):
    """Build the ``travel_data`` dataset from OpenAMOS skim files.

    Skim files matching ``skim*.dat`` are read from the 'skim_dir' entry of the
    travel model configuration or, if that entry is absent, from
    ``<project_path>/skimOutput/dynamic``.  The last number found in each file
    path is used as the column suffix for that file's travel time ('tm<i>') and
    travel distance ('td<i>').  Peak and off-peak averages are then computed
    and stored as 'travel_data' in the cache of ``year + 1``.

    Arguments:
        - config: run configuration; 'travel_model_configuration' and
          'cache_directory' are read
        - year: simulation year whose skims are imported
        - zone_set: currently unused

    Returns:
        - the dataset returned by to_opus_dataset()

    Raises:
        - ValueError if no skim file is found in the skim directory
    """
    tm_config = config['travel_model_configuration']
    if 'skim_dir' in tm_config:
        skim_dir = tm_config.get('skim_dir')
    else:
        projectLoc = tm_config.get("project_path")
        print("---> %s" % (projectLoc,))
        skim_dir = os.path.join(projectLoc, "skimOutput/dynamic")
    logger.log_status('Reading skims from {}'.format(skim_dir))
    skim_files = glob.glob(os.path.join(skim_dir, "skim*.dat"))
    print(skim_files)
    if not skim_files:
        # Without this check the failure would surface later as an
        # AttributeError on None.
        raise ValueError("No skim files matching 'skim*.dat' found in {}".format(skim_dir))

    skims = None
    attr_pattern = '{}{}'
    for skim_file in skim_files:
        # The last number in the path becomes the column suffix.
        i = int(re.findall(r'\d+', skim_file)[-1])
        skim = read_csv(skim_file, header=0,
                        names=['from_zone_id', 'to_zone_id', 'travel_time', 'travel_distance'])
        if skims is None:
            # The first file provides the zone-pair columns.
            skims = skim.rename(columns={'travel_time': attr_pattern.format('tm', str(i)),
                                         'travel_distance': attr_pattern.format('td', str(i)),
                                        }, copy=False)
        else:
            # NOTE(review): assigned by row index, which presumes every skim
            # file lists the zone pairs in the same order -- confirm.
            skims[attr_pattern.format('tm', str(i))] = skim.travel_time
            skims[attr_pattern.format('td', str(i))] = skim.travel_distance

    skims.set_index(['from_zone_id', 'to_zone_id'], inplace=True)

    peak_hours = set([6, 7, 8, 9, 16, 17, 18, 19])
    off_peak_hours = set(range(24)) - peak_hours
    # avg_travel_time() is used for both prefixes: 'tm' (time) and 'td' (distance).
    peak_travel_time = avg_travel_time(skims, peak_hours, prefix='tm')
    off_peak_travel_time = avg_travel_time(skims, off_peak_hours, prefix='tm')
    peak_travel_distance = avg_travel_time(skims, peak_hours, prefix='td')
    off_peak_travel_distance = avg_travel_time(skims, off_peak_hours, prefix='td')
    travel_time = DataFrame({'peak_travel_time': peak_travel_time,
                             'off_peak_travel_time': off_peak_travel_time,
                             'peak_travel_distance': peak_travel_distance,
                             'off_peak_travel_distance': off_peak_travel_distance,
                            })

    # The result is cached for the year after the one being processed.
    cache_directory = config['cache_directory']
    simulation_state = SimulationState()
    simulation_state.set_current_time(year)
    simulation_state.set_cache_directory(cache_directory)
    out_store = AttributeCache().get_flt_storage_for_year(year+1)
    logger.log_status('Caching travel_data to {}'.format(out_store.get_storage_location()))
    travel_data = to_opus_dataset(travel_time, out_store, 'travel_data')

    return travel_data

Пример #36

Показать файл

Файл: model_system.py Проект: psrc/urbansim

class ModelSystem(object):
    """
    Uses the information in configuration to run/estimate a set of models for given set of years.
    """

    def __init__(self):
        self.running = False
        self.forked_processes = []
        self.running_conditional = threading.Condition()

    def run(
        self,
        resources,
        write_datasets_to_cache_at_end_of_year=True,
        log_file_name="run_model_system.log",
        cleanup_datasets=True,
    ):
        """Run the configured models for every year of the requested range.

        Entries read from resources (those without a default are required):
               models - list of names of the models to run; each name must match
                           the module/class name of the model. Default: []
               models_in_year - dictionary year -> list of models that replaces
                           'models' for that year. Default: {}
               years - a tuple or list; its first and last items are the start
                           and end year
               debuglevel - an integer; higher values print more output. Default: 0
               expression_library - dictionary whose keys are pairs
                           (dataset_name, variable_name) and whose values are the
                           corresponding expressions; if present it is handed to
                           VariableFactory so that variables defined as
                           expressions are known.
        The method serves both to start the simulation for all years and, when
        running one process per year, for a single year; in the latter case
        'years' is just (current_year, current_year).
        """
        if not isinstance(resources, Resources):
            raise TypeError("Argument 'resources' must be of type 'Resources'.")

        # Logger set-up from the optional "log" entry.
        log_settings = resources.get("log", {"tags": [], "verbosity_level": 3})
        logger.set_tags(log_settings.get("tags", []))
        logger.set_verbosity_level(log_settings.get("verbosity_level", 3))

        self.simulation_state = SimulationState()
        self.simulation_state.set_low_memory_run(resources.get("low_memory_mode", False))
        self.simulation_state.set_start_time(resources.get("base_year", 0))
        self.run_year_namespace = {}

        if resources.get("cache_directory", None) is not None:
            self.simulation_state.set_cache_directory(resources["cache_directory"])

        if "expression_library" in resources:
            VariableFactory().set_expression_library(resources["expression_library"])

        if resources.get("sample_input", False):
            self.update_config_for_multiple_runs(resources)

        cache_directory = self.simulation_state.get_cache_directory()
        log_file = os.path.join(cache_directory, log_file_name)
        logger.enable_file_logging(log_file, verbose=False)
        try:
            logger.log_status("Cache Directory set to: " + cache_directory)

            with logger.block("Start simulation run"):
                default_models = resources.get("models", [])
                models_by_year = resources.get("models_in_year", {})

                resources.check_obligatory_keys(["years"])

                years = resources["years"]
                if not isinstance(years, (tuple, list)):
                    raise TypeError("Entry 'years' in resources must be a tuple.")

                if len(years) < 2:
                    print(years)
                    raise StandardError("Entry 'years' in resources must be of length at least 2.")

                first_year, last_year = years[0], years[-1]

                debuglevel = resources.get("debuglevel", 0)
                seed_values = resources.get("seed", NO_SEED)

                logger.log_status("random seed = %s" % str(seed_values))
                seed(seed_values)

                for year in range(first_year, last_year + 1):
                    with logger.block("Starting simulation for year " + str(year)):
                        self.simulation_state.set_current_time(year)
                        SessionConfiguration().get_dataset_pool().remove_all_datasets()
                        # The run-level log file is switched off while the year
                        # runs and switched back on afterwards.
                        logger.disable_file_logging(log_file)
                        try:
                            models_to_run = models_by_year.get(year, None)
                            if models_to_run is None:
                                models_to_run = default_models
                            self._run_year(
                                year=year,
                                models=models_to_run,
                                simulation_state=self.simulation_state,
                                debuglevel=debuglevel,
                                resources=resources,
                                write_datasets_to_cache_at_end_of_year=write_datasets_to_cache_at_end_of_year,
                                cleanup_datasets=cleanup_datasets,
                            )
                        finally:
                            logger.enable_file_logging(log_file, verbose=False)
                        collect()

        finally:
            logger.disable_file_logging(log_file)

    def flush_datasets(self, dataset_names, after_model=False):
        """Flush each named dataset that the session's dataset pool holds."""
        pool = SessionConfiguration().get_dataset_pool()
        for name in dataset_names:
            if not pool.has_dataset(name):
                continue
            self.flush_dataset(pool.get_dataset(name), after_model=after_model)

    def flush_dataset(self, dataset, after_model=False):
        """Write the PRIMARY attributes of this dataset to the cache.

        Anything falsy or not a Dataset is ignored.  With after_model set, the
        flush is skipped when the dataset has no more attributes than id
        attributes, and only the computed attributes are deleted when all known
        attributes are present but no more than the id attributes are in memory.
        """
        if not (dataset and isinstance(dataset, Dataset)):
            return

        if after_model:
            # Do not flush after model if not necessary
            attribute_count = len(dataset.get_attribute_names())
            id_count = len(dataset.get_id_name())
            if attribute_count <= id_count:
                return
            if attribute_count == len(dataset.get_known_attribute_names()) and \
               len(dataset.get_attributes_in_memory()) <= id_count:
                dataset.delete_computed_attributes()
                return

        dataset.delete_computed_attributes()
        dataset.load_and_flush_dataset()

    def flush_datasets_after_model(self, resources):
        """Flush datasets once a model has finished.

        With the "flush_variables" entry set, the computed tables are deleted
        and every dataset in the pool is flushed; otherwise only the datasets
        listed under "datasets_to_cache_after_each_model" are flushed.
        """
        if not resources.get("flush_variables", False):
            names_to_flush = resources.get("datasets_to_cache_after_each_model", [])
        else:
            AttributeCache().delete_computed_tables()
            # flushing every pooled dataset will also delete computed attributes
            pool = SessionConfiguration().get_dataset_pool()
            names_to_flush = pool.datasets_in_pool().keys()
        self.flush_datasets(names_to_flush, after_model=True)

    def _run_year(
        self,
        year,
        models,
        simulation_state,
        debuglevel,
        resources,
        write_datasets_to_cache_at_end_of_year,
        cleanup_datasets=True,
    ):
        """
        Assumes that all datasets resides in the cache directory in binary format.
        """
        try:
            import wingdbstub
        except:
            pass
        self.vardict = {}
        log_file_name = os.path.join(simulation_state.get_cache_directory(), "year_%s_log.txt" % year)
        logger.enable_file_logging(log_file_name, "w")
        try:
            logger.start_block("Simulate year %s" % year)
            try:
                base_year = resources["base_year"]
                if year == base_year:
                    year_for_base_year_cache = year  # case of estimation
                else:
                    year_for_base_year_cache = year - 1
                cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache)
                self.vardict["cache_storage"] = cache_storage
                base_cache_storage = AttributeCache().get_flt_storage_for_year(base_year)
                self.vardict["base_cache_storage"] = base_cache_storage
                simulation_state.set_flush_datasets(resources.get("flush_variables", False))
                SessionConfiguration()["simulation_year"] = year
                SessionConfiguration()["debuglevel"] = debuglevel
                datasets_to_preload_in_year = resources.get("datasets_to_preload_in_year", {})
                if datasets_to_preload_in_year.get(year, None) is not None:
                    datasets_to_preload = datasets_to_preload_in_year[year]
                else:
                    datasets_to_preload = resources.get("datasets_to_preload", {})
                for dataset_name in datasets_to_preload:
                    SessionConfiguration().get_dataset_from_pool(dataset_name)
                models_configuration = resources.get("models_configuration", {})
                dataset_pool = SessionConfiguration().get_dataset_pool()
                datasets = {}
                for dataset_name, its_dataset in dataset_pool.datasets_in_pool().iteritems():
                    self.vardict[dataset_name] = its_dataset
                    datasets[dataset_name] = its_dataset
                    exec "%s=its_dataset" % dataset_name

                # This is needed. It resides in locals()
                # and is passed on to models as they run.
                ### TODO: There has got to be a better way!
                model_resources = Resources(datasets)
                n_models, model_group_members_to_run = self.get_number_of_models_and_model_group_members_to_run(
                    models, models_configuration
                )
                self.run_year_namespace = locals()
                # ==========
                # Run the models.
                # ==========
                model_number = -1
                for model_entry in models:
                    # list 'models' can be in the form:
                    # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
                    #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
                    #                                      'commercial']}},
                    #  {'model_name_3': ['estimate', 'run']},
                    #  'model_name_4',
                    #  {'model_name_5': {'group_members': 'all'}}
                    # ]
                    # get list of methods to be processed evtl. for each group member
                    if isinstance(model_entry, dict):
                        model_name, value = model_entry.items()[0]
                        if not isinstance(value, dict):  # is a model group
                            processes = value
                            if not isinstance(processes, list):
                                processes = [processes]
                    else:  # in the form 'model_name_4' in the comment above
                        model_name = model_entry
                        processes = ["run"]
                    group_member = None
                    model_group = model_group_members_to_run[model_name][1]
                    last_member = max(1, len(model_group_members_to_run[model_name][0].keys()))
                    for imember in range(last_member):
                        controller_config = models_configuration[model_name]["controller"]
                        model_configuration = models_configuration[model_name]
                        if model_group_members_to_run[model_name][0].keys():
                            group_member_name = model_group_members_to_run[model_name][0].keys()[imember]
                            group_member = ModelGroupMember(model_group, group_member_name)
                            processes = model_group_members_to_run[model_name][0][group_member_name]
                            member_model_name = "%s_%s" % (group_member_name, model_name)
                            if member_model_name in models_configuration.keys():
                                model_configuration = models_configuration[member_model_name]
                                if "controller" in model_configuration.keys():
                                    controller_config = model_configuration["controller"]
                        datasets_to_preload_for_this_model = controller_config.get(
                            "_model_structure_dependencies_", {}
                        ).get("dataset", [])
                        for dataset_name in datasets_to_preload_for_this_model:
                            try:
                                if not dataset_pool.has_dataset(dataset_name) or (dataset_name not in datasets.keys()):
                                    ds = dataset_pool.get_dataset(dataset_name)
                                    self.vardict[dataset_name] = ds
                                    datasets[dataset_name] = ds
                                    exec "%s=ds" % dataset_name
                            except:
                                logger.log_warning("Failed to load dataset %s." % dataset_name)
                        # import part
                        if "import" in controller_config.keys():
                            import_config = controller_config["import"]
                            for import_module in import_config.keys():
                                exec ("from %s import %s" % (import_module, import_config[import_module]))

                        # gui_import_replacements part
                        # This is a temporary hack -- replicates the functionality of the "import" section
                        # for use with the GUI.  The contents of this part of the config is a dictionary.
                        # Keys are names of models (not used here).  Values are 2 element pairs.
                        # The first element is a name and the second is a value.  Bind the name to the value.
                        if "gui_import_replacements" in controller_config.keys():
                            import_replacement_config = controller_config["gui_import_replacements"]
                            for model_name in import_replacement_config.keys():
                                pair = import_replacement_config[model_name]
                                temp = pair[1]
                                exec ("%s = temp") % pair[0]

                        # init part
                        model = self.do_init(locals())

                        # estimate and/or run part
                        for process in processes:
                            model_number = model_number + 1
                            # write status file
                            model.set_model_system_status_parameters(
                                year, n_models, model_number, resources.get("status_file_for_gui", None)
                            )
                            model.write_status_for_gui()
                            # prepare part
                            exec (self.do_prepare(locals()))
                            processmodel_config = controller_config[process]
                            if "output" in processmodel_config.keys():
                                outputvar = processmodel_config["output"]
                            else:
                                outputvar = "process_output"
                            self.vardict[outputvar] = self.do_process(locals())
                            exec outputvar + "=self.vardict[outputvar]"

                            # check command file from gui, if the simulation should be stopped or paused
                            self.do_commands_from_gui(resources.get("command_file_for_gui", None))

                            # capture namespace for interactive estimation
                            self.run_year_namespace = locals()
                            self.flush_datasets_after_model(resources)
                            del model
                            collect()

                # Write all datasets to cache.
                if write_datasets_to_cache_at_end_of_year:
                    logger.start_block("Writing datasets to cache for year %s" % year)
                    try:
                        for dataset_name, its_dataset in (
                            SessionConfiguration().get_dataset_pool().datasets_in_pool().iteritems()
                        ):
                            self.flush_dataset(its_dataset)
                    finally:
                        logger.end_block()

            finally:
                logger.end_block()
        finally:
            logger.disable_file_logging(log_file_name)

        if cleanup_datasets:
            SessionConfiguration().delete_datasets()

    def do_init(self, parent_state):
        """Run the 'init' part of this model's configuration.

        parent_state is the calling method's namespace (a dictionary such as
        the one returned by locals()).  It must contain 'controller_config',
        whose 'init' entry names the callable to invoke (and optionally its
        'arguments'), and 'group_member'.

        The call evaluated is "<name>(<arguments>)" when group_member is None
        and "<name>(group_member, <arguments>)" otherwise.

        Returns model object.
        """
        # Evaluate in an explicit copy of the caller's namespace instead of
        # exec-ing every entry into this frame's locals; the expression sees
        # the same names without relying on exec being able to create locals.
        namespace = dict(parent_state)
        namespace["self"] = self
        init_config = parent_state["controller_config"]["init"]
        group_member = parent_state["group_member"]
        arguments = self.construct_arguments_from_config(init_config)
        if group_member is None:  # No model group
            cmd = "%s(%s)" % (init_config["name"], arguments)
        else:  # Model belongs to a group
            cmd = "%s(group_member, %s)" % (init_config["name"], arguments)
        # NOTE: this evaluates text taken from the configuration, so the
        # configuration must come from a trusted source.
        return eval(cmd, globals(), namespace)

    def do_prepare(self, parent_state):
        """Prepares for the current model in the parent state's context.
        What to do is determined by the contents of the current model's controller configuration.

        parent_state is the calling method's namespace.  It must contain
        'process' (the name of the step about to be run), 'controller_config'
        (the 'controller' part of the model configuration) and, when a
        'prepare_for_<process>' entry exists, 'model'.

        The value returned by the model's prepare method is stored in
        self.vardict under the configured 'output' name ('prepare_output' when
        none is given).

        Returns a statement string of the form '<name>=self.vardict["<name>"]'
        that the caller exec's to bind the result in its own namespace, or an
        empty string when there is nothing to prepare.
        """
        # Evaluate in an explicit copy of the caller's namespace instead of
        # exec-ing every entry into this frame's locals.
        namespace = dict(parent_state)
        namespace["self"] = self
        controller_config = parent_state["controller_config"]
        key_name = "prepare_for_%s" % parent_state["process"]
        if key_name not in controller_config.keys():
            # do nothing when return value is exec'ed
            return ""
        prepare_config = controller_config[key_name]
        if "output" in prepare_config.keys():
            outputvar = prepare_config["output"]
        else:
            outputvar = "prepare_output"
        call = "model.%s(%s)" % (prepare_config["name"], self.construct_arguments_from_config(prepare_config))
        # NOTE: evaluates configuration text; the configuration must be trusted.
        self.vardict[outputvar] = eval(call, globals(), namespace)
        return '%s=self.vardict["%s"]' % (outputvar, outputvar)

    def do_process(self, parent_state):
        """Invoke the current process (e.g. 'run' or 'estimate') on the model.

        parent_state is the calling method's namespace.  It must contain
        'process' (the name of the model method to call), 'processmodel_config'
        (the configuration the call arguments are built from) and 'model'.

        Returns whatever the model method returns.
        """
        # Evaluate in an explicit copy of the caller's namespace instead of
        # exec-ing every entry into this frame's locals.
        namespace = dict(parent_state)
        namespace["self"] = self
        ev = "model.%s(%s)" % (
            parent_state["process"],
            self.construct_arguments_from_config(parent_state["processmodel_config"]),
        )
        # NOTE: evaluates configuration text; the configuration must be trusted.
        return eval(ev, globals(), namespace)

    def get_number_of_models_and_model_group_members_to_run(self, models, models_configuration):
        """Count number_of models in the list 'models' that can include group members (each member and each process is one model).

        Returns a tuple (number_of_models, model_group_members_to_run).  The
        second item maps every model name to a two-element list
        [members, model_group]: 'members' maps a group member name to the list
        of processes to run for it (empty for models that are not groups), and
        'model_group' is the ModelGroup built from the model's
        'group_by_attribute' controller entry, or None.

        Raises KeyError when a group entry is not keyed by 'group_members', or
        when '_all_' members are requested without 'group_by_attribute'.
        """
        # list models can be in the form:
        # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
        #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
        #                                      'commercial']}},
        #  {'model_name_3': ['estimate', 'run']},
        #  'model_name_4',
        #  {'model_name_5': {'group_members': '_all_'}}
        # ]
        # NOTE(review): the count starts at 1, so the returned total is one more
        # than the number of (model, process) pairs found -- confirm intended.
        number_of_models = 1
        model_group_members_to_run = {}
        for model_entry in models:
            if not isinstance(model_entry, dict):
                # in the form 'model_name_4' in the comment above
                model_group_members_to_run[model_entry] = [{}, None]
                number_of_models += 1
                continue

            # Only the first (normally the only) item of the entry is used.
            model_name, value = list(model_entry.items())[0]
            if not isinstance(value, dict):
                # in the form 'model_name_3' in the comment above
                model_group_members_to_run[model_name] = [{}, None]
                number_of_models += len(value) if isinstance(value, list) else 1
                continue

            # is a model group
            if list(value.keys())[0] != "group_members":
                raise KeyError("Key for model " + model_name + " must be 'group_members'.")
            group_members = value["group_members"]
            model_group = None
            controller_config = models_configuration[model_name]["controller"]
            if "group_by_attribute" in controller_config.keys():
                group_dataset_name, group_attribute = controller_config["group_by_attribute"]
                model_group = ModelGroup(
                    SessionConfiguration().get_dataset_from_pool(group_dataset_name), group_attribute
                )
            if not isinstance(group_members, list):
                group_members = [group_members]
            if group_members[0] == "_all_":  # see 'model_name_5' example above
                if model_group is None:
                    raise KeyError("Entry 'group_by_attribute' is missing for model %s" % model_name)
                group_members = model_group.get_member_names()

            members_to_run = {}
            model_group_members_to_run[model_name] = [members_to_run, model_group]
            for member in group_members:
                if isinstance(member, dict):
                    # see 'model_name_2' ('residential') in the comment above
                    member_name = list(member.keys())[0]
                    processes = member[member_name]
                    if not isinstance(processes, list):
                        processes = [processes]
                else:  # see 'model_name_1'
                    member_name = member
                    processes = ["run"]
                members_to_run[member_name] = processes
                number_of_models += len(processes)
        return (number_of_models, model_group_members_to_run)

    def do_commands_from_gui(self, filename=None):
        """Obey the command currently written in the GUI command file.

        Returns immediately when filename is None or the file does not exist.
        Otherwise the file is read repeatedly, ten seconds apart, until it
        contains a command that ends the wait:

        'stop'   -- log a warning and terminate via sys.exit()
        'resume' -- return to the caller
        'pause'  -- keep waiting
        Anything else (including an empty file) is reported as an unknown
        command and treated like 'pause'.
        """
        if (filename is None) or not os.path.exists(filename):
            return
        while True:
            # The with-block closes the file even if reading fails.
            with open(filename) as command_file:
                line = command_file.read().strip()
            if line == "stop":
                logger.log_warning("Simulation stopped.")
                sys.exit()
            elif line == "resume":
                break
            elif line != "pause":
                logger.log_warning("Unknown command '%s'. Allowed commands: 'stop', 'pause', 'resume'." % line)
            time.sleep(10)

    def run_multiprocess(self, resources):
        """Run the simulation with each year in its own process.

        resources must provide 'cache_directory' (may be None, in which case
        the directory of the current SimulationState is used) and 'years'
        (first and last year).  Optional entries read here are
        'profile_filename', 'seed' and '_seed_dictionary_'.

        One seed per year is taken from '_seed_dictionary_' when present,
        otherwise drawn after seeding the generator with the root seed.  The
        loop over years stops at the first year whose process reports failure.
        """
        resources = Resources(resources)
        profiler_name = resources.get("profile_filename", None)
        cache_directory = resources["cache_directory"]
        if cache_directory is None:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is absolutely no good reason to be
        ###       changing the Configuration!
        resources["cache_directory"] = cache_directory

        log_file = os.path.join(cache_directory, "run_multiprocess.log")
        logger.enable_file_logging(log_file)

        start_year = resources["years"][0]
        end_year = resources["years"][-1]
        simulated_years = range(start_year, end_year + 1)
        nyears = end_year - start_year + 1
        root_seed = resources.get("seed", NO_SEED)
        seed_dict = resources.get("_seed_dictionary_", None)
        if seed_dict is None:
            seed(root_seed)
            seed_array = randint(1, 2 ** 30, nyears)
        else:
            # This is added by the RunManager to ensure reproducibility including restarted runs
            seed_array = array([seed_dict[simulated_year] for simulated_year in simulated_years])
        logger.log_status("Running simulation for years %d thru %d" % (start_year, end_year))
        logger.log_status("Simulation root seed: %s" % root_seed)

        for iyear, year in enumerate(simulated_years):
            succeeded = self._run_each_year_as_separate_process(
                iyear, year, seed=seed_array[iyear], resources=resources, profiler_name=profiler_name, log_file=log_file
            )
            if not succeeded:
                break

        self._notify_stopped()
        if profiler_name is not None:  # insert original value
            resources["profile_filename"] = profiler_name
        logger.log_status("Done running simulation for years %d thru %d" % (start_year, end_year))

    # TODO: changing of configuration
    def _run_each_year_as_separate_process(
        self, iyear, year, seed=None, resources=None, profiler_name=None, log_file=None
    ):
        """Run a single simulation year in a newly forked process.

        resources is modified in place: 'years' becomes (year, year), 'seed'
        becomes (seed,), and 'profile_filename' gets the year appended when
        profiler_name is given.  iyear is accepted for the caller's
        convenience and is not used here.

        File logging of this process is switched off while the child runs and
        switched back on afterwards, also when forking raises.

        Returns the success value reported by _fork_new_process.
        """
        logger.start_block("Running simulation for year %d in new process" % year)
        resources["years"] = (year, year)
        resources["seed"] = (seed,)

        if profiler_name is not None:
            # add year to the profile name
            resources["profile_filename"] = "%s_%s" % (profiler_name, year)

        optional_args = []
        if log_file:
            optional_args += ["--log-file-name", os.path.split(log_file)[-1]]

        success = False
        try:
            logger.disable_file_logging(log_file)
            try:
                success = self._fork_new_process(
                    "opus_core.model_coordinators.model_system", resources, optional_args=optional_args
                )
            finally:
                # Restore file logging even when the fork fails, so that the
                # parent keeps writing to its log afterwards.
                logger.enable_file_logging(log_file, verbose=False)
        finally:
            logger.end_block()

        return success

    def run_in_one_process(
        self, resources, run_in_background=False, class_path="opus_core.model_coordinators.model_system"
    ):
        """Run the whole simulation in one forked process.

        resources must provide 'cache_directory'; when it is None the
        directory of the current SimulationState is used instead.  class_path
        is the module handed to _fork_new_process, and run_in_background is
        passed through to it.
        """
        resources = Resources(resources)
        cache_directory = resources["cache_directory"]
        if cache_directory is None:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        resources["cache_directory"] = cache_directory

        module_name = "%s" % class_path
        self._fork_new_process(module_name, resources, delete_temp_dir=False, run_in_background=run_in_background)
        self._notify_stopped()

    def run_in_same_process(self, resources, **kwargs):
        """Run the simulation in the current process.

        resources must provide 'cache_directory'; when it is None the
        directory of the current SimulationState is used instead.  Extra
        keyword arguments are forwarded to RunModelSystem.  Waiters are
        notified before the run starts and after it returns.
        """
        resources = Resources(resources)
        cache_directory = resources["cache_directory"]
        if cache_directory is None:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        resources["cache_directory"] = cache_directory

        self._notify_started()
        RunModelSystem(model_system=self, resources=resources, **kwargs)
        self._notify_stopped()

    def construct_arguments_from_config(self, config):
        """Render config['arguments'] as keyword-argument source text.

        Every entry becomes "<key>=<value>, " (note the trailing separator);
        the pieces are concatenated in the dictionary's iteration order.
        Returns an empty string when config has no 'arguments' entry or the
        entry is empty.
        """
        if "arguments" not in config.keys():
            return ""
        arg_dict = config["arguments"]
        if len(arg_dict.keys()) <= 0:
            return ""
        pieces = ["%s=%s, " % (name, arg_dict[name]) for name in arg_dict.keys()]
        return "".join(pieces)

    def wait_for_start(self):
        """Block until self.running is true.

        Waits on self.running_conditional; the condition is released again
        even if waiting is interrupted by an exception.
        """
        self.running_conditional.acquire()
        try:
            while not self.running:
                self.running_conditional.wait()
        finally:
            self.running_conditional.release()

    def wait_for_finish(self):
        """Block until self.running is false.

        Waits on self.running_conditional; the condition is released again
        even if waiting is interrupted by an exception.
        """
        self.running_conditional.acquire()
        try:
            while self.running:
                self.running_conditional.wait()
        finally:
            self.running_conditional.release()

    def wait_for_process_or_finish(self, process_index):
        """Block until process number process_index was forked or the run ended.

        Waits on self.running_conditional while process_index is beyond the
        end of self.forked_processes and self.running is true.  The condition
        is released again even if waiting is interrupted by an exception.

        Returns process_index, or the index of the last forked process when
        the system is no longer running (-1 if nothing was forked).
        """
        self.running_conditional.acquire()
        try:
            while process_index >= len(self.forked_processes) and self.running:
                self.running_conditional.wait()
        finally:
            self.running_conditional.release()
        if not self.running:
            process_index = len(self.forked_processes) - 1
        return process_index

    def _fork_new_process(self, module_name, resources, run_in_background=False, **key_args):
        """Fork a process for module_name and register it in self.forked_processes.

        run_in_background and any extra keyword arguments are forwarded to
        ForkProcess.fork_new_process.  While holding self.running_conditional
        the method sets self.running, appends the new ForkProcess and notifies
        all waiters.  Unless run_in_background is true, it then waits for the
        process and cleans it up.

        Returns the value returned by ForkProcess.fork_new_process.
        """
        key_args["run_in_background"] = run_in_background
        self.running_conditional.acquire()
        try:
            self.running = True
            # Keep a local reference: forked_processes[-1] may be a different
            # process by the time the lock has been released.
            process = ForkProcess()
            self.forked_processes.append(process)
            success = process.fork_new_process(module_name, resources, **key_args)
            self.running_conditional.notifyAll()
        finally:
            # Never leave the condition locked when forking fails.
            self.running_conditional.release()
        if not run_in_background:
            process.wait()
            process.cleanup()
        return success

    def _notify_started(self):
        self.running_conditional.acquire()
        self.running = True
        self.running_conditional.notifyAll()
        self.running_conditional.release()

    def _notify_stopped(self):
        self.running_conditional.acquire()
        self.running = False
        self.running_conditional.notifyAll()
        self.running_conditional.release()

    def update_config_for_multiple_runs(self, config):
        """Add '<model>_sampled_coef' variants of selected models to config.

        For every name in config['models_with_sampled_coefficients'] a copy of
        that model's configuration is stored under '<name>_sampled_coef' in
        config['models_configuration'], with its 'prepare_for_run' arguments
        'sample_coefficients', 'distribution' and 'cache_storage' set.  In the
        model list of the first simulated year (base_year + 1) the plain name
        is replaced by the new one where it occurs.

        config is modified in place.  When no model list exists for the first
        year, config['models'] itself is installed there (not a copy), so the
        replacement also shows up in config['models'].
        """
        models_to_update = config.get("models_with_sampled_coefficients", [])
        if "models_in_year" not in config.keys():
            config["models_in_year"] = {}
        first_year = config["base_year"] + 1
        if config["models_in_year"].get(first_year, None) is None:
            config["models_in_year"][first_year] = config.get("models")

        for umodel in models_to_update:
            # Compute the name before the lookup below: it is needed even when
            # the model is not listed, and must never be left over from the
            # previous iteration.
            new_model_name = "%s_sampled_coef" % umodel
            try:
                year_models = config["models_in_year"][first_year]
                year_models[year_models.index(umodel)] = new_model_name
            except (AttributeError, TypeError, ValueError):
                # Best effort: the model is not listed under its plain name
                # (ValueError), there is no list at all (AttributeError), or
                # the sequence cannot be modified (TypeError).
                pass
            config["models_configuration"][new_model_name] = Configuration(config["models_configuration"][umodel])
            arguments = config["models_configuration"][new_model_name]["controller"]["prepare_for_run"]["arguments"]
            arguments["sample_coefficients"] = True
            arguments["distribution"] = "'normal'"
            arguments["cache_storage"] = "base_cache_storage"

Пример #37

Показать файл

Файл: batch_make_indicators.py Проект: urban-ai/VIBe2UrbanSim

    def run(self, resources, year):
        """Generate indicators for a hard-coded set of cached simulation runs.

        resources -- configuration mapping; its 'cache_directory' entry is used
                     to initialise the simulation state.
        year      -- year used for the initial simulation state.  The value is
                     overwritten with 2025 further down, so the indicators are
                     always requested for 2025.
        """
        # 'config' is bound only by the import near the end of this method,
        # which makes it a local name for the whole method; reading it here
        # would raise UnboundLocalError.  The cache directory therefore comes
        # from the 'resources' argument.
        cache_directory = resources['cache_directory']
        simulation_state = SimulationState()
        simulation_state.set_cache_directory(cache_directory)
        simulation_state.set_current_time(year)
        attribute_cache = AttributeCache()
        SessionConfiguration(new_instance=True, in_storage=AttributeCache())

        arguments = {'in_storage': attribute_cache}
        gc_set = DatasetFactory().get_dataset('gridcell',
                                              package='urbansim',
                                              arguments=arguments)

        # Maps the cache directory of each run to its description.
        runs = {
            #r'X:\urbansim_cache\run_1713.2007_01_03_11_16':r'(run 1713 - baseline)',
            #r'X:\urbansim_cache\run_1714.2007_01_03_11_20':r'(run 1714 - no ugb)',
            #r'X:\urbansim_cache\run_1731.2007_01_03_11_16':r'(run 1731 - no build)',
            r'X:\urbansim_cache\run_1847.2007_01_15_15_23':
            r'(run 1847 - no UGB 1/17/2007)',
            r'X:\urbansim_cache\run_1848.2007_01_15_15_40':
            r'(run 1848 - no UGB+1.5xhighway 1/17/2007)',
            #        r'X:\urbansim_cache\run_1849.2007_01_15_16_09':r'(run 1849 - baseline 1/17/2007)',
            r'V:\psrc\run_1850.2007_01_15_17_03':
            r'(run 1850 - baseline 1/17/2007)',
            r'V:\psrc\run_1851.2007_01_15_17_07':
            r'(run 1851 - no build 1/17/2007)'
        }

        #baseline = r'X:\urbansim_cache\run_1713.2007_01_03_11_16'
        baseline = r'V:\psrc\run_1850.2007_01_15_17_03'

        comparison_variables = {
            'gridcell': [
                'urbansim.gridcell.population',
                'urbansim.gridcell.number_of_jobs'
            ],
            'faz': ['urbansim.faz.population', 'urbansim.faz.number_of_jobs'],
        }
        #datasets_to_preload = {
        #                'gridcell':{ 'nchunks':2},
        #                'household':{},
        #                'job':{},
        #                'zone':{},
        #                'faz':{},
        #                'development_type':{},
        #                'development_event_history':{},
        #                'development_constraint':{},
        #                'job_building_type':{},
        #                'urbansim_constant':{},
        #                }

        # From here on the year is fixed, regardless of the argument.
        year = 2025

        simulation_state = SimulationState()
        simulation_state.set_current_time(year)

        SessionConfiguration(new_instance=True,
                             package_order=['psrc', 'urbansim', 'opus_core'],
                             in_storage=AttributeCache())

        #cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache)
        #datasets = DatasetFactory().create_datasets_from_flt(datasets_to_preload,
        #                                                    "urbansim",
        #                                                    additional_arguments={'in_storage': AttributeCache()})

        # Switched off by default.  When enabled, the baseline values of the
        # comparison variables are copied into every run's datasets as
        # 'baseline_<variable>' attributes and flushed.
        variable_augment = False
        if variable_augment:
            for dataset_name in comparison_variables:
                cache_directory = baseline
                simulation_state.set_cache_directory(cache_directory)
                dataset = DatasetFactory().get_dataset(
                    dataset_name,
                    package='urbansim',
                    arguments={'in_storage': AttributeCache()})

                variables = comparison_variables[dataset_name]
                dataset.compute_variables(variables, resources=Resources())
                ids = dataset.get_id_attribute()
                for run_directory in runs:
                    cache_directory = run_directory
                    simulation_state.set_cache_directory(cache_directory)
                    run_dataset = DatasetFactory().get_dataset(
                        dataset_name,
                        package='urbansim',
                        arguments={'in_storage': AttributeCache()})
                    match_index = run_dataset.get_id_index(ids)
                    for variable in variables:
                        short_name = VariableName(variable).alias()
                        attribute = dataset.get_attribute(short_name)
                        run_dataset.add_attribute(attribute[match_index],
                                                  'baseline_' + short_name,
                                                  metadata=1)
                        run_dataset.flush_attribute('baseline_' + short_name)

                    SessionConfiguration().get_dataset_pool(
                    ).remove_all_datasets()

        #indicators_module = args[0]
        #eval("from %s import config" % indicators_module)
        from make_indicators_openev import config as indicator_config
        from urbansim.indicators.indicator_configuration_handler_batch_mode import generate_indicators
        #from make_indicators_openev import config
        #from inprocess.travis.urbansim.indicators.indicator_configuration_handler_batch_mode import generate_indicators
        for run_directory, description in runs.items():
            indicator_config.request_years = [year]
            indicator_config.cache_directory = run_directory
            indicator_config.run_description = description
            generate_indicators(indicator_config)

Пример #38

Показать файл

            cache_directory = _cache_directory

    # Parse the year.  int() signals a missing value with TypeError and a
    # malformed one with ValueError; IndexError is kept for compatibility.
    try:
        year = int(options.year)
    except (IndexError, TypeError, ValueError):
        parser.error("year must be provided.")
        # NOTE(review): if parser is an optparse/argparse parser, error()
        # already exits and the two lines below are never reached -- confirm.
        parser.print_help()
        sys.exit(1)

    if package_order is None:
        # NOTE(review): eval of a command-line option; only safe when the
        # command line is trusted.
        package_order = eval(options.package_order)


    # Point the simulation state at the requested year and cache, then build a
    # dataset pool that reads from the attribute cache.
    st = SimulationState()
    st.set_current_time(year)
    st.set_cache_directory(cache_directory)
    attribute_cache = AttributeCache()
    dp = SessionConfiguration(new_instance=True,
                              package_order=package_order,
                              in_storage=attribute_cache
                              ).get_dataset_pool()

    
    ## example usage:
    # python -i explore_run_cache.py -p bay_area_parcel -r 105 2025
    # >>> h2025 = dp.get_dataset('household')
    # >>> children_5yr = h2025.compute_variables('household.aggregate(person.age <= 5)')

    # python -d /workspace/opus/data/bay_area_parcel/base_year_data 2010
    # >>> h2010 = dp.get_dataset('household')
    # >>> children_5yr = h2010.compute_variables('household.aggregate(person.age <= 5)')

Пример #39

Показать файл

    def run(self, config, year, storage_type='sql'):
        """ 
        """
        
        tm_config = config['travel_model_configuration']
        database_server_config = tm_config.get("database_server_configuration", 'simtravel_database_server')
        database_name = tm_config.get("database_name", 'mag_zone_baseyear')
        
        cache_directory = config['cache_directory']
        simulation_state = SimulationState()
        simulation_state.set_cache_directory(cache_directory)
        simulation_state.set_current_time(year)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(new_instance=True,
                                            package_order=config['dataset_pool_configuration'].package_order,
                                            in_storage=attribute_cache).get_dataset_pool()

        if storage_type == 'sql':
            db_server = DatabaseServer(DatabaseConfiguration(
                                                             database_name = database_name,
                                                             database_configuration = database_server_config
                                                             )
                                                             )
            if not db_server.has_database(database_name): 
                print "Db doesn't exist creating one"
                db_server.create_database(database_name)
            db = db_server.get_database(database_name) 
            output_storage = sql_storage(storage_location = db)
        elif storage_type == 'csv':
            csv_directory = os.path.join(cache_directory, 'csv', str(year))
            output_storage = csv_storage(storage_location=csv_directory)
        else:
            raise ValueError, "Unsupported output storage type {}".format(storage_type)
                                                            
        logger.start_block('Compute and export data to openAMOS...')

        hh = dataset_pool.get_dataset('household')
        hh_recs = dataset_pool.get_dataset('households_recs')
        #hh_recs.add_attribute(0,"htaz1")
        #hh_recs.flush_dataset()
        #syn_hh = dataset_pool.get_dataset('synthetic_household')

        hh_variables = ['houseid=household.household_id',
                        "hhsize=household.number_of_agents(person)",
                        "one=(household.household_id>0).astype('i')",
                        "inclt35k=(household.income<35000).astype('i')",
                        "incge35k=(household.income>=35000).astype('i')",
                        "incge50k=(household.income>=50000).astype('i')",
                        "incge75k=(household.income>=75000).astype('i')",
                        "incge100k=(household.income>=100000).astype('i')",
                        "inc35t50=((household.income>=35000) & (household.income<50000)).astype('i')",
                        "inc50t75=((household.income>=50000) & (household.income<75000)).astype('i')",
                        "inc75t100=((household.income>=75000) & (household.income<100000)).astype('i')",
                        'htaz = ((houseid>0)*(household.disaggregate(building.zone_id)-100) + (houseid<=0)*0)',
                        #'htaz = ((houseid>0) & (htaz1>100))*(htaz1-100)+((houseid>0) & (htaz1==-1))*1122',
                        "withchild = (household.aggregate(person.age<18)>0).astype('i')",
                        "noc = household.aggregate(person.age<18)",
                        "numadlt = household.aggregate(person.age>=18)",
                        "hinc=household.income",
                        "wif=household.workers",
                        #"wif=household.aggregate(mag_zone.person.is_employed)",
                        'numwrkr=household.workers',
                        #'numwrkr=household.aggregate(mag_zone.person.is_employed)',
                        'nwrkcnt=household.number_of_agents(person) - household.workers',
                        #'nwrkcnt=household.number_of_agents(person) - household.aggregate(mag_zone.person.is_employed)',

                        'yrbuilt=mag_zone.household.yrbuilt',
                        'mag_zone.household.sparent',
                        'mag_zone.household.rur',
                        'mag_zone.household.urb',
                        'zonetid = household.disaggregate(building.zone_id)',
                        ]
        
        self.prepare_attributes(hh, hh_variables)
        attrs_to_export = hh_recs.get_known_attribute_names()
       
        self.write_dataset(hh, attrs_to_export, output_storage)
        dataset_pool._remove_dataset(hh.dataset_name)

        persons = dataset_pool.get_dataset('person')
        persons.out_table_name_default = 'persons'

        # Recoding invalid work and school locations to some random valid values
        persons_recs = dataset_pool.get_dataset('persons_recs')
        persons_recs.add_attribute(persons['person_id'],"personuniqueid")
        persons_recs.add_attribute(persons['marriage_status'],"marstat")
        persons_recs.add_attribute(persons['student_status'],"schstat")


        """
        persons_recs.add_attribute(persons['wtaz0'],"htaz_act")
        persons_recs.add_attribute(0,"wtaz_rec")
        persons_recs.add_attribute(0,"wtaz_rec1")
        persons_recs.add_attribute(0,"wtaz_rec2")

        persons_recs.add_attribute(0,"wtaz1_1")
        persons_recs.add_attribute(0,"wtaz1_2")
        persons_recs.add_attribute(0,"wtaz1_3")
        #persons_recs.add_attribute(persons['student_status'],"schstat")
        """

        persons_recs.add_attribute(0,"wtaz1")
        persons_recs.add_attribute(0,"htaz")
        persons_recs.add_attribute(0,"schtaz1")

        persons_recs.flush_dataset()

        #syn_persons = dataset_pool.get_dataset('synthetic_person')
        persons_variables = ['personid=mag_zone.person.member_id',
                             'personuniqueid=person.person_id',
                             'houseid=person.household_id',
                             "one=(person.person_id>0).astype('i')",
                             'trvtime=mag_zone.person.travel_time_from_home_to_work',
                             'timetowk=mag_zone.person.travel_time_from_home_to_work',
                             #'mag_zone.person.tmtowrk',
                             #'tmtowrk=person.disaggregate(synthetic_person.tmtowrk)',
                             "ag5t10=((person.age>=5) & (person.age<=10)).astype('i')",
                             "ag11t14=((person.age>=11) & (person.age<=14)).astype('i')",
                             "ag15t17=((person.age>=15) & (person.age<=17)).astype('i')",
                             "ag18t24=((person.age>=18) & (person.age<=24)).astype('i')",
                             "ag25t34=((person.age>=25) & (person.age<=34)).astype('i')",
                             "ag35t44=((person.age>=35) & (person.age<=44)).astype('i')",
                             "ag45t54=((person.age>=45) & (person.age<=54)).astype('i')",
                             "ag55t64=((person.age>=55) & (person.age<=64)).astype('i')",
                             "agge65=(person.age>=65).astype('i')",

                             "ag12t17=((person.age>=12) & (person.age<=17)).astype('i')",
                             "ag5t14=((person.age>=5) & (person.age<=14)).astype('i')",
                             "agge15=(person.age>=15).astype('i')",

                             "wrkr=(person.employment_status==1).astype('i')",
                             "isemploy=(person.employment_status==1).astype('i')",
                             "fulltim=(mag_zone.person.full_time==1).astype('i')",
                             'parttim=mag_zone.person.part_time',

                             'htaz = ((houseid>0)*(person.disaggregate(building.zone_id, intermediates=[household])-100) + (houseid<=0)*0)',

                             'wtaz1=(person.wtaz <= 0)*0 + (person.wtaz > 0)*(person.wtaz-100)',
                       
                             "presch = ((person.age < 5)&(houseid>0)).astype('i')",
                             "mag_zone.person.schstat",


                             'schtaz1 = (person.schtaz <= 0)*0 + (person.schtaz > 0)*(person.schtaz-100)',
                             'marstat = person.marriage_status',

                             'enroll = person.student_status',
                             'grade = person.student_status & person.education',
                             'educ = person.education',
                             "male = (person.sex==1).astype('i')",
                             "female = (person.sex==2).astype('i')",

                             "coled = (person.education >= 10).astype('i')",

                             'race1 = person.race',
                             "white = (person.race == 1).astype('i')",
                             'person.hispanic'
                             ]
        self.prepare_attributes(persons, persons_variables)

        attrs_to_export = persons_recs.get_known_attribute_names()

        self.write_dataset(persons, attrs_to_export, output_storage)
        dataset_pool._remove_dataset(persons.dataset_name)

        zones = dataset_pool.get_dataset('zone')
        zones_variables = [
                             "retail_employment=zone.aggregate(mag_zone.job.sector_group=='retail')",
                             "public_employment=zone.aggregate(mag_zone.job.sector_group=='public')",
                             "office_employment=zone.aggregate(mag_zone.job.sector_group=='office')",
                             "industrial_employment=zone.aggregate(mag_zone.job.sector_group=='individual')",
                             "other_employment=zone.aggregate(mag_zone.job.sector_group=='other')",

                             "retail_employment_density=zone.aggregate(mag_zone.job.sector_group=='retail')/zone.acres",
                             "public_employment_density=zone.aggregate(mag_zone.job.sector_group=='public')/zone.acres",
                             "office_employment_density=zone.aggregate(mag_zone.job.sector_group=='office')/zone.acres",
                             "industrial_employment_density=zone.aggregate(mag_zone.job.sector_group=='individual')/zone.acres",
                             "other_employment_density=zone.aggregate(mag_zone.job.sector_group=='other')/zone.acres",

                             "total_area=zone.acres",

                             "lowest_income=zone.aggregate(household.income < scoreatpercentile(household.income, 20))",
                             "low_income=zone.aggregate(household.income < scoreatpercentile(household.income, 40))",
                             "high_income=zone.aggregate(household.income > scoreatpercentile(household.income, 80))",

                             #"institutional_population=zone.disaggregate(locations.institutional_population)",
                             #"groupquarter_households=zone.disaggregate(locations.groupquarter_households)",

                             "residential_households=zone.number_of_agents(household)",

                             "locationid=zone.zone_id",
                             ]
        
        locations = dataset_pool['locations']
        self.prepare_attributes(zones, zones_variables, dataset2=locations)
        attrs_to_export = locations.get_known_attribute_names()

        self.write_dataset(locations, attrs_to_export, output_storage)
        dataset_pool._remove_dataset(locations.dataset_name)
        #raw_input("check location block")

        logger.end_block()

Пример #40

Показать файл

Файл: estimator.py Проект: urban-ai/VIBe2UrbanSim

class Estimator(GenericModelExplorer):
    def __init__(self, config=None, save_estimation_results=False):
        """Prepare simulation state, session and model system for estimation.

        Arguments:
            config -- mapping with at least the entries 'cache_directory' and
                'dataset_pool_configuration'. The entries 'models',
                'model_name', 'debuglevel', 'models_configuration' and
                'config_changes_for_estimation' are read when present.
            save_estimation_results -- stored on the instance; estimate() and
                reestimate() call save_results() when it is true.

        Raises:
            KeyError -- if config is None, has no 'cache_directory' entry, or
                that entry is None.
        """
        # A missing configuration is reported like a missing 'cache_directory'
        # entry instead of failing with a TypeError on the membership test.
        if (config is None or 'cache_directory' not in config
                or config['cache_directory'] is None):
            raise KeyError(
                "The cache directory must be specified in the "
                "given configuration, giving the filesystem path to the cache "
                "directory containing the data with which to estimate. Please "
                "check that your configuration contains the 'cache_directory' "
                "entry and that it is not None.")

        self.simulation_state = SimulationState(new_instance=True)
        self.simulation_state.set_cache_directory(config['cache_directory'])

        SessionConfiguration(
            new_instance=True,
            package_order=config['dataset_pool_configuration'].package_order,
            in_storage=AttributeCache())
        self.config = Resources(config)
        self.save_estimation_results = save_estimation_results
        self.debuglevel = self.config.get("debuglevel", 4)
        self.model_system = ModelSystem()
        self.agents_index_for_prediction = None

        models = self.config.get('models', [])

        # Use the explicitly configured model name, otherwise the first entry
        # of 'models' that is marked with "estimate".
        self.model_name = None
        if "model_name" in config:
            self.model_name = config["model_name"]
        else:
            for model in models:
                if not isinstance(model, dict):
                    continue
                # list() keeps this working where keys() is a view.
                model_name = list(model.keys())[0]
                action = model[model_name]
                if (action == "estimate") or (
                        isinstance(action, list) and ("estimate" in action)):
                    self.model_name = model_name
                    break

        estimate_config_changes = self.config.get(
            'config_changes_for_estimation', {}).get('estimate_config', {})
        if len(estimate_config_changes) > 0:
            # Skeleton merged into self.config so that the nested path written
            # at the end of this method is guaranteed to exist.
            change = Resources({
                'models_configuration': {
                    self.model_name: {
                        'controller': {
                            'init': {
                                'arguments': {}
                            }
                        }
                    }
                }
            })
            estimate_config_str = self.config['models_configuration'].get(
                self.model_name,
                {}).get('controller',
                        {}).get('init',
                                {}).get('arguments',
                                        {}).get('estimate_config', '{}')
            # NOTE(review): eval() executes a string taken from the
            # configuration; only safe while configurations are trusted.
            # The result is wrapped in Resources because the '{}' fallback
            # evaluates to a plain dict, which has no merge() method.
            try:
                estimate_config = Resources(eval(estimate_config_str))
            except Exception as e:
                logger.log_status(
                    "Could not evaluate existing estimate_config (%s); "
                    "starting from an empty one." % e)
                estimate_config = Resources({})

            estimate_config.merge(estimate_config_changes)
            self.config.merge(change)
            self.config['models_configuration'][
                self.model_name]['controller']['init']['arguments'][
                    'estimate_config'] = 'Resources(%s)' % estimate_config

    def estimate(self, out_storage=None):
        """Run the model system on self.config and collect the results.

        Datasets are not written to the cache at the end of the year. After
        the run, coefficients and specification are extracted; they are
        passed to save_results() with the given out_storage only when
        save_estimation_results was set on construction.
        """
        self.model_system.run(
            self.config, write_datasets_to_cache_at_end_of_year=False)
        self.extract_coefficients_and_specification()

        if not self.save_estimation_results:
            return
        self.save_results(out_storage=out_storage)

    def reestimate(self,
                   specification_module_name=None,
                   specification_dict=None,
                   out_storage=None,
                   type=None,
                   submodels=None):
        """Re-run the estimation with a new specification.

        Arguments:
            specification_module_name -- name of a module that contains a
                dictionary called 'specification'. The module is imported and
                reloaded on every call. If it is not given,
                specification_dict must be given.
            specification_dict -- specification dictionary, used when no
                module name is given. Entries are deleted from it in place
                when 'submodels' is given.
            out_storage -- passed on to save_results().
            type -- name of a model member, such as 'commercial' or
                'residential'; the specification dictionary is expected to
                have an entry of this name.
            submodels -- a single submodel or a list of submodels; when given,
                the re-estimation is done only for those submodels.

        Raises:
            ValueError -- if a requested submodel is not in the specification.
        """
        if specification_module_name is not None:
            # Import the module by name and reload it, without building code
            # strings for exec/eval from the argument.
            import importlib
            spec_module = importlib.import_module(specification_module_name)
            try:
                reload_module = reload  # builtin on Python 2
            except NameError:
                reload_module = importlib.reload
            spec_module = reload_module(spec_module)
            specification_dict = spec_module.specification

        if type is not None:
            specification_dict = specification_dict[type]
        if submodels is not None:
            # Remove all submodels but the given ones from the specification.
            submodels_to_be_deleted = list(specification_dict.keys())
            if not isinstance(submodels, list):
                submodels = [submodels]
            for sm in submodels:
                if sm not in submodels_to_be_deleted:
                    raise ValueError(
                        "Submodel %s not in the specification." % sm)
                submodels_to_be_deleted.remove(sm)
                # The "_definition_" entry is kept alongside the submodels.
                if "_definition_" in submodels_to_be_deleted:
                    submodels_to_be_deleted.remove("_definition_")
            for sm in submodels_to_be_deleted:
                del specification_dict[sm]
        self.specification = EquationSpecification(
            specification_dict=specification_dict)
        new_namespace = self.model_system.run_year_namespace
        keys_coeff_spec = self.get_keys_for_coefficients_and_specification()
        new_namespace[keys_coeff_spec["specification"]] = self.specification
        self.coefficients, coeff_dict_dummy = self.model_system.do_process(
            new_namespace)
        # Update run_year_namespace since do_process has not updated it.
        self.model_system.run_year_namespace = new_namespace
        self.model_system.run_year_namespace[
            keys_coeff_spec["coefficients"]] = self.coefficients

        if self.save_estimation_results:
            self.save_results(out_storage=out_storage)

    def predict(self, predicted_choice_id_name, agents_index=None):
        """Run prediction. Currently makes sense only for choice models.

        Arguments:
            predicted_choice_id_name -- name of the agent attribute that
                receives the predicted choices; it is added as a primary
                attribute if the agent set does not know it yet, otherwise it
                is overwritten. Agents outside agents_index get -1.
            agents_index -- index of agents to predict for. Defaults to a
                copy of get_agent_set_index() taken on the first call.

        Returns:
            True if the predictions were stored, False if the model system
            has no run_year_namespace yet or the prediction failed (the error
            and stack trace are logged).
        """
        # Temporary configuration in which the 'run' controller is pointed at
        # the estimated coefficients and the chosen agents.
        tmp_config = Resources(self.config)

        if self.agents_index_for_prediction is None:
            self.agents_index_for_prediction = (
                self.get_agent_set_index().copy())

        if agents_index is None:
            agents_index = self.agents_index_for_prediction

        run_arguments = tmp_config['models_configuration'][self.model_name][
            'controller']['run']['arguments']
        run_arguments['coefficients'] = "coeff_est"
        run_arguments['agents_index'] = "agents_index"
        run_arguments['chunk_specification'] = "{'nchunks':1}"

        try:
            run_year_namespace = copy.copy(
                self.model_system.run_year_namespace)
        except Exception:
            logger.log_error("The estimate() method must be run first")
            return False

        try:
            agents = self.get_agent_set()
            choice_id_name = self.get_choice_set().get_id_name()[0]
            # Save the current choices of agents, then reset all of them.
            current_choices = agents.get_attribute(choice_id_name).copy()
            dummy_data = zeros(current_choices.size,
                               dtype=current_choices.dtype) - 1
            agents.modify_attribute(name=choice_id_name, data=dummy_data)

            try:
                run_year_namespace["process"] = "run"
                run_year_namespace["coeff_est"] = self.coefficients
                run_year_namespace["agents_index"] = agents_index
                run_year_namespace["processmodel_config"] = tmp_config[
                    'models_configuration'][self.model_name]['controller'][
                        'run']
                new_choices = self.model_system.do_process(run_year_namespace)
            finally:
                # Put the saved choices back even when the run fails, so a
                # failed prediction does not leave every agent reset to -1.
                agents.modify_attribute(name=choice_id_name,
                                        data=current_choices)

            dummy_data[agents_index] = new_choices
            if predicted_choice_id_name not in (
                    agents.get_known_attribute_names()):
                agents.add_primary_attribute(name=predicted_choice_id_name,
                                             data=dummy_data)
            else:
                agents.modify_attribute(name=predicted_choice_id_name,
                                        data=dummy_data)
            logger.log_status("Predictions saved into attribute " +
                              predicted_choice_id_name)
            return True
        except Exception as e:
            logger.log_error("Error encountered in prediction: %s" % e)
            logger.log_stack_trace()

        return False

Пример #41

Показать файл

    runs = Runs.runs
    comparison_variables = Runs.comparison_variables
    baseline = Runs.baseline
    
    simulation_state = SimulationState()
    simulation_state.set_current_time(options.year)
    
    SessionConfiguration(new_instance=True,
                         package_order=['psrc','urbansim','opus_core'],
                         in_storage=AttributeCache())

    if options.augment_variables == True:
        for dataset_name in comparison_variables.keys():
            cache_directory = baseline
            simulation_state.set_cache_directory(cache_directory)
            dataset = DatasetFactory().get_dataset(dataset_name,
                                                    package='urbansim', 
                                                    arguments={'in_storage': AttributeCache()})
        
            variables = comparison_variables[dataset_name]
            dataset.compute_variables(variables, resources=Resources())
            ids = dataset.get_id_attribute()
            for run in runs.keys():
                cache_directory=run
                simulation_state.set_cache_directory(cache_directory)
                run_dataset = DatasetFactory().get_dataset(dataset_name, 
                                                           package='urbansim', 
                                                           arguments={'in_storage': AttributeCache()})
                match_index = run_dataset.get_id_index(ids)
                for variable in variables:

Пример #42

Показать файл

Файл: run_simulation_all_chunks.py Проект: urban-ai/VIBe2UrbanSim

    def run(self, base_directory, urbansim_cache_directory, years, output_directory, temp_folder,
            coefficients_name, specification_name, convert_flt=True, convert_input=False):
        """Run the land cover change model for every year, chunk by chunk.

        Arguments:
            base_directory -- directory containing all year folders of lccm.
            urbansim_cache_directory -- directory containing all year folders
                of the urbansim cache.
            years -- list of years to run.
            output_directory, convert_flt, convert_input -- passed on to the
                input/output generation helpers.
            temp_folder -- working directory for land cover data; a fresh
                temporary directory is created when it is None.
            coefficients_name, specification_name -- table names loaded from
                the tab storage under <package_path>/data.

        The working directory is removed once all years have been simulated.
        """
        model = LandCoverChangeModel(
            self.possible_lcts,
            submodel_string=self.lct_attribute,
            choice_attribute_name=self.lct_attribute,
            debuglevel=4)

        coefficients = Coefficients()
        data_location = os.path.join(self.package_path, 'data')
        storage = StorageFactory().get_storage('tab_storage',
                                               storage_location=data_location)
        coefficients.load(in_storage=storage, in_table_name=coefficients_name)

        specification = EquationSpecification(in_storage=storage)
        specification.load(in_table_name=specification_name)
        specification.set_variable_prefix("biocomplexity.land_cover.")

        constants = Constants()
        simulation_state = SimulationState()
        simulation_state.set_cache_directory(urbansim_cache_directory)
        attribute_cache = AttributeCache()
        SessionConfiguration(new_instance=True,
                             package_order=['biocomplexity', 'urbansim', 'opus_core'],
                             in_storage=AttributeCache())

        ncols = LccmConfiguration.ncols

        if temp_folder is not None:
            self.temp_land_cover_dir = temp_folder
        else:
            self.temp_land_cover_dir = tempfile.mkdtemp()

        for year in years:
            land_cover_path = self._generate_input_land_cover(
                year, base_directory, urbansim_cache_directory,
                years, output_directory, convert_flt, convert_input)
            # First instance of the land cover dataset, used only for its size.
            max_size = self._get_max_index(land_cover_path)
            offset = min(LccmConfiguration.offset, max_size)

            # Walk over [0, max_size) in windows [start, stop); consecutive
            # windows overlap by ten rows of cells.
            start = 0
            stop = offset
            while start < stop <= max_size:
                logger.log_status("Offset: ", start, stop)
                index = arange(start, stop)

                land_cover_cache_path = os.path.join(
                    urbansim_cache_directory, str(year), 'land_covers')
                self._clean_up_land_cover_cache(land_cover_cache_path)

                simulation_state.set_current_time(year)

                # Second instance of the land cover dataset, restricted to
                # the current window.
                flt_in = StorageFactory().get_storage(
                    'flt_storage', storage_location=land_cover_path)
                flt_out = StorageFactory().get_storage(
                    'flt_storage', storage_location=land_cover_path)
                land_covers = LandCoverDataset(in_storage=flt_in,
                                               out_storage=flt_out,
                                               debuglevel=4)
                land_covers.subset_by_index(index)
                gridcells = GridcellDataset(in_storage=attribute_cache, debuglevel=4)

                data_objects = {"gridcell": gridcells,
                                "constants": constants,
                                "flush_variables": True}
                # The chunk size is set here.
                model.run(specification, coefficients, land_covers,
                          data_objects=data_objects,
                          chunk_specification={'nchunks': 5})
                land_covers.flush_dataset()
                del gridcells
                del land_covers

                self._generate_output_flt2(year, urbansim_cache_directory,
                                           output_directory, convert_flt)

                if stop >= max_size:
                    break
                overlap = 10 * ncols
                start = max(stop - overlap, start)
                stop = min(stop + offset - overlap, max_size)

        # Clean up the temporary storage once the simulation is done.
        shutil.rmtree(self.temp_land_cover_dir)

Пример #43

Показать файл

    def run(self, resources, year):
        """Generate indicators for a fixed set of cached simulation runs.

        Arguments:
            resources -- mapping with a 'cache_directory' entry, used to set
                up the initial simulation state.
            year -- year used for the initial simulation state. The indicator
                generation below uses a hard-coded year instead (see note).
        """
        # The original read `config['cache_directory']`, but `config` is bound
        # by an import further down in this function, which made it a local
        # name and raised UnboundLocalError here. The 'resources' argument is
        # the configuration handed to this method.
        cache_directory = resources['cache_directory']
        simulation_state = SimulationState()
        simulation_state.set_cache_directory(cache_directory)
        simulation_state.set_current_time(year)
        attribute_cache = AttributeCache()
        SessionConfiguration(new_instance=True,
                             in_storage=AttributeCache())

        arguments = {'in_storage': attribute_cache}
        # The returned dataset is not used below; the call is kept as is.
        gc_set = DatasetFactory().get_dataset('gridcell', package='urbansim',
                                              arguments=arguments)

        # Cache directory of each run mapped to its description.
        runs = {
            r'X:\urbansim_cache\run_1847.2007_01_15_15_23': r'(run 1847 - no UGB 1/17/2007)',
            r'X:\urbansim_cache\run_1848.2007_01_15_15_40': r'(run 1848 - no UGB+1.5xhighway 1/17/2007)',
            r'V:\psrc\run_1850.2007_01_15_17_03': r'(run 1850 - baseline 1/17/2007)',
            r'V:\psrc\run_1851.2007_01_15_17_07': r'(run 1851 - no build 1/17/2007)',
        }

        baseline = r'V:\psrc\run_1850.2007_01_15_17_03'

        comparison_variables = {'gridcell': ['urbansim.gridcell.population',
                                             'urbansim.gridcell.number_of_jobs'],
                                'faz': ['urbansim.faz.population',
                                        'urbansim.faz.number_of_jobs'],
                                }

        # NOTE(review): this overrides the 'year' argument for everything
        # below; kept as in the original, confirm whether it is intended.
        year = 2025

        simulation_state = SimulationState()
        simulation_state.set_current_time(year)

        SessionConfiguration(new_instance=True,
                             package_order=['psrc', 'urbansim', 'opus_core'],
                             in_storage=AttributeCache())

        # Switched off: copies the baseline values of the comparison variables
        # into every run's datasets as 'baseline_<name>' attributes.
        variable_augment = False
        if variable_augment == True:
            for dataset_name in comparison_variables.keys():
                cache_directory = baseline
                simulation_state.set_cache_directory(cache_directory)
                dataset = DatasetFactory().get_dataset(dataset_name,
                                                       package='urbansim',
                                                       arguments={'in_storage': AttributeCache()})

                variables = comparison_variables[dataset_name]
                dataset.compute_variables(variables, resources=Resources())
                ids = dataset.get_id_attribute()
                for run in runs.keys():
                    cache_directory = run
                    simulation_state.set_cache_directory(cache_directory)
                    run_dataset = DatasetFactory().get_dataset(dataset_name,
                                                               package='urbansim',
                                                               arguments={'in_storage': AttributeCache()})
                    match_index = run_dataset.get_id_index(ids)
                    for variable in variables:
                        short_name = VariableName(variable).alias()
                        attribute = dataset.get_attribute(short_name)
                        run_dataset.add_attribute(attribute[match_index], 'baseline_' + short_name, metadata=1)
                        run_dataset.flush_attribute('baseline_' + short_name)

                    SessionConfiguration().get_dataset_pool().remove_all_datasets()

        # Imported under a distinct name so it cannot shadow other uses of
        # 'config' inside this function.
        from make_indicators_openev import config as indicator_config
        from urbansim.indicators.indicator_configuration_handler_batch_mode import generate_indicators
        for run, description in runs.items():
            indicator_config.request_years = [year]
            indicator_config.cache_directory = run
            indicator_config.run_description = description
            generate_indicators(indicator_config)

Пример #44

Показать файл

Файл: start_calibration.py Проект: apdjustino/DRCOG_Urbansim

    def run(self, optimizer="lbfgsb", results_pickle_prefix="calib", optimizer_kwargs={}):
        """Call the specified optimizer to calibrate.

        Arguments:
            - optimizer: optimization method, one of "bfgs", "lbfgsb",
              "anneal" or "panneal"
            - results_pickle_prefix: prefix of the pickle file name that is
              saved after the simulation; if None, results are not saved
            - optimizer_kwargs: overrides for the optimizer's default keyword
              arguments (only read here, never modified)

        Side effects:
            - the optimizer results are logged
            - a pickle dump of the results is written to self.log_directory
              if results_pickle_prefix is specified

        Raises:
            - ValueError for an unrecognized optimizer
            - TypeError for a calibration attribute that is neither a string
              nor a list/tuple
        """

        simulation_state = SimulationState()
        simulation_state.set_current_time(self.base_year)
        simulation_state.set_cache_directory(self.cache_directory)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(
            new_instance=True, package_order=self.package_order, in_storage=attribute_cache
        ).get_dataset_pool()

        # NOTE(review): calib_datasets starts out empty, so neither loop
        # below runs and init_v stays empty; 'subset' and 'subset_patterns'
        # are not defined in this method either. Confirm where these are
        # meant to come from.
        calib_datasets = {}
        for dataset_name, calib_attr in calib_datasets.items():
            dataset = dataset_pool.get_dataset(dataset_name, dataset_arguments={"id_name": []})
            assert (
                subset is None
                or subset.get(dataset_name, None) is None
                or subset_patterns is None
                or subset_patterns.get(dataset_name, None) is None
            )
            if subset is not None and subset.get(dataset_name, None) is not None:
                subset_attr, subset_cond = subset.get(dataset_name)
                index = np.in1d(dataset[subset_attr], subset_cond)
            elif subset_patterns is not None and subset_patterns.get(dataset_name, None) is not None:
                subset_attr, subset_pattern = subset_patterns.get(dataset_name)
                index = array([bool(re.search(subset_pattern, attr_v)) for attr_v in dataset[subset_attr]])
            else:
                index = arange(dataset.size(), dtype="i")

            calib_datasets[dataset_name] = [dataset, calib_attr, index]

        # Initial parameter vector: the selected values of every calibrated
        # attribute, concatenated.
        init_v = array([], dtype="f8")
        for dataset_name, calib in calib_datasets.items():
            dataset, calib_attr, index = calib
            if isinstance(calib_attr, str):
                init_v = np.concatenate((init_v, dataset[calib_attr][index]))
            elif isinstance(calib_attr, (list, tuple)):
                for attr in calib_attr:
                    init_v = np.concatenate((init_v, dataset[attr][index]))
            else:
                raise TypeError("Unrecongized data type in calib_datasets")

        t0 = time.time()

        use_parallel = is_parallelizable == True
        if use_parallel:
            set_parallel(True)

        try:
            print(OKBLUE + "\noptimizer = {} (is_parallel = {})".format(optimizer, is_parallelizable) + ENDC)
            print(OKBLUE + "-------------------------------------------------------\n" + ENDC)
            if optimizer == "bfgs":
                default_kwargs = {
                    "fprime": None,
                    "epsilon": 1e-08,
                    "maxiter": None,
                    "full_output": 1,
                    "disp": 1,
                    "retall": 0,
                    "callback": None,
                }
                optimizer_func = fmin_bfgs
            elif optimizer == "lbfgsb":
                default_kwargs = {"fprime": None, "approx_grad": True, "bounds": None, "factr": 1e12, "iprint": 1}
                optimizer_func = fmin_l_bfgs_b
            elif optimizer in ("anneal", "panneal"):
                # Both annealing variants share these defaults; the parallel
                # one adds two more.
                default_kwargs = {
                    "schedule": "fast",
                    "full_output": 1,
                    "T0": None,
                    "Tf": 1e-12,
                    "maxeval": None,
                    "maxaccept": None,
                    "maxiter": 400,
                    "boltzmann": 1.0,
                    "learn_rate": 0.5,
                    "feps": 1e-06,
                    "quench": 1.0,
                    "m": 1.0,
                    "n": 1.0,
                    "lower": -1,
                    "upper": 1,
                    "dwell": 50,
                    "disp": True,
                }
                if optimizer == "anneal":
                    optimizer_func = anneal
                else:
                    default_kwargs.update({"cores": 24, "interv": 20})
                    optimizer_func = panneal
            else:
                raise ValueError("Unrecognized optimizer {}".format(optimizer))

            default_kwargs.update(optimizer_kwargs)
            results = optimizer_func(self.target_func, copy(init_v), **default_kwargs)

            duration = time.time() - t0
            if results_pickle_prefix is not None:
                pickle_file = "{}_{}.pickle".format(results_pickle_prefix, optimizer)
                pickle_file = os.path.join(self.log_directory, pickle_file)
                # Close the file deterministically instead of leaking the handle.
                with open(pickle_file, "wb") as pickle_fh:
                    pickle.dump(results, pickle_fh)
        finally:
            # Switch parallel mode back off even if the optimizer fails.
            if use_parallel:
                set_parallel(False)

        logger.log_status("init target_func: {}".format(self.target_func(init_v)))
        logger.log_status("end target_func: {}".format(results[:]))  # which one?
        logger.log_status("outputs from optimizer: {}".format(results))
        logger.log_status("Execution time: {}".format(duration))