Example #1
    def test_run(self):
 
        # The paths work as follows: opus_matsim.__path__ is the path of the opus_matsim python module, so we can use it
        # as an anchor ...
        config_location = os.path.join(opus_matsim.__path__[0], 'tests')
        print "location: ", config_location
        run_config = XMLConfiguration( os.path.join(config_location,"test_config.xml")).get_run_configuration("Test")
        
        run_config['creating_baseyear_cache_configuration'].cache_directory_root = self.temp_dir
        run_config['creating_baseyear_cache_configuration'].baseyear_cache.existing_cache_to_copy = \
            os.path.join(opus_matsim.__path__[0], 'tests', 'testdata', 'base_year_data')

        # insert_auto_generated_cache_directory... does things I don't understand.  Need to do the following to obtain
        # consistent behavior independent of the file root:
        run_config['cache_directory'] = None
        
        insert_auto_generated_cache_directory_if_needed(run_config)
        run_manager = RunManager(ServicesDatabaseConfiguration())
    
        run_manager.setup_new_run(cache_directory = run_config['cache_directory'],
                                  configuration = run_config)
        
        run_manager.run_run(run_config, run_as_multiprocess = True )
        

        self.assert_(True)
        
        self.cleanup_test_run()
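
All of the examples on this page reduce to the same core sequence: load a run configuration, let Opus fill in the cache directory, register the run with setup_new_run, then start it with run_run. A minimal sketch of that pattern, assuming the usual Opus module paths ('my_project.xml' and 'My_scenario' are placeholders; the scraped examples omit their imports, so the paths below are an assumption):

# Minimal sketch of the recurring setup_new_run pattern (import paths assumed).
from opus_core.configurations.xml_configuration import XMLConfiguration
from opus_core.services.run_server.run_manager import RunManager, \
    insert_auto_generated_cache_directory_if_needed
from opus_core.database_management.configurations.services_database_configuration \
    import ServicesDatabaseConfiguration

config = XMLConfiguration('my_project.xml').get_run_configuration('My_scenario')
insert_auto_generated_cache_directory_if_needed(config)  # fills config['cache_directory']
run_manager = RunManager(ServicesDatabaseConfiguration())
run_manager.setup_new_run(cache_directory=config['cache_directory'],
                          configuration=config)          # registers the run in the services db
run_manager.run_run(config, run_as_multiprocess=True)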
Example #2
    def testName(self):
        print "entering test_run"
        
        logger.log_status('Preparing MATSim test run ...')
        # unzip MATSim files
        matsim_zip = ExtractZipFile(self.matsim_source, self.destination)
        matsim_zip.extract()
        matsim_extracted_files = os.path.join(self.destination, 'MATSimTestClasses') # location of unzipped MATSim files
        # unzip base_year_cache
        base_year_data_zip = ExtractZipFile(self.base_year_data_source, self.destination)
        base_year_data_zip.extract()
        base_year_data_extracted_files = os.path.join(self.destination, 'base_year_data') # location of unzipped base_year_cache
        
                
        # updating location of base_year_data
        self.run_config['creating_baseyear_cache_configuration'].cache_directory_root = self.destination
        self.run_config['creating_baseyear_cache_configuration'].baseyear_cache.existing_cache_to_copy = base_year_data_extracted_files
        self.run_config['cache_directory'] = base_year_data_extracted_files
        self.run_config.add('matsim_files', matsim_extracted_files)
        self.run_config.add('matsim_config', self.matsim_config_full)
        self.run_config.add('root', self.destination)
        
        insert_auto_generated_cache_directory_if_needed(self.run_config)
        run_manager = RunManager(ServicesDatabaseConfiguration())
    
        run_manager.setup_new_run(cache_directory = self.run_config['cache_directory'],
                                  configuration = self.run_config)

        logger.log_status('Starting UrbanSim run ... ')
        run_manager.run_run(self.run_config, run_as_multiprocess = True )
        # after the UrbanSim run the travel data sets should be equal
        # self.assertTrue( self.compare_travel_data_sets() )
        logger.log_status('... UrbanSim run finished.')
        
        print "leaving test_run"
Example #3
def _do_run_simple_test_run(caller, temp_dir, config, end_year=None):
    """Runs model system with a single model (for speed).
    Sets the .resources property of the caller before starting the run.
    """

    runs_manager = RunManager(config)

    run_configuration = _get_run_config(temp_dir=temp_dir)

    insert_auto_generated_cache_directory_if_needed(run_configuration)
    run_configuration[
        'creating_baseyear_cache_configuration'].cache_directory_root = temp_dir
    run_configuration['models'] = ['land_price_model']
    if end_year is not None:
        run_configuration['years'] = (run_configuration['years'][0], end_year)

    SessionConfiguration(
        new_instance=True,
        package_order=run_configuration['dataset_pool_configuration'].
        package_order,
        in_storage=AttributeCache())
    insert_auto_generated_cache_directory_if_needed(run_configuration)
    caller.resources = run_configuration
    runs_manager.setup_new_run(
        cache_directory=run_configuration['cache_directory'],
        configuration=run_configuration)
    runs_manager.run_run(run_configuration)
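
A hypothetical call site for this helper, assuming a unittest.TestCase context (the test name and attributes are illustrative, not from the original source):

    # Hypothetical usage: the helper sets caller.resources before starting the
    # run, so the test can inspect the effective configuration afterwards.
    def test_simple_run(self):
        _do_run_simple_test_run(self, self.temp_dir, self.config)
        self.assert_(self.resources['cache_directory'] is not None)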
Example #4
    def test_run(self):

        # The paths work as follows: opus_matsim.__path__ is the path of the opus_matsim python module, so we can use it
        # as an anchor ...
        config_location = os.path.join(opus_matsim.__path__[0], 'tests')
        print "location: ", config_location
        run_config = XMLConfiguration(
            os.path.join(config_location,
                         "test_config.xml")).get_run_configuration("Test")

        run_config[
            'creating_baseyear_cache_configuration'].cache_directory_root = self.temp_dir
        run_config['creating_baseyear_cache_configuration'].baseyear_cache.existing_cache_to_copy = \
            os.path.join(opus_matsim.__path__[0], 'tests', 'testdata', 'base_year_data')

        # insert_auto_generated_cache_directory... does things I don't understand.  Need to do the following to obtain
        # consistent behavior independent of the file root:
        run_config['cache_directory'] = None

        insert_auto_generated_cache_directory_if_needed(run_config)
        run_manager = RunManager(ServicesDatabaseConfiguration())

        run_manager.setup_new_run(
            cache_directory=run_config['cache_directory'],
            configuration=run_config)

        run_manager.run_run(run_config, run_as_multiprocess=True)

        self.assert_(True)

        self.cleanup_test_run()
Example #5
 def test_run(self):
     print "Entering test run"
     
     run_manager = RunManager(ServicesDatabaseConfiguration())
     run_manager.setup_new_run(cache_directory = self.config['cache_directory'],configuration = self.config)
     
     run_manager.run_run(self.config, run_as_multiprocess = True )
     
     print "Leaving test run"
Example #7
def prepare_run_manager(option_group=None):
    if option_group is None:
        option_group = StartRunOptionGroup()
    parser = option_group.parser
    options, args = option_group.parse()

    run_manager = RunManager(
        option_group.get_services_database_configuration(options))

    if options.pickled_resource_file is not None:
        f = file(options.pickled_resource_file, 'r')
        try:
            config = pickle.load(f)
        finally:
            f.close()
    elif options.configuration_path is not None:
        opus_path = options.configuration_path
        try:
            config = get_config_from_opus_path(opus_path)
        except ImportError:
            # TODO: Once all fully-specified configurations are stored as classes,
            #       get rid of this use.
            import_stmt = 'from %s import run_configuration as config' % opus_path
            exec(import_stmt)
        insert_auto_generated_cache_directory_if_needed(config)
    elif options.xml_configuration is not None:
        if options.scenario_name is None:
            parser.print_help()
            sys.exit(1)
        config = XMLConfiguration(
            options.xml_configuration).get_run_configuration(
                options.scenario_name)
        insert_auto_generated_cache_directory_if_needed(config)
    else:
        parser.print_help()
        sys.exit(1)

    if options.existing_cache_to_copy is not None:
        config[
            'creating_baseyear_cache_configuration'].cache_from_database = False
        config[
            'creating_baseyear_cache_configuration'].baseyear_cache = BaseyearCacheConfiguration(
                existing_cache_to_copy=options.existing_cache_to_copy, )
        if options.years_to_cache is not None:
            config[
                'creating_baseyear_cache_configuration'].baseyear_cache.years_to_cache = eval(
                    options.years_to_cache)

    if options.profile_filename is not None:
        config["profile_filename"] = options.profile_filename

    run_manager.setup_new_run(cache_directory=config['cache_directory'],
                              configuration=config)

    return options, config, run_manager
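
The returned triple is typically consumed the same way as in the other examples here; a minimal sketch (the run_as_multiprocess flag is a placeholder):

# prepare_run_manager parses the command line, builds the configuration, and
# registers the run via setup_new_run; the caller only has to start it.
options, config, run_manager = prepare_run_manager()
run_manager.run_run(config, run_as_multiprocess=True)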
Example #8
 def test_simulation(self):
     services_db = ServicesDatabaseConfiguration( database_name = 'services',                         
                                                  database_configuration = 'services_database_server' )
     run_manager = RunManager(services_db)
     run_as_multiprocess = True
     for scenario_name in ['psrc_baseline_test']:
         config = self.xml_config.get_run_configuration(scenario_name)
         insert_auto_generated_cache_directory_if_needed(config)
         run_manager.setup_new_run(cache_directory = config['cache_directory'],
                                   configuration = config)
         run_manager.run_run(config, run_as_multiprocess = run_as_multiprocess)
Example #9
    def run(self):
        
        logger.start_block()
        insert_auto_generated_cache_directory_if_needed(self.config)
         
        run_manager = RunManager(ServicesDatabaseConfiguration())
        run_manager.setup_new_run(cache_directory = self.config['cache_directory'],configuration = self.config)
        
        run_manager.run_run(self.config, run_as_multiprocess = True )

        logger.end_block()
Example #10
    def run(self, config, executable):
        #--config=opus_matsim/sustain_city/configs/seattle_parcel.xml --executable=Seattle_baseline
        config = XMLConfiguration(config).get_run_configuration(executable)
        
        insert_auto_generated_cache_directory_if_needed(config)
     
        run_manager = RunManager(ServicesDatabaseConfiguration())
        
        run_manager.setup_new_run(cache_directory = config['cache_directory'],configuration = config)

        run_manager.run_run(config, run_as_multiprocess = True )
Example #11
 def test_simulation(self):
     services_db = ServicesDatabaseConfiguration( database_name = 'services',                         
                                                  database_configuration = 'services_database_server' )
     run_manager = RunManager(services_db)
     run_as_multiprocess = True
     for scenario_name in ['san_antonio_baseline_test']:
         config = self.xml_config.get_run_configuration(scenario_name)
         insert_auto_generated_cache_directory_if_needed(config)
         run_manager.setup_new_run(cache_directory = config['cache_directory'],
                                   configuration = config)
         run_manager.run_run(config, run_as_multiprocess = run_as_multiprocess)
Example #12
 def test_simulation(self):
     eugene_dir = __import__('eugene').__path__[0]
     xml_config = XMLConfiguration(os.path.join(eugene_dir, 'configs', 'eugene_gridcell.xml'))
     option_group = StartRunOptionGroup()
     parser = option_group.parser
     # simulate 0 command line arguments by passing in []
     (options, _) = parser.parse_args([])
     run_manager = RunManager(option_group.get_services_database_configuration(options))
     run_section = xml_config.get_run_configuration('Eugene_baseline')
     insert_auto_generated_cache_directory_if_needed(run_section)
     run_manager.setup_new_run(cache_directory = run_section['cache_directory'],
                               configuration = run_section)
     run_manager.run_run(run_section)
Example #13
def prepare_run_manager(option_group=None):
    if option_group is None:
        option_group = StartRunOptionGroup()
    parser = option_group.parser
    options, args = option_group.parse()

    run_manager = RunManager(option_group.get_services_database_configuration(options))
    
    if options.pickled_resource_file is not None:
        f = file(options.pickled_resource_file, 'r')
        try:
            config = pickle.load(f)
        finally:
            f.close()
    elif options.configuration_path is not None:
        opus_path = options.configuration_path
        try:
            config = get_config_from_opus_path(opus_path)
        except ImportError:
            # TODO: Once all fully-specified configurations are stored as classes,
            #       get rid of this use.
            import_stmt = 'from %s import run_configuration as config' % opus_path
            exec(import_stmt)
        insert_auto_generated_cache_directory_if_needed(config)
    elif options.xml_configuration is not None:
        if options.scenario_name is None:
            parser.print_help()
            sys.exit(1)
        config = XMLConfiguration(options.xml_configuration).get_run_configuration(options.scenario_name)
        insert_auto_generated_cache_directory_if_needed(config)
    else:
        parser.print_help()
        sys.exit(1)
        
    if options.existing_cache_to_copy is not None:
        config['creating_baseyear_cache_configuration'].cache_from_database = False
        config['creating_baseyear_cache_configuration'].baseyear_cache = BaseyearCacheConfiguration(
            existing_cache_to_copy = options.existing_cache_to_copy,
            )
        if options.years_to_cache is not None:
            config['creating_baseyear_cache_configuration'].baseyear_cache.years_to_cache = eval(options.years_to_cache)

    if options.profile_filename is not None:
        config["profile_filename"] = options.profile_filename
 
    run_manager.setup_new_run(cache_directory = config['cache_directory'],
                              configuration = config)

    return options, config, run_manager
Example #14
 def test_simulation(self):
     # check that the simulation proceeds without crashing
     # open the configuration for seattle_parcel.xml
     seattle_parcel_dir = __import__('seattle_parcel').__path__[0]
     xml_config = XMLConfiguration(os.path.join(seattle_parcel_dir, 'configs', 'seattle_parcel.xml'))
     option_group = StartRunOptionGroup()
     parser = option_group.parser
     # simulate 0 command line arguments by passing in []
     (options, _) = parser.parse_args([])
     run_manager = RunManager(option_group.get_services_database_configuration(options))
     run_section = xml_config.get_run_configuration('Seattle_baseline')
     insert_auto_generated_cache_directory_if_needed(run_section)
     run_manager.setup_new_run(cache_directory = run_section['cache_directory'],
                               configuration = run_section)
     run_manager.run_run(run_section)
Example #15
 def test_simulation(self):
     eugene_dir = __import__('eugene').__path__[0]
     xml_config = XMLConfiguration(
         os.path.join(eugene_dir, 'configs', 'eugene_gridcell.xml'))
     option_group = StartRunOptionGroup()
     parser = option_group.parser
     # simulate 0 command line arguments by passing in []
     (options, _) = parser.parse_args([])
     run_manager = RunManager(
         option_group.get_services_database_configuration(options))
     run_section = xml_config.get_run_configuration('Eugene_baseline')
     insert_auto_generated_cache_directory_if_needed(run_section)
     run_manager.setup_new_run(
         cache_directory=run_section['cache_directory'],
         configuration=run_section)
     run_manager.run_run(run_section)
Example #16
    def test_simulation(self):
        base_year_data_path = os.path.join(self.data_path, 'base_year_data')        
        if not os.path.exists(base_year_data_path):
            os.makedirs(base_year_data_path)

        ftp_url = os.environ["FTP_URL"]
        file_name = os.path.split(ftp_url)[1]
        ftp_user = os.environ["FTP_USERNAME"]
        ftp_password = os.environ["FTP_PASSWORD"]
        
        #stdout, stderr = Popen("ls -la %s" % base_year_data_path, shell=True).communicate()
        #stdout, stderr = Popen("echo '%s'" % (base_year_data_path), stdout=PIPE).communicate()
        #print stdout
        
        try:
            Popen( """
                        cd %s;
                        pwd;
                        ls -la;
                        echo wget --timestamping %s --ftp-user=%s --ftp-password=%s > /dev/null 2>&1;
                        rm -rf 2008;
                        unzip -o %s
                        """ % (base_year_data_path, ftp_url, ftp_user, ftp_password, file_name),
                        shell = True
                        ).communicate()
        except:
            print "Error when downloading and unzipping file from %s." % ftp_url
            raise

        services_db = ServicesDatabaseConfiguration( database_name = 'services',                         
                                                     database_configuration = 'services_database_server' )
        run_manager = RunManager(services_db)
        run_as_multiprocess = True
        xml_config = XMLConfiguration(os.path.join(self.opus_home, 'project_configs', 'washtenaw_parcel.xml'))
        for scenario_name in ['washtenaw_baseline_test']:
            config = xml_config.get_run_configuration(scenario_name)
            insert_auto_generated_cache_directory_if_needed(config)
#            base_year = config['base_year']
#            config['years_to_run'] = (base_year+1, base_year+2)
            run_manager.setup_new_run(cache_directory = config['cache_directory'],
                                      configuration = config)
            run_manager.run_run(config, run_as_multiprocess = run_as_multiprocess)
Example #17
def _do_run_simple_test_run(caller, temp_dir, config, end_year=None):
    """Runs model system with a single model (for speed).
    Sets the .resources property of the caller before starting the run.
    """
    runs_manager = RunManager(config)

    run_configuration = SubsetConfiguration()
    run_configuration['creating_baseyear_cache_configuration'].cache_directory_root = temp_dir
    run_configuration['models'] = ['land_price_model']
    if end_year is not None:
        run_configuration['years'] = (run_configuration['years'][0], end_year)
    
    SessionConfiguration(new_instance=True,
                         package_order=run_configuration['dataset_pool_configuration'].package_order,
                         in_storage=AttributeCache())
    insert_auto_generated_cache_directory_if_needed(run_configuration)
    caller.resources = run_configuration
    runs_manager.setup_new_run(cache_directory = run_configuration['cache_directory'],
                               configuration = run_configuration)
    runs_manager.run_run(run_configuration)
Example #18
    def testName(self):
        print "entering test_run"

        logger.log_status("Preparing MATsim test run ...")
        # unzip MATSim files
        matsim_zip = ExtractZipFile(self.matsim_source, self.destination)
        matsim_zip.extract()
        matsim_extracted_files = os.path.join(self.destination, "MATSimTestClasses")  # location of unzipped MATSim files
        # unzip base_year_cache
        base_year_data_zip = ExtractZipFile(self.base_year_data_source, self.destination)
        base_year_data_zip.extract()
        base_year_data_extracted_files = os.path.join(
            self.destination, "base_year_data"
        )  # location of unzipped base_year_cache

        # updating location of base_year_data
        self.run_config["creating_baseyear_cache_configuration"].cache_directory_root = self.destination
        self.run_config[
            "creating_baseyear_cache_configuration"
        ].baseyear_cache.existing_cache_to_copy = base_year_data_extracted_files
        self.run_config["cache_directory"] = base_year_data_extracted_files
        self.run_config.add("matsim_files", matsim_extracted_files)
        self.run_config.add("matsim_config", self.matsim_config_full)
        self.run_config.add("root", self.destination)

        insert_auto_generated_cache_directory_if_needed(self.run_config)
        run_manager = RunManager(ServicesDatabaseConfiguration())

        run_manager.setup_new_run(cache_directory=self.run_config["cache_directory"], configuration=self.run_config)

        logger.log_status("Strating UrbanSim run ... ")
        run_manager.run_run(self.run_config, run_as_multiprocess=True)
        # after the UrbanSim run the travel data sets should be equal
        # self.assertTrue( self.compare_travel_data_sets() )
        logger.log_status("... UrbanSim run finished.")

        print "leaving test_run"
Example #19
class Calibration(object):
    """ Class to calibrate UrbanSim model coefficients.
    
    """

    def __init__(
        self,
        xml_config,
        scenario,
        calib_datasets,
        target_expression,
        target_file,
        subset=None,
        subset_patterns=None,
        skip_cache_cleanup=False,
        log_directory=None,
    ):
        """
        - xml_config: xml configuration file, e.g. '/home/atschirhar/opus/project_configs/paris_zone.xml'
        - scenario: name of the scenario to run for calibration, where models_to_run and simulation years are specified
        - calib_datasets: dictionary specifying dataset names and attributes to be calibrated, e.g.
                  {'establishment_location_choice_model_coefficients': 'estimate'}
        - target_expression: opus expression computing values from the prediction to be compared with targets
        - target_file: name of a csv file providing targets
        - subset: dictionary specifying the subset of the dataset to be calibrated, e.g.
                  {'establishment_location_choice_model_coefficients': ['coefficient_name', ['paris_celcm', 'biotech_celcm']]}
          subset and subset_patterns cannot both be specified for the same dataset
        - subset_patterns: dictionary specifying the subset of the dataset to be calibrated through a regular expression (re) pattern, e.g.
                  {'establishment_location_choice_model_coefficients': ['coefficient_name', '*_celcm']}
          subset and subset_patterns cannot both be specified for the same dataset

        """
        self.target_expression = target_expression
        self.target = self.read_target(target_file)
        # keep the calibration inputs for use in run() and target_func()
        self.calib_datasets = calib_datasets
        self.subset = subset
        self.subset_patterns = subset_patterns

        self.run_manager = None
        self.xml_config = xml_config
        self.scenario = scenario
        self.skip_cache_cleanup = skip_cache_cleanup
        self.run_id, self.cache_directory = self.init_run()
        self.run_ids = [self.run_id]  # allow starting of multiple runs for parallel optimization
        self.log_directory = log_directory
        if self.log_directory is None:
            self.log_directory = self.cache_directory  # legacy

        log_file = os.path.join(self.log_directory, "calibration.log")
        logger.enable_file_logging(log_file)

        dict_config = XMLConfiguration(self.xml_config).get_run_configuration(self.scenario)
        ## get parameters from config
        self.base_year = dict_config["base_year"]
        self.start_year, self.end_year = dict_config["years"]
        self.project_name = dict_config["project_name"]
        self.package_order = dict_config["dataset_pool_configuration"].package_order

    @log_block("Start Calibration")
    def run(self, optimizer="lbfgsb", results_pickle_prefix="calib", optimizer_kwargs={}):
        """ Call specifized optimizer to calibrate
        
        Arguments:
            - optimizer: optimization method chosen (fmin_bfgs, simulated anneal etc.)
            - results_pickle_prefix: prefix of the pickle file name that will be saved after the simulation; if None, results is not saved
            
        Returns:
            - the results from the opimizater
            - a pickle dump of the results in the cache_directory, if results_pickle_prefix is specified
        
        """

        simulation_state = SimulationState()
        simulation_state.set_current_time(self.base_year)
        simulation_state.set_cache_directory(self.cache_directory)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(
            new_instance=True, package_order=self.package_order, in_storage=attribute_cache
        ).get_dataset_pool()

        calib_datasets = {}
        subset, subset_patterns = self.subset, self.subset_patterns
        for dataset_name, calib_attr in self.calib_datasets.iteritems():
            dataset = dataset_pool.get_dataset(dataset_name, dataset_arguments={"id_name": []})
            assert (
                subset is None
                or subset.get(dataset_name, None) is None
                or subset_patterns is None
                or subset_patterns.get(dataset_name, None) is None
            )
            if subset is not None and subset.get(dataset_name, None) is not None:
                subset_attr, subset_cond = subset.get(dataset_name)
                index = np.in1d(dataset[subset_attr], subset_cond)
            elif subset_patterns is not None and subset_patterns.get(dataset_name, None) is not None:
                subset_attr, subset_pattern = subset_patterns.get(dataset_name)
                index = array([True if re.search(subset_pattern, attr_v) else False for attr_v in dataset[subset_attr]])
            else:
                index = arange(dataset.size(), dtype="i")

            calib_datasets[dataset_name] = [dataset, calib_attr, index]

        init_v = array([], dtype="f8")
        for dataset_name, calib in calib_datasets.iteritems():
            dataset, calib_attr, index = calib
            if type(calib_attr) == str:
                init_v = np.concatenate((init_v, dataset[calib_attr][index]))
            elif type(calib_attr) in (list, tuple):
                for attr in calib_attr:
                    init_v = np.concatenate((init_v, dataset[attr][index]))
            else:
                raise TypeError, "Unrecognized data type in calib_datasets"

        t0 = time.time()

        if is_parallelizable == True:
            set_parallel(True)

        print OKBLUE + "\noptimizer = {} (is_parallel = {})".format(optimizer, is_parallelizable) + ENDC
        print OKBLUE + "-------------------------------------------------------\n" + ENDC
        if optimizer == "bfgs":
            default_kwargs = {
                "fprime": None,
                "epsilon": 1e-08,
                "maxiter": None,
                "full_output": 1,
                "disp": 1,
                "retall": 0,
                "callback": None,
            }
            optimizer_func = fmin_bfgs
        elif optimizer == "lbfgsb":
            default_kwargs = {"fprime": None, "approx_grad": True, "bounds": None, "factr": 1e12, "iprint": 1}

            optimizer_func = fmin_l_bfgs_b
        elif optimizer == "anneal":
            default_kwargs = {
                "schedule": "fast",
                "full_output": 1,
                "T0": None,
                "Tf": 1e-12,
                "maxeval": None,
                "maxaccept": None,
                "maxiter": 400,
                "boltzmann": 1.0,
                "learn_rate": 0.5,
                "feps": 1e-06,
                "quench": 1.0,
                "m": 1.0,
                "n": 1.0,
                "lower": -1,
                "upper": 1,
                "dwell": 50,
                "disp": True,
            }

            optimizer_func = anneal
        elif optimizer == "panneal":
            default_kwargs = {
                "schedule": "fast",
                "full_output": 1,
                "T0": None,
                "Tf": 1e-12,
                "maxeval": None,
                "maxaccept": None,
                "maxiter": 400,
                "boltzmann": 1.0,
                "learn_rate": 0.5,
                "feps": 1e-06,
                "quench": 1.0,
                "m": 1.0,
                "n": 1.0,
                "lower": -1,
                "upper": 1,
                "dwell": 50,
                "disp": True,
                "cores": 24,
                "interv": 20,
            }

            optimizer_func = panneal
        else:
            raise ValueError, "Unrecognized optimizer {}".format(optimizer)

        default_kwargs.update(optimizer_kwargs)
        results = optimizer_func(self.target_func, copy(init_v), **default_kwargs)

        duration = time.time() - t0
        if results_pickle_prefix is not None:
            pickle_file = "{}_{}.pickle".format(results_pickle_prefix, optimizer)
            pickle_file = os.path.join(self.log_directory, pickle_file)
            pickle.dump(results, open(pickle_file, "wb"))

        if is_parallelizable == True:
            set_parallel(False)

        logger.log_status("init target_func: {}".format(self.target_func(init_v)))
        logger.log_status("end target_func: {}".format(results[:]))  # which one?
        logger.log_status("outputs from optimizer: {}".format(results))
        logger.log_status("Execution time: {}".format(duration))

    def init_run(self, create_baseyear_cache=True):
        """ init run, get run_id & cache_directory. """
        ##avoid invoking start_run from cmd line -
        option_group = StartRunOptionGroup()
        option_group.parser.set_defaults(xml_configuration=self.xml_config, scenario_name=self.scenario)
        # run_id, cache_directory = start_run(option_group)

        options, args = option_group.parse()
        self.run_manager = RunManager(option_group.get_services_database_configuration(options))

        resources = XMLConfiguration(self.xml_config).get_run_configuration(self.scenario)
        insert_auto_generated_cache_directory_if_needed(resources)
        cache_directory = resources["cache_directory"]
        self.run_manager.setup_new_run(cache_directory, resources)
        run_id, cache_directory = self.run_manager.run_id, self.run_manager.get_current_cache_directory()
        self.run_manager.add_row_to_history(run_id, resources, "done")

        if create_baseyear_cache:
            self.run_manager.create_baseyear_cache(resources)

        ## good for testing
        # run_id = 275
        # cache_directory = '/home/lmwang/opus/data/paris_zone/runs/run_275.2012_05_26_00_20'
        assert run_id is not None
        assert cache_directory is not None
        return run_id, cache_directory

    def update_parameters(
        self, est_v, cache_directory, simulation_state, dataset_pool, calib_datasets, *args, **kwargs
    ):
        i_est_v = 0
        current_year = simulation_state.get_current_time()
        simulation_state.set_current_time(self.base_year)
        simulation_state.set_cache_directory(cache_directory)

        for dataset_name, calib in calib_datasets.iteritems():
            dataset, calib_attr, index = calib
            if type(calib_attr) == str:
                dtype = dataset[calib_attr].dtype
                dataset[calib_attr][index] = (est_v[i_est_v : i_est_v + index.size]).astype(dtype)
                i_est_v += index.size
            elif type(calib_attr) in (list, tuple):
                for attr in calib_attr:
                    dtype = dataset[attr].dtype
                    dataset[attr][index] = (est_v[i_est_v : i_est_v + index.size]).astype(dtype)
                    i_est_v += index.size
            else:
                raise TypeError, "Unrecognized data type in calib_datasets"

            # dtype = dataset[calib_attr].dtype
            # dataset[calib_attr][index] = (est_v[i_est_v:i_est_v+index.size]).astype(dtype)
            # flush dataset
            dataset.flush_dataset()
            # i_est_v += index.size
        simulation_state.set_current_time(current_year)

    def update_prediction(self, est_v, simulation_state, dataset_pool, calib_datasets, *args, **kwargs):
        option_group = RestartRunOptionGroup()
        option_group.parser.set_defaults(project_name=self.project_name, skip_cache_cleanup=self.skip_cache_cleanup)

        options, args = option_group.parse()
        if self.run_manager is None:
            self.run_manager = RunManager(option_group.get_services_database_configuration(options))

        if lock != None:
            lock.acquire()
        ## query runs available for re-use
        runs_done = self.run_manager.get_run_info(run_ids=self.run_ids, status="done")
        create_baseyear_cache = False
        if len(runs_done) == 0:  ##there is no re-usable run directory, init a new run
            run_id, cache_directory = self.init_run(create_baseyear_cache=False)
            self.run_ids.append(run_id)
            create_baseyear_cache = True
            logger.log_status("Initializing new run with id " + str(run_id))
        else:
            run_id = runs_done[0].run_id  ##take the first 'done' run_id
            cache_directory = self.run_manager.get_cache_directory(run_id)
            logger.log_status("Using old run with id " + str(run_id))
        resources = self.run_manager.get_resources_for_run_id_from_history(run_id, filter_by_status=False)
        self.run_manager.add_row_to_history(run_id, resources, "taken")
        if lock != None:
            lock.release()

        if create_baseyear_cache:
            self.run_manager.create_baseyear_cache(resources)

        self.update_parameters(est_v, cache_directory, simulation_state, dataset_pool, calib_datasets, *args, **kwargs)
        restart_run(option_group=option_group, args=[run_id, self.start_year])

        prediction = self.summarize_prediction(cache_directory, simulation_state, dataset_pool, calib_datasets)
        return prediction

    def summarize_prediction(self, cache_directory, simulation_state, dataset_pool, calib_datasets):
        dataset_name = VariableName(self.target_expression).get_dataset_name()
        current_year = simulation_state.get_current_time()
        simulation_state.set_current_time(self.end_year)
        simulation_state.set_cache_directory(cache_directory)
        # force reload
        dataset_pool.remove_all_datasets()
        dataset = dataset_pool[dataset_name]
        ids = dataset.get_id_attribute()
        results = dataset.compute_variables(self.target_expression, dataset_pool=dataset_pool)
        simulation_state.set_current_time(current_year)
        return dict(zip(ids, results))

    def read_target(self, target_file):
        ## read (& process) target numbers into a dictionary: {id:value}
        ## csv file with header
        ## id, target
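        ## e.g. (illustrative values, not from the original source):
        ##   id,target
        ##   101,1500.0
        ##   102,2300.0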
        header = file(target_file, "r").readline().strip().split(",")
        contents = np.genfromtxt(target_file, delimiter=",", comments="#", skip_header=1)
        target = dict(zip(contents[:, 0], contents[:, 1]))

        return target

    def target_func(self, est_v, func=lambda x, y: np.sum(np.abs(x - y)), **kwargs):
        """ Target function."""

        simulation_state = SimulationState()
        simulation_state.set_current_time(self.base_year)
        simulation_state.set_cache_directory(self.cache_directory)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(
            new_instance=True, package_order=self.package_order, in_storage=attribute_cache
        ).get_dataset_pool()

        calib_datasets = {}
        subset, subset_patterns = self.subset, self.subset_patterns
        for dataset_name, calib_attr in self.calib_datasets.iteritems():
            dataset = dataset_pool.get_dataset(dataset_name, dataset_arguments={"id_name": []})
            assert (
                subset is None
                or subset.get(dataset_name, None) is None
                or subset_patterns is None
                or subset_patterns.get(dataset_name, None) is None
            )
            if subset is not None and subset.get(dataset_name, None) is not None:
                subset_attr, subset_cond = subset.get(dataset_name)
                index = np.in1d(dataset[subset_attr], subset_cond)
            elif subset_patterns is not None and subset_patterns.get(dataset_name, None) is not None:
                subset_attr, subset_pattern = subset_patterns.get(dataset_name)
                index = array([True if re.search(subset_pattern, attr_v) else False for attr_v in dataset[subset_attr]])
            else:
                index = arange(dataset.size(), dtype="i")

            calib_datasets[dataset_name] = [dataset, calib_attr, index]

        prediction = self.update_prediction(est_v, simulation_state, dataset_pool, calib_datasets, **kwargs)
        ## allow keys in target not appearing in prediction
        ## assuming their values to be 0
        ### every key in target should appear in prediction
        # assert np.all( np.in1d(self.target.keys(), prediction.keys()) )
        target = np.array(self.target.values())
        predct = np.array([prediction[k] if prediction.has_key(k) else 0 for k in self.target.keys()])
        results = func(predct, target)

        return results
Example #20
    number_of_runs = config.get("number_of_runs", 1)
    number_of_runs_in_parallel = min(config.get("parallel_runs", 1), number_of_runs)
    # generate seeds for multiple runs
    root_seed = config.get("seed", None)
    seed(root_seed)
    # generate different seed for each run (each seed contains 1 number)
    seed_array = randint(1,2**30, number_of_runs)
    list_of_cache_directories = []
    for irun in range(number_of_runs):
        config['seed']= (seed_array[irun],)
        this_config = config.copy()
        if ((irun + 1) % number_of_runs_in_parallel) == 0:
            run_in_background = False
        else:
            run_in_background = True
        run_manager.setup_new_run(cache_directory = this_config['cache_directory'],
                                  configuration = this_config)
        run_manager.run_run(this_config, run_as_multiprocess=False,
                            run_in_background=run_in_background)
        if irun == 0:
            # log file for the multiple runs will be located in the first cache
            first_cache_directory = this_config['cache_directory']
            log_file = os.path.join(first_cache_directory, 'multiple_runs.log')
            logger.enable_file_logging(log_file)
            logger.log_status("Multiple runs: %s replications" % number_of_runs)
            logger.log_status("root random seed = %s" % str(root_seed))
        else:
            logger.enable_file_logging(log_file, verbose=False)

        logger.log_status("Run %s: %s" % (irun+1, this_config['cache_directory']))
        logger.disable_file_logging(log_file)
        list_of_cache_directories.append(this_config['cache_directory'])
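
The seeding scheme above can be exercised in isolation; a short sketch assuming the snippet's seed/randint come from numpy.random (its imports are not shown):

# One root seed deterministically yields a single-element seed tuple per
# replicate run, so a whole batch of runs is reproducible.
from numpy.random import randint, seed

seed(42)                                    # root_seed
seed_array = randint(1, 2**30, 3)           # one integer per replicate run
per_run_seeds = [(s,) for s in seed_array]  # each config gets a 1-tuple, as above
print per_run_seeds                         # identical on every invocation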
Example #21
class Calibration(object):
    ''' Class to calibrate UrbanSim model coefficients.
    
    '''
    def __init__(self,
                 xml_config,
                 scenario,
                 calib_datasets,
                 target_expression,
                 target_file,
                 subset=None,
                 subset_patterns=None,
                 skip_cache_cleanup=False,
                 log_directory=None):
        """
        - xml_config: xml configuration file, e.g. '/home/atschirhar/opus/project_configs/paris_zone.xml'
        - scenario: name of the scenario to run for calibration, where models_to_run and simulation years are specified
        - calib_datasets: dictionary specifying dataset names and attributes to be calibrated, e.g.
                  {'establishment_location_choice_model_coefficients': 'estimate'}
        - target_expression: opus expression computing values from the prediction to be compared with targets
        - target_file: name of a csv file providing targets
        - subset: dictionary specifying the subset of the dataset to be calibrated, e.g.
                  {'establishment_location_choice_model_coefficients': ['coefficient_name', ['paris_celcm', 'biotech_celcm']]}
          subset and subset_patterns cannot both be specified for the same dataset
        - subset_patterns: dictionary specifying the subset of the dataset to be calibrated through a regular expression (re) pattern, e.g.
                  {'establishment_location_choice_model_coefficients': ['coefficient_name', '*_celcm']}
          subset and subset_patterns cannot both be specified for the same dataset

        """
        self.target_expression = target_expression
        self.target = self.read_target(target_file)
        # keep the calibration inputs for use in run() and target_func()
        self.calib_datasets = calib_datasets
        self.subset = subset
        self.subset_patterns = subset_patterns

        self.run_manager = None
        self.xml_config = xml_config
        self.scenario = scenario
        self.skip_cache_cleanup = skip_cache_cleanup
        self.run_id, self.cache_directory = self.init_run()
        self.run_ids = [
            self.run_id
        ]  #allow starting of multiple runs for parallel optimization
        self.log_directory = log_directory
        if self.log_directory is None:
            self.log_directory = self.cache_directory  #legacy

        log_file = os.path.join(self.log_directory, "calibration.log")
        logger.enable_file_logging(log_file)

        dict_config = XMLConfiguration(self.xml_config).get_run_configuration(
            self.scenario)
        ## get parameters from config
        self.base_year = dict_config['base_year']
        self.start_year, self.end_year = dict_config['years']
        self.project_name = dict_config['project_name']
        self.package_order = dict_config[
            'dataset_pool_configuration'].package_order

    @log_block("Start Calibration")
    def run(self,
            optimizer='lbfgsb',
            results_pickle_prefix="calib",
            optimizer_kwargs={}):
        ''' Call the specified optimizer to calibrate.

        Arguments:
            - optimizer: optimization method chosen (fmin_bfgs, simulated annealing, etc.)
            - results_pickle_prefix: prefix of the pickle file name that will be saved after the simulation; if None, results are not saved

        Returns:
            - the results from the optimizer
            - a pickle dump of the results in the cache_directory, if results_pickle_prefix is specified

        '''

        simulation_state = SimulationState()
        simulation_state.set_current_time(self.base_year)
        simulation_state.set_cache_directory(self.cache_directory)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(
            new_instance=True,
            package_order=self.package_order,
            in_storage=attribute_cache).get_dataset_pool()

        calib_datasets = {}
        subset, subset_patterns = self.subset, self.subset_patterns
        for dataset_name, calib_attr in self.calib_datasets.iteritems():
            dataset = dataset_pool.get_dataset(
                dataset_name, dataset_arguments={'id_name': []})
            assert subset is None or subset.get(dataset_name, None) is None or \
                   subset_patterns is None or subset_patterns.get(dataset_name, None) is None
            if subset is not None and subset.get(dataset_name,
                                                 None) is not None:
                subset_attr, subset_cond = subset.get(dataset_name)
                index = np.in1d(dataset[subset_attr], subset_cond)
            elif subset_patterns is not None and subset_patterns.get(
                    dataset_name, None) is not None:
                subset_attr, subset_pattern = subset_patterns.get(dataset_name)
                index = array([
                    True if re.search(subset_pattern, attr_v) else False
                    for attr_v in dataset[subset_attr]
                ])
            else:
                index = arange(dataset.size(), dtype='i')

            calib_datasets[dataset_name] = [dataset, calib_attr, index]

        init_v = array([], dtype='f8')
        for dataset_name, calib in calib_datasets.iteritems():
            dataset, calib_attr, index = calib
            if type(calib_attr) == str:
                init_v = np.concatenate((init_v, dataset[calib_attr][index]))
            elif type(calib_attr) in (list, tuple):
                for attr in calib_attr:
                    init_v = np.concatenate((init_v, dataset[attr][index]))
            else:
                raise TypeError, "Unrecognized data type in calib_datasets"

        t0 = time.time()

        if is_parallelizable == True: set_parallel(True)

        print OKBLUE + "\noptimizer = {} (is_parallel = {})".format(
            optimizer, is_parallelizable) + ENDC
        print OKBLUE + "-------------------------------------------------------\n" + ENDC
        if optimizer == 'bfgs':
            default_kwargs = {
                'fprime': None,
                'epsilon': 1e-08,
                'maxiter': None,
                'full_output': 1,
                'disp': 1,
                'retall': 0,
                'callback': None
            }
            optimizer_func = fmin_bfgs
        elif optimizer == 'lbfgsb':
            default_kwargs = {
                'fprime': None,
                'approx_grad': True,
                'bounds': None,
                'factr': 1e12,
                'iprint': 1
            }

            optimizer_func = fmin_l_bfgs_b
        elif optimizer == 'anneal':
            default_kwargs = {
                'schedule': 'fast',
                'full_output': 1,
                'T0': None,
                'Tf': 1e-12,
                'maxeval': None,
                'maxaccept': None,
                'maxiter': 400,
                'boltzmann': 1.0,
                'learn_rate': 0.5,
                'feps': 1e-06,
                'quench': 1.0,
                'm': 1.0,
                'n': 1.0,
                'lower': -1,
                'upper': 1,
                'dwell': 50,
                'disp': True
            }

            optimizer_func = anneal
        elif optimizer == 'panneal':
            default_kwargs = {
                'schedule': 'fast',
                'full_output': 1,
                'T0': None,
                'Tf': 1e-12,
                'maxeval': None,
                'maxaccept': None,
                'maxiter': 400,
                'boltzmann': 1.0,
                'learn_rate': 0.5,
                'feps': 1e-06,
                'quench': 1.0,
                'm': 1.0,
                'n': 1.0,
                'lower': -1,
                'upper': 1,
                'dwell': 50,
                'disp': True,
                'cores': 24,
                'interv': 20
            }

            optimizer_func = panneal
        else:
            raise ValueError, "Unrecognized optimizer {}".format(optimizer)

        default_kwargs.update(optimizer_kwargs)
        results = optimizer_func(self.target_func, copy(init_v),
                                 **default_kwargs)

        duration = time.time() - t0
        if results_pickle_prefix is not None:
            pickle_file = "{}_{}.pickle".format(results_pickle_prefix,
                                                optimizer)
            pickle_file = os.path.join(self.log_directory, pickle_file)
            pickle.dump(results, open(pickle_file, "wb"))

        if is_parallelizable == True: set_parallel(False)

        logger.log_status('init target_func: {}'.format(
            self.target_func(init_v)))
        logger.log_status('end target_func: {}'.format(
            results[:]))  #which one?
        logger.log_status('outputs from optimizer: {}'.format(results))
        logger.log_status('Execution time: {}'.format(duration))

    def init_run(self, create_baseyear_cache=True):
        ''' init run, get run_id & cache_directory. '''
        ##avoid invoking start_run from cmd line -
        option_group = StartRunOptionGroup()
        option_group.parser.set_defaults(xml_configuration=self.xml_config,
                                         scenario_name=self.scenario)
        #run_id, cache_directory = start_run(option_group)

        options, args = option_group.parse()
        self.run_manager = RunManager(
            option_group.get_services_database_configuration(options))

        resources = XMLConfiguration(self.xml_config).get_run_configuration(
            self.scenario)
        insert_auto_generated_cache_directory_if_needed(resources)
        cache_directory = resources['cache_directory']
        self.run_manager.setup_new_run(cache_directory, resources)
        run_id, cache_directory = self.run_manager.run_id, self.run_manager.get_current_cache_directory(
        )
        self.run_manager.add_row_to_history(run_id, resources, "done")

        if create_baseyear_cache:
            self.run_manager.create_baseyear_cache(resources)

        ## good for testing
        #run_id = 275
        #cache_directory = '/home/lmwang/opus/data/paris_zone/runs/run_275.2012_05_26_00_20'
        assert run_id is not None
        assert cache_directory is not None
        return run_id, cache_directory

    def update_parameters(self, est_v, cache_directory, simulation_state,
                          dataset_pool, calib_datasets, *args, **kwargs):
        i_est_v = 0
        current_year = simulation_state.get_current_time()
        simulation_state.set_current_time(self.base_year)
        simulation_state.set_cache_directory(cache_directory)

        for dataset_name, calib in calib_datasets.iteritems():
            dataset, calib_attr, index = calib
            if type(calib_attr) == str:
                dtype = dataset[calib_attr].dtype
                dataset[calib_attr][index] = (est_v[i_est_v:i_est_v +
                                                    index.size]).astype(dtype)
                i_est_v += index.size
            elif type(calib_attr) in (list, tuple):
                for attr in calib_attr:
                    dtype = dataset[attr].dtype
                    dataset[attr][index] = (est_v[i_est_v:i_est_v +
                                                  index.size]).astype(dtype)
                    i_est_v += index.size
            else:
                raise TypeError, "Unrecognized data type in calib_datasets"

            #dtype = dataset[calib_attr].dtype
            #dataset[calib_attr][index] = (est_v[i_est_v:i_est_v+index.size]).astype(dtype)
            #flush dataset
            dataset.flush_dataset()
            #i_est_v += index.size
        simulation_state.set_current_time(current_year)

    def update_prediction(self, est_v, simulation_state, dataset_pool,
                          calib_datasets, *args, **kwargs):
        option_group = RestartRunOptionGroup()
        option_group.parser.set_defaults(
            project_name=self.project_name,
            skip_cache_cleanup=self.skip_cache_cleanup)

        options, args = option_group.parse()
        if self.run_manager is None:
            self.run_manager = RunManager(
                option_group.get_services_database_configuration(options))

        if lock != None: lock.acquire()
        ## query runs available for re-use
        runs_done = self.run_manager.get_run_info(run_ids=self.run_ids,
                                                  status='done')
        create_baseyear_cache = False
        if len(runs_done
               ) == 0:  ##there is no re-usable run directory, init a new run
            run_id, cache_directory = self.init_run(
                create_baseyear_cache=False)
            self.run_ids.append(run_id)
            create_baseyear_cache = True
            logger.log_status('Initializing new run with id ' + str(run_id))
        else:
            run_id = runs_done[0].run_id  ##take the first 'done' run_id
            cache_directory = self.run_manager.get_cache_directory(run_id)
            logger.log_status('Using old run with id ' + str(run_id))
        resources = self.run_manager.get_resources_for_run_id_from_history(
            run_id, filter_by_status=False)
        self.run_manager.add_row_to_history(run_id, resources, "taken")
        if lock != None: lock.release()

        if create_baseyear_cache:
            self.run_manager.create_baseyear_cache(resources)

        self.update_parameters(est_v, cache_directory, simulation_state,
                               dataset_pool, calib_datasets, *args, **kwargs)
        restart_run(option_group=option_group, args=[run_id, self.start_year])

        prediction = self.summarize_prediction(cache_directory,
                                               simulation_state, dataset_pool,
                                               calib_datasets)
        return prediction

    def summarize_prediction(self, cache_directory, simulation_state,
                             dataset_pool, calib_datasets):
        dataset_name = VariableName(self.target_expression).get_dataset_name()
        current_year = simulation_state.get_current_time()
        simulation_state.set_current_time(self.end_year)
        simulation_state.set_cache_directory(cache_directory)
        #force reload
        dataset_pool.remove_all_datasets()
        dataset = dataset_pool[dataset_name]
        ids = dataset.get_id_attribute()
        results = dataset.compute_variables(self.target_expression,
                                            dataset_pool=dataset_pool)
        simulation_state.set_current_time(current_year)
        return dict(zip(ids, results))

    def read_target(self, target_file):
        ## read (& process) target numbers into a dictionary: {id:value}
        ## csv file with header
        ## id, target
        header = file(target_file, 'r').readline().strip().split(',')
        contents = np.genfromtxt(target_file,
                                 delimiter=",",
                                 comments='#',
                                 skip_header=1)
        target = dict(zip(contents[:, 0], contents[:, 1]))

        return target

    def target_func(self,
                    est_v,
                    func=lambda x, y: np.sum(np.abs(x - y)),
                    **kwargs):
        ''' Target function.'''

        simulation_state = SimulationState()
        simulation_state.set_current_time(self.base_year)
        simulation_state.set_cache_directory(self.cache_directory)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(
            new_instance=True,
            package_order=self.package_order,
            in_storage=attribute_cache).get_dataset_pool()

        calib_datasets = {}
        subset, subset_patterns = self.subset, self.subset_patterns
        for dataset_name, calib_attr in self.calib_datasets.iteritems():
            dataset = dataset_pool.get_dataset(
                dataset_name, dataset_arguments={'id_name': []})
            assert subset is None or subset.get(dataset_name, None) is None or \
                   subset_patterns is None or subset_patterns.get(dataset_name, None) is None
            if subset is not None and subset.get(dataset_name,
                                                 None) is not None:
                subset_attr, subset_cond = subset.get(dataset_name)
                index = np.in1d(dataset[subset_attr], subset_cond)
            elif subset_patterns is not None and subset_patterns.get(
                    dataset_name, None) is not None:
                subset_attr, subset_pattern = subset_patterns.get(dataset_name)
                index = array([
                    True if re.search(subset_pattern, attr_v) else False
                    for attr_v in dataset[subset_attr]
                ])
            else:
                index = arange(dataset.size(), dtype='i')

            calib_datasets[dataset_name] = [dataset, calib_attr, index]

        prediction = self.update_prediction(est_v, simulation_state,
                                            dataset_pool, calib_datasets,
                                            **kwargs)
        ## allow keys in target not appearing in prediction
        ## assuming their values to be 0
        ### every key in target should appear in prediction
        #assert np.all( np.in1d(self.target.keys(), prediction.keys()) )
        target = np.array(self.target.values())
        predct = np.array([prediction[k] if prediction.has_key(k) else 0 \
                           for k in self.target.keys() ])
        results = func(predct, target)

        return results
Example #22
    number_of_runs = config.get("number_of_runs", 1)
    number_of_runs_in_parallel = min(config.get("parallel_runs", 1),
                                     number_of_runs)
    # generate seeds for multiple runs
    root_seed = config.get("seed", None)
    seed(root_seed)
    # generate different seed for each run (each seed contains 1 number)
    seed_array = randint(1, 2**30, number_of_runs)
    list_of_cache_directories = []
    for irun in range(number_of_runs):
        config['seed'] = (seed_array[irun], )
        this_config = config.copy()
        if ((irun + 1) % number_of_runs_in_parallel) == 0:
            run_in_background = False
        else:
            run_in_background = True
        run_manager.setup_new_run(
            cache_directory=this_config['cache_directory'],
            configuration=this_config)
        run_manager.run_run(this_config,
                            run_as_multiprocess=False,
                            run_in_background=run_in_background)
        if irun == 0:
            # log file for the multiple runs will be located in the first cache
            first_cache_directory = this_config['cache_directory']
            log_file = os.path.join(first_cache_directory, 'multiple_runs.log')
            logger.enable_file_logging(log_file)
            logger.log_status("Multiple runs: %s replications" %
                              number_of_runs)
            logger.log_status("root random seed = %s" % str(root_seed))
        else:
            logger.enable_file_logging(log_file, verbose=False)