def test_mp_run():

    mp_configs_dir = os.path.join(os.path.dirname(__file__), 'configs_mp')
    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    inject.add_injectable('configs_dir', [mp_configs_dir, configs_dir])

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    inject.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    inject.add_injectable("data_dir", data_dir)

    tracing.config_logger()

    run_list = mp_tasks.get_run_list()
    mp_tasks.print_run_list(run_list)

    # do this after config.handle_standard_args, as command line args may override injectables
    injectables = ['data_dir', 'configs_dir', 'output_dir']
    injectables = {k: inject.get_injectable(k) for k in injectables}

    # pipeline.run(models=run_list['models'], resume_after=run_list['resume_after'])

    mp_tasks.run_multiprocess(run_list, injectables)

    pipeline.open_pipeline('_')
    regress_mini_auto()
    pipeline.close_pipeline()
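# regress_mini_auto() above is defined elsewhere in the test module. A minimal
# sketch of the kind of regression check it performs; the household ids and
# expected values below are illustrative placeholders, not the project's data:
import pandas as pd
import pandas.testing as pdt


def regress_mini_auto():
    # the households table should carry the auto_ownership column written by the run
    auto_choice = pipeline.get_table('households').auto_ownership

    hh_ids = [1099626, 1173905, 1196298]  # hypothetical sample household ids
    expected_choice = pd.Series([1, 1, 0], index=pd.Index(hh_ids, name='household_id'),
                                name='auto_ownership')

    pdt.assert_series_equal(auto_choice.reindex(hh_ids), expected_choice, check_dtype=False)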
def test_config_logger(capsys):

    add_canonical_dirs()

    tracing.config_logger()

    logger = logging.getLogger('popsim')

    file_handlers = [h for h in logger.handlers if type(h) is logging.FileHandler]
    assert len(file_handlers) == 1
    asim_logger_baseFilename = file_handlers[0].baseFilename

    print("handlers:", logger.handlers)

    logger.info('test_config_logger')
    logger.info('log_info')
    logger.warning('log_warn1')

    out, err = capsys.readouterr()

    # don't consume output
    print(out)

    assert "could not find conf file" not in out
    assert 'log_warn1' in out
    assert 'log_info' not in out

    with open(asim_logger_baseFilename, 'r') as content_file:
        content = content_file.read()
        print(content)

    assert 'log_warn1' in content
    assert 'log_info' in content
def setup_dirs(ancillary_configs_dir=None, data_dir=None):

    # ancillary_configs_dir is used by run_mp to test multiprocess

    test_pipeline_configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    example_configs_dir = example_path('configs')
    configs_dir = [test_pipeline_configs_dir, example_configs_dir]

    if ancillary_configs_dir is not None:
        configs_dir = [ancillary_configs_dir] + configs_dir

    inject.add_injectable('configs_dir', configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    inject.add_injectable('output_dir', output_dir)

    if not data_dir:
        data_dir = example_path('data')

    inject.add_injectable('data_dir', data_dir)

    inject.clear_cache()

    tracing.config_logger()

    tracing.delete_output_files('csv')
    tracing.delete_output_files('txt')
    tracing.delete_output_files('yaml')
    tracing.delete_output_files('omx')
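# example_path() above is not defined in this excerpt. A plausible sketch,
# assuming the example folder sits next to this test directory (the real helper
# may resolve the packaged example differently):
import os


def example_path(dirname):
    """Resolve a directory inside the adjacent 'example' folder."""
    resolved = os.path.join(os.path.dirname(__file__), '..', 'example', dirname)
    return os.path.normpath(resolved)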
def test_full_run2():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs2')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data2')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'input_pre_processor',
        'setup_data_structures',
        'initial_seed_balancing',
        'meta_control_factoring',
        'final_seed_balancing',
        'integerize_final_seed_weights',
        'sub_balancing.geography=DISTRICT',
        'sub_balancing.geography=TRACT',
        'sub_balancing.geography=TAZ',
        'expand_households',
        'summarize',
        'write_results'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    assert isinstance(pipeline.get_table('expanded_household_ids'), pd.DataFrame)

    assert os.path.exists(os.path.join(output_dir, 'summary_DISTRICT.csv'))

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    orca.clear_cache()
def config_logger():
    """ActivitySim logger"""

    _tracing.config_logger()
    _logging.captureWarnings(capture=True)
    _warnings.simplefilter("always")

    logger = _logging.getLogger('asimtbm')
    logger.info("Setup logger")
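# Illustrative use of the config_logger() wrapper above: call it once at startup,
# then log through the named logger. Because captureWarnings is enabled, calls
# to warnings.warn are routed into the logging system as well.
import logging
import warnings

config_logger()
logging.getLogger('asimtbm').info('model run starting')
warnings.warn('this warning is captured by the logging system')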
def run(args):
    """
    Run bca4abm. Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'.
    """

    if args.working_dir and os.path.exists(args.working_dir):
        os.chdir(args.working_dir)

    if args.config:
        inject.add_injectable('configs_dir', args.config)

    if args.data:
        inject.add_injectable('data_dir', args.data)

    if args.output:
        inject.add_injectable('output_dir', args.output)

    for injectable in ['configs_dir', 'data_dir', 'output_dir']:
        try:
            dir_path = inject.get_injectable(injectable)
        except RuntimeError:
            sys.exit('Error: please specify either a --working_dir '
                     "containing 'configs', 'data', and 'output' folders "
                     'or all three of --config, --data, and --output')
        if not os.path.exists(dir_path):
            sys.exit("Could not find %s '%s'" % (injectable, os.path.abspath(dir_path)))

    if args.pipeline:
        inject.add_injectable('pipeline_file_name', args.pipeline)

    if args.resume:
        override_setting('resume_after', args.resume)

    tracing.config_logger()
    tracing.delete_csv_files()  # only modifies output_dir
    warnings.simplefilter('always')
    logging.captureWarnings(capture=True)

    t0 = tracing.print_elapsed_time()

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = setting('resume_after', None)

    if resume_after:
        print('resume_after: %s' % resume_after)

    pipeline.run(models=setting('models'), resume_after=resume_after)

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    t0 = tracing.print_elapsed_time('all models', t0)
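# The CLI wiring for run(args) is not shown here. A minimal argparse sketch,
# assuming single-valued options, consistent with the attributes run() reads
# (working_dir, config, data, output, pipeline, resume):
import argparse


def make_parser():
    parser = argparse.ArgumentParser(description='run bca4abm')
    parser.add_argument('--working_dir',
                        help="project folder containing 'configs', 'data', and 'output'")
    parser.add_argument('--config', help='path to configs_dir')
    parser.add_argument('--data', help='path to data_dir')
    parser.add_argument('--output', help='path to output_dir')
    parser.add_argument('--pipeline', help='pipeline file name')
    parser.add_argument('--resume', help='resume after this model step')
    return parser


# run(make_parser().parse_args())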
def test_zero_chunk_size():

    settings = inject_settings(chunk_size=0)

    inject.clear_cache()

    tracing.config_logger()

    MODELS = settings['models']
    pipeline.run(models=MODELS, resume_after='aggregate_od_processor')

    pipeline.close_pipeline()
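# inject_settings() is used by several tests in this section but not defined
# here. A plausible sketch: load settings.yaml from configs_dir, apply keyword
# overrides, and re-inject the result (the real helper may differ):
import os
import yaml
from activitysim.core import inject


def inject_settings(configs_dir=None, **kwargs):
    if configs_dir is None:
        configs_dir = inject.get_injectable('configs_dir')

    with open(os.path.join(configs_dir, 'settings.yaml')) as f:
        settings = yaml.load(f, Loader=yaml.SafeLoader)

    for k, v in kwargs.items():
        settings[k] = v

    inject.add_injectable('settings', settings)
    return settings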
def full_run(resume_after=None, chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None, trace_od=None, check_for_variability=None):

    configs_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'example', 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    inject_settings(configs_dir,
                    households_sample_size=households_sample_size,
                    chunk_size=chunk_size,
                    trace_hh_id=trace_hh_id,
                    trace_od=trace_od,
                    check_for_variability=check_for_variability)

    orca.clear_cache()

    tracing.config_logger()

    # assert orca.get_injectable("chunk_size") == chunk_size

    _MODELS = [
        'compute_accessibility',
        'school_location_sample',
        'school_location_logsums',
        'school_location_simulate',
        'workplace_location_sample',
        'workplace_location_logsums',
        'workplace_location_simulate',
        'auto_ownership_simulate',
        'cdap_simulate',
        'mandatory_tour_frequency',
        'mandatory_scheduling',
        'non_mandatory_tour_frequency',
        'destination_choice',
        'non_mandatory_scheduling',
        'tour_mode_choice_simulate',
        'create_simple_trips',
        'trip_mode_choice_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=resume_after)

    tours = pipeline.get_table('tours')
    tour_count = len(tours.index)

    pipeline.close_pipeline()

    orca.clear_cache()

    return tour_count
def run(args):
    """
    Run the models. Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'. Both '--config' and '--data' can be
    specified multiple times. Directories listed first take precedence.
    """

    from activitysim import abm  # register injectables

    tracing.config_logger(basic=True)
    handle_standard_args(args)  # possibly update injectables
    tracing.config_logger(basic=False)  # update using possibly new logging configs
    config.filter_warnings()
    logging.captureWarnings(capture=True)

    log_settings()

    t0 = tracing.print_elapsed_time()

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = config.setting('resume_after', None)

    # cleanup if not resuming
    if not resume_after:
        cleanup_output_files()
    elif config.setting('cleanup_trace_files_on_resume', False):
        tracing.delete_trace_files()

    if config.setting('multiprocess', False):
        logger.info('run multiprocess simulation')

        from activitysim.core import mp_tasks
        run_list = mp_tasks.get_run_list()
        injectables = {k: inject.get_injectable(k) for k in INJECTABLES}
        mp_tasks.run_multiprocess(run_list, injectables)
    else:
        logger.info('run single process simulation')

        pipeline.run(models=config.setting('models'), resume_after=resume_after)
        pipeline.close_pipeline()
        chunk.log_write_hwm()

    tracing.print_elapsed_time('all models', t0)
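# INJECTABLES, referenced above, is defined elsewhere in the CLI module; it names
# the injectables forwarded to multiprocess subprocesses. A plausible value,
# consistent with the directories these drivers register:
INJECTABLES = ['data_dir', 'configs_dir', 'output_dir', 'settings_file_name']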
def run_abm(models, resume_after=None, chunk_size=None, trace_hh_id=None, trace_od=None):

    settings = inject_settings(chunk_size=chunk_size,
                               trace_hh_id=trace_hh_id,
                               trace_od=trace_od)

    inject.clear_cache()

    tracing.config_logger()

    pipeline.run(models=models, resume_after=resume_after)
def test_run_4step():

    settings = inject_settings(chunk_size=None, trace_hh_id=None, trace_od=None)

    inject.clear_cache()

    tracing.config_logger()

    MODELS = settings['models']
    pipeline.run(models=MODELS, resume_after=None)

    pipeline.close_pipeline()
def test_full_run1():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'input_pre_processor',
        'setup_data_structures',
        'initial_seed_balancing',
        'meta_control_factoring',
        'final_seed_balancing',
        'integerize_final_seed_weights',
        'sub_balancing.geography=TRACT',
        'sub_balancing.geography=TAZ',
        'expand_households',
        'summarize',
        'write_tables',
        'write_synthetic_population',
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    expanded_household_ids = pipeline.get_table('expanded_household_ids')
    assert isinstance(expanded_household_ids, pd.DataFrame)

    taz_hh_counts = expanded_household_ids.groupby('TAZ').size()
    assert len(taz_hh_counts) == TAZ_COUNT
    assert taz_hh_counts.loc[100] == TAZ_100_HH_COUNT

    # output_tables action: skip
    assert not os.path.exists(os.path.join(output_dir, 'households.csv'))
    assert os.path.exists(os.path.join(output_dir, 'summary_DISTRICT_1.csv'))

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    orca.clear_cache()
def setup():

    orca.orca._INJECTABLES.pop('skim_dict', None)
    orca.orca._INJECTABLES.pop('skim_stack', None)

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    orca.clear_cache()

    tracing.config_logger()
def setup_dirs():

    configs_dir = os.path.join(os.path.dirname(__file__), "configs")
    mp_configs_dir = os.path.join(os.path.dirname(__file__), "configs_mp")
    inject.add_injectable("configs_dir", [mp_configs_dir, configs_dir])

    output_dir = os.path.join(os.path.dirname(__file__), "output")
    inject.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), "data")
    inject.add_injectable("data_dir", data_dir)

    tracing.config_logger()

    tracing.delete_output_files("csv")
    tracing.delete_output_files("txt")
    tracing.delete_output_files("yaml")
def setup_dirs(configs_dir):

    inject.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    inject.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    inject.add_injectable("data_dir", data_dir)

    inject.clear_cache()

    tracing.config_logger()

    tracing.delete_output_files('csv')
    tracing.delete_output_files('txt')
    tracing.delete_output_files('yaml')
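# Illustrative only: how a test typically uses the setup_dirs() helper above,
# pointing configs_dir at the test module's local configs folder (the test name
# and assertion are hypothetical):
def test_setup_dirs_smoke():
    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    setup_dirs(configs_dir)

    # injectables are registered and stale output files have been deleted
    assert inject.get_injectable('configs_dir') == configs_dir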
def test_pipeline_run():

    orca.orca._INJECTABLES.pop('skim_dict', None)
    orca.orca._INJECTABLES.pop('skim_stack', None)

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    orca.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'step1',
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    table1 = pipeline.get_table("table1").column1

    # test that model arg is passed to step
    pipeline.run_model('step2.table_name=table2')

    table2 = pipeline.get_table("table2").column1

    # try to get a non-existent table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "not in checkpointed tables" in str(excinfo.value)

    # try to get an existing table from a non-existent checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("table1", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close_pipeline()
    orca.clear_cache()

    close_handlers()
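# close_handlers() above is a test helper defined elsewhere. A common pattern,
# sketched here under the assumption that it simply resets logging between
# tests: close and detach every logger's handlers so log files can be removed.
import logging


def close_handlers():
    loggers = [logging.getLogger(name) for name in logging.root.manager.loggerDict]
    loggers.append(logging.getLogger())  # include the root logger

    for lgr in loggers:
        for handler in list(lgr.handlers):
            handler.close()
            lgr.removeHandler(handler)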
def setup_dirs(configs_dir, data_dir):

    print(f"configs_dir: {configs_dir}")
    inject.add_injectable('configs_dir', configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    inject.add_injectable('output_dir', output_dir)

    print(f"data_dir: {data_dir}")
    inject.add_injectable('data_dir', data_dir)

    inject.clear_cache()

    tracing.config_logger()

    tracing.delete_output_files('csv')
    tracing.delete_output_files('txt')
    tracing.delete_output_files('yaml')
    tracing.delete_output_files('omx')
def setup_function():

    inject.reinject_decorated_tables()

    inject.remove_injectable('skim_dict')
    inject.remove_injectable('skim_stack')

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    inject.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    inject.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    inject.add_injectable("data_dir", data_dir)

    inject.clear_cache()

    tracing.config_logger()
def test_full_run1():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    # data_dir = os.path.join(os.path.dirname(__file__), 'data')
    data_dir = os.path.join(os.path.dirname(__file__), '..', '..', 'example', 'data')
    orca.add_injectable("data_dir", data_dir)

    # scenarios_dir = os.path.join(os.path.dirname(__file__), 'scenarios')
    scenarios_dir = os.path.join(os.path.dirname(__file__), '..', '..', 'example', 'scenarios')
    orca.add_injectable("scenarios_dir", scenarios_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()

    tracing.config_logger()

    # run list from settings file is dict with list of 'steps' and optional 'resume_after'
    run_list = setting('run_list')
    assert 'steps' in run_list, "Did not find steps in run_list"

    # list of steps and possible resume_after in run_list
    steps = run_list.get('steps')

    pipeline.run(models=steps, resume_after=None)

    # geo_crosswalk = pipeline.get_table('geo_crosswalk')
    # assert geo_crosswalk.index.name == 'TAZ'
    # assert 'FAF4' in geo_crosswalk.columns
    # assert 'FIPS' in geo_crosswalk.columns
    #
    # assert os.path.exists(os.path.join(output_dir, 'naics_set.csv'))

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    orca.clear_cache()
def full_run(resume_after=None, chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None, trace_od=None, check_for_variability=None):

    configs_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'example', 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    settings = inject_settings(configs_dir,
                               households_sample_size=households_sample_size,
                               chunk_size=chunk_size,
                               trace_hh_id=trace_hh_id,
                               trace_od=trace_od,
                               check_for_variability=check_for_variability)

    orca.clear_cache()

    tracing.config_logger()

    MODELS = settings['models']

    pipeline.run(models=MODELS, resume_after=resume_after)

    tours = pipeline.get_table('tours')
    tour_count = len(tours.index)

    pipeline.close_pipeline()

    orca.clear_cache()

    return tour_count
def test_weighting():

    configs_dir = os.path.join(os.path.dirname(__file__), '..', '..',
                               'example_survey_weighting', 'configs')
    inject.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), '..', '..',
                            'example_survey_weighting', 'data')
    inject.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    inject.add_injectable("output_dir", output_dir)

    inject.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'input_pre_processor',
        'setup_data_structures',
        'initial_seed_balancing',
        'meta_control_factoring',
        'final_seed_balancing',
        'summarize',
        'write_tables'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    summary_hh_weights = pipeline.get_table('summary_hh_weights')
    total_summary_hh_weights = summary_hh_weights['SUBREGCluster_balanced_weight'].sum()

    seed_households = pd.read_csv(os.path.join(data_dir, 'seed_households.csv'))
    total_seed_households_weights = seed_households['HHweight'].sum()

    assert abs(total_summary_hh_weights - total_seed_households_weights) < 1

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    inject.clear_cache()
def test_full_run2_repop_replace():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'input_pre_processor.table_list=repop_input_table_list',
        'repop_setup_data_structures',
        'initial_seed_balancing.final=true',
        'integerize_final_seed_weights.repop',
        'repop_balancing',
        'expand_households.repop;replace',
        'write_synthetic_population.repop',
        'write_tables.repop',
    ]

    pipeline.run(models=_MODELS, resume_after='summarize')

    expanded_household_ids = pipeline.get_table('expanded_household_ids')
    assert isinstance(expanded_household_ids, pd.DataFrame)

    taz_hh_counts = expanded_household_ids.groupby('TAZ').size()
    assert len(taz_hh_counts) == TAZ_COUNT
    assert taz_hh_counts.loc[100] == TAZ_100_HH_REPOP_COUNT

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    orca.clear_cache()
def run():

    config.handle_standard_args()

    # specify None for a pseudo random base seed
    # inject.add_injectable('rng_base_seed', 0)

    tracing.config_logger()
    config.filter_warnings()

    tracing.delete_csv_files()

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = setting('resume_after', None)

    if resume_after:
        print("resume_after", resume_after)

    pipeline.run(models=setting('models'), resume_after=resume_after)

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()
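# Sketch of the usual entry point for the run() driver above, assuming this
# module is executed directly as a run_simulation.py style script:
if __name__ == '__main__':
    run()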
def run(args):
    """
    Run the models. Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'. Both '--config' and '--data' can be
    specified multiple times. Directories listed first take precedence.

    returns:
        int: sys.exit exit code
    """

    # register abm steps and other abm-specific injectables
    # by default, assume we are running activitysim.abm
    # other callers (e.g. populationsim) will have to arrange to register their own
    # steps and injectables (presumably) in a custom run_simulation.py
    # instead of using the 'activitysim run' command
    if not inject.is_injectable('preload_injectables'):
        from activitysim import abm  # register abm steps and other abm-specific injectables

    tracing.config_logger(basic=True)
    handle_standard_args(args)  # possibly update injectables

    # legacy support for run_list setting nested 'models' and 'resume_after' settings
    if config.setting('run_list'):
        warnings.warn("Support for 'run_list' settings group will be removed.\n"
                      "The run_list.steps setting is renamed 'models'.\n"
                      "The run_list.resume_after setting is renamed 'resume_after'.\n"
                      "Specify both 'models' and 'resume_after' directly in settings config file.",
                      FutureWarning)
        run_list = config.setting('run_list')
        if 'steps' in run_list:
            assert not config.setting('models'), \
                f"Don't expect 'steps' in run_list and 'models' as stand-alone setting!"
            config.override_setting('models', run_list['steps'])

        if 'resume_after' in run_list:
            assert not config.setting('resume_after'), \
                f"Don't expect 'resume_after' both in run_list and as stand-alone setting!"
            config.override_setting('resume_after', run_list['resume_after'])

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = config.setting('resume_after', None)

    # cleanup if not resuming
    if not resume_after:
        cleanup_output_files()
    elif config.setting('cleanup_trace_files_on_resume', False):
        tracing.delete_trace_files()

    tracing.config_logger(basic=False)  # update using possibly new logging configs
    config.filter_warnings()
    logging.captureWarnings(capture=True)

    # directories
    for k in ['configs_dir', 'settings_file_name', 'data_dir', 'output_dir']:
        logger.info('SETTING %s: %s' % (k, inject.get_injectable(k, None)))

    log_settings = inject.get_injectable('log_settings', {})
    for k in log_settings:
        logger.info('SETTING %s: %s' % (k, config.setting(k)))

    t0 = tracing.print_elapsed_time()

    if config.setting('multiprocess', False):
        logger.info('run multiprocess simulation')

        from activitysim.core import mp_tasks
        run_list = mp_tasks.get_run_list()
        injectables = {k: inject.get_injectable(k) for k in INJECTABLES}
        mp_tasks.run_multiprocess(run_list, injectables)

        assert not pipeline.is_open()

        if config.setting('cleanup_pipeline_after_run', False):
            pipeline.cleanup_pipeline()
    else:
        logger.info('run single process simulation')

        pipeline.run(models=config.setting('models'), resume_after=resume_after)

        if config.setting('cleanup_pipeline_after_run', False):
            pipeline.cleanup_pipeline()  # has side effect of closing open pipeline
        else:
            pipeline.close_pipeline()

        chunk.log_write_hwm()

    tracing.print_elapsed_time('all models', t0)

    return 0
import os

os.getcwd()
os.chdir('rFirm')

from activitysim.core import inject_defaults
from activitysim.core import tracing
from activitysim.core import pipeline
from activitysim.core import inject
from activitysim.core.config import setting

import rFirm

tracing.config_logger()

pipeline.open_pipeline('_')
# pipeline.preload_injectables()

df = pipeline.get_table("table1").to_frame()

import pandas as pd

file_path = "/Users/jeff.doyle/work/rFirm/example/regression_data/results/firm_sim_types/outputs/Firms.csv"

rfirms = (
    pd.read_csv(file_path, comment='#')
    .rename(columns={'BusID': 'bus_id', 'TAZ1': 'TAZ', 'Model_EmpCat': 'model_emp_cat'})
    .set_index('bus_id')
)
import os
import time
import logging

import numpy as np
import pandas as pd

from activitysim.core import inject
from activitysim.core import tracing

import extensions

# you will want to configure this with the locations of the canonical datasets
# (only the last assignment takes effect; keep the one that applies)
# DATA_REPO = "C:/projects/sandag-asim/toRSG/output/"
# DATA_REPO = "E:/activitysim/project/output/"
DATA_REPO = "/Users/jeff.doyle/work/activitysim-data/sandag_zone/output/"

COMPARE_RESULTS = False

tracing.config_logger()
logger = logging.getLogger('activitysim')


@inject.injectable(override=True)
def output_dir():
    if not os.path.exists('output'):
        os.makedirs('output')  # make directory if needed
    return 'output'


@inject.injectable(override=True)
def data_dir():
    return os.path.join(DATA_REPO)
def test_mini_pipeline_run():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    orca.clear_cache()

    tracing.config_logger()

    # assert len(orca.get_table("households").index) == HOUSEHOLDS_SAMPLE_SIZE

    _MODELS = [
        'initialize',
        'compute_accessibility',
        'school_location_sample',
        'school_location_logsums',
        'school_location_simulate',
        'workplace_location_sample',
        'workplace_location_logsums',
        'workplace_location_simulate',
        'auto_ownership_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    auto_choice = pipeline.get_table("households").auto_ownership

    # regression test: these are among the first 10 households in households table
    hh_ids = [464138, 1918238, 2201602]
    choices = [0, 1, 2]
    expected_choice = pd.Series(choices, index=pd.Index(hh_ids, name="HHID"),
                                name='auto_ownership')

    print("auto_choice\n", auto_choice.head(10))
    pdt.assert_series_equal(auto_choice[hh_ids], expected_choice)

    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    mtf_choice = pipeline.get_table("persons").mandatory_tour_frequency

    # these choices are nonsensical as the test mandatory_tour_frequency spec is very truncated
    per_ids = [24375, 92744, 172491]
    choices = ['school2', 'work_and_school', 'work1']
    expected_choice = pd.Series(choices, index=pd.Index(per_ids, name='PERID'),
                                name='mandatory_tour_frequency')

    print("mtf_choice\n", mtf_choice.head(20))

    # mtf_choice
    # PERID
    # 23647                 NaN
    # 24203                 NaN
    # 24375             school2
    # 24687                 NaN
    # 24824                 NaN
    # 24975                 NaN
    # 25027                 NaN
    # 25117                 NaN
    # 25772                 NaN
    # 25871                 NaN
    # 26284                 NaN
    # 26863                 NaN
    # 27059                 NaN
    # 92233                 NaN
    # 92382             school1
    # 92744     work_and_school
    # 92823                 NaN
    # 93172             school2
    # 93774                 NaN
    # 172491              work1
    # Name: mandatory_tour_frequency, dtype: object
    pdt.assert_series_equal(mtf_choice[per_ids], expected_choice)

    # try to get a non-existent table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "not in checkpointed tables" in str(excinfo.value)

    # try to get an existing table from a non-existent checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("households", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close_pipeline()
    orca.clear_cache()
    close_handlers()
def run(args):
    """
    Run the models. Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'. Both '--config' and '--data' can be
    specified multiple times. Directories listed first take precedence.

    returns:
        int: sys.exit exit code
    """

    # register abm steps and other abm-specific injectables
    # by default, assume we are running activitysim.abm
    # other callers (e.g. populationsim) will have to arrange to register their own
    # steps and injectables (presumably) in a custom run_simulation.py
    # instead of using the 'activitysim run' command
    if not inject.is_injectable('preload_injectables'):
        from activitysim import abm  # register abm steps and other abm-specific injectables

    tracing.config_logger(basic=True)
    handle_standard_args(args)  # possibly update injectables

    # legacy support for run_list setting nested 'models' and 'resume_after' settings
    if config.setting('run_list'):
        warnings.warn("Support for 'run_list' settings group will be removed.\n"
                      "The run_list.steps setting is renamed 'models'.\n"
                      "The run_list.resume_after setting is renamed 'resume_after'.\n"
                      "Specify both 'models' and 'resume_after' directly in settings config file.",
                      FutureWarning)
        run_list = config.setting('run_list')
        if 'steps' in run_list:
            assert not config.setting('models'), \
                f"Don't expect 'steps' in run_list and 'models' as stand-alone setting!"
            config.override_setting('models', run_list['steps'])

        if 'resume_after' in run_list:
            assert not config.setting('resume_after'), \
                f"Don't expect 'resume_after' both in run_list and as stand-alone setting!"
            config.override_setting('resume_after', run_list['resume_after'])

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = config.setting('resume_after', None)

    # cleanup if not resuming
    if not resume_after:
        cleanup_output_files()
    elif config.setting('cleanup_trace_files_on_resume', False):
        tracing.delete_trace_files()

    tracing.config_logger(basic=False)  # update using possibly new logging configs
    config.filter_warnings()
    logging.captureWarnings(capture=True)

    # directories
    for k in ['configs_dir', 'settings_file_name', 'data_dir', 'output_dir']:
        logger.info('SETTING %s: %s' % (k, inject.get_injectable(k, None)))

    log_settings = inject.get_injectable('log_settings', {})
    for k in log_settings:
        logger.info('SETTING %s: %s' % (k, config.setting(k)))

    # OMP_NUM_THREADS: openmp
    # OPENBLAS_NUM_THREADS: openblas
    # MKL_NUM_THREADS: mkl
    for env in ['MKL_NUM_THREADS', 'OMP_NUM_THREADS', 'OPENBLAS_NUM_THREADS']:
        logger.info(f"ENV {env}: {os.getenv(env)}")

    np_info_keys = [
        'atlas_blas_info',
        'atlas_blas_threads_info',
        'atlas_info',
        'atlas_threads_info',
        'blas_info',
        'blas_mkl_info',
        'blas_opt_info',
        'lapack_info',
        'lapack_mkl_info',
        'lapack_opt_info',
        'mkl_info'
    ]

    for cfg_key in np_info_keys:
        info = np.__config__.get_info(cfg_key)
        if info:
            for info_key in ['libraries']:
                if info_key in info:
                    logger.info(f"NUMPY {cfg_key} {info_key}: {info[info_key]}")

    t0 = tracing.print_elapsed_time()

    try:
        if config.setting('multiprocess', False):
            logger.info('run multiprocess simulation')

            from activitysim.core import mp_tasks
            injectables = {k: inject.get_injectable(k) for k in INJECTABLES}
            mp_tasks.run_multiprocess(injectables)

            assert not pipeline.is_open()

            if config.setting('cleanup_pipeline_after_run', False):
                pipeline.cleanup_pipeline()
        else:
            logger.info('run single process simulation')

            pipeline.run(models=config.setting('models'), resume_after=resume_after)

            if config.setting('cleanup_pipeline_after_run', False):
                pipeline.cleanup_pipeline()  # has side effect of closing open pipeline
            else:
                pipeline.close_pipeline()

            mem.log_global_hwm()  # main process

    except Exception:
        # log time until error and the error traceback
        tracing.print_elapsed_time('all models until this error', t0)
        logger.exception('activitysim run encountered an unrecoverable error')
        raise

    chunk.consolidate_logs()
    mem.consolidate_logs()

    tracing.print_elapsed_time('all models', t0)

    return 0