def full_run(configs_dir, data_dir, resume_after=None, chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None, trace_od=None, check_for_variability=None,
             two_zone=True):
    """
    Run the complete model step list from settings and return the tour count.

    Injects run settings (sample size, chunking, tracing ids, variability
    checking), runs the pipeline, and returns the row count of the
    checkpointed 'tours' table.  NOTE: does not close the pipeline, so the
    caller can still inspect checkpointed tables afterwards.
    """
    setup_dirs(configs_dir, data_dir)

    settings = inject_settings(
        two_zone=two_zone,
        households_sample_size=households_sample_size,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_od=trace_od,
        check_for_variability=check_for_variability,
        use_shadow_pricing=False)  # shadow pricing breaks replicability when sample_size varies

    # model step list comes from the (possibly overridden) settings
    MODELS = settings['models']

    pipeline.run(models=MODELS, resume_after=resume_after)

    tours = pipeline.get_table('tours')
    tour_count = len(tours.index)

    return tour_count
def test_pipeline_checkpoint_drop():
    """
    Run a step list that creates, columns, and drops tables, then verify
    retrieval semantics: never-checkpointed and dropped tables raise
    RuntimeError, while a dropped table is still retrievable from an
    earlier checkpoint at which it existed.
    """
    setup()

    _MODELS = [
        'step1',
        '_step2',
        '_step_add_col.table_name=table2;column_name=c2',
        '_step_forget_tab.table_name=table2',
        'step3',
        'step_forget_tab.table_name=table3',
    ]
    pipeline.run(models=_MODELS, resume_after=None)

    checkpoints = pipeline.get_checkpoints()
    # FIX: was a Python 2 print statement; use the print() function,
    # consistent with the other pipeline tests in this file
    print("checkpoints\n", checkpoints)

    pipeline.get_table("table1")

    # table2 was created by an underscore-prefixed (uncheckpointed) step,
    # so it was never checkpointed
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("table2")
    assert "never checkpointed" in str(excinfo.value)

    # can't get a dropped table from current checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("table3")
    assert "was dropped" in str(excinfo.value)

    # ensure that we can still get table3 from a checkpoint at which it existed
    pipeline.get_table("table3", checkpoint_name="step3")

    pipeline.close_pipeline()
    close_handlers()
def full_run(resume_after=None, chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None, trace_od=None, check_for_variability=None):
    """
    Run the full example model list and return the number of tours.

    Uses the shared example configs directory located relative to this test
    module.  NOTE: does not close the pipeline, so callers can inspect the
    checkpointed tables afterwards.
    """
    # example configs live three levels up from this test module
    configs_dir = os.path.join(os.path.dirname(__file__),
                               '..', '..', '..', 'example', 'configs')
    setup_dirs(configs_dir)

    settings = inject_settings(
        configs_dir,
        households_sample_size=households_sample_size,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_od=trace_od,
        check_for_variability=check_for_variability,
        use_shadow_pricing=False)  # shadow pricing breaks replicability when sample_size varies

    MODELS = settings['models']

    pipeline.run(models=MODELS, resume_after=resume_after)

    tours = pipeline.get_table('tours')
    tour_count = len(tours.index)

    return tour_count
def test_full_run2_repop_replace():
    """
    Repopulate ('repop') the synthetic population, replacing the expanded
    households from the prior full run.

    Note: tests are run in alphabetical order.  This test expects to find
    the pipeline h5 file from test_full_run1 in the output folder, and
    resumes after its 'summarize' checkpoint.
    """
    _MODELS = [
        'input_pre_processor.table_list=repop_input_table_list;repop',
        'repop_setup_data_structures',
        'initial_seed_balancing.final=true;repop',
        'integerize_final_seed_weights.repop',
        'repop_balancing',
        'expand_households.repop;replace',
        'write_synthetic_population.repop',
        'write_tables.repop',
    ]

    pipeline.run(models=_MODELS, resume_after='summarize')

    expanded_household_ids = pipeline.get_table('expanded_household_ids')
    assert isinstance(expanded_household_ids, pd.DataFrame)

    # regression check household counts per TAZ
    taz_hh_counts = expanded_household_ids.groupby('TAZ').size()
    assert len(taz_hh_counts) == TAZ_COUNT
    assert taz_hh_counts.loc[100] == TAZ_100_HH_REPOP_COUNT

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    inject.clear_cache()
def test_full_run1():
    """
    Run the full population synthesis step list and regression-check the
    expanded household counts and the expected output files.
    """
    _MODELS = [
        'input_pre_processor',
        'setup_data_structures',
        'initial_seed_balancing',
        'meta_control_factoring',
        'final_seed_balancing',
        'integerize_final_seed_weights',
        'sub_balancing.geography=TRACT',
        'sub_balancing.geography=TAZ',
        'expand_households',
        'summarize',
        'write_tables',
        'write_synthetic_population',
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    expanded_household_ids = pipeline.get_table('expanded_household_ids')
    assert isinstance(expanded_household_ids, pd.DataFrame)

    # regression check household counts per TAZ
    taz_hh_counts = expanded_household_ids.groupby('TAZ').size()
    assert len(taz_hh_counts) == TAZ_COUNT
    assert taz_hh_counts.loc[100] == TAZ_100_HH_COUNT

    # output_tables action: skip — households.csv should NOT be written
    output_dir = inject.get_injectable('output_dir')
    assert not os.path.exists(os.path.join(output_dir, 'households.csv'))
    assert os.path.exists(os.path.join(output_dir, 'summary_DISTRICT_1.csv'))

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    inject.clear_cache()
def full_run(resume_after=None, chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None, trace_od=None, check_for_variability=None):
    """
    Run the configured model step list end to end and return the tour count.

    NOTE: does not close the pipeline, so callers can inspect checkpointed
    tables afterwards.
    """
    setup_dirs()

    settings = inject_settings(
        households_sample_size=households_sample_size,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_od=trace_od,
        testing_fail_trip_destination=False,
        check_for_variability=check_for_variability,
        want_dest_choice_sample_tables=False,
        use_shadow_pricing=False)  # shadow pricing breaks replicability when sample_size varies

    # FIXME should enable testing_fail_trip_destination?

    MODELS = settings['models']

    pipeline.run(models=MODELS, resume_after=resume_after)

    tours = pipeline.get_table('tours')
    tour_count = len(tours.index)

    return tour_count
def test_balancer_step():
    """Smoke test: run the trip balancer plus table writer end to end."""
    setup_working_dir('example_balance', inherit=True)
    steps = ['balance_trips', 'write_tables']
    pipeline.run(steps)
    pipeline.close_pipeline()
def test_full_run2():
    """
    Run the population synthesis steps against the alternate configs2/data2
    setup (which adds DISTRICT-level sub-balancing) and check the outputs.
    """
    configs_dir = os.path.join(os.path.dirname(__file__), 'configs2')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data2')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'input_pre_processor',
        'setup_data_structures',
        'initial_seed_balancing',
        'meta_control_factoring',
        'final_seed_balancing',
        'integerize_final_seed_weights',
        'sub_balancing.geography = DISTRICT',
        'sub_balancing.geography = TRACT',
        'sub_balancing.geography=TAZ',
        'expand_households',
        'summarize',
        'write_results'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    assert isinstance(pipeline.get_table('expanded_household_ids'), pd.DataFrame)

    assert os.path.exists(os.path.join(output_dir, 'summary_DISTRICT.csv'))

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    orca.clear_cache()
def run(args):
    """
    Run bca4abm. Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'.
    """
    if args.working_dir and os.path.exists(args.working_dir):
        os.chdir(args.working_dir)

    # explicit directory options override working-dir defaults
    if args.config:
        inject.add_injectable('configs_dir', args.config)
    if args.data:
        inject.add_injectable('data_dir', args.data)
    if args.output:
        inject.add_injectable('output_dir', args.output)

    # all three directories must be injectable and must exist on disk
    for injectable in ['configs_dir', 'data_dir', 'output_dir']:
        try:
            dir_path = inject.get_injectable(injectable)
        except RuntimeError:
            sys.exit('Error: please specify either a --working_dir '
                     "containing 'configs', 'data', and 'output' folders "
                     'or all three of --config, --data, and --output')
        if not os.path.exists(dir_path):
            sys.exit("Could not find %s '%s'" % (injectable, os.path.abspath(dir_path)))

    if args.pipeline:
        inject.add_injectable('pipeline_file_name', args.pipeline)

    if args.resume:
        override_setting('resume_after', args.resume)

    tracing.config_logger()
    tracing.delete_csv_files()  # only modifies output_dir
    warnings.simplefilter('always')
    logging.captureWarnings(capture=True)

    t0 = tracing.print_elapsed_time()

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = setting('resume_after', None)

    if resume_after:
        print('resume_after: %s' % resume_after)

    pipeline.run(models=setting('models'), resume_after=resume_after)

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    t0 = tracing.print_elapsed_time('all models', t0)
def run(run_list, injectables=None):
    """Dispatch the run to multi- or single-process mode per run_list."""
    multiprocess = run_list['multiprocess']
    if multiprocess:
        logger.info("run multiprocess simulation")
        mp_tasks.run_multiprocess(run_list, injectables)
    else:
        logger.info("run single process simulation")
        pipeline.run(models=run_list['models'],
                     resume_after=run_list['resume_after'])
        pipeline.close_pipeline()
        # record the memory high-water mark for the single-process run
        mem.log_global_hwm()
def run(run_list, injectables=None):
    """Run the model list either multiprocess or in a single process."""
    if not run_list['multiprocess']:
        logger.info("run single process simulation")
        pipeline.run(models=run_list['models'],
                     resume_after=run_list['resume_after'])
        pipeline.close_pipeline()
        # log the chunking high-water mark after the run completes
        chunk.log_write_hwm()
    else:
        logger.info("run multiprocess simulation")
        mp_tasks.run_multiprocess(run_list, injectables)
def test_zero_chunk_size():
    """Resume after the aggregate OD processor with chunking disabled."""
    settings = inject_settings(chunk_size=0)
    inject.clear_cache()
    tracing.config_logger()
    pipeline.run(models=settings['models'],
                 resume_after='aggregate_od_processor')
    pipeline.close_pipeline()
def full_run(resume_after=None, chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None, trace_od=None, check_for_variability=None):
    """
    Run the mini example model list and return the tour count.

    Closes the pipeline and clears the orca cache before returning.
    """
    # example configs live three levels up from this test module
    configs_dir = os.path.join(os.path.dirname(__file__),
                               '..', '..', '..', 'example', 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    inject_settings(configs_dir,
                    households_sample_size=households_sample_size,
                    chunk_size=chunk_size,
                    trace_hh_id=trace_hh_id,
                    trace_od=trace_od,
                    check_for_variability=check_for_variability)

    orca.clear_cache()

    tracing.config_logger()

    # assert orca.get_injectable("chunk_size") == chunk_size

    _MODELS = [
        'compute_accessibility',
        'school_location_sample',
        'school_location_logsums',
        'school_location_simulate',
        'workplace_location_sample',
        'workplace_location_logsums',
        'workplace_location_simulate',
        'auto_ownership_simulate',
        'cdap_simulate',
        'mandatory_tour_frequency',
        'mandatory_scheduling',
        'non_mandatory_tour_frequency',
        'destination_choice',
        'non_mandatory_scheduling',
        'tour_mode_choice_simulate',
        'create_simple_trips',
        'trip_mode_choice_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=resume_after)

    tours = pipeline.get_table('tours')
    tour_count = len(tours.index)

    # FIX: use close_pipeline() consistently with the rest of this file;
    # pipeline.close() is the obsolete name for the same operation
    pipeline.close_pipeline()

    orca.clear_cache()

    return tour_count
def run(args):
    """
    Run the models. Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with '--config',
    '--data', and '--output'. Both '--config' and '--data' can be specified
    multiple times. Directories listed first take precedence.
    """
    from activitysim import abm  # register injectables

    tracing.config_logger(basic=True)
    handle_standard_args(args)  # possibly update injectables
    tracing.config_logger(basic=False)  # update using possibly new logging configs
    config.filter_warnings()
    logging.captureWarnings(capture=True)

    log_settings()

    t0 = tracing.print_elapsed_time()

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = config.setting('resume_after', None)

    # cleanup if not resuming
    if not resume_after:
        cleanup_output_files()
    elif config.setting('cleanup_trace_files_on_resume', False):
        tracing.delete_trace_files()

    if config.setting('multiprocess', False):
        logger.info('run multiprocess simulation')

        from activitysim.core import mp_tasks
        run_list = mp_tasks.get_run_list()
        injectables = {k: inject.get_injectable(k) for k in INJECTABLES}
        mp_tasks.run_multiprocess(run_list, injectables)
    else:
        logger.info('run single process simulation')

        pipeline.run(models=config.setting('models'), resume_after=resume_after)

        pipeline.close_pipeline()

        chunk.log_write_hwm()

    tracing.print_elapsed_time('all models', t0)
def run_abm(models, resume_after=None, chunk_size=None, trace_hh_id=None, trace_od=None):
    """
    Inject run settings, reset the injectable cache, and run the given steps.

    Leaves the pipeline open so callers can inspect checkpointed tables.
    """
    inject_settings(chunk_size=chunk_size,
                    trace_hh_id=trace_hh_id,
                    trace_od=trace_od)
    inject.clear_cache()
    tracing.config_logger()
    pipeline.run(models=models, resume_after=resume_after)
def test_run_4step():
    """Run the full model list from settings, then close the pipeline."""
    settings = inject_settings(chunk_size=None, trace_hh_id=None, trace_od=None)
    inject.clear_cache()
    tracing.config_logger()
    pipeline.run(models=settings['models'], resume_after=None)
    pipeline.close_pipeline()
def test_pipeline_run():
    """
    Run a single step, then run another via run_model with a step argument,
    and verify error handling for bogus tables and checkpoints.
    """
    # remove skim injectables possibly left over from earlier tests
    orca.orca._INJECTABLES.pop('skim_dict', None)
    orca.orca._INJECTABLES.pop('skim_stack', None)

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    orca.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'step1',
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    table1 = pipeline.get_table("table1").column1

    # test that model arg is passed to step
    pipeline.run_model('step2.table_name=table2')

    table2 = pipeline.get_table("table2").column1

    # try to get a non-existant table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "not in checkpointed tables" in str(excinfo.value)

    # try to get an existing table from a non-existant checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("table1", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close_pipeline()
    orca.clear_cache()

    close_handlers()
def test_mini_pipeline_run():
    """
    Run an abbreviated model list through auto ownership, then step through
    cdap and mandatory tour frequency, regression-checking along the way.
    """
    setup_dirs()
    inject_settings(households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
                    write_skim_cache=True
                    )

    _MODELS = [
        'initialize_landuse',
        'compute_accessibility',
        'initialize_households',
        'school_location',
        'workplace_location',
        'auto_ownership_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    regress_mini_auto()

    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    regress_mini_mtf()
    regress_mini_location_choice_logsums()

    # try to get a non-existant table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "never checkpointed" in str(excinfo.value)

    # try to get an existing table from a non-existant checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("households", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    # should create optional workplace_location_sample table
    workplace_location_sample_df = pipeline.get_table("workplace_location_sample")
    assert 'mode_choice_logsum' in workplace_location_sample_df

    pipeline.close_pipeline()
    inject.clear_cache()
    close_handlers()
def test_full_run1():
    """
    Run the step list from the 'run_list' setting against the example data
    and scenarios directories.
    """
    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    # data_dir = os.path.join(os.path.dirname(__file__), 'data')
    data_dir = os.path.join(os.path.dirname(__file__), '..', '..', 'example', 'data')
    orca.add_injectable("data_dir", data_dir)

    # scenarios_dir = os.path.join(os.path.dirname(__file__), 'scenarios')
    scenarios_dir = os.path.join(os.path.dirname(__file__), '..', '..', 'example', 'scenarios')
    orca.add_injectable("scenarios_dir", scenarios_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()

    tracing.config_logger()

    # run list from settings file is dict with list of 'steps' and optional 'resume_after'
    run_list = setting('run_list')
    assert 'steps' in run_list, "Did not find steps in run_list"

    # list of steps and possible resume_after in run_list
    steps = run_list.get('steps')

    pipeline.run(models=steps, resume_after=None)

    # geo_crosswalk = pipeline.get_table('geo_crosswalk')
    # assert geo_crosswalk.index.name == 'TAZ'
    # assert 'FAF4' in geo_crosswalk.columns
    # assert 'FIPS' in geo_crosswalk.columns
    # assert os.path.exists(os.path.join(output_dir, 'naics_set.csv'))

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    orca.clear_cache()
def test_mini_pipeline_run():
    """
    Run a short model list through auto ownership with a small household
    sample, then step through cdap and mandatory tour frequency and check
    error handling for missing tables and checkpoints.
    """
    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')

    setup_dirs(configs_dir)

    inject_settings(configs_dir,
                    households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
                    # use_shadow_pricing=True
                    )

    _MODELS = [
        'initialize_landuse',
        'compute_accessibility',
        'initialize_households',
        'school_location',
        'workplace_location',
        'auto_ownership_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    regress_mini_auto()

    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    regress_mini_mtf()

    # try to get a non-existant table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "never checkpointed" in str(excinfo.value)

    # try to get an existing table from a non-existant checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("households", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close_pipeline()
    inject.clear_cache()
    close_handlers()
def test_load_cached_accessibility():
    """
    Verify that a cached accessibility table listed in input_table_list is
    loaded in place of running the compute_accessibility step.
    """
    inject.clear_cache()
    inject.reinject_decorated_tables()

    data_dir = [os.path.join(os.path.dirname(__file__), 'data'),
                example_path('data')]
    setup_dirs(data_dir=data_dir)

    #
    # add OPTIONAL cached table accessibility to input_table_list
    # activitysim.abm.tables.land_use.accessibility() will load this table if listed here
    # presumably independently calculated outside activitysim or a cached copy created during a previous run
    #
    settings = config.read_settings_file('settings.yaml', mandatory=True)
    input_table_list = settings.get('input_table_list')
    input_table_list.append({
        'tablename': 'accessibility',
        'filename': 'cached_accessibility.csv',
        'index_col': 'zone_id'
    })
    inject_settings(households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
                    input_table_list=input_table_list)

    _MODELS = [
        'initialize_landuse',
        # 'compute_accessibility',  # we load accessibility table ordinarily created by compute_accessibility
        'initialize_households',
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    accessibility_df = pipeline.get_table("accessibility")

    assert 'auPkRetail' in accessibility_df

    pipeline.close_pipeline()
    inject.clear_cache()
    close_handlers()
def test_pipeline_run():
    """
    Register steps, run them, and verify checkpointed-table retrieval
    semantics for current and named checkpoints, plus error handling for
    bogus tables and checkpoints.
    """
    inject.add_step('step1', steps.step1)
    inject.add_step('step2', steps.step2)
    inject.add_step('step3', steps.step3)
    inject.add_step('step_add_col', steps.step_add_col)
    inject.dump_state()

    _MODELS = [
        'step1',
        'step2',
        'step3',
        'step_add_col.table_name=table2;column_name=c2'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    checkpoints = pipeline.get_checkpoints()
    print("checkpoints\n", checkpoints)

    c2 = pipeline.get_table("table2").c2

    # get table1 from an earlier named checkpoint
    pipeline.get_table("table1", checkpoint_name="step3")

    # try to get a table from a step before it was checkpointed
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("table2", checkpoint_name="step1")
    assert "not in checkpoint 'step1'" in str(excinfo.value)

    # try to get a non-existant table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "never checkpointed" in str(excinfo.value)

    # try to get an existing table from a non-existant checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("table1", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close_pipeline()
    close_handlers()
def full_run(resume_after=None, chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None, trace_od=None, check_for_variability=None):
    """
    Run the model list from settings and return the tour count.

    Closes the pipeline and clears the orca cache before returning.
    """
    # example configs live three levels up from this test module
    configs_dir = os.path.join(os.path.dirname(__file__),
                               '..', '..', '..', 'example', 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    settings = inject_settings(
        configs_dir,
        households_sample_size=households_sample_size,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_od=trace_od,
        check_for_variability=check_for_variability)

    orca.clear_cache()

    tracing.config_logger()

    MODELS = settings['models']

    pipeline.run(models=MODELS, resume_after=resume_after)

    tours = pipeline.get_table('tours')
    tour_count = len(tours.index)

    pipeline.close_pipeline()

    orca.clear_cache()

    return tour_count
def test_weighting():
    """
    Run the survey-weighting example and verify that the balanced summary
    household weights total approximately the seed household weights.
    """
    configs_dir = os.path.join(os.path.dirname(__file__),
                               '..', '..', 'example_survey_weighting', 'configs')
    inject.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__),
                            '..', '..', 'example_survey_weighting', 'data')
    inject.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    inject.add_injectable("output_dir", output_dir)

    inject.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'input_pre_processor',
        'setup_data_structures',
        'initial_seed_balancing',
        'meta_control_factoring',
        'final_seed_balancing',
        'summarize',
        'write_tables'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    summary_hh_weights = pipeline.get_table('summary_hh_weights')
    total_summary_hh_weights = summary_hh_weights[
        'SUBREGCluster_balanced_weight'].sum()

    seed_households = pd.read_csv(os.path.join(data_dir, 'seed_households.csv'))
    total_seed_households_weights = seed_households['HHweight'].sum()

    # balancing should preserve total weight to within rounding
    assert abs(total_summary_hh_weights - total_seed_households_weights) < 1

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    inject.clear_cache()
def test_full_run2_repop_replace():
    """
    Repopulate ('repop') and replace the expanded households, resuming
    after the 'summarize' checkpoint of the prior full run.
    """
    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'input_pre_processor.table_list=repop_input_table_list',
        'repop_setup_data_structures',
        'initial_seed_balancing.final=true',
        'integerize_final_seed_weights.repop',
        'repop_balancing',
        'expand_households.repop;replace',
        'write_synthetic_population.repop',
        'write_tables.repop',
    ]

    pipeline.run(models=_MODELS, resume_after='summarize')

    expanded_household_ids = pipeline.get_table('expanded_household_ids')
    assert isinstance(expanded_household_ids, pd.DataFrame)

    # regression check household counts per TAZ
    taz_hh_counts = expanded_household_ids.groupby('TAZ').size()
    assert len(taz_hh_counts) == TAZ_COUNT
    assert taz_hh_counts.loc[100] == TAZ_100_HH_REPOP_COUNT

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    orca.clear_cache()
def test_full_run1():
    """
    Run the full synthesis step list and regression-check expanded
    household counts and the district summary output file.
    """
    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'input_pre_processor',
        'setup_data_structures',
        'initial_seed_balancing',
        'meta_control_factoring',
        'final_seed_balancing',
        'integerize_final_seed_weights',
        'sub_balancing.geography = TRACT',
        'sub_balancing.geography=TAZ',
        'expand_households',
        'synthesize_population',
        'write_results',
        'summarize'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    expanded_household_ids = pipeline.get_table('expanded_household_ids')
    assert isinstance(expanded_household_ids, pd.DataFrame)

    # regression check household counts per TAZ
    taz_hh_counts = expanded_household_ids.groupby('TAZ').size()
    assert len(taz_hh_counts) == TAZ_COUNT
    assert taz_hh_counts.loc[100] == TAZ_100_HH_COUNT

    assert os.path.exists(os.path.join(output_dir, 'summary_DISTRICT_1.csv'))

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    orca.clear_cache()
def run(): config.handle_standard_args() # specify None for a pseudo random base seed # inject.add_injectable('rng_base_seed', 0) tracing.config_logger() config.filter_warnings() tracing.delete_csv_files() # If you provide a resume_after argument to pipeline.run # the pipeline manager will attempt to load checkpointed tables from the checkpoint store # and resume pipeline processing on the next submodel step after the specified checkpoint resume_after = setting('resume_after', None) if resume_after: print("resume_after", resume_after) pipeline.run(models=setting('models'), resume_after=resume_after) # tables will no longer be available after pipeline is closed pipeline.close_pipeline()
def test_mini_pipeline_run():
    """
    Run the mini model list through auto ownership, regression-test the
    auto ownership choices, then step through cdap and mandatory tour
    frequency and regression-test those choices too.
    """
    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    orca.clear_cache()

    tracing.config_logger()

    # assert len(orca.get_table("households").index) == HOUSEHOLDS_SAMPLE_SIZE

    _MODELS = [
        'initialize',
        'compute_accessibility',
        'school_location_sample',
        'school_location_logsums',
        'school_location_simulate',
        'workplace_location_sample',
        'workplace_location_logsums',
        'workplace_location_simulate',
        'auto_ownership_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    auto_choice = pipeline.get_table("households").auto_ownership

    # regression test: these are among the first 10 households in households table
    hh_ids = [464138, 1918238, 2201602]
    choices = [0, 1, 2]
    expected_choice = pd.Series(choices, index=pd.Index(hh_ids, name="HHID"),
                                name='auto_ownership')

    # FIX: was a Python 2 print statement; use the print() function
    print("auto_choice\n", auto_choice.head(10))

    pdt.assert_series_equal(auto_choice[hh_ids], expected_choice)

    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    mtf_choice = pipeline.get_table("persons").mandatory_tour_frequency

    # these choices are nonsensical as the test mandatory_tour_frequency spec is very truncated
    per_ids = [24375, 92744, 172491]
    choices = ['school2', 'work_and_school', 'work1']
    expected_choice = pd.Series(choices, index=pd.Index(per_ids, name='PERID'),
                                name='mandatory_tour_frequency')

    # FIX: was a Python 2 print statement; use the print() function
    print("mtf_choice\n", mtf_choice.head(20))

    # mtf_choice
    # PERID
    # 23647                NaN
    # 24203                NaN
    # 24375            school2
    # 24687                NaN
    # 24824                NaN
    # 24975                NaN
    # 25027                NaN
    # 25117                NaN
    # 25772                NaN
    # 25871                NaN
    # 26284                NaN
    # 26863                NaN
    # 27059                NaN
    # 92233                NaN
    # 92382            school1
    # 92744    work_and_school
    # 92823                NaN
    # 93172            school2
    # 93774                NaN
    # 172491             work1
    # Name: mandatory_tour_frequency, dtype: object

    pdt.assert_series_equal(mtf_choice[per_ids], expected_choice)

    # try to get a non-existant table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "not in checkpointed tables" in str(excinfo.value)

    # try to get an existing table from a non-existant checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("households", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close_pipeline()
    orca.clear_cache()

    close_handlers()
from activitysim.core import pipeline

import extensions

# parse command line args and inject the standard injectables
handle_standard_args()

# comment out the line below to default base seed to 0 random seed
# so that run results are reproducible
# pipeline.set_rn_generator_base_seed(seed=None)

tracing.config_logger()

t0 = print_elapsed_time()

MODELS = setting('models')

# If you provide a resume_after argument to pipeline.run
# the pipeline manager will attempt to load checkpointed tables from the checkpoint store
# and resume pipeline processing on the next submodel step after the specified checkpoint
resume_after = setting('resume_after', None)

if resume_after:
    # FIX: was a Python 2 print statement; use the print() function
    print("resume_after", resume_after)

pipeline.run(models=MODELS, resume_after=resume_after)

# tables will no longer be available after pipeline is closed
pipeline.close_pipeline()

t0 = print_elapsed_time("all models", t0)
from census_getter.util import setting

# parse command line args and inject the standard injectables
handle_standard_args()

tracing.config_logger()

t0 = print_elapsed_time()

logger = logging.getLogger('census_getter')

# get the run list (name was possibly specified on the command line with the -m option)
run_list_name = inject.get_injectable('run_list_name', 'run_list')

# run list from settings file is dict with list of 'steps' and optional 'resume_after'
run_list = setting(run_list_name)
assert 'steps' in run_list, "Did not find steps in run_list"

# list of steps and possible resume_after in run_list
steps = run_list.get('steps')
resume_after = run_list.get('resume_after', None)

if resume_after:
    # FIX: was a Python 2 print statement; use the print() function
    print("resume_after", resume_after)

pipeline.run(models=steps, resume_after=resume_after)

# tables will no longer be available after pipeline is closed
pipeline.close_pipeline()

# FIX: original read `t0 = ("all models", t0)`, which merely built a tuple
# and never reported timing; call print_elapsed_time like the sibling
# runner scripts in this project
t0 = print_elapsed_time("all models", t0)
def run(args): """ Run the models. Specify a project folder using the '--working_dir' option, or point to the config, data, and output folders directly with '--config', '--data', and '--output'. Both '--config' and '--data' can be specified multiple times. Directories listed first take precedence. returns: int: sys.exit exit code """ # register abm steps and other abm-specific injectables # by default, assume we are running activitysim.abm # other callers (e.g. populationsim) will have to arrange to register their own steps and injectables # (presumably) in a custom run_simulation.py instead of using the 'activitysim run' command if not inject.is_injectable('preload_injectables'): from activitysim import abm # register abm steps and other abm-specific injectables tracing.config_logger(basic=True) handle_standard_args(args) # possibly update injectables # legacy support for run_list setting nested 'models' and 'resume_after' settings if config.setting('run_list'): warnings.warn( "Support for 'run_list' settings group will be removed.\n" "The run_list.steps setting is renamed 'models'.\n" "The run_list.resume_after setting is renamed 'resume_after'.\n" "Specify both 'models' and 'resume_after' directly in settings config file.", FutureWarning) run_list = config.setting('run_list') if 'steps' in run_list: assert not config.setting('models'), \ f"Don't expect 'steps' in run_list and 'models' as stand-alone setting!" config.override_setting('models', run_list['steps']) if 'resume_after' in run_list: assert not config.setting('resume_after'), \ f"Don't expect 'resume_after' both in run_list and as stand-alone setting!" 
config.override_setting('resume_after', run_list['resume_after']) # If you provide a resume_after argument to pipeline.run # the pipeline manager will attempt to load checkpointed tables from the checkpoint store # and resume pipeline processing on the next submodel step after the specified checkpoint resume_after = config.setting('resume_after', None) # cleanup if not resuming if not resume_after: cleanup_output_files() elif config.setting('cleanup_trace_files_on_resume', False): tracing.delete_trace_files() tracing.config_logger( basic=False) # update using possibly new logging configs config.filter_warnings() logging.captureWarnings(capture=True) # directories for k in ['configs_dir', 'settings_file_name', 'data_dir', 'output_dir']: logger.info('SETTING %s: %s' % (k, inject.get_injectable(k, None))) log_settings = inject.get_injectable('log_settings', {}) for k in log_settings: logger.info('SETTING %s: %s' % (k, config.setting(k))) t0 = tracing.print_elapsed_time() if config.setting('multiprocess', False): logger.info('run multiprocess simulation') from activitysim.core import mp_tasks run_list = mp_tasks.get_run_list() injectables = {k: inject.get_injectable(k) for k in INJECTABLES} mp_tasks.run_multiprocess(run_list, injectables) assert not pipeline.is_open() if config.setting('cleanup_pipeline_after_run', False): pipeline.cleanup_pipeline() else: logger.info('run single process simulation') pipeline.run(models=config.setting('models'), resume_after=resume_after) if config.setting('cleanup_pipeline_after_run', False): pipeline.cleanup_pipeline( ) # has side effect of closing open pipeline else: pipeline.close_pipeline() chunk.log_write_hwm() tracing.print_elapsed_time('all models', t0) return 0