def run(args):
    """
    Run bca4abm.

    Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'.

    Exits via sys.exit if any of the three folders is not registered
    or does not exist on disk.
    """

    # switch to the project folder only when one was given and it exists
    working_dir = args.working_dir
    if working_dir and os.path.exists(working_dir):
        os.chdir(working_dir)

    # command-line folder overrides, registered in config / data / output order
    folder_args = (
        ('configs_dir', 'config'),
        ('data_dir', 'data'),
        ('output_dir', 'output'),
    )
    for injectable_name, arg_name in folder_args:
        folder = getattr(args, arg_name)
        if folder:
            inject.add_injectable(injectable_name, folder)

    # every folder must be both registered and present on disk
    for injectable in ['configs_dir', 'data_dir', 'output_dir']:
        try:
            dir_path = inject.get_injectable(injectable)
        except RuntimeError:
            sys.exit('Error: please specify either a --working_dir '
                     "containing 'configs', 'data', and 'output' folders "
                     'or all three of --config, --data, and --output')
        if os.path.exists(dir_path):
            continue
        sys.exit("Could not find %s '%s'" % (injectable, os.path.abspath(dir_path)))

    if args.pipeline:
        inject.add_injectable('pipeline_file_name', args.pipeline)

    if args.resume:
        override_setting('resume_after', args.resume)

    tracing.config_logger()
    tracing.delete_csv_files()  # only modifies output_dir

    warnings.simplefilter('always')
    logging.captureWarnings(capture=True)

    start_time = tracing.print_elapsed_time()

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = setting('resume_after', None)
    if resume_after:
        print('resume_after: %s' % resume_after)

    pipeline.run(models=setting('models'), resume_after=resume_after)

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    tracing.print_elapsed_time('all models', start_time)
def preload_injectables():
    """
    Preload bulky injectables up front - stuff that isn't inserted into the pipeline.

    Requests 'skim_dict' and 'skim_stack' in turn and, for each one that is
    available (not None), reports the elapsed time at debug level.
    """

    logger.info("preload_injectables")

    checkpoint = tracing.print_elapsed_time()

    for injectable_name in ('skim_dict', 'skim_stack'):
        # presumably requesting the injectable is what forces it to load
        if inject.get_injectable(injectable_name, None) is None:
            continue
        checkpoint = tracing.print_elapsed_time(
            "preload %s" % injectable_name, checkpoint, debug=True)
def run(args):
    """
    Run the models.

    Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'. Both '--config' and '--data' can be
    specified multiple times. Directories listed first take precedence.
    """

    from activitysim import abm  # register injectables

    # basic logging first, then re-configure once the standard args are handled
    tracing.config_logger(basic=True)
    handle_standard_args(args)  # possibly update injectables
    tracing.config_logger(basic=False)  # update using possibly new logging configs
    config.filter_warnings()
    logging.captureWarnings(capture=True)

    log_settings()

    start_time = tracing.print_elapsed_time()

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = config.setting('resume_after', None)

    # cleanup if not resuming
    if resume_after:
        if config.setting('cleanup_trace_files_on_resume', False):
            tracing.delete_trace_files()
    else:
        cleanup_output_files()

    if not config.setting('multiprocess', False):
        logger.info('run single process simulation')

        pipeline.run(models=config.setting('models'), resume_after=resume_after)

        pipeline.close_pipeline()
        chunk.log_write_hwm()
    else:
        logger.info('run multiprocess simulation')

        from activitysim.core import mp_tasks
        mp_run_list = mp_tasks.get_run_list()
        mp_injectables = {name: inject.get_injectable(name) for name in INJECTABLES}
        mp_tasks.run_multiprocess(mp_run_list, mp_injectables)

    tracing.print_elapsed_time('all models', start_time)
def initialize_households():
    """
    Annotate the tables listed in initialize_households.yaml, add the
    shadow pricing size tables, and preload person_windows.
    """

    trace_label = 'initialize_households'

    settings = config.read_model_settings('initialize_households.yaml', mandatory=True)
    annotate_tables(settings, trace_label)

    # - initialize shadow_pricing size tables after annotating household and person tables
    # since these are scaled to model size, they have to be created while single-process
    shadow_pricing.add_size_tables()

    # - preload person_windows
    started = tracing.print_elapsed_time()
    inject.get_table('person_windows').to_frame()
    tracing.print_elapsed_time("preload person_windows", started, debug=True)
def annotate_tables(model_settings, trace_label):
    """
    Rename and/or annotate each table listed under 'annotate_tables' in
    model_settings, then write the result back to the pipeline.

    Parameters
    ----------
    model_settings : dict
        settings with an optional 'annotate_tables' list; each entry has a
        'tablename' and optional 'column_map' and 'annotate' items
    trace_label : str
        label passed through to expressions.assign_columns
    """

    table_specs = model_settings.get('annotate_tables', [])

    if not table_specs:
        logger.warning("annotate_tables setting is empty - nothing to do!")

    tracing.print_elapsed_time()

    for table_spec in table_specs:

        tablename = table_spec['tablename']
        table_df = inject.get_table(tablename).to_frame()

        # - rename columns
        renames = table_spec.get('column_map', None)
        if renames:
            logger.info("renaming %s columns %s" % (tablename, renames))
            table_df.rename(columns=renames, inplace=True)

        # - annotate
        annotate_settings = table_spec.get('annotate', None)
        if annotate_settings:
            logger.info("annotated %s SPEC %s" % (tablename, annotate_settings['SPEC']))
            expressions.assign_columns(
                df=table_df,
                model_settings=annotate_settings,
                trace_label=trace_label)

        # fixme - narrow?

        # - write table to pipeline
        pipeline.replace_table(tablename, table_df)
def preload_injectables():
    """
    Preload the skim injectables up front and report how long each one took.

    Requests 'skim_dict' and 'skim_stack' in turn; for each one that is
    available (not None) the elapsed time since the previous checkpoint
    is reported.
    """

    # could simply list injectables as arguments, but this way we can report timing...

    logger.info("preload_injectables")

    t0 = tracing.print_elapsed_time()

    # pass the previous checkpoint so the reported time is measured from it;
    # NOTE(review): assumes print_elapsed_time(msg, t0) returns the new checkpoint - confirm
    if inject.get_injectable('skim_dict', None) is not None:
        t0 = tracing.print_elapsed_time("preload skim_dict", t0)

    if inject.get_injectable('skim_stack', None) is not None:
        t0 = tracing.print_elapsed_time("preload skim_stack", t0)
def preload_injectables():
    """
    Preload bulky injectables up front - stuff that isn't inserted into the pipeline.

    Registers the track_skim_usage, write_data_dictionary and write_tables
    steps, and installs DEFAULT_TABLE_LIST as the 'input_table_list' setting
    when the settings do not provide one.

    Returns
    -------
    bool
        always True
    """

    logger.info("preload_injectables")

    inject.add_step('track_skim_usage', track_skim_usage)
    inject.add_step('write_data_dictionary', write_data_dictionary)
    inject.add_step('write_tables', write_tables)

    table_list = config.setting('input_table_list')

    # default ActivitySim table names and indices
    if table_list is None:
        # Logger.warn is a deprecated alias; use Logger.warning
        logger.warning(
            "No 'input_table_list' found in settings. This will be a "
            "required setting in upcoming versions of ActivitySim.")

        new_settings = inject.get_injectable('settings')
        new_settings['input_table_list'] = DEFAULT_TABLE_LIST
        inject.add_injectable('settings', new_settings)

    t0 = tracing.print_elapsed_time()

    # FIXME - still want to do this?
    # if inject.get_injectable('skim_dict', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_dict", t0, debug=True)
    #
    # if inject.get_injectable('skim_stack', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_stack", t0, debug=True)

    return True
def annotate_tables(model_settings, trace_label):
    """
    Rename and/or annotate each table listed under 'annotate_tables' in
    model_settings, write it back to the pipeline, and log memory use
    along the way.

    Parameters
    ----------
    model_settings : dict
        settings with an optional 'annotate_tables' list; each entry has a
        'tablename' and optional 'column_map' (deprecated) and 'annotate' items
    trace_label : str
        base label, extended here with 'annotate_tables'
    """

    trace_label = tracing.extend_trace_label(trace_label, 'annotate_tables')

    chunk.log_rss(trace_label)

    table_specs = model_settings.get('annotate_tables', [])

    if not table_specs:
        logger.warning(f"{trace_label} - annotate_tables setting is empty - nothing to do!")

    assert isinstance(table_specs, list), \
        f"annotate_tables settings should be a list but is {type(table_specs)}"

    tracing.print_elapsed_time()

    for table_spec in table_specs:

        tablename = table_spec['tablename']

        chunk.log_rss(f"{trace_label}.pre-get_table.{tablename}")

        table_df = inject.get_table(tablename).to_frame()
        chunk.log_df(trace_label, tablename, table_df)

        # - rename columns
        renames = table_spec.get('column_map', None)
        if renames:
            warnings.warn(
                "Setting 'column_map' has been changed to 'rename_columns'. "
                "Support for 'column_map' in annotate_tables will be removed in future versions.",
                FutureWarning)

            logger.info(f"{trace_label} - renaming {tablename} columns {renames}")
            table_df.rename(columns=renames, inplace=True)

        # - annotate
        annotate_settings = table_spec.get('annotate', None)
        if annotate_settings:
            logger.info(
                f"{trace_label} - annotating {tablename} SPEC {annotate_settings['SPEC']}")
            expressions.assign_columns(
                df=table_df,
                model_settings=annotate_settings,
                trace_label=trace_label)

        chunk.log_df(trace_label, tablename, table_df)

        # - write table to pipeline
        pipeline.replace_table(tablename, table_df)

        # drop our reference and tell the chunk logger the frame is gone
        del table_df
        chunk.log_df(trace_label, tablename, None)
def initialize():
    """
    Because random seed is set differently for each step, the sampling of households depends
    on which step they are initially loaded in, so we force them to load here and they get
    stored to the pipeline.

    Loads land_use, households, persons and person_windows in that order,
    reporting the elapsed time for each at debug level.
    """

    checkpoint = tracing.print_elapsed_time()

    for table_name in ('land_use', 'households', 'persons', 'person_windows'):
        inject.get_table(table_name).to_frame()
        checkpoint = tracing.print_elapsed_time(
            "preload %s" % table_name, checkpoint, debug=True)
def load_skims(omx_file_path, skim_info, skim_buffers):
    """
    Fill the skim data backed by skim_buffers, either from the skim cache
    or from the omx file, and optionally write the skim cache.

    The 'read_skim_cache' and 'write_skim_cache' settings select the
    behaviour; having both set is treated as a configuration mistake.

    Parameters
    ----------
    omx_file_path : passed to read_skims_from_omx
    skim_info : passed to the skim read/write helpers
    skim_buffers : passed to skim_data_from_buffers
    """

    use_cache = config.setting('read_skim_cache')
    save_cache = config.setting('write_skim_cache')
    assert not (use_cache and save_cache), \
        "read_skim_cache and write_skim_cache are both True in settings file. I am assuming this is a mistake"

    skim_data = skim_data_from_buffers(skim_buffers, skim_info)

    checkpoint = tracing.print_elapsed_time()

    if not use_cache:
        read_skims_from_omx(skim_info, skim_data, omx_file_path)
        checkpoint = tracing.print_elapsed_time("read_skims_from_omx", checkpoint)
    else:
        read_skim_cache(skim_info, skim_data)
        checkpoint = tracing.print_elapsed_time("read_skim_cache", checkpoint)

    if save_cache:
        write_skim_cache(skim_info, skim_data)
        tracing.print_elapsed_time("write_skim_cache", checkpoint)
def choose_parking_location(
        segment_name,
        trips,
        alternatives,
        model_settings,
        want_sample_table,
        skims,
        chunk_size, trace_hh_id,
        trace_label):
    """
    Choose a parking location for each trip from the full set of alternatives.

    Builds a destination sample pairing every trip with every alternative
    and hands it to parking_destination_simulate.

    Returns
    -------
    destinations
        result of parking_destination_simulate
    destination_sample : pandas.DataFrame or None
        the sample table, additionally indexed by the alternative column,
        when want_sample_table is truthy; otherwise None
    """

    logger.info("choose_parking_location %s with %d trips", trace_label, trips.shape[0])

    started = print_elapsed_time()

    alt_col = model_settings['ALT_DEST_COL_NAME']

    # one row per (trip, alternative) pair, indexed by the repeated trip index
    sample = interaction_dataset(trips, alternatives, alt_index_id=alt_col)
    sample.index = np.repeat(trips.index.values, len(alternatives))
    sample.index.name = trips.index.name
    sample = sample[[alt_col]].copy()

    # # - trip_destination_simulate
    destinations = parking_destination_simulate(
        segment_name=segment_name,
        trips=trips,
        destination_sample=sample,
        model_settings=model_settings,
        skims=skims,
        chunk_size=chunk_size, trace_hh_id=trace_hh_id,
        trace_label=trace_label)

    if not want_sample_table:
        sample = None
    else:
        # FIXME - sample_table
        sample.set_index(model_settings['ALT_DEST_COL_NAME'], append=True, inplace=True)

    print_elapsed_time("%s.parking_location_simulate" % trace_label, started)

    return destinations, sample
def initialize():
    """
    Because random seed is set differently for each step, the sampling of households depends
    on which step they are initially loaded in. We load them explicitly up front, so that
    they are loaded in this step rather than in whichever later step first asks for them.

    Loads land_use, households, persons and person_windows in that order,
    reporting the elapsed time for each.
    """

    t0 = tracing.print_elapsed_time()

    # pass the previous checkpoint so each reported time is measured from it;
    # NOTE(review): assumes print_elapsed_time(msg, t0) returns the new checkpoint - confirm
    inject.get_table('land_use').to_frame()
    t0 = tracing.print_elapsed_time("preload land_use", t0)

    inject.get_table('households').to_frame()
    t0 = tracing.print_elapsed_time("preload households", t0)

    inject.get_table('persons').to_frame()
    t0 = tracing.print_elapsed_time("preload persons", t0)

    inject.get_table('person_windows').to_frame()
    t0 = tracing.print_elapsed_time("preload person_windows", t0)
def preload_injectables():
    """
    Preload bulky injectables up front - stuff that isn't inserted into the pipeline.

    Registers the track_skim_usage, write_data_dictionary and write_tables
    steps, installs DEFAULT_TABLE_LIST as the 'input_table_list' setting when
    the settings do not provide one, and (if the 'write_raw_tables' setting is
    truthy) writes each input table to csv in the 'raw_tables' output folder.

    Returns
    -------
    bool
        always True
    """

    logger.info("preload_injectables")

    inject.add_step('track_skim_usage', track_skim_usage)
    inject.add_step('write_data_dictionary', write_data_dictionary)
    inject.add_step('write_tables', write_tables)

    table_list = config.setting('input_table_list')

    # default ActivitySim table names and indices
    if table_list is None:
        logger.warning(
            "No 'input_table_list' found in settings. This will be a "
            "required setting in upcoming versions of ActivitySim.")

        new_settings = inject.get_injectable('settings')
        new_settings['input_table_list'] = DEFAULT_TABLE_LIST
        inject.add_injectable('settings', new_settings)

        # keep the local in step with the setting just installed, otherwise
        # the raw table dump below would try to iterate over None
        table_list = DEFAULT_TABLE_LIST

    # FIXME undocumented feature
    if config.setting('write_raw_tables'):

        # write raw input tables as csv (before annotation)
        csv_dir = config.output_file_path('raw_tables')
        os.makedirs(csv_dir, exist_ok=True)  # make directory if needed

        table_names = [t['tablename'] for t in table_list]
        for t in table_names:
            df = inject.get_table(t).to_frame()
            if t == 'households':
                df.drop(columns='chunk_id', inplace=True)
            df.to_csv(os.path.join(csv_dir, '%s.csv' % t), index=True)

    t0 = tracing.print_elapsed_time()

    # FIXME - still want to do this?
    # if inject.get_injectable('skim_dict', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_dict", t0, debug=True)
    #
    # if inject.get_injectable('skim_stack', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_stack", t0, debug=True)

    return True
def annotate_tables(model_settings, trace_label):
    """
    Rename and/or annotate each table listed under 'annotate_tables' in
    model_settings, then write the result back to the pipeline.

    Parameters
    ----------
    model_settings : dict
        settings with an optional 'annotate_tables' list; each entry has a
        'tablename' and optional 'column_map' (deprecated) and 'annotate' items
    trace_label : str
        label passed through to expressions.assign_columns
    """

    annotate_tables = model_settings.get('annotate_tables', [])

    if not annotate_tables:
        logger.warning("annotate_tables setting is empty - nothing to do!")

    t0 = tracing.print_elapsed_time()

    for table_info in annotate_tables:

        tablename = table_info['tablename']

        df = inject.get_table(tablename).to_frame()

        # - rename columns
        column_map = table_info.get('column_map', None)
        if column_map:

            # adjacent literals are concatenated as-is, so each fragment
            # must carry its own trailing space
            warnings.warn(
                "annotate_tables option 'column_map' renamed 'rename_columns' and moved "
                "to settings.yaml. Support for 'column_map' in annotate_tables will be "
                "removed in future versions.",
                FutureWarning)

            logger.info("renaming %s columns %s" % (tablename, column_map,))
            df.rename(columns=column_map, inplace=True)

        # - annotate
        annotate = table_info.get('annotate', None)
        if annotate:
            logger.info("annotated %s SPEC %s" % (tablename, annotate['SPEC'],))
            expressions.assign_columns(
                df=df,
                model_settings=annotate,
                trace_label=trace_label)

        # fixme - narrow?

        # - write table to pipeline
        pipeline.replace_table(tablename, df)
def annotate_tables(model_settings, trace_label):
    """
    Rename and/or annotate each table listed under 'annotate_tables' in
    model_settings, then write the result back to the pipeline.

    Parameters
    ----------
    model_settings : dict
        settings with an optional 'annotate_tables' list; each entry has a
        'tablename' and optional 'column_map' (deprecated) and 'annotate' items
    trace_label : str
        prefix for log messages, also passed to expressions.assign_columns
    """

    table_specs = model_settings.get('annotate_tables', [])

    if not table_specs:
        logger.warning(f"{trace_label} - annotate_tables setting is empty - nothing to do!")

    assert isinstance(table_specs, list), \
        f"annotate_tables settings should be a list but is {type(table_specs)}"

    tracing.print_elapsed_time()

    for table_spec in table_specs:

        tablename = table_spec['tablename']
        table_df = inject.get_table(tablename).to_frame()

        # - rename columns
        renames = table_spec.get('column_map', None)
        if renames:
            warnings.warn(
                f"{trace_label} - annotate_tables option 'column_map' renamed 'rename_columns' "
                "and moved to global settings file. Support for 'column_map' in annotate_tables "
                "will be removed in future versions.",
                FutureWarning)

            logger.info(f"{trace_label} - renaming {tablename} columns {renames}")
            table_df.rename(columns=renames, inplace=True)

        # - annotate
        annotate_settings = table_spec.get('annotate', None)
        if annotate_settings:
            logger.info(
                f"{trace_label} - annotating {tablename} SPEC {annotate_settings['SPEC']}")
            expressions.assign_columns(
                df=table_df,
                model_settings=annotate_settings,
                trace_label=trace_label)

        # fixme - narrow?

        # - write table to pipeline
        pipeline.replace_table(tablename, table_df)
def preload_injectables():
    """
    Preload bulky injectables up front - stuff that isn't inserted into the pipeline.

    Currently this only registers the track_skim_usage, write_data_dictionary
    and write_tables steps.

    Returns
    -------
    bool
        always True
    """

    logger.info("preload_injectables")

    step_functions = (
        ('track_skim_usage', track_skim_usage),
        ('write_data_dictionary', write_data_dictionary),
        ('write_tables', write_tables),
    )
    for step_name, step_func in step_functions:
        inject.add_step(step_name, step_func)

    tracing.print_elapsed_time()

    # FIXME - still want to do this?
    # if inject.get_injectable('skim_dict', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_dict", t0, debug=True)
    #
    # if inject.get_injectable('skim_stack', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_stack", t0, debug=True)

    return True
def run(args):
    """
    Run the models.

    Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'. Both '--config' and '--data' can be
    specified multiple times. Directories listed first take precedence.

    Any exception raised while running the models is logged (together with the
    elapsed time up to the error) and then re-raised.

    returns:
        int: sys.exit exit code
    """

    # register abm steps and other abm-specific injectables
    # by default, assume we are running activitysim.abm
    # other callers (e.g. populationsim) will have to arrange to register their own steps and injectables
    # (presumably) in a custom run_simulation.py instead of using the 'activitysim run' command
    if not inject.is_injectable('preload_injectables'):
        from activitysim import abm  # register abm steps and other abm-specific injectables

    tracing.config_logger(basic=True)
    handle_standard_args(args)  # possibly update injectables

    # legacy support for run_list setting nested 'models' and 'resume_after' settings
    if config.setting('run_list'):
        warnings.warn(
            "Support for 'run_list' settings group will be removed.\n"
            "The run_list.steps setting is renamed 'models'.\n"
            "The run_list.resume_after setting is renamed 'resume_after'.\n"
            "Specify both 'models' and 'resume_after' directly in settings config file.",
            FutureWarning)
        run_list = config.setting('run_list')
        if 'steps' in run_list:
            assert not config.setting('models'), \
                "Don't expect 'steps' in run_list and 'models' as stand-alone setting!"
            config.override_setting('models', run_list['steps'])

        if 'resume_after' in run_list:
            assert not config.setting('resume_after'), \
                "Don't expect 'resume_after' both in run_list and as stand-alone setting!"
            config.override_setting('resume_after', run_list['resume_after'])

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = config.setting('resume_after', None)

    # cleanup if not resuming
    if not resume_after:
        cleanup_output_files()
    elif config.setting('cleanup_trace_files_on_resume', False):
        tracing.delete_trace_files()

    tracing.config_logger(basic=False)  # update using possibly new logging configs
    config.filter_warnings()
    logging.captureWarnings(capture=True)

    # directories
    for k in ['configs_dir', 'settings_file_name', 'data_dir', 'output_dir']:
        logger.info('SETTING %s: %s' % (k, inject.get_injectable(k, None)))

    log_settings = inject.get_injectable('log_settings', {})
    for k in log_settings:
        logger.info('SETTING %s: %s' % (k, config.setting(k)))

    # OMP_NUM_THREADS: openmp
    # OPENBLAS_NUM_THREADS: openblas
    # MKL_NUM_THREADS: mkl
    for env in ['MKL_NUM_THREADS', 'OMP_NUM_THREADS', 'OPENBLAS_NUM_THREADS']:
        logger.info(f"ENV {env}: {os.getenv(env)}")

    np_info_keys = [
        'atlas_blas_info',
        'atlas_blas_threads_info',
        'atlas_info',
        'atlas_threads_info',
        'blas_info',
        'blas_mkl_info',
        'blas_opt_info',
        'lapack_info',
        'lapack_mkl_info',
        'lapack_opt_info',
        'mkl_info']

    # numpy.__config__.get_info is not provided by every numpy build; this is
    # purely informational logging, so skip it rather than abort the run
    get_np_info = getattr(np.__config__, 'get_info', None)
    if get_np_info is not None:
        for cfg_key in np_info_keys:
            info = get_np_info(cfg_key)
            if info and 'libraries' in info:
                logger.info(f"NUMPY {cfg_key} libraries: {info['libraries']}")

    t0 = tracing.print_elapsed_time()

    try:
        if config.setting('multiprocess', False):
            logger.info('run multiprocess simulation')

            from activitysim.core import mp_tasks
            injectables = {k: inject.get_injectable(k) for k in INJECTABLES}
            mp_tasks.run_multiprocess(injectables)

            assert not pipeline.is_open()

            if config.setting('cleanup_pipeline_after_run', False):
                pipeline.cleanup_pipeline()

        else:
            logger.info('run single process simulation')

            pipeline.run(models=config.setting('models'), resume_after=resume_after)

            if config.setting('cleanup_pipeline_after_run', False):
                pipeline.cleanup_pipeline()  # has side effect of closing open pipeline
            else:
                pipeline.close_pipeline()

            mem.log_global_hwm()  # main process
    except Exception:
        # log time until error and the error traceback
        tracing.print_elapsed_time('all models until this error', t0)
        logger.exception('activitysim run encountered an unrecoverable error')
        raise

    chunk.consolidate_logs()
    mem.consolidate_logs()

    tracing.print_elapsed_time('all models', t0)

    return 0
# Example data folder; 'ancillary_data' is listed ahead of it in the data_dir injectable.
# (A developer-specific absolute path used to be assigned here and was immediately overwritten.)
data_dir = '../example/data'

inject.add_injectable('data_dir', ['ancillary_data', data_dir])
# inject.add_injectable('data_dir', ['ancillary_data', '../activitysim/abm/test/data'])
inject.add_injectable('configs_dir', ['configs', '../example/configs'])

injectables = config.handle_standard_args()

tracing.config_logger()
config.filter_warnings()

log_settings(injectables)

t0 = tracing.print_elapsed_time()

# cleanup if not resuming
if not config.setting('resume_after', False):
    cleanup_output_files()

run_list = mp_tasks.get_run_list()

if run_list['multiprocess']:
    # do this after config.handle_standard_args, as command line args may override injectables
    injectables = list(set(injectables) | {'data_dir', 'configs_dir', 'output_dir'})
    injectables = {k: inject.get_injectable(k) for k in injectables}
else:
    injectables = None

run(run_list, injectables)
# Example data folder; 'ancillary_data' is listed ahead of it in the data_dir injectable.
# (A developer-specific absolute path used to be assigned here and was immediately overwritten.)
data_dir = '../example/data'

inject.add_injectable('data_dir', ['ancillary_data', data_dir])
# inject.add_injectable('data_dir', ['ancillary_data', '../activitysim/abm/test/data'])
inject.add_injectable('configs_dir', ['configs', '../example/configs'])

injectables = config.handle_standard_args()

tracing.config_logger()
config.filter_warnings()

log_settings(injectables)

t0 = tracing.print_elapsed_time()

# cleanup if not resuming
if not config.setting('resume_after', False):
    cleanup_output_files()

run_list = mp_tasks.get_run_list()

if run_list['multiprocess']:
    # do this after config.handle_standard_args, as command line args may override injectables
    injectables = list(set(injectables) | {'data_dir', 'configs_dir', 'output_dir'})
    injectables = {k: inject.get_injectable(k) for k in injectables}
else:
    injectables = None
def non_mandatory_tour_frequency(persons_merged,
                                 non_mandatory_tour_frequency_alts,
                                 non_mandatory_tour_frequency_spec,
                                 non_mandatory_tour_frequency_settings,
                                 chunk_size,
                                 trace_hh_id):
    """
    This model predicts the frequency of making non-mandatory trips
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these trips include escort, shopping, othmaint,
    othdiscr, eatout, and social trips in various combination.

    Persons whose cdap_activity is 'M' or 'N' are simulated one 'ptype_cat'
    segment at a time; the combined choices are added to the persons table
    as the 'non_mandatory_tour_frequency' column.
    """

    checkpoint = print_elapsed_time()

    alts = non_mandatory_tour_frequency_alts.to_frame()

    # filter based on results of CDAP
    persons = persons_merged.to_frame()
    choosers = persons[persons.cdap_activity.isin(['M', 'N'])]

    logger.info("Running non_mandatory_tour_frequency with %d persons" % len(choosers))

    constants = config.get_model_constants(non_mandatory_tour_frequency_settings)

    segment_choices = []

    # segment by person type and pick the right spec for each person type
    for name, segment in choosers.groupby('ptype_cat'):

        logger.info("Running segment '%s' of size %d" % (name, len(segment)))

        segment_label = 'non_mandatory_tour_frequency.%s' % name

        # notice that we pick the column for the segment for each segment we run
        segment_spec = non_mandatory_tour_frequency_spec[[name]]

        segment_choices.append(
            asim.interaction_simulate(
                segment,
                alts,
                spec=segment_spec,
                locals_d=constants,
                chunk_size=chunk_size,
                trace_label=trace_hh_id and segment_label,
                trace_choice_name='non_mandatory_tour_frequency'))

        checkpoint = print_elapsed_time("non_mandatory_tour_frequency.%s" % name, checkpoint)

        # FIXME - force garbage collection
        # mem = asim.memory_info()
        # logger.info('memory_info ptype %s, %s' % (name, mem))

    all_choices = pd.concat(segment_choices)

    # FIXME - no need to reindex?
    orca.add_column("persons", "non_mandatory_tour_frequency", all_choices)

    create_non_mandatory_tours_table()

    pipeline.add_dependent_columns("persons", "persons_nmtf")

    if not trace_hh_id:
        return

    tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                     label="non_mandatory_tour_frequency",
                     columns=['non_mandatory_tour_frequency'],
                     warn_if_empty=True)
def build_cdap_spec(interaction_coefficients, hhsize,
                    trace_spec=False, trace_label=None, cache=True):
    """
    Build a spec file for computing utilities of alternative household member interaction patterns
    for households of specified size.

    We generate this spec automatically from a table of rules and coefficients because the
    interaction rules are fairly simple and can be expressed compactly whereas
    there is a lot of redundancy between the spec files for different household sizes, as well as
    in the vectorized expression of the interaction alternatives within the spec file itself

    interaction_coefficients has five columns:
        activity
            A single character activity type name (M, N, or H)
        interaction_ptypes
            List of ptypes in the interaction (in order of increasing ptype) or empty for wildcards
            (meaning that the interaction applies to all ptypes in that size hh)
        cardinality
            the number of persons in the interaction (e.g. 3 for a 3-way interaction)
        slug
            a human friendly efficient name so we can dump a readable spec trace file for debugging
            this slug is replaced with the numerical coefficient value after we dump the trace file
        coefficient
            The coefficient to apply for all hh interactions for this activity and set of ptypes

    The generated spec will have the eval expression in the index, and a utility column for each
    alternative (e.g. ['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN'] for hhsize 2)

    In order to be able to dump the spec in a human-friendly fashion to facilitate debugging the
    cdap_interaction_coefficients table, we first populate utility columns in the spec file
    with the coefficient slugs, dump the spec file, and then replace the slugs with coefficients.

    Parameters
    ----------
    interaction_coefficients : pandas.DataFrame
        Rules and coefficients for generating interaction specs for different household sizes
    hhsize : int
        household size for which the spec should be built.
        Values greater than MAX_HHSIZE are treated as MAX_HHSIZE.
    trace_spec : bool
        if truthy, dump the spec with tracing.trace_df both before and after
        the slugs are replaced with coefficients
    trace_label : str
        prefix used for the names of the traced spec tables
    cache : bool
        if truthy, return the spec from get_cached_spec when one is available,
        and store a newly built spec with cache_spec

    Returns
    -------
    spec: pandas.DataFrame

    Raises
    ------
    RuntimeError
        if a non-wildcard row has a cardinality outside 0..MAX_INTERACTION_CARDINALITY
    """

    t0 = tracing.print_elapsed_time()

    # if DUMP:
    #     # dump the interaction_coefficients table because it has been preprocessed
    #     tracing.trace_df(interaction_coefficients,
    #                      '%s.hhsize%d_interaction_coefficients' % (trace_label, hhsize),
    #                      transpose=False, slicer='NONE')

    # cdap spec is same for all households of MAX_HHSIZE and greater
    hhsize = min(hhsize, MAX_HHSIZE)

    # a cached spec short-circuits the whole build
    if cache:
        spec = get_cached_spec(hhsize)
        if spec is not None:
            return spec

    expression_name = "Expression"

    # generate a list of activity pattern alternatives for this hhsize
    # e.g. ['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN'] for hhsize=2
    alternatives = [''.join(tup) for tup in itertools.product('HMN', repeat=hhsize)]

    # spec df has expression column plus a column for each alternative
    spec = pd.DataFrame(columns=[expression_name] + alternatives)

    # Before processing the interaction_coefficients, we add add rows to the spec to carry
    # the alternative utilities previously computed for each individual into all hh alternative
    # columns in which the individual assigned that alternative. The Expression column contains
    # the name of the choosers column with that individuals utility for the individual alternative
    # and the hh alternative columns that should receive that utility are given a value of 1
    # e.g. M_p1 is a column in choosers with the individual utility to person p1 of alternative M
    #   Expression   MM   MN   MH   NM   NN   NH   HM   HN   HH
    #         M_p1  1.0  1.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0
    #         N_p1  0.0  0.0  0.0  1.0  1.0  1.0  0.0  0.0  0.0
    for pnum in range(1, hhsize+1):
        for activity in ['M', 'N', 'H']:

            # rows are appended one at a time, so len(spec) is the next free row label
            new_row_index = len(spec)
            spec.loc[new_row_index, expression_name] = add_pn(activity, pnum)

            # list of alternative columns where person pnum has expression activity
            # e.g. for M_p1 we want the columns where activity M is in position p1
            alternative_columns = [alt for alt in alternatives if alt[pnum - 1] == activity]
            spec.loc[new_row_index, alternative_columns] = 1

    # ignore rows whose cardinality exceeds hhsize
    relevant_rows = interaction_coefficients.cardinality <= hhsize

    # for each row in the interaction_coefficients table
    for row in interaction_coefficients[relevant_rows].itertuples():

        # if it is a wildcard all_people interaction
        if not row.interaction_ptypes:

            # wildcard interactions only apply if the interaction includes all household members
            # this will be the case if the cardinality of the wildcard equals the hhsize
            # conveniently, the slug is given the name of the alternative column (e.g. HHHH)

            # conveniently, for wildcards, the slug has been assigned the name of the alternative
            # (e.g. HHHH) that it applies to, since the interaction includes all household members
            # and there are no ptypes to append to it

            # FIXME - should we be doing this for greater than HH_MAXSIZE households?
            if row.slug in alternatives:
                spec.loc[len(spec), [expression_name, row.slug]] = ['1', row.slug]

            continue

        if not (0 <= row.cardinality <= MAX_INTERACTION_CARDINALITY):
            raise RuntimeError("Bad row cardinality %d for %s" % (row.cardinality, row.slug))

        # for all other interaction rules, we need to generate a row in the spec for each
        # possible combination of interacting persons
        # e.g. for (1, 2), (1,3), (2,3) for a coefficient with cardinality 2 in hhsize 3
        for tup in itertools.combinations(list(range(1, hhsize+1)), row.cardinality):

            # determine the name of the chooser column with the ptypes for this interaction
            if row.cardinality == 1:
                interaction_column = "ptype_p%d" % tup[0]
            else:
                # column named (e.g.) p1_p3 for an interaction between p1 and p3
                interaction_column = '_'.join(['p%s' % pnum for pnum in tup])

            # build expression that evaluates True iff the interaction is between specified ptypes
            # (e.g.) p1_p3==13 for an interaction between p1 and p3 of ptypes 1 and 3 (or 3 and1 )
            expression = "%s==%s" % (interaction_column, row.interaction_ptypes)

            # create list of columns with names matching activity for each of the persons in tup
            # e.g. ['MMM', 'MMN', 'MMH'] for an interaction between p1 and p3 with activity 'M'
            # alternative_columns = \
            #     filter(lambda alt: all([alt[p - 1] == row.activity for p in tup]), alternatives)
            alternative_columns = \
                [alt for alt in alternatives if all([alt[p - 1] == row.activity for p in tup])]

            # a row for this interaction may already exist,
            # e.g. if there are rules for both HH13 and MM13, we don't need to add rows for both
            # since they are triggered by the same expressions (e.g. p1_p2==13, p1_p3=13,...)
            existing_row_index = (spec[expression_name] == expression)
            if (existing_row_index).any():
                # if the rows exist, simply update the appropriate alternative columns in spec
                spec.loc[existing_row_index, alternative_columns] = row.slug
                spec.loc[existing_row_index, expression_name] = expression
            else:
                # otherwise, add a new row to spec
                new_row_index = len(spec)
                spec.loc[new_row_index, alternative_columns] = row.slug
                spec.loc[new_row_index, expression_name] = expression

    # eval expression goes in the index
    spec.set_index(expression_name, inplace=True)

    simulate.uniquify_spec_index(spec)

    if trace_spec:
        tracing.trace_df(spec, '%s.hhsize%d_spec' % (trace_label, hhsize),
                         transpose=False, slicer='NONE')

    # replace slug with coefficient
    # values that are not slugs are kept as-is, except falsy ones which become 0.0;
    # whatever is still missing after the mapping is filled with 0
    d = interaction_coefficients.set_index('slug')['coefficient'].to_dict()
    for c in spec.columns:
        spec[c] =\
            spec[c].map(lambda x: d.get(x, x or 0.0)).fillna(0)

    if trace_spec:
        tracing.trace_df(spec, '%s.hhsize%d_spec_patched' % (trace_label, hhsize),
                         transpose=False, slicer='NONE')

    if cache:
        cache_spec(hhsize, spec)

    t0 = tracing.print_elapsed_time("build_cdap_spec hh_size %s" % hhsize, t0)

    return spec
def run(args):
    """
    Run the models named in the settings and report an exit code.

    The project can be located with a single '--working_dir' option, or the
    config, data, and output folders can be given individually with
    '--config', '--data', and '--output'. '--config' and '--data' may be
    repeated; directories listed first take precedence.

    args is forwarded unchanged to handle_standard_args.

    returns:
        int: sys.exit exit code (0 once the run has completed)
    """

    # Importing activitysim.abm registers the abm steps and other abm-specific
    # injectables as a side effect. This is the default; other callers
    # (e.g. populationsim) have to register their own steps and injectables,
    # presumably from a custom run_simulation.py rather than through the
    # 'activitysim run' command.
    if not inject.is_injectable('preload_injectables'):
        from activitysim import abm  # noqa: F401 - imported for its side effects

    tracing.config_logger(basic=True)
    handle_standard_args(args)  # possibly update injectables

    # legacy support: 'models' and 'resume_after' nested inside a 'run_list' group
    if config.setting('run_list'):
        warnings.warn(
            "Support for 'run_list' settings group will be removed.\n"
            "The run_list.steps setting is renamed 'models'.\n"
            "The run_list.resume_after setting is renamed 'resume_after'.\n"
            "Specify both 'models' and 'resume_after' directly in settings config file.",
            FutureWarning)

        legacy_run_list = config.setting('run_list')

        # (key inside run_list, stand-alone setting it maps to, message if both are given)
        legacy_mappings = (
            ('steps', 'models',
             "Don't expect 'steps' in run_list and 'models' as stand-alone setting!"),
            ('resume_after', 'resume_after',
             "Don't expect 'resume_after' both in run_list and as stand-alone setting!"),
        )
        for legacy_key, setting_name, clash_message in legacy_mappings:
            if legacy_key in legacy_run_list:
                assert not config.setting(setting_name), clash_message
                config.override_setting(setting_name, legacy_run_list[legacy_key])

    # If a resume_after value is passed to pipeline.run, the pipeline manager
    # will attempt to load checkpointed tables from the checkpoint store and
    # resume processing on the submodel step following that checkpoint.
    resume_after = config.setting('resume_after', None)

    if resume_after:
        # resuming: only trace files are removed, and only when asked for
        if config.setting('cleanup_trace_files_on_resume', False):
            tracing.delete_trace_files()
    else:
        # fresh run: clean up output files first
        cleanup_output_files()

    tracing.config_logger(basic=False)  # update using possibly new logging configs
    config.filter_warnings()
    logging.captureWarnings(capture=True)

    # report the directories and settings file in use
    for injectable_name in ('configs_dir', 'settings_file_name', 'data_dir', 'output_dir'):
        logger.info('SETTING %s: %s', injectable_name,
                    inject.get_injectable(injectable_name, None))

    # report every setting listed in the 'log_settings' injectable
    for setting_name in inject.get_injectable('log_settings', {}):
        logger.info('SETTING %s: %s', setting_name, config.setting(setting_name))

    t0 = tracing.print_elapsed_time()

    if config.setting('multiprocess', False):
        logger.info('run multiprocess simulation')

        from activitysim.core import mp_tasks

        mp_run_list = mp_tasks.get_run_list()
        injectables = {name: inject.get_injectable(name) for name in INJECTABLES}
        mp_tasks.run_multiprocess(mp_run_list, injectables)

        assert not pipeline.is_open()

        if config.setting('cleanup_pipeline_after_run', False):
            pipeline.cleanup_pipeline()

    else:
        logger.info('run single process simulation')

        pipeline.run(models=config.setting('models'), resume_after=resume_after)

        if config.setting('cleanup_pipeline_after_run', False):
            pipeline.cleanup_pipeline()  # has side effect of closing open pipeline
        else:
            pipeline.close_pipeline()

        chunk.log_write_hwm()

    tracing.print_elapsed_time('all models', t0)

    return 0
# --resume : resume_after
handle_standard_args()

tracing.config_logger()

# show every warning and route warnings through the logging system
warnings.simplefilter("always")
logging.captureWarnings(capture=True)

# raise on divide, overflow and invalid floating-point operations; ignore underflow
old_settings = np.seterr(divide='raise', over='raise', invalid='raise', under='ignore')
# print() call form emits the same text on Python 2 and Python 3
print("numpy.geterr: %s" % np.geterr())

t0 = tracing.print_elapsed_time()

MODELS = setting('models')

# If you provide a resume_after argument to pipeline.run
# the pipeline manager will attempt to load checkpointed tables from the checkpoint store
# and resume pipeline processing on the next submodel step after the specified checkpoint
resume_after = setting('resume_after', None)

if resume_after:
    # single formatted argument keeps the output identical to the old print statement
    print("resume_after %s" % (resume_after,))

pipeline.run(models=MODELS, resume_after=resume_after)

# tables will no longer be available after pipeline is closed
pipeline.close_pipeline()
from census_getter import steps from activitysim.core import tracing from activitysim.core import pipeline from activitysim.core import inject from activitysim.core.config import handle_standard_args from activitysim.core.tracing import print_elapsed_time from census_getter.util import setting handle_standard_args() tracing.config_logger() t0 = print_elapsed_time() logger = logging.getLogger('census_getter') # get the run list (name was possibly specified on the command line with the -m option) run_list_name = inject.get_injectable('run_list_name', 'run_list') # run list from settings file is dict with list of 'steps' and optional 'resume_after' run_list = setting(run_list_name) assert 'steps' in run_list, "Did not find steps in run_list" # list of steps and possible resume_after in run_list steps = run_list.get('steps') resume_after = run_list.get('resume_after', None) if resume_after:
from activitysim.core.tracing import print_elapsed_time
from activitysim.core.config import handle_standard_args
from activitysim.core.config import setting
from activitysim.core import pipeline

import extensions

handle_standard_args()

# comment out the line below to default base seed to 0 random seed
# so that run results are reproducible
# pipeline.set_rn_generator_base_seed(seed=None)

tracing.config_logger()

t0 = print_elapsed_time()

MODELS = setting('models')

# If you provide a resume_after argument to pipeline.run
# the pipeline manager will attempt to load checkpointed tables from the checkpoint store
# and resume pipeline processing on the next submodel step after the specified checkpoint
resume_after = setting('resume_after', None)

if resume_after:
    # print() call form with one formatted argument: same output on Python 2 and 3
    print("resume_after %s" % (resume_after,))

pipeline.run(models=MODELS, resume_after=resume_after)

# tables will no longer be available after pipeline is closed
pipeline.close_pipeline()
def compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings, skims, trace_label):
    """
    Compute logsums for the tour alt_tdds, which will differ based on their different start, stop
    times of day, which translate to different odt_skim out_period and in_periods.

    In mtctm1, tdds are hourly, but there are only 5 skim time periods, so some of the tdd_alts
    will be the same, once converted to skim time periods. With 5 skim time periods there are
    15 unique out-in period pairs but 190 tdd alternatives.

    For efficiency, rather than compute a lot of redundant logsums, we compute logsums for the
    unique (out-period, in-period) pairs and then join them back to the alt_tdds.

    Note that alt_tdd is modified in place: 'out_period', 'in_period' and 'duration' columns are
    added to it (it must not already have 'out_period' or 'in_period' columns).

    Parameters
    ----------
    alt_tdd : table of alternatives with 'start' and 'end' columns and a named index
    tours_merged, tour_purpose, model_settings, skims : passed through to the helpers
        (_compute_logsums and, for tour_purpose, dedupe_alt_tdd)
    trace_label : str, extended with 'compute_logsums' for logging and tracing

    Returns
    -------
    logsums : the result of _compute_logsums on alt_tdd when USE_BRUTE_FORCE_TO_COMPUTE_LOGSUMS
        is set, otherwise the 'logsums' column of the deduped logsums joined back to alt_tdd
        and indexed by alt_tdd's index name
    """

    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')
    network_los = inject.get_injectable('network_los')

    # - in_period and out_period
    assert 'out_period' not in alt_tdd
    assert 'in_period' not in alt_tdd
    alt_tdd['out_period'] = network_los.skim_time_period_label(alt_tdd['start'])
    alt_tdd['in_period'] = network_los.skim_time_period_label(alt_tdd['end'])
    alt_tdd['duration'] = alt_tdd['end'] - alt_tdd['start']

    if USE_BRUTE_FORCE_TO_COMPUTE_LOGSUMS:
        # compute logsums for all the tour alt_tdds (inefficient)
        logsums = _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings,
                                   network_los, skims, trace_label)
        return logsums

    index_name = alt_tdd.index.name
    deduped_alt_tdds, redupe_columns = dedupe_alt_tdd(alt_tdd, tour_purpose, trace_label)

    num_alts = len(alt_tdd)
    num_deduped = len(deduped_alt_tdds)
    # an empty alt_tdd would otherwise raise ZeroDivisionError just for a log message
    pct_reduction = round(100 * (num_alts - num_deduped) / num_alts, 2) if num_alts else 0

    logger.info(
        f"{trace_label} compute_logsums "
        f"deduped_alt_tdds reduced number of rows by "
        f"{pct_reduction}% "
        f"from {num_alts} to {num_deduped} compared to USE_BRUTE_FORCE_TO_COMPUTE_LOGSUMS"
    )

    t0 = tracing.print_elapsed_time()

    # - compute logsums for the alt_tdd_periods
    deduped_alt_tdds['logsums'] = \
        _compute_logsums(deduped_alt_tdds, tours_merged, tour_purpose, model_settings,
                         network_los, skims, trace_label)

    # tracing.log_runtime(model_name=trace_label, start_time=t0)

    # redupe - join the alt_tdd_period logsums to alt_tdd to get logsums for alt_tdd
    logsums = pd.merge(
        alt_tdd.reset_index(),
        deduped_alt_tdds.reset_index(),
        on=[index_name] + redupe_columns,
        how='left').set_index(index_name).logsums

    # debugging aid, off by default: recomputes every logsum by brute force so the
    # representative (deduped) values can be compared with them - this is really expensive
    TRACE = False
    if TRACE:
        trace_logsums_df = logsums.to_frame('representative_logsum')
        trace_logsums_df['brute_force_logsum'] = \
            _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings,
                             network_los, skims, trace_label)
        tracing.trace_df(trace_logsums_df,
                         label=tracing.extend_trace_label(trace_label, 'representative_logsums'),
                         slicer='NONE', transpose=False)

    return logsums
def choose_trip_destination(
        primary_purpose,
        trips,
        alternatives,
        tours_merged,
        model_settings,
        size_term_matrix, skims,
        chunk_size, trace_hh_id,
        trace_label):
    """
    Sample, score, and choose destinations for a batch of trips.

    Three steps run in order, with elapsed time reported after each:
    trip_destination_sample, compute_logsums, and trip_destination_simulate.
    Trips that end up with no sampled alternatives are dropped (with a warning)
    before the later steps. If no trips remain, an empty Series indexed by the
    (empty) trips index is returned instead.

    Returns the result of trip_destination_simulate. Trips missing from that
    result are reported with a warning but not otherwise handled here.
    """

    logger.info("choose_trip_destination %s with %d trips", trace_label, trips.shape[0])

    # keyword arguments passed identically to all three steps
    shared_kwargs = dict(
        primary_purpose=primary_purpose,
        model_settings=model_settings,
        skims=skims,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label)

    timer = print_elapsed_time()

    # - trip_destination_sample
    destination_sample = trip_destination_sample(
        trips=trips,
        alternatives=alternatives,
        size_term_matrix=size_term_matrix,
        **shared_kwargs)

    # keep only the trips that received at least one sampled alternative
    has_sample = trips.index.isin(destination_sample.index.unique())
    if not has_sample.all():
        logger.warning("%s trip_destination_sample %s trips "
                       "without viable destination alternatives",
                       trace_label, (~has_sample).sum())
        trips = trips[has_sample]

    timer = print_elapsed_time("%s.trip_destination_sample" % trace_label, timer)

    if trips.empty:
        return pd.Series(index=trips.index)

    # - compute logsums (return value is not used here)
    compute_logsums(
        trips=trips,
        destination_sample=destination_sample,
        tours_merged=tours_merged,
        **shared_kwargs)

    timer = print_elapsed_time("%s.compute_logsums" % trace_label, timer)

    # - trip_destination_simulate
    destinations = trip_destination_simulate(
        trips=trips,
        destination_sample=destination_sample,
        size_term_matrix=size_term_matrix,
        **shared_kwargs)

    # report (but do not remove) trips for which no destination was chosen
    has_destination = trips.index.isin(destinations.index)
    if not has_destination.all():
        logger.warning("%s trip_destination_simulate %s trips "
                       "without viable destination alternatives",
                       trace_label, (~has_destination).sum())

    timer = print_elapsed_time("%s.trip_destination_simulate" % trace_label, timer)

    return destinations
def choose_trip_destination(primary_purpose,
                            trips,
                            alternatives,
                            tours_merged,
                            model_settings,
                            want_logsums,
                            want_sample_table,
                            size_term_matrix,
                            skim_hotel,
                            estimator,
                            chunk_size,
                            trace_hh_id,
                            trace_label):
    """
    Sample, score, and choose destinations for a batch of trips.

    Three steps run in order, with elapsed time reported after each:
    trip_destination_sample, compute_logsums, and trip_destination_simulate.
    Trips that end up with no sampled alternatives are dropped (with a warning)
    before the later steps.

    Returns a (destinations, destination_sample) pair:
    - destinations is the result of trip_destination_simulate, or an empty
      one-column 'choice' frame when no trips remain after sampling
    - destination_sample is the sample returned by compute_logsums with the
      model_settings['ALT_DEST_COL_NAME'] column appended to its index when
      want_sample_table is truthy, otherwise None (also None when no trips
      remain after sampling)
    """

    logger.info("choose_trip_destination %s with %d trips", trace_label, trips.shape[0])

    # keyword arguments passed identically to all three steps
    shared_kwargs = dict(
        primary_purpose=primary_purpose,
        model_settings=model_settings,
        skim_hotel=skim_hotel,
        chunk_size=chunk_size,
        trace_label=trace_label)

    # extra keyword arguments taken by the sample and simulate steps only
    choice_kwargs = dict(
        size_term_matrix=size_term_matrix,
        estimator=estimator,
        trace_hh_id=trace_hh_id)

    timer = print_elapsed_time()

    # - trip_destination_sample
    destination_sample = trip_destination_sample(
        trips=trips,
        alternatives=alternatives,
        **choice_kwargs,
        **shared_kwargs)

    # keep only the trips that received at least one sampled alternative
    has_sample = trips.index.isin(destination_sample.index.unique())
    if not has_sample.all():
        logger.warning("%s trip_destination_sample %s trips "
                       "without viable destination alternatives",
                       trace_label, (~has_sample).sum())
        trips = trips[has_sample]

    timer = print_elapsed_time("%s.trip_destination_sample" % trace_label, timer)

    if trips.empty:
        no_choices = pd.Series(index=trips.index).to_frame('choice')
        return no_choices, None

    # - compute logsums
    destination_sample = compute_logsums(
        trips=trips,
        destination_sample=destination_sample,
        tours_merged=tours_merged,
        **shared_kwargs)

    timer = print_elapsed_time("%s.compute_logsums" % trace_label, timer)

    # - trip_destination_simulate
    destinations = trip_destination_simulate(
        trips=trips,
        destination_sample=destination_sample,
        want_logsums=want_logsums,
        **choice_kwargs,
        **shared_kwargs)

    # report (but do not remove) trips for which no destination was chosen
    has_destination = trips.index.isin(destinations.index)
    if not has_destination.all():
        logger.warning("%s trip_destination_simulate %s trips "
                       "without viable destination alternatives",
                       trace_label, (~has_destination).sum())

    if not want_sample_table:
        destination_sample = None
    else:
        # FIXME - sample_table
        destination_sample.set_index(model_settings['ALT_DEST_COL_NAME'],
                                     append=True, inplace=True)

    timer = print_elapsed_time("%s.trip_destination_simulate" % trace_label, timer)

    return destinations, destination_sample