def test_simul_integerizer():
    """Regression test: simultaneous integerizing reproduces known weights."""

    inject.add_injectable(
        "configs_dir", os.path.join(os.path.dirname(__file__), 'configs'))

    integer_weights_df = do_simul_integerizing(
        trace_label="label",
        incidence_df=incidence_df,
        sub_weights=sub_zone_weights,
        sub_controls_df=sub_controls_df,
        control_spec=control_spec,
        total_hh_control_col='num_hh',
        sub_geography='TRACT',
        sub_control_zones=sub_control_zones)

    # dead-reckoning regression values
    expected = [0, 14, 10, 49, 1, 1, 0, 0, 0, 0, 46, 29]
    assert list(integer_weights_df.integer_weight.values) == expected

    print("\ntest_simul_integerizer integer_weights_df\n", integer_weights_df)
def test_misc():
    """Canonical dir injectables raise until set; settings defaults apply."""

    inject.clear_cache()

    # each canonical directory injectable should raise until it is injected
    for injectable in ('configs_dir', 'data_dir', 'output_dir'):
        with pytest.raises(RuntimeError) as excinfo:
            inject.get_injectable(injectable)
        assert "directory does not exist" in str(excinfo.value)

    inject.add_injectable(
        "configs_dir",
        os.path.join(os.path.dirname(__file__), 'configs_test_misc'))

    assert isinstance(inject.get_injectable("settings"), dict)

    inject.add_injectable(
        "data_dir", os.path.join(os.path.dirname(__file__), 'data'))

    # default values if not specified in settings
    assert inject.get_injectable("chunk_size") == 0
def add_canonical_dirs():
    """Inject this test module's 'configs' and 'output' directories."""
    base = os.path.dirname(__file__)
    inject.add_injectable("configs_dir", os.path.join(base, 'configs'))
    inject.add_injectable("output_dir", os.path.join(base, 'output'))
def register_households(df, trace_hh_id):
    """
    Register with orca households for tracing

    Parameters
    ----------
    df: pandas.DataFrame
        traced dataframe
    trace_hh_id: int
        household id we are tracing

    Returns
    -------
    Nothing
    """

    logger.info("tracing household id %s in %s households", trace_hh_id, len(df.index))

    if trace_hh_id not in df.index:
        # FIX: logger.warn is deprecated since Python 3.3; use logger.warning
        logger.warning("trace_hh_id %s not in dataframe", trace_hh_id)

    # inject persons_index name of person dataframe index
    if df.index.name is None:
        df.index.names = ['household_id']
        logger.warning("households table index had no name. renamed index '%s'", df.index.name)
    inject.add_injectable("hh_index_name", df.index.name)

    logger.debug("register_households injected hh_index_name '%s'", df.index.name)
def test_misc():
    """Injectables raise until configured; defaults come from settings."""

    inject.clear_cache()

    with pytest.raises(RuntimeError) as err:
        inject.get_injectable("configs_dir")
    assert "directory does not exist" in str(err.value)

    with pytest.raises(RuntimeError) as err:
        inject.get_injectable("data_dir")
    assert "directory does not exist" in str(err.value)

    with pytest.raises(RuntimeError) as err:
        inject.get_injectable("output_dir")
    assert "directory does not exist" in str(err.value)

    here = os.path.dirname(__file__)
    inject.add_injectable("configs_dir", os.path.join(here, 'configs_test_misc'))

    settings = inject.get_injectable("settings")
    assert isinstance(settings, dict)

    inject.add_injectable("data_dir", os.path.join(here, 'data'))

    # default values if not specified in settings
    assert inject.get_injectable("chunk_size") == 0
def test_create_input_store(seed_households, data_dir):
    """create_input_store should copy the csv input into input_data.h5."""

    settings_yaml = """
create_input_store: True
input_table_list:
  - tablename: households
    h5_tablename: seed_households
    filename: households.csv
    index_col: household_id
    rename_columns:
      HHID: household_id
"""
    inject.add_injectable(
        'settings', yaml.load(settings_yaml, Loader=yaml.SafeLoader))

    # write the seed table where the reader expects to find it
    hh_file = os.path.join(data_dir, 'households.csv')
    seed_households.to_csv(hh_file, index=False)
    assert os.path.isfile(hh_file)

    df = input.read_input_table('households')
    assert df.index.name == 'household_id'

    # the raw input should have been mirrored into the output store
    output_store = os.path.join(inject.get_injectable('output_dir'), 'input_data.h5')
    assert os.path.exists(output_store)

    store_df = pd.read_hdf(output_store, 'seed_households')
    assert store_df.equals(seed_households)
def preload_injectables():
    """
    preload bulky injectables up front - stuff that isn't inserted into the pipeline
    """

    logger.info("preload_injectables")

    inject.add_step('track_skim_usage', track_skim_usage)
    inject.add_step('write_data_dictionary', write_data_dictionary)
    inject.add_step('write_tables', write_tables)

    table_list = config.setting('input_table_list')

    # default ActivitySim table names and indices
    if table_list is None:
        # FIX: logger.warn is deprecated; use logger.warning
        # (matches the sibling preload_injectables implementation)
        logger.warning("No 'input_table_list' found in settings. This will be a "
                       "required setting in upcoming versions of ActivitySim.")

        new_settings = inject.get_injectable('settings')
        new_settings['input_table_list'] = DEFAULT_TABLE_LIST
        inject.add_injectable('settings', new_settings)

    t0 = tracing.print_elapsed_time()

    # FIXME - still want to do this?
    # if inject.get_injectable('skim_dict', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_dict", t0, debug=True)
    #
    # if inject.get_injectable('skim_stack', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_stack", t0, debug=True)

    return True
def test_1_week_time_window():
    """A 7-day time window should label each day-long period by weekday."""

    day_names = ['Sunday', 'Monday', 'Tuesday', 'Wednesday',
                 'Thursday', 'Friday', 'Saturday']

    settings = {
        'skim_time_periods': {
            'time_window': 10080,    # one week, in minutes
            'period_minutes': 1440,  # one day, in minutes
            'periods': [0, 1, 2, 3, 4, 5, 6, 7],
            'labels': day_names,
        }
    }
    inject.add_injectable("settings", settings)

    # scalar lookups: period 1..7 map onto Sunday..Saturday
    for period, label in enumerate(day_names, start=1):
        assert expressions.skim_time_period_label(period) == label

    # vectorized lookup over a Series
    pd.testing.assert_series_equal(
        expressions.skim_time_period_label(pd.Series(range(1, 8))),
        pd.Series(day_names))
def initialize_traceable_tables():
    """Reset the 'traceable_table_ids' injectable to an empty dict."""

    prior_ids = inject.get_injectable('traceable_table_ids', {})
    if prior_ids:
        logger.debug(
            f"initialize_traceable_tables resetting table_ids for {list(prior_ids.keys())}")
    inject.add_injectable('traceable_table_ids', {})
def inject_settings(**kwargs):
    """Read settings.yaml, apply keyword overrides, inject and return it."""

    settings = config.read_settings_file('settings.yaml', mandatory=True)
    settings.update(kwargs)

    inject.add_injectable("settings", settings)
    return settings
def test_integerizer():
    """do_integerizing should hit the num_hh control total exactly."""

    inject.add_injectable(
        "configs_dir", os.path.join(os.path.dirname(__file__), 'configs'))

    # rows are elements for which factors are calculated,
    # columns are constraints to be satisfied
    incidence_table = pd.DataFrame({
        'num_hh': [1, 1, 1, 1, 1, 1, 1, 1],
        'hh_1':   [1, 1, 1, 0, 0, 0, 0, 0],
        'hh_2':   [0, 0, 0, 1, 1, 1, 1, 1],
        'p1':     [1, 1, 2, 1, 0, 1, 2, 1],
        'p2':     [1, 0, 1, 0, 2, 1, 1, 1],
        'p3':     [1, 1, 0, 2, 1, 0, 2, 0],
        'float_weights': [1.362893, 25.658290, 7.978812, 27.789651,
                          18.451021, 8.641589, 1.476104, 8.641589],
    })

    control_cols = ['num_hh', 'hh_1', 'hh_2', 'p1', 'p2', 'p3']

    control_spec = pd.DataFrame({
        'seed_table': ['households'] * 3 + ['persons'] * 3,
        'target': control_cols,
        'importance': [10000000, 1000, 1000, 1000, 1000, 1000],
    })

    # column totals which the final weighted incidence table sums must satisfy
    control_totals = pd.Series([100, 35, 65, 91, 65, 104],
                               index=control_spec.target.values)

    integerized_weights, status = integerizer.do_integerizing(
        trace_label='label',
        control_spec=control_spec,
        control_totals=control_totals,
        incidence_table=incidence_table[control_cols],
        float_weights=incidence_table['float_weights'],
        total_hh_control_col='num_hh')

    print("do_integerizing status", status)
    print("sum", integerized_weights.sum())
    print("do_integerizing integerized_weights\n", integerized_weights)

    assert integerized_weights.sum() == 100
def inject_settings(configs_dir, **kwargs):
    """Load settings.yaml from configs_dir, apply overrides, inject and return."""

    settings_path = os.path.join(configs_dir, 'settings.yaml')
    with open(settings_path) as f:
        settings = yaml.load(f, Loader=yaml.SafeLoader)

    settings.update(kwargs)

    inject.add_injectable("settings", settings)
    return settings
def test_future_warning(config_path):
    """The deprecated 'hours' key in skim_time_periods should raise FutureWarning."""

    with open(os.path.join(config_path, 'settings_60_min.yaml')) as f:
        settings = yaml.load(f, Loader=yaml.SafeLoader)

    # rename 'periods' to the deprecated 'hours' key to trigger the warning
    settings['skim_time_periods']['hours'] = settings['skim_time_periods'].pop('periods')
    inject.add_injectable("settings", settings)

    # FIX: dropped the unused `as warning_test` binding;
    # pytest.warns itself asserts that the warning was raised
    with pytest.warns(FutureWarning):
        expressions.skim_time_period_label(1)
def setup_working_dir(example_name, inherit=False):
    """chdir into the example's dir and clean outputs; optionally inherit data."""

    os.chdir(example_dir(example_name))

    for ext in ('csv', 'txt', 'log', 'h5'):
        tracing.delete_output_files(ext)

    if inherit:
        # search the canonical example's data dir after this example's own
        data_dir = inject.get_injectable('data_dir')
        example_data_dir = os.path.join(example_dir('example'), 'data')
        inject.add_injectable('data_dir', [data_dir, example_data_dir], cache=True)
def test_rng_access():
    """Pipeline exposes its random-number generator while open."""

    setup_dirs()

    inject.add_injectable('rng_base_seed', 0)

    pipeline.open_pipeline()
    assert isinstance(pipeline.get_rn_generator(), random.Random)
    pipeline.close_pipeline()

    inject.clear_cache()
def test_60_minute_windows(config_path):
    """60-minute periods should map hours onto the five canonical labels."""

    with open(os.path.join(config_path, 'settings_60_min.yaml')) as f:
        inject.add_injectable("settings", yaml.load(f, Loader=yaml.SafeLoader))

    # scalar lookups
    expected = {1: 'EA', 8: 'AM', 12: 'MD', 18: 'PM', 23: 'EV'}
    for hour, label in expected.items():
        assert expressions.skim_time_period_label(hour) == label

    # vectorized lookup over a Series
    pd.testing.assert_series_equal(
        expressions.skim_time_period_label(pd.Series(list(expected))),
        pd.Series(list(expected.values())))
def inject_settings(**kwargs):
    """
    Load settings (two-zone or static variant) with keyword overrides.

    The settings file is chosen by the 'two_zone' kwarg: 'settings.yaml'
    when truthy (the default), 'settings_static.yaml' otherwise.
    All kwargs (including 'two_zone') are written into the settings dict.
    """

    # BUG FIX: the original assigned `settings` only inside the
    # `k == "two_zone"` loop branch, so calling without a 'two_zone'
    # kwarg raised NameError. Default to the two-zone settings file.
    if kwargs.get("two_zone", True):
        settings = config.read_settings_file('settings.yaml', mandatory=True)
    else:
        settings = config.read_settings_file('settings_static.yaml', mandatory=True)

    for k in kwargs:
        settings[k] = kwargs[k]

    inject.add_injectable("settings", settings)

    return settings
def preload_injectables():
    """
    preload bulky injectables up front - stuff that isn't inserted into the pipeline
    """

    logger.info("preload_injectables")

    inject.add_step('track_skim_usage', track_skim_usage)
    inject.add_step('write_data_dictionary', write_data_dictionary)
    inject.add_step('write_tables', write_tables)

    table_list = config.setting('input_table_list')

    # default ActivitySim table names and indices
    if table_list is None:
        logger.warning("No 'input_table_list' found in settings. This will be a "
                       "required setting in upcoming versions of ActivitySim.")

        new_settings = inject.get_injectable('settings')
        new_settings['input_table_list'] = DEFAULT_TABLE_LIST
        inject.add_injectable('settings', new_settings)

    # FIXME undocumented feature
    if config.setting('write_raw_tables'):

        # write raw input tables as csv (before annotation)
        csv_dir = config.output_file_path('raw_tables')
        if not os.path.exists(csv_dir):
            os.makedirs(csv_dir)  # make directory if needed

        for table_name in (t['tablename'] for t in table_list):
            df = inject.get_table(table_name).to_frame()
            if table_name == 'households':
                df = df.drop(columns='chunk_id')
            df.to_csv(os.path.join(csv_dir, '%s.csv' % table_name), index=True)

    tracing.print_elapsed_time()

    # FIXME - still want to do this?
    # if inject.get_injectable('skim_dict', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_dict", t0, debug=True)
    #
    # if inject.get_injectable('skim_stack', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_stack", t0, debug=True)

    return True
def handle_standard_args(parser=None):
    """
    Adds 'standard' activitysim arguments:
        --config : specify path to config_dir
        --output : specify path to output_dir
        --data   : specify path to data_dir

    Parameters
    ----------
    parser : argparse.ArgumentParser or None
        to custom argument handling, pass in a parser with arguments added
        and handle them based on returned args. This method will hand the args it adds

    Returns
    -------
    args : parser.parse_args() result
    """

    if parser is None:
        parser = argparse.ArgumentParser()

    parser.add_argument("-c", "--config", help="path to config dir")
    parser.add_argument("-o", "--output", help="path to output dir")
    parser.add_argument("-d", "--data", help="path to data dir")
    parser.add_argument("-r", "--resume", help="resume after")
    parser.add_argument("-m", "--models", help="models run_list_name in settings")

    args = parser.parse_args()

    if args.config:
        if not os.path.exists(args.config):
            raise IOError("Could not find configs dir '%s'." % args.config)
        inject.add_injectable("configs_dir", args.config)
    if args.output:
        if not os.path.exists(args.output):
            # BUG FIX: message previously reported args.config instead of args.output
            raise IOError("Could not find output dir '%s'." % args.output)
        inject.add_injectable("output_dir", args.output)
    if args.data:
        if not os.path.exists(args.data):
            # BUG FIX: message previously reported args.config instead of args.data
            raise IOError("Could not find data dir '%s'." % args.data)
        inject.add_injectable("data_dir", args.data)
    if args.resume:
        inject.add_injectable("resume_after", args.resume)
    if args.models:
        inject.add_injectable("run_list_name", args.models)

    return args
def handle_standard_args(args, multiprocess=True):
    """Apply parsed command-line args to injectables and setting overrides."""

    def inject_arg(name, value, cache=False):
        assert name in INJECTABLES
        inject.add_injectable(name, value, cache=cache)

    if args.working_dir:
        # activitysim will look in the current working directory for
        # 'configs', 'data', and 'output' folders by default
        os.chdir(args.working_dir)

    # settings_file_name should be cached or else it gets squashed by config.py
    if args.settings_file:
        inject_arg('settings_file_name', args.settings_file, cache=True)

    for name, value in (('configs_dir', args.config),
                        ('data_dir', args.data),
                        ('output_dir', args.output)):
        if value:
            inject_arg(name, value)

    if multiprocess and args.multiprocess:
        config_paths = validate_injectable('configs_dir')

        if not os.path.exists('configs_mp'):
            logger.warning("could not find 'configs_mp'. skipping...")
        else:
            logger.info("adding 'configs_mp' to config_dir list...")
            config_paths.insert(0, 'configs_mp')
            inject_arg('configs_dir', config_paths)

        config.override_setting('multiprocess', True)
        if args.multiprocess > 0:
            config.override_setting('num_processes', args.multiprocess)

    if args.chunk_size:
        config.override_setting('chunk_size', int(args.chunk_size))

    for injectable in ['configs_dir', 'data_dir', 'output_dir']:
        validate_injectable(injectable)

    if args.pipeline:
        inject.add_injectable('pipeline_file_name', args.pipeline)

    if args.resume:
        config.override_setting('resume_after', args.resume)
def test_rng_access():
    """Pipeline exposes its random-number generator while open."""

    setup_dirs(os.path.join(os.path.dirname(__file__), 'configs'))

    inject.add_injectable('rng_base_seed', 0)

    pipeline.open_pipeline()
    assert isinstance(pipeline.get_rn_generator(), random.Random)
    pipeline.close_pipeline()

    inject.clear_cache()
def test_missing_filename(seed_households, data_dir):
    """A table entry without a filename should fail with a clear assertion."""

    settings_yaml = """
input_table_list:
  - tablename: households
    index_col: household_id
    rename_columns:
      HHID: household_id
"""
    inject.add_injectable(
        'settings', yaml.load(settings_yaml, Loader=yaml.SafeLoader))

    with pytest.raises(AssertionError) as excinfo:
        input.read_input_table('households')
    assert 'no input file provided' in str(excinfo.value)
def run(args):
    """
    Run bca4abm. Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'.

    Parameters
    ----------
    args : argparse.Namespace
        parsed command-line args; reads working_dir, config, data, output,
        pipeline and resume attributes
    """

    # chdir first so relative default folders resolve under the project dir
    if args.working_dir and os.path.exists(args.working_dir):
        os.chdir(args.working_dir)

    if args.config:
        inject.add_injectable('configs_dir', args.config)

    if args.data:
        inject.add_injectable('data_dir', args.data)

    if args.output:
        inject.add_injectable('output_dir', args.output)

    # validate that all three canonical dirs are resolvable and exist;
    # get_injectable raises RuntimeError when a dir was never injected
    for injectable in ['configs_dir', 'data_dir', 'output_dir']:
        try:
            dir_path = inject.get_injectable(injectable)
        except RuntimeError:
            sys.exit('Error: please specify either a --working_dir '
                     "containing 'configs', 'data', and 'output' folders "
                     'or all three of --config, --data, and --output')
        if not os.path.exists(dir_path):
            sys.exit("Could not find %s '%s'" % (injectable, os.path.abspath(dir_path)))

    if args.pipeline:
        inject.add_injectable('pipeline_file_name', args.pipeline)

    if args.resume:
        override_setting('resume_after', args.resume)

    tracing.config_logger()
    tracing.delete_csv_files()  # only modifies output_dir
    warnings.simplefilter('always')
    logging.captureWarnings(capture=True)

    t0 = tracing.print_elapsed_time()

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = setting('resume_after', None)

    if resume_after:
        print('resume_after: %s' % resume_after)

    pipeline.run(models=setting('models'), resume_after=resume_after)

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    t0 = tracing.print_elapsed_time('all models', t0)
def load_shadow_price_calculator(model_settings):
    """
    Initialize ShadowPriceCalculator for model_selector (e.g. school or workplace)

    If multiprocessing, get the shared_data buffer to coordinate
    global_desired_size calculation across sub-processes

    Parameters
    ----------
    model_settings : dict

    Returns
    -------
    spc : ShadowPriceCalculator
    """

    num_processes = inject.get_injectable('num_processes', 1)
    model_selector = model_settings['MODEL_SELECTOR']

    # shared data buffers exist only when running multiprocess
    data_buffers = inject.get_injectable('data_buffers', None)

    if data_buffers is None:
        # single-process: ShadowPriceCalculator will allocate its own data
        assert num_processes == 1
        data = None
        lock = None
    else:
        logger.info('Using existing data_buffers for shadow_price')

        # lazily build and cache shadow_pricing_info
        shadow_pricing_info = inject.get_injectable('shadow_pricing_info', None)
        if shadow_pricing_info is None:
            shadow_pricing_info = get_shadow_pricing_info()
            inject.add_injectable('shadow_pricing_info', shadow_pricing_info)

        # extract data buffer and reshape as numpy array
        data, lock = shadow_price_data_from_buffers(
            data_buffers, shadow_pricing_info, model_selector)

    return ShadowPriceCalculator(model_settings, num_processes, data, lock)
def load_shadow_price_calculator(model_settings):
    """
    Initialize ShadowPriceCalculator for model_selector (e.g. school or workplace)

    If multiprocessing, get the shared_data buffer to coordinate
    global_desired_size calculation across sub-processes

    Parameters
    ----------
    model_settings : dict

    Returns
    -------
    spc : ShadowPriceCalculator
    """

    n_procs = inject.get_injectable('num_processes', 1)
    selector = model_settings['MODEL_SELECTOR']

    # - get shared_data from data_buffers (if multiprocessing)
    buffers = inject.get_injectable('data_buffers', None)
    if buffers is not None:
        logger.info('Using existing data_buffers for shadow_price')

        # build shadow_pricing_info once and cache it as an injectable
        info = inject.get_injectable('shadow_pricing_info', None)
        if info is None:
            info = get_shadow_pricing_info()
            inject.add_injectable('shadow_pricing_info', info)

        # extract data buffer and reshape as numpy array
        shared_data, shared_lock = \
            shadow_price_data_from_buffers(buffers, info, selector)
    else:
        assert n_procs == 1
        shared_data = None  # ShadowPriceCalculator will allocate its own data
        shared_lock = None

    spc = ShadowPriceCalculator(model_settings, n_procs, shared_data, shared_lock)
    return spc
def data_dir_from_settings(): """ legacy strategy foir specifying data_dir is with orca injectable. Calling this function provides an alternative by reading it from settings file """ # FIXME - not sure this plays well with orca # it may depend on when file with orca decorator is imported data_dir = setting('data_dir', None) if data_dir: inject.add_injectable('data_dir', data_dir) else: data_dir = inject.get_injectable('data_dir') logger.info("data_dir: %s" % data_dir) return data_dir
def register_tours(df, trace_hh_id):
    """
    Register with inject for tracing

    create an injectable 'trace_tour_ids' with a list of tour_ids in household we are tracing.
    This allows us to slice by tour_id without requiring presence of person_id column

    Parameters
    ----------
    df: pandas.DataFrame
        traced dataframe
    trace_hh_id: int
        household id we are tracing

    Returns
    -------
    Nothing
    """

    # get list of persons in traced household (should already have been registered)
    person_ids = inject.get_injectable("trace_person_ids", [])

    if len(person_ids) == 0:
        # trace_hh_id not in households table or register_persons was not called
        # FIX: logger.warn is deprecated; use logger.warning
        logger.warning("no person ids registered for trace_hh_id %s" % trace_hh_id)
        return

    # but if household_id is in households, then we may have some tours
    traced_tours_df = slice_ids(df, person_ids, column='person_id')
    trace_tour_ids = traced_tours_df.index.tolist()
    if len(trace_tour_ids) == 0:
        logger.info("register_tours: no tours found for person_ids %s." % person_ids)
    else:
        logger.info("tracing tour_ids %s in %s tours" % (trace_tour_ids, len(df.index)))

    inject.add_injectable("trace_tour_ids", trace_tour_ids)
    logger.debug("register_tours injected trace_tour_ids %s" % trace_tour_ids)
def register_persons(df, trace_hh_id):
    """
    Register with orca persons for tracing

    Parameters
    ----------
    df: pandas.DataFrame
        traced dataframe
    trace_hh_id: int
        household id we are tracing

    Returns
    -------
    Nothing
    """

    # inject persons_index name of person dataframe index
    if df.index.name is None:
        df.index.names = ['person_id']
        # FIX: logger.warn is deprecated; use logger.warning
        logger.warning("persons table index had no name. renamed index '%s'" % df.index.name)
    inject.add_injectable("persons_index_name", df.index.name)

    logger.debug("register_persons injected persons_index_name '%s'" % df.index.name)

    # inject list of person_ids in household we are tracing
    # this allows us to slice by person_id without requiring presence of household_id column
    traced_persons_df = df[df['household_id'] == trace_hh_id]
    trace_person_ids = traced_persons_df.index.tolist()
    if len(trace_person_ids) == 0:
        # FIX: logger.warn is deprecated; use logger.warning
        logger.warning("register_persons: trace_hh_id %s not found." % trace_hh_id)

    inject.add_injectable("trace_person_ids", trace_person_ids)
    logger.debug("register_persons injected trace_person_ids %s" % trace_person_ids)

    logger.info("tracing person_ids %s in %s persons" % (trace_person_ids, len(df.index)))
def test_mp_run():
    """Multiprocess run of the 3-zone example, then regress the results."""

    configs_dir = [example_path('configs_3_zone'), example_path('configs')]
    setup_dirs(configs_dir, example_path('data_3'))

    inject.add_injectable('settings_file_name', 'settings_mp.yaml')

    run_list = mp_tasks.get_run_list()
    mp_tasks.print_run_list(run_list)

    # do this after config.handle_standard_args, as command line args may override injectables
    injectable_names = ['data_dir', 'configs_dir', 'output_dir', 'settings_file_name']
    injectables = {k: inject.get_injectable(k) for k in injectable_names}

    mp_tasks.run_multiprocess(run_list, injectables)

    pipeline.open_pipeline('_')
    regress_3_zone()
    pipeline.close_pipeline()
def test_mp_run():
    """Multiprocess run of the mini example, then regress the results."""

    here = os.path.dirname(__file__)

    inject.add_injectable('configs_dir',
                          [os.path.join(here, 'configs_mp'),
                           os.path.join(here, 'configs')])
    inject.add_injectable("output_dir", os.path.join(here, 'output'))
    inject.add_injectable("data_dir", os.path.join(here, 'data'))

    tracing.config_logger()

    run_list = mp_tasks.get_run_list()
    mp_tasks.print_run_list(run_list)

    # do this after config.handle_standard_args, as command line args may override injectables
    injectables = {k: inject.get_injectable(k)
                   for k in ('data_dir', 'configs_dir', 'output_dir')}

    # pipeline.run(models=run_list['models'], resume_after=run_list['resume_after'])
    mp_tasks.run_multiprocess(run_list, injectables)

    pipeline.open_pipeline('_')
    regress_mini_auto()
    pipeline.close_pipeline()
def test_vts(): inject.add_injectable("settings", {}) # note: need 0 duration tour on one end of day to guarantee at least one available tour alts = pd.DataFrame({ "start": [1, 1, 2, 3], "end": [1, 4, 5, 6] }) alts['duration'] = alts.end - alts.start inject.add_injectable("tdd_alts", alts) current_tour_person_ids = pd.Series(['b', 'c'], index=['d', 'e']) previous_tour_by_personid = pd.Series([2, 2, 1], index=['a', 'b', 'c']) prev_tour_attrs = get_previous_tour_by_tourid(current_tour_person_ids, previous_tour_by_personid, alts) pdt.assert_series_equal( prev_tour_attrs.start_previous, pd.Series([2, 1], index=['d', 'e'], name='start_previous')) pdt.assert_series_equal( prev_tour_attrs.end_previous, pd.Series([5, 4], index=['d', 'e'], name='end_previous')) tours = pd.DataFrame({ "person_id": [1, 1, 2, 3, 3], "tour_num": [1, 2, 1, 1, 2], "tour_type": ['x', 'x', 'x', 'x', 'x'] }) persons = pd.DataFrame({ "income": [20, 30, 25] }, index=[1, 2, 3]) inject.add_table('persons', persons) spec = pd.DataFrame({"Coefficient": [1.2]}, index=["income"]) spec.index.name = "Expression" segment_col = None # no segmentation of model_spec inject.add_injectable("check_for_variability", True) tdd_choices, timetable = vectorize_tour_scheduling( tours, persons, alts, spec, segment_col, model_settings={}, chunk_size=0, trace_label='test_vts') # FIXME - dead reckoning regression # there's no real logic here - this is just what came out of the monte carlo # note that the result comes out ordered by the nth trips and not ordered # by the trip index. shrug? expected = [2, 2, 2, 0, 0] assert (tdd_choices.tdd.values == expected).all()
def test_mp_run():
    """Run the multiprocess pipeline and regress the mini-auto results."""

    test_dir = os.path.dirname(__file__)
    mp_configs_dir = os.path.join(test_dir, 'configs_mp')
    configs_dir = os.path.join(test_dir, 'configs')

    inject.add_injectable('configs_dir', [mp_configs_dir, configs_dir])
    inject.add_injectable("output_dir", os.path.join(test_dir, 'output'))
    inject.add_injectable("data_dir", os.path.join(test_dir, 'data'))

    tracing.config_logger()

    run_list = mp_tasks.get_run_list()
    mp_tasks.print_run_list(run_list)

    # do this after config.handle_standard_args, as command line args may override injectables
    injectables = {name: inject.get_injectable(name)
                   for name in ['data_dir', 'configs_dir', 'output_dir']}

    # pipeline.run(models=run_list['models'], resume_after=run_list['resume_after'])
    mp_tasks.run_multiprocess(run_list, injectables)

    pipeline.open_pipeline('_')
    regress_mini_auto()
    pipeline.close_pipeline()
def setup_dirs(ancillary_configs_dir=None, data_dir=None):
    """Inject canonical test dirs, reset logging, and clean prior outputs."""

    # ancillary_configs_dir is used by run_mp to test multiprocess
    configs_dir = [os.path.join(os.path.dirname(__file__), 'configs'),
                   example_path('configs')]
    if ancillary_configs_dir is not None:
        configs_dir.insert(0, ancillary_configs_dir)

    inject.add_injectable('configs_dir', configs_dir)

    inject.add_injectable('output_dir',
                          os.path.join(os.path.dirname(__file__), 'output'))

    inject.add_injectable('data_dir', data_dir or example_path('data'))

    inject.clear_cache()

    tracing.config_logger()

    for ext in ('csv', 'txt', 'yaml', 'omx'):
        tracing.delete_output_files(ext)
def test_hdf_reader1(seed_households, data_dir):
    """households read back from an HDF5 input keeps its renamed index."""

    settings_yaml = """
input_table_list:
  - tablename: households
    filename: households.h5
    index_col: household_id
    rename_columns:
      HHID: household_id
"""
    inject.add_injectable(
        'settings', yaml.load(settings_yaml, Loader=yaml.SafeLoader))

    # write the seed table where the reader expects to find it
    hh_file = os.path.join(data_dir, 'households.h5')
    seed_households.to_hdf(hh_file, key='households', mode='w')
    assert os.path.isfile(hh_file)

    df = input.read_input_table('households')
    assert df.index.name == 'household_id'
def setup_dirs(configs_dir):
    """Inject configs/output/data dirs, reset logging, clean prior outputs."""

    here = os.path.dirname(__file__)

    inject.add_injectable("configs_dir", configs_dir)
    inject.add_injectable("output_dir", os.path.join(here, 'output'))
    inject.add_injectable("data_dir", os.path.join(here, 'data'))

    inject.clear_cache()

    tracing.config_logger()

    for ext in ('csv', 'txt', 'yaml'):
        tracing.delete_output_files(ext)
'chunk_size', 'multiprocess', 'num_processes', 'resume_after', ] for k in settings: logger.info("setting %s: %s" % (k, config.setting(k))) for k in injectables: logger.info("injectable %s: %s" % (k, inject.get_injectable(k))) if __name__ == '__main__': inject.add_injectable('data_dir', '../example/data') inject.add_injectable('configs_dir', ['configs', '../example/configs']) injectables = config.handle_standard_args() config.filter_warnings() tracing.config_logger() log_settings(injectables) t0 = tracing.print_elapsed_time() # cleanup if not resuming if not config.setting('resume_after', False): cleanup_output_files()
for k in injectables: logger.info("injectable %s: %s" % (k, inject.get_injectable(k))) if __name__ == '__main__': # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.set_option.html # pd.set_option('display.max_columns', 50) data_dir = "E:/projects/clients/ASIM/data/mtc_tm1" data_dir = '/Users/jeff.doyle/work/activitysim-data/mtc_tm1/data' data_dir = '../example/data' # inject.add_injectable('data_dir', '/Users/jeff.doyle/work/activitysim-data/mtc_tm1/data') inject.add_injectable('data_dir', ['ancillary_data', data_dir]) # inject.add_injectable('data_dir', ['ancillary_data', '../activitysim/abm/test/data']) inject.add_injectable('configs_dir', ['configs', '../example/configs']) injectables = config.handle_standard_args() tracing.config_logger() config.filter_warnings() log_settings(injectables) t0 = tracing.print_elapsed_time() # cleanup if not resuming if not config.setting('resume_after', False): cleanup_output_files()
random_omaz = np.random.choice(network_los.maz_df.index.values, size=VECTOR_TEST_SIZE, replace=True) taps_mazs = network_los.get_taps_mazs(random_omaz, attribute=attribute) return len(taps_mazs.index) def set_random_seed(): np.random.seed(0) # uncomment the line below to set random seed so that run results are reproducible set_random_seed() inject.add_injectable("set_random_seed", set_random_seed) tracing.config_logger() t0 = print_elapsed_time() taz_skim_stack = inject.get_injectable('taz_skim_dict') t0 = print_elapsed_time("load taz_skim_dict", t0) tap_skim_stack = inject.get_injectable('tap_skim_dict') t0 = print_elapsed_time("load tap_skim_dict", t0) network_los = inject.get_injectable('network_los') t0 = print_elapsed_time("load network_los", t0) # test sizes for all implemented methods
def register_traceable_table(table_name, df):
    """
    Register traceable table and record which of its rows should be traced.

    Tracing is driven by the 'trace_hh_id' injectable: the households table is
    traced directly by that id, and every other table is sliced via the first
    already-registered reference column found among its columns.

    Parameters
    ----------
    table_name : str
        name of table to register (must appear in the 'traceable_tables' injectable)
    df: pandas.DataFrame
        traced dataframe

    Returns
    -------
    Nothing

    Side effects: updates the 'traceable_table_indexes' and
    'traceable_table_ids' injectables.
    """

    trace_hh_id = inject.get_injectable("trace_hh_id", None)

    new_traced_ids = []

    # nothing to trace if no household id was requested
    if trace_hh_id is None:
        return

    traceable_tables = inject.get_injectable('traceable_tables', [])
    if table_name not in traceable_tables:
        logger.error("table '%s' not in traceable_tables" % table_name)
        return

    # a named index is required so rows can be identified when slicing later
    idx_name = df.index.name
    if idx_name is None:
        logger.error("Can't register table '%s' without index name" % table_name)
        return

    traceable_table_ids = inject.get_injectable('traceable_table_ids')
    traceable_table_indexes = inject.get_injectable('traceable_table_indexes')

    # index names must be unique across registered tables
    if idx_name in traceable_table_indexes and traceable_table_indexes[idx_name] != table_name:
        logger.error("table '%s' index name '%s' already registered for table '%s'" %
                     (table_name, idx_name, traceable_table_indexes[idx_name]))
        return

    # - figure out which ids in this table to trace
    if table_name == 'households':
        # households is sliced directly by trace_hh_id
        if trace_hh_id not in df.index:
            logger.warning("trace_hh_id %s not in dataframe" % trace_hh_id)
            new_traced_ids = []
        else:
            logger.info("tracing household id %s in %s households" %
                        (trace_hh_id, len(df.index)))
            new_traced_ids = [trace_hh_id]
    else:
        # find first already registered ref_col we can use to slice this table
        ref_col = next((c for c in traceable_table_indexes if c in df.columns), None)

        if ref_col is None:
            logger.error("can't find a registered table to slice table '%s' index name '%s'"
                         " in traceable_table_indexes: %s" %
                         (table_name, idx_name, traceable_table_indexes))
            return

        # get traceable_ids for ref_col table
        ref_col_table_name = traceable_table_indexes[ref_col]
        ref_col_traced_ids = traceable_table_ids.get(ref_col_table_name, [])

        # inject list of ids in table we are tracing
        # this allows us to slice by id without requiring presence of a household id column
        traced_df = df[df[ref_col].isin(ref_col_traced_ids)]
        new_traced_ids = traced_df.index.tolist()
        if len(new_traced_ids) == 0:
            logger.warning("register %s: no rows with %s in %s." %
                           (table_name, ref_col, ref_col_traced_ids))

    # update traceable_table_indexes with this traceable_table's idx_name
    if idx_name not in traceable_table_indexes:
        traceable_table_indexes[idx_name] = table_name
        # FIX: was a bare print() - route through the module logger like every
        # other message in this function
        logger.debug("adding table %s.%s to traceable_table_indexes" % (table_name, idx_name))
        inject.add_injectable('traceable_table_indexes', traceable_table_indexes)

    # update the list of trace_ids for this table
    prior_traced_ids = traceable_table_ids.get(table_name, [])

    if new_traced_ids:
        # a row id should never be registered twice
        assert not set(prior_traced_ids) & set(new_traced_ids)
        traceable_table_ids[table_name] = prior_traced_ids + new_traced_ids
        inject.add_injectable('traceable_table_ids', traceable_table_ids)

    logger.info("register %s: added %s new ids to %s existing trace ids" %
                (table_name, len(new_traced_ids), len(prior_traced_ids)))
    logger.info("register %s: tracing new ids %s in %s" %
                (table_name, new_traced_ids, table_name))
def override_injectable(name, value):
    """Install *value* as the injectable *name* and remember the override."""
    # record the overridden name first; the two statements are independent
    injectables.append(name)
    inject.add_injectable(name, value)
def cache_spec(hhsize, spec):
    """Cache *spec* for household size *hhsize* as an injectable."""
    # the injectable name encodes the household size; cache spec under it
    inject.add_injectable(cached_spec_name(hhsize), spec)
def override_setting(key, value):
    """Set *key* to *value* in the injected settings dict."""
    settings_dict = inject.get_injectable('settings')
    settings_dict[key] = value
    # re-inject so downstream consumers of 'settings' see the update
    inject.add_injectable('settings', settings_dict)
def households(households_sample_size, override_hh_ids, trace_hh_id):
    """
    Load the households input table, optionally restricting it to an explicit
    override list, a single traced household, or a random sample.

    Side effects: injects 'households_sliced', replaces the 'households' table
    with the resulting dataframe, registers a 'households' rng channel, and
    (when tracing) registers the table as traceable.

    Parameters
    ----------
    households_sample_size : int
        0 means use the full table; 1 with trace_hh_id means trace exclusively;
        otherwise sample this many households (if fewer than the full count)
    override_hh_ids : list-like or None
        explicit household ids to load (takes precedence over sampling)
    trace_hh_id : int or None
        household id to trace; guaranteed present in the sampled result if it
        exists in the full store

    Returns
    -------
    df : pandas.DataFrame
        the (possibly sliced/sampled) households, indexed by 'household_id'
    """
    df_full = read_input_table("households")
    # tracks whether we return a subset of the full table (injected below)
    households_sliced = False

    logger.info("full household list contains %s households" % df_full.shape[0])

    # only using households listed in override_hh_ids
    if override_hh_ids is not None:

        # trace_hh_id will not used if it is not in list of override_hh_ids
        logger.info("override household list containing %s households" % len(override_hh_ids))

        df = df_full[df_full.index.isin(override_hh_ids)]
        households_sliced = True

        # warn (via info) if some requested ids were missing from the store
        if df.shape[0] < len(override_hh_ids):
            logger.info("found %s of %s households in override household list" %
                        (df.shape[0], len(override_hh_ids)))

        if df.shape[0] == 0:
            raise RuntimeError('No override households found in store')

    # if we are tracing hh exclusively
    elif trace_hh_id and households_sample_size == 1:

        # df contains only trace_hh (or empty if not in full store)
        df = tracing.slice_ids(df_full, trace_hh_id)
        households_sliced = True

    # if we need a subset of full store
    elif households_sample_size > 0 and df_full.shape[0] > households_sample_size:

        logger.info("sampling %s of %s households" %
                    (households_sample_size, df_full.shape[0]))

        """
        Because random seed is set differently for each step, sampling of households using
        Random.global_rng would sample differently depending upon which step it was called from.
        We use a one-off rng seeded with the pseudo step name 'sample_households' to provide
        repeatable sampling no matter when the table is loaded.

        Note that the external_rng is also seeded with base_seed so the sample will
        (rightly) change if the pipeline rng's base_seed is changed
        """
        prng = pipeline.get_rn_generator().get_external_rng('sample_households')
        df = df_full.take(prng.choice(len(df_full), size=households_sample_size, replace=False))
        households_sliced = True

        # if tracing and we missed trace_hh in sample, but it is in full store
        if trace_hh_id and trace_hh_id not in df.index and trace_hh_id in df_full.index:

            # replace first hh in sample with trace_hh
            logger.debug("replacing household %s with %s in household sample" %
                         (df.index[0], trace_hh_id))
            df_hh = df_full.loc[[trace_hh_id]]
            df = pd.concat([df_hh, df[1:]])

    else:
        # use the full table unsliced
        df = df_full

    # persons table
    inject.add_injectable('households_sliced', households_sliced)

    logger.info("loaded households %s" % (df.shape,))

    df.index.name = 'household_id'

    # FIXME - pathological knowledge of name of chunk_id column used by chunked_choosers_by_chunk_id
    assert 'chunk_id' not in df.columns
    df['chunk_id'] = pd.Series(list(range(len(df))), df.index)

    # replace table function with dataframe
    inject.add_table('households', df)

    # register households as a random-number channel for the pipeline rng
    pipeline.get_rn_generator().add_channel('households', df)

    if trace_hh_id:
        tracing.register_traceable_table('households', df)
        tracing.trace_df(df, "raw.households", warn_if_empty=True)

    return df