Пример #1
0
def test_simul_integerizer():
    """Regression check of do_simul_integerizing against a fixed list of integer weights."""

    # only configs_dir is registered here; data_dir and output_dir are not set by this test
    here = os.path.dirname(__file__)
    inject.add_injectable("configs_dir", os.path.join(here, 'configs'))

    expected_weights = [0, 14, 10, 49, 1, 1, 0, 0, 0, 0, 46, 29]

    integer_weights_df = do_simul_integerizing(
        trace_label="label",
        incidence_df=incidence_df,
        sub_weights=sub_zone_weights,
        sub_controls_df=sub_controls_df,
        control_spec=control_spec,
        total_hh_control_col='num_hh',
        sub_geography='TRACT',
        sub_control_zones=sub_control_zones)

    weights_match = integer_weights_df.integer_weight.values == expected_weights
    assert weights_match.all()

    print("\ntest_simul_integerizer integer_weights_df\n", integer_weights_df)
Пример #2
0
def test_misc():
    """Check the error raised for unset directory injectables and the chunk_size default."""

    inject.clear_cache()

    # at this point each standard directory lookup is expected to fail
    for dir_injectable in ("configs_dir", "data_dir", "output_dir"):
        with pytest.raises(RuntimeError) as excinfo:
            inject.get_injectable(dir_injectable)
        assert "directory does not exist" in str(excinfo.value)

    here = os.path.dirname(__file__)

    inject.add_injectable("configs_dir",
                          os.path.join(here, 'configs_test_misc'))

    assert isinstance(inject.get_injectable("settings"), dict)

    inject.add_injectable("data_dir", os.path.join(here, 'data'))

    # default values if not specified in settings
    assert inject.get_injectable("chunk_size") == 0
Пример #3
0
def add_canonical_dirs():
    """Register the 'configs' and 'output' folders beside this file as injectables."""

    base_dir = os.path.dirname(__file__)

    for injectable_name, folder in (("configs_dir", 'configs'),
                                    ("output_dir", 'output')):
        inject.add_injectable(injectable_name, os.path.join(base_dir, folder))
Пример #4
0
def register_households(df, trace_hh_id):
    """
    Register the households table for tracing.

    Injects the name of the households index as 'hh_index_name'. If the index
    has no name it is renamed (in place) to 'household_id' first.

    Parameters
    ----------
    df: pandas.DataFrame
        traced dataframe

    trace_hh_id: int
        household id we are tracing

    Returns
    -------
    Nothing
    """

    logger.info("tracing household id %s in %s households",
                trace_hh_id, len(df.index))

    # a missing trace household is only reported, not treated as an error
    if trace_hh_id not in df.index:
        # Logger.warn is a deprecated alias (removed in Python 3.13)
        logger.warning("trace_hh_id %s not in dataframe", trace_hh_id)

    # inject hh_index_name: the name of the households dataframe index
    if df.index.name is None:
        df.index.names = ['household_id']
        logger.warning("households table index had no name. renamed index '%s'",
                       df.index.name)
    inject.add_injectable("hh_index_name", df.index.name)

    logger.debug("register_households injected hh_index_name '%s'",
                 df.index.name)
Пример #5
0
def test_misc():
    """Check the error raised for unset directory injectables and the chunk_size default."""

    inject.clear_cache()

    # each of the standard directory injectables is expected to raise here
    for missing_dir in ("configs_dir", "data_dir", "output_dir"):
        with pytest.raises(RuntimeError) as excinfo:
            inject.get_injectable(missing_dir)
        assert "directory does not exist" in str(excinfo.value)

    test_dir = os.path.dirname(__file__)

    inject.add_injectable("configs_dir",
                          os.path.join(test_dir, 'configs_test_misc'))

    loaded_settings = inject.get_injectable("settings")
    assert isinstance(loaded_settings, dict)

    inject.add_injectable("data_dir", os.path.join(test_dir, 'data'))

    # default values if not specified in settings
    assert inject.get_injectable("chunk_size") == 0
Пример #6
0
def test_create_input_store(seed_households, data_dir):
    """Read a households csv with create_input_store enabled and check the resulting h5 store."""

    settings_yaml = """
        create_input_store: True
        input_table_list:
          - tablename: households
            h5_tablename: seed_households
            filename: households.csv
            index_col: household_id
            rename_columns:
              HHID: household_id
    """

    inject.add_injectable(
        'settings', yaml.load(settings_yaml, Loader=yaml.SafeLoader))

    # write the fixture to the csv file named in the settings above
    csv_path = os.path.join(data_dir, 'households.csv')
    seed_households.to_csv(csv_path, index=False)
    assert os.path.isfile(csv_path)

    households = input.read_input_table('households')
    assert households.index.name == 'household_id'

    # the store is expected in output_dir, keyed by h5_tablename
    store_path = os.path.join(inject.get_injectable('output_dir'),
                              'input_data.h5')
    assert os.path.exists(store_path)

    stored_households = pd.read_hdf(store_path, 'seed_households')
    assert stored_households.equals(seed_households)
Пример #7
0
def preload_injectables():
    """
    preload bulky injectables up front - stuff that isn't inserted into the pipeline

    Registers the 'track_skim_usage', 'write_data_dictionary' and 'write_tables'
    steps and, when settings has no 'input_table_list', installs
    DEFAULT_TABLE_LIST under that key in the 'settings' injectable.

    Returns
    -------
    bool
        always True
    """

    logger.info("preload_injectables")

    inject.add_step('track_skim_usage', track_skim_usage)
    inject.add_step('write_data_dictionary', write_data_dictionary)
    inject.add_step('write_tables', write_tables)

    table_list = config.setting('input_table_list')

    # default ActivitySim table names and indices
    if table_list is None:
        # Logger.warn is a deprecated alias (removed in Python 3.13)
        logger.warning("No 'input_table_list' found in settings. This will be a "
                       "required setting in upcoming versions of ActivitySim.")

        new_settings = inject.get_injectable('settings')
        new_settings['input_table_list'] = DEFAULT_TABLE_LIST
        inject.add_injectable('settings', new_settings)

    t0 = tracing.print_elapsed_time()

    # FIXME - still want to do this?
    # if inject.get_injectable('skim_dict', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_dict", t0, debug=True)
    #
    # if inject.get_injectable('skim_stack', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_stack", t0, debug=True)

    return True
Пример #8
0
def test_1_week_time_window():
    """Check skim_time_period_label with a one-week window split into one-day periods."""

    weekdays = [
        'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
        'Saturday'
    ]

    skim_time_periods = {
        'time_window': 10080,  # One Week
        'period_minutes': 1440,  # One Day
        'periods': [0, 1, 2, 3, 4, 5, 6, 7],
        'labels': list(weekdays),
    }

    inject.add_injectable("settings",
                          {'skim_time_periods': skim_time_periods})

    # scalar lookups: period 1 is 'Sunday' ... period 7 is 'Saturday'
    for period, weekday in enumerate(weekdays, start=1):
        assert expressions.skim_time_period_label(period) == weekday

    # the same lookup applied to a whole series at once
    weekly_series = expressions.skim_time_period_label(
        pd.Series([1, 2, 3, 4, 5, 6, 7]))

    pd.testing.assert_series_equal(weekly_series, pd.Series(weekdays))
Пример #9
0
def initialize_traceable_tables():
    """Reset the 'traceable_table_ids' injectable to an empty dict.

    If ids were already registered, the table names being reset are logged
    at debug level first.
    """

    existing_ids = inject.get_injectable('traceable_table_ids', {})

    if len(existing_ids) > 0:
        message = f"initialize_traceable_tables resetting table_ids for {list(existing_ids.keys())}"
        logger.debug(message)

    inject.add_injectable('traceable_table_ids', {})
Пример #10
0
def inject_settings(**kwargs):
    """Read settings.yaml, apply keyword overrides and inject the result as 'settings'.

    Parameters
    ----------
    **kwargs
        settings to add or override, keyed by setting name

    Returns
    -------
    settings
        the settings object read from file, with the overrides applied
    """

    settings = config.read_settings_file('settings.yaml', mandatory=True)

    for name, value in kwargs.items():
        settings[name] = value

    inject.add_injectable("settings", settings)

    return settings
Пример #11
0
def test_integerizer():
    """Run integerizer.do_integerizing on a small hand-built problem.

    The only assertion is that the integerized weights sum to 100.
    """

    # only configs_dir is registered here; data_dir and output_dir are not set by this test
    inject.add_injectable(
        "configs_dir", os.path.join(os.path.dirname(__file__), 'configs'))

    control_cols = ['num_hh', 'hh_1', 'hh_2', 'p1', 'p2', 'p3']

    float_weights = [
        1.362893, 25.658290, 7.978812, 27.789651, 18.451021, 8.641589,
        1.476104, 8.641589
    ]

    # rows are elements for which factors are calculated, columns are constraints to be satisfied
    incidence_table = pd.DataFrame({
        'num_hh': [1, 1, 1, 1, 1, 1, 1, 1],
        'hh_1': [1, 1, 1, 0, 0, 0, 0, 0],
        'hh_2': [0, 0, 0, 1, 1, 1, 1, 1],
        'p1': [1, 1, 2, 1, 0, 1, 2, 1],
        'p2': [1, 0, 1, 0, 2, 1, 1, 1],
        'p3': [1, 1, 0, 2, 1, 0, 2, 0],
        'float_weights': float_weights,
    })

    # one spec row per control column, in the same order as control_cols
    control_spec = pd.DataFrame({
        'seed_table': ['households'] * 3 + ['persons'] * 3,
        'target': control_cols,
        'importance': [10000000] + [1000] * 5,
    })

    # column totals which the final weighted incidence table sums must satisfy
    control_totals = pd.Series([100, 35, 65, 91, 65, 104],
                               index=control_spec.target.values)

    integerized_weights, status = integerizer.do_integerizing(
        trace_label='label',
        control_spec=control_spec,
        control_totals=control_totals,
        incidence_table=incidence_table[control_cols],
        float_weights=incidence_table['float_weights'],
        total_hh_control_col='num_hh')

    weight_total = integerized_weights.sum()

    print("do_integerizing status", status)
    print("sum", weight_total)
    print("do_integerizing integerized_weights\n", integerized_weights)

    assert weight_total == 100
Пример #12
0
def inject_settings(configs_dir, **kwargs):
    """Load settings.yaml from configs_dir, apply overrides and inject it as 'settings'.

    Parameters
    ----------
    configs_dir : str
        directory containing settings.yaml
    **kwargs
        settings to add or override, keyed by setting name

    Returns
    -------
    settings
        the parsed yaml content with the overrides applied
    """

    settings_path = os.path.join(configs_dir, 'settings.yaml')

    with open(settings_path) as settings_file:
        settings = yaml.load(settings_file, Loader=yaml.SafeLoader)

    for name, value in kwargs.items():
        settings[name] = value

    inject.add_injectable("settings", settings)

    return settings
Пример #13
0
def test_future_warning(config_path):
    """Expect a FutureWarning when skim_time_periods uses the key 'hours' instead of 'periods'."""

    settings_path = os.path.join(config_path, 'settings_60_min.yaml')
    with open(settings_path) as settings_file:
        settings = yaml.load(settings_file, Loader=yaml.SafeLoader)

    # move the 'periods' entry under the key 'hours'
    time_periods = settings['skim_time_periods']
    time_periods['hours'] = time_periods.pop('periods')

    inject.add_injectable("settings", settings)

    with pytest.warns(FutureWarning):
        expressions.skim_time_period_label(1)
Пример #14
0
def setup_working_dir(example_name, inherit=False):
    """Change into the directory of example_name and delete its output files.

    Parameters
    ----------
    example_name : str
        passed to example_dir() to locate the working directory
    inherit : bool
        when true, 'data_dir' is re-injected as a two-item list: the current
        data_dir followed by the 'data' folder of the example named 'example'
    """

    os.chdir(example_dir(example_name))

    for extension in ('csv', 'txt', 'log', 'h5'):
        tracing.delete_output_files(extension)

    if not inherit:
        return

    current_data_dir = inject.get_injectable('data_dir')
    inherited_data_dir = os.path.join(example_dir('example'), 'data')
    inject.add_injectable('data_dir',
                          [current_data_dir, inherited_data_dir],
                          cache=True)
Пример #15
0
def test_rng_access():
    """Check that the open pipeline hands out a random.Random generator."""

    setup_dirs()
    inject.add_injectable('rng_base_seed', 0)

    pipeline.open_pipeline()

    generator = pipeline.get_rn_generator()
    assert isinstance(generator, random.Random)

    # tidy up: close the pipeline and clear the inject cache
    pipeline.close_pipeline()
    inject.clear_cache()
Пример #16
0
def test_60_minute_windows(config_path):
    """Check skim_time_period_label against the settings in settings_60_min.yaml."""

    settings_path = os.path.join(config_path, 'settings_60_min.yaml')
    with open(settings_path) as settings_file:
        settings = yaml.load(settings_file, Loader=yaml.SafeLoader)

    inject.add_injectable("settings", settings)

    periods = [1, 8, 12, 18, 23]
    labels = ['EA', 'AM', 'MD', 'PM', 'EV']

    # scalar lookups
    for period, label in zip(periods, labels):
        assert expressions.skim_time_period_label(period) == label

    # the same lookups applied to a series
    pd.testing.assert_series_equal(
        expressions.skim_time_period_label(pd.Series(periods)),
        pd.Series(labels))
Пример #17
0
def inject_settings(**kwargs):
    """Read the settings file selected by 'two_zone', apply overrides and inject 'settings'.

    The settings file is chosen once, before any override is applied:
    'settings.yaml' when kwargs['two_zone'] is truthy, otherwise
    'settings_static.yaml' (also when 'two_zone' is not passed at all).
    Previously the file was only read when the loop reached the 'two_zone'
    key, which left `settings` unbound if 'two_zone' was missing or not the
    first keyword, and discarded earlier overrides if it came later.

    Parameters
    ----------
    **kwargs
        settings to add or override, keyed by setting name; 'two_zone' also
        selects the settings file

    Returns
    -------
    settings
        the settings object read from file, with the overrides applied
    """

    if kwargs.get("two_zone"):
        settings_file_name = 'settings.yaml'
    else:
        settings_file_name = 'settings_static.yaml'

    settings = config.read_settings_file(settings_file_name, mandatory=True)

    # every keyword (including 'two_zone' itself) is stored in the settings
    for name, value in kwargs.items():
        settings[name] = value

    inject.add_injectable("settings", settings)

    return settings
Пример #18
0
def preload_injectables():
    """
    preload bulky injectables up front - stuff that isn't inserted into the pipeline

    Registers the 'track_skim_usage', 'write_data_dictionary' and 'write_tables'
    steps, installs DEFAULT_TABLE_LIST as 'input_table_list' when settings has
    none, and, when the 'write_raw_tables' setting is truthy, writes every
    input table to a csv file under the 'raw_tables' output path.

    Returns
    -------
    bool
        always True
    """

    logger.info("preload_injectables")

    inject.add_step('track_skim_usage', track_skim_usage)
    inject.add_step('write_data_dictionary', write_data_dictionary)
    inject.add_step('write_tables', write_tables)

    table_list = config.setting('input_table_list')

    # default ActivitySim table names and indices
    if table_list is None:
        logger.warning(
            "No 'input_table_list' found in settings. This will be a "
            "required setting in upcoming versions of ActivitySim.")

        new_settings = inject.get_injectable('settings')
        new_settings['input_table_list'] = DEFAULT_TABLE_LIST
        inject.add_injectable('settings', new_settings)

        # keep the local in step with the settings, otherwise the raw-table
        # dump below would try to iterate over None
        table_list = DEFAULT_TABLE_LIST

    # FIXME undocumented feature
    if config.setting('write_raw_tables'):

        # write raw input tables as csv (before annotation)
        csv_dir = config.output_file_path('raw_tables')
        if not os.path.exists(csv_dir):
            os.makedirs(csv_dir)  # make directory if needed

        table_names = [t['tablename'] for t in table_list]
        for t in table_names:
            df = inject.get_table(t).to_frame()
            if t == 'households':
                # chunk_id is left out of the households csv
                df.drop(columns='chunk_id', inplace=True)
            df.to_csv(os.path.join(csv_dir, '%s.csv' % t), index=True)

    t0 = tracing.print_elapsed_time()

    # FIXME - still want to do this?
    # if inject.get_injectable('skim_dict', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_dict", t0, debug=True)
    #
    # if inject.get_injectable('skim_stack', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_stack", t0, debug=True)

    return True
Пример #19
0
def handle_standard_args(parser=None):
    """
    Adds 'standard' activitysim arguments:
        --config : specify path to config_dir
        --output : specify path to output_dir
        --data   : specify path to data_dir
        --resume : checkpoint name, injected as 'resume_after'
        --models : run_list_name in settings, injected as 'run_list_name'

    Each argument that is supplied is registered as an injectable.

    Parameters
    ----------
    parser : argparse.ArgumentParser or None
        to do custom argument handling, pass in a parser with your own
        arguments already added and handle them based on the returned args.
        This method only handles the arguments it adds itself.

    Returns
    -------
    args : parser.parse_args() result

    Raises
    ------
    IOError
        if a supplied config, output or data path does not exist
    """

    if parser is None:
        parser = argparse.ArgumentParser()

    parser.add_argument("-c", "--config", help="path to config dir")
    parser.add_argument("-o", "--output", help="path to output dir")
    parser.add_argument("-d", "--data", help="path to data dir")
    parser.add_argument("-r", "--resume", help="resume after")
    parser.add_argument("-m",
                        "--models",
                        help="models run_list_name in settings")
    args = parser.parse_args()

    if args.config:
        if not os.path.exists(args.config):
            raise IOError("Could not find configs dir '%s'." % args.config)
        inject.add_injectable("configs_dir", args.config)
    if args.output:
        if not os.path.exists(args.output):
            # report the path that is actually missing (was args.config)
            raise IOError("Could not find output dir '%s'." % args.output)
        inject.add_injectable("output_dir", args.output)
    if args.data:
        if not os.path.exists(args.data):
            # report the path that is actually missing (was args.config)
            raise IOError("Could not find data dir '%s'." % args.data)
        inject.add_injectable("data_dir", args.data)
    if args.resume:
        inject.add_injectable("resume_after", args.resume)
    if args.models:
        inject.add_injectable("run_list_name", args.models)

    return args
Пример #20
0
def handle_standard_args(args, multiprocess=True):
    """Apply parsed command line arguments to injectables and settings overrides.

    Parameters
    ----------
    args : parsed arguments
        object exposing working_dir, settings_file, config, data, output,
        multiprocess, chunk_size, pipeline and resume attributes
    multiprocess : bool
        when false, args.multiprocess is ignored
    """

    def inject_arg(name, value, cache=False):
        # only names listed in INJECTABLES may be set this way
        assert name in INJECTABLES
        inject.add_injectable(name, value, cache=cache)

    if args.working_dir:
        # activitysim will look in the current working directory for
        # 'configs', 'data', and 'output' folders by default
        os.chdir(args.working_dir)

    # settings_file_name should be cached or else it gets squashed by config.py
    if args.settings_file:
        inject_arg('settings_file_name', args.settings_file, cache=True)

    # directory arguments map one-to-one onto injectables
    for attr_name, injectable_name in (('config', 'configs_dir'),
                                       ('data', 'data_dir'),
                                       ('output', 'output_dir')):
        value = getattr(args, attr_name)
        if value:
            inject_arg(injectable_name, value)

    if multiprocess and args.multiprocess:
        config_paths = validate_injectable('configs_dir')

        if os.path.exists('configs_mp'):
            logger.info("adding 'configs_mp' to config_dir list...")
            config_paths.insert(0, 'configs_mp')
            inject_arg('configs_dir', config_paths)
        else:
            logger.warning("could not find 'configs_mp'. skipping...")

        config.override_setting('multiprocess', True)
        if args.multiprocess > 0:
            config.override_setting('num_processes', args.multiprocess)

    if args.chunk_size:
        config.override_setting('chunk_size', int(args.chunk_size))

    for injectable in ('configs_dir', 'data_dir', 'output_dir'):
        validate_injectable(injectable)

    if args.pipeline:
        inject.add_injectable('pipeline_file_name', args.pipeline)

    if args.resume:
        config.override_setting('resume_after', args.resume)
Пример #21
0
def test_rng_access():
    """Check that the open pipeline hands out a random.Random generator."""

    setup_dirs(os.path.join(os.path.dirname(__file__), 'configs'))
    inject.add_injectable('rng_base_seed', 0)

    pipeline.open_pipeline()

    generator = pipeline.get_rn_generator()
    assert isinstance(generator, random.Random)

    # tidy up: close the pipeline and clear the inject cache
    pipeline.close_pipeline()
    inject.clear_cache()
Пример #22
0
def test_missing_filename(seed_households, data_dir):
    """Expect an AssertionError when the table entry in the settings has no filename."""

    settings_yaml = """
        input_table_list:
          - tablename: households
            index_col: household_id
            rename_columns:
              HHID: household_id
    """

    inject.add_injectable(
        'settings', yaml.load(settings_yaml, Loader=yaml.SafeLoader))

    with pytest.raises(AssertionError) as excinfo:
        input.read_input_table('households')

    error_text = str(excinfo.value)
    assert 'no input file provided' in error_text
Пример #23
0
def run(args):
    """
    Run bca4abm. Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'.

    Exits via sys.exit if any of the three directory injectables cannot be
    resolved or points at a path that does not exist.
    """

    working_dir = args.working_dir
    if working_dir and os.path.exists(working_dir):
        os.chdir(working_dir)

    # explicit directory arguments are registered as injectables
    for attr_name, injectable_name in (('config', 'configs_dir'),
                                       ('data', 'data_dir'),
                                       ('output', 'output_dir')):
        value = getattr(args, attr_name)
        if value:
            inject.add_injectable(injectable_name, value)

    # all three directories must resolve to existing paths
    for injectable in ('configs_dir', 'data_dir', 'output_dir'):
        try:
            dir_path = inject.get_injectable(injectable)
        except RuntimeError:
            sys.exit('Error: please specify either a --working_dir '
                     "containing 'configs', 'data', and 'output' folders "
                     'or all three of --config, --data, and --output')
        if not os.path.exists(dir_path):
            full_path = os.path.abspath(dir_path)
            sys.exit("Could not find %s '%s'" % (injectable, full_path))

    if args.pipeline:
        inject.add_injectable('pipeline_file_name', args.pipeline)

    if args.resume:
        override_setting('resume_after', args.resume)

    tracing.config_logger()
    tracing.delete_csv_files()  # only modifies output_dir
    warnings.simplefilter('always')
    logging.captureWarnings(capture=True)

    start_time = tracing.print_elapsed_time()

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = setting('resume_after', None)

    if resume_after:
        print('resume_after: %s' % resume_after)

    pipeline.run(models=setting('models'), resume_after=resume_after)

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    start_time = tracing.print_elapsed_time('all models', start_time)
Пример #24
0
def load_shadow_price_calculator(model_settings):
    """
    Build the ShadowPriceCalculator for the model named by model_settings['MODEL_SELECTOR']
    (e.g. school or workplace)

    When a 'data_buffers' injectable is present (multiprocessing), the shared data
    and its lock are taken from those buffers; otherwise both are passed as None.

    Parameters
    ----------
    model_settings : dict

    Returns
    -------
    spc : ShadowPriceCalculator
    """

    num_processes = inject.get_injectable('num_processes', 1)
    model_selector = model_settings['MODEL_SELECTOR']

    # - get shared_data from data_buffers (if multiprocessing)
    data_buffers = inject.get_injectable('data_buffers', None)

    if data_buffers is None:
        # without shared buffers only a single process is supported
        assert num_processes == 1
        data = None  # ShadowPriceCalculator will allocate its own data
        lock = None
    else:
        logger.info('Using existing data_buffers for shadow_price')

        # - shadow_pricing_info is computed once and then kept as an injectable
        shadow_pricing_info = inject.get_injectable('shadow_pricing_info', None)
        if shadow_pricing_info is None:
            shadow_pricing_info = get_shadow_pricing_info()
            inject.add_injectable('shadow_pricing_info', shadow_pricing_info)

        # - extract data buffer and reshape as numpy array
        data, lock = shadow_price_data_from_buffers(
            data_buffers, shadow_pricing_info, model_selector)

    # - ShadowPriceCalculator
    return ShadowPriceCalculator(model_settings, num_processes, data, lock)
Пример #25
0
def load_shadow_price_calculator(model_settings):
    """
    Create a ShadowPriceCalculator for model_settings['MODEL_SELECTOR']
    (e.g. school or workplace)

    With multiprocessing, the 'data_buffers' injectable supplies the shared data
    buffer and lock used to coordinate the calculation across sub-processes;
    without it, data and lock are both None.

    Parameters
    ----------
    model_settings : dict

    Returns
    -------
    spc : ShadowPriceCalculator
    """

    num_processes = inject.get_injectable('num_processes', 1)
    model_selector = model_settings['MODEL_SELECTOR']

    # - get shared_data from data_buffers (if multiprocessing)
    data_buffers = inject.get_injectable('data_buffers', None)
    using_shared_buffers = data_buffers is not None

    if using_shared_buffers:
        logger.info('Using existing data_buffers for shadow_price')

        # - reuse shadow_pricing_info if already injected, else compute and inject it
        shadow_pricing_info = inject.get_injectable('shadow_pricing_info', None)
        if shadow_pricing_info is None:
            shadow_pricing_info = get_shadow_pricing_info()
            inject.add_injectable('shadow_pricing_info', shadow_pricing_info)

        # - extract data buffer and reshape as numpy array
        shared = shadow_price_data_from_buffers(
            data_buffers, shadow_pricing_info, model_selector)
        data, lock = shared
    else:
        assert num_processes == 1
        # ShadowPriceCalculator will allocate its own data
        data, lock = None, None

    # - ShadowPriceCalculator
    spc = ShadowPriceCalculator(model_settings, num_processes, data, lock)
    return spc
Пример #26
0
def data_dir_from_settings():
    """
    The legacy strategy for specifying data_dir is an orca injectable.
    Calling this function provides an alternative by reading it from the settings file:
    a truthy 'data_dir' setting is injected as 'data_dir', otherwise the existing
    'data_dir' injectable is used.

    Returns
    -------
    data_dir
        the value taken from settings, or else from the injectable
    """

    # FIXME - not sure this plays well with orca
    # it may depend on when file with orca decorator is imported

    configured_dir = setting('data_dir', None)

    if not configured_dir:
        # nothing usable in settings: fall back to the injectable
        configured_dir = inject.get_injectable('data_dir')
    else:
        inject.add_injectable('data_dir', configured_dir)

    logger.info("data_dir: %s" % configured_dir)
    return configured_dir
Пример #27
0
def register_tours(df, trace_hh_id):
    """
    Register with inject for tracing

    create an injectable 'trace_tour_ids' with a list of tour_ids in household we are tracing.
    This allows us to slice by tour_id without requiring presence of person_id column

    Nothing is injected when no 'trace_person_ids' have been registered.

    Parameters
    ----------
    df: pandas.DataFrame
        traced dataframe

    trace_hh_id: int
        household id we are tracing

    Returns
    -------
    Nothing
    """

    # get list of persons in traced household (should already have been registered)
    person_ids = inject.get_injectable("trace_person_ids", [])

    if len(person_ids) == 0:
        # trace_hh_id not in households table or register_persons was not called
        # Logger.warn is a deprecated alias (removed in Python 3.13)
        logger.warning("no person ids registered for trace_hh_id %s",
                       trace_hh_id)
        return

    # but if household_id is in households, then we may have some tours
    traced_tours_df = slice_ids(df, person_ids, column='person_id')
    trace_tour_ids = traced_tours_df.index.tolist()
    if len(trace_tour_ids) == 0:
        logger.info("register_tours: no tours found for person_ids %s.",
                    person_ids)
    else:
        logger.info("tracing tour_ids %s in %s tours",
                    trace_tour_ids, len(df.index))

    # the (possibly empty) list is injected either way
    inject.add_injectable("trace_tour_ids", trace_tour_ids)
    logger.debug("register_tours injected trace_tour_ids %s", trace_tour_ids)
Пример #28
0
def register_persons(df, trace_hh_id):
    """
    Register the persons table for tracing.

    Injects the name of the persons index as 'persons_index_name' (renaming an
    unnamed index to 'person_id' in place) and the list of person ids that
    belong to the traced household as 'trace_person_ids'.

    Parameters
    ----------
    df: pandas.DataFrame
        traced dataframe; must have a 'household_id' column

    trace_hh_id: int
        household id we are tracing

    Returns
    -------
    Nothing
    """

    # inject persons_index name of person dataframe index
    if df.index.name is None:
        df.index.names = ['person_id']
        # Logger.warn is a deprecated alias (removed in Python 3.13)
        logger.warning("persons table index had no name. renamed index '%s'",
                       df.index.name)
    inject.add_injectable("persons_index_name", df.index.name)

    logger.debug("register_persons injected persons_index_name '%s'",
                 df.index.name)

    # inject list of person_ids in household we are tracing
    # this allows us to slice by person_id without requiring presence of household_id column
    traced_persons_df = df[df['household_id'] == trace_hh_id]
    trace_person_ids = traced_persons_df.index.tolist()
    if len(trace_person_ids) == 0:
        logger.warning("register_persons: trace_hh_id %s not found.",
                       trace_hh_id)

    # the (possibly empty) list is injected either way
    inject.add_injectable("trace_person_ids", trace_person_ids)
    logger.debug("register_persons injected trace_person_ids %s",
                 trace_person_ids)

    logger.info("tracing person_ids %s in %s persons",
                trace_person_ids, len(df.index))
Пример #29
0
def test_mp_run():
    """Run the multiprocess run list with the 3-zone configs and regress the result."""

    setup_dirs(
        [example_path('configs_3_zone'), example_path('configs')],
        example_path('data_3'))
    inject.add_injectable('settings_file_name', 'settings_mp.yaml')

    run_list = mp_tasks.get_run_list()
    mp_tasks.print_run_list(run_list)

    # do this after config.handle_standard_args, as command line args may override injectables
    injectable_names = ('data_dir', 'configs_dir', 'output_dir',
                        'settings_file_name')
    injectables = {}
    for name in injectable_names:
        injectables[name] = inject.get_injectable(name)

    mp_tasks.run_multiprocess(run_list, injectables)

    # reopen the pipeline to check the results
    pipeline.open_pipeline('_')
    regress_3_zone()
    pipeline.close_pipeline()
Пример #30
0
def test_mp_run():
    """Run the multiprocess run list with the local configs and regress the result."""

    here = os.path.dirname(__file__)

    # 'configs_mp' is listed ahead of 'configs'
    inject.add_injectable(
        'configs_dir',
        [os.path.join(here, 'configs_mp'), os.path.join(here, 'configs')])
    inject.add_injectable("output_dir", os.path.join(here, 'output'))
    inject.add_injectable("data_dir", os.path.join(here, 'data'))

    tracing.config_logger()

    run_list = mp_tasks.get_run_list()
    mp_tasks.print_run_list(run_list)

    # do this after config.handle_standard_args, as command line args may override injectables
    injectables = {
        name: inject.get_injectable(name)
        for name in ('data_dir', 'configs_dir', 'output_dir')
    }

    mp_tasks.run_multiprocess(run_list, injectables)

    # reopen the pipeline to check the results
    pipeline.open_pipeline('_')
    regress_mini_auto()
    pipeline.close_pipeline()
def test_vts():
    """Exercise get_previous_tour_by_tourid and vectorize_tour_scheduling on toy data."""

    inject.add_injectable("settings", {})

    # note: need 0 duration tour on one end of day to guarantee at least one available tour
    alts = pd.DataFrame({"start": [1, 1, 2, 3], "end": [1, 4, 5, 6]})
    alts['duration'] = alts.end - alts.start
    inject.add_injectable("tdd_alts", alts)

    # current tours 'd' and 'e' belong to persons 'b' and 'c'
    tour_person_ids = pd.Series(['b', 'c'], index=['d', 'e'])
    prev_tour_by_person = pd.Series([2, 2, 1], index=['a', 'b', 'c'])

    prev_tour_attrs = get_previous_tour_by_tourid(tour_person_ids,
                                                  prev_tour_by_person,
                                                  alts)

    for column, expected_values in (('start_previous', [2, 1]),
                                    ('end_previous', [5, 4])):
        pdt.assert_series_equal(
            getattr(prev_tour_attrs, column),
            pd.Series(expected_values, index=['d', 'e'], name=column))

    tours = pd.DataFrame({
        "person_id": [1, 1, 2, 3, 3],
        "tour_num": [1, 2, 1, 1, 2],
        "tour_type": ['x', 'x', 'x', 'x', 'x'],
    })

    persons = pd.DataFrame({"income": [20, 30, 25]}, index=[1, 2, 3])
    inject.add_table('persons', persons)

    spec = pd.DataFrame({"Coefficient": [1.2]}, index=["income"])
    spec.index.name = "Expression"
    segment_col = None  # no segmentation of model_spec

    inject.add_injectable("check_for_variability", True)

    tdd_choices, _ = vectorize_tour_scheduling(
        tours, persons, alts, spec, segment_col,
        model_settings={},
        chunk_size=0, trace_label='test_vts')

    # FIXME - dead reckoning regression
    # there's no real logic here - this is just what came out of the monte carlo
    # note that the result comes out ordered by the nth trips and not ordered
    # by the trip index.  shrug?
    expected = [2, 2, 2, 0, 0]
    choices_match = tdd_choices.tdd.values == expected
    assert choices_match.all()
Пример #32
0
def test_mp_run():
    """Run the multiprocess run list with the local configs and regress the result."""

    test_dir = os.path.dirname(__file__)

    def local(folder):
        # path of a folder that sits next to this file
        return os.path.join(test_dir, folder)

    # 'configs_mp' is listed ahead of 'configs'
    inject.add_injectable('configs_dir', [local('configs_mp'), local('configs')])
    inject.add_injectable("output_dir", local('output'))
    inject.add_injectable("data_dir", local('data'))

    tracing.config_logger()

    run_list = mp_tasks.get_run_list()
    mp_tasks.print_run_list(run_list)

    # do this after config.handle_standard_args, as command line args may override injectables
    injectables = {}
    for name in ('data_dir', 'configs_dir', 'output_dir'):
        injectables[name] = inject.get_injectable(name)

    mp_tasks.run_multiprocess(run_list, injectables)

    # reopen the pipeline to check the results
    pipeline.open_pipeline('_')
    regress_mini_auto()
    pipeline.close_pipeline()
Пример #33
0
def setup_dirs(ancillary_configs_dir=None, data_dir=None):
    """
    Register the configs, output and data directories and reset state for a test run.

    Parameters
    ----------
    ancillary_configs_dir : str or None
        optional extra configs directory placed ahead of the test and example
        configs directories (used by run_mp to test multiprocess)
    data_dir : str or None
        data directory to register; the example data directory is used when falsy
    """

    here = os.path.dirname(__file__)

    # list order: [ancillary (if given), this test's configs, example configs]
    configs_dir = [os.path.join(here, 'configs'), example_path('configs')]
    if ancillary_configs_dir is not None:
        configs_dir.insert(0, ancillary_configs_dir)

    inject.add_injectable('configs_dir', configs_dir)
    inject.add_injectable('output_dir', os.path.join(here, 'output'))
    inject.add_injectable('data_dir', data_dir or example_path('data'))

    inject.clear_cache()

    tracing.config_logger()

    # remove output files of each of these types
    for file_type in ('csv', 'txt', 'yaml', 'omx'):
        tracing.delete_output_files(file_type)
Пример #34
0
def test_hdf_reader1(seed_households, data_dir):
    """
    Write seed_households to an HDF5 file in data_dir and read it back as 'households'.

    The settings registered here list a single input table (households.h5, with
    HHID renamed to household_id and used as index_col); the test asserts that
    the table returned by read_input_table has an index named 'household_id'.
    """

    settings_yaml = """
        input_table_list:
          - tablename: households
            filename: households.h5
            index_col: household_id
            rename_columns:
              HHID: household_id
    """

    inject.add_injectable('settings', yaml.safe_load(settings_yaml))

    # create the file the settings above refer to
    h5_path = os.path.join(data_dir, 'households.h5')
    seed_households.to_hdf(h5_path, key='households', mode='w')

    assert os.path.isfile(h5_path)

    households_df = input.read_input_table('households')

    assert households_df.index.name == 'household_id'
Пример #35
0
def setup_dirs(configs_dir):
    """
    Register configs_dir plus this module's 'output' and 'data' directories
    as injectables, then clear the inject cache, configure logging and delete
    csv, txt and yaml output files.

    Parameters
    ----------
    configs_dir : str
        configs directory to register as the 'configs_dir' injectable
    """

    here = os.path.dirname(__file__)

    inject.add_injectable("configs_dir", configs_dir)
    inject.add_injectable("output_dir", os.path.join(here, 'output'))
    inject.add_injectable("data_dir", os.path.join(here, 'data'))

    inject.clear_cache()

    tracing.config_logger()

    # remove output files of each of these types
    for file_type in ('csv', 'txt', 'yaml'):
        tracing.delete_output_files(file_type)
Пример #36
0
        'chunk_size',
        'multiprocess',
        'num_processes',
        'resume_after',
    ]

    for k in settings:
        logger.info("setting %s: %s" % (k, config.setting(k)))

    for k in injectables:
        logger.info("injectable %s: %s" % (k, inject.get_injectable(k)))


if __name__ == '__main__':

    # register default data and configs locations before the standard args are handled
    # NOTE(review): presumably handle_standard_args() lets command line args override
    # these injectables - confirm against config.handle_standard_args
    inject.add_injectable('data_dir', '../example/data')
    inject.add_injectable('configs_dir', ['configs', '../example/configs'])

    injectables = config.handle_standard_args()

    config.filter_warnings()
    tracing.config_logger()

    # log the settings and the injectables returned by handle_standard_args
    log_settings(injectables)

    # NOTE(review): t0 looks like the start timestamp for later elapsed-time reports - confirm
    t0 = tracing.print_elapsed_time()

    # cleanup if not resuming
    if not config.setting('resume_after', False):
        cleanup_output_files()
Пример #37
0
    for k in injectables:
        logger.info("injectable %s: %s" % (k, inject.get_injectable(k)))


if __name__ == '__main__':

    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.set_option.html
    # pd.set_option('display.max_columns', 50)

    # data location relative to the working directory; point this somewhere else
    # to run against a different data set
    data_dir = '../example/data'

    # 'ancillary_data' is listed ahead of the main data directory
    inject.add_injectable('data_dir', ['ancillary_data', data_dir])
    # inject.add_injectable('data_dir', ['ancillary_data', '../activitysim/abm/test/data'])
    inject.add_injectable('configs_dir', ['configs', '../example/configs'])

    injectables = config.handle_standard_args()

    tracing.config_logger()
    config.filter_warnings()

    # log the settings and the injectables returned by handle_standard_args
    log_settings(injectables)

    t0 = tracing.print_elapsed_time()

    # cleanup if not resuming
    if not config.setting('resume_after', False):
        cleanup_output_files()
Пример #38
0
    random_omaz = np.random.choice(network_los.maz_df.index.values, size=VECTOR_TEST_SIZE,
                                   replace=True)

    taps_mazs = network_los.get_taps_mazs(random_omaz, attribute=attribute)

    return len(taps_mazs.index)


def set_random_seed():
    """Seed numpy's global random number generator with the fixed value 0."""
    fixed_seed = 0
    np.random.seed(fixed_seed)


# seed numpy's random number generator so that run results are reproducible
# (comment out the call below to get a different random draw on every run)
set_random_seed()
# also expose the seeding function itself as an injectable
inject.add_injectable("set_random_seed", set_random_seed)

tracing.config_logger()

# NOTE(review): print_elapsed_time presumably returns a timestamp that is passed
# back in to report the time taken by each load below - confirm
t0 = print_elapsed_time()

taz_skim_stack = inject.get_injectable('taz_skim_dict')
t0 = print_elapsed_time("load taz_skim_dict", t0)

tap_skim_stack = inject.get_injectable('tap_skim_dict')
t0 = print_elapsed_time("load tap_skim_dict", t0)

network_los = inject.get_injectable('network_los')
t0 = print_elapsed_time("load network_los", t0)
# test sizes for all implemented methods
Пример #39
0
def register_traceable_table(table_name, df):
    """
    Register traceable table

    Works out which rows of df belong to the traced household and adds their
    index values to the 'traceable_table_ids' injectable, so the table can be
    sliced by id without requiring the presence of a household id column.
    The table's index name is recorded in the 'traceable_table_indexes'
    injectable so that other tables can be sliced by a column of that name.

    Does nothing if the 'trace_hh_id' injectable is None. Logs an error and
    returns without registering if the table is not listed in the
    'traceable_tables' injectable, if df has no index name, if the index name
    is already registered for a different table, or (for tables other than
    'households') if df has no column naming an already registered index.

    Parameters
    ----------
    table_name: str
        name of the table being registered
    df: pandas.DataFrame
        traced dataframe

    Returns
    -------
    Nothing
    """

    trace_hh_id = inject.get_injectable("trace_hh_id", None)

    if trace_hh_id is None:
        return

    traceable_tables = inject.get_injectable('traceable_tables', [])
    if table_name not in traceable_tables:
        logger.error("table '%s' not in traceable_tables" % table_name)
        return

    idx_name = df.index.name
    if idx_name is None:
        logger.error("Can't register table '%s' without index name" % table_name)
        return

    traceable_table_ids = inject.get_injectable('traceable_table_ids')
    traceable_table_indexes = inject.get_injectable('traceable_table_indexes')

    if idx_name in traceable_table_indexes and traceable_table_indexes[idx_name] != table_name:
        logger.error("table '%s' index name '%s' already registered for table '%s'" %
                     (table_name, idx_name, traceable_table_indexes[idx_name]))
        return

    if table_name == 'households':
        # households are traced directly by their index value
        if trace_hh_id not in df.index:
            logger.warning("trace_hh_id %s not in dataframe" % trace_hh_id)
            new_traced_ids = []
        else:
            logger.info("tracing household id %s in %s households" % (trace_hh_id, len(df.index)))
            new_traced_ids = [trace_hh_id]
    else:

        # find first already registered ref_col we can use to slice this table
        ref_col = next((c for c in traceable_table_indexes if c in df.columns), None)
        if ref_col is None:
            logger.error("can't find a registered table to slice table '%s' index name '%s'"
                         " in traceable_table_indexes: %s" %
                         (table_name, idx_name, traceable_table_indexes))
            return

        # get traceable_ids for ref_col table
        ref_col_table_name = traceable_table_indexes[ref_col]
        ref_col_traced_ids = traceable_table_ids.get(ref_col_table_name, [])

        # inject list of ids in table we are tracing
        # this allows us to slice by id without requiring presence of a household id column
        traced_df = df[df[ref_col].isin(ref_col_traced_ids)]
        new_traced_ids = traced_df.index.tolist()
        if len(new_traced_ids) == 0:
            logger.warning("register %s: no rows with %s in %s." %
                           (table_name, ref_col, ref_col_traced_ids))

    # update traceable_table_indexes with this traceable_table's idx_name
    if idx_name not in traceable_table_indexes:
        traceable_table_indexes[idx_name] = table_name
        # report through the logger like every other message in this function
        logger.info("adding table %s.%s to traceable_table_indexes" % (table_name, idx_name))
        inject.add_injectable('traceable_table_indexes', traceable_table_indexes)

    # update the list of trace_ids for this table
    prior_traced_ids = traceable_table_ids.get(table_name, [])

    if new_traced_ids:
        # the same id must not be registered twice for one table
        assert not set(prior_traced_ids) & set(new_traced_ids)
        traceable_table_ids[table_name] = prior_traced_ids + new_traced_ids
        inject.add_injectable('traceable_table_ids', traceable_table_ids)

    logger.info("register %s: added %s new ids to %s existing trace ids" %
                (table_name, len(new_traced_ids), len(prior_traced_ids)))
    logger.info("register %s: tracing new ids %s in %s" %
                (table_name, new_traced_ids, table_name))
Пример #40
0
 def override_injectable(name, value):
     # Register value as the injectable called name, then record the name in
     # `injectables` (presumably a list owned by the enclosing scope that
     # collects the names of overridden injectables - confirm against caller).
     inject.add_injectable(name, value)
     injectables.append(name)
Пример #41
0
def cache_spec(hhsize, spec):
    """Cache spec as an injectable under the name cached_spec_name(hhsize) returns."""
    inject.add_injectable(cached_spec_name(hhsize), spec)
Пример #42
0
 def override_setting(key, value):
     """Set key to value in the 'settings' injectable and register it again."""
     current_settings = inject.get_injectable('settings')
     current_settings[key] = value
     inject.add_injectable('settings', current_settings)
Пример #43
0
def households(households_sample_size, override_hh_ids, trace_hh_id):
    """
    Load the households input table, optionally reduce it to a subset, and register it.

    The table is read with read_input_table("households") and reduced by the
    first of these rules that applies:

    - override_hh_ids is not None: keep only households whose index is in that list
    - trace_hh_id is truthy and households_sample_size == 1: keep only the traced household
    - households_sample_size > 0 and smaller than the table: draw a repeatable
      random sample (if the traced household is in the full table but was missed
      by the sample, it replaces the first sampled row)
    - otherwise: use the full table

    Side effects: sets the 'households_sliced' injectable, names the index
    'household_id', adds a 'chunk_id' column, replaces the 'households' table
    with the resulting dataframe, adds a 'households' channel to the pipeline
    random number generator and, when trace_hh_id is truthy, registers the
    table as traceable and traces it.

    Parameters
    ----------
    households_sample_size : number of households to sample
    override_hh_ids : collection of household ids, or None
    trace_hh_id : id of the household to trace, falsy if not tracing

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    RuntimeError
        if override_hh_ids is given but none of those households are in the table
    """

    full_hh_df = read_input_table("households")
    full_count = len(full_hh_df)

    logger.info("full household list contains %s households" % full_count)

    # every branch except the final else reduces the table
    households_sliced = True

    if override_hh_ids is not None:
        # only using households listed in override_hh_ids
        # (trace_hh_id will not be used if it is not in the list of override_hh_ids)
        requested_count = len(override_hh_ids)
        logger.info("override household list containing %s households" % requested_count)

        hh_df = full_hh_df[full_hh_df.index.isin(override_hh_ids)]
        found_count = len(hh_df)

        if found_count < requested_count:
            logger.info("found %s of %s households in override household list" %
                        (found_count, requested_count))

        if found_count == 0:
            raise RuntimeError('No override households found in store')

    elif trace_hh_id and households_sample_size == 1:
        # tracing one household exclusively:
        # result contains only trace_hh (or is empty if it is not in the full store)
        hh_df = tracing.slice_ids(full_hh_df, trace_hh_id)

    elif households_sample_size > 0 and full_count > households_sample_size:
        # we need a subset of the full store
        logger.info("sampling %s of %s households" % (households_sample_size, full_count))

        # Because random seed is set differently for each step, sampling of households using
        # Random.global_rng would sample differently depending upon which step it was called
        # from. We use a one-off rng seeded with the pseudo step name 'sample_households' to
        # provide repeatable sampling no matter when the table is loaded.
        #
        # Note that the external_rng is also seeded with base_seed so the sample will
        # (rightly) change if the pipeline rng's base_seed is changed
        sample_rng = pipeline.get_rn_generator().get_external_rng('sample_households')
        sampled_positions = sample_rng.choice(full_count, size=households_sample_size,
                                              replace=False)
        hh_df = full_hh_df.take(sampled_positions)

        # if tracing and we missed trace_hh in sample, but it is in full store
        if trace_hh_id and trace_hh_id not in hh_df.index and trace_hh_id in full_hh_df.index:
            # replace first hh in sample with trace_hh
            logger.debug("replacing household %s with %s in household sample" %
                         (hh_df.index[0], trace_hh_id))
            traced_hh_row = full_hh_df.loc[[trace_hh_id]]
            hh_df = pd.concat([traced_hh_row, hh_df[1:]])

    else:
        hh_df = full_hh_df
        households_sliced = False

    # publish whether the table was reduced
    # NOTE(review): the original comment here read "persons table" - presumably the
    # persons table reads this flag; confirm against its loader
    inject.add_injectable('households_sliced', households_sliced)

    logger.info("loaded households %s" % (hh_df.shape,))

    hh_df.index.name = 'household_id'

    # FIXME - pathological knowledge of name of chunk_id column used by chunked_choosers_by_chunk_id
    assert 'chunk_id' not in hh_df.columns
    hh_df['chunk_id'] = pd.Series(list(range(len(hh_df))), index=hh_df.index)

    # replace table function with dataframe
    inject.add_table('households', hh_df)

    pipeline.get_rn_generator().add_channel('households', hh_df)

    if trace_hh_id:
        tracing.register_traceable_table('households', hh_df)
        tracing.trace_df(hh_df, "raw.households", warn_if_empty=True)

    return hh_df