Example #1
def regress():

    persons_df = pipeline.get_table('persons')
    persons_df = persons_df[persons_df.household_id == HH_ID]
    print("persons_df\n", persons_df[['value_of_time', 'distance_to_work']])

    """
    persons_df
     person_id  value_of_time  distance_to_work
    person_id
    3249922        23.349532              0.62
    3249923        23.349532              0.62
    """

    tours_df = pipeline.get_table('tours')

    regress_tour_modes(tours_df)

    assert tours_df.shape[0] > 0
    assert not tours_df.tour_mode.isnull().any()

    trips_df = pipeline.get_table('trips')
    assert trips_df.shape[0] > 0
    assert not trips_df.purpose.isnull().any()
    assert not trips_df.depart.isnull().any()
    assert not trips_df.trip_mode.isnull().any()

    # should be at least two trips per tour
    assert trips_df.shape[0] >= 2*tours_df.shape[0]
Example #2
def regress():

    persons_df = pipeline.get_table('persons')
    persons_df = persons_df[persons_df.household_id == HH_ID]
    print("persons_df\n%s" % persons_df[['value_of_time', 'distance_to_work']])
    """
    persons_df
     person_id  value_of_time  distance_to_work
    person_id
    3249922        23.349532              0.62
    3249923        23.349532              0.62
    """

    tours_df = pipeline.get_table('tours')

    regress_tour_modes(tours_df)

    assert tours_df.shape[0] > 0
    assert not tours_df.tour_mode.isnull().any()

    # optional logsum column was added to all tours except mandatory
    assert 'destination_logsum' in tours_df
    if (tours_df.destination_logsum.isnull() !=
        (tours_df.tour_category == 'mandatory')).any():
        print(tours_df[(tours_df.destination_logsum.isnull() !=
                        (tours_df.tour_category == 'mandatory'))])
    assert (tours_df.destination_logsum.isnull() == (
        tours_df.tour_category == 'mandatory')).all()

    # mode choice logsum calculated for all tours
    assert 'mode_choice_logsum' in tours_df
    assert not tours_df.mode_choice_logsum.isnull().any()

    trips_df = pipeline.get_table('trips')
    assert trips_df.shape[0] > 0
    assert not trips_df.purpose.isnull().any()
    assert not trips_df.depart.isnull().any()
    assert not trips_df.trip_mode.isnull().any()

    # mode_choice_logsum calculated for all trips
    assert not trips_df.mode_choice_logsum.isnull().any()

    # should be at least two trips per tour
    assert trips_df.shape[0] >= 2 * tours_df.shape[0]

    # write_trip_matrices
    trip_matrices_file = config.output_file_path('trips_md.omx')
    assert os.path.exists(trip_matrices_file)
    trip_matrices = omx.open_file(trip_matrices_file)
    assert trip_matrices.shape() == (25, 25)

    assert 'WALK_MD' in trip_matrices.list_matrices()
    walk_trips = np.array(trip_matrices['WALK_MD'])
    assert walk_trips.dtype == np.dtype('float64')

    trip_matrices.close()
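
The checks above read the OMX file written by write_trip_matrices. As a minimal sketch of the file layout being asserted on, the openmatrix package can create a comparable file with a single named 25x25 matrix (the file name and values here are invented, not model output):

import numpy as np
import openmatrix as omx

# write a small OMX file containing a single named zone-to-zone matrix
f = omx.open_file('example_trips_md.omx', 'w')
f['WALK_MD'] = np.zeros((25, 25), dtype='float64')
f.close()

# reopen it and run the same kind of checks as the regression above
f = omx.open_file('example_trips_md.omx')
assert f.shape() == (25, 25)
assert 'WALK_MD' in f.list_matrices()
f.close()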
Example #3
def test_pipeline_checkpoint_drop():

    setup()

    _MODELS = [
        'step1',
        '_step2',
        '_step_add_col.table_name=table2;column_name=c2',
        '_step_forget_tab.table_name=table2',
        'step3',
        'step_forget_tab.table_name=table3',
    ]
    pipeline.run(models=_MODELS, resume_after=None)

    checkpoints = pipeline.get_checkpoints()
    print "checkpoints\n", checkpoints

    pipeline.get_table("table1")

    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("table2")
    assert "never checkpointed" in str(excinfo.value)

    # can't get a dropped table from current checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("table3")
    assert "was dropped" in str(excinfo.value)

    # ensure that we can still get table3 from a checkpoint at which it existed
    pipeline.get_table("table3", checkpoint_name="step3")

    pipeline.close_pipeline()
    close_handlers()
Example #4
def full_run(configs_dir,
             data_dir,
             resume_after=None,
             chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None,
             trace_od=None,
             check_for_variability=None,
             two_zone=True):

    setup_dirs(configs_dir, data_dir)

    settings = inject_settings(
        two_zone=two_zone,
        households_sample_size=households_sample_size,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_od=trace_od,
        check_for_variability=check_for_variability,
        use_shadow_pricing=False
    )  # shadow pricing breaks replicability when sample_size varies

    MODELS = settings['models']

    pipeline.run(models=MODELS, resume_after=resume_after)

    tours = pipeline.get_table('tours')
    tour_count = len(tours.index)

    return tour_count
Example #5
def write_summaries(output_dir):

    summary_settings_name = 'output_summaries'
    summary_file_name = 'summaries.txt'

    summary_settings = setting(summary_settings_name)

    if summary_settings is None:
        logger.info(
            "No {summary_settings_name} specified in settings file. Nothing to write."
        )
        return

    summary_dict = summary_settings

    mode = 'wb' if sys.version_info < (3, ) else 'w'
    with open(config.output_file_path(summary_file_name), mode) as output_file:

        for table_name, column_names in summary_dict.items():

            df = pipeline.get_table(table_name)

            for c in column_names:
                n = 100
                empty = (df[c] == '') | df[c].isnull()

                print(
                    f"\n### {table_name}.{c} type: {df.dtypes[c]} rows: {len(df)} ({empty.sum()} empty)\n\n",
                    file=output_file)
                print(df[c].value_counts().nlargest(n), file=output_file)
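
The loop above only needs a mapping of table names to column lists. A self-contained sketch of the same value-count logic, with an invented summary_dict and an in-memory DataFrame standing in for a pipeline table:

import pandas as pd

# hypothetical 'output_summaries' setting: table name -> columns to summarize
summary_dict = {'persons': ['ptype']}

# stand-in for pipeline.get_table()
tables = {'persons': pd.DataFrame({'ptype': ['worker', 'student', 'worker', '']})}

for table_name, column_names in summary_dict.items():
    df = tables[table_name]
    for c in column_names:
        empty = (df[c] == '') | df[c].isnull()
        print(f"\n### {table_name}.{c} type: {df.dtypes[c]} rows: {len(df)} ({empty.sum()} empty)\n")
        print(df[c].value_counts().nlargest(100))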
Example #6
def regress_mini_mtf():

    mtf_choice = pipeline.get_table("persons").sort_index().mandatory_tour_frequency

    # these choices are for pure regression - their appropriateness has not been checked
    per_ids = [2566698, 2877284, 2877287]
    choices = ['work1', 'work_and_school', 'school1']
    expected_choice = pd.Series(choices, index=pd.Index(per_ids, name='person_id'),
                                name='mandatory_tour_frequency')

    mtf_choice = mtf_choice[mtf_choice != '']  # drop null (empty string) choices

    offset = len(mtf_choice) // 2  # choose something midway as hh_id ordered by hh size
    print("mtf_choice\n", mtf_choice.head(offset).tail(5))

    """
    mtf_choice
     person_id
    2458502            school1
    2458503            school1
    2566698              work1
    2877284    work_and_school
    2877287            school1
    Name: mandatory_tour_frequency, dtype: object
    """
    pdt.assert_series_equal(mtf_choice.reindex(per_ids), expected_choice)
Example #7
def test_full_run2_repop_replace():
    # Note: tests are run in alphabetical order.
    # This test expects to find the pipeline h5 file from
    # test_full_run1 in the output folder

    _MODELS = [
        'input_pre_processor.table_list=repop_input_table_list;repop',
        'repop_setup_data_structures',
        'initial_seed_balancing.final=true;repop',
        'integerize_final_seed_weights.repop',
        'repop_balancing',
        'expand_households.repop;replace',
        'write_synthetic_population.repop',
        'write_tables.repop',
    ]

    pipeline.run(models=_MODELS, resume_after='summarize')

    expanded_household_ids = pipeline.get_table('expanded_household_ids')
    assert isinstance(expanded_household_ids, pd.DataFrame)
    taz_hh_counts = expanded_household_ids.groupby('TAZ').size()
    assert len(taz_hh_counts) == TAZ_COUNT
    assert taz_hh_counts.loc[100] == TAZ_100_HH_REPOP_COUNT

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    inject.clear_cache()
Example #8
def write_trip_matrices(trips, skim_dict, skim_stack):
    """
    Write trip matrices step.

    Adds boolean columns to local trips table via annotation expressions,
    then aggregates trip counts and writes OD matrices to OMX.  Saves the
    annotated trips table to the pipeline if desired.
    """

    model_settings = config.read_model_settings('write_trip_matrices.yaml')
    trips_df = annotate_trips(trips, skim_dict, skim_stack, model_settings)

    if bool(model_settings.get('SAVE_TRIPS_TABLE')):
        pipeline.replace_table('trips', trips_df)

    logger.info('Aggregating trips...')
    aggregate_trips = trips_df.groupby(['origin', 'destination'],
                                       sort=False).sum()
    logger.info('Finished.')

    orig_vals = aggregate_trips.index.get_level_values('origin')
    dest_vals = aggregate_trips.index.get_level_values('destination')

    zone_index = pipeline.get_table('land_use').index
    assert all(zone in zone_index for zone in orig_vals)
    assert all(zone in zone_index for zone in dest_vals)

    _, orig_index = zone_index.reindex(orig_vals)
    _, dest_index = zone_index.reindex(dest_vals)

    write_matrices(aggregate_trips, zone_index, orig_index, dest_index,
                   model_settings)
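
write_matrices itself is not shown in this listing; it receives the positional origin/destination indices computed above together with the zone index. As a hedged sketch, assuming the goal is a dense zone-by-zone trip-count array, those positional indices can be used to scatter the aggregated counts with numpy (all zone ids and counts below are invented; the real ActivitySim helper may differ):

import numpy as np
import pandas as pd

zone_index = pd.Index([1, 2, 3], name='zone_id')

# invented aggregated trip counts for two OD pairs
aggregate_trips = pd.DataFrame(
    {'trip_count': [4.0, 7.0]},
    index=pd.MultiIndex.from_tuples([(1, 2), (3, 1)], names=['origin', 'destination']))

orig_vals = aggregate_trips.index.get_level_values('origin')
dest_vals = aggregate_trips.index.get_level_values('destination')

# positional (0-based) indices into the zone index, as in the step above
_, orig_index = zone_index.reindex(orig_vals)
_, dest_index = zone_index.reindex(dest_vals)

matrix = np.zeros((len(zone_index), len(zone_index)))
np.add.at(matrix, (orig_index, dest_index), aggregate_trips['trip_count'].values)
print(matrix)  # 4 trips from zone 1 to zone 2, 7 trips from zone 3 to zone 1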
Example #9
def regress_mini_auto():

    # regression test: these are among the middle households in households table
    # should be the same results as in run_mp (multiprocessing) test case
    hh_ids = [932147, 982875, 983048, 1024353]
    choices = [1, 1, 1, 0]
    expected_choice = pd.Series(choices, index=pd.Index(hh_ids, name="household_id"),
                                name='auto_ownership')

    auto_choice = pipeline.get_table("households").sort_index().auto_ownership

    offset = HOUSEHOLDS_SAMPLE_SIZE // 2  # choose something midway as hh_id ordered by hh size
    print("auto_choice\n", auto_choice.head(offset).tail(4))

    auto_choice = auto_choice.reindex(hh_ids)

    """
    auto_choice
     household_id
    932147     1
    982875     1
    983048     1
    1024353    0
    Name: auto_ownership, dtype: int64
    """
    pdt.assert_series_equal(auto_choice, expected_choice)
Example #10
def full_run(resume_after=None, chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None, trace_od=None, check_for_variability=None):

    configs_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'example', 'configs')

    setup_dirs(configs_dir)

    settings = inject_settings(
        configs_dir,
        households_sample_size=households_sample_size,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_od=trace_od,
        check_for_variability=check_for_variability,
        use_shadow_pricing=False)  # shadow pricing breaks replicability when sample_size varies

    MODELS = settings['models']

    pipeline.run(models=MODELS, resume_after=resume_after)

    tours = pipeline.get_table('tours')
    tour_count = len(tours.index)

    return tour_count
Example #11
def test_full_run1():

    _MODELS = [
        'input_pre_processor',
        'setup_data_structures',
        'initial_seed_balancing',
        'meta_control_factoring',
        'final_seed_balancing',
        'integerize_final_seed_weights',
        'sub_balancing.geography=TRACT',
        'sub_balancing.geography=TAZ',
        'expand_households',
        'summarize',
        'write_tables',
        'write_synthetic_population',
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    expanded_household_ids = pipeline.get_table('expanded_household_ids')
    assert isinstance(expanded_household_ids, pd.DataFrame)
    taz_hh_counts = expanded_household_ids.groupby('TAZ').size()
    assert len(taz_hh_counts) == TAZ_COUNT
    assert taz_hh_counts.loc[100] == TAZ_100_HH_COUNT

    # output_tables action: skip
    output_dir = inject.get_injectable('output_dir')
    assert not os.path.exists(os.path.join(output_dir, 'households.csv'))
    assert os.path.exists(os.path.join(output_dir, 'summary_DISTRICT_1.csv'))

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    inject.clear_cache()
Example #12
def test_full_run2():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs2')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data2')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'input_pre_processor', 'setup_data_structures',
        'initial_seed_balancing', 'meta_control_factoring',
        'final_seed_balancing', 'integerize_final_seed_weights',
        'sub_balancing.geography=DISTRICT',
        'sub_balancing.geography=TRACT', 'sub_balancing.geography=TAZ',
        'expand_households', 'summarize', 'write_results'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    assert isinstance(pipeline.get_table('expanded_household_ids'),
                      pd.DataFrame)

    assert os.path.exists(os.path.join(output_dir, 'summary_DISTRICT.csv'))

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    orca.clear_cache()
Example #13
def regress_mini_auto():

    # regression test: these are among the middle households in households table
    # should be the same results as in run_mp (multiprocessing) test case
    hh_ids = [1099626, 1173905, 1196298, 1286259]
    choices = [1, 1, 0, 0]
    expected_choice = pd.Series(choices,
                                index=pd.Index(hh_ids, name="household_id"),
                                name='auto_ownership')

    auto_choice = pipeline.get_table("households").sort_index().auto_ownership

    offset = HOUSEHOLDS_SAMPLE_SIZE // 2  # choose something midway as hh_id ordered by hh size
    print("auto_choice\n%s" % auto_choice.head(offset).tail(4))

    auto_choice = auto_choice.reindex(hh_ids)
    """
    auto_choice
    household_id
    1099626    1
    1173905    1
    1196298    0
    1286259    0
    Name: auto_ownership, dtype: int64
    """
    pdt.assert_series_equal(auto_choice, expected_choice, check_dtype=False)
Example #14
def regress_mini_auto():

    # regression test: these are among the middle households in households table
    # should be the same results as in test_pipeline (single-threaded) tests
    hh_ids = [932147, 982875, 983048, 1024353]
    choices = [1, 1, 1, 0]
    expected_choice = pd.Series(choices, index=pd.Index(hh_ids, name="household_id"),
                                name='auto_ownership')

    auto_choice = pipeline.get_table("households").sort_index().auto_ownership

    offset = HOUSEHOLDS_SAMPLE_SIZE // 2  # choose something midway as hh_id ordered by hh size
    print("auto_choice\n", auto_choice.head(offset).tail(4))

    auto_choice = auto_choice.reindex(hh_ids)

    """
    auto_choice
     household_id
    932147     1
    982875     1
    983048     1
    1024353    0
    Name: auto_ownership, dtype: int64
    """
    pdt.assert_series_equal(auto_choice, expected_choice)
Example #15
def regress_mini_mtf():

    mtf_choice = pipeline.get_table(
        "persons").sort_index().mandatory_tour_frequency

    # these choices are for pure regression - their appropriateness has not been checked
    per_ids = [2566701, 2566702, 3061895]
    choices = ['school1', 'school1', 'work1']
    expected_choice = pd.Series(choices,
                                index=pd.Index(per_ids, name='person_id'),
                                name='mandatory_tour_frequency')

    mtf_choice = mtf_choice[mtf_choice != '']  # drop null (empty string) choices

    offset = len(mtf_choice) // 2  # choose something midway as hh_id ordered by hh size
    print("mtf_choice\n%s" % mtf_choice.head(offset).tail(3))
    """
    mtf_choice
    person_id
    2566701    school1
    2566702    school1
    3061895      work1
    Name: mandatory_tour_frequency, dtype: object
    """
    pdt.assert_series_equal(mtf_choice.reindex(per_ids),
                            expected_choice,
                            check_dtype=False)
Example #16
def full_run(resume_after=None,
             chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None,
             trace_od=None,
             check_for_variability=None):

    setup_dirs()

    settings = inject_settings(
        households_sample_size=households_sample_size,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_od=trace_od,
        testing_fail_trip_destination=False,
        check_for_variability=check_for_variability,
        want_dest_choice_sample_tables=False,
        use_shadow_pricing=False
    )  # shadow pricing breaks replicability when sample_size varies

    # FIXME should enable testing_fail_trip_destination?

    MODELS = settings['models']

    pipeline.run(models=MODELS, resume_after=resume_after)

    tours = pipeline.get_table('tours')
    tour_count = len(tours.index)

    return tour_count
Example #17
def step_forget_tab():

    table_name = inject.get_step_arg('table_name')
    assert table_name is not None

    table = pipeline.get_table(table_name)

    pipeline.drop_table(table_name)
Example #18
def test_mini_pipeline_run():

    setup_dirs()

    inject_settings(households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
                    write_skim_cache=True
                    )

    _MODELS = [
        'initialize_landuse',
        'compute_accessibility',
        'initialize_households',
        'school_location',
        'workplace_location',
        'auto_ownership_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    regress_mini_auto()

    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    regress_mini_mtf()
    regress_mini_location_choice_logsums()

    # try to get a non-existent table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "never checkpointed" in str(excinfo.value)

    # try to get an existing table from a non-existent checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("households", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    # should create optional workplace_location_sample table
    workplace_location_sample_df = pipeline.get_table("workplace_location_sample")
    assert 'mode_choice_logsum' in workplace_location_sample_df

    pipeline.close_pipeline()
    inject.clear_cache()
    close_handlers()
Example #19
def regress_mini_location_choice_logsums():

    persons = pipeline.get_table("persons")

    # DEST_CHOICE_LOGSUM_COLUMN_NAME is specified in school_location.yaml and should be assigned
    assert 'school_location_logsum' in persons
    assert not persons.school_location_logsum.isnull().all()

    # DEST_CHOICE_LOGSUM_COLUMN_NAME is NOT specified in workplace_location.yaml
    assert 'workplace_location_logsum' not in persons
Example #20
def test_mini_pipeline_run():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')

    setup_dirs(configs_dir)

    inject_settings(configs_dir,
                    households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
                    # use_shadow_pricing=True
                    )

    _MODELS = [
        'initialize_landuse',
        'compute_accessibility',
        'initialize_households',
        'school_location',
        'workplace_location',
        'auto_ownership_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    regress_mini_auto()

    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    regress_mini_mtf()

    # try to get a non-existent table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "never checkpointed" in str(excinfo.value)

    # try to get an existing table from a non-existent checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("households", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close_pipeline()
    inject.clear_cache()
    close_handlers()
Example #21
def initialize_landuse():

    trace_label = 'initialize_landuse'

    model_settings = config.read_model_settings('initialize_landuse.yaml',
                                                mandatory=True)

    annotate_tables(model_settings, trace_label)

    # instantiate accessibility (must be checkpointed to be used to slice accessibility)
    accessibility = pipeline.get_table('accessibility')
Example #22
def regress():

    expanded_household_ids = pipeline.get_table("expanded_household_ids")
    assert isinstance(expanded_household_ids, pd.DataFrame)
    taz_hh_counts = expanded_household_ids.groupby("TAZ").size()
    assert len(taz_hh_counts) == TAZ_COUNT
    assert taz_hh_counts.loc[100] == TAZ_100_HH_COUNT

    # output_tables action: skip
    output_dir = inject.get_injectable("output_dir")
    assert not os.path.exists(os.path.join(output_dir, "households.csv"))
    assert os.path.exists(os.path.join(output_dir, "summary_DISTRICT_1.csv"))
Example #23
def initialize_landuse():

    trace_label = 'initialize_landuse'

    model_settings = config.read_model_settings('initialize_landuse.yaml', mandatory=True)

    annotate_tables(model_settings, trace_label)

    # create accessibility (only required if multiprocessing wants to slice accessibility)
    land_use = pipeline.get_table('land_use')
    accessibility_df = pd.DataFrame(index=land_use.index)
    pipeline.replace_table("accessibility", accessibility_df)
Example #24
def test_mini_pipeline_run2():

    # the important thing here is that we should get
    # exactly the same results as for test_mini_pipeline_run
    # when we restart the pipeline

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')

    setup_dirs(configs_dir)

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    # should be able to get this BEFORE pipeline is opened
    checkpoints_df = pipeline.get_checkpoints()
    prev_checkpoint_count = len(checkpoints_df.index)

    # print "checkpoints_df\n", checkpoints_df[['checkpoint_name']]
    assert prev_checkpoint_count == 8

    pipeline.open_pipeline('auto_ownership_simulate')

    regress_mini_auto()

    # try to run a model already in pipeline
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.run_model('auto_ownership_simulate')
    assert "run model 'auto_ownership_simulate' more than once" in str(
        excinfo.value)

    # and these new ones
    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    regress_mini_mtf()

    # should be able to get this before pipeline is closed (from existing open store)
    checkpoints_df = pipeline.get_checkpoints()
    assert len(checkpoints_df.index) == prev_checkpoint_count

    # - write list of override_hh_ids to override_hh_ids.csv in data for use in next test
    num_hh_ids = 10
    hh_ids = pipeline.get_table("households").head(num_hh_ids).index.values
    hh_ids = pd.DataFrame({'household_id': hh_ids})

    data_dir = inject.get_injectable('data_dir')
    hh_ids.to_csv(os.path.join(data_dir, 'override_hh_ids.csv'),
                  index=False,
                  header=True)

    pipeline.close_pipeline()
    inject.clear_cache()
    close_handlers()
Example #25
def full_run(resume_after=None,
             chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None,
             trace_od=None,
             check_for_variability=None):

    configs_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..',
                               'example', 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    inject_settings(configs_dir,
                    households_sample_size=households_sample_size,
                    chunk_size=chunk_size,
                    trace_hh_id=trace_hh_id,
                    trace_od=trace_od,
                    check_for_variability=check_for_variability)

    orca.clear_cache()

    tracing.config_logger()

    # assert orca.get_injectable("chunk_size") == chunk_size

    _MODELS = [
        'compute_accessibility', 'school_location_sample',
        'school_location_logsums', 'school_location_simulate',
        'workplace_location_sample', 'workplace_location_logsums',
        'workplace_location_simulate', 'auto_ownership_simulate',
        'cdap_simulate', 'mandatory_tour_frequency', 'mandatory_scheduling',
        'non_mandatory_tour_frequency', 'destination_choice',
        'non_mandatory_scheduling', 'tour_mode_choice_simulate',
        'create_simple_trips', 'trip_mode_choice_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=resume_after)

    tours = pipeline.get_table('tours')
    tour_count = len(tours.index)

    pipeline.close()

    orca.clear_cache()

    return tour_count
Example #26
def test_mini_pipeline_run2():

    # the important thing here is that we should get
    # exactly the same results as for test_mini_pipeline_run
    # when we restart the pipeline

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')

    setup_dirs(configs_dir)

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    # should be able to get this BEFORE pipeline is opened
    checkpoints_df = pipeline.get_checkpoints()
    prev_checkpoint_count = len(checkpoints_df.index)

    # print "checkpoints_df\n", checkpoints_df[['checkpoint_name']]
    assert prev_checkpoint_count == 8

    pipeline.open_pipeline('auto_ownership_simulate')

    regress_mini_auto()

    # try to run a model already in pipeline
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.run_model('auto_ownership_simulate')
    assert "run model 'auto_ownership_simulate' more than once" in str(excinfo.value)

    # and these new ones
    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    regress_mini_mtf()

    # should be able to get this before pipeline is closed (from existing open store)
    checkpoints_df = pipeline.get_checkpoints()
    assert len(checkpoints_df.index) == prev_checkpoint_count

    # - write list of override_hh_ids to override_hh_ids.csv in data for use in next test
    num_hh_ids = 10
    hh_ids = pipeline.get_table("households").head(num_hh_ids).index.values
    hh_ids = pd.DataFrame({'household_id': hh_ids})

    data_dir = inject.get_injectable('data_dir')
    hh_ids.to_csv(os.path.join(data_dir, 'override_hh_ids.csv'), index=False, header=True)

    pipeline.close_pipeline()
    inject.clear_cache()
    close_handlers()
Example #27
def get_trips_df(model_settings):
    """Default to pipeline trips table unless
    user provides a CSV
    """
    filename = model_settings.get('input_table', None)

    if not filename:
        logger.info("using 'trips' pipeline table for balancing step")
        trips_df = pipeline.get_table('trips')
        return trips_df.reset_index()

    logger.info('using %s for balancing step' % filename)
    fpath = config.data_file_path(filename, mandatory=True)

    return pd.read_csv(fpath, header=0, comment='#')
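
A hedged usage sketch of the two branches of get_trips_df, assuming an open pipeline and a configured data directory; 'observed_trips.csv' is purely illustrative:

# no 'input_table' key -> fall back to the checkpointed 'trips' pipeline table
trips_df = get_trips_df(model_settings={})

# with 'input_table' -> read the named CSV from the data directory instead
trips_df = get_trips_df(model_settings={'input_table': 'observed_trips.csv'})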
Example #28
def test_pipeline_run():

    inject.add_step('step1', steps.step1)
    inject.add_step('step2', steps.step2)
    inject.add_step('step3', steps.step3)
    inject.add_step('step_add_col', steps.step_add_col)
    inject.dump_state()

    _MODELS = [
        'step1', 'step2', 'step3',
        'step_add_col.table_name=table2;column_name=c2'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    checkpoints = pipeline.get_checkpoints()
    print("checkpoints\n", checkpoints)

    c2 = pipeline.get_table("table2").c2

    # get an existing table from an earlier checkpoint
    pipeline.get_table("table1", checkpoint_name="step3")

    # try to get a table from a step before it was checkpointed
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("table2", checkpoint_name="step1")
    assert "not in checkpoint 'step1'" in str(excinfo.value)

    # try to get a non-existent table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "never checkpointed" in str(excinfo.value)

    # try to get an existing table from a non-existent checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("table1", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close_pipeline()

    close_handlers()
Example #29
def initialize_landuse():

    trace_label = 'initialize_landuse'

    model_settings = config.read_model_settings('initialize_landuse.yaml', mandatory=True)

    annotate_tables(model_settings, trace_label)

    # create accessibility
    land_use = pipeline.get_table('land_use')

    accessibility_df = pd.DataFrame(index=land_use.index)

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)
Example #30
def step_add_col():

    table_name = inject.get_step_arg('table_name')
    assert table_name is not None

    col_name = inject.get_step_arg('column_name')
    assert col_name is not None

    table = pipeline.get_table(table_name)

    assert col_name not in table.columns

    table[col_name] = table.index + (1000 * len(table.columns))

    pipeline.replace_table(table_name, table)
Example #31
def initialize_landuse():

    trace_label = 'initialize_landuse'

    model_settings = config.read_model_settings('initialize_landuse.yaml',
                                                mandatory=True)

    annotate_tables(model_settings, trace_label)

    # create accessibility
    land_use = pipeline.get_table('land_use')

    accessibility_df = pd.DataFrame(index=land_use.index)

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)
Example #32
def annotate_trips(trips, network_los, model_settings):
    """
    Add columns to local trips table. The annotator has
    access to the origin/destination skims and everything
    defined in the model settings CONSTANTS.

    Pipeline tables can also be accessed by listing them under
    TABLES in the preprocessor settings.
    """

    trips_df = trips.to_frame()

    trace_label = 'trip_matrices'

    skim_dict = network_los.get_default_skim_dict()

    # setup skim keys
    if 'trip_period' not in trips_df:
        trips_df['trip_period'] = network_los.skim_time_period_label(
            trips_df.depart)
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key='origin',
                                               dest_key='destination',
                                               dim3_key='trip_period')
    skims = {'od_skims': od_skim_wrapper, "odt_skims": odt_skim_stack_wrapper}

    locals_dict = {}
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_dict.update(constants)

    expressions.annotate_preprocessors(trips_df, locals_dict, skims,
                                       model_settings, trace_label)

    # Data will be expanded by an expansion weight column from
    # the households pipeline table, if specified in the model settings.
    hh_weight_col = model_settings.get('HH_EXPANSION_WEIGHT_COL')

    if hh_weight_col and hh_weight_col not in trips_df:
        logger.info("adding '%s' from households to trips table" %
                    hh_weight_col)
        household_weights = pipeline.get_table('households')[hh_weight_col]
        trips_df[hh_weight_col] = trips_df.household_id.map(household_weights)

    return trips_df
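
The household expansion-weight mapping at the end of annotate_trips is plain pandas. A self-contained sketch with invented data; the column name 'sample_rate' merely stands in for whatever HH_EXPANSION_WEIGHT_COL names in the model settings:

import pandas as pd

hh_weight_col = 'sample_rate'  # hypothetical HH_EXPANSION_WEIGHT_COL value

households = pd.DataFrame({hh_weight_col: [2.0, 5.0]},
                          index=pd.Index([100, 101], name='household_id'))
trips_df = pd.DataFrame({'household_id': [100, 100, 101]})

# map each trip's household_id to that household's expansion weight
household_weights = households[hh_weight_col]
trips_df[hh_weight_col] = trips_df.household_id.map(household_weights)
print(trips_df)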
Example #33
def regress_3_zone():

    tours_df = pipeline.get_table('tours')
    assert len(tours_df[tours_df.tour_mode == 'WALK_TRANSIT']) > 0

    # should cache atap and btap for transit modes only
    for c in ['od_atap', 'od_btap', 'do_atap', 'do_btap']:
        # tour_mode_choice sets non-transit taps to 0
        assert not (tours_df[tours_df.tour_mode.isin(
            ['WALK_TRANSIT', 'DRIVE_TRANSIT'])][c] == 0).any()
        baddies = (~tours_df.tour_mode.isin(['WALK_TRANSIT', 'DRIVE_TRANSIT']) &
                   (tours_df[c] != 0))
        if baddies.any():
            print(tours_df[baddies][[
                'tour_type', 'tour_mode', 'od_atap', 'od_btap', 'do_atap',
                'do_btap'
            ]])
            assert False
Example #34
def test_load_cached_accessibility():

    inject.clear_cache()
    inject.reinject_decorated_tables()

    data_dir = [
        os.path.join(os.path.dirname(__file__), 'data'),
        example_path('data')
    ]
    setup_dirs(data_dir=data_dir)

    #
    # add OPTIONAL cached table accessibility to input_table_list
    # activitysim.abm.tables.land_use.accessibility() will load this table if listed here
    # presumably calculated independently outside activitysim, or a cached copy created during a previous run
    #
    settings = config.read_settings_file('settings.yaml', mandatory=True)
    input_table_list = settings.get('input_table_list')
    input_table_list.append({
        'tablename': 'accessibility',
        'filename': 'cached_accessibility.csv',
        'index_col': 'zone_id'
    })
    inject_settings(households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
                    input_table_list=input_table_list)

    _MODELS = [
        'initialize_landuse',
        # 'compute_accessibility',  # we load accessibility table ordinarily created by compute_accessibility
        'initialize_households',
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    accessibility_df = pipeline.get_table("accessibility")

    assert 'auPkRetail' in accessibility_df

    pipeline.close_pipeline()
    inject.clear_cache()
    close_handlers()
Example #35
def write_tables(output_dir):
    """
    Write pipeline tables as csv files (in output directory) as specified by output_tables list
    in settings file.

    'output_tables' can specify either a list of output tables to include or a list to skip.
    If no output_tables list is specified, then no checkpointed tables will be written.

    To write all output tables EXCEPT the households and persons tables:

    ::

      output_tables:
        action: skip
        tables:
          - households
          - persons

    To write ONLY the households table:

    ::

      output_tables:
        action: include
        tables:
           - households

    Parameters
    ----------
    output_dir: str

    """

    output_tables_settings_name = 'output_tables'

    output_tables_settings = setting(output_tables_settings_name)

    if output_tables_settings is None:
        logger.info("No output_tables specified in settings file. Nothing to write.")
        return

    action = output_tables_settings.get('action')
    tables = output_tables_settings.get('tables')
    prefix = output_tables_settings.get('prefix', 'final_')

    if action not in ['include', 'skip']:
        raise "expected %s action '%s' to be either 'include' or 'skip'" % \
              (output_tables_settings_name, action)

    checkpointed_tables = pipeline.checkpointed_tables()
    if action == 'include':
        output_tables_list = tables
    elif action == 'skip':
        output_tables_list = [t for t in checkpointed_tables if t not in tables]

    for table_name in output_tables_list:

        if table_name == 'checkpoints':
            df = pipeline.get_checkpoints()
        else:
            if table_name not in checkpointed_tables:
                logger.warning("Skipping '%s': Table not found." % table_name)
                continue
            df = pipeline.get_table(table_name)

        file_name = "%s%s.csv" % (prefix, table_name)
        file_path = config.output_file_path(file_name)

        # include the index if it has a name or is a MultiIndex
        write_index = df.index.name is not None or isinstance(df.index, pd.MultiIndex)

        df.to_csv(file_path, index=write_index)
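
The include/skip filter above reduces to a small list operation. A standalone sketch with illustrative table names:

checkpointed_tables = ['households', 'persons', 'tours', 'trips']

# output_tables settings: action 'skip' writes everything except the listed tables
action, tables = 'skip', ['households', 'persons']

if action == 'include':
    output_tables_list = tables
elif action == 'skip':
    output_tables_list = [t for t in checkpointed_tables if t not in tables]

print(output_tables_list)  # -> ['tours', 'trips']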