Example #1
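# assumes the test module's usual imports: os, orca, and ActivitySim's
# pipeline and tracing modules, plus the local inject_settings helper and
# HOUSEHOLDS_SAMPLE_SIZE constant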
def full_run(resume_after=None,
             chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None,
             trace_od=None,
             check_for_variability=None):
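    # run the full set of example models against the test data and return
    # the number of tours created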

    configs_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..',
                               'example', 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    inject_settings(configs_dir,
                    households_sample_size=households_sample_size,
                    chunk_size=chunk_size,
                    trace_hh_id=trace_hh_id,
                    trace_od=trace_od,
                    check_for_variability=check_for_variability)

    orca.clear_cache()

    tracing.config_logger()

    # assert orca.get_injectable("chunk_size") == chunk_size

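    # the full model sequence for this example; pipeline.run executes the
    # models in order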
    _MODELS = [
        'compute_accessibility', 'school_location_sample',
        'school_location_logsums', 'school_location_simulate',
        'workplace_location_sample', 'workplace_location_logsums',
        'workplace_location_simulate', 'auto_ownership_simulate',
        'cdap_simulate', 'mandatory_tour_frequency', 'mandatory_scheduling',
        'non_mandatory_tour_frequency', 'destination_choice',
        'non_mandatory_scheduling', 'tour_mode_choice_simulate',
        'create_simple_trips', 'trip_mode_choice_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=resume_after)

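    # count the tours created by the run before closing the pipeline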
    tours = pipeline.get_table('tours')
    tour_count = len(tours.index)

    pipeline.close()

    orca.clear_cache()

    return tour_count
Example #2
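# assumes the same module-level imports as Example #1, plus (presumably)
# pandas as pd, pandas.util.testing as pdt, and pytest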
def test_mini_pipeline_run():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    orca.clear_cache()

    # assert len(orca.get_table("households").index) == HOUSEHOLDS_SAMPLE_SIZE

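    # run just the first few models here; later models are stepped through
    # one at a time with run_model below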
    _MODELS = [
        'compute_accessibility', 'school_location_simulate',
        'workplace_location_simulate', 'auto_ownership_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    auto_choice = pipeline.get_table("households").auto_ownership

    # regression test: these are the first 3 households in the households table
    hh_ids = [26960, 857296, 93428]
    choices = [0, 1, 0]
    expected_choice = pd.Series(choices,
                                index=pd.Index(hh_ids, name="HHID"),
                                name='auto_ownership')

    print "auto_choice\n", auto_choice.head(10)
    pdt.assert_series_equal(auto_choice[hh_ids], expected_choice)

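    # advance the pipeline one model at a time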
    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    mtf_choice = pipeline.get_table("persons").mandatory_tour_frequency

    per_ids = [92363, 92681, 93428]
    choices = ['work1', 'school1', 'school2']
    expected_choice = pd.Series(choices,
                                index=pd.Index(per_ids, name='PERID'),
                                name='mandatory_tour_frequency')

    print "mtf_choice\n", mtf_choice.head(20)
    pdt.assert_series_equal(mtf_choice[per_ids], expected_choice)

    # try to get a non-existent table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "not in checkpointed tables" in str(excinfo.value)

    # try to get an existing table from a non-existent checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("households", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close()

    orca.clear_cache()
Example #3
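# same directory setup as test_mini_pipeline_run; this test resumes from the
# checkpoint store that run leaves in the output directory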
def test_mini_pipeline_run2():

    # the important thing here is that we should get
    # exactly the same results as for test_mini_pipeline_run
    # when we restart the pipeline

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    orca.clear_cache()

    # should be able to get this BEFORE pipeline is opened
    checkpoints_df = pipeline.get_checkpoints()
    prev_checkpoint_count = len(checkpoints_df.index)
    assert prev_checkpoint_count == 7

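    # restart the pipeline, resuming from the auto_ownership_simulate
    # checkpoint written by test_mini_pipeline_run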
    pipeline.start_pipeline('auto_ownership_simulate')

    auto_choice = pipeline.get_table("households").auto_ownership

    # regression test: same households and expected choices as in test_mini_pipeline_run
    hh_ids = [26960, 857296, 93428]
    choices = [0, 1, 0]
    expected_auto_choice = pd.Series(choices,
                                     index=pd.Index(hh_ids, name="HHID"),
                                     name='auto_ownership')

    print "auto_choice\n", auto_choice.head(4)
    pdt.assert_series_equal(auto_choice[hh_ids], expected_auto_choice)

    # try to run a model already in pipeline
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.run_model('auto_ownership_simulate')
    assert "run model 'auto_ownership_simulate' more than once" in str(
        excinfo.value)

    # and these new ones
    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    mtf_choice = pipeline.get_table("persons").mandatory_tour_frequency

    per_ids = [92363, 92681, 93428]

    choices = ['work1', 'school1', 'school2']
    expected_choice = pd.Series(choices,
                                index=pd.Index(per_ids, name='PERID'),
                                name='mandatory_tour_frequency')

    print "mtf_choice\n", mtf_choice.head(20)
    pdt.assert_series_equal(mtf_choice[per_ids], expected_choice)

    # should be able to get this before pipeline is closed (from existing open store)
    assert orca.get_injectable('pipeline_store') is not None
    checkpoints_df = pipeline.get_checkpoints()
    assert len(checkpoints_df.index) == prev_checkpoint_count

    pipeline.close()

    # should also be able to get this after the pipeline is closed (the store is opened and closed as needed)
    assert orca.get_injectable('pipeline_store') is None
    checkpoints_df = pipeline.get_checkpoints()
    assert len(checkpoints_df.index) == prev_checkpoint_count

    orca.clear_cache()
Example #4
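# assumes the full model run above has just completed and the pipeline is
# still open; os, orca, pipeline, t0 and print_elapsed_time are presumably
# defined/imported earlier in the script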
# retrieve the state of a checkpointed table after a specific model was run
df = pipeline.get_table(table_name="persons",
                        checkpoint_name="school_location_simulate")
print "\npersons table columns after school_location_simulate:", df.columns.values

# get_table without checkpoint_name returns the latest version of the table
df = pipeline.get_table("tours")
print "\ntour_type value counts\n", df.tour_type.value_counts()

# get_table also works for a computed (non-checkpointed, internal orca) table
# and returns its most recent value
df = pipeline.get_table("persons_merged")
df = df[['household_id', 'age', 'auPkTotal', 'roundtrip_auto_time_to_work']]
print "\npersons_merged selected columns\n", df.head(20)

# write final versions of all checkpointed dataframes to CSV files to review results
for table_name in pipeline.checkpointed_tables():
    file_name = "final_%s_table.csv" % table_name
    file_path = os.path.join(orca.get_injectable("output_dir"), file_name)
    pipeline.get_table(table_name).to_csv(file_path)

# tables will no longer be available after pipeline is closed
pipeline.close()

# write checkpoints (this can be called whether or not the pipeline is open)
file_path = os.path.join(orca.get_injectable("output_dir"), "checkpoints.csv")
pipeline.get_checkpoints().to_csv(file_path)

t0 = print_elapsed_time("all models", t0)