Exemplo n.º 1
0
def test_full_run(store):
    """Run the example model steps in order against the supplied ``store``.

    Registers the injectables used by the run, touches the main input
    tables, checks the household count and then executes each orca step one
    at a time. The orca cache is cleared once the last step has run.
    """
    example_dir = os.path.join(
        os.path.dirname(__file__), '..', '..', '..', 'example')
    orca.add_injectable("configs_dir", example_dir)

    orca.add_injectable("store", store)

    orca.add_injectable("nonmotskm_matrix", np.ones((1454, 1454)))
    orca.add_injectable("set_random_seed", set_random_seed)

    # pull a few of the input tables and call .info() on each frame
    for table_name in ("land_use", "households", "persons"):
        orca.get_table(table_name).to_frame().info()

    household_count = len(orca.get_table("households").index)
    assert household_count == HOUSEHOLDS_SAMPLE_SIZE

    # the steps below must run in exactly this order
    for step in ("workplace_location_simulate",
                 "auto_ownership_simulate",
                 "cdap_simulate",
                 "mandatory_tour_frequency"):
        orca.run([step])
    orca.get_table("mandatory_tours").tour_type.value_counts()

    orca.run(["non_mandatory_tour_frequency"])
    orca.get_table("non_mandatory_tours").tour_type.value_counts()

    for step in ("destination_choice",
                 "mandatory_scheduling",
                 "non_mandatory_scheduling",
                 "mode_choice_simulate"):
        orca.run([step])

    orca.clear_cache()
Exemplo n.º 2
0
def test_rng_access():
    """Check that the base seed can only be set before the pipeline opens.

    After ``pipeline.open_pipeline()`` a second call to
    ``set_rn_generator_base_seed`` is expected to raise ``RuntimeError``,
    while ``get_rn_generator`` must still be callable.
    """
    here = os.path.dirname(__file__)

    configs_dir = os.path.join(here, 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    orca.add_injectable("output_dir", os.path.join(here, 'output'))

    orca.add_injectable("data_dir", os.path.join(here, 'data'))

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    orca.clear_cache()

    # seeding is allowed while the pipeline is still closed
    pipeline.set_rn_generator_base_seed(0)

    pipeline.open_pipeline()

    # ... and rejected once it has been opened
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.set_rn_generator_base_seed(0)
    message = str(excinfo.value)
    assert "call set_rn_generator_base_seed before the first step" in message

    # the generator accessor only has to succeed; its result is not inspected
    pipeline.get_rn_generator()

    pipeline.close_pipeline()
    orca.clear_cache()
Exemplo n.º 3
0
def test_full_run2():
    """Run the ``configs2``/``data2`` model list through the pipeline.

    Afterwards ``expanded_household_ids`` must be a DataFrame and
    ``summary_DISTRICT.csv`` must exist in the output directory.
    """
    here = os.path.dirname(__file__)
    output_dir = os.path.join(here, 'output')

    orca.add_injectable("configs_dir", os.path.join(here, 'configs2'))
    orca.add_injectable("data_dir", os.path.join(here, 'data2'))
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()

    tracing.config_logger()

    steps = [
        'input_pre_processor',
        'setup_data_structures',
        'initial_seed_balancing',
        'meta_control_factoring',
        'final_seed_balancing',
        'integerize_final_seed_weights',
        'sub_balancing.geography = DISTRICT',
        'sub_balancing.geography = TRACT',
        'sub_balancing.geography=TAZ',
        'expand_households',
        'summarize',
        'write_results',
    ]

    pipeline.run(models=steps, resume_after=None)

    expanded = pipeline.get_table('expanded_household_ids')
    assert isinstance(expanded, pd.DataFrame)

    summary_path = os.path.join(output_dir, 'summary_DISTRICT.csv')
    assert os.path.exists(summary_path)

    # pipeline tables cannot be read any more once it has been closed
    pipeline.close_pipeline()

    orca.clear_cache()
def full_run(preload_3d_skims, chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None, trace_od=None, check_for_variability=None):
    """Run every example model step through orca and return the tour count.

    All arguments are forwarded to ``inject_settings`` together with the
    example configs directory; ``chunk_size`` is additionally checked
    against the ``chunk_size`` injectable.

    Returns:
        The number of rows in the ``tours_merged`` table after the last step.
    """
    here = os.path.dirname(__file__)

    configs_dir = os.path.join(here, '..', '..', '..', 'example', 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(here, 'data')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(here, 'output')
    orca.add_injectable("output_dir", output_dir)

    inject_settings(configs_dir,
                    households_sample_size=households_sample_size,
                    preload_3d_skims=preload_3d_skims,
                    chunk_size=chunk_size,
                    trace_hh_id=trace_hh_id,
                    trace_od=trace_od,
                    check_for_variability=check_for_variability)

    orca.add_injectable("set_random_seed", set_random_seed)

    orca.clear_cache()

    tracing.config_logger()

    # grab some of the tables
    orca.get_table("land_use").to_frame().info()
    orca.get_table("households").to_frame().info()
    orca.get_table("persons").to_frame().info()

    # Compare against the sample size actually requested for this run rather
    # than the module default, so callers passing a different
    # households_sample_size are checked against their own value.
    assert len(orca.get_table("households").index) == households_sample_size
    assert orca.get_injectable("chunk_size") == chunk_size

    # run the models in the expected order
    orca.run(["compute_accessibility"])
    orca.run(["school_location_simulate"])
    orca.run(["workplace_location_simulate"])
    orca.run(["auto_ownership_simulate"])
    orca.run(["cdap_simulate"])
    orca.run(['mandatory_tour_frequency'])
    orca.get_table("mandatory_tours").tour_type.value_counts()
    orca.run(['non_mandatory_tour_frequency'])
    orca.get_table("non_mandatory_tours").tour_type.value_counts()
    orca.run(["destination_choice"])
    orca.run(["mandatory_scheduling"])
    orca.run(["non_mandatory_scheduling"])
    orca.run(["patch_mandatory_tour_destination"])
    orca.run(["tour_mode_choice_simulate"])
    orca.run(["trip_mode_choice_simulate"])

    tours_merged = orca.get_table("tours_merged").to_frame()

    tour_count = len(tours_merged.index)

    orca.clear_cache()

    return tour_count
Exemplo n.º 5
0
def wplcm_simulate(persons, households, jobs):
    """Run the ``wplcmcoef.yaml`` location choice simulation for persons.

    Only jobs with ``building_id > 0`` are offered as alternatives; they are
    registered with orca as the ``located_jobs`` table first. The orca cache
    is cleared afterwards. Nothing is returned.
    """
    # jobs without a valid building_id cannot be sent in, so drop them
    all_jobs = jobs.to_frame()
    placed_jobs = all_jobs[all_jobs.building_id > 0]
    placed_jobs.index.name = 'job_id'

    orca.add_table('located_jobs', placed_jobs)

    utils.lcm_simulate(
        "wplcmcoef.yaml",
        persons,
        orca.get_table('located_jobs'),
        None,
        "job_id",
        "number_of_jobs",
        "vacant_jobs",
        cast=True,
    )

    orca.clear_cache()
Exemplo n.º 6
0
def compute_indicators(settings, iter_var):
    """Register one orca table per (dataset, indicator) pair from settings.

    For every indicator under ``settings['indicators']`` and every dataset
    listed in its ``'dataset'`` entry, the indicator column of that dataset
    is registered as a table named ``<dataset>_<indicator>_<iter_var>`` and
    the name is appended to ``ind_table_list``, which is defined outside
    this function. The orca cache is cleared at the end.

    Args:
        settings: mapping with an ``'indicators'`` entry.
        iter_var: value used as the suffix of the generated table names.
    """
    # .items() instead of the Python-2-only .iteritems(): it behaves the same
    # for this loop and also exists on Python 3.
    for ind, value in settings['indicators'].items():
        for ds in value['dataset']:
            ds_tablename = '%s_%s_%s' % (ds, ind, str(iter_var))
            df = orca.get_table(ds)[ind]
            orca.add_table(ds_tablename, df)
            ind_table_list.append(ds_tablename)
    orca.clear_cache()
Exemplo n.º 7
0
def households_transition(households, household_controls,
                          year, settings, persons):
    """Run the household transition and patch up the new rows.

    Calls ``utils.full_transition`` with ``persons`` linked through
    ``household_id``, prints the net change in households and persons, and
    then updates ``is_inmigrant``, ``job_id`` and ``work_at_home`` on the
    re-fetched tables. The orca cache is cleared before returning.

    Returns:
        Whatever ``utils.full_transition`` returned.
    """
    orig_size_hh = households.local.shape[0]
    orig_size_pers = persons.local.shape[0]
    orig_pers_index = persons.index
    orig_hh_index = households.index
    res = utils.full_transition(households, household_controls, year,
                                settings['households_transition'],
                                "building_id",
                                linked_tables={"persons":
                                               (persons.local,
                                                'household_id')})

    # Single-argument, parenthesised print: valid as a Python 2 print
    # statement and as a Python 3 function call, with identical output.
    print("Net change: %s households" % (
        orca.get_table("households").local.shape[0] - orig_size_hh))
    print("Net change: %s persons" % (
        orca.get_table("persons").local.shape[0] - orig_size_pers))

    # changes to households/persons table are not reflected in local scope
    # need to reset vars to get changes.
    households = orca.get_table('households')
    persons = orca.get_table("persons")

    # rows whose index was not present before the transition
    is_new_household = ~households.index.isin(orig_hh_index)
    is_new_person = ~persons.index.isin(orig_pers_index)

    # need to make some updates to the persons & households table
    households.update_col_from_series(
        "is_inmigrant",
        pd.Series(np.where(is_new_household, 1, 0), index=households.index),
        cast=True)

    # new workers dont have jobs yet, set job_id to -1
    persons.update_col_from_series(
        "job_id",
        pd.Series(np.where(is_new_person, -1, persons.job_id),
                  index=persons.index),
        cast=True)

    # dont know their work at home status yet, set to 0:
    persons.update_col_from_series(
        "work_at_home",
        pd.Series(np.where(is_new_person, 0, persons.work_at_home),
                  index=persons.index),
        cast=True)

    # set non-worker job_id to -2 (reads the job_id written just above)
    persons.update_col_from_series(
        "job_id",
        pd.Series(np.where(persons.employment_status > 0,
                           persons.job_id, -2),
                  index=persons.index),
        cast=True)
    orca.clear_cache()
    return res
Exemplo n.º 8
0
def full_run(resume_after=None,
             chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None,
             trace_od=None,
             check_for_variability=None):
    """Run the fixed model list through the pipeline and count the tours.

    ``resume_after`` is passed to ``pipeline.run``; the remaining arguments
    are forwarded to ``inject_settings``.

    Returns:
        The number of rows in the pipeline's ``tours`` table.
    """
    here = os.path.dirname(__file__)

    configs_dir = os.path.join(here, '..', '..', '..', 'example', 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    orca.add_injectable("data_dir", os.path.join(here, 'data'))

    orca.add_injectable("output_dir", os.path.join(here, 'output'))

    inject_settings(configs_dir,
                    households_sample_size=households_sample_size,
                    chunk_size=chunk_size,
                    trace_hh_id=trace_hh_id,
                    trace_od=trace_od,
                    check_for_variability=check_for_variability)

    orca.clear_cache()

    tracing.config_logger()

    # NOTE: the check of the "chunk_size" injectable against chunk_size is
    # intentionally left disabled here.

    steps = [
        'compute_accessibility',
        'school_location_sample',
        'school_location_logsums',
        'school_location_simulate',
        'workplace_location_sample',
        'workplace_location_logsums',
        'workplace_location_simulate',
        'auto_ownership_simulate',
        'cdap_simulate',
        'mandatory_tour_frequency',
        'mandatory_scheduling',
        'non_mandatory_tour_frequency',
        'destination_choice',
        'non_mandatory_scheduling',
        'tour_mode_choice_simulate',
        'create_simple_trips',
        'trip_mode_choice_simulate',
    ]

    pipeline.run(models=steps, resume_after=resume_after)

    tour_count = len(pipeline.get_table('tours').index)

    pipeline.close()

    orca.clear_cache()

    return tour_count
Exemplo n.º 9
0
def orca_year_dataset(hdf, year):
    """Register the tables stored for ``year`` in ``hdf`` with orca.

    Year 2015 is looked up under the ``base`` prefix. When ``hdf`` has no
    ``<year>/<table>`` entry, an empty frame with the structure of the
    ``2016/<table>`` entry is registered instead. The two large-area lookup
    injectables are reset to empty lists and ``year`` is injected as an int.

    Args:
        hdf: store supporting ``name in hdf`` and ``hdf[name]``.
        year: year to load, or ``'base'``.
    """
    if str(year) == '2015':
        year = 'base'
    orca.add_injectable("jobs_large_area_lookup", [])
    orca.add_injectable("households_large_area_lookup", [])
    orca.add_injectable("year", int(year if str(year) != 'base' else 2015))
    for tbl in ['households', 'persons', 'jobs', 'buildings', 'parcels', 'dropped_buildings']:
        name = str(year) + '/' + tbl
        if name in hdf:
            df = hdf[name]
        else:
            stub_name = str(2016) + '/' + tbl
            # Single-argument, parenthesised print: valid on Python 2 and 3.
            print("No table named " + name + ". Using the structuer from " + stub_name + ".")
            # zero-row slice keeps the columns but none of the data
            df = hdf[stub_name].iloc[0:0]
        orca.add_table(tbl, df)
        # the cache is cleared after each table is registered
        orca.clear_cache()
Exemplo n.º 10
0
def test_misc():
    """Exercise the error paths and defaults of the standard injectables.

    The statements are order dependent: each injectable is first requested
    while it is expected to fail and only then registered.
    """
    orca.clear_cache()

    # none of the directories has been registered at this point
    for dir_injectable in ("configs_dir", "data_dir", "output_dir"):
        with pytest.raises(RuntimeError) as excinfo:
            orca.get_injectable(dir_injectable)
        assert "directory does not exist" in str(excinfo.value)

    here = os.path.dirname(__file__)

    orca.add_injectable("configs_dir",
                        os.path.join(here, 'configs_test_misc'))

    loaded_settings = orca.get_injectable("settings")
    assert isinstance(loaded_settings, dict)

    for trace_injectable in ("trace_person_ids", "trace_tour_ids"):
        assert orca.get_injectable(trace_injectable) == []

    orca.add_injectable("data_dir", os.path.join(here, 'data'))

    # no store name in the settings yet
    with pytest.raises(RuntimeError) as excinfo:
        orca.get_injectable("store")
    assert "store file name not specified in settings" in str(excinfo.value)

    # a store name that does not point to an existing file
    orca.add_injectable("settings", {'store': 'bogus.h5'})
    with pytest.raises(RuntimeError) as excinfo:
        orca.get_injectable("store")
    assert "store file not found" in str(excinfo.value)

    # index names stay None until something overrides them
    for index_injectable in ("hh_index_name", "persons_index_name"):
        assert orca.get_injectable(index_injectable) is None

    # values used when the settings do not specify them
    for size_injectable in ("hh_chunk_size", "chunk_size"):
        assert orca.get_injectable(size_injectable) == 0
    assert orca.get_injectable("preload_3d_skims") is False
Exemplo n.º 11
0
def full_run(store, omx_file, preload_3d_skims, chunk_size=0):
    """Run all example steps with an explicit store and OMX file.

    Returns:
        The number of rows in the ``tours_merged`` table after the last step.
    """
    configs_dir = os.path.join(
        os.path.dirname(__file__), '..', '..', '..', 'example')
    orca.add_injectable("configs_dir", configs_dir)

    inject_settings(configs_dir,
                    households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
                    preload_3d_skims=preload_3d_skims,
                    chunk_size=chunk_size)

    for injectable_name, injectable_value in (
            ("omx_file", omx_file),
            ("store", store),
            ("set_random_seed", set_random_seed)):
        orca.add_injectable(injectable_name, injectable_value)

    orca.clear_cache()

    # pull a few of the input tables and call .info() on each frame
    for table_name in ("land_use", "households", "persons"):
        orca.get_table(table_name).to_frame().info()

    household_count = len(orca.get_table("households").index)
    assert household_count == HOUSEHOLDS_SAMPLE_SIZE
    assert orca.get_injectable("chunk_size") == chunk_size

    # the steps below must run in exactly this order
    for step in ("school_location_simulate",
                 "workplace_location_simulate",
                 "auto_ownership_simulate",
                 "cdap_simulate",
                 "mandatory_tour_frequency"):
        orca.run([step])
    orca.get_table("mandatory_tours").tour_type.value_counts()

    orca.run(["non_mandatory_tour_frequency"])
    orca.get_table("non_mandatory_tours").tour_type.value_counts()

    for step in ("destination_choice",
                 "mandatory_scheduling",
                 "non_mandatory_scheduling",
                 "patch_mandatory_tour_destination",
                 "tour_mode_choice_simulate",
                 "trip_mode_choice_simulate"):
        orca.run([step])

    merged_tours = orca.get_table("tours_merged").to_frame()
    tour_count = len(merged_tours.index)

    orca.clear_cache()

    return tour_count
Exemplo n.º 12
0
def test_full_run1():
    """Run the full model list and check the expanded household counts.

    Also verifies that ``households.csv`` was not written while
    ``summary_DISTRICT_1.csv`` was.
    """
    here = os.path.dirname(__file__)
    output_dir = os.path.join(here, 'output')

    orca.add_injectable("configs_dir", os.path.join(here, 'configs'))
    orca.add_injectable("data_dir", os.path.join(here, 'data'))
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()

    tracing.config_logger()

    steps = [
        'input_pre_processor', 'setup_data_structures',
        'initial_seed_balancing', 'meta_control_factoring',
        'final_seed_balancing', 'integerize_final_seed_weights',
        'sub_balancing.geography = TRACT', 'sub_balancing.geography=TAZ',
        'expand_households', 'summarize',
        'write_tables', 'write_synthetic_population',
    ]

    pipeline.run(models=steps, resume_after=None)

    expanded = pipeline.get_table('expanded_household_ids')
    assert isinstance(expanded, pd.DataFrame)

    households_per_taz = expanded.groupby('TAZ').size()
    assert len(households_per_taz) == TAZ_COUNT
    assert households_per_taz.loc[100] == TAZ_100_HH_COUNT

    # output_tables action: skip
    households_csv = os.path.join(output_dir, 'households.csv')
    assert not os.path.exists(households_csv)
    district_summary_csv = os.path.join(output_dir, 'summary_DISTRICT_1.csv')
    assert os.path.exists(district_summary_csv)

    # pipeline tables cannot be read any more once it has been closed
    pipeline.close_pipeline()

    orca.clear_cache()
Exemplo n.º 13
0
def test_mini_run(random_seed):
    """Regression test on a short run: auto ownership and mandatory tours.

    Runs the first few steps and compares the choices of three households
    and three persons against the recorded values.
    """
    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    orca.add_injectable("set_random_seed", set_random_seed)

    orca.clear_cache()

    assert len(orca.get_table("households").index) == HOUSEHOLDS_SAMPLE_SIZE

    # run the models in the expected order
    orca.run(["compute_accessibility"])
    orca.run(["workplace_location_simulate"])
    orca.run(["auto_ownership_simulate"])

    # this is a regression test so that we know if these numbers change
    auto_choice = orca.get_table('households').get_column('auto_ownership')

    hh_ids = [2124015, 961042, 1583271]
    choices = [1, 1, 1]
    # The two print items are joined into one string so the call is valid on
    # Python 2 and 3; a parenthesised pair would print a tuple on Python 2.
    print("auto_choice\n" + str(auto_choice.head(3)))
    pdt.assert_series_equal(
        auto_choice[hh_ids],
        pd.Series(choices, index=pd.Index(hh_ids, name="HHID")))

    orca.run(["cdap_simulate"])
    orca.run(['mandatory_tour_frequency'])

    mtf_choice = orca.get_table('persons').get_column(
        'mandatory_tour_frequency')
    per_ids = [326914, 172781, 298898]
    choices = ['school1', 'work_and_school', 'work2']
    print("mtf_choice\n" + str(mtf_choice.head(20)))
    pdt.assert_series_equal(
        mtf_choice[per_ids],
        pd.Series(choices, index=pd.Index(per_ids, name='PERID')))
    orca.clear_cache()
Exemplo n.º 14
0
def setup():
    """Reset the skim injectables and register the test directories.

    Removes ``skim_dict`` and ``skim_stack`` from orca's injectables if they
    are registered, points the configs/output/data injectables at the
    directories next to this file, clears the orca cache and configures
    logging.
    """
    for skim_injectable in ('skim_dict', 'skim_stack'):
        orca.orca._INJECTABLES.pop(skim_injectable, None)

    here = os.path.dirname(__file__)
    for injectable_name, folder in (("configs_dir", 'configs'),
                                    ("output_dir", 'output'),
                                    ("data_dir", 'data')):
        orca.add_injectable(injectable_name, os.path.join(here, folder))

    orca.clear_cache()

    tracing.config_logger()
Exemplo n.º 15
0
def test_pipeline_run():
    """Run a one-step pipeline and check table lookups and their errors.

    Covers a plain run, a single step run with an argument, and the
    ``RuntimeError`` raised for an unknown table or an unknown checkpoint.
    """
    # remove the skim injectables if they are registered
    for skim_injectable in ('skim_dict', 'skim_stack'):
        orca.orca._INJECTABLES.pop(skim_injectable, None)

    here = os.path.dirname(__file__)
    for injectable_name, folder in (("configs_dir", 'configs'),
                                    ("output_dir", 'output'),
                                    ("data_dir", 'data')):
        orca.add_injectable(injectable_name, os.path.join(here, folder))

    orca.clear_cache()

    tracing.config_logger()

    pipeline.run(models=['step1'], resume_after=None)

    # only the column access itself is exercised; the values are not checked
    pipeline.get_table("table1").column1

    # the part after the dot is the argument handed to the step
    pipeline.run_model('step2.table_name=table2')

    pipeline.get_table("table2").column1

    # a table that was never checkpointed
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "not in checkpointed tables" in str(excinfo.value)

    # a real table requested from a checkpoint that does not exist
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("table1", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close_pipeline()
    orca.clear_cache()

    close_handlers()
Exemplo n.º 16
0
def test_mini_run(store, omx_file, random_seed):
    """Regression test on a short run with an explicit store and OMX file.

    Runs the first few steps and compares the choices of three households
    and three persons against the recorded values.
    """
    configs_dir = os.path.join(os.path.dirname(__file__))
    orca.add_injectable("configs_dir", configs_dir)

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    orca.add_injectable("omx_file", omx_file)

    orca.add_injectable("store", store)

    orca.add_injectable("set_random_seed", set_random_seed)

    orca.clear_cache()

    assert len(orca.get_table("households").index) == HOUSEHOLDS_SAMPLE_SIZE

    # run the models in the expected order
    orca.run(["workplace_location_simulate"])
    orca.run(["auto_ownership_simulate"])

    # this is a regression test so that we know if these numbers change
    auto_choice = orca.get_table('households').get_column('auto_ownership')

    hh_ids = [2124015, 961042, 1583271]
    choices = [1, 2, 2]
    # The two print items are joined into one string so the call is valid on
    # Python 2 and 3; a parenthesised pair would print a tuple on Python 2.
    print("auto_choice\n" + str(auto_choice.head(3)))
    pdt.assert_series_equal(
        auto_choice[hh_ids],
        pd.Series(choices, index=pd.Index(hh_ids, name="HHID")))

    orca.run(["cdap_simulate"])
    orca.run(['mandatory_tour_frequency'])

    mtf_choice = orca.get_table('persons').get_column('mandatory_tour_frequency')
    per_ids = [172616, 172781, 172782]
    choices = ['work1', 'school1', 'work_and_school']
    print("mtf_choice\n" + str(mtf_choice.head(20)))
    pdt.assert_series_equal(
        mtf_choice[per_ids],
        pd.Series(choices, index=pd.Index(per_ids, name='PERID')))
    orca.clear_cache()
Exemplo n.º 17
0
def test_full_run1():
    """Run the steps named in the ``run_list`` setting and check the output.

    Data and scenarios are read from the shared ``example`` directory two
    levels up; configs and output live next to this file. The run must leave
    ``naics_set.csv`` in the output directory.
    """
    here = os.path.dirname(__file__)
    example_dir = os.path.join(here, '..', '..', 'example')
    output_dir = os.path.join(here, 'output')

    orca.add_injectable("configs_dir", os.path.join(here, 'configs'))

    # data and scenarios come from the example tree, not from local folders
    orca.add_injectable("data_dir", os.path.join(example_dir, 'data'))
    orca.add_injectable("scenarios_dir",
                        os.path.join(example_dir, 'scenarios'))

    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()

    tracing.config_logger()

    # the run_list setting is a dict holding 'steps' and, optionally,
    # 'resume_after'
    run_list = setting('run_list')
    assert 'steps' in run_list, "Did not find steps in run_list"

    # any resume_after entry in run_list is ignored: always run from scratch
    pipeline.run(models=run_list.get('steps'), resume_after=None)

    # NOTE: checks on the geo_crosswalk table (index name 'TAZ', columns
    # 'FAF4' and 'FIPS') are currently disabled.
    naics_csv = os.path.join(output_dir, 'naics_set.csv')
    assert os.path.exists(naics_csv)

    # pipeline tables cannot be read any more once it has been closed
    pipeline.close_pipeline()

    orca.clear_cache()
def parcels_zoning_by_scenario(parcels, parcels_zoning_calculations,
                               zoning_baseline):
    """Collect baseline zoning and per-scenario zoning columns per parcel.

    For each scenario ``"0"`` to ``"3"`` the orca cache is cleared, the
    ``scenario`` injectable is set and ``parcels_zoning_calculations`` is
    fetched again through orca; the ``parcels_zoning_calculations`` argument
    itself is not read.

    Returns:
        A DataFrame indexed like ``parcels``.
    """
    zoning = pd.DataFrame(index=parcels.index)

    zoning["baseline_dua"] = zoning_baseline.max_dua
    zoning["baseline_far"] = zoning_baseline.max_far
    zoning["baseline_height"] = zoning_baseline.max_height
    zoning["zoning_name"] = zoning_baseline["name"]
    zoning["zoning_source"] = zoning_baseline["tablename"]

    # (output column prefix, attribute read from the calculations table)
    scenario_columns = (
        ("max_dua", "effective_max_dua"),
        ("max_far", "effective_max_far"),
        ("du_underbuild", "zoned_du_underbuild"),
        ("non_res_cat", "non_res_categories"),
    )

    for scenario in map(str, range(4)):
        orca.clear_cache()
        orca.add_injectable("scenario", scenario)
        calculations = orca.get_table("parcels_zoning_calculations")
        for prefix, attribute in scenario_columns:
            zoning["%s_%s" % (prefix, scenario)] = getattr(calculations,
                                                           attribute)

    return zoning
Exemplo n.º 19
0
def parcels_zoning_by_scenario(parcels, parcels_zoning_calculations,
                               zoning_baseline):
    """Collect baseline zoning and per-scenario zoning columns per parcel.

    For each scenario ``"0"`` to ``"3"`` the orca cache is cleared, the
    ``scenario`` injectable is set and ``parcels_zoning_calculations`` is
    fetched again through orca; the ``parcels_zoning_calculations`` argument
    itself is not read.

    Returns:
        A DataFrame indexed like ``parcels``.
    """
    zoning = pd.DataFrame(index=parcels.index)

    zoning["baseline_dua"] = zoning_baseline.max_dua
    zoning["baseline_far"] = zoning_baseline.max_far
    zoning["baseline_height"] = zoning_baseline.max_height
    zoning["zoning_name"] = zoning_baseline["name"]
    zoning["zoning_source"] = zoning_baseline["tablename"]

    # (output column prefix, attribute read from the calculations table)
    scenario_columns = (
        ("max_dua", "effective_max_dua"),
        ("max_far", "effective_max_far"),
        ("du_underbuild", "zoned_du_underbuild"),
        ("non_res_cat", "non_res_categories"),
    )

    for scenario in map(str, range(4)):
        orca.clear_cache()
        orca.add_injectable("scenario", scenario)
        calculations = orca.get_table("parcels_zoning_calculations")
        for prefix, attribute in scenario_columns:
            zoning["%s_%s" % (prefix, scenario)] = getattr(calculations,
                                                           attribute)

    return zoning
Exemplo n.º 20
0
def full_run(resume_after=None,
             chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None,
             trace_od=None,
             check_for_variability=None):
    """Run the models listed in the settings and count the resulting tours.

    The model list is read from the ``'models'`` entry of the value returned
    by ``inject_settings``; ``resume_after`` is passed to ``pipeline.run``.

    Returns:
        The number of rows in the pipeline's ``tours`` table.
    """
    here = os.path.dirname(__file__)

    configs_dir = os.path.join(here, '..', '..', '..', 'example', 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    orca.add_injectable("data_dir", os.path.join(here, 'data'))

    orca.add_injectable("output_dir", os.path.join(here, 'output'))

    run_settings = inject_settings(
        configs_dir,
        households_sample_size=households_sample_size,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_od=trace_od,
        check_for_variability=check_for_variability)

    orca.clear_cache()

    tracing.config_logger()

    pipeline.run(models=run_settings['models'], resume_after=resume_after)

    tour_count = len(pipeline.get_table('tours').index)

    pipeline.close_pipeline()

    orca.clear_cache()

    return tour_count
Exemplo n.º 21
0
def hlcm_simulate(households, buildings, persons, settings):
    """Run the ``hlcmcoef.yaml`` location choice simulation for households.

    When ``settings['remove_jobs_from_workers']`` is truthy, relocated
    workers whose new distance to work exceeds the previous one get
    ``job_id`` set to -1, and ``is_inmigrant`` is reset to 0 for every
    household. The orca cache is cleared after the simulation and again
    before returning.

    Returns:
        Whatever ``utils.lcm_simulate`` returned.
    """
    # Captured before the simulation runs: households still without a
    # building, and among those the ones that are not in-migrants.
    movers = households.to_frame()
    movers = movers[movers.building_id == -1]
    relocated = movers[movers.is_inmigrant < 1]
    res = utils.lcm_simulate("hlcmcoef.yaml", households, buildings,
                             None, "building_id", "residential_units",
                             "vacant_residential_units", cast=True)
    orca.clear_cache()

    # Determine which relocated persons get disconnected from their job
    if settings.get('remove_jobs_from_workers', False):
        persons_df = persons.to_frame()
        is_relocated_worker = (
            (persons_df.employment_status > 0) &
            (persons_df.household_id.isin(relocated.index)))
        # Explicit copy: columns are added and overwritten below, and doing
        # that on a selection of persons_df is a chained assignment that
        # pandas flags with SettingWithCopyWarning.
        relocated_workers = persons_df.loc[is_relocated_worker].copy()
        relocated_workers['new_dist_to_work'] = network_distance_from_home_to_work(
            relocated_workers.workplace_zone_id,
            relocated_workers.household_zone_id)
        relocated_workers['prev_dist_to_work'] = network_distance_from_home_to_work(
            relocated_workers.workplace_zone_id,
            relocated_workers.prev_household_zone_id)

        # if new distance to work is greater than old, disconnect person from job
        relocated_workers['job_id'] = np.where(
            relocated_workers.new_dist_to_work >
            relocated_workers.prev_dist_to_work,
            -1, relocated_workers.job_id)
        persons.update_col_from_series("job_id", relocated_workers.job_id,
                                       cast=True)

        # Update is_inmigrant- I think this it is ok to do this now,
        # but perhaps this should be part of a clean up step
        # at the end of the sim year.
        households.update_col_from_series(
            "is_inmigrant", pd.Series(0, index=households.index), cast=True)

    orca.clear_cache()

    return res
Exemplo n.º 22
0
def test_full_run2_repop_replace():
    """Run the repop (replace) steps, resuming after ``summarize``.

    Afterwards the expanded household ids must cover ``TAZ_COUNT`` zones and
    TAZ 100 must hold ``TAZ_100_HH_REPOP_COUNT`` households.
    """
    here = os.path.dirname(__file__)
    for injectable_name, folder in (("configs_dir", 'configs'),
                                    ("data_dir", 'data'),
                                    ("output_dir", 'output')):
        orca.add_injectable(injectable_name, os.path.join(here, folder))

    orca.clear_cache()

    tracing.config_logger()

    repop_steps = [
        'input_pre_processor.table_list=repop_input_table_list',
        'repop_setup_data_structures',
        'initial_seed_balancing.final=true',
        'integerize_final_seed_weights.repop',
        'repop_balancing',
        'expand_households.repop;replace',
        'write_synthetic_population.repop',
        'write_tables.repop',
    ]

    pipeline.run(models=repop_steps, resume_after='summarize')

    expanded = pipeline.get_table('expanded_household_ids')
    assert isinstance(expanded, pd.DataFrame)

    households_per_taz = expanded.groupby('TAZ').size()
    assert len(households_per_taz) == TAZ_COUNT
    assert households_per_taz.loc[100] == TAZ_100_HH_REPOP_COUNT

    # pipeline tables cannot be read any more once it has been closed
    pipeline.close_pipeline()

    orca.clear_cache()
Exemplo n.º 23
0
def wahcm_simulate(persons, jobs, households, zones):
    """Assign vacant home-based jobs to persons sampled to work at home.

    Takes the second element of the ``work_at_home_simulate`` result as the
    per-person probabilities, samples as many persons as there are vacant
    home-based jobs (weighted by those probabilities), pairs them with the
    jobs positionally and writes the result back to the ``persons`` and
    ``jobs`` tables. The orca cache is cleared at the end.
    """
    work_at_home_prob = work_at_home_simulate("wahcmcoeff.yaml", persons,
                                              [households, zones])[1]
    jobs_df = jobs.to_frame()
    home_based_jobs = jobs_df[(jobs_df.home_based_status == 1) & (jobs_df.vacant_jobs > 0)]

    # sample home workers using the exact number of vacant home based jobs,
    # weighted by the probabilities from the wahcm:
    home_workers = work_at_home_prob.sample(len(home_based_jobs), weights=work_at_home_prob.values)

    # update job_id on the persons table
    # should not matter which person gets which home-based job
    combine_indexes = pd.DataFrame([home_workers.index, home_based_jobs.index]).transpose()
    combine_indexes.columns = ['person_id', 'job_id']
    combine_indexes.set_index('person_id', inplace=True)
    combine_indexes['work_at_home'] = 1

    # updates job_id, work_at_home on the persons table where index
    # (person_id) matches in combine_indexes
    persons.update_col_from_series("job_id", combine_indexes.job_id, cast=True)
    persons.update_col_from_series('work_at_home', combine_indexes.work_at_home, cast=True)
    # Single-argument, parenthesised print: valid on Python 2 and 3.
    print("%s additional people assigned to work at home." % len(combine_indexes))

    # building_id on jobs table for home based workers should be the
    # household building_id of the person assigned the job
    # get building_id:
    combine_indexes['building_id'] = 0
    combine_indexes.building_id.update(persons.household_building_id)

    # update building_id & vacant_jobs on jobs table:
    # re-key the pairing by job_id so it lines up with the jobs table
    combine_indexes.reset_index(level=None, inplace=True)
    combine_indexes.set_index('job_id', inplace=True)
    combine_indexes['vacant_jobs'] = 0

    # update jobs table- building_id of at home workers and 0 for vacant_jobs
    jobs.update_col_from_series('building_id', combine_indexes.building_id, cast=True)
    jobs.update_col_from_series('vacant_jobs', combine_indexes.vacant_jobs, cast=True)
    print("Number of unplaced home-based jobs: %s" % len(
        jobs.local[(jobs.local.home_based_status == 1)
                   & (jobs.local.vacant_jobs > 0) & (jobs.building_id > 0)]))
    orca.clear_cache()
Exemplo n.º 24
0
def test_mini_run(store, random_seed):
    """Run a short chain of orca steps and pin a few resulting choices.

    Registers the injectables used by the steps, checks the size of the
    households table, then runs the steps in order and compares selected
    household ``auto_ownership`` values and person
    ``mandatory_tour_frequency`` values against hard-coded regression values.

    ``store`` is registered as the "store" injectable; ``random_seed`` is
    accepted but not referenced in the body (presumably a fixture requested
    for its side effect -- confirm against the fixture definition).
    """
    injectables = [
        ("configs_dir", os.path.dirname(__file__)),
        ("store", store),
        ("nonmotskm_matrix", np.ones((1454, 1454))),
        ("set_random_seed", set_random_seed),
    ]
    for key, value in injectables:
        orca.add_injectable(key, value)

    assert len(orca.get_table("households").index) == HOUSEHOLDS_SAMPLE_SIZE

    # the steps have to be run in this order
    for step in ("workplace_location_simulate", "auto_ownership_simulate"):
        orca.run([step])

    # regression check: fails if these auto ownership numbers ever change
    hh_ids = [2306822, 652072, 651907]
    auto_choice = orca.get_table('households').get_column('auto_ownership')
    observed_auto = auto_choice[hh_ids]
    print(observed_auto)

    expected_auto = pd.Series([2, 1, 1], index=pd.Index(hh_ids, name='HHID'))
    pdt.assert_series_equal(observed_auto, expected_auto)

    for step in ("cdap_simulate", 'mandatory_tour_frequency'):
        orca.run([step])

    # regression check on the mandatory tour frequency of three persons
    per_ids = [146642, 642922, 642921]
    mtf_choice = orca.get_table('persons').get_column(
        'mandatory_tour_frequency')
    expected_mtf = pd.Series(['school1', 'work1', 'school2'],
                             index=pd.Index(per_ids, name='PERID'))
    pdt.assert_series_equal(mtf_choice[per_ids], expected_mtf)

    orca.clear_cache()
Exemplo n.º 25
0
def orca_year_dataset(hdf, year):
    """Register one year's tables from ``hdf`` with orca.

    The orca cache is cleared first. A ``year`` whose string form is
    '2015' is treated as the 'base' dataset; the "year" injectable is
    always registered as an int (2015 for the base dataset).

    For each expected table, ``hdf['<year>/<table>']`` is used when that
    key exists. Otherwise an empty frame with the columns of the
    ``'2020/<table>'`` entry stands in for it. Households and jobs frames
    lacking ``large_area_id`` get it imputed from the buildings table via
    ``misc.reindex`` on ``building_id``. Every frame is registered with
    NaNs replaced by 0.

    Parameters
    ----------
    hdf : mapping-like
        Must support ``key in hdf`` and ``hdf[key]`` returning a DataFrame
        (presumably a ``pandas.HDFStore`` -- confirm against callers).
    year : int or str
        Simulation year, or 'base'.
    """
    orca.clear_cache()
    if str(year) == '2015':
        year = 'base'
    orca.add_injectable("year", 2015 if str(year) == 'base' else int(year))

    table_names = (
        'parcels', 'buildings', 'jobs', 'households', 'persons',
        'group_quarters', 'base_job_space', 'dropped_buildings',
    )
    for tbl in table_names:
        name = str(year) + '/' + tbl
        if name in hdf:
            df = hdf[name]
        else:
            stub_name = str(2020) + '/' + tbl
            print("No table named " + name + ". Using the structuer from " + stub_name + ".")
            # Take an explicit copy of the empty slice: a column may be
            # assigned into it below, and assigning into a bare slice
            # triggers pandas' SettingWithCopyWarning (or an error when
            # mode.chained_assignment is set to 'raise').
            df = hdf[stub_name].iloc[0:0].copy()

        if tbl in {'households', 'jobs'} and 'large_area_id' not in df.columns:
            print('impute large_area_id')
            df['large_area_id'] = misc.reindex(
                orca.get_table('buildings').large_area_id, df.building_id)

        orca.add_table(tbl, df.fillna(0))
Exemplo n.º 26
0
def clear_cache():
    """Clear orca's cache by delegating to ``orca.clear_cache()``.

    Takes no arguments and has no explicit return value.
    """
    orca.clear_cache()
Exemplo n.º 27
0
def elcm_simulate(jobs, buildings, parcels, zones, gridcells):
    """Run ``utils.lcm_simulate`` for jobs using the "elcmcoef.yaml" config.

    ``jobs`` and ``buildings`` are passed as the second and third
    arguments, ``parcels``, ``zones`` and ``gridcells`` together as one
    list, followed by the names "building_id", "job_spaces" and
    "vacant_job_spaces" and ``cast=True``. The exact meaning of each
    argument is defined by ``utils.lcm_simulate`` (not visible here).

    The value returned by ``utils.lcm_simulate`` is not used; the orca
    cache is cleared afterwards.
    """
    join_tables = [parcels, zones, gridcells]
    utils.lcm_simulate(
        "elcmcoef.yaml", jobs, buildings, join_tables,
        "building_id", "job_spaces", "vacant_job_spaces",
        cast=True)
    orca.clear_cache()
Exemplo n.º 28
0
def teardown_function(func):
    """Reset shared orca state once a test function is done.

    The name and signature match pytest's per-function teardown hook
    (presumably how this gets invoked -- confirm). ``func`` is accepted
    but not used.
    """
    # drop whatever orca has cached
    orca.clear_cache()
    # presumably re-registers the decorator-defined tables so the next test
    # starts from the original definitions -- confirm in the inject module
    inject.reinject_decorated_tables()
Exemplo n.º 29
0
def test_mini_pipeline_run2():
    """Restart the pipeline at 'auto_ownership_simulate' and re-check results.

    The point of the test is that restarting the pipeline should yield
    exactly the same results as test_mini_pipeline_run. It also checks that
    re-running a model already in the pipeline raises RuntimeError, and
    that the checkpoint list is readable (with an unchanged length) before
    the pipeline is started, while it is open, and after it is closed.
    """
    here = os.path.dirname(__file__)

    configs_dir = os.path.join(here, 'configs')
    orca.add_injectable("configs_dir", configs_dir)
    orca.add_injectable("output_dir", os.path.join(here, 'output'))
    orca.add_injectable("data_dir", os.path.join(here, 'data'))

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    orca.clear_cache()

    # checkpoints must be readable BEFORE the pipeline is opened
    prev_checkpoint_count = len(pipeline.get_checkpoints().index)
    assert prev_checkpoint_count == 7

    pipeline.start_pipeline('auto_ownership_simulate')

    auto_choice = pipeline.get_table("households").auto_ownership

    # regression values for three households of the households table
    hh_ids = [26960, 857296, 93428]
    expected_auto_choice = pd.Series([0, 1, 0],
                                     index=pd.Index(hh_ids, name="HHID"),
                                     name='auto_ownership')

    print("auto_choice\n" + str(auto_choice.head(4)))
    pdt.assert_series_equal(auto_choice[hh_ids], expected_auto_choice)

    # a model that is already in the pipeline must not run a second time
    with pytest.raises(RuntimeError) as err:
        pipeline.run_model('auto_ownership_simulate')
    assert "run model 'auto_ownership_simulate' more than once" in str(
        err.value)

    # models that have not been run yet
    for model_name in ('cdap_simulate', 'mandatory_tour_frequency'):
        pipeline.run_model(model_name)

    mtf_choice = pipeline.get_table("persons").mandatory_tour_frequency

    per_ids = [92363, 92681, 93428]
    expected_choice = pd.Series(['work1', 'school1', 'school2'],
                                index=pd.Index(per_ids, name='PERID'),
                                name='mandatory_tour_frequency')

    print("mtf_choice\n" + str(mtf_choice.head(20)))
    pdt.assert_series_equal(mtf_choice[per_ids], expected_choice)

    # while the pipeline is open the checkpoints come from the open store
    assert orca.get_injectable('pipeline_store') is not None
    assert len(pipeline.get_checkpoints().index) == prev_checkpoint_count

    pipeline.close()

    # after closing, the checkpoints must still be readable (open and close)
    assert orca.get_injectable('pipeline_store') is None
    assert len(pipeline.get_checkpoints().index) == prev_checkpoint_count

    orca.clear_cache()
Exemplo n.º 30
0
def test_mini_pipeline_run():
    """Run a four-model pipeline from scratch and pin a few choices.

    After the initial ``pipeline.run`` the household ``auto_ownership``
    values are compared against regression values; two further models are
    then run one at a time and person ``mandatory_tour_frequency`` values
    are checked. Finally, asking for an unknown table or an unknown
    checkpoint must raise RuntimeError.
    """
    here = os.path.dirname(__file__)

    configs_dir = os.path.join(here, 'configs')
    orca.add_injectable("configs_dir", configs_dir)
    orca.add_injectable("output_dir", os.path.join(here, 'output'))
    orca.add_injectable("data_dir", os.path.join(here, 'data'))

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    orca.clear_cache()

    initial_models = [
        'compute_accessibility',
        'school_location_simulate',
        'workplace_location_simulate',
        'auto_ownership_simulate',
    ]
    pipeline.run(models=initial_models, resume_after=None)

    auto_choice = pipeline.get_table("households").auto_ownership

    # regression values for three households of the households table
    hh_ids = [26960, 857296, 93428]
    expected_auto = pd.Series([0, 1, 0],
                              index=pd.Index(hh_ids, name="HHID"),
                              name='auto_ownership')

    print("auto_choice\n" + str(auto_choice.head(10)))
    pdt.assert_series_equal(auto_choice[hh_ids], expected_auto)

    for model_name in ('cdap_simulate', 'mandatory_tour_frequency'):
        pipeline.run_model(model_name)

    mtf_choice = pipeline.get_table("persons").mandatory_tour_frequency

    per_ids = [92363, 92681, 93428]
    expected_mtf = pd.Series(['work1', 'school1', 'school2'],
                             index=pd.Index(per_ids, name='PERID'),
                             name='mandatory_tour_frequency')

    print("mtf_choice\n" + str(mtf_choice.head(20)))
    pdt.assert_series_equal(mtf_choice[per_ids], expected_mtf)

    # a table that does not exist
    with pytest.raises(RuntimeError) as err:
        pipeline.get_table("bogus")
    assert "not in checkpointed tables" in str(err.value)

    # an existing table requested from a checkpoint that does not exist
    with pytest.raises(RuntimeError) as err:
        pipeline.get_table("households", checkpoint_name="bogus")
    assert "not in checkpoints" in str(err.value)

    pipeline.close()

    orca.clear_cache()
Exemplo n.º 31
0
def clear_cache():
    """Call ``orca.clear_cache()`` and pass its return value through."""
    return orca.clear_cache()
Exemplo n.º 32
0
def test_mini_pipeline_run2():
    """Reopen the pipeline at 'auto_ownership_simulate' and re-check results.

    The point of the test is that restarting the pipeline should yield
    exactly the same results as test_mini_pipeline_run. It also checks that
    re-running a model already in the pipeline raises RuntimeError, and
    that the number of checkpoints is the same before the pipeline is
    opened and after two more models have been run.
    """
    here = os.path.dirname(__file__)

    configs_dir = os.path.join(here, 'configs')
    orca.add_injectable("configs_dir", configs_dir)
    orca.add_injectable("output_dir", os.path.join(here, 'output'))
    orca.add_injectable("data_dir", os.path.join(here, 'data'))

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    orca.clear_cache()

    # checkpoints must be readable BEFORE the pipeline is opened
    prev_checkpoint_count = len(pipeline.get_checkpoints().index)
    assert prev_checkpoint_count == 11

    pipeline.open_pipeline('auto_ownership_simulate')

    auto_choice = pipeline.get_table("households").auto_ownership

    # regression values: the same ones test_mini_pipeline_run checks
    hh_ids = [464138, 1918238, 2201602]
    expected_auto = pd.Series([0, 1, 2],
                              index=pd.Index(hh_ids, name="HHID"),
                              name='auto_ownership')

    print("auto_choice\n" + str(auto_choice.head(4)))
    pdt.assert_series_equal(auto_choice[hh_ids], expected_auto)

    # a model that is already in the pipeline must not run a second time
    with pytest.raises(RuntimeError) as err:
        pipeline.run_model('auto_ownership_simulate')
    assert "run model 'auto_ownership_simulate' more than once" in str(
        err.value)

    # models that have not been run yet
    for model_name in ('cdap_simulate', 'mandatory_tour_frequency'):
        pipeline.run_model(model_name)

    mtf_choice = pipeline.get_table("persons").mandatory_tour_frequency

    per_ids = [24375, 92744, 172491]
    expected_mtf = pd.Series(['school2', 'work_and_school', 'work1'],
                             index=pd.Index(per_ids, name='PERID'),
                             name='mandatory_tour_frequency')

    print("mtf_choice\n" + str(mtf_choice.head(20)))
    pdt.assert_series_equal(mtf_choice[per_ids], expected_mtf)

    # while the pipeline is open the checkpoints come from the open store
    assert len(pipeline.get_checkpoints().index) == prev_checkpoint_count

    pipeline.close_pipeline()
    orca.clear_cache()
Exemplo n.º 33
0
def test_mini_pipeline_run():
    """Run a nine-model pipeline from scratch and pin a few choices.

    After the initial ``pipeline.run`` the household ``auto_ownership``
    values are compared against regression values; two further models are
    then run one at a time and person ``mandatory_tour_frequency`` values
    are checked. Finally, asking for an unknown table or an unknown
    checkpoint must raise RuntimeError. The pipeline is closed, the orca
    cache cleared and ``close_handlers()`` called at the end.
    """
    here = os.path.dirname(__file__)

    configs_dir = os.path.join(here, 'configs')
    orca.add_injectable("configs_dir", configs_dir)
    orca.add_injectable("output_dir", os.path.join(here, 'output'))
    orca.add_injectable("data_dir", os.path.join(here, 'data'))

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    orca.clear_cache()

    tracing.config_logger()

    initial_models = [
        'initialize',
        'compute_accessibility',
        'school_location_sample',
        'school_location_logsums',
        'school_location_simulate',
        'workplace_location_sample',
        'workplace_location_logsums',
        'workplace_location_simulate',
        'auto_ownership_simulate',
    ]
    pipeline.run(models=initial_models, resume_after=None)

    auto_choice = pipeline.get_table("households").auto_ownership

    # regression values: these households are among the first 10 of the table
    hh_ids = [464138, 1918238, 2201602]
    expected_auto = pd.Series([0, 1, 2],
                              index=pd.Index(hh_ids, name="HHID"),
                              name='auto_ownership')

    print("auto_choice\n" + str(auto_choice.head(10)))
    pdt.assert_series_equal(auto_choice[hh_ids], expected_auto)

    for model_name in ('cdap_simulate', 'mandatory_tour_frequency'):
        pipeline.run_model(model_name)

    mtf_choice = pipeline.get_table("persons").mandatory_tour_frequency

    # the choices are nonsensical because the mandatory_tour_frequency spec
    # used by the test is heavily truncated
    per_ids = [24375, 92744, 172491]
    expected_mtf = pd.Series(['school2', 'work_and_school', 'work1'],
                             index=pd.Index(per_ids, name='PERID'),
                             name='mandatory_tour_frequency')

    print("mtf_choice\n" + str(mtf_choice.head(20)))
    # Reference output of the print above:
    # mtf_choice
    # PERID
    # 23647                 NaN
    # 24203                 NaN
    # 24375             school2
    # 24687                 NaN
    # 24824                 NaN
    # 24975                 NaN
    # 25027                 NaN
    # 25117                 NaN
    # 25772                 NaN
    # 25871                 NaN
    # 26284                 NaN
    # 26863                 NaN
    # 27059                 NaN
    # 92233                 NaN
    # 92382             school1
    # 92744     work_and_school
    # 92823                 NaN
    # 93172             school2
    # 93774                 NaN
    # 172491              work1
    # Name: mandatory_tour_frequency, dtype: object
    pdt.assert_series_equal(mtf_choice[per_ids], expected_mtf)

    # a table that does not exist
    with pytest.raises(RuntimeError) as err:
        pipeline.get_table("bogus")
    assert "not in checkpointed tables" in str(err.value)

    # an existing table requested from a checkpoint that does not exist
    with pytest.raises(RuntimeError) as err:
        pipeline.get_table("households", checkpoint_name="bogus")
    assert "not in checkpoints" in str(err.value)

    pipeline.close_pipeline()
    orca.clear_cache()

    close_handlers()