def atwork_subtour_destination_logsums(
        persons_merged,
        destination_sample,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id):
    """
    Add a logsum column to the existing atwork_subtour_destination_sample table.

    The logsum is calculated by running the mode_choice model for each sample
    (person, dest_taz) pair in atwork_subtour_destination_sample and computing
    the logsum of all the utilities.

    +-----------+--------------+----------------+------------+----------------+
    | person_id | dest_TAZ     | rand           | pick_count | logsum (added) |
    +===========+==============+================+============+================+
    | 23750     | 14           | 0.565502716034 | 4          |  1.85659498857 |
    +-----------+--------------+----------------+------------+----------------+
    + 23750     | 16           | 0.711135838871 | 6          |  1.92315598631 |
    +-----------+--------------+----------------+------------+----------------+
    + ...       |              |                |            |                |
    +-----------+--------------+----------------+------------+----------------+
    | 23751     | 12           | 0.408038878552 | 1          |  2.40612135416 |
    +-----------+--------------+----------------+------------+----------------+
    | 23751     | 14           | 0.972732479292 | 2          |  1.44009018355 |
    +-----------+--------------+----------------+------------+----------------+

    Returns
    -------
    destination_sample : pandas.DataFrame
        The input sample, mutated in place with a 'mode_choice_logsum' column.
    """

    trace_label = 'atwork_subtour_destination_logsums'

    model_settings = config.read_model_settings('atwork_subtour_destination.yaml')
    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    # FIXME - MEMORY HACK - only include columns actually used in spec
    persons_merged = logsum.filter_chooser_columns(persons_merged, logsum_settings, model_settings)

    # merge persons into tours: one chooser row per sampled (person, dest) pair
    choosers = pd.merge(destination_sample,
                        persons_merged,
                        left_on='person_id',
                        right_index=True,
                        how="left")

    logger.info("Running %s with %s rows", trace_label, len(choosers))

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    # at-work subtours always use the 'atwork' logsum segment
    tour_purpose = 'atwork'
    logsums = logsum.compute_logsums(
        choosers,
        tour_purpose,
        logsum_settings, model_settings,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id,
        trace_label)

    destination_sample['mode_choice_logsum'] = logsums

    return destination_sample
def atwork_subtour_destination_logsums(persons_merged,
                                       destination_sample,
                                       skim_dict, skim_stack,
                                       chunk_size, trace_hh_id):
    """
    Annotate atwork_subtour_destination_sample with a 'mode_choice_logsum' column.

    For each sampled (person, dest_taz) row, the tour mode choice model is run
    and the logsum of the alternative utilities is recorded.

    +-----------+--------------+----------------+------------+----------------+
    | person_id | dest_TAZ     | rand           | pick_count | logsum (added) |
    +===========+==============+================+============+================+
    | 23750     | 14           | 0.565502716034 | 4          |  1.85659498857 |
    +-----------+--------------+----------------+------------+----------------+
    + 23750     | 16           | 0.711135838871 | 6          |  1.92315598631 |
    +-----------+--------------+----------------+------------+----------------+
    + ...       |              |                |            |                |
    +-----------+--------------+----------------+------------+----------------+
    | 23751     | 12           | 0.408038878552 | 1          |  2.40612135416 |
    +-----------+--------------+----------------+------------+----------------+
    | 23751     | 14           | 0.972732479292 | 2          |  1.44009018355 |
    +-----------+--------------+----------------+------------+----------------+
    """

    trace_label = 'atwork_subtour_destination_logsums'

    model_settings = config.read_model_settings(
        'atwork_subtour_destination.yaml')
    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])

    # FIXME - MEMORY HACK - restrict persons_merged to columns the spec uses
    persons_merged = logsum.filter_chooser_columns(
        persons_merged, logsum_settings, model_settings)

    # attach person attributes to every sampled (person, dest) row
    choosers = destination_sample.merge(persons_merged,
                                        left_on='person_id',
                                        right_index=True,
                                        how="left")

    logger.info("Running %s with %s rows", trace_label, len(choosers))

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    # at-work subtours use the 'atwork' logsum segment
    destination_sample['mode_choice_logsum'] = logsum.compute_logsums(
        choosers, 'atwork', logsum_settings,
        model_settings, skim_dict, skim_stack,
        chunk_size, trace_hh_id, trace_label)

    return destination_sample
def mandatory_tour_scheduling(tours,
                              persons_merged,
                              tdd_alts,
                              chunk_size,
                              trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for
    mandatory tours.

    Reads the 'tours' and 'persons_merged' pipeline tables, schedules the
    mandatory tours, writes the choices back into 'tours', and replaces the
    pipeline table. Returns None.
    """
    model_name = 'mandatory_tour_scheduling'
    trace_label = model_name

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    # - if no mandatory_tours, there is nothing to schedule
    if mandatory_tours.shape[0] == 0:
        tracing.no_results(model_name)
        return

    # - add tour segmentation column
    # mtctm1 segments mandatory_scheduling spec by tour_type
    # (i.e. there are different specs for work and school tour_types)
    # mtctm1 logsum coefficients are segmented by primary_purpose
    # (i.e. there are different logsum coefficients for work, school, univ
    # primary_purposes)
    # for simplicity managing these different segmentation schemes,
    # we conflate them by segmenting tour processing to align with primary_purpose
    tour_segment_col = 'mandatory_tour_seg'
    # `in` on a DataFrame tests column membership - guard against clobbering
    assert tour_segment_col not in mandatory_tours
    # school tours made by university students are segmented as 'univ'
    is_university_tour = \
        (mandatory_tours.tour_type == 'school') & \
        reindex(persons_merged.is_university, mandatory_tours.person_id)
    mandatory_tours[tour_segment_col] = \
        mandatory_tours.tour_type.where(~is_university_tour, 'univ')

    choices = run_tour_scheduling(model_name,
                                  mandatory_tours,
                                  persons_merged,
                                  tdd_alts,
                                  tour_segment_col,
                                  chunk_size,
                                  trace_hh_id)

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label=trace_label,
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
def non_mandatory_tour_scheduling(tours, persons_merged,
                                  tdd_alts,
                                  tdd_non_mandatory_spec,
                                  non_mandatory_tour_scheduling_settings,
                                  chunk_size,
                                  trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for
    non-mandatory tours.

    Schedules the non-mandatory tours, writes the tdd choice columns back into
    the 'tours' table, and replaces the pipeline table. Returns None.
    """

    trace_label = 'non_mandatory_tour_scheduling'

    tours = tours.to_frame()
    persons_merged = persons_merged.to_frame()

    non_mandatory_tours = tours[tours.non_mandatory]

    # lazy %-args so the message is only formatted when INFO is enabled,
    # consistent with the other scheduling steps in this file
    logger.info("Running non_mandatory_tour_scheduling with %d tours", len(tours))

    constants = config.get_model_constants(non_mandatory_tour_scheduling_settings)

    tdd_choices = vectorize_tour_scheduling(non_mandatory_tours, persons_merged,
                                            tdd_alts,
                                            tdd_non_mandatory_spec,
                                            constants=constants,
                                            chunk_size=chunk_size,
                                            trace_label=trace_label)

    # add tdd_choices columns to tours in place (replaces the former per-column
    # tours.loc[...] copy loop, for consistency with the sibling scheduling steps)
    assign_in_place(tours, tdd_choices)

    pipeline.replace_table("tours", tours)

    # re-slice the updated tours table for tracing
    non_mandatory_tours = tours[tours.non_mandatory]

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label=trace_label,
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
def non_mandatory_tour_scheduling(tours,
                                  persons_merged,
                                  tdd_alts,
                                  chunk_size,
                                  trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for
    non-mandatory tours.

    Schedules the non-mandatory tours via run_tour_scheduling (no tour
    segmentation), writes the choices back into 'tours', and replaces the
    pipeline table. Returns None.
    """

    model_name = 'non_mandatory_tour_scheduling'
    trace_label = model_name

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    # - if no non_mandatory_tours, there is nothing to schedule
    if non_mandatory_tours.shape[0] == 0:
        tracing.no_results(model_name)
        return

    # non-mandatory scheduling is not segmented by tour type
    tour_segment_col = None

    choices = run_tour_scheduling(model_name,
                                  non_mandatory_tours,
                                  persons_merged,
                                  tdd_alts,
                                  tour_segment_col,
                                  chunk_size,
                                  trace_hh_id)

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label=trace_label,
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
def run_destination_logsums(
        tour_purpose,
        persons_merged,
        destination_sample,
        model_settings,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, trace_label):
    """
    Add a logsum column to the existing tour_destination_sample table.

    The logsum is calculated by running the mode_choice model for each sample
    (person, dest_taz) pair in destination_sample and computing the logsum of
    all the utilities.

    Returns
    -------
    destination_sample : pandas.DataFrame
        The input sample, mutated in place with a 'mode_choice_logsum' column.
    """

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    # FIXME - MEMORY HACK - only include columns actually used in spec
    persons_merged = logsum.filter_chooser_columns(persons_merged, logsum_settings, model_settings)

    # merge persons into tours: one chooser row per sampled (person, dest) pair
    choosers = pd.merge(destination_sample,
                        persons_merged,
                        left_on='person_id',
                        right_index=True,
                        how="left")

    logger.info("Running %s with %s rows", trace_label, len(choosers))

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    logsums = logsum.compute_logsums(
        choosers,
        tour_purpose,
        logsum_settings, model_settings,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id,
        trace_label)

    destination_sample['mode_choice_logsum'] = logsums

    return destination_sample
def run_destination_logsums(
        tour_purpose,
        persons_merged,
        destination_sample,
        model_settings,
        network_los,
        chunk_size, trace_hh_id, trace_label):
    """
    Annotate tour_destination_sample with a 'mode_choice_logsum' column.

    For each sampled (person, dest_zone_id) row, the mode_choice model is run
    and the logsum of the alternative utilities is recorded.

    NOTE(review): trace_hh_id is accepted for signature parity with the
    skim-based variant but is not forwarded to compute_logsums here.
    """

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    # FIXME - MEMORY HACK - restrict persons_merged to columns the spec uses
    persons_merged = logsum.filter_chooser_columns(
        persons_merged, logsum_settings, model_settings)

    # attach person attributes to every sampled (person, dest) row
    choosers = destination_sample.merge(persons_merged,
                                        left_on='person_id',
                                        right_index=True,
                                        how="left")

    logger.info("Running %s with %s rows", trace_label, len(choosers))

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    destination_sample['mode_choice_logsum'] = logsum.compute_logsums(
        choosers, tour_purpose,
        logsum_settings, model_settings,
        network_los, chunk_size, trace_label)

    return destination_sample
def non_mandatory_tour_scheduling(tours,
                                  persons_merged,
                                  tdd_alts,
                                  chunk_size,
                                  trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for
    non-mandatory tours.

    Reads model settings and spec, optionally annotates choosers via a
    preprocessor, schedules the tours, writes the choices back into 'tours',
    and replaces the pipeline table. Returns None.
    """

    trace_label = 'non_mandatory_tour_scheduling'
    model_settings = config.read_model_settings(
        'non_mandatory_tour_scheduling.yaml')
    model_spec = simulate.read_model_spec(
        file_name='tour_scheduling_nonmandatory.csv')

    segment_col = None  # no segmentation of model_spec

    tours = tours.to_frame()
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    logger.info("Running non_mandatory_tour_scheduling with %d tours", len(tours))

    persons_merged = persons_merged.to_frame()

    if 'SIMULATE_CHOOSER_COLUMNS' in model_settings:
        # FIXME-style memory savings: drop chooser columns the spec doesn't use
        persons_merged =\
            expressions.filter_chooser_columns(persons_merged,
                                               model_settings['SIMULATE_CHOOSER_COLUMNS'])

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=non_mandatory_tours,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    tdd_choices, timetable = vectorize_tour_scheduling(
        non_mandatory_tours, persons_merged,
        tdd_alts, model_spec, segment_col,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    # persist the updated person time windows
    timetable.replace_table()

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
def non_mandatory_tour_scheduling(tours,
                                  persons_merged,
                                  tdd_alts,
                                  chunk_size,
                                  trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for
    non-mandatory tours.

    Supports estimation mode: when an estimator is active, model choices are
    written out, survey values override the simulated choices, and the
    timetable transaction is rolled back and replayed with the overrides.
    Writes start/end/duration columns into 'tours' and replaces the pipeline
    table. Returns None.
    """

    trace_label = 'non_mandatory_tour_scheduling'
    model_settings_file_name = 'non_mandatory_tour_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    logger.info("Running non_mandatory_tour_scheduling with %d tours", len(tours))

    persons_merged = persons_merged.to_frame()

    if 'SIMULATE_CHOOSER_COLUMNS' in model_settings:
        # memory savings: drop chooser columns the spec doesn't reference
        persons_merged =\
            expressions.filter_chooser_columns(persons_merged,
                                               model_settings['SIMULATE_CHOOSER_COLUMNS'])

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=non_mandatory_tours,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    timetable = inject.get_injectable("timetable")

    estimator = estimation.manager.begin_estimation(
        'non_mandatory_tour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    if estimator:
        # record the estimation data bundle inputs
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        # open a timetable transaction so simulated windows can be rolled back
        # and replayed with survey overrides below
        timetable.begin_transaction(estimator)

    # - non_mandatory tour scheduling is not segmented by tour type
    spec_info = {'spec': model_spec, 'estimator': estimator}

    choices = vectorize_tour_scheduling(non_mandatory_tours, persons_merged,
                                        tdd_alts, timetable,
                                        tour_segments=spec_info,
                                        tour_segment_col=None,
                                        model_settings=model_settings,
                                        chunk_size=chunk_size,
                                        trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        # override simulated choices with observed survey tdds
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in non_mandatory_tours.groupby('tour_num', sort=True):
            timetable.assign(window_row_ids=nth_tours['person_id'],
                             tdds=choices.reindex(nth_tours.index))

    # persist the updated person time windows
    timetable.replace_table()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'), tdd_alts, left_on=['tdd'],
                       right_index=True, how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
def run_od_simulate(spec_segment_name,
                    tours,
                    od_sample,
                    want_logsums,
                    model_settings,
                    network_los,
                    destination_size_terms,
                    estimator,
                    chunk_size, trace_label):
    """
    Run simulate OD choices on tour_od_sample annotated with mode_choice logsum
    to select a tour OD from sample alternatives.

    Returns
    -------
    choices : pandas.DataFrame
        Indexed like tours, with origin/destination columns (and logsums if
        want_logsums).
    """

    model_spec = simulate.spec_for_segment(model_settings, spec_id='SPEC',
                                           segment_name=spec_segment_name,
                                           estimator=estimator)

    # merge persons into tours
    choosers = tours

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    # interaction_sample requires that choosers.index.is_monotonic_increasing
    if not choosers.index.is_monotonic_increasing:
        logger.debug(
            f"run_destination_simulate {trace_label} sorting choosers because not monotonic_increasing"
        )
        choosers = choosers.sort_index()

    if estimator:
        estimator.write_choosers(choosers)

    origin_col_name = model_settings['ORIG_COL_NAME']
    dest_col_name = model_settings['DEST_COL_NAME']
    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
    origin_attr_cols = model_settings['ORIGIN_ATTR_COLS_TO_USE']

    # a single combined OD id column serves as the choice column
    alt_od_col_name = get_od_id_col(origin_col_name, dest_col_name)
    od_sample[alt_od_col_name] = create_od_id_col(od_sample, origin_col_name,
                                                  dest_col_name)

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge size_terms column into alt sample list
    od_sample['size_term'] = \
        reindex(destination_size_terms.size_term, od_sample[alt_dest_col_name])

    # also have to add origin attribute columns
    lu = inject.get_table('land_use').to_frame(columns=origin_attr_cols)
    od_sample = pd.merge(od_sample, lu, left_on=origin_col_name,
                         right_index=True, how='left')

    tracing.dump_df(DUMP, od_sample, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running tour_destination_simulate with %d persons", len(choosers))

    # create wrapper with keys for this lookup - in this case there is an origin ID
    # column and a destination ID columns in the alternatives table.
    # the skims will be available under the name "skims" for any @ expressions
    skim_dict = network_los.get_default_skim_dict()
    skims = skim_dict.wrap(origin_col_name, dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        od_sample,
        spec=model_spec,
        choice_column=alt_od_col_name,
        want_logsums=want_logsums,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='origin_destination',
        estimator=estimator)

    if not want_logsums:
        # for consistency, return a dataframe with a canonical column name
        choices = choices.to_frame('choice')

    # split the combined OD id back into separate origin/destination columns
    choices = _get_od_cols_from_od_id(choices, origin_col_name, dest_col_name)

    return choices
def atwork_subtour_scheduling(tours,
                              persons_merged,
                              tdd_alts,
                              skim_dict,
                              chunk_size,
                              trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for
    at work subtours tours.

    Subtour departure/duration is constrained by the parent work tour's tdd,
    so a parent_tours table is built and passed to the vectorized scheduler.
    Writes choices back into 'tours' and replaces the pipeline table.
    Returns None.
    """

    trace_label = 'atwork_subtour_scheduling'
    model_settings = config.read_model_settings('tour_scheduling_atwork.yaml')
    model_spec = simulate.read_model_spec(
        file_name='tour_scheduling_atwork.csv')

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours, there is nothing to schedule
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor
    constants = config.get_model_constants(model_settings)
    # both travel directions are exposed to preprocessor expressions
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    do_skim_wrapper = skim_dict.wrap('destination', 'origin')
    skims = {
        "od_skims": od_skim_wrapper,
        "do_skims": do_skim_wrapper,
    }
    annotate_preprocessors(
        subtours, constants, skims,
        model_settings, trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    parent_tour_ids = subtours.parent_tour_id.astype(int).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_tour_ids},
                                index=parent_tour_ids)
    parent_tours = parent_tours.merge(tours[['tdd']], left_index=True,
                                      right_index=True)

    tdd_choices = vectorize_subtour_scheduling(
        parent_tours,
        subtours,
        persons_merged,
        tdd_alts, model_spec,
        model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None)

    if DUMP:
        # dump parent tours and subtours together as a tour map keyed on parent_tour_id
        subtours = tours[tours.tour_category == 'atwork']
        parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]

        tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

        parent_tours['parent_tour_id'] = parent_tours.index
        subtours = pd.concat([parent_tours, subtours])
        tracing.dump_df(
            DUMP,
            tt.tour_map(parent_tours, subtours, tdd_alts,
                        persons_id_col='parent_tour_id'),
            trace_label, 'tour_map')
def run_destination_simulate(spec_segment_name,
                             tours,
                             persons_merged,
                             destination_sample,
                             want_logsums,
                             model_settings,
                             network_los,
                             destination_size_terms,
                             estimator,
                             chunk_size, trace_label):
    """
    Run destination_simulate on tour_destination_sample annotated with
    mode_choice logsum to select a destination from sample alternatives.

    Returns
    -------
    choices : pandas.DataFrame
        Indexed like tours; column 'choice' (plus 'logsum' if want_logsums).
    """

    model_spec = simulate.spec_for_segment(model_settings, spec_id='SPEC',
                                           segment_name=spec_segment_name,
                                           estimator=estimator)

    # FIXME - MEMORY HACK - only include columns actually used in spec (omit them pre-merge)
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    persons_merged = persons_merged[[
        c for c in persons_merged.columns if c in chooser_columns
    ]]
    # keep person_id so the merge key survives the column filter
    tours = tours[[
        c for c in tours.columns if c in chooser_columns or c == 'person_id'
    ]]
    choosers = pd.merge(tours,
                        persons_merged,
                        left_on='person_id',
                        right_index=True,
                        how='left')

    # interaction_sample requires that choosers.index.is_monotonic_increasing
    if not choosers.index.is_monotonic_increasing:
        logger.debug(
            f"run_destination_simulate {trace_label} sorting choosers because not monotonic_increasing"
        )
        choosers = choosers.sort_index()

    if estimator:
        estimator.write_choosers(choosers)

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
    origin_col_name = model_settings['CHOOSER_ORIG_COL_NAME']

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge size_terms column into alt sample list
    destination_sample['size_term'] = \
        reindex(destination_size_terms.size_term,
                destination_sample[alt_dest_col_name])

    tracing.dump_df(DUMP, destination_sample, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running tour_destination_simulate with %d persons", len(choosers))

    # create wrapper with keys for this lookup - in this case there is a home_zone_id in the choosers
    # and a zone_id in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skim_dict = network_los.get_default_skim_dict()
    skims = skim_dict.wrap(origin_col_name, alt_dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(choosers,
                                          destination_sample,
                                          spec=model_spec,
                                          choice_column=alt_dest_col_name,
                                          want_logsums=want_logsums,
                                          skims=skims,
                                          locals_d=locals_d,
                                          chunk_size=chunk_size,
                                          trace_label=trace_label,
                                          trace_choice_name='destination',
                                          estimator=estimator)

    if not want_logsums:
        # for consistency, always return a dataframe with canonical column name
        assert isinstance(choices, pd.Series)
        choices = choices.to_frame('choice')

    return choices
def school_location_logsums(persons_merged,
                            land_use,
                            skim_dict, skim_stack,
                            school_location_sample,
                            configs_dir, chunk_size, trace_hh_id):
    """
    Add a logsum column to the existing school_location_sample table.

    The logsum is calculated by running the mode_choice model for each sample
    (person, dest_taz) pair in school_location_sample, and computing the
    logsum of all the utilities.

    <added>
    PERID,  dest_TAZ, rand,           pick_count, logsum
    23750,  14,       0.565502716034, 4           1.85659498857
    23750,  16,       0.711135838871, 6           1.92315598631
    ...
    23751,  12,       0.408038878552, 1           2.40612135416
    23751,  14,       0.972732479292, 2           1.44009018355

    Logsums are computed per school segment and concatenated, then added as a
    column of the school_location_sample pipeline table. Returns None.
    """

    trace_label = 'school_location_logsums'

    # extract logsums_spec from omnibus_spec
    # omnibus_spec = orca.get_injectable('tour_mode_choice_spec')
    # for tour_type in ['school', 'university']:
    #     logsums_spec = get_segment_and_unstack(omnibus_spec, tour_type)
    #     tracing.dump_df(DUMP, logsums_spec, trace_label, 'logsums_spec_%s' % tour_type)

    school_location_settings = config.read_model_settings(
        configs_dir, 'school_location.yaml')

    alt_col_name = school_location_settings["ALT_COL_NAME"]

    # FIXME - just using settings from tour_mode_choice
    logsum_settings = config.read_model_settings(configs_dir,
                                                 'tour_mode_choice.yaml')

    persons_merged = persons_merged.to_frame()
    school_location_sample = school_location_sample.to_frame()

    logger.info("Running school_location_sample with %s rows" %
                len(school_location_sample))

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = school_location_settings['LOGSUM_CHOOSER_COLUMNS']
    persons_merged = persons_merged[chooser_columns]

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')

    # compute logsums segment by segment, then concatenate
    logsums_list = []
    for school_type in ['university', 'highschool', 'gradeschool']:

        logsums_spec = mode_choice_logsums_spec(configs_dir, school_type)

        choosers = school_location_sample[school_location_sample['school_type'] == school_type]

        # person rows are duplicated onto each of the person's sample rows
        choosers = pd.merge(
            choosers,
            persons_merged,
            left_index=True,
            right_index=True,
            how="left")

        choosers['in_period'] = time_period_label(
            school_location_settings['IN_PERIOD'])
        choosers['out_period'] = time_period_label(
            school_location_settings['OUT_PERIOD'])

        # FIXME - should do this in expression file?
        choosers['dest_topology'] = reindex(land_use.TOPOLOGY,
                                            choosers[alt_col_name])
        choosers['dest_density_index'] = reindex(land_use.density_index,
                                                 choosers[alt_col_name])

        tracing.dump_df(DUMP, choosers, trace_label, '%s_choosers' % school_type)

        logsums = compute_logsums(
            choosers, logsums_spec, logsum_settings,
            skim_dict, skim_stack, alt_col_name, chunk_size,
            trace_hh_id, trace_label)

        logsums_list.append(logsums)

    logsums = pd.concat(logsums_list)

    # add_column series should have an index matching the table to which it is being added
    # logsums does, since school_location_sample was on left side of merge creating choosers
    orca.add_column("school_location_sample", "mode_choice_logsum", logsums)
def atwork_subtour_destination_simulate(subtours,
                                        persons_merged,
                                        destination_sample,
                                        want_logsums,
                                        model_settings,
                                        skim_dict,
                                        destination_size_terms,
                                        estimator,
                                        chunk_size, trace_label):
    """
    atwork_subtour_destination model on atwork_subtour_destination_sample
    annotated with mode_choice logsum to select a destination from sample
    alternatives.

    Returns
    -------
    choices : pandas.DataFrame
        Indexed like subtours; column 'choice' (plus 'logsum' if want_logsums).
    """

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    # merge persons into tours
    choosers = pd.merge(subtours,
                        persons_merged,
                        left_on='person_id', right_index=True)
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    if estimator:
        estimator.write_choosers(choosers)

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
    # at-work subtours originate at the parent tour's workplace
    chooser_col_name = 'workplace_taz'

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge destination_size_terms columns into alt sample list
    alternatives = \
        pd.merge(destination_sample, destination_size_terms,
                 left_on=alt_dest_col_name, right_index=True, how="left")

    tracing.dump_df(DUMP, alternatives, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running atwork_subtour_destination_simulate with %d persons",
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(chooser_col_name, alt_dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=model_spec,
        choice_column=alt_dest_col_name,
        want_logsums=want_logsums,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='workplace_location',
        estimator=estimator)

    if not want_logsums:
        # for consistency, always return a dataframe with canonical column name
        assert isinstance(choices, pd.Series)
        choices = choices.to_frame('choice')

    return choices
def atwork_subtour_destination_simulate(tours,
                                        persons_merged,
                                        atwork_subtour_destination_sample,
                                        atwork_subtour_destination_spec,
                                        skim_dict,
                                        destination_size_terms,
                                        configs_dir,
                                        chunk_size,
                                        trace_hh_id):
    """
    atwork_subtour_destination model on atwork_subtour_destination_sample
    annotated with mode_choice logsum to select a destination from sample
    alternatives.

    Writes the chosen destination (and annotation columns) back into 'tours'
    and replaces the pipeline table. Returns None.
    """

    trace_label = 'atwork_subtour_destination_simulate'
    model_settings = inject.get_injectable('atwork_subtour_destination_settings')

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'subtour']

    # merge persons into tours
    choosers = pd.merge(subtours,
                        persons_merged.to_frame(),
                        left_on='person_id', right_index=True)

    alt_col_name = model_settings["ALT_COL_NAME"]
    # at-work subtours originate at the parent tour's workplace
    chooser_col_name = 'workplace_taz'

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge additional alt columns into alt sample list
    atwork_subtour_destination_sample = atwork_subtour_destination_sample.to_frame()
    destination_size_terms = destination_size_terms.to_frame()
    alternatives = \
        pd.merge(atwork_subtour_destination_sample, destination_size_terms,
                 left_on=alt_col_name, right_index=True, how="left")

    tracing.dump_df(DUMP, alternatives, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    # exposed to spec expressions as sample_pool_size
    sample_pool_size = len(destination_size_terms.index)

    logger.info("Running atwork_subtour_destination_simulate with %d persons"
                % len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(chooser_col_name, alt_col_name)

    locals_d = {
        'skims': skims,
        'sample_pool_size': float(sample_pool_size)
    }
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=atwork_subtour_destination_spec,
        choice_column=alt_col_name,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='workplace_location')

    tracing.print_summary('subtour destination', choices, describe=True)

    subtours['destination'] = choices

    # annotate subtours with destination-dependent columns
    results = expressions.compute_columns(
        df=subtours,
        model_settings='annotate_tours_with_dest',
        configs_dir=configs_dir,
        trace_label=trace_label)

    assign_in_place(tours, subtours[['destination']])
    assign_in_place(tours, results)

    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label=trace_label,
                         columns=['destination'],
                         warn_if_empty=True)
def atwork_subtour_destination_logsums(persons_merged,
                                       land_use,
                                       skim_dict, skim_stack,
                                       atwork_subtour_destination_sample,
                                       configs_dir, chunk_size, trace_hh_id):
    """
    add logsum column to existing atwork_subtour_destination_sample table

    logsum is calculated by running the mode_choice model for each sample
    (person, dest_taz) pair in atwork_subtour_destination_sample, and computing
    the logsum of all the utilities

    +-----------+----------+----------------+------------+----------------+
    | person_id | dest_TAZ | rand           | pick_count | logsum (added) |
    +===========+==========+================+============+================+
    | 23750     | 14       | 0.565502716034 | 4          | 1.85659498857  |
    +-----------+----------+----------------+------------+----------------+
    | 23750     | 16       | 0.711135838871 | 6          | 1.92315598631  |
    +-----------+----------+----------------+------------+----------------+
    | ...       |          |                |            |                |
    +-----------+----------+----------------+------------+----------------+
    | 23751     | 12       | 0.408038878552 | 1          | 2.40612135416  |
    +-----------+----------+----------------+------------+----------------+
    | 23751     | 14       | 0.972732479292 | 2          | 1.44009018355  |
    +-----------+----------+----------------+------------+----------------+
    """

    trace_label = 'atwork_subtour_destination_logsums'

    model_settings = inject.get_injectable('atwork_subtour_destination_settings')
    # logsums use the 'work' segment of the mode choice spec
    logsums_spec = mode_choice_logsums_spec(configs_dir, 'work')

    alt_col_name = model_settings["ALT_COL_NAME"]
    # subtours depart from the worker's workplace zone, not home
    chooser_col_name = 'workplace_taz'

    # FIXME - just using settings from tour_mode_choice
    logsum_settings = config.read_model_settings(configs_dir, 'tour_mode_choice.yaml')

    persons_merged = persons_merged.to_frame()
    atwork_subtour_destination_sample = atwork_subtour_destination_sample.to_frame()

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['LOGSUM_CHOOSER_COLUMNS']
    persons_merged = persons_merged[chooser_columns]

    # merge persons into tours (left merge keeps sample row order/index)
    choosers = pd.merge(atwork_subtour_destination_sample,
                        persons_merged,
                        left_on='person_id',
                        right_index=True,
                        how="left")

    # fixed skim time periods for all choosers, from model settings
    choosers['in_period'] = skim_time_period_label(model_settings['IN_PERIOD'])
    choosers['out_period'] = skim_time_period_label(model_settings['OUT_PERIOD'])

    # FIXME - should do this in expression file?
    choosers['dest_topology'] = reindex(land_use.TOPOLOGY, choosers[alt_col_name])
    choosers['dest_density_index'] = reindex(land_use.density_index, choosers[alt_col_name])

    logger.info("Running atwork_subtour_destination_logsums with %s rows" % len(choosers))

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    logsums = compute_logsums(
        choosers, logsums_spec, logsum_settings,
        skim_dict, skim_stack, chooser_col_name, alt_col_name,
        chunk_size, trace_hh_id, trace_label)

    # "add_column series should have an index matching the table to which it is being added"
    # when the index has duplicates, however, in the special case that the series index exactly
    # matches the table index, then the series value order is preserved. logsums does have a
    # matching index, since atwork_subtour_destination_sample was on left side of merge de-dup merge
    inject.add_column("atwork_subtour_destination_sample", "mode_choice_logsum", logsums)
def atwork_subtour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for at work subtours tours

    Side effects: writes the updated "tours" table back to the pipeline; returns
    early (after tracing.no_results) when there are no atwork subtours.
    """

    trace_label = 'atwork_subtour_scheduling'
    model_settings = config.read_model_settings('tour_scheduling_atwork.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_atwork.csv')

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor: annotate subtours using both directions of the od skims
    constants = config.get_model_constants(model_settings)
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    do_skim_wrapper = skim_dict.wrap('destination', 'origin')
    skims = {
        "od_skims": od_skim_wrapper,
        "do_skims": do_skim_wrapper,
    }
    annotate_preprocessors(
        subtours, constants, skims,
        model_settings, trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    # (the parent work tour's tdd bounds the window available to its subtours)
    parent_tour_ids = subtours.parent_tour_id.astype(int).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids)
    parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)

    tdd_choices = vectorize_subtour_scheduling(
        parent_tours,
        subtours,
        persons_merged,
        tdd_alts, model_spec,
        model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None)

    if DUMP:
        # debug-only dump: rebuild subtours/parent_tours from the updated tours
        # table and emit a combined tour map
        subtours = tours[tours.tour_category == 'atwork']
        parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]

        tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

        # give parents a parent_tour_id (their own id) so they map like subtours
        parent_tours['parent_tour_id'] = parent_tours.index
        subtours = pd.concat([parent_tours, subtours])
        tracing.dump_df(DUMP,
                        tt.tour_map(parent_tours, subtours, tdd_alts,
                                    persons_id_col='parent_tour_id'),
                        trace_label, 'tour_map')
def run_destination_simulate(
        spec_segment_name,
        tours,
        persons_merged,
        destination_sample,
        model_settings,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_label):
    """
    Choose a destination for each tour from its pre-sampled alternatives.

    destination_sample rows arrive already annotated with mode_choice logsum
    and pick_count; a size_term column is merged in before running the
    interaction-sample-simulate step for the requested spec segment.
    Returns the chosen-destination Series.
    """

    # read the full spec and keep only the column for this segment
    segment_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    segment_spec = segment_spec[[spec_segment_name]]

    # attach person attributes to each tour
    # FIXME - MEMORY HACK - only include columns actually used in spec
    merged_choosers = pd.merge(tours, persons_merged,
                               left_on='person_id', right_index=True, how='left')
    choosers = merged_choosers[model_settings['SIMULATE_CHOOSER_COLUMNS']]

    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]
    origin_col_name = model_settings['CHOOSER_ORIG_COL_NAME']

    # alternatives are pre-sampled and annotated with logsums and pick_count,
    # but the size_term for each sampled zone still has to be merged in
    destination_sample['size_term'] = \
        reindex(destination_size_terms.size_term,
                destination_sample[alt_dest_col_name])

    tracing.dump_df(DUMP, destination_sample, trace_label, 'alternatives')

    logger.info("Running tour_destination_simulate with %d persons", len(choosers))

    # skims keyed by chooser origin TAZ and merged alternative destination TAZ;
    # exposed to any @ expressions under the name "skims"
    skims = skim_dict.wrap(origin_col_name, alt_dest_col_name)

    expression_locals = {'skims': skims}
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        expression_locals.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    return interaction_sample_simulate(
        choosers, destination_sample,
        spec=segment_spec,
        choice_column=alt_dest_col_name,
        skims=skims,
        locals_d=expression_locals,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='destination')
def workplace_location_logsums(persons_merged, land_use, skim_dict, skim_stack,
                               workplace_location_sample,
                               configs_dir, chunk_size, trace_hh_id):
    """
    add logsum column to existing workplace_location_sample table

    logsum is calculated by running the mode_choice model for each sample
    (person, dest_taz) pair in workplace_location_sample, and computing the
    logsum of all the utilities

    <added>
    PERID,  dest_TAZ, rand,            pick_count, logsum
    23750,  14,       0.565502716034,  4           1.85659498857
    23750,  16,       0.711135838871,  6           1.92315598631
    ...
    23751,  12,       0.408038878552,  1           2.40612135416
    23751,  14,       0.972732479292,  2           1.44009018355
    """

    trace_label = 'workplace_location_logsums'

    # logsums use the 'work' segment of the mode choice spec
    logsums_spec = mode_choice_logsums_spec(configs_dir, 'work')

    workplace_location_settings = config.read_model_settings(
        configs_dir, 'workplace_location.yaml')

    alt_col_name = workplace_location_settings["ALT_COL_NAME"]

    # FIXME - just using settings from tour_mode_choice
    logsum_settings = config.read_model_settings(configs_dir, 'tour_mode_choice.yaml')

    persons_merged = persons_merged.to_frame()
    workplace_location_sample = workplace_location_sample.to_frame()

    logger.info("Running workplace_location_sample with %s rows" %
                len(workplace_location_sample))

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = workplace_location_settings['LOGSUM_CHOOSER_COLUMNS']
    persons_merged = persons_merged[chooser_columns]

    # left merge on the shared person index keeps the sample's row order/index
    choosers = pd.merge(workplace_location_sample,
                        persons_merged,
                        left_index=True,
                        right_index=True,
                        how="left")

    # fixed skim time periods for all choosers, from model settings
    choosers['in_period'] = time_period_label(
        workplace_location_settings['IN_PERIOD'])
    choosers['out_period'] = time_period_label(
        workplace_location_settings['OUT_PERIOD'])

    # FIXME - should do this in expression file?
    choosers['dest_topology'] = reindex(land_use.TOPOLOGY, choosers[alt_col_name])
    choosers['dest_density_index'] = reindex(land_use.density_index, choosers[alt_col_name])

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    # NOTE(review): this call passes alt_col_name but no chooser column name,
    # unlike the school/atwork logsum variants elsewhere in this file which pass
    # (chooser_col_name, alt_col_name) — confirm against compute_logsums' signature
    logsums = compute_logsums(choosers, logsums_spec, logsum_settings,
                              skim_dict, skim_stack,
                              alt_col_name, chunk_size,
                              trace_hh_id, trace_label)

    # "add_column series should have an index matching the table to which it is being added"
    # when the index has duplicates, however, in the special case that the series index exactly
    # matches the table index, then the series value order is preserved
    # logsums now does, since workplace_location_sample was on left side of merge de-dup merge
    orca.add_column("workplace_location_sample", "mode_choice_logsum", logsums)
def atwork_subtour_scheduling(tours, persons_merged, tdd_alts, tdd_subtour_spec,
                              atwork_subtour_scheduling_settings,
                              configs_dir, chunk_size, trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for at work subtours tours

    Side effects: updates the "tours" pipeline table in place; returns nothing.
    """

    trace_label = 'atwork_subtour_scheduling'
    constants = config.get_model_constants(atwork_subtour_scheduling_settings)

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    # NOTE(review): this variant selects tour_category == 'subtour'; other
    # versions in this file use 'atwork' — confirm which category value applies
    subtours = tours[tours.tour_category == 'subtour']

    logger.info("Running atwork_subtour_scheduling with %d tours" % len(subtours))

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    # (the parent work tour's tdd bounds the window available to its subtours)
    parent_tour_ids = subtours.parent_tour_id.astype(int).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids)
    parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)

    """
    parent_tours
               tour_id  tdd
    20973389  20973389   26
    44612864  44612864    3
    48954854  48954854    7
    """

    tdd_choices = vectorize_subtour_scheduling(parent_tours,
                                               subtours,
                                               persons_merged,
                                               tdd_alts,
                                               tdd_subtour_spec,
                                               constants=constants,
                                               chunk_size=chunk_size,
                                               trace_label=trace_label)

    # write chosen tdds onto subtours, then annotate via expression file
    assign_in_place(subtours, tdd_choices)
    expressions.assign_columns(df=subtours,
                               model_settings='annotate_tours',
                               configs_dir=configs_dir,
                               trace_label=trace_label)

    # fold updated subtours back into the full tours table and persist
    assign_in_place(tours, subtours)
    pipeline.replace_table("tours", tours)

    tracing.dump_df(DUMP,
                    tt.tour_map(parent_tours, subtours, tdd_alts,
                                persons_id_col='parent_tour_id'),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(subtours,
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
def workplace_location_simulate(persons_merged, workplace_location_sample, workplace_location_spec, workplace_location_settings, skim_dict, destination_size_terms, chunk_size, trace_hh_id): """ Workplace location model on workplace_location_sample annotated with mode_choice logsum to select a work_taz from sample alternatives """ # for now I'm going to generate a workplace location for everyone - # presumably it will not get used in downstream models for everyone - # it should depend on CDAP and mandatory tour generation as to whether # it gets used choosers = persons_merged.to_frame() alt_col_name = workplace_location_settings["ALT_COL_NAME"] # alternatives are pre-sampled and annotated with logsums and pick_count # but we have to merge additional alt columns into alt sample list workplace_location_sample = workplace_location_sample.to_frame() destination_size_terms = destination_size_terms.to_frame() alternatives = \ pd.merge(workplace_location_sample, destination_size_terms, left_on=alt_col_name, right_index=True, how="left") tracing.dump_df(DUMP, alternatives, 'workplace_location_simulate', 'alternatives') constants = config.get_model_constants(workplace_location_settings) sample_pool_size = len(destination_size_terms.index) logger.info("Running workplace_location_simulate with %d persons" % len(choosers)) # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers # and a TAZ in the alternatives which get merged during interaction # the skims will be available under the name "skims" for any @ expressions skims = skim_dict.wrap("TAZ", alt_col_name) locals_d = {'skims': skims, 'sample_pool_size': float(sample_pool_size)} if constants is not None: locals_d.update(constants) # FIXME - MEMORY HACK - only include columns actually used in spec chooser_columns = workplace_location_settings['SIMULATE_CHOOSER_COLUMNS'] choosers = choosers[chooser_columns] tracing.dump_df(DUMP, choosers, 'workplace_location_simulate', 'choosers') choices = 
interaction_sample_simulate( choosers, alternatives, spec=workplace_location_spec, choice_column=alt_col_name, skims=skims, locals_d=locals_d, chunk_size=chunk_size, trace_label=trace_hh_id and 'workplace_location', trace_choice_name='workplace_location') # FIXME - no need to reindex since we didn't slice choosers # choices = choices.reindex(persons_merged.index) tracing.print_summary('workplace_taz', choices, describe=True) orca.add_column("persons", "workplace_taz", choices) pipeline.add_dependent_columns("persons", "persons_workplace") if trace_hh_id: trace_columns = ['workplace_taz' ] + orca.get_table('persons_workplace').columns tracing.trace_df(orca.get_table('persons_merged').to_frame(), label="workplace_location", columns=trace_columns, warn_if_empty=True)
def school_location_logsums(persons_merged, land_use, skim_dict, skim_stack,
                            school_location_sample,
                            configs_dir, chunk_size, trace_hh_id):
    """
    add logsum column to existing school_location_sample table

    logsum is calculated by running the mode_choice model for each sample
    (person, dest_taz) pair in school_location_sample, and computing the logsum
    of all the utilities

    +-------+--------------+----------------+------------+----------------+
    | PERID | dest_TAZ     | rand           | pick_count | logsum (added) |
    +=======+==============+================+============+================+
    | 23750 | 14           | 0.565502716034 | 4          | 1.85659498857  |
    +-------+--------------+----------------+------------+----------------+
    | 23750 | 16           | 0.711135838871 | 6          | 1.92315598631  |
    +-------+--------------+----------------+------------+----------------+
    | ...   |              |                |            |                |
    +-------+--------------+----------------+------------+----------------+
    | 23751 | 12           | 0.408038878552 | 1          | 2.40612135416  |
    +-------+--------------+----------------+------------+----------------+
    | 23751 | 14           | 0.972732479292 | 2          | 1.44009018355  |
    +-------+--------------+----------------+------------+----------------+
    """

    trace_label = 'school_location_logsums'

    school_location_settings = config.read_model_settings(
        configs_dir, 'school_location.yaml')

    alt_col_name = school_location_settings["ALT_COL_NAME"]
    # school tours originate from the person's home TAZ
    chooser_col_name = 'TAZ'

    # FIXME - just using settings from tour_mode_choice
    logsum_settings = config.read_model_settings(configs_dir, 'tour_mode_choice.yaml')

    persons_merged = persons_merged.to_frame()
    school_location_sample = school_location_sample.to_frame()

    logger.info("Running school_location_sample with %s rows" %
                len(school_location_sample))

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = school_location_settings['LOGSUM_CHOOSER_COLUMNS']
    persons_merged = persons_merged[chooser_columns]

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')

    # compute logsums separately per school type (each has its own spec segment)
    # and concatenate; within each type the sample's row order/index is preserved
    logsums_list = []
    for school_type in ['university', 'highschool', 'gradeschool']:

        logsums_spec = mode_choice_logsums_spec(configs_dir, school_type)

        choosers = school_location_sample[school_location_sample['school_type'] == school_type]

        choosers = pd.merge(choosers,
                            persons_merged,
                            left_index=True,
                            right_index=True,
                            how="left")

        # fixed skim time periods for all choosers, from model settings
        choosers['in_period'] = skim_time_period_label(
            school_location_settings['IN_PERIOD'])
        choosers['out_period'] = skim_time_period_label(
            school_location_settings['OUT_PERIOD'])

        # FIXME - should do this in expression file?
        choosers['dest_topology'] = reindex(land_use.TOPOLOGY, choosers[alt_col_name])
        choosers['dest_density_index'] = reindex(land_use.density_index, choosers[alt_col_name])

        tracing.dump_df(DUMP, choosers,
                        tracing.extend_trace_label(trace_label, school_type),
                        'choosers')

        logsums = compute_logsums(
            choosers, logsums_spec, logsum_settings,
            skim_dict, skim_stack, chooser_col_name, alt_col_name,
            chunk_size, trace_hh_id,
            tracing.extend_trace_label(trace_label, school_type))

        logsums_list.append(logsums)

    logsums = pd.concat(logsums_list)

    # add_column series should have an index matching the table to which it is being added
    # logsums does, since school_location_sample was on left side of merge creating choosers
    inject.add_column("school_location_sample", "mode_choice_logsum", logsums)
def mandatory_tour_scheduling(tours,
                              persons_merged,
                              tdd_alts,
                              chunk_size,
                              trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for mandatory tours

    Side effects: writes the updated "tours" table and the scheduling timetable
    back to the pipeline; returns early when there are no mandatory tours.
    """
    trace_label = 'mandatory_tour_scheduling'
    model_settings = config.read_model_settings(
        'mandatory_tour_scheduling.yaml')
    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])

    tours = tours.to_frame()
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    # - if no mandatory_tours
    if mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - filter chooser columns for both logsums and simulate
    # (union of the two column lists, logsum columns first, no duplicates)
    logsum_columns = logsum_settings.get('LOGSUM_CHOOSER_COLUMNS', [])
    model_columns = model_settings.get('SIMULATE_CHOOSER_COLUMNS', [])
    chooser_columns = logsum_columns + \
        [c for c in model_columns if c not in logsum_columns]
    persons_merged = expressions.filter_chooser_columns(
        persons_merged, chooser_columns)

    # - add primary_purpose column
    # mtctm1 segments mandatory_scheduling spec by tour_type
    # (i.e. there are different specs for work and school tour_types)
    # mtctm1 logsum coefficients are segmented by primary_purpose
    # (i.e. there are different logsum coefficients for work, school, univ primary_purposes)
    # for simplicity managing these different segmentation schemes,
    # we conflate them by segmenting the skims to align with primary_purpose
    segment_col = 'primary_purpose'
    if segment_col not in mandatory_tours:

        # school tours made by university students are re-labelled 'univ'
        is_university_tour = \
            (mandatory_tours.tour_type == 'school') & \
            reindex(persons_merged.is_university, mandatory_tours.person_id)

        mandatory_tours['primary_purpose'] = \
            mandatory_tours.tour_type.where(~is_university_tour, 'univ')

    # - spec dict segmented by primary_purpose
    specs = model_settings.get('SPEC', [])
    segment_specs = {segment: simulate.read_model_spec(file_name=spec)
                     for segment, spec in specs.items()}

    # NOTE(review): logs len(tours), not len(mandatory_tours) — confirm intended
    logger.info("Running mandatory_tour_scheduling with %d tours", len(tours))
    tdd_choices, timetable = vts.vectorize_tour_scheduling(
        mandatory_tours, persons_merged,
        tdd_alts,
        spec=segment_specs, segment_col=segment_col,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    timetable.replace_table()

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
def atwork_subtour_destination_simulate(
        subtours,
        persons_merged,
        destination_sample,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_hh_id):
    """
    Pick a destination zone for each at-work subtour.

    Runs interaction-sample-simulate over the pre-sampled, logsum-annotated
    destination_sample alternatives and returns the chosen-destination Series.
    """

    trace_label = 'atwork_subtour_destination_simulate'

    model_settings = config.read_model_settings('atwork_subtour_destination.yaml')
    model_spec = simulate.read_model_spec(file_name='atwork_subtour_destination.csv')

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    # join person attributes onto each subtour, then keep only the spec columns
    # FIXME - MEMORY HACK - only include columns actually used in spec
    merged_choosers = pd.merge(subtours, persons_merged,
                               left_on='person_id', right_index=True)
    choosers = merged_choosers[model_settings['SIMULATE_CHOOSER_COLUMNS']]

    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]
    chooser_col_name = 'workplace_taz'

    # alternatives are pre-sampled and annotated with logsums and pick_count,
    # but the destination_size_terms columns still have to be merged in
    alternatives = pd.merge(destination_sample, destination_size_terms,
                            left_on=alt_dest_col_name, right_index=True,
                            how="left")

    tracing.dump_df(DUMP, alternatives, trace_label, 'alternatives')

    logger.info("Running atwork_subtour_destination_simulate with %d persons",
                len(choosers))

    # skims keyed on the chooser's workplace TAZ and the merged alternative TAZ;
    # exposed to any @ expressions under the name "skims"
    skims = skim_dict.wrap(chooser_col_name, alt_dest_col_name)

    locals_d = {'skims': skims}
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    return interaction_sample_simulate(
        choosers,
        alternatives,
        spec=model_spec,
        choice_column=alt_dest_col_name,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='workplace_location')
def atwork_subtour_destination_simulate(subtours,
                                        persons_merged,
                                        destination_sample,
                                        skim_dict,
                                        destination_size_terms,
                                        chunk_size, trace_hh_id):
    """
    atwork_subtour_destination model on atwork_subtour_destination_sample
    annotated with mode_choice logsum to select a destination from sample alternatives

    Returns the chosen-destination Series indexed by subtour.

    NOTE(review): an identical definition of this function appears earlier in
    this file; in Python the later binding wins — confirm whether both are needed.
    """

    trace_label = 'atwork_subtour_destination_simulate'

    model_settings = config.read_model_settings(
        'atwork_subtour_destination.yaml')
    model_spec = simulate.read_model_spec(
        file_name='atwork_subtour_destination.csv')

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    # merge persons into tours (inner merge on person_id)
    choosers = pd.merge(subtours,
                        persons_merged,
                        left_on='person_id', right_index=True)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]
    # subtours depart from the worker's workplace zone, not home
    chooser_col_name = 'workplace_taz'

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge destination_size_terms columns into alt sample list
    alternatives = \
        pd.merge(destination_sample, destination_size_terms,
                 left_on=alt_dest_col_name, right_index=True, how="left")

    tracing.dump_df(DUMP, alternatives, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running atwork_subtour_destination_simulate with %d persons",
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(chooser_col_name, alt_dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=model_spec,
        choice_column=alt_dest_col_name,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='workplace_location')

    return choices
def run_destination_simulate(spec_segment_name,
                             tours,
                             persons_merged,
                             destination_sample,
                             model_settings,
                             skim_dict,
                             destination_size_terms,
                             chunk_size, trace_label):
    """
    run destination_simulate on tour_destination_sample
    annotated with mode_choice logsum to select a destination from sample alternatives

    Returns the chosen-destination Series indexed by tour.

    NOTE(review): an identical definition of this function appears earlier in
    this file; in Python the later binding wins — confirm whether both are needed.
    """
    # read the full spec and keep only the column for this segment
    model_spec_file_name = model_settings['SPEC']
    model_spec = simulate.read_model_spec(file_name=model_spec_file_name)
    model_spec = model_spec[[spec_segment_name]]

    # merge persons into tours
    choosers = pd.merge(tours, persons_merged,
                        left_on='person_id', right_index=True, how='left')
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]
    origin_col_name = model_settings['CHOOSER_ORIG_COL_NAME']

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge size_terms column into alt sample list
    destination_sample['size_term'] = \
        reindex(destination_size_terms.size_term,
                destination_sample[alt_dest_col_name])

    tracing.dump_df(DUMP, destination_sample, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running tour_destination_simulate with %d persons", len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(origin_col_name, alt_dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(choosers,
                                          destination_sample,
                                          spec=model_spec,
                                          choice_column=alt_dest_col_name,
                                          skims=skims,
                                          locals_d=locals_d,
                                          chunk_size=chunk_size,
                                          trace_label=trace_label,
                                          trace_choice_name='destination')

    return choices
def mandatory_tour_scheduling(tours,
                              persons_merged,
                              tdd_alts,
                              chunk_size,
                              trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for mandatory tours

    Estimation-aware version: per-spec-segment estimators may override the
    simulated choices with survey values, after which the timetable is rebuilt.
    Side effects: writes "tours" and the timetable back to the pipeline.
    """
    trace_label = 'mandatory_tour_scheduling'
    model_settings_file_name = 'mandatory_tour_scheduling.yaml'
    # NOTE(review): estimators is re-initialized again below before the spec
    # loop; this first assignment appears redundant
    estimators = {}

    model_settings = config.read_model_settings(model_settings_file_name)
    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])

    tours = tours.to_frame()
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    # - if no mandatory_tours
    if mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - filter chooser columns for both logsums and simulate
    # (union of the two column lists, logsum columns first, no duplicates)
    logsum_columns = logsum_settings.get('LOGSUM_CHOOSER_COLUMNS', [])
    model_columns = model_settings.get('SIMULATE_CHOOSER_COLUMNS', [])
    chooser_columns = logsum_columns + \
        [c for c in model_columns if c not in logsum_columns]
    persons_merged = expressions.filter_chooser_columns(
        persons_merged, chooser_columns)

    # - add tour segmentation column
    # mtctm1 segments mandatory_scheduling spec by tour_type
    # (i.e. there are different specs for work and school tour_types)
    # mtctm1 logsum coefficients are segmented by primary_purpose
    # (i.e. there are different logsum coefficients for work, school, univ primary_purposes)
    # for simplicity managing these different segmentation schemes,
    # we conflate them by segmenting the skims to align with primary_purpose
    tour_segment_col = 'mandatory_tour_seg'
    assert tour_segment_col not in mandatory_tours
    # school tours made by university students are re-labelled 'univ'
    is_university_tour = \
        (mandatory_tours.tour_type == 'school') & \
        reindex(persons_merged.is_university, mandatory_tours.person_id)
    mandatory_tours[tour_segment_col] = \
        mandatory_tours.tour_type.where(~is_university_tour, 'univ')

    # load specs
    spec_segment_settings = model_settings.get('SPEC_SEGMENTS', {})
    specs = {}
    estimators = {}
    for spec_segment_name, spec_settings in spec_segment_settings.items():

        # estimator for this tour_segment
        estimator = estimation.manager.begin_estimation(
            model_name='mandatory_tour_scheduling_%s' % spec_segment_name,
            bundle_name='mandatory_tour_scheduling')

        spec_file_name = spec_settings['SPEC']
        model_spec = simulate.read_model_spec(file_name=spec_file_name)
        coefficients_df = simulate.read_model_coefficients(
            spec_segment_settings[spec_segment_name])
        specs[spec_segment_name] = simulate.eval_coefficients(
            model_spec, coefficients_df, estimator)

        if estimator:
            estimators[spec_segment_name] = estimator  # add to local list
            estimator.write_model_settings(model_settings, model_settings_file_name)
            estimator.write_spec(spec_settings)
            estimator.write_coefficients(coefficients_df)

    # - spec dict segmented by primary_purpose
    # map each tour segment to its spec segment's spec and (optional) estimator
    tour_segment_settings = model_settings.get('TOUR_SPEC_SEGMENTS', {})
    tour_segments = {}
    for tour_segment_name, spec_segment_name in tour_segment_settings.items():
        tour_segments[tour_segment_name] = {}
        tour_segments[tour_segment_name]['spec_segment_name'] = spec_segment_name
        tour_segments[tour_segment_name]['spec'] = specs[spec_segment_name]
        tour_segments[tour_segment_name]['estimator'] = estimators.get(spec_segment_name)

    timetable = inject.get_injectable("timetable")

    # open a timetable transaction so estimation overrides can be rolled back
    if estimators:
        timetable.begin_transaction(list(estimators.values()))

    # NOTE(review): logs len(tours), not len(mandatory_tours) — confirm intended
    logger.info("Running mandatory_tour_scheduling with %d tours", len(tours))
    choices = vts.vectorize_tour_scheduling(
        mandatory_tours, persons_merged,
        tdd_alts, timetable,
        tour_segments=tour_segments, tour_segment_col=tour_segment_col,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    if estimators:
        # override choices for all estimators
        choices_list = []
        for spec_segment_name, estimator in estimators.items():
            model_choices = choices[(mandatory_tours.tour_type == spec_segment_name)]

            # FIXME vectorize_tour_scheduling calls used to write_choices but perhaps shouldn't
            estimator.write_choices(model_choices)
            override_choices = estimator.get_survey_values(model_choices, 'tours', 'tdd')
            estimator.write_override_choices(override_choices)

            choices_list.append(override_choices)
            estimator.end_estimation()
        choices = pd.concat(choices_list)

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in tours.groupby('tour_num', sort=True):
            timetable.assign(window_row_ids=nth_tours['person_id'],
                             tdds=choices.reindex(nth_tours.index))

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'), tdd_alts,
                       left_on=['tdd'], right_index=True, how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    timetable.replace_table()

    # updated df for tracing
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
def atwork_subtour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    Predict the departure time and duration (tdd alternative) of each at-work subtour.

    Parameters
    ----------
    tours : table wrapper (``.to_frame()`` is called) of all tours; only rows with
        ``tour_category == 'atwork'`` are scheduled
    persons_merged : table wrapper of persons merged with household attributes
    tdd_alts : pd.DataFrame of time-of-day alternatives (start, end, duration)
        indexed by tdd alternative id
    skim_dict : skim dictionary used to build od/do skim wrappers for the
        annotation preprocessors
    chunk_size : int, passed through to vectorize_subtour_scheduling
    trace_hh_id : household id to trace (falsy disables tracing)

    Side effects
    ------------
    Merges the chosen start/end/duration columns into the tours table and
    replaces the pipeline "tours" table. Subtours are scheduled inside the
    parent work tour's window, so the persons timetable is NOT updated.
    """
    trace_label = 'atwork_subtour_scheduling'
    model_settings_file_name = 'tour_scheduling_atwork.yaml'

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours, there is nothing to schedule
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('atwork_subtour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor: annotate subtours using both directions of the od skims
    constants = config.get_model_constants(model_settings)
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    do_skim_wrapper = skim_dict.wrap('destination', 'origin')
    skims = {
        "od_skims": od_skim_wrapper,
        "do_skims": do_skim_wrapper,
    }
    annotate_preprocessors(
        subtours, constants, skims,
        model_settings, trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    # (the parent work tour's tdd constrains the subtour's feasible windows)
    parent_tour_ids = subtours.parent_tour_id.astype(np.int64).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids)
    parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        # we don't need to update timetable because subtours are scheduled inside work trip windows

    choices = vectorize_subtour_scheduling(
        parent_tours,
        subtours,
        persons_merged,
        tdd_alts, model_spec,
        model_settings,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=trace_label)

    if estimator:
        # override model choices with observed survey values for estimation
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    tdd_choices = pd.merge(choices.to_frame('tdd'), tdd_alts,
                           left_on=['tdd'], right_index=True, how='left')
    assign_in_place(tours, tdd_choices)

    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None)

    if DUMP:
        subtours = tours[tours.tour_category == 'atwork']
        # take an explicit copy: a column is added to parent_tours below, and
        # assigning into a boolean-indexed view of tours is a pandas chained
        # assignment (SettingWithCopyWarning, ambiguous under copy-on-write)
        parent_tours = tours[tours.index.isin(subtours.parent_tour_id)].copy()

        tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

        parent_tours['parent_tour_id'] = parent_tours.index
        subtours = pd.concat([parent_tours, subtours])
        tracing.dump_df(DUMP,
                        tt.tour_map(parent_tours, subtours, tdd_alts,
                                    persons_id_col='parent_tour_id'),
                        trace_label, 'tour_map')
def school_location_simulate(persons_merged,
                             school_location_sample,
                             school_location_spec,
                             school_location_settings,
                             skim_dict,
                             destination_size_terms,
                             chunk_size,
                             trace_hh_id):
    """
    Choose a school_taz for each school-goer from the pre-sampled alternatives.

    Runs interaction_sample_simulate once per school segment (university,
    highschool, gradeschool) over school_location_sample, which has already
    been annotated with mode-choice logsums and pick counts, then writes the
    chosen school_taz back to the persons table (-1 for persons with no
    school location).
    """
    trace_label = 'school_location_simulate'

    persons_df = persons_merged.to_frame()
    sample_df = school_location_sample.to_frame()
    size_terms_df = destination_size_terms.to_frame()

    alt_col_name = school_location_settings["ALT_COL_NAME"]

    # skims keyed on the chooser's TAZ and the merged alternative's TAZ;
    # exposed to @ expressions in the spec under the name "skims"
    skims = skim_dict.wrap("TAZ", alt_col_name)

    locals_d = {'skims': skims}
    constants = config.get_model_constants(school_location_settings)
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    persons_df = persons_df[school_location_settings['SIMULATE_CHOOSER_COLUMNS']]

    tracing.dump_df(DUMP, persons_df, 'school_location_simulate', 'choosers')

    segment_choices = []
    for segment_name in ['university', 'highschool', 'gradeschool']:

        locals_d['segment'] = segment_name

        segment_choosers = persons_df[persons_df["is_" + segment_name]]
        segment_alts = sample_df[sample_df['school_type'] == segment_name]

        # sampled alternatives already carry logsums and pick_count, but the
        # remaining size-term columns still have to be merged in
        segment_alts = pd.merge(segment_alts, size_terms_df,
                                left_on=alt_col_name, right_index=True,
                                how="left")

        tracing.dump_df(DUMP, segment_alts, trace_label,
                        '%s_alternatives' % segment_name)

        choices = interaction_sample_simulate(
            segment_choosers,
            segment_alts,
            spec=school_location_spec[[segment_name]],
            choice_column=alt_col_name,
            skims=skims,
            locals_d=locals_d,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_name),
            trace_choice_name='school_location')

        segment_choices.append(choices)

    choices = pd.concat(segment_choices)

    # only the school-going subset of persons received a choice, so backfill
    # everyone else with -1 to code "no school location"
    choices = choices.reindex(persons_merged.index).fillna(-1).astype(int)

    tracing.dump_df(DUMP, choices, trace_label, 'choices')
    tracing.print_summary('school_taz', choices, describe=True)

    inject.add_column("persons", "school_taz", choices)
    pipeline.add_dependent_columns("persons", "persons_school")

    if trace_hh_id:
        trace_columns = ['school_taz'] + inject.get_table('persons_school').columns
        tracing.trace_df(inject.get_table('persons_merged').to_frame(),
                         label="school_location",
                         columns=trace_columns,
                         warn_if_empty=True)
def run_od_logsums(spec_segment_name, tours_merged_df, od_sample,
                   model_settings, network_los, estimator, chunk_size,
                   trace_hh_id, trace_label):
    """
    add logsum column to existing tour_destination_sample table

    logsum is calculated by running the mode_choice model for each sample
    (person, OD_id) pair in od_sample, and computing the logsum of all the utilities

    Returns od_sample with a 'tour_mode_choice_logsum' column added (and, when
    COMPUTE_TRIP_MODE_CHOICE_LOGSUMS is enabled, one pivoted trip-mode-choice
    logsum column per tour mode and direction).

    Side effects: when trip mode choice logsums are computed, this temporarily
    replaces the pipeline 'trips' table with pseudo-trips, runs the
    'trip_mode_choice' orca step against it, and then drops the trips RNG
    channel and trace registration again.
    """
    chunk_tag = 'tour_od.logsums'
    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])
    origin_id_col = model_settings['ORIG_COL_NAME']
    dest_id_col = model_settings['DEST_COL_NAME']
    # composite "<origin>_<dest>" id column name for the OD pair
    tour_od_id_col = get_od_id_col(origin_id_col, dest_id_col)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    tours_merged_df = \
        logsum.filter_chooser_columns(tours_merged_df, logsum_settings, model_settings)

    # merge ods into choosers table
    choosers = od_sample.join(tours_merged_df, how='left')
    choosers[tour_od_id_col] = choosers[origin_id_col].astype(
        str) + '_' + choosers[dest_id_col].astype(str)

    logger.info("Running %s with %s rows", trace_label, len(choosers))

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    # run trip mode choice to compute tour mode choice logsums
    if logsum_settings.get('COMPUTE_TRIP_MODE_CHOICE_LOGSUMS', False):

        pseudo_tours = choosers.copy()
        trip_mode_choice_settings = config.read_model_settings(
            'trip_mode_choice')

        # tours_merged table doesn't yet have all the cols it needs to be called (e.g.
        # home_zone_id), so in order to compute tour mode choice/trip mode choice logsums
        # in this step we have to pass all tour-level attributes in with the main trips
        # table. see trip_mode_choice.py L56-61 for more details.
        tour_cols_needed = trip_mode_choice_settings.get(
            'TOURS_MERGED_CHOOSER_COLUMNS', [])
        # NOTE(review): if .get() returns the list object stored in the settings
        # dict, this append mutates it in place — confirm settings are re-read
        # per call or that the duplicate append is harmless
        tour_cols_needed.append(tour_od_id_col)

        # from tour_mode_choice.py: relabel university school tours as 'univ'
        not_university = (pseudo_tours.tour_type != 'school') | ~pseudo_tours.is_university
        pseudo_tours['tour_purpose'] = \
            pseudo_tours.tour_type.where(not_university, 'univ')

        # pseudo-tours have no stop-frequency choice yet; assume direct tours
        pseudo_tours['stop_frequency'] = '0out_0in'
        pseudo_tours['primary_purpose'] = pseudo_tours['tour_purpose']
        # remember the original index name so choosers can be re-indexed
        # after the logsum merge below
        choosers_og_index = choosers.index.name
        pseudo_tours.reset_index(inplace=True)
        pseudo_tours.index.name = 'unique_id'

        # need dest_id_col to create dest col in trips, but need to preserve
        # tour dest as separate column in the trips table bc the trip mode choice
        # preprocessor isn't able to get the tour dest from the tours table bc the
        # tours don't yet have ODs.
        stop_frequency_alts = inject.get_injectable('stop_frequency_alts')
        pseudo_tours['tour_destination'] = pseudo_tours[dest_id_col]
        trips = trip.initialize_from_tours(
            pseudo_tours, stop_frequency_alts,
            [origin_id_col, dest_id_col, 'tour_destination', 'unique_id'])
        outbound = trips['outbound']
        # outbound trips depart at tour start, inbound trips at tour end
        trips['depart'] = reindex(pseudo_tours.start, trips.unique_id)
        trips.loc[~outbound, 'depart'] = reindex(pseudo_tours.end,
                                                 trips.loc[~outbound, 'unique_id'])

        logsum_trips = pd.DataFrame()
        nest_spec = config.get_logit_model_settings(logsum_settings)

        # actual coeffs dont matter here, just need them to load the nest structure
        coefficients = simulate.get_segment_coefficients(
            logsum_settings, pseudo_tours.iloc[0]['tour_purpose'])
        nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients, trace_label)

        # leaf nests of the tour mode choice nesting structure are the tour modes
        tour_mode_alts = []
        for nest in logit.each_nest(nest_spec):
            if nest.is_leaf:
                tour_mode_alts.append(nest.name)

        # repeat rows from the trips table iterating over tour mode
        # (trips['tour_mode'] is overwritten each pass before the concat copies it)
        for tour_mode in tour_mode_alts:
            trips['tour_mode'] = tour_mode
            logsum_trips = pd.concat((logsum_trips, trips), ignore_index=True)
        assert len(logsum_trips) == len(trips) * len(tour_mode_alts)
        logsum_trips.index.name = 'trip_id'

        # bring in the tour-level chooser columns trip mode choice expects
        # NOTE(review): membership is tested against trips but the column is
        # written to logsum_trips — works because logsum_trips is a concat of
        # trips copies (same columns), but the asymmetry looks accidental
        for col in tour_cols_needed:
            if col not in trips:
                logsum_trips[col] = reindex(pseudo_tours[col], logsum_trips.unique_id)

        # temporarily install the pseudo-trips as the pipeline 'trips' table
        pipeline.replace_table('trips', logsum_trips)
        tracing.register_traceable_table('trips', logsum_trips)
        pipeline.get_rn_generator().add_channel('trips', logsum_trips)

        # run trip mode choice on pseudo-trips. use orca instead of pipeline to
        # execute the step because pipeline can only handle one open step at a time
        orca.run(['trip_mode_choice'])

        # grab trip mode choice logsums and pivot by tour mode and direction, index
        # on tour_id to enable merge back to choosers table
        trips = inject.get_table('trips').to_frame()
        trip_dir_mode_logsums = trips.pivot(
            index=['tour_id', tour_od_id_col],
            columns=['tour_mode', 'outbound'],
            values='trip_mode_choice_logsum')
        # flatten the (mode, outbound) MultiIndex columns to
        # 'logsum_<mode>_outbound' / 'logsum_<mode>_inbound'
        new_cols = [
            '_'.join(['logsum', mode, 'outbound' if outbound else 'inbound'])
            for mode, outbound in trip_dir_mode_logsums.columns]
        trip_dir_mode_logsums.columns = new_cols

        # merge the pivoted logsums back onto choosers via (tour_id, od_id),
        # then restore the original chooser index
        choosers.reset_index(inplace=True)
        choosers.set_index(['tour_id', tour_od_id_col], inplace=True)
        choosers = pd.merge(choosers, trip_dir_mode_logsums,
                            left_index=True, right_index=True)
        choosers.reset_index(inplace=True)
        choosers.set_index(choosers_og_index, inplace=True)

        # tear down the temporary trips table registrations
        pipeline.get_rn_generator().drop_channel('trips')
        tracing.deregister_traceable_table('trips')
        assert (od_sample.index == choosers.index).all()
        for col in new_cols:
            od_sample[col] = choosers[col]

    logsums = logsum.compute_logsums(choosers, spec_segment_name,
                                     logsum_settings, model_settings,
                                     network_los, chunk_size, chunk_tag,
                                     trace_label, 'end', 'start', 'duration')

    assert (od_sample.index == logsums.index).all()
    od_sample['tour_mode_choice_logsum'] = logsums

    return od_sample