Example #1
def annotate_preprocessors(
        tours_df, locals_dict, skims,
        model_settings, trace_label):

    locals_d = {}
    locals_d.update(locals_dict)
    locals_d.update(skims)

    preprocessor_settings = model_settings.get('preprocessor', [])
    if not isinstance(preprocessor_settings, list):
        assert isinstance(preprocessor_settings, dict)
        preprocessor_settings = [preprocessor_settings]

    simulate.set_skim_wrapper_targets(tours_df, skims)

    annotations = None
    for model_settings in preprocessor_settings:

        results = compute_columns(
            df=tours_df,
            model_settings=model_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

        assign_in_place(tours_df, results)
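Every example on this page funnels computed columns back into a base table through assign_in_place. As a rough mental model (a simplified sketch, not the activitysim implementation), it overwrites the columns the two frames share and appends the rest, aligning on index:

import pandas as pd

# Simplified sketch of the assign_in_place contract assumed throughout
# these examples: overwrite a column if df already has it, append it
# otherwise, always aligning on index.
def assign_in_place_sketch(df, df2):
    for c in df2.columns:
        df[c] = df2[c]

tours = pd.DataFrame({'destination': [1, 2]}, index=[10, 11])
results = pd.DataFrame({'destination': [5, 6], 'logsum': [0.1, 0.2]},
                       index=[10, 11])
assign_in_place_sketch(tours, results)
print(tours)  # destination overwritten in place; logsum appended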
Example #2
def cleanup_failed_trips(trips):
    """
    drop failed trips and cleanup fields in leg_mates:

    trip_num        assign new ordinal trip num after failed trips are dropped
    trip_count      assign new count of trips in leg, sans failed trips
    first           update first flag as we may have dropped first trip (last trip can't fail)
    next_trip_id    assign id of next trip in leg after failed trips are dropped
    """

    if trips.failed.any():
        logger.warning("cleanup_failed_trips dropping %s failed trips" % trips.failed.sum())

        trips['patch'] = False
        flag_failed_trip_leg_mates(trips, 'patch')

        # drop the original failures
        trips = trips[~trips.failed]

        # increasing trip_id order
        patch_trips = trips[trips.patch].sort_index()

        # recompute fields dependent on trip_num sequence
        grouped = patch_trips.groupby(['tour_id', 'outbound'])
        patch_trips['trip_num'] = grouped.cumcount() + 1
        patch_trips['trip_count'] = patch_trips['trip_num'] + grouped.cumcount(ascending=False)

        assign_in_place(trips, patch_trips[['trip_num', 'trip_count']])

        del trips['patch']

    del trips['failed']

    return trips
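The renumbering above leans on two cumcount passes over the same groupby: counting up gives a fresh 1-based trip_num for the surviving trips, and adding the count down recovers the leg size for every row. A toy illustration with hypothetical data:

import pandas as pd

# Within each (tour_id, outbound) leg, cumcount() numbers rows from 0,
# so trip_num restarts at 1; trip_num + cumcount(ascending=False)
# equals the leg size on every row.
legs = pd.DataFrame({
    'tour_id':  [1, 1, 1, 2, 2],
    'outbound': [True, True, True, True, True],
})
grouped = legs.groupby(['tour_id', 'outbound'])
legs['trip_num'] = grouped.cumcount() + 1
legs['trip_count'] = legs['trip_num'] + grouped.cumcount(ascending=False)
print(legs)  # tour 1: trip_num 1..3, trip_count 3; tour 2: trip_num 1..2, trip_count 2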
Example #3
def atwork_subtour_destination(
        tours,
        persons_merged,
        skim_dict,
        skim_stack,
        land_use, size_terms,
        chunk_size, trace_hh_id):

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results('atwork_subtour_destination')
        return

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    destination_size_terms = tour_destination_size_terms(land_use, size_terms, 'atwork')

    destination_sample = atwork_subtour_destination_sample(
        subtours,
        persons_merged,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_hh_id)

    destination_sample = atwork_subtour_destination_logsums(
        persons_merged,
        destination_sample,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id)

    choices = atwork_subtour_destination_simulate(
        subtours,
        persons_merged,
        destination_sample,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_hh_id)

    subtours['destination'] = choices

    assign_in_place(tours, subtours[['destination']])

    pipeline.replace_table("tours", tours)

    tracing.print_summary('subtour destination', subtours.destination, describe=True)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label='atwork_subtour_destination',
                         columns=['destination'])
Example #4
def add_result_columns(base_dfname, from_df, prefix=''):

    dest_df = inject.get_table(base_dfname).to_frame()

    if prefix:
        from_df = from_df.copy()
        from_df.columns = [prefix + c for c in from_df.columns.values]

    assign_in_place(dest_df, from_df)

    pipeline.replace_table(base_dfname, dest_df)
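The prefixing step above renames the result columns on a copy, so several sub-models can write into one base table without column-name collisions. A toy illustration:

import pandas as pd

# Hypothetical results frame; the prefix keeps its columns distinct
# from any same-named columns already in the base table.
from_df = pd.DataFrame({'mode': ['walk'], 'logsum': [0.3]})
prefix = 'tour_'
from_df = from_df.copy()
from_df.columns = [prefix + c for c in from_df.columns.values]
print(from_df.columns.tolist())  # ['tour_mode', 'tour_logsum']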
Example #5
def mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                              trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for mandatory tours
    """

    model_name = 'mandatory_tour_scheduling'
    trace_label = model_name

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    # - if no mandatory_tours
    if mandatory_tours.shape[0] == 0:
        tracing.no_results(model_name)
        return

    # - add tour segmentation column
    # mtctm1 segments mandatory_scheduling spec by tour_type
    # (i.e. there are different specs for work and school tour_types)
    # mtctm1 logsum coefficients are segmented by primary_purpose
    # (i.e. there are different logsum coefficients for work, school, univ primary_purposes
    # for simplicity managing these different segmentation schemes,
    # we conflate them by segmenting tour processing to align with primary_purpose
    tour_segment_col = 'mandatory_tour_seg'
    assert tour_segment_col not in mandatory_tours
    is_university_tour = \
        (mandatory_tours.tour_type == 'school') & \
        reindex(persons_merged.is_university, mandatory_tours.person_id)
    mandatory_tours[tour_segment_col] = \
        mandatory_tours.tour_type.where(~is_university_tour, 'univ')

    choices = run_tour_scheduling(model_name, mandatory_tours, persons_merged,
                                  tdd_alts, tour_segment_col, chunk_size,
                                  trace_hh_id)

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    tracing.dump_df(DUMP, tt.tour_map(persons_merged, mandatory_tours,
                                      tdd_alts), trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label=trace_label,
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
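The segmentation above uses Series.where for conditional relabeling: values are kept where the condition holds and replaced with 'univ' elsewhere. A toy illustration:

import pandas as pd

# where(~cond, 'univ') keeps tour_type for non-university tours and
# substitutes 'univ' for school tours made by university students.
tour_type = pd.Series(['work', 'school', 'school'])
is_university_tour = pd.Series([False, False, True])
print(tour_type.where(~is_university_tour, 'univ').tolist())
# ['work', 'school', 'univ']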
Example #6
def assign_columns(df, model_settings, configs_dir=None, trace_label=None):
    """
    Evaluate expressions in the context of df and assign the resulting target columns to df

    Can add new or modify existing columns (if target same as existing df column name)

    Parameters - same as for compute_columns except df must not be None
    Returns - nothing since we modify df in place
    """

    assert df is not None

    results = compute_columns(df, model_settings, configs_dir, trace_label)
    assign_in_place(df, results)
Example #7
def non_mandatory_tour_destination(
        tours,
        persons_merged,
        skim_dict, skim_stack,
        chunk_size,
        trace_hh_id):

    """
    Given the tour generation from the above, each tour needs to have a
    destination, so in this case tours are the choosers (with the associated
    person that's making the tour)
    """

    trace_label = 'non_mandatory_tour_destination'
    model_settings = config.read_model_settings('non_mandatory_tour_destination.yaml')

    tours = tours.to_frame()

    persons_merged = persons_merged.to_frame()

    # choosers are tours - in a sense tours are choosing their destination
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    # - if no non_mandatory_tours
    if non_mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    choices = tour_destination.run_tour_destination(
        tours,
        persons_merged,
        model_settings,
        skim_dict,
        skim_stack,
        chunk_size, trace_hh_id, trace_label)

    non_mandatory_tours['destination'] = choices

    assign_in_place(tours, non_mandatory_tours[['destination']])

    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'non_mandatory'],
                         label="non_mandatory_tour_destination",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
Example #8
def atwork_subtour_destination(tours, persons_merged, skim_dict, skim_stack,
                               land_use, size_terms, chunk_size, trace_hh_id):

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results('atwork_subtour_destination')
        return

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    destination_size_terms = tour_destination_size_terms(
        land_use, size_terms, 'atwork')

    destination_sample = atwork_subtour_destination_sample(
        subtours, persons_merged, skim_dict, destination_size_terms,
        chunk_size, trace_hh_id)

    destination_sample = atwork_subtour_destination_logsums(
        persons_merged, destination_sample, skim_dict, skim_stack, chunk_size,
        trace_hh_id)

    choices = atwork_subtour_destination_simulate(subtours, persons_merged,
                                                  destination_sample,
                                                  skim_dict,
                                                  destination_size_terms,
                                                  chunk_size, trace_hh_id)

    subtours['destination'] = choices

    assign_in_place(tours, subtours[['destination']])

    pipeline.replace_table("tours", tours)

    tracing.print_summary('subtour destination',
                          subtours.destination,
                          describe=True)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label='atwork_subtour_destination',
                         columns=['destination'])
Example #9
def annotate_table(configs_dir):

    # model_settings name should have been provided as a step argument
    model_name = inject.get_step_arg('model_name')

    model_settings = config.read_model_settings(configs_dir, '%s.yaml' % model_name)

    df_name = model_settings['DF']
    df = inject.get_table(df_name).to_frame()

    results = expressions.compute_columns(
        df,
        model_settings=model_settings,
        configs_dir=configs_dir,
        trace_label=None)

    assign_in_place(df, results)

    pipeline.replace_table(df_name, df)
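For context on what annotate_table reads: the YAML settings name the table to annotate (the 'DF' key used above) and, in activitysim's usual layout, a companion 'SPEC' expressions CSV whose rows each compute one target column, with the chooser table exposed to the expressions as df. A hypothetical pair, names invented purely for illustration:

annotate_persons_example.yaml (hypothetical):

    DF: persons
    SPEC: annotate_persons_example

annotate_persons_example.csv (hypothetical):

    Description,Target,Expression
    flag adults,is_adult,df.age >= 18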
Example #10
def non_mandatory_tour_destination(tours, persons_merged, skim_dict,
                                   skim_stack, chunk_size, trace_hh_id):
    """
    Given the tour generation from the above, each tour needs to have a
    destination, so in this case tours are the choosers (with the associated
    person that's making the tour)
    """

    trace_label = 'non_mandatory_tour_destination'
    model_settings = config.read_model_settings(
        'non_mandatory_tour_destination.yaml')

    tours = tours.to_frame()

    persons_merged = persons_merged.to_frame()

    # choosers are tours - in a sense tours are choosing their destination
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    # - if no non_mandatory_tours
    if non_mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    choices = tour_destination.run_tour_destination(tours, persons_merged,
                                                    model_settings, skim_dict,
                                                    skim_stack, chunk_size,
                                                    trace_hh_id, trace_label)

    non_mandatory_tours['destination'] = choices

    assign_in_place(tours, non_mandatory_tours[['destination']])

    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'non_mandatory'],
                         label="non_mandatory_tour_destination",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
Example #11
def non_mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                                  trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for non-mandatory tours
    """

    model_name = 'non_mandatory_tour_scheduling'
    trace_label = model_name

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    # - if no non_mandatory_tours
    if non_mandatory_tours.shape[0] == 0:
        tracing.no_results(model_name)
        return

    tour_segment_col = None

    choices = run_tour_scheduling(model_name, non_mandatory_tours,
                                  persons_merged, tdd_alts, tour_segment_col,
                                  chunk_size, trace_hh_id)

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label=trace_label,
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
Example #12
def cleanup_failed_trips(trips):
    """
    drop failed trips and cleanup fields in leg_mates:

    trip_num        assign new ordinal trip num after failed trips are dropped
    trip_count      assign new count of trips in leg, sans failed trips
    first           update first flag as we may have dropped first trip (last trip can't fail)
    next_trip_id    assign id of next trip in leg after failed trips are dropped
    """

    if trips.failed.any():
        logger.warning("cleanup_failed_trips dropping %s failed trips" %
                       trips.failed.sum())

        trips['patch'] = False
        flag_failed_trip_leg_mates(trips, 'patch')

        # drop the original failures
        trips = trips[~trips.failed]

        # increasing trip_id order
        patch_trips = trips[trips.patch].sort_index()

        # recompute fields dependent on trip_num sequence
        grouped = patch_trips.groupby(['tour_id', 'outbound'])
        patch_trips['trip_num'] = grouped.cumcount() + 1
        # FIXME - 'clever' hack to avoid regroup - implementation dependent optimization that could change
        patch_trips['trip_count'] = patch_trips['trip_num'] + grouped.cumcount(
            ascending=False)

        assign_in_place(trips, patch_trips[['trip_num', 'trip_count']])

        del trips['patch']

    del trips['failed']

    return trips
Example #13
def tour_mode_choice_simulate(tours, persons_merged,
                              skim_dict, skim_stack,
                              chunk_size,
                              trace_hh_id):
    """
    Tour mode choice simulate
    """
    trace_label = 'tour_mode_choice'
    model_settings = config.read_model_settings('tour_mode_choice.yaml')

    spec = tour_mode_choice_spec(model_settings)

    primary_tours = tours.to_frame()

    assert not (primary_tours.tour_category == 'atwork').any()

    persons_merged = persons_merged.to_frame()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d tours" % (trace_label, primary_tours.shape[0]))

    tracing.print_summary('tour_types',
                          primary_tours.tour_type, value_counts=True)

    primary_tours_merged = pd.merge(primary_tours, persons_merged, left_on='person_id',
                                    right_index=True, how='left', suffixes=('', '_r'))

    # setup skim keys
    orig_col_name = 'TAZ'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    choices_list = []
    for tour_type, segment in primary_tours_merged.groupby('tour_type'):

        logger.info("tour_mode_choice_simulate tour_type '%s' (%s tours)" %
                    (tour_type, len(segment.index), ))

        # name index so tracing knows how to slice
        assert segment.index.name == 'tour_id'

        choices = run_tour_mode_choice_simulate(
            segment,
            spec, tour_type, model_settings,
            skims=skims,
            constants=constants,
            nest_spec=nest_spec,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, tour_type),
            trace_choice_name='tour_mode_choice')

        tracing.print_summary('tour_mode_choice_simulate %s choices' % tour_type,
                              choices, value_counts=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(choices_list)

    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                          choices, value_counts=True)

    # so we can trace with annotations
    primary_tours['tour_mode'] = choices

    # but only keep mode choice col
    all_tours = tours.to_frame()
    # uncomment to save annotations to table
    # assign_in_place(all_tours, annotations)
    assign_in_place(all_tours, choices.to_frame('tour_mode'))

    pipeline.replace_table("tours", all_tours)

    if trace_hh_id:
        tracing.trace_df(primary_tours,
                         label=tracing.extend_trace_label(trace_label, 'tour_mode'),
                         slicer='tour_id',
                         index_label='tour_id',
                         warn_if_empty=True)
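The loop above is the standard segment-and-concat pattern: run the simulation per tour_type, collect per-segment choice Series indexed like the choosers, and concat them back into one aligned result. A toy illustration with a stand-in model:

import pandas as pd

# Stand-in for the per-segment call to run_tour_mode_choice_simulate:
# each segment yields a Series indexed by tour_id, and concat restores
# one choice per chooser regardless of segment order.
choosers = pd.DataFrame({'tour_type': ['work', 'shop', 'work']},
                        index=pd.Index([101, 102, 103], name='tour_id'))
choices_list = []
for tour_type, segment in choosers.groupby('tour_type'):
    choices_list.append(pd.Series(tour_type, index=segment.index))
choices = pd.concat(choices_list)
print(choices.sort_index())  # one choice per tour_id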
Example #14
def non_mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                                  trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for non-mandatory tours
    """

    trace_label = 'non_mandatory_tour_scheduling'
    model_settings = config.read_model_settings(
        'non_mandatory_tour_scheduling.yaml')

    model_spec = simulate.read_model_spec(
        file_name='tour_scheduling_nonmandatory.csv')
    segment_col = None  # no segmentation of model_spec

    tours = tours.to_frame()
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    logger.info("Running non_mandatory_tour_scheduling with %d tours",
                len(tours))

    persons_merged = persons_merged.to_frame()

    if 'SIMULATE_CHOOSER_COLUMNS' in model_settings:
        persons_merged =\
            expressions.filter_chooser_columns(persons_merged,
                                               model_settings['SIMULATE_CHOOSER_COLUMNS'])

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=non_mandatory_tours,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    tdd_choices, timetable = vectorize_tour_scheduling(
        non_mandatory_tours,
        persons_merged,
        tdd_alts,
        model_spec,
        segment_col,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    timetable.replace_table()

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
Example #15
def joint_tour_destination(tours, persons_merged, households_merged, skim_dict,
                           skim_stack, chunk_size, trace_hh_id):
    """
    Given the tour generation from the above, each tour needs to have a
    destination, so in this case tours are the choosers (with the associated
    person that's making the tour)
    """

    trace_label = 'joint_tour_destination'
    model_settings_file_name = 'joint_tour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    # choosers are tours - in a sense tours are choosing their destination
    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    persons_merged = persons_merged.to_frame()
    households_merged = households_merged.to_frame()

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results('joint_tour_destination')
        return

    estimator = estimation.manager.begin_estimation('joint_tour_destination')
    if estimator:
        estimator.write_coefficients(
            simulate.read_model_coefficients(model_settings))
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'),
                              'size_terms',
                              append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(),
                              'landuse',
                              append=False)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)

        # run_destination_simulate writes choosers because tours are merged just-in-time with persons
        # to reduce memory overhead (the full tours_merged table is only created for one segment at a time)

    choices_df, save_sample_df = run_joint_tour_destination(
        tours, persons_merged, households_merged, want_logsums,
        want_sample_table, model_settings, skim_dict, skim_stack, estimator,
        chunk_size, trace_hh_id, trace_label)

    if estimator:
        estimator.write_choices(choices_df.choice)
        choices_df.choice = estimator.get_survey_values(
            choices_df.choice, 'tours', 'destination')
        estimator.write_override_choices(choices_df.choice)
        estimator.end_estimation()

    # add column as we want joint_tours table for tracing.
    joint_tours['destination'] = choices_df.choice
    assign_in_place(tours, joint_tours[['destination']])
    pipeline.replace_table("tours", tours)

    if want_logsums:
        joint_tours[logsum_column_name] = choices_df['logsum']
        assign_in_place(tours, joint_tours[[logsum_column_name]])

    tracing.print_summary('destination',
                          joint_tours.destination,
                          describe=True)

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_destination.joint_tours")
Example #16
def non_mandatory_tour_destination(
        tours,
        persons_merged,
        skim_dict, skim_stack,
        chunk_size,
        trace_hh_id):

    """
    Given the tour generation from the above, each tour needs to have a
    destination, so in this case tours are the choosers (with the associated
    person that's making the tour)
    """

    trace_label = 'non_mandatory_tour_destination'
    model_settings_file_name = 'non_mandatory_tour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') and sample_table_name is not None

    tours = tours.to_frame()

    persons_merged = persons_merged.to_frame()

    # choosers are tours - in a sense tours are choosing their destination
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    # - if no non_mandatory_tours
    if non_mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    estimator = estimation.manager.begin_estimation('non_mandatory_tour_destination')
    if estimator:
        estimator.write_coefficients(simulate.read_model_coefficients(model_settings))
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    choices_df, save_sample_df = tour_destination.run_tour_destination(
        non_mandatory_tours,
        persons_merged,
        want_logsums,
        want_sample_table,
        model_settings,
        skim_dict,
        skim_stack,
        estimator,
        chunk_size, trace_hh_id, trace_label)

    if estimator:
        estimator.write_choices(choices_df.choice)
        choices_df.choice = estimator.get_survey_values(choices_df.choice, 'tours', 'destination')
        estimator.write_override_choices(choices_df.choice)
        estimator.end_estimation()

    non_mandatory_tours['destination'] = choices_df.choice

    assign_in_place(tours, non_mandatory_tours[['destination']])

    if want_logsums:
        non_mandatory_tours[logsum_column_name] = choices_df['logsum']
        assign_in_place(tours, non_mandatory_tours[[logsum_column_name]])

    pipeline.replace_table("tours", tours)

    if want_sample_table:
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df)
        pipeline.extend_table(sample_table_name, save_sample_df)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'non_mandatory'],
                         label="non_mandatory_tour_destination",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
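The want_sample_table bookkeeping above presumes save_sample_df carries a two-level index of (chooser id, alternative id), so the assertion checks that there is exactly one block of sampled alternatives per chooser. A toy illustration with hypothetical data:

import pandas as pd

# Two tours, each with its sampled destination alternatives; level 0 of
# the index is the chooser id that the assertion counts.
save_sample_df = pd.DataFrame(
    {'prob': [0.5, 0.5, 1.0]},
    index=pd.MultiIndex.from_tuples(
        [(101, 7), (101, 9), (102, 7)], names=['tour_id', 'alt_dest']))
choices_df = pd.DataFrame({'choice': [7, 7]}, index=[101, 102])
assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df)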
Example #17
def atwork_subtour_destination(
        tours,
        persons_merged,
        network_los,
        chunk_size, trace_hh_id):

    trace_label = 'atwork_subtour_destination'
    model_settings_file_name = 'atwork_subtour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    future_settings = {
        'SIZE_TERM_SELECTOR': 'atwork',
        'SEGMENTS': ['atwork'],
        'ORIG_ZONE_ID': 'workplace_zone_id'
    }
    model_settings = config.future_model_settings(model_settings_file_name, model_settings, future_settings)

    destination_column_name = 'destination'
    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') and sample_table_name is not None

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results('atwork_subtour_destination')
        return

    estimator = estimation.manager.begin_estimation('atwork_subtour_destination')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    choices_df, save_sample_df = tour_destination.run_tour_destination(
        subtours,
        persons_merged,
        want_logsums,
        want_sample_table,
        model_settings,
        network_los,
        estimator,
        chunk_size, trace_hh_id, trace_label)

    if estimator:
        estimator.write_choices(choices_df['choice'])
        choices_df['choice'] = estimator.get_survey_values(choices_df['choice'], 'tours', 'destination')
        estimator.write_override_choices(choices_df['choice'])
        estimator.end_estimation()

    subtours[destination_column_name] = choices_df['choice']
    assign_in_place(tours, subtours[[destination_column_name]])

    if want_logsums:
        subtours[logsum_column_name] = choices_df['logsum']
        assign_in_place(tours, subtours[[logsum_column_name]])

    pipeline.replace_table("tours", tours)

    tracing.print_summary(destination_column_name,
                          subtours[destination_column_name],
                          describe=True)

    if want_sample_table:
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df)
        # save_sample_df.set_index(model_settings['ALT_DEST_COL_NAME'], append=True, inplace=True)
        pipeline.extend_table(sample_table_name, save_sample_df)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label='atwork_subtour_destination',
                         columns=['destination'])
Example #18
def run_trip_destination(trips,
                         tours_merged,
                         estimator,
                         chunk_size,
                         trace_hh_id,
                         trace_label,
                         fail_some_trips_for_testing=False):
    """
    trip destination - main functionality separated from model step so it can be called iteratively

    Run the trip_destination model, assigning destinations for each (intermediate) trip
    (last trips already have a destination - either the tour primary destination or Home)

    Set trip destination and origin columns, and a boolean failed flag for any failed trips
    (destination for flagged failed trips will be set to -1)

    Parameters
    ----------
    trips
    tours_merged
    estimator
    chunk_size
    trace_hh_id
    trace_label
    fail_some_trips_for_testing

    Returns
    -------

    """

    model_settings_file_name = 'trip_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)
    preprocessor_settings = model_settings.get('preprocessor', None)
    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])

    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    land_use = inject.get_table('land_use')
    size_terms = inject.get_injectable('size_terms')
    network_los = inject.get_injectable('network_los')

    trips = trips.sort_index()
    trips['next_trip_id'] = np.roll(trips.index, -1)
    trips.next_trip_id = trips.next_trip_id.where(
        trips.trip_num < trips.trip_count, 0)

    # - initialize trip origin and destination to those of half-tour
    # (we will sequentially adjust intermediate trips' origins and destinations as we choose them)
    tour_destination = reindex(tours_merged.destination,
                               trips.tour_id).astype(np.int64)
    tour_origin = reindex(tours_merged.origin, trips.tour_id).astype(np.int64)
    trips['destination'] = np.where(trips.outbound, tour_destination,
                                    tour_origin)
    trips['origin'] = np.where(trips.outbound, tour_origin, tour_destination)
    trips['failed'] = False

    if estimator:
        # need to check or override non-intermediate trip destination
        # should check consistency of survey trips origin, destination with parent tour and subsequent/prior trip?
        # FIXME if not consistent, do we fail or override? (seems weird to override them to bad values?)

        # expect all the same trips
        survey_trips = estimator.get_survey_table('trips').sort_index()
        assert survey_trips.index.equals(trips.index)

        first = (survey_trips.trip_num == 1)
        last = (survey_trips.trip_num == trips.trip_count)

        # expect survey's outbound first trip origin to be same as half tour origin
        assert (
            survey_trips.origin[survey_trips.outbound
                                & first] == tour_origin[survey_trips.outbound
                                                        & first]).all()
        # expect outbound last trip destination to be same as half tour destination
        assert (survey_trips.destination[survey_trips.outbound & last] ==
                tour_destination[survey_trips.outbound & last]).all()

        # expect inbound first trip origin to be same as half tour destination
        assert (survey_trips.origin[~survey_trips.outbound & first] ==
                tour_destination[~survey_trips.outbound & first]).all()
        # expect inbound last trip destination to be same as half tour origin
        assert (survey_trips.destination[~survey_trips.outbound & last] ==
                tour_origin[~survey_trips.outbound & last]).all()

    # - filter tours_merged (AFTER copying destination and origin columns to trips)
    # tours_merged is used for logsums, we filter it here upfront to save space and time
    tours_merged_cols = logsum_settings['TOURS_MERGED_CHOOSER_COLUMNS']
    redundant_cols = model_settings.get(
        'REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS', [])
    if redundant_cols:
        tours_merged_cols = [
            c for c in tours_merged_cols if c not in redundant_cols
        ]

    tours_merged = tours_merged[tours_merged_cols]

    # - skims
    skim_hotel = SkimHotel(model_settings, network_los, trace_label)

    # - size_terms and alternatives
    alternatives = tour_destination_size_terms(land_use, size_terms, 'trip')

    # DataFrameMatrix allows us to treat the dataframe as a virtual 2-D array, indexed by zone_id and purpose
    # e.g. size_terms.get(df.dest_zone_id, df.purpose)
    # returns a series of size_terms for each chooser's dest_zone_id and purpose with chooser index
    size_term_matrix = DataFrameMatrix(alternatives)

    # don't need size terms in alternatives, just zone_id index
    alternatives = alternatives.drop(alternatives.columns, axis=1)
    alternatives.index.name = model_settings['ALT_DEST_COL_NAME']

    sample_list = []

    # - process intermediate trips in ascending trip_num order
    intermediate = trips.trip_num < trips.trip_count
    if intermediate.any():

        first_trip_num = trips[intermediate].trip_num.min()
        last_trip_num = trips[intermediate].trip_num.max()

        # iterate over trips in ascending trip_num order
        for trip_num in range(first_trip_num, last_trip_num + 1):

            nth_trips = trips[intermediate & (trips.trip_num == trip_num)]
            nth_trace_label = tracing.extend_trace_label(
                trace_label, 'trip_num_%s' % trip_num)

            locals_dict = {'network_los': network_los}
            locals_dict.update(config.get_model_constants(model_settings))

            # - annotate nth_trips
            if preprocessor_settings:
                expressions.assign_columns(
                    df=nth_trips,
                    model_settings=preprocessor_settings,
                    locals_dict=locals_dict,
                    trace_label=nth_trace_label)

            logger.info("Running %s with %d trips", nth_trace_label,
                        nth_trips.shape[0])

            # - choose destination for nth_trips, segmented by primary_purpose
            choices_list = []
            for primary_purpose, trips_segment in nth_trips.groupby(
                    'primary_purpose'):
                choices, destination_sample = choose_trip_destination(
                    primary_purpose,
                    trips_segment,
                    alternatives,
                    tours_merged,
                    model_settings,
                    want_logsums,
                    want_sample_table,
                    size_term_matrix,
                    skim_hotel,
                    estimator,
                    chunk_size,
                    trace_hh_id,
                    trace_label=tracing.extend_trace_label(
                        nth_trace_label, primary_purpose))

                choices_list.append(choices)
                if want_sample_table:
                    assert destination_sample is not None
                    sample_list.append(destination_sample)

            destinations_df = pd.concat(choices_list)

            if fail_some_trips_for_testing:
                if len(destinations_df) > 0:
                    destinations_df = destinations_df.drop(
                        destinations_df.index[0])

            failed_trip_ids = nth_trips.index.difference(destinations_df.index)
            if failed_trip_ids.any():
                logger.warning(
                    "%s sidelining %s trips without viable destination alternatives"
                    % (nth_trace_label, failed_trip_ids.shape[0]))
                next_trip_ids = nth_trips.next_trip_id.reindex(failed_trip_ids)
                trips.loc[failed_trip_ids, 'failed'] = True
                trips.loc[failed_trip_ids, 'destination'] = -1
                trips.loc[next_trip_ids,
                          'origin'] = trips.loc[failed_trip_ids].origin.values

            if len(destinations_df) == 0:
                assert failed_trip_ids.all()
                logger.warning(
                    f"all {len(nth_trips)} {primary_purpose} trip_num {trip_num} trips failed"
                )

            if len(destinations_df) > 0:
                # - assign choices to this trip's destinations
                # if estimator, then the choices will already have been overridden by trip_destination_simulate
                # because we need to overwrite choices before any failed choices are suppressed
                assign_in_place(trips,
                                destinations_df.choice.to_frame('destination'))
                if want_logsums:
                    assert 'logsum' in destinations_df.columns
                    assign_in_place(
                        trips,
                        destinations_df.logsum.to_frame(logsum_column_name))

                # - assign choice to next trip's origin
                destinations_df.index = nth_trips.next_trip_id.reindex(
                    destinations_df.index)
                assign_in_place(trips,
                                destinations_df.choice.to_frame('origin'))

    del trips['next_trip_id']

    if len(sample_list) > 0:
        save_sample_df = pd.concat(sample_list)
    else:
        # this could happen if no intermediate trips, or if no saved sample desired
        save_sample_df = None

    return trips, save_sample_df
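The next_trip_id setup near the top of the example is worth isolating: with trips sorted by trip_id, np.roll(index, -1) pairs each trip with its successor, and the final trip of each leg gets 0 because it has no successor whose origin would need patching. A toy illustration:

import numpy as np
import pandas as pd

trips = pd.DataFrame(
    {'trip_num': [1, 2, 1], 'trip_count': [2, 2, 1]},
    index=pd.Index([1001, 1002, 2001], name='trip_id')).sort_index()
trips['next_trip_id'] = np.roll(trips.index, -1)
trips.next_trip_id = trips.next_trip_id.where(
    trips.trip_num < trips.trip_count, 0)
print(trips.next_trip_id.tolist())  # [1002, 0, 0]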
Example #19
def joint_tour_destination(tours, persons_merged, households_merged,
                           network_los, chunk_size, trace_hh_id):
    """
    Given the tour generation from the above, each tour needs to have a
    destination, so in this case tours are the choosers (with the associated
    person that's making the tour)
    """

    trace_label = 'joint_tour_destination'
    model_settings_file_name = 'joint_tour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    # choosers are tours - in a sense tours are choosing their destination
    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    persons_merged = persons_merged.to_frame()

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results('joint_tour_destination')
        return

    estimator = estimation.manager.begin_estimation('joint_tour_destination')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'),
                              'size_terms',
                              append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(),
                              'landuse',
                              append=False)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)

    choices_df, save_sample_df = tour_destination.run_tour_destination(
        tours, persons_merged, want_logsums, want_sample_table, model_settings,
        network_los, estimator, chunk_size, trace_hh_id, trace_label)

    if estimator:
        estimator.write_choices(choices_df.choice)
        choices_df.choice = estimator.get_survey_values(
            choices_df.choice, 'tours', 'destination')
        estimator.write_override_choices(choices_df.choice)
        estimator.end_estimation()

    # add column as we want joint_tours table for tracing.
    joint_tours['destination'] = choices_df.choice
    assign_in_place(tours, joint_tours[['destination']])
    pipeline.replace_table("tours", tours)

    if want_logsums:
        joint_tours[logsum_column_name] = choices_df['logsum']
        assign_in_place(tours, joint_tours[[logsum_column_name]])

    tracing.print_summary('destination',
                          joint_tours.destination,
                          describe=True)

    if want_sample_table:
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(
            choices_df)
        # save_sample_df.set_index(model_settings['ALT_DEST_COL_NAME'], append=True, inplace=True)
        pipeline.extend_table(sample_table_name, save_sample_df)

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_destination.joint_tours")
Example #20
def trip_mode_choice(
        trips,
        tours_merged,
        network_los,
        chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour purpose are calculated separately because they have different
    coefficient values (stored in the trip_mode_choice_coeffs.csv coefficient file).

    Adds trip_mode column to trip table
    """
    trace_label = 'trip_mode_choice'
    model_settings = config.read_model_settings('trip_mode_choice.yaml')

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'

    model_spec = \
        simulate.read_model_spec(file_name=model_settings['SPEC'])
    omnibus_coefficients = \
        assign.read_constant_spec(config.config_file_path(model_settings['COEFFICIENTS']))

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    nest_spec = config.get_logit_model_settings(model_settings)

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose, value_counts=True)

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col
    })

    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col, dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col, dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col, dest_key=dest_col,
                                           tod_key='trip_period', segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label, tag='tvpb_logsum_odt')
        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
            # 'tvpb_logsum_dot': tvpb_logsum_dot
        })

        # TVPB constants can appear in expressions
        constants.update(network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)" %
                    (primary_purpose, len(trips_segment.index), ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        if network_los.zone_system == los.THREE_ZONE:
            tvpb_logsum_odt.extend_trace_label(primary_purpose)
            # tvpb_logsum_dot.extend_trace_label(primary_purpose)

        locals_dict = assign.evaluate_constants(omnibus_coefficients[primary_purpose],
                                                constants=constants)
        locals_dict.update(constants)

        expressions.annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        locals_dict.update(skims)

        choices = mode_choice_simulate(
            choosers=trips_segment,
            spec=model_spec,
            nest_spec=nest_spec,
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            trace_label=trace_label,
            trace_choice_name='trip_mode_choice')

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            assign_in_place(trips_segment, choices)

            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:

        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_type in tvpb_mode_path_types.items():

            skim_cache = tvpb_logsum_odt.cache[path_type]

            print(f"mode {mode} path_type {path_type}")

            for c in skim_cache:
                dest_col = c
                if dest_col not in choices_df:
                    choices_df[dest_col] = np.nan
                choices_df[dest_col].where(choices_df[mode_column_name] != mode, skim_cache[c], inplace=True)

    # update trips table with choices (and optionally logsums)
    trips_df = trips.to_frame()
    assign_in_place(trips_df, choices_df)

    tracing.print_summary('tour_modes',
                          trips_merged.tour_mode, value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          trips_df[mode_column_name], value_counts=True)

    assert not trips_df[mode_column_name].isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
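The cache splice near the end of the example relies on Series.where: values are kept where the chosen mode differs and replaced by the cached skim values where it matches. A toy illustration (hypothetical cached column), written as an assignment rather than the inplace call so it also behaves under pandas copy-on-write:

import numpy as np
import pandas as pd

choices_df = pd.DataFrame({'trip_mode': ['WALK_TRANSIT', 'DRIVE']})
skim_cache = pd.DataFrame({'btap': [17, 99]})  # hypothetical cached taps

mode, c = 'WALK_TRANSIT', 'btap'
if c not in choices_df:
    choices_df[c] = np.nan
choices_df[c] = choices_df[c].where(choices_df['trip_mode'] != mode,
                                    skim_cache[c])
print(choices_df[c].tolist())  # [17.0, nan]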
Example #21
def run_trip_destination(
        trips,
        tours_merged,
        chunk_size, trace_hh_id,
        trace_label):
    """
    trip destination - main functionality separated from model step so it can be called iteratively

    Run the trip_destination model, assigning destinations for each (intermediate) trip
    (last trips already have a destination - either the tour primary destination or Home)

    Set trip destination and origin columns, and a boolean failed flag for any failed trips
    (destination for flagged failed trips will be set to -1)

    Parameters
    ----------
    trips
    tours_merged
    chunk_size
    trace_hh_id
    trace_label

    Returns
    -------

    """

    model_settings = config.read_model_settings('trip_destination.yaml')
    preprocessor_settings = model_settings.get('preprocessor', None)
    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    land_use = inject.get_table('land_use')
    size_terms = inject.get_injectable('size_terms')

    # - initialize trip origin and destination to those of half-tour
    # (we will sequentially adjust intermediate trips' origins and destinations as we choose them)
    tour_destination = reindex(tours_merged.destination, trips.tour_id).astype(int)
    tour_origin = reindex(tours_merged.origin, trips.tour_id).astype(int)
    trips['destination'] = np.where(trips.outbound, tour_destination, tour_origin)
    trips['origin'] = np.where(trips.outbound, tour_origin, tour_destination)
    trips['failed'] = False

    trips = trips.sort_index()
    trips['next_trip_id'] = np.roll(trips.index, -1)
    trips.next_trip_id = trips.next_trip_id.where(trips.trip_num < trips.trip_count, 0)

    # - filter tours_merged (AFTER copying destination and origin columns to trips)
    # tours_merged is used for logsums, we filter it here upfront to save space and time
    tours_merged_cols = logsum_settings['TOURS_MERGED_CHOOSER_COLUMNS']
    if 'REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS' in model_settings:
        redundant_cols = model_settings['REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS']
        tours_merged_cols = [c for c in tours_merged_cols if c not in redundant_cols]
    tours_merged = tours_merged[tours_merged_cols]

    # - skims
    skims = wrap_skims(model_settings)

    # - size_terms and alternatives
    alternatives = tour_destination_size_terms(land_use, size_terms, 'trip')

    # DataFrameMatrix allows us to treat the dataframe as a virtual 2-D array, indexed by TAZ and purpose
    # e.g. size_terms.get(df.dest_taz, df.purpose)
    # returns a series of size_terms for each chooser's dest_taz and purpose with chooser index
    size_term_matrix = DataFrameMatrix(alternatives)

    # don't need size terms in alternatives, just TAZ index
    alternatives = alternatives.drop(alternatives.columns, axis=1)
    alternatives.index.name = model_settings['ALT_DEST']

    # - process intermediate trips in ascending trip_num order
    intermediate = trips.trip_num < trips.trip_count
    if intermediate.any():

        first_trip_num = trips[intermediate].trip_num.min()
        last_trip_num = trips[intermediate].trip_num.max()

        # iterate over trips in ascending trip_num order
        for trip_num in range(first_trip_num, last_trip_num + 1):

            nth_trips = trips[intermediate & (trips.trip_num == trip_num)]
            nth_trace_label = tracing.extend_trace_label(trace_label, 'trip_num_%s' % trip_num)

            # - annotate nth_trips
            if preprocessor_settings:
                expressions.assign_columns(
                    df=nth_trips,
                    model_settings=preprocessor_settings,
                    locals_dict=config.get_model_constants(model_settings),
                    trace_label=nth_trace_label)

            logger.info("Running %s with %d trips", nth_trace_label, nth_trips.shape[0])

            # - choose destination for nth_trips, segmented by primary_purpose
            choices_list = []
            for primary_purpose, trips_segment in nth_trips.groupby('primary_purpose'):
                choices = choose_trip_destination(
                    primary_purpose,
                    trips_segment,
                    alternatives,
                    tours_merged,
                    model_settings,
                    size_term_matrix, skims,
                    chunk_size, trace_hh_id,
                    trace_label=tracing.extend_trace_label(nth_trace_label, primary_purpose))

                choices_list.append(choices)

            destinations = pd.concat(choices_list)

            failed_trip_ids = nth_trips.index.difference(destinations.index)
            if failed_trip_ids.any():
                logger.warning("%s sidelining %s trips without viable destination alternatives" %
                               (nth_trace_label, failed_trip_ids.shape[0]))
                next_trip_ids = nth_trips.next_trip_id.reindex(failed_trip_ids)
                trips.loc[failed_trip_ids, 'failed'] = True
                trips.loc[failed_trip_ids, 'destination'] = -1
                trips.loc[next_trip_ids, 'origin'] = trips.loc[failed_trip_ids].origin.values

            # - assign choices to these trips destinations and to next trips origin
            assign_in_place(trips, destinations.to_frame('destination'))
            destinations.index = nth_trips.next_trip_id.reindex(destinations.index)
            assign_in_place(trips, destinations.to_frame('origin'))

    del trips['next_trip_id']

    return trips
Example #22
def joint_tour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each joint tour
    """
    trace_label = 'joint_tour_scheduling'
    model_settings = config.read_model_settings('joint_tour_scheduling.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_joint.csv')

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    # use inject.get_table as this won't exist if there are no joint_tours
    joint_tour_participants = inject.get_table('joint_tour_participants').to_frame()

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d joint tours", trace_label, joint_tours.shape[0])

    # it may seem peculiar that we are concerned with persons rather than households
    # but every joint tour is (somewhat arbitrarily) assigned a "primary person"
    # some of whose characteristics are used in the spec
    # and we get household attributes along with person attributes in persons_merged
    persons_merged = persons_merged[persons_merged.num_hh_joint_tours > 0]

    # since a household's joint tours each potentially have different participants
    # they may also have different joint tour masks (free time of all participants)
    # so we have to either chunk processing by joint_tour_num and build timetable by household
    # or build timetables by unique joint_tour

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=joint_tours,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    tdd_choices, timetable = vectorize_joint_tour_scheduling(
        joint_tours, joint_tour_participants,
        persons_merged,
        tdd_alts,
        spec=model_spec,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    timetable.replace_table()

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    joint_tours = tours[tours.tour_category == 'joint']

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_scheduling",
                         slicer='household_id')
Example #23
def joint_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                          trace_hh_id):
    """
    This model predicts the departure time and duration of each joint tour
    """
    trace_label = 'joint_tour_scheduling'

    model_settings_file_name = 'joint_tour_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    # use inject.get_table as this won't exist if there are no joint_tours
    joint_tour_participants = inject.get_table(
        'joint_tour_participants').to_frame()

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d joint tours", trace_label,
                joint_tours.shape[0])

    # it may seem peculiar that we are concerned with persons rather than households
    # but every joint tour is (somewhat arbitrarily) assigned a "primary person"
    # some of whose characteristics are used in the spec
    # and we get household attributes along with person attributes in persons_merged
    persons_merged = persons_merged[persons_merged.num_hh_joint_tours > 0]

    # since a household's joint tours each potentially have different participants
    # they may also have different joint tour masks (free time of all participants)
    # so we have to either chunk processing by joint_tour_num and build timetable by household
    # or build timetables by unique joint_tour

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=joint_tours,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    timetable = inject.get_injectable("timetable")

    estimator = estimation.manager.begin_estimation('joint_tour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        timetable.begin_transaction(estimator)

    choices = vectorize_joint_tour_scheduling(joint_tours,
                                              joint_tour_participants,
                                              persons_merged,
                                              tdd_alts,
                                              timetable,
                                              spec=model_spec,
                                              model_settings=model_settings,
                                              estimator=estimator,
                                              chunk_size=chunk_size,
                                              trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in joint_tours.groupby('tour_num', sort=True):
            nth_participants = \
                joint_tour_participants[joint_tour_participants.tour_id.isin(nth_tours.index)]

            estimator.log(
                "assign timetable for %s participants in %s tours with tour_num %s"
                % (len(nth_participants), len(nth_tours), tour_num))
            # - update timetables of all joint tour participants
            timetable.assign(nth_participants.person_id,
                             reindex(choices, nth_participants.tour_id))

    timetable.replace_table()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'),
                       tdd_alts,
                       left_on=['tdd'],
                       right_index=True,
                       how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    joint_tours = tours[tours.tour_category == 'joint']

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_scheduling",
                         slicer='household_id')
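
The merge of tdd choices onto tdd_alts above is a plain pandas left join on the alternative id. A minimal sketch with made-up alternative ids and times:

import pandas as pd

# hypothetical tdd_alts table: one row per alternative id with start/end/duration
tdd_alts = pd.DataFrame(
    {'start': [5, 8], 'end': [9, 18], 'duration': [4, 10]},
    index=pd.Index([0, 1], name='tdd'))

# chosen alternative id for each tour
choices = pd.Series([1, 0, 1], index=pd.Index([101, 102, 103], name='tour_id'))

choices = pd.merge(choices.to_frame('tdd'), tdd_alts,
                   left_on=['tdd'], right_index=True, how='left')

print(choices)  # each tour row now carries tdd plus its start, end and duration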
Example #24
def stop_frequency(tours, tours_merged, stop_frequency_alts, network_los,
                   chunk_size, trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound stops.
    Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero inbound stops,
    and four corresponding trips: three outbound, and one inbound.

    Adds a stop_frequency str column to tours, and creates a trips table
    with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count

    """

    trace_label = 'stop_frequency'
    model_settings_file_name = 'stop_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()
    assert not tours_merged.household_id.isnull().any()
    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        assert 'origin' in tours_merged
        assert 'destination' in tours_merged
        od_skim_stack_wrapper = network_los.get_default_skim_dict().wrap(
            'origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper,
            'network_los': network_los
        }
        locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose,
                          value_counts=True)

    spec_segments = model_settings.get('SPEC_SEGMENTS')
    assert spec_segments is not None, f"SPEC_SEGMENTS setting not found in model settings: {model_settings_file_name}"
    segment_col = model_settings.get('SEGMENT_COL')
    assert segment_col is not None, f"SEGMENT_COL setting not found in model settings: {model_settings_file_name}"

    nest_spec = config.get_logit_model_settings(model_settings)

    choices_list = []
    for segment_settings in spec_segments:

        segment_name = segment_settings[segment_col]
        segment_value = segment_settings[segment_col]

        chooser_segment = tours_merged[tours_merged[segment_col] ==
                                       segment_value]

        if len(chooser_segment) == 0:
            logging.info(
                f"{trace_label} skipping empty segment {segment_name}")
            continue

        logging.info(
            f"{trace_label} running segment {segment_name} with {chooser_segment.shape[0]} chooser rows"
        )

        estimator = estimation.manager.begin_estimation(
            model_name=segment_name, bundle_name='stop_frequency')

        segment_spec = simulate.read_model_spec(
            file_name=segment_settings['SPEC'])
        assert segment_spec is not None, "spec for segment_type %s not found" % segment_name

        coefficients_file_name = segment_settings['COEFFICIENTS']
        coefficients_df = simulate.read_model_coefficients(
            file_name=coefficients_file_name)
        segment_spec = simulate.eval_coefficients(segment_spec,
                                                  coefficients_df, estimator)

        if estimator:
            estimator.write_spec(segment_settings, bundle_directory=False)
            estimator.write_model_settings(model_settings,
                                           model_settings_file_name,
                                           bundle_directory=True)
            estimator.write_coefficients(coefficients_df, segment_settings)
            estimator.write_choosers(chooser_segment)

            estimator.set_chooser_id(chooser_segment.index.name)

        choices = simulate.simple_simulate(
            choosers=chooser_segment,
            spec=segment_spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_name),
            trace_choice_name='stops',
            estimator=estimator)

        # convert indexes to alternative names
        choices = pd.Series(segment_spec.columns[choices.values],
                            index=choices.index)

        if estimator:
            estimator.write_choices(choices)
            choices = estimator.get_survey_values(
                choices, 'tours', 'stop_frequency')  # override choices
            estimator.write_override_choices(choices)
            estimator.end_estimation()

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    # FIXME should have added this when tours created?
    if 'primary_purpose' not in tours.columns:
        # not already there, but the annotate tours preprocessor will have added it to tours_merged
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if estimator:
        # make sure they created trips with the expected tour_ids
        columns = ['person_id', 'household_id', 'tour_id', 'outbound']

        survey_trips = estimation.manager.get_survey_table(table_name='trips')
        different = False
        survey_trips_not_in_trips = survey_trips[~survey_trips.index.isin(trips.index)]
        if len(survey_trips_not_in_trips) > 0:
            print(f"survey_trips_not_in_trips\n{survey_trips_not_in_trips}")
            different = True
        trips_not_in_survey_trips = trips[~trips.index.isin(survey_trips.index)]
        if len(trips_not_in_survey_trips) > 0:
            print(f"trips_not_in_survey_trips\n{trips_not_in_survey_trips}")
            different = True
        assert not different

        survey_trips = \
            estimation.manager.get_survey_values(trips,
                                                 table_name='trips',
                                                 column_names=columns)

        trips_differ = (trips[columns] != survey_trips[columns]).any(axis=1)

        if trips_differ.any():
            print("trips_differ\n%s" % trips_differ)
            print("%s of %s tours differ" %
                  (trips_differ.sum(), len(trips_differ)))
            print("differing survey_trips\n%s" % survey_trips[trips_differ])
            print("differing modeled_trips\n%s" % trips[columns][trips_differ])

        assert (not trips_differ.any())

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        if preprocessor_settings:
            tracing.trace_df(annotations,
                             label="stop_frequency.annotations",
                             columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
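
simple_simulate returns choices as integer positions into the spec's alternative columns; mapping them back to names is a single indexing step over spec.columns. A toy illustration (the spec columns and tour ids are hypothetical):

import pandas as pd

# hypothetical spec with one column per stop-frequency alternative
spec = pd.DataFrame(columns=['0out_0in', '1out_0in', '2out_0in'])

# raw choices come back as positional indexes into spec.columns
raw_choices = pd.Series([2, 0, 1], index=pd.Index([11, 12, 13], name='tour_id'))

choices = pd.Series(spec.columns[raw_choices.values], index=raw_choices.index)

print(choices)  # 11 -> '2out_0in', 12 -> '0out_0in', 13 -> '1out_0in'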
Example #25
def atwork_subtour_mode_choice(tours, persons_merged, network_los, chunk_size,
                               trace_hh_id):
    """
    At-work subtour mode choice simulate
    """

    trace_label = 'atwork_subtour_mode_choice'

    model_settings_file_name = 'tour_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'tour_mode'

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    subtours_merged = \
        pd.merge(subtours, persons_merged.to_frame(),
                 left_on='person_id', right_index=True, how='left')

    logger.info("Running %s with %d subtours" %
                (trace_label, subtours_merged.shape[0]))

    tracing.print_summary('%s tour_type' % trace_label,
                          subtours_merged.tour_type,
                          value_counts=True)

    constants = {}
    constants.update(config.get_model_constants(model_settings))

    skim_dict = network_los.get_default_skim_dict()

    # setup skim keys
    orig_col_name = 'workplace_zone_id'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name,
                                               dest_key=dest_col_name,
                                               dim3_key='out_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name,
                                               dest_key=orig_col_name,
                                               dim3_key='in_period')
    odr_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name,
                                               dest_key=dest_col_name,
                                               dim3_key='in_period')
    dor_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name,
                                               dest_key=orig_col_name,
                                               dim3_key='out_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "odr_skims": odr_skim_stack_wrapper,
        "dor_skims": dor_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col_name,
                                           dest_key=dest_col_name,
                                           tod_key='out_period',
                                           segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_odt')
        tvpb_logsum_dot = tvpb.wrap_logsum(orig_key=dest_col_name,
                                           dest_key=orig_col_name,
                                           tod_key='in_period',
                                           segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_dot')

        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
            'tvpb_logsum_dot': tvpb_logsum_dot
        })

        # TVPB constants can appear in expressions
        constants.update(
            network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    estimator = estimation.manager.begin_estimation(
        'atwork_subtour_mode_choice')
    if estimator:
        estimator.write_coefficients(
            simulate.read_model_coefficients(model_settings))
        estimator.write_coefficients_template(
            simulate.read_model_coefficient_template(model_settings))
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        # FIXME run_tour_mode_choice_simulate writes choosers post-annotation

    choices_df = run_tour_mode_choice_simulate(
        subtours_merged,
        tour_purpose='atwork',
        model_settings=model_settings,
        mode_column_name=mode_column_name,
        logsum_column_name=logsum_column_name,
        network_los=network_los,
        skims=skims,
        constants=constants,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='tour_mode_choice')

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:

        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_types in tvpb_mode_path_types.items():

            for direction, skim in zip(['od', 'do'],
                                       [tvpb_logsum_odt, tvpb_logsum_dot]):

                path_type = path_types[direction]
                skim_cache = skim.cache[path_type]

                print(
                    f"mode {mode} direction {direction} path_type {path_type}")

                for c in skim_cache:

                    dest_col = f'{direction}_{c}'

                    if dest_col not in choices_df:
                        default = 0 if pd.api.types.is_numeric_dtype(skim_cache[c]) else ''
                        choices_df[dest_col] = default
                    choices_df[dest_col].where(choices_df.tour_mode != mode,
                                               skim_cache[c],
                                               inplace=True)

    if estimator:
        estimator.write_choices(choices_df[mode_column_name])
        choices_df[mode_column_name] = \
            estimator.get_survey_values(choices_df[mode_column_name], 'tours', mode_column_name)
        estimator.write_override_choices(choices_df[mode_column_name])
        estimator.end_estimation()

    tracing.print_summary('%s choices' % trace_label,
                          choices_df[mode_column_name],
                          value_counts=True)

    assign_in_place(tours, choices_df)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label=tracing.extend_trace_label(
                             trace_label, mode_column_name),
                         slicer='tour_id',
                         index_label='tour_id')

    force_garbage_collect()
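
The cached-logsum copy above leans on Series.where, which keeps a value where the condition holds and substitutes the second operand elsewhere. The same pattern on toy data (the column and mode names are hypothetical); assigning the result back is equivalent to the inplace form used above:

import pandas as pd

choices_df = pd.DataFrame({'tour_mode': ['WALK_TRANSIT', 'DRIVE', 'WALK_TRANSIT']},
                          index=[1, 2, 3])
cached_col = pd.Series([55, 66, 77], index=[1, 2, 3])  # e.g. cached tap ids

choices_df['od_btap'] = 0  # default for tours not using this mode
# keep the default where tour_mode != 'WALK_TRANSIT', else take the cached value
choices_df['od_btap'] = choices_df['od_btap'].where(
    choices_df.tour_mode != 'WALK_TRANSIT', cached_col)

print(choices_df)  # rows 1 and 3 get 55 and 77; row 2 keeps 0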
Example #26
def atwork_subtour_mode_choice_simulate(tours, persons_merged,
                                        tour_mode_choice_spec,
                                        tour_mode_choice_settings, skim_dict,
                                        skim_stack, trace_hh_id):
    """
    At-work subtour mode choice simulate
    """

    trace_label = 'atwork_subtour_mode_choice'

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'subtour']
    # merge persons into tours
    choosers = pd.merge(subtours,
                        persons_merged.to_frame(),
                        left_on='person_id',
                        right_index=True)

    nest_spec = config.get_logit_model_settings(tour_mode_choice_settings)
    constants = config.get_model_constants(tour_mode_choice_settings)

    logger.info("Running %s with %d subtours" %
                (trace_label, len(subtours.index)))

    tracing.print_summary('%s tour_type' % trace_label,
                          subtours.tour_type,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(tour_mode_choice_spec,
                         tracing.extend_trace_label(trace_label, 'spec'),
                         slicer='NONE',
                         transpose=False)

    # setup skim keys
    odt_skim_stack_wrapper = skim_stack.wrap(left_key='workplace_taz',
                                             right_key='destination',
                                             skim_key="out_period")
    dot_skim_stack_wrapper = skim_stack.wrap(left_key='destination',
                                             right_key='workplace_taz',
                                             skim_key="in_period")
    od_skims = skim_dict.wrap('workplace_taz', 'destination')

    spec = get_segment_and_unstack(tour_mode_choice_spec, segment='workbased')

    if trace_hh_id:
        tracing.trace_df(spec,
                         tracing.extend_trace_label(trace_label, 'spec'),
                         slicer='NONE',
                         transpose=False)

    choices = _mode_choice_simulate(
        choosers,
        odt_skim_stack_wrapper=odt_skim_stack_wrapper,
        dot_skim_stack_wrapper=dot_skim_stack_wrapper,
        od_skim_stack_wrapper=od_skims,
        spec=spec,
        constants=constants,
        nest_spec=nest_spec,
        trace_label=trace_label,
        trace_choice_name='tour_mode_choice')

    tracing.print_summary('%s choices' % trace_label,
                          choices,
                          value_counts=True)

    # choices are modes, not destinations: store them in the tour 'mode' column
    subtours['mode'] = choices
    assign_in_place(tours, subtours[['mode']])

    if trace_hh_id:
        trace_columns = [
            'mode', 'person_id', 'tour_type', 'tour_num', 'parent_tour_id'
        ]
        tracing.trace_df(subtours,
                         label=tracing.extend_trace_label(trace_label, 'mode'),
                         slicer='tour_id',
                         index_label='tour_id',
                         columns=trace_columns,
                         warn_if_empty=True)

    # FIXME - this forces garbage collection
    memory_info()
Example #27
def stop_frequency(
        tours, tours_merged,
        stop_frequency_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound stops.
    Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero inbound stops,
    and four corresponding trips: three outbound, and one inbound.

    Adds a stop_frequency str column to tours, and creates a trips table
    with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count

    """

    trace_label = 'stop_frequency'
    model_settings = config.read_model_settings('stop_frequency.yaml')

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()

    assert not tours_merged.household_id.isnull().any()

    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        od_skim_stack_wrapper = skim_dict.wrap('origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper
        }
        if constants is not None:
            locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose, value_counts=True)

    choices_list = []
    for segment_type, choosers in tours_merged.groupby('primary_purpose'):

        logging.info("%s running segment %s with %s chooser rows" %
                     (trace_label, segment_type, choosers.shape[0]))

        spec = simulate.read_model_spec(file_name='stop_frequency_%s.csv' % segment_type)

        assert spec is not None, "spec for segment_type %s not found" % segment_type

        choices = simulate.simple_simulate(
            choosers=choosers,
            spec=spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_type),
            trace_choice_name='stops')

        # convert indexes to alternative names
        choices = pd.Series(spec.columns[choices.values], index=choices.index)

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    if 'primary_purpose' not in tours.columns:
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        if preprocessor_settings:
            tracing.trace_df(annotations,
                             label="stop_frequency.annotations",
                             columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
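
The '2out_0in' encoding described in the docstring maps directly to trip counts: a leg with n intermediate stops has n + 1 trips. A hedged sketch of just that parsing (the string format is inferred from the docstring; process_trips itself does much more, e.g. assigning trip ids and purposes):

import re

def trips_per_leg(stop_frequency):
    # parse e.g. '2out_0in' into stop counts (format is an assumption)
    match = re.fullmatch(r'(\d+)out_(\d+)in', stop_frequency)
    out_stops, in_stops = int(match.group(1)), int(match.group(2))
    # a leg with n intermediate stops has n + 1 trips
    return out_stops + 1, in_stops + 1

print(trips_per_leg('2out_0in'))  # (3, 1): three outbound trips, one inbound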
Example #28
def stop_frequency(tours, tours_merged, stop_frequency_alts, skim_dict,
                   chunk_size, trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound stops.
    Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero inbound stops,
    and four corresponding trips: three outbound, and one inbound.

    Adds a stop_frequency str column to tours, and creates a trips table
    with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count

    """

    trace_label = 'stop_frequency'
    model_settings = config.read_model_settings('stop_frequency.yaml')

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()

    assert not tours_merged.household_id.isnull().any()

    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        od_skim_stack_wrapper = skim_dict.wrap('origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {"od_skims": od_skim_stack_wrapper}
        if constants is not None:
            locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose,
                          value_counts=True)

    choices_list = []
    for segment_type, choosers in tours_merged.groupby('primary_purpose'):

        logging.info("%s running segment %s with %s chooser rows" %
                     (trace_label, segment_type, choosers.shape[0]))

        spec = simulate.read_model_spec(file_name='stop_frequency_%s.csv' %
                                        segment_type)

        assert spec is not None, "spec for segment_type %s not found" % segment_type

        choices = simulate.simple_simulate(
            choosers=choosers,
            spec=spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_type),
            trace_choice_name='stops')

        # convert indexes to alternative names
        choices = pd.Series(spec.columns[choices.values], index=choices.index)

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    if 'primary_purpose' not in tours.columns:
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        if preprocessor_settings:
            tracing.trace_df(annotations,
                             label="stop_frequency.annotations",
                             columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
Example #29
def atwork_subtour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for at-work subtours
    """

    trace_label = 'atwork_subtour_scheduling'
    model_settings = config.read_model_settings('tour_scheduling_atwork.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_atwork.csv')

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor
    constants = config.get_model_constants(model_settings)
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    do_skim_wrapper = skim_dict.wrap('destination', 'origin')
    skims = {
        "od_skims": od_skim_wrapper,
        "do_skims": do_skim_wrapper,
    }
    annotate_preprocessors(
        subtours, constants, skims,
        model_settings, trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    parent_tour_ids = subtours.parent_tour_id.astype(int).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids)
    parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)

    tdd_choices = vectorize_subtour_scheduling(
        parent_tours,
        subtours,
        persons_merged,
        tdd_alts, model_spec,
        model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None)

    if DUMP:
        subtours = tours[tours.tour_category == 'atwork']
        parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]

        tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

        parent_tours['parent_tour_id'] = parent_tours.index
        subtours = pd.concat([parent_tours, subtours])
        tracing.dump_df(DUMP,
                        tt.tour_map(parent_tours, subtours, tdd_alts,
                                    persons_id_col='parent_tour_id'),
                        trace_label, 'tour_map')
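
The parent_tours frame built above is just the unique parent tour ids, promoted to an index, with each parent's tdd merged in from the tours table. On toy data:

import pandas as pd

tours = pd.DataFrame({'tdd': [12, 34, 56]},
                     index=pd.Index([1, 2, 3], name='tour_id'))
subtours = pd.DataFrame({'parent_tour_id': [1.0, 1.0, 3.0]},
                        index=pd.Index([10, 11, 12], name='tour_id'))

parent_tour_ids = subtours.parent_tour_id.astype(int).unique()
parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids)
parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)

print(parent_tours)  # one row per parent tour (1 and 3), each with its tdd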
Example #30
def joint_tour_participation(
        tours, persons_merged,
        chunk_size,
        trace_hh_id):
    """
    Predicts whether each eligible person will participate in each joint tour.
    """
    trace_label = 'joint_tour_participation'
    model_settings = config.read_model_settings('joint_tour_participation.yaml')
    model_spec = simulate.read_model_spec(file_name='joint_tour_participation.csv')

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        add_null_results(model_settings, trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - create joint_tour_participation_candidates table
    candidates = joint_tour_participation_candidates(joint_tours, persons_merged)
    tracing.register_traceable_table('joint_tour_participants', candidates)
    pipeline.get_rn_generator().add_channel('joint_tour_participants', candidates)

    logger.info("Running joint_tours_participation with %d potential participants (candidates)" %
                candidates.shape[0])

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_time_window_overlap': person_time_window_overlap,
            'persons': persons_merged
        }

        expressions.assign_columns(
            df=candidates,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # - simple_simulate

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=candidates,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='participation',
        custom_chooser=participants_chooser)

    # choice is boolean (participate or not)
    choice_col = model_settings.get('participation_choice', 'participate')
    assert choice_col in model_spec.columns, \
        "couldn't find participation choice column '%s' in spec" % choice_col
    PARTICIPATE_CHOICE = model_spec.columns.get_loc(choice_col)

    participate = (choices == PARTICIPATE_CHOICE)

    # satisfaction indexed by tour_id
    tour_satisfaction = get_tour_satisfaction(candidates, participate)

    assert tour_satisfaction.all()

    candidates['satisfied'] = reindex(tour_satisfaction, candidates.tour_id)

    PARTICIPANT_COLS = ['tour_id', 'household_id', 'person_id']
    participants = candidates[participate][PARTICIPANT_COLS].copy()

    # assign participant_num
    # FIXME do we want something smarter than the participant with the lowest person_id?
    participants['participant_num'] = \
        participants.sort_values(by=['tour_id', 'person_id']).\
        groupby('tour_id').cumcount() + 1

    pipeline.replace_table("joint_tour_participants", participants)

    # drop channel as we aren't using any more (and it has candidates that weren't chosen)
    pipeline.get_rn_generator().drop_channel('joint_tour_participants')

    # - assign joint tour 'point person' (participant_num == 1)
    point_persons = participants[participants.participant_num == 1]
    joint_tours['person_id'] = point_persons.set_index('tour_id').person_id

    # update number_of_participants which was initialized to 1
    joint_tours['number_of_participants'] = participants.groupby('tour_id').size()

    assign_in_place(tours, joint_tours[['person_id', 'number_of_participants']])

    pipeline.replace_table("tours", tours)

    # - run annotations
    annotate_jtp(model_settings, trace_label)

    if trace_hh_id:
        tracing.trace_df(participants,
                         label="joint_tour_participation.participants")

        tracing.trace_df(joint_tours,
                         label="joint_tour_participation.joint_tours")
Example #31
def trip_purpose_and_destination(
        trips,
        tours_merged,
        chunk_size,
        trace_hh_id):

    trace_label = "trip_purpose_and_destination"
    model_settings = config.read_model_settings('trip_purpose_and_destination.yaml')

    MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run
    # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates
    assert (MAX_ITERATIONS > 0)

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:
        if trips_df.failed.any():
            logger.info('trip_destination has already been run. Rerunning failed trips')
            flag_failed_trip_leg_mates(trips_df, 'failed')
            trips_df = trips_df[trips_df.failed]
            tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]
            logger.info('Rerunning %s failed trips and leg-mates' % trips_df.shape[0])
        else:
            # no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." % trace_label)
            del trips_df['failed']
            pipeline.replace_table("trips", trips_df)
            return

    results = []
    i = 0
    RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed']
    while True:

        i += 1

        for c in RESULT_COLUMNS:
            if c in trips_df:
                del trips_df[c]

        trips_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size,
            trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, "i%s" % i))

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            results.append(trips_df[RESULT_COLUMNS])
            break

        logger.warning("%s %s failed trips in iteration %s" % (trace_label, num_failed_trips, i))
        file_name = "%s_i%s_failed_trips" % (trace_label, i)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

        # if max iterations reached, add remaining trips to results and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if i >= MAX_ITERATIONS:
            logger.warning("%s too many iterations %s" % (trace_label, i))
            results.append(trips_df[RESULT_COLUMNS])
            break

        # otherwise, if any trips failed, then their leg-mates trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # add the good trips to results
        results.append(trips_df[~trips_df.failed][RESULT_COLUMNS])

        # and keep the failed ones to retry
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]

    # - assign result columns to trips
    results = pd.concat(results)

    logger.info("%s %s failed trips after %s iterations" % (trace_label, results.failed.sum(), i))

    trips_df = trips.to_frame()
    assign_in_place(trips_df, results)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
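
The iterate-until-no-failures loop above is worth isolating: each pass banks the rows that succeeded, retries the failures, and gives up at MAX_ITERATIONS. A stripped-down sketch with a stand-in model that (purely by assumption, for illustration) always fails odd trip ids:

import pandas as pd

def run_model(df):
    # stand-in for run_trip_purpose_and_destination: odd trip ids always fail
    df = df.copy()
    df['failed'] = df.index % 2 == 1
    return df

trips_df = pd.DataFrame(index=pd.RangeIndex(6, name='trip_id'))
MAX_ITERATIONS = 5
results = []
i = 0
while True:
    i += 1
    trips_df = run_model(trips_df)
    if not trips_df.failed.any() or i >= MAX_ITERATIONS:
        results.append(trips_df)  # done, or out of retries: keep what we have
        break
    results.append(trips_df[~trips_df.failed])  # bank the good trips
    trips_df = trips_df[trips_df.failed]        # and retry only the failures

results = pd.concat(results)
print("%s trips still failed after %s iterations" % (results.failed.sum(), i))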
Example #32
def joint_tour_participation(
        tours, persons_merged,
        chunk_size,
        trace_hh_id):
    """
    Predicts whether each eligible person will participate in each joint tour.
    """
    trace_label = 'joint_tour_participation'
    model_settings_file_name = 'joint_tour_participation.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        add_null_results(model_settings, trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - create joint_tour_participation_candidates table
    candidates = joint_tour_participation_candidates(joint_tours, persons_merged)
    tracing.register_traceable_table('joint_tour_participants', candidates)
    pipeline.get_rn_generator().add_channel('joint_tour_participants', candidates)

    logger.info("Running joint_tours_participation with %d potential participants (candidates)" %
                candidates.shape[0])

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_time_window_overlap': person_time_window_overlap,
            'persons': persons_merged
        }

        expressions.assign_columns(
            df=candidates,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # - simple_simulate

    estimator = estimation.manager.begin_estimation('joint_tour_participation')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(candidates)

    # add household-based chunk_id so that all of a household's candidates are chunked together
    assert 'chunk_id' not in candidates.columns
    unique_household_ids = candidates.household_id.unique()
    household_chunk_ids = pd.Series(range(len(unique_household_ids)), index=unique_household_ids)
    candidates['chunk_id'] = reindex(household_chunk_ids, candidates.household_id)

    choices = simulate.simple_simulate_by_chunk_id(
        choosers=candidates,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='participation',
        custom_chooser=participants_chooser,
        estimator=estimator)

    # choice is boolean (participate or not)
    choice_col = model_settings.get('participation_choice', 'participate')
    assert choice_col in model_spec.columns, \
        "couldn't find participation choice column '%s' in spec" % choice_col
    PARTICIPATE_CHOICE = model_spec.columns.get_loc(choice_col)

    participate = (choices == PARTICIPATE_CHOICE)

    if estimator:
        estimator.write_choices(choices)

        # we override the 'participate' boolean series rather than the raw alternative index in the 'choices' series
        # its value depends on whether the candidate's 'participant_id' is in the joint_tour_participants index
        survey_participants_df = estimator.get_survey_table('joint_tour_participants')
        participate = pd.Series(choices.index.isin(survey_participants_df.index.values), index=choices.index)

        # but estimation software wants to know the choices value (alternative index)
        choices = participate.replace({True: PARTICIPATE_CHOICE, False: 1-PARTICIPATE_CHOICE})
        # estimator.write_override_choices(participate)  # write choices as boolean participate
        estimator.write_override_choices(choices)  # write choices as int alt indexes

        estimator.end_estimation()

    # satisfaction indexed by tour_id
    tour_satisfaction = get_tour_satisfaction(candidates, participate)

    assert tour_satisfaction.all()

    candidates['satisfied'] = reindex(tour_satisfaction, candidates.tour_id)

    PARTICIPANT_COLS = ['tour_id', 'household_id', 'person_id']
    participants = candidates[participate][PARTICIPANT_COLS].copy()

    # assign participant_num
    # FIXME do we want something smarter than the participant with the lowest person_id?
    participants['participant_num'] = \
        participants.sort_values(by=['tour_id', 'person_id']).\
        groupby('tour_id').cumcount() + 1

    pipeline.replace_table("joint_tour_participants", participants)

    # drop channel as we aren't using any more (and it has candidates that weren't chosen)
    pipeline.get_rn_generator().drop_channel('joint_tour_participants')

    # - assign joint tour 'point person' (participant_num == 1)
    point_persons = participants[participants.participant_num == 1]
    joint_tours['person_id'] = point_persons.set_index('tour_id').person_id

    # update number_of_participants which was initialized to 1
    joint_tours['number_of_participants'] = participants.groupby('tour_id').size()

    assign_in_place(tours, joint_tours[['person_id', 'number_of_participants']])

    pipeline.replace_table("tours", tours)

    # - run annotations
    annotate_jtp(model_settings, trace_label)

    if trace_hh_id:
        tracing.trace_df(participants,
                         label="joint_tour_participation.participants")

        tracing.trace_df(joint_tours,
                         label="joint_tour_participation.joint_tours")
Example #33
def non_mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                                  trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for non-mandatory tours
    """

    trace_label = 'non_mandatory_tour_scheduling'
    model_settings_file_name = 'non_mandatory_tour_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    logger.info("Running non_mandatory_tour_scheduling with %d tours",
                len(tours))

    persons_merged = persons_merged.to_frame()

    if 'SIMULATE_CHOOSER_COLUMNS' in model_settings:
        persons_merged =\
            expressions.filter_chooser_columns(persons_merged,
                                               model_settings['SIMULATE_CHOOSER_COLUMNS'])

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=non_mandatory_tours,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    timetable = inject.get_injectable("timetable")

    estimator = estimation.manager.begin_estimation(
        'non_mandatory_tour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        timetable.begin_transaction(estimator)

    # - non_mandatory tour scheduling is not segmented by tour type
    spec_info = {'spec': model_spec, 'estimator': estimator}

    choices = vectorize_tour_scheduling(non_mandatory_tours,
                                        persons_merged,
                                        tdd_alts,
                                        timetable,
                                        tour_segments=spec_info,
                                        tour_segment_col=None,
                                        model_settings=model_settings,
                                        chunk_size=chunk_size,
                                        trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in non_mandatory_tours.groupby('tour_num',
                                                               sort=True):
            timetable.assign(window_row_ids=nth_tours['person_id'],
                             tdds=choices.reindex(nth_tours.index))

    timetable.replace_table()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'),
                       tdd_alts,
                       left_on=['tdd'],
                       right_index=True,
                       how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
Example #34
def tour_mode_choice_simulate(tours, persons_merged, network_los, chunk_size,
                              trace_hh_id):
    """
    Tour mode choice simulate
    """
    trace_label = 'tour_mode_choice'
    model_settings_file_name = 'tour_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'tour_mode'

    primary_tours = tours.to_frame()
    assert not (primary_tours.tour_category == 'atwork').any()

    logger.info("Running %s with %d tours" %
                (trace_label, primary_tours.shape[0]))

    tracing.print_summary('tour_types',
                          primary_tours.tour_type,
                          value_counts=True)

    persons_merged = persons_merged.to_frame()
    primary_tours_merged = pd.merge(primary_tours,
                                    persons_merged,
                                    left_on='person_id',
                                    right_index=True,
                                    how='left',
                                    suffixes=('', '_r'))

    constants = {}
    # model_constants can appear in expressions
    constants.update(config.get_model_constants(model_settings))

    skim_dict = network_los.get_default_skim_dict()

    # setup skim keys
    orig_col_name = 'home_zone_id'
    dest_col_name = 'destination'

    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name,
                                               dest_key=dest_col_name,
                                               dim3_key='out_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name,
                                               dest_key=orig_col_name,
                                               dim3_key='in_period')
    odr_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name,
                                               dest_key=dest_col_name,
                                               dim3_key='in_period')
    dor_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name,
                                               dest_key=orig_col_name,
                                               dim3_key='out_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "odr_skims":
        odr_skim_stack_wrapper,  # dot return skims for e.g. TNC bridge return fare
        "dor_skims":
        dor_skim_stack_wrapper,  # odt return skims for e.g. TNC bridge return fare
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?

        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col_name,
                                           dest_key=dest_col_name,
                                           tod_key='out_period',
                                           segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_odt')
        tvpb_logsum_dot = tvpb.wrap_logsum(orig_key=dest_col_name,
                                           dest_key=orig_col_name,
                                           tod_key='in_period',
                                           segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_dot')

        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
            'tvpb_logsum_dot': tvpb_logsum_dot
        })

        # TVPB constants can appear in expressions
        constants.update(
            network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    estimator = estimation.manager.begin_estimation('tour_mode_choice')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        estimator.write_coefficients_template(model_settings=model_settings)
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        # (run_tour_mode_choice_simulate writes choosers post-annotation)

    # FIXME should normalize handling of tour_type and tour_purpose
    # mtctm1 school tour_type includes univ, which has different coefficients from elementary and HS
    # we should either add this column when tours created or add univ to tour_types
    not_university = (primary_tours_merged.tour_type !=
                      'school') | ~primary_tours_merged.is_university
    primary_tours_merged['tour_purpose'] = \
        primary_tours_merged.tour_type.where(not_university, 'univ')

    choices_list = []
    for tour_purpose, tours_segment in primary_tours_merged.groupby(
            'tour_purpose'):

        logger.info("tour_mode_choice_simulate tour_type '%s' (%s tours)" % (
            tour_purpose,
            len(tours_segment.index),
        ))

        if network_los.zone_system == los.THREE_ZONE:
            tvpb_logsum_odt.extend_trace_label(tour_purpose)
            tvpb_logsum_dot.extend_trace_label(tour_purpose)

        # name index so tracing knows how to slice
        assert tours_segment.index.name == 'tour_id'

        choices_df = run_tour_mode_choice_simulate(
            tours_segment,
            tour_purpose,
            model_settings,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            network_los=network_los,
            skims=skims,
            constants=constants,
            estimator=estimator,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, tour_purpose),
            trace_choice_name='tour_mode_choice')

        tracing.print_summary('tour_mode_choice_simulate %s choices_df' %
                              tour_purpose,
                              choices_df.tour_mode,
                              value_counts=True)

        choices_list.append(choices_df)

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:

        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_types in tvpb_mode_path_types.items():

            for direction, skim in zip(['od', 'do'],
                                       [tvpb_logsum_odt, tvpb_logsum_dot]):

                path_type = path_types[direction]
                skim_cache = skim.cache[path_type]

                logger.info(
                    f"mode {mode} direction {direction} path_type {path_type}")

                for c in skim_cache:

                    dest_col = f'{direction}_{c}'

                    if dest_col not in choices_df:
                        choices_df[dest_col] = \
                            np.nan if pd.api.types.is_numeric_dtype(skim_cache[c]) else ''
                    choices_df[dest_col].where(choices_df.tour_mode != mode,
                                               skim_cache[c],
                                               inplace=True)

    if estimator:
        estimator.write_choices(choices_df.tour_mode)
        choices_df.tour_mode = estimator.get_survey_values(
            choices_df.tour_mode, 'tours', 'tour_mode')
        estimator.write_override_choices(choices_df.tour_mode)
        estimator.end_estimation()

    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                          choices_df.tour_mode,
                          value_counts=True)

    # so we can trace with annotations
    assign_in_place(primary_tours, choices_df)

    # update tours table with mode choice (and optionally logsums)
    all_tours = tours.to_frame()
    assign_in_place(all_tours, choices_df)

    pipeline.replace_table("tours", all_tours)

    if trace_hh_id:
        tracing.trace_df(primary_tours,
                         label=tracing.extend_trace_label(
                             trace_label, mode_column_name),
                         slicer='tour_id',
                         index_label='tour_id',
                         warn_if_empty=True)
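The THREE_ZONE branch above copies cached TVPB columns into choices_df only for rows whose chosen mode matches; Series.where(cond, other) keeps the current value where cond holds and takes the cached value elsewhere. A small sketch of that conditional-overwrite pattern, with invented column names and data:

import numpy as np
import pandas as pd

choices_df = pd.DataFrame({'tour_mode': ['WALK_TRANSIT', 'DRIVE', 'WALK_TRANSIT']})
cached_btap = pd.Series([12, 7, 31])  # assumed cached tap ids, positionally aligned

mode, dest_col = 'WALK_TRANSIT', 'od_btap'

# initialize with a type-appropriate missing value, then fill matching rows only
if dest_col not in choices_df:
    choices_df[dest_col] = np.nan if pd.api.types.is_numeric_dtype(cached_btap) else ''
choices_df[dest_col] = choices_df[dest_col].where(choices_df.tour_mode != mode,
                                                  cached_btap)
print(choices_df)  # od_btap is 12 and 31 for the WALK_TRANSIT rows, NaN otherwise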
Example #35
def demographics_processor(persons, persons_merged, demographics_spec,
                           demographics_settings, chunk_size, trace_hh_id):

    # the choice model will be applied to each row of the choosers table (a pandas.DataFrame)
    persons_df = persons_merged.to_frame()

    logger.info(
        "Running demographics_processor with %d persons (chunk size = %s)" %
        (len(persons_df), chunk_size))

    # locals whose values will be accessible to the execution context
    # when the expressions in spec are applied to choosers
    locals_dict = config.get_model_constants(demographics_settings)
    locals_dict.update(config.setting('globals'))

    trace_rows = trace_hh_id and persons_df['household_id'] == trace_hh_id

    # eval_variables evaluates each of the expressions in spec
    # in the context of each row of the choosers dataframe
    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(demographics_spec,
                                  persons_df,
                                  locals_dict,
                                  df_alias='persons',
                                  trace_rows=trace_rows)

    # add assigned columns to persons as they are needed by downstream processors
    persons = persons.to_frame()
    assign_in_place(persons, results)
    pipeline.replace_table("persons", persons)

    # coc groups with counts
    # TODO - should we allow specifying which assigned columns are coc (e.g. in settings?)
    # for now, assume all assigned columns are coc, but this could cramp modelers' style
    # if they want to create additional demographic columns for downstream use that aren't coc
    coc_columns = list(results.columns)

    inject.add_injectable("coc_column_names", coc_columns)

    # - create table with coc columns as indexes and a single column 'persons' with counts
    # index                        persons
    # coc_poverty coc_age
    # False       False            20
    #             True              3
    # True        False             4
    coc_grouped = results.groupby(coc_columns)
    coc_grouped = coc_grouped[coc_columns[0]].count().to_frame(name='persons')

    pipeline.replace_table("coc_results", coc_grouped)

    add_summary_results(coc_grouped)

    if trace_hh_id:

        if trace_results is not None:

            tracing.write_csv(trace_results,
                              file_name="demographics",
                              index_label='person_idx',
                              column_labels=['label', 'person'])

        if trace_assigned_locals:
            tracing.write_csv(trace_assigned_locals,
                              file_name="demographics_locals")
Example #36
def mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                              trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for mandatory tours
    """
    trace_label = 'mandatory_tour_scheduling'
    model_settings_file_name = 'mandatory_tour_scheduling.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)
    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])

    tours = tours.to_frame()
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    # - if no mandatory_tours
    if mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - filter chooser columns for both logsums and simulate
    logsum_columns = logsum_settings.get('LOGSUM_CHOOSER_COLUMNS', [])
    model_columns = model_settings.get('SIMULATE_CHOOSER_COLUMNS', [])
    chooser_columns = logsum_columns + [
        c for c in model_columns if c not in logsum_columns
    ]
    persons_merged = expressions.filter_chooser_columns(
        persons_merged, chooser_columns)

    # - add tour segmentation column
    # mtctm1 segments mandatory_scheduling spec by tour_type
    # (i.e. there are different specs for work and school tour_types)
    # mtctm1 logsum coefficients are segmented by primary_purpose
    # (i.e. there are different logsum coefficients for work, school, univ primary_purposes)
    # for simplicity in managing these different segmentation schemes,
    # we conflate them by segmenting the skims to align with primary_purpose
    tour_segment_col = 'mandatory_tour_seg'
    assert tour_segment_col not in mandatory_tours
    is_university_tour = \
        (mandatory_tours.tour_type == 'school') & \
        reindex(persons_merged.is_university, mandatory_tours.person_id)
    mandatory_tours[tour_segment_col] = \
        mandatory_tours.tour_type.where(~is_university_tour, 'univ')

    # load specs
    spec_segment_settings = model_settings.get('SPEC_SEGMENTS', {})
    specs = {}
    estimators = {}
    for spec_segment_name, spec_settings in spec_segment_settings.items():

        # estimator for this tour_segment
        estimator = estimation.manager.begin_estimation(
            model_name='mandatory_tour_scheduling_%s' % spec_segment_name,
            bundle_name='mandatory_tour_scheduling')

        spec_file_name = spec_settings['SPEC']
        model_spec = simulate.read_model_spec(file_name=spec_file_name)
        coefficients_df = simulate.read_model_coefficients(spec_settings)
        specs[spec_segment_name] = simulate.eval_coefficients(
            model_spec, coefficients_df, estimator)

        if estimator:
            estimators[spec_segment_name] = estimator  # add to local list
            estimator.write_model_settings(model_settings,
                                           model_settings_file_name)
            estimator.write_spec(spec_settings)
            estimator.write_coefficients(coefficients_df)

    # - spec dict segmented by primary_purpose
    tour_segment_settings = model_settings.get('TOUR_SPEC_SEGMENTS', {})
    tour_segments = {}
    for tour_segment_name, spec_segment_name in tour_segment_settings.items():
        tour_segments[tour_segment_name] = {}
        tour_segments[tour_segment_name][
            'spec_segment_name'] = spec_segment_name
        tour_segments[tour_segment_name]['spec'] = specs[spec_segment_name]
        tour_segments[tour_segment_name]['estimator'] = estimators.get(
            spec_segment_name)

    timetable = inject.get_injectable("timetable")

    if estimators:
        timetable.begin_transaction(list(estimators.values()))

    logger.info("Running mandatory_tour_scheduling with %d tours", len(tours))
    choices = vts.vectorize_tour_scheduling(mandatory_tours,
                                            persons_merged,
                                            tdd_alts,
                                            timetable,
                                            tour_segments=tour_segments,
                                            tour_segment_col=tour_segment_col,
                                            model_settings=model_settings,
                                            chunk_size=chunk_size,
                                            trace_label=trace_label)

    if estimators:
        # override choices for all estimators
        choices_list = []
        for spec_segment_name, estimator in estimators.items():
            model_choices = choices[mandatory_tours.tour_type == spec_segment_name]

            # FIXME vectorize_tour_scheduling calls used to write_choices but perhaps shouldn't
            estimator.write_choices(model_choices)
            override_choices = estimator.get_survey_values(
                model_choices, 'tours', 'tdd')
            estimator.write_override_choices(override_choices)

            choices_list.append(override_choices)
            estimator.end_estimation()
        choices = pd.concat(choices_list)

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in mandatory_tours.groupby('tour_num',
                                                           sort=True):
            timetable.assign(window_row_ids=nth_tours['person_id'],
                             tdds=choices.reindex(nth_tours.index))

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'),
                       tdd_alts,
                       left_on=['tdd'],
                       right_index=True,
                       how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    timetable.replace_table()

    # updated df for tracing
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    tracing.dump_df(DUMP, tt.tour_map(persons_merged, mandatory_tours,
                                      tdd_alts), trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
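The segmentation step above leans on Series.where to relabel only the school tours made by university students; everything else keeps its tour_type. A tiny sketch of that relabeling, with is_university assumed to be already reindexed onto tours:

import pandas as pd

tours = pd.DataFrame({'tour_type': ['work', 'school', 'school'],
                      'is_university': [False, False, True]})

# 'univ' where the tour is a school tour by a university student, else tour_type
is_university_tour = (tours.tour_type == 'school') & tours.is_university
tours['mandatory_tour_seg'] = tours.tour_type.where(~is_university_tour, 'univ')
print(tours.mandatory_tour_seg.tolist())  # ['work', 'school', 'univ']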
Example #37
def atwork_subtour_mode_choice(tours, persons_merged, skim_dict, skim_stack,
                               chunk_size, trace_hh_id):
    """
    At-work subtour mode choice simulate
    """

    trace_label = 'atwork_subtour_mode_choice'

    model_settings = config.read_model_settings('tour_mode_choice.yaml')

    spec = tour_mode_choice_spec(model_settings)

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    subtours_merged = \
        pd.merge(subtours, persons_merged.to_frame(),
                 left_on='person_id', right_index=True, how='left')

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d subtours" %
                (trace_label, subtours_merged.shape[0]))

    tracing.print_summary('%s tour_type' % trace_label,
                          subtours_merged.tour_type,
                          value_counts=True)

    # setup skim keys
    orig_col_name = 'workplace_taz'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name,
                                             right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name,
                                             right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    choices = run_tour_mode_choice_simulate(
        subtours_merged,
        spec,
        tour_purpose='atwork',
        model_settings=model_settings,
        skims=skims,
        constants=constants,
        nest_spec=nest_spec,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='tour_mode_choice')

    tracing.print_summary('%s choices' % trace_label,
                          choices,
                          value_counts=True)

    assign_in_place(tours, choices.to_frame('tour_mode'))
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label=tracing.extend_trace_label(
                             trace_label, 'tour_mode'),
                         slicer='tour_id',
                         index_label='tour_id')

    force_garbage_collect()
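Both mode-choice examples enrich tours with person attributes through the same left join; one row per tour is preserved, and suffixes guard against column-name collisions. A minimal sketch with invented data:

import pandas as pd

tours = pd.DataFrame({'person_id': [1, 1, 2], 'destination': [10, 12, 11]},
                     index=pd.Index([100, 101, 102], name='tour_id'))
persons = pd.DataFrame({'age': [35, 52], 'destination': [99, 98]},
                       index=pd.Index([1, 2], name='person_id'))

# left join on person_id; tour columns win name collisions, person copies get '_r'
tours_merged = pd.merge(tours, persons, left_on='person_id', right_index=True,
                        how='left', suffixes=('', '_r'))
print(tours_merged.columns.tolist())  # ['person_id', 'destination', 'age', 'destination_r']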
Example #38
def trip_purpose_and_destination(trips, tours_merged, chunk_size, trace_hh_id):

    trace_label = "trip_purpose_and_destination"
    model_settings = config.read_model_settings(
        'trip_purpose_and_destination.yaml')

    # for consistency, read sample_table_name setting from trip_destination settings file
    trip_destination_model_settings = config.read_model_settings(
        'trip_destination.yaml')
    sample_table_name = trip_destination_model_settings.get(
        'DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run
    # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates
    assert (MAX_ITERATIONS > 0)

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:

        if 'failed' not in trips_df.columns:
            # trip_destination model cleaned up any failed trips
            logger.info("%s - no failed column from prior model run." %
                        trace_label)
            return

        elif not trips_df.failed.any():
            # 'failed' column but no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." %
                        trace_label)
            trips_df.drop(columns='failed', inplace=True)
            pipeline.replace_table("trips", trips_df)
            return

        else:
            logger.info(
                "trip_destination has already been run. Rerunning failed trips"
            )
            flag_failed_trip_leg_mates(trips_df, 'failed')
            trips_df = trips_df[trips_df.failed]
            tours_merged_df = tours_merged_df[tours_merged_df.index.isin(
                trips_df.tour_id)]
            logger.info("Rerunning %s failed trips and leg-mates" %
                        trips_df.shape[0])

            # drop any previously saved samples of failed trips
            if want_sample_table and pipeline.is_table(sample_table_name):
                logger.info(
                    "Dropping any previously saved samples of failed trips")
                save_sample_df = pipeline.get_table(sample_table_name)
                save_sample_df.drop(trips_df.index,
                                    level='trip_id',
                                    inplace=True)
                pipeline.replace_table(sample_table_name, save_sample_df)
                del save_sample_df

    # if we estimated trip_destination, there should have been no failed trips
    # if we didn't, but it is enabled, it is probably a configuration error
    # if we just estimated trip_purpose, it isn't clear what they are trying to do, nor how to handle it
    assert not (estimation.manager.begin_estimation('trip_purpose')
                or estimation.manager.begin_estimation('trip_destination'))

    processed_trips = []
    save_samples = []
    i = 0
    TRIP_RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed']
    while True:

        i += 1

        for c in TRIP_RESULT_COLUMNS:
            if c in trips_df:
                del trips_df[c]

        trips_df, save_sample_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size=chunk_size,
            trace_hh_id=trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, "i%s" % i))

        # if testing, make sure at least one trip fails
        if config.setting('testing_fail_trip_destination', False) \
                and (i == 1) and not trips_df.failed.any():
            fail_o = trips_df[
                trips_df.trip_num < trips_df.trip_count].origin.max()
            trips_df.failed = (trips_df.origin == fail_o) & \
                              (trips_df.trip_num < trips_df.trip_count)

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            processed_trips.append(trips_df[TRIP_RESULT_COLUMNS])
            if save_sample_df is not None:
                save_samples.append(save_sample_df)
            break

        logger.warning("%s %s failed trips in iteration %s" %
                       (trace_label, num_failed_trips, i))
        file_name = "%s_i%s_failed_trips" % (trace_label, i)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed],
                          file_name=file_name,
                          transpose=False)

        # if max iterations reached, add remaining trips to processed_trips and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if i >= MAX_ITERATIONS:
            logger.warning("%s too many iterations %s" % (trace_label, i))
            processed_trips.append(trips_df[TRIP_RESULT_COLUMNS])
            if save_sample_df is not None:
                save_sample_df.drop(trips_df[trips_df.failed].index,
                                    level='trip_id',
                                    inplace=True)
                save_samples.append(save_sample_df)
            break

        # otherwise, if any trips failed, then their leg-mates trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # add the good trips to processed_trips
        processed_trips.append(trips_df[~trips_df.failed][TRIP_RESULT_COLUMNS])

        # and keep the failed ones to retry
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(
            trips_df.tour_id)]

        # add samples for the processed trips to save_samples
        if save_sample_df is not None:
            # drop failed trip samples
            save_sample_df.drop(trips_df.index, level='trip_id', inplace=True)
            save_samples.append(save_sample_df)

    # - assign result columns to trips
    processed_trips = pd.concat(processed_trips)

    if len(save_samples) > 0:
        save_sample_df = pd.concat(save_samples)
        logger.info("adding %s samples to %s" %
                    (len(save_sample_df), sample_table_name))
        pipeline.extend_table(sample_table_name, save_sample_df)

    logger.info("%s %s failed trips after %s iterations" %
                (trace_label, processed_trips.failed.sum(), i))

    trips_df = trips.to_frame()
    assign_in_place(trips_df, processed_trips)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    # check to make sure we wrote sample file if requested
    if want_sample_table and len(trips_df) > 0:
        assert pipeline.is_table(sample_table_name)
        # since we have saved samples for all successful trips
        # once we discard failed trips, we should have samples for all trips
        save_sample_df = pipeline.get_table(sample_table_name)
        # expect samples only for intermediate trip destinations
        assert \
            len(save_sample_df.index.get_level_values(0).unique()) == \
            len(trips_df[trips_df.trip_num < trips_df.trip_count])
        del save_sample_df

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
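The control flow above is a run / peel-off / retry loop: each pass keeps the trips that succeeded and re-runs only the failures, until none remain or MAX_ITERATIONS is hit. A stripped-down sketch of that skeleton, where run_once is a stand-in for run_trip_purpose_and_destination:

import pandas as pd

def run_once(df):
    # stand-in for the real model step: flag some rows failed, fix them for retry
    out = df.copy()
    out['failed'] = out['value'] < 0
    out['value'] = out['value'].abs()
    return out

trips_df = pd.DataFrame({'value': [3, -1, -4, 2]})
MAX_ITERATIONS = 5
processed, i = [], 0

while True:
    i += 1
    trips_df = run_once(trips_df.drop(columns='failed', errors='ignore'))
    if not trips_df.failed.any() or i >= MAX_ITERATIONS:
        processed.append(trips_df)            # done, or giving up: keep everything
        break
    processed.append(trips_df[~trips_df.failed])  # bank the good rows
    trips_df = trips_df[trips_df.failed]          # retry only the failures

result = pd.concat(processed)
print(result.failed.sum(), "failed after", i, "iterations")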