Пример #1
0
def compute_ood_logsums(choosers, logsum_settings, nest_spec, logsum_spec,
                        od_skims, locals_dict, chunk_size, trace_label,
                        chunk_tag):
    """
    Compute one (of two) out-of-direction logsums for destination alternatives

    Will either be trip_origin -> alt_dest or alt_dest -> primary_dest
    """

    locals_dict.update(od_skims)

    expressions.annotate_preprocessors(choosers, locals_dict, od_skims,
                                       logsum_settings, trace_label)

    logsums = simulate.simple_simulate_logsums(choosers,
                                               logsum_spec,
                                               nest_spec,
                                               skims=od_skims,
                                               locals_d=locals_dict,
                                               chunk_size=chunk_size,
                                               trace_label=trace_label,
                                               chunk_tag=chunk_tag)

    assert logsums.index.equals(choosers.index)

    # FIXME not strictly necessary, but would make trace files more legible?
    # logsums = logsums.replace(-np.inf, -999)

    return logsums
Пример #2
0
def annotate_trips(trips, network_los, model_settings):
    """
    Add columns to local trips table. The annotator has
    access to the origin/destination skims and everything
    defined in the model settings CONSTANTS.

    Pipeline tables can also be accessed by listing them under
    TABLES in the preprocessor settings.
    """

    trips_df = trips.to_frame()

    trace_label = 'trip_matrices'

    skim_dict = network_los.get_default_skim_dict()

    # setup skim keys
    if 'trip_period' not in trips_df:
        trips_df['trip_period'] = network_los.skim_time_period_label(
            trips_df.depart)
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key='origin',
                                               dest_key='destination',
                                               dim3_key='trip_period')
    skims = {'od_skims': od_skim_wrapper, "odt_skims": odt_skim_stack_wrapper}

    locals_dict = {}
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_dict.update(constants)

    expressions.annotate_preprocessors(trips_df, locals_dict, skims,
                                       model_settings, trace_label)

    # Data will be expanded by an expansion weight column from
    # the households pipeline table, if specified in the model settings.
    hh_weight_col = model_settings.get('HH_EXPANSION_WEIGHT_COL')

    if hh_weight_col and hh_weight_col not in trips_df:
        logger.info("adding '%s' from households to trips table" %
                    hh_weight_col)
        household_weights = pipeline.get_table('households')[hh_weight_col]
        trips_df[hh_weight_col] = trips_df.household_id.map(household_weights)

    return trips_df
Пример #3
0
def run_tour_mode_choice_simulate(choosers,
                                  tour_purpose,
                                  model_settings,
                                  mode_column_name,
                                  logsum_column_name,
                                  network_los,
                                  skims,
                                  constants,
                                  estimator,
                                  chunk_size,
                                  trace_label=None,
                                  trace_choice_name=None):
    """
    This is a utility to run a mode choice model for each segment (usually
    segments are tour/trip purposes).  Pass in the tours/trip that need a mode,
    the Skim object, the spec to evaluate with, and any additional expressions
    you want to use in the evaluation of variables.
    """

    spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients = simulate.get_segment_coefficients(model_settings,
                                                     tour_purpose)

    spec = simulate.eval_coefficients(spec, coefficients, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients,
                                                trace_label)

    locals_dict = {}
    locals_dict.update(constants)
    locals_dict.update(skims)

    # coefficients can appear in expressions
    locals_dict.update(coefficients)

    assert ('in_period' not in choosers) and ('out_period' not in choosers)
    in_time = skims['in_time_col_name']
    out_time = skims['out_time_col_name']
    choosers['in_period'] = network_los.skim_time_period_label(
        choosers[in_time])
    choosers['out_period'] = network_los.skim_time_period_label(
        choosers[out_time])

    expressions.annotate_preprocessors(choosers, locals_dict, skims,
                                       model_settings, trace_label)

    trace_column_names = choosers.index.name
    assert trace_column_names == 'tour_id'
    if trace_column_names not in choosers:
        choosers[trace_column_names] = choosers.index

    if estimator:
        # write choosers after annotation
        estimator.write_choosers(choosers)

    choices = mode_choice_simulate(choosers=choosers,
                                   spec=spec,
                                   nest_spec=nest_spec,
                                   skims=skims,
                                   locals_d=locals_dict,
                                   chunk_size=chunk_size,
                                   mode_column_name=mode_column_name,
                                   logsum_column_name=logsum_column_name,
                                   trace_label=trace_label,
                                   trace_choice_name=trace_choice_name,
                                   trace_column_names=trace_column_names,
                                   estimator=estimator)

    return choices
Пример #4
0
def trip_mode_choice(
        trips,
        tours_merged,
        network_los,
        chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour putpose are calculated separately because they have different
    coefficient values (stored in trip_mode_choice_coeffs.csv coefficient file.)

    Adds trip_mode column to trip table
    """
    trace_label = 'trip_mode_choice'
    model_settings = config.read_model_settings('trip_mode_choice.yaml')

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'

    model_spec = \
        simulate.read_model_spec(file_name=model_settings['SPEC'])
    omnibus_coefficients = \
        assign.read_constant_spec(config.config_file_path(model_settings['COEFFICIENTS']))

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    nest_spec = config.get_logit_model_settings(model_settings)

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose, value_counts=True)

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col
    })

    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col, dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col, dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col, dest_key=dest_col,
                                           tod_key='trip_period', segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label, tag='tvpb_logsum_odt')
        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
            # 'tvpb_logsum_dot': tvpb_logsum_dot
        })

        # TVPB constants can appear in expressions
        constants.update(network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)" %
                    (primary_purpose, len(trips_segment.index), ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        if network_los.zone_system == los.THREE_ZONE:
            tvpb_logsum_odt.extend_trace_label(primary_purpose)
            # tvpb_logsum_dot.extend_trace_label(primary_purpose)

        locals_dict = assign.evaluate_constants(omnibus_coefficients[primary_purpose],
                                                constants=constants)
        locals_dict.update(constants)

        expressions.annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        locals_dict.update(skims)

        choices = mode_choice_simulate(
            choosers=trips_segment,
            spec=model_spec,
            nest_spec=nest_spec,
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            trace_label=trace_label,
            trace_choice_name='trip_mode_choice')

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            assign_in_place(trips_segment, choices)

            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:

        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_type in tvpb_mode_path_types.items():

            skim_cache = tvpb_logsum_odt.cache[path_type]

            print(f"mode {mode} path_type {path_type}")

            for c in skim_cache:
                dest_col = c
                if dest_col not in choices_df:
                    choices_df[dest_col] = np.nan
                choices_df[dest_col].where(choices_df[mode_column_name] != mode, skim_cache[c], inplace=True)

    # update trips table with choices (and otionally logssums)
    trips_df = trips.to_frame()
    assign_in_place(trips_df, choices_df)

    tracing.print_summary('tour_modes',
                          trips_merged.tour_mode, value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          trips_df[mode_column_name], value_counts=True)

    assert not trips_df[mode_column_name].isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
def atwork_subtour_scheduling(tours, persons_merged, tdd_alts, skim_dict,
                              chunk_size, trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for at work subtours tours
    """

    trace_label = 'atwork_subtour_scheduling'
    model_settings_file_name = 'tour_scheduling_atwork.yaml'

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation(
        'atwork_subtour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor
    constants = config.get_model_constants(model_settings)
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    skims = {
        "od_skims": od_skim_wrapper,
    }
    expressions.annotate_preprocessors(subtours, constants, skims,
                                       model_settings, trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    parent_tour_ids = subtours.parent_tour_id.astype(np.int64).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_tour_ids},
                                index=parent_tour_ids)
    parent_tours = parent_tours.merge(tours[['tdd']],
                                      left_index=True,
                                      right_index=True)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df, model_settings)
        # we don't need to update timetable because subtours are scheduled inside work trip windows

    choices = vectorize_subtour_scheduling(parent_tours,
                                           subtours,
                                           persons_merged,
                                           tdd_alts,
                                           model_spec,
                                           model_settings,
                                           estimator=estimator,
                                           chunk_size=chunk_size,
                                           trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    tdd_choices = pd.merge(choices.to_frame('tdd'),
                           tdd_alts,
                           left_on=['tdd'],
                           right_index=True,
                           how='left')

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None)

    if DUMP:
        subtours = tours[tours.tour_category == 'atwork']
        parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]

        tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

        parent_tours['parent_tour_id'] = parent_tours.index
        subtours = pd.concat([parent_tours, subtours])
        tracing.dump_df(
            DUMP,
            tt.tour_map(parent_tours,
                        subtours,
                        tdd_alts,
                        persons_id_col='parent_tour_id'), trace_label,
            'tour_map')
Пример #6
0
def trip_mode_choice(trips, network_los, chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour putpose are calculated separately because they have different
    coefficient values (stored in trip_mode_choice_coefficients.csv coefficient file.)

    Adds trip_mode column to trip table
    """

    trace_label = 'trip_mode_choice'
    model_settings_file_name = 'trip_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    # give trip mode choice the option to run without calling tours_merged. Useful for xborder
    # model where tour_od_choice needs trip mode choice logsums before some of the join keys
    # needed by tour_merged (e.g. home_zone_id) exist
    tours_cols = [
        col for col in model_settings['TOURS_MERGED_CHOOSER_COLUMNS']
        if col not in trips_df.columns
    ]
    if len(tours_cols) > 0:
        tours_merged = inject.get_table('tours_merged').to_frame(
            columns=tours_cols)
    else:
        tours_merged = pd.DataFrame()

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(trips_df,
                            tours_merged,
                            left_on='tour_id',
                            right_index=True,
                            how="left")
    assert trips_merged.index.equals(trips.index)

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose,
                          value_counts=True)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(
        trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'
    min_per_period = network_los.skim_time_periods['period_minutes']
    periods_per_hour = 60 / min_per_period

    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col,
        'MIN_PER_PERIOD': min_per_period,
        'PERIODS_PER_HOUR': periods_per_hour
    })

    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col,
                                               dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col,
                                               dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb
        tvpb_recipe = model_settings.get('TVPB_recipe', 'tour_mode_choice')
        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col,
                                           dest_key=dest_col,
                                           tod_key='trip_period',
                                           segment_key='demographic_segment',
                                           recipe=tvpb_recipe,
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_odt')
        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
        })

        # This if-clause gives the user the option of NOT inheriting constants
        # from the tvpb settings. previously, these constants were inherited
        # automatically, which had the undesirable effect of overwriting any
        # trip mode choice model constants/coefficients that shared the same
        # name. The default behavior is still the same (True), but the user
        # can now avoid any chance of squashing these local variables by
        # adding `use_TVPB_constants: False` to the trip_mode_choice.yaml file.
        # the tvpb will still use the constants as defined in the recipe
        # specified above in `tvpb.wrap_logsum()` but they will not be used
        # in the trip mode choice expressions.
        if model_settings.get('use_TVPB_constants', True):
            constants.update(
                network_los.setting(
                    'TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    # don't create estimation data bundle if trip mode choice is being called
    # from another model step (e.g. tour mode choice logsum creation)
    if pipeline._PIPELINE.rng().step_name != 'trip_mode_choice':
        estimator = None
    else:
        estimator = estimation.manager.begin_estimation('trip_mode_choice')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        estimator.write_coefficients_template(model_settings=model_settings)
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    nest_spec = config.get_logit_model_settings(model_settings)

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby(
            'primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(
            trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)" % (
            primary_purpose,
            len(trips_segment.index),
        ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        if network_los.zone_system == los.THREE_ZONE:
            tvpb_logsum_odt.extend_trace_label(primary_purpose)
            # tvpb_logsum_dot.extend_trace_label(primary_purpose)

        coefficients = simulate.get_segment_coefficients(
            model_settings, primary_purpose)

        locals_dict = {}
        locals_dict.update(constants)

        constants_keys = constants.keys()
        if any([coeff in constants_keys for coeff in coefficients.keys()]):
            logger.warning(
                "coefficients are obscuring constants in locals_dict")
        locals_dict.update(coefficients)

        # have to initialize chunker for preprocessing in order to access
        # tvpb logsum terms in preprocessor expressions.
        with chunk.chunk_log(tracing.extend_trace_label(
                trace_label, 'preprocessing'),
                             base=True):
            expressions.annotate_preprocessors(trips_segment, locals_dict,
                                               skims, model_settings,
                                               segment_trace_label)

        if estimator:
            # write choosers after annotation
            estimator.write_choosers(trips_segment)

        locals_dict.update(skims)

        choices = mode_choice_simulate(
            choosers=trips_segment,
            spec=simulate.eval_coefficients(model_spec, coefficients,
                                            estimator),
            nest_spec=simulate.eval_nest_coefficients(nest_spec, coefficients,
                                                      segment_trace_label),
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice',
            estimator=estimator)

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(
                                 segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            assign_in_place(trips_segment, choices)

            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(
                                 segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:

        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_type in tvpb_mode_path_types.items():

            skim_cache = tvpb_logsum_odt.cache[path_type]

            for c in skim_cache:
                dest_col = c
                if dest_col not in choices_df:
                    choices_df[
                        dest_col] = np.nan if pd.api.types.is_numeric_dtype(
                            skim_cache[c]) else ''
                choices_df[dest_col].where(
                    choices_df[mode_column_name] != mode,
                    skim_cache[c],
                    inplace=True)

    if estimator:
        estimator.write_choices(choices_df.trip_mode)
        choices_df.trip_mode = estimator.get_survey_values(
            choices_df.trip_mode, 'trips', 'trip_mode')
        estimator.write_override_choices(choices_df.trip_mode)
        estimator.end_estimation()
    trips_df = trips.to_frame()
    assign_in_place(trips_df, choices_df)

    tracing.print_summary('trip_modes',
                          trips_merged.tour_mode,
                          value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          trips_df[mode_column_name],
                          value_counts=True)

    assert not trips_df[mode_column_name].isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(
                             trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)