Example #1
def choose_intermediate_trip_purpose(trips, probs_spec, trace_hh_id, trace_label):
    """
    choose purpose for intermediate trips based on probs_spec
    which assigns relative weights (summing to 1) to the possible purpose choices

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    probs_join_cols = ['primary_purpose', 'outbound', 'person_type']
    non_purpose_cols = probs_join_cols + ['depart_range_start', 'depart_range_end']
    purpose_cols = [c for c in probs_spec.columns if c not in non_purpose_cols]

    num_trips = len(trips.index)
    have_trace_targets = trace_hh_id and tracing.has_trace_targets(trips)

    # probs should sum to 1 across rows
    sum_probs = probs_spec[purpose_cols].sum(axis=1)
    probs_spec.loc[:, purpose_cols] = probs_spec.loc[:, purpose_cols].div(sum_probs, axis=0)

    # left join trips to probs (there may be multiple rows per trip for multiple depart ranges)
    choosers = pd.merge(trips.reset_index(), probs_spec, on=probs_join_cols,
                        how='left').set_index('trip_id')

    chunk.log_df(trace_label, 'choosers', choosers)

    # select the matching depart range (this should result in exactly one chooser row per trip)
    choosers = choosers[(choosers.start >= choosers['depart_range_start']) & (
                choosers.start <= choosers['depart_range_end'])]

    # choosers should now match trips row for row
    assert choosers.index.is_unique
    assert len(choosers.index) == num_trips

    choices, rands = logit.make_choices(
        choosers[purpose_cols],
        trace_label=trace_label, trace_choosers=choosers)

    if have_trace_targets:
        tracing.trace_df(choices, '%s.choices' % trace_label, columns=[None, 'trip_purpose'])
        tracing.trace_df(rands, '%s.rands' % trace_label, columns=[None, 'rand'])

    choices = choices.map(pd.Series(purpose_cols))
    return choices
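For readers unfamiliar with logit.make_choices, its core mechanics can be sketched with plain pandas/numpy: row-normalize the relative weights, draw one uniform number per row, and pick the first column whose cumulative probability exceeds the draw. The data below is hypothetical, and this is a simplification of the real function (which also manages reproducible random-number channels and tracing):

import numpy as np
import pandas as pd

# hypothetical relative weights, one row per trip (not activitysim data)
probs = pd.DataFrame({'work': [2.0, 0.0], 'shop': [1.0, 1.0], 'eat': [1.0, 3.0]},
                     index=pd.Index([101, 102], name='trip_id'))

probs = probs.div(probs.sum(axis=1), axis=0)  # rows now sum to 1

rands = np.random.default_rng(0).random(len(probs))  # one uniform draw per row

# first column whose cumulative probability exceeds the draw
positions = (probs.cumsum(axis=1).values > rands[:, None]).argmax(axis=1)
choices = pd.Series(probs.columns[positions], index=probs.index)
print(choices)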
Example #2
def extend_tour_counts(persons, tour_counts, alternatives, trace_hh_id,
                       trace_label):
    """
    extend tour counts based on a probability table

    counts can only be extended if original count is between 1 and 4
    and tours can only be extended if their count is at the max possible
    (e.g. 2 for escort, 1 otherwise) so escort might be increased to 3 or 4
    and other tour types might be increased to 2 or 3

    Parameters
    ----------
    persons: pandas dataframe
        (need this for join columns)
    tour_counts: pandas dataframe
        one row per person, one column per tour_type
    alternatives
        alternatives from nmtv interaction_simulate
        only need this to know max possible frequency for a tour type
    trace_hh_id
    trace_label

    Returns
    -------
    extended tour_counts


    tour_counts looks like this:
               escort  shopping  othmaint  othdiscr    eatout    social
    parent_id
    2588676         2         0         0         1         1         0
    2588677         0         1         0         1         0         0

    """

    assert tour_counts.index.name == persons.index.name

    PROBABILITY_COLUMNS = ['0_tours', '1_tours', '2_tours']
    JOIN_COLUMNS = ['ptype', 'has_mandatory_tour', 'has_joint_tour']
    TOUR_TYPE_COL = 'nonmandatory_tour_type'

    probs_spec = extension_probs()
    persons = persons[JOIN_COLUMNS]

    # only extend if there are 1 - 4 non_mandatory tours to start with
    extend_tour_counts = tour_counts.sum(axis=1).between(1, 4)
    if not extend_tour_counts.any():
        logger.info(
            "extend_tour_counts - no persons eligible for tour_count extension"
        )
        return tour_counts

    have_trace_targets = trace_hh_id and tracing.has_trace_targets(
        extend_tour_counts)

    for i, tour_type in enumerate(alternatives.columns):

        i_tour_type = i + 1  # (probs_spec nonmandatory_tour_type column is 1-based)
        tour_type_trace_label = tracing.extend_trace_label(
            trace_label, tour_type)

        # - only extend tour if frequency is max possible frequency for this tour type
        tour_type_is_maxed = \
            extend_tour_counts & (tour_counts[tour_type] == alternatives[tour_type].max())
        maxed_tour_count_idx = tour_counts.index[tour_type_is_maxed]

        if len(maxed_tour_count_idx) == 0:
            continue

        # - get extension probs for tour_type
        choosers = pd.merge(
            persons.loc[maxed_tour_count_idx],
            probs_spec[probs_spec[TOUR_TYPE_COL] == i_tour_type],
            on=JOIN_COLUMNS,
            how='left').set_index(maxed_tour_count_idx)
        assert choosers.index.name == tour_counts.index.name

        # - random choice of extension magnitude based on relative probs
        choices, rands = logit.make_choices(choosers[PROBABILITY_COLUMNS],
                                            trace_label=tour_type_trace_label,
                                            trace_choosers=choosers)

        # - extend tour_count (0-based prob alternative choice equals magnitude of extension)
        if choices.any():
            tour_counts.loc[choices.index, tour_type] += choices

        if have_trace_targets:
            tracing.trace_df(choices,
                             tracing.extend_trace_label(
                                 tour_type_trace_label, 'choices'),
                             columns=[None, 'choice'])
            tracing.trace_df(rands,
                             tracing.extend_trace_label(
                                 tour_type_trace_label, 'rands'),
                             columns=[None, 'rand'])

    return tour_counts
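The extension step relies on the 0-based choice index doubling as the increment. A minimal sketch with made-up data, where the choices Series stands in for logit.make_choices output:

import pandas as pd

# current counts for two people (illustrative)
tour_counts = pd.DataFrame({'escort': [2, 0]},
                           index=pd.Index([1, 2], name='person_id'))

# stand-in for make_choices output: alternative 2 means "+2 tours", 0 means "no change"
choices = pd.Series([2, 0], index=tour_counts.index)

tour_counts.loc[choices.index, 'escort'] += choices
print(tour_counts)  # person 1's escort count goes from 2 to 4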
Example #3
def extra_hh_member_choices(persons, cdap_fixed_relative_proportions, locals_d,
                            trace_hh_id, trace_label):
    """
    Generate the activity choices for the 'extra' household members who weren't handled by cdap

    Following the CTRAMP HouseholdCoordinatedDailyActivityPatternModel, "a separate,
    simple cross-sectional distribution is looked up for the remaining household members"

    The cdap_fixed_relative_proportions spec is handled like an activitysim logit utility spec,
    EXCEPT that the values computed are relative proportions, not utilities
    (i.e. values are not exponentiated before being normalized to probabilities summing to 1.0)

    Parameters
    ----------
    persons : pandas.DataFrame
        Table of persons data indexed on _persons_index_
         We expect, at least, columns [_hh_id_, _ptype_]
    cdap_fixed_relative_proportions
        spec to compute/specify the relative proportions of each activity (M, N, H)
        that should be used to choose activities for additional household members
        not handled by CDAP.
    locals_d : Dict
        dictionary of local variables that eval_variables adds to the environment
        for an evaluation of an expression that begins with @

    Returns
    -------
    choices : pandas.Series
        list of alternatives chosen for all extra members, indexed by _persons_index_
    """

    trace_label = tracing.extend_trace_label(trace_label, 'extra_hh_member_choices')

    # extra household members have cdap_rank > MAX_HHSIZE
    choosers = persons[persons['cdap_rank'] > MAX_HHSIZE]

    if len(choosers.index) == 0:
        return pd.Series(dtype='float64')

    # eval the expression file
    values = simulate.eval_variables(cdap_fixed_relative_proportions.index, choosers, locals_d)

    # cdap_fixed_relative_proportions computes relative proportions by ptype, not utilities
    proportions = values.dot(cdap_fixed_relative_proportions)

    # convert relative proportions to probability
    probs = proportions.div(proportions.sum(axis=1), axis=0)

    # select an activity pattern alternative for each person based on probability
    # idx_choices is a series (indexed on _persons_index_ ) with the chosen alternative represented
    # as the integer (0 based) index of the chosen column from probs
    idx_choices, rands = logit.make_choices(probs, trace_label=trace_label)

    # convert choice from column index to activity name
    choices = pd.Series(probs.columns[idx_choices].values, index=probs.index)

    # if DUMP:
    #     tracing.trace_df(proportions, '%s.DUMP.extra_proportions' % trace_label,
    #                      transpose=False, slicer='NONE')
    #     tracing.trace_df(probs, '%s.DUMP.extra_probs' % trace_label,
    #                      transpose=False, slicer='NONE')
    #     tracing.trace_df(choices, '%s.DUMP.extra_choices' % trace_label,
    #                      transpose=False,
    #                      slicer='NONE')

    if trace_hh_id:
        tracing.trace_df(proportions, '%s.extra_hh_member_choices_proportions' % trace_label,
                         column_labels=['expression', 'person'])
        tracing.trace_df(probs, '%s.extra_hh_member_choices_probs' % trace_label,
                         column_labels=['expression', 'person'])
        tracing.trace_df(choices, '%s.extra_hh_member_choices_choices' % trace_label,
                         column_labels=['expression', 'person'])
        tracing.trace_df(rands, '%s.extra_hh_member_choices_rands' % trace_label,
                         columns=[None, 'rand'])

    return choices
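The distinctive step here is that proportions are normalized by straight division, with no exponentiation, before the 0-based column choice is mapped back to an activity name. A small self-contained sketch with invented values:

import pandas as pd

# made-up relative proportions of M/N/H for two extra household members
proportions = pd.DataFrame({'M': [0.0, 1.0], 'N': [2.0, 1.0], 'H': [2.0, 2.0]},
                           index=pd.Index([10, 11], name='person_id'))

# straight division by the row sum; no exponentiation as in a logit
probs = proportions.div(proportions.sum(axis=1), axis=0)

idx_choices = pd.Series([1, 2], index=probs.index)  # stand-in for make_choices output
choices = pd.Series(probs.columns[idx_choices].values, index=probs.index)
print(choices)  # 10 -> 'N', 11 -> 'H'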
Example #4
def household_activity_choices(indiv_utils, interaction_coefficients, hhsize,
                               trace_hh_id=None, trace_label=None):
    """
    Calculate household utilities for each activity pattern alternative for households of hhsize
    The resulting activity pattern for each household will be coded as a string of activity codes.
    e.g. 'MNHH' for a 4 person household with activities Mandatory, NonMandatory, Home, Home

    Parameters
    ----------
    indiv_utils : pandas.DataFrame
        CDAP utilities for each individual, ignoring interactions
        indiv_utils has index of _persons_index_ and a column for each alternative
        i.e. three columns 'M' (Mandatory), 'N' (NonMandatory), 'H' (Home)

    interaction_coefficients : pandas.DataFrame
        Rules and coefficients for generating interaction specs for different household sizes

    hhsize : int
        the size of household for which the activity pattern should be calculated (1..MAX_HHSIZE)

    Returns
    -------
    choices : pandas.Series
        the chosen cdap activity pattern for each household represented as a string (e.g. 'MNH')
        with same index (_hh_index_) as utils

    """

    if hhsize == 1:
        # for 1 person households, there are no interactions to account for
        # and the household utils are the same as the individual utils
        choosers = vars = None
        # extract the individual utilities for individuals from hhsize 1 households
        utils = indiv_utils.loc[indiv_utils[_hh_size_] == 1, [_hh_id_, 'M', 'N', 'H']]
        # index on household_id, not person_id
        set_hh_index(utils)
    else:

        choosers = hh_choosers(indiv_utils, hhsize=hhsize)

        spec = build_cdap_spec(interaction_coefficients, hhsize,
                               trace_spec=(trace_hh_id in choosers.index),
                               trace_label=trace_label)

        utils = simulate.eval_utilities(spec, choosers, trace_label=trace_label)

    if len(utils.index) == 0:
        return pd.Series(dtype='float64')

    probs = logit.utils_to_probs(utils, trace_label=trace_label)

    # select an activity pattern alternative for each household based on probability
    # result is a series indexed on _hh_index_ with the (0 based) index of the column from probs
    idx_choices, rands = logit.make_choices(probs, trace_label=trace_label)

    # convert choice expressed as index into alternative name from util column label
    choices = pd.Series(utils.columns[idx_choices].values, index=utils.index)

    if trace_hh_id:

        if hhsize > 1:
            tracing.trace_df(choosers, '%s.hhsize%d_choosers' % (trace_label, hhsize),
                             column_labels=['expression', 'person'])

        tracing.trace_df(utils, '%s.hhsize%d_utils' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(probs, '%s.hhsize%d_probs' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(choices, '%s.hhsize%d_activity_choices' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(rands, '%s.hhsize%d_rands' % (trace_label, hhsize),
                         columns=[None, 'rand'])

    return choices
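By contrast with the fixed-proportions case above, logit.utils_to_probs applies a standard multinomial logit transform. In essence (illustrative values, simplified from the real function, which also handles zero-probability rows and tracing):

import numpy as np
import pandas as pd

utils = pd.DataFrame({'M': [1.0], 'N': [0.0], 'H': [-1.0]},
                     index=pd.Index([42], name='household_id'))

exp_utils = np.exp(utils)                             # exponentiate utilities
probs = exp_utils.div(exp_utils.sum(axis=1), axis=0)  # normalize rows to sum to 1
print(probs.round(3))  # 'M' gets the largest share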
Example #5
def participants_chooser(probs, choosers, spec, trace_label):
    """
    custom alternative to logit.make_choices for simulate.simple_simulate

    Choosing participants for mixed tours is trickier than for adult or child tours because we
    need at least one adult and one child participant in a mixed tour. We call logit.make_choices
    and then check whether each tour satisfies this requirement, and rechoose for any that
    fail until all are satisfied.

    In principle, this should always succeed eventually, but we fail after MAX_ITERATIONS,
    just in case there is some failure in program logic (we haven't seen this occur.)

    Parameters
    ----------
    probs : pandas.DataFrame
        Rows for choosers and columns for the alternatives from which they
        are choosing. Values are expected to be valid probabilities across
        each row, e.g. they should sum to 1.
    choosers : pandas.dataframe
        simple_simulate choosers df
    spec : pandas.DataFrame
        simple_simulate spec df
        We only need spec so we can know the column index of the 'participate' alternative
        indicating that the participant has been chosen to participate in the tour
    trace_label : str

    Returns
    -------
    choices, rands
        choices, rands as returned by logit.make_choices (in same order as probs)

    """

    assert probs.index.equals(choosers.index)

    # choice is boolean (participate or not)
    model_settings = config.read_model_settings('joint_tour_participation.yaml')

    choice_col = model_settings.get('participation_choice', 'participate')
    assert choice_col in spec.columns, \
        "couldn't find participation choice column '%s' in spec" % choice_col
    PARTICIPATE_CHOICE = spec.columns.get_loc(choice_col)
    MAX_ITERATIONS = model_settings.get('max_participation_choice_iterations', 5000)

    trace_label = tracing.extend_trace_label(trace_label, 'participants_chooser')

    candidates = choosers.copy()
    choices_list = []
    rands_list = []

    num_tours_remaining = len(candidates.tour_id.unique())
    logger.info('%s %s joint tours to satisfy.', trace_label, num_tours_remaining,)

    iter = 0
    while candidates.shape[0] > 0:

        iter += 1

        if iter > MAX_ITERATIONS:
            logger.warning('%s max iterations exceeded (%s).', trace_label, MAX_ITERATIONS)
            diagnostic_cols = ['tour_id', 'household_id', 'composition', 'adult']
            unsatisfied_candidates = candidates[diagnostic_cols].join(probs)
            tracing.write_csv(unsatisfied_candidates,
                              file_name='%s.UNSATISFIED' % trace_label, transpose=False)
            print(unsatisfied_candidates.head(20))
            assert False, '%s max iterations exceeded' % trace_label

        choices, rands = logit.make_choices(probs, trace_label=trace_label, trace_choosers=choosers)
        participate = (choices == PARTICIPATE_CHOICE)

        # satisfaction indexed by tour_id
        tour_satisfaction = get_tour_satisfaction(candidates, participate)
        num_tours_satisfied_this_iter = tour_satisfaction.sum()

        if num_tours_satisfied_this_iter > 0:

            num_tours_remaining -= num_tours_satisfied_this_iter

            satisfied = reindex(tour_satisfaction, candidates.tour_id)

            choices_list.append(choices[satisfied])
            rands_list.append(rands[satisfied])

            # remove candidates of satisfied tours
            probs = probs[~satisfied]
            candidates = candidates[~satisfied]

        logger.info('%s iteration %s : %s joint tours satisfied %s remaining' %
                    (trace_label, iter, num_tours_satisfied_this_iter, num_tours_remaining,))

    choices = pd.concat(choices_list)
    rands = pd.concat(rands_list)

    # reindex choices and rands to match the choosers index
    choices = choices.reindex(choosers.index)
    rands = rands.reindex(choosers.index)
    assert choices.index.equals(choosers.index)
    assert rands.index.equals(choosers.index)

    logger.info('%s %s iterations to satisfy all joint tours.', trace_label, iter,)

    return choices, rands
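The rejection-resampling pattern used above can be reduced to a small skeleton. In this sketch, draw is a simplified stand-in for logit.make_choices, and is_satisfied stands in for get_tour_satisfaction (which, in the real function, evaluates satisfaction per tour rather than per row):

import numpy as np
import pandas as pd

def draw(probs, rng):
    # one multinomial draw per row, in the spirit of logit.make_choices
    r = rng.random(len(probs))
    positions = (probs.cumsum(axis=1).values > r[:, None]).argmax(axis=1)
    return pd.Series(positions, index=probs.index)

def resample_until_satisfied(probs, is_satisfied, max_iterations=5000, seed=0):
    # re-draw choices for rows whose constraint is not yet met,
    # collecting the satisfied ones as we go
    rng = np.random.default_rng(seed)
    satisfied_choices = []
    for _ in range(max_iterations):
        choices = draw(probs, rng)
        ok = is_satisfied(choices)  # boolean Series aligned with choices
        satisfied_choices.append(choices[ok])
        probs = probs[~ok]
        if probs.empty:
            return pd.concat(satisfied_choices)
    raise RuntimeError('max_iterations exceeded')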
Example #6
def extra_hh_member_choices(persons, cdap_fixed_relative_proportions, locals_d,
                            trace_hh_id, trace_label):
    """
    Generate the activity choices for the 'extra' household members who weren't handled by cdap

    Following the CTRAMP HouseholdCoordinatedDailyActivityPatternModel, "a separate,
    simple cross-sectional distribution is looked up for the remaining household members"

    The cdap_fixed_relative_proportions spec is handled like an activitysim logit utility spec,
    EXCEPT that the values computed are relative proportions, not utilities
    (i.e. values are not exponentiated before being normalized to probabilities summing to 1.0)

    Parameters
    ----------
    persons : pandas.DataFrame
        Table of persons data indexed on _persons_index_
         We expect, at least, columns [_hh_id_, _ptype_]
    cdap_fixed_relative_proportions
        spec to compute/specify the relative proportions of each activity (M, N, H)
        that should be used to choose activities for additional household members
        not handled by CDAP.
    locals_d : Dict
        dictionary of local variables that eval_variables adds to the environment
        for an evaluation of an expression that begins with @

    Returns
    -------
    choices : pandas.Series
        list of alternatives chosen for all extra members, indexed by _persons_index_
    """

    trace_label = tracing.extend_trace_label(trace_label, 'extra_hh_member_choices')

    # extra household members have cdap_rank > MAX_HHSIZE
    choosers = persons[persons['cdap_rank'] > MAX_HHSIZE]

    if len(choosers.index) == 0:
        return pd.Series(dtype='float64')

    # eval the expression file
    values = simulate.eval_variables(cdap_fixed_relative_proportions.index, choosers, locals_d)

    # cdap_fixed_relative_proportions computes relative proportions by ptype, not utilities
    proportions = values.dot(cdap_fixed_relative_proportions)

    # convert relative proportions to probability
    probs = proportions.div(proportions.sum(axis=1), axis=0)

    # select an activity pattern alternative for each person based on probability
    # idx_choices is a series (indexed on _persons_index_ ) with the chosen alternative represented
    # as the integer (0 based) index of the chosen column from probs
    idx_choices, rands = logit.make_choices(probs, trace_label=trace_label)

    # convert choice from column index to activity name
    choices = pd.Series(probs.columns[idx_choices].values, index=probs.index)

    # if DUMP:
    #     tracing.trace_df(proportions, '%s.DUMP.extra_proportions' % trace_label,
    #                      transpose=False, slicer='NONE')
    #     tracing.trace_df(probs, '%s.DUMP.extra_probs' % trace_label,
    #                      transpose=False, slicer='NONE')
    #     tracing.trace_df(choices, '%s.DUMP.extra_choices' % trace_label,
    #                      transpose=False,
    #                      slicer='NONE')

    if trace_hh_id:
        tracing.trace_df(proportions, '%s.extra_hh_member_choices_proportions' % trace_label,
                         column_labels=['expression', 'person'])
        tracing.trace_df(probs, '%s.extra_hh_member_choices_probs' % trace_label,
                         column_labels=['expression', 'person'])
        tracing.trace_df(choices, '%s.extra_hh_member_choices_choices' % trace_label,
                         column_labels=['expression', 'person'])
        tracing.trace_df(rands, '%s.extra_hh_member_choices_rands' % trace_label,
                         columns=[None, 'rand'])

    return choices
Example #7
def household_activity_choices(indiv_utils, interaction_coefficients, hhsize,
                               trace_hh_id=None, trace_label=None):
    """
    Calculate household utilities for each activity pattern alternative for households of hhsize
    The resulting activity pattern for each household will be coded as a string of activity codes.
    e.g. 'MNHH' for a 4 person household with activities Mandatory, NonMandatory, Home, Home

    Parameters
    ----------
    indiv_utils : pandas.DataFrame
        CDAP utilities for each individual, ignoring interactions
        indiv_utils has index of _persons_index_ and a column for each alternative
        i.e. three columns 'M' (Mandatory), 'N' (NonMandatory), 'H' (Home)

    interaction_coefficients : pandas.DataFrame
        Rules and coefficients for generating interaction specs for different household sizes

    hhsize : int
        the size of household for which the activity pattern should be calculated (1..MAX_HHSIZE)

    Returns
    -------
    choices : pandas.Series
        the chosen cdap activity pattern for each household represented as a string (e.g. 'MNH')
        with same index (_hh_index_) as utils

    """

    if hhsize == 1:
        # for 1 person households, there are no interactions to account for
        # and the household utils are the same as the individual utils
        choosers = vars = None
        # extract the individual utilities for individuals from hhsize 1 households
        utils = indiv_utils.loc[indiv_utils[_hh_size_] == 1, [_hh_id_, 'M', 'N', 'H']]
        # index on household_id, not person_id
        set_hh_index(utils)
    else:

        choosers = hh_choosers(indiv_utils, hhsize=hhsize)

        spec = build_cdap_spec(interaction_coefficients, hhsize,
                               trace_spec=(trace_hh_id in choosers.index),
                               trace_label=trace_label)

        utils = simulate.eval_utilities(spec, choosers, trace_label=trace_label)

    if len(utils.index) == 0:
        return pd.Series(dtype='float64')

    probs = logit.utils_to_probs(utils, trace_label=trace_label)

    # select an activity pattern alternative for each household based on probability
    # result is a series indexed on _hh_index_ with the (0 based) index of the column from probs
    idx_choices, rands = logit.make_choices(probs, trace_label=trace_label)

    # convert choice expressed as index into alternative name from util column label
    choices = pd.Series(utils.columns[idx_choices].values, index=utils.index)

    if trace_hh_id:

        if hhsize > 1:
            tracing.trace_df(choosers, '%s.hhsize%d_choosers' % (trace_label, hhsize),
                             column_labels=['expression', 'person'])

        tracing.trace_df(utils, '%s.hhsize%d_utils' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(probs, '%s.hhsize%d_probs' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(choices, '%s.hhsize%d_activity_choices' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(rands, '%s.hhsize%d_rands' % (trace_label, hhsize),
                         columns=[None, 'rand'])

    return choices
Example #8
def choose_intermediate_trip_purpose(trips, probs_spec, estimator,
                                     probs_join_cols, use_depart_time,
                                     trace_hh_id, trace_label):
    """
    choose purpose for intermediate trips based on probs_spec
    which assigns relative weights (summing to 1) to the possible purpose choices

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    non_purpose_cols = probs_join_cols.copy()
    if use_depart_time:
        non_purpose_cols += ['depart_range_start', 'depart_range_end']
    purpose_cols = [c for c in probs_spec.columns if c not in non_purpose_cols]

    num_trips = len(trips.index)
    have_trace_targets = trace_hh_id and tracing.has_trace_targets(trips)

    # probs should sum to 1 across rows
    sum_probs = probs_spec[purpose_cols].sum(axis=1)
    probs_spec.loc[:, purpose_cols] = \
        probs_spec.loc[:, purpose_cols].div(sum_probs, axis=0)

    # left join trips to probs (there may be multiple rows per trip for multiple depart ranges)
    choosers = pd.merge(trips.reset_index(),
                        probs_spec,
                        on=probs_join_cols,
                        how='left').set_index('trip_id')
    chunk.log_df(trace_label, 'choosers', choosers)

    if use_depart_time:

        # select the matching depart range (this should result in exactly one chooser row per trip)
        chooser_probs = \
            (choosers.start >= choosers['depart_range_start']) & (choosers.start <= choosers['depart_range_end'])

        # if we failed to match a row in probs_spec
        if chooser_probs.sum() < num_trips:

            # this can happen if the spec has no probs row matching a trip's probs_join_cols values
            missing_trip_ids = trips.index[
                ~trips.index.isin(choosers.index[chooser_probs])].values
            unmatched_choosers = choosers[choosers.index.isin(
                missing_trip_ids)]
            unmatched_choosers = unmatched_choosers[['person_id', 'start'] +
                                                    non_purpose_cols]

            # join to persons for better diagnostics
            persons = inject.get_table('persons').to_frame()
            persons_cols = [
                'age', 'is_worker', 'is_student', 'is_gradeschool',
                'is_highschool', 'is_university'
            ]
            unmatched_choosers = pd.merge(unmatched_choosers,
                                          persons[[
                                              col for col in persons_cols
                                              if col in persons.columns
                                          ]],
                                          left_on='person_id',
                                          right_index=True,
                                          how='left')

            file_name = '%s.UNMATCHED_PROBS' % trace_label
            logger.error(
                "%s %s of %s intermediate trips could not be matched to probs based on join columns  %s"
                % (trace_label, len(unmatched_choosers), len(choosers),
                   probs_join_cols))
            logger.info("Writing %s unmatched choosers to %s" % (
                len(unmatched_choosers),
                file_name,
            ))
            tracing.write_csv(unmatched_choosers,
                              file_name=file_name,
                              transpose=False)
            raise RuntimeError(
                "Some trips could not be matched to probs based on join columns %s."
                % probs_join_cols)

        # select the matching depart range (this should result in exactly one chooser row per trip)
        choosers = choosers[chooser_probs]

    # choosers should now match trips row for row
    assert choosers.index.identical(trips.index)

    if estimator:
        probs_cols = list(probs_spec.columns)
        estimator.write_table(choosers[probs_cols], 'probs', append=True)

    choices, rands = logit.make_choices(choosers[purpose_cols],
                                        trace_label=trace_label,
                                        trace_choosers=choosers)

    if have_trace_targets:
        tracing.trace_df(choices,
                         '%s.choices' % trace_label,
                         columns=[None, 'trip_purpose'])
        tracing.trace_df(rands,
                         '%s.rands' % trace_label,
                         columns=[None, 'rand'])

    choices = choices.map(pd.Series(purpose_cols))
    return choices
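The depart-range selection step works because the left join can produce several candidate rows per trip, of which exactly one should bracket the trip's depart period. A toy illustration:

import pandas as pd

# after the left join, trip 500 has two candidate rows for different depart ranges
choosers = pd.DataFrame({'start': [7, 7],
                         'depart_range_start': [5, 8],
                         'depart_range_end': [7, 10]},
                        index=pd.Index([500, 500], name='trip_id'))

in_range = ((choosers.start >= choosers.depart_range_start) &
            (choosers.start <= choosers.depart_range_end))
choosers = choosers[in_range]  # exactly one row per trip remains
assert choosers.index.is_unique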
Example #9
def schedule_nth_trips(trips, probs_spec, model_settings, first_trip_in_leg,
                       report_failed_trips, trace_hh_id, trace_label):
    """
    We join each trip with the appropriate row in probs_spec by joining on probs_join_cols,
    which should exist in both the trips and probs_spec dataframes.

    Parameters
    ----------
    trips: pd.DataFrame
    probs_spec: pd.DataFrame
        Dataframe of probs for choice of depart times and join columns to match them with trips.
        Depart column names are irrelevant; they are position dependent:
        the chosen time period is the column index + depart_alt_base
    model_settings: dict
        must contain DEPART_ALT_BASE, an int to add to a probs column index to get the
        time period it represents, e.g. DEPART_ALT_BASE = 5 means the first column
        (column 0) represents 5 am
    report_failed_trips : bool
    trace_hh_id
    trace_label

    Returns
    -------
    choices: pd.Series
        time periods depart choices, one per trip (except for trips with zero probs)
    """

    depart_alt_base = model_settings.get('DEPART_ALT_BASE')

    probs_cols = [c for c in probs_spec.columns if c not in PROBS_JOIN_COLUMNS]

    # left join trips to probs (there may be multiple rows per trip for multiple depart ranges)
    choosers = pd.merge(trips.reset_index(),
                        probs_spec,
                        on=PROBS_JOIN_COLUMNS,
                        how='left').set_index('trip_id')
    chunk.log_df(trace_label, "choosers", choosers)

    if trace_hh_id and tracing.has_trace_targets(trips):
        tracing.trace_df(choosers, '%s.choosers' % trace_label)

    # choosers should now match trips row for row
    assert choosers.index.is_unique
    assert len(choosers.index) == len(trips.index)

    # zero out probs outside earliest-latest window
    chooser_probs = clip_probs(trips, choosers[probs_cols], model_settings)
    chunk.log_df(trace_label, "chooser_probs", chooser_probs)

    if first_trip_in_leg:
        # probs should sum to 1 unless all zero
        chooser_probs = chooser_probs.div(chooser_probs.sum(axis=1),
                                          axis=0).fillna(0)

    # probs should sum to 1 with residual probs resulting in choice of 'fail'
    chooser_probs['fail'] = 1 - chooser_probs.sum(axis=1).clip(0, 1)
    chunk.log_df(trace_label, "chooser_probs", chooser_probs)

    if trace_hh_id and tracing.has_trace_targets(trips):
        tracing.trace_df(chooser_probs, '%s.chooser_probs' % trace_label)

    choices, rands = logit.make_choices(chooser_probs,
                                        trace_label=trace_label,
                                        trace_choosers=choosers)

    chunk.log_df(trace_label, "choices", choices)
    chunk.log_df(trace_label, "rands", rands)

    if trace_hh_id and tracing.has_trace_targets(trips):
        tracing.trace_df(choices,
                         '%s.choices' % trace_label,
                         columns=[None, 'depart'])
        tracing.trace_df(rands,
                         '%s.rands' % trace_label,
                         columns=[None, 'rand'])

    # convert alt choice index to depart time (setting failed choices to -1)
    failed = (choices == chooser_probs.columns.get_loc('fail'))
    choices = (choices + depart_alt_base).where(~failed, -1)

    chunk.log_df(trace_label, "failed", failed)

    # report failed trips while we have the best diagnostic info
    if report_failed_trips and failed.any():
        report_bad_choices(bad_row_map=failed,
                           df=choosers,
                           filename='failed_choosers',
                           trace_label=trace_label,
                           trace_choosers=None)

    # trace before removing failures
    if trace_hh_id and tracing.has_trace_targets(trips):
        tracing.trace_df(choices,
                         '%s.choices' % trace_label,
                         columns=[None, 'depart'])
        tracing.trace_df(rands,
                         '%s.rands' % trace_label,
                         columns=[None, 'rand'])

    # remove any failed choices
    if failed.any():
        choices = choices[~failed]

    assert (choices >= trips.earliest[~failed]).all()
    assert (choices <= trips.latest[~failed]).all()

    return choices
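The residual 'fail' column deserves a note: after clipping, a row's probabilities may sum to less than 1, and the shortfall becomes the probability of an explicit failure alternative, keeping each row a proper distribution. Illustrative sketch:

import pandas as pd

# clipped probs: trip 2 had all of its real alternatives zeroed out
chooser_probs = pd.DataFrame({'5am': [0.5, 0.0], '6am': [0.3, 0.0]},
                             index=pd.Index([1, 2], name='trip_id'))

# the shortfall from 1 becomes the probability of an explicit 'fail' alternative
chooser_probs['fail'] = 1 - chooser_probs.sum(axis=1).clip(0, 1)
print(chooser_probs)  # trip 1: fail=0.2, trip 2: fail=1.0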
Example #10
    def build_virtual_path(self,
                           recipe,
                           path_type,
                           orig,
                           dest,
                           tod,
                           demographic_segment,
                           want_choices,
                           trace_label,
                           filter_targets=None,
                           trace=False,
                           override_choices=None):

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'build_virtual_path')

        # Tracing is implemented as a separate, second call that operates ONLY on filter_targets
        assert not (trace and filter_targets is None)
        if filter_targets is not None:
            assert filter_targets.any()

            # slice orig and dest
            orig = orig[filter_targets]
            dest = dest[filter_targets]
            assert len(orig) > 0
            assert len(dest) > 0

            # slice tod and demographic_segment if not scalar
            if not isinstance(tod, str):
                tod = tod[filter_targets]
            if demographic_segment is not None:
                demographic_segment = demographic_segment[filter_targets]
                assert len(demographic_segment) > 0

            # slice choices
            # (requires actual choices from the previous call lest rands change on second call)
            assert want_choices == (override_choices is not None)
            if want_choices:
                override_choices = override_choices[filter_targets]

        units = self.units_for_recipe(recipe)
        assert units == 'utility' or not want_choices, "'want_choices' only supported if units is 'utility'"

        access_mode = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.path_types.{path_type}.access')
        egress_mode = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.path_types.{path_type}.egress')
        path_types_settings = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.path_types.{path_type}')
        attributes_as_columns = \
            self.network_los.setting(f'TVPB_SETTINGS.{recipe}.tap_tap_settings.attributes_as_columns', [])

        path_info = {
            'path_type': path_type,
            'access_mode': access_mode,
            'egress_mode': egress_mode
        }

        # maz od pairs requested
        with memo("#TVPB build_virtual_path maz_od_df"):
            maz_od_df = pd.DataFrame({
                'idx': orig.index.values,
                'omaz': orig.values,
                'dmaz': dest.values,
                'seq': range(len(orig))
            })
            chunk.log_df(trace_label, "maz_od_df", maz_od_df)
            self.trace_maz_tap(maz_od_df, access_mode, egress_mode)

        # for location choice, there will be multiple alt dest rows per chooser and duplicate orig.index values
        # but tod and demographic_segment should be the same for all chooser rows (unique orig index values)
        # knowing this allows us to eliminate redundant computations (e.g. utilities of maz_tap pairs)
        duplicated = orig.index.duplicated(keep='first')
        chooser_attributes = pd.DataFrame(index=orig.index[~duplicated])
        if not isinstance(tod, str):
            chooser_attributes['tod'] = tod.loc[~duplicated]
        elif 'tod' in attributes_as_columns:
            chooser_attributes['tod'] = tod
        else:
            path_info['tod'] = tod
        if demographic_segment is not None:
            chooser_attributes[
                'demographic_segment'] = demographic_segment.loc[~duplicated]

        with memo("#TVPB build_virtual_path access_df"):
            access_df = self.compute_maz_tap_utilities(recipe,
                                                       maz_od_df,
                                                       chooser_attributes,
                                                       leg='access',
                                                       mode=access_mode,
                                                       trace_label=trace_label,
                                                       trace=trace)
        chunk.log_df(trace_label, "access_df", access_df)

        with memo("#TVPB build_virtual_path egress_df"):
            egress_df = self.compute_maz_tap_utilities(recipe,
                                                       maz_od_df,
                                                       chooser_attributes,
                                                       leg='egress',
                                                       mode=egress_mode,
                                                       trace_label=trace_label,
                                                       trace=trace)
        chunk.log_df(trace_label, "egress_df", egress_df)

        # path_info for use by expressions (e.g. penalty for drive access if no parking at access tap)
        with memo("#TVPB build_virtual_path compute_tap_tap"):
            transit_df = self.compute_tap_tap(recipe,
                                              maz_od_df,
                                              access_df,
                                              egress_df,
                                              chooser_attributes,
                                              path_info=path_info,
                                              trace_label=trace_label,
                                              trace=trace)
        chunk.log_df(trace_label, "transit_df", transit_df)

        with memo("#TVPB build_virtual_path best_paths"):
            path_df = self.best_paths(recipe, path_type, maz_od_df, access_df,
                                      egress_df, transit_df, trace_label,
                                      trace)
        chunk.log_df(trace_label, "path_df", path_df)

        # now that we have created path_df, we are done with the dataframes for the separate legs
        del access_df
        chunk.log_df(trace_label, "access_df", None)
        del egress_df
        chunk.log_df(trace_label, "egress_df", None)
        del transit_df
        chunk.log_df(trace_label, "transit_df", None)

        if units == 'utility':

            # logsums
            with memo("#TVPB build_virtual_path logsums"):
                # one row per seq with utilities in columns
                # path_num 0-based to align with logit.make_choices 0-based choice indexes
                path_df['path_num'] = path_df.groupby('seq').cumcount()
                chunk.log_df(trace_label, "path_df", path_df)

                utilities_df = path_df[['seq', 'path_num', units]] \
                    .set_index(['seq', 'path_num']).unstack()
                utilities_df.columns = utilities_df.columns.droplevel()  # for legibility

                # add rows missing because no access or egress availability
                utilities_df = pd.concat(
                    [pd.DataFrame(index=maz_od_df.seq), utilities_df], axis=1)
                # set utilities for missing paths to UNAVAILABLE
                utilities_df = utilities_df.fillna(UNAVAILABLE)

                chunk.log_df(trace_label, "utilities_df", utilities_df)

                with warnings.catch_warnings(record=True) as w:
                    # Cause all warnings to always be triggered.
                    # most likely "divide by zero encountered in log" caused by all transit sets non-viable
                    warnings.simplefilter("always")

                    paths_nest_nesting_coefficient = path_types_settings.get(
                        'paths_nest_nesting_coefficient', 1)
                    exp_utilities = np.exp(utilities_df.values /
                                           paths_nest_nesting_coefficient)
                    logsums = np.maximum(
                        np.log(np.nansum(exp_utilities, axis=1)), UNAVAILABLE)

                    if len(w) > 0:
                        for wrn in w:
                            logger.warning(
                                f"{trace_label} - {type(wrn).__name__} ({wrn.message})"
                            )

                        DUMP = False
                        if DUMP:
                            zero_utilities_df = utilities_df[
                                np.nansum(np.exp(utilities_df.values), axis=1) == 0]
                            zero_utilities_df.to_csv(
                                config.output_file_path('warning_utilities_df.csv'), index=True)
                            raise RuntimeError('zero-utility rows written to warning_utilities_df.csv')

            if want_choices:

                # orig index identifies the appropriate random number channel to use when making choices
                utilities_df.index = orig.index

                with memo("#TVPB build_virtual_path make_choices"):

                    probs = logit.utils_to_probs(utilities_df,
                                                 allow_zero_probs=True,
                                                 trace_label=trace_label)
                    chunk.log_df(trace_label, "probs", probs)

                    if trace:
                        choices = override_choices

                        utilities_df['choices'] = choices
                        self.trace_df(utilities_df, trace_label,
                                      'utilities_df')

                        probs['choices'] = choices
                        self.trace_df(probs, trace_label, 'probs')
                    else:

                        choices, rands = logit.make_choices(
                            probs,
                            allow_bad_probs=True,
                            trace_label=trace_label)

                        chunk.log_df(trace_label, "rands", rands)
                        del rands
                        chunk.log_df(trace_label, "rands", None)

                    del probs
                    chunk.log_df(trace_label, "probs", None)

                # we need to get path_set, btap, atap from path_df row with same seq and path_num
                # drop seq join column, but keep path_num of choice to override_choices when tracing
                columns_to_cache = ['btap', 'atap', 'path_set', 'path_num']
                logsum_df = \
                    pd.merge(pd.DataFrame({'seq': range(len(orig)), 'path_num': choices.values}),
                             path_df[['seq'] + columns_to_cache],
                             on=['seq', 'path_num'], how='left')\
                    .drop(columns=['seq'])\
                    .set_index(orig.index)

                logsum_df['logsum'] = logsums

            else:

                assert len(logsums) == len(orig)
                logsum_df = pd.DataFrame({'logsum': logsums}, index=orig.index)

            chunk.log_df(trace_label, "logsum_df", logsum_df)

            del utilities_df
            chunk.log_df(trace_label, "utilities_df", None)

            if trace:
                self.trace_df(logsum_df, trace_label, 'logsum_df')

            chunk.log_df(trace_label, "logsum_df", logsum_df)
            results = logsum_df

        else:
            assert units == 'time'

            # return a series
            results = pd.Series(path_df[units].values, index=path_df['idx'])

            # zero-fill rows for O-D pairs where no best path exists because there was no tap-tap transit availability
            results = reindex(results, maz_od_df.idx).fillna(0.0)

            chunk.log_df(trace_label, "results", results)

        assert len(results) == len(orig)

        del path_df
        chunk.log_df(trace_label, "path_df", None)

        # diagnostic
        # maz_od_df['DIST'] = self.network_los.get_default_skim_dict().get('DIST').get(maz_od_df.omaz, maz_od_df.dmaz)
        # maz_od_df[units] = results.logsum if units == 'utility' else results.values
        # print(f"maz_od_df\n{maz_od_df}")

        return results
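The logsum computation at the heart of the utility branch can be shown in isolation. UNAVAILABLE and the nesting coefficient below are illustrative stand-ins for the module's values:

import numpy as np

UNAVAILABLE = -999.0  # illustrative floor for non-viable paths
utilities = np.array([[1.0, 0.5, UNAVAILABLE],
                      [UNAVAILABLE, UNAVAILABLE, UNAVAILABLE]])
nesting_coefficient = 1.0

exp_utilities = np.exp(utilities / nesting_coefficient)
with np.errstate(divide='ignore'):  # an all-unavailable row produces log(0)
    logsums = np.maximum(np.log(np.nansum(exp_utilities, axis=1)), UNAVAILABLE)
print(logsums)  # second row is floored at UNAVAILABLE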
Example #11
def choose_tour_leg_pattern(trip_segment,
                            patterns, spec,
                            trace_label='trace_label'):
    alternatives = generate_alternatives(trip_segment, STOP_TIME_DURATION).sort_index()
    have_trace_targets = tracing.has_trace_targets(trip_segment)

    if have_trace_targets:
        tracing.trace_df(trip_segment, tracing.extend_trace_label(trace_label, 'choosers'))
        tracing.trace_df(alternatives, tracing.extend_trace_label(trace_label, 'alternatives'),
                         transpose=False)

    if len(spec.columns) > 1:
        raise RuntimeError('spec must have only one column')

    # - join choosers and alts
    # in vanilla interaction_simulate interaction_df is cross join of choosers and alternatives
    # interaction_df = logit.interaction_dataset(choosers, alternatives, sample_size)
    # here, alternatives is sparsely repeated once for each (non-dup) sample
    # we expect alternatives to have the same index as choosers (but with duplicate index values)
    # so we just need to left join alternatives with choosers
    assert alternatives.index.name == trip_segment.index.name

    interaction_df = alternatives.join(trip_segment, how='left', rsuffix='_chooser')

    chunk.log_df(trace_label, 'interaction_df', interaction_df)

    if have_trace_targets:
        trace_rows, trace_ids = tracing.interaction_trace_rows(interaction_df, trip_segment)

        tracing.trace_df(interaction_df,
                         tracing.extend_trace_label(trace_label, 'interaction_df'),
                         transpose=False)
    else:
        trace_rows = trace_ids = None

    interaction_utilities, trace_eval_results \
        = interaction_simulate.eval_interaction_utilities(spec, interaction_df, None, trace_label, trace_rows,
                                                          estimator=None)

    interaction_utilities = pd.concat([interaction_df[STOP_TIME_DURATION], interaction_utilities], axis=1)
    chunk.log_df(trace_label, 'interaction_utilities', interaction_utilities)

    interaction_utilities = pd.merge(interaction_utilities.reset_index(),
                                     patterns[patterns[TRIP_ID].isin(interaction_utilities.index)],
                                     on=[TRIP_ID, STOP_TIME_DURATION], how='left')

    if have_trace_targets:
        tracing.trace_interaction_eval_results(trace_eval_results, trace_ids,
                                               tracing.extend_trace_label(trace_label, 'eval'))

        tracing.trace_df(interaction_utilities,
                         tracing.extend_trace_label(trace_label, 'interaction_utilities'),
                         transpose=False)

    del interaction_df
    chunk.log_df(trace_label, 'interaction_df', None)

    interaction_utilities = interaction_utilities.groupby([TOUR_ID, OUTBOUND, PATTERN_ID],
                                                          as_index=False)[['utility']].sum()

    interaction_utilities[TOUR_LEG_ID] = \
        interaction_utilities.apply(generate_tour_leg_id, axis=1)

    tour_choosers = interaction_utilities.set_index(TOUR_LEG_ID)
    interaction_utilities = tour_choosers[['utility']].copy()

    # reshape utilities (one utility column and one row per row in model_design)
    # to a dataframe with one row per chooser and one column per alternative
    # interaction_utilities is sparse because duplicate sampled alternatives were dropped
    # so we need to pad with dummy utilities so low that they are never chosen

    # number of samples per chooser
    sample_counts = interaction_utilities.groupby(interaction_utilities.index).size().values
    chunk.log_df(trace_label, 'sample_counts', sample_counts)

    # max number of alternatives for any chooser
    max_sample_count = sample_counts.max()

    # offsets of the first and last rows of each chooser in sparse interaction_utilities
    last_row_offsets = sample_counts.cumsum()
    first_row_offsets = np.insert(last_row_offsets[:-1], 0, 0)

    # repeat the row offsets once for each dummy utility to insert
    # (we want to insert dummy utilities at the END of the list of alternative utilities)
    # inserts is a list of the indices at which we want to do the insertions
    inserts = np.repeat(last_row_offsets, max_sample_count - sample_counts)

    del sample_counts
    chunk.log_df(trace_label, 'sample_counts', None)

    # insert the zero-prob utilities to pad each alternative set to same size
    padded_utilities = np.insert(interaction_utilities.utility.values, inserts, -999)
    del inserts

    del interaction_utilities
    chunk.log_df(trace_label, 'interaction_utilities', None)

    # reshape to array with one row per chooser, one column per alternative
    padded_utilities = padded_utilities.reshape(-1, max_sample_count)
    chunk.log_df(trace_label, 'padded_utilities', padded_utilities)

    # convert to a dataframe with one row per chooser and one column per alternative
    utilities_df = pd.DataFrame(
        padded_utilities,
        index=tour_choosers.index.unique())
    chunk.log_df(trace_label, 'utilities_df', utilities_df)

    del padded_utilities
    chunk.log_df(trace_label, 'padded_utilities', None)

    if have_trace_targets:
        tracing.trace_df(utilities_df, tracing.extend_trace_label(trace_label, 'utilities'),
                         column_labels=['alternative', 'utility'])

    # convert to probabilities (utilities exponentiated and normalized to probs)
    # probs is same shape as utilities, one row per chooser and one column for alternative
    probs = logit.utils_to_probs(utilities_df,
                                 trace_label=trace_label, trace_choosers=trip_segment)

    chunk.log_df(trace_label, 'probs', probs)

    del utilities_df
    chunk.log_df(trace_label, 'utilities_df', None)

    if have_trace_targets:
        tracing.trace_df(probs, tracing.extend_trace_label(trace_label, 'probs'),
                         column_labels=['alternative', 'probability'])

    # make choices
    # positions is series with the chosen alternative represented as a column index in probs
    # which is an integer between zero and num alternatives in the alternative sample
    positions, rands = \
        logit.make_choices(probs, trace_label=trace_label, trace_choosers=trip_segment)

    chunk.log_df(trace_label, 'positions', positions)
    chunk.log_df(trace_label, 'rands', rands)

    del probs
    chunk.log_df(trace_label, 'probs', None)

    # shouldn't have chosen any of the dummy pad utilities
    assert positions.max() < max_sample_count

    # need to get from an integer offset into the alternative sample to the alternative index
    # that is, we want the index value of the row that is offset by <position> rows into the
    # tranche of this choosers alternatives created by cross join of alternatives and choosers

    # resulting pandas Int64Index has one element per chooser row and is in same order as choosers
    choices = tour_choosers[PATTERN_ID].take(positions + first_row_offsets)

    chunk.log_df(trace_label, 'choices', choices)

    if have_trace_targets:
        tracing.trace_df(choices, tracing.extend_trace_label(trace_label, 'choices'),
                         columns=[None, PATTERN_ID])
        tracing.trace_df(rands, tracing.extend_trace_label(trace_label, 'rands'),
                         columns=[None, 'rand'])

    return choices
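The padding trick used to rectangularize the sparse utilities is worth seeing on its own: np.insert places dummy utilities at the end of each chooser's run so the flat array reshapes cleanly. A worked toy example:

import numpy as np

utilities = np.array([1.0, 2.0, 3.0, 4.0, 5.0])  # chooser A has 3 alts, B has 2
sample_counts = np.array([3, 2])
max_sample_count = sample_counts.max()

last_row_offsets = sample_counts.cumsum()                                # [3 5]
inserts = np.repeat(last_row_offsets, max_sample_count - sample_counts)  # [5]

# pad each chooser's run out to max_sample_count with a never-chosen utility
padded = np.insert(utilities, inserts, -999).reshape(-1, max_sample_count)
print(padded)  # [[1. 2. 3.] [4. 5. -999.]]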
Example #12
def extend_tour_counts(persons, tour_counts, alternatives, trace_hh_id, trace_label):
    """
    extend tour counts based on a probability table

    counts can only be extended if original count is between 1 and 4
    and tours can only be extended if their count is at the max possible
    (e.g. 2 for escort, 1 otherwise) so escort might be increased to 3 or 4
    and other tour types might be increased to 2 or 3

    Parameters
    ----------
    persons: pandas dataframe
        (need this for join columns)
    tour_counts: pandas dataframe
        one row per person, one column per tour_type
    alternatives
        alternatives from nmtv interaction_simulate
        only need this to know max possible frequency for a tour type
    trace_hh_id
    trace_label

    Returns
    -------
    extended tour_counts


    tour_counts looks like this:
               escort  shopping  othmaint  othdiscr    eatout    social
    parent_id
    2588676         2         0         0         1         1         0
    2588677         0         1         0         1         0         0

    """

    assert tour_counts.index.name == persons.index.name

    PROBABILITY_COLUMNS = ['0_tours', '1_tours', '2_tours']
    JOIN_COLUMNS = ['ptype', 'has_mandatory_tour', 'has_joint_tour']
    TOUR_TYPE_COL = 'nonmandatory_tour_type'

    probs_spec = extension_probs()
    persons = persons[JOIN_COLUMNS]

    # only extend if there are 1 - 4 non_mandatory tours to start with
    extend_tour_counts = tour_counts.sum(axis=1).between(1, 4)
    if not extend_tour_counts.any():
        return tour_counts

    have_trace_targets = trace_hh_id and tracing.has_trace_targets(extend_tour_counts)

    for i, tour_type in enumerate(alternatives.columns):

        i_tour_type = i + 1  # (probs_spec nonmandatory_tour_type column is 1-based)
        tour_type_trace_label = tracing.extend_trace_label(trace_label, tour_type)

        # - only extend tour if frequency is max possible frequency for this tour type
        tour_type_is_maxed = \
            extend_tour_counts & (tour_counts[tour_type] == alternatives[tour_type].max())
        maxed_tour_count_idx = tour_counts.index[tour_type_is_maxed]

        if len(maxed_tour_count_idx) == 0:
            continue

        # - get extension probs for tour_type
        choosers = pd.merge(
            persons.loc[maxed_tour_count_idx],
            probs_spec[probs_spec[TOUR_TYPE_COL] == i_tour_type],
            on=JOIN_COLUMNS,
            how='left'
        ).set_index(maxed_tour_count_idx)
        assert choosers.index.name == tour_counts.index.name

        # - random choice of extension magnitude based on relative probs
        choices, rands = logit.make_choices(
            choosers[PROBABILITY_COLUMNS],
            trace_label=tour_type_trace_label,
            trace_choosers=choosers)

        # - extend tour_count (0-based prob alternative choice equals magnitude of extension)
        if choices.any():
            tour_counts.loc[choices.index, tour_type] += choices

        if have_trace_targets:
            tracing.trace_df(choices,
                             tracing.extend_trace_label(tour_type_trace_label, 'choices'),
                             columns=[None, 'choice'])
            tracing.trace_df(rands,
                             tracing.extend_trace_label(tour_type_trace_label, 'rands'),
                             columns=[None, 'rand'])

    return tour_counts
Example #13
def make_scheduling_choices(
        choosers_df, scheduling_mode,
        probs_spec, probs_join_cols,
        depart_alt_base,
        first_trip_in_leg,
        report_failed_trips, trace_hh_id, trace_label,
        trace_choice_col_name='depart',
        clip_earliest_latest=True):
    """
    We join each chooser with the appropriate row in probs_spec by joining on probs_join_cols,
    which should exist in both the choosers_df and probs_spec dataframes.

    Parameters
    ----------
    choosers_df: pd.DataFrame
    scheduling_mode: str
        Either 'departure' or 'stop_duration' depending on whether the probability
        lookup table is keyed on departure period or stop duration.
    probs_spec: pd.DataFrame
        Dataframe of probs for choice of depart times and join columns to match them with trips.
        Depart column names are irrelevant; they are position dependent:
        the chosen time period is the column index + depart_alt_base
    depart_alt_base: int
        int to add to probs column index to get time period it represents.
        e.g. depart_alt_base = 5 means first column (column 0) represents 5 am
    report_failed_trips : bool
    trace_hh_id
    trace_label

    Returns
    -------
    choices: pd.Series
        time periods depart choices, one per trip (except for trips with zero probs)
    """

    choosers = pd.merge(choosers_df.reset_index(), probs_spec, on=probs_join_cols,
                        how='left').set_index(choosers_df.index.name)
    chunk.log_df(trace_label, "choosers", choosers)

    if trace_hh_id and tracing.has_trace_targets(choosers_df):
        tracing.trace_df(choosers, '%s.choosers' % trace_label)

    # different pre-processing is required based on the scheduling mode
    chooser_probs = _preprocess_scheduling_probs(
        scheduling_mode, choosers_df, choosers, probs_spec,
        probs_join_cols, clip_earliest_latest, depart_alt_base, first_trip_in_leg)

    chunk.log_df(trace_label, "chooser_probs", chooser_probs)

    if trace_hh_id and tracing.has_trace_targets(choosers_df):
        tracing.trace_df(chooser_probs, '%s.chooser_probs' % trace_label)

    raw_choices, rands = logit.make_choices(chooser_probs, trace_label=trace_label, trace_choosers=choosers)

    chunk.log_df(trace_label, "choices", raw_choices)
    chunk.log_df(trace_label, "rands", rands)

    if trace_hh_id and tracing.has_trace_targets(choosers_df):
        tracing.trace_df(raw_choices, '%s.choices' % trace_label, columns=[None, trace_choice_col_name])
        tracing.trace_df(rands, '%s.rands' % trace_label, columns=[None, 'rand'])

    # different post-processing is required based on the scheduling mode
    choices, failed = _postprocess_scheduling_choices(
        scheduling_mode, depart_alt_base, raw_choices, chooser_probs.columns, choosers_df)

    chunk.log_df(trace_label, "failed", failed)

    # report failed trips while we have the best diagnostic info
    if report_failed_trips and failed.any():
        _report_bad_choices(
            bad_row_map=failed,
            df=choosers,
            filename='failed_choosers',
            trace_label=trace_label,
            trace_choosers=None)

    # trace before removing failures
    if trace_hh_id and tracing.has_trace_targets(choosers_df):
        tracing.trace_df(choices, '%s.choices' % trace_label, columns=[None, trace_choice_col_name])
        tracing.trace_df(rands, '%s.rands' % trace_label, columns=[None, 'rand'])

    # remove any failed choices
    if failed.any():
        choices = choices[~failed]

    if all([check_col in choosers_df.columns for check_col in ['earliest', 'latest']]):
        assert (choices >= choosers_df.earliest[~failed]).all()
        assert (choices <= choosers_df.latest[~failed]).all()

    return choices
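The closing bounds check only fires when the chooser table carries earliest/latest columns. A minimal illustration (made-up data) of how the ~failed mask keeps the comparison aligned after failed rows are dropped:

import pandas as pd

choosers_df = pd.DataFrame({'earliest': [5, 5], 'latest': [10, 10]},
                           index=pd.Index([1, 2], name='trip_id'))
failed = pd.Series([False, True], index=choosers_df.index)
choices = pd.Series([7], index=pd.Index([1], name='trip_id'))  # failed trip 2 removed

assert (choices >= choosers_df.earliest[~failed]).all()
assert (choices <= choosers_df.latest[~failed]).all()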