Example #1
def process_joint_tours(joint_tour_frequency, joint_tour_frequency_alts,
                        point_persons):
    """
    This method processes the joint_tour_frequency column that comes out of
    the model of the same name and turns into a DataFrame that represents the
    joint tours that were generated

    Parameters
    ----------
    joint_tour_frequency : pandas.Series
        household joint_tour_frequency (which came out of the joint tour frequency model)
        indexed by household_id
    joint_tour_frequency_alts: DataFrame
        A DataFrame which has as a unique index with joint_tour_frequency values
        and frequency counts for the tours to be generated for that choice
    point_persons : pandas DataFrame
        table with columns for (at least) person_ids and home_zone_id indexed by household_id

    Returns
    -------
    tours : DataFrame
        An example of a tours DataFrame is supplied as a comment in the
        source code - it has an index which is a tour identifier, a household_id
        column, a tour_type column and tour_type_num and tour_num columns
        which is set to 1 or 2 depending whether it is the first or second joint tour
        made by the household.
    """

    assert not joint_tour_frequency.isnull().any()

    tours = process_tours(joint_tour_frequency.dropna(),
                          joint_tour_frequency_alts,
                          tour_category='joint',
                          parent_col='household_id')

    assert not tours.index.duplicated().any()
    assert point_persons.index.name == 'household_id'

    # - assign a temp point person to tour so we can create stable index
    tours['person_id'] = reindex(point_persons.person_id, tours.household_id)
    tours['origin'] = reindex(point_persons.home_zone_id, tours.household_id)

    # assign stable (predictable) tour_id
    set_tour_index(tours, is_joint=True)
    """
                   household_id tour_type  tour_type_count  tour_type_num  tour_num  tour_count
    tour_id
    3209530              320953      disc                1              1         1           2
    3209531              320953      disc                2              2         2           2
    23267026            2326702      shop                1              1         1           1
    17978574            1797857      main                1              1         1           1

                   tour_category  tour_category_id  person_id
    3209530                joint                 4     577234
    3209531                joint                 4     577234
    23267026               joint                 4    1742708
    17978574               joint                 4    5143198
    """
    return tours
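Every example in this collection leans on the same reindex helper. A minimal sketch of its behavior, assuming the ActivitySim-style signature in which the second argument's values are looked up in the first and the result is relabeled with the second argument's index:

import pandas as pd

def reindex(series1, series2):
    # look up series1 values for each element of series2 and relabel the
    # result with series2's own index, so it aligns with the caller's table
    # (a sketch of the idea, not the library implementation)
    return pd.Series(series1.loc[series2.values].values, index=series2.index)

# e.g. map each tour's household_id to that household's home zone
home_zone_id = pd.Series([5, 9], index=[320953, 2326702])        # by household_id
household_id = pd.Series([320953, 320953, 2326702],
                         index=[3209530, 3209531, 23267026])     # by tour_id
print(reindex(home_zone_id, household_id))                       # 5, 5, 9 by tour_id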
Example #2
def create_simple_trips(tours, households, persons, trace_hh_id):
    """
    Create a simple trip table
    """

    logger.info("Running simple trips table creation with %d tours" %
                len(tours.index))

    tours_df = tours.to_frame()

    # we now have a tour_id column
    tours_df.reset_index(inplace=True)

    tours_df['household_id'] = reindex(persons.household_id,
                                       tours_df.person_id)
    tours_df['TAZ'] = reindex(households.TAZ, tours_df.household_id)

    # create inbound and outbound records
    trips = pd.concat([tours_df, tours_df], ignore_index=True)

    # first half are outbound, second half are inbound
    trips['INBOUND'] = np.repeat([False, True], len(trips.index) // 2)

    # trip_num = 1 for outbound trips, 2 for inbound trips
    trips['trip_num'] = np.repeat([1, 2], len(trips.index) // 2)

    # set key fields from tour fields: 'TAZ', 'destination', 'start', 'end'
    trips['OTAZ'] = trips.TAZ
    trips.loc[trips.INBOUND, 'OTAZ'] = trips.destination[trips.INBOUND]

    trips['DTAZ'] = trips.destination
    trips.loc[trips.INBOUND, 'DTAZ'] = trips.TAZ[trips.INBOUND]

    trips['start_trip'] = trips.start
    trips.loc[trips.INBOUND, 'start_trip'] = trips.end[trips.INBOUND]

    trips['end_trip'] = trips.end
    trips.loc[trips.INBOUND, 'end_trip'] = trips.start[trips.INBOUND]

    # create a stable (predictable) index based on tour_id and trip_num
    possible_trips_count = 2
    trips['trip_id'] = (trips.tour_id *
                        possible_trips_count) + (trips.trip_num - 1)
    trips.set_index('trip_id', inplace=True, verify_integrity=True)

    trip_columns = [
        'tour_id', 'INBOUND', 'trip_num', 'OTAZ', 'DTAZ', 'start_trip',
        'end_trip'
    ]
    trips = trips[trip_columns]

    orca.add_table("trips", trips)

    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel(trips, 'trips')

    if trace_hh_id:
        tracing.trace_df(trips, label="trips", warn_if_empty=True)
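With possible_trips_count == 2, the stable index reserves two consecutive ids per tour: the outbound trip gets tour_id * 2 and the inbound trip gets tour_id * 2 + 1. A quick check of the arithmetic with hypothetical ids:

import pandas as pd

tour_id = pd.Series([954910, 954910, 985824, 985824])
trip_num = pd.Series([1, 2, 1, 2])

possible_trips_count = 2
trip_id = tour_id * possible_trips_count + (trip_num - 1)
print(trip_id.tolist())  # [1909820, 1909821, 1971648, 1971649]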
Example #3
def process_joint_tours(joint_tour_frequency, joint_tour_frequency_alts, point_persons):
    """
    This method processes the joint_tour_frequency column that comes out of
    the model of the same name and turns into a DataFrame that represents the
    joint tours that were generated

    Parameters
    ----------
    joint_tour_frequency : pandas.Series
        household joint_tour_frequency (which came out of the joint tour frequency model)
        indexed by household_id
    joint_tour_frequency_alts: DataFrame
        A DataFrame which has as a unique index with joint_tour_frequency values
        and frequency counts for the tours to be generated for that choice
    point_persons : pandas DataFrame
        table with columns for (at least) person_ids and home_taz indexed by household_id

    Returns
    -------
    tours : DataFrame
        An example of a tours DataFrame is supplied as a comment in the
        source code - it has an index which is a tour identifier, a household_id
        column, a tour_type column and tour_type_num and tour_num columns
        which is set to 1 or 2 depending whether it is the first or second joint tour
        made by the household.
    """

    assert not joint_tour_frequency.isnull().any()

    tours = process_tours(joint_tour_frequency.dropna(),
                          joint_tour_frequency_alts,
                          tour_category='joint',
                          parent_col='household_id')

    # - assign a temp point person to tour so we can create stable index
    tours['person_id'] = reindex(point_persons.person_id, tours.household_id)
    tours['origin'] = reindex(point_persons.home_taz, tours.household_id)

    # assign stable (predictable) tour_id
    set_tour_index(tours, is_joint=True)

    """
                   household_id tour_type  tour_type_count  tour_type_num  tour_num  tour_count
    tour_id
    3209530              320953      disc                1              1         1           2
    3209531              320953      disc                2              2         2           2
    23267026            2326702      shop                1              1         1           1
    17978574            1797857      main                1              1         1           1

                   tour_category  tour_category_id  person_id
    3209530                joint                 4     577234
    3209531                joint                 4     577234
    23267026               joint                 4    1742708
    17978574               joint                 4    5143198
    """
    return tours
Example #4
def process_non_mandatory_tours(persons, tour_counts):
    """
    This method processes the non_mandatory_tour_frequency column that comes
    out of the model of the same name and turns into a DataFrame that
    represents the non mandatory tours that were generated

    Parameters
    ----------
    persons: pandas.DataFrame
        persons table containing a non_mandatory_tour_frequency column
        which has the index of the chosen alternative as the value
    non_mandatory_tour_frequency_alts: DataFrame
        A DataFrame which has as a unique index which relates to the values
        in the series above typically includes columns which are named for trip
        purposes with values which are counts for that trip purpose.  Example
        trip purposes include escort, shopping, othmaint, othdiscr, eatout,
        social, etc.  A row would be an alternative which might be to take
        one shopping trip and zero trips of other purposes, etc.

    Returns
    -------
    tours : DataFrame
        An example of a tours DataFrame is supplied as a comment in the
        source code - it has an index which is a unique tour identifier,
        a person_id column, and a tour type column which comes from the
        column names of the alternatives DataFrame supplied above.
    """

    tours = create_tours(tour_counts, tour_category='non_mandatory')

    tours['household_id'] = reindex(persons.household_id, tours.person_id)
    tours['origin'] = reindex(persons.home_taz, tours.person_id)

    # assign stable (predictable) tour_id
    set_tour_index(tours)

    """
               person_id tour_type  tour_type_count  tour_type_num  tour_num   tour_count
    tour_id
    17008286     1133885  shopping                1              1         1            3
    17008283     1133885  othmaint                1              1         2            3
    17008282     1133885  othdiscr                1              1         3            3
    ...
               tour_category

               non_mandatory
               non_mandatory
               non_mandatory
    """

    return tours
Example #5
def process_non_mandatory_tours(persons, tour_counts):
    """
    This method processes the non_mandatory_tour_frequency column that comes
    out of the model of the same name and turns into a DataFrame that
    represents the non mandatory tours that were generated

    Parameters
    ----------
    persons: pandas.DataFrame
        persons table containing a non_mandatory_tour_frequency column
        which has the index of the chosen alternative as the value
    non_mandatory_tour_frequency_alts: DataFrame
        A DataFrame which has as a unique index which relates to the values
        in the series above typically includes columns which are named for trip
        purposes with values which are counts for that trip purpose.  Example
        trip purposes include escort, shopping, othmaint, othdiscr, eatout,
        social, etc.  A row would be an alternative which might be to take
        one shopping trip and zero trips of other purposes, etc.

    Returns
    -------
    tours : DataFrame
        An example of a tours DataFrame is supplied as a comment in the
        source code - it has an index which is a unique tour identifier,
        a person_id column, and a tour type column which comes from the
        column names of the alternatives DataFrame supplied above.
    """

    tours = create_tours(tour_counts, tour_category='non_mandatory')

    tours['household_id'] = reindex(persons.household_id, tours.person_id)
    tours['origin'] = reindex(persons.home_zone_id, tours.person_id)

    # assign stable (predictable) tour_id
    set_tour_index(tours)
    """
               person_id tour_type  tour_type_count  tour_type_num  tour_num   tour_count
    tour_id
    17008286     1133885  shopping                1              1         1            3
    17008283     1133885  othmaint                1              1         2            3
    17008282     1133885  othdiscr                1              1         3            3
    ...
               tour_category

               non_mandatory
               non_mandatory
               non_mandatory
    """

    return tours
Example #6
def destination_in_cbd(non_mandatory_tours, land_use, settings):
    # protection until filled in by destination choice model
    if "destination" not in non_mandatory_tours.columns:
        return pd.Series(False, index=non_mandatory_tours.index)

    s = reindex(land_use.area_type, non_mandatory_tours.destination)
    return s < settings['cbd_threshold']
Example #7
def patch_trip_ids(tours, trips):
    """
    replace survey trip_ids with asim standard trip_id
    replace survey tour_id foreign key with asim standard tour_id
    """

    # tour_id is a column, not index
    assert ASIM_TOUR_ID in tours

    # patch tour_id foreign key
    # tours['household_id'] = reindex(persons.household_id, tours.person_id)
    asim_tour_id = pd.Series(tours[ASIM_TOUR_ID].values,
                             index=tours[SURVEY_TOUR_ID].values)
    trips[ASIM_TOUR_ID] = reindex(asim_tour_id, trips[SURVEY_TOUR_ID])

    # person_is_university = persons.pstudent == constants.PSTUDENT_UNIVERSITY
    # tour_is_university = reindex(person_is_university, tours.person_id)
    # tour_primary_purpose = tours.tour_type.where((tours.tour_type != 'school') | ~tour_is_university, 'univ')
    # tour_primary_purpose = tour_primary_purpose.where(tours.tour_category!='atwork', 'atwork')
    #
    # trips['primary_purpose'] = reindex(tour_primary_purpose, trips.tour_id)

    # order is ambiguous if trips depart in the same time slot - break ties by SURVEY_TRIP_ID, hoping it increases with time
    if 'trip_num' not in trips:
        trips['trip_num'] = \
            trips.sort_values(by=['tour_id', 'outbound', 'depart', SURVEY_TRIP_ID]).\
            groupby(['tour_id', 'outbound']).\
            cumcount() + 1

    cid.set_trip_index(trips)

    assert trips.index.name == ASIM_TRIP_ID
    trips = trips.reset_index().rename(columns={'trip_id': ASIM_TRIP_ID})

    return trips
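The foreign-key patch works by building a survey-id to asim-id lookup Series and mapping it over the trips table. A minimal sketch with hypothetical ids, using plain pandas in place of the reindex helper:

import pandas as pd

tours = pd.DataFrame({'survey_tour_id': [101, 102],
                      'asim_tour_id': [954910, 985824]})
trips = pd.DataFrame({'survey_tour_id': [101, 101, 102]})

# Series mapping survey tour ids to asim tour ids
asim_tour_id = pd.Series(tours['asim_tour_id'].values,
                         index=tours['survey_tour_id'].values)
trips['asim_tour_id'] = asim_tour_id.loc[trips['survey_tour_id']].values
print(trips['asim_tour_id'].tolist())  # [954910, 954910, 985824]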
Example #8
def create_logsum_trips(tours, segment_column_name, model_settings,
                        trace_label):
    """
    Construct table of trips from half-tours (1 inbound, 1 outbound) for each tour-mode.

    Parameters
    ----------
    tours : pandas.DataFrame
    segment_column_name : str
        column in tours table used for segmenting model spec
    model_settings : dict
    trace_label : str

    Returns
    -------
    pandas.DataFrame
        Table of trips: 2 per tour, with O/D and purpose inherited from tour
    """
    stop_frequency_alts = inject.get_injectable('stop_frequency_alts')
    stop_freq = '0out_0in'  # no intermediate stops
    tours['stop_frequency'] = stop_freq
    tours['primary_purpose'] = tours['tour_purpose']
    trips = trip.initialize_from_tours(tours, stop_frequency_alts)
    trips['stop_frequency'] = stop_freq
    outbound = trips['outbound']
    trips['depart'] = reindex(tours.start, trips.tour_id)
    trips.loc[~outbound, 'depart'] = \
        reindex(tours.end, trips.loc[~outbound, 'tour_id'])

    # actual segment doesn't matter. just need to grab one
    # to get a set of coefficients from the spec
    segment_name = tours.iloc[0][segment_column_name]
    tour_mode_alts = get_alts_from_segmented_nested_logit(
        model_settings, segment_name, trace_label)

    # repeat rows from the trips table iterating over tour mode
    logsum_trips = pd.DataFrame()
    for tour_mode in tour_mode_alts:
        trips['tour_mode'] = tour_mode
        logsum_trips = pd.concat((logsum_trips, trips), ignore_index=True)
    assert len(logsum_trips) == len(trips) * len(tour_mode_alts)
    logsum_trips.index.name = 'trip_id'

    return logsum_trips
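The mode-replication loop stamps each candidate tour_mode onto the trips table and stacks the copies. A compact equivalent with hypothetical mode names (using assign, which avoids mutating trips in place the way the loop above does):

import pandas as pd

trips = pd.DataFrame({'trip_id': [1, 2], 'depart': [8, 17]})
tour_mode_alts = ['WALK', 'DRIVE']  # hypothetical alternatives

logsum_trips = pd.concat(
    [trips.assign(tour_mode=mode) for mode in tour_mode_alts],
    ignore_index=True)

assert len(logsum_trips) == len(trips) * len(tour_mode_alts)
logsum_trips.index.name = 'trip_id'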
Example #9
def set_tour_hour(trips, tours):
    """
    add columns 'tour_hour', 'earliest', 'latest' to trips

    Parameters
    ----------
    trips: pd.DataFrame
    tours: pd.DataFrame

    Returns
    -------
    modifies trips in place
    """

    # all trips must depart between tour start and end
    trips['earliest'] = reindex(tours.start, trips.tour_id)
    trips['latest'] = reindex(tours.end, trips.tour_id)

    # tour_hour is start for outbound trips, and end for inbound trips
    trips['tour_hour'] = np.where(trips.outbound, trips['earliest'],
                                  trips['latest']).astype(np.int8)

    # subtours indexed by parent_tour_id
    subtours = tours.loc[
        tours.primary_purpose == 'atwork',
        ['tour_num', 'tour_count', 'parent_tour_id', 'start', 'end']]

    subtours.parent_tour_id = subtours.parent_tour_id.astype(np.int64)
    subtours = subtours.set_index('parent_tour_id')

    # remaining columns are all small ints
    subtours = subtours.astype(np.int16)

    # bool series
    trip_has_subtours = trips.tour_id.isin(subtours.index)

    outbound = trip_has_subtours & trips.outbound
    trips.loc[outbound, 'latest'] = \
        reindex(subtours[subtours.tour_num == 1]['start'], trips[outbound].tour_id)

    inbound = trip_has_subtours & ~trips.outbound
    trips.loc[inbound, 'earliest'] = \
        reindex(subtours[subtours.tour_num == subtours.tour_count]['end'], trips[inbound].tour_id)
Example #10
def trip_departure_choice(
        trips,
        trips_merged,
        skim_dict,
        chunk_size,
        trace_hh_id):

    trace_label = 'trip_departure_choice'
    model_settings = config.read_model_settings('trip_departure_choice.yaml')

    spec = simulate.read_model_spec(file_name=model_settings['SPECIFICATION'])

    trips_merged_df = trips_merged.to_frame()
    # add tour-based chunk_id so we can chunk all trips in tour together
    tour_ids = trips_merged[TOUR_ID].unique()
    trips_merged_df['chunk_id'] = reindex(pd.Series(list(range(len(tour_ids))), tour_ids), trips_merged_df.tour_id)

    max_tour_id = trips_merged[TOUR_ID].max()

    trip_departure_choice.MAX_TOUR_ID = int(np.power(10, np.ceil(np.log10(max_tour_id))))
    locals_d = config.get_model_constants(model_settings).copy()

    preprocessor_settings = model_settings.get('PREPROCESSOR', None)
    tour_legs = get_tour_legs(trips_merged_df)
    pipeline.get_rn_generator().add_channel('tour_legs', tour_legs)

    if preprocessor_settings:
        od_skim = skim_dict.wrap('origin', 'destination')
        do_skim = skim_dict.wrap('destination', 'origin')

        skims = [od_skim, do_skim]

        simulate.set_skim_wrapper_targets(trips_merged_df, skims)

        locals_d.update({
            "od_skims": od_skim,
            "do_skims": do_skim,
        })

        expressions.assign_columns(
            df=trips_merged_df,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    choices = apply_stage_two_model(spec, trips_merged_df, chunk_size, trace_label)

    trips_df = trips.to_frame()
    trip_length = len(trips_df)
    trips_df = pd.concat([trips_df, choices], axis=1)
    assert len(trips_df) == trip_length
    assert trips_df[trips_df['depart'].isnull()].empty

    pipeline.replace_table("trips", trips_df)
Example #11
def mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                              trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for mandatory tours
    """

    model_name = 'mandatory_tour_scheduling'
    trace_label = model_name

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    # - if no mandatory_tours
    if mandatory_tours.shape[0] == 0:
        tracing.no_results(model_name)
        return

    # - add tour segmentation column
    # mtctm1 segments mandatory_scheduling spec by tour_type
    # (i.e. there are different specs for work and school tour_types)
    # mtctm1 logsum coefficients are segmented by primary_purpose
    # (i.e. there are different logsum coefficients for work, school, univ primary_purposes)
    # for simplicity in managing these different segmentation schemes,
    # we conflate them by segmenting tour processing to align with primary_purpose
    tour_segment_col = 'mandatory_tour_seg'
    assert tour_segment_col not in mandatory_tours
    is_university_tour = \
        (mandatory_tours.tour_type == 'school') & \
        reindex(persons_merged.is_university, mandatory_tours.person_id)
    mandatory_tours[tour_segment_col] = \
        mandatory_tours.tour_type.where(~is_university_tour, 'univ')

    choices = run_tour_scheduling(model_name, mandatory_tours, persons_merged,
                                  tdd_alts, tour_segment_col, chunk_size,
                                  trace_hh_id)

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    tracing.dump_df(DUMP, tt.tour_map(persons_merged, mandatory_tours,
                                      tdd_alts), trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label=trace_label,
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
Example #12
    def normal_for_df(self, df, mu=0, sigma=1, broadcast=False):
        """
        Return a single floating point normal random number in range (-inf, inf) for each row in df
        using the appropriate random channel for each row.

        Subsequent calls (in the same step) will return the next rand for each df row

        The resulting array will be the same length (and order) as df
        This method is designed to support alternative selection from a probability array

        The columns in df are ignored; the index name and values are used to determine
        which random number sequence to use.

        We assume that we can identify the channel to use based on the name of df.index
        This channel should have already been registered by a call to add_channel (q.v.)

        If "true pseudo random" behavior is desired (i.e. NOT repeatable) the set_base_seed
        method (q.v.) may be used to globally reseed all random streams.

        Parameters
        ----------
        df : pandas.DataFrame
            df with index name and values corresponding to a registered channel

        mu : float or array of floats with one value per df row
        sigma : float or array of floats with one value per df row
        broadcast : bool
            if True, df is an alternatives table whose (repeating) index
            identifies the choosers; a single rand is drawn per chooser and
            broadcast to all of that chooser's rows before scaling

        Returns
        -------
        rands : 1-D ndarray the same length as df (or Series with same index as df)
            a single float drawn from a normal distribution for each row in df
        """

        channel = self.get_channel_for_df(df)

        if broadcast:
            alts_df = df
            df = df.index.unique().to_series()
            rands = channel.normal_for_df(df,
                                          self.step_name,
                                          mu=0,
                                          sigma=1,
                                          lognormal=False)
            rands = reindex(pd.Series(rands, index=df.index), alts_df.index)
            rands = rands * sigma + mu
        else:
            rands = channel.normal_for_df(df,
                                          self.step_name,
                                          mu,
                                          sigma,
                                          lognormal=False)

        return rands
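The broadcast branch relies on the standardization identity: a standard normal draw z becomes N(mu, sigma^2) after z * sigma + mu, so one rand per chooser can be shared across that chooser's alternative rows while mu and sigma still vary by row. A small sketch of the idea with hypothetical ids and plain numpy in place of the channel machinery:

import numpy as np

rng = np.random.default_rng(42)

chooser_ids = [10, 20]
alt_chooser_ids = [10, 10, 20]  # chooser 10 has two alternative rows

# one standard-normal draw per chooser ...
z = dict(zip(chooser_ids, rng.standard_normal(len(chooser_ids))))
# ... broadcast to that chooser's alternative rows
rands = np.array([z[c] for c in alt_chooser_ids])

# scale: z * sigma + mu is distributed N(mu, sigma^2)
mu, sigma = 5.0, 2.0
print(rands * sigma + mu)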
Example #13
def infer_atwork_subtour_frequency(configs_dir, tours):

    # first column is 'atwork_subtour_frequency' nickname, remaining columns are trip type counts
    alts = pd.read_csv(os.path.join(configs_dir, 'atwork_subtour_frequency_alternatives.csv'), comment='#')
    tour_types = list(alts.drop(columns=alts.columns[0]).columns)  # get trip_types, ignoring first column
    alts['alt_id'] = alts.index

    #             alt  eat  business  maint  alt_id
    # 0   no_subtours    0         0      0       0
    # 1           eat    1         0      0       1
    # 2     business1    0         1      0       2
    # 3         maint    0         0      1       3
    # 4     business2    0         2      0       4
    # 5  eat_business    1         1      0       5

    work_tours = tours[tours.tour_type == 'work']
    work_tours = work_tours[[ASIM_TOUR_ID]]

    subtours = tours[tours.tour_category == 'atwork']
    subtours = subtours[['tour_id', 'tour_type', 'parent_tour_id']]

    # actual tour counts (may exceed counts envisioned by alts)
    tour_counts = pd.DataFrame(index=work_tours[ASIM_TOUR_ID])
    for tour_type in tour_types:
        # count subtours of this type by parent_tour_id
        tour_type_count = subtours[subtours.tour_type == tour_type].groupby('parent_tour_id').size()
        # backfill with 0 count
        tour_counts[tour_type] = tour_type_count.reindex(tour_counts.index).fillna(0).astype(np.int8)

    # determine alt id corresponding to constrained_tour_counts
    # need to do index waltz because pd.merge doesn't preserve index in this case
    tour_counts = \
        pd.merge(tour_counts.reset_index(), alts,
                 left_on=tour_types, right_on=tour_types, how='left').set_index(tour_counts.index.name)

    atwork_subtour_frequency = tour_counts.alt

    # did we end up with any tour frequencies not in alts?
    if atwork_subtour_frequency.isna().any():
        bad_tour_frequencies = atwork_subtour_frequency.isna()
        logger.warning("Bad atwork subtour frequencies for %s work tours" % bad_tour_frequencies.sum())
        logger.warning("Bad atwork subtour frequencies: tour_counts\n%s" %
                       tour_counts[bad_tour_frequencies])
        logger.warning("Bad atwork subtour frequencies: subtours\n%s" %
                       subtours[subtours.parent_tour_id.isin(tour_counts[bad_tour_frequencies].index)].
                       sort_values('parent_tour_id'))
        raise RuntimeError("bad atwork subtour frequencies for %s work tours" % bad_tour_frequencies.sum())

    atwork_subtour_frequency = reindex(atwork_subtour_frequency, tours[ASIM_TOUR_ID]).fillna('')

    return atwork_subtour_frequency
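The "index waltz" comment refers to pd.merge discarding the left frame's index; the workaround is to round-trip the index through a column. A minimal sketch with hypothetical counts:

import pandas as pd

tour_counts = pd.DataFrame({'eat': [1, 0]},
                           index=pd.Index([954910, 985824], name='tour_id'))
alts = pd.DataFrame({'alt': ['no_subtours', 'eat'], 'eat': [0, 1]})

# merge would drop tour_id, so reset it to a column and restore it afterwards
merged = pd.merge(tour_counts.reset_index(), alts,
                  on=['eat'], how='left').set_index(tour_counts.index.name)
print(merged['alt'])  # 954910 -> 'eat', 985824 -> 'no_subtours'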
Example #14
    def all_transit_paths(self, access_df, egress_df, chooser_attributes, trace_label, trace):

        trace_label = tracing.extend_trace_label(trace_label, 'all_transit_paths')

        # deduped transit_df has one row per chooser for each boarding (btap) and alighting (atap) pair
        transit_df = pd.merge(
            access_df[['idx', 'btap']],
            egress_df[['idx', 'atap']],
            on='idx').drop_duplicates()

        # don't want transit trips that start and stop in same tap
        transit_df = transit_df[transit_df.atap != transit_df.btap]

        for c in list(chooser_attributes.columns):
            transit_df[c] = reindex(chooser_attributes[c], transit_df['idx'])

        transit_df = transit_df.reset_index(drop=True)

        if trace:
            self.trace_df(transit_df, trace_label, 'all_transit_df')

        return transit_df
Example #15
def school_location_logsums(persons_merged, land_use, skim_dict, skim_stack,
                            school_location_sample, configs_dir, chunk_size,
                            trace_hh_id):
    """
    add logsum column to existing school_location_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in school_location_sample, and computing the logsum of all the utilities

                                                   <added>
    PERID,  dest_TAZ, rand,            pick_count, logsum
    23750,  14,       0.565502716034,  4           1.85659498857
    23750,  16,       0.711135838871,  6           1.92315598631
    ...
    23751,  12,       0.408038878552,  1           2.40612135416
    23751,  14,       0.972732479292,  2           1.44009018355

    """

    trace_label = 'school_location_logsums'

    # extract logsums_spec from omnibus_spec
    # omnibus_spec = orca.get_injectable('tour_mode_choice_spec')
    # for tour_type in ['school', 'university']:
    #     logsums_spec = get_segment_and_unstack(omnibus_spec, tour_type)
    #     tracing.dump_df(DUMP, logsums_spec, trace_label, 'logsums_spec_%s' % tour_type)

    school_location_settings = config.read_model_settings(
        configs_dir, 'school_location.yaml')

    alt_col_name = school_location_settings["ALT_COL_NAME"]

    # FIXME - just using settings from tour_mode_choice
    logsum_settings = config.read_model_settings(configs_dir,
                                                 'tour_mode_choice.yaml')

    persons_merged = persons_merged.to_frame()
    school_location_sample = school_location_sample.to_frame()

    logger.info("Running school_location_sample with %s rows" %
                len(school_location_sample))

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = school_location_settings['LOGSUM_CHOOSER_COLUMNS']
    persons_merged = persons_merged[chooser_columns]

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')

    logsums_list = []
    for school_type in ['university', 'highschool', 'gradeschool']:

        logsums_spec = mode_choice_logsums_spec(configs_dir, school_type)

        choosers = school_location_sample[school_location_sample['school_type']
                                          == school_type]

        choosers = pd.merge(choosers,
                            persons_merged,
                            left_index=True,
                            right_index=True,
                            how="left")

        choosers['in_period'] = time_period_label(
            school_location_settings['IN_PERIOD'])
        choosers['out_period'] = time_period_label(
            school_location_settings['OUT_PERIOD'])

        # FIXME - should do this in expression file?
        choosers['dest_topology'] = reindex(land_use.TOPOLOGY,
                                            choosers[alt_col_name])
        choosers['dest_density_index'] = reindex(land_use.density_index,
                                                 choosers[alt_col_name])

        tracing.dump_df(DUMP, choosers, trace_label,
                        '%s_choosers' % school_type)

        logsums = compute_logsums(choosers, logsums_spec, logsum_settings,
                                  skim_dict, skim_stack, alt_col_name,
                                  chunk_size, trace_hh_id, trace_label)

        logsums_list.append(logsums)

    logsums = pd.concat(logsums_list)

    # add_column series should have an index matching the table to which it is being added
    # logsums does, since school_location_sample was on left side of merge creating choosers
    orca.add_column("school_location_sample", "mode_choice_logsum", logsums)
Example #16
def run_destination_simulate(
        spec_segment_name,
        tours,
        persons_merged,
        destination_sample,
        model_settings,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_label):
    """
    run destination_simulate on tour_destination_sample
    annotated with mode_choice logsum to select a destination from sample alternatives
    """

    model_spec_file_name = model_settings['SPEC']
    model_spec = simulate.read_model_spec(file_name=model_spec_file_name)
    model_spec = model_spec[[spec_segment_name]]

    # merge persons into tours
    choosers = pd.merge(tours,
                        persons_merged,
                        left_on='person_id', right_index=True, how='left')
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]
    origin_col_name = model_settings['CHOOSER_ORIG_COL_NAME']

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge size_terms column into alt sample list
    destination_sample['size_term'] = \
        reindex(destination_size_terms.size_term, destination_sample[alt_dest_col_name])

    tracing.dump_df(DUMP, destination_sample, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running tour_destination_simulate with %d persons", len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(origin_col_name, alt_dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        destination_sample,
        spec=model_spec,
        choice_column=alt_dest_col_name,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='destination')

    return choices
Example #17
def process_trips(tours, stop_frequency_alts):

    MAX_TRIPS_PER_LEG = 4  # max number of trips per leg (inbound or outbound) of tour
    OUTBOUND_ALT = 'out'
    assert OUTBOUND_ALT in stop_frequency_alts.columns

    # get the actual alternatives for each person - have to go back to the
    # stop_frequency_alts dataframe to get this - the stop_frequency choice
    # column has the index values for the chosen alternative

    trips = stop_frequency_alts.loc[tours.stop_frequency]

    # assign tour ids to the index
    trips.index = tours.index

    """

    ::

      tours.stop_frequency    =>    proto trips table
      ________________________________________________________
                stop_frequency      |                out  in
      tour_id                       |     tour_id
      954910          1out_1in      |     954910       1   1
      985824          0out_1in      |     985824       0   1
    """

    # reformat with the columns given below
    trips = trips.stack().reset_index()
    trips.columns = ['tour_id', 'direction', 'trip_count']

    # tour legs have one more trip than they have stops
    trips.trip_count += 1

    # prefer direction as boolean
    trips['outbound'] = trips.direction == OUTBOUND_ALT

    """
           tour_id direction  trip_count  outbound
    0       954910       out           2      True
    1       954910        in           2     False
    2       985824       out           1      True
    3       985824        in           2     False
    """

    # now do a repeat and a take, so if you have two trips of given type you
    # now have two rows, and zero trips yields zero rows
    trips = trips.take(np.repeat(trips.index.values, trips.trip_count.values))
    trips = trips.reset_index(drop=True)

    grouped = trips.groupby(['tour_id', 'outbound'])
    trips['trip_num'] = grouped.cumcount() + 1

    trips['person_id'] = reindex(tours.person_id, trips.tour_id)
    trips['household_id'] = reindex(tours.household_id, trips.tour_id)

    trips['primary_purpose'] = reindex(tours.primary_purpose, trips.tour_id)

    # reorder columns and drop 'direction'
    trips = trips[['person_id', 'household_id', 'tour_id', 'primary_purpose',
                   'trip_num', 'outbound', 'trip_count']]

    """
      person_id  household_id  tour_id  primary_purpose trip_num  outbound  trip_count
    0     32927         32927   954910             work        1      True           2
    1     32927         32927   954910             work        2      True           2
    2     32927         32927   954910             work        1     False           2
    3     32927         32927   954910             work        2     False           2
    4     33993         33993   985824             univ        1      True           1
    5     33993         33993   985824             univ        1     False           2
    6     33993         33993   985824             univ        2     False           2

    """

    # canonical_trip_num: 1st trip out = 1, 2nd trip out = 2, 1st in = 5, etc.
    canonical_trip_num = (~trips.outbound * MAX_TRIPS_PER_LEG) + trips.trip_num
    trips['trip_id'] = trips.tour_id * (2 * MAX_TRIPS_PER_LEG) + canonical_trip_num

    trips.set_index('trip_id', inplace=True, verify_integrity=True)

    return trips
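The canonical trip id packs direction and sequence into one integer: inbound trips are offset by MAX_TRIPS_PER_LEG, and each tour reserves 2 * MAX_TRIPS_PER_LEG ids. A quick check of the arithmetic using the tour id from the comment above:

import pandas as pd

MAX_TRIPS_PER_LEG = 4
tour_id = pd.Series([954910] * 4)
outbound = pd.Series([True, True, False, False])
trip_num = pd.Series([1, 2, 1, 2])

canonical_trip_num = (~outbound) * MAX_TRIPS_PER_LEG + trip_num
trip_id = tour_id * (2 * MAX_TRIPS_PER_LEG) + canonical_trip_num
print(canonical_trip_num.tolist())  # [1, 2, 5, 6]
print(trip_id.tolist())             # [7639281, 7639282, 7639285, 7639286]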
Example #18
def joint_tour_participation(
        tours, persons_merged,
        chunk_size,
        trace_hh_id):
    """
    Predicts, for each eligible person, whether or not they participate in each joint tour.
    """
    trace_label = 'joint_tour_participation'
    model_settings_file_name = 'joint_tour_participation.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        add_null_results(model_settings, trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - create joint_tour_participation_candidates table
    candidates = joint_tour_participation_candidates(joint_tours, persons_merged)
    tracing.register_traceable_table('joint_tour_participants', candidates)
    pipeline.get_rn_generator().add_channel('joint_tour_participants', candidates)

    logger.info("Running joint_tours_participation with %d potential participants (candidates)" %
                candidates.shape[0])

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_time_window_overlap': person_time_window_overlap,
            'persons': persons_merged
        }

        expressions.assign_columns(
            df=candidates,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # - simple_simulate

    estimator = estimation.manager.begin_estimation('joint_tour_participation')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(candidates)

    # add household-based chunk_id so we can chunk all candidates in a household together
    assert 'chunk_id' not in candidates.columns
    unique_household_ids = candidates.household_id.unique()
    household_chunk_ids = pd.Series(range(len(unique_household_ids)), index=unique_household_ids)
    candidates['chunk_id'] = reindex(household_chunk_ids, candidates.household_id)

    choices = simulate.simple_simulate_by_chunk_id(
        choosers=candidates,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='participation',
        custom_chooser=participants_chooser,
        estimator=estimator)

    # choice is boolean (participate or not)
    choice_col = model_settings.get('participation_choice', 'participate')
    assert choice_col in model_spec.columns, \
        "couldn't find participation choice column '%s' in spec" % choice_col
    PARTICIPATE_CHOICE = model_spec.columns.get_loc(choice_col)

    participate = (choices == PARTICIPATE_CHOICE)

    if estimator:
        estimator.write_choices(choices)

        # we override the 'participate' boolean series rather than the raw alternative index in 'choices';
        # its value depends on whether the candidate's 'participant_id' is in the joint_tour_participants index
        survey_participants_df = estimator.get_survey_table('joint_tour_participants')
        participate = pd.Series(choices.index.isin(survey_participants_df.index.values), index=choices.index)

        # but estimation software wants to know the choices value (alternative index)
        choices = participate.replace({True: PARTICIPATE_CHOICE, False: 1-PARTICIPATE_CHOICE})
        # estimator.write_override_choices(participate)  # write choices as boolean participate
        estimator.write_override_choices(choices)  # write choices as int alt indexes

        estimator.end_estimation()

    # satisfaction indexed by tour_id
    tour_satisfaction = get_tour_satisfaction(candidates, participate)

    assert tour_satisfaction.all()

    candidates['satisfied'] = reindex(tour_satisfaction, candidates.tour_id)

    PARTICIPANT_COLS = ['tour_id', 'household_id', 'person_id']
    participants = candidates[participate][PARTICIPANT_COLS].copy()

    # assign participant_num
    # FIXME do we want something smarter than the participant with the lowest person_id?
    participants['participant_num'] = \
        participants.sort_values(by=['tour_id', 'person_id']).\
        groupby('tour_id').cumcount() + 1

    pipeline.replace_table("joint_tour_participants", participants)

    # drop channel as we aren't using it any more (and it has candidates that weren't chosen)
    pipeline.get_rn_generator().drop_channel('joint_tour_participants')

    # - assign joint tour 'point person' (participant_num == 1)
    point_persons = participants[participants.participant_num == 1]
    joint_tours['person_id'] = point_persons.set_index('tour_id').person_id

    # update number_of_participants which was initialized to 1
    joint_tours['number_of_participants'] = participants.groupby('tour_id').size()

    assign_in_place(tours, joint_tours[['person_id', 'number_of_participants']])

    pipeline.replace_table("tours", tours)

    # - run annotations
    annotate_jtp(model_settings, trace_label)

    if trace_hh_id:
        tracing.trace_df(participants,
                         label="joint_tour_participation.participants")

        tracing.trace_df(joint_tours,
                         label="joint_tour_participation.joint_tours")
Example #19
    def get_survey_values(self, model_values, table_name, column_names):

        assert isinstance(model_values, (pd.Series, pd.DataFrame, pd.Index)), \
            "get_survey_values model_values has unrecognized type %s" % type(model_values)

        dest_index = model_values if isinstance(model_values, pd.Index) \
            else model_values.index

        # read override_df table
        survey_df = manager.get_survey_table(table_name)

        assert survey_df is not None, \
            "get_survey_values: table '%s' not found" % (table_name,)

        column_name = column_names if isinstance(column_names, str) else None
        if column_name:
            column_names = [column_name]

        if not set(column_names).issubset(set(survey_df.columns)):
            missing_columns = list(set(column_names) - set(survey_df.columns))
            logger.error("missing columns (%s) in survey table %s" %
                         (missing_columns, table_name))
            print("survey table columns: %s" % (survey_df.columns, ))
            raise RuntimeError("missing columns (%s) in survey table %s" %
                               (missing_columns, table_name))

        assert set(column_names).issubset(set(survey_df.columns)), \
            f"missing columns ({list(set(column_names) - set(survey_df.columns))}) " \
            f"in survey table {table_name} {list(survey_df.columns)}"

        # for now tour_id is asim_tour_id in survey_df
        asim_df_index_name = dest_index.name
        if asim_df_index_name == survey_df.index.name:
            # survey table has same index as activitysim
            survey_df_index_column = 'index'
        elif asim_df_index_name in survey_df.columns:
            # survey table has activitysim index as column
            survey_df_index_column = asim_df_index_name
        elif 'asim_%s' % asim_df_index_name in survey_df.columns:
            # survey table has activitysim index as column with asim_ prefix
            survey_df_index_column = 'asim_%s' % asim_df_index_name
        else:
            logger.error("get_survey_values:index '%s' not in survey table" %
                         dest_index.name)
            # raise RuntimeError("index '%s' not in survey table %s" % (dest_index.name, table_name)
            survey_df_index_column = None

        logger.debug("get_survey_values: reindexing using %s.%s" %
                     (table_name, survey_df_index_column))

        values = pd.DataFrame(index=dest_index)
        for c in column_names:
            if survey_df_index_column == 'index':
                survey_values = survey_df[c]
            else:
                survey_values = pd.Series(
                    survey_df[c].values,
                    index=survey_df[survey_df_index_column])

            survey_values = reindex(survey_values, dest_index)

            # shouldn't be any choices we can't override
            missing_values = survey_values.isna()
            if missing_values.any():
                logger.error("missing survey_values for %s\n%s" %
                             (c, dest_index[missing_values]))
                logger.error("couldn't get_survey_values for %s in %s\n" %
                             (c, table_name))
                raise RuntimeError(
                    "couldn't get_survey_values for %s in %s\n" %
                    (c, table_name))

            values[c] = survey_values

        return values[column_name] if column_name else values
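get_survey_values boils down to aligning a survey column onto the asim index and insisting that nothing is missing. A minimal sketch of the simplest case, where the survey table shares the activitysim index (hypothetical values):

import pandas as pd

model_values = pd.Series(['work', 'shop'],
                         index=pd.Index([1, 2], name='tour_id'))
survey = pd.DataFrame({'tour_type': ['school', 'shop']},
                      index=pd.Index([1, 2], name='tour_id'))

override = survey['tour_type'].reindex(model_values.index)
assert not override.isna().any()  # shouldn't be any choices we can't override
print(override.tolist())          # ['school', 'shop']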
Example #20
def joint_tour_participation(
        tours, persons_merged,
        chunk_size,
        trace_hh_id):
    """
    Predicts, for each eligible person, whether or not they participate in each joint tour.
    """
    trace_label = 'joint_tour_participation'
    model_settings = config.read_model_settings('joint_tour_participation.yaml')
    model_spec = simulate.read_model_spec(file_name='joint_tour_participation.csv')

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        add_null_results(model_settings, trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - create joint_tour_participation_candidates table
    candidates = joint_tour_participation_candidates(joint_tours, persons_merged)
    tracing.register_traceable_table('joint_tour_participants', candidates)
    pipeline.get_rn_generator().add_channel('joint_tour_participants', candidates)

    logger.info("Running joint_tours_participation with %d potential participants (candidates)" %
                candidates.shape[0])

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_time_window_overlap': person_time_window_overlap,
            'persons': persons_merged
        }

        expressions.assign_columns(
            df=candidates,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # - simple_simulate

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=candidates,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='participation',
        custom_chooser=participants_chooser)

    # choice is boolean (participate or not)
    choice_col = model_settings.get('participation_choice', 'participate')
    assert choice_col in model_spec.columns, \
        "couldn't find participation choice column '%s' in spec" % choice_col
    PARTICIPATE_CHOICE = model_spec.columns.get_loc(choice_col)

    participate = (choices == PARTICIPATE_CHOICE)

    # satisfaction indexed by tour_id
    tour_satisfaction = get_tour_satisfaction(candidates, participate)

    assert tour_satisfaction.all()

    candidates['satisfied'] = reindex(tour_satisfaction, candidates.tour_id)

    PARTICIPANT_COLS = ['tour_id', 'household_id', 'person_id']
    participants = candidates[participate][PARTICIPANT_COLS].copy()

    # assign participant_num
    # FIXME do we want something smarter than the participant with the lowest person_id?
    participants['participant_num'] = \
        participants.sort_values(by=['tour_id', 'person_id']).\
        groupby('tour_id').cumcount() + 1

    pipeline.replace_table("joint_tour_participants", participants)

    # drop channel as we aren't using it any more (and it has candidates that weren't chosen)
    pipeline.get_rn_generator().drop_channel('joint_tour_participants')

    # - assign joint tour 'point person' (participant_num == 1)
    point_persons = participants[participants.participant_num == 1]
    joint_tours['person_id'] = point_persons.set_index('tour_id').person_id

    # update number_of_participants which was initialized to 1
    joint_tours['number_of_participants'] = participants.groupby('tour_id').size()

    assign_in_place(tours, joint_tours[['person_id', 'number_of_participants']])

    pipeline.replace_table("tours", tours)

    # - run annotations
    annotate_jtp(model_settings, trace_label)

    if trace_hh_id:
        tracing.trace_df(participants,
                         label="joint_tour_participation.participants")

        tracing.trace_df(joint_tours,
                         label="joint_tour_participation.joint_tours")
Example #21
def participants_chooser(probs, choosers, spec, trace_label):
    """
    custom alternative to logit.make_choices for simulate.simple_simulate

    Choosing participants for mixed tours is trickier than for adult or child tours because we
    need at least one adult and one child participant in a mixed tour. We call logit.make_choices
    and then check to see if the tour satisfies this requirement, and rechoose for any that
    fail until all are satisfied.

    In principle, this should always terminate eventually, but we fail after MAX_ITERATIONS,
    just in case there is some failure in program logic (we haven't seen this occur).

    Parameters
    ----------
    probs : pandas.DataFrame
        Rows for choosers and columns for the alternatives from which they
        are choosing. Values are expected to be valid probabilities across
        each row, e.g. they should sum to 1.
    choosers : pandas.DataFrame
        simple_simulate choosers df
    spec : pandas.DataFrame
        simple_simulate spec df
        We only need spec so we can know the column index of the 'participate' alternative
        indicating that the participant has been chosen to participate in the tour
    trace_label : str

    Returns
    -------
    choices, rands
        choices, rands as returned by logit.make_choices (in same order as probs)

    """

    assert probs.index.equals(choosers.index)

    # choice is boolean (participate or not)
    model_settings = config.read_model_settings('joint_tour_participation.yaml')

    choice_col = model_settings.get('participation_choice', 'participate')
    assert choice_col in spec.columns, \
        "couldn't find participation choice column '%s' in spec" % choice_col
    PARTICIPATE_CHOICE = spec.columns.get_loc(choice_col)
    MAX_ITERATIONS = model_settings.get('max_participation_choice_iterations', 5000)

    trace_label = tracing.extend_trace_label(trace_label, 'participants_chooser')

    candidates = choosers.copy()
    choices_list = []
    rands_list = []

    num_tours_remaining = len(candidates.tour_id.unique())
    logger.info('%s %s joint tours to satisfy.', trace_label, num_tours_remaining,)

    iter = 0
    while candidates.shape[0] > 0:

        iter += 1

        if iter > MAX_ITERATIONS:
            logger.warning('%s max iterations exceeded (%s).', trace_label, MAX_ITERATIONS)
            diagnostic_cols = ['tour_id', 'household_id', 'composition', 'adult']
            unsatisfied_candidates = candidates[diagnostic_cols].join(probs)
            tracing.write_csv(unsatisfied_candidates,
                              file_name='%s.UNSATISFIED' % trace_label, transpose=False)
            print(unsatisfied_candidates.head(20))
            assert False

        choices, rands = logit.make_choices(probs, trace_label=trace_label, trace_choosers=choosers)
        participate = (choices == PARTICIPATE_CHOICE)

        # satisfaction indexed by tour_id
        tour_satisfaction = get_tour_satisfaction(candidates, participate)
        num_tours_satisfied_this_iter = tour_satisfaction.sum()

        if num_tours_satisfied_this_iter > 0:

            num_tours_remaining -= num_tours_satisfied_this_iter

            satisfied = reindex(tour_satisfaction, candidates.tour_id)

            choices_list.append(choices[satisfied])
            rands_list.append(rands[satisfied])

            # remove candidates of satisfied tours
            probs = probs[~satisfied]
            candidates = candidates[~satisfied]

        logger.info('%s iteration %s : %s joint tours satisfied %s remaining' %
                    (trace_label, iter, num_tours_satisfied_this_iter, num_tours_remaining,))

    choices = pd.concat(choices_list)
    rands = pd.concat(rands_list)

    # reindex choices and rands to match the choosers index
    choices = choices.reindex(choosers.index)
    rands = rands.reindex(choosers.index)
    assert choices.index.equals(choosers.index)
    assert rands.index.equals(choosers.index)

    logger.info('%s %s iterations to satisfy all joint tours.', trace_label, iter,)

    return choices, rands
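participants_chooser is rejection resampling: draw for every remaining candidate, freeze the tours whose draws pass the satisfaction test, and redraw only the rest. A minimal sketch of the pattern, with a stand-in rule (anyone participates) in place of get_tour_satisfaction:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
candidates = pd.DataFrame({'tour_id': [1, 1, 2, 2]}, index=[10, 11, 20, 21])

results = []
while len(candidates):
    draws = pd.Series(rng.random(len(candidates)) < 0.5, index=candidates.index)
    # stand-in satisfaction rule: a tour is satisfied if anyone participates
    satisfied_by_tour = draws.groupby(candidates.tour_id).any()
    satisfied = satisfied_by_tour.reindex(candidates.tour_id).values
    results.append(draws[satisfied])
    candidates = candidates[~satisfied]

participate = pd.concat(results).sort_index()
print(participate)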
Example #22
def process_trips(tours, stop_frequency_alts):

    MAX_TRIPS_PER_LEG = 4  # max number of trips per leg (inbound or outbound) of tour
    OUTBOUND_ALT = 'out'
    assert OUTBOUND_ALT in stop_frequency_alts.columns

    # get the actual alternatives for each person - have to go back to the
    # stop_frequency_alts dataframe to get this - the stop_frequency choice
    # column has the index values for the chosen alternative

    trips = stop_frequency_alts.loc[tours.stop_frequency]

    # assign tour ids to the index
    trips.index = tours.index
    """

    ::

      tours.stop_frequency    =>    proto trips table
      ________________________________________________________
                stop_frequency      |                out  in
      tour_id                       |     tour_id
      954910          1out_1in      |     954910       1   1
      985824          0out_1in      |     985824       0   1
    """

    # reformat with the columns given below
    trips = trips.stack().reset_index()
    trips.columns = ['tour_id', 'direction', 'trip_count']

    # tour legs have one more trip than they have stops
    trips.trip_count += 1

    # prefer direction as boolean
    trips['outbound'] = trips.direction == OUTBOUND_ALT
    """
           tour_id direction  trip_count  outbound
    0       954910       out           2      True
    1       954910        in           2     False
    2       985824       out           1      True
    3       985824        in           2     False
    """

    # now do a repeat and a take, so if you have two trips of given type you
    # now have two rows, and zero trips yields zero rows
    trips = trips.take(np.repeat(trips.index.values, trips.trip_count.values))
    trips = trips.reset_index(drop=True)

    grouped = trips.groupby(['tour_id', 'outbound'])
    trips['trip_num'] = grouped.cumcount() + 1

    trips['person_id'] = reindex(tours.person_id, trips.tour_id)
    trips['household_id'] = reindex(tours.household_id, trips.tour_id)

    trips['primary_purpose'] = reindex(tours.primary_purpose, trips.tour_id)

    # reorder columns and drop 'direction'
    trips = trips[[
        'person_id', 'household_id', 'tour_id', 'primary_purpose', 'trip_num',
        'outbound', 'trip_count'
    ]]
    """
      person_id  household_id  tour_id  primary_purpose trip_num  outbound  trip_count
    0     32927         32927   954910             work        1      True           2
    1     32927         32927   954910             work        2      True           2
    2     32927         32927   954910             work        1     False           2
    3     32927         32927   954910             work        2     False           2
    4     33993         33993   985824             univ        1      True           1
    5     33993         33993   985824             univ        1     False           2
    6     33993         33993   985824             univ        2     False           2

    """

    # canonical_trip_num: 1st trip out = 1, 2nd trip out = 2, 1st in = 5, etc.
    canonical_trip_num = (~trips.outbound * MAX_TRIPS_PER_LEG) + trips.trip_num
    trips['trip_id'] = trips.tour_id * (2 *
                                        MAX_TRIPS_PER_LEG) + canonical_trip_num

    trips.set_index('trip_id', inplace=True, verify_integrity=True)

    return trips
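
A quick worked illustration of the canonical trip_id arithmetic above (the tour_id is hypothetical, not taken from these examples): with MAX_TRIPS_PER_LEG = 4, each tour reserves a block of 8 trip ids, outbound trips occupying slots 1-4 and inbound trips slots 5-8.

# sketch of the trip_id scheme used in process_trips (hypothetical tour_id)
MAX_TRIPS_PER_LEG = 4
tour_id = 954910

for outbound, trip_num in [(True, 1), (True, 2), (False, 1), (False, 2)]:
    canonical_trip_num = (not outbound) * MAX_TRIPS_PER_LEG + trip_num
    trip_id = tour_id * (2 * MAX_TRIPS_PER_LEG) + canonical_trip_num
    print(outbound, trip_num, trip_id)
# True 1 7639281 / True 2 7639282 / False 1 7639285 / False 2 7639286
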
Example #23
0
def run_trip_destination(trips,
                         tours_merged,
                         estimator,
                         chunk_size,
                         trace_hh_id,
                         trace_label,
                         fail_some_trips_for_testing=False):
    """
    trip destination - main functionality separated from model step so it can be called iteratively

    Run the trip_destination model, assigning destinations for each (intermediate) trip
    (last trips already have a destination - either the tour primary destination or Home)

    Set trip destination and origin columns, and a boolean failed flag for any failed trips
    (destination for flagged failed trips will be set to -1)

    Parameters
    ----------
    trips
    tours_merged
    estimator
    chunk_size
    trace_hh_id
    trace_label
    fail_some_trips_for_testing

    Returns
    -------
    trips : pandas.DataFrame
        trips with origin, destination, and failed columns set (failed trips are
        flagged failed == True with destination == -1)
    save_sample_df : pandas.DataFrame or None
        concatenated destination samples, if want_sample_table, else None

    """

    model_settings_file_name = 'trip_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)
    preprocessor_settings = model_settings.get('preprocessor', None)
    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])

    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    land_use = inject.get_table('land_use')
    size_terms = inject.get_injectable('size_terms')
    network_los = inject.get_injectable('network_los')

    trips = trips.sort_index()
    trips['next_trip_id'] = np.roll(trips.index, -1)
    trips.next_trip_id = trips.next_trip_id.where(
        trips.trip_num < trips.trip_count, 0)

    # - initialize trip origin and destination to those of half-tour
    # (we will sequentially adjust intermediate trips origin and destination as we choose them)
    tour_destination = reindex(tours_merged.destination,
                               trips.tour_id).astype(np.int64)
    tour_origin = reindex(tours_merged.origin, trips.tour_id).astype(np.int64)
    trips['destination'] = np.where(trips.outbound, tour_destination,
                                    tour_origin)
    trips['origin'] = np.where(trips.outbound, tour_origin, tour_destination)
    trips['failed'] = False

    if estimator:
        # need to check or override non-intermediate trip destination
        # should check consistency of survey trips origin, destination with parent tour and subsequent/prior trip?
        # FIXME if not consistent, do we fail or override? (seems weird to override them to bad values?)

        # expect all the same trips
        survey_trips = estimator.get_survey_table('trips').sort_index()
        assert survey_trips.index.equals(trips.index)

        first = (survey_trips.trip_num == 1)
        last = (survey_trips.trip_num == trips.trip_count)

        # expect survey's outbound first trip origin to be same as half tour origin
        assert (
            survey_trips.origin[survey_trips.outbound
                                & first] == tour_origin[survey_trips.outbound
                                                        & first]).all()
        # expect outbound last trip destination to be same as half tour destination
        assert (survey_trips.destination[survey_trips.outbound & last] ==
                tour_destination[survey_trips.outbound & last]).all()

        # expect inbound first trip origin to be same as half tour destination
        assert (survey_trips.origin[~survey_trips.outbound & first] ==
                tour_destination[~survey_trips.outbound & first]).all()
        # expect inbound last trip destination to be same as half tour origin
        assert (survey_trips.destination[~survey_trips.outbound & last] ==
                tour_origin[~survey_trips.outbound & last]).all()

    # - filter tours_merged (AFTER copying destination and origin columns to trips)
    # tours_merged is used for logsums, we filter it here upfront to save space and time
    tours_merged_cols = logsum_settings['TOURS_MERGED_CHOOSER_COLUMNS']
    redundant_cols = model_settings.get(
        'REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS', [])
    if redundant_cols:
        tours_merged_cols = [
            c for c in tours_merged_cols if c not in redundant_cols
        ]

    tours_merged = tours_merged[tours_merged_cols]

    # - skims
    skim_hotel = SkimHotel(model_settings, network_los, trace_label)

    # - size_terms and alternatives
    alternatives = tour_destination_size_terms(land_use, size_terms, 'trip')

    # DataFrameMatrix allows us to treat the dataframe as a virtual 2-D array, indexed by zone_id, purpose
    # e.g. size_terms.get(df.dest_zone_id, df.purpose)
    # returns a series of size_terms for each chooser's dest_zone_id and purpose with chooser index
    size_term_matrix = DataFrameMatrix(alternatives)

    # don't need size terms in alternatives, just zone_id index
    alternatives = alternatives.drop(alternatives.columns, axis=1)
    alternatives.index.name = model_settings['ALT_DEST_COL_NAME']

    sample_list = []

    # - process intermediate trips in ascending trip_num order
    intermediate = trips.trip_num < trips.trip_count
    if intermediate.any():

        first_trip_num = trips[intermediate].trip_num.min()
        last_trip_num = trips[intermediate].trip_num.max()

        # iterate over trips in ascending trip_num order
        for trip_num in range(first_trip_num, last_trip_num + 1):

            nth_trips = trips[intermediate & (trips.trip_num == trip_num)]
            nth_trace_label = tracing.extend_trace_label(
                trace_label, 'trip_num_%s' % trip_num)

            locals_dict = {'network_los': network_los}
            locals_dict.update(config.get_model_constants(model_settings))

            # - annotate nth_trips
            if preprocessor_settings:
                expressions.assign_columns(
                    df=nth_trips,
                    model_settings=preprocessor_settings,
                    locals_dict=locals_dict,
                    trace_label=nth_trace_label)

            logger.info("Running %s with %d trips", nth_trace_label,
                        nth_trips.shape[0])

            # - choose destination for nth_trips, segmented by primary_purpose
            choices_list = []
            for primary_purpose, trips_segment in nth_trips.groupby(
                    'primary_purpose'):
                choices, destination_sample = choose_trip_destination(
                    primary_purpose,
                    trips_segment,
                    alternatives,
                    tours_merged,
                    model_settings,
                    want_logsums,
                    want_sample_table,
                    size_term_matrix,
                    skim_hotel,
                    estimator,
                    chunk_size,
                    trace_hh_id,
                    trace_label=tracing.extend_trace_label(
                        nth_trace_label, primary_purpose))

                choices_list.append(choices)
                if want_sample_table:
                    assert destination_sample is not None
                    sample_list.append(destination_sample)

            destinations_df = pd.concat(choices_list)

            if fail_some_trips_for_testing:
                if len(destinations_df) > 0:
                    destinations_df = destinations_df.drop(
                        destinations_df.index[0])

            failed_trip_ids = nth_trips.index.difference(destinations_df.index)
            if failed_trip_ids.any():
                logger.warning(
                    "%s sidelining %s trips without viable destination alternatives"
                    % (nth_trace_label, failed_trip_ids.shape[0]))
                next_trip_ids = nth_trips.next_trip_id.reindex(failed_trip_ids)
                trips.loc[failed_trip_ids, 'failed'] = True
                trips.loc[failed_trip_ids, 'destination'] = -1
                trips.loc[next_trip_ids,
                          'origin'] = trips.loc[failed_trip_ids].origin.values

            if len(destinations_df) == 0:
                assert failed_trip_ids.all()
                # note: failed trips may span several purpose segments, so don't
                # report the last groupby segment's primary_purpose here
                logger.warning(
                    f"all {len(nth_trips)} trip_num {trip_num} trips failed")

            if len(destinations_df) > 0:
                # - assign choices to this trip's destinations
                # if estimator, then the choices will already have been overridden by trip_destination_simulate
                # because we need to overwrite choices before any failed choices are suppressed
                assign_in_place(trips,
                                destinations_df.choice.to_frame('destination'))
                if want_logsums:
                    assert 'logsum' in destinations_df.columns
                    assign_in_place(
                        trips,
                        destinations_df.logsum.to_frame(logsum_column_name))

                # - assign choice to next trip's origin
                destinations_df.index = nth_trips.next_trip_id.reindex(
                    destinations_df.index)
                assign_in_place(trips,
                                destinations_df.choice.to_frame('origin'))

    del trips['next_trip_id']

    if len(sample_list) > 0:
        save_sample_df = pd.concat(sample_list)
    else:
        # this could happen if no intermediate trips, or if no saved sample desired
        save_sample_df = None

    return trips, save_sample_df
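
The next_trip_id bookkeeping above is what lets a chosen destination be copied forward as the next trip's origin. A minimal sketch with toy trip ids (hypothetical; two tours of 2 and 1 trips):

import numpy as np
import pandas as pd

trips = pd.DataFrame({'trip_num': [1, 2, 1], 'trip_count': [2, 2, 1]},
                     index=pd.Index([10, 11, 20], name='trip_id'))

# np.roll shifts the sorted index up one row, so each trip points at its
# successor; the last trip of each tour leg has no successor and gets 0
trips['next_trip_id'] = np.roll(trips.index, -1)
trips.next_trip_id = trips.next_trip_id.where(trips.trip_num < trips.trip_count, 0)
print(trips.next_trip_id.tolist())  # [11, 0, 0]
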
Example #24
0
def run_destination_simulate(spec_segment_name, tours, persons_merged,
                             destination_sample, want_logsums, model_settings,
                             network_los, destination_size_terms, estimator,
                             chunk_size, trace_label):
    """
    run destination_simulate on tour_destination_sample, which has been annotated
    with mode_choice logsums, to select a destination from the sampled alternatives
    """

    model_spec = simulate.spec_for_segment(model_settings,
                                           spec_id='SPEC',
                                           segment_name=spec_segment_name,
                                           estimator=estimator)

    # FIXME - MEMORY HACK - only include columns actually used in spec (omit them pre-merge)
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    persons_merged = persons_merged[[
        c for c in persons_merged.columns if c in chooser_columns
    ]]
    tours = tours[[
        c for c in tours.columns if c in chooser_columns or c == 'person_id'
    ]]
    choosers = pd.merge(tours,
                        persons_merged,
                        left_on='person_id',
                        right_index=True,
                        how='left')

    # interaction_sample requires that choosers.index.is_monotonic_increasing
    if not choosers.index.is_monotonic_increasing:
        logger.debug(
            f"run_destination_simulate {trace_label} sorting choosers because not monotonic_increasing"
        )
        choosers = choosers.sort_index()

    if estimator:
        estimator.write_choosers(choosers)

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
    origin_col_name = model_settings['CHOOSER_ORIG_COL_NAME']

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge size_terms column into alt sample list
    destination_sample['size_term'] = \
        reindex(destination_size_terms.size_term, destination_sample[alt_dest_col_name])

    tracing.dump_df(DUMP, destination_sample, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running tour_destination_simulate with %d persons",
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a home_zone_id in the choosers
    # and a zone_id in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skim_dict = network_los.get_default_skim_dict()
    skims = skim_dict.wrap(origin_col_name, alt_dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(choosers,
                                          destination_sample,
                                          spec=model_spec,
                                          choice_column=alt_dest_col_name,
                                          want_logsums=want_logsums,
                                          skims=skims,
                                          locals_d=locals_d,
                                          chunk_size=chunk_size,
                                          trace_label=trace_label,
                                          trace_choice_name='destination',
                                          estimator=estimator)

    if not want_logsums:
        # for consistency, always return a dataframe with canonical column name
        assert isinstance(choices, pd.Series)
        choices = choices.to_frame('choice')

    return choices
Example #25
0
def home_taz(households, persons):
    return reindex(households.home_taz, persons.household_id)
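
The reindex helper used throughout these examples is not itself shown in this collection. A minimal sketch consistent with how it is called here (look up series1 by the values of series2, returning a result aligned to series2's index; the implementation details are an assumption):

import pandas as pd

def reindex(series1, series2):
    # sketch: look up each value of series2 in series1's index, then
    # relabel the result with series2's own index so it aligns with
    # the table the caller is assigning into
    result = series1.reindex(series2.values)
    result.index = series2.index
    return result

households = pd.DataFrame({'home_taz': [5, 9]}, index=[100, 200])
persons = pd.DataFrame({'household_id': [100, 100, 200]}, index=[1, 2, 3])
print(reindex(households.home_taz, persons.household_id).tolist())  # [5, 5, 9]
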
Example #26
0
def run_trip_destination(
        trips,
        tours_merged,
        chunk_size, trace_hh_id,
        trace_label):
    """
    trip destination - main functionality separated from model step so it can be called iteratively

    Run the trip_destination model, assigning destinations for each (intermediate) trip
    (last trips already have a destination - either the tour primary destination or Home)

    Set trip destination and origin columns, and a boolean failed flag for any failed trips
    (destination for flagged failed trips will be set to -1)

    Parameters
    ----------
    trips
    tours_merged
    chunk_size
    trace_hh_id
    trace_label

    Returns
    -------

    """

    model_settings = config.read_model_settings('trip_destination.yaml')
    preprocessor_settings = model_settings.get('preprocessor', None)
    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    land_use = inject.get_table('land_use')
    size_terms = inject.get_injectable('size_terms')

    # - initialize trip origin and destination to those of half-tour
    # (we will sequentially adjust intermediate trips origin and destination as we choose them)
    tour_destination = reindex(tours_merged.destination, trips.tour_id).astype(int)
    tour_origin = reindex(tours_merged.origin, trips.tour_id).astype(int)
    trips['destination'] = np.where(trips.outbound, tour_destination, tour_origin)
    trips['origin'] = np.where(trips.outbound, tour_origin, tour_destination)
    trips['failed'] = False

    trips = trips.sort_index()
    trips['next_trip_id'] = np.roll(trips.index, -1)
    trips.next_trip_id = trips.next_trip_id.where(trips.trip_num < trips.trip_count, 0)

    # - filter tours_merged (AFTER copying destination and origin columns to trips)
    # tours_merged is used for logsums, we filter it here upfront to save space and time
    tours_merged_cols = logsum_settings['TOURS_MERGED_CHOOSER_COLUMNS']
    if 'REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS' in model_settings:
        redundant_cols = model_settings['REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS']
        tours_merged_cols = [c for c in tours_merged_cols if c not in redundant_cols]
    tours_merged = tours_merged[tours_merged_cols]

    # - skims
    skims = wrap_skims(model_settings)

    # - size_terms and alternatives
    alternatives = tour_destination_size_terms(land_use, size_terms, 'trip')

    # DataFrameMatrix allows us to treat the dataframe as a virtual 2-D array, indexed by TAZ, purpose
    # e.g. size_terms.get(df.dest_taz, df.purpose)
    # returns a series of size_terms for each chooser's dest_taz and purpose with chooser index
    size_term_matrix = DataFrameMatrix(alternatives)

    # don't need size terms in alternatives, just TAZ index
    alternatives = alternatives.drop(alternatives.columns, axis=1)
    alternatives.index.name = model_settings['ALT_DEST']

    # - process intermediate trips in ascending trip_num order
    intermediate = trips.trip_num < trips.trip_count
    if intermediate.any():

        first_trip_num = trips[intermediate].trip_num.min()
        last_trip_num = trips[intermediate].trip_num.max()

        # iterate over trips in ascending trip_num order
        for trip_num in range(first_trip_num, last_trip_num + 1):

            nth_trips = trips[intermediate & (trips.trip_num == trip_num)]
            nth_trace_label = tracing.extend_trace_label(trace_label, 'trip_num_%s' % trip_num)

            # - annotate nth_trips
            if preprocessor_settings:
                expressions.assign_columns(
                    df=nth_trips,
                    model_settings=preprocessor_settings,
                    locals_dict=config.get_model_constants(model_settings),
                    trace_label=nth_trace_label)

            logger.info("Running %s with %d trips", nth_trace_label, nth_trips.shape[0])

            # - choose destination for nth_trips, segmented by primary_purpose
            choices_list = []
            for primary_purpose, trips_segment in nth_trips.groupby('primary_purpose'):
                choices = choose_trip_destination(
                    primary_purpose,
                    trips_segment,
                    alternatives,
                    tours_merged,
                    model_settings,
                    size_term_matrix, skims,
                    chunk_size, trace_hh_id,
                    trace_label=tracing.extend_trace_label(nth_trace_label, primary_purpose))

                choices_list.append(choices)

            destinations = pd.concat(choices_list)

            failed_trip_ids = nth_trips.index.difference(destinations.index)
            if failed_trip_ids.any():
                logger.warning("%s sidelining %s trips without viable destination alternatives" %
                               (nth_trace_label, failed_trip_ids.shape[0]))
                next_trip_ids = nth_trips.next_trip_id.reindex(failed_trip_ids)
                trips.loc[failed_trip_ids, 'failed'] = True
                trips.loc[failed_trip_ids, 'destination'] = -1
                trips.loc[next_trip_ids, 'origin'] = trips.loc[failed_trip_ids].origin.values

            # - assign choices to these trips' destinations and to the next trips' origins
            assign_in_place(trips, destinations.to_frame('destination'))
            destinations.index = nth_trips.next_trip_id.reindex(destinations.index)
            assign_in_place(trips, destinations.to_frame('origin'))

    del trips['next_trip_id']

    return trips
Example #27
0
def workplace_in_cbd(persons, land_use, settings):
    s = reindex(land_use.area_type, persons.workplace_taz)
    return s < settings['cbd_threshold']
Example #28
0
def dest_topology(tours, land_use):
    return reindex(land_use.TOPOLOGY, tours.destination)
Example #29
0
def vectorize_joint_tour_scheduling(
        joint_tours, joint_tour_participants,
        persons_merged, alts, spec,
        model_settings,
        chunk_size=0, trace_label=None):
    """
    Like vectorize_tour_scheduling but specifically for joint tours

    joint tours have a few peculiarities necessitating separate treatment:

    Timetable has to be initialized to set all timeperiods...

    Parameters
    ----------
    joint_tours : DataFrame
        DataFrame of joint tours containing tour attributes, as well as a household_id
        column to define the nth tour for each household.
    joint_tour_participants : DataFrame
        DataFrame with person_id and tour_id columns linking each participating
        person to a joint tour.
    persons_merged : DataFrame
        DataFrame of persons containing attributes referenced by expressions in spec
    alts : DataFrame
        DataFrame of alternatives which represent time slots.  Will be passed to
        interaction_simulate in batches for each nth tour.
    spec : DataFrame
        The spec which will be passed to interaction_simulate.
        (or dict of specs keyed on tour_type if tour_types is not None)
    model_settings : dict

    Returns
    -------
    tdd : DataFrame
        DataFrame indexed like joint_tours, with the start, end, and duration of the
        chosen alternative plus a 'tdd' column holding the chosen alternative's index.
    persons_timetable : TimeTable
        timetable updated with joint tours (caller should replace_table for it to persist)
    """

    trace_label = tracing.extend_trace_label(trace_label, 'vectorize_joint_tour_scheduling')

    assert len(joint_tours.index) > 0
    assert 'tour_num' in joint_tours.columns
    assert 'tour_type' in joint_tours.columns

    timetable_window_id_col = None
    tour_owner_id_col = 'household_id'
    segment = None

    persons_timetable = inject.get_injectable("timetable")
    choice_list = []

    # keep a series of the most recent tour for each household
    # initialize with the first alternative from alts
    previous_tour_by_householdid = pd.Series(alts.index[0], index=joint_tours.household_id.unique())

    # tours must be scheduled in increasing tour_num order
    # the second tour of a given type must be in the group immediately following the first
    # this ought to have been ensured when tours are created (tour_frequency.process_tours)

    # print "participant windows before scheduling\n", \
    #     persons_timetable.slice_windows_by_row_id(joint_tour_participants.person_id)

    for tour_num, nth_tours in joint_tours.groupby('tour_num', sort=True):

        tour_trace_label = tracing.extend_trace_label(trace_label, 'tour_%s' % (tour_num,))

        # no more than one tour per household per call to schedule_tours
        assert not nth_tours.household_id.duplicated().any()

        nth_participants = \
            joint_tour_participants[joint_tour_participants.tour_id.isin(nth_tours.index)]

        timetable = build_joint_tour_timetables(
            nth_tours, nth_participants,
            persons_timetable, alts)

        choices = \
            schedule_tours(nth_tours,
                           persons_merged, alts,
                           spec, segment,
                           model_settings,
                           timetable, timetable_window_id_col,
                           previous_tour_by_householdid, tour_owner_id_col,
                           chunk_size, tour_trace_label)

        # - update timetables of all joint tour participants
        persons_timetable.assign(
            nth_participants.person_id,
            reindex(choices, nth_participants.tour_id))

        choice_list.append(choices)

    choices = pd.concat(choice_list)

    # add the start, end, and duration from tdd_alts
    # (positional indexing assumes alts has a default RangeIndex;
    #  a label-based equivalent would be alts.loc[choices] relabeled with choices.index)
    tdd = pd.DataFrame(data=alts.values[choices.values],
                       columns=alts.columns,
                       index=choices.index)

    # include the index of the choice in the tdd alts table
    tdd['tdd'] = choices

    # print "participant windows after scheduling\n", \
    #     persons_timetable.slice_windows_by_row_id(joint_tour_participants.person_id)

    return tdd, persons_timetable
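
Note that alts.values[choices.values] selects rows positionally, which only matches a label-based lookup when alts has a default 0..n-1 index. A toy sketch of the pattern (hypothetical tour ids):

import pandas as pd

alts = pd.DataFrame({'start': [5, 5, 6], 'end': [10, 17, 18],
                     'duration': [5, 12, 12]})  # RangeIndex 0..2
choices = pd.Series([2, 0], index=pd.Index([7001, 7002], name='tour_id'))

tdd = pd.DataFrame(alts.values[choices.values], columns=alts.columns,
                   index=choices.index)
tdd['tdd'] = choices
# equivalent here to alts.loc[choices] relabeled with choices.index
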
Example #30
0
def trip_scheduling(trips, tours, chunk_size, trace_hh_id):
    """
    Trip scheduling assigns depart times for trips within the start, end limits of the tour.

    The algorithm is simplistic:

    The first outbound trip starts at the tour start time, and subsequent outbound trips are
    processed in trip_num order, to ensure that subsequent trips do not depart before the
    trip that precedes them.

    Inbound trips are handled similarly, except in reverse order, starting with the last trip,
    and working backwards to ensure that inbound trips do not depart after the trip that
    succeeds them.

    The probability spec assigns probabilities for depart times, but those possible departs must
    be clipped to disallow depart times outside the tour limits, the departs of prior trips, and
    in the case of work tours, the start/end times of any atwork subtours.

    Scheduling can fail if the probability table assigns zero probabilities to all the available
    depart times in a trip's depart window. (This could be avoided by giving every window a small
    probability, rather than zero, but the existing mtctm1 prob spec does not do this. I believe
    this is due to its having been generated from a small household travel survey sample
    that lacked any departs for some time periods.)

    Rescheduling the trips that fail (along with their inbound or outbound leg-mates) can sometimes
    fix this problem, if it was caused by an earlier trip's depart choice blocking a subsequent
    trip's ability to schedule a depart within the resulting window. But it can also happen if
    a tour is very short (e.g. one time period) and the prob spec has a zero probability for
    that time period.

    Therefore we need to handle trips that could not be scheduled. There are two ways (at least)
    to solve this problem:

    1) choose_most_initial
    simply assigns a depart time to the trip, even if it has a zero probability. It makes
    most sense, in this case, to assign the 'most initial' depart time, so that subsequent trips
    are minimally impacted. This can be done in the final iteration, thus affecting only the
    trips that could not be scheduled by the standard approach.

    2) drop_and_cleanup
    drop trips that could not be scheduled, and adjust their leg mates, as is done for failed
    trips in trip_destination.

    Which option is applied is determined by the FAILFIX model setting

    """
    trace_label = "trip_scheduling"
    model_settings_file_name = 'trip_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    trips_df = trips.to_frame()
    tours = tours.to_frame()

    # add columns 'tour_hour', 'earliest', 'latest' to trips
    set_tour_hour(trips_df, tours)

    # trip_scheduling is a probabilistic model and we don't support estimation,
    # but we do need to override choices in estimation mode
    estimator = estimation.manager.begin_estimation('trip_scheduling')
    if estimator:
        estimator.write_spec(model_settings, tag='PROBS_SPEC')
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        chooser_cols_for_estimation = [
            'person_id',
            'household_id',
            'tour_id',
            'trip_num',
            'trip_count',
            'primary_purpose',
            'outbound',
            'earliest',
            'latest',
            'tour_hour',
        ]
        estimator.write_choosers(trips_df[chooser_cols_for_estimation])

    probs_spec = pd.read_csv(
        config.config_file_path('trip_scheduling_probs.csv'), comment='#')
    # FIXME for now, not really doing estimation for probabilistic model - just overwriting choices
    # besides, it isn't clear that named coefficients would be helpful if we had some form of estimation
    # coefficients_df = simulate.read_model_coefficients(model_settings)
    # probs_spec = map_coefficients(probs_spec, coefficients_df)

    # add tour-based chunk_id so we can chunk all trips in tour together
    trips_df['chunk_id'] = reindex(
        pd.Series(list(range(len(tours))), tours.index), trips_df.tour_id)

    assert 'DEPART_ALT_BASE' in model_settings
    failfix = model_settings.get(FAILFIX, FAILFIX_DEFAULT)

    max_iterations = model_settings.get('MAX_ITERATIONS', 1)
    assert max_iterations > 0

    choices_list = []

    for chunk_i, trips_chunk, chunk_trace_label in chunk.adaptive_chunked_choosers_by_chunk_id(
            trips_df, chunk_size, trace_label, trace_label):

        i = 0
        while (i < max_iterations) and not trips_chunk.empty:

            # only chunk log first iteration since memory use declines with each iteration
            with chunk.chunk_log(
                    trace_label) if i == 0 else chunk.chunk_log_skip():

                i += 1
                is_last_iteration = (i == max_iterations)

                trace_label_i = tracing.extend_trace_label(
                    trace_label, "i%s" % i)
                logger.info("%s scheduling %s trips within chunk %s",
                            trace_label_i, trips_chunk.shape[0], chunk_i)

                choices = \
                    run_trip_scheduling(
                        trips_chunk,
                        tours,
                        probs_spec,
                        model_settings,
                        estimator=estimator,
                        is_last_iteration=is_last_iteration,
                        trace_hh_id=trace_hh_id,
                        chunk_size=chunk_size,
                        chunk_tag=trace_label,
                        trace_label=trace_label_i)

                # boolean series of trips whose individual trip scheduling failed
                failed = choices.reindex(trips_chunk.index).isnull()
                logger.info("%s %s failed", trace_label_i, failed.sum())

                if not is_last_iteration:
                    # boolean series of trips whose leg scheduling failed
                    failed_cohorts = failed_trip_cohorts(trips_chunk, failed)
                    trips_chunk = trips_chunk[failed_cohorts]
                    choices = choices[~failed_cohorts]

                choices_list.append(choices)

    trips_df = trips.to_frame()

    choices = pd.concat(choices_list)
    choices = choices.reindex(trips_df.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'trips',
                                              'depart')  # override choices
        estimator.write_override_choices(choices)
        estimator.end_estimation()
        assert not choices.isnull().any()

    if choices.isnull().any():
        logger.warning(
            "%s of %s trips could not be scheduled after %s iterations" %
            (choices.isnull().sum(), trips_df.shape[0], i))

        if failfix != FAILFIX_DROP_AND_CLEANUP:
            raise RuntimeError("%s setting '%s' not enabled in settings" %
                               (FAILFIX, FAILFIX_DROP_AND_CLEANUP))

        trips_df['failed'] = choices.isnull()
        trips_df = cleanup_failed_trips(trips_df)
        choices = choices.reindex(trips_df.index)

    trips_df['depart'] = choices

    assert not trips_df.depart.isnull().any()

    pipeline.replace_table("trips", trips_df)
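
The failure handling above leans on index alignment: reindexing the concatenated choices to the full trips index leaves NaN for any trip that never received a depart time. A toy sketch (hypothetical trip ids):

import pandas as pd

choices = pd.Series([7, 9], index=pd.Index([101, 103], name='trip_id'))
trips_index = pd.Index([101, 102, 103], name='trip_id')

aligned = choices.reindex(trips_index)  # trip 102 -> NaN, i.e. failed
failed = aligned.isnull()
print(failed.tolist())  # [False, True, False]
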
Example #31
0
def dest_density_index(tours, land_use):
    return reindex(land_use.density_index, tours.destination)
Example #32
0
def workplace_location_logsums(persons_merged, land_use, skim_dict, skim_stack,
                               workplace_location_sample, configs_dir,
                               chunk_size, trace_hh_id):
    """
    add logsum column to existing workplace_location_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in workplace_location_sample, and computing the logsum of all the utilities

                                                   <added>
    PERID,  dest_TAZ, rand,            pick_count, logsum
    23750,  14,       0.565502716034,  4           1.85659498857
    23750,  16,       0.711135838871,  6           1.92315598631
    ...
    23751,  12,       0.408038878552,  1           2.40612135416
    23751,  14,       0.972732479292,  2           1.44009018355

    """

    trace_label = 'workplace_location_logsums'

    logsums_spec = mode_choice_logsums_spec(configs_dir, 'work')

    workplace_location_settings = config.read_model_settings(
        configs_dir, 'workplace_location.yaml')

    alt_col_name = workplace_location_settings["ALT_COL_NAME"]

    # FIXME - just using settings from tour_mode_choice
    logsum_settings = config.read_model_settings(configs_dir,
                                                 'tour_mode_choice.yaml')

    persons_merged = persons_merged.to_frame()
    workplace_location_sample = workplace_location_sample.to_frame()

    logger.info("Running workplace_location_sample with %s rows" %
                len(workplace_location_sample))

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = workplace_location_settings['LOGSUM_CHOOSER_COLUMNS']
    persons_merged = persons_merged[chooser_columns]

    choosers = pd.merge(workplace_location_sample,
                        persons_merged,
                        left_index=True,
                        right_index=True,
                        how="left")

    choosers['in_period'] = time_period_label(
        workplace_location_settings['IN_PERIOD'])
    choosers['out_period'] = time_period_label(
        workplace_location_settings['OUT_PERIOD'])

    # FIXME - should do this in expression file?
    choosers['dest_topology'] = reindex(land_use.TOPOLOGY,
                                        choosers[alt_col_name])
    choosers['dest_density_index'] = reindex(land_use.density_index,
                                             choosers[alt_col_name])

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    logsums = compute_logsums(choosers, logsums_spec, logsum_settings,
                              skim_dict, skim_stack, alt_col_name, chunk_size,
                              trace_hh_id, trace_label)

    # "add_column series should have an index matching the table to which it is being added"
    # when the index has duplicates, however, in the special case that the series index exactly
    # matches the table index, then the series value order is preserved
    # logsums now does, since workplace_location_sample was on left side of merge de-dup merge
    orca.add_column("workplace_location_sample", "mode_choice_logsum", logsums)
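
The caveat in the closing comment is worth a sketch: sample tables are indexed by chooser id with duplicates (one row per sampled destination), so label-based alignment would be ambiguous, but assigning a series whose index compares equal to the table's index keeps the values in row order (toy values echoing the docstring):

import pandas as pd

sample = pd.DataFrame({'dest_TAZ': [14, 16, 12]},
                      index=pd.Index([23750, 23750, 23751], name='PERID'))
logsums = pd.Series([1.86, 1.92, 2.41], index=sample.index)

# indexes compare equal (duplicates and all), so values assign positionally
sample['mode_choice_logsum'] = logsums
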
Example #33
0
def mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                              trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for mandatory tours
    """
    trace_label = 'mandatory_tour_scheduling'
    model_settings_file_name = 'mandatory_tour_scheduling.yaml'
    estimators = {}

    model_settings = config.read_model_settings(model_settings_file_name)
    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])

    tours = tours.to_frame()
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    # - if no mandatory_tours
    if mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - filter chooser columns for both logsums and simulate
    logsum_columns = logsum_settings.get('LOGSUM_CHOOSER_COLUMNS', [])
    model_columns = model_settings.get('SIMULATE_CHOOSER_COLUMNS', [])
    chooser_columns = logsum_columns + [
        c for c in model_columns if c not in logsum_columns
    ]
    persons_merged = expressions.filter_chooser_columns(
        persons_merged, chooser_columns)

    # - add tour segmentation column
    # mtctm1 segments mandatory_scheduling spec by tour_type
    # (i.e. there are different specs for work and school tour_types)
    # mtctm1 logsum coefficients are segmented by primary_purpose
    # (i.e. there are different logsum coefficients for work, school, univ primary_purposes)
    # for simplicity in managing these different segmentation schemes,
    # we conflate them by segmenting the skims to align with primary_purpose
    tour_segment_col = 'mandatory_tour_seg'
    assert tour_segment_col not in mandatory_tours
    is_university_tour = \
        (mandatory_tours.tour_type == 'school') & \
        reindex(persons_merged.is_university, mandatory_tours.person_id)
    mandatory_tours[tour_segment_col] = \
        mandatory_tours.tour_type.where(~is_university_tour, 'univ')

    # load specs
    spec_segment_settings = model_settings.get('SPEC_SEGMENTS', {})
    specs = {}
    estimators = {}
    for spec_segment_name, spec_settings in spec_segment_settings.items():

        # estimator for this tour_segment
        estimator = estimation.manager.begin_estimation(
            model_name='mandatory_tour_scheduling_%s' % spec_segment_name,
            bundle_name='mandatory_tour_scheduling')

        spec_file_name = spec_settings['SPEC']
        model_spec = simulate.read_model_spec(file_name=spec_file_name)
        coefficients_df = simulate.read_model_coefficients(
            spec_segment_settings[spec_segment_name])
        specs[spec_segment_name] = simulate.eval_coefficients(
            model_spec, coefficients_df, estimator)

        if estimator:
            estimators[spec_segment_name] = estimator  # add to local list
            estimator.write_model_settings(model_settings,
                                           model_settings_file_name)
            estimator.write_spec(spec_settings)
            estimator.write_coefficients(coefficients_df)

    # - spec dict segmented by primary_purpose
    tour_segment_settings = model_settings.get('TOUR_SPEC_SEGMENTS', {})
    tour_segments = {}
    for tour_segment_name, spec_segment_name in tour_segment_settings.items():
        tour_segments[tour_segment_name] = {}
        tour_segments[tour_segment_name][
            'spec_segment_name'] = spec_segment_name
        tour_segments[tour_segment_name]['spec'] = specs[spec_segment_name]
        tour_segments[tour_segment_name]['estimator'] = estimators.get(
            spec_segment_name)

    timetable = inject.get_injectable("timetable")

    if estimators:
        timetable.begin_transaction(list(estimators.values()))

    logger.info("Running mandatory_tour_scheduling with %d tours", len(tours))
    choices = vts.vectorize_tour_scheduling(mandatory_tours,
                                            persons_merged,
                                            tdd_alts,
                                            timetable,
                                            tour_segments=tour_segments,
                                            tour_segment_col=tour_segment_col,
                                            model_settings=model_settings,
                                            chunk_size=chunk_size,
                                            trace_label=trace_label)

    if estimators:
        # override choices for all estimators
        choices_list = []
        for spec_segment_name, estimator in estimators.items():
            model_choices = choices[(
                mandatory_tours.tour_type == spec_segment_name)]

            # FIXME vectorize_tour_scheduling calls used to write_choices but perhaps shouldn't
            estimator.write_choices(model_choices)
            override_choices = estimator.get_survey_values(
                model_choices, 'tours', 'tdd')
            estimator.write_override_choices(override_choices)

            choices_list.append(override_choices)
            estimator.end_estimation()
        choices = pd.concat(choices_list)

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in tours.groupby('tour_num', sort=True):
            timetable.assign(window_row_ids=nth_tours['person_id'],
                             tdds=choices.reindex(nth_tours.index))

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'),
                       tdd_alts,
                       left_on=['tdd'],
                       right_index=True,
                       how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    timetable.replace_table()

    # updated df for tracing
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    tracing.dump_df(DUMP, tt.tour_map(persons_merged, mandatory_tours,
                                      tdd_alts), trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
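
The closing merge pattern above (chosen tdd ids joined back to tdd_alts to recover start, end, and duration) reduces to a small left merge; a sketch with hypothetical tour and alternative ids:

import pandas as pd

choices = pd.Series([0, 2], index=pd.Index([5001, 5002], name='tour_id'))
tdd_alts = pd.DataFrame({'start': [5, 5, 6], 'end': [10, 17, 18],
                         'duration': [5, 12, 12]})

# one row per tour, with start/end/duration pulled from the chosen alt
choices_df = pd.merge(choices.to_frame('tdd'), tdd_alts,
                      left_on='tdd', right_index=True, how='left')
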
Example #34
0
def home_is_urban(households, land_use, settings):
    s = reindex(land_use.area_type, households.home_taz)
    return s < settings['urban_threshold']
Example #35
0
def school_location_logsums(persons_merged, land_use, skim_dict, skim_stack,
                            school_location_sample, configs_dir, chunk_size,
                            trace_hh_id):
    """
    add logsum column to existing school_location_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in school_location_sample, and computing the logsum of all the utilities

    +-------+--------------+----------------+------------+----------------+
    | PERID | dest_TAZ     | rand           | pick_count | logsum (added) |
    +=======+==============+================+============+================+
    | 23750 | 14           | 0.565502716034 | 4          | 1.85659498857  |
    +-------+--------------+----------------+------------+----------------+
    | 23750 | 16           | 0.711135838871 | 6          | 1.92315598631  |
    +-------+--------------+----------------+------------+----------------+
    | ...   |              |                |            |                |
    +-------+--------------+----------------+------------+----------------+
    | 23751 | 12           | 0.408038878552 | 1          | 2.40612135416  |
    +-------+--------------+----------------+------------+----------------+
    | 23751 | 14           | 0.972732479292 | 2          | 1.44009018355  |
    +-------+--------------+----------------+------------+----------------+
    """

    trace_label = 'school_location_logsums'

    school_location_settings = config.read_model_settings(
        configs_dir, 'school_location.yaml')

    alt_col_name = school_location_settings["ALT_COL_NAME"]
    chooser_col_name = 'TAZ'

    # FIXME - just using settings from tour_mode_choice
    logsum_settings = config.read_model_settings(configs_dir,
                                                 'tour_mode_choice.yaml')

    persons_merged = persons_merged.to_frame()
    school_location_sample = school_location_sample.to_frame()

    logger.info("Running school_location_sample with %s rows" %
                len(school_location_sample))

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = school_location_settings['LOGSUM_CHOOSER_COLUMNS']
    persons_merged = persons_merged[chooser_columns]

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')

    logsums_list = []
    for school_type in ['university', 'highschool', 'gradeschool']:

        logsums_spec = mode_choice_logsums_spec(configs_dir, school_type)

        choosers = school_location_sample[school_location_sample['school_type']
                                          == school_type]

        choosers = pd.merge(choosers,
                            persons_merged,
                            left_index=True,
                            right_index=True,
                            how="left")

        choosers['in_period'] = skim_time_period_label(
            school_location_settings['IN_PERIOD'])
        choosers['out_period'] = skim_time_period_label(
            school_location_settings['OUT_PERIOD'])

        # FIXME - should do this in expression file?
        choosers['dest_topology'] = reindex(land_use.TOPOLOGY,
                                            choosers[alt_col_name])
        choosers['dest_density_index'] = reindex(land_use.density_index,
                                                 choosers[alt_col_name])

        tracing.dump_df(DUMP, choosers,
                        tracing.extend_trace_label(trace_label, school_type),
                        'choosers')

        logsums = compute_logsums(
            choosers, logsums_spec, logsum_settings, skim_dict, skim_stack,
            chooser_col_name, alt_col_name, chunk_size, trace_hh_id,
            tracing.extend_trace_label(trace_label, school_type))

        logsums_list.append(logsums)

    logsums = pd.concat(logsums_list)

    # add_column series should have an index matching the table to which it is being added
    # logsums does, since school_location_sample was on left side of merge creating choosers
    inject.add_column("school_location_sample", "mode_choice_logsum", logsums)
Example #36
0
def home_is_rural(households, land_use, settings):
    s = reindex(land_use.area_type, households.home_taz)
    return s > settings['rural_threshold']
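
Most of the one-line examples in this collection (home_taz, dest_topology, home_is_urban, home_is_rural, and the like) follow the same pattern: a derived column computed with reindex and registered with the pipeline's injection framework. A sketch assuming orca-style registration (the decorator, import path, and table names here are illustrative, not taken from these snippets):

import orca  # assumed; the registration is not shown in the snippets above
from activitysim.core.util import reindex  # assumed location of the helper

@orca.column('households')  # hypothetical: expose home_is_rural as a virtual column
def home_is_rural(households, land_use, settings):
    s = reindex(land_use.area_type, households.home_taz)
    return s > settings['rural_threshold']
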