Example #1
def initialize_households():

    trace_label = 'initialize_households'

    with chunk.chunk_log(trace_label, base=True):

        chunk.log_rss(f"{trace_label}.inside-yield")

        households = inject.get_table('households').to_frame()
        assert not households._is_view
        chunk.log_df(trace_label, "households", households)
        del households
        chunk.log_df(trace_label, "households", None)

        persons = inject.get_table('persons').to_frame()
        assert not persons._is_view
        chunk.log_df(trace_label, "persons", persons)
        del persons
        chunk.log_df(trace_label, "persons", None)

        model_settings = config.read_model_settings(
            'initialize_households.yaml', mandatory=True)
        annotate_tables(model_settings, trace_label)

        # - initialize shadow_pricing size tables after annotating household and person tables
        # since these are scaled to model size, they have to be created while single-process
        # this can now be called as a stand alone model step instead, add_size_tables
        add_size_tables = model_settings.get('add_size_tables', True)
        if add_size_tables:
            # warnings.warn(f"Calling add_size_tables from initialize will be removed in the future.", FutureWarning)
            shadow_pricing.add_size_tables()

        # - preload person_windows
        person_windows = inject.get_table('person_windows').to_frame()
        chunk.log_df(trace_label, "person_windows", person_windows)
Example #2
def auto_ownership_simulate(households_merged, auto_ownership_spec,
                            auto_ownership_settings, trace_hh_id):
    """
    Auto ownership is a standard model which predicts how many cars a household
    with given characteristics owns
    """

    logger.info("Running auto_ownership_simulate with %d households" %
                len(households_merged))

    nest_spec = config.get_logit_model_settings(auto_ownership_settings)
    constants = config.get_model_constants(auto_ownership_settings)

    choices = asim.simple_simulate(choosers=households_merged.to_frame(),
                                   spec=auto_ownership_spec,
                                   nest_spec=nest_spec,
                                   locals_d=constants,
                                   trace_label=trace_hh_id and 'auto_ownership',
                                   trace_choice_name='auto_ownership')

    tracing.print_summary('auto_ownership', choices, value_counts=True)

    inject.add_column('households', 'auto_ownership', choices)

    pipeline.add_dependent_columns('households', 'households_autoown')

    if trace_hh_id:
        trace_columns = ['auto_ownership'] + inject.get_table('households_autoown').columns
        tracing.trace_df(inject.get_table('households').to_frame(),
                         label='auto_ownership',
                         columns=trace_columns,
                         warn_if_empty=True)
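
Note: the trace_label argument above relies on Python's short-circuit `and`: when trace_hh_id is falsy (tracing disabled) the expression yields trace_hh_id itself, otherwise it yields the label string. A minimal illustration (the id value is invented):

# short-circuit `and` as used for trace_label above
trace_hh_id = None
print(trace_hh_id and 'auto_ownership')   # None -> tracing disabled, no label
trace_hh_id = 982875                      # hypothetical traced household id
print(trace_hh_id and 'auto_ownership')   # 'auto_ownership'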
Example #3
def test_persons_merged_table():

    persons_merged = inject.get_table('persons_merged').to_frame()
    assert 'person_gender' in persons_merged.columns
    assert 'hh_income' in persons_merged.columns

    persons = inject.get_table('persons').to_frame()
    assert (persons_merged.person_type == persons.person_type).all()

    assert persons_merged.shape[0] == 27
Example #4
def initialize_households():

    trace_label = 'initialize_households'

    model_settings = config.read_model_settings('initialize_households.yaml', mandatory=True)
    annotate_tables(model_settings, trace_label)

    # - initialize shadow_pricing size tables after annotating household and person tables
    # since these are scaled to model size, they have to be created while single-process
    shadow_pricing.add_size_tables()

    # - preload person_windows
    t0 = tracing.print_elapsed_time()
    inject.get_table('person_windows').to_frame()
    t0 = tracing.print_elapsed_time("preload person_windows", t0, debug=True)
Example #5
def initialize_households():

    trace_label = 'initialize_households'

    model_settings = config.read_model_settings('initialize_households.yaml',
                                                mandatory=True)
    annotate_tables(model_settings, trace_label)

    # - initialize shadow_pricing size tables after annotating household and person tables
    # since these are scaled to model size, they have to be created while single-process
    shadow_pricing.add_size_tables()

    # - preload person_windows
    t0 = tracing.print_elapsed_time()
    inject.get_table('person_windows').to_frame()
    t0 = tracing.print_elapsed_time("preload person_windows", t0, debug=True)
Example #6
    def __init__(self, size_term_selector):

        # do this once so they can request size_terms for various segments (tour_type or purpose)
        land_use = inject.get_table('land_use')
        size_terms = inject.get_injectable('size_terms')
        self.destination_size_terms = \
            tour_destination_size_terms(land_use, size_terms, size_term_selector)
Example #7
def create_controls(spec):
    locals_d = {'df': inject.get_table('all_acs').to_frame()}

    le = []

    for geog, target, expression in zip(spec.geog, spec.target, spec.expression):
        values = to_series(eval(expression, globals(), locals_d), target=target)
        le.append((target, values))

    variables = []
    seen = set()
    for statement in reversed(le):
        # statement is a tuple (<target_name>, <eval results in pandas.Series>)
        target_name = statement[0]
        if target_name not in seen:
            variables.insert(0, statement)
            seen.add(target_name)

    # DataFrame from list of tuples [(<target_name>, <eval results>), ...]
    # (pd.DataFrame.from_items was removed in pandas 1.0; dicts preserve insertion order)
    variables = pd.DataFrame(dict(variables))
    variables = variables.merge(
        locals_d['df'][['state', 'county', 'tract', 'block group']],
        how='left', left_index=True, right_index=True)
    variables['block_group_id'] = (variables['county'].astype('str') +
                                   variables['tract'].astype('str') +
                                   variables['block group'].astype('str'))
    return variables
Example #8
    def __init__(self, size_term_selector):

        # do this once so they can request size_terms for various segments (tour_type or purpose)
        land_use = inject.get_table('land_use')
        size_terms = inject.get_injectable('size_terms')
        self.destination_size_terms = \
            tour_destination_size_terms(land_use, size_terms, size_term_selector)
Example #9
def create_mandatory_tours():

    # FIXME - move this to body?

    persons = inject.get_table('persons')
    configs_dir = inject.get_injectable('configs_dir')

    persons = persons.to_frame(columns=[
        "mandatory_tour_frequency", "is_worker", "school_taz", "workplace_taz"
    ])
    persons = persons[~persons.mandatory_tour_frequency.isnull()]

    tour_frequency_alternatives = inject.get_injectable(
        'mandatory_tour_frequency_alternatives')

    tours = process_mandatory_tours(persons, tour_frequency_alternatives)

    expressions.assign_columns(df=tours,
                               model_settings='annotate_tours_with_dest',
                               configs_dir=configs_dir,
                               trace_label='create_mandatory_tours')

    pipeline.extend_table("tours", tours)
    tracing.register_traceable_table('tours', tours)
    pipeline.get_rn_generator().add_channel(tours, 'tours')
Example #10
def test_disaggregate_trips_table():

    trips = inject.get_table('disaggregate_trips').to_frame()
    assert 'build_auto_time' in trips.columns
    assert 'base_auto_time' in trips.columns

    assert trips.shape[0] == 250
Example #11
def append_tour_leg_trip_mode_choice_logsums(tours):
    """Creates trip mode choice logsum column in tours table for each tour mode and leg

    Parameters
    ----------
    tours : pd.DataFrame

    Returns
    -------
    tours : pd.DataFrame
        Adds two * n_modes logsum columns to each tour row, e.g. "logsum_DRIVE_outbound"
    """
    trips = inject.get_table('trips').to_frame()
    trip_dir_mode_logsums = trips.pivot(index='tour_id',
                                        columns=['tour_mode', 'outbound'],
                                        values='trip_mode_choice_logsum')
    new_cols = [
        '_'.join(['logsum', mode, 'outbound' if outbound else 'inbound'])
        for mode, outbound in trip_dir_mode_logsums.columns
    ]
    trip_dir_mode_logsums.columns = new_cols
    trip_dir_mode_logsums = trip_dir_mode_logsums.reindex(tours.index)
    tours = pd.merge(tours,
                     trip_dir_mode_logsums,
                     left_index=True,
                     right_index=True)

    return tours
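
A minimal sketch of the pivot/flatten step above on invented toy data (list-valued `columns=` requires pandas >= 1.1):

import pandas as pd

# invented toy trips: one tour with an outbound and an inbound DRIVE leg
trips = pd.DataFrame({
    'tour_id': [1, 1],
    'tour_mode': ['DRIVE', 'DRIVE'],
    'outbound': [True, False],
    'trip_mode_choice_logsum': [-1.2, -0.8],
})

# one row per tour, one column per (tour_mode, outbound) combination
logsums = trips.pivot(index='tour_id',
                      columns=['tour_mode', 'outbound'],
                      values='trip_mode_choice_logsum')

# flatten the MultiIndex columns into names like "logsum_DRIVE_outbound"
logsums.columns = [
    '_'.join(['logsum', mode, 'outbound' if outbound else 'inbound'])
    for mode, outbound in logsums.columns
]
print(logsums)  # columns: logsum_DRIVE_outbound, logsum_DRIVE_inbound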
Example #12
def write_data_dictionary(output_dir):
    """
    Write table_name, number of rows, columns, and bytes for each checkpointed table

    Parameters
    ----------
    output_dir: str

    """
    pd.options.display.max_columns = 500
    pd.options.display.max_rows = 100

    output_tables = pipeline.checkpointed_tables()

    records = []

    # write data dictionary for all checkpointed_tables
    with open(os.path.join(output_dir, 'data_dict.txt'), 'w') as file:
        for table_name in output_tables:
            df = inject.get_table(table_name, None).to_frame()

            print("\n### %s %s" % (table_name, df.shape), file=file)
            print(df.dtypes, file=file)

            rows, columns = df.shape
            bytes = df.memory_usage(index=True).sum()
            records.append((table_name, rows, columns, bytes))

    df = pd.DataFrame.from_records(
        records, columns=['table_name', 'rows', 'columns', 'bytes'])
    df.sort_values(by='table_name', inplace=True)
    df.to_csv(os.path.join(output_dir, 'data_dict.csv'))
Example #13
def annotate_tables(model_settings, trace_label):

    annotate_tables = model_settings.get('annotate_tables', [])

    if not annotate_tables:
        logger.warning("annotate_tables setting is empty - nothing to do!")

    t0 = tracing.print_elapsed_time()

    for table_info in annotate_tables:

        tablename = table_info['tablename']
        df = inject.get_table(tablename).to_frame()

        # - rename columns
        column_map = table_info.get('column_map', None)
        if column_map:
            logger.info("renaming %s columns %s" % (tablename, column_map,))
            df.rename(columns=column_map, inplace=True)

        # - annotate
        annotate = table_info.get('annotate', None)
        if annotate:
            logger.info("annotated %s SPEC %s" % (tablename, annotate['SPEC'],))
            expressions.assign_columns(
                df=df,
                model_settings=annotate,
                trace_label=trace_label)

        # fixme - narrow?

        # - write table to pipeline
        pipeline.replace_table(tablename, df)
Example #14
def previous_write_data_dictionary(output_dir):
    """
    Write table_name, number of rows, columns, and bytes for each checkpointed table

    Parameters
    ----------
    output_dir: str

    """

    model_settings = config.read_model_settings('write_data_dictionary')
    txt_format = model_settings.get('txt_format', 'data_dict.txt')
    csv_format = model_settings.get('csv_format', 'data_dict.csv')

    if txt_format:

        output_file_path = config.output_file_path(txt_format)

        pd.options.display.max_columns = 500
        pd.options.display.max_rows = 100

        output_tables = pipeline.checkpointed_tables()

        # write data dictionary for all checkpointed_tables

        with open(output_file_path, 'w') as output_file:
            for table_name in output_tables:
                df = inject.get_table(table_name, None).to_frame()

                print("\n### %s %s" % (table_name, df.shape), file=output_file)
                print('index:',
                      df.index.name,
                      df.index.dtype,
                      file=output_file)
                print(df.dtypes, file=output_file)
Example #15
    def get_tazs(self):
        # FIXME - should compute on init?
        if self.zone_system == ONE_ZONE:
            tazs = inject.get_table('land_use').index.values
        else:
            tazs = self.maz_taz_df.TAZ.unique()
        assert isinstance(tazs, np.ndarray)
        return tazs
Example #16
def test_trips_with_demographics_table():

    trips = inject.get_table('trips_with_demographics').to_frame()
    assert 'build_auto_time' in trips.columns
    assert 'base_auto_time' in trips.columns
    assert 'person_age' in trips.columns
    assert 'hh_income' in trips.columns

    assert trips.shape[0] == 250
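Example #17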
def annotate_jtp(model_settings, trace_label):

    # - annotate persons
    persons = inject.get_table('persons').to_frame()
    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))
    pipeline.replace_table("persons", persons)
Example #18
def annotate_tables(model_settings, trace_label):

    trace_label = tracing.extend_trace_label(trace_label, 'annotate_tables')

    chunk.log_rss(trace_label)

    annotate_tables = model_settings.get('annotate_tables', [])

    if not annotate_tables:
        logger.warning(
            f"{trace_label} - annotate_tables setting is empty - nothing to do!"
        )

    assert isinstance(annotate_tables, list), \
        f"annotate_tables settings should be a list but is {type(annotate_tables)}"

    t0 = tracing.print_elapsed_time()

    for table_info in annotate_tables:

        tablename = table_info['tablename']

        chunk.log_rss(f"{trace_label}.pre-get_table.{tablename}")

        df = inject.get_table(tablename).to_frame()
        chunk.log_df(trace_label, tablename, df)

        # - rename columns
        column_map = table_info.get('column_map', None)
        if column_map:

            warnings.warn(
                f"Setting 'column_map' has been changed to 'rename_columns'. "
                f"Support for 'column_map' in annotate_tables  will be removed in future versions.",
                FutureWarning)

            logger.info(
                f"{trace_label} - renaming {tablename} columns {column_map}")
            df.rename(columns=column_map, inplace=True)

        # - annotate
        annotate = table_info.get('annotate', None)
        if annotate:
            logger.info(
                f"{trace_label} - annotating {tablename} SPEC {annotate['SPEC']}"
            )
            expressions.assign_columns(df=df,
                                       model_settings=annotate,
                                       trace_label=trace_label)

        chunk.log_df(trace_label, tablename, df)

        # - write table to pipeline
        pipeline.replace_table(tablename, df)

        del df
        chunk.log_df(trace_label, tablename, None)
Example #19
def write_data_dictionary(output_dir):

    output_tables = pipeline.checkpointed_tables()

    # write data dictionary for all checkpointed_tables
    with open(os.path.join(output_dir, 'data_dict.csv'), 'a') as file:
        for table_name in output_tables:
            df = inject.get_table(table_name, None).to_frame()
            print("\n### %s (%s)\n" % (table_name, df.shape), df.dtypes, file=file)
Example #20
    def __init__(self, size_term_selector):

        # do this once so they can request size_terms for various segments (tour_type or purpose)
        land_use = inject.get_table('land_use')
        self.land_use = land_use
        size_terms = inject.get_injectable('size_terms')
        self.destination_size_terms = \
            tour_destination_size_terms(self.land_use, size_terms, size_term_selector)

        assert not self.destination_size_terms.isna().any(axis=None)
Example #21
def test_read_persons_table():

    table_settings = config.read_model_settings('tables.yaml')
    assert table_settings.get('persons') == 'persons.csv'

    # expect all of and only the columns specified by persons_column_map values
    persons = inject.get_table('persons').to_frame()
    assert expect_columns(persons,
                          list(table_settings['persons_column_map'].values()))

    assert persons.shape[0] == 27
Example #22
def add_result_columns(base_dfname, from_df, prefix=''):

    dest_df = inject.get_table(base_dfname).to_frame()

    if prefix:
        from_df = from_df.copy()
        from_df.columns = [prefix + c for c in from_df.columns.values]

    assign_in_place(dest_df, from_df)

    pipeline.replace_table(base_dfname, dest_df)
Example #23
def test_read_households_table():

    table_settings = config.read_model_settings('tables.yaml')

    households = inject.get_table('households').to_frame()
    assert not missing_columns(households,
                               list(table_settings['base_households_column_map'].values()))

    assert not missing_columns(households,
                               list(table_settings['build_households_column_map'].values()))

    assert households.shape[0] == 9
Example #24
def input_pre_processor():

    # - load generic data
    data_dir = setting('data_dir', inject.get_injectable('data_dir'))
    load_tables('input_tables', data_dir)

    # - load scenario input data
    scenario_input_dir = os.path.join(scenario_dir(), 'inputs')
    load_tables('scenario_input_tables', scenario_input_dir)

    for table_name in pipeline.orca_dataframe_tables():
        df = inject.get_table(table_name, None).to_frame()
Example #25
def _create_od_alts_from_dest_size_terms(size_terms_df,
                                         segment_name,
                                         od_id_col=None,
                                         origin_id_col='origin',
                                         dest_id_col='destination',
                                         origin_filter=None,
                                         origin_attr_cols=None):
    """
    Extend destination size terms to create dataframe representing the
    cartesian product of tour origins and destinations. Actual "Size Terms"
    will still only be associated with the destinations, but individual
    attributes of the origins can be preserved.
    """

    land_use = inject.get_table('land_use').to_frame(columns=origin_attr_cols)

    if origin_filter:
        origins = land_use.query(origin_filter)
    else:
        origins = land_use

    n_repeat = len(origins)
    od_alts = size_terms_df.reindex(size_terms_df.index.repeat(n_repeat))
    od_alts[origin_id_col] = list(
        origins.index.values) * od_alts.index.nunique()
    od_alts.reset_index(inplace=True)
    if dest_id_col not in od_alts.columns:
        od_alts.rename(columns={land_use.index.name: dest_id_col},
                       inplace=True)

    if od_id_col is None:
        new_index_name = get_od_id_col(origin_id_col, dest_id_col)
    else:
        new_index_name = od_id_col
    od_alts[new_index_name] = od_alts[origin_id_col].astype(
        str) + '_' + od_alts[dest_id_col].astype(str)
    od_alts.set_index(new_index_name, inplace=True)

    # manually add origin attributes to output since these can't be generated by
    # the destination-based size term calculator
    if origin_attr_cols:
        land_use.index.name = origin_id_col
        land_use.reset_index(inplace=True)
        od_alts.reset_index(inplace=True)
        od_alts = pd.merge(od_alts,
                           land_use[origin_attr_cols + [origin_id_col]],
                           on=origin_id_col,
                           how='left').set_index(new_index_name)

    return od_alts
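Example #26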
def mandatory_tour_frequency(persons_merged,
                             mandatory_tour_frequency_spec,
                             mandatory_tour_frequency_settings,
                             chunk_size,
                             trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.
    """

    trace_label = 'mandatory_tour_frequency'

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons" % len(choosers))

    nest_spec = config.get_logit_model_settings(mandatory_tour_frequency_settings)
    constants = config.get_model_constants(mandatory_tour_frequency_settings)

    choices = simulate.simple_simulate(
        choosers,
        spec=mandatory_tour_frequency_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(
        mandatory_tour_frequency_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    tracing.print_summary('mandatory_tour_frequency', choices, value_counts=True)

    inject.add_column("persons", "mandatory_tour_frequency", choices)

    create_mandatory_tours(trace_hh_id)

    # add mandatory_tour-dependent columns (e.g. tour counts) to persons
    pipeline.add_dependent_columns("persons", "persons_mtf")

    if trace_hh_id:
        trace_columns = ['mandatory_tour_frequency']
        tracing.trace_df(inject.get_table('persons').to_frame(),
                         label="mandatory_tour_frequency.persons",
                         # columns=trace_columns,
                         warn_if_empty=True)
Example #27
def cdap_simulate(persons_merged, cdap_settings, cdap_indiv_spec,
                  cdap_interaction_coefficients,
                  cdap_fixed_relative_proportions, chunk_size, trace_hh_id):
    """
    CDAP stands for Coordinated Daily Activity Pattern, which is a choice of
    high-level activity pattern for each person, in a coordinated way with other
    members of a person's household.

    Because Python requires vectorization of computation, there are some specialized
    routines in the cdap directory of activitysim for this purpose.  This module
    simply applies those utilities using the simulation framework.
    """

    persons_df = persons_merged.to_frame()

    constants = config.get_model_constants(cdap_settings)

    logger.info("Running cdap_simulate with %d persons" %
                len(persons_df.index))

    choices = run_cdap(
        persons=persons_df,
        cdap_indiv_spec=cdap_indiv_spec,
        cdap_interaction_coefficients=cdap_interaction_coefficients,
        cdap_fixed_relative_proportions=cdap_fixed_relative_proportions,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label='cdap')

    tracing.print_summary('cdap_activity',
                          choices.cdap_activity,
                          value_counts=True)

    print(pd.crosstab(persons_df.ptype, choices.cdap_activity, margins=True))

    choices = choices.reindex(persons_merged.index)
    inject.add_column("persons", "cdap_activity", choices.cdap_activity)
    inject.add_column("persons", "cdap_rank", choices.cdap_rank)

    pipeline.add_dependent_columns("persons", "persons_cdap")
    pipeline.add_dependent_columns("households", "households_cdap")

    if trace_hh_id:

        tracing.trace_df(inject.get_table('persons_merged').to_frame(),
                         label="cdap",
                         columns=['ptype', 'cdap_rank', 'cdap_activity'],
                         warn_if_empty=True)
Example #28
def preload_injectables():
    """
    preload bulky injectables up front - stuff that isn't inserted into the pipeline
    """

    logger.info("preload_injectables")

    inject.add_step('track_skim_usage', track_skim_usage)
    inject.add_step('write_data_dictionary', write_data_dictionary)
    inject.add_step('write_tables', write_tables)

    table_list = config.setting('input_table_list')

    # default ActivitySim table names and indices
    if table_list is None:
        logger.warning(
            "No 'input_table_list' found in settings. This will be a "
            "required setting in upcoming versions of ActivitySim.")

        new_settings = inject.get_injectable('settings')
        new_settings['input_table_list'] = DEFAULT_TABLE_LIST
        inject.add_injectable('settings', new_settings)

    # FIXME undocumented feature
    if config.setting('write_raw_tables'):

        # write raw input tables as csv (before annotation)
        csv_dir = config.output_file_path('raw_tables')
        if not os.path.exists(csv_dir):
            os.makedirs(csv_dir)  # make directory if needed

        table_names = [t['tablename'] for t in table_list]
        for t in table_names:
            df = inject.get_table(t).to_frame()
            if t == 'households':
                df.drop(columns='chunk_id', inplace=True)
            df.to_csv(os.path.join(csv_dir, '%s.csv' % t), index=True)

    t0 = tracing.print_elapsed_time()

    # FIXME - still want to do this?
    # if inject.get_injectable('skim_dict', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_dict", t0, debug=True)
    #
    # if inject.get_injectable('skim_stack', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_stack", t0, debug=True)

    return True
Example #29
def test_read_build_trips_table():

    table_settings = config.read_model_settings('tables.yaml')
    assert table_settings.get('buildtrips') == 'buildtrips_normal.csv'

    trips = inject.get_table('build_trips').to_frame()

    # expect all of and only the columns specified by persons_column_map values
    raw_columns = \
        mapped_columns(
            table_settings['buildtrips_column_map'],
            table_settings['buildtrips_baselos_column_map']) + ['build', 'base', 'person_id']

    assert expect_columns(trips, raw_columns)

    assert trips.shape[0] == 127
Example #30
def write_estimation_specs(estimator, model_settings, settings_file):
    """
    write sample_spec, spec, and coefficients to estimation data bundle

    Parameters
    ----------
    model_settings
    settings_file
    """

    estimator.write_model_settings(model_settings, settings_file)
    # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
    estimator.write_spec(model_settings, tag='SPEC')
    estimator.write_coefficients(simulate.read_model_coefficients(model_settings))

    estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
    estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
Example #31
def initialize():
    """
    Because the random seed is set differently for each step, the sampling of households
    depends on which step they are initially loaded in, so we force them to load here,
    where they get stored to the pipeline.
    """

    t0 = tracing.print_elapsed_time()
    inject.get_table('land_use').to_frame()
    t0 = tracing.print_elapsed_time("preload land_use", t0, debug=True)

    inject.get_table('households').to_frame()
    t0 = tracing.print_elapsed_time("preload households", t0, debug=True)

    inject.get_table('persons').to_frame()
    t0 = tracing.print_elapsed_time("preload persons", t0, debug=True)

    inject.get_table('person_windows').to_frame()
    t0 = tracing.print_elapsed_time("preload person_windows", t0, debug=True)
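Example #32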
def create_non_mandatory_tours():
    """
    We have now generated non-mandatory tours, but they are attributes of the person table.
    Now we create a "tours" table which has one row per tour that has been generated
    (and the person id it is associated with)
    """

    persons = inject.get_table('persons')
    alts = inject.get_injectable('non_mandatory_tour_frequency_alts')

    df = process_non_mandatory_tours(
        persons.non_mandatory_tour_frequency.dropna(),
        alts
    )

    pipeline.extend_table("tours", df)
    tracing.register_traceable_table('tours', df)
    pipeline.get_rn_generator().add_channel(df, 'tours')
Example #33
def annotate_tables(model_settings, trace_label):

    annotate_tables = model_settings.get('annotate_tables', [])

    if not annotate_tables:
        logger.warning("annotate_tables setting is empty - nothing to do!")

    t0 = tracing.print_elapsed_time()

    for table_info in annotate_tables:

        tablename = table_info['tablename']
        df = inject.get_table(tablename).to_frame()

        # - rename columns
        column_map = table_info.get('column_map', None)
        if column_map:

            warnings.warn(
                "annotate_tables option 'column_map' renamed 'rename_columns' and moved"
                "to settings.yaml. Support for 'column_map' in annotate_tables will be "
                "removed in future versions.", FutureWarning)

            logger.info("renaming %s columns %s" % (
                tablename,
                column_map,
            ))
            df.rename(columns=column_map, inplace=True)

        # - annotate
        annotate = table_info.get('annotate', None)
        if annotate:
            logger.info("annotated %s SPEC %s" % (
                tablename,
                annotate['SPEC'],
            ))
            expressions.assign_columns(df=df,
                                       model_settings=annotate,
                                       trace_label=trace_label)

        # fixme - narrow?

        # - write table to pipeline
        pipeline.replace_table(tablename, df)
Example #34
def add_null_results(trace_label, mandatory_tour_frequency_settings):
    logger.info("Skipping %s: add_null_results", trace_label)

    persons = inject.get_table('persons').to_frame()
    persons['mandatory_tour_frequency'] = ''

    tours = pd.DataFrame()
    tours['tour_category'] = None
    tours['tour_type'] = None
    tours['person_id'] = None
    tours.index.name = 'tour_id'
    pipeline.replace_table("tours", tours)

    expressions.assign_columns(
        df=persons,
        model_settings=mandatory_tour_frequency_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)
Example #35
def get_shadow_pricing_info():
    """
    return dict with info about dtype and shapes of desired and modeled size tables

    block shape is (num_zones, num_segments + 1)


    Returns
    -------
    shadow_pricing_info: dict
        dtype: <sp_dtype>,
        block_shapes: dict {<model_selector>: <block_shape>}
    """

    land_use = inject.get_table('land_use')
    size_terms = inject.get_injectable('size_terms')

    shadow_settings = config.read_model_settings('shadow_pricing.yaml')

    # shadow_pricing_models is dict of {<model_selector>: <model_name>}
    shadow_pricing_models = shadow_settings['shadow_pricing_models']

    blocks = OrderedDict()
    for model_selector in shadow_pricing_models:

        sp_rows = len(land_use)
        sp_cols = len(size_terms[size_terms.model_selector == model_selector])

        # extra tally column for TALLY_CHECKIN and TALLY_CHECKOUT semaphores
        blocks[block_name(model_selector)] = (sp_rows, sp_cols + 1)

    sp_dtype = np.int64

    shadow_pricing_info = {
        'dtype': sp_dtype,
        'block_shapes': blocks,
    }

    for k in shadow_pricing_info:
        logger.debug("shadow_pricing_info %s: %s" % (k, shadow_pricing_info.get(k)))

    return shadow_pricing_info
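
To make the block-shape convention concrete, a tiny sketch with invented counts (25 zones, 3 size-term segments for a selector):

import numpy as np

num_zones = 25      # invented: len(land_use)
num_segments = 3    # invented: size-term rows matching this model_selector

# extra tally column for the TALLY_CHECKIN / TALLY_CHECKOUT semaphores
block_shape = (num_zones, num_segments + 1)
block = np.zeros(block_shape, dtype=np.int64)  # the sp_dtype above
assert block.shape == (25, 4)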
Example #36
def test_mini_pipeline_run3():

    # test that hh_ids setting overrides household sampling

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    setup_dirs(configs_dir)
    inject_settings(configs_dir, hh_ids='override_hh_ids.csv')

    households = inject.get_table('households').to_frame()

    override_hh_ids = pd.read_csv(config.data_file_path('override_hh_ids.csv'))

    print("\noverride_hh_ids\n", override_hh_ids)

    print("\nhouseholds\n", households.index)

    assert households.shape[0] == override_hh_ids.shape[0]
    assert households.index.isin(override_hh_ids.household_id).all()

    inject.clear_cache()
    close_handlers()
Example #37
def write_data_dictionary(output_dir):
    """
    Write table_name, number of rows, columns, and bytes for each checkpointed table

    Parameters
    ----------
    output_dir: str

    """
    pd.options.display.max_columns = 500
    pd.options.display.max_rows = 100

    output_tables = pipeline.checkpointed_tables()

    # write data dictionary for all checkpointed_tables

    mode = 'wb' if sys.version_info < (3,) else 'w'
    with open(config.output_file_path('data_dict.txt'), mode) as output_file:
        for table_name in output_tables:
            df = inject.get_table(table_name, None).to_frame()

            print("\n### %s %s" % (table_name, df.shape), file=output_file)
            print('index:', df.index.name, df.index.dtype, file=output_file)
            print(df.dtypes, file=output_file)
Example #38
def run_trip_destination(
        trips,
        tours_merged,
        chunk_size, trace_hh_id,
        trace_label):
    """
    trip destination - main functionality separated from model step so it can be called iteratively

    Run the trip_destination model, assigning destinations for each (intermediate) trip
    (last trips already have a destination - either the tour primary destination or Home)

    Set trip destination and origin columns, and a boolean failed flag for any failed trips
    (destination for flagged failed trips will be set to -1)

    Parameters
    ----------
    trips
    tours_merged
    chunk_size
    trace_hh_id
    trace_label

    Returns
    -------

    """

    model_settings = config.read_model_settings('trip_destination.yaml')
    preprocessor_settings = model_settings.get('preprocessor', None)
    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    land_use = inject.get_table('land_use')
    size_terms = inject.get_injectable('size_terms')

    # - initialize trip origin and destination to those of half-tour
    # (we will sequentially adjust intermediate trips origin and destination as we choose them)
    tour_destination = reindex(tours_merged.destination, trips.tour_id).astype(int)
    tour_origin = reindex(tours_merged.origin, trips.tour_id).astype(int)
    trips['destination'] = np.where(trips.outbound, tour_destination, tour_origin)
    trips['origin'] = np.where(trips.outbound, tour_origin, tour_destination)
    trips['failed'] = False

    trips = trips.sort_index()
    trips['next_trip_id'] = np.roll(trips.index, -1)
    trips.next_trip_id = trips.next_trip_id.where(trips.trip_num < trips.trip_count, 0)

    # - filter tours_merged (AFTER copying destination and origin columns to trips)
    # tours_merged is used for logsums, we filter it here upfront to save space and time
    tours_merged_cols = logsum_settings['TOURS_MERGED_CHOOSER_COLUMNS']
    if 'REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS' in model_settings:
        redundant_cols = model_settings['REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS']
        tours_merged_cols = [c for c in tours_merged_cols if c not in redundant_cols]
    tours_merged = tours_merged[tours_merged_cols]

    # - skims
    skims = wrap_skims(model_settings)

    # - size_terms and alternatives
    alternatives = tour_destination_size_terms(land_use, size_terms, 'trip')

    # DataFrameMatrix allows us to treat the dataframe as a virtual 2-D array, indexed by TAZ and purpose
    # e.g. size_terms.get(df.dest_taz, df.purpose)
    # returns a series of size_terms for each chooser's dest_taz and purpose with chooser index
    size_term_matrix = DataFrameMatrix(alternatives)

    # don't need size terms in alternatives, just TAZ index
    alternatives = alternatives.drop(alternatives.columns, axis=1)
    alternatives.index.name = model_settings['ALT_DEST']

    # - process intermediate trips in ascending trip_num order
    intermediate = trips.trip_num < trips.trip_count
    if intermediate.any():

        first_trip_num = trips[intermediate].trip_num.min()
        last_trip_num = trips[intermediate].trip_num.max()

        # iterate over trips in ascending trip_num order
        for trip_num in range(first_trip_num, last_trip_num + 1):

            nth_trips = trips[intermediate & (trips.trip_num == trip_num)]
            nth_trace_label = tracing.extend_trace_label(trace_label, 'trip_num_%s' % trip_num)

            # - annotate nth_trips
            if preprocessor_settings:
                expressions.assign_columns(
                    df=nth_trips,
                    model_settings=preprocessor_settings,
                    locals_dict=config.get_model_constants(model_settings),
                    trace_label=nth_trace_label)

            logger.info("Running %s with %d trips", nth_trace_label, nth_trips.shape[0])

            # - choose destination for nth_trips, segmented by primary_purpose
            choices_list = []
            for primary_purpose, trips_segment in nth_trips.groupby('primary_purpose'):
                choices = choose_trip_destination(
                    primary_purpose,
                    trips_segment,
                    alternatives,
                    tours_merged,
                    model_settings,
                    size_term_matrix, skims,
                    chunk_size, trace_hh_id,
                    trace_label=tracing.extend_trace_label(nth_trace_label, primary_purpose))

                choices_list.append(choices)

            destinations = pd.concat(choices_list)

            failed_trip_ids = nth_trips.index.difference(destinations.index)
            if failed_trip_ids.any():
                logger.warning("%s sidelining %s trips without viable destination alternatives" %
                               (nth_trace_label, failed_trip_ids.shape[0]))
                next_trip_ids = nth_trips.next_trip_id.reindex(failed_trip_ids)
                trips.loc[failed_trip_ids, 'failed'] = True
                trips.loc[failed_trip_ids, 'destination'] = -1
                trips.loc[next_trip_ids, 'origin'] = trips.loc[failed_trip_ids].origin.values

            # - assign choices to these trips destinations and to next trips origin
            assign_in_place(trips, destinations.to_frame('destination'))
            destinations.index = nth_trips.next_trip_id.reindex(destinations.index)
            assign_in_place(trips, destinations.to_frame('origin'))

    del trips['next_trip_id']

    return trips
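
The DataFrameMatrix comment above describes a vectorized (row, column) lookup; a rough equivalent with plain pandas indexing on invented data (not the actual DataFrameMatrix implementation) might be:

import numpy as np
import pandas as pd

# invented size terms: rows indexed by TAZ, one column per trip purpose
size_terms = pd.DataFrame({'work': [10., 20., 30.], 'shop': [1., 2., 3.]},
                          index=pd.Index([100, 101, 102], name='TAZ'))

# invented choosers, each with a destination TAZ and a purpose
df = pd.DataFrame({'dest_taz': [101, 100, 102],
                   'purpose': ['shop', 'work', 'shop']})

# rough equivalent of size_term_matrix.get(df.dest_taz, df.purpose):
# translate labels to positions, then fancy-index the underlying 2-D array
row_pos = size_terms.index.get_indexer(df.dest_taz)
col_pos = size_terms.columns.get_indexer(df.purpose)
result = pd.Series(size_terms.to_numpy()[row_pos, col_pos], index=df.index)
print(result)  # 2.0, 10.0, 3.0 - one size term per chooser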
Example #39
def joint_tour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each joint tour
    """
    trace_label = 'joint_tour_scheduling'
    model_settings = config.read_model_settings('joint_tour_scheduling.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_joint.csv')

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    # use inject.get_table as this won't exist if there are no joint_tours
    joint_tour_participants = inject.get_table('joint_tour_participants').to_frame()

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d joint tours", trace_label, joint_tours.shape[0])

    # it may seem peculiar that we are concerned with persons rather than households
    # but every joint tour is (somewhat arbitrarily) assigned a "primary person"
    # some of whose characteristics are used in the spec
    # and we get household attributes along with person attributes in persons_merged
    persons_merged = persons_merged[persons_merged.num_hh_joint_tours > 0]

    # since a household's joint tours each potentially have different participants
    # they may also have different joint tour masks (free time of all participants)
    # so we have to either chunk processing by joint_tour_num and build timetable by household
    # or build timetables by unique joint_tour

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=joint_tours,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    tdd_choices, timetable = vectorize_joint_tour_scheduling(
        joint_tours, joint_tour_participants,
        persons_merged,
        tdd_alts,
        spec=model_spec,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    timetable.replace_table()

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    joint_tours = tours[tours.tour_category == 'joint']

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_scheduling",
                         slicer='household_id')
Example #40
def cdap_simulate(persons_merged, persons, households,
                  cdap_indiv_spec,
                  cdap_interaction_coefficients,
                  cdap_fixed_relative_proportions,
                  chunk_size, trace_hh_id):
    """
    CDAP stands for Coordinated Daily Activity Pattern, which is a choice of
    high-level activity pattern for each person, in a coordinated way with other
    members of a person's household.

    Because Python requires vectorization of computation, there are some specialized
    routines in the cdap directory of activitysim for this purpose.  This module
    simply applies those utilities using the simulation framework.
    """

    trace_label = 'cdap'
    model_settings = config.read_model_settings('cdap.yaml')

    persons_merged = persons_merged.to_frame()

    constants = config.get_model_constants(model_settings)

    cdap_interaction_coefficients = \
        cdap.preprocess_interaction_coefficients(cdap_interaction_coefficients)

    # specs are built just-in-time on demand and cached as injectables
    # prebuilding here allows us to write them to the output directory
    # (also when multiprocessing locutor might not see all household sizes)
    logger.info("Pre-building cdap specs")
    for hhsize in range(2, cdap.MAX_HHSIZE + 1):
        spec = cdap.build_cdap_spec(cdap_interaction_coefficients, hhsize, cache=True)
        if inject.get_injectable('locutor', False):
            spec.to_csv(config.output_file_path('cdap_spec_%s.csv' % hhsize), index=True)

    logger.info("Running cdap_simulate with %d persons", len(persons_merged.index))

    choices = cdap.run_cdap(
        persons=persons_merged,
        cdap_indiv_spec=cdap_indiv_spec,
        cdap_interaction_coefficients=cdap_interaction_coefficients,
        cdap_fixed_relative_proportions=cdap_fixed_relative_proportions,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label)

    # - assign results to persons table and annotate
    persons = persons.to_frame()

    choices = choices.reindex(persons.index)
    persons['cdap_activity'] = choices.cdap_activity
    persons['cdap_rank'] = choices.cdap_rank

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    # - annotate households table
    households = households.to_frame()
    expressions.assign_columns(
        df=households,
        model_settings=model_settings.get('annotate_households'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_households'))
    pipeline.replace_table("households", households)

    tracing.print_summary('cdap_activity', persons.cdap_activity, value_counts=True)
    logger.info("cdap crosstabs:\n%s" %
                pd.crosstab(persons.ptype, persons.cdap_activity, margins=True))

    if trace_hh_id:

        tracing.trace_df(inject.get_table('persons_merged').to_frame(),
                         label="cdap",
                         columns=['ptype', 'cdap_rank', 'cdap_activity'],
                         warn_if_empty=True)
Example #41
def add_size_tables():
    """
    inject tour_destination_size_terms tables for each model_selector (e.g. school, workplace)

    Size tables are pandas dataframes with locations counts for model_selector by zone and segment
    tour_destination_size_terms

    if using shadow pricing, we scale size_table counts to sample population
    (in which case, they have to be created while single-process)

    Scaling is problematic as it breaks household result replicability across sample sizes
    It also changes the magnitude of the size terms so if they are used as utilities in
    expression files, their importance will diminish relative to other utilities as the sample
    size decreases.

    Scaling makes most sense for a full sample in conjunction with shadow pricing, where
    shadow prices can be adjusted iteratively to bring modelled counts into line with desired
    (size table) counts.
    """

    use_shadow_pricing = bool(config.setting('use_shadow_pricing'))

    shadow_settings = config.read_model_settings('shadow_pricing.yaml')
    shadow_pricing_models = shadow_settings['shadow_pricing_models']

    # probably ought not scale if not shadow_pricing (breaks partial sample replicability)
    # but this allows compatability with existing CTRAMP behavior...
    scale_size_table = shadow_settings.get('SCALE_SIZE_TABLE', False)

    if shadow_pricing_models is None:
        logger.warning('shadow_pricing_models list not found in shadow_pricing settings')
        return

    # shadow_pricing_models is dict of {<model_selector>: <model_name>}
    # since these are scaled to model size, they have to be created while single-process

    for model_selector, model_name in iteritems(shadow_pricing_models):

        model_settings = config.read_model_settings(model_name)

        assert model_selector == model_settings['MODEL_SELECTOR']

        segment_ids = model_settings['SEGMENT_IDS']
        chooser_table_name = model_settings['CHOOSER_TABLE_NAME']
        chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

        choosers_df = inject.get_table(chooser_table_name).to_frame()
        if 'CHOOSER_FILTER_COLUMN_NAME' in model_settings:
            choosers_df = \
                choosers_df[choosers_df[model_settings['CHOOSER_FILTER_COLUMN_NAME']] != 0]

        # - raw_desired_size
        land_use = inject.get_table('land_use')
        size_terms = inject.get_injectable('size_terms')
        raw_size = tour_destination_size_terms(land_use, size_terms, model_selector)
        assert set(raw_size.columns) == set(segment_ids.keys())

        if use_shadow_pricing or scale_size_table:

            inject.add_table('raw_' + size_table_name(model_selector), raw_size)

            # - scale size_table counts to sample population
            # scaled_size = zone_size * (total_segment_modeled / total_segment_desired)

            # segment scale factor (modeled / desired) keyed by segment_name
            segment_scale_factors = {}
            for c in raw_size:
                # number of zone demographics desired destination choices
                segment_desired_size = raw_size[c].astype(np.float64).sum()

                # number of synthetic population choosers in segment
                segment_chooser_count = \
                    (choosers_df[chooser_segment_column] == segment_ids[c]).sum()

                segment_scale_factors[c] = \
                    segment_chooser_count / np.maximum(segment_desired_size, 1)

                logger.info("add_desired_size_tables %s segment %s "
                            "desired %s modeled %s scale_factor %s" %
                            (chooser_table_name, c,
                             segment_desired_size,
                             segment_chooser_count,
                             segment_scale_factors[c]))

            # FIXME - should we be rounding?
            scaled_size = (raw_size * segment_scale_factors).round()
        else:
            scaled_size = raw_size

        inject.add_table(size_table_name(model_selector), scaled_size)
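
A toy illustration of the scaling rule described in the docstring, with invented numbers: a segment whose desired size totals 1,000 across zones but has only 100 sampled choosers gets scale factor 100 / 1000 = 0.1:

import numpy as np
import pandas as pd

# invented raw desired sizes by zone for one segment; the column name is hypothetical
raw_size = pd.DataFrame({'work_low': [400., 350., 250.]})   # sums to 1000

segment_chooser_count = 100      # invented: choosers in the sampled population
segment_desired_size = raw_size['work_low'].astype(np.float64).sum()

scale_factor = segment_chooser_count / np.maximum(segment_desired_size, 1)
scaled_size = (raw_size * scale_factor).round()
print(scaled_size.work_low.tolist())  # [40.0, 35.0, 25.0]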
Example #42
    def __init__(self, model_settings, num_processes, shared_data=None, shared_data_lock=None):
        """

        Presence of shared_data is used as a flag for multiprocessing
        If we are multiprocessing, shared_data should be a multiprocessing.RawArray buffer
        to aggregate modeled_size across all sub-processes, and shared_data_lock should be
        a multiprocessing.Lock object to coordinate access to that buffer.

        Optionally load saved shadow_prices from data_dir if config setting use_shadow_pricing
        and shadow_setting LOAD_SAVED_SHADOW_PRICES are both True

        Parameters
        ----------
        model_settings : dict
        shared_data : multiprocessing.RawArray or None (if single process)
        shared_data_lock : multiprocessing.Lock or None (if single process)
        """

        self.num_processes = num_processes
        self.use_shadow_pricing = bool(config.setting('use_shadow_pricing'))
        self.saved_shadow_price_file_path = None  # set by read_saved_shadow_prices if loaded

        self.model_selector = model_settings['MODEL_SELECTOR']

        full_model_run = config.setting('households_sample_size') == 0
        if self.use_shadow_pricing and not full_model_run:
            logging.warning("deprecated combination of use_shadow_pricing and not full_model_run")

        self.segment_ids = model_settings['SEGMENT_IDS']

        # - modeled_size (set by call to set_choices/synchronize_choices)
        self.modeled_size = None

        if self.use_shadow_pricing:
            self.shadow_settings = config.read_model_settings('shadow_pricing.yaml')

            for k in self.shadow_settings:
                logger.debug("shadow_settings %s: %s" % (k, self.shadow_settings.get(k)))

        # - destination_size_table (desired_size)
        self.desired_size = inject.get_table(size_table_name(self.model_selector)).to_frame()

        # - shared_data
        if shared_data is not None:
            assert shared_data.shape[0] == self.desired_size.shape[0]
            assert shared_data.shape[1] == self.desired_size.shape[1] + 1  # tally column
            assert shared_data_lock is not None
        self.shared_data = shared_data
        self.shared_data_lock = shared_data_lock

        # - load saved shadow_prices (if available) and set max_iterations accordingly
        if self.use_shadow_pricing:
            self.shadow_prices = None
            self.shadow_price_method = self.shadow_settings['SHADOW_PRICE_METHOD']
            assert self.shadow_price_method in ['daysim', 'ctramp']

            if self.shadow_settings['LOAD_SAVED_SHADOW_PRICES']:
                # read_saved_shadow_prices logs error and returns None if file not found
                self.shadow_prices = self.read_saved_shadow_prices(model_settings)

            if self.shadow_prices is None:
                self.max_iterations = self.shadow_settings.get('MAX_ITERATIONS', 5)
            else:
                self.max_iterations = self.shadow_settings.get('MAX_ITERATIONS_SAVED', 1)

            # initial_shadow_price if we did not load
            if self.shadow_prices is None:
                # initial value depends on method
                initial_shadow_price = 1.0 if self.shadow_price_method == 'ctramp' else 0.0
                self.shadow_prices = \
                    pd.DataFrame(data=initial_shadow_price,
                                 columns=self.desired_size.columns,
                                 index=self.desired_size.index)
        else:
            self.max_iterations = 1

        self.num_fail = pd.DataFrame(index=self.desired_size.columns)
        self.max_abs_diff = pd.DataFrame(index=self.desired_size.columns)
        self.max_rel_diff = pd.DataFrame(index=self.desired_size.columns)
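
A minimal sketch of the shared-data arrangement the docstring describes, with invented shapes: a multiprocessing.RawArray buffer viewed as a numpy array, guarded by a Lock:

import multiprocessing

import numpy as np

num_zones, num_segments = 25, 3   # invented shapes
# 'q' is the signed 64-bit typecode, matching the int64 sp_dtype
buffer = multiprocessing.RawArray('q', num_zones * (num_segments + 1))
lock = multiprocessing.Lock()

# each process wraps the same buffer as a numpy view before accumulating into it
shared_data = np.frombuffer(buffer, dtype=np.int64).reshape(num_zones, num_segments + 1)

with lock:  # coordinate access to the buffer across sub-processes
    shared_data[:, :num_segments] += 1   # e.g. tally modeled choices by zone/segment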
Example #43
def mandatory_tour_frequency(persons_merged,
                             chunk_size,
                             trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.
    """
    trace_label = 'mandatory_tour_frequency'

    model_settings = config.read_model_settings('mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='mandatory_tour_frequency.csv')
    alternatives = simulate.read_model_alts(
        config.config_file_path('mandatory_tour_frequency_alternatives.csv'), set_index='alt')

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons", len(choosers))

    # - if no mandatory tours
    if choosers.shape[0] == 0:
        add_null_results(trace_label, model_settings)
        return

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {}

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(
        model_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    # - create mandatory tours
    """
    This reprocesses the choice of index of the mandatory tour frequency
    alternatives into an actual dataframe of tours.  Ending format is
    the same as for non_mandatory_tours except trip types are "work" and "school"
    """
    choosers['mandatory_tour_frequency'] = choices
    mandatory_tours = process_mandatory_tours(
        persons=choosers,
        mandatory_tour_frequency_alts=alternatives
    )

    tours = pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)

    # - annotate persons
    persons = inject.get_table('persons').to_frame()

    # need to reindex as we only handled persons with cdap_activity == 'M'
    persons['mandatory_tour_frequency'] = choices.reindex(persons.index).fillna('').astype(str)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    tracing.print_summary('mandatory_tour_frequency', persons.mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
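
The "convert indexes to alternative names" step above is positional indexing into the spec's columns; a toy sketch with invented alternative names:

import pandas as pd

# invented: spec columns are the alternative names, in positional order
alt_names = pd.Index(['work1', 'work2', 'school1', 'school2', 'work_and_school'])

# simple_simulate returns positional alternative indexes, one per chooser
raw_choices = pd.Series([0, 4, 2], index=pd.Index([11, 12, 13], name='person_id'))

# map positions to names, keeping the chooser index
choices = pd.Series(alt_names[raw_choices.values], index=raw_choices.index)
print(choices.tolist())  # ['work1', 'work_and_school', 'school1']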