Пример #1
0
def auto_ownership_simulate(households_merged, auto_ownership_spec,
                            auto_ownership_settings, trace_hh_id):
    """
    Auto ownership is a standard model which predicts how many cars a household
    with given characteristics owns
    """

    logger.info("Running auto_ownership_simulate with %d households" %
                len(households_merged))

    nest_spec = config.get_logit_model_settings(auto_ownership_settings)
    constants = config.get_model_constants(auto_ownership_settings)

    choices = asim.simple_simulate(choosers=households_merged.to_frame(),
                                   spec=auto_ownership_spec,
                                   nest_spec=nest_spec,
                                   locals_d=constants,
                                   trace_label=trace_hh_id
                                   and 'auto_ownership',
                                   trace_choice_name='auto_ownership')

    tracing.print_summary('auto_ownership', choices, value_counts=True)

    orca.add_column('households', 'auto_ownership', choices)

    pipeline.add_dependent_columns('households', 'households_autoown')

    if trace_hh_id:
        trace_columns = ['auto_ownership'
                         ] + orca.get_table('households_autoown').columns
        tracing.trace_df(orca.get_table('households').to_frame(),
                         label='auto_ownership',
                         columns=trace_columns,
                         warn_if_empty=True)
Пример #2
0
def cdap_simulate(persons_merged, cdap_settings, cdap_indiv_spec,
                  cdap_interaction_coefficients,
                  cdap_fixed_relative_proportions, chunk_size, trace_hh_id):
    """
    CDAP stands for Coordinated Daily Activity Pattern, which is a choice of
    high-level activity pattern for each person, in a coordinated way with other
    members of a person's household.

    Because Python requires vectorization of computation, there are some specialized
    routines in the cdap directory of activitysim for this purpose.  This module
    simply applies those utilities using the simulation framework.
    """

    persons_df = persons_merged.to_frame()

    constants = config.get_model_constants(cdap_settings)

    logger.info("Running cdap_simulate with %d persons" %
                len(persons_df.index))

    choices = run_cdap(
        persons=persons_df,
        cdap_indiv_spec=cdap_indiv_spec,
        cdap_interaction_coefficients=cdap_interaction_coefficients,
        cdap_fixed_relative_proportions=cdap_fixed_relative_proportions,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label='cdap')

    tracing.print_summary('cdap_activity',
                          choices.cdap_activity,
                          value_counts=True)

    print pd.crosstab(persons_df.ptype, choices.cdap_activity, margins=True)

    choices = choices.reindex(persons_merged.index)
    orca.add_column("persons", "cdap_activity", choices.cdap_activity)
    orca.add_column("persons", "cdap_rank", choices.cdap_rank)

    pipeline.add_dependent_columns("persons", "persons_cdap")
    pipeline.add_dependent_columns("households", "households_cdap")

    if trace_hh_id:

        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="cdap",
                         columns=['ptype', 'cdap_rank', 'cdap_activity'],
                         warn_if_empty=True)
def mandatory_tour_frequency(persons_merged,
                             mandatory_tour_frequency_spec,
                             mandatory_tour_frequency_settings,
                             chunk_size,
                             trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.
    """

    trace_label = 'mandatory_tour_frequency'

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons" % len(choosers))

    nest_spec = config.get_logit_model_settings(mandatory_tour_frequency_settings)
    constants = config.get_model_constants(mandatory_tour_frequency_settings)

    choices = simulate.simple_simulate(
        choosers,
        spec=mandatory_tour_frequency_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(
        mandatory_tour_frequency_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    tracing.print_summary('mandatory_tour_frequency', choices, value_counts=True)

    inject.add_column("persons", "mandatory_tour_frequency", choices)

    create_mandatory_tours(trace_hh_id)

    # add mandatory_tour-dependent columns (e.g. tour counts) to persons
    pipeline.add_dependent_columns("persons", "persons_mtf")

    if trace_hh_id:
        trace_columns = ['mandatory_tour_frequency']
        tracing.trace_df(inject.get_table('persons').to_frame(),
                         label="mandatory_tour_frequency.persons",
                         # columns=trace_columns,
                         warn_if_empty=True)
Пример #4
0
def mandatory_tour_frequency(persons_merged,
                             mandatory_tour_frequency_spec,
                             mandatory_tour_frequency_settings,
                             trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.
    """

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons" % len(choosers))

    nest_spec = config.get_logit_model_settings(mandatory_tour_frequency_settings)
    constants = config.get_model_constants(mandatory_tour_frequency_settings)

    choices = asim.simple_simulate(
        choosers,
        spec=mandatory_tour_frequency_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        trace_label=trace_hh_id and 'mandatory_tour_frequency',
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(
        mandatory_tour_frequency_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    tracing.print_summary('mandatory_tour_frequency', choices, value_counts=True)

    orca.add_column("persons", "mandatory_tour_frequency", choices)
    pipeline.add_dependent_columns("persons", "persons_mtf")

    create_mandatory_tours_table()

    # FIXME - test prng repeatability
    r = pipeline.get_rn_generator().random_for_df(choices)
    orca.add_column("persons", "mtf_rand", [item for sublist in r for item in sublist])

    if trace_hh_id:
        trace_columns = ['mandatory_tour_frequency']
        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="mandatory_tour_frequency",
                         columns=trace_columns,
                         warn_if_empty=True)
Пример #5
0
def school_location_simulate(persons_merged, school_location_sample,
                             school_location_spec, school_location_settings,
                             skim_dict, destination_size_terms, chunk_size,
                             trace_hh_id):
    """
    School location model on school_location_sample annotated with mode_choice logsum
    to select a school_taz from sample alternatives
    """

    choosers = persons_merged.to_frame()
    school_location_sample = school_location_sample.to_frame()
    destination_size_terms = destination_size_terms.to_frame()

    trace_label = 'school_location_simulate'
    alt_col_name = school_location_settings["ALT_COL_NAME"]

    constants = config.get_model_constants(school_location_settings)

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", alt_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = school_location_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]
    tracing.dump_df(DUMP, choosers, 'school_location_simulate', 'choosers')

    choices_list = []
    for school_type in ['university', 'highschool', 'gradeschool']:

        locals_d['segment'] = school_type

        choosers_segment = choosers[choosers["is_" + school_type]]
        alts_segment = school_location_sample[
            school_location_sample['school_type'] == school_type]

        # alternatives are pre-sampled and annotated with logsums and pick_count
        # but we have to merge additional alt columns into alt sample list
        alts_segment = \
            pd.merge(alts_segment, destination_size_terms,
                     left_on=alt_col_name, right_index=True, how="left")

        tracing.dump_df(DUMP, alts_segment, trace_label,
                        '%s_alternatives' % school_type)

        choices = interaction_sample_simulate(
            choosers_segment,
            alts_segment,
            spec=school_location_spec[[school_type]],
            choice_column=alt_col_name,
            skims=skims,
            locals_d=locals_d,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, school_type),
            trace_choice_name='school_location')

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    # We only chose school locations for the subset of persons who go to school
    # so we backfill the empty choices with -1 to code as no school location
    choices = choices.reindex(persons_merged.index).fillna(-1).astype(int)

    tracing.dump_df(DUMP, choices, trace_label, 'choices')

    tracing.print_summary('school_taz', choices, describe=True)

    inject.add_column("persons", "school_taz", choices)

    pipeline.add_dependent_columns("persons", "persons_school")

    if trace_hh_id:
        trace_columns = ['school_taz'
                         ] + inject.get_table('persons_school').columns
        tracing.trace_df(inject.get_table('persons_merged').to_frame(),
                         label="school_location",
                         columns=trace_columns,
                         warn_if_empty=True)
def non_mandatory_tour_frequency(persons_merged,
                                 non_mandatory_tour_frequency_alts,
                                 non_mandatory_tour_frequency_spec,
                                 non_mandatory_tour_frequency_settings,
                                 chunk_size,
                                 trace_hh_id):

    """
    This model predicts the frequency of making non-mandatory trips
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these trips include escort, shopping, othmaint,
    othdiscr, eatout, and social trips in various combination.
    """

    t0 = print_elapsed_time()

    choosers = persons_merged.to_frame()
    alts = non_mandatory_tour_frequency_alts.to_frame()

    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]

    logger.info("Running non_mandatory_tour_frequency with %d persons" % len(choosers))

    constants = config.get_model_constants(non_mandatory_tour_frequency_settings)

    choices_list = []
    # segment by person type and pick the right spec for each person type
    for name, segment in choosers.groupby('ptype_cat'):

        logger.info("Running segment '%s' of size %d" % (name, len(segment)))

        choices = asim.interaction_simulate(
            segment,
            alts,
            # notice that we pick the column for the segment for each segment we run
            spec=non_mandatory_tour_frequency_spec[[name]],
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=trace_hh_id and 'non_mandatory_tour_frequency.%s' % name,
            trace_choice_name='non_mandatory_tour_frequency')

        choices_list.append(choices)

        t0 = print_elapsed_time("non_mandatory_tour_frequency.%s" % name, t0)

        # FIXME - force garbage collection
        # mem = asim.memory_info()
        # logger.info('memory_info ptype %s, %s' % (name, mem))

    choices = pd.concat(choices_list)

    # FIXME - no need to reindex?
    orca.add_column("persons", "non_mandatory_tour_frequency", choices)

    create_non_mandatory_tours_table()

    pipeline.add_dependent_columns("persons", "persons_nmtf")

    if trace_hh_id:
        trace_columns = ['non_mandatory_tour_frequency']
        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="non_mandatory_tour_frequency",
                         columns=trace_columns,
                         warn_if_empty=True)
def workplace_location_simulate(persons_merged, workplace_location_sample,
                                workplace_location_spec,
                                workplace_location_settings, skim_dict,
                                destination_size_terms, chunk_size,
                                trace_hh_id):
    """
    Workplace location model on workplace_location_sample annotated with mode_choice logsum
    to select a work_taz from sample alternatives
    """

    # for now I'm going to generate a workplace location for everyone -
    # presumably it will not get used in downstream models for everyone -
    # it should depend on CDAP and mandatory tour generation as to whether
    # it gets used
    choosers = persons_merged.to_frame()

    alt_col_name = workplace_location_settings["ALT_COL_NAME"]

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge additional alt columns into alt sample list
    workplace_location_sample = workplace_location_sample.to_frame()
    destination_size_terms = destination_size_terms.to_frame()
    alternatives = \
        pd.merge(workplace_location_sample, destination_size_terms,
                 left_on=alt_col_name, right_index=True, how="left")

    tracing.dump_df(DUMP, alternatives, 'workplace_location_simulate',
                    'alternatives')

    constants = config.get_model_constants(workplace_location_settings)

    sample_pool_size = len(destination_size_terms.index)

    logger.info("Running workplace_location_simulate with %d persons" %
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", alt_col_name)

    locals_d = {'skims': skims, 'sample_pool_size': float(sample_pool_size)}
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = workplace_location_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    tracing.dump_df(DUMP, choosers, 'workplace_location_simulate', 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=workplace_location_spec,
        choice_column=alt_col_name,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_hh_id and 'workplace_location',
        trace_choice_name='workplace_location')

    # FIXME - no need to reindex since we didn't slice choosers
    # choices = choices.reindex(persons_merged.index)

    tracing.print_summary('workplace_taz', choices, describe=True)

    orca.add_column("persons", "workplace_taz", choices)

    pipeline.add_dependent_columns("persons", "persons_workplace")

    if trace_hh_id:
        trace_columns = ['workplace_taz'
                         ] + orca.get_table('persons_workplace').columns
        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="workplace_location",
                         columns=trace_columns,
                         warn_if_empty=True)
Пример #8
0
def workplace_location_simulate(persons_merged, workplace_location_spec,
                                workplace_location_settings, skim_dict,
                                destination_size_terms, chunk_size,
                                trace_hh_id):
    """
    The workplace location model predicts the zones in which various people will
    work.
    """

    # for now I'm going to generate a workplace location for everyone -
    # presumably it will not get used in downstream models for everyone -
    # it should depend on CDAP and mandatory tour generation as to whether
    # it gets used
    choosers = persons_merged.to_frame()
    alternatives = destination_size_terms.to_frame()

    constants = config.get_model_constants(workplace_location_settings)

    sample_size = workplace_location_settings["SAMPLE_SIZE"]

    logger.info("Running workplace_location_simulate with %d persons" %
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", "TAZ_r")

    locals_d = {'skims': skims}
    if constants is not None:
        locals_d.update(constants)

    # FIXME - HACK - only include columns actually used in spec (which we pathologically know)
    choosers = choosers[["income_segment", "TAZ", "mode_choice_logsums"]]

    choices = asim.interaction_simulate(choosers,
                                        alternatives,
                                        spec=workplace_location_spec,
                                        skims=skims,
                                        locals_d=locals_d,
                                        sample_size=sample_size,
                                        chunk_size=chunk_size,
                                        trace_label=trace_hh_id
                                        and 'workplace_location',
                                        trace_choice_name='workplace_location')

    # FIXME - no need to reindex since we didn't slice choosers
    # choices = choices.reindex(persons_merged.index)

    tracing.print_summary('workplace_taz', choices, describe=True)

    orca.add_column("persons", "workplace_taz", choices)

    pipeline.add_dependent_columns("persons", "persons_workplace")

    # FIXME - test prng repeatability
    r = pipeline.get_rn_generator().random_for_df(choices)
    orca.add_column("persons", "work_location_rand",
                    [item for sublist in r for item in sublist])

    if trace_hh_id:
        trace_columns = ['workplace_taz'
                         ] + orca.get_table('persons_workplace').columns
        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="workplace_location",
                         columns=trace_columns,
                         warn_if_empty=True)
Пример #9
0
def school_location_simulate(persons_merged, school_location_spec,
                             school_location_settings, skim_dict,
                             destination_size_terms, chunk_size, trace_hh_id):
    """
    The school location model predicts the zones in which various people will
    go to school.
    """

    choosers = persons_merged.to_frame()
    alternatives = destination_size_terms.to_frame()

    constants = config.get_model_constants(school_location_settings)

    sample_size = school_location_settings["SAMPLE_SIZE"]

    logger.info("Running school_location_simulate with %d persons" %
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", "TAZ_r")

    locals_d = {'skims': skims}
    if constants is not None:
        locals_d.update(constants)

    choices_list = []
    for school_type in ['university', 'highschool', 'gradeschool']:

        locals_d['segment'] = school_type

        choosers_segment = choosers[choosers["is_" + school_type]]

        # FIXME - no point in considering impossible alternatives
        alternatives_segment = alternatives[alternatives[school_type] > 0]

        logger.info(
            "school_type %s:  %s persons %s alternatives" %
            (school_type, len(choosers_segment), len(alternatives_segment)))

        if len(choosers_segment.index) > 0:

            choices = asim.interaction_simulate(
                choosers_segment,
                alternatives_segment,
                spec=school_location_spec[[school_type]],
                skims=skims,
                locals_d=locals_d,
                sample_size=sample_size,
                chunk_size=chunk_size,
                trace_label='school_location.%s' % school_type,
                trace_choice_name='school_location')

            choices_list.append(choices)

    choices = pd.concat(choices_list)

    # We only chose school locations for the subset of persons who go to school
    # so we backfill the empty choices with -1 to code as no school location
    choices = choices.reindex(persons_merged.index).fillna(-1)

    tracing.print_summary('school_taz', choices, describe=True)

    orca.add_column("persons", "school_taz", choices)

    pipeline.add_dependent_columns("persons", "persons_school")

    # FIXME - test prng repeatability
    r = pipeline.get_rn_generator().random_for_df(choices)
    orca.add_column("persons", "school_location_rand",
                    [item for sublist in r for item in sublist])

    if trace_hh_id:
        trace_columns = ['school_taz'
                         ] + orca.get_table('persons_school').columns
        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="school_location",
                         columns=trace_columns,
                         warn_if_empty=True)