Example #1
def test_linear_function_iadd():
    # Test inplace add on unattached LinearFunction_C
    lf = P.tottime * X.tottime + P.totcost * X.totcost
    lf += X("totcost*tottime") * P("fake")
    assert lf == (P.tottime * X.tottime + P.totcost * X.totcost
                  + X("totcost*tottime") * P("fake"))
    # Test inplace add on attached LinearFunction_C
    m = larch.Model(utility_ca=P.tottime * X.tottime + P.totcost * X.totcost)
    m.utility_ca += X("totcost*tottime") * P("fake")
    xx = (P.tottime * X.tottime + P.totcost * X.totcost
          + X("totcost*tottime") * P("fake"))
    assert m.utility_ca == xx
Example #2
def cdap_base_utility_by_person(model,
                                n_persons,
                                spec,
                                alts=None,
                                value_tokens=()):
    """
    Build the base utility by person for each pattern.

    Parameters
    ----------
    model : larch.Model
    n_persons : int
    spec : pandas.DataFrame
        The base utility by person spec provided by
        the ActivitySim framework.
    alts : dict, optional
        The keys are the names of the patterns, and
        the values are the alternative code numbers,
        as created by `generate_alternatives`.  If not
        given, the alts are automatically regenerated
        using that function.
    value_tokens : list-like of str, optional
        A list of tokens to edit within the expressions,
        generally the column names of the provided values
        from the estimation data bundle.  Only used when
        `n_persons` is more than 1.
    """
    if n_persons == 1:
        for i in spec.index:
            if not pd.isna(spec.loc[i, "M"]):
                model.utility_co[1] += X(spec.Expression[i]) * P(spec.loc[i, "M"])
            if not pd.isna(spec.loc[i, "N"]):
                model.utility_co[2] += X(spec.Expression[i]) * P(spec.loc[i, "N"])
            if not pd.isna(spec.loc[i, "H"]):
                model.utility_co[3] += X(spec.Expression[i]) * P(spec.loc[i, "H"])
    else:
        if alts is None:
            alts = generate_alternatives(n_persons)
        person_numbers = range(1, n_persons + 1)
        for pnum in person_numbers:
            for i in spec.index:
                for aname, anum in alts.items():
                    z = pnum - 1
                    if not pd.isna(spec.loc[i, aname[z]]):
                        x = apply_replacements(
                            spec.Expression[i], f"p{pnum}", value_tokens)
                        model.utility_co[anum] += X(x) * P(spec.loc[i, aname[z]])
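
A minimal sketch of how this helper might be driven for a one-person
household, assuming a toy spec frame with the Expression, M, N, and H
columns used above (all names below are illustrative, not drawn from a
real ActivitySim bundle):

import pandas as pd
import larch
from larch import P, X

# hypothetical one-person CDAP spec: one row per utility term
spec = pd.DataFrame({
    "Expression": ["ptype == 1", "age < 18"],
    "M": ["coef_mand_fulltime", None],  # parameter for the Mandatory pattern
    "N": [None, "coef_nonmand_child"],  # parameter for the Non-mandatory pattern
    "H": [None, None],                  # no Home terms in this toy spec
})

m = larch.Model()
cdap_base_utility_by_person(m, n_persons=1, spec=spec)
# alternatives 1 (M), 2 (N), and 3 (H) now carry the matching terms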
Example #3
def linear_utility_from_spec(spec, x_col, p_col, ignore_x=(), segment_id=None):
    """
    Create a linear function from a spec DataFrame.

    Parameters
    ----------
    spec : pandas.DataFrame
        A spec for an ActivitySim model.
    x_col : str
        The name of the column in spec representing the data.
    p_col : str or dict
        The name of the column (or columns) in spec representing the parameters.
        Give as a string for a single column, or as a dict to
        have segments on multiple columns. If given as a dict,
        the keys give the names of the columns to use, and the
        values give the identifiers that will need to match the
        loaded `segment_id` value.
    ignore_x : Collection, optional
        Labels in the spec file to ignore.  Typically this
        includes variables that are pre-processed by ActivitySim
        and therefore don't need to be made available in Larch.
    segment_id : str, optional
        The CHOOSER_SEGMENT_COLUMN_NAME identifier from the ActivitySim settings.
        This value is ignored if `p_col` is a string, and required
        if `p_col` is a dict.

    Returns
    -------
    LinearFunction_C
    """
    if isinstance(p_col, dict):
        if segment_id is None:
            raise ValueError('segment_id must be given if p_col is a dict')
        partial_utility = {}
        for seg_p_col, segval in p_col.items():
            partial_utility[seg_p_col] = linear_utility_from_spec(
                spec,
                x_col,
                seg_p_col,
                ignore_x,
            ) * X(f'{segment_id}=={segval}')
        return sum(partial_utility.values())
    return sum(
        P(getattr(i, p_col)) * X(getattr(i, x_col)) for i in spec.itertuples()
        if (getattr(i, x_col) not in ignore_x)
        and not pd.isna(getattr(i, p_col)))
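
A hedged usage sketch for both forms of p_col, using a toy spec (the
column and parameter names below are illustrative):

import pandas as pd
from larch import P, X

spec = pd.DataFrame({
    "Label": ["asc", "income"],
    "coefficient": ["coef_asc", "coef_income"],
    "low": ["coef_asc_low", None],    # segment-specific parameter columns
    "high": ["coef_asc_high", None],
})

# p_col as a string: one parameter column for everyone
u = linear_utility_from_spec(spec, x_col="Label", p_col="coefficient")
# u == P.coef_asc * X.asc + P.coef_income * X.income

# p_col as a dict: keys are parameter columns, values are the segment
# identifiers matched against the chooser's segment_id column
u_seg = linear_utility_from_spec(
    spec,
    x_col="Label",
    p_col={"low": 1, "high": 2},
    segment_id="income_segment",
)
# each segment's partial utility is gated by X('income_segment==1'), etc.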
Example #4
def test_data_c():

    d = DataRef_C("hsh")
    assert "hsh" == d
    assert d == "hsh"
    assert not keyword.iskeyword(d)
    assert hash(d) == hash("hsh")
    assert repr(d) == "X.hsh"

    assert d == X.hsh
    assert d == X("hsh")
    assert d == X['hsh']

    p = ParameterRef_C("hsh")
    assert not p == d
    assert p != d
Example #5
def test_overspec():

    m0 = larch.Model.Example(1)
    m0.utility_ca = m0.utility_ca + P.failpar * X('1')
    m0.utility_co[1] = P.ASC_DA
    m0.lock_value('tottime', -0.1)
    m0.utility_co[2] = P.ASC_SR2 + P('hhinc#23') * X.hhinc
    m0.utility_co[3] = P.ASC_SR3P + P('hhinc#23') * X.hhinc
    m0.remove_unused_parameters()
    m0.load_data()
    # constraint tests are unstable across platforms
    # r0 = m0.maximize_loglike(
    # 	quiet=True,
    # )
    m0.set_values({
        'ASC_BIKE': -0.8550063261138748,
        'ASC_DA': 0.9780172816142935,
        'ASC_SR2': -1.0303087193826583,
        'ASC_SR3P': -2.394702207497934,
        'ASC_TRAN': 1.2134607482035888,
        'ASC_WALK': 2.0885392231767055,
        'failpar': -1.2138930556454395e-14,
        'hhinc#23': -0.001647452425832848,
        'hhinc#4': -0.005545798283823439,
        'hhinc#5': -0.012530050562373019,
        'hhinc#6': -0.010792561322141715,
        'totcost': -0.005093524162084949,
        'tottime': -0.1,
    })
    m0.calculate_parameter_covariance()
    possover = m0.possible_overspecification
    assert possover.data.shape == (7, 2)
    assert all(possover.data.index.sort_values() == [
        'ASC_BIKE',
        'ASC_DA',
        'ASC_SR2',
        'ASC_SR3P',
        'ASC_TRAN',
        'ASC_WALK',
        'failpar',
    ])
Example #6
def test_overspec():

    m0 = larch.Model.Example(1)
    m0.utility_ca = m0.utility_ca + P.failpar * X('1')
    m0.utility_co[1] = P.ASC_DA
    m0.lock_value('tottime', -0.1)
    m0.utility_co[2] = P.ASC_SR2 + P('hhinc#23') * X.hhinc
    m0.utility_co[3] = P.ASC_SR3P + P('hhinc#23') * X.hhinc
    m0.remove_unused_parameters()
    m0.load_data()
    r0 = m0.maximize_loglike(quiet=True)
    m0.calculate_parameter_covariance()
    possover = m0.possible_overspecification
    assert possover.data.shape == (7, 2)
    assert all(possover.data.index.sort_values() == [
        'ASC_BIKE',
        'ASC_DA',
        'ASC_SR2',
        'ASC_SR3P',
        'ASC_TRAN',
        'ASC_WALK',
        'failpar',
    ])
Example #7
def test_ref_gen():

    assert X["Asd"] == X("Asd") == X.Asd
    assert P["Asd"] == P("Asd") == P.Asd
    assert X.Asd != P.Asd
Example #8
def test_data_c_math():

    assert X.Aaa + X.Bbb == X("Aaa+Bbb")
    assert X.Aaa - X.Bbb == X("Aaa-Bbb")
    assert X.Aaa * X.Bbb == X("Aaa*Bbb")
    assert X.Aaa / X.Bbb == X("Aaa/Bbb")
    assert X.Aaa & X.Bbb == X("Aaa&Bbb")
    assert X.Aaa | X.Bbb == X("Aaa|Bbb")
    assert X.Aaa ^ X.Bbb == X("Aaa^Bbb")
    assert X.Aaa**X.Bbb == X("Aaa**Bbb")
    assert X.Zzz / X.Aaa + X.Vvv * X.Bbb == X('(Zzz/Aaa)+(Vvv*Bbb)')
    assert +X.Aaa == X("Aaa")
    assert -X.Aaa == X("-Aaa")

    assert X.Aaa + 2 == X("Aaa+2")
    assert X.Aaa - 2 == X("Aaa-2")
    assert X.Aaa * 2 == X("Aaa*2")
    assert X.Aaa / 2 == X("Aaa/2")
    assert X.Aaa & 2 == X("Aaa&2")
    assert X.Aaa | 2 == X("Aaa|2")
    assert X.Aaa ^ 2 == X("Aaa^2")
    assert X.Aaa**2 == X("Aaa**2")

    assert 2 + X.Aaa == X("2+Aaa")
    assert 2 - X.Aaa == X("2-Aaa")
    assert 2 * X.Aaa == X("2*Aaa")
    assert 2 / X.Aaa == X("2/Aaa")
    assert 2 & X.Aaa == X("2&Aaa")
    assert 2 | X.Aaa == X("2|Aaa")
    assert 2 ^ X.Aaa == X("2^Aaa")
    assert 2**X.Aaa == X("2**Aaa")

    assert X.Aaa + 0 == X.Aaa
    assert 0 + X.Aaa == X.Aaa

    assert X.Aaa * 1 == X.Aaa
    assert 1 * X.Aaa == X.Aaa

    with pytest.raises(TypeError):
        _ = X.Aaa + "Plain String"

    with pytest.raises(TypeError):
        _ = X.Aaa - "Plain String"

    with pytest.raises(TypeError):
        _ = X.Aaa * "Plain String"

    with pytest.raises(TypeError):
        _ = X.Aaa / "Plain String"
Example #9
def test_piecewise_linear():
    from larch.util.data_expansion import piecewise_linear

    func = piecewise_linear(X.DataName, P.ParamName, [3, 5, 7])
    assert func[0] == P('ParamName ① up to 3') * X('piece(DataName,None,3)')
    assert func[1] == P('ParamName ② 3 to 5') * X('piece(DataName,3,5)')
    assert func[2] == P('ParamName ③ 5 to 7') * X('piece(DataName,5,7)')
    assert func[3] == P('ParamName ④ over 7') * X('piece(DataName,7,None)')
    assert len(func) == 4

    func = piecewise_linear(X.DataName, breaks=[3, 5, 7])
    assert func[0] == P('DataName ① up to 3') * X('piece(DataName,None,3)')
    assert func[1] == P('DataName ② 3 to 5') * X('piece(DataName,3,5)')
    assert func[2] == P('DataName ③ 5 to 7') * X('piece(DataName,5,7)')
    assert func[3] == P('DataName ④ over 7') * X('piece(DataName,7,None)')
    assert len(func) == 4

    func = piecewise_linear('GenName', breaks=[3, 5, 7])
    assert func[0] == P('GenName ① up to 3') * X('piece(GenName,None,3)')
    assert func[1] == P('GenName ② 3 to 5') * X('piece(GenName,3,5)')
    assert func[2] == P('GenName ③ 5 to 7') * X('piece(GenName,5,7)')
    assert func[3] == P('GenName ④ over 7') * X('piece(GenName,7,None)')
    assert len(func) == 4

    with pytest.raises(ValueError):
        func = piecewise_linear('GenName', [3, 5, 7])
Example #10
def location_choice_model(
    name="workplace_location",
    edb_directory="output/estimation_data_bundle/{name}/",
    coefficients_file="{name}_coefficients.csv",
    spec_file="{name}_SPEC.csv",
    size_spec_file="{name}_size_terms.csv",
    alt_values_file="{name}_alternatives_combined.csv",
    chooser_file="{name}_choosers_combined.csv",
    settings_file="{name}_model_settings.yaml",
    landuse_file="{name}_landuse.csv",
    return_data=False,
):
    model_selector = name.replace("_location", "")
    model_selector = model_selector.replace("_destination", "")
    model_selector = model_selector.replace("_subtour", "")
    model_selector = model_selector.replace("_tour", "")
    if model_selector == 'joint':
        model_selector = 'non_mandatory'
    edb_directory = edb_directory.format(name=name)

    def _read_csv(filename, **kwargs):
        filename = filename.format(name=name)
        return pd.read_csv(os.path.join(edb_directory, filename), **kwargs)

    coefficients = _read_csv(
        coefficients_file,
        index_col="coefficient_name",
    )
    spec = _read_csv(spec_file, comment="#")
    alt_values = _read_csv(alt_values_file)
    chooser_data = _read_csv(chooser_file)
    landuse = _read_csv(landuse_file, index_col="zone_id")
    master_size_spec = _read_csv(size_spec_file)

    # Remove temp rows from the spec; ActivitySim uses them to calculate the other
    # values written to the EDB, but they are not actually part of the utility
    # function itself.
    spec = spec.loc[~spec.Expression.isna()]
    spec = spec.loc[~spec.Expression.str.startswith("_")].copy()

    settings_file = settings_file.format(name=name)
    with open(os.path.join(edb_directory, settings_file), "r") as yf:
        settings = yaml.load(
            yf,
            Loader=yaml.SafeLoader,
        )

    include_settings = settings.get("include_settings")
    if include_settings:
        include_settings = os.path.join(edb_directory, include_settings)
    if include_settings and os.path.exists(include_settings):
        with open(include_settings, "r") as yf:
            more_settings = yaml.load(
                yf,
                Loader=yaml.SafeLoader,
            )
        settings.update(more_settings)

    CHOOSER_SEGMENT_COLUMN_NAME = settings.get("CHOOSER_SEGMENT_COLUMN_NAME")
    SEGMENT_IDS = settings.get("SEGMENT_IDS")
    if SEGMENT_IDS is None:
        SEGMENTS = settings.get("SEGMENTS")
        if SEGMENTS is not None:
            SEGMENT_IDS = {i: i for i in SEGMENTS}

    SIZE_TERM_SELECTOR = settings.get('SIZE_TERM_SELECTOR', model_selector)

    # filter size spec for this location choice only
    size_spec = (master_size_spec.query(
        f"model_selector == '{SIZE_TERM_SELECTOR}'").drop(
            columns="model_selector").set_index("segment"))
    size_spec = size_spec.loc[:, size_spec.max() > 0]

    size_coef = size_coefficients_from_spec(size_spec)

    indexes_to_drop = [
        "util_size_variable",  # pre-computed size (will be re-estimated)
        "util_size_variable_atwork",  # pre-computed size (will be re-estimated)
        "util_utility_adjustment",  # shadow pricing (ignored in estimation)
        "@df['size_term'].apply(np.log1p)",  # pre-computed size (will be re-estimated)
    ]
    if 'Label' in spec.columns:
        indexes_to_drop = [
            i for i in indexes_to_drop if i in spec.Label.to_numpy()
        ]
        label_column_name = 'Label'
    elif 'Expression' in spec.columns:
        indexes_to_drop = [
            i for i in indexes_to_drop if i in spec.Expression.to_numpy()
        ]
        label_column_name = 'Expression'
    else:
        raise ValueError("cannot find Label or Expression in spec file")

    expression_labels = None
    if label_column_name == 'Expression':
        expression_labels = {
            expr: f"variable_label{n:04d}"
            for n, expr in enumerate(spec.Expression.to_numpy())
        }

    # Remove shadow pricing and pre-existing size expression for re-estimation
    spec = (spec.set_index(label_column_name).drop(
        index=indexes_to_drop).reset_index())

    if label_column_name == 'Expression':
        spec.insert(0, "Label", spec['Expression'].map(expression_labels))
        alt_values['variable'] = alt_values['variable'].map(expression_labels)
        label_column_name = "Label"

    if name == 'trip_destination':
        CHOOSER_SEGMENT_COLUMN_NAME = 'primary_purpose'
        primary_purposes = spec.columns[3:]
        SEGMENT_IDS = {pp: pp for pp in primary_purposes}

    chooser_index_name = chooser_data.columns[0]
    x_co = chooser_data.set_index(chooser_index_name)
    x_ca = cv_to_ca(
        alt_values.set_index([chooser_index_name, alt_values.columns[1]]))

    if CHOOSER_SEGMENT_COLUMN_NAME is not None:
        # label segments with names
        SEGMENT_IDS_REVERSE = {v: k for k, v in SEGMENT_IDS.items()}
        x_co["_segment_label"] = x_co[CHOOSER_SEGMENT_COLUMN_NAME].apply(
            lambda x: SEGMENT_IDS_REVERSE[x])
    else:
        x_co["_segment_label"] = size_spec.index[0]

    # compute total size values by segment
    for segment in size_spec.index:
        total_size_segment = pd.Series(0, index=landuse.index)
        x_co["total_size_" + segment] = 0
        for land_use_field in size_spec.loc[segment].index:
            total_size_segment += (landuse[land_use_field] *
                                   size_spec.loc[segment, land_use_field])
        x_co["total_size_" + segment] = total_size_segment.loc[
            x_co["override_choice"]].to_numpy()

    # for each chooser, collate the appropriate total size value
    x_co["total_size_segment"] = 0
    for segment in size_spec.index:
        labels = "total_size_" + segment
        rows = x_co["_segment_label"] == segment
        x_co.loc[rows, "total_size_segment"] = x_co[labels][rows]

    # Remove choosers with invalid observed choice (appropriate total size value = 0)
    valid_observed_zone = x_co["total_size_segment"] > 0
    x_co = x_co[valid_observed_zone]
    x_ca = x_ca[x_ca.index.get_level_values(chooser_index_name).isin(
        x_co.index)]

    # Merge land use characteristics into CA data
    try:
        x_ca_1 = pd.merge(x_ca, landuse, on="zone_id", how="left")
    except KeyError:
        # Missing the zone_id variable?
        # Use the alternative IDs instead, which assumes no sampling of alternatives
        x_ca_1 = pd.merge(x_ca,
                          landuse,
                          left_on=x_ca.index.get_level_values(1),
                          right_index=True,
                          how="left")
    x_ca_1.index = x_ca.index

    # Availability of choice zones
    if "util_no_attractions" in x_ca_1:
        av = x_ca_1["util_no_attractions"].apply(
            lambda x: False if x == 1 else True).astype(np.int8)
    elif "@df['size_term']==0" in x_ca_1:
        av = x_ca_1["@df['size_term']==0"].apply(
            lambda x: False if x == 1 else True).astype(np.int8)
    else:
        av = 1

    d = DataFrames(co=x_co, ca=x_ca_1, av=av)

    m = Model(dataservice=d)
    if len(spec.columns) == 4 and all(
            spec.columns == ['Label', 'Description', 'Expression', 'coefficient']):
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col="Label",
            p_col=spec.columns[-1],
            ignore_x=("local_dist", ),
        )
    elif len(spec.columns) == 4 \
            and all(spec.columns[:3] == ['Label', 'Description', 'Expression']) \
            and len(SEGMENT_IDS) == 1 \
            and spec.columns[3] == list(SEGMENT_IDS.values())[0]:
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col="Label",
            p_col=spec.columns[-1],
            ignore_x=("local_dist", ),
        )
    else:
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col=label_column_name,
            p_col=SEGMENT_IDS,
            ignore_x=("local_dist", ),
            segment_id=CHOOSER_SEGMENT_COLUMN_NAME,
        )

    if CHOOSER_SEGMENT_COLUMN_NAME is None:
        assert len(size_spec) == 1
        m.quantity_ca = sum(
            P(f"{i}_{q}") * X(q) for i in size_spec.index
            for q in size_spec.columns if size_spec.loc[i, q] != 0)
    else:
        m.quantity_ca = sum(
            P(f"{i}_{q}") * X(q) *
            X(f"{CHOOSER_SEGMENT_COLUMN_NAME}=={str_repr(SEGMENT_IDS[i])}")
            for i in size_spec.index for q in size_spec.columns
            if size_spec.loc[i, q] != 0)

    apply_coefficients(coefficients, m)
    apply_coefficients(size_coef, m, minimum=-6, maximum=6)

    m.choice_co_code = "override_choice"

    if return_data:
        return (
            m,
            Dict(
                edb_directory=Path(edb_directory),
                alt_values=alt_values,
                chooser_data=chooser_data,
                coefficients=coefficients,
                landuse=landuse,
                spec=spec,
                size_spec=size_spec,
                master_size_spec=master_size_spec,
                model_selector=model_selector,
                settings=settings,
            ),
        )

    return m
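
A short sketch of driving this loader end to end, assuming an estimation
data bundle has already been written by ActivitySim under the default
output/estimation_data_bundle/ layout:

# hypothetical estimation session; paths follow the defaults above
m, data = location_choice_model(name="workplace_location", return_data=True)
m.load_data()
result = m.maximize_loglike()
data.spec       # the cleaned utility spec actually used
data.size_spec  # the size-term spec filtered for this model_selector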
Example #11
def cdap_interaction_utility(model, n_persons, alts, interaction_coef,
                             coefficients):

    person_numbers = list(range(1, n_persons + 1))

    matcher = re.compile("coef_[HMN]_.*")
    interact_coef_map = {}
    for c in coefficients.index:
        if matcher.search(c):
            c_split = c.split("_")
            for j in c_split[2:]:
                interact_coef_map[(c_split[1], j)] = c
                if all((i == 'x' for i in j)):  # wildcards also map to empty
                    interact_coef_map[(c_split[1], '')] = c

    for (cardinality, activity), coefs in interaction_coef.groupby(
        ["cardinality", "activity"]):
        _logger.info(
            f"{n_persons} person households, interaction cardinality {cardinality}, activity {activity}"
        )
        if cardinality > n_persons:
            continue
        elif cardinality == n_persons:
            this_aname = activity * n_persons
            this_altnum = alts[this_aname]
            for rowindex, row in coefs.iterrows():
                expression = "&".join(
                    f"(p{p}_ptype == {t})"
                    for (p, t) in zip(person_numbers, row.interaction_ptypes)
                    if t != "*")
                key = (activity, row.interaction_ptypes)
                if expression:
                    if key in interact_coef_map:
                        linear_component = X(expression) * P(interact_coef_map[key])
                    else:
                        linear_component = X(expression) * P(row.coefficient)
                else:
                    if key in interact_coef_map:
                        linear_component = P(interact_coef_map[key])
                    else:
                        linear_component = P(row.coefficient)
                _logger.debug(
                    f"utility_co[{this_altnum} {this_aname}] += {linear_component}"
                )
                model.utility_co[this_altnum] += linear_component
        elif cardinality < n_persons:
            for combo in itertools.combinations(person_numbers, cardinality):
                pattern = interact_pattern(n_persons, combo, activity)
                for aname, anum in alts.items():
                    if pattern.match(aname):
                        for rowindex, row in coefs.iterrows():
                            expression = "&".join(
                                f"(p{p}_ptype == {t})"
                                for (p,
                                     t) in zip(combo, row.interaction_ptypes)
                                if t != "*")
                            # interaction terms without ptypes (i.e. with wildcards)
                            # only apply when the household size matches the cardinality
                            if expression != "":
                                if (activity, row.interaction_ptypes
                                    ) in interact_coef_map:
                                    linear_component = (
                                        X(expression) * P(interact_coef_map[
                                            (activity,
                                             row.interaction_ptypes)]))
                                else:
                                    linear_component = X(expression) * P(
                                        row.coefficient)
                                _logger.debug(
                                    f"utility_co[{anum} {aname}] += {linear_component}"
                                )
                                model.utility_co[anum] += linear_component
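
The interact_coef_map built at the top of this function is the subtle
part: each coefficient label of the form coef_{activity}_{ptypes} may
list several ptype groups after the activity, and an all-wildcard group
also maps to the empty expression. A minimal sketch with toy labels:

import re
import pandas as pd

coefficients = pd.DataFrame(index=["coef_H_11", "coef_M_xx", "coef_N_12_13", "other"])
matcher = re.compile("coef_[HMN]_.*")
interact_coef_map = {}
for c in coefficients.index:
    if matcher.search(c):
        c_split = c.split("_")
        for j in c_split[2:]:
            interact_coef_map[(c_split[1], j)] = c
            if all(i == 'x' for i in j):  # wildcards also map to empty
                interact_coef_map[(c_split[1], '')] = c
# {('H', '11'): 'coef_H_11',
#  ('M', 'xx'): 'coef_M_xx', ('M', ''): 'coef_M_xx',
#  ('N', '12'): 'coef_N_12_13', ('N', '13'): 'coef_N_12_13'}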
Example #12
def linear_utility_from_spec(spec, x_col, p_col, ignore_x=(), segment_id=None):
    """
    Create a linear function from a spec DataFrame.

    Parameters
    ----------
    spec : pandas.DataFrame
        A spec for an ActivitySim model.
    x_col : str
        The name of the column in spec representing the data.
    p_col : str or dict
        The name of the column (or columns) in spec representing the parameters.
        Give as a string for a single column, or as a dict to
        have segments on multiple columns. If given as a dict,
        the keys give the names of the columns to use, and the
        values give the identifiers that will need to match the
        loaded `segment_id` value.
    ignore_x : Collection, optional
        Labels in the spec file to ignore.  Typically this
        includes variables that are pre-processed by ActivitySim
        and therefore don't need to be made available in Larch.
    segment_id : str, optional
        The CHOOSER_SEGMENT_COLUMN_NAME identifier from the ActivitySim settings.
        This value is ignored if `p_col` is a string, and required
        if `p_col` is a dict.

    Returns
    -------
    LinearFunction_C
    """
    if isinstance(p_col, dict):
        if segment_id is None:
            raise ValueError("segment_id must be given if p_col is a dict")
        partial_utility = {}
        for seg_p_col, segval in p_col.items():
            partial_utility[seg_p_col] = linear_utility_from_spec(
                spec,
                x_col,
                seg_p_col,
                ignore_x,
            ) * X(f"{segment_id}=={str_repr(segval)}")
        return sum(partial_utility.values())
    parts = []
    for i in spec.index:
        _x = spec.loc[i, x_col]
        try:
            _x = _x.strip()
        except AttributeError:
            if np.isnan(_x):
                _x = None
            else:
                raise
        _p = spec.loc[i, p_col]

        if _x is not None and (_x not in ignore_x) and not pd.isna(_p):
            # process coefficients when they are multiples instead of raw names
            if isinstance(_p, str) and "*" in _p:
                _p_star = [i.strip() for i in _p.split("*")]
                if len(_p_star) == 2:
                    try:
                        _p0 = float(_p_star[0])
                    except ValueError:
                        # first term not a number, maybe the second is
                        try:
                            _p1 = float(_p_star[1])
                        except ValueError:
                            # second term also not a number, it's just a star in a name
                            _P = P(_p)
                        else:
                            # second term is a number, use the multiplier
                            _P = P(_p_star[0]) * _p1
                    else:
                        # first term is a number, ensure the second is not
                        try:
                            _p1 = float(_p_star[1])
                        except ValueError:
                            # second term is not a number, use the multiplier
                            _P = P(_p_star[1]) * _p0
                        else:
                            # both terms are numbers, not allowed
                            raise ValueError(
                                f"parameter is just {_p}, I need a name")
                else:
                    # not handling triple-multiple terms (or worse)
                    _P = P(_p)
            else:
                _P = P(_p)
            parts.append(_P * X(_x))
    return sum(parts)
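
The star-handling above lets a spec carry scaled coefficients like
"coef_b * 2" without minting a new parameter. A sketch of the three
cases, assuming the module's pandas import and the function as defined
above (names are toy):

import pandas as pd

spec = pd.DataFrame({
    "Label": ["d1", "d2", "d3"],
    "coefficient": ["coef_a", "coef_b * 2", "2 * coef_c"],
})
u = linear_utility_from_spec(spec, x_col="Label", p_col="coefficient")
# coef_a stays a plain parameter; the starred rows become scaled terms,
# P.coef_b * 2.0 * X.d2 and P.coef_c * 2.0 * X.d3 respectively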
Example #13
def test_linear_func():

    assert LinearComponent_C(param="pname", data="dname") == P.pname * X.dname

    assert type(list(P.singleton + P.pname * X.dname)[0]) is LinearComponent_C
    assert type(list(P.singleton + P.pname * X.dname)[1]) is LinearComponent_C

    assert type(list(+P.pname * X.dname + P.singleton)[0]) is LinearComponent_C
    assert type(list(+P.pname * X.dname + P.singleton)[1]) is LinearComponent_C

    assert list(-(P.pname * X.dname + P.singleton)) == [
        LinearComponent_C('pname', 'dname', -1.0),
        LinearComponent_C('singleton', '1', -1.0),
    ]
    assert list(-(P.pname * X.dname - P.singleton)) == [
        LinearComponent_C('pname', 'dname', -1.0),
        LinearComponent_C('singleton', '1', 1.0),
    ]

    assert list((P.pname * X.dname - P.singleton) * X.Sss) == [
        LinearComponent_C(param='pname', data='dname*Sss', scale=1.0),
        LinearComponent_C(param='singleton', data='Sss', scale=-1.0),
    ]

    assert list(sum(PX(i) for i in ['Aaa', 'Bbb'])) == [
        LinearComponent_C(param='Aaa', data='Aaa', scale=1.0),
        LinearComponent_C(param='Bbb', data='Bbb', scale=1.0),
    ]

    u = P.Aaa * X.Aaa + P.Bbb * X.Bbb
    u += P.Ccc * X.Ccc

    assert u == P.Aaa * X.Aaa + P.Bbb * X.Bbb + P.Ccc * X.Ccc

    assert P.ppp * X.xxx * 1.234 == P.ppp * 1.234 * X.xxx
    assert P.ppp * X.xxx * 1.234 == X.xxx * P.ppp * 1.234
    assert P.ppp * X.xxx * 1.234 == X.xxx * 1.234 * P.ppp
    assert P.ppp * X.xxx * 1.234 == 1.234 * X.xxx * P.ppp
    assert P.ppp * X.xxx * 1.234 == 1.234 * P.ppp * X.xxx

    assert (P.ppp * X.xxx) * 1.234 == P.ppp * (1.234 * X.xxx)
    assert (P.ppp * X.xxx) * 1.234 == X.xxx * (P.ppp * 1.234)
    assert (P.ppp * X.xxx) * 1.234 == X.xxx * (1.234 * P.ppp)
    assert (P.ppp * X.xxx) * 1.234 == 1.234 * (X.xxx * P.ppp)
    assert (P.ppp * X.xxx) * 1.234 == 1.234 * (P.ppp * X.xxx)

    assert (P.ppp * X.xxx) * 1.234 == (P.ppp * 1.234) * X.xxx
    assert (P.ppp * X.xxx) * 1.234 == (X.xxx * P.ppp) * 1.234
    assert (P.ppp * X.xxx) * 1.234 == (X.xxx * 1.234) * P.ppp
    assert (P.ppp * X.xxx) * 1.234 == (1.234 * X.xxx) * P.ppp
    assert (P.ppp * X.xxx) * 1.234 == (1.234 * P.ppp) * X.xxx

    assert (P.ppp * X.xxx * 1.234) == P.ppp * (1.234 * X.xxx)
    assert (P.ppp * X.xxx * 1.234) == X.xxx * (P.ppp * 1.234)
    assert (P.ppp * X.xxx * 1.234) == X.xxx * (1.234 * P.ppp)
    assert (P.ppp * X.xxx * 1.234) == 1.234 * (X.xxx * P.ppp)
    assert (P.ppp * X.xxx * 1.234) == 1.234 * (P.ppp * X.xxx)

    assert (P.ppp * X.xxx * 1.234) == (P.ppp * 1.234) * X.xxx
    assert (P.ppp * X.xxx * 1.234) == (X.xxx * P.ppp) * 1.234
    assert (P.ppp * X.xxx * 1.234) == (X.xxx * 1.234) * P.ppp
    assert (P.ppp * X.xxx * 1.234) == (1.234 * X.xxx) * P.ppp
    assert (P.ppp * X.xxx * 1.234) == (1.234 * P.ppp) * X.xxx

    assert P.ppp * (X.xxx * 1.234) == P.ppp * (1.234 * X.xxx)
    assert P.ppp * (X.xxx * 1.234) == X.xxx * (P.ppp * 1.234)
    assert P.ppp * (X.xxx * 1.234) == X.xxx * (1.234 * P.ppp)
    assert P.ppp * (X.xxx * 1.234) == 1.234 * (X.xxx * P.ppp)
    assert P.ppp * (X.xxx * 1.234) == 1.234 * (P.ppp * X.xxx)

    assert P.ppp * (X.xxx * 1.234) == (P.ppp * 1.234) * X.xxx
    assert P.ppp * (X.xxx * 1.234) == (X.xxx * P.ppp) * 1.234
    assert P.ppp * (X.xxx * 1.234) == (X.xxx * 1.234) * P.ppp
    assert P.ppp * (X.xxx * 1.234) == (1.234 * X.xxx) * P.ppp
    assert P.ppp * (X.xxx * 1.234) == (1.234 * P.ppp) * X.xxx

    assert (P.ppp * X.xxx) * X.xxx == P.ppp * X('xxx*xxx')
    assert (P.ppp * X.xxx) * (P("_") * X.xxx) == P.ppp * X('xxx*xxx')
    assert (P("_") * X.xxx) * (P.ppp * X.xxx) == P.ppp * X('xxx*xxx')

    # Test squaring a boolean
    assert (P.ppp * X('boolean(xxx)')) * X('boolean(xxx)') == P.ppp * X('boolean(xxx)')
    assert ((P.ppp * X('boolean(xxx)')) * (P("_") * X('boolean(xxx)'))
            == P.ppp * X('boolean(xxx)'))

    assert ((P.p1 * X.x1 + P.p2 * X.x2) *
            (P('_') * 1.1 * X.x1 + P('_') * 2 * X.x2)) == (
                P.p1 * 1.1 * X('x1*x1') + P.p1 * 2.0 * X('x1*x2') +
                P.p2 * 1.1 * X('x2*x1') + P.p2 * 2.0 * X('x2*x2'))
Example #14
def test_pmath_in_utility():
    d = larch.examples.MTC()
    m0 = larch.Model(dataservice=d)

    m0.utility_co[2] = P("ASC_SR2") * 10 + P("hhinc#2") / 10 * X("hhinc")
    m0.utility_co[3] = P("ASC_SR3P") * 10 + P("hhinc#3") / 10 * X("hhinc")
    m0.utility_co[4] = P("ASC_TRAN") * 10 + P("hhinc#4") / 10 * X("hhinc")
    m0.utility_co[5] = P("ASC_BIKE") * 10 + P("hhinc#5") / 10 * X("hhinc")
    m0.utility_co[6] = P("ASC_WALK") * 10 + P("hhinc#6") / 10 * X("hhinc")

    m0.utility_ca = (
        +P("nonmotorized_time") / 10. * X("(altnum>4) * tottime") +
        P("motorized_ovtt") * 10 * X("(altnum <= 4) * ovtt") +
        P("motorized_ivtt") * X("(altnum <= 4) * ivtt") + PX("totcost"))
    m0.availability_var = '_avail_'
    m0.choice_ca_var = '_choice_'

    m1 = larch.Model(dataservice=d)

    m1.utility_co[2] = P("ASC_SR2") * X('10') + P("hhinc#2") * X("hhinc/10")
    m1.utility_co[3] = P("ASC_SR3P") * X('10') + P("hhinc#3") * X("hhinc/10")
    m1.utility_co[4] = P("ASC_TRAN") * X('10') + P("hhinc#4") * X("hhinc/10")
    m1.utility_co[5] = P("ASC_BIKE") * X('10') + P("hhinc#5") * X("hhinc/10")
    m1.utility_co[6] = P("ASC_WALK") * X('10') + P("hhinc#6") * X("hhinc/10")

    m1.utility_ca = (+P("nonmotorized_time") * X("(altnum>4) * tottime / 10") +
                     P("motorized_ovtt") * X("(altnum <= 4) * ovtt * 10") +
                     P("motorized_ivtt") * X("(altnum <= 4) * ivtt") +
                     PX("totcost"))
    m1.availability_var = '_avail_'
    m1.choice_ca_var = '_choice_'

    m0.load_data()
    m1.load_data()

    r0 = m0.maximize_loglike(quiet=True)
    r1 = m1.maximize_loglike(quiet=True)
    assert r0.loglike == pytest.approx(-3587.6430040944942)
    assert r1.loglike == pytest.approx(-3587.6430040944942)

    m0.calculate_parameter_covariance()
    m1.calculate_parameter_covariance()
    t = {
        'ASC_BIKE': -5.318650574990901,
        'ASC_SR2': -22.291563439182628,
        'ASC_SR3P': -22.174552606750527,
        'ASC_TRAN': -3.293923857045225,
        'ASC_WALK': 1.6172450189610719,
        'hhinc#2': -1.4000897138949544,
        'hhinc#3': 0.12900984170888324,
        'hhinc#4': -3.0601742475362923,
        'hhinc#5': -2.333410249527477,
        'hhinc#6': -3.048442130390144,
        'motorized_ivtt': -0.4116740527068954,
        'motorized_ovtt': -12.958446214791113,
        'nonmotorized_time': -11.789244777056298,
        'totcost': -20.19350165272386,
    }
    assert dict(m0.pf['t_stat']) == pytest.approx(t, rel=1e-5)
    assert dict(m1.pf['t_stat']) == pytest.approx(t, rel=1e-5)

    assert (m0.get_value(P.motorized_ivtt) * 60) / (
        m0.get_value(P.totcost) * 100) == pytest.approx(0.3191492801963062)
    assert m0.get_value((P.motorized_ivtt * 60) /
                        (P.totcost * 100)) == pytest.approx(0.3191492801963062)
    assert (m1.get_value(P.motorized_ivtt) * 60) / (
        m1.get_value(P.totcost) * 100) == pytest.approx(0.3191492801963062)
    assert m1.get_value((P.motorized_ivtt * 60) /
                        (P.totcost * 100)) == pytest.approx(0.3191492801963062)
Example #15
def test_simple_model_group():

    df = pd.read_csv(example_file("MTCwork.csv.gz"))
    df.set_index(['casenum', 'altnum'], inplace=True)
    d = larch.DataFrames(df, ch='chose', crack=True)
    d.set_alternative_names({
        1: 'DA',
        2: 'SR2',
        3: 'SR3+',
        4: 'Transit',
        5: 'Bike',
        6: 'Walk',
    })

    m0 = larch.Model(dataservice=d)
    m0.utility_co[2] = P("ASC_SR2") + P("hhinc#2") * X("hhinc")
    m0.utility_co[3] = P("ASC_SR3P") + P("hhinc#3") * X("hhinc")
    m0.utility_co[4] = P("ASC_TRAN") + P("hhinc#4") * X("hhinc")
    m0.utility_co[5] = P("ASC_BIKE") + P("hhinc#5") * X("hhinc")
    m0.utility_co[6] = P("ASC_WALK") + P("hhinc#6") * X("hhinc")
    m0.utility_ca = (
        (P("tottime_m") * X("tottime") + P("totcost_m") * X("totcost")) *
        X("femdum == 0") +
        (P("tottime_f") * X("tottime") + P("totcost_f") * X("totcost")) *
        X("femdum == 1"))

    m1 = larch.Model(dataservice=d.selector_co("femdum == 0"))
    m1.utility_co[2] = P("ASC_SR2") + P("hhinc#2") * X("hhinc")
    m1.utility_co[3] = P("ASC_SR3P") + P("hhinc#3") * X("hhinc")
    m1.utility_co[4] = P("ASC_TRAN") + P("hhinc#4") * X("hhinc")
    m1.utility_co[5] = P("ASC_BIKE") + P("hhinc#5") * X("hhinc")
    m1.utility_co[6] = P("ASC_WALK") + P("hhinc#6") * X("hhinc")
    m1.utility_ca = P("tottime_m") * X("tottime") + P("totcost_m") * X(
        "totcost")

    m2 = larch.Model(dataservice=d.selector_co("femdum == 1"))
    m2.utility_co[2] = P("ASC_SR2") + P("hhinc#2") * X("hhinc")
    m2.utility_co[3] = P("ASC_SR3P") + P("hhinc#3") * X("hhinc")
    m2.utility_co[4] = P("ASC_TRAN") + P("hhinc#4") * X("hhinc")
    m2.utility_co[5] = P("ASC_BIKE") + P("hhinc#5") * X("hhinc")
    m2.utility_co[6] = P("ASC_WALK") + P("hhinc#6") * X("hhinc")
    m2.utility_ca = P("tottime_f") * X("tottime") + P("totcost_f") * X(
        "totcost")

    m0.load_data()
    assert m0.loglike2().ll == approx(-7309.600971749625)

    m1.load_data()
    assert m1.loglike2().ll == approx(-4068.8091617468717)

    m2.load_data()
    assert m2.loglike2().ll == approx(-3240.7918100027578)

    from larch.model.model_group import ModelGroup

    mg = ModelGroup([m1, m2])

    assert mg.loglike2().ll == approx(-7309.600971749625)
    assert mg.loglike() == approx(-7309.600971749625)

    pd.testing.assert_series_equal(mg.loglike2().dll.sort_index(),
                                   m0.loglike2().dll.sort_index())

    m0.simple_step_bhhh()
    mg.set_values(**m0.pf.value)

    pd.testing.assert_series_equal(mg.loglike2().dll.sort_index(),
                                   m0.loglike2().dll.sort_index())

    assert mg.loglike2().ll == approx(-4926.4822036792275)
    assert mg.check_d_loglike().data.similarity.min() > 4

    result = mg.maximize_loglike(method='slsqp')
    assert result.loglike == approx(-3620.697668335103)

    mg2 = ModelGroup([])
    mg2.append(m1)
    mg2.append(m2)
    assert mg2.loglike() == approx(-3620.697667552756)

    mg3 = ModelGroup([])
    mg3.append(m1)
    mg3.append(m2)
    mg3.doctor()
    assert mg3.loglike() == approx(-3620.697667552756)
Example #16
def test_ch_av_summary_output():

    skims = larch.OMX(larch.exampville.files.skims, mode='r')
    hh = pandas.read_csv(larch.exampville.files.hh)
    pp = pandas.read_csv(larch.exampville.files.person)
    tour = pandas.read_csv(larch.exampville.files.tour)

    pp_col = [
        'PERSONID', 'HHID', 'HHIDX', 'AGE', 'WORKS', 'N_WORK_TOURS',
        'N_OTHER_TOURS', 'N_TOURS', 'N_TRIPS', 'N_TRIPS_HBW', 'N_TRIPS_HBO',
        'N_TRIPS_NHB'
    ]

    raw = tour.merge(hh, on='HHID').merge(pp[pp_col], on=('HHID', 'PERSONID'))
    raw["HOMETAZi"] = raw["HOMETAZ"] - 1
    raw["DTAZi"] = raw["DTAZ"] - 1

    raw = raw[raw.TOURPURP == 1]

    f_tour = raw.join(skims.get_rc_dataframe(
        raw.HOMETAZi,
        raw.DTAZi,
    ))

    DA = 1
    SR = 2
    Walk = 3
    Bike = 4
    Transit = 5

    dfs = larch.DataFrames(
        co=f_tour,
        alt_codes=[DA, SR, Walk, Bike, Transit],
        alt_names=['DA', 'SR', 'Walk', 'Bike', 'Transit'],
    )

    m = larch.Model(dataservice=dfs)
    m.title = "Exampville Work Tour Mode Choice v1"

    m.utility_co[DA] = (
        +P.InVehTime * X.AUTO_TIME + P.Cost * X.AUTO_COST  # dollars per mile
    )

    m.utility_co[SR] = (
        +P.ASC_SR + P.InVehTime * X.AUTO_TIME
        + P.Cost * (X.AUTO_COST * 0.5)  # dollars per mile, half share
        + P("HighInc:SR") * X("INCOME>75000"))

    m.utility_co[Walk] = (+P.ASC_Walk + P.NonMotorTime * X.WALK_TIME +
                          P("HighInc:Walk") * X("INCOME>75000"))

    m.utility_co[Bike] = (+P.ASC_Bike + P.NonMotorTime * X.BIKE_TIME +
                          P("HighInc:Bike") * X("INCOME>75000"))

    m.utility_co[Transit] = (+P.ASC_Transit + P.InVehTime * X.TRANSIT_IVTT +
                             P.OutVehTime * X.TRANSIT_OVTT +
                             P.Cost * X.TRANSIT_FARE +
                             P("HighInc:Transit") * X("INCOME>75000"))

    # No choice or avail data set
    m.load_data()
    q = m.dataframes.choice_avail_summary()
    assert numpy.array_equal(q.columns, ['name', 'chosen', 'available'])
    assert q.index.identical(
        pandas.Index([1, 2, 3, 4, 5, '< Total All Alternatives >'],
                     dtype='object'))
    assert numpy.array_equal(q.values, [
        ['DA', None, None],
        ['SR', None, None],
        ['Walk', None, None],
        ['Bike', None, None],
        ['Transit', None, None],
        ['', 0, ''],
    ])

    # Reasonable choice and avail data set
    m.choice_co_code = 'TOURMODE'
    m.availability_co_vars = {
        DA: 'AGE >= 16',
        SR: '1',
        Walk: 'WALK_TIME < 60',
        Bike: 'BIKE_TIME < 60',
        Transit: 'TRANSIT_FARE>0',
    }
    m.load_data()
    q = m.dataframes.choice_avail_summary()
    assert numpy.array_equal(q.columns, ['name', 'chosen', 'available'])
    assert q.index.identical(
        pandas.Index([1, 2, 3, 4, 5, '< Total All Alternatives >'],
                     dtype='object'))
    assert numpy.array_equal(q['name'].values,
                             ['DA', 'SR', 'Walk', 'Bike', 'Transit', ''])
    assert numpy.array_equal(q['chosen'].values,
                             [6052., 810., 196., 72., 434., 7564.])
    assert numpy.array_equal(
        q['available'].values,
        numpy.array([7564.0, 7564.0, 4179.0, 7564.0, 4199.0, ''],
                    dtype=object))

    # Unreasonable choice and avail data set
    m.choice_co_code = 'TOURMODE'
    m.availability_co_vars = {
        DA: 'AGE >= 26',
        SR: '1',
        Walk: 'WALK_TIME < 60',
        Bike: 'BIKE_TIME < 60',
        Transit: 'TRANSIT_FARE>0',
    }
    m.load_data()
    q = m.dataframes.choice_avail_summary()
    assert numpy.array_equal(
        q.columns, ['name', 'chosen', 'available', 'chosen but not available'])
    assert q.index.identical(
        pandas.Index([1, 2, 3, 4, 5, '< Total All Alternatives >'],
                     dtype='object'))
    assert numpy.array_equal(q['name'].values,
                             ['DA', 'SR', 'Walk', 'Bike', 'Transit', ''])
    assert numpy.array_equal(q['chosen'].values,
                             [6052., 810., 196., 72., 434., 7564.])
    assert numpy.array_equal(
        q['available'].values,
        numpy.array([6376.0, 7564.0, 4179.0, 7564.0, 4199.0, ''],
                    dtype=object))
    assert numpy.array_equal(q['chosen but not available'].values,
                             [942.0, 0.0, 0.0, 0.0, 0.0, 942.0])
Example #17
dfs = larch.DataFrames(
    co=df,
    alt_codes=[DA, SR, Walk, Bike, Transit],
    alt_names=['DA', 'SR', 'Walk', 'Bike', 'Transit'],
    ch_name='TOURMODE',
)

# Model Definition
m = larch.Model(dataservice=dfs)
m.title = "Exampville Work Tour Mode Choice v1"

from larch import P, X
P('NamedParameter')  # a parameter reference, created by calling P
X.NamedDataValue  # a data reference, created by attribute access
P('Named Parameter')  # call syntax allows names that are not valid identifiers
X("log(INCOME)")  # data references can hold entire expressions
P.InVehTime * X.AUTO_TIME + P.Cost * X.AUTO_COST  # combined into a linear function

m.utility_co[DA] = (
        + P.InVehTime * X.AUTO_TIME
        + P.Cost * X.AUTO_COST # dollars per mile
)


m.utility_co[SR] = (
        + P.ASC_SR
        + P.InVehTime * X.AUTO_TIME
        + P.Cost * (X.AUTO_COST * 0.5) # dollars per mile, half share
        + P("LogIncome:SR") * X("log(INCOME)")
)