def test_latent_class_full_data(swissmetro_raw_df):
    """Check the ``analytic`` values from ``check_d_loglike`` on a latent class model.

    Two class models and the class-membership model all use one shared
    three-alternative ``DataFrames`` built from the ``swissmetro_raw_df``
    fixture; the membership model is created with ``alts=[1, 2]``.
    """
    from larch.model.latentclass import LatentClassModel

    data = larch.DataFrames(swissmetro_raw_df, alt_codes=[1, 2, 3])

    # Class 1: utilities use cost terms only.
    cost_model = larch.Model(dataservice=data)
    cost_model.availability_co_vars = {
        1: "TRAIN_AV_SP",
        2: "SM_AV",
        3: "CAR_AV_SP",
    }
    cost_model.choice_co_code = 'CHOICE'
    cost_model.utility_co[1] = (
        P("ASC_TRAIN") + X("TRAIN_CO*(GA==0)") * P("B_COST")
    )
    cost_model.utility_co[2] = X("SM_CO*(GA==0)") * P("B_COST")
    cost_model.utility_co[3] = P("ASC_CAR") + X("CAR_CO") * P("B_COST")

    # Class 2: utilities use both travel time and cost terms.
    time_cost_model = larch.Model(dataservice=data)
    time_cost_model.availability_co_vars = {
        1: "TRAIN_AV_SP",
        2: "SM_AV",
        3: "CAR_AV_SP",
    }
    time_cost_model.choice_co_code = 'CHOICE'
    time_cost_model.utility_co[1] = (
        P("ASC_TRAIN")
        + X("TRAIN_TT") * P("B_TIME")
        + X("TRAIN_CO*(GA==0)") * P("B_COST")
    )
    time_cost_model.utility_co[2] = (
        X("SM_TT") * P("B_TIME") + X("SM_CO*(GA==0)") * P("B_COST")
    )
    time_cost_model.utility_co[3] = (
        P("ASC_CAR") + X("CAR_TT") * P("B_TIME") + X("CAR_CO") * P("B_COST")
    )

    # Class-membership model, sharing the same data service.
    membership = larch.Model(dataservice=data, alts=[1, 2])
    membership.utility_co[2] = P.W_OTHER

    lcm = LatentClassModel(membership, {1: cost_model, 2: time_cost_model})
    lcm.load_data()

    trial_values = (
        (P.ASC_CAR, 0.125 / 2),
        (P.ASC_TRAIN, -0.398 / 2),
        (P.B_COST, -.0126 / 2),
        (P.B_TIME, -0.028 / 2),
        (P.W_OTHER, 1.095 / 2),
    )
    for param, value in trial_values:
        lcm.set_value(param, value)

    gradient_check = lcm.check_d_loglike()
    expected_analytic = {
        'ASC_CAR': -81.69736186616234,
        'ASC_TRAIN': -613.131371089499,
        'B_COST': -6697.31706964777,
        'B_TIME': -40104.940072046316,
        'W_OTHER': 245.43145056623683,
    }
    assert dict(gradient_check.data.analytic) == approx(expected_analytic)
def test_latent_class_mixed_data(swissmetro_raw_df):
    """Expect ``ValueError`` when building a latent class model on mixed data.

    The two class models share one ``DataFrames`` with alternative codes
    ``[1, 2, 3]``, while the class-membership model is given a separate
    ``DataFrames`` with alternative codes ``[1, 2]``.  Constructing the
    ``LatentClassModel`` from these is expected to raise ``ValueError``.
    """
    from larch.model.latentclass import LatentClassModel

    class_data = larch.DataFrames(swissmetro_raw_df, alt_codes=[1, 2, 3])

    # Class 1: utilities use cost terms only.
    cost_model = larch.Model(dataservice=class_data)
    cost_model.availability_co_vars = {
        1: "TRAIN_AV_SP",
        2: "SM_AV",
        3: "CAR_AV_SP",
    }
    cost_model.choice_co_code = 'CHOICE'
    cost_model.utility_co[1] = (
        P("ASC_TRAIN") + X("TRAIN_CO*(GA==0)") * P("B_COST")
    )
    cost_model.utility_co[2] = X("SM_CO*(GA==0)") * P("B_COST")
    cost_model.utility_co[3] = P("ASC_CAR") + X("CAR_CO") * P("B_COST")

    # Class 2: utilities use both travel time and cost terms.
    time_cost_model = larch.Model(dataservice=class_data)
    time_cost_model.availability_co_vars = {
        1: "TRAIN_AV_SP",
        2: "SM_AV",
        3: "CAR_AV_SP",
    }
    time_cost_model.choice_co_code = 'CHOICE'
    time_cost_model.utility_co[1] = (
        P("ASC_TRAIN")
        + X("TRAIN_TT") * P("B_TIME")
        + X("TRAIN_CO*(GA==0)") * P("B_COST")
    )
    time_cost_model.utility_co[2] = (
        X("SM_TT") * P("B_TIME") + X("SM_CO*(GA==0)") * P("B_COST")
    )
    time_cost_model.utility_co[3] = (
        P("ASC_CAR") + X("CAR_TT") * P("B_TIME") + X("CAR_CO") * P("B_COST")
    )

    # The membership model gets its own, separately built data service.
    membership_data = larch.DataFrames(swissmetro_raw_df, alt_codes=[1, 2])
    membership = larch.Model(dataservice=membership_data)
    membership.utility_co[2] = P.W_OTHER

    with raises(ValueError):
        LatentClassModel(membership, {1: cost_model, 2: time_cost_model})
def test_ch_av_summary_output():
    """Check ``choice_avail_summary`` output under three model configurations.

    The Exampville work-tour mode choice model is loaded three times:

    1. with no choice or availability definitions,
    2. with a choice column and availability rules under which the summary
       has only the ``name``/``chosen``/``available`` columns,
    3. with a stricter ``DA`` availability rule, where the summary gains a
       ``chosen but not available`` column.
    """
    skims = larch.OMX(larch.exampville.files.skims, mode='r')
    households = pandas.read_csv(larch.exampville.files.hh)
    persons = pandas.read_csv(larch.exampville.files.person)
    tours = pandas.read_csv(larch.exampville.files.tour)

    person_columns = [
        'PERSONID',
        'HHID',
        'HHIDX',
        'AGE',
        'WORKS',
        'N_WORK_TOURS',
        'N_OTHER_TOURS',
        'N_TOURS',
        'N_TRIPS',
        'N_TRIPS_HBW',
        'N_TRIPS_HBO',
        'N_TRIPS_NHB',
    ]
    tours_with_hh = tours.merge(households, on='HHID')
    raw = tours_with_hh.merge(
        persons[person_columns],
        on=('HHID', 'PERSONID'),
    )
    raw["HOMETAZi"] = raw["HOMETAZ"] - 1
    raw["DTAZi"] = raw["DTAZ"] - 1
    raw = raw[raw.TOURPURP == 1]

    od_skims = skims.get_rc_dataframe(raw.HOMETAZi, raw.DTAZi)
    f_tour = raw.join(od_skims)

    # Numeric mode codes, named for readability.
    DA, SR, Walk, Bike, Transit = 1, 2, 3, 4, 5

    dfs = larch.DataFrames(
        co=f_tour,
        alt_codes=[DA, SR, Walk, Bike, Transit],
        alt_names=['DA', 'SR', 'Walk', 'Bike', 'Transit'],
    )

    model = larch.Model(dataservice=dfs)
    model.title = "Exampville Work Tour Mode Choice v1"

    model.utility_co[DA] = (
        +P.InVehTime * X.AUTO_TIME
        + P.Cost * X.AUTO_COST  # dollars per mile
    )
    model.utility_co[SR] = (
        +P.ASC_SR
        + P.InVehTime * X.AUTO_TIME
        + P.Cost * (X.AUTO_COST * 0.5)  # dollars per mile, half share
        + P("HighInc:SR") * X("INCOME>75000")
    )
    model.utility_co[Walk] = (
        +P.ASC_Walk
        + P.NonMotorTime * X.WALK_TIME
        + P("HighInc:Walk") * X("INCOME>75000")
    )
    model.utility_co[Bike] = (
        +P.ASC_Bike
        + P.NonMotorTime * X.BIKE_TIME
        + P("HighInc:Bike") * X("INCOME>75000")
    )
    model.utility_co[Transit] = (
        +P.ASC_Transit
        + P.InVehTime * X.TRANSIT_IVTT
        + P.OutVehTime * X.TRANSIT_OVTT
        + P.Cost * X.TRANSIT_FARE
        + P("HighInc:Transit") * X("INCOME>75000")
    )

    # The summary index is expected to be the same in every scenario.
    expected_index = pandas.Index(
        [1, 2, 3, 4, 5, '< Total All Alternatives >'],
        dtype='object',
    )
    expected_names = ['DA', 'SR', 'Walk', 'Bike', 'Transit', '']
    expected_chosen = [6052., 810., 196., 72., 434., 7564.]

    # --- Scenario 1: no choice or avail data set ---
    model.load_data()
    summary = model.dataframes.choice_avail_summary()
    assert numpy.array_equal(summary.columns, ['name', 'chosen', 'available'])
    assert summary.index.identical(expected_index)
    assert numpy.array_equal(summary.values, [
        ['DA', None, None],
        ['SR', None, None],
        ['Walk', None, None],
        ['Bike', None, None],
        ['Transit', None, None],
        ['', 0, ''],
    ])

    # --- Scenario 2: reasonable choice and avail data set ---
    model.choice_co_code = 'TOURMODE'
    model.availability_co_vars = {
        DA: 'AGE >= 16',
        SR: '1',
        Walk: 'WALK_TIME < 60',
        Bike: 'BIKE_TIME < 60',
        Transit: 'TRANSIT_FARE>0',
    }
    model.load_data()
    summary = model.dataframes.choice_avail_summary()
    assert numpy.array_equal(summary.columns, ['name', 'chosen', 'available'])
    assert summary.index.identical(expected_index)
    assert numpy.array_equal(summary['name'].values, expected_names)
    assert numpy.array_equal(summary['chosen'].values, expected_chosen)
    assert numpy.array_equal(
        summary['available'].values,
        numpy.array(
            [7564.0, 7564.0, 4179.0, 7564.0, 4199.0, ''],
            dtype=object,
        ),
    )

    # --- Scenario 3: unreasonable choice and avail data set ---
    model.choice_co_code = 'TOURMODE'
    model.availability_co_vars = {
        DA: 'AGE >= 26',
        SR: '1',
        Walk: 'WALK_TIME < 60',
        Bike: 'BIKE_TIME < 60',
        Transit: 'TRANSIT_FARE>0',
    }
    model.load_data()
    summary = model.dataframes.choice_avail_summary()
    assert numpy.array_equal(
        summary.columns,
        ['name', 'chosen', 'available', 'chosen but not available'],
    )
    assert summary.index.identical(expected_index)
    assert numpy.array_equal(summary['name'].values, expected_names)
    assert numpy.array_equal(summary['chosen'].values, expected_chosen)
    assert numpy.array_equal(
        summary['available'].values,
        numpy.array(
            [6376.0, 7564.0, 4179.0, 7564.0, 4199.0, ''],
            dtype=object,
        ),
    )
    assert numpy.array_equal(
        summary['chosen but not available'].values,
        [942.0, 0.0, 0.0, 0.0, 0.0, 942.0],
    )
def test_latent_class(swissmetro_raw_df):
    """Check log-likelihood and ``check_d_loglike`` output at two parameter sets.

    Two class models share one ``DataFrames`` built from the
    ``swissmetro_raw_df`` fixture; the class-membership model is created
    without a data service.  The ``analytic`` values and minimum
    ``similarity`` from ``check_d_loglike`` are checked first at halved
    parameter values, then at the full values, where ``loglike()`` is checked
    as well.
    """
    from larch.model.latentclass import LatentClassModel

    data = larch.DataFrames(swissmetro_raw_df, alt_codes=[1, 2, 3])

    # Class 1: utilities use cost terms only.
    cost_model = larch.Model(dataservice=data)
    cost_model.availability_co_vars = {
        1: "TRAIN_AV_SP",
        2: "SM_AV",
        3: "CAR_AV_SP",
    }
    cost_model.choice_co_code = 'CHOICE'
    cost_model.utility_co[1] = (
        P("ASC_TRAIN") + X("TRAIN_CO*(GA==0)") * P("B_COST")
    )
    cost_model.utility_co[2] = X("SM_CO*(GA==0)") * P("B_COST")
    cost_model.utility_co[3] = P("ASC_CAR") + X("CAR_CO") * P("B_COST")

    # Class 2: utilities use both travel time and cost terms.
    time_cost_model = larch.Model(dataservice=data)
    time_cost_model.availability_co_vars = {
        1: "TRAIN_AV_SP",
        2: "SM_AV",
        3: "CAR_AV_SP",
    }
    time_cost_model.choice_co_code = 'CHOICE'
    time_cost_model.utility_co[1] = (
        P("ASC_TRAIN")
        + X("TRAIN_TT") * P("B_TIME")
        + X("TRAIN_CO*(GA==0)") * P("B_COST")
    )
    time_cost_model.utility_co[2] = (
        X("SM_TT") * P("B_TIME") + X("SM_CO*(GA==0)") * P("B_COST")
    )
    time_cost_model.utility_co[3] = (
        P("ASC_CAR") + X("CAR_TT") * P("B_TIME") + X("CAR_CO") * P("B_COST")
    )

    # Class-membership model, created with no data service of its own.
    membership = larch.Model()
    membership.utility_co[2] = P.W_OTHER

    lcm = LatentClassModel(membership, {1: cost_model, 2: time_cost_model})
    lcm.load_data()

    # First evaluation point: each parameter at half its full value.
    halved_values = (
        (P.ASC_CAR, 0.125 / 2),
        (P.ASC_TRAIN, -0.398 / 2),
        (P.B_COST, -.0126 / 2),
        (P.B_TIME, -0.028 / 2),
        (P.W_OTHER, 1.095 / 2),
    )
    for param, value in halved_values:
        lcm.set_value(param, value)

    first_check = lcm.check_d_loglike()
    expected_first = {
        'ASC_CAR': -81.69736186616234,
        'ASC_TRAIN': -613.131371089499,
        'B_COST': -6697.31706964777,
        'B_TIME': -40104.940072046316,
        'W_OTHER': 245.43145056623683,
    }
    assert dict(first_check.data.analytic) == approx(expected_first)
    assert first_check.data.similarity.min() > 4

    # Second evaluation point: the full parameter values.
    full_values = (
        (P.ASC_CAR, 0.125),
        (P.ASC_TRAIN, -0.398),
        (P.B_COST, -.0126),
        (P.B_TIME, -0.028),
        (P.W_OTHER, 1.095),
    )
    for param, value in full_values:
        lcm.set_value(param, value)

    assert lcm.loglike() == approx(-5208.502259337974)

    second_check = lcm.check_d_loglike()
    expected_second = {
        'ASC_CAR': 0.6243716033364302,
        'ASC_TRAIN': 0.9297965389102578,
        'B_COST': -154.03997923797007,
        'B_TIME': 76.19297915128493,
        'W_OTHER': -0.7936963902343083,
    }
    assert dict(second_check.data.analytic) == approx(expected_second)
    # Similarity is a bit lower very close to the optimum.
    assert second_check.data.similarity.min() > 2
def test_simple_model_group():
    """Compare a ``ModelGroup`` of two segment models against one pooled model.

    ``pooled`` is estimated on all MTC cases with separate time/cost
    parameters switched by ``femdum``; ``male`` and ``female`` are built on the
    ``femdum == 0`` and ``femdum == 1`` selections with the matching
    parameters.  The group of the two segment models is checked to report the
    same log-likelihood and gradient as the pooled model, and then to reach
    the expected optimum.
    """
    from larch.model.model_group import ModelGroup

    frame = pd.read_csv(example_file("MTCwork.csv.gz"))
    frame = frame.set_index(['casenum', 'altnum'])
    data = larch.DataFrames(frame, ch='chose', crack=True)
    data.set_alternative_names({
        1: 'DA',
        2: 'SR2',
        3: 'SR3+',
        4: 'Transit',
        5: 'Bike',
        6: 'Walk',
    })

    # (alternative code, ASC parameter name, income parameter name)
    co_terms = (
        (2, "ASC_SR2", "hhinc#2"),
        (3, "ASC_SR3P", "hhinc#3"),
        (4, "ASC_TRAN", "hhinc#4"),
        (5, "ASC_BIKE", "hhinc#5"),
        (6, "ASC_WALK", "hhinc#6"),
    )

    def _assign_co_utilities(model):
        # All three models share the same alternative-specific terms.
        for alt_code, asc_name, income_name in co_terms:
            model.utility_co[alt_code] = (
                P(asc_name) + P(income_name) * X("hhinc")
            )

    # Pooled model: gender-specific time/cost parameters on the full data.
    pooled = larch.Model(dataservice=data)
    _assign_co_utilities(pooled)
    male_part = (
        P("tottime_m") * X("tottime") + P("totcost_m") * X("totcost")
    ) * X("femdum == 0")
    female_part = (
        P("tottime_f") * X("tottime") + P("totcost_f") * X("totcost")
    ) * X("femdum == 1")
    pooled.utility_ca = male_part + female_part

    # Segment model on the femdum == 0 selection.
    male = larch.Model(dataservice=data.selector_co("femdum == 0"))
    _assign_co_utilities(male)
    male.utility_ca = (
        P("tottime_m") * X("tottime") + P("totcost_m") * X("totcost")
    )

    # Segment model on the femdum == 1 selection.
    female = larch.Model(dataservice=data.selector_co("femdum == 1"))
    _assign_co_utilities(female)
    female.utility_ca = (
        P("tottime_f") * X("tottime") + P("totcost_f") * X("totcost")
    )

    initial_loglikes = (
        (pooled, -7309.600971749625),
        (male, -4068.8091617468717),
        (female, -3240.7918100027578),
    )
    for model, expected_ll in initial_loglikes:
        model.load_data()
        assert model.loglike2().ll == approx(expected_ll)

    group = ModelGroup([male, female])
    assert group.loglike2().ll == approx(-7309.600971749625)
    assert group.loglike() == approx(-7309.600971749625)

    pd.testing.assert_series_equal(
        group.loglike2().dll.sort_index(),
        pooled.loglike2().dll.sort_index(),
    )

    # Step the pooled model, copy its parameter values into the group, and
    # confirm the two still agree.
    pooled.simple_step_bhhh()
    group.set_values(**pooled.pf.value)

    pd.testing.assert_series_equal(
        group.loglike2().dll.sort_index(),
        pooled.loglike2().dll.sort_index(),
    )

    assert group.loglike2().ll == approx(-4926.4822036792275)
    assert group.check_d_loglike().data.similarity.min() > 4

    outcome = group.maximize_loglike(method='slsqp')
    assert outcome.loglike == approx(-3620.697668335103)

    # A group assembled by appending to an empty group.
    appended_group = ModelGroup([])
    appended_group.append(male)
    appended_group.append(female)
    assert appended_group.loglike() == approx(-3620.697667552756)

    # Same construction, with doctor() called before evaluating.
    doctored_group = ModelGroup([])
    doctored_group.append(male)
    doctored_group.append(female)
    doctored_group.doctor()
    assert doctored_group.loglike() == approx(-3620.697667552756)
skims.get_rc_dataframe( df["HOMETAZi"], df["DTAZi"], ) ) # For clarity, we can define numbers as names for modes DA = 1 SR = 2 Walk = 3 Bike = 4 Transit = 5 dfs = larch.DataFrames( co=df, alt_codes=[DA,SR,Walk,Bike,Transit], alt_names=['DA','SR','Walk','Bike','Transit'], ch_name='TOURMODE', ) # Model Definition m = larch.Model(dataservice=dfs) m.title = "Exampville Work Tour Mode Choice v1" from larch import P, X P('NamedParameter') X.NamedDataValue P('Named Parameter') X("log(INCOME)") P.InVehTime * X.AUTO_TIME + P.Cost * X.AUTO_COST m.utility_co[DA] = (