def test_log_likelihood():
    """
    Computes the log-likelihood "by hand" for a simple example and ensures
    that the one returned by xlogit is the same
    """
    P = 1  # Without panel data
    betas = np.array([.1, .1, .1, .1])
    X_, y_ = X.reshape(N, P, J, K), y.reshape(N, P, J, 1)

    # Compute log likelihood using xlogit
    model = MixedLogit()
    model._rvidx, model._rvdist = np.array([True, True]), np.array(['n', 'n'])
    draws = model._get_halton_draws(N, R, K)  # (N,Kr,R)
    panel_info = np.ones((N, P))
    obtained_loglik, _ = model._loglik_gradient(betas, X_, y_, panel_info,
                                                draws, None, None)

    # Compute expected log likelihood "by hand"
    Br = betas[None, [0, 1], None] + draws*betas[None, [2, 3], None]
    eXB = np.exp(np.einsum('npjk,nkr -> npjr', X_, Br))
    p = eXB/np.sum(eXB, axis=2, keepdims=True)
    expected_loglik = -np.sum(
        np.log((y_*p).sum(axis=2).prod(axis=1).mean(axis=1)))

    assert obtained_loglik == pytest.approx(expected_loglik)
def test_log_likelihood():
    """
    Computes the log-likelihood "by hand" for a simple example and ensures
    that the one returned by xlogit is the same
    """
    betas = np.array([.1, .1, .1, .1])
    X_, y_ = X.reshape(N, J, K), y.reshape(N, J, 1)

    # Compute log likelihood using xlogit
    model = MixedLogit()
    model._rvidx, model._rvdist = np.array([True, True]), np.array(['n', 'n'])
    draws = model._generate_halton_draws(N, R, K)  # (N,Kr,R)
    obtained_loglik = model._loglik_gradient(betas, X_, y_, None, draws, None,
                                             None, {'samples': N, 'draws': R},
                                             return_gradient=False)

    # Compute expected log likelihood "by hand"
    Br = betas[None, [0, 1], None] + draws*betas[None, [2, 3], None]
    eXB = np.exp(np.einsum('njk,nkr -> njr', X_, Br))
    p = eXB/np.sum(eXB, axis=1, keepdims=True)
    expected_loglik = -np.sum(np.log((y_*p).sum(axis=1).mean(axis=1)))

    assert expected_loglik == pytest.approx(obtained_loglik)
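For reference, the quantity both versions of this test reproduce is the simulated log-likelihood of the mixed logit model. A sketch in standard notation (mine, not xlogit's), where $R$ is the number of draws, $T_n$ the choice situations of individual $n$, and $\beta_r$ a draw of the random coefficients:

$$
LL = \sum_n \ln\left(\frac{1}{R}\sum_{r=1}^{R}\prod_{t=1}^{T_n}\prod_{j} P_{ntj}(\beta_r)^{y_{ntj}}\right),
\qquad
P_{ntj}(\beta_r) = \frac{e^{x_{ntj}'\beta_r}}{\sum_k e^{x_{ntk}'\beta_r}}
$$

The panel version (first test) keeps the product over $t$; without panels there is a single choice situation per individual and the product disappears, which is exactly the difference between the two "by hand" computations above.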
def test__balance_panels():
    """
    Ensures that unbalanced panels are properly balanced when required
    """
    X_, y_ = X.reshape(N, J, K), y.reshape(N, J, 1)
    model = MixedLogit()
    X_, y_, panel_info = model._balance_panels(X_, y_, panels)

    # The second panel has one choice situation fewer than the first, so one
    # padded observation is expected and flagged with a zero in panel_info
    assert np.array_equal(panel_info, np.array([[1, 1], [1, 0]]))
    assert X_.shape == (4, 2, 2)
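A minimal standalone sketch of what panel balancing does, under the assumption (consistent with the asserted mask above) that shorter panels are zero-padded up to the longest panel length and a binary mask marks the real observations. The helper name and signature are illustrative, not xlogit's API:

import numpy as np

def balance_panels_sketch(X, y, panels):
    # X: (num_obs, J, K), y: (num_obs, J, 1), panels: panel id per observation
    ids, counts = np.unique(panels, return_counts=True)
    P = counts.max()                       # longest panel length
    Xb = np.zeros((len(ids)*P,) + X.shape[1:])
    yb = np.zeros((len(ids)*P,) + y.shape[1:])
    info = np.zeros((len(ids), P))         # 1 = real obs, 0 = padding
    for i, pid in enumerate(ids):
        rows = np.where(panels == pid)[0]
        Xb[i*P: i*P + len(rows)] = X[rows]
        yb[i*P: i*P + len(rows)] = y[rows]
        info[i, :len(rows)] = 1
    return Xb, yb, info

With two panels of lengths 2 and 1 this returns a (4, J, K) design array and the mask [[1, 1], [1, 0]], matching the assertions in the test.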
def test__transform_betas():
    """
    Check that betas are properly transformed to random draws
    """
    betas = np.array([.1, .1, .1, .1])
    model = MixedLogit()
    model._rvidx, model._rvdist = np.array([True, True]), np.array(['n', 'n'])
    draws = model._get_halton_draws(N, R, K)  # (N,Kr,R)

    # For normal random coefficients: beta_r = mean + sd*draw
    expected_betas = betas[None, [0, 1], None] + \
        draws*betas[None, [2, 3], None]
    _, obtained_betas = model._transform_betas(betas, draws)

    assert np.allclose(expected_betas, obtained_betas)
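The expected transformation above is the standard reparameterization for normally distributed coefficients: each standard draw is scaled by the estimated standard deviation and shifted by the mean. A minimal sketch (function name illustrative):

import numpy as np

def transform_normal(mean, sd, draws):
    # mean, sd: (K,) coefficient means and standard deviations
    # draws: (N, K, R) standard-normal (e.g., Halton-based) draws
    # returns: (N, K, R) individual-level coefficient draws
    return mean[None, :, None] + sd[None, :, None]*draws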
def test_gpu_not_available():
    """
    Ensures that xlogit detects that GPU is not available based on CuPy's
    installation status
    """
    assert not MixedLogit.check_if_gpu_available()
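A sketch of how this kind of detection is commonly implemented, by attempting to import CuPy; this illustrates the general approach described in the docstring, not necessarily xlogit's exact code:

def gpu_available_sketch():
    try:
        import cupy  # noqa: F401  # importable only when CuPy is installed
        return True
    except ImportError:
        return False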
def test_lrtest():
    """
    Ensures a correct result of the lrtest. The comparison values were
    obtained from comparison with lrtest in R's lmtest package
    """
    general, restricted = MixedLogit(), MixedLogit()
    general.loglikelihood = -1312
    restricted.loglikelihood = -1305
    general.coeff_ = np.zeros(4)
    restricted.coeff_ = np.zeros(2)

    obtained = lrtest(general, restricted)
    expected = {'pval': 0.0009118819655545164, 'chisq': 14, 'degfree': 2}
    assert obtained == expected
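The expected values can be reproduced directly, assuming lrtest computes the standard likelihood-ratio statistic; note that for two degrees of freedom the chi-squared survival function is exactly exp(-chisq/2):

from scipy.stats import chi2

chisq = 2*abs(-1312 - (-1305))   # = 14
degfree = 4 - 2                  # difference in number of coefficients
pval = chi2.sf(chisq, degfree)   # = exp(-14/2) ≈ 0.0009118819655545164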
def test_validate_inputs():
    """
    Covers potential mistakes in parameters of the fit method that xlogit
    should be able to identify
    """
    model = MixedLogit()
    with pytest.raises(ValueError):  # wrong distribution
        model.fit(X, y, varnames=varnames, alts=alts, ids=ids, n_draws=10,
                  maxiter=0, verbose=0, halton=True, randvars={'a': 'fake'})

    with pytest.raises(ValueError):  # wrong var name
        model.fit(X, y, varnames=varnames, alts=alts, ids=ids, n_draws=10,
                  maxiter=0, verbose=0, halton=True, randvars={'fake': 'n'})
def test_fit():
    """
    Ensures the log-likelihood works for a single iteration with the default
    initial coefficients. The value of -1.794 was computed by hand for
    comparison purposes
    """
    # There is no need to initialize a random seed as the halton draws produce
    # reproducible results
    model = MixedLogit()
    model.fit(X, y, varnames=varnames, alts=alts, n_draws=10, panels=panels,
              ids=ids, randvars=randvars, maxiter=0, verbose=0, halton=True)

    assert model.loglikelihood == pytest.approx(-1.79451632)
def test_fit():
    """
    Ensures the log-likelihood works for a single iteration with custom
    initial coefficients. The value of -1.473423 was computed by hand for
    comparison purposes
    """
    # There is no need to initialize a random seed as the halton draws produce
    # reproducible results
    model = MixedLogit()
    model.fit(X, y, varnames, alts, ids, randvars, n_draws=10, panels=panels,
              maxiter=0, verbose=0, halton=True, init_coeff=np.repeat(.1, 4))

    assert model.loglikelihood == pytest.approx(-1.473423)
def test_predict():
    """
    Ensures that returned choice probabilities are consistent.
    """
    # There is no need to initialize a random seed as the halton draws produce
    # reproducible results
    P = 1  # Without panel data
    betas = np.array([.1, .1, .1, .1])
    X_ = X.reshape(N, P, J, K)

    # Set up the model attributes manually instead of calling fit, so that
    # predict runs against known, fixed coefficients
    model = MixedLogit()
    model._rvidx, model._rvdist = np.array([True, True]), np.array(['n', 'n'])
    model.alternatives = np.array([1, 2])
    model.coeff_ = betas
    model.randvars = randvars
    model._isvars, model._asvars, model._varnames = [], varnames, varnames
    model._fit_intercept = False
    model.coeff_names = np.array(["a", "b", "sd.a", "sd.b"])

    y_pred, proba, freq = model.predict(X, varnames, alts, ids, n_draws=R,
                                        return_proba=True, return_freq=True)

    # Compute choice probabilities by hand
    draws = model._get_halton_draws(N, R, K)  # (N,Kr,R)
    Br = betas[None, [0, 1], None] + draws*betas[None, [2, 3], None]
    V = np.einsum('npjk,nkr -> npjr', X_, Br)
    V[V > 700] = 700  # Avoid overflow in exp
    eV = np.exp(V)
    e_proba = eV/np.sum(eV, axis=2, keepdims=True)
    expec_proba = e_proba.prod(axis=1).mean(axis=-1)
    expec_ypred = model.alternatives[np.argmax(expec_proba, axis=1)]
    alt_list, counts = np.unique(expec_ypred, return_counts=True)
    expec_freq = dict(zip(list(alt_list),
                          list(np.round(counts/np.sum(counts), 3))))

    assert np.array_equal(expec_ypred, y_pred)
    assert expec_freq == freq
X = df[varnames].values
y = df['choice'].values
randvars = {'meals': 'n', 'petfr': 'n', 'emipp': 'n'}
alts = df['alt']
ids = df['id']
panels = None
batch_size = 5000

if not use_gpu:
    device.disable_gpu_acceleration()
if profile:
    ini_ram = curr_ram()
    profiler = Profiler().start(measure_gpu_mem=use_gpu)

np.random.seed(0)
model = MixedLogit()
model.fit(X, y, varnames, alts=alts, ids=ids, n_draws=n_draws,
          panels=panels, verbose=0, randvars=randvars, batch_size=batch_size)

if profile:
    elapsed, max_ram, max_gpu = profiler.stop()
    log("{:6} {:7.2f} {:11.2f} {:7.3f} {:7.3f} {}".format(
        n_draws, elapsed, model.loglikelihood, max_ram - ini_ram, max_gpu,
        model.convergence))
""" This file executes the benchmark. Check the README.md file in this folder to make sure all the requirments are satisfied. """ import os from tools import init_profiler_output_files, log import sys # sys.path.append("../../") # Path of xlogit library root folder. from xlogit import MixedLogit MixedLogit.check_if_gpu_available() mini = len(sys.argv) == 2 and sys.argv[1] == 'mini' init_profiler_output_files() def profile_range_draws(command, r_draws, dataset, usegpu=False): log("\n\n=== " + dataset + " dataset. " + command.split()[1] + ('(using GPU)' if usegpu else '') + " ===") log("Ndraws Time(s) Log-Likeli. RAM(GB) GPU(GB) Converg.") for r in range(1, r_draws + 1): os.system("{} {} {} {} prof".format(command, r * 100, dataset, usegpu * 1)) def profile_range_draws_and_cores(command, r_draws, r_cores): log("\n\n=== artificial dataset. " + command.split()[1] + " ===") for n_draws in r_draws: for n_cores in r_cores: os.system("{} {} {}".format(command, n_draws, n_cores))
                  alt_list=['TRAIN', 'SM', 'CAR'], empty_val=0,
                  varying=['TT', 'CO', 'AV'], alt_is_prefix=True)

# ===== STEP 3. CREATE MODEL SPECIFICATION =====
df['ASC_TRAIN'] = np.ones(len(df))*(df['alt'] == 'TRAIN')
df['ASC_CAR'] = np.ones(len(df))*(df['alt'] == 'CAR')
df['TT'], df['CO'] = df['TT']/100, df['CO']/100  # Scale variables
annual_pass = (df['GA'] == 1) & (df['alt'].isin(['TRAIN', 'SM']))
df.loc[annual_pass, 'CO'] = 0  # Cost zero for annual pass holders

# ===== STEP 4. ESTIMATE MODEL PARAMETERS =====
from xlogit import MixedLogit

varnames = ['ASC_CAR', 'ASC_TRAIN', 'CO', 'TT']
model = MixedLogit()
model.fit(X=df[varnames], y=df['CHOICE'], varnames=varnames,
          alts=df['alt'], ids=df['custom_id'], panels=df["ID"],
          avail=df['AV'], randvars={'TT': 'n'}, n_draws=1500)
model.summary()
"""
OUTPUT:
Estimation time= 1.3 seconds
---------------------------------------------------------------------------
Coefficient           Estimate     Std.Err.        z-val        P>|z|
            'nonsig1', 'nonsig2', 'nonsig3']
X = df[varnames].values
y = df['choice'].values
randvars = {'meals': 'n', 'petfr': 'n', 'emipp': 'n'}
alts = [1, 2, 3]
panels = None

if not use_gpu:
    device.disable_gpu_acceleration()
if profile:
    ini_ram = curr_ram()
    profiler = Profiler().start(measure_gpu_mem=use_gpu)

np.random.seed(0)
model = MixedLogit()
model.fit(X, y, varnames, alts=alts, n_draws=n_draws, panels=panels,
          verbose=0, randvars=randvars)

if profile:
    elapsed, max_ram, max_gpu = profiler.stop()
    log("{:6} {:7.2f} {:11.2f} {:7.3f} {:7.3f} {}".format(
        n_draws, elapsed, model.loglikelihood, max_ram - ini_ram, max_gpu,
        model.convergence))
    profiler.export('xlogit' + ('_gpu' if use_gpu else ''), dataset, n_draws,