Example #1
def test_log_likelihood():
    """
    Computes the log-likelihood "by hand" for a simple example and ensures
    that the one returned by xlogit is the same
    """
    P = 1  # Without panel data
    betas = np.array([.1, .1, .1, .1])
    X_, y_ = X.reshape(N, P, J, K), y.reshape(N, P, J, 1)

    # Compute log likelihood using xlogit
    model = MixedLogit()
    model._rvidx, model._rvdist = np.array([True, True]), np.array(['n', 'n'])
    draws = model._get_halton_draws(N, R, K)  # (N,Kr,R)
    panel_info = np.ones((N, P))
    obtained_loglik, _ = model._loglik_gradient(betas, X_, y_, panel_info,
                                                draws, None, None)

    # Compute expected log likelihood "by hand"
    Br = betas[None, [0, 1], None] + draws * betas[None, [2, 3], None]
    eXB = np.exp(np.einsum('npjk,nkr -> npjr', X_, Br))
    p = eXB / np.sum(eXB, axis=2, keepdims=True)
    expected_loglik = -np.sum(
        np.log((y_ * p).sum(axis=2).prod(axis=1).mean(axis=1)))

    assert obtained_loglik == pytest.approx(expected_loglik)
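
The hand computation above is the simulated log-likelihood of a panel mixed logit. In LaTeX notation (mine, not xlogit's), with Br holding the coefficient vector beta_nr for each individual n and draw r:

    \mathrm{SLL} = \sum_{n=1}^{N} \ln\left( \frac{1}{R} \sum_{r=1}^{R} \prod_{t=1}^{P} \prod_{j=1}^{J} P_{ntjr}^{\,y_{ntj}} \right),
    \qquad
    P_{ntjr} = \frac{\exp(x_{ntj}^{\top} \beta_{nr})}{\sum_{l=1}^{J} \exp(x_{ntl}^{\top} \beta_{nr})}

In the code, (y_ * p).sum(axis=2) picks out the chosen alternative's probability, prod(axis=1) multiplies over the P panel periods, mean(axis=1) averages over the R draws, and the leading minus sign matches the sign convention of _loglik_gradient, which the test shows returns the negative log-likelihood for minimization.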
Example #2
def test_log_likelihood():
    """
    Computes the log-likelihood "by hand" for a simple example and ensures
    that the one returned by xlogit is the same
    """
    betas = np.array([.1, .1, .1, .1])
    X_, y_ = X.reshape(N, J, K), y.reshape(N, J, 1)

    # Compute log likelihood using xlogit
    model = MixedLogit()
    model._rvidx, model._rvdist = np.array([True, True]), np.array(['n', 'n'])
    draws = model._generate_halton_draws(N, R, K)  # (N,Kr,R)
    obtained_loglik = model._loglik_gradient(betas,
                                             X_,
                                             y_,
                                             None,
                                             draws,
                                             None,
                                             None, {
                                                 'samples': N,
                                                 'draws': R
                                             },
                                             return_gradient=False)

    # Compute expected log likelihood "by hand"
    Br = betas[None, [0, 1], None] + draws * betas[None, [2, 3], None]
    eXB = np.exp(np.einsum('njk,nkr -> njr', X_, Br))
    p = eXB / np.sum(eXB, axis=1, keepdims=True)
    expected_loglik = -np.sum(np.log((y_ * p).sum(axis=1).mean(axis=1)))

    assert expected_loglik == pytest.approx(obtained_loglik)
Example #3
def test__balance_panels():
    """
    Ensures that unbalanced panels are properly balanced when required
    """
    X_, y_ = X.reshape(N, J, K), y.reshape(N, J, 1)
    model = MixedLogit()
    X_, y_, panel_info = model._balance_panels(X_, y_, panels)

    assert np.array_equal(panel_info, np.array([[1, 1], [1, 0]]))
    assert X_.shape == (4, 2, 2)
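
For reference, what the first assertion encodes (an illustration of the expected output, not xlogit's internal code): panel_info has one row per panel and one column per choice situation of the longest panel, marking real observations with 1 and padded ones with 0.

import numpy as np

# Expected balancing result for the test above: the first panel has two
# choice situations, the second only one, so the second is padded to
# length two and its padded slot is flagged with 0.
panel_info = np.array([[1, 1],
                       [1, 0]])
print(panel_info.sum(axis=1))  # [2 1] -> real observations per panel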
Example #4
def test__transform_betas():
    """
    Check that betas are properly transformed to random draws

    """
    betas = np.array([.1, .1, .1, .1])

    # Transform betas using xlogit
    model = MixedLogit()
    model._rvidx, model._rvdist = np.array([True, True]), np.array(['n', 'n'])
    draws = model._get_halton_draws(N, R, K)  # (N,Kr,R)
    expected_betas = betas[None, [0, 1], None] + \
        draws*betas[None, [2, 3], None]
    _, obtained_betas = model._transform_betas(betas, draws)

    assert np.allclose(expected_betas, obtained_betas)
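
The transform under test is the usual reparameterization of normal random coefficients, beta_nr = mean_k + sd_k * draw_nkr. A minimal self-contained sketch of that operation (plain standard-normal draws stand in here for xlogit's Halton draws):

import numpy as np

N, Kr, R = 5, 2, 3                  # individuals, random coefficients, draws
betas = np.array([.1, .1, .1, .1])  # [mean_a, mean_b, sd_a, sd_b]
draws = np.random.standard_normal((N, Kr, R))  # stand-in for Halton draws
# Broadcasting yields one coefficient vector per individual and draw
Br = betas[None, [0, 1], None] + draws * betas[None, [2, 3], None]
print(Br.shape)  # (5, 2, 3)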
Example #5
def test_gpu_not_available():
    """
    Ensures that xlogit detects that GPU is not available based on CuPy's
    installation status

    """
    assert not MixedLogit.check_if_gpu_available()
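
The detection boils down to whether CuPy can be imported. A minimal sketch of that idea (an assumption about the approach, not xlogit's actual implementation):

# GPU acceleration hinges on a successful CuPy import
try:
    import cupy  # noqa: F401
    gpu_available = True
except ImportError:
    gpu_available = False
print(gpu_available)  # False on machines without CuPy installed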
Example #6
def test_lrtest():
    """
    Ensures a correct result of the lrtest. The expected values were
    obtained from R's lmtest package
    """
    general = MixedLogit()
    general.loglikelihood = -1312
    restricted = MixedLogit()
    restricted.loglikelihood = -1305
    general.coeff_ = np.zeros(4)
    restricted.coeff_ = np.zeros(2)

    obtained = lrtest(general, restricted)
    expected = {'pval': 0.0009118819655545164, 'chisq': 14, 'degfree': 2}

    assert obtained == pytest.approx(expected)
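
The expected values can be reproduced by hand: the statistic is twice the log-likelihood difference, the degrees of freedom are the difference in coefficient counts, and the p-value comes from the chi-square survival function:

from scipy.stats import chi2

chisq = 2 * abs(-1305 - (-1312))  # = 14
degfree = 4 - 2                   # = 2
pval = chi2.sf(chisq, degfree)    # exp(-7) ~ 0.000911882 for 2 deg. of freedom
print(chisq, degfree, pval)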
Example #7
def test_validate_inputs():
    """
    Covers potential mistakes in parameters of the fit method that xlogit
    should be able to identify
    """
    model = MixedLogit()
    with pytest.raises(ValueError):  # wrong distribution
        model.fit(X,
                  y,
                  varnames=varnames,
                  alts=alts,
                  ids=ids,
                  n_draws=10,
                  maxiter=0,
                  verbose=0,
                  halton=True,
                  randvars={'a': 'fake'})

    with pytest.raises(ValueError):  # wrong var name
        model.fit(X,
                  y,
                  varnames=varnames,
                  alts=alts,
                  ids=ids,
                  n_draws=10,
                  maxiter=0,
                  verbose=0,
                  halton=True,
                  randvars={'fake': 'n'})
Example #8
def test_fit():
    """
    Ensures the log-likelihood works for a single iteration with the default
    initial coefficients. The value of -1.794 was computed by hand for
    comparison purposes
    """
    # There is no need to initialize a random seed as the halton draws produce
    # reproducible results
    model = MixedLogit()
    model.fit(X,
              y,
              varnames=varnames,
              alts=alts,
              n_draws=10,
              panels=panels,
              ids=ids,
              randvars=randvars,
              maxiter=0,
              verbose=0,
              halton=True)

    assert model.loglikelihood == pytest.approx(-1.79451632)
Example #9
def test_fit():
    """
    Ensures the log-likelihood works for a single evaluation with custom
    initial coefficients. The value of -1.473423 was computed by hand for
    comparison purposes
    """
    # There is no need to initialize a random seed as the halton draws produce
    # reproducible results
    model = MixedLogit()
    model.fit(X,
              y,
              varnames,
              alts,
              ids,
              randvars,
              n_draws=10,
              panels=panels,
              maxiter=0,
              verbose=0,
              halton=True,
              init_coeff=np.repeat(.1, 4))

    assert model.loglikelihood == pytest.approx(-1.473423)
Example #10
def test_predict():
    """
    Ensures that returned choice probabilities are consistent.
    """
    # There is no need to initialize a random seed as the halton draws produce
    # reproducible results
    P = 1  # Without panel data
    betas = np.array([.1, .1, .1, .1])
    X_ = X.reshape(N, P, J, K)

    model = MixedLogit()
    model._rvidx, model._rvdist = np.array([True, True]), np.array(['n', 'n'])
    model.alternatives = np.array([1, 2])
    model.coeff_ = betas
    model.randvars = randvars
    model._isvars, model._asvars, model._varnames = [], varnames, varnames
    model._fit_intercept = False
    model.coeff_names = np.array(["a", "b", "sd.a", "sd.b"])

    # The attributes set above emulate an already-fitted model, so there is
    # no need to call model.fit before predicting
    y_pred, proba, freq = model.predict(X,
                                        varnames,
                                        alts,
                                        ids,
                                        n_draws=R,
                                        return_proba=True,
                                        return_freq=True)

    # Compute choice probabilities by hand
    draws = model._get_halton_draws(N, R, K)  # (N,Kr,R)
    Br = betas[None, [0, 1], None] + draws * betas[None, [2, 3], None]
    V = np.einsum('npjk,nkr -> npjr', X_, Br)
    V[V > 700] = 700
    eV = np.exp(V)
    e_proba = eV / np.sum(eV, axis=2, keepdims=True)
    expec_proba = e_proba.prod(axis=1).mean(axis=-1)
    expec_ypred = model.alternatives[np.argmax(expec_proba, axis=1)]
    alt_list, counts = np.unique(expec_ypred, return_counts=True)
    expec_freq = dict(
        zip(list(alt_list), list(np.round(counts / np.sum(counts), 3))))

    assert np.array_equal(expec_ypred, y_pred)
    assert expec_freq == freq
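
The V[V > 700] = 700 line guards against overflow: np.exp on float64 overflows to inf just above x ~ 709.78, so utilities are clipped before exponentiation. A quick illustration:

import numpy as np

print(np.exp(709.0))  # about 8.2e307, still representable
print(np.exp(710.0))  # inf, with an overflow RuntimeWarning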
Example #11
    X = df[varnames].values
    y = df['choice'].values
    randvars = {'meals': 'n', 'petfr': 'n', 'emipp': 'n'}
    alts = df['alt']
    ids = df['id']
    panels = None
    batch_size = 5000

if not use_gpu:
    device.disable_gpu_acceleration()
if profile:
    ini_ram = curr_ram()
    profiler = Profiler().start(measure_gpu_mem=use_gpu)

np.random.seed(0)
model = MixedLogit()
model.fit(X,
          y,
          varnames,
          alts=alts,
          ids=ids,
          n_draws=n_draws,
          panels=panels,
          verbose=0,
          randvars=randvars,
          batch_size=batch_size)

if profile:
    elapsed, max_ram, max_gpu = profiler.stop()
    log("{:6} {:7.2f} {:11.2f} {:7.3f} {:7.3f} {}".format(
        n_draws, elapsed, model.loglikelihood, max_ram - ini_ram, max_gpu,
        model.convergence))
Example #12
"""
This file executes the benchmark. Check the README.md file in this folder
to make sure all the requirements are satisfied.
"""

import os
import sys

from tools import init_profiler_output_files, log
# sys.path.append("../../")  # Path of xlogit library root folder.
from xlogit import MixedLogit

MixedLogit.check_if_gpu_available()
mini = len(sys.argv) == 2 and sys.argv[1] == 'mini'
init_profiler_output_files()


def profile_range_draws(command, r_draws, dataset, usegpu=False):
    log("\n\n=== " + dataset + " dataset. " + command.split()[1] +
        ('(using GPU)' if usegpu else '') + " ===")
    log("Ndraws Time(s) Log-Likeli. RAM(GB) GPU(GB) Converg.")
    for r in range(1, r_draws + 1):
        os.system("{} {} {} {} prof".format(command, r * 100, dataset,
                                            usegpu * 1))


def profile_range_draws_and_cores(command, r_draws, r_cores):
    log("\n\n=== artificial dataset. " + command.split()[1] + " ===")
    for n_draws in r_draws:
        for n_cores in r_cores:
            os.system("{} {} {}".format(command, n_draws, n_cores))
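
A hypothetical invocation of these helpers (the script and dataset names below are illustrative assumptions, not taken from the benchmark itself):

# Profile 100, 200, ..., 500 draws on one dataset, on CPU and then on GPU
profile_range_draws("python benchmark.py", r_draws=5,
                    dataset="electricity", usegpu=False)
profile_range_draws("python benchmark.py", r_draws=5,
                    dataset="electricity", usegpu=True)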
Example #13
                  alt_list=['TRAIN', 'SM', 'CAR'],
                  empty_val=0,
                  varying=['TT', 'CO', 'AV'],
                  alt_is_prefix=True)

# ===== STEP 3. CREATE MODEL SPECIFICATION =====
df['ASC_TRAIN'] = np.ones(len(df)) * (df['alt'] == 'TRAIN')
df['ASC_CAR'] = np.ones(len(df)) * (df['alt'] == 'CAR')
df['TT'], df['CO'] = df['TT'] / 100, df['CO'] / 100  # Scale variables
annual_pass = (df['GA'] == 1) & (df['alt'].isin(['TRAIN', 'SM']))
df.loc[annual_pass, 'CO'] = 0  # Cost zero for pass holders

# ===== STEP 4. ESTIMATE MODEL PARAMETERS =====
from xlogit import MixedLogit
varnames = ['ASC_CAR', 'ASC_TRAIN', 'CO', 'TT']
model = MixedLogit()
model.fit(X=df[varnames],
          y=df['CHOICE'],
          varnames=varnames,
          alts=df['alt'],
          ids=df['custom_id'],
          panels=df["ID"],
          avail=df['AV'],
          randvars={'TT': 'n'},
          n_draws=1500)
model.summary()
"""
OUTPUT:
Estimation time= 1.3 seconds
---------------------------------------------------------------------------
Coefficient              Estimate      Std.Err.         z-val         P>|z|
Example #14
        'nonsig1', 'nonsig2', 'nonsig3'
    ]
    X = df[varnames].values
    y = df['choice'].values
    randvars = {'meals': 'n', 'petfr': 'n', 'emipp': 'n'}
    alts = [1, 2, 3]
    panels = None

if not use_gpu:
    device.disable_gpu_acceleration()
if profile:
    ini_ram = curr_ram()
    profiler = Profiler().start(measure_gpu_mem=use_gpu)

np.random.seed(0)
model = MixedLogit()
model.fit(X,
          y,
          varnames,
          alts=alts,
          n_draws=n_draws,
          panels=panels,
          verbose=0,
          randvars=randvars)

if profile:
    elapsed, max_ram, max_gpu = profiler.stop()
    log("{:6} {:7.2f} {:11.2f} {:7.3f} {:7.3f} {}".format(
        n_draws, elapsed, model.loglikelihood, max_ram - ini_ram, max_gpu,
        model.convergence))
    profiler.export('xlogit' + ('_gpu' if use_gpu else ''), dataset, n_draws,