# Data helpers referenced below (generate_data, generate_spline_data, gmu_hft,
# _logit_link, _binomial_family) are assumed to be defined elsewhere in this module.
import numpy as np

import gam  # module under test


def test_spline_regression():
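    """Fit a normal GAM with a single spline feature and plot the
    estimated smooth against the true generating function."""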
    mdl = gam.GAM('normal', name='test_spline_regression')
    mdl.add_feature(name='hft', type='spline', rel_dof=9.)

    X, y = generate_spline_data(1000)
    mdl.fit(X, y, verbose=False, plot_convergence=True)
    mdl.plot('hft', true_fn=lambda x: np.sin(12. * (x + 0.2)) / (x + 0.2))


def test_linear_regression():
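    """Fit a normal GAM with a log-transformed linear feature and two
    categorical features, then report held-out MSE."""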
    mdl = gam.GAM('normal', name='test_linear_regression')
    mdl.add_feature(name='purchases', type='linear', transform=np.log1p)
    mdl.add_feature(name='gender', type='categorical')
    mdl.add_feature(name='country', type='categorical')

    X, y = generate_data(1000)
    mdl.fit(X, y, verbose=False, plot_convergence=True)
    mdl.summary()

    Xtest, ytest = generate_data(100)
    yhat = mdl.predict(Xtest)
    err = ytest - yhat
    print('MSE:', err.dot(err) / len(err))


def test_additive_regression():
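    """Fit a normal GAM mixing spline, linear, and categorical features;
    plot the estimated spline and report held-out MSE."""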
    mdl = gam.GAM('normal', name='test_additive_regression')
    mdl.add_feature(name='hft', type='spline', rel_dof=9.)
    mdl.add_feature(name='purchases', type='linear', transform=np.log1p)
    mdl.add_feature(name='gender', type='categorical')
    mdl.add_feature(name='country', type='categorical')

    X, y = generate_data(1000, include_hft=True)
    mdl.fit(X, y, verbose=False, plot_convergence=True)
    mdl.plot('hft', true_fn=gmu_hft)

    mdl.summary()

    Xtest, ytest = generate_data(100, include_hft=True)
    yhat = mdl.predict(Xtest)
    err = ytest - yhat
    print('MSE:', err.dot(err) / len(err))


def test_logistic_regression():
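    """Fit a binomial (logistic) GAM and compare predicted probabilities
    with the true held-out means."""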
    mdl = gam.GAM('binomial', name='test_logistic_regression')
    mdl.add_feature(name='purchases', type='linear', transform=np.log1p)
    mdl.add_feature(name='gender', type='categorical')
    mdl.add_feature(name='country', type='categorical')

    X, y = generate_data(1000, link=_logit_link, family=_binomial_family)
    mdl.fit(X, y, verbose=False, plot_convergence=True)
    mdl.summary()

    # Get the "true" mean probabilities, mu_test, for a held-out set
    Xtest, mu_test = generate_data(100,
                                   link=_logit_link,
                                   family=_binomial_family,
                                   return_mean=True)
    mu_hat = mdl.predict(Xtest)
    err = mu_test - mu_hat
    print('MSE:', err.dot(err) / len(err))


def test_cross_validation():
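    """Select the spline smoothing parameter by K-fold cross validation,
    then refit on the full training set with the best value."""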
    mdl = gam.GAM('normal', name='test_cross_validation')
    mdl.add_feature(name='hft', type='spline', rel_dof=9.)

    num_training_examples = 1000
    X, y = generate_spline_data(num_training_examples)

    # Use K-fold cross validation to estimate the optimal smoothing parameter
    K = 5
    ii = np.random.permutation(num_training_examples)
    num_smooths = 20
    dev = np.zeros((num_smooths, ))
    smoothing = np.linspace(0.5, 5.0, num_smooths)

    for j in range(num_smooths):
        for i in range(K):
            # Boundaries of the i-th held-out block: ii[ia:ib]
            ia = int(i * num_training_examples / K)
            ib = int((i + 1) * num_training_examples / K)

            testi = ii[ia:ib]
            traini = np.concatenate((ii[:ia], ii[ib:]))

            Xtraini = X.iloc[traini, :]
            ytraini = y[traini]
            Xtesti = X.iloc[testi, :]
            ytesti = y[testi]

            mdl.fit(Xtraini, ytraini, smoothing=smoothing[j])
            dev[j] += mdl.deviance(Xtesti, ytesti) / np.size(ytesti)

        dev[j] /= K

    # Refit model using entire training set and best smoothing parameter
    best_j = np.argmin(dev)
    mdl.fit(X, y, smoothing=smoothing[best_j])
    mdl.plot('hft', true_fn=gmu_hft)

    mdl.summary()

    Xtest, ytest = generate_spline_data(100)
    yhat = mdl.predict(Xtest)
    err = ytest - yhat
    print('MSE:', err.dot(err) / len(err))
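

if __name__ == '__main__':
    # Convenience runner (an assumption: this module may also be executed
    # directly as a script; pytest discovers the test_* functions without it).
    test_spline_regression()
    test_linear_regression()
    test_additive_regression()
    test_logistic_regression()
    test_cross_validation()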