def test_spline_regression():
    mdl = gam.GAM('normal', name='test_spline_regression')
    mdl.add_feature(name='hft', type='spline', rel_dof=9.)
    X, y = generate_spline_data(1000)
    mdl.fit(X, y, verbose=False, plot_convergence=True)
    mdl.plot('hft', true_fn=lambda x: np.sin(12. * (x + 0.2)) / (x + 0.2))
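
# `generate_spline_data` is defined outside this section. A minimal sketch of
# what it might look like, assuming it draws a single covariate 'hft' uniformly
# on [0, 1] and adds Gaussian noise around the true function plotted above.
# The helper name `_example_spline_data`, the noise level, and the DataFrame
# layout are illustrative assumptions, not the actual generator.
def _example_spline_data(n, noise_sd=0.3):
    import pandas as pd  # local import keeps the sketch self-contained
    x = np.random.uniform(0., 1., n)
    mu = np.sin(12. * (x + 0.2)) / (x + 0.2)   # same "true" curve as the plot
    y = mu + noise_sd * np.random.randn(n)
    return pd.DataFrame({'hft': x}), y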
def test_linear_regression():
    mdl = gam.GAM('normal', name='test_linear_regression')
    mdl.add_feature(name='purchases', type='linear', transform=np.log1p)
    mdl.add_feature(name='gender', type='categorical')
    mdl.add_feature(name='country', type='categorical')
    X, y = generate_data(1000)
    mdl.fit(X, y, verbose=False, plot_convergence=True)
    mdl.summary()
    # Evaluate mean squared error on a held-out test set
    Xtest, ytest = generate_data(100)
    yhat = mdl.predict(Xtest)
    err = ytest - yhat
    print('MSE:', err.dot(err) / len(err))
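
# `generate_data` is likewise defined elsewhere. A rough sketch of the shape of
# data this test assumes: a positive count-like 'purchases' column plus two
# categorical columns, with a Gaussian response. The helper name
# `_example_data`, the coefficients, and the category labels are assumptions.
def _example_data(n):
    import pandas as pd  # local import keeps the sketch self-contained
    purchases = np.random.poisson(20., n)
    gender = np.random.choice(['f', 'm'], n)
    country = np.random.choice(['us', 'gb', 'de'], n)
    # Linear predictor on the log1p-transformed purchases, as in the model spec
    eta = (0.5 * np.log1p(purchases)
           + 0.3 * (gender == 'f')
           - 0.2 * (country == 'de'))
    y = eta + 0.5 * np.random.randn(n)
    return pd.DataFrame({'purchases': purchases,
                         'gender': gender,
                         'country': country}), y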
def test_additive_regression():
    mdl = gam.GAM('normal', name='test_additive_regression')
    mdl.add_feature(name='hft', type='spline', rel_dof=9.)
    mdl.add_feature(name='purchases', type='linear', transform=np.log1p)
    mdl.add_feature(name='gender', type='categorical')
    mdl.add_feature(name='country', type='categorical')
    X, y = generate_data(1000, include_hft=True)
    mdl.fit(X, y, verbose=False, plot_convergence=True)
    mdl.plot('hft', true_fn=gmu_hft)
    mdl.summary()
    # Evaluate mean squared error on a held-out test set
    Xtest, ytest = generate_data(100, include_hft=True)
    yhat = mdl.predict(Xtest)
    err = ytest - yhat
    print('MSE:', err.dot(err) / len(err))
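
# `gmu_hft` (the "true" spline component plotted above) is also not shown in
# this section; judging from the true_fn lambda in test_spline_regression it is
# presumably the same bump function. A sketch under that assumption:
def _example_gmu_hft(x):
    return np.sin(12. * (x + 0.2)) / (x + 0.2)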
def test_logistic_regression():
    mdl = gam.GAM('binomial', name='test_logistic_regression')
    mdl.add_feature(name='purchases', type='linear', transform=np.log1p)
    mdl.add_feature(name='gender', type='categorical')
    mdl.add_feature(name='country', type='categorical')
    X, y = generate_data(1000, link=_logit_link, family=_binomial_family)
    mdl.fit(X, y, verbose=False, plot_convergence=True)
    mdl.summary()
    # Compare predicted probabilities against the "true" means on a held-out test set
    Xtest, mu_test = generate_data(100, link=_logit_link, family=_binomial_family,
                                   return_mean=True)
    mu_hat = mdl.predict(Xtest)
    err = mu_test - mu_hat
    print('MSE:', err.dot(err) / len(err))
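
# `_logit_link` and `_binomial_family` are assumed here to be the usual logit
# link and a Bernoulli sampler, so the generator can turn a linear predictor
# into 0/1 outcomes. A minimal sketch of that pairing; the names and signatures
# below are assumptions, not the module's actual API.
def _example_logit_link_inverse(eta):
    # Inverse logit: map the linear predictor eta to a probability in (0, 1)
    return 1. / (1. + np.exp(-eta))

def _example_binomial_sample(mu):
    # Draw Bernoulli outcomes with success probability mu
    return (np.random.uniform(size=np.shape(mu)) < mu).astype(float)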
def test_cross_validation():
    mdl = gam.GAM('normal', name='test_cross_validation')
    mdl.add_feature(name='hft', type='spline', rel_dof=9.)
    num_training_examples = 1000
    X, y = generate_spline_data(num_training_examples)

    # Use K-fold cross-validation to estimate the optimal smoothing parameter
    K = 5
    ii = np.random.permutation(num_training_examples)
    num_smooths = 20
    dev = np.zeros((num_smooths,))
    smoothing = np.linspace(0.5, 5.0, num_smooths)
    for j in range(num_smooths):
        for i in range(K):
            # Fold i: validate on ii[ia:ib], train on the remaining examples
            ia = int(i * float(num_training_examples) / K)
            ib = int((i + 1) * float(num_training_examples) / K)
            traini = np.append(ii[0:ia], ii[ib:num_training_examples])
            testi = ii[ia:ib]
            Xtraini = X.iloc[traini, :]
            ytraini = y[traini]
            Xtesti = X.iloc[testi, :]
            ytesti = y[testi]
            mdl.fit(Xtraini, ytraini, smoothing=smoothing[j])
            dev[j] += mdl.deviance(Xtesti, ytesti) / np.size(ytesti)
        dev[j] /= K

    # Refit the model on the entire training set with the best smoothing parameter
    best_idx = np.argmin(dev)
    mdl.fit(X, y, smoothing=smoothing[best_idx])
    mdl.plot('hft', true_fn=gmu_hft)
    mdl.summary()
    Xtest, ytest = generate_spline_data(100)
    yhat = mdl.predict(Xtest)
    err = ytest - yhat
    print('MSE:', err.dot(err) / len(err))
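
# Usage sketch: the tests can be run as a script. This __main__ guard assumes
# the generator helpers referenced above are defined or imported in this module.
if __name__ == '__main__':
    np.random.seed(0)  # make the randomly generated data reproducible
    test_spline_regression()
    test_linear_regression()
    test_additive_regression()
    test_logistic_regression()
    test_cross_validation()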