Example #1
import numpy as np
import matplotlib.pyplot as plt

from pygam import LinearGAM
from pygam.datasets import wage


# Note: this snippet targets the older pyGAM API, where generate_X_grid() took
# no arguments and partial_dependence() accepted X plus a `feature=` keyword.
def wage_data_linear():
    X, y = wage()

    gam = LinearGAM(n_splines=10)
    gam.gridsearch(X, y, lam=np.logspace(-5,3,50))

    XX = gam.generate_X_grid()

    plt.figure()
    fig, axs = plt.subplots(1,3)

    titles = ['year', 'age', 'education']
    for i, ax in enumerate(axs):
        ax.plot(XX[:, i], gam.partial_dependence(XX, feature=i))
        ax.plot(XX[:, i], *gam.partial_dependence(XX, feature=i, width=.95)[1],
                c='r', ls='--')
        if i == 0:
            ax.set_ylim(-30, 30)
        ax.set_title(titles[i])

    fig.tight_layout()
    plt.savefig('imgs/pygam_wage_data_linear.png', dpi=300)
Example #2
File: conftest.py  Project: maorn/pyGAM
import pytest

from pygam.datasets import wage


@pytest.fixture
def wage_X_y():
    # y is real
    # recommend LinearGAM
    return wage(return_X_y=True)
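
Since the fixture's comments recommend a LinearGAM for the real-valued target, here is a minimal sketch of a test that could consume it (the test name and assertion are illustrative assumptions, not part of the pyGAM test suite):

from pygam import LinearGAM


def test_linear_gam_fits_wage(wage_X_y):
    X, y = wage_X_y
    gam = LinearGAM().fit(X, y)
    # a fitted model should produce one prediction per row of X
    assert gam.predict(X).shape == y.shape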
Example #3
# fragment from a larger script: `gam1`, `X` and `y` are defined earlier, and
# the tensor-product term te() requires `from pygam import te`
gam1.summary()

gam2 = LinearGAM(te(0, 1)).fit(X, y)
gam2.summary()

import pandas as pd

pd.DataFrame(X).corr()

######################################################
# regression

from pygam import LinearGAM, s, f
from pygam.datasets import wage

import matplotlib.pyplot as plt

X, y = wage(return_X_y=True)

## model
gam = LinearGAM(s(0) + s(1) + f(2))
gam.gridsearch(X, y)
gam.summary()

## plotting
plt.figure()
fig, axs = plt.subplots(1, 3)

titles = ['year', 'age', 'education']
for i, ax in enumerate(axs):
    XX = gam.generate_X_grid(term=i)
    ax.plot(XX[:, i], gam.partial_dependence(term=i, X=XX))
    ax.plot(XX[:, i],
            gam.partial_dependence(term=i, X=XX, width=.95)[1],
            c='r', ls='--')
    ax.set_title(titles[i])
Example #4
from pygam.datasets import wage
from pygam import LinearGAM, s, f
import numpy as np
import matplotlib.pyplot as plt

X, y = wage()

gam = LinearGAM(s(0, n_splines=5) + s(1) + f(2)).fit(X, y)

gam.summary()

lam = np.logspace(-3, 5, 5)
lams = [lam] * 3

gam.gridsearch(X, y, lam=lams)
gam.summary()

lams = np.random.rand(100, 3)  # random points on [0, 1], with shape (100, 3)
lams = lams * 8 - 3            # rescale the values to the interval [-3, 5]
lams = np.exp(lams)            # exponentiate: lam values roughly in [e^-3, e^5]

random_gam = LinearGAM(s(0) + s(1) + f(2)).gridsearch(X, y, lam=lams)
random_gam.summary()

# compare generalization scores: lower GCV indicates the better model
print(gam.statistics_['GCV'] < random_gam.statistics_['GCV'])

for i, term in enumerate(gam.terms):
    if term.isintercept:
        continue

    XX = gam.generate_X_grid(term=i)
    pdep, confi = gam.partial_dependence(term=i, X=XX, width=0.95)

    plt.figure()
    plt.plot(XX[:, term.feature], pdep)
    plt.plot(XX[:, term.feature], confi, c='r', ls='--')
    plt.title(repr(term))
    plt.show()
Example #5

import numpy as np

from pygam.datasets import wage


def crude_strCat_to_int(ar, findex):
    '''
    Check whether the feature in column `findex` is a string; if so, replace
    each unique value (in place) with the integer corresponding to its
    lexicographical order.
    '''

    if isinstance(ar[0, findex], str):
        ar[:, findex] = np.unique(ar[:, findex], return_inverse=True)[1]
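
# Hedged usage sketch (not part of the original snippet): the helper works in
# place on an object-dtype array, mapping each unique string to its index in
# lexicographic order.
demo = np.array([['b', 1], ['a', 2], ['b', 3]], dtype=object)
crude_strCat_to_int(demo, 0)
print(demo[:, 0])  # -> [1 0 1]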


# load dataset (as pd.DataFrame) => describe features

df = wage(return_X_y=False)
df.describe(include='all')
'''
Types of the terms:
1) int/category ['year']
2) int          ['age']
3) continuous   ['logwage']
4) category     ['sex','maritl','race','education','religion','jobclass','health','health_ins']
(a possible mapping of these types onto pyGAM terms is sketched at the end of
this example)
'''

# prep X and y

features = ['year', 'age', 'education']

X = df[features].values
crude_strCat_to_int(X, 2)
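
The snippet stops before the target is built. A minimal completion sketch, assuming the target is the `wage` column of the loaded DataFrame (an assumption, not shown above), with spline terms for the numeric `year` and `age` features and a factor term for the integer-encoded `education` feature:

from pygam import LinearGAM, s, f

y = df['wage'].values   # assumed target column
X = X.astype(float)     # ensure a numeric array after the category encoding

gam = LinearGAM(s(0) + s(1) + f(2)).fit(X, y)
gam.summary()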
Example #6
# fragment from a larger script: `dataset`, `column_name` and the `db`
# connection are defined elsewhere
import pandas as pd

df = pd.DataFrame(dataset, columns=column_name)
# df1 = pd.DataFrame(dataset1, columns=column_name)

db.connect.commit()

train_value = df[df['date'] < '2020-09-01']  # keep rows dated before 2020-09-01

x_train1 = train_value.iloc[:, 1:].astype('float64')
y_train1 = train_value['value'].astype('float64').to_numpy()

x_train2 = train_value['rain'].astype('float64')

from pygam import LinearGAM, s, f
from pygam.datasets import wage
# the DB-derived x_train2 / y_train1 are overwritten here with the pyGAM wage data
x_train2, y_train1 = wage()
gam = LinearGAM(s(0) + s(1) + f(2)).fit(x_train2, y_train1)
gam.summary()

import matplotlib.pyplot as plt

for i, term in enumerate(gam.terms):
    if term.isintercept:
        continue

    XX = gam.generate_X_grid(term=i)
    pdep, confi = gam.partial_dependence(term=i, X=XX, width=0.95)

    plt.figure()
    plt.plot(XX[:, term.feature], pdep)
    plt.plot(XX[:, term.feature], confi, c='r', ls='--')
    plt.title(repr(term))
    plt.show()