Example #1
import numpy as np
import pandas as pd
from pygam import GAM


def gam(x, y):
    # Sample 100 candidate smoothing vectors, one lambda per feature.
    lams = np.exp(np.random.rand(100, x.shape[1]))
    linear_gam = GAM(n_splines=10, max_iter=1000)
    # With return_scores=True, gridsearch returns a {model: score} dict.
    cv_results = linear_gam.gridsearch(x, y,
                                       return_scores=True,
                                       lam=lams,
                                       progress=False)
    # gridsearch minimizes GCV/UBRE, so lower scores are better and the
    # table is sorted ascending (the original sorted descending).
    cv_results_df = (pd.DataFrame(cv_results, index=['score']).T
                     .sort_values(by='score', ascending=True))
    return linear_gam, cv_results_df
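A minimal usage sketch (not part of the original snippet; the synthetic data is an assumption):

# Hypothetical demo data; any numeric (n_samples, n_features) array works.
x_demo = np.random.rand(200, 3)
y_demo = x_demo @ np.array([1.0, -2.0, 0.5]) + np.random.normal(scale=0.1, size=200)
model, scores = gam(x_demo, y_demo)
print(scores.head())  # best (lowest-score) smoothing candidates first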
Example #2
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor
from pygam import GAM, LogisticGAM
# SuperLearner and EmpiricalMean are helpers from the surrounding project;
# their imports are omitted in the source snippet.


def superlearnersetup(var_type, K=5):
    """Super Learner setup for binary and continuous variables"""
    if var_type == 'binary':
        # Binary outcome: unpenalized logistic regression
        # (scikit-learn >= 1.4 expects penalty=None rather than 'none')
        log_b = LogisticRegression(penalty='none',
                                   solver='lbfgs',
                                   max_iter=1000)
        rdf_b = RandomForestClassifier(
            n_estimators=500,
            min_samples_leaf=20)  # max features is sqrt(n_features)
        gam1_b = LogisticGAM(n_splines=4, lam=0.6)
        gam2_b = LogisticGAM(n_splines=6, lam=0.6)
        nn1_b = MLPClassifier(hidden_layer_sizes=(4, ),
                              activation='relu',
                              solver='lbfgs',
                              max_iter=2000)
        emp_b = EmpiricalMean()

        lib = [log_b, gam1_b, gam2_b, rdf_b, nn1_b, emp_b]
        libnames = [
            "Logit", "GAM1", "GAM2", "Random Forest", "Neural-Net", "Mean"
        ]
        sl = SuperLearner(lib,
                          libnames,
                          loss="nloglik",
                          K=K,
                          print_results=False)

    elif var_type == 'continuous':
        # Continuous variable
        lin_c = LinearRegression()
        rdf_c = RandomForestRegressor(n_estimators=500, min_samples_leaf=20)
        gam1_c = GAM(link='identity', n_splines=4, lam=0.6)
        gam2_c = GAM(link='identity', n_splines=6, lam=0.6)
        nn1_c = MLPRegressor(hidden_layer_sizes=(4, ),
                             activation='relu',
                             solver='lbfgs',
                             max_iter=2000)
        emp_c = EmpiricalMean()

        lib = [lin_c, gam1_c, gam2_c, rdf_c, nn1_c, emp_c]
        libnames = [
            "Linear", "GAM1", "GAM2", "Random Forest", "Neural-Net", "Mean"
        ]
        sl = SuperLearner(lib, libnames, K=K, print_results=False)

    else:
        raise ValueError("var_type must be 'binary' or 'continuous'")

    return sl
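A usage sketch (assumes the project's SuperLearner and EmpiricalMean implementations are importable):

# Build a stacked learner for each outcome type; K controls the CV folds.
sl_bin = superlearnersetup('binary', K=5)
sl_cont = superlearnersetup('continuous', K=5)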
Example #3

import numpy as np
from sklearn.metrics import make_scorer, r2_score


class RegressorTuner:  # hypothetical wrapper name; the source snippet begins at __init__
    def __init__(self, algorithm, params=None):
        '''
        Initialize the class with a list of possible algorithms and
        recommended hyperparameter ranges.
        '''
        if algorithm == 'etr':  # Extra trees regressor
            from sklearn.ensemble import ExtraTreesRegressor
            self.hyper_range = {
                "max_depth": [4, 8, 12, 16, 20],
                "min_samples_split": np.arange(2, 11),
                "min_samples_leaf": np.arange(1, 11),
                "n_estimators": np.arange(10, 801, 40)
            }
            self.algorithm = ExtraTreesRegressor()

        elif algorithm == 'gbm':  # Gradient boosting model
            from sklearn.ensemble import GradientBoostingRegressor
            self.hyper_range = {
                "max_depth": [4, 8, 12, 16, 20],
                "min_samples_split": np.arange(2, 11),
                "min_samples_leaf": np.arange(1, 11),
                "n_estimators": np.arange(10, 801, 40)
            }
            self.algorithm = GradientBoostingRegressor()

        elif algorithm == 'gam':  # Generalized additive model
            from pygam import GAM
            self.hyper_range = {'n_splines': np.arange(5, 40)}
            self.algorithm = GAM()

        # Set scorer as R2
        self.my_scorer = make_scorer(r2_score, greater_is_better=True)
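The class's search routine is not shown in this excerpt; a sketch of how the stored pieces could drive a scikit-learn search (RandomizedSearchCV and the class name are assumptions):

from sklearn.model_selection import RandomizedSearchCV

tuner = RegressorTuner('etr')
search = RandomizedSearchCV(tuner.algorithm,
                            tuner.hyper_range,
                            n_iter=20,
                            scoring=tuner.my_scorer,
                            cv=5)
# search.fit(X, y)  # X, y: your training data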
Example #4
import numpy as np
import matplotlib.pyplot as plt


def global_explanation_plot(gam, feature_names, number_cols=4):
    # One row per `number_cols` features (the original divided by 2, which
    # over-allocates rows for any other column count).
    number_lines = len(feature_names) // number_cols
    if len(feature_names) % number_cols != 0:
        number_lines += 1

    fig, axs = plt.subplots(number_lines, number_cols)
    axs = np.atleast_2d(axs)  # keep 2-D indexing when there is a single row
    fig.set_size_inches(20, 4 * number_lines)
    titles = feature_names

    for j, sub_axs in enumerate(axs):
        for i, ax in enumerate(sub_axs):
            term = number_cols * j + i
            if term >= len(titles):
                ax.remove()
            else:
                # Per-term grid and partial dependence (current pygam API;
                # the original used the pre-0.6 positional/feature= form
                # and always plotted column 0 of the grid).
                xx = gam.generate_X_grid(term=term)
                pdep, confi = gam.partial_dependence(term=term, X=xx,
                                                     width=.95)
                ax.plot(xx[:, term], pdep, linewidth=3)
                ax.plot(xx[:, term], confi[:, 0], c='grey', ls='--', alpha=0.6)
                ax.plot(xx[:, term], confi[:, 1], c='grey', ls='--', alpha=0.6)
                ax.set_title(titles[term],
                             pad=10,
                             fontdict={'fontsize': 20, 'fontweight': 'bold'})
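A usage sketch (the synthetic data and the LinearGAM model are assumptions):

from pygam import LinearGAM

X_demo = np.random.rand(300, 4)
y_demo = np.sin(3 * X_demo[:, 0]) + X_demo[:, 1] + np.random.normal(scale=0.1, size=300)
gam_demo = LinearGAM().fit(X_demo, y_demo)
global_explanation_plot(gam_demo, ['f0', 'f1', 'f2', 'f3'])
plt.show()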
Example #5
import numpy as np
from pygam import GAM


def run_GAM(X, y, n_splines=15, distr='binomial', link='logit'):
    '''Run a generalized additive model on the inputs.
    No constant column is added here; pygam fits an intercept by default.'''

    # make y a column vector
    if y.ndim == 1:
        y = y[:, np.newaxis]

    # init yhat with NaNs (force float dtype so NaN assignment works)
    yhat = np.empty_like(y, dtype=float).ravel()
    yhat[:] = np.nan

    # index of rows with finite predictors, so we don't fit or predict on NaNs
    idx = np.all(np.isfinite(X), axis=1)

    # init, fit, and predict the GAM
    gam = GAM(distribution=distr, link=link, n_splines=n_splines)
    gam.gridsearch(X[idx, :], y[idx])
    yhat[idx] = gam.predict(X[idx, :])

    return yhat, gam
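A usage sketch (the synthetic binary data is an assumption):

X_demo = np.random.rand(500, 2)
y_demo = (X_demo[:, 0] + np.random.normal(scale=0.2, size=500) > 0.5).astype(float)
yhat, gam_fit = run_GAM(X_demo, y_demo)
print(yhat[:5])  # predicted probabilities (NaN wherever X had non-finite rows)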
Example #6
import numpy as np
import pandas as pd
from pygam import GAM
from sklearn.model_selection import GridSearchCV


def GamCV(x, y):
    # 10 candidate lambda vectors, one penalty per feature.
    lams = np.exp(np.random.rand(10, x.shape[1]))
    linear_gam = GAM(n_splines=10, max_iter=1000)
    parameters = {'lam': list(lams)}
    # The original also passed iid=False, which was removed in scikit-learn 0.24.
    gam_cv = GridSearchCV(linear_gam,
                          parameters,
                          cv=5,
                          return_train_score=True,
                          refit=True,
                          scoring='neg_mean_squared_error')
    gam_cv.fit(x, y)
    # Negated MSE: higher is better, so sort descending.
    cv_results_df = pd.DataFrame(gam_cv.cv_results_).sort_values(
        by='mean_test_score', ascending=False)
    return gam_cv, cv_results_df
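A usage sketch (the synthetic data is an assumption):

x_demo = np.random.rand(200, 3)
y_demo = x_demo.sum(axis=1) + np.random.normal(scale=0.1, size=200)
cv_model, cv_table = GamCV(x_demo, y_demo)
print(cv_table[['params', 'mean_test_score']].head())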
Example #7
import numpy as np
import matplotlib.pyplot as plt
from pygam import GAM, s, te
from sklearn.metrics import (mean_absolute_error, mean_squared_error,
                             mean_absolute_percentage_error)


def BAM():
    # Assumes module-level X (two numeric columns) and y.
    gam = GAM(s(0, n_splines=25, spline_order=3, constraints='concave',
                penalties='auto', basis='cp', edge_knots=[147, 147])
              + s(1, n_splines=25, spline_order=3, constraints='concave',
                  penalties='auto', basis='cp', edge_knots=[147, 147])
              + te(0, 1, dtype=['numerical', 'numerical']),
              distribution='normal', link='identity', fit_intercept=True)
    # Search only valid spline counts: cubic splines need n_splines >= 4
    # (the original grid started at 0). summary() prints its own report;
    # the original wrapped it in print(), which just printed None.
    gam.gridsearch(X, y, n_splines=np.arange(4, 50))
    gam.summary()
    plt.scatter(X[:, 0][0:56], y[0:56], s=3, linewidths=0.0001, label='data')
    plt.plot(X[:, 0][0:56], gam.predict(X[0:56]), color='red', linewidth=1,
             label='prediction')
    plt.legend()
    plt.title('Basic Additive Model')
    plt.show()

    # error metrics; the source's rmse() helper is project-local, so RMSE
    # is computed from sklearn's MSE here instead
    rmse_val = np.sqrt(mean_squared_error(y, gam.predict(X)))
    print("RMSE is: " + str(rmse_val))
    mae = mean_absolute_error(y, gam.predict(X))
    print("MAE is: " + str(mae))
    mape = mean_absolute_percentage_error(y, gam.predict(X))
    print("MAPE is: " + str(mape))
Example #8
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pygam import GAM
from IPython.display import display


def display_gam(input_df, target_col, ncols=5):
    print('=' * 110)
    print('# GAM')
    print('=' * 110)
    target_col = [target_col] if isinstance(target_col, str) else target_col
    key_cols = [
        c for c in list(input_df.select_dtypes('number'))
        if c not in target_col
    ]
    _df = input_df[key_cols]
    _df = _df.fillna(_df.median())
    y = input_df[target_col]

    nfigs = len(_df.columns)
    nrows = nfigs // ncols + 1 if nfigs % ncols != 0 else nfigs // ncols

    model = GAM()
    model.fit(_df, y)

    fig, axes = plt.subplots(figsize=(ncols * 3, nrows * 2),
                             ncols=ncols,
                             nrows=nrows)
    axes = np.array(axes).flatten()
    for i, (ax, title, p_value) in enumerate(
            zip(axes, _df.columns, model.statistics_['p_values'])):
        # partial dependence curve plus its 95% confidence band
        XX = model.generate_X_grid(term=i)
        ax.plot(XX[:, i], model.partial_dependence(term=i, X=XX))
        ax.plot(XX[:, i],
                model.partial_dependence(term=i, X=XX, width=.95)[1],
                c='r',
                ls='--')
        ax.axhline(0, c='#cccccc')
        ax.set_title("{0:} (p={1:.2})".format(title, p_value))
        ax.set_yticks([])
        ax.grid()

    fig.tight_layout()
    display(fig)  # IPython display; use plt.show() outside notebooks
    plt.close()
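A usage sketch (the toy DataFrame is an assumption; run inside a notebook so display() renders the figure):

df_demo = pd.DataFrame(np.random.rand(200, 4), columns=['a', 'b', 'c', 'target'])
display_gam(df_demo, 'target', ncols=3)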
Example #9
# histogram smoothing

import matplotlib.pyplot as plt
from pygam import PoissonGAM
from pygam.datasets import faithful

X, y = faithful(return_X_y=True)

gam = PoissonGAM().gridsearch(X, y)

plt.hist(faithful(return_X_y=False)['eruptions'], bins=200, color='k')
plt.plot(X, gam.predict(X), color='r')
plt.title('Best Lambda: {0:.2f}'.format(gam.lam[0][0]))
plt.show()

######################################################
# regression

from pygam import GAM
from pygam.datasets import trees

X, y = trees(return_X_y=True)

gam = GAM(distribution='gamma', link='log')
gam.gridsearch(X, y)

plt.scatter(y, gam.predict(X))
plt.xlabel('true volume')
plt.ylabel('predicted volume')
plt.show()
Example #10
from pygam import GAM
import causaldag as cd
import numpy as np
import os
import random
np.random.seed(1729)
random.seed(1729)

d = cd.GaussDAG([0, 1, 2], arcs={(0, 1), (0, 2)})
s = d.sample(100)
np.savetxt(os.path.expanduser('~/Desktop/s1.txt'), s)

gam = GAM()
gam.fit(s[:, 0], s[:, 1])
res1 = gam.deviance_residuals(s[:, 0], s[:, 1])
gam.summary()  # summary() prints its own report and returns None
gam.fit(s[:, 0], s[:, 2])  # the source refit on s[:, 1] here, a likely typo
res2 = gam.deviance_residuals(s[:, 0], s[:, 2])
gam.summary()
print(res1)
print(res2)
Example #11

# Tail of a train/test split; the call is truncated in the source, so the
# import and the unpacking below are reconstructions (X, y and the split
# sizes are not shown).
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    random_state=42)

#%%
# plotting
fig = plt.figure()
ax = plt.axes(projection='3d')
nr = 2  # plot every nr-th sample
ax.scatter3D(X[:, 1][::nr], X[:, 0][::nr], y[::nr], c=y[::nr], cmap='Spectral')
plt.show()
#%%
# pyGAM
from pygam import LinearGAM, s, te, PoissonGAM, f, GAM

gam = GAM(
    s(0, constraints="monotonic_inc", n_splines=15) +
    s(1) +  # alternative: s(1, constraints="concave", n_splines=100)
    te(1, 0))
gam.fit(X_train, y_train)

titles = ['QDot[l/min*m]', 'TemperaturStart']
fig, axs = plt.subplots(1, len(titles), figsize=(13, 9))

# plot partial dependences
for i, ax in enumerate(axs):
    print("i = ", i)
    XX = gam.generate_X_grid(term=i)
    ax.plot(XX[:, i], gam.partial_dependence(term=i, X=XX))
    ax.plot(XX[:, i],
            gam.partial_dependence(term=i, X=XX, width=.95)[1],
            c='r')
    ax.set_title(titles[i])
Example #12

import pickle
import pandas as pd
import torch
from pygam import GAM, s
# utils, models, parameter_samples and climate_simulations are modules and
# helpers from the physics_guided_nn project.


def gen_simulations(n, data_dir='~/physics_guided_nn/data/'):

    x, y, xt = utils.loaddata('validation',
                              None,
                              dir="~/physics_guided_nn/data/",
                              raw=True,
                              doy=False)
    y = y.to_frame()

    # Hold out a year as test data
    train_x = x[~x.index.year.isin([2012])].copy()
    train_y = y[~y.index.year.isin([2012])]

    train_x['year'] = pd.DatetimeIndex(train_x['date']).year
    train_x = train_x.drop(['date'], axis=1)

    # Fit one cyclic-spline GAM per driver variable and pickle it
    # (the original repeated this block verbatim for each variable).
    results_dir = '/home/fr/fr_fr/fr_mw1205/physics_guided_nn/results/'
    for var in ['Tair', 'Precip', 'VPD', 'PAR', 'fapar']:
        gam_var = GAM(s(0, by=1, n_splines=200,
                        basis='cp')).fit(train_x[['DOY', 'year']],
                                         train_x[var])
        with open(results_dir + 'gam' + var, 'wb') as f:
            pickle.dump(gam_var, f)

    p = parameter_samples(n_samples=n)
    # np.savetxt('parameter_simulations.csv', p, delimiter=';')
    pt = torch.tensor(p, dtype=torch.float64)

    d = []

    for i in range(n):
        c = climate_simulations(train_x)
        # np.savetxt('climate_simulations.csv', c.to_numpy(), delimiter=';')
        ct = torch.tensor(c.to_numpy(), dtype=torch.float64)

        out = models.physical_forward(parameters=pt[i, :], input_data=ct)
        out = out.detach().numpy()
        # np.savetxt('gpp_simulations.csv')

        c['GPP'] = out
        d.append(c)

    d = pd.concat(d)
    d.to_csv(''.join((data_dir, 'DA_preles_sims.csv')), index=False)
Example #13

# Fragment of a larger simulation script. The opening of the first loop is
# cut off in the source, so its header below is a reconstruction; the data
# arrays (N_Y*, N_J*, NN, NN1, NNN, temp1, temperatures, rows, cols,
# no_patches) and the plot helper come from the surrounding project.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pygam import GAM

for q in range(1, no_patches + 1):  # reconstructed loop header
    plot.do_lineplot(N_Y2[:, q - 1], N_Y1[:, q - 1], 'species2_Y_abun' + str(q))
    plot.do_lineplot(N_J2[:, q - 1], N_J1[:, q - 1], 'species2_J_abun' + str(q))
for q in range(1, no_patches + 1):
    plot.do_lineplot(NN1[:, q - 1], NNN[:, q - 1], 'onestage' + str(q))

# Stack the per-patch columns so a single GAM is fit across all patches.
XGam = temp1[:, 0]
NNGam = NN[:, 0]
NNNGam = NNN[:, 0]
Nsimnew = np.ndarray(shape=(rows, cols), dtype=float, order='F')
XXGam = temperatures[:, 0]
for q in range(1, no_patches):
    XGam = np.hstack((XGam, temp1[:, q]))
    NNGam = np.hstack((NNGam, NN[:, q]))
    NNNGam = np.hstack((NNNGam, NNN[:, q]))
    XXGam = np.hstack((XXGam, temperatures[:, q]))

gam = GAM().fit(XGam, NNGam)
ZZ = gam.predict(XXGam)

# Compare simulated abundances with the GAM predictions patch by patch;
# each patch occupies a 70-row block of the stacked prediction vector.
for q in range(1, no_patches + 1):
    yy = NNN[:, q - 1]
    Nsimnew[:, q - 1] = ZZ[(q - 1) * 70:((q - 1) * 70) + 70]
    NNNsim = Nsimnew[:, q - 1]
    ZZsim = pd.Series(NNNsim, index=pd.Series(range(0, 70)))
    NO = pd.Series(yy, index=pd.Series(range(0, 70)))
    fig, ax = plt.subplots()
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')