def gam(x, y):
    """Fit a GAM with 10 splines, tuning lam via pygam's built-in grid search."""
    # 100 candidate lam vectors, one smoothing value per feature, drawn in [1, e)
    lams = np.random.rand(100, x.shape[1])
    lams = np.exp(lams)
    linear_gam = GAM(n_splines=10, max_iter=1000)
    cv_results = linear_gam.gridsearch(x, y, return_scores=True, lam=lams,
                                       progress=False)
    cv_results_df = pd.DataFrame(cv_results, index=['score']).T.sort_values(
        by='score', ascending=False)
    return linear_gam, cv_results_df
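# A minimal usage sketch for the helper above, on synthetic data (assumes
# numpy, pandas, and pygam are installed; X_demo/y_demo are illustrative only):
import numpy as np
import pandas as pd
from pygam import GAM

rng = np.random.default_rng(0)
X_demo = rng.normal(size=(200, 3))
y_demo = np.sin(X_demo[:, 0]) + 0.1 * rng.normal(size=200)

best_gam, scores_df = gam(X_demo, y_demo)
print(scores_df.head())  # one row per candidate lam vector, with its score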
def superlearnersetup(var_type, K=5):
    """Super Learner setup for binary and continuous variables."""
    if var_type == 'binary':
        # Candidate learners for a binary outcome
        log_b = LogisticRegression(penalty=None, solver='lbfgs', max_iter=1000)
        rdf_b = RandomForestClassifier(n_estimators=500,
                                       min_samples_leaf=20)  # max_features defaults to sqrt(n_features)
        gam1_b = LogisticGAM(n_splines=4, lam=0.6)
        gam2_b = LogisticGAM(n_splines=6, lam=0.6)
        nn1_b = MLPClassifier(hidden_layer_sizes=(4, ), activation='relu',
                              solver='lbfgs', max_iter=2000)
        emp_b = EmpiricalMean()
        lib = [log_b, gam1_b, gam2_b, rdf_b, nn1_b, emp_b]
        libnames = ["Logit", "GAM1", "GAM2", "Random Forest", "Neural-Net", "Mean"]
        sl = SuperLearner(lib, libnames, loss="nloglik", K=K, print_results=False)
    elif var_type == 'continuous':
        # Candidate learners for a continuous outcome
        lin_c = LinearRegression()
        rdf_c = RandomForestRegressor(n_estimators=500, min_samples_leaf=20)
        gam1_c = GAM(link='identity', n_splines=4, lam=0.6)
        gam2_c = GAM(link='identity', n_splines=6, lam=0.6)
        nn1_c = MLPRegressor(hidden_layer_sizes=(4, ), activation='relu',
                             solver='lbfgs', max_iter=2000)
        emp_c = EmpiricalMean()
        lib = [lin_c, gam1_c, gam2_c, rdf_c, nn1_c, emp_c]
        libnames = ["Linear", "GAM1", "GAM2", "Random Forest", "Neural-Net", "Mean"]
        sl = SuperLearner(lib, libnames, K=K, print_results=False)
    else:
        raise ValueError("Not supported")
    return sl
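# A hedged usage sketch: build and fit the continuous-outcome stack on toy
# data. SuperLearner and EmpiricalMean are assumed to come from the
# surrounding project (the constructor signature matches the supylearner
# package); the sklearn/pygam imports cover the learners referenced above.
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.neural_network import MLPRegressor, MLPClassifier
from pygam import GAM, LogisticGAM

rng = np.random.default_rng(1)
X_demo = rng.normal(size=(300, 4))
y_demo = X_demo[:, 0] ** 2 + rng.normal(scale=0.5, size=300)

sl = superlearnersetup('continuous', K=5)
sl.fit(X_demo, y_demo)   # K-fold cross-validated stacking over the library
y_hat = sl.predict(X_demo)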
def __init__(self, algorithm, params=None):
    '''
    Initialize the class with a list of possible algorithms
    and recommended hyperparameter ranges.
    '''
    if algorithm == 'etr':  # Extra-trees regressor
        from sklearn.ensemble import ExtraTreesRegressor
        self.hyper_range = {
            "max_depth": [4, 8, 12, 16, 20],
            "min_samples_split": np.arange(2, 11),
            "min_samples_leaf": np.arange(1, 11),
            "n_estimators": np.arange(10, 801, 40)
        }
        self.algorithm = ExtraTreesRegressor()
    elif algorithm == 'gbm':  # Gradient boosting model
        from sklearn.ensemble import GradientBoostingRegressor
        self.hyper_range = {
            "max_depth": [4, 8, 12, 16, 20],
            "min_samples_split": np.arange(2, 11),
            "min_samples_leaf": np.arange(1, 11),
            "n_estimators": np.arange(10, 801, 40)
        }
        self.algorithm = GradientBoostingRegressor()
    elif algorithm == 'gam':  # Generalized additive model
        from pygam import GAM
        self.hyper_range = {'n_splines': np.arange(5, 40)}
        self.algorithm = GAM()
    # Score with R^2 (higher is better)
    self.my_scorer = make_scorer(r2_score, greater_is_better=True)
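# A hypothetical sketch of how the ranges above could drive a search. The
# enclosing class name "Tuner" is assumed (the source only shows __init__);
# RandomizedSearchCV and the scorer wiring are standard scikit-learn.
import numpy as np
from sklearn.model_selection import RandomizedSearchCV

rng = np.random.default_rng(2)
X_demo = rng.normal(size=(200, 5))
y_demo = X_demo @ rng.normal(size=5) + rng.normal(size=200)

tuner = Tuner('gbm')  # hypothetical wrapper class around the __init__ above
search = RandomizedSearchCV(tuner.algorithm,
                            param_distributions=tuner.hyper_range,
                            n_iter=20, scoring=tuner.my_scorer, cv=3,
                            random_state=0)
search.fit(X_demo, y_demo)
print(search.best_params_)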
def global_explanation_plot(gam, feature_names, number_cols=4):
    """Plot each feature's partial dependence with 95% confidence bands."""
    number_lines = len(feature_names) // number_cols
    if (len(feature_names) % number_cols) != 0:
        number_lines += 1
    fig, axs = plt.subplots(number_lines, number_cols)
    fig.set_size_inches(20, 2 * number_lines * number_cols)
    titles = feature_names
    axs = np.atleast_2d(axs)  # keep indexing uniform when there is one row
    for j, sub_axs in enumerate(axs):
        for i, ax in enumerate(sub_axs):
            term = number_cols * j + i
            if term >= len(titles):
                ax.remove()  # drop unused axes in the last row
            else:
                xx = gam.generate_X_grid(term=term)
                pdep, confi = gam.partial_dependence(term=term, X=xx, width=.95)
                ax.plot(xx[:, term], pdep, linewidth=3)
                ax.plot(xx[:, term], confi[:, 0], c='grey', ls='--', alpha=0.6)
                ax.plot(xx[:, term], confi[:, 1], c='grey', ls='--', alpha=0.6)
                ax.set_title(titles[term], pad=10,
                             fontdict={'fontsize': 20, 'fontweight': 'bold'})
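# A small demonstration of the plotting helper on a fitted model (synthetic
# data; assumes numpy, matplotlib, and pygam):
import numpy as np
import matplotlib.pyplot as plt
from pygam import LinearGAM

rng = np.random.default_rng(3)
X_demo = rng.uniform(-3, 3, size=(400, 5))
y_demo = np.sin(X_demo[:, 0]) + X_demo[:, 1] ** 2 + rng.normal(scale=0.3, size=400)

gam_demo = LinearGAM().fit(X_demo, y_demo)
global_explanation_plot(gam_demo, ['x0', 'x1', 'x2', 'x3', 'x4'])
plt.show()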
def run_GAM(X, y, n_splines=15, distr='binomial', link='logit'):
    '''
    Run a generalized additive model on the inputs. This function does NOT
    add a constant, as pygam adds an intercept by default.
    '''
    # make y a column vector
    if y.ndim == 1:
        y = y[:, np.newaxis]
    # init yhat with NaNs
    yhat = np.empty_like(y).ravel()
    yhat[:] = np.nan
    # keep only rows with no NaNs so we don't try to predict those
    idx = np.all(np.isfinite(X), axis=1)
    # init, fit, and predict the GAM
    gam = GAM(distribution=distr, link=link, n_splines=n_splines)
    gam.gridsearch(X[idx, :], y[idx])
    yhat[idx] = gam.predict(X[idx, :])
    return yhat, gam
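# Example call with synthetic binary outcomes to match the default
# binomial/logit settings (assumes numpy and pygam):
import numpy as np
from pygam import GAM

rng = np.random.default_rng(4)
X_demo = rng.normal(size=(300, 2))
y_demo = (X_demo[:, 0] + 0.5 * rng.normal(size=300) > 0).astype(float)

yhat, fitted_gam = run_GAM(X_demo, y_demo)
print(np.nanmean(yhat))  # mean predicted probability over usable rows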
def GamCV(x, y):
    """Tune a GAM's lam vector with scikit-learn's GridSearchCV."""
    # 10 candidate lam vectors, one smoothing value per feature, drawn in [1, e)
    lams = np.random.rand(10, x.shape[1])
    lams = np.exp(lams)
    linear_gam = GAM(n_splines=10, max_iter=1000)
    parameters = {'lam': [lam for lam in lams]}
    gam_cv = GridSearchCV(linear_gam, parameters, cv=5,
                          return_train_score=True, refit=True,
                          scoring='neg_mean_squared_error')
    gam_cv.fit(x, y)
    cv_results_df = pd.DataFrame(gam_cv.cv_results_).sort_values(
        by='mean_test_score', ascending=False)
    return gam_cv, cv_results_df
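# Usage sketch (assumes numpy, pandas, scikit-learn, and pygam; pygam models
# implement get_params/set_params, which is what GridSearchCV relies on):
import numpy as np

rng = np.random.default_rng(5)
X_demo = rng.normal(size=(200, 3))
y_demo = X_demo[:, 0] ** 2 + rng.normal(scale=0.2, size=200)

gam_cv, results_df = GamCV(X_demo, y_demo)
print(results_df[['mean_test_score', 'std_test_score']].head())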
def BAM():
    # Cyclic cubic splines on each feature plus a tensor-product interaction
    gam = GAM(s(0, n_splines=25, spline_order=3, constraints='concave',
                penalties='auto', basis='cp', edge_knots=[147, 147]) +
              s(1, n_splines=25, spline_order=3, constraints='concave',
                penalties='auto', basis='cp', edge_knots=[147, 147]) +
              te(0, 1, dtype=['numerical', 'numerical']),
              distribution='normal', link='identity', fit_intercept=True)
    gam.gridsearch(X, y, n_splines=np.arange(50))
    gam.summary()  # summary() prints its report directly

    plt.scatter(X[:, 0][0:56], y[0:56], s=3, linewidths=0.0001, label='data')
    plt.plot(X[:, 0][0:56], gam.predict(X[0:56]), color='red', linewidth=1,
             label='prediction')
    plt.legend()
    plt.title('Basic Additive Model')
    plt.show()

    # error calculation
    rmse_val = rmse(np.array(y), np.array(gam.predict(X)))
    print("RMSE is: " + str(rmse_val))
    mae = mean_absolute_error(y, gam.predict(X))
    print("MAE is: " + str(mae))
    mape = mean_absolute_percentage_error(np.array(y), np.array(gam.predict(X)))
    print("MAPE is: " + str(mape))
def display_gam(input_df, target_col, ncols=5):
    print("===============================================================================================================")
    print('# GAM')
    print("===============================================================================================================")
    target_col = [target_col] if isinstance(target_col, str) else target_col
    # numeric predictors, excluding the target(s)
    key_cols = [c for c in list(input_df.select_dtypes('number'))
                if c not in target_col]
    _df = input_df[key_cols]
    _df = _df.fillna(_df.median())
    y = input_df[target_col]
    nfigs = len(_df.columns)
    nrows = nfigs // ncols + 1 if nfigs % ncols != 0 else nfigs // ncols
    model = GAM()
    model.fit(_df, y)
    fig, axes = plt.subplots(figsize=(ncols * 3, nrows * 2), ncols=ncols,
                             nrows=nrows)
    axes = np.array(axes).flatten()
    for i, (ax, title, p_value) in enumerate(
            zip(axes, _df.columns, model.statistics_['p_values'])):
        XX = model.generate_X_grid(term=i)
        ax.plot(XX[:, i], model.partial_dependence(term=i, X=XX))
        # dashed red 95% confidence band
        ax.plot(XX[:, i],
                model.partial_dependence(term=i, X=XX, width=.95)[1],
                c='r', ls='--')
        ax.axhline(0, c='#cccccc')
        ax.set_title("{0:} (p={1:.2})".format(title, p_value))
        ax.set_yticks([])
        ax.grid()
    fig.tight_layout()
    display(fig)  # IPython display
    plt.close()
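# Quick demonstration on a synthetic frame (assumes numpy, pandas,
# matplotlib, pygam, and an IPython session for display()):
import numpy as np
import pandas as pd

rng = np.random.default_rng(6)
df_demo = pd.DataFrame(rng.normal(size=(300, 4)), columns=['a', 'b', 'c', 'd'])
df_demo['target'] = np.sin(df_demo['a']) + df_demo['b'] + rng.normal(scale=0.3, size=300)

display_gam(df_demo, 'target')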
# histogram smoothing
from pygam import PoissonGAM
from pygam.datasets import faithful

X, y = faithful(return_X_y=True)
gam = PoissonGAM().gridsearch(X, y)

plt.hist(faithful(return_X_y=False)['eruptions'], bins=200, color='k')
plt.plot(X, gam.predict(X), color='r')
plt.title('Best Lambda: {0:.2f}'.format(gam.lam[0][0]))

######################################################
# regression
from pygam import GAM
from pygam.datasets import trees

X, y = trees(return_X_y=True)
gam = GAM(distribution='gamma', link='log')
gam.gridsearch(X, y)

plt.scatter(y, gam.predict(X))
plt.xlabel('true volume')
plt.ylabel('predicted volume')
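# The fitted object can be inspected after gridsearch; a brief follow-up
# sketch using standard pygam attributes:
gam.summary()                          # edof, AIC, pseudo R^2, p-values
print(gam.statistics_['pseudo_r2'])    # dict of deviance-based fit measures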
from pygam import GAM
import causaldag as cd
import numpy as np
import os
import random

np.random.seed(1729)
random.seed(1729)

# Sample from a Gaussian DAG with arcs 0 -> 1 and 0 -> 2
d = cd.GaussDAG([0, 1, 2], arcs={(0, 1), (0, 2)})
s = d.sample(100)
np.savetxt(os.path.expanduser('~/Desktop/s1.txt'), s)

gam = GAM()
gam.fit(s[:, 0], s[:, 1])
res1 = gam.deviance_residuals(s[:, 0], s[:, 1])
gam.summary()

gam.fit(s[:, 0], s[:, 2])  # regress the second child on its parent
res2 = gam.deviance_residuals(s[:, 0], s[:, 2])
gam.summary()

print(res1)
print(res2)
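# A follow-up sketch: compare the two residual vectors, e.g. via their sample
# correlation (scipy.stats.pearsonr; illustrative only):
from scipy.stats import pearsonr

r, p = pearsonr(res1, res2)
print(f"residual correlation r={r:.3f} (p={p:.3f})")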
# assumed train/test split (the original snippet begins mid-call at random_state=42)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

#%%
# plotting
fig = plt.figure()
ax = plt.axes(projection='3d')
nr = 2  # plot every nr-th point
ax.scatter3D(X[:, 1][::nr], X[:, 0][::nr], y[::nr], c=y[::nr], cmap='Spectral')
plt.show()

#%%
# pyGAM
from pygam import LinearGAM, s, te, PoissonGAM, f, GAM

gam = GAM(s(0, constraints="monotonic_inc", n_splines=15) +
          s(1) +  # alternative tried: s(1, constraints="concave", n_splines=100)
          te(1, 0))
gam.fit(X_train, y_train)

titles = ['QDot[l/min*m]', 'TemperaturStart']
fig, axs = plt.subplots(1, len(titles), figsize=(13, 9))
# plot partial dependences with 95% confidence bands
for i, ax in enumerate(axs):
    XX = gam.generate_X_grid(term=i)
    ax.plot(XX[:, i], gam.partial_dependence(term=i, X=XX))
    ax.plot(XX[:, i], gam.partial_dependence(term=i, X=XX, width=.95)[1], c='r')
    ax.set_title(titles[i])
def gen_simulations(n, data_dir='~/physics_guided_nn/data/'):
    x, y, xt = utils.loaddata('validation', None,
                              dir="~/physics_guided_nn/data/",
                              raw=True, doy=False)
    y = y.to_frame()

    # Hold out a year as test data
    train_x = x[~x.index.year.isin([2012])]
    train_y = y[~y.index.year.isin([2012])]
    print(train_x)

    train_x['year'] = pd.DatetimeIndex(train_x['date']).year
    train_x = train_x.drop(['date'], axis=1)

    # Fit one cyclic-spline GAM of DOY (by year) per climate variable and pickle it
    results_dir = '/home/fr/fr_fr/fr_mw1205/physics_guided_nn/results/'
    for var in ['Tair', 'Precip', 'VPD', 'PAR', 'fapar']:
        gam_var = GAM(s(0, by=1, n_splines=200, basis='cp')).fit(
            train_x[['DOY', 'year']], train_x[var])
        with open(results_dir + 'gam' + var, 'wb') as f:
            pickle.dump(gam_var, f)

    p = parameter_samples(n_samples=n)
    # np.savetxt('parameter_simulations.csv', p, delimiter=';')
    pt = torch.tensor(p, dtype=torch.float64)

    d = []
    for i in range(n):
        c = climate_simulations(train_x)
        # np.savetxt('climate_simulations.csv', c.to_numpy(), delimiter=';')
        ct = torch.tensor(c.to_numpy(), dtype=torch.float64)
        out = models.physical_forward(parameters=pt[i, :], input_data=ct)
        out = out.detach().numpy()
        # np.savetxt('gpp_simulations.csv')
        c['GPP'] = out
        d.append(c)

    d = pd.concat(d)
    d.to_csv(''.join((data_dir, 'DA_preles_sims.csv')), index=False)
for q in range(1, no_patches+1):
    plot.do_lineplot(N_Y2[:, q-1], N_Y1[:, q-1], 'species2_Y_abun' + str(q))
    plot.do_lineplot(N_J2[:, q-1], N_J1[:, q-1], 'species2_J_abun' + str(q))

for q in range(1, no_patches+1):
    plot.do_lineplot(NN1[:, q-1], NNN[:, q-1], 'onestage' + str(q))

# Stack temperatures and abundances across patches for a single GAM fit
XGam = temp1[:, 0]
NNGam = NN[:, 0]
NNNGam = NNN[:, 0]
Nsimnew = np.ndarray(shape=(rows, cols), dtype=float, order='F')
XXGam = temperatures[:, 0]
for q in range(1, no_patches):
    XGam = np.hstack((XGam, temp1[:, q]))
    NNGam = np.hstack((NNGam, NN[:, q]))
    NNNGam = np.hstack((NNNGam, NNN[:, q]))
    XXGam = np.hstack((XXGam, temperatures[:, q]))

gam = GAM().fit(XGam, NNGam)
ZZ = gam.predict(XXGam)

for q in range(1, no_patches+1):
    yy = NNN[:, q-1]
    # each patch occupies a contiguous block of 70 predictions
    Nsimnew[:, q-1] = ZZ[(q-1)*70:((q-1)*70)+70]
    NNNsim = Nsimnew[:, q-1]
    ZZsim = pd.Series(NNNsim, index=pd.Series(range(0, 70)))
    NO = pd.Series(yy, index=pd.Series(range(0, 70)))
    fig, ax = plt.subplots()
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')