# Scikit-learn-compatible wrapper around pyGAM's LogisticGAM that tunes the
# smoothing parameters with pyGAM's built-in grid search.
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, ClassifierMixin
from pygam import LogisticGAM

# Default search grid: pyGAM's own default lambda space.
GAM_GRID_BASE = {'lam': np.logspace(-3, 3, 11)}


class AdaptiveLogisticGAM(BaseEstimator, ClassifierMixin):
    def __init__(self, param_grid=None, gam_params=None):
        # create GAM
        if gam_params is None:
            gam_params = {}
        self.model = LogisticGAM(**gam_params)
        # set grid search parameters
        if param_grid is None:
            param_grid = GAM_GRID_BASE
        self.param_grid = param_grid

    def fit(self, X, y):
        if isinstance(X, pd.DataFrame):
            X = X.values
        # fit using grid search over self.param_grid
        self.model.gridsearch(X, y, progress=False, **self.param_grid)
        return self

    def predict(self, X):
        if isinstance(X, pd.DataFrame):
            X = X.values
        return self.model.predict(X)

    def predict_proba(self, X):
        if isinstance(X, pd.DataFrame):
            X = X.values
        return self.model.predict_proba(X)
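# A minimal usage sketch of the wrapper above; make_classification is used
# purely as stand-in data (an assumption, not part of the original pipeline).
from sklearn.datasets import make_classification

X_demo, y_demo = make_classification(n_samples=200, n_features=5, random_state=0)
clf = AdaptiveLogisticGAM().fit(X_demo, y_demo)
print(clf.predict(X_demo)[:10])        # class labels
print(clf.predict_proba(X_demo)[:10])  # P(y = 1); pyGAM returns a 1-D array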
def GAM2(self):
    """GAM of splines, where we perform variable selection to find the
    best model."""
    from pygam import LogisticGAM, s

    # One spline term per feature.
    terms = s(0) + s(1) + s(2) + s(3) + s(4) + s(5) + s(6) + s(7)
    gam = LogisticGAM(terms=terms, fit_intercept=False)
    # Grid search over the smoothing parameter to generate the model.
    mod = gam.gridsearch(self.Xtrain.values, self.ytrain,
                         lam=np.logspace(-3, 3, 11))
    mod.summary()  # Pseudo-R2: 0.6449

    ypred = mod.predict(self.Xtest)
    MSE1 = np.mean((self.ytest - ypred.reshape(-1, 1))**2).values  # test MSE
    if self.plot:
        plt.plot(range(len(ypred.reshape(-1, 1))),
                 ypred.reshape(-1, 1) - 0.5, "r.", label='GAM model')
        plt.plot(range(len(self.ytest)), self.ytest, "b.",
                 label='Testing Data')
        plt.legend()
        plt.title("GAM model with spline terms. Prediction data is\n"
                  "scaled downwards by 0.5 for visual purposes.")
        plt.ylabel("FFVC score")
        plt.xlabel("Sample no.")
        plt.show()
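# If the number of features changes, the same term list can be built
# programmatically instead of spelling out s(0) + ... + s(7). A small sketch
# (n_features = 8 is assumed here to match the method above):
from functools import reduce
from operator import add
from pygam import LogisticGAM, s

n_features = 8
terms = reduce(add, [s(i) for i in range(n_features)])
gam = LogisticGAM(terms=terms, fit_intercept=False)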
import numpy as np
import matplotlib.pyplot as plt
from pygam import LogisticGAM


def simulation(No_T, n, p, box_plot=True):
    err = []
    for i in range(No_T):
        # generate training and test data
        X_train, Y_train = generate_data(n, p)
        X_test, Y_test = generate_data(n, p)
        logit_gam = LogisticGAM()
        logit_gam.gridsearch(X_train, Y_train)
        # calculate test error (misclassification rate)
        test_err = sum(logit_gam.predict(X_test) != Y_test) / n
        err.append(test_err)
    if box_plot:
        plt.figure(num=None, figsize=(8, 6), dpi=80)
        plt.boxplot(err)
        plt.text(1.1, 0.15, "Mean:{:.2f}".format(np.mean(err)))
        plt.text(1.1, 0.14, "Var:{:.3f}".format(np.var(err)))
        plt.title("logisticGAM")
        plt.ylabel("Test Error")
        plt.show()
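# simulation() relies on a generate_data(n, p) helper that is not shown in
# this section. A hypothetical stand-in so the function can be run end to end
# (a logistic model with p Gaussian features and fixed coefficients; purely
# illustrative, not the original data-generating process):
def generate_data(n, p):
    X = np.random.randn(n, p)
    beta = np.linspace(-1.0, 1.0, p)            # fixed true coefficients
    prob = 1.0 / (1.0 + np.exp(-X @ beta))      # P(Y = 1 | X)
    Y = (np.random.rand(n) < prob).astype(int)
    return X, Y

# Example run: 10 replications, 200 samples, 4 features.
simulation(No_T=10, n=200, p=4)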
gam1 = gam1.fit(trainX, trainy, weights=w)

# Randomized grid search over smoothing parameters, number of splines,
# and shape constraints.
import numpy as np

lams = np.random.rand(10, 33)  # random points on [0, 1]; shape (10, 33) gives
                               # 10 candidate lambda vectors for 33 terms
n_splines = [5, 10, 15, 20, 25]
lams = lams * 6   # rescale values to [0, 6]
lams = lams - 3   # then shift to [-3, 3]
lams = np.exp(lams)  # candidate lambdas in [e^-3, e^3]
cons = [
    'convex', 'concave', 'monotonic_inc',
    'monotonic_dec', 'circular', 'none'
]

# Default terms ('auto' builds one spline per feature); a custom terms
# specification could be passed to LogisticGAM(...) instead.
random = LogisticGAM().gridsearch(trainX, trainy, weights=w,
                                  lam=lams, n_splines=n_splines)
random = random.gridsearch(trainX, trainy, constraints=cons)
print(random.lam)
print(random.n_splines)
print(random.constraints)
print(random.accuracy(testX, testy))

from sklearn.metrics import confusion_matrix
preds = random.predict(testX)
print(confusion_matrix(testy, preds))

# Partial-dependence plot for each non-intercept term.
for i, term in enumerate(random.terms):
    if term.isintercept:
        continue
    XX = random.generate_X_grid(term=i)
    pdep, confi = random.partial_dependence(term=i, X=XX, width=0.95)
    plt.figure()
    plt.plot(XX[:, term.feature], pdep)
# (Head of this loop reconstructed from context: per-feature partial-dependence
# plots with 95% confidence bands; `gam` and `selected_features` are assumed to
# be the fitted model and its feature names.)
fig, axs = plt.subplots(1, len(selected_features), figsize=(16, 4))
for i, ax in enumerate(axs):
    XX = gam.generate_X_grid(term=i, meshgrid=True)
    pdep, confi = gam.partial_dependence(term=i, X=XX,
                                         meshgrid=True, width=.95)
    ax.plot(XX[0], pdep)
    ax.plot(XX[0], confi[:, 0], c='grey', ls='--')
    ax.plot(XX[0], confi[:, 1], c='grey', ls='--')
    ax.set_title(selected_features[i])
plt.show()

# -----------------------------------------------------
# Grid search with pyGAM
# The default grid in pyGAM's grid search is the lambda space
# {'lam': np.logspace(-3, 3, 11)}.
gam3 = LogisticGAM()
gam3.gridsearch(X, y)
gam3.summary()
roc_auc_score(y, gam3.predict_proba(X))  # 0.9936710533269911
gam3.accuracy(X, y)  # 0.9560632688927944

# -----------------------------------------------------
# Generalizing a GAM
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss

# We can split the data just like we usually would:
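# A plausible continuation of the snippet above (assumed, not from the source):
# hold out a test set, refit the GAM on the training portion, and score it with
# the metrics imported above.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=0)
gam4 = LogisticGAM().gridsearch(X_train, y_train)
print(accuracy_score(y_test, gam4.predict(X_test)))
print(log_loss(y_test, gam4.predict_proba(X_test)))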
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, LSTM
from pygam import LogisticGAM
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score


class EpidemicModels:
    def __init__(self, m=1):
        if m == 0:
            self.model = self.baseline()
            self.type = 0
        elif m == 1:
            self.model = self.returnSequential2()
            self.type = 2
        elif m == 2:
            self.model = self.returnSequential6()
            self.type = 2
        elif m == 3:
            self.model = self.RNN()
            self.type = 1
        elif m == 4:
            self.model = self.multi_RNN()
            self.type = 1
        elif m == 5:
            self.model = self.lstm()
            self.type = 1
        elif m == 6:
            self.model = self.multi_lstm()
            self.type = 1
        elif m == 7:
            self.model = LogisticGAM()
            self.type = 3
        elif m == 8:
            self.model = self.returnSequential9()
            self.type = 2

    def baseline(self):
        # Create model: single hidden layer
        model = Sequential()
        model.add(Dense(20, input_dim=20, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))
        # Compile model
        model.compile(loss='binary_crossentropy', optimizer='adam',
                      metrics=['accuracy'])
        return model

    # Sequential 3-layer neural network
    def returnSequential2(self):
        model = Sequential()
        model.add(Dense(14, activation='relu', input_dim=20))
        model.add(Dense(units=7, activation='relu'))
        model.add(Dense(units=1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam',
                      metrics=['accuracy'])
        return model

    # Sequential 6-layer neural network
    def returnSequential6(self):
        model = Sequential()
        model.add(Dense(50, input_dim=20, activation='relu'))
        model.add(Dense(40, activation='relu'))
        model.add(Dense(30, activation='relu'))
        model.add(Dense(20, activation='relu'))
        model.add(Dense(10, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam',
                      metrics=['accuracy'])
        return model

    # Sequential 9-layer neural network
    def returnSequential9(self):
        model = Sequential()
        model.add(Dense(80, input_dim=20, activation='relu'))
        model.add(Dense(70, activation='relu'))
        model.add(Dense(60, activation='relu'))
        model.add(Dense(50, activation='relu'))
        model.add(Dense(40, activation='relu'))
        model.add(Dense(30, activation='relu'))
        model.add(Dense(20, activation='relu'))
        model.add(Dense(10, activation='relu'))
        # sigmoid output: binary cross-entropy expects probabilities
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam',
                      metrics=['accuracy'])
        return model

    def RNN(self):
        model = Sequential()
        # inputs are reshaped to (samples, 1, 20) before training
        model.add(SimpleRNN(2, input_shape=(1, 20)))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam',
                      metrics=['accuracy'])
        return model

    def multi_RNN(self):
        model = Sequential()
        model.add(SimpleRNN(2, input_shape=(1, 20)))
        model.add(Dense(40, activation='relu'))
        model.add(Dense(20, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam',
                      metrics=['accuracy'])
        return model

    def lstm(self):
        model = Sequential()
        model.add(LSTM(10, input_shape=(1, 20)))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='mean_absolute_error', optimizer='adam')
        return model

    def multi_lstm(self):
        model = Sequential()
        model.add(LSTM(4, input_shape=(1, 20), return_sequences=True))
        model.add(LSTM(4))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam',
                      metrics=['accuracy'])
        return model

    def returnModel(self):
        return self.model

    def train(self, X, y, bs=10, epochs=100):
        if self.type == 1:
            # recurrent models expect (samples, timesteps, features)
            X = np.reshape(X, (X.shape[0], 1, X.shape[1]))
        if self.type == 3:
            self.model.gridsearch(X, y)
        else:
            self.model.fit(X, y, batch_size=bs, epochs=epochs, shuffle=True)

    def prediction(self, X):
        if self.type == 1:
            X = np.reshape(X, (X.shape[0], 1, X.shape[1]))
        return self.model.predict(X)

    def cross_eval(self, X, y, bs=10, ep=100, k=5):
        scores = []
        if self.type == 0:
            kf = KFold(n_splits=k, shuffle=True, random_state=0)
            for train_index, test_index in kf.split(X):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]
                self.model.fit(X_train, y_train, batch_size=bs,
                               epochs=ep, verbose=0)
                loss, score = self.model.evaluate(X_test, y_test, verbose=0)
                scores.append(score)
            return sum(scores) / len(scores)
        elif self.type == 1:
            # random_state is only valid together with shuffle=True
            kf = KFold(n_splits=k, shuffle=False)
            for train_index, test_index in kf.split(X):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]
                X_train = np.reshape(X_train,
                                     (X_train.shape[0], 1, X_train.shape[1]))
                X_test = np.reshape(X_test,
                                    (X_test.shape[0], 1, X_test.shape[1]))
                self.model.fit(X_train, y_train, batch_size=bs,
                               epochs=ep, verbose=0)
                score = self.model.evaluate(X_test, y_test, verbose=0)
                # evaluate() returns [loss, metric] when metrics are compiled in
                if isinstance(score, (list, tuple)):
                    score = score[-1]
                scores.append(score)
            return sum(scores) / len(scores)
        elif self.type == 2:
            kf = KFold(n_splits=k, shuffle=True, random_state=0)
            for train_index, test_index in kf.split(X):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]
                self.model.fit(X_train, y_train, batch_size=bs,
                               epochs=ep, verbose=0)
                loss, score = self.model.evaluate(X_test, y_test, verbose=0)
                scores.append(score)
            return sum(scores) / len(scores)
        elif self.type == 3:
            kf = KFold(n_splits=k, shuffle=False)
            for train_index, test_index in kf.split(X):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]
                self.model.gridsearch(X_train, y_train)
                y_pre = self.model.predict(X_test)
                # f1_score expects (y_true, y_pred)
                scores.append(f1_score(y_test, y_pre))
            return sum(scores) / len(scores)
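# A minimal end-to-end sketch of the class above. The 20-feature binary
# dataset here is random stand-in data (an assumption; in the real pipeline
# X and y come from the epidemic dataset):
X = np.random.rand(500, 20)
y = (np.random.rand(500) > 0.5).astype(int)

em = EpidemicModels(m=0)                      # baseline dense network
print(em.cross_eval(X, y, bs=10, ep=5, k=5))  # mean accuracy across folds

gam_em = EpidemicModels(m=7)                  # LogisticGAM via gridsearch
print(gam_em.cross_eval(X, y, k=5))           # mean F1 across folds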