def modFirst(self, paramF="train.json"):
    """Return the default model.

    The model list is built from ``paramF``; its 'bagReg' estimator is
    configured to use the list's 'decTree' estimator as ``base_estimator``.

    Args:
        paramF: parameter file handed to ``modL.modelList``.

    Returns:
        The configured 'bagReg' estimator.
    """
    catalogue = modL.modelList(paramF)
    bagging = catalogue.regL['bagReg']['mod']
    bagging.set_params(base_estimator=catalogue.regL['decTree']['mod'])
    return bagging
def modFirst(self, paramF):
    """Return the default model.

    Looks up the estimator registered under ``self.modName`` in the
    regressor table of the model list built from ``paramF``.

    Args:
        paramF: parameter file handed to ``modL.modelList``.

    Returns:
        The 'mod' entry stored for ``self.modName``.
    """
    registry = modL.modelList(paramF).regL
    # NOTE: a perceptron-specific tweak (setting hidden_layer_sizes from
    # self.X.shape[2]) is currently disabled and therefore not applied here.
    return registry[self.modName]['mod']
def regressorSingle(X, y, nXval=6, isShuffle=True, paramF="train.json"):
    """Cross-validate the bagging regressor and return the best fold model.

    The 'bagReg' estimator of the model list is configured with the list's
    'decTree' estimator as ``base_estimator``. For each of ``nXval`` folds a
    contiguous slice of the (optionally shuffled) sample order is held out,
    the estimator is fitted on the remaining samples and the Pearson
    correlation between ``y`` and the prediction on all of ``X`` is recorded.

    Args:
        X: 2-D samples-by-features data; passed through ``np.nan_to_num``.
        y: target values, one per sample.
        nXval: number of folds. With fewer than two folds no sample can be
            held out, so the estimator is fitted once on all data.
        isShuffle: hold out random samples instead of contiguous ones.
        paramF: accepted for signature compatibility; not used here (the
            model list is built with its defaults).

    Returns:
        ``(model, corrL)``: the fitted model of the fold with the highest
        correlation, and the list of per-fold correlations.
    """
    import copy

    tml = modL.modelList()
    clf = tml.regL['bagReg']['mod']
    clf.set_params(base_estimator=tml.regL['decTree']['mod'])
    N = len(X)
    if isShuffle:
        shuffleL = random.sample(range(N), N)
    else:
        shuffleL = list(range(N))
    X = np.nan_to_num(np.array(X))
    y = np.array(y)
    if nXval < 2:
        # A single fold would leave an empty training set.
        fit_q = clf.fit(X, y)
        return fit_q, [sp.stats.pearsonr(y, fit_q.predict(X))[0]]
    corrL = []
    fitL = []
    for j in range(nXval):  # cross validation
        lower = int(j / nXval * N)
        upper = int((j + 1) / nXval * N)
        idL = shuffleL[:lower] + shuffleL[upper:]
        fit_q = clf.fit(X[idL, :], y[idL])
        y_pred = fit_q.predict(X)
        corrL.append(sp.stats.pearsonr(y, y_pred)[0])
        # scikit-learn style fit() returns the estimator itself (presumably
        # the case for this model), so without a snapshot every entry of fitL
        # would alias one object holding only the last fold's fit.
        fitL.append(copy.deepcopy(fit_q))
    # First fold with the highest correlation; NaN scores are ignored so a
    # degenerate fold cannot break the selection.
    scores = np.array(corrL, dtype=float)
    best = 0 if np.all(np.isnan(scores)) else int(np.nanargmax(scores))
    return fitL[best], corrL
def linLeastSq(X, y):
    """Fit the 'elastic_cv' model on ``(X, y)`` and return its coefficients.

    Despite the function name, the fit is delegated to the 'elastic_cv'
    entry of the model list (presumably an elastic-net estimator with
    built-in cross validation -- confirm against ``modL``), not to a plain
    least-squares solver.

    Args:
        X: feature matrix passed to ``fit``.
        y: target values passed to ``fit``.

    Returns:
        The ``coef_`` attribute of the fitted model.
    """
    tml = modL.modelList()
    clf = tml.regL['elastic_cv']['mod']
    model = clf.fit(X, y)
    return model.coef_
def regressor(X, y, nXval=6, isShuffle=True, paramF="train.json"):
    """Cross-validate a bagged decision-tree regressor.

    For each of ``nXval`` folds a contiguous slice of the (optionally
    shuffled) sample order is held out, the regressor is fitted on the
    remaining samples and ``t_s.calcMetrics`` is evaluated on the prediction
    for all of ``X``. One of the fold models is then picked at random.

    Args:
        X: 2-D samples-by-features data; passed through ``np.nan_to_num``.
        y: target values, one per sample.
        nXval: number of folds. With fewer than two folds the regressor is
            fitted once on all data and no metrics are computed.
        isShuffle: hold out random samples instead of contiguous ones.
        paramF: accepted for signature compatibility; not used here.

    Returns:
        ``(model, metrics)``: a fitted model and a ``pandas.DataFrame`` with
        one row of metrics per fold, or ``(model, {})`` when no cross
        validation is performed.
    """
    import copy
    from sklearn.tree import DecisionTreeRegressor
    from sklearn.ensemble import BaggingRegressor

    # NOTE(review): the estimator configured from the model list is replaced
    # by the explicit BaggingRegressor below. The calls are kept in case
    # modelList() or set_params() has side effects that are not visible here.
    tml = modL.modelList()
    clf = tml.regL['bagReg']['mod']
    decT = tml.regL['decTree']['mod']
    clf.set_params(base_estimator=decT)
    # NOTE(review): criterion='mse', min_impurity_split and base_estimator
    # appear to target an older scikit-learn API -- confirm the pinned version.
    decT = DecisionTreeRegressor(criterion='mse',
                                 max_depth=None,
                                 max_features=None,
                                 max_leaf_nodes=None,
                                 min_impurity_decrease=0.0,
                                 min_impurity_split=None,
                                 min_samples_leaf=1,
                                 min_samples_split=2,
                                 min_weight_fraction_leaf=0.0,
                                 random_state=None,
                                 splitter='best')
    clf = BaggingRegressor(base_estimator=decT,
                           bootstrap=True,
                           bootstrap_features=False,
                           max_features=1.0,
                           max_samples=1.0,
                           n_estimators=10,
                           n_jobs=1,
                           oob_score=False,
                           random_state=None,
                           verbose=0,
                           warm_start=False)
    N = len(X)
    X = np.nan_to_num(np.array(X))
    y = np.array(y)
    if isShuffle:
        shuffleL = random.sample(range(N), N)
    else:
        shuffleL = list(range(N))
    if nXval < 2:
        # Nothing can be held out: fit once on the complete data set.
        return clf.fit(X, y), {}
    corrL = []
    fitL = []
    for j in range(nXval):  # cross validation
        lower = int(j / nXval * N)
        upper = int((j + 1) / nXval * N)
        idL = shuffleL[:lower] + shuffleL[upper:]
        fit_q = clf.fit(X[idL, :], y[idL])
        y_pred = fit_q.predict(X)
        corrL.append(t_s.calcMetrics(y, y_pred))
        # fit() returns the estimator itself, so each fold needs its own
        # snapshot; otherwise every entry of fitL is the last fold's fit.
        fitL.append(copy.deepcopy(fit_q))
    # pick a random fold model
    nRandom = int(nXval * np.random.uniform())
    return fitL[nRandom], pd.DataFrame(corrL)
def modPick(self, clf):
    """Randomly change one hyper-parameter of ``clf``.

    A parameter name is drawn from the grid registered for ``self.modName``
    and then one of the values listed for that name; the estimator is
    updated with the resulting parameter set.

    Args:
        clf: estimator exposing ``get_params`` / ``set_params``.

    Returns:
        ``(clf, params)``: the updated estimator and the full parameter
        dictionary that was applied to it.
    """
    grid = modL.modelList().gridL[self.modName]
    settings = clf.get_params()
    name = random.choice(list(grid))
    settings[name] = random.choice(grid[name])
    clf.set_params(**settings)
    return clf, settings
def loopMod(self, paramF="train.json", test_size=0.4):
    """Train every active model and return the best performing one.

    The samples are shuffled and split into a training and a test part
    according to ``test_size``. Each active entry of the model list is
    evaluated with ``self.perfCla``; the models are ranked by the product
    of accuracy and AUC.

    Side effects: stores the curve points in ``self.rocC``, the score table
    in ``self.trainR`` and the best model's prediction on ``self.X`` in
    ``self.y_pred``.

    Args:
        paramF: parameter file handed to ``modL.modelList``.
        test_size: fraction of the samples reserved for testing.

    Returns:
        ``(mod, trainR)``: the best model and the score table sorted by
        descending performance.
    """
    nSample = len(self.y)
    order = random.sample(range(nSample), nSample)
    cut = int(nSample * (1. - test_size))
    trainL = order[0:cut]
    testL = order[cut:int(nSample * (1.))]

    catalogue = modL.modelList(paramF)
    catalogue.set_params()
    scoreRows, fitted, curves = [], [], []
    for index in range(catalogue.nCat()):
        entry = catalogue.retCat(index)
        if entry['active']:
            (mod, trainS, testS, t_diff, x_pr, y_pr, auc, fsc, acc,
             cv) = self.perfCla(entry, trainL, testL)
            scoreRows.append([
                entry['name'], trainS, testS, t_diff, auc, fsc, acc,
                entry["type"]
            ])
            fitted.append(mod)
            curves.append([x_pr, y_pr])

    trainR = pd.DataFrame(scoreRows)
    trainR.columns = [
        "model", "train_score", "test_score", "time", "auc", "fsc", "acc",
        "type"
    ]
    trainR['perf'] = trainR['acc'] * trainR['auc']
    trainR = trainR.sort_values(['perf'], ascending=False)
    # The index labels still hold the insertion order, i.e. the position of
    # each model in ``fitted``.
    mod = fitted[trainR.index[0]]
    self.rocC = curves
    self.trainR = trainR
    y_pred = mod.predict(self.X)
    try:
        # Class index from a 2-D prediction; the value itself is not used.
        y_pred.dot(range(y_pred.shape[1]))
    except IndexError:
        pass
    self.y_pred = y_pred
    return mod, trainR
def modPick(self, clf):
    """Randomly change one tree and one bagging hyper-parameter of ``clf``.

    One parameter is drawn from the 'decTree' grid for the base estimator
    and one from the 'bagging' grid for the ensemble itself.

    Args:
        clf: bagging estimator whose parameters include ``base_estimator``.

    Returns:
        ``(clf, params)``: the updated estimator and a dictionary merging the
        base-estimator parameters with the ensemble parameters (ensemble
        values win on name clashes).
    """
    tml = modL.modelList()
    pDecT = tml.gridL['decTree']
    pBag = tml.gridL['bagging']
    paraB = clf.get_params()
    decT = paraB.pop('base_estimator')
    paraS = decT.get_params()
    treeKey = random.choice(list(pDecT))
    paraS[treeKey] = random.choice(pDecT[treeKey])
    bagKey = random.choice(list(pBag))
    paraB[bagKey] = random.choice(pBag[bagKey])
    # get_params() is deep for scikit-learn estimators, so paraB may also hold
    # 'base_estimator__<name>' entries with the previous tree settings.
    # Keep that view in step with the value just drawn.
    nestedKey = 'base_estimator__' + treeKey
    if nestedKey in paraB:
        paraB[nestedKey] = paraS[treeKey]
    # Apply the ensemble settings first: any nested entries among them are
    # forwarded to the base estimator and would otherwise undo the tree
    # parameter chosen above.
    clf.set_params(**paraB)
    decT.set_params(**paraS)
    clf.set_params(base_estimator=decT)
    s = {**paraS, **paraB}
    return clf, s
def tune(self, paramF="train.json", tuneF="train_tune.json"):
    """Tune all available models with a grid search.

    Reads the search grids from ``tuneF``, runs a 5-fold ``GridSearchCV`` on
    ``self.X`` / ``self.y`` for every active entry and writes the parameter
    sets, updated with the best values found, back to ``paramF``.

    Args:
        paramF: parameter file of the model list; overwritten with the
            tuned parameters.
        tuneF: JSON file with one grid description per model ('active',
            'name' and 'param_grid' keys are read).
    """
    tml = modL.modelList(paramF)
    params = tml.get_params()
    with open(tuneF) as gridFile:
        pgrid = json.load(gridFile)
    for idx, entry in enumerate(pgrid):
        if not entry['active']:
            continue
        print("tuning: " + entry['name'])
        search = GridSearchCV(estimator=tml.retCat(idx)['mod'],
                              param_grid=entry['param_grid'],
                              cv=5,
                              return_train_score=False)
        search.fit(self.X, self.y)
        params[idx].update(search.best_params_)
    with open(paramF, 'w') as outFile:
        outFile.write(json.dumps(params))
def regressor(X, vf, vg, nXval=False, isShuffle=True, paramF="train.json"):
    """Learn the ratio ``vg / vf`` from ``X`` with a bagging regressor.

    The 'bagReg' estimator of the model list is configured with the list's
    'decTree' estimator as ``base_estimator`` and trained on the element-wise
    ratio ``vg / vf`` (NaN and +Inf ratios are replaced by 1). For each fold
    the predicted ratio is multiplied with ``vf`` and the Pearson correlation
    of that corrected series with ``vg`` is recorded.

    Args:
        X: 2-D samples-by-features data; passed through ``np.nan_to_num``.
        vf: array-like denominator series (must support element-wise
            division and boolean-mask assignment).
        vg: array-like numerator series, same length as ``vf``.
        nXval: number of cross-validation folds. With fewer than two folds
            (including the default ``False``) the estimator is fitted once
            on all data.
        isShuffle: hold out random samples instead of contiguous ones.
        paramF: parameter file handed to ``modL.modelList``.

    Returns:
        ``(model, corrL)``: the fitted model of the best fold and the list
        of per-fold correlations. If the first correlation is NaN the last
        fitted model and ``[0]`` are returned.
    """
    import copy

    tml = modL.modelList(paramF)
    clf = tml.regL['bagReg']['mod']
    clf.set_params(base_estimator=tml.regL['decTree']['mod'])
    y = vg / vf
    # 0/0 gives NaN and x/0 gives +Inf: treat both as "no correction".
    y[y != y] = 1.
    y[y == float('Inf')] = 1.
    N = len(X)
    if isShuffle:
        shuffleL = random.sample(range(N), N)
    else:
        shuffleL = list(range(N))
    X = np.nan_to_num(np.array(X))
    y = np.array(y)
    if not nXval or nXval < 2:
        # No usable fold count: nothing can be held out, so fit on all data.
        fit_q = clf.fit(X, y)
        score = sp.stats.pearsonr(vf * fit_q.predict(X), vg)[0]
        return fit_q, ([0] if np.isnan(score) else [score])
    corrL = []
    fitL = []
    for j in range(nXval):  # cross validation
        lower = int(j / nXval * N)
        upper = int((j + 1) / nXval * N)
        idL = shuffleL[:lower] + shuffleL[upper:]
        fit_q = clf.fit(X[idL, :], y[idL])
        r_quot = fit_q.predict(X)
        corr = vf * r_quot
        corrL.append(sp.stats.pearsonr(corr, vg)[0])
        # fit() returns the estimator itself (scikit-learn convention), so
        # each fold needs its own snapshot to remain distinguishable.
        fitL.append(copy.deepcopy(fit_q))
    if np.isnan(corrL[0]):
        return fit_q, [0]
    # First fold with the highest correlation, ignoring NaN scores.
    best = int(np.nanargmax(np.array(corrL, dtype=float)))
    return fitL[best], corrL
# Weather features used as model input. The wider candidate list also had
# 'dewPoint', 'windGust', 'uvIndex' and 'visibility'; they are left out here.
tL = [
    'temperature',
    'apparentTemperature',
    'humidity',
    'ozone',
    'pressure',
    'windSpeed',
    'windBearing',
    'cloudCover',
    'precipAccumulation',
]
m = "rain"  # target column
X = t_s.interpMissing(timeL[tL])
y = s_s.interpMissing(hourL[m])
# y, _ = t_r.binVector(y, nBin=7, threshold=0.5)
mod = t_l.modelList(paramF=baseDir + "train/weath_" + m + ".json")
mod.get_params()
# Pick up edits to the library modules before training.
importlib.reload(t_l)
importlib.reload(tlib)
tMod = tlib.trainMod(X, y)
mod, trainR = tMod.loopMod(paramF=baseDir + "train/weath_" + m + ".json",
                           test_size=.4)
tMod.plotRoc()

if False:  # disabled feature-importance section
    print('----------------------feature-importance------------------------')
    tL = [
        'temperature',
        'apparentTemperature',
        'humidity',
        'ozone',
        'pressure',
        'windSpeed',
        'windBearing',
        'cloudCover',
        'precipAccumulation',
    ]