def LinearReg(file1, file2):
    """Train a linear regression on ``file1`` and evaluate it on ``file2``.

    Both files are loaded through ``file2matrix``, which is unpacked here as
    a ``(features, labels)`` pair.

    Args:
        file1: Training data source handed to ``file2matrix``.
        file2: Evaluation data source handed to ``file2matrix``.

    Returns:
        A ``(y_true, y_score, y_pred)`` tuple: the labels loaded from
        ``file2``, the model's ``decision_function`` output and the model's
        ``predict`` output for the features loaded from ``file2``.
    """
    train_features, train_labels = file2matrix(file1)
    model = LinearRegression()
    model.fit(train_features, train_labels)

    test_features, test_labels = file2matrix(file2)
    scores = model.decision_function(test_features)
    predictions = model.predict(test_features)
    return test_labels, scores, predictions
class LinearRegression():
    """Thin wrapper that delegates every call to an underlying ``LR`` estimator.

    NOTE(review): ``LR`` is defined outside this snippet; presumably it is
    scikit-learn's ``LinearRegression`` imported under an alias -- confirm.
    """

    def __init__(self, fit_intercept=True, normalize=False, copy_X=True,
                 n_jobs=1):
        """Build the wrapped estimator, forwarding the four options positionally."""
        self.LR = LR(fit_intercept, normalize, copy_X, n_jobs)

    def decision_function(self, x):
        """Return the wrapped estimator's ``decision_function`` output for ``x``."""
        return self.LR.decision_function(x)

    def fit(self, x, y):
        """Fit the wrapped estimator on ``x``/``y`` and return its result."""
        return self.LR.fit(x, y)

    def get_params(self):
        """Return the wrapped estimator's parameters."""
        return self.LR.get_params()

    def predict(self, x):
        """Return the wrapped estimator's predictions for ``x``."""
        return self.LR.predict(x)

    def set_params(self, **params):
        """Forward keyword parameters to the wrapped estimator.

        The keywords are unpacked again when delegating; passing the dict as
        a single positional argument would not reach a ``set_params(**kw)``
        style delegate. Returns whatever the delegate returns, like the
        other methods of this wrapper.
        """
        return self.LR.set_params(**params)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

import load_data


def prediction_error(predict, test):
    """Return the element-wise absolute difference ``|predict - test|`` as an array."""
    return np.abs(np.asarray(predict - test))


loader = load_data.bikeshare_loader()
loader.preprocess()

# Train set: the same nine feature columns with two different target columns
# (9 and 10).
(X, y_c) = loader.training_data(range(9), 9)
(X, y_r) = loader.training_data(range(9), 10)

# Test set: keep the two targets in separate names so that the model trained
# on target column 9 is evaluated against target column 9 (the second call
# used to overwrite test_y_c with the column-10 target).
(test_X, test_y_c) = loader.test_data(range(9), 9)
(test_X, test_y_r) = loader.test_data(range(9), 10)

linreg = LinearRegression(fit_intercept=True, normalize=True)
linreg.fit(X, y_c)

# Print the fitted coefficients, matching the label; the previous
# decision_function(X) call produced predictions for the training rows.
print("Linear coefficients:")
print(linreg.coef_)

predict_y_c = linreg.predict(test_X)
error_y_c = prediction_error(predict_y_c, test_y_c)
print("Max Value: {}, Average error: {}".format(test_y_c.max(), error_y_c.mean()))

# Plot prediction (blue), ground truth (green) and absolute error (red)
# against the first feature column.
plt.figure()
plt.title("Linear Regression")
plt.plot(test_X[:, 0], predict_y_c, 'b')
plt.plot(test_X[:, 0], test_y_c, 'g')
plt.plot(test_X[:, 0], error_y_c, 'r')
# 3.7 Grafique la curva ROC () def plot_roc_curve(fpr, tpr, label=None): import matplotlib.pyplot as plt plt.plot(fpr, tpr, linewidth=2, label=label) plt.plot([0, 1], [0, 1], 'k--') plt.axis([0, 1, 0, 1]) plt.xlabel('FPR') plt.ylabel('TPR') plt.title("ROC Curve") plt.show() y_score = model.decision_function(X) (fpr, tpr, thresholds) = metrics.roc_curve(y, y_score) plot_roc_curve(fpr, tpr) # 3.8 Calcule la Probabilidad de cada clasificación y compare la regla de clasificación, # la clase predicha y la clase real p = model.predict_proba(X) # versus y_pred versus y #3.9 Realice la clasificación multiclase (8 clases) utilizando el citerio OnevsRest # Calcule la probabilidad predicha, su consistencia con la clasificación efectiva y la clase real data_ret['target'] = np.empty((len(data_ret), 1)) for i in range(0, len(data_ret)): if (data_ret.iloc[i, 0] < -0.01):
class Model:
    """Linear-regression model over lagged closing prices of a single stock.

    The model is refitted for every test day (optionally with a k-fold
    search over ``param_ranges``) and records predictions, actual values and
    predicted yields.
    """

    def __init__(self, stock, params=None, param_ranges=None, debug=False):
        """Initializes model

        Args:
            stock: Object exposing ``data``, ``testData`` and ``n_days_test``.
            params: Current hyper-parameters; defaults to ``{'lag': 5}``.
            param_ranges: Candidate values per hyper-parameter; defaults to
                ``{'lag': range(2, 20, 2)}``.
            debug: When true, progress information is printed.
        """
        self.mod = LinearRegression()
        self.name = 'LINREG'
        # Defaults are built per instance so that no dict is shared between
        # instances through a mutable default argument.
        self.params = {'lag': 5} if params is None else params
        self.param_ranges = ({'lag': range(2, 20, 2)}
                             if param_ranges is None else param_ranges)
        self.debug = debug
        self.investments = {}
        self.performance = {}
        self.stock = stock
        self.yields = {}
        self.predictedYs = []
        self.actualYs = []
        self.pYields = []
        self.cashStock = {}
        self.classification = False

    def __str__(self):
        return "Linear Regression Model"

    def addPerformance(self, alpha, performance):
        """Store ``performance`` under key ``alpha``."""
        self.performance[alpha] = performance

    def addCashStock(self, alpha, cashStock):
        """Store ``cashStock`` under key ``alpha``."""
        self.cashStock[alpha] = cashStock

    def addInvestments(self, alpha, investments):
        """Store ``investments`` under key ``alpha``."""
        self.investments[alpha] = investments

    def addYield(self, alpha, pyield):
        """Store ``pyield`` under key ``alpha``."""
        self.yields[alpha] = pyield

    def fit(self, X, y):
        """Fit the underlying estimator on ``X``/``y``."""
        self.mod.fit(X, y)

    def score(self, X, y):
        """Return the underlying estimator's score on ``X``/``y``."""
        return self.mod.score(X, y)

    def initMod(self, data, params):
        """Adopt ``params`` and rebuild the lagged feature table from ``data``.

        Sets ``self.params``, ``self.lag_n``, ``self.lag`` and
        ``self.laggedData`` (the output of ``TimeLag(lag).transform(data)``).
        """
        self.params = params
        self.lag_n = params['lag']
        self.lag = TimeLag(self.lag_n)
        self.laggedData = self.lag.transform(data)

    def _refit(self, X, params, target_col, dayBefore):
        """Rebuild lagged features with ``params`` and fit on rows up to ``dayBefore``."""
        self.initMod(X, params)
        # The first ``lag_n`` target rows are dropped.
        # NOTE(review): presumably because they have no complete lag window
        # in ``laggedData`` -- confirm against TimeLag.
        y = self.stock.data[target_col][:dayBefore].iloc[self.lag_n:]
        self.fit(self.laggedData[:dayBefore], y)

    def validate(self, day, n_splits=2, kfold=True):
        """Refit the model for ``day``, optionally selecting params by k-fold.

        Args:
            day: The day to prepare for; training targets stop at the
                previous calendar day.
            n_splits: Number of folds used for the search and for averaging
                the fold scores.
            kfold: When false, the current ``self.params`` are kept and the
                model is simply refitted.
        """
        dayBefore = day - datetime.timedelta(days=1)
        X = self.stock.data['Close'][:day]
        target_col = 'Classification' if self.classification else 'Close'
        if self.debug:
            print("input for model " + str(X.tail()))

        if not kfold:
            self._refit(X, self.params, target_col, dayBefore)
            return

        # Honour the requested fold count; the average below divides by the
        # same number.
        kf = KFold(n_splits=n_splits)
        # Start from -inf and fall back to the current params so that a best
        # candidate always exists, however poor the scores are.
        bestParams = self.params
        bestScore = float('-inf')
        for combo in self.generateCombinations(self.param_ranges):
            total = 0
            self.initMod(X, combo)
            y = self.stock.data[target_col][:dayBefore].iloc[self.lag_n:]
            for train_index, test_index in kf.split(
                    self.laggedData[:dayBefore]):
                X_train = self.laggedData.iloc[train_index]
                X_test = self.laggedData.iloc[test_index]
                y_train, y_test = y.iloc[train_index], y.iloc[test_index]
                self.fit(X_train, y_train)
                total += self.score(X_test, y_test)
            avgScore = total / n_splits
            if self.debug:
                print("total score: " + str(total) + " for params: " +
                      str(combo) + " avg score: " + str(avgScore))
            if avgScore > bestScore:
                bestScore = avgScore
                bestParams = combo

        if self.debug:
            print("model validated, chosing params: " + str(bestParams))
        self._refit(X, bestParams, target_col, dayBefore)

    def numValidations(self, freq):
        """Return the test-day positions on which a k-fold search is run.

        ``freq == 0`` means only position 0; otherwise every ``freq``-th
        position below ``self.stock.n_days_test``.
        """
        if freq == 0:
            return [0]
        return range(0, self.stock.n_days_test, freq)

    def generateCombinations(self, params):
        """Return every combination of the candidate values in ``params``.

        Args:
            params: Mapping from parameter name to an iterable of candidates.

        Returns:
            A list of dicts, one per element of the Cartesian product, each
            mapping every parameter name to one of its candidates.
        """
        keys = list(params.keys())
        options = [params[key] for key in keys]
        return [dict(zip(keys, combo))
                for combo in itertools.product(*options)]

    def getYields(self, validationFreq=0):
        """Walk through the test days, refitting and predicting each one.

        For every row of ``self.stock.testData`` the model is validated
        (k-fold search on the positions given by ``numValidations``, plain
        refit otherwise) and a prediction is made from
        ``self.laggedData[day:day]``. The predicted yield is
        ``(prediction - Open) / Open``; in classification mode it is the
        estimator's ``decision_function`` output instead, and the prediction
        is replaced by the actual close +/- 5.

        Side effects: sets ``predictedYs``, ``actualYs``, ``pYields`` and
        ``meanError`` (mean absolute prediction error, ``nan`` when there is
        no test data).

        Returns:
            The list of predicted yields, one per test day.
        """
        pYields = []
        validationDays = self.numValidations(validationFreq)
        predictedYs = []
        actualYs = []
        for i, day in enumerate(self.stock.testData.index):
            self.validate(day, kfold=(i in validationDays))
            features = self.laggedData[day:day]
            if self.debug:
                print("training model for day " + str(day))
                print("Lagged data for day " + str(day) + " : " +
                      str(features))
            predictY = self.mod.predict(features)
            row = self.stock.testData.iloc[i]
            oldY = row['Open']
            actualY = row['Close']
            pYield = (predictY - oldY) / oldY
            if self.classification:
                # The confidence stands in for the yield, and the class
                # prediction is turned into a price around the actual close.
                pYield = self.mod.decision_function(features)
                predictY = ([actualY + 5] if predictY == 1
                            else [actualY - 5])
            pYields.append(pYield[0])
            predictedYs.append(predictY[0])
            actualYs.append(actualY)

        self.predictedYs = predictedYs
        self.actualYs = actualYs
        self.pYields = pYields
        # Guard the mean against an empty test set instead of dividing by 0.
        if predictedYs:
            self.meanError = sum(
                abs(p - a) for p, a in zip(predictedYs, actualYs)
            ) / len(predictedYs)
        else:
            self.meanError = float('nan')
        return pYields