def test_theil_sen_1d_no_intercept():
    """Theil-Sen without an intercept recovers the slope where OLS fails."""
    X, y, w, c = gen_toy_problem_1d(intercept=False)

    # Ordinary least squares is pulled away from the true slope.
    ols = LinearRegression(fit_intercept=False).fit(X, y)
    assert np.abs(ols.coef_ - w - c) > 0.5

    # Theil-Sen stays close to the combined true coefficient.
    estimator = TheilSenRegressor(fit_intercept=False, random_state=0).fit(X, y)
    assert_array_almost_equal(estimator.coef_, w + c, 1)
    assert_almost_equal(estimator.intercept_, 0.0)

    # non-regression test for #18104: score() must not raise
    estimator.score(X, y)
def getscore_getnext(df, days_ahead, coin):
    """Fit a Theil-Sen regressor on price history and forecast ahead.

    Args:
        df: pandas DataFrame with at least 'close' and 'date' columns.
        days_ahead: forecast horizon in rows; 'close' is shifted by this
            amount to build the label.
        coin: coin identifier (only used by the commented-out pickle path;
            kept for interface compatibility).

    Returns:
        (confidence, futureval): R^2 on the held-out 15% split, and the
        prediction for the most recent feature row.
    """
    # Local import: sklearn.cross_validation was removed in scikit-learn 0.20;
    # train_test_split now lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split

    forecast_val = days_ahead
    forecast_col = 'close'
    df.fillna(value=-99999, inplace=True)
    # Label = the close price `forecast_val` rows in the future.
    df['label'] = df[forecast_col].shift(-forecast_val)

    # Positional `axis` was removed from DataFrame.drop in pandas 2.0;
    # use the explicit `columns=` keyword instead of df.drop([...], 1).
    X = np.array(df.drop(columns=['label', 'date']))
    X = preprocessing.scale(X)
    futureX = X[-1:]           # most recent row: input for the forecast
    X = X[:-forecast_val]      # drop rows whose label became NaN after the shift
    df.dropna(inplace=True)
    y = np.array(df['label'])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)
    '''
    inPickle = open('%s.pickle' %(coin), 'rb')
    clf = pickle.load(inPickle)
    '''
    clf = TheilSenRegressor()
    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)  # R^2 with 1.0 being perfect
    futureval = clf.predict(futureX)
    return (confidence, futureval)
def train_and_return_model_replicas(self, host, port, username, password, appType, appNames, folderNames):
    """Train a Theil-Sen model mapping request rate to total CPU utilisation.

    Args:
        host, port, username, password: database connection parameters.
        appType: kept for interface compatibility (unused in this method).
        appNames, folderNames: which app datasets to combine.

    Returns:
        (regr, rms): the fitted TheilSenRegressor and the RMSE on the
        held-out test split.
    """
    df = self.getAndCombineAllDbs(host, port, username, password, appNames, folderNames)
    # Aggregate per-pod averages into cluster-wide totals.
    df['total_cpu_util'] = df['pod_util_cpu_avg'] * df['num_pods']
    df['total_mem_util'] = df['pod_util_mem_avg'] * df['num_pods']

    df_X = df[['requests']].values
    df_Y = df[['total_cpu_util']].values
    X_train, X_test, y_train, y_test = train_test_split(df_X, df_Y,
                                                        test_size=0.33,
                                                        random_state=42)

    # Fixed: the original also built a synthetic make_regression dataset and
    # scored the model on it -- unrelated data, result discarded; removed.
    # .ravel() avoids sklearn's column-vector-y DataConversionWarning.
    regr = TheilSenRegressor(random_state=0).fit(X_train, y_train.ravel())
    y_pred = regr.predict(X_test)
    rms = sqrt(mean_squared_error(y_test, y_pred))
    print('RMs score: %.2f' % rms)
    return regr, rms
def _cfunc_theilsen(x, y):
    """
    Get Theil-Sen regression score for data set.

    Args:
        x: (list<float>) independent property (x-axis)
        y: (list<float>) dependent property (y-axis)

    Returns: (float) Theil-Sen score
    """
    from sklearn.linear_model import TheilSenRegressor

    model = TheilSenRegressor(random_state=21)
    # sklearn expects a 2-D feature matrix: one column, one row per sample.
    features = np.array(x).reshape(-1, 1)
    model.fit(features, y)
    return model.score(features, y)
# Theil sen model
from sklearn.linear_model import TheilSenRegressor  # Theil Sen Regressor Model

# Instantiate
ts_reg = TheilSenRegressor(random_state = 508)

# Fit
ts_reg.fit(X_train, y_train)

# Predict
y_pred = ts_reg.predict(X_test)

# Score (R^2 on the held-out test set)
y_score_ts = ts_reg.score(X_test, y_test)
print(y_score_ts)

#############

# Regression tree
from sklearn.tree import DecisionTreeRegressor  # Regression trees

# Instantiate
# Fixed: criterion='mse' was renamed 'squared_error' in scikit-learn 1.0
# and removed in 1.2; behaviour is identical.
tree_reg = DecisionTreeRegressor(criterion = 'squared_error',
                                 min_samples_leaf = 14,
                                 random_state = 508)

# Fit
tree_reg.fit(X_train, y_train)
print('R² linear reg.: %.2f' % lin.score(X, Y)) #R² print('MSE linear reg.: %.2f' % mean_squared_error(y_test, y_predlin)) # MSE print('R²: %.2f' % ridge.score(X, Y)) #R² print('MSE: %.2f' % mean_squared_error(y_test, y_ridge)) # MSE print(lasso.score(X, Y)) #R² print('MSE: %.2f' % mean_squared_error(y_test, y_lasso)) # MSE print('R²: %.2f' % elast.score(X, Y)) #R² print('MSE: %.2f' % mean_squared_error(y_test, y_predelast)) print('R²: %.2f' % ransac.score(X, Y)) #R² print('MSE: %.2f' % mean_squared_error(y_test, y_predransac)) print('R²: %.2f' % ts.score(X, Y)) #R² print('MSE: %.2f' % mean_squared_error(y_test, y_predts)) print('R²: %.2f' % huber.score(X, Y)) #R² print('MSE: %.2f' % mean_squared_error(y_test, y_predhuber)) """# Classification models""" # 5.2.1 Logistic Regression log = LogisticRegression(random_state=0, solver='lbfgs', multi_class='multinomial') pred_log = log.fit(X_train, y_train).predict( X_test ) #train the algorithm on training data and predict using the testing data y_pred_log = log.predict(X_test) print(log.predict(X_test)) #predcit results based on the trained model
#!/usr/bin/env python
"""Fit a Theil-Sen regressor on housing data and report its test R^2."""

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import TheilSenRegressor

# Load the dataset; the first row is the header.
data = pd.read_csv("dataset.csv", header=0)

# Predictor columns and the price target.
X = data.loc[:, ["Commune", "Etage", "Superficie", "Piece"]].values
Y = data.loc[:, "Prix"].values

# Hold out 20% of the rows for evaluation (no fixed seed: split varies per run).
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

# fit() returns the estimator, so construction and training chain together.
regressor = TheilSenRegressor(random_state=0).fit(X_train, Y_train)

score = regressor.score(X_test, Y_test)
print(score)
def quantify_beat(self, beatnumber):
    """Quantify the downslope quality of the beat at index `beatnumber`.

    Fits a robust (Theil-Sen) line to the signal's downslope around the
    detected beat and derives geometric quality measures from the fit.

    Returns:
        (ok_slope_length, ok_slope_angle,
         beat_downslope_orthogonal_distance, beat_downslope_peak_distance,
         iscrap): slope-quality metrics plus a boolean "certainly bad fit"
        flag.
    """
    beatindex = self.ibeats[beatnumber]
    # approx expected ibi (inter-beat interval) from mean beat-time spacing
    meanibi = np.mean(np.diff(self.tbeats))
    # downslope is less than half of full beat. look for peaks on either side
    downslopewindow = int((meanibi / 2.5) * self.fps)
    # pick preceding maximum
    try:
        maxindex = np.where(
            heartbeat_localmax(self.x[(beatindex - downslopewindow):beatindex]))[0][-1]
    except:
        # NOTE(review): bare except -- presumably guarding the IndexError when
        # no local maximum exists; falls back to a plain argmax. Confirm and
        # narrow to `except IndexError`.
        maxindex = np.argmax(self.x[(beatindex - downslopewindow):beatindex])
    peaki = beatindex - downslopewindow + maxindex
    # double check we didn't go beyond prev. beat
    if beatnumber > 0 and peaki <= self.ibeats[beatnumber - 1]:
        peaki = self.ibeats[beatnumber - 1] + downslopewindow + np.argmax(
            self.x[(self.ibeats[beatnumber - 1] + downslopewindow):beatindex])
    # pick succeeding minimum
    troughi = beatindex + np.argmin(
        self.x[beatindex:(beatindex + downslopewindow)])
    # double check we didn't go beyond next beat
    if beatnumber < len(
            self.ibeats) - 1 and troughi >= self.ibeats[beatnumber + 1]:
        troughi = beatindex + np.argmin(
            self.x[beatindex:(self.ibeats[beatnumber + 1] - 1)])
    # robust regression on downslope: x(t) ~ m*t + k over [peaki, troughi)
    downslopemodel = TheilSenRegressor().fit(
        self.t[peaki:troughi].reshape(-1, 1), self.x[peaki:troughi])
    r2 = downslopemodel.score(self.t[peaki:troughi].reshape(-1, 1),
                              self.x[peaki:troughi])
    # count which points are close enough to prediction
    # NOTE(review): predicted_downslope is computed but never used below.
    predicted_downslope = downslopemodel.predict(
        self.t[peaki:troughi].reshape(-1, 1))
    amplitude = self.x[peaki] - self.x[troughi]
    m, k = downslopemodel.coef_[0], downslopemodel.intercept_
    # orthogonal distance of each sample from the fitted line
    point_to_line_distances = np.abs(k + m * self.t[peaki:troughi] -
                                     self.x[peaki:troughi]) / np.sqrt(
                                         1 + m * m)
    # distances as a percentage of the peak-to-trough amplitude
    point_to_line_distance_percentages = 100.0 / amplitude * point_to_line_distances
    ok_points = np.where(point_to_line_distance_percentages <
                         BeatQuality.ACCEPTED_DEVIATION_PERCENTAGE)[0]
    fraction_acceptable = 1.0 / (troughi - peaki) * len(ok_points)
    # numerically characterize non-crap portion of the slope
    ok_slope_length = fraction_acceptable * np.sqrt(
        (troughi - peaki)**2 + (self.x[peaki] - self.x[troughi])**2)
    ok_slope_angle = np.arctan(downslopemodel.coef_[0])
    # numerically characterize beat placement (guarding zero-length slopes)
    beat_downslope_orthogonal_distance = 0 if ok_slope_length == 0 else 1.0 / ok_slope_length * (
        np.abs(k + m * self.t[beatindex] - self.x[beatindex]) / np.sqrt(1 + m * m))
    beat_downslope_peak_distance = 0 if ok_slope_length == 0 else 1.0 / ok_slope_length * np.sqrt(
        (beatindex - peaki)**2 + (self.x[peaki] - self.x[beatindex])**2)
    # check if certain to be bad fit
    iscrap = False
    if np.abs(
            r2
    ) < BeatQuality.MINIMUM_R2 or fraction_acceptable < BeatQuality.MINIMUM_LINEARITY:
        print "crap! ", beatnumber, r2, fraction_acceptable
        iscrap = True
    return ok_slope_length, ok_slope_angle, beat_downslope_orthogonal_distance, beat_downslope_peak_distance, iscrap
# Vectorise the training feature dicts into a dense numeric matrix.
X = vec.fit_transform(x_train).toarray()
Y = np.asarray(train.CLOSE).astype('int')

#Pre-Processing Test data
X_test = test[['HIGH', 'LOW', 'OPEN', 'TOTTRDQTY', 'TOTTRDVAL', 'TOTALTRADES']]
x_test = X_test.to_dict(orient='records')
vec = DictVectorizer()
x = vec.fit_transform(x_test).toarray()
y = np.asarray(test.CLOSE).astype('int')

#Classifier (robust regression on the integer close price)
clf = TheilSenRegressor()
clf.fit(X, Y)
print("Accuracy of this Statistical Arbitrage model is: ", clf.score(x, y))
predict = clf.predict(x)
test['predict'] = predict

#Ploting: actual closes plus predictions against the date index
train.index = train.Date
test.index = test.Date
train['CLOSE'].plot()
test['CLOSE'].plot()
test['predict'].plot()
plt.legend(loc='best')
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import classification_report, confusion_matrix

#loading the dataset
train = pd.read_csv("C:/Users/HP/Desktop/train (1).csv")
test = pd.read_csv("C:/Users/HP/Desktop/test (2).csv")
train = train.dropna()
test = test.dropna()
train.head()

# Features are every column but the last; the target is column index 1.
# NOTE(review): these agree only for a two-column file -- with more columns
# the feature matrix would include the target. Confirm the dataset layout.
X_train = np.array(train.iloc[:, :-1].values)
y_train = np.array(train.iloc[:, 1].values)
X_test = np.array(test.iloc[:, :-1].values)
y_test = np.array(test.iloc[:, 1].values)

#TheilSen Regressor
from sklearn.linear_model import TheilSenRegressor

model = TheilSenRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = model.score(X_test, y_test)  # R^2 on the test split

# Fitted line over the training features.
plt.plot(X_train, model.predict(X_train), color='b')
plt.show()

# Fixed: the score was accidentally printed twice.
print(accuracy)
# Encode the training feature dictionaries as a dense array.
vec = DictVectorizer()
X = vec.fit_transform(x_train).toarray()
Y = np.asarray(train.CLOSE).astype('int')

#Pre-Processing Test data
feature_cols = ['HIGH', 'LOW', 'OPEN', 'TOTTRDQTY', 'TOTTRDVAL', 'TOTALTRADES']
X_test = test[feature_cols]
x_test = X_test.to_dict(orient='records')
vec = DictVectorizer()
x = vec.fit_transform(x_test).toarray()
y = np.asarray(test.CLOSE).astype('int')

#Classifier (robust regression on the integer close price)
clf = TheilSenRegressor()
clf.fit(X, Y)
print("Statistical Arbitrage model's accuracy is: ", clf.score(x, y))
predict = clf.predict(x)
test['predict'] = predict

#Ploting: actual closes plus predictions against the date index
train.index = train.Date
test.index = test.Date
train['CLOSE'].plot()
test['CLOSE'].plot()
test['predict'].plot()
plt.legend(loc='best')
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()
from sklearn.model_selection import train_test_split
# x_train, x_test, y_train, y_test = train_test_split(xTrain, yTrain, test_size=0.2)

# 5-fold cross-validated R^2 for each candidate regressor.
kf = KFold(n_splits=5)

linScores = []
tsScores = []
hrScores = []
# bardScores = []
brScores = []
# enScores = []
ridgeScores = []

for trainingSplits, testingSplits in kf.split(xTrain):
    # Fixed: KFold.split yields *positional* indices, so rows must be taken
    # with .iloc; .loc mis-selects (or raises KeyError) whenever the
    # DataFrame index is not a clean 0..n-1 RangeIndex.
    x_train, x_test = xTrain.iloc[trainingSplits], xTrain.iloc[testingSplits]
    y_train, y_test = yTrain.iloc[trainingSplits], yTrain.iloc[testingSplits]

    linModel.fit(x_train, y_train)
    linScores.append(linModel.score(x_test, y_test))
    tsModel.fit(x_train, y_train)
    tsScores.append(tsModel.score(x_test, y_test))
    hrModel.fit(x_train, y_train)
    hrScores.append(hrModel.score(x_test, y_test))
    # bardModel.fit(x_train, y_train)
    # bardScores.append(bardModel.score(x_test, y_test))
    brModel.fit(x_train, y_train)
    brScores.append(brModel.score(x_test, y_test))
    # enModel.fit(x_train, y_train)
    # enScores.append(enModel.score(x_test, y_test))
    ridgeModel.fit(x_train, y_train)
    ridgeScores.append(ridgeModel.score(x_test, y_test))

print(linScores, np.mean(linScores))
print(tsScores, np.mean(tsScores))
print(hrScores, np.mean(hrScores))
# print(bardScores, np.mean(bardScores))
print(brScores, np.mean(brScores))
# NOTE(review): the keyword arguments below are the tail of a constructor
# call (presumably RANSACRegressor) whose opening line is outside this
# chunk -- confirm upstream before editing.
                   is_model_valid=None, max_trials=100, stop_n_inliers=inf,
                   stop_score=inf, stop_probability=0.99, residual_metric=None,
                   loss='absolute_loss', random_state=None)
# Theil-Sen: robust slope estimate from median over subsample fits.
rg_2 = TheilSenRegressor(fit_intercept=True, copy_X=True,
                         max_subpopulation=10000.0, n_subsamples=None,
                         max_iter=300, tol=0.001, random_state=None,
                         n_jobs=1, verbose=False)
# Huber: squared loss for inliers, linear loss beyond epsilon.
rg_3 = HuberRegressor(epsilon=1.35, max_iter=100, alpha=0.0001,
                      warm_start=False, fit_intercept=True, tol=1e-05)

rg_1.fit(X_train, Y_train)
rg_2.fit(X_train, Y_train)
rg_3.fit(X_train, Y_train)

# R^2 on the held-out split; NOTE(review): return values are discarded here.
rg_1.score(X_test, Y_test)
rg_2.score(X_test, Y_test)
rg_3.score(X_test, Y_test)
def fit_TheilSen(features_train, labels_train, features_pred):
    """Fit a Theil-Sen regressor and predict labels for `features_pred`.

    Args:
        features_train: training feature matrix.
        labels_train: training target values.
        features_pred: feature matrix to predict on.

    Returns:
        Predicted labels for `features_pred`.
    """
    model = TheilSenRegressor()
    model.fit(features_train, labels_train)
    labels_pred = model.predict(features_pred)
    # NOTE(review): this scores the *training* data although the message says
    # "of the prediction" -- confirm whether a held-out score was intended.
    # (Python 2 print statement kept as-is.)
    print "TheilSen - coefficient of determination R^2 of the prediction: ", model.score(features_train, labels_train)
    return labels_pred
def report_fit(header, model, elapsed):
    """Print train/test R^2 and the wall-clock training time for one model."""
    print(header)
    print("Train Accuracy: ", model.score(X_train, y_train))
    print("Test Accuracy: ", model.score(X_test, y_test))
    print("Execution Time: ", elapsed)

# Multivariate linear regression baseline (sm_time recorded earlier).
model1 = LinearRegression()
model1.fit(X_train, y_train)
em_time = time.time()
report_fit("Multivariate Linear Regression: ", model1, em_time - sm_time)

# Theil-Sen Regression
st_time = time.time()
model2 = TheilSenRegressor()
model2.fit(X_train, y_train)
et_time = time.time()
report_fit("Theil-Sen Regression: ", model2, et_time - st_time)

# Huber Regression
sh_time = time.time()
model3 = HuberRegressor()
model3.fit(X_train, y_train)
eh_time = time.time()
report_fit("Huber Regression: ", model3, eh_time - sh_time)

# Diagram and plots