class Simple:

    def __init__(self, a, b, c, d):
        self.model = TheilSenRegressor()

    def update_a_b(self, x, y):
        self.model.fit(x.reshape(-1, 1), y)

    def set_c_d(self, c, d):
        pass

    def get_y(self, x):
        return self.model.predict(x.reshape(-1, 1))

    def get_likelihood(self, x, y):
        return 1 / float(x.shape[0]) * np.sum(np.abs(y - self.get_y(x)))

    def to_string(self):
        return "a:{}, b:{}".format(self.model.coef_, self.model.intercept_)

    def get_a_b(self):
        return self.model.coef_, self.model.intercept_

    @staticmethod
    def var_to_weight(v):
        return 1

    @staticmethod
    def get_c_d(x, r):
        return None, None

def getscore_getnext(df, days_ahead, coin):
    forecast_val = days_ahead
    forecast_col = 'close'
    df.fillna(value=-99999, inplace=True)
    df['label'] = df[forecast_col].shift(-forecast_val)
    #X = X[:-forecast_val]
    X = np.array(df.drop(['label', 'date'], 1))
    X = preprocessing.scale(X)
    futureX = X[-1:]
    X = X[:-forecast_val]
    df.dropna(inplace=True)
    y = np.array(df['label'])
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.15)
    '''
    inPickle = open('%s.pickle' %(coin), 'rb')
    clf = pickle.load(inPickle)
    '''
    clf = TheilSenRegressor()
    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    #print "accuracy with 1.0 being perfect:", (confidence)
    futureval = clf.predict(futureX)
    return (confidence, futureval)

def compute_quantal_size(scan):
    """ Estimate the unit change in calcium response corresponding to a unit change in
    pixel intensity (dubbed quantal size, lower is better).

    Assumes images are stationary from one timestep to the next. Uses the frame-to-frame
    differences to measure noise variance per intensity level (which increases linearly
    given that imaging noise is Poisson), fits a line to it and uses the slope as the
    estimate.

    :param np.array scan: 3-dimensional scan (image_height, image_width, num_frames).

    :returns: int minimum pixel value in the scan (that appears a min number of times)
    :returns: int maximum pixel value in the scan (that appears a min number of times)
    :returns: np.array pixel intensities used for the estimation.
    :returns: np.array noise variances used for the estimation.
    :returns: float the estimated quantal size
    :returns: float the estimated zero value
    """
    # Set some params
    num_frames = scan.shape[2]
    min_count = num_frames * 0.1  # pixel values with fewer appearances will be ignored
    max_acceptable_intensity = 3000  # pixel values higher than this will be ignored

    # Make sure field is at least 32 bits (int16 overflows if summed to itself)
    scan = scan.astype(np.float32, copy=False)

    # Create pixel values at each position in field
    eps = 1e-4  # needed for np.round to not be biased towards even numbers (0.5 -> 1, 1.5 -> 2, 2.5 -> 3, etc.)
    pixels = np.round((scan[:, :, :-1] + scan[:, :, 1:]) / 2 + eps)
    pixels = pixels.astype(np.int16 if np.max(abs(pixels)) < 2 ** 15 else np.int32)

    # Compute a good range of pixel values (common, not too bright values)
    unique_pixels, counts = np.unique(pixels, return_counts=True)
    min_intensity = min(unique_pixels[counts > min_count])
    max_intensity = max(unique_pixels[counts > min_count])
    max_acceptable_intensity = min(max_intensity, max_acceptable_intensity)
    pixels_mask = np.logical_and(pixels >= min_intensity, pixels <= max_acceptable_intensity)

    # Select pixels in good range
    pixels = pixels[pixels_mask]
    unique_pixels, counts = np.unique(pixels, return_counts=True)

    # Compute noise variance
    variances = ((scan[:, :, :-1] - scan[:, :, 1:]) ** 2 / 2)[pixels_mask]
    pixels -= min_intensity
    variance_sum = np.zeros(len(unique_pixels))  # sum of variances per pixel value
    for i in range(0, len(pixels), int(1e8)):  # chunk it for memory efficiency
        variance_sum += np.bincount(pixels[i: i + int(1e8)],
                                    weights=variances[i: i + int(1e8)],
                                    minlength=len(unique_pixels))[unique_pixels - min_intensity]
    unique_variances = variance_sum / counts  # average variance per intensity

    # Compute quantal size (by fitting a linear regressor to predict the variance from intensity)
    X = unique_pixels.reshape(-1, 1)
    y = unique_variances
    model = TheilSenRegressor()  # robust regression
    model.fit(X, y)
    quantal_size = model.coef_[0]
    zero_level = - model.intercept_ / model.coef_[0]

    return (min_intensity, max_intensity, unique_pixels, unique_variances,
            quantal_size, zero_level)

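The docstring above describes the variance-versus-intensity idea; below is a minimal synthetic sanity check (not part of the original pipeline, and the gain and offset values are made up). For Poisson-limited imaging, pixel = zero_level + q * Poisson(rate), so the temporal variance grows linearly with the mean intensity, and a Theil-Sen fit of variance on intensity recovers the gain q as the slope and the offset as the x-intercept.

import numpy as np
from sklearn.linear_model import TheilSenRegressor

rng = np.random.default_rng(0)
q, zero_level = 35.0, 200.0                            # assumed "true" gain and offset
rates = rng.uniform(5, 50, size=(32, 32))              # photon rate per pixel
photons = rng.poisson(rates, size=(500, 32, 32))       # frames of Poisson counts
scan = (zero_level + q * photons).transpose(1, 2, 0)   # (image_height, image_width, num_frames)

intensities = scan.mean(axis=2).ravel()                # mean intensity per pixel
variances = scan.var(axis=2).ravel()                   # temporal variance per pixel
model = TheilSenRegressor(random_state=0)
model.fit(intensities.reshape(-1, 1), variances)
print(model.coef_[0])                                  # approximately q
print(-model.intercept_ / model.coef_[0])              # approximately zero_level
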
def robust_cor(x, y):
    if isinstance(x[0], list):
        x = list(map(list, zip(*x)))
    else:
        x = np.array(x).reshape(-1, 1)
    X = np.array(x)
    Y = np.array(y)

    theil_regr = TheilSenRegressor(random_state=42)
    theil_regr.fit(X, Y)
    y_pred = theil_regr.predict(X)
    res = y_pred - y
    tot_dev = y - np.mean(y)
    SSres = np.dot(res, res)
    SStot = np.dot(tot_dev, tot_dev)
    adjR2 = 1 - (SSres / SStot) * (X.shape[0] - 1) / (X.shape[0] - X.shape[1] - 1)
    sgn = np.sign(theil_regr.coef_)[0]
    if adjR2 > 0:
        corr_val = sgn * np.sqrt(adjR2)
    else:
        corr_val = 0

    return [corr_val, theil_regr.coef_, theil_regr.intercept_, theil_regr.breakdown_]

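A short usage sketch for robust_cor (synthetic data, not from the original source): for a single predictor, x is passed as a flat list and the first returned value is the signed robust correlation; for several predictors, x would instead be a list of per-feature lists, which the function transposes internally.

import numpy as np

rng = np.random.default_rng(1)
xs = rng.normal(size=100)
ys = 2.0 * xs + rng.normal(scale=0.5, size=100)   # strong positive linear relationship

corr_val, coef, intercept, breakdown = robust_cor(list(xs), list(ys))
print(corr_val)                                   # close to +1
print(coef[0], intercept)                         # slope near 2, intercept near 0
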
def _fit_theil_sen_one_track(x_coords_metres, y_coords_metres, valid_times_unix_sec):
    """Fits Theil-Sen model for one storm track.

    P = number of points in track

    :param x_coords_metres: length-P numpy array of x-coordinates.
    :param y_coords_metres: length-P numpy array of y-coordinates.
    :param valid_times_unix_sec: length-P numpy array of times.
    :return: theil_sen_dict: Dictionary with the following keys.
    theil_sen_dict['x_intercept_metres']: x-intercept.
    theil_sen_dict['x_velocity_m_s01']: x-velocity (metres per second).
    theil_sen_dict['y_intercept_metres']: y-intercept.
    theil_sen_dict['y_velocity_m_s01']: y-velocity (metres per second).
    """
    num_points = len(x_coords_metres)
    valid_times_unix_sec = numpy.reshape(valid_times_unix_sec, (num_points, 1))

    model_object_for_x = TheilSenRegressor(fit_intercept=True)
    model_object_for_x.fit(valid_times_unix_sec, x_coords_metres)

    model_object_for_y = TheilSenRegressor(fit_intercept=True)
    model_object_for_y.fit(valid_times_unix_sec, y_coords_metres)

    return {
        X_INTERCEPT_KEY: model_object_for_x.intercept_,
        X_VELOCITY_KEY: model_object_for_x.coef_,
        Y_INTERCEPT_KEY: model_object_for_y.intercept_,
        Y_VELOCITY_KEY: model_object_for_y.coef_
    }

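A quick synthetic check of the per-track fit (illustrative values only; X_INTERCEPT_KEY and the other dictionary keys are assumed to be constants defined elsewhere in the module): a track moving at a constant 10 m/s in x and -5 m/s in y should yield those velocities back.

import numpy

valid_times_unix_sec = numpy.arange(0, 600, 60)            # ten points, one per minute
x_coords_metres = 1000.0 + 10.0 * valid_times_unix_sec     # constant +10 m/s in x
y_coords_metres = 2000.0 - 5.0 * valid_times_unix_sec      # constant -5 m/s in y

theil_sen_dict = _fit_theil_sen_one_track(
    x_coords_metres, y_coords_metres, valid_times_unix_sec)
print(theil_sen_dict)   # x-velocity ~ 10, y-velocity ~ -5 (each stored as a length-1 array)
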
def calculate_scaling_params(events, kmer_mean_levels):
    events = pd.DataFrame(events)
    events['pos'] = events['move'].cumsum()

    jump_positions = events[events['move'] > 1]['pos']
    jump_positions = set(jump_positions - 1) | set(jump_positions)
    nonjump_positions = set(events['pos']) - jump_positions
    if len(nonjump_positions) < MINIMUM_NONJUMP_POSITIONS:
        return

    statelevels = []
    statelevels_jump = []
    for pos, posevents in events.groupby('pos'):
        state = posevents['model_state'].iloc[0]
        if '_' in state:
            continue

        medlevel = posevents['mean'].median()
        if pos in nonjump_positions:
            statelevels.append([medlevel, kmer_mean_levels[state]])
        else:
            statelevels_jump.append([medlevel, kmer_mean_levels[state]])

    statelevels_jump = np.array(statelevels_jump)
    statelevels = np.array(statelevels)

    regr = TheilSenRegressor(random_state=922)
    regr.fit(statelevels[:, 0][:, np.newaxis], statelevels[:, 1])

    return regr.coef_[0], regr.intercept_

def estimate_txty(cluster, k=20):
    xs = []
    ys = []
    zs = []
    tx = []
    ty = []
    for i, node in cluster.nodes(data=True):
        xs.append(node['features']['SX'])
        ys.append(node['features']['SY'])
        zs.append(node['features']['SZ'])
        tx.append(node['features']['TX'])
        ty.append(node['features']['TY'])
    xs = np.array(xs)
    ys = np.array(ys)
    zs = np.array(zs)
    tx = np.array(tx)
    ty = np.array(ty)
    argosorted_z = np.argsort(zs)

    lr = TheilSenRegressor()
    lr.fit(zs[argosorted_z][:k].reshape((-1, 1)), xs[argosorted_z][:k])
    TX = lr.coef_[0]

    lr.fit(zs[argosorted_z][:k].reshape((-1, 1)), ys[argosorted_z][:k])
    TY = lr.coef_[0]
    return TX, TY

def test_checksubparams_n_subsamples_if_less_samples_than_features():
    random_state = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    X = random_state.normal(size=(n_samples, n_features))
    y = random_state.normal(size=n_samples)
    theil_sen = TheilSenRegressor(n_subsamples=9, random_state=0)
    with pytest.raises(ValueError):
        theil_sen.fit(X, y)

class Regressor(BaseEstimator):

    def __init__(self):
        self.regressorName = "linear"
        if self.regressorName == "rf":
            self.clf = RandomForestRegressor(n_estimators=30, max_depth=63,
                                             max_features=50, n_jobs=-1)
        elif self.regressorName == "gb":
            self.clf = GradientBoostingRegressor(alpha=0.9, init=None, max_depth=3,
                                                 learning_rate=0.2, loss='ls',
                                                 max_features=None, min_samples_leaf=1,
                                                 min_samples_split=2,
                                                 min_weight_fraction_leaf=0.0,
                                                 n_estimators=2500, presort='auto',
                                                 random_state=None, subsample=1.0,
                                                 verbose=0, warm_start=True)
            #self.clf = GridSearchCV(estimator=gb, param_grid=self.getParamGrid(),
            #                        scoring='mean_squared_error', cv=3, n_jobs=-1)
            #self.clf = gb
        elif self.regressorName == "ridge":
            self.clf = RidgeCV(alphas=(0.01, 0.1), fit_intercept=True, normalize=False,
                               scoring=None, cv=5, gcv_mode=None, store_cv_values=False)
        elif self.regressorName == "linear":
            self.clf = LinearRegression()
        elif self.regressorName == "lasso":
            self.clf = LassoCV(cv=10)
        elif self.regressorName == "svr":
            self.clf = SVR(kernel='rbf', C=0.2, gamma=0.01)
        elif self.regressorName == "knn":
            self.clf = neighbors.KNeighborsRegressor(1, weights='distance', n_jobs=-1)
        elif self.regressorName == "gauss":
            self.clf = TheilSenRegressor()

    def fit(self, X, y):
        X = csc_matrix(X)
        print("Training Algorithm")
        self.clf.fit(X, y)
        #print self.clf.best_estimator_

    def predict(self, X):
        X = csr_matrix(X)
        print("Testing Algorithm")
        return self.clf.predict(X)

    def getRegressor(self):
        return self.clf

    def getRegressorName(self):
        return self.regressorName

    def getParamGrid(self):
        if self.regressorName == "rf":
            defaultGrid = [None]
            maxDepthGrid = np.arange(10, 70, 7)
            maxFeaturesGrid = ["sqrt", "log2", None]
            maxTreesGrid = np.arange(10, 100, 10)
            param_grid = {'max_features': defaultGrid}
        elif self.regressorName == "gb":
            #maxDepthGrid = np.arange(3, 20, 5)
            learningRateGrid = np.arange(50, 100, 10)
            #param_grid = {'max_depth': maxDepthGrid}
            #param_grid = {'loss': ['ls', 'lad', 'huber', 'quantile']}
            param_grid = {'alpha': [0.9]}
        return param_grid

class Regressor(BaseEstimator):

    def __init__(self):
        self.regressorName = "gb"
        if self.regressorName == "rf":
            self.clf = RandomForestRegressor(n_estimators=400, max_depth=63,
                                             max_features=50, n_jobs=-1)
        elif self.regressorName == "gb":
            self.clf = GradientBoostingRegressor(alpha=0.9, init=None, max_depth=3,
                                                 learning_rate=0.2, loss='ls',
                                                 max_features=None, min_samples_leaf=1,
                                                 min_samples_split=2,
                                                 min_weight_fraction_leaf=0.0,
                                                 n_estimators=2500, presort='auto',
                                                 random_state=None, subsample=1.0,
                                                 verbose=0, warm_start=True)
            #self.clf = GridSearchCV(estimator=gb, param_grid=self.getParamGrid(),
            #                        scoring='mean_squared_error', cv=3, n_jobs=-1)
            #self.clf = gb
        elif self.regressorName == "ridge":
            self.clf = RidgeCV(alphas=(0.01, 0.1), fit_intercept=True, normalize=False,
                               scoring=None, cv=5, gcv_mode=None, store_cv_values=False)
        elif self.regressorName == "linear":
            # LinearRegression takes no alpha/max_iter arguments; plain OLS is used here
            self.clf = LinearRegression()
        elif self.regressorName == "lasso":
            self.clf = LassoCV(cv=10)
        elif self.regressorName == "svr":
            self.clf = SVR(kernel='rbf', C=0.2, gamma=0.01)
        elif self.regressorName == "knn":
            self.clf = neighbors.KNeighborsRegressor(1, weights='distance', n_jobs=-1)
        elif self.regressorName == "gauss":
            self.clf = TheilSenRegressor()

    def fit(self, X, y):
        #X = csc_matrix(X)
        self.clf.fit(X, y)
        #print self.clf.best_estimator_

    def predict(self, X):
        #X = csr_matrix(X)
        return self.clf.predict(X)

    def getRegressor(self):
        return self.clf

    def getRegressorName(self):
        return self.regressorName

    def getParamGrid(self):
        if self.regressorName == "rf":
            defaultGrid = [None]
            maxDepthGrid = np.arange(10, 70, 7)
            maxFeaturesGrid = ["sqrt", "log2", None]
            maxTreesGrid = np.arange(10, 100, 10)
            param_grid = {'max_features': defaultGrid}
        elif self.regressorName == "gb":
            #maxDepthGrid = np.arange(3, 20, 5)
            learningRateGrid = np.arange(50, 100, 10)
            #param_grid = {'max_depth': maxDepthGrid}
            #param_grid = {'loss': ['ls', 'lad', 'huber', 'quantile']}
            param_grid = {'alpha': [0.9]}
        return param_grid

def _fit_robust_line(shifts):
    """ Use a robust linear regression algorithm to fit a line to the data."""
    from sklearn.linear_model import TheilSenRegressor

    X = np.arange(len(shifts)).reshape(-1, 1)
    y = shifts
    model = TheilSenRegressor()  # robust regression
    model.fit(X, y)
    line = model.predict(X)

    return line

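A small usage sketch with made-up shift values: because Theil-Sen is robust, the fitted line follows the slow drift and is not dragged toward the one spurious shift.

import numpy as np

rng = np.random.default_rng(2)
shifts = 0.05 * np.arange(200) + rng.normal(scale=0.2, size=200)  # slow drift plus jitter
shifts[50] = 40.0                                                 # one spurious shift

line = _fit_robust_line(shifts)
print(line[0], line[-1])   # roughly 0 and 10, tracking the underlying drift
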
class _TheilSenRegressorImpl:

    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)

def theilsen_regress_predict(var):
    """
    Input:-
    var: 1-D array

    regressortype = LinearRegression, TheilSenRegressor

    Output: predicted values of the fitted Theil-Sen trend line
    """
    regressor = TheilSenRegressor()
    y = np.asarray(var).reshape(-1, 1)
    X = np.arange(len(y)).reshape(-1, 1)
    regressor.fit(X, y)
    return regressor.predict(X)

def theilsen_regress_coeff(var, a):
    """
    Input:-
    var: 1-D array
    a: 1-D array index

    regressortype = LinearRegression, TheilSenRegressor

    Output: regression coefficient
    """
    regressor = TheilSenRegressor()
    y = np.asarray(var).reshape(-1, 1)
    X = a.reshape(-1, 1)
    regressor.fit(X, y)
    return np.array([regressor.coef_])

def _regress_a(X, y, robust, n_jobs):
    """
    Calculates the slope and intercept
    """
    if robust:
        model = TheilSenRegressor(n_jobs=n_jobs)
    else:
        model = LinearRegression(n_jobs=n_jobs)

    model.fit(X, y)

    slope_m = model.coef_[0]
    intercept_b = model.intercept_

    return slope_m, intercept_b

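A quick comparison sketch (synthetic values, not from the original module): when a handful of responses are corrupted, the robust=True path keeps the slope near the true value of 1.5, while the ordinary least-squares path is pulled away.

import numpy as np

rng = np.random.default_rng(3)
X = np.linspace(0, 10, 100).reshape(-1, 1)
y = 1.5 * X.ravel() + rng.normal(scale=0.1, size=100)
y[-10:] += 50.0                                   # corrupt the last ten responses

print(_regress_a(X, y, robust=True, n_jobs=1))    # slope stays close to 1.5
print(_regress_a(X, y, robust=False, n_jobs=1))   # slope dragged upward by the outliers
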
def _cfunc_theilsen(x, y):
    """
    Get Theil-Sen regression score for data set.

    Args:
        x: (list<float>) independent property (x-axis)
        y: (list<float>) dependent property (y-axis)

    Returns: (float) Theil-Sen score
    """
    from sklearn.linear_model import TheilSenRegressor
    r = TheilSenRegressor(random_state=21)
    x_coeff = np.array(x)[:, np.newaxis]
    r.fit(x_coeff, y)
    return r.score(x_coeff, y)

class r07522507_TheilSenRegressor(regression):

    def trainAlgo(self):
        self.model = TheilSenRegressor(
            fit_intercept=self.param['fit_intercept'],
            copy_X=self.param['copy_X'],
            max_subpopulation=self.param['max_subpopulation'],
            n_subsamples=self.param['n_subsamples'],
            max_iter=self.param['max_iter'],
            tol=self.param['tol'],
            random_state=self.param['random_state'],
            verbose=self.param['verbose'],
        )
        self.model.fit(self.inputData['X'], self.outputData['Y'])

    def predictAlgo(self):
        self.result['Y'] = self.model.predict(self.inputData['X'])

def fit(self, X, y, random_state=None):
    """
    Train ENOLS on the given training set.

    Parameters
    ----------
    X: an input array of shape (n_sample, n_features)
    y: an array of shape (n_sample,) containing the target values for the input examples

    Return
    ------
    self: the fitted model
    """
    # use random instead of np.random to sample random numbers below
    random = check_random_state(random_state)

    estimators = [('lr', LinearRegression())]
    if isinstance(self.sample_size, int):
        self.sample_size = 'reservoir_sampling'

    # add all the trained OLS models to this list
    self.estimators_lr, self.estimators_TSR, self.estimators_enols = [], [], []

    for i in range(self.n_estimators):
        samples = sample_without_replacement(n_population=random.choice([50, 100]),
                                             n_samples=random.choice([10, 20]),
                                             random_state=random_state,
                                             method=self.sample_size)
        X_train, y_train = [], []
        for j in samples:
            X_train.append(X[j]), y_train.append(y[j])

        reg = LinearRegression()
        reg.fit(np.array(X_train), np.array(y_train))

        tsr = TheilSenRegressor()
        tsr.fit(np.array(X_train), np.array(y_train))

        enol = StackingRegressor(estimators=estimators, final_estimator=LinearRegression())
        enol.fit(np.array(X_train), np.array(y_train))

        self.estimators_lr.append(reg), self.estimators_TSR.append(tsr), self.estimators_enols.append(enol)

    return self

def learn_a_b(x, y, lamb, alpha, a0=-0.5, b0=3.4):
    (c, d) = lamb
    (e, f) = alpha
    if (a0 == 0.0) or (b0 == 0.0):
        model = TheilSenRegressor()
        model.fit(x.reshape(-1, 1), y)
        a0 = model.coef_[0]
        b0 = model.intercept_
    if (d == 0) and (c == 0):
        r = a0 * x + b0 - y
        d = np.log(np.min(np.abs(r))) - 1e-8

    r = minimize(Pareto2.obj_a_b, [a0, b0], args=(x, y, (c, d), (e, f)),
                 method='Nelder-Mead',
                 options={'maxiter': 10000, 'disp': False})
    # print r
    if not r.success:
        print("Optimization Failed", r)
    return r.x[0], r.x[1]

def test_checksubparams_too_many_subsamples():
    X, y, w, c = gen_toy_problem_1d()
    theil_sen = TheilSenRegressor(n_subsamples=101, random_state=0)
    with pytest.raises(ValueError):
        theil_sen.fit(X, y)

def test_checksubparams_negative_subpopulation():
    X, y, w, c = gen_toy_problem_1d()
    theil_sen = TheilSenRegressor(max_subpopulation=-1, random_state=0)
    with pytest.raises(ValueError):
        theil_sen.fit(X, y)

def theilsen_regressor(self):
    x_train, x_test, y_train, y_test = self.preprocessing()
    model = TheilSenRegressor()
    y_pred = model.fit(x_train, y_train).predict(x_test)
    self.printing(y_test, y_pred, 'Theilsen')

def TSReg(X, Y):
    model = TheilSenRegressor()
    trained_model = model.fit(X, Y)
    return trained_model

'''
# 5.1.5.1 RANSAC regression
ransac = RANSACRegressor()
pred_ransac = ransac.fit(X_train, y_train).predict(X_test)  # train the algorithm on training data and predict using the testing data
y_predransac = ransac.predict(X_test)
print('Betas: ', list(zip(ransac.coef_, X)))
print('Beta0: %.2f' % ransac.intercept_)  # Beta0

# 5.1.5.2 Theil-Sen regression
ts = TheilSenRegressor()
pred_ts = ts.fit(X_train, y_train).predict(X_test)  # train the algorithm on training data and predict using the testing data
y_predts = ts.predict(X_test)
print('Betas: ', list(zip(ts.coef_, X)))
print('Beta0: %.2f' % ts.intercept_)  # Beta0

# 5.1.5.3 Huber regression
huber = HuberRegressor(alpha=0.0)
pred_huber = huber.fit(X_train, y_train).predict(X_test)  # train the algorithm on training data and predict using the testing data
y_predhuber = huber.predict(X_test)
print('Betas: ', list(zip(huber.coef_, X)))
print('Beta0: %.2f' % huber.intercept_)  # Beta0

"""# Regression Model selection

After calculating different regression models it is necessary to compare models and
evaluate which is the best given the database.

# Model via robust linear regression (Theil-Sen)
from sklearn.linear_model import TheilSenRegressor

reg = TheilSenRegressor()
reg.fit(X_train, y_train)

data_train = pd.read_csv('Data/train_copy.csv')
train = normalize(preprocess(data_train))
Xdata = train.drop(columns='Survived')
ydata = train['Survived']
X_train, X_test, y_train, y_true = train_test_split(Xdata, ydata, test_size=0.1,
                                                    random_state=42, stratify=ydata)

# New classifiers
# note: accuracy_score expects discrete labels, so these continuous regression
# predictions would need rounding/thresholding before scoring
Class1 = RANSACRegressor(random_state=42)
Class1.fit(X_train, y_train)
Class1_predictions = Class1.predict(X_test)
Class1_accuracy = accuracy_score(y_true, Class1_predictions, normalize=True, sample_weight=None)

Class2 = TheilSenRegressor(random_state=42)
Class2.fit(X_train, y_train)
Class2_predictions = Class2.predict(X_test)  # was Class1.predict, which ignored the Theil-Sen fit
Class2_accuracy = accuracy_score(y_true, Class2_predictions, normalize=True, sample_weight=None)

Class3 = LinearRegression()
Class3.fit(X_train, y_train)
Class3_predictions = Class3.predict(X_test)
Class3_accuracy = accuracy_score(y_true, Class3_predictions, normalize=True, sample_weight=None)

Class4 = HuberRegressor(alpha=0.0, epsilon=epsilon)
Class4.fit(X_train, y_train)
Class4_predictions = Class4.predict(X_test)
Class4_accuracy = accuracy_score(y_true, Class4_predictions, normalize=True, sample_weight=None)

# Print different accuracies

#!/usr/bin/env python
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import TheilSenRegressor

data = pd.read_csv("dataset.csv", header=0)
X = data.loc[:, ["Commune", "Etage", "Superficie", "Piece"]].values
Y = data.loc[:, "Prix"].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

regressor = TheilSenRegressor(random_state=0)
regressor.fit(X_train, Y_train)

score = regressor.score(X_test, Y_test)
print(score)

def fit_TheilSen(features_train, labels_train, features_pred):
    model = TheilSenRegressor()
    model.fit(features_train, labels_train)
    labels_pred = model.predict(features_pred)
    print("TheilSen - coefficient of determination R^2 on the training data: ",
          model.score(features_train, labels_train))
    return labels_pred

plot_prediction("Linear Regression", Y_pred, test['close']) # Lasso Lars lassolars_reg = LassoLars() lassolars_reg.fit(X_train, Y_train) Y_pred = lassolars_reg.predict(X_test) lassolars_r2 = r2_score(Y_expected, Y_pred) lassolars_mse = mean_squared_error(Y_expected, Y_pred) print("Lasso Lars Regression\n", "R2: ", lassolars_r2, "MSE:", lassolars_mse) plot_prediction("Lasso Lars Regression", Y_pred, test['close']) # Theil Sen Regressor theil_reg = TheilSenRegressor() theil_reg.fit(X_train, Y_train) Y_pred = theil_reg.predict(X_test) theil_r2 = r2_score(Y_expected, Y_pred) theil_mse = mean_squared_error(Y_expected, Y_pred) print("Theil Sen Regression\n", "R2: ", theil_r2, "MSE:", theil_mse) plot_prediction("Theil Sen Regression", Y_pred, test['close']) # Bayesian Ridge bayesian_reg = BayesianRidge() bayesian_reg.fit(X_train, Y_train) Y_pred = bayesian_reg.predict(X_test) bayesian_r2 = r2_score(Y_expected, Y_pred) bayesian_mse = mean_squared_error(Y_expected, Y_pred) print("Bayesian Ridge Regression\n", "R2: ", bayesian_r2, "MSE:", bayesian_mse) plot_prediction("Bayesian Ridge Regression", Y_pred, test['close'])
vec = DictVectorizer()
X = vec.fit_transform(x_train).toarray()
Y = np.asarray(train.CLOSE)
Y = Y.astype('int')

# Pre-Processing Test data
X_test = test[['HIGH', 'LOW', 'OPEN', 'TOTTRDQTY', 'TOTTRDVAL', 'TOTALTRADES']]
x_test = X_test.to_dict(orient='records')
vec = DictVectorizer()
x = vec.fit_transform(x_test).toarray()
y = np.asarray(test.CLOSE)
y = y.astype('int')

# Regressor (the "accuracy" reported below is the regressor's R^2 score)
clf = TheilSenRegressor()
clf.fit(X, Y)
print("Accuracy of this Statistical Arbitrage model is: ", clf.score(x, y))
predict = clf.predict(x)
test['predict'] = predict

# Plotting
train.index = train.Date
test.index = test.Date
train['CLOSE'].plot()
test['CLOSE'].plot()
test['predict'].plot()
plt.legend(loc='best')
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()

# The score is directly comparable to R-Square
print(y_score)


#########
# Theil sen model
from sklearn.linear_model import TheilSenRegressor

# Theil Sen Regressor Model

# Instantiate
ts_reg = TheilSenRegressor(random_state=508)

# Fit
ts_reg.fit(X_train, y_train)

# Predict
y_pred = ts_reg.predict(X_test)

# Score
y_score_ts = ts_reg.score(X_test, y_test)
print(y_score_ts)


#############
# Regression tree
from sklearn.tree import DecisionTreeRegressor

# Regression trees

# Instantiate

# Show the RANSAC fit
plt.plot(x, line_ransac, color='yellow', label='RANSAC')
# plt.show()

# Theil-Sen estimator:
# General info: https://en.wikipedia.org/wiki/Theil%E2%80%93Sen_estimator
# Good ONLY for LINEAR REGRESSION
# Sci-kit learn implementation: http://scikit-learn.org/stable/auto_examples/linear_model/plot_theilsen.html

# Init the Theil-Sen estimator instance
theil = TheilSenRegressor()

# Fit with the Theil-Sen estimator
theil.fit(x, line_data)

# Get the fitted data result
line_theil = theil.predict(x)

# Plot Theil-Sen results
plt.plot(x, line_theil, color='red', label='Theil-Sen')

plt.legend(loc='lower right')
plt.show()
plt.clf()

###################################