Exemplo n.º 1
0
def test_theil_sen_1d_no_intercept():
    X, y, w, c = gen_toy_problem_1d(intercept=False)
    # Ordinary least squares is thrown off by the outliers in the toy data.
    ols = LinearRegression(fit_intercept=False).fit(X, y)
    assert np.abs(ols.coef_ - w - c) > 0.5
    # The robust Theil-Sen estimator recovers the true slope w + c.
    estimator = TheilSenRegressor(fit_intercept=False, random_state=0).fit(X, y)
    assert_array_almost_equal(estimator.coef_, w + c, 1)
    assert_almost_equal(estimator.intercept_, 0.0)

    # non-regression test for #18104: score() must not raise
    estimator.score(X, y)
Exemplo n.º 2
0
def getscore_getnext(df, days_ahead, coin):
    """Fit a Theil-Sen model on `df` and forecast `days_ahead` rows out.

    Returns a (confidence, futureval) tuple: the R^2 on a held-out split
    and the prediction for the most recent row.
    """
    horizon = days_ahead

    target_col = 'close'
    df.fillna(value=-99999, inplace=True)
    # Label is the closing price `horizon` rows into the future.
    df['label'] = df[target_col].shift(-horizon)

    #X = X[:-forecast_val]

    features = np.array(df.drop(['label', 'date'], 1))

    features = preprocessing.scale(features)

    # Latest row is the one we forecast; drop the trailing rows whose
    # shifted label is NaN (dropna below removes the matching targets).
    next_row = features[-1:]
    features = features[:-horizon]
    df.dropna(inplace=True)

    targets = np.array(df['label'])

    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        features, targets, test_size=0.15)
    '''
    inPickle = open('%s.pickle' %(coin), 'rb')
    clf = pickle.load(inPickle)
    '''
    model = TheilSenRegressor()

    model.fit(X_train, y_train)
    confidence = model.score(X_test, y_test)
    #print "accuracy with 1.0 being perfect:", (confidence)
    futureval = model.predict(next_row)
    return (confidence, futureval)
Exemplo n.º 3
0
    def train_and_return_model_replicas(self, host, port, username, password,
                                        appType, appNames, folderNames):
        """Fit a Theil-Sen regressor of total CPU utilisation on requests.

        Returns the fitted regressor together with the test-set RMSE.
        """
        frame = self.getAndCombineAllDbs(host, port, username, password,
                                         appNames, folderNames)
        frame['total_cpu_util'] = frame['pod_util_cpu_avg'] * frame['num_pods']
        frame['total_mem_util'] = frame['pod_util_mem_avg'] * frame['num_pods']
        features = frame[['requests']].values
        targets = frame[['total_cpu_util']].values
        X_train, X_test, y_train, y_test = train_test_split(features,
                                                            targets,
                                                            test_size=0.33,
                                                            random_state=42)
        # Synthetic data only feeds the (discarded) score() call below.
        X, y = make_regression(n_samples=features.shape[0],
                               n_features=1,
                               noise=4.0,
                               random_state=0)
        regr = TheilSenRegressor(random_state=0).fit(X_train, y_train)
        regr.score(X, y)
        y_pred = regr.predict(X_test)

        rms = sqrt(mean_squared_error(y_test, y_pred))
        print('RMs score: %.2f' % rms)
        return regr, rms
Exemplo n.º 4
0
    def _cfunc_theilsen(x, y):
        """
        Compute the Theil-Sen regression score for a data set.

        Args:
            x: (list<float>) independent property (x-axis)
            y: (list<float>) dependent property (y-axis)

        Returns: (float) R^2 of the Theil-Sen fit on (x, y)

        """
        from sklearn.linear_model import TheilSenRegressor
        model = TheilSenRegressor(random_state=21)
        column = np.array(x).reshape(-1, 1)
        model.fit(column, y)
        return model.score(column, y)
# Theil sen model


from sklearn.linear_model import TheilSenRegressor # Theil Sen Regressor Model

# Instantiate and fit the robust Theil-Sen estimator
ts_reg = TheilSenRegressor(random_state = 508)
ts_reg.fit(X_train, y_train)

# Predict on the held-out split
y_pred = ts_reg.predict(X_test)

# R^2 on the held-out split
y_score_ts = ts_reg.score(X_test, y_test)

print(y_score_ts)

#############
# Regression tree

from sklearn.tree import DecisionTreeRegressor # Regression trees

# Depth-unbounded tree regularised only by a minimum leaf size
tree_reg = DecisionTreeRegressor(criterion = 'mse',
                                 min_samples_leaf = 14,
                                 random_state = 508)
tree_reg.fit(X_train, y_train)
# Report R^2 (on X, Y) and held-out MSE for each fitted model.
r2_lin = lin.score(X, Y)
mse_lin = mean_squared_error(y_test, y_predlin)
print('R² linear reg.: %.2f' % r2_lin)
print('MSE linear reg.: %.2f' % mse_lin)

r2_ridge = ridge.score(X, Y)
mse_ridge = mean_squared_error(y_test, y_ridge)
print('R²: %.2f' % r2_ridge)
print('MSE: %.2f' % mse_ridge)

r2_lasso = lasso.score(X, Y)
mse_lasso = mean_squared_error(y_test, y_lasso)
print(r2_lasso)
print('MSE: %.2f' % mse_lasso)

r2_elast = elast.score(X, Y)
mse_elast = mean_squared_error(y_test, y_predelast)
print('R²: %.2f' % r2_elast)
print('MSE: %.2f' % mse_elast)

r2_ransac = ransac.score(X, Y)
mse_ransac = mean_squared_error(y_test, y_predransac)
print('R²: %.2f' % r2_ransac)
print('MSE: %.2f' % mse_ransac)

r2_ts = ts.score(X, Y)
mse_ts = mean_squared_error(y_test, y_predts)
print('R²: %.2f' % r2_ts)
print('MSE: %.2f' % mse_ts)

r2_huber = huber.score(X, Y)
mse_huber = mean_squared_error(y_test, y_predhuber)
print('R²: %.2f' % r2_huber)
print('MSE: %.2f' % mse_huber)
"""# Classification models"""

# 5.2.1 Logistic Regression
log = LogisticRegression(random_state=0,
                         solver='lbfgs',
                         multi_class='multinomial')
pred_log = log.fit(X_train, y_train).predict(
    X_test
)  #train the algorithm on training data and predict using the testing data
y_pred_log = log.predict(X_test)
print(log.predict(X_test))  #predcit results based on the trained model
#!/usr/bin/env python

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import TheilSenRegressor

# Load the listings and separate the features from the price target.
data = pd.read_csv("dataset.csv",header=0)

X = data.loc[:,["Commune","Etage","Superficie","Piece"]].values
Y = data.loc[:,"Prix"].values

# Hold out 20% of the rows for evaluation.
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2)

# Fit a robust Theil-Sen model and report its held-out R^2.
regressor = TheilSenRegressor(random_state=0).fit(X_train,Y_train)
score = regressor.score(X_test,Y_test)
print(score)
Exemplo n.º 8
0
    def quantify_beat(self, beatnumber):
        beatindex = self.ibeats[beatnumber]
        # approx expected ibi
        meanibi = np.mean(np.diff(self.tbeats))
        # downslope is less than half of full beat. look for peaks on either side
        downslopewindow = int((meanibi / 2.5) * self.fps)
        # pick preceding maximum
        try:
            maxindex = np.where(
                heartbeat_localmax(self.x[(beatindex -
                                           downslopewindow):beatindex]))[0][-1]
        except:
            maxindex = np.argmax(self.x[(beatindex -
                                         downslopewindow):beatindex])
        peaki = beatindex - downslopewindow + maxindex
        # double check we didn't go beyond prev. beat
        if beatnumber > 0 and peaki <= self.ibeats[beatnumber - 1]:
            peaki = self.ibeats[beatnumber - 1] + downslopewindow + np.argmax(
                self.x[(self.ibeats[beatnumber - 1] +
                        downslopewindow):beatindex])
        # pick succeeding minimum
        troughi = beatindex + np.argmin(
            self.x[beatindex:(beatindex + downslopewindow)])
        # double check we didn't go beyond next beat
        if beatnumber < len(
                self.ibeats) - 1 and troughi >= self.ibeats[beatnumber + 1]:
            troughi = beatindex + np.argmin(
                self.x[beatindex:(self.ibeats[beatnumber + 1] - 1)])
        # robust regression on downslope
        downslopemodel = TheilSenRegressor().fit(
            self.t[peaki:troughi].reshape(-1, 1), self.x[peaki:troughi])
        r2 = downslopemodel.score(self.t[peaki:troughi].reshape(-1, 1),
                                  self.x[peaki:troughi])
        # count which points are close enough to prediction
        predicted_downslope = downslopemodel.predict(
            self.t[peaki:troughi].reshape(-1, 1))
        amplitude = self.x[peaki] - self.x[troughi]
        m, k = downslopemodel.coef_[0], downslopemodel.intercept_
        point_to_line_distances = np.abs(k + m * self.t[peaki:troughi] -
                                         self.x[peaki:troughi]) / np.sqrt(
                                             1 + m * m)
        point_to_line_distance_percentages = 100.0 / amplitude * point_to_line_distances
        ok_points = np.where(point_to_line_distance_percentages <
                             BeatQuality.ACCEPTED_DEVIATION_PERCENTAGE)[0]
        fraction_acceptable = 1.0 / (troughi - peaki) * len(ok_points)
        # numerically characterize non-crap portion of the slope
        ok_slope_length = fraction_acceptable * np.sqrt(
            (troughi - peaki)**2 + (self.x[peaki] - self.x[troughi])**2)
        ok_slope_angle = np.arctan(downslopemodel.coef_[0])
        # numerically characterize beat placement
        beat_downslope_orthogonal_distance = 0 if ok_slope_length == 0 else 1.0 / ok_slope_length * (
            np.abs(k + m * self.t[beatindex] - self.x[beatindex]) /
            np.sqrt(1 + m * m))
        beat_downslope_peak_distance = 0 if ok_slope_length == 0 else 1.0 / ok_slope_length * np.sqrt(
            (beatindex - peaki)**2 + (self.x[peaki] - self.x[beatindex])**2)

        # check if certain to be bad fit
        iscrap = False
        if np.abs(
                r2
        ) < BeatQuality.MINIMUM_R2 or fraction_acceptable < BeatQuality.MINIMUM_LINEARITY:
            print "crap! ", beatnumber, r2, fraction_acceptable
            iscrap = True

        return ok_slope_length, ok_slope_angle, beat_downslope_orthogonal_distance, beat_downslope_peak_distance, iscrap
Exemplo n.º 9
0
X = vec.fit_transform(x_train).toarray()
# Cast closing prices to integers for the training targets.
Y = np.asarray(train.CLOSE).astype('int')

#Pre-Processing Test data
X_test = test[['HIGH', 'LOW', 'OPEN', 'TOTTRDQTY', 'TOTTRDVAL', 'TOTALTRADES']]
x_test = X_test.to_dict(orient='records')
vec = DictVectorizer()
x = vec.fit_transform(x_test).toarray()
y = np.asarray(test.CLOSE).astype('int')

# Fit the Theil-Sen regressor used as the arbitrage model.
clf = TheilSenRegressor()
clf.fit(X, Y)
print("Accuracy of this Statistical Arbitrage model is: ", clf.score(x, y))
predict = clf.predict(x)

test['predict'] = predict

# Plot train/test closes and the prediction against a date index.
train.index = train.Date
test.index = test.Date
for series in (train['CLOSE'], test['CLOSE'], test['predict']):
    series.plot()
plt.legend(loc='best')
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import classification_report, confusion_matrix

#loading the dataset
train = pd.read_csv("C:/Users/HP/Desktop/train (1).csv")
test = pd.read_csv("C:/Users/HP/Desktop/test (2).csv")
train = train.dropna()
test = test.dropna()
train.head()

# NOTE(review): features take all-but-last column while the target takes
# column 1 — these overlap whenever the frame has more than two columns;
# confirm the intended target column against the CSV schema.
X_train = np.array(train.iloc[:, :-1].values)
y_train = np.array(train.iloc[:, 1].values)
X_test = np.array(test.iloc[:, :-1].values)
y_test = np.array(test.iloc[:, 1].values)

#TheilSen Regressor
from sklearn.linear_model import TheilSenRegressor
model = TheilSenRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = model.score(X_test, y_test)  # R^2 on the test split
plt.plot(X_train, model.predict(X_train), color='b')
plt.show()
# Fix: the score was printed twice back-to-back; print it once.
print(accuracy)
Exemplo n.º 11
0
vec = DictVectorizer()
X = vec.fit_transform(x_train).toarray()
# Cast closing prices to integers for the training targets.
Y = np.asarray(train.CLOSE).astype('int')

#Pre-Processing Test data
X_test = test[['HIGH', 'LOW', 'OPEN', 'TOTTRDQTY', 'TOTTRDVAL', 'TOTALTRADES']]
x_test = X_test.to_dict(orient='records')
vec = DictVectorizer()
x = vec.fit_transform(x_test).toarray()
y = np.asarray(test.CLOSE).astype('int')

# Fit the Theil-Sen regressor used as the arbitrage model.
clf = TheilSenRegressor()
clf.fit(X, Y)
print("Statistical Arbitrage model's accuracy is: ", clf.score(x, y))
predict = clf.predict(x)

test['predict'] = predict

# Plot train/test closes and the prediction against a date index.
train.index = train.Date
test.index = test.Date
for series in (train['CLOSE'], test['CLOSE'], test['predict']):
    series.plot()
plt.legend(loc='best')
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()
Exemplo n.º 12
0
from sklearn.model_selection import train_test_split
# x_train, x_test, y_train, y_test = train_test_split(xTrain, yTrain, test_size=0.2)
# 5-fold cross-validated comparison of the candidate regressors.
kf = KFold(n_splits=5)
linScores = []
tsScores = []
hrScores = []
# bardScores = []
brScores = []
# enScores = []
ridgeScores = []
for trainingSplits, testingSplits in kf.split(xTrain):
    # Fix: KFold.split yields *positional* indices, so rows must be taken
    # with .iloc — .loc is only correct for a default RangeIndex.
    x_train, x_test = xTrain.iloc[trainingSplits], xTrain.iloc[testingSplits]
    y_train, y_test = yTrain.iloc[trainingSplits], yTrain.iloc[testingSplits]
    linModel.fit(x_train, y_train)
    linScores.append(linModel.score(x_test, y_test))
    tsModel.fit(x_train, y_train)
    tsScores.append(tsModel.score(x_test, y_test))
    hrModel.fit(x_train, y_train)
    hrScores.append(hrModel.score(x_test, y_test))
    # bardModel.fit(x_train, y_train)
    # bardScores.append(bardModel.score(x_test, y_test))
    brModel.fit(x_train, y_train)
    brScores.append(brModel.score(x_test, y_test))
    # enModel.fit(x_train, y_train)
    # enScores.append(enModel.score(x_test, y_test))
    ridgeModel.fit(x_train, y_train)
    ridgeScores.append(ridgeModel.score(x_test, y_test))
print(linScores, np.mean(linScores))
print(tsScores, np.mean(tsScores))
print(hrScores, np.mean(hrScores))
# print(bardScores, np.mean(bardScores))
print(brScores, np.mean(brScores))
# Fix: ridge scores were computed but never reported.
print(ridgeScores, np.mean(ridgeScores))
Exemplo n.º 13
0
                       is_model_valid=None,
                       max_trials=100,
                       stop_n_inliers=inf,
                       stop_score=inf,
                       stop_probability=0.99,
                       residual_metric=None,
                       loss='absolute_loss',
                       random_state=None)
# Theil-Sen and Huber regressors with their defaults spelled out.
rg_2 = TheilSenRegressor(fit_intercept=True,
                         copy_X=True,
                         max_subpopulation=10000.0,
                         n_subsamples=None,
                         max_iter=300,
                         tol=0.001,
                         random_state=None,
                         n_jobs=1,
                         verbose=False)
rg_3 = HuberRegressor(epsilon=1.35,
                      max_iter=100,
                      alpha=0.0001,
                      warm_start=False,
                      fit_intercept=True,
                      tol=1e-05)

# Fit every regressor, then evaluate each on the held-out split.
for reg in (rg_1, rg_2, rg_3):
    reg.fit(X_train, Y_train)

for reg in (rg_1, rg_2, rg_3):
    reg.score(X_test, Y_test)
Exemplo n.º 14
0
def fit_TheilSen(features_train, labels_train, features_pred):
	model = TheilSenRegressor()
	model.fit(features_train, labels_train)
	labels_pred = model.predict(features_pred)
	print "TheilSen - coefficient of determination R^2 of the prediction: ", model.score(features_train, labels_train)
	return labels_pred
def _report(title, model, elapsed):
    # Print train/test R^2 and wall-clock fit time for one fitted model.
    print(title)
    print("Train Accuracy: ", model.score(X_train, y_train))
    print("Test Accuracy: ", model.score(X_test, y_test))
    print("Execution Time: ", elapsed)

# Multivariate linear baseline
model1 = LinearRegression()
model1.fit(X_train, y_train)
em_time = time.time()
_report("Multivariate Linear Regression: ", model1, em_time - sm_time)

# Theil-Sen Regression

st_time = time.time()
model2 = TheilSenRegressor()
model2.fit(X_train, y_train)
et_time = time.time()
_report("Theil-Sen Regression: ", model2, et_time - st_time)

# Huber Regression

sh_time = time.time()
model3 = HuberRegressor()
model3.fit(X_train, y_train)
eh_time = time.time()
_report("Huber Regression: ", model3, eh_time - sh_time)

# Diagram and plots