示例#1
0
    def setUp(self):
        """Create the Model and Regression fixtures shared by the tests.

        NOTE(review): ``model2out`` is built from the file list reported by
        ``model1``, and ``regression2`` receives the ``model2`` object itself
        rather than ``model2out`` -- presumably deliberate negative cases;
        confirm against the tests that consume them.
        """
        csv_ext = ".csv"
        student_dir = "./data/student"

        # Model over ./data/simple and the file list it reports.
        simple_model = Model.Model("./data/simple", csv_ext)
        self.model1 = simple_model
        simple_files = simple_model.set_files_in_directory()
        self.files = simple_files

        # Further models: another data directory, a directory named
        # "./datfff", and a ".txt" extension.
        student_model = Model.Model(student_dir, csv_ext)
        self.model2 = student_model
        self.model3 = Model.Model("./datfff", csv_ext)
        self.model4 = Model.Model(student_dir, ".txt")

        # Both data-frame sets are requested with the same file list.
        simple_frames = simple_model.set_dataframes(simple_files)
        self.model1out = simple_frames
        self.model2out = student_model.set_dataframes(simple_files)

        self.regression1 = Regression.Regression(simple_frames)
        self.regression2 = Regression.Regression(student_model)
示例#2
0
def main():
    """Fit a linear regression on the data under ./data/simple and report it.

    Loads the data frames through ``Model``, wraps them in ``rg.Regression``,
    takes the first element of ``split_data()`` as training data and lets
    ``Regression.run`` pick the learner.  What happens next depends on the
    class name of the returned learner ("UnivariateLR" or "MultivariateLR");
    any other learner is silently ignored.
    """
    model = Model.Model("./data/simple", ".csv")
    frames = model.set_dataframes(model.set_files_in_directory())
    regression = rg.Regression(frames)

    # Only the first element returned by split_data() is used here.
    train = regression.split_data()[0]
    names = regression.get_columnNames(train)
    ind, dep = regression.get_data(columns_names=names, training_data=train)

    lr = regression.run(train)
    learner_kind = lr.__class__.__name__

    if learner_kind == "UnivariateLR":
        m_fit, b_fit = lr.run()
        print(m_fit, b_fit)
        # NOTE(review): this branch scores the prediction against ``ind``
        # while the multivariate branch uses ``dep`` -- confirm it is intended.
        print(lr.evaluate_model(ind, lr.predict(m_fit, b_fit)))
        m_history, _b_history = lr.get_params_history()
        lr.plot_history_m(m_history)

    elif learner_kind == "MultivariateLR":
        coefficients, cost_history = lr.run()
        prediction = lr.predict(coefficients)
        print(lr.evaluate_model(dep, prediction))
        lr.plot_cost(cost_history)
示例#3
0
def analyze_regression(x1, x2, y, method='ols', n_folds=5, data_name='data',
                       max_degree=20, n_lambdas=9):
    """Scan polynomial degree and regularisation strength and log the scores.

    For every combination of ``lambda`` and polynomial degree a design matrix
    is built with ``create_design_matrix(x1, x2, deg=deg)`` and scored, either
    through ``cross_validation`` (``n_folds > 1``) or by fitting a single
    ``Regression`` model and scoring it on the data it was fitted to.  The
    resulting table is printed and written to
    ``error_scores_<data_name>_<method>[_cv].csv``.

    Parameters
    ----------
    x1, x2 : inputs forwarded to ``create_design_matrix``.
    y : target values forwarded to the model / cross-validation.
    method : regression method name; ``'ols'`` uses the single value
        ``lambda = 0`` instead of the lambda grid.
    n_folds : number of cross-validation folds; values <= 1 disable
        cross-validation, in which case the ``*_test`` columns are ``None``.
    data_name : label used in the output file name.
    max_degree : highest polynomial degree scanned (degrees 1..max_degree).
    n_lambdas : number of points in ``np.logspace(-3, 3, n_lambdas)``.

    Returns
    -------
    None.  The table is printed and saved as a side effect.
    """
    score_columns = ['MSE_train', 'MSE_test', 'R2_train', 'R2_test',
                     'bias_train', 'bias_test', 'var_train', 'var_test']
    columns = ['degree', 'lambda'] + score_columns

    # OLS has no regularisation, so the lambda grid collapses to one value.
    if method == 'ols':
        lambdas = [0]
    else:
        lambdas = np.logspace(-3, 3, n_lambdas)

    use_cv = n_folds > 1
    filename = 'error_scores_' + data_name + '_' + method
    if use_cv:
        filename += '_cv'

    # Rows are collected in a list and the frame is built once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    rows = []
    for lambda_ in lambdas:
        for deg in range(1, max_degree + 1):
            X = create_design_matrix(x1, x2, deg=deg)

            if use_cv:
                # cross_validation returns its eight averages in the same
                # order as score_columns.
                scores = cross_validation(X, y, n_folds, method, lambda_)
            else:
                model = Regression(method, lambda_=lambda_)
                model.fit(X, y)
                model.predict(X)
                mse_train = mean_squared_error(model.y, model.y_pred)
                r2_train = r2_score(model.y, model.y_pred)
                bias_train = bias(model.y, model.y_pred)
                var_train = np.var(model.y_pred)
                # No held-out data without cross-validation.
                scores = (mse_train, None, r2_train, None,
                          bias_train, None, var_train, None)

            row = {'degree': deg, 'lambda': lambda_}
            row.update(zip(score_columns, scores))
            rows.append(row)

    error_scores = pd.DataFrame(rows, columns=columns)

    print(error_scores)
    error_scores.to_csv(filename + '.csv')
示例#4
0
 def build(self):
     """Create the screen manager, register every screen and return it.

     The main menu is added first; each remaining screen is constructed with
     a reference to the manager and its screen name, then added in the order
     listed below.
     """
     manager = ScreenManager()
     manager.transition = SwapTransition()
     manager.add_widget(MainMenu())

     # (screen class, screen name) pairs, in registration order.
     named_screens = (
         (bm.BracketMethods, 'bracket_methods'),
         (om.OpenMethods, 'open_methods'),
         (soe.SystemOfEquations, 'system_equations'),
         (ip.Interpolation, 'interpolation'),
         (rg.Regression, 'regression'),
     )
     for screen_cls, screen_name in named_screens:
         manager.add_widget(
             screen_cls(screenManager=manager, name=screen_name))
     return manager
示例#5
0
def cross_validation(X, y, n_folds, method='ols', lambda_=0.01):
    """Score a ``Regression`` model with shuffled k-fold cross-validation.

    A fresh ``Regression(method, lambda_)`` is fitted on the training part of
    every fold and evaluated on both the training and the validation part.

    Parameters
    ----------
    X : design matrix, indexed by row with the fold indices.
    y : targets; flattened with ``np.ravel`` if it has more than one axis.
    n_folds : number of folds passed to ``KFold`` (seeded with
        ``random_state=0``, ``shuffle=True``).
    method, lambda_ : forwarded to ``Regression``.

    Returns
    -------
    tuple
        ``(mse_train, mse_test, r2_train, r2_test, bias_train, bias_test,
        var_train, var_test)``, each averaged over the folds.
    """
    if len(y.shape) > 1:
        y = np.ravel(y)

    splitter = KFold(n_splits=n_folds, random_state=0, shuffle=True)

    # One row per fold; column 0 holds the training score, column 1 the
    # validation score.
    mse_scores, r2_scores, bias_scores, var_scores = (
        np.zeros((n_folds, 2)) for _ in range(4))

    for fold, (train_idx, val_idx) in enumerate(splitter.split(X)):
        model = Regression(method, lambda_)
        model.fit(X[train_idx], y[train_idx])

        # predict() stores its result on the model, so read it back after
        # each call before the next one overwrites it.
        model.predict(X[train_idx])
        pred_train = model.y_pred

        model.predict(X[val_idx])
        pred_val = model.y_pred

        mse_scores[fold] = (mean_squared_error(y[train_idx], pred_train),
                            mean_squared_error(y[val_idx], pred_val))
        r2_scores[fold] = (r2_score(y[train_idx], pred_train),
                           r2_score(y[val_idx], pred_val))
        bias_scores[fold] = (bias(y[train_idx], pred_train),
                             bias(y[val_idx], pred_val))
        var_scores[fold] = (np.var(pred_train), np.var(pred_val))

    # Average each score over the folds, train before test for every metric.
    averages = []
    for table in (mse_scores, r2_scores, bias_scores, var_scores):
        averages.append(np.mean(table[:, 0]))
        averages.append(np.mean(table[:, 1]))

    return tuple(averages)
########DEFINE COLLECTION FIELDS##########
# Marker printed at the start of the data-collection stage.
print('DATA_COLLECTION_BEGIN')
# Window lengths iterated by the `for period in inputPeriods` loop below,
# which builds one set of regression / rolling fields per window.
inputPeriods = [5, 10, 20, 50, 100, 200]
# NOTE(review): not used in the visible part of the script -- presumably
# look-back and look-ahead horizons for return fields; confirm further down.
pastReturnPeriods = [1, 2, 5, 10, 20, 50, 100]
retPeriods = [1, 2, 3, 4, 5, 10, 20, 30, 40, 50]
# Base input field shared by the per-period fields built below.
adjClose = ifld.AdjClose()
# SMA field with argument 100 over AdjVolume -- presumably a 100-period
# simple moving average of adjusted volume; confirm against ifld.SMA.
longVolume = ifld.SMA(100, ifld.AdjVolume())
# Accumulates every field that should be collected.
collectionFields = []

#import random
#randomSymbols = random.sample(list(stockData.index.get_level_values('Symbol').unique()),2)
#stockData = stockData[stockData.index.get_level_values('Symbol').isin(randomSymbols)]

linRegressions = []
for period in inputPeriods:
    linearReg = reg.Regression(period, adjClose)
    linRegressions.append(linearReg)
    collectionFields.extend(linearReg.getRegFieldsList())
    sdPeriod = ifld.SD(period, ifld.PcntChange(1, False, adjClose),
                       'SD_PCNT_' + str(period))
    rollingMin = ifld.RollingMin(period, adjClose)
    rollingMax = ifld.RollingMax(period, adjClose)
    minDuration = ifld.ExtremeDuration(period, adjClose, False,
                                       'Min_Duration_' + str(period))
    maxDuration = ifld.ExtremeDuration(period, adjClose, True,
                                       'Max_Duration_' + str(period))
    minDurationLag = ifld.Lag(minDuration, 1,
                              'Min_Duration_' + str(period) + '_Lag')
    maxDurationLag = ifld.Lag(maxDuration, 1,
                              'Max_Duration_' + str(period) + '_Lag')
    retracedFromHigh = ifld.Divide(ifld.RetracementPcnt(period, True),
示例#7
0
    meanY = float(Y.mean().values)
    X = dataset.drop(columns=['ERP', 'PRP', 'vendor name', 'model name'])
    # # Separation between train dataset and test dataset with train_frac
    index_separation = int(data_lenght * train_frac)
    Xtrain = X.iloc[:index_separation]
    Ytrain = Y.iloc[:index_separation]
    Xtest = X.iloc[index_separation:]
    Ytest = Y.iloc[index_separation:]

    return Xtrain, Ytrain, Xtest, Ytest, meanY


# Preparing the train/test values and instantiating the regression class
# ----------------------------------------------------------
X, Y, Xtest, Ytest, meanY = prepareValues(verbose=True)
# NOTE: from here on the name `Regression` refers to this instance, not to
# a class.
Regression = rd.Regression(X, Y, verbose=True, unified=False)

# # Training the model with the X and Y sets
# # -------------------------------------
print(tc.WARNING + "--> Training our regression model..." + tc.ENDC)
Regression.train_model()
print(tc.OKGREEN + "  Training phase of the model finished!" + tc.ENDC)
print(tc.OKGREEN + "  Output model of the training (beta) :" + tc.ENDC)
print(Regression.beta)

# # Testing the model with the Xtest and Ytest sets
# # -------------------------------------
# NOTE(review): the "20%" in the message is hard-coded; the actual split is
# decided inside prepareValues -- confirm the two agree.
print(tc.WARNING + "--> Testing the model with the last 20% of the dataset!" +
      tc.ENDC)
average_error = Regression.test_model(Xtest, Ytest)
print(tc.OKGREEN + "  Average error :" + tc.ENDC, average_error)
示例#8
0
import Graph as g
import File as f
import Regression as r
import numpy as np


def func(l, g):
    """Return ``g**0.5 * l**-0.5``, i.e. ``sqrt(g / l)`` for positive inputs.

    Both arguments go through ``np.power``, so scalars and arrays are
    accepted and broadcast against each other.  Note that the parameter
    ``g`` shadows the module alias ``g`` inside this function only.
    """
    root_g = np.power(g, 0.5)
    inverse_root_l = np.power(l, -0.5)
    return root_g * inverse_root_l


# Read the x values, y values and y uncertainties from data.csv.
file = f.File("data.csv")
x, y, yerr = file.read_x_y_values_from_file()
# Fit `func` to the data; the result is read back from regression.popt.
regression = r.Regression(x, y, yerr, func)
regression.fit_data()
# The first fitted parameter is handed to the graph -- presumably the fitted
# value of g that `func` takes as its second argument; confirm in Graph.
graph = g.Graph(x, y, yerr, func, regression.popt[0])
# Labels are raw strings because they contain LaTeX markup.
graph.title = r'Calculating $g$ through measuring the period of a pendulum'
graph.x_caption = r'Length of pendulum $l/\textrm{m}$'
graph.y_caption = r'Angular frequency of pendulum $\omega /\textrm{s}^{-1}$'
# NOTE: the uncertainty (0.06) in the annotation is hard-coded, not taken
# from the fit.
graph.text = r'$g = ' + str(graph.g) + r' \pm 0.06$'
# Position of the annotation text.
graph.text_x = 0.35
graph.text_y = 7.5

graph.show_graph()
示例#9
0
from Regression import *
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Load the iris data set and hold out 40% of the samples for testing.
# NOTE(review): `datasets` and `np` are not imported explicitly by this
# script -- presumably they arrive through `from Regression import *`.
irisdata = datasets.load_iris()
X_train, X_test, Target_train, Target_test = train_test_split(irisdata.data,
                                                              irisdata.target,
                                                              test_size=.4)
Batch_size = 10
# Number of complete batches of Batch_size rows in the training split.
epoch_num = len(X_train) // Batch_size
# Per-batch scores; row 0 = linear, row 1 = ridge, row 2 = lasso.
MeanSquareError = np.zeros((3, epoch_num))
R2Score = np.zeros((3, epoch_num))
for epoch in range(epoch_num):
    # Consecutive, non-overlapping batches.  The previous slice
    # [epoch:epoch + Batch_size] advanced by a single row per iteration, so
    # batches overlapped and most of the training split was never used.
    batch = slice(epoch * Batch_size, (epoch + 1) * Batch_size)
    X_batch = X_train[batch, :]
    Y_batch = Target_train[batch]

    # Fit the three models on the batch; only the first element returned by
    # each *Regression call is kept and then evaluated against the targets.
    Reg = Regression(X_batch, Y_batch)
    LinReg, _ = Reg.LinearRegression(X_batch)
    RigReg, _ = Reg.RidgeRegression(X_batch, alpha=0.1)
    LasReg, _ = Reg.LassoRegression(X_batch, alpha=0.1)
    LinReg_Eval = Evaluation(LinReg, Y_batch)
    RigReg_Eval = Evaluation(RigReg, Y_batch)
    LasReg_Eval = Evaluation(LasReg, Y_batch)

    MeanSquareError[0, epoch] = LinReg_Eval.MeanSquarErr()
    MeanSquareError[1, epoch] = RigReg_Eval.MeanSquarErr()
    MeanSquareError[2, epoch] = LasReg_Eval.MeanSquarErr()
    R2Score[0, epoch] = LinReg_Eval.R2Square()
    R2Score[1, epoch] = RigReg_Eval.R2Square()
    R2Score[2, epoch] = LasReg_Eval.R2Square()

fig = plt.figure()
示例#10
0
    args = parser.parse_args()
    stock_code = args.code
    expect_tag = args.label
    method = args.method

    stockcodes_list = ['000001']
    filenames_list = ["5min/000001.csv"]

    expect_day = '2018-01-18'
    his_num = 5
    # print(stock_code)

    fast_data_searcher = FastResearchData(stock_code, stockcodes_list,
                                          filenames_list)
    stock_data = fast_data_searcher.run()

    # calculator = CalCorrMatrix()
    data_preparer = PreProcessor(stock_data, expect_day, expect_tag, his_num)
    valid_set, train_set, valid_tag, train_tag = data_preparer.run()

    regress = Regression(valid_set, train_set, valid_tag, train_tag, method)
    pred_result = regress.run()

    print(pred_result)

    evaluator = Evaluate(valid_set, valid_tag, pred_result, expect_tag, method)
    evaluator.run()

    drawer = PicDrawer(method, valid_tag, pred_result)
    drawer.picDrawer()
示例#11
0
def test_Regression_fit(method='ols'):
    """Fit ``Regression`` to noisy quadratic data and plot the result.

    Generates 100 seeded samples of ``y = 5*x**2`` plus Gaussian noise,
    builds a degree-5 polynomial design matrix, fits and predicts with
    ``Regression(method=method, lambda_=0.01)`` and hands data and
    prediction to ``plot_regression``.  The function makes no assertions;
    the scikit-learn comparison that contained them is commented out below.
    """
    n_samples = 100   # data size
    degree = 5        # polynomial degree

    # Data generation (seeded so the run is reproducible).
    np.random.seed(0)
    x = np.random.rand(n_samples, 1)
    noise = np.random.randn(n_samples, 1)
    y = 5*x*x + 0.1*noise

    # Design matrix: a column of ones followed by x, x**2, ..., x**degree.
    feature = x[:, 0]
    X = np.column_stack(
        [np.ones(n_samples)]
        + [feature**power for power in range(1, degree + 1)])

    # Alternative data set (disabled):
#    x1, x2 = generate_mesh(0, 1, 100)
#    y = franke_function(x1, x2, eps=0.00)
#
#    X = create_design_matrix(x1, x2, deg=5)

    test_model = Regression(method=method, lambda_=0.01)

    # Manual fit / predict; beta, r2 and mse are only consumed by the
    # disabled comparison below.
    test_model.fit(X, y)
    beta = test_model.beta
    test_model.predict(X)
    y_pred = test_model.y_pred
    r2 = r2_score(test_model.y, test_model.y_pred)
    mse = mean_squared_error(test_model.y, test_model.y_pred)

    # Scikit-learn cross-check (disabled):
#    test_model.skl_fit(X, y)
#    beta_skl = test_model.beta
#    test_model.skl_predict(X)
#    y_pred_skl = test_model.y_pred
#    r2_skl = r2_score(test_model.y, test_model.y_pred)
#    mse_skl = mean_squared_error(test_model.y, test_model.y_pred)
#
#    print('Beta:')
#    print(beta)
#    print(beta_skl)
#    print('y:')
#    print(y_pred)
#    print(y_pred_skl)
#    print('mse:')
#    print(mse)
#    print(mse_skl)
#
#    tol = 1e-15
#
#    assert mean_squared_error(y_pred, y_pred_skl) < tol
#    assert mean_squared_error(beta, beta_skl) < tol
#
    plot_regression(x, y, x, y_pred)
from pylab import *
from numpy import *
from Regression import *

Reg = Regression()

# Load the data and reshape it to p predictor columns plus one response.
data = loadtxt('Q1.data')
p = 13
data = data.reshape(-1, p + 1)

# Shuffle the rows.  The index sequence must be a mutable list: on Python 3
# `range` is immutable and shuffling it in place raises TypeError.
order = list(range(shape(data)[0]))
random.shuffle(order)
data = data[order, :]

# The first 66% of the shuffled rows are used for training.
split = int(len(data) * .66)

# Scale every column (response included) by its standard deviation, taken
# from the diagonal of the covariance matrix of the full data set.
covX = cov(transpose(data))
sdX = sqrt(diag(covX))
for i in range(p + 1):
    data[:, i] = data[:, i] / sdX[i]

traindata = data[0:split, :]
testdata = data[split:len(data), :]

# Response splitting: the last column is the response, stored as a column
# matrix for both the training and the test part.
ytrain = traindata[:, p]
ytrain = transpose(matrix(ytrain))
N = len(ytrain)
ytest = testdata[:, p]
ytest = transpose(matrix(ytest))
Ntest = len(ytest)

# Add squared terms (disabled):
#X = concatenate((X,pow(data[:,0:p],2)), axis = 1)