def init_data():
    X, y = import_power_plant_data()
    X, y = X.to_numpy(), y.to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=True, random_state=1234)
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

    opt = SGD(lr=0.01)
    epoch = 10000
    regressor = LinearRegression(opt, epoch=epoch)
    x_plot = list(range(1, epoch + 1))

    all_mse = regressor.fit(X_train, y_train)
    predicted = regressor.predict(X_test)
    mse_value = Metrics.mse(y_test, predicted)

    Plot.plot_time_series(x_plot, all_mse, "mse_plot",
                          "number of iterations",
                          "Mean Square Error (MSE)",
                          "MSE vs Number of iterations")
    plt.show()
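# init_data above assumes a from-scratch optimizer and a regressor whose fit()
# returns the per-epoch MSE history. A minimal sketch of that interface, with
# hypothetical SGD / LinearRegression classes (an assumption, not the actual
# implementation behind the snippet):
import numpy as np

class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr

    def step(self, w, grad):
        # plain gradient step: w <- w - lr * grad
        return w - self.lr * grad

class LinearRegression:
    def __init__(self, opt, epoch=1000):
        self.opt, self.epoch = opt, epoch

    def fit(self, X, y):
        # X: (n, d), y: (n,); returns the MSE recorded at each epoch
        n, d = X.shape
        self.w, self.b = np.zeros(d), 0.0
        history = []
        for _ in range(self.epoch):
            err = X @ self.w + self.b - y
            history.append(np.mean(err ** 2))
            self.w = self.opt.step(self.w, 2 * X.T @ err / n)
            self.b -= self.opt.lr * 2 * np.mean(err)
        return history

    def predict(self, X):
        return X @ self.w + self.b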
def update_plot(self, plot_state):
    data = plot_state['inputs']
    X, y = np.array(data['x']), np.array(data['y'])
    regressor = LinearRegression(
        basis_function=ScalarBasisFunctions.Polynomial(
            plot_state['Polynomial Degree']),
        l2_cost=plot_state['L2 Weight Penalty'])
    regressor.fit(X, y)
    inputs = np.linspace(*X_RANGE, self.PLOT_POINTS)
    self.fit_line.data_source.data = dict(x=inputs, y=regressor.predict(inputs))
def update_step(x_batch, y_batch, model, learning_rate):
    """Performs a single update step (i.e. forward then backward).

    Args:
        x_batch(numpy.ndarray): input data of dimension (N, ndims).
        y_batch(numpy.ndarray): label data of dimension (N, 1).
        model(LinearModel): Initialized linear model.
    """
    f = LinearRegression.forward(model, x_batch)
    grad = LinearRegression.backward(model, f, y_batch)
    # scale the gradient by the learning rate exactly once
    model.w = model.w - learning_rate * grad
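# The forward/backward methods that update_step relies on are not shown. A
# minimal sketch of what they might compute for squared loss, assuming w
# stores ndims weights plus a trailing bias term and that forward caches its
# augmented input for backward (both assumptions; the shapes match the unit
# test further below):
import numpy as np

class LinearModel:
    def __init__(self, ndims):
        self.w = np.zeros(ndims + 1)   # weights + bias

def forward(model, x):
    # append a ones column for the bias and cache it, then f = X_aug @ w
    model.x = np.hstack([x, np.ones((x.shape[0], 1))])
    return model.x @ model.w

def backward(model, f, y):
    # gradient of mean squared error ||f - y||^2 / N w.r.t. w, shape (ndims+1,)
    return 2 * model.x.T @ (f - y) / y.shape[0]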
def generate_regression_predictions():
    X, Y = get_regression_training_data()
    test_X = get_regression_testing_data()
    lr = LinearRegression()
    lr.fit(X, Y)
    predictions = [str(datetime.timedelta(seconds=int(s)))
                   for s in lr.predict(test_X)]
    for i, x in enumerate(test_X):
        # set the prediction to -1 for runners without a full marathon time
        if x[2] == -1:
            predictions[i] = -1
    return predictions
def main():
    dataset = datasets.load_breast_cancer()
    features = dataset.data
    features = StandardScaler().fit_transform(features)
    num_features = features.shape[1]
    labels = dataset.target

    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels, test_size=0.3, stratify=labels)

    train_size = train_features.shape[0]
    test_size = test_features.shape[0]

    # slice the dataset to an exact multiple of the batch size,
    # e.g. train_size = 1898322, batch_size = 256:
    # [:1898322 - (1898322 % 256)] = [:1898240]
    # 1898322 // 256 = 7415; 7415 * 256 = 1898240
    train_features = train_features[:train_size - (train_size % BATCH_SIZE)]
    train_labels = train_labels[:train_size - (train_size % BATCH_SIZE)]

    # update the size of the dataset to be passed to model.train()
    train_size = train_features.shape[0]

    # slice the test set the same way
    test_features = test_features[:test_size - (test_size % BATCH_SIZE)]
    test_labels = test_labels[:test_size - (test_size % BATCH_SIZE)]
    test_size = test_features.shape[0]

    model = LinearRegression(
        alpha=LEARNING_RATE,
        batch_size=BATCH_SIZE,
        num_classes=NUM_CLASSES,
        sequence_length=num_features,
    )

    model.train(
        epochs=3000,
        log_path="./log_path/linear_regression/",
        train_data=[train_features, train_labels],
        train_size=train_size,
        validation_data=[test_features, test_labels],
        validation_size=test_size,
        result_path="./results/linear_regression/",
    )
def test_input_output(self):
    model = LinearRegression(10)
    x = np.zeros([4, 10])
    y = np.zeros([4, ])

    # Check forward shape.
    f = model.forward(x)
    self.assertEqual(f.shape, (4,))

    # Check backward shape.
    gradient = model.backward(f, y)
    self.assertEqual(gradient.shape, (11,))

    # Check loss shape.
    loss = model.loss(f, y)
    self.assertEqual(loss.shape, ())
def main(_):
    """High level pipeline.

    This script performs the training, evaluation and testing stages of the model.
    """
    learning_rate = FLAGS.learning_rate
    w_decay_factor = FLAGS.w_decay_factor
    num_steps = FLAGS.num_steps
    opt_method = FLAGS.opt_method
    feature_columns = FLAGS.feature_columns.split(',')

    # Load dataset.
    dataset = read_dataset("data/train.csv")

    # Data processing.
    train_set = preprocess_data(dataset, feature_columns=feature_columns,
                                squared_features=True)

    # Initialize model.
    ndim = train_set[0].shape[1]
    model = LinearRegression(ndim, 'zeros')

    # Train model.
    if opt_method == 'iter':
        # Perform gradient descent.
        train_model(train_set, model, learning_rate, num_steps=num_steps,
                    shuffle=True)
        print('Performed gradient descent.')
    else:
        # Compute closed form solution.
        train_model_analytic(train_set, model)
        print('Closed form solution.')

    train_loss = eval_model(train_set, model)
    print("Train loss: %s" % train_loss)

    # Plot x vs. y if the data is one dimensional.
    if train_set[0].shape[1] == 1:
        plot_x_vs_y(train_set, model)

    # Eval model.
    raw_eval = read_dataset("data/val.csv")
    eval_set = preprocess_data(raw_eval, feature_columns=feature_columns,
                               squared_features=True)
    eval_loss = eval_model(eval_set, model)
    print("Eval loss: %s" % eval_loss)

    # Test model.
    raw_test = read_dataset("data/test.csv")
    test_set = preprocess_data(raw_test, feature_columns=feature_columns,
                               squared_features=True)
    test_loss = eval_model(test_set, model)
    print("Test loss: %s" % test_loss)
def linear_predict(stock_code):
    if "useStockPrice" not in request.args or "n" not in request.args:
        return jsonify({"success": False,
                        "error": {"code": "invalid-argument"}})

    model_options = {
        "stock_code": stock_code,
        "use_stock_price": request.args.get("useStockPrice") == "true",
        "n": int(request.args.get("n"))
    }

    model = LinearRegression(model_options, load=True,
                             saved_model_dir="./saved_models/linear")
    if model.model is None:
        return jsonify({"success": False,
                        "error": {"code": "invalid-argument"}})

    if not model_options["use_stock_price"]:
        stock_prices = pd.read_csv(
            "./data/stock_prices/" + stock_code + ".csv", nrows=1)
        predictions = model.predict(stock_prices.loc[0, "adjusted_close"])
    else:
        predictions = model.predict()

    return jsonify({"success": True, "predictions": predictions.tolist()})
def main():
    args = getArguments()
    print('[DEBUG]', args)

    x, y = make_regression(n_samples=args.n_samples, n_features=1,
                           noise=args.noise,
                           bias=np.random.uniform(-200, 200),
                           random_state=42)
    scaler = StandardScaler()
    x = scaler.fit_transform(x)

    lr = LinearRegression(x, y.reshape(-1, 1), alpha=args.lr,
                          max_epochs=args.max_epochs, epsilon=args.epsilon,
                          batch_size=args.batch_size)

    bestTheta = lr.getThetaByNormalEquations()
    bestPredictions = lr.getPrediction(lr.x, bestTheta)
    bestCost = lr.getCost(bestPredictions, lr.y)
    print(f'[DEBUG] Best Theta: {bestTheta.tolist()}')
    print(f'[DEBUG] Best Cost: {bestCost}')

    lr.runGradientDescent()
    optimizedTheta = lr.theta
    optimizedPredictions = lr.getPrediction(lr.x, optimizedTheta)
    optimizedCost = lr.getCost(optimizedPredictions, lr.y)
    print(f'[DEBUG] Optimized Theta: {optimizedTheta.tolist()}')
    print(f'[DEBUG] Optimized Cost: {optimizedCost}')

    plotAndSaveGraphs(lr, args, scaler)
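# getThetaByNormalEquations is not shown above. For reference, the closed-form
# least-squares solution it presumably computes is theta = (X^T X)^{-1} X^T y;
# a minimal standalone sketch (the bias-column handling is an assumption):
import numpy as np

def theta_by_normal_equations(x, y):
    # prepend a ones column so theta[0] is the intercept
    x_aug = np.hstack([np.ones((x.shape[0], 1)), x])
    # pinv is more stable than an explicit inverse when X^T X is ill-conditioned
    return np.linalg.pinv(x_aug.T @ x_aug) @ x_aug.T @ y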
def main(_):
    """High level pipeline.

    This script performs the training, evaluation and testing stages of the model.
    """
    # learning_rate = FLAGS.learning_rate
    # feature_type = FLAGS.feature_type
    # model_type = FLAGS.model_type
    # num_steps = FLAGS.num_steps
    feature_type = 'default'
    model_type = 'svm'

    # Load dataset.
    data = read_dataset('data/train_lab.txt', 'data/image_data')

    # Data processing.
    data = preprocess_data(data, 'default')
    print("Finished preprocessing...")

    # Initialize model.
    ndim = data['image'].shape[1]
    if model_type == 'linear':
        model = LinearRegression(ndim, 'uniform')
    elif model_type == 'logistic':
        model = LogisticRegression(ndim, 'uniform')
    elif model_type == 'svm':
        model = SupportVectorMachine(ndim, 'uniform')

    # Train model.
    print("Starting to train the model...")
    model = train_model(data, model)

    # Eval model.
    print("Starting to evaluate the model...")
    data_val = read_dataset('data/val_lab.txt', 'data/image_data')
    data_val = preprocess_data(data_val, feature_type)
    loss, acc = eval_model(data_val, model)
    print(loss, acc)

    # Test model and generate the Kaggle output.
    print("Starting the test...")
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    print("Preprocessing testing data...")
    data_test = preprocess_data(data_test, feature_type)
    print("Making predictions...")
    data_test['label'] = model.predict(model.forward(data_test['image']))
    print("Writing the results to csv file...")
    write_dataset('data/test_lab.txt', data_test)
    print("Finished!")
def main(_):
    """High level pipeline.

    This script performs the training, evaluation and testing stages of the model.
    """
    learning_rate = FLAGS.learning_rate
    feature_type = FLAGS.feature_type
    model_type = FLAGS.model_type
    num_steps = FLAGS.num_steps

    # Load dataset.
    data = read_dataset('data/val_lab.txt', 'data/image_data')

    # Data processing.
    data = preprocess_data(data, feature_type)

    # Initialize model.
    ndim = data['image'].shape[1]
    if model_type == 'linear':
        model = LinearRegression(ndim, 'ones')
    elif model_type == 'logistic':
        model = LogisticRegression(ndim, 'zeros')
    elif model_type == 'svm':
        model = SupportVectorMachine(ndim, 'zeros')

    # Train model.
    model = train_model(data, model, learning_rate, num_steps=num_steps)

    # Eval model.
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    data_test = preprocess_data(data_test, feature_type)
    acc, loss = eval_model(data_test, model)

    # Test model.
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    data_test = preprocess_data(data_test, feature_type)
def main():
    parser = argparse.ArgumentParser(description='Linear Regression test')
    parser.add_argument('-m', '--method', type=str, default='ols',
                        help='model method: ols or grad_descent')
    parser.add_argument('-n', '--n_iter', type=int, default=50,
                        help='number of iterations for grad_descent')
    args = parser.parse_args()
    method = args.method
    n_iter = args.n_iter

    X, y, m, bias = generate_linear_data(n_samples=1000, n_features=10, bias=10)
    X_train, X_test, y_train, y_test = split_dataset(X, y)
    print("Training size: %s, Test size: %s" % (len(X_train), len(X_test)))
    print("-" * 20)

    # Fit and predict
    model = LinearRegression(n_iter=n_iter)
    model.fit(X_train, y_train, method)
    y_pred = model.predict(X_test)
    print("-" * 20)

    # Scoring
    model.score(y_test, y_pred)
    print("-" * 20)
    print("True coefs: ", np.insert(m, 0, bias))
    print("Model coefs:", model.beta_hat)
    print("-" * 20)

    # Plotting
    plot_regression_residual(y_test, y_pred, bins=int(len(X_train) / 20))
    if method == 'grad_descent':
        plot_iteration_vs_cost(n_iter, model.cost_h)
def get_predictions(stock_code):
    """Gets the predictions of a stock from all trained models.

    1. Get all saved models.
    2. Build the predict data based on the model's input options.
    3. Predict stock price.

    Args:
        stock_code: Stock code specifying a stock.

    Returns:
        A dict with all predictions and models information. Format:
        {
            "predictions": [[p11, p12, ...], [p21, p22, ...], ...],
            "models": [m1_info, m2_info, ...]
        }
    """
    predictions_all = []
    snakes_all = []
    upper_all = []
    lower_all = []
    models_all = []
    past_predictions_all = []

    NUM_OF_DAY = 100
    TIME_INTERVAL = 10

    with open('./data/stock_prices/' + stock_code + '.csv', 'r') as csv_file:
        reader = csv.reader(csv_file)
        # remove the header and get the latest 101 rows
        stock_data_segment = list(reader)[1:NUM_OF_DAY + 2]

    actual_prices = []
    for line in stock_data_segment:
        actual_prices.append(float(line[5]))
    actual_prices = actual_prices[::-1]

    actual_prices_all = np.flipud(
        pd.read_csv('./data/stock_prices/' + stock_code + '.csv')
        ["adjusted_close"].values)

    # standard deviation of day-to-day relative price changes
    nxt = actual_prices_all[1:]
    prev = actual_prices_all[:-1]
    sd = np.std((nxt - prev) / prev)

    # linear model predictions
    nn_start_idx = len(models_all)
    models = LinearRegression.get_all_models(
        stock_code, SAVED_MODELS_DIR_MAP[LinearRegression.MODEL]) or []
    for model_idx, model in enumerate(models):
        print("Linear Regression Model {}".format(model_idx + 1))
        predict_n = model.model_options["predict_n"]
        x = build_predict_dataset(model.input_options, predict_n)
        prediction = model.predict(x)
        predictions_all.append(prediction.tolist())

        # build and predict the snakes test set
        x_test, y_test = build_predict_dataset(
            model.input_options, predict_n, predict=False, test_set="snakes")
        prediction_test = model.predict(x_test)
        snakes_all.append(prediction_test.tolist())

        # calculate upper bound and lower bound
        upper_all.append(
            (prediction[0] + np.std(prediction_test - y_test, axis=0)).tolist())
        lower_all.append(
            (prediction[0] - np.std(prediction_test - y_test, axis=0)).tolist())

        # build and predict the full test set
        x_test, y_test = build_predict_dataset(
            model.input_options, predict_n, predict=False)
        prediction_test = model.predict(x_test)
        past_predictions_all.append(prediction_test[:, 0].tolist())

    models_all += [
        {
            "modelIndex": i + nn_start_idx,
            "modelName": model.get_model_display_name(),
            "score": rating_calculation.model_rating(
                actual_prices, snakes_all[i + nn_start_idx], TIME_INTERVAL, sd),
            "percentageChange": rating_calculation.percentageChange(
                actual_prices[-1], predictions_all[i + nn_start_idx][-1]),
            "trendScore": rating_calculation.calculate_trend_score(
                np.array(past_predictions_all[i + nn_start_idx]),
                np.array(actual_prices_all[-100:])),
            "trend": rating_calculation.count_trend(
                np.array(predictions_all[i + nn_start_idx]),
                actual_prices_all[-1])
        }
        for i, model in enumerate(models)
    ]

    # svr model predictions
    nn_start_idx = len(models_all)
    models = SupportVectorRegression.get_all_models(
        stock_code, SAVED_MODELS_DIR_MAP[SupportVectorRegression.MODEL]) or []
    for model_idx, model in enumerate(models):
        print("Support Vector Regression Model {}".format(model_idx + 1))
        predict_n = model.model_options["predict_n"]
        x = build_predict_dataset(model.input_options, predict_n)
        prediction = model.predict(x)
        predictions_all.append(prediction.tolist())

        # build and predict the snakes test set
        x_test, y_test = build_predict_dataset(
            model.input_options, predict_n, predict=False, test_set="snakes")
        prediction_test = model.predict(x_test)
        snakes_all.append(prediction_test.tolist())

        # calculate upper bound and lower bound
        upper_all.append(
            (prediction[0] + np.std(prediction_test - y_test, axis=0)).tolist())
        lower_all.append(
            (prediction[0] - np.std(prediction_test - y_test, axis=0)).tolist())

        # build and predict the full test set
        x_test, y_test = build_predict_dataset(
            model.input_options, predict_n, predict=False)
        prediction_test = model.predict(x_test)
        past_predictions_all.append(prediction_test[:, 0].tolist())

    models_all += [
        {
            "modelIndex": i + nn_start_idx,
            "modelName": model.get_model_display_name(),
            "score": rating_calculation.model_rating(
                actual_prices, snakes_all[i + nn_start_idx], TIME_INTERVAL, sd),
            "percentageChange": rating_calculation.percentageChange(
                actual_prices[-1], predictions_all[i + nn_start_idx][-1]),
            "trendScore": rating_calculation.calculate_trend_score(
                np.array(past_predictions_all[i + nn_start_idx]),
                np.array(actual_prices_all[-100:])),
            "trend": rating_calculation.count_trend(
                np.array(predictions_all[i + nn_start_idx]),
                actual_prices_all[-1])
        }
        for i, model in enumerate(models)
    ]

    # linear index model predictions
    models = LinearIndexRegression.get_all_models(
        stock_code, SAVED_MODELS_DIR_MAP[LinearIndexRegression.MODEL]) or []
    for model_idx, model in enumerate(models):
        print("Linear Index Regression Model {}".format(model_idx + 1))
        x = build_predict_dataset(model.input_options,
                                  model.model_options["predict_n"])
        prediction = model.predict(x)
        predictions_all.append(prediction.tolist())
        snakes_all.append(None)
        upper_all.append(None)
        lower_all.append(None)
        past_predictions_all.append(None)

    models_all += [{
        "modelName": model.get_model_display_name(),
        # "score": rating_calculation.model_rating(actual_prices, snakes[0], TIME_INTERVAL),
        # "direction": rating_calculation.direction(actual_prices[-1], predictions[0][-1])
    } for model in models]

    # svr index model predictions
    models = SupportVectorIndexRegression.get_all_models(
        stock_code, SAVED_MODELS_DIR_MAP[SupportVectorIndexRegression.MODEL]) or []
    for model_idx, model in enumerate(models):
        print("Support Vector Index Regression Model {}".format(model_idx + 1))
        x = build_predict_dataset(model.input_options,
                                  model.model_options["predict_n"])
        prediction = model.predict(x)
        predictions_all.append(prediction.tolist())
        snakes_all.append(None)
        upper_all.append(None)
        lower_all.append(None)
        past_predictions_all.append(None)

    models_all += [{
        "modelName": model.get_model_display_name(),
        # "score": rating_calculation.model_rating(actual_prices, snakes[0], TIME_INTERVAL),
        # "direction": rating_calculation.direction(actual_prices[-1], predictions[0][-1])
    } for model in models]

    # neural network predictions
    models = DenseNeuralNetwork.get_all_models(
        stock_code, SAVED_MODELS_DIR_MAP[DenseNeuralNetwork.MODEL]) or []
    nn_start_idx = len(models_all)
    for model_idx, model in enumerate(models):
        print("Neural Network Model {}".format(model_idx + 1))
        predict_n = model.model_options["predict_n"]
        if predict_n == 1:
            # roll a 1-step model forward 10 times to get a 10-day forecast
            last_predictions = []
            for _ in range(10):
                # get predict input
                x = build_predict_dataset(model.input_options, predict_n,
                                          previous=np.array(last_predictions))
                # predict
                prediction = model.predict(x)
                last_predictions.append(prediction.tolist()[0])
            predictions_all.append(last_predictions)

            # build and predict the full test set
            x_test, y_test = build_predict_dataset(
                model.input_options, predict_n, predict=False)
            prediction_test = model.predict(x_test)
            past_predictions_all.append(prediction_test.flatten().tolist())

            # get stock data
            stock_data = get_stock_data(model.input_options["stock_codes"])

            # predict the snakes test set
            snakes = np.array([[] for _ in range(10)])
            for _ in range(10):
                snakes_x = []
                for snake_idx in range(10):
                    snakes_x += build_predict_dataset(
                        model.input_options, predict_n,
                        stock_data=stock_data,
                        previous=snakes[snake_idx],
                        skip_last=10 + snake_idx * 10).tolist()
                snakes_prediction = model.predict(np.array(snakes_x))
                snakes = np.concatenate((snakes, snakes_prediction), axis=1)
            snakes = np.flipud(snakes)
            snakes_all.append(snakes.tolist())

            # calculate upper bound and lower bound
            snakes_y = stock_data[model.input_options["stock_code"]][
                model.input_options["column"]].values[-100:].reshape(10, 10)
            upper_all.append(
                (last_predictions + np.std(snakes - snakes_y, axis=0)).tolist())
            lower_all.append(
                (last_predictions - np.std(snakes - snakes_y, axis=0)).tolist())
        else:
            # get predict input
            x = build_predict_dataset(model.input_options, predict_n)
            # predict
            prediction = model.predict(x)
            predictions_all.append(prediction.tolist())

            # build and predict the snakes test set
            x_test, y_test = build_predict_dataset(
                model.input_options, predict_n, predict=False, test_set="snakes")
            prediction_test = model.predict(x_test)
            snakes_all.append(prediction_test.tolist())

            # calculate upper bound and lower bound
            upper_all.append(
                (prediction[0] + np.std(prediction_test - y_test, axis=0)).tolist())
            lower_all.append(
                (prediction[0] - np.std(prediction_test - y_test, axis=0)).tolist())

            # build and predict the full test set
            x_test, y_test = build_predict_dataset(
                model.input_options, predict_n, predict=False)
            prediction_test = model.predict(x_test)
            past_predictions_all.append(prediction_test[:, 0].tolist())

    models_all += [
        {
            "modelIndex": i + nn_start_idx,
            "modelName": model.get_model_display_name(),
            "model": "dnn",
            "modelOptions": model.model_options,
            "inputOptions": model.input_options,
            "score": rating_calculation.model_rating(
                actual_prices, snakes_all[i + nn_start_idx], TIME_INTERVAL, sd),
            # "direction": rating_calculation.direction(
            #     actual_prices[-1], predictions_all[i + nn_start_idx][-1]),
            "percentageChange": rating_calculation.percentageChange(
                actual_prices[-1], predictions_all[i + nn_start_idx][-1]),
            "trendScore": rating_calculation.calculate_trend_score(
                np.array(past_predictions_all[i + nn_start_idx]),
                np.array(actual_prices_all[-100:])),
            "trend": rating_calculation.count_trend(
                np.array(predictions_all[i + nn_start_idx]),
                actual_prices_all[-1])
        }
        for i, model in enumerate(models)
    ]

    return {
        "predictions": predictions_all,
        "snakes": snakes_all,
        "upper": upper_all,
        "lower": lower_all,
        "rollingPredict": past_predictions_all,
        "models": models_all,
        "grade": rating_calculation.calculate_traffic_light_score(
            models_all, sd, VALID_MODEL_THRESHOLD),
        "threshold": VALID_MODEL_THRESHOLD,
        "stockTrendScore": rating_calculation.calculate_stock_trend_score(
            models_all, VALID_MODEL_THRESHOLD)
    }
######################### main #########################

### load in the test set ###
with open("../data_augmentation_models/data/all_early_stages/pkl/test_features.pkl", "rb") as fin:
    test_features = pickle.load(fin)

### define the model ###
# get the arguments
params = load_config('./models/config.yaml')

# define the model
bootstrap_model = LinearRegression(params["feature_dim"], params["output_dim"])

# load in the pre-trained model
PATH_pretrained = "./models/model_6.pth"
print("read in", PATH_pretrained)
bootstrap_model.load_state_dict(torch.load(PATH_pretrained))

# define the device and move the model onto it
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("the device is", device)
bootstrap_model.to(device)

### bootstrap to get confidence interval ###
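# The bootstrap step itself is not shown above. A minimal sketch of what it
# could look like, assuming test_features converts to a float tensor and a
# percentile interval over resampled mean predictions is wanted (the resample
# count and the 95% level are assumptions):
n_boot = 1000
preds = []
bootstrap_model.eval()
with torch.no_grad():
    features = torch.as_tensor(test_features, dtype=torch.float32, device=device)
    for _ in range(n_boot):
        # resample rows of the test set with replacement
        idx = torch.randint(0, features.shape[0], (features.shape[0],), device=device)
        preds.append(bootstrap_model(features[idx]).mean(dim=0))
preds = torch.stack(preds)
# the 2.5th and 97.5th percentiles give a 95% bootstrap confidence interval
lower = torch.quantile(preds, 0.025, dim=0)
upper = torch.quantile(preds, 0.975, dim=0)
print("95% CI:", lower.tolist(), upper.tolist())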
def train_models(train_models_data):
    """Trains models.

    Args:
        train_models_data: Train models data. Format:
            {
                models: [
                    {
                        "model": "model type, matches MODEL in a model class",
                        "stockCode": "the predicting stock",
                        "modelOptions": "model options dict",
                        "inputOptions": "input options dict"
                    }
                ]
            }
            Refer to train_models_sample.json.
    """
    if not path.isdir(SAVED_MODELS_DIR):
        makedirs(SAVED_MODELS_DIR)

    for train_model_data_idx, train_model_data in enumerate(train_models_data):
        print("Model {}".format(train_model_data_idx + 1))

        # initialize the model
        if train_model_data["model"] == LinearRegression.MODEL:
            model = LinearRegression(train_model_data["modelOptions"],
                                     train_model_data["inputOptions"],
                                     stock_code=train_model_data["stockCode"])
        elif train_model_data["model"] == SupportVectorRegression.MODEL:
            model = SupportVectorRegression(train_model_data["modelOptions"],
                                            train_model_data["inputOptions"],
                                            stock_code=train_model_data["stockCode"])
        elif train_model_data["model"] == LinearIndexRegression.MODEL:
            model = LinearIndexRegression(train_model_data["modelOptions"],
                                          train_model_data["inputOptions"],
                                          train_model_data["stock_code"])
        elif train_model_data["model"] == SupportVectorIndexRegression.MODEL:
            model = SupportVectorIndexRegression(train_model_data["modelOptions"],
                                                 train_model_data["inputOptions"],
                                                 train_model_data["stock_code"])
        elif train_model_data["model"] == DenseNeuralNetwork.MODEL:
            model = DenseNeuralNetwork(train_model_data["modelOptions"],
                                       train_model_data["inputOptions"],
                                       stock_code=train_model_data["stockCode"])

        # prepare the data
        x, y, other_data = build_training_dataset(
            train_model_data["inputOptions"], model.model_options["predict_n"])
        if train_model_data["model"] in [LinearRegression.MODEL,
                                         SupportVectorRegression.MODEL,
                                         DenseNeuralNetwork.MODEL]:
            # hold out the last 100 samples; train on the rest
            x = x[:-100]
            y = y[:-100]

        if "normalize" in train_model_data["inputOptions"]:
            model.input_options["normalize_data"] = other_data["normalize_data"]

        # train the model
        model.train(x, y)

        # save the model
        model.save(SAVED_MODELS_DIR_MAP[train_model_data["model"]])
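# A hypothetical example payload for train_models; the option keys and values
# below are illustrative assumptions, not taken from train_models_sample.json:
example_train_models_data = [
    {
        "model": "linear",                 # assumed value of LinearRegression.MODEL
        "stockCode": "0001",
        "modelOptions": {"predict_n": 10},
        "inputOptions": {"column": "adjusted_close", "lookback": 30}
    }
]
# train_models(example_train_models_data)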
except:
    scores = pd.DataFrame(
        columns=['model', 'target_month', 'window_length', 'score'])

dataset = 'datasets/r_non_stationary.pkl'

models = [
    _LSTM("LSTM"),
    CONVNET("CONVNET"),
    _Prophet("Prophet"),
    MEAN("MEAN"),
    LAST("LAST"),
    LinearRegression("Linear Regression", False),
    SES("SES")
]

lr_u = LinearRegression("Univariate Linear Regression", True)
lr_m = LinearRegression("Multivariate Linear Regression", False)
mean = MEAN("MEAN")

# later experiments narrow the model list down
models = [mean, lr_u, lr_m, _Prophet("Prophet")]
models = add_xgb(models)
models = [Gauss("Gauss-1"), Gauss("Gauss-2"), lr_m, mean]

data = get_data_from_dataset(dataset)
source_data = preprocess(data)
import random

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

from models.linear_regression import LinearRegression

# Use custom styling from file
matplotlib.rc_file('../plotstyle')

# Generate data
random.seed(0)
X = np.array([i for i in range(20)], dtype='float32')
X = np.reshape(X, (20, 1))
X = np.concatenate((np.ones((20, 1), dtype='float32'), X), axis=1)
y = np.array([(i + random.uniform(-2, 2)) for i in range(20)], dtype='float32')
y = np.reshape(y, (20, 1))

# Fit model to data
model = LinearRegression(data=X, labels=y)
weights = model.fit()

# Generate line of best fit
x_bf = np.linspace(0, 20, dtype='float32')
y_bf = np.array([(weights[0][0] + x * weights[1][0]) for x in x_bf],
                dtype='float32')

plt.scatter(X[:, 1], y, color='b', s=50, label='Samples')
plt.plot(x_bf, y_bf, color='r', label='Fitted Model')
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.title('Linear Regression')
plt.legend()
plt.show()
def train_model(args):
    if not os.path.isdir("./saved_models"):
        os.makedirs("./saved_models")

    if args.model == "linear":
        model = LinearRegression({
            "stock_code": args.regression_stock_code,
            "use_stock_price": args.regression_use_stock_price,
            "n": args.regression_n
        })
        stock_prices = pd.read_csv(
            "./data/stock_prices/" + args.regression_stock_code + ".csv",
            nrows=args.regression_n)
        model.train(stock_prices)
        model.save("./saved_models/linear")
    elif args.model == "svr":
        model = SupportVectorRegression({
            "stock_code": args.regression_stock_code,
            "use_stock_price": args.regression_use_stock_price,
            "n": args.regression_n,
            "kernel": args.kernel,
            "degree": args.degree,
            "gamma": args.gamma if args.gamma != -1 else "auto",
            "coef0": args.coef0,
            "tol": args.tol,
            "C": args.C,
            "epsilon": args.epsilon,
            "shrinking": args.shrinking,
            "cache_size": args.cache_size,
            "verbose": args.verbose,
            "max_iter": args.max_iter
        })
        stock_prices = pd.read_csv(
            "./data/stock_prices/" + args.regression_stock_code + ".csv",
            nrows=args.regression_n)
        model.train(stock_prices)
        model.save("./saved_models/svr")
    else:
        return
from data.datasets import StatsDatasetRegression
from models.linear_regression import LinearRegression
from trainer.regression_trainer import RegressionTrainer
import pandas as pd
import visualizer
import torch

# ----------------------------------------------------------------------------------------------
# This file trains and tests performance of the linear regression model on the optimized dataset
# ----------------------------------------------------------------------------------------------

# MODEL VARIABLES
MODEL = LinearRegression(4, 2)
TRAINING_SET = StatsDatasetRegression(
    pd.read_csv("../../data/datasets/processed/opt_reg_train.csv"))
TESTING_SET = StatsDatasetRegression(
    pd.read_csv("../../data/datasets/processed/opt_reg_test.csv"))
EPOCHS = 500
LEARNING_RATE = 0.007
OPTIMIZER = torch.optim.SGD(MODEL.parameters(), lr=LEARNING_RATE)
LOSS = torch.nn.MSELoss()

if __name__ == '__main__':
    trainer = RegressionTrainer(MODEL, TRAINING_SET, TESTING_SET, EPOCHS,
                                OPTIMIZER, LOSS)
    trainer.train()
    trainer.print_best_results()
    visualizer.plot_accuracy(trainer.epochs, trainer.val_accuracy,
                             "../../results/graphs/accuracy/opt_reg_acc.png")
    visualizer.plot_loss(trainer.epochs, trainer.val_loss,
                         "../../results/graphs/loss/opt_reg_loss.png")
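# models.linear_regression is not shown here. Since MODEL.parameters() is fed
# to torch.optim.SGD, LinearRegression(4, 2) is presumably an nn.Module; a
# minimal sketch of such a wrapper (an assumption, not the project's code):
import torch.nn as nn

class LinearRegression(nn.Module):
    def __init__(self, in_features, out_features):
        super().__init__()
        # a single affine layer: y = x W^T + b
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        return self.linear(x)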
def update_step(x_batch, y_batch, model, learning_rate):
    """Performs a single update step (i.e. forward then backward).

    Args:
        x_batch(numpy.ndarray): input data of dimension (N, ndims).
        y_batch(numpy.ndarray): label data of dimension (N, 1).
        model(LinearModel): Initialized linear model.
    """
    f = LinearRegression.forward(model, x_batch)
    grad = LinearRegression.backward(model, f, y_batch)
    # scale the gradient by the learning rate exactly once
    model.w = model.w - learning_rate * grad


dataset = io_tools.read_dataset('train.csv')
data = data_tools.preprocess_data(dataset)
ndim = data[0].shape[1]
print('data[0]', data[0])
print('ndim', ndim)
train_model(data, LinearRegression(ndim))


def train_model_analytic(processed_dataset, model):
    """Computes and sets the optimal model weights (model.w).

    Args:
        processed_dataset(list): List of [x, y] processed from
            utils.data_tools.preprocess_data.
        model(LinearRegression): LinearRegression model.
    """
    # The original body was left as a stub. One possible closed-form
    # implementation, assuming numpy is imported as np and that x already
    # carries any bias column it needs: w = (X^T X)^{-1} X^T y.
    x, y = processed_dataset
    model.w = np.linalg.pinv(x.T @ x) @ x.T @ y


def eval_model(processed_dataset, model):
    """Performs evaluation on a dataset."""