# Model3 and utils are project-specific modules assumed to be importable from this package.
from math import sqrt

import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.multioutput import RegressorChain
from xgboost import XGBRegressor


def build_model_and_evaluate_rms(prev_pred=None):
    model = Model3()
    X_combined, y = model.combined_features(target="personality")

    # Combine the predictions of previous tasks with the features to predict another task.
    # Join column-wise so the previous predictions become extra features for the same samples.
    if prev_pred is not None:
        X_combined = pd.concat([X_combined, prev_pred], axis=1)

    # X, y = utils.extract_data(X_combined, label="personality")
    X_train, X_test, y_train, y_test = train_test_split(
        X_combined, y, test_size=0.20, random_state=2)

    reg = RegressorChain(
        XGBRegressor(n_estimators=200, max_depth=2, objective="reg:squarederror"),
        order=[0, 3, 1, 4, 2])
    reg = reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    # Calculate RMSE for each personality trait, in the order given by utils.regressor_labels
    rmse = []
    for i, value in enumerate(utils.regressor_labels):
        rmse.append(sqrt(mean_squared_error(y_test[value], y_pred[:, i])))

    return rmse, reg
class LassoModel(RegModels):
    def __init__(self, params):
        super(LassoModel, self).__init__(params)
        self.name = "Lasso"

    def train(self):
        # Wrap a Lasso base estimator in a RegressorChain so it can fit multiple targets,
        # feeding each predicted target into the models for the remaining targets.
        self.model = Lasso()
        self.model = RegressorChain(self.model)
        self.model.fit(self.train_x, self.train_y)
        self.train_output = self.model.predict(self.train_x)
# LOOK_BACK, PREDICT_FORWARD, and the time-increment constants (FIVE_MINUTES, THIRTY_MINUTES,
# ONE_DAY, FIVE_DAYS) are module-level constants assumed to be defined elsewhere in the project.
import pandas as pd
import yfinance as yf
from sklearn.multioutput import RegressorChain
from sklearn.svm import LinearSVR


def predict(ticker, interval):
    # Use the full price history for daily/weekly bars, otherwise one month of intraday bars
    period = "max" if interval in ["1d", "5d"] else "1mo"
    df = pd.DataFrame(yf.Ticker(ticker).history(interval=interval, period=period))
    df.dropna(inplace=True)
    last_timestamp = int(df.index[-1].timestamp() * 1000)
    print("LAST_TIMESTAMP:", last_timestamp)

    # Reshape the closing prices into sliding windows:
    # LOOK_BACK past values as features, PREDICT_FORWARD future values as targets
    data = df["Close"].values
    X = []
    y = []
    for i in range(0, len(data) - LOOK_BACK - PREDICT_FORWARD):
        X.append(data[i:i + LOOK_BACK])
        y.append(data[i + LOOK_BACK:i + LOOK_BACK + PREDICT_FORWARD])
    print("X_LENGTH:", len(X))
    print("y_LENGTH:", len(y))

    # define base model
    model = LinearSVR(dual=False, loss="squared_epsilon_insensitive")
    # define the chained multioutput wrapper model
    wrapper = RegressorChain(model)
    # fit the model on the whole dataset
    wrapper.fit(X, y)

    # make a prediction from the most recent LOOK_BACK closes
    historic_data = data[len(data) - LOOK_BACK:]
    predictions = wrapper.predict([historic_data])[0].tolist()

    payload = []
    time_increment = 0
    if interval == "5m":
        time_increment = FIVE_MINUTES
    elif interval == "30m":
        time_increment = THIRTY_MINUTES
    elif interval == "1d":
        time_increment = ONE_DAY
    elif interval == "5d":
        time_increment = FIVE_DAYS
    print("TIME_INCREMENT:", time_increment)

    # Attach a timestamp to each predicted price, stepping forward one interval at a time
    for p in predictions:
        last_timestamp += time_increment
        payload.append({"date": last_timestamp, "price": p})

    return {"response_code": 200, "payload": payload}
def findNextTick(df, type):
    # predictionLabels is a module-level list of target column names defined elsewhere.
    nextStrings = []
    # Create a column holding the next value of each target (this is what we are predicting)
    for i in predictionLabels:
        nextString = "next" + str(i)
        df[nextString] = df[i].shift(-1)
        nextStrings.append(nextString)

    X_pred = df[-1:].drop(nextStrings, axis=1)  # the most recent row, used for prediction
    df = df[0:-1]                               # all but the last row, used for training
    X = df.drop(nextStrings, axis=1)            # drop the answers
    y = df[nextStrings]                         # the answer columns

    r1 = LinearRegression(n_jobs=-1)
    r2 = tree.DecisionTreeRegressor()
    r3 = ensemble.RandomForestRegressor(n_jobs=-1)
    estimators = [('r1', r1), ('r2', r2), ('r3', r3)]

    if type == 0:
        regressor = ensemble.StackingRegressor(
            estimators=estimators,
            final_estimator=ensemble.RandomForestRegressor(
                n_estimators=100, random_state=42, n_jobs=-1))
    elif type == 1:
        regressor = ensemble.VotingRegressor(estimators=estimators)

    regressor = RegressorChain(regressor)
    regressor.fit(X, y)  # train the chained ensemble

    y_pred = list(regressor.predict(X_pred))
    # Prepend the last observed values so the result holds the current tick followed by the prediction
    y_pred.insert(0, X_pred.iloc[0][predictionLabels])
    y_pred = np.asarray(y_pred)

    x_predTime = list(X_pred.index)
    x_predTime.append(x_predTime[0] + 1)
    x_predTime = np.asarray(x_predTime)

    print(y_pred)
    print(x_predTime)
    return {"Y": y_pred, "X": x_predTime}
def test_sklearn_regressor_chain(self):
    for n_targets in [2, 3, 4]:
        for model_class in [DecisionTreeRegressor, ExtraTreesRegressor, RandomForestRegressor, LinearRegression]:
            seed = random.randint(0, 2**32 - 1)
            order = [i for i in range(n_targets)]
            random.Random(seed).shuffle(order)
            if model_class != LinearRegression:
                model = RegressorChain(model_class(random_state=seed), order=order)
            else:
                model = RegressorChain(model_class(), order=order)
            X, y = datasets.make_regression(
                n_samples=50, n_features=10, n_informative=5, n_targets=n_targets, random_state=seed
            )
            X = X.astype("float32")
            y = y.astype("float32")
            model.fit(X, y)
            torch_model = hummingbird.ml.convert(
                model, "torch", extra_config={constants.TREE_OP_PRECISION_DTYPE: "float64"}
            )
            self.assertTrue(torch_model is not None)
            np.testing.assert_allclose(
                model.predict(X),
                torch_model.predict(X),
                rtol=1e-4,
                atol=1e-4,
                err_msg="{}/{}/{}".format(n_targets, model_class, seed),
            )
import statistics

from sklearn.ensemble import ExtraTreesRegressor
from sklearn.linear_model import LinearRegression, RidgeCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import KFold
from sklearn.multioutput import RegressorChain
from sklearn.neighbors import KNeighborsRegressor


def chainregressor(X, Y):
    # Base estimators to compare; each is wrapped in a RegressorChain below
    ESTIMATORS = {
        "Extra trees + chain": ExtraTreesRegressor(n_estimators=10, max_features=X.shape[1], random_state=0),
        "K-nn + chain": KNeighborsRegressor(),
        "Linear regression + chain": LinearRegression(),
        "Ridge + chain": RidgeCV(),
    }

    # 5-fold cross-validation with shuffling
    kf = KFold(n_splits=5, shuffle=True)

    meansquared_error_es = dict()
    r2score_es = dict()
    for name, estimator in ESTIMATORS.items():
        meansquared_error = []
        r2score = []
        estimator = RegressorChain(estimator)
        for train_index, test_index in kf.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = Y[train_index], Y[test_index]
            estimator.fit(X_train, y_train)
            y_pred = estimator.predict(X_test)
            meansquared_error.append(mean_squared_error(y_test, y_pred))
            r2score.append(r2_score(y_test, y_pred))
        # Average the per-fold scores for this estimator
        meansquared_error_es[name] = statistics.mean(meansquared_error)
        r2score_es[name] = statistics.mean(r2score)

    print(meansquared_error_es)
    print(r2score_es)
def test_sklearn_regressor_chain(self):
    for n_targets in [2, 3, 4]:
        for model_class in [DecisionTreeRegressor, ExtraTreesRegressor, RandomForestRegressor, LinearRegression]:
            order = [i for i in range(n_targets)]
            random.shuffle(order)
            model = RegressorChain(model_class(), order=order)
            X, y = datasets.make_regression(
                n_samples=50, n_features=10, n_informative=5, n_targets=n_targets, random_state=2021
            )
            X = X.astype('float32')
            y = y.astype('float32')
            model.fit(X, y)
            torch_model = hummingbird.ml.convert(model, "torch")
            self.assertTrue(torch_model is not None)
            np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-5, atol=1e-5)
# Chained Models for Each Output (RegressorChain)
# https://machinelearningmastery.com/multi-output-regression-models-with-python/
# Another approach to using single-output regression models for multioutput regression is to create a linear
# sequence of models.
# The first model in the sequence uses the input and predicts one output; the second model uses the input and
# the output from the first model to make a prediction; the third model uses the input and output from the
# first two models to make a prediction, and so on.
# (A small illustrative sketch of setting the chain order explicitly is shown further below.)

from sklearn.multioutput import RegressorChain

wrapper = RegressorChain(rf)
wrapper.fit(X_train, y_train)
rf_y_test_pred = wrapper.predict(X_test)

# summarize prediction
print(rf_y_test_pred[0:5])
print(rf_y_test_pred.astype('int')[0:5])
rf_y_test_pred = rf_y_test_pred.astype('int')


# In[ ]:


# Use the random forest's predict method on the test data
rf_y_test_pred = rf.predict(X_test)
print(rf_y_test_pred[0:5])
print(rf_y_test_pred.astype('int')[0:5])
rf_y_test_pred = rf_y_test_pred.astype('int')


# ### RMSLE
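# In[ ]:


# Illustrative sketch (not from the original notebook): RegressorChain also accepts an
# explicit `order` argument, so a chosen target is predicted first and its prediction is
# appended to the features used by the models for the remaining targets.
# Assumes the same X_train / y_train / X_test as above; LinearRegression is only a stand-in base model.
from sklearn.linear_model import LinearRegression

chain_order = list(range(y_train.shape[1]))[::-1]  # e.g. predict the last target column first
ordered_wrapper = RegressorChain(LinearRegression(), order=chain_order)
ordered_wrapper.fit(X_train, y_train)
print(ordered_wrapper.predict(X_test)[0:5])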
from math import sqrt

import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.multioutput import RegressorChain
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import LinearSVR

# Scale every column to [0, 1]; dataset_r8 is assumed to have been loaded earlier
sc_X = MinMaxScaler()
dataset_r8 = sc_X.fit_transform(dataset_r8)
dataset_r8 = pd.DataFrame(dataset_r8)

# First four columns are inputs, the remaining columns are targets
X = dataset_r8.iloc[:, :4]
y = dataset_r8.iloc[:, 4:]

# define base model
model = LinearSVR(max_iter=5000)
# model = LinearRegression()

# define the chained multioutput wrapper model
wrapper = RegressorChain(model)

# hold out 10% of the data for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1)

# fit model
wrapper.fit(X_train, y_train)
y_pred = wrapper.predict(X_test)

# per-target mean squared error; take the square root to report RMSE for each output
mse = mean_squared_error(y_true=y_test, y_pred=y_pred, multioutput='raw_values')
for i in mse:
    print(sqrt(i))
# Transform test values using scaler
X_new_sc = scaler.transform(X_new)


# In[ ]:


# Predict on new values
# ---------------------

# Feed-forward ANN
y_pred_ann = model.predict(X_new_sc)

# Support Vector Machine (Regressor)
y_pred_svr_reg = multi_svr.predict(X_new)

# Support Vector Machine (Chained)
y_pred_svr_chn = chain_svr.predict(X_new)

# Decision Trees
y_pred_tree = tree.predict(X_new)


# In[ ]:


def mean_absolute_percentage_error(y_true, y_pred):
    """Mean absolute percentage error

    Parameters
    ----------
    y_true: np.array
        array holding true values
    y_pred: np.array