def _model(self, ts, dummies, stationary, trend, diff, method='lbfgs'):
    """Fit a (S)ARIMA(X) model on ``ts`` and store it on ``self.model``.

    :param ts: time series to model (pandas Series with a DatetimeIndex).
    :param dummies: optional dummy regressors (currently unused — see note).
    :param stationary: whether the series is assumed stationary.
    :param trend: trend specification forwarded to the estimator.
    :param diff: non-seasonal differencing order ``d``.
    :param method: optimizer method (default ``'lbfgs'``).
    """
    # TODO implement robust and corona variable
    import ast  # stdlib: safe parsing of stored hyper-parameter strings

    # NOTE(review): the original first built an exog matrix from `dummies`
    # and then unconditionally overwrote it with self.x_train, so the
    # dummies branch was dead code. The dead assignment is removed here;
    # effective behavior is unchanged (exog is always self.x_train).
    # Confirm whether `dummies` was ever meant to feed into exog.
    exog = self.x_train

    # Number of calendar years spanned by the index (bounds max_P below).
    years = ts.index[-1].year - ts.index[0].year + 1
    # Frequency heuristic: two steps apart within the same or adjacent
    # month -> weekly data (m=52); otherwise monthly (m=12).
    periods = 52 if (ts.index[2].month - ts.index[0].month) in {0, 1} else 12

    hyper_params = self.get_hyperparams()
    if len(hyper_params) == 0:
        # No stored hyper-parameters: run a stepwise auto-ARIMA search.
        sarimax = pm.auto_arima(y=ts, X=exog, seasonal=True,
                                stationary=stationary, d=diff, max_p=10,
                                method=method, trend=trend,
                                with_intercept=True, max_order=None,
                                max_P=int(years / 2),
                                D=pm.arima.nsdiffs(ts, periods), m=periods,
                                stepwise=True, maxiter=45,
                                sarimax_kwargs={'cov_type': None})
    else:
        # Re-fit with previously selected orders. ast.literal_eval replaces
        # eval(): it parses tuple literals like "(1, 1, 2)" without
        # executing arbitrary code from the stored configuration.
        sarimax = pm.ARIMA(
            order=ast.literal_eval(hyper_params['Order']),
            seasonal_order=ast.literal_eval(hyper_params['Seasonal order']),
            method=method, maxiter=45,
            trend=hyper_params['Trend']).fit(y=ts, X=exog)
    self.model = sarimax
def fit(self, price_indices):
    """Fit one moving-average model per capital series in ``price_indices``.

    For each series the price levels are converted to simple returns, an
    order is selected once (on the first series) via ``auto_arima`` with
    ``p`` forced to 0, and a pure-MA pmdarima model is fitted.  The MA
    coefficients, the data tail needed for forecasting, and the effective
    sample size are stored in ``self.model[capital_name]``.

    :param price_indices: mapping of capital name -> price level array.
    """
    self.model = {}
    self.order = None
    # NOTE(review): the original also set an unused local
    # `with_intercept = None` and unpacked `params['with_intercept']`
    # into a never-read `_with_intercept`; both removed (dead code).
    for capital_name in price_indices:
        self.model[capital_name] = {'ma': None, 'data': None, 'size': 0}
        # Convert price levels to simple returns; the first return is 0.
        seq_price = price_indices[capital_name].copy()
        seq_price[1:] = (seq_price[1:] - seq_price[:-1]) / seq_price[:-1]
        seq_price[0] = 0
        if self.order is None:
            # Select (p, d, q) on the first series only; p is pinned to 0
            # so the search explores pure MA structure.
            model = pm.auto_arima(seq_price, seasonal=False, start_p=0,
                                  max_p=0, start_q=3, max_q=50, trace=True)
            self.order = model.get_params()['order']
            # Enforce a minimum MA order of 4.
            if self.order[2] < 4:
                self.order = (self.order[0], self.order[1], 4)
        model = pm.ARIMA(order=self.order, with_intercept=False)
        model.fit(seq_price)
        self.model[capital_name]['ma'] = model.maparams()
        # Keep the last d + q observations needed to seed forecasts.
        self.model[capital_name]['data'] = seq_price[-(self.order[1] +
                                                       self.order[2]):]
        self.model[capital_name]['size'] = seq_price.shape[0] - (
            self.order[1] + self.order[2])
def __init__(self, target_column: str, order: tuple, seasonal_order: tuple,
             method: str = 'lbfgs', use_exog: bool = False,
             with_intercept: bool = True, trend: str = None,
             log: bool = False, power_transf: bool = False,
             one_step_ahead: bool = False):
    """Configure a (S)ARIMA(X) forecaster.

    :param target_column: name of the column to forecast
    :param order: non-seasonal (p, d, q) specification
    :param seasonal_order: seasonal (P, D, Q, m) specification
    :param method: optimization method for fitting
    :param use_exog: whether exogenous regressors are used
    :param with_intercept: whether the model includes an intercept
    :param trend: optional trend component
    :param log: whether a log transform is applied
    :param power_transf: whether a power transform is applied
    :param one_step_ahead: whether prediction is one step ahead
    """
    super().__init__(target_column=target_column,
                     seasonal_periods=seasonal_order[3],
                     name='(S)ARIMA(X)',
                     one_step_ahead=one_step_ahead)
    # Underlying pmdarima estimator; stationarity enforcement is turned
    # off and convergence warnings are suppressed.
    self.model = pmdarima.ARIMA(order=order,
                                seasonal_order=seasonal_order,
                                maxiter=50,
                                disp=1,
                                method=method,
                                with_intercept=with_intercept,
                                enforce_stationarity=False,
                                suppress_warnings=True)
    # Exogenous-input bookkeeping.
    self.use_exog = use_exog
    self.exog_cols_dropped = None
    # Target transformations.
    self.trend = trend
    self.log = log
    if power_transf:
        self.power_transformer = sklearn.preprocessing.PowerTransformer()
    else:
        self.power_transformer = None
    self.contains_zeros = False
def _arima_train(table, input_cols, p, d, q, intercept=True):
    """Fit an ARIMA(p, d, q) model to each column of ``table``.

    :param table: DataFrame-like container holding the input columns.
    :param input_cols: column names to fit a model on.
    :param p: AR order.
    :param d: differencing order.
    :param q: MA order.
    :param intercept: whether to fit an intercept term.
    :return: ``{'model': model}`` where ``model`` maps per-column fitted
        estimators, coefficient arrays, and AIC values, plus a Markdown
        report under ``'_repr_brtc_'``.
    """
    model = _model_dict('arima_model')
    rb = BrtcReprBuilder()
    # Report header (the original called .format() with no arguments on
    # this literal — a no-op, removed).
    rb.addMD(
        strip_margin("""
    |## ARIMA Train Result
    |
    """))
    for column in input_cols:
        # BUG FIX: the original constructed one pm.ARIMA instance outside
        # the loop.  pmdarima's fit() returns self, so every stored entry
        # aliased the same object, silently re-fitted to the last column.
        # A fresh estimator per column keeps each fit independent.
        arima = pm.ARIMA(order=(p, d, q), with_intercept=intercept)
        arima_fit = arima.fit(table[column])
        model['arima_' + str(column)] = arima_fit
        rb.addMD(
            strip_margin("""
        |### Column : {col}
        |
        | - (p,d,q) order : ({p_val}, {d_val}, {q_val})
        | - Intercept : {itc}
        | - Coefficients Array : {ca}
        | - AIC : {aic}
        |
        """.format(col=column, p_val=p, d_val=d, q_val=q, itc=intercept,
                   ca=str(arima_fit.params().tolist()),
                   aic=arima_fit.aic())))
        model['coefficients_array_' + str(column)] = arima_fit.params()
        model['aic_' + str(column)] = arima_fit.aic()
    model['input_columns'] = input_cols
    # model['order'] = arima_fit.order()
    model['intercept'] = intercept
    model['_repr_brtc_'] = rb.get()
    return {'model': model}
def arima(price, window, desc):
    """Rolling one-step-ahead ARIMA forecast over ``price``.

    For each position ``t`` past the warm-up ``window``, an order is
    selected on the trailing window via ``auto_arima`` and a fresh model
    of that order is fitted to produce the forecast for ``t``.  Windows
    containing NaNs are skipped (their prediction stays NaN).

    :param price: 1-D array of prices.
    :param window: length of the trailing training window.
    :param desc: progress-bar description.
    :return: array of the same shape as ``price`` holding the forecasts.
    """
    pred = np.full(price.shape, np.nan)
    for t in tnrange(window, price.shape[0], desc=desc):
        history = price[t - window:t]
        # Skip windows with missing data.
        if np.any(np.isnan(history)):
            continue
        with warnings.catch_warnings():
            # Silence the numerical noise emitted during fitting:
            # uninvertible hessians, invalid/overflow/divide RuntimeWarnings.
            for pattern in ('Inverting', 'invalid', 'overflow', 'divide'):
                warnings.filterwarnings('ignore', pattern)
            # Select the order on this window ...
            search = auto_arima(history, max_p=3, max_q=3, seasonal=False,
                                trace=False, error_action='ignore',
                                suppress_warnings=True)
            search.fit(history)
            best_order = search.get_params()['order']
            # ... then fit a fresh model of that order and forecast 1 step.
            final = pm.ARIMA(order=best_order)
            final.fit(history)
            pred[t] = final.predict(1)
    return pred
def train_model_graph(df):
    """Fit SARIMA(1,1,2)(0,1,1,12) on the bill totals and plot a
    12-month forecast alongside the historical series.

    :param df: DataFrame with 'BILL_DATE' and 'TOTAL' columns.
    :return: a plotly figure with historical and predicted bill traces.
    """
    # Build the time axis: historical dates plus 12 future month-ends.
    history_dates = [pd.Timestamp(d) for d in df['BILL_DATE'].values]
    anchor = history_dates[-1]
    horizon = [pd.Timestamp(anchor) + pd.DateOffset(months=k)
               for k in range(1, 13)]
    timeline = np.append(history_dates, horizon)

    # Fit the seasonal model and forecast one year ahead.
    totals = df['TOTAL'].values
    model = pm.ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12))
    model.fit(totals)
    forecasts = model.predict(12)

    # Overlay history and forecast on one line chart.
    fig = px.line()
    fig.add_scatter(x=timeline, y=totals, name='Historical Bills')
    fig.add_scatter(x=timeline[len(history_dates):], y=forecasts,
                    mode='lines', name='Predicted Bills')
    return fig
def get_arima(data, train_len, test_len):
    """Walk-forward ARIMA evaluation on the tail of ``data``.

    Selects an order once with ``auto_arima`` on the training slice, then
    refits a model of that order at every step, forecasting one value and
    appending the true observation to the training set.

    :param data: pandas Series of observations.
    :param train_len: number of training observations.
    :param test_len: number of test observations (forecast steps).
    :return: (prediction, mse, rmse, mape)
    """
    # Take the last train_len + test_len points and split them.
    tail = data.tail(test_len + train_len).reset_index(drop=True)
    train = tail.head(train_len).values.tolist()
    test = tail.tail(test_len).values.tolist()

    # One-off order selection on the training slice.
    search = auto_arima(train, max_p=3, max_q=3, seasonal=False, trace=True,
                        error_action='ignore', suppress_warnings=True)
    search.fit(train)
    order = search.get_params()['order']
    print('ARIMA order:', order, '\n')

    # Walk forward: refit, forecast one step, then reveal the true value.
    prediction = []
    for i in range(len(test)):
        # model = pm.ARIMA(order, seasonal_order)
        step_model = pm.ARIMA(order=order)
        step_model.fit(train)
        print('working on', i + 1, 'of', test_len,
              '-- ' + str(int(100 * (i + 1) / test_len)) + '% complete')
        prediction.append(step_model.predict()[0])
        train.append(test[i])

    # Error metrics over the whole test horizon.
    mse = mean_squared_error(test, prediction)
    rmse = mse ** 0.5
    mape = mean_absolute_percentage_error(pd.Series(test),
                                          pd.Series(prediction))
    return prediction, mse, rmse, mape
""" print(__doc__) # Author: Taylor Smith <*****@*****.**> import pmdarima as pm import joblib # for persistence import os # ############################################################################# # Load the data and split it into separate pieces y = pm.datasets.load_wineind() train, test = y[:125], y[125:] # Fit an ARIMA arima = pm.ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12)) arima.fit(y) # ############################################################################# # Persist a model and create predictions after re-loading it pickle_tgt = "arima.pkl" try: # Pickle it joblib.dump(arima, pickle_tgt, compress=3) # Load the model up, create predictions arima_loaded = joblib.load(pickle_tgt) preds = arima_loaded.predict(n_periods=test.shape[0]) print("Predictions: %r" % preds) finally:
import numpy as np
import pmdarima as pm
from pmdarima import model_selection

print("pmdarima version: %s" % pm.__version__)

# Load the data and split it into separate pieces
data = pm.datasets.load_wineind()
train, test = model_selection.train_test_split(data, train_size=165)

# Even though we have a dedicated train/test split, we can (and should) still
# use cross-validation on our training set to get a good estimate of the model
# performance. We can choose which model is better based on how it performs
# over various folds.
# Candidate 1: effectively non-seasonal ARIMA(2,1,1) (seasonal period m=1).
model1 = pm.ARIMA(order=(2, 1, 1), seasonal_order=(0, 0, 0, 1))
# Candidate 2: SARIMA(1,1,2)(0,1,1,12) — yearly seasonality for monthly data.
model2 = pm.ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12))
# Rolling-origin CV: 100-point training window, sliding by 24, 1-step horizon.
cv = model_selection.SlidingWindowForecastCV(window_size=100, step=24, h=1)
model1_cv_scores = model_selection.cross_val_score(
    model1, train, scoring='smape', cv=cv, verbose=2)
model2_cv_scores = model_selection.cross_val_score(
    model2, train, scoring='smape', cv=cv, verbose=2)

print("Model 1 CV scores: {}".format(model1_cv_scores.tolist()))
print("Model 2 CV scores: {}".format(model2_cv_scores.tolist()))

# Pick based on which has a lower mean error rate
m1_average_error = np.average(model1_cv_scores)
m2_average_error = np.average(model2_cv_scores)
import numpy as np
import pmdarima as pm
from pmdarima import model_selection

print("pmdarima version: %s" % pm.__version__)

# Load the data and split it into separate pieces
data = pm.datasets.load_wineind()
train, test = model_selection.train_test_split(data, train_size=165)

# Even though we have a dedicated train/test split, we can (and should) still
# use cross-validation on our training set to get a good estimate of the model
# performance. We can choose which model is better based on how it performs
# over various folds.
# Candidate 1: plain non-seasonal ARIMA(2,1,1).
model1 = pm.ARIMA(order=(2, 1, 1))
# Candidate 2: SARIMA(1,1,2)(0,1,1,12) — yearly seasonality for monthly data.
model2 = pm.ARIMA(order=(1, 1, 2),
                  seasonal_order=(0, 1, 1, 12),
                  suppress_warnings=True)
# Rolling-origin CV: 100-point training window, sliding by 24, 1-step horizon.
cv = model_selection.SlidingWindowForecastCV(window_size=100, step=24, h=1)
model1_cv_scores = model_selection.cross_val_score(
    model1, train, scoring='smape', cv=cv, verbose=2)
model2_cv_scores = model_selection.cross_val_score(
    model2, train, scoring='smape', cv=cv, verbose=2)

print("Model 1 CV scores: {}".format(model1_cv_scores.tolist()))
print("Model 2 CV scores: {}".format(model2_cv_scores.tolist()))

# Pick based on which has a lower mean error rate
def evaluate(predictOffest, trainY, testY, params):
    """Evaluate a SARIMA model over the test set with rolling updates.

    Forecasts ``predictOffest`` steps at a time, clamps forecasts into
    [0, 1], then reveals the true values to the model via ``update`` and
    accumulates MAE/RMSE/MASE/R2, averaged over the folds at the end.

    :param predictOffest: forecast horizon per fold.
    :param trainY: initial training series (grown as folds are revealed).
    :param testY: held-out series to walk through.
    :param params: optional dict with "order" / "seasonal_order" overrides.
    :return: dict with averaged "mae", "rmse", "mase", "r2".
    """
    print("ARIMA evaluation start!")
    start = time.time()
    m = {
        "mae": 0,
        "rmse": 0,
        "mase": 0,
        "r2": 0,
    }
    order = (1, 1, 2)
    seasonal_order = (0, 1, 1, 12)
    if params is not None:
        if "order" in params:
            order = tuple(params["order"])
        if "seasonal_order" in params:
            # BUG FIX: the original assigned params["seasonal_order"] to
            # `order`, discarding the caller's seasonal order entirely.
            seasonal_order = tuple(params["seasonal_order"])
    arima = pm.ARIMA(order=order, seasonal_order=seasonal_order)
    arima.fit(trainY)
    count = len(testY) // predictOffest - 1
    print("ARIMA count:{} predictOffest:{} len(testY):{}".format(
        count, predictOffest, len(testY)))
    for i in range((len(testY) // predictOffest) - 1):
        start1 = time.time()
        forecasts = arima.predict(predictOffest)
        # Clamp forecasts into [0, 1]; near-zero values are floored to 0.
        forecasts = [0 if a_ < 0.01 else a_ for a_ in forecasts]
        forecasts = [1 if a_ > 1 else a_ for a_ in forecasts]
        updateT = None
        if i * predictOffest + predictOffest < len(testY):
            updateT = testY[i * predictOffest:i * predictOffest +
                            predictOffest]
        elif i * predictOffest + predictOffest >= (len(testY) - 1):
            # Final partial fold (the unused `predictLen` local is removed).
            updateT = testY[i * predictOffest:len(testY) - 1]
        trainY = np.concatenate((trainY, updateT), axis=None)
        arima.update(updateT)
        _m = ModelUtils.getMetrics(updateT, forecasts, trainY)
        # BUG FIX: accumulate (+=) instead of overwriting, and store MASE in
        # m["mase"] — the original clobbered m["rmse"] with the MASE value
        # both here and in the final averaging, so the reported RMSE was
        # actually MASE and m["mase"] stayed 0.
        m["mae"] += _m["mae"]
        m["rmse"] += _m["rmse"]
        m["mase"] += _m["mase"]
        m["r2"] += _m["r2"]
        end1 = time.time()
        print("{}/{}".format(i, count) +
              (' ARIMA sub MAE: %.2f' % m["mae"]) +
              ",spent: %.4fs" % (end1 - start1))
    # NOTE(review): the loop runs xlen - 1 times but divides by xlen, as in
    # the original — kept to preserve the established averaging convention.
    xlen = len(testY) // predictOffest
    print("ARIMA xlen:{} predictOffest:{}".format(xlen, predictOffest))
    m["mae"] = m["mae"] / xlen
    m["rmse"] = m["rmse"] / xlen
    m["mase"] = m["mase"] / xlen
    m["r2"] = m["r2"] / xlen
    end = time.time()
    print((' ARIMA MAE: %.2f' % m["mae"]) + ",spent: %.4fs" % (end - start))
    return m
def predictFutureStats(player_id, sorted_Matches, all_player_stats_rows,
                       stat_index, topN):
    """Predict next-season value of one statistic for ``player_id``.

    Builds a reference matrix from the ``topN`` most similar players,
    extends the target player's own series one season ahead with a simple
    ARIMA forecast, fits per-player weights by minimizing a weighted SSE,
    and returns the weighted mean of the matched players' next-season
    values.

    NOTE(review): assumes ``all_player_stats_rows`` columns are
    [player_id, ?, age, ..., stat...] — column 2 is treated as age and
    ``stat_index`` selects the statistic; confirm against the schema.
    """
    topN = min(len(sorted_Matches), topN)
    # Get stats of target player.
    p_indx = all_player_stats_rows[:, 0] == player_id
    player_stats_rows = all_player_stats_rows[p_indx, :]
    # player_stats_rows = np.array(stats_cursor.execute('SELECT * FROM Stats where player_id="'+player_id+'"').fetchall())
    player_stat_X = player_stats_rows[:, 2].astype(int)       # ages (x-axis)
    player_stat_Y = player_stats_rows[:, stat_index].astype(float)  # stat values
    next_season_age = int(player_stats_rows[-1, :][2]) + 1
    # ARIMA: endogenous one-step forecast for the new season, appended to
    # player_stat_Y.  An all-zero history just repeats the last value.
    if np.all(player_stat_Y == 0):
        player_stat_Y = np.append(player_stat_Y, player_stat_Y[-1])
    else:
        model = pm.ARIMA(order=(0, 0, 0), maxiter=100, method='powell')
        fitted = model.fit(player_stat_Y)
        APRED = fitted.predict(1)[0]
        player_stat_Y = np.append(player_stat_Y, APRED)
    player_stat_X = np.append(player_stat_X, next_season_age)
    # Ref holds the topN matched players' statistics aligned to the target
    # player's ages; used to fit the prediction weights.
    Ref = np.zeros([len(player_stat_X), topN])
    Ref_weights = np.zeros(topN)
    # Ref_next holds the matched players' next-season values; combined with
    # the fitted weights to generate the prediction.
    Ref_next = np.zeros([topN])
    # Get stats of topN matched players.
    for i in range(topN):
        Ref_weights[i] = sorted_Matches[i][1]   # similarity score
        match_player_id = sorted_Matches[i][0]
        m_indx = all_player_stats_rows[:, 0] == match_player_id
        match_player_stats_rows = all_player_stats_rows[m_indx, :]
        match_player_stat_X = match_player_stats_rows[:, 2].astype(int)
        match_player_stat_Y = match_player_stats_rows[:,
                                                      stat_index].astype(float)
        # Populate the reference matrix only at x locations (i.e. ages)
        # present for the target player.
        for s in range(len(player_stat_X)):
            loc = np.where(match_player_stat_X == player_stat_X[s])
            if loc[0].size > 0:
                Ref[s, i] = match_player_stat_Y[loc]
        # Append the stat from the next season (i.e. the one predicted).
        # NOTE(review): indexes [0] without a size check — raises if the
        # matched player has no row at next_season_age; confirm callers
        # guarantee this.
        next_season_match_stat = match_player_stat_Y[np.where(
            match_player_stat_X == next_season_age)][0]
        Ref_next[i] = next_season_match_stat
    # Remove any entries in the Ref array where all players have 0 values.
    # NOTE(review): non_zero_indx is computed but unused in the active
    # path below (only the commented-out solvers use it).
    non_zero_indx = []
    for t in range(Ref.shape[0]):
        if any(Ref[t, :] > 0):
            non_zero_indx.append(t)
    # Weighted SSE: fit per-player weights minimizing the mismatch between
    # the weighted reference columns and the target player's series.
    objective_fun = functools.partial(weighted_sum_objective,
                                      arg1=player_stat_Y, arg2=Ref)
    x0 = np.ones([topN])
    out = minimize(objective_fun, x0, options={'disp': False,
                                               'maxiter': 200})
    predicted_stats = np.mean(Ref_next * out.x)
    # --- Alternative approaches kept for reference (inactive) ---
    # Various linear combination
    # [x,resid,rank,s] = np.linalg.lstsq(Ref[non_zero_indx,:],player_stat_Y[non_zero_indx]) #least-squares solution to a linear matrix equation (Numpy)
    # [x,resid]=nnls(Ref[non_zero_indx,:],player_stat_Y[non_zero_indx]) #non-negative least squares
    # out=lsq_linear(Ref[non_zero_indx,:],player_stat_Y[non_zero_indx], bounds=(0, np.inf)) # least squares with bound constraints (Scipy)
    # predicted_stats=np.sum(out.x.T*Ref_next) #Linear combination of players
    #
    # using cvxpy
    # x = cvx.Variable(topN)
    # A=Ref[non_zero_indx,:]
    # b=player_stat_Y[non_zero_indx]
    # objective = cvx.Minimize(cvx.sum_squares(A*x - b))
    # constraints = [cvx.sum(x) == 1, x>=0] #convex
    # prob = cvx.Problem(objective, constraints)
    # result = prob.solve()
    # predicted_stats=np.sum(x.value.T*Ref_next)
    #
    # using regression type 1
    # x_train=Ref #topN-dim features x num_seasons observations
    # y_train=player_stat_Y #num_seasons labels
    # model = KNeighborsRegressor(n_neighbors=3)
    # model =RandomForestRegressor(max_depth=2, random_state=0)
    # model.fit(x_train, y_train)
    # predicted_stats=model.predict(Ref_next.reshape(1,-1))[0] #topN-dim prediction feature vector
    #
    # using regression type 2
    # x_train=Ref.T
    # y_train=Ref_next
    # model = KNeighborsRegressor(n_neighbors=3)
    # model =RandomForestRegressor(max_depth=4, random_state=0)
    # model.fit(x_train, y_train)
    # model.fit(x_train, y_train, sample_weight=Ref_weights)
    # predicted_stats=model.predict(player_stat_Y.reshape(1,-1))[0]
    # if math.isnan(predicted_stats):
    #     predicted_stats=0
    return predicted_stats
# Second exogenous regressor: max temperature after the 2020 cut-off index.
# NOTE(review): this fragment relies on names defined earlier in the script
# (new_temp, year_2020_index, exog_train_series_1/2, exog_pred_series_1,
# train, n_periods, years, ind, path_to_save, index_files, i).
exog_pred_series_2 = new_temp[year_2020_index + 1:]
exog_pred_series_2 = exog_pred_series_2["tmax"]
exog_pred_2 = np.expand_dims(exog_pred_series_2.to_numpy(), axis=1)
# Stack both exogenous series column-wise for training and prediction.
exog_train_series = pd.concat([exog_train_series_1, exog_train_series_2],
                              axis=1)
exog_train = exog_train_series.to_numpy()
exog_pred_series = pd.concat([exog_pred_series_1, exog_pred_series_2],
                             axis=1)
exog_pred = exog_pred_series.to_numpy()
# change parameters accordingly
my_order = (0, 1, 2)
smodel = pm.ARIMA(order=my_order)
# NOTE(review): `exogenous=` is deprecated in newer pmdarima in favor of
# `X=`; confirm the pinned pmdarima version still accepts it.
smodel_fit = smodel.fit(train, exogenous=exog_train)
fitted = smodel.predict(n_periods=n_periods, exogenous=exog_pred)
# Wrap the forecast, attach the forecast years, and append to history.
fitted_series = pd.Series(fitted, name='Value')
fitted_series = pd.concat([years, fitted_series], axis=1)
final = pd.concat([ind, fitted_series], axis=0, ignore_index=True)
plt.plot(final["Value"])
plt.show()
final.to_csv(path_to_save + index_files[i], index=False)
#mv=np.ones_like(mesh[0],dtype=np.int)*6 paramsV = np.array([*mesh]).T if stochastic == True: np.random.shuffle(paramsV) paramsV = paramsV[:ncandidates] print(paramsV.shape) params_final = np.zeros_like(paramsV[0]) least_err = 5000 test_err = 0 yhat = [] for params in paramsV: print("Training ARIMA(%d,%d,%d) seasonal=(%d,%d,%d,%d)" % tuple(params)) model = pm.ARIMA(order=tuple(params[:3]), seasonal_order=tuple(params[3:]), suppress_warnings=True) try: model.fit(train) except Exception as ex: print("Error occurred: %s" % ex) continue y = model.predict(n_periods=nV + nT) err = np.square(y[:nV] - validation_y).mean() print("MSE=%.5f" % err) if least_err > err: least_err = err yhat = y params_final = params test_err = np.square(yhat[nV:] - test_y).mean()
""" print(__doc__) # Author: Taylor Smith <*****@*****.**> import numpy as np import pmdarima as pm from pmdarima import model_selection from matplotlib import pyplot as plt print("pmdarima version: %s" % pm.__version__) # Load the data and split it into separate pieces y = pm.datasets.load_wineind() est = pm.ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12), suppress_warnings=True) cv = model_selection.SlidingWindowForecastCV(window_size=150, step=4, h=4) predictions = model_selection.cross_val_predict(est, y, cv=cv, verbose=2, averaging="median") # plot the predictions over the original series x_axis = np.arange(y.shape[0]) n_test = predictions.shape[0] plt.plot(x_axis, y, alpha=0.75, c='b') plt.plot(x_axis[-n_test:], predictions, alpha=0.75, c='g') # Forecasts plt.title("Cross-validated wineind forecasts")
def build_model(self, y_train):
    """Select an ARIMA order from ``y_train`` and instantiate the model.

    :param y_train: training series passed to ``self.find_order``.
    :return: ``self`` (fluent interface); the unfitted estimator is
        stored in ``self.model``.
    """
    order = self.find_order(y_train)
    # Pass the order by keyword for clarity and consistency with the
    # other pm.ARIMA call sites in this codebase.
    self.model = pm.ARIMA(order=order)
    return self