def get(self, request, *args, **kwargs):
    """Fit a VARMAX model on a date window and return its forecast as JSON.

    Query parameters (both formatted ``YYYY-MM-DD``):
        startdate: first date of the training window
            (default ``1970-01-30``).
        enddate: the training window ends one month after this date
            (default ``2018-01-01``); rows from that point on are held out.

    Returns:
        Response: the JSON form of a frame holding the forecast ``price``
        and the held-out ``actual`` price, indexed by date string, plus a
        ``mape`` entry taken from ``forecast_accuracy``.
    """
    import json  # local import: needed to decode the frame's JSON text

    start_date = self.request.query_params.get('startdate', '1970-01-30')
    end_date = self.request.query_params.get('enddate', '2018-01-01')

    data = read_frame(PriceProduction.objects.all())
    data['date'] = pd.to_datetime(data['date'])
    data = data.drop('id', axis=1)
    data = data.set_index('date')

    startdate = dat.strptime(start_date, '%Y-%m-%d')
    enddate = dat.strptime(end_date, '%Y-%m-%d')
    nextmonth = enddate + relativedelta.relativedelta(months=1)
    # Both slices are bounded by `nextmonth`; presumably a row dated exactly
    # `nextmonth` ends up in both frames, which is why one step fewer than
    # len(test) is forecast below.
    train, test = data[startdate:nextmonth], data[nextmonth:]

    # NOTE(review): VARMAX documents `order` as (p, q); confirm that the
    # third element passed here is intentional.
    model = VARMAX(train, order=(1, 1, 1))
    model_fit = model.fit(disp=False)
    yhat = model_fit.forecast(len(test) - 1)
    yhat['actual'] = test['price']
    predictdata = yhat.drop("production", axis=1)
    metrics = forecast_accuracy(predictdata['price'], predictdata['actual'])
    predictdata.index = predictdata.index.astype("str")
    print(predictdata)

    # `to_json` emits JSON, not a Python literal: missing values become
    # `null`, which ast.literal_eval cannot parse. Decode it as JSON.
    payload = json.loads(predictdata.to_json())
    payload['mape'] = metrics['mape']
    return Response(payload)
def find_best_parameters(self, data: pd.DataFrame):
    """
    Given a dataset, finds the best parameters using the settings in the class

    For every non-target column, a VARMAX model is fitted on the pair
    (target, column) for each (p, q) in ``0..self.p_max`` x ``0..self.q_max``
    and scored with the results attribute named by ``self.scoring``. The
    lowest score overall selects ``self.best_p``, ``self.best_q`` and the
    column position ``self.best_d``.

    Raises:
        ValueError: if ``data`` has no column besides the target.
    """
    # dmax is the number of columns; it bounds the candidate column positions
    dmax = data.shape[1]
    cols = data.columns.tolist()
    # TODO: #14 Make sure that we have a way to not rely on column order to determine the target
    # It is assumed that the first column of the dataframe is the target variable
    i = 1
    results_dict = {}
    for d_val in range(1, dmax):
        # Takes the target column and one other endogenous column at a time
        # and makes a prediction based on that. Then selects the best
        # exogenous column at the end.
        y_train = data.iloc[:, [0, d_val]]
        print('\nAdditional Variable in VAR model = %s' % cols[d_val])
        info_criteria = pd.DataFrame(
            index=['AR{}'.format(p) for p in range(0, self.p_max+1)],
            columns=['MA{}'.format(q) for q in range(0, self.q_max+1)]
        )
        for p_val, q_val in itertools.product(range(0, self.p_max+1), range(0, self.q_max+1)):
            row, col = 'AR{}'.format(p_val), 'MA{}'.format(q_val)
            if p_val == 0 and q_val == 0:
                # No (0, 0) model is fitted; its cell is left empty.
                info_criteria.loc[row, col] = np.nan
            else:
                try:
                    model = VARMAX(y_train, order=(p_val, q_val), trend='c')
                    # `maxiter` is the iteration-limit keyword of fit()
                    model = model.fit(maxiter=1000, disp=False)
                    info_criteria.loc[row, col] = getattr(model, self.scoring)
                except Exception:
                    # Best effort: a failed fit leaves this cell as NaN.
                    pass
            # Report before incrementing so the number matches the iteration
            # on both the success and the failure path.
            print(' Iteration %d completed' % i)
            i += 1
        info_criteria = info_criteria[info_criteria.columns].astype(float)
        interim_p, interim_q, interim_bic = find_lowest_pq(info_criteria)
        if self.verbose == 1:
            _, axis = plt.subplots(figsize=(20, 10))
            axis = sns.heatmap(
                info_criteria,
                mask=info_criteria.isnull(),
                ax=axis,
                annot=True,
                fmt='.0f'
            )
            axis.set_title(self.scoring)
        results_dict[(interim_p, d_val, interim_q)] = interim_bic
    if not results_dict:
        raise ValueError('data needs at least one column besides the target')
    (best_p, best_d, best_q), _ = min(results_dict.items(), key=operator.itemgetter(1))
    self.best_p = int(best_p)
    self.best_d = int(best_d)
    self.best_q = int(best_q)
    print('Best variable selected for VAR: %s' % data.columns.tolist()[self.best_d])
def varma_prediction(train, test, steps):
    """Fit a VARMA model on `train` and return its forecast as a DataFrame.

    The (p, q) order comes from `get_var_pq_params(train)`. When `steps` is
    falsy the forecast covers `len(test)` steps, otherwise `steps` steps.
    The returned frame uses the columns of `train`.
    """
    ar_order, ma_order = get_var_pq_params(train)
    fitted = VARMAX(train, order=(ar_order, ma_order)).fit(disp=False)
    horizon = steps if steps else len(test)
    forecast = fitted.forecast(steps=horizon)
    return pd.DataFrame(forecast, columns=train.columns)
def varma_forecast(history, config):
    """Fit a VARMAX model on `history` and return item 0 of a one-step prediction.

    `config` unpacks to `(order, trend)`. Stationarity and invertibility
    enforcement are both switched off for the fit.
    """
    order, trend = config
    fitted = VARMAX(
        history,
        order=order,
        trend=trend,
        enforce_stationarity=False,
        enforce_invertibility=False,
    ).fit(disp=False)
    # predict exactly the first position past the end of the history
    next_position = len(history)
    one_step = fitted.predict(next_position, next_position)
    return one_step[0]
def model_varmax(train_data, test_data, train_data1, test_data1):
    """Fit a VARMA(1, 1) on two training series and print a 10-step RMSE.

    `train_data1` and `train_data` become columns 0 and 1 of the endogenous
    matrix. The printed error compares the column-0 forecast with the first
    10 values of `test_data1`. `test_data` is accepted but not used.
    """
    horizon = 10
    # reshape(-1, 1) accepts any series length, not just 372 observations
    x = train_data1.reshape((-1, 1))
    x1 = train_data.reshape((-1, 1))
    lis = np.concatenate((x, x1), axis=1)
    print(np.shape(lis))
    # forecast
    model = VARMAX(lis, order=(1, 1))
    model_fit = model.fit(disp=-1)
    print(model_fit.summary().tables[1])
    predictions = model_fit.forecast(steps=horizon)
    # The label says RMSE, so take the square root of the mean squared error.
    rmse = np.sqrt(mean_squared_error(test_data1[0:horizon], predictions[:, 0]))
    print('VARMAX RMSE: ', rmse)
def VectorAutoRegressiveMovingAverage(self):
    """Fit a VARMA(1, 1) model and forecast `self.end - self.start` steps.

    Returns:
        The forecast produced by the fitted model for that horizon.
    """
    # currently, exodata not used.
    n_steps = self.end - self.start
    # NOTE(review): `data` is not defined inside this method; it is
    # presumably a module-level frame (or should be `self.data`) - confirm.
    datalist = data.values.tolist()
    model = VARMAX(datalist, order=(1, 1))
    model_fit = model.fit(disp=False)
    # The state-space forecast() takes only the horizon. Passing the fitted
    # endog first (the VAR calling convention) collides with `steps`.
    return model_fit.forecast(steps=n_steps)
def get(self, request, *args, **kwargs):
    """Fit a VARMAX model on all stored rows and return a price forecast.

    Query parameters:
        nsteps: number of steps to forecast (default 10).

    Returns:
        Response: the forecast ``price`` values keyed by date string.
    """
    import json  # local import: needed to decode the series' JSON text

    n_steps = int(self.request.query_params.get('nsteps', 10))

    data = read_frame(PriceProduction.objects.all())
    data['date'] = pd.to_datetime(data['date'])
    data = data.drop('id', axis=1)
    data = data.set_index('date')

    # NOTE(review): VARMAX documents `order` as (p, q); confirm that the
    # third element passed here is intentional.
    model = VARMAX(data, order=(1, 1, 1))
    model_fit = model.fit(disp=False)
    yhat = model_fit.forecast(n_steps)
    yhat = yhat['price']
    yhat.index = yhat.index.astype("str")

    # `to_json` emits JSON, not a Python literal (`null` would break
    # ast.literal_eval), so decode it as JSON.
    payload = json.loads(yhat.to_json())
    return Response(payload)
def VARMA(self, order=(1, 1), name="VARMA"):
    """Fit a VARMA model on the training data and plot its forecast.

    Args:
        order: (p, q) order handed to VARMAX.
        name: label printed as a banner and used as the plot title.

    The forecast covers one step per row of ``self.data_test`` and is drawn
    in red on top of ``self.data_train_and_test``.
    """
    print("=" * 30 + "\n" + name + "\n" + "=" * 30 + "\n")
    # fit model
    model = VARMAX(self.data_train, order=order)
    model_fit = model.fit(disp=False)
    # make prediction: the horizon follows the test set instead of a fixed
    # 42, so the frame below always lines up with the test index
    yhat = model_fit.forecast(steps=len(self.data_test))
    # Use the raw values so the rows are placed on the test index by
    # position rather than re-aligned on the forecast's own labels.
    values = getattr(yhat, "values", yhat)
    prediction = pd.DataFrame(
        values,
        index=self.data_test.index.values,
        columns=self.data_train.columns.values,
    )
    plt.plot(self.data_train_and_test)
    plt.plot(prediction, color='red')
    plt.title(name)
    plt.show()
def test_4(self):
    """Compare server-side VARMAX(1, 2) scoring with statsmodels' forecast.

    The fitted model is exported to PMML, uploaded and scored for 5 steps.
    The point forecasts and the 95% interval bounds of all three variables
    must then match the local ``get_forecast(5)`` output.
    """
    data = self.getMultiDimensionalData()
    result = VARMAX(data, order=(1, 2)).fit()
    f_name = 'varmax_12.pmml'
    StatsmodelsToPmml(result, f_name, model_name="varmax_test", conf_int=[95])
    model_name = self.adapa_utility.upload_to_zserver(f_name)
    z_pred = self.adapa_utility.score_in_zserver(model_name, {'h': 5}, 'TS')
    forecasts = result.get_forecast(5)

    scored = z_pred['outputs'][0]
    variables = ('SanDiegoHum', 'SanDiegoPressure', 'SanDiegoTemp')

    # (server values, local values) pairs, collected in the order they are
    # asserted: point forecasts, then lower bounds, then upper bounds
    comparisons = []
    for variable in variables:
        comparisons.append((
            list(scored['predicted_' + variable].values()),
            forecasts.predicted_mean[variable].values.tolist(),
        ))
    intervals = forecasts.conf_int()
    for bound in ('lower', 'upper'):
        for variable in variables:
            comparisons.append((
                list(scored['conf_int_95_' + bound + '_' + variable].values()),
                intervals[bound + ' ' + variable].values.tolist(),
            ))

    for server_values, local_values in comparisons:
        self.assertEqual(np.allclose(server_values, local_values), True)
def predict(self, action):
    """
    Description: slides the stored series forward by one entry (`action`),
    refits a VARMAX model of order (self.p, self.p) on it and returns the
    one-step forecast, which is also stored in self.y_pred and printed
    """
    # slide the window: drop the oldest entry, append the new action
    del self.ts[0]
    self.ts.append(action)

    fitted = VARMAX(self.ts, order=(self.p, self.p)).fit(disp=False)
    self.y_pred = fitted.forecast(steps=1)
    print(self.y_pred)
    return self.y_pred
def trainVectorARMAMethodModel():
    """Train a VARMA(1, 2) model with a constant trend, save it and print its summary."""
    training_set = readVectorARMAMethodXTrain()

    # NOTE(review): an earlier comment here said p = 5 gave the minimum AIC
    # and BIC, but the order actually used is (1, 2) - confirm which is meant.
    fitted = VARMAX(training_set, order=(1, 2), trend="c").fit(
        maxiter=1000, disp=False
    )

    # persist the fitted results through the project's save helper
    saveVectorARMAMethodModel(fitted)
    print(fitted.summary())
def _fit(self, train_data):
    """Fit a VARMA model of order ``(self._p, self._q)`` on `train_data`.

    Parameters
    ----------
    train_data: pd.DataFrame
        A pandas DataFrame representing the data used for training.

    Returns
    -------
    None
        The fitted results object is stored in ``self._model``.
    """
    self._model = VARMAX(train_data, order=(self._p, self._q)).fit(disp=False)
def varma_final(self):
    """Produce rolling one-step VARMA(1, 1) forecasts, one per test row.

    ``self.total`` is log-transformed and passed through
    ``self.difference``. After every forecast the predicted row is appended
    to the history, so the next fit sees it.

    Returns:
        list: one 1-D array per test row holding the forecast for every
        series, rounded to 2 decimals with negative values replaced by 0.
    """
    input_data = numpy.array(self.total)
    input_data = numpy.log(input_data)
    input_data = self.difference(input_data)
    input_data = pd.DataFrame(input_data)
    input_data = input_data.dropna()

    predictions = []
    for _ in range(len(self.test)):
        model = VARMAX(input_data, order=(1, 1))
        model_fit = model.fit(disp=False)
        yhat = numpy.asarray(model_fit.forecast()).reshape(1, -1)
        predictions.append(yhat[0])
        # DataFrame.append returned a new frame that was being discarded, so
        # the history never grew. Rebind the concatenated frame instead.
        next_row = pd.DataFrame(yhat, columns=input_data.columns)
        input_data = pd.concat([input_data, next_row], ignore_index=True)

    cleaned = []
    for step in predictions:
        rounded = numpy.round(step, 2)
        # element-wise floor at zero; a plain `if rounded < 0` is ambiguous
        # for a forecast that holds more than one value
        rounded[rounded < 0] = 0
        cleaned.append(rounded)
    return cleaned
def varmax_model_fit(self, x_train, x_test, df_time, oreder=(1, 0), col_exog=None, verbose=1):
    """Fit a VARMAX model on `x_train` and forecast over the span of `x_test`.

    Args:
        x_train: training frame. Columns named in `col_exog` are removed
            from it in place and used as exogenous regressors.
        x_test: test frame. Treated like `x_train`, and its index is
            replaced by ``df_time['test']``.
        df_time: mapping whose ``'test'`` entry indexes the forecast.
        oreder: (p, q) order handed to VARMAX.
        col_exog: names of the exogenous columns; ``None`` or empty means
            no exogenous regressors.
        verbose: when 1, the Durbin-Watson table, the forecast and one plot
            per column are written to the Streamlit page.

    Returns:
        The forecast frame, indexed by ``df_time['test']`` and carrying the
        columns of `x_test`.
    """
    # None replaces the former mutable `[]` default; both mean "no exog"
    exog_cols = list(col_exog) if col_exog else []
    if exog_cols:
        exo_train = x_train[exog_cols].copy()
        exo_test = x_test[exog_cols].copy()
        # The caller's frames are modified in place, as before.
        x_train.drop(columns=exog_cols, inplace=True)
        x_test.drop(columns=exog_cols, inplace=True)
        model = VARMAX(x_train, order=oreder, exog=exo_train)
        forecast_kwargs = {'exog': exo_test}
    else:
        model = VARMAX(x_train, order=oreder)
        forecast_kwargs = {}
    result = model.fit()

    out = durbin_watson(result.resid)
    df_results = pd.DataFrame(
        {col: [round(val, 2)] for col, val in zip(x_train.columns, out)}
    )
    if verbose == 1:
        st.subheader('durbin_watson test')
        st.write('the closer the result is to 2 then there is no correlation, the closer to 0 or 4 then correlation implies')
        st.write(df_results.T)

    df_forecast = result.forecast(steps=x_test.shape[0], **forecast_kwargs)
    df_forecast.index = df_time['test']
    df_forecast.columns = x_test.columns
    x_test.index = df_time['test']

    if verbose == 1:
        st.write(df_forecast)
        for i, col in enumerate(x_test):
            fig = ds().nuova_fig(555+i)
            st.subheader(col)
            df_forecast[col].plot(label='Predicition')
            x_test[col].plot(label='True')
            ds().legenda()
            st.pyplot(fig)
    return df_forecast
def initialize(self, params):
    """Set up the model state from `params`.

    Args:
        params: mapping with ``'p'`` (number of stored rows, also used as
            the model order elsewhere) and ``'dim'`` (length of each row).

    After the call ``self.ts`` holds ``p`` independent rows of ``dim``
    zeros and ``self.initialized`` is True.
    """
    self.p = params['p']
    self.action_dim = params['dim']
    # Build each row separately; `[[0] * dim] * p` would repeat one shared
    # list object p times.
    self.ts = [[0] * self.action_dim for _ in range(self.p)]
    # A leftover debugging fit followed by exit() used to end the process
    # here, which left the flag below unreachable.
    self.initialized = True
def precictTrajectory(self):
    """Forecast the next 5 positions with a VARMA(1, 1) model.

    Returns a dict with the last-appeared object's id and a list of
    ``{"long": ..., "lat": ...}`` points, one per forecast step.
    """
    predict_num = 5
    # The result is currently unused; the call is kept as in the original.
    gps_points = self.gps_points()
    # data = [[p["long"],p["lat"]] for p in gps_points]
    # NOTE(review): the model is fitted on 100 random rows rather than on
    # the GPS points (see the disabled line above) - confirm this is intended.
    data = []
    for _ in range(100):
        first = random()
        data.append([first, first + random()])

    fitted = VARMAX(data, order=(1, 1)).fit(disp=False)
    forecast = fitted.forecast(predict_num)

    points = []
    for step in forecast:
        points.append({"long": step[0], "lat": step[1]})
    return {
        "object_id": self.lastappeared.object_id,
        "gps_points": points,
    }
def _fit(self, train_features, train_target):
    """Fit a VARMAX model with `train_target` as endog and `train_features` as exog.

    The model order is ``(self._p, self._q)``. The fitted results are kept
    in ``self._model`` and ``self._is_fit`` is set to True.

    Parameters
    ----------
    train_features: pd.DataFrame
        A pandas DataFrame representing the training features.
    train_target: pd.Series
        A pandas Series representing the target variable.

    Returns
    -------
    None
    """
    order = (self._p, self._q)
    fitted = VARMAX(endog=train_target, exog=train_features, order=order).fit(
        disp=False
    )
    self._model = fitted
    self._is_fit = True
def regress_varmax(df_endog, bin_size_weeks, n):
    """
    Transforms the endogenous series with ``VARMAXTransformer("varmax")``,
    keeps only the columns with more than 4 distinct values, fits a
    VARMAX(4, 0) model on the remaining values and logs its summary.

    :param df_endog: the multiple endogenous time series, not yet transformed
    :param bin_size_weeks: the bin size in weeks
    :type bin_size_weeks: pd.Timedelta
    :param n: the number of steps required in each patent series - must make a square matrix!
    :return: None
    """
    order = 4
    transformer = VARMAXTransformer("varmax")
    df_endog = transformer.transform(df_endog, bin_size_weeks, n)

    # keep only columns with more distinct values than the AR order
    distinct_counts = df_endog.apply(pd.Series.nunique, axis=0)
    df_endog = df_endog.loc[:, distinct_counts > order]
    logger.debug(df_endog)
    logger.debug(df_endog.describe())

    logger.debug("Training VARMAX...")
    res = VARMAX(df_endog.values, order=(order, 0)).fit(maxiter=1000, disp=True)
    logger.debug(res.summary())
def build_var_model(df, criteria, forecast_period=2, p_max=3, q_max=3, verbose=0):
    """
    This builds a VAR model given a multivariate time series data frame with time as the Index.
    Note that the input "y_train" can be a data frame with one column or multiple cols or a
    multivariate array. However, the first column must be the target variable. The others are added.
    You must include only Time Series data in it. DO NOT include "Non-Stationary" or "Trendy" data.
    Make sure your Time Series is "Stationary" before you send it in!! If not, this will give spurious
    results. Since it automatically builds a VAR model, you need to give it a Criteria to optimize on.
    You can give it any of the following metrics as criteria: AIC, BIC, Deviance, Log-likelihood.
    You can give the highest order values for p and q. Default is set to 3 for both.

    The last `forecast_period` rows are held out. For every other column a
    VARMAX model on (target, column) is fitted for each (p, q) up to
    (`p_max`, `q_max`) and scored with the results attribute named
    `criteria`. The best combination is refitted and used to forecast.

    Returns:
        tuple: (fitted best model, forecast summary frame, rmse, norm_rmse).

    Raises:
        ValueError: if `df` has no column besides the target.
    """
    df = df[:]
    # dmax is the number of columns; it bounds the candidate column positions
    dmax = df.shape[1]
    cols = df.columns.tolist()
    ts_train = df[:-forecast_period]
    ts_test = df[-forecast_period:]
    if verbose == 1:
        print('Data Set split into train %s and test %s for Cross Validation Purposes'
              % (ts_train.shape, ts_test.shape))
    # It is assumed that the first column of the dataframe is the target variable
    i = 1
    results_dict = {}
    for d_val in range(1, dmax):
        y_train = ts_train.iloc[:, [0, d_val]]
        print('\nAdditional Variable in VAR model = %s' % cols[d_val])
        info_criteria = pd.DataFrame(
            index=['AR{}'.format(p) for p in range(0, p_max + 1)],
            columns=['MA{}'.format(q) for q in range(0, q_max + 1)])
        for p_val, q_val in itertools.product(range(0, p_max + 1), range(0, q_max + 1)):
            row, col = 'AR{}'.format(p_val), 'MA{}'.format(q_val)
            if p_val == 0 and q_val == 0:
                # No (0, 0) model is fitted; its cell is left empty.
                info_criteria.loc[row, col] = np.nan
            else:
                try:
                    model = VARMAX(y_train, order=(p_val, q_val), trend='c')
                    # `maxiter` / `disp` are the keywords fit() understands
                    model = model.fit(maxiter=1000, disp=False)
                    info_criteria.loc[row, col] = getattr(model, criteria)
                except Exception:
                    # Best effort: a failed fit leaves this cell as NaN.
                    pass
            # Report before incrementing so the number matches the iteration
            # on both the success and the failure path.
            print(' Iteration %d completed' % i)
            i += 1
        info_criteria = info_criteria[info_criteria.columns].astype(float)
        interim_p, interim_q, interim_bic = find_lowest_pq(info_criteria)
        if verbose == 1:
            fig, ax = plt.subplots(figsize=(20, 10))
            ax = sns.heatmap(info_criteria,
                             mask=info_criteria.isnull(),
                             ax=ax,
                             annot=True,
                             fmt='.0f')
            ax.set_title(criteria)
        results_dict[(interim_p, d_val, interim_q)] = interim_bic
    if not results_dict:
        raise ValueError('df needs at least one column besides the target')
    (best_p, best_d, best_q), _ = min(results_dict.items(), key=operator.itemgetter(1))
    best_p, best_d, best_q = int(best_p), int(best_d), int(best_q)
    print('Best variable selected for VAR: %s' % ts_train.columns.tolist()[best_d])
    y_train = ts_train.iloc[:, [0, best_d]]
    bestmodel = VARMAX(y_train, order=(best_p, best_q), trend='c')
    bestmodel = bestmodel.fit()
    if verbose == 1:
        bestmodel.plot_diagnostics(figsize=(16, 12))
        ax = bestmodel.impulse_responses(12, orthogonalized=True).plot(figsize=(12, 4))
        ax.set(xlabel='Time Steps', title='Impulse Response Functions')
    res2 = bestmodel.get_forecast(forecast_period)
    res2_df = res2.summary_frame()
    rmse, norm_rmse = print_dynamic_rmse(ts_test.iloc[:, 0], res2_df['mean'].values,
                                         ts_train.iloc[:, 0])
    return bestmodel, res2_df, rmse, norm_rmse
print('stationary')
x.plot()
plt.show()
plt.pause(5)
test_stationarity(data['qty'])

# Work on the differenced log of the quantity series.
data['qty'] = pd.Series(np.log(data['qty']).diff().dropna())
data.dropna(inplace=True)
# data.plot()
# plt.show()
# train, validate = train_test_split(data, test_size = 0.3)

# 80/20 chronological split. The validation slice used to start at 20%,
# which made it overlap most of the training rows.
split_at = int(0.8 * (len(data)))
train = data[:split_at]
validate = data[split_at:]

model = VARMAX(endog=train, enforce_stationarity=True)
# NOTE(review): `maxiters` is not the iteration keyword (`maxiter`); left
# unchanged because it is unclear that a single iteration is really wanted.
model_fit = model.fit(maxiters=1)
print('-----------RESULTS----------------')
print(model_fit.summary())

# '%Y%m%' ends in a stray '%' and cannot parse '20180101'; the day directive
# was missing.
# NOTE(review): confirm that predict() accepts `steps`; it is normally
# driven by start/end.
prediction = model_fit.predict(start=datetime.strptime('20180101', '%Y%m%d'),
                               steps=len(validate))
print(prediction)

# NOTE(review): `result` is not defined in the visible code; it looks like a
# separately fitted VAR results object - confirm where it comes from.
print('Variables for th model %s' % result.exog_names)
order = result.k_ar
forecast_values = pd.DataFrame(
    data=result.forecast(y=data['qty'].values, steps=5))
result.plot_forecast(steps=5, plot_stderr=False)

# The state-space forecast() takes only the horizon; passing the endog first
# is the VAR calling convention.
pred = model_fit.forecast(steps=len(validate))
Vector Autoregression Moving-Average with Exogenous Regressors (VARMAX)

The Vector Autoregression Moving-Average with Exogenous Regressors (VARMAX) is
an extension of the VARMA model that also includes the modeling of exogenous
variables. It is a multivariate version of the ARMAX method.

Exogenous variables are also called covariates and can be thought of as
parallel input sequences that have observations at the same time steps as the
original series. The primary series are referred to as endogenous data to
contrast them with the exogenous sequence(s). The observations for exogenous
variables are included in the model directly at each time step and are not
modeled in the same way as the primary endogenous sequence (e.g. as an AR, MA,
etc. process).

The VARMAX method can also be used to model the subsumed models with exogenous
variables, such as VARX and VMAX.

The method is suitable for multivariate time series without trend and seasonal
components with exogenous variables.
'''
from random import random
# VARMAX example
from statsmodels.tsa.statespace.varmax import VARMAX
# contrived dataset with dependency: the second series is the first plus noise
data = list()
for i in range(100):
    v1 = random()
    v2 = v1 + random()
    row = [v1, v2]
    data.append(row)
# one exogenous value per observation: the time step plus noise
data_exog = [x + random() for x in range(100)]
# fit model
model = VARMAX(data, exog=data_exog, order=(1, 1))
model_fit = model.fit(disp=False)
# make prediction; a single row of exogenous values is supplied for the
# forecast call, which is made without an explicit number of steps
data_exog2 = [[100]]
yhat = model_fit.forecast(exog=data_exog2)
print(yhat)
plot_acf(endog_diff['energy_sum'], lags=20) # In[51]: plot_pacf(endog_diff['energy_sum'], lags=20) # In[54]: from statsmodels.tsa.statespace.varmax import VARMAX model_varmax = VARMAX(endog=endog_diff, exog=exog, order=(15, 0)) results_varmax = model_varmax.fit(maxiter=5000, disp=False) results_varmax.summary() # In[55]: results_varmax.plot_diagnostics() # In[56]: #exog_test = merged_df_varmax_test[['humidity', 'temperatureLow', 'month_1', 'month_2', 'month_3', # 'month_4','month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10','month_11', 'month_12']] #exog_test = merged_df_varmax_test[['humidity', 'day_0', 'day_1', 'day_2', 'day_3', 'day_4', 'day_5', 'day_6',
def varmax(
    tickers,
    p: int = 2,
    q: int = 0,
):
    """Fit a VARMAX(p, q) model on ticker returns and write residual CSVs.

    The rows of ``data`` are split 70% / 15% / 15% into train, validation
    and test. The ``<ticker>_returns`` columns are endogenous and every
    other column is exogenous. Residuals for the three splits are written
    under ``../output/VARMAX_results/``.

    Args:
        tickers: ticker names; ``<ticker>_returns`` must be a column of
            ``data``.
        p: autoregressive order.
        q: moving-average order.
    """
    # NOTE(review): `data` and `n` are not defined in this function; they
    # are presumably module-level (the frame and its row count) - confirm.
    # Split data
    train_val_test_split = {'train': 0.7, 'val': 0.85, 'test': 1}
    train_end = int(n * train_val_test_split['train'])
    val_end = int(n * train_val_test_split['val'])
    test_end = int(n * train_val_test_split['test'])
    train_data = data[0:train_end]
    val_data = data[train_end:val_end]
    test_data = data[val_end:test_end]

    # split data in X and Y
    y_list = [ticker + '_returns' for ticker in tickers]
    # Train
    endog_y = train_data[y_list]
    exog_x = train_data.drop(columns=y_list)
    # Validate
    endog_y_val = val_data[y_list]
    exog_x_val = val_data.drop(columns=y_list)
    # Test
    endog_y_test = test_data[y_list]
    exog_x_test = test_data.drop(columns=y_list)

    # Fit model (the order belongs to the constructor, not to fit())
    model = VARMAX(endog=endog_y.values, exog=exog_x.values, order=(p, q))
    model_fit = model.fit(disp=False, maxiter=200, method='nm')

    # Validate
    predictions_val = model_fit.forecast(steps=exog_x_val.shape[0],
                                         exog=exog_x_val.values)
    val_residual = endog_y_val.values - predictions_val
    # Report the real validation error; it used to be hard-coded to 0.
    MSE = float(np.mean(val_residual ** 2))
    print('p:', p, ' MSE:', MSE)

    # Test -- this is just here for simplcity!!
    predictions_test = model_fit.forecast(steps=exog_x_test.shape[0],
                                          exog=exog_x_test.values)
    test_residual = endog_y_test.values - predictions_test

    train_residuals = model_fit.resid
    pd.DataFrame(train_residuals).to_csv(
        '../output/VARMAX_results/residual_data_train.csv')
    pd.DataFrame(val_residual).to_csv(
        '../output/VARMAX_results/residual_data_val.csv')
    pd.DataFrame(test_residual).to_csv(
        '../output/VARMAX_results/residual_data_test.csv')

    # Manual switch (off): per-ticker CSV export. It used to be stored in
    # `q`, which overwrote the MA-order parameter.
    export_per_ticker = False
    if export_per_ticker:
        for i, ticker in enumerate(tickers):
            real_val = endog_y_val.values[:, i]
            pred_val = predictions_val[:, i]
            # real values go to *_real.csv and predictions to
            # *_predictions.csv (the two names were swapped before)
            pd.DataFrame(real_val).to_csv(
                '../output/VARMAX_results/val_files/' + ticker + '_val_real.csv',
                index=False)
            pd.DataFrame(pred_val).to_csv(
                '../output/VARMAX_results/val_files/' + ticker + '_val_predictions.csv',
                index=False)
            real_test = endog_y_test.values[:, i]
            pred_test = predictions_test[:, i]
            pd.DataFrame(real_test).to_csv(
                '../output/VARMAX_results/test_files/' + ticker + '_test_real.csv',
                index=False)
            pd.DataFrame(pred_test).to_csv(
                '../output/VARMAX_results/test_files/' + ticker + '_test_predictions.csv',
                index=False)

    # Evaluate
    # Manual switch (off): per-ticker evaluation and plots.
    pic = False
    if pic:
        for i, ticker in enumerate(tickers):
            # NOTE(review): `opens_val` is not defined in this function;
            # confirm where the validation opening prices come from.
            # `predictions` was undefined here; the validation forecast is
            # the one compared against `endog_y_val` throughout this branch.
            pred = (predictions_val[:, i] + 1) * opens_val.values[:, i]
            real = (endog_y_val.values[:, i] + 1) * opens_val.values[:, i]
            ticker_mse = sum((pred - real)**2) / endog_y_val.shape[0]
            dummy_mse = sum((real[1:real.shape[0]] - real[0:real.shape[0] - 1])
                            **2) / (endog_y_val.shape[0] - 1)
            print('=========', ticker, '=========')
            print('Dummy MSE:', dummy_mse)
            print('MSE:', ticker_mse)

            # Threshold copies so the forecasts and the validation frame
            # are not overwritten in place.
            pred_zero_one = predictions_val[:, i].copy()
            pred_zero_one[pred_zero_one > 0] = 1
            pred_zero_one[pred_zero_one < 0] = 0
            print('Predicted ones:', np.mean(pred_zero_one))
            real_zero_one = endog_y_val.values[:, i].copy()
            real_zero_one[real_zero_one > 0] = 1
            real_zero_one[real_zero_one < 0] = 0
            print('Real ones:', np.mean(real_zero_one))

            TP = np.sum(np.logical_and(pred_zero_one == 1, real_zero_one == 1))
            TN = np.sum(np.logical_and(pred_zero_one == 0, real_zero_one == 0))
            FP = np.sum(np.logical_and(pred_zero_one == 1, real_zero_one == 0))
            FN = np.sum(np.logical_and(pred_zero_one == 0, real_zero_one == 1))
            print('True positive:', TP)
            print('True Negative:', TN)
            print('False positive:', FP)
            print('False Negative:', FN)
            accuracy = (TP + TN) / (TP + TN + FP + FN)
            print('Dummy guess:',
                  max(np.mean(real_zero_one), 1 - np.mean(real_zero_one)))
            print('Accuracy:', max(accuracy, 1 - accuracy))

            plt.plot(real, color='red', label='Real ' + ticker + ' Stock Price')
            plt.plot(pred, color='blue', label='Predicted ' + ticker + ' Stock Price')
            plt.title(ticker + ' Stock Price Prediction')
            plt.xlabel('Time')
            plt.ylabel(ticker + ' Stock Price')
            plt.legend()
            plt.savefig('../output/VARMAX_results/VARMAX_test_' + ticker + '.png')
            plt.close()
'Central_Bank_Rate_(CBR)'] = true_predictions_localrates[ 'Central_Bank_Rate_(CBR)'].apply(np.floor) true_predictions_localrates.index = pd.to_datetime( true_predictions_localrates.index) index_localrates = pd.date_range(appdata_localrates.index.max() + timedelta(1), periods=periods_input, freq='MS') true_predictions_localrates.index = index_localrates.date # true_predictions_localrates.index = pd.to_datetime(true_predictions_localrates.index).strftime('%Y-%m') # st.subheader("Local Rates Forecasted Values with Vector Autoregression") # st.dataframe(true_predictions_localrates) # Local Rates - VARMA model_localrates_varma = VARMAX(appdata_localrates, order=(1, 2)) model_localrates_varma_fit = model_localrates_varma.fit(disp=False) yhat_localrates_varma = model_localrates_varma_fit.forecast( steps=periods_input) yhat_localrates_varma_df = pd.DataFrame( yhat_localrates_varma, columns=appdata_localrates.columns).abs() yhat_localrates_varma_df.index = pd.date_range( appdata_localrates.index.max() + timedelta(1), periods=periods_input, freq='MS') yhat_localrates_varma_df[ 'Central_Bank_Rate_(CBR)'] = yhat_localrates_varma_df[ 'Central_Bank_Rate_(CBR)'].apply(np.floor) yhat_localrates_varma_df.index = yhat_localrates_varma_df.index.date # yhat_localrates_varma_df.index = pd.to_datetime(yhat_localrates_varma_df.index).strftime('%Y-%m') # st.subheader("Local Rates Forecasted Values with Vector Autoregression Moving Average") # st.dataframe(yhat_localrates_varma_df)
auto_arima(df1['Money'], maxiter=1000) auto_arima(df1['Spending'], maxiter=1000) df_transformed = df1.diff().diff() df_transformed = df_transformed.dropna() nobs = 12 train, test = df_transformed[0:-nobs], df_transformed[-nobs:] model = VARMAX(train, order=(1, 2), trend='c') results = model.fit(maximer=1000, disp=False) results.summary() df_forecast = results.forecast(12) df_forecast df_forecast['Money1d'] = (df1['Money'].iloc[-nobs - 1] - df1['Money'].iloc[-nobs - 2]) + df_forecast['Money'] df_forecast['MoneyForecast'] = df1['Money'].iloc[ -nobs - 1] + df_forecast['Money1d'].cumsum() df_forecast['Spending1d'] = ( df1['Spending'].iloc[-nobs - 1] -
encoding="utf-8-sig", converters={0: to_dt}, names=["TS", "x", "y", "z"]) req_period = datetime.timedelta(milliseconds=100) even_frame = frame.resample(req_period).mean().interpolate() #aclr_x=even_frame["x"] aclr_x = even_frame seria_len = len(aclr_x) train_seria, test_seria = aclr_x[:seria_len // 2], aclr_x[seria_len // 2:] model = VARMAX(train_seria, order=(5, 5)) #model = VARMAX(train_seria, order=(3, 3)) #model = VARMAX(train_seria,) model_fit = model.fit() predictions = model_fit.forecast(len(test_seria)) print(type(predictions)) print(predictions.shape) for axis in range(3): plt.subplot(3, 1, axis + 1) # plt.plot(test_seria.index[:100], predictions[:, axis][:100], label="predictions") plt.plot(predictions.iloc[:100, axis], label="predicted") plt.plot(test_seria.iloc[:100, axis], label="expected") plt.legend(loc="upper right") plt.show()
print(jh_results.cvt) # dim = (n,3) critical value table (90%, 95%, 99%) print(jh_results.evec) print(jh_results.eig) train = johan_test_temp[:int(0.8*(len(johan_test_temp)))] valid = johan_test_temp[int(0.8*(len(johan_test_temp))):] train_orignal = orignal_dataframe[:int(0.8*(len(orignal_dataframe)))] valid_orignal = orignal_dataframe[int(0.8*(len(orignal_dataframe))):] order = [2,3,4,5,6] for i in order: model = VARMAX(train, order=(i,0), trend='c') model_result = model.fit(maxiter= 1000) print(model_result.summary()) model_result.plot_diagnostics(variable=0) plt.show() model_result.plot_diagnostics(variable=1) plt.show() model_result.plot_diagnostics(variable=2) plt.show() """ VAR_forecast_value_hrf_pressure = np.exp(train["value_hrf_pressure"]) * train_orignal['value_hrf_pressure'][-2:] VAR_forecast_value_hrf_humidity = np.exp(train["value_hrf_humidity"]) * train_orignal['value_hrf_humidity'][-2:] #VAR_forecast_value_hrf_pressure = np.exp(train["value_hrf_temperature_bmp180"]) * train['value_hrf_temperature_bmp180'][-2:] rmse_value_hrf_pressure = math.sqrt(mean_squared_error(train_orignal['value_hrf_pressure'][-2:], VAR_forecast_value_hrf_pressure)) rmse_value_hrf_humidity = math.sqrt(mean_squared_error(train_orignal['value_hrf_humidity'][-2:], VAR_forecast_value_hrf_humidity)) print(rmse_value_hrf_pressure)