def model(self, column_name, df, apply_smoothing, smoothing_level=None):
    """
    Forecast the last ``self.prediction_period`` rows with simple exponential smoothing.

    The model is fitted on ``df["train"]`` without its final
    ``self.prediction_period`` rows.  The new column repeats the observed
    ``train`` values over the fitted span and holds the forecast over the
    held-out tail.

    :input column_name : str, name of column to hold the predicted values
    :input df : dataframe, weekly-level data with a ``train`` column; it is
        modified in place and also returned
    :input apply_smoothing : bool, when truthy ``smoothing_level`` is handed to
        the fit, otherwise the fit is run without it
    :input smoothing_level : float, default=None, smoothing level passed to
        ``SimpleExpSmoothing.fit``
    :returns df : dataframe, weekly-level, with predictions
    """
    m = self.prediction_period
    train = df["train"].iloc[:-m]

    # Truthiness instead of ``== True`` / ``== False``: any other value used to
    # leave the fitted model unbound and crash with UnboundLocalError.
    fit_kwargs = {"optimized": True}
    if apply_smoothing:
        fit_kwargs["smoothing_level"] = smoothing_level
    fitted = SimpleExpSmoothing(train).fit(**fit_kwargs)

    # Build the column in one array and assign it once.  The previous chained
    # ``df[col][...] = ...`` writes went through a temporary Series and are not
    # guaranteed to reach ``df`` (SettingWithCopy / copy-on-write).
    predictions = df["train"].to_numpy(dtype=float, copy=True)
    predictions[-m:] = np.asarray(fitted.forecast(m), dtype=float)
    df[column_name] = predictions
    return df
def sem(obs, p_mean:bool, boxcox:bool, n_forecast:int):
    '''
    Fit a Simple Exponential Smoothing model, choosing the smoothing level by
    grid search over 0.95, 0.90, ..., 0.05 so that the in-sample SSE is minimal.

    Input:
        :param obs: sequential data for forecasting (pandas Series)
        :param p_mean: whether or not to apply penalized_mean
        :param boxcox: whether or not to apply boxcox transformation
        :param n_forecast: number of observations to forecast
    Output:
        forecast: Forecast for the next n_forecast observations
        aic: AIC of the selected model
        bic: BIC of the selected model
        mse: MSE of the fitted values against obs (after undoing Box-Cox)
        sse_min: smallest SSE seen during the grid search, computed on the
                 (possibly transformed) series; inf when no candidate scored
    Raises:
        AssertionError: if obs is not a pandas Series
    '''
    if not isinstance(obs, pd.Series):
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped under ``python -O``; the exception type is kept for callers.
        raise AssertionError("Data must be of pandas Series type")

    y_prc = pp_transforms().penalized_mean(obs) if p_mean else obs

    untransform = False
    if boxcox:
        y_prc, lmbd = pp_transforms().boxcox(y_prc)
        untransform = True

    # Grid search.  Start from +inf rather than a large finite number so that
    # series whose SSE exceeds that number still get a real smoothing level.
    sse_min, sl = float("inf"), 0
    print(obs)
    print(y_prc)
    candidate_levels = (.95, .9, .85, .8, .75, .7, .65, .6, .55, .5,
                        .45, .4, .35, .3, .25, .2, .15, .1, .05)
    try:
        y_float = y_prc.astype(float)
        for level in candidate_levels:
            mdl = SimpleExpSmoothing(y_prc).fit(smoothing_level=level, optimized=False)
            sse = np.sum((y_float - mdl.fittedvalues) ** 2)
            # ``<=`` keeps the original tie-break: the smaller level wins.
            if sse <= sse_min:
                sse_min, sl = sse, level
    except Exception as e:
        # Best effort: report and continue with whatever level was found so far.
        print("Error while fitting the model", "\n", e)

    # Refit with the selected level.
    mdl = SimpleExpSmoothing(y_prc).fit(smoothing_level=sl, optimized=False)
    aic, bic = mdl.aic, mdl.bic

    pred = mdl.fittedvalues
    forecast = mdl.forecast(n_forecast)
    if untransform:
        # Bring fitted values and forecast back to the original scale.
        pred = pp_transforms().boxcox_untransform(pred, lmbd)
        forecast = pp_transforms().boxcox_untransform(forecast, lmbd)
    mse = ((obs.astype(float).values - pred) ** 2).mean()
    return (forecast, aic, bic, mse, sse_min)
def fit_SES(params, df):
    """Fit SES on ``df.carbon_monoxide`` (last row left out) and forecast one step.

    :param params: mapping; when it contains the key ``'a'`` that value is used
        as the smoothing level, otherwise ``fit()`` is called without arguments
    :param df: object exposing a ``carbon_monoxide`` series
    :return: tuple ``(one-step forecast, smoothing level of the fitted model)``
    """
    data = df.carbon_monoxide[:-1]
    model = SimpleExpSmoothing(data, initialization_method="estimated")
    if 'a' in params:
        fit = model.fit(smoothing_level=float(params['a']))
    else:
        fit = model.fit()
    fcast = fit.forecast(1)
    # The forecast is labelled with the step that follows the training data,
    # so ``fcast[0]`` is a label lookup that fails on an integer index (and
    # only works elsewhere through a deprecated positional fallback).  Take the
    # single value by position instead.
    return fcast.iloc[0], fit.model.params['smoothing_level']
def simple_exponetial_smoothing_forX(arr, alpha=0.3):
    """Apply simple exponential smoothing to the leading feature columns.

    Feature column 0 is dropped.  Columns 1-4 are replaced by their SES fitted
    values; columns 5 and above are copied through unchanged.

    :param arr: array indexed as ``[sample, time, feature]``
    :param alpha: smoothing level used for the smoothed columns (previously
        ignored in favour of a hard-coded 0.3, which is still the default)
    :return: float array of shape ``(samples, time, features - 1)``
    """
    sample_size, time_size, feature_size = (int(arr.shape[axis]) for axis in range(3))
    smoothing_arr = np.zeros((sample_size, time_size, feature_size - 1))

    for idx, sample in enumerate(arr):
        # Start at 1: column 0 is not carried into the output.
        for col in range(1, feature_size):
            series = sample[:, col].reshape(-1)
            if col < 5:
                series = SimpleExpSmoothing(
                    series, initialization_method="heuristic"
                ).fit(smoothing_level=alpha, optimized=False).fittedvalues
            smoothing_arr[idx, :, col - 1] = series
    return smoothing_arr
def forecast_ses(og_df):
    """Project daily counts with SES and report the peak of the projection.

    Inputs with at most one row short-circuit to ``[0, 0]``.  Otherwise the
    data is aggregated by day, a fixed-alpha (0.6) SES model is fitted on the
    ``Count`` column, the forecast is laid over the index produced by
    ``create_split`` and plotted on a new figure (not shown here).

    :param og_df: input frame; only a copy is processed
    :return: ``[0, 0]`` for short input, else ``[index label, value]`` of the
        largest projected value, both converted to ``str``
    """
    if len(og_df) <= 1:
        return [0, 0]

    daily = aggregate_by_day(og_df.copy())
    horizon = daily.copy().reindex(create_split(daily))
    projection = horizon.copy()

    ses_fit = SimpleExpSmoothing(np.asarray(daily['Count'])).fit(
        smoothing_level=0.6, optimized=False)
    projection['SES'] = ses_fit.forecast(len(horizon))

    plt.figure(figsize=(16, 8))
    plt.plot(daily['Count'], label='Train')
    plt.plot(projection['SES'], label='SES')
    plt.legend(loc='best')

    # Index label and value of the highest projected point.
    peak_label = projection['SES'].idxmax()
    return [str(peak_label), str(projection.loc[peak_label, 'SES'])]
def simpleExpSmoothing(x, y, save_fn):
    """Forecast one step ahead for every series in ``x`` and hand off the result.

    :param x: iterable of series; each gets its own SES fit (alpha 0.6, no optimisation)
    :param y: passed through untouched as the third argument of ``save_fn``
    :param save_fn: callable invoked as ``save_fn(filename, predictions, y)``
    """
    one_step_forecasts = [
        SimpleExpSmoothing(series).fit(smoothing_level=0.6, optimized=False).forecast(1)
        for series in x
    ]
    save_fn('rate_simpleExpSmoothing.txt', np.array(one_step_forecasts), y)
def simple_exponential_smoothing(input_df, kunag, matnr, alpha=0.6):
    """Walk-forward one-step SES forecast for a single (kunag, matnr) series.

    The input is copied, cleaned with ``remove_negative_rows``, narrowed with
    ``individual_series`` and aggregated with ``get_weekly_aggregate``.  After
    ``splitter`` separates train and test, each test row is forecast from all
    rows seen so far and then appended to the training data, so later
    forecasts see the earlier test observations.

    :param input_df: source frame (not modified; a copy is processed)
    :param kunag: forwarded to ``individual_series``
    :param matnr: forwarded to ``individual_series``
    :param alpha: fixed smoothing level
    :return: ``(output_df, mse)`` - the train rows followed by the test rows
        with their ``prediction`` filled in, and the mean squared error of
        ``prediction`` against ``quantity`` over the test rows
    """
    df = remove_negative_rows(input_df.copy())
    df_series = individual_series(df, kunag, matnr)
    df_series = data_transformation.get_weekly_aggregate(df_series)
    df_series["date"] = df_series["dt_week"].map(str)
    df_series["date"] = df_series["date"].apply(lambda x: x.replace("-", ""))
    df_series["prediction"] = df_series["quantity"]
    df_series_train, df_series_test = splitter(df_series)

    # Position of the first test row in the combined output.
    test_index = df_series_train.shape[0]
    for _, row in df_series_test.iterrows():
        fit2 = SimpleExpSmoothing(np.asarray(df_series_train["quantity"])).fit(
            smoothing_level=alpha, optimized=False)
        # ``forecast(1)`` is a 1-element array; store the scalar so the
        # ``prediction`` column holds numbers rather than arrays.
        row["prediction"] = fit2.forecast(1)[0]
        df_series_train = pd.concat(
            [df_series_train, pd.DataFrame(row).T]).reset_index(drop=True)

    output_df = df_series_train
    test_df = output_df.iloc[test_index:]
    return output_df, mean_squared_error(test_df["quantity"], test_df["prediction"])
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
    """Append an SES-smoothed copy of every selected column to ``inputs``.

    The columns come from ``self._get_columns_to_fit``.  For each one a column
    named ``<original name>_simple_exponential_smoothing`` is added, holding
    the fitted values of an SES model with smoothing level 0.2.

    Args:
        inputs: frame to process; the new columns are added to this same object.
        timeout: not used by this method.
        iterations: not used by this method.

    Returns:
        ``base.CallResult`` wrapping the augmented frame.  A failure while
        smoothing is logged and whatever was produced so far is returned.
    """
    self.logger.info('Simple Exponential Smoothing Primitive called')

    outputs = inputs
    self._training_inputs, self._training_indices = self._get_columns_to_fit(
        inputs, self.hyperparams)
    try:
        for column in self._training_indices:
            smoothed_name = inputs.columns[column] + "_simple_exponential_smoothing"
            outputs[smoothed_name] = SimpleExpSmoothing(
                inputs.iloc[:, column]).fit(
                    smoothing_level=0.2, optimized=False).fittedvalues
    except Exception as e:
        # The message needs a placeholder: passing ``e`` as a bare extra
        # argument makes the logging module fail while formatting the record.
        self.logger.error(
            "Error in Calculating simple exponential smoothing: %s", e)

    self._update_metadata(outputs)
    print(outputs)

    return base.CallResult(outputs)
def SES_find_smoothng_level(df):
    """Plot SES forecasts (alpha 0.1) for the columns ``id1`` .. ``id24``.

    The first 20 rows are the training window and the rest the test window.
    Only columns whose training sum exceeds 25 are fitted and plotted.  Despite
    the name, no search is performed: the function always returns 0.1.

    :param df: frame with columns ``id1`` .. ``id24``
    :return: the constant ``0.1``
    """
    train = df[0:20]
    test = df[20:]
    num = 0
    rms = 0.0
    for id_idx in range(1, 25):
        column = 'id' + str(id_idx)
        if train[column].sum() > 25:
            fit = SimpleExpSmoothing(train[column]).fit(
                smoothing_level=0.1, optimized=False)
            fcast = fit.forecast(len(test))
            plt.plot(train[column], marker='o', label='train_id' + str(id_idx))
            plt.plot(test[column], marker='o', label='test_id' + str(id_idx))
            plt.plot(fcast, marker='o', label='SES' + str(id_idx))
            plt.legend(loc='best')
            rms = rms + math.sqrt(mean_squared_error(test[column], fcast))
            num = num + 1
    plt.show()
    # Average RMSE over the evaluated columns; informational only (it is not
    # returned).  Guarded because no column may pass the threshold, which used
    # to raise ZeroDivisionError.
    mean_error = rms / num if num else float('nan')
    return 0.1
def ses(input_df, kunag, matnr, n, alpha):
    """Rolling one-step SES evaluation over the last ``n`` observations.

    For ``i = n, n-1, ..., 1`` the data is re-split with
    ``train_test_split(input_df, kunag, matnr, i)``, an SES model with fixed
    ``alpha`` is fitted on the training ``quantity`` values, and one forecast
    value is collected per split.  The collected values are compared with the
    ``quantity`` column of the ``n``-split test set and plotted.

    :param input_df: source frame handed to ``train_test_split``
    :param kunag: forwarded to ``train_test_split``
    :param matnr: forwarded to ``train_test_split``
    :param n: number of splits, which must equal the length of the test set
    :param alpha: fixed smoothing level
    :return: ``(y_hat_avg, rms, mae)`` - a copy of the test set with a
        ``pred_column`` column, the RMSE and the MAE
    """
    # Use the frame that was passed in; the body previously read a name ``df``
    # that is not defined in this function.
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat_avg = test1.copy()

    lst = []
    for i in range(n, 0, -1):
        train, _ = train_test_split(input_df, kunag, matnr, i)
        fit2 = SimpleExpSmoothing(np.asarray(train['quantity'])).fit(
            smoothing_level=alpha, optimized=False)
        lst.append(fit2.forecast(len(test1))[-1])
    y_hat_avg['pred_column'] = lst

    plt.figure(figsize=(12, 8))
    # ``train`` is the training part of the last (i == 1) split.
    plt.plot(train.set_index("date")['quantity'], label='Train', marker='.')
    plt.plot(test1.set_index("date")['quantity'], label='Test', marker='.')
    plt.plot(y_hat_avg.set_index("date")['pred_column'], label='SES', marker='.')
    plt.legend(loc='best')
    plt.title("SES")
    plt.show()

    rms = sqrt(mean_squared_error(test1.quantity, y_hat_avg.pred_column))
    mae = mean_absolute_error(test1.quantity, y_hat_avg.pred_column)
    return y_hat_avg, rms, mae
def get_gravity_from_acc(acc):
    """
    Estimate the gravity component of accelerometer data with an
    exponential (low pass) filter.

    Parameters
    ----------
    acc : numpy array
        [time, x, y, z]

    Returns
    --------
    gravity : list of 3 values
        One-step SES forecast for each of the three axis columns
    """
    if debug:
        print('get gravity component...')

    # Exponential low pass filter, see
    # https://developer.android.com/guide/topics/sensors/sensors_motion
    # https://medium.com/datadriveninvestor/how-to-build-exponential-smoothing-models-using-python-simple-exponential-smoothing-holt-and-da371189e1a1
    # TODO: we don't need to use all rows to get the component
    top_rows = min(len(acc), 1000)
    # Columns 1..3 hold the axes; column 0 is skipped.
    gravity_component = [
        SimpleExpSmoothing(acc[:top_rows, axis_col]).fit().forecast(1)[0]
        for axis_col in range(1, 4)
    ]

    if debug:
        print_floats(*gravity_component, description="Gravity component:")

    return gravity_component
def run(self):
    """Thread entry point: smooth the CPU readings in ``cpuData.txt`` and plot them.

    Reads the ``;``-separated file, replaces the ``cpu`` column with its SES
    fitted values (alpha 0.2), applies ``filter_day``, keeps only the last 50
    rows when ``self.small`` is set, and passes the result to ``plot_data``.
    """
    # Invoked through the thread's start().
    print("{} started!".format(self.getName()))

    column_types = {'date': str, 'cpu': np.float16}
    frame = pd.read_csv('cpuData.txt',
                        dtype=column_types,
                        sep=';',
                        names=['date', 'cpu'])
    frame = format_values(frame)

    # Simple Exponential Smoothing
    smoothed = SimpleExpSmoothing(frame['cpu']).fit(smoothing_level=0.2,
                                                    optimized=False)
    frame['cpu'] = smoothed.fittedvalues

    frame = filter_day(frame)
    if self.small:
        frame = frame.tail(50)

    plot_data([frame.to_numpy()], ['0.2'], is_cpu=True)
    print("{} finished!".format(self.getName()))
def ses1(input_df, kunag, matnr, n, l, alpha):
    """Collect the SES mean absolute error over a sliding cross-validation window.

    The outer loop runs ``l`` times (using the value of ``l`` at entry).  In
    each round ``k = n - i`` while ``l`` shrinks by one, and
    ``train_cv_split(input_df, kunag, matnr, k, l)`` supplies the validation
    set.  For ``j = k, k-1, ..., l+1`` a fixed-``alpha`` SES model is fitted on
    the ``j``-split training data and one forecast value is collected; the
    collected values are scored against the validation ``quantity`` column.

    :param input_df: source frame handed to ``train_cv_split``
    :param kunag: forwarded to ``train_cv_split``
    :param matnr: forwarded to ``train_cv_split``
    :param n: upper split position of the first round
    :param l: number of rounds and lower split position of the first round
    :param alpha: fixed smoothing level
    :return: list with one MAE per round
    """
    mae = []
    # ``range`` is evaluated once, so decrementing ``l`` below does not change
    # the number of rounds.
    for i in range(0, l):
        k = n - i
        # Use the frame that was passed in; the body previously read a name
        # ``df`` that is not defined in this function.
        _, cv1 = train_cv_split(input_df, kunag, matnr, k, l)
        lst = []
        for j in range(k, l, -1):
            train, _ = train_cv_split(input_df, kunag, matnr, j, l)
            fit2 = SimpleExpSmoothing(np.asarray(train['quantity'])).fit(
                smoothing_level=alpha, optimized=False)
            lst.append(fit2.forecast(len(cv1))[-1])
        mae.append(mean_absolute_error(cv1.quantity, lst))
        l = l - 1
    return mae
def gen_plot_forecast():
    """Build the reliability plot and a short-term reliability forecast.

    Fetches hourly nginx reliability, keeps the latest 1000 buckets, fits an
    SES model (smoothing level 0.5) and plots observed against fitted values,
    with the fitted values floored at 0.5.

    :return: ``(fig, predicted_data, last_bucket, tm)`` - the figure, the mean
        of the next five forecast values, the parsed last bucket, and the
        current time
    :raises SystemExit: with status 1 when the reliability data contains NaN
    """
    # NOTE(review): the connection URL embeds credentials in source; consider
    # loading them from configuration or the environment.
    es_conn = fetchData.elasticSearch(
        url="https://*****:*****@kf6-stage.ikit.org/es/_search")
    df = es_conn.get_nginx_reliability(interval='1h')
    df = df.sort_values('buckets', ascending=True)
    data_in_window = df.tail(1000)

    rel_data = data_in_window["reliability"].to_numpy()
    date_data = data_in_window["buckets"].to_numpy()
    last_bucket = parse(data_in_window["buckets"].iloc[-1])

    if np.isnan(np.sum(rel_data)):
        print("NaN in data")
        # ``sys.exit`` rather than the interactive-only ``exit`` helper, which
        # is injected by ``site`` and not guaranteed to exist.
        sys.exit(1)

    simple_exp_model = SimpleExpSmoothing(rel_data).fit(smoothing_level=.5)
    predicted_data = np.average(simple_exp_model.forecast(5))

    # Floor the fitted curve at 0.5 (NaN entries stay NaN).
    fitted_values = np.maximum(simple_exp_model.fittedvalues, 0.5)

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(x=date_data,
                   y=rel_data,
                   mode='lines',
                   name="Observed Reliability"))
    fig.add_trace(
        go.Scatter(x=date_data,
                   y=fitted_values,
                   mode='lines',
                   name="Predicted Reliability"))

    print(datetime.datetime.now())
    sys.stdout.flush()
    tm = datetime.datetime.now()
    return fig, predicted_data, last_bucket, tm
def simple_exponential_smoothing_statsmodels():
    """Demo: fit statsmodels SES on a sampled process and plot its forecast.

    Draws ``N = 200`` realisations, fits on the first ``t + 1 = 161`` of them
    with ``alpha = 0.5`` and initial level 20, forecasts the remaining points
    and plots them next to the realisations.
    """
    N, t, alpha, x0 = 200, 160, 0.5, 20
    realisations = pd.Series(sample_gaussian_process(20, 5, N), range(N))
    mod = SimpleExpSmoothing(realisations[:t+1]).fit(smoothing_level=alpha,
                                                     initial_level=x0,
                                                     optimized=False)
    forecasts = mod.forecast(N-(t+1)).rename(r'$\alpha=0.5$')
    # NaN padding over the training span keeps the forecast at its own x
    # positions.  ``pd.concat`` replaces ``Series.append``, which was removed
    # in pandas 2.0.
    padded_forecasts = pd.concat([pd.Series(np.nan, range(t+1)), forecasts])
    plot(realisations, padded_forecasts, alpha)
    py.show()
def evaluate_simp_avg_model(X):
    """
    Evaluate a Simple Exponential Smoothing model with walk-forward validation.

    The first 75% of ``X`` seeds the history.  Each remaining observation is
    forecast one step ahead (alpha 0.6, no optimisation) and then appended to
    the history before the next forecast.

    :param X: list or series containing all historical data
    :return: mse (error metric) and the last fitted model
    """
    # Prepare training dataset
    train_size = int(len(X) * 0.75)
    train, test = X[0:train_size], X[train_size:]
    history = list(train)

    predictions = []
    # Iterate over the values themselves: ``test[t]`` is a label lookup on a
    # Series slice, whose labels start at ``train_size`` rather than 0.
    for actual in test:
        model_fit = SimpleExpSmoothing(history).fit(smoothing_level=0.6,
                                                    optimized=False)
        predictions.append(model_fit.forecast()[0])
        # Move forward one time step.
        history.append(actual)

    # Out-of-sample error
    mse = mean_squared_error(test, predictions)
    return mse, model_fit
def simple_smoothing(data,company):
    """Fit SES (alpha 0.1) to ``data``, print the fitted values and plot them.

    :param data: series to smooth
    :param company: name appended to the plot title
    """
    fit1 = SimpleExpSmoothing(data).fit(smoothing_level=0.1, optimized=False)
    fcast1 = fit1.forecast(1).rename(r'$\alpha=0.1$')
    # The fitted values are an attribute of the results object; subscripting
    # it (``fit1['fittedvalues']``) raises TypeError.
    print(fit1.fittedvalues)
    fcast1.plot(marker='o', color='blue', legend=True)
    fit1.fittedvalues.plot(marker='o', color='blue')
    pyplot.title("Trade War Impact - "+company)
    pyplot.show()
def exponential_smoothing(x, alpha):
    """
    Smooth a time series with simple exponential smoothing.

    :param x: input time-series
    :param alpha: smoothness (between 0 and 1) coefficient, passed as the
        first positional argument of ``fit``
    :return: fitted (smoothed) values of the model
    """
    fitted_model = SimpleExpSmoothing(x).fit(alpha)
    return fitted_model.fittedvalues
def simpleExponentialSmoothing(self, train, test, trend):
    """Score SES on ``test`` and record it if it beats the current best RMSE.

    Nothing happens unless ``trend`` equals ``'no trend'``.  When the SES RMSE
    is lower than ``self.rmse``, both ``self.rmse`` and ``self.__model__`` are
    updated.

    :param train: training series
    :param test: hold-out series; its length sets the forecast horizon
    :param trend: trend label of the series
    """
    if trend != 'no trend':
        return

    forecast = SimpleExpSmoothing(train).fit().forecast(len(test))
    candidate_rmse = rootMeanSquaredError(test, forecast)
    if candidate_rmse < self.rmse:
        self.rmse = candidate_rmse
        self.__model__ = 'simpleExponentialSmoothing'
def estimate_SES(dataframe, name, alpha, sizeestimate):
    """Forecast ``sizeestimate`` steps of column ``name`` with fixed-alpha SES.

    :param dataframe: frame holding the series
    :param name: column to forecast
    :param alpha: smoothing level (no optimisation is performed)
    :param sizeestimate: number of steps to forecast
    :return: forecast values, each rounded to 4 decimals
    """
    values = np.asarray(dataframe[name])
    fitted = SimpleExpSmoothing(values).fit(smoothing_level=alpha, optimized=False)
    forecast = fitted.forecast(sizeestimate)
    # Round in place, one element at a time.
    for position, value in enumerate(forecast):
        forecast[position] = round(value, 4)
    return forecast
def ES(data=None, horizon=24, alpha=0.3):
    """
    Build ES model => Train model => Get forecasts.

    :param data: history data.
    :param horizon: Length of forecasts.
    :param alpha: smoothing level passed to ``fit``.
    :return: forecasts for the next ``horizon`` time steps, as returned by the
        fitted model's ``predict``.
    """
    model = SimpleExpSmoothing(data).fit(smoothing_level=alpha)
    first_step = len(data)
    # ``predict`` treats ``end`` as inclusive, so the last index must be
    # ``first_step + horizon - 1``; the previous ``+ horizon`` returned one
    # forecast more than requested.
    fcasts = model.predict(start=first_step, end=first_step + horizon - 1)
    return fcasts
def sess(i, fc_periods, df):
    """Smooth the first column of ``df`` with SES and forecast ahead.

    The fitted values are written to ``df['pronostico']`` on the caller's frame.

    :param i: not used
    :param fc_periods: number of periods to forecast
    :param df: frame whose first column is the series
    :return: forecast for the next ``fc_periods`` periods
    """
    sales = np.asarray(df.iloc[0:, 0])
    fitted = SimpleExpSmoothing(sales).fit(smoothing_level=0.2, optimized=True)
    df['pronostico'] = fitted.fittedvalues
    return fitted.forecast(fc_periods)
def fit_model(self, n_predict):
    """Fit SES on ``self.train`` and store the forecast as a frame.

    :param n_predict: number of steps to forecast
    :return: frame with columns ``ds`` (from ``self.ds_test``) and ``yhat``;
        the same object is kept in ``self.forecast``
    """
    fitted = SimpleExpSmoothing(self.train).fit()
    predicted = fitted.forecast(n_predict)
    self.forecast = pd.DataFrame({"ds": self.ds_test, "yhat": predicted})
    return self.forecast
def plot_all_graphs(data,company):
    """Plot the SES fit and one-step forecast of every frame's ``sentiment`` column.

    :param data: iterable of frames, each with a ``sentiment`` column
    :param company: sequence of names; ``company[k]`` titles the plot of the
        k-th frame
    """
    # Single source of truth for the smoothing level so the legend label can
    # no longer disagree with the fitted model (it used to say 0.1 while the
    # fit used 1).
    alpha = 1
    for position, df in enumerate(data):
        fit1 = SimpleExpSmoothing(df['sentiment']).fit(smoothing_level=alpha, optimized=False)
        fcast1 = fit1.forecast(1).rename(rf'$\alpha={alpha}$')
        fcast1.plot(marker='o', color='blue', legend=True)
        fit1.fittedvalues.plot(marker='o', color='blue')
        pyplot.title("Trade War Impact - " + company[position])
        pyplot.show()
def twocolorball_ses_forecast(df):
    """Produce one SES-based integer pick for each of the seven ball columns.

    Each column is fitted with a smoothing level drawn at random from
    0.1, 0.2, ..., 1.0, and the first predicted value is truncated to ``int``.

    :param df: frame with the six red-ball columns and the blue-ball column
    :return: list of seven ints, red balls 1-6 followed by the blue ball
    """
    columns = ["红球%d" % number for number in range(1, 7)] + ["蓝球"]
    picks = []
    for column in columns:
        ses_model = SimpleExpSmoothing(np.asarray(df[column]))
        level = random.randint(1, 10) / 10
        predicted = ses_model.fit(smoothing_level=level, optimized=False).predict()
        picks.append(int(predicted[0]))
    print(picks)
    return picks
def SES(paramsList=['pollution.csv', '0.93','pm', 'humidity', 'date'],specialParams=['0.6']):
    '''
    Forecast the value columns of a CSV file with simple exponential smoothing
    and write the combined result to ``result.csv``.

    paramsList: [csv path, training fraction, value column(s)..., timestamp column]
    specialParams: [smoothing level]

    Guidance for choosing the smoothing level (alpha):
    1. When the series is fairly stable, choose a small alpha, 0.05-0.20.
    2. When the series fluctuates but the long-term trend changes little,
       a slightly larger alpha can be used, 0.10-0.40.
    3. When the series fluctuates strongly and the long-term trend changes a
       lot, with a clear upward or downward direction, prefer a larger alpha,
       0.60-0.80.
    4. When the series is rising or falling and fits an additive model, use a
       large alpha, 0.60-1.
    '''
    path = paramsList[0]
    trainRows = float(paramsList[1])
    saveto = 'result.csv'
    df = pd.read_csv(path, usecols=paramsList[2:])
    allRows = df.shape[0]
    es = specialParams[0]
    train = df[0:int(allRows*trainRows)]
    # The test slice starts one row after the end of the training slice, so the
    # row at the boundary position belongs to neither.
    test = df[int(allRows*trainRows)+1:]

    # Index each frame by the parsed timestamp column and average per day.
    df['Timestamp'] = pd.to_datetime(df[paramsList[-1]], format='%Y/%m/%d %H:%M')
    df.index = df['Timestamp']
    df = df.resample('D').mean()

    # NOTE(review): train and test are slices of df, so these column
    # assignments may trigger pandas' SettingWithCopyWarning - confirm.
    train['Timestamp'] = pd.to_datetime(train[paramsList[-1]], format='%Y/%m/%d %H:%M')
    train.index = train['Timestamp']
    train = train.resample('D').mean()

    test['Timestamp'] = pd.to_datetime(test[paramsList[-1]], format='%Y/%m/%d %H:%M')
    test.index = test['Timestamp']
    test = test.resample('D').mean()

    y_hat = test.copy()
    nullArray = train.copy()
    nullArray['time'] = train.index
    # Everything above is generic and can be reused ----------------------------
    # One fixed-alpha SES fit per value column (every entry between the
    # training fraction and the timestamp column).
    for i in range(2,len(paramsList)-1):
        fit = SimpleExpSmoothing(np.asarray(train[paramsList[i]])).fit(smoothing_level=float(es), optimized=False)
        y_hat[paramsList[i]] = fit.forecast(len(test))
        y_hat[paramsList[i]] = round(y_hat[paramsList[i]],2)
        rms = sqrt(mean_squared_error(test[paramsList[i]], y_hat[paramsList[i]]))
        print(rms)

    # Stack the training rows on top of the forecast rows.
    y_hat['time'] = test.index
    yhat_naive = np.array(y_hat)
    nArray = np.array(nullArray)
    newArray = np.concatenate((nArray,yhat_naive),axis=0)
    s = pd.DataFrame(newArray, columns=paramsList[2:])
    # Blank out the value columns over the leading trainRows share of the rows.
    # NOTE(review): this is a chained assignment; whether it reaches ``s``
    # depends on the pandas version - confirm.
    for i in range(2,len(paramsList)-1):
        s[paramsList[i]][0:int(len(s)*trainRows)] = ""
    s.to_csv(saveto,index=False,header=True,float_format='%.2f')
def estimate_SES(dataframe, name, alpha, sizeestimate):
    """Return a fixed-alpha SES forecast of column ``name``, rounded to 4 decimals.

    :param dataframe: frame holding the series
    :param name: column to forecast
    :param alpha: smoothing level (no optimisation is performed)
    :param sizeestimate: number of steps to forecast
    :return: forecast values
    """
    # SES works on a plain array, so the column is converted first.
    series = np.asarray(dataframe[name])
    ses_fit = SimpleExpSmoothing(series).fit(smoothing_level=alpha, optimized=False)
    # The model assumes no trend or seasonality, so all forecasts can be the
    # same value, i.e. a straight line.
    forecast = ses_fit.forecast(sizeestimate)
    forecast[:] = [round(value, 4) for value in forecast]
    return forecast
def sesm(i):
    """Score an SES forecast on the last 15% of the first column of ``i``.

    The first 85% of the rows train the model (smoothing level 0.2); the sum of
    the remaining actual values and the sum of their forecasts are passed to
    ``calculomape``.

    :param i: frame whose first column is the series
    :return: value returned by ``calculomape``
    """
    df = i
    cut = round(len(df) * .85)
    train = np.asarray(df.iloc[:cut, 0])
    holdout = df.iloc[cut:, 0]

    fitted = SimpleExpSmoothing(train).fit(smoothing_level=0.2, optimized=True)
    predicted = fitted.forecast(len(holdout))

    actual_total = sum(holdout)
    predicted_total = sum(predicted)
    return calculomape(actual_total, predicted_total)
def SES_f(self, df, a):
    """Forecast one step of ``df['Actual']`` with SES and record the result.

    On success a dict with the attributes from ``generate_attrib(df)`` and the
    forecast is appended to ``self.df_forecast`` and a DEBUG line is printed.
    On failure an ERROR line with the cause is printed instead.

    :param df: frame with an ``Actual`` column
    :param a: fixed smoothing level
    :return: always ``None``
    """
    try:
        actuals = np.array(df['Actual'])
        fit_simpleexp = SimpleExpSmoothing(actuals).fit(smoothing_level=a, optimized=False)
        forecast = fit_simpleexp.forecast()[0]
        Cluster, Warehouse, WF, YF = generate_attrib(df)
        # NOTE(review): this relies on ``append`` mutating ``self.df_forecast``
        # in place (list-like) - confirm it is not a DataFrame.
        self.df_forecast.append({'Cluster':Cluster, 'Warehouse':Warehouse, 'Year':YF, "Week": WF, "Forecast":forecast})
    except Exception as exc:
        # ``Exception`` instead of a bare ``except`` so KeyboardInterrupt and
        # SystemExit still propagate; the cause is reported rather than hidden.
        print(f"ERROR:FORECAST-SES:{exc}")
        return None
    print(f'DEBUG:Forecast:{Cluster}:{Warehouse}:{YF}:{WF}:{forecast}')
    return None
def ewma(data, col, train, test, frequency):
    """Fit SES (alpha 0.6) on ``train[col]``, report the RMSE and save a plot.

    :param data: not used
    :param col: column to model
    :param train: training frame
    :param test: test frame; its length sets the forecast horizon
    :param frequency: prefix of the output file name (``<frequency>ses.png``)
    """
    y_hat_avg = test.copy()
    ses_fit = SimpleExpSmoothing(np.asarray(train[col])).fit(smoothing_level=0.6,optimized=False)
    y_hat_avg['SES'] = ses_fit.forecast(len(test))
    print('Rmse= ', rmse(test[col], y_hat_avg['SES']))

    plt.figure(figsize=(16,8))
    curves = (
        (train[col], 'Train'),
        (test[col], 'Test'),
        (y_hat_avg['SES'], 'Exponential Smoothing'),
    )
    for series, label in curves:
        plt.plot(series, label=label)
    plt.legend(loc='best')
    plt.savefig(frequency+'ses.png')