# plt.title(filename) # plt.show() OUTLIER_TYPES.append(to_include[filename][2]) start = time.time() NUMBER_OF_MISSING_TIME_STEPS.append(ts.get_how_many_miss()) ts.set_miss(fill=False) end = time.time() MISSING_TIMES.append(end - start) FROM_NUMENTA.append(to_include[filename][3]) start = time.time() ts.set_seasonality() end = time.time() SEASONALITIES.append(ts.get_seasonality()) PERIODICITIES.append(ts.get_period()) SEASONALITY_TIMES.append(end - start) start = time.time() ts.set_trend() end = time.time() TRENDS.append(ts.get_trend()) TREND_TYPES.append(ts.get_trend_type())
def sarimax(ts_obj, gaussian_window_size, step_size, plot_anomaly_score=False, plot_forecast=False):
    """Run SARIMAX anomaly scoring, splitting long series into 200-row chunks.

    When ``ts_obj.get_length()`` is below 200 the call is forwarded unchanged
    to ``sarimax_mini`` (including both plot flags). Otherwise
    ``ts_obj.dataframe`` is cut into consecutive 200-row slices; every full
    slice is wrapped in its own ``TimeSeries``, has its seasonality set, and
    is scored by ``sarimax_mini`` with plotting disabled. Slices shorter than
    200 rows are skipped, and the concatenated scores and forecasts are
    padded with zeros at the end until they reach ``ts_obj.get_length()``.

    Args:
        ts_obj: Time series object exposing ``dataframe``, ``timestep``,
            ``dateformat``, ``name``, ``get_length()`` and
            ``get_probationary_index()``.
        gaussian_window_size: Passed through to ``sarimax_mini``.
        step_size: Passed through to ``sarimax_mini``.
        plot_anomaly_score: If true, plot the combined anomaly scores above
            the raw ``"value"`` column.
        plot_forecast: If true, plot the combined forecast against the raw
            ``"value"`` column.

    Returns:
        dict with keys ``"Anomaly Scores"`` (numpy array), ``"Time"`` (sum of
        the per-chunk ``"Time"`` entries) and ``"Forecast"`` (list) for the
        chunked path; whatever ``sarimax_mini`` returns for the short path.
    """
    slide_size = 200

    # Series shorter than one window are handled in a single pass.
    if ts_obj.get_length() < slide_size:
        return sarimax_mini(
            ts_obj,
            gaussian_window_size,
            step_size,
            plot_anomaly_score=plot_anomaly_score,
            plot_forecast=plot_forecast,
        )

    frame = ts_obj.dataframe
    chunks = [
        frame[offset:offset + slide_size]
        for offset in range(0, frame.shape[0], slide_size)
    ]

    anomaly_scores = []
    forecast = []
    total_time = 0
    for chunk_df in tqdm(chunks):
        print(ts_obj.name)
        # Only full-size windows are modelled; a short tail is padded below.
        if len(chunk_df) < slide_size:
            continue

        chunk_ts_obj = TimeSeries(
            chunk_df,
            timestep=ts_obj.timestep,
            dateformat=ts_obj.dateformat,
            name=ts_obj.name,
        )
        # Seasonality is the only characteristic set on the chunk before scoring.
        chunk_ts_obj.set_seasonality()
        chunk_result = sarimax_mini(
            chunk_ts_obj,
            gaussian_window_size,
            step_size,
            plot_anomaly_score=False,
            plot_forecast=False,
        )

        anomaly_scores.extend(chunk_result["Anomaly Scores"])
        total_time += chunk_result["Time"]
        forecast.extend(chunk_result["Forecast"])

    # Zero-fill positions that were never scored so outputs align with the input length.
    target_length = ts_obj.get_length()
    while len(anomaly_scores) < target_length:
        anomaly_scores.append(0)
    while len(forecast) < target_length:
        forecast.append(0)

    values = ts_obj.dataframe["value"].values if plot_forecast else None
    if plot_forecast:
        plt.plot(forecast, alpha=.7, label="Predictions")
        plt.plot(values, alpha=.5, label="Data")
        plt.legend()
        plt.show()

    if plot_anomaly_score:
        plt.subplot(211)
        plt.title("Anomaly Scores")
        plt.plot(anomaly_scores)
        plt.ylim([.99, 1])
        plt.subplot(212)
        plt.title("Time Series")
        plt.plot(ts_obj.dataframe["value"].values)
        plt.axvline(ts_obj.get_probationary_index(), color="black", label="probationary line")
        plt.tight_layout()
        plt.show()

    return {
        "Anomaly Scores": np.asarray(anomaly_scores),
        "Time": total_time,
        "Forecast": forecast,
    }