Exemplo n.º 1
0
    # plt.title(filename)
    # plt.show()

    OUTLIER_TYPES.append(to_include[filename][2])

    start = time.time()
    NUMBER_OF_MISSING_TIME_STEPS.append(ts.get_how_many_miss())
    ts.set_miss(fill=False)
    end = time.time()

    MISSING_TIMES.append(end - start)

    FROM_NUMENTA.append(to_include[filename][3])

    start = time.time()
    ts.set_seasonality()
    end = time.time()

    SEASONALITIES.append(ts.get_seasonality())

    PERIODICITIES.append(ts.get_period())

    SEASONALITY_TIMES.append(end - start)

    start = time.time()
    ts.set_trend()
    end = time.time()

    TRENDS.append(ts.get_trend())

    TREND_TYPES.append(ts.get_trend_type())
def sarimax(ts_obj,
            gaussian_window_size,
            step_size,
            plot_anomaly_score=False,
            plot_forecast=False):
    """Run ``sarimax_mini`` over a time series in fixed 200-row chunks.

    A series with fewer than 200 points is handed to ``sarimax_mini`` as a
    whole, with the plotting flags forwarded. Otherwise the dataframe is cut
    into consecutive 200-row slices; every full slice is wrapped in its own
    ``TimeSeries``, has ``set_seasonality()`` called on it, and is scored by
    ``sarimax_mini`` with plotting disabled. The per-chunk results are
    concatenated in order and zero-padded at the end up to the length of the
    series (a trailing slice shorter than 200 rows is not scored).

    Args:
        ts_obj: Time series object exposing ``dataframe``, ``timestep``,
            ``dateformat``, ``name``, ``get_length()`` and
            ``get_probationary_index()``.
        gaussian_window_size: Passed through unchanged to ``sarimax_mini``.
        step_size: Passed through unchanged to ``sarimax_mini``.
        plot_anomaly_score: If true, plot the combined anomaly scores above
            the raw series, with the probationary index marked.
        plot_forecast: If true, plot the combined forecast against the data.

    Returns:
        On the chunked path, a dict with keys ``"Anomaly Scores"`` (NumPy
        array), ``"Time"`` (sum of the per-chunk ``"Time"`` values) and
        ``"Forecast"`` (list). On the short-series path, whatever
        ``sarimax_mini`` returns.
    """
    chunk_rows = 200

    # Too short to chunk: score the whole series in one go.
    if ts_obj.get_length() < chunk_rows:
        return sarimax_mini(ts_obj,
                            gaussian_window_size,
                            step_size,
                            plot_anomaly_score=plot_anomaly_score,
                            plot_forecast=plot_forecast)

    frame = ts_obj.dataframe
    pieces = [
        frame[offset:offset + chunk_rows]
        for offset in range(0, frame.shape[0], chunk_rows)
    ]

    anomaly_scores = []
    forecast = []
    total_time = 0
    for piece in tqdm(pieces):
        print(ts_obj.name)
        # Only full-size chunks are scored; a short tail is padded below.
        if len(piece) < chunk_rows:
            continue

        piece_ts = TimeSeries(piece,
                              timestep=ts_obj.timestep,
                              dateformat=ts_obj.dateformat,
                              name=ts_obj.name)
        # Only the seasonality characteristic is set up before scoring.
        piece_ts.set_seasonality()
        outcome = sarimax_mini(piece_ts,
                               gaussian_window_size,
                               step_size,
                               plot_anomaly_score=False,
                               plot_forecast=False)

        anomaly_scores.extend(outcome["Anomaly Scores"])
        total_time = total_time + outcome["Time"]
        forecast.extend(outcome["Forecast"])

    # Zero-fill the unscored tail so both outputs line up with the series.
    series_length = ts_obj.get_length()
    anomaly_scores.extend([0] * (series_length - len(anomaly_scores)))
    forecast.extend([0] * (series_length - len(forecast)))

    if plot_forecast:
        plt.plot(forecast, alpha=.7, label="Predictions")
        plt.plot(ts_obj.dataframe["value"].values, alpha=.5, label="Data")
        plt.legend()
        plt.show()

    if plot_anomaly_score:
        plt.subplot(211)
        plt.title("Anomaly Scores")
        plt.plot(anomaly_scores)
        plt.ylim([.99, 1])
        plt.subplot(212)
        plt.title("Time Series")
        plt.plot(ts_obj.dataframe["value"].values)
        plt.axvline(ts_obj.get_probationary_index(),
                    color="black",
                    label="probationary line")
        plt.tight_layout()
        plt.show()

    return {
        "Anomaly Scores": np.asarray(anomaly_scores),
        "Time": total_time,
        "Forecast": forecast
    }