Пример #1
0
def test_inference():
    """Check that infer_freq_from_epoch returns the expected frequency alias.

    For each alias, 200 evenly spaced timestamps starting at RECENT_SECONDS are
    generated with the matching step (in seconds) and fed to
    infer_freq_from_epoch; the result must equal the alias.
    """
    # Aliases follow pandas conventions, see
    # https://github.com/pandas-dev/pandas/blob/master/pandas/tseries/frequencies.py
    n_points = 200
    step_by_alias = (
        ('S', 1),
        ('5S', 5),
        ('T', 60),
        ('5T', 5 * 60),  # 5 minutes
    )
    for expected_freq, step in step_by_alias:
        times = [RECENT_SECONDS + step * j for j in range(n_points)]
        freq = infer_freq_from_epoch(times)
        assert freq == expected_freq
Пример #2
0
 def prophet_fit_and_predict_with_exog_and_advance_vars(
         y: [[float]],
         k: int,
         t: [float],
         a: [[float]],
         model_params: dict = None) -> Tuple[List, List, Any, Any]:
     """ Simpler wrapper for testing - vector observations w/ advance vars, supplied times and future times

     :param y:             Observations, one list per time step. y[i][0] is the target,
                           any further entries are registered as extra regressors.
     :param k:             Number of steps ahead to predict
     :param t:             Times of the observations followed by k future times
                           (converted with epoch_to_naive_datetime)
     :param a:             Known in advance variables, one row per entry of t
     :param model_params:  Optional overrides applied on top of PROPHET_MODEL
     :returns: x         last k values of forecast['yhat']
               x_std     yhat_upper - yhat_lower for every row of the forecast
               forecast  dataframe returned by m.predict
               m         the fitted Prophet model
     """
     n_obs = len(y)
     assert len(t) == n_obs + k
     assert len(a) == n_obs + k
     assert isinstance(y[0], List)

     dim = len(y[0])
     n_exog = dim - 1
     a_cols = ['a' + str(j) for j in range(len(a[0]))]
     exog_cols = ['y' + str(j) for j in range(1, dim)]

     # Training frame: advance variables, then the target, then exogenous columns, then times
     df = pd.DataFrame(columns=a_cols, data=a[:-k])
     series = transpose(y)
     df['y'] = series[0]
     for i, exog_col in enumerate(exog_cols, start=1):
         df[exog_col] = series[i][:n_obs]
     dt = epoch_to_naive_datetime(t)
     df['ds'] = dt[:n_obs]

     # Start from the package defaults and let the caller override them
     kwargs_used = {name: value for name, value in PROPHET_MODEL.items()}
     if model_params:
         kwargs_used.update(model_params)
     m = Prophet(**kwargs_used)

     # Advance variables are registered first, then the exogenous observations
     for regressor in a_cols + exog_cols:
         m.add_regressor(name=regressor)
     with no_stdout_stderr():
         m.fit(df)

     # Future frame: overwrite the generated times with the caller's own
     freq = infer_freq_from_epoch(t)
     future = m.make_future_dataframe(periods=k, freq=freq)
     future['ds'] = dt
     for a_col, a_vals in zip(a_cols, transpose(a)):
         future[a_col] = a_vals
     for i in range(1, n_exog + 1):
         # Just bring forward: repeat the last observed value k times
         future['y' + str(i)] = series[i] + [series[i][-1]] * k

     forecast = m.predict(future)
     x = forecast['yhat'].values[-k:]  # Use m.plot(forecast) to take a peek

     # NOTE(review): unlike x, this spans the whole forecast rather than the last k rows - confirm intended
     upper = forecast['yhat_upper'].values
     lower = forecast['yhat_lower'].values
     x_std = [hi - lo for hi, lo in zip(upper, lower)]
     return x, x_std, forecast, m
Пример #3
0
 def prophet_fit_and_predict_with_time_and_advance_time(
         y: [float],
         k: int,
         t: [float],
         model_params: dict = None) -> Tuple[List, List, Any, Any]:
     """ Simpler wrapper for testing - univariate only w/ supplied times and future times

     :param y:             Observations used as the single 'y' column
     :param k:             Number of steps ahead to predict
     :param t:             Times of the observations followed by k future times
                           (converted with epoch_to_naive_datetime)
     :param model_params:  Optional overrides applied on top of PROPHET_MODEL
     :returns: x         last k values of forecast['yhat']
               x_std     yhat_upper - yhat_lower for every row of the forecast
               forecast  dataframe returned by m.predict
               m         the fitted Prophet model
     """
     n_obs = len(y)
     assert len(t) == n_obs + k

     # Training frame holds the observations and their times only
     df = pd.DataFrame(columns=['y'], data=y)
     dt = epoch_to_naive_datetime(t)
     df['ds'] = dt[:n_obs]

     # Start from the package defaults and let the caller override them
     kwargs_used = {name: value for name, value in PROPHET_MODEL.items()}
     if model_params:
         kwargs_used.update(model_params)
     m = Prophet(**kwargs_used)
     with no_stdout_stderr():
         m.fit(df)

     # Future frame: overwrite the generated times with the caller's own
     freq = infer_freq_from_epoch(t)
     future = m.make_future_dataframe(periods=k, freq=freq)
     future['ds'] = dt

     forecast = m.predict(future)
     x = forecast['yhat'].values[-k:]  # Use m.plot(forecast) to take a peek

     # NOTE(review): unlike x, this spans the whole forecast rather than the last k rows - confirm intended
     upper = forecast['yhat_upper'].values
     lower = forecast['yhat_lower'].values
     x_std = [hi - lo for hi, lo in zip(upper, lower)]
     return x, x_std, forecast, m
Пример #4
0
    def prophet_iskater_factory(y: [[float]],
                                k: int,
                                a: List = None,
                                t: List = None,
                                e=None,
                                freq: str = None,
                                n_max=1000,
                                recursive: bool = False,
                                model_params: dict = None,
                                return_forecast=True):
        """
        Fit a Prophet model to (at most the last n_max of) the observations and predict k steps ahead.

        :param y:           A list of observations, each a vector. A list of floats is wrapped into vectors.
        :param k:           Number of steps ahead to predict
        :param a:           Known in advance observations - should be k more of these than y's
        :param t:           Epoch times of observations y. If len(t)=len(y)+k the last k are interpreted as future times.
        :param e:           Not used by this function
        :param freq:        'D', '5T' etc, see https://github.com/pandas-dev/pandas/blob/master/pandas/tseries/frequencies.py
                            Only consulted when t is None; otherwise it is inferred from t.
        :param n_max:       Maximum number of observations to use, should you wish to prevent prophet from slowing down
        :param recursive    If True, exogenous variables y[1], y[2],... will be predicted forward in time
                                 (obviously this adds to computation time)
        :param model_params:     Optional overrides applied on top of PROPHET_MODEL
        :param return_forecast:  If True the forecast dataframe and fitted model are returned as well
        :returns: x         k-vector of predictions
                  x_std     k-vector of yhat_upper - yhat_lower (a scale, not yet a true standard deviation)
                  forecast  full forecast dataframe, familiar to users of fbprophet (only if return_forecast)
                  m         fitted Prophet model (only if return_forecast)
        """
        if a:
            assert len(a) == len(y) + k

        if isinstance(y[0], float):
            y = [wrap(yj) for yj in y]

        # Conversion of epoch times to UTC datetime
        # User must supply times, len(y) or len(y)+k, or a valid frequency str
        if t is None:
            if not freq:
                freq = PROPHET_META['freq']  # Just assume away ...
            else:
                assert is_valid_freq(
                    freq), 'Freq ' + str(freq) + ' is not a valid frequency'
            dt = pd.date_range(start=EPOCH, periods=len(y), freq=freq)  # UTC
        else:
            freq = infer_freq_from_epoch(t)
            dt = epoch_to_naive_datetime(t)

        if len(dt) == len(y) + k:
            ta = dt
            dt = dt[:len(y)]
        else:
            assert len(dt) == len(
                y), 'Time vector t should be len(y) or len(y)+k'
            ta = None

        # Truncate history so that prophet doesn't take forever to fit
        y_shorter = y[-n_max:]
        a_shorter = a[-(n_max + k):] if a is not None else []  # may be empty
        dt_shorter = dt[-n_max:]
        # The user supplied times (history + k future) must be cut the same way as 'a'.
        # Otherwise, when len(y) > n_max, they no longer line up row-for-row with the
        # future dataframe, which is built from the truncated history.
        ta_shorter = ta[-(n_max + k):] if ta is not None else None

        # Massage data into Prophet friendly dataframe with columns y, y1, ..., yk, a0,...aj
        y_cols = [
            'y' + str(i) if i > 0 else 'y' for i in range(len(y_shorter[-1]))
        ]
        if a:
            a_cols = ['a' + str(i) for i in range(len(a_shorter[-1]))]
            # a_shorter carries k extra (future) rows that are not part of the training data
            data = [
                list(yi) + list(ai)
                for yi, ai in zip(y_shorter, a_shorter[:-k])
            ]
            df = pd.DataFrame(columns=y_cols + a_cols, data=data)
        else:
            data = [list(yi) for yi in y_shorter]
            df = pd.DataFrame(columns=y_cols, data=data)
        df['ds'] = dt_shorter

        # Instantiate Prophet model, ensure defaults are what we think they are
        kwargs_used = {name: value for name, value in PROPHET_MODEL.items()}
        if model_params:
            kwargs_used.update(model_params)
        m = Prophet(**kwargs_used)

        # Add regressors
        for y_col in y_cols[1:]:
            m.add_regressor(name=y_col)
        if a:
            for a_col in a_cols:
                m.add_regressor(name=a_col)

        # Fit the model every invocation ... there isn't any other way
        with no_stdout_stderr():
            m.fit(df)

        # Make future dataframe, adding known-in-advance variables
        future = m.make_future_dataframe(periods=k, freq=freq)
        if a:
            for j, a_col in enumerate(a_cols):
                future[a_col] = [ai[j] for ai in a_shorter]  # Known in advance
        if ta_shorter is not None:
            future['ds'] = ta_shorter  # override with user supplied future times

        # Next, we wish to add contemporaneously observed variables
        #
        # This is somewhat problematic, for how should we bring exogenously observed variables forward?
        # The simplest answer is, don't use them - only supply 1-vector y observations
        # prophet implicitly assumes all exogenous are known, which is a pretty big shortcoming.
        #
        # However, if we are trying to support y[1:], ...
        #   - It seems consistent to use prophet to predict these forward,
        #   - It also seems likely that this will lead to over-fitting.
        # I'm open to ideas here. Perhaps some hackery could effect attenuation of the coefficients
        # assigned to y[1],... such as jiggling past observations. For now we use prophet on each
        # one individually, feeding them the known in advance 'a' variables.

        n_exog = len(y[0]) - 1
        if n_exog > 0:
            for j, y_col in enumerate(y_cols):
                if j > 0:
                    yj = [yi[j] for yi in y_shorter]
                    if recursive:
                        # Univariate forecast of the j'th exogenous variable; only the point
                        # predictions are needed here.
                        yj_hat, _, _, _ = prophet_iskater_factory(
                            y=yj,
                            k=k,
                            a=a_shorter,
                            freq=freq,
                            n_max=n_max,
                            recursive=False)
                    else:
                        # Carry the last observed value forward
                        yj_hat = [yj[-1]] * k
                    future[y_col] = yj + list(yj_hat)

        # Call the prediction function
        forecast = m.predict(future)
        x = list(forecast['yhat'].values[-k:]
                 )  # Use m.plot(forecast) to take a peek

        # Interpret confidence level difference as scale to be returned. TODO: set alpha properly so this really is 1-std
        x_std = list([
            u - l for u, l in zip(forecast['yhat_upper'].values[-k:],
                                  forecast['yhat_lower'].values[-k:])
        ])

        if return_forecast:
            return x, x_std, forecast, m
        else:
            return x, x_std