示例#1
0
def test_innovations_mle():
    """Check ARIMA.fit(method='innovations_mle') against innovations_mle()."""
    endog = dta['infl'].iloc[:100]

    # Both specifications are fit without a trend (since a trend would imply
    # GLS estimation):
    #   1. ARMA(1, 1)
    #   2. SARMA(1, 0)x(1, 0)4
    specifications = (
        {'order': (1, 0, 1)},
        {'order': (1, 0, 0), 'seasonal_order': (1, 0, 0, 4)},
    )
    for spec in specifications:
        desired_p, _ = innovations_mle(endog, demean=False, **spec)
        res = ARIMA(endog, trend='n', **spec).fit(method='innovations_mle')
        # Note: atol is required only due to precision issues on Windows
        assert_allclose(res.params, desired_p.params, atol=1e-5)
示例#2
0
def predict_autoregressive_integrated_moving_verage(data,
                                                    predict_start=None,
                                                    intervals=15):
    """Fit an ARIMA(1, 1, 1) model to ``data`` and predict a window.

    :param data: series to fit; only ``len(data)`` is used besides the fit.
    :param predict_start: first position to predict; when ``None`` the
        window starts two observations before the end of ``data``.
    :param intervals: how far past ``predict_start`` the window extends.
    :return: whatever ``predict`` yields for
        ``[predict_start, predict_start + intervals]``.
    """
    fitted = ARIMA(data, order=(1, 1, 1)).fit()
    first = len(data) - 2 if predict_start is None else predict_start
    return fitted.predict(first, first + intervals, typ='levels')
 def arima_model(self, time_series, step=5):
     """Append ``step`` one-day-ahead ARIMA forecasts to ``time_series``.

     The model (order ``self.order``) is re-fit on every iteration so each
     new forecast also uses the previously appended ones.

     :param time_series: pandas object indexed by dates; it is modified in
         place and also returned.
     :param step: number of daily forecasts to append.
     :return: the same ``time_series`` object, extended by ``step`` rows.
     """
     for _ in range(step):
         # The data may hold strings/objects, so cast to float before fitting.
         model_fit = ARIMA(time_series.astype(float), order=self.order).fit()
         forecast = model_fit.forecast()
         # Take the first point forecast numerically instead of slicing the
         # printed representation.  A tuple result is assumed to carry the
         # point forecasts in its first element -- NOTE(review): confirm for
         # the statsmodels version in use.
         point = forecast[0] if isinstance(forecast, tuple) else forecast
         next_value = float(np.ravel(point)[0])
         next_stamp = (time_series.last_valid_index() +
                       datetime.timedelta(days=1))
         time_series.loc[next_stamp] = next_value
         # sort_index() returns a new object; sort in place so it takes effect.
         time_series.sort_index(inplace=True)
     return time_series
示例#4
0
def predict_tomorrow_sarimax(stock, stock_data, db, params):
    """
    Forecast the next GAIN_LOSS value with a seasonal ARIMA model and export
    the prediction together with the latest real figures.

    :param stock: identifier forwarded to ``export_firebase``.
    :param stock_data: DataFrame with at least ``GAIN_LOSS`` and ``Close``
        columns; the last row supplies the "real" values.
    :param db: database handle forwarded to ``export_firebase``.
    :param params: mapping whose ``["Params"]`` entry holds ``order`` and
        ``seasonal_order`` for the model.
    :return: None; the result is written to the ``CURRENT_PREDS`` and
        ``HISTORY_PREDS`` folders.
    """

    history_endog = stock_data["GAIN_LOSS"]

    model = ARIMA(endog=history_endog,
                  order=params["Params"]["order"],
                  seasonal_order=params["Params"]["seasonal_order"])
    model_fit = model.fit()
    # forecast() yields a one-element Series here; take the element
    # explicitly because float(Series) is deprecated in recent pandas.
    prediction = float(model_fit.forecast(steps=1).iloc[0])

    last_date, next_date = get_dates(stock_data)

    last_stats = stock_data.iloc[-1]
    data_last_stats = {
        'LAST_' + entry: format_floats(last_stats[entry], 4)
        for entry in stock_data.columns
    }

    # GAIN_LOSS is applied as a relative change to the last close.
    next_price = (1 + prediction) * float(data_last_stats["LAST_Close"])
    pred_gain_loss = format_floats(prediction, 4)
    trend_gain_loss = 'pos' if prediction > 0 else 'neg'
    trend_last_gain_loss = 'pos' if float(
        data_last_stats["LAST_GAIN_LOSS"]) > 0 else 'neg'

    history = {
        f"{next_date}_PRED_Price": format_floats(next_price, 2),
        f"{next_date}_PRED_Price_Diff": pred_gain_loss,
        f"{next_date}_PRED_Price_Trend": trend_gain_loss,
        f"{last_date}_REAL_Price": data_last_stats["LAST_Close"],
        f"{last_date}_REAL_Price_Diff": data_last_stats["LAST_GAIN_LOSS"],
        f"{last_date}_REAL_Price_Trend": trend_last_gain_loss
    }

    # Export to current day folder
    export_firebase(data=history,
                    stock=stock,
                    db=db,
                    folder='CURRENT_PREDS',
                    delete=True)

    # Export to history folder
    export_firebase(data=history,
                    stock=stock,
                    db=db,
                    folder='HISTORY_PREDS',
                    delete=False)
示例#5
0
def random_walk(data, test):
    """Fit a seasonal random-walk model and predict over the test index.

    The model is an ARIMA with only one seasonal difference at period 12
    (``seasonal_order=(0, 1, 0, 12)``).

    :param data: training series passed to ``ARIMA``.
    :param test: object whose ``index`` supplies the first and last
        prediction labels.
    :return: ``(fit_result, predictions, bic)``.
    """
    # Local import kept from the original; the unused pmdarima import was
    # dropped so the function no longer requires that package.
    from statsmodels.tsa.arima.model import ARIMA

    res = ARIMA(data, seasonal_order=(0, 1, 0, 12)).fit()

    test_labels = test.index.values
    oos_predictions = res.predict(start=test_labels[0], end=test_labels[-1])

    return res, oos_predictions, res.bic
示例#6
0
def test_low_memory():
    """Basic check that fitting with ``low_memory=True`` works."""
    endog = dta['infl'].iloc[:50]

    mod = ARIMA(endog, order=(1, 0, 0), concentrate_scale=True)
    full_res = mod.fit()
    lean_res = mod.fit(low_memory=True)

    # Both fits must agree on the estimates and the log-likelihood
    assert_allclose(lean_res.params, full_res.params)
    assert_allclose(lean_res.llf, full_res.llf)

    # The model's own memory conservation setting must be left untouched
    assert_equal(mod.ssm.memory_conserve, 0)

    # Low memory was actually used: these outputs are not stored
    # (just check a couple)
    for dropped in ('llf_obs', 'forecasts', 'predicted_state',
                    'filtered_state', 'smoothed_state'):
        assert_(getattr(lean_res, dropped) is None)
示例#7
0
def fit_forecast_next(dataset):
    """Grid-search an ARIMA order, refit on ``dataset`` and forecast once.

    The candidate orders are handed to ``evaluate_models``; the best
    configuration and its score are printed.  Warnings are silenced
    process-wide as a side effect.

    :return: ``(fit_result, forecast)`` for the best configuration.
    """
    warnings.filterwarnings("ignore")
    ar_grid = [0, 1, 2, 4, 6, 8, 10]
    best_cfg, best_score = evaluate_models(dataset, ar_grid, range(3),
                                           range(3))
    print('Best ARIMA%s RMSE=%.3f' % (best_cfg, best_score))
    arima = ARIMA(dataset, order=best_cfg)
    arima.k_lags = None
    fitted = arima.fit()
    return fitted, fitted.forecast()
示例#8
0
def evaluate_arima_model(X, arima_order):
    """Return the median absolute error of an ARIMA fit over ``X``.

    The model is fit on all of ``X`` and predictions are requested for
    positions ``0 .. len(X) - 1``, i.e. the same observations used for
    fitting.
    """
    fitted = ARIMA(list(X), order=arima_order).fit()
    fitted_values = fitted.predict(0, len(X) - 1, typ='levels')
    return metrics.median_absolute_error(X, fitted_values)
    def get_predictions_rmse_mape_final(self, min_algo):
        """Fit the selected model on ``self.total`` and forecast ahead.

        :param min_algo: one of the ``Constants`` algorithm identifiers
            (ARIMA, MOVING_AVERAGE, AR, ARMA, SARIMA, SES).
        :return: ``Constants.NUMBER_OF_PREDICTIONS`` forecasts following the
            end of ``self.total``, rounded and clipped at zero.
        :raises ValueError: if ``min_algo`` is not a supported identifier.
        """
        if min_algo == Constants.ARIMA:
            model = ARIMA(self.total, order=(7, 0, 1))
            model_fit = model.fit()
        elif min_algo == Constants.MOVING_AVERAGE:
            model = ARMA(self.total, order=[0, 1])
            model_fit = model.fit(disp=0)
        elif min_algo == Constants.AR:
            model = AR(self.total)
            model_fit = model.fit(disp=0)
        elif min_algo == Constants.ARMA:
            model = ARMA(self.total, order=[2, 1])
            model_fit = model.fit(disp=0)
        elif min_algo == Constants.SARIMA:
            model = SARIMAX(self.total,
                            order=(1, 1, 1),
                            seasonal_order=(1, 1, 1, 12))
            model_fit = model.fit(disp=0)
        elif min_algo == Constants.SES:
            model = SimpleExpSmoothing(self.total)
            model_fit = model.fit()
        else:
            # Previously this fell through and failed later on an unbound
            # ``model_fit``; fail early with a clear message instead.
            raise ValueError(
                f"Unsupported forecasting algorithm: {min_algo!r}")

        start_index = len(self.total)
        end_index = start_index + Constants.NUMBER_OF_PREDICTIONS - 1
        forecast = model_fit.predict(start=start_index, end=end_index)
        # Round each forecast and replace negative values with zero.
        for i in range(len(forecast)):
            forecast[i] = round(forecast[i])
            if forecast[i] < 0:
                forecast[i] = 0
        return forecast
示例#10
0
def generate_predict_data():
    """Forecast the next close for every ticker and write ``dic.csv``.

    Reads ``tickerlist.csv`` (column ``name``) and ``order.csv`` (columns
    ``ticker``, ``first``, ``second``, ``third`` giving the ARIMA order),
    fits one model per ``coin/<name>`` file and collects the one-step
    forecast together with the last two closes.  A ticker whose processing
    fails is reported and skipped.

    :return: dict with the keys ``ticker``, ``predict``, ``bef_close`` and
        ``close``, each a list with one entry per processed ticker.
    """
    tickerlist = pd.read_csv("tickerlist.csv")
    order = pd.read_csv("order.csv").set_index('ticker')

    ticker = []
    predict = []
    close = []
    bef_close = []

    for name in tickerlist['name']:
        try:
            all_data = pd.read_csv("coin/" + name)
            all_data['time'] = all_data['time'].apply(
                lambda x: datetime(int(x[:4]), int(x[5:7]), int(x[8:10])))

            temp_order = (int(order['first'].loc[name]),
                          int(order['second'].loc[name]),
                          int(order['third'].loc[name]))

            model_fit = ARIMA(all_data['close'], order=temp_order).fit()
            forecast_data = model_fit.forecast(steps=1)

            # Compute every value before touching the result lists so a
            # failure cannot leave them with different lengths.
            predicted = round(forecast_data.iloc[0], 2)
            last_close = all_data['close'].iloc[-1]
            previous_close = all_data['close'].iloc[-2]

            print(all_data['time'].iloc[-1])
            print("name :", name)
            print("close :", last_close)
            print("predict :", predicted)
            print()
        except Exception as exc:
            # Best effort: skip this ticker, but say why instead of
            # swallowing everything (including KeyboardInterrupt).
            print("skipped :", name, "-", exc)
            continue

        ticker.append(name)
        predict.append(predicted)
        close.append(last_close)
        bef_close.append(previous_close)

    dic = {
        'ticker': ticker,
        'predict': predict,
        'bef_close': bef_close,
        'close': close,
    }

    df = pd.DataFrame(dic)
    df.to_csv("dic.csv")

    return dic
示例#11
0
def predict_moving_average(data, predict_start=None, intervals=15):
    """Predict a window of ``data`` with an ARIMA(2, 1, 5) on scaled values.

    ``data`` is scaled with the module-level ``scaler`` (which is re-fit
    here), the model is fit on the scaled series, and the predictions are
    mapped back through ``scaler.inverse_transform``.

    :param data: array exposing ``reshape``.
    :param predict_start: first position to predict; defaults to two
        observations before the end of ``data``.
    :param intervals: how far past ``predict_start`` the window extends.
    :return: flat array of predictions on the original scale.
    """
    scaled_column = scaler.fit_transform(data.reshape(-1, 1))
    data_norm = np.reshape(scaled_column, (-1))
    fitted = ARIMA(data_norm, order=(2, 1, 5)).fit()

    first = len(data) - 2 if predict_start is None else predict_start

    prediction_norm = fitted.predict(first, first + intervals, typ='levels')
    restored = scaler.inverse_transform(prediction_norm.reshape(-1, 1))
    return np.reshape(restored, (-1))
示例#12
0
 def arma_model(p_range, q_range, data):
     """Fit ARMA(p, q) models over a grid and collect their BIC values.

     :param p_range: AR orders ``0 .. p_range - 1`` are tried.
     :param q_range: MA orders ``0 .. q_range - 1`` are tried.
     :param data: series passed to ``ARIMA``.

     A fit that raises is recorded with a BIC of NaN.
     """
     index = 0
     result = np.array([[]])
     for p in range(p_range):
         pi = []
         for q in range(q_range):
             try:
                 model_fit = ARIMA(data, order=(p, 0, q)).fit()
             except Exception:
                 # The old handler wrote to ``model_fit.bic``, which is either
                 # unbound (first fit) or the previous fit's result object.
                 bic = np.nan
             else:
                 bic = model_fit.bic
             # NOTE(review): ``dict`` is not defined in this block; unless the
             # module rebinds it to a mapping this hits the builtin type and
             # raises TypeError -- confirm against the rest of the file.
             dict[index] = (p, q, bic)
             pi.append(bic)
             index = index + 1
         result = np.append(result, np.array(pi))
示例#13
0
def evaluate_arima_model(X, arima_order):
    """Walk-forward evaluation of an ARIMA order on ``X``.

    The first 67% of ``X`` seeds the history; for every remaining
    observation a model is re-fit on the history, a one-step forecast is
    recorded, and the true observation is appended to the history.

    :return: mean squared error between the held-out part and the forecasts.
    """
    train_size = int(len(X) * 0.67)
    train, test = X[0:train_size], X[train_size:]
    history = [x for x in train]
    predictions = []
    # Silence warnings only for the duration of the evaluation; the caller's
    # warning filters are restored on exit instead of being overwritten.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for t in range(len(test)):
            model_fit = ARIMA(history, order=arima_order).fit()
            predictions.append(model_fit.forecast(steps=1))
            history.append(test[t])
        error = mean_squared_error(test, predictions)
    return error
def test_nonstationary_gls_error():
    """Fitting must raise ValueError mentioning the autoregressive roots.

    An ARIMA(18, 0, 39) with stationarity and invertibility enforcement
    disabled is fit to the inline series using ``hannan_rissanen``.
    """
    # GH-6540
    # Inline CSV: a ``data`` header followed by the observations.
    endog = pd.read_csv(
        io.StringIO(
            """\
data\n
9.112\n9.102\n9.103\n9.099\n9.094\n9.090\n9.108\n9.088\n9.091\n9.083\n9.095\n
9.090\n9.098\n9.093\n9.087\n9.088\n9.083\n9.095\n9.077\n9.082\n9.082\n9.081\n
9.081\n9.079\n9.088\n9.096\n9.081\n9.098\n9.081\n9.094\n9.091\n9.095\n9.097\n
9.108\n9.104\n9.098\n9.085\n9.093\n9.094\n9.092\n9.093\n9.106\n9.097\n9.108\n
9.100\n9.106\n9.114\n9.111\n9.097\n9.099\n9.108\n9.108\n9.110\n9.101\n9.111\n
9.114\n9.111\n9.126\n9.124\n9.112\n9.120\n9.142\n9.136\n9.131\n9.106\n9.112\n
9.119\n9.125\n9.123\n9.138\n9.133\n9.133\n9.137\n9.133\n9.138\n9.136\n9.128\n
9.127\n9.143\n9.128\n9.135\n9.133\n9.131\n9.136\n9.120\n9.127\n9.130\n9.116\n
9.132\n9.128\n9.119\n9.119\n9.110\n9.132\n9.130\n9.124\n9.130\n9.135\n9.135\n
9.119\n9.119\n9.136\n9.126\n9.122\n9.119\n9.123\n9.121\n9.130\n9.121\n9.119\n
9.106\n9.118\n9.124\n9.121\n9.127\n9.113\n9.118\n9.103\n9.112\n9.110\n9.111\n
9.108\n9.113\n9.117\n9.111\n9.100\n9.106\n9.109\n9.113\n9.110\n9.101\n9.113\n
9.111\n9.101\n9.097\n9.102\n9.100\n9.110\n9.110\n9.096\n9.095\n9.090\n9.104\n
9.097\n9.099\n9.095\n9.096\n9.085\n9.097\n9.098\n9.090\n9.080\n9.093\n9.085\n
9.075\n9.067\n9.072\n9.062\n9.068\n9.053\n9.051\n9.049\n9.052\n9.059\n9.070\n
9.058\n9.074\n9.063\n9.057\n9.062\n9.058\n9.049\n9.047\n9.062\n9.052\n9.052\n
9.044\n9.060\n9.062\n9.055\n9.058\n9.054\n9.044\n9.047\n9.050\n9.048\n9.041\n
9.055\n9.051\n9.028\n9.030\n9.029\n9.027\n9.016\n9.023\n9.031\n9.042\n9.035\n
"""
        ),
        index_col=None,
    )
    # High-order model with both parameter constraints switched off.
    mod = ARIMA(
        endog,
        order=(18, 0, 39),
        enforce_stationarity=False,
        enforce_invertibility=False,
    )
    # The fit is expected to fail with a message about the AR roots.
    with pytest.raises(ValueError, match="Roots of the autoregressive"):
        mod.fit(method="hannan_rissanen", low_memory=True, cov_type="none")
示例#15
0
def arima(df,time_id,lookback, p,d,q):
    """Relative one-step forecast errors of ARIMA(p, d, q) on log open prices.

    For every ``tid`` in ``time_id`` a model is fit on the log of the
    ``lookback`` values of ``df[OPEN_KEY]`` preceding ``tid``; the forecast
    is compared with the log of the value at ``tid``.

    :return: column vector (shape ``(-1, 1)``) of ``(forecast - actual) /
        actual`` on the log scale, one row per ``tid``.
    """
    Log(LOG_INFO) << "Computing arima(%d,%d,%d) with lookback: %d " % (p,d,q,lookback)
    relative_errors = []
    for tid in time_id:
        window = np.log(df[OPEN_KEY][tid-lookback:tid].values)
        fitted = ARIMA(window, order=(p,d,q)).fit(
            method_kwargs={"warn_convergence": False})
        predicted = fitted.forecast()
        actual = np.log(df[OPEN_KEY][tid])
        relative_errors.append((predicted[0] - actual) / actual)

    return np.array(relative_errors).reshape(-1, 1)
 def forecast(self, data: np.ndarray):
     """Return a one-step forecast for ``data``.

     * one element or fewer: ``self.max_loss``
     * at most ``self.min_size`` elements: the mean of ``data``
     * otherwise: the one-step ARIMA forecast (order ``self.order``),
       falling back to the mean if fitting or forecasting raises.
     """
     if data.size <= 1:
         return self.max_loss
     if data.size <= self.min_size:
         # Use the average until there is enough data for the ARIMA model
         return np.mean(data)
     try:
         fitted = ARIMA(data, order=self.order).fit()
         return fitted.forecast(steps=1)[0]
     except Exception:
         return np.mean(data)
示例#17
0
def arma_model():
    """Build a fitted ARMA(2, 2) model on a seeded, simulated monthly series.

    :return: ``ModelWithResults`` holding the fit result, the unfitted
        ``ARIMA`` instance and a one-row start/end inference frame.
    """
    # Autoregressive Moving Average (ARMA)
    np.random.seed(12345)
    ar_coefficients = np.array([1, -0.75, 0.25])
    ma_coefficients = np.array([1, 0.65, 0.35])
    nobs = 250
    simulated = arma_generate_sample(ar_coefficients, ma_coefficients, nobs)
    month_index = pd.date_range("1980-1-1", freq="M", periods=nobs)
    y = pd.Series(simulated, index=month_index)

    arima = ARIMA(y, order=(2, 0, 2), trend="n")
    fitted = arima.fit()
    inference_dataframe = pd.DataFrame(
        {"start": ["1999-06-30"], "end": ["2001-05-31"]})

    return ModelWithResults(model=fitted,
                            alg=arima,
                            inference_dataframe=inference_dataframe)
示例#18
0
def arima_model(vEndog, mExog=None, tPDQ=None):
    """
    Fits an ARIMA model. Order can be specified or determined by auto_arima.
    Differently from other models, it does not work on patsy/R formula syntax.

    :param vEndog: DataFrame column/numpy vector containing endogenous data (which will be regressed upon itself)
    :param mExog: vector/matrix containing exogenous data. Defaults to None
    :param tPDQ: tuple (p, d, q) containing order of the model;
        p: number of autoregressive terms (AR)
        d: number of differencing passes (I)
        q: number of past forecast errors/moving averages (MA)
        If None (default), performs an auto_arima()

    :return mod: fitted model instance
    """

    ## Creating model
    if tPDQ is not None:
        # exog=None is the ARIMA default, so one call covers both cases
        mod_arima = ARIMA(endog=vEndog, exog=mExog, order=tPDQ).fit(cov_type='robust')
    # If order isn't specified, use auto_arima()
    else:
        mod_arima = auto_arima(y=vEndog, X=mExog)
        # Refit with the same exogenous data used for the order search so the
        # regressors are not silently dropped.
        mod_arima = mod_arima.fit(y=vEndog, X=mExog, cov_type='robust')

    ## Printing summary and diagnostics
    print(mod_arima.summary())

    print("For heteroskdasticity, check Prob(H), where H0: homoskedasticity, and the standardized residual graph.")
    print("If there is hetero., the model error can't be a white noise (which is the desired thing).")
    print("Estimaed Density and Jarque-Bera have information on normality.")
    print("In the correlogram, all lollipops must be inside of the shaded area.")

    # Plots
    mod_arima.plot_diagnostics(figsize=(10, 10))
    plt.show()

    # Residual means
    # ``resid`` may be a plain attribute or a method depending on which
    # branch built the model, so only call it when it is callable.
    residuals = mod_arima.resid
    if callable(residuals):
        residuals = residuals()
    tMean0 = stats.ttest_1samp(residuals, 0, nan_policy='omit')
    print(f"P-value for the test that residual mean is equal to 0: {np.around(tMean0[1], 5)}.")
    print("If p < 0.05, H0 is rejected and the residual mean is different from 0 (not ideal).")

    ## Returning
    return mod_arima
示例#19
0
def estimate_time(tickets):
    """
        Estimate the closing time from a list of tickets.

        ### Parameters
        - `tickets`: the list of past tickets, each one a dictionary with at
            least the keys `"inizio"` (start) and `"fine"` (end), both
            timestamps

        ### Return value
        A prediction of the time needed to close the next ticket
    """
    times = list(map(diff_in_seconds, tickets))
    fitted = ARIMA(times).fit()
    # ``alpha`` is not a forecast() parameter and does not influence the
    # point forecast, so it is no longer passed.
    forecast = fitted.forecast(1)
    return timedelta(seconds=forecast[0])
def evaluate_arima_model(X, arima_order):
    """Walk-forward RMSE of an ARIMA order on ``X``.

    The first 66% of ``X`` seeds the history.  For each remaining
    observation the model is re-fit, its one-step forecast is stored, and
    the true value joins the history.
    """
    split_at = int(len(X) * 0.66)
    train, test = X[0:split_at], X[split_at:]
    history = list(train)
    predictions = []
    for t in range(len(test)):
        fitted = ARIMA(history, order=arima_order).fit()
        predictions.append(fitted.forecast()[0])
        history.append(test[t])
    # out-of-sample error
    return sqrt(mean_squared_error(test, predictions))
def test_from_estimation(d, seasonal):
    """ArmaProcess.from_estimation returns coefficient arrays of the
    expected length for (seasonal) ARIMA fits."""
    if seasonal:
        ar = [0.8, 0, 0, 0.2, -0.16]
        ma = [0.4, 0, 0, 0.2, -0.08]
        seasonal_order = (1, 0, 1, 4)
        shape = (5,)
    else:
        ar = [0.8]
        ma = [0.4]
        seasonal_order = None
        shape = (1,)

    ap = ArmaProcess.from_coeffs(ar, ma, 500)
    idx = pd.date_range(dt.datetime(1900, 1, 1), periods=500, freq="Q")
    sample = ap.generate_sample(500)
    if d == 1:
        # integrate once so that d=1 differencing recovers the ARMA sample
        sample = np.cumsum(sample)
    data = pd.Series(sample, index=idx)

    res = ARIMA(data, order=(1, d, 1), seasonal_order=seasonal_order).fit()
    ap_from = ArmaProcess.from_estimation(res)
    assert ap_from.arcoefs.shape == shape
    assert ap_from.macoefs.shape == shape
示例#22
0
def generate_ARIMA_model():
    """Create walk-forward ARIMA predictions for every ticker.

    For each name in ``tickerlist.csv`` an order is selected with
    ``auto_arima`` on ``./training_data/<name>``; then, for every row of
    ``./test_data/<name>``, an ARIMA of that order is fit on the closes from
    ``./coin/<name>`` up to that day and the one-step forecast is stored.
    The result is written to ``predict_data/<name>``.
    """
    tickerlist = pd.read_csv("tickerlist.csv")

    try:
        # exist_ok avoids the check-then-create race of exists() + makedirs()
        os.makedirs("predict_data", exist_ok=True)
    except OSError:
        print("Error: Creating directory")

    for name in tickerlist['name']:

        all_data = pd.read_csv("./coin/" + name)
        training_data = pd.read_csv("./training_data/" + name)
        test_data = pd.read_csv("./test_data/" + name)

        # Select a model order for this ticker
        model = auto_arima(training_data['close'], trace=True, error_action='ignore', start_p=1, start_q=1, max_p=3,
                                 max_q=3, suppress_warnings=True, stepwise=False, seasonal=False, with_intercept=False)

        order = (model.order[0], model.order[1], model.order[2])

        time_data = []
        predict_data = []

        # Use the selected order and the test period to build the predictions
        first_day = len(training_data)
        for day in range(first_day, first_day + len(test_data)):

            model_fit = ARIMA(all_data['close'].iloc[0:day], order=order).fit()

            forecast_data = model_fit.forecast(steps=1)
            # steps=1 yields a single value; take it by position
            predict_data.append(round(forecast_data.iloc[0], 2))
            time_data.append(all_data['time'].iloc[day])
            print(all_data['time'].iloc[day])

        print(predict_data)
        print(len(predict_data))

        dic = {
            'time': time_data,
            'predict': predict_data,
            'close': test_data['close'],
            'high': test_data['high'],
        }

        df = pd.DataFrame(dic)
        df.to_csv("predict_data/" + name)
示例#23
0
    def fit(self,model_state: str, endog_data: pd.DataFrame, exp_name: str, exog_data=None):
        """Walk-forward ARIMA(7, 0, 1) evaluation over the last 10% of rows.

        :param model_state: column whose training values are copied into
            every column of the training history before fitting.
        :param endog_data: one column per state; the index must be
            convertible to a ``DatetimeIndex``.
        :param exp_name: experiment name forwarded to ``store_exp_results``.
        :param exog_data: optional exogenous data; only its training slice
            is passed to the model.
        :return: ``(predictions, test_data)``; ``predictions`` has one row
            per test day, labelled with the daily period it forecasts.
        """
        from statsmodels.tsa.arima.model import ARIMA
        # Hardcoded values to have consistency in models.
        train_pct = 0.9
        steps = 1
        all_states = endog_data.columns
        total_samples = len(endog_data.index)
        training_sample_size = int(train_pct*total_samples)
        test_sample_size = total_samples - training_sample_size

        endog_train_data = endog_data.iloc[:training_sample_size]
        exog_train_data = None
        if exog_data is not None:
            exog_train_data = exog_data.iloc[:training_sample_size]

        test_data = endog_data.iloc[training_sample_size:]
        history = endog_train_data.copy()
        # NOTE(review): writes through ``.values``; this relies on pandas
        # returning a writable view of the column -- confirm for the pandas
        # version in use.
        for _col in history.columns:
            history[_col].values[:training_sample_size]=history[model_state].values[:training_sample_size]
        history.index = pd.DatetimeIndex(history.index) # Converts datatype str to Datetime
        history.index = history.index.to_period('D')    # Converts Datetime to daily periods
        predictions = pd.DataFrame(columns=endog_train_data.columns)
        for t in range(len(test_data)):
            each_day_predictions = []
            for _state in all_states:
                print(f'{t} day for {_state}')
                # NOTE(review): ``exog_train_data`` keeps its training length
                # while ``history`` grows each day -- verify the exog path.
                model = ARIMA(history[_state],order=(7,0,1),exog=exog_train_data)
                model_fit = model.fit()
                output = model_fit.forecast(steps=steps)
                each_day_predictions.append(output.iloc[0])
            # Compute the label once, before history grows, so the forecasts
            # are stored under the day they refer to (previously they ended
            # up one day late).
            next_period = history.index[-1]+pd.offsets.Day(1)
            history.loc[next_period]=test_data.iloc[t]
            predictions.loc[next_period] = each_day_predictions

        fit_score = {'states':[],'r2':[]}
        for _pos, _state in enumerate(endog_data.columns):
            y_obs = test_data[:test_sample_size][_state].to_numpy()
            # A DataFrame cannot be indexed with ``[:, i]``; select the
            # column by position instead.
            y_pred = predictions.iloc[:, _pos].to_numpy(dtype=float)
            fit_score['states'].append(_state)
            fit_score['r2'].append(r2_score(y_obs,y_pred))

        store_exp_results(fit_score, exog_data is not None, exp_name)

        return predictions,test_data
示例#24
0
    def sample(self, lagged_values, lagged_times=None, **ignored):
        """Draw ``self.num_predictions`` guesses for the next value.

        ``lagged_values`` arrives newest-first.  If fewer than 20% of the
        values are distinct the outcomes are treated as quantized and the
        guesses are resampled from history; otherwise they are drawn from a
        normal distribution centred on an ARIMA point forecast.

        The guesses are printed and returned as a list.
        """
        uniques = np.unique(lagged_values)
        # our data are in reverse order, the ARIMA needs the opposite
        rev_values = lagged_values[::-1]

        # arbitrary cutoff of 20% to decide between quantized and continuous
        quantized = len(uniques) < 0.2 * len(rev_values)

        if quantized:
            # Positions that directly follow an earlier occurrence of the
            # most recent value
            prev_cases = [
                pos + 1 for pos, value in enumerate(rev_values[:-1])
                if value == rev_values[-1]
            ]
            # arbitrary decision on minimum number of occurrences
            if len(prev_cases) > 8:
                # submit based on what happened before
                value_list = [rev_values[pos] for pos in prev_cases]
            else:
                # not enough data, use the whole set instead
                value_list = rev_values[:]
            # randomly select from the value list and return as answer
            v = list(np.random.choice(value_list, self.num_predictions))
        else:
            # Simple ARIMA: pick an order, fit, and forecast one step ahead
            print('ARIMA')
            p_values = [0, 1, 2, 4, 6, 8, 10, 25]
            d_values = range(0, 3)
            q_values = range(0, 3)
            best_order = self.evaluate_models(rev_values, p_values, d_values,
                                              q_values)
            fitted = ARIMA(rev_values, order=best_order, trend='n').fit()
            next_pos = len(lagged_values)
            point_est = fitted.predict(next_pos, next_pos, dynamic=True)
            st_dev = np.std(lagged_values)
            v = list(np.random.normal(point_est, st_dev,
                                      self.num_predictions))
        print(*v, sep=", ")
        return v
示例#25
0
def tsa_factory(y: Y_TYPE,
                s: dict,
                k: int,
                a: A_TYPE = None,
                t: T_TYPE = None,
                e: E_TYPE = None,
                p: int = TSA_P_DEFAULT,
                d: int = TSA_D_DEFAULT,
                q: int = TSA_D_DEFAULT) -> ([float], Any, Any):
    """ Extremely simple univariate, fixed p,d,q ARIMA model that is re-fit each time

        :param y: latest observation; only its first element is modelled
        :param s: state dict carried between calls (empty on the first call)
        :param k: number of steps ahead to predict; must not change between calls
        :param a: optional extra values, appended to the state as received
        :param t: unused here
        :param e: unused here
        :param p, d, q: ARIMA order
        :return: (k predictions, std estimate with 1.0 as fallback, state)

        Until enough observations are stored, or when fitting fails, the
        prediction is the latest value repeated k times.
    """
    # NOTE(review): q defaults to TSA_D_DEFAULT -- confirm this is intended.

    # TODO: FIX THIS TO USE EMPIRICAL STD, OTHERWISE ENSEMBLES ARE DREADFUL

    y = wrap(y)
    a = wrap(a)

    if not s.get('y'):
        s = {'y': list(), 'a': list(), 'k': k, 'p': {}}
    else:
        # Assert immutability of k, dimensions
        # (s['y'] is known to be non-empty in this branch)
        assert len(y) == len(s['y'][0])
        assert k == s['k']
        if s['a']:
            assert len(a) == len(s['a'][0])

    if y is None:
        # NOTE(review): the state sits in the middle here but last in the
        # normal return below -- confirm which order callers expect.
        return None, s, None

    s['y'].append(y)
    if a is not None:
        s['a'].append(a)
    if len(s['y']) > max(2 * k + 5, TSA_META['n_warm']):
        y0s = [y_[0] for y_ in s['y']]
        model = ARIMA(y0s, order=(p, d, q))
        try:
            x = list(model.fit().forecast(steps=k))
        except Exception:
            # Fall back to repeating the latest value; catching Exception
            # (not everything) lets KeyboardInterrupt/SystemExit through.
            x = [wrap(y)[0]] * k
    else:
        x = [y[0]] * k

    y0 = wrap(y)[0]
    _we_ignore_bias, x_std, s['p'] = parade(p=s['p'], x=x, y=y0)
    x_std_fallback = nonecast(x_std, fill_value=1.0)
    return x, x_std_fallback, s
示例#26
0
def calculate_arima(df):
    """Forecast ten weeks of prices with an ARMA(1, 1) on weekly changes.

    ``df`` is resampled to weekly frequency (forward filled), differenced,
    and an ARIMA(1, 0, 1) is fit to the differences.  The predicted
    differences are accumulated on top of the last weekly price to obtain
    price levels.

    :param df: date-indexed frame with a ``price`` column.
    :return: ``(df_fill, dfp)`` -- the weekly history with near-zero prices
        patched from a neighbouring row, and the forecast prices.
    """
    df_fill = df.asfreq('W', method='ffill')
    df_returns = df_fill.diff()
    df_returns.iloc[0] = 0

    #Build the model and fit it to the data
    fitted_model = ARIMA(df_returns, order=[1, 0, 1]).fit()

    start = df.iloc[[-1]].index
    end = start + timedelta(weeks=10)

    start = pd.to_datetime(str(start.values[0]))
    end = pd.to_datetime(str(end.values[0]))

    start_date = start.strftime('%Y.%m.%d')
    end_date = end.strftime('%Y.%m.%d')

    preds = fitted_model.predict(start=start_date, end=end_date)
    dfp = pd.DataFrame(preds)
    dfp = dfp.rename(columns={"predicted_mean": "price"})

    # Turn predicted changes into levels.  Positional .iloc writes replace
    # the chained ``dfp.price[n] = ...`` assignments, which depend on
    # integer keys being treated as positions on a date index.
    pred_col = dfp.columns.get_loc("price")
    running_price = df_fill["price"].iloc[-1]
    for n in range(len(dfp)):
        running_price = running_price + dfp.iloc[n, pred_col]
        dfp.iloc[n, pred_col] = running_price

    df_fill["price"] = df_fill["price"].fillna(0)
    dfp["price"] = dfp["price"].fillna(0)

    # Patch (near-)zero prices: the first row copies the next row, every
    # other row copies the already patched previous row.
    hist_col = df_fill.columns.get_loc("price")
    for n in range(len(df_fill)):
        if df_fill.iloc[n, hist_col] < 0.0001:
            neighbour = n + 1 if n < 1 else n - 1
            df_fill.iloc[n, hist_col] = df_fill.iloc[neighbour, hist_col]

    return df_fill, dfp
示例#27
0
def get_arima_pred(train, test, p=5, q=1, d=0):
    """Add walk-forward one-step ARIMA forecasts to ``test``.

    The history starts as ``train['close']``; for each row of ``test`` a
    model is re-fit, its one-step forecast is stored, and the row's true
    close is appended to the history.

    Note that the model order is built as ``(p, q, d)``: the value passed
    as ``q`` fills the second slot of the order tuple and ``d`` the third.

    :return: ``test`` itself, modified in place with a ``forecast`` column
        and re-indexed by its ``date`` column.
    """
    print()
    history = list(train['close'])
    model_predictions = []

    for row_pos in range(len(test)):
        fitted = ARIMA(history, order=(p, q, d)).fit()
        model_predictions.append(fitted.forecast()[0])
        history.append(test.iloc[row_pos]['close'])

    test['forecast'] = model_predictions
    test.index = test['date']
    return test
def arima_insample(ts, g, order, name):
    """Plot ``ts`` against in-sample ARIMA predictions.

    :param ts: series to fit; the loop bound compares a position with
        ``ts.index[-1]``, so an integer index is assumed -- TODO confirm.
    :param g: gap; ``1`` plots the ordinary one-step predictions, otherwise
        dynamic predictions are made in consecutive windows of ``g`` steps
        starting 10% into the series.
    :param order: ARIMA order.
    :param name: prefix for the plot title.
    """
    plt.figure()
    fit = ARIMA(ts, order=order).fit()
    print(type(fit))
    if g == 1:
        p = fit.predict()
    else:
        # Series.append was removed in pandas 2.0; gather the windows and
        # concatenate them once.
        windows = []
        i = int(0.1 * len(ts))
        while i < ts.index[-1]:
            windows.append(fit.predict(start=i, end=i + g - 1, dynamic=True))
            i += g
        p = pd.concat(windows) if windows else pd.Series(dtype=float)
    plt.plot(ts)
    plt.plot(p)
    plt.title(name + " | [p,d,q] : " + str(order) + " | gap=" + str(g))
    plt.legend(["actual", "predicted"])
    plt.show()
示例#29
0
 def evaluate_arima_model(self, X, arima_order):
     """Walk-forward mean squared error of an ARIMA order on ``X``.

     The first 66% of ``X`` seeds the history; each later observation is
     forecast one step ahead by a freshly fit model and then appended to
     the history.
     """
     split_at = int(len(X) * 0.66)
     train, test = X[0:split_at], X[split_at:]
     history = list(train)
     predictions = []
     for t in range(len(test)):
         # disp=0 is passed through to fit() unchanged
         fitted = ARIMA(history, order=arima_order).fit(disp=0)
         predictions.append(fitted.forecast()[0])
         history.append(test[t])
     # out-of-sample error
     return mean_squared_error(test, predictions)
示例#30
0
def create_rmse_scores_per_term(all_global_params_dict, cc_dict):
    """Score three simple predictors on every stem's differenced series.

    For each stem except ``'NumWords'`` the per-stem values are summed over
    axis 0, offset by ``cc_dict[stem]``, scaled to unit Euclidean norm and
    first-differenced.  Three methods are then scored on that series:

    * ``'MA'``   -- mean of the two previous values
    * ``'LR'``   -- linear regression of each value on its predecessor
    * ``'ARMA'`` -- residuals of an ARIMA(1, 0, 1) fit (first one skipped)

    Each score is ``sqrt(sum of squared errors / (len(series) - 2))``.

    :return: ``{stem: {method: score}}``; the mapping is also printed.
    """
    scores_by_stem = {}
    for stem in all_global_params_dict:
        if stem == 'NumWords':
            continue
        print(stem)
        series = np.array(all_global_params_dict[stem].sum(axis=0))
        series = series + cc_dict[stem]
        print(series)
        # normalize
        series = series / np.sqrt(np.sum(np.square(series)))
        print(series)
        # diff series
        series = np.array(
            [series[i] - series[i - 1] for i in range(1, len(series))])
        print(series)
        n_points = len(series)
        for method in ['MA', 'LR', 'ARMA']:
            squared_error = 0.0
            if method == 'MA':
                squared_error = sum(
                    (((0.5 * series[i - 2] + 0.5 * series[i - 1]) -
                      series[i])**2 for i in range(2, n_points)), 0.0)
            elif method == 'LR':
                predictor = series[:-1]
                target = series[1:]
                regr = linear_model.LinearRegression()
                regr.fit(predictor.reshape(-1, 1), target.reshape(-1, 1))
                fitted = regr.predict(predictor.reshape(-1, 1)).reshape(1, -1)
                squared_error = sum(
                    ((fitted[0][i] - target[i])**2
                     for i in range(len(target))), 0.0)
            elif method == 'ARMA':
                arma_fit = ARIMA(series, order=(1, 0, 1)).fit()
                squared_error += np.sum(np.square(arma_fit.resid[1:]))
            score = np.sqrt(squared_error / float(n_points - 2))
            scores_by_stem.setdefault(stem, {})[method] = score
    print(scores_by_stem)
    return scores_by_stem