Python ARIMA.fit示例，statsmodels.tsa.arima.model.ARIMA.fit Python示例

示例#1

0

显示文件

def test_innovations_mle():
    """Check that ``ARIMA.fit(method='innovations_mle')`` reproduces the
    parameters obtained by calling ``innovations_mle`` directly.

    Two specifications are compared on the first 100 ``infl`` observations:
    an ARMA(1, 1) and a SARMA(1, 0)x(1, 0)4.
    """
    endog = dta['infl'].iloc[:100]

    # Neither specification has a trend (a trend would imply GLS estimation).
    specifications = (
        {'order': (1, 0, 1)},
        {'order': (1, 0, 0), 'seasonal_order': (1, 0, 0, 4)},
    )
    for spec in specifications:
        expected, _ = innovations_mle(endog, demean=False, **spec)
        fitted = ARIMA(endog, trend='n', **spec).fit(method='innovations_mle')
        # Note: atol is required only due to precision issues on Windows
        assert_allclose(fitted.params, expected.params, atol=1e-5)

示例#2

0

显示文件

def predict_autoregressive_integrated_moving_verage(data,
                                                    predict_start=None,
                                                    intervals=15):
    """Fit an ARIMA(1, 1, 1) model on ``data`` and predict a window of values.

    :param data: observations the model is fitted on; must support ``len()``.
    :param predict_start: first position of the prediction window. When
        ``None``, the window starts two observations before the end of
        ``data``.
    :param intervals: offset added to the start position to obtain the last
        predicted position.
    :return: the result of ``predict`` for the window.
    """
    fitted = ARIMA(data, order=(1, 1, 1)).fit()

    first = len(data) - 2 if predict_start is None else predict_start
    last = first + intervals
    return fitted.predict(first, last, typ='levels')

示例#3

0

显示文件

文件： TS.py 项目： prashanth-ds/Statistical-And-Graphical-Analysis-of-Major-Pandemics-this-Century

 def arima_model(self, time_series, step=5):
     """Extend ``time_series`` with ``step`` successive one-step ARIMA forecasts.

     On every iteration the model (order ``self.order``) is re-fitted on the
     series, including the values forecast so far. The next value is forecast
     and stored under the label one day after the last valid index.

     :param time_series: date-indexed pandas object; it is modified in place.
     :param step: number of forecast values to append.
     :return: the same ``time_series`` object, extended by ``step`` entries.
     """
     for _ in range(step):
         # Cast to float before fitting: the stored values may have a
         # non-numeric (object) dtype.
         fitted = ARIMA(time_series.astype(float), order=self.order).fit()
         forecast = fitted.forecast()
         if isinstance(forecast, tuple):
             # NOTE(review): some ARIMA implementations return
             # (forecast, stderr, conf_int); keep only the forecast values.
             forecast = forecast[0]
         # Read the value numerically instead of slicing the printed
         # representation, which depends on the index layout and truncates
         # the number.
         next_value = float(np.asarray(forecast).ravel()[0])
         next_label = (time_series.last_valid_index() +
                       datetime.timedelta(days=1))
         time_series.loc[next_label] = next_value
     return time_series

示例#4

0

显示文件

文件： models.py 项目： fabiocuri/stockprediction

def predict_tomorrow_sarimax(stock, stock_data, db, params):
    """
    Forecast the next ``GAIN_LOSS`` value for ``stock`` and export the result.

    An ARIMA model is fitted on ``stock_data["GAIN_LOSS"]`` using the
    ``order`` and ``seasonal_order`` found under ``params["Params"]``. The
    one-step forecast is combined with the last row of ``stock_data`` into a
    record that is exported twice through ``export_firebase``: to
    ``CURRENT_PREDS`` (with ``delete=True``) and to ``HISTORY_PREDS`` (with
    ``delete=False``).

    :param stock: identifier forwarded to ``export_firebase``.
    :param stock_data: DataFrame with at least ``GAIN_LOSS`` and ``Close``
        columns.
    :param db: handle forwarded to ``export_firebase``.
    :param params: mapping holding ``params["Params"]["order"]`` and
        ``params["Params"]["seasonal_order"]``.
    :return: None.
    """
    fitted = ARIMA(endog=stock_data["GAIN_LOSS"],
                   order=params["Params"]["order"],
                   seasonal_order=params["Params"]["seasonal_order"]).fit()
    forecast = fitted.forecast(steps=1)
    # Take the single forecast element explicitly: calling float() on a
    # one-element Series is deprecated in recent pandas.
    first_value = forecast.iloc[0] if hasattr(forecast, "iloc") else forecast[0]
    prediction = float(first_value)

    last_date, next_date = get_dates(stock_data)

    # Formatted snapshot of the most recent row, keyed as LAST_<column>.
    last_stats = stock_data.iloc[-1]
    data_last_stats = {
        'LAST_' + entry: format_floats(last_stats[entry], 4)
        for entry in list(stock_data.columns)
    }

    # The forecast is treated as a relative change applied to the last close.
    next_price = (1 + prediction) * float(data_last_stats["LAST_Close"])
    pred_gain_loss = format_floats(prediction, 4)
    trend_gain_loss = 'pos' if prediction > 0 else 'neg'
    trend_last_gain_loss = 'pos' if float(
        data_last_stats["LAST_GAIN_LOSS"]) > 0 else 'neg'

    history = {
        f"{next_date}_PRED_Price": format_floats(next_price, 2),
        f"{next_date}_PRED_Price_Diff": pred_gain_loss,
        f"{next_date}_PRED_Price_Trend": trend_gain_loss,
        f"{last_date}_REAL_Price": data_last_stats["LAST_Close"],
        f"{last_date}_REAL_Price_Diff": data_last_stats["LAST_GAIN_LOSS"],
        f"{last_date}_REAL_Price_Trend": trend_last_gain_loss
    }

    # Export to current day folder
    export_firebase(data=history,
                    stock=stock,
                    db=db,
                    folder='CURRENT_PREDS',
                    delete=True)

    # Export to history folder
    export_firebase(data=history,
                    stock=stock,
                    db=db,
                    folder='HISTORY_PREDS',
                    delete=False)

示例#5

0

显示文件

def random_walk(data, test):
    """Fit a seasonal-difference-only ARIMA model and predict over ``test``.

    The model is built with ``seasonal_order=(0, 1, 0, 12)`` and no other
    terms, then used to predict from the first to the last index value of
    ``test``.

    :param data: observations the model is fitted on.
    :param test: pandas object whose index bounds the prediction range.
    :return: tuple ``(results, predictions, bic)``.
    """
    from statsmodels.tsa.arima.model import ARIMA

    res = ARIMA(data, seasonal_order=(0, 1, 0, 12)).fit()

    test_index = test.index.values
    oos_predictions = res.predict(start=test_index[0], end=test_index[-1])

    return res, oos_predictions, res.bic

示例#6

0

显示文件

文件： test_model.py 项目： tricoffee/statsmodels

def test_low_memory():
    """Basic check that ``fit(low_memory=True)`` works.

    The low-memory fit must give the same parameters and log-likelihood as a
    regular fit, must leave the model's own memory-conservation setting
    untouched, and must not keep the large per-observation outputs.
    """
    endog = dta['infl'].iloc[:50]
    mod = ARIMA(endog, order=(1, 0, 0), concentrate_scale=True)

    regular = mod.fit()
    low_mem = mod.fit(low_memory=True)

    # Check that the models produce the same results
    for attr in ('params', 'llf'):
        assert_allclose(getattr(low_mem, attr), getattr(regular, attr))

    # Check that the model's basic memory conservation option wasn't changed
    assert_equal(mod.ssm.memory_conserve, 0)

    # Check that low memory was actually used (just check a couple)
    dropped_outputs = ('llf_obs', 'forecasts', 'predicted_state',
                       'filtered_state', 'smoothed_state')
    for attr in dropped_outputs:
        assert_(getattr(low_mem, attr) is None)

示例#7

0

显示文件

文件： arima_grid.py 项目： SODALITE-EU/refactoring-ml

def fit_forecast_next(dataset):
    """Grid-search an ARIMA order for ``dataset``, refit, and forecast one step.

    ``evaluate_models`` picks the best ``(p, d, q)`` among the candidate
    values below; the winning configuration and its score are printed.

    Note: ``warnings.filterwarnings("ignore")`` is installed and not removed,
    so warnings stay silenced after this function returns.

    :param dataset: observations to fit.
    :return: tuple ``(fitted_results, forecast)``.
    """
    warnings.filterwarnings("ignore")

    ar_candidates = [0, 1, 2, 4, 6, 8, 10]
    diff_candidates = range(0, 3)
    ma_candidates = range(0, 3)
    best_cfg, best_score = evaluate_models(dataset, ar_candidates,
                                           diff_candidates, ma_candidates)
    print('Best ARIMA%s RMSE=%.3f' % (best_cfg, best_score))

    best_model = ARIMA(dataset, order=best_cfg)
    best_model.k_lags = None
    fitted = best_model.fit()
    return fitted, fitted.forecast()

示例#8

0

显示文件

def evaluate_arima_model(X, arima_order):
    """Fit ARIMA on all of ``X`` and score its predictions over the same span.

    The model is fitted on every observation and then asked to predict
    positions ``0`` through ``len(X) - 1``, so the score is computed on the
    data the model was fitted on.

    :param X: sequence of observations.
    :param arima_order: ``order`` argument passed to ``ARIMA``.
    :return: median absolute error between ``X`` and the predictions.
    """
    observations = list(X)
    fitted = ARIMA(observations, order=arima_order).fit()

    last_position = len(X) - 1
    predicted = fitted.predict(0, last_position, typ='levels')
    return metrics.median_absolute_error(X, predicted)

示例#9

0

显示文件

文件： algorithms.py 项目： adityagator/time-series-forecasts

    def get_predictions_rmse_mape_final(self, min_algo):
        """Fit the selected algorithm on ``self.total`` and forecast ahead.

        :param min_algo: one of the ``Constants`` algorithm identifiers
            handled below (ARIMA, MOVING_AVERAGE, AR, ARMA, SARIMA, SES).
        :return: ``Constants.NUMBER_OF_PREDICTIONS`` forecast values starting
            right after the end of ``self.total``, each rounded and clamped
            to be non-negative.
        :raises ValueError: if ``min_algo`` is not a supported identifier.
        """
        if min_algo == Constants.ARIMA:
            model = ARIMA(self.total, order=(7, 0, 1))
            model_fit = model.fit()
        elif min_algo == Constants.MOVING_AVERAGE:
            model = ARMA(self.total, order=[0, 1])
            model_fit = model.fit(disp=0)
        elif min_algo == Constants.AR:
            model = AR(self.total)
            model_fit = model.fit(disp=0)
        elif min_algo == Constants.ARMA:
            model = ARMA(self.total, order=[2, 1])
            model_fit = model.fit(disp=0)
        elif min_algo == Constants.SARIMA:
            model = SARIMAX(self.total,
                            order=(1, 1, 1),
                            seasonal_order=(1, 1, 1, 12))
            model_fit = model.fit(disp=0)
        elif min_algo == Constants.SES:
            model = SimpleExpSmoothing(self.total)
            model_fit = model.fit()
        else:
            # Fail with a clear message instead of hitting an unbound
            # ``model_fit`` further down.
            raise ValueError(
                "Unsupported forecasting algorithm: %r" % (min_algo,))

        # Forecast the positions immediately following the observed data.
        start_index = len(self.total)
        end_index = start_index + Constants.NUMBER_OF_PREDICTIONS - 1
        forecast = model_fit.predict(start=start_index, end=end_index)
        for i in range(len(forecast)):
            forecast[i] = round(forecast[i])
            if forecast[i] < 0:
                forecast[i] = 0
        return forecast

示例#10

0

显示文件

def generate_predict_data():
    """Forecast the next close for every ticker and write a summary CSV.

    Reads ``tickerlist.csv`` (column ``name``) and ``order.csv`` (indexed by
    ``ticker``, columns ``first``/``second``/``third`` giving the ARIMA
    order). For each ticker, ``coin/<name>`` is loaded, an ARIMA model is
    fitted on its ``close`` column and a one-step forecast is made. Tickers
    that fail at any step are reported and skipped.

    The collected results are written to ``dic.csv``.

    :return: dict with the keys ``ticker``, ``predict``, ``bef_close`` and
        ``close``, each mapping to a list with one entry per processed ticker.
    """
    tickerlist = pd.read_csv("tickerlist.csv")
    order = pd.read_csv("order.csv").set_index('ticker')

    ticker = []
    predict = []
    close = []
    bef_close = []

    for name in tickerlist['name']:
        try:
            all_data = pd.read_csv("coin/" + name)
            all_data['time'] = all_data['time'].apply(
                lambda x: datetime(int(x[:4]), int(x[5:7]), int(x[8:10])))

            temp_order = (int(order['first'].loc[name]),
                          int(order['second'].loc[name]),
                          int(order['third'].loc[name]))

            model_fit = ARIMA(all_data['close'], order=temp_order).fit()
            forecast_data = model_fit.forecast(steps=1)

            # Resolve every value before touching the result lists, so a
            # failure cannot leave the lists with different lengths.
            # The forecast is looked up under the label len(close).
            predicted_close = round(forecast_data[len(all_data['close'])], 2)
            last_close = all_data['close'].iloc[-1]
            previous_close = all_data['close'].iloc[-2]
            last_time = all_data['time'].iloc[-1]
        except Exception as exc:
            # Best effort: skip this ticker but say why, and let
            # KeyboardInterrupt / SystemExit propagate.
            print("skipped :", name, "-", repr(exc))
            continue

        print(last_time)
        print("name :", name)
        print("close :", last_close)
        print("predict :", predicted_close)
        print()

        ticker.append(name)
        predict.append(predicted_close)
        close.append(last_close)
        bef_close.append(previous_close)

    dic = {
        'ticker': ticker,
        'predict': predict,
        'bef_close': bef_close,
        'close': close,
    }

    df = pd.DataFrame(dic)
    df.to_csv("dic.csv")

    return dic

示例#11

0

显示文件

def predict_moving_average(data, predict_start=None, intervals=15):
    """Predict a window of values with an ARIMA(2, 1, 5) fitted on scaled data.

    ``data`` is scaled with the module-level ``scaler`` (which is re-fitted
    by this call), the model is fitted on the scaled values, and the
    predictions are transformed back to the original scale.

    :param data: array of observations; must support ``reshape``.
    :param predict_start: first position of the prediction window. When
        ``None``, the window starts two observations before the end of
        ``data``.
    :param intervals: offset added to the start position to obtain the last
        predicted position.
    :return: 1-D array of predictions on the original scale.
    """
    scaled_column = scaler.fit_transform(data.reshape(-1, 1))
    data_norm = np.reshape(scaled_column, (-1))
    fitted = ARIMA(data_norm, order=(2, 1, 5)).fit()

    first = len(data) - 2 if predict_start is None else predict_start
    prediction_norm = fitted.predict(first, first + intervals, typ='levels')

    restored_column = scaler.inverse_transform(prediction_norm.reshape(-1, 1))
    return np.reshape(restored_column, (-1))

示例#12

0

显示文件

文件： ARMA.py 项目： howardhsumail/ARMA

 def arma_model(p_range, q_range, data):
     """Fit ARIMA(p, 0, q) for every p < ``p_range`` and q < ``q_range``.

     The BIC of each fit is recorded; a fit that raises is recorded as NaN.

     :param p_range: number of AR orders to try (0 .. p_range - 1).
     :param q_range: number of MA orders to try (0 .. q_range - 1).
     :param data: observations to fit.

     NOTE(review): the visible code has no ``return`` statement; the results
     are only stored in ``dict`` and in the local ``result`` array.
     """
     index = 0
     result = np.array([[]])
     for p in range(p_range):
         pi = []
         for q in range(q_range):
             try:
                 bic = ARIMA(data, order=(p, 0, q)).fit().bic
             except Exception:
                 # Use a local value for failed fits. Assigning to the
                 # results object would fail on the first iteration and
                 # otherwise overwrite the BIC of the previous fit.
                 bic = np.nan
             # NOTE(review): ``dict`` is not defined in this block; this
             # presumably relies on a module-level mapping of that name.
             dict[index] = (p, q, bic)
             pi.append(bic)
             index = index + 1
         # np.append flattens, so ``result`` ends up one-dimensional.
         result = np.append(result, np.array(pi))

示例#13

0

显示文件

文件： model_building.py 项目： nkrajew/pharma_sales_proj

def evaluate_arima_model(X, arima_order):
    """Walk-forward evaluation of an ARIMA order on the last third of ``X``.

    The first 67% of ``X`` seeds the history. For every remaining
    observation the model is re-fitted on the history, a one-step forecast
    is recorded, and the true observation is appended to the history.

    Warnings are silenced only for the duration of the evaluation; the
    caller's warning filters are restored afterwards, also when an
    exception is raised.

    :param X: sequence of observations.
    :param arima_order: ``order`` argument passed to ``ARIMA``.
    :return: mean squared error between the held-out data and the forecasts.
    """
    train_size = int(len(X) * 0.67)
    train, test = X[0:train_size], X[train_size:]
    history = list(train)
    predictions = []
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for observed in test:
            model_fit = ARIMA(history, order=arima_order).fit()
            predictions.append(model_fit.forecast(steps=1))
            history.append(observed)
        error = mean_squared_error(test, predictions)
    return error

示例#14

0

显示文件

文件： test_model.py 项目： cisco00/Sentimental-Analysis-on-threat

def test_nonstationary_gls_error():
    # GH-6540
    # Fitting the high-order model below with the Hannan-Rissanen method is
    # expected to fail with a ValueError about the autoregressive roots.
    #
    # The observations are read from an inline CSV with a single ``data``
    # column. The literal mixes ``\n`` escapes with real line breaks, so the
    # text handed to ``read_csv`` contains empty lines between the groups.
    endog = pd.read_csv(
        io.StringIO(
            """\
data\n
9.112\n9.102\n9.103\n9.099\n9.094\n9.090\n9.108\n9.088\n9.091\n9.083\n9.095\n
9.090\n9.098\n9.093\n9.087\n9.088\n9.083\n9.095\n9.077\n9.082\n9.082\n9.081\n
9.081\n9.079\n9.088\n9.096\n9.081\n9.098\n9.081\n9.094\n9.091\n9.095\n9.097\n
9.108\n9.104\n9.098\n9.085\n9.093\n9.094\n9.092\n9.093\n9.106\n9.097\n9.108\n
9.100\n9.106\n9.114\n9.111\n9.097\n9.099\n9.108\n9.108\n9.110\n9.101\n9.111\n
9.114\n9.111\n9.126\n9.124\n9.112\n9.120\n9.142\n9.136\n9.131\n9.106\n9.112\n
9.119\n9.125\n9.123\n9.138\n9.133\n9.133\n9.137\n9.133\n9.138\n9.136\n9.128\n
9.127\n9.143\n9.128\n9.135\n9.133\n9.131\n9.136\n9.120\n9.127\n9.130\n9.116\n
9.132\n9.128\n9.119\n9.119\n9.110\n9.132\n9.130\n9.124\n9.130\n9.135\n9.135\n
9.119\n9.119\n9.136\n9.126\n9.122\n9.119\n9.123\n9.121\n9.130\n9.121\n9.119\n
9.106\n9.118\n9.124\n9.121\n9.127\n9.113\n9.118\n9.103\n9.112\n9.110\n9.111\n
9.108\n9.113\n9.117\n9.111\n9.100\n9.106\n9.109\n9.113\n9.110\n9.101\n9.113\n
9.111\n9.101\n9.097\n9.102\n9.100\n9.110\n9.110\n9.096\n9.095\n9.090\n9.104\n
9.097\n9.099\n9.095\n9.096\n9.085\n9.097\n9.098\n9.090\n9.080\n9.093\n9.085\n
9.075\n9.067\n9.072\n9.062\n9.068\n9.053\n9.051\n9.049\n9.052\n9.059\n9.070\n
9.058\n9.074\n9.063\n9.057\n9.062\n9.058\n9.049\n9.047\n9.062\n9.052\n9.052\n
9.044\n9.060\n9.062\n9.055\n9.058\n9.054\n9.044\n9.047\n9.050\n9.048\n9.041\n
9.055\n9.051\n9.028\n9.030\n9.029\n9.027\n9.016\n9.023\n9.031\n9.042\n9.035\n
"""
        ),
        index_col=None,
    )
    # ARMA(18, 39) with both the stationarity and invertibility constraints
    # switched off.
    mod = ARIMA(
        endog,
        order=(18, 0, 39),
        enforce_stationarity=False,
        enforce_invertibility=False,
    )
    # The fit must raise, and the message must match the pattern below.
    with pytest.raises(ValueError, match="Roots of the autoregressive"):
        mod.fit(method="hannan_rissanen", low_memory=True, cov_type="none")

示例#15

0

显示文件

def arima(df,time_id,lookback, p,d,q):
    """Relative one-step forecast errors of ARIMA(p, d, q) on log open prices.

    For every ``tid`` in ``time_id`` the model is fitted on the log of the
    ``lookback`` values of ``df[OPEN_KEY]`` that precede ``tid``. Its
    one-step forecast is compared with the log value at ``tid``.

    :param df: data holding the ``OPEN_KEY`` column.
    :param time_id: iterable of positions to evaluate.
    :param lookback: number of preceding observations used for each fit.
    :param p: AR order.
    :param d: differencing order.
    :param q: MA order.
    :return: column array of shape ``(len(time_id), 1)`` holding
        ``(forecast - actual) / actual`` for each position.
    """
    Log(LOG_INFO) << "Computing arima(%d,%d,%d) with lookback: %d " % (p,d,q,lookback)
    relative_errors = []
    for tid in time_id:
        window = np.log(df[OPEN_KEY][tid-lookback:tid].values)
        fitted = ARIMA(window,order=(p,d,q)).fit(
            method_kwargs={"warn_convergence": False})
        predicted = fitted.forecast()
        actual = np.log(df[OPEN_KEY][tid])
        relative_errors.append((predicted[0]-actual)/actual)

    return np.array(relative_errors).reshape(-1,1)

示例#16

0

显示文件

文件： time_trend_predictor.py 项目： jjfeng/aACP_time_trends

 def forecast(self, data: np.ndarray):
     """Forecast the next value of ``data``.

     - With at most one observation, ``self.max_loss`` is returned.
     - With at most ``self.min_size`` observations, the mean of ``data`` is
       returned.
     - Otherwise an ARIMA model of order ``self.order`` is fitted and its
       one-step forecast returned; if that raises, the mean of ``data`` is
       used instead.

     :param data: array of past observations.
     :return: the forecast value.
     """
     if data.size <= 1:
         return self.max_loss

     # Use the average until there is enough data for the ARIMA model.
     if data.size <= self.min_size:
         return np.mean(data)

     try:
         fitted = ARIMA(data, order=self.order).fit()
         return fitted.forecast(steps=1)[0]
     except Exception:
         return np.mean(data)

示例#17

0

显示文件

def arma_model():
    """Build a fitted ARIMA(2, 0, 2) model on a simulated ARMA sample.

    A 250-observation sample is generated with a fixed random seed, indexed
    by dates starting at 1980-01-01 with frequency ``"M"``, and fitted
    without a trend.

    :return: ``ModelWithResults`` bundling the fitted results (``model``),
        the unfitted ARIMA instance (``alg``) and a one-row inference frame
        with ``start`` and ``end`` dates.
    """
    # Autoregressive Moving Average (ARMA)
    np.random.seed(12345)
    nobs = 250
    ar_input = np.array([1, -0.75, 0.25])
    ma_input = np.array([1, 0.65, 0.35])
    sample = arma_generate_sample(ar_input, ma_input, nobs)
    date_index = pd.date_range("1980-1-1", freq="M", periods=nobs)
    y = pd.Series(sample, index=date_index)

    arima = ARIMA(y, order=(2, 0, 2), trend="n")
    fitted = arima.fit()
    inference_dataframe = pd.DataFrame([["1999-06-30", "2001-05-31"]], columns=["start", "end"])

    return ModelWithResults(model=fitted, alg=arima, inference_dataframe=inference_dataframe)

示例#18

0

显示文件

文件： econometric_functions.py 项目： vnery5/Econometria

def arima_model(vEndog, mExog=None, tPDQ=None):
    """
    Fits an ARIMA model. Order can be specified or determined by auto_arima.
    Differently from other models, it does not work on patsy/R formula syntax.

    After fitting, the summary is printed, the diagnostic plots are shown and
    a one-sample t-test of the residual mean against 0 is reported.

    :param vEndog: DataFrame column/numpy vector containing endogenous data (which will be regressed upon itself)
    :param mExog: vector/matrix containing exogenous data. Defaults to None
    :param tPDQ: tuple (p, d, q) containing order of the model;
        p: number of autoregressive terms (AR)
        d: number of differencing steps (I)
        q: number of past forecast errors/moving-average terms (MA)
        If None (default), performs an auto_arima()

    :return mod: fitted model instance
    """

    ## Creating model
    if tPDQ is not None:
        # Order is specified; ``exog=None`` is the same as omitting it.
        mod_arima = ARIMA(endog=vEndog, exog=mExog, order=tPDQ).fit(cov_type='robust')
    else:
        # Order isn't specified: let auto_arima() choose it, then refit with
        # a robust covariance. The exogenous data is passed again so the
        # refit uses the same regressors as the search.
        mod_arima = auto_arima(y=vEndog, X=mExog)
        mod_arima = mod_arima.fit(y=vEndog, X=mExog, cov_type='robust')

    ## Printing summary and diagnostics
    print(mod_arima.summary())

    print("For heteroskdasticity, check Prob(H), where H0: homoskedasticity, and the standardized residual graph.")
    print("If there is hetero., the model error can't be a white noise (which is the desired thing).")
    print("Estimaed Density and Jarque-Bera have information on normality.")
    print("In the correlogram, all lollipops must be inside of the shaded area.")

    # Plots
    mod_arima.plot_diagnostics(figsize=(10, 10))
    plt.show()

    # Residual means
    # The two branches return different objects: ``resid`` may be a plain
    # attribute or a method, so call it only when it is callable.
    residuals = mod_arima.resid
    if callable(residuals):
        residuals = residuals()
    tMean0 = stats.ttest_1samp(residuals, 0, nan_policy='omit')
    print(f"P-value for the test that residual mean is equal to 0: {np.around(tMean0[1], 5)}.")
    print("If p < 0.05, H0 is rejected and the residual mean is different from 0 (not ideal).")

    ## Returning
    return mod_arima

示例#19

0

显示文件

文件： ticketclassifier.py 项目： shxdow/aep

def estimate_time(tickets):
    """
        Estimate the closing time based on a list of tickets.

        ### Parameters
        - `tickets`: the list of past tickets, each a dictionary with at least the keys
            `"inizio"` and `"fine"`, holding timestamps

        ### Return value
        A prediction of the time taken to close the next ticket
    """
    durations = [diff_in_seconds(ticket) for ticket in tickets]
    fitted = ARIMA(durations).fit()
    next_duration = fitted.forecast(1, alpha=0.05)[0]
    return timedelta(seconds=next_duration)

示例#20

0

显示文件

文件： grid_search_female_births.py 项目： officegeek/brain

def evaluate_arima_model(X, arima_order):
	"""Walk-forward RMSE of an ARIMA order on the last 34% of ``X``.

	The first 66% of ``X`` seeds the history. For each remaining
	observation the model is re-fitted on the history, the one-step
	forecast is recorded, and the true observation joins the history.

	:param X: sequence of observations.
	:param arima_order: ``order`` argument passed to ``ARIMA``.
	:return: root mean squared error over the held-out observations.
	"""
	split_at = int(len(X) * 0.66)
	train, test = X[0:split_at], X[split_at:]
	history = list(train)
	predictions = []
	for step in range(len(test)):
		fitted = ARIMA(history, order=arima_order).fit()
		predictions.append(fitted.forecast()[0])
		history.append(test[step])
	# out-of-sample error
	return sqrt(mean_squared_error(test, predictions))

示例#21

0

显示文件

文件： test_arima_process.py 项目： timgates42/statsmodels

def test_from_estimation(d, seasonal):
    """Check the coefficient shapes of ``ArmaProcess.from_estimation``.

    A sample of 500 observations is generated from known coefficients
    (cumulatively summed when ``d == 1``), an ARIMA model is fitted to it,
    and the process rebuilt from the results must have 1 AR/MA coefficient
    in the non-seasonal case and 5 in the seasonal case.
    """
    if seasonal:
        ar = [0.8, 0, 0, 0.2, -0.16]
        ma = [0.4, 0, 0, 0.2, -0.08]
        seasonal_order = (1, 0, 1, 4)
        expected_shape = (5,)
    else:
        ar = [0.8]
        ma = [0.4]
        seasonal_order = None
        expected_shape = (1,)

    ap = ArmaProcess.from_coeffs(ar, ma, 500)
    idx = pd.date_range(dt.datetime(1900, 1, 1), periods=500, freq="Q")
    sample = ap.generate_sample(500)
    if d == 1:
        sample = np.cumsum(sample)
    data = pd.Series(sample, index=idx)

    res = ARIMA(data, order=(1, d, 1), seasonal_order=seasonal_order).fit()
    ap_from = ArmaProcess.from_estimation(res)
    assert ap_from.arcoefs.shape == expected_shape
    assert ap_from.macoefs.shape == expected_shape

示例#22

0

显示文件

文件： generate_ARIMA_model.py 项目： junbrot/TimeSeries

def generate_ARIMA_model():
    """Create walk-forward ARIMA predictions for every ticker.

    Reads ``tickerlist.csv`` (column ``name``). For each ticker an order is
    selected with ``auto_arima`` on the training data. Then, for every day
    covered by the test data, an ARIMA model of that order is fitted on all
    closes before that day and a one-step forecast is made. The forecasts,
    with the test ``close`` and ``high`` columns, are written to
    ``predict_data/<name>``.
    """
    tickerlist = pd.read_csv("tickerlist.csv")

    try:
        if not os.path.exists("predict_data"):
            os.makedirs("predict_data")
    except OSError:
        print("Error: Creating directory")

    for ticker_file in tickerlist['name']:

        all_data = pd.read_csv("./coin/" + ticker_file)
        training_data = pd.read_csv("./training_data/" + ticker_file)
        test_data = pd.read_csv("./test_data/" + ticker_file)

        # Select the model order for this ticker
        searched = auto_arima(training_data['close'], trace=True, error_action='ignore', start_p=1, start_q=1, max_p=3,
                              max_q=3, suppress_warnings=True, stepwise=False, seasonal=False, with_intercept=False)
        order = tuple(searched.order[:3])

        time_data = []
        predict_data = []

        # Use the order found above and the test period to build the predictions
        first_day = len(training_data)
        for day in range(first_day, first_day + len(test_data)):

            fitted = ARIMA(all_data['close'].iloc[0:day], order=order).fit()

            forecast_data = fitted.forecast(steps=1)
            predict_data.append(round(forecast_data[day],2))
            day_stamp = all_data['time'].iloc[day]
            time_data.append(day_stamp)
            print(day_stamp)

        print(predict_data)
        print(len(predict_data))

        dic = {
            'time': time_data,
            'predict': predict_data,
            'close': test_data['close'],
            'high': test_data['high'],
        }

        df = pd.DataFrame(dic)
        df.to_csv("predict_data/" + ticker_file)

示例#23

0

显示文件

    def fit(self,model_state: str, endog_data: pd.DataFrame, exp_name: str, exog_data=None):
        """Walk-forward ARIMA(7, 0, 1) evaluation over the last 10% of the data.

        The first 90% of ``endog_data`` seeds the history. For each remaining
        day, and for every column, a model is fitted on the history and a
        one-step forecast recorded; the observed row is then appended to the
        history. Finally an R2 score per column is computed and handed to
        ``store_exp_results``.

        :param model_state: column whose training values are copied into
            every history column before fitting.
        :param endog_data: observations, one column per state; the index
            must be convertible to ``pd.DatetimeIndex``.
        :param exp_name: experiment name forwarded to ``store_exp_results``.
        :param exog_data: optional exogenous data.
        :return: tuple ``(predictions, test_data)``; the rows of
            ``predictions`` are labelled with the day each forecast refers to.
        """
        from statsmodels.tsa.arima.model import ARIMA
        # Hardcoded values to have consistency in models.
        train_pct = 0.9
        steps = 1
        all_states = endog_data.columns
        total_samples = len(endog_data.index)
        training_sample_size = int(train_pct*total_samples)
        test_sample_size = total_samples - training_sample_size

        endog_train_data = endog_data.iloc[:training_sample_size]
        # NOTE(review): the exogenous training slice keeps a fixed length
        # while ``history`` grows by one row per step, and no exog is passed
        # to ``forecast`` - confirm the exog path works as intended.
        exog_train_data = None
        if exog_data is not None:
            exog_train_data = exog_data.iloc[:training_sample_size]

        test_data = endog_data.iloc[training_sample_size:]
        history = endog_train_data.copy()
        # NOTE(review): writes through ``.values``; this relies on pandas
        # returning a writable view of the column - confirm under
        # copy-on-write.
        for _col in history.columns:
            history[_col].values[:training_sample_size]=history[model_state].values[:training_sample_size]
        history.index = pd.DatetimeIndex(history.index) # Converts the index labels to datetimes
        history.index = history.index.to_period('D')    # Converts the datetimes to daily periods
        predictions = pd.DataFrame(columns=endog_train_data.columns)
        for t in range(len(test_data)):
            # Label of the day being forecast; computed before ``history``
            # grows so predictions and observations share the same label.
            next_day = history.index[-1]+pd.offsets.Day(1)
            each_day_predictions = []
            for _state in all_states:
                print(f'{t} day for {_state}')
                model = ARIMA(history[_state],order=(7,0,1),exog=exog_train_data)
                model_fit = model.fit()
                output = model_fit.forecast(steps=steps)
                each_day_predictions.append(output.iloc[0])
            history.loc[next_day]=test_data.iloc[t]
            predictions.loc[next_day] = each_day_predictions

        fit_score = {'states':[],'r2':[]}
        for position, _state in enumerate(endog_data.columns):
            y_obs = test_data[:test_sample_size][_state].to_numpy()
            # Select the prediction column by position.
            y_pred = predictions.iloc[:, position].to_numpy(dtype=float)
            fit_score['states'].append(_state)
            fit_score['r2'].append(r2_score(y_obs,y_pred))

        store_exp_results(fit_score, exog_data is not None, exp_name)

        return predictions,test_data

示例#24

0

显示文件

    def sample(self, lagged_values, lagged_times=None, **ignored):
        """Draw ``self.num_predictions`` candidate next values.

        The share of unique values decides how the outcomes are treated:

        - Fewer than 20% unique values (treated as quantized): sample from
          the values that followed earlier occurrences of the current value.
          If there are no more than 8 such occurrences, sample from the
          whole history.
        - Otherwise (treated as continuous): fit an ARIMA model with the
          order chosen by ``self.evaluate_models`` and sample from a normal
          distribution centred on its point estimate, with the standard
          deviation of the history.

        The drawn values are printed before being returned.

        :param lagged_values: past observations; reversed before use.
        :param lagged_times: unused.
        :return: list of sampled values.
        """
        uniques = np.unique(lagged_values)
        # The data arrive in reverse order; the ARIMA needs the opposite.
        rev_values = lagged_values[::-1]

        # Arbitrary 20% cutoff between quantized and continuous outcomes.
        if len(uniques) < 0.2 * len(rev_values):
            current = rev_values[-1]
            # Positions right after each earlier occurrence of the current value.
            prev_cases = [
                position + 1
                for position, value in enumerate(rev_values[:-1])
                if value == current
            ]
            # Arbitrary minimum number of occurrences.
            if len(prev_cases) > 8:
                wanted = set(prev_cases)
                value_list = [
                    value for position, value in enumerate(rev_values)
                    if position in wanted
                ]
            else:
                # Not enough data, use the whole set instead.
                value_list = rev_values[:]
            v = list(np.random.choice(value_list, self.num_predictions))
        else:
            # Simple ARIMA: evaluate parameters, fit, predict one step.
            print('ARIMA')
            ar_candidates = [0, 1, 2, 4, 6, 8, 10, 25]
            diff_candidates = range(0, 3)
            ma_candidates = range(0, 3)
            best_order = self.evaluate_models(rev_values, ar_candidates,
                                              diff_candidates, ma_candidates)
            fitted = ARIMA(rev_values, order=best_order, trend='n').fit()
            n_obs = len(lagged_values)
            point_est = fitted.predict(n_obs, n_obs, dynamic=True)
            st_dev = np.std(lagged_values)
            v = list(np.random.normal(point_est, st_dev, self.num_predictions))
        print(*v, sep=", ")
        return v

示例#25

0

显示文件

文件： tsaconstant.py 项目： nguyensu/timemachines

def tsa_factory(y: Y_TYPE,
                s: dict,
                k: int,
                a: A_TYPE = None,
                t: T_TYPE = None,
                e: E_TYPE = None,
                p: int = TSA_P_DEFAULT,
                d: int = TSA_D_DEFAULT,
                q: int = TSA_D_DEFAULT) -> ([float], Any, Any):
    """ Extremely simple univariate, fixed p,d,q ARIMA model that is re-fit each time

    :param y: latest observation; passed through ``wrap`` before use.
    :param s: state dict carried between calls; a fresh one is created when
        it holds no observations yet.
    :param k: number of steps to forecast; must not change between calls.
    :param a: optional value stored alongside ``y`` (passed through ``wrap``).
    :param t: unused here.
    :param e: unused here.
    :param p: AR order.
    :param d: differencing order.
    :param q: MA order.
        NOTE(review): the default reuses ``TSA_D_DEFAULT`` - confirm this is
        intended rather than a dedicated default for ``q``.
    :return: ``(x, x_std, s)``: the k forecasts, the standard deviations
        from ``parade`` (with missing values replaced by 1.0) and the
        updated state; ``(None, s, None)`` when ``y`` is None.
    """

    # TODO: FIX THIS TO USE EMPIRICAL STD, OTHERWISE ENSEMBLES ARE DREADFUL

    y = wrap(y)
    a = wrap(a)

    if not s.get('y'):
        s = {'y': list(), 'a': list(), 'k': k, 'p': {}}
    else:
        # Assert immutability of k, dimensions
        assert len(y) == len(s['y'][0])
        assert k == s['k']
        if s['a']:
            assert len(a) == len(s['a'][0])

    if y is None:
        return None, s, None

    s['y'].append(y)
    if a is not None:
        s['a'].append(a)

    if len(s['y']) > max(2 * k + 5, TSA_META['n_warm']):
        # Enough history: fit on the first component of every observation.
        y0s = [y_[0] for y_ in s['y']]
        model = ARIMA(y0s, order=(p, d, q))
        try:
            x = list(model.fit().forecast(steps=k))
        except Exception:
            # Fall back to repeating the latest value if the fit or the
            # forecast fails; KeyboardInterrupt/SystemExit still propagate.
            x = [wrap(y)[0]] * k
    else:
        # Still warming up: repeat the latest value.
        x = [y[0]] * k

    y0 = wrap(y)[0]
    _we_ignore_bias, x_std, s['p'] = parade(p=s['p'], x=x, y=y0)
    x_std_fallback = nonecast(x_std, fill_value=1.0)
    return x, x_std_fallback, s

示例#26

0

显示文件

def calculate_arima(df):
    """Forecast ten weeks of prices with an ARMA(1, 1) on weekly differences.

    ``df`` is resampled to weekly frequency with forward fill and
    differenced. An ARIMA model of order (1, 0, 1) is fitted on the
    differences, and the predicted differences are accumulated onto the last
    weekly price to obtain price levels.

    The weekly history is also cleaned: missing prices become 0, and every
    price below 0.0001 is replaced by the preceding price (the first entry
    takes the second one).

    :param df: date-indexed DataFrame with a ``price`` column.
    :return: tuple ``(df_fill, dfp)``: the cleaned weekly history and the
        predicted prices.
    """
    df_fill = df.asfreq('W', method='ffill')
    df_returns = df_fill.diff()
    df_returns.iloc[0] = 0

    # Build the model and fit it to the data
    fitted_model = ARIMA(df_returns, order=[1, 0, 1]).fit()

    # The prediction window runs from the last input date to ten weeks later.
    start = df.iloc[[-1]].index
    end = start + timedelta(weeks=10)

    start = pd.to_datetime(str(start.values[0]))
    end = pd.to_datetime(str(end.values[0]))

    start_date = start.strftime('%Y.%m.%d')
    end_date = end.strftime('%Y.%m.%d')

    preds = fitted_model.predict(start=start_date, end=end_date)
    dfp = pd.DataFrame(preds)
    dfp = dfp.rename(columns={"predicted_mean": "price"})

    # Turn predicted differences into price levels. Whole-column operations
    # are used instead of element-wise chained assignment, which does not
    # reliably write back to the frame.
    last_v = df_fill.price.iloc[-1]
    dfp["price"] = (last_v + dfp["price"].cumsum()).fillna(0)

    prices = df_fill["price"].fillna(0)
    too_small = prices < 0.0001
    cleaned = prices.mask(too_small)
    if len(prices) > 0 and too_small.iloc[0]:
        # The first entry has no predecessor: borrow the second value
        # (or keep the original when there is only one entry).
        cleaned.iloc[0] = prices.iloc[1] if len(prices) > 1 else prices.iloc[0]
    # Remaining gaps take the closest preceding kept value.
    df_fill["price"] = cleaned.ffill()

    return df_fill, dfp

示例#27

0

显示文件

def get_arima_pred(train, test, p=5, q=1, d=0):
    """Walk-forward one-step ARIMA forecasts over ``test``.

    The ``close`` values of ``train`` seed the history. For every row of
    ``test`` the model is re-fitted, the one-step forecast is recorded, and
    the true ``close`` joins the history.

    NOTE(review): the order tuple is built as ``(p, q, d)`` while ARIMA's
    ``order`` is documented as ``(p, d, q)``. So the ``q`` argument acts as
    the differencing order and ``d`` as the MA order; the defaults give
    ``order=(5, 1, 0)``.

    :param train: DataFrame with a ``close`` column.
    :param test: DataFrame with ``close`` and ``date`` columns; modified in
        place (a ``forecast`` column is added and the index set to ``date``).
    :return: the modified ``test`` frame.
    """
    print()
    history = list(train['close'])
    model_predictions = []

    for time_point in range(len(test)):
        fitted = ARIMA(history, order=(p, q, d)).fit()
        model_predictions.append(fitted.forecast()[0])
        history.append(test.iloc[time_point]['close'])

    test['forecast'] = model_predictions
    test.index = test['date']
    return test

示例#28

0

显示文件

文件： stock_gui.py 项目： chinmayji/PortfolioOptimization

def arima_insample(ts, g, order, name):
    """Plot ``ts`` against in-sample ARIMA predictions.

    With ``g == 1`` the default predictions of the fitted model are used.
    Otherwise dynamic predictions are made in consecutive blocks of ``g``
    steps, starting at 10% of the series length and continuing while the
    block start is below the last index value of ``ts``.

    :param ts: pandas Series to fit and plot.
    :param g: block length ("gap") for the dynamic predictions.
    :param order: ``order`` argument passed to ``ARIMA``.
    :param name: label used in the plot title.
    """
    plt.figure()
    fit = ARIMA(ts, order=order).fit()
    print(type(fit))
    if g == 1:
        p = fit.predict()
    else:
        # Collect the blocks and concatenate once; ``Series.append`` is not
        # available in current pandas.
        blocks = []
        i = int(0.1 * len(ts))
        while i < ts.index[-1]:
            blocks.append(fit.predict(start=i, end=i + g - 1, dynamic=True))
            i += g
        p = pd.concat(blocks) if blocks else pd.Series(dtype=float)
    plt.plot(ts)
    plt.plot(p)
    plt.title(name + " | [p,d,q] : " + str(order) + " | gap=" + str(g))
    plt.legend(["actual", "predicted"])
    plt.show()

示例#29

0

显示文件

 def evaluate_arima_model(self, X, arima_order):
     """Walk-forward MSE of an ARIMA order on the last 34% of ``X``.

     The first 66% of ``X`` seeds the history. For each remaining
     observation the model is re-fitted on the history, the one-step
     forecast is recorded, and the true observation joins the history.

     :param X: sequence of observations.
     :param arima_order: ``order`` argument passed to ``ARIMA``.
     :return: mean squared error over the held-out observations.
     """
     cut = int(len(X) * 0.66)
     train, test = X[0:cut], X[cut:]
     history = list(train)
     predictions = []
     for step in range(len(test)):
         # If disp = 1 or True, convergence information is printed.
         fitted = ARIMA(history, order=arima_order).fit(disp=0)
         predictions.append(fitted.forecast()[0])
         history.append(test[step])
     # out-of-sample error
     return mean_squared_error(test, predictions)

示例#30

0

显示文件

文件： lts_pre_process.py 项目： ziv0808/clueweb_history

def create_rmse_scores_per_term(all_global_params_dict, cc_dict):
    """Score three simple predictors on each term's differenced time series.

    For every key except ``'NumWords'`` the per-term series is summed over
    axis 0, offset by ``cc_dict[stem]``, scaled to unit Euclidean norm and
    first-differenced. Three predictors are then scored on the differences:

    - ``'MA'``: mean of the two preceding values;
    - ``'LR'``: linear regression of each value on its predecessor;
    - ``'ARMA'``: residuals (from the second on) of an ARIMA(1, 0, 1) fit.

    Each score is ``sqrt(sum of squared errors / (n - 2))``, where ``n`` is
    the length of the differenced series. Intermediate series and the final
    result are printed.

    :param all_global_params_dict: mapping from term to an object whose
        ``sum(axis=0)`` yields the term's time series.
    :param cc_dict: mapping from term to an offset added to the series.
    :return: dict mapping each term to ``{'MA': ..., 'LR': ..., 'ARMA': ...}``.
    """
    stem_time_series_wieghts_dict = {}
    for stem in all_global_params_dict:
        if stem == 'NumWords':
            continue
        print(stem)
        series = np.array(all_global_params_dict[stem].sum(axis=0))
        series = series + cc_dict[stem]
        print(series)
        # normalize
        series = series / np.sqrt(np.sum(np.square(series)))
        # diff series
        print(series)
        series = np.array(
            [series[i] - series[i - 1] for i in range(1, len(series))])
        print(series)

        n_obs = len(series)
        denominator = float(n_obs - 2)

        # 'MA': predict each value as the mean of its two predecessors.
        ma_error = 0.0
        for i in range(2, n_obs):
            predicted = 0.5 * series[i - 2] + 0.5 * series[i - 1]
            ma_error += (predicted - series[i])**2

        # 'LR': regress each value on the previous one.
        lr_error = 0.0
        regr = linear_model.LinearRegression()
        x_series = series[:-1]
        y_series = series[1:]
        regr.fit(x_series.reshape(-1, 1), y_series.reshape(-1, 1))
        y_pred = regr.predict(x_series.reshape(-1, 1)).reshape(1, -1)
        for fitted_value, observed in zip(y_pred[0], y_series):
            lr_error += (fitted_value - observed)**2

        # 'ARMA': squared residuals of an ARIMA(1, 0, 1), skipping the first.
        arma_fit = ARIMA(series, order=(1, 0, 1)).fit()
        arma_error = np.sum(np.square(arma_fit.resid[1:]))

        stem_time_series_wieghts_dict[stem] = {
            'MA': np.sqrt(ma_error / denominator),
            'LR': np.sqrt(lr_error / denominator),
            'ARMA': np.sqrt(arma_error / denominator),
        }
    print(stem_time_series_wieghts_dict)
    return stem_time_series_wieghts_dict