示例#1
0
def arima_predict(train_dat, n_predictions, p=2, d=0, q=0):
    """Fit an ARIMA(p, d, q) model and predict past the end of the training data.

    Args:
        train_dat: Sequence of training observations; converted to a float array.
        n_predictions: Number of steps to predict after the last observation.
        p: Autoregressive order.
        d: Differencing order.
        q: Moving-average order.

    Returns:
        The result of ``predict`` for positions ``len(train_dat)`` through
        ``len(train_dat) + n_predictions - 1``.
    """
    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy, so the builtin is used directly.
    endog = np.array(train_dat).astype(float)
    arima = ARIMA(endog, [p, d, q])
    fitted = arima.fit(trend='c', disp=False)
    first_step = len(train_dat)
    last_step = first_step + n_predictions - 1
    return fitted.predict(first_step, last_step, exog=None, dynamic=False)
示例#2
0
 def ARIMA_forcast2(self):
     """Plot a rolling one-step-ahead ARIMA(1,1,1) forecast over 288 steps.

     Every iteration refits the model on the current history, forecasts a
     single point, and then extends the history with the next value taken
     from rows 286-573 of ``vr_df2_ts`` (the stored value, not the forecast).
     Warnings are silenced process-wide as a side effect.
     """
     import warnings
     warnings.filterwarnings('ignore')

     # No log transform is applied; the raw values are modelled directly.
     history = vr_df2_ts.values
     next_values = vr_df2_ts.values[286:574]
     one_step_forecasts = []
     for step in range(288):
         fitted = ARIMA(history, order=(1,1,1)).fit(disp=-1)
         one_step_forecasts.append(fitted.forecast()[0])
         history = np.append(history, next_values[step])

     # Both plotted overlays share the same 5-minute time axis.
     forecast_index = pd.date_range(start='2017-02-09 00:00:00', periods=288,freq='5min')
     forecast = pd.Series(one_step_forecasts, index=forecast_index)
     exog = vr_df2_ts.iloc[286:574]
     exog.set_index(pd.date_range(start='2017-02-09 00:00:00', periods=288,freq='5min'),inplace=True)

     plt.plot(vr_df2_ts)
     plt.plot(exog,'g')
     plt.plot(forecast,'r')
    def forecast_by_cluster(self, hold_out_n, n_ahead, order, exog):
        """Forecast ``n_ahead`` steps for every cluster column and store the result.

        Args:
            hold_out_n: When positive, that many trailing rows are removed from
                the fitting data and used to split ``exog``.
            n_ahead: Number of steps to forecast; also used to split ``exog``
                when ``hold_out_n`` is not positive.
            order: ARIMA order passed straight to ``ARIMA``.
            exog: Optional exogenous data; ``None`` disables it.

        The forecasts are written to ``self.ds_c_for`` with shape
        ``(n_ahead, self.n_clusters)``; nothing is returned.
        """
        history = self.ds_agg_by_c
        if hold_out_n > 0:
            # Hold-out validation: fit without the trailing rows.
            history = history[:-hold_out_n]

        exog_fit = exog_future = None
        if exog is not None:
            tail = hold_out_n if hold_out_n > 0 else n_ahead
            exog_fit = exog[:-tail]
            exog_future = exog[-tail:]

        cluster_forecasts = np.zeros((n_ahead, self.n_clusters))
        for c in tqdm(range(self.n_clusters)):
            series = history[:, c]
            if sum(series) == 0:
                # A column that sums to zero is not modelled; forecast zeros.
                cluster_forecasts[:, c] = 0
                continue
            fitted = ARIMA(series, exog=exog_fit, order=order).fit()
            cluster_forecasts[:, c] = fitted.forecast(n_ahead, exog=exog_future, alpha=.95)[0]

        self.ds_c_for = cluster_forecasts
示例#4
0
文件: process.py 项目: pthaike/comp
def arimamodel(ts):
	"""Fit ARIMA(2,1,2) models, print a 60-step forecast and plot the fit.

	Two models are fitted: one on the log series returned by ``trend(ts)``
	(used for the plotted back-transformed predictions and the RSS title) and
	one on ``ts`` itself (used for the printed 60-step forecast).

	Args:
		ts: The original time series.
	"""
	ts_log, ts_log_diff = trend(ts)
	model = ARIMA(ts_log, order = (2,1,2))
	result_ARIMA = model.fit(disp = -1)

	# ``fit()`` already returns the results object, so forecast from it
	# directly. The previous code wrapped it in ``ARMAResults`` with empty
	# params and called a misspelled ``forcast`` method, which cannot work.
	m = ARIMA(ts, order = (2,1,2)).fit()
	pre = m.forecast(steps = 60)
	# Single-argument parenthesised form prints the same on Python 2 and 3.
	print(pre)

	# prediction back to the original scale
	predictions_ARIMA = backorg(result_ARIMA, ts_log)
	plt.plot(predictions_ARIMA)
	plt.plot(ts, color = 'red')

	plt.title('RSS: %.4F' % np.sum((result_ARIMA.fittedvalues - ts_log_diff)**2))
	plt.show()
示例#5
0
    def get_grouped_data(self, forecast=False):
        """Combine the cumulative sum and the monthly grouping into one frame.

        Args:
            forecast: When true, an ARIMA(10,1,0) model is fitted on the
                'cumulative sum' column and a 12-step forecast is appended as
                a 'forecast' column indexed by month-end dates.

        Returns:
            A DataFrame with 'cumulative sum' and 'total added' columns, plus
            'forecast' when requested.
        """
        cdf = self.cumulative_sum()
        gdf = self.group_by('M')

        # The longer of the two series provides the frame's index.
        if cdf.shape[0] > gdf.shape[0]:
            df = cdf.to_frame()
            df.columns = ['cumulative sum']
            df['total added'] = gdf.to_frame()['event']
        else:
            df = gdf.to_frame()
            df.columns = ['total added']
            df['cumulative sum'] = cdf.to_frame()['event']

        if forecast:
            mtotals = pd.to_numeric(df['cumulative sum'], downcast='float')
            model_fit = ARIMA(mtotals, order=(10,1,0)).fit(disp=0)
            # Keep the flag parameter intact; hold the point forecasts separately.
            predicted = model_fit.forecast(steps=12)[0]
            dates = pd.date_range('2017-04-30', '2018-06-01', freq='M')
            # ``Timestamp.to_datetime`` was removed from pandas; ``to_pydatetime``
            # is the replacement. ``zip`` stops at the shorter input, so only
            # as many dates as there are forecast values are used.
            records = list(zip((stamp.to_pydatetime() for stamp in dates), predicted))
            ndf = pd.DataFrame.from_records(records)
            ndf.columns = ['date', 'forecast']
            ndf.set_index(['date'], inplace=True)
            df = pd.concat([df, ndf], axis=1)

        return df
示例#6
0
文件: process.py 项目: pthaike/comp
def mamodel(ts):
	"""Fit an ARIMA(0,1,1) model on the log series and plot it against the differenced log series.

	The plot title shows the residual sum of squares between the fitted values
	and the differenced log series; the window is shown without blocking.
	"""
	ts_log, ts_log_diff = trend(ts)
	fitted = ARIMA(ts_log, order = (0,1,1)).fit(disp = -1)
	plt.plot(ts_log_diff)
	plt.plot(fitted.fittedvalues, color = 'red')
	residuals = fitted.fittedvalues - ts_log_diff
	rss = np.sum(residuals**2)
	plt.title('RSS: %.4F' % rss)
	plt.show(block = False)
示例#7
0
文件: process.py 项目: pthaike/comp
def armodel(ts):
	"""Fit an ARIMA(1,1,0) model on the log series and plot it against the differenced log series.

	The plot title shows the residual sum of squares between the fitted values
	and the differenced log series; the window is shown without blocking.
	"""
	ts_log, ts_log_diff = trend(ts)
	fitted = ARIMA(ts_log, order = (1,1,0)).fit(disp = -1)
	plt.plot(ts_log_diff)
	plt.plot(fitted.fittedvalues, color = 'red')
	residuals = fitted.fittedvalues - ts_log_diff
	rss = np.sum(residuals**2)
	plt.title('RSS: %.4F' % rss)
	plt.show(block = False)
示例#8
0
 def ARIMA_fit(self):
     """Fit ARIMA(5,1,5) on ``ts_log``, keep the result on ``self`` and plot the fit.

     The fitted result is stored as ``self.results_ARIMA``. The summary is
     printed, and the fitted values are plotted over ``ts_log_diff`` with the
     residual sum of squares (against the 'in_tpkts' column) in the title.
     """
     # order=(p,d,q): AR and MA can also be modelled separately by entering 0
     # for either p or q.
     model = ARIMA(ts_log, order=(5,1,5))
     results = model.fit(disp=-1)
     # The result previously was stored on ``self`` but then read back through
     # a bare, undefined name; use one local and publish it on ``self``.
     self.results_ARIMA = results

     print(results.summary())

     plt.plot(ts_log_diff)
     plt.plot(results.fittedvalues, color='r')
     plt.title('RSS: %.4f'% sum((results.fittedvalues-ts_log_diff['in_tpkts'])**2))
示例#9
0
 def fit(self):
   """Fit ARIMA(2,1,1) on ``self.df`` and return three exponentiated predictions.

   Returns:
     ``None`` when ``self.df`` has fewer than ``self.t_window`` rows;
     otherwise the exponential of the value at position ``t_window - 1``
     plus the cumulative sum of the dynamic predictions for steps
     ``t_window`` through ``t_window + 2``.
   """
   if len(self.df) < self.t_window: return None
   model = ARIMA(self.df, order=(2, 1, 1))
   results_ARIMA = model.fit(disp=-1)
   forecast = results_ARIMA.predict(start = self.t_window, end= self.t_window+2, dynamic= True)
   # The predictions are differences; accumulate them before adding the base.
   forecast = forecast.cumsum()
   # ``.ix`` has been removed from pandas; the integer here is used as a
   # position, so ``.iloc`` is the replacement.
   # NOTE(review): assumes the index is not integer-labelled in a way that
   # differs from position - confirm against the caller.
   base_value = self.df.iloc[self.t_window-1]
   predictions_ARIMA_log = pd.Series(base_value, index=forecast.index)
   predictions_ARIMA_log = predictions_ARIMA_log.add(forecast,fill_value=0)
   predictions_ARIMA = np.exp(predictions_ARIMA_log)
   return predictions_ARIMA
示例#10
0
def ARIMA_fun( data ):
    """Fit an ARIMA model and return its fitted values, plotting them over ``data``.

    The MA order is the integer part of the first element of the second array
    returned by ``acf(..., qstat=True)``. The model itself is fitted on the
    module-level ``orig_data``, not on the ``data`` argument.
    """
    # The PACF result is not used afterwards, but the call is kept as-is.
    lag_pacf = pacf(data, nlags=20, method='ols')
    acf_result = acf(data, nlags=20, qstat=True, unbiased=True)
    lag_acf, ci2, Q = acf_result

    ma_order = int(ci2[0])
    fitted = ARIMA(orig_data, order=(1, 1, ma_order)).fit(disp=-1)
    fitted_values = fitted.fittedvalues

    plt.subplot(121)
    plt.plot(data)
    plt.plot(fitted_values)
    return fitted_values
示例#11
0
    def objfunc(order, *params):
        """Return the AIC of an ARIMA fit for ``order``, or ``inf`` when unusable.

        Args:
            order: ARIMA order to evaluate.
            *params: Extra positional arguments; the whole tuple is passed to
                ``ARIMA`` as the series.
                NOTE(review): passing the tuple itself rather than one of its
                elements looks unintended - confirm against the caller.

        Returns:
            The fitted model's AIC, or ``float('inf')`` when construction or
            fitting raises, or when the AIC is NaN.
        """
        series = params

        try:
            mod = ARIMA(series, order, exog=None)
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                res = mod.fit(disp=0, solver='bfgs', maxiter=5000)
        except Exception:
            # Any model failure scores as "worst possible", but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return float('inf')
        if math.isnan(res.aic):
            return float('inf')
        return res.aic
示例#12
0
    def pridictNextNdays(self,train):
        """Forecast ``self.next_ndays`` steps from the selected training window.

        Args:
            train: Container indexed by ``self.selected`` to obtain the series.

        Returns:
            The point forecasts; standard errors and confidence intervals
            from ``forecast`` are discarded.
        """
        window = train[self.selected][self.start_train:self.end_train]
        order = (self.p, self.d, self.q)
        # Daily frequency is passed explicitly to the model.
        fitted = ARIMA(window, order=order, freq='D').fit(disp=False)
        forecast, _stderr, _conf_int = fitted.forecast(steps=self.next_ndays, alpha=.05)
        return forecast
示例#13
0
 def testArima(self,train):
     """Forecast ``self.next_ndays`` steps and pair them with the observed values.

     Args:
         train: Container indexed by ``self.selected`` to obtain the series.

     Returns:
         A dict with 'real' (the observations from ``end_train`` to
         ``end_train + next_ndays``, as a list) and 'pridiction' (the point
         forecasts).
     """
     realSerize = train[self.selected]
     timeSerize = realSerize[self.start_train:self.end_train]
     # An unused slice bounded by ``end_train:next_ndays`` was dropped here;
     # the returned 'real' values use the correct bounds below.
     model = ARIMA(timeSerize, order=(self.p,self.d, self.q)) # build a model
     fitting = model.fit(disp=False)
     forecast, fcasterr, conf_int = fitting.forecast(steps=self.next_ndays, alpha=.05)
     real = list(realSerize)[self.end_train:self.end_train+self.next_ndays]
     return  {'real':real,'pridiction':forecast}
示例#14
0
    def predict_arima_next_days(self, item):
        """Forecast ``self.n_days`` steps for one item of ``df_train``.

        Args:
            item: Key selecting the series from the module-level ``df_train``.

        Returns:
            A tuple ``(history, last_value, forecast)``: the index-sorted
            series up to (not including) position ``self.fc``, the value at
            ``self.fc``, and the point forecasts.
        """
        series = df_train[item].sort_index()
        ts_last_day = series[self.fc]
        history = series[0:self.fc]

        order = (self.p, self.d, self.q)
        fitted = ARIMA(history, order=order).fit(disp=False)

        # Only the point forecasts are returned; errors and intervals are unused.
        forecast, _stderr, _conf_int = fitted.forecast(steps=self.n_days, alpha=.05)
        return history, ts_last_day, forecast
示例#15
0
def arima(ts, forecast_window):
    """Fit ARIMA(0,1,2) on ``log(ts)`` and return exponentiated future values.

    Args:
        ts: Time series; logged via ``logger.info`` and log-transformed.
        forecast_window: Number of steps past the last observation to predict.

    Returns:
        ``exp`` of the dynamic predictions plus the last log value plus the
        cumulative sum of the predictions.
    """
    logger.info(ts)
    start = int(ts.count() - 1)
    end = int(start + forecast_window)

    ts_log = np.log(ts)
    model = ARIMA(ts_log, order=(0, 1, 2))
    results = model.fit(disp=-1)
    prediction = results.predict(start=start, end=end, dynamic=True)
    future = pd.Series(prediction, copy=True)
    cumsum = future.cumsum()
    # ``.ix`` has been removed from pandas; ``-1`` selects the last element by
    # position, which is what ``.iloc`` does.
    prediction_future = future.add(ts_log.iloc[-1])
    prediction_future = prediction_future.add(cumsum)
    ts_future = np.exp(prediction_future)

    return ts_future
示例#16
0
class ARIMAModelResult:
    """An ARIMA fit together with its forecast error on a hold-out set.

    Instances compare by ``model_fitness`` (mean squared error of the
    forecast against ``test``), so a collection of them can be sorted or
    reduced with ``min`` to find the best-scoring order.
    """

    def __init__(self, autoregressive_periods, integrated_order, moving_average_model_periods, training_data, test):
        """Fit the model on ``training_data`` and score it on ``test``.

        Args:
            autoregressive_periods: AR order (p).
            integrated_order: Differencing order (d).
            moving_average_model_periods: MA order (q).
            training_data: Data the model is fitted on.
            test: Hold-out data; its length sets the forecast horizon.
        """
        self.autoregressive_periods = autoregressive_periods
        self.integrated_order = integrated_order
        self.moving_average_model_periods = moving_average_model_periods
        self.model = ARIMA(training_data, order=(
            self.autoregressive_periods,
            self.integrated_order,
            self.moving_average_model_periods
        ))
        self.fit = self.model.fit()
        self.aic = self.fit.aic
        self.predictions = self.fit.forecast(steps=len(test))[0]
        self.model_fitness = mean_squared_error(test, self.predictions)

    # The comparison methods return NotImplemented for objects without a
    # ``model_fitness`` attribute, so Python can fall back to its default
    # handling instead of failing with AttributeError.

    def __eq__(self, other):
        try:
            other_fitness = other.model_fitness
        except AttributeError:
            return NotImplemented
        return self.model_fitness == other_fitness

    def __lt__(self, other):
        try:
            other_fitness = other.model_fitness
        except AttributeError:
            return NotImplemented
        return self.model_fitness < other_fitness

    def __gt__(self, other):
        try:
            other_fitness = other.model_fitness
        except AttributeError:
            return NotImplemented
        return self.model_fitness > other_fitness

    def __str__(self):
        return "Autoregressive periods: {}\nIntegraded Order: {}\nMoving Average Model Periods: {}\n Predictions: {}\nMSE: {}".format(
            self.autoregressive_periods,
            self.integrated_order,
            self.moving_average_model_periods,
            self.predictions,
            self.model_fitness
        )
def predictFutureProfit(df, forward):
	"""Return, per asset, the last value of an ARIMA(1,1,0) reconstruction.

	For every asset from ``get_assets(df)`` the log series is modelled, the
	dynamic predictions (differences) are accumulated and added to the first
	log value, and the result is exponentiated.

	Args:
		df: Frame holding one column per asset.
		forward: Not used by this function.

	Returns:
		A dict mapping each asset to the final reconstructed value.
	"""
	results = {}

	for asset in get_assets(df):
		ts = df[asset]
		ts_log = np.log(ts)

		model = ARIMA(ts_log, order=(1, 1, 0))
		results_ARIMA = model.fit(disp=-1)
		predictions_diff = results_ARIMA.predict(2, len(ts.index)-1, dynamic=True)
		predictions_diff_cumsum = predictions_diff.cumsum()
		# ``.ix`` has been removed from pandas; the first element is selected
		# by position with ``.iloc``.
		predictions_log = pd.Series(ts_log.iloc[0], index=ts_log.index)
		predictions_log = predictions_log.add(predictions_diff_cumsum,fill_value=0)
		predictions = np.exp(predictions_log)
		# Explicit positional access; ``[-1]`` depended on positional fallback.
		results[asset] = predictions.iloc[-1]

	return results
示例#18
0
文件: arima.py 项目: jjmonster/trade
    def arima(self):
        """Fit ARIMA(4,1,3) on closing prices and plot a dynamic prediction.

        The prediction runs from the tenth-from-last date to
        ``int(self.day_history/5)`` days after the last date, and is drawn
        over the closing-price series.
        """
        kline = self.get_kline()
        closes = self.get_close_price(kline)
        dates = self.get_date(kline)
        last_date = dates[-1]
        horizon_end = last_date + timedelta(days=int(self.day_history/5))
        print("predict date:", last_date,"--->", horizon_end)

        prices = pd.Series(closes, index=dates)
        print(prices)
        fitted = ARIMA(prices, order=(4,1,3)).fit()  # (p, d, q)
        predicted = fitted.predict(dates[-10], horizon_end, dynamic=True, typ='levels')

        plt.figure(figsize=(12,8))
        plt.plot(prices, 'ro-')
        plt.xticks(rotation=45)
        plt.plot(predicted, 'go-')

        plt.show()
def fitArima(ts):
    """Model the 7-lag difference of ``log(ts)`` with ARIMA(0,1,1).

    Returns:
        A two-column frame ('original', 'predicted') holding ``ts`` next to
        the in-sample predictions mapped back to the original scale.
    """
    import statsmodels.api as sm
    p, d, q = 0, 1, 1

    log_ts = np.log(ts)
    lagged_log = log_ts.shift(7)
    # The first seven entries have no lagged counterpart and are dropped.
    seasonal_diff = (log_ts - lagged_log)[7:]

    model = ARIMA(seasonal_diff, [p, d, q], exog=None, freq='D', missing='none')
    fitted = model.fit(trend='c', disp=False)
    in_sample = fitted.predict(exog=None, dynamic=False)

    # Align the predictions with the index after ``d`` leading entries, then
    # undo both differencing steps and the log transform.
    predicted = pd.Series(in_sample, index=seasonal_diff.index[d:])
    predicted = np.exp(lagged_log + seasonal_diff.shift(d) + predicted)

    return pd.concat([ts, predicted], axis=1, keys=['original', 'predicted'])
def programmer_5():
    """Check the residuals of an ARIMA(0,1,1) fit with the Ljung-Box test.

    Reads ``data/discdata_processed.xls``, drops the last five rows, fits the
    model on one column, and prints whether any of the first 12 lags has a
    p-value below 0.05, followed by the test statistics.
    """
    source_path = "data/discdata_processed.xls"
    # Number of residual lags to test.
    n_lags = 12

    frame = pd.read_excel(source_path, index_col="COLLECTTIME")
    frame = frame.iloc[:len(frame) - 5]
    series = frame["CWXT_DB:184:D:\\"]

    # Fit the model, predict on the original scale, and compute the residuals.
    fitted = ARIMA(series, (0, 1, 1)).fit()
    predicted = fitted.predict(typ="levels")
    residuals = (predicted - series).dropna()

    lb, p = acorr_ljungbox(residuals, lags=n_lags)
    significant = (p < 0.05).sum()
    message = u"模型ARIMA(0,1,1)不符合白噪声检验" if significant > 0 else u"模型ARIMA(0,1,1)符合白噪声检验"
    print(message)
    print(lb)
示例#21
0
    def run_arima(self):#use current build
        '''
        DEPRECATED:
        Primarily used for testing/debugging.
        Runs statsmodels ARIMA.

        Sets ``self.xts`` and ``self.yts`` (the training frames indexed by
        'date', the latter cast to float), ``self.arimod`` (the ARIMA(2,1,2)
        model) and ``self.aresults`` (its fit).
        '''

        self.xts = self.X_train.set_index('date')
        # ``astype`` returns a new object and has no in-place mode, so the
        # converted frame must be assigned back.
        self.yts = self.y_train.set_index('date').astype('float')
        self.arimod = ARIMA(endog = self.yts, order = (2,1,2))#, exog=self.xts)
        self.aresults = self.arimod.fit()
示例#22
0
def arima_model(accounts):
    """Fit an ARIMA model for each account.

    Args:
        accounts: Mapping keyed by ``(account_type, account)`` to a series.

    Returns:
        A dict with the same keys holding the fitted results. Accounts for
        which no order in ``ARIMA_ORDERS`` could be fitted are omitted.
    """
    fitted_by_account = {}
    for account_type, account in accounts:
        key = (account_type, account)
        series = accounts[key]
        series.name = account

        # The right order is unknown: walk ARIMA_ORDERS front to back and
        # keep the first one that fits without error.
        for candidate in ARIMA_ORDERS:
            try:
                fitted = ARIMA(series, order=candidate).fit()
            except (ValueError, np.linalg.LinAlgError):
                continue
            fitted_by_account[key] = fitted
            break

    return fitted_by_account
示例#23
0
 def ARIMA_forcast3(self):
     """Forecast 2016 steps of 'ACTIVE_FLOWS' from a seasonally differenced fit.

     The first 7000 rows are differenced with a 288-sample cycle, modelled
     with ARIMA(1,1,1), and each forecast step is inverted back to the
     original scale. The inverted values are collected locally; nothing is
     returned or stored.
     """
     cycle = 288 #2016
     observed = pd.Series(vr_df['ACTIVE_FLOWS'][0:7000]).values
     # Seasonal difference, then fit.
     seasonal_diff = difference(observed, cycle)
     fitted = ARIMA(seasonal_diff, order=(1,1,1)).fit(disp=0)
     # Multi-step out-of-sample forecast.
     forecast = fitted.forecast(steps=2016)[0]

     # Invert each step; the history grows with the stored value for that
     # step (row 7000 onwards), not with the inverted forecast.
     history = list(observed)
     forecast_values = []
     for offset, yhat in enumerate(forecast):
         forecast_values.append(inverse_difference(history, yhat, cycle))
         history.append(vr_df['ACTIVE_FLOWS'][7000 + offset])
示例#24
0
 def ARIMA_forecast4(self):
     """Walk forward through 'DELETED_FLOWS', forecasting 12 steps at a time.

     Starting from 7318 training rows, each round refits ARIMA(1,1,1) on the
     seasonally differenced data (288-sample cycle), forecasts 12 steps,
     inverts them to the original scale and collects them locally. Nothing
     is returned or stored.
     """
     # parameters
     num_train_init = 7318
     num_forecast = 12 #one day = 288 data points
     cycle = 288 #for a total 288 samples per day
     # This previously referenced an undefined ``num_train`` and raised
     # NameError before any forecasting happened.
     startdate = vr_df.index[num_train_init]
     field = 'DELETED_FLOWS'
     # array of predicted values
     forecast_values = []

     for i in range(0,int(len(vr_df)/num_forecast)):
         # stop once the training window would run past the data
         num_train_current = i*num_forecast+num_train_init
         if ((num_train_current) > len(vr_df)):
             break
         # load dataset
         series = pd.Series(vr_df[field][0:num_train_current])
         # Make data stationary: seasonal difference
         X = series.values
         differenced = difference(X, cycle)
         # fit model
         model = ARIMA(differenced, order=(1,1,1))
         model_fit = model.fit(disp=0)
         # multi-step out-of-sample forecast
         forecast = model_fit.forecast(steps=num_forecast)[0]
         # invert the differenced forecast to something usable
         history = list(X)
         for offset, yhat in enumerate(forecast):
             inverted = inverse_difference(history, yhat, cycle)
             forecast_values.append(inverted)
             # append actual data
             try:
                 history.append(vr_df[field][num_train_current+offset])
             except (KeyError, IndexError):
                 # reached the end of the actual data; fall back to the
                 # forecast value. Only lookup failures are handled here so
                 # unrelated errors are no longer hidden.
                 history.append(inverted)
示例#25
0
 def __init__(self, autoregressive_periods, integrated_order, moving_average_model_periods, training_data, test):
     """Fit an ARIMA model on ``training_data`` and score it against ``test``.

     Stores the three order components, the model, its fit, the fit's AIC,
     the point forecasts for ``len(test)`` steps, and their mean squared
     error against ``test`` (``model_fitness``).
     """
     self.autoregressive_periods = autoregressive_periods
     self.integrated_order = integrated_order
     self.moving_average_model_periods = moving_average_model_periods

     order = (
         self.autoregressive_periods,
         self.integrated_order,
         self.moving_average_model_periods,
     )
     self.model = ARIMA(training_data, order=order)
     self.fit = self.model.fit()
     self.aic = self.fit.aic

     horizon = len(test)
     self.predictions = self.fit.forecast(steps=horizon)[0]
     self.model_fitness = mean_squared_error(test, self.predictions)
示例#26
0
def getLikelihood(endog,exog, order = None,n_forecasted_data=1):
    """Fit an ARIMA model on ``endog`` and return its point forecast.

    Args:
        endog: Series the model is fitted on.
        exog: Passed to ``objfunc`` during the order search only.
        order: ARIMA order. When ``None`` it is chosen by a brute-force grid
            search over ``objfunc``; if that search fails, ``[1, 1, 3]`` is
            used.
        n_forecasted_data: Number of steps to forecast.

    Returns:
        The first element of the ``forecast`` result (the forecast values).
    """
    # Automatically determine values of orders
    if order is None:
        from scipy.optimize import brute
        grid = (slice(1, 3, 1), slice(1, 3, 1),slice(0, 3, 1))

        # Single-argument parenthesised prints behave the same on Python 2
        # and Python 3.
        print("############################################")
        print(endog)
        print("############################################")

        try:
            order =  brute(objfunc, grid, args=(exog, endog), finish=None)
            order = order.astype(int)
        except Exception:
            # Fall back to a fixed order when the search fails, without
            # swallowing KeyboardInterrupt/SystemExit.
            order = [1,1,3]

    print("*********************************************")
    print("Choose order of  %s" % (order,))
    print("*********************************************")

    # Model fits given data (endog) with the chosen order
    model = ARIMA(endog,order).fit(full_output=False,disp=False)

    # 1st element of array x is the forecasted data.
    x = model.forecast(n_forecasted_data)
    return x[0]
示例#27
0
def previsao_matematica(reservatId, data):
    """Forecast 180 steps for a reservoir series and report the non-negative ones.

    Args:
        reservatId: Identifier passed to ``predict_info.getSeries``.
        data: Second argument passed to ``predict_info.getSeries``.

    Returns:
        A dict with 'calculado' (True once the forecast has run), 'volumes'
        (the non-negative forecast values formatted with four decimals) and
        'dias' (how many values were kept).
    """
    # ``Series.from_array`` has been removed from pandas; the constructor
    # accepts the same data.
    seriesArray = Series(predict_info.getSeries(reservatId, data))
    seriesValues = seriesArray.values

    mathDict = {'calculado': False, 'volumes': [], 'dias': 0}

    # Difference interval used for both differencing and its inversion.
    days_in_year = 1
    differenced = predict_info.difference(seriesValues, days_in_year)
    # fit model
    model = ARIMA(differenced, order=(1,0,1))
    model_fit = model.fit(disp = -1)
    # multi-step out-of-sample forecast
    forecast = model_fit.forecast(steps=180)[0]
    # invert the differenced forecast to something usable
    mathDict['calculado'] = True
    history = list(seriesValues)
    for yhat in forecast:
        inverted = predict_info.inverse_difference(history, yhat, days_in_year)
        history.append(inverted)
        # Negative values are skipped in the output but stay in the history.
        if inverted >= 0.0:
            mathDict['volumes'].append("%.4f" % round((inverted), 4))
            mathDict['dias'] = mathDict['dias'] + 1
    return mathDict
示例#28
0
def get_arima_predictions(y, train_subset, order = (1, 0, 0), X = None):
    """Return in-sample predictions followed by one refit prediction per later index.

    Args:
        y: Target series, indexable by ``train_subset`` and by slices.
        train_subset: Indices of the training observations.
        order: ARIMA order. The default is a tuple so that no mutable object
            is shared between calls.
        X: Optional 2-D exogenous array; ``None`` fits without it.

    Returns:
        The in-sample predictions on the training subset, extended by one
        value for every index from ``max(train_subset) + 1`` to
        ``len(y) - 1``, each taken from a model refitted on ``y[0:i]``.
    """
    # ``X == None`` compares element-wise for arrays and cannot be used as a
    # condition; identity is the correct test.
    has_exog = X is not None

    if not has_exog:
        arima = ARIMA(y[train_subset], order = order).fit()
        predictions = arima.predict()
    else:
        arima = ARIMA(y[train_subset], order = order,
                      exog = X[train_subset,:]).fit()
        predictions = arima.predict(exog = X[train_subset,:])

    for i in range(max(train_subset)+1,len(y)):
        # NOTE(review): the prediction end ``len(y) + i`` is kept unchanged;
        # confirm that taking the last value of that range is intended.
        if not has_exog:
            arima = ARIMA(y[0:i], order = order).fit()
            predictions = np.append(predictions,
                                    arima.predict(0, len(y) + i)[-1])
        else:
            arima = ARIMA(y[0:i], order = order, exog = X[0:i,:]).fit()
            predictions = np.append(predictions,
                                    arima.predict(0, len(y) + i,
                                                  exog = X[0:i+1,:])[-1])
    return predictions
示例#29
0
# Walk-forward validation of an ARIMA(4, 1, 0) model on the 'Trailer' series.
# Initialize local variable for time series
trailer_series = subtype_result['Trailer']
# trailer_series = subtype_result_day['Trailer']

# trailer_series = subtype_result['Trailer'].resample('MS').sum()

# Hold out the final 52 observations as the test set.
X = trailer_series.values
train, test = X[0:-52], X[-52:]
history = [x for x in train]
# print(history)
predictions = list()

# For each test point: refit on everything seen so far, forecast one step,
# then add the actual observation (not the forecast) to the history.
for t in range(len(test)):
    # fit model
    model = ARIMA(history, order=(4, 1, 0))
    model_fit = model.fit(disp=False, trend='c')
    # single step forecast
    yhat = model_fit.forecast()[0]
    predictions.append(yhat)
    history.append(test[t])

# evaluate forecasts
rmse = sqrt(mean_squared_error(test, predictions))
print('Test RMSE: %.3f' % rmse)

# Summary of the last model fitted in the loop above.
print(model_fit.summary())
# model_fit.plot_diagnostics(figsize=(16, 8))

# Plot The Forecast
plt.plot(test, color='#ff6832')
示例#30
0
def main():
    """Run the TSLA closing-price ARIMA pipeline from arguments to prediction.

    In order: parse arguments, configure the logger, load and clean the daily
    closing prices, optionally log-transform them, split into train/test,
    decompose the training part, difference the trend and residual
    components, fit one model per component through ``ARIMA_model``, log the
    training loss (and the testing loss when a test part exists), optionally
    save comparison plots, and finally log the predictions for 2020-12-07
    through 2020-12-11.

    When ``args.plot`` is set, figures are saved as PNG files in the working
    directory.
    """
    # parse arguments
    args = add_args()

    # set the level of logger
    logger.setLevel(logging.DEBUG)
    if not args.verbose:
        logger.setLevel(logging.INFO)
    logger.debug("--------DEBUG enviroment start---------")

    # show the hyperparameters
    logger.info("---------hyperparameter setting---------")
    logger.info(args)

    # set the random seed
    np.random.seed(args.seed)

    # data fetching
    logger.info("-------------Data fetching-------------")
    tickers = \
    [
        ("TSLA", "yahoo"), # 0, TESLA Stock
    ]
    # check if data range is legal.
    if args.month <= 0 or args.month > 24:
        logger.warning("The data range is illegal. Turn to use default 3")
        args.month = 3
    tsla_df = data_loader(
        tickers, args.month)[0]  # get dataframes from "yahoo" finance.
    tsla_close = tsla_df["Close"].resample(
        'D').ffill()  # resample to daily and forward-fill the gaps.

    # data cleaning
    logger.info("-------------Data cleaning-------------")
    if np.sum(tsla_close.isnull()) > 0:
        logger.debug(
            "The time series contain missing values & we use interpolation to resolve this issue"
        )
        tsla_close = tsla_close.interpolate(method='polynomial',
                                            order=2,
                                            limit_direction='forward',
                                            axis=0)
    # Then, if there are still missing values, simply drop them.
    tsla_close = tsla_close.dropna()
    logger.debug(tsla_close)

    # plot the graph describing the tsla close series
    if args.plot:
        fig = plt.gcf()
        fig.set_size_inches(18.5, 10.5)
        plt.plot(tsla_close, label="Series")
        plt.plot(tsla_close.rolling(int(.05 * len(tsla_close))).mean(),
                 '--',
                 label="Rolling mean")
        plt.plot(tsla_close.rolling(int(.05 * len(tsla_close))).std(),
                 ":",
                 label="Rolling Std")
        plt.legend(loc="best")
        plt.savefig("tesla_description.png")

    # optional log transformation
    if args.log:
        tsla_close = tsla_close.apply(np.log)  # log transformation

    # estimate the forecastability of a time series:
    #   Approximate entropy is a technique used to quantify the amount of regularity and the unpredictability of fluctuations over time-series data.
    #   Smaller values indicates that the data is more regular and predictable.
    logger.info("The approximate entropy: " + str(
        app_entropy(U=np.array(tsla_close),
                    r=0.2 * np.std(np.array(tsla_close)))))

    # data splitting
    logger.info("-------------Data splitting------------")
    # check if split_ratio legal.
    if args.split_ratio > 1 or round(len(tsla_close) * args.split_ratio) <= 0:
        logger.warning("Splitting ratio is illegal. Turn to use default 0.7")
        args.split_ratio = 0.7
    train = tsla_close[0:round(len(tsla_close) * args.split_ratio)]
    test = tsla_close[round(len(tsla_close) * args.split_ratio):]

    # time series decomposition
    logger.info("-------------decomposition-------------")
    # check if period is legal.
    if args.period < 2:
        logger.warning("Seasonal period is illegal. Turn to use default 7.")
        args.period = 7
    trend, seasonal, residual = decompose(train, args.period, args.plot)

    # difference the trend and residual components; the second return value
    # of ``diff`` is logged as the diff count
    logger.debug("-----------------Diff-----------------")
    trend_diff, trend_diff_counts = diff(trend, args.plot, "trend", args.diff)
    logger.debug("trend diff counts: " + str(trend_diff_counts))
    residual_diff, residual_diff_counts = diff(residual, args.plot, "residual",
                                               args.diff)
    logger.debug("residual diff counts: " + str(residual_diff_counts))

    # ARIMA model: one per differenced component; the logged parameters are
    # (order[0], diff count, order[1])
    logger.info("-----------ARIMA construction----------")
    trend_model_fit, trend_model_order = ARIMA_model(trend_diff, args,
                                                     "trend_diff")
    logger.info("Trend model parameters: " + str(
        tuple([trend_model_order[0], trend_diff_counts, trend_model_order[1]]))
                )
    residual_model_fit, residual_model_order = ARIMA_model(
        residual_diff, args, "residual_diff")
    logger.info("Residual model parameters: " + str(
        tuple([
            residual_model_order[0], residual_diff_counts,
            residual_model_order[1]
        ])))

    # model summary (best effort: any failure is logged and skipped)
    try:
        logger.debug("---------trend model summary----------")
        logger.debug(trend_model_fit.summary())
    except:
        logger.warning("Error occurs in summary, simply skip")
        pass
    try:
        logger.debug("---------resid model summary----------")
        logger.debug(residual_model_fit.summary())
    except:
        logger.warning("Error occurs in summary, simply skip")
        pass

    if args.plot:
        # residual plots of trend model
        trend_model_fit.resid.plot()
        plt.savefig("resid_plt_trend.png")
        plt.close()
        trend_model_fit.resid.plot(kind='kde')
        plt.savefig("kde_resid_plt_trend.png")
        plt.close()

        # residual plots of residual model
        residual_model_fit.resid.plot()
        plt.savefig("resid_plt_residual.png")
        plt.close()
        residual_model_fit.resid.plot(kind='kde')
        plt.savefig("kde_resid_plt_residual.png")
        plt.close()

    logger.debug("-----trend model residual describe----")
    logger.debug(trend_model_fit.resid.describe())  # describe the dataframe
    logger.debug("-----resid model residual describe----")
    logger.debug(residual_model_fit.resid.describe())  # describe the dataframe

    # loss calculation
    logger.info("-----------Loss calculation------------")
    fit_seq = model_predict(trend_model_fit, residual_model_fit, trend,
                            residual, seasonal, trend_diff_counts,
                            residual_diff_counts, False, "", "", args.period)
    # undo the log transformation so the loss is computed on the price scale
    if args.log:
        fit_seq = np.exp(fit_seq)
        train = train.apply(np.exp)
    logger.debug(fit_seq)

    # calculate training loss
    training_loss = loss(fit_seq, np.array(train), args.loss)
    logger.info("Training loss: " + str(training_loss))

    # plot train and fitted values in one graph.
    if args.plot:
        plt.figure()
        plt.plot(fit_seq, color='red', label='fit')
        plt.plot(np.array(train), color='blue', label='train')
        plt.legend(loc='best')
        plt.savefig('fit_vs_train.png')
        plt.close()

    # only evaluate on the test part when it is non-empty
    if list(test):
        pred_seq = model_predict(trend_model_fit, residual_model_fit, trend,
                                 residual, seasonal, trend_diff_counts,
                                 residual_diff_counts, True,
                                 str(test.index.tolist()[0]),
                                 str(test.index.tolist()[-1]), args.period)
        if args.log:
            pred_seq = np.exp(pred_seq)
            test = test.apply(np.exp)
        logger.debug(pred_seq)

        # calculate testing loss
        testing_loss = loss(pred_seq, np.array(test), args.loss)
        logger.info("Testing loss: " + str(testing_loss))

        # plot test and predicted value in one graph.
        if args.plot:
            plt.figure()
            plt.plot(pred_seq, color="red", label="pred")
            plt.plot(np.array(test), color="blue", label="test")
            plt.legend(loc="best")
            plt.savefig("pred_vs_test.png")
            plt.close()

    # plot several models performance comparison on train set.
    if args.plot:
        logger.info("-----------Model Comparison------------")
        plt.figure()
        # actual value
        plt.plot(np.array(train), color='blue', label="actual")
        # auto-ARIMA with seasonal decomposition
        plt.plot(fit_seq[1:],
                 color='green',
                 label='ARIMA with seasonal decomposition')
        # simple auto-ARIMA
        auto_arima_model_fit, _ = ARIMA_model(train, args, "auto_arima")
        plt.plot(np.array(auto_arima_model_fit.fittedvalues),
                 color='yellow',
                 label='Auto ARIMA')
        # auto-ARIMA with log transformation.
        auto_log_arima_fit, _ = ARIMA_model(train.apply(np.log), args,
                                            "auto_arima")
        plt.plot(np.array(auto_log_arima_fit.fittedvalues.apply(np.exp)),
                 color='brown',
                 label='Auto ARIMA with log')
        # rolling mean
        plt.plot(np.array(train.rolling(int(.05 * len(train))).mean()),
                 '--',
                 label="Rolling mean")
        # ordinary arima
        plt.plot(np.array(ARIMA(train, (1, 0, 1)).fit(disp=0).fittedvalues),
                 color="coral",
                 label="Ordinary ARIMA")
        plt.legend(loc="best")
        plt.xlabel("days from " +
                   str(train.index.tolist()[0]).replace(" 00:00:00", ""))
        plt.ylabel("stock prices")
        plt.title("Actual Stock Price Compared with Forecasted Stock Price")
        plt.grid(True)
        plt.tight_layout()
        plt.savefig("model_comparison.png")
        plt.close()

        if list(test):
            # calculate testing loss
            loss_dict = dict()
            # auto-ARIMA with seasonal decomposition
            loss_dict["auto sarima"] = testing_loss
            # simple auto-ARIMA
            loss_dict["auto arima"] = loss(
                np.array(
                    auto_arima_model_fit.predict(
                        start=str(test.index.tolist()[0]),
                        end=str(test.index.tolist()[-1]),
                        dynamic=True)), np.array(test), args.loss)
            # auto-ARIMA with log transformation.
            loss_dict["auto arima log"] = loss(
                np.array(
                    auto_log_arima_fit.predict(
                        start=str(test.index.tolist()[0]),
                        end=str(test.index.tolist()[-1]),
                        dynamic=True).apply(np.exp)), np.array(test),
                args.loss)
            # ordinary arima
            loss_dict["arima"] = loss(
                np.array(
                    ARIMA(train, (1, 0, 1)).fit(disp=0).predict(
                        start=str(test.index.tolist()[0]),
                        end=str(test.index.tolist()[-1]),
                        dynamic=True)), np.array(test), args.loss)
            logger.info(loss_dict)
            plt.figure(figsize=(12, 6))
            loss_df = pd.DataFrame.from_dict(loss_dict, orient='index')
            plt.bar(loss_df.index.tolist(), loss_df.iloc[:, 0])
            plt.ylabel("RMSE")
            plt.legend('')
            plt.title("RMSE for Difference Models on Test Data")
            plt.savefig("RMSE_model_comparison.png")
            plt.close()

    # prediction for a fixed, hard-coded five-day date range
    logger.info("--------------prediction---------------")
    prediction = model_predict(trend_model_fit, residual_model_fit, trend,
                               residual, seasonal, trend_diff_counts,
                               residual_diff_counts, True,
                               "2020-12-07 00:00:00", "2020-12-11 00:00:00",
                               args.period)
    if args.log:
        prediction = np.exp(prediction)
    logger.info("2020-12-07 predicted value: " + str(prediction[0]))
    logger.info("2020-12-08 predicted value: " + str(prediction[1]))
    logger.info("2020-12-09 predicted value: " + str(prediction[2]))
    logger.info("2020-12-10 predicted value: " + str(prediction[3]))
    logger.info("2020-12-11 predicted value: " + str(prediction[4]))
    logger.info("--------------Process ends-------------")
示例#31
0
            color='gray')  # lowwer置信区间
plt.axhline(y=1.96 / np.sqrt(len(ts_log_diff)), linestyle='--',
            color='gray')  # upper confidence bound
plt.title('Autocorrelation Function')
# Choosing p: the lag at which the PACF curve first crosses the upper
# confidence bound. p = 2 is used here.
plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(ts_log_diff)), linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(len(ts_log_diff)), linestyle='--', color='gray')
plt.title('Partial Autocorrelation Function')
plt.tight_layout()
plt.show()

# AR model
model = ARIMA(ts_log, order=(2, 1, 0))
result_AR = model.fit(disp=-1)
plt.plot(ts_log_diff)
plt.plot(result_AR.fittedvalues, color='red')
# RSS is the sum of the squared residuals: square each residual first, then
# sum. The previous expression squared the sum of the residuals instead.
plt.title('AR model RSS:%.4f' % sum((result_AR.fittedvalues - ts_log_diff) ** 2))
plt.show()

# MA model
model = ARIMA(ts_log, order=(0, 1, 2))
result_MA = model.fit(disp=-1)
plt.plot(ts_log_diff)
plt.plot(result_MA.fittedvalues, color='red')
# Same correction as for the AR model above: sum of squares, not square of sum.
plt.title('MA model RSS:%.4f' % sum((result_MA.fittedvalues - ts_log_diff) ** 2))
plt.show()

# ARIMA: combining the two (AR and MA) gives a better fit
def arima_models(ts_log, p, d, q):
    """Fit an ARIMA model of order ``(p, d, q)`` to ``ts_log``.

    Args:
        ts_log: series handed to ``ARIMA`` as the endogenous data.
        p: autoregressive order.
        d: differencing order.
        q: moving-average order.

    Returns:
        The object returned by ``ARIMA(...).fit(disp=-1)``.
    """
    order = (p, d, q)
    return ARIMA(ts_log, order=order).fit(disp=-1)
def TrainTimeSeries(dataset, p, d, q, freq):
    """Build an unfitted ARIMA(p, d, q) model when ``freq`` is positive.

    Args:
        dataset: data handed to ``ARIMA`` as the endogenous series.
        p: autoregressive order.
        d: differencing order.
        q: moving-average order.
        freq: gate value; the model is only built when ``freq > 0``.

    Returns:
        The ``ARIMA`` model object (``fit`` is not called here), or ``None``
        when ``freq > 0`` does not hold.
    """
    if not freq > 0:
        return None
    arima_order = (p, d, q)
    return ARIMA(dataset, order=arima_order)
示例#34
0
    def gen_ohlcv(interval: int) -> go.Figure:
        """Generate OHLCV Chart for BTCUSD with predicted price overlay.

        Loads candle data through ``bitfinex_candles_api()``, fits an
        ARIMA(3, 1, 0) model on the last 60 log returns of ``Close``, stores
        the one-step forecast in ``config.df_pred`` and returns a figure that
        combines the last 50 candles with the recent predicted closes.

        Args:
            interval: update the graph based on an interval. The current body
                never reads it; only the commented-out code below did.

        Returns:
            A ``go.Figure`` holding the candlestick trace and the
            predicted-close line.

        """
        # hack to wrap interval around available data.  OOS starts at 1500,
        # df has a total of 2274 rows after processing to wrap around
        # 2274-1500 ~ 750. Reset prediction data to empty df.
        # interval = interval % 750

        # print("interva is {}...".format(interval))

        # read data from source
        # df = get_ohlcv_data(interval - 100, interval)
        df = bitfinex_candles_api()
        # log return of each candle relative to the previous close; the first
        # row has no predecessor and therefore becomes NaN.
        df["log_ret"] = np.log(df.Close) - np.log(df.Close.shift(1))

        print("\ndata df loaded, starting prediction...\n")
        # online training and forecast: the model is refitted on every call
        # using only the 60 most recent log returns.
        # NOTE(review): log_ret is already a first difference of log prices and
        # order d=1 differences it once more -- confirm this is intended.
        model = ARIMA(df.tail(60)["log_ret"], order=(3, 1, 0)).fit(disp=0)
        # first element of the forecast() result; indexed with [0] below, so
        # presumably an array of point forecasts -- TODO confirm.
        pred = model.forecast()[0]

        print("\nprediction ended, writing to output df...")

        # save forecast to output dataframe. should be dB irl.
        # The forecast is stamped one minute after the last candle's index.
        next_dt = df.tail(1).index[0] + pd.Timedelta("1 minute")
        # Row layout: [predicted log return, predicted close], where the
        # predicted close is exp(predicted log return) * last observed close.
        config.df_pred.loc[next_dt] = [
            pred[0],
            (np.exp(pred) * df.tail(1).Close.values)[0],
        ]
        print("\nnext datetime is {}...".format(next_dt))
        # get index location of period (one past the row just written, so it
        # can be used as an exclusive slice end).
        loc = config.df_pred.index.get_loc(next_dt) + 1
        print("\nloc is {}...".format(loc))

        # slices for the past N periods prediction for plotting (at most 30 rows)
        # NOTE(review): the upper bound is capped with len(df), the candle
        # frame, not len(config.df_pred) -- confirm this is intended.
        df_pred_plot = config.df_pred.iloc[slice(max(0, loc - 30),
                                                 min(loc,
                                                     len(df)))].sort_index()
        print("\n set pred df for plotting...\n", df_pred_plot)

        # plotting ohlc candlestick for the 50 most recent candles
        trace_ohlc = go.Candlestick(
            x=df.tail(50).index,
            open=df["Open"].tail(50),
            close=df["Close"].tail(50),
            high=df["High"].tail(50),
            low=df["Low"].tail(50),
            opacity=0.5,
            hoverinfo="skip",
            name="BTCUSD",
        )

        # plotting prediction line from the pred_Close column of the slice
        trace_line = go.Scatter(
            x=df_pred_plot.index,
            y=df_pred_plot.pred_Close,
            line_color="yellow",
            mode="lines+markers",
            name="Predicted Close",
        )

        # colours come from the shared config.app_color mapping
        layout = go.Layout(
            plot_bgcolor=config.app_color["graph_bg"],
            paper_bgcolor=config.app_color["graph_bg"],
            font={"color": "#fff"},
            height=700,
            xaxis={
                "showline": False,
                "showgrid": False,
                "zeroline": False
            },
            yaxis={
                "showgrid": True,
                "showline": True,
                "fixedrange": True,
                "zeroline": True,
                "gridcolor": config.app_color["graph_line"],
                "title": "Price (USD$)",
            },
        )

        return go.Figure(data=[trace_ohlc, trace_line], layout=layout)
def StartARIMAForecasting(Actual, P, D, Q):
    """Fit ARIMA(P, D, Q) on ``Actual`` and return the default forecast.

    Args:
        Actual: observed series handed to ``ARIMA``.
        P: autoregressive order.
        D: differencing order.
        Q: moving-average order.

    Returns:
        The first element of the fitted model's ``forecast()`` result.
    """
    fitted = ARIMA(Actual, order=(P, D, Q)).fit(disp=0)
    forecast_result = fitted.forecast()
    return forecast_result[0]
        ptime.append(temp)
        sumtime.append(len(temp))

#dataset
from statsmodels.tsa.arima_model import ARIMA
time = [float(i) for i in time]
time = pd.Series(time, index=tstamp)

#separate training-test set split
size = int(len(time) - 100)
train, test = time[0:size], time[size:len(time)]
history = [x for x in train]
predictions = list()

#train ARIMA model
model = ARIMA(history, order=(3, 1, 5))
model_fit = model.fit(disp=0)

#forecast the next 100 pieces of data
output = model_fit.forecast(steps=100)[0]
output = [x for x in output]
test = [x for x in test]

fig = plt.figure(figsize=(10, 5))

#plot the predicted vs. expected graph
ax = fig.add_subplot(111)
ax.plot(test, label="Observed")
ax.plot(output, label="Predicted")
plt.xlabel("Time")
plt.ylabel("Number of Crime")
示例#37
0
data.append(train_x2)
data.append(train_x3)
#print(data[0])
#print(train_x)

for currentdata in data:
    TS = currentdata
    final_aic = math.inf
    final_bic = math.inf
    final_order = (0, 0, 0)
    #print(final_order)
    for p in range(0, 3):
        for d in range(1, 3):
            for q in range(0, 3):
                try:
                    model = ARIMA(TS, order=(p, d, q))
                    #print(p,q,d)
                    results_ARIMA = model.fit(disp=-1)
                    current_aic = results_ARIMA.aic  #compute AIC error on the model formed so far
                    current_bic = results_ARIMA.bic  #compute BIC error on the model formed so far
                    #print(p,d,q)
                    if (
                            current_bic < final_bic and current_aic < final_aic
                    ):  #if current error is minimum then update all the order,model etc
                        final_aic = current_aic
                        final_bic = current_bic
                        final_order = (p, d, q)
                        '''results_final_ARIMA = final_arima.fit()
                        print(results_final_ARIMA.summary())
                        #final_accuracy = accuracy(model)'''
                except (ValueError, RuntimeError, TypeError, NameError):
示例#38
0
df.diff().plot()

# In other words, we make the time serie "stationary"



# ======================= ARIMA =======================
# ARIMA model is the combination of these two concepts.
# ARIMA uses the correlation with previous time steps to make forecast.

## ARIMA = AutoRregression + I (remove trend) + Moving Average
# This is the same concept as we have seen in visualization.
# (Moving average is for errors, which we won't use here)

from statsmodels.tsa.arima_model import ARIMA
arima = ARIMA(df.consumption, order=(5,1,0))
model_fit = arima.fit() #(disp=0)
prediction = model_fit.forecast()

df.plot()
prediction.plot()
print(model_fit.summary())



# ================== Train/Test split ==================


'''
END OF THE INTERMEDIATE COURSE
'''
示例#39
0
文件: arima.py 项目: dominpn/PKB-ML
# rolling_mean = df_log.rolling(window=12).mean()
# df_log_minus_mean = df_log - rolling_mean
# df_log_minus_mean.dropna(inplace=True)
# get_stationarity(df_log_minus_mean)
#
# rolling_mean_exp_decay = df_log.ewm(halflife=12, min_periods=0, adjust=True).mean()
# df_log_exp_decay = df_log - rolling_mean_exp_decay
# df_log_exp_decay.dropna(inplace=True)
# get_stationarity(df_log_exp_decay)
#
# df_log_shift = df_log - df_log.shift()
# df_log_shift.dropna(inplace=True)
# get_stationarity(df_log_shift)

decomposition = seasonal_decompose(df_log)
model = ARIMA(df_log, order=(5, 1, 0))
results = model.fit(disp=-1)
#plt.plot(df_log_shift)
plt.plot(results.fittedvalues, color='red')
plt.show()

predictions_ARIMA_diff = pd.Series(results.fittedvalues, copy=True)
predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum()
predictions_ARIMA_log = pd.Series(df_log['PKB'].iloc[0], index=df_log.index)
predictions_ARIMA_log = predictions_ARIMA_log.add(
    predictions_ARIMA_diff_cumsum, fill_value=0)
predictions_ARIMA = np.exp(predictions_ARIMA_log)
plt.plot(df)
predictions_ARIMA.head()
plt.plot(predictions_ARIMA)
from pandas.tools.plotting import autocorrelation_plot

def parser(x):
    """Parse a ``'<y>-<mm>'`` label into a datetime by prefixing ``'190'``.

    The prefix turns a one-digit year such as ``'1-01'`` into ``'1901-01'``,
    which is then parsed with the ``'%Y-%m'`` format.
    """
    stamp = '190' + x
    return datetime.strptime(stamp, '%Y-%m')

series = read_csv('shampoo-sales.csv', header=0, parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)
print(series.head())
series.plot()
pyplot.show()

autocorrelation_plot(series)
pyplot.show()


# fit model
model = ARIMA(series, order=(5,1,0))
model_fit = model.fit(disp=0)
print(model_fit.summary())

# plot residual errors
residuals = DataFrame(model_fit.resid)
residuals.plot()
pyplot.show()

residuals.plot(kind='kde')
pyplot.show()
print(residuals.describe())

# http://www.statsmodels.org/devel/generated/statsmodels.tsa.arima_model.ARIMA.predict.html

X = series.values
def time_series_analysis():
    """Fit ARIMA(6, 1, 0) on 'ElectricityBy15Minutes.csv' and walk-forward test it.

    Steps, in order:
      1. Read three CSV files; only ``series2`` is modelled, the other two
         are just printed with ``head()``.
      2. Fit ARIMA(6, 1, 0) on the full ``series2``, print its summary and
         show a KDE plot plus ``describe()`` of the residuals.
      3. Split the values 66% / 34%, then for 300 steps refit the model on
         the growing history, forecast one step and append the true
         observation to the history.
      4. Print the MSE over those 300 steps and hand observed/predicted
         values to ``generate_comparison_plot`` and ``cal_error``.

    Takes no arguments and returns None; all results are printed or plotted.
    """

    from pandas import datetime

    def parser(x):
        """Parse '<y>-<mm>' as a date by prefixing '190' (e.g. '1-01' -> 1901-01)."""
        # NOTE: this helper is not passed to any read_csv call below.
        return datetime.strptime('190' + x, '%Y-%m')
        # return datetime.strptime(x,'%Y-%m-%d')

    index1 = randint(1, 1450)  # hard coded
    # index2, data and label are not used by any active code below (only the
    # commented-out dict construction refers to data/label).
    index2 = randint(1, 1412)
    data, label = get_time_series_index_based_method1(index1)

    # Only printed further down (series.head()); not used for modelling.
    series = pd.read_csv('elec.csv',
                         header=0,
                         parse_dates=[0],
                         index_col=0,
                         squeeze=True)
    # series = {  # 'time': pd.Series(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06']),
    #     'time': pd.Series(['1901-01', '1902-01', '1903-01', '1904-01', '1905-01', '1906-01', '1907-01', '1908-01']),
    #     'value': pd.Series([19330.143540669856600, 30641.148325358849700, 23813.397129186604700, 23272.727272727275100,
    #                         22866.028708133973200, 23961.722488038278900, 25856.459330143542400, 29598.086124401913600])}
    # series = {
    #     'time':pd.Series(data),
    #     'value':pd.Series(label)
    # }

    # series = pd.DataFrame(series)
    #     series.plot()
    #     pyplot.show()
    from sklearn.metrics import mean_squared_error
    from pandas import read_csv
    from statsmodels.tsa.arima_model import ARIMA
    # Only printed further down (series1.head()); not used for modelling.
    series1 = read_csv('shampoo.csv',
                       header=0,
                       parse_dates=[0],
                       index_col=0,
                       squeeze=True)
    # series = {  # 'time': pd.Series(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06']),
    #     'time': pd.Series(['1901-01', '1902-01', '1903-01', '1904-01', '1905-01', '1906-01', '1907-01', '1908-01']),
    #     'value': pd.Series([19330.143540669856600, 30641.148325358849700, 23813.397129186604700, 23272.727272727275100,
    #                         22866.028708133973200, 23961.722488038278900, 25856.459330143542400, 29598.086124401913600])}
    # fit model
    # series = pd.DataFrame(series)
    print("series1.head")
    print(series1.head())
    print("series.head")
    print(series.head())
    # series2 = read_csv('f**k.csv', header=0, parse_dates=[
    #                   0], index_col=0,squeeze=True)#\names=["day","value"])
    # The series that is actually modelled; columns are renamed to day/value.
    series2 = read_csv('ElectricityBy15Minutes.csv',
                       header=0,
                       parse_dates=[0],
                       index_col=0,
                       squeeze=True,
                       names=["day", "value"])

    # Fit once on the complete series to inspect the summary and residuals.
    model = ARIMA(series2, order=(6, 1, 0))
    model_fit = model.fit(disp=0)
    print(model_fit.summary())
    # plot residual errors
    residuals = DataFrame(model_fit.resid)
    # residuals.plot()
    #     pyplot.show()
    residuals.plot(kind='kde')
    pyplot.show()
    print(residuals.describe())
    X = series2.values
    print("X is ")
    print(X)
    # First 66% of the values seed the history; the remainder is the test set.
    size = int(len(X) * 0.66)
    train, test = X[0:size], X[size:len(X)]
    history = [x for x in train]
    predictions = list()

    import time
    time.sleep(5)
    # Walk-forward validation over a fixed 300 steps: refit on everything seen
    # so far, forecast one step, then reveal the true observation.
    # NOTE(review): test[t] is indexed up to 299, so this needs at least 300
    # test observations -- confirm the data always provides them.
    for t in range(300):
        model = ARIMA(history, order=(6, 1, 0))
        model_fit = model.fit(disp=0)
        output = model_fit.forecast()
        yhat = output[0]
        predictions.append(yhat)
        obs = test[t]
        history.append(obs)
        print('predicted=%f, expected=%f' % (yhat, obs))
    print("test-----")
    print(test[:10])
    print("history-----")
    print(history[-10:])
    print("predictions-----")
    print(predictions)
    # history[-300:] holds exactly the 300 observations appended in the loop,
    # i.e. the true values matching the 300 predictions.
    error = mean_squared_error(history[-300:], predictions)
    print('Test MSE: %.3f' % error)
    print(predictions)
    generate_comparison_plot(history[-300:], predictions)
    print(cal_error(history[-300:], predictions))
示例#42
0
    #print(sc_rolmean,sc_rolstd)

    check_adfuller(turb_ma_diff['turb(uS)'])
    check_mean_std(turb_ma_diff, 'Turb(FNU)')


#%%
#X = series.values

train, test = turb_train.values, turb_test.values
turb_history = [x for x in train]
turb_predictions = list()
turb_diff = list()
k = 1921
for t in range(len(test)):
    model = ARIMA(turb_history, order=(1, 0, 1))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    yhat = output[0]
    turb_predictions.append(yhat)
    obs = test[t]
    turb_history.append(obs)
    diff = obs - yhat
    turb_diff.append(diff)
    print(
        'TurbParameter Index= %d, predicted=%f, expected=%f, difference = %f' %
        (k, yhat, obs, diff))
    k = k + 1
    if (k == 3000):
        break
#test1, test2 = tts(test,test_size = 337, random_state=0, shuffle=False)
示例#43
0
# load data
data = pd.read_csv('wholedata.csv')
series = data['value']

# prepare data
X = series.values
X = X.astype('float32')
train_size = int(len(X) * 0.99)
train, test = X[0:train_size], X[train_size:]
# walk-forward validation
history = [x for x in train]
predictions = list()

for i in range(len(test)):
    # predict
    model = ARIMA(history, order=(2,1,3))
    model_fit = model.fit(trend='nc', disp=0)
    yhat = model_fit.forecast()[0]
    predictions.append(yhat)
    # observation
    obs = test[i]
    history.append(obs)
# errors
residuals = [test[i]-predictions[i] for i in range(len(test))]
residuals = DataFrame(residuals)
print(residuals.describe())
pyplot.figure()
pyplot.subplot(211)
residuals.hist(ax=pyplot.gca())
pyplot.subplot(212)
residuals.plot(kind='kde', ax=pyplot.gca())
示例#44
0
#
# Statsmodels also includes things like ARMA and ARIMA models that can be used to make predictions from time series. This data is not necessarily very stationary and often has strong periodic effects, so these may not necessarily work very well. I'll look at ARIMA predictions for the same set of very high viewcount pages.

# In[54]:

from statsmodels.tsa.arima_model import ARIMA
import warnings

cols = train.columns[1:-1]
for key in top_pages:
    data = np.array(train.loc[top_pages[key], cols], 'f')
    result = None
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')
        # Try the richer (2, 1, 4) model first and fall back to (2, 1, 2).
        # Catch Exception rather than everything so Ctrl-C still works.
        for order in ([2, 1, 4], [2, 1, 2]):
            try:
                arima = ARIMA(data, order)
                result = arima.fit(disp=False)
            except Exception:
                continue
            break
    if result is None:
        # Both fits failed: report the page and skip it instead of crashing
        # on result.predict below.
        print(train.loc[top_pages[key], 'Page'])
        print('\tARIMA failed')
        continue
    #print(result.params)
    pred = result.predict(2, 599, typ='levels')
    x = list(range(600))
    i = 0

    plt.plot(x[2:len(data)], data[2:], label='Data')
    plt.plot(x[2:], pred, label='ARIMA Model')
示例#45
0
from statsmodels.tsa.arima_model import ARIMA
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
st.write(""" 
AdventureWorks Forecasting
""")


df = pd.read_csv('csv_data/forecast_data.csv', index_col=False)
df['RateChangeDate'] = pd.to_datetime(df['RateChangeDate'])
df.set_index('RateChangeDate', inplace=True)
ts = df['paidAmountSum']

st.text('This is a forecast')
st.line_chart(df)
st.dataframe(df)

model = ARIMA(ts, order=(1, 1, 1))
results = model.fit()
# results.plot_predict(1, 220)
values = st.sidebar.slider("Forecast Range", 200, 300)
st.pyplot(results.plot_predict(1, values))
示例#46
0
#import the dependencies
from random import random
from statsmodels.tsa.arima_model import ARIMA
#Generate randomized dataset in the range of 1 to 1000
dataset = [x + random() for x in range(1, 1000)]
# # fitting the model
arima = ARIMA(dataset, order=(1, 1, 1))
arima_fit = arima.fit(disp=False)
# make prediction
y = arima_fit.predict(len(dataset), len(dataset), typ='levels')
print(y)

#try to fiddle with the parameters of the ARIMA model
示例#47
0
            elif i == len(dataframe):
                break
            else:
                i += 1


#remove_points_with_propagation(df["9"][:1000])

print('Counter: ' + str(counter))
print('Number of rows with 0 as value: ' +
      str((df["9"] == 0).astype(int).sum(axis=0)))
series.plot()
plt.show()

#Creating model
model = ARIMA(series, order=(1, 0, 3))
model_fit = model.fit(disp=0)
print(model_fit.summary())

# Plot residual errors
residuals = pd.DataFrame(model_fit.resid)
fig, ax = plt.subplots(1, 2)
residuals.plot(title="Residuals", ax=ax[0])
residuals.plot(kind='kde', title='Density', ax=ax[1])
plt.show()

model_fit.plot_predict(dynamic=False)
plt.xlim(["2018-10-08", "2018-10-12"])
plt.ylabel("Voltage")
plt.show()
    print('\t{}: {}'.format(key, value))

korona_log = np.log(korona_death)
plt.plot(korona_log)
plt.show()

rolling_mean = korona_log.rolling(window=12).mean()
korona_log_minus_mean = korona_log - rolling_mean
korona_log_minus_mean.dropna(inplace=True)
plt.plot(korona_log_minus_mean)
plt.show()

korona_log_shift = korona_log - korona_log.shift()
korona_log_shift.dropna(inplace=True)
plt.plot(korona_log_shift)
plt.show()

model = ARIMA(new_korona.death, order=(1, 1, 2))
model_fit = model.fit(disp=0)
print(model_fit.summary())

residuals = DataFrame(model_fit.resid)
residuals.plot()
pyplot.show()
residuals.plot(kind='kde')
pyplot.show()
print(residuals.describe())

model_fit.plot_predict(dynamic=False)
plt.show()
get_ipython().magic('matplotlib inline')
dta = (series - series.mean()) / (series.std())
plt.acorr(dta,maxlags = len(dta) -1, linestyle = "solid", usevlines = False, marker='')
plt.show()
autocorrelation_plot(series)
plt.show()


# In[29]:


from pandas import datetime
from pandas import DataFrame
from statsmodels.tsa.arima_model import ARIMA
# fit the model
arima_mod = ARIMA(series,order=(5,1,0))
arima_mod_fit = arima_mod.fit(disp=0)
print(arima_mod_fit.summary())
#Residual Errors
residuals = DataFrame(arima_mod_fit.resid)
residuals.plot()
plt.show()
residuals.plot(kind='kde')
plt.show()
print(residuals.describe())


# In[30]:


from pandas import read_csv
示例#50
0
def arimaForecast(ts):
    """Walk-forward ARIMA(3, 1, 4) forecast of the last 100 ``ts['high']`` values.

    The first ``len(X) - 100`` values seed the history. For every remaining
    observation the model is refitted on the history, a one-step forecast is
    made, a ``Backtester`` is told to sell/buy when the forecast crosses
    +18% / -18% thresholds, and the true observation is appended to the
    history. Every 24 steps a bar chart of the absolute errors of that period
    is saved under ``testingarimaplots/``. At the end the predictions are
    plotted against the test values and summary error figures are printed.

    Args:
        ts: object exposing ``ts['high'].values`` and ``ts.index``
            (presumably a pandas DataFrame -- confirm against the caller).

    Returns:
        None. Returns early, before the final plot and summary, as soon as
        ``backtest.getWealth(...)`` drops to 53 or below.
    """
    #X=ts['Close'].values
    X = ts['high'].values
    #size=int(len(X)*0.98)
    size = len(X) - 100
    train, test = X[0:size], X[size:len(X)]
    test_length = len(test)
    preds = []
    history = list(train)
    backtest = Backtester(500, ts['high'].values, ts.index)
    # Open a position at the last training value.
    backtest.buy(train[-1], len(train) - 1)
    bought_at = train[-1]
    sold_at = None
    minGlobalDifference = math.inf
    dailyforecast = []  # absolute errors of the current 24-step period
    globaldiffs = []  # absolute errors of the whole run
    n_forecast = 0
    for i in range(test_length):
        print(i, " : ", test_length)
        if i != 0 and i % 24 == 0:
            # Close the finished 24-step period: save its error bar chart and
            # track the smallest absolute error seen so far.
            plt.delaxes()
            plt.bar(np.arange(len(dailyforecast)), dailyforecast)
            plt.savefig(
                "testingarimaplots/error_histogram{}.png".format(n_forecast))
            print("SUMMARY OF PERIOD MEAN OF DIFFERENCES: {}".format(
                mean(dailyforecast)))
            if min(dailyforecast) < minGlobalDifference:
                minGlobalDifference = min(dailyforecast)
            print("STARTING FORECASTING PERIOD (DAY AHEAD)")
            # NOTE(review): history already ends with test[i - 24:i] because
            # the true value is appended every step, so this looks like a
            # no-op left over from the commented-out variant below -- confirm.
            history = history[0:-24] + [x for x in test[i - 24:i]]
            dailyforecast = []
            n_forecast += 1
        model = ARIMA(history, order=(3, 1, 4))
        fit = model.fit(disp=0)
        out = fit.forecast()
        # Sell once the forecast is at least 18% above the buy reference.
        if out[0] >= bought_at * 1.18:
            backtest.sell(test[i], len(train) + i)
            sold_at = out[0]
        # Buy back once the forecast is at least 18% below the sell reference.
        # `is not None` is an identity test; `!= None` on an array would be
        # evaluated element by element.
        if sold_at is not None and out[0] <= sold_at * 0.82:
            backtest.buy(test[i], len(train) + i)
            bought_at = out[0]
        if i % 100 == 0:
            print("WEALTH {} ".format(backtest.getWealth(len(train) + i)))
        if backtest.getWealth(len(train) + i) <= 53:
            return
        preds.append(out[0])
        diff = abs(preds[-1] - test[i])
        dailyforecast.append(diff[0])
        globaldiffs.append(diff[0])
        #if diff>=150: history.append(test[i])
        #else: history.append(preds[-1])
        history.append(test[i])
        print("PREDICTED {} EXPECTED {} DIFFERENCE {}".format(
            out[0], test[i], diff[0]))
    plt.plot(preds)
    plt.plot(test)
    plt.show()
    # mean_squared_error returns the MSE; take the square root so the value
    # matches the "RMSE" label.
    print(
        "RMSE: {}, SMALLEST DIFF BETWEEN REAL AND PREDICTED {} MEAN OF ABSOLUTE DIFFERENCES {}"
        .format(np.sqrt(mean_squared_error(test, preds)), minGlobalDifference,
                mean(globaldiffs)))
ts_log_diff.plot(figsize=(15, 6))

test_stationarity(ts_log_diff.dropna())  #.dropna(inplace=True))

# Using decomposition method to decompose time series
from pylab import rcParams
plt.figure(5)
rcParams['figure.figsize'] = 15, 6
#decomposition = sm.tsa.seasonal_decompose(ts_log, model = 'additive')
decomposition = sm.tsa.seasonal_decompose(ts_log, freq=12, model='additive')
decomposition.plot()
### Commented: ở đây decomposition ko có giá trị trả về. Tức là hàm seasonal_decompose ko trả ra kết quả gì cả
### nguyên nhân là do: gọi chuỗi dừng trong Arima, vì ts_log ko có tính dừng, nên khi gọi hàm này ra, kết quả ko có

# Build ARIMA model
arima_model = ARIMA(ts_log, order=(2, 1, 2))
arima_model_fit = arima_model.fit(disp=-1)

plt.figure(6)
plt.plot(ts_log_diff)
plt.plot(arima_model_fit.fittedvalues, color='red')
plt.title('RSS: %.4f' % np.nansum(
    (arima_model_fit.fittedvalues - ts_log_diff)**2))

# Read summary of ARIMA model
print(arima_model_fit.summary())

# Convert predicted values to original scale
predictions_ARIMA_diff = pd.Series(arima_model_fit.fittedvalues, copy=True)
print(predictions_ARIMA_diff.head()
      )  # these are fitted values on the transformed data
示例#52
0
plt.axhline(y=0,linestyle='--',color='gray')


#plot acf
plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y=0,linestyle='--',color='gray')


# In[26]:


from statsmodels.tsa.arima_model import ARIMA
#AR Model

model = ARIMA(indexedDataset_logScale,order=(2, 1, 2))

# print(model)
result_AR= model.fit(disp=-1)
plt.plot(datasetLogDiffShifting)
plt.plot(result_AR.fittedvalues,color='red')
plt.title('RSS: %.4f'%sum((result_AR.fittedvalues-datasetLogDiffShifting["#Passengers"])**2))
print('Plotting AR Model')


# In[27]:


from statsmodels.tsa.arima_model import ARIMA
#MA Model
示例#53
0
def ARIMA_model(time_series_diff, args, name):
    """Select an ARMA order by information criterion and fit the model.

    Args:
        time_series_diff: stationary time series after differencing.
        args: parsed arguments; reads ``plot``, ``ic``, ``tol``, ``method``,
            ``max_ar`` and ``max_ma``. Invalid ``ic`` / ``tol`` / ``method``
            values are overwritten in place with their defaults.
        name: the name of ``time_series_diff``; ACF/PACF plots are only
            written for ``"trend_diff"`` and ``"residual_diff"``.

    Returns:
        ``(model_fit, min_order)``: the fitted ARIMA result and the
        ``(p, q)`` order chosen by ``arma_order_select_ic``.
    """
    if args.plot and name in ["trend_diff", "residual_diff"]:
        fig, axes = plt.subplots(1, 2, figsize=(16, 3), dpi=100)
        plot_acf(time_series_diff.tolist(),
                 lags=min(50,
                          len(time_series_diff) - 1),
                 ax=axes[0])
        plot_pacf(time_series_diff.tolist(),
                  lags=min(50,
                           len(time_series_diff) - 1),
                  ax=axes[1])
        plt.savefig(name + "_acf_pacf.png")
        plt.close()

    # check if args.ic is illegal.
    if args.ic not in ["bic", "aic"]:
        logger.warning(
            "The information criteria is illegal. Turn to default ic: BIC")
        args.ic = "bic"

    # check the value of convergence tol.
    if args.tol > 0.01:
        logger.warning(
            "The convergence tolerance is too large. Turn to use default value: 1e-8"
        )
        args.tol = 1e-8

    # check the likelihood function used.
    if args.method not in ["css-mle", "mle", "css"]:
        logger.warning(
            "The likelihood function is illegal. Turn to default choice: css-mle"
        )
        args.method = "css-mle"

    evaluate = sm.tsa.arma_order_select_ic(time_series_diff,
                                           ic=args.ic,
                                           trend="c",
                                           max_ar=args.max_ar,
                                           max_ma=args.max_ma)
    # get the parameter for ARIMA model.
    min_order = evaluate[args.ic + "_min_order"]

    # Keep drawing random starting parameters until a fit succeeds.
    while True:
        # construct the ARIMA model; d is 0 because the series has already
        # been differenced previously.
        model = ARIMA(time_series_diff,
                      order=(min_order[0], 0, min_order[1]))
        try:
            model_fit = model.fit(
                disp=False,
                # one random start value per AR term, per MA term, plus the
                # constant.
                start_params=np.random.rand(min_order[0] + min_order[1] + 1),
                method=args.method,
                # Some posts' experimentation suggests that ARIMA models may
                # be less likely to converge with the trend term disabled,
                # especially when using more than zero MA terms.
                trend="c",
                transparams=True,
                # this solver gave the best RMSE & execution time.
                solver="lbfgs",
                tol=args.tol,  # The convergence tolerance. Default is 1e-08.
            )
        except Exception:
            # Only ordinary errors trigger a retry. KeyboardInterrupt and
            # SystemExit propagate, so this otherwise unbounded loop can
            # still be interrupted.
            logger.warning("Error occurs, try another starting parameters.")
        else:
            return model_fit, min_order
示例#54
0
plt.show()

from collections import deque
items = deque(np.asarray(fft_df['absolute'].tolist()))
items.rotate(int(np.floor(len(fft_df)/2)))
plt.figure(figsize=(10, 7), dpi=80)
plt.stem(items)
plt.title('Figure 4: Components of Fourier transforms')
plt.show()

from statsmodels.tsa.arima_model import ARIMA
from pandas import DataFrame
from pandas import datetime

series = data_FT['GS']
model = ARIMA(series, order=(5, 1, 0))
model_fit = model.fit(disp=0)
print(model_fit.summary())
from pandas.tools.plotting import autocorrelation_plot
autocorrelation_plot(series)
plt.figure(figsize=(10, 7), dpi=80)
plt.show()
plt.figure(figsize=(12, 6), dpi=100)
plt.plot(test, label='Real')
plt.plot(predictions, color='red', label='Predicted')
plt.xlabel('Days')
plt.ylabel('USD')
plt.title('Figure 5: ARIMA model on GS stock')
plt.legend()
plt.show()
示例#55
0
def smape(y_true, y_pred):
    """Return the symmetric mean absolute percentage error on a 0-200 scale.

    Each term is ``200 * |y_true - y_pred| / (|y_true| + |y_pred|)``; the
    result is the mean over all terms. A pair where both values are zero
    produces a 0/0 term.
    """
    abs_error = np.abs(y_true - y_pred)
    scale = np.abs(y_true) + np.abs(y_pred)
    return np.mean(abs_error * 200 / scale)


# In[18]:

# Walk-forward validation on the 'Open' column: refit ARIMA(3, 1, 0) on the
# growing history, forecast one step, then reveal the true observation.
train_val = train_set['Open'].values
test_val = test_set['Open'].values
history = list(train_val)
print(type(history))  #this is list of training data
prediction = []
for t, obs in enumerate(test_val):
    model = ARIMA(history, order=(3, 1, 0))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    # Store the forecast as a plain scalar. A list of 1-element arrays would
    # broadcast against test_val inside smape() and compare every prediction
    # with every observation.
    yhat = float(np.ravel(output[0])[0])
    prediction.append(yhat)
    history.append(obs)
error = mean_squared_error(test_val, prediction)
# Apply the format with %; passing the value as a second print() argument
# printed the raw "%0.3f" placeholder followed by the number.
print("Mean squared error : %0.3f" % error)
error2 = smape(test_val, prediction)
print("Symmetric mean absolute percentage error: %0.3f" % error2)

# In[19]:

print('Testing Mean Squared Error: %.3f' % error)
print("Symmetric mean absolute percentage error: %0.3f" % error2)
示例#56
0
#!/usr/bin/env python2
# -*- coding: utf-8 -*-

from statsmodels.tsa.arima_model import ARIMA

SERIES = [16, 20, 32, 40, 20, 18, 11, 21, 4, 6, 31, 48, 43, 49, 37]

model = ARIMA(SERIES, order=(4, 1, 1))
model_fit = model.fit(disp=0)

prediction = model_fit.predict(16, 19, typ='levels')

print prediction
示例#57
0
def parser(x):
    """Parse a ``'<y>-<mm>'`` label into a datetime by prefixing ``'190'``.

    The prefix turns a one-digit year such as ``'1-01'`` into ``'1901-01'``,
    which is then parsed with the ``'%Y-%m'`` format.
    """
    stamp = '190' + x
    return datetime.strptime(stamp, '%Y-%m')


series = read_csv('shampoo-sales.csv',
                  header=0,
                  parse_dates=[0],
                  index_col=0,
                  squeeze=True,
                  date_parser=parser)
X = series.values
size = int(len(X) * 0.66)
train, test = X[0:size], X[size:len(X)]
history = [x for x in train]
predictions = list()
for t in range(len(test)):
    model = ARIMA(history, order=(5, 1, 0))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat)
    obs = test[t]
    history.append(obs)
    print('predicted=%f, expected=%f' % (yhat, obs))
error = mean_squared_error(test, predictions)
print('Test MSE: %.3f' % error)
# plot
pyplot.plot(test)
pyplot.plot(predictions, color='red')
pyplot.show()
示例#58
0
#-*- coding: utf-8 -*-
#确定最佳p、d、q值
import pandas as pd

#参数初始化
discfile = '../data/discdata_processed.xls'

data = pd.read_excel(discfile, index_col='COLLECTTIME')
data = data.iloc[:len(data) - 5]  #不使用最后5个数据
xdata = data['CWXT_DB:184:D:\\']

from statsmodels.tsa.arima_model import ARIMA

# Order selection: grid-search p and q (with d fixed at 1) by BIC.
pmax = int(len(xdata) / 10)  # rule of thumb: the order rarely exceeds length/10
qmax = int(len(xdata) / 10)  # rule of thumb: the order rarely exceeds length/10
bic_matrix = []  # BIC matrix: row index is p, column index is q
for p in range(pmax + 1):
    tmp = []
    for q in range(qmax + 1):
        # Some (p, q) combinations fail to fit; record them as NaN so the
        # matrix stays numeric and they are ignored by stack()/idxmin().
        try:
            tmp.append(ARIMA(xdata, (p, 1, q)).fit().bic)
        except Exception:
            tmp.append(float('nan'))
    bic_matrix.append(tmp)

bic_matrix = pd.DataFrame(bic_matrix)  # the minimum can be read off this frame

# Flatten with stack(), then idxmin() gives the (p, q) position of the minimum.
p, q = bic_matrix.stack().idxmin()
print(u'BIC最小的p值和q值为:%s、%s' % (p, q))
    mpl.rcParams['font.sans-serif'] = [u'SimHei']
    mpl.rcParams['axes.unicode_minus'] = False

    x = data['Passengers'].astype(np.float)
    x = np.log(x)
    print x.head(10)

    show = 'prime'   # 'diff', 'ma', 'prime'
    d = 1
    diff = x - x.shift(periods=d)
    ma = x.rolling(window=12).mean()
    xma = x - ma

    p = 2
    q = 2
    model = ARIMA(endog=x, order=(p, d, q))     # 自回归函数p,差分d,移动平均数q
    arima = model.fit(disp=-1)                  # disp<0:不输出过程
    prediction = arima.fittedvalues
    print type(prediction)
    y = prediction.cumsum() + x[0]
    mse = ((x - y)**2).mean()
    rmse = np.sqrt(mse)

    plt.figure(facecolor='w')
    if show == 'diff':
        plt.plot(x, 'r-', lw=2, label=u'原始数据')
        plt.plot(diff, 'g-', lw=2, label=u'%d阶差分' % d)
        #plt.plot(prediction, 'r-', lw=2, label=u'预测数据')
        title = u'乘客人数变化曲线 - 取对数'
    elif show == 'ma':
        #plt.plot(x, 'r-', lw=2, label=u'原始数据')
示例#60
0
model_fit = model.fit(maxlag=1, method='mle', disp=-1)
# make prediction
yhat = model_fit.predict(0, len(response)+10)
createPlot(yhat, response)


# In[343]:


# Autoregressive Integrated Moving Average
# ## Autoregressive Integrated Moving Average
# 
# http://www.statsmodels.org/dev/generated/statsmodels.tsa.arima_model.ARIMA.html#statsmodels.tsa.arima_model.ARIMA
# 
# https://otexts.com/fpp2/non-seasonal-arima.html
model = ARIMA(endog = response, order=(1, 0, 1))
model_fit = model.fit(disp=False)
# make prediction
yhat = model_fit.predict(0, len(response)+10)
createPlot(yhat, response)


# In[344]:


# Seasonal Autoregressive Integrated Moving Average
# ## Seasonal Autoregressive Integrated Moving Average
# http://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.sarimax.SARIMAX.html

# fit model
model = SARIMAX(response, order=(1, 1, 1), seasonal_order=(1, 1, 1, 1))