def setup_class(cls):
    """Simulate a t-distributed ARMA(2,1) series and fit it by LS and MLE.

    Fitted results and hard-coded regression targets are stored as class
    attributes for the test methods to compare against.
    """
    n_obs = 500
    ar_poly = [1, -0.5, 0.1]   # AR lag polynomial, zero lag included
    ma_poly = [1, 0.7]         # MA lag polynomial, zero lag included

    def t_innovations(n):
        # heavy-tailed innovations: Student-t with 3 degrees of freedom
        return np.random.standard_t(3, size=n)

    np.random.seed(8659567)
    x = arma_generate_sample(ar_poly, ma_poly, n_obs, sigma=1,
                             distrvs=t_innovations, burnin=500)

    mod = Arma(x)
    order = (2, 1)
    cls.res_ls = mod.fit(order=order)
    # MLE warm-started from the least-squares estimates plus a start value
    # for the scale parameter
    cls.res = mod.fit_mle(order=order,
                          start_params=np.r_[cls.res_ls[0], 1],
                          method='nm', disp=False)

    # hard-coded regression targets
    cls.res1_table = np.array(
        [[0.4339072, -0.08402653, 0.73292344, 1.61661128],
         [0.05854268, 0.05562941, 0.04034178, 0.0511207],
         [7.4118102, -1.51046975, 18.16785075, 31.62341666],
         [0., 0.1309236, 0., 0.],
         [0.06713617, 0.05469138, 0.03785006, 0.1071093],
         [0.05504093, 0.0574849, 0.04350945, 0.02510928]])
    cls.res1_conf_int = np.array([[0.31916567, 0.54864874],
                                  [-0.19305817, 0.0250051],
                                  [0.65385501, 0.81199188],
                                  [1.51641655, 1.71680602]])
    cls.ls_params = np.array([0.43393123, -0.08402678, 0.73293058])
    cls.ls_bse = np.array([0.0377741, 0.03567847, 0.02744488])
def mcarma22(niter=10, nsample=1000, ar=None, ma=None, sig=0.5):
    '''run Monte Carlo for ARMA(2,2) DGP

    Parameters
    ----------
    niter : int
        number of Monte Carlo replications
    nsample : int
        length of each simulated series (after burn-in)
    ar, ma : list or None
        lag polynomials including the zero lag; defaults used when None
    sig : float
        innovation standard deviation

    Returns
    -------
    true_params : ndarray
        true AR and MA coefficients with the zero lag dropped
    params : ndarray
        estimated parameters, one row per replication
    bse : ndarray
        standard errors; NaN rows when no covariance was returned
    '''
    if ar is None:
        ar = [1.0, -0.55, -0.1]
    if ma is None:
        ma = [1.0, 0.3, 0.2]

    estimates = []
    std_errors = []
    for _ in range(niter):
        # simulate with a 1000-observation burn-in, then demean
        series = arma_generate_sample(ar, ma, nsample + 1000, sig)[-nsample:]
        series = series - series.mean()

        model = Arma(series)
        rho_hat, cov_x, infodict, mesg, ier = model.fit((2, 2))
        estimates.append(rho_hat)

        resid = model.geterrors(rho_hat)
        sigma_hat = np.sqrt(np.dot(resid, resid) / nsample)
        if cov_x is None:
            # optimizer did not return a covariance estimate
            std_errors.append(np.nan + np.zeros_like(rho_hat))
        else:
            std_errors.append(sigma_hat * np.sqrt(np.diag(cov_x)))

    return np.r_[ar[1:], ma[1:]], np.array(estimates), np.array(std_errors)
def SimulatePC1(p=50):
    """Simulate `p` ARMA series and return the leading left singular vector.

    Relies on module-level `nyears`, `ar`, `ma` and `svd`.  Each column is
    centered on its mean over positions 500:600 before the SVD.
    """
    panel = np.zeros((nyears, p))
    for col in range(p):
        series = arma_generate_sample(ar, ma, nyears)
        # center each column on the 500:600 reference window
        panel[:, col] = series - np.mean(series[500:600])
    u, s, v = svd(panel, full_matrices=False)
    return u[:, 0]
def setup_class(cls):
    """Simulate a t-distributed ARMA(2,1) series and fit a TArma model.

    Fitted results and hard-coded regression targets are stored as class
    attributes for the test methods to compare against.
    """
    n_obs = 500
    ar_poly = [1, -0.5, 0.1]   # AR lag polynomial, zero lag included
    ma_poly = [1, 0.7]         # MA lag polynomial, zero lag included

    def t_innovations(n):
        # heavy-tailed innovations: Student-t with 3 degrees of freedom
        return np.random.standard_t(3, size=n)

    np.random.seed(8659567)
    x = arma_generate_sample(ar_poly, ma_poly, n_obs, sigma=1,
                             distrvs=t_innovations, burnin=500)

    mod = TArma(x)
    order = (2, 1)
    cls.res_ls = mod.fit(order=order)
    # MLE warm-started from the LS estimates plus start values for the
    # t degrees of freedom and the scale
    cls.res = mod.fit_mle(order=order,
                          start_params=np.r_[cls.res_ls[0], 5, 1],
                          method='nm', disp=False)

    # hard-coded regression targets
    cls.res1_table = np.array(
        [[0.46157133, -0.07694534, 0.70051876, 2.88693312, 0.97283396],
         [0.04957594, 0.04345499, 0.03492473, 0.40854823, 0.05568439],
         [9.31038915, -1.7706905, 20.05795605, 7.06632146, 17.47049812],
         [0., 0.07661218, 0., 0., 0.],
         [0.05487968, 0.04213054, 0.03102404, 0.37860956, 0.05228474],
         [0.04649728, 0.04569133, 0.03990779, 0.44315449, 0.05996759]])
    cls.res1_conf_int = np.array([[0.36440426, 0.55873839],
                                  [-0.16211556, 0.00822488],
                                  [0.63206754, 0.76896998],
                                  [2.08619331, 3.68767294],
                                  [0.86369457, 1.08197335]])
    cls.ls_params = np.array([0.43393123, -0.08402678, 0.73293058])
    cls.ls_bse = np.array([0.0377741, 0.03567847, 0.02744488])
def test_arma_generate_sample(dist, ar, ma):
    """arma_generate_sample must match a manual lfilter-based simulation.

    (Amounts to a test that scipy.signal.lfilter does what we want.)
    """
    nobs = 100
    np.random.seed(1234)
    eta = dist(nobs)

    # sample produced by the module function, regenerated from the same seed
    np.random.seed(1234)
    sample_lib = arma_generate_sample(ar, ma, nobs, distrvs=dist)

    # sample built "manually" from the same innovations
    manual = _manual_arma_generate_sample(-np.array(ar[1:]),
                                          np.array(ma[1:]), eta)
    assert_array_almost_equal(sample_lib, manual, 13)
def test_arma_order_select_ic():
    # smoke test, assumes info-criteria are right
    from statsmodels.tsa.arima_process import arma_generate_sample

    arparams = np.array([.75, -.25])
    maparams = np.array([.65, .35])
    arparams = np.r_[1, -arparams]
    # NOTE(review): the completed MA polynomial is assigned to `maparam`
    # (unused) and the raw `maparams` (no zero lag) is what gets simulated.
    # The hard-coded regression values below were generated with exactly
    # this call, so do not "fix" it without regenerating them.
    maparam = np.r_[1, maparams]
    nobs = 250
    np.random.seed(2014)
    y = arma_generate_sample(arparams, maparams, nobs)
    res = arma_order_select_ic(y, ic=['aic', 'bic'], trend='nc')
    # regression tests in case we change algorithm to minic in sas
    aic_x = np.array([[ np.nan, 552.7342255 , 484.29687843],
                      [ 562.10924262, 485.5197969 , 480.32858497],
                      [ 507.04581344, 482.91065829, 481.91926034],
                      [ 484.03995962, 482.14868032, 483.86378955],
                      [ 481.8849479 , 483.8377379 , 485.83756612]])
    bic_x = np.array([[ np.nan, 559.77714733, 494.86126118],
                      [ 569.15216446, 496.08417966, 494.41442864],
                      [ 517.61019619, 496.99650196, 499.52656493],
                      [ 498.12580329, 499.75598491, 504.99255506],
                      [ 499.49225249, 504.96650341, 510.48779255]])
    aic = DataFrame(aic_x, index=lrange(5), columns=lrange(3))
    bic = DataFrame(bic_x, index=lrange(5), columns=lrange(3))
    assert_almost_equal(res.aic.values, aic.values, 5)
    assert_almost_equal(res.bic.values, bic.values, 5)
    assert_equal(res.aic_min_order, (1, 2))
    assert_equal(res.bic_min_order, (1, 2))
    assert_(res.aic.index.equals(aic.index))
    assert_(res.aic.columns.equals(aic.columns))
    assert_(res.bic.index.equals(bic.index))
    assert_(res.bic.columns.equals(bic.columns))

    # same selection on a pandas Series with a date index, restricted orders
    index = pd.date_range('2000-1-1', freq='M', periods=len(y))
    y_series = pd.Series(y, index=index)
    res_pd = arma_order_select_ic(y_series, max_ar=2, max_ma=1,
                                  ic=['aic', 'bic'], trend='nc')
    assert_almost_equal(res_pd.aic.values, aic.values[:3, :2], 5)
    assert_almost_equal(res_pd.bic.values, bic.values[:3, :2], 5)
    assert_equal(res_pd.aic_min_order, (2, 1))
    assert_equal(res_pd.bic_min_order, (1, 1))

    # single criterion given as a plain string instead of a list
    res = arma_order_select_ic(y, ic='aic', trend='nc')
    assert_almost_equal(res.aic.values, aic.values, 5)
    assert_(res.aic.index.equals(aic.index))
    assert_(res.aic.columns.equals(aic.columns))
    assert_equal(res.aic_min_order, (1, 2))
def test(db):
    """Smoke test: simulate an ARMA(2,2) series, fit it and print a summary.

    `db` is accepted to satisfy the caller's interface but is not used here.
    """
    np.random.seed(12345)
    arparams = np.array([.75, -.25])
    maparams = np.array([.65, .35])
    # lag polynomials: prepend the zero lag; AR coefficients are negated
    arparams = np.r_[1, -arparams]
    # BUG FIX: the completed MA polynomial was assigned to a dead variable
    # (`maparam`) while the raw coefficients were passed to the simulator;
    # build and use the polynomial including its zero lag.
    maparams = np.r_[1, maparams]
    nobs = 250
    y = arma_generate_sample(arparams, maparams, nobs)
    dates = sm.tsa.datetools.dates_from_range('1980m1', length=nobs)
    y = pandas.Series(y, index=dates)
    arma_mod = sm.tsa.ARMA(y, order=(2, 2))
    arma_res = arma_mod.fit(trend='nc', disp=-1)
    print(arma_res.summary())
def test_arma_generate_sample():
    """arma_generate_sample must agree with a manual lfilter simulation
    for every (ar, ma) pair in the module-level arlist/malist."""
    nobs = 100
    for dist in [np.random.randn]:
        np.random.seed(1234)
        eta = dist(nobs)
        for ar in arlist:
            for ma in malist:
                # module-function sample, regenerated from the same seed
                np.random.seed(1234)
                sample_lib = arma_generate_sample(ar, ma, nobs, distrvs=dist)
                # "manual" construction from the same innovations
                manual = _manual_arma_generate_sample(-np.array(ar[1:]),
                                                      np.array(ma[1:]), eta)
                assert_array_almost_equal(sample_lib, manual, 13)
def calibrate_arma(ar=3, ma=2, data=None):
    """Fit an ARMA(ar, ma) model to `data` and simulate a look-alike series.

    Parameters
    ----------
    ar, ma : int
        AR and MA orders of the fitted model.
    data : array-like
        observed series; its min/max also define the output's range.

    Returns
    -------
    A simulated series of ``len(data)`` values, min-max scaled back into
    the observed data's range.
    """
    # Fitting an ARMA model for the required data
    arma = sm.tsa.ARMA(data, (ar, ma)).fit(disp=0)
    # Capturing the ARMA params
    params = arma.params
    # Splitting the params into AR and MA params
    # NOTE(review): assumes the last ar+ma entries of `params` are the AR
    # coefficients followed by the MA coefficients (trend/exog terms come
    # first) — confirm against the statsmodels version in use.
    arparams = np.r_[1, -np.array(
        params[len(params) - (ar + ma):len(params) - ma])]
    maparams = np.r_[1, np.array(params[len(params) - ma:])]

    # Creating a callback function for generating new series
    def gen(nsample):
        # Same mean and standard deviation as the fitted residuals
        return [normalvariate(np.average(arma.resid), np.std(
            arma.resid)) for i in range(nsample)]

    # Generating new time series with the same properties of data
    return MinMaxScaler((min(data), max(data))).fit_transform(
        arma_generate_sample(arparams, maparams, len(data), distrvs=gen))
def generate_kindofgarch(nobs, ar, ma, mu=1.):
    '''simulate garch like process but not squared errors in arma

    used for initial trial but produces nice graph
    '''
    from statsmodels.tsa.arima_process import arma_generate_sample
    # latent ARMA component driving the (log) variance
    latent = arma_generate_sample(ar, ma, nobs, 0.1)
    variance = np.exp((mu + latent)**2)
    # heteroscedastic errors: standard normals scaled by the volatility
    err = np.sqrt(variance) * np.random.randn(nobs)
    return err, variance
import numpy as np
import numdifftools as ndt

from statsmodels.sandbox import tsa
from statsmodels.tsa.arma_mle import Arma  # local import
from statsmodels.tsa.arima_process import arma_generate_sample

# which examples to run
examples = ['arma']
if 'arma' in examples:
    print("\nExample 1")
    print('----------')
    # ARMA(1,1) lag polynomials, zero lag included
    ar = [1.0, -0.8]
    ma = [1.0, 0.5]
    y1 = arma_generate_sample(ar, ma, 1000, 0.1)
    y1 -= y1.mean()  # no mean correction/constant in estimation so far
    arma1 = Arma(y1)
    arma1.nar = 1
    arma1.nma = 1
    arma1res = arma1.fit_mle(order=(1, 1), method='fmin')
    print(arma1res.params)

    # Warning need new instance otherwise results carry over
    arma2 = Arma(y1)
    arma2.nar = 1
    arma2.nma = 1
    res2 = arma2.fit(method='bfgs')
    print(res2.params)
    print(res2.model.hessian(res2.params))
""" Autoregressive Moving Average (ARMA) Model """ import numpy as np import statsmodels.api as sm # Generate some data from an ARMA process from statsmodels.tsa.arima_process import arma_generate_sample np.random.seed(12345) arparams = np.array([.75, -.25]) maparams = np.array([.65, .35]) # The conventions of the arma_generate function require that we specify a # 1 for the zero-lag of the AR and MA parameters and that the AR parameters # be negated. ar = np.r_[1, -arparams] ma = np.r_[1, maparams] nobs = 250 y = arma_generate_sample(ar, ma, nobs) # Now, optionally, we can add some dates information. For this example, # we'll use a pandas time series. import pandas dates = sm.tsa.datetools.dates_from_range('1980m1', length=nobs) y = pandas.TimeSeries(y, index=dates) arma_mod = sm.tsa.ARMA(y, order=(2, 2)) arma_res = arma_mod.fit(trend='nc', disp=-1)
Author: Josef Perktold """ from __future__ import print_function import numpy as np from statsmodels.tsa.arima_process import arma_generate_sample, ArmaProcess from statsmodels.miscmodels.tmodel import TArma from statsmodels.tsa.arima_model import ARMA nobs = 500 ar = [1, -0.6, -0.1] ma = [1, 0.7] dist = lambda n: np.random.standard_t(3, size=n) np.random.seed(8659567) x = arma_generate_sample(ar, ma, nobs, sigma=1, distrvs=dist, burnin=500) mod = TArma(x) order = (2, 1) res = mod.fit(order=order) res2 = mod.fit_mle(order=order, start_params=np.r_[res[0], 5, 1], method='nm') print(res[0]) proc = ArmaProcess.from_coeffs(res[0][:order[0]], res[0][:order[1]]) print(ar, ma) proc.nobs = nobs # TODO: bug nobs is None, not needed ?, used in ArmaProcess.__repr__ print(proc.ar, proc.ma) print(proc.ar_roots(), proc.ma_roots())
#
# Generate data
#
# print 'Generating data...'

# generate labels: four quarters of nq samples each
labels = np.array([1] * nq + [0] * nq + [1] * nq + [0] * nq)

# generate static features (first quarter centered at 5.0, rest at 0.0)
static = np.vstack((np.random.normal(5.0, 1.0, (nq, nfeatures)),
                    np.random.normal(0.0, 1.0, (nq, nfeatures)),
                    np.random.normal(0.0, 1.0, (nq, nfeatures)),
                    np.random.normal(0.0, 1.0, (nq, nfeatures))))

# generate dynamic features: one ARMA sequence per (sample, feature)
# NOTE(review): quarters 1, 2 and 4 all use the "pos" parameters and only
# quarter 3 uses "neg" — confirm that asymmetry is intended.
dynamic_q1 = np.vstack([ap.arma_generate_sample(arparams_pos, maparams_pos, seqlen)
                        for i in range(nq * nseqfeatures)]).reshape(nq, nseqfeatures, seqlen)
dynamic_q2 = np.vstack([ap.arma_generate_sample(arparams_pos, maparams_pos, seqlen)
                        for i in range(nq * nseqfeatures)]).reshape(nq, nseqfeatures, seqlen)
dynamic_q3 = np.vstack([ap.arma_generate_sample(arparams_neg, maparams_neg, seqlen)
                        for i in range(nq * nseqfeatures)]).reshape(nq, nseqfeatures, seqlen)
dynamic_q4 = np.vstack([ap.arma_generate_sample(arparams_pos, maparams_pos, seqlen)
                        for i in range(nq * nseqfeatures)]).reshape(nq, nseqfeatures, seqlen)
dynamic = np.vstack((dynamic_q1, dynamic_q2, dynamic_q3, dynamic_q4))

#
# Split the dataset into training and test sets
#
# print 'Splitting train and test...'

# pick samples for training and for testing
# BUG FIX: np.random.choice requires an integer `size`; np.round returns a
# float, which NumPy rejects.
train_idx = np.random.choice(range(0, nsamples),
                             size=int(round(nsamples * 0.7)),
                             replace=False)
test_idx = list(set(range(0, nsamples)) - set(train_idx))
# wrapper plumbing: ARMAResultsWrapper reuses the TimeSeriesResultsWrapper
# attribute/method wrapping with no additions of its own
_attrs = {}
_wrap_attrs = wrap.union_dicts(tsbase.TimeSeriesResultsWrapper._wrap_attrs,
                               _attrs)
_methods = {}
_wrap_methods = wrap.union_dicts(tsbase.TimeSeriesResultsWrapper._wrap_methods,
                                 _methods)
wrap.populate_wrapper(ARMAResultsWrapper, ARMAResults)

if __name__ == "__main__":
    import numpy as np
    import statsmodels.api as sm

    # simulate arma process
    from statsmodels.tsa.arima_process import arma_generate_sample

    # ARMA(1,1): lag polynomials include the zero lag, AR signs negated
    y = arma_generate_sample([1.0, -0.75], [1.0, 0.25], nsample=1000)
    arma = ARMA(y)
    res = arma.fit(trend="nc", order=(1, 1))

    np.random.seed(12345)
    y_arma22 = arma_generate_sample([1.0, -0.85, 0.35], [1, 0.25, -0.9],
                                    nsample=1000)
    arma22 = ARMA(y_arma22)
    res22 = arma22.fit(trend="nc", order=(2, 2))

    # test CSS
    arma22_css = ARMA(y_arma22)
    res22css = arma22_css.fit(trend="nc", order=(2, 2), method="css")

    data = sm.datasets.sunspots.load()
    ar = ARMA(data.endog)
    resar = ar.fit(trend="nc", order=(9, 0))
# optional dependency for spectral estimation comparisons
try:
    import scikits.talkbox.spectral.basic as stbs
except ImportError:
    hastalkbox = False

# base ARMA lag polynomials, zero lag included
ar = [1., -0.7]  #[1,0,0,0,0,0,0,-0.7]
ma = [1., 0.3]

# convolve in long (~lag 50) AR factors to create seasonal-like structure
ar = np.convolve([1.] + [0] * 50 + [-0.6], ar)
ar = np.convolve([1., -0.5] + [0] * 49 + [-0.3], ar)

n_startup = 1000
nobs = 1000
# throwing away samples at beginning makes sample more "stationary"
xo = arma_generate_sample(ar, ma, n_startup + nobs)
x = xo[n_startup:]

plt.figure()
plt.plot(x)
rescale = 0

# theoretical spectral density from the ma/ar frequency response
w, h = signal.freqz(ma, ar)
sd = np.abs(h)**2 / np.sqrt(2 * np.pi)

if np.sum(np.isnan(h)) > 0:
    # this happens with unit root or seasonal unit root'
    print('Warning: nan in frequency response h')
    h[np.isnan(h)] = 1.
    rescale = 0
import numpy as np
import matplotlib.pyplot as plt

from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.arima_process import arma_generate_sample

np.random.seed(12345)
arparams = np.array([0.6])
maparams = np.array([0.3])
# BUG FIX: the zero lag of the AR polynomial must be 1 (it was 0.6), and
# the MA polynomial was never built nor passed to the simulator — the call
# arma_generate_sample(ar, 500) was missing the required `ma` argument.
ar = np.r_[1, -arparams]  # add zero-lag and negate
ma = np.r_[1, maparams]   # add zero-lag
y = arma_generate_sample(ar, ma, 500)

plt.plot(y)
plt.show()

# fit an ARMA(1,1) (d=0) to the simulated series
model = ARIMA(y, (1, 0, 1)).fit(trend='nc', disp=0)
plt.plot(model.fittedvalues)
plt.show()
print('NP')
''' N=1000 x=np.zeros(N) z=np.random.randn(N) beta1=0.5 for i in np.arange(1,N,1): x[i] = z[i] + beta1*z[i-1] plt.plot(x); plt.show() ''' AR (2) process example ''' tsaplots.plot_acf(x, lags=10, title='correlogram - AR(2)'); plt.show() ''' ARMA(p,q) process example ''' ar=[1, -0.9, 0.08] ma=[1, 0.5, 0.9] samps=arma_generate_sample(ar, ma, 5000) plt.plot(samps); tsaplots.plot_acf(samps, lags=40); tsaplots.plot_pacf(samps, lags=40); plt.show()
from statsmodels.tsa.arima_process import arma_generate_sample # parameter settings lags = 30 # lag value n = 1000 alphas = np.array([0.]) # add zero-lag and negate alphas ar = np.r_[1, -alphas] ma1 = np.r_[1, np.array([0.5, 0.4])] ma2 = np.r_[1, np.array([-0.5, 0.4])] ma3 = np.r_[1, np.array([-0.5, -0.4])] ma4 = np.r_[1, np.array([0.5, -0.4])] f, axarr = plt.subplots(2, 2, figsize=(11, 6)) simulated_data_1 = arma_generate_sample(ar=ar, ma=ma1, nsample=n) plot_pacf(simulated_data_1, lags=lags, ax=axarr[0, 0], title='Sample ACF Plot with ' + r'$\beta_1$=' + str(ma1[1]) + ' and ' + r'$\beta_2$=' + str(ma1[2])) axarr[0, 0].set_xlabel('lags') simulated_data_2 = arma_generate_sample(ar=ar, ma=ma2, nsample=n) plot_pacf(simulated_data_2, lags=lags, ax=axarr[0, 1], title='Sample ACF Plot with ' + r'$\beta_1$=' + str(ma2[1]) + ' and ' + r'$\beta_2$=' + str(ma2[2])) axarr[0, 1].set_xlabel('lags') simulated_data_3 = arma_generate_sample(ar=ar, ma=ma3, nsample=n) plot_pacf(simulated_data_3,
'''generates some ARMA random samples and saves to python module file
'''
import numpy as np

from statsmodels.sandbox import tsa
from statsmodels.tsa.arima_process import arma_generate_sample
from .maketests_mlabwrap import HoldIt

if __name__ == '__main__':
    filen = 'savedrvs_tmp.py'
    np.set_printoptions(precision=14, linewidth=100)

    # check arma with identity lag polynomials returns same as random.normal
    np.random.seed(10000)
    xo = arma_generate_sample([1], [1], nsample=100)
    xo2 = np.round(xo * 1000).astype(int)
    np.random.seed(10000)
    rvs = np.random.normal(size=100)
    # BUG FIX: the comparison sample must be rounded from `rvs`, not `xo`;
    # rounding xo twice made the assert below trivially true.
    rvs2 = np.round(rvs * 1000).astype(int)
    assert (xo2 == rvs2).all()

    nsample = 1000
    data = HoldIt('rvsdata')

    # AR(2) sample, stored as scaled integers
    np.random.seed(10000)
    xo = arma_generate_sample([1, -0.8, 0.5], [1], nsample=nsample)
    data.xar2 = np.round(xo * 1000).astype(int)

    # plain normal sample, stored as scaled integers
    np.random.seed(10000)
    xo = np.random.normal(size=nsample)
    data.xnormal = np.round(xo * 1000).astype(int)
import numpy as np from statsmodels.tsa.arima_process import arma_generate_sample from statsmodels.iolib import savetxt np.random.seed(12345) # no constant y_arma11 = arma_generate_sample([1., -.75],[1., .35], nsample=250) y_arma14 = arma_generate_sample([1., -.75],[1., .35, -.75, .1, .35], nsample=250) y_arma41 = arma_generate_sample([1., -.75, .25, .25, -.75], [1., .35], nsample=250) y_arma22 = arma_generate_sample([1., -.75, .45],[1., .35, -.9], nsample=250) y_arma50 = arma_generate_sample([1., -.75, .35, -.3, -.2, .1], [1.], nsample=250) y_arma02 = arma_generate_sample([1.], [1., .35, -.75], nsample=250) # constant constant = 4.5 y_arma11c = arma_generate_sample([1., -.75],[1., .35], nsample=250) + constant y_arma14c = arma_generate_sample([1., -.75],[1., .35, -.75, .1, .35], nsample=250) + constant y_arma41c = arma_generate_sample([1., -.75, .25, .25, -.75], [1., .35], nsample=250) + constant y_arma22c = arma_generate_sample([1., -.75, .45],[1., .35, -.9], nsample=250) + \ constant y_arma50c = arma_generate_sample([1., -.75, .35, -.3, -.2, .1], [1.],
""" import numpy as np import matplotlib.pyplot as plt from statsmodels.tsa.arima_process import arma_generate_sample, ArmaProcess from statsmodels.miscmodels.tmodel import TArma from statsmodels.tsa.arima_model import ARMA from statsmodels.tsa.arma_mle import Arma nobs = 500 ar = [1, -0.6, -0.1] ma = [1, 0.7] dist = lambda n: np.random.standard_t(3, size=n) np.random.seed(8659567) x = arma_generate_sample(ar, ma, nobs, scale=1, distrvs=dist, burnin=500) mod = TArma(x) order = (2, 1) res = mod.fit(order=order) res2 = mod.fit_mle(order=order, start_params=np.r_[res[0], 5, 1], method='nm') print(res[0]) proc = ArmaProcess.from_coeffs(res[0][:order[0]], res[0][:order[1]]) print(ar, ma) proc.nobs = nobs # TODO: bug nobs is None, not needed ?, used in ArmaProcess.__repr__ print(proc.ar, proc.ma) print(proc.ar_roots(), proc.ma_roots())
'''
from __future__ import print_function
import numpy as np

import statsmodels.api as sm
from statsmodels.tsa.arima_process import arma_generate_sample
from statsmodels.tsa.arma_mle import Arma as Arma
from statsmodels.tsa.arima_process import ARIMA as ARIMA_old
from statsmodels.sandbox.tsa.garch import Arma as Armamle_old
from statsmodels.tsa.arima import ARMA as ARMA_kf

print("\nExample 1")
# ARMA(2,2) lag polynomials, zero lag included
ar = [1.0, -0.6, 0.1]
ma = [1.0, 0.5, 0.3]
nobs = 1000
# simulate with a 1000-observation burn-in, then demean
y22 = arma_generate_sample(ar, ma, nobs + 1000, 0.5)[-nobs:]
y22 -= y22.mean()

start_params = [0.1, 0.1, 0.1, 0.1]
start_params_lhs = [-0.1, -0.1, 0.1, 0.1]

print('truelhs', np.r_[ar[1:], ma[1:]])

###bug in current version, fixed in Skipper and 1 more
###arr[1:q,:] = params[p+k:p+k+q] # p to p+q short params are MA coeffs
###ValueError: array dimensions are not compatible for copy
##arma22 = ARMA_kf(y22, constant=False, order=(2,2))
##res = arma22.fit(start_params=start_params)
##print res.params

print('\nARIMA new')
arest2 = Arma(y22)
# If we test the differenced series, that is the growth rate of moneystock, with a # Null hypothesis of Random Walk with drift, then we can strongly reject the hypothesis # that the growth rate has a unit root (p-value 0.0002) #-------------------------------------------------------------------------------- # To choose the number of lagged terms, p and q, for ARIMA(p,d,q) processes, use the # Box-Jenkins methodology to look at the pattern in the autocorrelation (acf) and # partial autocorrelation (pacf) functions. # scikits.statsmodels.tsa.arima_process contains a class that provides several properties # of ARMA processes and a random process generator. This allows easy comparison of the # theoretical properties of an ARMA process with their empirical counterparts. # For exmaple, define the lag coefficients for an ARMA(2,2) process, generate a random # process and compare the observed and theoretical pacf: ar = np.r_[1., -0.5, -0.2] ma = np.r_[1., 0.2, -0.2] np.random.seed(123) x = ap.arma_generate_sample(ar, ma, 20000, burnin=1000) print "observed pacf: {0}".format(sm.tsa.pacf(x, 5)) theo_ap = ap.ArmaProcess(ar, ma) print " theo pacf: {0}\n".format(theo_ap.pacf(6)) # We can see that observed and theoretical pacf are very close in a large generated # sample like this. #-------------------------------------------------------------------------------- # We can use Statsmodels Autoregressive Moving Average (ARMA) time-series models to # simulate a series: ar_coef = [1, .75, -.25] ma_coef = [1, -.5] nobs = 100 y = ap.arma_generate_sample(ar_coef, ma_coef, nobs) y += 4 # add in a constant # Estimate an ARMA model of the series
return 0 else: return 1 def getP(acf, alpha): if (max(acf) < alpha): return 0 index = np.where(acf > alpha) return min(index) - 1 def getQ(vals): index = 0 for val in vals: if 0 >= val[0] and 0 <= val[1]: return index - 1 index += 1 return 0 ar = [1, 0.8, 0.5, 0.6, 0.4] ma = [1] sampleAR = ts.arma_generate_sample(ar, ma, 1000) boxJenkinsAlgo(sampleAR) ar = [1] ma = [1, -0.8, -0.5, -0.6, -0.4] sampleMA = ts.arma_generate_sample(ar, ma, 1000) boxJenkinsAlgo(sampleMA)
# least-squares fit, then MLE warm-started from hand-picked values
reslst = modc.fit(order=(1, 1))
print(reslst[0])
rescmt = modct.fit_mle(order=(1, 1),
                       start_params=[-0.4, 0.4, 10, 1.], maxiter=500,
                       maxfun=500)
print(rescmt.params)

# Kalman-filter ARMA fit on the same series, no trend
mkf = ARMA(x)
##rkf = mkf.fit((1,1))
##rkf.params
rkf = mkf.fit((1, 1), trend='nc')
print(rkf.params)

np.random.seed(12345)
# simulate a series with 3 AR and 2 MA lags for the (2,2) fits below
y_arma22 = arma_generate_sample([1., -.85, .35, -0.1], [1, .25, -.7],
                                nsample=1000)
##arma22 = ARMA(y_arma22)
##res22 = arma22.fit(trend = 'nc', order=(2,2))
##print 'kf ',res22.params
##res22css = arma22.fit(method='css',trend = 'nc', order=(2,2))
##print 'css', res22css.params
mod22 = Arma(y_arma22)
resls22 = mod22.fit(order=(2, 2))
print('ls ', resls22[0])
resmle22 = mod22.fit_mle(order=(2, 2), maxfun=2000)
print('mle', resmle22.params)

# in-sample forecasts compared against the tail of the simulated series
f = mod22.forecast()
f3 = mod22.forecast3(start=900)[-20:]
print(y_arma22[-10:])
fevd.plot()

# VAR diagnostic tests
print(res.test_whiteness())
print(res.test_causality('m1', 'realgdp'))
print(res.test_normality())

#Example TSA ARMA

# Generate some data from an ARMA process
arparams = np.array([.75, -.25])
maparams = np.array([.65, .35])
# The conventions of the arma_generate function require that we specify a
# 1 for the zero-lag of the AR and MA parameters and that the AR parameters
# be negated.
arparams = np.r_[1, -arparams]
# BUG FIX: the completed MA polynomial was assigned to a dead variable
# (`maparam`) while the raw coefficients were passed to the simulator;
# build and use the polynomial including its zero lag.
maparams = np.r_[1, maparams]
nobs = 250
y = arma_generate_sample(arparams, maparams, nobs)
plt.figure()
plt.plot(y)

# Now, optionally, we can add some dates information. For this example,
# we'll use a pandas time series.
dates = sm.tsa.datetools.dates_from_range('1980m1', length=nobs)
y = Series(y, index=dates)
arma_mod = sm.tsa.ARMA(y, order=(2, 2), freq='M')
arma_res = arma_mod.fit(trend='nc', disp=-1)
print(arma_res.params)
plt.show()
from statsmodels.graphics.tsaplots import plot_acf plot_acf(x, lags=10) plt.show() ########## ARMA example from statsmodels.tsa.arima_process import arma_generate_sample import statsmodels as sm np.random.seed(100) alpha1 = -0.5 #alpha2 = -0.5 ma1 = 1.1 ma2 = 0.5 ar = [1, alpha1] ma = [1, ma1, ma2] y = arma_generate_sample(ar, ma, 10000) plt.plot( arma_generate_sample(ar, ma, 100), color='k', linewidth='1', label= 'ARMA process $\u03C6_1$ = %2.1f, $\u03B8_1$ = %2.1f, $\u03B8_2$ = %2.1f' % (-alpha1, ma1, ma2)) plt.legend() plt.show() ########## ARIMA Model Fit from pmdarima.arima import auto_arima model = auto_arima( y, seasonality=False,
# NOTE: Python 2 syntax (print statements); kept as-is.
import numdifftools as ndt

import statsmodels.api as sm
from statsmodels.sandbox import tsa
from statsmodels.tsa.arma_mle import Arma  # local import
from statsmodels.tsa.arima_process import arma_generate_sample

# which examples to run
examples = ['arma']
if 'arma' in examples:
    print "\nExample 1"
    print '----------'
    # ARMA(1,1) lag polynomials, zero lag included
    ar = [1.0, -0.8]
    ma = [1.0, 0.5]
    y1 = arma_generate_sample(ar, ma, 1000, 0.1)
    y1 -= y1.mean()  # no mean correction/constant in estimation so far
    arma1 = Arma(y1)
    arma1.nar = 1
    arma1.nma = 1
    arma1res = arma1.fit_mle(order=(1, 1), method='fmin')
    print arma1res.params

    # Warning need new instance otherwise results carry over
    arma2 = Arma(y1)
    arma2.nar = 1
    arma2.nma = 1
    res2 = arma2.fit(method='bfgs')
    print res2.params
    print res2.model.hessian(res2.params)
result = adfuller(city_stationary['city_population']) # Plot the differenced time series fig, ax = plt.subplots() city_stationary.plot(ax=ax) plt.show() # Print the test statistic and the p-value print('ADF Statistic:', result[0]) print('p-value:', result[1]) amazon_diff = amazon.diff().dropna() result_diff = adfuller(amazon_diff['close']) print(result_diff) amazon_log = np.log((amazon/amazon.shift(1)).dropna()) result_log = adfuller(amazon_log['close']) print(result_log) #Generating ARMA data from statsmodels.tsa.arima_process import arma_generate_sample np.random.seed(3) # Set coefficients ar_coefs = [1, 0.2] ma_coefs = [1, 0.3, 0.4] # Generate data y = arma_generate_sample(ar_coefs, ma_coefs, nsample=100, sigma=0.5, ) plt.plot(y) plt.ylabel(r'$y_t$') plt.xlabel(r'$t$') plt.show()
B_i.append(B_i_i) V = C0.copy() U = C0.copy() for j in range(i): V -= np.dot(A_i[j], self.C[j+1, :, :].T) U -= np.dot(B_i[j], self.C[j+1, :, :]) A = A_i.copy() B = B_i.copy() return A if __name__ == '__main__': k, d, r, seed = 2, 1, 0.05, 123 T1, T2 = 5, 7 cf = ChangeFinder(k, d, r, T1, T2, seed) #X1 = multivariate_normal([0.0, 0.0], [[1.0, 0.0], [0.0, 1.0]]).rvs(10000) #X2 = multivariate_normal([1.0, 1.0], [[1.0, 0.8], [0.8, 1.0]]).rvs(1000) #X = np.vstack((X1, X2)) #X = np.vstack((X1)) X = arma_generate_sample([1.0, -0.6, 0.5], [1.0, 0.0, 0.0], 100000) X = X.reshape(-1, 1) for i in range(X.shape[0]): cf.update(X[i, :])
# empty frame: one simulated series per column
df = pd.DataFrame(index=index, columns=columns)
df = df.fillna(0) # With 0s rather than NaNs
print(df.head())

# In[4]:

# Simulation of ARMA processes
# each column gets its own random ARMA(2,2) lag polynomials
# NOTE(review): the random draws are used directly as the lag polynomials
# (AR not negated) and nothing constrains them to a stationary/invertible
# region — confirm that is intended.
for i in np.arange(0,len(df.columns)):
    arparams = np.array([1, random.uniform(-0.99,0.99),
                         random.uniform(-0.99,0.99)])
    maparams = np.array([1, random.uniform(-0.99,0.99),
                         random.uniform(-0.99,0.99)])
    df.iloc[:,i] = arma_generate_sample(arparams, maparams, nsample)
print(df.head())

# In[5]:

# Plots
df.plot(subplots=True, figsize=(30,20))

# ## VAR

# In[6]:
# Generate data # print 'Generating data...' # generate labels labels = np.array([1] * nq + [0] * nq + [1] * nq + [0] * nq) # generate static features static = np.vstack((np.random.normal(5.0, 1.0, (nq, nfeatures)), np.random.normal(0.0, 1.0, (nq, nfeatures)), np.random.normal(0.0, 1.0, (nq, nfeatures)), np.random.normal(0.0, 1.0, (nq, nfeatures)))) # generate dynamic features dynamic_q1 = np.vstack([ ap.arma_generate_sample(arparams_pos, maparams_pos, seqlen) for i in range(nq * nseqfeatures) ]).reshape(nq, nseqfeatures, seqlen) dynamic_q2 = np.vstack([ ap.arma_generate_sample(arparams_pos, maparams_pos, seqlen) for i in range(nq * nseqfeatures) ]).reshape(nq, nseqfeatures, seqlen) dynamic_q3 = np.vstack([ ap.arma_generate_sample(arparams_neg, maparams_neg, seqlen) for i in range(nq * nseqfeatures) ]).reshape(nq, nseqfeatures, seqlen) dynamic_q4 = np.vstack([ ap.arma_generate_sample(arparams_pos, maparams_pos, seqlen) for i in range(nq * nseqfeatures) ]).reshape(nq, nseqfeatures, seqlen) dynamic = np.vstack((dynamic_q1, dynamic_q2, dynamic_q3, dynamic_q4))
#Example TSA ARMA

import numpy as np
import statsmodels.api as sm

# Generate some data from an ARMA process
from statsmodels.tsa.arima_process import arma_generate_sample

arparams = np.array([.75, -.25])
maparams = np.array([.65, .35])
# The conventions of the arma_generate function require that we specify a
# 1 for the zero-lag of the AR and MA parameters and that the AR parameters
# be negated.
arparams = np.r_[1, -arparams]
# BUG FIX: the completed MA polynomial was assigned to a dead variable
# (`maparam`) while the raw coefficients were passed to the simulator;
# build and use the polynomial including its zero lag.
maparams = np.r_[1, maparams]
nobs = 250
y = arma_generate_sample(arparams, maparams, nobs)

plt.figure()
plt.plot(y)

#Now, optionally, we can add some dates information. For this example,
# we'll use a pandas time series.
dates = sm.tsa.datetools.dates_from_range('1980m1', length=nobs)
y = Series(y, index=dates)
arma_mod = sm.tsa.ARMA(y, freq='M')
#arma_res = arma_mod.fit(order=(2,2), trend='nc', disp=-1)
#fails
#old pandas 0.4.0: AttributeError: 'TimeSeries' object has no attribute 'name'
#arma_res.params

plt.show()
# wrapper plumbing: ARMAResultsWrapper reuses the TimeSeriesResultsWrapper
# attribute/method wrapping with no additions of its own
_attrs = {}
_wrap_attrs = wrap.union_dicts(tsbase.TimeSeriesResultsWrapper._wrap_attrs,
                               _attrs)
_methods = {}
_wrap_methods = wrap.union_dicts(
    tsbase.TimeSeriesResultsWrapper._wrap_methods, _methods)
wrap.populate_wrapper(ARMAResultsWrapper, ARMAResults)

if __name__ == "__main__":
    import numpy as np
    import statsmodels.api as sm

    # simulate arma process
    from statsmodels.tsa.arima_process import arma_generate_sample

    # ARMA(1,1): lag polynomials include the zero lag, AR signs negated
    y = arma_generate_sample([1., -.75],[1.,.25], nsample=1000)
    arma = ARMA(y)
    res = arma.fit(trend='nc', order=(1,1))

    np.random.seed(12345)
    y_arma22 = arma_generate_sample([1.,-.85,.35],[1,.25,-.9], nsample=1000)
    arma22 = ARMA(y_arma22)
    res22 = arma22.fit(trend = 'nc', order=(2,2))

    # test CSS
    arma22_css = ARMA(y_arma22)
    res22css = arma22_css.fit(trend='nc', order=(2,2), method='css')

    data = sm.datasets.sunspots.load()
    ar = ARMA(data.endog)
##number of point of the time series nsample = 1024 ## Simulate a simple sinusoidal function x1 = np.linspace(0, 100, nsample) y=np.sin(x1) + 13826""" import statsmodels.api as sm from statsmodels.tsa.arima_process import arma_generate_sample np.random.seed(12345) arparams = np.array([0.9, -0.9]) maparams = np.array([-0.5, 1]) ar = np.r_[1, -arparams] ma = np.r_[1, maparams] y = pd.DataFrame(arma_generate_sample(arparams, maparams, 499) + 13825.680000, columns=["AUD/USD"]) fxfwd = pd.concat([fxfwd, y]) #plt.plot(fxfwd) # Splitting the Dataset train = fxfwd[:int(0.8 * (len(fxfwd)))] valid = fxfwd[int(0.8 * (len(fxfwd))):] # split a univariate sequence into samples def split_sequence(sequence, n_steps): X, y = list(), list() for i in range(len(sequence)):
try: import scikits.talkbox.spectral.basic as stbs except ImportError: hastalkbox = False ar = [1., -0.7]#[1,0,0,0,0,0,0,-0.7] ma = [1., 0.3] ar = np.convolve([1.]+[0]*50 +[-0.6], ar) ar = np.convolve([1., -0.5]+[0]*49 +[-0.3], ar) n_startup = 1000 nobs = 1000 # throwing away samples at beginning makes sample more "stationary" xo = arma_generate_sample(ar,ma,n_startup+nobs) x = xo[n_startup:] plt.figure() plt.plot(x) rescale = 0 w, h = signal.freqz(ma, ar) sd = np.abs(h)**2/np.sqrt(2*np.pi) if np.sum(np.isnan(h)) > 0: # this happens with unit root or seasonal unit root' print('Warning: nan in frequency response h') h[np.isnan(h)] = 1.
# tests of block_shuffle on a small known sequence, several block sizes
x = 1+np.arange(12)
for blocksize in [2,2,3,3,4,4,5,5,5]:
    print(blocksize, block_shuffle(x, blocksize))

# tests of discretise
print('\ntests of discretise')
x = 1+np.arange(12)
np.random.shuffle(x)
print('x:', x)
for k in [1,2,3,4,12]:
    print(k, discretise(x, k))

# tests of discretise + entropy_rate
# print('\ntests of discretise + entropy_rate')
# x = np.random.randint(0,2**8,size=100000).astype(np.uint32)
# xb = bytearray(x)
# print('x[:4]: ',x[:4],'xb[:4]: ',xb[:4],'list(xb)[:4]',list(xb)[:4])
# for method in ['lzma','bz2']:
#     print(method,'undiscretised',entropy_rate(bytearray(x)))
#     for discretisation_level in [2**j for j in [1,2,3,5,8,17]]:
#         xd = discretise(x,discretisation_level)
#         print(method,'discretised',discretisation_level,
#               entropy_rate(bytearray(xd), method=method))

# tests of dependogram on a serially dependent AR(1) sample
print('\ntests of dependogram')
x = arma_generate_sample([1,.5],[1],nsample=10000).astype(np.float64)
xb = bytearray(x)
print('x[:5]',x[:5])
# print(dependogram(x, blocksizes=[1,2,3,100], nshuffles=100, nbins=4, plot=True))
# dependogram(x, nshuffles=100, nbins=8, plot=True)
# dependogram(x, nshuffles=100, nbins=6, plot=True)
dependogram(x, nshuffles=100, nbins=2, plot=False, method='lzma')
dependogram(x, nshuffles=100, nbins=2, plot=True, method='bz2')
print(reslst[0])
# MLE warm-started from hand-picked values
rescmt = modct.fit_mle(order=(1,1),
                       start_params=[-0.4,0.4, 10, 1.],maxiter=500,
                       maxfun=500)
print(rescmt.params)

# Kalman-filter ARMA fit on the same series, no trend
from statsmodels.tsa.arima_model import ARMA
mkf = ARMA(x)
##rkf = mkf.fit((1,1))
##rkf.params
rkf = mkf.fit((1,1), trend='nc')
print(rkf.params)

from statsmodels.tsa.arima_process import arma_generate_sample
np.random.seed(12345)
# simulate a series with 3 AR and 2 MA lags for the (2,2) fits below
y_arma22 = arma_generate_sample([1.,-.85,.35, -0.1],[1,.25,-.7], nsample=1000)
##arma22 = ARMA(y_arma22)
##res22 = arma22.fit(trend = 'nc', order=(2,2))
##print 'kf ',res22.params
##res22css = arma22.fit(method='css',trend = 'nc', order=(2,2))
##print 'css', res22css.params
mod22 = Arma(y_arma22)
resls22 = mod22.fit(order=(2,2))
print('ls ', resls22[0])
resmle22 = mod22.fit_mle(order=(2,2), maxfun=2000)
print('mle', resmle22.params)

# in-sample forecasts compared against the tail of the simulated series
f = mod22.forecast()
f3 = mod22.forecast3(start=900)[-20:]
print(y_arma22[-10:])
def ar1_sim(y, p, t=None):
    '''Generate p AR(1) surrogate realizations matching the persistence of `y`.

    The lag-1 autocorrelation is estimated from `y` (and, when the sampling
    is uneven, from the time axis `t`); each surrogate has the same length
    and standard deviation as `y`.

    Parameters
    ----------
    y : array
        a time series; NaNs not allowed
    p : int
        column dimension (number of surrogates)
    t : array
        the time axis of the series

    Returns
    -------
    ysim : array
        n by p matrix of simulated AR(1) vector

    See Also
    --------
    pyleoclim.utils.tsmodel.ar1_model : Simulates a (possibly irregularly-sampled) AR(1) process with given decay constant tau, à la REDFIT.
    pyleoclim.utils.tsmodel.ar1_fit : Returns the lag-1 autocorrelation from AR(1) fit OR persistence from tauest.
    pyleoclim.utils.tsmodel.ar1_fit_evenly : Returns the lag-1 autocorrelation from AR(1) fit assuming even temporal spacing.
    pyleoclim.utils.tsmodel.tau_estimation : Estimates the temporal decay scale of an (un)evenly spaced time series.
    '''
    num_obs = np.size(y)
    sigma_y = np.std(y)
    surrogates = np.empty((num_obs, p))

    if is_evenly_spaced(t):
        lag1 = ar1_fit_evenly(y)
        # statsmodels expects the lag-0 coefficient to be unity
        ar_coeffs = np.r_[1, -lag1]
        ma_coeffs = np.r_[1, 0.0]
        # theoretical innovation std so each surrogate matches the variance of y
        noise_std = sigma_y * np.sqrt(1 - lag1 ** 2)
        for col in range(p):
            # statsmodels v0.12+ signature (older syntaxes kept in history)
            surrogates[:, col] = arma_generate_sample(
                ar_coeffs, ma_coeffs, nsample=num_obs, scale=noise_std, burnin=50)
    else:
        # tau_est = ar1_fit(y, t=t, detrend=detrend, params=params)
        decay_scale = tau_estimation(y, t)
        for col in range(p):
            # ar1_model output has unit variance; output_sigma rescales it to
            # be consistent with the original input timeseries
            surrogates[:, col] = ar1_model(t, decay_scale, output_sigma=sigma_y)

    # a single surrogate is returned as a 1-D vector
    return surrogates[:, 0] if p == 1 else surrogates
def generate_lstm_wins(nsamples, nfeatures, nseqfeatures, seqlen):
    """Generate a synthetic dataset with static and sequential (LSTM) features.

    The data is built in four equal quarters labelled 1/0/1/0; static
    features are Gaussian draws, sequential features are ARMA samples with
    randomly drawn orders and coefficients, and the result is split 70/30
    into train and test sets.

    Parameters
    ----------
    nsamples : int
        total number of samples (should be divisible by 4)
    nfeatures : int
        number of static features per sample
    nseqfeatures : int
        number of sequential features per sample
    seqlen : int
        length of each generated sequence

    Returns
    -------
    tuple
        (train_static, train_dynamic, test_static, test_dynamic,
         train_labels, test_labels)
    """
    # size of 1/4th of a dataset — integer division, because nq is used as a
    # list-repetition count and a range() bound below (true division `/`
    # yields a float on Python 3 and breaks both)
    nq = nsamples // 4

    # dynamical feature distribution parameters (for arma); each entry of
    # `orders` holds four ARMA orders (AR+, AR-, MA+, MA-) for one feature.
    # Loop variable renamed from `ord`, which shadowed the builtin.
    orders = [np.random.randint(1, 5, 4) for rep in range(nseqfeatures)]
    arparams_pos = [np.hstack((1.0, np.random.uniform(-0.1, 0.1, o[0]))) for o in orders]
    arparams_neg = [np.hstack((1.0, np.random.uniform(-0.1, 0.1, o[1]))) for o in orders]
    maparams_pos = [np.hstack((1.0, np.random.uniform(-0.5, 0.5, o[2]))) for o in orders]
    maparams_neg = [np.hstack((1.0, np.random.uniform(-0.5, 0.5, o[3]))) for o in orders]

    #
    # Generate data
    #

    # generate labels (quarters alternate 1/0)
    labels = np.array([1] * nq + [0] * nq + [1] * nq + [0] * nq)

    # generate distribution parameters for the static features
    smean_pos = np.random.uniform(0, 2, nfeatures)
    ssigm_pos = [1.0] * nfeatures
    smean_neg = np.random.uniform(0, 2, nfeatures)
    ssigm_neg = [1.0] * nfeatures

    # generate static features: quarter 1 uses the "positive" distribution,
    # quarters 2-4 the "negative" one
    static_q1 = np.vstack([np.random.normal(smean_pos[f], ssigm_pos[f], nq) for f in range(nfeatures)])
    static_q2 = np.vstack([np.random.normal(smean_neg[f], ssigm_neg[f], nq) for f in range(nfeatures)])
    static_q3 = np.vstack([np.random.normal(smean_neg[f], ssigm_neg[f], nq) for f in range(nfeatures)])
    static_q4 = np.vstack([np.random.normal(smean_neg[f], ssigm_neg[f], nq) for f in range(nfeatures)])
    static = np.vstack((static_q1.T, static_q2.T, static_q3.T, static_q4.T))

    # generate dynamic features
    # NOTE(review): quarters 1, 2 and 4 use the "positive" ARMA parameters and
    # quarter 3 the "negative" ones, which does not line up with the 1/0/1/0
    # label pattern above — preserved as-is, but worth confirming.
    dynamic_q1 = np.vstack([ap.arma_generate_sample(arparams_pos[i % nseqfeatures],
                                                    maparams_pos[i % nseqfeatures], seqlen)
                            for i in range(nq * nseqfeatures)]).reshape(nq, nseqfeatures, seqlen)
    dynamic_q2 = np.vstack([ap.arma_generate_sample(arparams_pos[i % nseqfeatures],
                                                    maparams_pos[i % nseqfeatures], seqlen)
                            for i in range(nq * nseqfeatures)]).reshape(nq, nseqfeatures, seqlen)
    dynamic_q3 = np.vstack([ap.arma_generate_sample(arparams_neg[i % nseqfeatures],
                                                    maparams_neg[i % nseqfeatures], seqlen)
                            for i in range(nq * nseqfeatures)]).reshape(nq, nseqfeatures, seqlen)
    dynamic_q4 = np.vstack([ap.arma_generate_sample(arparams_pos[i % nseqfeatures],
                                                    maparams_pos[i % nseqfeatures], seqlen)
                            for i in range(nq * nseqfeatures)]).reshape(nq, nseqfeatures, seqlen)
    dynamic = np.vstack((dynamic_q1, dynamic_q2, dynamic_q3, dynamic_q4))

    #
    # Split the dataset into training and test sets
    #

    # pick samples for training and for testing; np.random.choice requires an
    # integer `size` (np.round returns a float)
    train_idx = np.random.choice(range(0, nsamples), size=int(round(nsamples * 0.7)), replace=False)
    test_idx = list(set(range(0, nsamples)) - set(train_idx))

    train_static = static[train_idx, :]
    train_dynamic = dynamic[train_idx, :, :]
    test_static = static[test_idx, :]
    test_dynamic = dynamic[test_idx, :, :]
    train_labels = labels[train_idx]
    test_labels = labels[test_idx]

    return train_static, train_dynamic, test_static, test_dynamic, train_labels, test_labels
#1D Random walk N=500 #number of observations #Initialise X=np.zeros((1,N), float) epsilon=np.random.normal(loc=0.0, scale=1, size=(1,N)) X[0,0]=epsilon[0,0] #Generate Random Walk for i in range(N-1): X[0,i+1]=X[0,i]+epsilon[0,i] plt.plot(X[0,:]) #ARMA(2,2) arparams = np.array([.75, -.25]) maparams = np.array([.65, .35]) arparams = np.r_[1, -arparams]#step is zero is unweighted maparams = np.r_[1, maparams]#step is zero is unweighted y = arma_generate_sample(arparams, maparams, 500)#mean zero variance 1 plt.plot(y) #AR(1) #Stable X=np.zeros((1,N), float) epsilon=np.random.normal(loc=0.0, scale=1, size=(1,N)) X[0,0]=epsilon[0,0] for i in range(N-1): X[0,i+1]=0.5*X[0,i]+epsilon[0,i] plt.plot(X[0,:]) #Unstable for i in range(N-1): X[0,i+1]=2*X[0,i]+epsilon[0,i] plt.plot(X[0,:]) #Estimate ACF and PACF acf_X=acf(np.squeeze(X), nlags=10)
def GAP(self, method, measure, nrefs=250, maxClusters=10):
    """
    Calculates optimal number of clusters using Gap Statistic from
    Tibshirani, Walther, Hastie.

    :param method: clustering method, one of {'k_medoids',
        'hierarchical_single', 'hierarchical_complete', 'hierarchical_ward'}
    :param measure: distance measure, one of {'manhattan', 'euclidean',
        'chebyshev', 'dtw', 'lcss', 'dft', 'dwt'}
    :param nrefs: number of sample reference datasets to create
    :param maxClusters: Maximum number of clusters to test for
    Returns: (k_opt, resultsdf)
    """
    if method not in {
            'k_medoids', 'hierarchical_single', 'hierarchical_complete',
            'hierarchical_ward'
    }:
        # error message now lists every accepted method (it previously
        # omitted 'hierarchical_single' even though the check allows it)
        raise ValueError(
            "method not from set {'k_medoids', 'hierarchical_single', "
            "'hierarchical_complete', 'hierarchical_ward'}")
    if measure not in {
            'manhattan', 'euclidean', 'chebyshev', 'dtw', 'lcss', 'dft', 'dwt'
    }:
        raise ValueError(
            "measure not from set 'manhattan', 'euclidean', 'chebyshev', "
            "'dtw', 'lcss', 'dft', 'dwt'")

    self.create_distance_matrix(measure)
    dm = self.distances.dm  # keep the original distance matrix to restore after each k

    np.random.seed(123)
    gaps = np.zeros((len(range(1, maxClusters)), ))
    s_ks = np.zeros((len(range(1, maxClusters)), ))
    resultsdf = pd.DataFrame({'clusterCount': [], 'gap': [], 's_k': []})

    def _dispersion(k):
        # dispatch to the requested clustering routine; shared by the
        # reference and the original-data fits below
        if method == 'k_medoids':
            return self.k_medoids(k)
        elif method == 'hierarchical_single':
            return self.hierarchical('single', k)
        elif method == 'hierarchical_complete':
            return self.hierarchical('complete', k)
        elif method == 'hierarchical_ward':
            return self.hierarchical('ward', k)

    # reference ARMA parameters are constant — hoisted out of the loops
    arparams = np.array([0.9, -0.5])
    maparams = np.array([0.25, 0])
    ar = np.r_[1, -arparams]  # add zero-lag and negate
    ma = np.r_[1, maparams]  # add zero-lag

    for gap_index, k in enumerate(range(1, maxClusters)):
        # Holder for reference dispersion results
        refDisps = np.zeros(nrefs)

        # For n references, generate random sample and perform clustering,
        # getting resulting dispersion of each loop
        for i in range(nrefs):
            # Create new random reference set of ARMA series, min-max
            # normalised to [0, 100]
            ts_matrix_random = []
            for j in range(len(self.distances.ts_matrix)):
                y = arma_generate_sample(ar, ma,
                                         len(self.distances.ts_matrix[0]))
                y_norm = [
                    100 * (float(l) - min(y)) / (max(y) - min(y)) for l in y
                ]
                ts_matrix_random.append(y_norm)
            self.distances.ts_matrix = np.array(ts_matrix_random)
            self.create_distance_matrix(measure)

            # Fit to it
            refDisps[i] = _dispersion(k)

        # Fit cluster to original data and create dispersion
        self.distances.ts_matrix = np.array(self.ts_matrix)
        self.distances.dm = dm
        origDisp = _dispersion(k)

        # Calculate gap statistic
        gap = np.mean(np.log(refDisps)) - np.log(origDisp)
        sd_k = np.std(np.log(refDisps))
        s_k = sd_k * np.sqrt(1 + 1 / nrefs)

        # Assign this loop's gap statistic to gaps
        gaps[gap_index] = gap
        s_ks[gap_index] = s_k
        # DataFrame.append was removed in pandas 2.0 — build the row with
        # pd.concat instead (same resulting frame)
        resultsdf = pd.concat(
            [resultsdf,
             pd.DataFrame([{'clusterCount': k, 'gap': gap, 's_k': s_k}])],
            ignore_index=True)

    # Tibshirani's rule: smallest k with gap(k) >= gap(k+1) - s_k(k+1);
    # fall back to the argmax of the gap curve when no such k exists
    # if method == 'k_medoids':
    if np.nonzero(
            np.array(gaps[0:(maxClusters - 2)]) >= np.array(
                (gaps[1:(maxClusters - 1)] -
                 s_ks[1:(maxClusters - 1)])))[0].tolist() and k > 1:
        k_opt = np.nonzero(
            np.array(gaps[0:(maxClusters - 2)]) >= np.array((gaps[1:(
                maxClusters - 1)] - s_ks[1:(maxClusters - 1)])))[0][0] + 1
    else:
        k_opt = gaps.argmax() + 1

    return (k_opt, resultsdf)
# simulation setup; hourly_* / amplitude_model / *_explain / armaSampling are
# defined earlier (off-screen) in this script
daily_samples=365
daily_sigma=np.sqrt(25.18)  # NOTE(review): hard-coded variance — source unclear from this excerpt
amplitude_ar=np.r_[1, amplitude_model.fit(disp=0).arparams]  # prepend lag-0 coefficient of 1
amplitude_ma=np.r_[1,amplitude_model.fit(disp=0).maparams]
amplitude_samples=365
amplitude_sigma=np.sqrt(28.86)

simulation=np.array([])
paths = 10
k=0
# NOTE(review): sampling has paths+1 slots but only indices 0..paths-1 are
# ever filled; the trailing [None] (length 1) makes zip(*sampling) below
# truncate to a single row — verify this is intended
sampling = [[None]] * (paths + 1)
while(k<paths):
    # one year of hourly residuals plus the underlying noise draws
    hourly_residual_simulation, sampling[k] = armaSampling.arma_generate_sample_and_noise(ar=hourly_ar, ma=hourly_ma, nsample=hourly_samples,sigma=hourly_sigma, write_sample=False)
    # arma_generate_sample(ar=hourly_ar, ma=hourly_ma, nsample=hourly_samples,sigma=hourly_sigma)
    daily_residual_simulation=arma_generate_sample(ar=daily_ar, ma=daily_ma, nsample=daily_samples,sigma=daily_sigma)
    amplitude_residual_simulation=arma_generate_sample(ar=amplitude_ar, ma=amplitude_ma, nsample=amplitude_samples,sigma=amplitude_sigma)
    i=0
    path=np.zeros(365*24)
    # recombine hourly, daily and amplitude components into one hourly path;
    # i//24 maps each hour index to its day
    while (i<len(hourly_residual_simulation)):
        path[i]=(hourly_explain[i]+hourly_residual_simulation[i])*(amplitude_explain[i//24]+amplitude_residual_simulation[i//24])+daily_residual_simulation[i//24]+daily_explain[i//24]
        i=i+1
    if(k==0):
        simulation=path.copy()
    else:
        simulation=np.c_[simulation,path]  # stack each new path as a column
    k=k+1

# transpose the per-path noise lists into rows before writing to CSV
sampling = zip(*sampling)
pd.DataFrame(sampling).to_csv(os.getcwd() + '/simulation/sampling/normal_sampling' + str(paths) + 'paths.csv')
'''
from __future__ import print_function
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.arima_process import arma_generate_sample
from statsmodels.tsa.arma_mle import Arma as Arma
from statsmodels.tsa.arima_process import ARIMA as ARIMA_old
from statsmodels.sandbox.tsa.garch import Arma as Armamle_old


print("\nExample 1")
# ARMA(2,2) data-generating process (lag-0 coefficient 1 by convention)
ar = [1.0, -0.6, 0.1]
ma = [1.0, 0.5, 0.3]
nobs = 1000
# simulate 1000 extra observations as burn-in, keep only the last `nobs`
y22 = arma_generate_sample(ar, ma, nobs+1000, 0.5)[-nobs:]
y22 -= y22.mean()  # demean the sample
start_params = [0.1, 0.1, 0.1, 0.1]
start_params_lhs = [-0.1, -0.1, 0.1, 0.1]

print('truelhs', np.r_[ar[1:], ma[1:]])

###bug in current version, fixed in Skipper and 1 more
###arr[1:q,:] = params[p+k:p+k+q] # p to p+q short params are MA coeffs
###ValueError: array dimensions are not compatible for copy
##from statsmodels.tsa.arima import ARMA as ARMA_kf
##arma22 = ARMA_kf(y22, constant=False, order=(2,2))
        # tail of a method whose definition starts before this excerpt:
        # if the fitted MA polynomial is not invertible, replace its roots
        # with the inverted (invertible) ones and refit from there
        mainv, wasinvertible = invertibleroots(ma)
        if not wasinvertible:
            start_params = res.params.copy()
            # MA coefficients occupy the slice after the AR coefficients
            start_params[self.nar:self.nar + self.nma] = mainv[1:]
            #need to add args kwds
            res = self.fit(start_params=start_params)
        return res


if __name__ == '__main__':
    nobs = 50
    # ARMA(2,2) DGP (lag-0 coefficient 1 by statsmodels convention)
    ar = [1.0, -0.8, 0.1]
    ma = [1.0, 0.1, 0.2]
    #ma = [1]
    np.random.seed(9875789)  # fixed seed for reproducible output
    y = arma_generate_sample(ar, ma, nobs, 2)
    y -= y.mean() #I have not checked treatment of mean yet, so remove

    mod = MLEGLS(y)
    mod.nar, mod.nma = 2, 2 #needs to be added, no init method
    mod.nobs = len(y)
    res = mod.fit(start_params=[0.1, -0.8, 0.2, 0.1, 1.])
    print('DGP', ar, ma)
    print(res.params)
    # compare with Yule-Walker AR(2) estimates
    from statsmodels.regression import yule_walker
    print(yule_walker(y, 2))
    #resi = mod.fit_invertible(start_params=[0.1,0,0.2,0, 0.5])
    #print(resi.params
    arpoly, mapoly = getpoly(mod, res.params[:-1])

    data = sunspots.load(as_pandas=False)
''' import numpy as np from statsmodels.tsa.arima_process import arma_generate_sample from .maketests_mlabwrap import HoldIt if __name__ == '__main__': filen = 'savedrvs_tmp.py' np.set_printoptions(precision=14, linewidth=100) # check arma to return same as random.normal np.random.seed(10000) xo = arma_generate_sample([1], [1], nsample=100) xo2 = np.round(xo*1000).astype(int) np.random.seed(10000) rvs = np.random.normal(size=100) rvs2 = np.round(xo*1000).astype(int) assert (xo2==rvs2).all() nsample = 1000 data = HoldIt('rvsdata') np.random.seed(10000) xo = arma_generate_sample([1, -0.8, 0.5], [1], nsample=nsample) data.xar2 = np.round(xo*1000).astype(int) np.random.seed(10000) xo = np.random.normal(size=nsample) data.xnormal = np.round(xo*1000).astype(int)
Use arima_process.arma_generate_sample() to generate an ARMA sample
Use ar_model.AR() to build an AR model from the sample
Use ar_model.AR.select_order() to choose the AR order, with AIC or BIC as the criterion
Use stattools.acovf() to compute the sample autocovariance function of the series
From the sample autocovariances, use armaME() to obtain the moment
(Yule-Walker) estimates of the AR model

All the functions above, except armaME(), come from the third-party package
statsmodel.tsa; for their usage see the official documentation, or use the
help command after importing the library
'''
print('\n*********************\n chapter6: 1.5\n*********************')


def dist(x):
    '''Draw random numbers from the uniform distribution U(-4, 4).'''
    return 8 * np.random.random_sample(x) - 4


# AR(4) DGP driven by the uniform (non-Gaussian) innovations above
ar, ma = (1, 0.9, 1.4, 0.7, 0.6), (1, )
series = arima_process.arma_generate_sample(ar, ma, 500, distrvs=dist)

ARmodel = ar_model.AR(series)
maxlag = 12  # upper bound for the order search

print('\n----order selection using AIC----\n')
print('upper bound of order: %d' % maxlag)
ARorder_aic = ARmodel.select_order(maxlag, 'aic', trend='nc')
print('order: %d' % ARorder_aic)
# Yule-Walker (moment) estimation at the AIC-selected order
armaAcovf = stattools.acovf(series, nlag=ARorder_aic, fft=False)
armaYW = armaME(ARorder_aic, 0, armaAcovf)

print('----order selection using BIC----\n')
print('upper bound of order: %d' % maxlag)
ARorder_bic = ARmodel.select_order(maxlag, 'bic', trend='nc')
print('order: %d' % ARorder_bic)
armaAcovf = stattools.acovf(series, nlag=ARorder_bic, fft=False)
import matplotlib.mlab as mlab
from statsmodels.tsa.arima_process import arma_generate_sample, arma_impulse_response
from statsmodels.tsa.arima_process import arma_acovf, arma_acf, ARIMA
#from movstat import acf, acovf
#from statsmodels.sandbox.tsa import acf, acovf, pacf
from statsmodels.tsa.stattools import acf, acovf, pacf

# example AR(1)/MA(1) process; alternative coefficient sets left commented
# for experimentation
ar = [1., -0.6]
#ar = [1., 0.]
ma = [1., 0.4]
#ma = [1., 0.4, 0.6]
#ma = [1., 0.]
mod = ''#'ma2'

# compare the sample ACF of a simulated series with the theoretical impulse
# response of the same process
x = arma_generate_sample(ar, ma, 5000)
x_acf = acf(x)[:10]
x_ir = arma_impulse_response(ar, ma)

#print x_acf[:10]
#print x_ir[:10]
#irc2 = np.correlate(x_ir,x_ir,'full')[len(x_ir)-1:]
#print irc2[:10]
#print irc2[:10]/irc2[0]
#print irc2[:10-1] / irc2[1:10]
#print x_acf[:10-1] / x_acf[1:10]


# detrend helper from matplotlib.mlab
def detrend(x, key=None):
    # NOTE(review): only the 'constant' branch is visible in this excerpt;
    # other key values are presumably handled past this point — confirm
    if key is None or key=='constant':
        return detrend_mean(x)
import statsmodels.tsa.arima_process as ts
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
import matplotlib.pyplot as plt

# AR(4) process (statsmodels convention: lag-0 coefficient comes first)
ar = [1, 0.7, 0.6, .5, .4]
ma = [1]
n = 1000
sample = ts.arma_generate_sample(ar, ma, n)

# plot the series together with its sample ACF and PACF
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(311)
ax.plot(sample)
ax1 = fig.add_subplot(312)
fig = sm.graphics.tsa.plot_acf(sample, lags=40, ax=ax1)
ax2 = fig.add_subplot(313)
fig = sm.graphics.tsa.plot_pacf(sample, lags=40, ax=ax2)
plt.show()

# Augmented Dickey-Fuller unit-root test on the simulated series
result = adfuller(sample)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))

# second example with a different AR polynomial
ar = [1, 0, -1.5, 0, 0.5]
ma = [1]
sample = ts.arma_generate_sample(ar, ma, n)
fig = plt.figure(figsize=(12, 8))