def parse_lutkepohl_data(path):  # pragma: no cover
    """
    Parse data files from Lutkepohl (2005) book

    Source for data files: www.jmulti.de

    Parameters
    ----------
    path : str
        Location of the data file.  The file is read as bytes and must
        contain a line with ``*/`` (end of the leading comment block),
        later followed by a line of the form ``<YEAR Fn>`` where ``F`` is
        a one-character frequency code (``Q``, ``M`` or ``A``) and ``n``
        is the 1-based period within ``YEAR`` of the first observation.
        The line right after that marker holds the column names.

    Returns
    -------
    data : ndarray
        Structured array produced by ``np.genfromtxt`` with field names
        taken from the column-name line.
    date_range : DatetimeIndex
        ``len(data)`` dates spaced by the business quarter/month/year-end
        offset selected by the frequency code.

    Raises
    ------
    IndexError
        If the file ends before the ``*/`` line or the ``<YEAR Fn>`` line
        is found.
    KeyError
        If the frequency code is not ``Q``, ``M`` or ``A``.
    """
    from collections import deque
    from datetime import datetime
    import pandas
    import re

    # Raw bytes pattern: the file is opened in binary mode, and a raw
    # literal avoids the invalid ``\w`` / ``\d`` escape sequences.
    marker_re = re.compile(br'<(.*) (\w)([\d]+)>.*')

    with open(path, 'rb') as f:
        lines = deque(f)

    # After both loops ``to_skip`` is the 0-based index of the marker line:
    # the first loop counts the lines before the ``*/`` line, the second
    # counts every following line up to and including the marker itself.
    to_skip = 0
    while b'*/' not in lines.popleft():
        to_skip += 1

    while True:
        to_skip += 1
        m = marker_re.match(lines.popleft())
        if m:
            year, freq, start_point = m.groups()
            break

    # Skip everything through the marker line; the next line gives the names.
    data = np.genfromtxt(path, names=True, skip_header=to_skip + 1)
    n = len(data)

    # generate the corresponding date range (using pandas for now)
    start_point = int(start_point)
    year = int(year)

    # Keys are bytes because ``freq`` comes from a bytes regex match.
    offsets = {
        b'Q': frequencies.BQuarterEnd(),
        b'M': frequencies.BMonthEnd(),
        b'A': frequencies.BYearEnd(),
    }

    offset = offsets[freq]
    # Advance from the first period end of ``year`` to period ``start_point``.
    inc = offset * (start_point - 1)
    start_date = offset.rollforward(datetime(year, 1, 1)) + inc

    # ``DatetimeIndex(start=..., periods=...)`` is no longer accepted by
    # pandas; ``date_range`` is the supported constructor.
    date_range = pandas.date_range(start=start_date, freq=offset, periods=n)

    return data, date_range
def parse_lutkepohl_data(path):  # pragma: no cover
    """
    Parse data files from Lütkepohl (2005) book

    Source for data files: www.jmulti.de

    Parameters
    ----------
    path : str
        Location of the data file.  The file is read as bytes and must
        contain a line with ``*/`` (end of the leading comment block),
        later followed by a line of the form ``<YEAR Fn>`` where ``F`` is
        a one-character frequency code (``Q``, ``M`` or ``A``) and ``n``
        is the 1-based period within ``YEAR`` of the first observation.
        The whitespace-separated column names follow that marker.

    Returns
    -------
    data : recarray
        Records built from the whitespace-delimited table via
        ``pd.read_csv(...).to_records(index=False)``.
    date_range : DatetimeIndex
        ``len(data)`` dates spaced by the business quarter/month/year-end
        offset selected by the frequency code.

    Raises
    ------
    IndexError
        If the file ends before the ``*/`` line or the ``<YEAR Fn>`` line
        is found.
    KeyError
        If the frequency code is not ``Q``, ``M`` or ``A``.
    """
    from collections import deque
    from datetime import datetime
    import re

    # Bytes pattern because the file is opened in binary mode.
    marker_re = re.compile(br'<(.*) (\w)([\d]+)>.*')

    with open(path, 'rb') as f:
        lines = deque(f)

    # After both loops ``to_skip`` is the 0-based index of the marker line:
    # the first loop counts the lines before the ``*/`` line, the second
    # counts every following line up to and including the marker itself.
    to_skip = 0
    while b'*/' not in lines.popleft():
        to_skip += 1

    while True:
        to_skip += 1
        m = marker_re.match(lines.popleft())
        if m:
            year, freq, start_point = m.groups()
            break

    # ``to_skip`` counts physical lines, so use ``skiprows`` (which also
    # counts physical lines) instead of ``header=``: ``header`` ignores
    # blank lines and would pick the wrong row if the skipped section
    # contained any.
    table = pd.read_csv(path, delimiter=r"\s+", skiprows=to_skip + 1)
    data = table.to_records(index=False)
    n = len(data)

    # generate the corresponding date range (using pandas for now)
    start_point = int(start_point)
    year = int(year)

    # Keys are bytes because ``freq`` comes from a bytes regex match.
    offsets = {
        b'Q': frequencies.BQuarterEnd(),
        b'M': frequencies.BMonthEnd(),
        b'A': frequencies.BYearEnd(),
    }

    offset = offsets[freq]
    # Advance from the first period end of ``year`` to period ``start_point``.
    inc = offset * (start_point - 1)
    start_date = offset.rollforward(datetime(year, 1, 1)) + inc

    date_range = pd.date_range(start=start_date, freq=offset, periods=n)

    return data, date_range
# NOTE(review): the statements down to the ``font.size`` restore are the tail
# of a plotting routine whose header lies outside this chunk; they are kept
# verbatim and in order, but their original nesting cannot be recovered from
# here and must be re-established when merging.
ax.vlines(xs, [0], acorr[:, i])
ax.axhline(0, color='k')
ax.set_ylim([-1, 1])
# hack?
ax.set_xlim([-1, xs[-1] + 1])
# Put back the font size saved in ``old_size``.
mpl.rcParams['font.size'] = old_size

# Example TSA descriptive

data = sm.datasets.macrodata.load()
mdata = data.data
df = DataFrame.from_records(mdata)
quarter_end = frequencies.BQuarterEnd()
# Index every row by the business quarter end on or after the first day of
# the quarter's last month; the 'year' and 'quarter' columns are consumed.
df.index = [quarter_end.rollforward(datetime(int(y), int(q) * 3, 1))
            for y, q in zip(df.pop('year'), df.pop('quarter'))]
# ``DataFrame.ix`` has been removed from pandas; ``.loc`` performs the same
# label-based column selection.
logged = np.log(df.loc[:, ['m1', 'realgdp', 'cpi']])
logged.plot(subplots=True)

# First differences of the logged series, dropping the leading NaN row.
log_difference = logged.diff().dropna()
plot_acf_multiple(log_difference.values)

# Example TSA VAR

model = tsa.VAR(log_difference, freq='D')
print(model.select_order())

res = model.fit(2)
print(res.summary())