def parse_lutkepohl_data(path):  # pragma: no cover
    """
    Parse data files from Lutkepohl (2005) book

    Source for data files: www.jmulti.de

    Parameters
    ----------
    path : str
        Location of the data file. The file is read once in binary mode to
        scan its header and is then passed to ``np.genfromtxt``.

    Returns
    -------
    data : ndarray
        Structured array produced by ``np.genfromtxt(..., names=True)`` from
        the lines that follow the date marker.
    date_range : DatetimeIndex
        ``len(data)`` dates at the frequency named in the date marker.

    Raises
    ------
    IndexError
        If the file runs out of lines before ``*/`` or the date marker
        (``<year freq start>``) is found.
    KeyError
        If the frequency letter in the marker is not ``Q``, ``M`` or ``A``.
    """
    from collections import deque
    from datetime import datetime
    import re

    import pandas

    # Raw string so that ``\w`` and ``\d`` reach the regex engine untouched
    # instead of being treated as (invalid) string escapes.
    regex = re.compile(asbytes(r'<(.*) (\w)([\d]+)>.*'))

    with open(path, 'rb') as f:
        lines = deque(f)

    # Count the lines that come before the one containing ``*/``
    # (presumably the end of a leading comment block).
    to_skip = 0
    while asbytes('*/') not in lines.popleft():
        to_skip += 1

    # Keep consuming lines until the date marker matches; on exit ``to_skip``
    # is the zero-based index of the marker line.
    while True:
        to_skip += 1
        line = lines.popleft()
        m = regex.match(line)
        if m:
            year, freq, start_point = m.groups()
            break

    # Skip everything up to and including the marker line; the next line
    # supplies the field names.
    data = np.genfromtxt(path, names=True, skip_header=to_skip + 1)

    n = len(data)

    # generate the corresponding date range (using pandas for now)
    start_point = int(start_point)
    year = int(year)

    # Keys are bytes because the header was matched against bytes.
    offsets = {
        asbytes('Q'): frequencies.BQuarterEnd(),
        asbytes('M'): frequencies.BMonthEnd(),
        asbytes('A'): frequencies.BYearEnd()
    }

    offset = offsets[freq]

    # ``start_point`` is one-based: 1 means the first period of ``year``.
    inc = offset * (start_point - 1)
    start_date = offset.rollforward(datetime(year, 1, 1)) + inc

    # ``DatetimeIndex(start=..., periods=...)`` is no longer accepted by
    # pandas; ``date_range`` is the supported way to build the same index.
    date_range = pandas.date_range(start=start_date, freq=offset, periods=n)

    return data, date_range
def parse_lutkepohl_data(path):  # pragma: no cover
    """
    Read one of the data files that accompany Lütkepohl (2005).

    The files are distributed at www.jmulti.de

    Parameters
    ----------
    path : str
        Location of the data file. It is scanned once in binary mode to find
        the date marker and then passed to ``pd.read_csv``.

    Returns
    -------
    data : recarray
        Records obtained from ``DataFrame.to_records(index=False)``.
    date_range : DatetimeIndex
        ``len(data)`` dates at the frequency named in the date marker.

    Raises
    ------
    IndexError
        If the file is exhausted before ``*/`` or the date marker
        (``<year freq start>``) is found.
    KeyError
        If the frequency letter in the marker is not ``Q``, ``M`` or ``A``.
    """
    import re
    from collections import deque
    from datetime import datetime

    marker_pattern = re.compile(asbytes(r'<(.*) (\w)([\d]+)>.*'))

    with open(path, 'rb') as handle:
        pending = deque(handle)

    # Number of lines that precede the one containing ``*/``.
    n_header = 0
    while True:
        if asbytes('*/') in pending.popleft():
            break
        n_header += 1

    # Advance line by line until the date marker is matched; afterwards
    # ``n_header`` equals the zero-based index of the marker line.
    found = None
    while found is None:
        n_header += 1
        found = marker_pattern.match(pending.popleft())
    year, freq, start_point = found.groups()

    # The row index right after the marker line is passed as the header row.
    frame = pd.read_csv(path, delimiter=r"\s+", header=n_header + 1)
    data = frame.to_records(index=False)
    n = len(data)

    # Build the matching dates; the marker fields arrive as bytes.
    start_point = int(start_point)
    year = int(year)

    by_letter = {
        asbytes('Q'): frequencies.BQuarterEnd(),
        asbytes('M'): frequencies.BMonthEnd(),
        asbytes('A'): frequencies.BYearEnd()
    }
    step = by_letter[freq]

    # ``start_point`` is one-based, so the first period needs no shift.
    shift = step * (start_point - 1)
    first_date = step.rollforward(datetime(year, 1, 1)) + shift

    date_range = pd.date_range(start=first_date, freq=step, periods=n)
    return data, date_range