def parse_lutkepohl_data(path): # pragma: no cover
    """
    Parse data files from Lutkepohl (2005) book

    Source for data files: www.jmulti.de

    The file is read in binary mode.  Leading lines are skipped up to and
    including the first line containing ``*/``; the following lines are then
    scanned for a tag of the form ``<YEAR Fn>`` (for example ``<1960 Q1>``),
    where ``F`` is a one-character frequency code and ``n`` is the position
    of the first observation within ``YEAR``.  Everything after the tag line
    is handed to ``np.genfromtxt`` with ``names=True``, so the first line
    after the tag supplies the column names.

    Parameters
    ----------
    path : str
        Location of the data file.

    Returns
    -------
    data : ndarray
        Structured array produced by ``np.genfromtxt(..., names=True)``.
    date_range : pandas date index
        ``len(data)`` business period-end dates at the frequency named in
        the tag (``Q`` quarterly, ``M`` monthly, ``A`` annual).

    Raises
    ------
    IndexError
        If the file ends before a ``*/`` line or a ``<YEAR Fn>`` tag is found.
    KeyError
        If the frequency code is not one of ``Q``, ``M`` or ``A``.
    """
    from collections import deque
    from datetime import datetime
    import re

    import pandas
    try:
        from pandas.tseries import offsets as pd_offsets
    except ImportError:
        # older pandas releases expose the offset classes here instead
        import pandas.core.datetools as pd_offsets

    tag_regex = re.compile(br'<(.*) (\w)([\d]+)>.*')

    # Read everything up front so the handle is closed before genfromtxt
    # re-opens the same path below.
    with open(path, 'rb') as fh:
        lines = deque(fh)

    # Number of lines consumed so far; this is what genfromtxt must skip.
    consumed = 0

    # Skip the leading comment block, including its closing ``*/`` line.
    while True:
        line = lines.popleft()
        consumed += 1
        if b'*/' in line:
            break

    # Scan for the ``<YEAR Fn>`` tag describing the first observation.
    while True:
        line = lines.popleft()
        consumed += 1
        m = tag_regex.match(line)
        if m:
            year, freq, start_point = m.groups()
            break

    data = np.genfromtxt(path, names=True, skip_header=consumed)

    n = len(data)

    # generate the corresponding date range (using pandas for now)
    start_point = int(start_point)
    year = int(year)

    # Keys are bytes because the tag was matched against binary lines.
    offsets = {
        b'Q': pd_offsets.BQuarterEnd(),
        b'M': pd_offsets.BMonthEnd(),
        b'A': pd_offsets.BYearEnd(),
    }

    offset = offsets[freq]

    # First period end on/after Jan 1 of ``year``, advanced by the number of
    # periods that precede the first observation.
    inc = offset * (start_point - 1)
    start_date = offset.rollforward(datetime(year, 1, 1)) + inc

    make_range = getattr(pandas, 'date_range', None)
    if make_range is not None:
        date_range = make_range(start=start_date, freq=offset, periods=n)
    else:
        # very old pandas releases only provide DateRange
        date_range = pandas.DateRange(start_date, offset=offset, periods=n)

    return data, date_range
def parse_lutkepohl_data(path): # pragma: no cover
    """
    Parse data files from Lutkepohl (2005) book

    Source for data files: www.jmulti.de

    The file is read in text mode.  Leading lines are skipped up to and
    including the first line containing ``*/``; the following lines are then
    scanned for a tag of the form ``<YEAR Fn>`` (for example ``<1960 Q1>``),
    where ``F`` is a one-character frequency code and ``n`` is the position
    of the first observation within ``YEAR``.  Everything after the tag line
    is handed to ``np.genfromtxt`` with ``names=True``, so the first line
    after the tag supplies the column names.

    Parameters
    ----------
    path : str
        Location of the data file.

    Returns
    -------
    data : ndarray
        Structured array produced by ``np.genfromtxt(..., names=True)``.
    date_range : pandas date index
        ``len(data)`` business period-end dates at the frequency named in
        the tag (``Q`` quarterly, ``M`` monthly, ``A`` annual).

    Raises
    ------
    IndexError
        If the file ends before a ``*/`` line or a ``<YEAR Fn>`` tag is found.
    KeyError
        If the frequency code is not one of ``Q``, ``M`` or ``A``.
    """
    from collections import deque
    from datetime import datetime
    import re

    import pandas
    try:
        from pandas.tseries import offsets as pd_offsets
    except ImportError:
        # older pandas releases expose the offset classes here instead
        import pandas.core.datetools as pd_offsets

    tag_regex = re.compile(r'<(.*) (\w)([\d]+)>.*')

    # Read everything up front so the handle is closed before genfromtxt
    # re-opens the same path below.
    with open(path) as fh:
        lines = deque(fh)

    # Number of lines consumed so far; this is what genfromtxt must skip.
    consumed = 0

    # Skip the leading comment block, including its closing ``*/`` line.
    while True:
        line = lines.popleft()
        consumed += 1
        if '*/' in line:
            break

    # Scan for the ``<YEAR Fn>`` tag describing the first observation.
    while True:
        line = lines.popleft()
        consumed += 1
        m = tag_regex.match(line)
        if m:
            year, freq, start_point = m.groups()
            break

    data = np.genfromtxt(path, names=True, skip_header=consumed)

    n = len(data)

    # generate the corresponding date range (using pandas for now)
    start_point = int(start_point)
    year = int(year)

    offsets = {
        'Q': pd_offsets.BQuarterEnd(),
        'M': pd_offsets.BMonthEnd(),
        'A': pd_offsets.BYearEnd(),
    }

    offset = offsets[freq]

    # First period end on/after Jan 1 of ``year``, advanced by the number of
    # periods that precede the first observation.
    inc = offset * (start_point - 1)
    start_date = offset.rollforward(datetime(year, 1, 1)) + inc

    make_range = getattr(pandas, 'date_range', None)
    if make_range is not None:
        date_range = make_range(start=start_date, freq=offset, periods=n)
    else:
        # very old pandas releases only provide DateRange
        date_range = pandas.DateRange(start_date, offset=offset, periods=n)

    return data, date_range