def vector_autoregression_example():
    """Walk through a VAR analysis of the statsmodels macro dataset.

    Loads the bundled ``macrodata`` table, fits VAR models to the
    log-differenced ``realgdp``/``realcons``/``realinv`` series, and then
    calls the plotting, forecasting, impulse-response, FEVD and diagnostic
    methods of the fitted results in turn. Nothing is returned; the model
    summary and the FEVD summary are printed.
    """
    macro = sm.datasets.macrodata.load_pandas().data

    # Build a DatetimeIndex from the integer year/quarter columns
    # (strings of the form "<year>Q<quarter>").
    year_quarter = macro[['year', 'quarter']].astype(int).astype(str)
    stamps = dates_from_str(year_quarter['year'] + 'Q' + year_quarter['quarter'])
    mdata = macro[['realgdp', 'realcons', 'realinv']]
    mdata.index = pd.DatetimeIndex(stamps)

    # Log-difference the series; the first row becomes NaN and is dropped.
    data = np.log(mdata).diff().dropna()

    # Fit a VAR with two lags and report it.
    model = VAR(data)
    results = model.fit(2)
    print(results.summary())

    # Plot the input series, then their autocorrelation functions.
    results.plot()
    results.plot_acorr()

    # Lag order selection, then a refit that picks the order by AIC
    # with at most 15 lags.
    model.select_order(15)
    results = model.fit(maxlags=15, ic='aic')

    # Forecast from the last `k_ar` observations.
    lag_order = results.k_ar
    initial_values = data.values[-lag_order:]
    results.forecast(initial_values, 5)
    results.plot_forecast(10)

    # Impulse response analysis over 10 periods.
    irf = results.irf(10)
    irf.plot(orth=False)
    irf.plot(impulse='realgdp')
    irf.plot_cum_effects(orth=False)

    # Forecast error variance decomposition (FEVD).
    fevd = results.fevd(5)
    print(fevd.summary())
    results.fevd(20).plot()

    # Statistical tests: Granger causality, normality, residual whiteness.
    results.test_causality('realgdp', ['realinv', 'realcons'], kind='f')
    results.test_normality()
    results.test_whiteness()
def gen_forecast_w(df):
    '''
    Generates a calibrated 6 period forecast of GDP using a VAR model.
    Uses the GDP as-a-whole approach: the first model column ('pcgdp')
    is forecast directly.

    Parameters
    ----------
    df = dataframe with relevant data; must contain 'year' and 'month'
         columns plus the nine model columns selected below

    Returns
    -------
    Out : 6 period point forecast, 6 period lower interval,
          6 period upper interval
          format = three length-6 numpy arrays
    '''
    steps = 6

    dfc = pd.DataFrame(df, copy=True)
    mdata = dfc[[
        'pcgdp', 'mancap', 'unem', 'pctot', 'pcbusinv', 'pcC', 'pcI',
        'pcipi', 'pcsp500'
    ]]

    # Build a DatetimeIndex from strings of the form "<year>M<month>".
    dates = dfc[['year', 'month']].astype(int).astype(str)
    dates.reset_index(inplace=True, drop=True)
    monthly = dates_from_str(dates['year'] + "M" + dates['month'])
    mdata.index = pd.DatetimeIndex(monthly)

    maw = VAR(mdata, freq='m')
    results = maw.fit(maxlags=12, ic='bic')
    lag_order = results.k_ar

    # One call yields point forecast, lower bound and upper bound together;
    # each is indexed [step][variable].
    point, lower, upper = results.forecast_interval(
        mdata.values[-lag_order:], steps)

    # Keep only column 0, i.e. 'pcgdp' (the first model column).
    w_pf = np.array(point[:steps, 0])
    w_lib = np.array(lower[:steps, 0])
    w_uib = np.array(upper[:steps, 0])

    # Widen each interval symmetrically by the factor .95/.9.
    # NOTE(review): presumably a coverage calibration from backtesting
    # (nominal 95% vs. observed 90%) - confirm.
    w_correction = np.full(steps, .95 / .9)
    width = w_uib - w_lib
    w_adjustment = (width * w_correction - width) / 2

    return w_pf, w_lib - w_adjustment, w_uib + w_adjustment
def __init__(self, data):
    '''
    data: dataframe type, must have one column 'date'
    'date' type: str or datetime like
    data must be stationary based on VAR model assumptions

    NOTE: `data` is modified in place - its index is replaced by the dates
    and the 'date' column is deleted - and the same object is stored on
    `self.data`.
    '''
    first_date = data['date'].iloc[0]
    # isinstance (rather than an exact type comparison) also recognises
    # str subclasses such as numpy.str_ as date strings to be parsed.
    if isinstance(first_date, str):
        data.index = dates_from_str(data['date'].values)
    else:
        data.index = data['date'].values
    del data['date']
    self.data = data
def sort_and_prep_data(raw_data):
    """Turn the raw payload into a date-indexed 'New Cases' frame.

    Reads the 'date' and 'newCases' entries of every element of
    ``raw_data['data']`` and returns a single-column DataFrame
    ('New Cases') whose index is a DatetimeIndex parsed from the dates.
    Rows keep the order of the input; no sorting is performed.
    """
    # Single pass over the payload, reading 'date' then 'newCases' per item.
    pairs = [(point['date'], point['newCases']) for point in raw_data['data']]
    day_labels = [day for day, _ in pairs]
    case_counts = [count for _, count in pairs]

    frame = pandas.DataFrame(columns=['New Cases'])
    frame['New Cases'] = case_counts
    frame.index = pandas.DatetimeIndex(dates_from_str(day_labels))
    return frame
def test(self):
    """Fit a VAR(2) on the macro dataset and print a Granger causality test.

    Uses log-differenced 'realgdp', 'realcons' and 'realinv'; the F-type
    causality test is run with all three variables as both the caused and
    the causing set.
    """
    from statsmodels.tsa.base.datetools import dates_from_str

    macro = sm.datasets.macrodata.load_pandas().data

    # Quarterly index built from strings of the form "<year>Q<quarter>".
    year_quarter = macro[['year', 'quarter']].astype(int).astype(str)
    labels = year_quarter["year"] + "Q" + year_quarter["quarter"]
    stamps = dates_from_str(labels)

    series = macro[['realgdp', 'realcons', 'realinv']]
    series.index = pandas.DatetimeIndex(stamps)
    data = np.log(series).diff().dropna()

    results = VAR(data).fit(2)

    variables = ['realgdp', 'realcons', 'realinv']
    gc_result = results.test_causality(variables,
                                       ['realgdp', 'realcons', 'realinv'],
                                       kind='f')
    print(gc_result.summary())
def read_futures(file):
    """Load two futures closing-price series and return their period ratios.

    Reads the three needed columns from the CSV at `file`, keeps rows
    2000..2372, prints each price series and runs `adf` on it, then
    computes price[i] / price[i - 1] for rows 2001..2372.

    Parameters
    ----------
    file : path or buffer accepted by ``pd.read_csv``

    Returns
    -------
    (DataFrame, dates) : a frame with columns "luowen" and "rejuan" holding
        the ratios, and the parsed dates of rows 2001..2372 (one per ratio).
    """
    futures_data = pd.read_csv(file, usecols=["收盘价:螺纹指数", "收盘价:热卷指数", "指标名称"])

    # Dates start one row later than the prices because the first price row
    # only serves as the denominator of the first ratio.
    quarterly = futures_data["指标名称"].astype(str)
    quarterly = dates_from_str(quarterly[2001:2373])

    futures_data = futures_data[2000:2373]
    futures_data = futures_data[["收盘价:螺纹指数", "收盘价:热卷指数"]]
    futures_data_luowen = futures_data["收盘价:螺纹指数"]
    futures_data_rejuan = futures_data["收盘价:热卷指数"]

    print("############### - 输出 luowen 的 数据 - #############")
    print(futures_data_luowen)
    print("############### - 输出 luowen 的 ADF - #############")
    adf(futures_data_luowen)
    print("############### - 输出 rejuan 的 数据 - #############")
    # Fixed: this previously printed the luowen series a second time.
    print(futures_data_rejuan)
    print("############### - 输出 rejuan 的 ADF - #############")
    adf(futures_data_rejuan)

    def period_ratios(prices):
        # Label-based lookups: the slice above keeps the original row labels.
        return [prices[i] / prices[i - 1] for i in range(2001, 2373)]

    return DataFrame(
        {
            "luowen": period_ratios(futures_data_luowen),
            "rejuan": period_ratios(futures_data_rejuan)
        },
        columns=["luowen", "rejuan"]), quarterly
def test_correct_nobs():
    """Check the shape of resimulated IRFs for a VAR with an exogenous column.

    Regression test for GH6748: a VAR(1) on the log-differenced macro data
    with one random exogenous regressor must yield an ``irf_resim`` array of
    shape (repl, steps + 1, 3, 3) = (100, 11, 3, 3).
    """
    # GH6748
    macro = sm.datasets.macrodata.load_pandas().data

    # Quarterly index built from strings of the form "<year>Q<quarter>".
    year_quarter = macro[['year', 'quarter']].astype(int).astype(str)
    stamps = dates_from_str(year_quarter["year"] + "Q" + year_quarter["quarter"])

    endog_levels = macro[['realgdp', 'realcons', 'realinv']]
    endog_levels.index = pd.DatetimeIndex(stamps)

    data = np.log(endog_levels).diff().dropna()
    data.index.freq = data.index.inferred_freq

    # One exogenous column of standard-normal noise aligned with the data.
    data_exog = pd.DataFrame(index=data.index)
    data_exog['exovar1'] = np.random.normal(size=data_exog.shape[0])

    results = VAR(endog=data, exog=data_exog).fit(maxlags=1)

    irf = results.irf_resim(orth=False,
                            repl=100,
                            steps=10,
                            seed=1,
                            burn=100,
                            cum=False)
    assert irf.shape == (100, 11, 3, 3)
""" from TimeSeries_Tests import * from statsmodels.tsa.api import VAR, DynamicVAR import statsmodels.api as sm from statsmodels.tsa.base.datetools import dates_from_str ######### # Load pre-loaded macroeconomic data from PANDAS ######### mdata = sm.datasets.macrodata.load_pandas().data # prepare the dates index dates = mdata[['year', 'quarter']].astype(int).astype(str) quarterly = dates["year"] + "Q" + dates["quarter"] quarterly = dates_from_str(quarterly) mdata1 = mdata[['realgdp', 'cpi', 'unemp', 'infl']] mdata1.index = pd.DatetimeIndex(quarterly) """ a. Take Log difference of the level variables b. Take differences of the rate variables """ mdata1['realgdp_logdiff'] = pd.Series( np.log(mdata1['realgdp']).diff().dropna()) mdata1['cpi_logdiff'] = pd.Series(np.log(mdata1['cpi']).diff().dropna()) mdata1['unemp_diff'] = pd.Series(mdata1['unemp'].diff().dropna()) mdata1['infl_diff'] = pd.Series(mdata1['infl'].diff().dropna()) """ a. Drop NA
import numpy as np
from statsmodels.tsa.api import VAR
from statsmodels.api import datasets as ds
from statsmodels.tsa.base.datetools import dates_from_str
import pandas

mdata = ds.macrodata.load_pandas().data

# Prepare the dates index from strings of the form "<year>Q<quarter>".
# astype(str) (not a fixed-width bytes dtype such as 'S4') is required so
# that the values can be concatenated with the "Q" text literal on Python 3.
dates = mdata[['year', 'quarter']].astype(int).astype(str)
quarterly = dates["year"] + "Q" + dates["quarter"]
quarterly = dates_from_str(quarterly)

mdata = mdata[['realgdp', 'realcons', 'realinv']]
mdata.index = pandas.DatetimeIndex(quarterly)

# Log-difference the series; the leading NaN row is dropped.
data = np.log(mdata).diff().dropna()

model = VAR(data)
est = model.fit(maxlags=2)


def plot_input():
    """Plot the input time series of the fitted model."""
    est.plot()


def plot_acorr():
    """Plot the autocorrelation functions from the fitted model."""
    est.plot_acorr()


def plot_irf():
    """Plot the impulse responses of the fitted model (default horizon)."""
    est.irf().plot()
import pandas as pd
from statsmodels.tsa.base.datetools import dates_from_str
import numpy as np
from arch.unitroot import ADF
from statsmodels.tsa.api import VAR
import seaborn as sns
import matplotlib.pyplot as plt

sns.set()

data = pd.read_excel('hw12/quarterly.7775706_第一章.xlsx').dropna()
quarterly = dates_from_str(data['DATE'])

# Work on an explicit copy: new columns are assigned below, and assigning
# into a column-subset of `data` triggers pandas chained-assignment warnings.
mdata = data[['r10', 'Tbill', 'IndProd', 'Unemp']].copy()
mdata.index = pd.DatetimeIndex(quarterly)

# Model variables: the r10 - Tbill spread, log-differenced IndProd and
# first-differenced Unemp.
mdata['r'] = mdata['r10'] - mdata['Tbill']
mdata['IndProd'] = np.log(mdata['IndProd']).diff()
mdata['Unemp'] = mdata['Unemp'].diff()
mdata = mdata.drop(['r10', 'Tbill'], axis=1).dropna()

# ADF Test
print(ADF(mdata['r']).summary())
print(ADF(mdata['IndProd']).summary())
print(ADF(mdata['Unemp']).summary())

# VAR fit (no constant term)
results = VAR(mdata).fit(ic='bic', verbose=True, trend='nc')
results.plot()
print(results.summary())

# Selected lag order
if k + bet > total: bet = total - k dates = ori_data.iloc[k:k + bet, 0] if dates.shape[0] < 12: break start_date = dates.iloc[0] end_date = dates.iloc[bet - 1] for i in range(bet): # print(dates.iloc[i].replace(' ', 'Q')) new_date = dates.iloc[i].split(' ') r = new_date[0] + 'Q' + str(int(new_date[1]) // 4 + 1) dates.iloc[i] = r # print(dates) # print(dates.shape) # quarterly = dates["year"] + "Q" + dates["quarter"] quarterly = dates_from_str(dates) mdata = ori_data.iloc[k:k + bet, 1:] # print(mdata) # print(quarterly) mdata.index = pd.DatetimeIndex(quarterly) # print(mdata) data = np.log(mdata).diff().dropna() # print(data) if data.shape[0] <= 1: continue res = model(data) # brand = brand[:-4] print(res) if np.isnan(res): res = random.random()
import numpy as np
import pandas as pd
from scipy import stats
from statsmodels.tsa.base.datetools import dates_from_str
from statsmodels.tsa.api import VAR

data = pd.read_csv('RepairTrain.csv')
sales = pd.read_csv("SaleTrain.csv")
out = pd.read_csv("Output_TargetID_Mapping.csv")

# Map each value of the first output column to the list of values of the
# second column that appear with it.
needed_out_vars = {}
for r in out.values:
    needed_out_vars.setdefault(r[0], []).append(r[1])

# Total number of repairs per (module, component, repair month).
d_grp = data.groupby(
    ['module_category', 'component_category', 'year/month(repair)'],
    as_index=False).agg({'number_repair': 'sum'})

d = dates_from_str(d_grp['year/month(repair)'])
d_grp['d'] = d
# DataFrame.sort was removed from pandas; sort_values is its replacement.
d_grp = d_grp.sort_values(
    ['module_category', 'component_category', 'd'], ascending=True)
d_grp.index = pd.DatetimeIndex(d_grp['d'])
del d_grp['year/month(repair)']

# Category labels: M1..M9 and P01..P31 (component numbers are zero-padded).
modules = ['M' + str(i) for i in range(1, 10)]
components = ['P%02d' % i for i in range(1, 32)]

#2007/03
#2009/12
manual_load_data() # -------------------- # | | # | !!START HERE!! | # | | # --------------------- # Here is the actual VAR for country in MASTER_DICT: # Currently, the VAR runs one country at a time, not sure if we want to change that later print 'Doing analysis of: ' + country # Creates a general dataframe, sorts by year, then removes year df = pd.DataFrame(data=MASTER_DICT[country]) df = df.dropna() # how='all' dates = dates_from_str(df.index) df.index = pd.DatetimeIndex(dates) # THIS DROPS THE N/A DATA, MIGHT REVISE LATER # For example, countries only have like 10 entries sometimes that are perfect # However, inputing a zero would skew the results, which is worse than limited data # So we will have to make that decision later. currently the model just drops any non-data entries # Regression engine doesn't like columns of only zeros (or constants), this gets rid of those for column in df: pd.to_numeric(df[column]) total = 0 conse_dupe_counter = 0 old_val = 0 for datum in df[column]: try: if datum == old_val:
def gen_forecast_bp(df):
    '''
    Generates a calibrated 6 period forecast of GDP percentage change using
    a VAR model. Uses the GDP by-part approach: GDP is the sum of the
    component parts according to the equation
    GDP = C + I + G + net exports, and each output value is the percentage
    change of that sum from the previous period.

    Parameters
    ----------
    df = dataframe with relevant data; must contain 'year' and 'month'
         columns plus the nine model columns selected below

    Returns
    -------
    Out : 6 period point forecast, 6 period lower interval,
          6 period upper interval
          output format is three length-6 numpy arrays
    '''
    steps = 6
    n_parts = 4  # 'C', 'I', 'G', 'net_exports' are the first four columns

    def pc_convert(pre, post):
        return ((post - pre) / pre) * 100

    def pct_path(last_total, path):
        # Period-over-period percentage change of the summed components,
        # starting from the last observed total.
        totals = path[:steps, :n_parts].sum(axis=1)
        previous = np.concatenate(([last_total], totals[:-1]))
        return pc_convert(previous, totals)

    dfc = pd.DataFrame(df, copy=True)
    bp_data = dfc[[
        'C', 'I', 'G', 'net_exports', 'unem', 'meanprice', 'mancap',
        'man_industelect', 'electtot'
    ]]

    # Build a DatetimeIndex from strings of the form "<year>M<month>".
    dates = dfc[['year', 'month']].astype(int).astype(str)
    dates.reset_index(inplace=True, drop=True)
    monthly = dates_from_str(dates['year'] + "M" + dates['month'])
    bp_data.index = pd.DatetimeIndex(monthly)

    b_p = VAR(bp_data, freq='m')
    bp_results = b_p.fit(maxlags=12, ic='bic')
    lag_order = bp_results.k_ar

    # Per-step widening factors for the interval.
    # NOTE(review): presumably nominal 95% coverage divided by the coverage
    # observed in backtesting at each horizon - confirm.
    bp_correction = np.array([(.95 / .91), (.95 / .88), (.95 / .87),
                              (.95 / .87), (.95 / .85), (.95 / .83)])

    # One call yields point forecast, lower bound and upper bound together;
    # each is indexed [step][variable].
    point_fcast, lower_bounds, upper_bounds = bp_results.forecast_interval(
        bp_data.values[-lag_order:], steps)

    # Aggregate the component forecasts into percentage changes of the total.
    last_total = bp_data.iloc[-1, :n_parts].sum()
    bp_pf = pct_path(last_total, point_fcast)
    bp_lib = pct_path(last_total, lower_bounds)
    bp_uib = pct_path(last_total, upper_bounds)

    # Widen each interval symmetrically by its correction factor.
    width = bp_uib - bp_lib
    bp_adjustment = (width * bp_correction - width) / 2
    bp_lib = bp_lib - bp_adjustment
    bp_uib = bp_uib + bp_adjustment

    return bp_pf, bp_lib, bp_uib
chicago_cases = []
chicago_deaths = []
chicago_dates = []
chicago_ventilators = []
chicago_icu = []
chicago_tests = []

# Collect the case/death columns of every row whose third field is
# 'Illinois'. The `with` block closes the file, so no explicit close() is
# needed.
with open(feature_file_cases, newline='') as fh:
    spamreader = csv.reader(fh, delimiter=',', quotechar='|')
    for item in spamreader:
        if item[2] == 'Illinois':
            chicago_cases.append(item[8])
            chicago_deaths.append(item[9])
            chicago_dates.append(item[0])

chicago_dates = dates_from_str(chicago_dates)
dataf['New Cases'] = chicago_cases
dataf['New Deaths'] = chicago_deaths
dataf.index = pd.DatetimeIndex(chicago_dates)

# Same filter on the tests file; only the part of the timestamp before the
# first space is kept as the date.
temp_dates = []
with open(feature_file_tests, newline='') as fh:
    spamreader = csv.reader(fh, delimiter=',', quotechar='|')
    for item in spamreader:
        if item[1] == 'Illinois':
            chicago_tests.append(item[14])
            temp_dates.append(item[2].split(' ')[0])

temp_dates = dates_from_str(temp_dates)
# Useful for viewing all columns in notebook
# pd.set_option('display.max_columns', 40)

df = pd.read_pickle('cleaned_421.pkl', compression='zip')
'''~~~~~~~~~~~~~~~~~~~~~~~~~~Part 1: Basic VAR Model Creation Code~~~~~~~~~~~~~~~~~~~~~~~~~~'''

# Target dataset creation and time formatting
dfc = pd.DataFrame(df, copy=True)
mdata = dfc[[
    'pcgdp', 'mancap', 'unem', 'pctot', 'pcbusinv', 'pcC', 'pcI', 'pcipi',
    'pcsp500'
]]

# Build a DatetimeIndex from strings of the form "<year>M<month>".
dates = dfc[['year', 'month']].astype(int).astype(str)
dates.reset_index(inplace=True, drop=True)
monthly = dates['year'] + "M" + dates['month']
monthly = dates_from_str(monthly)
mdata.index = pd.DatetimeIndex(monthly)

maw = VAR(mdata, freq='m')

# Traditionally with 6 lags:
results = maw.fit(6)

# Or to autoselect lag order based on info criterion (this replaces the
# 6-lag fit above):
results = maw.fit(maxlags=12, ic='bic')

# Note that we have to specify the "initial value" for the forecast:
lag_order = results.k_ar  # this equals the number of lags used to build model

# To gen a point forecast six steps out (use results.forecast_interval to
# also get the lower and upper interval bounds):
results.forecast(mdata.values[-lag_order:], 6)
def get_by_parts_calibration_data(df, n_results):
    '''
    Runs the by-parts VAR repeatedly for backtesting: drops the most recent
    row of data, refits, records the 6 period forecast (as percentage change
    of C + I + G + net exports) with its lower and upper bounds, and
    repeats. The h-step-ahead columns are then shifted so that every row of
    the output holds the forecasts made for the same target period.

    Parameters
    ----------
    df: dataframe containing data for use in VAR model; must contain 'year',
        'month', 'pcgdp' and the nine model columns selected below
    n_results: int, number of periods backwards to run the model and retain
        results, must be greater than 6

    Returns
    -------
    out : pandas dataframe indexed by the last n_results labels of df, with
        columns p{h}p / p{h}l / p{h}u (point / lower / upper, h = 1..6) and
        'actual' (the 'pcgdp' target) for comparison.
    '''
    steps = 6
    n_parts = 4  # 'C', 'I', 'G', 'net_exports' are the first four columns

    def pc_convert(pre, post):
        return ((post - pre) / pre) * 100

    def pct_path(last_total, path):
        # Stata code for example
        # pcforecast = ((forecast- L.forecast)/L.forecast)*100
        totals = path[:steps, :n_parts].sum(axis=1)
        previous = np.concatenate(([last_total], totals[:-1]))
        return pc_convert(previous, totals)

    dfc = pd.DataFrame(df, copy=True)
    # Explicit copy so that shortening the sample below never touches dfc.
    bp_data = dfc[[
        'C', 'I', 'G', 'net_exports', 'unem', 'meanprice', 'mancap',
        'man_industelect', 'electtot'
    ]].copy()

    # Build a DatetimeIndex from strings of the form "<year>M<month>".
    dates = dfc[['year', 'month']].astype(int).astype(str)
    dates.reset_index(inplace=True, drop=True)
    monthly = dates_from_str(dates['year'] + "M" + dates['month'])
    bp_data.index = pd.DatetimeIndex(monthly)

    # steps - 1 extra runs are needed so that every horizon has n_results
    # forecasts once the columns are aligned.
    n_runs = n_results + steps - 1
    runs = {'p': [], 'l': [], 'u': []}
    for _ in range(n_runs):
        # Drop the most recent observation, then refit on what is left.
        bp_data = bp_data.iloc[:-1]
        results = VAR(bp_data, freq='m').fit(maxlags=12, ic='bic')
        lag_order = results.k_ar

        # One call yields point forecast, lower and upper bounds together.
        point_fcast, lower_bounds, upper_bounds = results.forecast_interval(
            bp_data.values[-lag_order:], steps)

        last_total = bp_data.iloc[-1, :n_parts].sum()
        runs['p'].append(pct_path(last_total, point_fcast))
        runs['l'].append(pct_path(last_total, lower_bounds))
        runs['u'].append(pct_path(last_total, upper_bounds))

    column_names = []
    columns = {}
    for suffix in ('p', 'l', 'u'):
        # Runs were collected newest-sample-first; flip to oldest-first so
        # that row j comes from the sample with the most rows removed.
        table = np.array(runs[suffix][::-1])
        for h in range(1, steps + 1):
            name = 'p%d%s' % (h, suffix)
            column_names.append(name)
            # The h-step forecast from the run h - 1 positions earlier
            # targets the same period as the 1-step forecast of this row.
            columns[name] = table[steps - h:n_runs - h + 1, h - 1]

    out = pd.DataFrame(columns,
                       columns=column_names,
                       index=df.index[-n_results:])
    out['actual'] = dfc['pcgdp'][-n_results:]
    return out