import numpy as np
from sklearn.preprocessing import PowerTransformer
from statsmodels.tsa.seasonal import seasonal_decompose
from arch.bootstrap import MovingBlockBootstrap as MBB


def augmentation(X, Y, noise=False, bootstrapping=True,
                 noiseSTD=(0.1 / 2, 0.1 / 2, 0.01 / 2, 0.0002 / 2,
                           0.01 / 2, 0.02 / 2),
                 nr_boot=1000, bootstrap_bl_size=488, boot_freq=100):
    # Augment a 3-D dataset X (samples x timesteps x features) and its labels Y
    # by (a) adding per-feature Gaussian noise and/or (b) moving block
    # bootstrapping the residuals of a seasonal decomposition.
    if noise:
        Xn = X.copy()
        for i, j, k in np.ndindex(X.shape):
            Xn[i, j, k] += np.random.normal(0, 1) * noiseSTD[k]
        X = np.vstack([X, Xn])
        Y = np.vstack([Y, Y])

    if bootstrapping:
        Xb = X.copy()
        pt = PowerTransformer(method='yeo-johnson', standardize=True)
        for i in range(Xb.shape[0]):
            pt.fit(Xb[i])
            lambda_param = pt.lambdas_  # fitted lambdas (unused, kept for inspection)
            transformed = pt.transform(Xb[i])
            # NOTE: statsmodels >= 0.11 renames freq= to period=.
            result = seasonal_decompose(transformed, model='additive',
                                        freq=boot_freq)
            # Moving block bootstrap on the residuals; as written, only the
            # last of the nr_boot replicates is kept for the reconstruction.
            bootstrapRes = MBB(bootstrap_bl_size, result.resid)
            for data in bootstrapRes.bootstrap(nr_boot):
                bs_x = data[0][0]
                reconSeriesYC = result.trend + result.seasonal + bs_x
            Xb[i] = pt.inverse_transform(reconSeriesYC)
        # The decomposition leaves NaNs at the series edges; fall back to the
        # original values there.
        for i, j, k in np.ndindex(X.shape):
            if np.isnan(Xb[i, j, k]):
                Xb[i, j, k] = X[i, j, k]
        X = np.vstack([X, Xb])
        Y = np.vstack([Y, Y])

    return X, Y
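# A minimal usage sketch (not from the original source): the shapes are
# assumptions; augmentation() expects a 3-D X of (samples, timesteps, features)
# with one noiseSTD entry per feature, and a 2-D Y. The noise branch simply
# doubles the dataset along axis 0.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    X_demo = rng.normal(size=(4, 200, 6))   # 4 samples, 200 timesteps, 6 features
    Y_demo = rng.integers(0, 2, size=(4, 1)).astype(float)
    X_aug, Y_aug = augmentation(X_demo, Y_demo, noise=True, bootstrapping=False)
    print(X_aug.shape, Y_aug.shape)          # (8, 200, 6) (8, 1)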
def test_uneven_sampling(bs_setup):
    bs = MovingBlockBootstrap(block_size=31, y=bs_setup.y_series, x=bs_setup.x_df)
    for _, kw in bs.bootstrap(10):
        assert kw["y"].shape == bs_setup.y_series.shape
        assert kw["x"].shape == bs_setup.x_df.shape
    bs = CircularBlockBootstrap(block_size=31, y=bs_setup.y_series, x=bs_setup.x_df)
    for _, kw in bs.bootstrap(10):
        assert kw["y"].shape == bs_setup.y_series.shape
        assert kw["x"].shape == bs_setup.x_df.shape
def test_uneven_sampling(self):
    bs = MovingBlockBootstrap(block_size=31, y=self.y_series, x=self.x_df)
    for _, kw in bs.bootstrap(10):
        assert kw['y'].shape == self.y_series.shape
        assert kw['x'].shape == self.x_df.shape
    bs = CircularBlockBootstrap(block_size=31, y=self.y_series, x=self.x_df)
    for _, kw in bs.bootstrap(10):
        assert kw['y'].shape == self.y_series.shape
        assert kw['x'].shape == self.x_df.shape
from arch.bootstrap import MovingBlockBootstrap


def moving_block_bootstrap_method(X, Y, block_size=50, n_samples=50):
    # Resample X and Y jointly so that each bootstrap pair preserves the
    # serial dependence within blocks of length block_size.
    boot_samples = []
    bs = MovingBlockBootstrap(block_size, X, y=Y)
    for samp in bs.bootstrap(n_samples):
        # samp is (positional data, keyword data):
        # samp[0][0] is the resampled X, samp[1]['y'] the resampled Y.
        boot_samples.append((samp[0][0], samp[1]['y']))
    return boot_samples
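# Usage sketch (synthetic data; shapes are assumptions): each element of the
# returned list is one resampled (X*, Y*) pair of the same shape as the input.
import numpy as np
x = np.random.standard_normal((500, 3))
y = np.random.standard_normal(500)
pairs = moving_block_bootstrap_method(x, y, block_size=25, n_samples=10)
print(len(pairs), pairs[0][0].shape, pairs[0][1].shape)  # 10 (500, 3) (500,)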
def test_smoke(self):
    num_bootstrap = 20

    def func(y):
        return y.mean(axis=0)

    bs = StationaryBootstrap(13, self.y)
    cov = bs.cov(func, reps=num_bootstrap)
    bs = MovingBlockBootstrap(13, self.y)
    cov = bs.cov(func, reps=num_bootstrap)
    bs = CircularBlockBootstrap(13, self.y)
    cov = bs.cov(func, reps=num_bootstrap)
    bs = MovingBlockBootstrap(10, self.y)
    cov = bs.cov(func, reps=num_bootstrap)
    bs = CircularBlockBootstrap(10, self.y)
    cov = bs.cov(func, reps=num_bootstrap)
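# Standalone sketch of what the smoke test exercises: cov(func, reps) returns
# the bootstrap covariance estimate of func's output. Synthetic data, for
# illustration only; not part of the test suite.
import numpy as np
from arch.bootstrap import MovingBlockBootstrap

y = np.random.standard_normal((250, 2))
bs = MovingBlockBootstrap(13, y)
print(bs.cov(lambda a: a.mean(axis=0), reps=100))  # 2x2 covariance of the means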
def test_str(bs_setup):
    bs = IIDBootstrap(bs_setup.y_series)
    expected = "IID Bootstrap(no. pos. inputs: 1, no. keyword inputs: 0)"
    assert_equal(str(bs), expected)
    expected = expected[:-1] + ", ID: " + hex(id(bs)) + ")"
    assert_equal(bs.__repr__(), expected)
    expected = ("<strong>IID Bootstrap</strong>("
                "<strong>no. pos. inputs</strong>: 1, "
                "<strong>no. keyword inputs</strong>: 0, "
                "<strong>ID</strong>: " + hex(id(bs)) + ")")
    assert_equal(bs._repr_html(), expected)

    bs = StationaryBootstrap(10, bs_setup.y_series, bs_setup.x_df)
    expected = ("Stationary Bootstrap(block size: 10, no. pos. "
                "inputs: 2, no. keyword inputs: 0)")
    assert_equal(str(bs), expected)
    expected = expected[:-1] + ", ID: " + hex(id(bs)) + ")"
    assert_equal(bs.__repr__(), expected)

    bs = CircularBlockBootstrap(block_size=20, y=bs_setup.y_series,
                                x=bs_setup.x_df)
    expected = ("Circular Block Bootstrap(block size: 20, no. pos. "
                "inputs: 0, no. keyword inputs: 2)")
    assert_equal(str(bs), expected)
    expected = expected[:-1] + ", ID: " + hex(id(bs)) + ")"
    assert_equal(bs.__repr__(), expected)
    expected = ("<strong>Circular Block Bootstrap</strong>("
                "<strong>block size</strong>: 20, "
                "<strong>no. pos. inputs</strong>: 0, "
                "<strong>no. keyword inputs</strong>: 2, "
                "<strong>ID</strong>: " + hex(id(bs)) + ")")
    assert_equal(bs._repr_html(), expected)

    bs = MovingBlockBootstrap(block_size=20, y=bs_setup.y_series,
                              x=bs_setup.x_df)
    expected = ("Moving Block Bootstrap(block size: 20, no. pos. "
                "inputs: 0, no. keyword inputs: 2)")
    assert_equal(str(bs), expected)
    expected = expected[:-1] + ", ID: " + hex(id(bs)) + ")"
    assert_equal(bs.__repr__(), expected)
    expected = ("<strong>Moving Block Bootstrap</strong>("
                "<strong>block size</strong>: 20, "
                "<strong>no. pos. inputs</strong>: 0, "
                "<strong>no. keyword inputs</strong>: 2, "
                "<strong>ID</strong>: " + hex(id(bs)) + ")")
    assert_equal(bs._repr_html(), expected)
def test_str(self):
    bs = IIDBootstrap(self.y_series)
    expected = 'IID Bootstrap(no. pos. inputs: 1, no. keyword inputs: 0)'
    assert_equal(str(bs), expected)
    expected = expected[:-1] + ', ID: ' + hex(id(bs)) + ')'
    assert_equal(bs.__repr__(), expected)
    expected = ('<strong>IID Bootstrap</strong>('
                '<strong>no. pos. inputs</strong>: 1, '
                '<strong>no. keyword inputs</strong>: 0, '
                '<strong>ID</strong>: ' + hex(id(bs)) + ')')
    assert_equal(bs._repr_html(), expected)

    bs = StationaryBootstrap(10, self.y_series, self.x_df)
    expected = ('Stationary Bootstrap(block size: 10, no. pos. '
                'inputs: 2, no. keyword inputs: 0)')
    assert_equal(str(bs), expected)
    expected = expected[:-1] + ', ID: ' + hex(id(bs)) + ')'
    assert_equal(bs.__repr__(), expected)

    bs = CircularBlockBootstrap(block_size=20, y=self.y_series, x=self.x_df)
    expected = ('Circular Block Bootstrap(block size: 20, no. pos. '
                'inputs: 0, no. keyword inputs: 2)')
    assert_equal(str(bs), expected)
    expected = expected[:-1] + ', ID: ' + hex(id(bs)) + ')'
    assert_equal(bs.__repr__(), expected)
    expected = ('<strong>Circular Block Bootstrap</strong>('
                '<strong>block size</strong>: 20, '
                '<strong>no. pos. inputs</strong>: 0, '
                '<strong>no. keyword inputs</strong>: 2, '
                '<strong>ID</strong>: ' + hex(id(bs)) + ')')
    assert_equal(bs._repr_html(), expected)

    bs = MovingBlockBootstrap(block_size=20, y=self.y_series, x=self.x_df)
    expected = ('Moving Block Bootstrap(block size: 20, no. pos. '
                'inputs: 0, no. keyword inputs: 2)')
    assert_equal(str(bs), expected)
    expected = expected[:-1] + ', ID: ' + hex(id(bs)) + ')'
    assert_equal(bs.__repr__(), expected)
    expected = ('<strong>Moving Block Bootstrap</strong>('
                '<strong>block size</strong>: 20, '
                '<strong>no. pos. inputs</strong>: 0, '
                '<strong>no. keyword inputs</strong>: 2, '
                '<strong>ID</strong>: ' + hex(id(bs)) + ')')
    assert_equal(bs._repr_html(), expected)
def mbb_bootstrap(self):
    """Return paths simulated using the moving block bootstrap.

    Parameters
    ----------
    self : see class docstring

    Returns
    -------
    None
    """
    print("\nMB BOOTSTRAP \n")
    bs = MovingBlockBootstrap(self.blocksize, self.data)
    out_mbb = boot(N_paths=self.n_paths, method=bs, obs_path=self.data,
                   add_noise=self.add_noise)
    if self.store_sim:
        # Keep at most the first 100 simulated paths for storage/plotting.
        self.simulated_paths['MBB'] = out_mbb.iloc[:, :min(out_mbb.shape[1], 100)]
    self.store_output = investment_horizons(
        observed_path=self.data,
        sims=out_mbb,
        investment_horizons=self.ih,
        freq=self.frequency,
        sum_stats=self.stats,
        perf_functions=self.perf_functions,
        store_output_dic=self.store_output,
        simulation_tech='MBB',
        plotting=self.plotting)
    return None
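# The boot() helper above is defined elsewhere; a minimal sketch of the core
# step it presumably wraps, drawing N bootstrap paths from the observed series,
# might look like this (names and shapes are assumptions):
import numpy as np
import pandas as pd
from arch.bootstrap import MovingBlockBootstrap


def simulate_paths(data, block_size, n_paths):
    bs = MovingBlockBootstrap(block_size, np.asarray(data))
    paths = [pos[0] for pos, _ in bs.bootstrap(n_paths)]  # one resample per rep
    return pd.DataFrame(np.column_stack(paths))           # one path per column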
def metrics():
    import hypothesisTest.bets as bets
    import hypothesisTest.data as data
    import numpy as np
    import hypothesisTest.plots as plots  # used only by commented-out plotting calls
    import hypothesisTest.helper_functions as hf
    import pandas as pd
    import math as mth
    from scipy import stats

    datasets = data.datasets_dict
    benchmark_returns = datasets['benchmark'][bets.clean_values_from_weights]
    rf_returns = datasets['rf_rate'][bets.clean_values_from_weights]
    fama_factors = datasets['Fama_French'][bets.clean_values_from_weights]
    cleaned_index = bets.cleaned_index_weights
    res_dict = dict()

    ###########################################################################
    res_dict['cleaned_index'] = cleaned_index

    # A. General Characteristics
    # 1. Time range
    res_dict['START_DATE'] = cleaned_index.min()
    res_dict['END_DATE'] = cleaned_index.max()
    res_dict['TIME_RANGE_DAYS'] = (
        (cleaned_index.max() - cleaned_index.min()).astype('timedelta64[D]')
    ) / np.timedelta64(1, 'D')
    # years = ((end_date - start_date).astype('timedelta64[Y]')) / np.timedelta64(1, 'Y')
    res_dict['TOTAL_BARS'] = len(cleaned_index)

    # 2. Average AUM
    res_dict['AVERAGE_AUM'] = np.nanmean(
        np.nansum(np.abs(bets.dollars_at_open), axis=1))

    # 3. Capacity of strategy (not computed)

    # 4. Leverage (!!! double check: something to do with sum of long_lev and short_lev > 1)
    res_dict['AVERAGE_POSITION_SIZE'] = np.nanmean(
        np.nansum(bets.dollars_at_open, axis=1))
    res_dict['NET_LEVERAGE'] = round(
        res_dict['AVERAGE_POSITION_SIZE'] / res_dict['AVERAGE_AUM'], 2)

    # 5. Turnover
    daily_shares = np.nansum(bets.purchased_shares, axis=1)
    daily_value_traded = np.nansum(np.abs(bets.dollars_at_open), axis=1)
    daily_turnover = daily_shares / (2 * daily_value_traded)
    res_dict['AVERAGE_DAILY_TURNOVER'] = np.mean(daily_turnover)

    # 6. Correlation to underlying
    res_dict['CORRELATION_WITH_UNDERLYING'] = np.corrcoef(
        bets.underlying_daily_returns, bets.strategy_daily_returns)[0, 1]

    # 7. Ratio of longs
    res_dict['LONG_RATIO'] = ((bets.cleaned_strategy_weights > 0).sum()) / (
        np.ones(bets.cleaned_strategy_weights.shape, dtype=bool).sum())

    # 8. Maximum dollar position size
    res_dict['MAX_SIZE'] = np.nanmax(np.abs(bets.cleaned_strategy_weights))

    # 9. Stability of wealth process (R^2 of a linear fit to cumulative log returns)
    cum_log_returns = np.log1p(bets.strategy_daily_returns).cumsum()
    rhat = stats.linregress(np.arange(len(cum_log_returns)),
                            cum_log_returns)[2]
    res_dict['STABILITY_OF_WEALTH_PROCESS'] = rhat ** 2

    ###########################################################################
    # B. Performance measures
    # 1. Equity curves
    def equity_curve(amount, ret):
        ret = hf.shift_array(ret, 1, 0)
        return amount * np.cumprod(1 + ret)

    curves = dict()
    curves['Strategy'] = equity_curve(bets.starting_value,
                                      bets.strategy_daily_returns)
    curves['Buy & Hold Underlying'] = equity_curve(
        bets.starting_value, bets.underlying_daily_returns)
    curves['Benchmark'] = equity_curve(bets.starting_value, benchmark_returns)
    curves['Risk free Asset'] = equity_curve(bets.starting_value, rf_returns)
    curves['Long Contribution'] = equity_curve(bets.starting_value,
                                               bets.long_contribution)
    curves['Short Contribution'] = equity_curve(bets.starting_value,
                                                bets.short_contribution)

    plot_data_DF1 = pd.DataFrame([])
    plot_data_DF1['time'] = cleaned_index
    # Epoch ns to ms, assuming a datetime64[ns] index.
    plot_data_DF1['time'] = plot_data_DF1['time'].astype(np.int64) / int(1e6)
    plot_data_DF1['yValue'] = curves['Strategy']
    # plot_data_DF2 = pd.DataFrame([])
    # plot_data_DF2['time'] = plot_data_DF1['time']
    # plot_data_DF2['yValue'] = curves['Benchmark']
    plotData1 = [[plot_data_DF1['time'][n], curves['Strategy'][n]]
                 for n in range(len(cleaned_index))]
    plotData2 = [[plot_data_DF1['time'][n], curves['Benchmark'][n]]
                 for n in range(len(cleaned_index))]
    # for n in range(len(cleaned_index)):
    #     plotData1.append([plot_data_DF1['time'][n], curves['Strategy'][n]])
    #     plotData2.append([plot_data_DF1['time'][n], curves['Benchmark'][n]])
    # plots.equity_curves_plot(cleaned_index, curves)
    res_dict['curves'] = curves

    # 2. PnL from long positions (check long_pnl)
    res_dict['PNL_FROM_STRATEGY'] = curves['Strategy'][-1]
    res_dict['PNL_FROM_LONG'] = curves['Long Contribution'][-1]

    # 3. Annualized rate of return (check this)
    res_dict['ANNUALIZED_AVERAGE_RATE_OF_RETURN'] = round(
        ((1 + np.mean(bets.strategy_daily_returns)) ** 365 - 1) * 100, 2)
    res_dict['CUMMULATIVE_RETURN'] = (
        np.cumprod(1 + bets.strategy_daily_returns)[-1] - 1)
    yrs = res_dict['TOTAL_BARS'] / 252
    res_dict['CAGR_STRATEGY'] = (
        (curves['Strategy'][-1] / curves['Strategy'][0]) ** (1 / yrs)) - 1
    res_dict['CAGR_BENCHMARK'] = (
        (curves['Benchmark'][-1] / curves['Benchmark'][0]) ** (1 / yrs)) - 1

    # 4. Hit ratio
    res_dict['HIT_RATIO'] = round(
        ((bets.daily_pnl > 0).sum()) /
        ((bets.daily_pnl > 0).sum() + (bets.daily_pnl < 0).sum() +
         (bets.daily_pnl == 0).sum()) * 100, 2)

    ###########################################################################
    # C. Runs
    # 1. Runs concentration (Herfindahl-Hirschman index of returns)
    def runs(returns):
        wght = returns / returns.sum()
        hhi = (wght ** 2).sum()
        hhi = (hhi - returns.shape[0] ** -1) / (1. - returns.shape[0] ** -1)
        return hhi

    res_dict['HHI_PLUS'] = runs(
        bets.strategy_daily_returns[bets.strategy_daily_returns > 0])
    res_dict['HHI_MINUS'] = runs(
        bets.strategy_daily_returns[bets.strategy_daily_returns < 0])

    # 2. Drawdown and time under water
    def MDD(returns):
        def returns_to_dollars(amount, ret):
            return amount * np.cumprod(1 + ret)

        doll_series = pd.Series(returns_to_dollars(100, returns))
        Roll_Max = doll_series.cummax()
        Daily_Drawdown = doll_series / Roll_Max - 1.0
        Max_Daily_Drawdown = Daily_Drawdown.cummin()
        return Max_Daily_Drawdown

    DD_strategy = MDD(bets.strategy_daily_returns)
    DD_benchmark = MDD(benchmark_returns)
    res_dict['MDD_STRATEGY'] = DD_strategy.min()
    res_dict['MDD_BENCHMARK'] = DD_benchmark.min()

    # 3. 95th-percentile drawdown
    res_dict['95PERCENTILE_DRAWDOWN_STRATEGY'] = DD_strategy.quantile(0.05)
    res_dict['95PERCENTILE_DRAWDOWN_BENCHMARK'] = DD_benchmark.quantile(0.05)

    ###########################################################################
    # D. Efficiency
    # 1. Sharpe ratio
    excess_returns = bets.strategy_daily_returns - rf_returns
    res_dict['SHARPE_RATIO'] = round(
        mth.sqrt(252) * np.mean(excess_returns) / np.std(excess_returns), 2)
    # from statsmodels.graphics.tsaplots import plot_acf
    # plot_acf(excess_returns)

    # 2. Sortino ratio
    res_dict['SORTINO_RATIO'] = mth.sqrt(252) * np.mean(excess_returns) / np.std(
        excess_returns[excess_returns < np.mean(excess_returns)])

    # 3. Probabilistic Sharpe ratio
    from scipy.stats import norm
    from scipy.stats import kurtosis, skew
    g_3 = skew(excess_returns)
    g_4 = kurtosis(excess_returns)
    res_dict['PROBABILISTIC_SHARPE_RATIO'] = norm.cdf(
        ((res_dict['SHARPE_RATIO'] - 2) * mth.sqrt(len(excess_returns) - 1)) /
        (mth.sqrt(1 - (g_3 * res_dict['SHARPE_RATIO']) +
                  (0.25 * (g_4 - 1) * res_dict['SHARPE_RATIO'] *
                   res_dict['SHARPE_RATIO']))))

    # 4. Information ratio
    excess_returns_benchmark = bets.strategy_daily_returns - benchmark_returns
    res_dict['INFORMATION_RATIO'] = mth.sqrt(252) * np.mean(
        excess_returns_benchmark) / np.std(excess_returns_benchmark)

    # 5. t-statistic and p-value
    m = np.mean(excess_returns)
    s = np.std(excess_returns)
    n = len(excess_returns)
    t_stat = (m / s) * mth.sqrt(n)
    res_dict['t_STATISTIC'] = t_stat
    # Two-sided, since we test against <> 0. Degrees of freedom corrected from
    # the original's n**2 - 1, which was almost certainly a typo for n - 1.
    pval = stats.t.sf(np.abs(t_stat), n - 1) * 2
    res_dict['p-VALUE'] = round(pval * 100, 2)
    if pval <= 0.0001:
        res_dict['SIGNIFICANCE_AT_0.01%'] = 'STATISTICALLY_SIGNIFICANT'
    else:
        res_dict['SIGNIFICANCE_AT_0.01%'] = 'NOT_STATISTICALLY_SIGNIFICANT'

    # 6. Omega ratio
    returns_less_thresh = excess_returns - ((100 ** (1 / 252)) - 1)
    numer = sum(returns_less_thresh[returns_less_thresh > 0.0])
    denom = -1.0 * sum(returns_less_thresh[returns_less_thresh < 0.0])
    res_dict['OMEGA_RATIO'] = numer / denom

    # 7. Tail ratio
    res_dict['TAIL_RATIO'] = np.abs(
        np.percentile(bets.strategy_daily_returns, 95)) / np.abs(
        np.percentile(bets.strategy_daily_returns, 5))

    # 8. Rachev ratio
    left_threshold = np.percentile(excess_returns, 5)
    right_threshold = np.percentile(excess_returns, 95)
    CVAR_left = -1 * np.nanmean(excess_returns[excess_returns <= left_threshold])
    CVAR_right = np.nanmean(excess_returns[excess_returns >= right_threshold])
    res_dict['RACHEV_RATIO'] = CVAR_right / CVAR_left

    ###########################################################################
    # E. Risk measures
    # 1. Skewness, kurtosis
    res_dict['SKEWNESS'] = stats.skew(bets.strategy_daily_returns, bias=False)
    res_dict['KURTOSIS'] = stats.kurtosis(bets.strategy_daily_returns,
                                          bias=False)

    # 2. Annualized volatility
    res_dict['ANNUALIZED_VOLATILITY'] = np.std(
        bets.strategy_daily_returns) * np.sqrt(252)

    # 3. MAR ratio
    res_dict['MAR_RATIO'] = res_dict['CAGR_STRATEGY'] / abs(
        res_dict['MDD_STRATEGY'])

    ###########################################################################
    # F. Classification scores
    sign_positions = np.sign(bets.purchased_shares).flatten()
    sign_profits = np.sign(bets.pnl).flatten()
    invalid = np.argwhere(np.isnan(sign_positions + sign_profits))
    sign_positions_final = np.delete(sign_positions, invalid)
    sign_profits_final = np.delete(sign_profits, invalid)

    from sklearn.metrics import precision_recall_fscore_support as score
    precision, recall, fscore, support = score(sign_profits_final,
                                               sign_positions_final)
    # Round to 5 decimals. The original truncated via np.int16(x * 100000),
    # which overflows for values >= 0.32768; plain rounding is safe.
    decimals = 5
    precision = np.round(precision, decimals)
    recall = np.round(recall, decimals)
    fscore = np.round(fscore, decimals)
    support = np.round(support, decimals)
    res_dict['CLASSIFICATION_DATA'] = {
        'Class': ['-1', '0', '1'],
        'Precision': list(precision),
        'Recall': list(recall),
        'F-Score': list(fscore),
        'Support': list(support)
    }
    # res_dict['CLASSIFICATION_DATA'] = pd.DataFrame(res_dict['CLASSIFICATION_DATA'])

    ###########################################################################
    # G. Factor analysis
    import statsmodels.formula.api as sm  # formula interface for stats models
    from statsmodels.iolib.summary2 import summary_col

    def assetPriceReg(excess_ret, fama):
        df_stock_factor = pd.DataFrame({
            'ExsRet': excess_ret,
            'MKT': fama[:, 0],
            'SMB': fama[:, 1],
            'HML': fama[:, 2],
            'RMW': fama[:, 3],
            'CMA': fama[:, 4]
        })
        CAPM = sm.ols(formula='ExsRet ~ MKT', data=df_stock_factor).fit(
            cov_type='HAC', cov_kwds={'maxlags': 1})
        FF3 = sm.ols(formula='ExsRet ~ MKT + SMB + HML',
                     data=df_stock_factor).fit(cov_type='HAC',
                                               cov_kwds={'maxlags': 1})
        FF5 = sm.ols(formula='ExsRet ~ MKT + SMB + HML + RMW + CMA',
                     data=df_stock_factor).fit(cov_type='HAC',
                                               cov_kwds={'maxlags': 1})
        CAPMtstat = CAPM.tvalues
        FF3tstat = FF3.tvalues
        FF5tstat = FF5.tvalues
        CAPMcoeff = CAPM.params
        FF3coeff = FF3.params
        FF5coeff = FF5.params

        # DataFrame with coefficients and t-stats
        results_df = pd.DataFrame(
            {
                'CAPMcoeff': CAPMcoeff,
                'CAPMtstat': CAPMtstat,
                'FF3coeff': FF3coeff,
                'FF3tstat': FF3tstat,
                'FF5coeff': FF5coeff,
                'FF5tstat': FF5tstat
            },
            index=['Intercept', 'MKT', 'SMB', 'HML', 'RMW', 'CMA'])

        dfoutput = summary_col(
            [CAPM, FF3, FF5],
            stars=True,
            float_format='%0.4f',
            model_names=['CAPM', 'Fama-French 3 Factors',
                         'Fama-French 5 factors'],
            info_dict={
                'N': lambda x: "{0:d}".format(int(x.nobs)),
                'Adjusted R2': lambda x: "{:.4f}".format(x.rsquared_adj)
            },
            regressor_order=['Intercept', 'MKT', 'SMB', 'HML', 'RMW', 'CMA'])
        print(dfoutput)
        return dfoutput, results_df

    res_dict['FACTOR_RES'], _ = assetPriceReg(excess_returns, fama_factors)

    ###########################################################################
    # H. Bootstrap stats
    # 1. Sharpe bootstrap
    from arch.bootstrap import MovingBlockBootstrap
    from numpy.random import RandomState
    bs_sharpe = MovingBlockBootstrap(5, excess_returns,
                                     random_state=RandomState(1234))

    def sharpe(y):
        return (mth.sqrt(252) * np.mean(y)) / np.std(y)

    res = bs_sharpe.apply(sharpe, 10000)
    # plots.density_plot_bootstrap(res, res_dict['SHARPE_RATIO'])

    ###########################################################################
    return res_dict, [plotData1, plotData2]
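# Standalone restatement of the probabilistic Sharpe ratio computed inline in
# metrics() above (Bailey and Lopez de Prado form). The function name and
# defaults are ours, and scipy's excess kurtosis is fed into the (g4 - 1)/4
# term exactly as the code above does; a sketch, not a reference implementation.
import numpy as np
from scipy.stats import norm, skew, kurtosis


def probabilistic_sharpe_ratio(returns, sr_benchmark=0.0):
    sr = np.mean(returns) / np.std(returns)      # per-period Sharpe ratio
    g3, g4 = skew(returns), kurtosis(returns)    # g4 is excess kurtosis
    n = len(returns)
    return norm.cdf(((sr - sr_benchmark) * np.sqrt(n - 1)) /
                    np.sqrt(1 - g3 * sr + 0.25 * (g4 - 1) * sr ** 2))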
import numpy as np
from arch.bootstrap import MovingBlockBootstrap as MBB


def get_MBB_reminders(num_samples, remainder):
    # Draw num_samples moving-block-bootstrap replicates (block size 3) of the
    # remainder (residual) series, one replicate per row. The name "reminders"
    # is kept from the original; "remainders" is presumably intended.
    remainders = np.zeros((num_samples, remainder.size))
    bs = MBB(3, remainder)
    for i, data in enumerate(bs.bootstrap(num_samples)):
        remainders[i] = data[0][0]
    return remainders
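# Usage sketch (synthetic residual series; names assumed):
resid = np.random.standard_normal(120)
draws = get_MBB_reminders(50, resid)
print(draws.shape)  # (50, 120): one bootstrap replicate per row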
def metrics(datasets_dict, clean_values_from_weights, cleaned_index_weights,
            daily_pnl, pnl, strategy_log_returns, dollars_at_open,
            purchased_shares, underlying_daily_returns,
            cleaned_strategy_weights, starting_value, long_contribution,
            short_contribution, strategy_daily_returns):
    import numpy as np
    import hypothesisTest.utilities as hf
    import pandas as pd
    import math as mth
    from scipy import stats
    from scipy.stats import norm
    from scipy.stats import kurtosis, skew

    datasets = datasets_dict
    benchmark_returns = datasets['benchmark'][clean_values_from_weights]
    rf_returns = datasets['rf_rate'][clean_values_from_weights]
    fama_factors = datasets['Fama_French'][clean_values_from_weights]
    cleaned_index = cleaned_index_weights
    excess_returns = strategy_log_returns - rf_returns
    res_dict = dict()

    ###########################################################################
    # A. General Characteristics
    # 1. Time
    res_dict['START_DATE'] = hf.to_datetime(
        cleaned_index.min()).strftime("%d %B, %Y")
    res_dict['END_DATE'] = hf.to_datetime(
        cleaned_index.max()).strftime("%d %B, %Y")
    res_dict['TIME_RANGE_DAYS'] = '{0:.0f} days'.format(
        int(((cleaned_index.max() - cleaned_index.min())
             .astype('timedelta64[D]')) / np.timedelta64(1, 'D')))
    res_dict['TOTAL_BARS'] = '{0:.0f} bars'.format(int(len(cleaned_index)))

    sr = mth.sqrt(252) * np.mean(excess_returns) / np.std(excess_returns)

    # Minimum track record length needed to establish the target Sharpe ratio
    # at the given confidence level.
    def minTRL(sharpe, skew, kurtosis, target_sharpe=0, prob=0.95):
        from scipy.stats import norm
        min_track = (1 + (1 - skew * sharpe + sharpe ** 2 *
                          (kurtosis - 1) / 4.0) *
                     (norm.ppf(prob) / (sharpe - target_sharpe)) ** 2)
        return min_track

    g_3 = skew(excess_returns)
    g_4 = kurtosis(excess_returns)
    res_dict['MIN_TRL_SRGE1_99%'] = '{0:.0f} bars or {1:.2f} years'.format(
        minTRL(sr, g_3, g_4, 1, 0.99), minTRL(sr, g_3, g_4, 1, 0.99) / 252)
    res_dict['MIN_TRL_SRGE2_99%'] = '{0:.0f} bars or {1:.2f} years'.format(
        minTRL(sr, g_3, g_4, 2, 0.99), minTRL(sr, g_3, g_4, 2, 0.99) / 252)
    res_dict['MIN_TRL_SRGE3_99%'] = '{0:.0f} bars or {1:.2f} years'.format(
        minTRL(sr, g_3, g_4, 3, 0.99), minTRL(sr, g_3, g_4, 3, 0.99) / 252)

    # 2. Average AUM
    avg_aum = np.nanmean(np.nansum(np.abs(dollars_at_open), axis=1))
    res_dict['AVERAGE_AUM'] = hf.millions_fmt(avg_aum)

    # 3. Capacity of strategy
    # res_dict['STRATEGY_CAPACITY'] = hf.millions_fmt(0)

    # 4. Leverage (!!! double check: something to do with sum of long_lev and short_lev > 1)
    avg_pos_size = np.nanmean(np.nansum(dollars_at_open, axis=1))
    res_dict['AVERAGE_POSITION_SIZE'] = hf.millions_fmt(avg_pos_size)
    res_dict['NET_LEVERAGE'] = '{0:.1f}'.format(avg_pos_size / avg_aum)

    # 5. Turnover
    daily_shares = np.nansum(purchased_shares, axis=1)
    daily_value_traded = np.nansum(np.abs(dollars_at_open), axis=1)
    daily_turnover = daily_shares / (2 * daily_value_traded)
    res_dict['AVERAGE_DAILY_TURNOVER'] = hf.millions_fmt(np.mean(daily_turnover))

    # 6. Correlation to underlying
    res_dict['CORRELATION_WITH_UNDERLYING'] = '{0:.2f}'.format(
        np.corrcoef(underlying_daily_returns, strategy_log_returns)[0, 1])

    # 7. Ratio of longs
    res_dict['LONG_RATIO'] = '{0:.2f} %'.format(
        (((cleaned_strategy_weights > 0).sum()) /
         (np.ones(cleaned_strategy_weights.shape, dtype=bool).sum())) * 100)

    # 8. Maximum dollar position size
    res_dict['MAX_SIZE'] = '{0:.2f} %'.format(
        np.nanmax(np.abs(cleaned_strategy_weights)) * 100)
    # 9. Stability of wealth process
    cum_log_returns = np.log1p(strategy_log_returns).cumsum()
    rhat = stats.linregress(np.arange(len(cum_log_returns)),
                            cum_log_returns)[2]
    res_dict['STABILITY_OF_WEALTH_PROCESS'] = '{0:.2f} %'.format(
        (rhat ** 2) * 100)

    ###########################################################################
    # B. Performance measures
    # 1. Equity curves
    def equity_curve(amount, ret):
        ret = hf.shift_array(ret, 1, 0)
        return amount * np.cumprod(1 + ret)

    curves = dict()
    curves['Strategy'] = equity_curve(starting_value, strategy_daily_returns)
    curves['Buy & Hold Underlying'] = equity_curve(starting_value,
                                                   underlying_daily_returns)
    curves['Benchmark'] = equity_curve(starting_value, benchmark_returns)
    curves['Risk free Asset'] = equity_curve(starting_value, rf_returns)
    curves['Long Contribution'] = equity_curve(starting_value, long_contribution)
    curves['Short Contribution'] = equity_curve(starting_value,
                                                short_contribution)
    curves['time_index'] = cleaned_index
    df = pd.DataFrame.from_dict(curves)
    df = df.set_index('time_index')
    res_dict['PLOT_CURVES_DATA'] = df

    # 2. PnL from long positions (check long_pnl)
    res_dict['PNL_FROM_STRATEGY'] = hf.millions_fmt(curves['Strategy'][-1])
    res_dict['PNL_FROM_LONG'] = hf.millions_fmt(curves['Long Contribution'][-1])

    # 3. Annualized rate of return (check this)
    res_dict['ANNUALIZED_MEAN_RETURN'] = '{0:.2f} %'.format(
        (((1 + np.mean(strategy_daily_returns)) ** 365) - 1) * 100)
    res_dict['CUMMULATIVE_RETURN'] = '{0:.2f} %'.format(
        (np.cumprod(1 + strategy_daily_returns)[-1] - 1) * 100)
    yrs = int(len(cleaned_index)) / 252
    cagr_strategy = ((curves['Strategy'][-1] /
                      curves['Strategy'][0]) ** (1 / yrs)) - 1
    res_dict['CAGR_STRATEGY'] = '{0:.2f} %'.format(cagr_strategy * 100)
    res_dict['CAGR_BENCHMARK'] = '{0:.2f} %'.format(
        (((curves['Benchmark'][-1] /
           curves['Benchmark'][0]) ** (1 / yrs)) - 1) * 100)

    # 4. Hit ratio
    res_dict['HIT_RATIO'] = '{0:.2f} %'.format(
        (((daily_pnl > 0).sum()) /
         ((daily_pnl > 0).sum() + (daily_pnl < 0).sum() +
          (daily_pnl == 0).sum())) * 100)

    ###########################################################################
    # C. Runs
    # 1. Runs concentration (Herfindahl-Hirschman index of returns)
    def runs(returns):
        wght = returns / returns.sum()
        hhi = (wght ** 2).sum()
        hhi = (hhi - returns.shape[0] ** -1) / (1. - returns.shape[0] ** -1)
        return hhi

    res_dict['HHI_PLUS'] = '{0:.5f}'.format(
        runs(strategy_log_returns[strategy_log_returns > 0]))
    res_dict['HHI_MINUS'] = '{0:.5f}'.format(
        runs(strategy_log_returns[strategy_log_returns < 0]))

    # 2. Drawdown and time under water
    def MDD(returns):
        def returns_to_dollars(amount, ret):
            return amount * np.cumprod(1 + ret)

        doll_series = pd.Series(returns_to_dollars(100, returns))
        Roll_Max = doll_series.cummax()
        Daily_Drawdown = doll_series / Roll_Max - 1.0
        Max_Daily_Drawdown = Daily_Drawdown.cummin()
        return Max_Daily_Drawdown

    DD_strategy = MDD(strategy_log_returns)
    DD_benchmark = MDD(benchmark_returns)
    mdd_strat = DD_strategy.min()
    res_dict['MDD_STRATEGY'] = '{0:.2f} %'.format(DD_strategy.min() * 100)
    res_dict['MDD_BENCHMARK'] = '{0:.2f} %'.format(DD_benchmark.min() * 100)

    # 3. 95th-percentile drawdown (disabled)
    # res_dict['95PERCENTILE_DRAWDOWN_STRATEGY'] = DD_strategy.quantile(0.05)
    # res_dict['95PERCENTILE_DRAWDOWN_BENCHMARK'] = DD_benchmark.quantile(0.05)

    ###########################################################################
    # D. Efficiency
    # 1. Sharpe ratio
    excess_returns = strategy_log_returns - rf_returns
    res_dict['SHARPE_RATIO'] = '{0:.2f}'.format(
        mth.sqrt(252) * np.mean(excess_returns) / np.std(excess_returns))

    # 2. Probabilistic Sharpe ratio against annualized benchmarks of 1, 2, 3
    res_dict['PROBABILISTIC_SR_GE_1'] = '{0:.2f} %'.format(norm.cdf(
        ((sr - 1) * mth.sqrt((len(excess_returns) - 1) / 252)) /
        (mth.sqrt(1 - (g_3 * sr) + (0.25 * (g_4 - 1) * sr * sr)))) * 100)
    res_dict['PROBABILISTIC_SR_GE_2'] = '{0:.2f} %'.format(norm.cdf(
        ((sr - 2) * mth.sqrt((len(excess_returns) - 1) / 252)) /
        (mth.sqrt(1 - (g_3 * sr) + (0.25 * (g_4 - 1) * sr * sr)))) * 100)
    res_dict['PROBABILISTIC_SR_GE_3'] = '{0:.2f} %'.format(norm.cdf(
        ((sr - 3) * mth.sqrt((len(excess_returns) - 1) / 252)) /
        (mth.sqrt(1 - (g_3 * sr) + (0.25 * (g_4 - 1) * sr * sr)))) * 100)

    # 3. Sortino ratio
    res_dict['SORTINO_RATIO'] = '{0:.2f}'.format(
        mth.sqrt(252) * np.mean(excess_returns) /
        np.std(excess_returns[excess_returns < np.mean(excess_returns)]))

    # 4. Information ratio
    excess_returns_benchmark = strategy_log_returns - benchmark_returns
    res_dict['INFORMATION_RATIO'] = '{0:.2f}'.format(
        mth.sqrt(252) * np.mean(excess_returns_benchmark) /
        np.std(excess_returns_benchmark))

    # 5. t-statistic and p-value
    m = np.mean(excess_returns)
    s = np.std(excess_returns)
    n = len(excess_returns)
    t_stat = (m / s) * mth.sqrt(n)
    res_dict['t_STATISTIC'] = '{0:.2f}'.format(t_stat)
    # Two-sided, since we test against <> 0. Degrees of freedom corrected from
    # the original's n**2 - 1, which was almost certainly a typo for n - 1.
    pval = stats.t.sf(np.abs(t_stat), n - 1) * 2
    res_dict['p-VALUE'] = '{0:.5f} %'.format(pval * 100)
    if pval <= 0.0001:
        res_dict['SIGNIFICANCE_AT_0.01%'] = 'STATISTICALLY_SIGNIFICANT'
    else:
        res_dict['SIGNIFICANCE_AT_0.01%'] = 'NOT_STATISTICALLY_SIGNIFICANT'

    # 6. Omega ratio
    returns_less_thresh = excess_returns - ((100 ** (1 / 252)) - 1)
    numer = sum(returns_less_thresh[returns_less_thresh > 0.0])
    denom = -1.0 * sum(returns_less_thresh[returns_less_thresh < 0.0])
    res_dict['OMEGA_RATIO'] = '{0:.2f}'.format(numer / denom)

    # 7. Tail ratio
    res_dict['TAIL_RATIO'] = '{0:.2f}'.format(
        np.abs(np.percentile(strategy_log_returns, 95)) /
        np.abs(np.percentile(strategy_log_returns, 5)))

    # 8. Rachev ratio
    left_threshold = np.percentile(excess_returns, 5)
    right_threshold = np.percentile(excess_returns, 95)
    CVAR_left = -1 * np.nanmean(excess_returns[excess_returns <= left_threshold])
    CVAR_right = np.nanmean(excess_returns[excess_returns >= right_threshold])
    res_dict['RACHEV_RATIO'] = '{0:.2f}'.format(CVAR_right / CVAR_left)

    ###########################################################################
    # E. Risk measures
    # 1. Skewness, kurtosis
    res_dict['SKEWNESS'] = '{0:.2f}'.format(
        stats.skew(strategy_log_returns, bias=False))
    res_dict['KURTOSIS'] = '{0:.2f}'.format(
        stats.kurtosis(strategy_log_returns, bias=False))

    # 2. Annualized volatility
    res_dict['ANNUALIZED_VOLATILITY'] = '{0:.2f} %'.format(
        np.std(strategy_log_returns) * np.sqrt(252) * 100)

    # 3. MAR ratio
    res_dict['MAR_RATIO'] = '{0:.2f}'.format(cagr_strategy / abs(mdd_strat))

    # 4. Tracking error
    res_dict['TRACKING_ERROR'] = '{0:.4f}'.format(
        np.std(strategy_log_returns - benchmark_returns, ddof=1))

    # 5. Value at Risk (99.9%)
    percentile = 0.001
    res_dict['VaR_99.9'] = '{0:.3f} %'.format(
        np.percentile(np.sort(strategy_log_returns), percentile * 100) * 100)

    ###########################################################################
    # F. Classification scores
    sign_positions = np.sign(purchased_shares).flatten()
    sign_profits = np.sign(pnl).flatten()
    invalid = np.argwhere(np.isnan(sign_positions + sign_profits))
    sign_positions_final = np.delete(sign_positions, invalid)
    sign_profits_final = np.delete(sign_profits, invalid)

    from sklearn.metrics import precision_recall_fscore_support as score
    precision, recall, fscore, support = score(sign_profits_final,
                                               sign_positions_final)
    decimals = 3
    precision = np.round(precision, decimals)
    recall = np.round(recall, decimals)
    fscore = np.round(fscore, decimals)
    support = np.round(support, decimals)
    try:
        res_dict['CLASSIFICATION_DATA'] = {
            'Class': ['-1', '0', '1'],
            'Precision': list(precision),
            'Recall': list(recall),
            'F-Score': list(fscore),
            'Support': list(support)
        }
        res_dict['CLASSIFICATION_DATA'] = pd.DataFrame(
            res_dict['CLASSIFICATION_DATA'])
        res_dict['CLASSIFICATION_DATA'] = (
            res_dict['CLASSIFICATION_DATA'].set_index('Class'))
    except ValueError:
        # Fewer than three classes present; fall back to a NaN table.
        # (The original used a bare except; ValueError is what the mismatched
        # column lengths raise.)
        res_dict['CLASSIFICATION_DATA'] = {
            'Class': ['-1', '0', '1'],
            'Precision': ['NaN', 'NaN', 'NaN'],
            'Recall': ['NaN', 'NaN', 'NaN'],
            'F-Score': ['NaN', 'NaN', 'NaN'],
            'Support': ['NaN', 'NaN', 'NaN']
        }
        res_dict['CLASSIFICATION_DATA'] = pd.DataFrame(
            res_dict['CLASSIFICATION_DATA'])
        res_dict['CLASSIFICATION_DATA'] = (
            res_dict['CLASSIFICATION_DATA'].set_index('Class'))

    ###########################################################################
    # G. Factor analysis
    import statsmodels.formula.api as sm  # formula interface for stats models
    from statsmodels.iolib.summary2 import summary_col

    def assetPriceReg(excess_ret, fama, t_decimals, coeff_decimals):
        df_stock_factor = pd.DataFrame({
            'ExsRet': excess_ret,
            'MKT': fama[:, 0],
            'SMB': fama[:, 1],
            'HML': fama[:, 2],
            'RMW': fama[:, 3],
            'CMA': fama[:, 4]
        })
        CAPM = sm.ols(formula='ExsRet ~ MKT', data=df_stock_factor).fit(
            cov_type='HAC', cov_kwds={'maxlags': 1})
        FF3 = sm.ols(formula='ExsRet ~ MKT + SMB + HML',
                     data=df_stock_factor).fit(cov_type='HAC',
                                               cov_kwds={'maxlags': 1})
        FF5 = sm.ols(formula='ExsRet ~ MKT + SMB + HML + RMW + CMA',
                     data=df_stock_factor).fit(cov_type='HAC',
                                               cov_kwds={'maxlags': 1})
        CAPMtstat = np.round(CAPM.tvalues, t_decimals)
        FF3tstat = np.round(FF3.tvalues, t_decimals)
        FF5tstat = np.round(FF5.tvalues, t_decimals)
        CAPMcoeff = np.round(CAPM.params, coeff_decimals)
        FF3coeff = np.round(FF3.params, coeff_decimals)
        FF5coeff = np.round(FF5.params, coeff_decimals)

        # DataFrame with coefficients and t-stats
        results_df = pd.DataFrame(
            {
                'CAPM_coeff': CAPMcoeff,
                'CAPM_tstat': CAPMtstat,
                'FF3_coeff': FF3coeff,
                'FF3_tstat': FF3tstat,
                'FF5_coeff': FF5coeff,
                'FF5_tstat': FF5tstat
            },
            index=['Intercept', 'MKT', 'SMB', 'HML', 'RMW', 'CMA'])

        dfoutput = summary_col(
            [CAPM, FF3, FF5],
            stars=True,
            float_format='%0.4f',
            model_names=['CAPM', 'Fama-French 3 Factors',
                         'Fama-French 5 factors'],
            info_dict={
                'N': lambda x: "{0:d}".format(int(x.nobs)),
                'Adjusted R2': lambda x: "{:.4f}".format(x.rsquared_adj)
            },
            regressor_order=['Intercept', 'MKT', 'SMB', 'HML', 'RMW', 'CMA'])
        return dfoutput, results_df

    _, res_dict['FACTOR_RES'] = assetPriceReg(excess_returns, fama_factors, 2, 5)

    ###########################################################################
    # H. Bootstrap stats
    # 1. Sharpe bootstrap (a detrended moving-block bootstrap of the
    # geometric mean return)
    from arch.bootstrap import MovingBlockBootstrap
    from numpy.random import RandomState

    def geom_mean(y):
        log_ret = np.log(1 + y)
        geom = np.exp(np.sum(log_ret) / len(log_ret)) - 1
        return geom

    geo_avg = geom_mean(strategy_daily_returns)
    # Detrend so the bootstrap distribution is centred at zero.
    detrended_ret = strategy_daily_returns - geo_avg
    bs_sharpe = MovingBlockBootstrap(5, detrended_ret,
                                     random_state=RandomState(1234))
    res = bs_sharpe.apply(geom_mean, 10000)
    # plots.density_plot_bootstrap(res, geo_avg)
    # Fraction of bootstrap draws at or below the observed geometric mean.
    p_val = (res <= geo_avg).sum() / len(res)
    res_dict['SHARPE_BS'] = res
    res_dict['SHARPE_BS_GEOM_AVG'] = str(round(geo_avg, 5))
    res_dict['GM_BOOTSTRAP_p_val'] = '{0:.3f} %'.format(p_val * 100)

    ###########################################################################
    return res_dict
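# Standalone sketch of the detrended moving-block-bootstrap test above, on
# synthetic returns (all names here are illustrative). Note it reports the
# complementary tail to the p_val computed above.
import numpy as np
from arch.bootstrap import MovingBlockBootstrap


def geom_mean(y):
    return np.exp(np.mean(np.log(1 + y))) - 1


rets = np.random.default_rng(0).normal(0.0004, 0.01, 1000)
gm = geom_mean(rets)
bs = MovingBlockBootstrap(5, rets - gm)   # recentre so H0: geometric mean = 0
dist = bs.apply(geom_mean, 2000)          # bootstrap distribution under H0
print((dist >= gm).mean())                # one-sided p-value for gm > 0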