import pandas as pd

def exp_diff(x, type):
    # expanding drawdown: distance of the latest point from the running maximum
    if type == 'dollar':
        xret = pd.expanding_apply(x, lambda xx: xx[-1] - xx.max())
    else:
        xret = pd.expanding_apply(
            x, lambda xx: (xx[-1] - xx.max()) / xx.max())
    return xret
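# A minimal modern-pandas sketch of the same expanding drawdown:
# pd.expanding_apply was removed in pandas 0.23, and Series.expanding() is its
# replacement; the running maximum makes the lambda unnecessary.
# `exp_diff_modern` and its `kind` argument are illustrative names, not part
# of the original code.
import pandas as pd

def exp_diff_modern(x, kind='percentage'):
    # distance of each point from the expanding maximum seen so far
    run_max = x.expanding().max()
    if kind == 'dollar':
        return x - run_max
    return (x - run_max) / run_max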
# Shared window convention below: window == 0 returns a scalar over the full
# history, window > 0 a rolling statistic, window == -1 an expanding one.
def maxDD(self, normalize=True, window=0, rebalanced=True,
          from_date=None, to_date=None):
    ret = None
    returns = self.total_return(-1, rebalanced, from_date, to_date)
    if window == 0:
        ret = np.asscalar(risk_measures.maxDD(returns, normalize))
    if window > 0:
        ret = pd.rolling_apply(returns, window, risk_measures.maxDD,
                               kwargs={"normalize": normalize})
    if window == -1:
        ret = pd.expanding_apply(returns, risk_measures.maxDD,
                                 kwargs={"normalize": normalize})
    return ret

def CVaR(self, alpha, window=0, rebalanced=True, from_date=None,
         to_date=None):
    ret = None
    returns = self.returns(rebalanced, from_date, to_date)
    if window == 0:
        ret = np.asscalar(risk_measures.CVaR(returns, alpha))
    if window > 0:
        ret = pd.rolling_apply(returns, window, risk_measures.CVaR,
                               kwargs={"alpha": alpha})
    if window == -1:
        ret = pd.expanding_apply(returns, risk_measures.CVaR,
                                 kwargs={"alpha": alpha})
    return ret

def semivariance(self, window=0, rebalanced=True, from_date=None,
                 to_date=None):
    ret = None
    returns = self.returns(rebalanced, from_date, to_date)
    if window == 0:
        ret = np.asscalar(risk_measures.semivariance(returns))
    if window > 0:
        ret = pd.rolling_apply(returns, window, risk_measures.semivariance)
    if window == -1:
        ret = pd.expanding_apply(returns, risk_measures.semivariance)
    return ret

def total_return(self, window=0, rebalanced=True, from_date=None,
                 to_date=None):
    ret = None
    returns = self.returns(rebalanced, from_date, to_date) + 1
    if window == 0:
        ret = np.asscalar(np.prod(returns))
    if window > 0:
        ret = pd.rolling_apply(returns, window, np.prod)
    if window == -1:
        ret = pd.expanding_apply(returns, np.prod)
    return ret
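# Hedged sketch of the window-dispatch pattern shared by the four methods
# above, ported to the current pandas API (Series.rolling / Series.expanding
# replaced pd.rolling_apply / pd.expanding_apply in 0.23). `stat` stands for
# any scalar-valued risk function such as risk_measures.CVaR; the helper name
# is illustrative, not part of the original class.
import pandas as pd

def apply_stat(returns, stat, window=0, **kwargs):
    if window == 0:
        # single scalar over the full history
        return float(stat(returns, **kwargs))
    if window > 0:
        # fixed-length rolling window
        return returns.rolling(window).apply(lambda x: stat(x, **kwargs),
                                             raw=True)
    if window == -1:
        # anchored, ever-growing window
        return returns.expanding().apply(lambda x: stat(x, **kwargs),
                                         raw=True)
    return None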
def expanding_percentileofscore(series, min_periods=None):
    import scipy.stats as stats

    def _percentile(arr):
        score = arr[-1]
        vals = arr[:-1]
        return stats.percentileofscore(vals, score)

    notnull = series.dropna()
    if notnull.empty:
        return pd.Series(np.nan, index=series.index)
    else:
        return pd.expanding_apply(
            notnull, _percentile,
            min_periods=min_periods).reindex(series.index)
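# The same expanding percentile-of-score on current pandas, as a sketch. Each
# point is ranked against all *earlier* non-null values, matching _percentile
# above; min_periods is floored at 2 here (an assumption) so the prior sample
# is never empty.
import numpy as np
import pandas as pd
from scipy import stats

def expanding_percentileofscore_modern(series, min_periods=None):
    notnull = series.dropna()
    if notnull.empty:
        return pd.Series(np.nan, index=series.index)
    out = notnull.expanding(min_periods=max(min_periods or 2, 2)).apply(
        lambda a: stats.percentileofscore(a[:-1], a[-1]), raw=True)
    return out.reindex(series.index)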
def lro(df_data, i_period, i_index, time):
    """
    Last recent occurrence.

    Example: with df_data = [True, False, True, True, False],
    lro(df_data, 3, 1, time=df_data.index[-1]) == 2.

    :param df_data: a Series of boolean values, e.g. the result of a condition
        such as 'ema27 > ema50 and cci10 > 100 or typ > ema27'
    :param i_period: window length (number of most recent observations)
    :param i_index: find the i_index-th True value, counting forwards from the
        window start
    :param time: index label at which to evaluate; the window ends here
    :return: time gap (index difference) between that occurrence and the end
        of the window, or -1 if the window holds fewer than i_index Trues
    """
    assert i_index <= i_period <= len(df_data)
    index = df_data.index.get_loc(time)
    df_subdata = df_data[index - i_period + 1:index + 1]
    try:
        return i_period - 1 - pd.expanding_apply(
            df_subdata,
            lambda x: x.tolist().count(True)).tolist().index(i_index)
    except ValueError:
        return -1
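# Quick check of the docstring example, runnable on legacy pandas (< 0.23)
# where pd.expanding_apply still exists; `time` is the evaluation label added
# to the fixed signature above. The last-3 window is [True, True, False],
# whose 1st True sits at the window start, 2 steps before the window end.
import pandas as pd

s = pd.Series([True, False, True, True, False])
print(lro(s, i_period=3, i_index=1, time=s.index[-1]))  # -> 2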
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab


def comput_idicators(df, trading_days, required, save_file, save_address,
                     whole=1):
    # TODO: net_value has some problem.
    # columns needed
    col = ['index_price', 'Interest_rate', 'nav', 'rebalancing', 'stoploss']
    df_valid = df.loc[:, col]
    start_balance = df.index[df['rebalancing'] == 1][0]
    df_valid = df_valid[df_valid.index >= start_balance]
    # daily return
    df_valid['return'] = np.log(df['nav']) - np.log(df['nav'].shift(1))
    # benchmark net value
    df_valid['benchmark'] = (df_valid['index_price'] /
                             df_valid['index_price'].iloc[0])
    # benchmark return
    df_valid['benchmark_return'] = (
        (df_valid['benchmark'] - df_valid['benchmark'].shift(1)) /
        df_valid['benchmark'].shift(1))
    # annualized return
    df_valid['Annu_return'] = (pd.expanding_mean(df_valid['return']) *
                               trading_days)
    # volatility
    df_valid.loc[:, 'algo_volatility'] = (pd.expanding_std(df_valid['return']) *
                                          np.sqrt(trading_days))
    df_valid.loc[:, 'xret'] = (df_valid['return'] -
                               df_valid['Interest_rate'] / trading_days / 100)
    df_valid.loc[:, 'ex_return'] = (df_valid['return'] -
                                    df_valid['benchmark_return'])

    def ratio(x):
        return np.nanmean(x) / np.nanstd(x)

    # Sharpe ratio
    df_valid.loc[:, 'sharpe'] = (pd.expanding_apply(df_valid['xret'], ratio) *
                                 np.sqrt(trading_days))
    # information ratio
    df_valid.loc[:, 'IR'] = (pd.expanding_apply(df_valid['ex_return'], ratio) *
                             np.sqrt(trading_days))

    # Sortino ratio: downside deviation relative to the required return
    def modify_ratio(x, re):
        re /= trading_days
        ret = np.nanmean(x) - re
        st_d = np.nansum(np.square(x[x < re] - re)) / x[x < re].size
        return ret / np.sqrt(st_d)

    df_valid.loc[:, 'sortino'] = pd.expanding_apply(
        df_valid['return'], modify_ratio,
        args=(required,)) * np.sqrt(trading_days)
    # replace infs with NaN
    df_valid.loc[np.isinf(df_valid.loc[:, 'sharpe']), 'sharpe'] = np.nan
    df_valid.loc[np.isinf(df_valid.loc[:, 'IR']), 'IR'] = np.nan
    # hit rate
    wins = np.where(df_valid['return'] >= df_valid['benchmark_return'],
                    1.0, 0.0)
    df_valid.loc[:, 'hit_rate'] = wins.cumsum() / pd.expanding_apply(wins, len)
    # 95% VaR
    df_valid['VaR'] = (-pd.expanding_quantile(df_valid['return'], 0.05) *
                       np.sqrt(trading_days))
    # 95% CVaR
    df_valid['CVaR'] = (-pd.expanding_apply(
        df_valid['return'],
        lambda x: x[x < np.nanpercentile(x, 5)].mean()) *
        np.sqrt(trading_days))

    if whole == 1:
        # max drawdown
        def exp_diff(x, type):
            # expanding drawdown from the running maximum
            if type == 'dollar':
                xret = pd.expanding_apply(x, lambda xx: xx[-1] - xx.max())
            else:
                xret = pd.expanding_apply(
                    x, lambda xx: (xx[-1] - xx.max()) / xx.max())
            return xret

        # dollar
        # xret = exp_diff(df_valid['cum_profit'], 'dollar')
        # df_valid['max_drawdown_profit'] = abs(pd.expanding_min(xret))
        # percentage
        xret = exp_diff(df_valid['nav'], 'percentage')
        df_valid['max_drawdown_ret'] = abs(pd.expanding_min(xret))

        # max drawdown duration:
        # the drawdown end date is the first time the previous max is restored
        def drawdown_end(x, type):
            xret = exp_diff(x, type)
            minloc = xret[xret == xret.min()].index[0]
            x_sub = xret[xret.index > minloc]
            # if it never recovers, return NaN
            try:
                return x_sub[x_sub == 0].index[0]
            except IndexError:
                return np.nan

        def drawdown_start(x, type):
            xret = exp_diff(x, type)
            minloc = xret[xret == xret.min()].index[0]
            x_sub = xret[xret.index < minloc]
            try:
                return x_sub[x_sub == 0].index[-1]
            except IndexError:
                return np.nan

        df_valid['max_drawdown_start'] = pd.Series()
        df_valid['max_drawdown_end'] = pd.Series()
        df_valid['max_drawdown_start'].iloc[-1] = drawdown_start(
            df_valid['nav'], 'percentage')
        df_valid['max_drawdown_end'].iloc[-1] = drawdown_end(
            df_valid['nav'], 'percentage')

    df_valid.to_csv(save_address)

    # ===== result visualization =====
    plt.figure(1)
    if whole == 1:
        plt.subplot(224)
        plt.plot(df_valid['nav'], label='strategy')
        plt.plot(df_valid['benchmark'], label='S&P500')
        plt.xlabel('Date')
        plt.legend(loc=0, shadow=True)
        plt.ylabel('Nav')
        plt.title('Nav of ' + save_file + ' & SP500')
        # plt.subplot(223)
        # plt.plot(df_valid['cum_profit'], label='strategy')
        # plt.xlabel('Date')
        # plt.ylabel('Cum_profit')
        # plt.title('Cum_profit of ' + save_file)

    plt.subplot(221)
    plt.plot(df_valid['return'], label='strategy')
    plt.xlabel('Date')
    plt.ylabel('Daily_return')
    plt.title('Daily Return of ' + save_file)

    plt.subplot(222)
    x_return = df_valid[df_valid['return'].notna()].loc[:, 'return']
    y_return = df_valid[
        df_valid['benchmark_return'].notna()].loc[:, 'benchmark_return']
    mu = x_return.mean()
    sigma = x_return.std()
    mybins = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 100)
    # 'normed' and mlab.normpdf are from the matplotlib era this snippet
    # targets (< 3.1)
    count_x, _, _ = plt.hist(x_return, mybins, normed=1, alpha=0.5,
                             label='strategy')
    count_y, _, _ = plt.hist(y_return, mybins, normed=1, alpha=0.5,
                             label='S&P500')
    plt.ylabel('density')
    plt.xlabel('daily_return')
    plt.title('Histogram of Daily Return for ' + save_file + ' & SP500')
    plt.grid(True)
    # overlay a normal density fitted to the strategy returns
    y = mlab.normpdf(mybins, mu, sigma)
    plt.plot(mybins, y, 'r--', linewidth=1, label='Normal of strategy')
    plt.legend(loc=0, shadow=True)
    # plt.tight_layout()
    plt.show()
    return df_valid
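# Hedged sketch of the expanding statistics above on current pandas (>= 0.23),
# where the module-level pd.expanding_* helpers no longer exist;
# `expanding_stats_modern` is an illustrative name, not a drop-in replacement
# for comput_idicators.
import numpy as np
import pandas as pd

def expanding_stats_modern(returns, trading_days=252):
    annu_return = returns.expanding().mean() * trading_days  # pd.expanding_mean
    volatility = (returns.expanding().std() *
                  np.sqrt(trading_days))                     # pd.expanding_std
    var95 = (-returns.expanding().quantile(0.05) *
             np.sqrt(trading_days))                          # pd.expanding_quantile
    cvar95 = (-returns.expanding().apply(
        lambda x: x[x < np.nanpercentile(x, 5)].mean(),
        raw=True) * np.sqrt(trading_days))                   # pd.expanding_apply
    return annu_return, volatility, var95, cvar95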
GG_index["accu_value"] = GG_index["trade_value"] data = data.append(GG_index.reset_index()) data = data[data.trade_date.isin(data[data.id == benchmark_id].trade_date)] data = data.drop_duplicates(subset=["id", "trade_date"]) data = data.sort(["id", "trade_date"]).reset_index(drop=True) #data = data[data.trade_date.isin(np.arange(start_date, end_date , dtype='datetime64[D]'))] #data = data[data.trade_date.isin(np.arange(start_date, end_date ))] data["trade_biweek"] = [ x.year * 100 + int(datetime.datetime.strftime(x, "%U")) / 2 for x in data.trade_date ] data_grouped = data.groupby(["id", "trade_biweek"]) data['loss'] = data_grouped.accu_value.apply( lambda x: pd.expanding_apply(x, lambda y: (y[-1] / (np.max(y))) - 1)) data['biggest_loss'] = data.loc[data_grouped.loss.idxmin(), 'loss'] data['biggest_loss_day'] = data.loc[data_grouped.loss.idxmin(), 'trade_date'] data_result = pd.DataFrame() data_result['biweek_first_date'] = data_grouped.trade_date.first() data_result['biweek_last_date'] = data_grouped.trade_date.last() data_result['biweek_start_value'] = data_grouped.accu_value.first() data_result['biweek_last_value'] = data_grouped.accu_value.last() data_result['earning1'] = (data_result.biweek_last_value / data_result.biweek_start_value) - 1 data_result['earning2'] = pd.concat([ pd.rolling_apply(v.biweek_last_value, 2, lambda x: (x[1] / x[0]) - 1) for k, v in data_result.reset_index(level=0).groupby(["id"]) ]).values
GG_index["trade_value"] = GG_index.trade_value + formated_index.loc[:,["trade_date","trade_value"]].set_index(["trade_date"]) * index_symbol["ratio"] data = data.append(formated_index) GG_index["accu_value"] = GG_index["trade_value"] data = data.append(GG_index.reset_index()) data = data[data.trade_date.isin(data[data.id == benchmark_id].trade_date)] data = data.drop_duplicates(subset=["id","trade_date"]) data = data.sort(["id","trade_date"]).reset_index(drop=True) #data = data[data.trade_date.isin(np.arange(start_date, end_date , dtype='datetime64[D]'))] #data = data[data.trade_date.isin(np.arange(start_date, end_date ))] data["trade_biweek"] = [ x.year * 100 + int(datetime.datetime.strftime(x,"%U"))/2 for x in data.trade_date ] data_grouped = data.groupby(["id","trade_biweek"]) data['loss'] = data_grouped.accu_value.apply(lambda x: pd.expanding_apply(x,lambda y: (y[-1]/(np.max(y)))-1)) data['biggest_loss'] = data.loc[data_grouped.loss.idxmin(),'loss'] data['biggest_loss_day'] = data.loc[data_grouped.loss.idxmin(),'trade_date'] data_result = pd.DataFrame() data_result['biweek_first_date'] = data_grouped.trade_date.first() data_result['biweek_last_date'] = data_grouped.trade_date.last() data_result['biweek_start_value'] = data_grouped.accu_value.first() data_result['biweek_last_value'] = data_grouped.accu_value.last() data_result['earning1'] = (data_result.biweek_last_value / data_result.biweek_start_value ) - 1 data_result['earning2'] = pd.concat([ pd.rolling_apply(v.biweek_last_value,2,lambda x:(x[1]/x[0])-1) for k,v in data_result.reset_index(level=0).groupby(["id"])]).values data_result['earning'] = np.where(pd.isnull(data_result['earning2']), data_result['earning1'], data_result['earning2']) data['rtn'] = data.groupby(['id']).apply(lambda y:pd.rolling_apply(y['accu_value'],2,lambda x:(x[1]/x[0])-1)).values
# -*- author: Sean -*-
import numpy as np
import pandas as pd
import datetime

input_file = "raw.csv"
print("loading data from %s" % input_file)
data = pd.read_csv(
    input_file,
    names=["id", "name", "desc", "trade_date", "trade_value", "accu_value"],
    header=0,
    dtype={'id': int, 'trade_value': float, 'accu_value': float},
    parse_dates=["trade_date"])
data = data.sort(["id", "trade_date"])
data["trade_biweek"] = [
    x.year * 100 + int(datetime.datetime.strftime(x, "%U")) / 2
    for x in data.trade_date
]
data_grouped = data.groupby(["id", "trade_biweek"])
data['loss'] = data_grouped.accu_value.apply(
    lambda x: pd.expanding_apply(x, lambda y: (y[-1] / np.max(y)) - 1))
data['biggest_loss'] = data.loc[data_grouped.loss.idxmin(), 'loss']
data['biggest_loss_day'] = data.loc[data_grouped.loss.idxmin(), 'trade_date']
data_result = pd.DataFrame()
data_result['biweek_start_value'] = data_grouped.accu_value.first()
data_result['biweek_last_value'] = data_grouped.accu_value.last()
data_result['earning1'] = (data_result.biweek_last_value /
                           data_result.biweek_start_value) - 1
data_result['earning2'] = pd.concat([
    pd.rolling_apply(v.biweek_last_value, 2, lambda x: (x[1] / x[0]) - 1)
    for k, v in data_result.reset_index(level=0).groupby(["id"])
]).values
# data_result['gain2'] = pd.rolling_apply(data_result['last'], 2,
#                                         lambda x: (x[1] / x[0]) - 1)
data_result['earning'] = np.where(pd.isnull(data_result['earning2']),
                                  data_result['earning1'],
                                  data_result['earning2'])
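# Sketch of the per-(id, trade_biweek) running loss on current pandas:
# groupby().cummax() gives the running maximum inside each group, so neither
# pd.expanding_apply nor the per-group lambda is needed. Assumes the same
# `data` frame as the script above.
grp_max = data.groupby(["id", "trade_biweek"])["accu_value"].cummax()
data["loss"] = data["accu_value"] / grp_max - 1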
GG_index["trade_value"] = GG_index.trade_value + formated_index.loc[:,["trade_date","trade_value"]].set_index(["trade_date"]) * index_symbol["ratio"] data = data.append(formated_index) #index_data = pd.read_csv("index.csv",parse_dates= ["Date"]) #data = data[data.trade_date.isin(index_data.Date)] GG_index["accu_value"] = GG_index["trade_value"] data = data.append(GG_index.reset_index()) data = data[data.trade_date.isin(data[data.id == benchmark_id].trade_date)] data = data.drop_duplicates(subset=["id","trade_date"]) data = data.sort(["id","trade_date"]).reset_index(drop=True) data = data[data.trade_date.isin(np.arange(start_date, end_date , dtype='datetime64[D]'))] data["trade_biweek"] = [ x.year * 100 + int(datetime.datetime.strftime(x,"%U"))/2 for x in data.trade_date ] data_grouped = data.groupby(["id","trade_biweek"]) data['loss'] = data_grouped.accu_value.apply(lambda x: pd.expanding_apply(x,lambda y: (y[-1]/(np.max(y)))-1)) data['biggest_loss'] = data.loc[data_grouped.loss.idxmin(),'loss'] data['biggest_loss_day'] = data.loc[data_grouped.loss.idxmin(),'trade_date'] data_result = pd.DataFrame() data_result['biweek_first_date'] = data_grouped.trade_date.first() data_result['biweek_last_date'] = data_grouped.trade_date.last() data_result['biweek_start_value'] = data_grouped.accu_value.first() data_result['biweek_last_value'] = data_grouped.accu_value.last() data_result['earning1'] = (data_result.biweek_last_value / data_result.biweek_start_value ) - 1 data_result['earning2'] = pd.concat([ pd.rolling_apply(v.biweek_last_value,2,lambda x:(x[1]/x[0])-1) for k,v in data_result.reset_index(level=0).groupby(["id"])]).values data_result['earning'] = np.where(pd.isnull(data_result['earning2']), data_result['earning1'], data_result['earning2']) data['rtn'] = data.groupby(['id']).apply(lambda y:pd.rolling_apply(y['accu_value'],2,lambda x:(x[1]/x[0])-1)).values