Example #1
def exp_diff(x, type):
    # Expanding drawdown of x: distance of the latest value from the
    # running maximum, in dollars or as a fraction of that maximum.
    if type == 'dollar':
        xret = pd.expanding_apply(x, lambda xx: xx[-1] - xx.max())
    else:
        xret = pd.expanding_apply(
            x, lambda xx: (xx[-1] - xx.max()) / xx.max())
    return xret
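pd.expanding_apply is gone from current pandas (the module-level moment functions were deprecated around 0.18 and later removed), so a minimal modern sketch of the same helper replaces the expanding window scan with cummax(); exp_diff_modern and the kind parameter are illustrative names, not part of the original code:

import pandas as pd

def exp_diff_modern(x, kind='dollar'):
    # Running drawdown from the expanding maximum; cummax() does in one
    # vectorized pass what expanding_apply recomputes window by window.
    peak = x.cummax()
    if kind == 'dollar':
        return x - peak
    return (x - peak) / peak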
Example #2
def maxDD(self, normalize=True, window=0, rebalanced=True, from_date=None, to_date=None):
    ret = None
    returns = self.total_return(-1, rebalanced, from_date, to_date)

    if window == 0:      # full-sample scalar
        ret = np.asscalar(risk_measures.maxDD(returns, normalize))
    elif window > 0:     # rolling window
        ret = pd.rolling_apply(returns, window, risk_measures.maxDD, kwargs={"normalize": normalize})
    elif window == -1:   # expanding window
        ret = pd.expanding_apply(returns, risk_measures.maxDD, kwargs={"normalize": normalize})

    return ret
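For reference, a minimal sketch of the same window dispatch on modern pandas, assuming risk_measures.maxDD accepts a NumPy array (raw=True hands ndarrays to the callback) and noting that np.asscalar no longer exists in current NumPy:

def maxDD_modern(self, normalize=True, window=0, rebalanced=True, from_date=None, to_date=None):
    returns = self.total_return(-1, rebalanced, from_date, to_date)
    if window == 0:       # full-sample scalar
        return float(risk_measures.maxDD(returns, normalize))
    if window > 0:        # rolling window
        return returns.rolling(window).apply(
            risk_measures.maxDD, raw=True, kwargs={"normalize": normalize})
    if window == -1:      # expanding window
        return returns.expanding().apply(
            risk_measures.maxDD, raw=True, kwargs={"normalize": normalize})
    return None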
Example #3
def CVaR(self, alpha, window=0, rebalanced=True, from_date=None, to_date=None):
    ret = None
    returns = self.returns(rebalanced, from_date, to_date)

    if window == 0:      # full-sample scalar
        ret = np.asscalar(risk_measures.CVaR(returns, alpha))
    elif window > 0:     # rolling window
        ret = pd.rolling_apply(returns, window, risk_measures.CVaR, kwargs={"alpha": alpha})
    elif window == -1:   # expanding window
        ret = pd.expanding_apply(returns, risk_measures.CVaR, kwargs={"alpha": alpha})

    return ret
Example #4
def semivariance(self, window=0, rebalanced=True, from_date=None, to_date=None):
    ret = None
    returns = self.returns(rebalanced, from_date, to_date)

    if window == 0:      # full-sample scalar
        ret = np.asscalar(risk_measures.semivariance(returns))
    elif window > 0:     # rolling window
        ret = pd.rolling_apply(returns, window, risk_measures.semivariance)
    elif window == -1:   # expanding window
        ret = pd.expanding_apply(returns, risk_measures.semivariance)

    return ret
Example #5
def total_return(self, window=0, rebalanced=True, from_date=None, to_date=None):
    ret = None
    returns = self.returns(rebalanced, from_date, to_date) + 1   # gross returns

    if window == 0:      # full-sample scalar
        ret = np.asscalar(np.prod(returns))
    elif window > 0:     # rolling window
        ret = pd.rolling_apply(returns, window, np.prod)
    elif window == -1:   # expanding window
        ret = pd.expanding_apply(returns, np.prod)

    return ret
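The expanding product of gross returns does not need expanding_apply at all: cumprod() is the vectorized equivalent, and a rolling apply covers the windowed case (pandas has no built-in rolling prod). A short sketch under those modern-pandas assumptions:

import numpy as np
import pandas as pd

gross = pd.Series([1.01, 0.99, 1.02])                 # 1 + period return
total = gross.cumprod()                               # expanding total return
windowed = gross.rolling(2).apply(np.prod, raw=True)  # rolling variant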
Example #6
def expanding_percentileofscore(series, min_periods=None):
    import scipy.stats as stats

    def _percentile(arr):
        # Rank the newest observation against all earlier ones.
        score = arr[-1]
        vals = arr[:-1]
        return stats.percentileofscore(vals, score)

    notnull = series.dropna()
    if notnull.empty:
        return pd.Series(np.nan, index=series.index)
    else:
        return pd.expanding_apply(notnull, _percentile, min_periods=min_periods).reindex(series.index)
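A sketch of the same helper on modern pandas, where Series.expanding().apply replaces pd.expanding_apply; expanding_percentileofscore_modern and the min_periods=2 default (which skips the first point, whose history is empty) are illustrative choices, not from the original:

import numpy as np
import pandas as pd
import scipy.stats as stats

def expanding_percentileofscore_modern(series, min_periods=2):
    notnull = series.dropna()
    if notnull.empty:
        return pd.Series(np.nan, index=series.index)
    out = notnull.expanding(min_periods=min_periods).apply(
        lambda a: stats.percentileofscore(a[:-1], a[-1]), raw=True)
    return out.reindex(series.index)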
Example #7
def lro(df_data, i_period, i_index, time):
    """
    Most recent occurrence.
    Example: lro(pd.Series([True, False, True, True, False]), 3, 1, time=4) == 2
    :param df_data: a Series of boolean values, e.g. the result of a condition
        such as 'ema27 > ema50 and cci10 > 100 or typ > ema27'
    :param i_period: length of the lookback window
    :param i_index: find the i_index-th True value (counting forwards)
    :param time: index label at which to evaluate (absent from the original
        signature, so it is added here to make the function runnable)
    :return: time gap (index difference), or -1 if there is no match
    """
    assert i_index <= i_period <= len(df_data)
    index = df_data.index.get_loc(time)
    df_subdata = df_data.iloc[index - i_period + 1:index + 1]
    try:
        return i_period - 1 - pd.expanding_apply(
            df_subdata, lambda x: x.tolist().count(True)).tolist().index(i_index)
    except ValueError:
        return -1
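A quick check against the docstring example, on a pandas old enough to still ship pd.expanding_apply, with the time argument (added in the fix above) pointing at the last index label:

s = pd.Series([True, False, True, True, False])
print(lro(s, i_period=3, i_index=1, time=4))   # window [True, True, False] -> gap of 2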
Example #8
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab


def comput_idicators(df,
                     trading_days,
                     required,
                     save_file,
                     save_address,
                     whole=1):
    # TODO: net_value still has a problem.
    # columns needed
    col = ['index_price', 'Interest_rate', 'nav', 'rebalancing', 'stoploss']
    df_valid = df.ix[:, col]
    start_balance = df.index[df['rebalancing'] == 1][0]
    df_valid = df_valid[df_valid.index >= start_balance]

    # daily return
    df_valid['return'] = np.log(df['nav']) - np.log(df['nav'].shift(1))
    # benchmark net value
    df_valid['benchmark'] = df_valid['index_price'] / df_valid['index_price'].ix[0]
    # benchmark return
    df_valid['benchmark_return'] = (df_valid['benchmark'] - df_valid['benchmark'].shift(1)) \
                                   / df_valid['benchmark'].shift(1)
    # Annualized return
    df_valid['Annu_return'] = pd.expanding_mean(
        df_valid['return']) * trading_days
    # Volatility
    df_valid.loc[:, 'algo_volatility'] = pd.expanding_std(
        df_valid['return']) * np.sqrt(trading_days)
    df_valid.loc[:, 'xret'] = (df_valid['return']
                               - df_valid['Interest_rate'] / trading_days / 100)
    df_valid.loc[:, 'ex_return'] = df_valid['return'] - df_valid['benchmark_return']

    def ratio(x):
        return np.nanmean(x) / np.nanstd(x)

    # sharpe ratio
    df_valid.loc[:, 'sharpe'] = pd.expanding_apply(df_valid['xret'], ratio)\
                                * np.sqrt(trading_days)
    # information ratio
    df_valid.loc[:, 'IR'] = pd.expanding_apply(df_valid['ex_return'], ratio)\
                                * np.sqrt(trading_days)

    # Sortino ratio: excess return over downside deviation
    def modify_ratio(x, re):
        re /= trading_days   # de-annualize the required return
        ret = np.nanmean(x) - re
        st_d = np.nansum(np.square(x[x < re] - re)) / x[x < re].size
        return ret / np.sqrt(st_d)

    df_valid.loc[:, 'sortino'] = pd.expanding_apply(
        df_valid['return'], modify_ratio,
        args=(required, )) * np.sqrt(trading_days)
    # Transfer infs to NA
    df_valid.loc[np.isinf(df_valid.loc[:, 'sharpe']), 'sharpe'] = np.nan
    df_valid.loc[np.isinf(df_valid.loc[:, 'IR']), 'IR'] = np.nan
    # hit rate: share of days the strategy matches or beats the benchmark
    wins = np.where(df_valid['return'] >= df_valid['benchmark_return'], 1.0, 0.0)
    df_valid.loc[:, 'hit_rate'] = wins.cumsum() / pd.expanding_apply(wins, len)
    # 95% VaR (annualized)
    df_valid['VaR'] = (-pd.expanding_quantile(df_valid['return'], 0.05)
                       * np.sqrt(trading_days))
    # 95% CVaR: mean of returns below the 5th percentile (annualized)
    df_valid['CVaR'] = (-pd.expanding_apply(
        df_valid['return'],
        lambda x: x[x < np.nanpercentile(x, 5)].mean())
        * np.sqrt(trading_days))

    if whole == 1:
        # max_drawdown
        def exp_diff(x, type):
            if type == 'dollar':
                xret = pd.expanding_apply(x, lambda xx: (xx[-1] - xx.max()))
            else:
                xret = pd.expanding_apply(
                    x, lambda xx: (xx[-1] - xx.max()) / xx.max())
            return xret
        # dollar variant:
        #     xret = exp_diff(df_valid['cum_profit'], 'dollar')
        #     df_valid['max_drawdown_profit'] = abs(pd.expanding_min(xret))
        # percentage variant:

        xret = exp_diff(df_valid['nav'], 'percentage')
        df_valid['max_drawdown_ret'] = abs(pd.expanding_min(xret))

        # max_drawdown_duration:
        # the drawdown end date is the first time the prior maximum is restored
        def drawdown_end(x, type):
            xret = exp_diff(x, type)
            minloc = xret[xret == xret.min()].index[0]
            x_sub = xret[xret.index > minloc]
            # if the drawdown never recovers, return NaN
            try:
                return x_sub[x_sub == 0].index[0]
            except IndexError:
                return np.nan

        def drawdown_start(x, type):
            xret = exp_diff(x, type)
            minloc = xret[xret == xret.min()].index[0]
            x_sub = xret[xret.index < minloc]
            try:
                return x_sub[x_sub == 0].index[-1]
            except IndexError:
                return np.nan

        # store the drawdown dates on the last row only; .loc avoids the
        # chained .ix assignment of the original, which may not propagate
        df_valid['max_drawdown_start'] = np.nan
        df_valid['max_drawdown_end'] = np.nan
        last_day = df_valid.index[-1]
        df_valid.loc[last_day, 'max_drawdown_start'] = drawdown_start(
            df_valid['nav'], 'percentage')
        df_valid.loc[last_day, 'max_drawdown_end'] = drawdown_end(
            df_valid['nav'], 'percentage')
    df_valid.to_csv(save_address)
    # =====result visualization=====
    plt.figure(1)
    if whole == 1:
        plt.subplot(224)
        plt.plot(df_valid['nav'], label='strategy')
        plt.plot(df_valid['benchmark'], label='S&P500')
    plt.xlabel('Date')
    plt.legend(loc=0, shadow=True)
    plt.ylabel('Nav')
    plt.title('Nav of ' + save_file + ' & SP500')

    # plt.subplot(223)
    # plt.plot(df_valid['cum_profit'],label = 'strategy')
    # plt.xlabel('Date')
    # plt.ylabel('Cum_profit')
    # plt.title('Cum_profit of ' + save_file)

    plt.subplot(221)
    plt.plot(df_valid['return'], label='strategy')
    plt.xlabel('Date')
    plt.ylabel('Daily_return')
    plt.title('Daily Return of ' + save_file)

    plt.subplot(222)
    x_return = df_valid['return'].dropna()
    y_return = df_valid['benchmark_return'].dropna()
    mu = x_return.mean()
    sigma = x_return.std()
    mybins = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 100)
    count_x, _, _ = plt.hist(x_return,
                             mybins,
                             normed=1,
                             alpha=0.5,
                             label='strategy')
    count_y, _, _ = plt.hist(y_return,
                             mybins,
                             normed=1,
                             alpha=0.5,
                             label='S&P500')
    plt.ylabel('density')
    plt.xlabel('daily_return')
    plt.title('Histogram of Daily Return for ' + save_file + ' & SP500')
    plt.grid(True)
    # add normal distribution line
    y = mlab.normpdf(mybins, mu, sigma)
    plt.plot(mybins, y, 'r--', linewidth=1, label='Normal of strategy')
    plt.legend(loc=0, shadow=True)
    # plt.tight_layout()
    plt.show()
    return df_valid
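On modern pandas the whole exp_diff / expanding_min drawdown block collapses to a few cummax() lines. A sketch that reports the final figures rather than the expanding track the function stores (drawdown.cummin().abs() would reproduce that track):

drawdown = df_valid['nav'] / df_valid['nav'].cummax() - 1   # running drawdown
max_drawdown = abs(drawdown.min())
trough = drawdown.idxmin()                                  # deepest point
peak_date = df_valid['nav'].loc[:trough].idxmax()           # drawdown start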
Example #9
GG_index["accu_value"] = GG_index["trade_value"]
data = data.append(GG_index.reset_index())

data = data[data.trade_date.isin(data[data.id == benchmark_id].trade_date)]
data = data.drop_duplicates(subset=["id", "trade_date"])
data = data.sort(["id", "trade_date"]).reset_index(drop=True)
#data = data[data.trade_date.isin(np.arange(start_date, end_date , dtype='datetime64[D]'))]
#data = data[data.trade_date.isin(np.arange(start_date, end_date ))]
data["trade_biweek"] = [
    x.year * 100 + int(datetime.datetime.strftime(x, "%U")) / 2
    for x in data.trade_date
]
data_grouped = data.groupby(["id", "trade_biweek"])
data['loss'] = data_grouped.accu_value.apply(
    lambda x: pd.expanding_apply(x, lambda y: (y[-1] / (np.max(y))) - 1))
data['biggest_loss'] = data.loc[data_grouped.loss.idxmin(), 'loss']
data['biggest_loss_day'] = data.loc[data_grouped.loss.idxmin(), 'trade_date']

data_result = pd.DataFrame()
data_result['biweek_first_date'] = data_grouped.trade_date.first()
data_result['biweek_last_date'] = data_grouped.trade_date.last()
data_result['biweek_start_value'] = data_grouped.accu_value.first()

data_result['biweek_last_value'] = data_grouped.accu_value.last()
data_result['earning1'] = (data_result.biweek_last_value /
                           data_result.biweek_start_value) - 1
data_result['earning2'] = pd.concat([
    pd.rolling_apply(v.biweek_last_value, 2, lambda x: (x[1] / x[0]) - 1)
    for k, v in data_result.reset_index(level=0).groupby(["id"])
]).values
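The grouped expanding loss in this and the following examples has a direct vectorized equivalent on modern pandas, since (y[-1] / max(y)) - 1 over expanding windows is just each value divided by its running maximum:

data['loss'] = data.groupby(['id', 'trade_biweek'])['accu_value'] \
                   .transform(lambda x: x / x.cummax() - 1)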
Example #10
	GG_index["trade_value"] = GG_index.trade_value + formated_index.loc[:,["trade_date","trade_value"]].set_index(["trade_date"]) * index_symbol["ratio"]

	data = data.append(formated_index)


GG_index["accu_value"] = GG_index["trade_value"]
data = data.append(GG_index.reset_index())

data = data[data.trade_date.isin(data[data.id == benchmark_id].trade_date)]
data = data.drop_duplicates(subset=["id","trade_date"])
data = data.sort(["id","trade_date"]).reset_index(drop=True)
#data = data[data.trade_date.isin(np.arange(start_date, end_date , dtype='datetime64[D]'))]
#data = data[data.trade_date.isin(np.arange(start_date, end_date ))]
data["trade_biweek"] = [ x.year * 100 + int(datetime.datetime.strftime(x,"%U"))/2 for x in data.trade_date ]
data_grouped = data.groupby(["id","trade_biweek"])
data['loss'] = data_grouped.accu_value.apply(lambda x: pd.expanding_apply(x,lambda y: (y[-1]/(np.max(y)))-1))
data['biggest_loss'] = data.loc[data_grouped.loss.idxmin(),'loss']
data['biggest_loss_day'] = data.loc[data_grouped.loss.idxmin(),'trade_date']

data_result = pd.DataFrame()
data_result['biweek_first_date'] = data_grouped.trade_date.first()
data_result['biweek_last_date'] = data_grouped.trade_date.last()
data_result['biweek_start_value'] = data_grouped.accu_value.first()

data_result['biweek_last_value'] = data_grouped.accu_value.last()
data_result['earning1'] = (data_result.biweek_last_value / data_result.biweek_start_value) - 1
data_result['earning2'] = pd.concat([
    pd.rolling_apply(v.biweek_last_value, 2, lambda x: (x[1] / x[0]) - 1)
    for k, v in data_result.reset_index(level=0).groupby(["id"])
]).values

data_result['earning'] = np.where(pd.isnull(data_result['earning2']),
                                  data_result['earning1'],
                                  data_result['earning2'])

data['rtn'] = data.groupby(['id']).apply(
    lambda y: pd.rolling_apply(y['accu_value'], 2, lambda x: (x[1] / x[0]) - 1)).values
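The two-observation rolling_apply on the last line is exactly a one-period percentage change, so on modern pandas it reduces to the built-in per-group pct_change():

data['rtn'] = data.groupby('id')['accu_value'].pct_change()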
Example #11
# -*- author: Sean -*-

import numpy as np
import pandas as pd
import datetime


input_file = "raw.csv"
print "loading data from %s" % input_file
data = pd.read_csv(input_file,names = ["id","name","desc","trade_date","trade_value","accu_value"], header=0,dtype = {'id':int,'trade_value':float,'accu_value':float},parse_dates = ["trade_date"])

data = data.sort(["id", "trade_date"])
data["trade_biweek"] = [
    x.year * 100 + int(datetime.datetime.strftime(x, "%U")) // 2  # floor division keeps the Python 2 grouping
    for x in data.trade_date
]

data_grouped = data.groupby(["id", "trade_biweek"])
data['loss'] = data_grouped.accu_value.apply(
    lambda x: pd.expanding_apply(x, lambda y: (y[-1] / np.max(y)) - 1))
data['biggest_loss'] = data.loc[data_grouped.loss.idxmin(), 'loss']
data['biggest_loss_day'] = data.loc[data_grouped.loss.idxmin(), 'trade_date']


data_result = pd.DataFrame()

data_result['biweek_start_value'] = data_grouped.accu_value.first()

data_result['biweek_last_value'] = data_grouped.accu_value.last()
data_result['earning1'] = (data_result.biweek_last_value / data_result.biweek_start_value) - 1
data_result['earning2'] = pd.concat([
    pd.rolling_apply(v.biweek_last_value, 2, lambda x: (x[1] / x[0]) - 1)
    for k, v in data_result.reset_index(level=0).groupby(["id"])
]).values

#data_result['gain2'] = pd.rolling_apply(data_result['last'],2,lambda x:(x[1]/x[0])-1)

data_result['earning'] = np.where(pd.isnull(data_result['earning2']),
                                  data_result['earning1'],
                                  data_result['earning2'])
Example #12
	GG_index["trade_value"] = GG_index.trade_value + formated_index.loc[:,["trade_date","trade_value"]].set_index(["trade_date"]) * index_symbol["ratio"]

	data = data.append(formated_index)

	#index_data = pd.read_csv("index.csv",parse_dates= ["Date"])
#data = data[data.trade_date.isin(index_data.Date)]
GG_index["accu_value"] = GG_index["trade_value"]
data = data.append(GG_index.reset_index())

data = data[data.trade_date.isin(data[data.id == benchmark_id].trade_date)]
data = data.drop_duplicates(subset=["id","trade_date"])
data = data.sort(["id","trade_date"]).reset_index(drop=True)
data = data[data.trade_date.isin(np.arange(start_date, end_date , dtype='datetime64[D]'))]
data["trade_biweek"] = [ x.year * 100 + int(datetime.datetime.strftime(x,"%U"))/2 for x in data.trade_date ]
data_grouped = data.groupby(["id","trade_biweek"])
data['loss'] = data_grouped.accu_value.apply(lambda x: pd.expanding_apply(x,lambda y: (y[-1]/(np.max(y)))-1))
data['biggest_loss'] = data.loc[data_grouped.loss.idxmin(),'loss']
data['biggest_loss_day'] = data.loc[data_grouped.loss.idxmin(),'trade_date']

data_result = pd.DataFrame()
data_result['biweek_first_date'] = data_grouped.trade_date.first()
data_result['biweek_last_date'] = data_grouped.trade_date.last()
data_result['biweek_start_value'] = data_grouped.accu_value.first()

data_result['biweek_last_value'] = data_grouped.accu_value.last()
data_result['earning1'] = (data_result.biweek_last_value / data_result.biweek_start_value) - 1
data_result['earning2'] = pd.concat([
    pd.rolling_apply(v.biweek_last_value, 2, lambda x: (x[1] / x[0]) - 1)
    for k, v in data_result.reset_index(level=0).groupby(["id"])
]).values

data_result['earning'] = np.where(pd.isnull(data_result['earning2']),
                                  data_result['earning1'],
                                  data_result['earning2'])

data['rtn'] = data.groupby(['id']).apply(
    lambda y: pd.rolling_apply(y['accu_value'], 2, lambda x: (x[1] / x[0]) - 1)).values