def _ROIstats(rois):
    """Collect summary statistics of a return series in one dictionary.

    @param rois, array-like of returns; it must offer mean() and std() and
                 be accepted by the spstats / Performance / sss / sts
                 helpers called below
    @return dict with the keys
        "mu", "stdev"     : rois.mean() and rois.std()
        "skew", "kurt"    : spstats.skew(rois) and spstats.kurtosis(rois)
        "sharpe"          : Performance.Sharpe(rois)
        "sortinof", "ddf" : the two values returned by
                            Performance.SortinoFull(rois)
        "sortinop", "ddp" : the two values returned by
                            Performance.SortinoPartial(rois)
        "JB", "ADF"       : element [1] of the sss.jarque_bera(rois) and
                            sts.adfuller(rois) results

    The "JB" and "ADF" entries used to be computed and then thrown away;
    they are now part of the returned dictionary (all previous keys are
    unchanged).
    """
    sortinof, ddf = Performance.SortinoFull(rois)
    sortinop, ddp = Performance.SortinoPartial(rois)

    # NOTE(review): element [1] is presumably the p-value of each test
    # (statsmodels convention) -- confirm against the modules bound to
    # `sss` and `sts` at the top of the file.
    JB = sss.jarque_bera(rois)[1]
    ADF = sts.adfuller(rois)[1]

    return {
        "mu": rois.mean(),
        "stdev": rois.std(),
        "skew": spstats.skew(rois),
        "kurt": spstats.kurtosis(rois),
        "sharpe": Performance.Sharpe(rois),
        "sortinof": sortinof,
        "sortinop": sortinop,
        "ddf": ddf,
        "ddp": ddp,
        "JB": JB,
        "ADF": ADF,
    }
def comparisonStats():
    """Write a LaTeX table of daily-return statistics for the benchmarks.

    For 'TAIEX' and '0050' the pickled frame '<symbol>.pkl' is read from
    PklBasicFeaturesDir, the 'adjROI' column between 2005-01-03 and
    2013-12-31 is summarised, and one table row per symbol is written to
    'comparison_daily_stats.txt' inside ExpResultsDir.
    """
    benchmarks = ['TAIEX', '0050']
    firstDay = date(2005, 1, 3)
    lastDay = date(2013, 12, 31)

    table = StringIO()
    for headerPart in ('symbol & $R_{C}$(\%) & $R_{A}$(\%) & ',
                       '$\mu$(\%) & $\sigma$(\%) & skew & kurt & ',
                       '$S_p$(\%) & $S_o$(\%) & JB & ADF \\\ \hline \n'):
        table.write(headerPart)

    for symbol in benchmarks:
        pklPath = os.path.join(PklBasicFeaturesDir, '%s.pkl' % symbol)
        frame = pd.read_pickle(pklPath)
        print("%s %s" % (symbol, frame.columns))

        rois = frame[firstDay:lastDay]['adjROI'].values
        avg = rois.mean()
        sigma = rois.std()
        skewness = spstats.skew(rois)
        kurtosis = spstats.kurtosis(rois)
        sharpeRatio = Performance.Sharpe(rois)
        sortinoRatio, downDev = Performance.SortinoFull(rois)
        print(rois)

        # element [1] of each test result goes into the table
        jbValue = sss.jarque_bera(rois)[1]
        adfValue = sts.adfuller(rois)[1]

        # gross daily returns; adjROI is divided by 100 before compounding
        gross = rois / 100 + 1
        gross[1] = gross[1] - 0.001425  # buy fee
        gross[-1] = gross[-1] - 0.004425  # sell fee
        # compounding starts at index 1, the day carrying the buy fee
        R_cum = gross[1:].prod() - 1
        # annualised over the nine calendar years 2005-2013
        AR_cum = np.power((R_cum + 1), 1. / 9) - 1

        row = (symbol, R_cum * 100, AR_cum * 100, avg, sigma, skewness,
               kurtosis, sharpeRatio * 100, sortinoRatio * 100, jbValue,
               adfValue)
        table.write(
            ' %s & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2e & %4.2e \\\ \hline \n'
            % row)
        print("%s %s %s" % (symbol, R_cum, AR_cum))

    outPath = os.path.join(ExpResultsDir, 'comparison_daily_stats.txt')
    with open(outPath, 'wb') as fout:
        fout.write(table.getvalue())
    table.close()
def y2yBuyHold():
    """Summarise the buy-and-hold portfolios year by year into one CSV.

    For every portfolio size n_rv in 5, 10, ..., 50 the pickled wealth
    series 'wealthSum_n<n_rv>.pkl' is read from the 'BuyandHoldPortfolio'
    directory below ExpResultsDir.  Each calendar year 2005-2013 is sliced
    out of it and one CSV row with the yearly ROI and the statistics of
    the daily wealth returns is produced.  All rows are written to
    'y2yfixedBuyandHold_result_2005.csv' in ExpResultsDir.

    Changes with respect to the previous version:
    * the header is plain text that is never %-formatted, so the doubled
      '%%' escapes ended up literally in the file; they are single '%'
      now, consistent with the 'wROI(%)' column of the same header;
    * first/last elements are addressed with .iloc instead of relying on
      the positional fallback of [] on a date-indexed Series.
    """
    t = time.time()
    sellFee = 0.004425  # deducted from the last wealth of every year
    n_rvs = range(5, 50 + 5, 5)
    years = range(2005, 2013 + 1)
    resultDir = os.path.join(ExpResultsDir, "BuyandHoldPortfolio")

    avgIO = StringIO()
    avgIO.write(
        'startDate, endDate, n_stock, wealth1, wealth2, wROI(%), JB, ADF,')
    avgIO.write('meanROI(%), Sharpe(%), SortinoFull(%), SortinoPartial(%),')
    avgIO.write(' downDevFull, downDevPartial\n')

    for n_rv in n_rvs:
        df = pd.read_pickle(
            os.path.join(resultDir, "wealthSum_n%s.pkl" % (n_rv)))

        for year in years:
            startDate = date(year, 1, 1)
            endDate = date(year, 12, 31)
            print("%s %s" % (startDate, endDate))

            wealths = df[startDate:endDate]
            wrois = wealths.pct_change()
            # pct_change() leaves the first entry undefined; use 0 instead
            wrois.iloc[0] = 0

            wealth1 = wealths.iloc[0]
            wealth2 = wealths.iloc[-1] * (1 - sellFee)
            roi = (wealth2 / wealth1 - 1)

            # element [1] of each test result is reported
            JB = sss.jarque_bera(wrois)[1]
            ADF = sts.adfuller(wrois)[1]

            sharpe = Performance.Sharpe(wrois)
            sortinof, ddf = Performance.SortinoFull(wrois)
            sortinop, ddp = Performance.SortinoPartial(wrois)

            avgIO.write("%s,%s,%s,%s,%s,%s,%s,%s," %
                        (wealths.index[0].strftime("%Y-%m-%d"),
                         wealths.index[-1].strftime("%Y-%m-%d"), n_rv,
                         wealth1, wealth2, roi * 100, JB, ADF))
            avgIO.write("%s,%s,%s,%s," % (wrois.mean() * 100, sharpe * 100,
                                          sortinof * 100, sortinop * 100))
            avgIO.write("%s,%s\n" % (ddf * 100, ddp * 100))

    resFile = os.path.join(ExpResultsDir,
                           'y2yfixedBuyandHold_result_2005.csv')
    with open(resFile, 'wb') as fout:
        fout.write(avgIO.getvalue())
    avgIO.close()
    print("y2yBuyandHold OK, elapsed %.3f secs" % (time.time() - t))
def fixedSymbolWCVaRSPPortfolio(symbols,
                                startDate,
                                endDate,
                                money=1e6,
                                hist_periods=range(20, 130, 10),
                                n_scenario=200,
                                buyTransFee=0.001425,
                                sellTransFee=0.004425,
                                alpha=0.95,
                                scenFunc="Moment",
                                solver="cplex",
                                save_pkl=False,
                                save_csv=True,
                                debug=False):
    '''
    Back-test the worst-case CVaR stochastic-programming portfolio on a
    fixed set of symbols; the different distributions are estimated from
    variant hist_length.

    @param symbols, list, target assets
    @param startDate, endDate, datetime.date, first and last trading date
    @param money, positive float, initial capital
    @param hist_periods, list, lengths of the historical data used for
                         computing the moments and the correlation matrix
    @param n_scenario, positive integer, number of scenarios generated in
                         every period
    @param buyTransFee, sellTransFee, float, buy and sell transaction fee
    @param alpha, float, confidence level of the CVaR
    @param scenFunc, string, scenario generating function; only "Moment"
                         is handled
    @param solver, string, solver of the stochastic program
    @param save_pkl, save_csv, boolean, store the action / wealth / risk
                         processes as pickle and / or csv
    @param debug, boolean, forwarded to constructModelMtx

    @return None.  The results are written to a new directory below
            ExpResultsDir: the processes (pkl / csv), 'scenErr.txt' when
            scenario generation failed at least once, and 'summary.json'.

    @raise ValueError if scenFunc is not "Moment" (raised in the first
           trading period)

    The matrices used by the simulation come from constructModelMtx; the
    original author documented them as
    {
        "n_rv": n_rv,
        "T": T,
        "allRiskyRetMtx": allRiskyRetMtx,   #size: n_rv * (hist_period+T)
        #[0:hist_period] is used for estimating moments and corrMtx
        "riskFreeRetVec": riskFreeRetVec,   #size: T+1
        "buyTransFeeMtx": buyTransFeeMtx,   #size: n_rv * T
        "sellTransFeeMtx": sellTransFeeMtx, #size: n_rv * T
        "allocatedWealth": allocatedWealth, #size: n_rv
        "depositWealth": depositWealth,     #size: 1
        "transDates": transDates,           #size: (T+1)
        "fullTransDates": fullTransDates,   #size: (hist_period+T)
        "alpha": alpha                      #size:1
    }
    '''
    t0 = time.time()
    assert len(hist_periods) >= 1

    # The model matrices are built with the longest history, so column
    # (maxHistPeriod + tdx) holds the return realised at trading period
    # tdx.  The loop variable of the hist_periods loop used to be reused
    # for this offset, which is only correct when the longest period
    # happens to be the last element of hist_periods.
    maxHistPeriod = max(hist_periods)

    param = constructModelMtx(symbols, startDate, endDate, money,
                              maxHistPeriod, buyTransFee, sellTransFee,
                              alpha, debug)
    print("constructModelMtx %.3f secs" % (time.time() - t0))

    n_rv, T = param['n_rv'], param['T']
    allRiskyRetMtx = param['allRiskyRetMtx']
    riskFreeRetVec = param['riskFreeRetVec']
    buyTransFeeMtx = param['buyTransFeeMtx']
    sellTransFeeMtx = param['sellTransFeeMtx']
    allocatedWealth = param['allocatedWealth']
    depositWealth = param['depositWealth']
    transDates = param['transDates']
    fullTransDates = param['fullTransDates']

    #process from t=0 to t=(T+1)
    buyProcess = np.zeros((n_rv, T))
    sellProcess = np.zeros((n_rv, T))
    wealthProcess = np.zeros((n_rv, T + 1))
    depositProcess = np.zeros(T + 1)
    VaRProcess = np.zeros(T)
    WCVaRProcess = np.zeros(T)

    genScenErrDates = []
    scenErrStringIO = StringIO()

    # Fees reported in summary.json: first entry of the fee vectors of
    # the last period that was solved.  They start as the scalar
    # arguments so that the summary can still be written when no period
    # converged (the arguments themselves are no longer overwritten).
    summaryBuyFee, summarySellFee = buyTransFee, sellTransFee

    for tdx in xrange(T):
        tloop = time.time()
        transDate = pd.to_datetime(transDates[tdx]).strftime("%Y%m%d")
        todayCol = maxHistPeriod + tdx

        #today's return is already known when investing
        #(i.e. the closing price of the day is known)
        t = time.time()
        if scenFunc == "Moment":
            scenMatrics = []
            #one hist_period that yields scenarios is sufficient
            converged = False

            #multiple hist_period
            for hist_period in hist_periods:
                # NOTE(review): every window starts at column tdx, so a
                # hist_period shorter than the maximum covers the oldest
                # observations of the history rather than the ones right
                # before the trading date -- confirm that this is intended.
                subRiskyRetMtx = allRiskyRetMtx[:, tdx:(hist_period + tdx)]
                assert subRiskyRetMtx.shape[1] == hist_period

                moments = np.empty((n_rv, 4))
                moments[:, 0] = subRiskyRetMtx.mean(axis=1)
                moments[:, 1] = subRiskyRetMtx.std(axis=1)
                moments[:, 2] = spstats.skew(subRiskyRetMtx, axis=1)
                moments[:, 3] = spstats.kurtosis(subRiskyRetMtx, axis=1)
                corrMtx = np.corrcoef(subRiskyRetMtx)

                # retry with a looser tolerance (1e-3, 1e-2, 1e-1) until
                # the moment matching succeeds
                for order in xrange(-3, 0):
                    MaxErrMom, MaxErrCorr = 10**(order), 10**(order)
                    try:
                        scenMtx = HeuristicMomentMatching(
                            moments, corrMtx, n_scenario, MaxErrMom,
                            MaxErrCorr)
                        scenMatrics.append(scenMtx)
                    except ValueError as e:
                        print(e)
                        scenErrStringIO.write("%s p%s: %s\n" %
                                              (transDate, hist_period, e))
                    else:
                        converged = True
                        break
        else:
            raise ValueError("unknown scenFunc %s" % (scenFunc))

        #scenMatrics, shape, L(may less than L) * M * S
        scenMatrics = np.array(scenMatrics)
        print("%s-%s - generate scen. mtx, %.3f secs" %
              (transDate, scenFunc, time.time() - t))

        if converged:
            #successful generating scenarios, solve SP
            t = time.time()
            riskyRet = allRiskyRetMtx[:, todayCol]
            riskFreeRet = riskFreeRetVec[tdx]
            buyFeeVec = buyTransFeeMtx[:, tdx]
            sellFeeVec = sellTransFeeMtx[:, tdx]
            summaryBuyFee, summarySellFee = buyFeeVec[0], sellFeeVec[0]
            predictRiskyRet = scenMatrics
            predictRiskFreeRet = 0

            results = WorstCVaRPortfolioSP(symbols,
                                           riskyRet,
                                           riskFreeRet,
                                           allocatedWealth,
                                           depositWealth,
                                           buyFeeVec,
                                           sellFeeVec,
                                           alpha,
                                           predictRiskyRet,
                                           predictRiskFreeRet,
                                           n_scenario,
                                           probs=None,
                                           solver=solver)
            VaRProcess[tdx] = results['VaR']
            WCVaRProcess[tdx] = results['WCVaR']
            print("%s - %s solve SP, %.3f secs" %
                  (transDate, solver, time.time() - t))
        else:
            #failed generating scenarios
            genScenErrDates.append(transDate)
            results = None

        #realized today return
        allocatedWealth = allocatedWealth * (1 +
                                             allRiskyRetMtx[:, todayCol])
        depositWealth = depositWealth * (1 + riskFreeRetVec[tdx])

        if results is not None:
            #buy action
            for idx, value in enumerate(results['buys']):
                allocatedWealth[idx] += value
                buy = (1 + buyTransFeeMtx[idx, tdx]) * value
                buyProcess[idx, tdx] = buy
                depositWealth -= buy

            #sell action
            for idx, value in enumerate(results['sells']):
                allocatedWealth[idx] -= value
                sell = (1 - sellTransFeeMtx[idx, tdx]) * value
                sellProcess[idx, tdx] = sell
                depositWealth += sell

        #log wealth and signal process
        wealthProcess[:, tdx] = allocatedWealth
        depositProcess[tdx] = depositWealth

        print('*' * 80)
        trainDates = [
            pd.to_datetime(fullTransDates[tdx]).strftime("%Y%m%d"),
            pd.to_datetime(
                fullTransDates[todayCol - 1]).strftime("%Y%m%d")
        ]
        print(
            'fixedSymbolWCVaRSPPortfolio %s-%s n%s-p%s-s%s-a%s --scenFunc %s --solver %s, genscenErr:[%s]'
            % (startDate, endDate, n_rv, ":".join(
                str(h) for h in hist_periods), n_scenario, alpha, scenFunc,
               solver, len(genScenErrDates)))
        print(
            'transDate %s (train:%s-%s) WCVaR SP OK, current wealth %s, %.3f secs'
            % (transDate, trainDates[0], trainDates[1],
               allocatedWealth.sum() + depositWealth, time.time() - tloop))
        print('*' * 80)
    #end of for

    #the last period is only settled, nothing is bought or sold
    wealthProcess[:, -1] = allocatedWealth * (1 + allRiskyRetMtx[:, -1])
    depositProcess[-1] = depositWealth * (1 + riskFreeRetVec[-1])

    finalWealth = (np.dot(allocatedWealth, (1 + allRiskyRetMtx[:, -1])) +
                   depositWealth * (1 + riskFreeRetVec[-1]))
    print("final wealth %s" % (finalWealth))

    #setup result directory
    t1 = pd.to_datetime(transDates[0]).strftime("%Y%m%d")
    t2 = pd.to_datetime(transDates[-1]).strftime("%Y%m%d")
    rnd = time.strftime("%y%m%d%H%M%S")
    layer0Dir = "%s" % (fixedSymbolWCVaRSPPortfolio.__name__)
    layer1Dir = "LargestMarketValue_200501"
    layer2Dir = "%s_n%s_p%s_s%s_a%s" % (
        fixedSymbolWCVaRSPPortfolio.__name__, n_rv, "-".join(
            str(h) for h in hist_periods), n_scenario, alpha)
    layer3Dir = "%s-%s_%s" % (t1, t2, rnd)
    resultDir = os.path.join(ExpResultsDir, layer0Dir, layer1Dir, layer2Dir,
                             layer3Dir)
    if not os.path.exists(resultDir):
        os.makedirs(resultDir)

    #store data in pkl
    df_buyProc = pd.DataFrame(buyProcess.T,
                              index=transDates[:-1],
                              columns=["%s_buy" % (sym) for sym in symbols])
    df_sellProc = pd.DataFrame(
        sellProcess.T,
        index=transDates[:-1],
        columns=["%s_sell" % (sym) for sym in symbols])
    df_action = pd.merge(df_buyProc,
                         df_sellProc,
                         left_index=True,
                         right_index=True)

    df_wealth = pd.DataFrame(wealthProcess.T,
                             index=transDates,
                             columns=symbols)
    deposits = pd.Series(depositProcess.T, index=transDates)
    df_wealth['deposit'] = deposits

    #computing wealth ROI
    wealths = df_wealth.sum(axis=1)
    wealthROIs = wealths.pct_change()
    # pct_change() leaves the first entry undefined; use 0 instead
    wealthROIs.iloc[0] = 0

    df_risk = pd.DataFrame({
        "VaR": pd.Series(VaRProcess.T, index=transDates[:-1]),
        "WCVaR": pd.Series(WCVaRProcess.T, index=transDates[:-1])
    })

    records = {
        "actionProcess": df_action,
        "wealthProcess": df_wealth,
        "riskProcess": df_risk
    }

    #save pkl and csv
    for name, df in records.items():
        if save_pkl:
            pklFileName = os.path.join(resultDir, "%s.pkl" % (name))
            df.to_pickle(pklFileName)
        if save_csv:
            csvFileName = os.path.join(resultDir, "%s.csv" % (name))
            df.to_csv(csvFileName)

    #write scen error
    if len(genScenErrDates):
        scenErrFile = os.path.join(resultDir, "scenErr.txt")
        with open(scenErrFile, 'wb') as fout:
            fout.write(scenErrStringIO.getvalue())
    # release the buffer whether or not an error file was written
    scenErrStringIO.close()

    #generating summary files
    summary = {
        "n_rv": n_rv,
        "T": T,
        "scenario": n_scenario,
        "alpha": alpha,
        "symbols": ",".join(symbols),
        "transDates":
        [pd.to_datetime(d).strftime("%Y%m%d") for d in transDates],  #(T+1)
        "hist_period": "-".join(str(h) for h in hist_periods),
        "buyTransFee": summaryBuyFee,
        "sellTransFee": summarySellFee,
        "final_wealth": finalWealth,
        "wealth_ROI_mean": wealthROIs.mean(),
        "wealth_ROI_std": wealthROIs.std(),
        "wealth_ROI_Sharpe": Performance.Sharpe(wealthROIs),
        # stored exactly as returned by the Performance helpers
        "wealth_ROI_SortinoFull": Performance.SortinoFull(wealthROIs),
        "wealth_ROI_SortinoPartial": Performance.SortinoPartial(wealthROIs),
        "scenFunc": scenFunc,
        "scen_err_cnt": len(genScenErrDates),
        "scen_err_dates": genScenErrDates,
        "machine": platform.node(),
        "elapsed": time.time() - t0
    }

    fileName = os.path.join(resultDir, 'summary.json')
    with open(fileName, 'w') as fout:
        json.dump(summary, fout, indent=4)

    print(
        "fixedSymbolWCVaRSPPortfolio %s-%s n%s-p%s-s%s-a%s --scenFunc %s --solver %s\nsimulation ok, %.3f secs"
        % (startDate, endDate, n_rv, maxHistPeriod, n_scenario, alpha,
           scenFunc, solver, time.time() - t0))
def y2yResults(modelType="fixed"):
    '''
    Aggregate year-by-year statistics of the stored SP portfolio
    experiments.

    For every portfolio size, history length and alpha at most three
    experiment directories matching "20050103-20131231_*" are read
    ('wealthProcess.pkl' and 'riskProcess.pkl').  For each calendar year
    2005-2013 the wealth and daily-return statistics are computed per
    experiment and one CSV row per year, aggregated over the experiments,
    is appended to
    'avg_y2y<modelType>SymbolSPPortfolio_n<n_rv>_result_2005.csv'
    in ExpResultsDir.

    @param modelType, string, "fixed" or "dynamic"
    @raise ValueError if modelType is neither "fixed" nor "dynamic"

    Changes with respect to the previous version:
    * the first part of every row lacked its trailing comma, so the
      'wROI-std' and 'dROI' fields were glued together in the file;
    * parameter combinations without any experiment directory are skipped
      (they used to fail on an undefined wealth_df or on max() of an
      empty sequence);
    * the doubled '%%' in the header (plain text, never %-formatted) is
      written as a single '%'.
    '''
    if modelType == "fixed":
        n_rvs = range(5, 55, 5)
        hist_periods = range(50, 130, 10)
        alphas = ("0.5", "0.55", "0.6", "0.65", "0.7", "0.75", "0.8",
                  "0.85", "0.9", "0.95")
        myDir = os.path.join(ExpResultsDir, "fixedSymbolSPPortfolio",
                             "LargestMarketValue_200501")
    elif modelType == "dynamic":
        n_rvs = range(5, 55, 5)
        hist_periods = range(90, 120 + 10, 10)
        alphas = ("0.5", "0.55", "0.6", "0.65", "0.7")
        myDir = os.path.join(ExpResultsDir, "dynamicSymbolSPPortfolio",
                             "LargestMarketValue_200501_rv50")
    else:
        raise ValueError("unknown modelType %s" % (modelType))

    modelName = "%sSymbolSPPortfolio" % (modelType)
    years = range(2005, 2013 + 1)
    sellFee = 0.004425  # deducted from the last wealth of every year

    for n_rv in n_rvs:
        t = time()
        avgIO = StringIO()
        # NOTE(review): the header ends with a 'scen err' column for which
        # the rows below carry no value.
        avgIO.write(
            'run, startDate, endDate, n_rv, period, alpha, w1, w1-std, w2, w2-std, wROI(%), wROI-std,'
        )
        avgIO.write(
            'dROI(%), stdev, skew, kurt, Sp(%), Sp-std, StF(%), StF-std,')
        avgIO.write(
            'StP(%), Stp-std, downDevF, downDevP, JB, ADF, CVaRfailRate, VaRfailRate, scen err\n'
        )

        for period in hist_periods:
            if n_rv == 50 and period == 50:
                continue

            for alpha in alphas:
                dirName = "%s_n%s_p%s_s200_a%s" % (modelName, n_rv, period,
                                                   alpha)
                # at most three runs per parameter combination are used
                exps = glob(
                    os.path.join(myDir, dirName, "20050103-20131231_*"))[:3]
                if not exps:
                    print("n_rv:%s p:%s a:%s no experiment found, skipped" %
                          (n_rv, period, alpha))
                    continue

                # one row per experiment, one column per year
                shape = (len(exps), len(years))
                wealth1, wealth2 = np.zeros(shape), np.zeros(shape)
                ROI_Cs = np.zeros(shape)
                dROIs, stdevs = np.zeros(shape), np.zeros(shape)
                skews, kurts = np.zeros(shape), np.zeros(shape)
                JBs, ADFs = np.zeros(shape), np.zeros(shape)
                sharpes = np.zeros(shape)
                sortinops, downDevP = np.zeros(shape), np.zeros(shape)
                sortinofs, downDevF = np.zeros(shape), np.zeros(shape)
                CVaRFailRates = np.zeros(shape)
                VaRFailRates = np.zeros(shape)

                for edx, exp in enumerate(exps):
                    wealth_df = pd.read_pickle(
                        os.path.join(exp, 'wealthProcess.pkl'))
                    risk_df = pd.read_pickle(
                        os.path.join(exp, 'riskProcess.pkl'))

                    for ydx, year in enumerate(years):
                        startDate = date(year, 1, 1)
                        endDate = date(year, 12, 31)
                        exp_wealth_df = wealth_df[startDate:endDate]
                        exp_risk_df = risk_df[startDate:endDate]

                        #wealth
                        wealth = exp_wealth_df.sum(axis=1)
                        wealth.iloc[-1] *= (1 - sellFee)
                        wealth1[edx, ydx] = wealth.iloc[0]
                        wealth2[edx, ydx] = wealth.iloc[-1]

                        #cum ROI
                        roi = (wealth.iloc[-1] / wealth.iloc[0] - 1)
                        wrois = wealth.pct_change()
                        # pct_change() leaves the first entry undefined
                        wrois.iloc[0] = 0
                        ROI_Cs[edx, ydx] = roi * 100

                        #stats
                        dROIs[edx, ydx] = wrois.mean() * 100
                        stdevs[edx, ydx] = wrois.std() * 100
                        skews[edx, ydx] = spstats.skew(wrois)
                        kurts[edx, ydx] = spstats.kurtosis(wrois)

                        #JB, ADF (element [1] of each test result)
                        JBs[edx, ydx] = sss.jarque_bera(wrois)[1]
                        ADFs[edx, ydx] = sts.adfuller(wrois)[1]

                        #Sharpe
                        sharpe = Performance.Sharpe(wrois)
                        sharpes[edx, ydx] = sharpe * 100

                        sortinof, ddf = Performance.SortinoFull(wrois)
                        sortinofs[edx, ydx] = sortinof * 100
                        downDevF[edx, ydx] = ddf * 100

                        sortinop, ddp = Performance.SortinoPartial(wrois)
                        sortinops[edx, ydx] = sortinop * 100
                        downDevP[edx, ydx] = ddp * 100

                        CVaRFailRate, VaRFailRate = VaRBackTest(
                            exp_wealth_df, exp_risk_df)
                        CVaRFailRates[edx, ydx] = CVaRFailRate * 100
                        VaRFailRates[edx, ydx] = VaRFailRate * 100

                for ydx, year in enumerate(years):
                    startDate = date(year, 1, 1)
                    endDate = date(year, 12, 31)
                    # the dates of the last experiment label the row
                    exp_df = wealth_df[startDate:endDate]

                    # trailing comma added: the next write continues the
                    # same CSV row
                    avgIO.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s," % (
                        len(exps),
                        exp_df.index[0].strftime("%Y-%m-%d"),
                        exp_df.index[-1].strftime("%Y-%m-%d"),
                        n_rv,
                        period,
                        alpha,
                        wealth1[:, ydx].mean(),
                        wealth1[:, ydx].std(),
                        wealth2[:, ydx].mean(),
                        wealth2[:, ydx].std(),
                        ROI_Cs[:, ydx].mean(),
                        ROI_Cs[:, ydx].std(),
                    ))
                    avgIO.write(
                        "%s,%s,%s,%s,%s,%s,%s,%s," %
                        (dROIs[:, ydx].mean(), stdevs[:, ydx].mean(),
                         skews[:, ydx].mean(), kurts[:, ydx].mean(),
                         sharpes[:, ydx].mean(), sharpes[:, ydx].std(),
                         sortinofs[:, ydx].mean(), sortinofs[:, ydx].std()))
                    avgIO.write(
                        "%s,%s,%s,%s,%s,%s,%s,%s\n" %
                        (sortinops[:, ydx].mean(), sortinops[:, ydx].std(),
                         downDevF[:, ydx].mean(), downDevP[:, ydx].mean(),
                         max(JBs[:, ydx]), max(ADFs[:, ydx]),
                         CVaRFailRates[:, ydx].mean(),
                         VaRFailRates[:, ydx].mean()))
                    print("n_rv:%s p:%s a:%s endDate:%s run:%s" %
                          (n_rv, period, alpha, endDate, len(exps)))

        resFile = os.path.join(
            ExpResultsDir,
            'avg_y2y%s_n%s_result_2005.csv' % (modelName, n_rv))
        # appended, so earlier runs for the same n_rv stay in the file
        with open(resFile, 'ab') as fout:
            fout.write(avgIO.getvalue())
        avgIO.close()
        print("n_rv:%s OK, elapsed %.3f secs" % (n_rv, time() - t))
def parseSymbolResults(modelType="fixed"):
    '''
    Aggregate the whole-period results of the stored SP portfolio
    experiments.

    For every portfolio size, history length and alpha at most three
    experiment directories matching "20050103-20131231_*" are read.  The
    statistics are taken from 'summary.json'; when they are missing or
    not convertible to float they are recomputed from 'wealthProcess.pkl'
    (and 'riskProcess.pkl' for the fail rates) and cached back into
    'summary.json'.  One CSV row per parameter combination is written to
    'avg_<modelType>SymbolSPPortfolio_n<n_rv>_result_2005.csv' in
    ExpResultsDir; combinations without experiments give a row of 'NA'.

    @param modelType, string, "fixed" or "dynamic"
    @raise ValueError if modelType is neither "fixed" nor "dynamic"

    Changes with respect to the previous version:
    * the cached statistics are read completely before anything is
      appended; formerly a key missing half way left the values read so
      far in the result lists and the fallback appended them a second
      time;
    * the recomputed stdev is scaled by 100 like the cached one;
    * 'summary.json' is read through a with-block;
    * the doubled '%%' in the header (plain text, never %-formatted) is
      written as a single '%'.
    '''
    if modelType == "fixed":
        n_rvs = range(5, 55, 5)
        hist_periods = range(50, 130, 10)
        alphas = ("0.5", "0.55", "0.6", "0.65", "0.7", "0.75", "0.8",
                  "0.85", "0.9", "0.95", '0.99')
        myDir = os.path.join(ExpResultsDir, "fixedSymbolSPPortfolio",
                             "LargestMarketValue_200501")
    elif modelType == "dynamic":
        n_rvs = range(5, 55, 5)
        hist_periods = range(90, 120 + 10, 10)
        alphas = ("0.5", "0.55", "0.6", "0.65", "0.7")
        myDir = os.path.join(ExpResultsDir, "dynamicSymbolSPPortfolio",
                             "LargestMarketValue_200501_rv50")
    else:
        raise ValueError("unknown modelType %s" % (modelType))

    modelName = "%sSymbolSPPortfolio" % (modelType)

    # summary.json keys of the cached statistics, in the order in which
    # they are unpacked below
    statKeys = ('wealth_ROI_mean', 'wealth_ROI_stdev', 'wealth_ROI_skew',
                'wealth_ROI_kurt', 'wealth_ROI_Sharpe',
                'wealth_ROI_SortinoFull', 'wealth_ROI_SortinoPartial',
                'wealth_ROI_downDevFull', 'wealth_ROI_downDevPartial',
                'wealth_ROI_JBTest', 'wealth_ROI_ADFTest')

    for n_rv in n_rvs:
        t = time()
        avgIO = StringIO()
        avgIO.write(
            'run, n_rv, period, alpha, time, wealth, wealth-std, wROI(%), wROI-std,'
        )
        avgIO.write(
            'dROI(%), stdev, skew, kurt, Sp(%), Sp-std, StF(%), StF-std,')
        avgIO.write(
            'StP(%), Stp-std, downDevF, downDevP, JB, ADF, CVaRfailRate, VaRfailRate, scen err\n'
        )

        for period in hist_periods:
            if n_rv == 50 and period == 50:
                continue

            for alpha in alphas:
                dirName = "%s_n%s_p%s_s200_a%s" % (modelName, n_rv, period,
                                                   alpha)
                # at most three runs per parameter combination are used
                exps = glob(
                    os.path.join(myDir, dirName, "20050103-20131231_*"))[:3]
                if not exps:
                    # one placeholder per CSV column
                    avgIO.write('NA,' * 26 + '\n')
                    continue

                wealths, ROI_Cs, dROIs, stdevs, skews, kurts = (
                    [], [], [], [], [], [])
                JBs, ADFs = [], []
                sharpes, sortinofs, sortinops = [], [], []
                downDevF, downDevP = [], []
                CVaRFailRates, VaRFailRates = [], []
                elapsed, scenerr = [], []

                for edx, exp in enumerate(exps):
                    print(exp)
                    summaryFile = os.path.join(exp, "summary.json")
                    with open(summaryFile) as fin:
                        summary = json.load(fin)
                    print(dirName)

                    #wealth and cum ROI
                    wealth = float(summary['final_wealth'])
                    wealths.append(wealth)
                    # ROI relative to an initial capital of 1e6
                    ROI_Cs.append((wealth / 1e6 - 1) * 100.0)
                    elapsed.append(float(summary['elapsed']))
                    scenerr.append(summary['scen_err_cnt'])

                    try:
                        stats = [float(summary[key]) for key in statKeys]
                    except (KeyError, TypeError):
                        #read wealth process
                        print("read raw df n_rv-period-alpha: %s-%s-%s:%s" %
                              (n_rv, period, alpha, edx + 1))
                        df = pd.read_pickle(
                            os.path.join(exp, 'wealthProcess.pkl'))
                        wrois = df.sum(axis=1).pct_change()
                        # pct_change() leaves the first entry undefined
                        wrois.iloc[0] = 0

                        dROI = wrois.mean()
                        stdev = wrois.std()
                        skew = spstats.skew(wrois)
                        kurt = spstats.kurtosis(wrois)
                        sharpe = Performance.Sharpe(wrois)
                        sortinof, ddf = Performance.SortinoFull(wrois)
                        sortinop, ddp = Performance.SortinoPartial(wrois)
                        # element [1] of each test result is kept
                        JB = sss.jarque_bera(wrois)[1]
                        ADF = sts.adfuller(wrois)[1]

                        # plain floats, so json.dump can store them
                        stats = [
                            float(value)
                            for value in (dROI, stdev, skew, kurt, sharpe,
                                          sortinof, sortinop, ddf, ddp, JB,
                                          ADF)
                        ]
                        summary.update(zip(statKeys, stats))
                        with open(summaryFile, 'w') as fout:
                            json.dump(summary, fout, indent=4)

                    (dROI, stdev, skew, kurt, sharpe, sortinof, sortinop,
                     ddf, ddp, JB, ADF) = stats
                    dROIs.append(dROI * 100)
                    stdevs.append(stdev * 100)
                    skews.append(skew)
                    kurts.append(kurt)
                    sharpes.append(sharpe * 100)
                    sortinofs.append(sortinof * 100)
                    sortinops.append(sortinop * 100)
                    downDevF.append(ddf * 100)
                    downDevP.append(ddp * 100)
                    JBs.append(JB)
                    ADFs.append(ADF)

                    try:
                        CVaRFailRate = float(summary['CVaR_failRate'])
                        VaRFailRate = float(summary['VaR_failRate'])
                    except (KeyError, TypeError):
                        wealth_df = pd.read_pickle(
                            os.path.join(exp, 'wealthProcess.pkl'))
                        risk_df = pd.read_pickle(
                            os.path.join(exp, 'riskProcess.pkl'))
                        CVaRFailRate, VaRFailRate = VaRBackTest(
                            wealth_df, risk_df)
                        summary['VaR_failRate'] = VaRFailRate
                        summary['CVaR_failRate'] = CVaRFailRate
                        print("CVaR fail:%s, VaR fail:%s" %
                              (CVaRFailRate, VaRFailRate))
                        with open(summaryFile, 'w') as fout:
                            json.dump(summary, fout, indent=4)

                    CVaRFailRates.append(CVaRFailRate * 100)
                    VaRFailRates.append(VaRFailRate * 100)

                wealths = np.asarray(wealths)
                ROI_Cs = np.asarray(ROI_Cs)
                dROIs = np.asarray(dROIs)
                stdevs = np.asarray(stdevs)
                skews = np.asarray(skews)
                kurts = np.asarray(kurts)
                JBs = np.asarray(JBs)
                ADFs = np.asarray(ADFs)
                sharpes = np.asarray(sharpes)
                sortinofs = np.asarray(sortinofs)
                sortinops = np.asarray(sortinops)
                downDevF = np.asarray(downDevF)
                downDevP = np.asarray(downDevP)
                CVaRFailRates = np.asarray(CVaRFailRates)
                VaRFailRates = np.asarray(VaRFailRates)
                elapsed = np.asarray(elapsed)
                scenerr = np.asarray(scenerr)

                avgIO.write("%s,%s,%s,%s,%s,%s,%s,%s,%s," %
                            (len(ROI_Cs), n_rv, period, alpha,
                             elapsed.mean(), wealths.mean(), wealths.std(),
                             ROI_Cs.mean(), ROI_Cs.std()))
                avgIO.write("%s,%s,%s,%s,%s,%s,%s,%s," %
                            (dROIs.mean(), stdevs.mean(), skews.mean(),
                             kurts.mean(), sharpes.mean(), sharpes.std(),
                             sortinofs.mean(), sortinofs.std()))
                avgIO.write("%s,%s,%s,%s,%s,%s,%s,%s,%s\n" %
                            (sortinops.mean(), sortinops.std(),
                             downDevF.mean(), downDevP.mean(), max(JBs),
                             max(ADFs), CVaRFailRates.mean(),
                             VaRFailRates.mean(), scenerr.mean()))

        resFile = os.path.join(
            ExpResultsDir, 'avg_%s_n%s_result_2005.csv' % (modelName, n_rv))
        with open(resFile, 'wb') as fout:
            fout.write(avgIO.getvalue())
        avgIO.close()
        print("n_rv:%s OK, elapsed %.3f secs" % (n_rv, time() - t))
def individualSymbolStats():
    '''Statistical analysis of the individual stocks.

    For each of the 50 listed symbols the pickled frame '<symbol>.pkl' is
    read from PklBasicFeaturesDir, the 'adjROI' column between 2005-01-03
    and 2013-12-31 is summarised, and one LaTeX table row per symbol
    (ranked in list order) is written to 'symbol_daily_stats.txt' inside
    ExpResultsDir.
    '''
    symbols = [
        '2330', '2412', '2882', '6505', '2317', '2303', '2002', '1303',
        '1326', '1301', '2881', '2886', '2409', '2891', '2357', '2382',
        '3045', '2883', '2454', '2880', '2892', '4904', '2887', '2353',
        '2324', '2801', '1402', '2311', '2475', '2888', '2408', '2308',
        '2301', '2352', '2603', '2884', '2890', '2609', '9904', '2610',
        '1216', '1101', '2325', '2344', '2323', '2371', '2204', '1605',
        '2615', '2201',
    ]
    firstDay = date(2005, 1, 3)
    lastDay = date(2013, 12, 31)

    table = StringIO()
    table.write(
        'rank & symbol & $R_{C}$(\%) & $R_{A}$(\%) & $\mu$(\%) & $\sigma$(\%) & skew & kurt & $S_p$(\%) & $S_o$(\%) & JB & ADF \\\ \hline \n'
    )

    for rank, symbol in enumerate(symbols, 1):
        pklPath = os.path.join(PklBasicFeaturesDir, '%s.pkl' % symbol)
        frame = pd.read_pickle(pklPath)
        rois = frame[firstDay:lastDay]['adjROI'].values

        avg = rois.mean()
        sigma = rois.std()
        skewness = spstats.skew(rois)
        kurtosis = spstats.kurtosis(rois)
        sharpeRatio = Performance.Sharpe(rois)
        sortinoRatio, downDev = Performance.SortinoFull(rois)

        # element [1] of each test result goes into the table
        jbValue = sss.jarque_bera(rois)[1]
        adfValue = sts.adfuller(rois)[1]

        # gross daily returns; adjROI is divided by 100 before compounding
        gross = rois / 100 + 1
        gross[1] = gross[1] - 0.001425  # buy fee
        gross[-1] = gross[-1] - 0.004425  # sell fee
        # compounding starts at index 1, the day carrying the buy fee
        R_cum = gross[1:].prod() - 1
        # annualised over the nine calendar years 2005-2013
        AR_cum = np.power((R_cum + 1), 1. / 9) - 1

        row = (rank, symbol, R_cum * 100, AR_cum * 100, avg, sigma,
               skewness, kurtosis, sharpeRatio * 100, sortinoRatio * 100,
               jbValue, adfValue)
        table.write(
            '%2d & %s & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2e & %4.2e \\\ \hline \n'
            % row)
        print("%s %s %s" % (symbol, R_cum, AR_cum))

    outPath = os.path.join(ExpResultsDir, 'symbol_daily_stats.txt')
    with open(outPath, 'wb') as fout:
        fout.write(table.getvalue())
    table.close()
def parseWCVaRSymbolResults():
    """Aggregate the stored worst-case CVaR portfolio experiments.

    For every portfolio size and alpha all experiment directories
    matching "20050103-20131231_*" below
    'fixedSymbolWCVaRSPPortfolio/LargestMarketValue_200501' are read.
    The statistics come from 'summary.json'; when an entry is missing or
    is not convertible to float they are recomputed from
    'wealthProcess.csv'.  One CSV row per alpha, averaged over the
    experiments, is written to
    'avg_fixedWCVaRSPPortfolio_n<n_rv>_result_2005.csv' in ExpResultsDir.

    Changes with respect to the previous version:
    * the Sortino helpers return two values everywhere else in this file
      (ratio and downside deviation); the fallback multiplied that pair
      by 100 instead of the ratio;
    * TypeError is handled like KeyError, because a summary may store the
      Sortino entries as a list, which float() rejects;
    * the cached values are read completely before anything is appended,
      so a key missing half way no longer leaves duplicates in the lists;
    * 'summary.json' is read through a with-block;
    * the doubled '%%' in the header (plain text, never %-formatted) is
      written as a single '%'.
    """
    n_rvs = range(5, 55, 5)
    alphas = ("0.5", "0.55", "0.6", "0.65", "0.7", "0.75", "0.8", "0.85",
              "0.9", "0.95")
    myDir = os.path.join(ExpResultsDir, "fixedSymbolWCVaRSPPortfolio",
                         "LargestMarketValue_200501")

    for n_rv in n_rvs:
        t = time()
        avgIO = StringIO()
        avgIO.write(
            'run, n_stock ,n_rv, hist_period, alpha, runtime, wealth, wROI(%), dROI(%), Sharpe(%), SortinoFull(%), SortinoPartial(%), scen err\n'
        )

        for alpha in alphas:
            dirName = "fixedSymbolWCVaRSPPortfolio_n%s_p70-80-90-100-110-120_s100_a%s" % (
                n_rv, alpha)
            exps = glob(os.path.join(myDir, dirName, "20050103-20131231_*"))

            wealths, rois, elapsed, scenerr = [], [], [], []
            sharpe, sortinof, sortinop, dROI = [], [], [], []

            for exp in exps:
                with open(os.path.join(exp, "summary.json")) as fin:
                    summary = json.load(fin)

                wealth = float(summary['final_wealth'])
                print("%s %s" % (dirName, wealth))
                wealths.append(wealth)
                # ROI relative to an initial capital of 1e6
                rois.append((wealth / 1e6 - 1) * 100.0)
                elapsed.append(float(summary['elapsed']))
                scenerr.append(summary['scen_err_cnt'])

                try:
                    expSharpe = float(summary['wealth_ROI_Sharpe'])
                    expSortinoF = float(summary['wealth_ROI_SortinoFull'])
                    expSortinoP = float(
                        summary['wealth_ROI_SortinoPartial'])
                    expDROI = float(summary['wealth_ROI_mean'])
                except (KeyError, TypeError):
                    #read wealth process
                    csvfile = os.path.join(exp, 'wealthProcess.csv')
                    df = pd.read_csv(csvfile, index_col=0, parse_dates=True)
                    wrois = df.sum(axis=1).pct_change()
                    # pct_change() leaves the first entry undefined
                    wrois.iloc[0] = 0

                    expDROI = wrois.mean()
                    expSharpe = Performance.Sharpe(wrois)
                    expSortinoF, _ddf = Performance.SortinoFull(wrois)
                    expSortinoP, _ddp = Performance.SortinoPartial(wrois)

                dROI.append(expDROI * 100)
                sharpe.append(expSharpe * 100)
                sortinof.append(expSortinoF * 100)
                sortinop.append(expSortinoP * 100)

            rois = np.asarray(rois)
            wealths = np.asarray(wealths)
            elapsed = np.asarray(elapsed)
            scenerr = np.asarray(scenerr)
            sharpe = np.asarray(sharpe)
            sortinof = np.asarray(sortinof)
            sortinop = np.asarray(sortinop)
            dROI = np.asarray(dROI)

            avgIO.write(
                "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s\n" %
                (len(rois), n_rv, n_rv, "70-80-90-100-110-120", alpha,
                 elapsed.mean(), wealths.mean(), rois.mean(), dROI.mean(),
                 sharpe.mean(), sortinof.mean(), sortinop.mean(),
                 scenerr.mean()))

        resFile = os.path.join(
            ExpResultsDir,
            'avg_fixedWCVaRSPPortfolio_n%s_result_2005.csv' % (n_rv))
        with open(resFile, 'wb') as fout:
            fout.write(avgIO.getvalue())
        avgIO.close()
        print("n_rv:%s OK, elapsed %.3f secs" % (n_rv, time() - t))
def buyHoldPortfolio(symbols,
                     startDate=date(2005, 1, 3),
                     endDate=date(2013, 12, 31),
                     money=1e6,
                     buyTransFee=0.001425,
                     sellTransFee=0.004425,
                     save_latex=False,
                     save_csv=True,
                     debug=False):
    """Simulate an equally weighted buy-and-hold portfolio.

    The capital is split evenly over the symbols on the first trading
    date (less the buy fee), every position follows its own 'adjROI'
    series, and the sell fee is deducted on the last trading date.  The
    wealth processes are stored as csv and pkl in the
    'BuyandHoldPortfolio' directory below ExpResultsDir, and one line of
    statistics is appended to 'BuyandHold_result_2005.csv' and
    'BuyandHold_result_2005.txt' (LaTeX row) in the same directory.

    @param symbols, list, target assets; '<symbol>.pkl' is read from
                    PklBasicFeaturesDir for each of them.  The list is no
                    longer modified (a 'deposit' entry used to be appended
                    to the caller's list on every call).
    @param startDate, endDate, datetime.date, first and last trading date
    @param money, positive float, initial capital; the ROI is now
                  computed against this value instead of a fixed 1e6
    @param buyTransFee, sellTransFee, float, buy and sell transaction fee
    @param save_latex, save_csv, debug, currently unused
    @raise ValueError if symbols is empty or the symbols do not share the
                      same transaction dates
    """
    t = time.time()
    if len(symbols) == 0:
        raise ValueError('symbols must not be empty')

    #read df
    dfs = []
    transDates = None
    for symbol in symbols:
        df = pd.read_pickle(
            os.path.join(PklBasicFeaturesDir, '%s.pkl' % symbol))
        tmp = df[startDate:endDate]
        startIdx = df.index.get_loc(tmp.index[0])
        endIdx = df.index.get_loc(tmp.index[-1])
        # adjROI is divided by 100 to obtain a fractional return
        data = df[startIdx:endIdx + 1]['adjROI'] / 100.

        #check all data have the same transDates
        if transDates is None:
            transDates = data.index.values
        if not np.all(transDates == data.index.values):
            raise ValueError('symbol %s do not have the same trans. dates' %
                             (symbol))
        dfs.append(data)

    #initialize
    n_rv = len(dfs)
    # work on a copy so that the caller's list stays untouched
    columns = list(symbols) + ['deposit']
    wealthProcess = pd.DataFrame(columns=columns, index=transDates)

    #allocation
    firstDate, lastDate = transDates[0], transDates[-1]
    initialWealth = money / n_rv * (1 - buyTransFee)
    for symbol in symbols:
        wealthProcess.loc[firstDate, symbol] = initialWealth
    wealthProcess['deposit'] = 0

    #buy and hold
    for sdx, symbol in enumerate(symbols):
        rois = dfs[sdx]
        for tdx, transDate in enumerate(transDates[1:]):
            # transDates[tdx] is the date before transDate
            tm1 = transDates[tdx]
            wealthProcess.loc[transDate, symbol] = (
                wealthProcess.loc[tm1, symbol] * (1 + rois[transDate]))

    #sell in the last period
    for symbol in symbols:
        wealthProcess.loc[lastDate, symbol] *= (1 - sellTransFee)

    wealth = wealthProcess.sum(axis=1)
    finalWealth = wealth.iloc[-1]
    pROI = (finalWealth / money - 1) * 100
    prois = wealth.pct_change()
    # pct_change() leaves the first entry undefined; use 0 instead
    prois.iloc[0] = 0

    # element [1] of each test result is reported
    JB = sss.jarque_bera(prois)[1]
    ADF = sts.adfuller(prois)[1]

    resultDir = os.path.join(ExpResultsDir, "BuyandHoldPortfolio")
    if not os.path.exists(resultDir):
        os.makedirs(resultDir)

    fileName = os.path.join(resultDir, 'BuyandHold_result_2005.csv')
    statName = os.path.join(resultDir, 'BuyandHold_result_2005.txt')
    df_name = os.path.join(resultDir, "wealthProcess_n%s.pkl" % (len(dfs)))
    df2_name = os.path.join(resultDir, "wealthSum_n%s.pkl" % (len(dfs)))
    csv_name = os.path.join(resultDir, "wealthProcess_n%s.csv" % (len(dfs)))
    csv2_name = os.path.join(resultDir, "wealthSum_n%s.csv" % (len(dfs)))

    wealthProcess.to_csv(csv_name)
    wealth.to_csv(csv2_name)
    wealthProcess.to_pickle(df_name)
    wealth.to_pickle(df2_name)

    csvIO = StringIO()
    statIO = StringIO()
    # both headers are written only while the csv result file is absent
    if not os.path.exists(fileName):
        # plain text, never %-formatted: single '%' signs
        csvIO.write('n_rv, wealth, wROI(%), ROI(%), stdev, skew, kurt,')
        csvIO.write('Sp(%), StF(%), StP(%), downDevF, downDevP, JB, ADF\n')
        statIO.write(
            '$n$ & $R_{C}$(\%) & $R_{A}$(\%) & $\mu$(\%) & $\sigma$(\%) & skew & kurt & $S_p$(\%) & $S_o$(\%) & JB & ADF \\\ \hline \n'
        )

    sharpe = Performance.Sharpe(prois)
    sortinof, ddf = Performance.SortinoFull(prois)
    sortinop, ddp = Performance.SortinoPartial(prois)

    csvIO.write('%s,%s,%s,%s,%s,%s,%s,' %
                (n_rv, finalWealth, pROI, prois.mean() * 100,
                 prois.std() * 100, spstats.skew(prois),
                 spstats.kurtosis(prois)))
    csvIO.write('%s,%s,%s,%s,%s,%s,%s\n' %
                (sharpe * 100, sortinof * 100, sortinop * 100, ddf * 100,
                 ddp * 100, JB, ADF))

    # NOTE(review): the annualised ROI assumes a nine-year horizon (the
    # default 2005-2013 range) regardless of startDate / endDate.
    statIO.write(
        '%2d & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2e & %4.2e \\\ \hline \n'
        % (n_rv, pROI, (np.power(finalWealth / money, 1. / 9) - 1) * 100,
           prois.mean() * 100, prois.std() * 100, spstats.skew(prois),
           spstats.kurtosis(prois), sharpe * 100, sortinof * 100, JB, ADF))

    with open(fileName, 'ab') as fout:
        fout.write(csvIO.getvalue())
    csvIO.close()

    with open(statName, 'ab') as fout:
        fout.write(statIO.getvalue())
    statIO.close()

    print("buyhold portfolio %s %s_%s pROI:%.3f%%, %.3f secs" %
          (startDate, endDate, n_rv, pROI, time.time() - t))