Пример #1
0
def _ROIstats(rois):
    """Summarise a return series with moments, risk ratios and test results.

    @param rois, array-like of returns; must provide ``mean()`` and ``std()``
        (e.g. a numpy array or a pandas Series)
    @return dict with the keys
        "mu", "stdev", "skew", "kurt": sample moments of rois
        "sharpe": Performance.Sharpe(rois)
        "sortinof", "ddf": the pair returned by Performance.SortinoFull
        "sortinop", "ddp": the pair returned by Performance.SortinoPartial
        "JB", "ADF": element [1] of the sss.jarque_bera and sts.adfuller
            results (presumably the p-values - confirm against the
            statsmodels version in use)
    """
    mu = rois.mean()
    stdev = rois.std()
    skew = spstats.skew(rois)
    kurt = spstats.kurtosis(rois)

    sharpe = Performance.Sharpe(rois)
    sortinof, ddf = Performance.SortinoFull(rois)
    sortinop, ddp = Performance.SortinoPartial(rois)

    # These two tests used to be computed and then thrown away; they are now
    # reported alongside the other statistics (existing keys are unchanged).
    JB = sss.jarque_bera(rois)[1]
    ADF = sts.adfuller(rois)[1]

    return {
        "mu": mu,
        "stdev": stdev,
        "skew": skew,
        "kurt": kurt,
        "sharpe": sharpe,
        "sortinof": sortinof,
        "sortinop": sortinop,
        "ddf": ddf,
        "ddp": ddp,
        "JB": JB,
        "ADF": ADF,
    }
Пример #2
0
def comparisonStats():
    """Write a LaTeX table of daily-return statistics for the benchmarks.

    For each benchmark symbol the pickled frame
    ``<PklBasicFeaturesDir>/<symbol>.pkl`` is loaded, restricted to
    2005-01-03 .. 2013-12-31, and its ``adjROI`` column is summarised
    (cumulative and annualised return, moments, Sharpe and Sortino ratios,
    Jarque-Bera and ADF results).  The table is stored in
    ``<ExpResultsDir>/comparison_daily_stats.txt``.
    """
    benchmarks = ['TAIEX', '0050']
    firstDay, lastDay = date(2005, 1, 3), date(2013, 12, 31)

    # every table line ends with the LaTeX row terminator "\\ \hline"
    rowFormat = (' %s & ' + ' & '.join(['%4.2f'] * 8 + ['%4.2e'] * 2) +
                 ' \\\\ \\hline \n')

    table = StringIO()
    table.write('symbol & $R_{C}$(\\%) & $R_{A}$(\\%) & ')
    table.write('$\\mu$(\\%) & $\\sigma$(\\%) & skew & kurt & ')
    table.write('$S_p$(\\%) & $S_o$(\\%)  & JB & ADF \\\\ \\hline \n')

    for symbol in benchmarks:
        pklPath = os.path.join(PklBasicFeaturesDir, '%s.pkl' % symbol)
        frame = pd.read_pickle(pklPath)
        print("%s %s" % (symbol, frame.columns))
        rois = frame[firstDay:lastDay]['adjROI'].values

        mean, std = rois.mean(), rois.std()
        skew = spstats.skew(rois)
        kurt = spstats.kurtosis(rois)
        sharpe = Performance.Sharpe(rois)
        sortinof, dd = Performance.SortinoFull(rois)
        print(rois)

        JB = sss.jarque_bera(rois)[1]
        ADF = sts.adfuller(rois)[1]

        # adjROI is handled as a percentage: turn it into gross returns,
        # charge the fees on the first and last compounded day, and compound
        # from index 1 onwards (index 0 is left out of the product).
        gross = rois / 100 + 1
        gross[1] -= 0.001425  # buy fee
        gross[-1] -= 0.004425  # sell fee
        R_cum = gross[1:].prod() - 1
        # annualised over the 9 calendar years of the sample
        AR_cum = np.power((R_cum + 1), 1. / 9) - 1

        table.write(rowFormat %
                    (symbol, R_cum * 100, AR_cum * 100, mean, std, skew, kurt,
                     sharpe * 100, sortinof * 100, JB, ADF))
        print("%s %s %s" % (symbol, R_cum, AR_cum))

    resFile = os.path.join(ExpResultsDir, 'comparison_daily_stats.txt')
    with open(resFile, 'wb') as fout:
        fout.write(table.getvalue())
    table.close()
Пример #3
0
def y2yBuyHold():
    """Write year-by-year statistics of the buy-and-hold portfolios to a CSV.

    For every portfolio size n (5, 10, ..., 50) the wealth series
    ``<ExpResultsDir>/BuyandHoldPortfolio/wealthSum_n<n>.pkl`` is loaded and,
    for each calendar year 2005..2013, one row is produced with the first
    and last wealth of the year (the latter net of the sell fee), the yearly
    ROI, the Jarque-Bera and ADF results and the Sharpe/Sortino figures of
    the daily wealth returns.  The rows are stored in
    ``<ExpResultsDir>/y2yfixedBuyandHold_result_2005.csv``.
    """
    t = time.time()
    n_rvs = range(5, 50 + 5, 5)
    years = range(2005, 2013 + 1)
    resultDir = os.path.join(ExpResultsDir, "BuyandHoldPortfolio")

    avgIO = StringIO()
    # The header is written verbatim (it is not a format string), so the
    # percent signs must be single; '%%' used to end up literally in the file.
    avgIO.write(
        'startDate, endDate, n_stock, wealth1, wealth2,  wROI(%), JB, ADF,')
    avgIO.write('meanROI(%), Sharpe(%), SortinoFull(%), SortinoPartial(%),')
    avgIO.write(' downDevFull, downDevPartial\n')

    for n_rv in n_rvs:
        df = pd.read_pickle(
            os.path.join(resultDir, "wealthSum_n%s.pkl" % (n_rv)))

        for year in years:
            startDate = date(year, 1, 1)
            endDate = date(year, 12, 31)
            print("%s %s" % (startDate, endDate))
            wealths = df[startDate:endDate]
            wrois = wealths.pct_change()
            # pct_change leaves the first entry undefined; treat it as 0
            wrois[0] = 0

            wealth1 = wealths[0]
            # the position is liquidated at year end, so charge the sell fee
            wealth2 = wealths[-1] * (1 - 0.004425)
            roi = (wealth2 / wealth1 - 1)

            JB = sss.jarque_bera(wrois)[1]
            ADF = sts.adfuller(wrois)[1]

            sharpe = Performance.Sharpe(wrois)
            sortinof, ddf = Performance.SortinoFull(wrois)
            sortinop, ddp = Performance.SortinoPartial(wrois)

            avgIO.write("%s,%s,%s,%s,%s,%s,%s,%s," %
                        (wealths.index[0].strftime("%Y-%m-%d"),
                         wealths.index[-1].strftime("%Y-%m-%d"), n_rv, wealth1,
                         wealth2, roi * 100, JB, ADF))
            avgIO.write("%s,%s,%s,%s," % (wrois.mean() * 100, sharpe * 100,
                                          sortinof * 100, sortinop * 100))
            avgIO.write("%s,%s\n" % (ddf * 100, ddp * 100))

    resFile = os.path.join(ExpResultsDir, 'y2yfixedBuyandHold_result_2005.csv')
    with open(resFile, 'wb') as fout:
        fout.write(avgIO.getvalue())
    avgIO.close()
    print("y2yBuyandHold OK, elapsed %.3f secs" % (time.time() - t))
def fixedSymbolWCVaRSPPortfolio(symbols,
                                startDate,
                                endDate,
                                money=1e6,
                                hist_periods=range(20, 130, 10),
                                n_scenario=200,
                                buyTransFee=0.001425,
                                sellTransFee=0.004425,
                                alpha=0.95,
                                scenFunc="Moment",
                                solver="cplex",
                                save_pkl=False,
                                save_csv=True,
                                debug=False):
    '''
    Simulate the worst-case CVaR stochastic-programming portfolio on a fixed
    symbol set and store the resulting processes and a summary on disk.

    The different distributions are estimated from history windows of
    variant length (hist_periods): one scenario matrix is generated per
    window and all successfully generated matrices are handed to
    WorstCVaRPortfolioSP.

    @param symbols, list, target assets
    @param startDate, endDate, datetime.date, first and last trading dates
    @param money, positive float, initial capital
    @param hist_periods, list, lengths of the history windows used to
        estimate the moments and the correlation matrix
    @param n_scenario, positive integer, scenarios generated per period
    @param buyTransFee, sellTransFee, float, buy and sell transaction fees
    @param alpha, float, confidence level of the CVaR
    @param scenFunc, string, scenario generating function; only "Moment" is
        supported, any other value raises ValueError
    @param solver, string, solver of the stochastic program
    @param save_pkl, save_csv, bool, store the action/wealth/risk frames as
        pickle and/or csv files
    @param debug, passed through to constructModelMtx

    @return None.  The results are written below
        ExpResultsDir/fixedSymbolWCVaRSPPortfolio/...: actionProcess,
        wealthProcess and riskProcess (pkl/csv), summary.json and, when
        scenario generation failed on some date, scenErr.txt.

    constructModelMtx supplies a dict with the following entries (sizes as
    noted by the original author):
        "n_rv": n_rv,
        "T": T,
        "allRiskyRetMtx": allRiskyRetMtx,   #size: n_rv * (hist_period+T)
            #[0:hist_period] is used to estimate moments and corrMtx
        "riskFreeRetVec": riskFreeRetVec,   #size: T+1
        "buyTransFeeMtx": buyTransFeeMtx,   #size: n_rv * T
        "sellTransFeeMtx": sellTransFeeMtx, #size: n_rv * T
        "allocatedWealth": allocatedWealth, #size: n_rv
        "depositWealth": depositWealth,     #size: 1
        "transDates": transDates,           #size: (T+1)
        "fullTransDates": fullTransDates,   #size: (hist_period+T)
        "alpha": alpha                      #size: 1
    '''
    t0 = time.time()
    assert len(hist_periods) >= 1

    # The model matrices are built for the longest window, so the return
    # realised at step tdx sits in column (max_period + tdx).  The leaked
    # loop variable hist_period used to stand in for this value, which is
    # only right when hist_periods is sorted ascending.
    max_period = max(hist_periods)
    param = constructModelMtx(symbols, startDate, endDate, money,
                              max_period, buyTransFee, sellTransFee,
                              alpha, debug)
    print("constructModelMtx %.3f secs" % (time.time() - t0))

    n_rv, T = param['n_rv'], param['T']
    allRiskyRetMtx = param['allRiskyRetMtx']
    riskFreeRetVec = param['riskFreeRetVec']
    buyTransFeeMtx = param['buyTransFeeMtx']
    sellTransFeeMtx = param['sellTransFeeMtx']
    allocatedWealth = param['allocatedWealth']
    depositWealth = param['depositWealth']
    transDates = param['transDates']
    fullTransDates = param['fullTransDates']

    #process from t=0 to t=(T+1)
    buyProcess = np.zeros((n_rv, T))
    sellProcess = np.zeros((n_rv, T))
    wealthProcess = np.zeros((n_rv, T + 1))
    depositProcess = np.zeros(T + 1)
    VaRProcess = np.zeros(T)
    WCVaRProcess = np.zeros(T)

    genScenErrDates = []
    scenErrStringIO = StringIO()

    # Fee vectors of the most recent solved period.  They are kept apart from
    # the scalar parameters so the summary can still be written when no
    # period ever converges.
    lastBuyFeeVec = None
    lastSellFeeVec = None

    for tdx in xrange(T):
        tloop = time.time()
        transDate = pd.to_datetime(transDates[tdx]).strftime("%Y%m%d")

        #today's return (i.e. today's closing price) is known when investing
        tscen = time.time()
        if scenFunc == "Moment":
            scenMatrics = []

            #one window that yields scenarios is enough to go on
            converged = False

            #multiple hist_period
            # NOTE(review): every window starts at column tdx, so windows
            # shorter than max_period end before the decision day - confirm
            # that this alignment is intended.
            for hist_period in hist_periods:
                subRiskyRetMtx = allRiskyRetMtx[:, tdx:(hist_period + tdx)]
                assert subRiskyRetMtx.shape[1] == hist_period

                moments = np.empty((n_rv, 4))
                moments[:, 0] = subRiskyRetMtx.mean(axis=1)
                moments[:, 1] = subRiskyRetMtx.std(axis=1)
                moments[:, 2] = spstats.skew(subRiskyRetMtx, axis=1)
                moments[:, 3] = spstats.kurtosis(subRiskyRetMtx, axis=1)
                corrMtx = np.corrcoef(subRiskyRetMtx)

                #retry with a looser tolerance: 1e-3, 1e-2, 1e-1
                for order in xrange(-3, 0):
                    MaxErrMom, MaxErrCorr = 10**(order), 10**(order)
                    try:
                        scenMtx = HeuristicMomentMatching(
                            moments, corrMtx, n_scenario, MaxErrMom,
                            MaxErrCorr)
                        scenMatrics.append(scenMtx)
                    except ValueError as e:
                        print(e)
                        scenErrStringIO.write("%s p%s: %s\n" %
                                              (transDate, hist_period, e))
                    else:
                        converged = True
                        break
        else:
            raise ValueError("unknown scenFunc %s" % (scenFunc))

        #scenMatrics, shape, L(may less than L) * M * S
        scenMatrics = np.array(scenMatrics)
        print("%s-%s - generate scen. mtx, %.3f secs" %
              (transDate, scenFunc, time.time() - tscen))

        if converged:
            #successful generating scenarios, solve SP
            tsolve = time.time()
            riskyRet = allRiskyRetMtx[:, max_period + tdx]
            riskFreeRet = riskFreeRetVec[tdx]
            lastBuyFeeVec = buyTransFeeMtx[:, tdx]
            lastSellFeeVec = sellTransFeeMtx[:, tdx]
            predictRiskyRet = scenMatrics
            predictRiskFreeRet = 0
            results = WorstCVaRPortfolioSP(symbols,
                                           riskyRet,
                                           riskFreeRet,
                                           allocatedWealth,
                                           depositWealth,
                                           lastBuyFeeVec,
                                           lastSellFeeVec,
                                           alpha,
                                           predictRiskyRet,
                                           predictRiskFreeRet,
                                           n_scenario,
                                           probs=None,
                                           solver=solver)

            VaRProcess[tdx] = results['VaR']
            WCVaRProcess[tdx] = results['WCVaR']
            print("%s - %s solve SP, %.3f secs" % (transDate, solver,
                                                   time.time() - tsolve))
        else:
            #failed generating scenarios
            genScenErrDates.append(transDate)
            results = None

        #realized today return
        allocatedWealth = allocatedWealth * (
            1 + allRiskyRetMtx[:, max_period + tdx])
        depositWealth = depositWealth * (1 + riskFreeRetVec[tdx])

        if converged and results is not None:
            #buy action
            for idx, value in enumerate(results['buys']):
                allocatedWealth[idx] += value
                buy = (1 + buyTransFeeMtx[idx, tdx]) * value
                buyProcess[idx, tdx] = buy
                depositWealth -= buy

            #sell action
            for idx, value in enumerate(results['sells']):
                allocatedWealth[idx] -= value
                sell = (1 - sellTransFeeMtx[idx, tdx]) * value
                sellProcess[idx, tdx] = sell
                depositWealth += sell

        #log wealth and signal process
        wealthProcess[:, tdx] = allocatedWealth
        depositProcess[tdx] = depositWealth

        print('*' * 80)
        trainDates = [
            pd.to_datetime(fullTransDates[tdx]).strftime("%Y%m%d"),
            pd.to_datetime(fullTransDates[max_period + tdx -
                                          1]).strftime("%Y%m%d")
        ]

        print(
            'fixedSymbolWCVaRSPPortfolio %s-%s n%s-p%s-s%s-a%s --scenFunc %s --solver %s, genscenErr:[%s]'
            % (startDate, endDate, n_rv, ":".join(
                str(h) for h in hist_periods), n_scenario, alpha, scenFunc,
               solver, len(genScenErrDates)))

        print(
            'transDate %s (train:%s-%s) WCVaR SP OK, current wealth %s, %.3f secs'
            % (transDate, trainDates[0], trainDates[1],
               allocatedWealth.sum() + depositWealth, time.time() - tloop))
        print('*' * 80)
        #end of for

    #last period: settle only, no buying or selling
    wealthProcess[:, -1] = allocatedWealth * (1 + allRiskyRetMtx[:, -1])
    depositProcess[-1] = depositWealth * (1 + riskFreeRetVec[-1])

    finalWealth = (np.dot(allocatedWealth, (1 + allRiskyRetMtx[:, -1])) +
                   depositWealth * (1 + riskFreeRetVec[-1]))
    print("final wealth %s" % (finalWealth))

    #setup result directory
    t1 = pd.to_datetime(transDates[0]).strftime("%Y%m%d")
    t2 = pd.to_datetime(transDates[-1]).strftime("%Y%m%d")
    rnd = time.strftime("%y%m%d%H%M%S")

    layer0Dir = "%s" % (fixedSymbolWCVaRSPPortfolio.__name__)
    layer1Dir = "LargestMarketValue_200501"
    layer2Dir = "%s_n%s_p%s_s%s_a%s" % (
        fixedSymbolWCVaRSPPortfolio.__name__, n_rv, "-".join(
            str(h) for h in hist_periods), n_scenario, alpha)
    layer3Dir = "%s-%s_%s" % (t1, t2, rnd)
    resultDir = os.path.join(ExpResultsDir, layer0Dir, layer1Dir, layer2Dir,
                             layer3Dir)
    if not os.path.exists(resultDir):
        os.makedirs(resultDir)

    #store data in pkl
    df_buyProc = pd.DataFrame(buyProcess.T,
                              index=transDates[:-1],
                              columns=["%s_buy" % (sym) for sym in symbols])
    df_sellProc = pd.DataFrame(sellProcess.T,
                               index=transDates[:-1],
                               columns=["%s_sell" % (sym) for sym in symbols])
    df_action = pd.merge(df_buyProc,
                         df_sellProc,
                         left_index=True,
                         right_index=True)

    df_wealth = pd.DataFrame(wealthProcess.T,
                             index=transDates,
                             columns=symbols)
    deposits = pd.Series(depositProcess.T, index=transDates)
    df_wealth['deposit'] = deposits

    #computing wealth ROI
    wealths = df_wealth.sum(axis=1)
    wealthROIs = wealths.pct_change()
    wealthROIs[0] = 0

    df_risk = pd.DataFrame({
        "VaR":
        pd.Series(VaRProcess.T, index=transDates[:-1]),
        "WCVaR":
        pd.Series(WCVaRProcess.T, index=transDates[:-1])
    })

    records = {
        "actionProcess": df_action,
        "wealthProcess": df_wealth,
        "riskProcess": df_risk
    }

    #save pkl and csv
    for name, df in records.items():
        if save_pkl:
            pklFileName = os.path.join(resultDir, "%s.pkl" % (name))
            df.to_pickle(pklFileName)
        if save_csv:
            csvFileName = os.path.join(resultDir, "%s.csv" % (name))
            df.to_csv(csvFileName)

    #write scen error
    if genScenErrDates:
        scenErrFile = os.path.join(resultDir, "scenErr.txt")
        with open(scenErrFile, 'wb') as fout:
            fout.write(scenErrStringIO.getvalue())
    scenErrStringIO.close()

    # Fee of the first asset in the last solved period (the value recorded so
    # far); fall back to the configured scalar when nothing was solved.
    if lastBuyFeeVec is not None:
        summaryBuyFee = lastBuyFeeVec[0]
        summarySellFee = lastSellFeeVec[0]
    else:
        summaryBuyFee = buyTransFee
        summarySellFee = sellTransFee

    #generating summary files
    # NOTE(review): SortinoFull/SortinoPartial are unpacked as pairs elsewhere
    # in this file, so the two Sortino entries are stored as 2-element lists.
    summary = {
        "n_rv": n_rv,
        "T": T,
        "scenario": n_scenario,
        "alpha": alpha,
        "symbols": ",".join(symbols),
        "transDates":
        [pd.to_datetime(d).strftime("%Y%m%d") for d in transDates],  #(T+1)
        "hist_period": "-".join(str(h) for h in hist_periods),
        "buyTransFee": summaryBuyFee,
        "sellTransFee": summarySellFee,
        "final_wealth": finalWealth,
        "wealth_ROI_mean": wealthROIs.mean(),
        "wealth_ROI_std": wealthROIs.std(),
        "wealth_ROI_Sharpe": Performance.Sharpe(wealthROIs),
        "wealth_ROI_SortinoFull": Performance.SortinoFull(wealthROIs),
        "wealth_ROI_SortinoPartial": Performance.SortinoPartial(wealthROIs),
        "scenFunc": scenFunc,
        "scen_err_cnt": len(genScenErrDates),
        "scen_err_dates": genScenErrDates,
        "machine": platform.node(),
        "elapsed": time.time() - t0
    }

    fileName = os.path.join(resultDir, 'summary.json')
    with open(fileName, 'w') as fout:
        json.dump(summary, fout, indent=4)

    print(
        "fixedSymbolWCVaRSPPortfolio %s-%s n%s-p%s-s%s-a%s --scenFunc %s --solver %s\nsimulation ok, %.3f secs"
        % (startDate, endDate, n_rv, max_period, n_scenario, alpha, scenFunc,
           solver, time.time() - t0))
Пример #5
0
def y2yResults(modelType="fixed"):
    '''
    Average the year-by-year statistics of the stored SP-portfolio
    experiments and append them to one CSV file per portfolio size.

    For every (n_rv, period, alpha) combination at most three experiment
    directories matching "20050103-20131231_*" are read.  For each calendar
    year 2005..2013 the statistics of the daily wealth returns are computed
    per experiment and then averaged over the experiments (the largest
    value is reported for JB and ADF).

    @param modelType, string, "fixed" or "dynamic"; selects the parameter
        grid and the experiment directory
    @raise ValueError, if modelType is neither "fixed" nor "dynamic"
    '''
    if modelType == "fixed":
        n_rvs = range(5, 55, 5)
        hist_periods = range(50, 130, 10)
        alphas = ("0.5", "0.55", "0.6", "0.65", "0.7", "0.75", "0.8", "0.85",
                  "0.9", "0.95")
        prefix = "fixedSymbolSPPortfolio"
        myDir = os.path.join(ExpResultsDir, prefix,
                             "LargestMarketValue_200501")

    elif modelType == "dynamic":
        n_rvs = range(5, 55, 5)
        hist_periods = range(90, 120 + 10, 10)
        alphas = ("0.5", "0.55", "0.6", "0.65", "0.7")
        prefix = "dynamicSymbolSPPortfolio"
        myDir = os.path.join(ExpResultsDir, prefix,
                             "LargestMarketValue_200501_rv50")

    else:
        raise ValueError("unknown modelType %s" % (modelType))

    years = range(2005, 2013 + 1)

    for n_rv in n_rvs:
        t = time()
        avgIO = StringIO()
        # The header is written verbatim (not %-formatted), hence single '%'.
        # NOTE(review): the trailing 'scen err' column has no matching value
        # in the data rows below.
        avgIO.write(
            'run, startDate, endDate, n_rv, period, alpha,  w1, w1-std, w2, w2-std, wROI(%), wROI-std,'
        )
        avgIO.write(
            'dROI(%), stdev, skew, kurt, Sp(%), Sp-std, StF(%), StF-std,')
        avgIO.write(
            'StP(%), Stp-std, downDevF, downDevP,  JB, ADF, CVaRfailRate, VaRfailRate, scen err\n'
        )

        for period in hist_periods:
            # this combination is skipped (presumably no experiment exists
            # for it - confirm)
            if n_rv == 50 and period == 50:
                continue

            for alpha in alphas:
                dirName = "%s_n%s_p%s_s200_a%s" % (prefix, n_rv, period,
                                                   alpha)

                #use at most three runs per parameter combination
                exps = glob(os.path.join(myDir, dirName,
                                         "20050103-20131231_*"))[:3]
                if not exps:
                    # Without runs there is nothing to average; going on
                    # would reuse the frame of an earlier combination and
                    # fail on max() of an empty column.
                    print("n_rv:%s p:%s a:%s no experiment found, skipped" %
                          (n_rv, period, alpha))
                    continue

                d1, d2 = len(exps), len(years)

                #one (run, year) matrix per statistic
                wealth1, wealth2 = np.zeros((d1, d2)), np.zeros((d1, d2))
                ROI_Cs = np.zeros((d1, d2))
                dROIs, stdevs = np.zeros((d1, d2)), np.zeros((d1, d2))
                skews, kurts = np.zeros((d1, d2)), np.zeros((d1, d2))
                JBs, ADFs = np.zeros((d1, d2)), np.zeros((d1, d2))
                sharpes = np.zeros((d1, d2))
                sortinops, downDevP = np.zeros((d1, d2)), np.zeros((d1, d2))
                sortinofs, downDevF = np.zeros((d1, d2)), np.zeros((d1, d2))
                CVaRFailRates = np.zeros((d1, d2))
                VaRFailRates = np.zeros((d1, d2))

                for edx, exp in enumerate(exps):
                    wealth_df = pd.read_pickle(
                        os.path.join(exp, 'wealthProcess.pkl'))
                    risk_df = pd.read_pickle(
                        os.path.join(exp, 'riskProcess.pkl'))

                    for ydx, year in enumerate(years):
                        startDate = date(year, 1, 1)
                        endDate = date(year, 12, 31)

                        exp_wealth_df = wealth_df[startDate:endDate]
                        exp_risk_df = risk_df[startDate:endDate]

                        #wealth; the year-end value is net of the sell fee
                        wealth = exp_wealth_df.sum(axis=1)
                        wealth[-1] *= (1 - 0.004425)
                        wealth1[edx, ydx] = wealth[0]
                        wealth2[edx, ydx] = wealth[-1]

                        #cum ROI
                        roi = (wealth[-1] / wealth[0] - 1)
                        wrois = wealth.pct_change()
                        wrois[0] = 0
                        ROI_Cs[edx, ydx] = roi * 100

                        #stats
                        dROIs[edx, ydx] = wrois.mean() * 100
                        stdevs[edx, ydx] = wrois.std() * 100
                        skews[edx, ydx] = spstats.skew(wrois)
                        kurts[edx, ydx] = spstats.kurtosis(wrois)

                        #JB, ADF
                        JBs[edx, ydx] = sss.jarque_bera(wrois)[1]
                        ADFs[edx, ydx] = sts.adfuller(wrois)[1]

                        #Sharpe
                        sharpe = Performance.Sharpe(wrois)
                        sharpes[edx, ydx] = sharpe * 100

                        sortinof, ddf = Performance.SortinoFull(wrois)
                        sortinofs[edx, ydx] = sortinof * 100
                        downDevF[edx, ydx] = ddf * 100

                        sortinop, ddp = Performance.SortinoPartial(wrois)
                        sortinops[edx, ydx] = sortinop * 100
                        downDevP[edx, ydx] = ddp * 100

                        CVaRFailRate, VaRFailRate = VaRBackTest(
                            exp_wealth_df, exp_risk_df)
                        CVaRFailRates[edx, ydx] = CVaRFailRate * 100
                        VaRFailRates[edx, ydx] = VaRFailRate * 100

                for ydx, year in enumerate(years):
                    startDate = date(year, 1, 1)
                    endDate = date(year, 12, 31)

                    #the dates of the row are taken from the last run read
                    exp_df = wealth_df[startDate:endDate]

                    # The trailing comma matters: without it the last value
                    # of this segment is fused with the first of the next.
                    avgIO.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s," % (
                        len(exps),
                        exp_df.index[0].strftime("%Y-%m-%d"),
                        exp_df.index[-1].strftime("%Y-%m-%d"),
                        n_rv,
                        period,
                        alpha,
                        wealth1[:, ydx].mean(),
                        wealth1[:, ydx].std(),
                        wealth2[:, ydx].mean(),
                        wealth2[:, ydx].std(),
                        ROI_Cs[:, ydx].mean(),
                        ROI_Cs[:, ydx].std(),
                    ))

                    avgIO.write(
                        "%s,%s,%s,%s,%s,%s,%s,%s," %
                        (dROIs[:, ydx].mean(), stdevs[:, ydx].mean(),
                         skews[:, ydx].mean(), kurts[:, ydx].mean(),
                         sharpes[:, ydx].mean(), sharpes[:, ydx].std(),
                         sortinofs[:, ydx].mean(), sortinofs[:, ydx].std()))
                    avgIO.write(
                        "%s,%s,%s,%s,%s,%s,%s,%s\n" %
                        (sortinops[:, ydx].mean(), sortinops[:, ydx].std(),
                         downDevF[:, ydx].mean(), downDevP[:, ydx].mean(),
                         max(JBs[:, ydx]), max(
                             ADFs[:, ydx]), CVaRFailRates[:, ydx].mean(),
                         VaRFailRates[:, ydx].mean()))

                    print("n_rv:%s p:%s a:%s endDate:%s run:%s" %
                          (n_rv, period, alpha, endDate, len(exps)))

        resFile = os.path.join(
            ExpResultsDir,
            'avg_y2y%s_n%s_result_2005.csv' % (prefix, n_rv))

        #append mode: an existing result file is extended, not replaced
        with open(resFile, 'ab') as fout:
            fout.write(avgIO.getvalue())
        avgIO.close()
        print("n_rv:%s OK, elapsed %.3f secs" % (n_rv, time() - t))
Пример #6
0
def parseSymbolResults(modelType="fixed"):
    '''
    Average the whole-period statistics of the stored SP-portfolio
    experiments and write them to one CSV file per portfolio size.

    For every (n_rv, period, alpha) combination at most three experiment
    directories matching "20050103-20131231_*" are read.  The statistics are
    taken from the experiment's summary.json when it already holds them;
    otherwise they are computed from wealthProcess.pkl / riskProcess.pkl and
    written back to summary.json.

    @param modelType, string, "fixed" or "dynamic"; selects the parameter
        grid and the experiment directory
    @raise ValueError, if modelType is neither "fixed" nor "dynamic"
    '''
    if modelType == "fixed":
        n_rvs = range(5, 55, 5)
        hist_periods = range(50, 130, 10)
        alphas = ("0.5", "0.55", "0.6", "0.65", "0.7", "0.75", "0.8", "0.85",
                  "0.9", "0.95", '0.99')
        prefix = "fixedSymbolSPPortfolio"
        myDir = os.path.join(ExpResultsDir, prefix,
                             "LargestMarketValue_200501")

    elif modelType == "dynamic":
        n_rvs = range(5, 55, 5)
        hist_periods = range(90, 120 + 10, 10)
        alphas = ("0.5", "0.55", "0.6", "0.65", "0.7")
        prefix = "dynamicSymbolSPPortfolio"
        myDir = os.path.join(ExpResultsDir, prefix,
                             "LargestMarketValue_200501_rv50")

    else:
        raise ValueError("unknown modelType %s" % (modelType))

    for n_rv in n_rvs:
        t = time()
        avgIO = StringIO()
        # The header is written verbatim (not %-formatted), hence single '%'.
        avgIO.write(
            'run, n_rv, period, alpha, time, wealth, wealth-std, wROI(%), wROI-std,'
        )
        avgIO.write(
            'dROI(%), stdev, skew, kurt, Sp(%), Sp-std, StF(%), StF-std,')
        avgIO.write(
            'StP(%), Stp-std, downDevF, downDevP,  JB, ADF, CVaRfailRate, VaRfailRate, scen err\n'
        )

        for period in hist_periods:
            # this combination is skipped (presumably no experiment exists
            # for it - confirm)
            if n_rv == 50 and period == 50:
                continue

            for alpha in alphas:
                dirName = "%s_n%s_p%s_s200_a%s" % (prefix, n_rv, period,
                                                   alpha)

                #use at most three runs per parameter combination
                exps = glob(os.path.join(myDir, dirName,
                                         "20050103-20131231_*"))[:3]
                if not exps:
                    #placeholder row, one field per header column
                    avgIO.write('NA,' * 26 + '\n')
                    continue

                wealths, ROI_Cs, dROIs, stdevs, skews, kurts = ([], [], [],
                                                                [], [], [])
                JBs, ADFs = [], []
                sharpes, sortinofs, sortinops = [], [], []
                downDevF, downDevP = [], []
                CVaRFailRates, VaRFailRates = [], []
                elapsed, scenerr = [], []

                for edx, exp in enumerate(exps):
                    print(exp)
                    summaryFile = os.path.join(exp, "summary.json")
                    with open(summaryFile) as fin:
                        summary = json.load(fin)
                    print(dirName)

                    #wealth and cum ROI (relative to an initial wealth of 1e6)
                    wealth = float(summary['final_wealth'])
                    wealths.append(wealth)
                    ROI_Cs.append((wealth / 1e6 - 1) * 100.0)

                    elapsed.append(float(summary['elapsed']))
                    scenerr.append(summary['scen_err_cnt'])

                    # Read every cached statistic before touching the result
                    # lists: appending while reading left partial entries
                    # behind when a later key was missing, and the fallback
                    # then added the same experiment a second time.
                    try:
                        dROI = float(summary['wealth_ROI_mean'])
                        stdev = float(summary['wealth_ROI_stdev'])
                        skew = float(summary['wealth_ROI_skew'])
                        kurt = float(summary['wealth_ROI_kurt'])
                        sharpe = float(summary['wealth_ROI_Sharpe'])
                        sortinof = float(summary['wealth_ROI_SortinoFull'])
                        sortinop = float(summary['wealth_ROI_SortinoPartial'])
                        ddf = float(summary['wealth_ROI_downDevFull'])
                        ddp = float(summary['wealth_ROI_downDevPartial'])
                        JB = float(summary['wealth_ROI_JBTest'])
                        ADF = float(summary['wealth_ROI_ADFTest'])

                    except (KeyError, TypeError):
                        #statistics missing or malformed: use the raw process
                        print("read raw df n_rv-period-alpha: %s-%s-%s:%s" %
                              (n_rv, period, alpha, edx + 1))
                        df = pd.read_pickle(
                            os.path.join(exp, 'wealthProcess.pkl'))

                        proc = df.sum(axis=1)
                        wrois = proc.pct_change()
                        wrois[0] = 0

                        dROI = wrois.mean()
                        stdev = wrois.std()
                        skew = spstats.skew(wrois)
                        kurt = spstats.kurtosis(wrois)
                        sharpe = Performance.Sharpe(wrois)
                        sortinof, ddf = Performance.SortinoFull(wrois)
                        sortinop, ddp = Performance.SortinoPartial(wrois)
                        JB = sss.jarque_bera(wrois)[1]
                        ADF = sts.adfuller(wrois)[1]

                        #cache the unscaled values for the next run
                        summary['wealth_ROI_mean'] = dROI
                        summary['wealth_ROI_stdev'] = stdev
                        summary['wealth_ROI_skew'] = skew
                        summary['wealth_ROI_kurt'] = kurt
                        summary['wealth_ROI_Sharpe'] = sharpe
                        summary['wealth_ROI_SortinoFull'] = sortinof
                        summary['wealth_ROI_downDevFull'] = ddf
                        summary['wealth_ROI_SortinoPartial'] = sortinop
                        summary['wealth_ROI_downDevPartial'] = ddp
                        summary['wealth_ROI_JBTest'] = JB
                        summary['wealth_ROI_ADFTest'] = ADF

                        fileName = os.path.join(exp, 'summary.json')
                        with open(fileName, 'w') as fout:
                            json.dump(summary, fout, indent=4)

                    # Same scaling for cached and recomputed values (the
                    # fallback used to store stdev without the factor 100).
                    dROIs.append(dROI * 100)
                    stdevs.append(stdev * 100)
                    skews.append(skew)
                    kurts.append(kurt)
                    sharpes.append(sharpe * 100)
                    sortinofs.append(sortinof * 100)
                    sortinops.append(sortinop * 100)
                    downDevF.append(ddf * 100)
                    downDevP.append(ddp * 100)
                    JBs.append(JB)
                    ADFs.append(ADF)

                    try:
                        CVaRFailPct = float(summary['CVaR_failRate'] * 100)
                        VaRFailPct = float(summary['VaR_failRate'] * 100)

                    except (KeyError, TypeError):
                        wealth_df = pd.read_pickle(
                            os.path.join(exp, 'wealthProcess.pkl'))
                        risk_df = pd.read_pickle(
                            os.path.join(exp, 'riskProcess.pkl'))

                        CVaRFailRate, VaRFailRate = VaRBackTest(
                            wealth_df, risk_df)
                        CVaRFailPct = CVaRFailRate * 100
                        VaRFailPct = VaRFailRate * 100
                        summary['VaR_failRate'] = VaRFailRate
                        summary['CVaR_failRate'] = CVaRFailRate

                        print("CVaR fail:%s, VaR fail:%s" % (CVaRFailRate,
                                                             VaRFailRate))

                        fileName = os.path.join(exp, 'summary.json')
                        with open(fileName, 'w') as fout:
                            json.dump(summary, fout, indent=4)

                    CVaRFailRates.append(CVaRFailPct)
                    VaRFailRates.append(VaRFailPct)

                wealths = np.asarray(wealths)
                ROI_Cs = np.asarray(ROI_Cs)
                dROIs = np.asarray(dROIs)
                stdevs = np.asarray(stdevs)
                skews = np.asarray(skews)
                kurts = np.asarray(kurts)
                JBs = np.asarray(JBs)
                ADFs = np.asarray(ADFs)

                sharpes = np.asarray(sharpes)
                sortinofs = np.asarray(sortinofs)
                sortinops = np.asarray(sortinops)
                downDevF = np.asarray(downDevF)
                downDevP = np.asarray(downDevP)

                CVaRFailRates = np.asarray(CVaRFailRates)
                VaRFailRates = np.asarray(VaRFailRates)

                elapsed = np.asarray(elapsed)
                scenerr = np.asarray(scenerr)

                avgIO.write("%s,%s,%s,%s,%s,%s,%s,%s,%s," %
                            (len(ROI_Cs), n_rv, period, alpha, elapsed.mean(),
                             wealths.mean(), wealths.std(), ROI_Cs.mean(),
                             ROI_Cs.std()))
                avgIO.write("%s,%s,%s,%s,%s,%s,%s,%s," %
                            (dROIs.mean(), stdevs.mean(), skews.mean(),
                             kurts.mean(), sharpes.mean(), sharpes.std(),
                             sortinofs.mean(), sortinofs.std()))
                avgIO.write("%s,%s,%s,%s,%s,%s,%s,%s,%s\n" %
                            (sortinops.mean(), sortinops.std(),
                             downDevF.mean(), downDevP.mean(), max(JBs),
                             max(ADFs), CVaRFailRates.mean(),
                             VaRFailRates.mean(), scenerr.mean()))

        resFile = os.path.join(
            ExpResultsDir, 'avg_%s_n%s_result_2005.csv' % (prefix, n_rv))

        with open(resFile, 'wb') as fout:
            fout.write(avgIO.getvalue())
        avgIO.close()
        print("n_rv:%s OK, elapsed %.3f secs" % (n_rv, time() - t))
Пример #7
0
def individualSymbolStats():
    '''Write a LaTeX table of daily-return statistics for individual stocks.

    For every symbol in the hard-coded list below, the pickled frame
    ``<PklBasicFeaturesDir>/<symbol>.pkl`` is loaded and its ``adjROI``
    column is restricted to 2005-01-03 .. 2013-12-31.  One table row per
    symbol is produced containing the rank (1-based position in the list),
    cumulative and annualized return, mean, standard deviation, skewness,
    kurtosis, Sharpe ratio, full Sortino ratio and element ``[1]`` of the
    ``sss.jarque_bera`` / ``sts.adfuller`` results (presumably p-values --
    confirm against the imported modules).

    The table is written to ``<ExpResultsDir>/symbol_daily_stats.txt`` and
    each symbol's cumulative / annualized return is printed.
    '''
    symbols = [
        '2330', '2412', '2882', '6505', '2317', '2303', '2002', '1303', '1326', '1301',
        '2881', '2886', '2409', '2891', '2357', '2382', '3045', '2883', '2454', '2880',
        '2892', '4904', '2887', '2353', '2324', '2801', '1402', '2311', '2475', '2888',
        '2408', '2308', '2301', '2352', '2603', '2884', '2890', '2609', '9904', '2610',
        '1216', '1101', '2325', '2344', '2323', '2371', '2204', '1605', '2615', '2201',
    ]

    startDate = date(2005, 1, 3)
    endDate = date(2013, 12, 31)

    statIO = StringIO()
    statIO.write(
        'rank & symbol & $R_{C}$(\%) & $R_{A}$(\%) & $\mu$(\%) & $\sigma$(\%) & skew & kurt & $S_p$(\%) & $S_o$(\%)  & JB & ADF \\\ \hline \n'
    )

    for idx, symbol in enumerate(symbols):
        df = pd.read_pickle(
            os.path.join(PklBasicFeaturesDir, '%s.pkl' % symbol))
        rois = df[startDate:endDate]['adjROI'].values

        mean = rois.mean()
        std = rois.std()
        skew = spstats.skew(rois)
        kurt = spstats.kurtosis(rois)
        sharpe = Performance.Sharpe(rois)
        # SortinoFull returns a pair; only its first element is reported.
        sortinof, _ = Performance.SortinoFull(rois)

        JB = sss.jarque_bera(rois)[1]
        ADF = sts.adfuller(rois)[1]

        # adjROI is divided by 100 here, i.e. treated as a percentage.
        # The fees are subtracted from the first and last gross returns that
        # enter the product; the return at index 0 is excluded.
        rtmp = rois / 100 + 1
        rtmp[1] -= 0.001425  # buy fee
        rtmp[-1] -= 0.004425  # sell fee
        R_cum = rtmp[1:].prod() - 1
        # The exponent hard-codes a 9-year horizon (2005-2013).
        AR_cum = np.power((R_cum + 1), 1. / 9) - 1

        # columns: rank, symbol, R_C, R_A, mu, sigma, skew, kurt, S_p, S_o,
        # JB, ADF
        statIO.write(
            '%2d & %s & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2e & %4.2e \\\ \hline \n'
            % (idx + 1, symbol, R_cum * 100, AR_cum * 100, mean, std, skew,
               kurt, sharpe * 100, sortinof * 100, JB, ADF))
        print("%s %s %s" % (symbol, R_cum, AR_cum))

    resFile = os.path.join(ExpResultsDir, 'symbol_daily_stats.txt')
    with open(resFile, 'wb') as fout:
        fout.write(statIO.getvalue())
    statIO.close()
Пример #8
0
def _sortino_ratio(result):
    '''Return the ratio from a ``Performance.Sortino*`` result.

    Other callers in this file unpack the result as a
    ``(ratio, downside deviation)`` pair; a bare scalar is passed through
    unchanged.
    '''
    if isinstance(result, (tuple, list)):
        return result[0]
    return result


def parseWCVaRSymbolResults():
    '''Average the fixed-symbol WCVaR experiment results, one CSV per n_rv.

    For every ``n_rv`` in 5, 10, ..., 50 and every alpha in the tuple below,
    all experiment directories matching
    ``<ExpResultsDir>/fixedSymbolWCVaRSPPortfolio/LargestMarketValue_200501/
    fixedSymbolWCVaRSPPortfolio_n<n_rv>_p70-80-90-100-110-120_s100_a<alpha>/
    20050103-20131231_*`` are scanned and their ``summary.json`` is read.
    The per-run values are averaged into one CSV row per alpha, and the rows
    for one ``n_rv`` are written to
    ``<ExpResultsDir>/avg_fixedWCVaRSPPortfolio_n<n_rv>_result_2005.csv``.

    When a summary lacks any of the ``wealth_ROI_*`` entries, all four
    ROI-based metrics of that run are recomputed from the run's
    ``wealthProcess.csv``.
    '''
    n_rvs = range(5, 55, 5)
    alphas = ("0.5", "0.55", "0.6", "0.65", "0.7", "0.75", "0.8", "0.85",
              "0.9", "0.95")

    myDir = os.path.join(ExpResultsDir, "fixedSymbolWCVaRSPPortfolio",
                         "LargestMarketValue_200501")
    for n_rv in n_rvs:
        t = time()
        avgIO = StringIO()
        # NOTE: this header is written verbatim (no %-formatting), so the
        # doubled percent signs appear literally in the file.
        avgIO.write(
            'run, n_stock ,n_rv, hist_period, alpha, runtime, wealth, wROI(%), dROI(%%), Sharpe(%%), SortinoFull(%%), SortinoPartial(%%), scen err\n'
        )

        for alpha in alphas:
            dirName = "fixedSymbolWCVaRSPPortfolio_n%s_p70-80-90-100-110-120_s100_a%s" % (
                n_rv, alpha)
            exps = glob(os.path.join(myDir, dirName, "20050103-20131231_*"))
            wealths, rois, elapsed, scenerr = [], [], [], []
            sharpe, sortinof, sortinop, dROI = [], [], [], []
            for exp in exps:
                with open(os.path.join(exp, "summary.json")) as fin:
                    summary = json.load(fin)
                wealth = float(summary['final_wealth'])
                print("%s %s" % (dirName, wealth))
                wealths.append(wealth)
                # cumulative ROI relative to a starting wealth of 1e6
                rois.append((wealth / 1e6 - 1) * 100.0)
                elapsed.append(float(summary['elapsed']))
                scenerr.append(summary['scen_err_cnt'])

                # Read all four metrics before appending any of them, so a
                # missing key cannot leave the lists with unequal lengths.
                try:
                    run_sharpe = float(summary['wealth_ROI_Sharpe'])
                    run_sortinof = float(summary['wealth_ROI_SortinoFull'])
                    run_sortinop = float(summary['wealth_ROI_SortinoPartial'])
                    run_dROI = float(summary['wealth_ROI_mean'])
                except KeyError:
                    # fall back to the stored wealth process
                    csvfile = os.path.join(exp, 'wealthProcess.csv')
                    df = pd.read_csv(csvfile, index_col=0, parse_dates=True)
                    proc = df.sum(axis=1)
                    wrois = proc.pct_change()
                    # the first change is undefined; treat it as zero
                    wrois.iloc[0] = 0

                    run_dROI = wrois.mean()
                    run_sharpe = Performance.Sharpe(wrois)
                    run_sortinof = _sortino_ratio(
                        Performance.SortinoFull(wrois))
                    run_sortinop = _sortino_ratio(
                        Performance.SortinoPartial(wrois))

                dROI.append(run_dROI * 100)
                sharpe.append(run_sharpe * 100)
                sortinof.append(run_sortinof * 100)
                sortinop.append(run_sortinop * 100)

            # n_rv is written twice: once as n_stock and once as n_rv.
            avgIO.write(
                "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s\n" %
                (len(rois), n_rv, n_rv, "70-80-90-100-110-120", alpha,
                 np.asarray(elapsed).mean(), np.asarray(wealths).mean(),
                 np.asarray(rois).mean(), np.asarray(dROI).mean(),
                 np.asarray(sharpe).mean(), np.asarray(sortinof).mean(),
                 np.asarray(sortinop).mean(), np.asarray(scenerr).mean()))

        resFile = os.path.join(
            ExpResultsDir,
            'avg_fixedWCVaRSPPortfolio_n%s_result_2005.csv' % (n_rv))
        with open(resFile, 'wb') as fout:
            fout.write(avgIO.getvalue())
        avgIO.close()
        print("n_rv:%s OK, elapsed %.3f secs" % (n_rv, time() - t))
Пример #9
0
def buyHoldPortfolio(symbols,
                     startDate=date(2005, 1, 3),
                     endDate=date(2013, 12, 31),
                     money=1e6,
                     buyTransFee=0.001425,
                     sellTransFee=0.004425,
                     save_latex=False,
                     save_csv=True,
                     debug=False):
    '''Simulate an equally weighted buy-and-hold portfolio and log its stats.

    ``money`` is split evenly over ``symbols`` on the first trading date
    (after deducting ``buyTransFee``), each position is compounded with the
    symbol's ``adjROI`` on every later date, and ``sellTransFee`` is deducted
    from every position on the last date.

    The wealth process and its row sums are stored as CSV and pickle files in
    ``<ExpResultsDir>/BuyandHoldPortfolio``; one summary row is appended to
    ``BuyandHold_result_2005.csv`` and one LaTeX row to
    ``BuyandHold_result_2005.txt`` in the same directory.

    Parameters:
        symbols: sequence of symbol names; ``<symbol>.pkl`` is read from
            ``PklBasicFeaturesDir`` for each.  The sequence is not modified.
        startDate, endDate: bounds of the slice taken from every frame.
        money: initial wealth.
        buyTransFee, sellTransFee: proportional transaction fees.
        save_latex, save_csv, debug: accepted but currently unused; both
            result files are always written.

    Raises:
        ValueError: if ``symbols`` is empty or the symbols do not share the
            same trading dates.
    '''
    t = time.time()

    if len(symbols) == 0:
        raise ValueError('symbols must contain at least one symbol')

    # read df
    dfs = []
    transDates = None
    for symbol in symbols:
        df = pd.read_pickle(
            os.path.join(PklBasicFeaturesDir, '%s.pkl' % symbol))
        tmp = df[startDate:endDate]
        # positional bounds of the requested date range
        startIdx = df.index.get_loc(tmp.index[0])
        endIdx = df.index.get_loc(tmp.index[-1])

        # adjROI is divided by 100, i.e. treated as a percentage
        data = df[startIdx:endIdx + 1]['adjROI'] / 100.

        # check all data have the same transDates; array_equal also copes
        # with series of different lengths
        if transDates is None:
            transDates = data.index.values
        if not np.array_equal(transDates, data.index.values):
            raise ValueError('symbol %s do not have the same trans. dates' %
                             (symbol))
        dfs.append(data)

    # initialize -- build the column list on a copy so the caller's
    # ``symbols`` is left untouched
    n_rv = len(dfs)
    columns = list(symbols) + ['deposit']
    wealthProcess = pd.DataFrame(columns=columns, index=transDates)

    # allocation + buy and hold: the first entry of each path is the initial
    # stake, every later entry is the previous one times (1 + roi).  cumprod
    # multiplies sequentially, i.e. in the same order as an explicit loop.
    initial = money / n_rv * (1 - buyTransFee)
    for symbol, data in zip(symbols, dfs):
        growth = 1 + np.asarray(data.values, dtype=float)
        growth[0] = initial
        path = np.cumprod(growth)
        # sell in the last period
        path[-1] *= (1 - sellTransFee)
        wealthProcess[symbol] = path
    wealthProcess['deposit'] = 0

    wealth = wealthProcess.sum(axis=1)
    final_wealth = wealth.iloc[-1]
    pROI = (final_wealth / money - 1) * 100
    prois = wealth.pct_change()
    # the first change is undefined; treat it as zero
    prois.iloc[0] = 0

    JB = sss.jarque_bera(prois)[1]
    ADF = sts.adfuller(prois)[1]

    resultDir = os.path.join(ExpResultsDir, "BuyandHoldPortfolio")
    if not os.path.exists(resultDir):
        os.makedirs(resultDir)

    fileName = os.path.join(resultDir, 'BuyandHold_result_2005.csv')
    statName = os.path.join(resultDir, 'BuyandHold_result_2005.txt')

    wealthProcess.to_csv(
        os.path.join(resultDir, "wealthProcess_n%s.csv" % (n_rv)))
    wealth.to_csv(os.path.join(resultDir, "wealthSum_n%s.csv" % (n_rv)))
    wealthProcess.to_pickle(
        os.path.join(resultDir, "wealthProcess_n%s.pkl" % (n_rv)))
    wealth.to_pickle(os.path.join(resultDir, "wealthSum_n%s.pkl" % (n_rv)))

    sharpe = Performance.Sharpe(prois)
    sortinof, ddf = Performance.SortinoFull(prois)
    sortinop, ddp = Performance.SortinoPartial(prois)

    mu = prois.mean() * 100
    sigma = prois.std() * 100
    skew = spstats.skew(prois)
    kurt = spstats.kurtosis(prois)
    # The exponent hard-codes a 9-year horizon, matching the default
    # 2005-2013 window; it is not derived from startDate / endDate.
    annualROI = (np.power(final_wealth / money, 1. / 9) - 1) * 100

    # Each file gets its header only when that file itself is new.
    csvParts = []
    if not os.path.exists(fileName):
        # written verbatim (no %-formatting): the doubled percent signs
        # appear literally in the file
        csvParts.append('n_rv, wealth, wROI(%), ROI(%%), stdev, skew, kurt,')
        csvParts.append(
            'Sp(%%), StF(%%), StP(%%), downDevF, downDevP,  JB, ADF\n')
    csvParts.append('%s,%s,%s,%s,%s,%s,%s,' %
                    (n_rv, final_wealth, pROI, mu, sigma, skew, kurt))
    csvParts.append('%s,%s,%s,%s,%s,%s,%s\n' %
                    (sharpe * 100, sortinof * 100, sortinop * 100, ddf * 100,
                     ddp * 100, JB, ADF))

    statParts = []
    if not os.path.exists(statName):
        statParts.append(
            '$n$ & $R_{C}$(\%) & $R_{A}$(\%) & $\mu$(\%) & $\sigma$(\%) & skew & kurt & $S_p$(\%) & $S_o$(\%)  & JB & ADF \\\ \hline \n'
        )
    statParts.append(
        '%2d &  %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2e & %4.2e \\\ \hline \n'
        % (n_rv, pROI, annualROI, mu, sigma, skew, kurt, sharpe * 100,
           sortinof * 100, JB, ADF))

    with open(fileName, 'ab') as fout:
        fout.write(''.join(csvParts))

    with open(statName, 'ab') as fout:
        fout.write(''.join(statParts))

    print("buyhold portfolio %s %s_%s pROI:%.3f%%, %.3f secs" % (
        startDate, endDate, n_rv, pROI, time.time() - t))