Example #1
 def load_data(self, assets):
     self.contlist = {}
     self.exp_dates = {}
     dbconfig = self.config.get('dbconfig', dbaccess.hist_dbconfig)
     cnx = dbaccess.connect(**dbconfig)
     for i, prod in enumerate(assets):
         cont_mth, exch = dbaccess.prod_main_cont_exch(prod)
         self.contlist[prod], _ = misc.contract_range(prod, exch, cont_mth, self.start_date, self.end_date)
         self.exp_dates[prod] = [misc.contract_expiry(cont) for cont in self.contlist[prod]]
         edates = [ misc.day_shift(d, self.config['rollrule']) for d in self.exp_dates[prod] ]
         sdates = [ misc.day_shift(d, self.sim_period) for d in self.exp_dates[prod] ]
         self.data_store[prod] = {}
         for cont, sd, ed in zip(self.contlist[prod], sdates, edates):
             if self.sim_freq == 'd':
                 tmp_df = dbaccess.load_daily_data_to_df(cnx, 'fut_min', cont, sd, ed)
             else:
                 minid_start = 1500
                 minid_end = 2114
                 if prod in misc.night_session_markets:
                     minid_start = 300
                 tmp_df = dbaccess.load_min_data_to_df(cnx, 'fut_min', cont, sd, ed, minid_start, minid_end)
                 misc.cleanup_mindata(tmp_df, prod)
             tmp_df['contract'] = cont
             self.data_store[prod][cont] = tmp_df
     # close the DB connection only after all products and contracts are loaded
     cnx.close()
Example #2
 def load_data(self, assets):
     self.contlist = {}
     self.exp_dates = {}
     dbconfig = self.config.get('dbconfig', dbaccess.hist_dbconfig)
     cnx = dbaccess.connect(**dbconfig)
     for i, prod in enumerate(assets):
         cont_mth, exch = dbaccess.prod_main_cont_exch(prod)
         self.contlist[prod], _ = misc.contract_range(
             prod, exch, cont_mth, self.start_date, self.end_date)
         self.exp_dates[prod] = [
             misc.contract_expiry(cont) for cont in self.contlist[prod]
         ]
         edates = [
             misc.day_shift(d, self.config['rollrule'])
             for d in self.exp_dates[prod]
         ]
         sdates = [
             misc.day_shift(d, self.sim_period) for d in self.exp_dates[prod]
         ]
         self.data_store[prod] = {}
         for cont, sd, ed in zip(self.contlist[prod], sdates, edates):
             if self.sim_freq == 'd':
                 tmp_df = dbaccess.load_daily_data_to_df(
                     cnx, 'fut_min', cont, sd, ed)
             else:
                 minid_start = 1500
                 minid_end = 2114
                 if prod in misc.night_session_markets:
                     minid_start = 300
                 tmp_df = dbaccess.load_min_data_to_df(
                     cnx, 'fut_min', cont, sd, ed, minid_start, minid_end)
                 misc.cleanup_mindata(tmp_df, prod)
             tmp_df['contract'] = cont
             self.data_store[prod][cont] = tmp_df
     # close the DB connection only after all products and contracts are loaded
     cnx.close()
Example #3
def r_breaker( asset, start_date, end_date, scenarios, freqs, config):
    nearby  = config['nearby']
    rollrule = config['rollrule']
    start_d = misc.day_shift(start_date, '-1b')
    file_prefix = config['file_prefix'] + '_' + asset + '_'
    ddf = misc.nearby(asset, nearby, start_d, end_date, rollrule, 'd', need_shift=True)
    mdf = misc.nearby(asset, nearby, start_date, end_date, rollrule, 'm', need_shift=True)
    mdf = backtest.cleanup_mindata(mdf, asset)
    #ddf = dh.conv_ohlc_freq(mdf, 'D')
    output = {}
    for ix, freq in enumerate(freqs):
        if freq !='1min':
            df = dh.conv_ohlc_freq(mdf, freq)
        else:
            df = mdf
        for iy, k in enumerate(scenarios):
            idx = ix*10+iy
            config['k'] = k
            (res, closed_trades, ts) = r_breaker_sim( ddf, df, config)
            output[idx] = res
            print 'saving results for scen = %s' % str(idx)
            all_trades = {}
            for i, tradepos in enumerate(closed_trades):
                all_trades[i] = strat.tradepos2dict(tradepos)
            fname = file_prefix + str(idx) + '_trades.csv'
            trades = pd.DataFrame.from_dict(all_trades).T  
            trades.to_csv(fname)
            fname = file_prefix + str(idx) + '_dailydata.csv'
            ts.to_csv(fname)
    fname = file_prefix + 'stats.csv'
    res = pd.DataFrame.from_dict(output)
    res.to_csv(fname)
    return 
Example #4
def dual_thrust( asset, start_date, end_date, scenarios, config):
    nearby  = config['nearby']
    rollrule = config['rollrule']
    start_d = misc.day_shift(start_date, '-4b')
    file_prefix = config['file_prefix'] + '_' + asset + '_'
    ddf = misc.nearby(asset, nearby, start_d, end_date, rollrule, 'd', need_shift=True)
    mdf = misc.nearby(asset, nearby, start_d, end_date, rollrule, 'm', need_shift=True)
    mdf = backtest.cleanup_mindata(mdf, asset)
    #ddf = dh.conv_ohlc_freq(mdf, 'D')
    output = {}
    for ix, s in enumerate(scenarios):
        config['win'] = s[1]
        config['k'] = s[0]
        config['m'] = s[2]
        (res, closed_trades, ts) = dual_thrust_sim( ddf, mdf, config)
        output[ix] = res
        print 'saving results for scen = %s' % str(ix)
        all_trades = {}
        for i, tradepos in enumerate(closed_trades):
            all_trades[i] = strat.tradepos2dict(tradepos)
        fname = file_prefix + str(ix) + '_trades.csv'
        trades = pd.DataFrame.from_dict(all_trades).T  
        trades.to_csv(fname)
        fname = file_prefix + str(ix) + '_dailydata.csv'
        ts.to_csv(fname)
    fname = file_prefix + 'stats.csv'
    res = pd.DataFrame.from_dict(output)
    res.to_csv(fname)
    return 
Example #5
def turtle( asset, start_date, end_date, systems, config):
    rollrule = config['rollrule']
    nearby   = config['nearby']
    file_prefix = config['file_prefix'] + '_' + asset + '_'
    start_d  = misc.day_shift(start_date, '-'+str(max([ max(sys) for sys in systems]))+'b')
    ddf = misc.nearby(asset, nearby, start_d, end_date, rollrule, 'd', need_shift=True)
    mdf = misc.nearby(asset, nearby, start_date, end_date, rollrule, 'm', need_shift=True)
    #ddf = dh.conv_ohlc_freq(mdf, 'D')
    output = {}
    for ix, sys in enumerate(systems):
        config['signals'] = sys[:3]
        config['max_loss'] = sys[3]
        config['max_pos'] = sys[4]
        (res, closed_trades, ts) = turtle_sim( ddf, mdf, config)
        output[ix] = res
        print 'saving results for scen = %s' % str(ix)
        all_trades = {}
        for i, tradepos in enumerate(closed_trades):
            all_trades[i] = strat.tradepos2dict(tradepos)
        fname = file_prefix + str(ix) + '_trades.csv'
        trades = pd.DataFrame.from_dict(all_trades).T  
        trades.to_csv(fname)
        fname = file_prefix + str(ix) + '_dailydata.csv'
        ts.to_csv(fname)
    fname = file_prefix + 'stats.csv'
    res = pd.DataFrame.from_dict(output)
    res.to_csv(fname)
    return 
Example #6
def flat_ir_curve(tday, rate):
    tenors = ['1W', '2W', '1M', '3M', '6M', '9M', '1Y', '3Y']
    output = []
    for ten in tenors:
        data = [ten, misc.day_shift(tday, ten.lower()), rate]
        output.append(data)
    return output
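
Note: the tenor strings passed to misc.day_shift throughout these examples ('-1b', '1w', '3m', '1y', ...) appear to encode a signed count plus a unit (b = business days, d/w/m/y = calendar days/weeks/months/years). Below is a minimal, hypothetical sketch of such a shifter using pandas offsets; the real misc.day_shift presumably also honours exchange holiday calendars, so treat this only as an illustration of the tenor convention.

# Hypothetical re-implementation of a day_shift-style helper, for illustration only.
# Assumes tenors look like '-1b', '2w', '3m', '1y'; the real misc.day_shift may differ.
import re
import datetime
import pandas as pd
from pandas.tseries.offsets import BDay, DateOffset

def day_shift_sketch(d, tenor):
    m = re.match(r'^(-?\d+)([bdwmy])$', tenor.lower())
    if not m:
        raise ValueError('unsupported tenor: %s' % tenor)
    n, unit = int(m.group(1)), m.group(2)
    if unit == 'b':                                   # business days
        shifted = pd.Timestamp(d) + BDay(n)
    else:                                             # calendar units
        kw = {'d': 'days', 'w': 'weeks', 'm': 'months', 'y': 'years'}[unit]
        shifted = pd.Timestamp(d) + DateOffset(**{kw: n})
    return shifted.date()

print(day_shift_sketch(datetime.date(2018, 3, 14), '-1b'))  # 2018-03-13, previous business day
print(day_shift_sketch(datetime.date(2018, 3, 14), '3m'))   # 2018-06-14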
Example #7
def dual_thrust( asset, start_date, end_date, scenarios, config):
    nearby  = config['nearby']
    rollrule = config['rollrule']
    start_d = misc.day_shift(start_date, '-2b')
    file_prefix = config['file_prefix'] + '_' + asset + '_'
    ddf = misc.nearby(asset, nearby, start_d, end_date, rollrule, 'd', need_shift=True)
    mdf = misc.nearby(asset, nearby, start_d, end_date, rollrule, 'm', need_shift=True)
    #ddf = dh.conv_ohlc_freq(mdf, 'D')
    output = {}
    for ix, s in enumerate(scenarios):
        config['win'] = s[1]
        config['k'] = s[0]
        config['m'] = s[2]
        (res, closed_trades, ts) = dual_thrust_sim( ddf, mdf, config)
        output[ix] = res
        print 'saving results for scen = %s' % str(ix)
        all_trades = {}
        for i, tradepos in enumerate(closed_trades):
            all_trades[i] = strat.tradepos2dict(tradepos)
        fname = file_prefix + str(ix) + '_trades.csv'
        trades = pd.DataFrame.from_dict(all_trades).T  
        trades.to_csv(fname)
        fname = file_prefix + str(ix) + '_dailydata.csv'
        ts.to_csv(fname)
    fname = file_prefix + 'stats.csv'
    res = pd.DataFrame.from_dict(output)
    res.to_csv(fname)
    return 
Example #8
def dual_thrust(asset, start_date, end_date, scenarios, config):
    nearby = config["nearby"]
    rollrule = config["rollrule"]
    start_d = misc.day_shift(start_date, "-2b")
    file_prefix = config["file_prefix"] + "_" + asset + "_"
    mdf = misc.nearby(asset, nearby, start_d, end_date, rollrule, "m", need_shift=True)
    mdf = backtest.cleanup_mindata(mdf, asset)
    output = {}
    for ix, s in enumerate(scenarios):
        config["win"] = s[1]
        config["k"] = s[0]
        config["m"] = s[2]
        config["f"] = s[3]
        (res, closed_trades, ts) = dual_thrust_sim(mdf, config)
        output[ix] = res
        print "saving results for scen = %s" % str(ix)
        all_trades = {}
        for i, tradepos in enumerate(closed_trades):
            all_trades[i] = strat.tradepos2dict(tradepos)
        fname = file_prefix + str(ix) + "_trades.csv"
        trades = pd.DataFrame.from_dict(all_trades).T
        trades.to_csv(fname)
        fname = file_prefix + str(ix) + "_dailydata.csv"
        ts.to_csv(fname)
    fname = file_prefix + "stats.csv"
    res = pd.DataFrame.from_dict(output)
    res.to_csv(fname)
    return
Example #9
def load_csv_to_db(edate,
                   save_loc='C:\\dev\\data\\',
                   freq='m',
                   is_replace=False):
    cnx = dbaccess.connect(**dbaccess.dbconfig)
    cont_list = misc.filter_main_cont(edate, False)
    if freq not in ['m', 'd']:
        return False
    for cont in cont_list:
        if freq == 'm':
            filename = save_loc + cont + '_min.csv'
        else:
            filename = save_loc + cont + '_daily.csv'
        if not os.path.isfile(filename):
            continue
        data_reader = csv.reader(file(filename, 'rb'))
        mindata_list = []
        for idx, line in enumerate(data_reader):
            if idx > 0:
                if 'nan' in [line[0], line[2], line[3], line[4], line[6]]:
                    continue
                min_data = {}
                min_data['volume'] = int(float(line[0]))
                if min_data['volume'] <= 0: continue
                dtime = datetime.datetime.strptime(line[1],
                                                   '%Y-%m-%d %H:%M:%S.%f')
                if freq == 'm':
                    min_data['datetime'] = dtime.replace(microsecond=0)
                else:
                    min_data['date'] = dtime.date()
                min_data['high'] = float(line[2])
                min_data['low'] = float(line[3])
                min_data['close'] = float(line[4])
                if line[5] == 'nan':
                    oi = 0
                else:
                    oi = int(float(line[5]))
                min_data['openInterest'] = oi
                min_data['open'] = float(line[6])
                if freq == 'm':
                    min_data['min_id'] = get_min_id(dtime)
                    trading_date = dtime.date()
                    if min_data['min_id'] < 600:
                        trading_date = misc.day_shift(trading_date, '1b')
                    min_data['date'] = trading_date
                    mindata_list.append(min_data)
                else:
                    print cont
                    dbaccess.insert_daily_data(cont,
                                               min_data,
                                               is_replace=is_replace,
                                               dbtable='fut_daily')
        if freq == 'm':
            print cont
            dbaccess.bulkinsert_min_data(cont,
                                         mindata_list,
                                         is_replace=is_replace)
    return True
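
Note: the loop above reads the CSV columns by position -- volume, datetime, high, low, close, openInterest, open -- and drops NaN and zero-volume rows before inserting into the database. A pandas-based sketch of the same filtering step on two made-up rows (the column order is simply read off the indexing above, not confirmed elsewhere):

# Equivalent filtering with pandas on inline sample data (values are invented).
import io
import pandas as pd

raw = io.StringIO(
    "volume,datetime,high,low,close,openInterest,open\n"
    "120,2018-03-14 21:01:00.0,3451,3448,3450,250000,3449\n"
    "0,2018-03-14 21:02:00.0,3452,3449,3451,250100,3450\n")
df = pd.read_csv(raw, parse_dates=['datetime'])
df = df.dropna(subset=['volume', 'high', 'low', 'close', 'open'])
df = df[df['volume'] > 0]          # the loader above also skips zero-volume bars
print(df[['datetime', 'close', 'volume']])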
Example #10
def get_pnl_stats(df_list, marginrate, freq, tenors = ['3m', '6m', '1y', '2y', '3y'], start_capital = 10000.0):
    sum_pnl = pd.Series(name='pnl')
    sum_margin = pd.Series(name='margin')
    sum_cost = pd.Series(name='cost')
    if freq == 'm':
        index_col = ['date', 'min_id']
    else:
        index_col = ['date']
    for df in df_list:
        xdf = df.reset_index().set_index(index_col)
        if 'traded_price' in xdf.columns:
            field = 'traded_price'
        else:
            field  = 'close'
        pnl = xdf['pos'].shift(1).fillna(0.0) * (xdf[field] - xdf[field].shift(1)).fillna(0.0)
        if 'closeout' in xdf.columns:
            pnl = pnl + xdf['closeout']
        # pnl = pnl + (xdf['pos'] - xdf['pos'].shift(1).fillna(0.0)) * (xdf['close'] - xdf['traded_price'])
        if len(sum_pnl) == 0:
            sum_pnl = pd.Series(pnl, name='pnl')
        else:
            sum_pnl = sum_pnl.add(pnl, fill_value=0)
        margin = pd.Series(
            pd.concat([xdf.pos * marginrate[0] * xdf.close, -xdf.pos * marginrate[1] * xdf.close], join='outer',
                      axis=1).max(1), name='margin')
        if len(sum_margin) == 0:
            sum_margin = margin
        else:
            sum_margin = sum_margin.add(margin, fill_value=0)
        if len(sum_cost) == 0:
            sum_cost = xdf['cost']
        else:
            sum_cost = sum_cost.add(xdf['cost'], fill_value=0)
    if freq == 'm':
        daily_pnl = pd.Series(sum_pnl.groupby(level=0).sum(), name='daily_pnl')
        daily_margin = pd.Series(sum_margin.groupby(level=0).last(), name='daily_margin')
        daily_cost = pd.Series(sum_cost.groupby(level=0).sum(), name='daily_cost')
    else:
        daily_pnl = sum_pnl
        daily_margin = sum_margin
        daily_cost = sum_cost
    daily_pnl.name = 'daily_pnl'
    daily_margin.name = 'daily_margin'
    daily_cost.name = 'daily_cost'
    cum_pnl = pd.Series(daily_pnl.cumsum() + daily_cost.cumsum() + start_capital, name='cum_pnl')
    df = pd.concat([cum_pnl, daily_pnl, daily_margin, daily_cost], join='outer', axis=1)
    res = {}
    for tenor in tenors:
        edate = df.index[-1]
        sdate = misc.day_shift(edate, '-' + tenor)
        pnl_df = df[df.index >= sdate]
        res_by_tenor = pnl_stats(pnl_df)
        for field in res_by_tenor:
            res[field + '_' + tenor] = 0 if np.isnan(res_by_tenor[field]) else res_by_tenor[field]
        if sdate < df.index[0]:
            break
    return res, df
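
Note: the core of the PnL aggregation above is mark-to-market on the previous bar's position, pnl_t = pos_(t-1) * (price_t - price_(t-1)), summed across the contracts in df_list and then (for minute data) grouped into daily totals. A tiny numeric check of that one line, with invented numbers:

# pnl_t = pos_{t-1} * (price_t - price_{t-1}); all values are made up.
import pandas as pd

xdf = pd.DataFrame({'pos':   [0, 1, 1, -2, 0],
                    'close': [100.0, 101.0, 103.0, 102.0, 99.0]})
pnl = xdf['pos'].shift(1).fillna(0.0) * (xdf['close'] - xdf['close'].shift(1)).fillna(0.0)
print(pnl.tolist())           # [0.0, 0.0, 2.0, -1.0, 6.0]
print(pnl.cumsum().iloc[-1])  # 7.0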
Example #11
def get_wind_data(inst_list, start_date, end_date, save_loc='C:\\dev\\data\\', freq='m'):
    exch_map = {v: k for k, v in wind_exch_map.items()}
    for instID in inst_list:
        exch = misc.inst2exch(instID)
        ex = exch_map[exch]
        ticker = instID + '.' + ex
        product = misc.inst2product(instID)
        sdate = start_date
        edate = end_date
        stime = datetime.time(9, 0, 0)
        etime = datetime.time(15, 0, 0)
        if product in ['T', 'TF']:
            stime = datetime.time(9, 15, 0)
            etime = datetime.time(15, 15, 0)
        elif product in misc.night_session_markets:
            stime = datetime.time(21, 0, 0)
            sdate = misc.day_shift(sdate, '-1b')
        smin = datetime.datetime.combine(sdate, stime)
        emin = datetime.datetime.combine(edate, etime)
        fields = 'open,high,low,close,volume,oi'
        try:
            if freq == 'm':
                outfile = save_loc + instID + '_min.csv'
                if os.path.isfile(outfile):
                    continue
                raw_data = w.wsi(ticker, fields, smin, emin)
                if len(raw_data.Data) > 1:
                    output = {'datetime': raw_data.Times,
                              'open': raw_data.Data[0],
                              'high': raw_data.Data[1],
                              'low': raw_data.Data[2],
                              'close': raw_data.Data[3],
                              'volume': raw_data.Data[4],
                              'openInterest': raw_data.Data[5]}
                    dump2csvfile(output, outfile)
                else:
                    print "no min data obtained for ticker=%s" % ticker
            elif freq == 'd':
                outfile = save_loc + instID + '_daily.csv'
                if os.path.isfile(outfile):
                    continue
                raw_data = w.wsd(ticker, fields, start_date, end_date)
                if len(raw_data.Data) > 1:
                    output = {'datetime': raw_data.Times,
                              'open': raw_data.Data[0],
                              'high': raw_data.Data[1],
                              'low': raw_data.Data[2],
                              'close': raw_data.Data[3],
                              'volume': raw_data.Data[4],
                              'openInterest': raw_data.Data[5]}
                    dump2csvfile(output, outfile)
                else:
                    print "no daily data obtained for ticker=%s" % ticker
        except ValueError:
            pass
    w.stop()
    return True
Example #12
def process_min_id(df):
    df['min_id'] = df['datetime'].apply(lambda x:
                                        ((x.hour + 6) % 24) * 100 + x.minute)
    flag = df['min_id'] >= 1000
    df.loc[flag, 'date'] = df['datetime'][flag].apply(lambda x: x.date())
    df['date'] = df['date'].fillna(method='bfill')
    flag = pd.isnull(df['date'])
    df.loc[flag, 'date'] = df['datetime'][flag].apply(
        lambda x: misc.day_shift(x.date(), '1b'))
    return df
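
Note: the min_id convention above shifts the clock by six hours, so night-session bars (which start at 21:00 on the previous calendar day) sort before the day session of the trading date they belong to. Rows with min_id >= 1000 keep their own calendar date; night bars pick up the next trading day via the back-fill and, for the trailing block, the day_shift call. A quick check of the mapping itself:

# min_id = ((hour + 6) % 24) * 100 + minute, as in process_min_id above.
import datetime

def to_min_id(ts):
    return ((ts.hour + 6) % 24) * 100 + ts.minute

print(to_min_id(datetime.datetime(2018, 3, 14, 9, 30)))   # 1530 (day session)
print(to_min_id(datetime.datetime(2018, 3, 14, 21, 5)))   # 305  (night session)
print(to_min_id(datetime.datetime(2018, 3, 15, 0, 45)))   # 645  (after midnight)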
Example #13
def fxfwd_db_loader(market_data, fwd_index, dep_tenors = []):
    curve_info = cmq_crv_defn.FX_Curve_Map[fwd_index]
    mkey = market_data['market_key']
    mdate = market_data['market_date']
    mkt_db = market_data['market_db']
    cnx = dbaccess.connect(**mkt_db)
    df = dbaccess.load_fut_curve(cnx, fwd_index, mkey, dbtable = 'fx_daily', field = 'ccy')
    if len(df) == 0:
        print "FXFwd data is not available for %s on %s" % (fwd_index, mdate)
    df['expiry'] = df['tenor'].apply(lambda x: misc.day_shift(mdate, x.lower()))
    return df[['tenor', 'expiry', 'rate']].values.tolist()
Example #14
 def prepare_data(self, asset_idx, cont_idx=0):
     assets = self.sim_assets[asset_idx]
     cont_map = self.cont_maplist[asset_idx]
     cont = self.contlist[assets[0]][cont_idx]
     edate = misc.day_shift(self.exp_dates[assets[0]][cont_idx],
                            self.config['rollrule'])
     if self.sim_mode == 'OR':
         mdf = self.min_data[assets[0]][cont]
         mdf = mdf[mdf.date <= edate]
     else:
         mode_keylist = self.sim_mode.split('-')
         smode = mode_keylist[0]
         cmode = mode_keylist[1]
         all_data = []
         if smode == 'TS':
             all_data = [
                 self.min_data[assets[0]][self.contlist[assets[0]][cont_idx
                                                                   + i]]
                 for i in cont_map
             ]
         else:
             all_data = [
                 self.min_data[asset][self.contlist[asset][cont_idx + i]]
                 for asset, i in zip(assets, cont_map)
             ]
         if cmode == 'Full':
             mdf = pd.concat(all_data, axis=1, join='inner')
             mdf.columns = [
                 iter + str(i) for i, x in enumerate(all_data)
                 for iter in x.columns
             ]
             mdf = mdf[mdf.date0 < edate]
         else:
             for i, (coeff,
                     tmpdf) in enumerate(zip(self.calc_coeffs, all_data)):
                 if i == 0:
                     xopen = tmpdf['open'] * coeff
                     xclose = tmpdf['close'] * coeff
                 else:
                     xopen = xopen + tmpdf['open'] * coeff
                     xclose = xclose + tmpdf['close'] * coeff
             xopen = xopen.dropna()
             xclose = xclose.dropna()
             xhigh = pd.concat([xopen, xclose], axis=1).max(axis=1)
             xlow = pd.concat([xopen, xclose], axis=1).min(axis=1)
             col_list = ['date', 'min_id', 'volume', 'openInterest']
             mdf = pd.concat([xopen, xhigh, xlow, xclose] +
                             [all_data[0][col] for col in col_list],
                             axis=1,
                             join='inner')
             mdf.columns = ['open', 'high', 'low', 'close'] + col_list
             mdf['contract'] = cont
     self.config['mdf'] = mdf
Example #15
def pnl_stats_by_tenor(df, tenors):
    res = {}
    for tenor in tenors:
        edate = df.index[-1]
        sdate = misc.day_shift(edate, '-' + tenor)
        pnl_df = df[df.index >= sdate]
        res_by_tenor = pnl_stats(pnl_df)
        for field in res_by_tenor:
            res[field + '_' + tenor] = 0 if np.isnan(res_by_tenor[field]) else res_by_tenor[field]
        if sdate < df.index[0]:
            break
    return res
Example #16
def pnl_stats_by_tenor(df, tenors):
    res = {}
    for tenor in tenors:
        edate = df.index[-1]
        sdate = misc.day_shift(edate, '-' + tenor)
        pnl_df = df[df.index >= sdate]
        res_by_tenor = pnl_stats(pnl_df)
        for field in res_by_tenor:
            res[field + '_' + tenor] = 0 if np.isnan(
                res_by_tenor[field]) else res_by_tenor[field]
        if sdate < df.index[0]:
            break
    return res
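
Note: both versions of pnl_stats_by_tenor cut trailing windows from a date-indexed PnL frame with df[df.index >= sdate], where sdate is the last index date shifted back by a tenor such as '-3m', and they stop once the window would reach past the start of the data. A minimal sketch of that slicing on synthetic data, using pd.DateOffset in place of misc.day_shift (an assumption about what the tenor shift amounts to):

# Trailing-window slicing as used above, on synthetic business-day data.
import numpy as np
import pandas as pd

idx = pd.bdate_range('2017-01-02', periods=300)
df = pd.DataFrame({'daily_pnl': np.random.randn(300)}, index=idx)
edate = df.index[-1]
sdate = edate - pd.DateOffset(months=3)      # rough stand-in for the '-3m' tenor
pnl_df = df[df.index >= sdate]
print(pnl_df.index[0].date(), pnl_df.index[-1].date(), len(pnl_df))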
Example #17
def ircurve_db_loader(market_data, fwd_index, dep_tenors = []):
    curve_info = cmq_crv_defn.IR_Curve_Map[fwd_index]
    ir_idx = curve_info['ir_index']
    mdate = market_data['market_date']
    mkey = market_data['market_key']
    mkt_db = market_data['market_db']
    cnx = dbaccess.connect(**mkt_db)
    df = dbaccess.load_fut_curve(cnx, ir_idx, mkey, dbtable='ir_daily', field='ir_index')
    if len(df) == 0:
        print "IRCurve data is not available for %s on %s" % (ir_idx, mkey)
    df = df[~df['tenor'].isin(IR_Exclusion_Tenors)]
    df['expiry'] = df['tenor'].apply(lambda x: misc.day_shift(mdate, x.lower()))
    df['rate'] = df['rate']/100.0
    return df[['tenor', 'expiry', 'rate']].values.tolist()
Example #18
def calc_cov_by_asset(df, asset = None, start_date = None, end_date = None, tenor = None, bias = False):
    if asset is None:
        columns = df.columns
    else:
        columns = [ col for col in df.columns if col.startswith(asset + '-')]
    xdf = df[columns].fillna(0.0)
    if end_date:
        xdf = xdf[df.index <= end_date]
    end_date = xdf.index[-1]
    if tenor:
        start_date = misc.day_shift(end_date, tenor)
    if start_date:
        xdf = xdf[xdf.index >= start_date]
    cov = np.cov(xdf.values.T, bias = bias)
    avg = np.mean(xdf.values, axis = 0)
    weights = np.linalg.inv(cov).dot(avg)
    res = {'weight': weights, 'avg': avg, 'cov': cov, 'columns': columns}
    return res
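
Note: the last three lines of calc_cov_by_asset form the unconstrained mean-variance solution w = inv(cov) . mean, i.e. the maximum-Sharpe direction over the chosen window. A small synthetic check of that computation (random returns with no real meaning):

# w = Sigma^{-1} mu on synthetic return data, matching the weight line above.
import numpy as np

np.random.seed(0)
returns = np.random.normal(0.0005, 0.01, size=(500, 3))   # 500 days x 3 assets
cov = np.cov(returns.T, bias=False)
avg = np.mean(returns, axis=0)
weights = np.linalg.inv(cov).dot(avg)
print(weights.shape)   # (3,)
print(weights)         # unnormalized Sigma^{-1} * mu weights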
Example #19
 def prepare_data(self, asset_idx, cont_idx = 0):
     assets = self.sim_assets[asset_idx]
     cont_map = self.cont_maplist[asset_idx]
     cont = self.contlist[assets[0]][cont_idx]
     edate = misc.day_shift(self.exp_dates[assets[0]][cont_idx], self.config['rollrule'])
     if self.sim_mode == 'OR':
         df = self.data_store[assets[0]][cont]
         df = df[df.date <= edate]
     else:
         mode_keylist = self.sim_mode.split('-')
         smode = mode_keylist[0]
         cmode = mode_keylist[1]
         all_data = []
         if smode == 'TS':
             all_data = [self.data_store[assets[0]][self.contlist[assets[0]][cont_idx+i]] for i in cont_map]
         else:
             all_data = [self.data_store[asset][self.contlist[asset][cont_idx+i]] for asset, i in zip(assets, cont_map)]
         if cmode == 'Full':
             df = pd.concat(all_data, axis = 1, join = 'inner')
             df.columns = [iter + str(i) for i, x in enumerate(all_data) for iter in x.columns]
             df = df[ df.date0 < edate]
         else:
             for i, (coeff, tmpdf) in enumerate(zip(self.calc_coeffs, all_data)):
                 if i == 0:
                     xopen = tmpdf['open'] * coeff
                     xclose = tmpdf['close'] * coeff
                 else:
                     xopen = xopen + tmpdf['open'] * coeff
                     xclose = xclose + tmpdf['close'] * coeff
             xopen = xopen.dropna()
             xclose = xclose.dropna()
             xhigh = pd.concat([xopen, xclose], axis = 1).max(axis = 1)
             xlow = pd.concat([xopen, xclose], axis = 1).min(axis = 1)
             col_list = ['date', 'min_id', 'volume', 'openInterest']
             df = pd.concat([ xopen, xhigh, xlow, xclose] + [all_data[0][col] for col in col_list], axis = 1, join = 'inner')
             df.columns = ['open', 'high', 'low', 'close'] + col_list
             df['contract'] = cont
     self.config['df'] = df
Example #20
def calc_cov_by_asset(df,
                      asset=None,
                      start_date=None,
                      end_date=None,
                      tenor=None,
                      bias=False):
    if asset is None:
        columns = df.columns
    else:
        columns = [col for col in df.columns if col.startswith(asset + '-')]
    xdf = df[columns].fillna(0.0)
    if end_date:
        xdf = xdf[df.index <= end_date]
    end_date = xdf.index[-1]
    if tenor:
        start_date = misc.day_shift(end_date, tenor)
    if start_date:
        xdf = xdf[xdf.index >= start_date]
    cov = np.cov(xdf.values.T, bias=bias)
    avg = np.mean(xdf.values, axis=0)
    weights = np.linalg.inv(cov).dot(avg)
    res = {'weight': weights, 'avg': avg, 'cov': cov, 'columns': columns}
    return res
Example #21
def simcontract_min(config_file):
    sim_config = {}
    with open(config_file, 'r') as fp:
        sim_config = json.load(fp)
    bktest_split = sim_config['sim_func'].split('.')
    run_sim = __import__('.'.join(bktest_split[:-1]))
    for i in range(1, len(bktest_split)):
        run_sim = getattr(run_sim, bktest_split[i])
    dir_name = config_file.split('.')[0]
    dir_name = dir_name.split(os.path.sep)[-1]
    test_folder = get_bktest_folder()
    file_prefix = test_folder + dir_name + os.path.sep
    if not os.path.exists(file_prefix):
        os.makedirs(file_prefix)
    sim_list = sim_config['products']
    if type(sim_list[0]).__name__ != 'list':
        sim_list = [[str(asset)] for asset in sim_list]
    sim_mode = sim_config.get('sim_mode', 'OR')
    calc_coeffs = sim_config.get('calc_coeffs', [1, -1])
    cont_maplist = sim_config.get('cont_maplist', [])
    sim_period = sim_config.get('sim_period', '-12m')
    need_daily = sim_config.get('need_daily', False)
    if len(cont_maplist) == 0:
        cont_maplist = [[0]] * len(sim_list)
    config = {}
    start_date = datetime.datetime.strptime(sim_config['start_date'], '%Y%m%d').date()
    config['start_date'] = start_date
    end_date   = datetime.datetime.strptime(sim_config['end_date'], '%Y%m%d').date()
    config['end_date'] = end_date
    scen_dim = [ len(sim_config[s]) for s in sim_config['scen_keys']]
    outcol_list = ['asset', 'scenario'] + sim_config['scen_keys'] \
                + ['sharp_ratio', 'tot_pnl', 'std_pnl', 'num_days', \
                    'max_drawdown', 'max_dd_period', 'profit_dd_ratio', \
                    'all_profit', 'tot_cost', 'win_ratio', 'num_win', 'num_loss', \
                    'profit_per_win', 'profit_per_loss']
    scenarios = [list(s) for s in np.ndindex(tuple(scen_dim))]
    config.update(sim_config['config'])
    if 'pos_class' in sim_config:
        config['pos_class'] = eval(sim_config['pos_class'])
    if 'proc_func' in sim_config:
        config['proc_func'] = eval(sim_config['proc_func'])
    file_prefix = file_prefix + sim_config['sim_name']
    if 'close_daily' in config and config['close_daily']:
        file_prefix = file_prefix + 'daily_'
    config['file_prefix'] = file_prefix
    summary_df = pd.DataFrame()
    fname = config['file_prefix'] + 'summary.csv'
    if os.path.isfile(fname):
        summary_df = pd.DataFrame.from_csv(fname)
    for assets, cont_map in zip(sim_list, cont_maplist):
        file_prefix = config['file_prefix'] + '_' + sim_mode + '_' + '_'.join(assets) + '_'
        fname = file_prefix + 'stats.json'
        output = {'total': {}, 'cont': {}}
        if os.path.isfile(fname):
            with open(fname, 'r') as fp:
                output = json.load(fp)
        #if len(output['total'].keys()) == len(scenarios):
        #    continue
        min_data = {}
        day_data = {}
        config['tick_base'] = 0
        config['marginrate'] = (0, 0)
        rollrule = '-50b'
        config['exit_min'] = config.get('exit_min', 2057)
        config['no_trade_set'] = config.get('no_trade_set', [])
        if assets[0] in ['cu', 'al', 'zn']:
            rollrule = '-1b'
        elif assets[0] in ['IF', 'IH', 'IC']:
            rollrule = '-2b'
        elif assets[0] in ['au', 'ag']:
            rollrule = '-25b'
        elif assets[0] in ['TF', 'T']:
            rollrule = '-20b'
        rollrule = config.get('rollrule', rollrule)
        contlist = {}
        exp_dates = {}
        for i, prod in enumerate(assets):
            cont_mth, exch = mysqlaccess.prod_main_cont_exch(prod)
            contlist[prod] = misc.contract_range(prod, exch, cont_mth, start_date, end_date)
            exp_dates[prod] = [misc.contract_expiry(cont) for cont in contlist[prod]]
            edates = [ misc.day_shift(d, rollrule) for d in exp_dates[prod] ]
            sdates = [ misc.day_shift(d, sim_period) for d in exp_dates[prod] ]
            config['tick_base'] += trade_offset_dict[prod]
            config['marginrate'] = ( max(config['marginrate'][0], sim_margin_dict[prod]), max(config['marginrate'][1], sim_margin_dict[prod]))
            min_data[prod] = {}
            day_data[prod] = {}
            for cont, sd, ed in zip(contlist[prod], sdates, edates):
                minid_start = 1500
                minid_end = 2114
                if prod in misc.night_session_markets:
                    minid_start = 300
                tmp_df = mysqlaccess.load_min_data_to_df('fut_min', cont, sd, ed, minid_start, minid_end, database = 'hist_data')
                tmp_df['contract'] = cont
                min_data[prod][cont] = cleanup_mindata( tmp_df, prod)
                if need_daily:
                    tmp_df = mysqlaccess.load_daily_data_to_df('fut_daily', cont, sd, ed, database = 'hist_data')
                    day_data[prod][cont] = tmp_df
        if 'offset' in sim_config:
            config['offset'] = sim_config['offset'] * config['tick_base']
        else:
            config['offset'] = config['tick_base']
        for ix, s in enumerate(scenarios):
            fname1 = file_prefix + str(ix) + '_trades.csv'
            fname2 = file_prefix + str(ix) + '_dailydata.csv'
            if os.path.isfile(fname1) and os.path.isfile(fname2):
                continue
            for key, seq in zip(sim_config['scen_keys'], s):
                config[key] = sim_config[key][seq]
            df_list = []
            trade_list = []
            for idx in range(abs(min(cont_map)), len(contlist[assets[0]]) - max(cont_map)):
                cont = contlist[assets[0]][idx]
                edate = misc.day_shift(exp_dates[assets[0]][idx], rollrule)
                if sim_mode == 'OR':
                    mdf = min_data[assets[0]][cont]
                    mdf = mdf[mdf.date <= edate]
                    if need_daily:
                        ddf = day_data[assets[0]][cont]
                        config['ddf'] = ddf[ddf.index <= edate]
                        if len(config['ddf']) < 10:
                            continue
                else:
                    mode_keylist = sim_mode.split('-')
                    smode = mode_keylist[0]
                    cmode = mode_keylist[1]
                    all_data = []
                    if smode == 'TS':
                        all_data = [min_data[assets[0]][contlist[assets[0]][idx+i]] for i in cont_map]
                    else:
                        all_data = [min_data[asset][contlist[asset][idx+i]] for asset, i in zip(assets, cont_map)]
                    if cmode == 'Full':
                        mdf = pd.concat(all_data, axis = 1, join = 'inner')
                        mdf.columns = [iter + str(i) for i, x in enumerate(all_data) for iter in x.columns]
                        mdf = mdf[ mdf.date0 < edate]
                    else:
                        #print all_data[0], all_data[1]
                        for i, (coeff, tmpdf) in enumerate(zip(calc_coeffs, all_data)):
                            if i == 0:
                                xopen = tmpdf['open'] * coeff
                                xclose = tmpdf['close'] * coeff
                            else:
                                xopen = xopen + tmpdf['open'] * coeff
                                xclose = xclose + tmpdf['close'] * coeff
                        xopen = xopen.dropna()
                        xclose = xclose.dropna()
                        xhigh = pd.concat([xopen, xclose], axis = 1).max(axis = 1)
                        xlow = pd.concat([xopen, xclose], axis = 1).min(axis = 1)
                        col_list = ['date', 'min_id', 'volume', 'openInterest']                        
                        mdf = pd.concat([ xopen, xhigh, xlow, xclose] + [all_data[0][col] for col in col_list], axis = 1, join = 'inner')
                        mdf.columns = ['open', 'high', 'low', 'close'] + col_list
                        mdf['contract'] = cont
                        #print mdf
                    if need_daily:
                        if smode == 'TS':
                            all_data = [day_data[assets[0]][contlist[assets[0]][idx+i]] for i in cont_map]
                        else:
                            all_data = [day_data[asset][contlist[asset][idx+i]] for asset, i in zip(assets, cont_map)]
                        if cmode == 'Full':
                            ddf = pd.concat(all_data, axis = 1, join = 'inner')
                            ddf.columns = [iter + str(i) for i, x in enumerate(all_data) for iter in x.columns]
                            config['ddf'] = ddf[ddf.index <= edate]
                        else:
                            for i, (coeff, tmpdf) in enumerate(zip(calc_coeffs, all_data)):
                                if i == 0:
                                    xopen = tmpdf['open'] * coeff
                                    xclose = tmpdf['close'] * coeff
                                else:
                                    xopen = xopen + tmpdf['open'] * coeff
                                    xclose = xclose + tmpdf['close'] * coeff
                            xhigh = pd.concat([xopen, xclose], axis = 1).max(axis = 1)
                            xlow = pd.concat([xopen, xclose], axis = 1).min(axis = 1)
                            col_list = ['volume', 'openInterest']
                            ddf = pd.concat([ xopen, xhigh, xlow, xclose] + [all_data[0][col] for col in col_list], axis = 1, join = 'inner')
                            ddf.columns = ['open', 'high', 'low', 'close'] + col_list
                            ddf['contract'] = cont
                            config['ddf'] = ddf[ddf.index <= edate]
                        if len(config['ddf']) < 10:
                            continue
                df = mdf.copy(deep = True)
                df, closed_trades = run_sim( df, config)
                df_list.append(df)
                trade_list = trade_list + closed_trades
                (res_pnl, ts) = get_pnl_stats( [df], config['capital'], config['marginrate'], 'm')
                res_trade = get_trade_stats( trade_list )
                res = dict( res_pnl.items() + res_trade.items())
                res.update(dict(zip(sim_config['scen_keys'], s)))
                res['asset'] = cont
                if cont not in output['cont']:
                    output['cont'][cont] = {}
                output['cont'][cont][ix] = res
            (res_pnl, ts) = get_pnl_stats( df_list, config['capital'], config['marginrate'], 'm')
            res_trade = get_trade_stats( trade_list )
            res = dict( res_pnl.items() + res_trade.items())
            res.update(dict(zip(sim_config['scen_keys'], s)))
            res['asset'] = '_'.join(assets)
            output['total'][ix] = res
            print 'saving results for asset = %s, scen = %s' % ('_'.join(assets), str(ix))
            all_trades = {}
            for i, tradepos in enumerate(trade_list):
                all_trades[i] = strat.tradepos2dict(tradepos)
            trades = pd.DataFrame.from_dict(all_trades).T
            trades.to_csv(fname1)
            ts.to_csv(fname2)
            fname = file_prefix + 'stats.json'
            try:
                with open(fname, 'w') as ofile:
                    json.dump(output, ofile)
            except:
                continue
        cont_df = pd.DataFrame()
        for idx in range(abs(min(cont_map)), len(contlist[assets[0]]) - max(cont_map)):
            cont = contlist[assets[0]][idx]
            if cont not in output['cont']:
                continue
            res = scen_dict_to_df(output['cont'][cont])
            out_res = res[outcol_list]
            if len(cont_df) == 0:
                cont_df = out_res[:20].copy(deep = True)
            else:
                cont_df = cont_df.append(out_res[:20])
        fname = file_prefix + 'cont_stat.csv'
        cont_df.to_csv(fname)
        res = scen_dict_to_df(output['total'])
        out_res = res[outcol_list]
        if len(summary_df) == 0:
            summary_df = out_res[:20].copy(deep = True)
        else:
            summary_df = summary_df.append(out_res[:20])
        fname = config['file_prefix'] + 'summary.csv'
        summary_df.to_csv(fname)
    return
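
Note: the __import__/getattr chain near the top of Example #21 resolves the dotted path in sim_config['sim_func'] (e.g. "package.module.func") to a callable. A similar resolution can be sketched with importlib; the os.path.join target below is only a stand-in to keep the snippet runnable:

# Resolve "package.module.func" to a callable, as the config loader above does.
import importlib

def resolve_sim_func(dotted_path):
    module_path, func_name = dotted_path.rsplit('.', 1)
    module = importlib.import_module(module_path)
    return getattr(module, func_name)

# run_sim = resolve_sim_func(sim_config['sim_func'])
run_sim = resolve_sim_func('os.path.join')   # stand-in target for illustration
print(run_sim('a', 'b'))                     # 'a/b' (or 'a\\b' on Windows)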