Exemplo n.º 1
0
 def load_data(self, assets):
     """Load historical bars for every contract of each product in *assets*.

     For each product the contract chain between ``self.start_date`` and
     ``self.end_date`` is stored in ``self.contlist[prod]``, the matching
     expiries in ``self.exp_dates[prod]``, and one DataFrame per contract in
     ``self.data_store[prod][cont]``. Each contract is loaded from
     ``expiry`` shifted by ``self.sim_period`` up to ``expiry`` shifted by
     ``self.config['rollrule']``.

     When ``self.sim_freq == 'd'`` daily bars are loaded; otherwise minute
     bars are loaded and passed through ``misc.cleanup_mindata``.

     The database connection is closed once all products are loaded, also
     when loading fails part-way.
     """
     dbconfig = self.config.get('dbconfig', dbaccess.hist_dbconfig)
     cnx = dbaccess.connect(**dbconfig)
     try:
         for prod in assets:
             cont_mth, exch = dbaccess.prod_main_cont_exch(prod)
             self.contlist[prod], _ = misc.contract_range(
                 prod, exch, cont_mth, self.start_date, self.end_date)
             # Read back from the instance attributes: the previous code
             # looked the product up in empty local dicts and raised KeyError.
             conts = self.contlist[prod]
             self.exp_dates[prod] = [misc.contract_expiry(cont) for cont in conts]
             edates = [misc.day_shift(d, self.config['rollrule'])
                       for d in self.exp_dates[prod]]
             sdates = [misc.day_shift(d, self.sim_period)
                       for d in self.exp_dates[prod]]
             self.data_store[prod] = {}
             for cont, sd, ed in zip(conts, sdates, edates):
                 if self.sim_freq == 'd':
                     # NOTE(review): daily bars are read from the 'fut_min'
                     # table here - confirm this is not meant to be 'fut_daily'.
                     tmp_df = dbaccess.load_daily_data_to_df(cnx, 'fut_min', cont, sd, ed)
                 else:
                     # Minute-id window; products listed in
                     # misc.night_session_markets use an earlier start id.
                     minid_start = 1500
                     minid_end = 2114
                     if prod in misc.night_session_markets:
                         minid_start = 300
                     tmp_df = dbaccess.load_min_data_to_df(
                         cnx, 'fut_min', cont, sd, ed, minid_start, minid_end)
                     misc.cleanup_mindata(tmp_df, prod)
                 tmp_df['contract'] = cont
                 self.data_store[prod][cont] = tmp_df
     finally:
         # Close exactly once, after every contract has been loaded (it used
         # to be closed inside the inner loop, after the first contract).
         cnx.close()
Exemplo n.º 2
0
 def load_data(self, assets):
     """Populate ``self.data_store`` with bar data for each product's contracts.

     Per product in *assets* this fills:

     * ``self.contlist[prod]``  - contracts returned by ``misc.contract_range``
       for ``self.start_date`` .. ``self.end_date``;
     * ``self.exp_dates[prod]`` - ``misc.contract_expiry`` of each contract;
     * ``self.data_store[prod][cont]`` - a DataFrame with an added
       ``'contract'`` column, loaded between the expiry shifted by
       ``self.sim_period`` and the expiry shifted by
       ``self.config['rollrule']``.

     Daily bars are loaded when ``self.sim_freq == 'd'``, minute bars (then
     cleaned by ``misc.cleanup_mindata``) otherwise. The connection is
     always closed on exit.
     """
     dbconfig = self.config.get('dbconfig', dbaccess.hist_dbconfig)
     cnx = dbaccess.connect(**dbconfig)
     try:
         for prod in assets:
             cont_mth, exch = dbaccess.prod_main_cont_exch(prod)
             self.contlist[prod], _ = misc.contract_range(
                 prod, exch, cont_mth, self.start_date, self.end_date)
             # Use the attributes that were just written; the former local
             # ``contlist`` / ``exp_dates`` dicts were never filled, so every
             # lookup in them raised KeyError.
             self.exp_dates[prod] = [
                 misc.contract_expiry(cont) for cont in self.contlist[prod]
             ]
             edates = [
                 misc.day_shift(d, self.config['rollrule'])
                 for d in self.exp_dates[prod]
             ]
             sdates = [
                 misc.day_shift(d, self.sim_period)
                 for d in self.exp_dates[prod]
             ]
             self.data_store[prod] = {}
             for cont, sd, ed in zip(self.contlist[prod], sdates, edates):
                 if self.sim_freq == 'd':
                     # NOTE(review): the daily loader is pointed at the
                     # 'fut_min' table - verify against the DB schema.
                     tmp_df = dbaccess.load_daily_data_to_df(
                         cnx, 'fut_min', cont, sd, ed)
                 else:
                     minid_start = 1500
                     minid_end = 2114
                     # Night-session products start from an earlier minute id.
                     if prod in misc.night_session_markets:
                         minid_start = 300
                     tmp_df = dbaccess.load_min_data_to_df(
                         cnx, 'fut_min', cont, sd, ed, minid_start, minid_end)
                     misc.cleanup_mindata(tmp_df, prod)
                 tmp_df['contract'] = cont
                 self.data_store[prod][cont] = tmp_df
     finally:
         # Previously ``cnx.close()`` sat inside the contract loop, so every
         # load after the first one ran against a closed connection.
         cnx.close()
Exemplo n.º 3
0
def snap_curr_market():
    """Fetch current quotes for every unexpired contract of the live assets.

    Returns:
        A pair ``(cont_list, res)``: ``cont_list`` maps each asset in
        ``live_asset_list`` to its contracts whose expiry is today or later,
        and ``res`` is whatever ``web_sina_api.get_fut_quotes`` returns for
        all of those contracts together.
    """
    today = datetime.date.today()
    live_by_asset = {}
    quote_symbols = []
    for asset, exch, cont_mth in live_asset_list:
        candidates, _unused = misc.contract_range(asset, exch, cont_mth, today, today)
        unexpired = []
        for contract in candidates:
            # Keep only contracts that have not expired before today.
            if misc.contract_expiry(contract) >= today:
                unexpired.append(contract)
        live_by_asset[asset] = unexpired
        quote_symbols.extend(unexpired)
    quotes = web_sina_api.get_fut_quotes(quote_symbols)
    return live_by_asset, quotes
Exemplo n.º 4
0
def comfwd_db_loader(market_data, fwd_index, dep_tenors=[]):
    """Load a commodity forward curve from the database.

    Args:
        market_data: mapping; only ``market_data['market_date']`` is read.
        fwd_index: key into ``cmq_crv_defn.COM_Curve_Map``; the entry's
            ``'instID'`` is used as the product code.
        dep_tenors: accepted for interface compatibility; not used here
            (and never mutated, so the shared default list is harmless).

    Returns:
        A list of ``[date, expiry, close]`` rows, one per instrument of the
        curve. A message is printed when no data is found.
    """
    curve_info = cmq_crv_defn.COM_Curve_Map[fwd_index]
    prod_code = curve_info['instID']
    mdate = market_data['market_date']
    cnx = dbaccess.connect(**dbaccess.dbconfig)
    try:
        df = dbaccess.load_fut_curve(cnx, prod_code, mdate)
    finally:
        # The connection used to be leaked on every call.
        cnx.close()
    if len(df) == 0:
        print("COMFwd data is not available for %s on %s" % (fwd_index, mdate))
    df['date'] = df['instID'].apply(misc.inst2cont)
    df['expiry'] = df['instID'].apply(lambda x: misc.contract_expiry(x, []))
    return df[['date', 'expiry', 'close']].values.tolist()
Exemplo n.º 5
0
def comfwd_db_loader(market_data, fwd_index, dep_tenors = []):
    """Load a commodity forward curve, restricted to its active months.

    Args:
        market_data: mapping; ``'market_db'`` supplies the keyword arguments
            for ``dbaccess.connect`` and ``'market_key'`` selects the curve
            snapshot to load.
        fwd_index: key into ``cmq_crv_defn.COM_Curve_Map``; the entry's
            ``'instID'`` is the product code and ``'active_mths'`` the list
            of contract months to keep.
        dep_tenors: accepted for interface compatibility; not used here
            (and never mutated, so the shared default list is harmless).

    Returns:
        A list of ``[date, expiry, close]`` rows for the instruments whose
        ``date`` falls in one of the active months. A message is printed when
        no data is found.
    """
    curve_info = cmq_crv_defn.COM_Curve_Map[fwd_index]
    prod_code = curve_info['instID']
    mkt_db = market_data['market_db']
    mkey = market_data['market_key']
    cnx = dbaccess.connect(**mkt_db)
    try:
        df = dbaccess.load_fut_curve(cnx, prod_code, mkey)
    finally:
        # The connection used to be leaked on every call.
        cnx.close()
    if len(df) == 0:
        print("COMFwd data is not available for %s on %s" % (fwd_index, mkey))
    df['date'] = df['instID'].apply(misc.inst2cont)
    df['expiry'] = df['instID'].apply(lambda x: misc.contract_expiry(x, []))
    df = df[pd.to_datetime(df.date).dt.month.isin(curve_info['active_mths'])]
    return df[['date', 'expiry', 'close']].values.tolist()
def save_hist_data(start_date, end_date,
                   index_list = [],
                   product_codes=[],
                   spot_list = [],
                   min_bar = False):
    """Download history via ``load_symbol`` and append it to the database.

    Args:
        start_date, end_date: date range requested for indices and spots and
            used to pick the futures contracts.
        index_list: iterable of ``(symbol, cmd_idx, desc)``; daily bars are
            appended to ``fut_daily`` with ``instID = cmd_idx`` and ``exch``
            taken from the suffix after the last ``'.'`` in ``symbol``.
        product_codes: futures product codes; for every contract expiring on
            or after ``start_date`` daily bars are appended to ``fut_daily``
            (and minute bars to ``fut_min`` when ``min_bar`` is true). Each
            contract is loaded for at most 400 days before expiry, clipped to
            ``start_date`` and to today.
        spot_list: iterable of ``(symbol, spotID, desc)``; daily closes are
            appended to ``spot_daily``.
        min_bar: also download and store minute bars for futures contracts.

    The default lists are never mutated. The database connection is closed
    on exit, also when a download or insert fails.
    """
    conn = dbaccess.connect(**dbaccess.dbconfig)
    tday = datetime.date.today()
    bar_fields = ['open', 'high', 'low', 'close', 'volume', 'oi']
    try:
        for symbol, cmd_idx, desc in index_list:
            df = load_symbol(symbol, ['open', 'high', 'low', 'close', 'volume'], start_date, end_date)
            if len(df) > 0:
                df['instID'] = cmd_idx
                exch = symbol.split('.')[-1]
                df['exch'] = exch
                print("saving daily data for instID = %s with number of data pts = %s" % (cmd_idx, len(df)))
                df.to_sql('fut_daily', conn, 'sqlite', if_exists='append', index=False)
        if product_codes:
            # Reverse lookup (exchange -> wind suffix); it does not depend on
            # the product, so build it once instead of once per product.
            exch2wind_dict = dict((v, k) for k, v in wind_exch_map.items())
        for prodcode in product_codes:
            cont_mth, exch = dbaccess.prod_main_cont_exch(prodcode)
            cont_list, _ = misc.contract_range(prodcode, exch, cont_mth, start_date, end_date)
            exp_dates = [misc.contract_expiry(cont) for cont in cont_list]
            for cont, exp in zip(cont_list, exp_dates):
                if exp < start_date:
                    continue
                ex = exch2wind_dict[exch]
                symbol = cont + '.' + ex
                # Same window for daily and minute bars.
                load_start = max(exp - datetime.timedelta(days = 400), start_date)
                load_end = min(exp, tday)
                ddf = load_symbol(symbol, list(bar_fields), load_start, load_end, freq = 'd')
                if len(ddf) > 0:
                    print("saving daily data for instID = %s with number of data pts = %s" % (cont, len(ddf)))
                    ddf['instID'] = cont
                    ddf['exch'] = exch
                    ddf.to_sql('fut_daily', conn, 'sqlite', if_exists='append', index=False)
                if min_bar:
                    mdf = load_symbol(symbol, list(bar_fields), load_start, load_end, freq='m')
                    if len(mdf) > 0:
                        print("saving min data for instID = %s with number of data pts = %s" % (cont, len(mdf)))
                        mdf['instID'] = cont
                        mdf['exch'] = exch
                        mdf = process_min_id(mdf)
                        mdf.to_sql('fut_min', conn, 'sqlite', if_exists='append', index=False)
        for symbol, spotID, desc in spot_list:
            df = load_symbol(symbol, ['close'], start_date, end_date)
            if len(df) > 0:
                df['spotID'] = spotID
                print("saving daily data for spotID = %s with number of data pts = %s" % (spotID, len(df)))
                df.to_sql('spot_daily', conn, 'sqlite', if_exists='append', index=False)
    finally:
        # The connection was previously never closed.
        conn.close()
Exemplo n.º 7
0
def simcontract_min(config_file):
    """Run a per-contract minute-bar backtest described by a JSON config file.

    The JSON file is read for (at least) these keys: ``sim_func`` (dotted
    path of the simulation function), ``products``, ``start_date`` /
    ``end_date`` (``'%Y%m%d'``), ``scen_keys`` plus one list of values per
    scenario key, ``config`` and ``sim_name``. Optional keys: ``sim_mode``
    (default ``'OR'``; otherwise ``'<smode>-<cmode>'``), ``calc_coeffs``,
    ``cont_maplist``, ``sim_period``, ``need_daily``, ``pos_class``,
    ``proc_func`` and ``offset``.

    For every asset group and every scenario (the cartesian product of the
    scenario value lists) the simulation function is run contract by
    contract. Written under ``get_bktest_folder()/<config name>/``:
    ``<prefix><ix>_trades.csv``, ``<prefix><ix>_dailydata.csv``,
    ``<prefix>stats.json``, ``<prefix>cont_stat.csv`` and a cumulative
    ``summary.csv``. Scenarios whose trades and daily-data files already
    exist are skipped.

    Returns:
        None.
    """
    with open(config_file, 'r') as fp:
        sim_config = json.load(fp)
    # Resolve the dotted 'sim_func' path: __import__ returns the top-level
    # package, so walk the remaining attributes down to the function.
    bktest_split = sim_config['sim_func'].split('.')
    run_sim = __import__('.'.join(bktest_split[:-1]))
    for attr_name in bktest_split[1:]:
        run_sim = getattr(run_sim, attr_name)
    dir_name = config_file.split('.')[0]
    dir_name = dir_name.split(os.path.sep)[-1]
    test_folder = get_bktest_folder()
    file_prefix = test_folder + dir_name + os.path.sep
    if not os.path.exists(file_prefix):
        os.makedirs(file_prefix)
    sim_list = sim_config['products']
    # A flat list of products means one single-asset group per product.
    if not isinstance(sim_list[0], list):
        sim_list = [[str(asset)] for asset in sim_list]
    sim_mode = sim_config.get('sim_mode', 'OR')
    calc_coeffs = sim_config.get('calc_coeffs', [1, -1])
    cont_maplist = sim_config.get('cont_maplist', [])
    sim_period = sim_config.get('sim_period', '-12m')
    need_daily = sim_config.get('need_daily', False)
    if len(cont_maplist) == 0:
        cont_maplist = [[0]] * len(sim_list)
    config = {}
    start_date = datetime.datetime.strptime(sim_config['start_date'], '%Y%m%d').date()
    config['start_date'] = start_date
    end_date = datetime.datetime.strptime(sim_config['end_date'], '%Y%m%d').date()
    config['end_date'] = end_date
    scen_dim = [len(sim_config[s]) for s in sim_config['scen_keys']]
    outcol_list = (['asset', 'scenario'] + sim_config['scen_keys']
                   + ['sharp_ratio', 'tot_pnl', 'std_pnl', 'num_days',
                      'max_drawdown', 'max_dd_period', 'profit_dd_ratio',
                      'all_profit', 'tot_cost', 'win_ratio', 'num_win', 'num_loss',
                      'profit_per_win', 'profit_per_loss'])
    # One index tuple per scenario: the cartesian product of all scenario axes.
    scenarios = [list(s) for s in np.ndindex(tuple(scen_dim))]
    config.update(sim_config['config'])
    # SECURITY NOTE: eval() executes arbitrary expressions taken from the
    # config file - only run this on trusted configuration files.
    if 'pos_class' in sim_config:
        config['pos_class'] = eval(sim_config['pos_class'])
    if 'proc_func' in sim_config:
        config['proc_func'] = eval(sim_config['proc_func'])
    file_prefix = file_prefix + sim_config['sim_name']
    if config.get('close_daily'):
        file_prefix = file_prefix + 'daily_'
    config['file_prefix'] = file_prefix
    summary_df = pd.DataFrame()
    fname = config['file_prefix'] + 'summary.csv'
    if os.path.isfile(fname):
        summary_df = pd.DataFrame.from_csv(fname)
    for assets, cont_map in zip(sim_list, cont_maplist):
        file_prefix = config['file_prefix'] + '_' + sim_mode + '_' + '_'.join(assets) + '_'
        fname = file_prefix + 'stats.json'
        output = {'total': {}, 'cont': {}}
        # Resume from previously saved statistics when present.
        if os.path.isfile(fname):
            with open(fname, 'r') as fp:
                output = json.load(fp)
        min_data = {}
        day_data = {}
        config['tick_base'] = 0
        config['marginrate'] = (0, 0)
        rollrule = '-50b'
        config['exit_min'] = config.get('exit_min', 2057)
        config['no_trade_set'] = config.get('no_trade_set', [])
        # Default roll rule depends on the first asset; an explicit
        # 'rollrule' in the config overrides it.
        if assets[0] in ['cu', 'al', 'zn']:
            rollrule = '-1b'
        elif assets[0] in ['IF', 'IH', 'IC']:
            rollrule = '-2b'
        elif assets[0] in ['au', 'ag']:
            rollrule = '-25b'
        elif assets[0] in ['TF', 'T']:
            rollrule = '-20b'
        rollrule = config.get('rollrule', rollrule)
        contlist = {}
        exp_dates = {}
        for prod in assets:
            cont_mth, exch = mysqlaccess.prod_main_cont_exch(prod)
            contlist[prod] = misc.contract_range(prod, exch, cont_mth, start_date, end_date)
            exp_dates[prod] = [misc.contract_expiry(cont) for cont in contlist[prod]]
            edates = [misc.day_shift(d, rollrule) for d in exp_dates[prod]]
            sdates = [misc.day_shift(d, sim_period) for d in exp_dates[prod]]
            # tick_base accumulates over the legs; marginrate keeps the
            # largest margin among them.
            config['tick_base'] += trade_offset_dict[prod]
            config['marginrate'] = (max(config['marginrate'][0], sim_margin_dict[prod]),
                                    max(config['marginrate'][1], sim_margin_dict[prod]))
            min_data[prod] = {}
            day_data[prod] = {}
            for cont, sd, ed in zip(contlist[prod], sdates, edates):
                minid_start = 1500
                minid_end = 2114
                if prod in misc.night_session_markets:
                    minid_start = 300
                tmp_df = mysqlaccess.load_min_data_to_df('fut_min', cont, sd, ed, minid_start, minid_end, database = 'hist_data')
                tmp_df['contract'] = cont
                min_data[prod][cont] = cleanup_mindata(tmp_df, prod)
                if need_daily:
                    tmp_df = mysqlaccess.load_daily_data_to_df('fut_daily', cont, sd, ed, database = 'hist_data')
                    day_data[prod][cont] = tmp_df
        if 'offset' in sim_config:
            config['offset'] = sim_config['offset'] * config['tick_base']
        else:
            config['offset'] = config['tick_base']
        for ix, s in enumerate(scenarios):
            fname1 = file_prefix + str(ix) + '_trades.csv'
            fname2 = file_prefix + str(ix) + '_dailydata.csv'
            # Scenario already simulated in a previous run.
            if os.path.isfile(fname1) and os.path.isfile(fname2):
                continue
            for key, seq in zip(sim_config['scen_keys'], s):
                config[key] = sim_config[key][seq]
            df_list = []
            trade_list = []
            # cont_map holds contract offsets relative to idx; trim the range
            # so that idx + offset always stays inside the contract list.
            for idx in range(abs(min(cont_map)), len(contlist[assets[0]]) - max(cont_map)):
                cont = contlist[assets[0]][idx]
                edate = misc.day_shift(exp_dates[assets[0]][idx], rollrule)
                if sim_mode == 'OR':
                    mdf = min_data[assets[0]][cont]
                    mdf = mdf[mdf.date <= edate]
                    if need_daily:
                        ddf = day_data[assets[0]][cont]
                        config['ddf'] = ddf[ddf.index <= edate]
                        if len(config['ddf']) < 10:
                            continue
                else:
                    mode_keylist = sim_mode.split('-')
                    smode = mode_keylist[0]
                    cmode = mode_keylist[1]
                    all_data = []
                    # 'TS': several contracts of the first asset; otherwise
                    # one contract per asset, each at its own offset.
                    if smode == 'TS':
                        all_data = [min_data[assets[0]][contlist[assets[0]][idx+i]] for i in cont_map]
                    else:
                        all_data = [min_data[asset][contlist[asset][idx+i]] for asset, i in zip(assets, cont_map)]
                    if cmode == 'Full':
                        # Keep every leg's columns, suffixed by the leg index.
                        mdf = pd.concat(all_data, axis = 1, join = 'inner')
                        mdf.columns = [col + str(i) for i, x in enumerate(all_data) for col in x.columns]
                        mdf = mdf[mdf.date0 < edate]
                    else:
                        # Build one synthetic series as the coefficient-
                        # weighted sum of the legs' open and close.
                        for i, (coeff, tmpdf) in enumerate(zip(calc_coeffs, all_data)):
                            if i == 0:
                                xopen = tmpdf['open'] * coeff
                                xclose = tmpdf['close'] * coeff
                            else:
                                xopen = xopen + tmpdf['open'] * coeff
                                xclose = xclose + tmpdf['close'] * coeff
                        xopen = xopen.dropna()
                        xclose = xclose.dropna()
                        xhigh = pd.concat([xopen, xclose], axis = 1).max(axis = 1)
                        xlow = pd.concat([xopen, xclose], axis = 1).min(axis = 1)
                        col_list = ['date', 'min_id', 'volume', 'openInterest']
                        mdf = pd.concat([xopen, xhigh, xlow, xclose] + [all_data[0][col] for col in col_list], axis = 1, join = 'inner')
                        mdf.columns = ['open', 'high', 'low', 'close'] + col_list
                        mdf['contract'] = cont
                    if need_daily:
                        if smode == 'TS':
                            all_data = [day_data[assets[0]][contlist[assets[0]][idx+i]] for i in cont_map]
                        else:
                            # Fixed bracket placement: the contract must be
                            # picked from the list before it is used as the
                            # key into day_data (as done for min_data above).
                            all_data = [day_data[asset][contlist[asset][idx+i]] for asset, i in zip(assets, cont_map)]
                        if cmode == 'Full':
                            ddf = pd.concat(all_data, axis = 1, join = 'inner')
                            ddf.columns = [col + str(i) for i, x in enumerate(all_data) for col in x.columns]
                            config['ddf'] = ddf[ddf.index <= edate]
                        else:
                            for i, (coeff, tmpdf) in enumerate(zip(calc_coeffs, all_data)):
                                if i == 0:
                                    xopen = tmpdf['open'] * coeff
                                    xclose = tmpdf['close'] * coeff
                                else:
                                    xopen = xopen + tmpdf['open'] * coeff
                                    xclose = xclose + tmpdf['close'] * coeff
                            xhigh = pd.concat([xopen, xclose], axis = 1).max(axis = 1)
                            xlow = pd.concat([xopen, xclose], axis = 1).min(axis = 1)
                            col_list = ['volume', 'openInterest']
                            ddf = pd.concat([xopen, xhigh, xlow, xclose] + [all_data[0][col] for col in col_list], axis = 1, join = 'inner')
                            ddf.columns = ['open', 'high', 'low', 'close'] + col_list
                            ddf['contract'] = cont
                            config['ddf'] = ddf[ddf.index <= edate]
                        if len(config['ddf']) < 10:
                            continue
                df = mdf.copy(deep = True)
                df, closed_trades = run_sim(df, config)
                df_list.append(df)
                trade_list = trade_list + closed_trades
                (res_pnl, ts) = get_pnl_stats([df], config['capital'], config['marginrate'], 'm')
                # NOTE(review): trade stats here use the trades accumulated
                # over all contracts so far, not only this contract's -
                # confirm this is intended for the per-contract output.
                res_trade = get_trade_stats(trade_list)
                res = dict(res_pnl)
                res.update(res_trade)
                res.update(dict(zip(sim_config['scen_keys'], s)))
                res['asset'] = cont
                if cont not in output['cont']:
                    output['cont'][cont] = {}
                output['cont'][cont][ix] = res
            (res_pnl, ts) = get_pnl_stats(df_list, config['capital'], config['marginrate'], 'm')
            res_trade = get_trade_stats(trade_list)
            res = dict(res_pnl)
            res.update(res_trade)
            res.update(dict(zip(sim_config['scen_keys'], s)))
            res['asset'] = '_'.join(assets)
            output['total'][ix] = res
            print('saving results for asset = %s, scen = %s' % ('_'.join(assets), str(ix)))
            all_trades = {}
            for i, tradepos in enumerate(trade_list):
                all_trades[i] = strat.tradepos2dict(tradepos)
            trades = pd.DataFrame.from_dict(all_trades).T
            trades.to_csv(fname1)
            ts.to_csv(fname2)
            fname = file_prefix + 'stats.json'
            try:
                with open(fname, 'w') as ofile:
                    json.dump(output, ofile)
            except Exception as e:
                # Saving the stats file stays best-effort, but the failure is
                # now reported instead of being silently swallowed.
                print('failed to save stats to %s: %s' % (fname, e))
        cont_df = pd.DataFrame()
        for idx in range(abs(min(cont_map)), len(contlist[assets[0]]) - max(cont_map)):
            cont = contlist[assets[0]][idx]
            if cont not in output['cont']:
                continue
            res = scen_dict_to_df(output['cont'][cont])
            out_res = res[outcol_list]
            # Only the first 20 rows of each result table are kept.
            if len(cont_df) == 0:
                cont_df = out_res[:20].copy(deep = True)
            else:
                cont_df = cont_df.append(out_res[:20])
        fname = file_prefix + 'cont_stat.csv'
        cont_df.to_csv(fname)
        res = scen_dict_to_df(output['total'])
        out_res = res[outcol_list]
        if len(summary_df) == 0:
            summary_df = out_res[:20].copy(deep = True)
        else:
            summary_df = summary_df.append(out_res[:20])
        fname = config['file_prefix'] + 'summary.csv'
        summary_df.to_csv(fname)
    return
Exemplo n.º 8
0
def load_db_to_xlmkt(tag, xlfile = MKT_LATEST_XLFILE):
    """Export the market snapshot identified by *tag* into an Excel workbook.

    For every product in ``all_asset_list`` the futures, FX and IR curves are
    loaded from the snapshot database and written side by side (one block of
    columns per product, starting at row 1) into the ``COMM``, ``FX`` and
    ``IR`` sheets. An existing workbook at *xlfile* is loaded and updated;
    otherwise a new one is created.

    Finally a spread between the ``fef`` and ``i`` curves, converted with the
    first ``USD/CNY`` rate, is written to cell ``F1`` of the ``COMM`` sheet.

    Args:
        tag: snapshot key passed to the ``dbaccess`` curve loaders.
        xlfile: path of the workbook to create or update.
    """
    if os.path.isfile(xlfile):
        book = openpyxl.load_workbook(xlfile)
        writer = pd.ExcelWriter(xlfile, engine='openpyxl')
        writer.book = book
        writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
    else:
        writer = pd.ExcelWriter(xlfile)
    cnx = dbaccess.connect(**dbaccess.mktsnap_dbconfig)
    # Curves needed afterwards for the spread calculation.
    req_data = {'i':None, 'fef':None, 'USD/CNY':None}
    prod_map = dict((prod, cont) for (prod, exch, cont) in live_asset_list)
    xl_structure = {"fut_daily": "COMM", "fx_daily": "FX", "ir_daily": "IR"}
    vol_columns = ['COMVolATM', 'COMVolV90', 'COMVolV75', 'COMVolV25', 'COMVolV10']
    try:
        for tab_key in ['fut_daily', 'fx_daily', 'ir_daily']:
            for idx, prod_code in enumerate(all_asset_list[tab_key]):
                if tab_key == 'fut_daily':
                    df = dbaccess.load_fut_curve(cnx, prod_code, tag)
                    df['product_code'] = prod_code
                    df['tenor_label'] = df['instID'].apply(misc.inst2cont)
                    df['expiry_date'] = df['instID'].apply(lambda x: misc.contract_expiry(x, []))
                    df['exch'] = misc.prod2exch(prod_code)
                    df.rename(columns = {'close': 'COMFwd'}, inplace = True)
                    # Keep only the contract months listed for this product.
                    if prod_code in prod_map:
                        df = df[pd.to_datetime(df['tenor_label']).dt.month.isin(prod_map[prod_code])]
                    if prod_code in option_markets:
                        # Join the vol curve onto the forward curve by tenor.
                        vol_tbl = dbaccess.load_cmvol_curve(cnx, prod_code, tag)
                        vol_tbl = vol_tbl.set_index('tenor_label')
                        vol_tbl.drop(['expiry_date'], axis = 1,inplace = True)
                        df = df.set_index('tenor_label')
                        df = pd.concat([df, vol_tbl], axis = 1)
                        df = df.reset_index()
                        df.rename(columns = {'index': 'tenor_label'}, inplace = True)
                    else:
                        # No option market: placeholder ATM vol of 0.2 and
                        # zero for the remaining vol columns.
                        for key in vol_columns:
                            df[key] = 0.2 if key == 'COMVolATM' else 0.0
                    df = df[['product_code', 'instID', 'exch', 'tenor_label', 'expiry_date', 'COMFwd'] + vol_columns].fillna(method = 'ffill')
                    df['CalSpread'] = (df['COMFwd'] - df['COMFwd'].shift(-1)).fillna(method = 'ffill')
                    multi = 13
                elif tab_key == 'fx_daily':
                    df = dbaccess.load_fut_curve(cnx, prod_code, tag, dbtable='fx_daily', field='ccy')
                    df = df[df['rate']>0]
                    # Positional access: after the filter above the row with
                    # index label 0 may be gone.
                    df['fwd_points'] = df['rate'] - df['rate'].iloc[0]
                    df = df[['ccy', 'tenor', 'rate', 'fwd_points']]
                    multi = 5
                elif tab_key == 'ir_daily':
                    df = dbaccess.load_fut_curve(cnx, prod_code, tag, dbtable='ir_daily', field='ir_index')
                    df = df[['ir_index', 'tenor', 'rate']]
                    multi = 4
                # 'multi' is the column width reserved per product block.
                df.to_excel(writer, xl_structure[tab_key], index = False, startcol = idx * multi, startrow = 1)
                if prod_code in req_data:
                    req_data[prod_code] = df
    finally:
        # The connection was previously never closed.
        cnx.close()
    # SGX-DCE spread calc: use the second 'i' contract once today has reached
    # the first contract's tenor label, otherwise the first one.
    # .iloc is used because the month filter can drop the row labelled 0.
    tday = datetime.date.today()
    if tday >= req_data['i']['tenor_label'].iloc[0]:
        dce_prompt = req_data['i']['instID'].iloc[1]
    else:
        dce_prompt = req_data['i']['instID'].iloc[0]
    sgx_prompt = 'fef' + dce_prompt[-4:]
    sgx_price = float(req_data['fef'].loc[req_data['fef']['instID'] == sgx_prompt, 'COMFwd'])
    dce_price = float(req_data['i'].loc[req_data['i']['instID'] == dce_prompt, 'COMFwd'])
    fx = float(req_data['USD/CNY']['rate'].iloc[0])
    sgx_dce_spd = sgx_price - (dce_price - 30.0/0.92)/1.16/fx
    wb = writer.book
    wb['COMM']['F1'] = sgx_dce_spd
    writer.save()