def load_data(self, assets):
    """Load simulation bar data for every product in *assets*.

    Populates self.contlist[prod], self.exp_dates[prod] and
    self.data_store[prod][cont]. Daily or minute bars are loaded depending
    on self.sim_freq. Connection settings come from self.config['dbconfig'],
    falling back to dbaccess.hist_dbconfig.
    """
    dbconfig = self.config.get('dbconfig', dbaccess.hist_dbconfig)
    cnx = dbaccess.connect(**dbconfig)
    try:
        for prod in assets:
            cont_mth, exch = dbaccess.prod_main_cont_exch(prod)
            # BUG FIX: the original wrote to self.contlist/self.exp_dates but
            # then read from empty *local* dicts (contlist/exp_dates), which
            # raised KeyError on the first product. Read the instance
            # attributes consistently instead.
            self.contlist[prod], _ = misc.contract_range(
                prod, exch, cont_mth, self.start_date, self.end_date)
            self.exp_dates[prod] = [misc.contract_expiry(cont)
                                    for cont in self.contlist[prod]]
            # Per-contract load window: start sim_period before expiry,
            # end at the roll date implied by the roll rule.
            edates = [misc.day_shift(d, self.config['rollrule'])
                      for d in self.exp_dates[prod]]
            sdates = [misc.day_shift(d, self.sim_period)
                      for d in self.exp_dates[prod]]
            self.data_store[prod] = {}
            for cont, sd, ed in zip(self.contlist[prod], sdates, edates):
                if self.sim_freq == 'd':
                    tmp_df = dbaccess.load_daily_data_to_df(
                        cnx, 'fut_min', cont, sd, ed)
                else:
                    # Day session min ids run 1500-2114; markets with a night
                    # session start earlier at 300.
                    minid_start = 1500
                    minid_end = 2114
                    if prod in misc.night_session_markets:
                        minid_start = 300
                    tmp_df = dbaccess.load_min_data_to_df(
                        cnx, 'fut_min', cont, sd, ed, minid_start, minid_end)
                    misc.cleanup_mindata(tmp_df, prod)
                tmp_df['contract'] = cont
                self.data_store[prod][cont] = tmp_df
    finally:
        # Robustness fix: close the connection even if a load fails.
        cnx.close()
def load_data(self, assets):
    """Load daily or minute bar data for each product into self.data_store.

    Also fills self.contlist and self.exp_dates per product. The database
    connection comes from self.config['dbconfig'] (default
    dbaccess.hist_dbconfig) and is always closed on exit.
    """
    dbconfig = self.config.get('dbconfig', dbaccess.hist_dbconfig)
    cnx = dbaccess.connect(**dbconfig)
    try:
        for prod in assets:
            cont_mth, exch = dbaccess.prod_main_cont_exch(prod)
            # BUG FIX: self.contlist/self.exp_dates were assigned here, yet
            # the loops below iterated the never-filled local dicts
            # contlist/exp_dates (KeyError). Use the instance attributes
            # everywhere and drop the dead locals.
            self.contlist[prod], _ = misc.contract_range(
                prod, exch, cont_mth, self.start_date, self.end_date)
            self.exp_dates[prod] = [misc.contract_expiry(cont)
                                    for cont in self.contlist[prod]]
            # Load window per contract: from sim_period before expiry up to
            # the roll date.
            edates = [misc.day_shift(d, self.config['rollrule'])
                      for d in self.exp_dates[prod]]
            sdates = [misc.day_shift(d, self.sim_period)
                      for d in self.exp_dates[prod]]
            self.data_store[prod] = {}
            for cont, sd, ed in zip(self.contlist[prod], sdates, edates):
                if self.sim_freq == 'd':
                    tmp_df = dbaccess.load_daily_data_to_df(
                        cnx, 'fut_min', cont, sd, ed)
                else:
                    # Minute ids 1500-2114 cover the day session; night
                    # session markets start at 300.
                    minid_start = 1500
                    minid_end = 2114
                    if prod in misc.night_session_markets:
                        minid_start = 300
                    tmp_df = dbaccess.load_min_data_to_df(
                        cnx, 'fut_min', cont, sd, ed, minid_start, minid_end)
                    misc.cleanup_mindata(tmp_df, prod)
                tmp_df['contract'] = cont
                self.data_store[prod][cont] = tmp_df
    finally:
        # Leak fix: guarantee the connection is released on error paths too.
        cnx.close()
def r_breaker( asset, start_date, end_date, scenarios, freqs, config): nearby = config['nearby'] rollrule = config['rollrule'] start_d = misc.day_shift(start_date, '-1b') file_prefix = config['file_prefix'] + '_' + asset + '_' ddf = misc.nearby(asset, nearby, start_date, end_date, rollrule, 'd', need_shift=True) mdf = misc.nearby(asset, nearby, start_date, end_date, rollrule, 'm', need_shift=True) mdf = backtest.cleanup_mindata(mdf, asset) #ddf = dh.conv_ohlc_freq(mdf, 'D') output = {} for ix, freq in enumerate(freqs): if freq !='1min': df = dh.conv_ohlc_freq(mdf, freq) else: df = mdf for iy, k in enumerate(scenarios): idx = ix*10+iy config['k'] = k (res, closed_trades, ts) = r_breaker_sim( ddf, df, config) output[idx] = res print 'saving results for scen = %s' % str(idx) all_trades = {} for i, tradepos in enumerate(closed_trades): all_trades[i] = strat.tradepos2dict(tradepos) fname = file_prefix + str(idx) + '_trades.csv' trades = pd.DataFrame.from_dict(all_trades).T trades.to_csv(fname) fname = file_prefix + str(idx) + '_dailydata.csv' ts.to_csv(fname) fname = file_prefix + 'stats.csv' res = pd.DataFrame.from_dict(output) res.to_csv(fname) return
def dual_thrust( asset, start_date, end_date, scenarios, config):
    # Batch-run the dual-thrust breakout backtest for one asset over a list
    # of scenarios, writing per-scenario trades/daily CSVs and a combined
    # stats CSV.
    nearby = config['nearby']
    rollrule = config['rollrule']
    # Start 4 business days early — presumably to warm up the breakout
    # range window; TODO confirm against dual_thrust_sim.
    start_d = misc.day_shift(start_date, '-4b')
    file_prefix = config['file_prefix'] + '_' + asset + '_'
    # Daily and minute nearby (continuous) series, price-shifted at rolls.
    ddf = misc.nearby(asset, nearby, start_d, end_date, rollrule, 'd', need_shift=True)
    mdf = misc.nearby(asset, nearby, start_d, end_date, rollrule, 'm', need_shift=True)
    mdf = backtest.cleanup_mindata(mdf, asset)
    #ddf = dh.conv_ohlc_freq(mdf, 'D')
    output = {}
    for ix, s in enumerate(scenarios):
        # Scenario layout appears to be s = (k, win, m) — confirm with caller.
        config['win'] = s[1]
        config['k'] = s[0]
        config['m'] = s[2]
        (res, closed_trades, ts) = dual_thrust_sim( ddf, mdf, config)
        output[ix] = res
        print 'saving results for scen = %s' % str(ix)
        all_trades = {}
        for i, tradepos in enumerate(closed_trades):
            all_trades[i] = strat.tradepos2dict(tradepos)
        fname = file_prefix + str(ix) + '_trades.csv'
        trades = pd.DataFrame.from_dict(all_trades).T
        trades.to_csv(fname)
        fname = file_prefix + str(ix) + '_dailydata.csv'
        ts.to_csv(fname)
    # Combined per-scenario result table.
    fname = file_prefix + 'stats.csv'
    res = pd.DataFrame.from_dict(output)
    res.to_csv(fname)
    return
def turtle( asset, start_date, end_date, systems, config):
    # Batch-run turtle-style systems (signal windows + risk limits) for one
    # asset; each system's trades and daily data are dumped to CSV.
    rollrule = config['rollrule']
    nearby = config['nearby']
    file_prefix = config['file_prefix'] + '_' + asset + '_'
    # Pull daily history back by the longest window used by any system (in
    # business days) so every signal has a full warm-up period.
    start_d = misc.day_shift(start_date, '-'+str(max([ max(sys) for sys in systems]))+'b')
    ddf = misc.nearby(asset, nearby, start_d, end_date, rollrule, 'd', need_shift=True)
    # NOTE(review): unlike dual_thrust, mdf is not passed through
    # cleanup_mindata here — confirm whether that is intentional.
    mdf = misc.nearby(asset, nearby, start_date, end_date, rollrule, 'm', need_shift=True)
    #ddf = dh.conv_ohlc_freq(mdf, 'D')
    output = {}
    for ix, sys in enumerate(systems):
        # System layout: first three entries are the signal windows, then
        # max_loss and max_pos.
        config['signals'] = sys[:3]
        config['max_loss'] = sys[3]
        config['max_pos'] = sys[4]
        (res, closed_trades, ts) = turtle_sim( ddf, mdf, config)
        output[ix] = res
        print 'saving results for scen = %s' % str(ix)
        all_trades = {}
        for i, tradepos in enumerate(closed_trades):
            all_trades[i] = strat.tradepos2dict(tradepos)
        fname = file_prefix + str(ix) + '_trades.csv'
        trades = pd.DataFrame.from_dict(all_trades).T
        trades.to_csv(fname)
        fname = file_prefix + str(ix) + '_dailydata.csv'
        ts.to_csv(fname)
    fname = file_prefix + 'stats.csv'
    res = pd.DataFrame.from_dict(output)
    res.to_csv(fname)
    return
def flat_ir_curve(tday, rate):
    """Build a flat interest-rate curve as of *tday*.

    Returns a list of [tenor_label, tenor_date, rate] rows, one per standard
    tenor, where tenor_date is tday shifted by the lower-cased tenor label.
    """
    tenors = ['1W', '2W', '1M', '3M', '6M', '9M', '1Y', '3Y']
    return [[ten, misc.day_shift(tday, ten.lower()), rate] for ten in tenors]
def dual_thrust( asset, start_date, end_date, scenarios, config):
    # Variant of the dual-thrust scenario runner (2-business-day warm-up,
    # no minute-data cleanup). NOTE(review): this duplicates dual_thrust
    # above — likely different module versions concatenated; confirm which
    # one is live.
    nearby = config['nearby']
    rollrule = config['rollrule']
    start_d = misc.day_shift(start_date, '-2b')
    file_prefix = config['file_prefix'] + '_' + asset + '_'
    # Continuous daily and minute series, price-shifted at rolls.
    ddf = misc.nearby(asset, nearby, start_d, end_date, rollrule, 'd', need_shift=True)
    mdf = misc.nearby(asset, nearby, start_d, end_date, rollrule, 'm', need_shift=True)
    #ddf = dh.conv_ohlc_freq(mdf, 'D')
    output = {}
    for ix, s in enumerate(scenarios):
        # Scenario layout appears to be s = (k, win, m) — confirm with caller.
        config['win'] = s[1]
        config['k'] = s[0]
        config['m'] = s[2]
        (res, closed_trades, ts) = dual_thrust_sim( ddf, mdf, config)
        output[ix] = res
        print 'saving results for scen = %s' % str(ix)
        all_trades = {}
        for i, tradepos in enumerate(closed_trades):
            all_trades[i] = strat.tradepos2dict(tradepos)
        fname = file_prefix + str(ix) + '_trades.csv'
        trades = pd.DataFrame.from_dict(all_trades).T
        trades.to_csv(fname)
        fname = file_prefix + str(ix) + '_dailydata.csv'
        ts.to_csv(fname)
    fname = file_prefix + 'stats.csv'
    res = pd.DataFrame.from_dict(output)
    res.to_csv(fname)
    return
def dual_thrust(asset, start_date, end_date, scenarios, config):
    # Minute-only dual-thrust runner: the simulator derives everything from
    # mdf (no separate daily frame) and scenarios carry a 4th parameter "f".
    nearby = config["nearby"]
    rollrule = config["rollrule"]
    # 2-business-day warm-up before the requested start.
    start_d = misc.day_shift(start_date, "-2b")
    file_prefix = config["file_prefix"] + "_" + asset + "_"
    mdf = misc.nearby(asset, nearby, start_d, end_date, rollrule, "m", need_shift=True)
    mdf = backtest.cleanup_mindata(mdf, asset)
    output = {}
    for ix, s in enumerate(scenarios):
        # Scenario layout appears to be s = (k, win, m, f) — confirm with
        # dual_thrust_sim.
        config["win"] = s[1]
        config["k"] = s[0]
        config["m"] = s[2]
        config["f"] = s[3]
        (res, closed_trades, ts) = dual_thrust_sim(mdf, config)
        output[ix] = res
        print "saving results for scen = %s" % str(ix)
        all_trades = {}
        for i, tradepos in enumerate(closed_trades):
            all_trades[i] = strat.tradepos2dict(tradepos)
        fname = file_prefix + str(ix) + "_trades.csv"
        trades = pd.DataFrame.from_dict(all_trades).T
        trades.to_csv(fname)
        fname = file_prefix + str(ix) + "_dailydata.csv"
        ts.to_csv(fname)
    fname = file_prefix + "stats.csv"
    res = pd.DataFrame.from_dict(output)
    res.to_csv(fname)
    return
def load_csv_to_db(edate, save_loc='C:\\dev\\data\\', freq='m', is_replace=False):
    # Import previously-dumped CSV bar files into the database.
    # freq 'm': <cont>_min.csv rows are bulk-inserted as minute bars;
    # freq 'd': <cont>_daily.csv rows are inserted one-by-one as daily bars.
    # Returns False for an unsupported freq, True otherwise.
    cnx = dbaccess.connect(**dbaccess.dbconfig)
    cont_list = misc.filter_main_cont(edate, False)
    if freq not in ['m', 'd']:
        return False
    for cont in cont_list:
        if freq == 'm':
            filename = save_loc + cont + '_min.csv'
        else:
            filename = save_loc + cont + '_daily.csv'
        # Silently skip contracts with no dumped file.
        if not os.path.isfile(filename):
            continue
        data_reader = csv.reader(file(filename, 'rb'))
        mindata_list = []
        for idx, line in enumerate(data_reader):
            # idx 0 is the header row.
            if idx > 0:
                # Expected column order: volume, datetime, high, low, close,
                # openInterest, open (matches the assignments below).
                # Skip rows with missing OHLC/volume values.
                if 'nan' in [line[0], line[2], line[3], line[4], line[6]]:
                    continue
                min_data = {}
                min_data['volume'] = int(float(line[0]))
                # Zero-volume bars are dropped.
                if min_data['volume'] <= 0:
                    continue
                dtime = datetime.datetime.strptime(line[1], '%Y-%m-%d %H:%M:%S.%f')
                if freq == 'm':
                    # Truncate sub-second precision for minute bars.
                    min_data['datetime'] = dtime.replace(microsecond=0)
                else:
                    min_data['date'] = dtime.date()
                min_data['high'] = float(line[2])
                min_data['low'] = float(line[3])
                min_data['close'] = float(line[4])
                # Open interest may legitimately be missing; store 0 then.
                if line[5] == 'nan':
                    oi = 0
                else:
                    oi = int(float(line[5]))
                min_data['openInterest'] = oi
                min_data['open'] = float(line[6])
                if freq == 'm':
                    min_data['min_id'] = get_min_id(dtime)
                    trading_date = dtime.date()
                    # Early min ids belong to the overnight session of the
                    # *next* business day's trading date.
                    if min_data['min_id'] < 600:
                        trading_date = misc.day_shift(trading_date, '1b')
                    min_data['date'] = trading_date
                    mindata_list.append(min_data)
                else:
                    print cont
                    dbaccess.insert_daily_data(cont, min_data, is_replace=is_replace, dbtable='fut_daily')
        if freq == 'm':
            print cont
            # One bulk insert per contract for minute data.
            dbaccess.bulkinsert_min_data(cont, mindata_list, is_replace=is_replace)
    return True
def get_pnl_stats(df_list, marginrate, freq, tenors = ['3m', '6m', '1y', '2y', '3y'], start_capital = 10000.0):
    # Aggregate PnL, margin and cost across a list of per-contract frames,
    # collapse to a daily series, and compute pnl_stats over trailing tenor
    # windows. Returns (stats_dict, daily_dataframe).
    # NOTE: tenors uses a mutable default; it is only iterated, never
    # mutated, so this is harmless here.
    sum_pnl = pd.Series(name='pnl')
    sum_margin = pd.Series(name='margin')
    sum_cost = pd.Series(name='cost')
    # Minute frames are indexed by (date, min_id) so they can later be
    # grouped back to daily level.
    if freq == 'm':
        index_col = ['date', 'min_id']
    else:
        index_col = ['date']
    for df in df_list:
        xdf = df.reset_index().set_index(index_col)
        # Mark-to-market on traded_price when available, else close.
        if 'traded_price' in xdf.columns:
            field = 'traded_price'
        else:
            field = 'close'
        # Position held over the bar times the bar's price change.
        pnl = xdf['pos'].shift(1).fillna(0.0) * (xdf[field] - xdf[field].shift(1)).fillna(0.0)
        if 'closeout' in xdf.columns:
            pnl = pnl + xdf['closeout']
        # pnl = pnl + (xdf['pos'] - xdf['pos'].shift(1).fillna(0.0)) * (xdf['close'] - xdf['traded_price'])
        if len(sum_pnl) == 0:
            sum_pnl = pd.Series(pnl, name='pnl')
        else:
            sum_pnl = sum_pnl.add(pnl, fill_value=0)
        # Margin uses the worse of the long/short margin rates for the
        # current position sign.
        margin = pd.Series( pd.concat([xdf.pos * marginrate[0] * xdf.close, -xdf.pos * marginrate[1] * xdf.close], join='outer', axis=1).max(1), name='margin')
        if len(sum_margin) == 0:
            sum_margin = margin
        else:
            sum_margin = sum_margin.add(margin, fill_value=0)
        if len(sum_cost) == 0:
            sum_cost = xdf['cost']
        else:
            sum_cost = sum_cost.add(xdf['cost'], fill_value=0)
    if freq == 'm':
        # Collapse minute-level series to daily: sum pnl/cost, take the last
        # margin snapshot of each day.
        daily_pnl = pd.Series(sum_pnl.groupby(level=0).sum(), name='daily_pnl')
        daily_margin = pd.Series(sum_margin.groupby(level=0).last(), name='daily_margin')
        daily_cost = pd.Series(sum_cost.groupby(level=0).sum(), name='daily_cost')
    else:
        daily_pnl = sum_pnl
        daily_margin = sum_margin
        daily_cost = sum_cost
        daily_pnl.name = 'daily_pnl'
        daily_margin.name = 'daily_margin'
        daily_cost.name = 'daily_cost'
    # Equity curve: cumulative pnl plus cumulative cost on top of the
    # starting capital (cost appears to be signed; confirm upstream).
    cum_pnl = pd.Series(daily_pnl.cumsum() + daily_cost.cumsum() + start_capital, name='cum_pnl')
    df = pd.concat([cum_pnl, daily_pnl, daily_margin, daily_cost], join='outer', axis=1)
    res = {}
    for tenor in tenors:
        # Trailing window ending at the last available day.
        edate = df.index[-1]
        sdate = misc.day_shift(edate, '-' + tenor)
        pnl_df = df[df.index >= sdate]
        res_by_tenor = pnl_stats(pnl_df)
        # NaNs are reported as 0 so downstream CSV/JSON stays clean.
        for field in res_by_tenor:
            res[field + '_' + tenor] = 0 if np.isnan(res_by_tenor[field]) else res_by_tenor[field]
        # Stop once the window already covers all available history.
        if sdate < df.index[0]:
            break
    return res, df
def get_wind_data(inst_list, start_date, end_date, save_loc='C:\\dev\\data\\', freq='m'):
    # Download minute ('m') or daily ('d') bars for each instrument from the
    # Wind terminal API (w.wsi / w.wsd) and dump them to per-instrument CSVs.
    # Existing output files are skipped so the function is resumable.
    exch_map = {v: k for k, v in wind_exch_map.items()}
    for instID in inst_list:
        exch = misc.inst2exch(instID)
        ex = exch_map[exch]
        # Wind tickers are '<inst>.<exchange suffix>'.
        ticker = instID + '.' + ex
        product = misc.inst2product(instID)
        sdate = start_date
        edate = end_date
        # Default day-session window 09:00-15:00.
        stime = datetime.time(9, 0, 0)
        etime = datetime.time(15, 0, 0)
        if product in ['T', 'TF']:
            # Bond futures trade 09:15-15:15.
            stime = datetime.time(9, 15, 0)
            etime = datetime.time(15, 15, 0)
        elif product in misc.night_session_markets:
            # Night-session products: start at 21:00 on the previous
            # business day.
            stime = datetime.time(21, 0, 0)
            sdate = misc.day_shift(sdate, '-1b')
        smin = datetime.datetime.combine(sdate, stime)
        emin = datetime.datetime.combine(edate, etime)
        fields = 'open,high,low,close,volume,oi'
        try:
            if freq == 'm':
                outfile = save_loc + instID + '_min.csv'
                if os.path.isfile(outfile):
                    continue
                raw_data = w.wsi(ticker, fields, smin, emin)
                # Data columns follow the requested field order above.
                if len(raw_data.Data) > 1:
                    output = {'datetime': raw_data.Times, 'open': raw_data.Data[0], 'high': raw_data.Data[1], 'low': raw_data.Data[2], 'close': raw_data.Data[3], 'volume': raw_data.Data[4], 'openInterest': raw_data.Data[5]}
                    dump2csvfile(output, outfile)
                else:
                    print "no min data obtained for ticker=%s" % ticker
            elif freq == 'd':
                outfile = save_loc + instID + '_daily.csv'
                if os.path.isfile(outfile):
                    continue
                raw_data = w.wsd(ticker, fields, start_date, end_date)
                if len(raw_data.Data) > 1:
                    output = {'datetime': raw_data.Times, 'open': raw_data.Data[0], 'high': raw_data.Data[1], 'low': raw_data.Data[2], 'close': raw_data.Data[3], 'volume': raw_data.Data[4], 'openInterest': raw_data.Data[5]}
                    dump2csvfile(output, outfile)
                else:
                    print "no daily data obtained for ticker=%s" % ticker
        except ValueError:
            # Best-effort download: skip instruments the API rejects.
            pass
    w.stop()
    return True
def process_min_id(df):
    """Attach a session minute id and trading date to each bar, in place.

    min_id maps wall-clock time onto a session clock offset by 6 hours
    (``((hour + 6) % 24) * 100 + minute``), so overnight bars sort ahead of
    the following day session. Bars with min_id >= 1000 keep their calendar
    date; earlier bars inherit the next dated bar's date, and any bar still
    undated after the back-fill rolls to the next business day.
    Returns the mutated dataframe.
    """
    df['min_id'] = df['datetime'].apply(
        lambda ts: ((ts.hour + 6) % 24) * 100 + ts.minute)
    day_bars = df['min_id'] >= 1000
    df.loc[day_bars, 'date'] = df['datetime'][day_bars].apply(
        lambda ts: ts.date())
    # Overnight bars take the trading date of the next day-session bar.
    df['date'] = df['date'].bfill()
    undated = pd.isnull(df['date'])
    # Trailing overnight bars with nothing to back-fill from: assign the
    # next business day.
    df.loc[undated, 'date'] = df['datetime'][undated].apply(
        lambda ts: misc.day_shift(ts.date(), '1b'))
    return df
def fxfwd_db_loader(market_data, fwd_index, dep_tenors = []):
    # Load the FX forward curve for fwd_index from the market database and
    # return rows of [tenor_label, expiry_date, rate].
    # NOTE(review): dep_tenors is accepted but unused and curve_info is
    # looked up but unused — presumably kept for interface parity with the
    # other *_db_loader functions; confirm.
    curve_info = cmq_crv_defn.FX_Curve_Map[fwd_index]
    mkey = market_data['market_key']
    mdate = market_data['market_date']
    mkt_db = market_data['market_db']
    cnx = dbaccess.connect(**mkt_db)
    df = dbaccess.load_fut_curve(cnx, fwd_index, mkey, dbtable = 'fx_daily', field = 'ccy')
    if len(df) == 0:
        # Missing data is only reported, not raised; an empty list is
        # returned below in that case.
        print "FXFwd data is not available for %s on %s" % (fwd_index, mdate)
    # Expiry = market date shifted by the lower-cased tenor label
    # (e.g. '1M' -> '1m').
    df['expiry'] = df['tenor'].apply(lambda x: misc.day_shift(mdate, x.lower()))
    return df[['tenor', 'expiry', 'rate']].values.tolist()
def prepare_data(self, asset_idx, cont_idx=0):
    # Build the minute-bar frame for one asset group / contract slot and
    # store it in self.config['mdf'] for the simulator.
    assets = self.sim_assets[asset_idx]
    cont_map = self.cont_maplist[asset_idx]
    cont = self.contlist[assets[0]][cont_idx]
    # Cut the data at the roll date implied by the roll rule.
    edate = misc.day_shift(self.exp_dates[assets[0]][cont_idx],
                           self.config['rollrule'])
    if self.sim_mode == 'OR':
        # Outright: a single contract truncated at the roll date.
        mdf = self.min_data[assets[0]][cont]
        mdf = mdf[mdf.date <= edate]
    else:
        # Spread modes are encoded as '<smode>-<cmode>', e.g. 'TS-Full'.
        mode_keylist = self.sim_mode.split('-')
        smode = mode_keylist[0]
        cmode = mode_keylist[1]
        all_data = []
        if smode == 'TS':
            # Time spread: legs are different contract slots of one product.
            all_data = [
                self.min_data[assets[0]][self.contlist[assets[0]][cont_idx + i]]
                for i in cont_map
            ]
        else:
            # Cross-product spread: one leg per asset.
            all_data = [
                self.min_data[asset][self.contlist[asset][cont_idx + i]]
                for asset, i in zip(assets, cont_map)
            ]
        if cmode == 'Full':
            # Keep every leg's columns side by side, suffixed by leg index
            # (open0, open1, ...), truncated at the roll date via date0.
            mdf = pd.concat(all_data, axis=1, join='inner')
            mdf.columns = [
                iter + str(i) for i, x in enumerate(all_data)
                for iter in x.columns
            ]
            mdf = mdf[mdf.date0 < edate]
        else:
            # Collapse legs into one synthetic series weighted by
            # self.calc_coeffs.
            for i, (coeff, tmpdf) in enumerate(zip(self.calc_coeffs, all_data)):
                if i == 0:
                    xopen = tmpdf['open'] * coeff
                    xclose = tmpdf['close'] * coeff
                else:
                    xopen = xopen + tmpdf['open'] * coeff
                    xclose = xclose + tmpdf['close'] * coeff
            xopen = xopen.dropna()
            xclose = xclose.dropna()
            # Synthetic high/low are the max/min of open and close — the
            # spread's true intrabar extremes are not observable.
            xhigh = pd.concat([xopen, xclose], axis=1).max(axis=1)
            xlow = pd.concat([xopen, xclose], axis=1).min(axis=1)
            # Auxiliary columns are taken from the first leg.
            col_list = ['date', 'min_id', 'volume', 'openInterest']
            mdf = pd.concat([xopen, xhigh, xlow, xclose] +
                            [all_data[0][col] for col in col_list],
                            axis=1, join='inner')
            mdf.columns = ['open', 'high', 'low', 'close'] + col_list
        mdf['contract'] = cont
    self.config['mdf'] = mdf
def pnl_stats_by_tenor(df, tenors):
    """Compute pnl_stats over trailing windows of *df*, one per tenor.

    Each window ends at the last index entry and starts tenor-ago; stats are
    keyed '<field>_<tenor>' with NaNs reported as 0. Iteration stops once a
    window already spans the full history, since longer tenors would repeat
    the same data.
    """
    res = {}
    last_day = df.index[-1]
    for tenor in tenors:
        window_start = misc.day_shift(last_day, '-' + tenor)
        stats = pnl_stats(df[df.index >= window_start])
        for key in stats:
            value = stats[key]
            res[key + '_' + tenor] = 0 if np.isnan(value) else value
        if window_start < df.index[0]:
            break
    return res
def pnl_stats_by_tenor(df, tenors):
    """Evaluate pnl_stats on trailing tenor windows of *df*.

    For every tenor the window runs from tenor-before-the-last-row up to the
    last row; results are flattened into one dict keyed '<field>_<tenor>'
    (NaN -> 0). Stops early when the window covers all available rows.
    """
    result = {}
    for tenor in tenors:
        sdate = misc.day_shift(df.index[-1], '-' + tenor)
        tenor_stats = pnl_stats(df[df.index >= sdate])
        for field in tenor_stats:
            val = tenor_stats[field]
            result[field + '_' + tenor] = 0 if np.isnan(val) else val
        if sdate < df.index[0]:
            break
    return result
def ircurve_db_loader(market_data, fwd_index, dep_tenors = []): curve_info = cmq_crv_defn.IR_Curve_Map[fwd_index] ir_idx = curve_info['ir_index'] mdate = market_data['market_date'] mkey = market_data['market_key'] mkt_db = market_data['market_db'] cnx = dbaccess.connect(**mkt_db) df = dbaccess.load_fut_curve(cnx, ir_idx, mkey, dbtable='ir_daily', field='ir_index') if len(df) == 0: print "IRCurve data is not available for %s on %s" % (ir_idx, mkey) df = df[~df['tenor'].isin(IR_Exclusion_Tenors)] df['expiry'] = df['tenor'].apply(lambda x: misc.day_shift(mdate, x.lower())) df['rate'] = df['rate']/100.0 return df[['tenor', 'expiry', 'rate']].values.tolist()
def calc_cov_by_asset(df, asset = None, start_date = None, end_date = None, tenor = None, bias = False):
    """Mean, covariance and inverse-covariance weights for columns of *df*.

    If *asset* is given, only columns named '<asset>-...' are used; otherwise
    all columns. Rows can be restricted by end_date and by start_date (or a
    tenor measured back from the effective end date). NaNs are filled with 0
    before the statistics are computed.

    Returns a dict with 'weight' (inv(cov) . avg), 'avg', 'cov' and the
    selected 'columns'.
    """
    # Idiom fix: identity comparison with None instead of '== None'.
    if asset is None:
        columns = df.columns
    else:
        columns = [col for col in df.columns if col.startswith(asset + '-')]
    xdf = df[columns].fillna(0.0)
    if end_date:
        # At this point xdf still shares df's index, so this mask is safe.
        xdf = xdf[df.index <= end_date]
        end_date = xdf.index[-1]
    if tenor:
        # A (negative) tenor overrides start_date relative to the end date.
        start_date = misc.day_shift(end_date, tenor)
    if start_date:
        xdf = xdf[xdf.index >= start_date]
    cov = np.cov(xdf.values.T, bias = bias)
    avg = np.mean(xdf.values, axis = 0)
    weights = np.linalg.inv(cov).dot(avg)
    res = {'weight': weights, 'avg': avg, 'cov': cov, 'columns': columns}
    return res
def prepare_data(self, asset_idx, cont_idx = 0):
    # Build the simulation frame for one asset group / contract slot from
    # self.data_store and publish it as self.config['df'].
    assets = self.sim_assets[asset_idx]
    cont_map = self.cont_maplist[asset_idx]
    cont = self.contlist[assets[0]][cont_idx]
    # Truncate at the roll date implied by the roll rule.
    edate = misc.day_shift(self.exp_dates[assets[0]][cont_idx], self.config['rollrule'])
    if self.sim_mode == 'OR':
        # Outright: a single contract up to the roll date.
        df = self.data_store[assets[0]][cont]
        df = df[df.date <= edate]
    else:
        # Spread modes are encoded as '<smode>-<cmode>', e.g. 'TS-Full'.
        mode_keylist = self.sim_mode.split('-')
        smode = mode_keylist[0]
        cmode = mode_keylist[1]
        all_data = []
        if smode == 'TS':
            # Time spread: legs are different slots of the same product.
            all_data = [self.data_store[assets[0]][self.contlist[assets[0]][cont_idx+i]] for i in cont_map]
        else:
            # Cross-product spread: one leg per asset.
            all_data = [self.data_store[asset][self.contlist[asset][cont_idx+i]] for asset, i in zip(assets, cont_map)]
        if cmode == 'Full':
            # Side-by-side legs with columns suffixed by leg index; date0
            # (first leg's date) drives the roll cut-off.
            df = pd.concat(all_data, axis = 1, join = 'inner')
            df.columns = [iter + str(i) for i, x in enumerate(all_data) for iter in x.columns]
            df = df[ df.date0 < edate]
        else:
            # Collapse legs into one synthetic series weighted by
            # self.calc_coeffs.
            for i, (coeff, tmpdf) in enumerate(zip(self.calc_coeffs, all_data)):
                if i == 0:
                    xopen = tmpdf['open'] * coeff
                    xclose = tmpdf['close'] * coeff
                else:
                    xopen = xopen + tmpdf['open'] * coeff
                    xclose = xclose + tmpdf['close'] * coeff
            xopen = xopen.dropna()
            xclose = xclose.dropna()
            # Synthetic high/low are the max/min of open and close — the
            # spread's true intrabar extremes are not observable.
            xhigh = pd.concat([xopen, xclose], axis = 1).max(axis = 1)
            xlow = pd.concat([xopen, xclose], axis = 1).min(axis = 1)
            # Auxiliary columns come from the first leg.
            col_list = ['date', 'min_id', 'volume', 'openInterest']
            df = pd.concat([ xopen, xhigh, xlow, xclose] + [all_data[0][col] for col in col_list], axis = 1, join = 'inner')
            df.columns = ['open', 'high', 'low', 'close'] + col_list
        df['contract'] = cont
    self.config['df'] = df
def calc_cov_by_asset(df, asset=None, start_date=None, end_date=None, tenor=None, bias=False):
    """Mean/covariance statistics and inverse-covariance weights.

    Column selection: all columns when *asset* is None, otherwise columns
    prefixed '<asset>-'. Rows may be restricted by end_date, and by
    start_date or a tenor shift back from the effective end date. NaNs are
    zero-filled first.

    Returns {'weight': inv(cov).avg, 'avg': ..., 'cov': ..., 'columns': ...}.
    """
    # Idiom fix: compare with None by identity, not equality.
    if asset is None:
        columns = df.columns
    else:
        columns = [col for col in df.columns if col.startswith(asset + '-')]
    xdf = df[columns].fillna(0.0)
    if end_date:
        # xdf still shares df's index here, so masking by df.index is safe.
        xdf = xdf[df.index <= end_date]
        end_date = xdf.index[-1]
    if tenor:
        # A (negative) tenor derives start_date from the effective end date.
        start_date = misc.day_shift(end_date, tenor)
    if start_date:
        xdf = xdf[xdf.index >= start_date]
    cov = np.cov(xdf.values.T, bias=bias)
    avg = np.mean(xdf.values, axis=0)
    weights = np.linalg.inv(cov).dot(avg)
    res = {'weight': weights, 'avg': avg, 'cov': cov, 'columns': columns}
    return res
def simcontract_min(config_file):
    # Driver for per-contract minute-data backtests configured by a JSON
    # file: resolves the simulator function, loads data per product group,
    # sweeps every scenario combination, and writes trades/daily/stat CSVs
    # plus a cumulative summary. Re-runs are resumable: existing per-scenario
    # output files are skipped.
    sim_config = {}
    with open(config_file, 'r') as fp:
        sim_config = json.load(fp)
    # Resolve sim_func 'pkg.mod.func' to a callable by importing the module
    # and walking its attributes.
    bktest_split = sim_config['sim_func'].split('.')
    run_sim = __import__('.'.join(bktest_split[:-1]))
    for i in range(1, len(bktest_split)):
        run_sim = getattr(run_sim, bktest_split[i])
    # Output directory is named after the config file (sans extension).
    dir_name = config_file.split('.')[0]
    dir_name = dir_name.split(os.path.sep)[-1]
    test_folder = get_bktest_folder()
    file_prefix = test_folder + dir_name + os.path.sep
    if not os.path.exists(file_prefix):
        os.makedirs(file_prefix)
    # Products may be given as plain names or as grouped lists (spreads);
    # normalize to a list of lists.
    sim_list = sim_config['products']
    if type(sim_list[0]).__name__ != 'list':
        sim_list = [[str(asset)] for asset in sim_list]
    sim_mode = sim_config.get('sim_mode', 'OR')
    calc_coeffs = sim_config.get('calc_coeffs', [1, -1])
    cont_maplist = sim_config.get('cont_maplist', [])
    sim_period = sim_config.get('sim_period', '-12m')
    need_daily = sim_config.get('need_daily', False)
    if len(cont_maplist) == 0:
        cont_maplist = [[0]] * len(sim_list)
    config = {}
    start_date = datetime.datetime.strptime(sim_config['start_date'], '%Y%m%d').date()
    config['start_date'] = start_date
    end_date = datetime.datetime.strptime(sim_config['end_date'], '%Y%m%d').date()
    config['end_date'] = end_date
    # Scenario grid: the cartesian product of all scen_keys value lists.
    scen_dim = [ len(sim_config[s]) for s in sim_config['scen_keys']]
    outcol_list = ['asset', 'scenario'] + sim_config['scen_keys'] \
        + ['sharp_ratio', 'tot_pnl', 'std_pnl', 'num_days', \
           'max_drawdown', 'max_dd_period', 'profit_dd_ratio', \
           'all_profit', 'tot_cost', 'win_ratio', 'num_win', 'num_loss', \
           'profit_per_win', 'profit_per_loss']
    scenarios = [list(s) for s in np.ndindex(tuple(scen_dim))]
    config.update(sim_config['config'])
    # NOTE(review): eval() on config-supplied names — acceptable only for
    # trusted config files; do not feed untrusted input here.
    if 'pos_class' in sim_config:
        config['pos_class'] = eval(sim_config['pos_class'])
    if 'proc_func' in sim_config:
        config['proc_func'] = eval(sim_config['proc_func'])
    file_prefix = file_prefix + sim_config['sim_name']
    if 'close_daily' in config and config['close_daily']:
        file_prefix = file_prefix + 'daily_'
    config['file_prefix'] = file_prefix
    # Running summary across all asset groups; reloaded if it already exists.
    # NOTE(review): DataFrame.from_csv is deprecated/removed in modern
    # pandas — this code targets an old pandas version.
    summary_df = pd.DataFrame()
    fname = config['file_prefix'] + 'summary.csv'
    if os.path.isfile(fname):
        summary_df = pd.DataFrame.from_csv(fname)
    for assets, cont_map in zip(sim_list, cont_maplist):
        file_prefix = config['file_prefix'] + '_' + sim_mode + '_' + '_'.join(assets) + '_'
        fname = file_prefix + 'stats.json'
        output = {'total': {}, 'cont': {}}
        if os.path.isfile(fname):
            with open(fname, 'r') as fp:
                output = json.load(fp)
        #if len(output['total'].keys()) == len(scenarios):
        #    continue
        min_data = {}
        day_data = {}
        config['tick_base'] = 0
        config['marginrate'] = (0, 0)
        # Per-product default roll rules (business days before expiry),
        # overridable via config['rollrule'].
        rollrule = '-50b'
        config['exit_min'] = config.get('exit_min', 2057)
        config['no_trade_set'] = config.get('no_trade_set', [])
        if assets[0] in ['cu', 'al', 'zn']:
            rollrule = '-1b'
        elif assets[0] in ['IF', 'IH', 'IC']:
            rollrule = '-2b'
        elif assets[0] in ['au', 'ag']:
            rollrule = '-25b'
        elif assets[0] in ['TF', 'T']:
            rollrule = '-20b'
        rollrule = config.get('rollrule', rollrule)
        contlist = {}
        exp_dates = {}
        for i, prod in enumerate(assets):
            cont_mth, exch = mysqlaccess.prod_main_cont_exch(prod)
            contlist[prod] = misc.contract_range(prod, exch, cont_mth, start_date, end_date)
            exp_dates[prod] = [misc.contract_expiry(cont) for cont in contlist[prod]]
            # Per-contract data window: sim_period before expiry to the
            # roll date.
            edates = [ misc.day_shift(d, rollrule) for d in exp_dates[prod] ]
            sdates = [ misc.day_shift(d, sim_period) for d in exp_dates[prod] ]
            # Tick size accumulates across legs; margin rate takes the max.
            config['tick_base'] += trade_offset_dict[prod]
            config['marginrate'] = ( max(config['marginrate'][0], sim_margin_dict[prod]), max(config['marginrate'][1], sim_margin_dict[prod]))
            min_data[prod] = {}
            day_data[prod] = {}
            for cont, sd, ed in zip(contlist[prod], sdates, edates):
                # Day session min ids 1500-2114; night-session markets
                # start at 300.
                minid_start = 1500
                minid_end = 2114
                if prod in misc.night_session_markets:
                    minid_start = 300
                tmp_df = mysqlaccess.load_min_data_to_df('fut_min', cont, sd, ed, minid_start, minid_end, database = 'hist_data')
                tmp_df['contract'] = cont
                min_data[prod][cont] = cleanup_mindata( tmp_df, prod)
                if need_daily:
                    tmp_df = mysqlaccess.load_daily_data_to_df('fut_daily', cont, sd, ed, database = 'hist_data')
                    day_data[prod][cont] = tmp_df
        # Trade price offset in ticks (defaults to one tick).
        if 'offset' in sim_config:
            config['offset'] = sim_config['offset'] * config['tick_base']
        else:
            config['offset'] = config['tick_base']
        for ix, s in enumerate(scenarios):
            fname1 = file_prefix + str(ix) + '_trades.csv'
            fname2 = file_prefix + str(ix) + '_dailydata.csv'
            # Resume support: skip scenarios whose outputs already exist.
            if os.path.isfile(fname1) and os.path.isfile(fname2):
                continue
            for key, seq in zip(sim_config['scen_keys'], s):
                config[key] = sim_config[key][seq]
            df_list = []
            trade_list = []
            # Iterate contract slots such that every cont_map offset stays
            # inside contlist.
            for idx in range(abs(min(cont_map)), len(contlist[assets[0]]) - max(cont_map)):
                cont = contlist[assets[0]][idx]
                edate = misc.day_shift(exp_dates[assets[0]][idx], rollrule)
                if sim_mode == 'OR':
                    # Outright: one contract truncated at its roll date.
                    mdf = min_data[assets[0]][cont]
                    mdf = mdf[mdf.date <= edate]
                    if need_daily:
                        ddf = day_data[assets[0]][cont]
                        config['ddf'] = ddf[ddf.index <= edate]
                        # Too little daily history — skip this contract.
                        if len(config['ddf']) < 10:
                            continue
                else:
                    # Spread modes '<smode>-<cmode>' (cf. prepare_data).
                    mode_keylist = sim_mode.split('-')
                    smode = mode_keylist[0]
                    cmode = mode_keylist[1]
                    all_data = []
                    if smode == 'TS':
                        all_data = [min_data[assets[0]][contlist[assets[0]][idx+i]] for i in cont_map]
                    else:
                        all_data = [min_data[asset][contlist[asset][idx+i]] for asset, i in zip(assets, cont_map)]
                    if cmode == 'Full':
                        mdf = pd.concat(all_data, axis = 1, join = 'inner')
                        mdf.columns = [iter + str(i) for i, x in enumerate(all_data) for iter in x.columns]
                        mdf = mdf[ mdf.date0 < edate]
                    else:
                        #print all_data[0], all_data[1]
                        # Synthetic spread weighted by calc_coeffs.
                        for i, (coeff, tmpdf) in enumerate(zip(calc_coeffs, all_data)):
                            if i == 0:
                                xopen = tmpdf['open'] * coeff
                                xclose = tmpdf['close'] * coeff
                            else:
                                xopen = xopen + tmpdf['open'] * coeff
                                xclose = xclose + tmpdf['close'] * coeff
                        xopen = xopen.dropna()
                        xclose = xclose.dropna()
                        xhigh = pd.concat([xopen, xclose], axis = 1).max(axis = 1)
                        xlow = pd.concat([xopen, xclose], axis = 1).min(axis = 1)
                        col_list = ['date', 'min_id', 'volume', 'openInterest']
                        mdf = pd.concat([ xopen, xhigh, xlow, xclose] + [all_data[0][col] for col in col_list], axis = 1, join = 'inner')
                        mdf.columns = ['open', 'high', 'low', 'close'] + col_list
                    mdf['contract'] = cont
                    #print mdf
                    if need_daily:
                        if smode == 'TS':
                            all_data = [day_data[assets[0]][contlist[assets[0]][idx+i]] for i in cont_map]
                        else:
                            # NOTE(review): indexes contlist[asset] first and
                            # then [idx+i] on the result — likely meant
                            # contlist[asset][idx+i] as in the TS branch.
                            all_data = [day_data[asset][contlist[asset]][idx+i] for asset, i in zip(assets, cont_map)]
                        if cmode == 'Full':
                            ddf = pd.concat(all_data, axis = 1, join = 'inner')
                            ddf.columns = [iter + str(i) for i, x in enumerate(all_data) for iter in x.columns]
                            config['ddf'] = ddf[ddf.index <= edate]
                        else:
                            for i, (coeff, tmpdf) in enumerate(zip(calc_coeffs, all_data)):
                                if i == 0:
                                    xopen = tmpdf['open'] * coeff
                                    xclose = tmpdf['close'] * coeff
                                else:
                                    xopen = xopen + tmpdf['open'] * coeff
                                    xclose = xclose + tmpdf['close'] * coeff
                            xhigh = pd.concat([xopen, xclose], axis = 1).max(axis = 1)
                            xlow = pd.concat([xopen, xclose], axis = 1).min(axis = 1)
                            col_list = ['volume', 'openInterest']
                            ddf = pd.concat([ xopen, xhigh, xlow, xclose] + [all_data[0][col] for col in col_list], axis = 1, join = 'inner')
                            ddf.columns = ['open', 'high', 'low', 'close'] + col_list
                            ddf['contract'] = cont
                            config['ddf'] = ddf[ddf.index <= edate]
                        if len(config['ddf']) < 10:
                            continue
                # Run the simulator on a copy so cached data stays pristine.
                df = mdf.copy(deep = True)
                df, closed_trades = run_sim( df, config)
                df_list.append(df)
                trade_list = trade_list + closed_trades
                # Per-contract stats (trade stats are cumulative so far).
                (res_pnl, ts) = get_pnl_stats( [df], config['capital'], config['marginrate'], 'm')
                res_trade = get_trade_stats( trade_list )
                res = dict( res_pnl.items() + res_trade.items())
                res.update(dict(zip(sim_config['scen_keys'], s)))
                res['asset'] = cont
                if cont not in output['cont']:
                    output['cont'][cont] = {}
                output['cont'][cont][ix] = res
            # Combined stats across all contracts for this scenario.
            (res_pnl, ts) = get_pnl_stats( df_list, config['capital'], config['marginrate'], 'm')
            res_trade = get_trade_stats( trade_list )
            res = dict( res_pnl.items() + res_trade.items())
            res.update(dict(zip(sim_config['scen_keys'], s)))
            res['asset'] = '_'.join(assets)
            output['total'][ix] = res
            print 'saving results for asset = %s, scen = %s' % ('_'.join(assets), str(ix))
            all_trades = {}
            for i, tradepos in enumerate(trade_list):
                all_trades[i] = strat.tradepos2dict(tradepos)
            trades = pd.DataFrame.from_dict(all_trades).T
            trades.to_csv(fname1)
            ts.to_csv(fname2)
            fname = file_prefix + 'stats.json'
            # NOTE(review): bare except deliberately keeps the sweep going
            # if the stats file cannot be written; consider logging the
            # error instead of swallowing it.
            try:
                with open(fname, 'w') as ofile:
                    json.dump(output, ofile)
            except:
                continue
        # Per-contract stat table: top-20 rows per contract.
        cont_df = pd.DataFrame()
        for idx in range(abs(min(cont_map)), len(contlist[assets[0]]) - max(cont_map)):
            cont = contlist[assets[0]][idx]
            if cont not in output['cont']:
                continue
            res = scen_dict_to_df(output['cont'][cont])
            out_res = res[outcol_list]
            if len(cont_df) == 0:
                cont_df = out_res[:20].copy(deep = True)
            else:
                cont_df = cont_df.append(out_res[:20])
        fname = file_prefix + 'cont_stat.csv'
        cont_df.to_csv(fname)
        # Append this asset group's top-20 rows to the running summary and
        # rewrite the summary CSV each iteration.
        res = scen_dict_to_df(output['total'])
        out_res = res[outcol_list]
        if len(summary_df) == 0:
            summary_df = out_res[:20].copy(deep = True)
        else:
            summary_df = summary_df.append(out_res[:20])
        fname = config['file_prefix'] + 'summary.csv'
        summary_df.to_csv(fname)
    return