def fix_daily_data(contlist, sdate, edate): for inst in contlist: ddf = mysqlaccess.load_daily_data_to_df('fut_daily', inst, sdate, edate) mdf = mysqlaccess.load_min_data_to_df('fut_min', inst, sdate, edate, minid_start=300, minid_end=2115) dailydata = data_handler.conv_ohlc_freq(mdf, 'D') for dd in dailydata.index: d = dd.date() dslice = dailydata.ix[dd] if d not in ddf.index: ddata = {} ddata['date'] = d ddata['open'] = float(dslice.open) ddata['close'] = float(dslice.close) ddata['high'] = float(dslice.high) ddata['low'] = float(dslice.low) ddata['volume'] = int(dslice.volume) ddata['openInterest'] = int(dslice.openInterest) print inst, ddata mysqlaccess.insert_daily_data(inst, ddata)
def rolling_hist_data(product, n, start_date, end_date, cont_roll, freq, win_roll= '-20b', database = 'hist_data'): if start_date > end_date: return None cnx = mysql.connector.connect(**mysqlaccess.dbconfig) cursor = cnx.cursor() stmt = "select exchange, contract from trade_products where product_code='{prod}' ".format(prod=product) cursor.execute(stmt) out = [(exchange, contract) for (exchange, contract) in cursor] exch = str(out[0][0]) cont = str(out[0][1]) cont_mth = [month_code_map[c] for c in cont] cnx.close() contlist = contract_range(product, exch, cont_mth, start_date, end_date) exp_dates = [day_shift(contract_expiry(cont), cont_roll) for cont in contlist] #print contlist, exp_dates sdate = start_date all_data = {} i = 0 for idx, exp in enumerate(exp_dates): if exp < start_date: continue elif sdate > end_date: break nb_cont = contlist[idx+n-1] if freq == 'd': df = mysqlaccess.load_daily_data_to_df('fut_daily', nb_cont, day_shift(sdate,win_roll), min(exp,end_date), database = database) else: df = mysqlaccess.load_min_data_to_df('fut_min', nb_cont, day_shift(sdate,win_roll), min(exp,end_date), database = database) all_data[i] = {'contract': nb_cont, 'data': df} i += 1 sdate = min(exp,end_date) + datetime.timedelta(days=1) return all_data
def validate_db_data(tday, filter=False): all_insts = filter_main_cont(tday, filter) data_count = {} inst_list = {'min': [], 'daily': []} for instID in all_insts: df = mysqlaccess.load_daily_data_to_df('fut_daily', instID, tday, tday) if len(df) <= 0: inst_list['daily'].append(instID) elif (df.close[-1] == 0) or (df.high[-1] == 0) or (df.low[-1] == 0) or df.open[-1] == 0: inst_list['daily'].append(instID) df = mysqlaccess.load_min_data_to_df('fut_min', instID, tday, tday, minid_start=300, minid_end=2115, database='blueshale') if len(df) <= 100: output = instID + ':' + str(len(df)) inst_list['min'].append(output) elif df.min_id < 2055: output = instID + ': end earlier' inst_list['min'].append(output) print inst_list
def fix_daily_by_tick(contlist, sdate, edate, is_forced=False): res = {} for inst in contlist: product = misc.inst2product(inst) start_tick= 1500000 end_tick = 2100000 if product in misc.night_session_markets: start_tick = 300000 elif product in ['IF','TF']: start_tick = 1515000 end_tick = 2115000 ddf = mysqlaccess.load_daily_data_to_df('fut_daily', inst, sdate, edate) tdf = mysqlaccess.load_tick_to_df('fut_tick', inst, sdate, edate, start_tick=start_tick, end_tick = end_tick) for d in list(set(tdf.date)): if (is_forced) or (d not in ddf.index) or (ddf.ix(d, 'open')==0): df = tdf[tdf['date']==d].sort(['tick_id']) ddata = {} ddata['date'] = d ddata['open'] = float(df.iloc[0].price) ddata['close'] = float(df.iloc[-1].price) ddata['high'] = float(df.iloc[-1].high) ddata['low'] = float(df.iloc[-1].low) ddata['volume'] = int(df.iloc[-1].volume) ddata['openInterest'] = int(df.iloc[-1].openInterest) print inst, ddata res[(inst,d)] = ddata mysqlaccess.insert_daily_data(inst, ddata, is_forced) return res
def fix_daily_by_tick(contlist, sdate, edate, is_forced=False): res = {} for inst in contlist: product = misc.inst2product(inst) start_tick = 1500000 end_tick = 2100000 if product in misc.night_session_markets: start_tick = 300000 elif product in ['IF', 'TF']: start_tick = 1515000 end_tick = 2115000 ddf = mysqlaccess.load_daily_data_to_df('fut_daily', inst, sdate, edate) tdf = mysqlaccess.load_tick_to_df('fut_tick', inst, sdate, edate, start_tick=start_tick, end_tick=end_tick) for d in list(set(tdf.date)): if (is_forced) or (d not in ddf.index) or (ddf.ix(d, 'open') == 0): df = tdf[tdf['date'] == d].sort(['tick_id']) ddata = {} ddata['date'] = d ddata['open'] = float(df.iloc[0].price) ddata['close'] = float(df.iloc[-1].price) ddata['high'] = float(df.iloc[-1].high) ddata['low'] = float(df.iloc[-1].low) ddata['volume'] = int(df.iloc[-1].volume) ddata['openInterest'] = int(df.iloc[-1].openInterest) print inst, ddata res[(inst, d)] = ddata mysqlaccess.insert_daily_data(inst, ddata, is_forced) return res
def hist_realized_vol_by_product(prodcode, start_d, end_d, periods = 12, tenor = '-1m', writeDB = False): cont_mth, exch = mysqlaccess.prod_main_cont_exch(prodcode) contlist = contract_range(prodcode, exch, cont_mth, start_d, end_d) print contlist exp_dates = [get_opt_expiry(cont, inst2contmth(cont)) for cont in contlist] data = {'is_dtime': False, 'data_column': 'close', 'xs': [0.5, 0.25, 0.75], 'xs_names': ['atm', 'd25', 'd75'], 'xs_func': 'bs_delta_to_ratio', 'rehedge_period': 1, 'term_tenor': tenor, } option_input = {'otype': 1, 'rd': 0.0, 'rf': 0.0, 'end_vol': 0.0, 'ref_vol': 0.2, 'pricer_func': 'bsopt.BSOpt', 'delta_func': 'bsopt.BSDelta', } for cont, expiry in zip(contlist, exp_dates): if expiry > end_d: break p_str = '-' + str(int(tenor[1:-1]) * periods) + tenor[-1] d_start = day_shift(expiry, p_str) df = mysqlaccess.load_daily_data_to_df('fut_daily', cont, d_start, expiry, database = 'hist_data') option_input['expiry'] = expiry data['dataframe'] = df vol_df = realized_termstruct(option_input, data) print cont, expiry, vol_df
def nearby(prodcode, n, start_date, end_date, roll_rule, freq, need_shift=False, database = 'hist_data'): if start_date > end_date: return None cont_mth, exch = mysqlaccess.prod_main_cont_exch(prodcode) contlist = contract_range(prodcode, exch, cont_mth, start_date, day_shift(end_date, roll_rule[1:])) exp_dates = [day_shift(contract_expiry(cont), roll_rule) for cont in contlist] #print contlist, exp_dates sdate = start_date is_new = True for idx, exp in enumerate(exp_dates): if exp < start_date: continue elif sdate > end_date: break nb_cont = contlist[idx+n-1] if freq == 'd': new_df = mysqlaccess.load_daily_data_to_df('fut_daily', nb_cont, sdate, min(exp,end_date), database = database) else: minid_start = 1500 minid_end = 2114 if prodcode in night_session_markets: minid_start = 300 new_df = mysqlaccess.load_min_data_to_df('fut_min', nb_cont, sdate, min(exp,end_date), minid_start, minid_end, database = database) if len(new_df.shape) == 0: continue nn = new_df.shape[0] if nn > 0: new_df['contract'] = pd.Series([nb_cont]*nn, index=new_df.index) else: continue if is_new: df = new_df is_new = False else: if need_shift: if isinstance(df.index[-1], datetime.datetime): last_date = df.index[-1].date() else: last_date = df.index[-1] tmp_df = mysqlaccess.load_daily_data_to_df('fut_daily', nb_cont, last_date, last_date, database = database) shift = tmp_df['close'][-1] - df['close'][-1] for ticker in ['open','high','low','close']: df[ticker] = df[ticker] + shift df = df.append(new_df) sdate = min(exp,end_date) + datetime.timedelta(days=1) return df
def prepare_data_env(self, inst, mid_day = True): if self.instruments[inst].ptype == instrument.ProductType.Option: return if self.daily_data_days > 0 or mid_day: self.logger.debug('Updating historical daily data for %s' % self.scur_day.strftime('%Y-%m-%d')) daily_start = workdays.workday(self.scur_day, -self.daily_data_days, CHN_Holidays) daily_end = self.scur_day self.day_data[inst] = mysqlaccess.load_daily_data_to_df('fut_daily', inst, daily_start, daily_end) df = self.day_data[inst] if len(df) > 0: self.instruments[inst].price = df['close'][-1] self.instruments[inst].last_update = 0 self.instruments[inst].prev_close = df['close'][-1] for fobj in self.day_data_func[inst]: ts = fobj.sfunc(df) df[ts.name]= pd.Series(ts, index=df.index) if self.min_data_days > 0 or mid_day: self.logger.debug('Updating historical min data for %s' % self.scur_day.strftime('%Y-%m-%d')) d_start = workdays.workday(self.scur_day, -self.min_data_days, CHN_Holidays) d_end = self.scur_day min_start = int(self.instruments[inst].start_tick_id/1000) min_end = int(self.instruments[inst].last_tick_id/1000)+1 mindata = mysqlaccess.load_min_data_to_df('fut_min', inst, d_start, d_end, minid_start=min_start, minid_end=min_end, database = 'blueshale') mindata = backtest.cleanup_mindata(mindata, self.instruments[inst].product) self.min_data[inst][1] = mindata if len(mindata)>0: min_date = mindata.index[-1].date() if (len(self.day_data[inst].index)==0) or (min_date > self.day_data[inst].index[-1]): ddf = data_handler.conv_ohlc_freq(mindata, 'd') self.cur_day[inst]['open'] = float(ddf.open[-1]) self.cur_day[inst]['close'] = float(ddf.close[-1]) self.cur_day[inst]['high'] = float(ddf.high[-1]) self.cur_day[inst]['low'] = float(ddf.low[-1]) self.cur_day[inst]['volume'] = int(ddf.volume[-1]) self.cur_day[inst]['openInterest'] = int(ddf.openInterest[-1]) self.cur_min[inst]['datetime'] = pd.datetime(*mindata.index[-1].timetuple()[0:-3]) self.cur_min[inst]['open'] = float(mindata.ix[-1,'open']) self.cur_min[inst]['close'] = float(mindata.ix[-1,'close']) self.cur_min[inst]['high'] = float(mindata.ix[-1,'high']) self.cur_min[inst]['low'] = float(mindata.ix[-1,'low']) self.cur_min[inst]['volume'] = self.cur_day[inst]['volume'] self.cur_min[inst]['openInterest'] = self.cur_day[inst]['openInterest'] self.cur_min[inst]['min_id'] = int(mindata.ix[-1,'min_id']) self.instruments[inst].price = float(mindata.ix[-1,'close']) self.instruments[inst].last_update = 0 self.logger.debug('inst=%s tick data loaded for date=%s' % (inst, min_date)) for m in self.min_data_func[inst]: if m != 1: self.min_data[inst][m] = data_handler.conv_ohlc_freq(self.min_data[inst][1], str(m)+'min') df = self.min_data[inst][m] for fobj in self.min_data_func[inst][m]: ts = fobj.sfunc(df) df[ts.name]= pd.Series(ts, index=df.index)
def validate_db_data(tday, filter = False): all_insts = filter_main_cont(tday, filter) data_count = {} inst_list = {'min': [], 'daily': [] } for instID in all_insts: df = mysqlaccess.load_daily_data_to_df('fut_daily', instID, tday, tday) if len(df) <= 0: inst_list['daily'].append(instID) elif (df.close[-1] == 0) or (df.high[-1] == 0) or (df.low[-1] == 0) or df.open[-1] == 0: inst_list['daily'].append(instID) df = mysqlaccess.load_min_data_to_df('fut_min', instID, tday, tday, minid_start=300, minid_end=2115, database='blueshale') if len(df) <= 100: output = instID + ':' + str(len(df)) inst_list['min'].append(output) elif df.min_id < 2055: output = instID + ': end earlier' inst_list['min'].append(output) print inst_list
def fix_daily_data(contlist, sdate, edate): for inst in contlist: ddf = mysqlaccess.load_daily_data_to_df('fut_daily', inst, sdate, edate) mdf = mysqlaccess.load_min_data_to_df('fut_min', inst, sdate, edate, minid_start=300, minid_end = 2115) dailydata = data_handler.conv_ohlc_freq(mdf, 'D') for dd in dailydata.index: d = dd.date() dslice = dailydata.ix[dd] if d not in ddf.index: ddata = {} ddata['date'] = d ddata['open'] = float(dslice.open) ddata['close'] = float(dslice.close) ddata['high'] = float(dslice.high) ddata['low'] = float(dslice.low) ddata['volume'] = int(dslice.volume) ddata['openInterest'] = int(dslice.openInterest) print inst, ddata mysqlaccess.insert_daily_data(inst, ddata)
import matplotlib matplotlib.use('TkAgg') import pandas as pd import numpy as np import matplotlib.pyplot as plt from matplotlib.finance import candlestick from matplotlib.finance import candlestick2 import chart import mysqlaccess as db import datetime d_start = datetime.date(2015,1,2) d_end = datetime.date(2015,4,24) dbtable = 'fut_daily' inst = 'm1509' df = db.load_daily_data_to_df(dbtable, inst, d_start, d_end) ohlc = pd.DataFrame(df) ohlc = ohlc.drop(u'openInterest', 1) ohlc = ohlc.drop('volume', 1) ohlc['sn'] = range(len(ohlc.index)) ohlc = ohlc.set_index('sn') fig = plt.figure() ax = fig.add_subplot(311) y_formatter = matplotlib.ticker.ScalarFormatter(useOffset=False) ax.yaxis.set_major_formatter(y_formatter) chart.cchart(ax, ohlc, width=.5, colorup='g', colordown='r', alpha=1) ohlc['close'].plot(ax=ax) ay = fig.add_subplot(312) ohlc.plot(ax=ay) #ax.set_xticks(np.arange(0,len(ohlc.index),5)) #plt.savefig("1.png")
def prepare_data_env(self, inst, mid_day=True): if self.instruments[inst].ptype == instrument.ProductType.Option: return if self.daily_data_days > 0 or mid_day: self.logger.debug('Updating historical daily data for %s' % self.scur_day.strftime('%Y-%m-%d')) daily_start = workdays.workday(self.scur_day, -self.daily_data_days, CHN_Holidays) daily_end = self.scur_day self.day_data[inst] = mysqlaccess.load_daily_data_to_df( 'fut_daily', inst, daily_start, daily_end) df = self.day_data[inst] print inst if len(df) > 0: self.instruments[inst].price = df['close'][-1] self.instruments[inst].last_update = 0 self.instruments[inst].prev_close = df['close'][-1] for fobj in self.day_data_func[inst]: ts = fobj.sfunc(df) df[ts.name] = pd.Series(ts, index=df.index) if self.min_data_days > 0 or mid_day: self.logger.debug('Updating historical min data for %s' % self.scur_day.strftime('%Y-%m-%d')) d_start = workdays.workday(self.scur_day, -self.min_data_days, CHN_Holidays) d_end = self.scur_day min_start = int(self.instruments[inst].start_tick_id / 1000) min_end = int(self.instruments[inst].last_tick_id / 1000) + 1 mindata = mysqlaccess.load_min_data_to_df('fut_min', inst, d_start, d_end, minid_start=min_start, minid_end=min_end, database='blueshale') mindata = backtest.cleanup_mindata(mindata, self.instruments[inst].product) self.min_data[inst][1] = mindata if len(mindata) > 0: min_date = mindata.index[-1].date() if (len(self.day_data[inst].index) == 0) or (min_date > self.day_data[inst].index[-1]): ddf = data_handler.conv_ohlc_freq(mindata, 'd') self.cur_day[inst]['open'] = float(ddf.open[-1]) self.cur_day[inst]['close'] = float(ddf.close[-1]) self.cur_day[inst]['high'] = float(ddf.high[-1]) self.cur_day[inst]['low'] = float(ddf.low[-1]) self.cur_day[inst]['volume'] = int(ddf.volume[-1]) self.cur_day[inst]['openInterest'] = int( ddf.openInterest[-1]) self.cur_min[inst]['datetime'] = pd.datetime( *mindata.index[-1].timetuple()[0:-3]) self.cur_min[inst]['open'] = float(mindata.ix[-1, 'open']) self.cur_min[inst]['close'] = float(mindata.ix[-1, 'close']) self.cur_min[inst]['high'] = float(mindata.ix[-1, 'high']) self.cur_min[inst]['low'] = float(mindata.ix[-1, 'low']) self.cur_min[inst]['volume'] = self.cur_day[inst]['volume'] self.cur_min[inst]['openInterest'] = self.cur_day[inst][ 'openInterest'] self.cur_min[inst]['min_id'] = int(mindata.ix[-1, 'min_id']) self.instruments[inst].price = float(mindata.ix[-1, 'close']) self.instruments[inst].last_update = 0 self.logger.debug('inst=%s tick data loaded for date=%s' % (inst, min_date)) for m in self.min_data_func[inst]: if m != 1: self.min_data[inst][m] = data_handler.conv_ohlc_freq( self.min_data[inst][1], str(m) + 'min') df = self.min_data[inst][m] for fobj in self.min_data_func[inst][m]: ts = fobj.sfunc(df) df[ts.name] = pd.Series(ts, index=df.index)
def hist_realized_vol_by_product(prodcode, start_d, end_d, periods=12, tenor='-1m', writeDB=False): cont_mth, exch = mysqlaccess.prod_main_cont_exch(prodcode) contlist = contract_range(prodcode, exch, cont_mth, start_d, end_d) exp_dates = [get_opt_expiry(cont, inst2contmth(cont)) for cont in contlist] data = { 'is_dtime': True, 'data_column': 'close', 'data_freq': '30min', 'xs': [0.5, 0.25, 0.75], 'xs_names': ['atm', 'v25', 'v75'], 'xs_func': 'bs_delta_to_ratio', 'rehedge_period': 1, 'term_tenor': tenor, 'database': 'hist_data' } option_input = { 'otype': True, 'rd': 0.0, 'rf': 0.0, 'end_vol': 0.0, 'ref_vol': 0.5, 'pricer_func': 'bsopt.BSOpt', 'delta_func': 'bsopt.BSDelta', 'is_dtime': data['is_dtime'], } freq = data['data_freq'] for cont, expiry in zip(contlist, exp_dates): expiry_d = expiry.date() if expiry_d > end_d: break p_str = '-' + str(int(tenor[1:-1]) * periods) + tenor[-1] d_start = day_shift(expiry_d, p_str) if freq == 'd': df = mysqlaccess.load_daily_data_to_df('fut_daily', cont, d_start, expiry_d, database=data['database'], index_col=None) else: mdf = mysqlaccess.load_min_data_to_df('fut_min', cont, d_start, expiry_d, minid_start=300, minid_end=2115, database=data['database'], index_col=None) mdf = backtest.cleanup_mindata(mdf, prodcode, index_col=None) mdf['bar_id'] = dh.bar_conv_func2(mdf['min_id']) df = dh.conv_ohlc_freq(mdf, freq, bar_func=dh.bar_conv_func2, extra_cols=['bar_id'], index_col=None) option_input['expiry'] = expiry data['dataframe'] = df vol_df = realized_termstruct(option_input, data) print cont, expiry_d, vol_df
matplotlib.use("TkAgg") import pandas as pd import numpy as np import matplotlib.pyplot as plt from matplotlib.finance import candlestick from matplotlib.finance import candlestick2 import chart import mysqlaccess as db import datetime d_start = datetime.date(2015, 1, 2) d_end = datetime.date(2015, 4, 24) dbtable = "fut_daily" inst = "m1509" df = db.load_daily_data_to_df(dbtable, inst, d_start, d_end) ohlc = pd.DataFrame(df) ohlc = ohlc.drop(u"openInterest", 1) ohlc = ohlc.drop("volume", 1) ohlc["sn"] = range(len(ohlc.index)) ohlc = ohlc.set_index("sn") fig = plt.figure() ax = fig.add_subplot(311) y_formatter = matplotlib.ticker.ScalarFormatter(useOffset=False) ax.yaxis.set_major_formatter(y_formatter) chart.cchart(ax, ohlc, width=0.5, colorup="g", colordown="r", alpha=1) ohlc["close"].plot(ax=ax) ay = fig.add_subplot(312) ohlc.plot(ax=ay) # ax.set_xticks(np.arange(0,len(ohlc.index),5)) # plt.savefig("1.png")
def prepare_data_env(self, inst, mid_day=True): if self.instruments[inst].ptype == instrument.ProductType.Option: return if self.daily_data_days > 0 or mid_day: #self.logger.debug('Updating historical daily data for %s' % self.scur_day.strftime('%Y-%m-%d')) daily_start = workdays.workday(self.scur_day, -self.daily_data_days, CHN_Holidays) daily_end = self.scur_day ddf = mysqlaccess.load_daily_data_to_df('fut_daily', inst, daily_start, daily_end, index_col=None) if len(ddf) > 0: self.instruments[inst].price = ddf['close'].iloc[-1] self.instruments[inst].last_update = 0 self.instruments[inst].prev_close = ddf['close'].iloc[-1] for fobj in self.day_data_func[inst]: ts = fobj.sfunc(ddf) if type(ts).__name__ == 'Series': if ts.name in ddf.columns: self.logger.warning( 'TimeSeries name %s is already in the columns for inst = %s' % (ts.name, inst)) ddf[ts.name] = ts elif type(ts).__name__ == 'DataFrame': for col_name in ts.columns: if col_name in ddf.columns: self.logger.warning( 'TimeSeries name %s is already in the columns for inst = %s' % (col_name, inst)) ddf[col_name] = ts[col_name] self.day_data[inst] = data_handler.DynamicRecArray(dataframe=ddf) if self.min_data_days > 0 or mid_day: #self.logger.debug('Updating historical min data for %s' % self.scur_day.strftime('%Y-%m-%d')) d_start = workdays.workday(self.scur_day, -self.min_data_days, CHN_Holidays) d_end = self.scur_day min_start = int(self.instruments[inst].start_tick_id / 1000) min_end = int(self.instruments[inst].last_tick_id / 1000) + 1 mdf = mysqlaccess.load_min_data_to_df('fut_min', inst, d_start, d_end, minid_start=min_start, minid_end=min_end, database='blueshale', index_col=None) mdf = backtest.cleanup_mindata(mdf, self.instruments[inst].product, index_col=None) mdf['bar_id'] = self.conv_bar_id(mdf['min_id'], inst) if len(mdf) > 0: min_date = mdf['date'].iloc[-1] if (len(self.day_data[inst]) == 0) or ( min_date > self.day_data[inst].data['date'][-1]): ddf = data_handler.conv_ohlc_freq(mdf, 'd', index_col=None) self.cur_day[inst]['open'] = float(ddf.open[-1]) self.cur_day[inst]['close'] = float(ddf.close[-1]) self.cur_day[inst]['high'] = float(ddf.high[-1]) self.cur_day[inst]['low'] = float(ddf.low[-1]) self.cur_day[inst]['volume'] = int(ddf.volume[-1]) self.cur_day[inst]['openInterest'] = int( ddf.openInterest[-1]) self.cur_min[inst]['datetime'] = pd.datetime( *mdf['datetime'].iloc[-1].timetuple()[0:-3]) self.cur_min[inst]['date'] = mdf['date'].iloc[-1] self.cur_min[inst]['open'] = float(mdf['open'].iloc[-1]) self.cur_min[inst]['close'] = float(mdf['close'].iloc[-1]) self.cur_min[inst]['high'] = float(mdf['high'].iloc[-1]) self.cur_min[inst]['low'] = float(mdf['low'].iloc[-1]) self.cur_min[inst]['volume'] = self.cur_day[inst]['volume'] self.cur_min[inst]['openInterest'] = self.cur_day[inst][ 'openInterest'] self.cur_min[inst]['min_id'] = int(mdf['min_id'].iloc[-1]) self.cur_min[inst]['bar_id'] = self.conv_bar_id( self.cur_min[inst]['min_id'], inst) self.instruments[inst].price = float(mdf['close'].iloc[-1]) self.instruments[inst].last_update = 0 #self.logger.debug('inst=%s tick data loaded for date=%s' % (inst, min_date)) if 1 not in self.min_data_func[inst]: self.min_data[inst][1] = data_handler.DynamicRecArray( dataframe=mdf) for m in sorted(self.min_data_func[inst]): if m != 1: bar_func = lambda ts: self.conv_bar_id(ts, inst) mdf_m = data_handler.conv_ohlc_freq( mdf, str(m) + 'min', index_col=None, bar_func=bar_func, extra_cols=['bar_id']) else: mdf_m = mdf for fobj in self.min_data_func[inst][m]: ts = fobj.sfunc(mdf_m) if type(ts).__name__ == 'Series': if ts.name in mdf_m.columns: self.logger.warning( 'TimeSeries name %s is already in the columns for inst = %s' % (ts.name, inst)) mdf_m[ts.name] = ts elif type(ts).__name__ == 'DataFrame': for col_name in ts.columns: if col_name in mdf_m.columns: self.logger.warning( 'TimeSeries name %s is already in the columns for inst = %s' % (col_name, inst)) mdf_m[col_name] = ts[col_name] self.min_data[inst][m] = data_handler.DynamicRecArray( dataframe=mdf_m)
def simcontract_min(config_file): sim_config = {} with open(config_file, 'r') as fp: sim_config = json.load(fp) bktest_split = sim_config['sim_func'].split('.') run_sim = __import__('.'.join(bktest_split[:-1])) for i in range(1, len(bktest_split)): run_sim = getattr(run_sim, bktest_split[i]) dir_name = config_file.split('.')[0] dir_name = dir_name.split(os.path.sep)[-1] test_folder = get_bktest_folder() file_prefix = test_folder + dir_name + os.path.sep if not os.path.exists(file_prefix): os.makedirs(file_prefix) sim_list = sim_config['products'] if type(sim_list[0]).__name__ != 'list': sim_list = [[str(asset)] for asset in sim_list] sim_mode = sim_config.get('sim_mode', 'OR') calc_coeffs = sim_config.get('calc_coeffs', [1, -1]) cont_maplist = sim_config.get('cont_maplist', []) sim_period = sim_config.get('sim_period', '-12m') need_daily = sim_config.get('need_daily', False) if len(cont_maplist) == 0: cont_maplist = [[0]] * len(sim_list) config = {} start_date = datetime.datetime.strptime(sim_config['start_date'], '%Y%m%d').date() config['start_date'] = start_date end_date = datetime.datetime.strptime(sim_config['end_date'], '%Y%m%d').date() config['end_date'] = end_date scen_dim = [ len(sim_config[s]) for s in sim_config['scen_keys']] outcol_list = ['asset', 'scenario'] + sim_config['scen_keys'] \ + ['sharp_ratio', 'tot_pnl', 'std_pnl', 'num_days', \ 'max_drawdown', 'max_dd_period', 'profit_dd_ratio', \ 'all_profit', 'tot_cost', 'win_ratio', 'num_win', 'num_loss', \ 'profit_per_win', 'profit_per_loss'] scenarios = [list(s) for s in np.ndindex(tuple(scen_dim))] config.update(sim_config['config']) if 'pos_class' in sim_config: config['pos_class'] = eval(sim_config['pos_class']) if 'proc_func' in sim_config: config['proc_func'] = eval(sim_config['proc_func']) file_prefix = file_prefix + sim_config['sim_name'] if 'close_daily' in config and config['close_daily']: file_prefix = file_prefix + 'daily_' config['file_prefix'] = file_prefix summary_df = pd.DataFrame() fname = config['file_prefix'] + 'summary.csv' if os.path.isfile(fname): summary_df = pd.DataFrame.from_csv(fname) for assets, cont_map in zip(sim_list, cont_maplist): file_prefix = config['file_prefix'] + '_' + sim_mode + '_' + '_'.join(assets) + '_' fname = file_prefix + 'stats.json' output = {'total': {}, 'cont': {}} if os.path.isfile(fname): with open(fname, 'r') as fp: output = json.load(fp) #if len(output['total'].keys()) == len(scenarios): # continue min_data = {} day_data = {} config['tick_base'] = 0 config['marginrate'] = (0, 0) rollrule = '-50b' config['exit_min'] = config.get('exit_min', 2057) config['no_trade_set'] = config.get('no_trade_set', []) if assets[0] in ['cu', 'al', 'zn']: rollrule = '-1b' elif assets[0] in ['IF', 'IH', 'IC']: rollrule = '-2b' elif assets[0] in ['au', 'ag']: rollrule = '-25b' elif assets[0] in ['TF', 'T']: rollrule = '-20b' rollrule = config.get('rollrule', rollrule) contlist = {} exp_dates = {} for i, prod in enumerate(assets): cont_mth, exch = mysqlaccess.prod_main_cont_exch(prod) contlist[prod] = misc.contract_range(prod, exch, cont_mth, start_date, end_date) exp_dates[prod] = [misc.contract_expiry(cont) for cont in contlist[prod]] edates = [ misc.day_shift(d, rollrule) for d in exp_dates[prod] ] sdates = [ misc.day_shift(d, sim_period) for d in exp_dates[prod] ] config['tick_base'] += trade_offset_dict[prod] config['marginrate'] = ( max(config['marginrate'][0], sim_margin_dict[prod]), max(config['marginrate'][1], sim_margin_dict[prod])) min_data[prod] = {} day_data[prod] = {} for cont, sd, ed in zip(contlist[prod], sdates, edates): minid_start = 1500 minid_end = 2114 if prod in misc.night_session_markets: minid_start = 300 tmp_df = mysqlaccess.load_min_data_to_df('fut_min', cont, sd, ed, minid_start, minid_end, database = 'hist_data') tmp_df['contract'] = cont min_data[prod][cont] = cleanup_mindata( tmp_df, prod) if need_daily: tmp_df = mysqlaccess.load_daily_data_to_df('fut_daily', cont, sd, ed, database = 'hist_data') day_data[prod][cont] = tmp_df if 'offset' in sim_config: config['offset'] = sim_config['offset'] * config['tick_base'] else: config['offset'] = config['tick_base'] for ix, s in enumerate(scenarios): fname1 = file_prefix + str(ix) + '_trades.csv' fname2 = file_prefix + str(ix) + '_dailydata.csv' if os.path.isfile(fname1) and os.path.isfile(fname2): continue for key, seq in zip(sim_config['scen_keys'], s): config[key] = sim_config[key][seq] df_list = [] trade_list = [] for idx in range(abs(min(cont_map)), len(contlist[assets[0]]) - max(cont_map)): cont = contlist[assets[0]][idx] edate = misc.day_shift(exp_dates[assets[0]][idx], rollrule) if sim_mode == 'OR': mdf = min_data[assets[0]][cont] mdf = mdf[mdf.date <= edate] if need_daily: ddf = day_data[assets[0]][cont] config['ddf'] = ddf[ddf.index <= edate] if len(config['ddf']) < 10: continue else: mode_keylist = sim_mode.split('-') smode = mode_keylist[0] cmode = mode_keylist[1] all_data = [] if smode == 'TS': all_data = [min_data[assets[0]][contlist[assets[0]][idx+i]] for i in cont_map] else: all_data = [min_data[asset][contlist[asset][idx+i]] for asset, i in zip(assets, cont_map)] if cmode == 'Full': mdf = pd.concat(all_data, axis = 1, join = 'inner') mdf.columns = [iter + str(i) for i, x in enumerate(all_data) for iter in x.columns] mdf = mdf[ mdf.date0 < edate] else: #print all_data[0], all_data[1] for i, (coeff, tmpdf) in enumerate(zip(calc_coeffs, all_data)): if i == 0: xopen = tmpdf['open'] * coeff xclose = tmpdf['close'] * coeff else: xopen = xopen + tmpdf['open'] * coeff xclose = xclose + tmpdf['close'] * coeff xopen = xopen.dropna() xclose = xclose.dropna() xhigh = pd.concat([xopen, xclose], axis = 1).max(axis = 1) xlow = pd.concat([xopen, xclose], axis = 1).min(axis = 1) col_list = ['date', 'min_id', 'volume', 'openInterest'] mdf = pd.concat([ xopen, xhigh, xlow, xclose] + [all_data[0][col] for col in col_list], axis = 1, join = 'inner') mdf.columns = ['open', 'high', 'low', 'close'] + col_list mdf['contract'] = cont #print mdf if need_daily: if smode == 'TS': all_data = [day_data[assets[0]][contlist[assets[0]][idx+i]] for i in cont_map] else: all_data = [day_data[asset][contlist[asset]][idx+i] for asset, i in zip(assets, cont_map)] if cmode == 'Full': ddf = pd.concat(all_data, axis = 1, join = 'inner') ddf.columns = [iter + str(i) for i, x in enumerate(all_data) for iter in x.columns] config['ddf'] = ddf[ddf.index <= edate] else: for i, (coeff, tmpdf) in enumerate(zip(calc_coeffs, all_data)): if i == 0: xopen = tmpdf['open'] * coeff xclose = tmpdf['close'] * coeff else: xopen = xopen + tmpdf['open'] * coeff xclose = xclose + tmpdf['close'] * coeff xhigh = pd.concat([xopen, xclose], axis = 1).max(axis = 1) xlow = pd.concat([xopen, xclose], axis = 1).min(axis = 1) col_list = ['volume', 'openInterest'] ddf = pd.concat([ xopen, xhigh, xlow, xclose] + [all_data[0][col] for col in col_list], axis = 1, join = 'inner') ddf.columns = ['open', 'high', 'low', 'close'] + col_list ddf['contract'] = cont config['ddf'] = ddf[ddf.index <= edate] if len(config['ddf']) < 10: continue df = mdf.copy(deep = True) df, closed_trades = run_sim( df, config) df_list.append(df) trade_list = trade_list + closed_trades (res_pnl, ts) = get_pnl_stats( [df], config['capital'], config['marginrate'], 'm') res_trade = get_trade_stats( trade_list ) res = dict( res_pnl.items() + res_trade.items()) res.update(dict(zip(sim_config['scen_keys'], s))) res['asset'] = cont if cont not in output['cont']: output['cont'][cont] = {} output['cont'][cont][ix] = res (res_pnl, ts) = get_pnl_stats( df_list, config['capital'], config['marginrate'], 'm') res_trade = get_trade_stats( trade_list ) res = dict( res_pnl.items() + res_trade.items()) res.update(dict(zip(sim_config['scen_keys'], s))) res['asset'] = '_'.join(assets) output['total'][ix] = res print 'saving results for asset = %s, scen = %s' % ('_'.join(assets), str(ix)) all_trades = {} for i, tradepos in enumerate(trade_list): all_trades[i] = strat.tradepos2dict(tradepos) trades = pd.DataFrame.from_dict(all_trades).T trades.to_csv(fname1) ts.to_csv(fname2) fname = file_prefix + 'stats.json' try: with open(fname, 'w') as ofile: json.dump(output, ofile) except: continue cont_df = pd.DataFrame() for idx in range(abs(min(cont_map)), len(contlist[assets[0]]) - max(cont_map)): cont = contlist[assets[0]][idx] if cont not in output['cont']: continue res = scen_dict_to_df(output['cont'][cont]) out_res = res[outcol_list] if len(cont_df) == 0: cont_df = out_res[:20].copy(deep = True) else: cont_df = cont_df.append(out_res[:20]) fname = file_prefix + 'cont_stat.csv' cont_df.to_csv(fname) res = scen_dict_to_df(output['total']) out_res = res[outcol_list] if len(summary_df) == 0: summary_df = out_res[:20].copy(deep = True) else: summary_df = summary_df.append(out_res[:20]) fname = config['file_prefix'] + 'summary.csv' summary_df.to_csv(fname) return