def testAudit1_b(self):
    """Re-parse test_audit1.log, checking walk-forward fold contents."""
    tlog = '%s/test_audit1.log' % os.path.dirname(__file__)
    aud = AuditParser(tlog)
    tssbrun = aud.tssbrun()
    folds = tssbrun.folds()
    self.assertEqual(len(folds), 1)
    self.assertEqual(folds[0].name(), '2013')
    f4 = folds[0].models()['FILTLONG4']
    # factors come back unordered; sort for a deterministic comparison
    facs = sorted(f4.defn().get_factors())
    self.assertEqual(len(facs), 3)
    wanted_facs = [('CONSTANT', 0.138695, 1),
                   ('QUAD_ATR_100', -0.135129, 2),
                   ('REAC_50', 0.137624, 1)]
    for got, want in zip(facs, wanted_facs):
        self.assertEqual(got, want)
    # spot check in-sample stats
    insamp = f4.insample_stats()
    for attr, want in [('num_above_high', 223),
                       ('long_only_imp', 1.501),
                       ('hi_thresh', 0.22840),
                       ('lo_thresh', 0.10384)]:
        self.assertEqual(getattr(insamp, attr), want)
    # ...and out-of-sample stats
    outsamp = f4.oosample_stats()
    for attr, want in [('num_below_low', 5),
                       ('mean_below_low', 0.06147),
                       ('hi_thresh', 0.22840),
                       ('lo_thresh', 0.10384)]:
        self.assertEqual(getattr(outsamp, attr), want)
def testAudit3(self):
    """Parse a log with quadratic models plus a mix of committees and models."""
    tlog = '%s/test_audit3.log' % os.path.dirname(__file__)
    aud = AuditParser(tlog)
    wfmstats = aud.tssbrun().walkforward_summ()
    self.assertEqual(len(wfmstats), 11)
    self.assertTrue(wfmstats.has_key('COMM6'))
    # verify every parsed walk-forward statistic for the COMM6 committee
    comm6 = wfmstats['COMM6']
    expected = [('target_grand_mean', 0.19829),
                ('total_cases', 828),
                ('num_above_high', 263),
                ('num_below_low', 282),
                ('mean_above_high', 0.31200),
                ('mean_below_low', 0.13514),
                ('roc_area', 0.55449),
                ('long_only_imp', 1.396),
                ('short_only_imp', 1.221),
                ('long_total_ret', 82.06),
                ('long_maxdd', 2.86),
                ('short_total_ret', -38.11),
                ('short_maxdd', 50.22)]
    for attr, want in expected:
        self.assertEqual(getattr(comm6, attr), want)
def testAudit4(self):
    """Check walk-forward summary parsing against test_audit4.log."""
    tlog = '%s/test_audit4.log' % os.path.dirname(__file__)
    aud = AuditParser(tlog)
    tssbrun = aud.tssbrun()
    self.assertEqual(len(tssbrun.folds()), 1)
    wfmstats = aud.tssbrun().walkforward_summ()
    self.assertEqual(len(wfmstats), 5)
    self.assertTrue(wfmstats.has_key('FILTLONG5'))
    # spot check a couple of the long-side stats
    long5 = wfmstats['FILTLONG5']
    self.assertEqual(long5.long_profit_fac, 1.336)
    self.assertEqual(long5.long_only_imp, 1.387)
def run_iteration(year, lag): print 'Running iteration for year %d' % year workdir = '%s' % year if os.path.exists(workdir): shutil.rmtree(workdir) os.mkdir(workdir) os.chdir(workdir) # first instantiate our each script files varmap = { '<YEAR_START>' : '%s' % (year - lag), '<YEAR_END>' : '%s' % year, '<VAL_YEAR>' : '%s' % (year + 1), '<TEST_YEAR>' : '%s' % (year + 2) } for s in stage1_scripts: apply_script_template(os.path.join("..",s), s, varmap) # first run the stage 1 script to find models that perform well # on the test set log = 'stage1.log' run_tssb_wrapper(stage1_scripts[0],log) sub = AuditParser(log) # there should be exactly one fold based on stage1.txt assert( len(sub.tssbrun().folds()) == 1 ) fold = sub.tssbrun().folds()[0] # now we want to rank models by performance on the test set and # pick the best two ranked = sorted(fold.models().itervalues(), key=lambda x: x.oosample_stats().long_only_imp, reverse=True) # add the variables from the two best models to the map and # ansert into our stage 2 script file varmap['<GROUP1>'] = get_vars_from_modeldefn(ranked[0].defn()) varmap['<GROUP2>'] = get_vars_from_modeldefn(ranked[1].defn()) apply_script_template(os.path.join("..",'stage2.txt'), 'stage2.txt', varmap) # Now stage 2 is our true walk-forward test log = 'stage2.log' run_tssb_wrapper(stage2_scripts[0],log) sub = AuditParser(log) os.chdir("..") return sub
def rescan_iteration(self, year):
    """Return an AuditParser over an existing iteration's audit log.

    Prefers the validation-test log when present, falling back to the
    plain preselect log.  Raises Exception when neither file exists.
    """
    preferred = '%d/pselect_test_audit.log' % year
    fallback = '%d/pselect_audit.log' % year
    audfile = preferred if os.path.exists(preferred) else fallback
    if not os.path.exists(audfile):
        raise Exception('Cannot rescan - %s does not exist!' % audfile)
    return AuditParser(audfile)
def testAudit2(self):
    """Parse a FIND GROUPS audit log and spot check two groups."""
    tlog = '%s/test_audit2.log' % os.path.dirname(__file__)
    aud = AuditParser(tlog)
    self.assertEqual(len(aud.tssbrun().folds()), 1)
    fgps = aud.tssbrun().folds()[0].models()
    self.assertEqual(len(fgps), 5)
    # each checked group carries one indicator factor plus a constant
    cases = [('1', [('DINT_50', 0.005927), ('CONSTANT', 0.171395)]),
             ('5', [('RDMORLET100', 0.009905), ('CONSTANT', 0.225174)])]
    for key, wanted in cases:
        factors = fgps[key].defn().get_factors()
        self.assertEqual(len(factors), 2)
        for i, (fname, coef) in enumerate(wanted):
            self.assertEqual(factors[i][0], fname)
            self.assertEqual(factors[i][1], coef)
def run_iteration(year, lag): print 'Running iteration for year %d' % year workdir = '%s' % year if os.path.exists(workdir): shutil.rmtree(workdir) os.mkdir(workdir) os.chdir(workdir) # first instantiate our each script files varmap = { '<YEAR_START>': '%s' % (year - lag), '<YEAR_END>': '%s' % year, '<VAL_YEAR>': '%s' % (year + 1), '<TEST_YEAR>': '%s' % (year + 2) } for s in stage1_scripts: apply_script_template(os.path.join("..", s), s, varmap) # first run the stage 1 script to find models that perform well # on the test set log = 'stage1.log' run_tssb_wrapper(stage1_scripts[0], log) sub = AuditParser(log) # there should be exactly one fold based on stage1.txt assert (len(sub.tssbrun().folds()) == 1) fold = sub.tssbrun().folds()[0] # now we want to rank models by performance on the test set and # pick the best two ranked = sorted(fold.models().itervalues(), key=lambda x: x.oosample_stats().long_only_imp, reverse=True) # add the variables from the two best models to the map and # ansert into our stage 2 script file varmap['<GROUP1>'] = get_vars_from_modeldefn(ranked[0].defn()) varmap['<GROUP2>'] = get_vars_from_modeldefn(ranked[1].defn()) apply_script_template(os.path.join("..", 'stage2.txt'), 'stage2.txt', varmap) # Now stage 2 is our true walk-forward test log = 'stage2.log' run_tssb_wrapper(stage2_scripts[0], log) sub = AuditParser(log) os.chdir("..") return sub
def run_iteration(self,year, vars_, lag): print 'Running iteration for year %d' % year workdir = '%s' % year if os.path.exists(workdir): shutil.rmtree(workdir) os.mkdir(workdir) os.chdir(workdir) # update values for this iteration self._varmap['<YEAR_START>'] = '%s' % (year - lag) self._varmap['<YEAR_END>'] = '%s' % year if self._with_val: self._varmap['<VAL_YEAR>'] = '%s' % (year + 1) self._varmap['<TEST_YEAR>'] = '%s' % (year + 2) else: self._varmap['<VAL_YEAR>'] = '%s' % (year + 1) self._varmap['<TEST_YEAR>'] = '%s' % (year + 1) # check for a special case here - if the DB was generated by build_ind_dbs # the first variable is always RSI_99 which is a dummy variable used to # make TSSB happy if vars_.varlist()[0] == 'RSI_99': self._varmap['<VAR_1>'] = vars_.varlist()[1] else: self._varmap['<VAR_1>'] = vars_.varlist()[0] self._varmap['<VAR_N>'] = vars_.varlist()[-1] for s in self._stage1_scripts: self.apply_script_template(os.path.join("..","..",s), s, self._varmap) # first run the subsample test to narrow predictors log = 'sub_audit.log' self.run_tssb_wrapper(self._stage1_scripts[0],log) sub = AuditParser(log) varlist = sub.tssbrun().selection_stats().list_all_gt(self._var_thresh) app_dbs = '' for var in varlist: app_dbs = app_dbs + ('APPEND DATABASE "..\\\\..\\\\db\\\\%s.DAT" ;\r\n' % var[0]) self._varmap['<APPEND_DATABASES>'] = app_dbs self.apply_script_template(os.path.join("..","..",'createdb.txt'), 'createdb.txt', self._varmap) log = 'create_audit.log' retry_cnt = 0 while retry_cnt < 3: try: # don't understand this, but dealing with periodic failures of # this step in the iteration - most often several years in self.run_tssb_wrapper('createdb.txt',log) break except: ++retry_cnt print 'tssb createdb.txt failed, trying %d more times' % (3-retry_cnt) time.sleep(5.0) self._varmap['<VAR_1>'] = varlist[0][0] self._varmap['<VAR_N>'] = varlist[-1][0] self.apply_script_template(os.path.join("..","..",'findgroups.txt'), 'findgroups.txt', self._varmap) # now get our 
groups log = 'fgroup_audit.log' self.run_tssb_wrapper('findgroups.txt',log) groups = AuditParser(log) fold = groups.tssbrun().folds()[0] for (name,modeliter) in fold.models().iteritems(): groupname = '<GROUP%s>' % name varspec = '' for var in modeliter.defn().get_factors(): if var[0] != 'CONSTANT': varspec = varspec + ' ' + var[0] self._varmap[groupname] = varspec # there is a potential that we didn't supply enough # variables to find the target number of groups (currently 5) # need to make sure we don't use stale <GROUP> values from # an earlier iteration. We reuse models starting from the # top of the list to fill in up to 5 if len(fold.models()) < 5: count = len(fold.models()) for i in range(count+1,6): fromkey = '<GROUP%d>' % i tokey = '<GROUP%d>' % (i-count) self._varmap[fromkey] = self._varmap[tokey] self.apply_script_template(os.path.join("..","..",'preselect.txt'), 'preselect.txt', self._varmap) log = 'pselect_audit.log' self.run_tssb_wrapper("preselect.txt",log) ret = AuditParser(log) if self._with_val: varmap2 = copy.deepcopy(self._varmap) fold = ret.tssbrun().folds()[0] ranked = sorted(fold.models().itervalues(), key=lambda x: x.oosample_stats().long_only_imp, reverse=True) for i in range(1,4): groupname = '<GROUP%d>' % i modeliter = ranked[i-1] # we know that the model name is FILTLONGN where N=[1..5] and further # that <GROUPN> corresponds to FILTLONGN from the previous step fromkey = '<GROUP%s>' % modeliter.name()[-1] varmap2[groupname] = self._varmap[fromkey] self.apply_script_template(os.path.join("..","..",'preselect_test.txt'), 'preselect_test.txt', varmap2) log = 'pselect_test_audit.log' self.run_tssb_wrapper("preselect_test.txt",log) ret = AuditParser(log) os.chdir("..") return ret
def run_iteration(self, year, vars_, lag): print 'Running iteration for year %d' % year workdir = '%s' % year if os.path.exists(workdir): shutil.rmtree(workdir) os.mkdir(workdir) os.chdir(workdir) # update values for this iteration self._varmap['<YEAR_START>'] = '%s' % (year - lag) self._varmap['<YEAR_END>'] = '%s' % year if self._with_val: self._varmap['<VAL_YEAR>'] = '%s' % (year + 1) self._varmap['<TEST_YEAR>'] = '%s' % (year + 2) else: self._varmap['<VAL_YEAR>'] = '%s' % (year + 1) self._varmap['<TEST_YEAR>'] = '%s' % (year + 1) # check for a special case here - if the DB was generated by build_ind_dbs # the first variable is always RSI_99 which is a dummy variable used to # make TSSB happy if vars_.varlist()[0] == 'RSI_99': self._varmap['<VAR_1>'] = vars_.varlist()[1] else: self._varmap['<VAR_1>'] = vars_.varlist()[0] self._varmap['<VAR_N>'] = vars_.varlist()[-1] for s in self._stage1_scripts: self.apply_script_template(os.path.join("..", "..", s), s, self._varmap) # first run the subsample test to narrow predictors log = 'sub_audit.log' self.run_tssb_wrapper(self._stage1_scripts[0], log) sub = AuditParser(log) varlist = sub.tssbrun().selection_stats().list_all_gt(self._var_thresh) app_dbs = '' for var in varlist: app_dbs = app_dbs + ( 'APPEND DATABASE "..\\\\..\\\\db\\\\%s.DAT" ;\r\n' % var[0]) self._varmap['<APPEND_DATABASES>'] = app_dbs self.apply_script_template(os.path.join("..", "..", 'createdb.txt'), 'createdb.txt', self._varmap) log = 'create_audit.log' retry_cnt = 0 while retry_cnt < 3: try: # don't understand this, but dealing with periodic failures of # this step in the iteration - most often several years in self.run_tssb_wrapper('createdb.txt', log) break except: ++retry_cnt print 'tssb createdb.txt failed, trying %d more times' % ( 3 - retry_cnt) time.sleep(5.0) self._varmap['<VAR_1>'] = varlist[0][0] self._varmap['<VAR_N>'] = varlist[-1][0] self.apply_script_template(os.path.join("..", "..", 'findgroups.txt'), 'findgroups.txt', self._varmap) # 
now get our groups log = 'fgroup_audit.log' self.run_tssb_wrapper('findgroups.txt', log) groups = AuditParser(log) fold = groups.tssbrun().folds()[0] for (name, modeliter) in fold.models().iteritems(): groupname = '<GROUP%s>' % name varspec = '' for var in modeliter.defn().get_factors(): if var[0] != 'CONSTANT': varspec = varspec + ' ' + var[0] self._varmap[groupname] = varspec # there is a potential that we didn't supply enough # variables to find the target number of groups (currently 5) # need to make sure we don't use stale <GROUP> values from # an earlier iteration. We reuse models starting from the # top of the list to fill in up to 5 if len(fold.models()) < 5: count = len(fold.models()) for i in range(count + 1, 6): fromkey = '<GROUP%d>' % i tokey = '<GROUP%d>' % (i - count) self._varmap[fromkey] = self._varmap[tokey] self.apply_script_template(os.path.join("..", "..", 'preselect.txt'), 'preselect.txt', self._varmap) log = 'pselect_audit.log' self.run_tssb_wrapper("preselect.txt", log) ret = AuditParser(log) if self._with_val: varmap2 = copy.deepcopy(self._varmap) fold = ret.tssbrun().folds()[0] ranked = sorted(fold.models().itervalues(), key=lambda x: x.oosample_stats().long_only_imp, reverse=True) for i in range(1, 4): groupname = '<GROUP%d>' % i modeliter = ranked[i - 1] # we know that the model name is FILTLONGN where N=[1..5] and further # that <GROUPN> corresponds to FILTLONGN from the previous step fromkey = '<GROUP%s>' % modeliter.name()[-1] varmap2[groupname] = self._varmap[fromkey] self.apply_script_template( os.path.join("..", "..", 'preselect_test.txt'), 'preselect_test.txt', varmap2) log = 'pselect_test_audit.log' self.run_tssb_wrapper("preselect_test.txt", log) ret = AuditParser(log) os.chdir("..") return ret
def testAudit1(self):
    """Walk-forward summary and selection-count parsing of test_audit1.log."""
    tlog = '%s/test_audit1.log' % os.path.dirname(__file__)
    aud = AuditParser(tlog)
    wfmstats = aud.tssbrun().walkforward_summ()
    self.assertEqual(len(wfmstats), 5)
    self.assertTrue(wfmstats.has_key('FILTLONG5'))
    # FILTLONG5 was a somewhat special case where 0 trades were below the
    # low threshold, which changes the output format a bit
    long5 = wfmstats['FILTLONG5']
    for attr, want in [('target_grand_mean', -0.0141),
                       ('total_cases', 16),
                       ('num_above_high', 9),
                       ('num_below_low', 0),
                       ('mean_above_high', -0.16248),
                       ('mean_below_low', 0.0),
                       ('roc_area', 0.24178),
                       ('long_profit_fac', 0.531),
                       ('long_only_imp', 0.560),
                       ('short_profit_fac', 1.0),
                       ('short_only_imp', 1.0),
                       ('long_total_ret', -1.46),
                       ('long_maxdd', 2.59),
                       ('short_total_ret', 0.0),
                       ('short_maxdd', 0.0)]:
        self.assertEqual(getattr(long5, attr), want)
    # FILTLONG4 is a standard case
    long4 = wfmstats['FILTLONG4']
    for attr, want in [('target_grand_mean', -0.0141),
                       ('total_cases', 16),
                       ('num_above_high', 3),
                       ('num_below_low', 5),
                       ('mean_above_high', -0.20522),
                       ('mean_below_low', 0.06147),
                       ('roc_area', 0.43103),
                       ('long_profit_fac', 0.458),
                       ('long_only_imp', 0.484),
                       ('short_profit_fac', 0.795),
                       ('short_only_imp', 0.753),
                       ('long_total_ret', -0.62),
                       ('long_maxdd', 1.14),
                       ('short_total_ret', -0.31),
                       ('short_maxdd', 1.10)]:
        self.assertEqual(getattr(long4, attr), want)
    # this file also exercises the selection count parser
    selstats = aud.tssbrun().selection_stats()
    for model, count in [('FILTLONG1', 16), ('FILTLONG2', 18),
                         ('FILTLONG3', 20), ('FILTLONG4', 16)]:
        self.assertEqual(len(selstats.get_model_vars(model)), count)
    sel5 = selstats.get_model_vars('FILTLONG3')
    self.assertEqual(len(sel5), 20)
    self.assertEqual(sel5[0][0], 'IDMORLET100')
    self.assertEqual(sel5[0][1], 26.67)
    self.assertEqual(sel5[-1][0], 'LIN_ATR_50')
    self.assertEqual(sel5[-1][1], 3.33)
    summ = selstats.list_all_gt(5.0)
    self.assertEqual(len(summ), 7)
    self.assertEqual(summ[0][0], 'IDMORLET100')
    self.assertEqual(summ[0][1], 18.668)
    self.assertEqual(summ[1][0], 'INT_50')
    self.assertEqual(summ[1][1], 9.998)
def run_filter_update(self): filtbase = os.path.join(self._basedir,'filters') # update our trade filters long first filtlong = os.path.join(filtbase,'filt_long') # copy the new tssb_(long|short) files shutil.copy('tssb_long.csv', os.path.join(filtlong,'tssb_long.csv')) cwd = os.getcwd() os.chdir(filtlong) # before we try to delete the db directory we need to make sure there # aren't any tssb processes running since they will likely have open # files in there while 'tssb64.exe' in get_process_list(): print 'Warning...tssb64.exe process already running, attempting to kill' kill_tssb() time.sleep(1) # important to clear any previous db directory because TSSB doesn't # overwrite database files (and silently :() if os.path.exists('db'): shutil.rmtree('db') cmd = 'python %s/build_ind_dbs.py TREND_VOLATILITY3.txt db' % os.path.join(self._basedir,'tssb','bin') os.system(cmd) self.run_tssb_wrapper(os.path.join(filtlong,"preselect_test.txt"),'pselect_test_audit.log') longparse = AuditParser('pselect_test_audit.log') longdb = DbParser('FILTLONG.DAT') os.chdir(cwd) # then short... filtshort = os.path.join(filtbase,'filt_short') # copy the new tssb_(long|short) files shutil.copy('tssb_short.csv', os.path.join(filtshort,'tssb_short.csv')) cwd = os.getcwd() os.chdir(filtshort) if os.path.exists('db'): shutil.rmtree('db') cmd = 'python %s/build_ind_dbs.py TREND_VOLATILITY3.txt db' % os.path.join(self._basedir,'tssb','bin') os.system(cmd) self.run_tssb_wrapper(os.path.join(filtshort,"preselect_test.txt"),'pselect_test_audit.log') shortparse = AuditParser('pselect_test_audit.log') shortdb = DbParser('FILTSHORT.DAT') os.chdir(cwd) for alert in self._alerts: # model values are hard-coded for the year based on the tradefilt run if alert.action == 'BUY': parse = longparse db = longdb model = 'COMM5' else: parse = shortparse db = shortdb model = 'COMM5' self.check_filter(alert, parse, db, model)