def test_qlearning(description, group, world_file, best_reward, median_reward, max_time, points, grader): points_earned = 0.0 # initialize points for this test case try: incorrect = True if not 'QLearner' in globals(): import importlib m = importlib.import_module('QLearner') globals()['QLearner'] = m # Unpack test case world = np.array([list(map(float,s.strip().split(','))) for s in util.get_robot_world_file(world_file).readlines()]) student_reward = None student_author = None msgs = [] if group=='nodyna': def timeoutwrapper_nodyna(): # Note: the following will NOT be commented durring final grading # random.seed(robot_qlearning_testing_seed) # np.random.seed(robot_qlearning_testing_seed) learner = QLearner.QLearner(num_states=100,\ num_actions = 4, \ alpha = 0.2, \ gamma = 0.9, \ rar = 0.98, \ radr = 0.999, \ dyna = 0, \ verbose=False) return qltest(worldmap=world,iterations=500,max_steps=10000,learner=learner,verbose=False) student_reward = run_with_timeout(timeoutwrapper_nodyna,max_time,(),{}) incorrect = False if student_reward < 1.5*median_reward: incorrect = True msgs.append(" Reward too low, expected %s, found %s"%(median_reward,student_reward)) elif group=='dyna': def timeoutwrapper_dyna(): # Note: the following will NOT be commented durring final grading # random.seed(robot_qlearning_testing_seed) # np.random.seed(robot_qlearning_testing_seed) learner = QLearner.QLearner(num_states=100,\ num_actions = 4, \ alpha = 0.2, \ gamma = 0.9, \ rar = 0.5, \ radr = 0.99, \ dyna = 200, \ verbose=False) return qltest(worldmap=world,iterations=50,max_steps=10000,learner=learner,verbose=False) student_reward = run_with_timeout(timeoutwrapper_dyna,max_time,(),{}) incorrect = False if student_reward < 1.5*median_reward: incorrect = True msgs.append(" Reward too low, expected %s, found %s"%(median_reward,student_reward)) elif group=='author': points_earned = -20 def timeoutwrapper_author(): # Note: the following will NOT be commented durring final grading # 
random.seed(robot_qlearning_testing_seed) # np.random.seed(robot_qlearning_testing_seed) learner = QLearner.QLearner(num_states=100,\ num_actions = 4, \ alpha = 0.2, \ gamma = 0.9, \ rar = 0.98, \ radr = 0.999, \ dyna = 0, \ verbose=False) return learner.author() student_author = run_with_timeout(timeoutwrapper_author,max_time,(),{}) student_reward = best_reward+1 incorrect = False if (student_author is None) or (student_author=='tb34'): incorrect = True msgs.append(" author() method not implemented correctly. Found {}".format(student_author)) else: points_earned = points if (not incorrect): points_earned += points if incorrect: inputs_str = " group: {}\n" \ " world_file: {}\n"\ " median_reward: {}\n".format(group, world_file, median_reward) raise IncorrectOutput("Test failed on one or more output criteria.\n Inputs:\n{}\n Failures:\n{}".format(inputs_str, "\n".join(msgs))) except Exception as e: # Test result: failed msg = "Test case description: {}\n".format(description) # Generate a filtered stacktrace, only showing erroneous lines in student file(s) tb_list = tb.extract_tb(sys.exc_info()[2]) for i in range(len(tb_list)): row = tb_list[i] tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3]) # show only filename instead of long absolute path tb_list = [row for row in tb_list if row[0] in ['QLearner.py','StrategyLearner.py']] if tb_list: msg += "Traceback:\n" msg += ''.join(tb.format_list(tb_list)) # contains newlines elif 'grading_traceback' in dir(e): msg += "Traceback:\n" msg += ''.join(tb.format_list(e.grading_traceback)) msg += "{}: {}".format(e.__class__.__name__, str(e)) # Report failure result to grader, with stacktrace grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg)) raise else: # Test result: passed (no exceptions) grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
def test_strategy(description, insample_args, outsample_args, benchmark_type, benchmark, impact, train_time, test_time, max_time, seed, grader):
    """Test StrategyLearner.

    Requires test description, insample args (dict), outsample args (dict),
    benchmark_type (str), benchmark (float), max time (seconds), points for
    this test case (int), random seed (long), and a grader fixture.

    Fixes vs. original: Python-3 raise syntax (the "raise Exc, msg" statement
    form is a SyntaxError), range instead of xrange, str(e) instead of the
    removed e.message, and .iloc instead of the removed pandas .ix indexer.
    """
    points_earned = 0.0  # initialize points for this test case
    try:
        incorrect = True
        # Import the student's StrategyLearner module once, cached in globals().
        if 'StrategyLearner' not in globals():
            import importlib
            m = importlib.import_module('StrategyLearner')
            globals()['StrategyLearner'] = m
        # 'clean' benchmarks must also be beaten out of sample.
        outsample_cr_to_beat = None
        if benchmark_type == 'clean':
            outsample_cr_to_beat = benchmark

        def timeoutwrapper_strategylearner():
            # Set fixed seed for repeatability.
            np.random.seed(seed)
            random.seed(seed)
            learner = StrategyLearner.StrategyLearner(verbose=False, impact=impact)
            tmp = time.time()
            learner.addEvidence(**insample_args)
            train_t = time.time() - tmp
            tmp = time.time()
            insample_trades_1 = learner.testPolicy(**insample_args)
            test_t = time.time() - tmp
            # Second identical call checks testPolicy() is deterministic.
            insample_trades_2 = learner.testPolicy(**insample_args)
            tmp = time.time()
            outsample_trades = learner.testPolicy(**outsample_args)
            out_test_t = time.time() - tmp
            return insample_trades_1, insample_trades_2, outsample_trades, train_t, test_t, out_test_t

        msgs = []
        in_trades_1, in_trades_2, out_trades, train_t, test_t, out_test_t = run_with_timeout(
            timeoutwrapper_strategylearner, max_time, (), {})
        incorrect = False

        def _scan_trades(trades, label):
            # Walk one trades DF top to bottom; report the first illegal trade
            # size (abs not in {0, MAX_HOLDINGS, 2*MAX_HOLDINGS}) or the first
            # point where cumulative holdings exceed +/-MAX_HOLDINGS.
            # Appends to msgs and returns True when a violation is found.
            csum = 0.0
            for date, trade in trades.iterrows():
                csum += trade.iloc[0]
                if (trade.iloc[0] != 0) and \
                   (trade.abs().iloc[0] != MAX_HOLDINGS) and \
                   (trade.abs().iloc[0] != 2 * MAX_HOLDINGS):
                    msgs.append(" illegal trade in {} DF. abs(trade) not one of ({},{},{}).\n Date {}, Trade {}"
                                .format(label, 0, MAX_HOLDINGS, 2 * MAX_HOLDINGS, date, trade))
                    return True
                elif abs(csum) > MAX_HOLDINGS:
                    msgs.append(" holdings more than {} long or short in {} DF. Date {}, Trade {}"
                                .format(MAX_HOLDINGS, label, date, trade))
                    return True
            return False

        if len(in_trades_1.shape) != 2 or in_trades_1.shape[1] != 1:
            incorrect = True
            msgs.append(" First insample trades DF has invalid shape: {}".format(in_trades_1.shape))
        elif len(in_trades_2.shape) != 2 or in_trades_2.shape[1] != 1:
            incorrect = True
            msgs.append(" Second insample trades DF has invalid shape: {}".format(in_trades_2.shape))
        elif len(out_trades.shape) != 2 or out_trades.shape[1] != 1:
            incorrect = True
            msgs.append(" Out-of-sample trades DF has invalid shape: {}".format(out_trades.shape))
        else:
            # Scan all three DFs (not short-circuited) so every violation is reported.
            bad_1 = _scan_trades(in_trades_1, "first insample")
            bad_2 = _scan_trades(in_trades_2, "second insample")
            bad_3 = _scan_trades(out_trades, "out-of-sample")
            if bad_1 or bad_2 or bad_3:
                incorrect = True
        if not incorrect:
            # Timing checks: training and query must each finish within budget.
            if train_t > train_time:
                incorrect = True
                msgs.append(" addEvidence() took {} seconds, max allowed {}".format(train_t, train_time))
            else:
                points_earned += 1.0
            if test_t > test_time:
                incorrect = True
                msgs.append(" testPolicy() took {} seconds, max allowed {}".format(test_t, test_time))
            else:
                points_earned += 2.0
            # Determinism check: two identical testPolicy() calls must agree.
            if not ((in_trades_1 == in_trades_2).all()[0]):
                incorrect = True
                mismatches = in_trades_1.join(in_trades_2, how='outer', lsuffix='1', rsuffix='2')
                # pandas .ix was removed; .iloc preserves the positional intent.
                mismatches = mismatches[mismatches.iloc[:, 0] != mismatches.iloc[:, 1]]
                msgs.append(" consecutive calls to testPolicy() with same input did not produce same output:")
                msgs.append(" Mismatched trades:\n {}".format(mismatches))
            else:
                points_earned += 2.0
            # Returns: in-sample must beat the benchmark; out-of-sample too for
            # 'clean' benchmarks, otherwise only the out-of-sample timing counts.
            student_insample_cr = evalPolicy2(insample_args['symbol'], in_trades_1,
                                              insample_args['sv'], insample_args['sd'], insample_args['ed'],
                                              market_impact=impact, commission_cost=0.0)
            student_outsample_cr = evalPolicy2(outsample_args['symbol'], out_trades,
                                               outsample_args['sv'], outsample_args['sd'], outsample_args['ed'],
                                               market_impact=impact, commission_cost=0.0)
            if student_insample_cr <= benchmark:
                incorrect = True
                msgs.append(" in-sample return ({}) did not beat benchmark ({})".format(student_insample_cr, benchmark))
            else:
                points_earned += 5.0
            if outsample_cr_to_beat is None:
                if out_test_t > test_time:
                    incorrect = True
                    msgs.append(" out-sample took {} seconds, max of {}".format(out_test_t, test_time))
                else:
                    points_earned += 5.0
            else:
                if student_outsample_cr < outsample_cr_to_beat:
                    incorrect = True
                    msgs.append(" out-sample return ({}) did not beat benchmark ({})".format(student_outsample_cr, outsample_cr_to_beat))
                else:
                    points_earned += 5.0
        if incorrect:
            inputs_str = " insample_args: {}\n" \
                         " outsample_args: {}\n" \
                         " benchmark_type: {}\n" \
                         " benchmark: {}\n" \
                         " train_time: {}\n" \
                         " test_time: {}\n" \
                         " max_time: {}\n" \
                         " seed: {}\n".format(insample_args, outsample_args, benchmark_type, benchmark,
                                              train_time, test_time, max_time, seed)
            raise IncorrectOutput("Test failed on one or more output criteria.\n Inputs:\n{}\n Failures:\n{}".format(
                inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3])  # show only filename instead of long absolute path
        # tb_list = [row for row in tb_list if row[0] in ['QLearner.py','StrategyLearner.py']]
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        elif 'grading_traceback' in dir(e):
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(e.grading_traceback))
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
def test_learners(
        description,
        group,
        max_tests,
        needed_wins,
        row_limits,
        col_limits,
        seed,
        grader,
):
    """Test data generation methods beat given learner.

    Requires test description, test case group, and a grader fixture.

    Fix vs. original: the first worse_better_err sort used a comparator of
    int(diff) which truncates any fractional margin to 0 (unordered ties);
    it now uses int(np.sign(diff)), consistent with the final-ranking sort
    already present at the bottom of this function.
    """
    points_earned = 0.0  # initialize points for this test case
    incorrect = True
    msgs = []
    try:
        data_x, data_y = None, None
        same_data_x, same_data_y = None, None
        diff_data_x, diff_data_y = None, None
        better_learner, worse_learner = None, None
        if group == "author":
            # Author check only: penalize a missing/template author string.
            try:
                from gen_data import author
                auth_string = run_with_timeout(author, seconds_per_test_case, (), {})
                if auth_string == "tb34":
                    incorrect = True
                    msgs.append(" Incorrect author name (tb34)")
                    points_earned = -10
                elif auth_string == "":
                    incorrect = True
                    msgs.append(" Empty author name")
                    points_earned = -10
                else:
                    incorrect = False
            except Exception as e:
                incorrect = True
                msgs.append(" Exception occured when calling author() method: {}".format(e))
                points_earned = -10
        else:
            # Generate the student's dataset three times: twice with the same
            # seed (must match) and once with a different seed (must differ).
            if group == "best4dt":
                from gen_data import best_4_dt
                data_x, data_y = run_with_timeout(best_4_dt, seconds_per_test_case, (), {"seed": seed})
                same_data_x, same_data_y = run_with_timeout(best_4_dt, seconds_per_test_case, (), {"seed": seed})
                diff_data_x, diff_data_y = run_with_timeout(best_4_dt, seconds_per_test_case, (), {"seed": seed + 1})
                better_learner = DTLearner
                worse_learner = LinRegLearner
            elif group == "best4lr":
                from gen_data import best_4_lin_reg
                data_x, data_y = run_with_timeout(best_4_lin_reg, seconds_per_test_case, (), {"seed": seed})
                same_data_x, same_data_y = run_with_timeout(best_4_lin_reg, seconds_per_test_case, (), {"seed": seed})
                diff_data_x, diff_data_y = run_with_timeout(best_4_lin_reg, seconds_per_test_case, (), {"seed": seed + 1})
                better_learner = LinRegLearner
                worse_learner = DTLearner
            num_samples = data_x.shape[0]
            cutoff = int(num_samples * 0.6)  # 60/40 train/test split
            worse_better_err = []
            for run in range(max_tests):
                # Random split, train both learners, record out-of-sample errors.
                permutation = np.random.permutation(num_samples)
                train_x, train_y = (
                    data_x[permutation[:cutoff]],
                    data_y[permutation[:cutoff]],
                )
                test_x, test_y = (
                    data_x[permutation[cutoff:]],
                    data_y[permutation[cutoff:]],
                )
                better = better_learner()
                worse = worse_learner()
                better.add_evidence(train_x, train_y)
                worse.add_evidence(train_x, train_y)
                better_pred = better.query(test_x)
                worse_pred = worse.query(test_x)
                better_err = np.linalg.norm(test_y - better_pred)
                worse_err = np.linalg.norm(test_y - worse_pred)
                worse_better_err.append((worse_err, better_err))
            # Sort by descending margin (worse_err - better_err). BUG FIX: use
            # the sign of the difference; int() truncation treated any margin
            # with |diff| < 1 as a tie and left those pairs unordered.
            worse_better_err.sort(key=functools.cmp_to_key(
                lambda a, b: int(np.sign((b[0] - b[1]) - (a[0] - a[1])))))
            better_wins_count = 0
            for worse_err, better_err in worse_better_err:
                # A "win" requires the better learner to beat worse by >=10%.
                if better_err < 0.9 * worse_err:
                    better_wins_count = better_wins_count + 1
                    points_earned += 5.0
                    if better_wins_count >= needed_wins:
                        break
            incorrect = False
            if (data_x.shape[0] < row_limits[0]) or (data_x.shape[0] > row_limits[1]):
                incorrect = True
                msgs.append(" Invalid number of rows. Should be between {}, found {}".format(row_limits, data_x.shape[0]))
                points_earned = max(0, points_earned - 20)
            if (data_x.shape[1] < col_limits[0]) or (data_x.shape[1] > col_limits[1]):
                incorrect = True
                msgs.append(" Invalid number of columns. Should be between {}, found {}".format(col_limits, data_x.shape[1]))
                points_earned = max(0, points_earned - 20)
            if better_wins_count < needed_wins:
                incorrect = True
                msgs.append(" Better learner did not exceed worse learner. Expected {}, found {}".format(needed_wins, better_wins_count))
            if not (np.array_equal(same_data_y, data_y)) or not (np.array_equal(same_data_x, data_x)):
                incorrect = True
                msgs.append(" Did not produce the same data with the same seed.\n" +
                            " First data_x:\n{}\n".format(data_x) +
                            " Second data_x:\n{}\n".format(same_data_x) +
                            " First data_y:\n{}\n".format(data_y) +
                            " Second data_y:\n{}\n".format(same_data_y))
                points_earned = max(0, points_earned - 20)
            if np.array_equal(diff_data_y, data_y) and np.array_equal(diff_data_x, data_x):
                incorrect = True
                msgs.append(" Did not produce different data with different seeds.\n" +
                            " First data_x:\n{}\n".format(data_x) +
                            " Second data_x:\n{}\n".format(diff_data_x) +
                            " First data_y:\n{}\n".format(data_y) +
                            " Second data_y:\n{}\n".format(diff_data_y))
                points_earned = max(0, points_earned - 20)
        if incorrect:
            if group == "author":
                raise IncorrectOutput(
                    "Test failed on one or more criteria.\n {}".format("\n".join(msgs)))
            else:
                inputs_str = " Residuals: {}".format(worse_better_err)
                raise IncorrectOutput(
                    "Test failed on one or more output criteria.\n  Inputs:\n{}\n Failures:\n{}".format(
                        inputs_str, "\n".join(msgs)))
        else:
            if group != "author":
                # Report average (worse - better) residual over the top-10
                # margins as a performance metric.
                avg_ratio = 0.0
                worse_better_err.sort(key=functools.cmp_to_key(
                    lambda a, b: int(np.sign((b[0] - b[1]) - (a[0] - a[1])))))
                for we, be in worse_better_err[:10]:
                    avg_ratio += float(we) - float(be)
                avg_ratio = avg_ratio / 10.0
                if group == "best4dt":
                    grader.add_performance(np.array([avg_ratio, 0]))
                else:
                    grader.add_performance(np.array([0, avg_ratio]))
    except Exception as e:
        # Test result: failed
        msg = "Description: {} (group: {})\n".format(description, group)
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (
                os.path.basename(row[0]),
                row[1],
                row[2],
                row[3],
            )  # show only filename instead of long absolute path
        tb_list = [row for row in tb_list if (row[0] == "gen_data.py")]
        if tb_list:
            msg += "Traceback:\n"
            msg += "".join(tb.format_list(tb_list))  # contains newlines
        elif "grading_traceback" in dir(e):
            msg += "Traceback:\n"
            msg += "".join(tb.format_list(e.grading_traceback))
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace
        grader.add_result(
            GradeResult(outcome="failed", points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(
            GradeResult(outcome="passed", points=points_earned, msg=None))
def test_learners(description, group, datafile, seed, outputs, grader):
    """Test ML models returns correct predictions.

    Requires test description, test case group, inputs, expected outputs,
    and a grader fixture.

    Fixes vs. original: string comparisons via == instead of "is" (identity on
    literals only works by CPython interning accident); the BagLearner import
    guard parenthesized correctly ("or" chains bound only RandomName to the
    "not imported" test due to and/or precedence); Python-3 exec/raise forms;
    range and str(e) instead of xrange and the removed e.message.
    """
    points_earned = 0.0  # initialize points for this test case
    try:
        # (BPH) Copied from grade_strategy_qlearning.py
        # Set fixed seed for repeatability, then swap in no-op fakes so the
        # student code cannot re-seed np.random or random during grading.
        np.random.seed(seed)
        random.seed(seed)
        tmp_numpy_seed = np.random.seed
        tmp_random_seed = random.seed
        np.random.seed = fake_seed
        random.seed = fake_rseed
        # Import student learners (only once each).
        if 'RTLearner' not in globals():
            from RTLearner import RTLearner
        if 'DTLearner' not in globals():
            from DTLearner import DTLearner
        if (group in ('BagLearner', 'InsaneLearner', 'RandomName')) and ('BagLearner' not in globals()):
            from BagLearner import BagLearner
        # put seeds back for the moment
        np.random.seed = tmp_numpy_seed
        random.seed = tmp_random_seed
        testX, testY, trainX, trainY = None, None, None, None
        permutation = None
        author = None
        with util.get_learner_data_file(datafile) as f:
            alldata = np.genfromtxt(f, delimiter=',')
            # Skip the date column and header row if we're working on Istanbul data
            if datafile == 'Istanbul.csv':
                alldata = alldata[1:, 1:]
            datasize = alldata.shape[0]
            cutoff = int(datasize * 0.6)  # 60/40 train/test split
            permutation = np.random.permutation(alldata.shape[0])
            # Shuffle feature columns too, so hard-coded column logic fails.
            col_permutation = np.random.permutation(alldata.shape[1] - 1)
            train_data = alldata[permutation[:cutoff], :]
            trainX = train_data[:, col_permutation]
            trainY = train_data[:, -1]
            test_data = alldata[permutation[cutoff:], :]
            testX = test_data[:, col_permutation]
            testY = test_data[:, -1]
        msgs = []
        if group in ("RTLearner", "DTLearner"):
            clss_name = RTLearner if group == "RTLearner" else DTLearner
            tree_sptc = 3 if group == "RTLearner" else 10  # per-call time budget
            corr_in, corr_out, corr_in_50 = None, None, None

            def oneleaf():
                # leaf_size=1 should (over)fit in-sample strongly.
                np.random.seed(seed)
                random.seed(seed)
                np.random.seed = fake_seed
                random.seed = fake_rseed
                learner = clss_name(leaf_size=1, verbose=False)
                learner.addEvidence(trainX, trainY)
                insample = learner.query(trainX)
                outsample = learner.query(testX)
                np.random.seed = tmp_numpy_seed
                random.seed = tmp_random_seed
                author_rv = None
                try:
                    author_rv = learner.author()
                except Exception:
                    # Missing author() is reported below, not fatal here.
                    pass
                return insample, outsample, author_rv

            def fiftyleaves():
                # leaf_size=50 should generalize (lower in-sample correlation).
                np.random.seed(seed)
                random.seed(seed)
                np.random.seed = fake_seed
                random.seed = fake_rseed
                learner = clss_name(leaf_size=50, verbose=False)
                learner.addEvidence(trainX, trainY)
                np.random.seed = tmp_numpy_seed
                random.seed = tmp_random_seed
                return learner.query(trainX)

            predY_in, predY_out, author = run_with_timeout(oneleaf, tree_sptc, (), {})
            predY_in_50 = run_with_timeout(fiftyleaves, tree_sptc, (), {})
            corr_in = np.corrcoef(predY_in, y=trainY)[0, 1]
            corr_out = np.corrcoef(predY_out, y=testY)[0, 1]
            corr_in_50 = np.corrcoef(predY_in_50, y=trainY)[0, 1]
            incorrect = False
            if corr_in < outputs['insample_corr_min'] or np.isnan(corr_in):
                incorrect = True
                msgs.append(" In-sample with leaf_size=1 correlation less than allowed: got {} expected {}"
                            .format(corr_in, outputs['insample_corr_min']))
            else:
                points_earned += 1.0
            if corr_out < outputs['outsample_corr_min'] or np.isnan(corr_out):
                incorrect = True
                msgs.append(" Out-of-sample correlation less than allowed: got {} expected {}"
                            .format(corr_out, outputs['outsample_corr_min']))
            else:
                points_earned += 1.0
            if corr_in_50 > outputs['insample_corr_max'] or np.isnan(corr_in_50):
                incorrect = True
                msgs.append(" In-sample correlation with leaf_size=50 greater than allowed: got {} expected {}"
                            .format(corr_in_50, outputs['insample_corr_max']))
            else:
                points_earned += 1.0
            # Check author string
            if (author is None) or (author == 'tb34'):
                incorrect = True
                msgs.append(" Invalid author: {}".format(author))
                points_earned += -2.0
        elif group == "BagLearner":
            corr1, corr20 = None, None
            bag_sptc = 10  # per-call time budget

            def onebag():
                np.random.seed(seed)
                random.seed(seed)
                np.random.seed = fake_seed
                random.seed = fake_rseed
                learner1 = BagLearner(learner=RTLearner, kwargs={"leaf_size": 1}, bags=1, boost=False, verbose=False)
                learner1.addEvidence(trainX, trainY)
                q_rv = learner1.query(testX)
                a_rv = learner1.author()
                np.random.seed = tmp_numpy_seed
                random.seed = tmp_random_seed
                return q_rv, a_rv

            def twentybags():
                np.random.seed(seed)
                random.seed(seed)
                np.random.seed = fake_seed
                random.seed = fake_rseed
                learner20 = BagLearner(learner=RTLearner, kwargs={"leaf_size": 1}, bags=20, boost=False, verbose=False)
                learner20.addEvidence(trainX, trainY)
                q_rv = learner20.query(testX)
                np.random.seed = tmp_numpy_seed
                random.seed = tmp_random_seed
                return q_rv

            predY1, author = run_with_timeout(onebag, bag_sptc, pos_args=(), keyword_args={})
            predY20 = run_with_timeout(twentybags, bag_sptc, (), {})
            corr1 = np.corrcoef(predY1, testY)[0, 1]
            corr20 = np.corrcoef(predY20, testY)[0, 1]
            incorrect = False
            # More bags must help out of sample.
            if corr20 <= corr1:
                incorrect = True
                msgs.append(" Out-of-sample correlation for 20 bags is not greater than for 1 bag. 20 bags:{}, 1 bag:{}"
                            .format(corr20, corr1))
            else:
                points_earned += 2.0
            # Check author string
            if (author is None) or (author == 'tb34'):
                incorrect = True
                msgs.append(" Invalid author: {}".format(author))
                points_earned += -1.0
        elif group == "InsaneLearner":
            # Only requirement: InsaneLearner trains and queries without error.
            try:
                def insane():
                    import InsaneLearner as it
                    learner = it.InsaneLearner(verbose=False)
                    learner.addEvidence(trainX, trainY)
                    Y = learner.query(testX)
                run_with_timeout(insane, 10, pos_args=(), keyword_args={})
                incorrect = False
            except Exception as e:
                incorrect = True
                msgs.append(" Exception calling InsaneLearner: {}".format(e))
                points_earned = -10
        elif group == "RandomName":
            # Bag a freshly generated, instrumented learner class and verify
            # BagLearner calls __init__/addEvidence/query exactly once per bag.
            try:
                il_name, il_code = gen_class()
                # Python 3: "exec code in globals(), locals()" is gone, and a
                # bare exec inside a function cannot create visible locals --
                # execute in module globals so eval() below can see the class.
                exec(il_code, globals())
                il_cobj = eval(il_name)

                def rnd_name():
                    np.random.seed(seed)
                    random.seed(seed)
                    np.random.seed = fake_seed
                    random.seed = fake_rseed
                    learner = BagLearner(learner=il_cobj, kwargs={'verbose': False}, bags=20, boost=False, verbose=False)
                    learner.addEvidence(trainX, trainY)
                    Y = learner.query(testX)
                    np.random.seed = tmp_numpy_seed
                    random.seed = tmp_random_seed
                    return il_cobj.init_callcount_dict, il_cobj.add_callcount_dict, il_cobj.query_callcount_dict

                iccd, accd, qccd = run_with_timeout(rnd_name, 10, pos_args=(), keyword_args={})
                incorrect = False
                if (len(iccd) != 20) or (any([v != 1 for v in iccd.values()])):
                    incorrect = True
                    msgs.append(" Unexpected number of calls to __init__, sum={} (should be 20), max={} (should be 1), min={} (should be 1)"
                                .format(len(iccd), max(iccd.values()), min(iccd.values())))
                    points_earned = -10
                if (len(accd) != 20) or (any([v != 1 for v in accd.values()])):
                    incorrect = True
                    msgs.append(" Unexpected number of calls to addEvidence sum={} (should be 20), max={} (should be 1), min={} (should be 1)"
                                .format(len(accd), max(accd.values()), min(accd.values())))
                    points_earned = -10
                if (len(qccd) != 20) or (any([v != 1 for v in qccd.values()])):
                    incorrect = True
                    msgs.append(" Unexpected number of calls to query, sum={} (should be 20), max={} (should be 1), min={} (should be 1)"
                                .format(len(qccd), max(qccd.values()), min(qccd.values())))
                    points_earned = -10
            except Exception as e:
                incorrect = True
                msgs.append(" Exception calling BagLearner: {}".format(e))
                points_earned = -10
        if incorrect:
            inputs_str = " data file: {}\n" \
                         " permutation: {}".format(datafile, permutation)
            raise IncorrectOutput("Test failed on one or more output criteria.\n Inputs:\n{}\n Failures:\n{}".format(
                inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Description: {} (group: {})\n".format(description, group)
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3])  # show only filename instead of long absolute path
        tb_list = [
            row for row in tb_list
            if (row[0] == 'RTLearner.py') or (row[0] == 'BagLearner.py')
        ]
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace
        grader.add_result(
            GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(
            GradeResult(outcome='passed', points=points_earned, msg=None))
def test_analysis(inputs, outputs, description, grader):
    """Test get_portfolio_value() and get_portfolio_stats() return correct values.

    Requires test inputs, expected outputs, description, and a grader fixture.

    Fixes vs. original: dict.items() instead of the removed iteritems(),
    Python-3 raise syntax, range instead of xrange, str(e) instead of the
    removed e.message, and dict views materialized as lists.
    """
    points_earned = 0.0  # initialize points for this test case
    try:
        # Try to import student code (only once)
        if not main_code in globals():
            import importlib
            # * Import module
            mod = importlib.import_module(main_code)
            globals()[main_code] = mod
        # Unpack test case: dates arrive as 'YYYY-MM-DD' strings.
        start_date_str = inputs['start_date'].split('-')
        start_date = datetime.datetime(int(start_date_str[0]), int(start_date_str[1]), int(start_date_str[2]))
        end_date_str = inputs['end_date'].split('-')
        end_date = datetime.datetime(int(end_date_str[0]), int(end_date_str[1]), int(end_date_str[2]))
        # Materialize the views: keys()/values() of an unmutated dict pair up
        # index-for-index, and lists are safe to pass/print repeatedly.
        symbols = list(inputs['symbol_allocs'].keys())  # e.g.: ['GOOG', 'AAPL', 'GLD', 'XOM']
        allocs = list(inputs['symbol_allocs'].values())  # e.g.: [0.2, 0.3, 0.4, 0.1]
        start_val = inputs['start_val']
        risk_free_rate = inputs.get('risk_free_rate', 0.0)

        # the wonky unpacking here is so that we only pull out the values we say we'll test.
        def timeoutwrapper_analysis():
            student_rv = analysis.assess_portfolio(
                sd=start_date, ed=end_date,
                syms=symbols,
                allocs=allocs,
                sv=start_val, rfr=risk_free_rate, sf=252.0,
                gen_plot=False)
            return student_rv

        result = run_with_timeout(timeoutwrapper_analysis, max_seconds_per_call, (), {})
        student_cr = result[0]
        student_adr = result[1]
        student_sr = result[3]
        port_stats = OrderedDict([('cum_ret', student_cr),
                                  ('avg_daily_ret', student_adr),
                                  ('sharpe_ratio', student_sr)])
        # Verify against expected outputs and assign points
        incorrect = False
        msgs = []
        for key, value in port_stats.items():
            if abs(value - outputs[key]) > abs_margins[key]:
                incorrect = True
                msgs.append(" {}: {} (expected: {})".format(key, value, outputs[key]))
            else:
                points_earned += points_per_output[key]  # partial credit
        if incorrect:
            inputs_str = " start_date: {}\n" \
                         " end_date: {}\n" \
                         " symbols: {}\n" \
                         " allocs: {}\n" \
                         " start_val: {}".format(start_date, end_date, symbols, allocs, start_val)
            raise IncorrectOutput("One or more stats were incorrect.\n Inputs:\n{}\n Wrong values:\n{}".format(
                inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3])  # show only filename instead of long absolute path
        tb_list = [row for row in tb_list if row[0] == 'analysis.py']
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
def test_marketsim(description, group, inputs, outputs, grader):
    """Test compute_portvals() returns correct daily portfolio values.

    Requires test description, test case group, inputs, expected outputs,
    and a grader fixture.

    Fixes vs. original: Python-3 raise syntax, range instead of xrange,
    str(e) instead of the removed e.message, and positional Series access
    via .iloc[-1] (plain portvals[-1] is label-based in modern pandas and
    raises KeyError on a date-indexed Series).
    """
    points_earned = 0.0  # initialize points for this test case
    try:
        # Try to import student code (only once)
        if not main_code in globals():
            import importlib
            # * Import module
            mod = importlib.import_module(main_code)
            globals()[main_code] = mod
            # * Import methods to test
            for m in ['compute_portvals']:
                globals()[m] = getattr(mod, m)
        incorrect = False
        msgs = []
        if group == 'author':
            # Author check only: penalize a missing/template author string.
            try:
                auth_string = run_with_timeout(marketsim.author, seconds_per_test_case, (), {})
                if auth_string == 'tb34':
                    incorrect = True
                    msgs.append(" Incorrect author name (tb34)")
                    points_earned = -10
                elif auth_string == '':
                    incorrect = True
                    msgs.append(" Empty author name")
                    points_earned = -10
            except Exception as e:
                incorrect = True
                msgs.append(" Exception occured when calling author() method: {}".format(e))
                points_earned = -10
        else:
            # Unpack test case
            orders_file = inputs['orders_file']
            start_val = inputs['start_val']
            impct = inputs['impact']
            commish = inputs['commission']
            portvals = None
            fullpath_orders_file = get_orders_data_file(orders_file)
            portvals = run_with_timeout(
                compute_portvals, seconds_per_test_case, (), {
                    'orders_file': fullpath_orders_file,
                    'start_val': start_val,
                    'commission': commish,
                    'impact': impct
                })
            # * Check return type is correct, coax into Series
            assert (type(portvals) == pd.Series) or (
                type(portvals) == pd.DataFrame and len(portvals.columns) == 1
            ), "You must return a Series or single-column DataFrame!"
            if type(portvals) == pd.DataFrame:
                portvals = portvals[portvals.columns[0]]  # convert single-column DataFrame to Series
            if group == 'basic':
                if len(portvals) != outputs['num_days']:
                    incorrect = True
                    msgs.append(" Incorrect number of days: {}, expected {}".format(
                        len(portvals), outputs['num_days']))
                else:
                    points_earned += 2.0
                # Final value within 0.1% relative tolerance.
                if abs(portvals.iloc[-1] - outputs['last_day_portval']) > (0.001 * outputs['last_day_portval']):
                    incorrect = True
                    msgs.append(" Incorrect final value: {}, expected {}".format(
                        portvals.iloc[-1], outputs['last_day_portval']))
                else:
                    points_earned += 5.0
                adr, sr = get_stats(portvals)
                if abs(sr - outputs['sharpe_ratio']) > abs(0.001 * outputs['sharpe_ratio']):
                    incorrect = True
                    msgs.append(" Incorrect sharpe ratio: {}, expected {}".format(
                        sr, outputs['sharpe_ratio']))
                else:
                    points_earned += 1.0
                if abs(adr - outputs['avg_daily_ret']) > abs(0.001 * outputs['avg_daily_ret']):
                    incorrect = True
                    msgs.append(" Incorrect avg daily return: {}, expected {}".format(
                        adr, outputs['avg_daily_ret']))
                else:
                    points_earned += 1.0
            elif group == 'commission' or group == 'impact' or group == 'both':
                # Cost-aware groups check only the final value, absolute tolerance.
                if abs(portvals.iloc[-1] - outputs['last_day_portval']) > 0.001:
                    incorrect = True
                    msgs.append(" Incorrect final value: {}, expected {}".format(
                        portvals.iloc[-1], outputs['last_day_portval']))
                else:
                    points_earned += 2.0
        if incorrect:
            raise IncorrectOutput("Test failed on one or more output criteria.\n Inputs:\n{}\n Failures:\n{}".format(
                inputs, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        if 'grading_traceback' in dir(e):
            tb_list = e.grading_traceback
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3])  # show only filename instead of long absolute path
        tb_list = [row for row in tb_list if row[0] == 'marketsim_old.py']
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace; never report
        # a negative score for an exception-path failure.
        grader.add_result(
            GradeResult(outcome='failed', points=max(points_earned, 0), msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(
            GradeResult(outcome='passed', points=points_earned, msg=None))
def test_learners(description, group, max_tests, needed_wins, row_limits, col_limits, seed, grader):
    """Test that the student's data generation methods favor the intended learner.

    Generates a dataset with the student's best4RT/best4LinReg, trains the
    "better" and "worse" reference learners on random 60/40 splits, and awards
    points when the better learner beats the worse one by at least 10% RMSE on
    enough trials. Also checks dataset dimensions and seed determinism.

    Parameters
    ----------
    description : str   human-readable test case description (for messages)
    group : str         'best4rt' or anything else (treated as best4LinReg)
    max_tests : int     maximum number of train/test trials to run
    needed_wins : int   wins required for the better learner to pass
    row_limits, col_limits : (min, max) allowed dataset shape
    seed : int          seed passed to the student's generator
    grader : fixture    collects GradeResult objects

    Raises
    ------
    IncorrectOutput     when one or more output criteria fail (re-raised after
                        reporting a 'failed' GradeResult).
    """
    points_earned = 0.0  # initialize points for this test case
    incorrect = True
    try:
        dataX, dataY = None, None
        same_dataX, same_dataY = None, None
        diff_dataX, diff_dataY = None, None
        betterLearner, worseLearner = None, None
        if group == "best4rt":
            from gen_data import best4RT
            # Same seed twice (determinism check), then seed+1 (variation check).
            dataX, dataY = run_with_timeout(best4RT, seconds_per_test_case, (), {'seed': seed})
            same_dataX, same_dataY = run_with_timeout(best4RT, seconds_per_test_case, (), {'seed': seed})
            diff_dataX, diff_dataY = run_with_timeout(best4RT, seconds_per_test_case, (), {'seed': seed + 1})
            betterLearner = RTLearner
            worseLearner = LinRegLearner
        else:
            from gen_data import best4LinReg
            dataX, dataY = run_with_timeout(best4LinReg, seconds_per_test_case, (), {'seed': seed})
            same_dataX, same_dataY = run_with_timeout(best4LinReg, seconds_per_test_case, (), {'seed': seed})
            diff_dataX, diff_dataY = run_with_timeout(best4LinReg, seconds_per_test_case, (), {'seed': seed + 1})
            betterLearner = LinRegLearner
            worseLearner = RTLearner
        num_samples = dataX.shape[0]
        cutoff = int(num_samples * 0.6)  # 60% train / 40% test split
        worse_better_err = []
        for run in range(max_tests):
            permutation = np.random.permutation(num_samples)
            train_X, train_Y = dataX[permutation[:cutoff]], dataY[permutation[:cutoff]]
            test_X, test_Y = dataX[permutation[cutoff:]], dataY[permutation[cutoff:]]
            better = betterLearner()
            worse = worseLearner()
            better.addEvidence(train_X, train_Y)
            worse.addEvidence(train_X, train_Y)
            better_pred = better.query(test_X)
            worse_pred = worse.query(test_X)
            better_err = np.linalg.norm(test_Y - better_pred)
            worse_err = np.linalg.norm(test_Y - worse_pred)
            worse_better_err.append((worse_err, better_err))
        # Sort trials by (worse - better) margin, largest margin first.
        # NOTE: the original cmp-style sort truncated the margin with int(),
        # so margins with |diff| < 1 compared as equal; the key-based sort
        # orders correctly (and is Python 3 compatible).
        worse_better_err.sort(key=lambda errs: errs[0] - errs[1], reverse=True)
        better_wins_count = 0
        for worse_err, better_err in worse_better_err:
            if better_err < 0.9 * worse_err:  # a "win": better beats worse by >10%
                better_wins_count += 1
                points_earned += 5.0
            if better_wins_count >= needed_wins:
                break
        incorrect = False
        msgs = []
        if (dataX.shape[0] < row_limits[0]) or (dataX.shape[0] > row_limits[1]):
            incorrect = True
            msgs.append(" Invalid number of rows. Should be between {}, found {}".format(row_limits, dataX.shape[0]))
            points_earned = max(0, points_earned - 20)
        if (dataX.shape[1] < col_limits[0]) or (dataX.shape[1] > col_limits[1]):
            incorrect = True
            msgs.append(" Invalid number of columns. Should be between {}, found {}".format(col_limits, dataX.shape[1]))
            points_earned = max(0, points_earned - 20)
        if better_wins_count < needed_wins:
            incorrect = True
            msgs.append(" Better learner did not exceed worse learner. Expected {}, found {}".format(needed_wins, better_wins_count))
        if not (np.array_equal(same_dataY, dataY)) or not (np.array_equal(same_dataX, dataX)):
            incorrect = True
            msgs.append(" Did not produce the same data with the same seed.\n" +
                        " First dataX:\n{}\n".format(dataX) +
                        " Second dataX:\n{}\n".format(same_dataX) +
                        " First dataY:\n{}\n".format(dataY) +
                        " Second dataY:\n{}\n".format(same_dataY))
            points_earned = max(0, points_earned - 20)
        if np.array_equal(diff_dataY, dataY) and np.array_equal(diff_dataX, dataX):
            incorrect = True
            msgs.append(" Did not produce different data with different seeds.\n" +
                        " First dataX:\n{}\n".format(dataX) +
                        " Second dataX:\n{}\n".format(diff_dataX) +
                        " First dataY:\n{}\n".format(dataY) +
                        " Second dataY:\n{}\n".format(diff_dataY))
            points_earned = max(0, points_earned - 20)
        if incorrect:
            inputs_str = " Residuals: {}".format(worse_better_err)
            # Python 3 raise-call form (the original used the Py2-only
            # "raise Exc, msg" statement), consistent with the rest of the file.
            raise IncorrectOutput("Test failed on one or more output criteria.\n Inputs:\n{}\n Failures:\n{}".format(inputs_str, "\n".join(msgs)))
        else:
            # Average win margin over the 10 best trials, reported as a
            # performance metric (position in the vector encodes the group).
            avg_ratio = 0.0
            worse_better_err.sort(key=lambda errs: errs[0] - errs[1], reverse=True)
            for we, be in worse_better_err[:10]:
                avg_ratio += (float(we) - float(be))
            avg_ratio = avg_ratio / 10.0
            if group == "best4rt":
                grader.add_performance(np.array([avg_ratio, 0]))
            else:
                grader.add_performance(np.array([0, avg_ratio]))
    except Exception as e:
        # Test result: failed
        msg = "Description: {} (group: {})\n".format(description, group)
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3])  # show only filename instead of long absolute path
        tb_list = [row for row in tb_list if (row[0] == 'gen_data.py')]
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        elif 'grading_traceback' in dir(e):
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(e.grading_traceback))
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
def test_optimization(inputs, outputs, description, seed, grader):
    """Test find_optimal_allocations() returns correct allocations.

    Imports the student optimization module (with RNG seeding disabled during
    import so module-level seed calls cannot fix the RNG), runs the student's
    optimize_portfolio under a timeout, then grades: allocations sum to 1.0,
    each allocation lies in [0, 1] within margin, and the resulting portfolio
    sddr matches the benchmark within margin.

    Parameters
    ----------
    inputs : dict       'start_date', 'end_date', 'symbols'
    outputs : dict      'allocs' (reference allocations), 'benchmark' (sddr)
    description : str   human-readable test case description
    seed : int          RNG seed fixed before invoking student code
    grader : fixture    collects GradeResult objects

    Raises
    ------
    IncorrectOutput     when one or more output criteria fail (re-raised after
                        reporting a 'failed' GradeResult).
    """
    points_earned = 0.0  # initialize points for this test case
    try:
        # Try to import student code (only once)
        if main_code not in globals():
            import importlib
            # Swap in fake_seed while importing so module-level seed() calls
            # in student code are no-ops, then restore the real functions.
            nprs_func = np.random.seed
            rs_func = random.seed
            np.random.seed = fake_seed
            random.seed = fake_seed
            mod = importlib.import_module(main_code)
            globals()[main_code] = mod
            np.random.seed = nprs_func
            random.seed = rs_func
        # Unpack test case
        start_date = inputs['start_date']
        end_date = inputs['end_date']
        symbols = inputs['symbols']  # e.g.: ['GOOG', 'AAPL', 'GLD', 'XOM']

        def timeoutwrapper_optimize():
            # Seed once for repeatability, then disable re-seeding so student
            # code cannot alter RNG state mid-run; restore before returning.
            np.random.seed(seed)
            random.seed(seed)
            nprs_func = np.random.seed
            rs_func = random.seed
            np.random.seed = fake_seed
            random.seed = fake_seed
            s_allocs, s_cr, s_adr, s_sddr, s_sr = optimization.optimize_portfolio(sd=start_date, ed=end_date, syms=symbols, gen_plot=False)
            s_allocs = np.float32(s_allocs)
            np.random.seed = nprs_func
            random.seed = rs_func
            return s_allocs

        student_allocs = run_with_timeout(timeoutwrapper_optimize, seconds_per_test_case, (), {})
        # Verify against expected outputs and assign points
        incorrect = False
        msgs = []
        correct_allocs = outputs['allocs']
        benchmark_value = outputs['benchmark']
        # * Check sum_to_one: Allocations sum to 1.0 +/- margin
        sum_allocs = np.sum(student_allocs)
        if abs(sum_allocs - 1.0) > abs_margins['sum_to_one']:
            incorrect = True
            msgs.append(" sum of allocations: {} (expected: 1.0)".format(sum_allocs))
            student_allocs = student_allocs / sum_allocs  # normalize allocations, if they don't sum to 1.0
        else:
            points_earned += points_per_component['sum_to_one']
        # * Check each allocation is within [0, 1] +/- margin
        points_per_alloc_range = points_per_component['alloc_range'] / len(correct_allocs)
        for symbol, alloc in zip(symbols, student_allocs):
            if alloc < -abs_margins['alloc_range'] or alloc > (1.0 + abs_margins['alloc_range']):
                incorrect = True
                msgs.append(" {} - allocation out of range: {} (expected [0.0, 1.0)".format(symbol, alloc))
            else:
                points_earned += points_per_alloc_range
        # * Check the realized sddr against the benchmark, within margin
        student_allocs_sddr = alloc2sddr(student_allocs, inputs)
        if student_allocs_sddr / benchmark_value - 1.0 > abs_margins['sddr_match']:
            incorrect = True
            msgs.append(" Sddr too large: {} (expected < {} + {})".format(student_allocs_sddr, benchmark_value, benchmark_value * abs_margins['sddr_match']))
        else:
            points_earned += points_per_component['benchmark_match']
        if incorrect:
            inputs_str = " start_date: {}\n" \
                         " end_date: {}\n" \
                         " symbols: {}\n".format(start_date, end_date, symbols)
            # Python 3 raise-call form (the original used the Py2-only
            # "raise Exc, msg" statement), consistent with the rest of the file.
            raise IncorrectOutput("Test failed on one or more output criteria.\n Inputs:\n{}\n Failures:\n{}".format(inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3])  # show only filename instead of long absolute path
        tb_list = [row for row in tb_list if row[0] == 'optimization.py']
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
def test_learners(description, group, datafile, seed, outputs, grader):
    """Test ML models return correct predictions.

    Loads the CSV dataset, builds a seeded 60/40 row split with shuffled
    feature columns, then grades either the RTLearner group (in/out-of-sample
    correlation with leaf_size=1, overfit check with leaf_size=50) or the
    BagLearner group (20 bags must beat 1 bag out-of-sample). Both groups also
    validate the author() string.

    Parameters
    ----------
    description : str   human-readable test case description
    group : str         'RTLearner' or 'BagLearner'
    datafile : str      name of the learner data file (CSV)
    seed : int          RNG seed fixed for repeatability
    outputs : dict      correlation thresholds for the RTLearner group
    grader : fixture    collects GradeResult objects

    Raises
    ------
    IncorrectOutput     when one or more output criteria fail (re-raised after
                        reporting a 'failed' GradeResult).
    """
    points_earned = 0.0  # initialize points for this test case
    try:
        learner_class = None
        kwargs = {'verbose': False}
        # (BPH) Copied from grade_strategy_qlearning.py
        # Set fixed seed for repeatability
        np.random.seed(seed)
        random.seed(seed)
        # These lines will be uncommented in the batch grader to
        # prevent accidentally fixing the seed within student code
        # tmp_numpy_seed = np.random.seed
        # tmp_random_seed = random.seed
        # np.random.seed = fake_seed
        # random.seed = fake_rseed
        # Import learners only once. NOTE: string comparison uses == (the
        # original used "is", which relies on CPython string interning).
        if 'RTLearner' not in globals():
            from RTLearner import RTLearner
        if group == 'BagLearner' and ('BagLearner' not in globals()):
            from BagLearner import BagLearner
        testX, testY, trainX, trainY = None, None, None, None
        permutation = None
        author = None
        with util.get_learner_data_file(datafile) as f:
            alldata = np.genfromtxt(f, delimiter=',')
            # Skip the date column and header row if we're working on Istanbul data
            if datafile == 'Istanbul.csv':
                alldata = alldata[1:, 1:]
            datasize = alldata.shape[0]
            cutoff = int(datasize * 0.6)  # 60% train / 40% test split
            permutation = np.random.permutation(alldata.shape[0])
            # Shuffle feature columns too, so hard-coded column order fails.
            col_permutation = np.random.permutation(alldata.shape[1] - 1)
            train_data = alldata[permutation[:cutoff], :]
            trainX = train_data[:, col_permutation]
            trainY = train_data[:, -1]
            test_data = alldata[permutation[cutoff:], :]
            testX = test_data[:, col_permutation]
            testY = test_data[:, -1]
        if group == "RTLearner":
            corr_in, corr_out, corr_in_50 = None, None, None

            def oneleaf():
                learner = RTLearner(leaf_size=1, verbose=False)
                learner.addEvidence(trainX, trainY)
                insample = learner.query(trainX)
                outsample = learner.query(testX)
                return insample, outsample, learner.author()

            def fiftyleaves():
                learner = RTLearner(leaf_size=50, verbose=False)
                learner.addEvidence(trainX, trainY)
                return learner.query(trainX)

            predY_in, predY_out, author = run_with_timeout(oneleaf, seconds_per_test_case, (), {})
            predY_in_50 = run_with_timeout(fiftyleaves, seconds_per_test_case, (), {})
            corr_in = np.corrcoef(predY_in, y=trainY)[0, 1]
            corr_out = np.corrcoef(predY_out, y=testY)[0, 1]
            corr_in_50 = np.corrcoef(predY_in_50, y=trainY)[0, 1]
            incorrect = False
            msgs = []
            if corr_in < outputs['insample_corr_min']:
                incorrect = True
                msgs.append(" In-sample with leaf_size=1 correlation less than allowed: got {} expected {}".format(corr_in, outputs['insample_corr_min']))
            else:
                points_earned += 1.5
            if corr_out < outputs['outsample_corr_min']:
                incorrect = True
                msgs.append(" Out-of-sample correlation less than allowed: got {} expected {}".format(corr_out, outputs['outsample_corr_min']))
            else:
                points_earned += 1.5
            # leaf_size=50 must NOT fit the training data too well (overfit check)
            if corr_in_50 > outputs['insample_corr_max']:
                incorrect = True
                msgs.append(" In-sample correlation with leaf_size=50 greater than allowed: got {} expected {}".format(corr_in_50, outputs['insample_corr_max']))
            else:
                points_earned += 1.0
            # Check author string
            if (author is None) or (author == 'tb34'):
                incorrect = True
                msgs.append(" Invalid author: {}".format(author))
                points_earned += -1.0
        elif group == "BagLearner":
            corr1, corr20 = None, None

            def onebag():
                learner1 = BagLearner(learner=RTLearner, kwargs={"leaf_size": 1}, bags=1, boost=False, verbose=False)
                learner1.addEvidence(trainX, trainY)
                return learner1.query(testX), learner1.author()

            def twentybags():
                learner20 = BagLearner(learner=RTLearner, kwargs={"leaf_size": 1}, bags=20, boost=False, verbose=False)
                learner20.addEvidence(trainX, trainY)
                return learner20.query(testX)

            predY1, author = run_with_timeout(onebag, seconds_per_test_case, pos_args=(), keyword_args={})
            predY20 = run_with_timeout(twentybags, seconds_per_test_case, (), {})
            corr1 = np.corrcoef(predY1, testY)[0, 1]
            corr20 = np.corrcoef(predY20, testY)[0, 1]
            incorrect = False
            msgs = []
            if corr20 <= corr1:
                incorrect = True
                msgs.append(" Out-of-sample correlation for 20 bags is not greater than for 1 bag. 20 bags:{}, 1 bag:{}".format(corr20, corr1))
            else:
                points_earned += 2.0
            # Check author string
            if (author is None) or (author == 'tb34'):
                incorrect = True
                msgs.append(" Invalid author: {}".format(author))
                points_earned += -1.0
        # NOTE(review): 'incorrect' is only bound inside the two group
        # branches; an unknown group raises NameError here, which the except
        # clause reports as a failure — preserved intentionally.
        if incorrect:
            inputs_str = " data file: {}\n" \
                         " permutation: {}".format(datafile, permutation)
            # Python 3 raise-call form (the original used the Py2-only
            # "raise Exc, msg" statement), consistent with the rest of the file.
            raise IncorrectOutput("Test failed on one or more output criteria.\n Inputs:\n{}\n Failures:\n{}".format(inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Description: {} (group: {})\n".format(description, group)
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3])  # show only filename instead of long absolute path
        tb_list = [row for row in tb_list if (row[0] == 'RTLearner.py') or (row[0] == 'BagLearner.py')]
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))