def _write_column(path, values):
    """Write each item of *values* on its own line to *path*, replacing the file."""
    # The context manager closes (and thereby flushes) the handle even when a
    # write raises, so no descriptor is leaked on error.
    with open(path, 'w') as handle:
        handle.writelines(str(value) + '\n' for value in values)


def run_silent_save(params):
    """Run the agent for a finite number of trials and optionally save results.

    A fresh ``Environment`` is built from *params*, a ``LearningAgent`` is
    created in it and set as the primary agent with ``enforce_deadline=True``,
    and a silent ``Simulator`` (``update_delay=0.0``) is run for
    ``params['max_epoch']`` trials.

    When ``params['save']`` is truthy, three files are (re)written in the
    current working directory, one value per line:

    * ``score.csv``     -- ``sim.getScores()`` averaged over the runs (axis 0)
    * ``penalties.csv`` -- ``sim.getPenalties()`` averaged over the runs (axis 0)
    * ``losses.csv``    -- ``sim.getLosses()`` from the last run only

    Args:
        params: Mapping read for the keys ``'max_epoch'`` and ``'save'``; it
            is also handed unchanged to ``Environment``.

    Returns:
        A ``(params, score)`` tuple, where ``score`` is the sum of the values
        returned by ``sim.run`` divided by the number of runs.
    """
    # Single-argument print(...) emits the same text under Python 2 and 3.
    print('run_silent({})'.format(params))

    # Number of independent runs to average over. Currently a single run;
    # raise it to verify results across several runs.
    repeat = 1
    score = 0.0
    scores = []
    penalties = []

    for _ in range(repeat):
        # Set up environment and agent.
        e = Environment(params)  # create environment (also adds some dummy traffic)
        a = e.create_agent(LearningAgent)  # create agent
        e.set_primary_agent(a, enforce_deadline=True)  # set agent to track

        # Now simulate it; update_delay=0.0 speeds up the simulation.
        sim = Simulator(e, update_delay=0.0, silent=True)
        score += sim.run(n_trials=params['max_epoch'])
        scores.append(sim.getScores())
        penalties.append(sim.getPenalties())

        print('state span size : {}'.format(len(a.qtable)))
        # Losses cannot be averaged over repeated runs, so only the most
        # recent run's losses are kept.
        losses = sim.getLosses()
        a.print_policy()  # [debug]

    # Average the collected series across runs (axis 0 indexes the run).
    scores = np.average(scores, 0)
    penalties = np.average(penalties, 0)

    if params['save']:
        _write_column('score.csv', scores)
        _write_column('penalties.csv', penalties)
        _write_column('losses.csv', losses)

    score /= float(repeat)
    return params, score