示例#1
0
def _write_column(path, values):
    """Write each item of ``values`` on its own line to the file at ``path``."""
    # The context manager closes the handle even if a write raises.
    with open(path, 'w') as handle:
        handle.writelines(str(value) + '\n' for value in values)


def run_silent_save(params):
    """Run the learning agent silently and optionally dump metrics to CSV.

    A fresh Environment / LearningAgent / Simulator is built for each of
    ``repeat`` runs (currently hard-coded to 1) and simulated for
    ``params['max_epoch']`` trials.

    Args:
        params: Configuration mapping. It is handed to ``Environment`` as-is;
            this function itself reads ``params['max_epoch']`` (number of
            trials per run) and ``params['save']`` (truthy to write the CSV
            files).

    Returns:
        A ``(params, score)`` tuple where ``score`` is the sum of the values
        returned by ``Simulator.run`` divided by the number of runs.

    Side effects:
        Prints the parameters, the size of the last agent's ``qtable`` and
        the last agent's policy. When ``params['save']`` is truthy, writes
        ``score.csv``, ``penalties.csv`` and ``losses.csv`` in the current
        working directory, one value per line.
    """
    print('run_silent({})'.format(params))

    # Number of independent runs to average over; raise it to smooth results.
    repeat = 1
    score = 0.0
    scores = []
    penalties = []

    for _ in range(repeat):
        # Set up environment and agent
        e = Environment(params)  # create environment (also adds some dummy traffic)
        a = e.create_agent(LearningAgent)  # create agent
        e.set_primary_agent(a, enforce_deadline=True)  # set agent to track

        # Now simulate it; update_delay=0.0 and silent=True for speed
        sim = Simulator(e, update_delay=0.0, silent=True)

        score += sim.run(n_trials=params['max_epoch'])
        scores.append(sim.getScores())
        penalties.append(sim.getPenalties())

    # NOTE: from here on ``a`` and ``sim`` refer to the LAST run only.
    print('state span size : {}'.format(len(a.qtable)))

    losses = sim.getLosses()  # cannot average over repeated runs

    a.print_policy()  # [debug]

    # Average the per-run series element-wise across runs (axis 0).
    scores = np.average(scores, 0)
    penalties = np.average(penalties, 0)

    if params['save']:
        _write_column('score.csv', scores)
        _write_column('penalties.csv', penalties)
        _write_column('losses.csv', losses)

    return params, score / float(repeat)