示例#1
0
    # Run a fixed window of five agent/environment interactions inside the
    # Profile context, which is handed the path log/<args.log_name>.
    # NOTE(review): Profile is defined outside this view; presumably it
    # records a profiler trace to that path -- confirm.
    with Profile(f'log/{args.log_name}'):
        for step in range(5):
            action = player.act(bef_o)
            aft_o, r, d, i = env.step(action)
            # Hand the full transition to the player.
            player.step(bef_o, action, r, d, i)
            # When `d` is truthy, reset the environment and use its result as
            # the next observation; otherwise carry the new observation forward.
            bef_o = env.reset() if d else aft_o
            if args.render:
                env.render()
    # Steps still to run once hp.Learn_start + 25 steps are subtracted from
    # the total (presumably those were consumed by earlier phases outside
    # this view -- confirm).
    remaining_steps = total_steps - hp.Learn_start - 25
    for step in range(remaining_steps):
        # `step` restarts from zero in this loop, so the offset is added back
        # before testing it against the hp.Model_save interval.
        if not (hp.Learn_start + 25 + step) % hp.Model_save:
            player.save_model()
            score = evaluate_f(player, eval_env, vid_type)
            print('eval_score:{0}'.format(score))
        action = player.act(bef_o)
        aft_o, r, d, i = env.step(action)
        # Hand the full transition to the player.
        player.step(bef_o, action, r, d, i)
        # When `d` is truthy, reset the environment; otherwise advance to the
        # observation just returned by env.step.
        bef_o = env.reset() if d else aft_o
        if args.render:
            env.render()

else :
    for step in range(total_steps):
        if (step>0) and ((step % hp.Model_save) == 0) :
示例#2
0
# Render once before stepping unless args.vm is set
# (presumably a headless / virtual-machine run -- confirm).
if not args.vm:
    env.render()
# Interaction loop: pick an action, step the environment, pass the transition
# to the player, then choose the next observation.
for step in range(total_steps):
    action = player.act(bef_o)
    aft_o, r, d, i = env.step(action)
    player.step(bef_o, action, r, d, i)
    # When `d` is truthy, reset the environment; otherwise advance to the
    # observation just returned by env.step.
    bef_o = env.reset() if d else aft_o
    if not args.vm:
        env.render()

my_tqdm.close()

# Save the model; the value returned by save_model() names the sub-directory
# joined onto save_dir below.
next_save = player.save_model()
# Base directory: the parent of the path given in args.load when one was
# supplied, otherwise the player's own save directory.
if args.load:
    save_dir = os.path.dirname(args.load)
else:
    save_dir = player.save_dir
next_dir = os.path.join(save_dir, str(next_save))

# Evaluate on a newly created environment and report the score.
score = player.evaluate(gym.make(ENVIRONMENT), vid_type)
print('eval_score:{0}'.format(score))

# Report time elapsed since `st` (set outside this view; presumably the run's
# start timestamp -- confirm).
elapsed = time.time() - st
print('{0}steps took {1} sec'.format(total_steps, elapsed))

total_loop -= 1
if total_loop <= 0 :
    sys.exit()
else :
    next_args = []
    next_args.append('python')