示例#1
0
def _reset_frame_stack(env, num_frames):
    """Reset *env* and return its first observation repeated ``num_frames`` times.

    The observation gets a new leading axis and is repeated along it, so the
    result has shape ``(num_frames, *obs.shape)``.
    """
    first_obs = np.expand_dims(env.reset(), axis=0)
    return np.repeat(first_obs, num_frames, axis=0)


def Game(max_ep_len=1000, num_frames=4):
    """Run an interactive CarRacing-v0 session and record it into a ``Memory``.

    Every step the first three entries of the module-level ``actions`` array
    are sent to the environment, and the resulting frame stack, action, reward
    and done flag are added to the memory. The loop runs until the
    module-level ``exit_game`` flag becomes truthy, after which the memory is
    saved. ``actions`` and ``exit_game`` are presumably updated by the
    ``key_press`` / ``key_release`` window callbacks defined elsewhere in the
    file -- verify against those handlers.

    Args:
        max_ep_len: Number of steps after which an episode is cut off and the
            environment is reset.
        num_frames: Number of most recent observations kept in the stack that
            is stored with each transition.
    """
    global exit_game
    global actions

    env = gym.make('CarRacing-v0')
    try:
        state_dim = env.observation_space.shape
        action_dim = env.action_space.shape
        print(f"State: {state_dim}")
        print(f"Action: {action_dim}")

        # Hook keyboard callbacks into the render window. The reset comes
        # first because env.viewer is accessed right after it -- presumably
        # the viewer only exists once the env has been reset (TODO confirm).
        env.reset()
        env.viewer.window.on_key_press = key_press
        env.viewer.window.on_key_release = key_release

        # Shared action buffer; only the first three entries are sent to the env.
        actions = np.zeros(4, dtype=np.float32)

        # Replay memory that becomes the saved dataset.
        memory = Memory()
        memory.create(state_dim, action_dim)

        # Returns of all finished episodes.
        ep_ret_log = []

        # Start the first episode with a stack made of copies of the first frame.
        state_stack = _reset_frame_stack(env, num_frames)
        ep_ret, ep_len, epoch = 0, 0, 0
        print(state_stack.shape)
        print(state_stack.dtype)

        while not exit_game:
            env.render()

            # Apply the current action and slide the frame window forward by one.
            obs2, r, d, _ = env.step(actions[:3])
            obs2 = np.expand_dims(obs2, axis=0)
            state_stack = np.append(state_stack[1:], obs2, axis=0)

            ep_ret += r
            ep_len += 1

            # Hitting the step limit is an artificial cut-off, so it is not
            # stored as a terminal transition.
            timed_out = ep_len == max_ep_len
            if timed_out:
                d = False

            # np.array(...) copies the slice so later changes to the shared
            # buffer cannot alter the stored action.
            memory.add(state_stack, np.array(actions[:3]), r, d)

            if d or timed_out:
                print(f"Epoch: {epoch}, EpRet: {ep_ret}, EpLen: {ep_len}, ReplayBuff: {len(memory)}")

                # NOTE(review): these statistics cover previous episodes only;
                # the episode that just ended is appended afterwards. Confirm
                # that this is intended.
                if len(ep_ret_log) > 0:
                    log = np.array(ep_ret_log)
                    print("AvgEpRet:", log.mean())
                    print("StdEpRet:", log.std())
                    print("MaxEpRet:", log.max())
                    print("MinEpRet:", log.min())

                print()

                ep_ret_log.append(ep_ret)

                state_stack = _reset_frame_stack(env, num_frames)
                ep_ret, ep_len = 0, 0

                epoch += 1
    finally:
        # Release the environment (and its render window) on normal exit and
        # on errors alike; the original never closed it.
        env.close()

    print('\n')

    # save the dataset
    memory.save()
示例#2
0
文件: main.py 项目: apourchot/RLBazar
                                      n_steps=args.n_steps,
                                      render=args.render)

            if total_steps > args.start_steps:
                c_loss, a_loss = drla.train(memory, steps)

            actor_steps += steps
            total_steps += steps

            prLightPurple(
                "Iteration {}; Noisy Actor fitness:{}; Q-Loss:{}; A-Loss:{}".
                format(ite, fitness, c_loss, a_loss))

        fitness, steps = evaluate(drla,
                                  env,
                                  memory=None,
                                  noise=None,
                                  n_episodes=10)
        logger.append([fitness, c_loss, a_loss, total_steps])
        print("---------------------------------")
        prRed("Total steps: {}; Actor fitness:{} \n".format(
            total_steps, fitness))
        drla.save(args.output)
        print(torch.exp(drla.actor.log_alphas))

        if args.save_all_models and total_steps % 100000 == 0:
            drla.actor.save_model(args.output, "actor_{}".format(total_steps))
            drla.critic.save_model(args.output,
                                   "critic_{}".format(total_steps))
            memory.save(args.output)