Example #1
import os
import sys

import gym

# parse_args, get_session/set_session, Environment, AtariEnvironment and the
# algorithm classes (DDQN, A2C, A3C, DDPG) are assumed to come from the
# surrounding project.

def main(args=None):

    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
        algo.load_weights(args.model_path)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)

    # Display agent
    old_state, time = env.reset(), 0
    while True:
        env.render()
        a = algo.policy_action(old_state)
        old_state, r, done, _ = env.step(a)
        time += 1
        if done:
            # Capture the fresh initial state when an episode ends
            old_state, time = env.reset(), 0

    env.env.close()
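
None of these examples show parse_args. The sketch below reconstructs a plausible argparse-based version purely from the attributes the snippets read off args; every flag name, default, and help string is an assumption:

import argparse

def parse_args(args):
    parser = argparse.ArgumentParser(description='Training and display parameters')
    parser.add_argument('--type', type=str, default='DDQN',
                        help='Algorithm to use: DDQN, A2C, A3C or DDPG')
    parser.add_argument('--env', type=str, default='CartPole-v1',
                        help='OpenAI Gym environment id')
    parser.add_argument('--gpu', type=str, default='',
                        help='GPU id, exported as CUDA_VISIBLE_DEVICES')
    parser.add_argument('--is_atari', action='store_true',
                        help='Wrap the environment with AtariEnvironment')
    parser.add_argument('--consecutive_frames', type=int, default=4,
                        help='Number of consecutive observations stacked together')
    parser.add_argument('--gather_stats', action='store_true',
                        help='Export per-episode statistics to CSV')
    parser.add_argument('--model_path', type=str,
                        help='Path to pre-trained DDQN weights')
    parser.add_argument('--actor_path', type=str,
                        help='Path to pre-trained actor weights')
    parser.add_argument('--critic_path', type=str,
                        help='Path to pre-trained critic weights')
    return parser.parse_args(args)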
Example #2
    episode = 0

    # run for 100 episodes
    # Note: Please adjust this as needed to work with your model architecture.
    # Make sure you still call evaluate() with the reward received in each episode
    for i in range(wandb.config.episodes):
        # Set reward received in this episode = 0 at the start of the episode
        episodic_reward = 0

        # Play one episode with the current policy
        state = env.reset()

        done = False
        while not done:
            env.render()

            # Forward pass through the policy network (batch dimension added)
            action = agent.predict(np.expand_dims(state, axis=0))

            # Greedy action selection; the commented line samples stochastically instead
            action = np.argmax(action)
            #action = np.random.choice(np.arange(action_dim), p=action[0])

            # perform the action and fetch next state, reward
            state, reward, done, _ = env.step(action)

            episodic_reward += reward

        # Call the evaluation function with the reward received in this episode
        evaluate(episodic_reward)
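
The evaluate() helper is referenced by the comments but never shown, and the snippet assumes env, agent and numpy (np) are defined by the surrounding script. As a minimal sketch, assuming evaluate simply reports the episodic reward to Weights & Biases (the metric key is an assumption; wandb.log is the standard API):

import wandb

def evaluate(episodic_reward):
    # Report the reward collected over the finished episode to wandb
    wandb.log({'episodic_reward': episodic_reward})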
Example #3
import os
import sys

import gym
import numpy as np
import pandas as pd
import tensorflow as tf

# parse_args, get_session/set_session, Environment, AtariEnvironment,
# opticalTweezers and the algorithm classes (DDQN, A2C, A3C, DDPG) are
# assumed to come from the surrounding project.

def main(args=None):

    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    # TensorBoard summary writer (TensorFlow 1.x API)
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" + args.env)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    elif args.env == 'cell':
        # Custom optical-tweezers environment with hard-coded dimensions
        env = Environment(opticalTweezers(), args.consecutive_frames)
        env.reset()
        state_dim = (6,)
        action_dim = 4  # the reshape code must also change for a 2D agent
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        print(state_dim)
        action_dim = gym.make(args.env).action_space.n
        print(action_dim)
    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames,
                   is_atari=args.is_atari)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Display agent
    old_state, time = env.reset(), 0
    while True:
        env.render()
        a = algo.policy_action(old_state)
        old_state, r, done, _ = env.step(a)
        time += 1
        if done:
            # Capture the fresh initial state when an episode ends
            old_state, time = env.reset(), 0
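
Examples #1 and #3 both call set_session(get_session()) without defining either helper. A minimal sketch, assuming Keras running on the TensorFlow 1.x backend, where get_session builds a session that allocates GPU memory on demand (the body of get_session is an assumption; set_session is the standard keras.backend API):

import tensorflow as tf
from keras.backend import set_session

def get_session():
    # Grow GPU memory usage on demand instead of reserving it all up front
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    return tf.Session(config=config)

set_session(get_session())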