Example #1
    # excerpt: per-episode TensorBoard logging (presumably a DQNAgent method;
    # the original file imports tensorflow as tf above this excerpt)
    def setup_summary(self):
        # the listing starts mid-method: the variable definitions and summary
        # tags below are reconstructed from how they are used, and the tag
        # strings are illustrative
        episode_total_reward = tf.Variable(0.)
        episode_avg_max_q = tf.Variable(0.)
        episode_duration = tf.Variable(0.)
        tf.summary.scalar('Total Reward/Episode', episode_total_reward)
        tf.summary.scalar('Average Max Q/Episode', episode_avg_max_q)
        tf.summary.scalar('Duration/Episode', episode_duration)
        summary_vars = [
            episode_total_reward, episode_avg_max_q, episode_duration
        ]  # episode_avg_loss was commented out in the original
        summary_placeholders = [
            tf.placeholder(tf.float32) for _ in range(len(summary_vars))
        ]
        update_ops = [
            summary_vars[i].assign(summary_placeholders[i])
            for i in range(len(summary_vars))
        ]
        summary_op = tf.summary.merge_all()
        return summary_placeholders, update_ops, summary_op
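
    # Typical episode-end use of the returned handles (`sess`, `writer`, and
    # `stats` are illustrative names, not part of this listing):
    #     for op, ph, v in zip(update_ops, summary_placeholders, stats):
    #         sess.run(op, feed_dict={ph: float(v)})
    #     writer.add_summary(sess.run(summary_op), episode)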

if __name__ == "__main__":
    EPISODES = 300  # assumed value; the constant is defined elsewhere in the original script
    # CartPole-v1 environment, where each episode is capped at 500 timesteps
    env = Env()
    state_size = len(env.get_state())
    action_size = env.action_size

    # create the DQN agent
    agent = DQNAgent(state_size, action_size)

    scores, episodes, global_step = [], [], 0

    for e in range(EPISODES):
        print("---------------------------------------------------")
        done = False
        score, step = 0, 0

        # reset the environment
        state = env.reset()
        state = np.reshape(state, [1, state_size])
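
        # --- sketch of the inner episode loop, which the listing clips off;
        #     the agent/env method names used below (get_action, step,
        #     append_sample, train_model) are assumptions, not original code ---
        while not done:
            global_step += 1
            step += 1
            action = agent.get_action(state)             # epsilon-greedy choice
            next_state, reward, done = env.step(action)  # advance the environment
            next_state = np.reshape(next_state, [1, state_size])
            agent.append_sample(state, action, reward, next_state, done)
            agent.train_model()                          # one minibatch update
            score += reward
            state = next_state

        scores.append(score)
        episodes.append(e)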

Example #2
import imageio.v2 as imageio  # scipy's ndimage.imread / misc.imresize were removed; imageio replaces them
import image_segment
from environment import Env
from agent import R_Learner
import numpy as np

if __name__ == "__main__":
    img = imageio.imread('butterfly.jpg')
    # downscale to 1/16 of the original size (misc.imresize(img, size=0.0625)
    # in the original); plain striding is a dependency-free stand-in
    img = img[::16, ::16]

    search_env = img  # unused below; kept from the original listing
    patch_src = img   # unused below; kept from the original listing
    K = 50            # number of segments to produce
    labels = image_segment.segment(img, K)  # per-pixel integer segment labels
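    # If the local image_segment module is unavailable, scikit-image produces
    # a comparable per-pixel label map (the segments themselves will differ):
    #     from skimage.segmentation import slic
    #     labels = slic(img, n_segments=K)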
    label = 15  # track one arbitrary segment
    patch_indices = np.where(labels == label)  # (row, col) arrays for that segment
    patch_pixels = img[patch_indices]          # its pixel values

    # shift the patch coordinates so its bounding box starts at (0, 0)
    indices_at_origin = (patch_indices[0] - np.min(patch_indices[0]),
                         patch_indices[1] - np.min(patch_indices[1]))
    env = Env(img)
    agent = R_Learner(patch_pixels, indices_at_origin, env)

    # interact and learn indefinitely; the original listing has no stop condition
    while True:
        state = env.get_state()
        action = agent.get_action(state)
        reward, next_state = env.take_action(action)
        agent.learn(state, action, reward, next_state)
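

# For reference: the update an R-learning agent (average-reward RL, Schwartz
# 1993) typically applies inside learn(). This is a tabular sketch with
# assumed names and hyperparameters, not code from agent.py:
def r_learning_update(Q, rho, s, a, r, s_next, alpha=0.1, beta=0.01):
    """One R-learning step: Q holds relative action values and rho the
    running estimate of the average reward per step."""
    delta = r - rho + max(Q[s_next].values()) - Q[s][a]
    was_greedy = Q[s][a] == max(Q[s].values())  # was `a` greedy in s?
    Q[s][a] += alpha * delta                    # move Q toward the R-learning target
    if was_greedy:
        rho += beta * delta                     # adjust average reward on greedy steps
    return rho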