Example #1
import gym
import numpy as np
import tensorflow as tf
# REINFORCE is the policy-gradient agent class provided by the accompanying
# agent module of this project.


def main(args):
    # preprocess input state
    def preprocess(obser):
        '''preprocess a 210x160x3 Atari frame into a flat 6400 (80x80) vector'''
        obser = obser[35:195]  # crop to the 160x160 playing field
        obser = obser[::2, ::2, 0]  # downsample by 2 -> 80x80, keep one channel
        obser[obser == 144] = 0  # erase background colour 144
        obser[obser == 109] = 0  # erase background colour 109
        obser[obser != 0] = 1  # paddles and ball set to 1

        return obser.astype(float).ravel()

    INPUT_DIM = 80 * 80
    HIDDEN_UNITS = 200
    ACTION_DIM = 6
    MAX_EPISODES = 20000
    MAX_STEPS = 5000

    # load agent
    agent = REINFORCE(INPUT_DIM, HIDDEN_UNITS, ACTION_DIM)
    agent.construct_model(args.gpu)

    # load model or init a new
    saver = tf.train.Saver(max_to_keep=1)
    if args.model_path is not None:
        # reuse saved model
        saver.restore(agent.sess, args.model_path)
    else:
        # build a new model
        agent.init_var()

    # load env
    env = gym.make("Pong-v0")

    # evaluation
    for ep in range(args.ep):
        # reset env
        total_rewards = 0
        state = env.reset()

        while True:
            env.render()
            # preprocess
            state = preprocess(state)
            # sample actions
            action = agent.sample_action(state[np.newaxis, :])
            # act!
            next_state, reward, done, _ = env.step(action)
            total_rewards += reward
            # state shift
            state = next_state
            if done:
                break

        print('Ep%s  Reward: %s ' % (ep + 1, total_rewards))
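
The evaluation scripts in these examples read model_path, ep and gpu from an args object that is never constructed here. The snippet below is one plausible way to wire them up with argparse; the flag names mirror the attributes used above, but the types and defaults are assumptions rather than part of the original code.

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Evaluate a REINFORCE agent on Pong-v0')
    parser.add_argument('--model_path', default=None,
                        help='checkpoint to restore; a fresh model is initialized when omitted')
    parser.add_argument('--ep', type=int, default=10,
                        help='number of evaluation episodes')
    parser.add_argument('--gpu', type=int, default=-1,
                        help='device flag forwarded to agent.construct_model')
    main(parser.parse_args())
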
Example #2
def main(args):

    def preprocess(obs):
        obs = obs[35:195]
        obs = obs[::2, ::2, 0]
        obs[obs == 144] = 0
        obs[obs == 109] = 0
        obs[obs != 0] = 1

        return obs.astype(float).ravel()

    INPUT_DIM = 80 * 80
    HIDDEN_UNITS = 200
    ACTION_DIM = 6

    # load agent
    agent = REINFORCE(INPUT_DIM, HIDDEN_UNITS, ACTION_DIM)
    agent.construct_model(args.gpu)

    # load model or init a new
    saver = tf.train.Saver(max_to_keep=1)
    if args.model_path is not None:
        # reuse saved model
        saver.restore(agent.sess, args.model_path)
    else:
        # build a new model
        agent.init_var()

    # load env
    env = gym.make('Pong-v0')

    # evaluation
    for ep in range(args.ep):
        # reset env
        total_rewards = 0
        state = env.reset()

        while True:
            env.render()
            # preprocess
            state = preprocess(state)
            # sample actions
            action = agent.sample_action(state[np.newaxis, :])
            # act!
            next_state, reward, done, _ = env.step(action)
            total_rewards += reward
            # state shift
            state = next_state
            if done:
                break

        print('Ep%s  Reward: %s ' % (ep + 1, total_rewards))
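
The frame preprocessing shared by all of these scripts can be sanity-checked in isolation. The standalone snippet below applies the same routine to a dummy all-zero 210x160x3 frame (purely illustrative, not a real Atari observation) and confirms the flat 6400-dimensional output.

import numpy as np

def preprocess(obs):
    obs = obs[35:195]  # crop to the 160x160 playing field
    obs = obs[::2, ::2, 0]  # downsample by 2 -> 80x80, keep one channel
    obs[obs == 144] = 0  # erase background colour 144
    obs[obs == 109] = 0  # erase background colour 109
    obs[obs != 0] = 1  # paddles and ball set to 1
    return obs.astype(float).ravel()

dummy_frame = np.zeros((210, 160, 3), dtype=np.uint8)
print(preprocess(dummy_frame).shape)  # (6400,)
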
Example #3
def main(args):
    def preprocess(obs):
        obs = obs[35:195]
        obs = obs[::2, ::2, 0]
        obs[obs == 144] = 0
        obs[obs == 109] = 0
        obs[obs != 0] = 1

        return obs.astype(float).ravel()

    INPUT_DIM = 80 * 80
    HIDDEN_UNITS = 200
    ACTION_DIM = 6

    # load agent
    agent = REINFORCE(INPUT_DIM, HIDDEN_UNITS, ACTION_DIM)
    agent.construct_model(args.gpu)

    # load model or init a new
    saver = tf.train.Saver(max_to_keep=1)
    if args.model_path is not None:
        # reuse saved model
        saver.restore(agent.sess, args.model_path)
    else:
        # build a new model
        agent.init_var()

    # load env
    env = gym.make('Pong-v0')

    # evaluation
    for ep in range(args.ep):
        # reset env
        total_rewards = 0
        state = env.reset()

        while True:
            env.render()
            # preprocess
            state = preprocess(state)
            # sample actions
            action = agent.sample_action(state[np.newaxis, :])
            # act!
            next_state, reward, done, _ = env.step(action)
            total_rewards += reward
            # state shift
            state = next_state
            if done:
                break

        print('Ep%s  Reward: %s ' % (ep + 1, total_rewards))
Example #4
import os

import gym
import numpy as np
import tensorflow as tf
# As in the evaluation examples, REINFORCE is the agent class provided by the
# accompanying agent module of this project.


def main(args):
    # frame preprocessing (same routine as in the evaluation scripts above)
    def preprocess(obs):
        obs = obs[35:195]  # crop to the 160x160 playing field
        obs = obs[::2, ::2, 0]  # downsample by 2 -> 80x80, keep one channel
        obs[obs == 144] = 0  # erase background colour 144
        obs[obs == 109] = 0  # erase background colour 109
        obs[obs != 0] = 1  # paddles and ball set to 1

        return obs.astype(float).ravel()

    MODEL_PATH = args.model_path
    INPUT_DIM = 80 * 80
    HIDDEN_UNITS = 200
    ACTION_DIM = 6
    MAX_EPISODES = 20000
    MAX_STEPS = 5000

    # load agent
    agent = REINFORCE(INPUT_DIM, HIDDEN_UNITS, ACTION_DIM)
    agent.construct_model(args.gpu)

    # model saver
    saver = tf.train.Saver(max_to_keep=1)
    if MODEL_PATH is not None:
        saver.restore(agent.sess, args.model_path)
        ep_base = int(args.model_path.split('_')[-1])
        mean_rewards = float(args.model_path.split('/')[-1].split('_')[0])
    else:
        agent.sess.run(tf.global_variables_initializer())
        ep_base = 0
        mean_rewards = None

    # load env
    env = gym.make('Pong-v0')
    # main loop
    for ep in range(MAX_EPISODES):
        # reset env
        total_rewards = 0
        state = env.reset()

        for step in range(MAX_STEPS):
            # preprocess
            state = preprocess(state)
            # sample actions
            action = agent.sample_action(state[np.newaxis, :])
            # act!
            next_state, reward, done, _ = env.step(action)

            total_rewards += reward
            agent.store_rollout(state, action, reward)
            # state shift
            state = next_state

            if done:
                break

        # update model per episode
        agent.update_model()

        # logging
        if mean_rewards is None:
            mean_rewards = total_rewards
        else:
            mean_rewards = 0.99 * mean_rewards + 0.01 * total_rewards
        # in Pong the winner always scores 21, so points played = 42 - |score difference|
        rounds = (21 - np.abs(total_rewards)) + 21
        average_steps = (step + 1) / rounds
        print('Ep%s: %d rounds \nAvg_steps: %.2f Reward: %s Avg_reward: %.4f' %
              (ep + 1, rounds, average_steps, total_rewards, mean_rewards))
        if ep % 100 == 0:
            if not os.path.isdir(args.save_path):
                os.makedirs(args.save_path)
            save_name = os.path.join(args.save_path,
                                     str(round(mean_rewards, 2)) + '_' + str(ep_base + ep + 1))
            saver.save(agent.sess, save_name)
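
None of the examples include the REINFORCE class itself; only its interface can be inferred from the calls above (construct_model, init_var, sample_action, store_rollout, update_model, plus a sess attribute used by the saver). The skeleton below records that assumed interface as a sketch; the actual network, loss and update rule live in the project's agent module and may differ.

class REINFORCE(object):
    '''Assumed interface of the policy-gradient agent used by the scripts above.'''

    def __init__(self, input_dim, hidden_units, action_dim):
        # flattened frame size (6400), hidden layer width (200) and
        # discrete action count (6 for Pong-v0)
        self.input_dim = input_dim
        self.hidden_units = hidden_units
        self.action_dim = action_dim
        self.sess = None  # created in construct_model

    def construct_model(self, gpu):
        # build the policy network and create self.sess (a TensorFlow session),
        # optionally placed on the given GPU
        raise NotImplementedError

    def init_var(self):
        # initialize all variables when no checkpoint is restored
        raise NotImplementedError

    def sample_action(self, state):
        # run the policy on a (1, input_dim) state and sample an action id
        raise NotImplementedError

    def store_rollout(self, state, action, reward):
        # buffer one transition of the current episode (training script only)
        raise NotImplementedError

    def update_model(self):
        # turn the buffered episode into discounted returns, apply one
        # policy-gradient update, then clear the buffer (training script only)
        raise NotImplementedError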