Example #1
        rewardNormalization = "returns"

    env = DummyVecEnv([
        makeEnvLambda(args.gym_id,
                      args.seed,
                      normOb=normOb,
                      rewardNormalization=rewardNormalization,
                      clipOb=clipOb,
                      clipRew=clipRew,
                      gamma=args.gamma)
    ])

    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    discreteActionsSpace = utils.is_discrete(env)

    inputLength = env.observation_space.shape[0]
    outputLength = (env.action_space.n
                    if discreteActionsSpace else env.action_space.shape[0])

    # summaries: placeholders and summary scalar objects
    epRewTestPh = tf.placeholder(
        tf.float32,
        shape=None,
        name='episode_test_real_reward_latest_mean_summary')
    epRewTrainPh = tf.placeholder(
        tf.float32,
        shape=None,
        name='episode_train_real_reward_latest_mean_summary')
    epTotalRewPh = tf.placeholder(
        tf.float32,  # dtype assumed to follow the tf.float32 pattern above; the listing is clipped here
        shape=None,
        name='episode_total_reward_summary')
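Two helpers used above, makeEnvLambda and utils.is_discrete, are defined elsewhere in the repository and are not part of this clipped listing. The sketch below is a minimal reconstruction inferred from the call sites, not the repository's actual code: DummyVecEnv expects a list of zero-argument callables, so makeEnvLambda must return a thunk, and is_discrete must distinguish Discrete from Box action spaces. The NormalizationWrapper name inside the thunk is hypothetical.

import gym

def makeEnvLambda(gym_id, seed, normOb=True, rewardNormalization=None,
                  clipOb=10.0, clipRew=10.0, gamma=0.99):
    # DummyVecEnv takes zero-argument constructors, so wrap the setup in a thunk
    def _thunk():
        env = gym.make(gym_id)
        # NormalizationWrapper is hypothetical; it stands in for whatever wrapper
        # applies the normalization/clipping options forwarded above
        env = NormalizationWrapper(env, normOb, rewardNormalization,
                                   clipOb, clipRew, gamma)
        env.seed(seed)
        return env
    return _thunk

def is_discrete(env):
    # Discrete action spaces expose an integer .n; Box (continuous) spaces expose .shape
    return isinstance(env.action_space, gym.spaces.Discrete)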
Example #2
        "\nBuffer size not specified. Taking value of {} which is the same as total_train_steps, as suggested by the paper\n"
        .format(args.buffer_size))

graph = tf.Graph()
with tf.Session(graph=graph) as sess:

    env = gym.make(args.gym_id)
    env = EnvironmentWrapper(env.env, args.norm_obs, args.norm_rew,
                             args.clip_obs, args.clip_rew)
    np.random.seed(args.seed)
    env.seed(args.seed)
    env.action_space.seed(args.seed)
    env.observation_space.seed(args.seed)
    tf.set_random_seed(args.seed)

    if utils.is_discrete(env):
        exit("TD3 can only be applied to continuous action space environments")

    inputLength = env.observation_space.shape[0]
    outputLength = env.action_space.shape[0]

    # summaries: placeholders and summary scalar objects
    epRewPh = tf.placeholder(tf.float32,
                             shape=None,
                             name='episode_reward_summary')
    epRewLatestMeanPh = tf.placeholder(
        tf.float32, shape=None, name='episode_reward_latest_mean_summary')
    epLenPh = tf.placeholder(tf.float32,
                             shape=None,
                             name='episode_length_summary')
    expVarPh = tf.placeholder(tf.float32,
                              shape=None,
                              name='explained_variance_summary')  # remaining args assumed; the listing is clipped here
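EnvironmentWrapper in Example #2 is likewise defined outside the listing. Below is a minimal sketch of what such a wrapper typically does, assuming running-statistics normalization with clipping; the class layout, field names, and update rule are assumptions, not the repository's implementation. Note the sketch scales rewards by the running standard deviation of raw rewards, whereas the rewardNormalization="returns" option in Example #1 suggests the real code normalizes by the standard deviation of discounted returns instead.

import numpy as np
import gym

class RunningMeanStd:
    # Streaming mean/variance via a per-sample Welford update
    def __init__(self, shape=()):
        self.mean = np.zeros(shape, dtype=np.float64)
        self.var = np.ones(shape, dtype=np.float64)
        self.count = 1e-4  # small prior count avoids division by zero early on

    def update(self, x):
        x = np.asarray(x, dtype=np.float64)
        delta = x - self.mean
        self.count += 1.0
        self.mean += delta / self.count
        self.var += (delta * (x - self.mean) - self.var) / self.count

class EnvironmentWrapper(gym.Wrapper):
    # Normalizes and clips observations and rewards with running statistics
    def __init__(self, env, norm_obs, norm_rew, clip_obs, clip_rew):
        super().__init__(env)
        self.norm_obs, self.norm_rew = norm_obs, norm_rew
        self.clip_obs, self.clip_rew = clip_obs, clip_rew
        self.ob_rms = RunningMeanStd(shape=env.observation_space.shape)
        self.rew_rms = RunningMeanStd(shape=())

    def _process_ob(self, ob):
        if not self.norm_obs:
            return ob
        self.ob_rms.update(ob)
        ob = (ob - self.ob_rms.mean) / np.sqrt(self.ob_rms.var + 1e-8)
        return np.clip(ob, -self.clip_obs, self.clip_obs)

    def step(self, action):
        ob, rew, done, info = self.env.step(action)
        if self.norm_rew:
            self.rew_rms.update(rew)
            rew = np.clip(rew / np.sqrt(self.rew_rms.var + 1e-8),
                          -self.clip_rew, self.clip_rew)
        return self._process_ob(ob), rew, done, info

    def reset(self, **kwargs):
        return self._process_ob(self.env.reset(**kwargs))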