rewardNormalization = "returns"
env = DummyVecEnv([
    makeEnvLambda(args.gym_id,
                  args.seed,
                  normOb=normOb,
                  rewardNormalization=rewardNormalization,
                  clipOb=clipOb,
                  clipRew=clipRew,
                  gamma=args.gamma)
])

# seed numpy and TensorFlow for reproducibility
np.random.seed(args.seed)
tf.set_random_seed(args.seed)

discreteActionsSpace = utils.is_discrete(env)

# observation/action dimensions taken from the environment spaces
inputLength = env.observation_space.shape[0]
outputLength = env.action_space.n if discreteActionsSpace else env.action_space.shape[0]

# summary placeholders and summary scalar objects
epRewTestPh = tf.placeholder(
    tf.float32,
    shape=None,
    name='episode_test_real_reward_latest_mean_summary')
epRewTrainPh = tf.placeholder(
    tf.float32,
    shape=None,
    name='episode_train_real_reward_latest_mean_summary')
epTotalRewPh = tf.placeholder(dtype,
"\nBuffer size not specified. Taking value of {} which is the same as total_train_steps, as suggested by the paper\n" .format(args.buffer_size)) graph = tf.Graph() with tf.Session(graph=graph) as sess: env = gym.make(args.gym_id) env = EnvironmentWrapper(env.env, args.norm_obs, args.norm_rew, args.clip_obs, args.clip_rew) np.random.seed(args.seed) env.seed(args.seed) env.action_space.seed(args.seed) env.observation_space.seed(args.seed) tf.set_random_seed(args.seed) if utils.is_discrete(env): exit("TD3 can only be applied to continuous action space environments") inputLength = env.observation_space.shape[0] outputLength = env.action_space.shape[0] #summeries placeholders and summery scalar objects epRewPh = tf.placeholder(tf.float32, shape=None, name='episode_reward_summary') epRewLatestMeanPh = tf.placeholder( tf.float32, shape=None, name='episode_reward_latest_mean_summary') epLenPh = tf.placeholder(tf.float32, shape=None, name='episode_length_summary') expVarPh = tf.placeholder(tf.float32,