params.test_episodes = 10
params.goal = 0
mu = str(params.mu).split(".")
mu = str(mu[0] + mu[1])
params.actor_model_dir = "../../logs/models/DDPG-original/{}/actor-mu{}/".format(
    str(params.env_name.split("-")[0]), mu)
params.critic_model_dir = "../../logs/models/DDPG-original/{}/critic-mu{}/".format(
    str(params.env_name.split("-")[0]), mu)

env = gym.make(params.env_name)
# env = Monitor(env, "./video/{}/".format(str(params.env_name.split("-")[0])), force=True)
tf.random.set_random_seed(params.seed)
random_process = None
agent = DDPG(Actor, Critic, env.action_space.shape[0], random_process, params)

# set seed
env.seed(params.seed)

state = env.reset()
done = False
r, episode_reward = list(), 0
reward_forward, reward_ctrl, reward_contact, reward_survive = \
    list(), list(), list(), list()
actions = list()

while not done:
    # deterministic policy
    action = agent.eval_predict(state)
    # scale for execution in env (in DDPG, every action is clipped between [-1, 1] in agent.predict)
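    # Hedged sketch: a plausible completion of the truncated loop body above.
    # Scaling by env.action_space.high and the bookkeeping below are assumptions
    # inferred from the comments, not confirmed repo code; the MuJoCo-style
    # reward components (reward_forward etc.) would presumably be read from `info` here.
    next_state, reward, done, info = env.step(action * env.action_space.high)
    actions.append(action)
    r.append(reward)
    episode_reward += reward
    state = next_state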
mu = str(params.mu).split(".")
mu = str(mu[0] + mu[1])
params.log_dir = "../../logs/logs/DDPG_batchnorm-{}-seed{}/{}-mu{}".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)
params.actor_model_dir = "../../logs/models/DDPG_batchnorm-{}-seed{}/{}/actor-mu{}/".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)
params.critic_model_dir = "../../logs/models/DDPG_batchnorm-{}-seed{}/{}/critic-mu{}/".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)
params.video_dir = "../../logs/video/DDPG_batchnorm-{}-seed{}/{}-mu{}/".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)
params.plot_path = "../../logs/plots/DDPG_batchnorm-{}-seed{}/{}-mu{}/".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)

env = gym.make(params.env_name)
env = Monitor(env, params.video_dir)

# set seed
env.seed(params.seed)
tf.random.set_random_seed(params.seed)

replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.reward_buffer_ep)
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)
random_process = OrnsteinUhlenbeckProcess(
    size=env.action_space.shape[0], theta=0.15, mu=params.mu, sigma=params.sigma)
# random_process = GaussianNoise(mu=params.mu, sigma=params.sigma)
agent = DDPG(Actor, Critic, env.action_space.shape[0], random_process, params)

train_DDPG_original(agent, env, replay_buffer, reward_buffer, summary_writer)
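# Hedged sketch of an Ornstein-Uhlenbeck noise process matching the constructor
# arguments above (size, theta, mu, sigma). This is the generic textbook
# formulation, not the repo's own OrnsteinUhlenbeckProcess; dt = 1 is an assumption.
import numpy as np

class OUNoiseSketch:
    def __init__(self, size, theta=0.15, mu=0.0, sigma=0.2):
        self.size, self.theta, self.mu, self.sigma = size, theta, mu, sigma
        self.reset()

    def reset(self):
        # restart the process from its long-run mean
        self.x = np.ones(self.size) * self.mu

    def sample(self):
        # mean-reverting drift toward mu plus Gaussian diffusion (dt = 1)
        dx = self.theta * (self.mu - self.x) + self.sigma * np.random.randn(self.size)
        self.x += dx
        return self.x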
now = datetime.datetime.now()

if params.debug_flg:
    params.log_dir = "../logs/logs/" + now.strftime("%Y%m%d-%H%M%S") + "-DDPG/"
    params.model_dir = "../logs/models/" + now.strftime("%Y%m%d-%H%M%S") + "-DDPG/"
else:
    params.log_dir = "../logs/logs/{}".format(params.env_name)
    params.model_dir = "../logs/models/{}".format(params.env_name)

env = gym.make(params.env_name)

# set seed
env.seed(params.seed)
tf.compat.v1.random.set_random_seed(params.seed)

agent = DDPG(Actor, Critic, env.action_space.shape[0], params)
replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.reward_buffer_ep)
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)

init_state = env.reset()
# run one forward pass so that the model sees the input shape and builds its weight matrices
agent.predict(init_state)

gp_model, update = create_bayes_net()
optimiser = tf.compat.v1.train.AdamOptimizer()
num_sample = 100  # number of samples

get_ready(agent.params)
global_timestep = tf.compat.v1.train.get_or_create_global_step()
time_buffer = deque(maxlen=agent.params.reward_buffer_ep)
log = logger(agent.params)
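# Hedged sketch illustrating why the burn-in forward pass above is needed: in
# eager mode, Keras layers create their weight variables lazily on the first
# call, once the input shape is known. The Dense layer below is a stand-in,
# not the repo's Actor/Critic.
import numpy as np
import tensorflow as tf

layer = tf.keras.layers.Dense(4)
assert layer.weights == []            # no variables yet: input shape unknown
_ = layer(np.zeros((1, 8), dtype=np.float32))
assert len(layer.weights) == 2        # kernel (8, 4) and bias (4,) now exist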
params.plot_path = "./logs/plots/DDPG-seed{}/".format(params.seed)
env = make_grid_env(plot_path=params.plot_path)

# set seed
env.seed(params.seed)
tf.random.set_random_seed(params.seed)

replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.reward_buffer_ep)
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)
random_process = OrnsteinUhlenbeckProcess(
    size=env.action_space.shape[0], theta=0.15, mu=params.mu, sigma=params.sigma)
agent = DDPG(Actor, Critic, env.action_space.shape[0], random_process, params)

get_ready(agent.params)
global_timestep = tf.compat.v1.train.get_or_create_global_step()
time_buffer = deque(maxlen=agent.params.reward_buffer_ep)
log = logger(agent.params)
traj = list()

with summary_writer.as_default():
    # for summary purposes, all training code lives in this context
    with tf.contrib.summary.always_record_summaries():
        for i in itertools.count():
            state = env.reset()
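            # Hedged sketch: a typical continuation of this training loop.
            # agent.predict, the replay_buffer.add signature, and the episode
            # bookkeeping are assumptions based on the surrounding snippets,
            # not confirmed repo code.
            done = False
            episode_reward = 0
            while not done:
                action = agent.predict(state)
                next_state, reward, done, _ = env.step(action)
                replay_buffer.add(state, action, reward, next_state, done)
                state = next_state
                episode_reward += reward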
now = datetime.now()
params.log_dir += "{}".format(params.env_name)
params.model_dir += "{}".format(params.env_name)

rospy.init_node("start_her")
task_and_robot_environment_name = rospy.get_param(
    '/fetch/task_and_robot_environment_name')
# this registers our task env with OpenAI Gym;
# beyond the returned env, we don't rely on its output for now.
env = StartOpenAI_ROS_Environment(task_and_robot_environment_name)

# params.max_action = env.action_space.high[0]
# params.num_action = env.action_space.shape[0]
# TODO: this is a temporary solution; check OpenAI's Fetch implementation!!
params.max_action = 0
params.num_action = 4

# set seed
env.seed(params.seed)
tf.random.set_random_seed(params.seed)

agent = DDPG(Actor, Critic, params.num_action, params)
replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.num_episodes)
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)

train_HER(agent, env, replay_buffer, reward_buffer, summary_writer)
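# Hedged sketch of the hindsight relabelling idea behind train_HER above
# (the "final" strategy from the HER paper): after an episode, transitions are
# stored a second time with the goal replaced by the goal actually achieved.
# Function and field names here are illustrative, not the repo's API.
def relabel_with_final_goal(episode, compute_reward):
    relabelled = []
    final_achieved = episode[-1]["achieved_goal"]
    for t in episode:
        # recompute the reward as if final_achieved had been the goal all along
        r = compute_reward(t["achieved_goal"], final_achieved)
        relabelled.append({**t, "goal": final_achieved, "reward": r})
    return relabelled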
help="debug mode or not") parser.add_argument("--google_colab", default=False, type=bool, help="if you are executing this on GoogleColab") params = parser.parse_args() params.test_episodes = 10 params.goal = DDPG_ENV_LIST[params.env_name] now = datetime.now() if params.debug_flg: params.log_dir = "../../logs/logs/" + now.strftime( "%Y%m%d-%H%M%S") + "-DDPG/" params.model_dir = "../../logs/models/" + now.strftime( "%Y%m%d-%H%M%S") + "-DDPG/" else: params.log_dir = "../../logs/logs/{}".format(params.env_name) params.model_dir = "../../logs/models/{}".format(params.env_name) env = gym.make(params.env_name) # set seed env.seed(params.seed) tf.random.set_random_seed(params.seed) agent = DDPG(Actor, Critic, env.action_space.shape[0], params) replay_buffer = ReplayBuffer(params.memory_size) reward_buffer = deque(maxlen=params.reward_buffer_ep) summary_writer = tf.contrib.summary.create_file_writer(params.log_dir) train_DDPG(agent, env, replay_buffer, reward_buffer, summary_writer)