def dqn_push_test(episode, model, noise):
    """Evaluate a saved DQN door-push policy.

    Args:
        episode: number of evaluation episodes to run.
        model: name of the saved Keras model under policy/door_push.
        noise: camera noise level forwarded to the environment.

    Returns:
        The result of run_dqn_test for this env/agent pair.
    """
    # Build the push environment with a fixed 64x64 camera and injected noise.
    push_env = DoorPushTaskEnv(resolution=(64, 64), cam_noise=noise)
    n_actions = push_env.action_dimension()

    # Agent whose active network is restored from the saved model on disk.
    dqn = DQNAgent(name='door_push', dim_img=(64, 64, 3), dim_act=n_actions)
    saved_path = os.path.join(sys.path[0], "policy", "door_push", model)
    dqn.dqn_active = tf.keras.models.load_model(saved_path)
    dqn.epsilon = 0.0  # fully greedy: no exploration during evaluation

    return run_dqn_test(episode, push_env, dqn, 60)
def dqn_test(episode):
    """Run a greedy evaluation of the saved door-open DQN for `episode` episodes.

    Loads the model from saved_models/<agent>/models, rolls out each episode
    with epsilon=0 (no exploration), and logs per-episode statistics via rospy.
    """
    eval_env = DoorOpenTaskEnv(resolution=(64, 64))
    agent = DQNAgent(name='door_open', dim_img=(64, 64, 3), dim_act=5)
    model_path = os.path.join(sys.path[0], 'saved_models', agent.name, 'models')
    agent.dqn_active = tf.keras.models.load_model(model_path)

    max_steps = eval_env.max_episode_steps
    t_start = time.time()
    success_counter = 0
    episodic_returns = []
    sedimentary_returns = []
    ep_rew = 0
    agent.epsilon = 0.0  # deterministic greedy policy for evaluation

    for ep in range(episode):
        obs, info = eval_env.reset()
        ep_rew = 0
        img = obs.copy()
        for st in range(max_steps):
            act = agent.epsilon_greedy(img)
            obs, rew, done, info = eval_env.step(act)
            img = obs.copy()
            ep_rew += rew
            if done:
                break

        # Per-episode bookkeeping: raw return plus running average.
        episodic_returns.append(ep_rew)
        sedimentary_returns.append(sum(episodic_returns) / (ep + 1))
        if eval_env.open:
            success_counter += 1

        rospy.loginfo(
            "\n================\nEpisode: {} \nEpisodeLength: {} \nEpisodeTotalRewards: {} \nAveragedTotalReward: {} \nSuccess: {} \nTime: {} \n================\n"
            .format(ep + 1, st + 1, ep_rew, sedimentary_returns[-1],
                    success_counter, time.time() - t_start))
def _load_door_pull_agent(self, pull_policy, pull_model):
    """Restore a trained door-pull policy from disk.

    Args:
        pull_policy: 'ppo' selects a PPO actor; anything else selects a DQN.
        pull_model: saved-model name under policy/door_pull.

    Returns:
        The agent with its network weights loaded and ready for inference.
    """
    # Both branches load from the same location; build the path once.
    saved_path = os.path.join(sys.path[0], "policy", "door_pull", pull_model)

    if pull_policy == 'ppo':
        agent = PPOAgent(env_type='discrete', dim_obs=(64, 64, 3),
                         dim_act=self.action_dimension())
        agent.actor.logits_net = tf.keras.models.load_model(saved_path)
    else:
        agent = DQNAgent(name="door_pull", dim_img=(64, 64, 3),
                         dim_act=self.action_dimension())
        agent.dqn_active = tf.keras.models.load_model(saved_path)
        agent.epsilon = 0.0  # deterministic action, no random exploration

    return agent
num_episodes = args.max_ep num_steps = args.max_step # instantiate env env = DoorPullAndTraverseEnv(resolution=(64, 64), cam_noise=args.noise) act_dim = env.action_dimension() train_freq = 80 # variables step_counter = 0 success_counter = 0 episodic_returns = [] sedimentary_returns = [] ep_rew = 0 # instantiate agent agent_p = DQNAgent(name='door_pull_traverse', dim_img=(64, 64, 3), dim_act=act_dim) model_path = os.path.join(sys.path[0], 'saved_models', agent_p.name, 'models') model_saver = ModelSaver(500) # use tensorboard train_loss = tf.keras.metrics.Mean('train_loss', dtype=tf.float32) summary_writer = tf.summary.create_file_writer(model_path) summary_writer.set_as_default() start_time = time.time() for ep in range(num_episodes): obs, info = env.reset() ep_rew = 0