Пример #1
0
def create_agent(sess, summary_writer=None):
    """Build the agent selected by the module-level `agent_name`.

    The action-space description is read from the pipe at `space_path`, the
    channel and complete pipes are closed, and the space pipe descriptor is
    released before the agent constructor runs.

    Args:
      sess: passed through as the first argument of the agent constructor.
      summary_writer: passed through to the agent; replaced by None unless
        the module-level `debug_mode` is truthy.

    Returns:
      The constructed agent instance.

    Raises:
      ValueError: if `agent_name` is not 'ddpg', 'dqn', 'rainbow' or
        'implicit_quantile'.
    """
    space_fd = open_read_pipe(space_path)
    space = read_from_pipe(space_fd)
    close_pipe([channel_pipe, complete_pipe])

    writer = summary_writer if debug_mode else None

    # Every outcome (including the error path) releases the descriptor before
    # doing anything else, so close it once up front.
    os.close(space_fd)

    if agent_name == 'ddpg':
        # For DDPG the space description is indexed as (low, high, shape).
        box = spaces.Box(space[0], space[1], shape=space[2], dtype=np.float32)
        return ddpg_agent_s.DDPGAgent(sess,
                                      action_space=box,
                                      summary_writer=writer)

    # The remaining agents share the same constructor keywords.
    if agent_name == 'dqn':
        agent_cls = dqn_agent.DQNAgent
    elif agent_name == 'rainbow':
        agent_cls = rainbow_agent.RainbowAgent
    elif agent_name == 'implicit_quantile':
        agent_cls = implicit_quantile_agent.ImplicitQuantileAgent
    else:
        raise ValueError('Unknown agent: {}'.format(agent_name))

    return agent_cls(sess, num_actions=space, summary_writer=writer)
# Module-level pipe setup. Statement order matters here: each call below is a
# pipe open/read/write/close, so the sequence itself is the protocol.

# Open the space pipe for writing and read this process's channel value from
# the channel pipe.
space_pipe = open_write_pipe(space_name)
# NOTE(review): the message says "read" although the pipe above was opened
# with open_write_pipe — confirm which wording is intended.
output("open read pipe space")
channel_pipe = open_read_pipe(channel_name)
output("open read pipe channel")
# NOTE(review): meaning of the second argument (1) is defined by
# read_from_pipe, which is not visible here — TODO confirm.
channel = read_from_pipe(channel_pipe, 1)
output("read from pipe channel: {}".format(channel))

print(action_num())
# Only channel 0 reads the complete pipe; when the value read is falsy it
# writes the action-space description to the space pipe.
# NOTE(review): complete_pipe is bound only inside this branch, so any later
# code that references it when channel != 0 will hit a NameError.
if channel == 0:
    complete_pipe = open_read_pipe("/tmp/complete.pipe")
    complete = read_from_pipe(complete_pipe, 1)
    if not complete:
        print("write space")
        write_to_pipe(space_pipe, action_space_info())
    # os.close(complete_pipe)
    close_pipe(complete_pipe)
print("I AM CHANNEL %s" % channel)
# The two bare string literals below are no-op statements used as section
# markers around the goal-position setup.
'''evaluation use'''
# Read the goal description from the goal pipe and hand it to the environment.
goal_pipe = open_read_pipe(goal_name)
goal_info = read_from_pipe(goal_pipe)
close_pipe(goal_pipe)

env.environment.set_goal_position(goal_info)
'''.................................'''

# Build the per-channel pipe paths: every prefix in read_name / write_name gets
# the suffix "<channel + 1>.pipe", then all of them are passed to make_pipe.
read_name_list = [(i + "%s.pipe" % (channel + 1)) for i in read_name]
write_name_list = [(i + "%s.pipe" % (channel + 1)) for i in write_name]
all_path = read_name_list + write_name_list
# print(all_path)
make_pipe(all_path)
Пример #3
0
    def _run_one_episode(self):
        """Executes a full trajectory of the agent interacting with the environment.

        Every still-active channel (indices 0..n-1) is stepped once per pass.
        A channel is retired when its over-pipe reports game over or when the
        per-episode step limit is reached; the episode ends once every channel
        has been retired.

        Returns:
          The number of passes taken and the total reward, summed over all
          channels.
        """
        step_number = 0
        total_reward = 0.

        action = self._initialize_episode()
        # Indices of the channels whose episode is still running.
        n_list = list(range(n))

        # Keep interacting until every channel has been retired.
        while True:
            step_number += 1
            # Iterate over a snapshot: channels are removed from n_list inside
            # the loop, and mutating the list being iterated would make the
            # loop skip the channel that follows a removed one (so that
            # channel would miss this step and, on the last step, would never
            # be cut off by the step limit).
            for i in list(n_list):
                observation, state, reward, is_terminal = self._run_one_step(
                    i, action[i].tolist())

                total_reward += reward

                gameover = read_from_pipe(self.over_pipe[i])
                # ">=" rather than "==" so a channel can never run past the
                # limit even if the exact step were somehow missed.
                if gameover or step_number >= self._max_steps_per_episode:
                    write_to_pipe(self.reset_pipe[i], True)

                    close_pipe([
                        self.action_pipe[i], self.reset_pipe[i],
                        self.obs_pipe[i], self.touch_pipe[i],
                        self.reward_pipe[i], self.over_pipe[i],
                        self.terminal_pipe[i]
                    ])
                    self.reward_pipe[i] = None
                    print("GAME_OVER")
                    self._end_episode(reward)

                    n_list.remove(i)
                    # NOTE(review): presumably one simulator timestep of
                    # slack for the peer process — TODO confirm.
                    time.sleep(0.032)
                # The original author noted this branch does not seem to be
                # reached in practice.
                elif is_terminal:
                    print("TERMINAL")

                    self._agent.end_episode(reward)
                    action[i] = self._agent.begin_episode(observation, state)
                else:
                    # gameover is falsy here; forward it to the reset pipe and
                    # ask the agent for the next action.
                    write_to_pipe(self.reset_pipe[i], gameover)
                    action[i] = self._agent.step(reward, observation, state)

            if not n_list:
                close_pipe(self.channel_pipe)
                close_pipe(self.complete_pipe)
                close_pipe(self.goal_pipe)
                break

        return step_number, total_reward
Пример #4
0
    def __init__(self,
                 base_dir,
                 create_agent_fn,
                 create_environment_fn=create_webots_environment,
                 sticky_actions=True,
                 checkpoint_file_prefix='ckpt',
                 logging_file_prefix='log',
                 log_every_n=1,
                 num_iterations=10,
                 training_steps=1000,
                 evaluation_steps=500,
                 max_steps_per_episode=2700,
                 seed=123):
        """Initialize the Runner object in charge of running a full experiment.

        Args:
          base_dir: str, the base directory to host all required
            sub-directories. Must not be None.
          create_agent_fn: A function called as
            `create_agent_fn(session, environment, summary_writer=...)` that
            returns an agent.
          create_environment_fn: A function called with `sticky_actions` that
            returns the environment.
          sticky_actions: bool, forwarded to `create_environment_fn`.
          checkpoint_file_prefix: str, the prefix to use for checkpoint files.
          logging_file_prefix: str, prefix to use for the log files.
          log_every_n: int, the frequency for writing logs.
          num_iterations: int, the iteration number threshold (must be greater
            than start_iteration).
          training_steps: int, the number of training steps to perform.
          evaluation_steps: int, the number of evaluation steps to perform.
          max_steps_per_episode: int, maximum number of steps after which an
            episode terminates.
          seed: int, seed applied to both the TensorFlow and NumPy random
            number generators.

        This constructor will take the following actions:
        - Create the experiment directories and a summary `FileWriter`.
        - Seed TensorFlow and NumPy.
        - Initialize an environment.
        - Initialize a `tf.compat.v1.Session`.
        - Write the channel indices 0..n-1 to the channel pipe and 0 to the
          complete pipe, build the agent, then close both pipes.
        - Add the graph to the summary writer and initialize the variables.
        - Reload from the latest checkpoint, if available, and initialize the
          Checkpointer object.
        """
        assert base_dir is not None
        self._logging_file_prefix = logging_file_prefix
        self._log_every_n = log_every_n
        self._num_iterations = num_iterations
        self._training_steps = training_steps
        self._evaluation_steps = evaluation_steps
        self._max_steps_per_episode = max_steps_per_episode
        self._base_dir = base_dir
        self._create_directories()
        self._summary_writer = tf.compat.v1.summary.FileWriter(self._base_dir)
        tf.compat.v1.random.set_random_seed(seed)
        np.random.seed(seed)

        self._environment = create_environment_fn(sticky_actions)

        # Set up a session and initialize variables.
        self.gpu_options = tf.compat.v1.GPUOptions(
            per_process_gpu_memory_fraction=0.2)
        self._sess = tf.compat.v1.Session('',
                                          config=tf.compat.v1.ConfigProto(
                                              allow_soft_placement=True,
                                              gpu_options=self.gpu_options))

        # Publish one channel index per worker on the channel pipe.
        channel_ids = list(range(n))
        self.channel_pipe = open_write_pipe(channel_name)
        write_to_pipe([self.channel_pipe] * n, channel_ids)
        print("write to pipe channel: {}".format(channel_ids))

        # The complete pipe receives 0 before the agent is built; both pipes
        # stay open until the agent has been created.
        self.complete_pipe = open_write_pipe("/tmp/complete.pipe")
        write_to_pipe(self.complete_pipe, 0)
        self._agent = create_agent_fn(self._sess,
                                      self._environment,
                                      summary_writer=self._summary_writer)
        close_pipe(self.channel_pipe)
        close_pipe(self.complete_pipe)

        # Use the compat.v1 entry points like the rest of this constructor:
        # the bare tf.get_default_graph / tf.global_variables_initializer
        # names do not exist in TensorFlow 2.x.
        self._summary_writer.add_graph(
            graph=tf.compat.v1.get_default_graph())
        self._sess.run(tf.compat.v1.global_variables_initializer())

        # NOTE(review): presumably syncs the target network with the online
        # network — confirm against the agent implementation.
        self._sess.run(self._agent._sync_qt_ops)

        self._initialize_checkpointer_and_maybe_resume(checkpoint_file_prefix)
        # One reward-pipe slot per channel, filled in later.
        self.reward_pipe = [None] * n

        self.length = np.linspace(0.53, 0.7, 4)
        self.theta = np.linspace(0.3, np.pi / 2, 15)
Пример #5
0
                    cnt2 = 0

                step_cnt += 1
                if step_cnt == 400:
                    is_terminal = True
                """observation = read_from_pipe(obs_pipe)
                observation = np.asarray(observation)"""
                observation = np.zeros([100, 100, 3])
                over = read_from_pipe(over_pipe)
                over = over or is_terminal
                # os.write(reset_p, json.dumps(is_terminal).encode())
                write_to_pipe(reset_pipe, over)
                if over:
                    close_pipe([
                        obs_pipe, touch_pipe, reward_pipe, over_pipe,
                        terminal_pipe, action_pipe, reset_pipe, channel_pipe,
                        complete_pipe
                    ])
                    # print("broken")
                    time.sleep(0.032)
                    break
                else:
                    action = agent.step(reward, observation, state)

            if np.mean(episode_distance[-20:]) < 0.01:
                ckpt0.append(1)
            elif np.mean(episode_distance[-20:]) < 0.02:
                ckpt0.append(2)
            else:
                ckpt0.append(0)
            # if cnt == 20: