Exemplo n.º 1
0
    def _initialize_episode(self):
        """Sample a goal, open the pipes if needed and begin a new episode.

        A goal is drawn at random from `self.length` and `self.theta`. When
        `self.reward_pipe[0]` is None, the channel ids, a completion flag of 1
        and the rounded goal are written to their pipes, and the action, reset,
        observation, touch, reward, over and terminal pipes are opened.

        Returns:
          list, one `self._agent.begin_episode(observation, state)` result per
          state read from `self.touch_pipe`.
        """
        self.length_rnd = np.random.choice(self.length)
        self.theta_rnd = np.random.choice(self.theta)

        # Identity test: `== None` would dispatch to the handle's `__eq__`.
        if self.reward_pipe[0] is None:
            self.channel_pipe = open_write_pipe(channel_name)
            write_to_pipe([self.channel_pipe] * n, list(range(n)))
            self.complete_pipe = open_write_pipe("/tmp/complete_down.pipe")
            write_to_pipe(self.complete_pipe, 1)
            self.goal_pipe = open_write_pipe(goal_path)
            write_to_pipe(
                self.goal_pipe,
                [round(self.length_rnd, 4),
                 round(self.theta_rnd, 4)])

            self.action_pipe = open_write_pipe(action_name_list)
            self.reset_pipe = open_write_pipe(reset_name_list)

            self.obs_pipe = open_read_pipe(obs_name_list)
            self.touch_pipe = open_read_pipe(touch_name_list)
            self.reward_pipe = open_read_pipe(reward_name_list)
            self.over_pipe = open_read_pipe(over_name_list)
            self.terminal_pipe = open_read_pipe(terminal_name_list)

        initial_state = [
            np.array(read_from_pipe(state_pipe))
            for state_pipe in self.touch_pipe
        ]
        # The observation pipes are not read here; a zero image stands in for
        # each initial observation. One placeholder is built per state so the
        # zip below does not truncate the result to a single action, which the
        # episode loop would then index out of range for channels >= 1.
        initial_observation = [np.zeros([100, 100, 3]) for _ in initial_state]

        return [
            self._agent.begin_episode(observation, state)
            for observation, state in zip(initial_observation, initial_state)
        ]
# Controller start-up handshake: open the space pipe for writing, then read
# this controller's channel id from the channel pipe.
output("make pipe channel, space, goal")

space_pipe = open_write_pipe(space_name)
output("open read pipe space")
channel_pipe = open_read_pipe(channel_name)
output("open read pipe channel")
channel = read_from_pipe(channel_pipe, 1)
output("read from pipe channel: {}".format(channel))

print(action_num())
# Only channel 0 consults the completion flag; when the flag is falsy it
# publishes the action-space description on the space pipe.
if channel == 0:
    complete_pipe = open_read_pipe("/tmp/complete.pipe")
    complete = read_from_pipe(complete_pipe, 1)
    if not complete:
        print("write space")
        write_to_pipe(space_pipe, action_space_info())
    # os.close(complete_pipe)
    close_pipe(complete_pipe)
print("I AM CHANNEL %s" % channel)
# NOTE(review): the two bare string literals below are no-op expression
# statements used as section markers around the evaluation-only goal setup.
'''evaluation use'''
# Read the goal once from the goal pipe and hand it to the environment.
goal_pipe = open_read_pipe(goal_name)
goal_info = read_from_pipe(goal_pipe)
close_pipe(goal_pipe)

env.environment.set_goal_position(goal_info)
'''.................................'''

# Build the per-channel pipe paths: each prefix in read_name / write_name gets
# the suffix "<channel + 1>.pipe" (so channel 0 uses the "...1.pipe" files).
read_name_list = [(i + "%s.pipe" % (channel + 1)) for i in read_name]
write_name_list = [(i + "%s.pipe" % (channel + 1)) for i in write_name]
all_path = read_name_list + write_name_list
Exemplo n.º 3
0
    def _run_one_episode(self):
        """Executes a full trajectory of the agent interacting with the environment.

        Every channel in `range(n)` is stepped once per iteration of the outer
        loop. A channel is retired (reset written, its pipes closed and
        `self.reward_pipe[i]` set to None) when its "over" pipe reports game
        over or when `self._max_steps_per_episode` is reached. The episode ends
        once every channel has been retired.

        Returns:
          The number of steps taken and the total reward (summed over all
          channels).
        """
        step_number = 0
        total_reward = 0.

        action = self._initialize_episode()
        n_list = list(range(n))

        # Keep interacting until every channel has been retired.
        while True:
            step_number += 1
            # Iterate over a snapshot: retired channels are removed from
            # `n_list` inside the loop, and removing from the list being
            # iterated would silently skip the channel that follows.
            for i in list(n_list):

                observation, state, reward, is_terminal = self._run_one_step(
                    i, action[i].tolist())

                total_reward += reward

                gameover = read_from_pipe(self.over_pipe[i])
                if gameover or step_number == self._max_steps_per_episode:
                    write_to_pipe(self.reset_pipe[i], True)

                    close_pipe([
                        self.action_pipe[i], self.reset_pipe[i],
                        self.obs_pipe[i], self.touch_pipe[i],
                        self.reward_pipe[i], self.over_pipe[i],
                        self.terminal_pipe[i]
                    ])
                    # Marks the channel's pipes as closed.
                    self.reward_pipe[i] = None
                    print("GAME_OVER")
                    self._end_episode(reward)

                    n_list.remove(i)
                    time.sleep(0.032)
                elif is_terminal:
                    # Original author's note: this branch seems not to be hit.
                    print("TERMINAL")

                    self._agent.end_episode(reward)
                    action[i] = self._agent.begin_episode(observation, state)
                else:
                    # `gameover` is falsy here, so no reset is requested.
                    write_to_pipe(self.reset_pipe[i], gameover)
                    action[i] = self._agent.step(reward, observation, state)

            if not n_list:
                close_pipe(self.channel_pipe)
                close_pipe(self.complete_pipe)
                close_pipe(self.goal_pipe)
                break

        return step_number, total_reward
Exemplo n.º 4
0
    def __init__(self,
                 base_dir,
                 create_agent_fn,
                 create_environment_fn=create_webots_environment,
                 sticky_actions=True,
                 checkpoint_file_prefix='ckpt',
                 logging_file_prefix='log',
                 log_every_n=1,
                 num_iterations=10,
                 training_steps=1000,
                 evaluation_steps=500,
                 max_steps_per_episode=2700,
                 seed=123):
        """Initialize the Runner object in charge of running a full experiment.

        Args:
          base_dir: str, the base directory to host all required
            sub-directories. Must not be None.
          create_agent_fn: A function called as
            `create_agent_fn(sess, environment, summary_writer=...)` that
            returns an agent.
          create_environment_fn: A function called with `sticky_actions` that
            returns the environment.
          sticky_actions: bool, forwarded to `create_environment_fn`.
          checkpoint_file_prefix: str, the prefix to use for checkpoint files.
          logging_file_prefix: str, prefix to use for the log files.
          log_every_n: int, the frequency for writing logs.
          num_iterations: int, the iteration number threshold.
          training_steps: int, the number of training steps to perform.
          evaluation_steps: int, the number of evaluation steps to perform.
          max_steps_per_episode: int, maximum number of steps after which an
            episode terminates.
          seed: int, seed applied to both the TensorFlow and NumPy random
            generators.

        This constructor will take the following actions:
        - Create the directories and a summary writer under `base_dir`.
        - Seed TensorFlow and NumPy.
        - Initialize an environment and a `tf.Session`.
        - Write the channel ids and a completion flag of 0 to their pipes,
          create the agent, then close those two pipes.
        - Initialize global variables and run the agent's `_sync_qt_ops`.
        - Initialize the checkpointer, resuming from a checkpoint if available.
        """
        assert base_dir is not None
        self._logging_file_prefix = logging_file_prefix
        self._log_every_n = log_every_n
        self._num_iterations = num_iterations
        self._training_steps = training_steps
        self._evaluation_steps = evaluation_steps
        self._max_steps_per_episode = max_steps_per_episode
        self._base_dir = base_dir
        self._create_directories()
        self._summary_writer = tf.compat.v1.summary.FileWriter(self._base_dir)
        tf.compat.v1.random.set_random_seed(seed)
        np.random.seed(seed)

        self._environment = create_environment_fn(sticky_actions)

        # Set up a session; cap this process at 20% of GPU memory.
        self.gpu_options = tf.compat.v1.GPUOptions(
            per_process_gpu_memory_fraction=0.2)
        self._sess = tf.compat.v1.Session('',
                                          config=tf.compat.v1.ConfigProto(
                                              allow_soft_placement=True,
                                              gpu_options=self.gpu_options))

        # Hand out one channel id per channel before the agent is created.
        channel_ids = list(range(n))
        self.channel_pipe = open_write_pipe(channel_name)
        write_to_pipe([self.channel_pipe] * n, channel_ids)
        print("write to pipe channel: {}".format(channel_ids))

        self.complete_pipe = open_write_pipe("/tmp/complete.pipe")
        write_to_pipe(self.complete_pipe, 0)
        self._agent = create_agent_fn(self._sess,
                                      self._environment,
                                      summary_writer=self._summary_writer)
        close_pipe(self.channel_pipe)
        close_pipe(self.complete_pipe)

        # Use the compat.v1 entry points, consistent with the rest of this
        # method; the bare `tf.get_default_graph` and
        # `tf.global_variables_initializer` aliases are absent under TF2.
        self._summary_writer.add_graph(
            graph=tf.compat.v1.get_default_graph())
        self._sess.run(tf.compat.v1.global_variables_initializer())

        self._sess.run(self._agent._sync_qt_ops)

        self._initialize_checkpointer_and_maybe_resume(checkpoint_file_prefix)
        # None entries mark channels whose pipes are not open.
        self.reward_pipe = [None] * n

        # Candidate goal values sampled at the start of each episode.
        self.length = np.linspace(0.53, 0.7, 4)
        self.theta = np.linspace(0.3, np.pi / 2, 15)
Exemplo n.º 5
0
# Pipe paths for channel 1. The "_in1" paths used in this snippet are opened
# for writing and the "_out1" path (space_path) for reading; presumably
# "in"/"out" is named from the peer process's point of view - TODO confirm.
channel_name = "/tmp/channel_in1.pipe"
space_path = "/tmp/space_out1.pipe"
goal_path = "/tmp/goal_in1.pipe"

action_path = "/tmp/action_in1.pipe"
obs_path = "/tmp/obs_out1.pipe"
touch_path = "/tmp/touch_out1.pipe"
reward_path = "/tmp/reward_out1.pipe"
over_path = "/tmp/over_out1.pipe"
terminal_path = "/tmp/term_out1.pipe"
reset_path = "/tmp/reset_in1.pipe"
# Paths grouped by the direction their names suggest.
write_name_list = [action_path, reset_path]
read_name_list = [obs_path, touch_path, reward_path, over_path, terminal_path]

# Handshake performed at import time: send channel id 0 and a completion flag
# of 0, then open the goal pipe for writing.
channel_pipe = open_write_pipe(channel_name)
write_to_pipe(channel_pipe, 0)
complete_pipe = open_write_pipe("/tmp/complete.pipe")
write_to_pipe(complete_pipe, 0)
goal_pipe = open_write_pipe(goal_path)

agent_name = 'ddpg'
debug_mode = False


def create_agent(sess, summary_writer=None):
    """Read the space description from the space pipe and close the handshake pipes.

    NOTE(review): this definition appears truncated in the available source;
    only the steps below are visible, and neither `sess` nor `summary_writer`
    is used by them.
    """

    # s = os.open(space_path, os.O_RDONLY)
    s = open_read_pipe(space_path)
    # space = json.loads(os.read(s,1024).decode())
    space = read_from_pipe(s)
    # The module-level channel/complete pipes are closed once the space
    # description has been read.
    close_pipe([channel_pipe, complete_pipe])
Exemplo n.º 6
0
# Create the handshake pipes before opening them.
make_pipe(channel_name)
make_pipe(space_name)
make_pipe('/tmp/complete.pipe')

# Open the space pipe for writing, then read this controller's channel id.
space_pipe = open_write_pipe(space_name)
channel_pipe = open_read_pipe(channel_name)
channel = read_from_pipe(channel_pipe, 1)

print(action_num())
# Only channel 0 consults the completion flag; when the flag is falsy it
# publishes the action-space description on the space pipe.
if channel == 0:
    complete_pipe = open_read_pipe("/tmp/complete.pipe")
    complete = read_from_pipe(complete_pipe, 1)
    if not complete:
        print("write space")
        write_to_pipe(space_pipe, action_space_info())
    # os.close(complete_pipe)
    close_pipe(complete_pipe)
print("I AM CHANNEL %s" % channel)

# Build the per-channel pipe paths: each prefix in read_name / write_name gets
# the suffix "<channel>.pipe", then create all of those pipes.
read_name_list = [(i + "%s.pipe" % channel) for i in read_name]
write_name_list = [(i + "%s.pipe" % channel) for i in write_name]
all_path = read_name_list + write_name_list
print(all_path)
make_pipe(all_path)

# The unpacking below needs five handles from write_name_list and two from
# read_name_list; presumably the open helpers return one handle per path -
# TODO confirm against their definitions.
obs_pipe, touch_pipe, reward_pipe, over_pipe, terminal_pipe = open_write_pipe(
    write_name_list)
action_pipe, reset_pipe = open_read_pipe(read_name_list)
'''