def create_agent(sess, environment=None, summary_writer=None):
  """Creates the agent named by `agent_name`, reading its action space from the space pipe.

  `environment` is accepted for compatibility with the Runner, which calls
  create_agent_fn(sess, environment, summary_writer=...), but it is unused
  here: the action-space description arrives over the named pipe instead.
  """
  # s = os.open(space_path, os.O_RDONLY)
  s = open_read_pipe(space_path)
  # space = json.loads(os.read(s, 1024).decode())
  space = read_from_pipe(s)
  os.close(s)
  close_pipe([channel_pipe, complete_pipe])
  if not debug_mode:
    summary_writer = None
  if agent_name == 'ddpg':
    # For DDPG, `space` is expected to be [low, high, shape] describing a
    # continuous action space.
    return ddpg_agent_s.DDPGAgent(
        sess,
        action_space=spaces.Box(space[0], space[1], shape=space[2],
                                dtype=np.float32),
        summary_writer=summary_writer)
  elif agent_name == 'dqn':
    # For the discrete agents, `space` is the number of available actions.
    return dqn_agent.DQNAgent(sess, num_actions=space,
                              summary_writer=summary_writer)
  elif agent_name == 'rainbow':
    return rainbow_agent.RainbowAgent(sess, num_actions=space,
                                      summary_writer=summary_writer)
  elif agent_name == 'implicit_quantile':
    return implicit_quantile_agent.ImplicitQuantileAgent(
        sess, num_actions=space, summary_writer=summary_writer)
  else:
    raise ValueError('Unknown agent: {}'.format(agent_name))
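# --- Sketch of the pipe helpers assumed above ---------------------------------
# open_read_pipe / read_from_pipe / write_to_pipe / close_pipe / make_pipe are
# used throughout this code but not defined in this excerpt. The sketch below
# is an assumption reconstructed from the commented-out os.open / os.read /
# json calls: JSON-encoded messages exchanged over named FIFOs. It is an
# illustration only, not the project's actual implementation.
import json
import os


def make_pipe(paths):
  # Create the named FIFOs if they do not already exist.
  for p in paths if isinstance(paths, list) else [paths]:
    if not os.path.exists(p):
      os.mkfifo(p)


def open_read_pipe(path):
  return os.open(path, os.O_RDONLY)


def open_write_pipe(path):
  return os.open(path, os.O_WRONLY)


def read_from_pipe(fd, size=1024):
  # Blocks until the writer side sends a JSON payload of at most `size` bytes.
  return json.loads(os.read(fd, size).decode())


def write_to_pipe(fd, data):
  # The excerpt also calls this with parallel lists of fds and payloads.
  if isinstance(fd, list):
    for f, d in zip(fd, data):
      os.write(f, json.dumps(d).encode())
  else:
    os.write(fd, json.dumps(data).encode())


def close_pipe(fds):
  for f in fds if isinstance(fds, list) else [fds]:
    if f is not None:
      os.close(f)
# -------------------------------------------------------------------------------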
space_pipe = open_write_pipe(space_name)
output("open write pipe space")
channel_pipe = open_read_pipe(channel_name)
output("open read pipe channel")
channel = read_from_pipe(channel_pipe, 1)
output("read from pipe channel: {}".format(channel))
print(action_num())

# Only the first controller (channel 0) publishes the action-space description,
# and only if the trainer has not yet marked the handshake as complete.
if channel == 0:
  complete_pipe = open_read_pipe("/tmp/complete.pipe")
  complete = read_from_pipe(complete_pipe, 1)
  if not complete:
    print("write space")
    write_to_pipe(space_pipe, action_space_info())
  # os.close(complete_pipe)
  close_pipe(complete_pipe)

print("I AM CHANNEL %s" % channel)

# Evaluation use: receive the goal position for this run and apply it.
goal_pipe = open_read_pipe(goal_name)
goal_info = read_from_pipe(goal_pipe)
close_pipe(goal_pipe)
env.environment.set_goal_position(goal_info)

# Build the per-channel pipe names (prefix + channel index) and create the FIFOs.
read_name_list = [(i + "%s.pipe" % (channel + 1)) for i in read_name]
write_name_list = [(i + "%s.pipe" % (channel + 1)) for i in write_name]
all_path = read_name_list + write_name_list
# print(all_path)
make_pipe(all_path)
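# --- Naming-scheme illustration ------------------------------------------------
# read_name / write_name hold pipe-path prefixes defined elsewhere in the
# controller; the prefixes below are hypothetical and only illustrate how the
# per-channel FIFO names are formed (prefix + "%s.pipe" % (channel + 1)), so
# each Webots instance talks to the trainer over its own numbered pipes.
def _demo_channel_pipe_names(channel, prefixes=("/tmp/obs", "/tmp/action")):
  # e.g. channel 0 -> ['/tmp/obs1.pipe', '/tmp/action1.pipe']
  return [p + "%s.pipe" % (channel + 1) for p in prefixes]
# -------------------------------------------------------------------------------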
def _run_one_episode(self):
  """Executes a full trajectory of the agent interacting with the environment.

  Returns:
    The number of steps taken and the total reward.
  """
  step_number = 0
  total_reward = 0.

  action = self._initialize_episode()
  is_terminal = False
  # Channels (environment instances) still active in this episode.
  n_list = list(range(n))

  # Keep interacting until every channel reaches a terminal state.
  while True:
    step_number += 1
    # Iterate over a copy so finished channels can be removed from n_list
    # without skipping the next element.
    for i in list(n_list):
      observation, state, reward, is_terminal = self._run_one_step(
          i, action[i].tolist())
      total_reward += reward
      # Perform reward clipping.
      # reward = np.clip(reward, -1, 1)

      gameover = read_from_pipe(self.over_pipe[i])
      if gameover or step_number == self._max_steps_per_episode:
        # Tell this channel's controller to reset, then release its pipes.
        write_to_pipe(self.reset_pipe[i], True)
        close_pipe([
            self.action_pipe[i], self.reset_pipe[i], self.obs_pipe[i],
            self.touch_pipe[i], self.reward_pipe[i], self.over_pipe[i],
            self.terminal_pipe[i]
        ])
        self.reward_pipe[i] = None
        print("GAME_OVER")
        self._end_episode(reward)
        n_list.remove(i)
        time.sleep(0.032)
        continue
      elif is_terminal:
        # Rarely reached: the controller usually reports game-over first.
        print("TERMINAL")
        self._agent.end_episode(reward)
        action[i] = self._agent.begin_episode(observation, state)
      else:
        write_to_pipe(self.reset_pipe[i], gameover)
        action[i] = self._agent.step(reward, observation, state)

    if len(n_list) == 0:
      close_pipe(self.channel_pipe)
      close_pipe(self.complete_pipe)
      close_pipe(self.goal_pipe)
      break

  # self._end_episode(reward)
  return step_number, total_reward
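# --- Assumed shape of the per-channel step exchange ----------------------------
# _run_one_step is not shown in this excerpt. The sketch below is an assumption
# based on the pipes referenced above: send the chosen action for channel `i`,
# then read the next observation, touch/state reading, reward and terminal flag
# back from the corresponding FIFOs. Illustrative only, not the actual method.
def _run_one_step_sketch(self, i, action):
  write_to_pipe(self.action_pipe[i], action)
  observation = np.asarray(read_from_pipe(self.obs_pipe[i]))
  state = read_from_pipe(self.touch_pipe[i])
  reward = read_from_pipe(self.reward_pipe[i])
  is_terminal = read_from_pipe(self.terminal_pipe[i])
  return observation, state, reward, is_terminal
# -------------------------------------------------------------------------------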
def __init__(self,
             base_dir,
             create_agent_fn,
             create_environment_fn=create_webots_environment,
             sticky_actions=True,
             checkpoint_file_prefix='ckpt',
             logging_file_prefix='log',
             log_every_n=1,
             num_iterations=10,
             training_steps=1000,
             evaluation_steps=500,
             max_steps_per_episode=2700,
             seed=123):
  """Initialize the Runner object in charge of running a full experiment.

  Args:
    base_dir: str, the base directory to host all required sub-directories.
    create_agent_fn: A function that takes as args a TensorFlow session and
      an environment, and returns an agent.
    create_environment_fn: A function which receives the sticky-actions flag
      and creates a Webots Gym environment.
    sticky_actions: bool, whether to enable sticky actions in the
      environment.
    checkpoint_file_prefix: str, the prefix to use for checkpoint files.
    logging_file_prefix: str, prefix to use for the log files.
    log_every_n: int, the frequency for writing logs.
    num_iterations: int, the iteration number threshold (must be greater
      than start_iteration).
    training_steps: int, the number of training steps to perform.
    evaluation_steps: int, the number of evaluation steps to perform.
    max_steps_per_episode: int, maximum number of steps after which an
      episode terminates.
    seed: int, random seed for TensorFlow and NumPy.

  This constructor will take the following actions:
  - Initialize an environment.
  - Initialize a `tf.Session`.
  - Initialize a logger.
  - Initialize an agent.
  - Reload from the latest checkpoint, if available, and initialize the
    Checkpointer object.
  """
  assert base_dir is not None
  self._logging_file_prefix = logging_file_prefix
  self._log_every_n = log_every_n
  self._num_iterations = num_iterations
  self._training_steps = training_steps
  self._evaluation_steps = evaluation_steps
  self._max_steps_per_episode = max_steps_per_episode
  self._base_dir = base_dir
  self._create_directories()
  self._summary_writer = tf.compat.v1.summary.FileWriter(self._base_dir)

  tf.compat.v1.random.set_random_seed(seed)
  np.random.seed(seed)

  self._environment = create_environment_fn(sticky_actions)
  # Set up a session and initialize variables.
  self.gpu_options = tf.compat.v1.GPUOptions(
      per_process_gpu_memory_fraction=0.2)
  self._sess = tf.compat.v1.Session(
      '',
      config=tf.compat.v1.ConfigProto(allow_soft_placement=True,
                                      gpu_options=self.gpu_options))

  # Hand out a channel index to each of the `n` environment processes, then
  # signal on the complete pipe that the action-space handshake may start.
  self.channel_pipe = open_write_pipe(channel_name)
  write_to_pipe([self.channel_pipe] * n, list(range(n)))
  print("write to pipe channel: {}".format(list(range(n))))
  self.complete_pipe = open_write_pipe("/tmp/complete.pipe")
  write_to_pipe(self.complete_pipe, 0)

  self._agent = create_agent_fn(self._sess, self._environment,
                                summary_writer=self._summary_writer)
  close_pipe(self.channel_pipe)
  close_pipe(self.complete_pipe)

  self._summary_writer.add_graph(graph=tf.compat.v1.get_default_graph())
  self._sess.run(tf.compat.v1.global_variables_initializer())
  self._sess.run(self._agent._sync_qt_ops)

  self._initialize_checkpointer_and_maybe_resume(checkpoint_file_prefix)

  self.reward_pipe = [None] * n
  self.length = np.linspace(0.53, 0.7, 4)
  self.theta = np.linspace(0.3, np.pi / 2, 15)
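# --- Usage sketch ---------------------------------------------------------------
# How this Runner is assumed to be driven. The run_experiment() entry point is
# an assumption carried over from the Dopamine Runner this class follows, and
# the base_dir value is hypothetical; the Webots controllers serving the named
# pipes must already be running, otherwise the pipe opens in __init__ block.
def _runner_usage_sketch():
  runner = Runner(base_dir='/tmp/webots_experiment',
                  create_agent_fn=create_agent)
  runner.run_experiment()
# ---------------------------------------------------------------------------------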
cnt2 = 0
step_cnt += 1
# Force the evaluation episode to end after 400 steps.
if step_cnt == 400:
  is_terminal = True

# observation = read_from_pipe(obs_pipe)
# observation = np.asarray(observation)
# A dummy all-zero image is used instead of reading the real observation.
observation = np.zeros([100, 100, 3])

over = read_from_pipe(over_pipe)
over = over or is_terminal
# os.write(reset_p, json.dumps(is_terminal).encode())
write_to_pipe(reset_pipe, over)
if over:
  close_pipe([
      obs_pipe, touch_pipe, reward_pipe, over_pipe, terminal_pipe,
      action_pipe, reset_pipe, channel_pipe, complete_pipe
  ])
  # print("broken")
  time.sleep(0.032)
  break
else:
  action = agent.step(reward, observation, state)

# Bucket the mean goal distance over the last 20 episodes into a checkpoint code.
if np.mean(episode_distance[-20:]) < 0.01:
  ckpt0.append(1)
elif np.mean(episode_distance[-20:]) < 0.02:
  ckpt0.append(2)
else:
  ckpt0.append(0)
# if cnt == 20:
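# --- Evaluation bucketing illustration -------------------------------------------
# The branch above maps the mean goal distance of the last 20 episodes to a
# checkpoint code (< 0.01 -> 1, < 0.02 -> 2, otherwise 0). A small hypothetical
# example of the same rule, assuming distances in the same units as
# episode_distance:
def _bucket_recent_distance(episode_distance):
  mean_d = np.mean(episode_distance[-20:])
  if mean_d < 0.01:
    return 1
  elif mean_d < 0.02:
    return 2
  return 0
# e.g. _bucket_recent_distance([0.015] * 20) -> 2
# -----------------------------------------------------------------------------------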