def _initialize_episode(self):
  """Initialization for a new episode.

  Returns:
    A list with the initial action chosen by each agent channel.
  """
  self.length_rnd = np.random.choice(self.length)
  self.theta_rnd = np.random.choice(self.theta)

  # On the first episode the pipes have not been opened yet, so set up the
  # whole channel here: send each environment its channel index, signal
  # completion, publish the goal, and open the per-channel I/O pipes.
  if self.reward_pipe[0] is None:
    self.channel_pipe = open_write_pipe(channel_name)
    write_to_pipe([self.channel_pipe] * n, list(range(n)))
    self.complete_pipe = open_write_pipe("/tmp/complete.pipe")
    write_to_pipe(self.complete_pipe, 1)
    self.goal_pipe = open_write_pipe(goal_path)
    write_to_pipe(
        self.goal_pipe,
        [round(self.length_rnd, 4), round(self.theta_rnd, 4)])
    self.action_pipe = open_write_pipe(action_name_list)
    self.reset_pipe = open_write_pipe(reset_name_list)
    self.obs_pipe = open_read_pipe(obs_name_list)
    self.touch_pipe = open_read_pipe(touch_name_list)
    self.reward_pipe = open_read_pipe(reward_name_list)
    self.over_pipe = open_read_pipe(over_name_list)
    self.terminal_pipe = open_read_pipe(terminal_name_list)

  # The camera observation pipes are currently bypassed; a zero image stands
  # in for each observation instead of:
  # initial_observation = [json.loads(os.read(obs_pipe, 50000))
  #                        for obs_pipe in self.obs_pipe]
  initial_observation = [np.zeros([100, 100, 3])]
  initial_state = [
      np.array(read_from_pipe(state_pipe)) for state_pipe in self.touch_pipe
  ]
  return [
      self._agent.begin_episode(observation, state)
      for observation, state in zip(initial_observation, initial_state)
  ]
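# ---------------------------------------------------------------------------
# Illustrative sketch (an assumption, not from the original source, shown at
# module level for illustration): the pipe helpers used above are defined
# elsewhere in the repo. A minimal version, assuming JSON-encoded payloads
# over POSIX FIFOs (consistent with the json.loads(os.read(...)) call
# commented out above), could look like this. The real helpers may differ in
# buffer sizes and error handling.
import json
import os


def open_write_pipe(path):
  """Opens one FIFO, or each FIFO in a list, for writing."""
  if isinstance(path, (list, tuple)):
    return [os.open(p, os.O_WRONLY) for p in path]
  return os.open(path, os.O_WRONLY)


def open_read_pipe(path):
  """Opens one FIFO, or each FIFO in a list, for reading."""
  if isinstance(path, (list, tuple)):
    return [os.open(p, os.O_RDONLY) for p in path]
  return os.open(path, os.O_RDONLY)


def write_to_pipe(pipe, payload):
  """JSON-encodes `payload` and writes it to one pipe or a list of pipes."""
  if isinstance(pipe, (list, tuple)):
    for fd, item in zip(pipe, payload):
      os.write(fd, json.dumps(item).encode())
  else:
    os.write(pipe, json.dumps(payload).encode())


def read_from_pipe(pipe, size=50000):
  """Reads up to `size` bytes from the pipe and JSON-decodes the result."""
  return json.loads(os.read(pipe, size).decode())


def close_pipe(pipe):
  """Closes one pipe file descriptor or a list of them."""
  if isinstance(pipe, (list, tuple)):
    for fd in pipe:
      os.close(fd)
  else:
    os.close(pipe)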
def create_agent(sess, summary_writer=None):
  """Creates an RL agent whose action space is read from the space pipe."""
  s = open_read_pipe(space_path)
  space = read_from_pipe(s)
  os.close(s)
  close_pipe([channel_pipe, complete_pipe])

  if not debug_mode:
    summary_writer = None

  if agent_name == 'ddpg':
    return ddpg_agent_s.DDPGAgent(
        sess,
        action_space=spaces.Box(
            space[0], space[1], shape=space[2], dtype=np.float32),
        summary_writer=summary_writer)
  elif agent_name == 'dqn':
    return dqn_agent.DQNAgent(
        sess, num_actions=space, summary_writer=summary_writer)
  elif agent_name == 'rainbow':
    return rainbow_agent.RainbowAgent(
        sess, num_actions=space, summary_writer=summary_writer)
  elif agent_name == 'implicit_quantile':
    return implicit_quantile_agent.ImplicitQuantileAgent(
        sess, num_actions=space, summary_writer=summary_writer)
  else:
    raise ValueError('Unknown agent: {}'.format(agent_name))
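# Illustrative usage sketch (an assumption; the runner that calls
# create_agent() is not shown in this section): with the TF1-style agents
# above, construction happens inside a session once the environment process is
# running and has published its action space on `space_path` (otherwise
# open_read_pipe() blocks waiting for a writer).
if __name__ == "__main__":
  import tensorflow as tf

  with tf.Session() as sess:
    agent = create_agent(sess, summary_writer=None)
    sess.run(tf.global_variables_initializer())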
def create_agent(sess, environment, summary_writer=None):
  """Creates an RL agent.

  Args:
    sess: A `tf.Session` object for running associated ops.
    environment: The environment the agent interacts with; its action space
      is read from the space pipe rather than from this object.
    summary_writer: A Tensorflow summary writer to pass to the agent
      for in-agent training statistics in Tensorboard.

  Returns:
    agent: An RL agent.

  Raises:
    ValueError: If `agent_name` is not in supported list.
  """
  print("enter")
  s = open_read_pipe(space_path)
  space = read_from_pipe(s)
  print("read space pipe")
  os.close(s)

  if not FLAGS.debug_mode:
    summary_writer = None

  if FLAGS.agent_name == 'ddpg':
    return ddpg_agent_s.DDPGAgent(
        sess,
        action_space=spaces.Box(
            space[0], space[1], shape=space[2], dtype=np.float32),
        summary_writer=summary_writer)
  elif FLAGS.agent_name == 'dqn':
    return dqn_agent.DQNAgent(
        sess, num_actions=space[2][0], summary_writer=summary_writer)
  elif FLAGS.agent_name == 'rainbow':
    return rainbow_agent.RainbowAgent(
        sess, num_actions=space[2][0], summary_writer=summary_writer)
  elif FLAGS.agent_name == 'implicit_quantile':
    return implicit_quantile_agent.ImplicitQuantileAgent(
        sess, num_actions=space[2][0], summary_writer=summary_writer)
  else:
    raise ValueError('Unknown agent: {}'.format(FLAGS.agent_name))
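# Illustrative sketch (an assumption, not from the original source): from the
# indexing above, the environment side is expected to publish the action space
# as a JSON list [low, high, shape]. For a 7-dimensional continuous action
# space bounded by [-1, 1], for example, the payload would look like this:
import numpy as np
from gym import spaces

example_space = [-1.0, 1.0, [7]]
example_box = spaces.Box(
    example_space[0], example_space[1], shape=example_space[2],
    dtype=np.float32)                        # used by the DDPG agent
example_num_actions = example_space[2][0]    # 7, used by the discrete agents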
reward_name = "/tmp/reward_out"
over_name = "/tmp/over_out"
terminal_name = "/tmp/term_out"
reset_name = "/tmp/reset_in"
read_name = [action_name, reset_name]
write_name = [obs_name, touch_name, reward_name, over_name, terminal_name]

output("ready to make pipe")
make_pipe(channel_name)
make_pipe(space_name)
make_pipe(goal_name)
make_pipe('/tmp/complete.pipe')
output("make pipe channel, space, goal")

space_pipe = open_write_pipe(space_name)
output("open write pipe space")
channel_pipe = open_read_pipe(channel_name)
output("open read pipe channel")
channel = read_from_pipe(channel_pipe, 1)
output("read from pipe channel: {}".format(channel))
print(action_num())

# Only the first channel publishes the action space, and only if it has not
# been published already.
if channel == 0:
  complete_pipe = open_read_pipe("/tmp/complete.pipe")
  complete = read_from_pipe(complete_pipe, 1)
  if not complete:
    print("write space")
    write_to_pipe(space_pipe, action_space_info())
  close_pipe(complete_pipe)

print("I AM CHANNEL %s" % channel)
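# Illustrative sketch (an assumption, not from the original source): make_pipe
# and output are defined elsewhere in the repo. Minimal versions consistent
# with how they are used above could be:
import os
import sys


def make_pipe(path):
  # Create the FIFO only if it does not already exist, so re-running the
  # script does not fail with FileExistsError.
  if not os.path.exists(path):
    os.mkfifo(path)


def output(msg):
  # Lightweight progress logging; flushing makes messages appear immediately
  # even when stdout is redirected to a file.
  print(msg)
  sys.stdout.flush()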
# ----- evaluation use -----
# Run 100 episodes, sweeping 5 goal lengths x 20 goal angles.
for j in range(100):
  l = int(j / 20)
  t = j % 20
  total_reward = 0
  is_terminal = False

  # Announce this evaluator as channel 0 and publish the goal for the episode.
  channel_pipe = open_write_pipe(channel_name)
  write_to_pipe(channel_pipe, 0)
  complete_pipe = open_write_pipe("/tmp/complete.pipe")
  write_to_pipe(complete_pipe, 1)
  write_to_pipe(goal_pipe, [round(length[l], 4), round(theta[t], 4)])

  action_pipe, reset_pipe = open_write_pipe(write_name_list)
  obs_pipe, touch_pipe, reward_pipe, over_pipe, terminal_pipe = open_read_pipe(
      read_name_list)

  # The camera observation pipe is bypassed; a zero image stands in instead of
  # reading from obs_pipe.
  initial_observation = np.zeros([100, 100, 3])
  initial_state_list = read_from_pipe(touch_pipe)
  initial_state = np.asarray(initial_state_list)

  action = agent.begin_episode(initial_observation, initial_state)
  time.sleep(0.032)
  print('episodes %d' % j)

  episode_distance = []
  cnt1 = 0
  cnt2 = 0
  step_cnt = 0
  while True:
    action = action.tolist()
    write_to_pipe(action_pipe, action)
action_name = "/tmp/action_in"
obs_name = "/tmp/obs_out"
touch_name = "/tmp/touch_out"
reward_name = "/tmp/reward_out"
over_name = "/tmp/over_out"
terminal_name = "/tmp/term_out"
reset_name = "/tmp/reset_in"
read_name = [action_name, reset_name]
write_name = [obs_name, touch_name, reward_name, over_name, terminal_name]

make_pipe(channel_name)
make_pipe(space_name)
make_pipe('/tmp/complete.pipe')

space_pipe = open_write_pipe(space_name)
channel_pipe = open_read_pipe(channel_name)
channel = read_from_pipe(channel_pipe, 1)
print(action_num())

if channel == 0:
  complete_pipe = open_read_pipe("/tmp/complete.pipe")
  complete = read_from_pipe(complete_pipe, 1)
  if not complete:
    print("write space")
    write_to_pipe(space_pipe, action_space_info())
  close_pipe(complete_pipe)

print("I AM CHANNEL %s" % channel)

# Append the channel index to each base name to build per-channel pipe paths.
read_name_list = [(i + "%s.pipe" % channel) for i in read_name]
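# Example: for channel 0 the per-channel read pipes resolve to
# "/tmp/action_in0.pipe" and "/tmp/reset_in0.pipe", so each agent/environment
# pair communicates over its own set of FIFOs without interfering with other
# channels.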