def get_trajectory(self, last_pred=None):
    """Return this agent's trajectory wrapped as a message.

    A deep copy is taken because, when run with explore time > 1, the
    live trajectory may be cleared before the message is actually sent.

    Args:
        last_pred: unused; kept for interface compatibility.

    Returns:
        A ``message`` carrying the trajectory, tagged with this agent's id.
    """
    snapshot = deepcopy(self.trajectory)
    msg = message(snapshot)
    set_msg_info(msg, agent_id=self.id)
    return msg
def get_trajectory(self, last_pred=None):
    """Convert the collected trajectory fields to arrays and wrap as a message.

    Args:
        last_pred: unused; kept for interface compatibility.

    Returns:
        A ``message`` carrying ``self.trajectory``, tagged with this agent's id.
    """
    for _data_key in ("cur_state", "logit", "action"):
        self.trajectory[_data_key] = np.asarray(self.trajectory[_data_key])
    # Fix: ndarray.astype returns a NEW array and does not mutate in place.
    # The original discarded the result, so actions kept their default dtype;
    # assign the int32 copy back.
    self.trajectory["action"] = self.trajectory["action"].astype(np.int32)
    trajectory = message(self.trajectory)
    set_msg_info(trajectory, agent_id=self.id)
    return trajectory
def transfer_to_broker(self):
    """Send train data to learner.

    Runs forever: receives each message from the agent queue, rewrites its
    command with the learner postfix, stamps routing ids, and forwards it
    to the broker.
    """
    while True:
        payload = self.recv_agent.recv()
        cmd = get_msg_info(payload, "cmd")
        set_msg_info(
            payload,
            broker_id=self.broker_id,
            explorer_id=self.explorer_id,
            cmd=cmd + self.learner_postfix,
        )
        self.send_broker.send(payload)
def transfer_to_broker(self):
    """Send train data to learner.

    Runs forever: receives each message from the agent queue, stamps routing
    ids, and forwards it to the broker with the appropriate data type.
    """
    while True:
        payload = self.recv_agent.recv()
        cmd = get_msg_info(payload, "cmd")
        # Buffer-reduce control messages keep their own data type;
        # everything else ships as plain training data.
        if cmd == "buf_reduce":
            data_type = "buf_reduce"
        else:
            data_type = "data"
        set_msg_info(payload, broker_id=self.broker_id, explorer_id=self.explorer_id)
        self.send_broker.send(payload, data_type=data_type)
def get_trajectory(self, last_pred=None):
    """Merge per-env samples into one trajectory and wrap it as a message.

    Extends each trajectory field with the samples from every vectorized
    environment, stacks each field into a single array
    (env_num * seq_len along the first axis), and wraps the result.

    Args:
        last_pred: unused; kept for interface compatibility.

    Returns:
        A ``message`` carrying a shallow copy of ``self.trajectory``,
        tagged with this agent's id.
    """
    for env_id in range(self.vector_env_size):
        for _data_key in ("cur_state", "logit", "action",
                          "reward", "done", "info"):
            self.trajectory[_data_key].extend(
                self.sample_vector[env_id][_data_key])
    # merge data into env_num * seq_len
    for _data_key in self.trajectory:
        self.trajectory[_data_key] = np.stack(self.trajectory[_data_key])
    # Fix: ndarray.astype returns a NEW array and does not mutate in place.
    # The original discarded the result, so actions kept their stacked dtype;
    # assign the int32 copy back.
    self.trajectory["action"] = self.trajectory["action"].astype(np.int32)
    trajectory = message(self.trajectory.copy())
    set_msg_info(trajectory, agent_id=self.id)
    return trajectory
def predict(self):
    """Predict action.

    Runs forever: waits for a state message, runs the algorithm's predict
    under the shared lock, replies with the action, and periodically
    delivers accumulated timing stats.
    """
    while True:
        wait_start = time()
        request = self.request_q.recv()
        state = get_msg_data(request)
        self._stats.obs_wait_time += time() - wait_start

        infer_start = time()
        # Serialize access to the algorithm across threads.
        with self.lock:
            action = self.alg.predict(state)
        self._stats.inference_time += time() - infer_start

        set_msg_info(request, cmd="predict_reply")
        set_msg_data(request, action)
        self.reply_q.send(request)

        self._stats.iters += 1
        if self._stats.iters > self._report_period:
            self.stats_deliver.send(self._stats.get(), block=True)
def get_trajectory(self):
    """Return a shallow copy of the trajectory wrapped as a message.

    Returns:
        A ``message`` carrying ``self.trajectory``'s contents, tagged with
        this agent's id.
    """
    msg = message(self.trajectory.copy())
    set_msg_info(msg, agent_id=self.id)
    return msg
def get_trajectory(self):
    """Wrap the batch's transition data as a message.

    Merges ``self._info`` (records win rate within train) into the
    transition dict before wrapping.

    Returns:
        A ``message`` carrying the transition data, tagged with this
        agent's id.
    """
    transition = self.batch.data.transition_data
    transition.update(self._info.copy())
    msg = message(transition)
    set_msg_info(msg, agent_id=self.id)
    return msg