def _from_json(batch):
    """Deserialize one JSON record into a SampleBatch.

    Accepts str or bytes (smart_open's S3 streams ignore the "r" mode
    and yield bytes, so bytes are decoded as UTF-8 first). Each column
    container is unpacked, then every element inside it.
    """
    if isinstance(batch, bytes):  # smart_open S3 doesn't respect "r"
        batch = batch.decode("utf-8")
    columns = json.loads(batch)
    for key in columns:
        # Unpack the column itself, then each item it holds.
        columns[key] = [
            unpack_if_needed(item) for item in unpack_if_needed(columns[key])
        ]
    return SampleBatch(columns)
def _from_json(batch: str) -> SampleBatchType:
    """Decode one JSON record into a SampleBatch or MultiAgentBatch.

    The record's "type" field selects the batch class to build. Raises
    ValueError when the field is missing or holds an unknown value.
    """
    if isinstance(batch, bytes):  # smart_open S3 doesn't respect "r"
        batch = batch.decode("utf-8")
    record = json.loads(batch)

    if "type" not in record:
        raise ValueError("JSON record missing 'type' field")
    record_type = record.pop("type")

    if record_type == "SampleBatch":
        columns = {key: unpack_if_needed(col) for key, col in record.items()}
        return SampleBatch(columns)
    if record_type == "MultiAgentBatch":
        policy_batches = {
            pid: SampleBatch(
                {key: unpack_if_needed(col) for key, col in pb.items()})
            for pid, pb in record["policy_batches"].items()
        }
        return MultiAgentBatch(policy_batches, record["count"])
    raise ValueError(
        "Type field must be one of ['SampleBatch', 'MultiAgentBatch']",
        record_type)
def _encode_sample(self, idxes):
    """Gather the transitions at ``idxes`` into stacked numpy arrays.

    Returns a list of arrays: obs_t, actions, rewards, obs_tp1, dones,
    followed by one array per extra per-transition slot stored beyond
    the first five tuple positions.
    """
    obs_batch, act_batch, rew_batch, next_obs_batch, done_batch = \
        [], [], [], [], []
    for idx in idxes:
        row = self._storage[idx]
        obs_t, action, reward, obs_tp1, done, *_ = row
        obs_batch.append(np.array(unpack_if_needed(obs_t), copy=False))
        act_batch.append(np.array(action, copy=False))
        rew_batch.append(reward)
        next_obs_batch.append(np.array(unpack_if_needed(obs_tp1), copy=False))
        done_batch.append(done)
        self._hit_count[idx] += 1
    out = [
        np.array(obs_batch),
        np.array(act_batch),
        np.array(rew_batch),
        np.array(next_obs_batch),
        np.array(done_batch),
    ]
    # ``row`` is the last tuple seen in the loop above; any slots past the
    # first five become extra columns (assumes idxes is non-empty).
    for slot in range(5, len(row)):
        out.append(np.array([self._storage[idx][slot] for idx in idxes]))
    return out
def from_json_data(json_data: Any, worker: Optional["RolloutWorker"]):
    """Turn decoded JSON data into a SampleBatch or MultiAgentBatch.

    The "type" field of ``json_data`` selects the batch class. When a
    ``worker`` is given, columns are adjusted to the matching policy's
    spaces; a single-agent record is rejected if the worker's PolicyMap
    holds more than one policy. Raises ValueError for a missing or
    unknown "type" field.
    """
    # Try to infer the SampleBatchType (SampleBatch or MultiAgentBatch).
    if "type" not in json_data:
        raise ValueError("JSON record missing 'type' field")
    data_type = json_data.pop("type")

    if data_type == "SampleBatch":
        if worker is not None and len(worker.policy_map) != 1:
            raise ValueError(
                "Found single-agent SampleBatch in input file, but our "
                "PolicyMap contains more than 1 policy!")
        # Unpack each column in place (callers see the mutation).
        for key, col in json_data.items():
            json_data[key] = unpack_if_needed(col)
        if worker is not None:
            only_policy = next(iter(worker.policy_map.values()))
            json_data = _adjust_obs_actions_for_policy(json_data, only_policy)
        return SampleBatch(json_data)

    if data_type == "MultiAgentBatch":
        policy_batches = {}
        for pid, pb in json_data["policy_batches"].items():
            columns = {key: unpack_if_needed(col) for key, col in pb.items()}
            if worker is not None:
                columns = _adjust_obs_actions_for_policy(
                    columns, worker.policy_map[pid])
            policy_batches[pid] = SampleBatch(columns)
        return MultiAgentBatch(policy_batches, json_data["count"])

    raise ValueError(
        "Type field must be one of ['SampleBatch', 'MultiAgentBatch']",
        data_type)
def _from_json(batch):
    """Parse one JSON line into a SampleBatch or MultiAgentBatch.

    Bytes input is decoded as UTF-8 first (smart_open S3 streams ignore
    the "r" mode). The "type" field picks the batch class; ValueError is
    raised if it is missing or unrecognized.
    """
    if isinstance(batch, bytes):  # smart_open S3 doesn't respect "r"
        batch = batch.decode("utf-8")
    parsed = json.loads(batch)

    if "type" not in parsed:
        raise ValueError("JSON record missing 'type' field")
    batch_type = parsed.pop("type")

    if batch_type == "SampleBatch":
        for key in parsed:
            parsed[key] = unpack_if_needed(parsed[key])
        return SampleBatch(parsed)
    elif batch_type == "MultiAgentBatch":
        per_policy = {}
        for pid, pb in parsed["policy_batches"].items():
            per_policy[pid] = SampleBatch(
                {key: unpack_if_needed(col) for key, col in pb.items()})
        return MultiAgentBatch(per_policy, parsed["count"])
    else:
        raise ValueError(
            "Type field must be one of ['SampleBatch', 'MultiAgentBatch']",
            batch_type)
def _encode_sample(self, idxes):
    """Stack the transitions stored at ``idxes`` into five numpy arrays."""
    batches = ([], [], [], [], [])
    for idx in idxes:
        obs_t, action, reward, obs_tp1, done = self._storage[idx]
        batches[0].append(np.array(unpack_if_needed(obs_t), copy=False))
        batches[1].append(np.array(action, copy=False))
        batches[2].append(reward)
        batches[3].append(np.array(unpack_if_needed(obs_tp1), copy=False))
        batches[4].append(done)
        self._hit_count[idx] += 1
    # Order: obs_t, actions, rewards, obs_tp1, dones.
    return tuple(np.array(column) for column in batches)
def _encode_sample(self, idxes):
    """Fetch and stack (obs, action, reward, next_obs, done) at ``idxes``."""
    obs_list = []
    act_list = []
    rew_list = []
    next_obs_list = []
    done_list = []
    for i in idxes:
        o, a, r, o2, d = self._storage[i]
        self._hit_count[i] += 1
        obs_list.append(np.array(unpack_if_needed(o), copy=False))
        act_list.append(np.array(a, copy=False))
        rew_list.append(r)
        next_obs_list.append(np.array(unpack_if_needed(o2), copy=False))
        done_list.append(d)
    return (np.array(obs_list), np.array(act_list), np.array(rew_list),
            np.array(next_obs_list), np.array(done_list))
def _encode_sample(self, idxes):
    """Collect the rows at ``idxes`` column-wise into numpy arrays.

    Returns a tuple of arrays: obs_t, actions, rewards, obs_tp1, dones,
    plus one array per extra column the storage rows carry.
    """
    rows = []
    for idx in idxes:
        rows.append(self._storage[idx])
        self._hit_count[idx] += 1
    # Transpose row-major storage into columns; extra slots beyond the
    # first five are kept as trailing columns.
    obs_col, act_col, rew_col, next_obs_col, done_col, *extra_cols = \
        zip(*rows)
    obs_col = [np.array(unpack_if_needed(o), copy=False) for o in obs_col]
    act_col = [np.array(a, copy=False) for a in act_col]
    next_obs_col = [
        np.array(unpack_if_needed(o), copy=False) for o in next_obs_col
    ]
    columns = [obs_col, act_col, rew_col, next_obs_col, done_col]
    columns.extend(extra_cols)
    return tuple(np.array(c) for c in columns)
def _encode_sample(self, idxes):
    """Stack the (obs, action) pairs stored at ``idxes``."""
    obs_list, act_list = [], []
    for idx in idxes:
        obs, act = self._storage[idx]
        obs_list.append(np.array(unpack_if_needed(obs), copy=False))
        act_list.append(np.array(act, copy=False))
        self._hit_count[idx] += 1
    return np.array(obs_list), np.array(act_list)
def _from_json(self, data: str) -> SampleBatchType:
    """Decode one JSON record into a SampleBatch or MultiAgentBatch.

    Bytes input is decoded as UTF-8 first (smart_open S3 streams ignore
    the "r" mode). When ``self.ioctx`` carries a worker, columns are
    adjusted to the corresponding policy's spaces; a single-agent record
    is rejected if the worker's PolicyMap holds more than one policy.
    """
    if isinstance(data, bytes):  # smart_open S3 doesn't respect "r"
        data = data.decode("utf-8")
    record = json.loads(data)

    # Try to infer the SampleBatchType (SampleBatch or MultiAgentBatch).
    if "type" not in record:
        raise ValueError("JSON record missing 'type' field")
    record_type = record.pop("type")

    worker = self.ioctx.worker
    if record_type == "SampleBatch":
        if worker is not None and len(worker.policy_map) != 1:
            raise ValueError(
                "Found single-agent SampleBatch in input file, but our "
                "PolicyMap contains more than 1 policy!")
        columns = {key: unpack_if_needed(col) for key, col in record.items()}
        if worker is not None:
            policy = next(iter(worker.policy_map.values()))
            columns = self._adjust_obs_actions_for_policy(columns, policy)
        return SampleBatch(columns)
    elif record_type == "MultiAgentBatch":
        policy_batches = {}
        for pid, pb in record["policy_batches"].items():
            columns = {key: unpack_if_needed(col) for key, col in pb.items()}
            if worker is not None:
                columns = self._adjust_obs_actions_for_policy(
                    columns, worker.policy_map[pid])
            policy_batches[pid] = SampleBatch(columns)
        return MultiAgentBatch(policy_batches, record["count"])
    else:
        raise ValueError(
            "Type field must be one of ['SampleBatch', 'MultiAgentBatch']",
            record_type)
def _encode_sample(self, idxes): batch = {k: [] for k in self.expected_keys} for i in idxes: data = self._storage[i] for data_item, k in zip(data, self.expected_keys): if k in self.can_pack_list: data_item = unpack_if_needed(data_item) data_item = self.keys_to_types_dict[k](data_item) batch[k].append(data_item) self._hit_count[i] += 1 return batch
def sample(self, batch_size):
    """Sample a batch of (observation, action) pairs.

    Picks one stored episode whose first element contains no ``None``
    policy batches, then draws ``batch_size`` independent timesteps from
    it — one (obs, action) pair per policy batch in the episode.

    Parameters
    ----------
    batch_size: int
        How many transitions to sample.

    Returns
    -------
    obs_batch: np.array
        Batch of observations, reshaped to (batch_size, -1).
    act_batch: np.array
        Batch of actions executed given obs_batch, reshaped to
        (batch_size, -1).
    """
    observations = []
    actions = []
    # NOTE(review): with in_episode_samples == batch_size the outer loop
    # always runs exactly once, i.e. all samples come from one episode —
    # confirm this is intended before parameterizing it.
    in_episode_samples = batch_size
    episode_size = int(batch_size / in_episode_samples)
    for _ in range(episode_size):
        # Re-draw until an episode without missing policy batches is
        # found. Use `is None` (not `== None`): identity check is the
        # correct idiom and avoids ambiguous elementwise comparison if
        # the entries happen to be numpy arrays.
        episode_idx = random.randint(0, len(self._storage) - 1)
        while any(x is None for x in self._storage[episode_idx][0]):
            episode_idx = random.randint(0, len(self._storage) - 1)
        episode_sample = self._storage[episode_idx][0]
        for _ in range(in_episode_samples):
            obs = []
            ac = []
            for policy_batch in episode_sample:
                policy_batch.decompress_if_needed()
                # One uniformly random timestep from this policy's batch.
                sample_idx = np.random.randint(0, policy_batch.count)
                obs.append(unpack_if_needed(policy_batch["obs"][sample_idx]))
                ac.append(policy_batch["actions"][sample_idx])
            observations.append(obs)
            actions.append(ac)
    return np.array(observations, copy=False).reshape(batch_size, -1), \
        np.array(actions, copy=False).reshape(batch_size, -1)