示例#1
0
文件: json_reader.py 项目: wym42/ray
def _from_json(batch):
    """Decode one JSON-encoded record into a ``SampleBatch``.

    Args:
        batch: JSON text for a single record. ``bytes`` input is decoded
            as UTF-8 before parsing.

    Returns:
        A ``SampleBatch`` whose columns are lists of unpacked items.
    """
    # smart_open S3 doesn't respect "r", so the payload may arrive as bytes.
    text = batch.decode("utf-8") if isinstance(batch, bytes) else batch
    columns = json.loads(text)
    # Each column may be packed as a whole, and each of its items may be
    # packed again, hence the two levels of unpack_if_needed.
    decoded = {
        name: [unpack_if_needed(item) for item in unpack_if_needed(column)]
        for name, column in columns.items()
    }
    return SampleBatch(decoded)
示例#2
0
def _from_json(batch: str) -> SampleBatchType:
    if isinstance(batch, bytes):  # smart_open S3 doesn't respect "r"
        batch = batch.decode("utf-8")
    data = json.loads(batch)

    if "type" in data:
        data_type = data.pop("type")
    else:
        raise ValueError("JSON record missing 'type' field")

    if data_type == "SampleBatch":
        for k, v in data.items():
            data[k] = unpack_if_needed(v)
        return SampleBatch(data)
    elif data_type == "MultiAgentBatch":
        policy_batches = {}
        for policy_id, policy_batch in data["policy_batches"].items():
            inner = {}
            for k, v in policy_batch.items():
                inner[k] = unpack_if_needed(v)
            policy_batches[policy_id] = SampleBatch(inner)
        return MultiAgentBatch(policy_batches, data["count"])
    else:
        raise ValueError(
            "Type field must be one of ['SampleBatch', 'MultiAgentBatch']",
            data_type)
示例#3
0
    def _encode_sample(self, idxes):
        """Add action_logps"""
        obses_t, actions, rewards, obses_tp1, dones = \
            [], [], [], [], []

        for i in idxes:
            data = self._storage[i]
            obs_t, action, reward, obs_tp1, done, *_ = data
            obses_t.append(np.array(unpack_if_needed(obs_t), copy=False))
            actions.append(np.array(action, copy=False))
            rewards.append(reward)
            obses_tp1.append(np.array(unpack_if_needed(obs_tp1), copy=False))
            dones.append(done)
            # action_logps.append(logp)
            self._hit_count[i] += 1

        ret = [
            np.array(obses_t),
            np.array(actions),
            np.array(rewards),
            np.array(obses_tp1),
            np.array(dones)
        ]

        # Add other necessary information
        for item_id in range(5, len(data)):
            ret.append(np.array([self._storage[i][item_id] for i in idxes]))

        return ret
示例#4
0
def from_json_data(json_data: Any, worker: Optional["RolloutWorker"]):
    """Build a sample batch object from an already-parsed JSON record.

    The "type" entry is removed from ``json_data``, and for single-agent
    records the remaining values are replaced in place by their unpacked
    form.

    Args:
        json_data: Parsed JSON record carrying a "type" field.
        worker: Optional worker; when given, its policy map is used to
            adjust the data for the matching policy.

    Returns:
        A ``SampleBatch`` or a ``MultiAgentBatch``, depending on "type".

    Raises:
        ValueError: If "type" is missing or unknown, or if a single-agent
            record is read while the worker's policy map does not contain
            exactly one policy.
    """
    # Try to infer the SampleBatchType (SampleBatch or MultiAgentBatch).
    if "type" not in json_data:
        raise ValueError("JSON record missing 'type' field")
    kind = json_data.pop("type")

    have_worker = worker is not None

    if kind == "SampleBatch":
        if have_worker and len(worker.policy_map) != 1:
            raise ValueError(
                "Found single-agent SampleBatch in input file, but our "
                "PolicyMap contains more than 1 policy!")
        # Unpack every column in place.
        for name in list(json_data):
            json_data[name] = unpack_if_needed(json_data[name])
        if have_worker:
            only_policy = next(iter(worker.policy_map.values()))
            json_data = _adjust_obs_actions_for_policy(json_data, only_policy)
        return SampleBatch(json_data)

    if kind == "MultiAgentBatch":
        per_policy = {}
        for pid, cols in json_data["policy_batches"].items():
            unpacked = {
                name: unpack_if_needed(col) for name, col in cols.items()
            }
            if have_worker:
                unpacked = _adjust_obs_actions_for_policy(
                    unpacked, worker.policy_map[pid])
            per_policy[pid] = SampleBatch(unpacked)
        return MultiAgentBatch(per_policy, json_data["count"])

    raise ValueError(
        "Type field must be one of ['SampleBatch', 'MultiAgentBatch']",
        kind)
示例#5
0
def _from_json(batch):
    if isinstance(batch, bytes):  # smart_open S3 doesn't respect "r"
        batch = batch.decode("utf-8")
    data = json.loads(batch)

    if "type" in data:
        data_type = data.pop("type")
    else:
        raise ValueError("JSON record missing 'type' field")

    if data_type == "SampleBatch":
        for k, v in data.items():
            data[k] = unpack_if_needed(v)
        return SampleBatch(data)
    elif data_type == "MultiAgentBatch":
        policy_batches = {}
        for policy_id, policy_batch in data["policy_batches"].items():
            inner = {}
            for k, v in policy_batch.items():
                inner[k] = unpack_if_needed(v)
            policy_batches[policy_id] = SampleBatch(inner)
        return MultiAgentBatch(policy_batches, data["count"])
    else:
        raise ValueError(
            "Type field must be one of ['SampleBatch', 'MultiAgentBatch']",
            data_type)
 def _encode_sample(self, idxes):
     obses_t, actions, rewards, obses_tp1, dones = [], [], [], [], []
     for i in idxes:
         data = self._storage[i]
         obs_t, action, reward, obs_tp1, done = data
         obses_t.append(np.array(unpack_if_needed(obs_t), copy=False))
         actions.append(np.array(action, copy=False))
         rewards.append(reward)
         obses_tp1.append(np.array(unpack_if_needed(obs_tp1), copy=False))
         dones.append(done)
         self._hit_count[i] += 1
     return (np.array(obses_t), np.array(actions), np.array(rewards),
             np.array(obses_tp1), np.array(dones))
示例#7
0
 def _encode_sample(self, idxes):
     obses_t, actions, rewards, obses_tp1, dones = [], [], [], [], []
     for i in idxes:
         data = self._storage[i]
         obs_t, action, reward, obs_tp1, done = data
         obses_t.append(np.array(unpack_if_needed(obs_t), copy=False))
         actions.append(np.array(action, copy=False))
         rewards.append(reward)
         obses_tp1.append(np.array(unpack_if_needed(obs_tp1), copy=False))
         dones.append(done)
         self._hit_count[i] += 1
     return (np.array(obses_t), np.array(actions), np.array(rewards),
             np.array(obses_tp1), np.array(dones))
示例#8
0
    def _encode_sample(self, idxes):
        sample = []
        for i in idxes:
            sample.append(self._storage[i])
            self._hit_count[i] += 1

        obses_t, actions, rewards, obses_tp1, dones, *extras = zip(*sample)

        obses_t = [np.array(unpack_if_needed(o), copy=False) for o in obses_t]
        actions = [np.array(a, copy=False) for a in actions]
        obses_tp1 = [
            np.array(unpack_if_needed(o), copy=False) for o in obses_tp1
        ]

        return tuple(
            map(np.array,
                [obses_t, actions, rewards, obses_tp1, dones] + extras))
示例#9
0
    def _encode_sample(self, idxes):
        obses_t, actions = [], []
        for i in idxes:
            data = self._storage[i]
            obs_t, action = data
            obses_t.append(np.array(unpack_if_needed(obs_t), copy=False))
            actions.append(np.array(action, copy=False))

            self._hit_count[i] += 1
        return np.array(obses_t), np.array(actions)
示例#10
0
    def _from_json(self, data: str) -> SampleBatchType:
        if isinstance(data, bytes):  # smart_open S3 doesn't respect "r"
            data = data.decode("utf-8")
        json_data = json.loads(data)

        # Try to infer the SampleBatchType (SampleBatch or MultiAgentBatch).
        if "type" in json_data:
            data_type = json_data.pop("type")
        else:
            raise ValueError("JSON record missing 'type' field")

        if data_type == "SampleBatch":
            if self.ioctx.worker is not None and \
                    len(self.ioctx.worker.policy_map) != 1:
                raise ValueError(
                    "Found single-agent SampleBatch in input file, but our "
                    "PolicyMap contains more than 1 policy!")
            for k, v in json_data.items():
                json_data[k] = unpack_if_needed(v)
            if self.ioctx.worker is not None:
                policy = next(iter(self.ioctx.worker.policy_map.values()))
                json_data = self._adjust_obs_actions_for_policy(
                    json_data, policy)
            return SampleBatch(json_data)
        elif data_type == "MultiAgentBatch":
            policy_batches = {}
            for policy_id, policy_batch in json_data["policy_batches"].items():
                inner = {}
                for k, v in policy_batch.items():
                    inner[k] = unpack_if_needed(v)
                if self.ioctx.worker is not None:
                    policy = self.ioctx.worker.policy_map[policy_id]
                    inner = self._adjust_obs_actions_for_policy(inner, policy)
                policy_batches[policy_id] = SampleBatch(inner)
            return MultiAgentBatch(policy_batches, json_data["count"])
        else:
            raise ValueError(
                "Type field must be one of ['SampleBatch', 'MultiAgentBatch']",
                data_type)
示例#11
0
    def _encode_sample(self, idxes):

        batch = {k: [] for k in self.expected_keys}

        for i in idxes:
            data = self._storage[i]
            for data_item, k in zip(data, self.expected_keys):

                if k in self.can_pack_list:
                    data_item = unpack_if_needed(data_item)

                data_item = self.keys_to_types_dict[k](data_item)

                batch[k].append(data_item)

            self._hit_count[i] += 1
        return batch
示例#12
0
    def sample(self, batch_size):
        """Sample a batch of (observation, action) pairs.

        One stored episode is picked at random (episodes whose first entry
        contains ``None`` are rejected and re-drawn), and ``batch_size``
        joint samples are drawn from it. For each joint sample, one random
        timestep is taken independently from every policy batch of the
        episode.

        NOTE: the rejection loop does not terminate if every stored episode
        contains a ``None`` entry.

        Parameters
        ----------
        batch_size: int
            How many transitions to sample.

        Returns
        -------
        obs_batch: np.array
          batch of observations, reshaped to (batch_size, -1)
        act_batch: np.array
          batch of actions executed given obs_batch, reshaped to
          (batch_size, -1)
        """
        observations = []
        actions = []
        # All requested samples are drawn from a single episode, so the
        # number of episodes visited per call is always one.
        in_episode_samples = batch_size
        episode_size = batch_size // in_episode_samples
        for _ in range(episode_size):
            episode_idx = random.randint(0, len(self._storage) - 1)
            # Re-draw until the episode has no missing (None) policy batch.
            while any(x is None for x in self._storage[episode_idx][0]):
                episode_idx = random.randint(0, len(self._storage) - 1)
            episode_sample = self._storage[episode_idx][0]
            for _ in range(in_episode_samples):
                obs = []
                ac = []
                for policy_batch in episode_sample:
                    policy_batch.decompress_if_needed()
                    num_samples = policy_batch.count
                    sample_idx = np.random.randint(0, num_samples)
                    o = unpack_if_needed(policy_batch['obs'][sample_idx])
                    a = policy_batch['actions'][sample_idx]
                    obs.append(o)
                    ac.append(a)
                observations.append(obs)
                actions.append(ac)

        # np.asarray instead of np.array(copy=False): the latter raises on
        # NumPy 2 when a copy is required, which is always the case for
        # Python lists.
        return np.asarray(observations).reshape(batch_size, -1),\
               np.asarray(actions).reshape(batch_size, -1)