Exemplo n.º 1
0
 def decompress_if_needed(self, columns=frozenset(["obs", "new_obs"])):
     """Unpack compressed columns of ``self.data`` in place.

     For every name in ``columns`` that is present in ``self.data``:

     * if ``is_compressed`` accepts the column value as a whole, the
       column is replaced by ``unpack(value)``;
     * otherwise, if the column is non-empty and ``is_compressed``
       accepts its first element, every element is unpacked and the
       results are stored as a new NumPy array.

     Columns matching neither case, and names missing from
     ``self.data``, are left untouched.

     Args:
         columns: Names of the columns to inspect. Defaults to
             ``"obs"`` and ``"new_obs"``.
     """
     for column in columns:
         if column not in self.data:
             continue
         stored = self.data[column]
         if is_compressed(stored):
             # The column was packed as a single unit.
             self.data[column] = unpack(stored)
             continue
         if len(stored) > 0 and is_compressed(stored[0]):
             # The column holds individually packed elements.
             unpacked = [unpack(item) for item in self.data[column]]
             self.data[column] = np.array(unpacked)
Exemplo n.º 2
0
 def _decompress_in_place(path, value):
     """Replace one compressed leaf, addressed by ``path``, with its unpacked form.

     ``path`` is a sequence of keys leading from ``self`` (taken from the
     enclosing scope) to the leaf holding ``value``. Nothing happens when
     the first key is not in the enclosing ``columns`` collection, or
     when ``value`` is neither compressed as a whole nor a non-empty
     sequence whose first element is compressed.

     Args:
         path: Keys from ``self`` down to the leaf; the last key names
             the leaf inside its direct container.
         value: The current leaf value.
     """
     if path[0] not in columns:
         return

     # Descend to the container that directly holds the leaf.
     parent = self
     for step in path[:-1]:
         parent = parent[step]

     if is_compressed(value):
         # Bulk compressed: the whole value is one packed unit.
         replacement = unpack(value)
     elif len(value) > 0 and is_compressed(value[0]):
         # Non bulk compressed: every element was packed separately.
         replacement = np.array([unpack(item) for item in value])
     else:
         return

     parent[path[-1]] = replacement
Exemplo n.º 3
0
 def _encode_sample(self, idxes):
     """Gather the stored transitions at ``idxes`` into five batched arrays.

     Each selected entry of ``self._storage`` is destructured as
     ``(obs_t, action, reward, obs_tp1, done)``. Both observations are
     passed through ``unpack`` before being collected, and
     ``self._hit_count`` is incremented for every sampled index.

     Args:
         idxes: Iterable of indices into ``self._storage``.

     Returns:
         A tuple ``(obses_t, actions, rewards, obses_tp1, dones)`` of
         NumPy arrays with one row per index, in ``idxes`` order.
     """
     obses_t, actions, rewards, obses_tp1, dones = [], [], [], [], []
     for i in idxes:
         obs_t, action, reward, obs_tp1, done = self._storage[i]
         # np.asarray copies only when it has to. np.array(..., copy=False)
         # meant the same on NumPy 1.x, but on NumPy 2.x it raises
         # ValueError whenever a copy cannot be avoided (e.g. for Python
         # scalars or lists), so asarray is the portable spelling.
         obses_t.append(np.asarray(unpack(obs_t)))
         actions.append(np.asarray(action))
         rewards.append(reward)
         obses_tp1.append(np.asarray(unpack(obs_tp1)))
         dones.append(done)
         self._hit_count[i] += 1
     return (np.array(obses_t), np.array(actions), np.array(rewards),
             np.array(obses_tp1), np.array(dones))
Exemplo n.º 4
0
 def _encode_sample(self, idxes):
     """Collect the transitions stored at ``idxes`` as batched NumPy arrays.

     Every entry read from ``self._storage`` is a 5-tuple
     ``(obs_t, action, reward, obs_tp1, done)``. The two observations go
     through ``unpack`` before being appended, and the matching slot of
     ``self._hit_count`` is bumped once per sampled index.

     Args:
         idxes: Iterable of indices into ``self._storage``.

     Returns:
         A tuple ``(obses_t, actions, rewards, obses_tp1, dones)`` of
         NumPy arrays whose rows follow the order of ``idxes``.
     """
     obses_t, actions, rewards, obses_tp1, dones = [], [], [], [], []
     for i in idxes:
         obs_t, action, reward, obs_tp1, done = self._storage[i]
         # Use np.asarray rather than np.array(..., copy=False): both avoid
         # a copy when possible on NumPy 1.x, but under NumPy 2.x
         # copy=False raises ValueError if a copy is required (as it is
         # for plain Python scalars and lists).
         obses_t.append(np.asarray(unpack(obs_t)))
         actions.append(np.asarray(action))
         rewards.append(reward)
         obses_tp1.append(np.asarray(unpack(obs_tp1)))
         dones.append(done)
         self._hit_count[i] += 1
     return (np.array(obses_t), np.array(actions), np.array(rewards),
             np.array(obses_tp1), np.array(dones))
Exemplo n.º 5
0
def worker_train(ps, replay_buffer, opt, learner_index):
    """Run the learner loop: pull initial weights, then train on cached batches.

    A ``Learner`` is created and initialised with the values returned by
    ``ps.pull.remote`` for the learner's own weight keys. A ``Cache`` built
    on ``replay_buffer`` is started, after which the function loops
    forever: it takes a batch from ``cache.q1``, unpacks the ``'obs'``
    entries when ``opt.model`` is ``"cnn"``, trains on the batch, and
    every 100th step puts the learner's current weights on ``cache.q2``.

    Args:
        ps: Handle exposing ``pull.remote(keys)``; its result is resolved
            with ``ray.get``.
        replay_buffer: Passed straight to ``Cache``.
        opt: Options object; ``opt.model`` is read here and the whole
            object is forwarded to ``Learner``.
        learner_index: Not used by this function.

    This function contains no exit path from its loop.
    """
    learner = Learner(opt, job="learner")

    # Replace the freshly built learner's weights with the pulled ones.
    weight_keys = learner.get_weights()[0]
    pulled = ray.get(ps.pull.remote(weight_keys))
    learner.set_weights(weight_keys, pulled)

    cache = Cache(replay_buffer)
    cache.start()

    # TODO: the publish interval used to be 300 steps.
    publish_every = 100

    step = 1
    while True:
        batch = cache.q1.get()

        if opt.model == "cnn":
            # 'obs' is a nested sequence whose innermost items are packed.
            batch['obs'] = np.array(
                [[unpack(item) for item in row] for row in batch['obs']])

        learner.train(batch, step)

        if step % publish_every == 0:
            cache.q2.put(learner.get_weights())
        step += 1
Exemplo n.º 6
0
    def decompress_if_needed(
        self, columns: Set[str] = frozenset(["obs",
                                             "new_obs"])) -> "SampleBatch":
        """Unpack the requested columns of this batch in place.

        A column is replaced by ``unpack(column)`` when ``is_compressed``
        accepts it as a whole. Otherwise, if the column is non-empty and
        its first element is compressed, each element is unpacked and the
        results are stored as a new NumPy array. Any other column, and
        any requested name that is not among ``self.keys()``, is left
        unchanged.

        Args:
            columns (Set[str]): Names of the columns to inspect.
                Default: only the obs and new_obs columns.

        Returns:
            SampleBatch: This same object, after modification.
        """
        for column in columns:
            if column not in self.keys():
                continue
            current = self[column]
            if is_compressed(current):
                # The column was packed as one unit.
                self[column] = unpack(current)
            elif len(current) > 0 and is_compressed(current[0]):
                # Elements were packed one by one.
                unpacked = [unpack(item) for item in self[column]]
                self[column] = np.array(unpacked)
        return self