Example #1
    def _retrieve_batch(self, batch_size, chunk_size):
        sampled_game_idx = self._sample_game_idx(batch_size)

        observations = []
        actions = []
        rewards = []
        nonterminals = []

        for idx in sampled_game_idx:
            _observations, _actions, _rewards, _nonterminals = self._retrieve_game(
                idx, chunk_size)
            observations.append(_observations)
            actions.append(_actions)
            rewards.append(_rewards)
            nonterminals.append(_nonterminals)

        observations = torch.as_tensor(np.array(observations,
                                                dtype=np.float32))
        if not self.symbolic_env:
            preprocess_observation_(
                observations,
                self.bit_depth)  # Undo discretisation for visual observations

        observations = observations.reshape(chunk_size, batch_size,
                                            *observations.shape[-3:])
        actions = np.array(actions).reshape(chunk_size, batch_size, -1)
        rewards = np.array(rewards).reshape(chunk_size, batch_size)
        nonterminals = np.array(nonterminals).reshape(chunk_size, batch_size)

        return observations, actions, rewards, nonterminals
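
All of these snippets rely on preprocess_observation_ to undo the bit-depth discretisation applied when visual observations were stored. The helper itself does not appear on this page; a minimal sketch, assuming the in-place PlaNet-style convention (quantise to bit_depth bits, centre on zero, then add dequantisation noise), would be:

import torch

def preprocess_observation_(observation, bit_depth):
    # Quantise to the given bit depth and centre on zero (in-place)
    observation.div_(2 ** (8 - bit_depth)).floor_().div_(2 ** bit_depth).sub_(0.5)
    # Dequantise: add uniform noise to spread pixels over the quantisation bins
    observation.add_(torch.rand_like(observation).div_(2 ** bit_depth))

Note that Examples #1, #2 and #4 use the helper purely for its in-place effect, while Example #3 assigns its return value, which only works if that fork's version also returns the tensor.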
Example #2
File: memory.py Project: masa-su/pixyzoo
    def _retrieve_batch(self, idxs, n, L):
        vec_idxs = idxs.transpose().reshape(-1)  # Unroll indices
        observations = torch.as_tensor(
            self.observations[vec_idxs].astype(np.float32))
        if not self.symbolic_env:
            # Undo discretisation for visual observations
            preprocess_observation_(observations, self.bit_depth)
        return (observations.reshape(L, n, *observations.shape[1:]),
                self.actions[vec_idxs].reshape(L, n, -1),
                self.rewards[vec_idxs].reshape(L, n),
                self.nonterminals[vec_idxs].reshape(L, n, 1))
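
_retrieve_batch receives idxs of shape (n, L): n chunks of L consecutive buffer indices, which the transpose-and-reshape unrolls into time-major order. In PlaNet-style memories these indices typically come from a sample wrapper; a sketch, assuming a flat circular buffer with self.idx as the write head, self.size as the capacity, and self.full marking a wrapped buffer (names are assumptions based on the common implementation):

import numpy as np

def _sample_idx(self, L):
    # Draw L consecutive indices that do not straddle the write head
    valid_idx = False
    while not valid_idx:
        idx = np.random.randint(0, self.size if self.full else self.idx - L)
        idxs = np.arange(idx, idx + L) % self.size
        valid_idx = self.idx not in idxs[1:]  # reject chunks crossing the head
    return idxs

def sample(self, n, L):
    # idxs has shape (n, L); _retrieve_batch transposes and unrolls it
    idxs = np.asarray([self._sample_idx(L) for _ in range(n)])
    return self._retrieve_batch(idxs, n, L)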
Example #3
    def _retrieve_batch(self, idxs, n, L):
        vec_idxs = idxs.transpose().reshape(-1)  # Unroll indices

        obs_ = self.observations[vec_idxs].astype(np.float32)
        # next_obs_ = self.next_observations[vec_idxs].astype(np.float32)
        # obs_aug = obs_.copy()
        # next_obs_aug = next_obs_.copy()

        # Undo discretisation for visual observations
        observations = torch.as_tensor(obs_)
        observations = preprocess_observation_(observations,
                                               self.bit_depth).to(self.device)

        # next_observations = torch.as_tensor(next_obs_)
        # next_observations = preprocess_observation_(
        #     next_observations, self.bit_depth).to(self.device)

        # I think we need to preserve the original observations for reconstructions
        # observations0 = self.aug_trans(observations)
        # next_observations0 = self.aug_trans(next_observations)

        # observations_aug = observations.clone()
        observations_aug0 = self.aug_trans(observations)
        observations_aug1 = self.aug_trans(observations)

        # next_observations_aug = next_observations.clone()
        # next_observations_aug = self.aug_trans(next_observations_aug)

        return (observations.reshape(L, n, *observations.shape[1:]),
                self.actions[vec_idxs].reshape(L, n, -1),
                self.rewards[vec_idxs].reshape(L, n),
                self.nonterminals[vec_idxs].reshape(L, n, 1),
                observations_aug0.reshape(L, n, *observations_aug0.shape[1:]),
                observations_aug1.reshape(L, n, *observations_aug1.shape[1:]))
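
self.aug_trans is not defined in this excerpt. Given the paired observations_aug0/observations_aug1 outputs, it is most likely a random-shift image augmentation in the style of DrQ/CURL; a plausible sketch, assuming 84x84 frames and the kornia library (both assumptions, not confirmed by the snippet):

import torch.nn as nn
import kornia.augmentation as aug

# Pad by replication, then randomly crop back: a random pixel shift
aug_trans = nn.Sequential(nn.ReplicationPad2d(4),
                          aug.RandomCrop((84, 84)))

Applying it twice to the same batch yields two independently shifted views, which is what a contrastive or consistency loss downstream would consume.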
Example #4
            D.append(observation, action, reward, done)
            observation = next_observation
            t += 1
        metrics['steps'].append(t * args.action_repeat + (
            0 if len(metrics['steps']) == 0 else metrics['steps'][-1]))
        metrics['episodes'].append(s)
    if args.experience_list:
        from torch.nn import functional as F
        elst = torch.load(args.experience_list)
        done_cnt = 0
        for (obs_, action, reward, done) in elst:
            if done:
                done_cnt += 1
                print(f"Loading {done_cnt}")
            observation_ = torch.from_numpy(obs_.astype(np.float32))
            preprocess_observation_(observation_, args.bit_depth)
            action = torch.tensor(action)
            # print(action,reward,done,obs_.shape)
            # act_=F.one_hot(idx_, env.action_size).float()
            D.append(observation_, action, reward, done)
            if done_cnt == 3:
                break

# Initialise model parameters randomly
transition_model = TransitionModel(args.belief_size, args.state_size,
                                   env.action_size, args.hidden_size,
                                   args.embedding_size,
                                   args.dense_activation_function).to(device)
observation_model = ObservationModel(env.observation_size, args.belief_size,
                                     args.state_size, args.embedding_size,
                                     args.cnn_activation_function,