示例#1
0
 def _train_ppo_epoch(self, full_input):
     """Run one PPO epoch over ``full_input`` in shuffled mini-batches.

     A single random permutation of ``n_steps * n_envs`` indices is applied
     to every value of ``full_input``; each permuted value is then cut into
     ``total // batch_size`` equal pieces and the agent is trained once per
     piece.

     Args:
         full_input: mapping from input name to an indexable value.
             Presumably NumPy arrays whose first axis has length
             ``n_steps * n_envs`` -- TODO confirm against the caller.

     Raises:
         ValueError: from ``np.split`` when a value cannot be divided into
             equally sized pieces.
     """
     # NOTE(review): the env count is read from `self.envs.n_envs` here, while
     # other methods in this file read `self.envs.num_envs` -- confirm which
     # attribute the env wrapper actually exposes.
     sample_count = self.n_steps * self.envs.n_envs
     order = np.random.permutation(sample_count)

     # Same permutation for every key, so the rows stay aligned across keys.
     chunked = {}
     for name, values in full_input.items():
         permuted = values[order]
         chunked[name] = np.split(
             permuted, sample_count // self.ppo_par.batch_size
         )

     # Turn {key: [chunk, ...]} into [{key: chunk}, ...]: one dict per batch.
     for minibatch in dict_of_lists_to_list_of_dicst(chunked):
         self.agent.train(minibatch)
 def _train_ppo_recurrent_epoch(self, full_input, rnn_state):
     """Run one PPO epoch for a recurrent agent in shuffled mini-batches.

     Every value of ``full_input`` is permuted with one shared random
     permutation of ``n_steps * num_envs`` indices, cut into
     ``total // batch_size`` equal pieces, and the agent's
     ``train_recurrent`` is called once per piece with the same
     ``rnn_state``.

     Args:
         full_input: mapping from input name to an indexable value.
             Presumably NumPy arrays whose first axis has length
             ``n_steps * num_envs`` -- TODO confirm against the caller.
         rnn_state: passed through unchanged to every ``train_recurrent``
             call.

     Raises:
         ValueError: from ``np.split`` when a value cannot be divided into
             equally sized pieces.
     """
     # TODO(review): the samples are shuffled before batching, yet the same
     # rnn_state is handed to every batch. Re-check whether rnn_state has to
     # travel inside full_input so that it is permuted with everything else.
     sample_count = self.n_steps * self.envs.num_envs
     order = np.random.permutation(sample_count)

     chunked = {}
     for name, values in full_input.items():
         permuted = values[order]
         chunked[name] = np.split(
             permuted, sample_count // self.ppo_par.batch_size
         )

     # NOTE (original author, translated): when training you should not need
     # the rnn_state -- you start from zero and it takes the shape it has to
     # have.
     for minibatch in dict_of_lists_to_list_of_dicst(chunked):
         self.agent.train_recurrent(minibatch, rnn_state)
 def _train_ppo_epoch(self, full_input):
     """Run one PPO epoch, dispatching on the policy type.

     Every value of ``full_input`` is permuted with one shared random
     permutation of ``n_steps * num_envs`` indices and cut into
     ``total // batch_size`` equal pieces. When ``self.policy_type`` equals
     ``MetaPolicy`` each batch goes to ``self.agent.train_recurrent``;
     otherwise it goes to ``self.agent.train``.

     Args:
         full_input: mapping from input name to an indexable value.
             Presumably NumPy arrays whose first axis has length
             ``n_steps * num_envs`` -- TODO confirm against the caller.

     Raises:
         ValueError: from ``np.split`` when a value cannot be divided into
             equally sized pieces.
     """
     sample_count = self.n_steps * self.envs.num_envs
     order = np.random.permutation(sample_count)

     chunked = {}
     for name, values in full_input.items():
         permuted = values[order]
         chunked[name] = np.split(
             permuted, sample_count // self.ppo_par.batch_size
         )
     minibatches = dict_of_lists_to_list_of_dicst(chunked)

     # The trainer is chosen once, before the batch loop, so the policy
     # check is not repeated for every batch.
     is_recurrent = self.policy_type == MetaPolicy
     if not is_recurrent:
         for minibatch in minibatches:
             self.agent.train(minibatch)
         return

     for minibatch in minibatches:
         self.agent.train_recurrent(minibatch)
示例#4
0
 def test_dict_list_transpose(self):
     """Check the dict-of-lists to list-of-dicts transposition.

     Feeds a two-key dict with three entries per key (plain ints under
     ``"a"``, NumPy arrays under ``"b"``) and verifies that the result has
     three rows, each carrying the same keys and the matching values.
     """
     columns = {
         "a": [1, 2, 3],
         "b": [np.array([5, 6]), np.array([7, 8]), np.array([90, 100])],
     }

     # Expected rows are built from fresh arrays, independent of the input.
     wanted_a = [1, 2, 3]
     wanted_b = [np.array([5, 6]), np.array([7, 8]), np.array([90, 100])]
     wanted_rows = [{"a": a, "b": b} for a, b in zip(wanted_a, wanted_b)]

     actual_rows = dict_of_lists_to_list_of_dicst(columns)

     assert len(actual_rows) == len(wanted_rows)
     # Each pair is (expected row, produced row), in that order.
     for wanted, actual in zip(wanted_rows, actual_rows):
         assert wanted.keys() == actual.keys()
         assert wanted["a"] == actual["a"]
         # Array values need an element-wise comparison helper from the
         # test case (presumably tf.test.TestCase -- confirm).
         self.assertAllEqual(wanted["b"], actual["b"])