def test_minibatches_per_step(self):
    _epochs = self.epochs
    self.epochs = 2
    rl_parameters = RLParameters(
        gamma=0.95, target_update_rate=0.9, maxq_learning=True
    )
    rainbow_parameters = RainbowDQNParameters(
        double_q_learning=True, dueling_architecture=False
    )
    training_parameters1 = TrainingParameters(
        layers=self.layers,
        activations=self.activations,
        minibatch_size=1024,
        minibatches_per_step=1,
        learning_rate=0.25,
        optimizer="ADAM",
    )
    training_parameters2 = TrainingParameters(
        layers=self.layers,
        activations=self.activations,
        minibatch_size=128,
        minibatches_per_step=8,
        learning_rate=0.25,
        optimizer="ADAM",
    )
    env1 = Env(self.state_dims, self.action_dims)
    env2 = Env(self.state_dims, self.action_dims)
    model_parameters1 = DiscreteActionModelParameters(
        actions=env1.actions,
        rl=rl_parameters,
        rainbow=rainbow_parameters,
        training=training_parameters1,
    )
    model_parameters2 = DiscreteActionModelParameters(
        actions=env2.actions,
        rl=rl_parameters,
        rainbow=rainbow_parameters,
        training=training_parameters2,
    )
    # Dividing minibatch_size by 8 while multiplying minibatches_per_step
    # by 8 should give the same result.
    logger.info("Training model 1")
    trainer1 = self._train(model_parameters1, env1)
    SummaryWriterContext._reset_globals()
    logger.info("Training model 2")
    trainer2 = self._train(model_parameters2, env2)

    weight1 = trainer1.q_network.fc.dnn[-2].weight.detach().numpy()
    weight2 = trainer2.q_network.fc.dnn[-2].weight.detach().numpy()

    # Due to numerical instability, this tolerance has to be fairly high.
    self.assertTrue(np.allclose(weight1, weight2, rtol=0.0, atol=1e-3))
    self.epochs = _epochs
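# A minimal standalone sketch (not from the test suite; all names below are
# illustrative) of the equivalence the test above exercises, assuming that
# minibatches_per_step accumulates the averaged gradient before each optimizer
# step: accumulating the mean gradient over 8 minibatches of 128 samples
# matches a single step over one 1024-sample minibatch, because both average
# the same 1024 per-sample gradients.
import torch

torch.manual_seed(0)
weights = torch.zeros(4, requires_grad=True)
x = torch.randn(1024, 4)
y = torch.randn(1024)

def mean_grad(batch_x, batch_y):
    # Gradient of the mean squared error over one minibatch.
    loss = ((batch_x @ weights - batch_y) ** 2).mean()
    (grad,) = torch.autograd.grad(loss, weights)
    return grad

g_full = mean_grad(x, y)  # minibatch_size=1024, minibatches_per_step=1
g_accum = torch.stack(
    [mean_grad(x[i : i + 128], y[i : i + 128]) for i in range(0, 1024, 128)]
).mean(dim=0)  # minibatch_size=128, minibatches_per_step=8
assert torch.allclose(g_full, g_accum, atol=1e-6)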
def tearDown(self):
    SummaryWriterContext._reset_globals()
def setUp(self):
    SummaryWriterContext._reset_globals()
    logging.basicConfig(level=logging.INFO)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    random.seed(SEED)
def __iter__(self):
    # Start from a clean global summary-writer state before the run, then
    # notify observers at the start and end of every epoch.
    SummaryWriterContext._reset_globals()
    for epoch in range(self.num_epochs):
        self.notify_observers(epoch_start=epoch)
        yield epoch
        self.notify_observers(epoch_end=epoch)
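# A hypothetical, self-contained usage sketch (all names here are assumed, not
# from the source): an epoch iterator like the one above lets a training loop
# stay free of per-epoch bookkeeping, because epoch_start/epoch_end
# notifications fire around each yielded epoch.
class EpochIterator:
    def __init__(self, num_epochs, observers=()):
        self.num_epochs = num_epochs
        self.observers = list(observers)

    def notify_observers(self, **kwargs):
        for observer in self.observers:
            observer(**kwargs)

    def __iter__(self):
        for epoch in range(self.num_epochs):
            self.notify_observers(epoch_start=epoch)
            yield epoch
            self.notify_observers(epoch_end=epoch)

for epoch in EpochIterator(num_epochs=2, observers=[lambda **kw: print(kw)]):
    pass  # per-epoch training work goes here; notifications fire around it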
def setUp(self):
    SummaryWriterContext._reset_globals()
def setUp(self):
    logging.getLogger().setLevel(logging.INFO)
    SummaryWriterContext._reset_globals()
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    random.seed(SEED)