def test_minibatches_per_step(self):
    _epochs = self.epochs
    self.epochs = 2
    rl_parameters = RLParameters(
        gamma=0.95, target_update_rate=0.9, maxq_learning=True
    )
    rainbow_parameters = RainbowDQNParameters(
        double_q_learning=True, dueling_architecture=False
    )
    training_parameters1 = TrainingParameters(
        layers=self.layers,
        activations=self.activations,
        minibatch_size=1024,
        minibatches_per_step=1,
        learning_rate=0.25,
        optimizer="ADAM",
    )
    training_parameters2 = TrainingParameters(
        layers=self.layers,
        activations=self.activations,
        minibatch_size=128,
        minibatches_per_step=8,
        learning_rate=0.25,
        optimizer="ADAM",
    )
    env1 = Env(self.state_dims, self.action_dims)
    env2 = Env(self.state_dims, self.action_dims)
    model_parameters1 = DiscreteActionModelParameters(
        actions=env1.actions,
        rl=rl_parameters,
        rainbow=rainbow_parameters,
        training=training_parameters1,
    )
    model_parameters2 = DiscreteActionModelParameters(
        actions=env2.actions,
        rl=rl_parameters,
        rainbow=rainbow_parameters,
        training=training_parameters2,
    )

    # Dividing minibatch_size by 8 while multiplying minibatches_per_step
    # by 8 should produce the same final weights.
    logger.info("Training model 1")
    trainer1 = self._train(model_parameters1, env1)
    SummaryWriterContext._reset_globals()
    logger.info("Training model 2")
    trainer2 = self._train(model_parameters2, env2)

    weight1 = trainer1.q_network.fc.layers[-1].weight.detach().numpy()
    weight2 = trainer2.q_network.fc.layers[-1].weight.detach().numpy()

    # Floating-point accumulation differs between the two schedules, so the
    # tolerance has to be fairly loose.
    self.assertTrue(np.allclose(weight1, weight2, rtol=0.0, atol=1e-3))
    self.epochs = _epochs
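# The equivalence exercised above relies on gradient accumulation: averaging
# gradients over 8 minibatches of 128 before stepping yields the same update
# as one step over a single minibatch of 1024, up to floating-point error.
# A minimal sketch of that pattern, assuming a generic torch module and a
# mean-reduced loss_fn (both hypothetical stand-ins, not part of this suite):
def accumulate_and_step(model, optimizer, loss_fn, minibatches):
    optimizer.zero_grad()
    for inputs, targets in minibatches:
        # Scale each loss so the accumulated gradient is the average, not the sum
        loss = loss_fn(model(inputs), targets) / len(minibatches)
        loss.backward()  # .grad buffers accumulate across backward() calls
    optimizer.step()  # one step on the averaged gradient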
def get_sarsa_parameters(
    self, environment, reward_shape, dueling, categorical, quantile, clip_grad_norm
):
    rl_parameters = RLParameters(
        gamma=DISCOUNT,
        target_update_rate=1.0,
        maxq_learning=False,
        reward_boost=reward_shape,
    )
    training_parameters = TrainingParameters(
        layers=[-1, 128, -1] if dueling else [-1, -1],
        activations=["relu", "relu"] if dueling else ["linear"],
        minibatch_size=self.minibatch_size,
        learning_rate=0.05,
        optimizer="ADAM",
        clip_grad_norm=clip_grad_norm,
    )
    return DiscreteActionModelParameters(
        actions=environment.ACTIONS,
        rl=rl_parameters,
        training=training_parameters,
        rainbow=RainbowDQNParameters(
            double_q_learning=True,
            dueling_architecture=dueling,
            categorical=categorical,
            quantile=quantile,
            num_atoms=5,
        ),
    )
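# A hedged call-site sketch for the helper above; `environment` comes from the
# test fixture and the flag values are illustrative, not canonical:
#
#     params = self.get_sarsa_parameters(
#         environment,
#         reward_shape=None,     # no reward boost
#         dueling=False,         # plain feed-forward layers
#         categorical=False,
#         quantile=False,        # neither distributional head
#         clip_grad_norm=None,   # gradient clipping disabled
#     )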
def test_no_soft_update(self):
    model = Model()
    target_model = copy.deepcopy(model)
    # deepcopy creates new tensors, so the parameter objects must be distinct.
    for param, target_param in zip(model.parameters(), target_model.parameters()):
        self.assertIsNot(target_param, param)

    optimizer = torch.optim.Adam(model.parameters())
    x = torch.tensor([1, 2], dtype=torch.int64)
    emb = model(x)
    loss = emb.sum()
    loss.backward()
    optimizer.step()

    params = list(model.parameters())
    self.assertEqual(1, len(params))
    param = params[0].detach().numpy()

    trainer = RLTrainer(
        DiscreteActionModelParameters(rl=RLParameters()), use_gpu=False
    )
    # tau=1.0 makes the "soft" update a full copy, so the target must match
    # the freshly optimized network exactly.
    trainer._soft_update(model, target_model, 1.0)

    target_params = list(target_model.parameters())
    self.assertEqual(1, len(target_params))
    target_param = target_params[0].detach().numpy()

    npt.assert_array_equal(target_param, param)
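# For reference, the Polyak soft update that `_soft_update` performs is
# conventionally
#
#     target_param <- tau * param + (1 - tau) * target_param
#
# so tau=1.0 degenerates to a hard copy, which is why the arrays compare equal
# above. A minimal sketch of the textbook rule (not necessarily RLTrainer's
# exact implementation):
def polyak_update(source, target, tau):
    for param, target_param in zip(source.parameters(), target.parameters()):
        target_param.data.copy_(tau * param.data + (1.0 - tau) * target_param.data)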
def test_trainer_maxq(self):
    env = Env(self.state_dims, self.action_dims)
    maxq_parameters = DiscreteActionModelParameters(
        actions=env.actions,
        rl=RLParameters(gamma=0.95, target_update_rate=0.9, maxq_learning=True),
        rainbow=RainbowDQNParameters(
            double_q_learning=True, dueling_architecture=False
        ),
        training=TrainingParameters(
            layers=self.layers,
            activations=self.activations,
            minibatch_size=1024,
            learning_rate=0.25,
            optimizer="ADAM",
        ),
    )

    # Q value should converge to very close to 20
    trainer = self._train(maxq_parameters, env)
    avg_q_value_after_training = torch.mean(trainer.all_action_scores)
    self.assertLess(avg_q_value_after_training, 22)
    self.assertGreater(avg_q_value_after_training, 18)
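# Where the 20 comes from, assuming the test env pays a constant reward of 1.0
# per step (an inference from the bounds above, not stated in this file): the
# infinite-horizon discounted return is the geometric series
#
#     Q* = sum_t gamma^t * 1.0 = 1 / (1 - gamma) = 1 / 0.05 = 20
#
expected_q = 1.0 / (1.0 - 0.95)
assert abs(expected_q - 20.0) < 1e-9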
def create_trainer(params: OpenAiGymParameters, env: OpenAIGymEnvironment):
    use_gpu = params.use_gpu
    model_type = params.model_type
    assert params.rl is not None
    rl_parameters = params.rl

    if model_type == ModelType.PYTORCH_DISCRETE_DQN.value:
        assert params.training is not None
        training_parameters = params.training
        assert params.rainbow is not None
        if env.img:
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
            # _replace returns a new namedtuple; the result must be reassigned
            training_parameters = training_parameters._replace(
                cnn_parameters=training_parameters.cnn_parameters._replace(
                    input_height=env.height,
                    input_width=env.width,
                    num_input_channels=env.num_input_channels,
                )
            )
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        discrete_trainer_params = DiscreteActionModelParameters(
            actions=env.actions,
            rl=rl_parameters,
            training=training_parameters,
            rainbow=params.rainbow,
            evaluation=params.evaluation,
        )
        trainer = create_dqn_trainer_from_params(
            discrete_trainer_params, env.normalization, use_gpu
        )
    elif model_type == ModelType.PYTORCH_PARAMETRIC_DQN.value:
        assert params.training is not None
        training_parameters = params.training
        assert params.rainbow is not None
        if env.img:
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        continuous_trainer_params = ContinuousActionModelParameters(
            rl=rl_parameters, training=training_parameters, rainbow=params.rainbow
        )
        trainer = create_parametric_dqn_trainer_from_params(
            continuous_trainer_params,
            env.normalization,
            env.normalization_action,
            use_gpu,
        )
    elif model_type == ModelType.TD3.value:
        assert params.td3_training is not None
        assert params.critic_training is not None
        assert params.actor_training is not None
        td3_trainer_params = TD3ModelParameters(
            rl=rl_parameters,
            training=params.td3_training,
            q_network=params.critic_training,
            actor_network=params.actor_training,
        )
        trainer = get_td3_trainer(env, td3_trainer_params, use_gpu)
    elif model_type == ModelType.SOFT_ACTOR_CRITIC.value:
        assert params.sac_training is not None
        assert params.critic_training is not None
        assert params.actor_training is not None
        trainer = get_sac_trainer(
            env,
            rl_parameters,
            params.sac_training,
            params.critic_training,
            params.actor_training,
            params.sac_value_training,
            use_gpu,
        )
    elif model_type == ModelType.CEM.value:
        assert params.cem is not None
        cem_trainer_params = params.cem._replace(rl=params.rl)
        trainer = get_cem_trainer(env, cem_trainer_params, use_gpu)
    else:
        raise NotImplementedError("Model of type {} not supported".format(model_type))

    return trainer
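# Dispatch summary for create_trainer, read directly from the branches above:
#
#     ModelType.PYTORCH_DISCRETE_DQN   -> create_dqn_trainer_from_params
#     ModelType.PYTORCH_PARAMETRIC_DQN -> create_parametric_dqn_trainer_from_params
#     ModelType.TD3                    -> get_td3_trainer
#     ModelType.SOFT_ACTOR_CRITIC      -> get_sac_trainer
#     ModelType.CEM                    -> get_cem_trainer
#
# A hedged usage sketch (the gym id is a placeholder, and params must already
# be a fully populated OpenAiGymParameters):
#
#     env = OpenAIGymEnvironment("CartPole-v0")
#     trainer = create_trainer(params, env)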
def single_process_main(gpu_index, *args):
    params = args[0]
    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    action_names = params["actions"]
    rl_parameters = from_json(params["rl"], RLParameters)
    training_parameters = from_json(params["training"], TrainingParameters)
    rainbow_parameters = from_json(params["rainbow"], RainbowDQNParameters)
    if "evaluation" in params:
        evaluation_parameters = from_json(params["evaluation"], EvaluationParameters)
    else:
        evaluation_parameters = EvaluationParameters()

    model_params = DiscreteActionModelParameters(
        actions=action_names,
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters,
        evaluation=evaluation_parameters,
    )
    state_normalization = BaseWorkflow.read_norm_file(
        params["state_norm_data_path"]
    )

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    if params["use_all_avail_gpus"]:
        BaseWorkflow.init_multiprocessing(
            int(params["num_processes_per_node"]),
            int(params["num_nodes"]),
            int(params["node_index"]),
            gpu_index,
            params["init_method"],
        )

    workflow = DqnWorkflow(
        model_params,
        state_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    sorted_features, _ = sort_features_by_normalization(state_normalization)
    preprocess_handler = DiscreteDqnPreprocessHandler(
        len(action_names), PandasSparseToDenseProcessor(sorted_features)
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"],
        batch_size=training_parameters.minibatch_size,
        preprocess_handler=preprocess_handler,
    )
    eval_dataset = JSONDatasetReader(
        params["eval_data_path"],
        batch_size=training_parameters.minibatch_size,
        preprocess_handler=preprocess_handler,
    )

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset, int(params["epochs"]))

    if int(params["node_index"]) == 0 and gpu_index == 0:
        workflow.save_models(params["model_output_path"])
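# The dict lookups above imply a JSON config shaped roughly as follows. The
# keys are taken directly from single_process_main; every value is a
# placeholder, not a recommended setting:
#
#     {
#         "actions": [...],                  # discrete action names
#         "rl": {...},                       # -> RLParameters
#         "training": {...},                 # -> TrainingParameters
#         "rainbow": {...},                  # -> RainbowDQNParameters
#         "evaluation": {...},               # optional; defaults used if absent
#         "state_norm_data_path": "...",
#         "training_data_path": "...",
#         "eval_data_path": "...",
#         "model_output_path": "...",
#         "use_gpu": true,
#         "use_all_avail_gpus": false,
#         "num_processes_per_node": 1,
#         "num_nodes": 1,
#         "node_index": 0,
#         "init_method": "...",
#         "epochs": 10
#     }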