def get_critic_exporter(self, trainer, environment):
    """Build a ParametricDQNExporter around the trainer's q1 (critic) network.

    Feature extraction is driven by the environment's state and action
    normalization parameters.
    """
    extractor = PredictorFeatureExtractor(
        state_normalization_parameters=environment.normalization,
        action_normalization_parameters=environment.normalization_action,
    )
    transformer = ParametricActionOutputTransformer()
    return ParametricDQNExporter(trainer.q1_network, extractor, transformer)
def main(params):
    """Train a parametric DQN from JSON datasets and export the predictor.

    ``params`` is a dict of workflow settings: dataset/model paths, the
    ``rl``/``training``/``rainbow`` hyper-parameter sub-dicts, and GPU flags.
    Returns the result of ``export_trainer_and_predictor``.
    """
    # Scale minibatch size by the number of devices used to train.
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    training_parameters = TrainingParameters(**params["training"])
    model_params = ContinuousActionModelParameters(
        rl=RLParameters(**params["rl"]),
        training=training_parameters,
        rainbow=RainbowDQNParameters(**params["rainbow"]),
    )

    state_normalization = BaseWorkflow.read_norm_file(params["state_norm_data_path"])
    action_normalization = BaseWorkflow.read_norm_file(params["action_norm_data_path"])

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    preprocess_handler = ParametricDqnPreprocessHandler(
        Preprocessor(state_normalization, False),
        Preprocessor(action_normalization, False),
        PandasSparseToDenseProcessor(),
    )
    workflow = ParametricDqnWorkflow(
        model_params,
        preprocess_handler,
        state_normalization,
        action_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"],
        batch_size=training_parameters.minibatch_size,
    )
    eval_dataset = JSONDatasetReader(params["eval_data_path"], batch_size=16)

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset, int(params["epochs"]))

    exporter = ParametricDQNExporter(
        workflow.trainer.q_network,
        PredictorFeatureExtractor(
            state_normalization_parameters=state_normalization,
            action_normalization_parameters=action_normalization,
        ),
        ParametricActionOutputTransformer(),
    )
    return export_trainer_and_predictor(
        workflow.trainer, params["model_output_path"], exporter=exporter
    )  # noqa
def critic_predictor(
    self, feature_extractor=None, output_trasnformer=None, net_container=None
) -> _ParametricDQNPredictor:
    """Export a predictor for the critic (q1) network.

    NOTE(review): ``output_trasnformer`` is a misspelling of
    ``output_transformer``; the name is kept as-is because keyword callers
    may rely on it.

    TODO: We should combine the two Q functions.
    """
    q_network = self.q1_network.cpu_model()
    if net_container is not None:
        q_network = net_container(q_network)
    predictor = ParametricDQNExporter(
        q_network, feature_extractor, output_trasnformer
    ).export()
    # Put the live q1 network back into training mode after the export
    # (presumably cpu_model()/export leave it in eval mode — confirm).
    self.q1_network.train()
    return predictor
def get_modular_sarsa_trainer_exporter(
    self, environment, parameters=None, use_gpu=False, use_all_avail_gpus=False
):
    """Build a (trainer, exporter) pair for modular SARSA on *environment*.

    Falls back to ``self.get_sarsa_parameters()`` when no parameters are
    supplied. GPU placement and data-parallel wrapping are applied only
    when the corresponding flags are set.
    """
    parameters = parameters or self.get_sarsa_parameters()

    def _build_network():
        # The q network and the reward network share one architecture.
        return FullyConnectedParametricDQN(
            state_dim=get_num_output_features(environment.normalization),
            action_dim=get_num_output_features(environment.normalization_action),
            sizes=parameters.training.layers[1:-1],
            activations=parameters.training.activations[:-1],
        )

    q_network = _build_network()
    reward_network = _build_network()
    if use_gpu:
        q_network = q_network.cuda()
        reward_network = reward_network.cuda()
        if use_all_avail_gpus:
            q_network = q_network.get_data_parallel_model()
            reward_network = reward_network.get_data_parallel_model()

    q_network_target = q_network.get_target_network()
    trainer = _ParametricDQNTrainer(
        q_network, q_network_target, reward_network, parameters
    )

    exporter = ParametricDQNExporter(
        q_network,
        PredictorFeatureExtractor(
            state_normalization_parameters=environment.normalization,
            action_normalization_parameters=environment.normalization_action,
        ),
        ParametricActionOutputTransformer(),
        Preprocessor(environment.normalization, False, True),
        Preprocessor(environment.normalization_action, False, True),
    )
    return (trainer, exporter)
def _test_ddpg_trainer(self, use_gpu=False, use_all_avail_gpus=False):
    """Train DDPG on continuous gridworld and sanity-check both predictors.

    FIXME: the test is not really working, so pre-training eval and
    tolerance checks are disabled below.
    """
    self.run_pre_training_eval = False
    self.check_tolerance = False

    environment = GridworldContinuous()
    parameters = self.get_ddpg_parameters()
    state_dim = get_num_output_features(environment.normalization)
    action_dim = get_num_output_features(environment.normalization_action)

    # Actor: maps states to actions.
    actor_network = ActorNetModel(
        layers=(
            [state_dim] + parameters.actor_training.layers[1:-1] + [action_dim]
        ),
        activations=parameters.actor_training.activations,
        fl_init=parameters.shared_training.final_layer_init,
        state_dim=state_dim,
        action_dim=action_dim,
        use_gpu=use_gpu,
        use_all_avail_gpus=use_all_avail_gpus,
    )

    # Critic: scores (state, action) pairs with a scalar output.
    critic_network = CriticNetModel(
        # Ensure dims match input state and scalar output
        layers=[state_dim] + parameters.critic_training.layers[1:-1] + [1],
        activations=parameters.critic_training.activations,
        fl_init=parameters.shared_training.final_layer_init,
        state_dim=state_dim,
        action_dim=action_dim,
        use_gpu=use_gpu,
        use_all_avail_gpus=use_all_avail_gpus,
    )

    trainer = DDPGTrainer(
        actor_network,
        critic_network,
        parameters,
        environment.normalization,
        environment.normalization_action,
        environment.min_action_range,
        environment.max_action_range,
        use_gpu=use_gpu,
        use_all_avail_gpus=use_all_avail_gpus,
    )
    exporter = ParametricDQNExporter.from_state_action_normalization(
        trainer.critic,
        state_normalization=environment.normalization,
        action_normalization=environment.normalization_action,
    )

    evaluator = GridworldDDPGEvaluator(environment, DISCOUNT)
    self.evaluate_gridworld(environment, evaluator, trainer, exporter, use_gpu)

    # Make sure the actor predictor also exports and runs.
    actor = ActorExporter.from_state_action_normalization(
        trainer.actor,
        state_normalization=environment.normalization,
        action_normalization=environment.normalization_action,
    ).export()

    # Make sure all actions are optimal
    error = evaluator.evaluate_actor(actor, thres=0.2)
    print("gridworld optimal action match MAE: {0:.3f}".format(error))
def single_process_main(gpu_index, *args):
    """Per-process entry point for (optionally distributed) parametric DQN training.

    ``gpu_index`` identifies this process's device; ``args[0]`` is the params
    dict. Only node 0 / gpu 0 exports the trained model.
    """
    params = args[0]

    # Scale minibatch size by the number of devices used to train.
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    training_parameters = TrainingParameters(**params["training"])
    model_params = ContinuousActionModelParameters(
        rl=RLParameters(**params["rl"]),
        training=training_parameters,
        rainbow=RainbowDQNParameters(**params["rainbow"]),
    )

    state_normalization = BaseWorkflow.read_norm_file(params["state_norm_data_path"])
    action_normalization = BaseWorkflow.read_norm_file(params["action_norm_data_path"])

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    if params["use_all_avail_gpus"]:
        # Join the multi-process/multi-node training group before building
        # the workflow.
        BaseWorkflow.init_multiprocessing(
            int(params["num_processes_per_node"]),
            int(params["num_nodes"]),
            int(params["node_index"]),
            gpu_index,
            params["init_method"],
        )

    workflow = ParametricDqnWorkflow(
        model_params,
        state_normalization,
        action_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    state_sorted_features, _ = sort_features_by_normalization(state_normalization)
    action_sorted_features, _ = sort_features_by_normalization(action_normalization)
    preprocess_handler = ParametricDqnPreprocessHandler(
        PandasSparseToDenseProcessor(state_sorted_features),
        PandasSparseToDenseProcessor(action_sorted_features),
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"],
        batch_size=training_parameters.minibatch_size,
        preprocess_handler=preprocess_handler,
    )
    eval_dataset = JSONDatasetReader(
        params["eval_data_path"],
        batch_size=16,
        preprocess_handler=preprocess_handler,
    )

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset, int(params["epochs"]))

    exporter = ParametricDQNExporter(
        workflow.trainer.q_network,
        PredictorFeatureExtractor(
            state_normalization_parameters=state_normalization,
            action_normalization_parameters=action_normalization,
        ),
        ParametricActionOutputTransformer(),
    )
    # Only the rank-0 process of node 0 writes the exported model.
    if int(params["node_index"]) == 0 and gpu_index == 0:
        export_trainer_and_predictor(
            workflow.trainer, params["model_output_path"], exporter=exporter
        )  # noqa