def __init__(
    self,
    model_params: ContinuousActionModelParameters,
    preprocess_handler: PreprocessHandler,
    state_normalization: Dict[int, NormalizationParameters],
    action_normalization: Dict[int, NormalizationParameters],
    use_gpu: bool,
    use_all_avail_gpus: bool,
):
    logger.info("Running Parametric DQN workflow with params:")
    logger.info(model_params)
    trainer = ParametricDQNTrainer(
        model_params,
        state_normalization,
        action_normalization,
        use_gpu=use_gpu,
        use_all_avail_gpus=use_all_avail_gpus,
    )
    trainer = update_model_for_warm_start(trainer)
    assert (
        type(trainer) == ParametricDQNTrainer
    ), "Warm started wrong model type: " + str(type(trainer))
    evaluator = Evaluator(
        None,
        model_params.rl.gamma,
        trainer,
        metrics_to_score=trainer.metrics_to_score,
    )
    super(ParametricDqnWorkflow, self).__init__(
        preprocess_handler, trainer, evaluator, model_params.training.minibatch_size
    )
def __init__(
    self,
    model_params: DiscreteActionModelParameters,
    state_normalization: Dict[int, NormalizationParameters],
    use_gpu: bool,
    use_all_avail_gpus: bool,
):
    logger.info("Running DQN workflow with params:")
    logger.info(model_params)
    trainer = create_dqn_trainer_from_params(
        model_params,
        state_normalization,
        use_gpu=use_gpu,
        use_all_avail_gpus=use_all_avail_gpus,
    )
    trainer = update_model_for_warm_start(trainer)
    assert type(trainer) == DQNTrainer, "Warm started wrong model type: " + str(
        type(trainer)
    )
    evaluator = Evaluator(
        model_params.actions,
        model_params.rl.gamma,
        trainer,
        metrics_to_score=trainer.metrics_to_score,
    )
    super().__init__(
        DiscreteDqnBatchPreprocessor(Preprocessor(state_normalization, use_gpu)),
        trainer,
        evaluator,
        model_params.training.minibatch_size,
    )
def train(
    self, train_dataset: Dataset, eval_dataset: Optional[Dataset], num_epochs: int
) -> RLTrainingOutput:
    """
    Train the model.

    Returns a partially filled RLTrainingOutput. The fields that are not filled
    here are:
    - output_path
    - warmstart_output_path
    - vis_metrics
    - validation_output
    """
    logger.info("Creating reporter")
    reporter = DiscreteDQNReporter(
        self.trainer_param.actions,
        target_action_distribution=self.target_action_distribution,
    )
    logger.info("Adding reporter to trainer")
    self.trainer.add_observer(reporter)

    training_page_handler = TrainingPageHandler(self.trainer)
    training_page_handler.add_observer(reporter)

    evaluator = Evaluator(
        self.action_names,
        self.rl_parameters.gamma,
        self.trainer,
        metrics_to_score=self.metrics_to_score,
    )
    logger.info("Adding reporter to evaluator")
    evaluator.add_observer(reporter)
    evaluation_page_handler = EvaluationPageHandler(self.trainer, evaluator, reporter)

    batch_preprocessor = self.build_batch_preprocessor()
    train_and_evaluate_generic(
        train_dataset,
        eval_dataset,
        self.trainer,
        num_epochs,
        self.use_gpu,
        batch_preprocessor,
        training_page_handler,
        evaluation_page_handler,
        reader_options=self.reader_options,
    )
    return RLTrainingOutput(training_report=reporter.generate_training_report())
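# Hedged usage sketch (not from the original source): assuming `model_manager` is an
# already-constructed instance of the class that owns the `train` method above, and
# that `train_dataset` / `eval_dataset` are Dataset objects pointing at prepared
# training and evaluation data, a run might look like the function below.
# `num_epochs=10` is an illustrative value, not a library default.
def _example_train_run(model_manager, train_dataset, eval_dataset):
    training_output = model_manager.train(train_dataset, eval_dataset, num_epochs=10)
    # Only the training report is filled in at this stage; output_path,
    # warmstart_output_path, vis_metrics, and validation_output are populated later.
    return training_output.training_report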
def __init__(
    self,
    model_params: ContinuousActionModelParameters,
    preprocess_handler: PreprocessHandler,
    state_normalization: Dict[int, NormalizationParameters],
    action_normalization: Dict[int, NormalizationParameters],
    use_gpu: bool,
    use_all_avail_gpus: bool,
):
    logger.info("Running continuous workflow with params:")
    logger.info(model_params)
    (
        min_action_range_tensor_serving,
        max_action_range_tensor_serving,
    ) = construct_action_scale_tensor(
        action_normalization, model_params.action_rescale_map
    )
    trainer = DDPGTrainer(
        model_params,
        state_normalization,
        action_normalization,
        min_action_range_tensor_serving,
        max_action_range_tensor_serving,
        use_gpu=use_gpu,
        use_all_avail_gpus=use_all_avail_gpus,
    )
    trainer = update_model_for_warm_start(trainer)
    assert type(trainer) == DDPGTrainer, "Warm started wrong model type: " + str(
        type(trainer)
    )
    evaluator = Evaluator(
        None,
        model_params.rl.gamma,
        trainer,
        metrics_to_score=trainer.metrics_to_score,
    )
    super(ContinuousWorkflow, self).__init__(
        preprocess_handler,
        trainer,
        evaluator,
        model_params.shared_training.minibatch_size,
    )
def train_network(params):
    writer = None
    if params["model_output_path"] is not None:
        writer = SummaryWriter(log_dir=params["model_output_path"])

    logger.info("Running DQN workflow with params:")
    logger.info(params)

    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    action_names = np.array(params["actions"])
    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])

    trainer_params = DiscreteActionModelParameters(
        actions=params["actions"],
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters,
    )

    dataset = JSONDataset(
        params["training_data_path"], batch_size=training_parameters.minibatch_size
    )
    eval_dataset = JSONDataset(params["eval_data_path"], batch_size=16)
    state_normalization = read_norm_file(params["state_norm_data_path"])

    num_batches = int(len(dataset) / training_parameters.minibatch_size)
    logger.info(
        "Read in batch data set {} of size {} examples. Data split "
        "into {} batches of size {}.".format(
            params["training_data_path"],
            len(dataset),
            num_batches,
            training_parameters.minibatch_size,
        )
    )

    trainer = DQNTrainer(
        trainer_params,
        state_normalization,
        use_gpu=params["use_gpu"],
        use_all_avail_gpus=params["use_all_avail_gpus"],
    )
    trainer = update_model_for_warm_start(trainer)
    preprocessor = Preprocessor(state_normalization, False)

    evaluator = Evaluator(
        trainer_params.actions,
        trainer_params.rl.gamma,
        trainer,
        metrics_to_score=trainer.metrics_to_score,
    )

    start_time = time.time()
    for epoch in range(int(params["epochs"])):
        dataset.reset_iterator()
        for batch_idx in range(num_batches):
            report_training_status(
                batch_idx, num_batches, epoch, int(params["epochs"])
            )
            batch = dataset.read_batch(batch_idx)
            tdp = preprocess_batch_for_training(preprocessor, batch, action_names)
            tdp.set_type(trainer.dtype)
            trainer.train(tdp)

        eval_dataset.reset_iterator()
        accumulated_edp = None
        # Track the evaluation batch index separately; reusing the training
        # loop's stale batch_idx here would re-read the same batch forever.
        eval_batch_idx = 0
        while True:
            batch = eval_dataset.read_batch(eval_batch_idx)
            if batch is None:
                break
            eval_batch_idx += 1
            tdp = preprocess_batch_for_training(preprocessor, batch, action_names)
            edp = EvaluationDataPage.create_from_tdp(tdp, trainer)
            if accumulated_edp is None:
                accumulated_edp = edp
            else:
                accumulated_edp = accumulated_edp.append(edp)
        accumulated_edp = accumulated_edp.compute_values(trainer.gamma)

        cpe_start_time = time.time()
        details = evaluator.evaluate_post_training(accumulated_edp)
        details.log()
        logger.info(
            "CPE evaluation took {} seconds.".format(time.time() - cpe_start_time)
        )

    through_put = (len(dataset) * int(params["epochs"])) / (time.time() - start_time)
    logger.info(
        "Training finished. Processed ~{} examples / s.".format(round(through_put))
    )

    if writer is not None:
        writer.close()

    return export_trainer_and_predictor(trainer, params["model_output_path"])
def train_network(params):
    logger.info("Running Parametric DQN workflow with params:")
    logger.info(params)

    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])
    if params["in_training_cpe"] is not None:
        in_training_cpe_parameters = InTrainingCPEParameters(
            **params["in_training_cpe"]
        )
    else:
        in_training_cpe_parameters = None

    trainer_params = ContinuousActionModelParameters(
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters,
        in_training_cpe=in_training_cpe_parameters,
    )

    dataset = JSONDataset(
        params["training_data_path"], batch_size=training_parameters.minibatch_size
    )
    eval_dataset = JSONDataset(
        params["eval_data_path"], batch_size=training_parameters.minibatch_size
    )
    state_normalization = read_norm_file(params["state_norm_data_path"])
    action_normalization = read_norm_file(params["action_norm_data_path"])

    num_batches = int(len(dataset) / training_parameters.minibatch_size)
    logger.info(
        "Read in batch data set {} of size {} examples. Data split "
        "into {} batches of size {}.".format(
            params["training_data_path"],
            len(dataset),
            num_batches,
            training_parameters.minibatch_size,
        )
    )

    trainer = ParametricDQNTrainer(
        trainer_params,
        state_normalization,
        action_normalization,
        use_gpu=params["use_gpu"],
        use_all_avail_gpus=params["use_all_avail_gpus"],
    )
    trainer = update_model_for_warm_start(trainer)
    state_preprocessor = Preprocessor(state_normalization, False)
    action_preprocessor = Preprocessor(action_normalization, False)

    if trainer_params.in_training_cpe is not None:
        evaluator = Evaluator(
            None,
            trainer_params.rl.gamma,
            trainer,
            trainer_params.in_training_cpe.mdp_sampled_rate,
            metrics_to_score=trainer.metrics_to_score,
        )
    else:
        evaluator = Evaluator(
            None,
            trainer_params.rl.gamma,
            trainer,
            float(DEFAULT_NUM_SAMPLES_FOR_CPE) / len(dataset),
            metrics_to_score=trainer.metrics_to_score,
        )

    start_time = time.time()
    for epoch in range(params["epochs"]):
        dataset.reset_iterator()
        for batch_idx in range(num_batches):
            report_training_status(batch_idx, num_batches, epoch, params["epochs"])
            batch = dataset.read_batch(batch_idx)
            tdp = preprocess_batch_for_training(
                state_preprocessor, batch, action_preprocessor=action_preprocessor
            )
            tdp.set_type(trainer.dtype)
            trainer.train(tdp)

        eval_dataset.reset_iterator()
        accumulated_edp = None
        for batch_idx in range(num_batches):
            batch = eval_dataset.read_batch(batch_idx)
            tdp = preprocess_batch_for_training(
                state_preprocessor, batch, action_preprocessor=action_preprocessor
            )
            edp = EvaluationDataPage.create_from_tdp(tdp, trainer)
            if accumulated_edp is None:
                accumulated_edp = edp
            else:
                accumulated_edp = accumulated_edp.append(edp)
        accumulated_edp = accumulated_edp.compute_values(trainer.gamma)

        cpe_start_time = time.time()
        details = evaluator.evaluate_post_training(accumulated_edp)
        details.log()
        logger.info(
            "CPE evaluation took {} seconds.".format(time.time() - cpe_start_time)
        )

    through_put = (len(dataset) * params["epochs"]) / (time.time() - start_time)
    logger.info(
        "Training finished. Processed ~{} examples / s.".format(round(through_put))
    )

    return export_trainer_and_predictor(trainer, params["model_output_path"])
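# Hedged example (not part of the original file): a minimal `params` dict with the
# keys this parametric-DQN train_network reads. All paths and values are
# illustrative placeholders; "mdp_sampled_rate" is the only in_training_cpe field
# the code above dereferences, and any other required fields of
# InTrainingCPEParameters would also need to be supplied.
EXAMPLE_PARAMETRIC_DQN_PARAMS = {
    "rl": {"gamma": 0.99},                         # unpacked into RLParameters(**...)
    "training": {"minibatch_size": 1024},          # unpacked into TrainingParameters(**...)
    "rainbow": {},                                 # unpacked into RainbowDQNParameters(**...)
    "in_training_cpe": {"mdp_sampled_rate": 0.1},  # or None to fall back on DEFAULT_NUM_SAMPLES_FOR_CPE
    "training_data_path": "training_data.json",
    "eval_data_path": "eval_data.json",
    "state_norm_data_path": "state_norm.json",
    "action_norm_data_path": "action_norm.json",
    "use_gpu": False,
    "use_all_avail_gpus": False,
    "epochs": 5,
    "model_output_path": "/tmp/parametric_dqn_model",
}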
def train_network(params):
    writer = None
    if params["model_output_path"] is not None:
        writer = SummaryWriter(log_dir=params["model_output_path"])

    logger.info("Running DQN workflow with params:")
    logger.info(params)

    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    action_names = np.array(params["actions"])
    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])

    trainer_params = DiscreteActionModelParameters(
        actions=params["actions"],
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters,
    )

    dataset = JSONDataset(
        params["training_data_path"], batch_size=training_parameters.minibatch_size
    )
    eval_dataset = JSONDataset(params["eval_data_path"], batch_size=16)
    state_normalization = read_norm_file(params["state_norm_data_path"])

    num_batches = int(len(dataset) / training_parameters.minibatch_size)
    logger.info(
        "Read in batch data set {} of size {} examples. Data split "
        "into {} batches of size {}.".format(
            params["training_data_path"],
            len(dataset),
            num_batches,
            training_parameters.minibatch_size,
        )
    )

    trainer = DQNTrainer(
        trainer_params,
        state_normalization,
        use_gpu=params["use_gpu"],
        use_all_avail_gpus=params["use_all_avail_gpus"],
    )
    trainer = update_model_for_warm_start(trainer)
    preprocessor = Preprocessor(state_normalization, False)

    evaluator = Evaluator(
        trainer_params.actions,
        trainer_params.rl.gamma,
        trainer,
        metrics_to_score=trainer.metrics_to_score,
    )

    start_time = time.time()
    for epoch in range(int(params["epochs"])):
        dataset.reset_iterator()
        batch_idx = -1
        while True:
            batch_idx += 1
            report_training_status(
                batch_idx, num_batches, epoch, int(params["epochs"])
            )
            batch = dataset.read_batch()
            if batch is None:
                break
            tdp = preprocess_batch_for_training(preprocessor, batch, action_names)
            tdp.set_type(trainer.dtype)
            trainer.train(tdp)

        eval_dataset.reset_iterator()
        accumulated_edp = None
        while True:
            batch = eval_dataset.read_batch()
            if batch is None:
                break
            tdp = preprocess_batch_for_training(preprocessor, batch, action_names)
            tdp.set_type(trainer.dtype)
            edp = EvaluationDataPage.create_from_tdp(tdp, trainer)
            if accumulated_edp is None:
                accumulated_edp = edp
            else:
                accumulated_edp = accumulated_edp.append(edp)
        accumulated_edp = accumulated_edp.compute_values(trainer.gamma)

        cpe_start_time = time.time()
        details = evaluator.evaluate_post_training(accumulated_edp)
        details.log()
        logger.info(
            "CPE evaluation took {} seconds.".format(time.time() - cpe_start_time)
        )

    through_put = (len(dataset) * int(params["epochs"])) / (time.time() - start_time)
    logger.info(
        "Training finished. Processed ~{} examples / s.".format(round(through_put))
    )

    if writer is not None:
        writer.close()

    return export_trainer_and_predictor(trainer, params["model_output_path"])
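# Hedged example (not part of the original file): a minimal `params` dict with the
# keys the discrete-DQN train_network above reads. Paths, action names, and
# hyperparameter values are illustrative placeholders, not library defaults.
EXAMPLE_DQN_PARAMS = {
    "actions": ["action_A", "action_B"],       # discrete action names
    "rl": {"gamma": 0.99},                     # unpacked into RLParameters(**...)
    "training": {"minibatch_size": 1024},      # unpacked into TrainingParameters(**...)
    "rainbow": {},                             # unpacked into RainbowDQNParameters(**...)
    "training_data_path": "training_data.json",
    "eval_data_path": "eval_data.json",
    "state_norm_data_path": "state_norm.json",
    "use_gpu": False,
    "use_all_avail_gpus": False,
    "epochs": 5,
    "model_output_path": "/tmp/dqn_model",     # also used as the SummaryWriter log dir
}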
def __init__(
    self,
    model_params: ContinuousActionModelParameters,
    preprocess_handler: PreprocessHandler,
    state_normalization: Dict[int, NormalizationParameters],
    action_normalization: Dict[int, NormalizationParameters],
    use_gpu: bool,
    use_all_avail_gpus: bool,
):
    logger.info("Running continuous workflow with params:")
    logger.info(model_params)
    (
        min_action_range_tensor_serving,
        max_action_range_tensor_serving,
    ) = construct_action_scale_tensor(
        action_normalization, model_params.action_rescale_map
    )

    state_dim = get_num_output_features(state_normalization)
    action_dim = get_num_output_features(action_normalization)

    # Build Actor Network
    actor_network = ActorNetModel(
        layers=(
            [state_dim] + model_params.actor_training.layers[1:-1] + [action_dim]
        ),
        activations=model_params.actor_training.activations,
        fl_init=model_params.shared_training.final_layer_init,
        state_dim=state_dim,
        action_dim=action_dim,
    )

    # Build Critic Network
    critic_network = CriticNetModel(
        # Ensure dims match input state and scalar output
        layers=[state_dim] + model_params.critic_training.layers[1:-1] + [1],
        activations=model_params.critic_training.activations,
        fl_init=model_params.shared_training.final_layer_init,
        state_dim=state_dim,
        action_dim=action_dim,
    )

    trainer = DDPGTrainer(
        actor_network,
        critic_network,
        model_params,
        state_normalization,
        action_normalization,
        min_action_range_tensor_serving,
        max_action_range_tensor_serving,
        use_gpu=use_gpu,
        use_all_avail_gpus=use_all_avail_gpus,
    )
    trainer = update_model_for_warm_start(trainer)
    assert type(trainer) == DDPGTrainer, "Warm started wrong model type: " + str(
        type(trainer)
    )
    evaluator = Evaluator(
        None,
        model_params.rl.gamma,
        trainer,
        metrics_to_score=trainer.metrics_to_score,
    )
    super().__init__(
        preprocess_handler,
        trainer,
        evaluator,
        model_params.shared_training.minibatch_size,
    )