def __init__(
    self,
    rl_parameters: RLParameters,
    use_gpu: bool,
    metrics_to_score=None,
    actions: Optional[List[str]] = None,
    evaluation_parameters: Optional[EvaluationParameters] = None,
    loss_reporter=None,
) -> None:
    self.minibatch = 0
    self.minibatch_size: Optional[int] = None
    self.minibatches_per_step: Optional[int] = None
    self.rl_parameters = rl_parameters
    self.rl_temperature = float(rl_parameters.temperature)
    self.maxq_learning = rl_parameters.maxq_learning
    self.gamma = rl_parameters.gamma
    self.tau = rl_parameters.target_update_rate
    self.use_seq_num_diff_as_time_diff = rl_parameters.use_seq_num_diff_as_time_diff
    self.time_diff_unit_length = rl_parameters.time_diff_unit_length
    self.tensorboard_logging_freq = rl_parameters.tensorboard_logging_freq
    self.multi_steps = rl_parameters.multi_steps
    self.calc_cpe_in_training = (
        evaluation_parameters and evaluation_parameters.calc_cpe_in_training
    )

    # Select the TD loss for the Q-network: mean squared error or Huber loss.
    if rl_parameters.q_network_loss == "mse":
        self.q_network_loss = F.mse_loss
    elif rl_parameters.q_network_loss == "huber":
        self.q_network_loss = F.smooth_l1_loss
    else:
        raise Exception(
            "Q-Network loss type {} is not a valid loss.".format(
                rl_parameters.q_network_loss
            )
        )

    # Always score the environment reward, plus any extra metrics requested.
    if metrics_to_score:
        self.metrics_to_score = metrics_to_score + ["reward"]
    else:
        self.metrics_to_score = ["reward"]

    # Use the GPU only when it is both requested and actually available.
    cuda_available = torch.cuda.is_available()
    logger.info("CUDA availability: {}".format(cuda_available))
    if use_gpu and cuda_available:
        logger.info("Using GPU: GPU requested and available.")
        self.use_gpu = True
        self.device = torch.device("cuda")  # type: ignore
    else:
        logger.info("NOT Using GPU: GPU not requested or not available.")
        self.use_gpu = False
        self.device = torch.device("cpu")  # type: ignore

    self.loss_reporter = loss_reporter or LossReporter(actions)
    self._actions = actions
def __init__(
    self,
    parameters,
    use_gpu,
    additional_feature_types,
    metrics_to_score=None,
    gradient_handler=None,
    actions: Optional[List[str]] = None,
):
    self.minibatch = 0
    self.parameters = parameters
    self.reward_burnin = parameters.rl.reward_burnin
    self._additional_feature_types = additional_feature_types
    self.rl_temperature = parameters.rl.temperature
    self.maxq_learning = parameters.rl.maxq_learning
    self.gamma = parameters.rl.gamma
    self.tau = parameters.rl.target_update_rate
    self.use_seq_num_diff_as_time_diff = parameters.rl.use_seq_num_diff_as_time_diff
    self.time_diff_unit_length = parameters.rl.time_diff_unit_length
    self.gradient_handler = gradient_handler
    self.tensorboard_logging_freq = parameters.rl.tensorboard_logging_freq
    self.multi_steps = parameters.rl.multi_steps

    # Select the TD loss for the Q-network: mean squared error or Huber loss.
    if parameters.rl.q_network_loss == "mse":
        self.q_network_loss = F.mse_loss
    elif parameters.rl.q_network_loss == "huber":
        self.q_network_loss = F.smooth_l1_loss
    else:
        raise Exception(
            "Q-Network loss type {} is not a valid loss.".format(
                parameters.rl.q_network_loss
            )
        )

    # Always score the environment reward, plus any extra metrics requested.
    if metrics_to_score:
        self.metrics_to_score = metrics_to_score + ["reward"]
    else:
        self.metrics_to_score = ["reward"]

    # Use the GPU only when it is both requested and actually available; the
    # chosen tensor-type aliases are used when constructing new tensors.
    cuda_available = torch.cuda.is_available()
    logger.info("CUDA availability: {}".format(cuda_available))
    if use_gpu and cuda_available:
        logger.info("Using GPU: GPU requested and available.")
        self.use_gpu = True
        self.dtype = torch.cuda.FloatTensor
        self.dtypelong = torch.cuda.LongTensor
    else:
        logger.info("NOT Using GPU: GPU not requested or not available.")
        self.use_gpu = False
        self.dtype = torch.FloatTensor
        self.dtypelong = torch.LongTensor

    self.loss_reporter = LossReporter(actions)
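
# --- Hedged illustration (not part of the original module) -------------------
# A minimal sketch of the two device idioms used by the constructors above:
# the first __init__ stores a torch.device, while the second stores legacy
# tensor-type aliases (dtype / dtypelong). It also exercises the same
# q_network_loss branching. All names below are local to this sketch.
import torch
import torch.nn.functional as F

_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
_q = torch.zeros(4, device=_device)  # device-based style (first __init__)

_dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
_q_legacy = _dtype(4).zero_()  # tensor-type style (second __init__)

# Same selection as q_network_loss above: "huber" maps to F.smooth_l1_loss,
# "mse" to F.mse_loss.
_loss_fn = F.smooth_l1_loss
_td_loss = _loss_fn(_q, torch.ones(4, device=_device))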