def from_discrete_action_model_parameters(
    cls, params: rlp.DiscreteActionModelParameters
):
    """Build a ``cls`` instance from a discrete-action parameter bundle.

    ``actions``, ``rl`` and ``evaluation`` are forwarded unchanged.
    ``double_q_learning`` and ``num_atoms`` are read from ``params.rainbow``;
    the minibatch settings are read from ``params.training``.

    Two ``rlp.OptimizerParameters`` objects are created. Both take their
    ``optimizer`` and ``learning_rate`` from ``params.training``; they differ
    only in ``l2_decay``.

    Args:
        params: Source parameters exposing ``actions``, ``rl``, ``rainbow``,
            ``training`` and ``evaluation`` attributes.

    Returns:
        Whatever ``cls(...)`` returns for the assembled keyword arguments.
    """
    training = params.training
    rainbow = params.rainbow

    # The CPE optimizer keeps the generic training weight decay ...
    cpe_optimizer = rlp.OptimizerParameters(
        optimizer=training.optimizer,
        learning_rate=training.learning_rate,
        l2_decay=training.l2_decay,
    )
    # ... while the main optimizer uses the C51-specific decay instead.
    main_optimizer = rlp.OptimizerParameters(
        optimizer=training.optimizer,
        learning_rate=training.learning_rate,
        l2_decay=rainbow.c51_l2_decay,
    )

    return cls(
        actions=params.actions,
        rl=params.rl,
        double_q_learning=rainbow.double_q_learning,
        num_atoms=rainbow.num_atoms,
        minibatch_size=training.minibatch_size,
        minibatches_per_step=training.minibatches_per_step,
        cpe_optimizer=cpe_optimizer,
        optimizer=main_optimizer,
        evaluation=params.evaluation,
    )
def from_discrete_action_model_parameters(
    cls, params: DiscreteActionModelParameters
):
    """Build a ``cls`` instance from a discrete-action parameter bundle.

    ``actions``, ``rl`` and ``evaluation`` are forwarded unchanged.
    ``double_q_learning`` is read from ``params.rainbow``; the minibatch
    settings and all optimizer fields are read from ``params.training``.

    ``bcq`` is a ``BCQConfig`` carrying ``params.rainbow.bcq_drop_threshold``
    when ``params.rainbow.bcq`` is truthy, and ``None`` otherwise.

    Args:
        params: Source parameters exposing ``actions``, ``rl``, ``rainbow``,
            ``training`` and ``evaluation`` attributes.

    Returns:
        Whatever ``cls(...)`` returns for the assembled keyword arguments.
    """
    training = params.training
    rainbow = params.rainbow

    # Only build a BCQ config when the feature flag is switched on.
    if rainbow.bcq:
        bcq_config = BCQConfig(drop_threshold=rainbow.bcq_drop_threshold)
    else:
        bcq_config = None

    optimizer_params = rlp.OptimizerParameters(
        optimizer=training.optimizer,
        learning_rate=training.learning_rate,
        l2_decay=training.l2_decay,
    )

    return cls(
        actions=params.actions,
        rl=params.rl,
        double_q_learning=rainbow.double_q_learning,
        bcq=bcq_config,
        minibatch_size=training.minibatch_size,
        minibatches_per_step=training.minibatches_per_step,
        optimizer=optimizer_params,
        evaluation=params.evaluation,
    )