def __init__(self, dt: float, gamma: float, lr: float, tau: float,
             optimizer: str, val_function: ParametricFunction,
             adv_function: ParametricFunction) -> None:
    CompoundStateful.__init__(self)
    self._reference_obs: Tensor = None

    # Value and advantage networks, each with a target copy for soft updates.
    self._val_function = val_function
    self._adv_function = adv_function
    self._target_val_function = copy.deepcopy(val_function)
    self._target_adv_function = copy.deepcopy(adv_function)

    self._adv_optimizer = setup_optimizer(
        self._adv_function.parameters(),
        opt_name=optimizer, lr=lr, dt=dt,
        inverse_gradient_magnitude=1,
        weight_decay=0)
    self._val_optimizer = setup_optimizer(
        self._val_function.parameters(),
        opt_name=optimizer, lr=lr, dt=dt,
        inverse_gradient_magnitude=dt,
        weight_decay=0)

    self._dt = dt
    self._gamma = gamma
    self._tau = tau
    info(
        f"setup> using AdvantageCritic, the provided gamma and rewards are scaled,"
        f" actual values: gamma={gamma ** dt}, rewards=original_rewards * {dt}")
    self._device = 'cpu'
def __init__(self, dt: float, gamma: float, lr: float, optimizer: str,
             q_function: ParametricFunction, tau: float, noscale: bool) -> None:
    CompoundStateful.__init__(self)
    self._reference_obs: Tensor = None
    self._q_function = q_function
    self._target_q_function = copy.deepcopy(self._q_function)
    self._tau = tau

    # With noscale, discounting is expressed relative to a reference timestep
    # and the effective dt is pinned to that reference value.
    ref_dt = 0.02
    if noscale:
        self._gamma = gamma ** (dt / ref_dt)
        dt = ref_dt
        self._dt = ref_dt
    else:
        self._gamma = gamma
        self._dt = dt

    info(
        f"setup> using ValueCritic, the provided gamma and rewards are scaled,"
        f" actual values: gamma={gamma ** self._dt},"
        f" rewards=original_rewards * {self._dt}")

    self._q_optimizer = setup_optimizer(
        self._q_function.parameters(),
        opt_name=optimizer, lr=lr, dt=self._dt,
        inverse_gradient_magnitude=self._dt,
        weight_decay=0)
    self._device = 'cpu'
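# The log messages above describe how discounting and rewards are put on a
# physical-time scale: for a per-unit-time discount gamma and a timestep dt,
# the per-step discount is gamma ** dt and a reward rate is integrated as
# reward * dt. A minimal, self-contained sketch of that scaling (these helper
# names are illustrative only, not part of this codebase):
def scaled_discount(gamma: float, dt: float) -> float:
    """Per-step discount equivalent to a per-unit-time discount gamma."""
    return gamma ** dt

def scaled_reward(reward_rate: float, dt: float) -> float:
    """Per-step reward obtained by integrating a reward rate over one step."""
    return reward_rate * dt

# Example: gamma=0.99 with dt=0.02 gives a per-step discount of about 0.9998,
# so the horizon measured in environment steps grows as dt shrinks while the
# horizon in physical time stays roughly constant.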
def __init__(self, T: int, actor: TypeA2CActor, critic: A2CCritic,
             opt_name: str, lr: float, dt: float, weight_decay: float) -> None:
    OnlineAgent.__init__(self, T=T, actor=actor, critic=critic)

    # Single optimizer over the joint set of policy and value parameters.
    self._optimizer = setup_optimizer(
        chain(self._actor._policy_function.parameters(),
              self._critic._v_function.parameters()),
        opt_name=opt_name, lr=lr, dt=dt,
        inverse_gradient_magnitude=1,
        weight_decay=weight_decay)
def __init__(self, T: int, actor: OnlineActor, critic: OnlineCritic,
             learn_per_step: int, batch_size: int, opt_name: str,
             lr: float, dt: float, weight_decay: float) -> None:
    OnlineAgent.__init__(self, T=T, actor=actor, critic=critic)
    self._learn_per_step = learn_per_step
    self._batch_size = batch_size

    # Single optimizer over the joint set of policy and value parameters.
    self._optimizer = setup_optimizer(
        chain(self._actor._policy_function.parameters(),
              self._critic._v_function.parameters()),
        opt_name=opt_name, lr=lr, dt=dt,
        inverse_gradient_magnitude=1,
        weight_decay=weight_decay)
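# Both agent constructors above build one optimizer over the chained actor and
# critic parameters, so a single optimizer step updates both networks. A
# standalone sketch of that pattern with plain torch modules (the networks and
# loss here are placeholders, not taken from this codebase):
from itertools import chain

import torch

policy_net = torch.nn.Linear(4, 2)  # stand-in for the policy function
value_net = torch.nn.Linear(4, 1)   # stand-in for the value function

optimizer = torch.optim.SGD(
    chain(policy_net.parameters(), value_net.parameters()), lr=1e-3)

obs = torch.zeros(4)
loss = policy_net(obs).sum() + value_net(obs).sum()  # dummy joint loss
optimizer.zero_grad()
loss.backward()
optimizer.step()  # one step updates policy and value parameters together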
def __init__(self, policy_function: ParametricFunction, noise: Noise,
             lr: float, tau: float, opt_name: str, dt: float,
             weight_decay: float) -> None:
    CompoundStateful.__init__(self)

    # Policy network with a target copy for soft updates, plus exploration noise.
    self._policy_function = policy_function
    self._target_policy_function = copy.deepcopy(self._policy_function)
    self._optimizer = setup_optimizer(
        self._policy_function.parameters(),
        opt_name=opt_name, lr=lr, dt=dt,
        inverse_gradient_magnitude=1,
        weight_decay=weight_decay)
    self._noise = noise
    self._tau = tau
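# The critics and the actor above all keep a deep copy of their network
# (_target_*) together with a mixing rate tau, which points to Polyak-averaged
# target updates. A minimal sketch of such an update, assuming that usage;
# soft_update is a hypothetical helper, not taken from this codebase:
import torch

def soft_update(target_net: torch.nn.Module, net: torch.nn.Module,
                tau: float) -> None:
    """Blend online parameters into the target copy: t <- (1 - tau) * t + tau * p."""
    with torch.no_grad():
        for t_param, param in zip(target_net.parameters(), net.parameters()):
            t_param.mul_(1.0 - tau).add_(tau * param)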