def __init__(self, lr, lr_schedule):
    self.cur_lr = tf.get_variable("lr", initializer=lr)
    if lr_schedule is None:
        self.lr_schedule = ConstantSchedule(lr)
    else:
        self.lr_schedule = PiecewiseSchedule(
            lr_schedule, outside_value=lr_schedule[-1][-1])
def __init__(self, lr, lr_schedule):
    self.cur_lr = tf1.get_variable("lr", initializer=lr, trainable=False)
    self._lr_schedule = lr_schedule
    if self._lr_schedule is not None:
        # Non-constant lr: build a piecewise schedule and, for tf, an
        # assign op that updates the lr variable from a placeholder.
        self._lr_schedule = PiecewiseSchedule(
            lr_schedule,
            outside_value=lr_schedule[-1][-1],
            framework=None)
        if self.framework == "tf":
            self._lr_placeholder = tf1.placeholder(
                dtype=tf.float32, name="lr")
            self._lr_update = self.cur_lr.assign(
                self._lr_placeholder, read_value=False)
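# Usage sketch (assumption, not part of the mixins above): `lr_schedule` is a
# list of [timestep, lr] pairs as consumed by RLlib's PiecewiseSchedule
# (import path assumed to be ray.rllib.utils.schedules); values are linearly
# interpolated in between and clamped to `outside_value` afterwards.
from ray.rllib.utils.schedules import PiecewiseSchedule

lr_schedule = [[0, 1e-3], [1_000_000, 1e-5]]
schedule = PiecewiseSchedule(
    lr_schedule, outside_value=lr_schedule[-1][-1], framework=None)
print(schedule.value(0))          # 0.001
print(schedule.value(500_000))    # ~0.000505 (linear interpolation)
print(schedule.value(2_000_000))  # 1e-05 (outside_value)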
class ExtRewardCoeffSchedule:
    @DeveloperAPI
    def __init__(self, ext_reward_coeff, ext_reward_coeff_schedule):
        self.ext_reward_coeff = tf.get_variable(
            "ext_reward_coeff",
            initializer=float(ext_reward_coeff),
            trainable=False)
        if ext_reward_coeff_schedule is None:
            self.ext_reward_coeff_schedule = ConstantSchedule(
                ext_reward_coeff, framework=None)
        else:
            # Allows for custom schedule similar to lr_schedule format.
            if isinstance(ext_reward_coeff_schedule, list):
                self.ext_reward_coeff_schedule = PiecewiseSchedule(
                    ext_reward_coeff_schedule,
                    outside_value=ext_reward_coeff_schedule[-1][-1],
                    framework=None)
            else:
                # Implements previous version but enforces outside_value.
                self.ext_reward_coeff_schedule = PiecewiseSchedule(
                    [[0, ext_reward_coeff], [ext_reward_coeff_schedule, 0.0]],
                    outside_value=0.0,
                    framework=None)

    @override(Policy)
    def on_global_var_update(self, global_vars):
        super(ExtRewardCoeffSchedule, self).on_global_var_update(global_vars)
        self.ext_reward_coeff.load(
            self.ext_reward_coeff_schedule.value(global_vars["timestep"]),
            session=self._sess)
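# Usage sketch (assumption): `ext_reward_coeff_schedule` may be given either
# as a list of [timestep, coeff] pairs (piecewise form) or as a single cutoff
# timestep by which the coefficient has been annealed to 0.0. Values below
# are illustrative.
# Piecewise form: hold the coefficient, then decay it between 1M and 2M steps.
ext_reward_coeff_schedule = [[0, 1.0], [1_000_000, 1.0], [2_000_000, 0.0]]

# Scalar form: anneal linearly from `ext_reward_coeff` down to 0.0 over 1M steps.
ext_reward_coeff_schedule = 1_000_000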
def __init__(self,
             action_space,
             *,
             framework,
             initial_temperature=1.0,
             final_temperature=0.0,
             temperature_timesteps=int(1e5),
             temperature_schedule=None,
             **kwargs):
    """Initializes a SoftQ Exploration object.

    Args:
        action_space (Space): The gym action space used by the environment.
        framework (str): One of None, "tf", "torch".
        initial_temperature (float): The initial temperature to divide model
            outputs by before creating the Categorical distribution to
            sample from.
        final_temperature (float): The temperature to reach (and keep) after
            `temperature_timesteps` timesteps.
        temperature_timesteps (int): The time step after which the
            temperature should always be `final_temperature`.
        temperature_schedule (Optional[Schedule]): An optional Schedule
            object to use (instead of constructing one from the given
            parameters).
    """
    assert isinstance(action_space, Discrete)
    super().__init__(action_space, framework=framework, **kwargs)

    self.temperature_schedule = \
        from_config(Schedule, temperature_schedule, framework=framework) or \
        PiecewiseSchedule(
            endpoints=[
                (0, initial_temperature),
                (temperature_timesteps, final_temperature)],
            outside_value=final_temperature,
            framework=self.framework)

    # The current timestep value (tf-var or python int).
    self.last_timestep = get_variable(
        0, framework=framework, tf_name="timestep")
    self.temperature = self.temperature_schedule(self.last_timestep)
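# Sanity-check sketch (assumption): with the defaults above, the constructed
# PiecewiseSchedule anneals the temperature linearly from 1.0 to 0.0 over
# 1e5 timesteps and stays at 0.0 afterwards (import path assumed to be
# ray.rllib.utils.schedules).
from ray.rllib.utils.schedules import PiecewiseSchedule

schedule = PiecewiseSchedule(
    endpoints=[(0, 1.0), (100_000, 0.0)],
    outside_value=0.0,
    framework=None)
assert schedule.value(0) == 1.0
assert abs(schedule.value(50_000) - 0.5) < 1e-6
assert schedule.value(200_000) == 0.0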
def __init__(self, lr, lr_schedule):
    self.cur_lr = tf.get_variable("lr", initializer=lr, trainable=False)
    if lr_schedule is None:
        self.lr_schedule = ConstantSchedule(lr)
    elif isinstance(lr_schedule, list):
        self.lr_schedule = PiecewiseSchedule(
            lr_schedule, outside_value=lr_schedule[-1][-1])
    elif isinstance(lr_schedule, dict):
        self.lr_schedule = LinearSchedule(
            schedule_timesteps=lr_schedule["schedule_timesteps"],
            initial_p=lr,
            final_p=lr_schedule["final_lr"])
    else:
        raise ValueError("lr_schedule must be either list, dict or None")
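# Usage sketch (assumption): the three accepted `lr_schedule` formats for the
# constructor above; values are illustrative.
lr = 1e-3

# List form -> PiecewiseSchedule over [timestep, lr] pairs.
lr_schedule = [[0, 1e-3], [1_000_000, 1e-5]]

# Dict form -> LinearSchedule annealing from `lr` to `final_lr`.
lr_schedule = {"schedule_timesteps": 1_000_000, "final_lr": 1e-5}

# None -> ConstantSchedule(lr).
lr_schedule = None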
class ManualLearningRateSchedule:
    """Mixin for TorchPolicy that adds a manually driven learning rate schedule."""

    def __init__(self, lr, lr_schedule):
        self.cur_lr = lr
        if lr_schedule is None:
            self.lr_schedule = ConstantSchedule(lr, framework=None)
        else:
            self.lr_schedule = PiecewiseSchedule(
                lr_schedule,
                outside_value=lr_schedule[-1][-1],
                framework=None)

    # Not called automatically by any RLlib logic; call this from your
    # training script or a trainer callback.
    def update_lr(self, timesteps_total):
        print(f"cur lr {self.cur_lr}")
        self.cur_lr = self.lr_schedule.value(timesteps_total)
        for opt in self._optimizers:
            for p in opt.param_groups:
                p["lr"] = self.cur_lr
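# Usage sketch (assumption): since nothing in RLlib calls `update_lr` for you,
# drive it from the training loop (or a trainer callback). `trainer` is an
# illustrative RLlib Trainer assumed to be built elsewhere with a policy that
# includes this mixin.
for _ in range(100):
    result = trainer.train()
    policy = trainer.get_policy()
    policy.update_lr(result["timesteps_total"])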
def __init__(self,
             action_space,
             *,
             framework: str,
             initial_epsilon=1.0,
             final_epsilon=0.05,
             epsilon_timesteps=int(1e5),
             epsilon_schedule=None,
             **kwargs):
    """Create an EpsilonGreedy exploration class.

    Args:
        initial_epsilon (float): The initial epsilon value to use.
        final_epsilon (float): The final epsilon value to use.
        epsilon_timesteps (int): The time step after which epsilon should
            always be `final_epsilon`.
        epsilon_schedule (Optional[Schedule]): An optional Schedule object
            to use (instead of constructing one from the given parameters).
    """
    assert framework is not None
    super().__init__(
        action_space=action_space, framework=framework, **kwargs)

    self.epsilon_schedule = \
        from_config(Schedule, epsilon_schedule, framework=framework) or \
        PiecewiseSchedule(
            endpoints=[
                (0, initial_epsilon),
                (epsilon_timesteps, final_epsilon)],
            outside_value=final_epsilon,
            framework=self.framework)

    # The current timestep value (tf-var or python int).
    self.last_timestep = get_variable(
        0, framework=framework, tf_name="timestep")

    # The tf path is not implemented for this (multi-objective) variant.
    if self.framework == "tf":
        raise ValueError(
            "The tf framework does not support multiobj epsilon-greedy yet!")
class LearningRateSchedule(object):
    """Mixin for TFPolicyGraph that adds a learning rate schedule."""

    def __init__(self, lr, lr_schedule):
        self.cur_lr = tf.get_variable("lr", initializer=lr)
        if lr_schedule is None:
            self.lr_schedule = ConstantSchedule(lr)
        else:
            self.lr_schedule = PiecewiseSchedule(
                lr_schedule, outside_value=lr_schedule[-1][-1])

    @override(PolicyGraph)
    def on_global_var_update(self, global_vars):
        super(LearningRateSchedule, self).on_global_var_update(global_vars)
        self.cur_lr.load(
            self.lr_schedule.value(global_vars["timestep"]),
            session=self._sess)

    @override(TFPolicyGraph)
    def optimizer(self):
        return tf.train.AdamOptimizer(self.cur_lr)
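# Usage sketch (assumption): RLlib drives this mixin by calling
# `on_global_var_update` with the current global timestep; the `lr` tf
# variable is reloaded into the session, so the Adam optimizer returned by
# `optimizer()` uses the new rate on subsequent gradient steps. `policy` is
# an illustrative TFPolicyGraph subclass that includes this mixin.
policy.on_global_var_update({"timestep": 500_000})
print(policy._sess.run(policy.cur_lr))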