def __init__(self, t_prof, seat_id, chief_handle):
    self.ddqn_args = t_prof.module_args["ddqn"]
    self.avg_args = t_prof.module_args["avg"]

    super().__init__(t_prof=t_prof, chief_handle=chief_handle)

    self.seat_id = seat_id
    self.global_iter_id = 0

    # Exploration (epsilon-greedy) and anticipatory parameters start at their configured values.
    self.eps = self.ddqn_args.eps_start
    self.antic = self._t_prof.antic_start

    # Best-response (Q) network and average-strategy network for this seat.
    self.q_net = DuelingQNet(q_args=self.ddqn_args.q_args, env_bldr=self._env_bldr, device=self._device)
    self.avg_net = AvrgStrategyNet(avrg_net_args=self.avg_args.avg_net_args,
                                   env_bldr=self._env_bldr,
                                   device=self._device)

    # One optimizer per network, built from the optimizer name given in the module args.
    self.br_optim = rl_util.str_to_optim_cls(self.ddqn_args.optim_str)(self.q_net.parameters(),
                                                                       lr=self.ddqn_args.lr)
    self.avg_optim = rl_util.str_to_optim_cls(self.avg_args.optim_str)(self.avg_net.parameters(),
                                                                       lr=self.avg_args.lr)

    # Register per-seat experiments with the chief so epsilon and anticipatory values can be logged.
    self.eps_exp = self._ray.remote(self._chief_handle.create_experiment,
                                    t_prof.name + ": epsilon Plyr" + str(seat_id))
    self.antic_exp = self._ray.remote(self._chief_handle.create_experiment,
                                      t_prof.name + ": anticipatory Plyr" + str(seat_id))

    self._log_eps()
    self._log_antic()
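# --- Illustration (not part of the original class) -------------------------------------------
# Minimal, self-contained sketch of the kind of exponential decay that values initialised to
# `eps_start` / `antic_start` typically follow over the course of training. The decay rate and
# floor below are illustrative placeholders, not values taken from the training profile.
def decayed_value(start, step, decay=0.999, floor=0.02):
    """Exponentially decay `start` over `step` iterations, never going below `floor`."""
    return max(floor, start * (decay ** step))

# e.g. decayed_value(start=0.3, step=1000) -> roughly 0.11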
def _get_new_avrg_optim(self):
    opt = rl_util.str_to_optim_cls(self._avrg_args.optim_str)(self._avrg_net.parameters(),
                                                              lr=self._avrg_args.lr)
    # Halve the LR when the loss plateaus; patience comes from the avrg-net args.
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer=opt,
                                               threshold=0.0001,
                                               factor=0.5,
                                               patience=self._avrg_args.lr_patience,
                                               min_lr=0.00002)
    return opt, scheduler
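# --- Usage sketch (assumed calling pattern, not from the original file) -----------------------
# ReduceLROnPlateau only adjusts the learning rate when it is fed a metric through `step(...)`,
# so whoever consumes the (opt, scheduler) pair is expected to pass the training loss in after
# each update. The tiny network and random data below are placeholders purely for illustration.
import torch
from torch import nn
from torch.optim import lr_scheduler

net = nn.Linear(4, 1)
opt = torch.optim.Adam(net.parameters(), lr=1e-3)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer=opt, threshold=0.0001, factor=0.5,
                                           patience=10, min_lr=0.00002)

for _ in range(100):
    opt.zero_grad()
    loss = ((net(torch.randn(32, 4)) - torch.randn(32, 1)) ** 2).mean()
    loss.backward()
    opt.step()
    scheduler.step(loss.item())  # LR is halved once the loss has stagnated for `patience` steps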
def _get_new_optim(self, p_id):
    # Fresh DDQN optimizer for the Q-net of player `p_id`.
    return rl_util.str_to_optim_cls(self._args.ddqn_args.optim_str)(self._nets[p_id].parameters(),
                                                                    lr=self._args.ddqn_args.lr)
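# --- Illustration (assumed behaviour) ---------------------------------------------------------
# `rl_util.str_to_optim_cls` is project-specific; the sketch below shows the kind of
# string-to-optimizer lookup it presumably performs. The exact key names accepted here are an
# assumption for illustration, not taken from the original source.
import torch

def str_to_optim_cls_sketch(optim_str):
    return {
        "adam": torch.optim.Adam,
        "rms": torch.optim.RMSprop,
        "sgd": torch.optim.SGD,
    }[optim_str.lower()]

# e.g. str_to_optim_cls_sketch("adam")(net.parameters(), lr=1e-4) builds an Adam optimizer.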
def _get_new_baseline_optim(self):
    # The baseline network uses a plain optimizer with a constant LR and no scheduler.
    opt = rl_util.str_to_optim_cls(self._baseline_args.optim_str)(self._baseline_net.parameters(),
                                                                  lr=self._baseline_args.lr)
    return opt
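# --- Usage sketch (assumed calling pattern) ---------------------------------------------------
# Factory methods like the ones above are typically invoked whenever a network is (re)built,
# e.g. after loading a checkpoint, so the optimizer is tied to the fresh parameter objects.
# The tiny net below is a placeholder; the real code would use `self._baseline_net`.
import torch
from torch import nn

net = nn.Linear(8, 3)
state = net.state_dict()          # pretend this came from a checkpoint

net = nn.Linear(8, 3)             # rebuilt network
net.load_state_dict(state)
opt = torch.optim.Adam(net.parameters(), lr=1e-4)  # optimizer recreated for the new parameters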