Example #1
0
    def __init__(self, t_prof, seat_id, chief_handle):
        """Set up one seat's learner: Q-network (best response), average-
        strategy network, their optimizers, and per-seat logging experiments.

        Args:
            t_prof: training profile; read for ``module_args["ddqn"]``,
                ``module_args["avg"]``, ``antic_start``, and ``name``.
            seat_id: index of the seat this agent occupies at the table.
            chief_handle: handle to the chief worker; used (via ray) to
                create the epsilon / anticipatory logging experiments.
        """
        # Pull per-module configs off the profile before calling the base
        # init, which (presumably) sets _t_prof, _env_bldr, _device, _ray,
        # and _chief_handle used below -- TODO confirm against base class.
        self.ddqn_args = t_prof.module_args["ddqn"]
        self.avg_args = t_prof.module_args["avg"]
        super().__init__(t_prof=t_prof, chief_handle=chief_handle)

        self.seat_id = seat_id
        self.global_iter_id = 0  # iteration counter, starts at zero

        # Exploration epsilon and anticipatory parameter begin at their
        # configured starting values; callers elsewhere likely anneal them.
        self.eps = self.ddqn_args.eps_start
        self.antic = self._t_prof.antic_start

        # Best-response network (dueling DQN) and average-strategy network,
        # both built on the shared env builder / device from the base class.
        self.q_net = DuelingQNet(q_args=self.ddqn_args.q_args,
                                 env_bldr=self._env_bldr,
                                 device=self._device)
        self.avg_net = AvrgStrategyNet(
            avrg_net_args=self.avg_args.avg_net_args,
            env_bldr=self._env_bldr,
            device=self._device)

        # One optimizer per network, each resolved from its own config's
        # optimizer-name string and learning rate.
        self.br_optim = rl_util.str_to_optim_cls(self.ddqn_args.optim_str)(
            self.q_net.parameters(), lr=self.ddqn_args.lr)
        self.avg_optim = rl_util.str_to_optim_cls(self.avg_args.optim_str)(
            self.avg_net.parameters(), lr=self.avg_args.lr)

        # Create chief-side experiments for logging epsilon and the
        # anticipatory parameter, then log their initial values once.
        self.eps_exp = self._ray.remote(
            self._chief_handle.create_experiment,
            t_prof.name + ": epsilon Plyr" + str(seat_id))
        self.antic_exp = self._ray.remote(
            self._chief_handle.create_experiment,
            t_prof.name + ": anticipatory Plyr" + str(seat_id))
        self._log_eps()
        self._log_antic()
Example #2
0
 def _get_new_avrg_optim(self):
     """Build a fresh optimizer for the average-strategy net, paired with a
     plateau-based LR scheduler.

     Returns:
         Tuple of (optimizer, ReduceLROnPlateau scheduler).
     """
     # Resolve the optimizer class from its configured name, then bind it
     # to the average-strategy network's parameters.
     optim_cls = rl_util.str_to_optim_cls(self._avrg_args.optim_str)
     optimizer = optim_cls(self._avrg_net.parameters(), lr=self._avrg_args.lr)

     # Halve the LR when progress stalls; patience comes from config.
     lr_sched = lr_scheduler.ReduceLROnPlateau(
         optimizer=optimizer,
         factor=0.5,
         patience=self._avrg_args.lr_patience,
         threshold=0.0001,
         min_lr=0.00002)

     return optimizer, lr_sched
 def _get_new_optim(self, p_id):
     """Build a fresh DDQN optimizer for the given player's network.

     Args:
         p_id: player id indexing into ``self._nets``.

     Returns:
         A newly constructed optimizer over that player's net parameters.
     """
     cfg = self._args.ddqn_args
     optim_cls = rl_util.str_to_optim_cls(cfg.optim_str)
     return optim_cls(self._nets[p_id].parameters(), lr=cfg.lr)
Example #4
0
 def _get_new_baseline_optim(self):
     """Build and return a fresh optimizer for the baseline network.

     The optimizer class is resolved from the configured name string and
     bound to the baseline net's parameters with the configured LR.
     """
     optim_cls = rl_util.str_to_optim_cls(self._baseline_args.optim_str)
     return optim_cls(self._baseline_net.parameters(),
                      lr=self._baseline_args.lr)