def __init__(
    self,
    device: torch.device,
    settings: OnPolicyIterationReinforcerSettings,
    model: Model,
    algo: AlgoBase,
    env_roller: EnvRollerBase,
) -> None:
    """Store the constructor arguments on the instance.

    Args:
        device: Kept as ``self.device`` and passed to ``model.to``.
        settings: Kept as ``self.settings``.
        model: ``model.to(self.device)`` is called and its return value is
            kept as ``self._trained_model``.
        algo: Kept as ``self.algo``.
        env_roller: Kept as ``self.env_roller``.
    """
    # Plain references; these assignments do not depend on one another.
    self.settings = settings
    self.algo = algo
    self.env_roller = env_roller

    # The device has to be recorded first because the transfer below reads
    # it back from the instance.
    self.device = device
    # NOTE(review): presumably ``Model`` follows the torch ``Module.to``
    # contract and returns the model itself -- confirm against ``Model``.
    self._trained_model = model.to(self.device)
def __init__(
    self,
    device: torch.device,
    settings: BufferedSingleOffPolicyIterationReinforcerSettings,
    environment: gym.Env,
    model: Model,
    algo: AlgoBase,
    env_roller: ReplayEnvRollerBase,
) -> None:
    """Store the constructor arguments on the instance.

    Args:
        device: Kept as ``self.device`` and passed to ``model.to``.
        settings: Kept as ``self.settings``.
        environment: Kept as ``self.environment``.
        model: ``model.to(self.device)`` is called and its return value is
            kept as ``self._trained_model``.
        algo: Kept as ``self.algo``.
        env_roller: Kept as ``self.env_roller``.
    """
    self.device = device
    self.settings = settings
    self.environment = environment

    # Reads the device back from the instance, so it must come after the
    # ``self.device`` assignment above.
    # NOTE(review): presumably ``Model`` follows the torch ``Module.to``
    # contract and returns the model itself -- confirm against ``Model``.
    self._trained_model = model.to(self.device)

    self.algo = algo
    self.env_roller = env_roller
def __init__(
    self,
    device: torch.device,
    settings: BufferedMixedPolicyIterationReinforcerSettings,
    env: VecEnv,
    model: Model,
    env_roller: ReplayEnvRollerBase,
    algo: AlgoBase,
) -> None:
    """Store the constructor arguments on the instance.

    Args:
        device: Kept as ``self.device`` and passed to ``model.to``.
        settings: Kept as ``self.settings``.
        env: Kept under the attribute name ``self.environment``.
        model: ``model.to(self.device)`` is called and its return value is
            kept as ``self._trained_model``.
        env_roller: Kept as ``self.env_roller``.
        algo: Kept as ``self.algo``.
    """
    # Plain references; these assignments do not depend on one another.
    self.settings = settings
    self.algo = algo
    self.env_roller = env_roller
    # The parameter is called ``env`` but the attribute is ``environment``.
    self.environment = env

    # The device has to be recorded first because the transfer below reads
    # it back from the instance.
    self.device = device
    # NOTE(review): presumably ``Model`` follows the torch ``Module.to``
    # contract and returns the model itself -- confirm against ``Model``.
    self._trained_model = model.to(self.device)