def __init__(self, config: Params, device="cuda"): super().__init__() self.bert = BertModel.from_pretrained("bert-base-uncased").eval() self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased") self.hidden_size = config.get("hidden_size") self.embedding_size = config.get("embedding_size") # TODO: probably need recurrent nets here self.obs_to_hidden = nn.Linear(self.embedding_size, self.hidden_size) self.actions_to_hidden = nn.Linear(self.embedding_size, self.hidden_size) self.hidden_to_hidden = nn.Linear(self.hidden_size, self.hidden_size // 2) self.hidden_to_scores = nn.Linear(self.hidden_size // 2, 1) self.state_layer_norm = LayerNorm(self.hidden_size) self.action_layer_norm = LayerNorm(self.hidden_size) self.hidden_layer_norm = LayerNorm(self.hidden_size // 2) self.lrelu = nn.LeakyReLU(0.2) self.device = device
def __init__(self, params: Params, net, policy) -> None:
    self._initialized = False
    self.max_steps_per_episode = params.pop("max_steps_per_episode")
    self.batch_size = params.get("n_parallel_envs")
    self.net = net
    self.policy = policy
    self.vectorizer = SpacyVectorizer()
    self.reset()
    self._episode_has_started = True
    self.exploration_bonus = params.pop("exploration_bonus")
    self.reward_penalty = params.pop("reward_penalty")
def __init__(self, config: Params):
    super().__init__()
    self.hidden_size = config.pop("hidden_size")
    self.embedding_size = config.pop("embedding_size")
    self.obs_to_hidden = nn.Linear(self.embedding_size, self.hidden_size)
    self.actions_to_hidden = nn.Linear(self.embedding_size, self.hidden_size)
    self.hidden_to_scores = nn.Linear(self.hidden_size, 1)
    self.lrelu = nn.LeakyReLU(0.2)
def __init__(self, config: Params) -> None:
    self._initialized = False
    self._episode_has_started = False
    self.device = config.pop("device")
    self.max_steps_per_episode = config.pop("max_steps_per_episode")
    # Pre-trained BERT encoder and tokenizer, kept in eval mode on the target device.
    self.bert = BertModel.from_pretrained('bert-base-uncased').to(self.device).eval()
    self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    self.qnet = QNet(config.pop("network"))
    self.eps_scheduler = EpsScheduler(config.pop("epsilon"))
    self.current_step = 0
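# Hedged sketch, not in the original source: an epsilon-greedy command selection built
# from the pieces above. The helper name `_choose_command`, its arguments, and the `eps`
# accessor on the scheduler (see the sketch after EpsScheduler.__init__ below) are
# assumptions; `random` and `torch` are assumed to be imported in this module.
def _choose_command(self, obs_embedding, command_embeddings, admissible_commands):
    self.current_step += 1
    self.eps_scheduler.current_step = self.current_step  # keep the decay schedule in sync
    if random.random() < self.eps_scheduler.eps:
        # Explore: pick a random admissible command.
        return random.choice(admissible_commands)
    with torch.no_grad():
        # Exploit: score every admissible command and pick the best one.
        q_values = self.qnet(obs_embedding, command_embeddings)
    return admissible_commands[int(q_values.argmax())]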
def train(game_files):
    logging.basicConfig(level=logging.INFO)
    params = Params.from_file("configs/config.jsonnet")
    agent_params = params.pop("agent")
    train_params = params.pop("training")
    agent = BaseQlearningAgent(agent_params)
    requested_infos = agent.select_additional_infos()
    _validate_requested_infos(requested_infos)

    env_id = textworld.gym.register_games(
        game_files,
        requested_infos,
        max_episode_steps=agent.max_steps_per_episode,
        name="training",
    )
    # env_id = textworld.gym.make_batch(env_id, batch_size=agent.batch_size, parallel=True)
    env = gym.make(env_id)

    for epoch_no in range(1, train_params.pop("n_epochs") + 1):
        stats = {"scores": [], "steps": []}
        for _ in tqdm(range(len(game_files))):
            obs, infos = env.reset()
            agent.train()
            done, score, step = False, 0, 0
            while not done:
                # Increase the step count (batched version kept for reference):
                # steps = [step + int(not done) for step, done in zip(steps, dones)]
                step += 1
                command = agent.act(obs, score, done, infos)
                obs, score, done, infos = env.step(command)
            # Let the agent know the game is done.
            agent.act(obs, score, done, infos)
            stats["scores"].append(score)
            stats["steps"].append(step)

        score = sum(stats["scores"])
        steps = sum(stats["steps"])
        print(f"Epoch: {epoch_no:3d} | {score:2.1f} pts | {steps:4.1f} steps")
def __init__(self, config: Params):
    self.init_eps = config.pop("init_eps")
    self.gamma = config.pop("gamma")
    self.step_size = config.pop("step_size")
    self.min_eps = config.pop("min_eps")
    self.current_step = 1
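# Hedged sketch, not in the original source: a step-wise exponential decay that the
# fields above suggest: multiply `init_eps` by `gamma` once every `step_size` steps and
# never drop below `min_eps`. The property name `eps` is an assumption.
@property
def eps(self) -> float:
    decayed = self.init_eps * self.gamma ** (self.current_step // self.step_size)
    return max(decayed, self.min_eps)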
    try:
        q.get_nowait()
    except Empty:
        pass


if __name__ == "__main__":
    mp.set_start_method("spawn")
    parser = argparse.ArgumentParser(description="Train baseline Q-learning agent.")
    parser.add_argument(
        "games", metavar="game", type=str, help="path to the folder with games"
    )
    args = parser.parse_args()

    train_dir = Path(args.games)
    games = [str(f) for f in train_dir.iterdir() if f.is_file() and f.suffix == ".ulx"][:1]
    print(games)

    params = Params.from_file("configs/config.jsonnet")
    train_params = params.pop("training")
    network_params = params.get("network")

    learner_device = train_params.pop("learner_device")
    tok = spacy.load("en_core_web_sm").tokenizer
    policy_net = SimpleNet(device=learner_device, tokenizer=tok).to(learner_device)
    policy_net.share_memory()

    actor_device = train_params.pop("actor_device")
    target_net = SimpleNet(device=actor_device, tokenizer=tok).to(actor_device)
    target_net.load_state_dict(policy_net.state_dict())
    target_net.share_memory()
    # TODO: change this