Example #1
    def __init__(self, config: Params, device="cuda"):
        super().__init__()

        self.bert = BertModel.from_pretrained("bert-base-uncased").eval()
        self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

        self.hidden_size = config.get("hidden_size")
        self.embedding_size = config.get("embedding_size")

        # TODO: probably need recurrent nets here
        self.obs_to_hidden = nn.Linear(self.embedding_size, self.hidden_size)
        self.actions_to_hidden = nn.Linear(self.embedding_size,
                                           self.hidden_size)

        self.hidden_to_hidden = nn.Linear(self.hidden_size,
                                          self.hidden_size // 2)

        self.hidden_to_scores = nn.Linear(self.hidden_size // 2, 1)

        self.state_layer_norm = LayerNorm(self.hidden_size)
        self.action_layer_norm = LayerNorm(self.hidden_size)
        self.hidden_layer_norm = LayerNorm(self.hidden_size // 2)

        self.lrelu = nn.LeakyReLU(0.2)

        self.device = device
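
A minimal sketch of the forward pass such a module might implement, assuming it scores pairs of observation and action embeddings already produced by BERT; the `forward` signature and the element-wise combination of the two streams are illustrative assumptions, not taken from the source:

    def forward(self, obs_emb, act_emb):
        # Project each stream into the hidden space, apply the leaky ReLU,
        # and normalize observations and actions separately.
        obs_hidden = self.state_layer_norm(self.lrelu(self.obs_to_hidden(obs_emb)))
        act_hidden = self.action_layer_norm(self.lrelu(self.actions_to_hidden(act_emb)))

        # Combine the streams (element-wise product, purely as an assumption),
        # shrink to hidden_size // 2, and map each pair to a scalar Q-score.
        hidden = self.hidden_layer_norm(
            self.lrelu(self.hidden_to_hidden(obs_hidden * act_hidden)))
        return self.hidden_to_scores(hidden).squeeze(-1)
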
Example #2
    def __init__(self, params: Params, net, policy) -> None:
        self._initialized = False
        self.max_steps_per_episode = params.pop("max_steps_per_episode")
        self.batch_size = params.get("n_parallel_envs")
        self.net = net
        self.policy = policy
        self.vectorizer = SpacyVectorizer()
        self.reset()
        self._episode_has_started = True
        self.exploration_bonus = params.pop("exploration_bonus")
        self.reward_penalty = params.pop("reward_penalty")
Example #3
    def __init__(self, config: Params):
        super().__init__()

        self.hidden_size = config.pop("hidden_size")
        self.embedding_size = config.pop("embedding_size")

        self.obs_to_hidden = nn.Linear(self.embedding_size, self.hidden_size)
        self.actions_to_hidden = nn.Linear(self.embedding_size,
                                           self.hidden_size)
        self.hidden_to_scores = nn.Linear(self.hidden_size, 1)

        self.lrelu = nn.LeakyReLU(0.2)
Example #4
    def __init__(self, config: Params) -> None:
        self._initialized = False
        self._episode_has_started = False
        self.device = config.pop("device")
        self.max_steps_per_episode = config.pop("max_steps_per_episode")

        self.bert = BertModel.from_pretrained('bert-base-uncased').to(
            self.device).eval()
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

        self.qnet = QNet(config.pop("network"))
        self.eps_scheduler = EpsScheduler(config.pop("epsilon"))

        self.current_step = 0
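
A hedged sketch of the epsilon-greedy command selection such an agent typically performs with `qnet` and `eps_scheduler`; the `choose_command` method, its `commands`/`q_values` arguments, the scheduler's `value()` accessor, and the standard-library `random` import are illustrative assumptions:

    def choose_command(self, commands, q_values):
        # With probability eps take a random admissible command (exploration);
        # otherwise take the command with the highest predicted Q-value.
        eps = self.eps_scheduler.value()
        if random.random() < eps:
            return random.choice(commands)
        return commands[q_values.argmax().item()]
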
Example #5
def train(game_files):
    logging.basicConfig(level=logging.INFO)
    params = Params.from_file("configs/config.jsonnet")
    agent_params = params.pop("agent")
    train_params = params.pop("training")
    agent = BaseQlearningAgent(agent_params)
    requested_infos = agent.select_additional_infos()
    _validate_requested_infos(requested_infos)

    env_id = textworld.gym.register_games(game_files, requested_infos,
                                          max_episode_steps=agent.max_steps_per_episode,
                                          name="training")
    # env_id = textworld.gym.make_batch(env_id, batch_size=agent.batch_size, parallel=True)
    env = gym.make(env_id)

    for epoch_no in range(1, train_params.pop("n_epochs") + 1):
        stats = {
            "scores": [],
            "steps": [],
        }
        for _ in tqdm(range(len(game_files))):
            obs, infos = env.reset()
            agent.train()

            done = False
            score = 0
            step = 0

            while not done:
                # Increase the step count while the episode is still running.
                # steps = [step + int(not done) for step, done in zip(steps, dones)]
                step += 1
                command = agent.act(obs, score, done, infos)
                obs, score, done, infos = env.step(command)

            # Let the agent know the game is done.
            agent.act(obs, score, done, infos)

            stats["scores"].append(score)
            stats["steps"].append(step)

        score = sum(stats["scores"])
        steps = sum(stats["steps"])
        print(f"Epoch: {epoch_no:3d} | {score:2.1f} pts | {steps:4.1f} steps")
Example #6
    def __init__(self, config: Params):
        self.init_eps = config.pop("init_eps")
        self.gamma = config.pop("gamma")
        self.step_size = config.pop("step_size")
        self.min_eps = config.pop("min_eps")
        self.current_step = 1
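
The stored fields suggest a step-wise exponential decay of epsilon toward `min_eps` (multiply by `gamma` every `step_size` steps), in the spirit of torch's StepLR; a minimal sketch under that assumption, with the `value` and `step` method names invented for illustration:

    def value(self):
        # Decay init_eps by gamma once per completed step_size window,
        # never dropping below min_eps.
        n_decays = self.current_step // self.step_size
        return max(self.min_eps, self.init_eps * self.gamma ** n_decays)

    def step(self):
        self.current_step += 1
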
Example #7
            q.get_nowait()
    except Empty:
        pass


if __name__ == "__main__":
    mp.set_start_method("spawn")
    parser = argparse.ArgumentParser(description="Train baseline Q-learning agent.")
    parser.add_argument(
        "games", metavar="game", type=str, help="path to the folder with games"
    )
    args = parser.parse_args()
    train_dir = Path(args.games)
    games = [str(f) for f in train_dir.iterdir() if f.is_file() and f.suffix == ".ulx"][:1]
    print(games)
    params = Params.from_file("configs/config.jsonnet")
    train_params = params.pop("training")

    network_params = params.get("network")
    learner_device = train_params.pop("learner_device")

    tok = spacy.load("en_core_web_sm").tokenizer
    policy_net = SimpleNet(device=learner_device, tokenizer=tok).to(learner_device)
    policy_net.share_memory()

    actor_device = train_params.pop("actor_device")
    target_net = SimpleNet(device=actor_device, tokenizer=tok).to(actor_device)
    target_net.load_state_dict(policy_net.state_dict())
    target_net.share_memory()

    # TODO: change this