Example no. 1
0
    def run(self):
        """Play every configured game while training the policy network.

        For each (game, arrangement) pair a fresh board is constructed and
        played to completion; every transition is pushed into replay memory
        and a training step runs after each move.  The target network is
        synced with the policy network every ``self.target_update`` epochs.
        """
        pytorch_utils.add_model_indicators(self.policy)

        for epoch, (game, arrange) in enumerate(self.games):
            game_board = Board(arrange)

            # TODO change this
            observation = game_board.get_current_board()

            moves = count()
            while True:
                move = next(moves)
                logger.log('epoch : {}, iteration : {}'.format(epoch, move), Color.cyan)

                chosen = self.get_action(observation)
                successor, reward, finished = self.step(game_board, chosen.item())

                # Terminal transitions are stored with a None successor state.
                if finished:
                    successor = None

                self.memory.push(observation, chosen, successor, reward)
                observation = successor

                self.train()

                if finished:
                    tracker.add(iterations=move)
                    tracker.save()
                    break

            # Periodically refresh the target network from the policy weights.
            if epoch % self.target_update == 0:
                self.target.load_state_dict(self.policy.state_dict())

            if self.is_log_parameters:
                pytorch_utils.store_model_indicators(self.policy)
Example no. 2
0
    def step(self, board: Board, action: int):
        """Apply *action* to *board* and return ``(next_state, reward, done)``.

        The flat action index is unravelled into board coordinates, played,
        and the raw play result is upgraded to SUNK_SHIP or WON when the
        board reports those conditions (WON takes priority and ends the
        episode).  The reward is wrapped in a tensor on ``self.device``.
        """
        row, col = unravel_index(action, [BOARD_SIZE, BOARD_SIZE])

        outcome = board.play(row, col)

        # Sinking a ship overrides the raw play result; winning overrides both.
        if board.is_sunk_ship():
            outcome = SUNK_SHIP

        finished = board.is_won()
        if finished:
            outcome = WON

        reward_tensor = torch.tensor([get_reward(outcome)], device=self.device)

        return board.get_current_board(), reward_tensor, finished