Example #1
class SimpleQBot(qbot.QBot):
    """A simple Q-bot

    Attributes:
        entity_iden (int): the entity we are controlling
        model (FeedforwardComplex): the model that does the evaluating
        teacher (FFTeacher): the teacher for the model
        optimizer (torch.optim.Optimizer): the optimizer for the network
        criterion (callable): the evaluator for the network

        offline (OfflineLearner): the offline learner

        encoder (Encoder): the encoder
    """
    def __init__(self, entity_iden):
        self.entity_iden = entity_iden
        self.model = gen.init_or_load_model(_init_model, MODELFILE)
        self.teacher = FFTeacher()
        self.optimizer = torch.optim.Adam(
            [p for p in self.model.parameters() if p.requires_grad], lr=0.003)
        self.criterion = torch.nn.MSELoss()
        self.encoder = _init_encoder(entity_iden)

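        # queue experiences for deferred training; _learn runs later from think()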
        self.offline = OfflineLearner(self._learn, heap_size=10)

    def __call__(self, entity_iden):
        self.entity_iden = entity_iden
        self.encoder = _init_encoder(entity_iden)

    @property
    def cutoff(self):
        return 3

    @property
    def alpha(self):
        return 0.3

    def evaluate(self, game_state: GameState, move: Move) -> float:
        result = torch.tensor([0.0], dtype=torch.float)
        self.teacher.classify(self.model,
                              self.encoder.encode(game_state, move), result)
        return float(result.item())

    def learn(self, game_state: GameState, move: Move, reward: float) -> None:
        self.offline(game_state, move, reward)

    def think(self, max_time: float):
        self.offline.think(max_time)

    def _learn(self, game_state: GameState, move: Move, reward: float) -> float:
        self.teacher.teach(self.model, self.optimizer, self.criterion,
                           self.encoder.encode(game_state, move),
                           torch.tensor([reward], dtype=torch.float32))
        return abs(reward)

    def save(self) -> None:
        gen.save_model(self.model, MODELFILE)
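
Usage sketch: for context, this is how a host game loop might drive the bot. Everything outside SimpleQBot here (the stub chooser and the commented per-tick flow) is an illustrative assumption, not part of the source.

bot = SimpleQBot(entity_iden=7)

def choose(state, moves):
    # greedy action selection: take the move with the highest predicted q-value
    return max(moves, key=lambda m: bot.evaluate(state, m))

# Per-tick flow (game-side names elided):
#   move = choose(state, legal_moves(state))
#   bot.learn(state, move, reward)   # queued on the offline learner, not trained yet
#   bot.think(0.05)                  # spend up to 50 ms on deferred training
# On shutdown:
#   bot.save()                       # checkpoint the model to MODELFILE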
Example #2
class SimpleBot(Bot):
    """Simple pathfinding bot

    Attributes:
        history (deque[tuple[GameState, Move]]): recent (state, move) pairs; the leftmost
            entry is len(history) ticks old and the rightmost is from the last tick

        model (FeedforwardComplex): the model that predicts q-values
        teacher (FFTeacher): the teacher for the model
        optimizer (torch.optim.Optimizer): the optimizer
        criterion (callable): the loss function used for training
    """
    def __init__(self, entity_iden: int):
        super().__init__(entity_iden)
        self.model = _init_or_load_model()
        self.history = deque()
        self.teacher = FFTeacher()
        self.optimizer = torch.optim.Adam(
            [p for p in self.model.parameters() if p.requires_grad], lr=0.003)
        self.criterion = torch.nn.MSELoss()

        self.spam_loss = False             # print every training loss
        self.spam_moves = False            # print per-move q-value scores
        self.print_loss_improves = True    # print whenever the best loss improves
        self.random_perc = 0.2             # exploration rate for epsilon-greedy moves
        self.best_loss = float('inf')      # lowest loss seen so far
        self.next_save = 50                # moves remaining until the next autosave

    def move(self, game_state: GameState):
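        # deep-copy the state via a serialize/deserialize round trip so later
        # mutations by the game don't corrupt our history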
        gs_copy = ser.deserialize(ser.serialize(game_state))
        self.history.append((gs_copy, None))

        if len(self.history) == CUTOFF + 1:
            self.teach()

        move = self.eval(game_state)
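        # epsilon-greedy exploration: with probability random_perc, override the
        # greedy choice with a random move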
        if np.random.uniform(0, 1) < self.random_perc:
            move = random.choice(MOVE_MAP)
        self.history.pop()
        self.history.append((gs_copy, move))

        self.next_save -= 1
        if self.next_save <= 0:
            self.save()
            self.next_save = 50

        return move

    def finished(self, game_state: GameState, result):
        self.save()

    def save(self):
        """saves the model"""
        print(f'[simplebot] {time.ctime()} saving')
        sys.stdout.flush()
        _save_model(self.model)

    def teach(self):
        """Must be called once the history holds CUTOFF + 1 entries. Pops the oldest
        history item, computes the finite series of diminished rewards that followed it,
        and trains the network on that target."""
        original, og_move = self.history.popleft()
        previous = original
        penalty = 1
        reward = 0
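        # accumulate the rewards of the next CUTOFF ticks, discounting each
        # successive tick by a further factor of ALPHA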
        for i in range(CUTOFF):
            reward += penalty * _reward(previous, self.history[i][0],
                                        self.entity_iden)
            previous = self.history[i][0]
            penalty *= ALPHA

        loss = self.teacher.teach(self.model, self.optimizer, self.criterion,
                                  _encode(original, self.entity_iden, og_move),
                                  torch.tensor([reward], dtype=torch.float32))
        if self.spam_loss:
            print(f'[simplebot] loss={loss}')
            sys.stdout.flush()
        if self.print_loss_improves:
            if loss < self.best_loss:
                self.best_loss = loss
                print(f'[simplebot] loss improved to {loss} for move ' +
                      f'{og_move.name} reward {reward}')
                sys.stdout.flush()

    def eval(self, game_state: GameState) -> Move:
        """Chooses the best move according to our model for the given state"""
        scores = []
        out = torch.tensor([0.0])
        for move in MOVE_MAP:
            self.teacher.classify(self.model,
                                  _encode(game_state, self.entity_iden, move),
                                  out)
            scores.append(out.item())
        if self.spam_moves:
            toprint = []
            for ind, move in enumerate(MOVE_MAP):
                toprint.extend((str(move), ': ', f'{scores[ind]:.3f}'))
            print('{' + ', '.join(toprint) + '}')
            sys.stdout.flush()
        return MOVE_MAP[int(np.argmax(scores))]
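
The target that teach() regresses toward is a finite, discounted sum of the rewards seen over the next CUTOFF ticks. A minimal standalone sketch of that computation, assuming CUTOFF = 3 and ALPHA = 0.3 (the real constants are defined elsewhere in the source module):

ALPHA = 0.3   # assumed discount factor
CUTOFF = 3    # assumed horizon

def discounted_target(rewards):
    """Finite series of diminished rewards, as computed in SimpleBot.teach."""
    penalty = 1.0
    total = 0.0
    for r in rewards[:CUTOFF]:
        total += penalty * r
        penalty *= ALPHA
    return total

# e.g. three ticks of reward 1.0 give 1 + 0.3 + 0.09 = 1.39
assert abs(discounted_target([1.0, 1.0, 1.0]) - 1.39) < 1e-9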
Example #3
class DeepQBot(qbot.QBot):
    """The Q-bot implementation

    Attributes:
        entity_iden (int): the entity we are controlling
        model (FeedforwardComplex): the model that does the evaluating
        teacher (FFTeacher): the teacher for the model
        evaluation (bool): True to run without storing experiences; False to record
            them to the replay buffer

        replay (WritableReplayBuffer, optional): the buffer for replays

        encoder (Encoder): the encoder
    """
    def __init__(self,
                 entity_iden: int,
                 replay_path=REPLAY_FOLDER,
                 evaluation=False):
        self.entity_iden = entity_iden
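        # create and save a fresh model if no checkpoint exists on disk yet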
        if not os.path.exists(EVAL_MODELFILE):
            _init_model()

        self.model = Deep1ModelEval.load(EVAL_MODELFILE)

        self.teacher = FFTeacher()
        self.evaluation = evaluation
        self.encoder = init_encoder(entity_iden)

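        # training mode persists experiences to a file-backed replay buffer;
        # evaluation mode records nothing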
        if not evaluation:
            self.replay = replay_buffer.FileWritableReplayBuffer(replay_path,
                                                                 exist_ok=True)
        else:
            self.replay = None

    def __call__(self, entity_iden):
        self.entity_iden = entity_iden
        self.encoder = init_encoder(entity_iden)

    @property
    def cutoff(self):
        return CUTOFF

    @property
    def alpha(self):
        return ALPHA

    def evaluate(self, game_state: GameState, move: Move) -> float:
        result = torch.tensor([0.0], dtype=torch.float)
        self.teacher.classify(self.model,
                              self.encoder.encode(game_state, move), result)
        return float(result.item())

    def learn(self, game_state: GameState, move: Move, new_state: GameState,
              reward_raw: float, reward_pred: float) -> None:
        if self.evaluation:
            print(f'predicted reward: {self.evaluate(game_state, move):.2f} vs actual reward '
                  f'{reward_raw:.2f} + {reward_pred:.2f} = {reward_raw + reward_pred:.2f}')
            return
        player_id = 1 if self.entity_iden == game_state.player_1_iden else 2
        self.replay.add(
            replay_buffer.Experience(
                game_state, move, self.cutoff, new_state, reward_raw,
                player_id, None,
                self.encoder.encode(game_state, move).numpy(),
                self.encoder.encode(new_state, move).numpy()))

    def save(self) -> None:
        pass  # nothing to persist; this bot only records experiences
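
Both modes can be exercised in a couple of lines; a hedged sketch, where the entity id is arbitrary and REPLAY_FOLDER / EVAL_MODELFILE are module constants from the source:

recorder = DeepQBot(entity_iden=1)                   # training mode: learn() appends
                                                     # Experiences to the replay buffer
scorer = DeepQBot(entity_iden=1, evaluation=True)    # evaluation mode: learn() only
                                                     # prints predicted vs. actual reward

Note that this class never calls teacher.teach itself: it only records experiences, so the gradient steps evidently happen in a separate trainer that consumes the replay folder, which is also why save() is a no-op.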