Example #1
async def main() -> None:
    while True:
        # Launch a fresh Stockfish process and a publisher for each new game.
        transport, engine = await chess.engine.popen_uci("/usr/games/stockfish")
        pub = RMQPublisher(game_id=str(uuid.uuid4()))
        board = chess.Board()
        pub.send(GameState(0, 'WHITE', None, board.fen()))
        while not board.is_game_over():
            # Ask the engine for a move, apply it, and publish the updated position.
            result = await engine.play(board, chess.engine.Limit(time=5))
            board.push(result.move)
            pub.send(GameState(board.ply(), 'WHITE' if board.turn else 'BLACK', result.move.uci(), board.fen()))

        await engine.quit()
        pub.send(ResultEvent(board.result()))
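The coroutine above never returns on its own, so the natural entry point simply hands it to the event loop. A minimal sketch, assuming the standard asyncio/uuid/python-chess imports; RMQPublisher, GameState and ResultEvent are taken to be classes from the surrounding project:

import asyncio
import uuid

import chess
import chess.engine

# RMQPublisher, GameState and ResultEvent come from the surrounding project
# and are not shown here.

if __name__ == "__main__":
    asyncio.run(main())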
Example #2
 def create_game(self):
     with app.app_context():
         gs = GameState()
         gs.nr_players = 1
         app.db.session.add(gs)
         app.db.session.commit()
         return gs.id
Example #3
def new_game():
    gs = GameState()
    gs.nr_players = 1
    db.session.add(gs)
    db.session.commit()
    game_id = gs.id
    session["game_id"] = game_id
    session["player_id"] = GameState.Player1
    return redirect(url_for('game', game_id=game_id))
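new_game() reads like a Flask view: it writes to session and redirects to a 'game' endpoint. A minimal sketch of one possible wiring, assuming a module-level app; db and GameState come from the surrounding project, and the URL rules are illustrative rather than taken from the source:

from flask import Flask

app = Flask(__name__)
app.secret_key = "change-me"  # flask.session needs a secret key

# Hypothetical registration; the original project may use decorators or blueprints.
app.add_url_rule("/new", view_func=new_game)

@app.route("/game/<int:game_id>")
def game(game_id):
    return f"game {game_id}"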
Example #4
    def test_make_move_asserts(self):
        gs = GameState()
        self.assertRaises(AssertionError, gs.make_move, -1, 0)
        self.assertRaises(AssertionError, gs.make_move, 0, 3)

        gs = GameState.create_from_string("oxoxxoxox")
        self.assertRaises(AssertionError, gs.make_move, 0, 0)

        gs = GameState.create_from_string(" xoxxoxox")
        self.assertRaises(AssertionError, gs.make_move, 2, 2)
Example #5
def index(request: Request):
    content = json.loads(request.content.read())
    try:
        new_state = GameState(**content)

    except Exception as e:
        print(content)
        print(e)
        return "wew"

    wamp_component.state = new_state
    print(wamp_component.state.map.game_state)

    return "ok"
Example #6
def index(request: Request):
    print("h")
    content = json.loads(request.content.read())
    try:
        new_state = GameState(**content)

    except Exception as e:
        print(content)
        print(e)
        return "wew"

    # check for diffs
    # print(json.dumps(content, indent=3))
    # new_state
    wamp_component.state = new_state

    return "ok"
Example #7
 def save_game(self):
     game_state = GameState()
     game_state.next_player = self.player_order[0]
     game_state.wind_index = self.game_board.wind_direction.index(0)
     for player in sorted(list(self.players)):
         game_state.player_data[player] = self.players[player].export()
     for empire in Empires:
         game_state.taverns[empire.value.capital] = self.engine.varostar[
             empire.value.capital].export_matroz()
     game_state.card_decks = [
         self.engine.eventdeck, self.engine.eventstack,
         self.engine.kincspakli, self.engine.treasurestack
     ]
     game_state.is_grog_lord_defeated = self.engine.grogbaroLegyozve.get()
     game_state.is_lieutenant_found = self.engine.hadnagyElokerult.get()
     if game_state.check():
         self.save_handler.write_save(game_state)
     else:
         raise RuntimeError('Invalid game state.')
Example #8
 def load_saved_state(self):
     current_state = GameState()
     file_name = askopenfilename(defaultextension=self.extension,
                                 filetypes=self.type,
                                 initialdir='saved')
     if file_name == '':
         return None
     xml_content = parse(file_name)
     save = xml_content.getroot()
     for player in save.findall('player'):
         player_id = player.get('id')
         current_state.player_data[player_id] = self._load_player(player)
     current_state.next_player = save.find('currentPlayer').text
     current_state.wind_index = int(save.find('windDirection').text)
     current_state.is_lieutenant_found = save.find('firstMateFound').text == 'True'
     # NOTE: this reads 'firstMateFound' again; the grog-lord flag presumably has
     # its own tag in the save file, so this looks like a copy-paste slip.
     current_state.is_grog_lord_defeated = save.find('firstMateFound').text == 'True'
     tavern_tag = save.find('taverns')
     for tavern in tavern_tag.findall('tavern'):
         current_state.taverns[tavern.get('port')] = int(
             tavern.get('sailors'))
     current_state.card_decks = self._load_cards(save)
     return current_state
Example #9
 def test_creation(self):
     gs = GameState()
Example #10
 def test_make_move(self):
     gs = GameState()
     gs.make_move(0, 0)
     assert gs.state == GameState.Ongoing
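The test methods in Examples #4, #9 and #10 use self.assertRaises, so they clearly belong to a unittest.TestCase subclass. A minimal harness for running them, with the class name assumed:

import unittest

class GameStateTest(unittest.TestCase):
    # test_creation, test_make_move and test_make_move_asserts from the
    # examples above would be pasted in here as methods.
    pass

if __name__ == "__main__":
    unittest.main()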
Example #11
def train(args):
    chrome_driver_path = args.chrome_driver_path
    checkpoint_path = args.checkpoint_path
    nb_actions = args.nb_actions
    initial_epsilon = args.initial_epsilon
    epsilon = initial_epsilon
    final_epsilon = args.final_epsilon
    gamma = args.gamma
    nb_memory = args.nb_memory
    nb_expolre = args.nb_expolre
    is_debug = args.is_debug
    batch_size = args.batch_size
    nb_observation = args.nb_observation
    desired_fps = args.desired_fps
    is_cuda = args.use_cuda and torch.cuda.is_available()
    log_frequency = args.log_frequency
    save_frequency = args.save_frequency
    ratio_of_win = args.ratio_of_win
    if args.exploiting:
        nb_observation = -1
        epsilon = final_epsilon

    seed = 22
    np.random.seed(seed)
    memory = deque()
    env = DinoSeleniumEnv(chrome_driver_path, speed=args.game_speed)
    agent = Agent(env)
    game_state = GameState(agent, debug=is_debug)
    qnetwork = QNetwork(nb_actions)
    if is_cuda:
        qnetwork.cuda()
    optimizer = torch.optim.Adam(qnetwork.parameters(), 1e-4)
    tmp_param = next(qnetwork.parameters())
    try:
        m = torch.load(checkpoint_path)
        qnetwork.load_state_dict(m["qnetwork"])
        optimizer.load_state_dict(m["optimizer"])
    except Exception:
        logger.warning("No model found in {}".format(checkpoint_path))
    loss_fcn = torch.nn.MSELoss()
    action_indx = 0  # do nothing as the first action
    screen, reward, is_gameover, score = game_state.get_state(action_indx)
    current_state = np.expand_dims(screen, 0)
    # [IMAGE_CHANNELS,IMAGE_WIDTH,IMAGE_HEIGHT]
    current_state = np.tile(current_state, (IMAGE_CHANNELS, 1, 1))
    initial_state = current_state

    t = 0
    last_time = 0
    sum_scores = 0
    total_loss = 0
    max_score = 0
    qvalues = np.array([0, 0])
    lost_action = []
    win_actions = []
    action_random = 0
    action_greedy = 0
    episodes = 0
    nb_episodes = 0
    if not args.exploiting:
        try:
            t, memory, epsilon, nb_episodes = pickle.load(open(
                "cache.p", "rb"))
        except Exception:
            logger.warning("Could not load cache file! Starting from scratch.")
    try:
        while True:
            qnetwork.eval()
            if np.random.random() < epsilon:  # epsilon greedy
                action_indx = np.random.randint(nb_actions)
                action_random += 1
            else:
                action_greedy += 1
                tensor = torch.from_numpy(current_state).float().unsqueeze(0)
                with torch.no_grad():
                    qvalues = qnetwork(tensor).squeeze()
                _, action_indx = qvalues.max(-1)
                action_indx = action_indx.item()
            if epsilon > final_epsilon and t > nb_observation:
                epsilon -= (initial_epsilon - final_epsilon) / nb_expolre
            screen, reward, is_gameover, score = game_state.get_state(
                action_indx)
            if is_gameover:
                episodes += 1
                nb_episodes += 1
                lost_action.append(action_indx)
                sum_scores += score
            else:
                win_actions.append(action_indx)
            if score > max_score:
                max_score = score
            if last_time:
                fps = 1 / (time.time() - last_time)
                if fps > desired_fps:
                    time.sleep(1 / desired_fps - 1 / fps)
            if last_time and t % log_frequency == 0:
                logger.info('fps: {0}'.format(1 / (time.time() - last_time)))
            last_time = time.time()
            screen = np.expand_dims(screen, 0)
            next_state = np.append(screen,
                                   current_state[:IMAGE_CHANNELS - 1, :, :],
                                   axis=0)
            if not args.exploiting and (is_gameover
                                        or np.random.random() < ratio_of_win):
                memory.append((current_state, action_indx, reward, next_state,
                               is_gameover))
            if len(memory) > nb_memory:
                memory.popleft()
            # Once past the observation phase, sample a replay minibatch and
            # take one Q-learning step.
            if nb_observation > 0 and t > nb_observation:
                indxes = np.random.choice(len(memory),
                                          batch_size,
                                          replace=False)
                minibatch = [memory[b] for b in indxes]
                inputs = tmp_param.new(batch_size, IMAGE_CHANNELS, IMAGE_WIDTH,
                                       IMAGE_HEIGHT).zero_()
                targets = tmp_param.new(batch_size, nb_actions).zero_()
                for i, (state_t, action_t, reward_t, state_t1,
                        is_gameover_t1) in enumerate(minibatch):
                    inputs[i] = torch.from_numpy(state_t).float()
                    tensor = inputs[i].unsqueeze(0)
                    with torch.no_grad():
                        qvalues = qnetwork(tensor).squeeze()
                    targets[i] = qvalues
                    if is_gameover_t1:
                        assert reward_t == -1
                        targets[i, action_t] = reward_t
                    else:
                        tensor = torch.from_numpy(state_t1).float().unsqueeze(
                            0)
                        with torch.no_grad():
                            qvalues = qnetwork(tensor).squeeze()
                        qvalues = qvalues.cpu().numpy()
                        targets[i, action_t] = reward_t + gamma * qvalues.max()
                qnetwork.train()
                qnetwork.zero_grad()
                q_values = qnetwork(inputs)
                loss = loss_fcn(q_values, targets)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
            current_state = initial_state if is_gameover else next_state
            t += 1
            if t % log_frequency == 0:
                logger.info(
                    "For t {}: mean score is {} max score is {} mean loss: {} number of episode: {}"
                    .format(t, sum_scores / (episodes + 0.1), max_score,
                            total_loss / 1000, episodes))
                logger.info(
                    "t: {} action_index: {} reward: {} max qvalue: {} total number of eposodes so far: {}"
                    .format(t, action_indx, reward, qvalues.max(),
                            nb_episodes))
                tmp = np.array(lost_action)
                dnc = (tmp == 0).sum()
                logger.info(
                    "Lost actions do_nothing: {} jump: {} length of memory {}".
                    format(dnc,
                           len(tmp) - dnc, len(memory)))
                tmp = np.array(win_actions)
                dnc = (tmp == 0).sum()
                logger.info("Win actions do_nothing: {} jump: {}".format(
                    dnc,
                    len(tmp) - dnc))
                logger.info("Greedy action {} Random action {}".format(
                    action_greedy, action_random))
                action_greedy = 0
                action_random = 0
                lost_action = []
                win_actions = []
                if episodes != 0:
                    sum_scores = 0
                total_loss = 0
                episodes = 0
            if t % save_frequency == 0 and not args.exploiting:
                env.pause_game()
                with open("cache.p", "wb") as fh:
                    pickle.dump((t, memory, epsilon, nb_episodes), fh)
                gc.collect()
                torch.save(
                    {
                        "qnetwork": qnetwork.state_dict(),
                        "optimizer": optimizer.state_dict()
                    }, checkpoint_path)
                env.resume_game()
    except KeyboardInterrupt:
        if not args.exploiting:
            torch.save(
                {
                    "qnetwork": qnetwork.state_dict(),
                    "optimizer": optimizer.state_dict()
                }, checkpoint_path)
            with open("cache.p", "wb") as fh:
                pickle.dump((t, memory, epsilon, nb_episodes), fh)
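train() only reads attributes off args, so a matching command-line interface can be reconstructed from the names it uses. A sketch of such a parser; every default is an illustrative guess, and the misspelled nb_expolre is kept because the function reads exactly that attribute:

import argparse

def build_parser():
    p = argparse.ArgumentParser()
    p.add_argument("--chrome_driver_path", default="chromedriver")
    p.add_argument("--checkpoint_path", default="checkpoint.pth")
    p.add_argument("--nb_actions", type=int, default=2)
    p.add_argument("--initial_epsilon", type=float, default=0.1)
    p.add_argument("--final_epsilon", type=float, default=0.0001)
    p.add_argument("--gamma", type=float, default=0.99)
    p.add_argument("--nb_memory", type=int, default=50000)
    p.add_argument("--nb_expolre", type=int, default=100000)  # spelling matches train()
    p.add_argument("--is_debug", action="store_true")
    p.add_argument("--batch_size", type=int, default=32)
    p.add_argument("--nb_observation", type=int, default=100)
    p.add_argument("--desired_fps", type=int, default=30)
    p.add_argument("--use_cuda", action="store_true")
    p.add_argument("--log_frequency", type=int, default=1000)
    p.add_argument("--save_frequency", type=int, default=10000)
    p.add_argument("--ratio_of_win", type=float, default=0.1)
    p.add_argument("--exploiting", action="store_true")
    p.add_argument("--game_speed", type=int, default=0)
    return p

if __name__ == "__main__":
    train(build_parser().parse_args())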