Example #1
def main():
    pygame.init()

    # Play games in an endless loop.
    while True:
        width = GameSize
        height = GameSize

        # Draw two distinct starting positions.
        x1, y1 = randomPosition(width, height)
        x2, y2 = randomPosition(width, height)
        while x1 == x2 and y1 == y2:
            x1, y1 = randomPosition(width, height)

        game = Game(width, height, [
            PositionPlayer(1, AiBasic(), [x1, y1]),
            PositionPlayer(2, Aisurvivor(), [x2, y2]),
        ])

        pygame.mouse.set_visible(False)

        # Attach a window only when the games should be displayed.
        if VisibleScreen:
            window = Window(game, 40)
            # displayGameMenu(window, game)
            game.main_loop(window)
        else:
            game.main_loop()

        printGameResults(game)
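This example relies on a randomPosition helper and a printGameResults helper (plus the GameSize and VisibleScreen globals) that are not shown above. Below is a minimal, hypothetical sketch that matches how they are called here, not the project's actual implementation.

import random

def randomPosition(width, height):
    # Hypothetical implementation: pick a random cell inside the map,
    # with zero-based coordinates, as Example #1 expects.
    return random.randint(0, width - 1), random.randint(0, height - 1)

def printGameResults(game):
    # Hypothetical implementation: game.winner is None on a draw,
    # otherwise the id of the winning player.
    if game.winner is None:
        print("It's a draw!")
    else:
        print('Player {} wins!'.format(game.winner))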
Example #2
def main():
    # Prepare the size of the game.
    # These values are fine if you want to play, but they may be less suitable
    # for training your AI; decreasing them makes learning faster.
    width = 10
    height = 10

    # Create a game from its size and its players
    game = Game(
        width,
        height,
        [
            # Here we create two players with constant direction.
            # It's not very interesting but it's the basis of everything else.
            PositionPlayer(1, ConstantPlayer(Direction.RIGHT), [0, 0]),
            PositionPlayer(2, ConstantPlayer(Direction.LEFT),
                           [width - 1, height - 1]),
        ])

    # Run the game.
    # Since no window is passed as a parameter, the game will not display
    # anything, which avoids useless computations, and it will not be limited
    # to a fixed framerate, which is only needed for human players.
    game.main_loop()

    # The game is done, you can get information about it and do what you want.
    if game.winner is None:
        print("It's a draw!")
    else:
        print('Player {} wins!'.format(game.winner))
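Because the headless loop above is not throttled to a framerate, it is cheap to run many games back to back. The following is a small, hypothetical batch runner (run_batch is a name introduced here, not part of the project) built only from the API shown in this example; with these deterministic players every game ends the same way, but the same pattern applies to smarter players.

def run_batch(n_games=100, width=10, height=10):
    # Hypothetical helper: count wins per player id; the None key counts draws.
    results = {1: 0, 2: 0, None: 0}
    for _ in range(n_games):
        game = Game(width, height, [
            PositionPlayer(1, ConstantPlayer(Direction.RIGHT), [0, 0]),
            PositionPlayer(2, ConstantPlayer(Direction.LEFT),
                           [width - 1, height - 1]),
        ])
        game.main_loop()
        results[game.winner] += 1
    return results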
Example #3
def play(width, height):

    # Initialize the players' positions
    init_player_1 = init_player_position(width, height)
    init_player_2 = init_player_position(width, height)

    # Ensure the players do not start at the same position
    while (init_player_1[0] == init_player_2[0]
           and init_player_1[1] == init_player_2[1]):
        init_player_2 = init_player_position(width, height)

    # Create a game from its size and its players
    game = Game(
        width,
        height,
        [
            # We create a PositionPlayer for each player of the game.
            PositionPlayer(1, Ai(), init_player_1),
            PositionPlayer(2, Ai(), init_player_2)
        ])

    # Run the game.
    game.main_loop()

    return game
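The init_player_position helper is not shown either; here is a plausible sketch, assuming it returns a zero-based [x, y] list as expected by PositionPlayer above.

import random

def init_player_position(width, height):
    # Hypothetical implementation: a random starting cell
    # inside a width x height map.
    return [random.randint(0, width - 1), random.randint(0, height - 1)]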
Example #4
def main():
    # Initialize the game engine
    pygame.init()

    # Prepare the size of the game.
    # These values are fine if you want to play, but they may be less suitable
    # for training your AI; decreasing them makes learning faster.
    width = 10
    height = 10

    # Create a game from its size and its players
    game = Game(
        width,
        height,
        [
            # We create a PositionPlayer for each player of the game.
            # The first one has id 1 and uses keyboard interaction with the
            # Z, Q, S and D keys, starting with a default direction to the right.
            # The last argument is the initial position of the player.
            PositionPlayer(1, KeyboardPlayer(Direction.RIGHT, Mode.ZQSD),
                           [0, 0]),

            # We create a second player that will use the arrow keys.
            PositionPlayer(2, KeyboardPlayer(Direction.LEFT, Mode.ARROWS),
                           [width - 1, height - 1]),
        ])

    # Create a window for the game so the players can see what they're doing.
    window = Window(game, 10)

    # Hide mouse
    pygame.mouse.set_visible(False)

    # Run the game.
    game.main_loop(window)

    # Once the game is finished, if game.winner is None, it means it's a draw
    # Otherwise, game.winner will tell us which player has won the game.
    if game.winner is None:
        print("It's a draw!")
    else:
        print('Player {} wins!'.format(game.winner))
Example #5
def train(model):
    writer = SummaryWriter()

    # Initialize neural network parameters and optimizer
    optimizer = optim.Adam(model.parameters())

    # Initialize exploration rate
    epsilon = EPSILON_START
    epsilon_temp = float(epsilon)

    # Initialize memory
    memory = ReplayMemory(MEM_CAPACITY)

    # Initialize the game counter
    game_counter = 0
    move_counter = 0
    vs_min_p1_win = 0
    minimax_game = 0

    while True:

        # Initialize the game cycle parameters
        cycle_step = 0
        p1_victories = 0
        p2_victories = 0
        null_games = 0

        player_1 = Ai(epsilon)
        player_2 = Ai(epsilon)

        # Play a cycle of games
        while cycle_step < GAME_CYCLE:
            # Increment the counters
            game_counter += 1
            cycle_step += 1

            # Initialize the starting positions
            x1 = random.randint(0, MAP_WIDTH - 1)
            y1 = random.randint(0, MAP_HEIGHT - 1)
            x2 = random.randint(0, MAP_WIDTH - 1)
            y2 = random.randint(0, MAP_HEIGHT - 1)
            while x1 == x2 and y1 == y2:
                x1 = random.randint(0, MAP_WIDTH - 1)
                y1 = random.randint(0, MAP_HEIGHT - 1)

            # Initialize the game
            player_1.epsilon = epsilon
            player_2.epsilon = epsilon

            game = Game(MAP_WIDTH, MAP_HEIGHT, [
                PositionPlayer(1, player_1, [x1, y1]),
                PositionPlayer(2, player_2, [x2, y2]),
            ])

            # Get the initial state for each player
            old_state_p1 = game.map().state_for_player(1)
            old_state_p1 = np.reshape(old_state_p1, (1, 1, old_state_p1.shape[0], old_state_p1.shape[1]))
            old_state_p1 = torch.from_numpy(old_state_p1).float()
            old_state_p2 = game.map().state_for_player(2)
            old_state_p2 = np.reshape(old_state_p2, (1, 1, old_state_p2.shape[0], old_state_p2.shape[1]))
            old_state_p2 = torch.from_numpy(old_state_p2).float()

            game.main_loop(model)

            # Analyze the game
            move_counter += len(game.history)
            terminal = False

            for historyStep in range(len(game.history) - 1):

                # Get the state for each player
                new_state_p1 = game.history[historyStep + 1].map.state_for_player(1)
                new_state_p1 = np.reshape(new_state_p1, (1, 1, new_state_p1.shape[0], new_state_p1.shape[1]))
                new_state_p1 = torch.from_numpy(new_state_p1).float()

                new_state_p2 = game.history[historyStep + 1].map.state_for_player(2)
                new_state_p2 = np.reshape(new_state_p2, (1, 1, new_state_p2.shape[0], new_state_p2.shape[1]))
                new_state_p2 = torch.from_numpy(new_state_p2).float()

                # Get the action for each player
                if game.history[historyStep].player_one_direction is not None:
                    action_p1 = torch.from_numpy(np.array([game.history[historyStep].player_one_direction.value - 1],
                                                          dtype=np.float32)).unsqueeze(0)
                    action_p2 = torch.from_numpy(np.array([game.history[historyStep].player_two_direction.value - 1],
                                                          dtype=np.float32)).unsqueeze(0)
                else:
                    action_p1 = torch.from_numpy(np.array([0], dtype=np.float32)).unsqueeze(0)
                    action_p2 = torch.from_numpy(np.array([0], dtype=np.float32)).unsqueeze(0)

                # Compute the reward for each player
                reward_p1 = historyStep
                reward_p2 = historyStep

                if historyStep + 1 == len(game.history) - 1:
                    if game.winner is None:
                        null_games += 1
                        reward_p1 = 0
                        reward_p2 = 0
                    elif game.winner == 1:
                        reward_p1 = 100
                        reward_p2 = -25
                        p1_victories += 1
                    else:
                        reward_p1 = -25
                        reward_p2 = 100
                        p2_victories += 1

                    terminal = True

                reward_p1 = torch.from_numpy(np.array([reward_p1], dtype=np.float32)).unsqueeze(0)
                reward_p2 = torch.from_numpy(np.array([reward_p2], dtype=np.float32)).unsqueeze(0)

                # Save the transition for each player
                memory.push(old_state_p1, action_p1, new_state_p1, reward_p1, terminal)
                memory.push(old_state_p2, action_p2, new_state_p2, reward_p2, terminal)

                # Update old state for each player
                old_state_p1 = new_state_p1
                old_state_p2 = new_state_p2

            # Update exploration rate
            nouv_epsilon = epsilon * DECAY_RATE
            if nouv_epsilon > ESPILON_END:
                epsilon = nouv_epsilon

            if epsilon == 0 and game_counter % 100 == 0:
                epsilon = epsilon_temp

        # Get a sample for training
        transitions = memory.sample(min(len(memory), model.batch_size))

        batch = Transition(*zip(*transitions))

        old_state_batch = torch.cat(batch.old_state)
        action_batch = torch.cat(batch.action).long()
        new_state_batch = torch.cat(batch.new_state)
        reward_batch = torch.cat(batch.reward).to(device)

        # Compute predicted Q-values for each action
        pred_q_values_batch = torch.sum(model(old_state_batch).gather(1, action_batch.to(device)), dim=1)
        pred_q_values_next_batch = model(new_state_batch)

        # Compute targeted Q-value for action performed
        target_q_values_batch = torch.cat(
            tuple(reward_batch[i] if batch[4][i] else reward_batch[i] + model.gamma * torch.max(
                pred_q_values_next_batch[i]) for i in range(len(reward_batch)))).to(device)

        # Zero the parameter gradients
        model.zero_grad()

        # Compute the loss
        target_q_values_batch = target_q_values_batch.detach()
        # loss = criterion(pred_q_values_batch,target_q_values_batch)
        loss = F.smooth_l1_loss(pred_q_values_batch, target_q_values_batch)

        # Do backward pass
        loss.backward()
        optimizer.step()

        # Save a backup of the model weights
        torch.save(model.state_dict(), 'save/DQN.bak')
        p1_winrate = p1_victories / GAME_CYCLE

        if game_counter % DISPLAY_CYCLE == 0:
            # Log training metrics to TensorBoard.
            vis_loss = loss.item()

            writer.add_scalar('loss_tracker', vis_loss, game_counter)
            writer.add_scalar('duration_tracker', float(move_counter) / float(DISPLAY_CYCLE), game_counter)
            writer.add_scalar('ratio_tracker', p1_winrate, game_counter)

            move_counter = 0
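Examples #5 and #6 assume ReplayMemory and Transition helpers that are not listed here. Below is a minimal sketch consistent with how they are used above (push with five fields, uniform sampling, len support, and index 4 being the terminal flag); the project's real implementation may differ.

import random
from collections import namedtuple

# Sketch only: field order matters, since batch[4] in the training loops
# refers to the terminal flag.
Transition = namedtuple('Transition',
                        ('old_state', 'action', 'new_state', 'reward', 'terminal'))

class ReplayMemory:
    """Fixed-size circular buffer of transitions."""

    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def push(self, *args):
        # Append until full, then overwrite the oldest entries.
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = Transition(*args)
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        # Uniform random sample to decorrelate training batches.
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)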
Example #6
def train(model):

    # Initialize neural network parameters and optimizer
    optimizer = optim.Adam(model.parameters())
    criterion = nn.MSELoss()

    # Initialize exploration rate
    epsilon = EPSILON_START
    epsilon_temp = float(epsilon)

    # Initialize memory
    memory = ReplayMemory(MEM_CAPACITY)

    # Initialize the game counter
    game_counter = 0
    move_counter = 0

    # Start training
    while True:

        # Initialize the game cycle parameters
        cycle_step = 0
        p1_victories = 0
        p2_victories = 0
        null_games = 0
        player_1 = Ai(epsilon)
        player_2 = Ai(epsilon)
        otherOpponent = True

        # Play a cycle of games
        while cycle_step < GAME_CYCLE:

            # Increment the counters
            game_counter += 1
            cycle_step += 1

            # Initialize the starting positions
            x1 = random.randint(0, MAP_WIDTH - 1)
            y1 = random.randint(0, MAP_HEIGHT - 1)
            x2 = random.randint(0, MAP_WIDTH - 1)
            y2 = random.randint(0, MAP_HEIGHT - 1)
            while x1 == x2 and y1 == y2:
                x1 = random.randint(0, MAP_WIDTH - 1)
                y1 = random.randint(0, MAP_HEIGHT - 1)

            # Initialize the game
            player_1.epsilon = epsilon
            player_2.epsilon = epsilon
            game = Game(MAP_WIDTH, MAP_HEIGHT, [
                PositionPlayer(1, player_1, [x1, y1]),
                PositionPlayer(2, player_2, [x2, y2]),
            ])

            # Get the initial state for each player
            old_state_p1 = game.map().state_for_player(1)
            old_state_p1 = np.reshape(
                old_state_p1,
                (1, 1, old_state_p1.shape[0], old_state_p1.shape[1]))
            old_state_p1 = torch.from_numpy(old_state_p1).float()
            old_state_p2 = game.map().state_for_player(2)
            old_state_p2 = np.reshape(
                old_state_p2,
                (1, 1, old_state_p2.shape[0], old_state_p2.shape[1]))
            old_state_p2 = torch.from_numpy(old_state_p2).float()

            # Run the game
            window = Window(game, 40)
            game.main_loop(window)
            #game.main_loop()

            # Analyze the game
            move_counter += len(game.history)
            terminal = False

            for historyStep in range(len(game.history) - 1):

                # Get the state for each player
                new_state_p1 = game.history[historyStep + 1].map.state_for_player(1)
                new_state_p1 = np.reshape(
                    new_state_p1,
                    (1, 1, new_state_p1.shape[0], new_state_p1.shape[1]))
                new_state_p1 = torch.from_numpy(new_state_p1).float()
                new_state_p2 = game.history[historyStep + 1].map.state_for_player(2)
                new_state_p2 = np.reshape(
                    new_state_p2,
                    (1, 1, new_state_p2.shape[0], new_state_p2.shape[1]))
                new_state_p2 = torch.from_numpy(new_state_p2).float()

                # Get the action for each player
                if game.history[historyStep].player_one_direction is not None:
                    action_p1 = torch.from_numpy(np.array(
                        [game.history[historyStep].player_one_direction.value - 1],
                        dtype=np.float32)).unsqueeze(0)
                    action_p2 = torch.from_numpy(np.array(
                        [game.history[historyStep].player_two_direction.value - 1],
                        dtype=np.float32)).unsqueeze(0)
                else:
                    action_p1 = torch.from_numpy(
                        np.array([0], dtype=np.float32)).unsqueeze(0)
                    action_p2 = torch.from_numpy(
                        np.array([0], dtype=np.float32)).unsqueeze(0)

                # Compute the reward for each player
                reward_p1 = +1
                reward_p2 = +1
                if historyStep + 1 == len(game.history) - 1:
                    if game.winner is None:
                        null_games += 1
                        reward_p1 = 0
                        reward_p2 = 0
                    elif game.winner == 1:
                        reward_p1 = 100
                        reward_p2 = -25
                        p1_victories += 1
                    else:
                        reward_p1 = -25
                        reward_p2 = 100
                        p2_victories += 1
                    terminal = True

                reward_p1 = torch.from_numpy(
                    np.array([reward_p1], dtype=np.float32)).unsqueeze(0)
                reward_p2 = torch.from_numpy(
                    np.array([reward_p2], dtype=np.float32)).unsqueeze(0)

                # Save the transition for each player
                memory.push(old_state_p1, action_p1, new_state_p1, reward_p1,
                            terminal)
                # Player 2's transitions are stored only when it is not a separate opponent.
                if not otherOpponent:
                    memory.push(old_state_p2, action_p2, new_state_p2,
                                reward_p2, terminal)

                # Update old state for each player
                old_state_p1 = new_state_p1
                old_state_p2 = new_state_p2

            # Update exploration rate
            nouv_epsilon = epsilon * DECAY_RATE
            if nouv_epsilon > ESPILON_END:
                epsilon = nouv_epsilon
            if epsilon == 0 and game_counter % 100 == 0:
                epsilon = epsilon_temp

        # Get a sample for training
        transitions = memory.sample(min(len(memory), model.batch_size))
        batch = Transition(*zip(*transitions))
        old_state_batch = torch.cat(batch.old_state)
        action_batch = torch.cat(batch.action).long()
        new_state_batch = torch.cat(batch.new_state)
        reward_batch = torch.cat(batch.reward)

        # Compute predicted Q-values for each action
        pred_q_values_batch = torch.sum(
            model(old_state_batch).gather(1, action_batch), dim=1)
        pred_q_values_next_batch = model(new_state_batch)

        # Compute targeted Q-value for action performed
        target_q_values_batch = torch.cat(
            tuple(reward_batch[i] if batch[4][i] else reward_batch[i] +
                  model.gamma * torch.max(pred_q_values_next_batch[i])
                  for i in range(len(reward_batch))))

        # zero the parameter gradients
        model.zero_grad()

        # Compute the loss
        target_q_values_batch = target_q_values_batch.detach()
        loss = criterion(pred_q_values_batch, target_q_values_batch)

        # Do backward pass
        loss.backward()
        optimizer.step()

        # Save a backup of the model weights
        torch.save(model.state_dict(), 'ais/' + folderName + '/ai.bak')

        # Display results
        if game_counter % DISPLAY_CYCLE == 0:
            loss_value = loss.item()
            print("--- Match", game_counter, "---")
            print("Average duration :",
                  float(move_counter) / float(DISPLAY_CYCLE))
            print("Loss =", loss_value)
            print("Epsilon =", epsilon)
            print("")
            with open('ais/' + folderName + '/data.txt', 'a') as myfile:
                myfile.write(
                    str(game_counter) + ', ' +
                    str(float(move_counter) / float(DISPLAY_CYCLE)) + ', ' +
                    str(loss_value) + '\n')
            move_counter = 0