示例#1
0
def generate_dataset(gen_function=gen_random_data,
                     num_training_samples=60000,
                     num_testing_samples=10000):
    """Return generated boards and scores as training/testing data.

    Every sample is produced by one call to ``gen_function()`` and is
    labelled with the score that ``bs.analyze_board`` reports for it. The
    training samples are generated first, then the testing samples.

    Arguments:
        gen_function (callable): Zero-argument function that generates one
                                 board per call.
        num_training_samples (int): The number of training samples to generate.
        num_testing_samples (int): The number of testing samples to generate.
    Returns:
        (tuple), (tuple): ``(x_train, y_train), (x_test, y_test)`` as Numpy
        arrays with the training and test data.
    """

    def _generate(num_samples):
        """Build ``num_samples`` (board, score) pairs as two Numpy arrays."""
        boards = []
        scores = []
        for _ in range(num_samples):
            data = gen_function()
            # Only the score is used as a label; the scoring position is not.
            score, _score_pos = bs.analyze_board(data)
            boards.append(data)
            scores.append(score)
        # np.array builds a NEW array and leaves the list untouched, so the
        # converted objects themselves have to be returned.
        return np.array(boards), np.array(scores)

    train_data = _generate(num_training_samples)
    test_data = _generate(num_testing_samples)
    return train_data, test_data
示例#2
0
    def choose_swap(self, board, bOffline=False):
        """Pick the two-tile swap whose resulting board is rated highest.

        Every pair of positions ``index1 < index2`` whose tiles both differ
        from '.' is swapped on a copy of ``board``. The candidate boards are
        then rated and the one with the largest rating is selected.

        Arguments:
            board: Board indexed by flat tile position; positions holding '.'
                   are never swapped.
            bOffline (bool): When true, rate candidates with the first value
                             returned by ``BoardScorer.analyze_board``;
                             otherwise rate them with ``self.nn.predict`` on
                             the channel-split candidates.
        Returns:
            tuple: ``(((r1, c1), (r2, c2)), swapped_board)`` for the chosen
            swap, with row/column derived from ``self.game.numCols``.
        """
        tile_count = self.game.numTiles
        col_count = self.game.numCols

        def to_row_col(flat_index):
            # Translate a flat tile position into a (row, column) pair.
            return (int(flat_index // col_count), int(flat_index % col_count))

        candidate_swaps = []
        candidate_boards = []

        for first in range(0, tile_count):
            if board[first] == '.':
                continue
            for second in range(first + 1, tile_count):
                if board[second] == '.':
                    continue

                # Exchange the two tiles on a private copy of the board.
                trial_board = np.copy(board)
                trial_board[first], trial_board[second] = (
                    board[second], board[first])

                candidate_swaps.append((to_row_col(first), to_row_col(second)))
                candidate_boards.append(trial_board)

        if not bOffline:
            channels = estimatorModel.split_into_channels(candidate_boards)
            predictions = self.nn.predict(channels).flatten()
        else:
            offline_scores = [BoardScorer.analyze_board(candidate)[0]
                              for candidate in candidate_boards]
            predictions = np.array(offline_scores)

        best_index = predictions.argmax()
        print("chose index", best_index)

        return (candidate_swaps[best_index], candidate_boards[best_index])
示例#3
0
    else:
        #inputs = (eval_features, eval_labels)
        inputs = (features, scores)

    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices(inputs)

    # Batch the examples
    assert batch_size is not None, "batch_size must not be None"
    dataset = dataset.batch(batch_size)

    # Return the dataset.
    return dataset


if __name__ == "__main__":
    # Script entry point: build the network, then repeatedly train it on
    # freshly generated random boards labelled by BoardScorer.
    tf.logging.set_verbosity(tf.logging.INFO)

    nn = CreateNetwork()

    for round_index in range(100):
        batch_boards, batch_scores = [], []

        # Generate and score one board at a time so each board is scored
        # right after it has been created.
        for _ in range(100000):
            board = list(BoardGen.gen_random_data())
            board_score = BoardScorer.analyze_board(board)[0]
            batch_boards.append(board)
            batch_scores.append(board_score)

        def batch_input_fn():
            # Feed the batch generated in this round to the estimator.
            return train_input_fn(batch_boards, batch_scores)

        nn.train(input_fn=batch_input_fn)
        #nn.evaluate(input_fn=lambda:eval_input_fn(batch_boards, batch_scores))
示例#4
0
    print("run 13")
    player = SuperBall.SuperBall()

    boards = []
    rewards = []

    for i in range(0, 75):
        print("episode", i)

        player.StartGame()

        while not player.gameOver:
            start_board = copy.deepcopy(player.board)

            if player.numOpenTiles < 5:
                score, score_pos = BoardScorer.analyze_board(start_board)

                if score_pos == (-1, -1):
                    boards.append((start_board, start_board))
                    rewards.append(-1)

                    player.gameOver = True
                    print("game over")

                else:
                    tiles_scored = player.score_ignore_color(
                        score_pos[0], score_pos[1])

                    end_board = copy.deepcopy(player.board)
                    boards.append((start_board, end_board))
                    rewards.append(tiles_scored)
示例#5
0
    def generate_episode(self, self_play=True):
        """Play one game to completion, fitting the value network after each move.

        On every turn the available actions and their afterstates are obtained
        from ``self.get_actions``, each afterstate is valued, and the action
        with the highest value is applied to ``self.game``. From the second
        turn on, ``self.nn`` is fitted for one epoch so that the previously
        saved afterstate maps to ``reward + discount_factor * V(new afterstate)``.

        Arguments:
            self_play (bool): When true, afterstates are valued with
                              ``self.nn.predict``; otherwise with the score
                              returned by ``bs.analyze_board``.
        Returns:
            None. The final score is printed, followed by a bell character.
        """
        self.game.StartGame()

        saved_afterstate = None
        iter_count = 0

        while not self.game.gameOver:

            actions, afterstates, action_types = self.get_actions(
                self.game.board)

            # Evaluate each action's value from self-play or otherwise
            if self_play:
                channels = estimatorModel.split_into_channels(afterstates)
                action_values = self.nn.predict(channels)
            else:
                action_values = np.array(
                    [bs.analyze_board(afterstate)[0]
                     for afterstate in afterstates],
                    dtype=float)

            # If there are fewer than 5 open tiles, all swaps yield GAME OVER
            if self.game.numOpenTiles < 5:
                for index, action_type in enumerate(action_types):
                    if action_type == 'swap':
                        action_values[index] = -1

            best_index = action_values.argmax()
            best_action = actions[best_index]

            # All actions lead to game over state
            if action_values[best_index] == -1:
                self.game.gameOver = True

            # Swap or score the tiles based on chosen action
            elif action_types[best_index] == 'swap':
                (r1, c1), (r2, c2) = best_action[0], best_action[1]
                self.game.swap(r1, c1, r2, c2)
            elif action_types[best_index] == 'score':
                r, c = best_action[0], best_action[1]
                self.game.score(r, c)

            new_afterstate = np.copy(self.game.board)

            # Give large negative reward for losing
            reward = -1000 if self.game.gameOver else 0

            # Find the target for the value function
            channels = estimatorModel.split_into_channels([new_afterstate])
            v_new_afterstate = self.nn.predict(channels)
            v_target = reward + self.discount_factor * v_new_afterstate

            # There is no previous afterstate to update on the first turn.
            if iter_count > 0:
                channels = estimatorModel.split_into_channels(
                    [saved_afterstate])
                # Train this agent's own network (not a module-level object)
                # so the saved afterstate's value moves toward the target.
                self.nn.fit(channels, [v_target], epochs=1, batch_size=1)

            saved_afterstate = new_afterstate

            iter_count += 1

        print('Final score:', self.game.totalScore)

        # Sound the bell!
        print('\007')
        return