Exemplo n.º 1
0
def training_4():
    """Train both game variants (SnakeAI and SnakeAIBorders) with two
    reward schemes, running 3 independent trainings per combination and
    saving the policy plus returns/losses CSVs for each run."""
    snakes_games = [SnakeAI([30, 30]), SnakeAIBorders([30, 30])]
    # The reward schemes are loop-invariant: define them once instead of
    # rebuilding the list on every snake iteration.
    rewards = [
        {
            'aprox': [1, 0],
            'eat': [10, 0],
            'dead': [-100, 0]
        },
        {
            'aprox': [2, -1],
            'eat': [10, 0],
            'dead': [-100, 0]
        },
    ]

    for snake in snakes_games:
        for reward in rewards:
            # BUG FIX: use the current snake's state size, not the first
            # game's (`snakes_games[0]`) — the two variants may expose
            # different state lengths, and every other training loop in
            # this file uses len(snake.state()).
            agent_environment = SnakeGameEnv(snake, reward,
                                             len(snake.state()))
            for i in range(3):
                Lp = LearningProcess(agent_environment)
                Lp.pre_learning_process()
                returns, losses = Lp.training()
                visualization(Lp, returns, losses)
                path = Lp.policy_saver(i)
                save_returns = path + '/returns.csv'
                save_losses = path + '/losses.csv'
                write_data(save_returns, returns)
                write_data(save_losses, losses)
Exemplo n.º 2
0
def playing_AI():
    """Load the previously saved policy and let it play on a 300x300
    screen rendering a 30x30 grid at speed 5."""
    game = SnakeAI([30, 30])
    env = SnakeGameEnv(game)
    display = Screen(300, 300, [30, 30], 5)
    process = LearningProcess(env)
    process.play_previous_policy(display)
Exemplo n.º 3
0
def training():
    """Run one complete training cycle on a plain SnakeAI game and
    persist the resulting policy.

    NOTE(review): the environment is built with a scalar reward (-100)
    while other examples pass a reward dictionary — confirm SnakeGameEnv
    accepts both forms.
    """
    env = SnakeGameEnv(SnakeAI([30, 30]), -100)
    process = LearningProcess(env)
    process.pre_learning_process()
    process.training()
    process.policy_saver()
Exemplo n.º 4
0
def samples_1():
    """Average return of the stored policy (reward 0, iteration 0) on a
    plain SnakeAI game, evaluated over 30 episodes."""
    game = SnakeAI([30, 30])
    reward_scheme = {'aprox': [1, 0], 'eat': [10, 0], 'dead': [-100, 0]}
    process = LearningProcess(SnakeGameEnv(game, reward_scheme, 33))
    loaded_policy = process.load_previous_policy(0, 0)
    return compute_avg_return(process.eval_env, loaded_policy, 30)
Exemplo n.º 5
0
def training_2():
    """Run one complete training cycle on the SnakeAIBorders variant
    (state size 33) and persist the resulting policy."""
    game = SnakeAIBorders([30, 30])
    reward_scheme = {'aprox': [1, 0], 'eat': [10, 0], 'dead': [-100, 0]}
    process = LearningProcess(SnakeGameEnv(game, reward_scheme, 33))
    process.pre_learning_process()
    process.training()
    process.policy_saver()
Exemplo n.º 6
0
def training_3():
    """Train SnakeAIBorders, save the policy and the returns/losses CSVs,
    then read the CSVs back and visualize them again as a round-trip
    sanity check on the persistence helpers."""
    game = SnakeAIBorders([30, 30])
    reward_scheme = {'aprox': [1, 0], 'eat': [10, 0], 'dead': [-100, 0]}
    process = LearningProcess(SnakeGameEnv(game, reward_scheme, 33))
    process.pre_learning_process()
    returns, losses = process.training()
    visualization(process, returns, losses)
    base = process.policy_saver()
    returns_file = base + '/returns.csv'
    losses_file = base + '/losses.csv'
    write_data(returns_file, returns)
    write_data(losses_file, losses)
    # Round-trip: re-plot from the files just written.
    reloaded_returns = read_data(returns_file)
    reloaded_losses = read_data(losses_file)
    visualization(process, reloaded_returns, reloaded_losses)
Exemplo n.º 7
0
def training_loops_2(snakes_games, rewards, redundance):
    """Train every game variant with every reward scheme, `redundance`
    times each, saving the policy and the returns/losses CSVs per run.

    snakes_games (list): game objects to train.
    rewards (list): reward dictionaries for the environment.
    redundance (int): number of repeated trainings per combination.
    """
    for snake in snakes_games:
        for reward in rewards:
            env = SnakeGameEnv(snake, reward, len(snake.state()))
            process = LearningProcess(env)
            for run in range(redundance):
                print('*-*' * 15)
                print(
                    'iteration {} using the reward {} with the rules/game {} and input {}'
                    .format(run, reward, snake.__class__.__name__,
                            len(snake.state())))
                process.pre_learning_process()
                returns, losses = process.training()
                # NOTE(review): only the run index is passed here, unlike
                # training_loops which also passes the reward index —
                # different rewards may overwrite each other's saved
                # policies; confirm against policy_saver's layout.
                base = process.policy_saver(run)
                write_data(base + '/returns.csv', returns)
                write_data(base + '/losses.csv', losses)
Exemplo n.º 8
0
def sampler(snake, reward, num_reward=0, iteration=0, num_episodes=30, points=False):
    """Evaluate n samples of a trained network.

    snake (object): type of game.
    reward (dict): reward scheme of the snake game environment.
    num_reward (int): number which identifies the reward the policy was
                      trained with.
    iteration (int): number of the training iteration to load.
    num_episodes (int): amount of samples to be taken.
    points (bool): if True, the return is a list of lists which contains
                   the amount of points per step; each inner list's length
                   accords with the number of steps the snake 'survived'.
                   If False, the average return over `num_episodes`
                   episodes is returned instead.
    """

    # Rebuild the environment so the loaded policy sees the same reward
    # scheme and state size it was trained with.
    agent_environment = SnakeGameEnv(snake, reward, len(snake.state()))
    Lp = LearningProcess(agent_environment)
    policy = Lp.load_previous_policy(num_reward, iteration)

    if points:
        return points_history(Lp.sample_env, policy, num_episodes)
    else:
        return compute_avg_return(Lp.sample_env, policy, num_episodes)
Exemplo n.º 9
0
def training_loops(snakes_games, rewards, redundance):
    """Total training sweep: every type of game is trained with all the
    rewards, `redundance` iterations each; each run's policy and its
    returns/losses CSVs are saved under a path keyed by the reward index
    and the run index.

    snakes_games (list): List of game objects to be trained.
    rewards (list): List of dictionaries (rewards of the snake game).
    redundance (int): Number of iterations per combination.
    """
    for snake in snakes_games:
        for reward_idx, reward in enumerate(rewards):
            env = SnakeGameEnv(snake, reward, len(snake.state()))
            process = LearningProcess(env)
            for run in range(redundance):
                print('*-*' * 15)
                print(
                    'iteration {} using the reward {} with the rules/game {} and input {}'
                    .format(run, reward, snake.__class__.__name__,
                            len(snake.state())))
                process.pre_learning_process()
                returns, losses = process.training()
                base = process.policy_saver(reward_idx, run)
                write_data(base + '/returns.csv', returns)
                write_data(base + '/losses.csv', losses)
Exemplo n.º 10
0
def samples_3(snake, reward, num_reward=0, iteration=0, num_episodes=30):
    """Per-step point histories of a stored policy, sampled over
    `num_episodes` episodes on the given game/reward combination."""
    env = SnakeGameEnv(snake, reward, len(snake.state()))
    process = LearningProcess(env)
    loaded_policy = process.load_previous_policy(num_reward, iteration)
    return points_history(process.sample_env, loaded_policy, num_episodes)
Exemplo n.º 11
0
def samples_2(snake, reward, num_reward=0, iteration=0, num_episodes=30):
    """Average return of a stored policy, evaluated over `num_episodes`
    episodes on the given game/reward combination."""
    env = SnakeGameEnv(snake, reward, len(snake.state()))
    process = LearningProcess(env)
    loaded_policy = process.load_previous_policy(num_reward, iteration)
    return compute_avg_return(process.eval_env, loaded_policy, num_episodes)