Example No. 1
import numpy as np


def state_to_index(state):
    # Function header reconstructed (name assumed): the original snippet
    # starts mid-function. Flattens a discretized game state into a single
    # Q-table row index.
    index = 0
    ball_x = state[0]
    ball_y = state[1]
    ball_speed_x = state[2]
    ball_speed_y = state[3]
    paddle_x = state[4]

    # Weight each discretized component into one flat index
    # (assumes each component stays within the range its weight allows)
    index += 2000 * ball_speed_x
    index += 1000 * ball_speed_y
    index += 100 * ball_x
    index += 10 * ball_y

    index += 1 * paddle_x
    return index


# Init environment
env = Game()

# Create Q table
action_space_size = env.n_actions
state_space_size = env.n_states_disc
q_table = np.zeros(
    (state_space_size, action_space_size))  # n_states x n_actions

# Hyper parameters
num_episodes = 10000
max_steps_per_episode = 1000000

learning_rate = 0.1  # alpha
discount_rate = 0.99  # gamma

exploration_rate = 1  # epsilon
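
# The training loop that consumes these hyper parameters is not shown above.
# A minimal epsilon-greedy Q-learning sketch follows, assuming env exposes a
# Gym-style reset()/step() API returning (state, reward, done) and using the
# state_to_index helper named above; the decay schedule is illustrative only.
import random

for episode in range(num_episodes):
    state = state_to_index(env.reset())
    for step in range(max_steps_per_episode):
        # Epsilon-greedy action selection
        if random.random() < exploration_rate:
            action = random.randrange(action_space_size)
        else:
            action = int(np.argmax(q_table[state]))

        next_state, reward, done = env.step(action)  # assumed step() signature
        next_state = state_to_index(next_state)

        # Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (r + gamma * max_a' Q(s',a'))
        q_table[state, action] = (1 - learning_rate) * q_table[state, action] \
            + learning_rate * (reward + discount_rate * np.max(q_table[next_state]))

        state = next_state
        if done:
            break

    # Illustrative decay; the original script's schedule is not shown here
    exploration_rate = max(0.01, exploration_rate * 0.995)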
Example No. 2
lambda_2 = 1.0  # weight of the supervised large-margin loss
lambda_3 = 1e-5  # weight of the L2 regularization loss
l_margin = 0.8  # margin used in the large-margin classification loss
e_d = 0.005  # priority bonus for demonstration transitions (TODO: try 1.0, as in the paper)
e_a = 0.001  # priority bonus for agent-generated transitions
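
# These coefficients look like DQfD-style loss weights. As a minimal sketch of
# what l_margin typically parameterizes, the function below computes the
# large-margin supervised loss for a single demonstration transition; the
# function and argument names are assumptions, not the Agent's actual API.
import numpy as np

def large_margin_loss(q_values, demo_action, margin=l_margin):
    q_values = np.asarray(q_values, dtype=float)
    # l(a_E, a) = 0 if a == a_E, else `margin`
    margins = np.full(q_values.shape, margin)
    margins[demo_action] = 0.0
    # max_a [Q(s, a) + l(a_E, a)] - Q(s, a_E)
    return float(np.max(q_values + margins) - q_values[demo_action])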

# Training and Exploration Parameters
lr = 0.002
batch_size = 64
epsilon = 1.0  # Decreased over time
max_exploration_rate = 1
min_exploration_rate = 0.001
exploration_decay_rate = 0.01
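
# A minimal sketch of how these three constants usually combine into a
# per-episode schedule: epsilon decays exponentially from
# max_exploration_rate toward min_exploration_rate. Whether the original
# script uses exactly this formula is an assumption.
import math

def decayed_epsilon(episode):
    return min_exploration_rate + \
        (max_exploration_rate - min_exploration_rate) * math.exp(-exploration_decay_rate * episode)

# e.g. decayed_epsilon(0) == 1.0, decayed_epsilon(500) is roughly 0.008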

# Initialize Environment
env = Game()

# Initialize Agent
agent = Agent(lr=lr,
              eps=epsilon,
              gamma=gamma,
              max_memory=max_memory,
              n_steps=n_steps,
              batch_size=batch_size,
              tau=tau,
              lambda_1=lambda_1,
              lambda_2=lambda_2,
              lambda_3=lambda_3,
              l_margin=l_margin)

# Load Demonstration Data