Пример #1
0
    indices = np.where(line == 200)
    return np.mean(indices)

def find_ball(a,b):
    diff = b-a
    diff = diff[tim_sux:chris_sux,:,0]
    indices = np.where(diff == 200)
    y = np.mean(indices[0]) + tim_sux
    x = np.mean(indices[1]) # chris_sux
    return (x,y)

env = gym.make('Breakout-v0')
learner = QLearner(num_states=500, num_actions=env.action_space.n)
for i_episode in range(2000):
    observation = env.reset()
    action = learner.set_initial_state(0)
    prev = observation
    total_reward = 0
    for t in range(10000):
        # env.render()
        prev = observation
        observation, reward, done, info = env.step(action)
        total_reward += reward
        paddle = find_paddle(observation)
        x,y = find_ball(prev, observation)
        try:
            feature = int(paddle - x)
            action = learner.move(feature, reward)
        except ValueError:
            feature = 250
            action = learner.move(feature, reward, force_random=True)