Пример #1
0
    diff = diff[tim_sux:chris_sux,:,0]
    indices = np.where(diff == 200)
    y = np.mean(indices[0]) + tim_sux
    x = np.mean(indices[1]) # chris_sux
    return (x,y)

# Train a QLearner on the 'Breakout-v0' environment.  The state handed to the
# learner each step is the integer difference between the value returned by
# find_paddle() and the x value returned by find_ball().
env = gym.make('Breakout-v0')
learner = QLearner(num_states=500, num_actions=env.action_space.n)

for episode in range(2000):
    observation = env.reset()
    action = learner.set_initial_state(0)
    prev = observation
    total_reward = 0

    for step in range(10000):
        # Rendering is left switched off here: env.render()
        prev = observation
        observation, reward, done, info = env.step(action)
        total_reward += reward

        # Both frames are passed to find_ball(); only its x result is used.
        paddle_pos = find_paddle(observation)
        ball_x, ball_y = find_ball(prev, observation)
        try:
            offset = int(paddle_pos - ball_x)
            action = learner.move(offset, reward)
        except ValueError:
            # Presumably int() failed because a position came back as NaN
            # (ball or paddle not located) -- TODO confirm.  Fall back to the
            # fixed state 250 and ask the learner for a forced random action.
            offset = 250
            action = learner.move(offset, reward, force_random=True)

        if not done:
            continue
        print("Episode finished after {} timesteps.  {} reward".format(step + 1, total_reward))
        break
Пример #2
0
# Train a QLearner on the 'Breakout-v0' environment while rendering every
# frame.  The state handed to the learner is the integer difference between
# the value returned by find_paddle() and the x value returned by find_ball(),
# clamped to the range [-15, 15].
env = gym.make('Breakout-v0')
learner = QLearner(num_states=200, num_actions=env.action_space.n)
for i_episode in range(2000):
    observation = env.reset()
    action = learner.set_initial_state(0)
    prev = observation
    for t in range(10000):
        env.render()
        paddle = find_paddle(observation)
        x, y = find_ball(prev, observation)
        try:
            # Clamp the offset so it stays within a small, fixed set of states.
            feature = max(-15, min(15, int(paddle - x)))
        except ValueError:
            # Presumably int() failed because a position came back as NaN
            # (ball or paddle not located) -- TODO confirm.  Take a random
            # action for this step and report the fixed state 100.
            action = env.action_space.sample()
            feature = 100
        prev = observation
        observation, reward, done, info = env.step(action)
        # Formatted as a single string so the output ("<feature> <action>") is
        # the same on Python 2 and Python 3; the former bare print statement
        # was a SyntaxError on Python 3.
        print("{} {}".format(feature, action))
        # NOTE(review): `feature` was computed from the observation taken
        # before env.step(), while `reward` comes from after it -- confirm this
        # pairing is what the learner expects.
        action = learner.move(feature, reward)

        if done:
            print("Episode finished after {} timesteps".format(t+1))
            break