Example #1
# We patch the environment to be closer to what Mnih et al. actually do: the environment
# repeats each action 4 times, and during training a game is considered to be over as soon
# as a life is lost.
def _step(a):
    reward = 0.0
    action = env._action_set[a]
    lives_before = env.ale.lives()
    for _ in range(4):
        reward += env.ale.act(action)
    ob = env._get_obs()
    done = env.ale.game_over() or (args.mode == 'train'
                                   and lives_before != env.ale.lives())
    return ob, reward, done, {}


env._step = _step
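
# Optional sketch, not part of the original example: a helper (hypothetical name) that runs
# one random episode with the patched environment. Assuming the old gym API, env.step()
# dispatches to the patched _step above, so with args.mode == 'train', `done` should become
# True as soon as the first life is lost rather than only at game over.
def _sanity_check_patched_env():
    ob = env.reset()
    done, total_reward, steps = False, 0.0, 0
    while not done:
        # Each call repeats the chosen action 4 times on the underlying ALE (see _step).
        ob, reward, done, _ = env.step(env.action_space.sample())
        total_reward += reward
        steps += 1
    print('episode finished after %d steps, total reward %.1f' % (steps, total_reward))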

# Next, we build our model. We use the same model that was described by Mnih et al. (2015).
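# For reference, the network of Mnih et al. (2015) takes four stacked 84x84 grayscale frames
# as input and uses three convolutional layers (32 filters of 8x8 with stride 4, 64 of 4x4
# with stride 2, 64 of 3x3 with stride 1), a fully connected layer of 512 units, all with
# ReLU activations, and a linear output layer with one unit per action.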
input_shape = (WINDOW_LENGTH, ) + INPUT_SHAPE
model_1 = Sequential()
if K.image_dim_ordering() == 'tf':
    # (width, height, channels)
    model_1.add(Permute((2, 3, 1), input_shape=input_shape))
elif K.image_dim_ordering() == 'th':
    # (channels, width, height)
    model_1.add(Permute((1, 2, 3), input_shape=input_shape))
else:
    raise RuntimeError('Unknown image_dim_ordering.')
model_1.add(Convolution2D(32, 8, 8, subsample=(4, 4)))
model_1.add(Activation('relu'))
model_1.add(Convolution2D(64, 4, 4, subsample=(2, 2)))