Пример #1
0
def test_single_continuous_dqn_input():
    nb_actions = 2

    V_model = Sequential()
    V_model.add(Flatten(input_shape=(2, 3)))
    V_model.add(Dense(1))

    mu_model = Sequential()
    mu_model.add(Flatten(input_shape=(2, 3)))
    mu_model.add(Dense(nb_actions))

    L_input = Input(shape=(2, 3))
    L_input_action = Input(shape=(nb_actions, ))
    x = concatenate([Flatten()(L_input), L_input_action])
    x = Dense(((nb_actions * nb_actions + nb_actions) // 2))(x)
    L_model = Model(input=[L_input_action, L_input], output=x)

    memory = SequentialMemory(limit=10, window_length=2)
    agent = NAFAgent(nb_actions=nb_actions,
                     V_model=V_model,
                     L_model=L_model,
                     mu_model=mu_model,
                     memory=memory,
                     nb_steps_warmup=5,
                     batch_size=4)
    agent.compile('sgd')
    agent.fit(MultiInputTestEnv((3, )), nb_steps=10)
Пример #2
0
def test_cdqn():
    # TODO: replace this with a simpler environment where we can actually test if it finds a solution
    env = gym.make('Pendulum-v0')
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.shape[0]

    V_model = Sequential()
    V_model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    V_model.add(Dense(16))
    V_model.add(Activation('relu'))
    V_model.add(Dense(1))

    mu_model = Sequential()
    mu_model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    mu_model.add(Dense(16))
    mu_model.add(Activation('relu'))
    mu_model.add(Dense(nb_actions))

    action_input = Input(shape=(nb_actions, ), name='action_input')
    observation_input = Input(shape=(1, ) + env.observation_space.shape,
                              name='observation_input')
    x = concatenate([action_input, Flatten()(observation_input)])
    x = Dense(16)(x)
    x = Activation('relu')(x)
    x = Dense(((nb_actions * nb_actions + nb_actions) // 2))(x)
    L_model = Model(input=[action_input, observation_input], output=x)

    memory = SequentialMemory(limit=1000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                              mu=0.,
                                              sigma=.3,
                                              size=nb_actions)
    agent = NAFAgent(nb_actions=nb_actions,
                     V_model=V_model,
                     L_model=L_model,
                     mu_model=mu_model,
                     memory=memory,
                     nb_steps_warmup=50,
                     random_process=random_process,
                     gamma=.99,
                     target_model_update=1e-3)
    agent.compile(Adam(lr=1e-3))

    agent.fit(env,
              nb_steps=400,
              visualize=False,
              verbose=0,
              nb_max_episode_steps=100)
    h = agent.test(env,
                   nb_episodes=2,
                   visualize=False,
                   nb_max_episode_steps=100)
Пример #3
0
def test_multi_continuous_dqn_input():
    nb_actions = 2

    V_input1 = Input(shape=(2, 3))
    V_input2 = Input(shape=(2, 4))
    x = concatenate([V_input1, V_input2])
    x = Flatten()(x)
    x = Dense(1)(x)
    V_model = Model(input=[V_input1, V_input2], output=x)

    mu_input1 = Input(shape=(2, 3))
    mu_input2 = Input(shape=(2, 4))
    x = concatenate([mu_input1, mu_input2])
    x = Flatten()(x)
    x = Dense(nb_actions)(x)
    mu_model = Model(input=[mu_input1, mu_input2], output=x)

    L_input1 = Input(shape=(2, 3))
    L_input2 = Input(shape=(2, 4))
    L_input_action = Input(shape=(nb_actions, ))
    x = concatenate([L_input1, L_input2])
    x = concatenate([Flatten()(x), L_input_action])
    x = Dense(((nb_actions * nb_actions + nb_actions) // 2))(x)
    L_model = Model(input=[L_input_action, L_input1, L_input2], output=x)

    memory = SequentialMemory(limit=10, window_length=2)
    processor = MultiInputProcessor(nb_inputs=2)
    agent = NAFAgent(nb_actions=nb_actions,
                     V_model=V_model,
                     L_model=L_model,
                     mu_model=mu_model,
                     memory=memory,
                     nb_steps_warmup=5,
                     batch_size=4,
                     processor=processor)
    agent.compile('sgd')
    agent.fit(MultiInputTestEnv([(3, ), (4, )]), nb_steps=10)
Пример #4
0
def test_ddpg():
    # TODO: replace this with a simpler environment where we can actually test if it finds a solution
    env = gym.make('Pendulum-v0')
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.shape[0]

    actor = Sequential()
    actor.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))

    action_input = Input(shape=(nb_actions, ), name='action_input')
    observation_input = Input(shape=(1, ) + env.observation_space.shape,
                              name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = concatenate([action_input, flattened_observation])
    x = Dense(16)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(input=[action_input, observation_input], output=x)

    memory = SequentialMemory(limit=1000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      nb_steps_warmup_critic=50,
                      nb_steps_warmup_actor=50,
                      random_process=random_process,
                      gamma=.99,
                      target_model_update=1e-3)
    agent.compile([Adam(lr=1e-3), Adam(lr=1e-3)])

    agent.fit(env,
              nb_steps=400,
              visualize=False,
              verbose=0,
              nb_max_episode_steps=100)
    h = agent.test(env,
                   nb_episodes=2,
                   visualize=False,
                   nb_max_episode_steps=100)
mu_model = Sequential()
mu_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
mu_model.add(Dense(16))
mu_model.add(Activation('relu'))
mu_model.add(Dense(16))
mu_model.add(Activation('relu'))
mu_model.add(Dense(16))
mu_model.add(Activation('relu'))
mu_model.add(Dense(nb_actions))
mu_model.add(Activation('linear'))
print(mu_model.summary())

action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
x = concatenate([action_input, Flatten()(observation_input)])
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(((nb_actions * nb_actions + nb_actions) / 2))(x)
x = Activation('linear')(x)
L_model = Model(input=[action_input, observation_input], output=x)
print(L_model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
processor = PendulumProcessor()
memory = SequentialMemory(limit=100000, window_length=1)
actor.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
actor.add(Dense(400))
actor.add(Activation('relu'))
actor.add(Dense(300))
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
actor.add(Activation('tanh'))
print(actor.summary())

action_input = Input(shape=(nb_actions, ), name='action_input')
observation_input = Input(shape=(1, ) + env.observation_space.shape,
                          name='observation_input')
flattened_observation = Flatten()(observation_input)
x = Dense(400)(flattened_observation)
x = Activation('relu')(x)
x = concatenate([x, action_input])
x = Dense(300)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(input=[action_input, observation_input], output=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                          theta=.15,
                                          mu=0.,
                                          sigma=.1)
agent = DDPGAgent(nb_actions=nb_actions,